├── .gitignore ├── Makefile ├── README.md ├── raft-by-bertyoung.pptx └── src ├── .gitignore ├── kvraft ├── client.go ├── common.go ├── config.go ├── faq ├── lab3 ├── server.go ├── snapshot_hints └── test_test.go ├── labgob ├── labgob.go └── test_test.go ├── labrpc ├── labrpc.go └── test_test.go ├── linearizability ├── bitset.go ├── linearizability.go ├── model.go └── models.go ├── main ├── diskvd.go ├── ii.go ├── lockc.go ├── lockd.go ├── mr-challenge.txt ├── mr-testout.txt ├── pbc.go ├── pbd.go ├── pg-being_ernest.txt ├── pg-dorian_gray.txt ├── pg-frankenstein.txt ├── pg-grimm.txt ├── pg-huckleberry_finn.txt ├── pg-metamorphosis.txt ├── pg-sherlock_holmes.txt ├── pg-tom_sawyer.txt ├── test-ii.sh ├── test-mr.sh ├── test-wc.sh ├── viewd.go └── wc.go ├── mapreduce ├── common.go ├── common_map.go ├── common_reduce.go ├── common_rpc.go ├── lab1 ├── master.go ├── master_rpc.go ├── master_splitmerge.go ├── schedule.go ├── test_test.go └── worker.go ├── raft ├── README.md ├── agreement-despite-follower-disconnection.log ├── config.go ├── lab2 ├── persister.go ├── raft.go ├── test_test.go └── util.go ├── shardkv ├── client.go ├── common.go ├── config.go ├── server.go └── test_test.go └── shardmaster ├── client.go ├── common.go ├── config.go ├── server.go └── test_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | pkg/ 2 | api.key 3 | *-handin.tar.gz 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # This is the Makefile helping you submit the labs. 2 | # Just create 6.824/api.key with your API key in it, 3 | # and submit your lab with the following command: 4 | # $ make [lab1|lab2a|lab2b|lab2c|lab3a|lab3b|lab4a|lab4b] 5 | 6 | LABS=" lab1 lab2a lab2b lab2c lab3a lab3b lab4a lab4b " 7 | 8 | %: 9 | @echo "Preparing $@-handin.tar.gz" 10 | @echo "Checking for committed temporary files..." 11 | @if git ls-files | grep -E 'mrtmp|mrinput' > /dev/null; then \ 12 | echo "" ; \ 13 | echo "OBS! You have committed some large temporary files:" ; \ 14 | echo "" ; \ 15 | git ls-files | grep -E 'mrtmp|mrinput' | sed 's/^/\t/' ; \ 16 | echo "" ; \ 17 | echo "Follow the instructions at http://stackoverflow.com/a/308684/472927" ; \ 18 | echo "to remove them, and then run make again." ; \ 19 | echo "" ; \ 20 | exit 1 ; \ 21 | fi 22 | @if echo $(LABS) | grep -q " $@ " ; then \ 23 | echo "Tarring up your submission..." ; \ 24 | tar cvzf $@-handin.tar.gz \ 25 | "--exclude=src/main/pg-*.txt" \ 26 | "--exclude=src/main/diskvd" \ 27 | "--exclude=src/mapreduce/824-mrinput-*.txt" \ 28 | "--exclude=mrtmp.*" \ 29 | "--exclude=src/main/diff.out" \ 30 | Makefile src; \ 31 | if ! test -e api.key ; then \ 32 | echo "Missing $(PWD)/api.key. Please create the file with your key in it or submit the $@-handin.tar.gz via the web interface."; \ 33 | else \ 34 | echo "Are you sure you want to submit $@? 
Enter 'yes' to continue:"; \ 35 | read line; \ 36 | if test "$$line" != "yes" ; then echo "Giving up submission"; exit; fi; \ 37 | if test `stat -c "%s" "$@-handin.tar.gz" 2>/dev/null || stat -f "%z" "$@-handin.tar.gz"` -ge 20971520 ; then echo "File exceeds 20MB."; exit; fi; \ 38 | mv api.key api.key.fix ; \ 39 | cat api.key.fix | tr -d '\n' > api.key ; \ 40 | rm api.key.fix ; \ 41 | curl -F file=@$@-handin.tar.gz -F "key= /dev/null || { \ 43 | echo ; \ 44 | echo "Submit seems to have failed."; \ 45 | echo "Please upload the tarball manually on the submission website."; } \ 46 | fi; \ 47 | else \ 48 | echo "Bad target $@. Usage: make [$(LABS)]"; \ 49 | fi 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 2017年春季MIT分布式系统课程labs 2 | 3 | - [x] Lab 1: [mapreduce](src/mapreduce) 4 | 5 | - [x] Lab 2: [raft](src/raft) 6 | 7 | - [x] Part 2A: Raft election 8 | 9 | - [x] Part 2B: Raft log replication 10 | 11 | - [x] Part 2C: Raft state persistence 12 | 13 | - [x] Lab 3: [kvraft](src/kvraft) 14 | 15 | - [x] Part 3A: Key/value service without log compaction 16 | 17 | - [x] Part 3B: Key/value service with log compaction 18 | 19 | - [x] Lab 4: Sharded Key/Value Service 20 | 21 | - [x] Part 4A: Shard Master 22 | 23 | - [x] Part 4B: Sharded Key/Value Server 24 | 25 | - [ ] Challenge: Garbage collection of state 26 | 27 | - [ ] Challenge: Client requests during configuration changes 28 | 29 | -------------------------------------------------------------------------------- /raft-by-bertyoung.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveyacper/raft_for_dummies/30047f10b2e9851ac17cc006e2ce38486d37de8c/raft-by-bertyoung.pptx -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.*/ 2 | mrtmp.* 3 | 824-mrinput-*.txt 4 | /main/diff.out 5 | /mapreduce/x.txt 6 | /pbservice/x.txt 7 | /kvpaxos/x.txt 8 | -------------------------------------------------------------------------------- /src/kvraft/client.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "labrpc" 4 | import "crypto/rand" 5 | import "math/big" 6 | import "sync/atomic" 7 | 8 | import "time" 9 | 10 | var clientIdGen = int32(0) 11 | 12 | const maxTry = 3 13 | 14 | type Clerk struct { 15 | servers []*labrpc.ClientEnd 16 | // You will have to modify this struct. 17 | leader int // hint or probe, TODO: server no use this field 18 | fail int // successive fail calls for leader 19 | clientId int32 // client id, init by clientIdGen 20 | reqId int64 // req id 21 | } 22 | 23 | func nrand() int64 { 24 | // What's this??? I don't use it 25 | max := big.NewInt(int64(1) << 62) 26 | bigx, _ := rand.Int(rand.Reader, max) 27 | x := bigx.Int64() 28 | return x 29 | } 30 | 31 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 32 | ck := new(Clerk) 33 | ck.servers = servers 34 | // You'll have to add code here. 35 | ck.leader = 0 36 | ck.clientId = atomic.AddInt32(&clientIdGen, 1) 37 | ck.reqId = 1 38 | 39 | return ck 40 | } 41 | 42 | // 43 | // fetch the current value for a key. 44 | // returns "" if the key does not exist. 45 | // keeps trying forever in the face of all other errors. 
46 | // 47 | // you can send an RPC with code like this: 48 | // ok := ck.servers[i].Call("KVServer.Get", &args, &reply) 49 | // 50 | // the types of args and reply (including whether they are pointers) 51 | // must match the declared types of the RPC handler function's 52 | // arguments. and reply must be passed as a pointer. 53 | // 54 | func (ck *Clerk) Get(key string) string { 55 | // You will have to modify this function. 56 | args := new(GetArgs) 57 | args.Key = key 58 | args.ReqID = ck.reqId 59 | ck.reqId++ 60 | args.ID = ck.clientId 61 | 62 | fail := 0 63 | for { 64 | reply := new(GetReply) 65 | var done = make(chan bool) 66 | go func(leader int) { 67 | ok := ck.servers[leader].Call("KVServer.Get", args, reply) 68 | done<-ok 69 | }(ck.leader) 70 | 71 | var ok = true 72 | var timeout = false 73 | select { 74 | case <-time.After(RpcTimeout): 75 | timeout = true 76 | 77 | case ok = <-done: 78 | } 79 | 80 | if !timeout && ok && !reply.WrongLeader { 81 | DPrintf("[client %d] succ GET: %s = %s", ck.clientId, key, reply.Value) 82 | return reply.Value 83 | } else { 84 | fail++ 85 | if timeout || reply.WrongLeader || fail >= maxTry { 86 | fail = 0 87 | ck.leader++ 88 | if ck.leader >= len(ck.servers) { 89 | ck.leader = 0 90 | } 91 | } 92 | } 93 | 94 | time.Sleep(1 * time.Millisecond) 95 | if fail == 0 { 96 | DPrintf("[client %d] retry GET to another server %d\n", ck.clientId, ck.leader) 97 | } 98 | } 99 | } 100 | 101 | // 102 | // shared by Put and Append. 103 | // 104 | // you can send an RPC with code like this: 105 | // ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply) 106 | // 107 | // the types of args and reply (including whether they are pointers) 108 | // must match the declared types of the RPC handler function's 109 | // arguments. and reply must be passed as a pointer. 110 | // 111 | func (ck *Clerk) PutAppend(key string, value string, op string) { 112 | // You will have to modify this function. 
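	// same retry strategy as Get: keep a leader guess in ck.leader and rotate
	// to the next server after an RPC timeout, a WrongLeader reply, or maxTry
	// consecutive failures; ReqID is fixed before the retry loop so the
	// server-side duplicate table keeps retried writes exactly-once.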
113 | DPrintf("[client %d] try %s %s = %s to server %d\n", ck.clientId, op, key, value, ck.leader) 114 | args := new(PutAppendArgs) 115 | args.Key = key 116 | args.Value = value 117 | args.Op = op 118 | args.ReqID = ck.reqId 119 | ck.reqId++ 120 | args.ID = ck.clientId 121 | 122 | fail := 0 123 | for { 124 | reply := new(PutAppendReply) 125 | var done = make(chan bool) 126 | go func(leader int) { 127 | ok := ck.servers[leader].Call("KVServer.PutAppend", args, reply) 128 | done<-ok 129 | }(ck.leader) 130 | 131 | var ok = false 132 | var timeout = false 133 | select { 134 | case <-time.After(RpcTimeout): 135 | timeout = true 136 | 137 | case ok = <-done: 138 | } 139 | 140 | if !timeout && ok && !reply.WrongLeader { 141 | DPrintf("[client %d] succ %s %s = %s\n", ck.clientId, op, key, value) 142 | return 143 | } else { 144 | fail++ 145 | if timeout || reply.WrongLeader || fail >= maxTry { 146 | fail = 0 147 | ck.leader++ 148 | if ck.leader >= len(ck.servers) { 149 | ck.leader = 0 150 | } 151 | } 152 | } 153 | 154 | time.Sleep(1 * time.Millisecond) 155 | if fail == 0 { 156 | DPrintf("[client %d] retry PUT/APPEND to another server %d\n", ck.clientId, ck.leader) 157 | } 158 | } 159 | } 160 | 161 | func (ck *Clerk) Put(key string, value string) { 162 | ck.PutAppend(key, value, "Put") 163 | } 164 | 165 | func (ck *Clerk) Append(key string, value string) { 166 | ck.PutAppend(key, value, "Append") 167 | } 168 | 169 | -------------------------------------------------------------------------------- /src/kvraft/common.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "time" 4 | 5 | const ( 6 | OK = "OK" 7 | ErrNoKey = "ErrNoKey" 8 | ErrNotLeader = "ErrNotLeader" 9 | ErrInvalidOp = "ErrInvalidOp" 10 | ErrDuplicateReq = "ErrDuplicateReq" 11 | ) 12 | 13 | type Err string 14 | 15 | // Put or Append 16 | type PutAppendArgs struct { 17 | Key string 18 | Value string 19 | Op string // "Put" or "Append" 20 | // You'll have to add definitions here. 21 | // Field names must start with capital letters, 22 | // otherwise RPC will break. 23 | ID int32 // client id 24 | ReqID int64 25 | } 26 | 27 | type PutAppendReply struct { 28 | WrongLeader bool 29 | Err Err 30 | ID int32 31 | RspID int64 32 | } 33 | 34 | type GetArgs struct { 35 | Key string 36 | // You'll have to add definitions here. 
37 | ID int32 38 | ReqID int64 39 | } 40 | 41 | type GetReply struct { 42 | WrongLeader bool 43 | Err Err 44 | Value string 45 | ID int32 46 | RspID int64 47 | } 48 | 49 | const RpcTimeout time.Duration = 1000 * time.Millisecond 50 | 51 | -------------------------------------------------------------------------------- /src/kvraft/config.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "labrpc" 4 | import "testing" 5 | import "os" 6 | 7 | // import "log" 8 | import crand "crypto/rand" 9 | import "math/big" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | import "raft" 15 | import "fmt" 16 | import "time" 17 | import "sync/atomic" 18 | 19 | func randstring(n int) string { 20 | b := make([]byte, 2*n) 21 | crand.Read(b) 22 | s := base64.URLEncoding.EncodeToString(b) 23 | return s[0:n] 24 | } 25 | 26 | func makeSeed() int64 { 27 | max := big.NewInt(int64(1) << 62) 28 | bigx, _ := crand.Int(crand.Reader, max) 29 | x := bigx.Int64() 30 | return x 31 | } 32 | 33 | // Randomize server handles 34 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 35 | sa := make([]*labrpc.ClientEnd, len(kvh)) 36 | copy(sa, kvh) 37 | for i := range sa { 38 | j := rand.Intn(i + 1) 39 | sa[i], sa[j] = sa[j], sa[i] 40 | } 41 | return sa 42 | } 43 | 44 | type config struct { 45 | mu sync.Mutex 46 | t *testing.T 47 | net *labrpc.Network 48 | n int 49 | kvservers []*KVServer 50 | saved []*raft.Persister 51 | endnames [][]string // names of each server's sending ClientEnds 52 | clerks map[*Clerk][]string 53 | nextClientId int 54 | maxraftstate int 55 | start time.Time // time at which make_config() was called 56 | // begin()/end() statistics 57 | t0 time.Time // time at which test_test.go called cfg.begin() 58 | rpcs0 int // rpcTotal() at start of test 59 | ops int32 // number of clerk get/put/append method calls 60 | } 61 | 62 | func (cfg *config) checkTimeout() { 63 | // enforce a two minute real-time limit on each test 64 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 65 | cfg.t.Fatal("test took longer than 120 seconds") 66 | } 67 | } 68 | 69 | func (cfg *config) cleanup() { 70 | cfg.mu.Lock() 71 | defer cfg.mu.Unlock() 72 | for i := 0; i < len(cfg.kvservers); i++ { 73 | if cfg.kvservers[i] != nil { 74 | cfg.kvservers[i].Kill() 75 | } 76 | } 77 | cfg.net.Cleanup() 78 | cfg.checkTimeout() 79 | } 80 | 81 | // Maximum log size across all servers 82 | func (cfg *config) LogSize() int { 83 | logsize := 0 84 | for i := 0; i < cfg.n; i++ { 85 | n := cfg.saved[i].RaftStateSize() 86 | if n > logsize { 87 | logsize = n 88 | } 89 | } 90 | return logsize 91 | } 92 | 93 | // Maximum snapshot size across all servers 94 | func (cfg *config) SnapshotSize() int { 95 | snapshotsize := 0 96 | for i := 0; i < cfg.n; i++ { 97 | n := cfg.saved[i].SnapshotSize() 98 | if n > snapshotsize { 99 | snapshotsize = n 100 | } 101 | } 102 | return snapshotsize 103 | } 104 | 105 | // attach server i to servers listed in to 106 | // caller must hold cfg.mu 107 | func (cfg *config) connectUnlocked(i int, to []int) { 108 | // log.Printf("connect peer %d to %v\n", i, to) 109 | 110 | // outgoing socket files 111 | for j := 0; j < len(to); j++ { 112 | endname := cfg.endnames[i][to[j]] 113 | cfg.net.Enable(endname, true) 114 | } 115 | 116 | // incoming socket files 117 | for j := 0; j < len(to); j++ { 118 | endname := cfg.endnames[to[j]][i] 119 | cfg.net.Enable(endname, true) 120 | } 121 | } 122 | 123 | 
func (cfg *config) connect(i int, to []int) { 124 | cfg.mu.Lock() 125 | defer cfg.mu.Unlock() 126 | cfg.connectUnlocked(i, to) 127 | } 128 | 129 | // detach server i from the servers listed in from 130 | // caller must hold cfg.mu 131 | func (cfg *config) disconnectUnlocked(i int, from []int) { 132 | // log.Printf("disconnect peer %d from %v\n", i, from) 133 | 134 | // outgoing socket files 135 | for j := 0; j < len(from); j++ { 136 | if cfg.endnames[i] != nil { 137 | endname := cfg.endnames[i][from[j]] 138 | cfg.net.Enable(endname, false) 139 | } 140 | } 141 | 142 | // incoming socket files 143 | for j := 0; j < len(from); j++ { 144 | if cfg.endnames[j] != nil { 145 | endname := cfg.endnames[from[j]][i] 146 | cfg.net.Enable(endname, false) 147 | } 148 | } 149 | } 150 | 151 | func (cfg *config) disconnect(i int, from []int) { 152 | cfg.mu.Lock() 153 | defer cfg.mu.Unlock() 154 | cfg.disconnectUnlocked(i, from) 155 | } 156 | 157 | func (cfg *config) All() []int { 158 | all := make([]int, cfg.n) 159 | for i := 0; i < cfg.n; i++ { 160 | all[i] = i 161 | } 162 | return all 163 | } 164 | 165 | func (cfg *config) ConnectAll() { 166 | cfg.mu.Lock() 167 | defer cfg.mu.Unlock() 168 | for i := 0; i < cfg.n; i++ { 169 | cfg.connectUnlocked(i, cfg.All()) 170 | } 171 | } 172 | 173 | // Sets up 2 partitions with connectivity between servers in each partition. 174 | func (cfg *config) partition(p1 []int, p2 []int) { 175 | cfg.mu.Lock() 176 | defer cfg.mu.Unlock() 177 | // log.Printf("partition servers into: %v %v\n", p1, p2) 178 | for i := 0; i < len(p1); i++ { 179 | cfg.disconnectUnlocked(p1[i], p2) 180 | cfg.connectUnlocked(p1[i], p1) 181 | } 182 | for i := 0; i < len(p2); i++ { 183 | cfg.disconnectUnlocked(p2[i], p1) 184 | cfg.connectUnlocked(p2[i], p2) 185 | } 186 | } 187 | 188 | // Create a clerk with clerk specific server names. 189 | // Give it connections to all of the servers, but for 190 | // now enable only connections to servers in to[]. 191 | func (cfg *config) makeClient(to []int) *Clerk { 192 | cfg.mu.Lock() 193 | defer cfg.mu.Unlock() 194 | 195 | // a fresh set of ClientEnds. 
196 | ends := make([]*labrpc.ClientEnd, cfg.n) 197 | endnames := make([]string, cfg.n) 198 | for j := 0; j < cfg.n; j++ { 199 | endnames[j] = randstring(20) 200 | ends[j] = cfg.net.MakeEnd(endnames[j]) 201 | cfg.net.Connect(endnames[j], j) 202 | } 203 | 204 | ck := MakeClerk(random_handles(ends)) 205 | cfg.clerks[ck] = endnames 206 | cfg.nextClientId++ 207 | cfg.ConnectClientUnlocked(ck, to) 208 | return ck 209 | } 210 | 211 | func (cfg *config) deleteClient(ck *Clerk) { 212 | cfg.mu.Lock() 213 | defer cfg.mu.Unlock() 214 | 215 | v := cfg.clerks[ck] 216 | for i := 0; i < len(v); i++ { 217 | os.Remove(v[i]) 218 | } 219 | delete(cfg.clerks, ck) 220 | } 221 | 222 | // caller should hold cfg.mu 223 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 224 | // log.Printf("ConnectClient %v to %v\n", ck, to) 225 | endnames := cfg.clerks[ck] 226 | for j := 0; j < len(to); j++ { 227 | s := endnames[to[j]] 228 | cfg.net.Enable(s, true) 229 | } 230 | } 231 | 232 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 233 | cfg.mu.Lock() 234 | defer cfg.mu.Unlock() 235 | cfg.ConnectClientUnlocked(ck, to) 236 | } 237 | 238 | // caller should hold cfg.mu 239 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 240 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 241 | endnames := cfg.clerks[ck] 242 | for j := 0; j < len(from); j++ { 243 | s := endnames[from[j]] 244 | cfg.net.Enable(s, false) 245 | } 246 | } 247 | 248 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 249 | cfg.mu.Lock() 250 | defer cfg.mu.Unlock() 251 | cfg.DisconnectClientUnlocked(ck, from) 252 | } 253 | 254 | // Shutdown a server by isolating it 255 | func (cfg *config) ShutdownServer(i int) { 256 | cfg.mu.Lock() 257 | defer cfg.mu.Unlock() 258 | 259 | cfg.disconnectUnlocked(i, cfg.All()) 260 | 261 | // disable client connections to the server. 262 | // it's important to do this before creating 263 | // the new Persister in saved[i], to avoid 264 | // the possibility of the server returning a 265 | // positive reply to an Append but persisting 266 | // the result in the superseded Persister. 267 | cfg.net.DeleteServer(i) 268 | 269 | // a fresh persister, in case old instance 270 | // continues to update the Persister. 271 | // but copy old persister's content so that we always 272 | // pass Make() the last persisted state. 273 | if cfg.saved[i] != nil { 274 | cfg.saved[i] = cfg.saved[i].Copy() 275 | } 276 | 277 | kv := cfg.kvservers[i] 278 | if kv != nil { 279 | cfg.mu.Unlock() 280 | kv.Kill() 281 | cfg.mu.Lock() 282 | cfg.kvservers[i] = nil 283 | } 284 | } 285 | 286 | // If restart servers, first call ShutdownServer 287 | func (cfg *config) StartServer(i int) { 288 | cfg.mu.Lock() 289 | 290 | // a fresh set of outgoing ClientEnd names. 291 | cfg.endnames[i] = make([]string, cfg.n) 292 | for j := 0; j < cfg.n; j++ { 293 | cfg.endnames[i][j] = randstring(20) 294 | } 295 | 296 | // a fresh set of ClientEnds. 297 | ends := make([]*labrpc.ClientEnd, cfg.n) 298 | for j := 0; j < cfg.n; j++ { 299 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 300 | cfg.net.Connect(cfg.endnames[i][j], j) 301 | } 302 | 303 | // a fresh persister, so old instance doesn't overwrite 304 | // new instance's persisted state. 305 | // give the fresh persister a copy of the old persister's 306 | // state, so that the spec is that we pass StartKVServer() 307 | // the last persisted state. 
308 | if cfg.saved[i] != nil { 309 | cfg.saved[i] = cfg.saved[i].Copy() 310 | } else { 311 | cfg.saved[i] = raft.MakePersister() 312 | } 313 | cfg.mu.Unlock() 314 | 315 | cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate) 316 | 317 | kvsvc := labrpc.MakeService(cfg.kvservers[i]) 318 | rfsvc := labrpc.MakeService(cfg.kvservers[i].rf) 319 | srv := labrpc.MakeServer() 320 | srv.AddService(kvsvc) 321 | srv.AddService(rfsvc) 322 | cfg.net.AddServer(i, srv) 323 | } 324 | 325 | func (cfg *config) Leader() (bool, int) { 326 | cfg.mu.Lock() 327 | defer cfg.mu.Unlock() 328 | 329 | for i := 0; i < cfg.n; i++ { 330 | _, is_leader := cfg.kvservers[i].rf.GetState() 331 | if is_leader { 332 | return true, i 333 | } 334 | } 335 | return false, 0 336 | } 337 | 338 | // Partition servers into 2 groups and put current leader in minority 339 | func (cfg *config) make_partition() ([]int, []int) { 340 | _, l := cfg.Leader() 341 | p1 := make([]int, cfg.n/2+1) 342 | p2 := make([]int, cfg.n/2) 343 | j := 0 344 | for i := 0; i < cfg.n; i++ { 345 | if i != l { 346 | if j < len(p1) { 347 | p1[j] = i 348 | } else { 349 | p2[j-len(p1)] = i 350 | } 351 | j++ 352 | } 353 | } 354 | p2[len(p2)-1] = l 355 | return p1, p2 356 | } 357 | 358 | var ncpu_once sync.Once 359 | 360 | func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config { 361 | ncpu_once.Do(func() { 362 | if runtime.NumCPU() < 2 { 363 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 364 | } 365 | rand.Seed(makeSeed()) 366 | }) 367 | runtime.GOMAXPROCS(4) 368 | cfg := &config{} 369 | cfg.t = t 370 | cfg.net = labrpc.MakeNetwork() 371 | cfg.n = n 372 | cfg.kvservers = make([]*KVServer, cfg.n) 373 | cfg.saved = make([]*raft.Persister, cfg.n) 374 | cfg.endnames = make([][]string, cfg.n) 375 | cfg.clerks = make(map[*Clerk][]string) 376 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 377 | cfg.maxraftstate = maxraftstate 378 | cfg.start = time.Now() 379 | 380 | // create a full set of KV servers. 381 | for i := 0; i < cfg.n; i++ { 382 | cfg.StartServer(i) 383 | } 384 | 385 | cfg.ConnectAll() 386 | 387 | cfg.net.Reliable(!unreliable) 388 | 389 | return cfg 390 | } 391 | 392 | func (cfg *config) rpcTotal() int { 393 | return cfg.net.GetTotalCount() 394 | } 395 | 396 | // start a Test. 397 | // print the Test message. 398 | // e.g. cfg.begin("Test (2B): RPC counts aren't too high") 399 | func (cfg *config) begin(description string) { 400 | fmt.Printf("%s ...\n", description) 401 | cfg.t0 = time.Now() 402 | cfg.rpcs0 = cfg.rpcTotal() 403 | atomic.StoreInt32(&cfg.ops, 0) 404 | } 405 | 406 | func (cfg *config) op() { 407 | atomic.AddInt32(&cfg.ops, 1) 408 | } 409 | 410 | // end a Test -- the fact that we got here means there 411 | // was no failure. 412 | // print the Passed message, 413 | // and some performance numbers. 414 | func (cfg *config) end() { 415 | cfg.checkTimeout() 416 | if cfg.t.Failed() == false { 417 | t := time.Since(cfg.t0).Seconds() // real time 418 | npeers := cfg.n // number of Raft peers 419 | nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends 420 | ops := atomic.LoadInt32(&cfg.ops) // number of clerk get/put/append calls 421 | 422 | fmt.Printf(" ... 
Passed --") 423 | fmt.Printf(" %4.1f %d %5d %4d\n", t, npeers, nrpc, ops) 424 | } 425 | } 426 | -------------------------------------------------------------------------------- /src/kvraft/lab3: -------------------------------------------------------------------------------- 1 | Lab 3: 容错的KV服务 2 | 3 | 介绍 4 | 5 | 在这个lab你将使用lab2中的raft库构建一个容错的kv存储。这个kv服务将是一个复制状态机,由一组kv服务器组成,它们之间通过raft日志协调。只要有过半节点存活, 6 | 你的kv服务就应该能正常工作。 7 | 8 | 你的系统将由client和kv服务组成,每个kv服务也作为一个raft peer的角色。客户端向服务器发送Put,Append,Get,服务器将这些调用转存到raft日志,并按序执行; 9 | 一个客户端能够向任意的某个服务节点发送请求,但是应该重试其他服务器如果当前的不是leader。如果操作作为raft日志commit了,操作结果应该回复给客户端。如果 10 | 操作失败了,返回错误,客户端重试其他服务节点。 11 | 12 | 本实验由两部分。A部分,实现的时候不需要考虑日志快照压缩。实验B部分,需要实现snapshot。 13 | 14 | 你应该仔细读一下paper的7和8章节; 15 | 你可以给Raft ApplyMsg添加字段,也可以给Raft RPC比如AppendEntries添加字段。 16 | 17 | Part A: kv服务,不需要日志压缩 18 | 19 | 服务支持三种rpc: Put(k, v), Append(k, arg), Get(k); 若k不存在,Append相当于Put。 20 | 21 | 你的kvraft客户端代码(src/kvraft/client.go的clerk类)应该尝试不同的kv服务器直到返回成功;只要客户端可以和主分区的raft leader节点通信,操作 最终一定会成功。 22 | 23 | 你的第一个任务是实现一个在无消息丢失,无服务失败情况下的解决方案。你的服务必须保证Get,Put,Append返回linearizable的结果。即,调用put get append的变动对 24 | 所有客户端看上去是一致的,它们以一个特定的顺序执行,不管是否有服务失败或者leader变化。一个put之后执行的get方法一定能够看到最近一次put的值。完整的调用应该 25 | 有exactly-once语义。 26 | 27 | 一个可行的计划是,先用kvraft通过Raft去达成一致性的"value"信息去填充server.go中的Op结构体,然后实现putappend和get处理函数;代码在server.go。 28 | 处理函数应该使用Start将Op加入raft日志,当日志被commit时,回复客户端;注意,你不能执行一个操作,直到被commit到日志(即当它到达applyCh的时候); 29 | 30 | 当你通过第一个测试"one client"的时候就完成了任务。如果你实现的足够老练,那么"concurrent clients"也通过了。 31 | 32 | 你的kvraft服务之间不应该直接通信,而是通过raft日志通信。 33 | 34 | 调用Start()之后,你的kvraft服务需要等待Raft达成共识;达成共识的命令会到达applyCh;你应该仔细思考怎么组织代码让它持续读取applyCh,当putappend和get处理器 35 | 提交命令到raft日志的时候。小心kv服务和raft库之间的死锁。 36 | (实际就是等待applyCh上的消息,底层raft在commit日志后,会向applyCh发命令,通知将日志命令应用到用户的状态机) 37 | 38 | 你的方案需要处理leader调用了Start(),但是在请求被commit之前就失去了leader身份的情况。这种情况下,你应该让客户端重发请求到一个不同的服务进程, 39 | 直到发现新的leader。一个方法是leader通过发现一个与Start返回的index位置相同的不同请求来意识到自己失去了leader身份,或者Raft.GetState()返回的term 40 | 变化了。如果ex leader自己发生了分区,它不会知道新的leader;但是在相同分区的客户端也不能与新leader通信,所以让client永远等下去是合理的,直到 41 | 分区愈合。 42 | 43 | 你可能需要修改Clerk来记住最近的leader,避免浪费时间探测leader。 44 | 一个kvraft服务不应该完成一个Get RPC如果它不是majority部分的节点。所以它不会返回stale数据; 45 | 一个简单方案是,让每一个Get也提交Raft日志中,这样你不必要实现第8节介绍的关于只读操作的优化。 46 | 47 | 假定:请求是串行的,不允许pipeline请求。 48 | 49 | BERT YOUNG: 50 | 关于处理重复客户端请求的问题: 51 | 为了便于思考,将整个kv服务视作一个单机服务: 52 | 首先,与client直接交互的kvraft节点,每个节点视作一个worker线程; 53 | 每个线程有自己的raft实例维护一份raft日志,由于强一致性,我们将raft实例视为只有一个,多个 54 | worker线程往里提交命令(即调用raft.Start()).(我们可以将raft实例视作一个慢IO设备,毕竟达成共识需要时间:) 55 | raft实例会将commit的日志命令项提交给应用的状态机(通过applyCh); 56 | 为了避免重复请求,每个worker线程会记录自己apply过的每个客户端的最大命令ID(单调递增的), 57 | 小于等于它的认为是重复请求;命令只有落地到状态机,才会更新ID记录。 58 | 所以关于请求去重就比较好理解了: 59 | 当worker线程(kvraft 的rpc接收处理入口那)接收到客户端请求,先简单检查一下是否重复请求,这步检查 60 | 没有也可以,但检查了是个很大的优化。 61 | 不重复则提交请求给raft实例,当raft实例处理完成(请求命令日志项被commit),则通知我们应用层的状态机; 62 | 因此每个worker线程的状态机只需要此时尝试将命令ID记录,失败则是重复ID,不更改状态机; 63 | 否则更改状态机,ID记录也更新了; 64 | 65 | 思考一个场景:ABC三个worker线程,A是leader接收请求,准备放在raft实例的第一个槽,但是A发生了 66 | 分区,无法访问B和C了;所以这个请求无法commit,A陷入等待(等待请求被commit); 67 | 然后客户端自己重试发给了B,此时B已经称为leader,处理请求,也准备放在raft实例的第一个槽;最终成功; 68 | 当分区愈合时,A称为B的follower,最终收到log index 1的apply,A不再等待,但是发现返回的日志项并不是 69 | 自己投递的日志项,A知道自己是个stale leader,请求处理失败了;事实上该请求是第二次重试到B线程才成功的。 70 | 71 | 72 | 73 | Part B: kv服务,需要日志压缩 74 | 75 | kvraft服务需要经常将当前状态作为snapshot持久化,并丢弃持久化之前的日志。当服务重启时,服务首先读取snapshot,然后replay之后的日志。sec7介绍了快照机制。 76 | 77 | 你应该花一些时间设计raft库和你的服务之间的接口,让raft库能够丢弃日志。思考下当往log尾部存储的时候,你的raft将怎么操作,怎样丢弃旧日志。 78 | 79 | kvraft测试将maxraftstate传递给你的StartKVServer(), maxraftstate表示raft持久化状态的字节最大大小,包括log,但不包括快照。 80 | 
Your raft.go probably keeps the entire log in a Go slice. Modify it so that it can be given a log index, discard the entries before that index, and continue operating while storing only log entries after that index. Make sure you pass all the Raft tests after making these changes.

Modify your kvraft server so that it detects when the persisted Raft state grows too large, and then saves a snapshot and tells Raft that it can discard old log entries. Save each snapshot with persister.SaveSnapshot() (don't use files).

Modify your Raft leader code to send an InstallSnapshot RPC to a follower when the leader has discarded the log entries the follower needs. When a follower receives an InstallSnapshot RPC, your Raft code will need to send the included snapshot to its kvraft. You can use the applyCh for this purpose — see the UseSnapshot field. A kvraft instance should restore the snapshot from the persister when it re-starts. Your solution is complete when you pass the remaining tests reliably.

The maxraftstate limit applies to the GOB-encoded bytes your Raft passes to persister.SaveRaftState().

Remember that your kvserver must be able to detect duplicate client requests across checkpoints, so any state you are using to detect them must be included in the snapshots. Remember to capitalize all fields of structures stored in the snapshot.
Make sure you pass TestSnapshotRPC before moving on to the other Snapshot tests.
A common source of bugs is for the Raft state and the snapshot state to be inconsistent with each other, particularly after re-starts or InstallSnapshots. You are allowed to add methods to your Raft for kvserver to call to help handle InstallSnapshot RPCs.

-------------------------------------------------------------------------------- /src/kvraft/server.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | "labgob" 5 | "labrpc" 6 | "log" 7 | "raft" 8 | "sync" 9 | 10 | "bytes" 11 | ) 12 | 13 | const Debug = 0 14 | 15 | func DPrintf(format string, a ...interface{}) (n int, err error) { 16 | if Debug > 0 { 17 | log.Printf(format, a...) 18 | } 19 | return 20 | } 21 | 22 | // Op is the command that actually goes into the Raft log 23 | type Op struct { 24 | // Your definitions here. 25 | // Field names must start with capital letters, 26 | // otherwise RPC will break. 27 | Key string 28 | Value string 29 | Operation string // Get Put or Append 30 | 31 | // request context, used for duplicate detection 32 | ID int32 33 | ReqID int64 34 | } 35 | 36 | type KVServer struct { 37 | mu sync.Mutex 38 | me int 39 | // each kv server has a raft instance 40 | rf *raft.Raft 41 | 42 | // when raft commits a log entry, it eventually notifies us on applyCh, 43 | // so we listen on this applyCh 44 | applyCh chan raft.ApplyMsg 45 | 46 | maxraftstate int // snapshot if < persister.RaftStateSize() 47 | 48 | // Your definitions here. 
49 | 50 | // real kv data here 51 | data map[string]string 52 | 53 | // Notify chan for each log index 54 | notifyCh map[int]chan Op 55 | // request records 56 | requests map[int32]int64 // client -> last commited reqID 57 | 58 | // for exit 59 | shutdown chan interface{} 60 | } 61 | 62 | // check if repeated request 63 | func (kv *KVServer) isDuplicated(id int32, reqId int64) bool { 64 | kv.mu.Lock() 65 | defer kv.mu.Unlock() 66 | maxSeenReqId, ok := kv.requests[id] 67 | if ok { 68 | return reqId <= maxSeenReqId 69 | } 70 | return false 71 | } 72 | 73 | // true if update success, imply nonrepeat request can be applied to state machine: eg, data field 74 | func (kv *KVServer) updateIfNotDuplicated(id int32, reqId int64) bool { 75 | // must hold lock outside 76 | 77 | maxSeenReqId, ok := kv.requests[id] 78 | if ok { 79 | if reqId <= maxSeenReqId { 80 | return false 81 | } 82 | } 83 | 84 | kv.requests[id] = reqId 85 | return true 86 | } 87 | 88 | // call raft.Start to commit a command as log entry 89 | func (kv *KVServer) proposeCommand(cmd Op) bool { 90 | kv.mu.Lock() 91 | // lock kv first, think about: 92 | // If no lock with rf.Start, raft maybe very quick to agree. 93 | // Then applyRoutine will not find notifyCh on log index, 94 | // proposeCommand will block on notifyCh forever. 95 | logIndex, _, isLeader := kv.rf.Start(cmd) 96 | if !isLeader { 97 | kv.mu.Unlock() 98 | return false 99 | } 100 | 101 | // wait command to be commited 102 | 103 | // use logIndex because all servers agree on same log index 104 | ch, ok := kv.notifyCh[logIndex] 105 | if !ok { 106 | ch = make(chan Op, 1) 107 | kv.notifyCh[logIndex] = ch 108 | } 109 | kv.mu.Unlock() 110 | 111 | // check 112 | if ch == nil { 113 | panic("FATAL: chan is nil") 114 | } 115 | 116 | // wait on ch forever, because: 117 | // If I lose leadership before commit, may be partioned 118 | // I can't response, so wait until partion healed. 119 | // Eventually a log will be commited on index, then I'm 120 | // awaken, but cmd1 is different from cmd, return failed 121 | // to client. 122 | // If client retry another leader when I waiting, no matter. 123 | select { 124 | case cmd1 := <-ch: 125 | return cmd1 == cmd // if different log, me is not leader 126 | } 127 | 128 | return false 129 | } 130 | 131 | func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { 132 | // Your code here. 133 | // check if leader, useless but efficient 134 | _, isLeader := kv.rf.GetState() 135 | if !isLeader { 136 | reply.WrongLeader = true 137 | reply.Err = ErrNotLeader 138 | return 139 | } 140 | 141 | DPrintf("[server %d] GetRPC isLeader %v, args %v", kv.me, isLeader, args) 142 | reply.WrongLeader = false 143 | reply.Err = "" 144 | reply.ID = args.ID 145 | reply.RspID = args.ReqID 146 | 147 | cmd := Op{} 148 | cmd.Key = args.Key 149 | cmd.Value = "" // no use for Get 150 | cmd.Operation = "Get" 151 | cmd.ID = args.ID 152 | cmd.ReqID = args.ReqID 153 | 154 | // try commit cmd to raft log 155 | succ := kv.proposeCommand(cmd) 156 | if succ { 157 | kv.mu.Lock() 158 | if v, ok := kv.data[args.Key]; ok { 159 | reply.Value = v 160 | } else { 161 | reply.Value = "" 162 | reply.Err = ErrNoKey 163 | } 164 | kv.mu.Unlock() 165 | } else { 166 | reply.WrongLeader = true 167 | reply.Err = ErrNotLeader 168 | } 169 | } 170 | 171 | func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 172 | // Your code here. 
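	// same pattern as Get above: bail out early if this replica is not the Raft
	// leader, reject requests the duplicate table has already seen, then propose
	// the command through Raft and reply only after it is committed and applied.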
173 | _, isLeader := kv.rf.GetState() 174 | DPrintf("[server %d] PutAppendRPC isLeader %v, args %v", kv.me, isLeader, args) 175 | 176 | reply.WrongLeader = false 177 | reply.Err = "" 178 | reply.ID = args.ID 179 | reply.RspID = args.ReqID 180 | 181 | if !isLeader { 182 | reply.WrongLeader = true 183 | reply.Err = ErrNotLeader 184 | } else { 185 | if args.Op != "Put" && args.Op != "Append" { 186 | reply.Err = ErrInvalidOp 187 | return 188 | } 189 | } 190 | 191 | // check if repeated request, useless but efficient 192 | duplicate := kv.isDuplicated(args.ID, args.ReqID) 193 | if duplicate { 194 | reply.Err = ErrDuplicateReq 195 | return 196 | } 197 | 198 | cmd := Op{} 199 | cmd.Key = args.Key 200 | cmd.Value = args.Value 201 | cmd.Operation = args.Op 202 | cmd.ID = args.ID 203 | cmd.ReqID = args.ReqID 204 | 205 | succ := kv.proposeCommand(cmd) 206 | if !succ { 207 | reply.WrongLeader = true 208 | reply.Err = ErrNotLeader 209 | } 210 | } 211 | 212 | // when raft commited a log entry, it'll notify me 213 | func (kv *KVServer) applyRoutine() { 214 | for { 215 | var op Op 216 | var applyMsg raft.ApplyMsg 217 | 218 | select { 219 | case <-kv.shutdown: 220 | DPrintf("[server %d] shutdown applyRoutine", kv.me) 221 | return 222 | 223 | case applyMsg = <-kv.applyCh: 224 | } 225 | 226 | if !applyMsg.CommandValid { 227 | kv.loadSnapshot(applyMsg.Snapshot) 228 | continue 229 | } 230 | 231 | op, _ = (applyMsg.Command).(Op) 232 | 233 | kv.mu.Lock() 234 | // Follower & Leader: try apply to state machine, fail if duplicated request 235 | if op.Operation == "Put" { 236 | update := kv.updateIfNotDuplicated(op.ID, op.ReqID) 237 | if update { 238 | DPrintf("[server %d] apply for client %d PUT key %s, value %s, logindex %d", kv.me, op.ID, op.Key, op.Value, applyMsg.CommandIndex) 239 | kv.data[op.Key] = op.Value 240 | } 241 | } else if op.Operation == "Append" { 242 | update := kv.updateIfNotDuplicated(op.ID, op.ReqID) 243 | if update { 244 | kv.data[op.Key] += op.Value 245 | DPrintf("[server %d] apply for client %d APPEND key %s, value %s, now %s, logindex %d", kv.me, op.ID, op.Key, op.Value, kv.data[op.Key], applyMsg.CommandIndex) 246 | } 247 | } else { 248 | // Do nothing for Get, should I cached reply? 249 | } 250 | 251 | ch, ok := kv.notifyCh[applyMsg.CommandIndex] 252 | if ok { 253 | //_, isLeader := kv.rf.GetState() 254 | // likely be leader 255 | /* 256 | select { 257 | case <-ch: 258 | default: 259 | } 260 | */ 261 | 262 | ch <- op 263 | } 264 | 265 | if kv.maxraftstate > 0 && kv.rf.RaftStateSize() >= kv.maxraftstate { 266 | DPrintf("(%d) state size %d", kv.me, kv.rf.RaftStateSize()) 267 | // If I keep mu.Lock, the startSnapshot will use raft's lock 268 | // But raft's applyRoutine is keeping lock and apply msg, he will be blocking with held lock. 269 | go kv.startSnapshot(applyMsg.CommandIndex) 270 | } 271 | 272 | kv.mu.Unlock() 273 | } 274 | } 275 | 276 | // for snapshot 277 | func (kv *KVServer) startSnapshot(lastIndex int) { 278 | w := new(bytes.Buffer) 279 | e := labgob.NewEncoder(w) 280 | 281 | kv.mu.Lock() 282 | DPrintf("[server %d] startSnapshot index %d with data %v", kv.me, lastIndex, kv.data) 283 | e.Encode(kv.data) 284 | e.Encode(kv.requests) 285 | kv.mu.Unlock() 286 | 287 | data := w.Bytes() 288 | kv.rf.StartSnapshot(data, lastIndex) 289 | } 290 | 291 | func (kv *KVServer) loadSnapshot(data []byte) { 292 | if data == nil || len(data) < 1 { // bootstrap without any state? 
293 | return 294 | } 295 | 296 | r := bytes.NewBuffer(data) 297 | d := labgob.NewDecoder(r) 298 | 299 | kv.mu.Lock() 300 | defer kv.mu.Unlock() 301 | kv.data = make(map[string]string) 302 | kv.requests = make(map[int32]int64) 303 | 304 | d.Decode(&kv.data) 305 | d.Decode(&kv.requests) 306 | DPrintf("[server %d] load snapshot data %v", kv.me, kv.data) 307 | } 308 | 309 | // 310 | // the tester calls Kill() when a KVServer instance won't 311 | // be needed again. you are not required to do anything 312 | // in Kill(), but it might be convenient to (for example) 313 | // turn off debug output from this instance. 314 | // 315 | func (kv *KVServer) Kill() { 316 | kv.rf.Kill() 317 | // Your code here, if desired. 318 | close(kv.shutdown) 319 | } 320 | 321 | // 322 | // servers[] contains the ports of the set of 323 | // servers that will cooperate via Raft to 324 | // form the fault-tolerant key/value service. 325 | // me is the index of the current server in servers[]. 326 | // the k/v server should store snapshots through the underlying Raft 327 | // implementation, which should call persister.SaveStateAndSnapshot() to 328 | // atomically save the Raft state along with the snapshot. 329 | // the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, 330 | // in order to allow Raft to garbage-collect its log. if maxraftstate is -1, 331 | // you don't need to snapshot. 332 | // StartKVServer() must return quickly, so it should start goroutines 333 | // for any long-running work. 334 | // 335 | func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { 336 | // call labgob.Register on structures you want 337 | // Go's RPC library to marshall/unmarshall. 338 | labgob.Register(Op{}) 339 | 340 | kv := new(KVServer) 341 | kv.me = me 342 | kv.maxraftstate = maxraftstate 343 | 344 | // You may need initialization code here. 345 | kv.data = make(map[string]string) 346 | kv.requests = make(map[int32]int64) 347 | kv.notifyCh = make(map[int]chan Op) 348 | kv.shutdown = make(chan interface{}, 1) 349 | 350 | kv.applyCh = make(chan raft.ApplyMsg, 1) 351 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 352 | 353 | // You may need initialization code here. 354 | go kv.applyRoutine() // listen on applyCh, apply op to state machine 355 | 356 | return kv 357 | } 358 | -------------------------------------------------------------------------------- /src/kvraft/snapshot_hints: -------------------------------------------------------------------------------- 1 | *** topic: log compaction and Snapshots (Lab 3B) 2 | 3 | problem: 4 | log will get to be huge -- much larger than state-machine state! 5 | will take a long time to re-play on reboot or send to a new server 6 | 7 | luckily: 8 | a server doesn't need *both* the complete log *and* the service state 9 | the executed part of the log is captured in the state 10 | clients only see the state, not the log 11 | service state usually much smaller, so let's keep just that 12 | 13 | what constrains how a server discards log entries? 14 | can't forget un-committed entries -- might be part of leader's majority 15 | can't forget un-executed entries -- not yet reflected in the state 16 | executed entries might be needed to bring other servers up to date 17 | 18 | solution: service periodically creates persistent "snapshot" 19 | [diagram: service with state, snapshot on disk, raft log, raft persistent] 20 | copy of entire state-machine state as of execution of a specific log entry 21 | e.g. 
k/v table 22 | service writes snapshot to persistent storage (disk) 23 | service tells Raft it is snapshotted through some log index 24 | Raft discards log before that index 25 | a server can create a snapshot and discard prefix of log at any time 26 | e.g. when log grows too long 27 | 28 | relation of snapshot and log 29 | snapshot reflects only executed log entries 30 | and thus only committed entries 31 | so server will only discard committed prefix of log 32 | anything not known to be committed will remain in log 33 | 34 | so a server's on-disk state consists of: 35 | service's snapshot up to a certain log entry 36 | Raft's persisted log w/ following log entries 37 | the combination is equivalent to the full log 38 | 39 | what happens on crash+restart? 40 | service reads snapshot from disk 41 | Raft reads persisted log from disk 42 | sends service entries that are committed but not in snapshot 43 | 44 | what if a follower lags and leader has discarded past end of follower's log? 45 | nextIndex[i] will back up to start of leader's log 46 | so leader can't repair that follower with AppendEntries RPCs 47 | thus the InstallSnapshot RPC 48 | (Q: why not have leader discard only entries that *all* servers have?) 49 | 50 | what's in an InstallSnapshot RPC? Figures 12, 13 51 | term 52 | lastIncludedIndex 53 | lastIncludedTerm 54 | snapshot data 55 | 56 | what does a follower do w/ InstallSnapshot? 57 | reject if term is old (not the current leader) 58 | reject (ignore) if follower already has last included index/term 59 | it's an old/delayed RPC 60 | empty the log, replace with fake "prev" entry 61 | set lastApplied to lastIncludedIndex 62 | replace service state (e.g. k/v table) with snapshot contents 63 | 64 | note that the state and the operation history are roughly equivalent 65 | designer can choose which to send 66 | e.g. last few operations (log entries) for lagging replica, 67 | but entire state (snapshot) for a replica that has lost its disk. 68 | still, replica repair can be very expensive, and warrants attention 69 | 70 | The Question: 71 | Could a received InstallSnapshot RPC cause the state machine to go 72 | backwards in time? That is, could step 8 in Figure 13 cause the state 73 | machine to be reset so that it reflects fewer executed operations? If 74 | yes, explain how this could happen. If no, explain why it can't 75 | happen. 76 | 77 | *** topic: configuration change (not needed for the labs) 78 | 79 | configuration change (Section 6) 80 | configuration = set of servers 81 | sometimes you need to 82 | move to a new set of servers, or 83 | increase/decrease the number of servers 84 | human initiates configuration change, Raft manages it 85 | we'd like Raft to cope correctly with failure during configuration change 86 | i.e. clients should not notice (except maybe dip in performance) 87 | 88 | why doesn't a straightforward approach work? 89 | suppose each server has the list of servers in the current config 90 | change configuration by telling each server the new list 91 | using some mechanism outside of Raft 92 | problem: they will learn new configuration at different times 93 | example: want to replace S3 with S4 94 | we get as far as telling S1 and S4 that the new config is 1,2,4 95 | S1: 1,2,3 1,2,4 96 | S2: 1,2,3 1,2,3 97 | S3: 1,2,3 1,2,3 98 | S4: 1,2,4 99 | OOPS! now *two* leaders could be elected! 
100 | S2 and S3 could elect S2 101 | S1 and S4 could elect S1 102 | 103 | Raft configuration change 104 | idea: "joint consensus" stage that includes *both* old and new configuration 105 | avoids any time when both old and new can choose leader independently 106 | system starts with Cold 107 | system administrator asks the leader to switch to Cnew 108 | Raft has special configuration log entries (sets of server addresses) 109 | each server uses the last configuration in its own log 110 | 1. leader commits Cold,new to a majority of both Cold and Cnew 111 | 2. after Cold,new commits, leader commits Cnew to servers in Cnew 112 | 113 | what if leader crashes at various points in this process? 114 | can we have two leaders for the next term? 115 | if that could happen, each leader must be one of these: 116 | A. in Cold, but does not have Cold,new in log 117 | B. in Cold or Cnew, has Cold,new in log 118 | C. in Cnew, has Cnew in log 119 | we know we can't have A+A or C+C by the usual rules of leader election 120 | A+B? no, since B needs majority from Cold as well as Cnew 121 | A+C? no, since can't proceed to Cnew until Cold,new committed to Cold 122 | B+B? no, since B needs majority from both Cold and Cnew 123 | B+C? no, since B needs majority from Cnew as well as Cold 124 | 125 | good! Raft can switch to a new set of servers w/o risk of two active leaders 126 | 127 | *** topic: performance 128 | 129 | Note: many situations don't require high performance. 130 | key/value store might. 131 | but GFS or MapReduce master might not. 132 | 133 | Most replication systems have similar common-case performance: 134 | One RPC exchange and one disk write per agreement. 135 | So Raft is pretty typical for message complexity. 136 | 137 | Raft makes a few design choices that sacrifice performance for simplicity: 138 | Follower rejects out-of-order AppendEntries RPCs. 139 | Rather than saving for use after hole is filled. 140 | Might be important if network re-orders packets a lot. 141 | No provision for batching or pipelining AppendEntries. 142 | Snapshotting is wasteful for big states. 143 | A slow leader may hurt Raft, e.g. in geo-replication. 144 | 145 | These have a big effect on performance: 146 | Disk writes for persistence. 147 | Message/packet/RPC overhead. 148 | Need to execute logged commands sequentially. 149 | Fast path for read-only operations. 150 | 151 | -------------------------------------------------------------------------------- /src/labgob/labgob.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | // 4 | // trying to send non-capitalized fields over RPC produces a range of 5 | // misbehavior, including both mysterious incorrect computation and 6 | // outright crashes. so this wrapper around Go's encoding/gob warns 7 | // about non-capitalized field names. 
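// (the root cause: encoding/gob serializes only exported, capitalized struct
// fields, so a lower-case field silently arrives as its zero value on the
// receiving side.)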
8 | // 9 | 10 | import "encoding/gob" 11 | import "io" 12 | import "reflect" 13 | import "fmt" 14 | import "sync" 15 | import "unicode" 16 | import "unicode/utf8" 17 | 18 | var mu sync.Mutex 19 | var errorCount int // for TestCapital 20 | var checked map[reflect.Type]bool 21 | 22 | type LabEncoder struct { 23 | gob *gob.Encoder 24 | } 25 | 26 | func NewEncoder(w io.Writer) *LabEncoder { 27 | enc := &LabEncoder{} 28 | enc.gob = gob.NewEncoder(w) 29 | return enc 30 | } 31 | 32 | func (enc *LabEncoder) Encode(e interface{}) error { 33 | checkValue(e) 34 | return enc.gob.Encode(e) 35 | } 36 | 37 | func (enc *LabEncoder) EncodeValue(value reflect.Value) error { 38 | checkValue(value.Interface()) 39 | return enc.gob.EncodeValue(value) 40 | } 41 | 42 | type LabDecoder struct { 43 | gob *gob.Decoder 44 | } 45 | 46 | func NewDecoder(r io.Reader) *LabDecoder { 47 | dec := &LabDecoder{} 48 | dec.gob = gob.NewDecoder(r) 49 | return dec 50 | } 51 | 52 | func (dec *LabDecoder) Decode(e interface{}) error { 53 | checkValue(e) 54 | checkDefault(e) 55 | return dec.gob.Decode(e) 56 | } 57 | 58 | func Register(value interface{}) { 59 | checkValue(value) 60 | gob.Register(value) 61 | } 62 | 63 | func RegisterName(name string, value interface{}) { 64 | checkValue(value) 65 | gob.RegisterName(name, value) 66 | } 67 | 68 | func checkValue(value interface{}) { 69 | checkType(reflect.TypeOf(value)) 70 | } 71 | 72 | func checkType(t reflect.Type) { 73 | k := t.Kind() 74 | 75 | mu.Lock() 76 | // only complain once, and avoid recursion. 77 | if checked == nil { 78 | checked = map[reflect.Type]bool{} 79 | } 80 | if checked[t] { 81 | mu.Unlock() 82 | return 83 | } 84 | checked[t] = true 85 | mu.Unlock() 86 | 87 | switch k { 88 | case reflect.Struct: 89 | for i := 0; i < t.NumField(); i++ { 90 | f := t.Field(i) 91 | rune, _ := utf8.DecodeRuneInString(f.Name) 92 | if unicode.IsUpper(rune) == false { 93 | // ta da 94 | fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n", 95 | f.Name, t.Name()) 96 | mu.Lock() 97 | errorCount += 1 98 | mu.Unlock() 99 | } 100 | checkType(f.Type) 101 | } 102 | return 103 | case reflect.Slice, reflect.Array, reflect.Ptr: 104 | checkType(t.Elem()) 105 | return 106 | case reflect.Map: 107 | checkType(t.Elem()) 108 | checkType(t.Key()) 109 | return 110 | default: 111 | return 112 | } 113 | } 114 | 115 | // 116 | // warn if the value contains non-default values, 117 | // as it would if one sent an RPC but the reply 118 | // struct was already modified. if the RPC reply 119 | // contains default values, GOB won't overwrite 120 | // the non-default value. 121 | // 122 | func checkDefault(value interface{}) { 123 | if value == nil { 124 | return 125 | } 126 | checkDefault1(reflect.ValueOf(value), 1, "") 127 | } 128 | 129 | func checkDefault1(value reflect.Value, depth int, name string) { 130 | if depth > 3 { 131 | return 132 | } 133 | 134 | t := value.Type() 135 | k := t.Kind() 136 | 137 | switch k { 138 | case reflect.Struct: 139 | for i := 0; i < t.NumField(); i++ { 140 | vv := value.Field(i) 141 | name1 := t.Field(i).Name 142 | if name != "" { 143 | name1 = name + "." 
+ name1 144 | } 145 | checkDefault1(vv, depth+1, name1) 146 | } 147 | return 148 | case reflect.Ptr: 149 | if value.IsNil() { 150 | return 151 | } 152 | checkDefault1(value.Elem(), depth+1, name) 153 | return 154 | case reflect.Bool, 155 | reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 156 | reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, 157 | reflect.Uintptr, reflect.Float32, reflect.Float64, 158 | reflect.String: 159 | if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false { 160 | mu.Lock() 161 | if errorCount < 1 { 162 | what := name 163 | if what == "" { 164 | what = t.Name() 165 | } 166 | // this warning typically arises if code re-uses the same RPC reply 167 | // variable for multiple RPC calls, or if code restores persisted 168 | // state into variable that already have non-default values. 169 | fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n", 170 | what) 171 | } 172 | errorCount += 1 173 | mu.Unlock() 174 | } 175 | return 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/labgob/test_test.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | import "testing" 4 | 5 | import "bytes" 6 | 7 | type T1 struct { 8 | T1int0 int 9 | T1int1 int 10 | T1string0 string 11 | T1string1 string 12 | } 13 | 14 | type T2 struct { 15 | T2slice []T1 16 | T2map map[int]*T1 17 | T2t3 interface{} 18 | } 19 | 20 | type T3 struct { 21 | T3int999 int 22 | } 23 | 24 | // 25 | // test that we didn't break GOB. 26 | // 27 | func TestGOB(t *testing.T) { 28 | e0 := errorCount 29 | 30 | w := new(bytes.Buffer) 31 | 32 | Register(T3{}) 33 | 34 | { 35 | x0 := 0 36 | x1 := 1 37 | t1 := T1{} 38 | t1.T1int1 = 1 39 | t1.T1string1 = "6.824" 40 | t2 := T2{} 41 | t2.T2slice = []T1{T1{}, t1} 42 | t2.T2map = map[int]*T1{} 43 | t2.T2map[99] = &T1{1, 2, "x", "y"} 44 | t2.T2t3 = T3{999} 45 | 46 | e := NewEncoder(w) 47 | e.Encode(x0) 48 | e.Encode(x1) 49 | e.Encode(t1) 50 | e.Encode(t2) 51 | } 52 | data := w.Bytes() 53 | 54 | { 55 | var x0 int 56 | var x1 int 57 | var t1 T1 58 | var t2 T2 59 | 60 | r := bytes.NewBuffer(data) 61 | d := NewDecoder(r) 62 | if d.Decode(&x0) != nil || 63 | d.Decode(&x1) != nil || 64 | d.Decode(&t1) != nil || 65 | d.Decode(&t2) != nil { 66 | t.Fatalf("Decode failed") 67 | } 68 | 69 | if x0 != 0 { 70 | t.Fatalf("wrong x0 %v\n", x0) 71 | } 72 | if x1 != 1 { 73 | t.Fatalf("wrong x1 %v\n", x1) 74 | } 75 | if t1.T1int0 != 0 { 76 | t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0) 77 | } 78 | if t1.T1int1 != 1 { 79 | t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1) 80 | } 81 | if t1.T1string0 != "" { 82 | t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0) 83 | } 84 | if t1.T1string1 != "6.824" { 85 | t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1) 86 | } 87 | if len(t2.T2slice) != 2 { 88 | t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice)) 89 | } 90 | if t2.T2slice[1].T1int1 != 1 { 91 | t.Fatalf("wrong slice value\n") 92 | } 93 | if len(t2.T2map) != 1 { 94 | t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map)) 95 | } 96 | if t2.T2map[99].T1string1 != "y" { 97 | t.Fatalf("wrong map value\n") 98 | } 99 | t3 := (t2.T2t3).(T3) 100 | if t3.T3int999 != 999 { 101 | t.Fatalf("wrong t2.T2t3.T3int999\n") 102 | } 103 | } 104 | 105 | if errorCount != e0 { 106 | t.Fatalf("there were errors, but should not have been") 107 | } 108 | } 109 | 110 | type T4 struct { 111 | Yes int 112 | no int 113 | } 
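// T4.no is deliberately lower-case: gob will encode only T4.Yes, and labgob's
// capitalization check should report it exactly once in TestCapital below.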
114 | 115 | // 116 | // make sure we check capitalization 117 | // labgob prints one warning during this test. 118 | // 119 | func TestCapital(t *testing.T) { 120 | e0 := errorCount 121 | 122 | v := []map[*T4]int{} 123 | 124 | w := new(bytes.Buffer) 125 | e := NewEncoder(w) 126 | e.Encode(v) 127 | data := w.Bytes() 128 | 129 | var v1 []map[T4]int 130 | r := bytes.NewBuffer(data) 131 | d := NewDecoder(r) 132 | d.Decode(&v1) 133 | 134 | if errorCount != e0+1 { 135 | t.Fatalf("failed to warn about lower-case field") 136 | } 137 | } 138 | 139 | // 140 | // check that we warn when someone sends a default value over 141 | // RPC but the target into which we're decoding holds a non-default 142 | // value, which GOB seems not to overwrite as you'd expect. 143 | // 144 | // labgob does not print a warning. 145 | // 146 | func TestDefault(t *testing.T) { 147 | e0 := errorCount 148 | 149 | type DD struct { 150 | X int 151 | } 152 | 153 | // send a default value... 154 | dd1 := DD{} 155 | 156 | w := new(bytes.Buffer) 157 | e := NewEncoder(w) 158 | e.Encode(dd1) 159 | data := w.Bytes() 160 | 161 | // and receive it into memory that already 162 | // holds non-default values. 163 | reply := DD{99} 164 | 165 | r := bytes.NewBuffer(data) 166 | d := NewDecoder(r) 167 | d.Decode(&reply) 168 | 169 | if errorCount != e0+1 { 170 | t.Fatalf("failed to warn about decoding into non-default value") 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/labrpc/test_test.go: -------------------------------------------------------------------------------- 1 | package labrpc 2 | 3 | import "testing" 4 | import "strconv" 5 | import "sync" 6 | import "runtime" 7 | import "time" 8 | import "fmt" 9 | 10 | type JunkArgs struct { 11 | X int 12 | } 13 | type JunkReply struct { 14 | X string 15 | } 16 | 17 | type JunkServer struct { 18 | mu sync.Mutex 19 | log1 []string 20 | log2 []int 21 | } 22 | 23 | func (js *JunkServer) Handler1(args string, reply *int) { 24 | js.mu.Lock() 25 | defer js.mu.Unlock() 26 | js.log1 = append(js.log1, args) 27 | *reply, _ = strconv.Atoi(args) 28 | } 29 | 30 | func (js *JunkServer) Handler2(args int, reply *string) { 31 | js.mu.Lock() 32 | defer js.mu.Unlock() 33 | js.log2 = append(js.log2, args) 34 | *reply = "handler2-" + strconv.Itoa(args) 35 | } 36 | 37 | func (js *JunkServer) Handler3(args int, reply *int) { 38 | js.mu.Lock() 39 | defer js.mu.Unlock() 40 | time.Sleep(20 * time.Second) 41 | *reply = -args 42 | } 43 | 44 | // args is a pointer 45 | func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) { 46 | reply.X = "pointer" 47 | } 48 | 49 | // args is a not pointer 50 | func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) { 51 | reply.X = "no pointer" 52 | } 53 | 54 | func TestBasic(t *testing.T) { 55 | runtime.GOMAXPROCS(4) 56 | 57 | rn := MakeNetwork() 58 | defer rn.Cleanup() 59 | 60 | e := rn.MakeEnd("end1-99") 61 | 62 | js := &JunkServer{} 63 | svc := MakeService(js) 64 | 65 | rs := MakeServer() 66 | rs.AddService(svc) 67 | rn.AddServer("server99", rs) 68 | 69 | rn.Connect("end1-99", "server99") 70 | rn.Enable("end1-99", true) 71 | 72 | { 73 | reply := "" 74 | e.Call("JunkServer.Handler2", 111, &reply) 75 | if reply != "handler2-111" { 76 | t.Fatalf("wrong reply from Handler2") 77 | } 78 | } 79 | 80 | { 81 | reply := 0 82 | e.Call("JunkServer.Handler1", "9099", &reply) 83 | if reply != 9099 { 84 | t.Fatalf("wrong reply from Handler1") 85 | } 86 | } 87 | } 88 | 89 | func TestTypes(t *testing.T) { 90 | 
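	// TestTypes checks that an RPC succeeds whether the handler takes its args
	// struct by pointer (Handler4) or by value (Handler5), as long as the
	// caller passes args of the matching kind.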
runtime.GOMAXPROCS(4) 91 | 92 | rn := MakeNetwork() 93 | defer rn.Cleanup() 94 | 95 | e := rn.MakeEnd("end1-99") 96 | 97 | js := &JunkServer{} 98 | svc := MakeService(js) 99 | 100 | rs := MakeServer() 101 | rs.AddService(svc) 102 | rn.AddServer("server99", rs) 103 | 104 | rn.Connect("end1-99", "server99") 105 | rn.Enable("end1-99", true) 106 | 107 | { 108 | var args JunkArgs 109 | var reply JunkReply 110 | // args must match type (pointer or not) of handler. 111 | e.Call("JunkServer.Handler4", &args, &reply) 112 | if reply.X != "pointer" { 113 | t.Fatalf("wrong reply from Handler4") 114 | } 115 | } 116 | 117 | { 118 | var args JunkArgs 119 | var reply JunkReply 120 | // args must match type (pointer or not) of handler. 121 | e.Call("JunkServer.Handler5", args, &reply) 122 | if reply.X != "no pointer" { 123 | t.Fatalf("wrong reply from Handler5") 124 | } 125 | } 126 | } 127 | 128 | // 129 | // does net.Enable(endname, false) really disconnect a client? 130 | // 131 | func TestDisconnect(t *testing.T) { 132 | runtime.GOMAXPROCS(4) 133 | 134 | rn := MakeNetwork() 135 | defer rn.Cleanup() 136 | 137 | e := rn.MakeEnd("end1-99") 138 | 139 | js := &JunkServer{} 140 | svc := MakeService(js) 141 | 142 | rs := MakeServer() 143 | rs.AddService(svc) 144 | rn.AddServer("server99", rs) 145 | 146 | rn.Connect("end1-99", "server99") 147 | 148 | { 149 | reply := "" 150 | e.Call("JunkServer.Handler2", 111, &reply) 151 | if reply != "" { 152 | t.Fatalf("unexpected reply from Handler2") 153 | } 154 | } 155 | 156 | rn.Enable("end1-99", true) 157 | 158 | { 159 | reply := 0 160 | e.Call("JunkServer.Handler1", "9099", &reply) 161 | if reply != 9099 { 162 | t.Fatalf("wrong reply from Handler1") 163 | } 164 | } 165 | } 166 | 167 | // 168 | // test net.GetCount() 169 | // 170 | func TestCounts(t *testing.T) { 171 | runtime.GOMAXPROCS(4) 172 | 173 | rn := MakeNetwork() 174 | defer rn.Cleanup() 175 | 176 | e := rn.MakeEnd("end1-99") 177 | 178 | js := &JunkServer{} 179 | svc := MakeService(js) 180 | 181 | rs := MakeServer() 182 | rs.AddService(svc) 183 | rn.AddServer(99, rs) 184 | 185 | rn.Connect("end1-99", 99) 186 | rn.Enable("end1-99", true) 187 | 188 | for i := 0; i < 17; i++ { 189 | reply := "" 190 | e.Call("JunkServer.Handler2", i, &reply) 191 | wanted := "handler2-" + strconv.Itoa(i) 192 | if reply != wanted { 193 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 194 | } 195 | } 196 | 197 | n := rn.GetCount(99) 198 | if n != 17 { 199 | t.Fatalf("wrong GetCount() %v, expected 17\n", n) 200 | } 201 | } 202 | 203 | // 204 | // test RPCs from concurrent ClientEnds 205 | // 206 | func TestConcurrentMany(t *testing.T) { 207 | runtime.GOMAXPROCS(4) 208 | 209 | rn := MakeNetwork() 210 | defer rn.Cleanup() 211 | 212 | js := &JunkServer{} 213 | svc := MakeService(js) 214 | 215 | rs := MakeServer() 216 | rs.AddService(svc) 217 | rn.AddServer(1000, rs) 218 | 219 | ch := make(chan int) 220 | 221 | nclients := 20 222 | nrpcs := 10 223 | for ii := 0; ii < nclients; ii++ { 224 | go func(i int) { 225 | n := 0 226 | defer func() { ch <- n }() 227 | 228 | e := rn.MakeEnd(i) 229 | rn.Connect(i, 1000) 230 | rn.Enable(i, true) 231 | 232 | for j := 0; j < nrpcs; j++ { 233 | arg := i*100 + j 234 | reply := "" 235 | e.Call("JunkServer.Handler2", arg, &reply) 236 | wanted := "handler2-" + strconv.Itoa(arg) 237 | if reply != wanted { 238 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 239 | } 240 | n += 1 241 | } 242 | }(ii) 243 | } 244 | 245 | total := 0 246 | for ii := 0; ii < nclients; ii++ 
{ 247 | x := <-ch 248 | total += x 249 | } 250 | 251 | if total != nclients*nrpcs { 252 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs) 253 | } 254 | 255 | n := rn.GetCount(1000) 256 | if n != total { 257 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 258 | } 259 | } 260 | 261 | // 262 | // test unreliable 263 | // 264 | func TestUnreliable(t *testing.T) { 265 | runtime.GOMAXPROCS(4) 266 | 267 | rn := MakeNetwork() 268 | defer rn.Cleanup() 269 | rn.Reliable(false) 270 | 271 | js := &JunkServer{} 272 | svc := MakeService(js) 273 | 274 | rs := MakeServer() 275 | rs.AddService(svc) 276 | rn.AddServer(1000, rs) 277 | 278 | ch := make(chan int) 279 | 280 | nclients := 300 281 | for ii := 0; ii < nclients; ii++ { 282 | go func(i int) { 283 | n := 0 284 | defer func() { ch <- n }() 285 | 286 | e := rn.MakeEnd(i) 287 | rn.Connect(i, 1000) 288 | rn.Enable(i, true) 289 | 290 | arg := i * 100 291 | reply := "" 292 | ok := e.Call("JunkServer.Handler2", arg, &reply) 293 | if ok { 294 | wanted := "handler2-" + strconv.Itoa(arg) 295 | if reply != wanted { 296 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 297 | } 298 | n += 1 299 | } 300 | }(ii) 301 | } 302 | 303 | total := 0 304 | for ii := 0; ii < nclients; ii++ { 305 | x := <-ch 306 | total += x 307 | } 308 | 309 | if total == nclients || total == 0 { 310 | t.Fatalf("all RPCs succeeded despite unreliable") 311 | } 312 | } 313 | 314 | // 315 | // test concurrent RPCs from a single ClientEnd 316 | // 317 | func TestConcurrentOne(t *testing.T) { 318 | runtime.GOMAXPROCS(4) 319 | 320 | rn := MakeNetwork() 321 | defer rn.Cleanup() 322 | 323 | js := &JunkServer{} 324 | svc := MakeService(js) 325 | 326 | rs := MakeServer() 327 | rs.AddService(svc) 328 | rn.AddServer(1000, rs) 329 | 330 | e := rn.MakeEnd("c") 331 | rn.Connect("c", 1000) 332 | rn.Enable("c", true) 333 | 334 | ch := make(chan int) 335 | 336 | nrpcs := 20 337 | for ii := 0; ii < nrpcs; ii++ { 338 | go func(i int) { 339 | n := 0 340 | defer func() { ch <- n }() 341 | 342 | arg := 100 + i 343 | reply := "" 344 | e.Call("JunkServer.Handler2", arg, &reply) 345 | wanted := "handler2-" + strconv.Itoa(arg) 346 | if reply != wanted { 347 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 348 | } 349 | n += 1 350 | }(ii) 351 | } 352 | 353 | total := 0 354 | for ii := 0; ii < nrpcs; ii++ { 355 | x := <-ch 356 | total += x 357 | } 358 | 359 | if total != nrpcs { 360 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs) 361 | } 362 | 363 | js.mu.Lock() 364 | defer js.mu.Unlock() 365 | if len(js.log2) != nrpcs { 366 | t.Fatalf("wrong number of RPCs delivered") 367 | } 368 | 369 | n := rn.GetCount(1000) 370 | if n != total { 371 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 372 | } 373 | } 374 | 375 | // 376 | // regression: an RPC that's delayed during Enabled=false 377 | // should not delay subsequent RPCs (e.g. after Enabled=true). 378 | // 379 | func TestRegression1(t *testing.T) { 380 | runtime.GOMAXPROCS(4) 381 | 382 | rn := MakeNetwork() 383 | defer rn.Cleanup() 384 | 385 | js := &JunkServer{} 386 | svc := MakeService(js) 387 | 388 | rs := MakeServer() 389 | rs.AddService(svc) 390 | rn.AddServer(1000, rs) 391 | 392 | e := rn.MakeEnd("c") 393 | rn.Connect("c", 1000) 394 | 395 | // start some RPCs while the ClientEnd is disabled. 396 | // they'll be delayed. 
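	// Aside (added note, not in the original test): in labrpc an RPC sent
	// while the end is disabled is held back and eventually fails, so
	// Call() reports delivery through its boolean result -- false means the
	// reply variable was never filled in. The checks below rely on exactly
	// that behavior.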
397 | rn.Enable("c", false) 398 | ch := make(chan bool) 399 | nrpcs := 20 400 | for ii := 0; ii < nrpcs; ii++ { 401 | go func(i int) { 402 | ok := false 403 | defer func() { ch <- ok }() 404 | 405 | arg := 100 + i 406 | reply := "" 407 | // this call ought to return false. 408 | e.Call("JunkServer.Handler2", arg, &reply) 409 | ok = true 410 | }(ii) 411 | } 412 | 413 | time.Sleep(100 * time.Millisecond) 414 | 415 | // now enable the ClientEnd and check that an RPC completes quickly. 416 | t0 := time.Now() 417 | rn.Enable("c", true) 418 | { 419 | arg := 99 420 | reply := "" 421 | e.Call("JunkServer.Handler2", arg, &reply) 422 | wanted := "handler2-" + strconv.Itoa(arg) 423 | if reply != wanted { 424 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 425 | } 426 | } 427 | dur := time.Since(t0).Seconds() 428 | 429 | if dur > 0.03 { 430 | t.Fatalf("RPC took too long (%v) after Enable", dur) 431 | } 432 | 433 | for ii := 0; ii < nrpcs; ii++ { 434 | <-ch 435 | } 436 | 437 | js.mu.Lock() 438 | defer js.mu.Unlock() 439 | if len(js.log2) != 1 { 440 | t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2)) 441 | } 442 | 443 | n := rn.GetCount(1000) 444 | if n != 1 { 445 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1) 446 | } 447 | } 448 | 449 | // 450 | // if an RPC is stuck in a server, and the server 451 | // is killed with DeleteServer(), does the RPC 452 | // get un-stuck? 453 | // 454 | func TestKilled(t *testing.T) { 455 | runtime.GOMAXPROCS(4) 456 | 457 | rn := MakeNetwork() 458 | defer rn.Cleanup() 459 | 460 | e := rn.MakeEnd("end1-99") 461 | 462 | js := &JunkServer{} 463 | svc := MakeService(js) 464 | 465 | rs := MakeServer() 466 | rs.AddService(svc) 467 | rn.AddServer("server99", rs) 468 | 469 | rn.Connect("end1-99", "server99") 470 | rn.Enable("end1-99", true) 471 | 472 | doneCh := make(chan bool) 473 | go func() { 474 | reply := 0 475 | ok := e.Call("JunkServer.Handler3", 99, &reply) 476 | doneCh <- ok 477 | }() 478 | 479 | time.Sleep(1000 * time.Millisecond) 480 | 481 | select { 482 | case <-doneCh: 483 | t.Fatalf("Handler3 should not have returned yet") 484 | case <-time.After(100 * time.Millisecond): 485 | } 486 | 487 | rn.DeleteServer("server99") 488 | 489 | select { 490 | case x := <-doneCh: 491 | if x != false { 492 | t.Fatalf("Handler3 returned successfully despite DeleteServer()") 493 | } 494 | case <-time.After(100 * time.Millisecond): 495 | t.Fatalf("Handler3 should return after DeleteServer()") 496 | } 497 | } 498 | 499 | func TestBenchmark(t *testing.T) { 500 | runtime.GOMAXPROCS(4) 501 | 502 | rn := MakeNetwork() 503 | defer rn.Cleanup() 504 | 505 | e := rn.MakeEnd("end1-99") 506 | 507 | js := &JunkServer{} 508 | svc := MakeService(js) 509 | 510 | rs := MakeServer() 511 | rs.AddService(svc) 512 | rn.AddServer("server99", rs) 513 | 514 | rn.Connect("end1-99", "server99") 515 | rn.Enable("end1-99", true) 516 | 517 | t0 := time.Now() 518 | n := 100000 519 | for iters := 0; iters < n; iters++ { 520 | reply := "" 521 | e.Call("JunkServer.Handler2", 111, &reply) 522 | if reply != "handler2-111" { 523 | t.Fatalf("wrong reply from Handler2") 524 | } 525 | } 526 | fmt.Printf("%v for %v\n", time.Since(t0), n) 527 | // march 2016, rtm laptop, 22 microseconds per RPC 528 | } 529 | -------------------------------------------------------------------------------- /src/linearizability/bitset.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | type bitset []uint64 4 | 5 | // data 
layout: 6 | // bits 0-63 are in data[0], the next are in data[1], etc. 7 | 8 | func newBitset(bits uint) bitset { 9 | extra := uint(0) 10 | if bits%64 != 0 { 11 | extra = 1 12 | } 13 | chunks := bits/64 + extra 14 | return bitset(make([]uint64, chunks)) 15 | } 16 | 17 | func (b bitset) clone() bitset { 18 | dataCopy := make([]uint64, len(b)) 19 | copy(dataCopy, b) 20 | return bitset(dataCopy) 21 | } 22 | 23 | func bitsetIndex(pos uint) (uint, uint) { 24 | return pos / 64, pos % 64 25 | } 26 | 27 | func (b bitset) set(pos uint) bitset { 28 | major, minor := bitsetIndex(pos) 29 | b[major] |= (1 << minor) 30 | return b 31 | } 32 | 33 | func (b bitset) clear(pos uint) bitset { 34 | major, minor := bitsetIndex(pos) 35 | b[major] &^= (1 << minor) 36 | return b 37 | } 38 | 39 | func (b bitset) get(pos uint) bool { 40 | major, minor := bitsetIndex(pos) 41 | return b[major]&(1<<minor) != 0 42 | } 43 | 44 | func (b bitset) popcnt() uint { 45 | total := uint(0) 46 | for _, v := range b { 47 | v = (v & 0x5555555555555555) + ((v & 0xAAAAAAAAAAAAAAAA) >> 1) 48 | v = (v & 0x3333333333333333) + ((v & 0xCCCCCCCCCCCCCCCC) >> 2) 49 | v = (v & 0x0F0F0F0F0F0F0F0F) + ((v & 0xF0F0F0F0F0F0F0F0) >> 4) 50 | v *= 0x0101010101010101 51 | total += uint((v >> 56) & 0xFF) 52 | } 53 | return total 54 | } 55 | 56 | func (b bitset) hash() uint64 { 57 | hash := uint64(b.popcnt()) 58 | for _, v := range b { 59 | hash ^= v 60 | } 61 | return hash 62 | } 63 | 64 | func (b bitset) equals(b2 bitset) bool { 65 | if len(b) != len(b2) { 66 | return false 67 | } 68 | for i := range b { 69 | if b[i] != b2[i] { 70 | return false 71 | } 72 | } 73 | return true 74 | } 75 | -------------------------------------------------------------------------------- /src/linearizability/linearizability.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | import ( 4 | "sort" 5 | "sync/atomic" 6 | "time" 7 | ) 8 | 9 | type entryKind bool 10 | 11 | const ( 12 | callEntry entryKind = false 13 | returnEntry = true 14 | ) 15 | 16 | type entry struct { 17 | kind entryKind 18 | value interface{} 19 | id uint 20 | time int64 21 | } 22 | 23 | type byTime []entry 24 | 25 | func (a byTime) Len() int { 26 | return len(a) 27 | } 28 | 29 | func (a byTime) Swap(i, j int) { 30 | a[i], a[j] = a[j], a[i] 31 | } 32 | 33 | func (a byTime) Less(i, j int) bool { 34 | return a[i].time < a[j].time 35 | } 36 | 37 | func makeEntries(history []Operation) []entry { 38 | var entries []entry = nil 39 | id := uint(0) 40 | for _, elem := range history { 41 | entries = append(entries, entry{ 42 | callEntry, elem.Input, id, elem.Call}) 43 | entries = append(entries, entry{ 44 | returnEntry, elem.Output, id, elem.Return}) 45 | id++ 46 | } 47 | sort.Sort(byTime(entries)) 48 | return entries 49 | } 50 | 51 | type node struct { 52 | value interface{} 53 | match *node // call if match is nil, otherwise return 54 | id uint 55 | next *node 56 | prev *node 57 | } 58 | 59 | func insertBefore(n *node, mark *node) *node { 60 | if mark != nil { 61 | beforeMark := mark.prev 62 | mark.prev = n 63 | n.next = mark 64 | if beforeMark != nil { 65 | n.prev = beforeMark 66 | beforeMark.next = n 67 | } 68 | } 69 | return n 70 | } 71 | 72 | func length(n *node) uint { 73 | l := uint(0) 74 | for n != nil { 75 | n = n.next 76 | l++ 77 | } 78 | return l 79 | } 80 | 81 | func renumber(events []Event) []Event { 82 | var e []Event 83 | m := make(map[uint]uint) // renumbering 84 | id := uint(0) 85 | for _, v := range events { 86 | if r, ok := m[v.Id]; ok { 87 | e = append(e, Event{v.Kind, v.Value, r}) 88 | } else { 89 | e = append(e, Event{v.Kind, v.Value, id}) 90 | m[v.Id] = id 91 | id++ 92 | } 93 | } 94 | return e
95 | } 96 | 97 | func convertEntries(events []Event) []entry { 98 | var entries []entry 99 | for _, elem := range events { 100 | kind := callEntry 101 | if elem.Kind == ReturnEvent { 102 | kind = returnEntry 103 | } 104 | entries = append(entries, entry{kind, elem.Value, elem.Id, -1}) 105 | } 106 | return entries 107 | } 108 | 109 | func makeLinkedEntries(entries []entry) *node { 110 | var root *node = nil 111 | match := make(map[uint]*node) 112 | for i := len(entries) - 1; i >= 0; i-- { 113 | elem := entries[i] 114 | if elem.kind == returnEntry { 115 | entry := &node{value: elem.value, match: nil, id: elem.id} 116 | match[elem.id] = entry 117 | insertBefore(entry, root) 118 | root = entry 119 | } else { 120 | entry := &node{value: elem.value, match: match[elem.id], id: elem.id} 121 | insertBefore(entry, root) 122 | root = entry 123 | } 124 | } 125 | return root 126 | } 127 | 128 | type cacheEntry struct { 129 | linearized bitset 130 | state interface{} 131 | } 132 | 133 | func cacheContains(model Model, cache map[uint64][]cacheEntry, entry cacheEntry) bool { 134 | for _, elem := range cache[entry.linearized.hash()] { 135 | if entry.linearized.equals(elem.linearized) && model.Equal(entry.state, elem.state) { 136 | return true 137 | } 138 | } 139 | return false 140 | } 141 | 142 | type callsEntry struct { 143 | entry *node 144 | state interface{} 145 | } 146 | 147 | func lift(entry *node) { 148 | entry.prev.next = entry.next 149 | entry.next.prev = entry.prev 150 | match := entry.match 151 | match.prev.next = match.next 152 | if match.next != nil { 153 | match.next.prev = match.prev 154 | } 155 | } 156 | 157 | func unlift(entry *node) { 158 | match := entry.match 159 | match.prev.next = match 160 | if match.next != nil { 161 | match.next.prev = match 162 | } 163 | entry.prev.next = entry 164 | entry.next.prev = entry 165 | } 166 | 167 | func checkSingle(model Model, subhistory *node, kill *int32) bool { 168 | n := length(subhistory) / 2 169 | linearized := newBitset(n) 170 | cache := make(map[uint64][]cacheEntry) // map from hash to cache entry 171 | var calls []callsEntry 172 | 173 | state := model.Init() 174 | headEntry := insertBefore(&node{value: nil, match: nil, id: ^uint(0)}, subhistory) 175 | entry := subhistory 176 | for headEntry.next != nil { 177 | if atomic.LoadInt32(kill) != 0 { 178 | return false 179 | } 180 | if entry.match != nil { 181 | matching := entry.match // the return entry 182 | ok, newState := model.Step(state, entry.value, matching.value) 183 | if ok { 184 | newLinearized := linearized.clone().set(entry.id) 185 | newCacheEntry := cacheEntry{newLinearized, newState} 186 | if !cacheContains(model, cache, newCacheEntry) { 187 | hash := newLinearized.hash() 188 | cache[hash] = append(cache[hash], newCacheEntry) 189 | calls = append(calls, callsEntry{entry, state}) 190 | state = newState 191 | linearized.set(entry.id) 192 | lift(entry) 193 | entry = headEntry.next 194 | } else { 195 | entry = entry.next 196 | } 197 | } else { 198 | entry = entry.next 199 | } 200 | } else { 201 | if len(calls) == 0 { 202 | return false 203 | } 204 | callsTop := calls[len(calls)-1] 205 | entry = callsTop.entry 206 | state = callsTop.state 207 | linearized.clear(entry.id) 208 | calls = calls[:len(calls)-1] 209 | unlift(entry) 210 | entry = entry.next 211 | } 212 | } 213 | return true 214 | } 215 | 216 | func fillDefault(model Model) Model { 217 | if model.Partition == nil { 218 | model.Partition = NoPartition 219 | } 220 | if model.PartitionEvent == nil { 221 | model.PartitionEvent = 
NoPartitionEvent 222 | } 223 | if model.Equal == nil { 224 | model.Equal = ShallowEqual 225 | } 226 | return model 227 | } 228 | 229 | func CheckOperations(model Model, history []Operation) bool { 230 | return CheckOperationsTimeout(model, history, 0) 231 | } 232 | 233 | // timeout = 0 means no timeout 234 | // if this operation times out, then a false positive is possible 235 | func CheckOperationsTimeout(model Model, history []Operation, timeout time.Duration) bool { 236 | model = fillDefault(model) 237 | partitions := model.Partition(history) 238 | ok := true 239 | results := make(chan bool) 240 | kill := int32(0) 241 | for _, subhistory := range partitions { 242 | l := makeLinkedEntries(makeEntries(subhistory)) 243 | go func() { 244 | results <- checkSingle(model, l, &kill) 245 | }() 246 | } 247 | var timeoutChan <-chan time.Time 248 | if timeout > 0 { 249 | timeoutChan = time.After(timeout) 250 | } 251 | count := 0 252 | loop: 253 | for { 254 | select { 255 | case result := <-results: 256 | ok = ok && result 257 | if !ok { 258 | atomic.StoreInt32(&kill, 1) 259 | break loop 260 | } 261 | count++ 262 | if count >= len(partitions) { 263 | break loop 264 | } 265 | case <-timeoutChan: 266 | break loop // if we time out, we might get a false positive 267 | } 268 | } 269 | return ok 270 | } 271 | 272 | func CheckEvents(model Model, history []Event) bool { 273 | return CheckEventsTimeout(model, history, 0) 274 | } 275 | 276 | // timeout = 0 means no timeout 277 | // if this operation times out, then a false positive is possible 278 | func CheckEventsTimeout(model Model, history []Event, timeout time.Duration) bool { 279 | model = fillDefault(model) 280 | partitions := model.PartitionEvent(history) 281 | ok := true 282 | results := make(chan bool) 283 | kill := int32(0) 284 | for _, subhistory := range partitions { 285 | l := makeLinkedEntries(convertEntries(renumber(subhistory))) 286 | go func() { 287 | results <- checkSingle(model, l, &kill) 288 | }() 289 | } 290 | var timeoutChan <-chan time.Time 291 | if timeout > 0 { 292 | timeoutChan = time.After(timeout) 293 | } 294 | count := 0 295 | loop: 296 | for { 297 | select { 298 | case result := <-results: 299 | ok = ok && result 300 | if !ok { 301 | atomic.StoreInt32(&kill, 1) 302 | break loop 303 | } 304 | count++ 305 | if count >= len(partitions) { 306 | break loop 307 | } 308 | case <-timeoutChan: 309 | break loop // if we time out, we might get a false positive 310 | } 311 | } 312 | return ok 313 | } 314 | -------------------------------------------------------------------------------- /src/linearizability/model.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | type Operation struct { 4 | Input interface{} 5 | Call int64 // invocation time 6 | Output interface{} 7 | Return int64 // response time 8 | } 9 | 10 | type EventKind bool 11 | 12 | const ( 13 | CallEvent EventKind = false 14 | ReturnEvent EventKind = true 15 | ) 16 | 17 | type Event struct { 18 | Kind EventKind 19 | Value interface{} 20 | Id uint 21 | } 22 | 23 | type Model struct { 24 | // Partition functions, such that a history is linearizable if an only 25 | // if each partition is linearizable. If you don't want to implement 26 | // this, you can always use the `NoPartition` functions implemented 27 | // below. 28 | Partition func(history []Operation) [][]Operation 29 | PartitionEvent func(history []Event) [][]Event 30 | // Initial state of the system. 
31 | Init func() interface{} 32 | // Step function for the system. Returns whether or not the system 33 | // could take this step with the given inputs and outputs and also 34 | // returns the new state. This should not mutate the existing state. 35 | Step func(state interface{}, input interface{}, output interface{}) (bool, interface{}) 36 | // Equality on states. If you are using a simple data type for states, 37 | // you can use the `ShallowEqual` function implemented below. 38 | Equal func(state1, state2 interface{}) bool 39 | } 40 | 41 | func NoPartition(history []Operation) [][]Operation { 42 | return [][]Operation{history} 43 | } 44 | 45 | func NoPartitionEvent(history []Event) [][]Event { 46 | return [][]Event{history} 47 | } 48 | 49 | func ShallowEqual(state1, state2 interface{}) bool { 50 | return state1 == state2 51 | } 52 | -------------------------------------------------------------------------------- /src/linearizability/models.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | // kv model 4 | 5 | type KvInput struct { 6 | Op uint8 // 0 => get, 1 => put, 2 => append 7 | Key string 8 | Value string 9 | } 10 | 11 | type KvOutput struct { 12 | Value string 13 | } 14 | 15 | func KvModel() Model { 16 | return Model { 17 | Partition: func(history []Operation) [][]Operation { 18 | m := make(map[string][]Operation) 19 | for _, v := range history { 20 | key := v.Input.(KvInput).Key 21 | m[key] = append(m[key], v) 22 | } 23 | var ret [][]Operation 24 | for _, v := range m { 25 | ret = append(ret, v) 26 | } 27 | return ret 28 | }, 29 | Init: func() interface{} { 30 | // note: we are modeling a single key's value here; 31 | // we're partitioning by key, so this is okay 32 | return "" 33 | }, 34 | Step: func(state, input, output interface{}) (bool, interface{}) { 35 | inp := input.(KvInput) 36 | out := output.(KvOutput) 37 | st := state.(string) 38 | if inp.Op == 0 { 39 | // get 40 | return out.Value == st, state 41 | } else if inp.Op == 1 { 42 | // put 43 | return true, inp.Value 44 | } else { 45 | // append 46 | return true, (st + inp.Value) 47 | } 48 | }, 49 | Equal: ShallowEqual, 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/diskvd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start a diskvd server. it's a member of some replica 5 | // group, which has other members, and it needs to know 6 | // how to talk to the members of the shardmaster service. 7 | // used by ../diskv/test_test.go 8 | // 9 | // arguments: 10 | // -g groupid 11 | // -m masterport1 -m masterport2 ... 12 | // -s replicaport1 -s replicaport2 ... 13 | // -i my-index-in-server-port-list 14 | // -u unreliable 15 | // -d directory 16 | // -r restart 17 | 18 | import "time" 19 | import "diskv" 20 | import "os" 21 | import "fmt" 22 | import "strconv" 23 | import "runtime" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: diskvd -g gid -m master... -s server... 
-i my-index -d dir\n") 27 | os.Exit(1) 28 | } 29 | 30 | func main() { 31 | var gid int64 = -1 // my replica group ID 32 | masters := []string{} // ports of shardmasters 33 | replicas := []string{} // ports of servers in my replica group 34 | me := -1 // my index in replicas[] 35 | unreliable := false 36 | dir := "" // store persistent data here 37 | restart := false 38 | 39 | for i := 1; i+1 < len(os.Args); i += 2 { 40 | a0 := os.Args[i] 41 | a1 := os.Args[i+1] 42 | if a0 == "-g" { 43 | gid, _ = strconv.ParseInt(a1, 10, 64) 44 | } else if a0 == "-m" { 45 | masters = append(masters, a1) 46 | } else if a0 == "-s" { 47 | replicas = append(replicas, a1) 48 | } else if a0 == "-i" { 49 | me, _ = strconv.Atoi(a1) 50 | } else if a0 == "-u" { 51 | unreliable, _ = strconv.ParseBool(a1) 52 | } else if a0 == "-d" { 53 | dir = a1 54 | } else if a0 == "-r" { 55 | restart, _ = strconv.ParseBool(a1) 56 | } else { 57 | usage() 58 | } 59 | } 60 | 61 | if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" { 62 | usage() 63 | } 64 | 65 | runtime.GOMAXPROCS(4) 66 | 67 | srv := diskv.StartServer(gid, masters, replicas, me, dir, restart) 68 | srv.Setunreliable(unreliable) 69 | 70 | // for safety, force quit after 10 minutes. 71 | time.Sleep(10 * 60 * time.Second) 72 | mep, _ := os.FindProcess(os.Getpid()) 73 | mep.Kill() 74 | } 75 | -------------------------------------------------------------------------------- /src/main/ii.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "fmt" 6 | "mapreduce" 7 | 8 | "strings" 9 | "sort" 10 | "strconv" 11 | "unicode" 12 | ) 13 | 14 | // The mapping function is called once for each piece of the input. 15 | // In this framework, the key is the name of the file that is being processed, 16 | // and the value is the file's contents. The return value should be a slice of 17 | // key/value pairs, each represented by a mapreduce.KeyValue. 18 | func mapF(document string, value string) (res []mapreduce.KeyValue) { 19 | // Your code here (Part V). 20 | fmt.Println("ii mapF " + document) 21 | words := strings.FieldsFunc(value, func(r rune) bool { 22 | return !unicode.IsLetter(r) 23 | }) 24 | 25 | // declare an empty unordered_set 26 | type Dummy struct { } 27 | var empty Dummy 28 | keys := make(map[string] Dummy) // non repeat keys 29 | 30 | for _, w := range words { 31 | if _, exist := keys[w]; !exist { 32 | keys[w] = empty 33 | res = append(res, mapreduce.KeyValue{Key: w, Value: document}) 34 | } 35 | } 36 | 37 | return 38 | } 39 | 40 | // The reduce function is called once for each key generated by Map, with a 41 | // list of that key's string value (merged across all inputs). The return value 42 | // should be a single output value for that key. 43 | func reduceF(key string, values []string) string { 44 | // Your code here (Part V). 45 | sort.Strings(values) 46 | return strconv.Itoa(len(values)) + " " + strings.Join(values, ",") 47 | } 48 | 49 | // Can be run in 3 ways: 50 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 51 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. 
xN.txt) 52 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 53 | func main() { 54 | if len(os.Args) < 4 { 55 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 56 | } else if os.Args[1] == "master" { 57 | var mr *mapreduce.Master 58 | if os.Args[2] == "sequential" { 59 | mr = mapreduce.Sequential("iiseq", os.Args[3:], 3, mapF, reduceF) 60 | } else { 61 | mr = mapreduce.Distributed("iiseq", os.Args[3:], 3, os.Args[2]) 62 | } 63 | mr.Wait() 64 | } else { 65 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100, nil) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/lockc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see comments in lockd.go 5 | // 6 | 7 | import "lockservice" 8 | import "os" 9 | import "fmt" 10 | 11 | func usage() { 12 | fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n") 13 | os.Exit(1) 14 | } 15 | 16 | func main() { 17 | if len(os.Args) == 5 { 18 | ck := lockservice.MakeClerk(os.Args[2], os.Args[3]) 19 | var ok bool 20 | if os.Args[1] == "-l" { 21 | ok = ck.Lock(os.Args[4]) 22 | } else if os.Args[1] == "-u" { 23 | ok = ck.Unlock(os.Args[4]) 24 | } else { 25 | usage() 26 | } 27 | fmt.Printf("reply: %v\n", ok) 28 | } else { 29 | usage() 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/lockd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // export GOPATH=~/6.824 4 | // go build lockd.go 5 | // go build lockc.go 6 | // ./lockd -p a b & 7 | // ./lockd -b a b & 8 | // ./lockc -l a b lx 9 | // ./lockc -u a b lx 10 | // 11 | // on Athena, use /tmp/myname-a and /tmp/myname-b 12 | // instead of a and b. 
13 | 14 | import "time" 15 | import "lockservice" 16 | import "os" 17 | import "fmt" 18 | 19 | func main() { 20 | if len(os.Args) == 4 && os.Args[1] == "-p" { 21 | lockservice.StartServer(os.Args[2], os.Args[3], true) 22 | } else if len(os.Args) == 4 && os.Args[1] == "-b" { 23 | lockservice.StartServer(os.Args[2], os.Args[3], false) 24 | } else { 25 | fmt.Printf("Usage: lockd -p|-b primaryport backupport\n") 26 | os.Exit(1) 27 | } 28 | for { 29 | time.Sleep(100 * time.Second) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/mr-challenge.txt: -------------------------------------------------------------------------------- 1 | www: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 2 | year: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 3 | years: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 4 | yesterday: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 5 | yet: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 6 | you: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 7 | young: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 8 | your: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 9 | yourself: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 10 | zip: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 11 | -------------------------------------------------------------------------------- /src/main/mr-testout.txt: -------------------------------------------------------------------------------- 1 | that: 7871 2 | it: 7987 3 | in: 8415 4 | was: 8578 5 | a: 13382 6 | of: 13536 7 | I: 14296 8 | to: 16079 9 | and: 23612 10 | the: 29748 11 | -------------------------------------------------------------------------------- /src/main/pbc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // pbservice client application 5 | // 6 | // export GOPATH=~/6.824 7 | // go build viewd.go 8 | // go build pbd.go 9 | // go build pbc.go 10 | // ./viewd /tmp/rtm-v & 11 | // ./pbd /tmp/rtm-v /tmp/rtm-1 & 12 | // ./pbd /tmp/rtm-v /tmp/rtm-2 & 13 | // ./pbc /tmp/rtm-v key1 value1 14 | // ./pbc /tmp/rtm-v key1 15 | // 16 | // change "rtm" to your user name. 17 | // start the pbd programs in separate windows and kill 18 | // and restart them to exercise fault tolerance. 
19 | // 20 | 21 | import "pbservice" 22 | import "os" 23 | import "fmt" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: pbc viewport key\n") 27 | fmt.Printf(" pbc viewport key value\n") 28 | os.Exit(1) 29 | } 30 | 31 | func main() { 32 | if len(os.Args) == 3 { 33 | // get 34 | ck := pbservice.MakeClerk(os.Args[1], "") 35 | v := ck.Get(os.Args[2]) 36 | fmt.Printf("%v\n", v) 37 | } else if len(os.Args) == 4 { 38 | // put 39 | ck := pbservice.MakeClerk(os.Args[1], "") 40 | ck.Put(os.Args[2], os.Args[3]) 41 | } else { 42 | usage() 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/pbd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "pbservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 3 { 14 | fmt.Printf("Usage: pbd viewport myport\n") 15 | os.Exit(1) 16 | } 17 | 18 | pbservice.StartServer(os.Args[1], os.Args[2]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/test-ii.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run ii.go master sequential pg-*.txt 3 | 4 | # cause sort to be case sensitive. 5 | # on Ubuntu (Athena) it's otherwise insensitive. 6 | LC_ALL=C 7 | export LC_ALL 8 | 9 | sort -k1,1 mrtmp.iiseq | sort -snk2,2 | grep -v '16' | tail -10 | diff - mr-challenge.txt > diff.out 10 | if [ -s diff.out ] 11 | then 12 | echo "Failed test. Output should be as in mr-challenge.txt. Your output differs as follows (from diff.out):" > /dev/stderr 13 | cat diff.out 14 | else 15 | echo "Passed test" > /dev/stderr 16 | fi 17 | 18 | -------------------------------------------------------------------------------- /src/main/test-mr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | here=$(dirname "$0") 3 | [[ "$here" = /* ]] || here="$PWD/$here" 4 | export GOPATH="$here/../../" 5 | echo "" 6 | echo "==> Part I" 7 | go test -run Sequential mapreduce/... 8 | echo "" 9 | echo "==> Part II" 10 | (cd "$here" && sh ./test-wc.sh > /dev/null) 11 | echo "" 12 | echo "==> Part III" 13 | go test -run TestParallel mapreduce/... 14 | echo "" 15 | echo "==> Part IV" 16 | go test -run Failure mapreduce/... 17 | echo "" 18 | echo "==> Part V (inverted index)" 19 | (cd "$here" && sh ./test-ii.sh > /dev/null) 20 | 21 | rm "$here"/mrtmp.* "$here"/diff.out 22 | -------------------------------------------------------------------------------- /src/main/test-wc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run wc.go master sequential pg-*.txt 3 | sort -n -k2 mrtmp.wcseq | tail -10 | diff - mr-testout.txt > diff.out 4 | if [ -s diff.out ] 5 | then 6 | echo "Failed test. Output should be as in mr-testout.txt. 
Your output differs as follows (from diff.out):" > /dev/stderr 7 | cat diff.out 8 | else 9 | echo "Passed test" > /dev/stderr 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /src/main/viewd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "viewservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 2 { 14 | fmt.Printf("Usage: viewd port\n") 15 | os.Exit(1) 16 | } 17 | 18 | viewservice.StartServer(os.Args[1]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/wc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "unicode" 6 | "mapreduce" 7 | "os" 8 | "strconv" 9 | "strings" 10 | ) 11 | 12 | // 13 | // The map function is called once for each file of input. The first 14 | // argument is the name of the input file, and the second is the 15 | // file's complete contents. You should ignore the input file name, 16 | // and look only at the contents argument. The return value is a slice 17 | // of key/value pairs. 18 | // 19 | // produce key/value pairs from file 20 | func mapF(filename string, contents string) []mapreduce.KeyValue { 21 | // Your code here (Part II). 22 | fmt.Println("mapF " + filename) 23 | words := strings.FieldsFunc(contents, func(r rune) bool { 24 | return !unicode.IsLetter(r) 25 | }) 26 | 27 | var res []mapreduce.KeyValue 28 | for _, w := range words { 29 | res = append(res, mapreduce.KeyValue{Key: w, Value: "1"}) 30 | } 31 | return res 32 | } 33 | 34 | // 35 | // The reduce function is called once for each key generated by the 36 | // map tasks, with a list of all the values created for that key by 37 | // any map task. 38 | // 39 | func reduceF(key string, values []string) string { 40 | // Your code here (Part II). 41 | count := len(values) 42 | fmt.Println("reduceF " + key + strconv.Itoa(count)) 43 | return strconv.Itoa(count) 44 | } 45 | 46 | // Can be run in 3 ways: 47 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 48 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. xN.txt) 49 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 50 | // 4...) File name list 51 | func main() { 52 | if len(os.Args) < 4 { 53 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 54 | } else if os.Args[1] == "master" { 55 | var mr *mapreduce.Master 56 | if os.Args[2] == "sequential" { 57 | mr = mapreduce.Sequential("wcseq", os.Args[3:], 3, mapF, reduceF) 58 | } else { 59 | mr = mapreduce.Distributed("wcseq", os.Args[3:], 3, os.Args[2]) 60 | } 61 | mr.Wait() 62 | } else { 63 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100, nil) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/mapreduce/common.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | ) 7 | 8 | // Debugging enabled? 9 | const debugEnabled = false 10 | 11 | // debug() will only print if debugEnabled is true 12 | func debug(format string, a ...interface{}) (n int, err error) { 13 | if debugEnabled { 14 | n, err = fmt.Printf(format, a...) 
15 | } 16 | return 17 | } 18 | 19 | // jobPhase indicates whether a task is scheduled as a map or reduce task. 20 | type jobPhase string 21 | 22 | const ( 23 | mapPhase jobPhase = "mapPhase" 24 | reducePhase = "reducePhase" 25 | ) 26 | 27 | // KeyValue is a type used to hold the key/value pairs passed to the map and 28 | // reduce functions. 29 | type KeyValue struct { 30 | Key string 31 | Value string 32 | } 33 | 34 | // reduceName constructs the name of the intermediate file which map task 35 | // produces for reduce task . 36 | func reduceName(jobName string, mapTask int, reduceTask int) string { 37 | return "mrtmp." + jobName + "-" + strconv.Itoa(mapTask) + "-" + strconv.Itoa(reduceTask) 38 | } 39 | 40 | // mergeName constructs the name of the output file of reduce task 41 | func mergeName(jobName string, reduceTask int) string { 42 | return "mrtmp." + jobName + "-res-" + strconv.Itoa(reduceTask) 43 | } 44 | -------------------------------------------------------------------------------- /src/mapreduce/common_map.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "hash/fnv" 5 | "encoding/json" 6 | "io/ioutil" 7 | "fmt" 8 | "os" 9 | ) 10 | 11 | // it is called for each input file 12 | func doMap( 13 | jobName string, // the name of the MapReduce job 14 | mapTask int, // which map task this is 15 | inFile string, 16 | nReduce int, // the number of reduce task that will be run ("R" in the paper) 17 | mapF func(filename string, contents string) []KeyValue, // use defined function 18 | ) { 19 | contents, err := ioutil.ReadFile(inFile) 20 | if err != nil { 21 | fmt.Println("ioutil.ReadFile error " + inFile) 22 | return 23 | } 24 | 25 | // produce key/value pairs from file 26 | kvs := mapF(inFile, string(contents)) 27 | for _ , kv := range kvs { 28 | reduceTask := ihash(kv.Key) % nReduce 29 | var intermediate string = reduceName(jobName, mapTask, reduceTask) // intermediate file 30 | f, err := os.OpenFile(intermediate, os.O_APPEND|os.O_CREATE|os.O_RDWR, 0666) 31 | if err != nil { 32 | fmt.Println("open err " + err.Error()) 33 | } 34 | 35 | enc := json.NewEncoder(f) 36 | enc.Encode(&kv) 37 | f.Close() 38 | } 39 | // 40 | // doMap manages one map task: it should read one of the input files 41 | // (inFile), call the user-defined map function (mapF) for that file's 42 | // contents, and partition mapF's output into nReduce intermediate files. 43 | // 44 | // There is one intermediate file per reduce task. The file name 45 | // includes both the map task number and the reduce task number. Use 46 | // the filename generated by reduceName(jobName, mapTask, r) 47 | // as the intermediate file for reduce task r. Call ihash() (see 48 | // below) on each key, mod nReduce, to pick r for a key/value pair. 49 | // 50 | // mapF() is the map function provided by the application. The first 51 | // argument should be the input file name, though the map function 52 | // typically ignores it. The second argument should be the entire 53 | // input file contents. mapF() returns a slice containing the 54 | // key/value pairs for reduce; see common.go for the definition of 55 | // KeyValue. 56 | // 57 | // Look at Go's ioutil and os packages for functions to read 58 | // and write files. 
59 | // 60 | // Coming up with a scheme for how to format the key/value pairs on 61 | // disk can be tricky, especially when taking into account that both 62 | // keys and values could contain newlines, quotes, and any other 63 | // character you can think of. 64 | // 65 | // One format often used for serializing data to a byte stream that the 66 | // other end can correctly reconstruct is JSON. You are not required to 67 | // use JSON, but as the output of the reduce tasks *must* be JSON, 68 | // familiarizing yourself with it here may prove useful. You can write 69 | // out a data structure as a JSON string to a file using the commented 70 | // code below. The corresponding decoding functions can be found in 71 | // common_reduce.go. 72 | // 73 | // enc := json.NewEncoder(file) 74 | // for _, kv := ... { 75 | // err := enc.Encode(&kv) 76 | // 77 | // Remember to close the file after you have written all the values! 78 | // 79 | // Your code here (Part I). 80 | // 81 | } 82 | 83 | func ihash(s string) int { 84 | h := fnv.New32a() 85 | h.Write([]byte(s)) 86 | return int(h.Sum32() & 0x7fffffff) 87 | } 88 | -------------------------------------------------------------------------------- /src/mapreduce/common_reduce.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "encoding/json" 7 | "os" 8 | ) 9 | 10 | // called for each reduce worker 11 | func doReduce( 12 | jobName string, // the name of the whole MapReduce job 13 | reduceTask int, // which reduce task this is 14 | outFile string, // write the output here 15 | nMap int, // the number of map tasks that were run ("M" in the paper) 16 | reduceF func(key string, values []string) string, 17 | ) { 18 | var kvs = make(map[string] []string) // unordered_map, key --> value list 19 | var keys []string // for sort keys 20 | 21 | for i := 0; i < nMap; i++ { 22 | var tmpFile string = reduceName(jobName, i, reduceTask) // the intermediate file from map task 23 | 24 | fmt.Println("doReduce read intermediate file:" + tmpFile) 25 | f, err := os.OpenFile(tmpFile, os.O_RDONLY, 0) 26 | if err != nil { 27 | fmt.Println(tmpFile + " opened error: " + err.Error()) 28 | } else { 29 | dec := json.NewDecoder(f) 30 | for { 31 | var kv KeyValue 32 | err = dec.Decode(&kv) 33 | if err != nil { 34 | break 35 | } 36 | 37 | if _, ok := kvs[kv.Key]; !ok { 38 | keys = append(keys, kv.Key) // new key 39 | } 40 | 41 | kvs[kv.Key] = append(kvs[kv.Key], kv.Value) // value list 42 | } 43 | } 44 | 45 | sort.Strings(keys) 46 | out, err := os.OpenFile(outFile, os.O_APPEND|os.O_CREATE|os.O_RDWR,0666) 47 | if err != nil { 48 | fmt.Println("Create file failed:" + outFile) 49 | return 50 | } 51 | 52 | enc := json.NewEncoder(out) 53 | for _, key := range keys { 54 | // call reduceF on each sorted key 55 | v := reduceF(key, kvs[key]) 56 | // output to reduce dest file 57 | if err = enc.Encode(KeyValue{key, v}); err != nil { 58 | fmt.Println("write [key: " + key + "] to file failed:" + outFile) 59 | } 60 | } 61 | out.Close() 62 | } 63 | fmt.Println("reduce Out file " + outFile) 64 | // 65 | // doReduce manages one reduce task: it should read the intermediate 66 | // files for the task, sort the intermediate key/value pairs by key, 67 | // call the user-defined reduce function (reduceF) for each key, and 68 | // write reduceF's output to disk. 
69 | // 70 | // You'll need to read one intermediate file from each map task; 71 | // reduceName(jobName, m, reduceTask) yields the file 72 | // name from map task m. 73 | // 74 | // Your doMap() encoded the key/value pairs in the intermediate 75 | // files, so you will need to decode them. If you used JSON, you can 76 | // read and decode by creating a decoder and repeatedly calling 77 | // .Decode(&kv) on it until it returns an error. 78 | // 79 | // You may find the first example in the golang sort package 80 | // documentation useful. 81 | // 82 | // reduceF() is the application's reduce function. You should 83 | // call it once per distinct key, with a slice of all the values 84 | // for that key. reduceF() returns the reduced value for that key. 85 | // 86 | // You should write the reduce output as JSON encoded KeyValue 87 | // objects to the file named outFile. We require you to use JSON 88 | // because that is what the merger than combines the output 89 | // from all the reduce tasks expects. There is nothing special about 90 | // JSON -- it is just the marshalling format we chose to use. Your 91 | // output code will look something like this: 92 | // 93 | // enc := json.NewEncoder(file) 94 | // for key := ... { 95 | // enc.Encode(KeyValue{key, reduceF(...)}) 96 | // } 97 | // file.Close() 98 | // 99 | // Your code here (Part I). 100 | // 101 | } 102 | -------------------------------------------------------------------------------- /src/mapreduce/common_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "net/rpc" 6 | ) 7 | 8 | // What follows are RPC types and methods. 9 | // Field names must start with capital letters, otherwise RPC will break. 10 | 11 | // DoTaskArgs holds the arguments that are passed to a worker when a job is 12 | // scheduled on it. 13 | type DoTaskArgs struct { 14 | JobName string 15 | File string // only for map, the input file 16 | Phase jobPhase // are we in mapPhase or reducePhase? 17 | TaskNumber int // this task's index in the current phase 18 | 19 | // NumOtherPhase is the total number of tasks in other phase; mappers 20 | // need this to compute the number of output bins, and reducers needs 21 | // this to know how many input files to collect. 22 | NumOtherPhase int 23 | } 24 | 25 | // ShutdownReply is the response to a WorkerShutdown. 26 | // It holds the number of tasks this worker has processed since it was started. 27 | type ShutdownReply struct { 28 | Ntasks int 29 | } 30 | 31 | // RegisterArgs is the argument passed when a worker registers with the master. 32 | type RegisterArgs struct { 33 | Worker string // the worker's UNIX-domain socket name, i.e. its RPC address 34 | } 35 | 36 | // call() sends an RPC to the rpcname handler on server srv 37 | // with arguments args, waits for the reply, and leaves the 38 | // reply in reply. the reply argument should be the address 39 | // of a reply structure. 40 | // 41 | // call() returns true if the server responded, and false if call() 42 | // received no reply from the server. reply's contents are valid if 43 | // and only if call() returned true. 44 | // 45 | // you should assume that call() will time out and return 46 | // false after a while if it doesn't get a reply from the server. 47 | // 48 | // please use call() to send all RPCs. please don't change this 49 | // function. 
50 | // 51 | func call(srv string, rpcname string, 52 | args interface{}, reply interface{}) bool { 53 | c, errx := rpc.Dial("unix", srv) 54 | if errx != nil { 55 | return false 56 | } 57 | defer c.Close() 58 | 59 | err := c.Call(rpcname, args, reply) 60 | if err == nil { 61 | return true 62 | } 63 | 64 | fmt.Println(err) 65 | return false 66 | } 67 | -------------------------------------------------------------------------------- /src/mapreduce/master.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | // 4 | // Please do not modify this file. 5 | // 6 | 7 | import ( 8 | "fmt" 9 | "net" 10 | "sync" 11 | ) 12 | 13 | // Master holds all the state that the master needs to keep track of. 14 | type Master struct { 15 | sync.Mutex 16 | 17 | address string 18 | doneChannel chan bool 19 | 20 | // protected by the mutex 21 | newCond *sync.Cond // signals when Register() adds to workers[] 22 | workers []string // each worker's UNIX-domain socket name -- its RPC address 23 | 24 | // Per-task information 25 | jobName string // Name of currently executing job 26 | files []string // Input files 27 | nReduce int // Number of reduce partitions 28 | 29 | shutdown chan struct{} 30 | l net.Listener 31 | stats []int 32 | } 33 | 34 | // Register is an RPC method that is called by workers after they have started 35 | // up to report that they are ready to receive tasks. 36 | func (mr *Master) Register(args *RegisterArgs, _ *struct{}) error { 37 | mr.Lock() 38 | defer mr.Unlock() 39 | debug("Register: worker %s\n", args.Worker) 40 | mr.workers = append(mr.workers, args.Worker) 41 | 42 | // tell forwardRegistrations() that there's a new workers[] entry. 43 | mr.newCond.Broadcast() 44 | 45 | return nil 46 | } 47 | 48 | // newMaster initializes a new Map/Reduce Master 49 | func newMaster(master string) (mr *Master) { 50 | mr = new(Master) 51 | mr.address = master 52 | mr.shutdown = make(chan struct{}) 53 | mr.newCond = sync.NewCond(mr) 54 | mr.doneChannel = make(chan bool) 55 | return 56 | } 57 | 58 | // Sequential runs map and reduce tasks sequentially, waiting for each task to 59 | // complete before running the next. 60 | func Sequential(jobName string, files []string, nreduce int, 61 | mapF func(string, string) []KeyValue, 62 | reduceF func(string, []string) string, 63 | ) (mr *Master) { // return mr as Master* 64 | mr = newMaster("master") 65 | go mr.run(jobName, files, nreduce, func(phase jobPhase) { // schedule function 66 | switch phase { 67 | case mapPhase: 68 | // for each file, call your mapF on it 69 | for i, f := range mr.files { 70 | doMap(mr.jobName, i, f, mr.nReduce, mapF) 71 | } 72 | case reducePhase: 73 | // nMap = len(mr.files), there are nMap * nReduce intermediate Files 74 | for i := 0; i < mr.nReduce; i++ { // i is the reduce Task ID 75 | doReduce(mr.jobName, i, mergeName(mr.jobName, i), len(mr.files), reduceF) 76 | } 77 | } 78 | }, func() { // finish func 79 | mr.stats = []int{len(files) + nreduce} 80 | }) 81 | return 82 | } 83 | 84 | // helper function that sends information about all existing 85 | // and newly registered workers to channel ch. schedule() 86 | // reads ch to learn about workers. 87 | func (mr *Master) forwardRegistrations(ch chan string) { 88 | i := 0 89 | for { 90 | mr.Lock() 91 | if len(mr.workers) > i { // Register RPC will append worker list 92 | // there's a worker that we haven't told schedule() about. 93 | w := mr.workers[i] 94 | go func() { ch <- w }() // send without holding the lock. 
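			// (added note) the send can block until schedule() is ready to
			// receive from ch; doing it in its own goroutine means
			// forwardRegistrations() never blocks on the channel while it
			// holds the master's mutex, so Register() RPCs from new workers
			// can still get through.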
95 | i = i + 1 96 | } else { 97 | // wait for Register() to add an entry to workers[] 98 | // in response to an RPC from a new worker. 99 | mr.newCond.Wait() 100 | } 101 | mr.Unlock() 102 | } 103 | } 104 | 105 | // Distributed schedules map and reduce tasks on workers that register with the 106 | // master over RPC. 107 | func Distributed(jobName string, files []string, nreduce int, master string) (mr *Master) { 108 | mr = newMaster(master) 109 | mr.startRPCServer() 110 | go mr.run(jobName, files, nreduce, 111 | func(phase jobPhase) { 112 | ch := make(chan string) 113 | go mr.forwardRegistrations(ch) 114 | schedule(mr.jobName, mr.files, mr.nReduce, phase, ch) 115 | }, 116 | func() { 117 | mr.stats = mr.killWorkers() 118 | mr.stopRPCServer() 119 | }) 120 | return 121 | } 122 | 123 | // run executes a mapreduce job on the given number of mappers and reducers. 124 | // 125 | // First, it divides up the input file among the given number of mappers, and 126 | // schedules each task on workers as they become available. Each map task bins 127 | // its output in a number of bins equal to the given number of reduce tasks. 128 | // Once all the mappers have finished, workers are assigned reduce tasks. 129 | // 130 | // When all tasks have been completed, the reducer outputs are merged, 131 | // statistics are collected, and the master is shut down. 132 | // 133 | // Note that this implementation assumes a shared file system. 134 | func (mr *Master) run(jobName string, files []string, nreduce int, 135 | schedule func(phase jobPhase), 136 | finish func(), 137 | ) { 138 | mr.jobName = jobName 139 | mr.files = files 140 | mr.nReduce = nreduce 141 | 142 | fmt.Printf("%s: Starting Map/Reduce task %s\n", mr.address, mr.jobName) 143 | 144 | schedule(mapPhase) 145 | schedule(reducePhase) 146 | finish() 147 | mr.merge() 148 | 149 | fmt.Printf("%s: Map/Reduce task completed\n", mr.address) 150 | 151 | mr.doneChannel <- true 152 | } 153 | 154 | // Wait blocks until the currently scheduled work has completed. 155 | // This happens when all tasks have scheduled and completed, the final output 156 | // have been computed, and all workers have been shut down. 157 | func (mr *Master) Wait() { 158 | <-mr.doneChannel 159 | } 160 | 161 | // killWorkers cleans up all workers by sending each one a Shutdown RPC. 162 | // It also collects and returns the number of tasks each worker has performed. 163 | func (mr *Master) killWorkers() []int { 164 | mr.Lock() 165 | defer mr.Unlock() 166 | ntasks := make([]int, 0, len(mr.workers)) 167 | for _, w := range mr.workers { 168 | debug("Master: shutdown worker %s\n", w) 169 | var reply ShutdownReply 170 | ok := call(w, "Worker.Shutdown", new(struct{}), &reply) 171 | if ok == false { 172 | fmt.Printf("Master: RPC %s shutdown error\n", w) 173 | } else { 174 | ntasks = append(ntasks, reply.Ntasks) 175 | } 176 | } 177 | return ntasks 178 | } 179 | -------------------------------------------------------------------------------- /src/mapreduce/master_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/rpc" 8 | "os" 9 | ) 10 | 11 | // Shutdown is an RPC method that shuts down the Master's RPC server. 12 | func (mr *Master) Shutdown(_, _ *struct{}) error { 13 | debug("Shutdown: registration server\n") 14 | close(mr.shutdown) 15 | mr.l.Close() // causes the Accept to fail 16 | return nil 17 | } 18 | 19 | // startRPCServer starts the Master's RPC server. 
It continues accepting RPC 20 | // calls (Register in particular) for as long as the worker is alive. 21 | func (mr *Master) startRPCServer() { 22 | rpcs := rpc.NewServer() 23 | rpcs.Register(mr) 24 | os.Remove(mr.address) // only needed for "unix" 25 | l, e := net.Listen("unix", mr.address) 26 | if e != nil { 27 | log.Fatal("RegstrationServer", mr.address, " error: ", e) 28 | } 29 | mr.l = l 30 | 31 | // now that we are listening on the master address, can fork off 32 | // accepting connections to another thread. 33 | go func() { 34 | loop: 35 | for { 36 | select { 37 | case <-mr.shutdown: 38 | break loop 39 | default: 40 | } 41 | conn, err := mr.l.Accept() 42 | if err == nil { 43 | go func() { 44 | rpcs.ServeConn(conn) // default: gob protocol 45 | conn.Close() 46 | }() 47 | } else { 48 | debug("RegistrationServer: accept error %v\n", err) 49 | break 50 | } 51 | } 52 | debug("RegistrationServer: done\n") 53 | }() 54 | } 55 | 56 | // stopRPCServer stops the master RPC server. 57 | // This must be done through an RPC to avoid race conditions between the RPC 58 | // server thread and the current thread. 59 | func (mr *Master) stopRPCServer() { 60 | var reply ShutdownReply 61 | ok := call(mr.address, "Master.Shutdown", new(struct{}), &reply) 62 | if ok == false { 63 | fmt.Printf("Cleanup: RPC %s error\n", mr.address) 64 | } 65 | debug("cleanupRegistration: done\n") 66 | } 67 | -------------------------------------------------------------------------------- /src/mapreduce/master_splitmerge.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "fmt" 7 | "log" 8 | "os" 9 | "sort" 10 | ) 11 | 12 | // merge combines the results of the many reduce jobs into a single output file 13 | // XXX use merge sort 14 | func (mr *Master) merge() { 15 | debug("Merge phase") 16 | kvs := make(map[string]string) 17 | for i := 0; i < mr.nReduce; i++ { 18 | p := mergeName(mr.jobName, i) 19 | fmt.Printf("Merge: read %s\n", p) 20 | file, err := os.Open(p) 21 | if err != nil { 22 | log.Fatal("Merge: ", err) 23 | } 24 | dec := json.NewDecoder(file) 25 | for { 26 | var kv KeyValue 27 | err = dec.Decode(&kv) 28 | if err != nil { 29 | break 30 | } 31 | kvs[kv.Key] = kv.Value 32 | } 33 | file.Close() 34 | } 35 | var keys []string 36 | for k := range kvs { 37 | keys = append(keys, k) 38 | } 39 | sort.Strings(keys) 40 | 41 | file, err := os.Create("mrtmp." + mr.jobName) 42 | if err != nil { 43 | log.Fatal("Merge: create ", err) 44 | } 45 | w := bufio.NewWriter(file) 46 | for _, k := range keys { 47 | fmt.Fprintf(w, "%s: %s\n", k, kvs[k]) 48 | } 49 | w.Flush() 50 | file.Close() 51 | } 52 | 53 | // removeFile is a simple wrapper around os.Remove that logs errors. 54 | func removeFile(n string) { 55 | err := os.Remove(n) 56 | if err != nil { 57 | log.Fatal("CleanupFiles ", err) 58 | } 59 | } 60 | 61 | // CleanupFiles removes all intermediate files produced by running mapreduce. 62 | func (mr *Master) CleanupFiles() { 63 | for i := range mr.files { 64 | for j := 0; j < mr.nReduce; j++ { 65 | removeFile(reduceName(mr.jobName, i, j)) 66 | } 67 | } 68 | for i := 0; i < mr.nReduce; i++ { 69 | removeFile(mergeName(mr.jobName, i)) 70 | } 71 | removeFile("mrtmp." 
+ mr.jobName) 72 | } 73 | -------------------------------------------------------------------------------- /src/mapreduce/schedule.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | 8 | // 9 | // schedule() starts and waits for all tasks in the given phase (mapPhase 10 | // or reducePhase). the mapFiles argument holds the names of the files that 11 | // are the inputs to the map phase, one per map task. nReduce is the 12 | // number of reduce tasks. the registerChan argument yields a stream 13 | // of registered workers; each item is the worker's RPC address, 14 | // suitable for passing to call(). registerChan will yield all 15 | // existing registered workers (if any) and new ones as they register. 16 | // 17 | func schedule(jobName string, mapFiles []string, nReduce int, phase jobPhase, registerChan chan string) { 18 | var ntasks int 19 | var n_other int // number of inputs (for reduce) or outputs (for map) 20 | switch phase { 21 | case mapPhase: 22 | ntasks = len(mapFiles) 23 | n_other = nReduce 24 | case reducePhase: 25 | ntasks = nReduce 26 | n_other = len(mapFiles) 27 | } 28 | 29 | fmt.Printf("Schedule: %v %v tasks (%d I/Os)\n", ntasks, phase, n_other) 30 | 31 | // All ntasks tasks have to be scheduled on workers. Once all tasks 32 | // have completed successfully, schedule() should return. 33 | // 34 | // Your code here (Part III, Part IV). 35 | // 36 | 37 | // Remember that workers may fail, and that any given worker may finish 38 | // multiple tasks. 39 | 40 | // schedule will wait until all worker has done their jobs 41 | var wg sync.WaitGroup 42 | 43 | // task id will get from this channel 44 | var taskChan = make(chan int) 45 | go func() { 46 | for i := 0; i < ntasks; i++ { 47 | wg.Add(1) 48 | taskChan <- i 49 | } 50 | // wait all workers have done their job, then close taskChan 51 | wg.Wait() 52 | close(taskChan) 53 | }() 54 | 55 | // RPC call parameter 56 | var task DoTaskArgs 57 | task.JobName = jobName 58 | task.NumOtherPhase = n_other 59 | task.Phase = phase 60 | 61 | // assign all task to worker 62 | for i := range taskChan { // wait a new task 63 | // get a worker from register channel 64 | worker := <-registerChan 65 | 66 | task.TaskNumber = i 67 | if phase == mapPhase { 68 | task.File = mapFiles[i] 69 | } 70 | 71 | // Note: must use parameter 72 | go func(worker string, task DoTaskArgs) { 73 | if call(worker, "Worker.DoTask", &task, nil) { 74 | // only successful call will call wg.Done() 75 | wg.Done() 76 | 77 | // put idle worker back to register channel 78 | registerChan <- worker; 79 | } else { 80 | fmt.Printf("Schedule: assign %s task %v to %s failed", phase, 81 | task.TaskNumber, worker) 82 | 83 | // put failed task back to task channel 84 | taskChan <- task.TaskNumber 85 | } 86 | }(worker, task) 87 | } 88 | fmt.Printf("Schedule: %v done\n", phase) 89 | } 90 | -------------------------------------------------------------------------------- /src/mapreduce/test_test.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "bufio" 9 | "log" 10 | "os" 11 | "sort" 12 | "strconv" 13 | "strings" 14 | ) 15 | 16 | const ( 17 | nNumber = 100000 18 | nMap = 20 19 | nReduce = 10 20 | ) 21 | 22 | // Create input file with N numbers 23 | // Check if we have N numbers in output file 24 | 25 | // Split in words to kv pairs 26 | func MapFunc(file string, value string) 
(res []KeyValue) { 27 | debug("MapFunc %s\n", value) 28 | words := strings.Fields(value) 29 | for _, w := range words { 30 | kv := KeyValue{w, ""} 31 | res = append(res, kv) 32 | } 33 | return 34 | } 35 | 36 | // Just return key 37 | func ReduceFunc(key string, values []string) string { 38 | for _, e := range values { 39 | debug("Reduce %s %v\n", key, e) 40 | } 41 | return "" 42 | } 43 | 44 | // Checks input file agaist output file: each input number should show up 45 | // in the output file in string sorted order 46 | func check(t *testing.T, files []string) { 47 | output, err := os.Open("mrtmp.test") 48 | if err != nil { 49 | log.Fatal("check: ", err) 50 | } 51 | defer output.Close() 52 | 53 | var lines []string 54 | for _, f := range files { 55 | input, err := os.Open(f) 56 | if err != nil { 57 | log.Fatal("check: ", err) 58 | } 59 | defer input.Close() 60 | inputScanner := bufio.NewScanner(input) 61 | for inputScanner.Scan() { 62 | lines = append(lines, inputScanner.Text()) 63 | } 64 | } 65 | 66 | sort.Strings(lines) 67 | 68 | outputScanner := bufio.NewScanner(output) 69 | i := 0 70 | for outputScanner.Scan() { 71 | var v1 int 72 | var v2 int 73 | text := outputScanner.Text() 74 | n, err := fmt.Sscanf(lines[i], "%d", &v1) 75 | if n == 1 && err == nil { 76 | n, err = fmt.Sscanf(text, "%d", &v2) 77 | } 78 | if err != nil || v1 != v2 { 79 | t.Fatalf("line %d: %d != %d err %v\n", i, v1, v2, err) 80 | } 81 | i++ 82 | } 83 | if i != nNumber { 84 | t.Fatalf("Expected %d lines in output\n", nNumber) 85 | } 86 | } 87 | 88 | // Workers report back how many RPCs they have processed in the Shutdown reply. 89 | // Check that they processed at least 1 DoTask RPC. 90 | func checkWorker(t *testing.T, l []int) { 91 | for _, tasks := range l { 92 | if tasks == 0 { 93 | t.Fatalf("A worker didn't do any work\n") 94 | } 95 | } 96 | } 97 | 98 | // Make input file 99 | func makeInputs(num int) []string { 100 | var names []string 101 | var i = 0 102 | for f := 0; f < num; f++ { 103 | names = append(names, fmt.Sprintf("824-mrinput-%d.txt", f)) 104 | file, err := os.Create(names[f]) 105 | if err != nil { 106 | log.Fatal("mkInput: ", err) 107 | } 108 | w := bufio.NewWriter(file) 109 | for i < (f+1)*(nNumber/num) { 110 | fmt.Fprintf(w, "%d\n", i) 111 | i++ 112 | } 113 | w.Flush() 114 | file.Close() 115 | } 116 | return names 117 | } 118 | 119 | // Cook up a unique-ish UNIX-domain socket name 120 | // in /var/tmp. can't use current directory since 121 | // AFS doesn't support UNIX-domain sockets. 
122 | func port(suffix string) string { 123 | s := "/var/tmp/824-" 124 | s += strconv.Itoa(os.Getuid()) + "/" 125 | os.Mkdir(s, 0777) 126 | s += "mr" 127 | s += strconv.Itoa(os.Getpid()) + "-" 128 | s += suffix 129 | return s 130 | } 131 | 132 | func setup() *Master { 133 | files := makeInputs(nMap) 134 | master := port("master") 135 | // start rpc and run master 136 | mr := Distributed("test", files, nReduce, master) 137 | return mr 138 | } 139 | 140 | func cleanup(mr *Master) { 141 | mr.CleanupFiles() 142 | for _, f := range mr.files { 143 | removeFile(f) 144 | } 145 | } 146 | 147 | // lab1.1 148 | func TestSequentialSingle(t *testing.T) { 149 | mr := Sequential("test", makeInputs(1), 1, MapFunc, ReduceFunc) 150 | mr.Wait() 151 | check(t, mr.files) 152 | checkWorker(t, mr.stats) 153 | cleanup(mr) 154 | } 155 | 156 | // lab1.1 157 | func TestSequentialMany(t *testing.T) { 158 | mr := Sequential("test", makeInputs(5), 3, MapFunc, ReduceFunc) 159 | mr.Wait() 160 | check(t, mr.files) 161 | checkWorker(t, mr.stats) 162 | cleanup(mr) 163 | } 164 | 165 | // lab 1.3 166 | func TestParallelBasic(t *testing.T) { 167 | mr := setup() 168 | for i := 0; i < 2; i++ { 169 | go RunWorker(mr.address, port("worker"+strconv.Itoa(i)), 170 | MapFunc, ReduceFunc, -1, nil) 171 | } 172 | mr.Wait() 173 | check(t, mr.files) 174 | checkWorker(t, mr.stats) 175 | cleanup(mr) 176 | } 177 | 178 | func TestParallelCheck(t *testing.T) { 179 | mr := setup() 180 | parallelism := &Parallelism{} 181 | for i := 0; i < 2; i++ { 182 | go RunWorker(mr.address, port("worker"+strconv.Itoa(i)), 183 | MapFunc, ReduceFunc, -1, parallelism) 184 | } 185 | mr.Wait() 186 | check(t, mr.files) 187 | checkWorker(t, mr.stats) 188 | 189 | parallelism.mu.Lock() 190 | if parallelism.max < 2 { 191 | t.Fatalf("workers did not execute in parallel") 192 | } 193 | parallelism.mu.Unlock() 194 | 195 | cleanup(mr) 196 | } 197 | 198 | func TestOneFailure(t *testing.T) { 199 | mr := setup() 200 | // Start 2 workers that fail after 10 tasks 201 | go RunWorker(mr.address, port("worker"+strconv.Itoa(0)), 202 | MapFunc, ReduceFunc, 10, nil) 203 | go RunWorker(mr.address, port("worker"+strconv.Itoa(1)), 204 | MapFunc, ReduceFunc, -1, nil) 205 | mr.Wait() 206 | check(t, mr.files) 207 | checkWorker(t, mr.stats) 208 | cleanup(mr) 209 | } 210 | 211 | func TestManyFailures(t *testing.T) { 212 | mr := setup() 213 | i := 0 214 | done := false 215 | for !done { 216 | select { 217 | case done = <-mr.doneChannel: 218 | check(t, mr.files) 219 | cleanup(mr) 220 | break 221 | default: 222 | // Start 2 workers each sec. The workers fail after 10 tasks 223 | w := port("worker" + strconv.Itoa(i)) 224 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10, nil) 225 | i++ 226 | w = port("worker" + strconv.Itoa(i)) 227 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10, nil) 228 | i++ 229 | time.Sleep(1 * time.Second) 230 | } 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /src/mapreduce/worker.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | // 4 | // Please do not modify this file. 5 | // 6 | 7 | import ( 8 | "fmt" 9 | "log" 10 | "net" 11 | "net/rpc" 12 | "os" 13 | "sync" 14 | "time" 15 | ) 16 | 17 | // track whether workers executed in parallel. 
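// now counts how many DoTask calls are currently executing across the workers
// that share this struct; max records the largest value ever observed.
// TestParallelCheck requires max to reach at least 2.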
18 | type Parallelism struct { 19 | mu sync.Mutex 20 | now int32 21 | max int32 22 | } 23 | 24 | // Worker holds the state for a server waiting for DoTask or Shutdown RPCs 25 | type Worker struct { 26 | sync.Mutex 27 | 28 | name string 29 | Map func(string, string) []KeyValue 30 | Reduce func(string, []string) string 31 | nRPC int // quit after this many RPCs; protected by mutex 32 | nTasks int // total tasks executed; protected by mutex 33 | concurrent int // number of parallel DoTasks in this worker; mutex 34 | l net.Listener 35 | parallelism *Parallelism 36 | } 37 | 38 | // DoTask is called by the master when a new task is being scheduled on this 39 | // worker. 40 | func (wk *Worker) DoTask(arg *DoTaskArgs, _ *struct{}) error { 41 | fmt.Printf("%s: given %v task #%d on file %s (nios: %d)\n", 42 | wk.name, arg.Phase, arg.TaskNumber, arg.File, arg.NumOtherPhase) 43 | 44 | wk.Lock() 45 | wk.nTasks += 1 46 | wk.concurrent += 1 47 | nc := wk.concurrent 48 | wk.Unlock() 49 | 50 | if nc > 1 { 51 | // schedule() should never issue more than one RPC at a 52 | // time to a given worker. 53 | log.Fatal("Worker.DoTask: more than one DoTask sent concurrently to a single worker\n") 54 | } 55 | 56 | pause := false 57 | if wk.parallelism != nil { 58 | wk.parallelism.mu.Lock() 59 | wk.parallelism.now += 1 60 | if wk.parallelism.now > wk.parallelism.max { 61 | wk.parallelism.max = wk.parallelism.now 62 | } 63 | if wk.parallelism.max < 2 { 64 | pause = true 65 | } 66 | wk.parallelism.mu.Unlock() 67 | } 68 | 69 | if pause { 70 | // give other workers a chance to prove that 71 | // they are executing in parallel. 72 | time.Sleep(time.Second) 73 | } 74 | 75 | switch arg.Phase { 76 | case mapPhase: 77 | doMap(arg.JobName, arg.TaskNumber, arg.File, arg.NumOtherPhase, wk.Map) 78 | case reducePhase: 79 | doReduce(arg.JobName, arg.TaskNumber, mergeName(arg.JobName, arg.TaskNumber), arg.NumOtherPhase, wk.Reduce) 80 | } 81 | 82 | wk.Lock() 83 | wk.concurrent -= 1 84 | wk.Unlock() 85 | 86 | if wk.parallelism != nil { 87 | wk.parallelism.mu.Lock() 88 | wk.parallelism.now -= 1 89 | wk.parallelism.mu.Unlock() 90 | } 91 | 92 | fmt.Printf("%s: %v task #%d done\n", wk.name, arg.Phase, arg.TaskNumber) 93 | return nil 94 | } 95 | 96 | // Shutdown is called by the master when all work has been completed. 97 | // We should respond with the number of tasks we have processed. 98 | func (wk *Worker) Shutdown(_ *struct{}, res *ShutdownReply) error { 99 | debug("Shutdown %s\n", wk.name) 100 | wk.Lock() 101 | defer wk.Unlock() 102 | res.Ntasks = wk.nTasks 103 | wk.nRPC = 1 104 | return nil 105 | } 106 | 107 | // Tell the master we exist and ready to work 108 | func (wk *Worker) register(master string) { 109 | args := new(RegisterArgs) 110 | args.Worker = wk.name 111 | ok := call(master, "Master.Register", args, new(struct{})) 112 | if ok == false { 113 | fmt.Printf("Register: RPC %s register error\n", master) 114 | } 115 | } 116 | 117 | // RunWorker sets up a connection with the master, registers its address, and 118 | // waits for tasks to be scheduled. 
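// The nRPC argument bounds how many RPC connections this worker will accept:
// the accept loop below decrements nRPC once per accepted connection and exits
// when it reaches zero, so the tests pass a small positive value (e.g. 10) to
// simulate a worker that fails after a fixed number of tasks, and -1 to keep
// the worker serving indefinitely.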
119 | func RunWorker(MasterAddress string, me string, 120 | MapFunc func(string, string) []KeyValue, 121 | ReduceFunc func(string, []string) string, 122 | nRPC int, parallelism *Parallelism, 123 | ) { 124 | debug("RunWorker %s\n", me) 125 | wk := new(Worker) 126 | wk.name = me 127 | wk.Map = MapFunc 128 | wk.Reduce = ReduceFunc 129 | wk.nRPC = nRPC 130 | wk.parallelism = parallelism 131 | rpcs := rpc.NewServer() 132 | rpcs.Register(wk) 133 | os.Remove(me) // only needed for "unix" 134 | l, e := net.Listen("unix", me) 135 | if e != nil { 136 | log.Fatal("RunWorker: worker ", me, " error: ", e) 137 | } 138 | wk.l = l 139 | wk.register(MasterAddress) 140 | 141 | // DON'T MODIFY CODE BELOW 142 | for { 143 | wk.Lock() 144 | if wk.nRPC == 0 { 145 | wk.Unlock() 146 | break 147 | } 148 | wk.Unlock() 149 | conn, err := wk.l.Accept() 150 | if err == nil { 151 | wk.Lock() 152 | wk.nRPC-- 153 | wk.Unlock() 154 | go rpcs.ServeConn(conn) 155 | } else { 156 | break 157 | } 158 | } 159 | wk.l.Close() 160 | debug("RunWorker %s exit\n", me) 161 | } 162 | -------------------------------------------------------------------------------- /src/raft/README.md: -------------------------------------------------------------------------------- 1 | # Lab2 Raft 2 | 3 | ## Lab Part 2A 4 | 5 | Implement leader election and heartbeats (AppendEntries RPCs with no log entries). The goal for Part 2A is for a single leader to be elected, for 6 | the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader 7 | are lost. Run go test -run 2A to test your 2A code. 8 | 9 | - raft.go 10 | 1. The initial state is Follower. 11 | 2. Three goroutines: checkHealthy, election, and heartDaemon. 12 | 3. checkHealthy runs in the Follower state; it checks whether the leader is still active. 13 | 4. If the leader is inactive, switch to Candidate and start the election goroutine. 14 | 5. The election routine sends RequestVote RPCs. 15 | 6. Handling RequestVote: if Candidate, switchToFollower; if Follower, check whether it 16 | has already voted or the term is the same; if Leader, check whether it is a stale leader. 17 | 7. Heartbeat routine: send heartbeats periodically; on receiving a bigger term, switchToFollower. 18 | 19 | ## Lab Part 2B 20 | 21 | Implement the leader and follower code to append new log entries. This will involve implementing Start(), completing the AppendEntries RPC 22 | structs, sending them, fleshing out the AppendEntry RPC handler, and advancing the commitIndex at the leader. Your first goal should be to pass the 23 | TestBasicAgree() test (in test_test.go). Once you have that working, you should get all the 2B tests to pass (go test -run 2B). 24 | 25 | - raft.go 26 | 1. If an AppendEntries RPC arrives with multiple new entries, say 3, and PrevLogIndex is 10: is it possible that rf.log[PrevLogIndex + 0].Term == Entries[0].Term 27 | but rf.log[PrevLogIndex + 1].Term != Entries[1].Term? 28 | 2. A call to Start() at the leader starts the process of adding a new operation to the log; the leader sends the new operation to the other servers in AppendEntries RPCs. 29 | Committed entries are sent on the applyCh in a separate goroutine. 30 | 3. Note that the log index is 1-based in the paper (see the sketch below).
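A minimal sketch of that 1-based convention, with hypothetical names (the actual types and helpers in raft.go may differ): pad index 0 with a dummy entry so that `rf.log[i]` lines up with the paper's index i.

    type LogEntry struct {
        Term    int
        Command interface{}
    }

    // makeLog places a dummy entry at index 0 so that real entries start at
    // index 1, matching the paper's 1-based log indexing.
    func makeLog() []LogEntry {
        return []LogEntry{{Term: 0}}
    }

    // lastLogIndex and lastLogTerm then read the tail of such a log directly.
    func lastLogIndex(log []LogEntry) int { return len(log) - 1 }
    func lastLogTerm(log []LogEntry) int  { return log[len(log)-1].Term }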
31 | 32 | At the moment the 2C unreliable figure 8 test times out with a probability of roughly 1%. 33 | Comparing a normal log with a timed-out log: the normal log has about 67 rejected-vote records, while the failing log has 160 of them: 34 | [me 0] RequestVoteReply from 2, term 123, grant false, votes 2 35 | The line above is the last record in the failing log, where the term has climbed to 123; in the normal log the highest term is only 80. 36 | This is almost certainly caused by the lack of a pre-vote mechanism; I will add one and re-test when I have time. Right now it fails on average once every 100-200 runs. 37 | In theory, with pre-vote the failure rate should drop to zero. 38 | One open question, though: there is no "bigger heartbeat reply term" record, i.e. no case where the leader sends a heartbeat and gets a rejection because a follower holds a larger term. 39 | -------------------------------------------------------------------------------- /src/raft/lab2: -------------------------------------------------------------------------------- 1 | 2 | Implement Raft by adding code to raft/raft.go. In that file you'll find a bit of skeleton code, plus examples of how to send and receive RPCs. 3 | Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in raft.go. 4 | 5 | // create a new Raft server instance: 6 | rf := Make(peers, me, persister, applyCh) 7 | 8 | // start agreement on a new log entry: 9 | rf.Start(command interface{}) (index, term, isleader) 10 | 11 | // ask a Raft for its current term, and whether it thinks it is leader 12 | rf.GetState() (term, isLeader) 13 | 14 | // each time a new entry is committed to the log, each Raft peer 15 | // should send an ApplyMsg to the service (or tester). 16 | type ApplyMsg 17 | A service calls Make(peers,me,…) to create a Raft peer. The peers argument is an array of established RPC connections, one to each Raft peer (including this one). The me argument is the index of this peer in the peers array. Start(command) asks Raft to start the processing to append the command to the replicated log. Start() should return immediately, without waiting for this process to complete. The service expects your implementation to send an ApplyMsg for each new committed log entry to the applyCh argument to Make(). 18 | 19 | Your Raft peers should exchange RPCs using the labrpc Go package that we provide to you. It is modeled after Go's rpc library, but internally uses Go channels rather than sockets. raft.go contains some example code that sends an RPC (sendRequestVote()) and that handles an incoming RPC (RequestVote()). The reason you must use labrpc instead of Go's RPC package is that the tester tells labrpc to delay RPCs, re-order them, and delete them to simulate challenging network conditions under which your code should work correctly. Don't modify labrpc because we will test your code with the labrpc as handed out. 20 | 21 | This lab may be your first exposure to writing challenging concurrent code and your first implementation may not be clean enough that you can easily reason about its correctness. Give yourself enough time to rewrite your implementation so that you can easily reason about its correctness. Subsequent labs will build on this lab, so it is important to do a good job on your implementation. 22 | 23 | Part 2A 24 | 25 | Implement leader election and heartbeats (AppendEntries RPCs with no log entries). The goal for Part 2A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run go test -run 2A to test your 2A code. 26 | 27 | Add any state you need to the Raft struct in raft.go. You'll also need to define a struct to hold information about each log entry. Your code should follow Figure 2 in the paper as closely as possible (see the sketch below).
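As a rough sketch of the per-server state that Figure 2 lists (the Go layout and comments below are an illustration, not part of the handout; only the field names come from the paper):

    type LogEntry struct {
        Term    int         // term in which the entry was created
        Command interface{} // client command for the service
    }

    type Raft struct {
        // Persistent state on all servers (saved to stable storage before responding to RPCs).
        currentTerm int
        votedFor    int // peer voted for in currentTerm, or -1 for none
        log         []LogEntry
        // Volatile state on all servers.
        commitIndex int
        lastApplied int
        // Volatile state on leaders (reinitialized after each election).
        nextIndex  []int
        matchIndex []int
    }

Of these, only currentTerm, votedFor and log need to survive a restart; the rest is rebuilt at runtime.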
28 | Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). Forgetting to capitalize field names sent by RPC is the single most frequent source of bugs in these labs. 29 | Fill in the RequestVoteArgs and RequestVoteReply structs. Modify Make() to create a background goroutine that will kick off leader election periodically by sending out RequestVote RPCs when it hasn't heard from another peer for a while. This way a peer will learn who is the leader, if there is already a leader, or become the leader itself. Implement the RequestVote() RPC handler so that servers will vote for one another. 30 | To implement heartbeats, define an AppendEntries RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an AppendEntries RPC handler method that resets the election timeout so that other servers don't step forward as leaders when one has already been elected. 31 | Make sure the election timeouts in different peers don't always fire at the same time, or else all peers will vote only for themselves and no one will become the leader. 32 | The tester requires that the leader send heartbeat RPCs no more than ten times per second. 33 | The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). Remember, however, that leader election may require multiple rounds in case of a split vote (which can happen if packets are lost or if candidates unluckily choose the same random backoff times). You must pick election timeouts (and thus heartbeat intervals) that are short enough that it's very likely that an election will complete in less than five seconds even if it requires multiple rounds. 34 | The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds. Because the tester limits you to 10 heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. 35 | You may find Go's time.Sleep() and rand useful. 36 | You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls time.Sleep(). 37 | If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. 38 | A good way to debug your code is to insert print statements when a peer sends or receives a message, and collect the output in a file with go test -run 2A > out. Then, by studying the trace of messages in the out file, you can identify where your implementation deviates from the desired protocol. You might find DPrintf in util.go useful to turn printing on and off as you debug different problems. 39 | You should check your code with go test -race, and fix any races it reports. 40 | Be sure you pass the 2A tests before submitting Part 2A. Note that the 2A tests test the basic operation of leader election. Parts B and C will test leader election in more challenging settings and may expose bugs in your leader election code which the 2A tests miss. 41 | 42 | Part 2B 43 | 44 | We want Raft to keep a consistent, replicated log of operations. 
A call to Start() at the leader starts the process of adding a new operation to the log; the leader sends the new operation to the other servers in AppendEntries RPCs. 45 | Implement the leader and follower code to append new log entries. This will involve implementing Start(), completing the AppendEntries RPC structs, sending them, fleshing out the AppendEntry RPC handler, and advancing the commitIndex at the leader. Your first goal should be to pass the TestBasicAgree() test (in test_test.go). Once you have that working, you should get all the 2B tests to pass (go test -run 2B). 46 | 47 | You will need to implement the election restriction (section 5.4.1 in the paper). 48 | While the Raft leader is the only server that initiates appends of new entries to the log, all the servers need to independently give each newly committed entry to their local service replica (via their own applyCh). You should try to keep the goroutines that implement the Raft protocol as separate as possible from the code that sends committed log entries on the applyCh (e.g., by using a separate goroutine for delivering committed messages). If you don't separate these activities cleanly, then it is easy to create deadlocks, either in this lab or in subsequent labs in which you implement services that use your Raft package. Without a clean separation, a common deadlock scenario is as follows: an RPC handler sends on the applyCh but it blocks because no goroutine is reading from the channel (e.g., perhaps because it called Start()). Now, the RPC handler is blocked while holding the mutex on the Raft structure. The reading goroutine is also blocked on the mutex because Start() needs to acquire it. Furthermore, no other RPC handler that needs the lock on the Raft structure can run. 49 | Give yourself enough time to rewrite your implementation because only after writing a first implementation will you realize how to organize your code cleanly. For example, only after writing one implementation will you understand how to write an implementation that makes it easy to argue that your implementation has no deadlocks. 50 | Figure out the minimum number of messages Raft should use when reaching agreement in non-failure cases and make your implementation use that minimum. 51 | You may need to write code that waits for certain events to occur. Do not write loops that execute continuously without pausing, since that will slow your implementation enough that it fails tests. You can wait efficiently with Go's channels, or Go's condition variables, or (if all else fails) by inserting a time.Sleep(10 * time.Millisecond) in each loop iteration. 52 | Be sure you pass the 2A and 2B tests before submitting Part 2B. 53 | 54 | Part 2C 55 | 56 | If a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent, and raft.go contains examples of how to save and restore persistent state. 57 | 58 | A “real” implementation would do this by writing Raft's persistent state to disk each time it changes, and reading the latest saved state from disk when restarting after a reboot. Your implementation won't use the disk; instead, it will save and restore persistent state from a Persister object (see persister.go). Whoever calls Raft.Make() supplies a Persister that initially holds Raft's most recently persisted state (if any). 
Raft should initialize its state from that Persister, and should use it to save its persistent state each time the state changes. Use the Persister's ReadRaftState() and SaveRaftState() methods. 59 | 60 | Implement persistence by first adding code that saves and restores persistent state to persist() and readPersist() in raft.go. You will need to encode (or "serialize") the state as an array of bytes in order to pass it to the Persister. Use Go's gob encoder to do this; see the comments in persist() and readPersist(). 61 | 62 | You now need to determine at what points in the Raft protocol your servers are required to persist their state, and insert calls to persist() in those places. You must also load persisted state in Raft.Make(). Once you've done this, you should pass the remaining tests. You may want to first try to pass the "basic persistence" test (go test -run 'TestPersist12C'), and then tackle the remaining ones (go test -run 2C). 63 | 64 | In order to avoid running out of memory, Raft must periodically discard old log entries, but you do not have to worry about this until the next lab. 65 | 66 | Many of the 2C tests involve servers failing and the network losing RPC requests or replies. 67 | The Go gob encoder you'll use to encode persistent state only saves fields whose names start with upper case letters. Using small caps for field names is a common source of mysterious bugs, since Go doesn't warn you that they won't be saved. 68 | In order to pass some of the challenging tests towards the end, such as those marked "unreliable", you will need to implement the optimization to allow a follower to back up the leader's nextIndex by more than one entry at a time. See the description in the extended Raft paper starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps, perhaps with the help of the 6.824 Raft lectures. 69 | Be sure you pass all the tests before submitting Part 2C. 70 | 71 | -------------------------------------------------------------------------------- /src/raft/persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft and kvraft to save persistent 5 | // Raft state (log &c) and k/v server snapshots. 6 | // 7 | // we will use the original persister.go to test your code for grading. 8 | // so, while you can modify this code to help you debug, please 9 | // test with the original before submitting. 10 | // 11 | 12 | import "sync" 13 | 14 | type Persister struct { 15 | mu sync.Mutex 16 | raftstate []byte 17 | snapshot []byte 18 | } 19 | 20 | func MakePersister() *Persister { 21 | return &Persister{} 22 | } 23 | 24 | func (ps *Persister) Copy() *Persister { 25 | ps.mu.Lock() 26 | defer ps.mu.Unlock() 27 | np := MakePersister() 28 | np.raftstate = ps.raftstate 29 | np.snapshot = ps.snapshot 30 | return np 31 | } 32 | 33 | func (ps *Persister) SaveRaftState(state []byte) { 34 | ps.mu.Lock() 35 | defer ps.mu.Unlock() 36 | ps.raftstate = state 37 | } 38 | 39 | func (ps *Persister) ReadRaftState() []byte { 40 | ps.mu.Lock() 41 | defer ps.mu.Unlock() 42 | return ps.raftstate 43 | } 44 | 45 | func (ps *Persister) RaftStateSize() int { 46 | ps.mu.Lock() 47 | defer ps.mu.Unlock() 48 | return len(ps.raftstate) 49 | } 50 | 51 | // Save both Raft state and K/V snapshot as a single atomic action, 52 | // to help avoid them getting out of sync. 
53 | func (ps *Persister) SaveStateAndSnapshot(state []byte, snapshot []byte) { 54 | ps.mu.Lock() 55 | defer ps.mu.Unlock() 56 | ps.raftstate = state 57 | ps.snapshot = snapshot 58 | } 59 | 60 | func (ps *Persister) ReadSnapshot() []byte { 61 | ps.mu.Lock() 62 | defer ps.mu.Unlock() 63 | return ps.snapshot 64 | } 65 | 66 | func (ps *Persister) SnapshotSize() int { 67 | ps.mu.Lock() 68 | defer ps.mu.Unlock() 69 | return len(ps.snapshot) 70 | } 71 | -------------------------------------------------------------------------------- /src/raft/util.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "log" 4 | 5 | // Debugging 6 | const Debug = 0 7 | 8 | func init() { 9 | log.SetFlags(log.Lmicroseconds) 10 | } 11 | 12 | func DPrintf(format string, a ...interface{}) (n int, err error) { 13 | if Debug > 0 { 14 | log.Printf(format, a...) 15 | } 16 | return 17 | } 18 | -------------------------------------------------------------------------------- /src/shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // client code to talk to a sharded key/value service. 5 | // 6 | // the client first talks to the shardmaster to find out 7 | // the assignment of shards (keys) to groups, and then 8 | // talks to the group that holds the key's shard. 9 | // 10 | 11 | import "labrpc" 12 | import "crypto/rand" 13 | import "math/big" 14 | import "shardmaster" 15 | import "time" 16 | import "sync/atomic" 17 | 18 | var clientIdGen = int32(0) 19 | 20 | const maxTry = 3 21 | 22 | // 23 | // which shard is a key in? 24 | // please use this function, 25 | // and please do not change it. 26 | // 27 | func key2shard(key string) int { 28 | shard := 0 29 | if len(key) > 0 { 30 | shard = int(key[0]) 31 | } 32 | shard %= shardmaster.NShards 33 | return shard 34 | } 35 | 36 | func nrand() int64 { 37 | max := big.NewInt(int64(1) << 62) 38 | bigx, _ := rand.Int(rand.Reader, max) 39 | x := bigx.Int64() 40 | return x 41 | } 42 | 43 | type Clerk struct { 44 | sm *shardmaster.Clerk // client to shardMaster 45 | config shardmaster.Config 46 | make_end func(string) *labrpc.ClientEnd 47 | // You will have to modify this struct. 48 | 49 | fail int // successive fail calls for leader 50 | clientId int32 // client id, init by clientIdGen 51 | reqId int64 // req id 52 | } 53 | 54 | // 55 | // the tester calls MakeClerk. 56 | // 57 | // masters[] is needed to call shardmaster.MakeClerk(). 58 | // 59 | // make_end(servername) turns a server name from a 60 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 61 | // send RPCs. 62 | // 63 | func MakeClerk(masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk { 64 | ck := new(Clerk) 65 | ck.sm = shardmaster.MakeClerk(masters) 66 | ck.make_end = make_end 67 | // You'll have to add code here. 68 | ck.clientId = atomic.AddInt32(&clientIdGen, 1) 69 | ck.reqId = 1 70 | 71 | // init config first 72 | ck.config = ck.sm.Query(-1) 73 | return ck 74 | } 75 | 76 | // 77 | // fetch the current value for a key. 78 | // returns "" if the key does not exist. 79 | // keeps trying forever in the face of all other errors. 80 | // You will have to modify this function. 
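// The retry strategy below: look up the shard's replica group in the cached
// config, try each server of that group with a per-RPC timeout (RpcTimeout),
// return as soon as one replies successfully, and on ErrWrongGroup (or after
// exhausting the group) sleep briefly, re-fetch the latest config from the
// shardmaster, and try again.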
81 | // 82 | func (ck *Clerk) Get(key string) string { 83 | args := GetArgs{} 84 | args.Key = key 85 | args.ReqID = ck.reqId 86 | ck.reqId++ 87 | args.ID = ck.clientId 88 | 89 | shard := key2shard(key) 90 | for { 91 | gid := ck.config.Shards[shard] 92 | if servers, ok := ck.config.Groups[gid]; ok { 93 | // try each server for the shard. 94 | for si := 0; si < len(servers); si++ { 95 | srv := ck.make_end(servers[si]) 96 | 97 | // start rpc call to server 98 | var reply GetReply 99 | done := make(chan bool) 100 | go func() { 101 | ok := srv.Call("ShardKV.Get", &args, &reply) 102 | done <- ok 103 | }() 104 | 105 | // wait rpc response 106 | ok := true 107 | timeout := false 108 | select { 109 | case <-time.After(RpcTimeout): 110 | timeout = true 111 | 112 | case ok = <-done: 113 | } 114 | //close(done) 115 | 116 | if !timeout && ok && !reply.WrongLeader && (reply.Err == "" || reply.Err == OK || reply.Err == ErrNoKey) { 117 | DPrintf("[client %d] succ GET: %s = %s", ck.clientId, key, reply.Value) 118 | return reply.Value 119 | } 120 | 121 | if ok && reply.Err == ErrWrongGroup { 122 | break 123 | } 124 | 125 | // not leader or timeout, try next server in this replica 126 | } 127 | } 128 | 129 | time.Sleep(100 * time.Millisecond) 130 | // ask master for the latest configuration. 131 | ck.config = ck.sm.Query(-1) 132 | } 133 | 134 | return "" 135 | } 136 | 137 | // 138 | // shared by Put and Append. 139 | // You will have to modify this function. 140 | // 141 | func (ck *Clerk) PutAppend(key string, value string, op string) { 142 | args := PutAppendArgs{} 143 | args.Key = key 144 | args.Value = value 145 | args.Op = op 146 | 147 | args.ReqID = ck.reqId 148 | ck.reqId++ 149 | args.ID = ck.clientId 150 | 151 | shard := key2shard(key) 152 | for { 153 | gid := ck.config.Shards[shard] 154 | if servers, ok := ck.config.Groups[gid]; ok { 155 | // try each server for the shard. 156 | for si := 0; si < len(servers); si++ { 157 | srv := ck.make_end(servers[si]) 158 | 159 | // start rpc call to server 160 | var reply PutAppendReply 161 | done := make(chan bool) 162 | go func() { 163 | ok := srv.Call("ShardKV.PutAppend", &args, &reply) 164 | done <- ok 165 | }() 166 | 167 | // wait rpc response 168 | ok := true 169 | timeout := false 170 | select { 171 | case <-time.After(RpcTimeout): 172 | timeout = true 173 | 174 | case ok = <-done: 175 | } 176 | //close(done) 177 | 178 | if !timeout && ok && !reply.WrongLeader && (reply.Err == "" || reply.Err == OK) { 179 | DPrintf("[client %d] succ PutAppend: %s = %s, %v", ck.clientId, key, value, reply.Err) 180 | return 181 | } 182 | if ok && reply.Err == ErrWrongGroup { 183 | break 184 | } 185 | // not leader or timeout, try next server in this replica 186 | } 187 | } 188 | 189 | time.Sleep(100 * time.Millisecond) 190 | // ask master for the latest configuration. 191 | ck.config = ck.sm.Query(-1) 192 | } 193 | } 194 | 195 | func (ck *Clerk) Put(key string, value string) { 196 | ck.PutAppend(key, value, "Put") 197 | } 198 | func (ck *Clerk) Append(key string, value string) { 199 | ck.PutAppend(key, value, "Append") 200 | } 201 | 202 | -------------------------------------------------------------------------------- /src/shardkv/common.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "time" 4 | 5 | // 6 | // Sharded key/value server. 7 | // Lots of replica groups, each running op-at-a-time paxos. 8 | // Shardmaster decides which group serves each shard. 
9 | // Shardmaster may change shard assignment from time to time. 10 | // 11 | // You will have to modify these definitions. 12 | // 13 | 14 | const ( 15 | OK = "OK" 16 | ErrNoKey = "ErrNoKey" 17 | ErrWrongGroup = "ErrWrongGroup" 18 | 19 | ErrNotLeader = "ErrNotLeader" 20 | ErrInvalidOp = "ErrInvalidOp" 21 | ErrDuplicateReq = "ErrDuplicateReq" 22 | ) 23 | 24 | type Err string 25 | 26 | // Put or Append 27 | type PutAppendArgs struct { 28 | // You'll have to add definitions here. 29 | Key string 30 | Value string 31 | Op string // "Put" or "Append" 32 | // You'll have to add definitions here. 33 | // Field names must start with capital letters, 34 | // otherwise RPC will break. 35 | ID int32 // client id 36 | ReqID int64 37 | } 38 | 39 | type PutAppendReply struct { 40 | WrongLeader bool 41 | Err Err 42 | 43 | ID int32 44 | RspID int64 45 | } 46 | 47 | type GetArgs struct { 48 | Key string 49 | // You'll have to add definitions here. 50 | ID int32 51 | ReqID int64 52 | } 53 | 54 | type GetReply struct { 55 | WrongLeader bool 56 | Err Err 57 | Value string 58 | 59 | ID int32 60 | RspID int64 61 | } 62 | 63 | const RpcTimeout time.Duration = 1000 * time.Millisecond 64 | 65 | -------------------------------------------------------------------------------- /src/shardkv/config.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "shardmaster" 4 | import "labrpc" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/big" 11 | import "math/rand" 12 | import "encoding/base64" 13 | import "sync" 14 | import "runtime" 15 | import "raft" 16 | import "strconv" 17 | import "fmt" 18 | import "time" 19 | 20 | func randstring(n int) string { 21 | b := make([]byte, 2*n) 22 | crand.Read(b) 23 | s := base64.URLEncoding.EncodeToString(b) 24 | return s[0:n] 25 | } 26 | 27 | func makeSeed() int64 { 28 | max := big.NewInt(int64(1) << 62) 29 | bigx, _ := crand.Int(crand.Reader, max) 30 | x := bigx.Int64() 31 | return x 32 | } 33 | 34 | // Randomize server handles 35 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 36 | sa := make([]*labrpc.ClientEnd, len(kvh)) 37 | copy(sa, kvh) 38 | for i := range sa { 39 | j := rand.Intn(i + 1) 40 | sa[i], sa[j] = sa[j], sa[i] 41 | } 42 | return sa 43 | } 44 | 45 | type group struct { 46 | gid int 47 | servers []*ShardKV 48 | saved []*raft.Persister 49 | endnames [][]string 50 | mendnames [][]string 51 | } 52 | 53 | type config struct { 54 | mu sync.Mutex 55 | t *testing.T 56 | net *labrpc.Network 57 | start time.Time // time at which make_config() was called 58 | 59 | nmasters int 60 | masterservers []*shardmaster.ShardMaster 61 | mck *shardmaster.Clerk 62 | 63 | ngroups int 64 | n int // servers per k/v group 65 | groups []*group 66 | 67 | clerks map[*Clerk][]string 68 | nextClientId int 69 | maxraftstate int 70 | } 71 | 72 | func (cfg *config) checkTimeout() { 73 | // enforce a two minute real-time limit on each test 74 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 75 | cfg.t.Fatal("test took longer than 120 seconds") 76 | } 77 | } 78 | 79 | func (cfg *config) cleanup() { 80 | for gi := 0; gi < cfg.ngroups; gi++ { 81 | cfg.ShutdownGroup(gi) 82 | } 83 | cfg.net.Cleanup() 84 | cfg.checkTimeout() 85 | } 86 | 87 | // check that no server's log is too big. 
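// With snapshotting enabled (maxraftstate >= 0) the persisted Raft state must
// stay below 2*maxraftstate; with maxraftstate == -1 no snapshot may be written.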
88 | func (cfg *config) checklogs() { 89 | for gi := 0; gi < cfg.ngroups; gi++ { 90 | for i := 0; i < cfg.n; i++ { 91 | raft := cfg.groups[gi].saved[i].RaftStateSize() 92 | snap := len(cfg.groups[gi].saved[i].ReadSnapshot()) 93 | if cfg.maxraftstate >= 0 && raft > 2*cfg.maxraftstate { 94 | cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v", 95 | raft, cfg.maxraftstate) 96 | } 97 | if cfg.maxraftstate < 0 && snap > 0 { 98 | cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!") 99 | } 100 | } 101 | } 102 | } 103 | 104 | // master server name for labrpc. 105 | func (cfg *config) mastername(i int) string { 106 | return "master" + strconv.Itoa(i) 107 | } 108 | 109 | // shard server name for labrpc. 110 | // i'th server of group gid. 111 | func (cfg *config) servername(gid int, i int) string { 112 | return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i) 113 | } 114 | 115 | func (cfg *config) makeClient() *Clerk { 116 | cfg.mu.Lock() 117 | defer cfg.mu.Unlock() 118 | 119 | // ClientEnds to talk to master service. 120 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 121 | endnames := make([]string, cfg.n) 122 | for j := 0; j < cfg.nmasters; j++ { 123 | endnames[j] = randstring(20) 124 | ends[j] = cfg.net.MakeEnd(endnames[j]) 125 | cfg.net.Connect(endnames[j], cfg.mastername(j)) 126 | cfg.net.Enable(endnames[j], true) 127 | } 128 | 129 | ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd { 130 | name := randstring(20) 131 | end := cfg.net.MakeEnd(name) 132 | cfg.net.Connect(name, servername) 133 | cfg.net.Enable(name, true) 134 | return end 135 | }) 136 | cfg.clerks[ck] = endnames 137 | cfg.nextClientId++ 138 | return ck 139 | } 140 | 141 | func (cfg *config) deleteClient(ck *Clerk) { 142 | cfg.mu.Lock() 143 | defer cfg.mu.Unlock() 144 | 145 | v := cfg.clerks[ck] 146 | for i := 0; i < len(v); i++ { 147 | os.Remove(v[i]) 148 | } 149 | delete(cfg.clerks, ck) 150 | } 151 | 152 | // Shutdown i'th server of gi'th group, by isolating it 153 | func (cfg *config) ShutdownServer(gi int, i int) { 154 | cfg.mu.Lock() 155 | defer cfg.mu.Unlock() 156 | 157 | gg := cfg.groups[gi] 158 | 159 | // prevent this server from sending 160 | for j := 0; j < len(gg.servers); j++ { 161 | name := gg.endnames[i][j] 162 | cfg.net.Enable(name, false) 163 | } 164 | for j := 0; j < len(gg.mendnames[i]); j++ { 165 | name := gg.mendnames[i][j] 166 | cfg.net.Enable(name, false) 167 | } 168 | 169 | // disable client connections to the server. 170 | // it's important to do this before creating 171 | // the new Persister in saved[i], to avoid 172 | // the possibility of the server returning a 173 | // positive reply to an Append but persisting 174 | // the result in the superseded Persister. 175 | cfg.net.DeleteServer(cfg.servername(gg.gid, i)) 176 | 177 | // a fresh persister, in case old instance 178 | // continues to update the Persister. 179 | // but copy old persister's content so that we always 180 | // pass Make() the last persisted state. 
181 | if gg.saved[i] != nil { 182 | gg.saved[i] = gg.saved[i].Copy() 183 | } 184 | 185 | kv := gg.servers[i] 186 | if kv != nil { 187 | cfg.mu.Unlock() 188 | kv.Kill() 189 | cfg.mu.Lock() 190 | gg.servers[i] = nil 191 | } 192 | } 193 | 194 | func (cfg *config) ShutdownGroup(gi int) { 195 | for i := 0; i < cfg.n; i++ { 196 | cfg.ShutdownServer(gi, i) 197 | } 198 | } 199 | 200 | // start i'th server in gi'th group 201 | func (cfg *config) StartServer(gi int, i int) { 202 | cfg.mu.Lock() 203 | 204 | gg := cfg.groups[gi] 205 | 206 | // a fresh set of outgoing ClientEnd names 207 | // to talk to other servers in this group. 208 | gg.endnames[i] = make([]string, cfg.n) 209 | for j := 0; j < cfg.n; j++ { 210 | gg.endnames[i][j] = randstring(20) 211 | } 212 | 213 | // and the connections to other servers in this group. 214 | ends := make([]*labrpc.ClientEnd, cfg.n) 215 | for j := 0; j < cfg.n; j++ { 216 | ends[j] = cfg.net.MakeEnd(gg.endnames[i][j]) 217 | cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j)) 218 | cfg.net.Enable(gg.endnames[i][j], true) 219 | } 220 | 221 | // ends to talk to shardmaster service 222 | mends := make([]*labrpc.ClientEnd, cfg.nmasters) 223 | gg.mendnames[i] = make([]string, cfg.nmasters) 224 | for j := 0; j < cfg.nmasters; j++ { 225 | gg.mendnames[i][j] = randstring(20) 226 | mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j]) 227 | cfg.net.Connect(gg.mendnames[i][j], cfg.mastername(j)) 228 | cfg.net.Enable(gg.mendnames[i][j], true) 229 | } 230 | 231 | // a fresh persister, so old instance doesn't overwrite 232 | // new instance's persisted state. 233 | // give the fresh persister a copy of the old persister's 234 | // state, so that the spec is that we pass StartKVServer() 235 | // the last persisted state. 236 | if gg.saved[i] != nil { 237 | gg.saved[i] = gg.saved[i].Copy() 238 | } else { 239 | gg.saved[i] = raft.MakePersister() 240 | } 241 | cfg.mu.Unlock() 242 | 243 | gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate, 244 | gg.gid, mends, 245 | func(servername string) *labrpc.ClientEnd { 246 | name := randstring(20) 247 | end := cfg.net.MakeEnd(name) 248 | cfg.net.Connect(name, servername) 249 | cfg.net.Enable(name, true) 250 | return end 251 | }) 252 | 253 | kvsvc := labrpc.MakeService(gg.servers[i]) 254 | rfsvc := labrpc.MakeService(gg.servers[i].rf) 255 | srv := labrpc.MakeServer() 256 | srv.AddService(kvsvc) 257 | srv.AddService(rfsvc) 258 | cfg.net.AddServer(cfg.servername(gg.gid, i), srv) 259 | } 260 | 261 | func (cfg *config) StartGroup(gi int) { 262 | for i := 0; i < cfg.n; i++ { 263 | cfg.StartServer(gi, i) 264 | } 265 | } 266 | 267 | func (cfg *config) StartMasterServer(i int) { 268 | // ClientEnds to talk to other master replicas. 269 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 270 | for j := 0; j < cfg.nmasters; j++ { 271 | endname := randstring(20) 272 | ends[j] = cfg.net.MakeEnd(endname) 273 | cfg.net.Connect(endname, cfg.mastername(j)) 274 | cfg.net.Enable(endname, true) 275 | } 276 | 277 | p := raft.MakePersister() 278 | 279 | cfg.masterservers[i] = shardmaster.StartServer(ends, i, p) 280 | 281 | msvc := labrpc.MakeService(cfg.masterservers[i]) 282 | rfsvc := labrpc.MakeService(cfg.masterservers[i].Raft()) 283 | srv := labrpc.MakeServer() 284 | srv.AddService(msvc) 285 | srv.AddService(rfsvc) 286 | cfg.net.AddServer(cfg.mastername(i), srv) 287 | } 288 | 289 | func (cfg *config) shardclerk() *shardmaster.Clerk { 290 | // ClientEnds to talk to master service. 
291 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 292 | for j := 0; j < cfg.nmasters; j++ { 293 | name := randstring(20) 294 | ends[j] = cfg.net.MakeEnd(name) 295 | cfg.net.Connect(name, cfg.mastername(j)) 296 | cfg.net.Enable(name, true) 297 | } 298 | 299 | return shardmaster.MakeClerk(ends) 300 | } 301 | 302 | // tell the shardmaster that a group is joining. 303 | func (cfg *config) join(gi int) { 304 | cfg.joinm([]int{gi}) 305 | } 306 | 307 | func (cfg *config) joinm(gis []int) { 308 | m := make(map[int][]string, len(gis)) 309 | for _, g := range gis { 310 | gid := cfg.groups[g].gid 311 | servernames := make([]string, cfg.n) 312 | for i := 0; i < cfg.n; i++ { 313 | servernames[i] = cfg.servername(gid, i) 314 | } 315 | m[gid] = servernames 316 | } 317 | cfg.mck.Join(m) 318 | } 319 | 320 | // tell the shardmaster that a group is leaving. 321 | func (cfg *config) leave(gi int) { 322 | cfg.leavem([]int{gi}) 323 | } 324 | 325 | func (cfg *config) leavem(gis []int) { 326 | gids := make([]int, 0, len(gis)) 327 | for _, g := range gis { 328 | gids = append(gids, cfg.groups[g].gid) 329 | } 330 | cfg.mck.Leave(gids) 331 | } 332 | 333 | var ncpu_once sync.Once 334 | 335 | func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config { 336 | ncpu_once.Do(func() { 337 | if runtime.NumCPU() < 2 { 338 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 339 | } 340 | rand.Seed(makeSeed()) 341 | }) 342 | runtime.GOMAXPROCS(4) 343 | cfg := &config{} 344 | cfg.t = t 345 | cfg.maxraftstate = maxraftstate 346 | cfg.net = labrpc.MakeNetwork() 347 | cfg.start = time.Now() 348 | 349 | // master 350 | cfg.nmasters = 3 351 | cfg.masterservers = make([]*shardmaster.ShardMaster, cfg.nmasters) 352 | for i := 0; i < cfg.nmasters; i++ { 353 | cfg.StartMasterServer(i) 354 | } 355 | cfg.mck = cfg.shardclerk() 356 | 357 | cfg.ngroups = 3 358 | cfg.groups = make([]*group, cfg.ngroups) 359 | cfg.n = n 360 | for gi := 0; gi < cfg.ngroups; gi++ { 361 | gg := &group{} 362 | cfg.groups[gi] = gg 363 | gg.gid = 100 + gi 364 | gg.servers = make([]*ShardKV, cfg.n) 365 | gg.saved = make([]*raft.Persister, cfg.n) 366 | gg.endnames = make([][]string, cfg.n) 367 | gg.mendnames = make([][]string, cfg.nmasters) 368 | for i := 0; i < cfg.n; i++ { 369 | cfg.StartServer(gi, i) 370 | } 371 | } 372 | 373 | cfg.clerks = make(map[*Clerk][]string) 374 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 375 | 376 | cfg.net.Reliable(!unreliable) 377 | 378 | return cfg 379 | } 380 | -------------------------------------------------------------------------------- /src/shardkv/server.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | 4 | import "shardmaster" 5 | import "labrpc" 6 | import "raft" 7 | import "sync" 8 | import "labgob" 9 | import "log" 10 | import "time" 11 | 12 | const Debug = 0 13 | 14 | func DPrintf(format string, a ...interface{}) (n int, err error) { 15 | if Debug > 0 { 16 | log.Printf(format, a...) 17 | } 18 | return 19 | } 20 | 21 | 22 | // real Command 23 | type Op struct { 24 | // Your definitions here. 25 | // Field names must start with capital letters, 26 | // otherwise RPC will break. 
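// An Op records one client request: the key, the value (unused for Get), the
// operation name, and the client/request IDs used for duplicate detection.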
27 | Key string 28 | Value string 29 | Operation string // Get Put or Append 30 | 31 | //Request context 32 | ID int32 33 | ReqID int64 34 | } 35 | 36 | type ShardInfo struct { 37 | shardID int 38 | // real kv data here 39 | data map[string]string 40 | // request records 41 | requests map[int32]int64 // client -> last commited reqID 42 | } 43 | 44 | type ShardKV struct { 45 | mu sync.Mutex 46 | me int 47 | rf *raft.Raft 48 | applyCh chan raft.ApplyMsg 49 | make_end func(string) *labrpc.ClientEnd 50 | gid int 51 | masters []*labrpc.ClientEnd 52 | maxraftstate int // snapshot if log grows this big 53 | 54 | // Your definitions here. 55 | // shards and data 56 | data map[int]*ShardInfo // shard id --> ShardInfo 57 | 58 | sm *shardmaster.Clerk // client to shardMaster 59 | config shardmaster.Config 60 | // Notify chan for each log index 61 | notifyCh map[int]chan Op 62 | // for exit 63 | shutdown chan interface{} 64 | } 65 | 66 | // check if repeated request 67 | func (kv *ShardKV) isDuplicated(shard int, id int32, reqId int64) bool { 68 | kv.mu.Lock() 69 | defer kv.mu.Unlock() 70 | maxSeenReqId, ok := kv.data[shard].requests[id] 71 | if ok { 72 | return reqId <= maxSeenReqId 73 | } 74 | return false 75 | } 76 | 77 | // true if update success, imply nonrepeat request can be applied to state machine: eg, data field 78 | func (kv *ShardKV) updateIfNotDuplicated(shard int, id int32, reqId int64) bool { 79 | // must hold lock outside 80 | maxSeenReqId, ok := kv.data[shard].requests[id] 81 | if ok { 82 | if reqId <= maxSeenReqId { 83 | return false 84 | } 85 | } 86 | 87 | kv.data[shard].requests[id] = reqId 88 | return true 89 | } 90 | 91 | 92 | // call raft.Start to commit a command as log entry 93 | func (kv *ShardKV) proposeCommand(cmd Op) bool { 94 | kv.mu.Lock() 95 | // lock kv first, think about: 96 | // If no lock with rf.Start, raft maybe very quick to agree. 97 | // Then applyRoutine will not find notifyCh on log index, 98 | // proposeCommand will block on notifyCh forever. 99 | logIndex, _, isLeader := kv.rf.Start(cmd) 100 | if !isLeader { 101 | kv.mu.Unlock() 102 | return false 103 | } 104 | 105 | // wait command to be commited 106 | 107 | // use logIndex because all servers agree on same log index 108 | ch, ok := kv.notifyCh[logIndex] 109 | if !ok { 110 | ch = make(chan Op, 1) 111 | kv.notifyCh[logIndex] = ch 112 | } 113 | kv.mu.Unlock() 114 | 115 | // check 116 | if ch == nil { 117 | panic("FATAL: chan is nil") 118 | } 119 | 120 | // wait on ch forever, because: 121 | // If I lose leadership before commit, may be partioned 122 | // I can't response, so wait until partion healed. 123 | // Eventually a log will be commited on index, then I'm 124 | // awaken, but cmd1 is different from cmd, return failed 125 | // to client. 126 | // If client retry another leader when I waiting, no matter. 127 | select { 128 | case cmd1 := <-ch: 129 | return cmd1 == cmd // if different log, me is not leader 130 | } 131 | 132 | return false 133 | } 134 | 135 | 136 | func (kv* ShardKV) checkGroup(key string) bool { 137 | kv.mu.Lock() 138 | defer kv.mu.Unlock() 139 | shard := key2shard(key) 140 | if len(kv.config.Shards) <= shard { 141 | return false 142 | } 143 | 144 | expectGid := kv.config.Shards[shard] 145 | return expectGid == kv.gid 146 | } 147 | 148 | func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) { 149 | // Your code here. 
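// Handler outline: reject keys whose shard this group does not own, fail fast
// if this server is not the Raft leader, drop duplicate client requests, then
// propose a Get command through Raft and, once it commits, read the value from
// the shard's map under the mutex.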
150 | DPrintf("[server %d] GetRPC args %v", kv.me, args) 151 | reply.WrongLeader = false 152 | reply.Err = OK 153 | reply.ID = args.ID 154 | reply.RspID = args.ReqID 155 | 156 | // check if wrong group 157 | if !kv.checkGroup(args.Key) { 158 | DPrintf("[server %d] GetRPC wrong group", kv.me) 159 | reply.Err = ErrWrongGroup 160 | return 161 | } 162 | 163 | // check if leader, useless but efficient 164 | _, isLeader := kv.rf.GetState() 165 | if !isLeader { 166 | reply.WrongLeader = true 167 | reply.Err = ErrNotLeader 168 | return 169 | } 170 | 171 | // check if repeated request, useless but efficient 172 | duplicate := kv.isDuplicated(key2shard(args.Key), args.ID, args.ReqID) 173 | if duplicate { 174 | reply.Err = ErrDuplicateReq 175 | return 176 | } 177 | 178 | cmd := Op{} 179 | cmd.Key = args.Key 180 | cmd.Value = "" // no use for Get 181 | cmd.Operation = "Get" 182 | cmd.ID = args.ID 183 | cmd.ReqID = args.ReqID 184 | 185 | // try commit cmd to raft log 186 | succ := kv.proposeCommand(cmd) 187 | if succ { 188 | shard := key2shard(args.Key) 189 | kv.mu.Lock() 190 | if v, ok := kv.data[shard].data[args.Key]; ok { 191 | reply.Value = v 192 | } else { 193 | reply.Value = "" 194 | reply.Err = ErrNoKey 195 | } 196 | kv.mu.Unlock() 197 | } else { 198 | reply.WrongLeader = true 199 | reply.Err = ErrNotLeader 200 | } 201 | } 202 | 203 | func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 204 | // Your code here. 205 | DPrintf("[server %d] PutAppendRPC args %v", kv.me, args) 206 | reply.WrongLeader = false 207 | reply.Err = OK 208 | reply.ID = args.ID 209 | reply.RspID = args.ReqID 210 | 211 | // check if wrong group 212 | if !kv.checkGroup(args.Key) { 213 | reply.Err = ErrWrongGroup 214 | return 215 | } 216 | 217 | // check if leader, useless but efficient 218 | _, isLeader := kv.rf.GetState() 219 | if !isLeader { 220 | reply.WrongLeader = true 221 | reply.Err = ErrNotLeader 222 | return 223 | } else { 224 | if args.Op != "Put" && args.Op != "Append" { 225 | reply.Err = ErrInvalidOp 226 | return 227 | } 228 | } 229 | 230 | // check if repeated request, useless but efficient 231 | duplicate := kv.isDuplicated(key2shard(args.Key), args.ID, args.ReqID) 232 | if duplicate { 233 | reply.Err = ErrDuplicateReq 234 | return 235 | } 236 | 237 | cmd := Op{} 238 | cmd.Key = args.Key 239 | cmd.Value = args.Value 240 | cmd.Operation = args.Op 241 | cmd.ID = args.ID 242 | cmd.ReqID = args.ReqID 243 | 244 | // try commit cmd to raft log 245 | succ := kv.proposeCommand(cmd) 246 | if !succ { 247 | reply.WrongLeader = true 248 | reply.Err = ErrNotLeader 249 | } 250 | } 251 | 252 | // 253 | // the tester calls Kill() when a ShardKV instance won't 254 | // be needed again. you are not required to do anything 255 | // in Kill(), but it might be convenient to (for example) 256 | // turn off debug output from this instance. 257 | // 258 | func (kv *ShardKV) Kill() { 259 | kv.rf.Kill() 260 | // Your code here, if desired. 
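// Closing the shutdown channel lets the long-running goroutines
// (applyRoutine and pollConfigRoutine) observe the kill and return.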
261 | close(kv.shutdown) 262 | } 263 | 264 | // when raft commited a log entry, it'll notify me 265 | func (kv *ShardKV) applyRoutine() { 266 | for { 267 | var op Op 268 | var applyMsg raft.ApplyMsg 269 | 270 | select { 271 | case <-kv.shutdown: 272 | DPrintf("[server %d] shutdown applyRoutine", kv.me) 273 | return 274 | 275 | case applyMsg = <-kv.applyCh: 276 | } 277 | 278 | if !applyMsg.CommandValid { 279 | // TODO 280 | //kv.loadSnapshot(applyMsg.Snapshot) 281 | continue 282 | } 283 | 284 | op, _ = (applyMsg.Command).(Op) 285 | 286 | shard := key2shard(op.Key) 287 | kv.mu.Lock() 288 | // Follower & Leader: try apply to state machine, fail if duplicated request 289 | if op.Operation == "Put" { 290 | update := kv.updateIfNotDuplicated(shard, op.ID, op.ReqID) 291 | if update { 292 | kv.data[shard].data[op.Key] = op.Value 293 | DPrintf("[server %d] apply for client %d PUT key %s, value %s, logindex %d", kv.me, op.ID, op.Key, op.Value, applyMsg.CommandIndex) 294 | } 295 | } else if op.Operation == "Append" { 296 | update := kv.updateIfNotDuplicated(shard, op.ID, op.ReqID) 297 | if update { 298 | kv.data[shard].data[op.Key] += op.Value 299 | DPrintf("[server %d] apply for client %d APPEND key %s, value %s, logindex %d", kv.me, op.ID, op.Key, op.Value, applyMsg.CommandIndex) 300 | } 301 | } else { 302 | // Do nothing for Get, should I cached reply? 303 | var val = "" 304 | if v, ok := kv.data[shard].data[op.Key]; ok { 305 | val = v 306 | } 307 | DPrintf("[server %d] apply for client %d GET key %s, value %s, logindex %d", kv.me, op.ID, op.Key, val, applyMsg.CommandIndex) 308 | } 309 | 310 | ch, ok := kv.notifyCh[applyMsg.CommandIndex] 311 | if ok { 312 | ch <- op 313 | } 314 | 315 | /* 316 | if kv.maxraftstate > 0 && kv.rf.RaftStateSize() >= kv.maxraftstate { 317 | DPrintf("(%d) state size %d", kv.me, kv.rf.RaftStateSize()) 318 | // If I keep mu.Lock, the startSnapshot will use raft's lock 319 | // But raft's applyRoutine is keeping lock and apply msg, he will be blocking with held lock. 320 | //go kv.startSnapshot(applyMsg.CommandIndex) 321 | kv.startSnapshot(applyMsg.CommandIndex) 322 | } 323 | */ 324 | 325 | kv.mu.Unlock() 326 | } 327 | } 328 | 329 | // for snapshot 330 | 331 | // poll shardMaster 332 | func (kv *ShardKV) pollConfigRoutine() { 333 | timer := time.After(time.Duration(1) * time.Nanosecond) 334 | period := time.Duration(50) * time.Millisecond 335 | for { 336 | select { 337 | case <-kv.shutdown: 338 | return 339 | 340 | case <-timer: 341 | timer = time.After(period) 342 | } 343 | kv.mu.Lock() 344 | kv.config = kv.sm.Query(-1) 345 | //DPrintf("[server %d] config %v", kv.me, kv.config) 346 | kv.mu.Unlock() 347 | } 348 | } 349 | 350 | // 351 | // servers[] contains the ports of the servers in this group. 352 | // 353 | // me is the index of the current server in servers[]. 354 | // 355 | // the k/v server should store snapshots through the underlying Raft 356 | // implementation, which should call persister.SaveStateAndSnapshot() to 357 | // atomically save the Raft state along with the snapshot. 358 | // 359 | // the k/v server should snapshot when Raft's saved state exceeds 360 | // maxraftstate bytes, in order to allow Raft to garbage-collect its 361 | // log. if maxraftstate is -1, you don't need to snapshot. 362 | // 363 | // gid is this group's GID, for interacting with the shardmaster. 364 | // 365 | // pass masters[] to shardmaster.MakeClerk() so you can send 366 | // RPCs to the shardmaster. 
367 | // 368 | // make_end(servername) turns a server name from a 369 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 370 | // send RPCs. You'll need this to send RPCs to other groups. 371 | // 372 | // look at client.go for examples of how to use masters[] 373 | // and make_end() to send RPCs to the group owning a specific shard. 374 | // 375 | // StartServer() must return quickly, so it should start goroutines 376 | // for any long-running work. 377 | // 378 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *ShardKV { 379 | DPrintf("[server %d] StartServer", me) 380 | // call labgob.Register on structures you want 381 | // Go's RPC library to marshall/unmarshall. 382 | labgob.Register(Op{}) 383 | 384 | kv := new(ShardKV) 385 | kv.me = me 386 | kv.maxraftstate = maxraftstate 387 | kv.make_end = make_end 388 | kv.gid = gid 389 | kv.masters = masters 390 | kv.sm = shardmaster.MakeClerk(kv.masters) 391 | 392 | // Your initialization code here. 393 | 394 | // Use something like this to talk to the shardmaster: 395 | // kv.mck = shardmaster.MakeClerk(kv.masters) 396 | 397 | kv.applyCh = make(chan raft.ApplyMsg) 398 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 399 | kv.shutdown = make(chan interface{}, 1) 400 | kv.notifyCh = make(map[int]chan Op) 401 | 402 | // init shard data 403 | kv.data = make(map[int]*ShardInfo) 404 | for i := 0; i < shardmaster.NShards; i++ { 405 | kv.data[i] = new(ShardInfo) 406 | kv.data[i].shardID = i 407 | kv.data[i].data = make(map[string]string) 408 | kv.data[i].requests = make(map[int32]int64) 409 | } 410 | 411 | go kv.pollConfigRoutine() 412 | //go kv.migrateRoutine() // when config changes, MakeClerk(), and send my data to dest gid 413 | // add a rpc interface for recv migrate data 414 | go kv.applyRoutine() 415 | 416 | return kv 417 | } 418 | -------------------------------------------------------------------------------- /src/shardmaster/client.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Shardmaster clerk. 5 | // 6 | 7 | import "labrpc" 8 | import "time" 9 | import "crypto/rand" 10 | import "math/big" 11 | import "sync/atomic" 12 | 13 | var clientIdGen = int32(0) 14 | 15 | const maxTry = 3 16 | 17 | type Clerk struct { 18 | servers []*labrpc.ClientEnd 19 | // Your data here. 20 | leader int // hint or probe, TODO: server no use this field 21 | fail int // successive fail calls for leader 22 | clientId int32 // client id, init by clientIdGen 23 | reqId int64 // req id 24 | } 25 | 26 | func nrand() int64 { 27 | max := big.NewInt(int64(1) << 62) 28 | bigx, _ := rand.Int(rand.Reader, max) 29 | x := bigx.Int64() 30 | return x 31 | } 32 | 33 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 34 | ck := new(Clerk) 35 | ck.servers = servers 36 | // Your code here. 37 | ck.leader = 0 38 | ck.clientId = atomic.AddInt32(&clientIdGen, 1) 39 | ck.reqId = 1 40 | return ck 41 | } 42 | 43 | func (ck *Clerk) Query(num int) Config { 44 | args := &QueryArgs{} 45 | // Your code here. 
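// The loop below implements the clerk's leader-probing protocol: fire the
// RPC in a goroutine, race it against RpcTimeout, and treat a timeout, an
// RPC failure, or a WrongLeader reply as a miss. A timeout or WrongLeader
// rotates ck.leader to the next server immediately; plain RPC failures only
// rotate after maxTry consecutive misses. Query is read-only, so retrying
// it forever is safe.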
46 | args.Num = num 47 | fail := 0 48 | for { 49 | reply := new(QueryReply) 50 | var done = make(chan bool) 51 | go func(leader int) { 52 | ok := ck.servers[leader].Call("ShardMaster.Query", args, reply) 53 | done<-ok 54 | }(ck.leader) 55 | 56 | var ok = true 57 | var timeout = false 58 | select { 59 | case <-time.After(RpcTimeout): 60 | timeout = true 61 | 62 | case ok = <-done: 63 | } 64 | 65 | if !timeout && ok && !reply.WrongLeader { 66 | return reply.Config 67 | } else { 68 | fail++ 69 | if timeout || reply.WrongLeader || fail >= maxTry { 70 | fail = 0 71 | ck.leader++ 72 | if ck.leader >= len(ck.servers) { 73 | ck.leader = 0 74 | } 75 | } 76 | } 77 | 78 | time.Sleep(50 * time.Millisecond) 79 | if fail == 0 { 80 | DPrintf("[client %d] retry QUERY to another server %d\n", ck.clientId, ck.leader) 81 | } 82 | } 83 | } 84 | 85 | func (ck *Clerk) Join(servers map[int][]string) { 86 | args := &JoinArgs{} 87 | // Your code here. 88 | args.Servers = servers 89 | args.ReqID = ck.reqId 90 | ck.reqId++ 91 | args.ID = ck.clientId 92 | 93 | fail := 0 94 | for { 95 | reply := new(QueryReply) 96 | var done = make(chan bool) 97 | go func(leader int) { 98 | ok := ck.servers[leader].Call("ShardMaster.Join", args, reply) 99 | done<-ok 100 | }(ck.leader) 101 | 102 | var ok = true 103 | var timeout = false 104 | select { 105 | case <-time.After(RpcTimeout): 106 | timeout = true 107 | 108 | case ok = <-done: 109 | } 110 | 111 | if !timeout && ok && !reply.WrongLeader { 112 | return 113 | } else { 114 | fail++ 115 | if timeout || reply.WrongLeader || fail >= maxTry { 116 | fail = 0 117 | ck.leader++ 118 | if ck.leader >= len(ck.servers) { 119 | ck.leader = 0 120 | } 121 | } 122 | } 123 | 124 | time.Sleep(50 * time.Millisecond) 125 | if fail == 0 { 126 | DPrintf("[client %d] retry QUERY to another server %d\n", ck.clientId, ck.leader) 127 | } 128 | } 129 | } 130 | 131 | func (ck *Clerk) Leave(gids []int) { 132 | args := &LeaveArgs{} 133 | // Your code here. 134 | args.GIDs = gids 135 | args.ReqID = ck.reqId 136 | ck.reqId++ 137 | args.ID = ck.clientId 138 | 139 | fail := 0 140 | for { 141 | reply := new(QueryReply) 142 | var done = make(chan bool) 143 | go func(leader int) { 144 | ok := ck.servers[leader].Call("ShardMaster.Leave", args, reply) 145 | done<-ok 146 | }(ck.leader) 147 | 148 | var ok = true 149 | var timeout = false 150 | select { 151 | case <-time.After(RpcTimeout): 152 | timeout = true 153 | 154 | case ok = <-done: 155 | } 156 | 157 | if !timeout && ok && !reply.WrongLeader { 158 | return 159 | } else { 160 | fail++ 161 | if timeout || reply.WrongLeader || fail >= maxTry { 162 | fail = 0 163 | ck.leader++ 164 | if ck.leader >= len(ck.servers) { 165 | ck.leader = 0 166 | } 167 | } 168 | } 169 | 170 | time.Sleep(50 * time.Millisecond) 171 | if fail == 0 { 172 | DPrintf("[client %d] retry QUERY to another server %d\n", ck.clientId, ck.leader) 173 | } 174 | } 175 | } 176 | 177 | func (ck *Clerk) Move(shard int, gid int) { 178 | args := &MoveArgs{} 179 | // Your code here. 
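// Query, Join and Leave above, and Move below, all repeat the same
// timeout-and-rotate retry loop. Purely as an illustration (callWithRetry is
// a hypothetical helper, not part of this package), the shared part could be
// factored out roughly like this, reusing the RpcTimeout and maxTry values
// defined in this package:
//
//	// callWithRetry keeps invoking attempt against the currently guessed
//	// leader until an attempt returns ok and not wrong-leader, rotating
//	// ck.leader on timeouts, RPC failures, and WrongLeader replies.
//	func (ck *Clerk) callWithRetry(attempt func(leader int) (ok bool, wrongLeader bool)) {
//		fail := 0
//		for {
//			type result struct{ ok, wrong bool }
//			done := make(chan result, 1) // buffered so a late attempt never blocks
//			go func(leader int) {
//				ok, wrong := attempt(leader)
//				done <- result{ok: ok, wrong: wrong}
//			}(ck.leader)
//
//			r := result{ok: true}
//			timeout := false
//			select {
//			case <-time.After(RpcTimeout):
//				timeout = true
//			case r = <-done:
//			}
//
//			if !timeout && r.ok && !r.wrong {
//				return
//			}
//			fail++
//			if timeout || r.wrong || fail >= maxTry {
//				fail = 0
//				ck.leader = (ck.leader + 1) % len(ck.servers)
//			}
//			time.Sleep(50 * time.Millisecond)
//		}
//	}
//
// Move, for example, would then reduce to building args and calling:
//
//	ck.callWithRetry(func(leader int) (bool, bool) {
//		reply := new(MoveReply)
//		ok := ck.servers[leader].Call("ShardMaster.Move", args, reply)
//		return ok, reply.WrongLeader
//	})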
180 | args.Shard = shard 181 | args.GID = gid 182 | args.ReqID = ck.reqId 183 | ck.reqId++ 184 | args.ID = ck.clientId 185 | 186 | fail := 0 187 | for { 188 | reply := new(QueryReply) 189 | var done = make(chan bool) 190 | go func(leader int) { 191 | ok := ck.servers[leader].Call("ShardMaster.Move", args, reply) 192 | done<-ok 193 | }(ck.leader) 194 | 195 | var ok = true 196 | var timeout = false 197 | select { 198 | case <-time.After(RpcTimeout): 199 | timeout = true 200 | 201 | case ok = <-done: 202 | } 203 | 204 | if !timeout && ok && !reply.WrongLeader { 205 | return 206 | } else { 207 | fail++ 208 | if timeout || reply.WrongLeader || fail >= maxTry { 209 | fail = 0 210 | ck.leader++ 211 | if ck.leader >= len(ck.servers) { 212 | ck.leader = 0 213 | } 214 | } 215 | } 216 | 217 | time.Sleep(50 * time.Millisecond) 218 | if fail == 0 { 219 | DPrintf("[client %d] retry QUERY to another server %d\n", ck.clientId, ck.leader) 220 | } 221 | } 222 | } 223 | 224 | -------------------------------------------------------------------------------- /src/shardmaster/common.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "time" 4 | import "strconv" 5 | 6 | // 7 | // Master shard server: assigns shards to replication groups. 8 | // 9 | // RPC interface: 10 | // Join(servers) -- add a set of groups (gid -> server-list mapping). 11 | // Leave(gids) -- delete a set of groups. 12 | // Move(shard, gid) -- hand off one shard from current owner to gid. 13 | // Query(num) -> fetch Config # num, or latest config if num==-1. 14 | // 15 | // A Config (configuration) describes a set of replica groups, and the 16 | // replica group responsible for each shard. Configs are numbered. Config 17 | // #0 is the initial configuration, with no groups and all shards 18 | // assigned to group 0 (the invalid group). 19 | // 20 | // You will need to add fields to the RPC argument structs. 21 | // 22 | 23 | // The number of shards. 24 | const NShards = 10 25 | 26 | // A configuration -- an assignment of shards to groups. 27 | // Please don't change this. 
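// For illustration (the concrete values below are made up), a configuration
// with two replica groups could look like:
//
//	Config{
//		Num:    2,
//		Shards: [NShards]int{1, 1, 1, 1, 1, 2, 2, 2, 2, 2},
//		Groups: map[int][]string{1: {"s1a", "s1b"}, 2: {"s2a", "s2b"}},
//	}
//
// i.e. shards 0-4 are served by group 1 and shards 5-9 by group 2.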
28 | type Config struct { 29 | Num int // config number 30 | Shards [NShards]int // shard -> gid 31 | Groups map[int][]string // gid -> servers[] 32 | } 33 | 34 | func (cfg *Config) String() string { 35 | s := "Num:" + strconv.Itoa(cfg.Num) + "\n" 36 | s += "shard->gid:\n" 37 | for shard, gid := range cfg.Shards { 38 | s += strconv.Itoa(shard) + "->" + strconv.Itoa(gid) + "\n" 39 | } 40 | s += "gid->nservers:\n" 41 | for gid, ss := range cfg.Groups { 42 | s += strconv.Itoa(gid) + " with nservers " + strconv.Itoa(len(ss)) + "\n" 43 | } 44 | return s 45 | } 46 | 47 | const ( 48 | OK = "OK" 49 | ErrNotLeader = "ErrNotLeader" 50 | ErrDuplicateReq = "ErrDuplicateReq" 51 | ) 52 | 53 | type Err string 54 | 55 | type JoinArgs struct { 56 | Servers map[int][]string // new GID -> servers mappings 57 | 58 | ID int32 // client id 59 | ReqID int64 60 | } 61 | 62 | type JoinReply struct { 63 | WrongLeader bool 64 | Err Err 65 | 66 | ID int32 // client id 67 | RspID int64 68 | } 69 | 70 | type LeaveArgs struct { 71 | GIDs []int 72 | 73 | ID int32 // client id 74 | ReqID int64 75 | } 76 | 77 | type LeaveReply struct { 78 | WrongLeader bool 79 | Err Err 80 | ID int32 // client id 81 | RspID int64 82 | } 83 | 84 | type MoveArgs struct { 85 | Shard int 86 | GID int 87 | 88 | ID int32 // client id 89 | ReqID int64 90 | } 91 | 92 | type MoveReply struct { 93 | WrongLeader bool 94 | Err Err 95 | 96 | ID int32 // client id 97 | RspID int64 98 | } 99 | 100 | type QueryArgs struct { 101 | Num int // desired config number 102 | } 103 | 104 | type QueryReply struct { 105 | WrongLeader bool 106 | Err Err 107 | Config Config 108 | } 109 | 110 | const RpcTimeout time.Duration = 1000 * time.Millisecond 111 | 112 | -------------------------------------------------------------------------------- /src/shardmaster/config.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "labrpc" 4 | import "raft" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | import "time" 15 | 16 | func randstring(n int) string { 17 | b := make([]byte, 2*n) 18 | crand.Read(b) 19 | s := base64.URLEncoding.EncodeToString(b) 20 | return s[0:n] 21 | } 22 | 23 | // Randomize server handles 24 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 25 | sa := make([]*labrpc.ClientEnd, len(kvh)) 26 | copy(sa, kvh) 27 | for i := range sa { 28 | j := rand.Intn(i + 1) 29 | sa[i], sa[j] = sa[j], sa[i] 30 | } 31 | return sa 32 | } 33 | 34 | type config struct { 35 | mu sync.Mutex 36 | t *testing.T 37 | net *labrpc.Network 38 | n int 39 | servers []*ShardMaster 40 | saved []*raft.Persister 41 | endnames [][]string // names of each server's sending ClientEnds 42 | clerks map[*Clerk][]string 43 | nextClientId int 44 | start time.Time // time at which make_config() was called 45 | } 46 | 47 | func (cfg *config) checkTimeout() { 48 | // enforce a two minute real-time limit on each test 49 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 50 | cfg.t.Fatal("test took longer than 120 seconds") 51 | } 52 | } 53 | 54 | func (cfg *config) cleanup() { 55 | cfg.mu.Lock() 56 | defer cfg.mu.Unlock() 57 | for i := 0; i < len(cfg.servers); i++ { 58 | if cfg.servers[i] != nil { 59 | cfg.servers[i].Kill() 60 | } 61 | } 62 | cfg.net.Cleanup() 63 | cfg.checkTimeout() 64 | } 65 | 66 | // Maximum log size across all servers 67 | func (cfg *config) 
LogSize() int { 68 | logsize := 0 69 | for i := 0; i < cfg.n; i++ { 70 | n := cfg.saved[i].RaftStateSize() 71 | if n > logsize { 72 | logsize = n 73 | } 74 | } 75 | return logsize 76 | } 77 | 78 | // attach server i to servers listed in to 79 | // caller must hold cfg.mu 80 | func (cfg *config) connectUnlocked(i int, to []int) { 81 | // log.Printf("connect peer %d to %v\n", i, to) 82 | 83 | // outgoing socket files 84 | for j := 0; j < len(to); j++ { 85 | endname := cfg.endnames[i][to[j]] 86 | cfg.net.Enable(endname, true) 87 | } 88 | 89 | // incoming socket files 90 | for j := 0; j < len(to); j++ { 91 | endname := cfg.endnames[to[j]][i] 92 | cfg.net.Enable(endname, true) 93 | } 94 | } 95 | 96 | func (cfg *config) connect(i int, to []int) { 97 | cfg.mu.Lock() 98 | defer cfg.mu.Unlock() 99 | cfg.connectUnlocked(i, to) 100 | } 101 | 102 | // detach server i from the servers listed in from 103 | // caller must hold cfg.mu 104 | func (cfg *config) disconnectUnlocked(i int, from []int) { 105 | // log.Printf("disconnect peer %d from %v\n", i, from) 106 | 107 | // outgoing socket files 108 | for j := 0; j < len(from); j++ { 109 | if cfg.endnames[i] != nil { 110 | endname := cfg.endnames[i][from[j]] 111 | cfg.net.Enable(endname, false) 112 | } 113 | } 114 | 115 | // incoming socket files 116 | for j := 0; j < len(from); j++ { 117 | if cfg.endnames[j] != nil { 118 | endname := cfg.endnames[from[j]][i] 119 | cfg.net.Enable(endname, false) 120 | } 121 | } 122 | } 123 | 124 | func (cfg *config) disconnect(i int, from []int) { 125 | cfg.mu.Lock() 126 | defer cfg.mu.Unlock() 127 | cfg.disconnectUnlocked(i, from) 128 | } 129 | 130 | func (cfg *config) All() []int { 131 | all := make([]int, cfg.n) 132 | for i := 0; i < cfg.n; i++ { 133 | all[i] = i 134 | } 135 | return all 136 | } 137 | 138 | func (cfg *config) ConnectAll() { 139 | cfg.mu.Lock() 140 | defer cfg.mu.Unlock() 141 | for i := 0; i < cfg.n; i++ { 142 | cfg.connectUnlocked(i, cfg.All()) 143 | } 144 | } 145 | 146 | // Sets up 2 partitions with connectivity between servers in each partition. 147 | func (cfg *config) partition(p1 []int, p2 []int) { 148 | cfg.mu.Lock() 149 | defer cfg.mu.Unlock() 150 | // log.Printf("partition servers into: %v %v\n", p1, p2) 151 | for i := 0; i < len(p1); i++ { 152 | cfg.disconnectUnlocked(p1[i], p2) 153 | cfg.connectUnlocked(p1[i], p1) 154 | } 155 | for i := 0; i < len(p2); i++ { 156 | cfg.disconnectUnlocked(p2[i], p1) 157 | cfg.connectUnlocked(p2[i], p2) 158 | } 159 | } 160 | 161 | // Create a clerk with clerk specific server names. 162 | // Give it connections to all of the servers, but for 163 | // now enable only connections to servers in to[]. 164 | func (cfg *config) makeClient(to []int) *Clerk { 165 | cfg.mu.Lock() 166 | defer cfg.mu.Unlock() 167 | 168 | // a fresh set of ClientEnds. 
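// Each clerk gets its own freshly named ClientEnds, recorded in cfg.clerks,
// so the tester can later enable or disable exactly this clerk's
// connections via ConnectClientUnlocked/DisconnectClientUnlocked.
// random_handles shuffles the ends so different clerks contact the servers
// in different orders.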
169 | ends := make([]*labrpc.ClientEnd, cfg.n) 170 | endnames := make([]string, cfg.n) 171 | for j := 0; j < cfg.n; j++ { 172 | endnames[j] = randstring(20) 173 | ends[j] = cfg.net.MakeEnd(endnames[j]) 174 | cfg.net.Connect(endnames[j], j) 175 | } 176 | 177 | ck := MakeClerk(random_handles(ends)) 178 | cfg.clerks[ck] = endnames 179 | cfg.nextClientId++ 180 | cfg.ConnectClientUnlocked(ck, to) 181 | return ck 182 | } 183 | 184 | func (cfg *config) deleteClient(ck *Clerk) { 185 | cfg.mu.Lock() 186 | defer cfg.mu.Unlock() 187 | 188 | v := cfg.clerks[ck] 189 | for i := 0; i < len(v); i++ { 190 | os.Remove(v[i]) 191 | } 192 | delete(cfg.clerks, ck) 193 | } 194 | 195 | // caller should hold cfg.mu 196 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 197 | // log.Printf("ConnectClient %v to %v\n", ck, to) 198 | endnames := cfg.clerks[ck] 199 | for j := 0; j < len(to); j++ { 200 | s := endnames[to[j]] 201 | cfg.net.Enable(s, true) 202 | } 203 | } 204 | 205 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 206 | cfg.mu.Lock() 207 | defer cfg.mu.Unlock() 208 | cfg.ConnectClientUnlocked(ck, to) 209 | } 210 | 211 | // caller should hold cfg.mu 212 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 213 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 214 | endnames := cfg.clerks[ck] 215 | for j := 0; j < len(from); j++ { 216 | s := endnames[from[j]] 217 | cfg.net.Enable(s, false) 218 | } 219 | } 220 | 221 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 222 | cfg.mu.Lock() 223 | defer cfg.mu.Unlock() 224 | cfg.DisconnectClientUnlocked(ck, from) 225 | } 226 | 227 | // Shutdown a server by isolating it 228 | func (cfg *config) ShutdownServer(i int) { 229 | cfg.mu.Lock() 230 | defer cfg.mu.Unlock() 231 | 232 | cfg.disconnectUnlocked(i, cfg.All()) 233 | 234 | // disable client connections to the server. 235 | // it's important to do this before creating 236 | // the new Persister in saved[i], to avoid 237 | // the possibility of the server returning a 238 | // positive reply to an Append but persisting 239 | // the result in the superseded Persister. 240 | cfg.net.DeleteServer(i) 241 | 242 | // a fresh persister, in case old instance 243 | // continues to update the Persister. 244 | // but copy old persister's content so that we always 245 | // pass Make() the last persisted state. 246 | if cfg.saved[i] != nil { 247 | cfg.saved[i] = cfg.saved[i].Copy() 248 | } 249 | 250 | kv := cfg.servers[i] 251 | if kv != nil { 252 | cfg.mu.Unlock() 253 | kv.Kill() 254 | cfg.mu.Lock() 255 | cfg.servers[i] = nil 256 | } 257 | } 258 | 259 | // If restart servers, first call ShutdownServer 260 | func (cfg *config) StartServer(i int) { 261 | cfg.mu.Lock() 262 | 263 | // a fresh set of outgoing ClientEnd names. 264 | cfg.endnames[i] = make([]string, cfg.n) 265 | for j := 0; j < cfg.n; j++ { 266 | cfg.endnames[i][j] = randstring(20) 267 | } 268 | 269 | // a fresh set of ClientEnds. 270 | ends := make([]*labrpc.ClientEnd, cfg.n) 271 | for j := 0; j < cfg.n; j++ { 272 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 273 | cfg.net.Connect(cfg.endnames[i][j], j) 274 | } 275 | 276 | // a fresh persister, so old instance doesn't overwrite 277 | // new instance's persisted state. 278 | // give the fresh persister a copy of the old persister's 279 | // state, so that the spec is that we pass StartKVServer() 280 | // the last persisted state. 
281 | if cfg.saved[i] != nil { 282 | cfg.saved[i] = cfg.saved[i].Copy() 283 | } else { 284 | cfg.saved[i] = raft.MakePersister() 285 | } 286 | 287 | cfg.mu.Unlock() 288 | 289 | cfg.servers[i] = StartServer(ends, i, cfg.saved[i]) 290 | 291 | kvsvc := labrpc.MakeService(cfg.servers[i]) 292 | rfsvc := labrpc.MakeService(cfg.servers[i].rf) 293 | srv := labrpc.MakeServer() 294 | srv.AddService(kvsvc) 295 | srv.AddService(rfsvc) 296 | cfg.net.AddServer(i, srv) 297 | } 298 | 299 | func (cfg *config) Leader() (bool, int) { 300 | cfg.mu.Lock() 301 | defer cfg.mu.Unlock() 302 | 303 | for i := 0; i < cfg.n; i++ { 304 | _, is_leader := cfg.servers[i].rf.GetState() 305 | if is_leader { 306 | return true, i 307 | } 308 | } 309 | return false, 0 310 | } 311 | 312 | // Partition servers into 2 groups and put current leader in minority 313 | func (cfg *config) make_partition() ([]int, []int) { 314 | _, l := cfg.Leader() 315 | p1 := make([]int, cfg.n/2+1) 316 | p2 := make([]int, cfg.n/2) 317 | j := 0 318 | for i := 0; i < cfg.n; i++ { 319 | if i != l { 320 | if j < len(p1) { 321 | p1[j] = i 322 | } else { 323 | p2[j-len(p1)] = i 324 | } 325 | j++ 326 | } 327 | } 328 | p2[len(p2)-1] = l 329 | return p1, p2 330 | } 331 | 332 | func make_config(t *testing.T, n int, unreliable bool) *config { 333 | runtime.GOMAXPROCS(4) 334 | cfg := &config{} 335 | cfg.t = t 336 | cfg.net = labrpc.MakeNetwork() 337 | cfg.n = n 338 | cfg.servers = make([]*ShardMaster, cfg.n) 339 | cfg.saved = make([]*raft.Persister, cfg.n) 340 | cfg.endnames = make([][]string, cfg.n) 341 | cfg.clerks = make(map[*Clerk][]string) 342 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 343 | cfg.start = time.Now() 344 | 345 | // create a full set of KV servers. 346 | for i := 0; i < cfg.n; i++ { 347 | cfg.StartServer(i) 348 | } 349 | 350 | cfg.ConnectAll() 351 | 352 | cfg.net.Reliable(!unreliable) 353 | 354 | return cfg 355 | } 356 | -------------------------------------------------------------------------------- /src/shardmaster/server.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | 4 | import "raft" 5 | import "labrpc" 6 | import "labgob" 7 | import "sync" 8 | import "log" 9 | import "strconv" 10 | 11 | const Debug = 0 12 | 13 | func DPrintf(format string, a ...interface{}) (n int, err error) { 14 | if Debug > 0 { 15 | log.Printf(format, a...) 16 | } 17 | return 18 | } 19 | 20 | 21 | type ShardMaster struct { 22 | mu sync.Mutex 23 | me int 24 | rf *raft.Raft 25 | applyCh chan raft.ApplyMsg 26 | 27 | configs []Config // indexed by config num 28 | 29 | // Your data here. 30 | 31 | // Notify chan for each log index 32 | notifyCh map[int]chan Response 33 | // request records 34 | requests map[int32]int64 // client -> last commited reqID 35 | 36 | // for exit 37 | shutdown chan interface{} 38 | } 39 | 40 | // real Command 41 | type Op struct { 42 | // Your data here. 
Works like a tagged union: only the fields matching Operation are meaningful. 43 | Operation string // join/leave/move/query 44 | 45 | //join 46 | Servers map[int][]string // new GID -> servers mappings 47 | 48 | //leave 49 | GIDs []int 50 | 51 | //move 52 | Shard int 53 | GID int 54 | 55 | // query 56 | Num int // desired config number 57 | 58 | ID int32 // client id 59 | ReqID int64 60 | } 61 | 62 | type Response struct { 63 | WrongLeader bool 64 | Err Err 65 | Config Config 66 | 67 | ID int32 // client id 68 | ReqID int64 69 | } 70 | 71 | func (sm *ShardMaster) String() string { 72 | s := "[master_" + strconv.Itoa(sm.me) + "]:\n" 73 | for _, cfg := range sm.configs { 74 | s += cfg.String() 75 | } 76 | return s 77 | } 78 | 79 | // check if repeated request 80 | func (sm *ShardMaster) isDuplicated(id int32, reqId int64) bool { 81 | sm.mu.Lock() 82 | defer sm.mu.Unlock() 83 | maxSeenReqId, ok := sm.requests[id] 84 | if ok { 85 | return reqId <= maxSeenReqId 86 | } 87 | return false 88 | } 89 | 90 | // returns true if the record was updated, i.e. the request is not a repeat and can be applied to the state machine 91 | func (sm *ShardMaster) updateIfNotDuplicated(id int32, reqId int64) bool { 92 | // must hold lock outside 93 | maxSeenReqId, ok := sm.requests[id] 94 | if ok { 95 | if reqId <= maxSeenReqId { 96 | return false 97 | } 98 | } 99 | 100 | sm.requests[id] = reqId 101 | return true 102 | } 103 | 104 | // call raft.Start to commit a command as a log entry 105 | func (sm *ShardMaster) proposeCommand(cmd Op) (bool, *Response) { 106 | logIndex, _, isLeader := sm.rf.Start(cmd) 107 | if !isLeader { 108 | //DPrintf("[master %d] proposeCommand %d but not leader", sm.me, cmd.ReqID) 109 | return false, nil 110 | } 111 | 112 | DPrintf("[master %d] proposeCommand %d, %s, logIdx %d", sm.me, cmd.ReqID, cmd.Operation, logIndex) 113 | 114 | // wait for the command to be committed 115 | sm.mu.Lock() 116 | // use logIndex because all servers agree on the same log index 117 | ch, ok := sm.notifyCh[logIndex] 118 | if !ok { 119 | ch = make(chan Response, 1) 120 | sm.notifyCh[logIndex] = ch 121 | } 122 | sm.mu.Unlock() 123 | 124 | // check 125 | if ch == nil { 126 | panic("FATAL: chan is nil") 127 | } 128 | 129 | // Wait on ch indefinitely: if this server loses leadership before the 130 | // entry commits (e.g. it is partitioned away), it cannot reply yet, so it 131 | // waits until the partition heals. Eventually some entry is committed at 132 | // this index and wakes us up; if that entry carries a different client or 133 | // request ID than cmd, report failure so the client retries. If the client 134 | // has already retried against another leader in the meantime, that is 135 | // harmless. 136 | select { 137 | case rsp := <-ch: 138 | return rsp.ID == cmd.ID && rsp.ReqID == cmd.ReqID, &rsp 139 | // a different entry at this index means this server is no longer leader 140 | } 141 | 142 | return false, nil 143 | } 144 | 145 | 146 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) { 147 | // Your code here.
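// Join (like Leave and Move below) follows one handler pattern: validate the
// arguments, do a fast-path duplicate check against sm.requests, then hand
// the command to proposeCommand and wait for the entry committed at that log
// index to come back carrying our (ID, ReqID). Anything else means this
// server lost leadership for that slot, so the reply sends the clerk to try
// another server.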
148 | reply.ID = args.ID 149 | reply.RspID = args.ReqID 150 | reply.WrongLeader = false 151 | reply.Err = "" 152 | 153 | if len(args.Servers) == 0 { 154 | return 155 | } 156 | 157 | for gid, _ := range args.Servers { 158 | if gid == 0 { 159 | return 160 | } 161 | } 162 | 163 | DPrintf("[master %d] JoinRPC args %v", sm.me, args) 164 | 165 | // check if repeated request, useless but efficient 166 | duplicate := sm.isDuplicated(args.ID, args.ReqID) 167 | if duplicate { 168 | reply.Err = ErrDuplicateReq 169 | return 170 | } 171 | 172 | cmd := Op{} 173 | cmd.Operation = "join" 174 | cmd.Servers = args.Servers 175 | cmd.ID = args.ID 176 | cmd.ReqID = args.ReqID 177 | 178 | succ, _ := sm.proposeCommand(cmd) 179 | if !succ { 180 | reply.WrongLeader = true 181 | reply.Err = ErrNotLeader 182 | } 183 | } 184 | 185 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) { 186 | // Your code here. 187 | reply.ID = args.ID 188 | reply.RspID = args.ReqID 189 | 190 | if len(args.GIDs) == 0 { 191 | return 192 | } 193 | 194 | reply.WrongLeader = false 195 | reply.Err = "" 196 | 197 | // check if repeated request, useless but efficient 198 | duplicate := sm.isDuplicated(args.ID, args.ReqID) 199 | if duplicate { 200 | reply.Err = ErrDuplicateReq 201 | return 202 | } 203 | 204 | DPrintf("[master %d] LeaveRPC args %v", sm.me, args) 205 | 206 | cmd := Op{} 207 | cmd.Operation = "leave" 208 | cmd.GIDs = args.GIDs 209 | cmd.ID = args.ID 210 | cmd.ReqID = args.ReqID 211 | 212 | succ, _ := sm.proposeCommand(cmd) 213 | if !succ { 214 | reply.WrongLeader = true 215 | reply.Err = ErrNotLeader 216 | } 217 | } 218 | 219 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) { 220 | // Your code here. 221 | reply.ID = args.ID 222 | reply.RspID = args.ReqID 223 | reply.WrongLeader = false 224 | reply.Err = "" 225 | 226 | // check if repeated request, useless but efficient 227 | duplicate := sm.isDuplicated(args.ID, args.ReqID) 228 | if duplicate { 229 | reply.Err = ErrDuplicateReq 230 | return 231 | } 232 | 233 | DPrintf("[master %d] MoveRPC args %v", sm.me, args) 234 | 235 | cmd := Op{} 236 | cmd.Operation = "move" 237 | cmd.Shard = args.Shard 238 | cmd.GID = args.GID 239 | cmd.ID = args.ID 240 | cmd.ReqID = args.ReqID 241 | 242 | succ, _ := sm.proposeCommand(cmd) 243 | if !succ { 244 | reply.WrongLeader = true 245 | reply.Err = ErrNotLeader 246 | } 247 | } 248 | 249 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) { 250 | // Your code here. 251 | reply.WrongLeader = false 252 | reply.Err = "" 253 | 254 | cmd := Op{} 255 | cmd.Operation = "query" 256 | cmd.Num = args.Num 257 | 258 | succ, rsp := sm.proposeCommand(cmd) 259 | if !succ { 260 | reply.WrongLeader = true 261 | reply.Err = ErrNotLeader 262 | } else { 263 | reply.Config = rsp.Config 264 | } 265 | } 266 | 267 | func (sm* ShardMaster) copyLastConfig() Config { 268 | cfg := Config{} 269 | cfg.Num = sm.configs[len(sm.configs)-1].Num + 1 270 | cfg.Shards = sm.configs[len(sm.configs)-1].Shards 271 | cfg.Groups = make(map[int][]string) 272 | for k, v := range sm.configs[len(sm.configs)-1].Groups { 273 | var servers = make([]string, len(v)) 274 | copy(servers, v) 275 | cfg.Groups[k] = servers 276 | } 277 | 278 | return cfg; 279 | } 280 | 281 | func (sm* ShardMaster) rebalance() { 282 | cfg := &sm.configs[len(sm.configs)-1] 283 | 284 | if len(cfg.Groups) == 0 { 285 | return 286 | } 287 | 288 | //1. 
make gid --> shard count 289 | gidShards := make(map[int]int) 290 | allShardsAllocated := true 291 | for _, gid := range cfg.Shards { 292 | if gid != 0 { 293 | gidShards[gid] += 1 294 | } else { 295 | allShardsAllocated = false 296 | } 297 | } 298 | 299 | minGid, min := 0, NShards + 1 300 | maxGid, max := 0, 0 301 | nGidAllocated := 0 302 | // 2. some gid is not allocated, set to 0 303 | for gid, _ := range cfg.Groups { 304 | if gidShards[gid] == 0 { 305 | gidShards[gid] += 0 306 | } else { 307 | nGidAllocated++ 308 | } 309 | 310 | if gidShards[gid] > max { 311 | max = gidShards[gid] 312 | maxGid = gid 313 | } 314 | if gidShards[gid] < min { 315 | min = gidShards[gid] 316 | minGid = gid 317 | } 318 | } 319 | 320 | allGidAllocated := false 321 | if nGidAllocated == len(cfg.Groups) || nGidAllocated == NShards { 322 | allGidAllocated = true 323 | } 324 | 325 | if allShardsAllocated && allGidAllocated && max - min <= 1 { 326 | return 327 | } 328 | 329 | if allShardsAllocated { 330 | for shard, gid := range cfg.Shards { 331 | if gid == maxGid { 332 | cfg.Shards[shard] = minGid 333 | sm.rebalance() 334 | return 335 | } 336 | } 337 | } else if allGidAllocated { 338 | // try alloc empty shards to minGid 339 | for shard, gid := range cfg.Shards { 340 | if gid == 0 { 341 | cfg.Shards[shard] = minGid 342 | sm.rebalance() 343 | return 344 | } 345 | } 346 | } else { 347 | // try alloc empty shards to minGid 348 | for shard, gid := range cfg.Shards { 349 | if gid == 0 { 350 | cfg.Shards[shard] = minGid 351 | sm.rebalance() 352 | return 353 | } 354 | } 355 | } 356 | } 357 | 358 | // when raft commited a log entry, it'll notify me 359 | func (sm *ShardMaster) applyRoutine() { 360 | for { 361 | var op Op 362 | var applyMsg raft.ApplyMsg 363 | 364 | select { 365 | case <-sm.shutdown: 366 | DPrintf("[master %d] shutdown applyRoutine", sm.me) 367 | return 368 | 369 | case applyMsg = <-sm.applyCh: 370 | } 371 | 372 | if !applyMsg.CommandValid { 373 | panic("no snapshot for ShardMaster") 374 | } 375 | 376 | op, _ = (applyMsg.Command).(Op) 377 | rebalance := false 378 | reply := Response{} 379 | reply.ID = op.ID 380 | reply.ReqID= op.ReqID 381 | 382 | sm.mu.Lock() 383 | // Follower & Leader: try apply to state machine, fail if duplicated request 384 | if op.Operation == "join" { 385 | update := sm.updateIfNotDuplicated(op.ID, op.ReqID) 386 | if update { 387 | cfg := sm.copyLastConfig() 388 | for k,v := range op.Servers { 389 | cfg.Groups[k] = v 390 | } 391 | 392 | DPrintf("[master %d] apply for client %d Join logindex %d, new cfg %v", sm.me, op.ID, applyMsg.CommandIndex, cfg) 393 | sm.configs = append(sm.configs, cfg) 394 | rebalance = true 395 | } 396 | } else if op.Operation == "leave" { 397 | update := sm.updateIfNotDuplicated(op.ID, op.ReqID) 398 | if update { 399 | DPrintf("[master %d] apply for client %d Leave logindex %d", sm.me, op.ID, applyMsg.CommandIndex) 400 | cfg := sm.copyLastConfig() 401 | for _, id := range op.GIDs { 402 | delete (cfg.Groups, id) 403 | for shard, gid := range cfg.Shards { 404 | if gid == id { 405 | cfg.Shards[shard] = 0 406 | } 407 | } 408 | } 409 | 410 | sm.configs = append(sm.configs, cfg) 411 | rebalance = true 412 | } 413 | } else if op.Operation == "move" { 414 | update := sm.updateIfNotDuplicated(op.ID, op.ReqID) 415 | if update { 416 | DPrintf("[master %d] apply for client %d Move logindex %d", sm.me, op.ID, applyMsg.CommandIndex) 417 | cfg := sm.copyLastConfig() 418 | cfg.Shards[op.Shard] = op.GID 419 | 420 | sm.configs = append(sm.configs, cfg) 421 | } 422 | } else if 
op.Operation == "query" { 423 | num := len(sm.configs) - 1 424 | if op.Num >= 0 && op.Num < len(sm.configs) { 425 | num = op.Num 426 | } 427 | reply.Config = sm.configs[num] 428 | } 429 | 430 | if rebalance { 431 | sm.rebalance() 432 | } 433 | 434 | ch, ok := sm.notifyCh[applyMsg.CommandIndex] 435 | if ok { 436 | ch <- reply 437 | } 438 | 439 | sm.mu.Unlock() 440 | } 441 | } 442 | 443 | 444 | // 445 | // the tester calls Kill() when a ShardMaster instance won't 446 | // be needed again. you are not required to do anything 447 | // in Kill(), but it might be convenient to (for example) 448 | // turn off debug output from this instance. 449 | // 450 | func (sm *ShardMaster) Kill() { 451 | sm.rf.Kill() 452 | // Your code here, if desired. 453 | close(sm.shutdown) 454 | } 455 | 456 | // needed by shardkv tester 457 | func (sm *ShardMaster) Raft() *raft.Raft { 458 | return sm.rf 459 | } 460 | 461 | // 462 | // servers[] contains the ports of the set of 463 | // servers that will cooperate via Paxos to 464 | // form the fault-tolerant shardmaster service. 465 | // me is the index of the current server in servers[]. 466 | // 467 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardMaster { 468 | DPrintf("[master %d] StartServer", me) 469 | sm := new(ShardMaster) 470 | sm.me = me 471 | 472 | sm.configs = make([]Config, 1) 473 | sm.configs[0].Groups = map[int][]string{} 474 | 475 | labgob.Register(Op{}) 476 | sm.applyCh = make(chan raft.ApplyMsg) 477 | sm.rf = raft.Make(servers, me, persister, sm.applyCh) 478 | 479 | // Your code here. 480 | sm.requests = make(map[int32]int64) 481 | sm.notifyCh = make(map[int]chan Response) 482 | sm.shutdown = make(chan interface{}, 1) 483 | 484 | go sm.applyRoutine() 485 | 486 | return sm 487 | } 488 | -------------------------------------------------------------------------------- /src/shardmaster/test_test.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | ) 7 | 8 | // import "time" 9 | import "fmt" 10 | 11 | func check(t *testing.T, groups []int, ck *Clerk) { 12 | c := ck.Query(-1) 13 | if len(c.Groups) != len(groups) { 14 | t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups)) 15 | } 16 | 17 | // are the groups as expected? 18 | for _, g := range groups { 19 | _, ok := c.Groups[g] 20 | if ok != true { 21 | t.Fatalf("missing group %v", g) 22 | } 23 | } 24 | 25 | // any un-allocated shards? 26 | if len(groups) > 0 { 27 | for s, g := range c.Shards { 28 | _, ok := c.Groups[g] 29 | if ok == false { 30 | t.Fatalf("shard %v -> invalid group %v", s, g) 31 | } 32 | } 33 | } 34 | 35 | // more or less balanced sharding? 
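// The block below counts how many shards each group owns and requires the
// spread between the most and least loaded group to be at most one. With
// NShards = 10 and three groups, for example, a 4/3/3 split passes while a
// 5/4/1 split fails.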
36 | counts := map[int]int{} 37 | for _, g := range c.Shards { 38 | counts[g] += 1 39 | } 40 | min := 257 41 | max := 0 42 | for g, _ := range c.Groups { 43 | if counts[g] > max { 44 | max = counts[g] 45 | } 46 | if counts[g] < min { 47 | min = counts[g] 48 | } 49 | } 50 | if max > min+1 { 51 | t.Fatalf("max %v too much larger than min %v", max, min) 52 | } 53 | } 54 | 55 | func check_same_config(t *testing.T, c1 Config, c2 Config) { 56 | if c1.Num != c2.Num { 57 | t.Fatalf("Num wrong") 58 | } 59 | if c1.Shards != c2.Shards { 60 | t.Fatalf("Shards wrong") 61 | } 62 | if len(c1.Groups) != len(c2.Groups) { 63 | t.Fatalf("number of Groups is wrong") 64 | } 65 | for gid, sa := range c1.Groups { 66 | sa1, ok := c2.Groups[gid] 67 | if ok == false || len(sa1) != len(sa) { 68 | t.Fatalf("len(Groups) wrong") 69 | } 70 | if ok && len(sa1) == len(sa) { 71 | for j := 0; j < len(sa); j++ { 72 | if sa[j] != sa1[j] { 73 | t.Fatalf("Groups wrong") 74 | } 75 | } 76 | } 77 | } 78 | } 79 | 80 | func TestBasic(t *testing.T) { 81 | const nservers = 3 82 | cfg := make_config(t, nservers, false) 83 | defer cfg.cleanup() 84 | 85 | ck := cfg.makeClient(cfg.All()) 86 | 87 | fmt.Printf("Test: Basic leave/join ...\n") 88 | 89 | cfa := make([]Config, 6) 90 | cfa[0] = ck.Query(-1) 91 | 92 | check(t, []int{}, ck) 93 | 94 | var gid1 int = 1 95 | ck.Join(map[int][]string{gid1: []string{"x", "y", "z"}}) 96 | check(t, []int{gid1}, ck) 97 | cfa[1] = ck.Query(-1) 98 | 99 | var gid2 int = 2 100 | ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}}) 101 | check(t, []int{gid1, gid2}, ck) 102 | cfa[2] = ck.Query(-1) 103 | 104 | cfx := ck.Query(-1) 105 | sa1 := cfx.Groups[gid1] 106 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 107 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 108 | } 109 | sa2 := cfx.Groups[gid2] 110 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 111 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 112 | } 113 | 114 | ck.Leave([]int{gid1}) 115 | check(t, []int{gid2}, ck) 116 | cfa[4] = ck.Query(-1) 117 | 118 | ck.Leave([]int{gid2}) 119 | cfa[5] = ck.Query(-1) 120 | 121 | fmt.Printf(" ... Passed\n") 122 | 123 | fmt.Printf("Test: Historical queries ...\n") 124 | 125 | for s := 0; s < nservers; s++ { 126 | cfg.ShutdownServer(s) 127 | for i := 0; i < len(cfa); i++ { 128 | c := ck.Query(cfa[i].Num) 129 | check_same_config(t, c, cfa[i]) 130 | } 131 | cfg.StartServer(s) 132 | cfg.ConnectAll() 133 | } 134 | 135 | fmt.Printf(" ... 
Passed\n") 136 | 137 | fmt.Printf("Test: Move ...\n") 138 | { 139 | var gid3 int = 503 140 | ck.Join(map[int][]string{gid3: []string{"3a", "3b", "3c"}}) 141 | var gid4 int = 504 142 | ck.Join(map[int][]string{gid4: []string{"4a", "4b", "4c"}}) 143 | for i := 0; i < NShards; i++ { 144 | cf := ck.Query(-1) 145 | if i < NShards/2 { 146 | ck.Move(i, gid3) 147 | if cf.Shards[i] != gid3 { 148 | cf1 := ck.Query(-1) 149 | if cf1.Num <= cf.Num { 150 | t.Fatalf("Move should increase Config.Num") 151 | } 152 | } 153 | } else { 154 | ck.Move(i, gid4) 155 | if cf.Shards[i] != gid4 { 156 | cf1 := ck.Query(-1) 157 | if cf1.Num <= cf.Num { 158 | t.Fatalf("Move should increase Config.Num") 159 | } 160 | } 161 | } 162 | } 163 | cf2 := ck.Query(-1) 164 | for i := 0; i < NShards; i++ { 165 | if i < NShards/2 { 166 | if cf2.Shards[i] != gid3 { 167 | t.Fatalf("expected shard %v on gid %v actually %v", 168 | i, gid3, cf2.Shards[i]) 169 | } 170 | } else { 171 | if cf2.Shards[i] != gid4 { 172 | t.Fatalf("expected shard %v on gid %v actually %v", 173 | i, gid4, cf2.Shards[i]) 174 | } 175 | } 176 | } 177 | ck.Leave([]int{gid3}) 178 | ck.Leave([]int{gid4}) 179 | } 180 | fmt.Printf(" ... Passed\n") 181 | 182 | fmt.Printf("Test: Concurrent leave/join ...\n") 183 | 184 | const npara = 10 185 | var cka [npara]*Clerk 186 | for i := 0; i < len(cka); i++ { 187 | cka[i] = cfg.makeClient(cfg.All()) 188 | } 189 | gids := make([]int, npara) 190 | ch := make(chan bool) 191 | for xi := 0; xi < npara; xi++ { 192 | gids[xi] = int((xi * 10) + 100) 193 | go func(i int) { 194 | defer func() { ch <- true }() 195 | var gid int = gids[i] 196 | var sid1 = fmt.Sprintf("s%da", gid) 197 | var sid2 = fmt.Sprintf("s%db", gid) 198 | cka[i].Join(map[int][]string{gid + 1000: []string{sid1}}) 199 | cka[i].Join(map[int][]string{gid: []string{sid2}}) 200 | cka[i].Leave([]int{gid + 1000}) 201 | }(xi) 202 | } 203 | for i := 0; i < npara; i++ { 204 | <-ch 205 | } 206 | check(t, gids, ck) 207 | 208 | fmt.Printf(" ... Passed\n") 209 | 210 | fmt.Printf("Test: Minimal transfers after joins ...\n") 211 | 212 | c1 := ck.Query(-1) 213 | for i := 0; i < 5; i++ { 214 | var gid = int(npara + 1 + i) 215 | ck.Join(map[int][]string{gid: []string{ 216 | fmt.Sprintf("%da", gid), 217 | fmt.Sprintf("%db", gid), 218 | fmt.Sprintf("%db", gid)}}) 219 | } 220 | c2 := ck.Query(-1) 221 | for i := int(1); i <= npara; i++ { 222 | for j := 0; j < len(c1.Shards); j++ { 223 | if c2.Shards[j] == i { 224 | if c1.Shards[j] != i { 225 | t.Fatalf("non-minimal transfer after Join()s") 226 | } 227 | } 228 | } 229 | } 230 | 231 | fmt.Printf(" ... Passed\n") 232 | 233 | fmt.Printf("Test: Minimal transfers after leaves ...\n") 234 | 235 | for i := 0; i < 5; i++ { 236 | ck.Leave([]int{int(npara + 1 + i)}) 237 | } 238 | c3 := ck.Query(-1) 239 | for i := int(1); i <= npara; i++ { 240 | for j := 0; j < len(c1.Shards); j++ { 241 | if c2.Shards[j] == i { 242 | if c3.Shards[j] != i { 243 | t.Fatalf("non-minimal transfer after Leave()s") 244 | } 245 | } 246 | } 247 | } 248 | 249 | fmt.Printf(" ... 
Passed\n") 250 | } 251 | 252 | func TestMulti(t *testing.T) { 253 | const nservers = 3 254 | cfg := make_config(t, nservers, false) 255 | defer cfg.cleanup() 256 | 257 | ck := cfg.makeClient(cfg.All()) 258 | 259 | fmt.Printf("Test: Multi-group join/leave ...\n") 260 | 261 | cfa := make([]Config, 6) 262 | cfa[0] = ck.Query(-1) 263 | 264 | check(t, []int{}, ck) 265 | 266 | var gid1 int = 1 267 | var gid2 int = 2 268 | ck.Join(map[int][]string{ 269 | gid1: []string{"x", "y", "z"}, 270 | gid2: []string{"a", "b", "c"}, 271 | }) 272 | check(t, []int{gid1, gid2}, ck) 273 | cfa[1] = ck.Query(-1) 274 | 275 | var gid3 int = 3 276 | ck.Join(map[int][]string{gid3: []string{"j", "k", "l"}}) 277 | check(t, []int{gid1, gid2, gid3}, ck) 278 | cfa[2] = ck.Query(-1) 279 | 280 | cfx := ck.Query(-1) 281 | sa1 := cfx.Groups[gid1] 282 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 283 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 284 | } 285 | sa2 := cfx.Groups[gid2] 286 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 287 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 288 | } 289 | sa3 := cfx.Groups[gid3] 290 | if len(sa3) != 3 || sa3[0] != "j" || sa3[1] != "k" || sa3[2] != "l" { 291 | t.Fatalf("wrong servers for gid %v: %v\n", gid3, sa3) 292 | } 293 | 294 | ck.Leave([]int{gid1, gid3}) 295 | check(t, []int{gid2}, ck) 296 | cfa[3] = ck.Query(-1) 297 | 298 | cfx = ck.Query(-1) 299 | sa2 = cfx.Groups[gid2] 300 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 301 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 302 | } 303 | 304 | ck.Leave([]int{gid2}) 305 | 306 | fmt.Printf(" ... Passed\n") 307 | 308 | fmt.Printf("Test: Concurrent multi leave/join ...\n") 309 | 310 | const npara = 10 311 | var cka [npara]*Clerk 312 | for i := 0; i < len(cka); i++ { 313 | cka[i] = cfg.makeClient(cfg.All()) 314 | } 315 | gids := make([]int, npara) 316 | var wg sync.WaitGroup 317 | for xi := 0; xi < npara; xi++ { 318 | wg.Add(1) 319 | gids[xi] = int(xi + 1000) 320 | go func(i int) { 321 | defer wg.Done() 322 | var gid int = gids[i] 323 | cka[i].Join(map[int][]string{ 324 | gid: []string{ 325 | fmt.Sprintf("%da", gid), 326 | fmt.Sprintf("%db", gid), 327 | fmt.Sprintf("%dc", gid)}, 328 | gid + 1000: []string{fmt.Sprintf("%da", gid+1000)}, 329 | gid + 2000: []string{fmt.Sprintf("%da", gid+2000)}, 330 | }) 331 | cka[i].Leave([]int{gid + 1000, gid + 2000}) 332 | }(xi) 333 | } 334 | wg.Wait() 335 | check(t, gids, ck) 336 | 337 | fmt.Printf(" ... Passed\n") 338 | 339 | fmt.Printf("Test: Minimal transfers after multijoins ...\n") 340 | 341 | c1 := ck.Query(-1) 342 | m := make(map[int][]string) 343 | for i := 0; i < 5; i++ { 344 | var gid = npara + 1 + i 345 | m[gid] = []string{fmt.Sprintf("%da", gid), fmt.Sprintf("%db", gid)} 346 | } 347 | ck.Join(m) 348 | c2 := ck.Query(-1) 349 | for i := int(1); i <= npara; i++ { 350 | for j := 0; j < len(c1.Shards); j++ { 351 | if c2.Shards[j] == i { 352 | if c1.Shards[j] != i { 353 | t.Fatalf("non-minimal transfer after Join()s") 354 | } 355 | } 356 | } 357 | } 358 | 359 | fmt.Printf(" ... 
Passed\n") 360 | 361 | fmt.Printf("Test: Minimal transfers after multileaves ...\n") 362 | 363 | var l []int 364 | for i := 0; i < 5; i++ { 365 | l = append(l, npara+1+i) 366 | } 367 | ck.Leave(l) 368 | c3 := ck.Query(-1) 369 | for i := int(1); i <= npara; i++ { 370 | for j := 0; j < len(c1.Shards); j++ { 371 | if c2.Shards[j] == i { 372 | if c3.Shards[j] != i { 373 | t.Fatalf("non-minimal transfer after Leave()s") 374 | } 375 | } 376 | } 377 | } 378 | 379 | fmt.Printf(" ... Passed\n") 380 | } 381 | --------------------------------------------------------------------------------