├── LICENSE ├── README.md ├── config.go ├── lab_res ├── Lab2A.PNG ├── Lab2B.PNG ├── Lab2C.PNG └── Lab2D.PNG ├── persister.go ├── raft.go ├── test_test.go └── util.go /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Holdonbush 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MIT6.824-Lab2-Raft 2 | An implementation of MIT 6.824 Lab 2 (Raft). 3 | 4 | ## Lab2A 5 | On the master/main/Lab2A/Lab2B/Lab2C/Lab2D branches, run `go test -run 2A -race` 6 | ![Lab2A](./lab_res/Lab2A.PNG) 7 | 8 | ## Lab2B 9 | On the master/main/Lab2B/Lab2C/Lab2D branches, run `go test -run 2B` or `time go test -run 2B` 10 | ![Lab2B](./lab_res/Lab2B.PNG) 11 | 12 | ## Lab2C 13 | On the master/main/Lab2C/Lab2D branches, run `go test -run 2C -race` 14 | ![Lab2C](./lab_res/Lab2C.PNG) 15 | 16 | ## Lab2D 17 | On the master/main/Lab2D branch, run `go test -run 2D` 18 | ![Lab2D](./lab_res/Lab2D.PNG) 19 | -------------------------------------------------------------------------------- /config.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft tester. 5 | // 6 | // we will use the original config.go to test your code for grading. 7 | // so, while you can modify this code to help you debug, please 8 | // test with the original before submitting.
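//
// Editor's note (not part of the upstream tester; a hedged sketch only): a test
// built on this harness typically looks like the following, using only helpers
// defined in this file -- make_config, begin, checkOneLeader, one, cleanup, end:
//
//	cfg := make_config(t, 3, false, false) // 3 peers, reliable network, no snapshots
//	defer cfg.cleanup()
//	cfg.begin("Test (2B): example agreement")
//	cfg.checkOneLeader()  // wait until exactly one leader exists
//	cfg.one(100, 3, true) // submit command 100 and wait for all 3 peers to commit it
//	cfg.end()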
9 | // 10 | 11 | import "6.824/labgob" 12 | import "6.824/labrpc" 13 | import "bytes" 14 | import "log" 15 | import "sync" 16 | import "testing" 17 | import "runtime" 18 | import "math/rand" 19 | import crand "crypto/rand" 20 | import "math/big" 21 | import "encoding/base64" 22 | import "time" 23 | import "fmt" 24 | 25 | func randstring(n int) string { 26 | b := make([]byte, 2*n) 27 | crand.Read(b) 28 | s := base64.URLEncoding.EncodeToString(b) 29 | return s[0:n] 30 | } 31 | 32 | func makeSeed() int64 { 33 | max := big.NewInt(int64(1) << 62) 34 | bigx, _ := crand.Int(crand.Reader, max) 35 | x := bigx.Int64() 36 | return x 37 | } 38 | 39 | type config struct { 40 | mu sync.Mutex 41 | t *testing.T 42 | net *labrpc.Network 43 | n int 44 | rafts []*Raft 45 | applyErr []string // from apply channel readers 46 | connected []bool // whether each server is on the net 47 | saved []*Persister 48 | endnames [][]string // the port file names each sends to 49 | logs []map[int]interface{} // copy of each server's committed entries 50 | start time.Time // time at which make_config() was called 51 | // begin()/end() statistics 52 | t0 time.Time // time at which test_test.go called cfg.begin() 53 | rpcs0 int // rpcTotal() at start of test 54 | cmds0 int // number of agreements 55 | bytes0 int64 56 | maxIndex int 57 | maxIndex0 int 58 | } 59 | 60 | var ncpu_once sync.Once 61 | 62 | func make_config(t *testing.T, n int, unreliable bool, snapshot bool) *config { 63 | ncpu_once.Do(func() { 64 | if runtime.NumCPU() < 2 { 65 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 66 | } 67 | rand.Seed(makeSeed()) 68 | }) 69 | runtime.GOMAXPROCS(4) 70 | cfg := &config{} 71 | cfg.t = t 72 | cfg.net = labrpc.MakeNetwork() 73 | cfg.n = n 74 | cfg.applyErr = make([]string, cfg.n) 75 | cfg.rafts = make([]*Raft, cfg.n) 76 | cfg.connected = make([]bool, cfg.n) 77 | cfg.saved = make([]*Persister, cfg.n) 78 | cfg.endnames = make([][]string, cfg.n) 79 | cfg.logs = make([]map[int]interface{}, cfg.n) 80 | cfg.start = time.Now() 81 | 82 | cfg.setunreliable(unreliable) 83 | 84 | cfg.net.LongDelays(true) 85 | 86 | applier := cfg.applier 87 | if snapshot { 88 | applier = cfg.applierSnap 89 | } 90 | // create a full set of Rafts. 91 | for i := 0; i < cfg.n; i++ { 92 | cfg.logs[i] = map[int]interface{}{} 93 | cfg.start1(i, applier) 94 | } 95 | 96 | // connect everyone 97 | for i := 0; i < cfg.n; i++ { 98 | cfg.connect(i) 99 | } 100 | 101 | return cfg 102 | } 103 | 104 | // shut down a Raft server but save its persistent state. 105 | func (cfg *config) crash1(i int) { 106 | cfg.disconnect(i) 107 | cfg.net.DeleteServer(i) // disable client connections to the server. 108 | 109 | cfg.mu.Lock() 110 | defer cfg.mu.Unlock() 111 | 112 | // a fresh persister, in case old instance 113 | // continues to update the Persister. 114 | // but copy old persister's content so that we always 115 | // pass Make() the last persisted state. 
116 | if cfg.saved[i] != nil { 117 | cfg.saved[i] = cfg.saved[i].Copy() 118 | } 119 | 120 | rf := cfg.rafts[i] 121 | if rf != nil { 122 | cfg.mu.Unlock() 123 | rf.Kill() 124 | cfg.mu.Lock() 125 | cfg.rafts[i] = nil 126 | } 127 | 128 | if cfg.saved[i] != nil { 129 | raftlog := cfg.saved[i].ReadRaftState() 130 | snapshot := cfg.saved[i].ReadSnapshot() 131 | cfg.saved[i] = &Persister{} 132 | cfg.saved[i].SaveStateAndSnapshot(raftlog, snapshot) 133 | } 134 | } 135 | 136 | func (cfg *config) checkLogs(i int, m ApplyMsg) (string, bool) { 137 | err_msg := "" 138 | v := m.Command 139 | for j := 0; j < len(cfg.logs); j++ { 140 | if old, oldok := cfg.logs[j][m.CommandIndex]; oldok && old != v { 141 | log.Printf("%v: log %v; server %v\n", i, cfg.logs[i], cfg.logs[j]) 142 | // some server has already committed a different value for this entry! 143 | err_msg = fmt.Sprintf("commit index=%v server=%v %v != server=%v %v", 144 | m.CommandIndex, i, m.Command, j, old) 145 | } 146 | } 147 | _, prevok := cfg.logs[i][m.CommandIndex-1] 148 | cfg.logs[i][m.CommandIndex] = v 149 | if m.CommandIndex > cfg.maxIndex { 150 | cfg.maxIndex = m.CommandIndex 151 | } 152 | return err_msg, prevok 153 | } 154 | 155 | // applier reads message from apply ch and checks that they match the log 156 | // contents 157 | func (cfg *config) applier(i int, applyCh chan ApplyMsg) { 158 | for m := range applyCh { 159 | if m.CommandValid == false { 160 | // ignore other types of ApplyMsg 161 | } else { 162 | cfg.mu.Lock() 163 | err_msg, prevok := cfg.checkLogs(i, m) 164 | cfg.mu.Unlock() 165 | if m.CommandIndex > 1 && prevok == false { 166 | err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex) 167 | } 168 | if err_msg != "" { 169 | log.Fatalf("apply error: %v\n", err_msg) 170 | cfg.applyErr[i] = err_msg 171 | // keep reading after error so that Raft doesn't block 172 | // holding locks... 173 | } 174 | } 175 | } 176 | } 177 | 178 | const SnapShotInterval = 10 179 | 180 | // periodically snapshot raft state 181 | func (cfg *config) applierSnap(i int, applyCh chan ApplyMsg) { 182 | lastApplied := 0 183 | for m := range applyCh { 184 | if m.SnapshotValid { 185 | //DPrintf("Installsnapshot %v %v\n", m.SnapshotIndex, lastApplied) 186 | cfg.mu.Lock() 187 | if cfg.rafts[i].CondInstallSnapshot(m.SnapshotTerm, 188 | m.SnapshotIndex, m.Snapshot) { 189 | cfg.logs[i] = make(map[int]interface{}) 190 | r := bytes.NewBuffer(m.Snapshot) 191 | d := labgob.NewDecoder(r) 192 | var v int 193 | if d.Decode(&v) != nil { 194 | log.Fatalf("decode error\n") 195 | } 196 | cfg.logs[i][m.SnapshotIndex] = v 197 | lastApplied = m.SnapshotIndex 198 | } 199 | cfg.mu.Unlock() 200 | } else if m.CommandValid && m.CommandIndex > lastApplied { 201 | //DPrintf("apply %v lastApplied %v\n", m.CommandIndex, lastApplied) 202 | cfg.mu.Lock() 203 | err_msg, prevok := cfg.checkLogs(i, m) 204 | cfg.mu.Unlock() 205 | if m.CommandIndex > 1 && prevok == false { 206 | err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex) 207 | } 208 | if err_msg != "" { 209 | log.Fatalf("apply error: %v\n", err_msg) 210 | cfg.applyErr[i] = err_msg 211 | // keep reading after error so that Raft doesn't block 212 | // holding locks... 
213 | } 214 | lastApplied = m.CommandIndex 215 | if (m.CommandIndex+1)%SnapShotInterval == 0 { 216 | w := new(bytes.Buffer) 217 | e := labgob.NewEncoder(w) 218 | v := m.Command 219 | e.Encode(v) 220 | cfg.rafts[i].Snapshot(m.CommandIndex, w.Bytes()) 221 | } 222 | } else { 223 | // Ignore other types of ApplyMsg or old 224 | // commands. Old command may never happen, 225 | // depending on the Raft implementation, but 226 | // just in case. 227 | // DPrintf("Ignore: Index %v lastApplied %v\n", m.CommandIndex, lastApplied) 228 | 229 | } 230 | } 231 | } 232 | 233 | // 234 | // start or re-start a Raft. 235 | // if one already exists, "kill" it first. 236 | // allocate new outgoing port file names, and a new 237 | // state persister, to isolate previous instance of 238 | // this server. since we cannot really kill it. 239 | // 240 | func (cfg *config) start1(i int, applier func(int, chan ApplyMsg)) { 241 | cfg.crash1(i) 242 | 243 | // a fresh set of outgoing ClientEnd names. 244 | // so that old crashed instance's ClientEnds can't send. 245 | cfg.endnames[i] = make([]string, cfg.n) 246 | for j := 0; j < cfg.n; j++ { 247 | cfg.endnames[i][j] = randstring(20) 248 | } 249 | 250 | // a fresh set of ClientEnds. 251 | ends := make([]*labrpc.ClientEnd, cfg.n) 252 | for j := 0; j < cfg.n; j++ { 253 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 254 | cfg.net.Connect(cfg.endnames[i][j], j) 255 | } 256 | 257 | cfg.mu.Lock() 258 | 259 | // a fresh persister, so old instance doesn't overwrite 260 | // new instance's persisted state. 261 | // but copy old persister's content so that we always 262 | // pass Make() the last persisted state. 263 | if cfg.saved[i] != nil { 264 | cfg.saved[i] = cfg.saved[i].Copy() 265 | } else { 266 | cfg.saved[i] = MakePersister() 267 | } 268 | 269 | cfg.mu.Unlock() 270 | 271 | applyCh := make(chan ApplyMsg) 272 | 273 | rf := Make(ends, i, cfg.saved[i], applyCh) 274 | 275 | cfg.mu.Lock() 276 | cfg.rafts[i] = rf 277 | cfg.mu.Unlock() 278 | 279 | go applier(i, applyCh) 280 | 281 | svc := labrpc.MakeService(rf) 282 | srv := labrpc.MakeServer() 283 | srv.AddService(svc) 284 | cfg.net.AddServer(i, srv) 285 | } 286 | 287 | func (cfg *config) checkTimeout() { 288 | // enforce a two minute real-time limit on each test 289 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 290 | cfg.t.Fatal("test took longer than 120 seconds") 291 | } 292 | } 293 | 294 | func (cfg *config) cleanup() { 295 | for i := 0; i < len(cfg.rafts); i++ { 296 | if cfg.rafts[i] != nil { 297 | cfg.rafts[i].Kill() 298 | } 299 | } 300 | cfg.net.Cleanup() 301 | cfg.checkTimeout() 302 | } 303 | 304 | // attach server i to the net. 305 | func (cfg *config) connect(i int) { 306 | // fmt.Printf("connect(%d)\n", i) 307 | 308 | cfg.connected[i] = true 309 | 310 | // outgoing ClientEnds 311 | for j := 0; j < cfg.n; j++ { 312 | if cfg.connected[j] { 313 | endname := cfg.endnames[i][j] 314 | cfg.net.Enable(endname, true) 315 | } 316 | } 317 | 318 | // incoming ClientEnds 319 | for j := 0; j < cfg.n; j++ { 320 | if cfg.connected[j] { 321 | endname := cfg.endnames[j][i] 322 | cfg.net.Enable(endname, true) 323 | } 324 | } 325 | } 326 | 327 | // detach server i from the net. 
328 | func (cfg *config) disconnect(i int) { 329 | // fmt.Printf("disconnect(%d)\n", i) 330 | 331 | cfg.connected[i] = false 332 | 333 | // outgoing ClientEnds 334 | for j := 0; j < cfg.n; j++ { 335 | if cfg.endnames[i] != nil { 336 | endname := cfg.endnames[i][j] 337 | cfg.net.Enable(endname, false) 338 | } 339 | } 340 | 341 | // incoming ClientEnds 342 | for j := 0; j < cfg.n; j++ { 343 | if cfg.endnames[j] != nil { 344 | endname := cfg.endnames[j][i] 345 | cfg.net.Enable(endname, false) 346 | } 347 | } 348 | } 349 | 350 | func (cfg *config) rpcCount(server int) int { 351 | return cfg.net.GetCount(server) 352 | } 353 | 354 | func (cfg *config) rpcTotal() int { 355 | return cfg.net.GetTotalCount() 356 | } 357 | 358 | func (cfg *config) setunreliable(unrel bool) { 359 | cfg.net.Reliable(!unrel) 360 | } 361 | 362 | func (cfg *config) bytesTotal() int64 { 363 | return cfg.net.GetTotalBytes() 364 | } 365 | 366 | func (cfg *config) setlongreordering(longrel bool) { 367 | cfg.net.LongReordering(longrel) 368 | } 369 | 370 | // check that there's exactly one leader. 371 | // try a few times in case re-elections are needed. 372 | func (cfg *config) checkOneLeader() int { 373 | for iters := 0; iters < 10; iters++ { 374 | ms := 450 + (rand.Int63() % 100) 375 | time.Sleep(time.Duration(ms) * time.Millisecond) 376 | 377 | leaders := make(map[int][]int) 378 | for i := 0; i < cfg.n; i++ { 379 | if cfg.connected[i] { 380 | if term, leader := cfg.rafts[i].GetState(); leader { 381 | leaders[term] = append(leaders[term], i) 382 | } 383 | } 384 | } 385 | 386 | lastTermWithLeader := -1 387 | for term, leaders := range leaders { 388 | if len(leaders) > 1 { 389 | cfg.t.Fatalf("term %d has %d (>1) leaders", term, len(leaders)) 390 | } 391 | if term > lastTermWithLeader { 392 | lastTermWithLeader = term 393 | } 394 | } 395 | 396 | if len(leaders) != 0 { 397 | return leaders[lastTermWithLeader][0] 398 | } 399 | } 400 | cfg.t.Fatalf("expected one leader, got none") 401 | return -1 402 | } 403 | 404 | // check that everyone agrees on the term. 405 | func (cfg *config) checkTerms() int { 406 | term := -1 407 | for i := 0; i < cfg.n; i++ { 408 | if cfg.connected[i] { 409 | xterm, _ := cfg.rafts[i].GetState() 410 | if term == -1 { 411 | term = xterm 412 | } else if term != xterm { 413 | cfg.t.Fatalf("servers disagree on term") 414 | } 415 | } 416 | } 417 | return term 418 | } 419 | 420 | // check that there's no leader 421 | func (cfg *config) checkNoLeader() { 422 | for i := 0; i < cfg.n; i++ { 423 | if cfg.connected[i] { 424 | _, is_leader := cfg.rafts[i].GetState() 425 | if is_leader { 426 | cfg.t.Fatalf("expected no leader, but %v claims to be leader", i) 427 | } 428 | } 429 | } 430 | } 431 | 432 | // how many servers think a log entry is committed? 433 | func (cfg *config) nCommitted(index int) (int, interface{}) { 434 | count := 0 435 | var cmd interface{} = nil 436 | for i := 0; i < len(cfg.rafts); i++ { 437 | if cfg.applyErr[i] != "" { 438 | cfg.t.Fatal(cfg.applyErr[i]) 439 | } 440 | 441 | cfg.mu.Lock() 442 | cmd1, ok := cfg.logs[i][index] 443 | cfg.mu.Unlock() 444 | 445 | if ok { 446 | if count > 0 && cmd != cmd1 { 447 | cfg.t.Fatalf("committed values do not match: index %v, %v, %v\n", 448 | index, cmd, cmd1) 449 | } 450 | count += 1 451 | cmd = cmd1 452 | } 453 | } 454 | return count, cmd 455 | } 456 | 457 | // wait for at least n servers to commit. 458 | // but don't wait forever. 
459 | func (cfg *config) wait(index int, n int, startTerm int) interface{} { 460 | to := 10 * time.Millisecond 461 | for iters := 0; iters < 30; iters++ { 462 | nd, _ := cfg.nCommitted(index) 463 | if nd >= n { 464 | break 465 | } 466 | time.Sleep(to) 467 | if to < time.Second { 468 | to *= 2 469 | } 470 | if startTerm > -1 { 471 | for _, r := range cfg.rafts { 472 | if t, _ := r.GetState(); t > startTerm { 473 | // someone has moved on 474 | // can no longer guarantee that we'll "win" 475 | return -1 476 | } 477 | } 478 | } 479 | } 480 | nd, cmd := cfg.nCommitted(index) 481 | if nd < n { 482 | cfg.t.Fatalf("only %d decided for index %d; wanted %d\n", 483 | nd, index, n) 484 | } 485 | return cmd 486 | } 487 | 488 | // do a complete agreement. 489 | // it might choose the wrong leader initially, 490 | // and have to re-submit after giving up. 491 | // entirely gives up after about 10 seconds. 492 | // indirectly checks that the servers agree on the 493 | // same value, since nCommitted() checks this, 494 | // as do the threads that read from applyCh. 495 | // returns index. 496 | // if retry==true, may submit the command multiple 497 | // times, in case a leader fails just after Start(). 498 | // if retry==false, calls Start() only once, in order 499 | // to simplify the early Lab 2B tests. 500 | func (cfg *config) one(cmd interface{}, expectedServers int, retry bool) int { 501 | t0 := time.Now() 502 | starts := 0 503 | for time.Since(t0).Seconds() < 10 { 504 | // try all the servers, maybe one is the leader. 505 | index := -1 506 | for si := 0; si < cfg.n; si++ { 507 | starts = (starts + 1) % cfg.n 508 | var rf *Raft 509 | cfg.mu.Lock() 510 | if cfg.connected[starts] { 511 | rf = cfg.rafts[starts] 512 | } 513 | cfg.mu.Unlock() 514 | if rf != nil { 515 | index1, _, ok := rf.Start(cmd) 516 | if ok { 517 | index = index1 518 | break 519 | } 520 | } 521 | } 522 | 523 | if index != -1 { 524 | // somebody claimed to be the leader and to have 525 | // submitted our command; wait a while for agreement. 526 | t1 := time.Now() 527 | for time.Since(t1).Seconds() < 2 { 528 | nd, cmd1 := cfg.nCommitted(index) 529 | if nd > 0 && nd >= expectedServers { 530 | // committed 531 | if cmd1 == cmd { 532 | // and it was the command we submitted. 533 | return index 534 | } 535 | } 536 | time.Sleep(20 * time.Millisecond) 537 | } 538 | if retry == false { 539 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) 540 | } 541 | } else { 542 | time.Sleep(50 * time.Millisecond) 543 | } 544 | } 545 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) 546 | return -1 547 | } 548 | 549 | // start a Test. 550 | // print the Test message. 551 | // e.g. cfg.begin("Test (2B): RPC counts aren't too high") 552 | func (cfg *config) begin(description string) { 553 | fmt.Printf("%s ...\n", description) 554 | cfg.t0 = time.Now() 555 | cfg.rpcs0 = cfg.rpcTotal() 556 | cfg.bytes0 = cfg.bytesTotal() 557 | cfg.cmds0 = 0 558 | cfg.maxIndex0 = cfg.maxIndex 559 | } 560 | 561 | // end a Test -- the fact that we got here means there 562 | // was no failure. 563 | // print the Passed message, 564 | // and some performance numbers. 
565 | func (cfg *config) end() { 566 | cfg.checkTimeout() 567 | if cfg.t.Failed() == false { 568 | cfg.mu.Lock() 569 | t := time.Since(cfg.t0).Seconds() // real time 570 | npeers := cfg.n // number of Raft peers 571 | nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends 572 | nbytes := cfg.bytesTotal() - cfg.bytes0 // number of bytes 573 | ncmds := cfg.maxIndex - cfg.maxIndex0 // number of Raft agreements reported 574 | cfg.mu.Unlock() 575 | 576 | fmt.Printf(" ... Passed --") 577 | fmt.Printf(" %4.1f %d %4d %7d %4d\n", t, npeers, nrpc, nbytes, ncmds) 578 | } 579 | } 580 | 581 | // Maximum log size across all servers 582 | func (cfg *config) LogSize() int { 583 | logsize := 0 584 | for i := 0; i < cfg.n; i++ { 585 | n := cfg.saved[i].RaftStateSize() 586 | if n > logsize { 587 | logsize = n 588 | } 589 | } 590 | return logsize 591 | } 592 | -------------------------------------------------------------------------------- /lab_res/Lab2A.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdonbush/MIT6.824-Lab2-Raft/c240da0adadfcd639cc7de44ff134da5f47444e4/lab_res/Lab2A.PNG -------------------------------------------------------------------------------- /lab_res/Lab2B.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdonbush/MIT6.824-Lab2-Raft/c240da0adadfcd639cc7de44ff134da5f47444e4/lab_res/Lab2B.PNG -------------------------------------------------------------------------------- /lab_res/Lab2C.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdonbush/MIT6.824-Lab2-Raft/c240da0adadfcd639cc7de44ff134da5f47444e4/lab_res/Lab2C.PNG -------------------------------------------------------------------------------- /lab_res/Lab2D.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/holdonbush/MIT6.824-Lab2-Raft/c240da0adadfcd639cc7de44ff134da5f47444e4/lab_res/Lab2D.PNG -------------------------------------------------------------------------------- /persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft and kvraft to save persistent 5 | // Raft state (log &c) and k/v server snapshots. 6 | // 7 | // we will use the original persister.go to test your code for grading. 8 | // so, while you can modify this code to help you debug, please 9 | // test with the original before submitting. 
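//
// Editor's note (illustration only, not part of the original file): raft.go's
// persist()/readPersist() drive this Persister roughly as in this sketch,
// assuming labgob-encoded state exactly as raft.go uses:
//
//	w := new(bytes.Buffer)
//	e := labgob.NewEncoder(w)
//	e.Encode(rf.currentTerm) // plus rf.voteFor, rf.logs
//	rf.persister.SaveRaftState(w.Bytes())
//	// ... and on restart:
//	d := labgob.NewDecoder(bytes.NewBuffer(rf.persister.ReadRaftState()))
//	d.Decode(&rf.currentTerm)
//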
10 | // 11 | 12 | import "sync" 13 | 14 | type Persister struct { 15 | mu sync.Mutex 16 | raftstate []byte 17 | snapshot []byte 18 | } 19 | 20 | func MakePersister() *Persister { 21 | return &Persister{} 22 | } 23 | 24 | func clone(orig []byte) []byte { 25 | x := make([]byte, len(orig)) 26 | copy(x, orig) 27 | return x 28 | } 29 | 30 | func (ps *Persister) Copy() *Persister { 31 | ps.mu.Lock() 32 | defer ps.mu.Unlock() 33 | np := MakePersister() 34 | np.raftstate = ps.raftstate 35 | np.snapshot = ps.snapshot 36 | return np 37 | } 38 | 39 | func (ps *Persister) SaveRaftState(state []byte) { 40 | ps.mu.Lock() 41 | defer ps.mu.Unlock() 42 | ps.raftstate = clone(state) 43 | } 44 | 45 | func (ps *Persister) ReadRaftState() []byte { 46 | ps.mu.Lock() 47 | defer ps.mu.Unlock() 48 | return clone(ps.raftstate) 49 | } 50 | 51 | func (ps *Persister) RaftStateSize() int { 52 | ps.mu.Lock() 53 | defer ps.mu.Unlock() 54 | return len(ps.raftstate) 55 | } 56 | 57 | // Save both Raft state and K/V snapshot as a single atomic action, 58 | // to help avoid them getting out of sync. 59 | func (ps *Persister) SaveStateAndSnapshot(state []byte, snapshot []byte) { 60 | ps.mu.Lock() 61 | defer ps.mu.Unlock() 62 | ps.raftstate = clone(state) 63 | ps.snapshot = clone(snapshot) 64 | } 65 | 66 | func (ps *Persister) ReadSnapshot() []byte { 67 | ps.mu.Lock() 68 | defer ps.mu.Unlock() 69 | return clone(ps.snapshot) 70 | } 71 | 72 | func (ps *Persister) SnapshotSize() int { 73 | ps.mu.Lock() 74 | defer ps.mu.Unlock() 75 | return len(ps.snapshot) 76 | } 77 | -------------------------------------------------------------------------------- /raft.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // this is an outline of the API that raft must expose to 5 | // the service (or tester). see comments below for 6 | // each of these functions for more details. 7 | // 8 | // rf = Make(...) 9 | // create a new Raft server. 10 | // rf.Start(command interface{}) (index, term, isleader) 11 | // start agreement on a new log entry 12 | // rf.GetState() (term, isLeader) 13 | // ask a Raft for its current term, and whether it thinks it is leader 14 | // ApplyMsg 15 | // each time a new entry is committed to the log, each Raft peer 16 | // should send an ApplyMsg to the service (or tester) 17 | // in the same server. 18 | // 19 | 20 | import ( 21 | "6.824/labgob" 22 | "bytes" 23 | "fmt" 24 | "math/rand" 25 | 26 | // "bytes" 27 | "sync" 28 | "sync/atomic" 29 | "time" 30 | 31 | // "6.824/labgob" 32 | "6.824/labrpc" 33 | ) 34 | 35 | 36 | // 37 | // as each Raft peer becomes aware that successive log entries are 38 | // committed, the peer should send an ApplyMsg to the service (or 39 | // tester) on the same server, via the applyCh passed to Make(). set 40 | // CommandValid to true to indicate that the ApplyMsg contains a newly 41 | // committed log entry. 42 | // 43 | // in part 2D you'll want to send other kinds of messages (e.g., 44 | // snapshots) on the applyCh, but set CommandValid to false for these 45 | // other uses. 46 | // 47 | type ApplyMsg struct { 48 | CommandValid bool 49 | Command interface{} 50 | CommandIndex int 51 | 52 | // For 2D: 53 | SnapshotValid bool 54 | Snapshot []byte 55 | SnapshotTerm int 56 | SnapshotIndex int 57 | } 58 | 59 | // 60 | // A Go object implementing a single Raft peer. 
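//
// Editor's note (illustration only): the service-facing API outlined at the top
// of this file (Make, Start, GetState, ApplyMsg) is consumed roughly as in this
// sketch; the apply loop is the service's responsibility, and peers, me,
// persister and command stand for whatever the calling service already has:
//
//	applyCh := make(chan ApplyMsg)
//	rf := Make(peers, me, persister, applyCh)
//	go func() {
//		for msg := range applyCh {
//			if msg.CommandValid {
//				// apply msg.Command at msg.CommandIndex to the state machine
//			}
//		}
//	}()
//	if index, term, isLeader := rf.Start(command); isLeader {
//		// the entry may commit at index in term, but this is not guaranteed
//		_, _ = index, term
//	}
//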
61 | // 62 | type Raft struct { 63 | mu sync.Mutex // Lock to protect shared access to this peer's state 64 | peers []*labrpc.ClientEnd // RPC end points of all peers 65 | persister *Persister // Object to hold this peer's persisted state 66 | me int // this peer's index into peers[] 67 | dead int32 // set by Kill() 68 | 69 | // Your data here (2A, 2B, 2C). 70 | // Look at the paper's Figure 2 for a description of what 71 | // state a Raft server must maintain. 72 | currentTerm int // Server当前的term 73 | voteFor int // Server在选举阶段的投票目标 74 | logs []LogEntry 75 | nextIndexs []int // Leader在发送LogEntry时,对应每个其他Server,开始发送的index 76 | matchIndexs []int 77 | commitIndex int // Server已经commit了的Log index 78 | lastApplied int // Server已经apply了的log index 79 | myStatus Status // Server的状态 80 | 81 | timer *time.Ticker // timer 82 | voteTimeout time.Duration // 选举超时时间,选举超时时间是会变动的,所以定义在Raft结构体中 83 | applyChan chan ApplyMsg // 消息channel 84 | 85 | // 2D 86 | lastIncludeIndex int // snapshot保存的最后log的index 87 | lastIncludeTerm int // snapshot保存的最后log的term 88 | snapshotCmd []byte 89 | } 90 | 91 | // LogEntry 92 | type LogEntry struct { 93 | Term int // LogEntry中记录有log的Term 94 | Cmd interface{} // Log的command 95 | } 96 | 97 | // 定义一个全局心跳超时时间 98 | var HeartBeatTimeout = 120*time.Millisecond 99 | 100 | type Status int64 101 | const ( 102 | Follower Status = iota 103 | Candidate 104 | Leader 105 | ) 106 | 107 | // return currentTerm and whether this server 108 | // believes it is the leader. 109 | func (rf *Raft) GetState() (int, bool) { 110 | 111 | var term int 112 | var isleader bool 113 | // Your code here (2A). 114 | // 获取Server当前的Term和是否是Leader 115 | rf.mu.Lock() 116 | term = rf.currentTerm 117 | isleader = rf.myStatus == Leader 118 | rf.mu.Unlock() 119 | return term, isleader 120 | } 121 | 122 | // 123 | // save Raft's persistent state to stable storage, 124 | // where it can later be retrieved after a crash and restart. 125 | // see paper's Figure 2 for a description of what should be persistent. 126 | // 127 | func (rf *Raft) persist() { 128 | // Your code here (2C). 129 | // Example: 130 | // w := new(bytes.Buffer) 131 | // e := labgob.NewEncoder(w) 132 | // e.Encode(rf.xxx) 133 | // e.Encode(rf.yyy) 134 | // data := w.Bytes() 135 | // rf.persister.SaveRaftState(data) 136 | w := new(bytes.Buffer) 137 | e := labgob.NewEncoder(w) 138 | e.Encode(rf.currentTerm) 139 | e.Encode(rf.voteFor) 140 | e.Encode(rf.logs) 141 | data := w.Bytes() 142 | rf.persister.SaveRaftState(data) 143 | } 144 | 145 | 146 | // 147 | // restore previously persisted state. 148 | // 149 | func (rf *Raft) readPersist(data []byte) { 150 | if data == nil || len(data) < 1 { // bootstrap without any state? 151 | return 152 | } 153 | // Your code here (2C). 154 | // Example: 155 | // r := bytes.NewBuffer(data) 156 | // d := labgob.NewDecoder(r) 157 | // var xxx 158 | // var yyy 159 | // if d.Decode(&xxx) != nil || 160 | // d.Decode(&yyy) != nil { 161 | // error... 162 | // } else { 163 | // rf.xxx = xxx 164 | // rf.yyy = yyy 165 | // } 166 | r := bytes.NewBuffer(data) 167 | d := labgob.NewDecoder(r) 168 | var tmpTerm int 169 | var tmpVoteFor int 170 | var tmplogs []LogEntry 171 | if d.Decode(&tmpTerm) != nil || 172 | d.Decode(&tmpVoteFor) != nil || 173 | d.Decode(&tmplogs) != nil { 174 | fmt.Println("decode error") 175 | } else { 176 | rf.currentTerm = tmpTerm 177 | rf.voteFor = tmpVoteFor 178 | rf.logs = tmplogs 179 | } 180 | } 181 | 182 | 183 | // 184 | // A service wants to switch to snapshot. 
Only do so if Raft hasn't 185 | // gotten more recent info since it communicated the snapshot on applyCh. 186 | // 187 | func (rf *Raft) CondInstallSnapshot(lastIncludedTerm int, lastIncludedIndex int, snapshot []byte) bool { 188 | 189 | // Your code here (2D). 190 | // if the local log already has entries beyond lastIncludedIndex, reject the snapshot and return false 191 | rf.mu.Lock() 192 | if len(rf.logs)+rf.lastIncludeIndex > lastIncludedIndex { 193 | rf.mu.Unlock(); return false // unlock before returning, otherwise this peer deadlocks on its next Lock() 194 | } 195 | 196 | rf.snapshotCmd = snapshot 197 | // the largest local log index is <= the snapshot's last included index, so discard the entire log 198 | rf.logs = []LogEntry{} 199 | rf.lastIncludeTerm = lastIncludedTerm 200 | rf.lastIncludeIndex = lastIncludedIndex 201 | 202 | rf.commitIndex = rf.lastIncludeIndex 203 | rf.lastApplied = rf.commitIndex 204 | rf.mu.Unlock() 205 | return true 206 | } 207 | 208 | // the service says it has created a snapshot that has 209 | // all info up to and including index. this means the 210 | // service no longer needs the log through (and including) 211 | // that index. Raft should now trim its log as much as possible. 212 | func (rf *Raft) Snapshot(index int, snapshot []byte) { 213 | // Your code here (2D). 214 | if rf.killed() { 215 | return 216 | } 217 | pos := index - rf.lastIncludeIndex-1 218 | rf.lastIncludeIndex = index 219 | rf.lastIncludeTerm = rf.logs[pos].Term 220 | rf.logs = rf.logs[pos+1:] 221 | rf.snapshotCmd = snapshot 222 | } 223 | 224 | 225 | // 226 | // example RequestVote RPC arguments structure. 227 | // field names must start with capital letters! 228 | // 229 | type VoteErr int64 230 | const ( 231 | Nil VoteErr = iota // no error during the vote 232 | VoteReqOutofDate // the vote request is out of date 233 | CandidateLogTooOld // the candidate's log is not up to date 234 | VotedThisTerm // already voted in this term 235 | RaftKilled // this Raft instance has been killed 236 | ) 237 | 238 | type RequestVoteArgs struct { 239 | // Your data here (2A, 2B). 240 | Term int 241 | Candidate int 242 | LastLogIndex int // for the election restriction: index of the candidate's last log entry 243 | LastLogTerm int // for the election restriction: term of the candidate's last log entry 244 | } 245 | 246 | // 247 | // example RequestVote RPC reply structure. 248 | // field names must start with capital letters! 249 | // 250 | type RequestVoteReply struct { 251 | // Your data here (2A).
252 | Term int 253 | VoteGranted bool //是否同意投票 254 | VoteErr VoteErr //投票操作错误 255 | } 256 | 257 | type AppendEntriesErr int64 258 | const ( 259 | AppendErr_Nil AppendEntriesErr = iota // Append操作无错误 260 | AppendErr_LogsNotMatch // Append操作log不匹配 261 | AppendErr_ReqOutofDate // Append操作请求过期 262 | AppendErr_ReqRepeat // Append请求重复 263 | AppendErr_Commited // Append的log已经commit 264 | AppendErr_RaftKilled // Raft程序终止 265 | ) 266 | 267 | type AppendEntriesArgs struct { 268 | Term int 269 | LeaderId int //Leader标识 270 | PrevLogIndex int //nextIndex前一个index 271 | PrevLogTerm int //nextindex前一个index处的term 272 | Logs []LogEntry 273 | LeaderCommit int //Leader已经commit了的Log index 274 | LogIndex int 275 | } 276 | 277 | type AppendEntriesReply struct { 278 | Term int 279 | Success bool // Append操作结果 280 | AppendErr AppendEntriesErr // Append操作错误情况 281 | NotMatchIndex int // 当前Term的第一个元素(没有被commit的元素)的index 282 | } 283 | 284 | // snapshot 285 | type InstallSnapshotRequest struct { 286 | Term int 287 | LeaderId int 288 | LastIncludeIndex int 289 | LastIncludeTerm int 290 | //Offset int // Lab2D不要求实现 291 | Data []byte 292 | //Done bool // Lab2D不要求实现 293 | } 294 | 295 | type InstallSnapshotErr int64 296 | const ( 297 | InstallSnapshotErr_Nil InstallSnapshotErr = iota 298 | InstallSnapshotErr_ReqOutofDate 299 | InstallSnapshotErr_OldIndex 300 | ) 301 | type InstallSnapshotResponse struct { 302 | Term int 303 | Err InstallSnapshotErr 304 | } 305 | 306 | // 307 | // example RequestVote RPC handler. 308 | // 309 | // 投票过程 310 | func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { 311 | // Your code here (2A, 2B). 312 | if rf.killed() { 313 | reply.Term = -1 314 | reply.VoteGranted = false 315 | reply.VoteErr = RaftKilled 316 | return 317 | } 318 | rf.mu.Lock() 319 | if args.Term < rf.currentTerm { // 请求term更小,不投票 320 | reply.Term = rf.currentTerm 321 | reply.VoteGranted = false 322 | reply.VoteErr = VoteReqOutofDate 323 | rf.mu.Unlock() 324 | return 325 | } 326 | 327 | if args.Term > rf.currentTerm { 328 | rf.myStatus = Follower 329 | rf.currentTerm = args.Term 330 | rf.voteFor = -1 331 | } 332 | // 选举限制-参数term小于自身的term 333 | candidateLogTermTooOld :=args.LastLogTerm < rf.lastIncludeTerm || (len(rf.logs) > 0 && args.LastLogTerm < rf.logs[len(rf.logs)-1].Term) // 有log,取最后一条log的term比较 || 无log时参数term小于snapshot的term 334 | // 选举限制-term相等,参数index小于自身index 335 | candidateLogIndexTooOld := (args.LastLogIndex < rf.lastIncludeIndex) || (len(rf.logs) > 0 && args.LastLogTerm == rf.logs[len(rf.logs)-1].Term && args.LastLogIndex < len(rf.logs)+rf.lastIncludeIndex) //有log,取最后一条log的index比较 || 无log时取snapshot的index比较 336 | 337 | // 选举限制 338 | if candidateLogIndexTooOld || candidateLogTermTooOld { 339 | rf.currentTerm = args.Term 340 | reply.Term = args.Term 341 | reply.VoteGranted = false 342 | reply.VoteErr = CandidateLogTooOld 343 | rf.persist() 344 | rf.mu.Unlock() 345 | return 346 | } 347 | 348 | if args.Term == rf.currentTerm { 349 | reply.Term = args.Term 350 | // 已经投过票,且投给了同一人,由于某些原因,之前的resp丢失 351 | if rf.voteFor == args.Candidate { 352 | rf.myStatus = Follower 353 | rf.timer.Reset(rf.voteTimeout) 354 | reply.VoteGranted = true 355 | reply.VoteErr = VotedThisTerm 356 | rf.mu.Unlock() 357 | return 358 | } 359 | // 来自同一Term不同Candidate的请求,忽略 360 | if rf.voteFor != -1 { 361 | reply.VoteGranted = false 362 | reply.VoteErr = VotedThisTerm 363 | rf.mu.Unlock() 364 | return 365 | } 366 | } 367 | 368 | // 可以投票 369 | rf.currentTerm = args.Term 370 | rf.voteFor = args.Candidate 371 | rf.myStatus = Follower 
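// Vote granted: the candidate has been recorded in voteFor, this peer has stepped
// down to follower, and the election timer is reset next so it does not start a
// competing election in this term.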
372 | rf.timer.Reset(rf.voteTimeout) 373 | 374 | reply.Term = rf.currentTerm 375 | reply.VoteGranted = true 376 | reply.VoteErr = Nil 377 | rf.persist() 378 | rf.mu.Unlock() 379 | return 380 | } 381 | 382 | // heartbeat / log-append (AppendEntries RPC handler) 383 | func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { 384 | if rf.killed() { 385 | reply.Term = -1 386 | reply.AppendErr = AppendErr_RaftKilled 387 | reply.Success = false 388 | return 389 | } 390 | rf.mu.Lock() 391 | // stale request 392 | if args.Term < rf.currentTerm || args.PrevLogIndex < rf.lastIncludeIndex { 393 | reply.Term = rf.currentTerm 394 | reply.Success = false 395 | reply.AppendErr = AppendErr_ReqOutofDate 396 | reply.NotMatchIndex = -1 397 | rf.mu.Unlock() 398 | return 399 | } 400 | 401 | rf.currentTerm = args.Term 402 | rf.voteFor = args.LeaderId 403 | rf.myStatus = Follower 404 | rf.timer.Reset(rf.voteTimeout) 405 | 406 | // logs do not match at PrevLogIndex 407 | if (args.PrevLogIndex != rf.lastIncludeIndex && (args.PrevLogIndex >= len(rf.logs)+rf.lastIncludeIndex+1 || args.PrevLogTerm != rf.logs[args.PrevLogIndex-rf.lastIncludeIndex-1].Term)) || 408 | (args.PrevLogIndex == rf.lastIncludeIndex && args.PrevLogTerm != rf.lastIncludeTerm){ 409 | reply.Term = rf.currentTerm 410 | reply.Success = false 411 | reply.AppendErr = AppendErr_LogsNotMatch 412 | reply.NotMatchIndex = rf.lastApplied + 1 413 | rf.persist() 414 | rf.mu.Unlock() 415 | return 416 | } 417 | 418 | if rf.lastApplied > args.PrevLogIndex { 419 | reply.Term = rf.currentTerm 420 | reply.Success = false 421 | reply.AppendErr = AppendErr_Commited 422 | reply.NotMatchIndex = rf.lastApplied+1 423 | rf.persist() 424 | rf.mu.Unlock() 425 | return 426 | } 427 | 428 | // handle the leader's entries 429 | if args.Logs != nil { 430 | rf.logs = rf.logs[:args.PrevLogIndex-rf.lastIncludeIndex] 431 | rf.logs = append(rf.logs, args.Logs...)
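// The local log's suffix after PrevLogIndex has been replaced with the leader's
// entries, so it now matches the leader's log up through args.LogIndex.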
432 | } 433 | for rf.lastApplied < args.LeaderCommit { 434 | rf.lastApplied++ 435 | applyMsg := ApplyMsg{ 436 | CommandValid: true, 437 | CommandIndex: rf.lastApplied, 438 | Command: rf.logs[rf.lastApplied-rf.lastIncludeIndex-1].Cmd, 439 | } 440 | rf.applyChan <- applyMsg 441 | rf.commitIndex = rf.lastApplied 442 | } 443 | 444 | 445 | reply.Term = rf.currentTerm 446 | reply.Success = true 447 | reply.AppendErr = AppendErr_Nil 448 | reply.NotMatchIndex = -1 449 | rf.persist() 450 | rf.mu.Unlock() 451 | return 452 | } 453 | 454 | func (rf *Raft) InstallSnapshot(args *InstallSnapshotRequest, reply *InstallSnapshotResponse) { 455 | if rf.killed() { 456 | reply.Term = args.Term 457 | return 458 | } 459 | 460 | rf.mu.Lock() 461 | //fmt.Println(rf.me,"收到snapshot","来自",args.LeaderId,"自身lastinclueindex",rf.lastIncludeTerm, args.LastIncludeIndex) 462 | // defer rf.mu.Unlock() 463 | 464 | // 过期消息 465 | if args.Term < rf.currentTerm { 466 | reply.Term = rf.currentTerm 467 | //fmt.Println("installsnapshot消息过期") 468 | reply.Err = InstallSnapshotErr_ReqOutofDate 469 | rf.mu.Unlock() 470 | return 471 | } 472 | 473 | // 错误消息 474 | if args.LastIncludeIndex <= rf.lastIncludeIndex { 475 | reply.Term = rf.currentTerm 476 | //fmt.Println("installsnapsho消息lastIncludeIndex错误") 477 | reply.Err = InstallSnapshotErr_OldIndex 478 | rf.timer.Reset(rf.voteTimeout) 479 | rf.mu.Unlock() 480 | return 481 | } 482 | // 创建快照 483 | rf.currentTerm = args.Term 484 | rf.voteFor = args.LeaderId 485 | rf.myStatus = Follower 486 | rf.timer.Reset(rf.voteTimeout) 487 | 488 | if len(rf.logs)+rf.lastIncludeIndex <= args.LastIncludeIndex { 489 | rf.logs = []LogEntry{} 490 | rf.lastIncludeIndex = args.LastIncludeIndex 491 | rf.lastIncludeTerm = args.LastIncludeTerm 492 | } else { 493 | // rf.logs = rf.logs[len(rf.logs)+rf.lastIncludeIndex-args.LastIncludeIndex:] 494 | rf.logs = rf.logs[args.LastIncludeIndex-rf.lastIncludeIndex:] 495 | rf.lastIncludeIndex = args.LastIncludeIndex 496 | rf.lastIncludeTerm = args.LastIncludeTerm 497 | } 498 | 499 | rf.applyChan <- ApplyMsg{ 500 | SnapshotValid: true, 501 | Snapshot: args.Data, 502 | SnapshotTerm: args.LastIncludeTerm, 503 | SnapshotIndex: args.LastIncludeIndex, 504 | } 505 | rf.lastApplied = args.LastIncludeIndex 506 | rf.commitIndex = rf.lastApplied 507 | 508 | reply.Term = rf.currentTerm 509 | reply.Err = InstallSnapshotErr_Nil 510 | rf.mu.Unlock() 511 | return 512 | } 513 | // 514 | // example code to send a RequestVote RPC to a server. 515 | // server is the index of the target server in rf.peers[]. 516 | // expects RPC arguments in args. 517 | // fills in *reply with RPC reply, so caller should 518 | // pass &reply. 519 | // the types of the args and reply passed to Call() must be 520 | // the same as the types of the arguments declared in the 521 | // handler function (including whether they are pointers). 522 | // 523 | // The labrpc package simulates a lossy network, in which servers 524 | // may be unreachable, and in which requests and replies may be lost. 525 | // Call() sends a request and waits for a reply. If a reply arrives 526 | // within a timeout interval, Call() returns true; otherwise 527 | // Call() returns false. Thus Call() may not return for a while. 528 | // A false return can be caused by a dead server, a live server that 529 | // can't be reached, a lost request, or a lost reply. 530 | // 531 | // Call() is guaranteed to return (perhaps after a delay) *except* if the 532 | // handler function on the server side does not return. 
Thus there 533 | // is no need to implement your own timeouts around Call(). 534 | // 535 | // look at the comments in ../labrpc/labrpc.go for more details. 536 | // 537 | // if you're having trouble getting RPC to work, check that you've 538 | // capitalized all field names in structs passed over RPC, and 539 | // that the caller passes the address of the reply struct with &, not 540 | // the struct itself. 541 | // 542 | // 改造函数,添加了一个参数,用于方便实现同一Term内请求的统计 543 | func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, reply *RequestVoteReply, voteNum *int) bool { 544 | if rf.killed() { 545 | return false 546 | } 547 | ok := rf.peers[server].Call("Raft.RequestVote", args, reply) 548 | for !ok { 549 | // 失败重传 550 | if rf.killed() { 551 | return false 552 | } 553 | ok := rf.peers[server].Call("Raft.RequestVote", args, reply) 554 | if ok { 555 | break 556 | } 557 | } 558 | 559 | if rf.killed() { 560 | return false 561 | } 562 | rf.mu.Lock() 563 | if args.Term < rf.currentTerm { // 过期请求 564 | rf.mu.Unlock() 565 | return false 566 | } 567 | rf.mu.Unlock() 568 | 569 | switch reply.VoteErr { 570 | case VoteReqOutofDate: 571 | rf.mu.Lock() 572 | rf.myStatus = Follower 573 | rf.timer.Reset(rf.voteTimeout) 574 | if reply.Term > rf.currentTerm { 575 | rf.currentTerm = reply.Term 576 | rf.voteFor = -1 577 | rf.persist() 578 | } 579 | rf.mu.Unlock() 580 | case CandidateLogTooOld: 581 | // 日志不够新 582 | rf.mu.Lock() 583 | rf.myStatus = Follower 584 | rf.timer.Reset(rf.voteTimeout) 585 | if reply.Term > rf.currentTerm { 586 | rf.currentTerm = reply.Term 587 | rf.voteFor = -1 588 | rf.persist() 589 | } 590 | rf.mu.Unlock() 591 | case Nil,VotedThisTerm: 592 | rf.mu.Lock() 593 | //根据是否同意投票,收集选票数量 594 | if reply.VoteGranted && reply.Term == rf.currentTerm && *voteNum <= len(rf.peers)/2 { 595 | *voteNum++ 596 | } 597 | if *voteNum > len(rf.peers)/2 { 598 | *voteNum = 0 599 | if rf.myStatus == Leader { 600 | rf.mu.Unlock() 601 | return ok 602 | } 603 | rf.myStatus = Leader 604 | rf.nextIndexs = make([]int, len(rf.peers)) 605 | for i,_ := range rf.nextIndexs { 606 | rf.nextIndexs[i] = len(rf.logs)+rf.lastIncludeIndex+1 607 | } 608 | rf.timer.Reset(HeartBeatTimeout) 609 | } 610 | rf.mu.Unlock() 611 | case RaftKilled: 612 | return false 613 | } 614 | return ok 615 | } 616 | 617 | func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs, reply *AppendEntriesReply, appendNum *int) bool { 618 | if rf.killed() { 619 | return false 620 | } 621 | ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) 622 | for !ok { 623 | if rf.killed() { 624 | return false 625 | } 626 | ok = rf.peers[server].Call("Raft.AppendEntries", args, reply) 627 | if ok { 628 | break 629 | } 630 | } 631 | 632 | if rf.killed() { 633 | return false 634 | } 635 | rf.mu.Lock() 636 | if args.Term < rf.currentTerm { // 过期消息 637 | rf.mu.Unlock() 638 | return false 639 | } 640 | 641 | switch reply.AppendErr { 642 | case AppendErr_Nil: 643 | if reply.Success && reply.Term == rf.currentTerm && *appendNum <= len(rf.peers)/2 { 644 | *appendNum++ 645 | } 646 | if rf.nextIndexs[server] > args.LogIndex+1 { 647 | rf.mu.Unlock() 648 | return ok 649 | } 650 | rf.nextIndexs[server] = args.LogIndex+1 651 | if *appendNum > len(rf.peers)/2 { 652 | *appendNum = 0 653 | if (args.LogIndex>rf.lastIncludeIndex && rf.logs[args.LogIndex-rf.lastIncludeIndex-1].Term != rf.currentTerm) || 654 | (args.LogIndex == rf.lastIncludeIndex && rf.lastIncludeTerm != rf.currentTerm){ 655 | rf.mu.Unlock() 656 | return false 657 | } 658 | for rf.lastApplied < 
args.LogIndex { 659 | rf.lastApplied++ 660 | applyMsg := ApplyMsg{ 661 | CommandValid: true, 662 | Command: rf.logs[rf.lastApplied-rf.lastIncludeIndex-1].Cmd, 663 | CommandIndex: rf.lastApplied, 664 | } 665 | rf.applyChan <- applyMsg 666 | rf.commitIndex = rf.lastApplied 667 | } 668 | } 669 | case AppendErr_ReqOutofDate: 670 | rf.myStatus = Follower 671 | rf.timer.Reset(rf.voteTimeout) 672 | if reply.Term > rf.currentTerm { 673 | rf.currentTerm = reply.Term 674 | rf.voteFor = -1 675 | rf.persist() 676 | } 677 | case AppendErr_LogsNotMatch: 678 | if args.Term != rf.currentTerm { 679 | rf.mu.Unlock() 680 | return false 681 | } 682 | rf.nextIndexs[server] = reply.NotMatchIndex 683 | case AppendErr_ReqRepeat: 684 | if reply.Term > rf.currentTerm { 685 | rf.myStatus = Follower 686 | rf.currentTerm = reply.Term 687 | rf.voteFor = -1 688 | rf.timer.Reset(rf.voteTimeout) 689 | rf.persist() 690 | } 691 | case AppendErr_Commited: 692 | if args.Term != rf.currentTerm { 693 | rf.mu.Unlock() 694 | return false 695 | } 696 | rf.nextIndexs[server] = reply.NotMatchIndex 697 | case AppendErr_RaftKilled: 698 | rf.mu.Unlock() 699 | return false 700 | } 701 | rf.mu.Unlock() 702 | return ok 703 | } 704 | 705 | func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotRequest, reply *InstallSnapshotResponse) bool { 706 | if rf.killed() { 707 | return false 708 | } 709 | ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) 710 | for !ok { 711 | if rf.killed() { 712 | return false 713 | } 714 | ok = rf.peers[server].Call("Raft.InstallSnapshot", args, reply) 715 | if ok { 716 | break 717 | } 718 | } 719 | 720 | if rf.killed() { 721 | return false 722 | } 723 | rf.mu.Lock() 724 | if reply.Term < rf.currentTerm { 725 | rf.mu.Unlock() 726 | return false 727 | } 728 | switch reply.Err { 729 | case InstallSnapshotErr_Nil: 730 | if reply.Term > rf.currentTerm { 731 | rf.myStatus = Follower 732 | rf.currentTerm = reply.Term 733 | rf.voteFor = -1 734 | rf.timer.Reset(rf.voteTimeout) 735 | rf.persist() 736 | } 737 | rf.nextIndexs[server] = args.LastIncludeIndex+1 738 | case InstallSnapshotErr_OldIndex: 739 | if reply.Term > rf.currentTerm { 740 | rf.myStatus = Follower 741 | rf.currentTerm = reply.Term 742 | rf.voteFor = -1 743 | rf.timer.Reset(rf.voteTimeout) 744 | rf.persist() 745 | } 746 | rf.nextIndexs[server] = len(rf.logs)+rf.lastIncludeIndex+1 747 | case InstallSnapshotErr_ReqOutofDate: 748 | } 749 | 750 | 751 | rf.mu.Unlock() 752 | return false 753 | } 754 | // 755 | // the service using Raft (e.g. a k/v server) wants to start 756 | // agreement on the next command to be appended to Raft's log. if this 757 | // server isn't the leader, returns false. otherwise start the 758 | // agreement and return immediately. there is no guarantee that this 759 | // command will ever be committed to the Raft log, since the leader 760 | // may fail or lose an election. even if the Raft instance has been killed, 761 | // this function should return gracefully. 762 | // 763 | // the first return value is the index that the command will appear at 764 | // if it's ever committed. the second return value is the current 765 | // term. the third return value is true if this server believes it is 766 | // the leader. 767 | // 768 | func (rf *Raft) Start(command interface{}) (int, int, bool) { 769 | index := -1 770 | term := -1 771 | isLeader := true 772 | 773 | // Your code here (2B). 
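// Only the leader accepts the command: it is appended to the local log below and
// replicated to followers by the next heartbeat round in ticker().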
774 | // 客户端的log 775 | if rf.killed() { 776 | return index, term, false 777 | } 778 | rf.mu.Lock() 779 | isLeader = rf.myStatus == Leader 780 | if !isLeader { 781 | rf.mu.Unlock() 782 | return index, term, isLeader 783 | } 784 | logEntry := LogEntry{Term: rf.currentTerm, Cmd: command} 785 | rf.logs = append(rf.logs, logEntry) 786 | 787 | index = len(rf.logs)+rf.lastIncludeIndex 788 | term = rf.currentTerm 789 | rf.persist() 790 | rf.mu.Unlock() 791 | 792 | return index, term, isLeader 793 | } 794 | 795 | // 796 | // the tester doesn't halt goroutines created by Raft after each test, 797 | // but it does call the Kill() method. your code can use killed() to 798 | // check whether Kill() has been called. the use of atomic avoids the 799 | // need for a lock. 800 | // 801 | // the issue is that long-running goroutines use memory and may chew 802 | // up CPU time, perhaps causing later tests to fail and generating 803 | // confusing debug output. any goroutine with a long-running loop 804 | // should call killed() to check whether it should stop. 805 | // 806 | func (rf *Raft) Kill() { 807 | atomic.StoreInt32(&rf.dead, 1) 808 | // Your code here, if desired. 809 | rf.mu.Lock() 810 | rf.timer.Stop() 811 | rf.mu.Unlock() 812 | } 813 | 814 | func (rf *Raft) killed() bool { 815 | z := atomic.LoadInt32(&rf.dead) 816 | return z == 1 817 | } 818 | 819 | // The ticker go routine starts a new election if this peer hasn't received 820 | // heartsbeats recently. 821 | func (rf *Raft) ticker() { 822 | for rf.killed() == false { 823 | 824 | // Your code here to check if a leader election should 825 | // be started and to randomize sleeping time using 826 | // time.Sleep(). 827 | select { 828 | case <-rf.timer.C: 829 | if rf.killed() { 830 | return 831 | } 832 | rf.mu.Lock() 833 | currStatus := rf.myStatus 834 | switch currStatus { 835 | case Follower: 836 | rf.myStatus = Candidate 837 | fallthrough 838 | case Candidate: 839 | // 进行选举 840 | rf.currentTerm+=1 841 | rf.voteFor = rf.me 842 | // 每轮选举开始时,重新设置选举超时 843 | rf.voteTimeout = time.Duration(rand.Intn(150)+200)*time.Millisecond 844 | voteNum := 1 845 | rf.persist() 846 | rf.timer.Reset(rf.voteTimeout) 847 | // 构造msg 848 | for i,_ := range rf.peers { 849 | if i == rf.me { 850 | continue 851 | } 852 | voteArgs := &RequestVoteArgs{ 853 | Term: rf.currentTerm, 854 | Candidate: rf.me, 855 | LastLogIndex: len(rf.logs)+rf.lastIncludeIndex, 856 | LastLogTerm: rf.lastIncludeTerm, 857 | } 858 | if len(rf.logs) > 0 { 859 | voteArgs.LastLogTerm = rf.logs[len(rf.logs)-1].Term 860 | } 861 | voteReply := new(RequestVoteReply) 862 | //fmt.Println("发起选举",rf.me,i,voteArgs,rf.currentTerm, rf.lastIncludeIndex, rf.lastIncludeTerm) 863 | go rf.sendRequestVote(i, voteArgs, voteReply, &voteNum) 864 | } 865 | case Leader: 866 | // 进行心跳 867 | appendNum := 1 868 | rf.timer.Reset(HeartBeatTimeout) 869 | // 构造msg 870 | for i,_ := range rf.peers { 871 | if i == rf.me { 872 | continue 873 | } 874 | appendEntriesArgs := &AppendEntriesArgs{ 875 | Term: rf.currentTerm, 876 | LeaderId: rf.me, 877 | PrevLogIndex: 0, 878 | PrevLogTerm: 0, 879 | Logs: nil, 880 | LeaderCommit: rf.commitIndex, 881 | LogIndex: len(rf.logs)+rf.lastIncludeIndex, 882 | } 883 | //installSnapshot,如果rf.nextIndex[i]小于等lastCludeIndex,则发送snapShot 884 | if rf.nextIndexs[i] <= rf.lastIncludeIndex { 885 | installSnapshotReq := &InstallSnapshotRequest{ 886 | Term: rf.currentTerm, 887 | LeaderId: rf.me, 888 | LastIncludeIndex: rf.lastIncludeIndex, 889 | LastIncludeTerm: rf.lastIncludeTerm, 890 | Data: rf.snapshotCmd, 891 | } 
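// Peer i has fallen behind the snapshot boundary (nextIndexs[i] <= lastIncludeIndex),
// so send the whole snapshot via InstallSnapshot instead of log entries.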
892 | installSnapshotReply := &InstallSnapshotResponse{} 893 | //fmt.Println("installsnapshot", rf.me, i, rf.lastIncludeIndex, rf.lastIncludeTerm, rf.currentTerm, installSnapshotReq) 894 | go rf.sendInstallSnapshot(i, installSnapshotReq, installSnapshotReply) 895 | continue 896 | } 897 | for rf.nextIndexs[i] > rf.lastIncludeIndex { 898 | appendEntriesArgs.PrevLogIndex = rf.nextIndexs[i]-1 899 | if appendEntriesArgs.PrevLogIndex >= len(rf.logs)+rf.lastIncludeIndex+1 { 900 | rf.nextIndexs[i]-- 901 | continue 902 | } 903 | if appendEntriesArgs.PrevLogIndex == rf.lastIncludeIndex { 904 | appendEntriesArgs.PrevLogTerm = rf.lastIncludeTerm 905 | } else { 906 | appendEntriesArgs.PrevLogTerm = rf.logs[appendEntriesArgs.PrevLogIndex-rf.lastIncludeIndex-1].Term 907 | } 908 | break 909 | } 910 | if rf.nextIndexs[i] < len(rf.logs)+rf.lastIncludeIndex+1 { 911 | appendEntriesArgs.Logs = make([]LogEntry,appendEntriesArgs.LogIndex+1-rf.nextIndexs[i]) 912 | copy(appendEntriesArgs.Logs, rf.logs[rf.nextIndexs[i]-rf.lastIncludeIndex-1:appendEntriesArgs.LogIndex-rf.lastIncludeIndex]) 913 | } 914 | 915 | appendEntriesReply := new(AppendEntriesReply) 916 | go rf.sendAppendEntries(i, appendEntriesArgs, appendEntriesReply, &appendNum) 917 | } 918 | } 919 | rf.mu.Unlock() 920 | } 921 | } 922 | } 923 | 924 | // 925 | // the service or tester wants to create a Raft server. the ports 926 | // of all the Raft servers (including this one) are in peers[]. this 927 | // server's port is peers[me]. all the servers' peers[] arrays 928 | // have the same order. persister is a place for this server to 929 | // save its persistent state, and also initially holds the most 930 | // recent saved state, if any. applyCh is a channel on which the 931 | // tester or service expects Raft to send ApplyMsg messages. 932 | // Make() must return quickly, so it should start goroutines 933 | // for any long-running work. 934 | // 935 | func Make(peers []*labrpc.ClientEnd, me int, 936 | persister *Persister, applyCh chan ApplyMsg) *Raft { 937 | rf := &Raft{} 938 | rf.peers = peers 939 | rf.persister = persister 940 | rf.me = me 941 | 942 | // Your initialization code here (2A, 2B, 2C). 943 | rf.myStatus = Follower 944 | rf.voteFor = -1 945 | rand.Seed(time.Now().UnixNano()) 946 | rf.voteTimeout = time.Duration(rand.Intn(150)+200)*time.Millisecond 947 | rf.currentTerm, rf.commitIndex, rf.lastApplied = 0,0,0 948 | rf.nextIndexs, rf.matchIndexs, rf.logs = nil, nil, []LogEntry{{0,nil}} 949 | rf.timer = time.NewTicker(rf.voteTimeout) 950 | rf.applyChan = applyCh 951 | 952 | // 2D 953 | rf.lastIncludeIndex = -1 954 | rf.lastIncludeTerm = 0 955 | rf.snapshotCmd = make([]byte, 0) 956 | // initialize from state persisted before a crash 957 | rf.readPersist(persister.ReadRaftState()) 958 | 959 | // start ticker goroutine to start elections 960 | go rf.ticker() 961 | 962 | 963 | return rf 964 | } 965 | -------------------------------------------------------------------------------- /test_test.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // Raft tests. 5 | // 6 | // we will use the original test_test.go to test your code for grading. 7 | // so, while you can modify this code to help you debug, please 8 | // test with the original before submitting. 
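//
// run one group of tests at a time, e.g.:
//
//	go test -run 2A -race
//
// (see README.md for the per-part commands)
//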
9 | // 10 | 11 | import "testing" 12 | import "fmt" 13 | import "time" 14 | import "math/rand" 15 | import "sync/atomic" 16 | import "sync" 17 | 18 | // The tester generously allows solutions to complete elections in one second 19 | // (much more than the paper's range of timeouts). 20 | const RaftElectionTimeout = 1000 * time.Millisecond 21 | 22 | func TestInitialElection2A(t *testing.T) { 23 | servers := 3 24 | cfg := make_config(t, servers, false, false) 25 | defer cfg.cleanup() 26 | 27 | cfg.begin("Test (2A): initial election") 28 | 29 | // is a leader elected? 30 | cfg.checkOneLeader() 31 | 32 | // sleep a bit to avoid racing with followers learning of the 33 | // election, then check that all peers agree on the term. 34 | time.Sleep(50 * time.Millisecond) 35 | term1 := cfg.checkTerms() 36 | if term1 < 1 { 37 | t.Fatalf("term is %v, but should be at least 1", term1) 38 | } 39 | 40 | // does the leader+term stay the same if there is no network failure? 41 | time.Sleep(2 * RaftElectionTimeout) 42 | term2 := cfg.checkTerms() 43 | if term1 != term2 { 44 | fmt.Printf("warning: term changed even though there were no failures") 45 | } 46 | 47 | // there should still be a leader. 48 | cfg.checkOneLeader() 49 | 50 | cfg.end() 51 | } 52 | 53 | func TestReElection2A(t *testing.T) { 54 | servers := 3 55 | cfg := make_config(t, servers, false, false) 56 | defer cfg.cleanup() 57 | 58 | cfg.begin("Test (2A): election after network failure") 59 | 60 | leader1 := cfg.checkOneLeader() 61 | 62 | // if the leader disconnects, a new one should be elected. 63 | cfg.disconnect(leader1) 64 | cfg.checkOneLeader() 65 | 66 | // if the old leader rejoins, that shouldn't 67 | // disturb the new leader. 68 | cfg.connect(leader1) 69 | leader2 := cfg.checkOneLeader() 70 | 71 | // if there's no quorum, no leader should 72 | // be elected. 73 | cfg.disconnect(leader2) 74 | cfg.disconnect((leader2 + 1) % servers) 75 | time.Sleep(2 * RaftElectionTimeout) 76 | cfg.checkNoLeader() 77 | 78 | // if a quorum arises, it should elect a leader. 79 | cfg.connect((leader2 + 1) % servers) 80 | cfg.checkOneLeader() 81 | 82 | // re-join of last node shouldn't prevent leader from existing. 83 | cfg.connect(leader2) 84 | cfg.checkOneLeader() 85 | 86 | cfg.end() 87 | } 88 | 89 | func TestManyElections2A(t *testing.T) { 90 | servers := 7 91 | cfg := make_config(t, servers, false, false) 92 | defer cfg.cleanup() 93 | 94 | cfg.begin("Test (2A): multiple elections") 95 | 96 | cfg.checkOneLeader() 97 | 98 | iters := 10 99 | for ii := 1; ii < iters; ii++ { 100 | // disconnect three nodes 101 | i1 := rand.Int() % servers 102 | i2 := rand.Int() % servers 103 | i3 := rand.Int() % servers 104 | cfg.disconnect(i1) 105 | cfg.disconnect(i2) 106 | cfg.disconnect(i3) 107 | 108 | // either the current leader should still be alive, 109 | // or the remaining four should elect a new one. 
110 | cfg.checkOneLeader() 111 | 112 | cfg.connect(i1) 113 | cfg.connect(i2) 114 | cfg.connect(i3) 115 | } 116 | 117 | cfg.checkOneLeader() 118 | 119 | cfg.end() 120 | } 121 | 122 | func TestBasicAgree2B(t *testing.T) { 123 | servers := 3 124 | cfg := make_config(t, servers, false, false) 125 | defer cfg.cleanup() 126 | 127 | cfg.begin("Test (2B): basic agreement") 128 | 129 | iters := 3 130 | for index := 1; index < iters+1; index++ { 131 | nd, _ := cfg.nCommitted(index) 132 | if nd > 0 { 133 | t.Fatalf("some have committed before Start()") 134 | } 135 | 136 | xindex := cfg.one(index*100, servers, false) 137 | if xindex != index { 138 | t.Fatalf("got index %v but expected %v", xindex, index) 139 | } 140 | } 141 | 142 | cfg.end() 143 | } 144 | 145 | // 146 | // check, based on counting bytes of RPCs, that 147 | // each command is sent to each peer just once. 148 | // 149 | func TestRPCBytes2B(t *testing.T) { 150 | servers := 3 151 | cfg := make_config(t, servers, false, false) 152 | defer cfg.cleanup() 153 | 154 | cfg.begin("Test (2B): RPC byte count") 155 | 156 | cfg.one(99, servers, false) 157 | bytes0 := cfg.bytesTotal() 158 | 159 | iters := 10 160 | var sent int64 = 0 161 | for index := 2; index < iters+2; index++ { 162 | cmd := randstring(5000) 163 | xindex := cfg.one(cmd, servers, false) 164 | if xindex != index { 165 | t.Fatalf("got index %v but expected %v", xindex, index) 166 | } 167 | sent += int64(len(cmd)) 168 | } 169 | 170 | bytes1 := cfg.bytesTotal() 171 | got := bytes1 - bytes0 172 | expected := int64(servers) * sent 173 | if got > expected+50000 { 174 | t.Fatalf("too many RPC bytes; got %v, expected %v", got, expected) 175 | } 176 | 177 | cfg.end() 178 | } 179 | 180 | func TestFailAgree2B(t *testing.T) { 181 | servers := 3 182 | cfg := make_config(t, servers, false, false) 183 | defer cfg.cleanup() 184 | 185 | cfg.begin("Test (2B): agreement despite follower disconnection") 186 | 187 | cfg.one(101, servers, false) 188 | 189 | // disconnect one follower from the network. 190 | leader := cfg.checkOneLeader() 191 | cfg.disconnect((leader + 1) % servers) 192 | 193 | // the leader and remaining follower should be 194 | // able to agree despite the disconnected follower. 195 | cfg.one(102, servers-1, false) 196 | cfg.one(103, servers-1, false) 197 | time.Sleep(RaftElectionTimeout) 198 | cfg.one(104, servers-1, false) 199 | cfg.one(105, servers-1, false) 200 | 201 | // re-connect 202 | cfg.connect((leader + 1) % servers) 203 | 204 | // the full set of servers should preserve 205 | // previous agreements, and be able to agree 206 | // on new commands. 
207 | cfg.one(106, servers, true) 208 | time.Sleep(RaftElectionTimeout) 209 | cfg.one(107, servers, true) 210 | 211 | cfg.end() 212 | } 213 | 214 | func TestFailNoAgree2B(t *testing.T) { 215 | servers := 5 216 | cfg := make_config(t, servers, false, false) 217 | defer cfg.cleanup() 218 | 219 | cfg.begin("Test (2B): no agreement if too many followers disconnect") 220 | 221 | cfg.one(10, servers, false) 222 | 223 | // 3 of 5 followers disconnect 224 | leader := cfg.checkOneLeader() 225 | cfg.disconnect((leader + 1) % servers) 226 | cfg.disconnect((leader + 2) % servers) 227 | cfg.disconnect((leader + 3) % servers) 228 | 229 | index, _, ok := cfg.rafts[leader].Start(20) 230 | if ok != true { 231 | t.Fatalf("leader rejected Start()") 232 | } 233 | if index != 2 { 234 | t.Fatalf("expected index 2, got %v", index) 235 | } 236 | 237 | time.Sleep(2 * RaftElectionTimeout) 238 | 239 | n, _ := cfg.nCommitted(index) 240 | if n > 0 { 241 | t.Fatalf("%v committed but no majority", n) 242 | } 243 | 244 | // repair 245 | cfg.connect((leader + 1) % servers) 246 | cfg.connect((leader + 2) % servers) 247 | cfg.connect((leader + 3) % servers) 248 | 249 | // the disconnected majority may have chosen a leader from 250 | // among their own ranks, forgetting index 2. 251 | leader2 := cfg.checkOneLeader() 252 | index2, _, ok2 := cfg.rafts[leader2].Start(30) 253 | if ok2 == false { 254 | t.Fatalf("leader2 rejected Start()") 255 | } 256 | if index2 < 2 || index2 > 3 { 257 | t.Fatalf("unexpected index %v", index2) 258 | } 259 | 260 | cfg.one(1000, servers, true) 261 | 262 | cfg.end() 263 | } 264 | 265 | func TestConcurrentStarts2B(t *testing.T) { 266 | servers := 3 267 | cfg := make_config(t, servers, false, false) 268 | defer cfg.cleanup() 269 | 270 | cfg.begin("Test (2B): concurrent Start()s") 271 | 272 | var success bool 273 | loop: 274 | for try := 0; try < 5; try++ { 275 | if try > 0 { 276 | // give solution some time to settle 277 | time.Sleep(3 * time.Second) 278 | } 279 | 280 | leader := cfg.checkOneLeader() 281 | _, term, ok := cfg.rafts[leader].Start(1) 282 | if !ok { 283 | // leader moved on really quickly 284 | continue 285 | } 286 | 287 | iters := 5 288 | var wg sync.WaitGroup 289 | is := make(chan int, iters) 290 | for ii := 0; ii < iters; ii++ { 291 | wg.Add(1) 292 | go func(i int) { 293 | defer wg.Done() 294 | i, term1, ok := cfg.rafts[leader].Start(100 + i) 295 | if term1 != term { 296 | return 297 | } 298 | if ok != true { 299 | return 300 | } 301 | is <- i 302 | }(ii) 303 | } 304 | 305 | wg.Wait() 306 | close(is) 307 | 308 | for j := 0; j < servers; j++ { 309 | if t, _ := cfg.rafts[j].GetState(); t != term { 310 | // term changed -- can't expect low RPC counts 311 | continue loop 312 | } 313 | } 314 | 315 | failed := false 316 | cmds := []int{} 317 | for index := range is { 318 | cmd := cfg.wait(index, servers, term) 319 | if ix, ok := cmd.(int); ok { 320 | if ix == -1 { 321 | // peers have moved on to later terms 322 | // so we can't expect all Start()s to 323 | // have succeeded 324 | failed = true 325 | break 326 | } 327 | cmds = append(cmds, ix) 328 | } else { 329 | t.Fatalf("value %v is not an int", cmd) 330 | } 331 | } 332 | 333 | if failed { 334 | // avoid leaking goroutines 335 | go func() { 336 | for range is { 337 | } 338 | }() 339 | continue 340 | } 341 | 342 | for ii := 0; ii < iters; ii++ { 343 | x := 100 + ii 344 | ok := false 345 | for j := 0; j < len(cmds); j++ { 346 | if cmds[j] == x { 347 | ok = true 348 | } 349 | } 350 | if ok == false { 351 | t.Fatalf("cmd %v missing in %v", x, 
cmds) 352 | } 353 | } 354 | 355 | success = true 356 | break 357 | } 358 | 359 | if !success { 360 | t.Fatalf("term changed too often") 361 | } 362 | 363 | cfg.end() 364 | } 365 | 366 | func TestRejoin2B(t *testing.T) { 367 | servers := 3 368 | cfg := make_config(t, servers, false, false) 369 | defer cfg.cleanup() 370 | 371 | cfg.begin("Test (2B): rejoin of partitioned leader") 372 | 373 | cfg.one(101, servers, true) 374 | 375 | // leader network failure 376 | leader1 := cfg.checkOneLeader() 377 | cfg.disconnect(leader1) 378 | 379 | // make old leader try to agree on some entries 380 | cfg.rafts[leader1].Start(102) 381 | cfg.rafts[leader1].Start(103) 382 | cfg.rafts[leader1].Start(104) 383 | 384 | // new leader commits, also for index=2 385 | cfg.one(103, 2, true) 386 | 387 | // new leader network failure 388 | leader2 := cfg.checkOneLeader() 389 | cfg.disconnect(leader2) 390 | 391 | // old leader connected again 392 | cfg.connect(leader1) 393 | 394 | cfg.one(104, 2, true) 395 | 396 | // all together now 397 | cfg.connect(leader2) 398 | 399 | cfg.one(105, servers, true) 400 | 401 | cfg.end() 402 | } 403 | 404 | func TestBackup2B(t *testing.T) { 405 | servers := 5 406 | cfg := make_config(t, servers, false, false) 407 | defer cfg.cleanup() 408 | 409 | cfg.begin("Test (2B): leader backs up quickly over incorrect follower logs") 410 | 411 | cfg.one(rand.Int(), servers, true) 412 | 413 | // put leader and one follower in a partition 414 | leader1 := cfg.checkOneLeader() 415 | cfg.disconnect((leader1 + 2) % servers) 416 | cfg.disconnect((leader1 + 3) % servers) 417 | cfg.disconnect((leader1 + 4) % servers) 418 | 419 | // submit lots of commands that won't commit 420 | for i := 0; i < 50; i++ { 421 | cfg.rafts[leader1].Start(rand.Int()) 422 | } 423 | 424 | time.Sleep(RaftElectionTimeout / 2) 425 | 426 | cfg.disconnect((leader1 + 0) % servers) 427 | cfg.disconnect((leader1 + 1) % servers) 428 | 429 | // allow other partition to recover 430 | cfg.connect((leader1 + 2) % servers) 431 | cfg.connect((leader1 + 3) % servers) 432 | cfg.connect((leader1 + 4) % servers) 433 | 434 | // lots of successful commands to new group. 435 | for i := 0; i < 50; i++ { 436 | cfg.one(rand.Int(), 3, true) 437 | } 438 | 439 | // now another partitioned leader and one follower 440 | leader2 := cfg.checkOneLeader() 441 | other := (leader1 + 2) % servers 442 | if leader2 == other { 443 | other = (leader2 + 1) % servers 444 | } 445 | cfg.disconnect(other) 446 | 447 | // lots more commands that won't commit 448 | for i := 0; i < 50; i++ { 449 | cfg.rafts[leader2].Start(rand.Int()) 450 | } 451 | 452 | time.Sleep(RaftElectionTimeout / 2) 453 | 454 | // bring original leader back to life, 455 | for i := 0; i < servers; i++ { 456 | cfg.disconnect(i) 457 | } 458 | cfg.connect((leader1 + 0) % servers) 459 | cfg.connect((leader1 + 1) % servers) 460 | cfg.connect(other) 461 | 462 | // lots of successful commands to new group. 
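// (of the three connected servers, only other holds the entries committed by the second partition, so it must win the election; leader1 and its follower then have to discard their uncommitted entries: the fast roll-back this test is timing.)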
463 | for i := 0; i < 50; i++ { 464 | cfg.one(rand.Int(), 3, true) 465 | } 466 | 467 | // now everyone 468 | for i := 0; i < servers; i++ { 469 | cfg.connect(i) 470 | } 471 | cfg.one(rand.Int(), servers, true) 472 | 473 | cfg.end() 474 | } 475 | 476 | func TestCount2B(t *testing.T) { 477 | servers := 3 478 | cfg := make_config(t, servers, false, false) 479 | defer cfg.cleanup() 480 | 481 | cfg.begin("Test (2B): RPC counts aren't too high") 482 | 483 | rpcs := func() (n int) { 484 | for j := 0; j < servers; j++ { 485 | n += cfg.rpcCount(j) 486 | } 487 | return 488 | } 489 | 490 | leader := cfg.checkOneLeader() 491 | 492 | total1 := rpcs() 493 | 494 | if total1 > 30 || total1 < 1 { 495 | t.Fatalf("too many or few RPCs (%v) to elect initial leader\n", total1) 496 | } 497 | 498 | var total2 int 499 | var success bool 500 | loop: 501 | for try := 0; try < 5; try++ { 502 | if try > 0 { 503 | // give solution some time to settle 504 | time.Sleep(3 * time.Second) 505 | } 506 | 507 | leader = cfg.checkOneLeader() 508 | total1 = rpcs() 509 | 510 | iters := 10 511 | starti, term, ok := cfg.rafts[leader].Start(1) 512 | if !ok { 513 | // leader moved on really quickly 514 | continue 515 | } 516 | cmds := []int{} 517 | for i := 1; i < iters+2; i++ { 518 | x := int(rand.Int31()) 519 | cmds = append(cmds, x) 520 | index1, term1, ok := cfg.rafts[leader].Start(x) 521 | if term1 != term { 522 | // Term changed while starting 523 | continue loop 524 | } 525 | if !ok { 526 | // No longer the leader, so term has changed 527 | continue loop 528 | } 529 | if starti+i != index1 { 530 | t.Fatalf("Start() failed") 531 | } 532 | } 533 | 534 | for i := 1; i < iters+1; i++ { 535 | cmd := cfg.wait(starti+i, servers, term) 536 | if ix, ok := cmd.(int); ok == false || ix != cmds[i-1] { 537 | if ix == -1 { 538 | // term changed -- try again 539 | continue loop 540 | } 541 | t.Fatalf("wrong value %v committed for index %v; expected %v\n", cmd, starti+i, cmds) 542 | } 543 | } 544 | 545 | failed := false 546 | total2 = 0 547 | for j := 0; j < servers; j++ { 548 | if t, _ := cfg.rafts[j].GetState(); t != term { 549 | // term changed -- can't expect low RPC counts 550 | // need to keep going to update total2 551 | failed = true 552 | } 553 | total2 += cfg.rpcCount(j) 554 | } 555 | 556 | if failed { 557 | continue loop 558 | } 559 | 560 | if total2-total1 > (iters+1+3)*3 { 561 | t.Fatalf("too many RPCs (%v) for %v entries\n", total2-total1, iters) 562 | } 563 | 564 | success = true 565 | break 566 | } 567 | 568 | if !success { 569 | t.Fatalf("term changed too often") 570 | } 571 | 572 | time.Sleep(RaftElectionTimeout) 573 | 574 | total3 := 0 575 | for j := 0; j < servers; j++ { 576 | total3 += cfg.rpcCount(j) 577 | } 578 | 579 | if total3-total2 > 3*20 { 580 | t.Fatalf("too many RPCs (%v) for 1 second of idleness\n", total3-total2) 581 | } 582 | 583 | cfg.end() 584 | } 585 | 586 | func TestPersist12C(t *testing.T) { 587 | servers := 3 588 | cfg := make_config(t, servers, false, false) 589 | defer cfg.cleanup() 590 | 591 | cfg.begin("Test (2C): basic persistence") 592 | 593 | cfg.one(11, servers, true) 594 | 595 | // crash and re-start all 596 | for i := 0; i < servers; i++ { 597 | cfg.start1(i, cfg.applier) 598 | } 599 | for i := 0; i < servers; i++ { 600 | cfg.disconnect(i) 601 | cfg.connect(i) 602 | } 603 | 604 | cfg.one(12, servers, true) 605 | 606 | leader1 := cfg.checkOneLeader() 607 | cfg.disconnect(leader1) 608 | cfg.start1(leader1, cfg.applier) 609 | cfg.connect(leader1) 610 | 611 | cfg.one(13, servers, true) 612 | 613 | 
leader2 := cfg.checkOneLeader() 614 | cfg.disconnect(leader2) 615 | cfg.one(14, servers-1, true) 616 | cfg.start1(leader2, cfg.applier) 617 | cfg.connect(leader2) 618 | 619 | cfg.wait(4, servers, -1) // wait for leader2 to join before killing i3 620 | 621 | i3 := (cfg.checkOneLeader() + 1) % servers 622 | cfg.disconnect(i3) 623 | cfg.one(15, servers-1, true) 624 | cfg.start1(i3, cfg.applier) 625 | cfg.connect(i3) 626 | 627 | cfg.one(16, servers, true) 628 | 629 | cfg.end() 630 | } 631 | 632 | func TestPersist22C(t *testing.T) { 633 | servers := 5 634 | cfg := make_config(t, servers, false, false) 635 | defer cfg.cleanup() 636 | 637 | cfg.begin("Test (2C): more persistence") 638 | 639 | index := 1 640 | for iters := 0; iters < 5; iters++ { 641 | cfg.one(10+index, servers, true) 642 | index++ 643 | 644 | leader1 := cfg.checkOneLeader() 645 | 646 | cfg.disconnect((leader1 + 1) % servers) 647 | cfg.disconnect((leader1 + 2) % servers) 648 | 649 | cfg.one(10+index, servers-2, true) 650 | index++ 651 | 652 | cfg.disconnect((leader1 + 0) % servers) 653 | cfg.disconnect((leader1 + 3) % servers) 654 | cfg.disconnect((leader1 + 4) % servers) 655 | 656 | cfg.start1((leader1+1)%servers, cfg.applier) 657 | cfg.start1((leader1+2)%servers, cfg.applier) 658 | cfg.connect((leader1 + 1) % servers) 659 | cfg.connect((leader1 + 2) % servers) 660 | 661 | time.Sleep(RaftElectionTimeout) 662 | 663 | cfg.start1((leader1+3)%servers, cfg.applier) 664 | cfg.connect((leader1 + 3) % servers) 665 | 666 | cfg.one(10+index, servers-2, true) 667 | index++ 668 | 669 | cfg.connect((leader1 + 4) % servers) 670 | cfg.connect((leader1 + 0) % servers) 671 | } 672 | 673 | cfg.one(1000, servers, true) 674 | 675 | cfg.end() 676 | } 677 | 678 | func TestPersist32C(t *testing.T) { 679 | servers := 3 680 | cfg := make_config(t, servers, false, false) 681 | defer cfg.cleanup() 682 | 683 | cfg.begin("Test (2C): partitioned leader and one follower crash, leader restarts") 684 | 685 | cfg.one(101, 3, true) 686 | 687 | leader := cfg.checkOneLeader() 688 | cfg.disconnect((leader + 2) % servers) 689 | 690 | cfg.one(102, 2, true) 691 | 692 | cfg.crash1((leader + 0) % servers) 693 | cfg.crash1((leader + 1) % servers) 694 | cfg.connect((leader + 2) % servers) 695 | cfg.start1((leader+0)%servers, cfg.applier) 696 | cfg.connect((leader + 0) % servers) 697 | 698 | cfg.one(103, 2, true) 699 | 700 | cfg.start1((leader+1)%servers, cfg.applier) 701 | cfg.connect((leader + 1) % servers) 702 | 703 | cfg.one(104, servers, true) 704 | 705 | cfg.end() 706 | } 707 | 708 | // 709 | // Test the scenarios described in Figure 8 of the extended Raft paper. Each 710 | // iteration asks a leader, if there is one, to insert a command in the Raft 711 | // log. If there is a leader, that leader will fail quickly with a high 712 | // probability (perhaps without committing the command), or crash after a while 713 | // with low probability (most likely committing the command). If the number of 714 | // alive servers isn't enough to form a majority, perhaps start a new server. 715 | // The leader in a new term may try to finish replicating log entries that 716 | // haven't been committed yet.
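// At the end, every server is restarted and reconnected, and the test checks that one final command still commits on all five peers.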
717 | // 718 | func TestFigure82C(t *testing.T) { 719 | servers := 5 720 | cfg := make_config(t, servers, false, false) 721 | defer cfg.cleanup() 722 | 723 | cfg.begin("Test (2C): Figure 8") 724 | 725 | cfg.one(rand.Int(), 1, true) 726 | 727 | nup := servers 728 | for iters := 0; iters < 1000; iters++ { 729 | leader := -1 730 | for i := 0; i < servers; i++ { 731 | if cfg.rafts[i] != nil { 732 | _, _, ok := cfg.rafts[i].Start(rand.Int()) 733 | if ok { 734 | leader = i 735 | } 736 | } 737 | } 738 | 739 | if (rand.Int() % 1000) < 100 { 740 | ms := rand.Int63() % (int64(RaftElectionTimeout/time.Millisecond) / 2) 741 | time.Sleep(time.Duration(ms) * time.Millisecond) 742 | } else { 743 | ms := (rand.Int63() % 13) 744 | time.Sleep(time.Duration(ms) * time.Millisecond) 745 | } 746 | 747 | if leader != -1 { 748 | cfg.crash1(leader) 749 | nup -= 1 750 | } 751 | 752 | if nup < 3 { 753 | s := rand.Int() % servers 754 | if cfg.rafts[s] == nil { 755 | cfg.start1(s, cfg.applier) 756 | cfg.connect(s) 757 | nup += 1 758 | } 759 | } 760 | } 761 | 762 | for i := 0; i < servers; i++ { 763 | if cfg.rafts[i] == nil { 764 | cfg.start1(i, cfg.applier) 765 | cfg.connect(i) 766 | } 767 | } 768 | 769 | cfg.one(rand.Int(), servers, true) 770 | 771 | cfg.end() 772 | } 773 | 774 | func TestUnreliableAgree2C(t *testing.T) { 775 | servers := 5 776 | cfg := make_config(t, servers, true, false) 777 | defer cfg.cleanup() 778 | 779 | cfg.begin("Test (2C): unreliable agreement") 780 | 781 | var wg sync.WaitGroup 782 | 783 | for iters := 1; iters < 50; iters++ { 784 | for j := 0; j < 4; j++ { 785 | wg.Add(1) 786 | go func(iters, j int) { 787 | defer wg.Done() 788 | cfg.one((100*iters)+j, 1, true) 789 | }(iters, j) 790 | } 791 | cfg.one(iters, 1, true) 792 | } 793 | 794 | cfg.setunreliable(false) 795 | 796 | wg.Wait() 797 | 798 | cfg.one(100, servers, true) 799 | 800 | cfg.end() 801 | } 802 | 803 | func TestFigure8Unreliable2C(t *testing.T) { 804 | servers := 5 805 | cfg := make_config(t, servers, true, false) 806 | defer cfg.cleanup() 807 | 808 | cfg.begin("Test (2C): Figure 8 (unreliable)") 809 | 810 | cfg.one(rand.Int()%10000, 1, true) 811 | 812 | nup := servers 813 | for iters := 0; iters < 1000; iters++ { 814 | if iters == 200 { 815 | cfg.setlongreordering(true) 816 | } 817 | leader := -1 818 | for i := 0; i < servers; i++ { 819 | _, _, ok := cfg.rafts[i].Start(rand.Int() % 10000) 820 | if ok && cfg.connected[i] { 821 | leader = i 822 | } 823 | } 824 | 825 | if (rand.Int() % 1000) < 100 { 826 | ms := rand.Int63() % (int64(RaftElectionTimeout/time.Millisecond) / 2) 827 | time.Sleep(time.Duration(ms) * time.Millisecond) 828 | } else { 829 | ms := (rand.Int63() % 13) 830 | time.Sleep(time.Duration(ms) * time.Millisecond) 831 | } 832 | 833 | if leader != -1 && (rand.Int()%1000) < int(RaftElectionTimeout/time.Millisecond)/2 { 834 | cfg.disconnect(leader) 835 | nup -= 1 836 | } 837 | 838 | if nup < 3 { 839 | s := rand.Int() % servers 840 | if cfg.connected[s] == false { 841 | cfg.connect(s) 842 | nup += 1 843 | } 844 | } 845 | } 846 | 847 | for i := 0; i < servers; i++ { 848 | if cfg.connected[i] == false { 849 | cfg.connect(i) 850 | } 851 | } 852 | 853 | cfg.one(rand.Int()%10000, servers, true) 854 | 855 | cfg.end() 856 | } 857 | 858 | func internalChurn(t *testing.T, unreliable bool) { 859 | 860 | servers := 5 861 | cfg := make_config(t, servers, unreliable, false) 862 | defer cfg.cleanup() 863 | 864 | if unreliable { 865 | cfg.begin("Test (2C): unreliable churn") 866 | } else { 867 | cfg.begin("Test (2C): churn") 868 | } 869 
| 870 | stop := int32(0) 871 | 872 | // create concurrent clients 873 | cfn := func(me int, ch chan []int) { 874 | var ret []int 875 | ret = nil 876 | defer func() { ch <- ret }() 877 | values := []int{} 878 | for atomic.LoadInt32(&stop) == 0 { 879 | x := rand.Int() 880 | index := -1 881 | ok := false 882 | for i := 0; i < servers; i++ { 883 | // try them all, maybe one of them is a leader 884 | cfg.mu.Lock() 885 | rf := cfg.rafts[i] 886 | cfg.mu.Unlock() 887 | if rf != nil { 888 | index1, _, ok1 := rf.Start(x) 889 | if ok1 { 890 | ok = ok1 891 | index = index1 892 | } 893 | } 894 | } 895 | if ok { 896 | // maybe leader will commit our value, maybe not. 897 | // but don't wait forever. 898 | for _, to := range []int{10, 20, 50, 100, 200} { 899 | nd, cmd := cfg.nCommitted(index) 900 | if nd > 0 { 901 | if xx, ok := cmd.(int); ok { 902 | if xx == x { 903 | values = append(values, x) 904 | } 905 | } else { 906 | cfg.t.Fatalf("wrong command type") 907 | } 908 | break 909 | } 910 | time.Sleep(time.Duration(to) * time.Millisecond) 911 | } 912 | } else { 913 | time.Sleep(time.Duration(79+me*17) * time.Millisecond) 914 | } 915 | } 916 | ret = values 917 | } 918 | 919 | ncli := 3 920 | cha := []chan []int{} 921 | for i := 0; i < ncli; i++ { 922 | cha = append(cha, make(chan []int)) 923 | go cfn(i, cha[i]) 924 | } 925 | 926 | for iters := 0; iters < 20; iters++ { 927 | if (rand.Int() % 1000) < 200 { 928 | i := rand.Int() % servers 929 | cfg.disconnect(i) 930 | } 931 | 932 | if (rand.Int() % 1000) < 500 { 933 | i := rand.Int() % servers 934 | if cfg.rafts[i] == nil { 935 | cfg.start1(i, cfg.applier) 936 | } 937 | cfg.connect(i) 938 | } 939 | 940 | if (rand.Int() % 1000) < 200 { 941 | i := rand.Int() % servers 942 | if cfg.rafts[i] != nil { 943 | cfg.crash1(i) 944 | } 945 | } 946 | 947 | // Make crash/restart infrequent enough that the peers can often 948 | // keep up, but not so infrequent that everything has settled 949 | // down from one change to the next. Pick a value smaller than 950 | // the election timeout, but not hugely smaller. 951 | time.Sleep((RaftElectionTimeout * 7) / 10) 952 | } 953 | 954 | time.Sleep(RaftElectionTimeout) 955 | cfg.setunreliable(false) 956 | for i := 0; i < servers; i++ { 957 | if cfg.rafts[i] == nil { 958 | cfg.start1(i, cfg.applier) 959 | } 960 | cfg.connect(i) 961 | } 962 | 963 | atomic.StoreInt32(&stop, 1) 964 | 965 | values := []int{} 966 | for i := 0; i < ncli; i++ { 967 | vv := <-cha[i] 968 | if vv == nil { 969 | t.Fatal("client failed") 970 | } 971 | values = append(values, vv...) 
972 | } 973 | 974 | time.Sleep(RaftElectionTimeout) 975 | 976 | lastIndex := cfg.one(rand.Int(), servers, true) 977 | 978 | really := make([]int, lastIndex+1) 979 | for index := 1; index <= lastIndex; index++ { 980 | v := cfg.wait(index, servers, -1) 981 | if vi, ok := v.(int); ok { 982 | really = append(really, vi) 983 | } else { 984 | t.Fatalf("not an int") 985 | } 986 | } 987 | 988 | for _, v1 := range values { 989 | ok := false 990 | for _, v2 := range really { 991 | if v1 == v2 { 992 | ok = true 993 | } 994 | } 995 | if ok == false { 996 | cfg.t.Fatalf("didn't find a value") 997 | } 998 | } 999 | 1000 | cfg.end() 1001 | } 1002 | 1003 | func TestReliableChurn2C(t *testing.T) { 1004 | internalChurn(t, false) 1005 | } 1006 | 1007 | func TestUnreliableChurn2C(t *testing.T) { 1008 | internalChurn(t, true) 1009 | } 1010 | 1011 | const MAXLOGSIZE = 2000 1012 | 1013 | func snapcommon(t *testing.T, name string, disconnect bool, reliable bool, crash bool) { 1014 | iters := 30 1015 | servers := 3 1016 | cfg := make_config(t, servers, !reliable, true) 1017 | defer cfg.cleanup() 1018 | 1019 | cfg.begin(name) 1020 | 1021 | cfg.one(rand.Int(), servers, true) 1022 | leader1 := cfg.checkOneLeader() 1023 | 1024 | for i := 0; i < iters; i++ { 1025 | victim := (leader1 + 1) % servers 1026 | sender := leader1 1027 | if i%3 == 1 { 1028 | sender = (leader1 + 1) % servers 1029 | victim = leader1 1030 | } 1031 | 1032 | if disconnect { 1033 | cfg.disconnect(victim) 1034 | cfg.one(rand.Int(), servers-1, true) 1035 | } 1036 | if crash { 1037 | cfg.crash1(victim) 1038 | cfg.one(rand.Int(), servers-1, true) 1039 | } 1040 | // send enough to get a snapshot 1041 | for i := 0; i < SnapShotInterval+1; i++ { 1042 | cfg.rafts[sender].Start(rand.Int()) 1043 | } 1044 | // let applier threads catch up with the Start()'s 1045 | cfg.one(rand.Int(), servers-1, true) 1046 | 1047 | if cfg.LogSize() >= MAXLOGSIZE { 1048 | cfg.t.Fatalf("Log size too large") 1049 | } 1050 | if disconnect { 1051 | // reconnect a follower, who may be behind and 1052 | // needs to receive a snapshot to catch up.
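// (the leader may already have discarded the log entries the follower is missing, in which case only a snapshot can bring it up to date.)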
1053 | cfg.connect(victim) 1054 | cfg.one(rand.Int(), servers, true) 1055 | leader1 = cfg.checkOneLeader() 1056 | } 1057 | if crash { 1058 | cfg.start1(victim, cfg.applierSnap) 1059 | cfg.connect(victim) 1060 | cfg.one(rand.Int(), servers, true) 1061 | leader1 = cfg.checkOneLeader() 1062 | } 1063 | } 1064 | cfg.end() 1065 | } 1066 | 1067 | func TestSnapshotBasic2D(t *testing.T) { 1068 | snapcommon(t, "Test (2D): snapshots basic", false, true, false) 1069 | } 1070 | 1071 | func TestSnapshotInstall2D(t *testing.T) { 1072 | snapcommon(t, "Test (2D): install snapshots (disconnect)", true, true, false) 1073 | } 1074 | 1075 | func TestSnapshotInstallUnreliable2D(t *testing.T) { 1076 | snapcommon(t, "Test (2D): install snapshots (disconnect+unreliable)", 1077 | true, false, false) 1078 | } 1079 | 1080 | func TestSnapshotInstallCrash2D(t *testing.T) { 1081 | snapcommon(t, "Test (2D): install snapshots (crash)", false, true, true) 1082 | } 1083 | 1084 | func TestSnapshotInstallUnCrash2D(t *testing.T) { 1085 | snapcommon(t, "Test (2D): install snapshots (unreliable+crash)", false, false, true) 1086 | } 1087 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "log" 4 | 5 | // Debugging 6 | const Debug = false 7 | 8 | func DPrintf(format string, a ...interface{}) (n int, err error) { 9 | if Debug { 10 | log.Printf(format, a...) 11 | } 12 | return 13 | } 14 | --------------------------------------------------------------------------------
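A note on timing: the tests above constrain how an implementation paces itself. Elections must finish well inside RaftElectionTimeout (one second), TestCount2B allows only about 3*20 RPCs across the three servers during an idle second, and TestInitialElection2A checks that a leader still exists after two idle timeouts. The sketch below shows one set of timeout choices consistent with those constraints; the constant values and the helper name are illustrative assumptions, not code taken from this repository's raft.go.

package raft

import (
	"math/rand"
	"time"
)

// Illustrative value only: a few heartbeats per election timeout keeps
// followers quiet, while a 3-server cluster stays far below TestCount2B's
// idle budget of 3*20 RPCs per second.
const heartbeatInterval = 120 * time.Millisecond

// randomizedElectionTimeout spreads follower timeouts over a range so that
// split votes are rare, while leaving room for a retry or two inside the
// tester's one-second election allowance. An implementation would typically
// report timeout resets through DPrintf from util.go while debugging.
func randomizedElectionTimeout() time.Duration {
	const lo, hi = 300 * time.Millisecond, 600 * time.Millisecond
	return lo + time.Duration(rand.Int63n(int64(hi-lo)))
}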