├── images ├── logo.png ├── test1.png ├── test2.png ├── timing.png └── overall.png ├── .gitignore ├── go.mod ├── utils ├── visualize.sh ├── viz.go ├── sample_logs.txt ├── sample_output.txt └── output.txt ├── raft ├── config.go ├── storage.go ├── server.go ├── simulator.go ├── raft_test.go └── raft.go ├── LICENSE ├── go.sum ├── main.go └── README.md /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/debajyotidasgupta/raft-consensus/HEAD/images/logo.png -------------------------------------------------------------------------------- /images/test1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/debajyotidasgupta/raft-consensus/HEAD/images/test1.png -------------------------------------------------------------------------------- /images/test2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/debajyotidasgupta/raft-consensus/HEAD/images/test2.png -------------------------------------------------------------------------------- /images/timing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/debajyotidasgupta/raft-consensus/HEAD/images/timing.png -------------------------------------------------------------------------------- /images/overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/debajyotidasgupta/raft-consensus/HEAD/images/overall.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module raft-consensus 2 | 3 | go 1.17 4 | 5 | require ( 6 | github.com/fortytw2/leaktest v1.3.0 7 | github.com/jedib0t/go-pretty/v6 v6.2.7 8 | ) 9 | 10 | require ( 11 | github.com/mattn/go-runewidth v0.0.13 // indirect 12 | github.com/rivo/uniseg v0.2.0 // indirect 13 | golang.org/x/sys v0.0.0-20180816055513-1c9583448a9c // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /utils/visualize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Example: ./visualize.sh -t TestElectionNormal 3 | clear # Clear the screen 4 | echo "Ensure that debug is enabled in the raft/raft.go file" 5 | 6 | # Get Args 7 | while getopts ":t:" opt; do 8 | case $opt in 9 | t) 10 | TEST_NAME=$OPTARG 11 | ;; 12 | \?) 
13 | echo "Invalid option: -$OPTARG" >&2 14 | ;; 15 | esac 16 | done 17 | 18 | cd ../raft # Go to the raft directory 19 | go test -v -run "$TEST_NAME$" -timeout=10m > ../utils/logs.txt # Run the test 20 | 21 | cd ../utils 22 | go run viz.go < logs.txt > output.txt 23 | 24 | echo "Output saved to output.txt" -------------------------------------------------------------------------------- /raft/config.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "fmt" 4 | 5 | // Basic Data Structure and Associated operations to 6 | // Maintain cluster configuration 7 | 8 | // Set implementation from https://golangbyexample.com/set-implementation-in-golang/ 9 | type Set struct { 10 | peerSet map[uint64]struct{} 11 | } 12 | 13 | func makeSet() Set { 14 | return Set{ 15 | peerSet: make(map[uint64]struct{}), 16 | } 17 | } 18 | 19 | func (c *Set) Exists(key uint64) bool { 20 | _, exists := c.peerSet[key] 21 | return exists 22 | } 23 | 24 | func (c *Set) Add(key uint64) { 25 | c.peerSet[key] = struct{}{} 26 | } 27 | 28 | func (c *Set) Remove(key uint64) error { 29 | _, exists := c.peerSet[key] 30 | if !exists { 31 | return fmt.Errorf("Remove Error: Item doesn't exist in set") 32 | } 33 | delete(c.peerSet, key) 34 | return nil 35 | } 36 | 37 | func (c *Set) Size() int { 38 | return len(c.peerSet) 39 | } 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Debajyoti Dasgupta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= 4 | github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= 5 | github.com/jedib0t/go-pretty/v6 v6.2.7 h1:4823Lult/tJ0VI1PgW3aSKw59pMWQ6Kzv9b3Bj6MwY0= 6 | github.com/jedib0t/go-pretty/v6 v6.2.7/go.mod h1:FMkOpgGD3EZ91cW8g/96RfxoV7bdeJyzXPYgz1L1ln0= 7 | github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= 8 | github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 9 | github.com/pkg/profile v1.6.0/go.mod h1:qBsxPvzyUincmltOk6iyRVxHYg4adc0OFOv72ZdLa18= 10 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 11 | github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= 12 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 13 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 14 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 15 | golang.org/x/sys v0.0.0-20180816055513-1c9583448a9c h1:uHnKXcvx6SNkuwC+nrzxkJ+TpPwZOtumbhWrrOYN5YA= 16 | golang.org/x/sys v0.0.0-20180816055513-1c9583448a9c/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 17 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 18 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 19 | -------------------------------------------------------------------------------- /raft/storage.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "sync" 4 | 5 | type Storage interface { // Storage is the interface for the storage of Raft 6 | Set(key string, value []byte) // Set stores the value of the given key 7 | Get(key string) ([]byte, bool) // Get returns the value of the given key and a boolean value indicating whether the key was found 8 | HasData() bool // HasData returns a boolean value indicating whether there is any data in the storage 9 | } 10 | 11 | // Simple implementation of the Storage interface in memory 12 | type Database struct { // Database is a storage implementation that uses a map as the underlying storage 13 | mu sync.Mutex // Mutex protects the map 14 | kv map[string][]byte // kv is the map of the storage 15 | } 16 | 17 | func NewDatabase() *Database { 18 | newKV := make(map[string][]byte) // newKV is the map of the storage 19 | return &Database{ // Return a new Database 20 | kv: newKV, 21 | } 22 | } 23 | 24 | func (db *Database) Get(key string) ([]byte, bool) { 25 | db.mu.Lock() // Lock the database 26 | defer db.mu.Unlock() // Unlock the database 27 | value, found := db.kv[key] // Get the value of the key 28 | return value, found // Return the value and a boolean value indicating whether the key was found 29 | } 30 | 31 | func (db *Database) Set(key string, value []byte) { 32 | db.mu.Lock() // Lock the database 33 | defer db.mu.Unlock() // Unlock the database 34 | db.kv[key] = value // Set the value of the key in the 
database 35 | } 36 | 37 | func (db *Database) HasData() bool { // HasData returns a boolean value indicating whether there is any data in the storage 38 | db.mu.Lock() // Lock the database 39 | defer db.mu.Unlock() // Unlock the database 40 | return len(db.kv) > 0 // Return a boolean value indicating whether there is any data in the storage 41 | } 42 | -------------------------------------------------------------------------------- /utils/viz.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "io" 7 | "log" 8 | "os" 9 | "regexp" 10 | "strconv" 11 | "strings" 12 | 13 | "github.com/jedib0t/go-pretty/v6/table" 14 | ) 15 | 16 | // Entry is a single log entry emitted by a raft server. 17 | type Entry struct { 18 | timestamp string 19 | id string 20 | msg string 21 | } 22 | 23 | // TestLog is a whole log for a single test, containing many entries. 24 | type TestLog struct { 25 | name string 26 | status string 27 | entries []Entry 28 | 29 | // ids is a set of all IDs seen emitting entries in this test. 30 | ids map[string]bool 31 | } 32 | 33 | func min(a, b int) int { 34 | if a < b { 35 | return a 36 | } 37 | return b 38 | } 39 | 40 | func format(msg string) string { 41 | // break the message in multiple lines to display only 30 characters per line 42 | newMsg := "" 43 | numChars := 30 44 | 45 | for i := 0; i < len(msg); i += numChars { 46 | newMsg += msg[i:min(i+numChars, len(msg))] + "\n" 47 | } 48 | 49 | return newMsg 50 | } 51 | 52 | func TableViz(tl TestLog) { 53 | t := table.NewWriter() 54 | nservers := len(tl.ids) 55 | 56 | t.SetOutputMirror(os.Stdout) 57 | 58 | headers := table.Row{"Time"} 59 | for i := 0; i < nservers; i++ { 60 | headers = append(headers, strconv.Itoa(i)) 61 | } 62 | t.AppendHeader(headers) 63 | 64 | for _, entry := range tl.entries { 65 | row := table.Row{entry.timestamp} 66 | idInt, err := strconv.Atoi(entry.id) 67 | if err != nil { 68 | log.Fatal(err) 69 | } 70 | 71 | for i := 0; i < nservers; i++ { 72 | if i == idInt { 73 | row = append(row, format(entry.msg)) 74 | } else { 75 | row = append(row, "") 76 | } 77 | } 78 | t.AppendSeparator() 79 | t.AppendRow(row) 80 | } 81 | t.SetStyle(table.StyleLight) 82 | // t.SetStyle(table.StyleColoredBlueWhiteOnBlack) 83 | t.Render() 84 | } 85 | 86 | func parseTestLogs(rd io.Reader) []TestLog { 87 | var testlogs []TestLog 88 | 89 | statusRE := regexp.MustCompile(`--- (\w+):\s+(\w+)`) 90 | entryRE := regexp.MustCompile(`([0-9:.]+) \[(\d+)\] (.*)`) 91 | 92 | scanner := bufio.NewScanner(bufio.NewReader(rd)) 93 | for scanner.Scan() { 94 | line := scanner.Text() 95 | if strings.Contains(line, "=== RUN") { 96 | testlogs = append(testlogs, TestLog{ids: make(map[string]bool)}) 97 | testlogs[len(testlogs)-1].name = strings.TrimSpace(line[7:]) 98 | } else { 99 | if len(testlogs) == 0 { 100 | continue 101 | } 102 | curlog := &testlogs[len(testlogs)-1] 103 | 104 | statusMatch := statusRE.FindStringSubmatch(line) 105 | if len(statusMatch) > 0 { 106 | if statusMatch[2] != curlog.name { 107 | log.Fatalf("name on line %q mismatch with test name: got %s", line, curlog.name) 108 | } 109 | curlog.status = statusMatch[1] 110 | continue 111 | } 112 | 113 | entryMatch := entryRE.FindStringSubmatch(line) 114 | if len(entryMatch) > 0 { 115 | entry := Entry{ 116 | timestamp: entryMatch[1], 117 | id: entryMatch[2], 118 | msg: entryMatch[3], 119 | } 120 | curlog.entries = append(curlog.entries, entry) 121 | curlog.ids[entry.id] = true 122 | continue 123 | } 124 | } 
125 | } 126 | return testlogs 127 | } 128 | 129 | func main() { 130 | testlogs := parseTestLogs(os.Stdin) 131 | tnames := make(map[string]int) 132 | 133 | /** 134 | * We deduplicate the repeated test case names, so that in case the 135 | * test case name is repeated, we can generate a unique table for it. 136 | */ 137 | 138 | for i, tl := range testlogs { 139 | if count, ok := tnames[tl.name]; ok { 140 | testlogs[i].name = fmt.Sprintf("%s_%d", tl.name, count) 141 | } 142 | tnames[tl.name] += 1 143 | } 144 | 145 | statusSummary := "PASS" 146 | 147 | for _, tl := range testlogs { 148 | fmt.Println(tl.status, tl.name, tl.ids, "; entries:", len(tl.entries)) 149 | if tl.status != "PASS" { 150 | statusSummary = tl.status 151 | } 152 | TableViz(tl) 153 | fmt.Println("") 154 | } 155 | 156 | fmt.Println(statusSummary) 157 | } 158 | -------------------------------------------------------------------------------- /utils/sample_logs.txt: -------------------------------------------------------------------------------- 1 | Seed: 1647776199682732205 2 | === RUN TestElectionNormal 3 | 17:06:39.683218 [0] listening at [::]:15867 4 | 17:06:39.683363 [1] listening at [::]:27259 5 | 17:06:39.683470 [2] listening at [::]:9351 6 | 17:06:39.684415 [2] Election Timer started (215ms), term=0 7 | 17:06:39.684454 [1] Election Timer started (290ms), term=0 8 | 17:06:39.684462 [0] Election Timer started (199ms), term=0 9 | 17:06:39.885142 [0] Becomes Candidate (currentTerm=1); log=[] 10 | 17:06:39.885376 [0] Election Timer started (262ms), term=1 11 | 17:06:39.885393 [0] Sending RequestVote to 1: {Term:1 CandidateId:0 LastLogIndex:0 LastLogTerm:0} 12 | 17:06:39.885495 [0] Sending RequestVote to 2: {Term:1 CandidateId:0 LastLogIndex:0 LastLogTerm:0} 13 | 17:06:39.892139 [2] RequestVote: {Term:1 CandidateId:0 LastLogIndex:0 LastLogTerm:0} [currentTerm=0, votedFor=-1, log index/term=(0, 0)] 14 | 17:06:39.892187 [2] Term out of date with term in RequestVote 15 | 17:06:39.892228 [2] Becomes Follower with term=1; log=[] 16 | 17:06:39.892272 [1] RequestVote: {Term:1 CandidateId:0 LastLogIndex:0 LastLogTerm:0} [currentTerm=0, votedFor=-1, log index/term=(0, 0)] 17 | 17:06:39.892331 [1] Term out of date with term in RequestVote 18 | 17:06:39.892356 [1] Becomes Follower with term=1; log=[] 19 | 17:06:39.892536 [2] RequestVote reply: &{Term:1 VoteGranted:true} 20 | 17:06:39.892593 [1] RequestVote reply: &{Term:1 VoteGranted:true} 21 | 17:06:39.892666 [1] Election Timer started (221ms), term=1 22 | 17:06:39.892702 [2] Election Timer started (226ms), term=1 23 | 17:06:39.893176 [0] received RequestVoteReply {Term:1 VoteGranted:true} from 2 24 | 17:06:39.893222 [0] Wins election with 2 votes 25 | 17:06:39.893288 [0] becomes Leader; term=1, nextIndex=map[1:1 2:1], matchIndex=map[1:0 2:0]; log=[] 26 | 17:06:39.893381 [0] sending AppendEntries to 1: ni=1, args={Term:1 LeaderId:0 PrevLogIndex:0 PrevLogTerm:0 Entries:[] LeaderCommit:0} 27 | 17:06:39.893400 [0] sending AppendEntries to 2: ni=1, args={Term:1 LeaderId:0 PrevLogIndex:0 PrevLogTerm:0 Entries:[] LeaderCommit:0} 28 | 17:06:39.893389 [0] received RequestVoteReply {Term:1 VoteGranted:true} from 1 29 | 17:06:39.893562 [0] While waiting for reply, state = Leader 30 | 17:06:39.895158 [2] in election timer term changed from 0 to 1, bailing out 31 | 17:06:39.895265 [1] in election timer term changed from 0 to 1, bailing out 32 | 17:06:39.895270 [2] AppendEntries: {Term:1 LeaderId:0 PrevLogIndex:0 PrevLogTerm:0 Entries:[] LeaderCommit:0} 33 | 17:06:39.895410 [2] AppendEntries reply: 
{Term:1 Success:true ConflictIndex:0 ConflictTerm:0} 34 | 17:06:39.895663 [0] In election timer state=Leader, bailing out 35 | 17:06:39.895853 [0] AppendEntries reply from 2 success: nextIndex := map[1:1 2:1], matchIndex := map[1:0 2:0]; commitIndex := 0 36 | 17:06:39.898983 [1] AppendEntries: {Term:1 LeaderId:0 PrevLogIndex:0 PrevLogTerm:0 Entries:[] LeaderCommit:0} 37 | 17:06:39.899104 [1] AppendEntries reply: {Term:1 Success:true ConflictIndex:0 ConflictTerm:0} 38 | 17:06:39.899388 [0] AppendEntries reply from 1 success: nextIndex := map[1:1 2:1], matchIndex := map[1:0 2:0]; commitIndex := 0 39 | 17:06:39.943713 [0] sending AppendEntries to 2: ni=1, args={Term:1 LeaderId:0 PrevLogIndex:0 PrevLogTerm:0 Entries:[] LeaderCommit:0} 40 | 17:06:39.943743 [0] sending AppendEntries to 1: ni=1, args={Term:1 LeaderId:0 PrevLogIndex:0 PrevLogTerm:0 Entries:[] LeaderCommit:0} 41 | 17:06:39.945253 [1] AppendEntries: {Term:1 LeaderId:0 PrevLogIndex:0 PrevLogTerm:0 Entries:[] LeaderCommit:0} 42 | 17:06:39.945377 [1] AppendEntries reply: {Term:1 Success:true ConflictIndex:0 ConflictTerm:0} 43 | 17:06:39.945574 [0] AppendEntries reply from 1 success: nextIndex := map[1:1 2:1], matchIndex := map[1:0 2:0]; commitIndex := 0 44 | 17:06:39.946599 [2] AppendEntries: {Term:1 LeaderId:0 PrevLogIndex:0 PrevLogTerm:0 Entries:[] LeaderCommit:0} 45 | 17:06:39.946699 [2] AppendEntries reply: {Term:1 Success:true ConflictIndex:0 ConflictTerm:0} 46 | 17:06:39.946945 [0] AppendEntries reply from 2 success: nextIndex := map[1:1 2:1], matchIndex := map[1:0 2:0]; commitIndex := 0 47 | 17:06:39.985733 [2] Becomes Dead 48 | 17:06:39.985806 [2] Accepting no more connections 49 | 17:06:39.985854 [2] Waiting for existing connections to close 50 | 17:06:39.985884 [2] All connections closed. Stopping server 51 | 17:06:39.985900 [0] Becomes Dead 52 | 17:06:39.986006 [0] sendCommit completed 53 | 17:06:39.986016 [2] sendCommit completed 54 | 17:06:39.986046 [0] Accepting no more connections 55 | 17:06:39.986050 [0] Waiting for existing connections to close 56 | 17:06:39.986123 [0] All connections closed. Stopping server 57 | 17:06:39.986135 [1] Becomes Dead 58 | 17:06:39.986185 [1] Accepting no more connections 59 | 17:06:39.986203 [1] Waiting for existing connections to close 60 | 17:06:39.986214 [1] All connections closed. 
Stopping server 61 | 17:06:39.986218 [1] sendCommit completed 62 | --- PASS: TestElectionNormal (0.30s) 63 | PASS 64 | ok raft-consensus/raft 0.310s 65 | -------------------------------------------------------------------------------- /raft/server.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "math/rand" 7 | "net" 8 | "net/rpc" 9 | "sync" 10 | "time" 11 | ) 12 | 13 | type ServiceType uint64 14 | 15 | // Server wraps a RPC Server 16 | // It will also wrap Raft service object 17 | type Server struct { 18 | mu sync.Mutex // mutual exclusion for accessing server members 19 | serverId uint64 // id of this server 20 | peerList Set // peerIds that this server will connect as a client 21 | rpcServer *rpc.Server // RPC Server 22 | listener net.Listener // listener to keep listening for incoming connections 23 | peers map[uint64]*rpc.Client // maps peerId to corresponding peer 24 | quit chan interface{} // channel to indicate to stop listening for incoming connections 25 | wg sync.WaitGroup // waitgroup to wait for all connections to close before gracefully stopping 26 | 27 | rn *RaftNode // raft node service implemented on this server 28 | db *Database // storage for this server 29 | rpcProxy *RPCProxy // NOT USED YET 30 | commitChan chan CommitEntry // channel to receive commit entries from raftnode 31 | ready <-chan interface{} // channel to indicate raftnode that this server is connected to its peers 32 | 33 | service *ServiceType // DUMMY SERVICE FOR TESTING PURPOSES 34 | } 35 | 36 | //to avoid https://github.com/golang/go/issues/19957 issue 37 | type RPCProxy struct { 38 | rn *RaftNode 39 | } 40 | 41 | //create a Server Instance with serverId and list of peerIds 42 | func CreateServer(serverId uint64, peerList Set, db *Database, ready <-chan interface{}, commitChan chan CommitEntry) *Server { 43 | s := new(Server) 44 | s.serverId = serverId 45 | s.peerList = peerList 46 | s.peers = make(map[uint64]*rpc.Client) 47 | s.db = db 48 | s.ready = ready 49 | s.commitChan = commitChan 50 | s.quit = make(chan interface{}) 51 | return s 52 | } 53 | 54 | //keep listening for incoming connections in a loop 55 | //on accepting a connection start a go routine to serve the connection 56 | func (s *Server) ConnectionAccept() { 57 | defer s.wg.Done() 58 | 59 | for { 60 | //log.Printf("[%d] Listening\n", s.serverId) 61 | listener, err := s.listener.Accept() // wait to accept an incoming connection 62 | if err != nil { 63 | select { 64 | case <-s.quit: // quit listening 65 | log.Printf("[%d] Accepting no more connections\n", s.serverId) 66 | return 67 | default: 68 | log.Fatalf("[%d] Error in accepting %v\n", s.serverId, err) 69 | } 70 | } 71 | s.wg.Add(1) // serve the new accepted connection in a separate go routine 72 | go func() { 73 | s.rpcServer.ServeConn(listener) 74 | s.wg.Done() 75 | }() 76 | } 77 | } 78 | 79 | //start a new service -> 80 | //1. create the RPC Server 81 | //2. register the service with RPC 82 | //3. get a lister for TCP port passed as argument 83 | //4. 
start listening for incoming connections 84 | func (s *Server) Serve(port ...string) { 85 | s.mu.Lock() 86 | s.rn = NewRaftNode(s.serverId, s.peerList, s, s.db, s.ready, s.commitChan) 87 | 88 | s.rpcServer = rpc.NewServer() //create a new RPC Server for the new service 89 | s.rpcProxy = &RPCProxy{rn: s.rn} 90 | //MIGHT ADD PROXY LATER 91 | //s.rpcServer.RegisterName("RaftNode", s.rpcProxy) //register the new service 92 | s.rpcServer.RegisterName("RaftNode", s.rpcProxy) 93 | 94 | var st ServiceType = ServiceType(1) 95 | s.service = &st 96 | s.rpcServer.RegisterName("ServiceType", s.service) 97 | 98 | var err error 99 | var tcpPort string = ":" 100 | if len(port) == 1 { 101 | tcpPort = tcpPort + port[0] 102 | } else { 103 | tcpPort = tcpPort + "0" 104 | } 105 | s.listener, err = net.Listen("tcp", tcpPort) //get a listener to the tcp port 106 | if err != nil { 107 | log.Fatal(err) 108 | } 109 | log.Printf("[%v] listening at %s\n", s.serverId, s.listener.Addr()) 110 | s.mu.Unlock() 111 | 112 | s.wg.Add(1) 113 | 114 | go s.ConnectionAccept() 115 | } 116 | 117 | //close connections to all peers 118 | func (s *Server) DisconnectAll() { 119 | s.mu.Lock() 120 | defer s.mu.Unlock() 121 | for id := range s.peers { 122 | if s.peers[id] != nil { 123 | s.peers[id].Close() 124 | s.peers[id] = nil 125 | } 126 | } 127 | } 128 | 129 | //stop the server 130 | func (s *Server) Stop() { 131 | s.rn.Stop() 132 | close(s.quit) // indicate the listener to stop listening 133 | s.listener.Close() // close the listener 134 | 135 | log.Printf("[%d] Waiting for existing connections to close\n", s.serverId) 136 | s.wg.Wait() // wait for all existing connections to close 137 | 138 | log.Printf("[%d] All connections closed. Stopping server\n", s.serverId) 139 | } 140 | 141 | func (s *Server) GetListenerAddr() net.Addr { 142 | s.mu.Lock() 143 | defer s.mu.Unlock() 144 | return s.listener.Addr() 145 | } 146 | 147 | //connect to a peer 148 | func (s *Server) ConnectToPeer(peerId uint64, addr net.Addr) error { 149 | s.mu.Lock() 150 | defer s.mu.Unlock() 151 | // if not already connected to the peer 152 | if s.peers[peerId] == nil { 153 | peer, err := rpc.Dial(addr.Network(), addr.String()) // dial to eh network address of the peer server 154 | if err != nil { 155 | return err 156 | } 157 | s.peers[peerId] = peer // assign the peer client 158 | } 159 | return nil 160 | } 161 | 162 | //disconnect from a particular peer 163 | func (s *Server) DisconnectPeer(peerId uint64) error { 164 | s.mu.Lock() 165 | defer s.mu.Unlock() 166 | peer := s.peers[peerId] 167 | if peer != nil { 168 | err := peer.Close() 169 | s.peers[peerId] = nil 170 | return err 171 | } 172 | return nil 173 | } 174 | 175 | //make an RPC call to the particular peer 176 | func (s *Server) RPC(peerId uint64, rpcCall string, args interface{}, reply interface{}) error { 177 | s.mu.Lock() 178 | peer := s.peers[peerId] //obtain the peer client 179 | s.mu.Unlock() 180 | 181 | if peer == nil { 182 | return fmt.Errorf("[%d] RPC call to peer %d after it is closed", s.serverId, peerId) 183 | } else { 184 | // call RPC corresponding to the particular peer connection 185 | return peer.Call(rpcCall, args, reply) 186 | } 187 | } 188 | 189 | //A DUMMY RPC FUNCTION 190 | func (s *ServiceType) DisplayMsg(args uint64, reply *uint64) error { 191 | fmt.Printf("received %d\n", args) 192 | *reply = 2 * args 193 | return nil 194 | } 195 | 196 | //RPC call from proxy for RequestVote 197 | func (rp *RPCProxy) RequestVote(args RequestVoteArgs, reply *RequestVoteReply) error { 198 | 
time.Sleep(time.Duration(1+rand.Intn(5)) * time.Millisecond) 199 | return rp.rn.RequestVote(args, reply) 200 | } 201 | 202 | //RPC call from proxy for AppendEntries 203 | func (rp *RPCProxy) AppendEntries(args AppendEntriesArgs, reply *AppendEntriesReply) error { 204 | time.Sleep(time.Duration(1+rand.Intn(5)) * time.Millisecond) 205 | return rp.rn.AppendEntries(args, reply) 206 | } 207 | -------------------------------------------------------------------------------- /raft/simulator.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | "fmt" 7 | "log" 8 | "math/rand" 9 | "strconv" 10 | "sync" 11 | "testing" 12 | "time" 13 | ) 14 | 15 | func init() { 16 | log.SetFlags(log.Ltime | log.Lmicroseconds) 17 | 18 | seed := time.Now().UnixNano() // 1647347572251367891 19 | fmt.Println("Seed: ", seed) 20 | 21 | rand.Seed(seed) 22 | } 23 | 24 | type ClusterSimulator struct { 25 | mu sync.Mutex 26 | 27 | raftCluster map[uint64]*Server // all servers present in Cluster 28 | dbCluster map[uint64]*Database 29 | 30 | commitChans map[uint64]chan CommitEntry // commit Channel for each Cluster 31 | 32 | commits map[uint64][]CommitEntry // commits[i] := sequence of commits by server i 33 | 34 | isConnected map[uint64]bool // check if node i is connected to cluster 35 | 36 | isAlive map[uint64]bool // check if node is alive 37 | /* 38 | *Now that servers can leave/enter, the cluster must have 39 | *info of all servers in the cluster, which is stored in 40 | *activeServers 41 | */ 42 | activeServers Set // set of all servers currently in the cluster 43 | n uint64 // number of servers 44 | t *testing.T 45 | } 46 | 47 | type CommitFunctionType int 48 | 49 | const ( 50 | TestCommitFunction CommitFunctionType = iota 51 | TestNoCommitFunction 52 | ) 53 | 54 | type Write struct { 55 | Key string 56 | Val int 57 | } 58 | 59 | type Read struct { 60 | Key string 61 | } 62 | 63 | type AddServers struct { 64 | ServerIds []int 65 | } 66 | 67 | type RemoveServers struct { 68 | ServerIds []int 69 | } 70 | 71 | // Create a new ClusterSimulator 72 | func CreateNewCluster(t *testing.T, n uint64) *ClusterSimulator { 73 | // initialising required fields of ClusterSimulator 74 | 75 | serverList := make(map[uint64]*Server) 76 | isConnected := make(map[uint64]bool) 77 | isAlive := make(map[uint64]bool) 78 | commitChans := make(map[uint64]chan CommitEntry) 79 | commits := make(map[uint64][]CommitEntry) 80 | ready := make(chan interface{}) 81 | storage := make(map[uint64]*Database) 82 | activeServers := makeSet() 83 | // creating servers 84 | 85 | for i := uint64(0); i < n; i++ { 86 | peerList := makeSet() 87 | 88 | // get PeerList for server i 89 | for j := uint64(0); j < n; j++ { 90 | if i == j { 91 | continue 92 | } else { 93 | peerList.Add(j) 94 | } 95 | } 96 | 97 | storage[i] = NewDatabase() 98 | commitChans[i] = make(chan CommitEntry) 99 | serverList[i] = CreateServer(i, peerList, storage[i], ready, commitChans[i]) 100 | 101 | serverList[i].Serve() 102 | isAlive[i] = true 103 | } 104 | 105 | // Connecting peers to each other 106 | for i := uint64(0); i < n; i++ { 107 | for j := uint64(0); j < n; j++ { 108 | if i == j { 109 | continue 110 | } 111 | serverList[i].ConnectToPeer(j, serverList[j].GetListenerAddr()) 112 | } 113 | isConnected[i] = true 114 | } 115 | 116 | for i := uint64(0); i < n; i++ { 117 | activeServers.Add(i) 118 | } 119 | close(ready) 120 | 121 | // create a new cluster 122 | newCluster := &ClusterSimulator{ 123 | 
raftCluster: serverList, 124 | dbCluster: storage, 125 | commitChans: commitChans, 126 | commits: commits, 127 | isConnected: isConnected, 128 | isAlive: isAlive, 129 | n: n, 130 | activeServers: activeServers, 131 | t: t, 132 | } 133 | 134 | for i := uint64(0); i < n; i++ { 135 | go newCluster.collectCommits(i) 136 | } 137 | 138 | return newCluster 139 | } 140 | 141 | // Shut down all servers in the cluster 142 | func (nc *ClusterSimulator) Shutdown() { 143 | 144 | for i := range nc.activeServers.peerSet { 145 | nc.raftCluster[i].DisconnectAll() 146 | nc.isConnected[i] = false 147 | } 148 | 149 | for i := range nc.activeServers.peerSet { 150 | if nc.isAlive[i] { 151 | nc.isAlive[i] = false 152 | nc.raftCluster[i].Stop() 153 | } 154 | } 155 | 156 | for i := range nc.activeServers.peerSet { 157 | close(nc.commitChans[i]) 158 | } 159 | } 160 | 161 | // Reads channel and adds all received entries to the corresponding commits 162 | func (nc *ClusterSimulator) collectCommits(i uint64) error { 163 | for commit := range nc.commitChans[i] { 164 | nc.mu.Lock() 165 | logtest(i, "collectCommits (%d) got %+v", i, commit) 166 | switch v := commit.Command.(type) { 167 | case Read: 168 | break 169 | case Write: 170 | var buf bytes.Buffer // Buffer to hold the data 171 | enc := gob.NewEncoder(&buf) // Create a new encoder 172 | 173 | if err := enc.Encode(v.Val); err != nil { // Encode the data 174 | nc.mu.Unlock() 175 | return err 176 | } 177 | nc.dbCluster[i].Set(v.Key, buf.Bytes()) // Save the data to the database 178 | case RemoveServers: 179 | serverIds := v.ServerIds 180 | for i := uint64(0); i < uint64(len(serverIds)); i++ { 181 | if nc.activeServers.Exists(uint64(serverIds[i])) { 182 | // Cluster Modifications 183 | nc.DisconnectPeer(uint64(serverIds[i])) 184 | nc.isAlive[uint64(serverIds[i])] = false 185 | nc.raftCluster[uint64(serverIds[i])].Stop() 186 | nc.commits[uint64(serverIds[i])] = nc.commits[uint64(serverIds[i])][:0] 187 | close(nc.commitChans[uint64(serverIds[i])]) 188 | 189 | // Removing traces of this server 190 | delete(nc.raftCluster, uint64(serverIds[i])) 191 | delete(nc.dbCluster, uint64(serverIds[i])) 192 | delete(nc.commitChans, uint64(serverIds[i])) 193 | delete(nc.commits, uint64(serverIds[i])) 194 | delete(nc.isAlive, uint64(serverIds[i])) 195 | delete(nc.isConnected, uint64(serverIds[i])) 196 | 197 | nc.activeServers.Remove(uint64(serverIds[i])) 198 | } 199 | } 200 | default: 201 | break 202 | } 203 | nc.commits[i] = append(nc.commits[i], commit) 204 | nc.mu.Unlock() 205 | } 206 | return nil 207 | } 208 | 209 | // Disconnect a server from other servers 210 | func (nc *ClusterSimulator) DisconnectPeer(id uint64) error { 211 | if !nc.activeServers.Exists(uint64(id)) { 212 | return fmt.Errorf("invalid server id passed") 213 | } 214 | logtest(id, "Disconnect %d", id) 215 | 216 | nc.raftCluster[id].DisconnectAll() 217 | for i := range nc.activeServers.peerSet { 218 | if i == id { 219 | continue 220 | } else { 221 | nc.raftCluster[i].DisconnectPeer(id) 222 | } 223 | } 224 | nc.isConnected[id] = false 225 | return nil 226 | } 227 | 228 | // Reconnect a server to other servers 229 | func (nc *ClusterSimulator) ReconnectPeer(id uint64) error { 230 | if !nc.activeServers.Exists(uint64(id)) { 231 | return fmt.Errorf("invalid server id passed") 232 | } 233 | logtest(id, "Reconnect %d", id) 234 | 235 | for i := range nc.activeServers.peerSet { 236 | if i != id && nc.isAlive[i] { 237 | err := nc.raftCluster[id].ConnectToPeer(i, nc.raftCluster[i].GetListenerAddr()) 238 | if err != nil { 239 
| if nc.t != nil { 240 | nc.t.Fatal(err) 241 | } else { 242 | return err 243 | } 244 | } 245 | err = nc.raftCluster[i].ConnectToPeer(id, nc.raftCluster[id].GetListenerAddr()) 246 | if err != nil { 247 | if nc.t != nil { 248 | nc.t.Fatal(err) 249 | } else { 250 | return err 251 | } 252 | } 253 | } 254 | } 255 | 256 | nc.isConnected[id] = true 257 | return nil 258 | } 259 | 260 | // Crash a server and shut it down 261 | func (nc *ClusterSimulator) CrashPeer(id uint64) error { 262 | if !nc.activeServers.Exists(uint64(id)) { 263 | return fmt.Errorf("invalid server id passed") 264 | } 265 | logtest(id, "Crash %d", id) 266 | 267 | nc.DisconnectPeer(id) 268 | 269 | nc.isAlive[id] = false 270 | nc.raftCluster[id].Stop() 271 | 272 | nc.mu.Lock() 273 | nc.commits[id] = nc.commits[id][:0] 274 | nc.mu.Unlock() 275 | return nil 276 | } 277 | 278 | // Restart a server and reconnect to other peers 279 | func (nc *ClusterSimulator) RestartPeer(id uint64) error { 280 | if !nc.activeServers.Exists(uint64(id)) { 281 | return fmt.Errorf("invalid server id passed") 282 | } 283 | if nc.isAlive[id] { 284 | if nc.t != nil { 285 | log.Fatalf("Id %d alive in restart peer", id) 286 | } else { 287 | return fmt.Errorf("id %d alive in restart peer", id) 288 | } 289 | } 290 | logtest(id, "Restart ", id, id) 291 | 292 | peerList := makeSet() 293 | for i := range nc.activeServers.peerSet { 294 | if id == i { 295 | continue 296 | } else { 297 | peerList.Add(i) 298 | } 299 | } 300 | 301 | ready := make(chan interface{}) 302 | 303 | nc.raftCluster[id] = CreateServer(id, peerList, nc.dbCluster[id], ready, nc.commitChans[id]) 304 | nc.raftCluster[id].Serve() 305 | nc.ReconnectPeer(id) 306 | 307 | close(ready) 308 | nc.isAlive[id] = true 309 | time.Sleep(time.Duration(20) * time.Millisecond) 310 | return nil 311 | } 312 | 313 | // Ensure only a single leader 314 | func (nc *ClusterSimulator) CheckUniqueLeader() (int, int, error) { 315 | for r := 0; r < 8; r++ { 316 | leaderId := -1 317 | leaderTerm := -1 318 | 319 | for i := range nc.activeServers.peerSet { 320 | if nc.isConnected[i] { 321 | _, term, isLeader := nc.raftCluster[i].rn.Report() 322 | if isLeader { 323 | if leaderId < 0 { 324 | leaderId = int(i) 325 | leaderTerm = term 326 | } else { 327 | if nc.t != nil { 328 | nc.t.Fatalf("2 ids: %d, %d think they are leaders", leaderId, i) 329 | } else { 330 | return -1, -1, fmt.Errorf("2 ids: %d, %d think they are leaders", leaderId, i) 331 | } 332 | } 333 | } 334 | } 335 | } 336 | if leaderId >= 0 { 337 | return leaderId, leaderTerm, nil 338 | } 339 | time.Sleep(150 * time.Millisecond) 340 | } 341 | 342 | if nc.t != nil { 343 | nc.t.Fatalf("no leader found") 344 | } 345 | return -1, -1, fmt.Errorf("no leader found") 346 | } 347 | 348 | // check if there are no leaders 349 | func (nc *ClusterSimulator) CheckNoLeader() error { 350 | 351 | for i := range nc.activeServers.peerSet { 352 | if nc.isConnected[i] { 353 | if _, _, isLeader := nc.raftCluster[i].rn.Report(); isLeader { 354 | if nc.t != nil { 355 | nc.t.Fatalf("%d is Leader, expected no leader", i) 356 | } else { 357 | return fmt.Errorf("%d is Leader, expected no leader", i) 358 | } 359 | } 360 | } 361 | } 362 | return nil 363 | } 364 | 365 | func (nc *ClusterSimulator) CheckCommitted(cmd int, choice CommitFunctionType) (num int, index int, err error) { 366 | nc.mu.Lock() 367 | defer nc.mu.Unlock() 368 | 369 | err = nil 370 | 371 | // Find the length of the commits slice for connected servers. 
372 | commitsLen := -1 373 | for i := range nc.activeServers.peerSet { 374 | if nc.isConnected[i] { 375 | if commitsLen >= 0 { 376 | // If this was set already, expect the new length to be the same. 377 | if len(nc.commits[i]) != commitsLen { 378 | if nc.t != nil { 379 | nc.t.Fatalf("commits[%d] = %d, commitsLen = %d", i, nc.commits[i], commitsLen) 380 | } else { 381 | err = fmt.Errorf("commits[%d] = %d, commitsLen = %d", i, nc.commits[i], commitsLen) 382 | return -1, -1, err 383 | } 384 | } 385 | } else { 386 | commitsLen = len(nc.commits[i]) 387 | } 388 | } 389 | } 390 | 391 | // Check consistency of commits from the start and to the command we're asked 392 | // about. This loop will return once a command=cmd is found. 393 | for c := 0; c < commitsLen; c++ { 394 | cmdAtC := -1 395 | for i := uint64(0); i < nc.n; i++ { 396 | if nc.isConnected[i] { 397 | cmdOfN := nc.commits[i][c].Command.(int) 398 | if cmdAtC >= 0 { 399 | if cmdOfN != cmdAtC { 400 | if nc.t != nil { 401 | nc.t.Errorf("got %d, want %d at nc.commits[%d][%d]", cmdOfN, cmdAtC, i, c) 402 | } else { 403 | err = fmt.Errorf("got %d, want %d at nc.commits[%d][%d]", cmdOfN, cmdAtC, i, c) 404 | } 405 | } 406 | } else { 407 | cmdAtC = cmdOfN 408 | } 409 | } 410 | } 411 | if cmdAtC == cmd { 412 | // Check consistency of Index. 413 | index := -1 414 | num := 0 415 | for i := uint64(0); i < nc.n; i++ { 416 | if nc.isConnected[i] { 417 | if index >= 0 && int(nc.commits[i][c].Index) != index { 418 | if nc.t != nil { 419 | nc.t.Errorf("got Index=%d, want %d at h.commits[%d][%d]", nc.commits[i][c].Index, index, i, c) 420 | } else { 421 | err = fmt.Errorf("got Index=%d, want %d at h.commits[%d][%d]", nc.commits[i][c].Index, index, i, c) 422 | } 423 | } else { 424 | index = int(nc.commits[i][c].Index) 425 | } 426 | num++ 427 | } 428 | } 429 | return num, index, err 430 | } 431 | } 432 | 433 | // If there's no early return, we haven't found the command we were looking for 434 | 435 | if choice == TestCommitFunction { 436 | if nc.t != nil { 437 | nc.t.Errorf("cmd = %d not found in commits", cmd) 438 | } else { 439 | err = fmt.Errorf("cmd = %d not found in commits", cmd) 440 | } 441 | return 0, -1, err 442 | } else { 443 | return 0, -1, err 444 | } 445 | 446 | } 447 | 448 | func (nc *ClusterSimulator) SubmitToServer(serverId int, cmd interface{}) (bool, interface{}, error) { 449 | if !nc.activeServers.Exists(uint64(serverId)) { 450 | return false, nil, fmt.Errorf("invalid server id passed") 451 | } 452 | switch v := cmd.(type) { 453 | case AddServers: 454 | nc.mu.Lock() 455 | // Cluster modifications 456 | serverIds := v.ServerIds 457 | for i := 0; i < len(serverIds); i++ { 458 | nc.activeServers.Add(uint64(serverIds[i])) 459 | } 460 | ready := make(chan interface{}) 461 | 462 | // creating the new servers to be added 463 | for i := uint64(0); i < uint64(len(serverIds)); i++ { 464 | peerList := makeSet() 465 | 466 | // get PeerList for server i 467 | for j := range nc.activeServers.peerSet { 468 | if uint64(serverIds[i]) == j { 469 | continue 470 | } else { 471 | peerList.Add(j) 472 | } 473 | } 474 | 475 | nc.dbCluster[uint64(serverIds[i])] = NewDatabase() 476 | nc.commitChans[uint64(serverIds[i])] = make(chan CommitEntry) 477 | nc.raftCluster[uint64(serverIds[i])] = CreateServer(uint64(serverIds[i]), peerList, nc.dbCluster[uint64(serverIds[i])], ready, nc.commitChans[uint64(serverIds[i])]) 478 | 479 | nc.raftCluster[uint64(serverIds[i])].Serve() 480 | nc.isAlive[uint64(serverIds[i])] = true 481 | } 482 | 483 | // Connecting peers to each other 
484 | for i := uint64(0); i < uint64(len(serverIds)); i++ { 485 | for j := range nc.activeServers.peerSet { 486 | if uint64(serverIds[i]) == j { 487 | continue 488 | } 489 | nc.raftCluster[uint64(serverIds[i])].ConnectToPeer(j, nc.raftCluster[j].GetListenerAddr()) 490 | nc.raftCluster[j].ConnectToPeer(uint64(serverIds[i]), nc.raftCluster[uint64(serverIds[i])].GetListenerAddr()) 491 | } 492 | nc.isConnected[uint64(serverIds[i])] = true 493 | } 494 | 495 | for i := uint64(0); i < uint64(len(serverIds)); i++ { 496 | go nc.collectCommits(uint64(serverIds[i])) 497 | } 498 | 499 | close(ready) 500 | 501 | nc.mu.Unlock() 502 | return nc.raftCluster[uint64(serverId)].rn.Submit(cmd) 503 | case RemoveServers: 504 | return nc.raftCluster[uint64(serverId)].rn.Submit(cmd) 505 | default: 506 | return nc.raftCluster[uint64(serverId)].rn.Submit(cmd) 507 | } 508 | } 509 | 510 | func logtest(id uint64, logstr string, a ...interface{}) { 511 | if DEBUG > 0 { 512 | logstr = "[" + strconv.Itoa(int(id)) + "] " + "[TEST]" + logstr 513 | log.Printf(logstr, a...) 514 | } 515 | } 516 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "encoding/gob" 6 | "errors" 7 | "fmt" 8 | "os" 9 | "os/signal" 10 | "raft-consensus/raft" 11 | "strconv" 12 | "strings" 13 | "syscall" 14 | ) 15 | 16 | //USER COMMANDS ARGUMENTS 17 | //1-> create cluster number of nodes 18 | //2-> set data key, value, [peerId] 19 | //3-> get data key, [peerId] 20 | //4-> disconnect peer peerId 21 | //5-> reconnect peer peerId 22 | //6-> crash peer peerId 23 | //7-> shutdown _ 24 | //8-> check leader _ 25 | //9-> stop execution _ 26 | 27 | //create cluster with peers 28 | func CreateCluster(peers int) (*raft.ClusterSimulator, error) { 29 | if peers < 0 { 30 | return nil, errors.New("invalid number of peers") 31 | } 32 | cluster := raft.CreateNewCluster(nil, uint64(peers)) 33 | if cluster != nil { 34 | return cluster, nil 35 | } 36 | return nil, errors.New("cluster could not be created") 37 | } 38 | 39 | //write integer value to a string key in the database 40 | //OPTIONAL to pass a particular server id to send command to 41 | func SetData(cluster *raft.ClusterSimulator, key string, val int, serverParam ...int) error { 42 | if cluster == nil { 43 | return errors.New("raft cluster not created") 44 | } 45 | commandToServer := raft.Write{Key: key, Val: val} 46 | serverId := 0 47 | if len(serverParam) >= 1 { 48 | serverId = serverParam[0] 49 | } else { 50 | var err error 51 | serverId, _, err = cluster.CheckUniqueLeader() 52 | if err != nil { 53 | return err 54 | } 55 | } 56 | if serverId < 0 { 57 | return errors.New("unable to submit command to any server") 58 | } 59 | success := false 60 | if success, _, _ = cluster.SubmitToServer(serverId, commandToServer); success { 61 | return nil 62 | } else { 63 | return errors.New("command could not be submitted, try different server(leader)") 64 | } 65 | } 66 | 67 | //read integer value of a string key from the database 68 | //OPTIONAL to pass a particular server id to send command to 69 | func GetData(cluster *raft.ClusterSimulator, key string, serverParam ...int) (int, error) { 70 | if cluster == nil { 71 | return 0, errors.New("raft cluster not created") 72 | } 73 | commandToServer := raft.Read{Key: key} 74 | serverId := 0 75 | if len(serverParam) >= 1 { 76 | serverId = serverParam[0] 77 | } else { 78 | var err error 79 | serverId, _, err = 
cluster.CheckUniqueLeader() 80 | if err != nil { 81 | return 0, err 82 | } 83 | } 84 | if serverId < 0 { 85 | return 0, errors.New("unable to submit command to any server") 86 | } 87 | if success, reply, err := cluster.SubmitToServer(serverId, commandToServer); success { 88 | if err != nil { 89 | return -1, err 90 | } else { 91 | value, _ := reply.(int) 92 | return value, nil 93 | } 94 | } else { 95 | return 0, errors.New("command could not be submitted, try different server(leader)") 96 | } 97 | } 98 | 99 | //add new server to the raft cluster 100 | func AddServers(cluster *raft.ClusterSimulator, serverIds []int) error { 101 | if cluster == nil { 102 | return errors.New("raft cluster not created") 103 | } 104 | commandToServer := raft.AddServers{ServerIds: serverIds} 105 | var err error 106 | serverId, _, err := cluster.CheckUniqueLeader() 107 | 108 | if err != nil { 109 | return err 110 | } 111 | 112 | if serverId < 0 { 113 | return errors.New("unable to submit command to any server") 114 | } 115 | 116 | if success, _, err := cluster.SubmitToServer(serverId, commandToServer); success { 117 | if err != nil { 118 | return err 119 | } else { 120 | return nil 121 | } 122 | } else { 123 | return errors.New("command could not be submitted, try different server") 124 | } 125 | } 126 | 127 | //remove server from the raft cluster 128 | func RemoveServers(cluster *raft.ClusterSimulator, serverIds []int) error { 129 | if cluster == nil { 130 | return errors.New("raft cluster not created") 131 | } 132 | commandToServer := raft.RemoveServers{ServerIds: serverIds} 133 | var err error 134 | serverId, _, err := cluster.CheckUniqueLeader() 135 | 136 | if err != nil { 137 | return err 138 | } 139 | 140 | if serverId < 0 { 141 | return errors.New("unable to submit command to any server") 142 | } 143 | 144 | if success, _, err := cluster.SubmitToServer(serverId, commandToServer); success { 145 | if err != nil { 146 | return err 147 | } else { 148 | return nil 149 | } 150 | } else { 151 | return errors.New("command could not be submitted, try different server") 152 | } 153 | } 154 | 155 | //disconnect a peer from the cluster 156 | func DisconnectPeer(cluster *raft.ClusterSimulator, peerId int) error { 157 | if cluster == nil { 158 | return errors.New("raft cluster not created") 159 | } 160 | if peerId < 0 { 161 | return errors.New("invalid peer id passed") 162 | } 163 | err := cluster.DisconnectPeer(uint64(peerId)) 164 | return err 165 | } 166 | 167 | //reconnect a disconnected peer to the cluster 168 | func ReconnectPeer(cluster *raft.ClusterSimulator, peerId int) error { 169 | if cluster == nil { 170 | return errors.New("raft cluster not created") 171 | } 172 | if peerId < 0 { 173 | return errors.New("invalid peer id passed") 174 | } 175 | err := cluster.ReconnectPeer(uint64(peerId)) 176 | return err 177 | } 178 | 179 | //crash a server 180 | func CrashPeer(cluster *raft.ClusterSimulator, peerId int) error { 181 | if cluster == nil { 182 | return errors.New("raft cluster not created") 183 | } 184 | if peerId < 0 { 185 | return errors.New("invalid peer id passed") 186 | } 187 | err := cluster.CrashPeer(uint64(peerId)) 188 | return err 189 | } 190 | 191 | //restart a server 192 | func RestartPeer(cluster *raft.ClusterSimulator, peerId int) error { 193 | if cluster == nil { 194 | return errors.New("raft cluster not created") 195 | } 196 | if peerId < 0 { 197 | return errors.New("invalid peer id passed") 198 | } 199 | err := cluster.RestartPeer(uint64(peerId)) 200 | return err 201 | } 202 | 203 | //shutdown 
all servers in the cluster and stop raft 204 | func Shutdown(cluster *raft.ClusterSimulator) error { 205 | if cluster == nil { 206 | return errors.New("raft cluster not created") 207 | } 208 | cluster.Shutdown() 209 | cluster = nil 210 | return nil 211 | } 212 | 213 | //check leader of raft cluster 214 | func CheckLeader(cluster *raft.ClusterSimulator) (int, int, error) { 215 | if cluster == nil { 216 | return -1, -1, errors.New("raft cluster not created") 217 | } 218 | return cluster.CheckUniqueLeader() 219 | } 220 | 221 | //shutdown all servers in the cluster and stop raft and stop execution 222 | func Stop(cluster *raft.ClusterSimulator) error { 223 | if cluster == nil { 224 | return nil 225 | } 226 | cluster.Shutdown() 227 | cluster = nil 228 | return nil 229 | } 230 | 231 | func PrintMenu() { 232 | fmt.Println("\n\n RAFT MENU: [nodes are 0 indexed]") 233 | fmt.Println("+---------------------------+------------------------------------+") 234 | fmt.Println("| Sr | USER COMMANDS | ARGUMENTS |") 235 | fmt.Println("+----+----------------------+------------------------------------+") 236 | fmt.Println("| 1 | create cluster | number of nodes |") 237 | fmt.Println("| 2 | set data | key, value, peerId (optional) |") 238 | fmt.Println("| 3 | get data | key, peerId (optional) |") 239 | fmt.Println("| 4 | disconnect peer | peerId |") 240 | fmt.Println("| 5 | reconnect peer | peerId |") 241 | fmt.Println("| 6 | crash peer | peerId |") 242 | fmt.Println("| 7 | restart peer | peerId |") 243 | fmt.Println("| 8 | shutdown | _ |") 244 | fmt.Println("| 9 | check leader | _ |") 245 | fmt.Println("| 10 | stop execution | _ |") 246 | fmt.Println("| 11 | add servers | [peerIds] |") 247 | fmt.Println("| 12 | remove servers | [peerIds] |") 248 | fmt.Println("+----+----------------------+------------------------------------+") 249 | fmt.Println("") 250 | fmt.Println("+-------------------- USER ----------------------------+") 251 | fmt.Println("+ +") 252 | fmt.Println("+ User input should be of the format: Sr ...Arguments +") 253 | fmt.Println("+ Example: 2 4 1 3 +") 254 | fmt.Println("+----------------------------------------------------------------+") 255 | fmt.Println("") 256 | } 257 | 258 | func main() { 259 | var input string 260 | var cluster *raft.ClusterSimulator = nil 261 | var peers int = 0 262 | 263 | sigCh := make(chan os.Signal) 264 | signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM, syscall.SIGINT) 265 | 266 | go func() { 267 | <-sigCh 268 | fmt.Println("SIGNAL RECEIVED") 269 | Stop(cluster) 270 | os.Exit(0) 271 | }() 272 | 273 | gob.Register(raft.Write{}) 274 | gob.Register(raft.Read{}) 275 | gob.Register(raft.AddServers{}) 276 | gob.Register(raft.RemoveServers{}) 277 | 278 | fmt.Println("\n\n=============================================================") 279 | fmt.Println("= Ensure that you set [DEBUG=0] in [raft/raft.go] file =") 280 | fmt.Println("=============================================================") 281 | PrintMenu() 282 | 283 | for { 284 | fmt.Println("WAITING FOR INPUTS..") 285 | fmt.Println("") 286 | 287 | reader := bufio.NewReader(os.Stdin) 288 | input, _ = reader.ReadString('\n') 289 | tokens := strings.Fields(input) 290 | command, err0 := strconv.Atoi(tokens[0]) 291 | if err0 != nil { 292 | fmt.Println("Wrong input") 293 | continue 294 | } 295 | switch command { 296 | case 1: 297 | if len(tokens) < 2 { 298 | fmt.Println("number of peers not passed") 299 | break 300 | } 301 | var err error 302 | peers, err = strconv.Atoi(tokens[1]) 303 | if err != nil { 304 | 
fmt.Println("invalid number of peers") 305 | break 306 | } 307 | cluster, err = CreateCluster(peers) 308 | if err == nil { 309 | fmt.Printf("CLUSTER OF %d PEERS CREATED !!!\n", peers) 310 | } else { 311 | fmt.Printf("err: %v\n", err) 312 | cluster = nil 313 | } 314 | case 2: 315 | if len(tokens) < 3 { 316 | fmt.Println("key or value not passed") 317 | break 318 | } 319 | val, err := strconv.Atoi(tokens[2]) 320 | if err != nil { 321 | fmt.Println("invalid value passed") 322 | break 323 | } 324 | serverId := 0 325 | if len(tokens) >= 4 { 326 | serverId, err = strconv.Atoi(tokens[3]) 327 | if err != nil /*|| serverId >= peers*/ { 328 | fmt.Printf("invalid server id %d passed\n", serverId) 329 | break 330 | } 331 | err = SetData(cluster, tokens[1], val, serverId) 332 | } else { 333 | err = SetData(cluster, tokens[1], val) 334 | } 335 | if err == nil { 336 | fmt.Printf("WRITE TO KEY %s WITH VALUE %d SUCCESSFUL\n", tokens[1], val) 337 | } else { 338 | fmt.Printf("%v\n", err) 339 | } 340 | case 3: 341 | if len(tokens) < 2 { 342 | fmt.Println("key not passed") 343 | break 344 | } 345 | var err error 346 | var val int 347 | serverId := 0 348 | if len(tokens) >= 3 { 349 | serverId, err = strconv.Atoi(tokens[2]) 350 | if err != nil /*|| serverId >= peers*/ { 351 | fmt.Printf("invalid server id %d passed\n", serverId) 352 | break 353 | } 354 | val, err = GetData(cluster, tokens[1], serverId) 355 | } else { 356 | val, err = GetData(cluster, tokens[1]) 357 | } 358 | if err == nil { 359 | fmt.Printf("READ KEY %s VALUE %d\n", tokens[1], val) 360 | } else { 361 | fmt.Printf("%v\n", err) 362 | } 363 | case 4: 364 | if len(tokens) < 2 { 365 | fmt.Println("peer id not passed") 366 | break 367 | } 368 | peer, err := strconv.Atoi(tokens[1]) 369 | if err != nil /*|| peer >= peers*/ { 370 | fmt.Printf("invalid server id %d passed\n", peer) 371 | break 372 | } 373 | 374 | err = DisconnectPeer(cluster, peer) 375 | if err == nil { 376 | fmt.Printf("PEER %d DISCONNECTED\n", peer) 377 | } else { 378 | fmt.Printf("%v\n", err) 379 | } 380 | case 5: 381 | if len(tokens) < 2 { 382 | fmt.Println("peer id not passed") 383 | break 384 | } 385 | peer, err := strconv.Atoi(tokens[1]) 386 | if err != nil /*|| peer >= peers */ { 387 | fmt.Printf("invalid server id %d passed\n", peer) 388 | break 389 | } 390 | err = ReconnectPeer(cluster, peer) 391 | if err == nil { 392 | fmt.Printf("PEER %d RECONNECTED\n", peer) 393 | } else { 394 | fmt.Printf("%v\n", err) 395 | } 396 | case 6: 397 | if len(tokens) < 2 { 398 | fmt.Println("peer id not passed") 399 | break 400 | } 401 | peer, err := strconv.Atoi(tokens[1]) 402 | if err != nil /*|| peer >= peers*/ { 403 | fmt.Printf("invalid server id %d passed\n", peer) 404 | break 405 | } 406 | err = CrashPeer(cluster, peer) 407 | if err == nil { 408 | fmt.Printf("PEER %d CRASHED\n", peer) 409 | } else { 410 | fmt.Printf("%v\n", err) 411 | } 412 | case 7: 413 | if len(tokens) < 2 { 414 | fmt.Println("peer id not passed") 415 | break 416 | } 417 | peer, err := strconv.Atoi(tokens[1]) 418 | if err != nil /*|| peer >= peers*/ { 419 | fmt.Printf("invalid server id %d passed\n", peer) 420 | break 421 | } 422 | err = RestartPeer(cluster, peer) 423 | if err == nil { 424 | fmt.Printf("PEER %d RESTARTED\n", peer) 425 | } else { 426 | fmt.Printf("%v\n", err) 427 | } 428 | case 8: 429 | err := Shutdown(cluster) 430 | if err == nil { 431 | fmt.Println("ALL SERVERS STOPPED AND RAFT SERVICE STOPPED") 432 | } else { 433 | fmt.Printf("%v\n", err) 434 | } 435 | cluster = nil 436 | case 9: 437 | leaderId, term, 
err := CheckLeader(cluster) 438 | if err == nil { 439 | fmt.Printf("LEADER ID: %d, TERM: %d\n", leaderId, term) 440 | } else { 441 | fmt.Printf("%v\n", err) 442 | } 443 | case 10: 444 | err := Stop(cluster) 445 | if err == nil { 446 | fmt.Println("STOPPING EXECUTION, NO INPUTS WILL BE TAKEN FURTHER") 447 | cluster = nil 448 | return 449 | } else { 450 | fmt.Printf("%v\n", err) 451 | } 452 | case 11: 453 | if len(tokens) < 2 { 454 | fmt.Println("peer ids not passed") 455 | break 456 | } 457 | serverIds := make([]int, len(tokens)-1) 458 | var val int 459 | var err error 460 | for i := 1; i < len(tokens); i++ { 461 | val, err = strconv.Atoi(tokens[i]) 462 | if err != nil { 463 | fmt.Println("Invalid server ID") 464 | break 465 | } 466 | serverIds[i-1] = val 467 | } 468 | 469 | err = AddServers(cluster, serverIds) 470 | if err == nil { 471 | fmt.Printf("Added ServerIDs: %v to cluster", serverIds) 472 | } else { 473 | fmt.Printf("%v\n", err) 474 | } 475 | case 12: 476 | if len(tokens) < 2 { 477 | fmt.Println("peer ids not passed") 478 | break 479 | } 480 | serverIds := make([]int, len(tokens)-1) 481 | var val int 482 | var err error 483 | for i := 1; i < len(tokens); i++ { 484 | val, err = strconv.Atoi(tokens[i]) 485 | if err != nil { 486 | fmt.Println("Invalid server ID") 487 | break 488 | } 489 | serverIds[i-1] = val 490 | } 491 | 492 | err = RemoveServers(cluster, serverIds) 493 | if err == nil { 494 | fmt.Printf("Removed ServerIDs: %v from cluster", serverIds) 495 | } else { 496 | fmt.Printf("%v\n", err) 497 | } 498 | default: 499 | fmt.Println("Invalid Command") 500 | } 501 | fmt.Println("\n---------------------------------------------------------") 502 | PrintMenu() 503 | } 504 | } 505 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Raft Consensus Algorithm 2 | 3 |
[![Contributors][contributors-shield]][contributors-url]
[![Forks][forks-shield]][forks-url]
[![Stargazers][stars-shield]][stars-url]
[![Issues][issues-shield]][issues-url]
[![MIT License][license-shield]][license-url]
[![LinkedIn][linkedin-shield]][linkedin-url]

![Logo](images/logo.png)

### Raft Consensus

A prototype for demonstrating the raft consensus algorithm.

Explore the docs » · View Demo · Report Bug · Request Feature
Table of Contents

1. About The Project
2. Getting Started
3. Project Details
4. Usage
5. License
6. Contact
7. Acknowledgments
76 | 77 | 78 | 79 | ## About The Project 80 | 81 | ![Screen Shot](images/overall.png) 82 | 83 | This project demonstrates an implementation of the `Raft Consensus algorithm`, a consensus-based protocol for distributed systems. It was built as part of the course `CS60002` **_Distributed Systems_** at the Indian Institute of Technology, Kharagpur. It implements a simple version of the Raft protocol that can be used as a base template for building your own distributed system by adding features. The core features implemented in this project are: 84 | 85 | - Raft Consensus RPCs 86 | - `RequestVote` RPC 87 | - `AppendEntries` RPC 88 | - Raft Log 89 | - `Log` class 90 | - Raft State Machine 91 | - `StateMachine` (a simple state machine) 92 | - Raft Leader Election 93 | - `LeaderElection` RPC 94 | - Raft Consensus 95 | - `RaftConsensus` class 96 | - `Membership Change` Feature 97 | - Visualization with `timing diagram` 98 | - Single `client interface` for testing the features 99 | 100 | A single client interface was built mainly because this is a simple working prototype and not an industrial-strength distributed system. The client interface is a simple command-line interface that can be used to test the features of the project. All the RPCs are implemented in accordance with [In Search of an Understandable Consensus Algorithm](https://raft.github.io/raft.pdf) by Diego Ongaro and John Ousterhout. This implementation of Raft can be used as a base model and extended to build your own distributed system by adding advanced features and implementing multiple client gateways. 101 | 102 |
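The command-line client in `main.go` drives the cluster through the simulator exported by `raft/simulator.go`. The following is a minimal sketch of using that API directly, based on the helpers visible in `main.go` (the key `"x"` and value `42` are only illustrative, and error handling is trimmed):

```go
package main

import (
	"encoding/gob"
	"fmt"
	"raft-consensus/raft"
)

func main() {
	// Commands are replicated as interface{} values, so the concrete command
	// types are registered with gob, as main.go does at startup.
	gob.Register(raft.Write{})
	gob.Register(raft.Read{})

	// Spin up a three-node cluster (ids 0, 1, 2); passing nil for *testing.T
	// runs the simulator outside of the test harness.
	cluster := raft.CreateNewCluster(nil, 3)
	defer cluster.Shutdown()

	// CheckUniqueLeader polls for a bounded time, so it also serves as
	// "wait for the first election to settle".
	leader, term, err := cluster.CheckUniqueLeader()
	if err != nil {
		panic(err)
	}
	fmt.Printf("leader=%d term=%d\n", leader, term)

	// Replicate a write through the leader, then read the key back.
	if ok, _, _ := cluster.SubmitToServer(leader, raft.Write{Key: "x", Val: 42}); !ok {
		fmt.Println("write rejected; resubmit to the current leader")
	}
	if ok, reply, _ := cluster.SubmitToServer(leader, raft.Read{Key: "x"}); ok {
		fmt.Println("read reply:", reply)
	}
}
```

Submissions are only accepted by the current leader; on rejection the caller is expected to re-check the leader and retry, which is exactly what the menu-driven client does.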

(back to top)

103 | 104 | ### Built With 105 | 106 | The following are the major frameworks/libraries used to bootstrap this project, along with the dependencies and add-ons it uses. 107 | 108 | - [Golang](https://go.dev/) 109 | - [Leak Test](https://github.com/fortytw2/leaktest) - _Required for memory leak testing_ 110 | - [Pretty Tables](https://github.com/jedib0t/go-pretty) - _Used in Timing Diagram visualization_ 111 | - [net/rpc](https://pkg.go.dev/net/rpc) - _Wrappers required for building Raft RPCs_ 112 | - [Shell Script](https://www.javatpoint.com/shell-scripting-tutorial) 113 | 114 |

(back to top)

115 | 116 | ## Project Details 117 | 118 | Following are the details of the file structure of this project: 119 | 120 | ``` 121 | raft-consensus 122 | ├──── LICENSE 123 | ├──── README.md 124 | ├──── go.mod 125 | ├──── go.sum 126 | ├──── images 127 | │ └── logo.png 128 | │ └── overall.png 129 | │ └── test1.png 130 | │ └── test2.png 131 | │ └── timing.png 132 | ├──── main.go 133 | ├──── raft 134 | │ ├── config.go 135 | │ ├── raft.go 136 | │ ├── raft_test.go 137 | │ ├── server.go 138 | │ ├── simulator.go 139 | │ └── storage.go 140 | └──── utils 141 | ├── viz.go 142 | ├── visualize.sh 143 | ├── sample_logs.txt 144 | └── sample_output.txt 145 | ``` 146 | 147 | Following are the details of the file structure and their functionalities that are present in this code base. 148 | 149 | - **raft/server.go** - _This file contains all the necessary code for implementing servers in a network using TCP along with various Remote Procedural Calls_ 150 | - `Server struct` - Structure to define a service object 151 | - `Server` methods - Methods to implement the server 152 | - **_CreateServer\:_** create a Server Instance with serverId and list of peerIds 153 | - **_ConnectionAccept\:_** keep listening for incoming connections and serve them 154 | - **_Serve\:_** start a new service 155 | - **_Stop\:_** stop an existing service 156 | - **_ConnectToPeer\:_** connect to another server or peer 157 | - **_DisconnectPeer\:_** disconnect from a particular peer 158 | - **_RPC\:_** make an RPC call to the particular peer 159 | - **_RequestVote\:_** RPC call from a raft node for RequestVote 160 | - **_AppendEntries\:_** RPC call from a raft node for AppendEntries 161 | - **raft/raft.go** - This file contains the implementation of the Raft Consensus algorithm 162 | - `RNState` - Enum to define the state of the Raft Node 163 | - **_Follower\:_** Follower state 164 | - **_Candidate\:_** Candidate state 165 | - **_Leader\:_** Leader state 166 | - **_Dead\:_** Dead/Shutdown state 167 | - `CommitEntry` - Structure to define a commit entry 168 | - **_Command\:_** Command to be executed 169 | - **_Term\:_** Term in which the command was executed 170 | - **_Index\:_** Index of the command in the log 171 | - `LogEntry` - Structure to define a log entry 172 | - **_Command\:_** Command to be executed 173 | - **_Index\:_** Index of the command in the log 174 | - `RequestVoteArgs` - Structure to define the arguments for RequestVote RPC 175 | - **_Term\:_** Term of the candidate requesting vote 176 | - **_CandidateId\:_** Id of the candidate requesting vote 177 | - **_LastLogIndex\:_** Index of the last log entry 178 | - **_LastLogTerm\:_** Term of the last log entry 179 | - `RequestVoteReply` - Structure to define the reply for RequestVote RPC 180 | - **_Term\:_** Term of the leader 181 | - **_VoteGranted\:_** Vote granted or not 182 | - `AppendEntriesArgs` - Structure to define the arguments for AppendEntries RPC 183 | - **_Term\:_** Term of the leader 184 | - **_LeaderId\:_** Id of the leader 185 | - **_PrevLogIndex\:_** Index of the previous log entry 186 | - **_PrevLogTerm\:_** Term of the previous log entry 187 | - **_Entries\:_** List of log entries 188 | - **_LeaderCommit\:_** Index of the leader's commit 189 | - `AppendEntriesReply` - Structure to define the reply for AppendEntries RPC 190 | - **_Term\:_** Term of the leader 191 | - **_Success\:_** Success or not 192 | - **_ConflictIndex\:_** Index of the conflicting log entry 193 | - **_ConflictTerm\:_** Term of the conflicting log entry 194 | - `RaftNode` - Structure to 
define a raft node 195 | - **_id\:_** Id of the raft node 196 | - **_peerList\:_** List of peers 197 | - **_state\:_** State of the raft node 198 | - **_currentTerm\:_** Current term of the raft node 199 | - **_votedFor\:_** Id of the candidate voted for in the current term 200 | - **_CommitIndex\:_** Index of the last committed entry 201 | - **_lastApplied\:_** Index of the last applied entry 202 | - **_Log\:_** Log of the raft node 203 | - **_NextIndex\:_** Next index of the follower 204 | - **_MatchIndex\:_** Match index of the follower 205 | - **_server\:_** Server object of the raft node 206 | - **_db_\:\_** Database object of the raft node 207 | - **_commitChan\:_** Channel to send the commit index of logs to the state machine 208 | - **_newCommitReady\:_** Internal channel used to notify that new log entries may be sent on commitChan 209 | - **_trigger\:_** Trigger AppendEntries RPC when some relevant condition is met 210 | - **_electionResetEvent\:_** Last time at which the election timer was reset 211 | - `Raft utility` functions 212 | - **_sendCommit\:_** Send the commit index to the state machine 213 | - **_runElectionTimer\:_** Reset the election timer 214 | - **_electionTimeout\:_** Set Election timeout 215 | - **_startElection\:_** Start an election 216 | - **_becomeLeader\:_** helper function to become the leader 217 | - **_leaderSendAEs\:_** Send AppendEntries RPCs to all the followers in the cluster and update Node 218 | - **_lastLogIndexAndTerm\:_** Get the last log index and term 219 | - **_AppendEntries\:_** Send AppendEntries RPCs to all the followers 220 | - **_RequestVote\:_** Send RequestVote RPCs to all the peers 221 | - **_becomeFollower\:_** helper function to become the follower 222 | - **_restoreFromStorage\:_** Restore the state of the raft node from storage 223 | - **_readFromStorage\:_** Read the state of the raft node from storage 224 | - **_Submit\:_** Submit a command to the raft node 225 | - **_Stop\:_** Stop the raft node 226 | - **_Report\:_** Report the state of the raft node 227 | - **raft/simulator.go** - _This file contains all the necessary code to setup a cluster of raft nodes, interact with the cluster and execute different commands such as read, write and config change on the cluster._ 228 | - `ClusterSimulator` struct - Structure to define a Raft cluster 229 | - `Simulator` methods - Methods to implement the cluster 230 | - **_CreateNewCluster\:_** create a new Raft cluster consisting of a given number of nodes and establish 231 | - connections between them 232 | - **_Shutdown\:_** shut down all servers in the cluster 233 | - **_CollectCommits\:_** reads channel and adds all received entries to the corresponding commits 234 | - **_DisconnectPeer\:_** disconnect a server from other servers 235 | - **_ReconnectPeer\:_** reconnect a disconnected server to other servers 236 | - **_CrashPeer\:_** crash a server and shut it down 237 | - **_RestartPeer\:_** restart a crashed server and reconnect to other peers 238 | - **_SubmitToServer\:_** submit a command to a server 239 | - **_Check_Functions\:_** auxiliary helper functions to check the status of the raft cluster: CheckUniqueLeader, CheckNoLeader and CheckCommitted 240 | - **raft/raft_test.go** - _This file has a set of test functions designed to test the various functionalities of the raft protocol. 
The tests can be divided into 3 major classes:_ 241 | - **_Tests to check Leader Election_** 242 | - **_Tests to check Command Commits_** 243 | - **_Tests to check Membership Changes_** 244 | - **raft/config.go** - _This file has a custom implementation of a Set data structure, since Go does not provide one natively. This implementation is inspired by [Set in Golang](https://golangbyexample.com/set-implementation-in-golang/). It provides the following functions (a usage sketch is given right after this list):_ 245 | - **_makeSet\:_** make a new set of type uint64 246 | - **_Exists\:_** check if an element exists in the set 247 | - **_Add\:_** add a new element to the set 248 | - **_Remove\:_** remove an element from the set 249 | - **_Size\:_** get the number of elements in the set 250 | - **utils/viz.go** - _This file contains the visualization functions for the raft protocol. It is used to visualize the raft protocol's timing diagram._ 251 | - **_ParseTestLog\:_** parse the log file and return the list of commands 252 | - **_TableViz\:_** visualize the raft protocol in a table format 253 |
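Below is the usage sketch referenced in the **raft/config.go** item above. It is illustrative only and not part of the code base; the hypothetical `exampleSetUsage` function is assumed to live inside the `raft` package, since the `Set` type and `makeSet` are unexported.

```go
package raft

import "fmt"

// exampleSetUsage is an illustrative sketch (not part of the repository):
// it tracks a cluster's peer IDs using the Set from raft/config.go.
func exampleSetUsage() {
	peers := makeSet() // empty set of uint64 peer IDs
	peers.Add(1)
	peers.Add(2)

	if peers.Exists(2) {
		fmt.Println("peer 2 is part of the current configuration")
	}

	// Remove returns an error when the element is not present in the set.
	if err := peers.Remove(3); err != nil {
		fmt.Println(err)
	}

	fmt.Println("number of peers:", peers.Size()) // prints 2
}
```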

(back to top)

255 | 256 | 257 | 258 | ## Getting Started 259 | 260 | To get a local copy up and running follow these simple steps. 261 | 262 | ### Prerequisites 263 | 264 | - **Go** 265 | To run the code in this Assignment, one needs to have Go installed in their system. If it is not 266 | already installed, it can be done by following the steps in [Install Go Ubuntu](https://www.tecmint.com/install-go-in-ubuntu/#:~:text=To%20download%20the%20latest%20version,download%20it%20on%20the%20terminal.&text=Next%2C%20extract%20the%20tarball%20to%20%2Fusr%2Flocal%20directory.&text=Add%20the%20go%20binary%20path,a%20system%2Dwide%20installation) 267 | 268 | ### Installation 269 | 270 | _In order to setup a local copy of the project, you can follow the one of the 2 methods listed below. Once the local copy is setup, the steps listed in [Usage](#usage) can be used to interact with the system._ 271 | 272 | 1. `Clone` the repo 273 | ```sh 274 | git clone https://github.com/debajyotidasgupta/raft-consensus 275 | ``` 276 | 2. Alternatively, `unzip` the attached submission zip file to unpack all the files included with the project. 277 | ```sh 278 | unzip 279 | ``` 280 | 3. Change directory to the `raft-consensus` directory 281 | ```sh 282 | cd raft-consensus 283 | ``` 284 | 4. If some dependency is missing, `install` it with the following command 285 | ```go 286 | go get 287 | ``` 288 | 289 |

(back to top)

290 | 291 | ### Setting DEBUG level 292 | 293 | _In order to obtain logs regarding the execution of Raft algorithm you need to set DEBUG variable as 1 inside raft/raft.go_ 294 | _Similarly if you do not wish to see huge logs and just see the outputs of execution you can set the DEBUG level to 0 (recommended)_ 295 | 296 | 297 | 298 | ## Usage 299 | 300 | Once the local copy of the project has been setup, follow these steps to interact with the system and run tests on the system 301 | 302 | ### User interaction with the system 303 | 304 | _To interact with the system from the console, do the following steps\:_ 305 | 306 | 1. Open terminal from the main project directory 307 | 2. Run the main go file (Ensure that `DEBUG` is set to `0` in `raft/raft.go` file) 308 | ```sh 309 | go run main.go 310 | ``` 311 | 3. You will be presented with a menu with necessary commands to create raft cluster, send commands, etc. 312 | 313 | > **NOTE:** While using the features like set value, get value etc., that should pass through the leader node, you can user the 9th menu and find the leader and then send the request to leader node. Sending a such a request to a non leader node will lead to failure. This implementation is in accordance with the official Raft Implementation from the paper. 314 | 315 | ### Running tests 316 | 317 | _A comprehensive set of tests has been provided in **raft/raft_test.go**. In order to run these tests, do the following steps\:_ 318 | 319 | 1. To run a particular test execute the following command from the main project directory 320 | ```sh 321 | go test -timeout 30s -v -run ^[Test Name]$ raft-consensus/raft 322 | ``` 323 | 2. To run the entire test suite run the following command from the main project directory 324 | ```sh 325 | go test -v raft-consensus/raft 326 | ``` 327 | 328 | ![test1](images/test1.png) 329 | ![test2](images/test2.png) 330 | 331 | ### Visualizing Test Results 332 | 333 | _The **utils** directory provides functionalities to cleanly visualize the test logs in the form of a timing diagram table. To visualize the test logs follow the steps below\:_ 334 | 335 | 1. [**_Important_**] Ensure that the DEBUG level is set to 1 in **raft/raft.go** 336 | 337 | ```sh 338 | const DEBUG = 1 339 | ``` 340 | 341 | 2. Run a test and save its logs in the utils directory (execute from root project folder `raft-consensus`). 342 | ```sh 343 | go test -timeout 30s -v -run ^[Test Name]$ raft-consensus/raft > utils/logs.txt 344 | ``` 345 | 3. Use the logs to generate the timing diagram using the **utils/viz.go** file (This is to be executed from inside the `utils` directory) 346 | ```sh 347 | cd utils 348 | go run viz.go < logs.txt > output.txt 349 | ``` 350 | 351 | Alternatively, you can use the following command to generate the timing diagram from the logs 352 | 353 | 1. [**_Important_**] Ensure that the DEBUG level is set to 1 in **raft/raft.go** 354 | ```sh 355 | const DEBUG = 1 356 | ``` 357 | 2. Run the following command from inside the `utils` directory 358 | ```sh 359 | ./visualize.sh -t 360 | ``` 361 | 362 | - In both cases, the output will be saved in the `utils` directory as `output.txt` 363 | - A sample log file and output file is provided in the `utils` directory. 364 | 365 | ![timing](images/timing.png) 366 | 367 |

(back to top)

368 | 369 | 370 | 371 | ## License 372 | 373 | Distributed under the MIT License. See `LICENSE` for more information. 374 | 375 |

(back to top)

376 | 377 | 378 | 379 | ## Contact 380 | 381 | | Name | Roll No. | Email | 382 | | ------------------ | --------- | ---------------------------- | 383 | | Debajyoti Dasgupta | 18CS30051 | debajyotidasgupta6@gmail.com | 384 | | Somnath Jena | 18CS30047 | somnathjena.2011@gmail.com | 385 | | Sagnik Roy | 18CS10063 | sagnikr38@gmail.com | 386 | 387 |

(back to top)

388 | 389 | 390 | 391 | ## Acknowledgments 392 | 393 | A list of resources we found helpful and would like to credit. 394 | 395 | - [The Raft Consensus Algorithm](https://raft.github.io/) 396 | - [In Search of an Understandable Consensus Algorithm](https://raft.github.io/raft.pdf) 397 | - [You Must Build A Raft](https://www.youtube.com/watch?v=Hm_m4MIXn9Q) 398 | - [Practical Distributed Consensus using HashiCorp/raft](https://www.youtube.com/watch?v=EGRmmxVFOfE) 399 | - [Lecture Notes CS60002](https://cse.iitkgp.ac.in/~sandipc/courses/cs60002/cs60002.html) 400 | 401 |

(back to top)

402 | 403 | 404 | 405 | 406 | [contributors-shield]: https://img.shields.io/github/contributors/debajyotidasgupta/raft-consensus?style=for-the-badge 407 | [contributors-url]: https://github.com/debajyotidasgupta/raft-consensus/graphs/contributors 408 | [forks-shield]: https://img.shields.io/github/forks/debajyotidasgupta/raft-consensus?style=for-the-badge 409 | [forks-url]: https://github.com/debajyotidasgupta/raft-consensus/network/members 410 | [stars-shield]: https://img.shields.io/github/stars/debajyotidasgupta/raft-consensus?style=for-the-badge 411 | [stars-url]: https://github.com/debajyotidasgupta/raft-consensus/stargazers 412 | [issues-shield]: https://img.shields.io/github/issues/debajyotidasgupta/raft-consensus?style=for-the-badge 413 | [issues-url]: https://github.com/debajyotidasgupta/raft-consensus/issues 414 | [license-shield]: https://img.shields.io/github/license/debajyotidasgupta/raft-consensus?style=for-the-badge 415 | [license-url]: https://github.com/debajyotidasgupta/raft-consensus/blob/main/LICENSE 416 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555 417 | [linkedin-url]: https://www.linkedin.com/in/debajyoti-dasgupta/ 418 | -------------------------------------------------------------------------------- /raft/raft_test.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "encoding/gob" 5 | "log" 6 | "math/rand" 7 | "strconv" 8 | "sync" 9 | "testing" 10 | "time" 11 | 12 | "github.com/fortytw2/leaktest" 13 | ) 14 | 15 | type Addr struct { 16 | network string 17 | address string 18 | } 19 | 20 | func (a Addr) Network() string { 21 | return a.network 22 | } 23 | 24 | func (a Addr) String() string { 25 | return a.address 26 | } 27 | 28 | var wg = sync.WaitGroup{} 29 | 30 | var ports map[uint64]string = map[uint64]string{} 31 | 32 | func communicate(serverId uint64, numPeers uint64, s *Server) { 33 | defer wg.Done() 34 | for { 35 | peerId := uint64(rand.Intn(int(numPeers)) + 1) 36 | pause := uint64(rand.Intn(50)) 37 | pauseTime := pause * uint64(time.Millisecond) 38 | 39 | time.Sleep(time.Duration(pauseTime)) 40 | msg := uint64(rand.Intn(1000)) 41 | if peerId == serverId { 42 | log.Println(serverId, "Shutting") 43 | s.DisconnectAll() 44 | s.Stop() 45 | log.Println(serverId, "Stopped") 46 | return 47 | } else { 48 | addr := Addr{"tcp", "[::]:" + ports[peerId]} 49 | err := s.ConnectToPeer(peerId, addr) 50 | var reply uint64 51 | if err == nil { 52 | log.Printf("[%d] sending %d to [%d]\n", serverId, msg, peerId) 53 | s.RPC(peerId, "ServiceType.DisplayMsg", msg, &reply) 54 | if reply != 2*msg { 55 | s.DisconnectAll() 56 | s.Stop() 57 | log.Fatalf("[%d] returned %d expected %d\n", peerId, reply, 2*msg) 58 | } 59 | } 60 | } 61 | } 62 | } 63 | 64 | // Change the numPeers to test with different number of peers 65 | func TestServerClient(t *testing.T) { 66 | var numPeers uint64 = 5 67 | var port = 2000 68 | 69 | for i := uint64(1); i <= numPeers; i++ { 70 | portStr := strconv.Itoa(port) 71 | ports[i] = portStr 72 | port++ 73 | } 74 | 75 | var servers []*Server 76 | 77 | for i := uint64(1); i <= numPeers; i++ { 78 | peerList := makeSet() 79 | j := 0 80 | for peerId := uint64(1); peerId <= numPeers; peerId++ { 81 | if peerId != i { 82 | peerList.Add(uint64(j)) 83 | j++ 84 | } 85 | } 86 | 87 | db := NewDatabase() 88 | ready := make(chan interface{}) 89 | commitChan := make(chan CommitEntry) 90 | 91 | s := CreateServer(i, peerList, db, ready, 
commitChan) 92 | if s == nil { 93 | t.Errorf("ERROR: server could not be created") 94 | } 95 | servers = append(servers, s) 96 | s.Serve(ports[i]) 97 | } 98 | 99 | for i := uint64(1); i <= numPeers; i++ { 100 | wg.Add(1) 101 | go communicate(i, numPeers, servers[i-1]) 102 | } 103 | 104 | wg.Wait() 105 | } 106 | 107 | //REAL RAFT TESTS START HERE 108 | 109 | func TestElectionNormal(t *testing.T) { 110 | defer leaktest.CheckTimeout(t, 100*time.Millisecond) 111 | 112 | cs := CreateNewCluster(t, 3) 113 | defer cs.Shutdown() 114 | cs.CheckUniqueLeader() 115 | } 116 | 117 | func TestElectionLeaderDisconnect(t *testing.T) { 118 | defer leaktest.CheckTimeout(t, 100*time.Millisecond) 119 | 120 | cs := CreateNewCluster(t, 3) 121 | defer cs.Shutdown() 122 | 123 | initialLeader, initialLeaderTerm, _ := cs.CheckUniqueLeader() 124 | 125 | cs.DisconnectPeer(uint64(initialLeader)) 126 | time.Sleep(300 * time.Millisecond) 127 | 128 | newLeader, newLeaderTerm, _ := cs.CheckUniqueLeader() 129 | 130 | if newLeader == initialLeader { 131 | t.Errorf("new leader expected to be different from initial leader") 132 | } 133 | if newLeaderTerm <= initialLeaderTerm { 134 | t.Errorf("new leader term expected to be > initial leader term, new term=%d old term=%d", newLeaderTerm, initialLeaderTerm) 135 | } 136 | } 137 | 138 | func TestElectionLeaderAndFollowerDisconnect(t *testing.T) { 139 | defer leaktest.CheckTimeout(t, 100*time.Millisecond) 140 | 141 | cs := CreateNewCluster(t, 3) 142 | defer cs.Shutdown() 143 | 144 | initialLeader, _, _ := cs.CheckUniqueLeader() 145 | cs.DisconnectPeer(uint64(initialLeader)) 146 | follower := (initialLeader + 1) % 3 147 | cs.DisconnectPeer(uint64(follower)) 148 | 149 | time.Sleep(300 * time.Millisecond) 150 | cs.CheckNoLeader() 151 | 152 | cs.ReconnectPeer(uint64(initialLeader)) 153 | time.Sleep(100 * time.Millisecond) 154 | cs.CheckUniqueLeader() 155 | 156 | } 157 | 158 | func TestElectionLeaderDisconnectAndReconnect(t *testing.T) { 159 | defer leaktest.CheckTimeout(t, 100*time.Millisecond) 160 | 161 | cs := CreateNewCluster(t, 3) 162 | defer cs.Shutdown() 163 | 164 | initialLeader, initialLeaderTerm, _ := cs.CheckUniqueLeader() 165 | cs.DisconnectPeer(uint64(initialLeader)) 166 | time.Sleep(300 * time.Millisecond) 167 | 168 | newLeader, newLeaderTerm, _ := cs.CheckUniqueLeader() 169 | 170 | if newLeader == initialLeader { 171 | t.Errorf("new leader expected to be different from initial leader") 172 | return 173 | } 174 | if newLeaderTerm <= initialLeaderTerm { 175 | t.Errorf("new leader term expected to be > initial leader term, new term=%d old term=%d", newLeaderTerm, initialLeaderTerm) 176 | return 177 | } 178 | 179 | cs.ReconnectPeer(uint64(initialLeader)) 180 | time.Sleep(300 * time.Millisecond) 181 | 182 | latestLeader, latestLeaderTerm, _ := cs.CheckUniqueLeader() 183 | 184 | if latestLeader != newLeader { 185 | t.Errorf("latest leader expected to be %d, got %d", newLeader, latestLeader) 186 | } 187 | if latestLeaderTerm != newLeaderTerm { 188 | t.Errorf("latest leader term expected to be %d got %d", newLeaderTerm, latestLeaderTerm) 189 | } 190 | } 191 | 192 | func TestElectionDisconnectAllAndReconnectAll(t *testing.T) { 193 | defer leaktest.CheckTimeout(t, 100*time.Millisecond) 194 | 195 | cs := CreateNewCluster(t, 3) 196 | defer cs.Shutdown() 197 | 198 | time.Sleep(300 * time.Millisecond) 199 | 200 | for i := 0; i < 3; i++ { 201 | cs.DisconnectPeer(uint64(i)) 202 | } 203 | time.Sleep(300 * time.Millisecond) 204 | cs.CheckNoLeader() 205 | 206 | for i := 0; i < 3; i++ { 207 | 
cs.ReconnectPeer(uint64(i)) 208 | } 209 | 210 | time.Sleep(300 * time.Millisecond) 211 | cs.CheckUniqueLeader() 212 | } 213 | 214 | func TestElectionLeaderDisconnectAndReconnect5Nodes(t *testing.T) { 215 | defer leaktest.CheckTimeout(t, 100*time.Millisecond) 216 | 217 | cs := CreateNewCluster(t, 5) 218 | defer cs.Shutdown() 219 | 220 | initialLeader, initialLeaderTerm, _ := cs.CheckUniqueLeader() 221 | cs.DisconnectPeer(uint64(initialLeader)) 222 | time.Sleep(300 * time.Millisecond) 223 | 224 | newLeader, newLeaderTerm, _ := cs.CheckUniqueLeader() 225 | 226 | if newLeader == initialLeader { 227 | t.Errorf("new leader expected to be different from initial leader") 228 | return 229 | } 230 | if newLeaderTerm <= initialLeaderTerm { 231 | t.Errorf("new leader term expected to be > initial leader term, new term=%d old term=%d", newLeaderTerm, initialLeaderTerm) 232 | return 233 | } 234 | 235 | cs.ReconnectPeer(uint64(initialLeader)) 236 | time.Sleep(300 * time.Millisecond) 237 | 238 | latestLeader, latestLeaderTerm, _ := cs.CheckUniqueLeader() 239 | 240 | if latestLeader != newLeader { 241 | t.Errorf("latest leader expected to be %d, got %d", newLeader, latestLeader) 242 | } 243 | if latestLeaderTerm != newLeaderTerm { 244 | t.Errorf("latest leader term expected to be %d got %d", newLeaderTerm, latestLeaderTerm) 245 | } 246 | } 247 | 248 | func TestElectionFollowerDisconnectAndReconnect(t *testing.T) { 249 | defer leaktest.CheckTimeout(t, 100*time.Millisecond) 250 | 251 | cs := CreateNewCluster(t, 3) 252 | defer cs.Shutdown() 253 | 254 | initialLeader, initialLeaderTerm, _ := cs.CheckUniqueLeader() 255 | follower := (initialLeader + 1) % 3 256 | cs.DisconnectPeer(uint64(follower)) 257 | 258 | time.Sleep(300 * time.Millisecond) 259 | 260 | cs.ReconnectPeer(uint64(follower)) 261 | time.Sleep(100 * time.Millisecond) 262 | _, newLeaderTerm, _ := cs.CheckUniqueLeader() 263 | 264 | if newLeaderTerm <= initialLeaderTerm { 265 | t.Errorf("new leader term expected to be > initial leader term, new term=%d old term=%d", newLeaderTerm, initialLeaderTerm) 266 | } 267 | } 268 | 269 | func TestElectionDisconnectReconnectLoop(t *testing.T) { 270 | defer leaktest.CheckTimeout(t, 100*time.Millisecond) 271 | 272 | cs := CreateNewCluster(t, 3) 273 | defer cs.Shutdown() 274 | 275 | var term = 0 276 | 277 | for i := 0; i < 6; i++ { 278 | leader, newTerm, _ := cs.CheckUniqueLeader() 279 | 280 | if newTerm <= term { 281 | t.Errorf("new leader term expected to be > old leader term, new term=%d old term=%d", newTerm, term) 282 | return 283 | } 284 | 285 | cs.DisconnectPeer(uint64(leader)) 286 | follower := (leader + 1) % 3 287 | cs.DisconnectPeer(uint64(follower)) 288 | time.Sleep(300 * time.Millisecond) 289 | cs.CheckNoLeader() 290 | 291 | cs.ReconnectPeer(uint64(follower)) 292 | cs.ReconnectPeer(uint64(leader)) 293 | 294 | time.Sleep(100 * time.Millisecond) 295 | } 296 | 297 | _, newTerm, _ := cs.CheckUniqueLeader() 298 | 299 | if newTerm <= term { 300 | t.Errorf("new leader term expected to be > old leader term, new term=%d old term=%d", newTerm, term) 301 | return 302 | } 303 | } 304 | 305 | func TestElectionFollowerDisconnectReconnectAfterLong(t *testing.T) { 306 | defer leaktest.CheckTimeout(t, 100*time.Millisecond) 307 | 308 | cs := CreateNewCluster(t, 3) 309 | defer cs.Shutdown() 310 | 311 | initialLeader, initialLeaderTerm, _ := cs.CheckUniqueLeader() 312 | 313 | follower := (initialLeader + 1) % 3 314 | cs.DisconnectPeer(uint64(follower)) 315 | 316 | time.Sleep(1200 * time.Millisecond) 317 | 318 | 
cs.ReconnectPeer(uint64(follower)) 319 | 320 | time.Sleep(500 * time.Millisecond) 321 | newLeader, newLeaderTerm, _ := cs.CheckUniqueLeader() 322 | 323 | if newLeaderTerm <= initialLeaderTerm { 324 | t.Errorf("new leader term expected to be > initial leader term, new term=%d old term=%d", newLeaderTerm, initialLeaderTerm) 325 | } 326 | 327 | if newLeader != follower { 328 | t.Errorf("new leader expected to be %d, got %d", follower, newLeader) 329 | } 330 | } 331 | 332 | func TestCommitOneCommand(t *testing.T) { 333 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 334 | 335 | cs := CreateNewCluster(t, 3) 336 | defer cs.Shutdown() 337 | 338 | origLeaderId, _, _ := cs.CheckUniqueLeader() 339 | 340 | logtest(uint64(origLeaderId), "submitting 42 to %d", origLeaderId) 341 | isLeader, _, _ := cs.SubmitToServer(origLeaderId, 42) 342 | if !isLeader { 343 | t.Errorf("want id=%d leader, but it's not", origLeaderId) 344 | } 345 | 346 | time.Sleep(time.Duration(250) * time.Millisecond) 347 | num, _, _ := cs.CheckCommitted(42, 0) 348 | if num != 3 { 349 | t.Errorf("Not committed by 3 nodes") 350 | } 351 | } 352 | 353 | func TestElectionFollowerDisconnectReconnectAfterLongCommitDone(t *testing.T) { 354 | defer leaktest.CheckTimeout(t, 100*time.Millisecond) 355 | 356 | cs := CreateNewCluster(t, 3) 357 | defer cs.Shutdown() 358 | 359 | initialLeader, initialLeaderTerm, _ := cs.CheckUniqueLeader() 360 | 361 | follower := (initialLeader + 1) % 3 362 | cs.DisconnectPeer(uint64(follower)) 363 | 364 | logtest(uint64(initialLeader), "submitting 42 to %d", initialLeader) 365 | isLeader, _, _ := cs.SubmitToServer(initialLeader, 42) 366 | if !isLeader { 367 | t.Errorf("want id=%d leader, but it's not", initialLeader) 368 | } 369 | 370 | time.Sleep(1200 * time.Millisecond) 371 | 372 | cs.ReconnectPeer(uint64(follower)) 373 | 374 | time.Sleep(500 * time.Millisecond) 375 | newLeader, newLeaderTerm, _ := cs.CheckUniqueLeader() 376 | 377 | if newLeaderTerm <= initialLeaderTerm { 378 | t.Errorf("new leader term expected to be > initial leader term, new term=%d old term=%d", newLeaderTerm, initialLeaderTerm) 379 | } 380 | 381 | if newLeader == follower { 382 | t.Errorf("new leader not expected to be %d", follower) 383 | } 384 | } 385 | 386 | func TestTryCommitToNonLeader(t *testing.T) { 387 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 388 | 389 | cs := CreateNewCluster(t, 3) 390 | defer cs.Shutdown() 391 | 392 | leaderId, _, _ := cs.CheckUniqueLeader() 393 | servingId := (leaderId + 1) % 3 394 | logtest(uint64(servingId), "submitting 42 to %d", servingId) 395 | isLeader, _, _ := cs.SubmitToServer(servingId, 42) 396 | if isLeader { 397 | t.Errorf("want id=%d to be non leader, but it is", servingId) 398 | } 399 | time.Sleep(time.Duration(20) * time.Millisecond) 400 | } 401 | 402 | func TestCommitThenLeaderDisconnect(t *testing.T) { 403 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 404 | 405 | cs := CreateNewCluster(t, 3) 406 | defer cs.Shutdown() 407 | 408 | origLeaderId, _, _ := cs.CheckUniqueLeader() 409 | 410 | logtest(uint64(origLeaderId), "submitting 42 to %d", origLeaderId) 411 | isLeader, _, _ := cs.SubmitToServer(origLeaderId, 42) 412 | if !isLeader { 413 | t.Errorf("want id=%d leader, but it's not", origLeaderId) 414 | } 415 | 416 | time.Sleep(time.Duration(250) * time.Millisecond) 417 | 418 | cs.DisconnectPeer(uint64(origLeaderId)) 419 | time.Sleep(time.Duration(300) * time.Millisecond) 420 | 421 | num, _, _ := cs.CheckCommitted(42, 0) 422 | if num != 2 { 423 | t.Errorf("expected 2 
commits found = %d", num) 424 | } 425 | 426 | } 427 | 428 | func TestCommitMultipleCommands(t *testing.T) { 429 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 430 | 431 | cs := CreateNewCluster(t, 3) 432 | defer cs.Shutdown() 433 | 434 | origLeaderId, _, _ := cs.CheckUniqueLeader() 435 | 436 | values := []int{42, 55, 81} 437 | for _, v := range values { 438 | logtest(uint64(origLeaderId), "submitting %d to %d", v, origLeaderId) 439 | isLeader, _, _ := cs.SubmitToServer(origLeaderId, v) 440 | if !isLeader { 441 | t.Errorf("want id=%d leader, but it's not", origLeaderId) 442 | } 443 | time.Sleep(time.Duration(100) * time.Millisecond) 444 | } 445 | 446 | time.Sleep(time.Duration(300) * time.Millisecond) 447 | com1, i1, _ := cs.CheckCommitted(42, 0) 448 | com2, i2, _ := cs.CheckCommitted(55, 0) 449 | com3, i3, _ := cs.CheckCommitted(81, 0) 450 | 451 | if com1 != 3 || com2 != 3 || com3 != 3 { 452 | t.Errorf("expected com1 = com2 = com3 = 3 found com1 = %d com2 = %d com3 = %d", com1, com2, com3) 453 | } 454 | 455 | if i1 != 1 || i2 != 2 || i3 != 3 { 456 | t.Errorf("expected i1 = 1 i2 = 2 i3 = 3 found i1 = %d i2 = %d i3 = %d", i1, i2, i3) 457 | } 458 | } 459 | 460 | func TestCommitWithDisconnectionAndRecover(t *testing.T) { 461 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 462 | 463 | cs := CreateNewCluster(t, 3) 464 | defer cs.Shutdown() 465 | 466 | // Submit a couple of values to a fully connected cluster. 467 | origLeaderId, _, _ := cs.CheckUniqueLeader() 468 | cs.SubmitToServer(origLeaderId, 5) 469 | cs.SubmitToServer(origLeaderId, 6) 470 | 471 | time.Sleep(time.Duration(500) * time.Millisecond) 472 | num, _, _ := cs.CheckCommitted(6, 0) 473 | if num != 3 { 474 | t.Errorf("expected 3 commits found = %d", num) 475 | } 476 | 477 | dPeerId := (origLeaderId + 1) % 3 478 | cs.DisconnectPeer(uint64(dPeerId)) 479 | time.Sleep(time.Duration(500) * time.Millisecond) 480 | 481 | // Submit a new command; it will be committed but only to two servers. 482 | cs.SubmitToServer(origLeaderId, 7) 483 | time.Sleep(time.Duration(300) * time.Millisecond) 484 | num, _, _ = cs.CheckCommitted(7, 0) 485 | if num != 2 { 486 | t.Errorf("expected 2 commits found = %d", num) 487 | } 488 | // Now reconnect dPeerId and wait a bit; it should find the new command too. 
489 | cs.ReconnectPeer(uint64(dPeerId)) 490 | time.Sleep(time.Duration(500) * time.Millisecond) 491 | 492 | time.Sleep(time.Duration(500) * time.Millisecond) 493 | num, _, _ = cs.CheckCommitted(7, 0) 494 | if num != 3 { 495 | t.Errorf("expected 3 commits found = %d", num) 496 | } 497 | } 498 | 499 | func TestTryCommitMajorityFailure(t *testing.T) { 500 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 501 | 502 | cs := CreateNewCluster(t, 3) 503 | defer cs.Shutdown() 504 | 505 | origLeaderId, _, _ := cs.CheckUniqueLeader() 506 | cs.SubmitToServer(origLeaderId, 5) 507 | 508 | time.Sleep(time.Duration(300) * time.Millisecond) 509 | num, _, _ := cs.CheckCommitted(5, 0) 510 | 511 | if num != 3 { 512 | t.Errorf("expected 3 commits found = %d", num) 513 | } 514 | 515 | dPeer1 := (origLeaderId + 1) % 3 516 | dPeer2 := (origLeaderId + 2) % 3 517 | 518 | cs.DisconnectPeer(uint64(dPeer1)) 519 | cs.DisconnectPeer(uint64(dPeer2)) 520 | time.Sleep(time.Duration(300) * time.Millisecond) 521 | 522 | cs.SubmitToServer(origLeaderId, 6) 523 | time.Sleep(time.Duration(300) * time.Millisecond) 524 | numC, _, _ := cs.CheckCommitted(6, 1) 525 | 526 | if numC != 0 { 527 | t.Errorf("expected 0 commits found = %d", numC) 528 | } 529 | 530 | cs.ReconnectPeer(uint64(dPeer1)) 531 | cs.ReconnectPeer(uint64(dPeer2)) 532 | time.Sleep(time.Duration(600) * time.Millisecond) 533 | 534 | newLeaderId, _, _ := cs.CheckUniqueLeader() 535 | cs.SubmitToServer(newLeaderId, 8) 536 | time.Sleep(time.Duration(300) * time.Millisecond) 537 | 538 | numF, _, _ := cs.CheckCommitted(8, 1) 539 | if numF != 3 { 540 | t.Errorf("expected 3 commits found = %d", numF) 541 | } 542 | 543 | } 544 | 545 | func TestTryCommitToDCLeader(t *testing.T) { 546 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 547 | 548 | cs := CreateNewCluster(t, 5) 549 | defer cs.Shutdown() 550 | 551 | origLeaderId, _, _ := cs.CheckUniqueLeader() 552 | cs.SubmitToServer(origLeaderId, 5) 553 | time.Sleep(time.Duration(300) * time.Millisecond) 554 | 555 | cs.DisconnectPeer(uint64(origLeaderId)) 556 | time.Sleep(time.Duration(200) * time.Millisecond) 557 | cs.SubmitToServer(origLeaderId, 6) 558 | time.Sleep(time.Duration(200) * time.Millisecond) 559 | 560 | num1, _, _ := cs.CheckCommitted(6, 1) 561 | if num1 != 0 { 562 | t.Errorf("expected 0 commits found = %d", num1) 563 | } 564 | 565 | newLeaderId, _, _ := cs.CheckUniqueLeader() 566 | cs.SubmitToServer(newLeaderId, 7) 567 | time.Sleep(time.Duration(300) * time.Millisecond) 568 | num2, _, _ := cs.CheckCommitted(7, 0) 569 | 570 | if num2 != 4 { 571 | t.Errorf("expected 4 commits found = %d", num2) 572 | } 573 | 574 | cs.ReconnectPeer(uint64(origLeaderId)) 575 | time.Sleep(time.Duration(300) * time.Millisecond) 576 | 577 | newLeaderId, _, _ = cs.CheckUniqueLeader() 578 | cs.SubmitToServer(newLeaderId, 8) 579 | time.Sleep(time.Duration(300) * time.Millisecond) 580 | num3, _, _ := cs.CheckCommitted(7, 0) 581 | num4, _, _ := cs.CheckCommitted(8, 0) 582 | num5, _, _ := cs.CheckCommitted(6, 1) 583 | if num3 != 5 || num4 != 5 || num5 != 0 { 584 | t.Errorf("expected num3 = num4 = 5 and num5 = 0 found num3= %d num4 = %d num5 = %d", num3, num4, num5) 585 | } 586 | } 587 | 588 | func TestTryCommitLeaderDisconnectsShortTime(t *testing.T) { 589 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 590 | 591 | cs := CreateNewCluster(t, 5) 592 | defer cs.Shutdown() 593 | 594 | origLeaderId, _, _ := cs.CheckUniqueLeader() 595 | cs.DisconnectPeer(uint64(origLeaderId)) 596 | cs.SubmitToServer(origLeaderId, 5) 597 | 
time.Sleep(time.Duration(10) * time.Millisecond) 598 | cs.ReconnectPeer(uint64(origLeaderId)) 599 | time.Sleep(time.Duration(300) * time.Millisecond) 600 | 601 | num, _, _ := cs.CheckCommitted(5, 0) 602 | 603 | if num != 5 { 604 | t.Errorf("expected commits = 5 found = %d", num) 605 | } 606 | } 607 | 608 | func TestCrashFollowerThenLeader(t *testing.T) { 609 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 610 | 611 | cs := CreateNewCluster(t, 5) 612 | defer cs.Shutdown() 613 | 614 | origLeaderId, _, _ := cs.CheckUniqueLeader() 615 | cs.SubmitToServer(origLeaderId, 25) 616 | cs.SubmitToServer(origLeaderId, 26) 617 | cs.SubmitToServer(origLeaderId, 27) 618 | time.Sleep(time.Duration(250) * time.Millisecond) 619 | 620 | dPeerID1 := (origLeaderId + 1) % 5 621 | dPeerID2 := (origLeaderId + 2) % 5 622 | cs.CrashPeer(uint64(dPeerID1)) 623 | cs.CrashPeer(uint64(dPeerID2)) 624 | time.Sleep(time.Duration(250) * time.Millisecond) 625 | 626 | num1, _, _ := cs.CheckCommitted(25, 0) 627 | num2, _, _ := cs.CheckCommitted(26, 0) 628 | num3, _, _ := cs.CheckCommitted(27, 0) 629 | 630 | if num1 != 3 || num2 != 3 || num3 != 3 { 631 | t.Errorf("expected num1 = num2 = num3 = 3 got num1 = %d num2 = %d num3 = %d", num1, num2, num3) 632 | } 633 | 634 | cs.RestartPeer(uint64(dPeerID1)) 635 | time.Sleep(time.Duration(250) * time.Millisecond) 636 | cs.CrashPeer(uint64(origLeaderId)) 637 | time.Sleep(time.Duration(250) * time.Millisecond) 638 | 639 | newLeaderId, _, _ := cs.CheckUniqueLeader() 640 | cs.SubmitToServer(newLeaderId, 29) 641 | time.Sleep(time.Duration(250) * time.Millisecond) 642 | num4, _, _ := cs.CheckCommitted(29, 0) 643 | 644 | if num4 != 3 { 645 | t.Errorf("expected commit number = 3 found = %d", num4) 646 | } 647 | 648 | } 649 | 650 | func TestAddNewServer(t *testing.T) { 651 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 652 | 653 | cs := CreateNewCluster(t, 5) 654 | defer cs.Shutdown() 655 | 656 | gob.Register(Write{}) 657 | gob.Register(Read{}) 658 | gob.Register(AddServers{}) 659 | gob.Register(RemoveServers{}) 660 | 661 | origLeaderId, _, _ := cs.CheckUniqueLeader() 662 | serverIds := []int{5, 6} 663 | commandToServer := AddServers{ServerIds: serverIds} 664 | 665 | if success, _, err := cs.SubmitToServer(origLeaderId, commandToServer); success { 666 | if err != nil { 667 | t.Errorf("Could not submit command") 668 | } 669 | } else { 670 | t.Errorf("Could not submit command") 671 | } 672 | 673 | time.Sleep(time.Duration(500) * time.Millisecond) 674 | numServer := cs.activeServers.Size() 675 | if numServer != 7 { 676 | t.Errorf("Add Servers could not be completed expected 7 servers, found %d", numServer) 677 | } 678 | } 679 | 680 | func TestRemoveServerNonLeader(t *testing.T) { 681 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 682 | 683 | cs := CreateNewCluster(t, 5) 684 | defer cs.Shutdown() 685 | 686 | gob.Register(Write{}) 687 | gob.Register(Read{}) 688 | gob.Register(AddServers{}) 689 | gob.Register(RemoveServers{}) 690 | 691 | origLeaderId, _, _ := cs.CheckUniqueLeader() 692 | rem1 := (origLeaderId + 1) % 5 693 | rem2 := (origLeaderId + 2) % 5 694 | serverIds := []int{rem1, rem2} 695 | commandToServer := RemoveServers{ServerIds: serverIds} 696 | 697 | if success, _, err := cs.SubmitToServer(origLeaderId, commandToServer); success { 698 | if err != nil { 699 | t.Errorf("Could not submit command") 700 | } 701 | } else { 702 | t.Errorf("Could not submit command") 703 | } 704 | 705 | time.Sleep(time.Duration(1000) * time.Millisecond) 706 | numServer := 
cs.activeServers.Size() 707 | if numServer != 3 { 708 | t.Errorf("Remove Servers could not be completed expected 3 servers, found %d", numServer) 709 | } 710 | } 711 | 712 | func TestRemoveLeader(t *testing.T) { 713 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 714 | 715 | cs := CreateNewCluster(t, 5) 716 | defer cs.Shutdown() 717 | 718 | gob.Register(Write{}) 719 | gob.Register(Read{}) 720 | gob.Register(AddServers{}) 721 | gob.Register(RemoveServers{}) 722 | 723 | origLeaderId, _, _ := cs.CheckUniqueLeader() 724 | serverIds := []int{origLeaderId} 725 | commandToServer := RemoveServers{ServerIds: serverIds} 726 | 727 | if success, _, err := cs.SubmitToServer(origLeaderId, commandToServer); success { 728 | if err != nil { 729 | t.Errorf("Could not submit command") 730 | } 731 | } else { 732 | t.Errorf("Could not submit command") 733 | } 734 | 735 | time.Sleep(time.Duration(1000) * time.Millisecond) 736 | numServer := cs.activeServers.Size() 737 | if numServer != 4 { 738 | t.Errorf("Remove Servers could not be completed expected 3 servers, found %d", numServer) 739 | } 740 | 741 | newLeaderId, _, _ := cs.CheckUniqueLeader() 742 | if origLeaderId == newLeaderId { 743 | t.Errorf("Expected New Leader to be different, Found Same ") 744 | } 745 | } 746 | -------------------------------------------------------------------------------- /utils/sample_output.txt: -------------------------------------------------------------------------------- 1 | PASS TestElectionNormal map[0:true 1:true 2:true] ; entries: 59 2 | ┌─────────────────┬────────────────────────────────┬────────────────────────────────┬────────────────────────────────┐ 3 | │ TIME │ 0 │ 1 │ 2 │ 4 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 5 | │ 17:06:39.683218 │ listening at [::]:15867 │ │ │ 6 | │ │ │ │ │ 7 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 8 | │ 17:06:39.683363 │ │ listening at [::]:27259 │ │ 9 | │ │ │ │ │ 10 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 11 | │ 17:06:39.683470 │ │ │ listening at [::]:9351 │ 12 | │ │ │ │ │ 13 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 14 | │ 17:06:39.684415 │ │ │ Election Timer started (215ms) │ 15 | │ │ │ │ , term=0 │ 16 | │ │ │ │ │ 17 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 18 | │ 17:06:39.684454 │ │ Election Timer started (290ms) │ │ 19 | │ │ │ , term=0 │ │ 20 | │ │ │ │ │ 21 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 22 | │ 17:06:39.684462 │ Election Timer started (199ms) │ │ │ 23 | │ │ , term=0 │ │ │ 24 | │ │ │ │ │ 25 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 26 | │ 17:06:39.885142 │ Becomes Candidate (currentTerm │ │ │ 27 | │ │ =1); log=[] │ │ │ 28 | │ │ │ │ │ 29 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 30 | │ 17:06:39.885376 │ Election Timer started (262ms) │ │ │ 31 | │ │ , term=1 │ │ │ 32 | │ │ │ │ │ 33 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 34 | │ 17:06:39.885393 │ Sending 
RequestVote to 1: {Ter │ │ │ 35 | │ │ m:1 CandidateId:0 LastLogIndex │ │ │ 36 | │ │ :0 LastLogTerm:0} │ │ │ 37 | │ │ │ │ │ 38 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 39 | │ 17:06:39.885495 │ Sending RequestVote to 2: {Ter │ │ │ 40 | │ │ m:1 CandidateId:0 LastLogIndex │ │ │ 41 | │ │ :0 LastLogTerm:0} │ │ │ 42 | │ │ │ │ │ 43 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 44 | │ 17:06:39.892139 │ │ │ RequestVote: {Term:1 Candidate │ 45 | │ │ │ │ Id:0 LastLogIndex:0 LastLogTer │ 46 | │ │ │ │ m:0} [currentTerm=0, votedFor= │ 47 | │ │ │ │ -1, log index/term=(0, 0)] │ 48 | │ │ │ │ │ 49 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 50 | │ 17:06:39.892187 │ │ │ Term out of date with term in │ 51 | │ │ │ │ RequestVote │ 52 | │ │ │ │ │ 53 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 54 | │ 17:06:39.892228 │ │ │ Becomes Follower with term=1; │ 55 | │ │ │ │ log=[] │ 56 | │ │ │ │ │ 57 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 58 | │ 17:06:39.892272 │ │ RequestVote: {Term:1 Candidate │ │ 59 | │ │ │ Id:0 LastLogIndex:0 LastLogTer │ │ 60 | │ │ │ m:0} [currentTerm=0, votedFor= │ │ 61 | │ │ │ -1, log index/term=(0, 0)] │ │ 62 | │ │ │ │ │ 63 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 64 | │ 17:06:39.892331 │ │ Term out of date with term in │ │ 65 | │ │ │ RequestVote │ │ 66 | │ │ │ │ │ 67 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 68 | │ 17:06:39.892356 │ │ Becomes Follower with term=1; │ │ 69 | │ │ │ log=[] │ │ 70 | │ │ │ │ │ 71 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 72 | │ 17:06:39.892536 │ │ │ RequestVote reply: &{Term:1 Vo │ 73 | │ │ │ │ teGranted:true} │ 74 | │ │ │ │ │ 75 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 76 | │ 17:06:39.892593 │ │ RequestVote reply: &{Term:1 Vo │ │ 77 | │ │ │ teGranted:true} │ │ 78 | │ │ │ │ │ 79 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 80 | │ 17:06:39.892666 │ │ Election Timer started (221ms) │ │ 81 | │ │ │ , term=1 │ │ 82 | │ │ │ │ │ 83 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 84 | │ 17:06:39.892702 │ │ │ Election Timer started (226ms) │ 85 | │ │ │ │ , term=1 │ 86 | │ │ │ │ │ 87 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 88 | │ 17:06:39.893176 │ received RequestVoteReply {Ter │ │ │ 89 | │ │ m:1 VoteGranted:true} from 2 │ │ │ 90 | │ │ │ │ │ 91 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 92 | │ 17:06:39.893222 │ Wins election with 2 votes │ │ │ 93 | │ │ │ │ │ 94 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 95 | │ 17:06:39.893288 │ becomes Leader; term=1, nextIn │ │ │ 96 | │ │ dex=map[1:1 2:1], matchIndex=m │ 
│ │ 97 | │ │ ap[1:0 2:0]; log=[] │ │ │ 98 | │ │ │ │ │ 99 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 100 | │ 17:06:39.893381 │ sending AppendEntries to 1: ni │ │ │ 101 | │ │ =1, args={Term:1 LeaderId:0 Pr │ │ │ 102 | │ │ evLogIndex:0 PrevLogTerm:0 Ent │ │ │ 103 | │ │ ries:[] LeaderCommit:0} │ │ │ 104 | │ │ │ │ │ 105 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 106 | │ 17:06:39.893400 │ sending AppendEntries to 2: ni │ │ │ 107 | │ │ =1, args={Term:1 LeaderId:0 Pr │ │ │ 108 | │ │ evLogIndex:0 PrevLogTerm:0 Ent │ │ │ 109 | │ │ ries:[] LeaderCommit:0} │ │ │ 110 | │ │ │ │ │ 111 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 112 | │ 17:06:39.893389 │ received RequestVoteReply {Ter │ │ │ 113 | │ │ m:1 VoteGranted:true} from 1 │ │ │ 114 | │ │ │ │ │ 115 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 116 | │ 17:06:39.893562 │ While waiting for reply, state │ │ │ 117 | │ │ = Leader │ │ │ 118 | │ │ │ │ │ 119 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 120 | │ 17:06:39.895158 │ │ │ in election timer term changed │ 121 | │ │ │ │ from 0 to 1, bailing out │ 122 | │ │ │ │ │ 123 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 124 | │ 17:06:39.895265 │ │ in election timer term changed │ │ 125 | │ │ │ from 0 to 1, bailing out │ │ 126 | │ │ │ │ │ 127 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 128 | │ 17:06:39.895270 │ │ │ AppendEntries: {Term:1 LeaderI │ 129 | │ │ │ │ d:0 PrevLogIndex:0 PrevLogTerm │ 130 | │ │ │ │ :0 Entries:[] LeaderCommit:0} │ 131 | │ │ │ │ │ 132 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 133 | │ 17:06:39.895410 │ │ │ AppendEntries reply: {Term:1 S │ 134 | │ │ │ │ uccess:true ConflictIndex:0 Co │ 135 | │ │ │ │ nflictTerm:0} │ 136 | │ │ │ │ │ 137 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 138 | │ 17:06:39.895663 │ In election timer state=Leader │ │ │ 139 | │ │ , bailing out │ │ │ 140 | │ │ │ │ │ 141 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 142 | │ 17:06:39.895853 │ AppendEntries reply from 2 suc │ │ │ 143 | │ │ cess: nextIndex := map[1:1 2:1 │ │ │ 144 | │ │ ], matchIndex := map[1:0 2:0]; │ │ │ 145 | │ │ commitIndex := 0 │ │ │ 146 | │ │ │ │ │ 147 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 148 | │ 17:06:39.898983 │ │ AppendEntries: {Term:1 LeaderI │ │ 149 | │ │ │ d:0 PrevLogIndex:0 PrevLogTerm │ │ 150 | │ │ │ :0 Entries:[] LeaderCommit:0} │ │ 151 | │ │ │ │ │ 152 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 153 | │ 17:06:39.899104 │ │ AppendEntries reply: {Term:1 S │ │ 154 | │ │ │ uccess:true ConflictIndex:0 Co │ │ 155 | │ │ │ nflictTerm:0} │ │ 156 | │ │ │ │ │ 157 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 158 | │ 
17:06:39.899388 │ AppendEntries reply from 1 suc │ │ │ 159 | │ │ cess: nextIndex := map[1:1 2:1 │ │ │ 160 | │ │ ], matchIndex := map[1:0 2:0]; │ │ │ 161 | │ │ commitIndex := 0 │ │ │ 162 | │ │ │ │ │ 163 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 164 | │ 17:06:39.943713 │ sending AppendEntries to 2: ni │ │ │ 165 | │ │ =1, args={Term:1 LeaderId:0 Pr │ │ │ 166 | │ │ evLogIndex:0 PrevLogTerm:0 Ent │ │ │ 167 | │ │ ries:[] LeaderCommit:0} │ │ │ 168 | │ │ │ │ │ 169 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 170 | │ 17:06:39.943743 │ sending AppendEntries to 1: ni │ │ │ 171 | │ │ =1, args={Term:1 LeaderId:0 Pr │ │ │ 172 | │ │ evLogIndex:0 PrevLogTerm:0 Ent │ │ │ 173 | │ │ ries:[] LeaderCommit:0} │ │ │ 174 | │ │ │ │ │ 175 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 176 | │ 17:06:39.945253 │ │ AppendEntries: {Term:1 LeaderI │ │ 177 | │ │ │ d:0 PrevLogIndex:0 PrevLogTerm │ │ 178 | │ │ │ :0 Entries:[] LeaderCommit:0} │ │ 179 | │ │ │ │ │ 180 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 181 | │ 17:06:39.945377 │ │ AppendEntries reply: {Term:1 S │ │ 182 | │ │ │ uccess:true ConflictIndex:0 Co │ │ 183 | │ │ │ nflictTerm:0} │ │ 184 | │ │ │ │ │ 185 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 186 | │ 17:06:39.945574 │ AppendEntries reply from 1 suc │ │ │ 187 | │ │ cess: nextIndex := map[1:1 2:1 │ │ │ 188 | │ │ ], matchIndex := map[1:0 2:0]; │ │ │ 189 | │ │ commitIndex := 0 │ │ │ 190 | │ │ │ │ │ 191 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 192 | │ 17:06:39.946599 │ │ │ AppendEntries: {Term:1 LeaderI │ 193 | │ │ │ │ d:0 PrevLogIndex:0 PrevLogTerm │ 194 | │ │ │ │ :0 Entries:[] LeaderCommit:0} │ 195 | │ │ │ │ │ 196 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 197 | │ 17:06:39.946699 │ │ │ AppendEntries reply: {Term:1 S │ 198 | │ │ │ │ uccess:true ConflictIndex:0 Co │ 199 | │ │ │ │ nflictTerm:0} │ 200 | │ │ │ │ │ 201 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 202 | │ 17:06:39.946945 │ AppendEntries reply from 2 suc │ │ │ 203 | │ │ cess: nextIndex := map[1:1 2:1 │ │ │ 204 | │ │ ], matchIndex := map[1:0 2:0]; │ │ │ 205 | │ │ commitIndex := 0 │ │ │ 206 | │ │ │ │ │ 207 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 208 | │ 17:06:39.985733 │ │ │ Becomes Dead │ 209 | │ │ │ │ │ 210 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 211 | │ 17:06:39.985806 │ │ │ Accepting no more connections │ 212 | │ │ │ │ │ 213 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 214 | │ 17:06:39.985854 │ │ │ Waiting for existing connectio │ 215 | │ │ │ │ ns to close │ 216 | │ │ │ │ │ 217 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 218 | │ 17:06:39.985884 │ │ │ All connections closed. 
Stoppi │ 219 | │ │ │ │ ng server │ 220 | │ │ │ │ │ 221 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 222 | │ 17:06:39.985900 │ Becomes Dead │ │ │ 223 | │ │ │ │ │ 224 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 225 | │ 17:06:39.986006 │ sendCommit completed │ │ │ 226 | │ │ │ │ │ 227 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 228 | │ 17:06:39.986016 │ │ │ sendCommit completed │ 229 | │ │ │ │ │ 230 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 231 | │ 17:06:39.986046 │ Accepting no more connections │ │ │ 232 | │ │ │ │ │ 233 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 234 | │ 17:06:39.986050 │ Waiting for existing connectio │ │ │ 235 | │ │ ns to close │ │ │ 236 | │ │ │ │ │ 237 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 238 | │ 17:06:39.986123 │ All connections closed. Stoppi │ │ │ 239 | │ │ ng server │ │ │ 240 | │ │ │ │ │ 241 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 242 | │ 17:06:39.986135 │ │ Becomes Dead │ │ 243 | │ │ │ │ │ 244 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 245 | │ 17:06:39.986185 │ │ Accepting no more connections │ │ 246 | │ │ │ │ │ 247 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 248 | │ 17:06:39.986203 │ │ Waiting for existing connectio │ │ 249 | │ │ │ ns to close │ │ 250 | │ │ │ │ │ 251 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 252 | │ 17:06:39.986214 │ │ All connections closed. 
Stoppi │ │ 253 | │ │ │ ng server │ │ 254 | │ │ │ │ │ 255 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 256 | │ 17:06:39.986218 │ │ sendCommit completed │ │ 257 | │ │ │ │ │ 258 | └─────────────────┴────────────────────────────────┴────────────────────────────────┴────────────────────────────────┘ 259 | 260 | PASS 261 | -------------------------------------------------------------------------------- /utils/output.txt: -------------------------------------------------------------------------------- 1 | PASS TestElectionNormal map[0:true 1:true 2:true] ; entries: 60 2 | ┌─────────────────┬────────────────────────────────┬────────────────────────────────┬────────────────────────────────┐ 3 | │ TIME │ 0 │ 1 │ 2 │ 4 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 5 | │ 07:55:17.043470 │ listening at [::]:46083 │ │ │ 6 | │ │ │ │ │ 7 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 8 | │ 07:55:17.043649 │ │ listening at [::]:37779 │ │ 9 | │ │ │ │ │ 10 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 11 | │ 07:55:17.043698 │ │ │ listening at [::]:44039 │ 12 | │ │ │ │ │ 13 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 14 | │ 07:55:17.045275 │ │ │ Election Timer started (288ms) │ 15 | │ │ │ │ , term=0 │ 16 | │ │ │ │ │ 17 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 18 | │ 07:55:17.045290 │ │ Election Timer started (268ms) │ │ 19 | │ │ │ , term=0 │ │ 20 | │ │ │ │ │ 21 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 22 | │ 07:55:17.045325 │ Election Timer started (235ms) │ │ │ 23 | │ │ , term=0 │ │ │ 24 | │ │ │ │ │ 25 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 26 | │ 07:55:17.286596 │ Becomes Candidate (currentTerm │ │ │ 27 | │ │ =1); log=[] │ │ │ 28 | │ │ │ │ │ 29 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 30 | │ 07:55:17.286816 │ Election Timer started (279ms) │ │ │ 31 | │ │ , term=1 │ │ │ 32 | │ │ │ │ │ 33 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 34 | │ 07:55:17.286843 │ Sending RequestVote to 1: {Ter │ │ │ 35 | │ │ m:1 CandidateId:0 LastLogIndex │ │ │ 36 | │ │ :0 LastLogTerm:0} │ │ │ 37 | │ │ │ │ │ 38 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 39 | │ 07:55:17.286899 │ Sending RequestVote to 2: {Ter │ │ │ 40 | │ │ m:1 CandidateId:0 LastLogIndex │ │ │ 41 | │ │ :0 LastLogTerm:0} │ │ │ 42 | │ │ │ │ │ 43 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 44 | │ 07:55:17.289733 │ │ │ RequestVote: {Term:1 Candidate │ 45 | │ │ │ │ Id:0 LastLogIndex:0 LastLogTer │ 46 | │ │ │ │ m:0} [currentTerm=0, votedFor= │ 47 | │ │ │ │ -1, log index/term=(0, 0)] │ 48 | │ │ │ │ │ 49 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 50 | │ 07:55:17.289871 │ │ │ Term out 
of date with term in │ 51 | │ │ │ │ RequestVote │ 52 | │ │ │ │ │ 53 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 54 | │ 07:55:17.289906 │ │ │ Becomes Follower with term=1; │ 55 | │ │ │ │ log=[] │ 56 | │ │ │ │ │ 57 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 58 | │ 07:55:17.290251 │ │ │ RequestVote reply: &{Term:1 Vo │ 59 | │ │ │ │ teGranted:true} │ 60 | │ │ │ │ │ 61 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 62 | │ 07:55:17.290464 │ │ │ Election Timer started (292ms) │ 63 | │ │ │ │ , term=1 │ 64 | │ │ │ │ │ 65 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 66 | │ 07:55:17.291215 │ │ RequestVote: {Term:1 Candidate │ │ 67 | │ │ │ Id:0 LastLogIndex:0 LastLogTer │ │ 68 | │ │ │ m:0} [currentTerm=0, votedFor= │ │ 69 | │ │ │ -1, log index/term=(0, 0)] │ │ 70 | │ │ │ │ │ 71 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 72 | │ 07:55:17.291342 │ │ Term out of date with term in │ │ 73 | │ │ │ RequestVote │ │ 74 | │ │ │ │ │ 75 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 76 | │ 07:55:17.291429 │ received RequestVoteReply {Ter │ │ │ 77 | │ │ m:1 VoteGranted:true} from 2 │ │ │ 78 | │ │ │ │ │ 79 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 80 | │ 07:55:17.291477 │ Wins election with 2 votes │ │ │ 81 | │ │ │ │ │ 82 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 83 | │ 07:55:17.291529 │ becomes Leader; term=1, nextIn │ │ │ 84 | │ │ dex=map[1:1 2:1], matchIndex=m │ │ │ 85 | │ │ ap[1:0 2:0]; log=[] │ │ │ 86 | │ │ │ │ │ 87 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 88 | │ 07:55:17.291622 │ sending AppendEntries to 2: ni │ │ │ 89 | │ │ =1, args={Term:1 LeaderId:0 Pr │ │ │ 90 | │ │ evLogIndex:0 PrevLogTerm:0 Ent │ │ │ 91 | │ │ ries:[] LeaderCommit:0} │ │ │ 92 | │ │ │ │ │ 93 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 94 | │ 07:55:17.291439 │ │ Becomes Follower with term=1; │ │ 95 | │ │ │ log=[] │ │ 96 | │ │ │ │ │ 97 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 98 | │ 07:55:17.292017 │ │ RequestVote reply: &{Term:1 Vo │ │ 99 | │ │ │ teGranted:true} │ │ 100 | │ │ │ │ │ 101 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 102 | │ 07:55:17.292173 │ │ Election Timer started (174ms) │ │ 103 | │ │ │ , term=1 │ │ 104 | │ │ │ │ │ 105 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 106 | │ 07:55:17.292216 │ sending AppendEntries to 1: ni │ │ │ 107 | │ │ =1, args={Term:1 LeaderId:0 Pr │ │ │ 108 | │ │ evLogIndex:0 PrevLogTerm:0 Ent │ │ │ 109 | │ │ ries:[] LeaderCommit:0} │ │ │ 110 | │ │ │ │ │ 111 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 112 | │ 07:55:17.292670 │ received 
RequestVoteReply {Ter │ │ │ 113 | │ │ m:1 VoteGranted:true} from 1 │ │ │ 114 | │ │ │ │ │ 115 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 116 | │ 07:55:17.292818 │ While waiting for reply, state │ │ │ 117 | │ │ = Leader │ │ │ 118 | │ │ │ │ │ 119 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 120 | │ 07:55:17.295473 │ │ │ AppendEntries: {Term:1 LeaderI │ 121 | │ │ │ │ d:0 PrevLogIndex:0 PrevLogTerm │ 122 | │ │ │ │ :0 Entries:[] LeaderCommit:0} │ 123 | │ │ │ │ │ 124 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 125 | │ 07:55:17.295725 │ │ in election timer term changed │ │ 126 | │ │ │ from 0 to 1, bailing out │ │ 127 | │ │ │ │ │ 128 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 129 | │ 07:55:17.295943 │ │ │ AppendEntries reply: {Term:1 S │ 130 | │ │ │ │ uccess:true ConflictIndex:0 Co │ 131 | │ │ │ │ nflictTerm:0} │ 132 | │ │ │ │ │ 133 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 134 | │ 07:55:17.295992 │ │ AppendEntries: {Term:1 LeaderI │ │ 135 | │ │ │ d:0 PrevLogIndex:0 PrevLogTerm │ │ 136 | │ │ │ :0 Entries:[] LeaderCommit:0} │ │ 137 | │ │ │ │ │ 138 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 139 | │ 07:55:17.296154 │ │ AppendEntries reply: {Term:1 S │ │ 140 | │ │ │ uccess:true ConflictIndex:0 Co │ │ 141 | │ │ │ nflictTerm:0} │ │ 142 | │ │ │ │ │ 143 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 144 | │ 07:55:17.296388 │ │ │ in election timer term changed │ 145 | │ │ │ │ from 0 to 1, bailing out │ 146 | │ │ │ │ │ 147 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 148 | │ 07:55:17.296616 │ AppendEntries reply from 1 suc │ │ │ 149 | │ │ cess: nextIndex := map[1:1 2:1 │ │ │ 150 | │ │ ], matchIndex := map[1:0 2:0]; │ │ │ 151 | │ │ commitIndex := 0 │ │ │ 152 | │ │ │ │ │ 153 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 154 | │ 07:55:17.296841 │ AppendEntries reply from 2 suc │ │ │ 155 | │ │ cess: nextIndex := map[1:1 2:1 │ │ │ 156 | │ │ ], matchIndex := map[1:0 2:0]; │ │ │ 157 | │ │ commitIndex := 0 │ │ │ 158 | │ │ │ │ │ 159 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 160 | │ 07:55:17.296985 │ In election timer state=Leader │ │ │ 161 | │ │ , bailing out │ │ │ 162 | │ │ │ │ │ 163 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 164 | │ 07:55:17.342763 │ sending AppendEntries to 2: ni │ │ │ 165 | │ │ =1, args={Term:1 LeaderId:0 Pr │ │ │ 166 | │ │ evLogIndex:0 PrevLogTerm:0 Ent │ │ │ 167 | │ │ ries:[] LeaderCommit:0} │ │ │ 168 | │ │ │ │ │ 169 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 170 | │ 07:55:17.343015 │ sending AppendEntries to 1: ni │ │ │ 171 | │ │ =1, args={Term:1 LeaderId:0 Pr │ │ │ 172 | │ │ evLogIndex:0 PrevLogTerm:0 Ent │ │ │ 173 | │ │ ries:[] LeaderCommit:0} │ │ │ 174 | │ │ │ │ │ 175 | 
├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 176 | │ 07:55:17.345219 │ │ AppendEntries: {Term:1 LeaderI │ │ 177 | │ │ │ d:0 PrevLogIndex:0 PrevLogTerm │ │ 178 | │ │ │ :0 Entries:[] LeaderCommit:0} │ │ 179 | │ │ │ │ │ 180 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 181 | │ 07:55:17.345444 │ │ AppendEntries reply: {Term:1 S │ │ 182 | │ │ │ uccess:true ConflictIndex:0 Co │ │ 183 | │ │ │ nflictTerm:0} │ │ 184 | │ │ │ │ │ 185 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 186 | │ 07:55:17.346049 │ AppendEntries reply from 1 suc │ │ │ 187 | │ │ cess: nextIndex := map[1:1 2:1 │ │ │ 188 | │ │ ], matchIndex := map[1:0 2:0]; │ │ │ 189 | │ │ commitIndex := 0 │ │ │ 190 | │ │ │ │ │ 191 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 192 | │ 07:55:17.347337 │ │ │ AppendEntries: {Term:1 LeaderI │ 193 | │ │ │ │ d:0 PrevLogIndex:0 PrevLogTerm │ 194 | │ │ │ │ :0 Entries:[] LeaderCommit:0} │ 195 | │ │ │ │ │ 196 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 197 | │ 07:55:17.347516 │ │ │ AppendEntries reply: {Term:1 S │ 198 | │ │ │ │ uccess:true ConflictIndex:0 Co │ 199 | │ │ │ │ nflictTerm:0} │ 200 | │ │ │ │ │ 201 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 202 | │ 07:55:17.347686 │ │ │ Becomes Dead │ 203 | │ │ │ │ │ 204 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 205 | │ 07:55:17.347955 │ │ │ Accepting no more connections │ 206 | │ │ │ │ │ 207 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 208 | │ 07:55:17.348045 │ │ │ Waiting for existing connectio │ 209 | │ │ │ │ ns to close │ 210 | │ │ │ │ │ 211 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 212 | │ 07:55:17.348073 │ │ │ sendCommit completed │ 213 | │ │ │ │ │ 214 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 215 | │ 07:55:17.348089 │ │ │ All connections closed. Stoppi │ 216 | │ │ │ │ ng server │ 217 | │ │ │ │ │ 218 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 219 | │ 07:55:17.348164 │ Becomes Dead │ │ │ 220 | │ │ │ │ │ 221 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 222 | │ 07:55:17.348224 │ Accepting no more connections │ │ │ 223 | │ │ │ │ │ 224 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 225 | │ 07:55:17.348267 │ Waiting for existing connectio │ │ │ 226 | │ │ ns to close │ │ │ 227 | │ │ │ │ │ 228 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 229 | │ 07:55:17.348319 │ All connections closed. 
Stoppi │ │ │ 230 | │ │ ng server │ │ │ 231 | │ │ │ │ │ 232 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 233 | │ 07:55:17.348332 │ sendCommit completed │ │ │ 234 | │ │ │ │ │ 235 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 236 | │ 07:55:17.348343 │ │ Becomes Dead │ │ 237 | │ │ │ │ │ 238 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 239 | │ 07:55:17.348624 │ │ Accepting no more connections │ │ 240 | │ │ │ │ │ 241 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 242 | │ 07:55:17.348646 │ │ Waiting for existing connectio │ │ 243 | │ │ │ ns to close │ │ 244 | │ │ │ │ │ 245 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 246 | │ 07:55:17.348661 │ │ All connections closed. Stoppi │ │ 247 | │ │ │ ng server │ │ 248 | │ │ │ │ │ 249 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 250 | │ 07:55:17.348749 │ │ sendCommit completed │ │ 251 | │ │ │ │ │ 252 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 253 | │ 07:55:17.350756 │ │ │ In election timer state=Dead, │ 254 | │ │ │ │ bailing out │ 255 | │ │ │ │ │ 256 | ├─────────────────┼────────────────────────────────┼────────────────────────────────┼────────────────────────────────┤ 257 | │ 07:55:17.352309 │ │ In election timer state=Dead, │ │ 258 | │ │ │ bailing out │ │ 259 | │ │ │ │ │ 260 | └─────────────────┴────────────────────────────────┴────────────────────────────────┴────────────────────────────────┘ 261 | 262 | PASS 263 | -------------------------------------------------------------------------------- /raft/raft.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | "errors" 7 | "fmt" 8 | "log" 9 | "math" 10 | "math/rand" 11 | "os" 12 | "sync" 13 | "time" 14 | ) 15 | 16 | const DEBUG = 0 // DEBUG is the debug level 17 | type RNState int // RNState is the state of the Raft node 18 | 19 | const ( 20 | Follower RNState = iota // Follower is the state of a Raft node that is a follower 21 | Candidate // Candidate is the state of a Raft node that is a candidate 22 | Leader // Leader is the state of a Raft node that is a leader 23 | Dead // Dead is the state of a Raft node that is dead 24 | ) 25 | 26 | type CommitEntry struct { 27 | Command interface{} // Command is the command to be committed 28 | Term uint64 // Term is the term of the command 29 | Index uint64 // Index is the index of the command 30 | } 31 | 32 | type LogEntry struct { 33 | Command interface{} // Command is the command to be committed 34 | Term uint64 // Term is the term of the log entry 35 | } 36 | 37 | type RaftNode struct { 38 | id uint64 // id is the id of the Raft node 39 | mu sync.Mutex // Mutex protects the Raft node 40 | peerList Set // Peer is the list of peers in the Raft cluster 41 | server *Server // Server is the server of the Raft node. 
Issue RPCs to the peers 42 | db *Database // Database is the storage of the Raft node 43 | commitChan chan CommitEntry // CommitChan is the channel the channel where this Raft Node is going to report committed log entries 44 | newCommitReady chan struct{} // NewCommitReady is an internal notification channel used to notify that new log entries may be sent on commitChan. 45 | trigger chan struct{} // Trigger is the channel used to trigger the Raft node to send a AppendEntries RPC to the peers when some relevant event occurs 46 | 47 | // Persistent state on all servers 48 | currentTerm uint64 // CurrentTerm is the current term of the Raft node 49 | votedFor int // VotedFor is the candidate id that received a vote in the current term 50 | log []LogEntry // Log is the log of the Raft node 51 | 52 | // IMPORTANT: Use 1 based indexing for log entries 53 | 54 | // Volatile state on all servers 55 | commitIndex uint64 // CommitIndex is the index of the last committed log entry 56 | lastApplied uint64 // LastApplied is the index of the last applied log entry 57 | state RNState // State is the state of the Raft node 58 | electionResetEvent time.Time // ElectionResetEvent is the time at which the Raft node had last reset its election timer 59 | 60 | // Volatile state on leaders 61 | nextIndex map[uint64]uint64 // NextIndex is the index of the next log entry to send to each peer 62 | matchIndex map[uint64]uint64 // MatchIndex is the index of the highest log entry known to be replicated on the leader's peers 63 | } 64 | 65 | type RequestVoteArgs struct { 66 | Term uint64 // Term is the term of the Raft node 67 | CandidateId uint64 // CandidateId is the id of the Raft node that is requesting the vote 68 | LastLogIndex uint64 // LastLogIndex is the index of the last log entry 69 | LastLogTerm uint64 // LastLogTerm is the term of the last log entry 70 | } 71 | 72 | type RequestVoteReply struct { 73 | Term uint64 // Term is the term of the Raft node 74 | VoteGranted bool // VoteGranted is true if the Raft node granted the vote 75 | } 76 | 77 | type AppendEntriesArgs struct { 78 | Term uint64 // Term is the term of the Raft node 79 | LeaderId uint64 // LeaderId is the id of the Raft node that is sending the AppendEntries RPC 80 | 81 | PrevLogIndex uint64 // PrevLogIndex is the index of the log entry immediately preceding the new ones 82 | PrevLogTerm uint64 // PrevLogTerm is the term of the log entry immediately preceding the new ones 83 | Entries []LogEntry // Entries is the slice of log entries to be appended 84 | LeaderCommit uint64 // LeaderCommit is the index of the log entry to be committed 85 | } 86 | 87 | type AppendEntriesReply struct { 88 | Term uint64 // Term is the term of the Raft node 89 | Success bool // true if follower contained entry matching prevLogIndex and prevLogTerm 90 | 91 | // Faster conflict resolution optimization 92 | // (described near the end of section 5.3 93 | // in the https://raft.github.io/raft.pdf 94 | // [RAFT] paper) 95 | 96 | ConflictIndex uint64 // ConflictIndex is the index of the conflicting log entry 97 | ConflictTerm uint64 // ConflictTerm is the term of the conflicting log entry 98 | } 99 | 100 | // debug logs a debug message if the debug level is set to DEBUG 101 | func (rn *RaftNode) debug(format string, args ...interface{}) { 102 | if DEBUG > 0 { 103 | format = fmt.Sprintf("[%d] %s", rn.id, format) 104 | log.Printf(format, args...) 
105 | } 106 | } 107 | 108 | // NewRaftNode creates a new Raft node with the given id, peers, 109 | // server, database, and commit channel. The ready channel is used to 110 | // notify the caller that the peers have been initialized and the Raft 111 | // node is ready to be started. 112 | 113 | func NewRaftNode(id uint64, peerList Set, server *Server, db *Database, ready <-chan interface{}, commitChan chan CommitEntry) *RaftNode { 114 | node := &RaftNode{ 115 | id: id, // id is the id of the Raft node 116 | peerList: peerList, // List of peers of this Raft node 117 | server: server, // Server is the server of the Raft node. Issue RPCs to the peers 118 | db: db, // Database is the storage of the Raft node 119 | commitChan: commitChan, // CommitChan is the channel the channel where this Raft Node is going to report committed log entries 120 | newCommitReady: make(chan struct{}, 16), // NewCommitReady is an internal notification channel used to notify that new log entries may be sent on commitChan. 121 | trigger: make(chan struct{}, 1), // Trigger is the channel used to trigger the Raft node to send a AppendEntries RPC to the peers when some relevant event occurs 122 | currentTerm: 0, // CurrentTerm is the current term of the Raft node 123 | votedFor: -1, // VotedFor is the candidate id that received a vote in the current term 124 | log: make([]LogEntry, 0), // Log is the log of the Raft node 125 | commitIndex: 0, // CommitIndex is the index of the last committed log entry 126 | lastApplied: 0, // LastApplied is the index of the last applied log entry 127 | state: Follower, // State is the state of the Raft node 128 | electionResetEvent: time.Now(), // ElectionResetEvent is the time at which the Raft node had last reset its election timer 129 | nextIndex: make(map[uint64]uint64), // NextIndex is the index of the next log entry to send to each peer 130 | matchIndex: make(map[uint64]uint64), // MatchIndex is the index of the highest log entry known to be replicated on the leader's peers 131 | } 132 | 133 | if node.db.HasData() { 134 | // If the database has data, load the 135 | // currentTerm, votedFor, and log from 136 | // the database before crashing. 137 | 138 | node.restoreFromStorage() 139 | } 140 | 141 | // Start the Raft node 142 | go func() { 143 | <-ready // Wait for the peers to be initialized 144 | node.mu.Lock() // Lock the Raft node 145 | node.electionResetEvent = time.Now() // Reset the election timer 146 | node.mu.Unlock() // Unlock the Raft node 147 | node.runElectionTimer() // Start the election timer 148 | }() 149 | 150 | go node.sendCommit() // Start the commit channel as a goroutine 151 | return node 152 | } 153 | 154 | // sendCommit sends committed entries on commit channel. It watches 155 | // newCommitReady for notifications and calculates which new entries 156 | // are ready to be sent. This method should run in background goroutine 157 | // Commit Channel of Node may be buffered and will limit how fast the 158 | // client consumes new committed entries. 159 | 160 | func (rn *RaftNode) sendCommit() { 161 | for range rn.newCommitReady { 162 | // Find which entries we have to apply. 163 | rn.mu.Lock() // Lock the Raft node 164 | savedTerm := rn.currentTerm // Save the current term 165 | savedLastApplied := rn.lastApplied // Save the last applied index 166 | 167 | // Find the slice of entry that we have not applied yet. 
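// For example, with lastApplied=2 and commitIndex=5 the slice below becomes
// rn.log[2:5] (1-based entries 3 through 5), and those entries are reported
// on commitChan with Index values 3, 4 and 5 respectively.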
168 | var entries []LogEntry // Entries is the slice of log entries that we have not applied yet 169 | if rn.commitIndex > rn.lastApplied { // If the commit index is greater than the last applied index 170 | entries = rn.log[rn.lastApplied:rn.commitIndex] // Entries is the slice of log entries that we have not applied yet 171 | rn.lastApplied = rn.commitIndex // Update the last applied index 172 | } 173 | rn.mu.Unlock() // Unlock the Raft node 174 | rn.debug("sendCommit entries=%v, savedLastApplied=%d", entries, savedLastApplied) 175 | 176 | // Send the entries to the commit channel one by one. 177 | for i, entry := range entries { // For each entry in the slice of log entries that we have not applied yet 178 | rn.commitChan <- CommitEntry{ // Send the entry to the commit channel 179 | Command: entry.Command, // Command is the command of the log entry 180 | Index: savedLastApplied + uint64(i) + 1, // Index is the index of the log entry 181 | Term: savedTerm, // Term is the term of the log entry 182 | } 183 | } 184 | } 185 | rn.debug("sendCommit completed") 186 | } 187 | 188 | func (rn *RaftNode) runElectionTimer() { 189 | timeoutDuration := rn.electionTimeout() 190 | rn.mu.Lock() 191 | termStarted := rn.currentTerm 192 | rn.mu.Unlock() 193 | rn.debug("Election Timer started (%v), term=%d", timeoutDuration, termStarted) 194 | 195 | /** The following loop will run until either: 196 | * [-] We discover the election timer is no longer needed, (or) 197 | * [-] the election timer expires and this RN becomes a candidate 198 | * 199 | * In a follower, this typically keeps running in the background 200 | * for the duration of the node's lifetime. The ticker ensures 201 | * that the node responds to any change in term, and the state is 202 | * same as the expected state. If anything is off, we terminate 203 | * the election timer. 204 | */ 205 | 206 | ticker := time.NewTicker(10 * time.Millisecond) 207 | defer ticker.Stop() 208 | for { 209 | <-ticker.C // Wait for the ticker to fire 210 | 211 | rn.mu.Lock() 212 | 213 | // If we are not a candidate or follower, we are 214 | // in no need of election timer in leader state 215 | 216 | if rn.state != Candidate && rn.state != Follower { 217 | rn.debug("In election timer state=%s, bailing out", rn.state) 218 | rn.mu.Unlock() 219 | return 220 | } 221 | 222 | // If the term has changed, we are no longer needed 223 | //of the current election timer and we can bail out 224 | 225 | if termStarted != rn.currentTerm { 226 | rn.debug("in election timer term changed from %d to %d, bailing out", termStarted, rn.currentTerm) 227 | rn.mu.Unlock() 228 | return 229 | } 230 | 231 | // Start an election if we haven't heard from a leader 232 | // or haven't voted for someone for the duration of the 233 | // timeout. 234 | 235 | if elapsed := time.Since(rn.electionResetEvent); elapsed >= timeoutDuration { 236 | rn.startElection() 237 | rn.mu.Unlock() 238 | return 239 | } 240 | rn.mu.Unlock() 241 | } 242 | } 243 | 244 | // electionTimeout generates a pseudo-random election timeout duration 245 | // The duration is chosen randomly between 150ms and 300ms (Ref to 246 | // https://raft.github.io/raft.pdf [Raft] Section 9.3 [ preformance ]) 247 | // 248 | // If RAFT_FORCE_MORE_REELECTION is set, we stress-test by deliberately 249 | // generating a hard-coded number very often (150ms in this case). This 250 | // will create collisions between different servers and force more 251 | // re-elections. 
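// As an illustration, in the sample run shown in utils/output.txt above the
// three nodes drew timeouts of 235ms, 268ms and 288ms from this range; the
// node with the smallest timeout (node 0) expired first and started the
// election for term 1.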
252 | 253 | func (rn *RaftNode) electionTimeout() time.Duration { 254 | if os.Getenv("RAFT_FORCE_MORE_REELECTION") == "true" && rand.Intn(3) > 0 { 255 | 256 | // Force a re-election every 150ms with probability 2/3 257 | return time.Duration(150) * time.Millisecond 258 | 259 | } else { 260 | 261 | // Generate a random election timeout between 150ms and 300ms 262 | return time.Duration(150+rand.Intn(150)) * time.Millisecond 263 | 264 | } 265 | } 266 | 267 | // startElection starts a new election with the 268 | // current Raft Node as a candidate. This function 269 | // expects the mutex of the Node to be locked. 270 | 271 | func (rn *RaftNode) startElection() { 272 | rn.state = Candidate // Update the state to candidate 273 | rn.currentTerm += 1 // Increment the term 274 | savedCurrentTerm := rn.currentTerm // Save the current term 275 | rn.electionResetEvent = time.Now() // Set the Reset Event to current time 276 | rn.votedFor = int(rn.id) // Vote for self 277 | votesReceived := 1 // We have already voted for self 278 | 279 | rn.debug("Becomes Candidate (currentTerm=%d); log=%v", savedCurrentTerm, rn.log) 280 | 281 | // Check majority, helpful in case of only 1 node in cluster 282 | go func() { 283 | rn.mu.Lock() 284 | defer rn.mu.Unlock() 285 | 286 | if rn.state != Candidate { 287 | rn.debug("while waiting for majority, state = %v", rn.state) 288 | return 289 | } 290 | if votesReceived*2 > rn.peerList.Size()+1 { // If we have majority votes, become leader 291 | 292 | // Won the election so become leader 293 | rn.debug("Wins election with %d votes", votesReceived) 294 | rn.becomeLeader() // Become leader 295 | return 296 | } 297 | }() 298 | 299 | // Send RequestVote RPCs to all other peer servers concurrently. 300 | for peer := range rn.peerList.peerSet { 301 | go func(peer uint64) { 302 | rn.mu.Lock() 303 | savedLastLogIndex, savedLastLogTerm := rn.lastLogIndexAndTerm() 304 | rn.mu.Unlock() 305 | 306 | args := RequestVoteArgs{ 307 | Term: savedCurrentTerm, // Current term 308 | CandidateId: rn.id, // Candidate's ID 309 | LastLogIndex: savedLastLogIndex, // Index of candidate's last log entry 310 | LastLogTerm: savedLastLogTerm, // Term of candidate's last log entry 311 | } 312 | 313 | rn.debug("Sending RequestVote to %d: %+v", peer, args) 314 | 315 | var reply RequestVoteReply // Reply from the server 316 | if err := rn.server.RPC(peer, "RaftNode.RequestVote", args, &reply); err == nil { 317 | rn.mu.Lock() // Lock the Raft Node 318 | defer rn.mu.Unlock() // Unlock the Raft Node 319 | rn.debug("received RequestVoteReply %+v from %v", reply, peer) 320 | 321 | if rn.state != Candidate { // If we are no longer a candidate, bail out 322 | rn.debug("While waiting for reply, state = %v", rn.state) 323 | return 324 | } 325 | 326 | if reply.Term > savedCurrentTerm { // If the term is greater than ours, we are no longer a candidate 327 | rn.debug("Term out of date in RequestVoteReply from %v", peer) 328 | rn.becomeFollower(reply.Term) // Become a follower 329 | return 330 | } else if reply.Term == savedCurrentTerm { 331 | // If the term is equal to ours, we need to check the vote 332 | /*fmt.Println("Candi Id: ", rn.id) 333 | fmt.Println(peer) 334 | fmt.Println(reply)*/ 335 | if reply.VoteGranted { 336 | 337 | // If the vote is granted, increment the vote count 338 | votesReceived += 1 // Increment the vote count 339 | if votesReceived*2 > rn.peerList.Size()+1 { // If we have majority votes, become leader 340 | 341 | // Won the election so become leader 342 | rn.debug("Wins election with %d votes", 
votesReceived) 343 | rn.becomeLeader() // Become leader 344 | return 345 | } 346 | } 347 | } 348 | } 349 | }(peer) 350 | } 351 | 352 | // Run another election timer, in case this election is not successful. 353 | go rn.runElectionTimer() 354 | } 355 | 356 | // becomeLeader switches Raft Node into a leader state 357 | // and begins process of heartbeats every 50ms. This 358 | // function expects the mutex of the Node to be locked 359 | 360 | func (rn *RaftNode) becomeLeader() { 361 | rn.state = Leader // Update the state to leader 362 | 363 | for peer := range rn.peerList.peerSet { 364 | rn.nextIndex[peer] = uint64(len(rn.log)) + 1 // Initialize nextIndex for all peers with the last log index (leader) + 1 365 | rn.matchIndex[peer] = 0 // No match index yet 366 | } 367 | 368 | rn.debug("becomes Leader; term=%d, nextIndex=%v, matchIndex=%v; log=%v", rn.currentTerm, rn.nextIndex, rn.matchIndex, rn.log) 369 | 370 | /** 371 | * The following goroutine is the heart of the leader 372 | * election. It sends AppendEntries RPCs to all peers 373 | * in the cluster in any of the following cases: 374 | * 375 | * 1. There is something on the trigger channel (OR) 376 | * 2. Every 50ms, if no events occur on trigger channel 377 | * 378 | * The goroutine is terminated when the Raft Node is no 379 | * longer a leader. This goroutine runs in the background 380 | */ 381 | 382 | go func(heartbeatTimeout time.Duration) { 383 | rn.leaderSendAEs() // Send AppendEntries RPCs to all peers to notify them of the leader 384 | 385 | t := time.NewTimer(heartbeatTimeout) // Create a new timer 386 | defer t.Stop() // Stop the timer when the goroutine terminates 387 | for { 388 | doSend := false 389 | select { 390 | case <-t.C: 391 | 392 | // CASE: Timer expired 393 | doSend = true // If the timer expires, send an heartbeat 394 | t.Stop() // Stop the timer if we have something to send 395 | t.Reset(heartbeatTimeout) // Reset the timer to fire again after heartbeat timeout 396 | 397 | case _, ok := <-rn.trigger: 398 | 399 | // CASE: Trigger channel has something 400 | if ok { 401 | doSend = true // If the trigger channel has something, send an AppendEntries 402 | } else { 403 | return // If the trigger channel is closed, terminate the goroutine 404 | } 405 | if !t.Stop() { // Wait for the timer to stop 50ms for the next event 406 | <-t.C 407 | } 408 | t.Reset(heartbeatTimeout) // Reset the timer to fire again after heartbeat timeout 409 | } 410 | 411 | if doSend { // If we have something to send, send it 412 | rn.mu.Lock() // Lock the mutex 413 | if rn.state != Leader { // If we are no longer a leader, bail out 414 | rn.mu.Unlock() // Unlock the mutex 415 | return // Terminate the goroutine 416 | } 417 | rn.mu.Unlock() // Unlock the mutex 418 | rn.leaderSendAEs() // Send AppendEntries to all peers 419 | } 420 | } 421 | }(50 * time.Millisecond) 422 | } 423 | 424 | // leaderSendAEs sends AppendEntries RPCs to all peers 425 | // in the cluster, collects responses, and updates the 426 | // state of the Raft Node accordingly. 
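// For example, in the three-node cluster from the test output above, an entry
// at index i is treated as committed once matchCount*2 > peerList.Size()+1,
// i.e. once the leader plus at least one follower (2 of 3 nodes) hold the
// entry at or beyond index i, and only for entries from the current term.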
427 | 428 | func (rn *RaftNode) leaderSendAEs() { 429 | rn.mu.Lock() // Lock the mutex 430 | savedCurrentTerm := rn.currentTerm // Save the current term 431 | rn.mu.Unlock() // Unlock the mutex 432 | 433 | // handling the case for a single node cluster 434 | go func(peer uint64) { 435 | if rn.peerList.Size() == 0 { 436 | if uint64(len(rn.log)) > rn.commitIndex { 437 | savedCommitIndex := rn.commitIndex 438 | for i := rn.commitIndex + 1; i <= uint64(len(rn.log)); i++ { 439 | if rn.log[i-1].Term == rn.currentTerm { // If the term is the same as the current term, update the commit index 440 | rn.commitIndex = i 441 | 442 | } 443 | } 444 | if savedCommitIndex != rn.commitIndex { 445 | rn.debug("Leader sets commitIndex := %d", rn.commitIndex) 446 | // Commit index changed: the leader considers new entries 447 | // to be committed. Send new entries on the commit channel 448 | // to this leader's clients, and notify followers by sending 449 | // them Append Entries. 450 | 451 | rn.newCommitReady <- struct{}{} 452 | rn.trigger <- struct{}{} 453 | } 454 | } 455 | } 456 | }(rn.id) 457 | 458 | for peer := range rn.peerList.peerSet { 459 | 460 | // The following goroutine is used to send AppendEntries RPCs 461 | // to one peer in the cluster and collect responses to 462 | // determine and update the current state of the Raft Node 463 | 464 | go func(peer uint64) { 465 | rn.mu.Lock() // Lock the mutex 466 | nextIndex := rn.nextIndex[peer] // Get the next index for this peer 467 | prevLogIndex := int(nextIndex) - 1 // Get the previous log index for this peer 468 | prevLogTerm := uint64(0) // Get the previous log term for this peer 469 | 470 | if prevLogIndex > 0 { 471 | // If the previous log index is greater than 0, get the previous log term from the log 472 | prevLogTerm = rn.log[uint64(prevLogIndex)-1].Term 473 | } 474 | entries := rn.log[int(nextIndex)-1:] // Get the entries for this peer 475 | 476 | args := AppendEntriesArgs{ 477 | Term: savedCurrentTerm, // Get the current term 478 | LeaderId: rn.id, // Get the id of the leader 479 | PrevLogIndex: uint64(prevLogIndex), // Get the previous log index 480 | PrevLogTerm: prevLogTerm, // Get the previous log term 481 | Entries: entries, // Get the entries 482 | LeaderCommit: rn.commitIndex, // Get the leader commit index 483 | } 484 | 485 | rn.mu.Unlock() // Unlock the mutex before sending the RPC 486 | rn.debug("sending AppendEntries to %v: ni=%d, args=%+v", peer, nextIndex, args) 487 | 488 | var reply AppendEntriesReply 489 | if err := rn.server.RPC(peer, "RaftNode.AppendEntries", args, &reply); err == nil { 490 | rn.mu.Lock() // Lock the mutex before updating the state 491 | defer rn.mu.Unlock() // Unlock the mutex after updating the state 492 | if reply.Term > savedCurrentTerm { // If the reply term is greater than the current term, update the current term 493 | 494 | rn.debug("Term out of date in heartbeat reply") 495 | rn.becomeFollower(reply.Term) // Update the state to follower since the term is out of date 496 | return 497 | } 498 | 499 | if rn.state == Leader && savedCurrentTerm == reply.Term { // If we are still a leader and the term is the same, update the next index and match index 500 | if reply.Success { // If follower contained entry matching prevLogIndex and prevLogTerm 501 | rn.nextIndex[peer] = nextIndex + uint64(len(entries)) // Update the next index 502 | rn.matchIndex[peer] = rn.nextIndex[peer] - 1 // Update the match index 503 | 504 | savedCommitIndex := rn.commitIndex // Save the current commit index 505 | for i := 
rn.commitIndex + 1; i <= uint64(len(rn.log)); i++ { 506 | if rn.log[i-1].Term == rn.currentTerm { // If the term is the same as the current term, update the commit index 507 | matchCount := 1 // Initialize the match count to single match 508 | 509 | for peer := range rn.peerList.peerSet { 510 | if rn.matchIndex[peer] >= i { 511 | 512 | // If the match index is greater than or equal 513 | // to the current index, increment the match count 514 | matchCount++ 515 | } 516 | } 517 | if matchCount*2 > rn.peerList.Size()+1 { 518 | 519 | // If the match count is greater than the 520 | // number of peers plus 1, that is got 521 | // the majority, update the commit index 522 | rn.commitIndex = i 523 | } 524 | } 525 | } 526 | rn.debug("AppendEntries reply from %d success: nextIndex := %v, matchIndex := %v; commitIndex := %d", peer, rn.nextIndex, rn.matchIndex, rn.commitIndex) 527 | 528 | if rn.commitIndex != savedCommitIndex { 529 | rn.debug("Leader sets commitIndex := %d", rn.commitIndex) 530 | // Commit index changed: the leader considers new entries 531 | // to be committed. Send new entries on the commit channel 532 | // to this leader's clients, and notify followers by sending 533 | // them Append Entries. 534 | 535 | rn.newCommitReady <- struct{}{} 536 | rn.trigger <- struct{}{} 537 | } 538 | } else { 539 | // Success is false: follower contained conflicting entry 540 | 541 | if reply.ConflictTerm > 0 { 542 | lastIndexOfTerm := uint64(0) // Initialize the last index of the term to 0 543 | for i := uint64(len(rn.log)); i > 0; i-- { 544 | if rn.log[i-1].Term == reply.ConflictTerm { 545 | // If the term is the same as the conflict term, update the last index of the term 546 | lastIndexOfTerm = i 547 | break 548 | } 549 | } 550 | if lastIndexOfTerm > 0 { 551 | // If there are entries after the conflicting entry in the log 552 | // update the next index to the index of the last entry in the 553 | rn.nextIndex[peer] = lastIndexOfTerm + 1 554 | } else { 555 | // If there are no entries after the conflicting entry in the log 556 | // update the next index to the index of the conflicting entry 557 | rn.nextIndex[peer] = reply.ConflictIndex 558 | } 559 | } else { 560 | // Success is false and conflict term is 0: follower contained conflicting entry 561 | rn.nextIndex[peer] = reply.ConflictIndex 562 | } 563 | rn.debug("AppendEntries reply from %d !success: nextIndex := %d", peer, nextIndex-1) 564 | } 565 | } 566 | } 567 | }(peer) 568 | } 569 | } 570 | 571 | // lastLogIndexAndTerm returns the index of the last 572 | // log and the last log entry's term (or 0 if there's 573 | // no log) for this server. This function expects the 574 | // Node's mutex to be locked. The log index is 1 based 575 | // hence the empty log is index 0. 576 | 577 | func (rn *RaftNode) lastLogIndexAndTerm() (uint64, uint64) { 578 | if len(rn.log) > 0 { 579 | 580 | // Log is not empty 581 | lastIndex := uint64(len(rn.log)) // Index of last log entry (1 based) 582 | return lastIndex, rn.log[lastIndex-1].Term 583 | 584 | } else { 585 | 586 | // Empty log has index 0 and term 0 587 | return 0, 0 588 | 589 | } 590 | } 591 | 592 | // AppendEntries is the RPC handler for AppendEntries 593 | // RPCs. 
This function is used to send entries to followers 594 | // This function expects the Node's mutex to be locked 595 | 596 | func (rn *RaftNode) AppendEntries(args AppendEntriesArgs, reply *AppendEntriesReply) error { 597 | rn.mu.Lock() // Lock the mutex before updating the state 598 | defer rn.mu.Unlock() // Unlock the mutex after updating the state 599 | if rn.state == Dead || !rn.peerList.Exists(args.LeaderId) { // If the node is dead, return false 600 | return nil // Return no error 601 | } 602 | rn.debug("AppendEntries: %+v", args) 603 | 604 | if args.Term > rn.currentTerm { // If the term is greater than the current term, update the current term 605 | rn.debug("Term out of date in AppendEntries") 606 | rn.becomeFollower(args.Term) // Update the state to follower since the term is out of date 607 | } 608 | 609 | reply.Success = false // Initialize the reply to false 610 | if args.Term == rn.currentTerm { 611 | if rn.state != Follower { 612 | 613 | // Raft guarantees that only a single leader exists in 614 | // any given term. If we carefully follow the logic of 615 | // RequestVote and the code in startElection that sends 616 | // RVs, we'll see that two leaders can't exist in the 617 | // cluster with the same term. This condition is 618 | // important for candidates that find out that another 619 | // peer won the election for this term. 620 | 621 | rn.becomeFollower(args.Term) // Update the state to follower since it received an AE from a leader 622 | } 623 | rn.electionResetEvent = time.Now() // Reset the election timer 624 | 625 | // Does our log contain an entry at PrevLogIndex whose 626 | // term matches PrevLogTerm? Note that in the extreme 627 | // case of PrevLogIndex=0 (empty) this is vacuously true 628 | 629 | if args.PrevLogIndex == 0 || 630 | (args.PrevLogIndex <= uint64(len(rn.log)) && args.PrevLogTerm == rn.log[args.PrevLogIndex-1].Term) { 631 | reply.Success = true 632 | 633 | // Find an insertion point - where there's a term mismatch between 634 | // the existing log starting at PrevLogIndex+1 and the new entries 635 | // sent in the RPC. 636 | 637 | logInsertIndex := args.PrevLogIndex + 1 638 | newEntriesIndex := uint64(1) 639 | 640 | for { 641 | if logInsertIndex > uint64(len(rn.log)) || newEntriesIndex > uint64(len(args.Entries)) { 642 | break 643 | } 644 | if rn.log[logInsertIndex-1].Term != args.Entries[newEntriesIndex-1].Term { 645 | break 646 | } 647 | logInsertIndex++ 648 | newEntriesIndex++ 649 | } 650 | 651 | /** 652 | * At the end of this loop (considering 1 based 653 | * indexing), the following will hold: 654 | * 655 | * [-] logInsertIndex points at the end of the log 656 | * or an index where the term mismatches with 657 | * an entry from the leader 658 | * 659 | * [-] newEntriesIndex points at the end of Entries 660 | * or an index where the term mismatches with 661 | * the corresponding log entry 662 | */ 663 | 664 | if newEntriesIndex <= uint64(len(args.Entries)) { 665 | // If the newEntriesIndex is less than the length of the entries, append the entries 666 | rn.debug("Inserting entries %v from index %d", args.Entries[newEntriesIndex-1:], logInsertIndex) 667 | rn.log = append(rn.log[:logInsertIndex-1], args.Entries[newEntriesIndex-1:]...) 
// Insert the new entries 668 | // Add the code to Update Config 669 | // Add code to establish/remove connections 670 | 671 | // loop over the new entries to check if any is for cluster change 672 | for _, entry := range args.Entries[newEntriesIndex-1:] { 673 | cmd := entry.Command 674 | switch v := cmd.(type) { 675 | case AddServers: 676 | for _, peerId := range v.ServerIds { 677 | if rn.id == uint64(peerId) { 678 | continue 679 | } 680 | rn.peerList.Add(uint64(peerId)) // add new server id to the peerList 681 | } 682 | case RemoveServers: 683 | for _, peerId := range v.ServerIds { 684 | rn.peerList.Remove(uint64(peerId)) // remove old server id from the peerList 685 | } 686 | } 687 | } 688 | 689 | rn.debug("Log is now: %v", rn.log) 690 | } 691 | 692 | // Set commit index, if the leader's commit index is greater than the length of the log, set it to the length of the log 693 | if args.LeaderCommit > rn.commitIndex { 694 | rn.commitIndex = uint64(math.Min(float64(args.LeaderCommit), float64(len(rn.log)))) // Update the commit index 695 | rn.debug("Setting commitIndex=%d", rn.commitIndex) 696 | rn.newCommitReady <- struct{}{} // Signal that a new commit index is ready 697 | } 698 | } else { 699 | // No match for PrevLogIndex or PrevLogTerm. Populate 700 | // ConflictIndex or ConflictTerm to help the leader 701 | // bring us up to date quickly. Success is false in 702 | // this case. 703 | 704 | if args.PrevLogIndex > uint64(len(rn.log)) { 705 | reply.ConflictIndex = uint64(len(rn.log)) + 1 // If the PrevLogIndex is greater than the length of the log, set the conflict index to the length of the log 706 | reply.ConflictTerm = 0 // Set the conflict term to 0 707 | } else { 708 | // PrevLogIndex points within our log 709 | // but PrevLogTerm does not match 710 | // rn.log[PrevLogIndex-1]. 711 | 712 | reply.ConflictTerm = rn.log[args.PrevLogIndex-1].Term 713 | 714 | var cfi uint64 715 | for cfi = args.PrevLogIndex - 1; cfi > 0; cfi-- { 716 | if rn.log[cfi-1].Term != reply.ConflictTerm { 717 | break // Break out of the loop when the term mismatches 718 | } 719 | } 720 | reply.ConflictIndex = cfi + 1 // Set the conflict index to the index of the first entry with a with the same term as the conflict term 721 | } 722 | } 723 | } 724 | 725 | reply.Term = rn.currentTerm // Set the term in the reply to the current term 726 | rn.persistToStorage() // Persist the state to storage 727 | rn.debug("AppendEntries reply: %+v", *reply) 728 | return nil // Return no error 729 | } 730 | 731 | // RequestVote Remote Procedure Call is invoked by candidates 732 | // to find out if they can win an election. The RPC returns 733 | // true if the candidate is running and has a higher term than 734 | // the current term. 
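// For example, a follower whose last log entry is (term=2, index=4) grants its
// vote to a candidate advertising LastLogTerm=2, LastLogIndex=5 (same term,
// longer log), but denies a candidate advertising LastLogTerm=1,
// LastLogIndex=9, because a higher last-entry term wins regardless of log
// length; the vote is also denied if votedFor already names another candidate
// for this term.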
735 | 736 | func (rn *RaftNode) RequestVote(args RequestVoteArgs, reply *RequestVoteReply) error { 737 | rn.mu.Lock() 738 | defer rn.mu.Unlock() 739 | 740 | if rn.state == Dead || !rn.peerList.Exists(args.CandidateId) { // If the node is dead, we don't need to process this request, since it is stale 741 | return nil 742 | } 743 | 744 | lastLogIndex, lastLogTerm := rn.lastLogIndexAndTerm() // Get the last log index and term 745 | rn.debug("RequestVote: %+v [currentTerm=%d, votedFor=%d, log index/term=(%d, %d)]", args, rn.currentTerm, rn.votedFor, lastLogIndex, lastLogTerm) 746 | 747 | if args.Term > rn.currentTerm { 748 | // If the term is out of date and becoming a follower 749 | // If it's already a follower, the state won't change 750 | // but the other state fields will reset 751 | 752 | rn.debug("Term out of date with term in RequestVote") 753 | rn.becomeFollower(args.Term) 754 | } 755 | 756 | /** 757 | * If the candidate's log is at least as up-to-date as 758 | * our last log entry, and the candidate's term is at 759 | * least as recent as ours, then we can grant the vote 760 | */ 761 | 762 | if rn.currentTerm == args.Term && 763 | (rn.votedFor == -1 || rn.votedFor == int(args.CandidateId)) && 764 | (args.LastLogTerm > lastLogTerm || (args.LastLogTerm == lastLogTerm && args.LastLogIndex >= lastLogIndex)) { 765 | 766 | // If the caller's term is aligned with ours and 767 | // we haven't voted for another candidate yet 768 | // we'll grant the vote. We never grant a vote 769 | // for RPCs from older terms 770 | 771 | reply.VoteGranted = true // Grant the vote to the candidate 772 | rn.votedFor = int(args.CandidateId) // Remember who we voted for 773 | rn.electionResetEvent = time.Now() // Set the Reset Event to current time 774 | 775 | } else { 776 | 777 | // Deny the vote to the candidate 778 | reply.VoteGranted = false 779 | 780 | } 781 | reply.Term = rn.currentTerm // Set the term to the current term 782 | rn.persistToStorage() // Persist the state to storage 783 | rn.debug("RequestVote reply: %+v", reply) 784 | return nil // Return nil error 785 | } 786 | 787 | // becomeFollower makes the current RaftNode a 788 | // follower and resets the state. 
This function
789 | // expects the mutex of the Node to be locked
790 | 
791 | func (rn *RaftNode) becomeFollower(term uint64) {
792 | 	rn.debug("Becomes Follower with term=%d; log=%v", term, rn.log)
793 | 
794 | 	rn.state = Follower // Update the state to follower
795 | 	rn.currentTerm = term // Update the term
796 | 	rn.votedFor = -1 // Reset votedFor to -1, i.e. no vote cast yet in this term
797 | 	rn.electionResetEvent = time.Now() // Set the Reset Event to current time
798 | 
799 | 	go rn.runElectionTimer() // Run another election timer, since we transitioned to follower
800 | }
801 | 
802 | // persistToStorage saves all of Raft Node's persistent
803 | // state in Raft Node's database / non volatile storage
804 | // This function expects the mutex of the Node to be locked
805 | 
806 | func (rn *RaftNode) persistToStorage() {
807 | 	// Persist the currentTerm, votedFor and log to storage
808 | 
809 | 	for _, data := range []struct {
810 | 		name string
811 | 		value interface{}
812 | 	}{{"currentTerm", rn.currentTerm}, {"votedFor", rn.votedFor}, {"log", rn.log}} {
813 | 
814 | 		var buf bytes.Buffer // Buffer to hold the data
815 | 		enc := gob.NewEncoder(&buf) // Create a new encoder
816 | 
817 | 		if err := enc.Encode(data.value); err != nil { // Encode the data
818 | 			log.Fatal("encode error: ", err) // If there's an error, log it
819 | 		}
820 | 		rn.db.Set(data.name, buf.Bytes()) // Save the data to the database
821 | 	}
822 | }
823 | 
824 | // restoreFromStorage restores all of Raft Node's persistent
825 | // state from Raft Node's database / non volatile storage
826 | // This function expects the mutex of the Node to be locked
827 | 
828 | func (rn *RaftNode) restoreFromStorage() {
829 | 	// Restore the currentTerm, votedFor and log from storage
830 | 
831 | 	for _, data := range []struct {
832 | 		name string
833 | 		value interface{}
834 | 	}{{"currentTerm", &rn.currentTerm}, {"votedFor", &rn.votedFor}, {"log", &rn.log}} {
835 | 		if value, found := rn.db.Get(data.name); found {
836 | 
837 | 			// If the data is found in the database, decode it
838 | 			dec := gob.NewDecoder(bytes.NewBuffer(value)) // Create a new decoder
839 | 			if err := dec.Decode(data.value); err != nil { // Decode the data
840 | 				log.Fatal("decode error: ", err) // If there's an error, log it
841 | 			}
842 | 
843 | 		} else {
844 | 
845 | 			// If the data is not found in the database
846 | 			log.Fatal("No data found for ", data.name)
847 | 
848 | 		}
849 | 	}
850 | }
851 | 
852 | func (rn *RaftNode) readFromStorage(key string, reply interface{}) error {
853 | 	if value, found := rn.db.Get(key); found {
854 | 		// If the data is found in the database, decode it
855 | 		dec := gob.NewDecoder(bytes.NewBuffer(value)) // Create a new decoder
856 | 		if err := dec.Decode(reply); err != nil { // Decode the data
857 | 			return err
858 | 		}
859 | 		return nil
860 | 	} else {
861 | 		err := fmt.Errorf("KeyNotFound:%v", key)
862 | 		return err
863 | 	}
864 | }
865 | 
866 | // Submit submits a new command from the client to the RaftNode. This
867 | // function doesn't block; clients read the commit channel passed in
868 | // the constructor to be notified of new committed entries. It returns
869 | // true iff this Raft Node is the leader - in which case the command is
870 | // accepted. If false is returned, the client will have to find a
871 | // different RaftNode to submit this command to.
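// For illustration, a client that already holds the leader node might call
// Submit roughly like this (node, myCmd and commitChan are placeholders;
// commitChan is the channel that was passed to NewRaftNode):
//
//	if isLeader, _, err := node.Submit(myCmd); isLeader && err == nil {
//		entry := <-commitChan // later reports the committed entry
//		fmt.Println("committed", entry.Command, "at index", entry.Index)
//	}
//
// Read, AddServers and RemoveServers are handled specially by the switch
// below; any other command type is appended to the log and replicated as-is.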
872 | 873 | func (rn *RaftNode) Submit(command interface{}) (bool, interface{}, error) { 874 | rn.mu.Lock() // Lock the mutex 875 | rn.debug("Submit received by %v: %v", rn.state, command) 876 | 877 | // Process the command only if the node is a leader 878 | if rn.state == Leader { 879 | switch v := command.(type) { 880 | case Read: 881 | key := v.Key 882 | var value int 883 | readErr := rn.readFromStorage(key, &value) 884 | rn.mu.Unlock() 885 | return true, value, readErr 886 | case AddServers: 887 | serverIds := v.ServerIds 888 | for i := 0; i < len(serverIds); i++ { 889 | if rn.peerList.Exists(uint64(serverIds[i])) { 890 | rn.mu.Unlock() 891 | return false, nil, errors.New("server with given serverID already exists") 892 | } 893 | } 894 | rn.log = append(rn.log, LogEntry{Command: command, Term: rn.currentTerm}) // Append the command to the log 895 | 896 | // Updating the configuration for this node. Raft Paper Section 6 mentions that 897 | // "Once a server adds the new configuration to its log, it uses that configuration 898 | // for all future decisions (regardless of whether it has been committed) " 899 | for i := 0; i < len(serverIds); i++ { 900 | rn.peerList.Add(uint64(serverIds[i])) 901 | rn.server.peerList.Add(uint64(serverIds[i])) 902 | rn.nextIndex[uint64(serverIds[i])] = uint64(len(rn.log)) + 1 // Initialize nextIndex for all peers with the last log index (leader) + 1 903 | rn.matchIndex[uint64(serverIds[i])] = 0 // No match index yet 904 | } 905 | // Add code to establish connections 906 | rn.persistToStorage() // Persist the log to storage 907 | rn.debug("log=%v", rn.log) // Debug the log state 908 | rn.mu.Unlock() // Unlock the mutex before returning 909 | rn.trigger <- struct{}{} // Trigger the event for append entries 910 | return true, nil, nil // Return true since we are the leader 911 | case RemoveServers: 912 | serverIds := v.ServerIds 913 | for i := 0; i < len(serverIds); i++ { 914 | if !rn.peerList.Exists(uint64(serverIds[i])) && rn.id != uint64(serverIds[i]) { 915 | rn.mu.Unlock() 916 | return false, nil, errors.New("server with given serverID does not exist") 917 | } 918 | } 919 | rn.log = append(rn.log, LogEntry{Command: command, Term: rn.currentTerm}) // Append the command to the log 920 | 921 | // Updating the configuration for this node. 
Raft Paper Section 6 mentions that 922 | // "Once a server adds the new configuration to its log, it uses that configuration 923 | // for all future decisions (regardless of whether it has been committed) " 924 | for i := 0; i < len(serverIds); i++ { 925 | if rn.id != uint64(serverIds[i]) { 926 | rn.peerList.Remove(uint64(serverIds[i])) 927 | rn.server.peerList.Remove(uint64(serverIds[i])) 928 | } 929 | } 930 | // Add code to remove connections 931 | rn.persistToStorage() // Persist the log to storage 932 | rn.debug("log=%v", rn.log) // Debug the log state 933 | rn.mu.Unlock() // Unlock the mutex before returning 934 | rn.trigger <- struct{}{} // Trigger the event for append entries 935 | return true, nil, nil // Return true since we are the leader 936 | default: 937 | rn.log = append(rn.log, LogEntry{Command: command, Term: rn.currentTerm}) // Append the command to the log 938 | rn.persistToStorage() // Persist the log to storage 939 | rn.debug("log=%v", rn.log) // Debug the log state 940 | rn.mu.Unlock() // Unlock the mutex before returning 941 | rn.trigger <- struct{}{} // Trigger the event for append entries 942 | return true, nil, nil // Return true since we are the leader 943 | } 944 | } 945 | 946 | rn.mu.Unlock() // Unlock the mutex 947 | return false, nil, nil 948 | } 949 | 950 | // Stop stops this RaftNode, cleaning up its state. This method 951 | // returns quickly, but it may take a bit of time (up to election 952 | // timeout) for all goroutines to exit and fully free its resources 953 | 954 | func (rn *RaftNode) Stop() { 955 | rn.mu.Lock() // Lock the mutex 956 | defer rn.mu.Unlock() // Unlock the mutex 957 | 958 | // Update the state to stopped 959 | rn.state = Dead // Set the state to dead 960 | rn.debug("Becomes Dead") // Debug the state 961 | close(rn.newCommitReady) // Close the channel 962 | } 963 | 964 | // Report reports the current state of the RaftNode 965 | // This function primarily returns the following 966 | // information: 967 | // - The identity of the RaftNode 968 | // - The current term of the RaftNode 969 | // - The boolean indicating whether 970 | // this RaftNode is a leader 971 | 972 | func (rn *RaftNode) Report() (id int, term int, isLeader bool) { 973 | rn.mu.Lock() // Lock the mutex 974 | defer rn.mu.Unlock() // Unlock the mutex 975 | 976 | isLeader = rn.state == Leader // Set the leader flag to true if the node is a leader 977 | return int(rn.id), int(rn.currentTerm), isLeader // Return the id, term and leader flag 978 | } 979 | 980 | // String returns a string representation of the Raft node state. 981 | func (s RNState) String() string { // String returns the string representation of a Raft node state 982 | switch s { 983 | case Follower: 984 | return "Follower" 985 | case Candidate: 986 | return "Candidate" 987 | case Leader: 988 | return "Leader" 989 | case Dead: 990 | return "Dead" 991 | default: // Should never happen 992 | panic("Error: Unknown state") 993 | } 994 | } 995 | --------------------------------------------------------------------------------
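As a closing illustration (this sketch is not part of the repository), the CommitEntry values that a RaftNode reports on its commit channel are meant to be applied by the client in index order. The snippet below exercises only that consumer side; it fills the channel by hand instead of wiring up a real cluster, so NewRaftNode, Server and Database are not involved.

package main

import (
	"fmt"

	"raft-consensus/raft"
)

func main() {
	// Same element type as the commit channel handed to NewRaftNode; the
	// sendCommit comment above notes that this channel may be buffered.
	commits := make(chan raft.CommitEntry, 16)

	// Normally the RaftNode sends on this channel once entries are committed;
	// here two entries are fed by hand purely to demonstrate the consumer.
	commits <- raft.CommitEntry{Command: "set x=1", Index: 1, Term: 1}
	commits <- raft.CommitEntry{Command: "set y=2", Index: 2, Term: 1}
	close(commits)

	// A client applies committed commands to its own state machine in the
	// order they arrive (log indexes are 1-based, as noted in raft.go).
	for entry := range commits {
		fmt.Printf("applied index=%d term=%d command=%v\n", entry.Index, entry.Term, entry.Command)
	}
}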