├── .gitignore ├── LICENSE ├── README.md ├── part1 ├── dochecks.sh ├── dotest.sh ├── go.mod ├── go.sum ├── raft.go ├── raft_test.go ├── server.go └── testharness.go ├── part2 ├── dochecks.sh ├── dotest.sh ├── go.mod ├── go.sum ├── raft.go ├── raft_test.go ├── server.go └── testharness.go ├── part3 └── raft │ ├── dochecks.sh │ ├── dotest.sh │ ├── go.mod │ ├── go.sum │ ├── raft.go │ ├── raft_test.go │ ├── server.go │ ├── storage.go │ └── testharness.go ├── part4kv ├── api │ └── api.go ├── dochecks.sh ├── dotest.sh ├── go.mod ├── go.sum ├── kvclient │ └── kvclient.go ├── kvservice │ ├── command.go │ ├── datastore.go │ ├── datastore_test.go │ ├── json.go │ └── kvservice.go ├── system_test.go └── testharness.go ├── part5kv ├── api │ └── api.go ├── dochecks.sh ├── dotest.sh ├── go.mod ├── go.sum ├── kvclient │ └── kvclient.go ├── kvservice │ ├── command.go │ ├── datastore.go │ ├── datastore_test.go │ ├── json.go │ └── kvservice.go ├── system_test.go └── testharness.go ├── raftlog-screenshot.png └── tools └── raft-testlog-viz └── main.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # :rowboat: Raft 2 | 3 | This is an instructional implementation of the Raft distributed consensus 4 | algorithm in Go. 
It's accompanied by a series of blog posts: 5 | 6 | * [Part 0: Introduction](https://eli.thegreenplace.net/2020/implementing-raft-part-0-introduction/) 7 | * [Part 1: Elections](https://eli.thegreenplace.net/2020/implementing-raft-part-1-elections/) 8 | * [Part 2: Commands and log replication](https://eli.thegreenplace.net/2020/implementing-raft-part-2-commands-and-log-replication/) 9 | * [Part 3: Persistence and optimizations](https://eli.thegreenplace.net/2020/implementing-raft-part-3-persistence-and-optimizations/) 10 | * [Part 4: Key/Value database](https://eli.thegreenplace.net/2024/implementing-raft-part-4-keyvalue-database/) 11 | * [Part 5: exactly-once delivery](https://eli.thegreenplace.net/2024/implementing-raft-part-5-exactly-once-delivery/) 12 | 13 | Each of the `partN` directories in this repository is the complete source code 14 | for Part N of the blog post series (except Part 0, which is introductory and has 15 | no code). There is a lot of duplicated code between the different `partN` 16 | directories - this is a conscious design decision. Rather than abstracting and 17 | reusing parts of the implementation, I opted for keeping the code as simple 18 | as possible. Each directory is completely self contained and can be read and 19 | understood in isolation. Using a graphical diff tool to see the deltas between 20 | the parts can be instructional. 21 | 22 | `part4kv` and `part5kv` contain the code of a Key/Value DB that uses `part3` 23 | as its Raft implementation. 24 | 25 | ## How to use this repository 26 | 27 | You can read the code, but I'd also encourage you to run tests and observe the 28 | logs they print out. The repository contains a useful tool for visualizing 29 | output. Here's a complete usage example: 30 | 31 | ``` 32 | $ cd part1 33 | $ go test -v -race -run TestElectionFollowerComesBack |& tee /tmp/raftlog 34 | ... logging output 35 | ... test should PASS 36 | $ go run ../tools/raft-testlog-viz/main.go < /tmp/raftlog 37 | PASS TestElectionFollowerComesBack map[0:true 1:true 2:true TEST:true] ; entries: 150 38 | ... Emitted file:///tmp/TestElectionFollowerComesBack.html 39 | 40 | PASS 41 | ``` 42 | 43 | Now open `file:///tmp/TestElectionFollowerComesBack.html` in your browser. 44 | You should see something like this: 45 | 46 | ![Image of log browser](https://github.com/eliben/raft/blob/main/raftlog-screenshot.png) 47 | 48 | Scroll and read the logs from the servers, noticing state changes (highlighted 49 | with colors). Feel free to add your own `cm.dlog(...)` calls to the code to 50 | experiment and print out more details. 51 | 52 | ## Changing and testing the code 53 | 54 | Each `partN` directory is completely independent of the others, and is its own 55 | Go module. The Raft code itself has no external dependencies; the only `require` 56 | in its `go.mod` is for a package that enables goroutine leak testing - it's only 57 | used in tests. 58 | 59 | To work on `part2`, for example: 60 | 61 | ``` 62 | $ cd part2 63 | ... make code changes 64 | $ go test -race ./... 65 | ``` 66 | 67 | Depending on the part and your machine, the tests can take up to a minute to 68 | run. Feel free to enable verbose logging with `-v`, and/or used the provided 69 | `dotest.sh` script to run specific tests with log visualization. 70 | 71 | Note: for part3, the actual Raft code is in a `part3/raft` subdirectory, to 72 | be importable from future parts. All the instructions discussed so far work 73 | with part3 as well - you just have to `cd` one directory deeper. 
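For example, a quick check of part 3 (the same commands as above, just run from the `part3/raft` subdirectory):

```
$ cd part3/raft
$ go test -race ./...
```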
74 | 75 | ## Contributing 76 | 77 | I'm interested in hearing your opinion or suggestions for the code in this 78 | repository. Feel free to open an issue if something is unclear, or if you think 79 | you found a bug. Code contributions through PRs are welcome as well. 80 | -------------------------------------------------------------------------------- /part1/dochecks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | go vet ./... 5 | staticcheck ./... 6 | -------------------------------------------------------------------------------- /part1/dotest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | logfile=~/temp/rlog 5 | 6 | go test -v -race -run $@ |& tee ${logfile} 7 | 8 | go run ../tools/raft-testlog-viz/main.go < ${logfile} 9 | -------------------------------------------------------------------------------- /part1/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/eliben/raft 2 | 3 | go 1.23.1 4 | 5 | require github.com/fortytw2/leaktest v1.3.0 6 | -------------------------------------------------------------------------------- /part1/go.sum: -------------------------------------------------------------------------------- 1 | github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= 2 | github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= 3 | -------------------------------------------------------------------------------- /part1/raft.go: -------------------------------------------------------------------------------- 1 | // Core Raft implementation - Consensus Module. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package raft 6 | 7 | import ( 8 | "fmt" 9 | "log" 10 | "math/rand" 11 | "os" 12 | "sync" 13 | "time" 14 | ) 15 | 16 | const DebugCM = 1 17 | 18 | type LogEntry struct { 19 | Command any 20 | Term int 21 | } 22 | 23 | type CMState int 24 | 25 | const ( 26 | Follower CMState = iota 27 | Candidate 28 | Leader 29 | Dead 30 | ) 31 | 32 | func (s CMState) String() string { 33 | switch s { 34 | case Follower: 35 | return "Follower" 36 | case Candidate: 37 | return "Candidate" 38 | case Leader: 39 | return "Leader" 40 | case Dead: 41 | return "Dead" 42 | default: 43 | panic("unreachable") 44 | } 45 | } 46 | 47 | // ConsensusModule (CM) implements a single node of Raft consensus. 48 | type ConsensusModule struct { 49 | // mu protects concurrent access to a CM. 50 | mu sync.Mutex 51 | 52 | // id is the server ID of this CM. 53 | id int 54 | 55 | // peerIds lists the IDs of our peers in the cluster. 56 | peerIds []int 57 | 58 | // server is the server containing this CM. It's used to issue RPC calls 59 | // to peers. 60 | server *Server 61 | 62 | // Persistent Raft state on all servers 63 | currentTerm int 64 | votedFor int 65 | log []LogEntry 66 | 67 | // Volatile Raft state on all servers 68 | state CMState 69 | electionResetEvent time.Time 70 | } 71 | 72 | // NewConsensusModule creates a new CM with the given ID, list of peer IDs and 73 | // server. The ready channel signals the CM that all peers are connected and 74 | // it's safe to start its state machine. 
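// A typical construction sequence (this is what Server.Serve and the test
// harness do): create each Server with NewServer, call Serve (which in turn
// calls NewConsensusModule), connect all peers to each other, and only then
// close the ready channel so that election timers start from a fully
// connected cluster.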
75 | func NewConsensusModule(id int, peerIds []int, server *Server, ready <-chan any) *ConsensusModule { 76 | cm := new(ConsensusModule) 77 | cm.id = id 78 | cm.peerIds = peerIds 79 | cm.server = server 80 | cm.state = Follower 81 | cm.votedFor = -1 82 | 83 | go func() { 84 | // The CM is quiescent until ready is signaled; then, it starts a countdown 85 | // for leader election. 86 | <-ready 87 | cm.mu.Lock() 88 | cm.electionResetEvent = time.Now() 89 | cm.mu.Unlock() 90 | cm.runElectionTimer() 91 | }() 92 | 93 | return cm 94 | } 95 | 96 | // Report reports the state of this CM. 97 | func (cm *ConsensusModule) Report() (id int, term int, isLeader bool) { 98 | cm.mu.Lock() 99 | defer cm.mu.Unlock() 100 | return cm.id, cm.currentTerm, cm.state == Leader 101 | } 102 | 103 | // Stop stops this CM, cleaning up its state. This method returns quickly, but 104 | // it may take a bit of time (up to ~election timeout) for all goroutines to 105 | // exit. 106 | func (cm *ConsensusModule) Stop() { 107 | cm.mu.Lock() 108 | defer cm.mu.Unlock() 109 | cm.state = Dead 110 | cm.dlog("becomes Dead") 111 | } 112 | 113 | // dlog logs a debugging message if DebugCM > 0. 114 | func (cm *ConsensusModule) dlog(format string, args ...any) { 115 | if DebugCM > 0 { 116 | format = fmt.Sprintf("[%d] ", cm.id) + format 117 | log.Printf(format, args...) 118 | } 119 | } 120 | 121 | // See figure 2 in the paper. 122 | type RequestVoteArgs struct { 123 | Term int 124 | CandidateId int 125 | LastLogIndex int 126 | LastLogTerm int 127 | } 128 | 129 | type RequestVoteReply struct { 130 | Term int 131 | VoteGranted bool 132 | } 133 | 134 | // RequestVote RPC. 135 | func (cm *ConsensusModule) RequestVote(args RequestVoteArgs, reply *RequestVoteReply) error { 136 | cm.mu.Lock() 137 | defer cm.mu.Unlock() 138 | if cm.state == Dead { 139 | return nil 140 | } 141 | cm.dlog("RequestVote: %+v [currentTerm=%d, votedFor=%d]", args, cm.currentTerm, cm.votedFor) 142 | 143 | if args.Term > cm.currentTerm { 144 | cm.dlog("... term out of date in RequestVote") 145 | cm.becomeFollower(args.Term) 146 | } 147 | 148 | if cm.currentTerm == args.Term && 149 | (cm.votedFor == -1 || cm.votedFor == args.CandidateId) { 150 | reply.VoteGranted = true 151 | cm.votedFor = args.CandidateId 152 | cm.electionResetEvent = time.Now() 153 | } else { 154 | reply.VoteGranted = false 155 | } 156 | reply.Term = cm.currentTerm 157 | cm.dlog("... RequestVote reply: %+v", reply) 158 | return nil 159 | } 160 | 161 | // See figure 2 in the paper. 162 | type AppendEntriesArgs struct { 163 | Term int 164 | LeaderId int 165 | 166 | PrevLogIndex int 167 | PrevLogTerm int 168 | Entries []LogEntry 169 | LeaderCommit int 170 | } 171 | 172 | type AppendEntriesReply struct { 173 | Term int 174 | Success bool 175 | } 176 | 177 | func (cm *ConsensusModule) AppendEntries(args AppendEntriesArgs, reply *AppendEntriesReply) error { 178 | cm.mu.Lock() 179 | defer cm.mu.Unlock() 180 | if cm.state == Dead { 181 | return nil 182 | } 183 | cm.dlog("AppendEntries: %+v", args) 184 | 185 | if args.Term > cm.currentTerm { 186 | cm.dlog("... 
term out of date in AppendEntries") 187 | cm.becomeFollower(args.Term) 188 | } 189 | 190 | reply.Success = false 191 | if args.Term == cm.currentTerm { 192 | if cm.state != Follower { 193 | cm.becomeFollower(args.Term) 194 | } 195 | cm.electionResetEvent = time.Now() 196 | reply.Success = true 197 | } 198 | 199 | reply.Term = cm.currentTerm 200 | cm.dlog("AppendEntries reply: %+v", *reply) 201 | return nil 202 | } 203 | 204 | // electionTimeout generates a pseudo-random election timeout duration. 205 | func (cm *ConsensusModule) electionTimeout() time.Duration { 206 | // If RAFT_FORCE_MORE_REELECTION is set, stress-test by deliberately 207 | // generating a hard-coded number very often. This will create collisions 208 | // between different servers and force more re-elections. 209 | if len(os.Getenv("RAFT_FORCE_MORE_REELECTION")) > 0 && rand.Intn(3) == 0 { 210 | return time.Duration(150) * time.Millisecond 211 | } else { 212 | return time.Duration(150+rand.Intn(150)) * time.Millisecond 213 | } 214 | } 215 | 216 | // runElectionTimer implements an election timer. It should be launched whenever 217 | // we want to start a timer towards becoming a candidate in a new election. 218 | // 219 | // This function is blocking and should be launched in a separate goroutine; 220 | // it's designed to work for a single (one-shot) election timer, as it exits 221 | // whenever the CM state changes from follower/candidate or the term changes. 222 | func (cm *ConsensusModule) runElectionTimer() { 223 | timeoutDuration := cm.electionTimeout() 224 | cm.mu.Lock() 225 | termStarted := cm.currentTerm 226 | cm.mu.Unlock() 227 | cm.dlog("election timer started (%v), term=%d", timeoutDuration, termStarted) 228 | 229 | // This loops until either: 230 | // - we discover the election timer is no longer needed, or 231 | // - the election timer expires and this CM becomes a candidate 232 | // In a follower, this typically keeps running in the background for the 233 | // duration of the CM's lifetime. 234 | ticker := time.NewTicker(10 * time.Millisecond) 235 | defer ticker.Stop() 236 | for { 237 | <-ticker.C 238 | 239 | cm.mu.Lock() 240 | if cm.state != Candidate && cm.state != Follower { 241 | cm.dlog("in election timer state=%s, bailing out", cm.state) 242 | cm.mu.Unlock() 243 | return 244 | } 245 | 246 | if termStarted != cm.currentTerm { 247 | cm.dlog("in election timer term changed from %d to %d, bailing out", termStarted, cm.currentTerm) 248 | cm.mu.Unlock() 249 | return 250 | } 251 | 252 | // Start an election if we haven't heard from a leader or haven't voted for 253 | // someone for the duration of the timeout. 254 | if elapsed := time.Since(cm.electionResetEvent); elapsed >= timeoutDuration { 255 | cm.startElection() 256 | cm.mu.Unlock() 257 | return 258 | } 259 | cm.mu.Unlock() 260 | } 261 | } 262 | 263 | // startElection starts a new election with this CM as a candidate. 264 | // Expects cm.mu to be locked. 265 | func (cm *ConsensusModule) startElection() { 266 | cm.state = Candidate 267 | cm.currentTerm += 1 268 | savedCurrentTerm := cm.currentTerm 269 | cm.electionResetEvent = time.Now() 270 | cm.votedFor = cm.id 271 | cm.dlog("becomes Candidate (currentTerm=%d); log=%v", savedCurrentTerm, cm.log) 272 | 273 | votesReceived := 1 274 | 275 | // Send RequestVote RPCs to all other servers concurrently. 
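// Each peer is contacted on its own goroutine, and its reply is processed
// while holding cm.mu. A candidate wins as soon as
// votesReceived*2 > len(cm.peerIds)+1, i.e. it holds votes from a strict
// majority of the cluster (its peers plus itself).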
276 | for _, peerId := range cm.peerIds { 277 | go func() { 278 | args := RequestVoteArgs{ 279 | Term: savedCurrentTerm, 280 | CandidateId: cm.id, 281 | } 282 | var reply RequestVoteReply 283 | 284 | cm.dlog("sending RequestVote to %d: %+v", peerId, args) 285 | if err := cm.server.Call(peerId, "ConsensusModule.RequestVote", args, &reply); err == nil { 286 | cm.mu.Lock() 287 | defer cm.mu.Unlock() 288 | cm.dlog("received RequestVoteReply %+v", reply) 289 | 290 | if cm.state != Candidate { 291 | cm.dlog("while waiting for reply, state = %v", cm.state) 292 | return 293 | } 294 | 295 | if reply.Term > savedCurrentTerm { 296 | cm.dlog("term out of date in RequestVoteReply") 297 | cm.becomeFollower(reply.Term) 298 | return 299 | } else if reply.Term == savedCurrentTerm { 300 | if reply.VoteGranted { 301 | votesReceived += 1 302 | if votesReceived*2 > len(cm.peerIds)+1 { 303 | // Won the election! 304 | cm.dlog("wins election with %d votes", votesReceived) 305 | cm.startLeader() 306 | return 307 | } 308 | } 309 | } 310 | } 311 | }() 312 | } 313 | 314 | // Run another election timer, in case this election is not successful. 315 | go cm.runElectionTimer() 316 | } 317 | 318 | // becomeFollower makes cm a follower and resets its state. 319 | // Expects cm.mu to be locked. 320 | func (cm *ConsensusModule) becomeFollower(term int) { 321 | cm.dlog("becomes Follower with term=%d; log=%v", term, cm.log) 322 | cm.state = Follower 323 | cm.currentTerm = term 324 | cm.votedFor = -1 325 | cm.electionResetEvent = time.Now() 326 | 327 | go cm.runElectionTimer() 328 | } 329 | 330 | // startLeader switches cm into a leader state and begins process of heartbeats. 331 | // Expects cm.mu to be locked. 332 | func (cm *ConsensusModule) startLeader() { 333 | cm.state = Leader 334 | cm.dlog("becomes Leader; term=%d, log=%v", cm.currentTerm, cm.log) 335 | 336 | go func() { 337 | ticker := time.NewTicker(50 * time.Millisecond) 338 | defer ticker.Stop() 339 | 340 | // Send periodic heartbeats, as long as still leader. 341 | for { 342 | cm.leaderSendHeartbeats() 343 | <-ticker.C 344 | 345 | cm.mu.Lock() 346 | if cm.state != Leader { 347 | cm.mu.Unlock() 348 | return 349 | } 350 | cm.mu.Unlock() 351 | } 352 | }() 353 | } 354 | 355 | // leaderSendHeartbeats sends a round of heartbeats to all peers, collects their 356 | // replies and adjusts cm's state. 357 | func (cm *ConsensusModule) leaderSendHeartbeats() { 358 | cm.mu.Lock() 359 | if cm.state != Leader { 360 | cm.mu.Unlock() 361 | return 362 | } 363 | savedCurrentTerm := cm.currentTerm 364 | cm.mu.Unlock() 365 | 366 | for _, peerId := range cm.peerIds { 367 | args := AppendEntriesArgs{ 368 | Term: savedCurrentTerm, 369 | LeaderId: cm.id, 370 | } 371 | go func() { 372 | cm.dlog("sending AppendEntries to %v: ni=%d, args=%+v", peerId, 0, args) 373 | var reply AppendEntriesReply 374 | if err := cm.server.Call(peerId, "ConsensusModule.AppendEntries", args, &reply); err == nil { 375 | cm.mu.Lock() 376 | defer cm.mu.Unlock() 377 | if reply.Term > savedCurrentTerm { 378 | cm.dlog("term out of date in heartbeat reply") 379 | cm.becomeFollower(reply.Term) 380 | return 381 | } 382 | } 383 | }() 384 | } 385 | } 386 | -------------------------------------------------------------------------------- /part1/raft_test.go: -------------------------------------------------------------------------------- 1 | // Eli Bendersky [https://eli.thegreenplace.net] 2 | // This code is in the public domain. 
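// The tests in this file exercise leader election only; command submission
// and log replication are covered starting in part 2 of the series.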
3 | package raft 4 | 5 | import ( 6 | "testing" 7 | "time" 8 | 9 | "github.com/fortytw2/leaktest" 10 | ) 11 | 12 | func TestElectionBasic(t *testing.T) { 13 | h := NewHarness(t, 3) 14 | defer h.Shutdown() 15 | 16 | h.CheckSingleLeader() 17 | } 18 | 19 | func TestElectionLeaderDisconnect(t *testing.T) { 20 | h := NewHarness(t, 3) 21 | defer h.Shutdown() 22 | 23 | origLeaderId, origTerm := h.CheckSingleLeader() 24 | 25 | h.DisconnectPeer(origLeaderId) 26 | sleepMs(350) 27 | 28 | newLeaderId, newTerm := h.CheckSingleLeader() 29 | if newLeaderId == origLeaderId { 30 | t.Errorf("want new leader to be different from orig leader") 31 | } 32 | if newTerm <= origTerm { 33 | t.Errorf("want newTerm <= origTerm, got %d and %d", newTerm, origTerm) 34 | } 35 | } 36 | 37 | func TestElectionLeaderAndAnotherDisconnect(t *testing.T) { 38 | h := NewHarness(t, 3) 39 | defer h.Shutdown() 40 | 41 | origLeaderId, _ := h.CheckSingleLeader() 42 | 43 | h.DisconnectPeer(origLeaderId) 44 | otherId := (origLeaderId + 1) % 3 45 | h.DisconnectPeer(otherId) 46 | 47 | // No quorum. 48 | sleepMs(450) 49 | h.CheckNoLeader() 50 | 51 | // Reconnect one other server; now we'll have quorum. 52 | h.ReconnectPeer(otherId) 53 | h.CheckSingleLeader() 54 | } 55 | 56 | func TestDisconnectAllThenRestore(t *testing.T) { 57 | h := NewHarness(t, 3) 58 | defer h.Shutdown() 59 | 60 | sleepMs(100) 61 | // Disconnect all servers from the start. There will be no leader. 62 | for i := 0; i < 3; i++ { 63 | h.DisconnectPeer(i) 64 | } 65 | sleepMs(450) 66 | h.CheckNoLeader() 67 | 68 | // Reconnect all servers. A leader will be found. 69 | for i := 0; i < 3; i++ { 70 | h.ReconnectPeer(i) 71 | } 72 | h.CheckSingleLeader() 73 | } 74 | 75 | func TestElectionLeaderDisconnectThenReconnect(t *testing.T) { 76 | h := NewHarness(t, 3) 77 | defer h.Shutdown() 78 | origLeaderId, _ := h.CheckSingleLeader() 79 | 80 | h.DisconnectPeer(origLeaderId) 81 | 82 | sleepMs(350) 83 | newLeaderId, newTerm := h.CheckSingleLeader() 84 | 85 | h.ReconnectPeer(origLeaderId) 86 | sleepMs(150) 87 | 88 | againLeaderId, againTerm := h.CheckSingleLeader() 89 | 90 | if newLeaderId != againLeaderId { 91 | t.Errorf("again leader id got %d; want %d", againLeaderId, newLeaderId) 92 | } 93 | if againTerm != newTerm { 94 | t.Errorf("again term got %d; want %d", againTerm, newTerm) 95 | } 96 | } 97 | 98 | func TestElectionLeaderDisconnectThenReconnect5(t *testing.T) { 99 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 100 | 101 | h := NewHarness(t, 5) 102 | defer h.Shutdown() 103 | 104 | origLeaderId, _ := h.CheckSingleLeader() 105 | 106 | h.DisconnectPeer(origLeaderId) 107 | sleepMs(150) 108 | newLeaderId, newTerm := h.CheckSingleLeader() 109 | 110 | h.ReconnectPeer(origLeaderId) 111 | sleepMs(150) 112 | 113 | againLeaderId, againTerm := h.CheckSingleLeader() 114 | 115 | if newLeaderId != againLeaderId { 116 | t.Errorf("again leader id got %d; want %d", againLeaderId, newLeaderId) 117 | } 118 | if againTerm != newTerm { 119 | t.Errorf("again term got %d; want %d", againTerm, newTerm) 120 | } 121 | } 122 | 123 | func TestElectionFollowerComesBack(t *testing.T) { 124 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 125 | 126 | h := NewHarness(t, 3) 127 | defer h.Shutdown() 128 | 129 | origLeaderId, origTerm := h.CheckSingleLeader() 130 | 131 | otherId := (origLeaderId + 1) % 3 132 | h.DisconnectPeer(otherId) 133 | time.Sleep(650 * time.Millisecond) 134 | h.ReconnectPeer(otherId) 135 | sleepMs(150) 136 | 137 | // We can't have an assertion on the new leader id here because 
it depends 138 | // on the relative election timeouts. We can assert that the term changed, 139 | // however, which implies that re-election has occurred. 140 | _, newTerm := h.CheckSingleLeader() 141 | if newTerm <= origTerm { 142 | t.Errorf("newTerm=%d, origTerm=%d", newTerm, origTerm) 143 | } 144 | } 145 | 146 | func TestElectionDisconnectLoop(t *testing.T) { 147 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 148 | 149 | h := NewHarness(t, 3) 150 | defer h.Shutdown() 151 | 152 | for cycle := 0; cycle < 5; cycle++ { 153 | leaderId, _ := h.CheckSingleLeader() 154 | 155 | h.DisconnectPeer(leaderId) 156 | otherId := (leaderId + 1) % 3 157 | h.DisconnectPeer(otherId) 158 | sleepMs(310) 159 | h.CheckNoLeader() 160 | 161 | // Reconnect both. 162 | h.ReconnectPeer(otherId) 163 | h.ReconnectPeer(leaderId) 164 | 165 | // Give it time to settle 166 | sleepMs(150) 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /part1/server.go: -------------------------------------------------------------------------------- 1 | // Server container for a Raft Consensus Module. Exposes Raft to the network 2 | // and enables RPCs between Raft peers. 3 | // 4 | // Eli Bendersky [https://eli.thegreenplace.net] 5 | // This code is in the public domain. 6 | package raft 7 | 8 | import ( 9 | "fmt" 10 | "log" 11 | "math/rand" 12 | "net" 13 | "net/rpc" 14 | "os" 15 | "sync" 16 | "time" 17 | ) 18 | 19 | // Server wraps a raft.ConsensusModule along with a rpc.Server that exposes its 20 | // methods as RPC endpoints. It also manages the peers of the Raft server. The 21 | // main goal of this type is to simplify the code of raft.Server for 22 | // presentation purposes. raft.ConsensusModule has a *Server to do its peer 23 | // communication and doesn't have to worry about the specifics of running an 24 | // RPC server. 25 | type Server struct { 26 | mu sync.Mutex 27 | 28 | serverId int 29 | peerIds []int 30 | 31 | cm *ConsensusModule 32 | rpcProxy *RPCProxy 33 | 34 | rpcServer *rpc.Server 35 | listener net.Listener 36 | 37 | peerClients map[int]*rpc.Client 38 | 39 | ready <-chan any 40 | quit chan any 41 | wg sync.WaitGroup 42 | } 43 | 44 | func NewServer(serverId int, peerIds []int, ready <-chan any) *Server { 45 | s := new(Server) 46 | s.serverId = serverId 47 | s.peerIds = peerIds 48 | s.peerClients = make(map[int]*rpc.Client) 49 | s.ready = ready 50 | s.quit = make(chan any) 51 | return s 52 | } 53 | 54 | func (s *Server) Serve() { 55 | s.mu.Lock() 56 | s.cm = NewConsensusModule(s.serverId, s.peerIds, s, s.ready) 57 | 58 | // Create a new RPC server and register a RPCProxy that forwards all methods 59 | // to n.cm 60 | s.rpcServer = rpc.NewServer() 61 | s.rpcProxy = &RPCProxy{cm: s.cm} 62 | s.rpcServer.RegisterName("ConsensusModule", s.rpcProxy) 63 | 64 | var err error 65 | s.listener, err = net.Listen("tcp", ":0") 66 | if err != nil { 67 | log.Fatal(err) 68 | } 69 | log.Printf("[%v] listening at %s", s.serverId, s.listener.Addr()) 70 | s.mu.Unlock() 71 | 72 | s.wg.Add(1) 73 | go func() { 74 | defer s.wg.Done() 75 | 76 | for { 77 | conn, err := s.listener.Accept() 78 | if err != nil { 79 | select { 80 | case <-s.quit: 81 | return 82 | default: 83 | log.Fatal("accept error:", err) 84 | } 85 | } 86 | s.wg.Add(1) 87 | go func() { 88 | s.rpcServer.ServeConn(conn) 89 | s.wg.Done() 90 | }() 91 | } 92 | }() 93 | } 94 | 95 | // DisconnectAll closes all the client connections to peers for this server. 
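// It is used by the test harness (together with DisconnectPeer on each of
// the other servers) to simulate a network partition.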
96 | func (s *Server) DisconnectAll() { 97 | s.mu.Lock() 98 | defer s.mu.Unlock() 99 | for id := range s.peerClients { 100 | if s.peerClients[id] != nil { 101 | s.peerClients[id].Close() 102 | s.peerClients[id] = nil 103 | } 104 | } 105 | } 106 | 107 | // Shutdown closes the server and waits for it to shut down properly. 108 | func (s *Server) Shutdown() { 109 | s.cm.Stop() 110 | close(s.quit) 111 | s.listener.Close() 112 | s.wg.Wait() 113 | } 114 | 115 | func (s *Server) GetListenAddr() net.Addr { 116 | s.mu.Lock() 117 | defer s.mu.Unlock() 118 | return s.listener.Addr() 119 | } 120 | 121 | func (s *Server) ConnectToPeer(peerId int, addr net.Addr) error { 122 | s.mu.Lock() 123 | defer s.mu.Unlock() 124 | if s.peerClients[peerId] == nil { 125 | client, err := rpc.Dial(addr.Network(), addr.String()) 126 | if err != nil { 127 | return err 128 | } 129 | s.peerClients[peerId] = client 130 | } 131 | return nil 132 | } 133 | 134 | // DisconnectPeer disconnects this server from the peer identified by peerId. 135 | func (s *Server) DisconnectPeer(peerId int) error { 136 | s.mu.Lock() 137 | defer s.mu.Unlock() 138 | if s.peerClients[peerId] != nil { 139 | err := s.peerClients[peerId].Close() 140 | s.peerClients[peerId] = nil 141 | return err 142 | } 143 | return nil 144 | } 145 | 146 | func (s *Server) Call(id int, serviceMethod string, args any, reply any) error { 147 | s.mu.Lock() 148 | peer := s.peerClients[id] 149 | s.mu.Unlock() 150 | 151 | // If this is called after shutdown (where client.Close is called), it will 152 | // return an error. 153 | if peer == nil { 154 | return fmt.Errorf("call client %d after it's closed", id) 155 | } else { 156 | return peer.Call(serviceMethod, args, reply) 157 | } 158 | } 159 | 160 | // RPCProxy is a trivial pass-thru proxy type for ConsensusModule's RPC methods. 161 | // It's useful for: 162 | // - Simulating a small delay in RPC transmission. 163 | // - Avoiding running into https://github.com/golang/go/issues/19957 164 | // - Simulating possible unreliable connections by delaying some messages 165 | // significantly and dropping others when RAFT_UNRELIABLE_RPC is set. 166 | type RPCProxy struct { 167 | cm *ConsensusModule 168 | } 169 | 170 | func (rpp *RPCProxy) RequestVote(args RequestVoteArgs, reply *RequestVoteReply) error { 171 | if len(os.Getenv("RAFT_UNRELIABLE_RPC")) > 0 { 172 | dice := rand.Intn(10) 173 | if dice == 9 { 174 | rpp.cm.dlog("drop RequestVote") 175 | return fmt.Errorf("RPC failed") 176 | } else if dice == 8 { 177 | rpp.cm.dlog("delay RequestVote") 178 | time.Sleep(75 * time.Millisecond) 179 | } 180 | } else { 181 | time.Sleep(time.Duration(1+rand.Intn(5)) * time.Millisecond) 182 | } 183 | return rpp.cm.RequestVote(args, reply) 184 | } 185 | 186 | func (rpp *RPCProxy) AppendEntries(args AppendEntriesArgs, reply *AppendEntriesReply) error { 187 | if len(os.Getenv("RAFT_UNRELIABLE_RPC")) > 0 { 188 | dice := rand.Intn(10) 189 | if dice == 9 { 190 | rpp.cm.dlog("drop AppendEntries") 191 | return fmt.Errorf("RPC failed") 192 | } else if dice == 8 { 193 | rpp.cm.dlog("delay AppendEntries") 194 | time.Sleep(75 * time.Millisecond) 195 | } 196 | } else { 197 | time.Sleep(time.Duration(1+rand.Intn(5)) * time.Millisecond) 198 | } 199 | return rpp.cm.AppendEntries(args, reply) 200 | } 201 | -------------------------------------------------------------------------------- /part1/testharness.go: -------------------------------------------------------------------------------- 1 | // Test harness for writing tests for Raft. 
2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package raft 6 | 7 | import ( 8 | "log" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | func init() { 14 | log.SetFlags(log.Ltime | log.Lmicroseconds) 15 | } 16 | 17 | type Harness struct { 18 | // cluster is a list of all the raft servers participating in a cluster. 19 | cluster []*Server 20 | 21 | // connected has a bool per server in cluster, specifying whether this server 22 | // is currently connected to peers (if false, it's partitioned and no messages 23 | // will pass to or from it). 24 | connected []bool 25 | 26 | n int 27 | t *testing.T 28 | } 29 | 30 | // NewHarness creates a new test Harness, initialized with n servers connected 31 | // to each other. 32 | func NewHarness(t *testing.T, n int) *Harness { 33 | ns := make([]*Server, n) 34 | connected := make([]bool, n) 35 | ready := make(chan any) 36 | 37 | // Create all Servers in this cluster, assign ids and peer ids. 38 | for i := 0; i < n; i++ { 39 | peerIds := make([]int, 0) 40 | for p := 0; p < n; p++ { 41 | if p != i { 42 | peerIds = append(peerIds, p) 43 | } 44 | } 45 | 46 | ns[i] = NewServer(i, peerIds, ready) 47 | ns[i].Serve() 48 | } 49 | 50 | // Connect all peers to each other. 51 | for i := 0; i < n; i++ { 52 | for j := 0; j < n; j++ { 53 | if i != j { 54 | ns[i].ConnectToPeer(j, ns[j].GetListenAddr()) 55 | } 56 | } 57 | connected[i] = true 58 | } 59 | close(ready) 60 | 61 | return &Harness{ 62 | cluster: ns, 63 | connected: connected, 64 | n: n, 65 | t: t, 66 | } 67 | } 68 | 69 | // Shutdown shuts down all the servers in the harness and waits for them to 70 | // stop running. 71 | func (h *Harness) Shutdown() { 72 | for i := 0; i < h.n; i++ { 73 | h.cluster[i].DisconnectAll() 74 | h.connected[i] = false 75 | } 76 | for i := 0; i < h.n; i++ { 77 | h.cluster[i].Shutdown() 78 | } 79 | } 80 | 81 | // DisconnectPeer disconnects a server from all other servers in the cluster. 82 | func (h *Harness) DisconnectPeer(id int) { 83 | tlog("Disconnect %d", id) 84 | h.cluster[id].DisconnectAll() 85 | for j := 0; j < h.n; j++ { 86 | if j != id { 87 | h.cluster[j].DisconnectPeer(id) 88 | } 89 | } 90 | h.connected[id] = false 91 | } 92 | 93 | // ReconnectPeer connects a server to all other servers in the cluster. 94 | func (h *Harness) ReconnectPeer(id int) { 95 | tlog("Reconnect %d", id) 96 | for j := 0; j < h.n; j++ { 97 | if j != id { 98 | if err := h.cluster[id].ConnectToPeer(j, h.cluster[j].GetListenAddr()); err != nil { 99 | h.t.Fatal(err) 100 | } 101 | if err := h.cluster[j].ConnectToPeer(id, h.cluster[id].GetListenAddr()); err != nil { 102 | h.t.Fatal(err) 103 | } 104 | } 105 | } 106 | h.connected[id] = true 107 | } 108 | 109 | // CheckSingleLeader checks that only a single server thinks it's the leader. 110 | // Returns the leader's id and term. It retries several times if no leader is 111 | // identified yet. 
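// The test fails immediately if two connected servers report themselves as
// leader in the same round, or if no leader is found after all retries.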
112 | func (h *Harness) CheckSingleLeader() (int, int) { 113 | for r := 0; r < 5; r++ { 114 | leaderId := -1 115 | leaderTerm := -1 116 | for i := 0; i < h.n; i++ { 117 | if h.connected[i] { 118 | _, term, isLeader := h.cluster[i].cm.Report() 119 | if isLeader { 120 | if leaderId < 0 { 121 | leaderId = i 122 | leaderTerm = term 123 | } else { 124 | h.t.Fatalf("both %d and %d think they're leaders", leaderId, i) 125 | } 126 | } 127 | } 128 | } 129 | if leaderId >= 0 { 130 | return leaderId, leaderTerm 131 | } 132 | time.Sleep(150 * time.Millisecond) 133 | } 134 | 135 | h.t.Fatalf("leader not found") 136 | return -1, -1 137 | } 138 | 139 | // CheckNoLeader checks that no connected server considers itself the leader. 140 | func (h *Harness) CheckNoLeader() { 141 | for i := 0; i < h.n; i++ { 142 | if h.connected[i] { 143 | _, _, isLeader := h.cluster[i].cm.Report() 144 | if isLeader { 145 | h.t.Fatalf("server %d leader; want none", i) 146 | } 147 | } 148 | } 149 | } 150 | 151 | func tlog(format string, a ...any) { 152 | format = "[TEST] " + format 153 | log.Printf(format, a...) 154 | } 155 | 156 | func sleepMs(n int) { 157 | time.Sleep(time.Duration(n) * time.Millisecond) 158 | } 159 | -------------------------------------------------------------------------------- /part2/dochecks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | go vet ./... 5 | staticcheck ./... 6 | -------------------------------------------------------------------------------- /part2/dotest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | 5 | logfile=~/temp/rlog 6 | 7 | go test -v -race -run $@ |& tee ${logfile} 8 | 9 | go run ../tools/raft-testlog-viz/main.go < ${logfile} 10 | -------------------------------------------------------------------------------- /part2/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/eliben/raft 2 | 3 | go 1.23.1 4 | 5 | require github.com/fortytw2/leaktest v1.3.0 6 | -------------------------------------------------------------------------------- /part2/go.sum: -------------------------------------------------------------------------------- 1 | github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= 2 | github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= 3 | -------------------------------------------------------------------------------- /part2/raft_test.go: -------------------------------------------------------------------------------- 1 | // Eli Bendersky [https://eli.thegreenplace.net] 2 | // This code is in the public domain. 
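// Compared to part 1, these tests also cover command submission and log
// replication (see the TestCommit* and TestSubmit* tests below).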
3 | package raft 4 | 5 | import ( 6 | "testing" 7 | "time" 8 | 9 | "github.com/fortytw2/leaktest" 10 | ) 11 | 12 | func TestElectionBasic(t *testing.T) { 13 | h := NewHarness(t, 3) 14 | defer h.Shutdown() 15 | 16 | h.CheckSingleLeader() 17 | } 18 | 19 | func TestElectionLeaderDisconnect(t *testing.T) { 20 | h := NewHarness(t, 3) 21 | defer h.Shutdown() 22 | 23 | origLeaderId, origTerm := h.CheckSingleLeader() 24 | 25 | h.DisconnectPeer(origLeaderId) 26 | sleepMs(350) 27 | 28 | newLeaderId, newTerm := h.CheckSingleLeader() 29 | if newLeaderId == origLeaderId { 30 | t.Errorf("want new leader to be different from orig leader") 31 | } 32 | if newTerm <= origTerm { 33 | t.Errorf("want newTerm <= origTerm, got %d and %d", newTerm, origTerm) 34 | } 35 | } 36 | 37 | func TestElectionLeaderAndAnotherDisconnect(t *testing.T) { 38 | h := NewHarness(t, 3) 39 | defer h.Shutdown() 40 | 41 | origLeaderId, _ := h.CheckSingleLeader() 42 | 43 | h.DisconnectPeer(origLeaderId) 44 | otherId := (origLeaderId + 1) % 3 45 | h.DisconnectPeer(otherId) 46 | 47 | // No quorum. 48 | sleepMs(450) 49 | h.CheckNoLeader() 50 | 51 | // Reconnect one other server; now we'll have quorum. 52 | h.ReconnectPeer(otherId) 53 | h.CheckSingleLeader() 54 | } 55 | 56 | func TestDisconnectAllThenRestore(t *testing.T) { 57 | h := NewHarness(t, 3) 58 | defer h.Shutdown() 59 | 60 | sleepMs(100) 61 | // Disconnect all servers from the start. There will be no leader. 62 | for i := 0; i < 3; i++ { 63 | h.DisconnectPeer(i) 64 | } 65 | sleepMs(450) 66 | h.CheckNoLeader() 67 | 68 | // Reconnect all servers. A leader will be found. 69 | for i := 0; i < 3; i++ { 70 | h.ReconnectPeer(i) 71 | } 72 | h.CheckSingleLeader() 73 | } 74 | 75 | func TestElectionLeaderDisconnectThenReconnect(t *testing.T) { 76 | h := NewHarness(t, 3) 77 | defer h.Shutdown() 78 | origLeaderId, _ := h.CheckSingleLeader() 79 | 80 | h.DisconnectPeer(origLeaderId) 81 | 82 | sleepMs(350) 83 | newLeaderId, newTerm := h.CheckSingleLeader() 84 | 85 | h.ReconnectPeer(origLeaderId) 86 | sleepMs(150) 87 | 88 | againLeaderId, againTerm := h.CheckSingleLeader() 89 | 90 | if newLeaderId != againLeaderId { 91 | t.Errorf("again leader id got %d; want %d", againLeaderId, newLeaderId) 92 | } 93 | if againTerm != newTerm { 94 | t.Errorf("again term got %d; want %d", againTerm, newTerm) 95 | } 96 | } 97 | 98 | func TestElectionLeaderDisconnectThenReconnect5(t *testing.T) { 99 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 100 | 101 | h := NewHarness(t, 5) 102 | defer h.Shutdown() 103 | 104 | origLeaderId, _ := h.CheckSingleLeader() 105 | 106 | h.DisconnectPeer(origLeaderId) 107 | sleepMs(150) 108 | newLeaderId, newTerm := h.CheckSingleLeader() 109 | 110 | h.ReconnectPeer(origLeaderId) 111 | sleepMs(150) 112 | 113 | againLeaderId, againTerm := h.CheckSingleLeader() 114 | 115 | if newLeaderId != againLeaderId { 116 | t.Errorf("again leader id got %d; want %d", againLeaderId, newLeaderId) 117 | } 118 | if againTerm != newTerm { 119 | t.Errorf("again term got %d; want %d", againTerm, newTerm) 120 | } 121 | } 122 | 123 | func TestElectionFollowerComesBack(t *testing.T) { 124 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 125 | 126 | h := NewHarness(t, 3) 127 | defer h.Shutdown() 128 | 129 | origLeaderId, origTerm := h.CheckSingleLeader() 130 | 131 | otherId := (origLeaderId + 1) % 3 132 | h.DisconnectPeer(otherId) 133 | time.Sleep(650 * time.Millisecond) 134 | h.ReconnectPeer(otherId) 135 | sleepMs(150) 136 | 137 | // We can't have an assertion on the new leader id here because 
it depends 138 | // on the relative election timeouts. We can assert that the term changed, 139 | // however, which implies that re-election has occurred. 140 | _, newTerm := h.CheckSingleLeader() 141 | if newTerm <= origTerm { 142 | t.Errorf("newTerm=%d, origTerm=%d", newTerm, origTerm) 143 | } 144 | } 145 | 146 | func TestElectionDisconnectLoop(t *testing.T) { 147 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 148 | 149 | h := NewHarness(t, 3) 150 | defer h.Shutdown() 151 | 152 | for cycle := 0; cycle < 5; cycle++ { 153 | leaderId, _ := h.CheckSingleLeader() 154 | 155 | h.DisconnectPeer(leaderId) 156 | otherId := (leaderId + 1) % 3 157 | h.DisconnectPeer(otherId) 158 | sleepMs(310) 159 | h.CheckNoLeader() 160 | 161 | // Reconnect both. 162 | h.ReconnectPeer(otherId) 163 | h.ReconnectPeer(leaderId) 164 | 165 | // Give it time to settle 166 | sleepMs(150) 167 | } 168 | } 169 | 170 | func TestCommitOneCommand(t *testing.T) { 171 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 172 | 173 | h := NewHarness(t, 3) 174 | defer h.Shutdown() 175 | 176 | origLeaderId, _ := h.CheckSingleLeader() 177 | 178 | tlog("submitting 42 to %d", origLeaderId) 179 | isLeader := h.SubmitToServer(origLeaderId, 42) 180 | if !isLeader { 181 | t.Errorf("want id=%d leader, but it's not", origLeaderId) 182 | } 183 | 184 | sleepMs(150) 185 | h.CheckCommittedN(42, 3) 186 | } 187 | 188 | func TestSubmitNonLeaderFails(t *testing.T) { 189 | h := NewHarness(t, 3) 190 | defer h.Shutdown() 191 | 192 | origLeaderId, _ := h.CheckSingleLeader() 193 | sid := (origLeaderId + 1) % 3 194 | tlog("submitting 42 to %d", sid) 195 | isLeader := h.SubmitToServer(sid, 42) 196 | if isLeader { 197 | t.Errorf("want id=%d !leader, but it is", sid) 198 | } 199 | sleepMs(10) 200 | } 201 | 202 | func TestCommitMultipleCommands(t *testing.T) { 203 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 204 | 205 | h := NewHarness(t, 3) 206 | defer h.Shutdown() 207 | 208 | origLeaderId, _ := h.CheckSingleLeader() 209 | 210 | values := []int{42, 55, 81} 211 | for _, v := range values { 212 | tlog("submitting %d to %d", v, origLeaderId) 213 | isLeader := h.SubmitToServer(origLeaderId, v) 214 | if !isLeader { 215 | t.Errorf("want id=%d leader, but it's not", origLeaderId) 216 | } 217 | sleepMs(100) 218 | } 219 | 220 | sleepMs(150) 221 | nc, i1 := h.CheckCommitted(42) 222 | _, i2 := h.CheckCommitted(55) 223 | if nc != 3 { 224 | t.Errorf("want nc=3, got %d", nc) 225 | } 226 | if i1 >= i2 { 227 | t.Errorf("want i1= i3 { 232 | t.Errorf("want i2 0 { 176 | dice := rand.Intn(10) 177 | if dice == 9 { 178 | rpp.cm.dlog("drop RequestVote") 179 | return fmt.Errorf("RPC failed") 180 | } else if dice == 8 { 181 | rpp.cm.dlog("delay RequestVote") 182 | time.Sleep(75 * time.Millisecond) 183 | } 184 | } else { 185 | time.Sleep(time.Duration(1+rand.Intn(5)) * time.Millisecond) 186 | } 187 | return rpp.cm.RequestVote(args, reply) 188 | } 189 | 190 | func (rpp *RPCProxy) AppendEntries(args AppendEntriesArgs, reply *AppendEntriesReply) error { 191 | if len(os.Getenv("RAFT_UNRELIABLE_RPC")) > 0 { 192 | dice := rand.Intn(10) 193 | if dice == 9 { 194 | rpp.cm.dlog("drop AppendEntries") 195 | return fmt.Errorf("RPC failed") 196 | } else if dice == 8 { 197 | rpp.cm.dlog("delay AppendEntries") 198 | time.Sleep(75 * time.Millisecond) 199 | } 200 | } else { 201 | time.Sleep(time.Duration(1+rand.Intn(5)) * time.Millisecond) 202 | } 203 | return rpp.cm.AppendEntries(args, reply) 204 | } 205 | 
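// The RAFT_UNRELIABLE_RPC switch checked above can be exercised from the
// shell; for example (a sketch - any test name accepted by -run works):
//
//	RAFT_UNRELIABLE_RPC=1 go test -race -run TestElectionDisconnectLoop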
-------------------------------------------------------------------------------- /part2/testharness.go: -------------------------------------------------------------------------------- 1 | // Test harness for writing tests for Raft. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package raft 6 | 7 | import ( 8 | "log" 9 | "sync" 10 | "testing" 11 | "time" 12 | ) 13 | 14 | func init() { 15 | log.SetFlags(log.Ltime | log.Lmicroseconds) 16 | } 17 | 18 | type Harness struct { 19 | mu sync.Mutex 20 | 21 | // cluster is a list of all the raft servers participating in a cluster. 22 | cluster []*Server 23 | 24 | // commitChans has a channel per server in cluster with the commit channel for 25 | // that server. 26 | commitChans []chan CommitEntry 27 | 28 | // commits at index i holds the sequence of commits made by server i so far. 29 | // It is populated by goroutines that listen on the corresponding commitChans 30 | // channel. 31 | commits [][]CommitEntry 32 | 33 | // connected has a bool per server in cluster, specifying whether this server 34 | // is currently connected to peers (if false, it's partitioned and no messages 35 | // will pass to or from it). 36 | connected []bool 37 | 38 | n int 39 | t *testing.T 40 | } 41 | 42 | // NewHarness creates a new test Harness, initialized with n servers connected 43 | // to each other. 44 | func NewHarness(t *testing.T, n int) *Harness { 45 | ns := make([]*Server, n) 46 | connected := make([]bool, n) 47 | commitChans := make([]chan CommitEntry, n) 48 | commits := make([][]CommitEntry, n) 49 | ready := make(chan any) 50 | 51 | // Create all Servers in this cluster, assign ids and peer ids. 52 | for i := 0; i < n; i++ { 53 | peerIds := make([]int, 0) 54 | for p := 0; p < n; p++ { 55 | if p != i { 56 | peerIds = append(peerIds, p) 57 | } 58 | } 59 | 60 | commitChans[i] = make(chan CommitEntry) 61 | ns[i] = NewServer(i, peerIds, ready, commitChans[i]) 62 | ns[i].Serve() 63 | } 64 | 65 | // Connect all peers to each other. 66 | for i := 0; i < n; i++ { 67 | for j := 0; j < n; j++ { 68 | if i != j { 69 | ns[i].ConnectToPeer(j, ns[j].GetListenAddr()) 70 | } 71 | } 72 | connected[i] = true 73 | } 74 | close(ready) 75 | 76 | h := &Harness{ 77 | cluster: ns, 78 | commitChans: commitChans, 79 | commits: commits, 80 | connected: connected, 81 | n: n, 82 | t: t, 83 | } 84 | for i := 0; i < n; i++ { 85 | go h.collectCommits(i) 86 | } 87 | return h 88 | } 89 | 90 | // Shutdown shuts down all the servers in the harness and waits for them to 91 | // stop running. 92 | func (h *Harness) Shutdown() { 93 | for i := 0; i < h.n; i++ { 94 | h.cluster[i].DisconnectAll() 95 | h.connected[i] = false 96 | } 97 | for i := 0; i < h.n; i++ { 98 | h.cluster[i].Shutdown() 99 | } 100 | for i := 0; i < h.n; i++ { 101 | close(h.commitChans[i]) 102 | } 103 | } 104 | 105 | // DisconnectPeer disconnects a server from all other servers in the cluster. 106 | func (h *Harness) DisconnectPeer(id int) { 107 | tlog("Disconnect %d", id) 108 | h.cluster[id].DisconnectAll() 109 | for j := 0; j < h.n; j++ { 110 | if j != id { 111 | h.cluster[j].DisconnectPeer(id) 112 | } 113 | } 114 | h.connected[id] = false 115 | } 116 | 117 | // ReconnectPeer connects a server to all other servers in the cluster. 
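// Connections are re-established in both directions: the reconnected server
// dials every other peer, and every other peer dials it back.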
118 | func (h *Harness) ReconnectPeer(id int) { 119 | tlog("Reconnect %d", id) 120 | for j := 0; j < h.n; j++ { 121 | if j != id { 122 | if err := h.cluster[id].ConnectToPeer(j, h.cluster[j].GetListenAddr()); err != nil { 123 | h.t.Fatal(err) 124 | } 125 | if err := h.cluster[j].ConnectToPeer(id, h.cluster[id].GetListenAddr()); err != nil { 126 | h.t.Fatal(err) 127 | } 128 | } 129 | } 130 | h.connected[id] = true 131 | } 132 | 133 | // CheckSingleLeader checks that only a single server thinks it's the leader. 134 | // Returns the leader's id and term. It retries several times if no leader is 135 | // identified yet. 136 | func (h *Harness) CheckSingleLeader() (int, int) { 137 | for r := 0; r < 8; r++ { 138 | leaderId := -1 139 | leaderTerm := -1 140 | for i := 0; i < h.n; i++ { 141 | if h.connected[i] { 142 | _, term, isLeader := h.cluster[i].cm.Report() 143 | if isLeader { 144 | if leaderId < 0 { 145 | leaderId = i 146 | leaderTerm = term 147 | } else { 148 | h.t.Fatalf("both %d and %d think they're leaders", leaderId, i) 149 | } 150 | } 151 | } 152 | } 153 | if leaderId >= 0 { 154 | return leaderId, leaderTerm 155 | } 156 | time.Sleep(150 * time.Millisecond) 157 | } 158 | 159 | h.t.Fatalf("leader not found") 160 | return -1, -1 161 | } 162 | 163 | // CheckNoLeader checks that no connected server considers itself the leader. 164 | func (h *Harness) CheckNoLeader() { 165 | for i := 0; i < h.n; i++ { 166 | if h.connected[i] { 167 | _, _, isLeader := h.cluster[i].cm.Report() 168 | if isLeader { 169 | h.t.Fatalf("server %d leader; want none", i) 170 | } 171 | } 172 | } 173 | } 174 | 175 | // CheckCommitted verifies that all connected servers have cmd committed with 176 | // the same index. It also verifies that all commands *before* cmd in 177 | // the commit sequence match. For this to work properly, all commands submitted 178 | // to Raft should be unique positive ints. 179 | // Returns the number of servers that have this command committed, and its 180 | // log index. 181 | func (h *Harness) CheckCommitted(cmd int) (nc int, index int) { 182 | h.mu.Lock() 183 | defer h.mu.Unlock() 184 | 185 | // Find the length of the commits slice for connected servers. 186 | commitsLen := -1 187 | for i := 0; i < h.n; i++ { 188 | if h.connected[i] { 189 | if commitsLen >= 0 { 190 | // If this was set already, expect the new length to be the same. 191 | if len(h.commits[i]) != commitsLen { 192 | h.t.Fatalf("commits[%d] = %d, commitsLen = %d", i, h.commits[i], commitsLen) 193 | } 194 | } else { 195 | commitsLen = len(h.commits[i]) 196 | } 197 | } 198 | } 199 | 200 | // Check consistency of commits from the start and to the command we're asked 201 | // about. This loop will return once a command=cmd is found. 202 | for c := 0; c < commitsLen; c++ { 203 | cmdAtC := -1 204 | for i := 0; i < h.n; i++ { 205 | if h.connected[i] { 206 | cmdOfN := h.commits[i][c].Command.(int) 207 | if cmdAtC >= 0 { 208 | if cmdOfN != cmdAtC { 209 | h.t.Errorf("got %d, want %d at h.commits[%d][%d]", cmdOfN, cmdAtC, i, c) 210 | } 211 | } else { 212 | cmdAtC = cmdOfN 213 | } 214 | } 215 | } 216 | if cmdAtC == cmd { 217 | // Check consistency of Index. 
218 | index := -1 219 | nc := 0 220 | for i := 0; i < h.n; i++ { 221 | if h.connected[i] { 222 | if index >= 0 && h.commits[i][c].Index != index { 223 | h.t.Errorf("got Index=%d, want %d at h.commits[%d][%d]", h.commits[i][c].Index, index, i, c) 224 | } else { 225 | index = h.commits[i][c].Index 226 | } 227 | nc++ 228 | } 229 | } 230 | return nc, index 231 | } 232 | } 233 | 234 | // If there's no early return, we haven't found the command we were looking 235 | // for. 236 | h.t.Errorf("cmd=%d not found in commits", cmd) 237 | return -1, -1 238 | } 239 | 240 | // CheckCommittedN verifies that cmd was committed by exactly n connected 241 | // servers. 242 | func (h *Harness) CheckCommittedN(cmd int, n int) { 243 | nc, _ := h.CheckCommitted(cmd) 244 | if nc != n { 245 | h.t.Errorf("CheckCommittedN got nc=%d, want %d", nc, n) 246 | } 247 | } 248 | 249 | // CheckNotCommitted verifies that no command equal to cmd has been committed 250 | // by any of the active servers yet. 251 | func (h *Harness) CheckNotCommitted(cmd int) { 252 | h.mu.Lock() 253 | defer h.mu.Unlock() 254 | 255 | for i := 0; i < h.n; i++ { 256 | if h.connected[i] { 257 | for c := 0; c < len(h.commits[i]); c++ { 258 | gotCmd := h.commits[i][c].Command.(int) 259 | if gotCmd == cmd { 260 | h.t.Errorf("found %d at commits[%d][%d], expected none", cmd, i, c) 261 | } 262 | } 263 | } 264 | } 265 | } 266 | 267 | // SubmitToServer submits the command to serverId. 268 | func (h *Harness) SubmitToServer(serverId int, cmd any) bool { 269 | return h.cluster[serverId].cm.Submit(cmd) 270 | } 271 | 272 | func tlog(format string, a ...any) { 273 | format = "[TEST] " + format 274 | log.Printf(format, a...) 275 | } 276 | 277 | func sleepMs(n int) { 278 | time.Sleep(time.Duration(n) * time.Millisecond) 279 | } 280 | 281 | // collectCommits reads channel commitChans[i] and adds all received entries 282 | // to the corresponding commits[i]. It's blocking and should be run in a 283 | // separate goroutine. It returns when commitChans[i] is closed. 284 | func (h *Harness) collectCommits(i int) { 285 | for c := range h.commitChans[i] { 286 | h.mu.Lock() 287 | tlog("collectCommits(%d) got %+v", i, c) 288 | h.commits[i] = append(h.commits[i], c) 289 | h.mu.Unlock() 290 | } 291 | } 292 | -------------------------------------------------------------------------------- /part3/raft/dochecks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | go vet ./... 5 | staticcheck ./... 
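# Note: staticcheck is a third-party linter; if it's not on your PATH, it can
# be installed with 'go install honnef.co/go/tools/cmd/staticcheck@latest'.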
6 | -------------------------------------------------------------------------------- /part3/raft/dotest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | 5 | logfile=~/temp/rlog 6 | 7 | go test -v -race -run $@ |& tee ${logfile} 8 | 9 | go run ../../tools/raft-testlog-viz/main.go < ${logfile} 10 | -------------------------------------------------------------------------------- /part3/raft/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/eliben/raft/part3/raft 2 | 3 | go 1.23.1 4 | 5 | require github.com/fortytw2/leaktest v1.3.0 6 | -------------------------------------------------------------------------------- /part3/raft/go.sum: -------------------------------------------------------------------------------- 1 | github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= 2 | github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= 3 | -------------------------------------------------------------------------------- /part3/raft/raft_test.go: -------------------------------------------------------------------------------- 1 | // Eli Bendersky [https://eli.thegreenplace.net] 2 | // This code is in the public domain. 3 | package raft 4 | 5 | import ( 6 | "testing" 7 | "time" 8 | 9 | "github.com/fortytw2/leaktest" 10 | ) 11 | 12 | func TestElectionBasic(t *testing.T) { 13 | h := NewHarness(t, 3) 14 | defer h.Shutdown() 15 | 16 | h.CheckSingleLeader() 17 | } 18 | 19 | func TestElectionLeaderDisconnect(t *testing.T) { 20 | h := NewHarness(t, 3) 21 | defer h.Shutdown() 22 | 23 | origLeaderId, origTerm := h.CheckSingleLeader() 24 | 25 | h.DisconnectPeer(origLeaderId) 26 | sleepMs(350) 27 | 28 | newLeaderId, newTerm := h.CheckSingleLeader() 29 | if newLeaderId == origLeaderId { 30 | t.Errorf("want new leader to be different from orig leader") 31 | } 32 | if newTerm <= origTerm { 33 | t.Errorf("want newTerm <= origTerm, got %d and %d", newTerm, origTerm) 34 | } 35 | } 36 | 37 | func TestElectionLeaderAndAnotherDisconnect(t *testing.T) { 38 | h := NewHarness(t, 3) 39 | defer h.Shutdown() 40 | 41 | origLeaderId, _ := h.CheckSingleLeader() 42 | 43 | h.DisconnectPeer(origLeaderId) 44 | otherId := (origLeaderId + 1) % 3 45 | h.DisconnectPeer(otherId) 46 | 47 | // No quorum. 48 | sleepMs(450) 49 | h.CheckNoLeader() 50 | 51 | // Reconnect one other server; now we'll have quorum. 52 | h.ReconnectPeer(otherId) 53 | h.CheckSingleLeader() 54 | } 55 | 56 | func TestDisconnectAllThenRestore(t *testing.T) { 57 | h := NewHarness(t, 3) 58 | defer h.Shutdown() 59 | 60 | sleepMs(100) 61 | // Disconnect all servers from the start. There will be no leader. 62 | for i := 0; i < 3; i++ { 63 | h.DisconnectPeer(i) 64 | } 65 | sleepMs(450) 66 | h.CheckNoLeader() 67 | 68 | // Reconnect all servers. A leader will be found. 
69 | for i := 0; i < 3; i++ { 70 | h.ReconnectPeer(i) 71 | } 72 | h.CheckSingleLeader() 73 | } 74 | 75 | func TestElectionLeaderDisconnectThenReconnect(t *testing.T) { 76 | h := NewHarness(t, 3) 77 | defer h.Shutdown() 78 | origLeaderId, _ := h.CheckSingleLeader() 79 | 80 | h.DisconnectPeer(origLeaderId) 81 | 82 | sleepMs(350) 83 | newLeaderId, newTerm := h.CheckSingleLeader() 84 | 85 | h.ReconnectPeer(origLeaderId) 86 | sleepMs(150) 87 | 88 | againLeaderId, againTerm := h.CheckSingleLeader() 89 | 90 | if newLeaderId != againLeaderId { 91 | t.Errorf("again leader id got %d; want %d", againLeaderId, newLeaderId) 92 | } 93 | if againTerm != newTerm { 94 | t.Errorf("again term got %d; want %d", againTerm, newTerm) 95 | } 96 | } 97 | 98 | func TestElectionLeaderDisconnectThenReconnect5(t *testing.T) { 99 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 100 | 101 | h := NewHarness(t, 5) 102 | defer h.Shutdown() 103 | 104 | origLeaderId, _ := h.CheckSingleLeader() 105 | 106 | h.DisconnectPeer(origLeaderId) 107 | sleepMs(150) 108 | newLeaderId, newTerm := h.CheckSingleLeader() 109 | 110 | h.ReconnectPeer(origLeaderId) 111 | sleepMs(150) 112 | 113 | againLeaderId, againTerm := h.CheckSingleLeader() 114 | 115 | if newLeaderId != againLeaderId { 116 | t.Errorf("again leader id got %d; want %d", againLeaderId, newLeaderId) 117 | } 118 | if againTerm != newTerm { 119 | t.Errorf("again term got %d; want %d", againTerm, newTerm) 120 | } 121 | } 122 | 123 | func TestElectionFollowerComesBack(t *testing.T) { 124 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 125 | 126 | h := NewHarness(t, 3) 127 | defer h.Shutdown() 128 | 129 | origLeaderId, origTerm := h.CheckSingleLeader() 130 | 131 | otherId := (origLeaderId + 1) % 3 132 | h.DisconnectPeer(otherId) 133 | time.Sleep(650 * time.Millisecond) 134 | h.ReconnectPeer(otherId) 135 | sleepMs(150) 136 | 137 | // We can't have an assertion on the new leader id here because it depends 138 | // on the relative election timeouts. We can assert that the term changed, 139 | // however, which implies that re-election has occurred. 140 | _, newTerm := h.CheckSingleLeader() 141 | if newTerm <= origTerm { 142 | t.Errorf("newTerm=%d, origTerm=%d", newTerm, origTerm) 143 | } 144 | } 145 | 146 | func TestElectionDisconnectLoop(t *testing.T) { 147 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 148 | 149 | h := NewHarness(t, 3) 150 | defer h.Shutdown() 151 | 152 | for cycle := 0; cycle < 5; cycle++ { 153 | leaderId, _ := h.CheckSingleLeader() 154 | 155 | h.DisconnectPeer(leaderId) 156 | otherId := (leaderId + 1) % 3 157 | h.DisconnectPeer(otherId) 158 | sleepMs(310) 159 | h.CheckNoLeader() 160 | 161 | // Reconnect both. 
162 | h.ReconnectPeer(otherId) 163 | h.ReconnectPeer(leaderId) 164 | 165 | // Give it time to settle 166 | sleepMs(150) 167 | } 168 | } 169 | 170 | func TestCommitOneCommand(t *testing.T) { 171 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 172 | 173 | h := NewHarness(t, 3) 174 | defer h.Shutdown() 175 | 176 | origLeaderId, _ := h.CheckSingleLeader() 177 | 178 | tlog("submitting 42 to %d", origLeaderId) 179 | isLeader := h.SubmitToServer(origLeaderId, 42) >= 0 180 | if !isLeader { 181 | t.Errorf("want id=%d leader, but it's not", origLeaderId) 182 | } 183 | 184 | sleepMs(250) 185 | h.CheckCommittedN(42, 3) 186 | } 187 | 188 | func TestCommitAfterCallDrops(t *testing.T) { 189 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 190 | 191 | h := NewHarness(t, 3) 192 | defer h.Shutdown() 193 | 194 | lid, _ := h.CheckSingleLeader() 195 | h.PeerDropCallsAfterN(lid, 2) 196 | h.SubmitToServer(lid, 99) 197 | sleepMs(30) 198 | h.PeerDontDropCalls(lid) 199 | 200 | sleepMs(60) 201 | h.CheckCommittedN(99, 3) 202 | } 203 | 204 | func TestSubmitNonLeaderFails(t *testing.T) { 205 | h := NewHarness(t, 3) 206 | defer h.Shutdown() 207 | 208 | origLeaderId, _ := h.CheckSingleLeader() 209 | sid := (origLeaderId + 1) % 3 210 | tlog("submitting 42 to %d", sid) 211 | isLeader := h.SubmitToServer(sid, 42) >= 0 212 | if isLeader { 213 | t.Errorf("want id=%d !leader, but it is", sid) 214 | } 215 | sleepMs(10) 216 | } 217 | 218 | func TestCommitMultipleCommands(t *testing.T) { 219 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 220 | 221 | h := NewHarness(t, 3) 222 | defer h.Shutdown() 223 | 224 | origLeaderId, _ := h.CheckSingleLeader() 225 | 226 | values := []int{42, 55, 81} 227 | for _, v := range values { 228 | tlog("submitting %d to %d", v, origLeaderId) 229 | isLeader := h.SubmitToServer(origLeaderId, v) >= 0 230 | if !isLeader { 231 | t.Errorf("want id=%d leader, but it's not", origLeaderId) 232 | } 233 | sleepMs(100) 234 | } 235 | 236 | sleepMs(250) 237 | nc, i1 := h.CheckCommitted(42) 238 | _, i2 := h.CheckCommitted(55) 239 | if nc != 3 { 240 | t.Errorf("want nc=3, got %d", nc) 241 | } 242 | if i1 >= i2 { 243 | t.Errorf("want i1= i3 { 248 | t.Errorf("want i20: means we'll start dropping calls after this number is made 198 | numCallsBeforeDrop int 199 | } 200 | 201 | func NewProxy(cm *ConsensusModule) *RPCProxy { 202 | return &RPCProxy{ 203 | cm: cm, 204 | numCallsBeforeDrop: -1, 205 | } 206 | } 207 | 208 | func (rpp *RPCProxy) RequestVote(args RequestVoteArgs, reply *RequestVoteReply) error { 209 | if len(os.Getenv("RAFT_UNRELIABLE_RPC")) > 0 { 210 | dice := rand.Intn(10) 211 | if dice == 9 { 212 | rpp.cm.dlog("drop RequestVote") 213 | return fmt.Errorf("RPC failed") 214 | } else if dice == 8 { 215 | rpp.cm.dlog("delay RequestVote") 216 | time.Sleep(75 * time.Millisecond) 217 | } 218 | } else { 219 | time.Sleep(time.Duration(1+rand.Intn(5)) * time.Millisecond) 220 | } 221 | return rpp.cm.RequestVote(args, reply) 222 | } 223 | 224 | func (rpp *RPCProxy) AppendEntries(args AppendEntriesArgs, reply *AppendEntriesReply) error { 225 | if len(os.Getenv("RAFT_UNRELIABLE_RPC")) > 0 { 226 | dice := rand.Intn(10) 227 | if dice == 9 { 228 | rpp.cm.dlog("drop AppendEntries") 229 | return fmt.Errorf("RPC failed") 230 | } else if dice == 8 { 231 | rpp.cm.dlog("delay AppendEntries") 232 | time.Sleep(75 * time.Millisecond) 233 | } 234 | } else { 235 | time.Sleep(time.Duration(1+rand.Intn(5)) * time.Millisecond) 236 | } 237 | return rpp.cm.AppendEntries(args, reply) 238 | } 239 | 240 | func (rpp 
*RPCProxy) Call(peer *rpc.Client, method string, args any, reply any) error { 241 | rpp.mu.Lock() 242 | if rpp.numCallsBeforeDrop == 0 { 243 | rpp.mu.Unlock() 244 | rpp.cm.dlog("drop Call %s: %v", method, args) 245 | return fmt.Errorf("RPC failed") 246 | } else { 247 | if rpp.numCallsBeforeDrop > 0 { 248 | rpp.numCallsBeforeDrop-- 249 | } 250 | rpp.mu.Unlock() 251 | return peer.Call(method, args, reply) 252 | } 253 | } 254 | 255 | // DropCallsAfterN instruct the proxy to drop calls after n are made from this 256 | // point. 257 | func (rpp *RPCProxy) DropCallsAfterN(n int) { 258 | rpp.mu.Lock() 259 | defer rpp.mu.Unlock() 260 | 261 | rpp.numCallsBeforeDrop = n 262 | } 263 | 264 | func (rpp *RPCProxy) DontDropCalls() { 265 | rpp.mu.Lock() 266 | defer rpp.mu.Unlock() 267 | 268 | rpp.numCallsBeforeDrop = -1 269 | } 270 | -------------------------------------------------------------------------------- /part3/raft/storage.go: -------------------------------------------------------------------------------- 1 | // Eli Bendersky [https://eli.thegreenplace.net] 2 | // This code is in the public domain. 3 | package raft 4 | 5 | import "sync" 6 | 7 | // Storage is an interface implemented by stable storage providers. 8 | type Storage interface { 9 | Set(key string, value []byte) 10 | 11 | Get(key string) ([]byte, bool) 12 | 13 | // HasData returns true iff any Sets were made on this Storage. 14 | HasData() bool 15 | } 16 | 17 | // MapStorage is a simple in-memory implementation of Storage for testing. 18 | type MapStorage struct { 19 | mu sync.Mutex 20 | m map[string][]byte 21 | } 22 | 23 | func NewMapStorage() *MapStorage { 24 | m := make(map[string][]byte) 25 | return &MapStorage{ 26 | m: m, 27 | } 28 | } 29 | 30 | func (ms *MapStorage) Get(key string) ([]byte, bool) { 31 | ms.mu.Lock() 32 | defer ms.mu.Unlock() 33 | v, found := ms.m[key] 34 | return v, found 35 | } 36 | 37 | func (ms *MapStorage) Set(key string, value []byte) { 38 | ms.mu.Lock() 39 | defer ms.mu.Unlock() 40 | ms.m[key] = value 41 | } 42 | 43 | func (ms *MapStorage) HasData() bool { 44 | ms.mu.Lock() 45 | defer ms.mu.Unlock() 46 | return len(ms.m) > 0 47 | } 48 | -------------------------------------------------------------------------------- /part3/raft/testharness.go: -------------------------------------------------------------------------------- 1 | // Test harness for writing tests for Raft. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package raft 6 | 7 | import ( 8 | "log" 9 | "sync" 10 | "testing" 11 | "time" 12 | ) 13 | 14 | func init() { 15 | log.SetFlags(log.Ltime | log.Lmicroseconds) 16 | } 17 | 18 | type Harness struct { 19 | mu sync.Mutex 20 | 21 | // cluster is a list of all the raft servers participating in a cluster. 22 | cluster []*Server 23 | storage []*MapStorage 24 | 25 | // commitChans has a channel per server in cluster with the commit channel for 26 | // that server. 27 | commitChans []chan CommitEntry 28 | 29 | // commits at index i holds the sequence of commits made by server i so far. 30 | // It is populated by goroutines that listen on the corresponding commitChans 31 | // channel. 32 | commits [][]CommitEntry 33 | 34 | // connected has a bool per server in cluster, specifying whether this server 35 | // is currently connected to peers (if false, it's partitioned and no messages 36 | // will pass to or from it). 
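// How these bookkeeping fields fit together: each Server reports its committed
// entries on commitChans[i]; a per-server collectCommits goroutine (defined at
// the bottom of this file) appends them to commits[i], and the CheckCommitted*
// helpers below assert on those slices. connected and alive only record what
// the harness has done to each server - the actual partitioning and shutdown
// happen through the Server methods called in DisconnectPeer, CrashPeer, etc.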
37 | connected []bool 38 | 39 | // alive has a bool per server in cluster, specifying whether this server is 40 | // currently alive (false means it has crashed and wasn't restarted yet). 41 | // connected implies alive. 42 | alive []bool 43 | 44 | n int 45 | t *testing.T 46 | } 47 | 48 | // NewHarness creates a new test Harness, initialized with n servers connected 49 | // to each other. 50 | func NewHarness(t *testing.T, n int) *Harness { 51 | ns := make([]*Server, n) 52 | connected := make([]bool, n) 53 | alive := make([]bool, n) 54 | commitChans := make([]chan CommitEntry, n) 55 | commits := make([][]CommitEntry, n) 56 | ready := make(chan any) 57 | storage := make([]*MapStorage, n) 58 | 59 | // Create all Servers in this cluster, assign ids and peer ids. 60 | for i := 0; i < n; i++ { 61 | peerIds := make([]int, 0) 62 | for p := 0; p < n; p++ { 63 | if p != i { 64 | peerIds = append(peerIds, p) 65 | } 66 | } 67 | 68 | storage[i] = NewMapStorage() 69 | commitChans[i] = make(chan CommitEntry) 70 | ns[i] = NewServer(i, peerIds, storage[i], ready, commitChans[i]) 71 | ns[i].Serve() 72 | alive[i] = true 73 | } 74 | 75 | // Connect all peers to each other. 76 | for i := 0; i < n; i++ { 77 | for j := 0; j < n; j++ { 78 | if i != j { 79 | ns[i].ConnectToPeer(j, ns[j].GetListenAddr()) 80 | } 81 | } 82 | connected[i] = true 83 | } 84 | close(ready) 85 | 86 | h := &Harness{ 87 | cluster: ns, 88 | storage: storage, 89 | commitChans: commitChans, 90 | commits: commits, 91 | connected: connected, 92 | alive: alive, 93 | n: n, 94 | t: t, 95 | } 96 | for i := 0; i < n; i++ { 97 | go h.collectCommits(i) 98 | } 99 | return h 100 | } 101 | 102 | // Shutdown shuts down all the servers in the harness and waits for them to 103 | // stop running. 104 | func (h *Harness) Shutdown() { 105 | for i := 0; i < h.n; i++ { 106 | h.cluster[i].DisconnectAll() 107 | h.connected[i] = false 108 | } 109 | for i := 0; i < h.n; i++ { 110 | if h.alive[i] { 111 | h.alive[i] = false 112 | h.cluster[i].Shutdown() 113 | } 114 | } 115 | for i := 0; i < h.n; i++ { 116 | close(h.commitChans[i]) 117 | } 118 | } 119 | 120 | // DisconnectPeer disconnects a server from all other servers in the cluster. 121 | func (h *Harness) DisconnectPeer(id int) { 122 | tlog("Disconnect %d", id) 123 | h.cluster[id].DisconnectAll() 124 | for j := 0; j < h.n; j++ { 125 | if j != id { 126 | h.cluster[j].DisconnectPeer(id) 127 | } 128 | } 129 | h.connected[id] = false 130 | } 131 | 132 | // ReconnectPeer connects a server to all other servers in the cluster. 133 | func (h *Harness) ReconnectPeer(id int) { 134 | tlog("Reconnect %d", id) 135 | for j := 0; j < h.n; j++ { 136 | if j != id && h.alive[j] { 137 | if err := h.cluster[id].ConnectToPeer(j, h.cluster[j].GetListenAddr()); err != nil { 138 | h.t.Fatal(err) 139 | } 140 | if err := h.cluster[j].ConnectToPeer(id, h.cluster[id].GetListenAddr()); err != nil { 141 | h.t.Fatal(err) 142 | } 143 | } 144 | } 145 | h.connected[id] = true 146 | } 147 | 148 | // CrashPeer "crashes" a server by disconnecting it from all peers and then 149 | // asking it to shut down. We're not going to use the same server instance 150 | // again, but its storage is retained. 151 | func (h *Harness) CrashPeer(id int) { 152 | tlog("Crash %d", id) 153 | h.DisconnectPeer(id) 154 | h.alive[id] = false 155 | h.cluster[id].Shutdown() 156 | 157 | // Clear out the commits slice for the crashed server; Raft assumes the client 158 | // has no persistent state. Once this server comes back online it will replay 159 | // the whole log to us. 
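// For illustration, a minimal sketch of how a persistence test combines this
// method with RestartPeer (defined below); the sleep durations are arbitrary
// and only give replication time to complete:
//
//	h := NewHarness(t, 3)
//	defer h.Shutdown()
//	lid, _ := h.CheckSingleLeader()
//	h.SubmitToServer(lid, 5)
//	sleepMs(250)
//	h.CheckCommittedN(5, 3)
//
//	peer := (lid + 1) % 3
//	h.CrashPeer(peer)   // the follower goes away, but its MapStorage is retained
//	h.RestartPeer(peer)
//	sleepMs(250)
//	h.CheckCommittedN(5, 3) // the restarted follower replays the log and catches up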
160 | h.mu.Lock() 161 | h.commits[id] = h.commits[id][:0] 162 | h.mu.Unlock() 163 | } 164 | 165 | // RestartPeer "restarts" a server by creating a new Server instance and giving 166 | // it the appropriate storage, reconnecting it to peers. 167 | func (h *Harness) RestartPeer(id int) { 168 | if h.alive[id] { 169 | log.Fatalf("id=%d is alive in RestartPeer", id) 170 | } 171 | tlog("Restart %d", id) 172 | 173 | peerIds := make([]int, 0) 174 | for p := 0; p < h.n; p++ { 175 | if p != id { 176 | peerIds = append(peerIds, p) 177 | } 178 | } 179 | 180 | ready := make(chan any) 181 | h.cluster[id] = NewServer(id, peerIds, h.storage[id], ready, h.commitChans[id]) 182 | h.cluster[id].Serve() 183 | h.ReconnectPeer(id) 184 | close(ready) 185 | h.alive[id] = true 186 | sleepMs(20) 187 | } 188 | 189 | // PeerDropCallsAfterN instructs peer `id` to drop calls after the next `n` 190 | // are made. 191 | func (h *Harness) PeerDropCallsAfterN(id int, n int) { 192 | tlog("peer %d drop calls after %d", id, n) 193 | h.cluster[id].Proxy().DropCallsAfterN(n) 194 | } 195 | 196 | // PeerDontDropCalls instructs peer `id` to stop dropping calls. 197 | func (h *Harness) PeerDontDropCalls(id int) { 198 | tlog("peer %d don't drop calls") 199 | h.cluster[id].Proxy().DontDropCalls() 200 | } 201 | 202 | // CheckSingleLeader checks that only a single server thinks it's the leader. 203 | // Returns the leader's id and term. It retries several times if no leader is 204 | // identified yet. 205 | func (h *Harness) CheckSingleLeader() (int, int) { 206 | for r := 0; r < 8; r++ { 207 | leaderId := -1 208 | leaderTerm := -1 209 | for i := 0; i < h.n; i++ { 210 | if h.connected[i] { 211 | _, term, isLeader := h.cluster[i].cm.Report() 212 | if isLeader { 213 | if leaderId < 0 { 214 | leaderId = i 215 | leaderTerm = term 216 | } else { 217 | h.t.Fatalf("both %d and %d think they're leaders", leaderId, i) 218 | } 219 | } 220 | } 221 | } 222 | if leaderId >= 0 { 223 | return leaderId, leaderTerm 224 | } 225 | time.Sleep(150 * time.Millisecond) 226 | } 227 | 228 | h.t.Fatalf("leader not found") 229 | return -1, -1 230 | } 231 | 232 | // CheckNoLeader checks that no connected server considers itself the leader. 233 | func (h *Harness) CheckNoLeader() { 234 | for i := 0; i < h.n; i++ { 235 | if h.connected[i] { 236 | _, _, isLeader := h.cluster[i].cm.Report() 237 | if isLeader { 238 | h.t.Fatalf("server %d leader; want none", i) 239 | } 240 | } 241 | } 242 | } 243 | 244 | // CheckCommitted verifies that all connected servers have cmd committed with 245 | // the same index. It also verifies that all commands *before* cmd in 246 | // the commit sequence match. For this to work properly, all commands submitted 247 | // to Raft should be unique positive ints. 248 | // Returns the number of servers that have this command committed, and its 249 | // log index. 250 | func (h *Harness) CheckCommitted(cmd int) (nc int, index int) { 251 | h.t.Helper() 252 | h.mu.Lock() 253 | defer h.mu.Unlock() 254 | 255 | // Find the length of the commits slice for connected servers. 256 | commitsLen := -1 257 | for i := 0; i < h.n; i++ { 258 | if h.connected[i] { 259 | if commitsLen >= 0 { 260 | // If this was set already, expect the new length to be the same. 
261 | if len(h.commits[i]) != commitsLen { 262 | h.t.Fatalf("commits[%d] = %d, commitsLen = %d", i, h.commits[i], commitsLen) 263 | } 264 | } else { 265 | commitsLen = len(h.commits[i]) 266 | } 267 | } 268 | } 269 | 270 | // Check consistency of commits from the start and to the command we're asked 271 | // about. This loop will return once a command=cmd is found. 272 | for c := 0; c < commitsLen; c++ { 273 | cmdAtC := -1 274 | for i := 0; i < h.n; i++ { 275 | if h.connected[i] { 276 | cmdOfN := h.commits[i][c].Command.(int) 277 | if cmdAtC >= 0 { 278 | if cmdOfN != cmdAtC { 279 | h.t.Errorf("got %d, want %d at h.commits[%d][%d]", cmdOfN, cmdAtC, i, c) 280 | } 281 | } else { 282 | cmdAtC = cmdOfN 283 | } 284 | } 285 | } 286 | if cmdAtC == cmd { 287 | // Check consistency of Index. 288 | index := -1 289 | nc := 0 290 | for i := 0; i < h.n; i++ { 291 | if h.connected[i] { 292 | if index >= 0 && h.commits[i][c].Index != index { 293 | h.t.Errorf("got Index=%d, want %d at h.commits[%d][%d]", h.commits[i][c].Index, index, i, c) 294 | } else { 295 | index = h.commits[i][c].Index 296 | } 297 | nc++ 298 | } 299 | } 300 | return nc, index 301 | } 302 | } 303 | 304 | // If there's no early return, we haven't found the command we were looking 305 | // for. 306 | h.t.Errorf("cmd=%d not found in commits", cmd) 307 | return -1, -1 308 | } 309 | 310 | // CheckCommittedN verifies that cmd was committed by exactly n connected 311 | // servers. 312 | func (h *Harness) CheckCommittedN(cmd int, n int) { 313 | h.t.Helper() 314 | nc, _ := h.CheckCommitted(cmd) 315 | if nc != n { 316 | h.t.Errorf("CheckCommittedN got nc=%d, want %d", nc, n) 317 | } 318 | } 319 | 320 | // CheckNotCommitted verifies that no command equal to cmd has been committed 321 | // by any of the active servers yet. 322 | func (h *Harness) CheckNotCommitted(cmd int) { 323 | h.t.Helper() 324 | h.mu.Lock() 325 | defer h.mu.Unlock() 326 | 327 | for i := 0; i < h.n; i++ { 328 | if h.connected[i] { 329 | for c := 0; c < len(h.commits[i]); c++ { 330 | gotCmd := h.commits[i][c].Command.(int) 331 | if gotCmd == cmd { 332 | h.t.Errorf("found %d at commits[%d][%d], expected none", cmd, i, c) 333 | } 334 | } 335 | } 336 | } 337 | } 338 | 339 | // SubmitToServer submits the command to serverId. 340 | func (h *Harness) SubmitToServer(serverId int, cmd any) int { 341 | return h.cluster[serverId].Submit(cmd) 342 | } 343 | 344 | func tlog(format string, a ...any) { 345 | format = "[TEST] " + format 346 | log.Printf(format, a...) 347 | } 348 | 349 | func sleepMs(n int) { 350 | time.Sleep(time.Duration(n) * time.Millisecond) 351 | } 352 | 353 | // collectCommits reads channel commitChans[i] and adds all received entries 354 | // to the corresponding commits[i]. It's blocking and should be run in a 355 | // separate goroutine. It returns when commitChans[i] is closed. 356 | func (h *Harness) collectCommits(i int) { 357 | for c := range h.commitChans[i] { 358 | h.mu.Lock() 359 | tlog("collectCommits(%d) got %+v", i, c) 360 | h.commits[i] = append(h.commits[i], c) 361 | h.mu.Unlock() 362 | } 363 | } 364 | -------------------------------------------------------------------------------- /part4kv/api/api.go: -------------------------------------------------------------------------------- 1 | // REST API data types for the KV service. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package api 6 | 7 | // Defines the data structures used in the REST API between kvservice and 8 | // clients. 
These structs are JSON-encoded into the body of HTTP requests 9 | // and responses passed between services and clients. 10 | // Uses bespoke ResponseStatus per response instead of HTTP status 11 | // codes because some statuses like "not leader" or "failed commit" don't have a 12 | // good match in standard HTTP status codes. 13 | 14 | type PutRequest struct { 15 | Key string 16 | Value string 17 | } 18 | 19 | type Response interface { 20 | Status() ResponseStatus 21 | } 22 | 23 | type PutResponse struct { 24 | RespStatus ResponseStatus 25 | KeyFound bool 26 | PrevValue string 27 | } 28 | 29 | func (pr *PutResponse) Status() ResponseStatus { 30 | return pr.RespStatus 31 | } 32 | 33 | type GetRequest struct { 34 | Key string 35 | } 36 | 37 | type GetResponse struct { 38 | RespStatus ResponseStatus 39 | KeyFound bool 40 | Value string 41 | } 42 | 43 | func (gr *GetResponse) Status() ResponseStatus { 44 | return gr.RespStatus 45 | } 46 | 47 | type CASRequest struct { 48 | Key string 49 | CompareValue string 50 | Value string 51 | } 52 | 53 | type CASResponse struct { 54 | RespStatus ResponseStatus 55 | KeyFound bool 56 | PrevValue string 57 | } 58 | 59 | func (cr *CASResponse) Status() ResponseStatus { 60 | return cr.RespStatus 61 | } 62 | 63 | type ResponseStatus int 64 | 65 | const ( 66 | StatusInvalid ResponseStatus = iota 67 | StatusOK 68 | StatusNotLeader 69 | StatusFailedCommit 70 | ) 71 | 72 | var responseName = map[ResponseStatus]string{ 73 | StatusInvalid: "invalid", 74 | StatusOK: "OK", 75 | StatusNotLeader: "NotLeader", 76 | StatusFailedCommit: "FailedCommit", 77 | } 78 | 79 | func (rs ResponseStatus) String() string { 80 | return responseName[rs] 81 | } 82 | -------------------------------------------------------------------------------- /part4kv/dochecks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | go vet ./... 5 | staticcheck ./... 6 | -------------------------------------------------------------------------------- /part4kv/dotest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | 5 | logfile=~/temp/rlog 6 | 7 | go test -v -race -run $@ |& tee ${logfile} 8 | 9 | go run ../tools/raft-testlog-viz/main.go < ${logfile} 10 | -------------------------------------------------------------------------------- /part4kv/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/eliben/raft/part4kv 2 | 3 | go 1.23.1 4 | 5 | replace github.com/eliben/raft/part3/raft => ../part3/raft/ 6 | 7 | require github.com/eliben/raft/part3/raft v0.0.0-00010101000000-000000000000 8 | 9 | require github.com/fortytw2/leaktest v1.3.0 // indirect 10 | -------------------------------------------------------------------------------- /part4kv/go.sum: -------------------------------------------------------------------------------- 1 | github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= 2 | github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= 3 | -------------------------------------------------------------------------------- /part4kv/kvclient/kvclient.go: -------------------------------------------------------------------------------- 1 | // KV client library. Go code that wants to talk to the KV service should use 2 | // this client instead of sending REST requests directly. 
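// For illustration, a minimal usage sketch (the addresses are whatever ports
// the KV services were started on; the test harness uses localhost:14200 and
// up):
//
//	ctx := context.Background()
//	c := kvclient.New([]string{"localhost:14200", "localhost:14201", "localhost:14202"})
//	if _, _, err := c.Put(ctx, "llave", "cosa"); err != nil {
//		log.Fatal(err)
//	}
//	v, found, err := c.Get(ctx, "llave") // v == "cosa", found == true
//
// Each call is sent as a JSON POST to the service's /put/, /get/ or /cas/
// route, rotating through the addresses until the current leader answers.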
3 | // 4 | // Eli Bendersky [https://eli.thegreenplace.net] 5 | // This code is in the public domain. 6 | package kvclient 7 | 8 | import ( 9 | "bytes" 10 | "context" 11 | "encoding/json" 12 | "fmt" 13 | "log" 14 | "net/http" 15 | "sync/atomic" 16 | "time" 17 | 18 | "github.com/eliben/raft/part4kv/api" 19 | ) 20 | 21 | // DebugClient enables debug output 22 | const DebugClient = 1 23 | 24 | type KVClient struct { 25 | addrs []string 26 | 27 | // assumedLeader is the index (in addrs) of the service we assume is the 28 | // current leader. It is zero-initialized by default, without loss of 29 | // generality. 30 | assumedLeader int 31 | 32 | clientID int32 33 | } 34 | 35 | // New creates a new KVClient. serviceAddrs is the addresses (each a string 36 | // with the format "host:port") of the services in the KVService cluster the 37 | // client will contact. 38 | func New(serviceAddrs []string) *KVClient { 39 | return &KVClient{ 40 | addrs: serviceAddrs, 41 | assumedLeader: 0, 42 | clientID: clientCount.Add(1), 43 | } 44 | } 45 | 46 | // clientCount is used internally for debugging 47 | var clientCount atomic.Int32 48 | 49 | // Put the key=value pair into the store. Returns an error, or 50 | // (prevValue, keyFound, false), where keyFound specifies whether the key was 51 | // found in the store prior to this command, and prevValue is its previous 52 | // value if it was found. 53 | func (c *KVClient) Put(ctx context.Context, key string, value string) (string, bool, error) { 54 | putReq := api.PutRequest{ 55 | Key: key, 56 | Value: value, 57 | } 58 | var putResp api.PutResponse 59 | err := c.send(ctx, "put", putReq, &putResp) 60 | return putResp.PrevValue, putResp.KeyFound, err 61 | } 62 | 63 | // Get the value of key from the store. Returns an error, or 64 | // (value, found, false), where found specifies whether the key was found in 65 | // the store, and value is its value. 66 | func (c *KVClient) Get(ctx context.Context, key string) (string, bool, error) { 67 | getReq := api.GetRequest{ 68 | Key: key, 69 | } 70 | var getResp api.GetResponse 71 | err := c.send(ctx, "get", getReq, &getResp) 72 | return getResp.Value, getResp.KeyFound, err 73 | } 74 | 75 | // CAS operation: if prev value of key == compare, assign new value. Returns an 76 | // error, or (prevValue, keyFound, false), where keyFound specifies whether the 77 | // key was found in the store prior to this command, and prevValue is its 78 | // previous value if it was found. 79 | func (c *KVClient) CAS(ctx context.Context, key string, compare string, value string) (string, bool, error) { 80 | casReq := api.CASRequest{ 81 | Key: key, 82 | CompareValue: compare, 83 | Value: value, 84 | } 85 | var casResp api.CASResponse 86 | err := c.send(ctx, "cas", casReq, &casResp) 87 | return casResp.PrevValue, casResp.KeyFound, err 88 | } 89 | 90 | func (c *KVClient) send(ctx context.Context, route string, req any, resp api.Response) error { 91 | // This loop rotates through the list of service addresses until we get 92 | // a response that indicates we've found the leader of the cluster. It 93 | // starts at c.assumedLeader 94 | FindLeader: 95 | for { 96 | // There's a two-level context tree here: we have the user context - ctx, 97 | // and we create our own context to impose a timeout on each request to 98 | // the service. If our timeout expires, we move on to try the next service. 
99 | // In the meantime, we have to keep an eye on the user context - if that's 100 | // canceled at any time (due to timeout, explicit cancellation, etc), we 101 | // bail out. 102 | retryCtx, retryCtxCancel := context.WithTimeout(ctx, 50*time.Millisecond) 103 | path := fmt.Sprintf("http://%s/%s/", c.addrs[c.assumedLeader], route) 104 | 105 | c.clientlog("sending %#v to %v", req, path) 106 | if err := sendJSONRequest(retryCtx, path, req, resp); err != nil { 107 | // Since the contexts are nested, the order of testing here matters. 108 | // We have to check the parent context first - if it's done, it means 109 | // we have to return. 110 | if contextDone(ctx) { 111 | c.clientlog("parent context done; bailing out") 112 | retryCtxCancel() 113 | return err 114 | } else if contextDeadlineExceeded(retryCtx) { 115 | // If the parent context is not done, but our retry context is done, 116 | // it's time to retry a different service. 117 | c.clientlog("timed out: will try next address") 118 | c.assumedLeader = (c.assumedLeader + 1) % len(c.addrs) 119 | retryCtxCancel() 120 | continue FindLeader 121 | } 122 | retryCtxCancel() 123 | return err 124 | } 125 | c.clientlog("received response %#v", resp) 126 | 127 | // No context/timeout on this request - we've actually received a response. 128 | switch resp.Status() { 129 | case api.StatusNotLeader: 130 | c.clientlog("not leader: will try next address") 131 | c.assumedLeader = (c.assumedLeader + 1) % len(c.addrs) 132 | retryCtxCancel() 133 | continue FindLeader 134 | case api.StatusOK: 135 | retryCtxCancel() 136 | return nil 137 | case api.StatusFailedCommit: 138 | retryCtxCancel() 139 | return fmt.Errorf("commit failed; please retry") 140 | default: 141 | panic("unreachable") 142 | } 143 | } 144 | } 145 | 146 | // clientlog logs a debugging message if DebugClient > 0 147 | func (c *KVClient) clientlog(format string, args ...any) { 148 | if DebugClient > 0 { 149 | clientName := fmt.Sprintf("[client%03d]", c.clientID) 150 | format = clientName + " " + format 151 | log.Printf(format, args...) 152 | } 153 | } 154 | 155 | func sendJSONRequest(ctx context.Context, path string, reqData any, respData any) error { 156 | body := new(bytes.Buffer) 157 | enc := json.NewEncoder(body) 158 | if err := enc.Encode(reqData); err != nil { 159 | return fmt.Errorf("JSON-encoding request data: %w", err) 160 | } 161 | 162 | req, err := http.NewRequestWithContext(ctx, http.MethodPost, path, body) 163 | if err != nil { 164 | return fmt.Errorf("creating HTTP request: %w", err) 165 | } 166 | req.Header.Add("Content-Type", "application/json") 167 | 168 | resp, err := http.DefaultClient.Do(req) 169 | if err != nil { 170 | return err 171 | } 172 | 173 | dec := json.NewDecoder(resp.Body) 174 | if err := dec.Decode(respData); err != nil { 175 | return fmt.Errorf("JSON-decoding response data: %w", err) 176 | } 177 | return nil 178 | } 179 | 180 | // contextDone checks whether ctx is done for any reason. It doesn't block. 181 | func contextDone(ctx context.Context) bool { 182 | select { 183 | case <-ctx.Done(): 184 | return true 185 | default: 186 | } 187 | return false 188 | } 189 | 190 | // contextDeadlineExceeded checks whether ctx is done because of an exceeded 191 | // deadline. It doesn't block. 
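// For illustration, a caller bounds the whole leader search in send by passing
// a context with its own deadline (c here is a *KVClient; the 500ms value is
// arbitrary):
//
//	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
//	defer cancel()
//	_, _, err := c.Get(ctx, "k") // gives up when ctx expires, even if no leader was found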
192 | func contextDeadlineExceeded(ctx context.Context) bool { 193 | select { 194 | case <-ctx.Done(): 195 | if ctx.Err() == context.DeadlineExceeded { 196 | return true 197 | } 198 | default: 199 | } 200 | return false 201 | } 202 | -------------------------------------------------------------------------------- /part4kv/kvservice/command.go: -------------------------------------------------------------------------------- 1 | // Command type: stored in Raft by the KV service. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package kvservice 6 | 7 | // Command is the concrete command type KVService submits to the Raft log to 8 | // manage its state machine. It's also used to carry the results of the command 9 | // after it's applied to the state machine. These are the supported commands: 10 | // 11 | // CommandGet: queries a key's value 12 | // 13 | // * Key is the key to get, Value is ignored 14 | // * CompareValue is ignored 15 | // * ResultFound is true iff Key was found in the store 16 | // * ResultValue is the value, if Key was found in the store 17 | // 18 | // CommandPut: assigns value to the key 19 | // 20 | // * Key,Value are the pair to assign (store[key]=value) 21 | // * CompareValue is ignored 22 | // * ResultFound is true iff Key was previously found in the store 23 | // * ResultValue is the old value of Key, if it was previously found 24 | // 25 | // CommandCAS: atomic compare-and-swap, performs: 26 | // 27 | // if Store[Key] == CompareValue { 28 | // Store[Key] = Value 29 | // } else { 30 | // nop 31 | // } 32 | // 33 | // * Key is the key this command acts on 34 | // * CompareValue is the previous value the command compares to 35 | // * Value is the new value the command assigns 36 | // * ResultFound is true iff Key was previously found in the store 37 | // * ResultValue is the old value of Key, if it was previously found 38 | type Command struct { 39 | Kind CommandKind 40 | 41 | Key, Value string 42 | 43 | CompareValue string 44 | 45 | ResultValue string 46 | ResultFound bool 47 | 48 | // id is the Raft ID of the server submitting this command. 49 | Id int 50 | } 51 | 52 | type CommandKind int 53 | 54 | const ( 55 | CommandInvalid CommandKind = iota 56 | CommandGet 57 | CommandPut 58 | CommandCAS 59 | ) 60 | 61 | var commandName = map[CommandKind]string{ 62 | CommandInvalid: "invalid", 63 | CommandGet: "get", 64 | CommandPut: "put", 65 | CommandCAS: "cas", 66 | } 67 | 68 | func (ck CommandKind) String() string { 69 | return commandName[ck] 70 | } 71 | -------------------------------------------------------------------------------- /part4kv/kvservice/datastore.go: -------------------------------------------------------------------------------- 1 | // Basic in-memory datastore backing the KV service. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package kvservice 6 | 7 | import "sync" 8 | 9 | // DataStore is a simple, concurrency-safe key-value store used as a backend 10 | // for kvservice. 11 | type DataStore struct { 12 | sync.Mutex 13 | data map[string]string 14 | } 15 | 16 | func NewDataStore() *DataStore { 17 | return &DataStore{ 18 | data: make(map[string]string), 19 | } 20 | } 21 | 22 | // Get fetches the value of key from the datastore and returns (v, true) if 23 | // it was found or ("", false) otherwise. 
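// For illustration, the return conventions of the three operations (exercised
// more thoroughly in datastore_test.go below); keys and values are arbitrary:
//
//	ds := NewDataStore()
//	prev, found := ds.Put("k", "v1")     // "", false  - the key wasn't there before
//	prev, found = ds.Put("k", "v2")      // "v1", true - the overwritten value is returned
//	v, ok := ds.Get("k")                 // "v2", true
//	prev, found = ds.CAS("k", "v1", "x") // "v2", true - compare fails, value stays "v2"
//	prev, found = ds.CAS("k", "v2", "x") // "v2", true - compare matches, value becomes "x"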
24 | func (ds *DataStore) Get(key string) (string, bool) { 25 | ds.Lock() 26 | defer ds.Unlock() 27 | 28 | value, ok := ds.data[key] 29 | return value, ok 30 | } 31 | 32 | // Put assigns datastore[key]=value, and returns (v, true) if the key was 33 | // previously in the store and its value was v, or ("", false) otherwise. 34 | func (ds *DataStore) Put(key, value string) (string, bool) { 35 | ds.Lock() 36 | defer ds.Unlock() 37 | 38 | v, ok := ds.data[key] 39 | ds.data[key] = value 40 | return v, ok 41 | } 42 | 43 | // CAS performs an atomic compare-and-swap: 44 | // if key exists and its prev value == compare, write value, else nop 45 | // The prev value and whether the key existed in the store is returned. 46 | func (ds *DataStore) CAS(key, compare, value string) (string, bool) { 47 | ds.Lock() 48 | defer ds.Unlock() 49 | 50 | prevValue, ok := ds.data[key] 51 | if ok && prevValue == compare { 52 | ds.data[key] = value 53 | } 54 | return prevValue, ok 55 | } 56 | -------------------------------------------------------------------------------- /part4kv/kvservice/datastore_test.go: -------------------------------------------------------------------------------- 1 | package kvservice 2 | 3 | import "testing" 4 | 5 | func checkPutPrev(t *testing.T, ds *DataStore, k string, v string, prev string, hasPrev bool) { 6 | t.Helper() 7 | prevVal, ok := ds.Put(k, v) 8 | if hasPrev != ok || prevVal != prev { 9 | t.Errorf("prevVal=%s, ok=%v; want %s,%v", prevVal, ok, prev, hasPrev) 10 | } 11 | } 12 | 13 | func checkGet(t *testing.T, ds *DataStore, k string, v string, found bool) { 14 | t.Helper() 15 | gotV, ok := ds.Get(k) 16 | if found != ok || v != gotV { 17 | t.Errorf("gotV=%s, ok=%v; want %s,%v", gotV, ok, v, found) 18 | } 19 | } 20 | 21 | func checkCAS(t *testing.T, ds *DataStore, k string, comp string, v string, prev string, found bool) { 22 | t.Helper() 23 | gotPrev, gotFound := ds.CAS(k, comp, v) 24 | if found != gotFound || prev != gotPrev { 25 | t.Errorf("gotPrev=%s, gotFound=%v; want %s,%v", gotPrev, gotFound, prev, found) 26 | } 27 | } 28 | 29 | func TestGetPut(t *testing.T) { 30 | ds := NewDataStore() 31 | 32 | checkGet(t, ds, "foo", "", false) 33 | checkPutPrev(t, ds, "foo", "bar", "", false) 34 | checkGet(t, ds, "foo", "bar", true) 35 | checkPutPrev(t, ds, "foo", "baz", "bar", true) 36 | checkGet(t, ds, "foo", "baz", true) 37 | checkPutPrev(t, ds, "nix", "hard", "", false) 38 | } 39 | 40 | func TestCASBasic(t *testing.T) { 41 | ds := NewDataStore() 42 | ds.Put("foo", "bar") 43 | ds.Put("sun", "beam") 44 | 45 | // CAS replace existing value 46 | checkCAS(t, ds, "foo", "mex", "bro", "bar", true) 47 | checkCAS(t, ds, "foo", "bar", "bro", "bar", true) 48 | checkGet(t, ds, "foo", "bro", true) 49 | 50 | // CAS when key not found 51 | checkCAS(t, ds, "goa", "mm", "vv", "", false) 52 | checkGet(t, ds, "goa", "", false) 53 | 54 | // ... 
and now this key assigned 55 | ds.Put("goa", "tva") 56 | checkCAS(t, ds, "goa", "mm", "vv", "tva", true) 57 | checkCAS(t, ds, "goa", "mm", "vv", "tva", true) 58 | } 59 | 60 | func TestCASConcurrent(t *testing.T) { 61 | // Run this with -race 62 | ds := NewDataStore() 63 | ds.Put("foo", "bar") 64 | ds.Put("sun", "beam") 65 | 66 | go func() { 67 | for range 2000 { 68 | ds.CAS("foo", "bar", "baz") 69 | } 70 | }() 71 | go func() { 72 | for range 2000 { 73 | ds.CAS("foo", "baz", "bar") 74 | } 75 | }() 76 | 77 | v, _ := ds.Get("foo") 78 | if v != "bar" && v != "baz" { 79 | t.Errorf("got v=%s, want bar or baz", v) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /part4kv/kvservice/json.go: -------------------------------------------------------------------------------- 1 | // JSON utilities for the KV service. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package kvservice 6 | 7 | import ( 8 | "encoding/json" 9 | "fmt" 10 | "mime" 11 | "net/http" 12 | ) 13 | 14 | // readRequestJSON expects req to have a JSON content type with a body that 15 | // contains a JSON-encoded value complying with the underlying type of target. 16 | // It populates target, or returns an error. 17 | func readRequestJSON(req *http.Request, target any) error { 18 | contentType := req.Header.Get("Content-Type") 19 | mediaType, _, err := mime.ParseMediaType(contentType) 20 | if err != nil { 21 | return err 22 | } 23 | if mediaType != "application/json" { 24 | return fmt.Errorf("expect application/json Content-Type, got %s", mediaType) 25 | } 26 | 27 | dec := json.NewDecoder(req.Body) 28 | dec.DisallowUnknownFields() 29 | return dec.Decode(target) 30 | } 31 | 32 | // renderJSON renders 'v' as JSON and writes it as a response into w. 33 | func renderJSON(w http.ResponseWriter, v any) { 34 | js, err := json.Marshal(v) 35 | if err != nil { 36 | http.Error(w, err.Error(), http.StatusInternalServerError) 37 | return 38 | } 39 | w.Header().Set("Content-Type", "application/json") 40 | w.Write(js) 41 | } 42 | -------------------------------------------------------------------------------- /part4kv/kvservice/kvservice.go: -------------------------------------------------------------------------------- 1 | // KV service based on Raft - main implementation file. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package kvservice 6 | 7 | import ( 8 | "context" 9 | "encoding/gob" 10 | "fmt" 11 | "log" 12 | "net" 13 | "net/http" 14 | "sync" 15 | "time" 16 | 17 | "github.com/eliben/raft/part3/raft" 18 | "github.com/eliben/raft/part4kv/api" 19 | ) 20 | 21 | const DebugKV = 1 22 | 23 | type KVService struct { 24 | sync.Mutex 25 | 26 | // id is the service ID in a Raft cluster. 27 | id int 28 | 29 | // rs is the Raft server that contains a CM 30 | rs *raft.Server 31 | 32 | // commitChan is the commit channel passed to the Raft server; when commands 33 | // are committed, they're sent on this channel. 34 | commitChan chan raft.CommitEntry 35 | 36 | // commitSubs are the commit subscriptions currently active in this service. 37 | // See the createCommitSubscription method for more details. 38 | commitSubs map[int]chan Command 39 | 40 | // ds is the underlying data store implementing the KV DB. 41 | ds *DataStore 42 | 43 | // srv is the HTTP server exposed by the service to the external world. 
44 | srv *http.Server 45 | 46 | // httpResponsesEnabled controls whether this service returns HTTP responses 47 | // to the client. It's only used for testing and debugging. 48 | httpResponsesEnabled bool 49 | } 50 | 51 | // New creates a new KVService 52 | // 53 | // - id: this service's ID within its Raft cluster 54 | // - peerIds: the IDs of the other Raft peers in the cluster 55 | // - storage: a raft.Storage implementation the service can use for 56 | // durable storage to persist its state. 57 | // - readyChan: notification channel that has to be closed when the Raft 58 | // cluster is ready (all peers are up and connected to each other). 59 | func New(id int, peerIds []int, storage raft.Storage, readyChan <-chan any) *KVService { 60 | gob.Register(Command{}) 61 | commitChan := make(chan raft.CommitEntry) 62 | 63 | // raft.Server handles the Raft RPCs in the cluster; after Serve is called, 64 | // it's ready to accept RPC connections from peers. 65 | rs := raft.NewServer(id, peerIds, storage, readyChan, commitChan) 66 | rs.Serve() 67 | kvs := &KVService{ 68 | id: id, 69 | rs: rs, 70 | commitChan: commitChan, 71 | ds: NewDataStore(), 72 | commitSubs: make(map[int]chan Command), 73 | httpResponsesEnabled: true, 74 | } 75 | 76 | kvs.runUpdater() 77 | return kvs 78 | } 79 | 80 | // IsLeader checks if kvs thinks it's the leader in the Raft cluster. Only 81 | // use this for testin and debugging. 82 | func (kvs *KVService) IsLeader() bool { 83 | return kvs.rs.IsLeader() 84 | } 85 | 86 | // ServeHTTP starts serving the KV REST API on the given TCP port. This 87 | // function does not block; it fires up the HTTP server and returns. To properly 88 | // shut down the server, call the Shutdown method. 89 | func (kvs *KVService) ServeHTTP(port int) { 90 | if kvs.srv != nil { 91 | panic("ServeHTTP called with existing server") 92 | } 93 | mux := http.NewServeMux() 94 | mux.HandleFunc("POST /get/", kvs.handleGet) 95 | mux.HandleFunc("POST /put/", kvs.handlePut) 96 | mux.HandleFunc("POST /cas/", kvs.handleCAS) 97 | 98 | kvs.srv = &http.Server{ 99 | Addr: fmt.Sprintf(":%d", port), 100 | Handler: mux, 101 | } 102 | 103 | go func() { 104 | kvs.kvlog("serving HTTP on %s", kvs.srv.Addr) 105 | if err := kvs.srv.ListenAndServe(); err != http.ErrServerClosed { 106 | log.Fatal(err) 107 | } 108 | kvs.srv = nil 109 | }() 110 | } 111 | 112 | // Shutdown performs a proper shutdown of the service: shuts down the Raft RPC 113 | // server, and shuts down the main HTTP service. It only returns once shutdown 114 | // is complete. 115 | // Note: DisconnectFromRaftPeers on all peers in the cluster should be done 116 | // before Shutdown is called. 117 | func (kvs *KVService) Shutdown() error { 118 | kvs.kvlog("shutting down Raft server") 119 | kvs.rs.Shutdown() 120 | kvs.kvlog("closing commitChan") 121 | close(kvs.commitChan) 122 | 123 | if kvs.srv != nil { 124 | kvs.kvlog("shutting down HTTP server") 125 | ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) 126 | defer cancel() 127 | kvs.srv.Shutdown(ctx) 128 | kvs.kvlog("HTTP shutdown complete") 129 | return nil 130 | } 131 | 132 | return nil 133 | } 134 | 135 | // ToggleHTTPResponsesEnabled controls whether this service returns HTTP 136 | // responses to clients. It's always enabled during normal operation. 137 | // For testing and debugging purposes, this method can be called with false; 138 | // then, the service will not respond to clients over HTTP. 
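// Stepping back, a condensed sketch of how the test harness in this part wires
// up one of these services (kvs1 and kvs2 stand for the other two services in
// a 3-service cluster; the port matches what testharness.go uses):
//
//	storage := raft.NewMapStorage()
//	ready := make(chan any)
//	kvs := kvservice.New(0, []int{1, 2}, storage, ready)
//	kvs.ConnectToRaftPeer(1, kvs1.GetRaftListenAddr())
//	kvs.ConnectToRaftPeer(2, kvs2.GetRaftListenAddr())
//	close(ready)         // all peers connected - the Raft cluster may start electing
//	kvs.ServeHTTP(14200) // REST API for clients; Raft RPC has its own listener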
139 | func (kvs *KVService) ToggleHTTPResponsesEnabled(enable bool) { 140 | kvs.httpResponsesEnabled = enable 141 | } 142 | 143 | func (kvs *KVService) sendHTTPResponse(w http.ResponseWriter, v any) { 144 | if kvs.httpResponsesEnabled { 145 | renderJSON(w, v) 146 | } 147 | } 148 | 149 | func (kvs *KVService) handlePut(w http.ResponseWriter, req *http.Request) { 150 | pr := &api.PutRequest{} 151 | if err := readRequestJSON(req, pr); err != nil { 152 | http.Error(w, err.Error(), http.StatusBadRequest) 153 | return 154 | } 155 | kvs.kvlog("HTTP PUT %v", pr) 156 | 157 | // Submit a command into the Raft server; this is the state change in the 158 | // replicated state machine built on top of the Raft log. 159 | cmd := Command{ 160 | Kind: CommandPut, 161 | Key: pr.Key, 162 | Value: pr.Value, 163 | Id: kvs.id, 164 | } 165 | logIndex := kvs.rs.Submit(cmd) 166 | // If we're not the Raft leader, send an appropriate status 167 | if logIndex < 0 { 168 | kvs.sendHTTPResponse(w, api.PutResponse{RespStatus: api.StatusNotLeader}) 169 | return 170 | } 171 | 172 | // Subscribe for a commit update for our log index. Then wait for it to 173 | // be delivered. 174 | sub := kvs.createCommitSubscription(logIndex) 175 | 176 | // Wait on the sub channel: the updater will deliver a value when the Raft 177 | // log has a commit at logIndex. To ensure clean shutdown of the service, 178 | // also select on the request context - if the request is canceled, this 179 | // handler aborts without sending data back to the client. 180 | select { 181 | case commitCmd := <-sub: 182 | // If this is our command, all is good! If it's some other server's command, 183 | // this means we lost leadership at some point and should return an error 184 | // to the client. 185 | if commitCmd.Id == kvs.id { 186 | kvs.sendHTTPResponse(w, api.PutResponse{ 187 | RespStatus: api.StatusOK, 188 | KeyFound: commitCmd.ResultFound, 189 | PrevValue: commitCmd.ResultValue, 190 | }) 191 | } else { 192 | kvs.sendHTTPResponse(w, api.PutResponse{RespStatus: api.StatusFailedCommit}) 193 | } 194 | case <-req.Context().Done(): 195 | return 196 | } 197 | } 198 | 199 | // The details of these handlers are very similar to handlePut: refer to that 200 | // function for detailed comments. 
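// Condensed, the shared shape of all three handlers is:
//
//	cmd := Command{Kind: ..., Key: ..., Id: kvs.id}
//	logIndex := kvs.rs.Submit(cmd)
//	if logIndex < 0 {
//		// not the Raft leader: respond with StatusNotLeader
//	}
//	sub := kvs.createCommitSubscription(logIndex)
//	select {
//	case commitCmd := <-sub:
//		// commitCmd.Id == kvs.id: our command was committed at logIndex - return its results.
//		// Otherwise another leader's command landed at this index - respond with StatusFailedCommit.
//	case <-req.Context().Done():
//		return // client went away; don't respond
//	}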
201 | func (kvs *KVService) handleGet(w http.ResponseWriter, req *http.Request) { 202 | gr := &api.GetRequest{} 203 | if err := readRequestJSON(req, gr); err != nil { 204 | http.Error(w, err.Error(), http.StatusBadRequest) 205 | return 206 | } 207 | kvs.kvlog("HTTP GET %v", gr) 208 | 209 | cmd := Command{ 210 | Kind: CommandGet, 211 | Key: gr.Key, 212 | Id: kvs.id, 213 | } 214 | logIndex := kvs.rs.Submit(cmd) 215 | if logIndex < 0 { 216 | kvs.sendHTTPResponse(w, api.GetResponse{RespStatus: api.StatusNotLeader}) 217 | return 218 | } 219 | 220 | sub := kvs.createCommitSubscription(logIndex) 221 | 222 | select { 223 | case commitCmd := <-sub: 224 | if commitCmd.Id == kvs.id { 225 | kvs.sendHTTPResponse(w, api.GetResponse{ 226 | RespStatus: api.StatusOK, 227 | KeyFound: commitCmd.ResultFound, 228 | Value: commitCmd.ResultValue, 229 | }) 230 | } else { 231 | kvs.sendHTTPResponse(w, api.GetResponse{RespStatus: api.StatusFailedCommit}) 232 | } 233 | case <-req.Context().Done(): 234 | return 235 | } 236 | } 237 | 238 | func (kvs *KVService) handleCAS(w http.ResponseWriter, req *http.Request) { 239 | cr := &api.CASRequest{} 240 | if err := readRequestJSON(req, cr); err != nil { 241 | http.Error(w, err.Error(), http.StatusBadRequest) 242 | return 243 | } 244 | kvs.kvlog("HTTP CAS %v", cr) 245 | 246 | cmd := Command{ 247 | Kind: CommandCAS, 248 | Key: cr.Key, 249 | Value: cr.Value, 250 | CompareValue: cr.CompareValue, 251 | Id: kvs.id, 252 | } 253 | logIndex := kvs.rs.Submit(cmd) 254 | if logIndex < 0 { 255 | kvs.sendHTTPResponse(w, api.PutResponse{RespStatus: api.StatusNotLeader}) 256 | return 257 | } 258 | 259 | sub := kvs.createCommitSubscription(logIndex) 260 | 261 | select { 262 | case commitCmd := <-sub: 263 | if commitCmd.Id == kvs.id { 264 | kvs.sendHTTPResponse(w, api.CASResponse{ 265 | RespStatus: api.StatusOK, 266 | KeyFound: commitCmd.ResultFound, 267 | PrevValue: commitCmd.ResultValue, 268 | }) 269 | } else { 270 | kvs.sendHTTPResponse(w, api.CASResponse{RespStatus: api.StatusFailedCommit}) 271 | } 272 | case <-req.Context().Done(): 273 | return 274 | } 275 | } 276 | 277 | // runUpdater runs the "updater" goroutine that reads the commit channel 278 | // from Raft and updates the data store; this is the Replicated State Machine 279 | // part of distributed consensus! 280 | // It also notifies subscribers (registered with createCommitSubscription). 281 | func (kvs *KVService) runUpdater() { 282 | go func() { 283 | for entry := range kvs.commitChan { 284 | cmd := entry.Command.(Command) 285 | 286 | switch cmd.Kind { 287 | case CommandGet: 288 | cmd.ResultValue, cmd.ResultFound = kvs.ds.Get(cmd.Key) 289 | case CommandPut: 290 | cmd.ResultValue, cmd.ResultFound = kvs.ds.Put(cmd.Key, cmd.Value) 291 | case CommandCAS: 292 | cmd.ResultValue, cmd.ResultFound = kvs.ds.CAS(cmd.Key, cmd.CompareValue, cmd.Value) 293 | default: 294 | panic(fmt.Errorf("unexpected command %v", cmd)) 295 | } 296 | 297 | // Forward this entry to the subscriber interested in its index, and 298 | // close the subscription - it's single-use. 299 | if sub := kvs.popCommitSubscription(entry.Index); sub != nil { 300 | sub <- cmd 301 | close(sub) 302 | } 303 | } 304 | }() 305 | } 306 | 307 | // createCommitSubscription creates a "commit subscription" for a certain log 308 | // index. It's used by client request handlers that submit a command to the 309 | // Raft CM. createCommitSubscription(index) means "I want to be notified when 310 | // an entry is committed at this index in the Raft log". 
The entry is delivered 311 | // on the returend (buffered) channel by the updater goroutine, after which 312 | // the channel is closed and the subscription is automatically canceled. 313 | func (kvs *KVService) createCommitSubscription(logIndex int) chan Command { 314 | kvs.Lock() 315 | defer kvs.Unlock() 316 | 317 | if _, exists := kvs.commitSubs[logIndex]; exists { 318 | panic(fmt.Sprintf("duplicate commit subscription for logIndex=%d", logIndex)) 319 | } 320 | 321 | ch := make(chan Command, 1) 322 | kvs.commitSubs[logIndex] = ch 323 | return ch 324 | } 325 | 326 | func (kvs *KVService) popCommitSubscription(logIndex int) chan Command { 327 | kvs.Lock() 328 | defer kvs.Unlock() 329 | 330 | ch := kvs.commitSubs[logIndex] 331 | delete(kvs.commitSubs, logIndex) 332 | return ch 333 | } 334 | 335 | // kvlog logs a debugging message if DebugKV > 0 336 | func (kvs *KVService) kvlog(format string, args ...any) { 337 | if DebugKV > 0 { 338 | format = fmt.Sprintf("[kv %d] ", kvs.id) + format 339 | log.Printf(format, args...) 340 | } 341 | } 342 | 343 | // The following functions exist for testing purposes, to simulate faults. 344 | 345 | func (kvs *KVService) ConnectToRaftPeer(peerId int, addr net.Addr) error { 346 | return kvs.rs.ConnectToPeer(peerId, addr) 347 | } 348 | 349 | func (kvs *KVService) DisconnectFromAllRaftPeers() { 350 | kvs.rs.DisconnectAll() 351 | } 352 | 353 | func (kvs *KVService) DisconnectFromRaftPeer(peerId int) error { 354 | return kvs.rs.DisconnectPeer(peerId) 355 | } 356 | 357 | func (kvs *KVService) GetRaftListenAddr() net.Addr { 358 | return kvs.rs.GetListenAddr() 359 | } 360 | -------------------------------------------------------------------------------- /part4kv/system_test.go: -------------------------------------------------------------------------------- 1 | // Test KV services and clients. 2 | // 3 | // It's called a "system" test because it doesn't test a component (like 4 | // KVService) in isolation; rather, the test harness constructs a complete 5 | // system comprising of a cluster of services and some KVClients to exercise it. 6 | // 7 | // Eli Bendersky [https://eli.thegreenplace.net] 8 | // This code is in the public domain. 9 | package main 10 | 11 | import ( 12 | "fmt" 13 | "sync" 14 | "testing" 15 | "time" 16 | 17 | "github.com/fortytw2/leaktest" 18 | ) 19 | 20 | func sleepMs(n int) { 21 | time.Sleep(time.Duration(n) * time.Millisecond) 22 | } 23 | 24 | func TestSetupHarness(t *testing.T) { 25 | h := NewHarness(t, 3) 26 | defer h.Shutdown() 27 | sleepMs(80) 28 | } 29 | 30 | func TestClientRequestBeforeConsensus(t *testing.T) { 31 | h := NewHarness(t, 3) 32 | defer h.Shutdown() 33 | sleepMs(10) 34 | 35 | // The client will keep cycling between the services until a leader is found. 36 | c1 := h.NewClient() 37 | h.CheckPut(c1, "llave", "cosa") 38 | sleepMs(80) 39 | } 40 | 41 | func TestBasicPutGetSingleClient(t *testing.T) { 42 | // Basic smoke test: send one Put, followed by one Get from a single client. 
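// (NewClient, CheckPut, CheckGet, CheckCAS and CheckGetTimesOut used by these
// tests are helpers from testharness.go in this directory: they drive a
// KVClient against the cluster, failing the test on errors or unexpected
// results.)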
43 | h := NewHarness(t, 3) 44 | defer h.Shutdown() 45 | h.CheckSingleLeader() 46 | 47 | c1 := h.NewClient() 48 | h.CheckPut(c1, "llave", "cosa") 49 | 50 | h.CheckGet(c1, "llave", "cosa") 51 | sleepMs(80) 52 | } 53 | 54 | func TestPutPrevValue(t *testing.T) { 55 | h := NewHarness(t, 3) 56 | defer h.Shutdown() 57 | h.CheckSingleLeader() 58 | 59 | c1 := h.NewClient() 60 | // Make sure we get the expected found/prev values before and after Put 61 | prev, found := h.CheckPut(c1, "llave", "cosa") 62 | if found || prev != "" { 63 | t.Errorf(`got found=%v, prev=%v, want false/""`, found, prev) 64 | } 65 | 66 | prev, found = h.CheckPut(c1, "llave", "frodo") 67 | if !found || prev != "cosa" { 68 | t.Errorf(`got found=%v, prev=%v, want true/"cosa"`, found, prev) 69 | } 70 | 71 | // A different key... 72 | prev, found = h.CheckPut(c1, "mafteah", "davar") 73 | if found || prev != "" { 74 | t.Errorf(`got found=%v, prev=%v, want false/""`, found, prev) 75 | } 76 | } 77 | 78 | func TestBasicPutGetDifferentClients(t *testing.T) { 79 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 80 | 81 | h := NewHarness(t, 3) 82 | defer h.Shutdown() 83 | h.CheckSingleLeader() 84 | 85 | c1 := h.NewClient() 86 | h.CheckPut(c1, "k", "v") 87 | 88 | c2 := h.NewClient() 89 | h.CheckGet(c2, "k", "v") 90 | sleepMs(80) 91 | } 92 | 93 | func TestCASBasic(t *testing.T) { 94 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 95 | 96 | h := NewHarness(t, 3) 97 | defer h.Shutdown() 98 | h.CheckSingleLeader() 99 | 100 | c1 := h.NewClient() 101 | h.CheckPut(c1, "k", "v") 102 | 103 | if pv, found := h.CheckCAS(c1, "k", "v", "newv"); pv != "v" || !found { 104 | t.Errorf("got %s,%v, want replacement", pv, found) 105 | } 106 | } 107 | 108 | func TestCASConcurrent(t *testing.T) { 109 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 110 | 111 | h := NewHarness(t, 3) 112 | defer h.Shutdown() 113 | h.CheckSingleLeader() 114 | c := h.NewClient() 115 | h.CheckPut(c, "foo", "mexico") 116 | 117 | var wg sync.WaitGroup 118 | wg.Add(1) 119 | go func() { 120 | defer wg.Done() 121 | c := h.NewClient() 122 | for range 20 { 123 | h.CheckCAS(c, "foo", "bar", "bomba") 124 | } 125 | }() 126 | 127 | // Once a client homes in on the right leader, it takes 4-5 ms to roundtrip 128 | // a command. For the first 50 ms after launching the CAS goroutines, 'foo' 129 | // has the wrong value so the CAS doesn't work, but then it will... 130 | sleepMs(50) 131 | c2 := h.NewClient() 132 | h.CheckPut(c2, "foo", "bar") 133 | 134 | sleepMs(300) 135 | h.CheckGet(c2, "foo", "bomba") 136 | 137 | wg.Wait() 138 | } 139 | 140 | func TestConcurrentClientsPutsAndGets(t *testing.T) { 141 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 142 | 143 | // Test that we can submit multiple PUT and GET requests concurrently, with 144 | // one goroutine per request launching at the same time. 
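// Note: the goroutines launched below close over the loop variable i directly.
// This is fine here because go.mod requires Go 1.23, and since Go 1.22 each
// loop iteration gets a fresh i; on older Go the usual idiom was to pass it in
// explicitly:
//
//	go func(i int) { /* ... */ }(i)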
145 | h := NewHarness(t, 3) 146 | defer h.Shutdown() 147 | h.CheckSingleLeader() 148 | 149 | n := 9 150 | for i := range n { 151 | go func() { 152 | c := h.NewClient() 153 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 154 | if f { 155 | t.Errorf("got key found for %d, want false", i) 156 | } 157 | }() 158 | } 159 | sleepMs(150) 160 | 161 | for i := range n { 162 | go func() { 163 | c := h.NewClient() 164 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 165 | }() 166 | } 167 | sleepMs(150) 168 | } 169 | 170 | func Test5ServerConcurrentClientsPutsAndGets(t *testing.T) { 171 | // Similar to the previous test, but this one has a 5-server Raft cluster. 172 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 173 | 174 | h := NewHarness(t, 5) 175 | defer h.Shutdown() 176 | h.CheckSingleLeader() 177 | 178 | n := 9 179 | for i := range n { 180 | go func() { 181 | c := h.NewClient() 182 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 183 | if f { 184 | t.Errorf("got key found for %d, want false", i) 185 | } 186 | }() 187 | } 188 | sleepMs(150) 189 | 190 | for i := range n { 191 | go func() { 192 | c := h.NewClient() 193 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 194 | }() 195 | } 196 | sleepMs(150) 197 | } 198 | 199 | func TestDisconnectLeaderAfterPuts(t *testing.T) { 200 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 201 | 202 | h := NewHarness(t, 3) 203 | defer h.Shutdown() 204 | lid := h.CheckSingleLeader() 205 | 206 | // Submit some PUT commands. 207 | n := 4 208 | for i := range n { 209 | c := h.NewClient() 210 | h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 211 | } 212 | 213 | h.DisconnectServiceFromPeers(lid) 214 | sleepMs(300) 215 | newlid := h.CheckSingleLeader() 216 | 217 | if newlid == lid { 218 | t.Errorf("got the same leader") 219 | } 220 | 221 | // Trying to contact the disconnected leader will time out. 222 | c := h.NewClientSingleService(lid) 223 | h.CheckGetTimesOut(c, "key1") 224 | 225 | // GET commands expecting to get the right values 226 | for range 5 { 227 | c := h.NewClientWithRandomAddrsOrder() 228 | for j := range n { 229 | h.CheckGet(c, fmt.Sprintf("key%v", j), fmt.Sprintf("value%v", j)) 230 | } 231 | } 232 | 233 | // At the end of the test, reconnect the peers to avoid a goroutine leak. 234 | // In real scenarios, we expect that services will eventually be reconnected, 235 | // and if not - a single goroutine leaked is not an issue since the server 236 | // will end up being killed anyway. 237 | h.ReconnectServiceToPeers(lid) 238 | sleepMs(200) 239 | } 240 | 241 | func TestDisconnectLeaderAndFollower(t *testing.T) { 242 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 243 | 244 | h := NewHarness(t, 3) 245 | defer h.Shutdown() 246 | lid := h.CheckSingleLeader() 247 | 248 | // Submit some PUT commands. 249 | n := 4 250 | for i := range n { 251 | c := h.NewClient() 252 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 253 | if f { 254 | t.Errorf("got key found for %d, want false", i) 255 | } 256 | } 257 | 258 | // Disconnect leader and one other server; the cluster loses consensus 259 | // and client requests should now time out. 260 | h.DisconnectServiceFromPeers(lid) 261 | otherId := (lid + 1) % 3 262 | h.DisconnectServiceFromPeers(otherId) 263 | sleepMs(100) 264 | 265 | c := h.NewClient() 266 | h.CheckGetTimesOut(c, "key0") 267 | 268 | // Reconnect one server, but not the old leader. 
We should still get all 269 | // the right data back. 270 | h.ReconnectServiceToPeers(otherId) 271 | h.CheckSingleLeader() 272 | for i := range n { 273 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 274 | } 275 | 276 | // Reconnect the old leader. We should still get all the right data back. 277 | h.ReconnectServiceToPeers(lid) 278 | h.CheckSingleLeader() 279 | for i := range n { 280 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 281 | } 282 | sleepMs(100) 283 | } 284 | 285 | func TestCrashFollower(t *testing.T) { 286 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 287 | 288 | h := NewHarness(t, 3) 289 | defer h.Shutdown() 290 | lid := h.CheckSingleLeader() 291 | 292 | // Submit some PUT commands. 293 | n := 3 294 | for i := range n { 295 | c := h.NewClient() 296 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 297 | if f { 298 | t.Errorf("got key found for %d, want false", i) 299 | } 300 | } 301 | 302 | // Crash a non-leader 303 | otherId := (lid + 1) % 3 304 | h.CrashService(otherId) 305 | 306 | // Talking directly to the leader should still work... 307 | for i := range n { 308 | c := h.NewClientSingleService(lid) 309 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 310 | } 311 | 312 | // Talking to the remaining live servers should also work 313 | for i := range n { 314 | c := h.NewClient() 315 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 316 | } 317 | } 318 | 319 | func TestCrashLeader(t *testing.T) { 320 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 321 | 322 | h := NewHarness(t, 3) 323 | defer h.Shutdown() 324 | lid := h.CheckSingleLeader() 325 | 326 | // Submit some PUT commands. 327 | n := 3 328 | for i := range n { 329 | c := h.NewClient() 330 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 331 | if f { 332 | t.Errorf("got key found for %d, want false", i) 333 | } 334 | } 335 | 336 | // Crash a leader and wait for the cluster to establish a new leader. 337 | h.CrashService(lid) 338 | h.CheckSingleLeader() 339 | 340 | // Talking to the remaining live servers should get the right data. 341 | for i := range n { 342 | c := h.NewClient() 343 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 344 | } 345 | } 346 | 347 | func TestCrashThenRestartLeader(t *testing.T) { 348 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 349 | 350 | h := NewHarness(t, 3) 351 | defer h.Shutdown() 352 | lid := h.CheckSingleLeader() 353 | 354 | // Submit some PUT commands. 355 | n := 3 356 | for i := range n { 357 | c := h.NewClient() 358 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 359 | if f { 360 | t.Errorf("got key found for %d, want false", i) 361 | } 362 | } 363 | 364 | // Crash a leader and wait for the cluster to establish a new leader. 365 | h.CrashService(lid) 366 | h.CheckSingleLeader() 367 | 368 | // Talking to the remaining live servers should get the right data. 369 | for i := range n { 370 | c := h.NewClient() 371 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 372 | } 373 | 374 | // Now restart the old leader: it will join the cluster and get all the 375 | // data. 376 | h.RestartService(lid) 377 | 378 | // Get data from services in different orders. 
379 | for range 5 { 380 | c := h.NewClientWithRandomAddrsOrder() 381 | for j := range n { 382 | h.CheckGet(c, fmt.Sprintf("key%v", j), fmt.Sprintf("value%v", j)) 383 | } 384 | } 385 | } 386 | -------------------------------------------------------------------------------- /part4kv/testharness.go: -------------------------------------------------------------------------------- 1 | // Test harness for testing the KV service and clients. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package main 6 | 7 | import ( 8 | "context" 9 | "fmt" 10 | "log" 11 | "math/rand/v2" 12 | "net/http" 13 | "strings" 14 | "testing" 15 | "time" 16 | 17 | "github.com/eliben/raft/part3/raft" 18 | "github.com/eliben/raft/part4kv/kvclient" 19 | "github.com/eliben/raft/part4kv/kvservice" 20 | ) 21 | 22 | func init() { 23 | log.SetFlags(log.Ltime | log.Lmicroseconds) 24 | } 25 | 26 | // Test harness for kvservice and client system tests. 27 | type Harness struct { 28 | n int 29 | 30 | // kvCluster is a list of all KVService instances participating in a cluster. 31 | // A service's index into this list is its ID in the cluster. 32 | kvCluster []*kvservice.KVService 33 | 34 | // kvServiceAddrs is a list of HTTP addresses (localhost:) the KV 35 | // services are accepting client commands on. 36 | kvServiceAddrs []string 37 | 38 | storage []*raft.MapStorage 39 | 40 | t *testing.T 41 | 42 | // connected has a bool per server in cluster, specifying whether this server 43 | // is currently connected to peers (if false, it's partitioned and no messages 44 | // will pass to or from it). 45 | connected []bool 46 | 47 | // alive has a bool per server in the cluster, specifying whether this server 48 | // is currently alive (false means it has crashed and wasn't restarted yet). 49 | // connected implies alive. 50 | alive []bool 51 | 52 | // ctx is context used for the HTTP client commands used by tests. 53 | // ctxCancel is its cancellation function. 54 | ctx context.Context 55 | ctxCancel func() 56 | } 57 | 58 | func NewHarness(t *testing.T, n int) *Harness { 59 | kvss := make([]*kvservice.KVService, n) 60 | ready := make(chan any) 61 | connected := make([]bool, n) 62 | alive := make([]bool, n) 63 | storage := make([]*raft.MapStorage, n) 64 | 65 | // Create all KVService instances in this cluster. 66 | for i := range n { 67 | peerIds := make([]int, 0) 68 | for p := range n { 69 | if p != i { 70 | peerIds = append(peerIds, p) 71 | } 72 | } 73 | 74 | storage[i] = raft.NewMapStorage() 75 | kvss[i] = kvservice.New(i, peerIds, storage[i], ready) 76 | alive[i] = true 77 | } 78 | 79 | // Connect the Raft peers of the services to each other and close the ready 80 | // channel to signal to them it's all ready. 
81 | for i := range n { 82 | for j := range n { 83 | if i != j { 84 | kvss[i].ConnectToRaftPeer(j, kvss[j].GetRaftListenAddr()) 85 | } 86 | } 87 | connected[i] = true 88 | } 89 | close(ready) 90 | 91 | // Each KVService instance serves a REST API on a different port 92 | kvServiceAddrs := make([]string, n) 93 | for i := range n { 94 | port := 14200 + i 95 | kvss[i].ServeHTTP(port) 96 | 97 | kvServiceAddrs[i] = fmt.Sprintf("localhost:%d", port) 98 | } 99 | 100 | ctx, ctxCancel := context.WithCancel(context.Background()) 101 | 102 | h := &Harness{ 103 | n: n, 104 | kvCluster: kvss, 105 | kvServiceAddrs: kvServiceAddrs, 106 | t: t, 107 | connected: connected, 108 | alive: alive, 109 | storage: storage, 110 | ctx: ctx, 111 | ctxCancel: ctxCancel, 112 | } 113 | return h 114 | } 115 | 116 | func (h *Harness) DisconnectServiceFromPeers(id int) { 117 | tlog("Disconnect %d", id) 118 | h.kvCluster[id].DisconnectFromAllRaftPeers() 119 | for j := 0; j < h.n; j++ { 120 | if j != id { 121 | h.kvCluster[j].DisconnectFromRaftPeer(id) 122 | } 123 | } 124 | h.connected[id] = false 125 | } 126 | 127 | func (h *Harness) ReconnectServiceToPeers(id int) { 128 | tlog("Reconnect %d", id) 129 | for j := 0; j < h.n; j++ { 130 | if j != id && h.alive[j] { 131 | if err := h.kvCluster[id].ConnectToRaftPeer(j, h.kvCluster[j].GetRaftListenAddr()); err != nil { 132 | h.t.Fatal(err) 133 | } 134 | if err := h.kvCluster[j].ConnectToRaftPeer(id, h.kvCluster[id].GetRaftListenAddr()); err != nil { 135 | h.t.Fatal(err) 136 | } 137 | } 138 | } 139 | h.connected[id] = true 140 | } 141 | 142 | // CrashService "crashes" a service by disconnecting it from all peers and 143 | // then asking it to shut down. We're not going to be using the same service 144 | // instance again. 145 | func (h *Harness) CrashService(id int) { 146 | tlog("Crash %d", id) 147 | h.DisconnectServiceFromPeers(id) 148 | h.alive[id] = false 149 | if err := h.kvCluster[id].Shutdown(); err != nil { 150 | h.t.Errorf("error while shutting down service %d: %v", id, err) 151 | } 152 | } 153 | 154 | // RestartService "restarts" a service by creating a new instance and 155 | // connecting it to peers. 156 | func (h *Harness) RestartService(id int) { 157 | if h.alive[id] { 158 | log.Fatalf("id=%d is alive in RestartService", id) 159 | } 160 | tlog("Restart %d", id) 161 | 162 | peerIds := make([]int, 0) 163 | for p := range h.n { 164 | if p != id { 165 | peerIds = append(peerIds, p) 166 | } 167 | } 168 | ready := make(chan any) 169 | h.kvCluster[id] = kvservice.New(id, peerIds, h.storage[id], ready) 170 | h.kvCluster[id].ServeHTTP(14200 + id) 171 | 172 | h.ReconnectServiceToPeers(id) 173 | close(ready) 174 | h.alive[id] = true 175 | time.Sleep(20 * time.Millisecond) 176 | } 177 | 178 | // DisableHTTPResponsesFromService causes the given service to stop responding 179 | // to HTTP request from clients (though it will still perform the requested 180 | // operations). 181 | func (h *Harness) DisableHTTPResponsesFromService(id int) { 182 | tlog("Disabling HTTP responses from %d", id) 183 | h.kvCluster[id].ToggleHTTPResponsesEnabled(false) 184 | } 185 | 186 | func (h *Harness) Shutdown() { 187 | for i := range h.n { 188 | h.kvCluster[i].DisconnectFromAllRaftPeers() 189 | h.connected[i] = false 190 | } 191 | 192 | // These help the HTTP server in KVService shut down properly. 
193 | http.DefaultClient.CloseIdleConnections() 194 | h.ctxCancel() 195 | 196 | for i := range h.n { 197 | if h.alive[i] { 198 | h.alive[i] = false 199 | if err := h.kvCluster[i].Shutdown(); err != nil { 200 | h.t.Errorf("error while shutting down service %d: %v", i, err) 201 | } 202 | } 203 | } 204 | } 205 | 206 | // NewClient creates a new client that will contact all the existing live 207 | // services. 208 | func (h *Harness) NewClient() *kvclient.KVClient { 209 | var addrs []string 210 | for i := range h.n { 211 | if h.alive[i] { 212 | addrs = append(addrs, h.kvServiceAddrs[i]) 213 | } 214 | } 215 | return kvclient.New(addrs) 216 | } 217 | 218 | // NewClientWithRandomAddrsOrder creates a new client that will contact all 219 | // the existing live services, but in a randomized order. 220 | func (h *Harness) NewClientWithRandomAddrsOrder() *kvclient.KVClient { 221 | var addrs []string 222 | for i := range h.n { 223 | if h.alive[i] { 224 | addrs = append(addrs, h.kvServiceAddrs[i]) 225 | } 226 | } 227 | rand.Shuffle(len(addrs), func(i, j int) { 228 | addrs[i], addrs[j] = addrs[j], addrs[i] 229 | }) 230 | return kvclient.New(addrs) 231 | } 232 | 233 | // NewClientSingleService creates a new client that will contact only a single 234 | // service (specified by id). Note that if this isn't the leader, the client 235 | // may get stuck in retries. 236 | func (h *Harness) NewClientSingleService(id int) *kvclient.KVClient { 237 | addrs := h.kvServiceAddrs[id : id+1] 238 | return kvclient.New(addrs) 239 | } 240 | 241 | // CheckSingleLeader checks that only a single server thinks it's the leader. 242 | // Returns the leader's id in the Raft cluster. It retries serveral times if 243 | // no leader is identified yet, so this method is also useful to check that 244 | // the Raft cluster settled on a leader and is ready to execute commands. 245 | func (h *Harness) CheckSingleLeader() int { 246 | for r := 0; r < 8; r++ { 247 | leaderId := -1 248 | for i := range h.n { 249 | if h.connected[i] && h.kvCluster[i].IsLeader() { 250 | if leaderId < 0 { 251 | leaderId = i 252 | } else { 253 | h.t.Fatalf("both %d and %d think they're leaders", leaderId, i) 254 | } 255 | } 256 | } 257 | if leaderId >= 0 { 258 | return leaderId 259 | } 260 | time.Sleep(150 * time.Millisecond) 261 | } 262 | 263 | h.t.Fatalf("leader not found") 264 | return -1 265 | } 266 | 267 | // CheckPut sends a Put request through client c, and checks there are no 268 | // errors. Returns (prevValue, keyFound). 269 | func (h *Harness) CheckPut(c *kvclient.KVClient, key, value string) (string, bool) { 270 | ctx, cancel := context.WithTimeout(h.ctx, 500*time.Millisecond) 271 | defer cancel() 272 | pv, f, err := c.Put(ctx, key, value) 273 | if err != nil { 274 | h.t.Error(err) 275 | } 276 | return pv, f 277 | } 278 | 279 | // CheckGet sends a Get request through client c, and checks there are 280 | // no errors; it also checks that the key was found, and has the expected 281 | // value. 282 | func (h *Harness) CheckGet(c *kvclient.KVClient, key string, wantValue string) { 283 | ctx, cancel := context.WithTimeout(h.ctx, 500*time.Millisecond) 284 | defer cancel() 285 | gv, f, err := c.Get(ctx, key) 286 | if err != nil { 287 | h.t.Error(err) 288 | } 289 | if !f { 290 | h.t.Errorf("got found=false, want true for key=%s", key) 291 | } 292 | if gv != wantValue { 293 | h.t.Errorf("got value=%v, want %v", gv, wantValue) 294 | } 295 | } 296 | 297 | // CheckCAS sends a CAS request through client c, and checks there are no 298 | // errors. 
Returns (prevValue, keyFound).
299 | func (h *Harness) CheckCAS(c *kvclient.KVClient, key, compare, value string) (string, bool) {
300 | 	ctx, cancel := context.WithTimeout(h.ctx, 500*time.Millisecond)
301 | 	defer cancel()
302 | 	pv, f, err := c.CAS(ctx, key, compare, value)
303 | 	if err != nil {
304 | 		h.t.Error(err)
305 | 	}
306 | 	return pv, f
307 | }
308 | 
309 | // CheckGetNotFound sends a Get request through client c, and checks that
310 | // there are no errors and that the key isn't found in the service.
311 | func (h *Harness) CheckGetNotFound(c *kvclient.KVClient, key string) {
312 | 	ctx, cancel := context.WithTimeout(h.ctx, 500*time.Millisecond)
313 | 	defer cancel()
314 | 	_, f, err := c.Get(ctx, key)
315 | 	if err != nil {
316 | 		h.t.Error(err)
317 | 	}
318 | 	if f {
319 | 		h.t.Errorf("got found=true, want false for key=%s", key)
320 | 	}
321 | }
322 | 
323 | // CheckGetTimesOut checks that a Get request with the given client will
324 | // time out if we set up a context with a deadline, because the client is
325 | // unable to get the service to commit its command.
326 | func (h *Harness) CheckGetTimesOut(c *kvclient.KVClient, key string) {
327 | 	ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond)
328 | 	defer cancel()
329 | 	_, _, err := c.Get(ctx, key)
330 | 	if err == nil || !strings.Contains(err.Error(), "deadline exceeded") {
331 | 		h.t.Errorf("got err %v; want 'deadline exceeded'", err)
332 | 	}
333 | }
334 | 
335 | func tlog(format string, a ...any) {
336 | 	format = "[TEST] " + format
337 | 	log.Printf(format, a...)
338 | }
339 | 
-------------------------------------------------------------------------------- /part5kv/api/api.go: --------------------------------------------------------------------------------
1 | // REST API data types for the KV service.
2 | //
3 | // Eli Bendersky [https://eli.thegreenplace.net]
4 | // This code is in the public domain.
5 | package api
6 | 
7 | // Defines the data structures used in the REST API between kvservice and
8 | // clients. These structs are JSON-encoded into the body of HTTP requests
9 | // and responses passed between services and clients.
10 | // Each response carries a bespoke ResponseStatus instead of an HTTP status
11 | // code, because some statuses (like "not leader" or "failed commit") have no
12 | // good match among the standard HTTP status codes. Each request type carries
13 | // fields that uniquely identify it (ClientID, RequestID) for de-duplication.
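//
// As an illustrative sketch (not part of the original file; the /put/ route
// is the one kvservice.go registers, and StatusOK encodes to 1), a Put
// exchange could look like this on the wire:
//
//	POST /put/  {"Key": "foo", "Value": "bar", "ClientID": 1, "RequestID": 7}
//	        ->  {"RespStatus": 1, "KeyFound": false, "PrevValue": ""}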
14 | 15 | type Response interface { 16 | Status() ResponseStatus 17 | } 18 | 19 | type PutRequest struct { 20 | Key string 21 | Value string 22 | 23 | ClientID int64 24 | RequestID int64 25 | } 26 | 27 | type PutResponse struct { 28 | RespStatus ResponseStatus 29 | KeyFound bool 30 | PrevValue string 31 | } 32 | 33 | func (pr *PutResponse) Status() ResponseStatus { 34 | return pr.RespStatus 35 | } 36 | 37 | type AppendRequest struct { 38 | Key string 39 | Value string 40 | 41 | ClientID int64 42 | RequestID int64 43 | } 44 | 45 | type AppendResponse struct { 46 | RespStatus ResponseStatus 47 | KeyFound bool 48 | PrevValue string 49 | } 50 | 51 | func (ar *AppendResponse) Status() ResponseStatus { 52 | return ar.RespStatus 53 | } 54 | 55 | type GetRequest struct { 56 | Key string 57 | 58 | ClientID int64 59 | RequestID int64 60 | } 61 | 62 | type GetResponse struct { 63 | RespStatus ResponseStatus 64 | KeyFound bool 65 | Value string 66 | } 67 | 68 | func (gr *GetResponse) Status() ResponseStatus { 69 | return gr.RespStatus 70 | } 71 | 72 | type CASRequest struct { 73 | Key string 74 | CompareValue string 75 | Value string 76 | 77 | ClientID int64 78 | RequestID int64 79 | } 80 | 81 | type CASResponse struct { 82 | RespStatus ResponseStatus 83 | KeyFound bool 84 | PrevValue string 85 | } 86 | 87 | func (cr *CASResponse) Status() ResponseStatus { 88 | return cr.RespStatus 89 | } 90 | 91 | type ResponseStatus int 92 | 93 | const ( 94 | StatusInvalid ResponseStatus = iota 95 | StatusOK 96 | StatusNotLeader 97 | StatusFailedCommit 98 | StatusDuplicateRequest 99 | ) 100 | 101 | var responseName = map[ResponseStatus]string{ 102 | StatusInvalid: "invalid", 103 | StatusOK: "OK", 104 | StatusNotLeader: "NotLeader", 105 | StatusFailedCommit: "FailedCommit", 106 | StatusDuplicateRequest: "DuplicateRequest", 107 | } 108 | 109 | func (rs ResponseStatus) String() string { 110 | return responseName[rs] 111 | } 112 | -------------------------------------------------------------------------------- /part5kv/dochecks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | go vet ./... 5 | staticcheck ./... 
6 | -------------------------------------------------------------------------------- /part5kv/dotest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | 5 | logfile=~/temp/rlog 6 | 7 | go test -v -race -run $@ |& tee ${logfile} 8 | 9 | go run ../tools/raft-testlog-viz/main.go < ${logfile} 10 | -------------------------------------------------------------------------------- /part5kv/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/eliben/raft/part5kv 2 | 3 | go 1.23.1 4 | 5 | replace github.com/eliben/raft/part3/raft => ../part3/raft/ 6 | 7 | require github.com/eliben/raft/part3/raft v0.0.0-00010101000000-000000000000 8 | 9 | require github.com/fortytw2/leaktest v1.3.0 // indirect 10 | -------------------------------------------------------------------------------- /part5kv/go.sum: -------------------------------------------------------------------------------- 1 | github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= 2 | github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= 3 | -------------------------------------------------------------------------------- /part5kv/kvclient/kvclient.go: -------------------------------------------------------------------------------- 1 | // KV client library. Go code that wants to talk to the KV service should use 2 | // this client instead of sending REST requests directly. 3 | // 4 | // Eli Bendersky [https://eli.thegreenplace.net] 5 | // This code is in the public domain. 6 | package kvclient 7 | 8 | import ( 9 | "bytes" 10 | "context" 11 | "encoding/json" 12 | "fmt" 13 | "log" 14 | "net/http" 15 | "sync/atomic" 16 | "time" 17 | 18 | "github.com/eliben/raft/part5kv/api" 19 | ) 20 | 21 | // DebugClient enables debug output 22 | const DebugClient = 1 23 | 24 | type KVClient struct { 25 | addrs []string 26 | 27 | // assumedLeader is the index (in addrs) of the service we assume is the 28 | // current leader. It is zero-initialized by default, without loss of 29 | // generality. 30 | assumedLeader int 31 | 32 | // clientID is a unique identifier for a client; it's managed internally 33 | // in this file by incrementing the clientCount global. 34 | clientID int64 35 | 36 | // requestID is a unique identifier for a request a specific client makes; 37 | // each client manages its own requestID, and increments it monotonically and 38 | // atomically each time the user asks to send a new request. 39 | requestID atomic.Int64 40 | } 41 | 42 | // New creates a new KVClient. serviceAddrs is the addresses (each a string 43 | // with the format "host:port") of the services in the KVService cluster the 44 | // client will contact. 45 | func New(serviceAddrs []string) *KVClient { 46 | return &KVClient{ 47 | addrs: serviceAddrs, 48 | assumedLeader: 0, 49 | clientID: clientCount.Add(1), 50 | } 51 | } 52 | 53 | // clientCount is used to assign unique identifiers to distinct clients. 54 | var clientCount atomic.Int64 55 | 56 | // Put the key=value pair into the store. Returns an error, or 57 | // (prevValue, keyFound, false), where keyFound specifies whether the key was 58 | // found in the store prior to this command, and prevValue is its previous 59 | // value if it was found. 
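//
// An illustrative call (a sketch; the addresses are hypothetical and mirror
// the ports used by the test harness):
//
//	c := kvclient.New([]string{"localhost:14200", "localhost:14201"})
//	prev, found, err := c.Put(context.Background(), "llave", "cosa")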
60 | func (c *KVClient) Put(ctx context.Context, key string, value string) (string, bool, error) { 61 | // Each request gets a unique ID, which is a combination of client ID and 62 | // request ID within this client. The struct with this ID is passed to s.send, 63 | // which may retry the request multiple times until succeeding. The unique ID 64 | // within each request helps the service de-duplicate requests that may 65 | // arrive multiple times due to network issues and client retries. 66 | putReq := api.PutRequest{ 67 | Key: key, 68 | Value: value, 69 | ClientID: c.clientID, 70 | RequestID: c.requestID.Add(1), 71 | } 72 | var putResp api.PutResponse 73 | err := c.send(ctx, "put", putReq, &putResp) 74 | return putResp.PrevValue, putResp.KeyFound, err 75 | } 76 | 77 | // Append the value to the key in the store. Returns an error, or 78 | // (prevValue, keyFound, false), where keyFound specifies whether the key was 79 | // found in the store prior to this command, and prevValue is its previous 80 | // value if it was found. 81 | func (c *KVClient) Append(ctx context.Context, key string, value string) (string, bool, error) { 82 | appendReq := api.AppendRequest{ 83 | Key: key, 84 | Value: value, 85 | ClientID: c.clientID, 86 | RequestID: c.requestID.Add(1), 87 | } 88 | var appendResp api.AppendResponse 89 | err := c.send(ctx, "append", appendReq, &appendResp) 90 | return appendResp.PrevValue, appendResp.KeyFound, err 91 | } 92 | 93 | // Get the value of key from the store. Returns an error, or 94 | // (value, found, false), where found specifies whether the key was found in 95 | // the store, and value is its value. 96 | func (c *KVClient) Get(ctx context.Context, key string) (string, bool, error) { 97 | getReq := api.GetRequest{ 98 | Key: key, 99 | ClientID: c.clientID, 100 | RequestID: c.requestID.Add(1), 101 | } 102 | var getResp api.GetResponse 103 | err := c.send(ctx, "get", getReq, &getResp) 104 | return getResp.Value, getResp.KeyFound, err 105 | } 106 | 107 | // CAS operation: if prev value of key == compare, assign new value. Returns an 108 | // error, or (prevValue, keyFound, false), where keyFound specifies whether the 109 | // key was found in the store prior to this command, and prevValue is its 110 | // previous value if it was found. 111 | func (c *KVClient) CAS(ctx context.Context, key string, compare string, value string) (string, bool, error) { 112 | casReq := api.CASRequest{ 113 | Key: key, 114 | CompareValue: compare, 115 | Value: value, 116 | ClientID: c.clientID, 117 | RequestID: c.requestID.Add(1), 118 | } 119 | var casResp api.CASResponse 120 | err := c.send(ctx, "cas", casReq, &casResp) 121 | return casResp.PrevValue, casResp.KeyFound, err 122 | } 123 | 124 | func (c *KVClient) send(ctx context.Context, route string, req any, resp api.Response) error { 125 | // This loop rotates through the list of service addresses until we get 126 | // a response that indicates we've found the leader of the cluster. It 127 | // starts at c.assumedLeader 128 | FindLeader: 129 | for { 130 | // There's a two-level context tree here: we have the user context - ctx, 131 | // and we create our own context to impose a timeout on each request to 132 | // the service. If our timeout expires, we move on to try the next service. 133 | // In the meantime, we have to keep an eye on the user context - if that's 134 | // canceled at any time (due to timeout, explicit cancellation, etc), we 135 | // bail out. 
136 | retryCtx, retryCtxCancel := context.WithTimeout(ctx, 50*time.Millisecond) 137 | path := fmt.Sprintf("http://%s/%s/", c.addrs[c.assumedLeader], route) 138 | 139 | c.clientlog("sending %#v to %v", req, path) 140 | if err := sendJSONRequest(retryCtx, path, req, resp); err != nil { 141 | // Since the contexts are nested, the order of testing here matters. 142 | // We have to check the parent context first - if it's done, it means 143 | // we have to return. 144 | if contextDone(ctx) { 145 | c.clientlog("parent context done; bailing out") 146 | retryCtxCancel() 147 | return err 148 | } else if contextDeadlineExceeded(retryCtx) { 149 | // If the parent context is not done, but our retry context is done, 150 | // it's time to retry a different service. 151 | c.clientlog("timed out: will try next address") 152 | c.assumedLeader = (c.assumedLeader + 1) % len(c.addrs) 153 | retryCtxCancel() 154 | continue FindLeader 155 | } 156 | retryCtxCancel() 157 | return err 158 | } 159 | c.clientlog("received response %#v", resp) 160 | 161 | // No context/timeout on this request - we've actually received a response. 162 | switch resp.Status() { 163 | case api.StatusNotLeader: 164 | c.clientlog("not leader: will try next address") 165 | c.assumedLeader = (c.assumedLeader + 1) % len(c.addrs) 166 | retryCtxCancel() 167 | continue FindLeader 168 | case api.StatusOK: 169 | retryCtxCancel() 170 | return nil 171 | case api.StatusFailedCommit: 172 | retryCtxCancel() 173 | return fmt.Errorf("commit failed; please retry") 174 | case api.StatusDuplicateRequest: 175 | retryCtxCancel() 176 | return fmt.Errorf("this request was already completed") 177 | default: 178 | panic("unreachable") 179 | } 180 | } 181 | } 182 | 183 | // clientlog logs a debugging message if DebugClient > 0 184 | func (c *KVClient) clientlog(format string, args ...any) { 185 | if DebugClient > 0 { 186 | clientName := fmt.Sprintf("[client%03d]", c.clientID) 187 | format = clientName + " " + format 188 | log.Printf(format, args...) 189 | } 190 | } 191 | 192 | func sendJSONRequest(ctx context.Context, path string, reqData any, respData any) error { 193 | body := new(bytes.Buffer) 194 | enc := json.NewEncoder(body) 195 | if err := enc.Encode(reqData); err != nil { 196 | return fmt.Errorf("JSON-encoding request data: %w", err) 197 | } 198 | 199 | req, err := http.NewRequestWithContext(ctx, http.MethodPost, path, body) 200 | if err != nil { 201 | return fmt.Errorf("creating HTTP request: %w", err) 202 | } 203 | req.Header.Add("Content-Type", "application/json") 204 | 205 | resp, err := http.DefaultClient.Do(req) 206 | if err != nil { 207 | return err 208 | } 209 | 210 | dec := json.NewDecoder(resp.Body) 211 | if err := dec.Decode(respData); err != nil { 212 | return fmt.Errorf("JSON-decoding response data: %w", err) 213 | } 214 | return nil 215 | } 216 | 217 | // contextDone checks whether ctx is done for any reason. It doesn't block. 218 | func contextDone(ctx context.Context) bool { 219 | select { 220 | case <-ctx.Done(): 221 | return true 222 | default: 223 | } 224 | return false 225 | } 226 | 227 | // contextDeadlineExceeded checks whether ctx is done because of an exceeded 228 | // deadline. It doesn't block. 
229 | func contextDeadlineExceeded(ctx context.Context) bool { 230 | select { 231 | case <-ctx.Done(): 232 | if ctx.Err() == context.DeadlineExceeded { 233 | return true 234 | } 235 | default: 236 | } 237 | return false 238 | } 239 | -------------------------------------------------------------------------------- /part5kv/kvservice/command.go: -------------------------------------------------------------------------------- 1 | // Command type: stored in Raft by the KV service. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package kvservice 6 | 7 | // Command is the concrete command type KVService submits to the Raft log to 8 | // manage its state machine. It's also used to carry the results of the command 9 | // after it's applied to the state machine. These are the supported commands: 10 | // 11 | // CommandGet: queries a key's value 12 | // 13 | // * Key is the key to get, Value is ignored 14 | // * CompareValue is ignored 15 | // * ResultFound is true iff Key was found in the store 16 | // * ResultValue is the value, if Key was found in the store 17 | // 18 | // CommandPut: assigns value to the key 19 | // 20 | // * Key,Value are the pair to assign (store[key]=value) 21 | // * CompareValue is ignored 22 | // * ResultFound is true iff Key was previously found in the store 23 | // * ResultValue is the old value of Key, if it was previously found 24 | // 25 | // CommandAppend: appends to a key's value 26 | // 27 | // If Key wasn't previously in the store, it's created with the given 28 | // Value (as if it was present with an empty value before this operation). 29 | // 30 | // * Performs Store[Key] = Store[Key] + Value, where "+" is a string append 31 | // * CompareValue is ignored 32 | // * ResultFound is true iff the Key was found in the store 33 | // * ResultValue is the old value of Key, before the append 34 | // 35 | // CommandCAS: atomic compare-and-swap, performs: 36 | // 37 | // if Store[Key] == CompareValue { 38 | // Store[Key] = Value 39 | // } else { 40 | // nop 41 | // } 42 | // 43 | // * Key is the key this command acts on 44 | // * CompareValue is the previous value the command compares to 45 | // * Value is the new value the command assigns 46 | // * ResultFound is true iff Key was previously found in the store 47 | // * ResultValue is the old value of Key, if it was previously found 48 | type Command struct { 49 | Kind CommandKind 50 | 51 | Key, Value string 52 | 53 | CompareValue string 54 | 55 | ResultValue string 56 | ResultFound bool 57 | 58 | // ServiceID is the Raft ID of the service submitting this command. 59 | ServiceID int 60 | 61 | // ClientID and RequestID uniquely identify the request+client. 62 | ClientID, RequestID int64 63 | 64 | // IsDuplicate is used to mark the command as a duplicate by the updater. When 65 | // the updater notices a command that has a client+request ID that has already 66 | // been executed, the command is not applied to the datastore; instead, 67 | // IsDuplicate is set to true. 
68 | IsDuplicate bool 69 | } 70 | 71 | type CommandKind int 72 | 73 | const ( 74 | CommandInvalid CommandKind = iota 75 | CommandGet 76 | CommandPut 77 | CommandAppend 78 | CommandCAS 79 | ) 80 | 81 | var commandName = map[CommandKind]string{ 82 | CommandInvalid: "invalid", 83 | CommandGet: "get", 84 | CommandPut: "put", 85 | CommandAppend: "append", 86 | CommandCAS: "cas", 87 | } 88 | 89 | func (ck CommandKind) String() string { 90 | return commandName[ck] 91 | } 92 | -------------------------------------------------------------------------------- /part5kv/kvservice/datastore.go: -------------------------------------------------------------------------------- 1 | // Basic in-memory datastore backing the KV service. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package kvservice 6 | 7 | import "sync" 8 | 9 | // DataStore is a simple, concurrency-safe key-value store used as a backend 10 | // for kvservice. 11 | type DataStore struct { 12 | sync.Mutex 13 | data map[string]string 14 | } 15 | 16 | func NewDataStore() *DataStore { 17 | return &DataStore{ 18 | data: make(map[string]string), 19 | } 20 | } 21 | 22 | // Get fetches the value of key from the datastore and returns (v, true) if 23 | // it was found or ("", false) otherwise. 24 | func (ds *DataStore) Get(key string) (string, bool) { 25 | ds.Lock() 26 | defer ds.Unlock() 27 | 28 | value, ok := ds.data[key] 29 | return value, ok 30 | } 31 | 32 | // Put assigns datastore[key]=value, and returns (v, true) if the key was 33 | // previously in the store and its value was v, or ("", false) otherwise. 34 | func (ds *DataStore) Put(key, value string) (string, bool) { 35 | ds.Lock() 36 | defer ds.Unlock() 37 | 38 | v, ok := ds.data[key] 39 | ds.data[key] = value 40 | return v, ok 41 | } 42 | 43 | // Append performs an append: 44 | // If key exists and its previous value is v, its value is updated to 45 | // v+value and (v, true) is returned. 46 | // If key doesn't exist, then assigns datastore[key]=value and ("", false) is 47 | // returned. 48 | func (ds *DataStore) Append(key, value string) (string, bool) { 49 | ds.Lock() 50 | defer ds.Unlock() 51 | 52 | v, ok := ds.data[key] 53 | ds.data[key] += value 54 | return v, ok 55 | } 56 | 57 | // CAS performs an atomic compare-and-swap: 58 | // if key exists and its prev value == compare, write value, else nop 59 | // The prev value and whether the key existed in the store is returned. 
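//
// For example (an illustrative sequence, not taken from the original source):
//
//	ds := NewDataStore()
//	ds.Put("k", "v1")
//	ds.CAS("k", "v1", "v2") // returns ("v1", true); the store now holds k="v2"
//	ds.CAS("k", "v1", "v3") // returns ("v2", true); compare fails, k keeps "v2"
//	ds.CAS("q", "x", "y")   // returns ("", false); key absent, nothing written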
60 | func (ds *DataStore) CAS(key, compare, value string) (string, bool) { 61 | ds.Lock() 62 | defer ds.Unlock() 63 | 64 | prevValue, ok := ds.data[key] 65 | if ok && prevValue == compare { 66 | ds.data[key] = value 67 | } 68 | return prevValue, ok 69 | } 70 | -------------------------------------------------------------------------------- /part5kv/kvservice/datastore_test.go: -------------------------------------------------------------------------------- 1 | package kvservice 2 | 3 | import "testing" 4 | 5 | func checkPutPrev(t *testing.T, ds *DataStore, k string, v string, prev string, hasPrev bool) { 6 | t.Helper() 7 | prevVal, ok := ds.Put(k, v) 8 | if hasPrev != ok || prevVal != prev { 9 | t.Errorf("prevVal=%s, ok=%v; want %s,%v", prevVal, ok, prev, hasPrev) 10 | } 11 | } 12 | 13 | func checkGet(t *testing.T, ds *DataStore, k string, v string, found bool) { 14 | t.Helper() 15 | gotV, ok := ds.Get(k) 16 | if found != ok || v != gotV { 17 | t.Errorf("gotV=%s, ok=%v; want %s,%v", gotV, ok, v, found) 18 | } 19 | } 20 | 21 | func checkCAS(t *testing.T, ds *DataStore, k string, comp string, v string, prev string, found bool) { 22 | t.Helper() 23 | gotPrev, gotFound := ds.CAS(k, comp, v) 24 | if found != gotFound || prev != gotPrev { 25 | t.Errorf("gotPrev=%s, gotFound=%v; want %s,%v", gotPrev, gotFound, prev, found) 26 | } 27 | } 28 | 29 | func TestGetPut(t *testing.T) { 30 | ds := NewDataStore() 31 | 32 | checkGet(t, ds, "foo", "", false) 33 | checkPutPrev(t, ds, "foo", "bar", "", false) 34 | checkGet(t, ds, "foo", "bar", true) 35 | checkPutPrev(t, ds, "foo", "baz", "bar", true) 36 | checkGet(t, ds, "foo", "baz", true) 37 | checkPutPrev(t, ds, "nix", "hard", "", false) 38 | } 39 | 40 | func TestCASBasic(t *testing.T) { 41 | ds := NewDataStore() 42 | ds.Put("foo", "bar") 43 | ds.Put("sun", "beam") 44 | 45 | // CAS replace existing value 46 | checkCAS(t, ds, "foo", "mex", "bro", "bar", true) 47 | checkCAS(t, ds, "foo", "bar", "bro", "bar", true) 48 | checkGet(t, ds, "foo", "bro", true) 49 | 50 | // CAS when key not found 51 | checkCAS(t, ds, "goa", "mm", "vv", "", false) 52 | checkGet(t, ds, "goa", "", false) 53 | 54 | // ... and now this key assigned 55 | ds.Put("goa", "tva") 56 | checkCAS(t, ds, "goa", "mm", "vv", "tva", true) 57 | checkCAS(t, ds, "goa", "mm", "vv", "tva", true) 58 | } 59 | 60 | func TestCASConcurrent(t *testing.T) { 61 | // Run this with -race 62 | ds := NewDataStore() 63 | ds.Put("foo", "bar") 64 | ds.Put("sun", "beam") 65 | 66 | go func() { 67 | for range 2000 { 68 | ds.CAS("foo", "bar", "baz") 69 | } 70 | }() 71 | go func() { 72 | for range 2000 { 73 | ds.CAS("foo", "baz", "bar") 74 | } 75 | }() 76 | 77 | v, _ := ds.Get("foo") 78 | if v != "bar" && v != "baz" { 79 | t.Errorf("got v=%s, want bar or baz", v) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /part5kv/kvservice/json.go: -------------------------------------------------------------------------------- 1 | // JSON utilities for the KV service. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package kvservice 6 | 7 | import ( 8 | "encoding/json" 9 | "fmt" 10 | "mime" 11 | "net/http" 12 | ) 13 | 14 | // readRequestJSON expects req to have a JSON content type with a body that 15 | // contains a JSON-encoded value complying with the underlying type of target. 16 | // It populates target, or returns an error. 
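//
// A typical call mirrors what the handlers in kvservice.go do, e.g.:
//
//	pr := &api.PutRequest{}
//	if err := readRequestJSON(req, pr); err != nil {
//		http.Error(w, err.Error(), http.StatusBadRequest)
//		return
//	}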
17 | func readRequestJSON(req *http.Request, target any) error { 18 | contentType := req.Header.Get("Content-Type") 19 | mediaType, _, err := mime.ParseMediaType(contentType) 20 | if err != nil { 21 | return err 22 | } 23 | if mediaType != "application/json" { 24 | return fmt.Errorf("expect application/json Content-Type, got %s", mediaType) 25 | } 26 | 27 | dec := json.NewDecoder(req.Body) 28 | dec.DisallowUnknownFields() 29 | return dec.Decode(target) 30 | } 31 | 32 | // renderJSON renders 'v' as JSON and writes it as a response into w. 33 | func renderJSON(w http.ResponseWriter, v any) { 34 | js, err := json.Marshal(v) 35 | if err != nil { 36 | http.Error(w, err.Error(), http.StatusInternalServerError) 37 | return 38 | } 39 | w.Header().Set("Content-Type", "application/json") 40 | w.Write(js) 41 | } 42 | -------------------------------------------------------------------------------- /part5kv/kvservice/kvservice.go: -------------------------------------------------------------------------------- 1 | // KV service based on Raft - main implementation file. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package kvservice 6 | 7 | import ( 8 | "context" 9 | "encoding/gob" 10 | "fmt" 11 | "log" 12 | "net" 13 | "net/http" 14 | "sync" 15 | "sync/atomic" 16 | "time" 17 | 18 | "github.com/eliben/raft/part3/raft" 19 | "github.com/eliben/raft/part5kv/api" 20 | ) 21 | 22 | const DebugKV = 1 23 | 24 | type KVService struct { 25 | sync.Mutex 26 | 27 | // id is the service ID in a Raft cluster. 28 | id int 29 | 30 | // rs is the Raft server that contains a CM 31 | rs *raft.Server 32 | 33 | // commitChan is the commit channel passed to the Raft server; when commands 34 | // are committed, they're sent on this channel. 35 | commitChan chan raft.CommitEntry 36 | 37 | // commitSubs are the commit subscriptions currently active in this service. 38 | // See the createCommitSubscription method for more details. 39 | commitSubs map[int]chan Command 40 | 41 | // ds is the underlying data store implementing the KV DB. 42 | ds *DataStore 43 | 44 | // srv is the HTTP server exposed by the service to the external world. 45 | srv *http.Server 46 | 47 | // lastRequestIDPerClient helps de-duplicate client requests. It stores the 48 | // last request ID that was applied by the updater per client; the assumption 49 | // is that client IDs are unique (keys in this map), and for each client the 50 | // requests IDs (values in this map) are unique and monotonically increasing. 51 | lastRequestIDPerClient map[int64]int64 52 | 53 | // delayNextHTTPResponse will be on when the service was requested to 54 | // delay its next HTTP response to the client. This flips back to off after 55 | // use. 56 | delayNextHTTPResponse atomic.Bool 57 | } 58 | 59 | // New creates a new KVService 60 | // 61 | // - id: this service's ID within its Raft cluster 62 | // - peerIds: the IDs of the other Raft peers in the cluster 63 | // - storage: a raft.Storage implementation the service can use for 64 | // durable storage to persist its state. 65 | // - readyChan: notification channel that has to be closed when the Raft 66 | // cluster is ready (all peers are up and connected to each other). 
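//
// An illustrative construction, mirroring what the test harness does (the
// concrete values here are hypothetical):
//
//	storage := raft.NewMapStorage()
//	ready := make(chan any)
//	kvs := New(0, []int{1, 2}, storage, ready)
//	// ... connect the Raft peers to each other, then:
//	close(ready)
//	kvs.ServeHTTP(14200)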
67 | func New(id int, peerIds []int, storage raft.Storage, readyChan <-chan any) *KVService { 68 | gob.Register(Command{}) 69 | commitChan := make(chan raft.CommitEntry) 70 | 71 | // raft.Server handles the Raft RPCs in the cluster; after Serve is called, 72 | // it's ready to accept RPC connections from peers. 73 | rs := raft.NewServer(id, peerIds, storage, readyChan, commitChan) 74 | rs.Serve() 75 | kvs := &KVService{ 76 | id: id, 77 | rs: rs, 78 | commitChan: commitChan, 79 | ds: NewDataStore(), 80 | commitSubs: make(map[int]chan Command), 81 | lastRequestIDPerClient: make(map[int64]int64), 82 | } 83 | 84 | kvs.runUpdater() 85 | return kvs 86 | } 87 | 88 | // IsLeader checks if kvs thinks it's the leader in the Raft cluster. Only 89 | // use this for testin and debugging. 90 | func (kvs *KVService) IsLeader() bool { 91 | return kvs.rs.IsLeader() 92 | } 93 | 94 | // ServeHTTP starts serving the KV REST API on the given TCP port. This 95 | // function does not block; it fires up the HTTP server and returns. To properly 96 | // shut down the server, call the Shutdown method. 97 | func (kvs *KVService) ServeHTTP(port int) { 98 | if kvs.srv != nil { 99 | panic("ServeHTTP called with existing server") 100 | } 101 | mux := http.NewServeMux() 102 | mux.HandleFunc("POST /get/", kvs.handleGet) 103 | mux.HandleFunc("POST /put/", kvs.handlePut) 104 | mux.HandleFunc("POST /append/", kvs.handleAppend) 105 | mux.HandleFunc("POST /cas/", kvs.handleCAS) 106 | 107 | kvs.srv = &http.Server{ 108 | Addr: fmt.Sprintf(":%d", port), 109 | Handler: mux, 110 | } 111 | 112 | go func() { 113 | kvs.kvlog("serving HTTP on %s", kvs.srv.Addr) 114 | if err := kvs.srv.ListenAndServe(); err != http.ErrServerClosed { 115 | log.Fatal(err) 116 | } 117 | kvs.srv = nil 118 | }() 119 | } 120 | 121 | // Shutdown performs a proper shutdown of the service: shuts down the Raft RPC 122 | // server, and shuts down the main HTTP service. It only returns once shutdown 123 | // is complete. 124 | // Note: DisconnectFromRaftPeers on all peers in the cluster should be done 125 | // before Shutdown is called. 126 | func (kvs *KVService) Shutdown() error { 127 | kvs.kvlog("shutting down Raft server") 128 | kvs.rs.Shutdown() 129 | kvs.kvlog("closing commitChan") 130 | close(kvs.commitChan) 131 | 132 | if kvs.srv != nil { 133 | kvs.kvlog("shutting down HTTP server") 134 | ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) 135 | defer cancel() 136 | kvs.srv.Shutdown(ctx) 137 | kvs.kvlog("HTTP shutdown complete") 138 | return nil 139 | } 140 | 141 | return nil 142 | } 143 | 144 | // DelayNextHTTPResponse instructs the service to delay the response to the 145 | // next HTTP request from the client. The service still acts on the request 146 | // as usual, just the HTTP response is delayed. This only applies to a single 147 | // response - the bit flips back to off after use. 
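// Tests use this (through the harness) to make a client time out and retry a
// request the service has already executed, which exercises the
// duplicate-request detection path (see TestAppendLinearizableAfterDelay in
// system_test.go).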
148 | func (kvs *KVService) DelayNextHTTPResponse() { 149 | kvs.delayNextHTTPResponse.Store(true) 150 | } 151 | 152 | func (kvs *KVService) sendHTTPResponse(w http.ResponseWriter, v any) { 153 | if kvs.delayNextHTTPResponse.Load() { 154 | kvs.delayNextHTTPResponse.Store(false) 155 | time.Sleep(300 * time.Millisecond) 156 | } 157 | kvs.kvlog("sending response %#v", v) 158 | renderJSON(w, v) 159 | } 160 | 161 | func (kvs *KVService) handlePut(w http.ResponseWriter, req *http.Request) { 162 | pr := &api.PutRequest{} 163 | if err := readRequestJSON(req, pr); err != nil { 164 | http.Error(w, err.Error(), http.StatusBadRequest) 165 | return 166 | } 167 | kvs.kvlog("HTTP PUT %v", pr) 168 | 169 | // Submit a command into the Raft server; this is the state change in the 170 | // replicated state machine built on top of the Raft log. 171 | cmd := Command{ 172 | Kind: CommandPut, 173 | Key: pr.Key, 174 | Value: pr.Value, 175 | ServiceID: kvs.id, 176 | ClientID: pr.ClientID, 177 | RequestID: pr.RequestID, 178 | } 179 | logIndex := kvs.rs.Submit(cmd) 180 | // If we're not the Raft leader, send an appropriate status 181 | if logIndex < 0 { 182 | kvs.sendHTTPResponse(w, api.PutResponse{RespStatus: api.StatusNotLeader}) 183 | return 184 | } 185 | 186 | // Subscribe for a commit update for our log index. Then wait for it to 187 | // be delivered. 188 | sub := kvs.createCommitSubscription(logIndex) 189 | 190 | // Wait on the sub channel: the updater will deliver a value when the Raft 191 | // log has a commit at logIndex. To ensure clean shutdown of the service, 192 | // also select on the request context - if the request is canceled, this 193 | // handler aborts without sending data back to the client. 194 | select { 195 | case commitCmd := <-sub: 196 | // If this is our command, all is good! If it's some other server's command, 197 | // this means we lost leadership at some point and should return an error 198 | // to the client. 199 | if commitCmd.ServiceID == kvs.id { 200 | if commitCmd.IsDuplicate { 201 | // If this command is a duplicate, it wasn't executed as a result of 202 | // this request. Notify the client with a special status. 203 | kvs.sendHTTPResponse(w, api.PutResponse{ 204 | RespStatus: api.StatusDuplicateRequest, 205 | }) 206 | } else { 207 | kvs.sendHTTPResponse(w, api.PutResponse{ 208 | RespStatus: api.StatusOK, 209 | KeyFound: commitCmd.ResultFound, 210 | PrevValue: commitCmd.ResultValue, 211 | }) 212 | } 213 | } else { 214 | kvs.sendHTTPResponse(w, api.PutResponse{RespStatus: api.StatusFailedCommit}) 215 | } 216 | case <-req.Context().Done(): 217 | return 218 | } 219 | } 220 | 221 | // The details of these handlers are very similar to handlePut: refer to that 222 | // function for detailed comments. 
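// In outline, each handler follows the same sequence:
//
//	decode the JSON request
//	-> Submit a Command to the Raft server (reply StatusNotLeader if the
//	   returned log index is negative)
//	-> createCommitSubscription(logIndex) and wait on the returned channel
//	-> translate the committed Command (ours, a duplicate, or another
//	   service's) into the appropriate api.*Response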
223 | func (kvs *KVService) handleAppend(w http.ResponseWriter, req *http.Request) { 224 | ar := &api.AppendRequest{} 225 | if err := readRequestJSON(req, ar); err != nil { 226 | http.Error(w, err.Error(), http.StatusBadRequest) 227 | return 228 | } 229 | kvs.kvlog("HTTP APPEND %v", ar) 230 | 231 | cmd := Command{ 232 | Kind: CommandAppend, 233 | Key: ar.Key, 234 | Value: ar.Value, 235 | ServiceID: kvs.id, 236 | ClientID: ar.ClientID, 237 | RequestID: ar.RequestID, 238 | } 239 | logIndex := kvs.rs.Submit(cmd) 240 | if logIndex < 0 { 241 | kvs.sendHTTPResponse(w, api.AppendResponse{RespStatus: api.StatusNotLeader}) 242 | return 243 | } 244 | 245 | sub := kvs.createCommitSubscription(logIndex) 246 | 247 | select { 248 | case commitCmd := <-sub: 249 | if commitCmd.ServiceID == kvs.id { 250 | if commitCmd.IsDuplicate { 251 | kvs.sendHTTPResponse(w, api.AppendResponse{ 252 | RespStatus: api.StatusDuplicateRequest, 253 | }) 254 | } else { 255 | kvs.sendHTTPResponse(w, api.AppendResponse{ 256 | RespStatus: api.StatusOK, 257 | KeyFound: commitCmd.ResultFound, 258 | PrevValue: commitCmd.ResultValue, 259 | }) 260 | } 261 | } else { 262 | kvs.sendHTTPResponse(w, api.AppendResponse{RespStatus: api.StatusFailedCommit}) 263 | } 264 | case <-req.Context().Done(): 265 | return 266 | } 267 | } 268 | 269 | func (kvs *KVService) handleGet(w http.ResponseWriter, req *http.Request) { 270 | gr := &api.GetRequest{} 271 | if err := readRequestJSON(req, gr); err != nil { 272 | http.Error(w, err.Error(), http.StatusBadRequest) 273 | return 274 | } 275 | kvs.kvlog("HTTP GET %v", gr) 276 | 277 | cmd := Command{ 278 | Kind: CommandGet, 279 | Key: gr.Key, 280 | ServiceID: kvs.id, 281 | ClientID: gr.ClientID, 282 | RequestID: gr.RequestID, 283 | } 284 | logIndex := kvs.rs.Submit(cmd) 285 | if logIndex < 0 { 286 | kvs.sendHTTPResponse(w, api.GetResponse{RespStatus: api.StatusNotLeader}) 287 | return 288 | } 289 | 290 | sub := kvs.createCommitSubscription(logIndex) 291 | 292 | select { 293 | case commitCmd := <-sub: 294 | if commitCmd.ServiceID == kvs.id { 295 | if commitCmd.IsDuplicate { 296 | kvs.sendHTTPResponse(w, api.GetResponse{ 297 | RespStatus: api.StatusDuplicateRequest, 298 | }) 299 | } else { 300 | kvs.sendHTTPResponse(w, api.GetResponse{ 301 | RespStatus: api.StatusOK, 302 | KeyFound: commitCmd.ResultFound, 303 | Value: commitCmd.ResultValue, 304 | }) 305 | } 306 | } else { 307 | kvs.sendHTTPResponse(w, api.GetResponse{RespStatus: api.StatusFailedCommit}) 308 | } 309 | case <-req.Context().Done(): 310 | return 311 | } 312 | } 313 | 314 | func (kvs *KVService) handleCAS(w http.ResponseWriter, req *http.Request) { 315 | cr := &api.CASRequest{} 316 | if err := readRequestJSON(req, cr); err != nil { 317 | http.Error(w, err.Error(), http.StatusBadRequest) 318 | return 319 | } 320 | kvs.kvlog("HTTP CAS %v", cr) 321 | 322 | cmd := Command{ 323 | Kind: CommandCAS, 324 | Key: cr.Key, 325 | Value: cr.Value, 326 | CompareValue: cr.CompareValue, 327 | ServiceID: kvs.id, 328 | ClientID: cr.ClientID, 329 | RequestID: cr.RequestID, 330 | } 331 | logIndex := kvs.rs.Submit(cmd) 332 | if logIndex < 0 { 333 | kvs.sendHTTPResponse(w, api.CASResponse{RespStatus: api.StatusNotLeader}) 334 | return 335 | } 336 | 337 | sub := kvs.createCommitSubscription(logIndex) 338 | 339 | select { 340 | case commitCmd := <-sub: 341 | if commitCmd.ServiceID == kvs.id { 342 | if commitCmd.IsDuplicate { 343 | kvs.sendHTTPResponse(w, api.CASResponse{ 344 | RespStatus: api.StatusDuplicateRequest, 345 | }) 346 | } else { 347 | kvs.sendHTTPResponse(w, 
api.CASResponse{ 348 | RespStatus: api.StatusOK, 349 | KeyFound: commitCmd.ResultFound, 350 | PrevValue: commitCmd.ResultValue, 351 | }) 352 | } 353 | } else { 354 | kvs.sendHTTPResponse(w, api.CASResponse{RespStatus: api.StatusFailedCommit}) 355 | } 356 | case <-req.Context().Done(): 357 | return 358 | } 359 | } 360 | 361 | // runUpdater runs the "updater" goroutine that reads the commit channel 362 | // from Raft and updates the data store; this is the Replicated State Machine 363 | // part of distributed consensus! 364 | // It also notifies subscribers (registered with createCommitSubscription). 365 | func (kvs *KVService) runUpdater() { 366 | go func() { 367 | for entry := range kvs.commitChan { 368 | cmd := entry.Command.(Command) 369 | 370 | // Duplicate command detection. 371 | // Only accept this request if its ID is higher than the last request from 372 | // this client. 373 | lastReqID, ok := kvs.lastRequestIDPerClient[cmd.ClientID] 374 | if ok && lastReqID >= cmd.RequestID { 375 | kvs.kvlog("duplicate request id=%v, from client id=%v", cmd.RequestID, cmd.ClientID) 376 | // Duplicate: this request ID was already applied in the past! 377 | cmd = Command{ 378 | Kind: cmd.Kind, 379 | IsDuplicate: true, 380 | } 381 | } else { 382 | kvs.lastRequestIDPerClient[cmd.ClientID] = cmd.RequestID 383 | 384 | switch cmd.Kind { 385 | case CommandGet: 386 | cmd.ResultValue, cmd.ResultFound = kvs.ds.Get(cmd.Key) 387 | case CommandPut: 388 | cmd.ResultValue, cmd.ResultFound = kvs.ds.Put(cmd.Key, cmd.Value) 389 | case CommandAppend: 390 | cmd.ResultValue, cmd.ResultFound = kvs.ds.Append(cmd.Key, cmd.Value) 391 | case CommandCAS: 392 | cmd.ResultValue, cmd.ResultFound = kvs.ds.CAS(cmd.Key, cmd.CompareValue, cmd.Value) 393 | default: 394 | panic(fmt.Errorf("unexpected command %v", cmd)) 395 | } 396 | } 397 | 398 | // Forward this command to the subscriber interested in its index, and 399 | // close the subscription - it's single-use. 400 | if sub := kvs.popCommitSubscription(entry.Index); sub != nil { 401 | sub <- cmd 402 | close(sub) 403 | } 404 | } 405 | }() 406 | } 407 | 408 | // createCommitSubscription creates a "commit subscription" for a certain log 409 | // index. It's used by client request handlers that submit a command to the 410 | // Raft CM. createCommitSubscription(index) means "I want to be notified when 411 | // an entry is committed at this index in the Raft log". The entry is delivered 412 | // on the returend (buffered) channel by the updater goroutine, after which 413 | // the channel is closed and the subscription is automatically canceled. 414 | func (kvs *KVService) createCommitSubscription(logIndex int) chan Command { 415 | kvs.Lock() 416 | defer kvs.Unlock() 417 | 418 | if _, exists := kvs.commitSubs[logIndex]; exists { 419 | panic(fmt.Sprintf("duplicate commit subscription for logIndex=%d", logIndex)) 420 | } 421 | 422 | ch := make(chan Command, 1) 423 | kvs.commitSubs[logIndex] = ch 424 | return ch 425 | } 426 | 427 | func (kvs *KVService) popCommitSubscription(logIndex int) chan Command { 428 | kvs.Lock() 429 | defer kvs.Unlock() 430 | 431 | ch := kvs.commitSubs[logIndex] 432 | delete(kvs.commitSubs, logIndex) 433 | return ch 434 | } 435 | 436 | // kvlog logs a debugging message if DebugKV > 0 437 | func (kvs *KVService) kvlog(format string, args ...any) { 438 | if DebugKV > 0 { 439 | format = fmt.Sprintf("[kv %d] ", kvs.id) + format 440 | log.Printf(format, args...) 441 | } 442 | } 443 | 444 | // The following functions exist for testing purposes, to simulate faults. 
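// For example, the test harness simulates a network partition of service id
// by calling DisconnectFromAllRaftPeers on it and DisconnectFromRaftPeer(id)
// on every other service (see DisconnectServiceFromPeers in testharness.go).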
445 | 446 | func (kvs *KVService) ConnectToRaftPeer(peerId int, addr net.Addr) error { 447 | return kvs.rs.ConnectToPeer(peerId, addr) 448 | } 449 | 450 | func (kvs *KVService) DisconnectFromAllRaftPeers() { 451 | kvs.rs.DisconnectAll() 452 | } 453 | 454 | func (kvs *KVService) DisconnectFromRaftPeer(peerId int) error { 455 | return kvs.rs.DisconnectPeer(peerId) 456 | } 457 | 458 | func (kvs *KVService) GetRaftListenAddr() net.Addr { 459 | return kvs.rs.GetListenAddr() 460 | } 461 | -------------------------------------------------------------------------------- /part5kv/system_test.go: -------------------------------------------------------------------------------- 1 | // Test KV services and clients. 2 | // 3 | // It's called a "system" test because it doesn't test a component (like 4 | // KVService) in isolation; rather, the test harness constructs a complete 5 | // system comprising of a cluster of services and some KVClients to exercise it. 6 | // 7 | // Eli Bendersky [https://eli.thegreenplace.net] 8 | // This code is in the public domain. 9 | package main 10 | 11 | import ( 12 | "context" 13 | "fmt" 14 | "sync" 15 | "testing" 16 | "time" 17 | 18 | "github.com/fortytw2/leaktest" 19 | ) 20 | 21 | func sleepMs(n int) { 22 | time.Sleep(time.Duration(n) * time.Millisecond) 23 | } 24 | 25 | func TestSetupHarness(t *testing.T) { 26 | h := NewHarness(t, 3) 27 | defer h.Shutdown() 28 | sleepMs(80) 29 | } 30 | 31 | func TestClientRequestBeforeConsensus(t *testing.T) { 32 | h := NewHarness(t, 3) 33 | defer h.Shutdown() 34 | sleepMs(10) 35 | 36 | // The client will keep cycling between the services until a leader is found. 37 | c1 := h.NewClient() 38 | h.CheckPut(c1, "llave", "cosa") 39 | sleepMs(80) 40 | } 41 | 42 | func TestBasicPutGetSingleClient(t *testing.T) { 43 | // Basic smoke test: send one Put, followed by one Get from a single client. 44 | h := NewHarness(t, 3) 45 | defer h.Shutdown() 46 | h.CheckSingleLeader() 47 | 48 | c1 := h.NewClient() 49 | h.CheckPut(c1, "llave", "cosa") 50 | 51 | h.CheckGet(c1, "llave", "cosa") 52 | sleepMs(80) 53 | } 54 | 55 | func TestPutPrevValue(t *testing.T) { 56 | h := NewHarness(t, 3) 57 | defer h.Shutdown() 58 | h.CheckSingleLeader() 59 | 60 | c1 := h.NewClient() 61 | // Make sure we get the expected found/prev values before and after Put 62 | prev, found := h.CheckPut(c1, "llave", "cosa") 63 | if found || prev != "" { 64 | t.Errorf(`got found=%v, prev=%v, want false/""`, found, prev) 65 | } 66 | 67 | prev, found = h.CheckPut(c1, "llave", "frodo") 68 | if !found || prev != "cosa" { 69 | t.Errorf(`got found=%v, prev=%v, want true/"cosa"`, found, prev) 70 | } 71 | 72 | // A different key... 
73 | 	prev, found = h.CheckPut(c1, "mafteah", "davar")
74 | 	if found || prev != "" {
75 | 		t.Errorf(`got found=%v, prev=%v, want false/""`, found, prev)
76 | 	}
77 | }
78 | 
79 | func TestBasicAppendSameClient(t *testing.T) {
80 | 	h := NewHarness(t, 3)
81 | 	defer h.Shutdown()
82 | 	h.CheckSingleLeader()
83 | 
84 | 	c1 := h.NewClient()
85 | 	h.CheckPut(c1, "foo", "bar")
86 | 
87 | 	// Append to a key that existed
88 | 	prev, found := h.CheckAppend(c1, "foo", "baz")
89 | 	if !found || prev != "bar" {
90 | 		t.Errorf(`got found=%v, prev=%v, want true/"bar"`, found, prev)
91 | 	}
92 | 	h.CheckGet(c1, "foo", "barbaz")
93 | 
94 | 	// Append to a key that didn't exist
95 | 	prev, found = h.CheckAppend(c1, "mix", "match")
96 | 	if found || prev != "" {
97 | 		t.Errorf(`got found=%v, prev=%v, want false/""`, found, prev)
98 | 	}
99 | 	h.CheckGet(c1, "mix", "match")
100 | }
101 | 
102 | func TestBasicPutGetDifferentClients(t *testing.T) {
103 | 	defer leaktest.CheckTimeout(t, 100*time.Millisecond)()
104 | 
105 | 	h := NewHarness(t, 3)
106 | 	defer h.Shutdown()
107 | 	h.CheckSingleLeader()
108 | 
109 | 	c1 := h.NewClient()
110 | 	h.CheckPut(c1, "k", "v")
111 | 
112 | 	c2 := h.NewClient()
113 | 	h.CheckGet(c2, "k", "v")
114 | 	sleepMs(80)
115 | }
116 | 
117 | func TestBasicAppendDifferentClients(t *testing.T) {
118 | 	h := NewHarness(t, 3)
119 | 	defer h.Shutdown()
120 | 	h.CheckSingleLeader()
121 | 
122 | 	c1 := h.NewClient()
123 | 	h.CheckPut(c1, "foo", "bar")
124 | 
125 | 	// Append to a key that existed
126 | 	c2 := h.NewClient()
127 | 	prev, found := h.CheckAppend(c2, "foo", "baz")
128 | 	if !found || prev != "bar" {
129 | 		t.Errorf(`got found=%v, prev=%v, want true/"bar"`, found, prev)
130 | 	}
131 | 	h.CheckGet(c1, "foo", "barbaz")
132 | 
133 | 	// Append to a key that didn't exist
134 | 	prev, found = h.CheckAppend(c2, "mix", "match")
135 | 	if found || prev != "" {
136 | 		t.Errorf(`got found=%v, prev=%v, want false/""`, found, prev)
137 | 	}
138 | 	h.CheckGet(c1, "mix", "match")
139 | }
140 | 
141 | func TestAppendDifferentLeaders(t *testing.T) {
142 | 	defer leaktest.CheckTimeout(t, 100*time.Millisecond)()
143 | 
144 | 	h := NewHarness(t, 3)
145 | 	defer h.Shutdown()
146 | 	lid := h.CheckSingleLeader()
147 | 
148 | 	c1 := h.NewClient()
149 | 	h.CheckAppend(c1, "foo", "bar")
150 | 	h.CheckGet(c1, "foo", "bar")
151 | 
152 | 	// Crash a leader and wait for the cluster to establish a new leader.
153 | h.CrashService(lid) 154 | h.CheckSingleLeader() 155 | 156 | c2 := h.NewClient() 157 | h.CheckAppend(c2, "foo", "baz") 158 | h.CheckGet(c2, "foo", "barbaz") 159 | 160 | h.RestartService(lid) 161 | c3 := h.NewClient() 162 | sleepMs(300) 163 | h.CheckGet(c3, "foo", "barbaz") 164 | } 165 | 166 | func TestCASBasic(t *testing.T) { 167 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 168 | 169 | h := NewHarness(t, 3) 170 | defer h.Shutdown() 171 | h.CheckSingleLeader() 172 | 173 | c1 := h.NewClient() 174 | h.CheckPut(c1, "k", "v") 175 | 176 | if pv, found := h.CheckCAS(c1, "k", "v", "newv"); pv != "v" || !found { 177 | t.Errorf("got %s,%v, want replacement", pv, found) 178 | } 179 | } 180 | 181 | func TestCASConcurrent(t *testing.T) { 182 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 183 | 184 | h := NewHarness(t, 3) 185 | defer h.Shutdown() 186 | h.CheckSingleLeader() 187 | c := h.NewClient() 188 | h.CheckPut(c, "foo", "mexico") 189 | 190 | var wg sync.WaitGroup 191 | wg.Add(1) 192 | go func() { 193 | defer wg.Done() 194 | c := h.NewClient() 195 | for range 20 { 196 | h.CheckCAS(c, "foo", "bar", "bomba") 197 | } 198 | }() 199 | 200 | // Once a client homes in on the right leader, it takes 4-5 ms to roundtrip 201 | // a command. For the first 50 ms after launching the CAS goroutines, 'foo' 202 | // has the wrong value so the CAS doesn't work, but then it will... 203 | sleepMs(50) 204 | c2 := h.NewClient() 205 | h.CheckPut(c2, "foo", "bar") 206 | 207 | sleepMs(300) 208 | h.CheckGet(c2, "foo", "bomba") 209 | 210 | wg.Wait() 211 | } 212 | 213 | func TestConcurrentClientsPutsAndGets(t *testing.T) { 214 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 215 | 216 | // Test that we can submit multiple PUT and GET requests concurrently, with 217 | // one goroutine per request launching at the same time. 218 | h := NewHarness(t, 3) 219 | defer h.Shutdown() 220 | h.CheckSingleLeader() 221 | 222 | n := 9 223 | for i := range n { 224 | go func() { 225 | c := h.NewClient() 226 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 227 | if f { 228 | t.Errorf("got key found for %d, want false", i) 229 | } 230 | }() 231 | } 232 | sleepMs(150) 233 | 234 | for i := range n { 235 | go func() { 236 | c := h.NewClient() 237 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 238 | }() 239 | } 240 | sleepMs(150) 241 | } 242 | 243 | func Test5ServerConcurrentClientsPutsAndGets(t *testing.T) { 244 | // Similar to the previous test, but this one has a 5-server Raft cluster. 245 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 246 | 247 | h := NewHarness(t, 5) 248 | defer h.Shutdown() 249 | h.CheckSingleLeader() 250 | 251 | n := 9 252 | for i := range n { 253 | go func() { 254 | c := h.NewClient() 255 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 256 | if f { 257 | t.Errorf("got key found for %d, want false", i) 258 | } 259 | }() 260 | } 261 | sleepMs(150) 262 | 263 | for i := range n { 264 | go func() { 265 | c := h.NewClient() 266 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 267 | }() 268 | } 269 | sleepMs(150) 270 | } 271 | 272 | func TestDisconnectLeaderAfterPuts(t *testing.T) { 273 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 274 | 275 | h := NewHarness(t, 3) 276 | defer h.Shutdown() 277 | lid := h.CheckSingleLeader() 278 | 279 | // Submit some PUT commands. 
280 | n := 4 281 | for i := range n { 282 | c := h.NewClient() 283 | h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 284 | } 285 | 286 | h.DisconnectServiceFromPeers(lid) 287 | sleepMs(300) 288 | newlid := h.CheckSingleLeader() 289 | 290 | if newlid == lid { 291 | t.Errorf("got the same leader") 292 | } 293 | 294 | // Trying to contact the disconnected leader will time out. 295 | c := h.NewClientSingleService(lid) 296 | h.CheckGetTimesOut(c, "key1") 297 | 298 | // GET commands expecting to get the right values 299 | for range 5 { 300 | c := h.NewClientWithRandomAddrsOrder() 301 | for j := range n { 302 | h.CheckGet(c, fmt.Sprintf("key%v", j), fmt.Sprintf("value%v", j)) 303 | } 304 | } 305 | 306 | // At the end of the test, reconnect the peers to avoid a goroutine leak. 307 | // In real scenarios, we expect that services will eventually be reconnected, 308 | // and if not - a single goroutine leaked is not an issue since the server 309 | // will end up being killed anyway. 310 | h.ReconnectServiceToPeers(lid) 311 | sleepMs(200) 312 | } 313 | 314 | func TestDisconnectLeaderAndFollower(t *testing.T) { 315 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 316 | 317 | h := NewHarness(t, 3) 318 | defer h.Shutdown() 319 | lid := h.CheckSingleLeader() 320 | 321 | // Submit some PUT commands. 322 | n := 4 323 | for i := range n { 324 | c := h.NewClient() 325 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 326 | if f { 327 | t.Errorf("got key found for %d, want false", i) 328 | } 329 | } 330 | 331 | // Disconnect leader and one other server; the cluster loses consensus 332 | // and client requests should now time out. 333 | h.DisconnectServiceFromPeers(lid) 334 | otherId := (lid + 1) % 3 335 | h.DisconnectServiceFromPeers(otherId) 336 | sleepMs(100) 337 | 338 | c := h.NewClient() 339 | h.CheckGetTimesOut(c, "key0") 340 | 341 | // Reconnect one server, but not the old leader. We should still get all 342 | // the right data back. 343 | h.ReconnectServiceToPeers(otherId) 344 | h.CheckSingleLeader() 345 | for i := range n { 346 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 347 | } 348 | 349 | // Reconnect the old leader. We should still get all the right data back. 350 | h.ReconnectServiceToPeers(lid) 351 | h.CheckSingleLeader() 352 | for i := range n { 353 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 354 | } 355 | sleepMs(100) 356 | } 357 | 358 | func TestCrashFollower(t *testing.T) { 359 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 360 | 361 | h := NewHarness(t, 3) 362 | defer h.Shutdown() 363 | lid := h.CheckSingleLeader() 364 | 365 | // Submit some PUT commands. 366 | n := 3 367 | for i := range n { 368 | c := h.NewClient() 369 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 370 | if f { 371 | t.Errorf("got key found for %d, want false", i) 372 | } 373 | } 374 | 375 | // Crash a non-leader 376 | otherId := (lid + 1) % 3 377 | h.CrashService(otherId) 378 | 379 | // Talking directly to the leader should still work... 
380 | for i := range n { 381 | c := h.NewClientSingleService(lid) 382 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 383 | } 384 | 385 | // Talking to the remaining live servers should also work 386 | for i := range n { 387 | c := h.NewClient() 388 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 389 | } 390 | } 391 | 392 | func TestCrashLeader(t *testing.T) { 393 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 394 | 395 | h := NewHarness(t, 3) 396 | defer h.Shutdown() 397 | lid := h.CheckSingleLeader() 398 | 399 | // Submit some PUT commands. 400 | n := 3 401 | for i := range n { 402 | c := h.NewClient() 403 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 404 | if f { 405 | t.Errorf("got key found for %d, want false", i) 406 | } 407 | } 408 | 409 | // Crash a leader and wait for the cluster to establish a new leader. 410 | h.CrashService(lid) 411 | h.CheckSingleLeader() 412 | 413 | // Talking to the remaining live servers should get the right data. 414 | for i := range n { 415 | c := h.NewClient() 416 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 417 | } 418 | } 419 | 420 | func TestCrashThenRestartLeader(t *testing.T) { 421 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 422 | 423 | h := NewHarness(t, 3) 424 | defer h.Shutdown() 425 | lid := h.CheckSingleLeader() 426 | 427 | // Submit some PUT commands. 428 | n := 3 429 | for i := range n { 430 | c := h.NewClient() 431 | _, f := h.CheckPut(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 432 | if f { 433 | t.Errorf("got key found for %d, want false", i) 434 | } 435 | } 436 | 437 | // Crash a leader and wait for the cluster to establish a new leader. 438 | h.CrashService(lid) 439 | h.CheckSingleLeader() 440 | 441 | // Talking to the remaining live servers should get the right data. 442 | for i := range n { 443 | c := h.NewClient() 444 | h.CheckGet(c, fmt.Sprintf("key%v", i), fmt.Sprintf("value%v", i)) 445 | } 446 | 447 | // Now restart the old leader: it will join the cluster and get all the 448 | // data. 449 | h.RestartService(lid) 450 | 451 | // Get data from services in different orders. 452 | for range 5 { 453 | c := h.NewClientWithRandomAddrsOrder() 454 | for j := range n { 455 | h.CheckGet(c, fmt.Sprintf("key%v", j), fmt.Sprintf("value%v", j)) 456 | } 457 | } 458 | } 459 | 460 | func TestAppendLinearizableAfterDelay(t *testing.T) { 461 | h := NewHarness(t, 3) 462 | defer h.Shutdown() 463 | lid := h.CheckSingleLeader() 464 | 465 | c1 := h.NewClient() 466 | 467 | // A sequence of put+append, check we get the right result. 468 | h.CheckPut(c1, "foo", "bar") 469 | h.CheckAppend(c1, "foo", "baz") 470 | h.CheckGet(c1, "foo", "barbaz") 471 | 472 | // Ask the service to delay the response to the next request, and send 473 | // an append. The client will retry this append, so the system has to be 474 | // resilient to this. It will report a duplicate because of the retries, 475 | // but the append will be applied successfully. 476 | h.DelayNextHTTPResponseFromService(lid) 477 | 478 | _, _, err := c1.Append(context.Background(), "foo", "mira") 479 | if err == nil { 480 | t.Errorf("got no error, want duplicate") 481 | } 482 | 483 | // Make sure the append was applied successfully, and just once. 
484 | sleepMs(300) 485 | h.CheckGet(c1, "foo", "barbazmira") 486 | } 487 | 488 | func TestAppendLinearizableAfterCrash(t *testing.T) { 489 | defer leaktest.CheckTimeout(t, 100*time.Millisecond)() 490 | 491 | h := NewHarness(t, 3) 492 | defer h.Shutdown() 493 | lid := h.CheckSingleLeader() 494 | 495 | c1 := h.NewClient() 496 | 497 | h.CheckAppend(c1, "foo", "bar") 498 | h.CheckGet(c1, "foo", "bar") 499 | 500 | // Delay response from the leader and then crash it. When a new leader is 501 | // selected, we expect to see one append committed (but only one!) 502 | h.DelayNextHTTPResponseFromService(lid) 503 | go func() { 504 | ctx, cancel := context.WithTimeout(h.ctx, 500*time.Millisecond) 505 | defer cancel() 506 | _, _, err := c1.Append(ctx, "foo", "mira") 507 | if err == nil { 508 | t.Errorf("got no error; want error") 509 | } 510 | tlog("received err: %v", err) 511 | }() 512 | 513 | sleepMs(50) 514 | h.CrashService(lid) 515 | h.CheckSingleLeader() 516 | c2 := h.NewClient() 517 | h.CheckGet(c2, "foo", "barmira") 518 | } 519 | -------------------------------------------------------------------------------- /part5kv/testharness.go: -------------------------------------------------------------------------------- 1 | // Test harness for testing the KV service and clients. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | package main 6 | 7 | import ( 8 | "context" 9 | "fmt" 10 | "log" 11 | "math/rand/v2" 12 | "net/http" 13 | "strings" 14 | "testing" 15 | "time" 16 | 17 | "github.com/eliben/raft/part3/raft" 18 | "github.com/eliben/raft/part5kv/kvclient" 19 | "github.com/eliben/raft/part5kv/kvservice" 20 | ) 21 | 22 | func init() { 23 | log.SetFlags(log.Ltime | log.Lmicroseconds) 24 | } 25 | 26 | // Test harness for kvservice and client system tests. 27 | type Harness struct { 28 | n int 29 | 30 | // kvCluster is a list of all KVService instances participating in a cluster. 31 | // A service's index into this list is its ID in the cluster. 32 | kvCluster []*kvservice.KVService 33 | 34 | // kvServiceAddrs is a list of HTTP addresses (localhost:<port>) the KV 35 | // services are accepting client commands on. 36 | kvServiceAddrs []string 37 | 38 | storage []*raft.MapStorage 39 | 40 | t *testing.T 41 | 42 | // connected has a bool per server in the cluster, specifying whether this server 43 | // is currently connected to peers (if false, it's partitioned and no messages 44 | // will pass to or from it). 45 | connected []bool 46 | 47 | // alive has a bool per server in the cluster, specifying whether this server 48 | // is currently alive (false means it has crashed and wasn't restarted yet). 49 | // connected implies alive. 50 | alive []bool 51 | 52 | // ctx is the context used for the HTTP client commands in tests. 53 | // ctxCancel is its cancellation function. 54 | ctx context.Context 55 | ctxCancel func() 56 | } 57 | 58 | func NewHarness(t *testing.T, n int) *Harness { 59 | kvss := make([]*kvservice.KVService, n) 60 | ready := make(chan any) 61 | connected := make([]bool, n) 62 | alive := make([]bool, n) 63 | storage := make([]*raft.MapStorage, n) 64 | 65 | // Create all KVService instances in this cluster.
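// Each service gets the IDs of all its peers and its own MapStorage. The ready channel is closed further below, once all Raft peers are connected, so services don't start Raft activity before the cluster is fully wired.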
66 | for i := range n { 67 | peerIds := make([]int, 0) 68 | for p := range n { 69 | if p != i { 70 | peerIds = append(peerIds, p) 71 | } 72 | } 73 | 74 | storage[i] = raft.NewMapStorage() 75 | kvss[i] = kvservice.New(i, peerIds, storage[i], ready) 76 | alive[i] = true 77 | } 78 | 79 | // Connect the Raft peers of the services to each other and close the ready 80 | // channel to signal to them it's all ready. 81 | for i := range n { 82 | for j := range n { 83 | if i != j { 84 | kvss[i].ConnectToRaftPeer(j, kvss[j].GetRaftListenAddr()) 85 | } 86 | } 87 | connected[i] = true 88 | } 89 | close(ready) 90 | 91 | // Each KVService instance serves a REST API on a different port 92 | kvServiceAddrs := make([]string, n) 93 | for i := range n { 94 | port := 14200 + i 95 | kvss[i].ServeHTTP(port) 96 | 97 | kvServiceAddrs[i] = fmt.Sprintf("localhost:%d", port) 98 | } 99 | 100 | ctx, ctxCancel := context.WithCancel(context.Background()) 101 | 102 | h := &Harness{ 103 | n: n, 104 | kvCluster: kvss, 105 | kvServiceAddrs: kvServiceAddrs, 106 | t: t, 107 | connected: connected, 108 | alive: alive, 109 | storage: storage, 110 | ctx: ctx, 111 | ctxCancel: ctxCancel, 112 | } 113 | return h 114 | } 115 | 116 | func (h *Harness) DisconnectServiceFromPeers(id int) { 117 | tlog("Disconnect %d", id) 118 | h.kvCluster[id].DisconnectFromAllRaftPeers() 119 | for j := 0; j < h.n; j++ { 120 | if j != id { 121 | h.kvCluster[j].DisconnectFromRaftPeer(id) 122 | } 123 | } 124 | h.connected[id] = false 125 | } 126 | 127 | func (h *Harness) ReconnectServiceToPeers(id int) { 128 | tlog("Reconnect %d", id) 129 | for j := 0; j < h.n; j++ { 130 | if j != id && h.alive[j] { 131 | if err := h.kvCluster[id].ConnectToRaftPeer(j, h.kvCluster[j].GetRaftListenAddr()); err != nil { 132 | h.t.Fatal(err) 133 | } 134 | if err := h.kvCluster[j].ConnectToRaftPeer(id, h.kvCluster[id].GetRaftListenAddr()); err != nil { 135 | h.t.Fatal(err) 136 | } 137 | } 138 | } 139 | h.connected[id] = true 140 | } 141 | 142 | // CrashService "crashes" a service by disconnecting it from all peers and 143 | // then asking it to shut down. We're not going to be using the same service 144 | // instance again. 145 | func (h *Harness) CrashService(id int) { 146 | tlog("Crash %d", id) 147 | h.DisconnectServiceFromPeers(id) 148 | h.alive[id] = false 149 | if err := h.kvCluster[id].Shutdown(); err != nil { 150 | h.t.Errorf("error while shutting down service %d: %v", id, err) 151 | } 152 | } 153 | 154 | // RestartService "restarts" a service by creating a new instance and 155 | // connecting it to peers. 156 | func (h *Harness) RestartService(id int) { 157 | if h.alive[id] { 158 | log.Fatalf("id=%d is alive in RestartService", id) 159 | } 160 | tlog("Restart %d", id) 161 | 162 | peerIds := make([]int, 0) 163 | for p := range h.n { 164 | if p != id { 165 | peerIds = append(peerIds, p) 166 | } 167 | } 168 | ready := make(chan any) 169 | h.kvCluster[id] = kvservice.New(id, peerIds, h.storage[id], ready) 170 | h.kvCluster[id].ServeHTTP(14200 + id) 171 | 172 | h.ReconnectServiceToPeers(id) 173 | close(ready) 174 | h.alive[id] = true 175 | time.Sleep(20 * time.Millisecond) 176 | } 177 | 178 | // DelayNextHTTPResponseFromService delays the next HTTP response from this 179 | // service to a client. 
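// Tests use this to force a client to retry a request that the service has already executed, exercising the retry and de-duplication path.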
180 | func (h *Harness) DelayNextHTTPResponseFromService(id int) { 181 | tlog("Delaying next HTTP response from %d", id) 182 | h.kvCluster[id].DelayNextHTTPResponse() 183 | } 184 | 185 | func (h *Harness) Shutdown() { 186 | for i := range h.n { 187 | h.kvCluster[i].DisconnectFromAllRaftPeers() 188 | h.connected[i] = false 189 | } 190 | 191 | // These help the HTTP server in KVService shut down properly. 192 | http.DefaultClient.CloseIdleConnections() 193 | h.ctxCancel() 194 | 195 | for i := range h.n { 196 | if h.alive[i] { 197 | h.alive[i] = false 198 | if err := h.kvCluster[i].Shutdown(); err != nil { 199 | h.t.Errorf("error while shutting down service %d: %v", i, err) 200 | } 201 | } 202 | } 203 | } 204 | 205 | // NewClient creates a new client that will contact all the existing live 206 | // services. 207 | func (h *Harness) NewClient() *kvclient.KVClient { 208 | var addrs []string 209 | for i := range h.n { 210 | if h.alive[i] { 211 | addrs = append(addrs, h.kvServiceAddrs[i]) 212 | } 213 | } 214 | return kvclient.New(addrs) 215 | } 216 | 217 | // NewClientWithRandomAddrsOrder creates a new client that will contact all 218 | // the existing live services, but in a randomized order. 219 | func (h *Harness) NewClientWithRandomAddrsOrder() *kvclient.KVClient { 220 | var addrs []string 221 | for i := range h.n { 222 | if h.alive[i] { 223 | addrs = append(addrs, h.kvServiceAddrs[i]) 224 | } 225 | } 226 | rand.Shuffle(len(addrs), func(i, j int) { 227 | addrs[i], addrs[j] = addrs[j], addrs[i] 228 | }) 229 | return kvclient.New(addrs) 230 | } 231 | 232 | // NewClientSingleService creates a new client that will contact only a single 233 | // service (specified by id). Note that if this isn't the leader, the client 234 | // may get stuck in retries. 235 | func (h *Harness) NewClientSingleService(id int) *kvclient.KVClient { 236 | addrs := h.kvServiceAddrs[id : id+1] 237 | return kvclient.New(addrs) 238 | } 239 | 240 | // CheckSingleLeader checks that only a single server thinks it's the leader. 241 | // Returns the leader's id in the Raft cluster. It retries several times if 242 | // no leader is identified yet, so this method is also useful to check that 243 | // the Raft cluster settled on a leader and is ready to execute commands. 244 | func (h *Harness) CheckSingleLeader() int { 245 | for r := 0; r < 8; r++ { 246 | leaderId := -1 247 | for i := range h.n { 248 | if h.connected[i] && h.kvCluster[i].IsLeader() { 249 | if leaderId < 0 { 250 | leaderId = i 251 | } else { 252 | h.t.Fatalf("both %d and %d think they're leaders", leaderId, i) 253 | } 254 | } 255 | } 256 | if leaderId >= 0 { 257 | return leaderId 258 | } 259 | time.Sleep(150 * time.Millisecond) 260 | } 261 | 262 | h.t.Fatalf("leader not found") 263 | return -1 264 | } 265 | 266 | // CheckPut sends a Put request through client c, and checks there are no 267 | // errors. Returns (prevValue, keyFound). 268 | func (h *Harness) CheckPut(c *kvclient.KVClient, key, value string) (string, bool) { 269 | ctx, cancel := context.WithTimeout(h.ctx, 500*time.Millisecond) 270 | defer cancel() 271 | pv, f, err := c.Put(ctx, key, value) 272 | if err != nil { 273 | h.t.Error(err) 274 | } 275 | return pv, f 276 | } 277 | 278 | // CheckAppend sends an Append request through client c, and checks there are no 279 | // errors. Returns (prevValue, keyFound).
280 | func (h *Harness) CheckAppend(c *kvclient.KVClient, key, value string) (string, bool) { 281 | ctx, cancel := context.WithTimeout(h.ctx, 500*time.Millisecond) 282 | defer cancel() 283 | pv, f, err := c.Append(ctx, key, value) 284 | if err != nil { 285 | h.t.Error(err) 286 | } 287 | return pv, f 288 | } 289 | 290 | // CheckGet sends a Get request through client c, and checks there are 291 | // no errors; it also checks that the key was found, and has the expected 292 | // value. 293 | func (h *Harness) CheckGet(c *kvclient.KVClient, key string, wantValue string) { 294 | ctx, cancel := context.WithTimeout(h.ctx, 500*time.Millisecond) 295 | defer cancel() 296 | gv, f, err := c.Get(ctx, key) 297 | if err != nil { 298 | h.t.Error(err) 299 | } 300 | if !f { 301 | h.t.Errorf("got found=false, want true for key=%s", key) 302 | } 303 | if gv != wantValue { 304 | h.t.Errorf("got value=%v, want %v", gv, wantValue) 305 | } 306 | } 307 | 308 | // CheckCAS sends a CAS request through client c, and checks there are no 309 | // errors. Returns (prevValue, keyFound). 310 | func (h *Harness) CheckCAS(c *kvclient.KVClient, key, compare, value string) (string, bool) { 311 | ctx, cancel := context.WithTimeout(h.ctx, 500*time.Millisecond) 312 | defer cancel() 313 | pv, f, err := c.CAS(ctx, key, compare, value) 314 | if err != nil { 315 | h.t.Error(err) 316 | } 317 | return pv, f 318 | } 319 | 320 | // CheckGetNotFound sends a Get request through client c, and checks there are 321 | // no errors, but the key isn't found in the service. 322 | func (h *Harness) CheckGetNotFound(c *kvclient.KVClient, key string) { 323 | ctx, cancel := context.WithTimeout(h.ctx, 500*time.Millisecond) 324 | defer cancel() 325 | _, f, err := c.Get(ctx, key) 326 | if err != nil { 327 | h.t.Error(err) 328 | } 329 | if f { 330 | h.t.Errorf("got found=true, want false for key=%s", key) 331 | } 332 | } 333 | 334 | // CheckGetTimesOut checks that a Get request with the given client will 335 | // time out if we set up a context with a deadline, because the client is 336 | // unable to get the service to commit its command. 337 | func (h *Harness) CheckGetTimesOut(c *kvclient.KVClient, key string) { 338 | ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond) 339 | defer cancel() 340 | _, _, err := c.Get(ctx, key) 341 | if err == nil || !strings.Contains(err.Error(), "deadline exceeded") { 342 | h.t.Errorf("got err %v; want 'deadline exceeded'", err) 343 | } 344 | } 345 | 346 | func tlog(format string, a ...any) { 347 | format = "[TEST] " + format 348 | log.Printf(format, a...) 349 | } 350 | -------------------------------------------------------------------------------- /raftlog-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eliben/raft/d85b35cef3da8bde6c863b9231559e61b1542805/raftlog-screenshot.png -------------------------------------------------------------------------------- /tools/raft-testlog-viz/main.go: -------------------------------------------------------------------------------- 1 | // Convert a test log from a Raft run into an HTML file with a colorful 2 | // table for easier tracking of the log events. 3 | // 4 | // Note: by "log" here we mean the logging messages emitted by our Raft code, 5 | // not the raft log that stores replicated data. 6 | // 7 | // Eli Bendersky [https://eli.thegreenplace.net] 8 | // This code is in the public domain. 
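// The tool reads `go test -v` output on standard input and writes one HTML file per test (named after the test) into /tmp.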
9 | package main 10 | 11 | import ( 12 | "bufio" 13 | "fmt" 14 | "io" 15 | "log" 16 | "os" 17 | "path" 18 | "regexp" 19 | "strconv" 20 | "strings" 21 | "text/template" 22 | ) 23 | 24 | // Entry is a single log entry emitted by a raft server. 25 | type Entry struct { 26 | timestamp string 27 | id string 28 | msg string 29 | } 30 | 31 | // TestLog is a whole log for a single test, containing many entries. 32 | type TestLog struct { 33 | name string 34 | status string 35 | entries []Entry 36 | 37 | // ids is a set of all IDs seen emitting entries in this test. 38 | ids map[string]bool 39 | } 40 | 41 | const tmpl = ` 42 | <!DOCTYPE html> 43 | <html> 44 | <head> 45 | <title>{{.Title}}</title> 46 | <style> 47 | 82 | </style> 83 | </head> 84 | <body> 85 | <h1>{{.Title}}</h1> 86 | <table> 87 | <tr>{{range .Headers}} 88 | <th>{{.}}</th> 89 | {{end}} 90 | </tr> 91 | {{range .HtmlItems}} 92 | <tr> 93 | {{.}} 94 | </tr> 95 | {{end}} 96 | </table>
97 | </body> 98 | </html> 99 | ` 100 | 101 | type ServerState int 102 | 103 | const ( 104 | Follower ServerState = iota 105 | Candidate 106 | Leader 107 | Dead 108 | ) 109 | 110 | func (s ServerState) String() string { 111 | switch s { 112 | case Follower: 113 | return "Follower" 114 | case Candidate: 115 | return "Candidate" 116 | case Leader: 117 | return "Leader" 118 | case Dead: 119 | return "Dead" 120 | default: 121 | panic("unreachable") 122 | } 123 | } 124 | 125 | func emitTestViz(dirname string, tl TestLog) { 126 | filename := path.Join(dirname, tl.name+".html") 127 | f, err := os.Create(filename) 128 | if err != nil { 129 | log.Fatal(err) 130 | } 131 | defer f.Close() 132 | 133 | t, err := template.New("page").Parse(tmpl) 134 | if err != nil { 135 | log.Fatal(err) 136 | } 137 | 138 | var nservers int 139 | var havetest bool 140 | 141 | if _, ok := tl.ids["TEST"]; ok { 142 | havetest = true 143 | nservers = len(tl.ids) - 1 144 | } else { 145 | havetest = false 146 | nservers = len(tl.ids) 147 | } 148 | 149 | headers := []string{"Time"} 150 | if havetest { 151 | headers = append(headers, "TEST") 152 | } 153 | for i := 0; i < nservers; i++ { 154 | headers = append(headers, strconv.Itoa(i)) 155 | } 156 | 157 | serverState := make([]ServerState, nservers) 158 | 159 | var htmlitems []string 160 | for _, entry := range tl.entries { 161 | var b strings.Builder 162 | fmt.Fprintf(&b, "<td>%s</td>", entry.timestamp) 163 | if entry.id == "TEST" { 164 | if havetest { 165 | fmt.Fprintf(&b, ` <td class="TEST">%s</td>`, entry.msg) 166 | for i := 0; i < nservers; i++ { 167 | fmt.Fprintf(&b, ` <td class="%s"></td>`, serverState[i]) 168 | } 169 | } else { 170 | log.Fatal("have TEST entry with no test IDs") 171 | } 172 | } else { 173 | idInt, err := strconv.Atoi(entry.id) 174 | if err != nil { 175 | log.Fatal(err) 176 | } 177 | 178 | if strings.Contains(entry.msg, "becomes Follower") { 179 | serverState[idInt] = Follower 180 | } else if strings.Contains(entry.msg, "listening") { 181 | serverState[idInt] = Follower 182 | } else if strings.Contains(entry.msg, "becomes Candidate") { 183 | serverState[idInt] = Candidate 184 | } else if strings.Contains(entry.msg, "becomes Leader") { 185 | serverState[idInt] = Leader 186 | } else if strings.Contains(entry.msg, "becomes Dead") { 187 | serverState[idInt] = Dead 188 | } else if strings.Contains(entry.msg, "created in state Follower") { 189 | serverState[idInt] = Follower 190 | } 191 | 192 | if havetest { 193 | fmt.Fprintf(&b, " <td></td>") 194 | } 195 | // Emit the right number of td's, with an entry in the right place. 196 | for i := 0; i < idInt; i++ { 197 | fmt.Fprintf(&b, ` <td class="%s"></td>`, serverState[i]) 198 | } 199 | fmt.Fprintf(&b, ` <td class="%s">%s</td>`, serverState[idInt], entry.msg) 200 | for i := idInt + 1; i < nservers; i++ { 201 | fmt.Fprintf(&b, ` <td class="%s"></td>`, serverState[i]) 202 | } 203 | } 204 | htmlitems = append(htmlitems, b.String()) 205 | } 206 | 207 | data := struct { 208 | Title string 209 | Headers []string 210 | HtmlItems []string 211 | }{ 212 | Title: fmt.Sprintf("%s -- %s", tl.name, tl.status), 213 | Headers: headers, 214 | HtmlItems: htmlitems, 215 | } 216 | err = t.Execute(f, data) 217 | if err != nil { 218 | log.Fatal(err) 219 | } 220 | fmt.Println("...
Emitted", "file://"+filename) 221 | } 222 | 223 | func parseTestLogs(rd io.Reader) []TestLog { 224 | var testlogs []TestLog 225 | 226 | statusRE := regexp.MustCompile(`--- (\w+):\s+(\w+)`) 227 | entryRE := regexp.MustCompile(`([0-9:.]+) \[([\w ]+)\] (.*)`) 228 | 229 | scanner := bufio.NewScanner(bufio.NewReader(rd)) 230 | for scanner.Scan() { 231 | line := scanner.Text() 232 | if strings.HasPrefix(line, "=== RUN") { 233 | testlogs = append(testlogs, TestLog{ids: make(map[string]bool)}) 234 | testlogs[len(testlogs)-1].name = strings.TrimSpace(line[7:]) 235 | } else { 236 | if len(testlogs) == 0 { 237 | continue 238 | } 239 | curlog := &testlogs[len(testlogs)-1] 240 | 241 | statusMatch := statusRE.FindStringSubmatch(line) 242 | if len(statusMatch) > 0 { 243 | if statusMatch[2] != curlog.name { 244 | log.Fatalf("name on line %q mismatch with test name: got %s", line, curlog.name) 245 | } 246 | curlog.status = statusMatch[1] 247 | continue 248 | } 249 | 250 | entryMatch := entryRE.FindStringSubmatch(line) 251 | if len(entryMatch) > 0 { 252 | // [kv N] entries get folded into id=N, with the "kv N" part prefixed 253 | // to the message. 254 | id, foundKV := strings.CutPrefix(entryMatch[2], "kv ") 255 | msg := entryMatch[3] 256 | if foundKV { 257 | msg = id + " " + msg 258 | } 259 | 260 | // [clientNNN] entries get folded into id=TEST 261 | if strings.HasPrefix(entryMatch[2], "client") { 262 | id = "TEST" 263 | msg = entryMatch[2] + " " + msg 264 | } 265 | 266 | entry := Entry{ 267 | timestamp: entryMatch[1], 268 | id: id, 269 | msg: msg, 270 | } 271 | curlog.entries = append(curlog.entries, entry) 272 | curlog.ids[entry.id] = true 273 | continue 274 | } 275 | } 276 | } 277 | return testlogs 278 | } 279 | 280 | func main() { 281 | testlogs := parseTestLogs(os.Stdin) 282 | 283 | tnames := make(map[string]int) 284 | 285 | // Deduplicate the names of testlogs; in case the log contains multiple 286 | // instances of the same test, we'd like them all to have different file 287 | // names. 288 | for i, tl := range testlogs { 289 | if count, ok := tnames[tl.name]; ok { 290 | testlogs[i].name = fmt.Sprintf("%s_%d", tl.name, count) 291 | } 292 | tnames[tl.name] += 1 293 | } 294 | 295 | statusSummary := "PASS" 296 | 297 | for _, tl := range testlogs { 298 | fmt.Println(tl.status, tl.name, tl.ids, "; entries:", len(tl.entries)) 299 | if tl.status != "PASS" { 300 | statusSummary = tl.status 301 | } 302 | emitTestViz("/tmp", tl) 303 | fmt.Println("") 304 | } 305 | 306 | fmt.Println(statusSummary) 307 | } 308 | --------------------------------------------------------------------------------