├── .check-build ├── .gitignore ├── .run ├── TestBasicAgree2B in raft.run.xml ├── TestFailAgree2B in raft.run.xml ├── TestFailNoAgree2B in raft.run.xml ├── TestInitialElection2A in raft.run.xml ├── TestReElection2A in raft.run.xml ├── build wc.run.xml ├── go run mrmaster.run.xml ├── go run mrworker.run.xml └── go test lab2a.run.xml ├── Makefile ├── README.md └── src ├── .gitignore ├── kvraft ├── client.go ├── common.go ├── config.go ├── server.go └── test_test.go ├── labgob ├── labgob.go └── test_test.go ├── labrpc ├── labrpc.go └── test_test.go ├── main ├── .gitignore ├── build-wc.sh ├── diskvd.go ├── lockc.go ├── lockd.go ├── mrmaster.go ├── mrsequential.go ├── mrworker.go ├── pbc.go ├── pbd.go ├── pg-being_ernest.txt ├── pg-dorian_gray.txt ├── pg-frankenstein.txt ├── pg-grimm.txt ├── pg-huckleberry_finn.txt ├── pg-metamorphosis.txt ├── pg-sherlock_holmes.txt ├── pg-tom_sawyer.txt ├── test-mr.sh └── viewd.go ├── models └── kv.go ├── mr ├── master.go ├── rpc.go └── worker.go ├── mrapps ├── crash.go ├── indexer.go ├── mtiming.go ├── nocrash.go ├── rtiming.go └── wc.go ├── porcupine ├── bitset.go ├── checker.go ├── model.go ├── porcupine.go └── visualization.go ├── raft ├── README.md ├── append_entries_callback.go ├── append_entries_task.go ├── callback.go ├── config.go ├── peer_log_state.go ├── persister.go ├── raft.go ├── raft_log.go ├── raft_state.go ├── raft_task.go ├── raft_time.go ├── request_vote_callback.go ├── request_vote_task.go ├── rpc_args_reply.go ├── rpc_data.go ├── test_test.go └── util.go ├── shardkv ├── client.go ├── common.go ├── config.go ├── server.go └── test_test.go └── shardmaster ├── client.go ├── common.go ├── config.go ├── server.go └── test_test.go /.check-build: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eu 4 | 5 | REFERENCE_FILES=( 6 | # lab 1 7 | src/mrapps/crash.go 8 | src/mrapps/indexer.go 9 | src/mrapps/mtiming.go 10 | src/mrapps/nocrash.go 11 | src/mrapps/rtiming.go 12 | src/mrapps/wc.go 13 | src/main/mrsequential.go 14 | src/main/mrmaster.go 15 | src/main/mrworker.go 16 | 17 | # lab 2 18 | src/raft/persister.go 19 | src/raft/test_test.go 20 | src/raft/config.go 21 | src/labrpc/labrpc.go 22 | 23 | # lab 3 24 | src/kvraft/test_test.go 25 | src/kvraft/config.go 26 | 27 | # lab 4a 28 | src/shardmaster/test_test.go 29 | src/shardmaster/config.go 30 | 31 | # lab 4b 32 | src/shardkv/test_test.go 33 | src/shardkv/config.go 34 | ) 35 | 36 | main() { 37 | upstream="$1" 38 | labnum="$2" 39 | 40 | # make sure we have reference copy of lab, in FETCH_HEAD 41 | git fetch "$upstream" 2>/dev/null || die "unable to git fetch $upstream" 42 | 43 | # copy existing directory 44 | tmpdir="$(mktemp -d)" 45 | find src -type s -delete # cp can't copy sockets 46 | cp -r src "$tmpdir" 47 | orig="$PWD" 48 | cd "$tmpdir" 49 | 50 | # check out reference files 51 | for f in ${REFERENCE_FILES[@]}; do 52 | mkdir -p "$(dirname $f)" 53 | git --git-dir="$orig/.git" show "FETCH_HEAD:$f" > "$f" 54 | done 55 | 56 | case $labnum in 57 | "lab1") check_lab1;; 58 | "lab2a"|"lab2b"|"lab2c") check_lab2;; 59 | "lab3a"|"lab3b") check_lab3;; 60 | "lab4a") check_lab4a;; 61 | "lab4b") check_lab4b;; 62 | *) die "unknown lab: $labnum";; 63 | esac 64 | 65 | cd 66 | rm -rf "$tmpdir" 67 | } 68 | 69 | check_lab1() { 70 | check_cmd cd src/mrapps 71 | check_cmd go build -buildmode=plugin wc.go 72 | check_cmd go build -buildmode=plugin indexer.go 73 | check_cmd go build -buildmode=plugin mtiming.go 74 | check_cmd go build -buildmode=plugin 
rtiming.go 75 | check_cmd go build -buildmode=plugin crash.go 76 | check_cmd go build -buildmode=plugin nocrash.go 77 | check_cmd cd ../main 78 | check_cmd go build mrmaster.go 79 | check_cmd go build mrworker.go 80 | check_cmd go build mrsequential.go 81 | } 82 | 83 | check_lab2() { 84 | check_cmd cd src/raft 85 | check_cmd go test -c 86 | } 87 | 88 | check_lab3() { 89 | check_cmd cd src/kvraft 90 | check_cmd go test -c 91 | } 92 | 93 | check_lab4a() { 94 | check_cmd cd src/shardmaster 95 | check_cmd go test -c 96 | } 97 | 98 | check_lab4b() { 99 | check_cmd cd src/shardkv 100 | check_cmd go test -c 101 | # also check other labs/parts 102 | cd "$tmpdir" 103 | check_lab4a 104 | cd "$tmpdir" 105 | check_lab3 106 | cd "$tmpdir" 107 | check_lab2 108 | } 109 | 110 | check_cmd() { 111 | if ! "$@" >/dev/null 2>&1; then 112 | echo "We tried building your source code with testing-related files reverted to original versions, and the build failed. This copy of your code is preserved in $tmpdir for debugging purposes. Please make sure the code you are trying to hand in does not make changes to test code." >&2 113 | echo >&2 114 | echo "The build failed while trying to run the following command:" >&2 115 | echo >&2 116 | echo "$ $@" >&2 117 | echo " (cwd: ${PWD#$tmpdir/})" >&2 118 | exit 1 119 | fi 120 | } 121 | 122 | die() { 123 | echo "$1" >&2 124 | exit 1 125 | } 126 | 127 | main "$@" 128 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | pkg/ 2 | api.key 3 | *-handin.tar.gz 4 | /.idea 5 | -------------------------------------------------------------------------------- /.run/TestBasicAgree2B in raft.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.run/TestFailAgree2B in raft.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.run/TestFailNoAgree2B in raft.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.run/TestInitialElection2A in raft.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /.run/TestReElection2A in raft.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.run/build wc.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 13 | -------------------------------------------------------------------------------- /.run/go run mrmaster.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /.run/go run mrworker.run.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 13 | 14 | -------------------------------------------------------------------------------- /.run/go test lab2a.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # This is the Makefile helping you submit the labs. 2 | # Just create 6.824/api.key with your API key in it, 3 | # and submit your lab with the following command: 4 | # $ make [lab1|lab2a|lab2b|lab2c|lab3a|lab3b|lab4a|lab4b] 5 | 6 | LABS=" lab1 lab2a lab2b lab2c lab3a lab3b lab4a lab4b " 7 | 8 | %: check-% 9 | @echo "Preparing $@-handin.tar.gz" 10 | @if echo $(LABS) | grep -q " $@ " ; then \ 11 | echo "Tarring up your submission..." ; \ 12 | tar cvzf $@-handin.tar.gz \ 13 | "--exclude=src/main/pg-*.txt" \ 14 | "--exclude=src/main/diskvd" \ 15 | "--exclude=src/mapreduce/824-mrinput-*.txt" \ 16 | "--exclude=src/main/mr-*" \ 17 | "--exclude=mrtmp.*" \ 18 | "--exclude=src/main/diff.out" \ 19 | "--exclude=src/main/mrmaster" \ 20 | "--exclude=src/main/mrsequential" \ 21 | "--exclude=src/main/mrworker" \ 22 | "--exclude=*.so" \ 23 | Makefile src; \ 24 | if ! test -e api.key ; then \ 25 | echo "Missing $(PWD)/api.key. Please create the file with your key in it or submit the $@-handin.tar.gz via the web interface."; \ 26 | else \ 27 | echo "Are you sure you want to submit $@? Enter 'yes' to continue:"; \ 28 | read line; \ 29 | if test "$$line" != "yes" ; then echo "Giving up submission"; exit; fi; \ 30 | if test `stat -c "%s" "$@-handin.tar.gz" 2>/dev/null || stat -f "%z" "$@-handin.tar.gz"` -ge 20971520 ; then echo "File exceeds 20MB."; exit; fi; \ 31 | mv api.key api.key.fix ; \ 32 | cat api.key.fix | tr -d '\n' > api.key ; \ 33 | rm api.key.fix ; \ 34 | curl -F file=@$@-handin.tar.gz -F "key= /dev/null || { \ 35 | echo ; \ 36 | echo "Submit seems to have failed."; \ 37 | echo "Please upload the tarball manually on the submission website."; } \ 38 | fi; \ 39 | else \ 40 | echo "Bad target $@. Usage: make [$(LABS)]"; \ 41 | fi 42 | 43 | .PHONY: check-% 44 | check-%: 45 | @echo "Checking that your submission builds correctly..." 46 | @./.check-build git://g.csail.mit.edu/6.824-golabs-2020 $(patsubst check-%,%,$@) 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # My 6.824 Solution 2 | 3 | These are my solutions to the labs of MIT 6.824. An original copy of the course material is stored in the branch `skeleton`, and the branch `master` holds my most up-to-date solutions. I am still working on this, so it may not be complete. Nevertheless, you must not use this repository dishonestly in your own coursework. 
4 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.*/ 2 | mrtmp.* 3 | 824-mrinput-*.txt 4 | /main/diff.out 5 | /mapreduce/x.txt 6 | /pbservice/x.txt 7 | /kvpaxos/x.txt 8 | *.so 9 | /.idea 10 | -------------------------------------------------------------------------------- /src/kvraft/client.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | import "../labrpc" 4 | import "crypto/rand" 5 | import "math/big" 6 | 7 | 8 | type Clerk struct { 9 | servers []*labrpc.ClientEnd 10 | // You will have to modify this struct. 11 | } 12 | 13 | func nrand() int64 { 14 | max := big.NewInt(int64(1) << 62) 15 | bigx, _ := rand.Int(rand.Reader, max) 16 | x := bigx.Int64() 17 | return x 18 | } 19 | 20 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 21 | ck := new(Clerk) 22 | ck.servers = servers 23 | // You'll have to add code here. 24 | return ck 25 | } 26 | 27 | // 28 | // fetch the current value for a key. 29 | // returns "" if the key does not exist. 30 | // keeps trying forever in the face of all other errors. 31 | // 32 | // you can send an RPC with code like this: 33 | // ok := ck.servers[i].Call("KVServer.Get", &args, &reply) 34 | // 35 | // the types of args and reply (including whether they are pointers) 36 | // must match the declared types of the RPC handler function's 37 | // arguments. and reply must be passed as a pointer. 38 | // 39 | func (ck *Clerk) Get(key string) string { 40 | 41 | // You will have to modify this function. 42 | return "" 43 | } 44 | 45 | // 46 | // shared by Put and Append. 47 | // 48 | // you can send an RPC with code like this: 49 | // ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply) 50 | // 51 | // the types of args and reply (including whether they are pointers) 52 | // must match the declared types of the RPC handler function's 53 | // arguments. and reply must be passed as a pointer. 54 | // 55 | func (ck *Clerk) PutAppend(key string, value string, op string) { 56 | // You will have to modify this function. 57 | } 58 | 59 | func (ck *Clerk) Put(key string, value string) { 60 | ck.PutAppend(key, value, "Put") 61 | } 62 | func (ck *Clerk) Append(key string, value string) { 63 | ck.PutAppend(key, value, "Append") 64 | } 65 | -------------------------------------------------------------------------------- /src/kvraft/common.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ErrWrongLeader = "ErrWrongLeader" 7 | ) 8 | 9 | type Err string 10 | 11 | // Put or Append 12 | type PutAppendArgs struct { 13 | Key string 14 | Value string 15 | Op string // "Put" or "Append" 16 | // You'll have to add definitions here. 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | } 20 | 21 | type PutAppendReply struct { 22 | Err Err 23 | } 24 | 25 | type GetArgs struct { 26 | Key string 27 | // You'll have to add definitions here. 
28 | } 29 | 30 | type GetReply struct { 31 | Err Err 32 | Value string 33 | } 34 | -------------------------------------------------------------------------------- /src/kvraft/config.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | import "../labrpc" 4 | import "testing" 5 | import "os" 6 | 7 | // import "log" 8 | import crand "crypto/rand" 9 | import "math/big" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | import "../raft" 15 | import "fmt" 16 | import "time" 17 | import "sync/atomic" 18 | 19 | func randstring(n int) string { 20 | b := make([]byte, 2*n) 21 | crand.Read(b) 22 | s := base64.URLEncoding.EncodeToString(b) 23 | return s[0:n] 24 | } 25 | 26 | func makeSeed() int64 { 27 | max := big.NewInt(int64(1) << 62) 28 | bigx, _ := crand.Int(crand.Reader, max) 29 | x := bigx.Int64() 30 | return x 31 | } 32 | 33 | // Randomize server handles 34 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 35 | sa := make([]*labrpc.ClientEnd, len(kvh)) 36 | copy(sa, kvh) 37 | for i := range sa { 38 | j := rand.Intn(i + 1) 39 | sa[i], sa[j] = sa[j], sa[i] 40 | } 41 | return sa 42 | } 43 | 44 | type config struct { 45 | mu sync.Mutex 46 | t *testing.T 47 | net *labrpc.Network 48 | n int 49 | kvservers []*KVServer 50 | saved []*raft.Persister 51 | endnames [][]string // names of each server's sending ClientEnds 52 | clerks map[*Clerk][]string 53 | nextClientId int 54 | maxraftstate int 55 | start time.Time // time at which make_config() was called 56 | // begin()/end() statistics 57 | t0 time.Time // time at which test_test.go called cfg.begin() 58 | rpcs0 int // rpcTotal() at start of test 59 | ops int32 // number of clerk get/put/append method calls 60 | } 61 | 62 | func (cfg *config) checkTimeout() { 63 | // enforce a two minute real-time limit on each test 64 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 65 | cfg.t.Fatal("test took longer than 120 seconds") 66 | } 67 | } 68 | 69 | func (cfg *config) cleanup() { 70 | cfg.mu.Lock() 71 | defer cfg.mu.Unlock() 72 | for i := 0; i < len(cfg.kvservers); i++ { 73 | if cfg.kvservers[i] != nil { 74 | cfg.kvservers[i].Kill() 75 | } 76 | } 77 | cfg.net.Cleanup() 78 | cfg.checkTimeout() 79 | } 80 | 81 | // Maximum log size across all servers 82 | func (cfg *config) LogSize() int { 83 | logsize := 0 84 | for i := 0; i < cfg.n; i++ { 85 | n := cfg.saved[i].RaftStateSize() 86 | if n > logsize { 87 | logsize = n 88 | } 89 | } 90 | return logsize 91 | } 92 | 93 | // Maximum snapshot size across all servers 94 | func (cfg *config) SnapshotSize() int { 95 | snapshotsize := 0 96 | for i := 0; i < cfg.n; i++ { 97 | n := cfg.saved[i].SnapshotSize() 98 | if n > snapshotsize { 99 | snapshotsize = n 100 | } 101 | } 102 | return snapshotsize 103 | } 104 | 105 | // attach server i to servers listed in to 106 | // caller must hold cfg.mu 107 | func (cfg *config) connectUnlocked(i int, to []int) { 108 | // log.Printf("connect peer %d to %v\n", i, to) 109 | 110 | // outgoing socket files 111 | for j := 0; j < len(to); j++ { 112 | endname := cfg.endnames[i][to[j]] 113 | cfg.net.Enable(endname, true) 114 | } 115 | 116 | // incoming socket files 117 | for j := 0; j < len(to); j++ { 118 | endname := cfg.endnames[to[j]][i] 119 | cfg.net.Enable(endname, true) 120 | } 121 | } 122 | 123 | func (cfg *config) connect(i int, to []int) { 124 | cfg.mu.Lock() 125 | defer cfg.mu.Unlock() 126 | cfg.connectUnlocked(i, to) 127 | } 128 | 129 | // 
detach server i from the servers listed in from 130 | // caller must hold cfg.mu 131 | func (cfg *config) disconnectUnlocked(i int, from []int) { 132 | // log.Printf("disconnect peer %d from %v\n", i, from) 133 | 134 | // outgoing socket files 135 | for j := 0; j < len(from); j++ { 136 | if cfg.endnames[i] != nil { 137 | endname := cfg.endnames[i][from[j]] 138 | cfg.net.Enable(endname, false) 139 | } 140 | } 141 | 142 | // incoming socket files 143 | for j := 0; j < len(from); j++ { 144 | if cfg.endnames[j] != nil { 145 | endname := cfg.endnames[from[j]][i] 146 | cfg.net.Enable(endname, false) 147 | } 148 | } 149 | } 150 | 151 | func (cfg *config) disconnect(i int, from []int) { 152 | cfg.mu.Lock() 153 | defer cfg.mu.Unlock() 154 | cfg.disconnectUnlocked(i, from) 155 | } 156 | 157 | func (cfg *config) All() []int { 158 | all := make([]int, cfg.n) 159 | for i := 0; i < cfg.n; i++ { 160 | all[i] = i 161 | } 162 | return all 163 | } 164 | 165 | func (cfg *config) ConnectAll() { 166 | cfg.mu.Lock() 167 | defer cfg.mu.Unlock() 168 | for i := 0; i < cfg.n; i++ { 169 | cfg.connectUnlocked(i, cfg.All()) 170 | } 171 | } 172 | 173 | // Sets up 2 partitions with connectivity between servers in each partition. 174 | func (cfg *config) partition(p1 []int, p2 []int) { 175 | cfg.mu.Lock() 176 | defer cfg.mu.Unlock() 177 | // log.Printf("partition servers into: %v %v\n", p1, p2) 178 | for i := 0; i < len(p1); i++ { 179 | cfg.disconnectUnlocked(p1[i], p2) 180 | cfg.connectUnlocked(p1[i], p1) 181 | } 182 | for i := 0; i < len(p2); i++ { 183 | cfg.disconnectUnlocked(p2[i], p1) 184 | cfg.connectUnlocked(p2[i], p2) 185 | } 186 | } 187 | 188 | // Create a clerk with clerk specific server names. 189 | // Give it connections to all of the servers, but for 190 | // now enable only connections to servers in to[]. 191 | func (cfg *config) makeClient(to []int) *Clerk { 192 | cfg.mu.Lock() 193 | defer cfg.mu.Unlock() 194 | 195 | // a fresh set of ClientEnds. 
196 | ends := make([]*labrpc.ClientEnd, cfg.n) 197 | endnames := make([]string, cfg.n) 198 | for j := 0; j < cfg.n; j++ { 199 | endnames[j] = randstring(20) 200 | ends[j] = cfg.net.MakeEnd(endnames[j]) 201 | cfg.net.Connect(endnames[j], j) 202 | } 203 | 204 | ck := MakeClerk(random_handles(ends)) 205 | cfg.clerks[ck] = endnames 206 | cfg.nextClientId++ 207 | cfg.ConnectClientUnlocked(ck, to) 208 | return ck 209 | } 210 | 211 | func (cfg *config) deleteClient(ck *Clerk) { 212 | cfg.mu.Lock() 213 | defer cfg.mu.Unlock() 214 | 215 | v := cfg.clerks[ck] 216 | for i := 0; i < len(v); i++ { 217 | os.Remove(v[i]) 218 | } 219 | delete(cfg.clerks, ck) 220 | } 221 | 222 | // caller should hold cfg.mu 223 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 224 | // log.Printf("ConnectClient %v to %v\n", ck, to) 225 | endnames := cfg.clerks[ck] 226 | for j := 0; j < len(to); j++ { 227 | s := endnames[to[j]] 228 | cfg.net.Enable(s, true) 229 | } 230 | } 231 | 232 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 233 | cfg.mu.Lock() 234 | defer cfg.mu.Unlock() 235 | cfg.ConnectClientUnlocked(ck, to) 236 | } 237 | 238 | // caller should hold cfg.mu 239 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 240 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 241 | endnames := cfg.clerks[ck] 242 | for j := 0; j < len(from); j++ { 243 | s := endnames[from[j]] 244 | cfg.net.Enable(s, false) 245 | } 246 | } 247 | 248 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 249 | cfg.mu.Lock() 250 | defer cfg.mu.Unlock() 251 | cfg.DisconnectClientUnlocked(ck, from) 252 | } 253 | 254 | // Shutdown a server by isolating it 255 | func (cfg *config) ShutdownServer(i int) { 256 | cfg.mu.Lock() 257 | defer cfg.mu.Unlock() 258 | 259 | cfg.disconnectUnlocked(i, cfg.All()) 260 | 261 | // disable client connections to the server. 262 | // it's important to do this before creating 263 | // the new Persister in saved[i], to avoid 264 | // the possibility of the server returning a 265 | // positive reply to an Append but persisting 266 | // the result in the superseded Persister. 267 | cfg.net.DeleteServer(i) 268 | 269 | // a fresh persister, in case old instance 270 | // continues to update the Persister. 271 | // but copy old persister's content so that we always 272 | // pass Make() the last persisted state. 273 | if cfg.saved[i] != nil { 274 | cfg.saved[i] = cfg.saved[i].Copy() 275 | } 276 | 277 | kv := cfg.kvservers[i] 278 | if kv != nil { 279 | cfg.mu.Unlock() 280 | kv.Kill() 281 | cfg.mu.Lock() 282 | cfg.kvservers[i] = nil 283 | } 284 | } 285 | 286 | // If restart servers, first call ShutdownServer 287 | func (cfg *config) StartServer(i int) { 288 | cfg.mu.Lock() 289 | 290 | // a fresh set of outgoing ClientEnd names. 291 | cfg.endnames[i] = make([]string, cfg.n) 292 | for j := 0; j < cfg.n; j++ { 293 | cfg.endnames[i][j] = randstring(20) 294 | } 295 | 296 | // a fresh set of ClientEnds. 297 | ends := make([]*labrpc.ClientEnd, cfg.n) 298 | for j := 0; j < cfg.n; j++ { 299 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 300 | cfg.net.Connect(cfg.endnames[i][j], j) 301 | } 302 | 303 | // a fresh persister, so old instance doesn't overwrite 304 | // new instance's persisted state. 305 | // give the fresh persister a copy of the old persister's 306 | // state, so that the spec is that we pass StartKVServer() 307 | // the last persisted state. 
308 | if cfg.saved[i] != nil { 309 | cfg.saved[i] = cfg.saved[i].Copy() 310 | } else { 311 | cfg.saved[i] = raft.MakePersister() 312 | } 313 | cfg.mu.Unlock() 314 | 315 | cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate) 316 | 317 | kvsvc := labrpc.MakeService(cfg.kvservers[i]) 318 | rfsvc := labrpc.MakeService(cfg.kvservers[i].rf) 319 | srv := labrpc.MakeServer() 320 | srv.AddService(kvsvc) 321 | srv.AddService(rfsvc) 322 | cfg.net.AddServer(i, srv) 323 | } 324 | 325 | func (cfg *config) Leader() (bool, int) { 326 | cfg.mu.Lock() 327 | defer cfg.mu.Unlock() 328 | 329 | for i := 0; i < cfg.n; i++ { 330 | _, is_leader := cfg.kvservers[i].rf.GetState() 331 | if is_leader { 332 | return true, i 333 | } 334 | } 335 | return false, 0 336 | } 337 | 338 | // Partition servers into 2 groups and put current leader in minority 339 | func (cfg *config) make_partition() ([]int, []int) { 340 | _, l := cfg.Leader() 341 | p1 := make([]int, cfg.n/2+1) 342 | p2 := make([]int, cfg.n/2) 343 | j := 0 344 | for i := 0; i < cfg.n; i++ { 345 | if i != l { 346 | if j < len(p1) { 347 | p1[j] = i 348 | } else { 349 | p2[j-len(p1)] = i 350 | } 351 | j++ 352 | } 353 | } 354 | p2[len(p2)-1] = l 355 | return p1, p2 356 | } 357 | 358 | var ncpu_once sync.Once 359 | 360 | func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config { 361 | ncpu_once.Do(func() { 362 | if runtime.NumCPU() < 2 { 363 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 364 | } 365 | rand.Seed(makeSeed()) 366 | }) 367 | runtime.GOMAXPROCS(4) 368 | cfg := &config{} 369 | cfg.t = t 370 | cfg.net = labrpc.MakeNetwork() 371 | cfg.n = n 372 | cfg.kvservers = make([]*KVServer, cfg.n) 373 | cfg.saved = make([]*raft.Persister, cfg.n) 374 | cfg.endnames = make([][]string, cfg.n) 375 | cfg.clerks = make(map[*Clerk][]string) 376 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 377 | cfg.maxraftstate = maxraftstate 378 | cfg.start = time.Now() 379 | 380 | // create a full set of KV servers. 381 | for i := 0; i < cfg.n; i++ { 382 | cfg.StartServer(i) 383 | } 384 | 385 | cfg.ConnectAll() 386 | 387 | cfg.net.Reliable(!unreliable) 388 | 389 | return cfg 390 | } 391 | 392 | func (cfg *config) rpcTotal() int { 393 | return cfg.net.GetTotalCount() 394 | } 395 | 396 | // start a Test. 397 | // print the Test message. 398 | // e.g. cfg.begin("Test (2B): RPC counts aren't too high") 399 | func (cfg *config) begin(description string) { 400 | fmt.Printf("%s ...\n", description) 401 | cfg.t0 = time.Now() 402 | cfg.rpcs0 = cfg.rpcTotal() 403 | atomic.StoreInt32(&cfg.ops, 0) 404 | } 405 | 406 | func (cfg *config) op() { 407 | atomic.AddInt32(&cfg.ops, 1) 408 | } 409 | 410 | // end a Test -- the fact that we got here means there 411 | // was no failure. 412 | // print the Passed message, 413 | // and some performance numbers. 414 | func (cfg *config) end() { 415 | cfg.checkTimeout() 416 | if cfg.t.Failed() == false { 417 | t := time.Since(cfg.t0).Seconds() // real time 418 | npeers := cfg.n // number of Raft peers 419 | nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends 420 | ops := atomic.LoadInt32(&cfg.ops) // number of clerk get/put/append calls 421 | 422 | fmt.Printf(" ... 
Passed --") 423 | fmt.Printf(" %4.1f %d %5d %4d\n", t, npeers, nrpc, ops) 424 | } 425 | } 426 | -------------------------------------------------------------------------------- /src/kvraft/server.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | import ( 4 | "../labgob" 5 | "../labrpc" 6 | "log" 7 | "../raft" 8 | "sync" 9 | "sync/atomic" 10 | ) 11 | 12 | const Debug = 0 13 | 14 | func DPrintf(format string, a ...interface{}) (n int, err error) { 15 | if Debug > 0 { 16 | log.Printf(format, a...) 17 | } 18 | return 19 | } 20 | 21 | 22 | type Op struct { 23 | // Your definitions here. 24 | // Field names must start with capital letters, 25 | // otherwise RPC will break. 26 | } 27 | 28 | type KVServer struct { 29 | mu sync.Mutex 30 | me int 31 | rf *raft.Raft 32 | applyCh chan raft.ApplyMsg 33 | dead int32 // set by Kill() 34 | 35 | maxraftstate int // snapshot if log grows this big 36 | 37 | // Your definitions here. 38 | } 39 | 40 | 41 | func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { 42 | // Your code here. 43 | } 44 | 45 | func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 46 | // Your code here. 47 | } 48 | 49 | // 50 | // the tester calls Kill() when a KVServer instance won't 51 | // be needed again. for your convenience, we supply 52 | // code to set rf.dead (without needing a lock), 53 | // and a killed() method to test rf.dead in 54 | // long-running loops. you can also add your own 55 | // code to Kill(). you're not required to do anything 56 | // about this, but it may be convenient (for example) 57 | // to suppress debug output from a Kill()ed instance. 58 | // 59 | func (kv *KVServer) Kill() { 60 | atomic.StoreInt32(&kv.dead, 1) 61 | kv.rf.Kill() 62 | // Your code here, if desired. 63 | } 64 | 65 | func (kv *KVServer) killed() bool { 66 | z := atomic.LoadInt32(&kv.dead) 67 | return z == 1 68 | } 69 | 70 | // 71 | // servers[] contains the ports of the set of 72 | // servers that will cooperate via Raft to 73 | // form the fault-tolerant key/value service. 74 | // me is the index of the current server in servers[]. 75 | // the k/v server should store snapshots through the underlying Raft 76 | // implementation, which should call persister.SaveStateAndSnapshot() to 77 | // atomically save the Raft state along with the snapshot. 78 | // the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, 79 | // in order to allow Raft to garbage-collect its log. if maxraftstate is -1, 80 | // you don't need to snapshot. 81 | // StartKVServer() must return quickly, so it should start goroutines 82 | // for any long-running work. 83 | // 84 | func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { 85 | // call labgob.Register on structures you want 86 | // Go's RPC library to marshall/unmarshall. 87 | labgob.Register(Op{}) 88 | 89 | kv := new(KVServer) 90 | kv.me = me 91 | kv.maxraftstate = maxraftstate 92 | 93 | // You may need initialization code here. 94 | 95 | kv.applyCh = make(chan raft.ApplyMsg) 96 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 97 | 98 | // You may need initialization code here. 
99 | 100 | return kv 101 | } 102 | -------------------------------------------------------------------------------- /src/labgob/labgob.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | // 4 | // trying to send non-capitalized fields over RPC produces a range of 5 | // misbehavior, including both mysterious incorrect computation and 6 | // outright crashes. so this wrapper around Go's encoding/gob warns 7 | // about non-capitalized field names. 8 | // 9 | 10 | import "encoding/gob" 11 | import "io" 12 | import "reflect" 13 | import "fmt" 14 | import "sync" 15 | import "unicode" 16 | import "unicode/utf8" 17 | 18 | var mu sync.Mutex 19 | var errorCount int // for TestCapital 20 | var checked map[reflect.Type]bool 21 | 22 | type LabEncoder struct { 23 | gob *gob.Encoder 24 | } 25 | 26 | func NewEncoder(w io.Writer) *LabEncoder { 27 | enc := &LabEncoder{} 28 | enc.gob = gob.NewEncoder(w) 29 | return enc 30 | } 31 | 32 | func (enc *LabEncoder) Encode(e interface{}) error { 33 | checkValue(e) 34 | return enc.gob.Encode(e) 35 | } 36 | 37 | func (enc *LabEncoder) EncodeValue(value reflect.Value) error { 38 | checkValue(value.Interface()) 39 | return enc.gob.EncodeValue(value) 40 | } 41 | 42 | type LabDecoder struct { 43 | gob *gob.Decoder 44 | } 45 | 46 | func NewDecoder(r io.Reader) *LabDecoder { 47 | dec := &LabDecoder{} 48 | dec.gob = gob.NewDecoder(r) 49 | return dec 50 | } 51 | 52 | func (dec *LabDecoder) Decode(e interface{}) error { 53 | checkValue(e) 54 | checkDefault(e) 55 | return dec.gob.Decode(e) 56 | } 57 | 58 | func Register(value interface{}) { 59 | checkValue(value) 60 | gob.Register(value) 61 | } 62 | 63 | func RegisterName(name string, value interface{}) { 64 | checkValue(value) 65 | gob.RegisterName(name, value) 66 | } 67 | 68 | func checkValue(value interface{}) { 69 | checkType(reflect.TypeOf(value)) 70 | } 71 | 72 | func checkType(t reflect.Type) { 73 | k := t.Kind() 74 | 75 | mu.Lock() 76 | // only complain once, and avoid recursion. 77 | if checked == nil { 78 | checked = map[reflect.Type]bool{} 79 | } 80 | if checked[t] { 81 | mu.Unlock() 82 | return 83 | } 84 | checked[t] = true 85 | mu.Unlock() 86 | 87 | switch k { 88 | case reflect.Struct: 89 | for i := 0; i < t.NumField(); i++ { 90 | f := t.Field(i) 91 | rune, _ := utf8.DecodeRuneInString(f.Name) 92 | if unicode.IsUpper(rune) == false { 93 | // ta da 94 | fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n", 95 | f.Name, t.Name()) 96 | mu.Lock() 97 | errorCount += 1 98 | mu.Unlock() 99 | } 100 | checkType(f.Type) 101 | } 102 | return 103 | case reflect.Slice, reflect.Array, reflect.Ptr: 104 | checkType(t.Elem()) 105 | return 106 | case reflect.Map: 107 | checkType(t.Elem()) 108 | checkType(t.Key()) 109 | return 110 | default: 111 | return 112 | } 113 | } 114 | 115 | // 116 | // warn if the value contains non-default values, 117 | // as it would if one sent an RPC but the reply 118 | // struct was already modified. if the RPC reply 119 | // contains default values, GOB won't overwrite 120 | // the non-default value. 
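// An illustrative sketch of the behavior described above (not part of the
// original labgob source; it mirrors what TestDefault in test_test.go
// exercises). Because gob omits zero-valued fields from the encoded stream,
// decoding into a reused variable leaves its old contents in place:
//
//	var buf bytes.Buffer                     // assumes "bytes" and "encoding/gob" are imported
//	type Reply struct{ X int }
//	gob.NewEncoder(&buf).Encode(Reply{X: 0}) // X is zero, so it is not transmitted
//	reused := Reply{X: 99}                   // e.g. a reply variable left over from an earlier RPC
//	gob.NewDecoder(&buf).Decode(&reused)     // reused.X is still 99, not 0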
121 | // 122 | func checkDefault(value interface{}) { 123 | if value == nil { 124 | return 125 | } 126 | checkDefault1(reflect.ValueOf(value), 1, "") 127 | } 128 | 129 | func checkDefault1(value reflect.Value, depth int, name string) { 130 | if depth > 3 { 131 | return 132 | } 133 | 134 | t := value.Type() 135 | k := t.Kind() 136 | 137 | switch k { 138 | case reflect.Struct: 139 | for i := 0; i < t.NumField(); i++ { 140 | vv := value.Field(i) 141 | name1 := t.Field(i).Name 142 | if name != "" { 143 | name1 = name + "." + name1 144 | } 145 | checkDefault1(vv, depth+1, name1) 146 | } 147 | return 148 | case reflect.Ptr: 149 | if value.IsNil() { 150 | return 151 | } 152 | checkDefault1(value.Elem(), depth+1, name) 153 | return 154 | case reflect.Bool, 155 | reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 156 | reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, 157 | reflect.Uintptr, reflect.Float32, reflect.Float64, 158 | reflect.String: 159 | if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false { 160 | mu.Lock() 161 | if errorCount < 1 { 162 | what := name 163 | if what == "" { 164 | what = t.Name() 165 | } 166 | // this warning typically arises if code re-uses the same RPC reply 167 | // variable for multiple RPC calls, or if code restores persisted 168 | // state into variable that already have non-default values. 169 | fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n", 170 | what) 171 | } 172 | errorCount += 1 173 | mu.Unlock() 174 | } 175 | return 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/labgob/test_test.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | import "testing" 4 | 5 | import "bytes" 6 | 7 | type T1 struct { 8 | T1int0 int 9 | T1int1 int 10 | T1string0 string 11 | T1string1 string 12 | } 13 | 14 | type T2 struct { 15 | T2slice []T1 16 | T2map map[int]*T1 17 | T2t3 interface{} 18 | } 19 | 20 | type T3 struct { 21 | T3int999 int 22 | } 23 | 24 | // 25 | // test that we didn't break GOB. 
26 | // 27 | func TestGOB(t *testing.T) { 28 | e0 := errorCount 29 | 30 | w := new(bytes.Buffer) 31 | 32 | Register(T3{}) 33 | 34 | { 35 | x0 := 0 36 | x1 := 1 37 | t1 := T1{} 38 | t1.T1int1 = 1 39 | t1.T1string1 = "6.824" 40 | t2 := T2{} 41 | t2.T2slice = []T1{T1{}, t1} 42 | t2.T2map = map[int]*T1{} 43 | t2.T2map[99] = &T1{1, 2, "x", "y"} 44 | t2.T2t3 = T3{999} 45 | 46 | e := NewEncoder(w) 47 | e.Encode(x0) 48 | e.Encode(x1) 49 | e.Encode(t1) 50 | e.Encode(t2) 51 | } 52 | data := w.Bytes() 53 | 54 | { 55 | var x0 int 56 | var x1 int 57 | var t1 T1 58 | var t2 T2 59 | 60 | r := bytes.NewBuffer(data) 61 | d := NewDecoder(r) 62 | if d.Decode(&x0) != nil || 63 | d.Decode(&x1) != nil || 64 | d.Decode(&t1) != nil || 65 | d.Decode(&t2) != nil { 66 | t.Fatalf("Decode failed") 67 | } 68 | 69 | if x0 != 0 { 70 | t.Fatalf("wrong x0 %v\n", x0) 71 | } 72 | if x1 != 1 { 73 | t.Fatalf("wrong x1 %v\n", x1) 74 | } 75 | if t1.T1int0 != 0 { 76 | t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0) 77 | } 78 | if t1.T1int1 != 1 { 79 | t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1) 80 | } 81 | if t1.T1string0 != "" { 82 | t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0) 83 | } 84 | if t1.T1string1 != "6.824" { 85 | t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1) 86 | } 87 | if len(t2.T2slice) != 2 { 88 | t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice)) 89 | } 90 | if t2.T2slice[1].T1int1 != 1 { 91 | t.Fatalf("wrong slice value\n") 92 | } 93 | if len(t2.T2map) != 1 { 94 | t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map)) 95 | } 96 | if t2.T2map[99].T1string1 != "y" { 97 | t.Fatalf("wrong map value\n") 98 | } 99 | t3 := (t2.T2t3).(T3) 100 | if t3.T3int999 != 999 { 101 | t.Fatalf("wrong t2.T2t3.T3int999\n") 102 | } 103 | } 104 | 105 | if errorCount != e0 { 106 | t.Fatalf("there were errors, but should not have been") 107 | } 108 | } 109 | 110 | type T4 struct { 111 | Yes int 112 | no int 113 | } 114 | 115 | // 116 | // make sure we check capitalization 117 | // labgob prints one warning during this test. 118 | // 119 | func TestCapital(t *testing.T) { 120 | e0 := errorCount 121 | 122 | v := []map[*T4]int{} 123 | 124 | w := new(bytes.Buffer) 125 | e := NewEncoder(w) 126 | e.Encode(v) 127 | data := w.Bytes() 128 | 129 | var v1 []map[T4]int 130 | r := bytes.NewBuffer(data) 131 | d := NewDecoder(r) 132 | d.Decode(&v1) 133 | 134 | if errorCount != e0+1 { 135 | t.Fatalf("failed to warn about lower-case field") 136 | } 137 | } 138 | 139 | // 140 | // check that we warn when someone sends a default value over 141 | // RPC but the target into which we're decoding holds a non-default 142 | // value, which GOB seems not to overwrite as you'd expect. 143 | // 144 | // labgob does not print a warning. 145 | // 146 | func TestDefault(t *testing.T) { 147 | e0 := errorCount 148 | 149 | type DD struct { 150 | X int 151 | } 152 | 153 | // send a default value... 154 | dd1 := DD{} 155 | 156 | w := new(bytes.Buffer) 157 | e := NewEncoder(w) 158 | e.Encode(dd1) 159 | data := w.Bytes() 160 | 161 | // and receive it into memory that already 162 | // holds non-default values. 
163 | reply := DD{99} 164 | 165 | r := bytes.NewBuffer(data) 166 | d := NewDecoder(r) 167 | d.Decode(&reply) 168 | 169 | if errorCount != e0+1 { 170 | t.Fatalf("failed to warn about decoding into non-default value") 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/labrpc/test_test.go: -------------------------------------------------------------------------------- 1 | package labrpc 2 | 3 | import "testing" 4 | import "strconv" 5 | import "sync" 6 | import "runtime" 7 | import "time" 8 | import "fmt" 9 | 10 | type JunkArgs struct { 11 | X int 12 | } 13 | type JunkReply struct { 14 | X string 15 | } 16 | 17 | type JunkServer struct { 18 | mu sync.Mutex 19 | log1 []string 20 | log2 []int 21 | } 22 | 23 | func (js *JunkServer) Handler1(args string, reply *int) { 24 | js.mu.Lock() 25 | defer js.mu.Unlock() 26 | js.log1 = append(js.log1, args) 27 | *reply, _ = strconv.Atoi(args) 28 | } 29 | 30 | func (js *JunkServer) Handler2(args int, reply *string) { 31 | js.mu.Lock() 32 | defer js.mu.Unlock() 33 | js.log2 = append(js.log2, args) 34 | *reply = "handler2-" + strconv.Itoa(args) 35 | } 36 | 37 | func (js *JunkServer) Handler3(args int, reply *int) { 38 | js.mu.Lock() 39 | defer js.mu.Unlock() 40 | time.Sleep(20 * time.Second) 41 | *reply = -args 42 | } 43 | 44 | // args is a pointer 45 | func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) { 46 | reply.X = "pointer" 47 | } 48 | 49 | // args is a not pointer 50 | func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) { 51 | reply.X = "no pointer" 52 | } 53 | 54 | func (js *JunkServer) Handler6(args string, reply *int) { 55 | js.mu.Lock() 56 | defer js.mu.Unlock() 57 | *reply = len(args) 58 | } 59 | 60 | func (js *JunkServer) Handler7(args int, reply *string) { 61 | js.mu.Lock() 62 | defer js.mu.Unlock() 63 | *reply = "" 64 | for i := 0; i < args; i++ { 65 | *reply = *reply + "y" 66 | } 67 | } 68 | 69 | func TestBasic(t *testing.T) { 70 | runtime.GOMAXPROCS(4) 71 | 72 | rn := MakeNetwork() 73 | defer rn.Cleanup() 74 | 75 | e := rn.MakeEnd("end1-99") 76 | 77 | js := &JunkServer{} 78 | svc := MakeService(js) 79 | 80 | rs := MakeServer() 81 | rs.AddService(svc) 82 | rn.AddServer("server99", rs) 83 | 84 | rn.Connect("end1-99", "server99") 85 | rn.Enable("end1-99", true) 86 | 87 | { 88 | reply := "" 89 | e.Call("JunkServer.Handler2", 111, &reply) 90 | if reply != "handler2-111" { 91 | t.Fatalf("wrong reply from Handler2") 92 | } 93 | } 94 | 95 | { 96 | reply := 0 97 | e.Call("JunkServer.Handler1", "9099", &reply) 98 | if reply != 9099 { 99 | t.Fatalf("wrong reply from Handler1") 100 | } 101 | } 102 | } 103 | 104 | func TestTypes(t *testing.T) { 105 | runtime.GOMAXPROCS(4) 106 | 107 | rn := MakeNetwork() 108 | defer rn.Cleanup() 109 | 110 | e := rn.MakeEnd("end1-99") 111 | 112 | js := &JunkServer{} 113 | svc := MakeService(js) 114 | 115 | rs := MakeServer() 116 | rs.AddService(svc) 117 | rn.AddServer("server99", rs) 118 | 119 | rn.Connect("end1-99", "server99") 120 | rn.Enable("end1-99", true) 121 | 122 | { 123 | var args JunkArgs 124 | var reply JunkReply 125 | // args must match type (pointer or not) of handler. 126 | e.Call("JunkServer.Handler4", &args, &reply) 127 | if reply.X != "pointer" { 128 | t.Fatalf("wrong reply from Handler4") 129 | } 130 | } 131 | 132 | { 133 | var args JunkArgs 134 | var reply JunkReply 135 | // args must match type (pointer or not) of handler. 
136 | e.Call("JunkServer.Handler5", args, &reply) 137 | if reply.X != "no pointer" { 138 | t.Fatalf("wrong reply from Handler5") 139 | } 140 | } 141 | } 142 | 143 | // 144 | // does net.Enable(endname, false) really disconnect a client? 145 | // 146 | func TestDisconnect(t *testing.T) { 147 | runtime.GOMAXPROCS(4) 148 | 149 | rn := MakeNetwork() 150 | defer rn.Cleanup() 151 | 152 | e := rn.MakeEnd("end1-99") 153 | 154 | js := &JunkServer{} 155 | svc := MakeService(js) 156 | 157 | rs := MakeServer() 158 | rs.AddService(svc) 159 | rn.AddServer("server99", rs) 160 | 161 | rn.Connect("end1-99", "server99") 162 | 163 | { 164 | reply := "" 165 | e.Call("JunkServer.Handler2", 111, &reply) 166 | if reply != "" { 167 | t.Fatalf("unexpected reply from Handler2") 168 | } 169 | } 170 | 171 | rn.Enable("end1-99", true) 172 | 173 | { 174 | reply := 0 175 | e.Call("JunkServer.Handler1", "9099", &reply) 176 | if reply != 9099 { 177 | t.Fatalf("wrong reply from Handler1") 178 | } 179 | } 180 | } 181 | 182 | // 183 | // test net.GetCount() 184 | // 185 | func TestCounts(t *testing.T) { 186 | runtime.GOMAXPROCS(4) 187 | 188 | rn := MakeNetwork() 189 | defer rn.Cleanup() 190 | 191 | e := rn.MakeEnd("end1-99") 192 | 193 | js := &JunkServer{} 194 | svc := MakeService(js) 195 | 196 | rs := MakeServer() 197 | rs.AddService(svc) 198 | rn.AddServer(99, rs) 199 | 200 | rn.Connect("end1-99", 99) 201 | rn.Enable("end1-99", true) 202 | 203 | for i := 0; i < 17; i++ { 204 | reply := "" 205 | e.Call("JunkServer.Handler2", i, &reply) 206 | wanted := "handler2-" + strconv.Itoa(i) 207 | if reply != wanted { 208 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 209 | } 210 | } 211 | 212 | n := rn.GetCount(99) 213 | if n != 17 { 214 | t.Fatalf("wrong GetCount() %v, expected 17\n", n) 215 | } 216 | } 217 | 218 | // 219 | // test net.GetTotalBytes() 220 | // 221 | func TestBytes(t *testing.T) { 222 | runtime.GOMAXPROCS(4) 223 | 224 | rn := MakeNetwork() 225 | defer rn.Cleanup() 226 | 227 | e := rn.MakeEnd("end1-99") 228 | 229 | js := &JunkServer{} 230 | svc := MakeService(js) 231 | 232 | rs := MakeServer() 233 | rs.AddService(svc) 234 | rn.AddServer(99, rs) 235 | 236 | rn.Connect("end1-99", 99) 237 | rn.Enable("end1-99", true) 238 | 239 | for i := 0; i < 17; i++ { 240 | args := "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" 241 | args = args + args 242 | args = args + args 243 | reply := 0 244 | e.Call("JunkServer.Handler6", args, &reply) 245 | wanted := len(args) 246 | if reply != wanted { 247 | t.Fatalf("wrong reply %v from Handler6, expecting %v", reply, wanted) 248 | } 249 | } 250 | 251 | n := rn.GetTotalBytes() 252 | if n < 4828 || n > 6000 { 253 | t.Fatalf("wrong GetTotalBytes() %v, expected about 5000\n", n) 254 | } 255 | 256 | for i := 0; i < 17; i++ { 257 | args := 107 258 | reply := "" 259 | e.Call("JunkServer.Handler7", args, &reply) 260 | wanted := args 261 | if len(reply) != wanted { 262 | t.Fatalf("wrong reply len=%v from Handler6, expecting %v", len(reply), wanted) 263 | } 264 | } 265 | 266 | nn := rn.GetTotalBytes() - n 267 | if nn < 1800 || nn > 2500 { 268 | t.Fatalf("wrong GetTotalBytes() %v, expected about 2000\n", nn) 269 | } 270 | } 271 | 272 | // 273 | // test RPCs from concurrent ClientEnds 274 | // 275 | func TestConcurrentMany(t *testing.T) { 276 | runtime.GOMAXPROCS(4) 277 | 278 | rn := MakeNetwork() 279 | defer rn.Cleanup() 280 | 281 | js := &JunkServer{} 282 | svc := MakeService(js) 283 | 284 | rs := MakeServer() 285 | rs.AddService(svc) 286 | 
rn.AddServer(1000, rs) 287 | 288 | ch := make(chan int) 289 | 290 | nclients := 20 291 | nrpcs := 10 292 | for ii := 0; ii < nclients; ii++ { 293 | go func(i int) { 294 | n := 0 295 | defer func() { ch <- n }() 296 | 297 | e := rn.MakeEnd(i) 298 | rn.Connect(i, 1000) 299 | rn.Enable(i, true) 300 | 301 | for j := 0; j < nrpcs; j++ { 302 | arg := i*100 + j 303 | reply := "" 304 | e.Call("JunkServer.Handler2", arg, &reply) 305 | wanted := "handler2-" + strconv.Itoa(arg) 306 | if reply != wanted { 307 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 308 | } 309 | n += 1 310 | } 311 | }(ii) 312 | } 313 | 314 | total := 0 315 | for ii := 0; ii < nclients; ii++ { 316 | x := <-ch 317 | total += x 318 | } 319 | 320 | if total != nclients*nrpcs { 321 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs) 322 | } 323 | 324 | n := rn.GetCount(1000) 325 | if n != total { 326 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 327 | } 328 | } 329 | 330 | // 331 | // test unreliable 332 | // 333 | func TestUnreliable(t *testing.T) { 334 | runtime.GOMAXPROCS(4) 335 | 336 | rn := MakeNetwork() 337 | defer rn.Cleanup() 338 | rn.Reliable(false) 339 | 340 | js := &JunkServer{} 341 | svc := MakeService(js) 342 | 343 | rs := MakeServer() 344 | rs.AddService(svc) 345 | rn.AddServer(1000, rs) 346 | 347 | ch := make(chan int) 348 | 349 | nclients := 300 350 | for ii := 0; ii < nclients; ii++ { 351 | go func(i int) { 352 | n := 0 353 | defer func() { ch <- n }() 354 | 355 | e := rn.MakeEnd(i) 356 | rn.Connect(i, 1000) 357 | rn.Enable(i, true) 358 | 359 | arg := i * 100 360 | reply := "" 361 | ok := e.Call("JunkServer.Handler2", arg, &reply) 362 | if ok { 363 | wanted := "handler2-" + strconv.Itoa(arg) 364 | if reply != wanted { 365 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 366 | } 367 | n += 1 368 | } 369 | }(ii) 370 | } 371 | 372 | total := 0 373 | for ii := 0; ii < nclients; ii++ { 374 | x := <-ch 375 | total += x 376 | } 377 | 378 | if total == nclients || total == 0 { 379 | t.Fatalf("all RPCs succeeded despite unreliable") 380 | } 381 | } 382 | 383 | // 384 | // test concurrent RPCs from a single ClientEnd 385 | // 386 | func TestConcurrentOne(t *testing.T) { 387 | runtime.GOMAXPROCS(4) 388 | 389 | rn := MakeNetwork() 390 | defer rn.Cleanup() 391 | 392 | js := &JunkServer{} 393 | svc := MakeService(js) 394 | 395 | rs := MakeServer() 396 | rs.AddService(svc) 397 | rn.AddServer(1000, rs) 398 | 399 | e := rn.MakeEnd("c") 400 | rn.Connect("c", 1000) 401 | rn.Enable("c", true) 402 | 403 | ch := make(chan int) 404 | 405 | nrpcs := 20 406 | for ii := 0; ii < nrpcs; ii++ { 407 | go func(i int) { 408 | n := 0 409 | defer func() { ch <- n }() 410 | 411 | arg := 100 + i 412 | reply := "" 413 | e.Call("JunkServer.Handler2", arg, &reply) 414 | wanted := "handler2-" + strconv.Itoa(arg) 415 | if reply != wanted { 416 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 417 | } 418 | n += 1 419 | }(ii) 420 | } 421 | 422 | total := 0 423 | for ii := 0; ii < nrpcs; ii++ { 424 | x := <-ch 425 | total += x 426 | } 427 | 428 | if total != nrpcs { 429 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs) 430 | } 431 | 432 | js.mu.Lock() 433 | defer js.mu.Unlock() 434 | if len(js.log2) != nrpcs { 435 | t.Fatalf("wrong number of RPCs delivered") 436 | } 437 | 438 | n := rn.GetCount(1000) 439 | if n != total { 440 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 441 | } 442 | } 443 | 444 | 
// 445 | // regression: an RPC that's delayed during Enabled=false 446 | // should not delay subsequent RPCs (e.g. after Enabled=true). 447 | // 448 | func TestRegression1(t *testing.T) { 449 | runtime.GOMAXPROCS(4) 450 | 451 | rn := MakeNetwork() 452 | defer rn.Cleanup() 453 | 454 | js := &JunkServer{} 455 | svc := MakeService(js) 456 | 457 | rs := MakeServer() 458 | rs.AddService(svc) 459 | rn.AddServer(1000, rs) 460 | 461 | e := rn.MakeEnd("c") 462 | rn.Connect("c", 1000) 463 | 464 | // start some RPCs while the ClientEnd is disabled. 465 | // they'll be delayed. 466 | rn.Enable("c", false) 467 | ch := make(chan bool) 468 | nrpcs := 20 469 | for ii := 0; ii < nrpcs; ii++ { 470 | go func(i int) { 471 | ok := false 472 | defer func() { ch <- ok }() 473 | 474 | arg := 100 + i 475 | reply := "" 476 | // this call ought to return false. 477 | e.Call("JunkServer.Handler2", arg, &reply) 478 | ok = true 479 | }(ii) 480 | } 481 | 482 | time.Sleep(100 * time.Millisecond) 483 | 484 | // now enable the ClientEnd and check that an RPC completes quickly. 485 | t0 := time.Now() 486 | rn.Enable("c", true) 487 | { 488 | arg := 99 489 | reply := "" 490 | e.Call("JunkServer.Handler2", arg, &reply) 491 | wanted := "handler2-" + strconv.Itoa(arg) 492 | if reply != wanted { 493 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 494 | } 495 | } 496 | dur := time.Since(t0).Seconds() 497 | 498 | if dur > 0.03 { 499 | t.Fatalf("RPC took too long (%v) after Enable", dur) 500 | } 501 | 502 | for ii := 0; ii < nrpcs; ii++ { 503 | <-ch 504 | } 505 | 506 | js.mu.Lock() 507 | defer js.mu.Unlock() 508 | if len(js.log2) != 1 { 509 | t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2)) 510 | } 511 | 512 | n := rn.GetCount(1000) 513 | if n != 1 { 514 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1) 515 | } 516 | } 517 | 518 | // 519 | // if an RPC is stuck in a server, and the server 520 | // is killed with DeleteServer(), does the RPC 521 | // get un-stuck? 
522 | // 523 | func TestKilled(t *testing.T) { 524 | runtime.GOMAXPROCS(4) 525 | 526 | rn := MakeNetwork() 527 | defer rn.Cleanup() 528 | 529 | e := rn.MakeEnd("end1-99") 530 | 531 | js := &JunkServer{} 532 | svc := MakeService(js) 533 | 534 | rs := MakeServer() 535 | rs.AddService(svc) 536 | rn.AddServer("server99", rs) 537 | 538 | rn.Connect("end1-99", "server99") 539 | rn.Enable("end1-99", true) 540 | 541 | doneCh := make(chan bool) 542 | go func() { 543 | reply := 0 544 | ok := e.Call("JunkServer.Handler3", 99, &reply) 545 | doneCh <- ok 546 | }() 547 | 548 | time.Sleep(1000 * time.Millisecond) 549 | 550 | select { 551 | case <-doneCh: 552 | t.Fatalf("Handler3 should not have returned yet") 553 | case <-time.After(100 * time.Millisecond): 554 | } 555 | 556 | rn.DeleteServer("server99") 557 | 558 | select { 559 | case x := <-doneCh: 560 | if x != false { 561 | t.Fatalf("Handler3 returned successfully despite DeleteServer()") 562 | } 563 | case <-time.After(100 * time.Millisecond): 564 | t.Fatalf("Handler3 should return after DeleteServer()") 565 | } 566 | } 567 | 568 | func TestBenchmark(t *testing.T) { 569 | runtime.GOMAXPROCS(4) 570 | 571 | rn := MakeNetwork() 572 | defer rn.Cleanup() 573 | 574 | e := rn.MakeEnd("end1-99") 575 | 576 | js := &JunkServer{} 577 | svc := MakeService(js) 578 | 579 | rs := MakeServer() 580 | rs.AddService(svc) 581 | rn.AddServer("server99", rs) 582 | 583 | rn.Connect("end1-99", "server99") 584 | rn.Enable("end1-99", true) 585 | 586 | t0 := time.Now() 587 | n := 100000 588 | for iters := 0; iters < n; iters++ { 589 | reply := "" 590 | e.Call("JunkServer.Handler2", 111, &reply) 591 | if reply != "handler2-111" { 592 | t.Fatalf("wrong reply from Handler2") 593 | } 594 | } 595 | fmt.Printf("%v for %v\n", time.Since(t0), n) 596 | // march 2016, rtm laptop, 22 microseconds per RPC 597 | } 598 | -------------------------------------------------------------------------------- /src/main/.gitignore: -------------------------------------------------------------------------------- 1 | mr-tmp 2 | mr-*[^go] 3 | *.so 4 | 5 | mrsequential 6 | mrworker 7 | mrmaster 8 | -------------------------------------------------------------------------------- /src/main/build-wc.sh: -------------------------------------------------------------------------------- 1 | go build -buildmode=plugin ../mrapps/wc.go 2 | -------------------------------------------------------------------------------- /src/main/diskvd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start a diskvd server. it's a member of some replica 5 | // group, which has other members, and it needs to know 6 | // how to talk to the members of the shardmaster service. 7 | // used by ../diskv/test_test.go 8 | // 9 | // arguments: 10 | // -g groupid 11 | // -m masterport1 -m masterport2 ... 12 | // -s replicaport1 -s replicaport2 ... 13 | // -i my-index-in-server-port-list 14 | // -u unreliable 15 | // -d directory 16 | // -r restart 17 | 18 | import "time" 19 | import "diskv" 20 | import "os" 21 | import "fmt" 22 | import "strconv" 23 | import "runtime" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: diskvd -g gid -m master... -s server... 
-i my-index -d dir\n") 27 | os.Exit(1) 28 | } 29 | 30 | func main() { 31 | var gid int64 = -1 // my replica group ID 32 | masters := []string{} // ports of shardmasters 33 | replicas := []string{} // ports of servers in my replica group 34 | me := -1 // my index in replicas[] 35 | unreliable := false 36 | dir := "" // store persistent data here 37 | restart := false 38 | 39 | for i := 1; i+1 < len(os.Args); i += 2 { 40 | a0 := os.Args[i] 41 | a1 := os.Args[i+1] 42 | if a0 == "-g" { 43 | gid, _ = strconv.ParseInt(a1, 10, 64) 44 | } else if a0 == "-m" { 45 | masters = append(masters, a1) 46 | } else if a0 == "-s" { 47 | replicas = append(replicas, a1) 48 | } else if a0 == "-i" { 49 | me, _ = strconv.Atoi(a1) 50 | } else if a0 == "-u" { 51 | unreliable, _ = strconv.ParseBool(a1) 52 | } else if a0 == "-d" { 53 | dir = a1 54 | } else if a0 == "-r" { 55 | restart, _ = strconv.ParseBool(a1) 56 | } else { 57 | usage() 58 | } 59 | } 60 | 61 | if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" { 62 | usage() 63 | } 64 | 65 | runtime.GOMAXPROCS(4) 66 | 67 | srv := diskv.StartServer(gid, masters, replicas, me, dir, restart) 68 | srv.Setunreliable(unreliable) 69 | 70 | // for safety, force quit after 10 minutes. 71 | time.Sleep(10 * 60 * time.Second) 72 | mep, _ := os.FindProcess(os.Getpid()) 73 | mep.Kill() 74 | } 75 | -------------------------------------------------------------------------------- /src/main/lockc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see comments in lockd.go 5 | // 6 | 7 | import "lockservice" 8 | import "os" 9 | import "fmt" 10 | 11 | func usage() { 12 | fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n") 13 | os.Exit(1) 14 | } 15 | 16 | func main() { 17 | if len(os.Args) == 5 { 18 | ck := lockservice.MakeClerk(os.Args[2], os.Args[3]) 19 | var ok bool 20 | if os.Args[1] == "-l" { 21 | ok = ck.Lock(os.Args[4]) 22 | } else if os.Args[1] == "-u" { 23 | ok = ck.Unlock(os.Args[4]) 24 | } else { 25 | usage() 26 | } 27 | fmt.Printf("reply: %v\n", ok) 28 | } else { 29 | usage() 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/lockd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // export GOPATH=~/6.824 4 | // go build lockd.go 5 | // go build lockc.go 6 | // ./lockd -p a b & 7 | // ./lockd -b a b & 8 | // ./lockc -l a b lx 9 | // ./lockc -u a b lx 10 | // 11 | // on Athena, use /tmp/myname-a and /tmp/myname-b 12 | // instead of a and b. 13 | 14 | import "time" 15 | import "lockservice" 16 | import "os" 17 | import "fmt" 18 | 19 | func main() { 20 | if len(os.Args) == 4 && os.Args[1] == "-p" { 21 | lockservice.StartServer(os.Args[2], os.Args[3], true) 22 | } else if len(os.Args) == 4 && os.Args[1] == "-b" { 23 | lockservice.StartServer(os.Args[2], os.Args[3], false) 24 | } else { 25 | fmt.Printf("Usage: lockd -p|-b primaryport backupport\n") 26 | os.Exit(1) 27 | } 28 | for { 29 | time.Sleep(100 * time.Second) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/mrmaster.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start the master process, which is implemented 5 | // in ../mr/master.go 6 | // 7 | // go run mrmaster.go pg*.txt 8 | // 9 | // Please do not change this file. 
10 | // 11 | 12 | import "mr" 13 | import "time" 14 | import "os" 15 | import "fmt" 16 | 17 | func main() { 18 | if len(os.Args) < 2 { 19 | fmt.Fprintf(os.Stderr, "Usage: mrmaster inputfiles...\n") 20 | os.Exit(1) 21 | } 22 | 23 | m := mr.MakeMaster(os.Args[1:], 10) 24 | for m.Done() == false { 25 | time.Sleep(time.Second) 26 | } 27 | 28 | time.Sleep(time.Second) 29 | } 30 | -------------------------------------------------------------------------------- /src/main/mrsequential.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // simple sequential MapReduce. 5 | // 6 | // go run mrsequential.go wc.so pg*.txt 7 | // 8 | 9 | import "fmt" 10 | import "mr" 11 | import "plugin" 12 | import "os" 13 | import "log" 14 | import "io/ioutil" 15 | import "sort" 16 | 17 | // for sorting by key. 18 | type ByKey []mr.KeyValue 19 | 20 | // for sorting by key. 21 | func (a ByKey) Len() int { return len(a) } 22 | func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 23 | func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } 24 | 25 | func main() { 26 | if len(os.Args) < 3 { 27 | fmt.Fprintf(os.Stderr, "Usage: mrsequential xxx.so inputfiles...\n") 28 | os.Exit(1) 29 | } 30 | 31 | mapf, reducef := loadPlugin(os.Args[1]) 32 | 33 | // 34 | // read each input file, 35 | // pass it to Map, 36 | // accumulate the intermediate Map output. 37 | // 38 | intermediate := []mr.KeyValue{} 39 | for _, filename := range os.Args[2:] { 40 | file, err := os.Open(filename) 41 | if err != nil { 42 | log.Fatalf("cannot open %v", filename) 43 | } 44 | content, err := ioutil.ReadAll(file) 45 | if err != nil { 46 | log.Fatalf("cannot read %v", filename) 47 | } 48 | file.Close() 49 | kva := mapf(filename, string(content)) 50 | intermediate = append(intermediate, kva...) 51 | } 52 | 53 | // 54 | // a big difference from real MapReduce is that all the 55 | // intermediate data is in one place, intermediate[], 56 | // rather than being partitioned into NxM buckets. 57 | // 58 | 59 | sort.Sort(ByKey(intermediate)) 60 | 61 | oname := "mr-out-0" 62 | ofile, _ := os.Create(oname) 63 | 64 | // 65 | // call Reduce on each distinct key in intermediate[], 66 | // and print the result to mr-out-0. 67 | // 68 | i := 0 69 | for i < len(intermediate) { 70 | j := i + 1 71 | for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { 72 | j++ 73 | } 74 | values := []string{} 75 | for k := i; k < j; k++ { 76 | values = append(values, intermediate[k].Value) 77 | } 78 | output := reducef(intermediate[i].Key, values) 79 | 80 | // this is the correct format for each line of Reduce output. 81 | fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) 82 | 83 | i = j 84 | } 85 | 86 | ofile.Close() 87 | } 88 | 89 | // 90 | // load the application Map and Reduce functions 91 | // from a plugin file, e.g. 
../mrapps/wc.so 92 | // 93 | func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) { 94 | p, err := plugin.Open(filename) 95 | if err != nil { 96 | log.Fatalf("cannot load plugin %v", filename) 97 | } 98 | xmapf, err := p.Lookup("Map") 99 | if err != nil { 100 | log.Fatalf("cannot find Map in %v", filename) 101 | } 102 | mapf := xmapf.(func(string, string) []mr.KeyValue) 103 | xreducef, err := p.Lookup("Reduce") 104 | if err != nil { 105 | log.Fatalf("cannot find Reduce in %v", filename) 106 | } 107 | reducef := xreducef.(func(string, []string) string) 108 | 109 | return mapf, reducef 110 | } 111 | -------------------------------------------------------------------------------- /src/main/mrworker.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start a worker process, which is implemented 5 | // in ../mr/worker.go. typically there will be 6 | // multiple worker processes, talking to one master. 7 | // 8 | // go run mrworker.go wc.so 9 | // 10 | // Please do not change this file. 11 | // 12 | 13 | import "mr" 14 | import "plugin" 15 | import "os" 16 | import "fmt" 17 | import "log" 18 | 19 | func main() { 20 | if len(os.Args) != 2 { 21 | fmt.Fprintf(os.Stderr, "Usage: mrworker xxx.so\n") 22 | os.Exit(1) 23 | } 24 | 25 | mapf, reducef := loadPlugin(os.Args[1]) 26 | 27 | mr.Worker(mapf, reducef) 28 | } 29 | 30 | // 31 | // load the application Map and Reduce functions 32 | // from a plugin file, e.g. ../mrapps/wc.so 33 | // 34 | func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) { 35 | p, err := plugin.Open(filename) 36 | if err != nil { 37 | log.Fatalf("cannot load plugin %v", filename) 38 | } 39 | xmapf, err := p.Lookup("Map") 40 | if err != nil { 41 | log.Fatalf("cannot find Map in %v", filename) 42 | } 43 | mapf := xmapf.(func(string, string) []mr.KeyValue) 44 | xreducef, err := p.Lookup("Reduce") 45 | if err != nil { 46 | log.Fatalf("cannot find Reduce in %v", filename) 47 | } 48 | reducef := xreducef.(func(string, []string) string) 49 | 50 | return mapf, reducef 51 | } 52 | -------------------------------------------------------------------------------- /src/main/pbc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // pbservice client application 5 | // 6 | // export GOPATH=~/6.824 7 | // go build viewd.go 8 | // go build pbd.go 9 | // go build pbc.go 10 | // ./viewd /tmp/rtm-v & 11 | // ./pbd /tmp/rtm-v /tmp/rtm-1 & 12 | // ./pbd /tmp/rtm-v /tmp/rtm-2 & 13 | // ./pbc /tmp/rtm-v key1 value1 14 | // ./pbc /tmp/rtm-v key1 15 | // 16 | // change "rtm" to your user name. 17 | // start the pbd programs in separate windows and kill 18 | // and restart them to exercise fault tolerance. 
19 | // 20 | 21 | import "pbservice" 22 | import "os" 23 | import "fmt" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: pbc viewport key\n") 27 | fmt.Printf(" pbc viewport key value\n") 28 | os.Exit(1) 29 | } 30 | 31 | func main() { 32 | if len(os.Args) == 3 { 33 | // get 34 | ck := pbservice.MakeClerk(os.Args[1], "") 35 | v := ck.Get(os.Args[2]) 36 | fmt.Printf("%v\n", v) 37 | } else if len(os.Args) == 4 { 38 | // put 39 | ck := pbservice.MakeClerk(os.Args[1], "") 40 | ck.Put(os.Args[2], os.Args[3]) 41 | } else { 42 | usage() 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/pbd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "pbservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 3 { 14 | fmt.Printf("Usage: pbd viewport myport\n") 15 | os.Exit(1) 16 | } 17 | 18 | pbservice.StartServer(os.Args[1], os.Args[2]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/test-mr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # basic map-reduce test 5 | # 6 | 7 | RACE= 8 | 9 | # uncomment this to run the tests with the Go race detector. 10 | #RACE=-race 11 | 12 | # run the test in a fresh sub-directory. 13 | rm -rf mr-tmp 14 | mkdir mr-tmp || exit 1 15 | cd mr-tmp || exit 1 16 | rm -f mr-* 17 | 18 | # make sure software is freshly built. 19 | (cd ../../mrapps && go build $RACE -buildmode=plugin wc.go) || exit 1 20 | (cd ../../mrapps && go build $RACE -buildmode=plugin indexer.go) || exit 1 21 | (cd ../../mrapps && go build $RACE -buildmode=plugin mtiming.go) || exit 1 22 | (cd ../../mrapps && go build $RACE -buildmode=plugin rtiming.go) || exit 1 23 | (cd ../../mrapps && go build $RACE -buildmode=plugin crash.go) || exit 1 24 | (cd ../../mrapps && go build $RACE -buildmode=plugin nocrash.go) || exit 1 25 | (cd .. && go build $RACE mrmaster.go) || exit 1 26 | (cd .. && go build $RACE mrworker.go) || exit 1 27 | (cd .. && go build $RACE mrsequential.go) || exit 1 28 | 29 | failed_any=0 30 | 31 | # first word-count 32 | 33 | # generate the correct output 34 | ../mrsequential ../../mrapps/wc.so ../pg*txt || exit 1 35 | sort mr-out-0 > mr-correct-wc.txt 36 | rm -f mr-out* 37 | 38 | echo '***' Starting wc test. 39 | 40 | timeout -k 2s 180s ../mrmaster ../pg*txt & 41 | 42 | # give the master time to create the sockets. 43 | sleep 1 44 | 45 | # start multiple workers. 46 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so & 47 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so & 48 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so & 49 | 50 | # wait for one of the processes to exit. 51 | # under bash, this waits for all processes, 52 | # including the master. 53 | wait 54 | 55 | # the master or a worker has exited. since workers are required 56 | # to exit when a job is completely finished, and not before, 57 | # that means the job has finished. 58 | 59 | sort mr-out* | grep . > mr-wc-all 60 | if cmp mr-wc-all mr-correct-wc.txt 61 | then 62 | echo '---' wc test: PASS 63 | else 64 | echo '---' wc output is not the same as mr-correct-wc.txt 65 | echo '---' wc test: FAIL 66 | failed_any=1 67 | fi 68 | 69 | # wait for remaining workers and master to exit. 
70 | wait ; wait ; wait 71 | 72 | # now indexer 73 | rm -f mr-* 74 | 75 | # generate the correct output 76 | ../mrsequential ../../mrapps/indexer.so ../pg*txt || exit 1 77 | sort mr-out-0 > mr-correct-indexer.txt 78 | rm -f mr-out* 79 | 80 | echo '***' Starting indexer test. 81 | 82 | timeout -k 2s 180s ../mrmaster ../pg*txt & 83 | sleep 1 84 | 85 | # start multiple workers 86 | timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so & 87 | timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so 88 | 89 | sort mr-out* | grep . > mr-indexer-all 90 | if cmp mr-indexer-all mr-correct-indexer.txt 91 | then 92 | echo '---' indexer test: PASS 93 | else 94 | echo '---' indexer output is not the same as mr-correct-indexer.txt 95 | echo '---' indexer test: FAIL 96 | failed_any=1 97 | fi 98 | 99 | wait ; wait 100 | 101 | 102 | echo '***' Starting map parallelism test. 103 | 104 | rm -f mr-out* mr-worker* 105 | 106 | timeout -k 2s 180s ../mrmaster ../pg*txt & 107 | sleep 1 108 | 109 | timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so & 110 | timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so 111 | 112 | NT=`cat mr-out* | grep '^times-' | wc -l | sed 's/ //g'` 113 | if [ "$NT" != "2" ] 114 | then 115 | echo '---' saw "$NT" workers rather than 2 116 | echo '---' map parallelism test: FAIL 117 | failed_any=1 118 | fi 119 | 120 | if cat mr-out* | grep '^parallel.* 2' > /dev/null 121 | then 122 | echo '---' map parallelism test: PASS 123 | else 124 | echo '---' map workers did not run in parallel 125 | echo '---' map parallelism test: FAIL 126 | failed_any=1 127 | fi 128 | 129 | wait ; wait 130 | 131 | 132 | echo '***' Starting reduce parallelism test. 133 | 134 | rm -f mr-out* mr-worker* 135 | 136 | timeout -k 2s 180s ../mrmaster ../pg*txt & 137 | sleep 1 138 | 139 | timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so & 140 | timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so 141 | 142 | NT=`cat mr-out* | grep '^[a-z] 2' | wc -l | sed 's/ //g'` 143 | if [ "$NT" -lt "2" ] 144 | then 145 | echo '---' too few parallel reduces. 146 | echo '---' reduce parallelism test: FAIL 147 | failed_any=1 148 | else 149 | echo '---' reduce parallelism test: PASS 150 | fi 151 | 152 | wait ; wait 153 | 154 | 155 | # generate the correct output 156 | ../mrsequential ../../mrapps/nocrash.so ../pg*txt || exit 1 157 | sort mr-out-0 > mr-correct-crash.txt 158 | rm -f mr-out* 159 | 160 | echo '***' Starting crash test. 161 | 162 | rm -f mr-done 163 | (timeout -k 2s 180s ../mrmaster ../pg*txt ; touch mr-done ) & 164 | sleep 1 165 | 166 | # start multiple workers 167 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so & 168 | 169 | # mimic rpc.go's masterSock() 170 | SOCKNAME=/var/tmp/824-mr-`id -u` 171 | 172 | ( while [ -e $SOCKNAME -a ! -f mr-done ] 173 | do 174 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so 175 | sleep 1 176 | done ) & 177 | 178 | ( while [ -e $SOCKNAME -a ! -f mr-done ] 179 | do 180 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so 181 | sleep 1 182 | done ) & 183 | 184 | while [ -e $SOCKNAME -a ! -f mr-done ] 185 | do 186 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so 187 | sleep 1 188 | done 189 | 190 | wait 191 | wait 192 | wait 193 | 194 | rm $SOCKNAME 195 | sort mr-out* | grep . 
> mr-crash-all 196 | if cmp mr-crash-all mr-correct-crash.txt 197 | then 198 | echo '---' crash test: PASS 199 | else 200 | echo '---' crash output is not the same as mr-correct-crash.txt 201 | echo '---' crash test: FAIL 202 | failed_any=1 203 | fi 204 | 205 | if [ $failed_any -eq 0 ]; then 206 | echo '***' PASSED ALL TESTS 207 | else 208 | echo '***' FAILED SOME TESTS 209 | exit 1 210 | fi 211 | -------------------------------------------------------------------------------- /src/main/viewd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "viewservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 2 { 14 | fmt.Printf("Usage: viewd port\n") 15 | os.Exit(1) 16 | } 17 | 18 | viewservice.StartServer(os.Args[1]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/models/kv.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import "../porcupine" 4 | import "fmt" 5 | import "sort" 6 | 7 | type KvInput struct { 8 | Op uint8 // 0 => get, 1 => put, 2 => append 9 | Key string 10 | Value string 11 | } 12 | 13 | type KvOutput struct { 14 | Value string 15 | } 16 | 17 | var KvModel = porcupine.Model{ 18 | Partition: func(history []porcupine.Operation) [][]porcupine.Operation { 19 | m := make(map[string][]porcupine.Operation) 20 | for _, v := range history { 21 | key := v.Input.(KvInput).Key 22 | m[key] = append(m[key], v) 23 | } 24 | keys := make([]string, 0, len(m)) 25 | for k := range m { 26 | keys = append(keys, k) 27 | } 28 | sort.Strings(keys) 29 | ret := make([][]porcupine.Operation, 0, len(keys)) 30 | for _, k := range keys { 31 | ret = append(ret, m[k]) 32 | } 33 | return ret 34 | }, 35 | Init: func() interface{} { 36 | // note: we are modeling a single key's value here; 37 | // we're partitioning by key, so this is okay 38 | return "" 39 | }, 40 | Step: func(state, input, output interface{}) (bool, interface{}) { 41 | inp := input.(KvInput) 42 | out := output.(KvOutput) 43 | st := state.(string) 44 | if inp.Op == 0 { 45 | // get 46 | return out.Value == st, state 47 | } else if inp.Op == 1 { 48 | // put 49 | return true, inp.Value 50 | } else { 51 | // append 52 | return true, (st + inp.Value) 53 | } 54 | }, 55 | DescribeOperation: func(input, output interface{}) string { 56 | inp := input.(KvInput) 57 | out := output.(KvOutput) 58 | switch inp.Op { 59 | case 0: 60 | return fmt.Sprintf("get('%s') -> '%s'", inp.Key, out.Value) 61 | case 1: 62 | return fmt.Sprintf("put('%s', '%s')", inp.Key, inp.Value) 63 | case 2: 64 | return fmt.Sprintf("append('%s', '%s')", inp.Key, inp.Value) 65 | default: 66 | return "" 67 | } 68 | }, 69 | } 70 | -------------------------------------------------------------------------------- /src/mr/master.go: -------------------------------------------------------------------------------- 1 | package mr 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "sync" 7 | "time" 8 | ) 9 | import "net" 10 | import "os" 11 | import "net/rpc" 12 | import "net/http" 13 | 14 | type TaskStat struct { 15 | beginTime time.Time 16 | fileName string 17 | fileIndex int 18 | partIndex int 19 | nReduce int 20 | nFiles int 21 | } 22 | 23 | type TaskStatInterface interface { 24 | GenerateTaskInfo() TaskInfo 25 | OutOfTime() bool 26 | GetFileIndex() int 27 | GetPartIndex() 
int 28 | SetNow() 29 | } 30 | 31 | type MapTaskStat struct { 32 | TaskStat 33 | } 34 | 35 | type ReduceTaskStat struct { 36 | TaskStat 37 | } 38 | 39 | func (this *MapTaskStat) GenerateTaskInfo() TaskInfo { 40 | return TaskInfo{ 41 | State: TaskMap, 42 | FileName: this.fileName, 43 | FileIndex: this.fileIndex, 44 | PartIndex: this.partIndex, 45 | NReduce: this.nReduce, 46 | NFiles: this.nFiles, 47 | } 48 | } 49 | 50 | func (this *ReduceTaskStat) GenerateTaskInfo() TaskInfo { 51 | return TaskInfo{ 52 | State: TaskReduce, 53 | FileName: this.fileName, 54 | FileIndex: this.fileIndex, 55 | PartIndex: this.partIndex, 56 | NReduce: this.nReduce, 57 | NFiles: this.nFiles, 58 | } 59 | } 60 | 61 | func (this *TaskStat) OutOfTime() bool { 62 | return time.Now().Sub(this.beginTime) > time.Duration(time.Second*60) 63 | } 64 | 65 | func (this *TaskStat) SetNow() { 66 | this.beginTime = time.Now() 67 | } 68 | 69 | func (this *TaskStat) GetFileIndex() int { 70 | return this.fileIndex 71 | } 72 | 73 | func (this *TaskStat) GetPartIndex() int { 74 | return this.partIndex 75 | } 76 | 77 | type TaskStatQueue struct { 78 | taskArray []TaskStatInterface 79 | mutex sync.Mutex 80 | } 81 | 82 | func (this *TaskStatQueue) lock() { 83 | this.mutex.Lock() 84 | } 85 | 86 | func (this *TaskStatQueue) unlock() { 87 | this.mutex.Unlock() 88 | } 89 | 90 | func (this *TaskStatQueue) Size() int { 91 | return len(this.taskArray) 92 | } 93 | 94 | func (this *TaskStatQueue) Pop() TaskStatInterface { 95 | this.lock() 96 | arrayLength := len(this.taskArray) 97 | if arrayLength == 0 { 98 | this.unlock() 99 | return nil 100 | } 101 | ret := this.taskArray[arrayLength-1] 102 | this.taskArray = this.taskArray[:arrayLength-1] 103 | this.unlock() 104 | return ret 105 | } 106 | 107 | func (this *TaskStatQueue) Push(taskStat TaskStatInterface) { 108 | this.lock() 109 | if taskStat == nil { 110 | this.unlock() 111 | return 112 | } 113 | this.taskArray = append(this.taskArray, taskStat) 114 | this.unlock() 115 | } 116 | 117 | func (this *TaskStatQueue) TimeOutQueue() []TaskStatInterface { 118 | outArray := make([]TaskStatInterface, 0) 119 | this.lock() 120 | for taskIndex := 0; taskIndex < len(this.taskArray); { 121 | taskStat := this.taskArray[taskIndex] 122 | if (taskStat).OutOfTime() { 123 | outArray = append(outArray, taskStat) 124 | this.taskArray = append(this.taskArray[:taskIndex], this.taskArray[taskIndex+1:]...) 125 | // must resume at this index next time 126 | } else { 127 | taskIndex++ 128 | } 129 | } 130 | this.unlock() 131 | return outArray 132 | } 133 | 134 | func (this *TaskStatQueue) MoveAppend(rhs []TaskStatInterface) { 135 | this.lock() 136 | this.taskArray = append(this.taskArray, rhs...) 137 | rhs = make([]TaskStatInterface, 0) 138 | this.unlock() 139 | } 140 | 141 | func (this *TaskStatQueue) RemoveTask(fileIndex int, partIndex int) { 142 | this.lock() 143 | for index := 0; index < len(this.taskArray); { 144 | task := this.taskArray[index] 145 | if fileIndex == task.GetFileIndex() && partIndex == task.GetPartIndex() { 146 | this.taskArray = append(this.taskArray[:index], this.taskArray[index+1:]...) 147 | } else { 148 | index++ 149 | } 150 | } 151 | this.unlock() 152 | } 153 | 154 | type Master struct { 155 | // Your definitions here. 
156 | 157 | filenames []string 158 | 159 | // reduce task queue 160 | reduceTaskWaiting TaskStatQueue 161 | reduceTaskRunning TaskStatQueue 162 | 163 | // map task statistics 164 | mapTaskWaiting TaskStatQueue 165 | mapTaskRunning TaskStatQueue 166 | 167 | // machine state 168 | isDone bool 169 | nReduce int 170 | } 171 | 172 | // Your code here -- RPC handlers for the worker to call. 173 | /* 174 | func (this *Master) TryMap(args *TryMapArgs, reply *TryMapReply) error { 175 | if this.isMapped { 176 | reply.RunMap = false 177 | return nil 178 | } 179 | for this.isMapping { 180 | time.Sleep(time.Duration(1) * time.Second) 181 | } 182 | this.isMapped = false 183 | this.isMapping = true 184 | reply.RunMap = true 185 | return nil 186 | } 187 | 188 | func (this *Master) MapFinished(args *TryMapArgs, reply *ExampleReply) error { 189 | this.isMapping = false 190 | this.isMapped = true 191 | return nil 192 | } 193 | */ 194 | func (this *Master) AskTask(args *ExampleArgs, reply *TaskInfo) error { 195 | if this.isDone { 196 | reply.State = TaskEnd 197 | return nil 198 | } 199 | 200 | // check for reduce tasks 201 | reduceTask := this.reduceTaskWaiting.Pop() 202 | if reduceTask != nil { 203 | // an available reduce task 204 | // record task begin time 205 | reduceTask.SetNow() 206 | // note task is running 207 | this.reduceTaskRunning.Push(reduceTask) 208 | // setup a reply 209 | *reply = reduceTask.GenerateTaskInfo() 210 | fmt.Printf("Distributing reduce task on part %v %vth file %v\n", reply.PartIndex, reply.FileIndex, reply.FileName) 211 | return nil 212 | } 213 | 214 | // check for map tasks 215 | mapTask := this.mapTaskWaiting.Pop() 216 | if mapTask != nil { 217 | // an available map task 218 | // record task begin time 219 | mapTask.SetNow() 220 | // note task is running 221 | this.mapTaskRunning.Push(mapTask) 222 | // setup a reply 223 | *reply = mapTask.GenerateTaskInfo() 224 | fmt.Printf("Distributing map task on %vth file %v\n", reply.FileIndex, reply.FileName) 225 | return nil 226 | } 227 | 228 | // all tasks distributed 229 | if this.mapTaskRunning.Size() > 0 || this.reduceTaskRunning.Size() > 0 { 230 | // must wait for new tasks 231 | reply.State = TaskWait 232 | return nil 233 | } 234 | // all tasks complete 235 | reply.State = TaskEnd 236 | this.isDone = true 237 | return nil 238 | } 239 | 240 | func (this *Master) distributeReduce() { 241 | reduceTask := ReduceTaskStat{ 242 | TaskStat{ 243 | fileIndex: 0, 244 | partIndex: 0, 245 | nReduce: this.nReduce, 246 | nFiles: len(this.filenames), 247 | }, 248 | } 249 | for reduceIndex := 0; reduceIndex < this.nReduce; reduceIndex++ { 250 | task := reduceTask 251 | task.partIndex = reduceIndex 252 | this.reduceTaskWaiting.Push(&task) 253 | } 254 | } 255 | 256 | func (this *Master) TaskDone(args *TaskInfo, reply *ExampleReply) error { 257 | switch args.State { 258 | case TaskMap: 259 | fmt.Printf("Map task on %vth file %v complete\n", args.FileIndex, args.FileName) 260 | this.mapTaskRunning.RemoveTask(args.FileIndex, args.PartIndex) 261 | if this.mapTaskRunning.Size() == 0 && this.mapTaskWaiting.Size() == 0 { 262 | // all map tasks done 263 | // can distribute reduce tasks 264 | this.distributeReduce() 265 | } 266 | break 267 | case TaskReduce: 268 | fmt.Printf("Reduce task on %vth part complete\n", args.PartIndex) 269 | this.reduceTaskRunning.RemoveTask(args.FileIndex, args.PartIndex) 270 | break 271 | default: 272 | panic("Task Done error") 273 | } 274 | return nil 275 | } 276 | 277 | // 278 | // an example RPC handler. 
279 | // 280 | // the RPC argument and reply types are defined in rpc.go. 281 | // 282 | func (m *Master) Example(args *ExampleArgs, reply *ExampleReply) error { 283 | reply.Y = args.X + 1 284 | return nil 285 | } 286 | 287 | // 288 | // start a thread that listens for RPCs from worker.go 289 | // 290 | func (m *Master) server() { 291 | rpc.Register(m) 292 | rpc.HandleHTTP() 293 | //l, e := net.Listen("tcp", ":1234") 294 | sockname := masterSock() 295 | os.Remove(sockname) 296 | l, e := net.Listen("unix", sockname) 297 | if e != nil { 298 | log.Fatal("listen error:", e) 299 | } 300 | go http.Serve(l, nil) 301 | } 302 | 303 | // 304 | // main/mrmaster.go calls Done() periodically to find out 305 | // if the entire job has finished. 306 | // 307 | func (this *Master) Done() bool { 308 | // Your code here. 309 | 310 | return this.isDone 311 | } 312 | 313 | // 314 | // create a Master. 315 | // main/mrmaster.go calls this function. 316 | // nReduce is the number of reduce tasks to use. 317 | // 318 | func MakeMaster(files []string, nReduce int) *Master { 319 | // distribute map tasks 320 | mapArray := make([]TaskStatInterface, 0) 321 | for fileIndex, filename := range files { 322 | mapTask := MapTaskStat{ 323 | TaskStat{ 324 | fileName: filename, 325 | fileIndex: fileIndex, 326 | partIndex: 0, 327 | nReduce: nReduce, 328 | nFiles: len(files), 329 | }, 330 | } 331 | mapArray = append(mapArray, &mapTask) 332 | } 333 | m := Master{ 334 | mapTaskWaiting: TaskStatQueue{taskArray: mapArray}, 335 | nReduce: nReduce, 336 | filenames: files, 337 | } 338 | 339 | // create tmp directory if not exists 340 | if _, err := os.Stat("mr-tmp"); os.IsNotExist(err) { 341 | err = os.Mkdir("mr-tmp", os.ModePerm) 342 | if err != nil { 343 | fmt.Print("Create tmp directory failed... Error: %v\n", err) 344 | panic("Create tmp directory failed...") 345 | } 346 | } 347 | 348 | // begin a thread to collect tasks out of time 349 | go m.collectOutOfTime() 350 | 351 | m.server() 352 | return &m 353 | } 354 | 355 | func (this *Master) collectOutOfTime() { 356 | for { 357 | time.Sleep(time.Duration(time.Second * 5)) 358 | timeouts := this.reduceTaskRunning.TimeOutQueue() 359 | if len(timeouts) > 0 { 360 | this.reduceTaskWaiting.MoveAppend(timeouts) 361 | } 362 | timeouts = this.mapTaskRunning.TimeOutQueue() 363 | if len(timeouts) > 0 { 364 | this.mapTaskWaiting.MoveAppend(timeouts) 365 | } 366 | } 367 | } 368 | -------------------------------------------------------------------------------- /src/mr/rpc.go: -------------------------------------------------------------------------------- 1 | package mr 2 | 3 | // 4 | // RPC definitions. 5 | // 6 | // remember to capitalize all names. 7 | // 8 | 9 | import "os" 10 | import "strconv" 11 | 12 | // 13 | // example to show how to declare the arguments 14 | // and reply for an RPC. 15 | // 16 | 17 | type ExampleArgs struct { 18 | X int 19 | } 20 | 21 | type ExampleReply struct { 22 | Y int 23 | } 24 | 25 | // Add your RPC definitions here. 
26 | 27 | type TryMapArgs struct { 28 | } 29 | 30 | type TryMapReply struct { 31 | // if should not run map, run reduce 32 | RunMap bool 33 | } 34 | 35 | const ( 36 | TaskMap = 0 37 | TaskReduce = 1 38 | TaskWait = 2 39 | TaskEnd = 3 40 | ) 41 | 42 | type TaskInfo struct { 43 | /* 44 | Declared in consts above 45 | 0 map 46 | 1 reduce 47 | 2 wait 48 | 3 end 49 | */ 50 | State int 51 | 52 | FileName string 53 | FileIndex int 54 | PartIndex int 55 | 56 | NReduce int 57 | NFiles int 58 | } 59 | 60 | // Cook up a unique-ish UNIX-domain socket name 61 | // in /var/tmp, for the master. 62 | // Can't use the current directory since 63 | // Athena AFS doesn't support UNIX-domain sockets. 64 | func masterSock() string { 65 | s := "/var/tmp/824-mr-" 66 | s += strconv.Itoa(os.Getuid()) 67 | return s 68 | } 69 | -------------------------------------------------------------------------------- /src/mr/worker.go: -------------------------------------------------------------------------------- 1 | package mr 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "path/filepath" 9 | "sort" 10 | "strconv" 11 | "time" 12 | ) 13 | import "log" 14 | import "net/rpc" 15 | import "hash/fnv" 16 | 17 | // 18 | // Map functions return a slice of KeyValue. 19 | // 20 | type KeyValue struct { 21 | Key string 22 | Value string 23 | } 24 | type ByKey []KeyValue 25 | 26 | func (a ByKey) Len() int { return len(a) } 27 | func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 28 | func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } 29 | 30 | // 31 | // use ihash(key) % NReduce to choose the reduce 32 | // task number for each KeyValue emitted by Map. 33 | // 34 | func ihash(key string) int { 35 | h := fnv.New32a() 36 | h.Write([]byte(key)) 37 | return int(h.Sum32() & 0x7fffffff) 38 | } 39 | 40 | // 41 | // main/mrworker.go calls this function. 42 | // 43 | /* 44 | This function implements worker behaviour. 45 | The worker asks the master which work to perform, map or reduce. 46 | If the map task is claimed by another worker and has not finished as yet, the worker waits for a little while. 47 | If the map task is finished by another worker, the master provides necessary information on a reduce task for the worker. 48 | If all reduce tasks are done, enter exit procedure. 49 | */ 50 | func Worker(mapf func(string, string) []KeyValue, 51 | reducef func(string, []string) string) { 52 | 53 | // Your worker implementation here. 54 | 55 | // uncomment to send the Example RPC to the master. 56 | //CallExample() 57 | for { 58 | taskInfo := CallAskTask() 59 | switch taskInfo.State { 60 | case TaskMap: 61 | workerMap(mapf, taskInfo) 62 | break 63 | case TaskReduce: 64 | workerReduce(reducef, taskInfo) 65 | break 66 | case TaskWait: 67 | // wait for 5 seconds to requeset again 68 | time.Sleep(time.Duration(time.Second * 5)) 69 | break 70 | case TaskEnd: 71 | fmt.Println("Master all tasks complete. Nothing to do...") 72 | // exit worker process 73 | return 74 | default: 75 | panic("Invalid Task state received by worker") 76 | } 77 | } 78 | 79 | } 80 | 81 | func CallAskTask() *TaskInfo { 82 | args := ExampleArgs{} 83 | reply := TaskInfo{} 84 | call("Master.AskTask", &args, &reply) 85 | return &reply 86 | } 87 | 88 | func CallTaskDone(taskInfo *TaskInfo) { 89 | reply := ExampleReply{} 90 | call("Master.TaskDone", taskInfo, &reply) 91 | } 92 | 93 | // 94 | // example function to show how to make an RPC call to the master. 95 | // 96 | // the RPC argument and reply types are defined in rpc.go. 
97 | // 98 | func CallExample() { 99 | 100 | // declare an argument structure. 101 | args := ExampleArgs{} 102 | 103 | // fill in the argument(s). 104 | args.X = 99 105 | 106 | // declare a reply structure. 107 | reply := ExampleReply{} 108 | 109 | // send the RPC request, wait for the reply. 110 | call("Master.Example", &args, &reply) 111 | 112 | // reply.Y should be 100. 113 | fmt.Printf("reply.Y %v\n", reply.Y) 114 | } 115 | 116 | // 117 | // send an RPC request to the master, wait for the response. 118 | // usually returns true. 119 | // returns false if something goes wrong. 120 | // 121 | func call(rpcname string, args interface{}, reply interface{}) bool { 122 | // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") 123 | sockname := masterSock() 124 | c, err := rpc.DialHTTP("unix", sockname) 125 | if err != nil { 126 | log.Fatal("dialing:", err) 127 | } 128 | defer c.Close() 129 | 130 | err = c.Call(rpcname, args, reply) 131 | if err == nil { 132 | return true 133 | } 134 | 135 | fmt.Println(err) 136 | return false 137 | } 138 | 139 | func workerMap(mapf func(string, string) []KeyValue, taskInfo *TaskInfo) { 140 | fmt.Printf("Got assigned map task on %vth file %v\n", taskInfo.FileIndex, taskInfo.FileName) 141 | 142 | // read in target files as a key-value array 143 | intermediate := []KeyValue{} 144 | file, err := os.Open(taskInfo.FileName) 145 | if err != nil { 146 | log.Fatalf("cannot open %v", taskInfo.FileName) 147 | } 148 | content, err := ioutil.ReadAll(file) 149 | if err != nil { 150 | log.Fatalf("cannot read %v", taskInfo.FileName) 151 | } 152 | file.Close() 153 | kva := mapf(taskInfo.FileName, string(content)) 154 | intermediate = append(intermediate, kva...) 155 | 156 | // prepare output files and encoders 157 | nReduce := taskInfo.NReduce 158 | outprefix := "mr-tmp/mr-" 159 | outprefix += strconv.Itoa(taskInfo.FileIndex) 160 | outprefix += "-" 161 | outFiles := make([]*os.File, nReduce) 162 | fileEncs := make([]*json.Encoder, nReduce) 163 | for outindex := 0; outindex < nReduce; outindex++ { 164 | //outname := outprefix + strconv.Itoa(outindex) 165 | //outFiles[outindex], _ = os.Create(outname) 166 | outFiles[outindex], _ = ioutil.TempFile("mr-tmp", "mr-tmp-*") 167 | fileEncs[outindex] = json.NewEncoder(outFiles[outindex]) 168 | } 169 | 170 | // distribute keys among mr-fileindex-* 171 | for _, kv := range intermediate { 172 | outindex := ihash(kv.Key) % nReduce 173 | file = outFiles[outindex] 174 | enc := fileEncs[outindex] 175 | err := enc.Encode(&kv) 176 | if err != nil { 177 | fmt.Printf("File %v Key %v Value %v Error: %v\n", taskInfo.FileName, kv.Key, kv.Value, err) 178 | panic("Json encode failed") 179 | } 180 | } 181 | 182 | // save as files 183 | for outindex, file := range outFiles { 184 | outname := outprefix + strconv.Itoa(outindex) 185 | oldpath := filepath.Join(file.Name()) 186 | //fmt.Printf("temp file oldpath %v\n", oldpath) 187 | os.Rename(oldpath, outname) 188 | file.Close() 189 | } 190 | // acknowledge master 191 | CallTaskDone(taskInfo) 192 | } 193 | 194 | func workerReduce(reducef func(string, []string) string, taskInfo *TaskInfo) { 195 | fmt.Printf("Got assigned reduce task on part %v\n", taskInfo.PartIndex) 196 | outname := "mr-out-" + strconv.Itoa(taskInfo.PartIndex) 197 | //fmt.Printf("%v\n", taskInfo) 198 | 199 | // read from output files from map tasks 200 | 201 | innameprefix := "mr-tmp/mr-" 202 | innamesuffix := "-" + strconv.Itoa(taskInfo.PartIndex) 203 | 204 | // read in all files as a kv array 205 | intermediate := []KeyValue{} 206 | for 
index := 0; index < taskInfo.NFiles; index++ { 207 | inname := innameprefix + strconv.Itoa(index) + innamesuffix 208 | file, err := os.Open(inname) 209 | if err != nil { 210 | fmt.Printf("Open intermediate file %v failed: %v\n", inname, err) 211 | panic("Open file error") 212 | } 213 | dec := json.NewDecoder(file) 214 | for { 215 | var kv KeyValue 216 | if err := dec.Decode(&kv); err != nil { 217 | //fmt.Printf("%v\n", err) 218 | break 219 | } 220 | //fmt.Printf("%v\n", kv) 221 | intermediate = append(intermediate, kv) 222 | } 223 | file.Close() 224 | } 225 | 226 | sort.Sort(ByKey(intermediate)) 227 | 228 | //ofile, err := os.Create(outname) 229 | ofile, err := ioutil.TempFile("mr-tmp", "mr-*") 230 | if err != nil { 231 | fmt.Printf("Create output file %v failed: %v\n", outname, err) 232 | panic("Create file error") 233 | } 234 | //fmt.Printf("%v\n", intermediate) 235 | i := 0 236 | for i < len(intermediate) { 237 | j := i + 1 238 | for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { 239 | j++ 240 | } 241 | values := []string{} 242 | for k := i; k < j; k++ { 243 | values = append(values, intermediate[k].Value) 244 | } 245 | output := reducef(intermediate[i].Key, values) 246 | 247 | // this is the correct format for each line of Reduce output. 248 | fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) 249 | 250 | i = j 251 | } 252 | os.Rename(filepath.Join(ofile.Name()), outname) 253 | ofile.Close() 254 | // acknowledge master 255 | CallTaskDone(taskInfo) 256 | } 257 | -------------------------------------------------------------------------------- /src/mrapps/crash.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a MapReduce pseudo-application that sometimes crashes, 5 | // and sometimes takes a long time, 6 | // to test MapReduce's ability to recover. 7 | // 8 | // go build -buildmode=plugin crash.go 9 | // 10 | 11 | import "mr" 12 | import crand "crypto/rand" 13 | import "math/big" 14 | import "strings" 15 | import "os" 16 | import "sort" 17 | import "strconv" 18 | import "time" 19 | 20 | func maybeCrash() { 21 | max := big.NewInt(1000) 22 | rr, _ := crand.Int(crand.Reader, max) 23 | if rr.Int64() < 330 { 24 | // crash! 25 | os.Exit(1) 26 | } else if rr.Int64() < 660 { 27 | // delay for a while. 28 | maxms := big.NewInt(10 * 1000) 29 | ms, _ := crand.Int(crand.Reader, maxms) 30 | time.Sleep(time.Duration(ms.Int64()) * time.Millisecond) 31 | } 32 | } 33 | 34 | func Map(filename string, contents string) []mr.KeyValue { 35 | maybeCrash() 36 | 37 | kva := []mr.KeyValue{} 38 | kva = append(kva, mr.KeyValue{"a", filename}) 39 | kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))}) 40 | kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))}) 41 | kva = append(kva, mr.KeyValue{"d", "xyzzy"}) 42 | return kva 43 | } 44 | 45 | func Reduce(key string, values []string) string { 46 | maybeCrash() 47 | 48 | // sort values to ensure deterministic output. 49 | vv := make([]string, len(values)) 50 | copy(vv, values) 51 | sort.Strings(vv) 52 | 53 | val := strings.Join(vv, " ") 54 | return val 55 | } 56 | -------------------------------------------------------------------------------- /src/mrapps/indexer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // an indexing application "plugin" for MapReduce. 
5 | // 6 | // go build -buildmode=plugin indexer.go 7 | // 8 | 9 | import "fmt" 10 | import "mr" 11 | 12 | import "strings" 13 | import "unicode" 14 | import "sort" 15 | 16 | // The mapping function is called once for each piece of the input. 17 | // In this framework, the key is the name of the file that is being processed, 18 | // and the value is the file's contents. The return value should be a slice of 19 | // key/value pairs, each represented by a mr.KeyValue. 20 | func Map(document string, value string) (res []mr.KeyValue) { 21 | m := make(map[string]bool) 22 | words := strings.FieldsFunc(value, func(x rune) bool { return !unicode.IsLetter(x) }) 23 | for _, w := range words { 24 | m[w] = true 25 | } 26 | for w := range m { 27 | kv := mr.KeyValue{w, document} 28 | res = append(res, kv) 29 | } 30 | return 31 | } 32 | 33 | // The reduce function is called once for each key generated by Map, with a 34 | // list of that key's string value (merged across all inputs). The return value 35 | // should be a single output value for that key. 36 | func Reduce(key string, values []string) string { 37 | sort.Strings(values) 38 | return fmt.Sprintf("%d %s", len(values), strings.Join(values, ",")) 39 | } 40 | -------------------------------------------------------------------------------- /src/mrapps/mtiming.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a MapReduce pseudo-application to test that workers 5 | // execute map tasks in parallel. 6 | // 7 | // go build -buildmode=plugin mtiming.go 8 | // 9 | 10 | import "mr" 11 | import "strings" 12 | import "fmt" 13 | import "os" 14 | import "syscall" 15 | import "time" 16 | import "sort" 17 | import "io/ioutil" 18 | 19 | func nparallel(phase string) int { 20 | // create a file so that other workers will see that 21 | // we're running at the same time as them. 22 | pid := os.Getpid() 23 | myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid) 24 | err := ioutil.WriteFile(myfilename, []byte("x"), 0666) 25 | if err != nil { 26 | panic(err) 27 | } 28 | 29 | // are any other workers running? 30 | // find their PIDs by scanning directory for mr-worker-XXX files. 31 | dd, err := os.Open(".") 32 | if err != nil { 33 | panic(err) 34 | } 35 | names, err := dd.Readdirnames(1000000) 36 | if err != nil { 37 | panic(err) 38 | } 39 | ret := 0 40 | for _, name := range names { 41 | var xpid int 42 | pat := fmt.Sprintf("mr-worker-%s-%%d", phase) 43 | n, err := fmt.Sscanf(name, pat, &xpid) 44 | if n == 1 && err == nil { 45 | err := syscall.Kill(xpid, 0) 46 | if err == nil { 47 | // if err == nil, xpid is alive. 48 | ret += 1 49 | } 50 | } 51 | } 52 | dd.Close() 53 | 54 | time.Sleep(1 * time.Second) 55 | 56 | err = os.Remove(myfilename) 57 | if err != nil { 58 | panic(err) 59 | } 60 | 61 | return ret 62 | } 63 | 64 | func Map(filename string, contents string) []mr.KeyValue { 65 | t0 := time.Now() 66 | ts := float64(t0.Unix()) + (float64(t0.Nanosecond()) / 1000000000.0) 67 | pid := os.Getpid() 68 | 69 | n := nparallel("map") 70 | 71 | kva := []mr.KeyValue{} 72 | kva = append(kva, mr.KeyValue{ 73 | fmt.Sprintf("times-%v", pid), 74 | fmt.Sprintf("%.1f", ts)}) 75 | kva = append(kva, mr.KeyValue{ 76 | fmt.Sprintf("parallel-%v", pid), 77 | fmt.Sprintf("%d", n)}) 78 | return kva 79 | } 80 | 81 | func Reduce(key string, values []string) string { 82 | //n := nparallel("reduce") 83 | 84 | // sort values to ensure deterministic output. 
85 | vv := make([]string, len(values)) 86 | copy(vv, values) 87 | sort.Strings(vv) 88 | 89 | val := strings.Join(vv, " ") 90 | return val 91 | } 92 | -------------------------------------------------------------------------------- /src/mrapps/nocrash.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // same as crash.go but doesn't actually crash. 5 | // 6 | // go build -buildmode=plugin nocrash.go 7 | // 8 | 9 | import "mr" 10 | import crand "crypto/rand" 11 | import "math/big" 12 | import "strings" 13 | import "os" 14 | import "sort" 15 | import "strconv" 16 | 17 | func maybeCrash() { 18 | max := big.NewInt(1000) 19 | rr, _ := crand.Int(crand.Reader, max) 20 | if false && rr.Int64() < 500 { 21 | // crash! 22 | os.Exit(1) 23 | } 24 | } 25 | 26 | func Map(filename string, contents string) []mr.KeyValue { 27 | maybeCrash() 28 | 29 | kva := []mr.KeyValue{} 30 | kva = append(kva, mr.KeyValue{"a", filename}) 31 | kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))}) 32 | kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))}) 33 | kva = append(kva, mr.KeyValue{"d", "xyzzy"}) 34 | return kva 35 | } 36 | 37 | func Reduce(key string, values []string) string { 38 | maybeCrash() 39 | 40 | // sort values to ensure deterministic output. 41 | vv := make([]string, len(values)) 42 | copy(vv, values) 43 | sort.Strings(vv) 44 | 45 | val := strings.Join(vv, " ") 46 | return val 47 | } 48 | -------------------------------------------------------------------------------- /src/mrapps/rtiming.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a MapReduce pseudo-application to test that workers 5 | // execute reduce tasks in parallel. 6 | // 7 | // go build -buildmode=plugin rtiming.go 8 | // 9 | 10 | import "mr" 11 | import "fmt" 12 | import "os" 13 | import "syscall" 14 | import "time" 15 | import "io/ioutil" 16 | 17 | func nparallel(phase string) int { 18 | // create a file so that other workers will see that 19 | // we're running at the same time as them. 20 | pid := os.Getpid() 21 | myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid) 22 | err := ioutil.WriteFile(myfilename, []byte("x"), 0666) 23 | if err != nil { 24 | panic(err) 25 | } 26 | 27 | // are any other workers running? 28 | // find their PIDs by scanning directory for mr-worker-XXX files. 29 | dd, err := os.Open(".") 30 | if err != nil { 31 | panic(err) 32 | } 33 | names, err := dd.Readdirnames(1000000) 34 | if err != nil { 35 | panic(err) 36 | } 37 | ret := 0 38 | for _, name := range names { 39 | var xpid int 40 | pat := fmt.Sprintf("mr-worker-%s-%%d", phase) 41 | n, err := fmt.Sscanf(name, pat, &xpid) 42 | if n == 1 && err == nil { 43 | err := syscall.Kill(xpid, 0) 44 | if err == nil { 45 | // if err == nil, xpid is alive. 
46 | ret += 1 47 | } 48 | } 49 | } 50 | dd.Close() 51 | 52 | time.Sleep(1 * time.Second) 53 | 54 | err = os.Remove(myfilename) 55 | if err != nil { 56 | panic(err) 57 | } 58 | 59 | return ret 60 | } 61 | 62 | func Map(filename string, contents string) []mr.KeyValue { 63 | 64 | kva := []mr.KeyValue{} 65 | kva = append(kva, mr.KeyValue{"a", "1"}) 66 | kva = append(kva, mr.KeyValue{"b", "1"}) 67 | kva = append(kva, mr.KeyValue{"c", "1"}) 68 | kva = append(kva, mr.KeyValue{"d", "1"}) 69 | kva = append(kva, mr.KeyValue{"e", "1"}) 70 | kva = append(kva, mr.KeyValue{"f", "1"}) 71 | kva = append(kva, mr.KeyValue{"g", "1"}) 72 | kva = append(kva, mr.KeyValue{"h", "1"}) 73 | kva = append(kva, mr.KeyValue{"i", "1"}) 74 | kva = append(kva, mr.KeyValue{"j", "1"}) 75 | return kva 76 | } 77 | 78 | func Reduce(key string, values []string) string { 79 | n := nparallel("reduce") 80 | 81 | val := fmt.Sprintf("%d", n) 82 | 83 | return val 84 | } 85 | -------------------------------------------------------------------------------- /src/mrapps/wc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a word-count application "plugin" for MapReduce. 5 | // 6 | // go build -buildmode=plugin wc.go 7 | // 8 | 9 | import "mr" 10 | import "unicode" 11 | import "strings" 12 | import "strconv" 13 | 14 | // 15 | // The map function is called once for each file of input. The first 16 | // argument is the name of the input file, and the second is the 17 | // file's complete contents. You should ignore the input file name, 18 | // and look only at the contents argument. The return value is a slice 19 | // of key/value pairs. 20 | // 21 | func Map(filename string, contents string) []mr.KeyValue { 22 | // function to detect word separators. 23 | ff := func(r rune) bool { return !unicode.IsLetter(r) } 24 | 25 | // split contents into an array of words. 26 | words := strings.FieldsFunc(contents, ff) 27 | 28 | kva := []mr.KeyValue{} 29 | for _, w := range words { 30 | kv := mr.KeyValue{w, "1"} 31 | kva = append(kva, kv) 32 | } 33 | return kva 34 | } 35 | 36 | // 37 | // The reduce function is called once for each key generated by the 38 | // map tasks, with a list of all the values created for that key by 39 | // any map task. 40 | // 41 | func Reduce(key string, values []string) string { 42 | // return the number of occurrences of this word. 43 | return strconv.Itoa(len(values)) 44 | } 45 | -------------------------------------------------------------------------------- /src/porcupine/bitset.go: -------------------------------------------------------------------------------- 1 | package porcupine 2 | 3 | import "math/bits" 4 | 5 | type bitset []uint64 6 | 7 | // data layout: 8 | // bits 0-63 are in data[0], the next are in data[1], etc. 
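// for example, bit 70 lives in data[70/64] = data[1] at bit offset 70%64 = 6;
// bitsetIndex below computes exactly this (major, minor) pair.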
9 | 10 | func newBitset(bits uint) bitset { 11 | extra := uint(0) 12 | if bits%64 != 0 { 13 | extra = 1 14 | } 15 | chunks := bits/64 + extra 16 | return bitset(make([]uint64, chunks)) 17 | } 18 | 19 | func (b bitset) clone() bitset { 20 | dataCopy := make([]uint64, len(b)) 21 | copy(dataCopy, b) 22 | return bitset(dataCopy) 23 | } 24 | 25 | func bitsetIndex(pos uint) (uint, uint) { 26 | return pos / 64, pos % 64 27 | } 28 | 29 | func (b bitset) set(pos uint) bitset { 30 | major, minor := bitsetIndex(pos) 31 | b[major] |= (1 << minor) 32 | return b 33 | } 34 | 35 | func (b bitset) clear(pos uint) bitset { 36 | major, minor := bitsetIndex(pos) 37 | b[major] &^= (1 << minor) 38 | return b 39 | } 40 | 41 | func (b bitset) get(pos uint) bool { 42 | major, minor := bitsetIndex(pos) 43 | return b[major]&(1<= 0; i-- { 120 | elem := entries[i] 121 | if elem.kind == returnEntry { 122 | entry := &node{value: elem.value, match: nil, id: elem.id} 123 | match[elem.id] = entry 124 | insertBefore(entry, root) 125 | root = entry 126 | } else { 127 | entry := &node{value: elem.value, match: match[elem.id], id: elem.id} 128 | insertBefore(entry, root) 129 | root = entry 130 | } 131 | } 132 | return root 133 | } 134 | 135 | type cacheEntry struct { 136 | linearized bitset 137 | state interface{} 138 | } 139 | 140 | func cacheContains(model Model, cache map[uint64][]cacheEntry, entry cacheEntry) bool { 141 | for _, elem := range cache[entry.linearized.hash()] { 142 | if entry.linearized.equals(elem.linearized) && model.Equal(entry.state, elem.state) { 143 | return true 144 | } 145 | } 146 | return false 147 | } 148 | 149 | type callsEntry struct { 150 | entry *node 151 | state interface{} 152 | } 153 | 154 | func lift(entry *node) { 155 | entry.prev.next = entry.next 156 | entry.next.prev = entry.prev 157 | match := entry.match 158 | match.prev.next = match.next 159 | if match.next != nil { 160 | match.next.prev = match.prev 161 | } 162 | } 163 | 164 | func unlift(entry *node) { 165 | match := entry.match 166 | match.prev.next = match 167 | if match.next != nil { 168 | match.next.prev = match 169 | } 170 | entry.prev.next = entry 171 | entry.next.prev = entry 172 | } 173 | 174 | func checkSingle(model Model, history []entry, computePartial bool, kill *int32) (bool, []*[]int) { 175 | entry := makeLinkedEntries(history) 176 | n := length(entry) / 2 177 | linearized := newBitset(uint(n)) 178 | cache := make(map[uint64][]cacheEntry) // map from hash to cache entry 179 | var calls []callsEntry 180 | // longest linearizable prefix that includes the given entry 181 | longest := make([]*[]int, n) 182 | 183 | state := model.Init() 184 | headEntry := insertBefore(&node{value: nil, match: nil, id: -1}, entry) 185 | for headEntry.next != nil { 186 | if atomic.LoadInt32(kill) != 0 { 187 | return false, longest 188 | } 189 | if entry.match != nil { 190 | matching := entry.match // the return entry 191 | ok, newState := model.Step(state, entry.value, matching.value) 192 | if ok { 193 | newLinearized := linearized.clone().set(uint(entry.id)) 194 | newCacheEntry := cacheEntry{newLinearized, newState} 195 | if !cacheContains(model, cache, newCacheEntry) { 196 | hash := newLinearized.hash() 197 | cache[hash] = append(cache[hash], newCacheEntry) 198 | calls = append(calls, callsEntry{entry, state}) 199 | state = newState 200 | linearized.set(uint(entry.id)) 201 | lift(entry) 202 | entry = headEntry.next 203 | } else { 204 | entry = entry.next 205 | } 206 | } else { 207 | entry = entry.next 208 | } 209 | } else { 210 | if 
len(calls) == 0 { 211 | return false, longest 212 | } 213 | // longest 214 | if computePartial { 215 | callsLen := len(calls) 216 | var seq []int = nil 217 | for _, v := range calls { 218 | if longest[v.entry.id] == nil || callsLen > len(*longest[v.entry.id]) { 219 | // create seq lazily 220 | if seq == nil { 221 | seq = make([]int, len(calls)) 222 | for i, v := range calls { 223 | seq[i] = v.entry.id 224 | } 225 | } 226 | longest[v.entry.id] = &seq 227 | } 228 | } 229 | } 230 | callsTop := calls[len(calls)-1] 231 | entry = callsTop.entry 232 | state = callsTop.state 233 | linearized.clear(uint(entry.id)) 234 | calls = calls[:len(calls)-1] 235 | unlift(entry) 236 | entry = entry.next 237 | } 238 | } 239 | // longest linearization is the complete linearization, which is calls 240 | seq := make([]int, len(calls)) 241 | for i, v := range calls { 242 | seq[i] = v.entry.id 243 | } 244 | for i := 0; i < n; i++ { 245 | longest[i] = &seq 246 | } 247 | return true, longest 248 | } 249 | 250 | func fillDefault(model Model) Model { 251 | if model.Partition == nil { 252 | model.Partition = NoPartition 253 | } 254 | if model.PartitionEvent == nil { 255 | model.PartitionEvent = NoPartitionEvent 256 | } 257 | if model.Equal == nil { 258 | model.Equal = ShallowEqual 259 | } 260 | if model.DescribeOperation == nil { 261 | model.DescribeOperation = DefaultDescribeOperation 262 | } 263 | if model.DescribeState == nil { 264 | model.DescribeState = DefaultDescribeState 265 | } 266 | return model 267 | } 268 | 269 | func checkParallel(model Model, history [][]entry, computeInfo bool, timeout time.Duration) (CheckResult, linearizationInfo) { 270 | ok := true 271 | timedOut := false 272 | results := make(chan bool, len(history)) 273 | longest := make([][]*[]int, len(history)) 274 | kill := int32(0) 275 | for i, subhistory := range history { 276 | go func(i int, subhistory []entry) { 277 | ok, l := checkSingle(model, subhistory, computeInfo, &kill) 278 | longest[i] = l 279 | results <- ok 280 | }(i, subhistory) 281 | } 282 | var timeoutChan <-chan time.Time 283 | if timeout > 0 { 284 | timeoutChan = time.After(timeout) 285 | } 286 | count := 0 287 | loop: 288 | for { 289 | select { 290 | case result := <-results: 291 | count++ 292 | ok = ok && result 293 | if !ok && !computeInfo { 294 | atomic.StoreInt32(&kill, 1) 295 | break loop 296 | } 297 | if count >= len(history) { 298 | break loop 299 | } 300 | case <-timeoutChan: 301 | timedOut = true 302 | atomic.StoreInt32(&kill, 1) 303 | break loop // if we time out, we might get a false positive 304 | } 305 | } 306 | var info linearizationInfo 307 | if computeInfo { 308 | // make sure we've waited for all goroutines to finish, 309 | // otherwise we might race on access to longest[] 310 | for count < len(history) { 311 | <-results 312 | count++ 313 | } 314 | // return longest linearizable prefixes that include each history element 315 | partialLinearizations := make([][][]int, len(history)) 316 | for i := 0; i < len(history); i++ { 317 | var partials [][]int 318 | // turn longest into a set of unique linearizations 319 | set := make(map[*[]int]struct{}) 320 | for _, v := range longest[i] { 321 | if v != nil { 322 | set[v] = struct{}{} 323 | } 324 | } 325 | for k, _ := range set { 326 | arr := make([]int, len(*k)) 327 | for i, v := range *k { 328 | arr[i] = v 329 | } 330 | partials = append(partials, arr) 331 | } 332 | partialLinearizations[i] = partials 333 | } 334 | info.history = history 335 | info.partialLinearizations = partialLinearizations 336 | } 337 | var result 
CheckResult 338 | if !ok { 339 | result = Illegal 340 | } else { 341 | if timedOut { 342 | result = Unknown 343 | } else { 344 | result = Ok 345 | } 346 | } 347 | return result, info 348 | } 349 | 350 | func checkEvents(model Model, history []Event, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) { 351 | model = fillDefault(model) 352 | partitions := model.PartitionEvent(history) 353 | l := make([][]entry, len(partitions)) 354 | for i, subhistory := range partitions { 355 | l[i] = convertEntries(renumber(subhistory)) 356 | } 357 | return checkParallel(model, l, verbose, timeout) 358 | } 359 | 360 | func checkOperations(model Model, history []Operation, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) { 361 | model = fillDefault(model) 362 | partitions := model.Partition(history) 363 | l := make([][]entry, len(partitions)) 364 | for i, subhistory := range partitions { 365 | l[i] = makeEntries(subhistory) 366 | } 367 | return checkParallel(model, l, verbose, timeout) 368 | } 369 | -------------------------------------------------------------------------------- /src/porcupine/model.go: -------------------------------------------------------------------------------- 1 | package porcupine 2 | 3 | import "fmt" 4 | 5 | type Operation struct { 6 | ClientId int // optional, unless you want a visualization; zero-indexed 7 | Input interface{} 8 | Call int64 // invocation time 9 | Output interface{} 10 | Return int64 // response time 11 | } 12 | 13 | type EventKind bool 14 | 15 | const ( 16 | CallEvent EventKind = false 17 | ReturnEvent EventKind = true 18 | ) 19 | 20 | type Event struct { 21 | ClientId int // optional, unless you want a visualization; zero-indexed 22 | Kind EventKind 23 | Value interface{} 24 | Id int 25 | } 26 | 27 | type Model struct { 28 | // Partition functions, such that a history is linearizable if an only 29 | // if each partition is linearizable. If you don't want to implement 30 | // this, you can always use the `NoPartition` functions implemented 31 | // below. 32 | Partition func(history []Operation) [][]Operation 33 | PartitionEvent func(history []Event) [][]Event 34 | // Initial state of the system. 35 | Init func() interface{} 36 | // Step function for the system. Returns whether or not the system 37 | // could take this step with the given inputs and outputs and also 38 | // returns the new state. This should not mutate the existing state. 39 | Step func(state interface{}, input interface{}, output interface{}) (bool, interface{}) 40 | // Equality on states. If you are using a simple data type for states, 41 | // you can use the `ShallowEqual` function implemented below. 42 | Equal func(state1, state2 interface{}) bool 43 | // For visualization, describe an operation as a string. 44 | // For example, "Get('x') -> 'y'". 45 | DescribeOperation func(input interface{}, output interface{}) string 46 | // For visualization purposes, describe a state as a string. 
47 | // For example, "{'x' -> 'y', 'z' -> 'w'}" 48 | DescribeState func(state interface{}) string 49 | } 50 | 51 | func NoPartition(history []Operation) [][]Operation { 52 | return [][]Operation{history} 53 | } 54 | 55 | func NoPartitionEvent(history []Event) [][]Event { 56 | return [][]Event{history} 57 | } 58 | 59 | func ShallowEqual(state1, state2 interface{}) bool { 60 | return state1 == state2 61 | } 62 | 63 | func DefaultDescribeOperation(input interface{}, output interface{}) string { 64 | return fmt.Sprintf("%v -> %v", input, output) 65 | } 66 | 67 | func DefaultDescribeState(state interface{}) string { 68 | return fmt.Sprintf("%v", state) 69 | } 70 | 71 | type CheckResult string 72 | 73 | const ( 74 | Unknown CheckResult = "Unknown" // timed out 75 | Ok = "Ok" 76 | Illegal = "Illegal" 77 | ) 78 | -------------------------------------------------------------------------------- /src/porcupine/porcupine.go: -------------------------------------------------------------------------------- 1 | package porcupine 2 | 3 | import "time" 4 | 5 | func CheckOperations(model Model, history []Operation) bool { 6 | res, _ := checkOperations(model, history, false, 0) 7 | return res == Ok 8 | } 9 | 10 | // timeout = 0 means no timeout 11 | // if this operation times out, then a false positive is possible 12 | func CheckOperationsTimeout(model Model, history []Operation, timeout time.Duration) CheckResult { 13 | res, _ := checkOperations(model, history, false, timeout) 14 | return res 15 | } 16 | 17 | // timeout = 0 means no timeout 18 | // if this operation times out, then a false positive is possible 19 | func CheckOperationsVerbose(model Model, history []Operation, timeout time.Duration) (CheckResult, linearizationInfo) { 20 | return checkOperations(model, history, true, timeout) 21 | } 22 | 23 | func CheckEvents(model Model, history []Event) bool { 24 | res, _ := checkEvents(model, history, false, 0) 25 | return res == Ok 26 | } 27 | 28 | // timeout = 0 means no timeout 29 | // if this operation times out, then a false positive is possible 30 | func CheckEventsTimeout(model Model, history []Event, timeout time.Duration) CheckResult { 31 | res, _ := checkEvents(model, history, false, timeout) 32 | return res 33 | } 34 | 35 | // timeout = 0 means no timeout 36 | // if this operation times out, then a false positive is possible 37 | func CheckEventsVerbose(model Model, history []Event, timeout time.Duration) (CheckResult, linearizationInfo) { 38 | return checkEvents(model, history, true, timeout) 39 | } 40 | -------------------------------------------------------------------------------- /src/raft/README.md: -------------------------------------------------------------------------------- 1 | Here is some explanation to this Raft implementation. 2 | 3 | # Procedure Setup 4 | 5 | There are 3 kinds of procedures at hand, state procedure, async procedure, and callback procedure. These are run simultaniously inside a raft server process. 6 | 7 | ## State Procedure 8 | 9 | Each state of raft corresponds to an action. This action is actively run by a thread. 10 | 11 | A leader periodically sends AppendEntries to all other peers. 12 | 13 | A Candidate periodically sends RequestVote to all other peers after a random wait. 14 | 15 | A Follower periodically checks its timer for `Timer Cleared` or `Timer Expired` events. 16 | 17 | This is the main procedure happening inside the raft process. 18 | 19 | ## Async Procedure 20 | 21 | There is a `TaskQueue` for every raft, holding some async task. 
A thread is devoted to running these tasks when they are present. 22 | 23 | This thread executes tasks by popping them from the `TaskQueue` one at a time. 24 | 25 | An async task can be generated by an RPC: when the process receives an RPC, it pushes the corresponding task onto the `TaskQueue`. 26 | 27 | An async task can also be generated by attempts to alter the state of the raft process, as listed below. 28 | 29 | - change peer state (leader, follower, candidate) 30 | - which in turn may change the peer's current term `currentTerm` 31 | - which in turn may change the peer's assumed leader `votedFor` 32 | 33 | ## Callback Procedure 34 | 35 | A callback runs when an RPC response arrives; it is the continuation of an async RPC. A callback may only change the state assigned to it through `AsyncRpcInfo`. When it needs to change any other part of the raft process, it must do so by pushing a task onto the `TaskQueue`. 36 | 37 | ## Change in Peer State 38 | 39 | The state of a raft instance may be changed by either of the two threads described above, so the transitions must be organized carefully. 40 | 41 | The key question is how each kind of procedure handles a change in state. 42 | 43 | ### State Procedure 44 | 45 | The main state procedure dispatches its work via **polymorphism**: as the attribute `myState` changes, the behavior changes accordingly. The implementations of the `RaftState` interface keep this procedure correct across state changes. 46 | 47 | ### Async Procedure 48 | 49 | Because RPCs and peer state changes are allowed for any peer under any circumstances, async procedures are not affected directly by a peer state transition. No async procedure is discarded when popped from the `TaskQueue`. 50 | 51 | Still, an async procedure may behave differently, via conditionals or polymorphism, depending on the peer state it observes. 52 | 53 | ### Callback Procedure 54 | 55 | When an RPC returns, the raft process state may have changed so that the call's original purpose is obsolete; each callback procedure checks for this before acting on the reply. 56 | 57 | ## Peer State Lock 58 | 59 | Async and Callback procedures may change peer state, while the State procedure does not. Therefore, in Async and Callback procedures, the peer state is protected by the provided mutex attribute `mu`. 
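For concreteness, here is a minimal, illustrative sketch of the task-queue pattern described above: RPC handlers and state-change requests push tasks, and a single worker goroutine drains and executes them one at a time. The channel-backed queue and the names `Task`, `NewTaskQueue`, `Push`, and `runLoop` are assumptions made only for this sketch; the actual `TaskQueue` in `raft_task.go` may be implemented differently (for example, as a mutex-protected slice).

```go
package raft

// Task is the unit of work executed by the queue's single worker goroutine.
// The concrete tasks in this repository (e.g. AppendEntriesTask) expose an
// execute() method, which is what this sketch models.
type Task interface {
	execute()
}

// TaskQueue serializes tasks pushed by RPC handlers and by requests to
// change peer state.
type TaskQueue struct {
	tasks chan Task // buffered channel standing in for the queue
}

func NewTaskQueue(capacity int) *TaskQueue {
	return &TaskQueue{tasks: make(chan Task, capacity)}
}

// Push enqueues a task instead of mutating raft state directly.
func (q *TaskQueue) Push(t Task) {
	q.tasks <- t
}

// runLoop is started once per raft instance; it pops and executes tasks
// one at a time, so tasks never race with one another.
func (q *TaskQueue) runLoop() {
	for t := range q.tasks {
		t.execute()
	}
}
```

Because a single goroutine executes every task, a task may inspect and modify raft state without racing against other tasks; callbacks that need such changes enqueue a task rather than touching the state directly, which is the discipline described in the Callback Procedure section above.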
60 | 61 | # Go through Running 62 | 63 | 64 | -------------------------------------------------------------------------------- /src/raft/append_entries_callback.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | type AppendEntriesCall struct { 4 | AsyncRpcCallAttr 5 | 6 | args []AppendEntriesArgs 7 | replies []AppendEntriesReply 8 | } 9 | 10 | func (aec *AppendEntriesCall) makeRpcCall(peerIndex int) bool { 11 | aec.args[peerIndex] = *aec.raft.newAppendEntriesArgs(peerIndex) 12 | return aec.peers[peerIndex].Call("Raft.AppendEntries", &aec.args[peerIndex], &aec.replies[peerIndex]) 13 | } 14 | 15 | func (aec *AppendEntriesCall) shouldExit() bool { 16 | if aec.MustExit { 17 | return true 18 | } 19 | return false 20 | } 21 | 22 | func (aec *AppendEntriesCall) callback(peerIndex int) { 23 | if !aec.raft.MyState.IsLeader() { 24 | aec.SetMustExit() 25 | return 26 | } 27 | aec.raft.printInfo("heartbeat received from peer", peerIndex) 28 | reply := &aec.replies[peerIndex] 29 | if aec.raft.tryFollowNewerTerm(peerIndex, reply.Term) { 30 | aec.SetMustExit() 31 | return 32 | } 33 | aec.raft.printInfo("peer", peerIndex, "received", len(aec.args[peerIndex].Entries), "entries") 34 | for reply.Success == false { 35 | aec.raft.peerLogStates.Less(peerIndex) 36 | /* 37 | nextIndex := aec.raft.Log.lastTermIndex(0, reply.ConflictTerm) 38 | if nextIndex == -1 { 39 | aec.raft.peerLogStates.NextIndex[peerIndex] = reply.ConflictIndex 40 | } else { 41 | aec.raft.peerLogStates.NextIndex[peerIndex] = nextIndex 42 | } 43 | */ 44 | aec.raft.printInfo("peer", peerIndex, "refused entries sent, must decrement nextIndex to", aec.raft.peerLogStates.NextIndex[peerIndex]) 45 | ok := aec.makeRpcCall(peerIndex) 46 | if ok == false { 47 | return 48 | } 49 | if !aec.raft.MyState.IsLeader() { 50 | aec.SetMustExit() 51 | return 52 | } 53 | if aec.raft.tryFollowNewerTerm(peerIndex, reply.Term) { 54 | aec.SetMustExit() 55 | return 56 | } 57 | } 58 | aec.raft.peerLogStates.More(peerIndex, len(aec.args[peerIndex].Entries)) 59 | aec.raft.printInfo("peer", peerIndex, "log updated to", aec.raft.peerLogStates.matchIndex[peerIndex]) 60 | } 61 | 62 | func (aec *AppendEntriesCall) tryEnd() bool { 63 | if aec.CurrentCount >= aec.TotalCount { 64 | aec.SetMustExit() 65 | aec.raft.TryCommit(aec) 66 | return true 67 | } 68 | return false 69 | } 70 | 71 | func NewAppendEntriesCall(raft *Raft) *AppendEntriesCall { 72 | return &AppendEntriesCall{ 73 | AsyncRpcCallAttr: raft.NewAsyncRpcCall(), 74 | args: make([]AppendEntriesArgs, raft.PeerCount()), 75 | replies: make([]AppendEntriesReply, raft.PeerCount()), 76 | } 77 | } 78 | 79 | /* 80 | func (rf *Raft) appendEntriesCallBack(ok bool, peerIndex int, args *AppendEntriesArgs, reply *AppendEntriesReply, info *AsyncRpcCallAttr) { 81 | if ok { 82 | info.SetAliveHost(peerIndex) 83 | if rf.tryFollowNewerTerm(peerIndex, reply.Term) { 84 | info.SetMustExit() 85 | } else { 86 | // decrement and retry 87 | for reply.Success == false { 88 | //rf.logMutex.Lock() 89 | if rf.MyState != LeaderState { 90 | info.SetMustExit() 91 | return 92 | } 93 | rf.NextIndex[peerIndex]-- 94 | args = rf.newAppendEntriesArgs(peerIndex) 95 | fmt.Println(rf.PrefixPrint(), "got false heartbeat reply from peer", peerIndex, ", must decrement NextIndex then try again") 96 | //rf.logMutex.Unlock() 97 | // retry 98 | reply = &AppendEntriesReply{} 99 | ok = rf.sendAppendEntries(peerIndex, args, reply) 100 | if !ok { 101 | break 102 | } 103 | if rf.tryFollowNewerTerm(peerIndex, 
reply.Term) { 104 | info.SetMustExit() 105 | return 106 | } 107 | } 108 | if ok { 109 | //rf.logMutex.Lock() 110 | // update NextIndex, matchIndex 111 | 112 | //rf.logMutex.Unlock() 113 | } 114 | } 115 | } else { 116 | //fmt.Println(rf.PrefixPrint(), "found peer", peerIndex, "unreachable when sending heartbeats") 117 | } 118 | info.IncrementCurrentCount() 119 | if ok { 120 | fmt.Println(rf.PrefixPrint(), "got reply on AppendEntries #current", info.CurrentCount, "#total", info.TotalCount) 121 | } else { 122 | fmt.Println(rf.PrefixPrint(), "got timeout on AppendEntries #current", info.CurrentCount, "#total", info.TotalCount) 123 | } 124 | if info.MustExit == false { 125 | rf.leaderTryCommit(info) 126 | } 127 | } 128 | */ 129 | -------------------------------------------------------------------------------- /src/raft/append_entries_task.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | type AppendEntriesTask struct { 4 | RaftTaskAttr 5 | args *AppendEntriesArgs 6 | reply *AppendEntriesReply 7 | } 8 | 9 | func (aet *AppendEntriesTask) execute() { 10 | aet.executeAppendEntriesRpc(aet.args, aet.reply) 11 | } 12 | 13 | func (aet *AppendEntriesTask) executeAppendEntriesRpc(args *AppendEntriesArgs, reply *AppendEntriesReply) { 14 | reply.Term = aet.raft.currentTerm 15 | reply.Success = true 16 | aet.raft.printInfo("heartbeat received from peer", args.LeaderId) 17 | 18 | // ignore old terms 19 | if aet.raft.currentTerm > args.Term { 20 | reply.Success = false 21 | aet.raft.printInfo("sees an old term AppendEntries from peer", args.LeaderId) 22 | return 23 | } 24 | if aet.raft.tryFollowNewerTerm(args.LeaderId, args.Term) { 25 | 26 | } 27 | aet.raft.TimeParams.heartBeatTimer.SetClear() 28 | if args.PrevLogIndex >= aet.raft.Log.Length() { 29 | reply.Success = false 30 | aet.raft.printInfo("new entries to log index", args.PrevLogIndex, "too large") 31 | reply.ConflictIndex = aet.raft.Log.Length() 32 | reply.ConflictTerm = -1 33 | return 34 | } 35 | if args.PrevLogIndex != -1 { 36 | // check when there should be a previous log entry 37 | if aet.raft.Log.Index(args.PrevLogIndex).Term != args.PrevLogTerm { 38 | reply.Success = false 39 | aet.raft.printInfo("new entries term", args.PrevLogTerm, "not consistent with this peer's previous log entry term", aet.raft.Log.Index(args.PrevLogIndex).Term) 40 | reply.ConflictTerm = aet.raft.Log.Index(args.PrevLogIndex).Term 41 | reply.ConflictIndex = aet.raft.Log.firstTermIndex(args.PrevLogIndex, reply.ConflictTerm) 42 | return 43 | } 44 | } 45 | // here the log can be updated 46 | aet.raft.printInfo("trying to append #entries", len(args.Entries)) 47 | aet.raft.Log.UpdateLog(args.Entries, args.PrevLogIndex, args.LeaderCommit) 48 | // extra modifications done under candidate 49 | if aet.raft.MyState.IsCandidate() { 50 | aet.raft.currentTerm = args.Term 51 | aet.raft.ToFollower(args.LeaderId) 52 | } 53 | } 54 | 55 | func NewAppendEntriesTask(raft *Raft, args *AppendEntriesArgs, reply *AppendEntriesReply) *AppendEntriesTask { 56 | return &AppendEntriesTask{ 57 | RaftTaskAttr: NewRaftTaskAttr(raft), 58 | args: args, 59 | reply: reply, 60 | } 61 | } 62 | 63 | // A non-leader should receive this 64 | /* 65 | func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { 66 | RunTask(NewAppendEntriesTask(rf, args, reply), &rf.taskQueue) 67 | return 68 | //fmt.Println(rf.PrefixPrint(), "got heartbeat message from leader peer", args.LeaderId, "at term", args.Term) 69 | // default reply state 70 | 
reply.Term = rf.currentTerm 71 | reply.Success = true 72 | rf.timerCleared = true 73 | 74 | if rf.tryDiscardOldTerm(args.LeaderId, args.Term) { 75 | reply.Success = false 76 | return 77 | } 78 | if rf.tryFollowNewerTerm(args.LeaderId, args.Term) { 79 | reply.Success = true 80 | } 81 | // check Log info 82 | if args.PrevLogIndex >= len(rf.logs) { 83 | reply.Success = false 84 | fmt.Println(rf.PrefixPrint(), "got new Log index", args.PrevLogIndex+1, "too large for this peer's Log length", len(rf.logs)) 85 | return 86 | } else { 87 | if args.PrevLogIndex != -1 { 88 | if rf.logs[args.PrevLogIndex].Term != args.PrevLogTerm { 89 | reply.Success = false 90 | fmt.Println(rf.PrefixPrint(), "Log entry on PrevLogIndex term inconsistent") 91 | return 92 | } 93 | } 94 | } 95 | for argsEntryIndex := 0; argsEntryIndex < len(args.Entries); { 96 | newEntryIndex := argsEntryIndex + args.PrevLogIndex + 1 97 | if newEntryIndex < len(rf.logs) { 98 | oldEntry := &rf.logs[newEntryIndex] 99 | newEntry := &args.Entries[argsEntryIndex] 100 | // existing Log 101 | // check for conflict 102 | if oldEntry.Equals(newEntry) { 103 | // consistent! 104 | argsEntryIndex++ 105 | } else { 106 | // inconsistent! 107 | // delete everything after current index 108 | rf.logs = rf.logs[:newEntryIndex] 109 | fmt.Println(rf.PrefixPrint(), "inconsistent with leader at Log index", newEntryIndex, "removing from then on") 110 | } 111 | } else { 112 | // new Log 113 | // append everything 114 | rf.logs = append(rf.logs, args.Entries[argsEntryIndex:]...) 115 | break 116 | } 117 | } 118 | if args.LeaderCommit > rf.commitIndex { 119 | oldCommitIndex := rf.commitIndex 120 | if args.LeaderCommit < len(rf.logs)-1 { 121 | rf.commitIndex = args.LeaderCommit 122 | } else { 123 | rf.commitIndex = len(rf.logs) - 1 124 | } 125 | for ; oldCommitIndex <= rf.commitIndex; oldCommitIndex++ { 126 | if oldCommitIndex == 0 { 127 | continue 128 | } 129 | rf.applyCh <- ApplyMsg{ 130 | CommandIndex: oldCommitIndex, 131 | CommandValid: true, 132 | Command: rf.logs[oldCommitIndex].Command, 133 | } 134 | } 135 | 136 | } 137 | fmt.Println(rf.PrefixPrint(), "got", len(args.Entries), "new Log entries from leader peer", args.LeaderId, "committed index", rf.commitIndex) 138 | 139 | switch rf.MyState { 140 | case LeaderState: 141 | break 142 | case FollowerState: 143 | break 144 | case CandidateState: 145 | rf.MyState = FollowerState 146 | rf.currentTerm = args.Term 147 | rf.votedFor = args.LeaderId 148 | reply.Success = true 149 | fmt.Println(rf.PrefixPrint(), "set leader to peer", args.LeaderId, "by heartbeat message") 150 | break 151 | default: 152 | panic("Invalid peer state in rpc AppendEntries!") 153 | } 154 | 155 | //rf.mu.Unlock() 156 | } 157 | */ 158 | -------------------------------------------------------------------------------- /src/raft/callback.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "labrpc" 5 | "sync" 6 | ) 7 | 8 | type AsyncRpcCallInterface interface { 9 | callback(int) 10 | tryEnd() bool 11 | 12 | SetAliveHost(int) 13 | makeRpcCall(peerIndex int) bool 14 | IncrementCurrentCount() 15 | PeerCount() int 16 | shouldExit() bool 17 | 18 | LockCallBack() 19 | UnlockCallBack() 20 | 21 | GetRaftIndex() int 22 | GetRaft() *Raft 23 | } 24 | 25 | /* 26 | Thus, an async rpc call should only implement these methods: 27 | - makeRpcCall 28 | - callBack 29 | - tryEnd 30 | */ 31 | func CallAsyncRpc(call AsyncRpcCallInterface) { 32 | for peerIndex := 0; peerIndex < call.PeerCount(); 
peerIndex++ { 33 | if call.GetRaftIndex() == peerIndex { 34 | // don't send to myself 35 | continue 36 | } 37 | go func(peerIndex int) { 38 | ok := call.makeRpcCall(peerIndex) 39 | // one callback at a time 40 | call.LockCallBack() 41 | call.IncrementCurrentCount() 42 | if call.shouldExit() { 43 | call.UnlockCallBack() 44 | return 45 | } 46 | if ok { 47 | call.SetAliveHost(peerIndex) 48 | call.callback(peerIndex) 49 | } else { 50 | call.GetRaft().printInfo("rpc to peer", peerIndex, "timeout") 51 | } 52 | if call.tryEnd() { 53 | call.UnlockCallBack() 54 | return 55 | } 56 | call.UnlockCallBack() 57 | }(peerIndex) 58 | } 59 | } 60 | 61 | type AsyncRpcCallAttr struct { 62 | // initialized in constructor 63 | AliveCount int 64 | SuccessCount int 65 | TotalCount int 66 | CurrentCount int 67 | AliveHosts []bool 68 | peers []*labrpc.ClientEnd 69 | raft *Raft 70 | 71 | Cond *sync.Cond 72 | mu sync.Mutex 73 | MustExit bool 74 | } 75 | 76 | func (ri *AsyncRpcCallAttr) PeerCount() int { 77 | return ri.TotalCount 78 | } 79 | 80 | func (ri *AsyncRpcCallAttr) IncrementAliveCount() { 81 | ri.AliveCount++ 82 | } 83 | 84 | func (ri *AsyncRpcCallAttr) IncrementSuccessCount() { 85 | ri.SuccessCount++ 86 | } 87 | 88 | func (ri *AsyncRpcCallAttr) IncrementCurrentCount() { 89 | ri.Cond.L.Lock() 90 | ri.CurrentCount++ 91 | ri.Cond.L.Unlock() 92 | ri.Cond.Broadcast() 93 | } 94 | 95 | func (ri *AsyncRpcCallAttr) Wait() { 96 | ri.raft.printInfo("waiting for election done") 97 | ri.Cond.L.Lock() 98 | for !(ri.CurrentCount >= ri.TotalCount || ri.MustExit) { 99 | ri.Cond.Wait() 100 | } 101 | ri.Cond.L.Unlock() 102 | ri.raft.printInfo("election done wait exit") 103 | } 104 | 105 | func (ri *AsyncRpcCallAttr) SetMustExit() { 106 | ri.Cond.L.Lock() 107 | ri.MustExit = true 108 | ri.Cond.L.Unlock() 109 | ri.Cond.Broadcast() 110 | } 111 | 112 | func (ri *AsyncRpcCallAttr) SetAliveHost(index int) { 113 | ri.AliveHosts[index] = true 114 | ri.AliveCount++ 115 | } 116 | 117 | func (ri *AsyncRpcCallAttr) shouldExit() bool { 118 | return ri.MustExit 119 | } 120 | 121 | func (ri *AsyncRpcCallAttr) LockCallBack() { 122 | ri.mu.Lock() 123 | } 124 | 125 | func (ri *AsyncRpcCallAttr) UnlockCallBack() { 126 | ri.mu.Unlock() 127 | } 128 | 129 | func (ri *AsyncRpcCallAttr) GetRaftIndex() int { 130 | return ri.raft.me 131 | } 132 | 133 | func (ri *AsyncRpcCallAttr) GetRaft() *Raft { 134 | return ri.raft 135 | } 136 | 137 | func (rf *Raft) NewAsyncRpcCall() AsyncRpcCallAttr { 138 | aliveHosts := make([]bool, len(rf.peers)) 139 | for index, _ := range aliveHosts { 140 | aliveHosts[index] = false 141 | } 142 | return AsyncRpcCallAttr{ 143 | TotalCount: rf.PeerCount(), 144 | Cond: sync.NewCond(&sync.Mutex{}), 145 | AliveHosts: aliveHosts, 146 | AliveCount: 1, 147 | SuccessCount: 1, 148 | CurrentCount: 1, 149 | MustExit: false, 150 | peers: rf.peers, 151 | raft: rf, 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/raft/config.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft tester. 5 | // 6 | // we will use the original config.go to test your code for grading. 7 | // so, while you can modify this code to help you debug, please 8 | // test with the original before submitting. 
9 | // 10 | 11 | import "labrpc" 12 | import "log" 13 | import "sync" 14 | import "testing" 15 | import "runtime" 16 | import "math/rand" 17 | import crand "crypto/rand" 18 | import "math/big" 19 | import "encoding/base64" 20 | import "time" 21 | import "fmt" 22 | 23 | func randstring(n int) string { 24 | b := make([]byte, 2*n) 25 | crand.Read(b) 26 | s := base64.URLEncoding.EncodeToString(b) 27 | return s[0:n] 28 | } 29 | 30 | func makeSeed() int64 { 31 | max := big.NewInt(int64(1) << 62) 32 | bigx, _ := crand.Int(crand.Reader, max) 33 | x := bigx.Int64() 34 | return x 35 | } 36 | 37 | type config struct { 38 | mu sync.Mutex 39 | t *testing.T 40 | net *labrpc.Network 41 | n int 42 | rafts []*Raft 43 | applyErr []string // from apply channel readers 44 | connected []bool // whether each server is on the net 45 | saved []*Persister 46 | endnames [][]string // the port file names each sends to 47 | logs []map[int]interface{} // copy of each server's committed entries 48 | start time.Time // time at which make_config() was called 49 | // begin()/end() statistics 50 | t0 time.Time // time at which test_test.go called cfg.begin() 51 | rpcs0 int // rpcTotal() at start of test 52 | cmds0 int // number of agreements 53 | bytes0 int64 54 | maxIndex int 55 | maxIndex0 int 56 | } 57 | 58 | var ncpu_once sync.Once 59 | 60 | func make_config(t *testing.T, n int, unreliable bool) *config { 61 | ncpu_once.Do(func() { 62 | if runtime.NumCPU() < 2 { 63 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 64 | } 65 | rand.Seed(makeSeed()) 66 | }) 67 | runtime.GOMAXPROCS(4) 68 | cfg := &config{} 69 | cfg.t = t 70 | cfg.net = labrpc.MakeNetwork() 71 | cfg.n = n 72 | cfg.applyErr = make([]string, cfg.n) 73 | cfg.rafts = make([]*Raft, cfg.n) 74 | cfg.connected = make([]bool, cfg.n) 75 | cfg.saved = make([]*Persister, cfg.n) 76 | cfg.endnames = make([][]string, cfg.n) 77 | cfg.logs = make([]map[int]interface{}, cfg.n) 78 | cfg.start = time.Now() 79 | 80 | cfg.setunreliable(unreliable) 81 | 82 | cfg.net.LongDelays(true) 83 | 84 | // create a full set of Rafts. 85 | for i := 0; i < cfg.n; i++ { 86 | cfg.logs[i] = map[int]interface{}{} 87 | cfg.start1(i) 88 | } 89 | 90 | // connect everyone 91 | for i := 0; i < cfg.n; i++ { 92 | cfg.connect(i) 93 | } 94 | 95 | return cfg 96 | } 97 | 98 | // shut down a Raft server but save its persistent state. 99 | func (cfg *config) crash1(i int) { 100 | cfg.disconnect(i) 101 | cfg.net.DeleteServer(i) // disable client connections to the server. 102 | 103 | cfg.mu.Lock() 104 | defer cfg.mu.Unlock() 105 | 106 | // a fresh persister, in case old instance 107 | // continues to update the Persister. 108 | // but copy old persister's content so that we always 109 | // pass Make() the last persisted state. 110 | if cfg.saved[i] != nil { 111 | cfg.saved[i] = cfg.saved[i].Copy() 112 | } 113 | 114 | rf := cfg.rafts[i] 115 | if rf != nil { 116 | cfg.mu.Unlock() 117 | rf.Kill() 118 | cfg.mu.Lock() 119 | cfg.rafts[i] = nil 120 | } 121 | 122 | if cfg.saved[i] != nil { 123 | raftlog := cfg.saved[i].ReadRaftState() 124 | cfg.saved[i] = &Persister{} 125 | cfg.saved[i].SaveRaftState(raftlog) 126 | } 127 | } 128 | 129 | // 130 | // start or re-start a Raft. 131 | // if one already exists, "kill" it first. 132 | // allocate new outgoing port file names, and a new 133 | // state persister, to isolate previous instance of 134 | // this server. since we cannot really kill it. 
135 | // 136 | func (cfg *config) start1(i int) { 137 | cfg.crash1(i) 138 | 139 | // a fresh set of outgoing ClientEnd names. 140 | // so that old crashed instance's ClientEnds can't send. 141 | cfg.endnames[i] = make([]string, cfg.n) 142 | for j := 0; j < cfg.n; j++ { 143 | cfg.endnames[i][j] = randstring(20) 144 | } 145 | 146 | // a fresh set of ClientEnds. 147 | ends := make([]*labrpc.ClientEnd, cfg.n) 148 | for j := 0; j < cfg.n; j++ { 149 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 150 | cfg.net.Connect(cfg.endnames[i][j], j) 151 | } 152 | 153 | cfg.mu.Lock() 154 | 155 | // a fresh persister, so old instance doesn't overwrite 156 | // new instance's persisted state. 157 | // but copy old persister's content so that we always 158 | // pass Make() the last persisted state. 159 | if cfg.saved[i] != nil { 160 | cfg.saved[i] = cfg.saved[i].Copy() 161 | } else { 162 | cfg.saved[i] = MakePersister() 163 | } 164 | 165 | cfg.mu.Unlock() 166 | 167 | // listen to messages from Raft indicating newly committed messages. 168 | applyCh := make(chan ApplyMsg) 169 | go func() { 170 | for m := range applyCh { 171 | err_msg := "" 172 | if m.CommandValid == false { 173 | // ignore other types of ApplyMsg 174 | } else { 175 | v := m.Command 176 | cfg.mu.Lock() 177 | //fmt.Println("[monitor] sees a committed Command", m.Command, "valid", m.CommandValid, "index", m.CommandIndex) 178 | for j := 0; j < len(cfg.logs); j++ { 179 | if old, oldok := cfg.logs[j][m.CommandIndex]; oldok && old != v { 180 | // some server has already committed a different value for this entry! 181 | err_msg = fmt.Sprintf("commit index=%v server=%v %v != server=%v %v", 182 | m.CommandIndex, i, m.Command, j, old) 183 | } 184 | } 185 | _, prevok := cfg.logs[i][m.CommandIndex-1] 186 | cfg.logs[i][m.CommandIndex] = v 187 | if m.CommandIndex > cfg.maxIndex { 188 | cfg.maxIndex = m.CommandIndex 189 | } 190 | cfg.mu.Unlock() 191 | 192 | if m.CommandIndex > 1 && prevok == false { 193 | err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex) 194 | } 195 | } 196 | 197 | if err_msg != "" { 198 | log.Fatalf("apply error: %v\n", err_msg) 199 | cfg.applyErr[i] = err_msg 200 | // keep reading after error so that Raft doesn't block 201 | // holding locks... 202 | } 203 | } 204 | }() 205 | 206 | rf := Make(ends, i, cfg.saved[i], applyCh) 207 | 208 | cfg.mu.Lock() 209 | cfg.rafts[i] = rf 210 | cfg.mu.Unlock() 211 | 212 | svc := labrpc.MakeService(rf) 213 | srv := labrpc.MakeServer() 214 | srv.AddService(svc) 215 | cfg.net.AddServer(i, srv) 216 | } 217 | 218 | func (cfg *config) checkTimeout() { 219 | // enforce a two minute real-time limit on each test 220 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 221 | cfg.t.Fatal("test took longer than 120 seconds") 222 | } 223 | } 224 | 225 | func (cfg *config) cleanup() { 226 | for i := 0; i < len(cfg.rafts); i++ { 227 | if cfg.rafts[i] != nil { 228 | cfg.rafts[i].Kill() 229 | } 230 | } 231 | cfg.net.Cleanup() 232 | cfg.checkTimeout() 233 | } 234 | 235 | // attach server i to the net. 
236 | func (cfg *config) connect(i int) { 237 | // fmt.Printf("connect(%d)\n", i) 238 | 239 | cfg.connected[i] = true 240 | 241 | // outgoing ClientEnds 242 | for j := 0; j < cfg.n; j++ { 243 | if cfg.connected[j] { 244 | endname := cfg.endnames[i][j] 245 | cfg.net.Enable(endname, true) 246 | } 247 | } 248 | 249 | // incoming ClientEnds 250 | for j := 0; j < cfg.n; j++ { 251 | if cfg.connected[j] { 252 | endname := cfg.endnames[j][i] 253 | cfg.net.Enable(endname, true) 254 | } 255 | } 256 | } 257 | 258 | // detach server i from the net. 259 | func (cfg *config) disconnect(i int) { 260 | // fmt.Printf("disconnect(%d)\n", i) 261 | 262 | cfg.connected[i] = false 263 | 264 | // outgoing ClientEnds 265 | for j := 0; j < cfg.n; j++ { 266 | if cfg.endnames[i] != nil { 267 | endname := cfg.endnames[i][j] 268 | cfg.net.Enable(endname, false) 269 | } 270 | } 271 | 272 | // incoming ClientEnds 273 | for j := 0; j < cfg.n; j++ { 274 | if cfg.endnames[j] != nil { 275 | endname := cfg.endnames[j][i] 276 | cfg.net.Enable(endname, false) 277 | } 278 | } 279 | } 280 | 281 | func (cfg *config) rpcCount(server int) int { 282 | return cfg.net.GetCount(server) 283 | } 284 | 285 | func (cfg *config) rpcTotal() int { 286 | return cfg.net.GetTotalCount() 287 | } 288 | 289 | func (cfg *config) setunreliable(unrel bool) { 290 | cfg.net.Reliable(!unrel) 291 | } 292 | 293 | func (cfg *config) bytesTotal() int64 { 294 | return cfg.net.GetTotalBytes() 295 | } 296 | 297 | func (cfg *config) setlongreordering(longrel bool) { 298 | cfg.net.LongReordering(longrel) 299 | } 300 | 301 | // check that there's exactly one leader. 302 | // try a few times in case re-elections are needed. 303 | func (cfg *config) checkOneLeader() int { 304 | for iters := 0; iters < 10; iters++ { 305 | ms := 450 + (rand.Int63() % 100) 306 | time.Sleep(time.Duration(ms) * time.Millisecond) 307 | 308 | leaders := make(map[int][]int) 309 | for i := 0; i < cfg.n; i++ { 310 | if cfg.connected[i] { 311 | if term, leader := cfg.rafts[i].GetState(); leader { 312 | leaders[term] = append(leaders[term], i) 313 | } 314 | } 315 | } 316 | 317 | lastTermWithLeader := -1 318 | for term, leaders := range leaders { 319 | if len(leaders) > 1 { 320 | cfg.t.Fatalf("Term %d has %d (>1) leaders", term, len(leaders)) 321 | } 322 | if term > lastTermWithLeader { 323 | lastTermWithLeader = term 324 | } 325 | } 326 | 327 | if len(leaders) != 0 { 328 | return leaders[lastTermWithLeader][0] 329 | } 330 | } 331 | cfg.t.Fatalf("expected one leader, got none") 332 | return -1 333 | } 334 | 335 | // check that everyone agrees on the Term. 336 | func (cfg *config) checkTerms() int { 337 | term := -1 338 | for i := 0; i < cfg.n; i++ { 339 | if cfg.connected[i] { 340 | xterm, _ := cfg.rafts[i].GetState() 341 | if term == -1 { 342 | term = xterm 343 | } else if term != xterm { 344 | cfg.t.Fatalf("servers disagree on Term") 345 | } 346 | } 347 | } 348 | return term 349 | } 350 | 351 | // check that there's no leader 352 | func (cfg *config) checkNoLeader() { 353 | for i := 0; i < cfg.n; i++ { 354 | if cfg.connected[i] { 355 | _, is_leader := cfg.rafts[i].GetState() 356 | if is_leader { 357 | cfg.t.Fatalf("expected no leader, but %v claims to be leader", i) 358 | } 359 | } 360 | } 361 | } 362 | 363 | // how many servers think a Log entry is committed? 
364 | func (cfg *config) nCommitted(index int) (int, interface{}) { 365 | count := 0 366 | var cmd interface{} = nil 367 | for i := 0; i < len(cfg.rafts); i++ { 368 | if cfg.applyErr[i] != "" { 369 | cfg.t.Fatal(cfg.applyErr[i]) 370 | } 371 | 372 | cfg.mu.Lock() 373 | cmd1, ok := cfg.logs[i][index] 374 | cfg.mu.Unlock() 375 | 376 | if ok { 377 | if count > 0 && cmd != cmd1 { 378 | cfg.t.Fatalf("committed values do not match: index %v, %v, %v\n", 379 | index, cmd, cmd1) 380 | } 381 | count += 1 382 | cmd = cmd1 383 | } 384 | } 385 | return count, cmd 386 | } 387 | 388 | // wait for at least n servers to commit. 389 | // but don't wait forever. 390 | func (cfg *config) wait(index int, n int, startTerm int) interface{} { 391 | to := 10 * time.Millisecond 392 | for iters := 0; iters < 30; iters++ { 393 | nd, _ := cfg.nCommitted(index) 394 | if nd >= n { 395 | break 396 | } 397 | time.Sleep(to) 398 | if to < time.Second { 399 | to *= 2 400 | } 401 | if startTerm > -1 { 402 | for _, r := range cfg.rafts { 403 | if t, _ := r.GetState(); t > startTerm { 404 | // someone has moved on 405 | // can no longer guarantee that we'll "win" 406 | return -1 407 | } 408 | } 409 | } 410 | } 411 | nd, cmd := cfg.nCommitted(index) 412 | if nd < n { 413 | cfg.t.Fatalf("only %d decided for index %d; wanted %d\n", 414 | nd, index, n) 415 | } 416 | return cmd 417 | } 418 | 419 | // do a complete agreement. 420 | // it might choose the wrong leader initially, 421 | // and have to re-submit after giving up. 422 | // entirely gives up after about 10 seconds. 423 | // indirectly checks that the servers agree on the 424 | // same value, since nCommitted() checks this, 425 | // as do the threads that read from applyCh. 426 | // returns index. 427 | // if retry==true, may submit the command multiple 428 | // times, in case a leader fails just after Start(). 429 | // if retry==false, calls Start() only once, in order 430 | // to simplify the early Lab 2B tests. 431 | func (cfg *config) one(cmd interface{}, expectedServers int, retry bool) int { 432 | t0 := time.Now() 433 | starts := 0 434 | for time.Since(t0).Seconds() < 10 { 435 | // try all the servers, maybe one is the leader. 436 | index := -1 437 | for si := 0; si < cfg.n; si++ { 438 | starts = (starts + 1) % cfg.n 439 | var rf *Raft 440 | cfg.mu.Lock() 441 | if cfg.connected[starts] { 442 | rf = cfg.rafts[starts] 443 | } 444 | cfg.mu.Unlock() 445 | if rf != nil { 446 | index1, _, ok := rf.Start(cmd) 447 | if ok { 448 | index = index1 449 | break 450 | } 451 | } 452 | } 453 | 454 | if index != -1 { 455 | // somebody claimed to be the leader and to have 456 | // submitted our command; wait a while for agreement. 457 | t1 := time.Now() 458 | for time.Since(t1).Seconds() < 2 { 459 | nd, cmd1 := cfg.nCommitted(index) 460 | if nd > 0 && nd >= expectedServers { 461 | // committed 462 | if cmd1 == cmd { 463 | // and it was the command we submitted. 464 | return index 465 | } 466 | } 467 | time.Sleep(20 * time.Millisecond) 468 | } 469 | if retry == false { 470 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) 471 | } 472 | } else { 473 | time.Sleep(50 * time.Millisecond) 474 | } 475 | } 476 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) 477 | 478 | return -1 479 | } 480 | 481 | // start a Test. 482 | // print the Test message. 483 | // e.g. 
cfg.begin("Test (2B): RPC counts aren't too high") 484 | func (cfg *config) begin(description string) { 485 | fmt.Printf("%s ...\n", description) 486 | cfg.t0 = time.Now() 487 | cfg.rpcs0 = cfg.rpcTotal() 488 | cfg.bytes0 = cfg.bytesTotal() 489 | cfg.cmds0 = 0 490 | cfg.maxIndex0 = cfg.maxIndex 491 | } 492 | 493 | // end a Test -- the fact that we got here means there 494 | // was no failure. 495 | // print the Passed message, 496 | // and some performance numbers. 497 | func (cfg *config) end() { 498 | cfg.checkTimeout() 499 | if cfg.t.Failed() == false { 500 | cfg.mu.Lock() 501 | t := time.Since(cfg.t0).Seconds() // real time 502 | npeers := cfg.n // number of Raft peers 503 | nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends 504 | nbytes := cfg.bytesTotal() - cfg.bytes0 // number of bytes 505 | ncmds := cfg.maxIndex - cfg.maxIndex0 // number of Raft agreements reported 506 | cfg.mu.Unlock() 507 | 508 | fmt.Printf(" ... Passed --") 509 | fmt.Printf(" %4.1f %d %4d %7d %4d\n", t, npeers, nrpc, nbytes, ncmds) 510 | } 511 | } 512 | -------------------------------------------------------------------------------- /src/raft/peer_log_state.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "sync" 4 | 5 | type PeerLogStates struct { 6 | NextIndex []int 7 | matchIndex []int // init 0 8 | 9 | mutex sync.Mutex 10 | } 11 | 12 | func initPeerCountIntArray(peerCount int, initVal int) []int { 13 | ret := make([]int, peerCount) 14 | for index := 0; index < peerCount; index++ { 15 | ret[index] = initVal 16 | } 17 | return ret 18 | } 19 | 20 | func NewPeerLogStates(peerCount int) PeerLogStates { 21 | return PeerLogStates{ 22 | NextIndex: initPeerCountIntArray(peerCount, 0), 23 | matchIndex: initPeerCountIntArray(peerCount, 0), 24 | } 25 | } 26 | 27 | func (pls *PeerLogStates) Lock() { 28 | pls.mutex.Lock() 29 | } 30 | 31 | func (pls *PeerLogStates) Unlock() { 32 | pls.mutex.Unlock() 33 | } 34 | 35 | func (pls *PeerLogStates) SetAllNextIndex(nextIndex int) { 36 | pls.mutex.Lock() 37 | for index := 0; index < len(pls.NextIndex); index++ { 38 | pls.NextIndex[index] = nextIndex 39 | } 40 | pls.mutex.Unlock() 41 | } 42 | 43 | func (pls *PeerLogStates) More(peerIndex int, moreNextIndex int) { 44 | pls.mutex.Lock() 45 | pls.NextIndex[peerIndex] += moreNextIndex 46 | pls.matchIndex[peerIndex] = pls.NextIndex[peerIndex] - 1 47 | pls.mutex.Unlock() 48 | } 49 | 50 | func (pls *PeerLogStates) Less(peerIndex int) { 51 | pls.mutex.Lock() 52 | pls.NextIndex[peerIndex] /= 2 53 | pls.mutex.Unlock() 54 | } 55 | -------------------------------------------------------------------------------- /src/raft/persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft and kvraft to save persistent 5 | // Raft state (Log &c) and k/v server snapshots. 6 | // 7 | // we will use the original persister.go to test your code for grading. 8 | // so, while you can modify this code to help you debug, please 9 | // test with the original before submitting. 
10 | // 11 | 12 | import "sync" 13 | 14 | type Persister struct { 15 | mu sync.Mutex 16 | raftstate []byte 17 | snapshot []byte 18 | } 19 | 20 | func MakePersister() *Persister { 21 | return &Persister{} 22 | } 23 | 24 | func (ps *Persister) Copy() *Persister { 25 | ps.mu.Lock() 26 | defer ps.mu.Unlock() 27 | np := MakePersister() 28 | np.raftstate = ps.raftstate 29 | np.snapshot = ps.snapshot 30 | return np 31 | } 32 | 33 | func (ps *Persister) SaveRaftState(state []byte) { 34 | ps.mu.Lock() 35 | defer ps.mu.Unlock() 36 | ps.raftstate = state 37 | } 38 | 39 | func (ps *Persister) ReadRaftState() []byte { 40 | ps.mu.Lock() 41 | defer ps.mu.Unlock() 42 | return ps.raftstate 43 | } 44 | 45 | func (ps *Persister) RaftStateSize() int { 46 | ps.mu.Lock() 47 | defer ps.mu.Unlock() 48 | return len(ps.raftstate) 49 | } 50 | 51 | // Save both Raft state and K/V snapshot as a single atomic action, 52 | // to help avoid them getting out of sync. 53 | func (ps *Persister) SaveStateAndSnapshot(state []byte, snapshot []byte) { 54 | ps.mu.Lock() 55 | defer ps.mu.Unlock() 56 | ps.raftstate = state 57 | ps.snapshot = snapshot 58 | } 59 | 60 | func (ps *Persister) ReadSnapshot() []byte { 61 | ps.mu.Lock() 62 | defer ps.mu.Unlock() 63 | return ps.snapshot 64 | } 65 | 66 | func (ps *Persister) SnapshotSize() int { 67 | ps.mu.Lock() 68 | defer ps.mu.Unlock() 69 | return len(ps.snapshot) 70 | } 71 | -------------------------------------------------------------------------------- /src/raft/raft_log.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "sync" 7 | ) 8 | 9 | type LogEntry struct { 10 | Term int 11 | Command interface{} 12 | } 13 | 14 | func (le *LogEntry) Equals(entry *LogEntry) bool { 15 | return le.Term == entry.Term && le.Command == entry.Command 16 | } 17 | 18 | func (le *LogEntry) ToApplyMsg(index int, valid bool) ApplyMsg { 19 | return ApplyMsg{ 20 | Command: le.Command, 21 | CommandValid: valid, 22 | CommandIndex: index, 23 | } 24 | } 25 | 26 | type RaftLog struct { 27 | CommitIndex int 28 | lastApplied int 29 | entries []LogEntry 30 | cond *sync.Cond 31 | applyCh chan ApplyMsg 32 | 33 | raft *Raft 34 | } 35 | 36 | func initRaftLogEntries() []LogEntry { 37 | ret := make([]LogEntry, 1) 38 | ret[0].Term = -1 39 | ret[0].Command = nil 40 | return ret 41 | } 42 | 43 | func NewRaftLog(applyCh chan ApplyMsg, raft *Raft) *RaftLog { 44 | return &RaftLog{ 45 | CommitIndex: 0, 46 | lastApplied: 0, 47 | entries: initRaftLogEntries(), 48 | cond: sync.NewCond(&sync.Mutex{}), 49 | applyCh: applyCh, 50 | raft: raft, 51 | } 52 | } 53 | 54 | var dumpLock sync.Mutex 55 | 56 | func (rl *RaftLog) InfoString() string { 57 | return "commitIndex " + strconv.Itoa(rl.CommitIndex) + " lastApplied " + strconv.Itoa(rl.lastApplied) + " log length " + strconv.Itoa(rl.Length()) 58 | } 59 | 60 | func (rl *RaftLog) dump() { 61 | dumpLock.Lock() 62 | fmt.Println("dumping log", rl.Length()) 63 | fmt.Println("log length", rl.Length(), "commit index", rl.CommitIndex) 64 | //for entryIndex, entry := range rl.entries { 65 | // fmt.Printf("%v term: %v action: %v\n", entryIndex, entry.Term, entry.Command) 66 | // if entryIndex == rl.CommitIndex { 67 | // fmt.Println("----------------------------------- commit index", entryIndex) 68 | // } 69 | //} 70 | dumpLock.Unlock() 71 | } 72 | 73 | func (rl *RaftLog) Lock() { 74 | rl.cond.L.Lock() 75 | } 76 | 77 | func (rl *RaftLog) Unlock() { 78 | rl.cond.L.Unlock() 79 | } 80 | 81 | func (rl *RaftLog) 
Append(entries ...LogEntry) { 82 | rl.entries = append(rl.entries, entries...) 83 | } 84 | 85 | // remove all logs starting at this index 86 | func (rl *RaftLog) RemoveAt(index int) { 87 | rl.entries = rl.entries[:index] 88 | } 89 | 90 | func (rl *RaftLog) Index(index int) *LogEntry { 91 | return &rl.entries[index] 92 | } 93 | 94 | func (rl *RaftLog) LastEntry() *LogEntry { 95 | return &rl.entries[rl.Length()-1] 96 | } 97 | 98 | func (rl *RaftLog) Length() int { 99 | return len(rl.entries) 100 | } 101 | 102 | func (rl *RaftLog) firstTermIndex(beginIndex int, term int) int { 103 | for ; beginIndex > 1; beginIndex-- { 104 | if rl.entries[beginIndex-1].Term != term { 105 | return beginIndex 106 | } 107 | } 108 | return 1 109 | } 110 | 111 | func (rl *RaftLog) lastTermIndex(beginIndex int, term int) int { 112 | for ; beginIndex < rl.Length()-1; beginIndex++ { 113 | if rl.entries[beginIndex].Term == term && rl.entries[beginIndex+1].Term != term { 114 | return beginIndex + 1 115 | } 116 | } 117 | return -1 118 | } 119 | 120 | /* 121 | From now on, the methods are locked. 122 | The methods above can compose, by some outer caller, other form of methods, locking taken care of by the outer caller 123 | */ 124 | 125 | func (rl *RaftLog) ApplyWorker() { 126 | for { 127 | rl.Lock() 128 | for rl.CommitIndex <= rl.lastApplied { 129 | rl.cond.Wait() 130 | } 131 | rl.lastApplied++ 132 | rl.entries[rl.lastApplied].Apply() 133 | rl.Unlock() 134 | } 135 | } 136 | 137 | func (rl *RaftLog) UpdateLog(newEntries []LogEntry, prevLogIndex int, leaderCommit int) { 138 | rl.Lock() 139 | 140 | // update 141 | for argsEntryIndex := 0; argsEntryIndex < len(newEntries); { 142 | newEntryIndex := argsEntryIndex + prevLogIndex + 1 143 | if newEntryIndex < rl.Length() { 144 | oldEntry := &rl.entries[newEntryIndex] 145 | newEntry := &newEntries[argsEntryIndex] 146 | // existing Log 147 | // check for conflict 148 | if oldEntry.Equals(newEntry) { 149 | // consistent! 150 | rl.raft.printInfo("existing consistent entry", newEntryIndex) 151 | argsEntryIndex++ 152 | } else { 153 | // inconsistent! 154 | // delete everything after current index 155 | rl.raft.printInfo("inconsistent entry at index", newEntryIndex) 156 | rl.RemoveAt(newEntryIndex) 157 | } 158 | } else { 159 | // new Log 160 | // append everything 161 | rl.raft.printInfo("new entries at", newEntryIndex, "length", len(newEntries)-argsEntryIndex) 162 | rl.Append(newEntries[argsEntryIndex:]...) 
163 | break 164 | } 165 | } 166 | // commit 167 | if leaderCommit > rl.CommitIndex { 168 | oldCommitIndex := rl.CommitIndex 169 | if leaderCommit < rl.Length()-1 { 170 | rl.CommitIndex = leaderCommit 171 | } else { 172 | rl.CommitIndex = rl.Length() - 1 173 | } 174 | for ; oldCommitIndex <= rl.CommitIndex; oldCommitIndex++ { 175 | if oldCommitIndex == 0 { 176 | continue 177 | } 178 | rl.applyCh <- rl.entries[oldCommitIndex].ToApplyMsg(oldCommitIndex, true) 179 | } 180 | } 181 | 182 | rl.Unlock() 183 | } 184 | 185 | /* 186 | Rpc Args 187 | */ 188 | func (rl *RaftLog) NewAppendEntriesArgs(nextIndex int, currentTerm int, me int) *AppendEntriesArgs { 189 | rl.cond.L.Lock() 190 | var entries []LogEntry 191 | prevLogIndex := -1 192 | prevLogTerm := -1 193 | if nextIndex >= len(rl.entries) { 194 | entries = make([]LogEntry, 0) 195 | } else { 196 | entries = rl.entries[nextIndex:] 197 | } 198 | if nextIndex != 0 { 199 | prevLogIndex = nextIndex - 1 200 | prevLogTerm = entries[prevLogIndex].Term 201 | } 202 | ret := &AppendEntriesArgs{ 203 | Term: currentTerm, 204 | LeaderId: me, 205 | LeaderCommit: rl.CommitIndex, 206 | // index of previous entry of this sending package 207 | PrevLogIndex: prevLogIndex, 208 | // term of previous entry of this sending package 209 | PrevLogTerm: prevLogTerm, 210 | // sending package 211 | Entries: entries, 212 | } 213 | rl.cond.L.Unlock() 214 | return ret 215 | } 216 | 217 | func (rl *RaftLog) NewRequestVoteArgs(currentTerm int, me int) *RequestVoteArgs { 218 | return &RequestVoteArgs{ 219 | Term: currentTerm, 220 | CandidateId: me, 221 | LastLogIndex: len(rl.entries) - 1, 222 | LastLogTerm: rl.entries[len(rl.entries)-1].Term, 223 | } 224 | } 225 | -------------------------------------------------------------------------------- /src/raft/raft_state.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | type RaftState interface { 4 | IsLeader() bool 5 | IsCandidate() bool 6 | ToString() string 7 | Run() 8 | } 9 | 10 | type RaftStateAttr struct { 11 | raft *Raft 12 | } 13 | 14 | func NewStateCommon(raft *Raft) RaftStateAttr { 15 | return RaftStateAttr{ 16 | raft: raft, 17 | } 18 | } 19 | 20 | type RaftLeader struct { 21 | RaftStateAttr 22 | } 23 | 24 | type RaftFollower struct { 25 | RaftStateAttr 26 | } 27 | 28 | type RaftCandidate struct { 29 | RaftStateAttr 30 | } 31 | 32 | /* 33 | Constructors 34 | */ 35 | func NewLeaderState(raft *Raft) *RaftLeader { 36 | return &RaftLeader{ 37 | RaftStateAttr: NewStateCommon(raft), 38 | } 39 | } 40 | 41 | func NewFollowerState(raft *Raft) *RaftFollower { 42 | return &RaftFollower{ 43 | RaftStateAttr: NewStateCommon(raft), 44 | } 45 | } 46 | 47 | func NewRaftCandidate(raft *Raft) *RaftCandidate { 48 | return &RaftCandidate{ 49 | RaftStateAttr: NewStateCommon(raft), 50 | } 51 | } 52 | 53 | /* 54 | check leader 55 | */ 56 | func (*RaftFollower) IsLeader() bool { 57 | return false 58 | } 59 | 60 | func (*RaftCandidate) IsLeader() bool { 61 | return false 62 | } 63 | 64 | func (*RaftLeader) IsLeader() bool { 65 | return true 66 | } 67 | 68 | /* 69 | check candidate 70 | */ 71 | func (*RaftFollower) IsCandidate() bool { 72 | return false 73 | } 74 | 75 | func (*RaftLeader) IsCandidate() bool { 76 | return false 77 | } 78 | 79 | func (*RaftCandidate) IsCandidate() bool { 80 | return true 81 | } 82 | 83 | /* 84 | Print 85 | */ 86 | func (leader *RaftLeader) ToString() string { 87 | return "Leader" 88 | } 89 | 90 | func (follower *RaftFollower) ToString() string { 91 | return 
"Follower" 92 | } 93 | 94 | func (candidate *RaftCandidate) ToString() string { 95 | return "Candidate" 96 | } 97 | 98 | /* 99 | Run roll 100 | */ 101 | // actions are ignored for inconsistent raft state 102 | func (leader *RaftLeader) Run() { 103 | leader.raft.printInfo("sending a heartbeat message to all") 104 | leader.raft.SendAppendEntriesToAll() 105 | leader.raft.printInfo("all heartbeat messages sent") 106 | leader.raft.UnlockPeerState() 107 | leader.raft.TimeParams.WaitHeartBeat() 108 | leader.raft.LockPeerState() 109 | // locking won't matter 110 | // the function exits 111 | // if the peer state is changed, the next roll would discover it 112 | } 113 | 114 | /* 115 | func (rf *Raft) runLeader() { 116 | // init NextIndex 117 | for index, _ := range rf.nextIndex { 118 | rf.nextIndex[index] = len(rf.logs) 119 | } 120 | for { 121 | // prepare an info instance 122 | info := rf.NewAsyncRpcCall(len(rf.peers)) 123 | if rf.MyState != LeaderState { 124 | return 125 | } 126 | // send heartbeat signal 127 | for peerIndex, _ := range rf.peers { 128 | if peerIndex == rf.me { 129 | continue 130 | } 131 | //fmt.Println(rf.PrefixPrint(), "sending heartbeat signal to peer", peerIndex) 132 | // send ahead logs 133 | args := rf.newAppendEntriesArgs(peerIndex) 134 | rf.sendAppendEntriesAsync(peerIndex, args, &AppendEntriesReply{}, info) 135 | } 136 | time.Sleep(time.Millisecond * time.Duration(rf.heartBeatWaitDuration)) 137 | } 138 | } 139 | */ 140 | 141 | func (follower *RaftFollower) Run() { 142 | follower.raft.printInfo("begin waiting for", follower.raft.TimeParams.heartBeatSendWait, "ms") 143 | follower.raft.UnlockPeerState() 144 | if follower.raft.TimeParams.heartBeatTimer.Wait() { 145 | follower.raft.LockPeerState() 146 | // timer expired 147 | follower.raft.printInfo("timer expired, becoming candidate") 148 | follower.raft.toCandidate() 149 | // this is the only way of being candidate 150 | // no need to worry about holding lock for this long time 151 | // other actions won't try to make this peer a leader 152 | } else { 153 | follower.raft.printInfo("timer cleared, following still peer", follower.raft.votedFor) 154 | follower.raft.LockPeerState() 155 | } 156 | } 157 | 158 | func (candidate *RaftCandidate) Run() { 159 | // release lock to allow peer state changes 160 | candidate.raft.printInfo("wait a random time then initiate an election") 161 | candidate.raft.UnlockPeerState() 162 | candidate.raft.TimeParams.WaitRandomRequestVote() 163 | candidate.raft.LockPeerState() 164 | // must check peer state for change in waiting 165 | if !candidate.IsCandidate() { 166 | return 167 | } 168 | candidate.raft.printInfo("initiate an election") 169 | candidate.raft.initiateElection() 170 | } 171 | -------------------------------------------------------------------------------- /src/raft/raft_task.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "sync" 4 | 5 | /* 6 | A Task class should implement all methods in RaftTask interface, 7 | and inherits struct RaftTaskAttr 8 | */ 9 | 10 | type RaftTask interface { 11 | execute() 12 | 13 | WaitForDone() 14 | SetDone() 15 | } 16 | 17 | // rpc calls this and wait for return 18 | func RunTask(rt RaftTask, queue *RaftTaskQueue) { 19 | queue.Push(rt) 20 | rt.WaitForDone() 21 | } 22 | 23 | type RaftTaskAttr struct { 24 | done bool 25 | doneCond *sync.Cond 26 | 27 | raft *Raft 28 | } 29 | 30 | func NewRaftTaskAttr(raft *Raft) RaftTaskAttr { 31 | return RaftTaskAttr{ 32 | done: false, 33 | doneCond: 
sync.NewCond(&sync.Mutex{}), 34 | 35 | raft: raft, 36 | } 37 | } 38 | 39 | func (rtd *RaftTaskAttr) WaitForDone() { 40 | rtd.doneCond.L.Lock() 41 | for rtd.done == false { 42 | rtd.doneCond.Wait() 43 | } 44 | rtd.doneCond.L.Unlock() 45 | } 46 | 47 | func (rtd *RaftTaskAttr) SetDone() { 48 | rtd.doneCond.L.Lock() 49 | if rtd.done { 50 | panic("Task done twice...") 51 | } 52 | rtd.done = true 53 | rtd.doneCond.L.Unlock() 54 | rtd.doneCond.Broadcast() 55 | } 56 | 57 | type RaftTaskQueue struct { 58 | channel chan RaftTask 59 | } 60 | 61 | func NewRaftTaskQueue() *RaftTaskQueue { 62 | return &RaftTaskQueue{ 63 | channel: make(chan RaftTask), 64 | } 65 | } 66 | 67 | func (rtq *RaftTaskQueue) pop() RaftTask { 68 | return <-rtq.channel 69 | } 70 | 71 | func (rtq *RaftTaskQueue) RunOne() { 72 | task := rtq.pop() 73 | task.execute() 74 | task.SetDone() 75 | } 76 | 77 | func (rtq *RaftTaskQueue) Push(rt RaftTask) { 78 | rtq.channel <- rt 79 | } 80 | -------------------------------------------------------------------------------- /src/raft/raft_time.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "math/rand" 5 | "time" 6 | ) 7 | 8 | type RaftTimer struct { 9 | WaitDuration int 10 | TimerCleared bool 11 | } 12 | 13 | func (rt *RaftTimer) SetClear() { 14 | rt.TimerCleared = true 15 | } 16 | 17 | func (rt *RaftTimer) Wait() bool { 18 | checkCount := 200 19 | divDuration := rt.WaitDuration / checkCount 20 | for checkIndex := 0; checkIndex < checkCount; checkIndex++ { 21 | if rt.TimerCleared { 22 | // not expired 23 | rt.TimerCleared = false 24 | return false 25 | } 26 | time.Sleep(time.Millisecond * time.Duration(divDuration)) 27 | } 28 | ret := !rt.TimerCleared 29 | rt.TimerCleared = false 30 | return ret 31 | } 32 | 33 | func NewRaftTimer(waitDuration int) RaftTimer { 34 | return RaftTimer{ 35 | WaitDuration: waitDuration, 36 | TimerCleared: false, 37 | } 38 | } 39 | 40 | type RaftTime struct { 41 | // waits 42 | heartBeatSendWait int 43 | requestVoteRandMax int 44 | 45 | // timer 46 | heartBeatTimer RaftTimer 47 | //electionTimer RaftTimer 48 | } 49 | 50 | func (rt *RaftTime) WaitHeartBeat() { 51 | time.Sleep(time.Millisecond * time.Duration(rt.heartBeatSendWait)) 52 | } 53 | 54 | func (rt *RaftTime) WaitRandomRequestVote() { 55 | time.Sleep(time.Duration(rand.Intn(rt.requestVoteRandMax))) 56 | } 57 | 58 | func NewRaftTime(heartBeatSendWait, electionRandMax, heartBeatWaitMax int) *RaftTime { 59 | return &RaftTime{ 60 | heartBeatSendWait: heartBeatSendWait, 61 | requestVoteRandMax: electionRandMax, 62 | heartBeatTimer: NewRaftTimer(heartBeatWaitMax), 63 | //electionTimer: NewRaftTimer(electionWaitMax), 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/raft/request_vote_callback.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | type RequestVoteCall struct { 4 | AsyncRpcCallAttr 5 | 6 | args *RequestVoteArgs 7 | replies []RequestVoteReply 8 | } 9 | 10 | func NewRequestVoteCall(raft *Raft, args *RequestVoteArgs) *RequestVoteCall { 11 | return &RequestVoteCall{ 12 | AsyncRpcCallAttr: raft.NewAsyncRpcCall(), 13 | args: args, 14 | replies: make([]RequestVoteReply, raft.PeerCount()), 15 | } 16 | } 17 | 18 | func (rvc *RequestVoteCall) shouldExit() bool { 19 | if rvc.MustExit { 20 | return true 21 | } 22 | return false 23 | } 24 | 25 | func (rvc *RequestVoteCall) makeRpcCall(peerIndex int) bool { 26 | 
rvc.raft.printInfo("sending RequestVote to peer", peerIndex) 27 | return rvc.peers[peerIndex].Call("Raft.RequestVote", rvc.args, &rvc.replies[peerIndex]) 28 | } 29 | 30 | func (rvc *RequestVoteCall) callback(peerIndex int) { 31 | rvc.raft.printInfo("RequestVote reply received from peer", peerIndex) 32 | if !rvc.raft.MyState.IsCandidate() { 33 | rvc.SetMustExit() 34 | return 35 | } 36 | reply := rvc.replies[peerIndex] 37 | if rvc.raft.tryFollowNewerTerm(peerIndex, reply.Term) { 38 | rvc.SetMustExit() 39 | return 40 | } 41 | if reply.GrantVote { 42 | rvc.raft.printInfo("vote granted by peer", peerIndex) 43 | rvc.IncrementSuccessCount() 44 | } 45 | } 46 | 47 | func (rvc *RequestVoteCall) tryEnd() bool { 48 | if rvc.SuccessCount > rvc.TotalCount/2 { 49 | rvc.raft.printInfo("#granted", rvc.SuccessCount, "in #total", rvc.TotalCount) 50 | rvc.SetMustExit() 51 | // change raft state 52 | rvc.raft.toLeader() 53 | return true 54 | } 55 | if rvc.SuccessCount+rvc.TotalCount-rvc.CurrentCount <= rvc.TotalCount/2 { 56 | rvc.SetMustExit() 57 | rvc.raft.printInfo("#granted", rvc.SuccessCount, "too few for #total - #current", rvc.TotalCount-rvc.CurrentCount) 58 | return true 59 | } 60 | if rvc.CurrentCount >= rvc.TotalCount { 61 | rvc.raft.printInfo("#granted", rvc.SuccessCount, "too few for #total", rvc.TotalCount) 62 | rvc.SetMustExit() 63 | return true 64 | } 65 | return false 66 | } 67 | 68 | /* 69 | func (rf *Raft) requestVoteCallBack(ok bool, peerIndex int, args *RequestVoteArgs, reply *RequestVoteReply, requestVoteInfo *AsyncRpcCallAttr) { 70 | if !ok { 71 | fmt.Println(rf.PrefixPrint(), "cannot reach peer", peerIndex, "when requesting a vote") 72 | } else { 73 | requestVoteInfo.IncrementAliveCount() 74 | if rf.tryFollowNewerTerm(peerIndex, reply.Term) { 75 | requestVoteInfo.SetMustExit() 76 | } else if reply.GrantVote { 77 | fmt.Println(rf.PrefixPrint(), "granted a vote by peer", peerIndex) 78 | requestVoteInfo.IncrementSuccessCount() 79 | if requestVoteInfo.SuccessCount+1 > requestVoteInfo.TotalCount/2 { 80 | // leader claimed! 
81 | //fmt.Println(rf.PrefixPrint(), "got", requestVoteInfo.SuccessCount, "votes in", requestVoteInfo.AliveCount, "alive peers", requestVoteInfo.TotalCount, "total peers") 82 | rf.MyState = LeaderState 83 | rf.votedFor = rf.me 84 | //fmt.Println(rf.PrefixPrint(), "elected leader at term", rf.currentTerm) 85 | rf.setLeaderNextIndex() 86 | requestVoteInfo.SetMustExit() 87 | } 88 | } 89 | } 90 | requestVoteInfo.IncrementCurrentCount() 91 | } 92 | */ 93 | -------------------------------------------------------------------------------- /src/raft/request_vote_task.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | type RequestVoteTask struct { 4 | RaftTaskAttr 5 | args *RequestVoteArgs 6 | reply *RequestVoteReply 7 | } 8 | 9 | func (rvt *RequestVoteTask) execute() { 10 | rvt.executeRequestVoteRpc(rvt.args, rvt.reply) 11 | } 12 | 13 | func (rvt *RequestVoteTask) printThisMoreUpToDate() { 14 | rvt.raft.printInfo("this peer's log is newer") 15 | } 16 | 17 | func (rvt *RequestVoteTask) printGrantVote(peerId int) { 18 | rvt.raft.printInfo("grant vote to peer", peerId) 19 | } 20 | 21 | func (rvt *RequestVoteTask) grantVote(peerId int, reply *RequestVoteReply) { 22 | reply.GrantVote = true 23 | rvt.raft.toFollower(peerId) 24 | rvt.raft.TimeParams.heartBeatTimer.SetClear() 25 | } 26 | 27 | func (rvt *RequestVoteTask) executeRequestVoteRpc(args *RequestVoteArgs, reply *RequestVoteReply) { 28 | reply.Term = rvt.raft.currentTerm 29 | reply.GrantVote = false 30 | 31 | if args.Term < rvt.raft.currentTerm { 32 | return 33 | } 34 | if rvt.raft.tryFollowNewerTerm(args.CandidateId, args.Term) { 35 | rvt.raft.printInfo("sees newer term RequestVote from peer", args.CandidateId) 36 | rvt.grantVote(args.CandidateId, reply) 37 | return 38 | } 39 | // decide vote 40 | if rvt.raft.votedFor < 0 || rvt.raft.votedFor == args.CandidateId { 41 | // check up-to-date 42 | if rvt.raft.Log.LastEntry().Term < args.LastLogTerm { 43 | // that peer has more up-to-date Log 44 | rvt.grantVote(args.CandidateId, reply) 45 | rvt.printGrantVote(args.CandidateId) 46 | return 47 | } 48 | if rvt.raft.Log.LastEntry().Term > args.LastLogTerm { 49 | // this peer has more up-to-date Log 50 | rvt.printThisMoreUpToDate() 51 | return 52 | } 53 | // Term attribute equals, comparing length 54 | if args.LastLogIndex <= rvt.raft.Log.Length()-1 { 55 | // this peer is more up-to-date 56 | rvt.printThisMoreUpToDate() 57 | return 58 | } 59 | rvt.printGrantVote(args.CandidateId) 60 | rvt.grantVote(args.CandidateId, reply) 61 | return 62 | } 63 | } 64 | 65 | func NewRequestVoteTask(raft *Raft, args *RequestVoteArgs, reply *RequestVoteReply) *RequestVoteTask { 66 | return &RequestVoteTask{ 67 | RaftTaskAttr: NewRaftTaskAttr(raft), 68 | args: args, 69 | reply: reply, 70 | } 71 | } 72 | 73 | /* 74 | func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { 75 | // Your code here (2A, 2B). 
76 | 77 | RunTask(NewRequestVoteTask(rf, args, reply), &rf.taskQueue) 78 | return 79 | 80 | // default reply state 81 | reply.Term = rf.currentTerm 82 | reply.GrantVote = false 83 | rf.timerCleared = true 84 | 85 | if rf.tryDiscardOldTerm(args.CandidateId, args.Term) { 86 | reply.GrantVote = false 87 | return 88 | } 89 | if rf.tryFollowNewerTerm(args.CandidateId, args.Term) { 90 | reply.GrantVote = true 91 | return 92 | } 93 | 94 | // < 0 for not elected leader 95 | // == for already accepted leader 96 | if rf.votedFor < 0 || rf.votedFor == args.CandidateId { 97 | // check up-to-date 98 | if rf.logs[len(rf.logs)-1].Term > args.LastLogTerm { 99 | // this peer has more up-to-date Log 100 | return 101 | } 102 | if rf.logs[len(rf.logs)-1].Term < args.LastLogTerm { 103 | // that peer has more up-to-date Log 104 | reply.GrantVote = true 105 | return 106 | } 107 | // Term attribute equals, comparing length 108 | if args.LastLogIndex < len(rf.logs)-1 { 109 | // this peer is more up-to-date 110 | return 111 | } 112 | reply.GrantVote = true 113 | return 114 | } 115 | //fmt.Println(rf.PrefixPrint(), "with leader", rf.votedFor, "at term %v not granting vote to peer", rf.currentTerm, "at term", args.Term) 116 | } 117 | */ 118 | -------------------------------------------------------------------------------- /src/raft/rpc_args_reply.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // example RequestVote RPC arguments structure. 5 | // field names must start with capital letters! 6 | // 7 | type RequestVoteArgs struct { 8 | // Your data here (2A, 2B). 9 | Term int 10 | CandidateId int 11 | LastLogIndex int 12 | LastLogTerm int 13 | } 14 | 15 | // 16 | // example RequestVote RPC reply structure. 17 | // field names must start with capital letters! 18 | // 19 | type RequestVoteReply struct { 20 | // Your data here (2A). 
21 | GrantVote bool 22 | Term int 23 | } 24 | 25 | type AppendEntriesArgs struct { 26 | // machine state 27 | Term int 28 | LeaderId int 29 | // Log state 30 | PrevLogIndex int 31 | PrevLogTerm int 32 | Entries []LogEntry 33 | //entries 34 | LeaderCommit int 35 | } 36 | 37 | type AppendEntriesReply struct { 38 | Term int 39 | Success bool 40 | 41 | ConflictIndex int 42 | ConflictTerm int 43 | } 44 | -------------------------------------------------------------------------------- /src/raft/rpc_data.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | type RaftRpcData struct { 4 | args interface{} 5 | reply interface{} 6 | } 7 | 8 | func (rrd *RaftRpcData) GetRequestVote() (*RequestVoteArgs, *RequestVoteReply) { 9 | return rrd.args.(*RequestVoteArgs), rrd.reply.(*RequestVoteReply) 10 | } 11 | 12 | func (rrd *RaftRpcData) GetAppendEntries() (*AppendEntriesArgs, *AppendEntriesReply) { 13 | return rrd.args.(*AppendEntriesArgs), rrd.reply.(*AppendEntriesReply) 14 | } 15 | 16 | func NewRequestVoteData(args *RequestVoteArgs, reply *RequestVoteReply) *RaftRpcData { 17 | return &RaftRpcData{ 18 | args: args, 19 | reply: reply, 20 | } 21 | } 22 | 23 | func NewAppendEntriesData(args *AppendEntriesArgs, reply *AppendEntriesReply) *RaftRpcData { 24 | return &RaftRpcData{ 25 | args: args, 26 | reply: reply, 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/raft/util.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "log" 4 | 5 | // Debugging 6 | const Debug = 0 7 | 8 | func DPrintf(format string, a ...interface{}) (n int, err error) { 9 | if Debug > 0 { 10 | log.Printf(format, a...) 11 | } 12 | return 13 | } 14 | -------------------------------------------------------------------------------- /src/shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // client code to talk to a sharded key/value service. 5 | // 6 | // the client first talks to the shardmaster to find out 7 | // the assignment of shards (keys) to groups, and then 8 | // talks to the group that holds the key's shard. 9 | // 10 | 11 | import "../labrpc" 12 | import "crypto/rand" 13 | import "math/big" 14 | import "../shardmaster" 15 | import "time" 16 | 17 | // 18 | // which shard is a key in? 19 | // please use this function, 20 | // and please do not change it. 21 | // 22 | func key2shard(key string) int { 23 | shard := 0 24 | if len(key) > 0 { 25 | shard = int(key[0]) 26 | } 27 | shard %= shardmaster.NShards 28 | return shard 29 | } 30 | 31 | func nrand() int64 { 32 | max := big.NewInt(int64(1) << 62) 33 | bigx, _ := rand.Int(rand.Reader, max) 34 | x := bigx.Int64() 35 | return x 36 | } 37 | 38 | type Clerk struct { 39 | sm *shardmaster.Clerk 40 | config shardmaster.Config 41 | make_end func(string) *labrpc.ClientEnd 42 | // You will have to modify this struct. 43 | } 44 | 45 | // 46 | // the tester calls MakeClerk. 47 | // 48 | // masters[] is needed to call shardmaster.MakeClerk(). 49 | // 50 | // make_end(servername) turns a server name from a 51 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 52 | // send RPCs. 53 | // 54 | func MakeClerk(masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk { 55 | ck := new(Clerk) 56 | ck.sm = shardmaster.MakeClerk(masters) 57 | ck.make_end = make_end 58 | // You'll have to add code here. 
59 | return ck 60 | } 61 | 62 | // 63 | // fetch the current value for a key. 64 | // returns "" if the key does not exist. 65 | // keeps trying forever in the face of all other errors. 66 | // You will have to modify this function. 67 | // 68 | func (ck *Clerk) Get(key string) string { 69 | args := GetArgs{} 70 | args.Key = key 71 | 72 | for { 73 | shard := key2shard(key) 74 | gid := ck.config.Shards[shard] 75 | if servers, ok := ck.config.Groups[gid]; ok { 76 | // try each server for the shard. 77 | for si := 0; si < len(servers); si++ { 78 | srv := ck.make_end(servers[si]) 79 | var reply GetReply 80 | ok := srv.Call("ShardKV.Get", &args, &reply) 81 | if ok && (reply.Err == OK || reply.Err == ErrNoKey) { 82 | return reply.Value 83 | } 84 | if ok && (reply.Err == ErrWrongGroup) { 85 | break 86 | } 87 | // ... not ok, or ErrWrongLeader 88 | } 89 | } 90 | time.Sleep(100 * time.Millisecond) 91 | // ask master for the latest configuration. 92 | ck.config = ck.sm.Query(-1) 93 | } 94 | 95 | return "" 96 | } 97 | 98 | // 99 | // shared by Put and Append. 100 | // You will have to modify this function. 101 | // 102 | func (ck *Clerk) PutAppend(key string, value string, op string) { 103 | args := PutAppendArgs{} 104 | args.Key = key 105 | args.Value = value 106 | args.Op = op 107 | 108 | 109 | for { 110 | shard := key2shard(key) 111 | gid := ck.config.Shards[shard] 112 | if servers, ok := ck.config.Groups[gid]; ok { 113 | for si := 0; si < len(servers); si++ { 114 | srv := ck.make_end(servers[si]) 115 | var reply PutAppendReply 116 | ok := srv.Call("ShardKV.PutAppend", &args, &reply) 117 | if ok && reply.Err == OK { 118 | return 119 | } 120 | if ok && reply.Err == ErrWrongGroup { 121 | break 122 | } 123 | // ... not ok, or ErrWrongLeader 124 | } 125 | } 126 | time.Sleep(100 * time.Millisecond) 127 | // ask master for the latest configuration. 128 | ck.config = ck.sm.Query(-1) 129 | } 130 | } 131 | 132 | func (ck *Clerk) Put(key string, value string) { 133 | ck.PutAppend(key, value, "Put") 134 | } 135 | func (ck *Clerk) Append(key string, value string) { 136 | ck.PutAppend(key, value, "Append") 137 | } 138 | -------------------------------------------------------------------------------- /src/shardkv/common.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // Sharded key/value server. 5 | // Lots of replica groups, each running op-at-a-time paxos. 6 | // Shardmaster decides which group serves each shard. 7 | // Shardmaster may change shard assignment from time to time. 8 | // 9 | // You will have to modify these definitions. 10 | // 11 | 12 | const ( 13 | OK = "OK" 14 | ErrNoKey = "ErrNoKey" 15 | ErrWrongGroup = "ErrWrongGroup" 16 | ErrWrongLeader = "ErrWrongLeader" 17 | ) 18 | 19 | type Err string 20 | 21 | // Put or Append 22 | type PutAppendArgs struct { 23 | // You'll have to add definitions here. 24 | Key string 25 | Value string 26 | Op string // "Put" or "Append" 27 | // You'll have to add definitions here. 28 | // Field names must start with capital letters, 29 | // otherwise RPC will break. 30 | } 31 | 32 | type PutAppendReply struct { 33 | Err Err 34 | } 35 | 36 | type GetArgs struct { 37 | Key string 38 | // You'll have to add definitions here. 
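	// Illustrative additions only (not defined elsewhere in this repo): a client
	// id plus a per-client sequence number lets the server detect duplicate
	// requests retried by the loops in client.go, e.g.
	//   ClientId int64
	//   SeqNum   int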
39 | } 40 | 41 | type GetReply struct { 42 | Err Err 43 | Value string 44 | } 45 | -------------------------------------------------------------------------------- /src/shardkv/config.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "../shardmaster" 4 | import "../labrpc" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/big" 11 | import "math/rand" 12 | import "encoding/base64" 13 | import "sync" 14 | import "runtime" 15 | import "../raft" 16 | import "strconv" 17 | import "fmt" 18 | import "time" 19 | 20 | func randstring(n int) string { 21 | b := make([]byte, 2*n) 22 | crand.Read(b) 23 | s := base64.URLEncoding.EncodeToString(b) 24 | return s[0:n] 25 | } 26 | 27 | func makeSeed() int64 { 28 | max := big.NewInt(int64(1) << 62) 29 | bigx, _ := crand.Int(crand.Reader, max) 30 | x := bigx.Int64() 31 | return x 32 | } 33 | 34 | // Randomize server handles 35 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 36 | sa := make([]*labrpc.ClientEnd, len(kvh)) 37 | copy(sa, kvh) 38 | for i := range sa { 39 | j := rand.Intn(i + 1) 40 | sa[i], sa[j] = sa[j], sa[i] 41 | } 42 | return sa 43 | } 44 | 45 | type group struct { 46 | gid int 47 | servers []*ShardKV 48 | saved []*raft.Persister 49 | endnames [][]string 50 | mendnames [][]string 51 | } 52 | 53 | type config struct { 54 | mu sync.Mutex 55 | t *testing.T 56 | net *labrpc.Network 57 | start time.Time // time at which make_config() was called 58 | 59 | nmasters int 60 | masterservers []*shardmaster.ShardMaster 61 | mck *shardmaster.Clerk 62 | 63 | ngroups int 64 | n int // servers per k/v group 65 | groups []*group 66 | 67 | clerks map[*Clerk][]string 68 | nextClientId int 69 | maxraftstate int 70 | } 71 | 72 | func (cfg *config) checkTimeout() { 73 | // enforce a two minute real-time limit on each test 74 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 75 | cfg.t.Fatal("test took longer than 120 seconds") 76 | } 77 | } 78 | 79 | func (cfg *config) cleanup() { 80 | for gi := 0; gi < cfg.ngroups; gi++ { 81 | cfg.ShutdownGroup(gi) 82 | } 83 | cfg.net.Cleanup() 84 | cfg.checkTimeout() 85 | } 86 | 87 | // check that no server's log is too big. 88 | func (cfg *config) checklogs() { 89 | for gi := 0; gi < cfg.ngroups; gi++ { 90 | for i := 0; i < cfg.n; i++ { 91 | raft := cfg.groups[gi].saved[i].RaftStateSize() 92 | snap := len(cfg.groups[gi].saved[i].ReadSnapshot()) 93 | if cfg.maxraftstate >= 0 && raft > 8*cfg.maxraftstate { 94 | cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v", 95 | raft, cfg.maxraftstate) 96 | } 97 | if cfg.maxraftstate < 0 && snap > 0 { 98 | cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!") 99 | } 100 | } 101 | } 102 | } 103 | 104 | // master server name for labrpc. 105 | func (cfg *config) mastername(i int) string { 106 | return "master" + strconv.Itoa(i) 107 | } 108 | 109 | // shard server name for labrpc. 110 | // i'th server of group gid. 111 | func (cfg *config) servername(gid int, i int) string { 112 | return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i) 113 | } 114 | 115 | func (cfg *config) makeClient() *Clerk { 116 | cfg.mu.Lock() 117 | defer cfg.mu.Unlock() 118 | 119 | // ClientEnds to talk to master service. 
120 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 121 | endnames := make([]string, cfg.n) 122 | for j := 0; j < cfg.nmasters; j++ { 123 | endnames[j] = randstring(20) 124 | ends[j] = cfg.net.MakeEnd(endnames[j]) 125 | cfg.net.Connect(endnames[j], cfg.mastername(j)) 126 | cfg.net.Enable(endnames[j], true) 127 | } 128 | 129 | ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd { 130 | name := randstring(20) 131 | end := cfg.net.MakeEnd(name) 132 | cfg.net.Connect(name, servername) 133 | cfg.net.Enable(name, true) 134 | return end 135 | }) 136 | cfg.clerks[ck] = endnames 137 | cfg.nextClientId++ 138 | return ck 139 | } 140 | 141 | func (cfg *config) deleteClient(ck *Clerk) { 142 | cfg.mu.Lock() 143 | defer cfg.mu.Unlock() 144 | 145 | v := cfg.clerks[ck] 146 | for i := 0; i < len(v); i++ { 147 | os.Remove(v[i]) 148 | } 149 | delete(cfg.clerks, ck) 150 | } 151 | 152 | // Shutdown i'th server of gi'th group, by isolating it 153 | func (cfg *config) ShutdownServer(gi int, i int) { 154 | cfg.mu.Lock() 155 | defer cfg.mu.Unlock() 156 | 157 | gg := cfg.groups[gi] 158 | 159 | // prevent this server from sending 160 | for j := 0; j < len(gg.servers); j++ { 161 | name := gg.endnames[i][j] 162 | cfg.net.Enable(name, false) 163 | } 164 | for j := 0; j < len(gg.mendnames[i]); j++ { 165 | name := gg.mendnames[i][j] 166 | cfg.net.Enable(name, false) 167 | } 168 | 169 | // disable client connections to the server. 170 | // it's important to do this before creating 171 | // the new Persister in saved[i], to avoid 172 | // the possibility of the server returning a 173 | // positive reply to an Append but persisting 174 | // the result in the superseded Persister. 175 | cfg.net.DeleteServer(cfg.servername(gg.gid, i)) 176 | 177 | // a fresh persister, in case old instance 178 | // continues to update the Persister. 179 | // but copy old persister's content so that we always 180 | // pass Make() the last persisted state. 181 | if gg.saved[i] != nil { 182 | gg.saved[i] = gg.saved[i].Copy() 183 | } 184 | 185 | kv := gg.servers[i] 186 | if kv != nil { 187 | cfg.mu.Unlock() 188 | kv.Kill() 189 | cfg.mu.Lock() 190 | gg.servers[i] = nil 191 | } 192 | } 193 | 194 | func (cfg *config) ShutdownGroup(gi int) { 195 | for i := 0; i < cfg.n; i++ { 196 | cfg.ShutdownServer(gi, i) 197 | } 198 | } 199 | 200 | // start i'th server in gi'th group 201 | func (cfg *config) StartServer(gi int, i int) { 202 | cfg.mu.Lock() 203 | 204 | gg := cfg.groups[gi] 205 | 206 | // a fresh set of outgoing ClientEnd names 207 | // to talk to other servers in this group. 208 | gg.endnames[i] = make([]string, cfg.n) 209 | for j := 0; j < cfg.n; j++ { 210 | gg.endnames[i][j] = randstring(20) 211 | } 212 | 213 | // and the connections to other servers in this group. 214 | ends := make([]*labrpc.ClientEnd, cfg.n) 215 | for j := 0; j < cfg.n; j++ { 216 | ends[j] = cfg.net.MakeEnd(gg.endnames[i][j]) 217 | cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j)) 218 | cfg.net.Enable(gg.endnames[i][j], true) 219 | } 220 | 221 | // ends to talk to shardmaster service 222 | mends := make([]*labrpc.ClientEnd, cfg.nmasters) 223 | gg.mendnames[i] = make([]string, cfg.nmasters) 224 | for j := 0; j < cfg.nmasters; j++ { 225 | gg.mendnames[i][j] = randstring(20) 226 | mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j]) 227 | cfg.net.Connect(gg.mendnames[i][j], cfg.mastername(j)) 228 | cfg.net.Enable(gg.mendnames[i][j], true) 229 | } 230 | 231 | // a fresh persister, so old instance doesn't overwrite 232 | // new instance's persisted state. 
233 | // give the fresh persister a copy of the old persister's 234 | // state, so that the spec is that we pass StartKVServer() 235 | // the last persisted state. 236 | if gg.saved[i] != nil { 237 | gg.saved[i] = gg.saved[i].Copy() 238 | } else { 239 | gg.saved[i] = raft.MakePersister() 240 | } 241 | cfg.mu.Unlock() 242 | 243 | gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate, 244 | gg.gid, mends, 245 | func(servername string) *labrpc.ClientEnd { 246 | name := randstring(20) 247 | end := cfg.net.MakeEnd(name) 248 | cfg.net.Connect(name, servername) 249 | cfg.net.Enable(name, true) 250 | return end 251 | }) 252 | 253 | kvsvc := labrpc.MakeService(gg.servers[i]) 254 | rfsvc := labrpc.MakeService(gg.servers[i].rf) 255 | srv := labrpc.MakeServer() 256 | srv.AddService(kvsvc) 257 | srv.AddService(rfsvc) 258 | cfg.net.AddServer(cfg.servername(gg.gid, i), srv) 259 | } 260 | 261 | func (cfg *config) StartGroup(gi int) { 262 | for i := 0; i < cfg.n; i++ { 263 | cfg.StartServer(gi, i) 264 | } 265 | } 266 | 267 | func (cfg *config) StartMasterServer(i int) { 268 | // ClientEnds to talk to other master replicas. 269 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 270 | for j := 0; j < cfg.nmasters; j++ { 271 | endname := randstring(20) 272 | ends[j] = cfg.net.MakeEnd(endname) 273 | cfg.net.Connect(endname, cfg.mastername(j)) 274 | cfg.net.Enable(endname, true) 275 | } 276 | 277 | p := raft.MakePersister() 278 | 279 | cfg.masterservers[i] = shardmaster.StartServer(ends, i, p) 280 | 281 | msvc := labrpc.MakeService(cfg.masterservers[i]) 282 | rfsvc := labrpc.MakeService(cfg.masterservers[i].Raft()) 283 | srv := labrpc.MakeServer() 284 | srv.AddService(msvc) 285 | srv.AddService(rfsvc) 286 | cfg.net.AddServer(cfg.mastername(i), srv) 287 | } 288 | 289 | func (cfg *config) shardclerk() *shardmaster.Clerk { 290 | // ClientEnds to talk to master service. 291 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 292 | for j := 0; j < cfg.nmasters; j++ { 293 | name := randstring(20) 294 | ends[j] = cfg.net.MakeEnd(name) 295 | cfg.net.Connect(name, cfg.mastername(j)) 296 | cfg.net.Enable(name, true) 297 | } 298 | 299 | return shardmaster.MakeClerk(ends) 300 | } 301 | 302 | // tell the shardmaster that a group is joining. 303 | func (cfg *config) join(gi int) { 304 | cfg.joinm([]int{gi}) 305 | } 306 | 307 | func (cfg *config) joinm(gis []int) { 308 | m := make(map[int][]string, len(gis)) 309 | for _, g := range gis { 310 | gid := cfg.groups[g].gid 311 | servernames := make([]string, cfg.n) 312 | for i := 0; i < cfg.n; i++ { 313 | servernames[i] = cfg.servername(gid, i) 314 | } 315 | m[gid] = servernames 316 | } 317 | cfg.mck.Join(m) 318 | } 319 | 320 | // tell the shardmaster that a group is leaving. 
321 | func (cfg *config) leave(gi int) { 322 | cfg.leavem([]int{gi}) 323 | } 324 | 325 | func (cfg *config) leavem(gis []int) { 326 | gids := make([]int, 0, len(gis)) 327 | for _, g := range gis { 328 | gids = append(gids, cfg.groups[g].gid) 329 | } 330 | cfg.mck.Leave(gids) 331 | } 332 | 333 | var ncpu_once sync.Once 334 | 335 | func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config { 336 | ncpu_once.Do(func() { 337 | if runtime.NumCPU() < 2 { 338 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 339 | } 340 | rand.Seed(makeSeed()) 341 | }) 342 | runtime.GOMAXPROCS(4) 343 | cfg := &config{} 344 | cfg.t = t 345 | cfg.maxraftstate = maxraftstate 346 | cfg.net = labrpc.MakeNetwork() 347 | cfg.start = time.Now() 348 | 349 | // master 350 | cfg.nmasters = 3 351 | cfg.masterservers = make([]*shardmaster.ShardMaster, cfg.nmasters) 352 | for i := 0; i < cfg.nmasters; i++ { 353 | cfg.StartMasterServer(i) 354 | } 355 | cfg.mck = cfg.shardclerk() 356 | 357 | cfg.ngroups = 3 358 | cfg.groups = make([]*group, cfg.ngroups) 359 | cfg.n = n 360 | for gi := 0; gi < cfg.ngroups; gi++ { 361 | gg := &group{} 362 | cfg.groups[gi] = gg 363 | gg.gid = 100 + gi 364 | gg.servers = make([]*ShardKV, cfg.n) 365 | gg.saved = make([]*raft.Persister, cfg.n) 366 | gg.endnames = make([][]string, cfg.n) 367 | gg.mendnames = make([][]string, cfg.nmasters) 368 | for i := 0; i < cfg.n; i++ { 369 | cfg.StartServer(gi, i) 370 | } 371 | } 372 | 373 | cfg.clerks = make(map[*Clerk][]string) 374 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 375 | 376 | cfg.net.Reliable(!unreliable) 377 | 378 | return cfg 379 | } 380 | -------------------------------------------------------------------------------- /src/shardkv/server.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | 4 | // import "../shardmaster" 5 | import "../labrpc" 6 | import "../raft" 7 | import "sync" 8 | import "../labgob" 9 | 10 | 11 | 12 | type Op struct { 13 | // Your definitions here. 14 | // Field names must start with capital letters, 15 | // otherwise RPC will break. 16 | } 17 | 18 | type ShardKV struct { 19 | mu sync.Mutex 20 | me int 21 | rf *raft.Raft 22 | applyCh chan raft.ApplyMsg 23 | make_end func(string) *labrpc.ClientEnd 24 | gid int 25 | masters []*labrpc.ClientEnd 26 | maxraftstate int // snapshot if log grows this big 27 | 28 | // Your definitions here. 29 | } 30 | 31 | 32 | func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) { 33 | // Your code here. 34 | } 35 | 36 | func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 37 | // Your code here. 38 | } 39 | 40 | // 41 | // the tester calls Kill() when a ShardKV instance won't 42 | // be needed again. you are not required to do anything 43 | // in Kill(), but it might be convenient to (for example) 44 | // turn off debug output from this instance. 45 | // 46 | func (kv *ShardKV) Kill() { 47 | kv.rf.Kill() 48 | // Your code here, if desired. 49 | } 50 | 51 | 52 | // 53 | // servers[] contains the ports of the servers in this group. 54 | // 55 | // me is the index of the current server in servers[]. 56 | // 57 | // the k/v server should store snapshots through the underlying Raft 58 | // implementation, which should call persister.SaveStateAndSnapshot() to 59 | // atomically save the Raft state along with the snapshot. 
60 | // 61 | // the k/v server should snapshot when Raft's saved state exceeds 62 | // maxraftstate bytes, in order to allow Raft to garbage-collect its 63 | // log. if maxraftstate is -1, you don't need to snapshot. 64 | // 65 | // gid is this group's GID, for interacting with the shardmaster. 66 | // 67 | // pass masters[] to shardmaster.MakeClerk() so you can send 68 | // RPCs to the shardmaster. 69 | // 70 | // make_end(servername) turns a server name from a 71 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 72 | // send RPCs. You'll need this to send RPCs to other groups. 73 | // 74 | // look at client.go for examples of how to use masters[] 75 | // and make_end() to send RPCs to the group owning a specific shard. 76 | // 77 | // StartServer() must return quickly, so it should start goroutines 78 | // for any long-running work. 79 | // 80 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *ShardKV { 81 | // call labgob.Register on structures you want 82 | // Go's RPC library to marshall/unmarshall. 83 | labgob.Register(Op{}) 84 | 85 | kv := new(ShardKV) 86 | kv.me = me 87 | kv.maxraftstate = maxraftstate 88 | kv.make_end = make_end 89 | kv.gid = gid 90 | kv.masters = masters 91 | 92 | // Your initialization code here. 93 | 94 | // Use something like this to talk to the shardmaster: 95 | // kv.mck = shardmaster.MakeClerk(kv.masters) 96 | 97 | kv.applyCh = make(chan raft.ApplyMsg) 98 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 99 | 100 | 101 | return kv 102 | } 103 | -------------------------------------------------------------------------------- /src/shardmaster/client.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Shardmaster clerk. 5 | // 6 | 7 | import "../labrpc" 8 | import "time" 9 | import "crypto/rand" 10 | import "math/big" 11 | 12 | type Clerk struct { 13 | servers []*labrpc.ClientEnd 14 | // Your data here. 15 | } 16 | 17 | func nrand() int64 { 18 | max := big.NewInt(int64(1) << 62) 19 | bigx, _ := rand.Int(rand.Reader, max) 20 | x := bigx.Int64() 21 | return x 22 | } 23 | 24 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 25 | ck := new(Clerk) 26 | ck.servers = servers 27 | // Your code here. 28 | return ck 29 | } 30 | 31 | func (ck *Clerk) Query(num int) Config { 32 | args := &QueryArgs{} 33 | // Your code here. 34 | args.Num = num 35 | for { 36 | // try each known server. 37 | for _, srv := range ck.servers { 38 | var reply QueryReply 39 | ok := srv.Call("ShardMaster.Query", args, &reply) 40 | if ok && reply.WrongLeader == false { 41 | return reply.Config 42 | } 43 | } 44 | time.Sleep(100 * time.Millisecond) 45 | } 46 | } 47 | 48 | func (ck *Clerk) Join(servers map[int][]string) { 49 | args := &JoinArgs{} 50 | // Your code here. 51 | args.Servers = servers 52 | 53 | for { 54 | // try each known server. 55 | for _, srv := range ck.servers { 56 | var reply JoinReply 57 | ok := srv.Call("ShardMaster.Join", args, &reply) 58 | if ok && reply.WrongLeader == false { 59 | return 60 | } 61 | } 62 | time.Sleep(100 * time.Millisecond) 63 | } 64 | } 65 | 66 | func (ck *Clerk) Leave(gids []int) { 67 | args := &LeaveArgs{} 68 | // Your code here. 69 | args.GIDs = gids 70 | 71 | for { 72 | // try each known server. 
73 | for _, srv := range ck.servers { 74 | var reply LeaveReply 75 | ok := srv.Call("ShardMaster.Leave", args, &reply) 76 | if ok && reply.WrongLeader == false { 77 | return 78 | } 79 | } 80 | time.Sleep(100 * time.Millisecond) 81 | } 82 | } 83 | 84 | func (ck *Clerk) Move(shard int, gid int) { 85 | args := &MoveArgs{} 86 | // Your code here. 87 | args.Shard = shard 88 | args.GID = gid 89 | 90 | for { 91 | // try each known server. 92 | for _, srv := range ck.servers { 93 | var reply MoveReply 94 | ok := srv.Call("ShardMaster.Move", args, &reply) 95 | if ok && reply.WrongLeader == false { 96 | return 97 | } 98 | } 99 | time.Sleep(100 * time.Millisecond) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/shardmaster/common.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Master shard server: assigns shards to replication groups. 5 | // 6 | // RPC interface: 7 | // Join(servers) -- add a set of groups (gid -> server-list mapping). 8 | // Leave(gids) -- delete a set of groups. 9 | // Move(shard, gid) -- hand off one shard from current owner to gid. 10 | // Query(num) -> fetch Config # num, or latest config if num==-1. 11 | // 12 | // A Config (configuration) describes a set of replica groups, and the 13 | // replica group responsible for each shard. Configs are numbered. Config 14 | // #0 is the initial configuration, with no groups and all shards 15 | // assigned to group 0 (the invalid group). 16 | // 17 | // You will need to add fields to the RPC argument structs. 18 | // 19 | 20 | // The number of shards. 21 | const NShards = 10 22 | 23 | // A configuration -- an assignment of shards to groups. 24 | // Please don't change this. 
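// A minimal usage sketch of the shardmaster RPC interface described above,
// assuming a clerk ck obtained from MakeClerk; gid 100 and the server names
// are made up for illustration. Query returns the Config type defined below.
//   ck.Join(map[int][]string{100: {"a", "b", "c"}})
//   cfg := ck.Query(-1)  // latest Config; cfg.Shards maps shard -> gid
//   ck.Move(0, 100)      // assign shard 0 to group 100
//   ck.Leave([]int{100})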
25 | type Config struct { 26 | Num int // config number 27 | Shards [NShards]int // shard -> gid 28 | Groups map[int][]string // gid -> servers[] 29 | } 30 | 31 | const ( 32 | OK = "OK" 33 | ) 34 | 35 | type Err string 36 | 37 | type JoinArgs struct { 38 | Servers map[int][]string // new GID -> servers mappings 39 | } 40 | 41 | type JoinReply struct { 42 | WrongLeader bool 43 | Err Err 44 | } 45 | 46 | type LeaveArgs struct { 47 | GIDs []int 48 | } 49 | 50 | type LeaveReply struct { 51 | WrongLeader bool 52 | Err Err 53 | } 54 | 55 | type MoveArgs struct { 56 | Shard int 57 | GID int 58 | } 59 | 60 | type MoveReply struct { 61 | WrongLeader bool 62 | Err Err 63 | } 64 | 65 | type QueryArgs struct { 66 | Num int // desired config number 67 | } 68 | 69 | type QueryReply struct { 70 | WrongLeader bool 71 | Err Err 72 | Config Config 73 | } 74 | -------------------------------------------------------------------------------- /src/shardmaster/config.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "../labrpc" 4 | import "../raft" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | import "time" 15 | 16 | func randstring(n int) string { 17 | b := make([]byte, 2*n) 18 | crand.Read(b) 19 | s := base64.URLEncoding.EncodeToString(b) 20 | return s[0:n] 21 | } 22 | 23 | // Randomize server handles 24 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 25 | sa := make([]*labrpc.ClientEnd, len(kvh)) 26 | copy(sa, kvh) 27 | for i := range sa { 28 | j := rand.Intn(i + 1) 29 | sa[i], sa[j] = sa[j], sa[i] 30 | } 31 | return sa 32 | } 33 | 34 | type config struct { 35 | mu sync.Mutex 36 | t *testing.T 37 | net *labrpc.Network 38 | n int 39 | servers []*ShardMaster 40 | saved []*raft.Persister 41 | endnames [][]string // names of each server's sending ClientEnds 42 | clerks map[*Clerk][]string 43 | nextClientId int 44 | start time.Time // time at which make_config() was called 45 | } 46 | 47 | func (cfg *config) checkTimeout() { 48 | // enforce a two minute real-time limit on each test 49 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 50 | cfg.t.Fatal("test took longer than 120 seconds") 51 | } 52 | } 53 | 54 | func (cfg *config) cleanup() { 55 | cfg.mu.Lock() 56 | defer cfg.mu.Unlock() 57 | for i := 0; i < len(cfg.servers); i++ { 58 | if cfg.servers[i] != nil { 59 | cfg.servers[i].Kill() 60 | } 61 | } 62 | cfg.net.Cleanup() 63 | cfg.checkTimeout() 64 | } 65 | 66 | // Maximum log size across all servers 67 | func (cfg *config) LogSize() int { 68 | logsize := 0 69 | for i := 0; i < cfg.n; i++ { 70 | n := cfg.saved[i].RaftStateSize() 71 | if n > logsize { 72 | logsize = n 73 | } 74 | } 75 | return logsize 76 | } 77 | 78 | // attach server i to servers listed in to 79 | // caller must hold cfg.mu 80 | func (cfg *config) connectUnlocked(i int, to []int) { 81 | // log.Printf("connect peer %d to %v\n", i, to) 82 | 83 | // outgoing socket files 84 | for j := 0; j < len(to); j++ { 85 | endname := cfg.endnames[i][to[j]] 86 | cfg.net.Enable(endname, true) 87 | } 88 | 89 | // incoming socket files 90 | for j := 0; j < len(to); j++ { 91 | endname := cfg.endnames[to[j]][i] 92 | cfg.net.Enable(endname, true) 93 | } 94 | } 95 | 96 | func (cfg *config) connect(i int, to []int) { 97 | cfg.mu.Lock() 98 | defer cfg.mu.Unlock() 99 | cfg.connectUnlocked(i, to) 100 | } 101 | 102 | // 
detach server i from the servers listed in from 103 | // caller must hold cfg.mu 104 | func (cfg *config) disconnectUnlocked(i int, from []int) { 105 | // log.Printf("disconnect peer %d from %v\n", i, from) 106 | 107 | // outgoing socket files 108 | for j := 0; j < len(from); j++ { 109 | if cfg.endnames[i] != nil { 110 | endname := cfg.endnames[i][from[j]] 111 | cfg.net.Enable(endname, false) 112 | } 113 | } 114 | 115 | // incoming socket files 116 | for j := 0; j < len(from); j++ { 117 | if cfg.endnames[j] != nil { 118 | endname := cfg.endnames[from[j]][i] 119 | cfg.net.Enable(endname, false) 120 | } 121 | } 122 | } 123 | 124 | func (cfg *config) disconnect(i int, from []int) { 125 | cfg.mu.Lock() 126 | defer cfg.mu.Unlock() 127 | cfg.disconnectUnlocked(i, from) 128 | } 129 | 130 | func (cfg *config) All() []int { 131 | all := make([]int, cfg.n) 132 | for i := 0; i < cfg.n; i++ { 133 | all[i] = i 134 | } 135 | return all 136 | } 137 | 138 | func (cfg *config) ConnectAll() { 139 | cfg.mu.Lock() 140 | defer cfg.mu.Unlock() 141 | for i := 0; i < cfg.n; i++ { 142 | cfg.connectUnlocked(i, cfg.All()) 143 | } 144 | } 145 | 146 | // Sets up 2 partitions with connectivity between servers in each partition. 147 | func (cfg *config) partition(p1 []int, p2 []int) { 148 | cfg.mu.Lock() 149 | defer cfg.mu.Unlock() 150 | // log.Printf("partition servers into: %v %v\n", p1, p2) 151 | for i := 0; i < len(p1); i++ { 152 | cfg.disconnectUnlocked(p1[i], p2) 153 | cfg.connectUnlocked(p1[i], p1) 154 | } 155 | for i := 0; i < len(p2); i++ { 156 | cfg.disconnectUnlocked(p2[i], p1) 157 | cfg.connectUnlocked(p2[i], p2) 158 | } 159 | } 160 | 161 | // Create a clerk with clerk specific server names. 162 | // Give it connections to all of the servers, but for 163 | // now enable only connections to servers in to[]. 164 | func (cfg *config) makeClient(to []int) *Clerk { 165 | cfg.mu.Lock() 166 | defer cfg.mu.Unlock() 167 | 168 | // a fresh set of ClientEnds. 
169 | ends := make([]*labrpc.ClientEnd, cfg.n) 170 | endnames := make([]string, cfg.n) 171 | for j := 0; j < cfg.n; j++ { 172 | endnames[j] = randstring(20) 173 | ends[j] = cfg.net.MakeEnd(endnames[j]) 174 | cfg.net.Connect(endnames[j], j) 175 | } 176 | 177 | ck := MakeClerk(random_handles(ends)) 178 | cfg.clerks[ck] = endnames 179 | cfg.nextClientId++ 180 | cfg.ConnectClientUnlocked(ck, to) 181 | return ck 182 | } 183 | 184 | func (cfg *config) deleteClient(ck *Clerk) { 185 | cfg.mu.Lock() 186 | defer cfg.mu.Unlock() 187 | 188 | v := cfg.clerks[ck] 189 | for i := 0; i < len(v); i++ { 190 | os.Remove(v[i]) 191 | } 192 | delete(cfg.clerks, ck) 193 | } 194 | 195 | // caller should hold cfg.mu 196 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 197 | // log.Printf("ConnectClient %v to %v\n", ck, to) 198 | endnames := cfg.clerks[ck] 199 | for j := 0; j < len(to); j++ { 200 | s := endnames[to[j]] 201 | cfg.net.Enable(s, true) 202 | } 203 | } 204 | 205 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 206 | cfg.mu.Lock() 207 | defer cfg.mu.Unlock() 208 | cfg.ConnectClientUnlocked(ck, to) 209 | } 210 | 211 | // caller should hold cfg.mu 212 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 213 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 214 | endnames := cfg.clerks[ck] 215 | for j := 0; j < len(from); j++ { 216 | s := endnames[from[j]] 217 | cfg.net.Enable(s, false) 218 | } 219 | } 220 | 221 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 222 | cfg.mu.Lock() 223 | defer cfg.mu.Unlock() 224 | cfg.DisconnectClientUnlocked(ck, from) 225 | } 226 | 227 | // Shutdown a server by isolating it 228 | func (cfg *config) ShutdownServer(i int) { 229 | cfg.mu.Lock() 230 | defer cfg.mu.Unlock() 231 | 232 | cfg.disconnectUnlocked(i, cfg.All()) 233 | 234 | // disable client connections to the server. 235 | // it's important to do this before creating 236 | // the new Persister in saved[i], to avoid 237 | // the possibility of the server returning a 238 | // positive reply to an Append but persisting 239 | // the result in the superseded Persister. 240 | cfg.net.DeleteServer(i) 241 | 242 | // a fresh persister, in case old instance 243 | // continues to update the Persister. 244 | // but copy old persister's content so that we always 245 | // pass Make() the last persisted state. 246 | if cfg.saved[i] != nil { 247 | cfg.saved[i] = cfg.saved[i].Copy() 248 | } 249 | 250 | kv := cfg.servers[i] 251 | if kv != nil { 252 | cfg.mu.Unlock() 253 | kv.Kill() 254 | cfg.mu.Lock() 255 | cfg.servers[i] = nil 256 | } 257 | } 258 | 259 | // If restart servers, first call ShutdownServer 260 | func (cfg *config) StartServer(i int) { 261 | cfg.mu.Lock() 262 | 263 | // a fresh set of outgoing ClientEnd names. 264 | cfg.endnames[i] = make([]string, cfg.n) 265 | for j := 0; j < cfg.n; j++ { 266 | cfg.endnames[i][j] = randstring(20) 267 | } 268 | 269 | // a fresh set of ClientEnds. 270 | ends := make([]*labrpc.ClientEnd, cfg.n) 271 | for j := 0; j < cfg.n; j++ { 272 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 273 | cfg.net.Connect(cfg.endnames[i][j], j) 274 | } 275 | 276 | // a fresh persister, so old instance doesn't overwrite 277 | // new instance's persisted state. 278 | // give the fresh persister a copy of the old persister's 279 | // state, so that the spec is that we pass StartKVServer() 280 | // the last persisted state. 
281 | if cfg.saved[i] != nil { 282 | cfg.saved[i] = cfg.saved[i].Copy() 283 | } else { 284 | cfg.saved[i] = raft.MakePersister() 285 | } 286 | 287 | cfg.mu.Unlock() 288 | 289 | cfg.servers[i] = StartServer(ends, i, cfg.saved[i]) 290 | 291 | kvsvc := labrpc.MakeService(cfg.servers[i]) 292 | rfsvc := labrpc.MakeService(cfg.servers[i].rf) 293 | srv := labrpc.MakeServer() 294 | srv.AddService(kvsvc) 295 | srv.AddService(rfsvc) 296 | cfg.net.AddServer(i, srv) 297 | } 298 | 299 | func (cfg *config) Leader() (bool, int) { 300 | cfg.mu.Lock() 301 | defer cfg.mu.Unlock() 302 | 303 | for i := 0; i < cfg.n; i++ { 304 | _, is_leader := cfg.servers[i].rf.GetState() 305 | if is_leader { 306 | return true, i 307 | } 308 | } 309 | return false, 0 310 | } 311 | 312 | // Partition servers into 2 groups and put current leader in minority 313 | func (cfg *config) make_partition() ([]int, []int) { 314 | _, l := cfg.Leader() 315 | p1 := make([]int, cfg.n/2+1) 316 | p2 := make([]int, cfg.n/2) 317 | j := 0 318 | for i := 0; i < cfg.n; i++ { 319 | if i != l { 320 | if j < len(p1) { 321 | p1[j] = i 322 | } else { 323 | p2[j-len(p1)] = i 324 | } 325 | j++ 326 | } 327 | } 328 | p2[len(p2)-1] = l 329 | return p1, p2 330 | } 331 | 332 | func make_config(t *testing.T, n int, unreliable bool) *config { 333 | runtime.GOMAXPROCS(4) 334 | cfg := &config{} 335 | cfg.t = t 336 | cfg.net = labrpc.MakeNetwork() 337 | cfg.n = n 338 | cfg.servers = make([]*ShardMaster, cfg.n) 339 | cfg.saved = make([]*raft.Persister, cfg.n) 340 | cfg.endnames = make([][]string, cfg.n) 341 | cfg.clerks = make(map[*Clerk][]string) 342 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 343 | cfg.start = time.Now() 344 | 345 | // create a full set of KV servers. 346 | for i := 0; i < cfg.n; i++ { 347 | cfg.StartServer(i) 348 | } 349 | 350 | cfg.ConnectAll() 351 | 352 | cfg.net.Reliable(!unreliable) 353 | 354 | return cfg 355 | } 356 | -------------------------------------------------------------------------------- /src/shardmaster/server.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | 4 | import "../raft" 5 | import "../labrpc" 6 | import "sync" 7 | import "../labgob" 8 | 9 | 10 | type ShardMaster struct { 11 | mu sync.Mutex 12 | me int 13 | rf *raft.Raft 14 | applyCh chan raft.ApplyMsg 15 | 16 | // Your data here. 17 | 18 | configs []Config // indexed by config num 19 | } 20 | 21 | 22 | type Op struct { 23 | // Your data here. 24 | } 25 | 26 | 27 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) { 28 | // Your code here. 29 | } 30 | 31 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) { 32 | // Your code here. 33 | } 34 | 35 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) { 36 | // Your code here. 37 | } 38 | 39 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) { 40 | // Your code here. 41 | } 42 | 43 | 44 | // 45 | // the tester calls Kill() when a ShardMaster instance won't 46 | // be needed again. you are not required to do anything 47 | // in Kill(), but it might be convenient to (for example) 48 | // turn off debug output from this instance. 49 | // 50 | func (sm *ShardMaster) Kill() { 51 | sm.rf.Kill() 52 | // Your code here, if desired. 
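	// A minimal sketch (assumes an sm.dead int32 field and a killed() helper,
	// neither of which this skeleton declares): set a flag that long-running
	// goroutines can poll so they exit after Kill(), e.g.
	//   atomic.StoreInt32(&sm.dead, 1)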
53 | } 54 | 55 | // needed by shardkv tester 56 | func (sm *ShardMaster) Raft() *raft.Raft { 57 | return sm.rf 58 | } 59 | 60 | // 61 | // servers[] contains the ports of the set of 62 | // servers that will cooperate via Paxos to 63 | // form the fault-tolerant shardmaster service. 64 | // me is the index of the current server in servers[]. 65 | // 66 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardMaster { 67 | sm := new(ShardMaster) 68 | sm.me = me 69 | 70 | sm.configs = make([]Config, 1) 71 | sm.configs[0].Groups = map[int][]string{} 72 | 73 | labgob.Register(Op{}) 74 | sm.applyCh = make(chan raft.ApplyMsg) 75 | sm.rf = raft.Make(servers, me, persister, sm.applyCh) 76 | 77 | // Your code here. 78 | 79 | return sm 80 | } 81 | -------------------------------------------------------------------------------- /src/shardmaster/test_test.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | ) 7 | 8 | // import "time" 9 | import "fmt" 10 | 11 | func check(t *testing.T, groups []int, ck *Clerk) { 12 | c := ck.Query(-1) 13 | if len(c.Groups) != len(groups) { 14 | t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups)) 15 | } 16 | 17 | // are the groups as expected? 18 | for _, g := range groups { 19 | _, ok := c.Groups[g] 20 | if ok != true { 21 | t.Fatalf("missing group %v", g) 22 | } 23 | } 24 | 25 | // any un-allocated shards? 26 | if len(groups) > 0 { 27 | for s, g := range c.Shards { 28 | _, ok := c.Groups[g] 29 | if ok == false { 30 | t.Fatalf("shard %v -> invalid group %v", s, g) 31 | } 32 | } 33 | } 34 | 35 | // more or less balanced sharding? 36 | counts := map[int]int{} 37 | for _, g := range c.Shards { 38 | counts[g] += 1 39 | } 40 | min := 257 41 | max := 0 42 | for g, _ := range c.Groups { 43 | if counts[g] > max { 44 | max = counts[g] 45 | } 46 | if counts[g] < min { 47 | min = counts[g] 48 | } 49 | } 50 | if max > min+1 { 51 | t.Fatalf("max %v too much larger than min %v", max, min) 52 | } 53 | } 54 | 55 | func check_same_config(t *testing.T, c1 Config, c2 Config) { 56 | if c1.Num != c2.Num { 57 | t.Fatalf("Num wrong") 58 | } 59 | if c1.Shards != c2.Shards { 60 | t.Fatalf("Shards wrong") 61 | } 62 | if len(c1.Groups) != len(c2.Groups) { 63 | t.Fatalf("number of Groups is wrong") 64 | } 65 | for gid, sa := range c1.Groups { 66 | sa1, ok := c2.Groups[gid] 67 | if ok == false || len(sa1) != len(sa) { 68 | t.Fatalf("len(Groups) wrong") 69 | } 70 | if ok && len(sa1) == len(sa) { 71 | for j := 0; j < len(sa); j++ { 72 | if sa[j] != sa1[j] { 73 | t.Fatalf("Groups wrong") 74 | } 75 | } 76 | } 77 | } 78 | } 79 | 80 | func TestBasic(t *testing.T) { 81 | const nservers = 3 82 | cfg := make_config(t, nservers, false) 83 | defer cfg.cleanup() 84 | 85 | ck := cfg.makeClient(cfg.All()) 86 | 87 | fmt.Printf("Test: Basic leave/join ...\n") 88 | 89 | cfa := make([]Config, 6) 90 | cfa[0] = ck.Query(-1) 91 | 92 | check(t, []int{}, ck) 93 | 94 | var gid1 int = 1 95 | ck.Join(map[int][]string{gid1: []string{"x", "y", "z"}}) 96 | check(t, []int{gid1}, ck) 97 | cfa[1] = ck.Query(-1) 98 | 99 | var gid2 int = 2 100 | ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}}) 101 | check(t, []int{gid1, gid2}, ck) 102 | cfa[2] = ck.Query(-1) 103 | 104 | cfx := ck.Query(-1) 105 | sa1 := cfx.Groups[gid1] 106 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 107 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 108 | } 109 | sa2 := 
cfx.Groups[gid2] 110 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 111 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 112 | } 113 | 114 | ck.Leave([]int{gid1}) 115 | check(t, []int{gid2}, ck) 116 | cfa[4] = ck.Query(-1) 117 | 118 | ck.Leave([]int{gid2}) 119 | cfa[5] = ck.Query(-1) 120 | 121 | fmt.Printf(" ... Passed\n") 122 | 123 | fmt.Printf("Test: Historical queries ...\n") 124 | 125 | for s := 0; s < nservers; s++ { 126 | cfg.ShutdownServer(s) 127 | for i := 0; i < len(cfa); i++ { 128 | c := ck.Query(cfa[i].Num) 129 | check_same_config(t, c, cfa[i]) 130 | } 131 | cfg.StartServer(s) 132 | cfg.ConnectAll() 133 | } 134 | 135 | fmt.Printf(" ... Passed\n") 136 | 137 | fmt.Printf("Test: Move ...\n") 138 | { 139 | var gid3 int = 503 140 | ck.Join(map[int][]string{gid3: []string{"3a", "3b", "3c"}}) 141 | var gid4 int = 504 142 | ck.Join(map[int][]string{gid4: []string{"4a", "4b", "4c"}}) 143 | for i := 0; i < NShards; i++ { 144 | cf := ck.Query(-1) 145 | if i < NShards/2 { 146 | ck.Move(i, gid3) 147 | if cf.Shards[i] != gid3 { 148 | cf1 := ck.Query(-1) 149 | if cf1.Num <= cf.Num { 150 | t.Fatalf("Move should increase Config.Num") 151 | } 152 | } 153 | } else { 154 | ck.Move(i, gid4) 155 | if cf.Shards[i] != gid4 { 156 | cf1 := ck.Query(-1) 157 | if cf1.Num <= cf.Num { 158 | t.Fatalf("Move should increase Config.Num") 159 | } 160 | } 161 | } 162 | } 163 | cf2 := ck.Query(-1) 164 | for i := 0; i < NShards; i++ { 165 | if i < NShards/2 { 166 | if cf2.Shards[i] != gid3 { 167 | t.Fatalf("expected shard %v on gid %v actually %v", 168 | i, gid3, cf2.Shards[i]) 169 | } 170 | } else { 171 | if cf2.Shards[i] != gid4 { 172 | t.Fatalf("expected shard %v on gid %v actually %v", 173 | i, gid4, cf2.Shards[i]) 174 | } 175 | } 176 | } 177 | ck.Leave([]int{gid3}) 178 | ck.Leave([]int{gid4}) 179 | } 180 | fmt.Printf(" ... Passed\n") 181 | 182 | fmt.Printf("Test: Concurrent leave/join ...\n") 183 | 184 | const npara = 10 185 | var cka [npara]*Clerk 186 | for i := 0; i < len(cka); i++ { 187 | cka[i] = cfg.makeClient(cfg.All()) 188 | } 189 | gids := make([]int, npara) 190 | ch := make(chan bool) 191 | for xi := 0; xi < npara; xi++ { 192 | gids[xi] = int((xi * 10) + 100) 193 | go func(i int) { 194 | defer func() { ch <- true }() 195 | var gid int = gids[i] 196 | var sid1 = fmt.Sprintf("s%da", gid) 197 | var sid2 = fmt.Sprintf("s%db", gid) 198 | cka[i].Join(map[int][]string{gid + 1000: []string{sid1}}) 199 | cka[i].Join(map[int][]string{gid: []string{sid2}}) 200 | cka[i].Leave([]int{gid + 1000}) 201 | }(xi) 202 | } 203 | for i := 0; i < npara; i++ { 204 | <-ch 205 | } 206 | check(t, gids, ck) 207 | 208 | fmt.Printf(" ... Passed\n") 209 | 210 | fmt.Printf("Test: Minimal transfers after joins ...\n") 211 | 212 | c1 := ck.Query(-1) 213 | for i := 0; i < 5; i++ { 214 | var gid = int(npara + 1 + i) 215 | ck.Join(map[int][]string{gid: []string{ 216 | fmt.Sprintf("%da", gid), 217 | fmt.Sprintf("%db", gid), 218 | fmt.Sprintf("%db", gid)}}) 219 | } 220 | c2 := ck.Query(-1) 221 | for i := int(1); i <= npara; i++ { 222 | for j := 0; j < len(c1.Shards); j++ { 223 | if c2.Shards[j] == i { 224 | if c1.Shards[j] != i { 225 | t.Fatalf("non-minimal transfer after Join()s") 226 | } 227 | } 228 | } 229 | } 230 | 231 | fmt.Printf(" ... 
Passed\n") 232 | 233 | fmt.Printf("Test: Minimal transfers after leaves ...\n") 234 | 235 | for i := 0; i < 5; i++ { 236 | ck.Leave([]int{int(npara + 1 + i)}) 237 | } 238 | c3 := ck.Query(-1) 239 | for i := int(1); i <= npara; i++ { 240 | for j := 0; j < len(c1.Shards); j++ { 241 | if c2.Shards[j] == i { 242 | if c3.Shards[j] != i { 243 | t.Fatalf("non-minimal transfer after Leave()s") 244 | } 245 | } 246 | } 247 | } 248 | 249 | fmt.Printf(" ... Passed\n") 250 | } 251 | 252 | func TestMulti(t *testing.T) { 253 | const nservers = 3 254 | cfg := make_config(t, nservers, false) 255 | defer cfg.cleanup() 256 | 257 | ck := cfg.makeClient(cfg.All()) 258 | 259 | fmt.Printf("Test: Multi-group join/leave ...\n") 260 | 261 | cfa := make([]Config, 6) 262 | cfa[0] = ck.Query(-1) 263 | 264 | check(t, []int{}, ck) 265 | 266 | var gid1 int = 1 267 | var gid2 int = 2 268 | ck.Join(map[int][]string{ 269 | gid1: []string{"x", "y", "z"}, 270 | gid2: []string{"a", "b", "c"}, 271 | }) 272 | check(t, []int{gid1, gid2}, ck) 273 | cfa[1] = ck.Query(-1) 274 | 275 | var gid3 int = 3 276 | ck.Join(map[int][]string{gid3: []string{"j", "k", "l"}}) 277 | check(t, []int{gid1, gid2, gid3}, ck) 278 | cfa[2] = ck.Query(-1) 279 | 280 | cfx := ck.Query(-1) 281 | sa1 := cfx.Groups[gid1] 282 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 283 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 284 | } 285 | sa2 := cfx.Groups[gid2] 286 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 287 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 288 | } 289 | sa3 := cfx.Groups[gid3] 290 | if len(sa3) != 3 || sa3[0] != "j" || sa3[1] != "k" || sa3[2] != "l" { 291 | t.Fatalf("wrong servers for gid %v: %v\n", gid3, sa3) 292 | } 293 | 294 | ck.Leave([]int{gid1, gid3}) 295 | check(t, []int{gid2}, ck) 296 | cfa[3] = ck.Query(-1) 297 | 298 | cfx = ck.Query(-1) 299 | sa2 = cfx.Groups[gid2] 300 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 301 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 302 | } 303 | 304 | ck.Leave([]int{gid2}) 305 | 306 | fmt.Printf(" ... Passed\n") 307 | 308 | fmt.Printf("Test: Concurrent multi leave/join ...\n") 309 | 310 | const npara = 10 311 | var cka [npara]*Clerk 312 | for i := 0; i < len(cka); i++ { 313 | cka[i] = cfg.makeClient(cfg.All()) 314 | } 315 | gids := make([]int, npara) 316 | var wg sync.WaitGroup 317 | for xi := 0; xi < npara; xi++ { 318 | wg.Add(1) 319 | gids[xi] = int(xi + 1000) 320 | go func(i int) { 321 | defer wg.Done() 322 | var gid int = gids[i] 323 | cka[i].Join(map[int][]string{ 324 | gid: []string{ 325 | fmt.Sprintf("%da", gid), 326 | fmt.Sprintf("%db", gid), 327 | fmt.Sprintf("%dc", gid)}, 328 | gid + 1000: []string{fmt.Sprintf("%da", gid+1000)}, 329 | gid + 2000: []string{fmt.Sprintf("%da", gid+2000)}, 330 | }) 331 | cka[i].Leave([]int{gid + 1000, gid + 2000}) 332 | }(xi) 333 | } 334 | wg.Wait() 335 | check(t, gids, ck) 336 | 337 | fmt.Printf(" ... 
Passed\n") 338 | 339 | fmt.Printf("Test: Minimal transfers after multijoins ...\n") 340 | 341 | c1 := ck.Query(-1) 342 | m := make(map[int][]string) 343 | for i := 0; i < 5; i++ { 344 | var gid = npara + 1 + i 345 | m[gid] = []string{fmt.Sprintf("%da", gid), fmt.Sprintf("%db", gid)} 346 | } 347 | ck.Join(m) 348 | c2 := ck.Query(-1) 349 | for i := int(1); i <= npara; i++ { 350 | for j := 0; j < len(c1.Shards); j++ { 351 | if c2.Shards[j] == i { 352 | if c1.Shards[j] != i { 353 | t.Fatalf("non-minimal transfer after Join()s") 354 | } 355 | } 356 | } 357 | } 358 | 359 | fmt.Printf(" ... Passed\n") 360 | 361 | fmt.Printf("Test: Minimal transfers after multileaves ...\n") 362 | 363 | var l []int 364 | for i := 0; i < 5; i++ { 365 | l = append(l, npara+1+i) 366 | } 367 | ck.Leave(l) 368 | c3 := ck.Query(-1) 369 | for i := int(1); i <= npara; i++ { 370 | for j := 0; j < len(c1.Shards); j++ { 371 | if c2.Shards[j] == i { 372 | if c3.Shards[j] != i { 373 | t.Fatalf("non-minimal transfer after Leave()s") 374 | } 375 | } 376 | } 377 | } 378 | 379 | fmt.Printf(" ... Passed\n") 380 | } 381 | --------------------------------------------------------------------------------
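A note on the commented-out RequestVote handler preserved in the raft package
above: it grants a vote only when the candidate's last log entry is at least as
up-to-date as the local one, comparing last-entry terms first and log lengths
second (the Raft election restriction). The following self-contained sketch
restates that check; the function and parameter names are illustrative and do
not appear in this repository.

    package main

    import "fmt"

    // candidateLogUpToDate reports whether a candidate's last log entry is at
    // least as up-to-date as the local peer's, using the same rule as the
    // commented-out handler: a higher last term wins outright; equal last
    // terms fall back to comparing last indexes (i.e. log length).
    func candidateLogUpToDate(localLastIndex, localLastTerm, candLastIndex, candLastTerm int) bool {
    	if candLastTerm != localLastTerm {
    		return candLastTerm > localLastTerm
    	}
    	return candLastIndex >= localLastIndex
    }

    func main() {
    	// Local log ends at index 5, term 3; candidate's ends at index 4, term 4:
    	// the candidate's higher last term wins, so the vote may be granted.
    	fmt.Println(candidateLogUpToDate(5, 3, 4, 4)) // true
    	// Equal last terms but a shorter candidate log: the vote is refused.
    	fmt.Println(candidateLogUpToDate(5, 3, 4, 3)) // false
    }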