├── .check-build
├── .gitignore
├── .run
│   ├── TestBasicAgree2B in raft.run.xml
│   ├── TestFailAgree2B in raft.run.xml
│   ├── TestFailNoAgree2B in raft.run.xml
│   ├── TestInitialElection2A in raft.run.xml
│   ├── TestReElection2A in raft.run.xml
│   ├── build wc.run.xml
│   ├── go run mrmaster.run.xml
│   ├── go run mrworker.run.xml
│   └── go test lab2a.run.xml
├── Makefile
├── README.md
└── src
    ├── .gitignore
    ├── kvraft
    │   ├── client.go
    │   ├── common.go
    │   ├── config.go
    │   ├── server.go
    │   └── test_test.go
    ├── labgob
    │   ├── labgob.go
    │   └── test_test.go
    ├── labrpc
    │   ├── labrpc.go
    │   └── test_test.go
    ├── main
    │   ├── .gitignore
    │   ├── build-wc.sh
    │   ├── diskvd.go
    │   ├── lockc.go
    │   ├── lockd.go
    │   ├── mrmaster.go
    │   ├── mrsequential.go
    │   ├── mrworker.go
    │   ├── pbc.go
    │   ├── pbd.go
    │   ├── pg-being_ernest.txt
    │   ├── pg-dorian_gray.txt
    │   ├── pg-frankenstein.txt
    │   ├── pg-grimm.txt
    │   ├── pg-huckleberry_finn.txt
    │   ├── pg-metamorphosis.txt
    │   ├── pg-sherlock_holmes.txt
    │   ├── pg-tom_sawyer.txt
    │   ├── test-mr.sh
    │   └── viewd.go
    ├── models
    │   └── kv.go
    ├── mr
    │   ├── master.go
    │   ├── rpc.go
    │   └── worker.go
    ├── mrapps
    │   ├── crash.go
    │   ├── indexer.go
    │   ├── mtiming.go
    │   ├── nocrash.go
    │   ├── rtiming.go
    │   └── wc.go
    ├── porcupine
    │   ├── bitset.go
    │   ├── checker.go
    │   ├── model.go
    │   ├── porcupine.go
    │   └── visualization.go
    ├── raft
    │   ├── README.md
    │   ├── append_entries_callback.go
    │   ├── append_entries_task.go
    │   ├── callback.go
    │   ├── config.go
    │   ├── peer_log_state.go
    │   ├── persister.go
    │   ├── raft.go
    │   ├── raft_log.go
    │   ├── raft_state.go
    │   ├── raft_task.go
    │   ├── raft_time.go
    │   ├── request_vote_callback.go
    │   ├── request_vote_task.go
    │   ├── rpc_args_reply.go
    │   ├── rpc_data.go
    │   ├── test_test.go
    │   └── util.go
    ├── shardkv
    │   ├── client.go
    │   ├── common.go
    │   ├── config.go
    │   ├── server.go
    │   └── test_test.go
    └── shardmaster
        ├── client.go
        ├── common.go
        ├── config.go
        ├── server.go
        └── test_test.go
/.check-build:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -eu
4 |
5 | REFERENCE_FILES=(
6 | # lab 1
7 | src/mrapps/crash.go
8 | src/mrapps/indexer.go
9 | src/mrapps/mtiming.go
10 | src/mrapps/nocrash.go
11 | src/mrapps/rtiming.go
12 | src/mrapps/wc.go
13 | src/main/mrsequential.go
14 | src/main/mrmaster.go
15 | src/main/mrworker.go
16 |
17 | # lab 2
18 | src/raft/persister.go
19 | src/raft/test_test.go
20 | src/raft/config.go
21 | src/labrpc/labrpc.go
22 |
23 | # lab 3
24 | src/kvraft/test_test.go
25 | src/kvraft/config.go
26 |
27 | # lab 4a
28 | src/shardmaster/test_test.go
29 | src/shardmaster/config.go
30 |
31 | # lab 4b
32 | src/shardkv/test_test.go
33 | src/shardkv/config.go
34 | )
35 |
36 | main() {
37 | upstream="$1"
38 | labnum="$2"
39 |
40 | # make sure we have reference copy of lab, in FETCH_HEAD
41 | git fetch "$upstream" 2>/dev/null || die "unable to git fetch $upstream"
42 |
43 | # copy existing directory
44 | tmpdir="$(mktemp -d)"
45 | find src -type s -delete # cp can't copy sockets
46 | cp -r src "$tmpdir"
47 | orig="$PWD"
48 | cd "$tmpdir"
49 |
50 | # check out reference files
51 | for f in ${REFERENCE_FILES[@]}; do
52 | mkdir -p "$(dirname $f)"
53 | git --git-dir="$orig/.git" show "FETCH_HEAD:$f" > "$f"
54 | done
55 |
56 | case $labnum in
57 | "lab1") check_lab1;;
58 | "lab2a"|"lab2b"|"lab2c") check_lab2;;
59 | "lab3a"|"lab3b") check_lab3;;
60 | "lab4a") check_lab4a;;
61 | "lab4b") check_lab4b;;
62 | *) die "unknown lab: $labnum";;
63 | esac
64 |
65 | cd
66 | rm -rf "$tmpdir"
67 | }
68 |
69 | check_lab1() {
70 | check_cmd cd src/mrapps
71 | check_cmd go build -buildmode=plugin wc.go
72 | check_cmd go build -buildmode=plugin indexer.go
73 | check_cmd go build -buildmode=plugin mtiming.go
74 | check_cmd go build -buildmode=plugin rtiming.go
75 | check_cmd go build -buildmode=plugin crash.go
76 | check_cmd go build -buildmode=plugin nocrash.go
77 | check_cmd cd ../main
78 | check_cmd go build mrmaster.go
79 | check_cmd go build mrworker.go
80 | check_cmd go build mrsequential.go
81 | }
82 |
83 | check_lab2() {
84 | check_cmd cd src/raft
85 | check_cmd go test -c
86 | }
87 |
88 | check_lab3() {
89 | check_cmd cd src/kvraft
90 | check_cmd go test -c
91 | }
92 |
93 | check_lab4a() {
94 | check_cmd cd src/shardmaster
95 | check_cmd go test -c
96 | }
97 |
98 | check_lab4b() {
99 | check_cmd cd src/shardkv
100 | check_cmd go test -c
101 | # also check other labs/parts
102 | cd "$tmpdir"
103 | check_lab4a
104 | cd "$tmpdir"
105 | check_lab3
106 | cd "$tmpdir"
107 | check_lab2
108 | }
109 |
110 | check_cmd() {
111 | if ! "$@" >/dev/null 2>&1; then
112 | echo "We tried building your source code with testing-related files reverted to original versions, and the build failed. This copy of your code is preserved in $tmpdir for debugging purposes. Please make sure the code you are trying to hand in does not make changes to test code." >&2
113 | echo >&2
114 | echo "The build failed while trying to run the following command:" >&2
115 | echo >&2
116 | echo "$ $@" >&2
117 | echo " (cwd: ${PWD#$tmpdir/})" >&2
118 | exit 1
119 | fi
120 | }
121 |
122 | die() {
123 | echo "$1" >&2
124 | exit 1
125 | }
126 |
127 | main "$@"
128 |
--------------------------------------------------------------------------------
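
Note: the Makefile's check-% target runs this script automatically before tarring a hand-in, and it can also be invoked by hand. A manual run for lab 2a, using the upstream URL from the Makefile, looks like this:

    # rebuild a copy of src/ with the original test files restored and make sure it compiles
    ./.check-build git://g.csail.mit.edu/6.824-golabs-2020 lab2a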
/.gitignore:
--------------------------------------------------------------------------------
1 | pkg/
2 | api.key
3 | *-handin.tar.gz
4 | /.idea
5 |
--------------------------------------------------------------------------------
/.run/TestBasicAgree2B in raft.run.xml:
--------------------------------------------------------------------------------
(run-configuration XML; the markup was stripped when this listing was generated)
--------------------------------------------------------------------------------
/.run/TestFailAgree2B in raft.run.xml:
--------------------------------------------------------------------------------
(run-configuration XML; the markup was stripped when this listing was generated)
--------------------------------------------------------------------------------
/.run/TestFailNoAgree2B in raft.run.xml:
--------------------------------------------------------------------------------
(run-configuration XML; the markup was stripped when this listing was generated)
--------------------------------------------------------------------------------
/.run/TestInitialElection2A in raft.run.xml:
--------------------------------------------------------------------------------
(run-configuration XML; the markup was stripped when this listing was generated)
--------------------------------------------------------------------------------
/.run/TestReElection2A in raft.run.xml:
--------------------------------------------------------------------------------
(run-configuration XML; the markup was stripped when this listing was generated)
--------------------------------------------------------------------------------
/.run/build wc.run.xml:
--------------------------------------------------------------------------------
(run-configuration XML; the markup was stripped when this listing was generated)
--------------------------------------------------------------------------------
/.run/go run mrmaster.run.xml:
--------------------------------------------------------------------------------
(run-configuration XML; the markup was stripped when this listing was generated)
--------------------------------------------------------------------------------
/.run/go run mrworker.run.xml:
--------------------------------------------------------------------------------
(run-configuration XML; the markup was stripped when this listing was generated)
--------------------------------------------------------------------------------
/.run/go test lab2a.run.xml:
--------------------------------------------------------------------------------
(run-configuration XML; the markup was stripped when this listing was generated)
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # This is the Makefile helping you submit the labs.
2 | # Just create 6.824/api.key with your API key in it,
3 | # and submit your lab with the following command:
4 | # $ make [lab1|lab2a|lab2b|lab2c|lab3a|lab3b|lab4a|lab4b]
5 |
6 | LABS=" lab1 lab2a lab2b lab2c lab3a lab3b lab4a lab4b "
7 |
8 | %: check-%
9 | @echo "Preparing $@-handin.tar.gz"
10 | @if echo $(LABS) | grep -q " $@ " ; then \
11 | echo "Tarring up your submission..." ; \
12 | tar cvzf $@-handin.tar.gz \
13 | "--exclude=src/main/pg-*.txt" \
14 | "--exclude=src/main/diskvd" \
15 | "--exclude=src/mapreduce/824-mrinput-*.txt" \
16 | "--exclude=src/main/mr-*" \
17 | "--exclude=mrtmp.*" \
18 | "--exclude=src/main/diff.out" \
19 | "--exclude=src/main/mrmaster" \
20 | "--exclude=src/main/mrsequential" \
21 | "--exclude=src/main/mrworker" \
22 | "--exclude=*.so" \
23 | Makefile src; \
24 | if ! test -e api.key ; then \
25 | echo "Missing $(PWD)/api.key. Please create the file with your key in it or submit the $@-handin.tar.gz via the web interface."; \
26 | else \
27 | echo "Are you sure you want to submit $@? Enter 'yes' to continue:"; \
28 | read line; \
29 | if test "$$line" != "yes" ; then echo "Giving up submission"; exit; fi; \
30 | if test `stat -c "%s" "$@-handin.tar.gz" 2>/dev/null || stat -f "%z" "$@-handin.tar.gz"` -ge 20971520 ; then echo "File exceeds 20MB."; exit; fi; \
31 | mv api.key api.key.fix ; \
32 | cat api.key.fix | tr -d '\n' > api.key ; \
33 | rm api.key.fix ; \
34 | curl -F file=@$@-handin.tar.gz -F "key=<api.key" \
35 | https://6824.scripts.mit.edu/2020/handin.py/upload > /dev/null || { \
36 | echo ; \
37 | echo "Submit seems to have failed."; \
38 | echo "Please upload the tarball manually on the submission website."; } \
39 | fi; \
40 | else \
41 | echo "Bad target $@. Usage: make [$(LABS)]"; \
42 | fi
43 |
44 | .PHONY: check-%
45 | check-%:
46 | @echo "Checking that your submission builds correctly..."
47 | @./.check-build git://g.csail.mit.edu/6.824-golabs-2020 $(patsubst check-%,%,$@)
48 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # My 6.824 Solution
2 |
3 | These are my solutions to the labs of MIT 6.824. An original copy of the course material is stored in the branch `skeleton`; the branch `master` holds my latest solutions. I am still working on the labs, so the solutions may be incomplete. Nevertheless, please do not use this repository to violate academic honesty.
4 |
--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.*/
2 | mrtmp.*
3 | 824-mrinput-*.txt
4 | /main/diff.out
5 | /mapreduce/x.txt
6 | /pbservice/x.txt
7 | /kvpaxos/x.txt
8 | *.so
9 | /.idea
10 |
--------------------------------------------------------------------------------
/src/kvraft/client.go:
--------------------------------------------------------------------------------
1 | package kvraft
2 |
3 | import "../labrpc"
4 | import "crypto/rand"
5 | import "math/big"
6 |
7 |
8 | type Clerk struct {
9 | servers []*labrpc.ClientEnd
10 | // You will have to modify this struct.
11 | }
12 |
13 | func nrand() int64 {
14 | max := big.NewInt(int64(1) << 62)
15 | bigx, _ := rand.Int(rand.Reader, max)
16 | x := bigx.Int64()
17 | return x
18 | }
19 |
20 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk {
21 | ck := new(Clerk)
22 | ck.servers = servers
23 | // You'll have to add code here.
24 | return ck
25 | }
26 |
27 | //
28 | // fetch the current value for a key.
29 | // returns "" if the key does not exist.
30 | // keeps trying forever in the face of all other errors.
31 | //
32 | // you can send an RPC with code like this:
33 | // ok := ck.servers[i].Call("KVServer.Get", &args, &reply)
34 | //
35 | // the types of args and reply (including whether they are pointers)
36 | // must match the declared types of the RPC handler function's
37 | // arguments. and reply must be passed as a pointer.
38 | //
39 | func (ck *Clerk) Get(key string) string {
40 |
41 | // You will have to modify this function.
42 | return ""
43 | }
44 |
45 | //
46 | // shared by Put and Append.
47 | //
48 | // you can send an RPC with code like this:
49 | // ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply)
50 | //
51 | // the types of args and reply (including whether they are pointers)
52 | // must match the declared types of the RPC handler function's
53 | // arguments. and reply must be passed as a pointer.
54 | //
55 | func (ck *Clerk) PutAppend(key string, value string, op string) {
56 | // You will have to modify this function.
57 | }
58 |
59 | func (ck *Clerk) Put(key string, value string) {
60 | ck.PutAppend(key, value, "Put")
61 | }
62 | func (ck *Clerk) Append(key string, value string) {
63 | ck.PutAppend(key, value, "Append")
64 | }
65 |
--------------------------------------------------------------------------------
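
Note: the comments above spell out the labrpc call pattern and the retry-forever contract. A hypothetical sketch of a Get that cycles through the servers until one answers with OK or ErrNoKey (a full implementation would typically also cache the leader and tag requests for duplicate detection):

    // Sketch only, not the repository's implementation.
    func (ck *Clerk) getSketch(key string) string {
        args := GetArgs{Key: key}
        for i := 0; ; i = (i + 1) % len(ck.servers) {
            var reply GetReply
            ok := ck.servers[i].Call("KVServer.Get", &args, &reply)
            if ok && reply.Err != ErrWrongLeader {
                return reply.Value // "" when reply.Err == ErrNoKey
            }
            // lost RPC or wrong leader: fall through and try the next server
        }
    }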
/src/kvraft/common.go:
--------------------------------------------------------------------------------
1 | package kvraft
2 |
3 | const (
4 | OK = "OK"
5 | ErrNoKey = "ErrNoKey"
6 | ErrWrongLeader = "ErrWrongLeader"
7 | )
8 |
9 | type Err string
10 |
11 | // Put or Append
12 | type PutAppendArgs struct {
13 | Key string
14 | Value string
15 | Op string // "Put" or "Append"
16 | // You'll have to add definitions here.
17 | // Field names must start with capital letters,
18 | // otherwise RPC will break.
19 | }
20 |
21 | type PutAppendReply struct {
22 | Err Err
23 | }
24 |
25 | type GetArgs struct {
26 | Key string
27 | // You'll have to add definitions here.
28 | }
29 |
30 | type GetReply struct {
31 | Err Err
32 | Value string
33 | }
34 |
--------------------------------------------------------------------------------
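
Note: the "add definitions here" comments above are where request metadata usually ends up, so the server can recognize a retried RPC. A hypothetical extension (field names are illustrative, not taken from this repository):

    // Illustrative only; not the repository's definitions.
    type PutAppendArgsSketch struct {
        Key      string
        Value    string
        Op       string // "Put" or "Append"
        ClientId int64  // which Clerk issued the request
        SeqNum   int64  // per-client sequence number, lets the server drop duplicates
    }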
/src/kvraft/config.go:
--------------------------------------------------------------------------------
1 | package kvraft
2 |
3 | import "../labrpc"
4 | import "testing"
5 | import "os"
6 |
7 | // import "log"
8 | import crand "crypto/rand"
9 | import "math/big"
10 | import "math/rand"
11 | import "encoding/base64"
12 | import "sync"
13 | import "runtime"
14 | import "../raft"
15 | import "fmt"
16 | import "time"
17 | import "sync/atomic"
18 |
19 | func randstring(n int) string {
20 | b := make([]byte, 2*n)
21 | crand.Read(b)
22 | s := base64.URLEncoding.EncodeToString(b)
23 | return s[0:n]
24 | }
25 |
26 | func makeSeed() int64 {
27 | max := big.NewInt(int64(1) << 62)
28 | bigx, _ := crand.Int(crand.Reader, max)
29 | x := bigx.Int64()
30 | return x
31 | }
32 |
33 | // Randomize server handles
34 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
35 | sa := make([]*labrpc.ClientEnd, len(kvh))
36 | copy(sa, kvh)
37 | for i := range sa {
38 | j := rand.Intn(i + 1)
39 | sa[i], sa[j] = sa[j], sa[i]
40 | }
41 | return sa
42 | }
43 |
44 | type config struct {
45 | mu sync.Mutex
46 | t *testing.T
47 | net *labrpc.Network
48 | n int
49 | kvservers []*KVServer
50 | saved []*raft.Persister
51 | endnames [][]string // names of each server's sending ClientEnds
52 | clerks map[*Clerk][]string
53 | nextClientId int
54 | maxraftstate int
55 | start time.Time // time at which make_config() was called
56 | // begin()/end() statistics
57 | t0 time.Time // time at which test_test.go called cfg.begin()
58 | rpcs0 int // rpcTotal() at start of test
59 | ops int32 // number of clerk get/put/append method calls
60 | }
61 |
62 | func (cfg *config) checkTimeout() {
63 | // enforce a two minute real-time limit on each test
64 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
65 | cfg.t.Fatal("test took longer than 120 seconds")
66 | }
67 | }
68 |
69 | func (cfg *config) cleanup() {
70 | cfg.mu.Lock()
71 | defer cfg.mu.Unlock()
72 | for i := 0; i < len(cfg.kvservers); i++ {
73 | if cfg.kvservers[i] != nil {
74 | cfg.kvservers[i].Kill()
75 | }
76 | }
77 | cfg.net.Cleanup()
78 | cfg.checkTimeout()
79 | }
80 |
81 | // Maximum log size across all servers
82 | func (cfg *config) LogSize() int {
83 | logsize := 0
84 | for i := 0; i < cfg.n; i++ {
85 | n := cfg.saved[i].RaftStateSize()
86 | if n > logsize {
87 | logsize = n
88 | }
89 | }
90 | return logsize
91 | }
92 |
93 | // Maximum snapshot size across all servers
94 | func (cfg *config) SnapshotSize() int {
95 | snapshotsize := 0
96 | for i := 0; i < cfg.n; i++ {
97 | n := cfg.saved[i].SnapshotSize()
98 | if n > snapshotsize {
99 | snapshotsize = n
100 | }
101 | }
102 | return snapshotsize
103 | }
104 |
105 | // attach server i to servers listed in to
106 | // caller must hold cfg.mu
107 | func (cfg *config) connectUnlocked(i int, to []int) {
108 | // log.Printf("connect peer %d to %v\n", i, to)
109 |
110 | // outgoing socket files
111 | for j := 0; j < len(to); j++ {
112 | endname := cfg.endnames[i][to[j]]
113 | cfg.net.Enable(endname, true)
114 | }
115 |
116 | // incoming socket files
117 | for j := 0; j < len(to); j++ {
118 | endname := cfg.endnames[to[j]][i]
119 | cfg.net.Enable(endname, true)
120 | }
121 | }
122 |
123 | func (cfg *config) connect(i int, to []int) {
124 | cfg.mu.Lock()
125 | defer cfg.mu.Unlock()
126 | cfg.connectUnlocked(i, to)
127 | }
128 |
129 | // detach server i from the servers listed in from
130 | // caller must hold cfg.mu
131 | func (cfg *config) disconnectUnlocked(i int, from []int) {
132 | // log.Printf("disconnect peer %d from %v\n", i, from)
133 |
134 | // outgoing socket files
135 | for j := 0; j < len(from); j++ {
136 | if cfg.endnames[i] != nil {
137 | endname := cfg.endnames[i][from[j]]
138 | cfg.net.Enable(endname, false)
139 | }
140 | }
141 |
142 | // incoming socket files
143 | for j := 0; j < len(from); j++ {
144 | if cfg.endnames[j] != nil {
145 | endname := cfg.endnames[from[j]][i]
146 | cfg.net.Enable(endname, false)
147 | }
148 | }
149 | }
150 |
151 | func (cfg *config) disconnect(i int, from []int) {
152 | cfg.mu.Lock()
153 | defer cfg.mu.Unlock()
154 | cfg.disconnectUnlocked(i, from)
155 | }
156 |
157 | func (cfg *config) All() []int {
158 | all := make([]int, cfg.n)
159 | for i := 0; i < cfg.n; i++ {
160 | all[i] = i
161 | }
162 | return all
163 | }
164 |
165 | func (cfg *config) ConnectAll() {
166 | cfg.mu.Lock()
167 | defer cfg.mu.Unlock()
168 | for i := 0; i < cfg.n; i++ {
169 | cfg.connectUnlocked(i, cfg.All())
170 | }
171 | }
172 |
173 | // Sets up 2 partitions with connectivity between servers in each partition.
174 | func (cfg *config) partition(p1 []int, p2 []int) {
175 | cfg.mu.Lock()
176 | defer cfg.mu.Unlock()
177 | // log.Printf("partition servers into: %v %v\n", p1, p2)
178 | for i := 0; i < len(p1); i++ {
179 | cfg.disconnectUnlocked(p1[i], p2)
180 | cfg.connectUnlocked(p1[i], p1)
181 | }
182 | for i := 0; i < len(p2); i++ {
183 | cfg.disconnectUnlocked(p2[i], p1)
184 | cfg.connectUnlocked(p2[i], p2)
185 | }
186 | }
187 |
188 | // Create a clerk with clerk specific server names.
189 | // Give it connections to all of the servers, but for
190 | // now enable only connections to servers in to[].
191 | func (cfg *config) makeClient(to []int) *Clerk {
192 | cfg.mu.Lock()
193 | defer cfg.mu.Unlock()
194 |
195 | // a fresh set of ClientEnds.
196 | ends := make([]*labrpc.ClientEnd, cfg.n)
197 | endnames := make([]string, cfg.n)
198 | for j := 0; j < cfg.n; j++ {
199 | endnames[j] = randstring(20)
200 | ends[j] = cfg.net.MakeEnd(endnames[j])
201 | cfg.net.Connect(endnames[j], j)
202 | }
203 |
204 | ck := MakeClerk(random_handles(ends))
205 | cfg.clerks[ck] = endnames
206 | cfg.nextClientId++
207 | cfg.ConnectClientUnlocked(ck, to)
208 | return ck
209 | }
210 |
211 | func (cfg *config) deleteClient(ck *Clerk) {
212 | cfg.mu.Lock()
213 | defer cfg.mu.Unlock()
214 |
215 | v := cfg.clerks[ck]
216 | for i := 0; i < len(v); i++ {
217 | os.Remove(v[i])
218 | }
219 | delete(cfg.clerks, ck)
220 | }
221 |
222 | // caller should hold cfg.mu
223 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) {
224 | // log.Printf("ConnectClient %v to %v\n", ck, to)
225 | endnames := cfg.clerks[ck]
226 | for j := 0; j < len(to); j++ {
227 | s := endnames[to[j]]
228 | cfg.net.Enable(s, true)
229 | }
230 | }
231 |
232 | func (cfg *config) ConnectClient(ck *Clerk, to []int) {
233 | cfg.mu.Lock()
234 | defer cfg.mu.Unlock()
235 | cfg.ConnectClientUnlocked(ck, to)
236 | }
237 |
238 | // caller should hold cfg.mu
239 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) {
240 | // log.Printf("DisconnectClient %v from %v\n", ck, from)
241 | endnames := cfg.clerks[ck]
242 | for j := 0; j < len(from); j++ {
243 | s := endnames[from[j]]
244 | cfg.net.Enable(s, false)
245 | }
246 | }
247 |
248 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) {
249 | cfg.mu.Lock()
250 | defer cfg.mu.Unlock()
251 | cfg.DisconnectClientUnlocked(ck, from)
252 | }
253 |
254 | // Shutdown a server by isolating it
255 | func (cfg *config) ShutdownServer(i int) {
256 | cfg.mu.Lock()
257 | defer cfg.mu.Unlock()
258 |
259 | cfg.disconnectUnlocked(i, cfg.All())
260 |
261 | // disable client connections to the server.
262 | // it's important to do this before creating
263 | // the new Persister in saved[i], to avoid
264 | // the possibility of the server returning a
265 | // positive reply to an Append but persisting
266 | // the result in the superseded Persister.
267 | cfg.net.DeleteServer(i)
268 |
269 | // a fresh persister, in case old instance
270 | // continues to update the Persister.
271 | // but copy old persister's content so that we always
272 | // pass Make() the last persisted state.
273 | if cfg.saved[i] != nil {
274 | cfg.saved[i] = cfg.saved[i].Copy()
275 | }
276 |
277 | kv := cfg.kvservers[i]
278 | if kv != nil {
279 | cfg.mu.Unlock()
280 | kv.Kill()
281 | cfg.mu.Lock()
282 | cfg.kvservers[i] = nil
283 | }
284 | }
285 |
286 | // To restart a server, first call ShutdownServer
287 | func (cfg *config) StartServer(i int) {
288 | cfg.mu.Lock()
289 |
290 | // a fresh set of outgoing ClientEnd names.
291 | cfg.endnames[i] = make([]string, cfg.n)
292 | for j := 0; j < cfg.n; j++ {
293 | cfg.endnames[i][j] = randstring(20)
294 | }
295 |
296 | // a fresh set of ClientEnds.
297 | ends := make([]*labrpc.ClientEnd, cfg.n)
298 | for j := 0; j < cfg.n; j++ {
299 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
300 | cfg.net.Connect(cfg.endnames[i][j], j)
301 | }
302 |
303 | // a fresh persister, so old instance doesn't overwrite
304 | // new instance's persisted state.
305 | // give the fresh persister a copy of the old persister's
306 | // state, so that the spec is that we pass StartKVServer()
307 | // the last persisted state.
308 | if cfg.saved[i] != nil {
309 | cfg.saved[i] = cfg.saved[i].Copy()
310 | } else {
311 | cfg.saved[i] = raft.MakePersister()
312 | }
313 | cfg.mu.Unlock()
314 |
315 | cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate)
316 |
317 | kvsvc := labrpc.MakeService(cfg.kvservers[i])
318 | rfsvc := labrpc.MakeService(cfg.kvservers[i].rf)
319 | srv := labrpc.MakeServer()
320 | srv.AddService(kvsvc)
321 | srv.AddService(rfsvc)
322 | cfg.net.AddServer(i, srv)
323 | }
324 |
325 | func (cfg *config) Leader() (bool, int) {
326 | cfg.mu.Lock()
327 | defer cfg.mu.Unlock()
328 |
329 | for i := 0; i < cfg.n; i++ {
330 | _, is_leader := cfg.kvservers[i].rf.GetState()
331 | if is_leader {
332 | return true, i
333 | }
334 | }
335 | return false, 0
336 | }
337 |
338 | // Partition servers into 2 groups and put current leader in minority
339 | func (cfg *config) make_partition() ([]int, []int) {
340 | _, l := cfg.Leader()
341 | p1 := make([]int, cfg.n/2+1)
342 | p2 := make([]int, cfg.n/2)
343 | j := 0
344 | for i := 0; i < cfg.n; i++ {
345 | if i != l {
346 | if j < len(p1) {
347 | p1[j] = i
348 | } else {
349 | p2[j-len(p1)] = i
350 | }
351 | j++
352 | }
353 | }
354 | p2[len(p2)-1] = l
355 | return p1, p2
356 | }
357 |
358 | var ncpu_once sync.Once
359 |
360 | func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config {
361 | ncpu_once.Do(func() {
362 | if runtime.NumCPU() < 2 {
363 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
364 | }
365 | rand.Seed(makeSeed())
366 | })
367 | runtime.GOMAXPROCS(4)
368 | cfg := &config{}
369 | cfg.t = t
370 | cfg.net = labrpc.MakeNetwork()
371 | cfg.n = n
372 | cfg.kvservers = make([]*KVServer, cfg.n)
373 | cfg.saved = make([]*raft.Persister, cfg.n)
374 | cfg.endnames = make([][]string, cfg.n)
375 | cfg.clerks = make(map[*Clerk][]string)
376 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
377 | cfg.maxraftstate = maxraftstate
378 | cfg.start = time.Now()
379 |
380 | // create a full set of KV servers.
381 | for i := 0; i < cfg.n; i++ {
382 | cfg.StartServer(i)
383 | }
384 |
385 | cfg.ConnectAll()
386 |
387 | cfg.net.Reliable(!unreliable)
388 |
389 | return cfg
390 | }
391 |
392 | func (cfg *config) rpcTotal() int {
393 | return cfg.net.GetTotalCount()
394 | }
395 |
396 | // start a Test.
397 | // print the Test message.
398 | // e.g. cfg.begin("Test (2B): RPC counts aren't too high")
399 | func (cfg *config) begin(description string) {
400 | fmt.Printf("%s ...\n", description)
401 | cfg.t0 = time.Now()
402 | cfg.rpcs0 = cfg.rpcTotal()
403 | atomic.StoreInt32(&cfg.ops, 0)
404 | }
405 |
406 | func (cfg *config) op() {
407 | atomic.AddInt32(&cfg.ops, 1)
408 | }
409 |
410 | // end a Test -- the fact that we got here means there
411 | // was no failure.
412 | // print the Passed message,
413 | // and some performance numbers.
414 | func (cfg *config) end() {
415 | cfg.checkTimeout()
416 | if cfg.t.Failed() == false {
417 | t := time.Since(cfg.t0).Seconds() // real time
418 | npeers := cfg.n // number of Raft peers
419 | nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends
420 | ops := atomic.LoadInt32(&cfg.ops) // number of clerk get/put/append calls
421 |
422 | fmt.Printf(" ... Passed --")
423 | fmt.Printf(" %4.1f %d %5d %4d\n", t, npeers, nrpc, ops)
424 | }
425 | }
426 |
--------------------------------------------------------------------------------
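
Note: test_test.go drives the harness above roughly as in this condensed, hypothetical sketch:

    func sketchTest(t *testing.T) {
        cfg := make_config(t, 5, false, -1) // 5 servers, reliable network, snapshots disabled
        defer cfg.cleanup()

        cfg.begin("Test: harness usage sketch")
        ck := cfg.makeClient(cfg.All()) // a clerk connected to every server
        ck.Put("k", "v")
        cfg.op()
        if v := ck.Get("k"); v != "v" {
            t.Fatalf("Get(k) = %q, want %q", v, "v")
        }
        cfg.op()
        cfg.end()
    }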
/src/kvraft/server.go:
--------------------------------------------------------------------------------
1 | package kvraft
2 |
3 | import (
4 | "../labgob"
5 | "../labrpc"
6 | "log"
7 | "../raft"
8 | "sync"
9 | "sync/atomic"
10 | )
11 |
12 | const Debug = 0
13 |
14 | func DPrintf(format string, a ...interface{}) (n int, err error) {
15 | if Debug > 0 {
16 | log.Printf(format, a...)
17 | }
18 | return
19 | }
20 |
21 |
22 | type Op struct {
23 | // Your definitions here.
24 | // Field names must start with capital letters,
25 | // otherwise RPC will break.
26 | }
27 |
28 | type KVServer struct {
29 | mu sync.Mutex
30 | me int
31 | rf *raft.Raft
32 | applyCh chan raft.ApplyMsg
33 | dead int32 // set by Kill()
34 |
35 | maxraftstate int // snapshot if log grows this big
36 |
37 | // Your definitions here.
38 | }
39 |
40 |
41 | func (kv *KVServer) Get(args *GetArgs, reply *GetReply) {
42 | // Your code here.
43 | }
44 |
45 | func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) {
46 | // Your code here.
47 | }
48 |
49 | //
50 | // the tester calls Kill() when a KVServer instance won't
51 | // be needed again. for your convenience, we supply
52 | // code to set rf.dead (without needing a lock),
53 | // and a killed() method to test rf.dead in
54 | // long-running loops. you can also add your own
55 | // code to Kill(). you're not required to do anything
56 | // about this, but it may be convenient (for example)
57 | // to suppress debug output from a Kill()ed instance.
58 | //
59 | func (kv *KVServer) Kill() {
60 | atomic.StoreInt32(&kv.dead, 1)
61 | kv.rf.Kill()
62 | // Your code here, if desired.
63 | }
64 |
65 | func (kv *KVServer) killed() bool {
66 | z := atomic.LoadInt32(&kv.dead)
67 | return z == 1
68 | }
69 |
70 | //
71 | // servers[] contains the ports of the set of
72 | // servers that will cooperate via Raft to
73 | // form the fault-tolerant key/value service.
74 | // me is the index of the current server in servers[].
75 | // the k/v server should store snapshots through the underlying Raft
76 | // implementation, which should call persister.SaveStateAndSnapshot() to
77 | // atomically save the Raft state along with the snapshot.
78 | // the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes,
79 | // in order to allow Raft to garbage-collect its log. if maxraftstate is -1,
80 | // you don't need to snapshot.
81 | // StartKVServer() must return quickly, so it should start goroutines
82 | // for any long-running work.
83 | //
84 | func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer {
85 | // call labgob.Register on structures you want
86 | // Go's RPC library to marshall/unmarshall.
87 | labgob.Register(Op{})
88 |
89 | kv := new(KVServer)
90 | kv.me = me
91 | kv.maxraftstate = maxraftstate
92 |
93 | // You may need initialization code here.
94 |
95 | kv.applyCh = make(chan raft.ApplyMsg)
96 | kv.rf = raft.Make(servers, me, persister, kv.applyCh)
97 |
98 | // You may need initialization code here.
99 |
100 | return kv
101 | }
102 |
--------------------------------------------------------------------------------
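
Note: the StartKVServer comment above implies a long-running applier goroutine that drains applyCh. A minimal sketch, assuming the standard raft.ApplyMsg fields (CommandValid, Command, CommandIndex) and key/value state added to KVServer:

    // Hypothetical applier; fields beyond the skeleton are assumptions.
    func (kv *KVServer) applierSketch() {
        for !kv.killed() {
            msg := <-kv.applyCh
            if !msg.CommandValid {
                continue // e.g. a snapshot being installed; handled separately
            }
            op := msg.Command.(Op)
            kv.mu.Lock()
            // apply op to the key/value state here, then notify the RPC
            // handler waiting on msg.CommandIndex.
            _ = op
            kv.mu.Unlock()
        }
    }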
/src/labgob/labgob.go:
--------------------------------------------------------------------------------
1 | package labgob
2 |
3 | //
4 | // trying to send non-capitalized fields over RPC produces a range of
5 | // misbehavior, including both mysterious incorrect computation and
6 | // outright crashes. so this wrapper around Go's encoding/gob warns
7 | // about non-capitalized field names.
8 | //
9 |
10 | import "encoding/gob"
11 | import "io"
12 | import "reflect"
13 | import "fmt"
14 | import "sync"
15 | import "unicode"
16 | import "unicode/utf8"
17 |
18 | var mu sync.Mutex
19 | var errorCount int // for TestCapital
20 | var checked map[reflect.Type]bool
21 |
22 | type LabEncoder struct {
23 | gob *gob.Encoder
24 | }
25 |
26 | func NewEncoder(w io.Writer) *LabEncoder {
27 | enc := &LabEncoder{}
28 | enc.gob = gob.NewEncoder(w)
29 | return enc
30 | }
31 |
32 | func (enc *LabEncoder) Encode(e interface{}) error {
33 | checkValue(e)
34 | return enc.gob.Encode(e)
35 | }
36 |
37 | func (enc *LabEncoder) EncodeValue(value reflect.Value) error {
38 | checkValue(value.Interface())
39 | return enc.gob.EncodeValue(value)
40 | }
41 |
42 | type LabDecoder struct {
43 | gob *gob.Decoder
44 | }
45 |
46 | func NewDecoder(r io.Reader) *LabDecoder {
47 | dec := &LabDecoder{}
48 | dec.gob = gob.NewDecoder(r)
49 | return dec
50 | }
51 |
52 | func (dec *LabDecoder) Decode(e interface{}) error {
53 | checkValue(e)
54 | checkDefault(e)
55 | return dec.gob.Decode(e)
56 | }
57 |
58 | func Register(value interface{}) {
59 | checkValue(value)
60 | gob.Register(value)
61 | }
62 |
63 | func RegisterName(name string, value interface{}) {
64 | checkValue(value)
65 | gob.RegisterName(name, value)
66 | }
67 |
68 | func checkValue(value interface{}) {
69 | checkType(reflect.TypeOf(value))
70 | }
71 |
72 | func checkType(t reflect.Type) {
73 | k := t.Kind()
74 |
75 | mu.Lock()
76 | // only complain once, and avoid recursion.
77 | if checked == nil {
78 | checked = map[reflect.Type]bool{}
79 | }
80 | if checked[t] {
81 | mu.Unlock()
82 | return
83 | }
84 | checked[t] = true
85 | mu.Unlock()
86 |
87 | switch k {
88 | case reflect.Struct:
89 | for i := 0; i < t.NumField(); i++ {
90 | f := t.Field(i)
91 | rune, _ := utf8.DecodeRuneInString(f.Name)
92 | if unicode.IsUpper(rune) == false {
93 | // ta da
94 | fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n",
95 | f.Name, t.Name())
96 | mu.Lock()
97 | errorCount += 1
98 | mu.Unlock()
99 | }
100 | checkType(f.Type)
101 | }
102 | return
103 | case reflect.Slice, reflect.Array, reflect.Ptr:
104 | checkType(t.Elem())
105 | return
106 | case reflect.Map:
107 | checkType(t.Elem())
108 | checkType(t.Key())
109 | return
110 | default:
111 | return
112 | }
113 | }
114 |
115 | //
116 | // warn if the value contains non-default values,
117 | // as it would if one sent an RPC but the reply
118 | // struct was already modified. if the RPC reply
119 | // contains default values, GOB won't overwrite
120 | // the non-default value.
121 | //
122 | func checkDefault(value interface{}) {
123 | if value == nil {
124 | return
125 | }
126 | checkDefault1(reflect.ValueOf(value), 1, "")
127 | }
128 |
129 | func checkDefault1(value reflect.Value, depth int, name string) {
130 | if depth > 3 {
131 | return
132 | }
133 |
134 | t := value.Type()
135 | k := t.Kind()
136 |
137 | switch k {
138 | case reflect.Struct:
139 | for i := 0; i < t.NumField(); i++ {
140 | vv := value.Field(i)
141 | name1 := t.Field(i).Name
142 | if name != "" {
143 | name1 = name + "." + name1
144 | }
145 | checkDefault1(vv, depth+1, name1)
146 | }
147 | return
148 | case reflect.Ptr:
149 | if value.IsNil() {
150 | return
151 | }
152 | checkDefault1(value.Elem(), depth+1, name)
153 | return
154 | case reflect.Bool,
155 | reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
156 | reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
157 | reflect.Uintptr, reflect.Float32, reflect.Float64,
158 | reflect.String:
159 | if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false {
160 | mu.Lock()
161 | if errorCount < 1 {
162 | what := name
163 | if what == "" {
164 | what = t.Name()
165 | }
166 | // this warning typically arises if code re-uses the same RPC reply
167 | // variable for multiple RPC calls, or if code restores persisted
168 | // state into variables that already have non-default values.
169 | fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n",
170 | what)
171 | }
172 | errorCount += 1
173 | mu.Unlock()
174 | }
175 | return
176 | }
177 | }
178 |
--------------------------------------------------------------------------------
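
Note: a self-contained round trip showing how these wrappers are used (the relative import matches the repo's GOPATH-era layout); decoding into a zero value keeps checkDefault quiet:

    package main

    import (
        "bytes"
        "fmt"

        "../labgob"
    )

    type Entry struct { // exported field names, as labgob requires
        Term    int
        Command string
    }

    func main() {
        w := new(bytes.Buffer)
        e := labgob.NewEncoder(w)
        e.Encode(Entry{Term: 3, Command: "x"})

        var out Entry // zero value, so no "non-default" warning
        d := labgob.NewDecoder(bytes.NewBuffer(w.Bytes()))
        d.Decode(&out)
        fmt.Println(out.Term, out.Command) // prints: 3 x
    }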
/src/labgob/test_test.go:
--------------------------------------------------------------------------------
1 | package labgob
2 |
3 | import "testing"
4 |
5 | import "bytes"
6 |
7 | type T1 struct {
8 | T1int0 int
9 | T1int1 int
10 | T1string0 string
11 | T1string1 string
12 | }
13 |
14 | type T2 struct {
15 | T2slice []T1
16 | T2map map[int]*T1
17 | T2t3 interface{}
18 | }
19 |
20 | type T3 struct {
21 | T3int999 int
22 | }
23 |
24 | //
25 | // test that we didn't break GOB.
26 | //
27 | func TestGOB(t *testing.T) {
28 | e0 := errorCount
29 |
30 | w := new(bytes.Buffer)
31 |
32 | Register(T3{})
33 |
34 | {
35 | x0 := 0
36 | x1 := 1
37 | t1 := T1{}
38 | t1.T1int1 = 1
39 | t1.T1string1 = "6.824"
40 | t2 := T2{}
41 | t2.T2slice = []T1{T1{}, t1}
42 | t2.T2map = map[int]*T1{}
43 | t2.T2map[99] = &T1{1, 2, "x", "y"}
44 | t2.T2t3 = T3{999}
45 |
46 | e := NewEncoder(w)
47 | e.Encode(x0)
48 | e.Encode(x1)
49 | e.Encode(t1)
50 | e.Encode(t2)
51 | }
52 | data := w.Bytes()
53 |
54 | {
55 | var x0 int
56 | var x1 int
57 | var t1 T1
58 | var t2 T2
59 |
60 | r := bytes.NewBuffer(data)
61 | d := NewDecoder(r)
62 | if d.Decode(&x0) != nil ||
63 | d.Decode(&x1) != nil ||
64 | d.Decode(&t1) != nil ||
65 | d.Decode(&t2) != nil {
66 | t.Fatalf("Decode failed")
67 | }
68 |
69 | if x0 != 0 {
70 | t.Fatalf("wrong x0 %v\n", x0)
71 | }
72 | if x1 != 1 {
73 | t.Fatalf("wrong x1 %v\n", x1)
74 | }
75 | if t1.T1int0 != 0 {
76 | t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0)
77 | }
78 | if t1.T1int1 != 1 {
79 | t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1)
80 | }
81 | if t1.T1string0 != "" {
82 | t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0)
83 | }
84 | if t1.T1string1 != "6.824" {
85 | t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1)
86 | }
87 | if len(t2.T2slice) != 2 {
88 | t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice))
89 | }
90 | if t2.T2slice[1].T1int1 != 1 {
91 | t.Fatalf("wrong slice value\n")
92 | }
93 | if len(t2.T2map) != 1 {
94 | t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map))
95 | }
96 | if t2.T2map[99].T1string1 != "y" {
97 | t.Fatalf("wrong map value\n")
98 | }
99 | t3 := (t2.T2t3).(T3)
100 | if t3.T3int999 != 999 {
101 | t.Fatalf("wrong t2.T2t3.T3int999\n")
102 | }
103 | }
104 |
105 | if errorCount != e0 {
106 | t.Fatalf("there were errors, but should not have been")
107 | }
108 | }
109 |
110 | type T4 struct {
111 | Yes int
112 | no int
113 | }
114 |
115 | //
116 | // make sure we check capitalization
117 | // labgob prints one warning during this test.
118 | //
119 | func TestCapital(t *testing.T) {
120 | e0 := errorCount
121 |
122 | v := []map[*T4]int{}
123 |
124 | w := new(bytes.Buffer)
125 | e := NewEncoder(w)
126 | e.Encode(v)
127 | data := w.Bytes()
128 |
129 | var v1 []map[T4]int
130 | r := bytes.NewBuffer(data)
131 | d := NewDecoder(r)
132 | d.Decode(&v1)
133 |
134 | if errorCount != e0+1 {
135 | t.Fatalf("failed to warn about lower-case field")
136 | }
137 | }
138 |
139 | //
140 | // check that we warn when someone sends a default value over
141 | // RPC but the target into which we're decoding holds a non-default
142 | // value, which GOB seems not to overwrite as you'd expect.
143 | //
144 | // labgob does not print a warning.
145 | //
146 | func TestDefault(t *testing.T) {
147 | e0 := errorCount
148 |
149 | type DD struct {
150 | X int
151 | }
152 |
153 | // send a default value...
154 | dd1 := DD{}
155 |
156 | w := new(bytes.Buffer)
157 | e := NewEncoder(w)
158 | e.Encode(dd1)
159 | data := w.Bytes()
160 |
161 | // and receive it into memory that already
162 | // holds non-default values.
163 | reply := DD{99}
164 |
165 | r := bytes.NewBuffer(data)
166 | d := NewDecoder(r)
167 | d.Decode(&reply)
168 |
169 | if errorCount != e0+1 {
170 | t.Fatalf("failed to warn about decoding into non-default value")
171 | }
172 | }
173 |
--------------------------------------------------------------------------------
/src/labrpc/test_test.go:
--------------------------------------------------------------------------------
1 | package labrpc
2 |
3 | import "testing"
4 | import "strconv"
5 | import "sync"
6 | import "runtime"
7 | import "time"
8 | import "fmt"
9 |
10 | type JunkArgs struct {
11 | X int
12 | }
13 | type JunkReply struct {
14 | X string
15 | }
16 |
17 | type JunkServer struct {
18 | mu sync.Mutex
19 | log1 []string
20 | log2 []int
21 | }
22 |
23 | func (js *JunkServer) Handler1(args string, reply *int) {
24 | js.mu.Lock()
25 | defer js.mu.Unlock()
26 | js.log1 = append(js.log1, args)
27 | *reply, _ = strconv.Atoi(args)
28 | }
29 |
30 | func (js *JunkServer) Handler2(args int, reply *string) {
31 | js.mu.Lock()
32 | defer js.mu.Unlock()
33 | js.log2 = append(js.log2, args)
34 | *reply = "handler2-" + strconv.Itoa(args)
35 | }
36 |
37 | func (js *JunkServer) Handler3(args int, reply *int) {
38 | js.mu.Lock()
39 | defer js.mu.Unlock()
40 | time.Sleep(20 * time.Second)
41 | *reply = -args
42 | }
43 |
44 | // args is a pointer
45 | func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) {
46 | reply.X = "pointer"
47 | }
48 |
49 | // args is not a pointer
50 | func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) {
51 | reply.X = "no pointer"
52 | }
53 |
54 | func (js *JunkServer) Handler6(args string, reply *int) {
55 | js.mu.Lock()
56 | defer js.mu.Unlock()
57 | *reply = len(args)
58 | }
59 |
60 | func (js *JunkServer) Handler7(args int, reply *string) {
61 | js.mu.Lock()
62 | defer js.mu.Unlock()
63 | *reply = ""
64 | for i := 0; i < args; i++ {
65 | *reply = *reply + "y"
66 | }
67 | }
68 |
69 | func TestBasic(t *testing.T) {
70 | runtime.GOMAXPROCS(4)
71 |
72 | rn := MakeNetwork()
73 | defer rn.Cleanup()
74 |
75 | e := rn.MakeEnd("end1-99")
76 |
77 | js := &JunkServer{}
78 | svc := MakeService(js)
79 |
80 | rs := MakeServer()
81 | rs.AddService(svc)
82 | rn.AddServer("server99", rs)
83 |
84 | rn.Connect("end1-99", "server99")
85 | rn.Enable("end1-99", true)
86 |
87 | {
88 | reply := ""
89 | e.Call("JunkServer.Handler2", 111, &reply)
90 | if reply != "handler2-111" {
91 | t.Fatalf("wrong reply from Handler2")
92 | }
93 | }
94 |
95 | {
96 | reply := 0
97 | e.Call("JunkServer.Handler1", "9099", &reply)
98 | if reply != 9099 {
99 | t.Fatalf("wrong reply from Handler1")
100 | }
101 | }
102 | }
103 |
104 | func TestTypes(t *testing.T) {
105 | runtime.GOMAXPROCS(4)
106 |
107 | rn := MakeNetwork()
108 | defer rn.Cleanup()
109 |
110 | e := rn.MakeEnd("end1-99")
111 |
112 | js := &JunkServer{}
113 | svc := MakeService(js)
114 |
115 | rs := MakeServer()
116 | rs.AddService(svc)
117 | rn.AddServer("server99", rs)
118 |
119 | rn.Connect("end1-99", "server99")
120 | rn.Enable("end1-99", true)
121 |
122 | {
123 | var args JunkArgs
124 | var reply JunkReply
125 | // args must match type (pointer or not) of handler.
126 | e.Call("JunkServer.Handler4", &args, &reply)
127 | if reply.X != "pointer" {
128 | t.Fatalf("wrong reply from Handler4")
129 | }
130 | }
131 |
132 | {
133 | var args JunkArgs
134 | var reply JunkReply
135 | // args must match type (pointer or not) of handler.
136 | e.Call("JunkServer.Handler5", args, &reply)
137 | if reply.X != "no pointer" {
138 | t.Fatalf("wrong reply from Handler5")
139 | }
140 | }
141 | }
142 |
143 | //
144 | // does net.Enable(endname, false) really disconnect a client?
145 | //
146 | func TestDisconnect(t *testing.T) {
147 | runtime.GOMAXPROCS(4)
148 |
149 | rn := MakeNetwork()
150 | defer rn.Cleanup()
151 |
152 | e := rn.MakeEnd("end1-99")
153 |
154 | js := &JunkServer{}
155 | svc := MakeService(js)
156 |
157 | rs := MakeServer()
158 | rs.AddService(svc)
159 | rn.AddServer("server99", rs)
160 |
161 | rn.Connect("end1-99", "server99")
162 |
163 | {
164 | reply := ""
165 | e.Call("JunkServer.Handler2", 111, &reply)
166 | if reply != "" {
167 | t.Fatalf("unexpected reply from Handler2")
168 | }
169 | }
170 |
171 | rn.Enable("end1-99", true)
172 |
173 | {
174 | reply := 0
175 | e.Call("JunkServer.Handler1", "9099", &reply)
176 | if reply != 9099 {
177 | t.Fatalf("wrong reply from Handler1")
178 | }
179 | }
180 | }
181 |
182 | //
183 | // test net.GetCount()
184 | //
185 | func TestCounts(t *testing.T) {
186 | runtime.GOMAXPROCS(4)
187 |
188 | rn := MakeNetwork()
189 | defer rn.Cleanup()
190 |
191 | e := rn.MakeEnd("end1-99")
192 |
193 | js := &JunkServer{}
194 | svc := MakeService(js)
195 |
196 | rs := MakeServer()
197 | rs.AddService(svc)
198 | rn.AddServer(99, rs)
199 |
200 | rn.Connect("end1-99", 99)
201 | rn.Enable("end1-99", true)
202 |
203 | for i := 0; i < 17; i++ {
204 | reply := ""
205 | e.Call("JunkServer.Handler2", i, &reply)
206 | wanted := "handler2-" + strconv.Itoa(i)
207 | if reply != wanted {
208 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
209 | }
210 | }
211 |
212 | n := rn.GetCount(99)
213 | if n != 17 {
214 | t.Fatalf("wrong GetCount() %v, expected 17\n", n)
215 | }
216 | }
217 |
218 | //
219 | // test net.GetTotalBytes()
220 | //
221 | func TestBytes(t *testing.T) {
222 | runtime.GOMAXPROCS(4)
223 |
224 | rn := MakeNetwork()
225 | defer rn.Cleanup()
226 |
227 | e := rn.MakeEnd("end1-99")
228 |
229 | js := &JunkServer{}
230 | svc := MakeService(js)
231 |
232 | rs := MakeServer()
233 | rs.AddService(svc)
234 | rn.AddServer(99, rs)
235 |
236 | rn.Connect("end1-99", 99)
237 | rn.Enable("end1-99", true)
238 |
239 | for i := 0; i < 17; i++ {
240 | args := "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
241 | args = args + args
242 | args = args + args
243 | reply := 0
244 | e.Call("JunkServer.Handler6", args, &reply)
245 | wanted := len(args)
246 | if reply != wanted {
247 | t.Fatalf("wrong reply %v from Handler6, expecting %v", reply, wanted)
248 | }
249 | }
250 |
251 | n := rn.GetTotalBytes()
252 | if n < 4828 || n > 6000 {
253 | t.Fatalf("wrong GetTotalBytes() %v, expected about 5000\n", n)
254 | }
255 |
256 | for i := 0; i < 17; i++ {
257 | args := 107
258 | reply := ""
259 | e.Call("JunkServer.Handler7", args, &reply)
260 | wanted := args
261 | if len(reply) != wanted {
262 | t.Fatalf("wrong reply len=%v from Handler6, expecting %v", len(reply), wanted)
263 | }
264 | }
265 |
266 | nn := rn.GetTotalBytes() - n
267 | if nn < 1800 || nn > 2500 {
268 | t.Fatalf("wrong GetTotalBytes() %v, expected about 2000\n", nn)
269 | }
270 | }
271 |
272 | //
273 | // test RPCs from concurrent ClientEnds
274 | //
275 | func TestConcurrentMany(t *testing.T) {
276 | runtime.GOMAXPROCS(4)
277 |
278 | rn := MakeNetwork()
279 | defer rn.Cleanup()
280 |
281 | js := &JunkServer{}
282 | svc := MakeService(js)
283 |
284 | rs := MakeServer()
285 | rs.AddService(svc)
286 | rn.AddServer(1000, rs)
287 |
288 | ch := make(chan int)
289 |
290 | nclients := 20
291 | nrpcs := 10
292 | for ii := 0; ii < nclients; ii++ {
293 | go func(i int) {
294 | n := 0
295 | defer func() { ch <- n }()
296 |
297 | e := rn.MakeEnd(i)
298 | rn.Connect(i, 1000)
299 | rn.Enable(i, true)
300 |
301 | for j := 0; j < nrpcs; j++ {
302 | arg := i*100 + j
303 | reply := ""
304 | e.Call("JunkServer.Handler2", arg, &reply)
305 | wanted := "handler2-" + strconv.Itoa(arg)
306 | if reply != wanted {
307 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
308 | }
309 | n += 1
310 | }
311 | }(ii)
312 | }
313 |
314 | total := 0
315 | for ii := 0; ii < nclients; ii++ {
316 | x := <-ch
317 | total += x
318 | }
319 |
320 | if total != nclients*nrpcs {
321 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs)
322 | }
323 |
324 | n := rn.GetCount(1000)
325 | if n != total {
326 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total)
327 | }
328 | }
329 |
330 | //
331 | // test unreliable
332 | //
333 | func TestUnreliable(t *testing.T) {
334 | runtime.GOMAXPROCS(4)
335 |
336 | rn := MakeNetwork()
337 | defer rn.Cleanup()
338 | rn.Reliable(false)
339 |
340 | js := &JunkServer{}
341 | svc := MakeService(js)
342 |
343 | rs := MakeServer()
344 | rs.AddService(svc)
345 | rn.AddServer(1000, rs)
346 |
347 | ch := make(chan int)
348 |
349 | nclients := 300
350 | for ii := 0; ii < nclients; ii++ {
351 | go func(i int) {
352 | n := 0
353 | defer func() { ch <- n }()
354 |
355 | e := rn.MakeEnd(i)
356 | rn.Connect(i, 1000)
357 | rn.Enable(i, true)
358 |
359 | arg := i * 100
360 | reply := ""
361 | ok := e.Call("JunkServer.Handler2", arg, &reply)
362 | if ok {
363 | wanted := "handler2-" + strconv.Itoa(arg)
364 | if reply != wanted {
365 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
366 | }
367 | n += 1
368 | }
369 | }(ii)
370 | }
371 |
372 | total := 0
373 | for ii := 0; ii < nclients; ii++ {
374 | x := <-ch
375 | total += x
376 | }
377 |
378 | if total == nclients || total == 0 {
379 | t.Fatalf("all RPCs succeeded despite unreliable")
380 | }
381 | }
382 |
383 | //
384 | // test concurrent RPCs from a single ClientEnd
385 | //
386 | func TestConcurrentOne(t *testing.T) {
387 | runtime.GOMAXPROCS(4)
388 |
389 | rn := MakeNetwork()
390 | defer rn.Cleanup()
391 |
392 | js := &JunkServer{}
393 | svc := MakeService(js)
394 |
395 | rs := MakeServer()
396 | rs.AddService(svc)
397 | rn.AddServer(1000, rs)
398 |
399 | e := rn.MakeEnd("c")
400 | rn.Connect("c", 1000)
401 | rn.Enable("c", true)
402 |
403 | ch := make(chan int)
404 |
405 | nrpcs := 20
406 | for ii := 0; ii < nrpcs; ii++ {
407 | go func(i int) {
408 | n := 0
409 | defer func() { ch <- n }()
410 |
411 | arg := 100 + i
412 | reply := ""
413 | e.Call("JunkServer.Handler2", arg, &reply)
414 | wanted := "handler2-" + strconv.Itoa(arg)
415 | if reply != wanted {
416 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted)
417 | }
418 | n += 1
419 | }(ii)
420 | }
421 |
422 | total := 0
423 | for ii := 0; ii < nrpcs; ii++ {
424 | x := <-ch
425 | total += x
426 | }
427 |
428 | if total != nrpcs {
429 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs)
430 | }
431 |
432 | js.mu.Lock()
433 | defer js.mu.Unlock()
434 | if len(js.log2) != nrpcs {
435 | t.Fatalf("wrong number of RPCs delivered")
436 | }
437 |
438 | n := rn.GetCount(1000)
439 | if n != total {
440 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total)
441 | }
442 | }
443 |
444 | //
445 | // regression: an RPC that's delayed during Enabled=false
446 | // should not delay subsequent RPCs (e.g. after Enabled=true).
447 | //
448 | func TestRegression1(t *testing.T) {
449 | runtime.GOMAXPROCS(4)
450 |
451 | rn := MakeNetwork()
452 | defer rn.Cleanup()
453 |
454 | js := &JunkServer{}
455 | svc := MakeService(js)
456 |
457 | rs := MakeServer()
458 | rs.AddService(svc)
459 | rn.AddServer(1000, rs)
460 |
461 | e := rn.MakeEnd("c")
462 | rn.Connect("c", 1000)
463 |
464 | // start some RPCs while the ClientEnd is disabled.
465 | // they'll be delayed.
466 | rn.Enable("c", false)
467 | ch := make(chan bool)
468 | nrpcs := 20
469 | for ii := 0; ii < nrpcs; ii++ {
470 | go func(i int) {
471 | ok := false
472 | defer func() { ch <- ok }()
473 |
474 | arg := 100 + i
475 | reply := ""
476 | // this call ought to return false.
477 | e.Call("JunkServer.Handler2", arg, &reply)
478 | ok = true
479 | }(ii)
480 | }
481 |
482 | time.Sleep(100 * time.Millisecond)
483 |
484 | // now enable the ClientEnd and check that an RPC completes quickly.
485 | t0 := time.Now()
486 | rn.Enable("c", true)
487 | {
488 | arg := 99
489 | reply := ""
490 | e.Call("JunkServer.Handler2", arg, &reply)
491 | wanted := "handler2-" + strconv.Itoa(arg)
492 | if reply != wanted {
493 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted)
494 | }
495 | }
496 | dur := time.Since(t0).Seconds()
497 |
498 | if dur > 0.03 {
499 | t.Fatalf("RPC took too long (%v) after Enable", dur)
500 | }
501 |
502 | for ii := 0; ii < nrpcs; ii++ {
503 | <-ch
504 | }
505 |
506 | js.mu.Lock()
507 | defer js.mu.Unlock()
508 | if len(js.log2) != 1 {
509 | t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2))
510 | }
511 |
512 | n := rn.GetCount(1000)
513 | if n != 1 {
514 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1)
515 | }
516 | }
517 |
518 | //
519 | // if an RPC is stuck in a server, and the server
520 | // is killed with DeleteServer(), does the RPC
521 | // get un-stuck?
522 | //
523 | func TestKilled(t *testing.T) {
524 | runtime.GOMAXPROCS(4)
525 |
526 | rn := MakeNetwork()
527 | defer rn.Cleanup()
528 |
529 | e := rn.MakeEnd("end1-99")
530 |
531 | js := &JunkServer{}
532 | svc := MakeService(js)
533 |
534 | rs := MakeServer()
535 | rs.AddService(svc)
536 | rn.AddServer("server99", rs)
537 |
538 | rn.Connect("end1-99", "server99")
539 | rn.Enable("end1-99", true)
540 |
541 | doneCh := make(chan bool)
542 | go func() {
543 | reply := 0
544 | ok := e.Call("JunkServer.Handler3", 99, &reply)
545 | doneCh <- ok
546 | }()
547 |
548 | time.Sleep(1000 * time.Millisecond)
549 |
550 | select {
551 | case <-doneCh:
552 | t.Fatalf("Handler3 should not have returned yet")
553 | case <-time.After(100 * time.Millisecond):
554 | }
555 |
556 | rn.DeleteServer("server99")
557 |
558 | select {
559 | case x := <-doneCh:
560 | if x != false {
561 | t.Fatalf("Handler3 returned successfully despite DeleteServer()")
562 | }
563 | case <-time.After(100 * time.Millisecond):
564 | t.Fatalf("Handler3 should return after DeleteServer()")
565 | }
566 | }
567 |
568 | func TestBenchmark(t *testing.T) {
569 | runtime.GOMAXPROCS(4)
570 |
571 | rn := MakeNetwork()
572 | defer rn.Cleanup()
573 |
574 | e := rn.MakeEnd("end1-99")
575 |
576 | js := &JunkServer{}
577 | svc := MakeService(js)
578 |
579 | rs := MakeServer()
580 | rs.AddService(svc)
581 | rn.AddServer("server99", rs)
582 |
583 | rn.Connect("end1-99", "server99")
584 | rn.Enable("end1-99", true)
585 |
586 | t0 := time.Now()
587 | n := 100000
588 | for iters := 0; iters < n; iters++ {
589 | reply := ""
590 | e.Call("JunkServer.Handler2", 111, &reply)
591 | if reply != "handler2-111" {
592 | t.Fatalf("wrong reply from Handler2")
593 | }
594 | }
595 | fmt.Printf("%v for %v\n", time.Since(t0), n)
596 | // march 2016, rtm laptop, 22 microseconds per RPC
597 | }
598 |
--------------------------------------------------------------------------------
/src/main/.gitignore:
--------------------------------------------------------------------------------
1 | mr-tmp
2 | mr-*[^go]
3 | *.so
4 |
5 | mrsequential
6 | mrworker
7 | mrmaster
8 |
--------------------------------------------------------------------------------
/src/main/build-wc.sh:
--------------------------------------------------------------------------------
1 | go build -buildmode=plugin ../mrapps/wc.go
2 |
--------------------------------------------------------------------------------
/src/main/diskvd.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // start a diskvd server. it's a member of some replica
5 | // group, which has other members, and it needs to know
6 | // how to talk to the members of the shardmaster service.
7 | // used by ../diskv/test_test.go
8 | //
9 | // arguments:
10 | // -g groupid
11 | // -m masterport1 -m masterport2 ...
12 | // -s replicaport1 -s replicaport2 ...
13 | // -i my-index-in-server-port-list
14 | // -u unreliable
15 | // -d directory
16 | // -r restart
17 |
18 | import "time"
19 | import "diskv"
20 | import "os"
21 | import "fmt"
22 | import "strconv"
23 | import "runtime"
24 |
25 | func usage() {
26 | fmt.Printf("Usage: diskvd -g gid -m master... -s server... -i my-index -d dir\n")
27 | os.Exit(1)
28 | }
29 |
30 | func main() {
31 | var gid int64 = -1 // my replica group ID
32 | masters := []string{} // ports of shardmasters
33 | replicas := []string{} // ports of servers in my replica group
34 | me := -1 // my index in replicas[]
35 | unreliable := false
36 | dir := "" // store persistent data here
37 | restart := false
38 |
39 | for i := 1; i+1 < len(os.Args); i += 2 {
40 | a0 := os.Args[i]
41 | a1 := os.Args[i+1]
42 | if a0 == "-g" {
43 | gid, _ = strconv.ParseInt(a1, 10, 64)
44 | } else if a0 == "-m" {
45 | masters = append(masters, a1)
46 | } else if a0 == "-s" {
47 | replicas = append(replicas, a1)
48 | } else if a0 == "-i" {
49 | me, _ = strconv.Atoi(a1)
50 | } else if a0 == "-u" {
51 | unreliable, _ = strconv.ParseBool(a1)
52 | } else if a0 == "-d" {
53 | dir = a1
54 | } else if a0 == "-r" {
55 | restart, _ = strconv.ParseBool(a1)
56 | } else {
57 | usage()
58 | }
59 | }
60 |
61 | if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" {
62 | usage()
63 | }
64 |
65 | runtime.GOMAXPROCS(4)
66 |
67 | srv := diskv.StartServer(gid, masters, replicas, me, dir, restart)
68 | srv.Setunreliable(unreliable)
69 |
70 | // for safety, force quit after 10 minutes.
71 | time.Sleep(10 * 60 * time.Second)
72 | mep, _ := os.FindProcess(os.Getpid())
73 | mep.Kill()
74 | }
75 |
--------------------------------------------------------------------------------
/src/main/lockc.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // see comments in lockd.go
5 | //
6 |
7 | import "lockservice"
8 | import "os"
9 | import "fmt"
10 |
11 | func usage() {
12 | fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n")
13 | os.Exit(1)
14 | }
15 |
16 | func main() {
17 | if len(os.Args) == 5 {
18 | ck := lockservice.MakeClerk(os.Args[2], os.Args[3])
19 | var ok bool
20 | if os.Args[1] == "-l" {
21 | ok = ck.Lock(os.Args[4])
22 | } else if os.Args[1] == "-u" {
23 | ok = ck.Unlock(os.Args[4])
24 | } else {
25 | usage()
26 | }
27 | fmt.Printf("reply: %v\n", ok)
28 | } else {
29 | usage()
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/src/main/lockd.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | // export GOPATH=~/6.824
4 | // go build lockd.go
5 | // go build lockc.go
6 | // ./lockd -p a b &
7 | // ./lockd -b a b &
8 | // ./lockc -l a b lx
9 | // ./lockc -u a b lx
10 | //
11 | // on Athena, use /tmp/myname-a and /tmp/myname-b
12 | // instead of a and b.
13 |
14 | import "time"
15 | import "lockservice"
16 | import "os"
17 | import "fmt"
18 |
19 | func main() {
20 | if len(os.Args) == 4 && os.Args[1] == "-p" {
21 | lockservice.StartServer(os.Args[2], os.Args[3], true)
22 | } else if len(os.Args) == 4 && os.Args[1] == "-b" {
23 | lockservice.StartServer(os.Args[2], os.Args[3], false)
24 | } else {
25 | fmt.Printf("Usage: lockd -p|-b primaryport backupport\n")
26 | os.Exit(1)
27 | }
28 | for {
29 | time.Sleep(100 * time.Second)
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/src/main/mrmaster.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // start the master process, which is implemented
5 | // in ../mr/master.go
6 | //
7 | // go run mrmaster.go pg*.txt
8 | //
9 | // Please do not change this file.
10 | //
11 |
12 | import "mr"
13 | import "time"
14 | import "os"
15 | import "fmt"
16 |
17 | func main() {
18 | if len(os.Args) < 2 {
19 | fmt.Fprintf(os.Stderr, "Usage: mrmaster inputfiles...\n")
20 | os.Exit(1)
21 | }
22 |
23 | m := mr.MakeMaster(os.Args[1:], 10)
24 | for m.Done() == false {
25 | time.Sleep(time.Second)
26 | }
27 |
28 | time.Sleep(time.Second)
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/mrsequential.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // simple sequential MapReduce.
5 | //
6 | // go run mrsequential.go wc.so pg*.txt
7 | //
8 |
9 | import "fmt"
10 | import "mr"
11 | import "plugin"
12 | import "os"
13 | import "log"
14 | import "io/ioutil"
15 | import "sort"
16 |
17 | // for sorting by key.
18 | type ByKey []mr.KeyValue
19 |
20 | // for sorting by key.
21 | func (a ByKey) Len() int { return len(a) }
22 | func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
23 | func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key }
24 |
25 | func main() {
26 | if len(os.Args) < 3 {
27 | fmt.Fprintf(os.Stderr, "Usage: mrsequential xxx.so inputfiles...\n")
28 | os.Exit(1)
29 | }
30 |
31 | mapf, reducef := loadPlugin(os.Args[1])
32 |
33 | //
34 | // read each input file,
35 | // pass it to Map,
36 | // accumulate the intermediate Map output.
37 | //
38 | intermediate := []mr.KeyValue{}
39 | for _, filename := range os.Args[2:] {
40 | file, err := os.Open(filename)
41 | if err != nil {
42 | log.Fatalf("cannot open %v", filename)
43 | }
44 | content, err := ioutil.ReadAll(file)
45 | if err != nil {
46 | log.Fatalf("cannot read %v", filename)
47 | }
48 | file.Close()
49 | kva := mapf(filename, string(content))
50 | intermediate = append(intermediate, kva...)
51 | }
52 |
53 | //
54 | // a big difference from real MapReduce is that all the
55 | // intermediate data is in one place, intermediate[],
56 | // rather than being partitioned into NxM buckets.
57 | //
58 |
59 | sort.Sort(ByKey(intermediate))
60 |
61 | oname := "mr-out-0"
62 | ofile, _ := os.Create(oname)
63 |
64 | //
65 | // call Reduce on each distinct key in intermediate[],
66 | // and print the result to mr-out-0.
67 | //
68 | i := 0
69 | for i < len(intermediate) {
70 | j := i + 1
71 | for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key {
72 | j++
73 | }
74 | values := []string{}
75 | for k := i; k < j; k++ {
76 | values = append(values, intermediate[k].Value)
77 | }
78 | output := reducef(intermediate[i].Key, values)
79 |
80 | // this is the correct format for each line of Reduce output.
81 | fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output)
82 |
83 | i = j
84 | }
85 |
86 | ofile.Close()
87 | }
88 |
89 | //
90 | // load the application Map and Reduce functions
91 | // from a plugin file, e.g. ../mrapps/wc.so
92 | //
93 | func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) {
94 | p, err := plugin.Open(filename)
95 | if err != nil {
96 | log.Fatalf("cannot load plugin %v", filename)
97 | }
98 | xmapf, err := p.Lookup("Map")
99 | if err != nil {
100 | log.Fatalf("cannot find Map in %v", filename)
101 | }
102 | mapf := xmapf.(func(string, string) []mr.KeyValue)
103 | xreducef, err := p.Lookup("Reduce")
104 | if err != nil {
105 | log.Fatalf("cannot find Reduce in %v", filename)
106 | }
107 | reducef := xreducef.(func(string, []string) string)
108 |
109 | return mapf, reducef
110 | }
111 |
--------------------------------------------------------------------------------
/src/main/mrworker.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // start a worker process, which is implemented
5 | // in ../mr/worker.go. typically there will be
6 | // multiple worker processes, talking to one master.
7 | //
8 | // go run mrworker.go wc.so
9 | //
10 | // Please do not change this file.
11 | //
12 |
13 | import "mr"
14 | import "plugin"
15 | import "os"
16 | import "fmt"
17 | import "log"
18 |
19 | func main() {
20 | if len(os.Args) != 2 {
21 | fmt.Fprintf(os.Stderr, "Usage: mrworker xxx.so\n")
22 | os.Exit(1)
23 | }
24 |
25 | mapf, reducef := loadPlugin(os.Args[1])
26 |
27 | mr.Worker(mapf, reducef)
28 | }
29 |
30 | //
31 | // load the application Map and Reduce functions
32 | // from a plugin file, e.g. ../mrapps/wc.so
33 | //
34 | func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) {
35 | p, err := plugin.Open(filename)
36 | if err != nil {
37 | log.Fatalf("cannot load plugin %v", filename)
38 | }
39 | xmapf, err := p.Lookup("Map")
40 | if err != nil {
41 | log.Fatalf("cannot find Map in %v", filename)
42 | }
43 | mapf := xmapf.(func(string, string) []mr.KeyValue)
44 | xreducef, err := p.Lookup("Reduce")
45 | if err != nil {
46 | log.Fatalf("cannot find Reduce in %v", filename)
47 | }
48 | reducef := xreducef.(func(string, []string) string)
49 |
50 | return mapf, reducef
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/pbc.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // pbservice client application
5 | //
6 | // export GOPATH=~/6.824
7 | // go build viewd.go
8 | // go build pbd.go
9 | // go build pbc.go
10 | // ./viewd /tmp/rtm-v &
11 | // ./pbd /tmp/rtm-v /tmp/rtm-1 &
12 | // ./pbd /tmp/rtm-v /tmp/rtm-2 &
13 | // ./pbc /tmp/rtm-v key1 value1
14 | // ./pbc /tmp/rtm-v key1
15 | //
16 | // change "rtm" to your user name.
17 | // start the pbd programs in separate windows and kill
18 | // and restart them to exercise fault tolerance.
19 | //
20 |
21 | import "pbservice"
22 | import "os"
23 | import "fmt"
24 |
25 | func usage() {
26 | fmt.Printf("Usage: pbc viewport key\n")
27 | fmt.Printf(" pbc viewport key value\n")
28 | os.Exit(1)
29 | }
30 |
31 | func main() {
32 | if len(os.Args) == 3 {
33 | // get
34 | ck := pbservice.MakeClerk(os.Args[1], "")
35 | v := ck.Get(os.Args[2])
36 | fmt.Printf("%v\n", v)
37 | } else if len(os.Args) == 4 {
38 | // put
39 | ck := pbservice.MakeClerk(os.Args[1], "")
40 | ck.Put(os.Args[2], os.Args[3])
41 | } else {
42 | usage()
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/pbd.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // see directions in pbc.go
5 | //
6 |
7 | import "time"
8 | import "pbservice"
9 | import "os"
10 | import "fmt"
11 |
12 | func main() {
13 | if len(os.Args) != 3 {
14 | fmt.Printf("Usage: pbd viewport myport\n")
15 | os.Exit(1)
16 | }
17 |
18 | pbservice.StartServer(os.Args[1], os.Args[2])
19 |
20 | for {
21 | time.Sleep(100 * time.Second)
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/test-mr.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | #
4 | # basic map-reduce test
5 | #
6 |
7 | RACE=
8 |
9 | # uncomment this to run the tests with the Go race detector.
10 | #RACE=-race
11 |
12 | # run the test in a fresh sub-directory.
13 | rm -rf mr-tmp
14 | mkdir mr-tmp || exit 1
15 | cd mr-tmp || exit 1
16 | rm -f mr-*
17 |
18 | # make sure software is freshly built.
19 | (cd ../../mrapps && go build $RACE -buildmode=plugin wc.go) || exit 1
20 | (cd ../../mrapps && go build $RACE -buildmode=plugin indexer.go) || exit 1
21 | (cd ../../mrapps && go build $RACE -buildmode=plugin mtiming.go) || exit 1
22 | (cd ../../mrapps && go build $RACE -buildmode=plugin rtiming.go) || exit 1
23 | (cd ../../mrapps && go build $RACE -buildmode=plugin crash.go) || exit 1
24 | (cd ../../mrapps && go build $RACE -buildmode=plugin nocrash.go) || exit 1
25 | (cd .. && go build $RACE mrmaster.go) || exit 1
26 | (cd .. && go build $RACE mrworker.go) || exit 1
27 | (cd .. && go build $RACE mrsequential.go) || exit 1
28 |
29 | failed_any=0
30 |
31 | # first word-count
32 |
33 | # generate the correct output
34 | ../mrsequential ../../mrapps/wc.so ../pg*txt || exit 1
35 | sort mr-out-0 > mr-correct-wc.txt
36 | rm -f mr-out*
37 |
38 | echo '***' Starting wc test.
39 |
40 | timeout -k 2s 180s ../mrmaster ../pg*txt &
41 |
42 | # give the master time to create the sockets.
43 | sleep 1
44 |
45 | # start multiple workers.
46 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
47 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
48 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
49 |
50 | # wait for one of the processes to exit.
51 | # under bash, this waits for all processes,
52 | # including the master.
53 | wait
54 |
55 | # the master or a worker has exited. since workers are required
56 | # to exit when a job is completely finished, and not before,
57 | # that means the job has finished.
58 |
59 | sort mr-out* | grep . > mr-wc-all
60 | if cmp mr-wc-all mr-correct-wc.txt
61 | then
62 | echo '---' wc test: PASS
63 | else
64 | echo '---' wc output is not the same as mr-correct-wc.txt
65 | echo '---' wc test: FAIL
66 | failed_any=1
67 | fi
68 |
69 | # wait for remaining workers and master to exit.
70 | wait ; wait ; wait
71 |
72 | # now indexer
73 | rm -f mr-*
74 |
75 | # generate the correct output
76 | ../mrsequential ../../mrapps/indexer.so ../pg*txt || exit 1
77 | sort mr-out-0 > mr-correct-indexer.txt
78 | rm -f mr-out*
79 |
80 | echo '***' Starting indexer test.
81 |
82 | timeout -k 2s 180s ../mrmaster ../pg*txt &
83 | sleep 1
84 |
85 | # start multiple workers
86 | timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so &
87 | timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so
88 |
89 | sort mr-out* | grep . > mr-indexer-all
90 | if cmp mr-indexer-all mr-correct-indexer.txt
91 | then
92 | echo '---' indexer test: PASS
93 | else
94 | echo '---' indexer output is not the same as mr-correct-indexer.txt
95 | echo '---' indexer test: FAIL
96 | failed_any=1
97 | fi
98 |
99 | wait ; wait
100 |
101 |
102 | echo '***' Starting map parallelism test.
103 |
104 | rm -f mr-out* mr-worker*
105 |
106 | timeout -k 2s 180s ../mrmaster ../pg*txt &
107 | sleep 1
108 |
109 | timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so &
110 | timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so
111 |
112 | NT=`cat mr-out* | grep '^times-' | wc -l | sed 's/ //g'`
113 | if [ "$NT" != "2" ]
114 | then
115 | echo '---' saw "$NT" workers rather than 2
116 | echo '---' map parallelism test: FAIL
117 | failed_any=1
118 | fi
119 |
120 | if cat mr-out* | grep '^parallel.* 2' > /dev/null
121 | then
122 | echo '---' map parallelism test: PASS
123 | else
124 | echo '---' map workers did not run in parallel
125 | echo '---' map parallelism test: FAIL
126 | failed_any=1
127 | fi
128 |
129 | wait ; wait
130 |
131 |
132 | echo '***' Starting reduce parallelism test.
133 |
134 | rm -f mr-out* mr-worker*
135 |
136 | timeout -k 2s 180s ../mrmaster ../pg*txt &
137 | sleep 1
138 |
139 | timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so &
140 | timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so
141 |
142 | NT=`cat mr-out* | grep '^[a-z] 2' | wc -l | sed 's/ //g'`
143 | if [ "$NT" -lt "2" ]
144 | then
145 | echo '---' too few parallel reduces.
146 | echo '---' reduce parallelism test: FAIL
147 | failed_any=1
148 | else
149 | echo '---' reduce parallelism test: PASS
150 | fi
151 |
152 | wait ; wait
153 |
154 |
155 | # generate the correct output
156 | ../mrsequential ../../mrapps/nocrash.so ../pg*txt || exit 1
157 | sort mr-out-0 > mr-correct-crash.txt
158 | rm -f mr-out*
159 |
160 | echo '***' Starting crash test.
161 |
162 | rm -f mr-done
163 | (timeout -k 2s 180s ../mrmaster ../pg*txt ; touch mr-done ) &
164 | sleep 1
165 |
166 | # start multiple workers
167 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so &
168 |
169 | # mimic rpc.go's masterSock()
170 | SOCKNAME=/var/tmp/824-mr-`id -u`
171 |
172 | ( while [ -e $SOCKNAME -a ! -f mr-done ]
173 | do
174 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
175 | sleep 1
176 | done ) &
177 |
178 | ( while [ -e $SOCKNAME -a ! -f mr-done ]
179 | do
180 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
181 | sleep 1
182 | done ) &
183 |
184 | while [ -e $SOCKNAME -a ! -f mr-done ]
185 | do
186 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
187 | sleep 1
188 | done
189 |
190 | wait
191 | wait
192 | wait
193 |
194 | rm $SOCKNAME
195 | sort mr-out* | grep . > mr-crash-all
196 | if cmp mr-crash-all mr-correct-crash.txt
197 | then
198 | echo '---' crash test: PASS
199 | else
200 | echo '---' crash output is not the same as mr-correct-crash.txt
201 | echo '---' crash test: FAIL
202 | failed_any=1
203 | fi
204 |
205 | if [ $failed_any -eq 0 ]; then
206 | echo '***' PASSED ALL TESTS
207 | else
208 | echo '***' FAILED SOME TESTS
209 | exit 1
210 | fi
211 |
--------------------------------------------------------------------------------
/src/main/viewd.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // see directions in pbc.go
5 | //
6 |
7 | import "time"
8 | import "viewservice"
9 | import "os"
10 | import "fmt"
11 |
12 | func main() {
13 | if len(os.Args) != 2 {
14 | fmt.Printf("Usage: viewd port\n")
15 | os.Exit(1)
16 | }
17 |
18 | viewservice.StartServer(os.Args[1])
19 |
20 | for {
21 | time.Sleep(100 * time.Second)
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/src/models/kv.go:
--------------------------------------------------------------------------------
1 | package models
2 |
3 | import "../porcupine"
4 | import "fmt"
5 | import "sort"
6 |
7 | type KvInput struct {
8 | Op uint8 // 0 => get, 1 => put, 2 => append
9 | Key string
10 | Value string
11 | }
12 |
13 | type KvOutput struct {
14 | Value string
15 | }
16 |
17 | var KvModel = porcupine.Model{
18 | Partition: func(history []porcupine.Operation) [][]porcupine.Operation {
19 | m := make(map[string][]porcupine.Operation)
20 | for _, v := range history {
21 | key := v.Input.(KvInput).Key
22 | m[key] = append(m[key], v)
23 | }
24 | keys := make([]string, 0, len(m))
25 | for k := range m {
26 | keys = append(keys, k)
27 | }
28 | sort.Strings(keys)
29 | ret := make([][]porcupine.Operation, 0, len(keys))
30 | for _, k := range keys {
31 | ret = append(ret, m[k])
32 | }
33 | return ret
34 | },
35 | Init: func() interface{} {
36 | // note: we are modeling a single key's value here;
37 | // we're partitioning by key, so this is okay
38 | return ""
39 | },
40 | Step: func(state, input, output interface{}) (bool, interface{}) {
41 | inp := input.(KvInput)
42 | out := output.(KvOutput)
43 | st := state.(string)
44 | if inp.Op == 0 {
45 | // get
46 | return out.Value == st, state
47 | } else if inp.Op == 1 {
48 | // put
49 | return true, inp.Value
50 | } else {
51 | // append
52 | return true, (st + inp.Value)
53 | }
54 | },
55 | DescribeOperation: func(input, output interface{}) string {
56 | inp := input.(KvInput)
57 | out := output.(KvOutput)
58 | switch inp.Op {
59 | case 0:
60 | return fmt.Sprintf("get('%s') -> '%s'", inp.Key, out.Value)
61 | case 1:
62 | return fmt.Sprintf("put('%s', '%s')", inp.Key, inp.Value)
63 | case 2:
64 | return fmt.Sprintf("append('%s', '%s')", inp.Key, inp.Value)
65 | default:
66 | return ""
67 | }
68 | },
69 | }
70 |
--------------------------------------------------------------------------------
/src/mr/master.go:
--------------------------------------------------------------------------------
1 | package mr
2 |
3 | import (
4 | "fmt"
5 | "log"
6 | "sync"
7 | "time"
8 | )
9 | import "net"
10 | import "os"
11 | import "net/rpc"
12 | import "net/http"
13 |
14 | type TaskStat struct {
15 | beginTime time.Time
16 | fileName string
17 | fileIndex int
18 | partIndex int
19 | nReduce int
20 | nFiles int
21 | }
22 |
23 | type TaskStatInterface interface {
24 | GenerateTaskInfo() TaskInfo
25 | OutOfTime() bool
26 | GetFileIndex() int
27 | GetPartIndex() int
28 | SetNow()
29 | }
30 |
31 | type MapTaskStat struct {
32 | TaskStat
33 | }
34 |
35 | type ReduceTaskStat struct {
36 | TaskStat
37 | }
38 |
39 | func (this *MapTaskStat) GenerateTaskInfo() TaskInfo {
40 | return TaskInfo{
41 | State: TaskMap,
42 | FileName: this.fileName,
43 | FileIndex: this.fileIndex,
44 | PartIndex: this.partIndex,
45 | NReduce: this.nReduce,
46 | NFiles: this.nFiles,
47 | }
48 | }
49 |
50 | func (this *ReduceTaskStat) GenerateTaskInfo() TaskInfo {
51 | return TaskInfo{
52 | State: TaskReduce,
53 | FileName: this.fileName,
54 | FileIndex: this.fileIndex,
55 | PartIndex: this.partIndex,
56 | NReduce: this.nReduce,
57 | NFiles: this.nFiles,
58 | }
59 | }
60 |
61 | func (this *TaskStat) OutOfTime() bool {
62 | return time.Now().Sub(this.beginTime) > time.Duration(time.Second*60)
63 | }
64 |
65 | func (this *TaskStat) SetNow() {
66 | this.beginTime = time.Now()
67 | }
68 |
69 | func (this *TaskStat) GetFileIndex() int {
70 | return this.fileIndex
71 | }
72 |
73 | func (this *TaskStat) GetPartIndex() int {
74 | return this.partIndex
75 | }
76 |
77 | type TaskStatQueue struct {
78 | taskArray []TaskStatInterface
79 | mutex sync.Mutex
80 | }
81 |
82 | func (this *TaskStatQueue) lock() {
83 | this.mutex.Lock()
84 | }
85 |
86 | func (this *TaskStatQueue) unlock() {
87 | this.mutex.Unlock()
88 | }
89 |
90 | func (this *TaskStatQueue) Size() int {
91 | return len(this.taskArray)
92 | }
93 |
94 | func (this *TaskStatQueue) Pop() TaskStatInterface {
95 | this.lock()
96 | arrayLength := len(this.taskArray)
97 | if arrayLength == 0 {
98 | this.unlock()
99 | return nil
100 | }
101 | ret := this.taskArray[arrayLength-1]
102 | this.taskArray = this.taskArray[:arrayLength-1]
103 | this.unlock()
104 | return ret
105 | }
106 |
107 | func (this *TaskStatQueue) Push(taskStat TaskStatInterface) {
108 | this.lock()
109 | if taskStat == nil {
110 | this.unlock()
111 | return
112 | }
113 | this.taskArray = append(this.taskArray, taskStat)
114 | this.unlock()
115 | }
116 |
117 | func (this *TaskStatQueue) TimeOutQueue() []TaskStatInterface {
118 | outArray := make([]TaskStatInterface, 0)
119 | this.lock()
120 | for taskIndex := 0; taskIndex < len(this.taskArray); {
121 | taskStat := this.taskArray[taskIndex]
122 | if (taskStat).OutOfTime() {
123 | outArray = append(outArray, taskStat)
124 | this.taskArray = append(this.taskArray[:taskIndex], this.taskArray[taskIndex+1:]...)
125 | // must resume at this index next time
126 | } else {
127 | taskIndex++
128 | }
129 | }
130 | this.unlock()
131 | return outArray
132 | }
133 |
134 | func (this *TaskStatQueue) MoveAppend(rhs []TaskStatInterface) {
135 | this.lock()
136 | this.taskArray = append(this.taskArray, rhs...)
137 | rhs = make([]TaskStatInterface, 0) // note: only resets the local slice header; the caller's slice is not cleared
138 | this.unlock()
139 | }
140 |
141 | func (this *TaskStatQueue) RemoveTask(fileIndex int, partIndex int) {
142 | this.lock()
143 | for index := 0; index < len(this.taskArray); {
144 | task := this.taskArray[index]
145 | if fileIndex == task.GetFileIndex() && partIndex == task.GetPartIndex() {
146 | this.taskArray = append(this.taskArray[:index], this.taskArray[index+1:]...)
147 | } else {
148 | index++
149 | }
150 | }
151 | this.unlock()
152 | }
153 |
154 | type Master struct {
155 | // Your definitions here.
156 |
157 | filenames []string
158 |
159 | // reduce task queue
160 | reduceTaskWaiting TaskStatQueue
161 | reduceTaskRunning TaskStatQueue
162 |
163 | // map task statistics
164 | mapTaskWaiting TaskStatQueue
165 | mapTaskRunning TaskStatQueue
166 |
167 | // machine state
168 | isDone bool
169 | nReduce int
170 | }
171 |
172 | // Your code here -- RPC handlers for the worker to call.
173 | /*
174 | func (this *Master) TryMap(args *TryMapArgs, reply *TryMapReply) error {
175 | if this.isMapped {
176 | reply.RunMap = false
177 | return nil
178 | }
179 | for this.isMapping {
180 | time.Sleep(time.Duration(1) * time.Second)
181 | }
182 | this.isMapped = false
183 | this.isMapping = true
184 | reply.RunMap = true
185 | return nil
186 | }
187 |
188 | func (this *Master) MapFinished(args *TryMapArgs, reply *ExampleReply) error {
189 | this.isMapping = false
190 | this.isMapped = true
191 | return nil
192 | }
193 | */
194 | func (this *Master) AskTask(args *ExampleArgs, reply *TaskInfo) error {
195 | if this.isDone {
196 | reply.State = TaskEnd
197 | return nil
198 | }
199 |
200 | // check for reduce tasks
201 | reduceTask := this.reduceTaskWaiting.Pop()
202 | if reduceTask != nil {
203 | // an available reduce task
204 | // record task begin time
205 | reduceTask.SetNow()
206 | // note task is running
207 | this.reduceTaskRunning.Push(reduceTask)
208 | // setup a reply
209 | *reply = reduceTask.GenerateTaskInfo()
210 | fmt.Printf("Distributing reduce task on part %v %vth file %v\n", reply.PartIndex, reply.FileIndex, reply.FileName)
211 | return nil
212 | }
213 |
214 | // check for map tasks
215 | mapTask := this.mapTaskWaiting.Pop()
216 | if mapTask != nil {
217 | // an available map task
218 | // record task begin time
219 | mapTask.SetNow()
220 | // note task is running
221 | this.mapTaskRunning.Push(mapTask)
222 | // setup a reply
223 | *reply = mapTask.GenerateTaskInfo()
224 | fmt.Printf("Distributing map task on %vth file %v\n", reply.FileIndex, reply.FileName)
225 | return nil
226 | }
227 |
228 | // all tasks distributed
229 | if this.mapTaskRunning.Size() > 0 || this.reduceTaskRunning.Size() > 0 {
230 | // must wait for new tasks
231 | reply.State = TaskWait
232 | return nil
233 | }
234 | // all tasks complete
235 | reply.State = TaskEnd
236 | this.isDone = true
237 | return nil
238 | }
239 |
240 | func (this *Master) distributeReduce() {
241 | reduceTask := ReduceTaskStat{
242 | TaskStat{
243 | fileIndex: 0,
244 | partIndex: 0,
245 | nReduce: this.nReduce,
246 | nFiles: len(this.filenames),
247 | },
248 | }
249 | for reduceIndex := 0; reduceIndex < this.nReduce; reduceIndex++ {
250 | task := reduceTask
251 | task.partIndex = reduceIndex
252 | this.reduceTaskWaiting.Push(&task)
253 | }
254 | }
255 |
256 | func (this *Master) TaskDone(args *TaskInfo, reply *ExampleReply) error {
257 | switch args.State {
258 | case TaskMap:
259 | fmt.Printf("Map task on %vth file %v complete\n", args.FileIndex, args.FileName)
260 | this.mapTaskRunning.RemoveTask(args.FileIndex, args.PartIndex)
261 | if this.mapTaskRunning.Size() == 0 && this.mapTaskWaiting.Size() == 0 {
262 | // all map tasks done
263 | // can distribute reduce tasks
264 | this.distributeReduce()
265 | }
266 | break
267 | case TaskReduce:
268 | fmt.Printf("Reduce task on %vth part complete\n", args.PartIndex)
269 | this.reduceTaskRunning.RemoveTask(args.FileIndex, args.PartIndex)
270 | break
271 | default:
272 | panic("Task Done error")
273 | }
274 | return nil
275 | }
276 |
277 | //
278 | // an example RPC handler.
279 | //
280 | // the RPC argument and reply types are defined in rpc.go.
281 | //
282 | func (m *Master) Example(args *ExampleArgs, reply *ExampleReply) error {
283 | reply.Y = args.X + 1
284 | return nil
285 | }
286 |
287 | //
288 | // start a thread that listens for RPCs from worker.go
289 | //
290 | func (m *Master) server() {
291 | rpc.Register(m)
292 | rpc.HandleHTTP()
293 | //l, e := net.Listen("tcp", ":1234")
294 | sockname := masterSock()
295 | os.Remove(sockname)
296 | l, e := net.Listen("unix", sockname)
297 | if e != nil {
298 | log.Fatal("listen error:", e)
299 | }
300 | go http.Serve(l, nil)
301 | }
302 |
303 | //
304 | // main/mrmaster.go calls Done() periodically to find out
305 | // if the entire job has finished.
306 | //
307 | func (this *Master) Done() bool {
308 | // Your code here.
309 |
310 | return this.isDone
311 | }
312 |
313 | //
314 | // create a Master.
315 | // main/mrmaster.go calls this function.
316 | // nReduce is the number of reduce tasks to use.
317 | //
318 | func MakeMaster(files []string, nReduce int) *Master {
319 | // distribute map tasks
320 | mapArray := make([]TaskStatInterface, 0)
321 | for fileIndex, filename := range files {
322 | mapTask := MapTaskStat{
323 | TaskStat{
324 | fileName: filename,
325 | fileIndex: fileIndex,
326 | partIndex: 0,
327 | nReduce: nReduce,
328 | nFiles: len(files),
329 | },
330 | }
331 | mapArray = append(mapArray, &mapTask)
332 | }
333 | m := Master{
334 | mapTaskWaiting: TaskStatQueue{taskArray: mapArray},
335 | nReduce: nReduce,
336 | filenames: files,
337 | }
338 |
339 | // create tmp directory if not exists
340 | if _, err := os.Stat("mr-tmp"); os.IsNotExist(err) {
341 | err = os.Mkdir("mr-tmp", os.ModePerm)
342 | if err != nil {
343 | fmt.Printf("Create tmp directory failed... Error: %v\n", err)
344 | panic("Create tmp directory failed...")
345 | }
346 | }
347 |
348 | // begin a thread to collect tasks out of time
349 | go m.collectOutOfTime()
350 |
351 | m.server()
352 | return &m
353 | }
354 |
355 | func (this *Master) collectOutOfTime() {
356 | for {
357 | time.Sleep(time.Duration(time.Second * 5))
358 | timeouts := this.reduceTaskRunning.TimeOutQueue()
359 | if len(timeouts) > 0 {
360 | this.reduceTaskWaiting.MoveAppend(timeouts)
361 | }
362 | timeouts = this.mapTaskRunning.TimeOutQueue()
363 | if len(timeouts) > 0 {
364 | this.mapTaskWaiting.MoveAppend(timeouts)
365 | }
366 | }
367 | }
368 |
--------------------------------------------------------------------------------
/src/mr/rpc.go:
--------------------------------------------------------------------------------
1 | package mr
2 |
3 | //
4 | // RPC definitions.
5 | //
6 | // remember to capitalize all names.
7 | //
8 |
9 | import "os"
10 | import "strconv"
11 |
12 | //
13 | // example to show how to declare the arguments
14 | // and reply for an RPC.
15 | //
16 |
17 | type ExampleArgs struct {
18 | X int
19 | }
20 |
21 | type ExampleReply struct {
22 | Y int
23 | }
24 |
25 | // Add your RPC definitions here.
26 |
27 | type TryMapArgs struct {
28 | }
29 |
30 | type TryMapReply struct {
31 | // if should not run map, run reduce
32 | RunMap bool
33 | }
34 |
35 | const (
36 | TaskMap = 0
37 | TaskReduce = 1
38 | TaskWait = 2
39 | TaskEnd = 3
40 | )
41 |
42 | type TaskInfo struct {
43 | /*
44 | Declared in consts above
45 | 0 map
46 | 1 reduce
47 | 2 wait
48 | 3 end
49 | */
50 | State int
51 |
52 | FileName string
53 | FileIndex int
54 | PartIndex int
55 |
56 | NReduce int
57 | NFiles int
58 | }
59 |
60 | // Cook up a unique-ish UNIX-domain socket name
61 | // in /var/tmp, for the master.
62 | // Can't use the current directory since
63 | // Athena AFS doesn't support UNIX-domain sockets.
64 | func masterSock() string {
65 | s := "/var/tmp/824-mr-"
66 | s += strconv.Itoa(os.Getuid())
67 | return s
68 | }
69 |
--------------------------------------------------------------------------------
/src/mr/worker.go:
--------------------------------------------------------------------------------
1 | package mr
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "io/ioutil"
7 | "os"
8 | "path/filepath"
9 | "sort"
10 | "strconv"
11 | "time"
12 | )
13 | import "log"
14 | import "net/rpc"
15 | import "hash/fnv"
16 |
17 | //
18 | // Map functions return a slice of KeyValue.
19 | //
20 | type KeyValue struct {
21 | Key string
22 | Value string
23 | }
24 | type ByKey []KeyValue
25 |
26 | func (a ByKey) Len() int { return len(a) }
27 | func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
28 | func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key }
29 |
30 | //
31 | // use ihash(key) % NReduce to choose the reduce
32 | // task number for each KeyValue emitted by Map.
33 | //
34 | func ihash(key string) int {
35 | h := fnv.New32a()
36 | h.Write([]byte(key))
37 | return int(h.Sum32() & 0x7fffffff)
38 | }
39 |
40 | //
41 | // main/mrworker.go calls this function.
42 | //
43 | /*
44 | This function implements worker behaviour.
45 | The worker asks the master which work to perform, map or reduce.
46 | If the map task is claimed by another worker and has not yet finished, the worker waits for a little while.
47 | If the map task is finished by another worker, the master provides necessary information on a reduce task for the worker.
48 | If all reduce tasks are done, enter exit procedure.
49 | */
50 | func Worker(mapf func(string, string) []KeyValue,
51 | reducef func(string, []string) string) {
52 |
53 | // Your worker implementation here.
54 |
55 | // uncomment to send the Example RPC to the master.
56 | //CallExample()
57 | for {
58 | taskInfo := CallAskTask()
59 | switch taskInfo.State {
60 | case TaskMap:
61 | workerMap(mapf, taskInfo)
62 | break
63 | case TaskReduce:
64 | workerReduce(reducef, taskInfo)
65 | break
66 | case TaskWait:
67 | // wait for 5 seconds before requesting again
68 | time.Sleep(time.Duration(time.Second * 5))
69 | break
70 | case TaskEnd:
71 | fmt.Println("Master: all tasks complete. Nothing to do...")
72 | // exit worker process
73 | return
74 | default:
75 | panic("Invalid Task state received by worker")
76 | }
77 | }
78 |
79 | }
80 |
81 | func CallAskTask() *TaskInfo {
82 | args := ExampleArgs{}
83 | reply := TaskInfo{}
84 | call("Master.AskTask", &args, &reply)
85 | return &reply
86 | }
87 |
88 | func CallTaskDone(taskInfo *TaskInfo) {
89 | reply := ExampleReply{}
90 | call("Master.TaskDone", taskInfo, &reply)
91 | }
92 |
93 | //
94 | // example function to show how to make an RPC call to the master.
95 | //
96 | // the RPC argument and reply types are defined in rpc.go.
97 | //
98 | func CallExample() {
99 |
100 | // declare an argument structure.
101 | args := ExampleArgs{}
102 |
103 | // fill in the argument(s).
104 | args.X = 99
105 |
106 | // declare a reply structure.
107 | reply := ExampleReply{}
108 |
109 | // send the RPC request, wait for the reply.
110 | call("Master.Example", &args, &reply)
111 |
112 | // reply.Y should be 100.
113 | fmt.Printf("reply.Y %v\n", reply.Y)
114 | }
115 |
116 | //
117 | // send an RPC request to the master, wait for the response.
118 | // usually returns true.
119 | // returns false if something goes wrong.
120 | //
121 | func call(rpcname string, args interface{}, reply interface{}) bool {
122 | // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234")
123 | sockname := masterSock()
124 | c, err := rpc.DialHTTP("unix", sockname)
125 | if err != nil {
126 | log.Fatal("dialing:", err)
127 | }
128 | defer c.Close()
129 |
130 | err = c.Call(rpcname, args, reply)
131 | if err == nil {
132 | return true
133 | }
134 |
135 | fmt.Println(err)
136 | return false
137 | }
138 |
139 | func workerMap(mapf func(string, string) []KeyValue, taskInfo *TaskInfo) {
140 | fmt.Printf("Got assigned map task on %vth file %v\n", taskInfo.FileIndex, taskInfo.FileName)
141 |
142 | // read in target files as a key-value array
143 | intermediate := []KeyValue{}
144 | file, err := os.Open(taskInfo.FileName)
145 | if err != nil {
146 | log.Fatalf("cannot open %v", taskInfo.FileName)
147 | }
148 | content, err := ioutil.ReadAll(file)
149 | if err != nil {
150 | log.Fatalf("cannot read %v", taskInfo.FileName)
151 | }
152 | file.Close()
153 | kva := mapf(taskInfo.FileName, string(content))
154 | intermediate = append(intermediate, kva...)
155 |
156 | // prepare output files and encoders
157 | nReduce := taskInfo.NReduce
158 | outprefix := "mr-tmp/mr-"
159 | outprefix += strconv.Itoa(taskInfo.FileIndex)
160 | outprefix += "-"
161 | outFiles := make([]*os.File, nReduce)
162 | fileEncs := make([]*json.Encoder, nReduce)
163 | for outindex := 0; outindex < nReduce; outindex++ {
164 | //outname := outprefix + strconv.Itoa(outindex)
165 | //outFiles[outindex], _ = os.Create(outname)
166 | outFiles[outindex], _ = ioutil.TempFile("mr-tmp", "mr-tmp-*")
167 | fileEncs[outindex] = json.NewEncoder(outFiles[outindex])
168 | }
169 |
170 | // distribute keys among mr-fileindex-*
171 | for _, kv := range intermediate {
172 | outindex := ihash(kv.Key) % nReduce
173 | file = outFiles[outindex]
174 | enc := fileEncs[outindex]
175 | err := enc.Encode(&kv)
176 | if err != nil {
177 | fmt.Printf("File %v Key %v Value %v Error: %v\n", taskInfo.FileName, kv.Key, kv.Value, err)
178 | panic("Json encode failed")
179 | }
180 | }
181 |
182 | // save as files
183 | for outindex, file := range outFiles {
184 | outname := outprefix + strconv.Itoa(outindex)
185 | oldpath := filepath.Join(file.Name())
186 | //fmt.Printf("temp file oldpath %v\n", oldpath)
187 | os.Rename(oldpath, outname)
188 | file.Close()
189 | }
190 | // acknowledge master
191 | CallTaskDone(taskInfo)
192 | }
193 |
194 | func workerReduce(reducef func(string, []string) string, taskInfo *TaskInfo) {
195 | fmt.Printf("Got assigned reduce task on part %v\n", taskInfo.PartIndex)
196 | outname := "mr-out-" + strconv.Itoa(taskInfo.PartIndex)
197 | //fmt.Printf("%v\n", taskInfo)
198 |
199 | // read from output files from map tasks
200 |
201 | innameprefix := "mr-tmp/mr-"
202 | innamesuffix := "-" + strconv.Itoa(taskInfo.PartIndex)
203 |
204 | // read in all files as a kv array
205 | intermediate := []KeyValue{}
206 | for index := 0; index < taskInfo.NFiles; index++ {
207 | inname := innameprefix + strconv.Itoa(index) + innamesuffix
208 | file, err := os.Open(inname)
209 | if err != nil {
210 | fmt.Printf("Open intermediate file %v failed: %v\n", inname, err)
211 | panic("Open file error")
212 | }
213 | dec := json.NewDecoder(file)
214 | for {
215 | var kv KeyValue
216 | if err := dec.Decode(&kv); err != nil {
217 | //fmt.Printf("%v\n", err)
218 | break
219 | }
220 | //fmt.Printf("%v\n", kv)
221 | intermediate = append(intermediate, kv)
222 | }
223 | file.Close()
224 | }
225 |
226 | sort.Sort(ByKey(intermediate))
227 |
228 | //ofile, err := os.Create(outname)
229 | ofile, err := ioutil.TempFile("mr-tmp", "mr-*")
230 | if err != nil {
231 | fmt.Printf("Create output file %v failed: %v\n", outname, err)
232 | panic("Create file error")
233 | }
234 | //fmt.Printf("%v\n", intermediate)
235 | i := 0
236 | for i < len(intermediate) {
237 | j := i + 1
238 | for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key {
239 | j++
240 | }
241 | values := []string{}
242 | for k := i; k < j; k++ {
243 | values = append(values, intermediate[k].Value)
244 | }
245 | output := reducef(intermediate[i].Key, values)
246 |
247 | // this is the correct format for each line of Reduce output.
248 | fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output)
249 |
250 | i = j
251 | }
252 | os.Rename(filepath.Join(ofile.Name()), outname)
253 | ofile.Close()
254 | // acknowledge master
255 | CallTaskDone(taskInfo)
256 | }
257 |
--------------------------------------------------------------------------------
/src/mrapps/crash.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // a MapReduce pseudo-application that sometimes crashes,
5 | // and sometimes takes a long time,
6 | // to test MapReduce's ability to recover.
7 | //
8 | // go build -buildmode=plugin crash.go
9 | //
10 |
11 | import "mr"
12 | import crand "crypto/rand"
13 | import "math/big"
14 | import "strings"
15 | import "os"
16 | import "sort"
17 | import "strconv"
18 | import "time"
19 |
20 | func maybeCrash() {
21 | max := big.NewInt(1000)
22 | rr, _ := crand.Int(crand.Reader, max)
23 | if rr.Int64() < 330 {
24 | // crash!
25 | os.Exit(1)
26 | } else if rr.Int64() < 660 {
27 | // delay for a while.
28 | maxms := big.NewInt(10 * 1000)
29 | ms, _ := crand.Int(crand.Reader, maxms)
30 | time.Sleep(time.Duration(ms.Int64()) * time.Millisecond)
31 | }
32 | }
33 |
34 | func Map(filename string, contents string) []mr.KeyValue {
35 | maybeCrash()
36 |
37 | kva := []mr.KeyValue{}
38 | kva = append(kva, mr.KeyValue{"a", filename})
39 | kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))})
40 | kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))})
41 | kva = append(kva, mr.KeyValue{"d", "xyzzy"})
42 | return kva
43 | }
44 |
45 | func Reduce(key string, values []string) string {
46 | maybeCrash()
47 |
48 | // sort values to ensure deterministic output.
49 | vv := make([]string, len(values))
50 | copy(vv, values)
51 | sort.Strings(vv)
52 |
53 | val := strings.Join(vv, " ")
54 | return val
55 | }
56 |
--------------------------------------------------------------------------------
/src/mrapps/indexer.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // an indexing application "plugin" for MapReduce.
5 | //
6 | // go build -buildmode=plugin indexer.go
7 | //
8 |
9 | import "fmt"
10 | import "mr"
11 |
12 | import "strings"
13 | import "unicode"
14 | import "sort"
15 |
16 | // The mapping function is called once for each piece of the input.
17 | // In this framework, the key is the name of the file that is being processed,
18 | // and the value is the file's contents. The return value should be a slice of
19 | // key/value pairs, each represented by a mr.KeyValue.
20 | func Map(document string, value string) (res []mr.KeyValue) {
21 | m := make(map[string]bool)
22 | words := strings.FieldsFunc(value, func(x rune) bool { return !unicode.IsLetter(x) })
23 | for _, w := range words {
24 | m[w] = true
25 | }
26 | for w := range m {
27 | kv := mr.KeyValue{w, document}
28 | res = append(res, kv)
29 | }
30 | return
31 | }
32 |
33 | // The reduce function is called once for each key generated by Map, with a
34 | // list of that key's string value (merged across all inputs). The return value
35 | // should be a single output value for that key.
36 | func Reduce(key string, values []string) string {
37 | sort.Strings(values)
38 | return fmt.Sprintf("%d %s", len(values), strings.Join(values, ","))
39 | }
40 |
--------------------------------------------------------------------------------
/src/mrapps/mtiming.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // a MapReduce pseudo-application to test that workers
5 | // execute map tasks in parallel.
6 | //
7 | // go build -buildmode=plugin mtiming.go
8 | //
9 |
10 | import "mr"
11 | import "strings"
12 | import "fmt"
13 | import "os"
14 | import "syscall"
15 | import "time"
16 | import "sort"
17 | import "io/ioutil"
18 |
19 | func nparallel(phase string) int {
20 | // create a file so that other workers will see that
21 | // we're running at the same time as them.
22 | pid := os.Getpid()
23 | myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid)
24 | err := ioutil.WriteFile(myfilename, []byte("x"), 0666)
25 | if err != nil {
26 | panic(err)
27 | }
28 |
29 | // are any other workers running?
30 | // find their PIDs by scanning directory for mr-worker-XXX files.
31 | dd, err := os.Open(".")
32 | if err != nil {
33 | panic(err)
34 | }
35 | names, err := dd.Readdirnames(1000000)
36 | if err != nil {
37 | panic(err)
38 | }
39 | ret := 0
40 | for _, name := range names {
41 | var xpid int
42 | pat := fmt.Sprintf("mr-worker-%s-%%d", phase)
43 | n, err := fmt.Sscanf(name, pat, &xpid)
44 | if n == 1 && err == nil {
45 | err := syscall.Kill(xpid, 0)
46 | if err == nil {
47 | // if err == nil, xpid is alive.
48 | ret += 1
49 | }
50 | }
51 | }
52 | dd.Close()
53 |
54 | time.Sleep(1 * time.Second)
55 |
56 | err = os.Remove(myfilename)
57 | if err != nil {
58 | panic(err)
59 | }
60 |
61 | return ret
62 | }
63 |
64 | func Map(filename string, contents string) []mr.KeyValue {
65 | t0 := time.Now()
66 | ts := float64(t0.Unix()) + (float64(t0.Nanosecond()) / 1000000000.0)
67 | pid := os.Getpid()
68 |
69 | n := nparallel("map")
70 |
71 | kva := []mr.KeyValue{}
72 | kva = append(kva, mr.KeyValue{
73 | fmt.Sprintf("times-%v", pid),
74 | fmt.Sprintf("%.1f", ts)})
75 | kva = append(kva, mr.KeyValue{
76 | fmt.Sprintf("parallel-%v", pid),
77 | fmt.Sprintf("%d", n)})
78 | return kva
79 | }
80 |
81 | func Reduce(key string, values []string) string {
82 | //n := nparallel("reduce")
83 |
84 | // sort values to ensure deterministic output.
85 | vv := make([]string, len(values))
86 | copy(vv, values)
87 | sort.Strings(vv)
88 |
89 | val := strings.Join(vv, " ")
90 | return val
91 | }
92 |
--------------------------------------------------------------------------------
/src/mrapps/nocrash.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // same as crash.go but doesn't actually crash.
5 | //
6 | // go build -buildmode=plugin nocrash.go
7 | //
8 |
9 | import "mr"
10 | import crand "crypto/rand"
11 | import "math/big"
12 | import "strings"
13 | import "os"
14 | import "sort"
15 | import "strconv"
16 |
17 | func maybeCrash() {
18 | max := big.NewInt(1000)
19 | rr, _ := crand.Int(crand.Reader, max)
20 | if false && rr.Int64() < 500 {
21 | // crash!
22 | os.Exit(1)
23 | }
24 | }
25 |
26 | func Map(filename string, contents string) []mr.KeyValue {
27 | maybeCrash()
28 |
29 | kva := []mr.KeyValue{}
30 | kva = append(kva, mr.KeyValue{"a", filename})
31 | kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))})
32 | kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))})
33 | kva = append(kva, mr.KeyValue{"d", "xyzzy"})
34 | return kva
35 | }
36 |
37 | func Reduce(key string, values []string) string {
38 | maybeCrash()
39 |
40 | // sort values to ensure deterministic output.
41 | vv := make([]string, len(values))
42 | copy(vv, values)
43 | sort.Strings(vv)
44 |
45 | val := strings.Join(vv, " ")
46 | return val
47 | }
48 |
--------------------------------------------------------------------------------
/src/mrapps/rtiming.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // a MapReduce pseudo-application to test that workers
5 | // execute reduce tasks in parallel.
6 | //
7 | // go build -buildmode=plugin rtiming.go
8 | //
9 |
10 | import "mr"
11 | import "fmt"
12 | import "os"
13 | import "syscall"
14 | import "time"
15 | import "io/ioutil"
16 |
17 | func nparallel(phase string) int {
18 | // create a file so that other workers will see that
19 | // we're running at the same time as them.
20 | pid := os.Getpid()
21 | myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid)
22 | err := ioutil.WriteFile(myfilename, []byte("x"), 0666)
23 | if err != nil {
24 | panic(err)
25 | }
26 |
27 | // are any other workers running?
28 | // find their PIDs by scanning directory for mr-worker-XXX files.
29 | dd, err := os.Open(".")
30 | if err != nil {
31 | panic(err)
32 | }
33 | names, err := dd.Readdirnames(1000000)
34 | if err != nil {
35 | panic(err)
36 | }
37 | ret := 0
38 | for _, name := range names {
39 | var xpid int
40 | pat := fmt.Sprintf("mr-worker-%s-%%d", phase)
41 | n, err := fmt.Sscanf(name, pat, &xpid)
42 | if n == 1 && err == nil {
43 | err := syscall.Kill(xpid, 0)
44 | if err == nil {
45 | // if err == nil, xpid is alive.
46 | ret += 1
47 | }
48 | }
49 | }
50 | dd.Close()
51 |
52 | time.Sleep(1 * time.Second)
53 |
54 | err = os.Remove(myfilename)
55 | if err != nil {
56 | panic(err)
57 | }
58 |
59 | return ret
60 | }
61 |
62 | func Map(filename string, contents string) []mr.KeyValue {
63 |
64 | kva := []mr.KeyValue{}
65 | kva = append(kva, mr.KeyValue{"a", "1"})
66 | kva = append(kva, mr.KeyValue{"b", "1"})
67 | kva = append(kva, mr.KeyValue{"c", "1"})
68 | kva = append(kva, mr.KeyValue{"d", "1"})
69 | kva = append(kva, mr.KeyValue{"e", "1"})
70 | kva = append(kva, mr.KeyValue{"f", "1"})
71 | kva = append(kva, mr.KeyValue{"g", "1"})
72 | kva = append(kva, mr.KeyValue{"h", "1"})
73 | kva = append(kva, mr.KeyValue{"i", "1"})
74 | kva = append(kva, mr.KeyValue{"j", "1"})
75 | return kva
76 | }
77 |
78 | func Reduce(key string, values []string) string {
79 | n := nparallel("reduce")
80 |
81 | val := fmt.Sprintf("%d", n)
82 |
83 | return val
84 | }
85 |
--------------------------------------------------------------------------------
/src/mrapps/wc.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // a word-count application "plugin" for MapReduce.
5 | //
6 | // go build -buildmode=plugin wc.go
7 | //
8 |
9 | import "mr"
10 | import "unicode"
11 | import "strings"
12 | import "strconv"
13 |
14 | //
15 | // The map function is called once for each file of input. The first
16 | // argument is the name of the input file, and the second is the
17 | // file's complete contents. You should ignore the input file name,
18 | // and look only at the contents argument. The return value is a slice
19 | // of key/value pairs.
20 | //
21 | func Map(filename string, contents string) []mr.KeyValue {
22 | // function to detect word separators.
23 | ff := func(r rune) bool { return !unicode.IsLetter(r) }
24 |
25 | // split contents into an array of words.
26 | words := strings.FieldsFunc(contents, ff)
27 |
28 | kva := []mr.KeyValue{}
29 | for _, w := range words {
30 | kv := mr.KeyValue{w, "1"}
31 | kva = append(kva, kv)
32 | }
33 | return kva
34 | }
35 |
36 | //
37 | // The reduce function is called once for each key generated by the
38 | // map tasks, with a list of all the values created for that key by
39 | // any map task.
40 | //
41 | func Reduce(key string, values []string) string {
42 | // return the number of occurrences of this word.
43 | return strconv.Itoa(len(values))
44 | }
45 |
--------------------------------------------------------------------------------
/src/porcupine/bitset.go:
--------------------------------------------------------------------------------
1 | package porcupine
2 |
3 | import "math/bits"
4 |
5 | type bitset []uint64
6 |
7 | // data layout:
8 | // bits 0-63 are in data[0], the next are in data[1], etc.
9 |
10 | func newBitset(bits uint) bitset {
11 | extra := uint(0)
12 | if bits%64 != 0 {
13 | extra = 1
14 | }
15 | chunks := bits/64 + extra
16 | return bitset(make([]uint64, chunks))
17 | }
18 |
19 | func (b bitset) clone() bitset {
20 | dataCopy := make([]uint64, len(b))
21 | copy(dataCopy, b)
22 | return bitset(dataCopy)
23 | }
24 |
25 | func bitsetIndex(pos uint) (uint, uint) {
26 | return pos / 64, pos % 64
27 | }
28 |
29 | func (b bitset) set(pos uint) bitset {
30 | major, minor := bitsetIndex(pos)
31 | b[major] |= (1 << minor)
32 | return b
33 | }
34 |
35 | func (b bitset) clear(pos uint) bitset {
36 | major, minor := bitsetIndex(pos)
37 | b[major] &^= (1 << minor)
38 | return b
39 | }
40 |
41 | func (b bitset) get(pos uint) bool {
42 | major, minor := bitsetIndex(pos)
43 | return b[major]&(1<<minor) != 0
44 | }
--------------------------------------------------------------------------------
/src/porcupine/checker.go:
--------------------------------------------------------------------------------
120 | elem := entries[i]
121 | if elem.kind == returnEntry {
122 | entry := &node{value: elem.value, match: nil, id: elem.id}
123 | match[elem.id] = entry
124 | insertBefore(entry, root)
125 | root = entry
126 | } else {
127 | entry := &node{value: elem.value, match: match[elem.id], id: elem.id}
128 | insertBefore(entry, root)
129 | root = entry
130 | }
131 | }
132 | return root
133 | }
134 |
135 | type cacheEntry struct {
136 | linearized bitset
137 | state interface{}
138 | }
139 |
140 | func cacheContains(model Model, cache map[uint64][]cacheEntry, entry cacheEntry) bool {
141 | for _, elem := range cache[entry.linearized.hash()] {
142 | if entry.linearized.equals(elem.linearized) && model.Equal(entry.state, elem.state) {
143 | return true
144 | }
145 | }
146 | return false
147 | }
148 |
149 | type callsEntry struct {
150 | entry *node
151 | state interface{}
152 | }
153 |
154 | func lift(entry *node) {
155 | entry.prev.next = entry.next
156 | entry.next.prev = entry.prev
157 | match := entry.match
158 | match.prev.next = match.next
159 | if match.next != nil {
160 | match.next.prev = match.prev
161 | }
162 | }
163 |
164 | func unlift(entry *node) {
165 | match := entry.match
166 | match.prev.next = match
167 | if match.next != nil {
168 | match.next.prev = match
169 | }
170 | entry.prev.next = entry
171 | entry.next.prev = entry
172 | }
173 |
174 | func checkSingle(model Model, history []entry, computePartial bool, kill *int32) (bool, []*[]int) {
175 | entry := makeLinkedEntries(history)
176 | n := length(entry) / 2
177 | linearized := newBitset(uint(n))
178 | cache := make(map[uint64][]cacheEntry) // map from hash to cache entry
179 | var calls []callsEntry
180 | // longest linearizable prefix that includes the given entry
181 | longest := make([]*[]int, n)
182 |
183 | state := model.Init()
184 | headEntry := insertBefore(&node{value: nil, match: nil, id: -1}, entry)
185 | for headEntry.next != nil {
186 | if atomic.LoadInt32(kill) != 0 {
187 | return false, longest
188 | }
189 | if entry.match != nil {
190 | matching := entry.match // the return entry
191 | ok, newState := model.Step(state, entry.value, matching.value)
192 | if ok {
193 | newLinearized := linearized.clone().set(uint(entry.id))
194 | newCacheEntry := cacheEntry{newLinearized, newState}
195 | if !cacheContains(model, cache, newCacheEntry) {
196 | hash := newLinearized.hash()
197 | cache[hash] = append(cache[hash], newCacheEntry)
198 | calls = append(calls, callsEntry{entry, state})
199 | state = newState
200 | linearized.set(uint(entry.id))
201 | lift(entry)
202 | entry = headEntry.next
203 | } else {
204 | entry = entry.next
205 | }
206 | } else {
207 | entry = entry.next
208 | }
209 | } else {
210 | if len(calls) == 0 {
211 | return false, longest
212 | }
213 | // longest
214 | if computePartial {
215 | callsLen := len(calls)
216 | var seq []int = nil
217 | for _, v := range calls {
218 | if longest[v.entry.id] == nil || callsLen > len(*longest[v.entry.id]) {
219 | // create seq lazily
220 | if seq == nil {
221 | seq = make([]int, len(calls))
222 | for i, v := range calls {
223 | seq[i] = v.entry.id
224 | }
225 | }
226 | longest[v.entry.id] = &seq
227 | }
228 | }
229 | }
230 | callsTop := calls[len(calls)-1]
231 | entry = callsTop.entry
232 | state = callsTop.state
233 | linearized.clear(uint(entry.id))
234 | calls = calls[:len(calls)-1]
235 | unlift(entry)
236 | entry = entry.next
237 | }
238 | }
239 | // longest linearization is the complete linearization, which is calls
240 | seq := make([]int, len(calls))
241 | for i, v := range calls {
242 | seq[i] = v.entry.id
243 | }
244 | for i := 0; i < n; i++ {
245 | longest[i] = &seq
246 | }
247 | return true, longest
248 | }
249 |
250 | func fillDefault(model Model) Model {
251 | if model.Partition == nil {
252 | model.Partition = NoPartition
253 | }
254 | if model.PartitionEvent == nil {
255 | model.PartitionEvent = NoPartitionEvent
256 | }
257 | if model.Equal == nil {
258 | model.Equal = ShallowEqual
259 | }
260 | if model.DescribeOperation == nil {
261 | model.DescribeOperation = DefaultDescribeOperation
262 | }
263 | if model.DescribeState == nil {
264 | model.DescribeState = DefaultDescribeState
265 | }
266 | return model
267 | }
268 |
269 | func checkParallel(model Model, history [][]entry, computeInfo bool, timeout time.Duration) (CheckResult, linearizationInfo) {
270 | ok := true
271 | timedOut := false
272 | results := make(chan bool, len(history))
273 | longest := make([][]*[]int, len(history))
274 | kill := int32(0)
275 | for i, subhistory := range history {
276 | go func(i int, subhistory []entry) {
277 | ok, l := checkSingle(model, subhistory, computeInfo, &kill)
278 | longest[i] = l
279 | results <- ok
280 | }(i, subhistory)
281 | }
282 | var timeoutChan <-chan time.Time
283 | if timeout > 0 {
284 | timeoutChan = time.After(timeout)
285 | }
286 | count := 0
287 | loop:
288 | for {
289 | select {
290 | case result := <-results:
291 | count++
292 | ok = ok && result
293 | if !ok && !computeInfo {
294 | atomic.StoreInt32(&kill, 1)
295 | break loop
296 | }
297 | if count >= len(history) {
298 | break loop
299 | }
300 | case <-timeoutChan:
301 | timedOut = true
302 | atomic.StoreInt32(&kill, 1)
303 | break loop // if we time out, we might get a false positive
304 | }
305 | }
306 | var info linearizationInfo
307 | if computeInfo {
308 | // make sure we've waited for all goroutines to finish,
309 | // otherwise we might race on access to longest[]
310 | for count < len(history) {
311 | <-results
312 | count++
313 | }
314 | // return longest linearizable prefixes that include each history element
315 | partialLinearizations := make([][][]int, len(history))
316 | for i := 0; i < len(history); i++ {
317 | var partials [][]int
318 | // turn longest into a set of unique linearizations
319 | set := make(map[*[]int]struct{})
320 | for _, v := range longest[i] {
321 | if v != nil {
322 | set[v] = struct{}{}
323 | }
324 | }
325 | for k := range set {
326 | arr := make([]int, len(*k))
327 | for i, v := range *k {
328 | arr[i] = v
329 | }
330 | partials = append(partials, arr)
331 | }
332 | partialLinearizations[i] = partials
333 | }
334 | info.history = history
335 | info.partialLinearizations = partialLinearizations
336 | }
337 | var result CheckResult
338 | if !ok {
339 | result = Illegal
340 | } else {
341 | if timedOut {
342 | result = Unknown
343 | } else {
344 | result = Ok
345 | }
346 | }
347 | return result, info
348 | }
349 |
350 | func checkEvents(model Model, history []Event, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) {
351 | model = fillDefault(model)
352 | partitions := model.PartitionEvent(history)
353 | l := make([][]entry, len(partitions))
354 | for i, subhistory := range partitions {
355 | l[i] = convertEntries(renumber(subhistory))
356 | }
357 | return checkParallel(model, l, verbose, timeout)
358 | }
359 |
360 | func checkOperations(model Model, history []Operation, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) {
361 | model = fillDefault(model)
362 | partitions := model.Partition(history)
363 | l := make([][]entry, len(partitions))
364 | for i, subhistory := range partitions {
365 | l[i] = makeEntries(subhistory)
366 | }
367 | return checkParallel(model, l, verbose, timeout)
368 | }
369 |
--------------------------------------------------------------------------------
/src/porcupine/model.go:
--------------------------------------------------------------------------------
1 | package porcupine
2 |
3 | import "fmt"
4 |
5 | type Operation struct {
6 | ClientId int // optional, unless you want a visualization; zero-indexed
7 | Input interface{}
8 | Call int64 // invocation time
9 | Output interface{}
10 | Return int64 // response time
11 | }
12 |
13 | type EventKind bool
14 |
15 | const (
16 | CallEvent EventKind = false
17 | ReturnEvent EventKind = true
18 | )
19 |
20 | type Event struct {
21 | ClientId int // optional, unless you want a visualization; zero-indexed
22 | Kind EventKind
23 | Value interface{}
24 | Id int
25 | }
26 |
27 | type Model struct {
28 | // Partition functions, such that a history is linearizable if and only
29 | // if each partition is linearizable. If you don't want to implement
30 | // this, you can always use the `NoPartition` functions implemented
31 | // below.
32 | Partition func(history []Operation) [][]Operation
33 | PartitionEvent func(history []Event) [][]Event
34 | // Initial state of the system.
35 | Init func() interface{}
36 | // Step function for the system. Returns whether or not the system
37 | // could take this step with the given inputs and outputs and also
38 | // returns the new state. This should not mutate the existing state.
39 | Step func(state interface{}, input interface{}, output interface{}) (bool, interface{})
40 | // Equality on states. If you are using a simple data type for states,
41 | // you can use the `ShallowEqual` function implemented below.
42 | Equal func(state1, state2 interface{}) bool
43 | // For visualization, describe an operation as a string.
44 | // For example, "Get('x') -> 'y'".
45 | DescribeOperation func(input interface{}, output interface{}) string
46 | // For visualization purposes, describe a state as a string.
47 | // For example, "{'x' -> 'y', 'z' -> 'w'}"
48 | DescribeState func(state interface{}) string
49 | }
50 |
51 | func NoPartition(history []Operation) [][]Operation {
52 | return [][]Operation{history}
53 | }
54 |
55 | func NoPartitionEvent(history []Event) [][]Event {
56 | return [][]Event{history}
57 | }
58 |
59 | func ShallowEqual(state1, state2 interface{}) bool {
60 | return state1 == state2
61 | }
62 |
63 | func DefaultDescribeOperation(input interface{}, output interface{}) string {
64 | return fmt.Sprintf("%v -> %v", input, output)
65 | }
66 |
67 | func DefaultDescribeState(state interface{}) string {
68 | return fmt.Sprintf("%v", state)
69 | }
70 |
71 | type CheckResult string
72 |
73 | const (
74 | Unknown CheckResult = "Unknown" // timed out
75 | Ok = "Ok"
76 | Illegal = "Illegal"
77 | )
78 |
--------------------------------------------------------------------------------
/src/porcupine/porcupine.go:
--------------------------------------------------------------------------------
1 | package porcupine
2 |
3 | import "time"
4 |
5 | func CheckOperations(model Model, history []Operation) bool {
6 | res, _ := checkOperations(model, history, false, 0)
7 | return res == Ok
8 | }
9 |
10 | // timeout = 0 means no timeout
11 | // if this operation times out, then a false positive is possible
12 | func CheckOperationsTimeout(model Model, history []Operation, timeout time.Duration) CheckResult {
13 | res, _ := checkOperations(model, history, false, timeout)
14 | return res
15 | }
16 |
17 | // timeout = 0 means no timeout
18 | // if this operation times out, then a false positive is possible
19 | func CheckOperationsVerbose(model Model, history []Operation, timeout time.Duration) (CheckResult, linearizationInfo) {
20 | return checkOperations(model, history, true, timeout)
21 | }
22 |
23 | func CheckEvents(model Model, history []Event) bool {
24 | res, _ := checkEvents(model, history, false, 0)
25 | return res == Ok
26 | }
27 |
28 | // timeout = 0 means no timeout
29 | // if this operation times out, then a false positive is possible
30 | func CheckEventsTimeout(model Model, history []Event, timeout time.Duration) CheckResult {
31 | res, _ := checkEvents(model, history, false, timeout)
32 | return res
33 | }
34 |
35 | // timeout = 0 means no timeout
36 | // if this operation times out, then a false positive is possible
37 | func CheckEventsVerbose(model Model, history []Event, timeout time.Duration) (CheckResult, linearizationInfo) {
38 | return checkEvents(model, history, true, timeout)
39 | }
40 |
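
A minimal usage sketch of this checker API (illustrative only, not a file in this repo; the import path is an assumption and may need to follow this repo's style, e.g. `"../porcupine"`): define a `Model` for a single integer register and check a short two-operation history with `CheckOperations`.

```go
// Illustrative sketch: a single-register model checked with porcupine.
package main

import (
	"fmt"

	"porcupine" // assumption: adjust to this repo's import style, e.g. "../porcupine"
)

// registerOp is a hypothetical input type: a write carries a value, a read does not.
type registerOp struct {
	isWrite bool
	value   int
}

func main() {
	model := porcupine.Model{
		Partition:      porcupine.NoPartition,
		PartitionEvent: porcupine.NoPartitionEvent,
		Init:           func() interface{} { return 0 }, // the register starts at 0
		Step: func(state, input, output interface{}) (bool, interface{}) {
			op := input.(registerOp)
			cur := state.(int)
			if op.isWrite {
				return true, op.value // a write always succeeds and sets the value
			}
			return output.(int) == cur, cur // a read must observe the current value
		},
		Equal: porcupine.ShallowEqual,
	}

	// Two overlapping operations: write(42) over [0,10], read()->42 over [5,15].
	history := []porcupine.Operation{
		{Input: registerOp{isWrite: true, value: 42}, Call: 0, Output: nil, Return: 10},
		{Input: registerOp{isWrite: false}, Call: 5, Output: 42, Return: 15},
	}

	fmt.Println(porcupine.CheckOperations(model, history)) // prints true
}
```

The `Call`/`Return` timestamps only need to be consistent with each other; the checker uses them to decide which operations overlap and may be reordered.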
--------------------------------------------------------------------------------
/src/raft/README.md:
--------------------------------------------------------------------------------
1 | Here is some explanation of this Raft implementation.
2 |
3 | # Procedure Setup
4 |
5 | There are three kinds of procedures at hand: the state procedure, the async procedure, and the callback procedure. They run simultaneously inside a Raft server process.
6 |
7 | ## State Procedure
8 |
9 | Each Raft state corresponds to an action, and that action is actively run by a dedicated thread.
10 |
11 | A leader periodically sends AppendEntries to all other peers.
12 |
13 | A candidate waits a random time and then sends RequestVote to all other peers, repeating this every election round.
14 |
15 | A follower periodically checks its timer for `Timer Cleared` or `Timer Expired` events.
16 |
17 | This is the main procedure running inside the Raft process; a rough sketch of the dispatch loop follows.
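
A minimal sketch of that thread's main loop (illustrative only: the real loop lives in `raft.go`, which is not shown in this README, so the function name `runStateLoop` is an assumption):

```go
// Sketch, assumed to sit in package raft next to raft_state.go.
// The thread holds the peer-state lock between rounds; every Run()
// implementation releases it while sleeping or waiting and re-acquires
// it before returning.
func (rf *Raft) runStateLoop() { // hypothetical name
	rf.LockPeerState()
	for {
		rf.MyState.Run() // dispatches to RaftLeader, RaftFollower, or RaftCandidate
	}
}
```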
18 |
19 | ## Async Procedure
20 |
21 | Every Raft instance owns a `TaskQueue` that holds its pending async tasks.
22 |
23 | A dedicated thread executes these tasks by popping them from the `TaskQueue` one at a time.
24 |
25 | An async task can be generated by an RPC: when the process receives an RPC, it pushes the corresponding task onto the `TaskQueue` and waits for it to finish (see the sketch after the list below).
26 |
27 | An async task can also be generated by attempts to alter the state of the Raft process, which may:
28 |
29 | - change the peer state (leader, follower, candidate)
30 | - in turn change the peer's current term `currentTerm`
31 | - in turn change the peer's assumed leader `votedFor`
32 |
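Both ends of the queue look roughly like this (the handler mirrors the commented-out `AppendEntries` in `append_entries_task.go`; the worker function name is an assumption):

```go
// An RPC handler wraps its arguments in a task, pushes the task onto the
// queue, and blocks until the task thread has executed it (RunTask waits
// on the task's done condition).
func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) {
	RunTask(NewAppendEntriesTask(rf, args, reply), &rf.taskQueue)
}

// Hypothetical name for the devoted worker: pop and run one task at a time.
func (rf *Raft) runTaskWorker() {
	for {
		rf.taskQueue.RunOne() // pop, execute, then mark done so the pusher returns
	}
}
```

Because the channel inside `RaftTaskQueue` is unbuffered, `Push` itself blocks until the worker is ready to receive, which is what serializes the tasks.
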
33 | ## Callback Procedure
34 |
35 | A callback runs when an RPC response arrives; it handles the result of an async RPC. A callback may only change the state assigned to it as `AsyncRpcInfo`. When it must change any other part of the Raft process, it does so by pushing a task onto the `TaskQueue`.
36 |
37 | ## Change in Peer State
38 |
39 | The state of a Raft instance may be changed in the course of the two threads described above, so the changes must be organized in a disciplined way.
40 |
41 | The key question is how each kind of execution handles a change in state.
42 |
43 | ### State Procedure
44 |
45 | In the main state procedure, code is dispatched by **polymorphism**: as the attribute `MyState` changes, the behavior changes with it. The implementations of the `RaftState` interface are what keep this procedure correct across state changes.
46 |
47 | ### Async Procedure
48 |
49 | Since all RPCs and peer state changes are allowed under any circumstances for any peer, async procedures are not directly affected by a peer state transition: no async task is discarded when popped from the `TaskQueue`.
50 |
51 | Still, an async procedure may behave differently, via conditionals or polymorphism, depending on the peer state it observes.
52 |
53 | ### Callback Procedure
54 |
55 | When an RPC returns, the Raft process state may have changed so that the call's original purpose is moot. Detecting and handling this is the job of the callback procedures.
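
For instance, the heartbeat callback in `append_entries_callback.go` begins with exactly this kind of staleness check (condensed excerpt):

```go
func (aec *AppendEntriesCall) callback(peerIndex int) {
	if !aec.raft.MyState.IsLeader() {
		aec.SetMustExit() // lost leadership while the RPC was in flight: the reply is moot
		return
	}
	if aec.raft.tryFollowNewerTerm(peerIndex, aec.replies[peerIndex].Term) {
		aec.SetMustExit() // the reply carries a newer term: abandon this round of calls
		return
	}
	// ... only now is the reply acted upon
}
```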
56 |
57 | ## Peer State Lock
58 |
59 | Async and callback procedures may change the peer state, while the state procedure mostly only reads it (the follower's timeout transition to candidate is the exception). Therefore, in async and callback procedures, the peer state is guarded by the mutex attribute `mu`.
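
The state procedure manages the peer-state lock through `LockPeerState`/`UnlockPeerState`, releasing it around its waits and re-acquiring it before acting on the outcome; condensed from `RaftFollower.Run` in `raft_state.go`:

```go
func (follower *RaftFollower) Run() {
	follower.raft.UnlockPeerState()                           // let tasks and callbacks change state meanwhile
	expired := follower.raft.TimeParams.heartBeatTimer.Wait() // block without holding the lock
	follower.raft.LockPeerState()                             // re-acquire before acting on the outcome
	if expired {
		follower.raft.toCandidate()
	}
}
```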
60 |
61 | # Walkthrough of a Run
62 |
63 |
64 |
--------------------------------------------------------------------------------
/src/raft/append_entries_callback.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | type AppendEntriesCall struct {
4 | AsyncRpcCallAttr
5 |
6 | args []AppendEntriesArgs
7 | replies []AppendEntriesReply
8 | }
9 |
10 | func (aec *AppendEntriesCall) makeRpcCall(peerIndex int) bool {
11 | aec.args[peerIndex] = *aec.raft.newAppendEntriesArgs(peerIndex)
12 | return aec.peers[peerIndex].Call("Raft.AppendEntries", &aec.args[peerIndex], &aec.replies[peerIndex])
13 | }
14 |
15 | func (aec *AppendEntriesCall) shouldExit() bool {
16 | if aec.MustExit {
17 | return true
18 | }
19 | return false
20 | }
21 |
22 | func (aec *AppendEntriesCall) callback(peerIndex int) {
23 | if !aec.raft.MyState.IsLeader() {
24 | aec.SetMustExit()
25 | return
26 | }
27 | aec.raft.printInfo("heartbeat received from peer", peerIndex)
28 | reply := &aec.replies[peerIndex]
29 | if aec.raft.tryFollowNewerTerm(peerIndex, reply.Term) {
30 | aec.SetMustExit()
31 | return
32 | }
33 | aec.raft.printInfo("peer", peerIndex, "received", len(aec.args[peerIndex].Entries), "entries")
34 | for reply.Success == false {
35 | aec.raft.peerLogStates.Less(peerIndex)
36 | /*
37 | nextIndex := aec.raft.Log.lastTermIndex(0, reply.ConflictTerm)
38 | if nextIndex == -1 {
39 | aec.raft.peerLogStates.NextIndex[peerIndex] = reply.ConflictIndex
40 | } else {
41 | aec.raft.peerLogStates.NextIndex[peerIndex] = nextIndex
42 | }
43 | */
44 | aec.raft.printInfo("peer", peerIndex, "refused entries sent, must decrement nextIndex to", aec.raft.peerLogStates.NextIndex[peerIndex])
45 | ok := aec.makeRpcCall(peerIndex)
46 | if ok == false {
47 | return
48 | }
49 | if !aec.raft.MyState.IsLeader() {
50 | aec.SetMustExit()
51 | return
52 | }
53 | if aec.raft.tryFollowNewerTerm(peerIndex, reply.Term) {
54 | aec.SetMustExit()
55 | return
56 | }
57 | }
58 | aec.raft.peerLogStates.More(peerIndex, len(aec.args[peerIndex].Entries))
59 | aec.raft.printInfo("peer", peerIndex, "log updated to", aec.raft.peerLogStates.matchIndex[peerIndex])
60 | }
61 |
62 | func (aec *AppendEntriesCall) tryEnd() bool {
63 | if aec.CurrentCount >= aec.TotalCount {
64 | aec.SetMustExit()
65 | aec.raft.TryCommit(aec)
66 | return true
67 | }
68 | return false
69 | }
70 |
71 | func NewAppendEntriesCall(raft *Raft) *AppendEntriesCall {
72 | return &AppendEntriesCall{
73 | AsyncRpcCallAttr: raft.NewAsyncRpcCall(),
74 | args: make([]AppendEntriesArgs, raft.PeerCount()),
75 | replies: make([]AppendEntriesReply, raft.PeerCount()),
76 | }
77 | }
78 |
79 | /*
80 | func (rf *Raft) appendEntriesCallBack(ok bool, peerIndex int, args *AppendEntriesArgs, reply *AppendEntriesReply, info *AsyncRpcCallAttr) {
81 | if ok {
82 | info.SetAliveHost(peerIndex)
83 | if rf.tryFollowNewerTerm(peerIndex, reply.Term) {
84 | info.SetMustExit()
85 | } else {
86 | // decrement and retry
87 | for reply.Success == false {
88 | //rf.logMutex.Lock()
89 | if rf.MyState != LeaderState {
90 | info.SetMustExit()
91 | return
92 | }
93 | rf.NextIndex[peerIndex]--
94 | args = rf.newAppendEntriesArgs(peerIndex)
95 | fmt.Println(rf.PrefixPrint(), "got false heartbeat reply from peer", peerIndex, ", must decrement NextIndex then try again")
96 | //rf.logMutex.Unlock()
97 | // retry
98 | reply = &AppendEntriesReply{}
99 | ok = rf.sendAppendEntries(peerIndex, args, reply)
100 | if !ok {
101 | break
102 | }
103 | if rf.tryFollowNewerTerm(peerIndex, reply.Term) {
104 | info.SetMustExit()
105 | return
106 | }
107 | }
108 | if ok {
109 | //rf.logMutex.Lock()
110 | // update NextIndex, matchIndex
111 |
112 | //rf.logMutex.Unlock()
113 | }
114 | }
115 | } else {
116 | //fmt.Println(rf.PrefixPrint(), "found peer", peerIndex, "unreachable when sending heartbeats")
117 | }
118 | info.IncrementCurrentCount()
119 | if ok {
120 | fmt.Println(rf.PrefixPrint(), "got reply on AppendEntries #current", info.CurrentCount, "#total", info.TotalCount)
121 | } else {
122 | fmt.Println(rf.PrefixPrint(), "got timeout on AppendEntries #current", info.CurrentCount, "#total", info.TotalCount)
123 | }
124 | if info.MustExit == false {
125 | rf.leaderTryCommit(info)
126 | }
127 | }
128 | */
129 |
--------------------------------------------------------------------------------
/src/raft/append_entries_task.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | type AppendEntriesTask struct {
4 | RaftTaskAttr
5 | args *AppendEntriesArgs
6 | reply *AppendEntriesReply
7 | }
8 |
9 | func (aet *AppendEntriesTask) execute() {
10 | aet.executeAppendEntriesRpc(aet.args, aet.reply)
11 | }
12 |
13 | func (aet *AppendEntriesTask) executeAppendEntriesRpc(args *AppendEntriesArgs, reply *AppendEntriesReply) {
14 | reply.Term = aet.raft.currentTerm
15 | reply.Success = true
16 | aet.raft.printInfo("heartbeat received from peer", args.LeaderId)
17 |
18 | // ignore old terms
19 | if aet.raft.currentTerm > args.Term {
20 | reply.Success = false
21 | aet.raft.printInfo("sees an old term AppendEntries from peer", args.LeaderId)
22 | return
23 | }
24 | // adopt the sender's newer term if there is one; the return value is not needed here
25 | aet.raft.tryFollowNewerTerm(args.LeaderId, args.Term)
26 |
27 | aet.raft.TimeParams.heartBeatTimer.SetClear()
28 | if args.PrevLogIndex >= aet.raft.Log.Length() {
29 | reply.Success = false
30 | aet.raft.printInfo("new entries to log index", args.PrevLogIndex, "too large")
31 | reply.ConflictIndex = aet.raft.Log.Length()
32 | reply.ConflictTerm = -1
33 | return
34 | }
35 | if args.PrevLogIndex != -1 {
36 | // check when there should be a previous log entry
37 | if aet.raft.Log.Index(args.PrevLogIndex).Term != args.PrevLogTerm {
38 | reply.Success = false
39 | aet.raft.printInfo("new entries term", args.PrevLogTerm, "not consistent with this peer's previous log entry term", aet.raft.Log.Index(args.PrevLogIndex).Term)
40 | reply.ConflictTerm = aet.raft.Log.Index(args.PrevLogIndex).Term
41 | reply.ConflictIndex = aet.raft.Log.firstTermIndex(args.PrevLogIndex, reply.ConflictTerm)
42 | return
43 | }
44 | }
45 | // here the log can be updated
46 | aet.raft.printInfo("trying to append #entries", len(args.Entries))
47 | aet.raft.Log.UpdateLog(args.Entries, args.PrevLogIndex, args.LeaderCommit)
48 | // extra modifications done under candidate
49 | if aet.raft.MyState.IsCandidate() {
50 | aet.raft.currentTerm = args.Term
51 | aet.raft.ToFollower(args.LeaderId)
52 | }
53 | }
54 |
55 | func NewAppendEntriesTask(raft *Raft, args *AppendEntriesArgs, reply *AppendEntriesReply) *AppendEntriesTask {
56 | return &AppendEntriesTask{
57 | RaftTaskAttr: NewRaftTaskAttr(raft),
58 | args: args,
59 | reply: reply,
60 | }
61 | }
62 |
63 | // A non-leader should receive this
64 | /*
65 | func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) {
66 | RunTask(NewAppendEntriesTask(rf, args, reply), &rf.taskQueue)
67 | return
68 | //fmt.Println(rf.PrefixPrint(), "got heartbeat message from leader peer", args.LeaderId, "at term", args.Term)
69 | // default reply state
70 | reply.Term = rf.currentTerm
71 | reply.Success = true
72 | rf.timerCleared = true
73 |
74 | if rf.tryDiscardOldTerm(args.LeaderId, args.Term) {
75 | reply.Success = false
76 | return
77 | }
78 | if rf.tryFollowNewerTerm(args.LeaderId, args.Term) {
79 | reply.Success = true
80 | }
81 | // check Log info
82 | if args.PrevLogIndex >= len(rf.logs) {
83 | reply.Success = false
84 | fmt.Println(rf.PrefixPrint(), "got new Log index", args.PrevLogIndex+1, "too large for this peer's Log length", len(rf.logs))
85 | return
86 | } else {
87 | if args.PrevLogIndex != -1 {
88 | if rf.logs[args.PrevLogIndex].Term != args.PrevLogTerm {
89 | reply.Success = false
90 | fmt.Println(rf.PrefixPrint(), "Log entry on PrevLogIndex term inconsistent")
91 | return
92 | }
93 | }
94 | }
95 | for argsEntryIndex := 0; argsEntryIndex < len(args.Entries); {
96 | newEntryIndex := argsEntryIndex + args.PrevLogIndex + 1
97 | if newEntryIndex < len(rf.logs) {
98 | oldEntry := &rf.logs[newEntryIndex]
99 | newEntry := &args.Entries[argsEntryIndex]
100 | // existing Log
101 | // check for conflict
102 | if oldEntry.Equals(newEntry) {
103 | // consistent!
104 | argsEntryIndex++
105 | } else {
106 | // inconsistent!
107 | // delete everything after current index
108 | rf.logs = rf.logs[:newEntryIndex]
109 | fmt.Println(rf.PrefixPrint(), "inconsistent with leader at Log index", newEntryIndex, "removing from then on")
110 | }
111 | } else {
112 | // new Log
113 | // append everything
114 | rf.logs = append(rf.logs, args.Entries[argsEntryIndex:]...)
115 | break
116 | }
117 | }
118 | if args.LeaderCommit > rf.commitIndex {
119 | oldCommitIndex := rf.commitIndex
120 | if args.LeaderCommit < len(rf.logs)-1 {
121 | rf.commitIndex = args.LeaderCommit
122 | } else {
123 | rf.commitIndex = len(rf.logs) - 1
124 | }
125 | for ; oldCommitIndex <= rf.commitIndex; oldCommitIndex++ {
126 | if oldCommitIndex == 0 {
127 | continue
128 | }
129 | rf.applyCh <- ApplyMsg{
130 | CommandIndex: oldCommitIndex,
131 | CommandValid: true,
132 | Command: rf.logs[oldCommitIndex].Command,
133 | }
134 | }
135 |
136 | }
137 | fmt.Println(rf.PrefixPrint(), "got", len(args.Entries), "new Log entries from leader peer", args.LeaderId, "committed index", rf.commitIndex)
138 |
139 | switch rf.MyState {
140 | case LeaderState:
141 | break
142 | case FollowerState:
143 | break
144 | case CandidateState:
145 | rf.MyState = FollowerState
146 | rf.currentTerm = args.Term
147 | rf.votedFor = args.LeaderId
148 | reply.Success = true
149 | fmt.Println(rf.PrefixPrint(), "set leader to peer", args.LeaderId, "by heartbeat message")
150 | break
151 | default:
152 | panic("Invalid peer state in rpc AppendEntries!")
153 | }
154 |
155 | //rf.mu.Unlock()
156 | }
157 | */
158 |
--------------------------------------------------------------------------------
/src/raft/callback.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "labrpc"
5 | "sync"
6 | )
7 |
8 | type AsyncRpcCallInterface interface {
9 | callback(int)
10 | tryEnd() bool
11 |
12 | SetAliveHost(int)
13 | makeRpcCall(peerIndex int) bool
14 | IncrementCurrentCount()
15 | PeerCount() int
16 | shouldExit() bool
17 |
18 | LockCallBack()
19 | UnlockCallBack()
20 |
21 | GetRaftIndex() int
22 | GetRaft() *Raft
23 | }
24 |
25 | /*
26 | Thus, a concrete async rpc call only needs to implement these methods:
27 | - makeRpcCall
28 | - callback
29 | - tryEnd
30 | */
31 | func CallAsyncRpc(call AsyncRpcCallInterface) {
32 | for peerIndex := 0; peerIndex < call.PeerCount(); peerIndex++ {
33 | if call.GetRaftIndex() == peerIndex {
34 | // don't send to myself
35 | continue
36 | }
37 | go func(peerIndex int) {
38 | ok := call.makeRpcCall(peerIndex)
39 | // one callback at a time
40 | call.LockCallBack()
41 | call.IncrementCurrentCount()
42 | if call.shouldExit() {
43 | call.UnlockCallBack()
44 | return
45 | }
46 | if ok {
47 | call.SetAliveHost(peerIndex)
48 | call.callback(peerIndex)
49 | } else {
50 | call.GetRaft().printInfo("rpc to peer", peerIndex, "timeout")
51 | }
52 | if call.tryEnd() {
53 | call.UnlockCallBack()
54 | return
55 | }
56 | call.UnlockCallBack()
57 | }(peerIndex)
58 | }
59 | }
60 |
61 | type AsyncRpcCallAttr struct {
62 | // initialized in constructor
63 | AliveCount int
64 | SuccessCount int
65 | TotalCount int
66 | CurrentCount int
67 | AliveHosts []bool
68 | peers []*labrpc.ClientEnd
69 | raft *Raft
70 |
71 | Cond *sync.Cond
72 | mu sync.Mutex
73 | MustExit bool
74 | }
75 |
76 | func (ri *AsyncRpcCallAttr) PeerCount() int {
77 | return ri.TotalCount
78 | }
79 |
80 | func (ri *AsyncRpcCallAttr) IncrementAliveCount() {
81 | ri.AliveCount++
82 | }
83 |
84 | func (ri *AsyncRpcCallAttr) IncrementSuccessCount() {
85 | ri.SuccessCount++
86 | }
87 |
88 | func (ri *AsyncRpcCallAttr) IncrementCurrentCount() {
89 | ri.Cond.L.Lock()
90 | ri.CurrentCount++
91 | ri.Cond.L.Unlock()
92 | ri.Cond.Broadcast()
93 | }
94 |
95 | func (ri *AsyncRpcCallAttr) Wait() {
96 | ri.raft.printInfo("waiting for election done")
97 | ri.Cond.L.Lock()
98 | for !(ri.CurrentCount >= ri.TotalCount || ri.MustExit) {
99 | ri.Cond.Wait()
100 | }
101 | ri.Cond.L.Unlock()
102 | ri.raft.printInfo("election done wait exit")
103 | }
104 |
105 | func (ri *AsyncRpcCallAttr) SetMustExit() {
106 | ri.Cond.L.Lock()
107 | ri.MustExit = true
108 | ri.Cond.L.Unlock()
109 | ri.Cond.Broadcast()
110 | }
111 |
112 | func (ri *AsyncRpcCallAttr) SetAliveHost(index int) {
113 | ri.AliveHosts[index] = true
114 | ri.AliveCount++
115 | }
116 |
117 | func (ri *AsyncRpcCallAttr) shouldExit() bool {
118 | return ri.MustExit
119 | }
120 |
121 | func (ri *AsyncRpcCallAttr) LockCallBack() {
122 | ri.mu.Lock()
123 | }
124 |
125 | func (ri *AsyncRpcCallAttr) UnlockCallBack() {
126 | ri.mu.Unlock()
127 | }
128 |
129 | func (ri *AsyncRpcCallAttr) GetRaftIndex() int {
130 | return ri.raft.me
131 | }
132 |
133 | func (ri *AsyncRpcCallAttr) GetRaft() *Raft {
134 | return ri.raft
135 | }
136 |
137 | func (rf *Raft) NewAsyncRpcCall() AsyncRpcCallAttr {
138 | aliveHosts := make([]bool, len(rf.peers))
139 | for index, _ := range aliveHosts {
140 | aliveHosts[index] = false
141 | }
142 | return AsyncRpcCallAttr{
143 | TotalCount: rf.PeerCount(),
144 | Cond: sync.NewCond(&sync.Mutex{}),
145 | AliveHosts: aliveHosts,
146 | AliveCount: 1,
147 | SuccessCount: 1,
148 | CurrentCount: 1,
149 | MustExit: false,
150 | peers: rf.peers,
151 | raft: rf,
152 | }
153 | }
154 |
--------------------------------------------------------------------------------
/src/raft/config.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | //
4 | // support for Raft tester.
5 | //
6 | // we will use the original config.go to test your code for grading.
7 | // so, while you can modify this code to help you debug, please
8 | // test with the original before submitting.
9 | //
10 |
11 | import "labrpc"
12 | import "log"
13 | import "sync"
14 | import "testing"
15 | import "runtime"
16 | import "math/rand"
17 | import crand "crypto/rand"
18 | import "math/big"
19 | import "encoding/base64"
20 | import "time"
21 | import "fmt"
22 |
23 | func randstring(n int) string {
24 | b := make([]byte, 2*n)
25 | crand.Read(b)
26 | s := base64.URLEncoding.EncodeToString(b)
27 | return s[0:n]
28 | }
29 |
30 | func makeSeed() int64 {
31 | max := big.NewInt(int64(1) << 62)
32 | bigx, _ := crand.Int(crand.Reader, max)
33 | x := bigx.Int64()
34 | return x
35 | }
36 |
37 | type config struct {
38 | mu sync.Mutex
39 | t *testing.T
40 | net *labrpc.Network
41 | n int
42 | rafts []*Raft
43 | applyErr []string // from apply channel readers
44 | connected []bool // whether each server is on the net
45 | saved []*Persister
46 | endnames [][]string // the port file names each sends to
47 | logs []map[int]interface{} // copy of each server's committed entries
48 | start time.Time // time at which make_config() was called
49 | // begin()/end() statistics
50 | t0 time.Time // time at which test_test.go called cfg.begin()
51 | rpcs0 int // rpcTotal() at start of test
52 | cmds0 int // number of agreements
53 | bytes0 int64
54 | maxIndex int
55 | maxIndex0 int
56 | }
57 |
58 | var ncpu_once sync.Once
59 |
60 | func make_config(t *testing.T, n int, unreliable bool) *config {
61 | ncpu_once.Do(func() {
62 | if runtime.NumCPU() < 2 {
63 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
64 | }
65 | rand.Seed(makeSeed())
66 | })
67 | runtime.GOMAXPROCS(4)
68 | cfg := &config{}
69 | cfg.t = t
70 | cfg.net = labrpc.MakeNetwork()
71 | cfg.n = n
72 | cfg.applyErr = make([]string, cfg.n)
73 | cfg.rafts = make([]*Raft, cfg.n)
74 | cfg.connected = make([]bool, cfg.n)
75 | cfg.saved = make([]*Persister, cfg.n)
76 | cfg.endnames = make([][]string, cfg.n)
77 | cfg.logs = make([]map[int]interface{}, cfg.n)
78 | cfg.start = time.Now()
79 |
80 | cfg.setunreliable(unreliable)
81 |
82 | cfg.net.LongDelays(true)
83 |
84 | // create a full set of Rafts.
85 | for i := 0; i < cfg.n; i++ {
86 | cfg.logs[i] = map[int]interface{}{}
87 | cfg.start1(i)
88 | }
89 |
90 | // connect everyone
91 | for i := 0; i < cfg.n; i++ {
92 | cfg.connect(i)
93 | }
94 |
95 | return cfg
96 | }
97 |
98 | // shut down a Raft server but save its persistent state.
99 | func (cfg *config) crash1(i int) {
100 | cfg.disconnect(i)
101 | cfg.net.DeleteServer(i) // disable client connections to the server.
102 |
103 | cfg.mu.Lock()
104 | defer cfg.mu.Unlock()
105 |
106 | // a fresh persister, in case old instance
107 | // continues to update the Persister.
108 | // but copy old persister's content so that we always
109 | // pass Make() the last persisted state.
110 | if cfg.saved[i] != nil {
111 | cfg.saved[i] = cfg.saved[i].Copy()
112 | }
113 |
114 | rf := cfg.rafts[i]
115 | if rf != nil {
116 | cfg.mu.Unlock()
117 | rf.Kill()
118 | cfg.mu.Lock()
119 | cfg.rafts[i] = nil
120 | }
121 |
122 | if cfg.saved[i] != nil {
123 | raftlog := cfg.saved[i].ReadRaftState()
124 | cfg.saved[i] = &Persister{}
125 | cfg.saved[i].SaveRaftState(raftlog)
126 | }
127 | }
128 |
129 | //
130 | // start or re-start a Raft.
131 | // if one already exists, "kill" it first.
132 | // allocate new outgoing port file names, and a new
133 | // state persister, to isolate previous instance of
134 | // this server. since we cannot really kill it.
135 | //
136 | func (cfg *config) start1(i int) {
137 | cfg.crash1(i)
138 |
139 | // a fresh set of outgoing ClientEnd names.
140 | // so that old crashed instance's ClientEnds can't send.
141 | cfg.endnames[i] = make([]string, cfg.n)
142 | for j := 0; j < cfg.n; j++ {
143 | cfg.endnames[i][j] = randstring(20)
144 | }
145 |
146 | // a fresh set of ClientEnds.
147 | ends := make([]*labrpc.ClientEnd, cfg.n)
148 | for j := 0; j < cfg.n; j++ {
149 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
150 | cfg.net.Connect(cfg.endnames[i][j], j)
151 | }
152 |
153 | cfg.mu.Lock()
154 |
155 | // a fresh persister, so old instance doesn't overwrite
156 | // new instance's persisted state.
157 | // but copy old persister's content so that we always
158 | // pass Make() the last persisted state.
159 | if cfg.saved[i] != nil {
160 | cfg.saved[i] = cfg.saved[i].Copy()
161 | } else {
162 | cfg.saved[i] = MakePersister()
163 | }
164 |
165 | cfg.mu.Unlock()
166 |
167 | // listen to messages from Raft indicating newly committed messages.
168 | applyCh := make(chan ApplyMsg)
169 | go func() {
170 | for m := range applyCh {
171 | err_msg := ""
172 | if m.CommandValid == false {
173 | // ignore other types of ApplyMsg
174 | } else {
175 | v := m.Command
176 | cfg.mu.Lock()
177 | //fmt.Println("[monitor] sees a committed Command", m.Command, "valid", m.CommandValid, "index", m.CommandIndex)
178 | for j := 0; j < len(cfg.logs); j++ {
179 | if old, oldok := cfg.logs[j][m.CommandIndex]; oldok && old != v {
180 | // some server has already committed a different value for this entry!
181 | err_msg = fmt.Sprintf("commit index=%v server=%v %v != server=%v %v",
182 | m.CommandIndex, i, m.Command, j, old)
183 | }
184 | }
185 | _, prevok := cfg.logs[i][m.CommandIndex-1]
186 | cfg.logs[i][m.CommandIndex] = v
187 | if m.CommandIndex > cfg.maxIndex {
188 | cfg.maxIndex = m.CommandIndex
189 | }
190 | cfg.mu.Unlock()
191 |
192 | if m.CommandIndex > 1 && prevok == false {
193 | err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex)
194 | }
195 | }
196 |
197 | if err_msg != "" {
198 | log.Fatalf("apply error: %v\n", err_msg)
199 | cfg.applyErr[i] = err_msg
200 | // keep reading after error so that Raft doesn't block
201 | // holding locks...
202 | }
203 | }
204 | }()
205 |
206 | rf := Make(ends, i, cfg.saved[i], applyCh)
207 |
208 | cfg.mu.Lock()
209 | cfg.rafts[i] = rf
210 | cfg.mu.Unlock()
211 |
212 | svc := labrpc.MakeService(rf)
213 | srv := labrpc.MakeServer()
214 | srv.AddService(svc)
215 | cfg.net.AddServer(i, srv)
216 | }
217 |
218 | func (cfg *config) checkTimeout() {
219 | // enforce a two minute real-time limit on each test
220 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
221 | cfg.t.Fatal("test took longer than 120 seconds")
222 | }
223 | }
224 |
225 | func (cfg *config) cleanup() {
226 | for i := 0; i < len(cfg.rafts); i++ {
227 | if cfg.rafts[i] != nil {
228 | cfg.rafts[i].Kill()
229 | }
230 | }
231 | cfg.net.Cleanup()
232 | cfg.checkTimeout()
233 | }
234 |
235 | // attach server i to the net.
236 | func (cfg *config) connect(i int) {
237 | // fmt.Printf("connect(%d)\n", i)
238 |
239 | cfg.connected[i] = true
240 |
241 | // outgoing ClientEnds
242 | for j := 0; j < cfg.n; j++ {
243 | if cfg.connected[j] {
244 | endname := cfg.endnames[i][j]
245 | cfg.net.Enable(endname, true)
246 | }
247 | }
248 |
249 | // incoming ClientEnds
250 | for j := 0; j < cfg.n; j++ {
251 | if cfg.connected[j] {
252 | endname := cfg.endnames[j][i]
253 | cfg.net.Enable(endname, true)
254 | }
255 | }
256 | }
257 |
258 | // detach server i from the net.
259 | func (cfg *config) disconnect(i int) {
260 | // fmt.Printf("disconnect(%d)\n", i)
261 |
262 | cfg.connected[i] = false
263 |
264 | // outgoing ClientEnds
265 | for j := 0; j < cfg.n; j++ {
266 | if cfg.endnames[i] != nil {
267 | endname := cfg.endnames[i][j]
268 | cfg.net.Enable(endname, false)
269 | }
270 | }
271 |
272 | // incoming ClientEnds
273 | for j := 0; j < cfg.n; j++ {
274 | if cfg.endnames[j] != nil {
275 | endname := cfg.endnames[j][i]
276 | cfg.net.Enable(endname, false)
277 | }
278 | }
279 | }
280 |
281 | func (cfg *config) rpcCount(server int) int {
282 | return cfg.net.GetCount(server)
283 | }
284 |
285 | func (cfg *config) rpcTotal() int {
286 | return cfg.net.GetTotalCount()
287 | }
288 |
289 | func (cfg *config) setunreliable(unrel bool) {
290 | cfg.net.Reliable(!unrel)
291 | }
292 |
293 | func (cfg *config) bytesTotal() int64 {
294 | return cfg.net.GetTotalBytes()
295 | }
296 |
297 | func (cfg *config) setlongreordering(longrel bool) {
298 | cfg.net.LongReordering(longrel)
299 | }
300 |
301 | // check that there's exactly one leader.
302 | // try a few times in case re-elections are needed.
303 | func (cfg *config) checkOneLeader() int {
304 | for iters := 0; iters < 10; iters++ {
305 | ms := 450 + (rand.Int63() % 100)
306 | time.Sleep(time.Duration(ms) * time.Millisecond)
307 |
308 | leaders := make(map[int][]int)
309 | for i := 0; i < cfg.n; i++ {
310 | if cfg.connected[i] {
311 | if term, leader := cfg.rafts[i].GetState(); leader {
312 | leaders[term] = append(leaders[term], i)
313 | }
314 | }
315 | }
316 |
317 | lastTermWithLeader := -1
318 | for term, leaders := range leaders {
319 | if len(leaders) > 1 {
320 | cfg.t.Fatalf("Term %d has %d (>1) leaders", term, len(leaders))
321 | }
322 | if term > lastTermWithLeader {
323 | lastTermWithLeader = term
324 | }
325 | }
326 |
327 | if len(leaders) != 0 {
328 | return leaders[lastTermWithLeader][0]
329 | }
330 | }
331 | cfg.t.Fatalf("expected one leader, got none")
332 | return -1
333 | }
334 |
335 | // check that everyone agrees on the Term.
336 | func (cfg *config) checkTerms() int {
337 | term := -1
338 | for i := 0; i < cfg.n; i++ {
339 | if cfg.connected[i] {
340 | xterm, _ := cfg.rafts[i].GetState()
341 | if term == -1 {
342 | term = xterm
343 | } else if term != xterm {
344 | cfg.t.Fatalf("servers disagree on Term")
345 | }
346 | }
347 | }
348 | return term
349 | }
350 |
351 | // check that there's no leader
352 | func (cfg *config) checkNoLeader() {
353 | for i := 0; i < cfg.n; i++ {
354 | if cfg.connected[i] {
355 | _, is_leader := cfg.rafts[i].GetState()
356 | if is_leader {
357 | cfg.t.Fatalf("expected no leader, but %v claims to be leader", i)
358 | }
359 | }
360 | }
361 | }
362 |
363 | // how many servers think a Log entry is committed?
364 | func (cfg *config) nCommitted(index int) (int, interface{}) {
365 | count := 0
366 | var cmd interface{} = nil
367 | for i := 0; i < len(cfg.rafts); i++ {
368 | if cfg.applyErr[i] != "" {
369 | cfg.t.Fatal(cfg.applyErr[i])
370 | }
371 |
372 | cfg.mu.Lock()
373 | cmd1, ok := cfg.logs[i][index]
374 | cfg.mu.Unlock()
375 |
376 | if ok {
377 | if count > 0 && cmd != cmd1 {
378 | cfg.t.Fatalf("committed values do not match: index %v, %v, %v\n",
379 | index, cmd, cmd1)
380 | }
381 | count += 1
382 | cmd = cmd1
383 | }
384 | }
385 | return count, cmd
386 | }
387 |
388 | // wait for at least n servers to commit.
389 | // but don't wait forever.
390 | func (cfg *config) wait(index int, n int, startTerm int) interface{} {
391 | to := 10 * time.Millisecond
392 | for iters := 0; iters < 30; iters++ {
393 | nd, _ := cfg.nCommitted(index)
394 | if nd >= n {
395 | break
396 | }
397 | time.Sleep(to)
398 | if to < time.Second {
399 | to *= 2
400 | }
401 | if startTerm > -1 {
402 | for _, r := range cfg.rafts {
403 | if t, _ := r.GetState(); t > startTerm {
404 | // someone has moved on
405 | // can no longer guarantee that we'll "win"
406 | return -1
407 | }
408 | }
409 | }
410 | }
411 | nd, cmd := cfg.nCommitted(index)
412 | if nd < n {
413 | cfg.t.Fatalf("only %d decided for index %d; wanted %d\n",
414 | nd, index, n)
415 | }
416 | return cmd
417 | }
418 |
419 | // do a complete agreement.
420 | // it might choose the wrong leader initially,
421 | // and have to re-submit after giving up.
422 | // entirely gives up after about 10 seconds.
423 | // indirectly checks that the servers agree on the
424 | // same value, since nCommitted() checks this,
425 | // as do the threads that read from applyCh.
426 | // returns index.
427 | // if retry==true, may submit the command multiple
428 | // times, in case a leader fails just after Start().
429 | // if retry==false, calls Start() only once, in order
430 | // to simplify the early Lab 2B tests.
431 | func (cfg *config) one(cmd interface{}, expectedServers int, retry bool) int {
432 | t0 := time.Now()
433 | starts := 0
434 | for time.Since(t0).Seconds() < 10 {
435 | // try all the servers, maybe one is the leader.
436 | index := -1
437 | for si := 0; si < cfg.n; si++ {
438 | starts = (starts + 1) % cfg.n
439 | var rf *Raft
440 | cfg.mu.Lock()
441 | if cfg.connected[starts] {
442 | rf = cfg.rafts[starts]
443 | }
444 | cfg.mu.Unlock()
445 | if rf != nil {
446 | index1, _, ok := rf.Start(cmd)
447 | if ok {
448 | index = index1
449 | break
450 | }
451 | }
452 | }
453 |
454 | if index != -1 {
455 | // somebody claimed to be the leader and to have
456 | // submitted our command; wait a while for agreement.
457 | t1 := time.Now()
458 | for time.Since(t1).Seconds() < 2 {
459 | nd, cmd1 := cfg.nCommitted(index)
460 | if nd > 0 && nd >= expectedServers {
461 | // committed
462 | if cmd1 == cmd {
463 | // and it was the command we submitted.
464 | return index
465 | }
466 | }
467 | time.Sleep(20 * time.Millisecond)
468 | }
469 | if retry == false {
470 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd)
471 | }
472 | } else {
473 | time.Sleep(50 * time.Millisecond)
474 | }
475 | }
476 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd)
477 |
478 | return -1
479 | }
480 |
481 | // start a Test.
482 | // print the Test message.
483 | // e.g. cfg.begin("Test (2B): RPC counts aren't too high")
484 | func (cfg *config) begin(description string) {
485 | fmt.Printf("%s ...\n", description)
486 | cfg.t0 = time.Now()
487 | cfg.rpcs0 = cfg.rpcTotal()
488 | cfg.bytes0 = cfg.bytesTotal()
489 | cfg.cmds0 = 0
490 | cfg.maxIndex0 = cfg.maxIndex
491 | }
492 |
493 | // end a Test -- the fact that we got here means there
494 | // was no failure.
495 | // print the Passed message,
496 | // and some performance numbers.
497 | func (cfg *config) end() {
498 | cfg.checkTimeout()
499 | if cfg.t.Failed() == false {
500 | cfg.mu.Lock()
501 | t := time.Since(cfg.t0).Seconds() // real time
502 | npeers := cfg.n // number of Raft peers
503 | nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends
504 | nbytes := cfg.bytesTotal() - cfg.bytes0 // number of bytes
505 | ncmds := cfg.maxIndex - cfg.maxIndex0 // number of Raft agreements reported
506 | cfg.mu.Unlock()
507 |
508 | fmt.Printf(" ... Passed --")
509 | fmt.Printf(" %4.1f %d %4d %7d %4d\n", t, npeers, nrpc, nbytes, ncmds)
510 | }
511 | }
512 |
--------------------------------------------------------------------------------
/src/raft/peer_log_state.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import "sync"
4 |
5 | type PeerLogStates struct {
6 | NextIndex []int
7 | matchIndex []int // init 0
8 |
9 | mutex sync.Mutex
10 | }
11 |
12 | func initPeerCountIntArray(peerCount int, initVal int) []int {
13 | ret := make([]int, peerCount)
14 | for index := 0; index < peerCount; index++ {
15 | ret[index] = initVal
16 | }
17 | return ret
18 | }
19 |
20 | func NewPeerLogStates(peerCount int) PeerLogStates {
21 | return PeerLogStates{
22 | NextIndex: initPeerCountIntArray(peerCount, 0),
23 | matchIndex: initPeerCountIntArray(peerCount, 0),
24 | }
25 | }
26 |
27 | func (pls *PeerLogStates) Lock() {
28 | pls.mutex.Lock()
29 | }
30 |
31 | func (pls *PeerLogStates) Unlock() {
32 | pls.mutex.Unlock()
33 | }
34 |
35 | func (pls *PeerLogStates) SetAllNextIndex(nextIndex int) {
36 | pls.mutex.Lock()
37 | for index := 0; index < len(pls.NextIndex); index++ {
38 | pls.NextIndex[index] = nextIndex
39 | }
40 | pls.mutex.Unlock()
41 | }
42 |
43 | func (pls *PeerLogStates) More(peerIndex int, moreNextIndex int) {
44 | pls.mutex.Lock()
45 | pls.NextIndex[peerIndex] += moreNextIndex
46 | pls.matchIndex[peerIndex] = pls.NextIndex[peerIndex] - 1
47 | pls.mutex.Unlock()
48 | }
49 |
50 | func (pls *PeerLogStates) Less(peerIndex int) {
51 | pls.mutex.Lock()
52 | pls.NextIndex[peerIndex] /= 2
53 | pls.mutex.Unlock()
54 | }
55 |
--------------------------------------------------------------------------------
/src/raft/persister.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | //
4 | // support for Raft and kvraft to save persistent
5 | // Raft state (Log &c) and k/v server snapshots.
6 | //
7 | // we will use the original persister.go to test your code for grading.
8 | // so, while you can modify this code to help you debug, please
9 | // test with the original before submitting.
10 | //
11 |
12 | import "sync"
13 |
14 | type Persister struct {
15 | mu sync.Mutex
16 | raftstate []byte
17 | snapshot []byte
18 | }
19 |
20 | func MakePersister() *Persister {
21 | return &Persister{}
22 | }
23 |
24 | func (ps *Persister) Copy() *Persister {
25 | ps.mu.Lock()
26 | defer ps.mu.Unlock()
27 | np := MakePersister()
28 | np.raftstate = ps.raftstate
29 | np.snapshot = ps.snapshot
30 | return np
31 | }
32 |
33 | func (ps *Persister) SaveRaftState(state []byte) {
34 | ps.mu.Lock()
35 | defer ps.mu.Unlock()
36 | ps.raftstate = state
37 | }
38 |
39 | func (ps *Persister) ReadRaftState() []byte {
40 | ps.mu.Lock()
41 | defer ps.mu.Unlock()
42 | return ps.raftstate
43 | }
44 |
45 | func (ps *Persister) RaftStateSize() int {
46 | ps.mu.Lock()
47 | defer ps.mu.Unlock()
48 | return len(ps.raftstate)
49 | }
50 |
51 | // Save both Raft state and K/V snapshot as a single atomic action,
52 | // to help avoid them getting out of sync.
53 | func (ps *Persister) SaveStateAndSnapshot(state []byte, snapshot []byte) {
54 | ps.mu.Lock()
55 | defer ps.mu.Unlock()
56 | ps.raftstate = state
57 | ps.snapshot = snapshot
58 | }
59 |
60 | func (ps *Persister) ReadSnapshot() []byte {
61 | ps.mu.Lock()
62 | defer ps.mu.Unlock()
63 | return ps.snapshot
64 | }
65 |
66 | func (ps *Persister) SnapshotSize() int {
67 | ps.mu.Lock()
68 | defer ps.mu.Unlock()
69 | return len(ps.snapshot)
70 | }
71 |
--------------------------------------------------------------------------------
/src/raft/raft_log.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "fmt"
5 | "strconv"
6 | "sync"
7 | )
8 |
9 | type LogEntry struct {
10 | Term int
11 | Command interface{}
12 | }
13 |
14 | func (le *LogEntry) Equals(entry *LogEntry) bool {
15 | return le.Term == entry.Term && le.Command == entry.Command
16 | }
17 |
18 | func (le *LogEntry) ToApplyMsg(index int, valid bool) ApplyMsg {
19 | return ApplyMsg{
20 | Command: le.Command,
21 | CommandValid: valid,
22 | CommandIndex: index,
23 | }
24 | }
25 |
26 | type RaftLog struct {
27 | CommitIndex int
28 | lastApplied int
29 | entries []LogEntry
30 | cond *sync.Cond
31 | applyCh chan ApplyMsg
32 |
33 | raft *Raft
34 | }
35 |
36 | func initRaftLogEntries() []LogEntry {
37 | ret := make([]LogEntry, 1)
38 | ret[0].Term = -1
39 | ret[0].Command = nil
40 | return ret
41 | }
42 |
43 | func NewRaftLog(applyCh chan ApplyMsg, raft *Raft) *RaftLog {
44 | return &RaftLog{
45 | CommitIndex: 0,
46 | lastApplied: 0,
47 | entries: initRaftLogEntries(),
48 | cond: sync.NewCond(&sync.Mutex{}),
49 | applyCh: applyCh,
50 | raft: raft,
51 | }
52 | }
53 |
54 | var dumpLock sync.Mutex
55 |
56 | func (rl *RaftLog) InfoString() string {
57 | return "commitIndex " + strconv.Itoa(rl.CommitIndex) + " lastApplied " + strconv.Itoa(rl.lastApplied) + " log length " + strconv.Itoa(rl.Length())
58 | }
59 |
60 | func (rl *RaftLog) dump() {
61 | dumpLock.Lock()
62 | fmt.Println("dumping log", rl.Length())
63 | fmt.Println("log length", rl.Length(), "commit index", rl.CommitIndex)
64 | //for entryIndex, entry := range rl.entries {
65 | // fmt.Printf("%v term: %v action: %v\n", entryIndex, entry.Term, entry.Command)
66 | // if entryIndex == rl.CommitIndex {
67 | // fmt.Println("----------------------------------- commit index", entryIndex)
68 | // }
69 | //}
70 | dumpLock.Unlock()
71 | }
72 |
73 | func (rl *RaftLog) Lock() {
74 | rl.cond.L.Lock()
75 | }
76 |
77 | func (rl *RaftLog) Unlock() {
78 | rl.cond.L.Unlock()
79 | }
80 |
81 | func (rl *RaftLog) Append(entries ...LogEntry) {
82 | rl.entries = append(rl.entries, entries...)
83 | }
84 |
85 | // remove all logs starting at this index
86 | func (rl *RaftLog) RemoveAt(index int) {
87 | rl.entries = rl.entries[:index]
88 | }
89 |
90 | func (rl *RaftLog) Index(index int) *LogEntry {
91 | return &rl.entries[index]
92 | }
93 |
94 | func (rl *RaftLog) LastEntry() *LogEntry {
95 | return &rl.entries[rl.Length()-1]
96 | }
97 |
98 | func (rl *RaftLog) Length() int {
99 | return len(rl.entries)
100 | }
101 |
102 | func (rl *RaftLog) firstTermIndex(beginIndex int, term int) int {
103 | for ; beginIndex > 1; beginIndex-- {
104 | if rl.entries[beginIndex-1].Term != term {
105 | return beginIndex
106 | }
107 | }
108 | return 1
109 | }
110 |
111 | func (rl *RaftLog) lastTermIndex(beginIndex int, term int) int {
112 | for ; beginIndex < rl.Length()-1; beginIndex++ {
113 | if rl.entries[beginIndex].Term == term && rl.entries[beginIndex+1].Term != term {
114 | return beginIndex + 1
115 | }
116 | }
117 | return -1
118 | }
119 |
120 | /*
121 | From now on, the methods take the lock themselves.
122 | The methods above can be composed by an outer caller into larger operations, with locking handled by that caller.
123 | */
124 |
125 | func (rl *RaftLog) ApplyWorker() {
126 | for {
127 | rl.Lock()
128 | for rl.CommitIndex <= rl.lastApplied {
129 | rl.cond.Wait()
130 | }
131 | rl.lastApplied++
132 | rl.entries[rl.lastApplied].Apply()
133 | rl.Unlock()
134 | }
135 | }
136 |
137 | func (rl *RaftLog) UpdateLog(newEntries []LogEntry, prevLogIndex int, leaderCommit int) {
138 | rl.Lock()
139 |
140 | // update
141 | for argsEntryIndex := 0; argsEntryIndex < len(newEntries); {
142 | newEntryIndex := argsEntryIndex + prevLogIndex + 1
143 | if newEntryIndex < rl.Length() {
144 | oldEntry := &rl.entries[newEntryIndex]
145 | newEntry := &newEntries[argsEntryIndex]
146 | // existing Log
147 | // check for conflict
148 | if oldEntry.Equals(newEntry) {
149 | // consistent!
150 | rl.raft.printInfo("existing consistent entry", newEntryIndex)
151 | argsEntryIndex++
152 | } else {
153 | // inconsistent!
154 | // delete everything after current index
155 | rl.raft.printInfo("inconsistent entry at index", newEntryIndex)
156 | rl.RemoveAt(newEntryIndex)
157 | }
158 | } else {
159 | // new Log
160 | // append everything
161 | rl.raft.printInfo("new entries at", newEntryIndex, "length", len(newEntries)-argsEntryIndex)
162 | rl.Append(newEntries[argsEntryIndex:]...)
163 | break
164 | }
165 | }
166 | // commit
167 | if leaderCommit > rl.CommitIndex {
168 | oldCommitIndex := rl.CommitIndex
169 | if leaderCommit < rl.Length()-1 {
170 | rl.CommitIndex = leaderCommit
171 | } else {
172 | rl.CommitIndex = rl.Length() - 1
173 | }
174 | for oldCommitIndex++; oldCommitIndex <= rl.CommitIndex; oldCommitIndex++ { // start past the previously committed entry so it is not re-applied
175 | if oldCommitIndex == 0 {
176 | continue
177 | }
178 | rl.applyCh <- rl.entries[oldCommitIndex].ToApplyMsg(oldCommitIndex, true)
179 | }
180 | }
181 |
182 | rl.Unlock()
183 | }
184 |
185 | /*
186 | Rpc Args
187 | */
188 | func (rl *RaftLog) NewAppendEntriesArgs(nextIndex int, currentTerm int, me int) *AppendEntriesArgs {
189 | rl.cond.L.Lock()
190 | var entries []LogEntry
191 | prevLogIndex := -1
192 | prevLogTerm := -1
193 | if nextIndex >= len(rl.entries) {
194 | entries = make([]LogEntry, 0)
195 | } else {
196 | entries = rl.entries[nextIndex:]
197 | }
198 | if nextIndex != 0 {
199 | prevLogIndex = nextIndex - 1
200 | prevLogTerm = rl.entries[prevLogIndex].Term // index into the full log, not the slice being sent
201 | }
202 | ret := &AppendEntriesArgs{
203 | Term: currentTerm,
204 | LeaderId: me,
205 | LeaderCommit: rl.CommitIndex,
206 | // index of previous entry of this sending package
207 | PrevLogIndex: prevLogIndex,
208 | // term of previous entry of this sending package
209 | PrevLogTerm: prevLogTerm,
210 | // sending package
211 | Entries: entries,
212 | }
213 | rl.cond.L.Unlock()
214 | return ret
215 | }
216 |
217 | func (rl *RaftLog) NewRequestVoteArgs(currentTerm int, me int) *RequestVoteArgs {
218 | return &RequestVoteArgs{
219 | Term: currentTerm,
220 | CandidateId: me,
221 | LastLogIndex: len(rl.entries) - 1,
222 | LastLogTerm: rl.entries[len(rl.entries)-1].Term,
223 | }
224 | }
225 |
--------------------------------------------------------------------------------
/src/raft/raft_state.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | type RaftState interface {
4 | IsLeader() bool
5 | IsCandidate() bool
6 | ToString() string
7 | Run()
8 | }
9 |
10 | type RaftStateAttr struct {
11 | raft *Raft
12 | }
13 |
14 | func NewStateCommon(raft *Raft) RaftStateAttr {
15 | return RaftStateAttr{
16 | raft: raft,
17 | }
18 | }
19 |
20 | type RaftLeader struct {
21 | RaftStateAttr
22 | }
23 |
24 | type RaftFollower struct {
25 | RaftStateAttr
26 | }
27 |
28 | type RaftCandidate struct {
29 | RaftStateAttr
30 | }
31 |
32 | /*
33 | Constructors
34 | */
35 | func NewLeaderState(raft *Raft) *RaftLeader {
36 | return &RaftLeader{
37 | RaftStateAttr: NewStateCommon(raft),
38 | }
39 | }
40 |
41 | func NewFollowerState(raft *Raft) *RaftFollower {
42 | return &RaftFollower{
43 | RaftStateAttr: NewStateCommon(raft),
44 | }
45 | }
46 |
47 | func NewRaftCandidate(raft *Raft) *RaftCandidate {
48 | return &RaftCandidate{
49 | RaftStateAttr: NewStateCommon(raft),
50 | }
51 | }
52 |
53 | /*
54 | check leader
55 | */
56 | func (*RaftFollower) IsLeader() bool {
57 | return false
58 | }
59 |
60 | func (*RaftCandidate) IsLeader() bool {
61 | return false
62 | }
63 |
64 | func (*RaftLeader) IsLeader() bool {
65 | return true
66 | }
67 |
68 | /*
69 | check candidate
70 | */
71 | func (*RaftFollower) IsCandidate() bool {
72 | return false
73 | }
74 |
75 | func (*RaftLeader) IsCandidate() bool {
76 | return false
77 | }
78 |
79 | func (*RaftCandidate) IsCandidate() bool {
80 | return true
81 | }
82 |
83 | /*
84 | Print
85 | */
86 | func (leader *RaftLeader) ToString() string {
87 | return "Leader"
88 | }
89 |
90 | func (follower *RaftFollower) ToString() string {
91 | return "Follower"
92 | }
93 |
94 | func (candidate *RaftCandidate) ToString() string {
95 | return "Candidate"
96 | }
97 |
98 | /*
99 | Run role
100 | */
101 | // actions taken under a stale peer state are ignored; the next round dispatches on the new state
102 | func (leader *RaftLeader) Run() {
103 | leader.raft.printInfo("sending a heartbeat message to all")
104 | leader.raft.SendAppendEntriesToAll()
105 | leader.raft.printInfo("all heartbeat messages sent")
106 | leader.raft.UnlockPeerState()
107 | leader.raft.TimeParams.WaitHeartBeat()
108 | leader.raft.LockPeerState()
109 | // locking won't matter
110 | // the function exits
111 | // if the peer state is changed, the next roll would discover it
112 | }
113 |
114 | /*
115 | func (rf *Raft) runLeader() {
116 | // init NextIndex
117 | for index, _ := range rf.nextIndex {
118 | rf.nextIndex[index] = len(rf.logs)
119 | }
120 | for {
121 | // prepare an info instance
122 | info := rf.NewAsyncRpcCall(len(rf.peers))
123 | if rf.MyState != LeaderState {
124 | return
125 | }
126 | // send heartbeat signal
127 | for peerIndex, _ := range rf.peers {
128 | if peerIndex == rf.me {
129 | continue
130 | }
131 | //fmt.Println(rf.PrefixPrint(), "sending heartbeat signal to peer", peerIndex)
132 | // send ahead logs
133 | args := rf.newAppendEntriesArgs(peerIndex)
134 | rf.sendAppendEntriesAsync(peerIndex, args, &AppendEntriesReply{}, info)
135 | }
136 | time.Sleep(time.Millisecond * time.Duration(rf.heartBeatWaitDuration))
137 | }
138 | }
139 | */
140 |
141 | func (follower *RaftFollower) Run() {
142 | follower.raft.printInfo("begin waiting for", follower.raft.TimeParams.heartBeatSendWait, "ms")
143 | follower.raft.UnlockPeerState()
144 | if follower.raft.TimeParams.heartBeatTimer.Wait() {
145 | follower.raft.LockPeerState()
146 | // timer expired
147 | follower.raft.printInfo("timer expired, becoming candidate")
148 | follower.raft.toCandidate()
149 | // this is the only way of being candidate
150 | // no need to worry about holding lock for this long time
151 | // other actions won't try to make this peer a leader
152 | } else {
153 | follower.raft.printInfo("timer cleared, following still peer", follower.raft.votedFor)
154 | follower.raft.LockPeerState()
155 | }
156 | }
157 |
158 | func (candidate *RaftCandidate) Run() {
159 | // release lock to allow peer state changes
160 | candidate.raft.printInfo("wait a random time then initiate an election")
161 | candidate.raft.UnlockPeerState()
162 | candidate.raft.TimeParams.WaitRandomRequestVote()
163 | candidate.raft.LockPeerState()
164 | // must check peer state for change in waiting
165 | if !candidate.IsCandidate() {
166 | return
167 | }
168 | candidate.raft.printInfo("initiate an election")
169 | candidate.raft.initiateElection()
170 | }
171 |
--------------------------------------------------------------------------------
/src/raft/raft_task.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import "sync"
4 |
5 | /*
6 | A Task class should implement all methods in RaftTask interface,
7 | and inherits struct RaftTaskAttr
8 | */
9 |
10 | type RaftTask interface {
11 | execute()
12 |
13 | WaitForDone()
14 | SetDone()
15 | }
16 |
17 | // rpc calls this and wait for return
18 | func RunTask(rt RaftTask, queue *RaftTaskQueue) {
19 | queue.Push(rt)
20 | rt.WaitForDone()
21 | }
22 |
23 | type RaftTaskAttr struct {
24 | done bool
25 | doneCond *sync.Cond
26 |
27 | raft *Raft
28 | }
29 |
30 | func NewRaftTaskAttr(raft *Raft) RaftTaskAttr {
31 | return RaftTaskAttr{
32 | done: false,
33 | doneCond: sync.NewCond(&sync.Mutex{}),
34 |
35 | raft: raft,
36 | }
37 | }
38 |
39 | func (rtd *RaftTaskAttr) WaitForDone() {
40 | rtd.doneCond.L.Lock()
41 | for rtd.done == false {
42 | rtd.doneCond.Wait()
43 | }
44 | rtd.doneCond.L.Unlock()
45 | }
46 |
47 | func (rtd *RaftTaskAttr) SetDone() {
48 | rtd.doneCond.L.Lock()
49 | if rtd.done {
50 | panic("Task done twice...")
51 | }
52 | rtd.done = true
53 | rtd.doneCond.L.Unlock()
54 | rtd.doneCond.Broadcast()
55 | }
56 |
57 | type RaftTaskQueue struct {
58 | channel chan RaftTask
59 | }
60 |
61 | func NewRaftTaskQueue() *RaftTaskQueue {
62 | return &RaftTaskQueue{
63 | channel: make(chan RaftTask),
64 | }
65 | }
66 |
67 | func (rtq *RaftTaskQueue) pop() RaftTask {
68 | return <-rtq.channel
69 | }
70 |
71 | func (rtq *RaftTaskQueue) RunOne() {
72 | task := rtq.pop()
73 | task.execute()
74 | task.SetDone()
75 | }
76 |
77 | func (rtq *RaftTaskQueue) Push(rt RaftTask) {
78 | rtq.channel <- rt
79 | }
80 |
--------------------------------------------------------------------------------
/src/raft/raft_time.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "math/rand"
5 | "time"
6 | )
7 |
8 | type RaftTimer struct {
9 | WaitDuration int
10 | TimerCleared bool
11 | }
12 |
13 | func (rt *RaftTimer) SetClear() {
14 | rt.TimerCleared = true
15 | }
16 |
17 | func (rt *RaftTimer) Wait() bool {
18 | checkCount := 200
19 | divDuration := rt.WaitDuration / checkCount
20 | for checkIndex := 0; checkIndex < checkCount; checkIndex++ {
21 | if rt.TimerCleared {
22 | // not expired
23 | rt.TimerCleared = false
24 | return false
25 | }
26 | time.Sleep(time.Millisecond * time.Duration(divDuration))
27 | }
28 | ret := !rt.TimerCleared
29 | rt.TimerCleared = false
30 | return ret
31 | }
32 |
33 | func NewRaftTimer(waitDuration int) RaftTimer {
34 | return RaftTimer{
35 | WaitDuration: waitDuration,
36 | TimerCleared: false,
37 | }
38 | }
39 |
40 | type RaftTime struct {
41 | // waits
42 | heartBeatSendWait int
43 | requestVoteRandMax int
44 |
45 | // timer
46 | heartBeatTimer RaftTimer
47 | //electionTimer RaftTimer
48 | }
49 |
50 | func (rt *RaftTime) WaitHeartBeat() {
51 | time.Sleep(time.Millisecond * time.Duration(rt.heartBeatSendWait))
52 | }
53 |
54 | func (rt *RaftTime) WaitRandomRequestVote() {
55 | time.Sleep(time.Millisecond * time.Duration(rand.Intn(rt.requestVoteRandMax))) // the max is in milliseconds, like the other waits
56 | }
57 |
58 | func NewRaftTime(heartBeatSendWait, electionRandMax, heartBeatWaitMax int) *RaftTime {
59 | return &RaftTime{
60 | heartBeatSendWait: heartBeatSendWait,
61 | requestVoteRandMax: electionRandMax,
62 | heartBeatTimer: NewRaftTimer(heartBeatWaitMax),
63 | //electionTimer: NewRaftTimer(electionWaitMax),
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/src/raft/request_vote_callback.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | type RequestVoteCall struct {
4 | AsyncRpcCallAttr
5 |
6 | args *RequestVoteArgs
7 | replies []RequestVoteReply
8 | }
9 |
10 | func NewRequestVoteCall(raft *Raft, args *RequestVoteArgs) *RequestVoteCall {
11 | return &RequestVoteCall{
12 | AsyncRpcCallAttr: raft.NewAsyncRpcCall(),
13 | args: args,
14 | replies: make([]RequestVoteReply, raft.PeerCount()),
15 | }
16 | }
17 |
18 | func (rvc *RequestVoteCall) shouldExit() bool {
19 | if rvc.MustExit {
20 | return true
21 | }
22 | return false
23 | }
24 |
25 | func (rvc *RequestVoteCall) makeRpcCall(peerIndex int) bool {
26 | rvc.raft.printInfo("sending RequestVote to peer", peerIndex)
27 | return rvc.peers[peerIndex].Call("Raft.RequestVote", rvc.args, &rvc.replies[peerIndex])
28 | }
29 |
30 | func (rvc *RequestVoteCall) callback(peerIndex int) {
31 | rvc.raft.printInfo("RequestVote reply received from peer", peerIndex)
32 | if !rvc.raft.MyState.IsCandidate() {
33 | rvc.SetMustExit()
34 | return
35 | }
36 | reply := rvc.replies[peerIndex]
37 | if rvc.raft.tryFollowNewerTerm(peerIndex, reply.Term) {
38 | rvc.SetMustExit()
39 | return
40 | }
41 | if reply.GrantVote {
42 | rvc.raft.printInfo("vote granted by peer", peerIndex)
43 | rvc.IncrementSuccessCount()
44 | }
45 | }
46 |
47 | func (rvc *RequestVoteCall) tryEnd() bool {
48 | if rvc.SuccessCount > rvc.TotalCount/2 {
49 | rvc.raft.printInfo("#granted", rvc.SuccessCount, "in #total", rvc.TotalCount)
50 | rvc.SetMustExit()
51 | // change raft state
52 | rvc.raft.toLeader()
53 | return true
54 | }
55 | if rvc.SuccessCount+rvc.TotalCount-rvc.CurrentCount <= rvc.TotalCount/2 {
56 | rvc.SetMustExit()
57 | rvc.raft.printInfo("#granted", rvc.SuccessCount, "too few for #total - #current", rvc.TotalCount-rvc.CurrentCount)
58 | return true
59 | }
60 | if rvc.CurrentCount >= rvc.TotalCount {
61 | rvc.raft.printInfo("#granted", rvc.SuccessCount, "too few for #total", rvc.TotalCount)
62 | rvc.SetMustExit()
63 | return true
64 | }
65 | return false
66 | }
67 |
68 | /*
69 | func (rf *Raft) requestVoteCallBack(ok bool, peerIndex int, args *RequestVoteArgs, reply *RequestVoteReply, requestVoteInfo *AsyncRpcCallAttr) {
70 | if !ok {
71 | fmt.Println(rf.PrefixPrint(), "cannot reach peer", peerIndex, "when requesting a vote")
72 | } else {
73 | requestVoteInfo.IncrementAliveCount()
74 | if rf.tryFollowNewerTerm(peerIndex, reply.Term) {
75 | requestVoteInfo.SetMustExit()
76 | } else if reply.GrantVote {
77 | fmt.Println(rf.PrefixPrint(), "granted a vote by peer", peerIndex)
78 | requestVoteInfo.IncrementSuccessCount()
79 | if requestVoteInfo.SuccessCount+1 > requestVoteInfo.TotalCount/2 {
80 | // leader claimed!
81 | //fmt.Println(rf.PrefixPrint(), "got", requestVoteInfo.SuccessCount, "votes in", requestVoteInfo.AliveCount, "alive peers", requestVoteInfo.TotalCount, "total peers")
82 | rf.MyState = LeaderState
83 | rf.votedFor = rf.me
84 | //fmt.Println(rf.PrefixPrint(), "elected leader at term", rf.currentTerm)
85 | rf.setLeaderNextIndex()
86 | requestVoteInfo.SetMustExit()
87 | }
88 | }
89 | }
90 | requestVoteInfo.IncrementCurrentCount()
91 | }
92 | */
93 |
--------------------------------------------------------------------------------
/src/raft/request_vote_task.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | type RequestVoteTask struct {
4 | RaftTaskAttr
5 | args *RequestVoteArgs
6 | reply *RequestVoteReply
7 | }
8 |
9 | func (rvt *RequestVoteTask) execute() {
10 | rvt.executeRequestVoteRpc(rvt.args, rvt.reply)
11 | }
12 |
13 | func (rvt *RequestVoteTask) printThisMoreUpToDate() {
14 | rvt.raft.printInfo("this peer's log is newer")
15 | }
16 |
17 | func (rvt *RequestVoteTask) printGrantVote(peerId int) {
18 | rvt.raft.printInfo("grant vote to peer", peerId)
19 | }
20 |
21 | func (rvt *RequestVoteTask) grantVote(peerId int, reply *RequestVoteReply) {
22 | reply.GrantVote = true
23 | rvt.raft.toFollower(peerId)
24 | rvt.raft.TimeParams.heartBeatTimer.SetClear()
25 | }
26 |
27 | func (rvt *RequestVoteTask) executeRequestVoteRpc(args *RequestVoteArgs, reply *RequestVoteReply) {
28 | reply.Term = rvt.raft.currentTerm
29 | reply.GrantVote = false
30 |
31 | if args.Term < rvt.raft.currentTerm {
32 | return
33 | }
34 | if rvt.raft.tryFollowNewerTerm(args.CandidateId, args.Term) {
35 | rvt.raft.printInfo("sees newer term RequestVote from peer", args.CandidateId)
36 | rvt.grantVote(args.CandidateId, reply)
37 | return
38 | }
39 | // decide vote
40 | if rvt.raft.votedFor < 0 || rvt.raft.votedFor == args.CandidateId {
41 | // check up-to-date
42 | if rvt.raft.Log.LastEntry().Term < args.LastLogTerm {
43 | // that peer has more up-to-date Log
44 | rvt.grantVote(args.CandidateId, reply)
45 | rvt.printGrantVote(args.CandidateId)
46 | return
47 | }
48 | if rvt.raft.Log.LastEntry().Term > args.LastLogTerm {
49 | // this peer has more up-to-date Log
50 | rvt.printThisMoreUpToDate()
51 | return
52 | }
53 | // Term attribute equals, comparing length
54 | if args.LastLogIndex < rvt.raft.Log.Length()-1 { // an equally long log is still up-to-date
55 | // this peer is more up-to-date
56 | rvt.printThisMoreUpToDate()
57 | return
58 | }
59 | rvt.printGrantVote(args.CandidateId)
60 | rvt.grantVote(args.CandidateId, reply)
61 | return
62 | }
63 | }
64 |
65 | func NewRequestVoteTask(raft *Raft, args *RequestVoteArgs, reply *RequestVoteReply) *RequestVoteTask {
66 | return &RequestVoteTask{
67 | RaftTaskAttr: NewRaftTaskAttr(raft),
68 | args: args,
69 | reply: reply,
70 | }
71 | }
72 |
73 | /*
74 | func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) {
75 | // Your code here (2A, 2B).
76 |
77 | RunTask(NewRequestVoteTask(rf, args, reply), &rf.taskQueue)
78 | return
79 |
80 | // default reply state
81 | reply.Term = rf.currentTerm
82 | reply.GrantVote = false
83 | rf.timerCleared = true
84 |
85 | if rf.tryDiscardOldTerm(args.CandidateId, args.Term) {
86 | reply.GrantVote = false
87 | return
88 | }
89 | if rf.tryFollowNewerTerm(args.CandidateId, args.Term) {
90 | reply.GrantVote = true
91 | return
92 | }
93 |
94 | // < 0 for not elected leader
95 | // == for already accepted leader
96 | if rf.votedFor < 0 || rf.votedFor == args.CandidateId {
97 | // check up-to-date
98 | if rf.logs[len(rf.logs)-1].Term > args.LastLogTerm {
99 | // this peer has more up-to-date Log
100 | return
101 | }
102 | if rf.logs[len(rf.logs)-1].Term < args.LastLogTerm {
103 | // that peer has more up-to-date Log
104 | reply.GrantVote = true
105 | return
106 | }
107 | // Term attribute equals, comparing length
108 | if args.LastLogIndex < len(rf.logs)-1 {
109 | // this peer is more up-to-date
110 | return
111 | }
112 | reply.GrantVote = true
113 | return
114 | }
115 | //fmt.Println(rf.PrefixPrint(), "with leader", rf.votedFor, "at term %v not granting vote to peer", rf.currentTerm, "at term", args.Term)
116 | }
117 | */
118 |
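The vote decision block above is Raft's election restriction (§5.4.1): compare the last log terms first, and only on a tie compare log lengths. A standalone sketch of that rule as a pure function; candidateUpToDate is illustrative and not part of this repository.

// Standalone sketch (not part of the repository): the up-to-date rule that
// the vote decision in executeRequestVoteRpc applies.
package main

import "fmt"

// candidateUpToDate reports whether a candidate log ending at
// (candTerm, candIndex) is at least as up-to-date as a voter log
// ending at (voterTerm, voterIndex).
func candidateUpToDate(candTerm, candIndex, voterTerm, voterIndex int) bool {
	if candTerm != voterTerm {
		return candTerm > voterTerm // the log with the later last term wins
	}
	return candIndex >= voterIndex // same last term: the longer (or equal) log wins
}

func main() {
	fmt.Println(candidateUpToDate(3, 7, 3, 7)) // true: identical logs, the vote may be granted
	fmt.Println(candidateUpToDate(2, 9, 3, 4)) // false: the voter's last term is newer
}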
--------------------------------------------------------------------------------
/src/raft/rpc_args_reply.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | //
4 | // example RequestVote RPC arguments structure.
5 | // field names must start with capital letters!
6 | //
7 | type RequestVoteArgs struct {
8 | // Your data here (2A, 2B).
9 | Term int
10 | CandidateId int
11 | LastLogIndex int
12 | LastLogTerm int
13 | }
14 |
15 | //
16 | // example RequestVote RPC reply structure.
17 | // field names must start with capital letters!
18 | //
19 | type RequestVoteReply struct {
20 | // Your data here (2A).
21 | GrantVote bool
22 | Term int
23 | }
24 |
25 | type AppendEntriesArgs struct {
26 | // machine state
27 | Term int
28 | LeaderId int
29 | // Log state
30 | PrevLogIndex int
31 | PrevLogTerm int
32 | Entries []LogEntry
33 | // leader's commit index
34 | LeaderCommit int
35 | }
36 |
37 | type AppendEntriesReply struct {
38 | Term int
39 | Success bool
40 |
41 | ConflictIndex int
42 | ConflictTerm int
43 | }
44 |
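ConflictIndex and ConflictTerm in AppendEntriesReply are the usual hooks for the accelerated log backup described in the course notes. One common way a leader turns a rejection into a new nextIndex is sketched below; lastIndexOfTerm is a hypothetical lookup into the leader's own log, and this repository's append_entries code may interpret the fields differently.

// Sketch only: one conventional reading of a failed AppendEntriesReply.
// lastIndexOfTerm is a hypothetical helper, not part of the repository.
func nextIndexFromConflict(reply *AppendEntriesReply, lastIndexOfTerm func(term int) (int, bool)) int {
	if idx, ok := lastIndexOfTerm(reply.ConflictTerm); ok {
		// the leader also has entries from the conflicting term:
		// resume just past its last entry of that term
		return idx + 1
	}
	// the leader has no entry of that term (or the follower's log was
	// simply too short): fall back to the follower's hint
	return reply.ConflictIndex
}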
--------------------------------------------------------------------------------
/src/raft/rpc_data.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | type RaftRpcData struct {
4 | args interface{}
5 | reply interface{}
6 | }
7 |
8 | func (rrd *RaftRpcData) GetRequestVote() (*RequestVoteArgs, *RequestVoteReply) {
9 | return rrd.args.(*RequestVoteArgs), rrd.reply.(*RequestVoteReply)
10 | }
11 |
12 | func (rrd *RaftRpcData) GetAppendEntries() (*AppendEntriesArgs, *AppendEntriesReply) {
13 | return rrd.args.(*AppendEntriesArgs), rrd.reply.(*AppendEntriesReply)
14 | }
15 |
16 | func NewRequestVoteData(args *RequestVoteArgs, reply *RequestVoteReply) *RaftRpcData {
17 | return &RaftRpcData{
18 | args: args,
19 | reply: reply,
20 | }
21 | }
22 |
23 | func NewAppendEntriesData(args *AppendEntriesArgs, reply *AppendEntriesReply) *RaftRpcData {
24 | return &RaftRpcData{
25 | args: args,
26 | reply: reply,
27 | }
28 | }
29 |
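RaftRpcData erases the concrete argument and reply types so both RPC kinds can travel through the same queue; the typed getters recover them with type assertions. A minimal usage sketch in the same package; exampleRpcData is illustrative only.

// Sketch only (assumes the raft package context above): wrap a RequestVote
// exchange and recover the typed pointers via the matching getter.
func exampleRpcData() {
	args := &RequestVoteArgs{Term: 5, CandidateId: 1, LastLogIndex: 3, LastLogTerm: 2}
	reply := &RequestVoteReply{}
	data := NewRequestVoteData(args, reply)

	// The getter that matches the constructor returns the concrete pointers;
	// calling the wrong getter would panic on the type assertion.
	a, r := data.GetRequestVote()
	_ = a.LastLogTerm
	_ = r.GrantVote
}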
--------------------------------------------------------------------------------
/src/raft/util.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import "log"
4 |
5 | // Debugging
6 | const Debug = 0
7 |
8 | func DPrintf(format string, a ...interface{}) (n int, err error) {
9 | if Debug > 0 {
10 | log.Printf(format, a...)
11 | }
12 | return
13 | }
14 |
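DPrintf is a compile-time-gated log.Printf: with Debug left at 0 every call is a no-op, and setting the constant to 1 turns the same call sites into ordinary log output. A one-line usage sketch; the function and message are illustrative.

// Sketch only: the line becomes visible after changing Debug above to 1.
func exampleDebugLine(me, term int) {
	DPrintf("peer %d starting election for term %d", me, term)
}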
--------------------------------------------------------------------------------
/src/shardkv/client.go:
--------------------------------------------------------------------------------
1 | package shardkv
2 |
3 | //
4 | // client code to talk to a sharded key/value service.
5 | //
6 | // the client first talks to the shardmaster to find out
7 | // the assignment of shards (keys) to groups, and then
8 | // talks to the group that holds the key's shard.
9 | //
10 |
11 | import "../labrpc"
12 | import "crypto/rand"
13 | import "math/big"
14 | import "../shardmaster"
15 | import "time"
16 |
17 | //
18 | // which shard is a key in?
19 | // please use this function,
20 | // and please do not change it.
21 | //
22 | func key2shard(key string) int {
23 | shard := 0
24 | if len(key) > 0 {
25 | shard = int(key[0])
26 | }
27 | shard %= shardmaster.NShards
28 | return shard
29 | }
30 |
31 | func nrand() int64 {
32 | max := big.NewInt(int64(1) << 62)
33 | bigx, _ := rand.Int(rand.Reader, max)
34 | x := bigx.Int64()
35 | return x
36 | }
37 |
38 | type Clerk struct {
39 | sm *shardmaster.Clerk
40 | config shardmaster.Config
41 | make_end func(string) *labrpc.ClientEnd
42 | // You will have to modify this struct.
43 | }
44 |
45 | //
46 | // the tester calls MakeClerk.
47 | //
48 | // masters[] is needed to call shardmaster.MakeClerk().
49 | //
50 | // make_end(servername) turns a server name from a
51 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can
52 | // send RPCs.
53 | //
54 | func MakeClerk(masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk {
55 | ck := new(Clerk)
56 | ck.sm = shardmaster.MakeClerk(masters)
57 | ck.make_end = make_end
58 | // You'll have to add code here.
59 | return ck
60 | }
61 |
62 | //
63 | // fetch the current value for a key.
64 | // returns "" if the key does not exist.
65 | // keeps trying forever in the face of all other errors.
66 | // You will have to modify this function.
67 | //
68 | func (ck *Clerk) Get(key string) string {
69 | args := GetArgs{}
70 | args.Key = key
71 |
72 | for {
73 | shard := key2shard(key)
74 | gid := ck.config.Shards[shard]
75 | if servers, ok := ck.config.Groups[gid]; ok {
76 | // try each server for the shard.
77 | for si := 0; si < len(servers); si++ {
78 | srv := ck.make_end(servers[si])
79 | var reply GetReply
80 | ok := srv.Call("ShardKV.Get", &args, &reply)
81 | if ok && (reply.Err == OK || reply.Err == ErrNoKey) {
82 | return reply.Value
83 | }
84 | if ok && (reply.Err == ErrWrongGroup) {
85 | break
86 | }
87 | // ... not ok, or ErrWrongLeader
88 | }
89 | }
90 | time.Sleep(100 * time.Millisecond)
91 | // ask master for the latest configuration.
92 | ck.config = ck.sm.Query(-1)
93 | }
94 |
95 | return ""
96 | }
97 |
98 | //
99 | // shared by Put and Append.
100 | // You will have to modify this function.
101 | //
102 | func (ck *Clerk) PutAppend(key string, value string, op string) {
103 | args := PutAppendArgs{}
104 | args.Key = key
105 | args.Value = value
106 | args.Op = op
107 |
108 |
109 | for {
110 | shard := key2shard(key)
111 | gid := ck.config.Shards[shard]
112 | if servers, ok := ck.config.Groups[gid]; ok {
113 | for si := 0; si < len(servers); si++ {
114 | srv := ck.make_end(servers[si])
115 | var reply PutAppendReply
116 | ok := srv.Call("ShardKV.PutAppend", &args, &reply)
117 | if ok && reply.Err == OK {
118 | return
119 | }
120 | if ok && reply.Err == ErrWrongGroup {
121 | break
122 | }
123 | // ... not ok, or ErrWrongLeader
124 | }
125 | }
126 | time.Sleep(100 * time.Millisecond)
127 | // ask master for the latest configuration.
128 | ck.config = ck.sm.Query(-1)
129 | }
130 | }
131 |
132 | func (ck *Clerk) Put(key string, value string) {
133 | ck.PutAppend(key, value, "Put")
134 | }
135 | func (ck *Clerk) Append(key string, value string) {
136 | ck.PutAppend(key, value, "Append")
137 | }
138 |
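key2shard looks only at the first byte of the key and reduces it modulo shardmaster.NShards, so with NShards == 10 the shard is simply that byte mod 10. A tiny sketch in the same package; exampleKey2Shard is illustrative, not part of the repository.

// Sketch only: the empty key maps to shard 0; "apple" starts with 'a'
// (byte 97), so with NShards == 10 it maps to shard 7.
func exampleKey2Shard() map[string]int {
	return map[string]int{
		"":      key2shard(""),      // 0
		"apple": key2shard("apple"), // 97 % 10 == 7
	}
}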
--------------------------------------------------------------------------------
/src/shardkv/common.go:
--------------------------------------------------------------------------------
1 | package shardkv
2 |
3 | //
4 | // Sharded key/value server.
5 | // Lots of replica groups, each replicating operations with Raft.
6 | // Shardmaster decides which group serves each shard.
7 | // Shardmaster may change shard assignment from time to time.
8 | //
9 | // You will have to modify these definitions.
10 | //
11 |
12 | const (
13 | OK = "OK"
14 | ErrNoKey = "ErrNoKey"
15 | ErrWrongGroup = "ErrWrongGroup"
16 | ErrWrongLeader = "ErrWrongLeader"
17 | )
18 |
19 | type Err string
20 |
21 | // Put or Append
22 | type PutAppendArgs struct {
23 | // You'll have to add definitions here.
24 | Key string
25 | Value string
26 | Op string // "Put" or "Append"
27 | // You'll have to add definitions here.
28 | // Field names must start with capital letters,
29 | // otherwise RPC will break.
30 | }
31 |
32 | type PutAppendReply struct {
33 | Err Err
34 | }
35 |
36 | type GetArgs struct {
37 | Key string
38 | // You'll have to add definitions here.
39 | }
40 |
41 | type GetReply struct {
42 | Err Err
43 | Value string
44 | }
45 |
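These constants are the contract with the clerk's retry loops in client.go: OK and ErrNoKey end a Get, ErrWrongGroup makes the clerk re-query the shardmaster, and anything else (or a dropped RPC) makes it try the next server. A minimal sketch of the server side of that contract; handleGetSketch and the owned predicate are hypothetical.

// Sketch only: how a Get handler would use the constants above to drive the
// clerk's retries. owned is a hypothetical predicate for "this group
// currently serves that shard".
func handleGetSketch(owned func(shard int) bool, args *GetArgs, reply *GetReply) {
	if !owned(key2shard(args.Key)) {
		reply.Err = ErrWrongGroup // the clerk re-queries the shardmaster and retries
		return
	}
	reply.Err = OK // a real handler would also fill in reply.Value
}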
--------------------------------------------------------------------------------
/src/shardkv/config.go:
--------------------------------------------------------------------------------
1 | package shardkv
2 |
3 | import "../shardmaster"
4 | import "../labrpc"
5 | import "testing"
6 | import "os"
7 |
8 | // import "log"
9 | import crand "crypto/rand"
10 | import "math/big"
11 | import "math/rand"
12 | import "encoding/base64"
13 | import "sync"
14 | import "runtime"
15 | import "../raft"
16 | import "strconv"
17 | import "fmt"
18 | import "time"
19 |
20 | func randstring(n int) string {
21 | b := make([]byte, 2*n)
22 | crand.Read(b)
23 | s := base64.URLEncoding.EncodeToString(b)
24 | return s[0:n]
25 | }
26 |
27 | func makeSeed() int64 {
28 | max := big.NewInt(int64(1) << 62)
29 | bigx, _ := crand.Int(crand.Reader, max)
30 | x := bigx.Int64()
31 | return x
32 | }
33 |
34 | // Randomize server handles
35 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
36 | sa := make([]*labrpc.ClientEnd, len(kvh))
37 | copy(sa, kvh)
38 | for i := range sa {
39 | j := rand.Intn(i + 1)
40 | sa[i], sa[j] = sa[j], sa[i]
41 | }
42 | return sa
43 | }
44 |
45 | type group struct {
46 | gid int
47 | servers []*ShardKV
48 | saved []*raft.Persister
49 | endnames [][]string
50 | mendnames [][]string
51 | }
52 |
53 | type config struct {
54 | mu sync.Mutex
55 | t *testing.T
56 | net *labrpc.Network
57 | start time.Time // time at which make_config() was called
58 |
59 | nmasters int
60 | masterservers []*shardmaster.ShardMaster
61 | mck *shardmaster.Clerk
62 |
63 | ngroups int
64 | n int // servers per k/v group
65 | groups []*group
66 |
67 | clerks map[*Clerk][]string
68 | nextClientId int
69 | maxraftstate int
70 | }
71 |
72 | func (cfg *config) checkTimeout() {
73 | // enforce a two minute real-time limit on each test
74 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
75 | cfg.t.Fatal("test took longer than 120 seconds")
76 | }
77 | }
78 |
79 | func (cfg *config) cleanup() {
80 | for gi := 0; gi < cfg.ngroups; gi++ {
81 | cfg.ShutdownGroup(gi)
82 | }
83 | cfg.net.Cleanup()
84 | cfg.checkTimeout()
85 | }
86 |
87 | // check that no server's log is too big.
88 | func (cfg *config) checklogs() {
89 | for gi := 0; gi < cfg.ngroups; gi++ {
90 | for i := 0; i < cfg.n; i++ {
91 | raft := cfg.groups[gi].saved[i].RaftStateSize()
92 | snap := len(cfg.groups[gi].saved[i].ReadSnapshot())
93 | if cfg.maxraftstate >= 0 && raft > 8*cfg.maxraftstate {
94 | cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v",
95 | raft, cfg.maxraftstate)
96 | }
97 | if cfg.maxraftstate < 0 && snap > 0 {
98 | cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!")
99 | }
100 | }
101 | }
102 | }
103 |
104 | // master server name for labrpc.
105 | func (cfg *config) mastername(i int) string {
106 | return "master" + strconv.Itoa(i)
107 | }
108 |
109 | // shard server name for labrpc.
110 | // i'th server of group gid.
111 | func (cfg *config) servername(gid int, i int) string {
112 | return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i)
113 | }
114 |
115 | func (cfg *config) makeClient() *Clerk {
116 | cfg.mu.Lock()
117 | defer cfg.mu.Unlock()
118 |
119 | // ClientEnds to talk to master service.
120 | ends := make([]*labrpc.ClientEnd, cfg.nmasters)
121 | endnames := make([]string, cfg.n)
122 | for j := 0; j < cfg.nmasters; j++ {
123 | endnames[j] = randstring(20)
124 | ends[j] = cfg.net.MakeEnd(endnames[j])
125 | cfg.net.Connect(endnames[j], cfg.mastername(j))
126 | cfg.net.Enable(endnames[j], true)
127 | }
128 |
129 | ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd {
130 | name := randstring(20)
131 | end := cfg.net.MakeEnd(name)
132 | cfg.net.Connect(name, servername)
133 | cfg.net.Enable(name, true)
134 | return end
135 | })
136 | cfg.clerks[ck] = endnames
137 | cfg.nextClientId++
138 | return ck
139 | }
140 |
141 | func (cfg *config) deleteClient(ck *Clerk) {
142 | cfg.mu.Lock()
143 | defer cfg.mu.Unlock()
144 |
145 | v := cfg.clerks[ck]
146 | for i := 0; i < len(v); i++ {
147 | os.Remove(v[i])
148 | }
149 | delete(cfg.clerks, ck)
150 | }
151 |
152 | // Shutdown i'th server of gi'th group, by isolating it
153 | func (cfg *config) ShutdownServer(gi int, i int) {
154 | cfg.mu.Lock()
155 | defer cfg.mu.Unlock()
156 |
157 | gg := cfg.groups[gi]
158 |
159 | // prevent this server from sending
160 | for j := 0; j < len(gg.servers); j++ {
161 | name := gg.endnames[i][j]
162 | cfg.net.Enable(name, false)
163 | }
164 | for j := 0; j < len(gg.mendnames[i]); j++ {
165 | name := gg.mendnames[i][j]
166 | cfg.net.Enable(name, false)
167 | }
168 |
169 | // disable client connections to the server.
170 | // it's important to do this before creating
171 | // the new Persister in saved[i], to avoid
172 | // the possibility of the server returning a
173 | // positive reply to an Append but persisting
174 | // the result in the superseded Persister.
175 | cfg.net.DeleteServer(cfg.servername(gg.gid, i))
176 |
177 | // a fresh persister, in case old instance
178 | // continues to update the Persister.
179 | // but copy old persister's content so that we always
180 | // pass Make() the last persisted state.
181 | if gg.saved[i] != nil {
182 | gg.saved[i] = gg.saved[i].Copy()
183 | }
184 |
185 | kv := gg.servers[i]
186 | if kv != nil {
187 | cfg.mu.Unlock()
188 | kv.Kill()
189 | cfg.mu.Lock()
190 | gg.servers[i] = nil
191 | }
192 | }
193 |
194 | func (cfg *config) ShutdownGroup(gi int) {
195 | for i := 0; i < cfg.n; i++ {
196 | cfg.ShutdownServer(gi, i)
197 | }
198 | }
199 |
200 | // start i'th server in gi'th group
201 | func (cfg *config) StartServer(gi int, i int) {
202 | cfg.mu.Lock()
203 |
204 | gg := cfg.groups[gi]
205 |
206 | // a fresh set of outgoing ClientEnd names
207 | // to talk to other servers in this group.
208 | gg.endnames[i] = make([]string, cfg.n)
209 | for j := 0; j < cfg.n; j++ {
210 | gg.endnames[i][j] = randstring(20)
211 | }
212 |
213 | // and the connections to other servers in this group.
214 | ends := make([]*labrpc.ClientEnd, cfg.n)
215 | for j := 0; j < cfg.n; j++ {
216 | ends[j] = cfg.net.MakeEnd(gg.endnames[i][j])
217 | cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j))
218 | cfg.net.Enable(gg.endnames[i][j], true)
219 | }
220 |
221 | // ends to talk to shardmaster service
222 | mends := make([]*labrpc.ClientEnd, cfg.nmasters)
223 | gg.mendnames[i] = make([]string, cfg.nmasters)
224 | for j := 0; j < cfg.nmasters; j++ {
225 | gg.mendnames[i][j] = randstring(20)
226 | mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j])
227 | cfg.net.Connect(gg.mendnames[i][j], cfg.mastername(j))
228 | cfg.net.Enable(gg.mendnames[i][j], true)
229 | }
230 |
231 | // a fresh persister, so old instance doesn't overwrite
232 | // new instance's persisted state.
233 | // give the fresh persister a copy of the old persister's
234 | // state, so that the spec is that we pass StartKVServer()
235 | // the last persisted state.
236 | if gg.saved[i] != nil {
237 | gg.saved[i] = gg.saved[i].Copy()
238 | } else {
239 | gg.saved[i] = raft.MakePersister()
240 | }
241 | cfg.mu.Unlock()
242 |
243 | gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate,
244 | gg.gid, mends,
245 | func(servername string) *labrpc.ClientEnd {
246 | name := randstring(20)
247 | end := cfg.net.MakeEnd(name)
248 | cfg.net.Connect(name, servername)
249 | cfg.net.Enable(name, true)
250 | return end
251 | })
252 |
253 | kvsvc := labrpc.MakeService(gg.servers[i])
254 | rfsvc := labrpc.MakeService(gg.servers[i].rf)
255 | srv := labrpc.MakeServer()
256 | srv.AddService(kvsvc)
257 | srv.AddService(rfsvc)
258 | cfg.net.AddServer(cfg.servername(gg.gid, i), srv)
259 | }
260 |
261 | func (cfg *config) StartGroup(gi int) {
262 | for i := 0; i < cfg.n; i++ {
263 | cfg.StartServer(gi, i)
264 | }
265 | }
266 |
267 | func (cfg *config) StartMasterServer(i int) {
268 | // ClientEnds to talk to other master replicas.
269 | ends := make([]*labrpc.ClientEnd, cfg.nmasters)
270 | for j := 0; j < cfg.nmasters; j++ {
271 | endname := randstring(20)
272 | ends[j] = cfg.net.MakeEnd(endname)
273 | cfg.net.Connect(endname, cfg.mastername(j))
274 | cfg.net.Enable(endname, true)
275 | }
276 |
277 | p := raft.MakePersister()
278 |
279 | cfg.masterservers[i] = shardmaster.StartServer(ends, i, p)
280 |
281 | msvc := labrpc.MakeService(cfg.masterservers[i])
282 | rfsvc := labrpc.MakeService(cfg.masterservers[i].Raft())
283 | srv := labrpc.MakeServer()
284 | srv.AddService(msvc)
285 | srv.AddService(rfsvc)
286 | cfg.net.AddServer(cfg.mastername(i), srv)
287 | }
288 |
289 | func (cfg *config) shardclerk() *shardmaster.Clerk {
290 | // ClientEnds to talk to master service.
291 | ends := make([]*labrpc.ClientEnd, cfg.nmasters)
292 | for j := 0; j < cfg.nmasters; j++ {
293 | name := randstring(20)
294 | ends[j] = cfg.net.MakeEnd(name)
295 | cfg.net.Connect(name, cfg.mastername(j))
296 | cfg.net.Enable(name, true)
297 | }
298 |
299 | return shardmaster.MakeClerk(ends)
300 | }
301 |
302 | // tell the shardmaster that a group is joining.
303 | func (cfg *config) join(gi int) {
304 | cfg.joinm([]int{gi})
305 | }
306 |
307 | func (cfg *config) joinm(gis []int) {
308 | m := make(map[int][]string, len(gis))
309 | for _, g := range gis {
310 | gid := cfg.groups[g].gid
311 | servernames := make([]string, cfg.n)
312 | for i := 0; i < cfg.n; i++ {
313 | servernames[i] = cfg.servername(gid, i)
314 | }
315 | m[gid] = servernames
316 | }
317 | cfg.mck.Join(m)
318 | }
319 |
320 | // tell the shardmaster that a group is leaving.
321 | func (cfg *config) leave(gi int) {
322 | cfg.leavem([]int{gi})
323 | }
324 |
325 | func (cfg *config) leavem(gis []int) {
326 | gids := make([]int, 0, len(gis))
327 | for _, g := range gis {
328 | gids = append(gids, cfg.groups[g].gid)
329 | }
330 | cfg.mck.Leave(gids)
331 | }
332 |
333 | var ncpu_once sync.Once
334 |
335 | func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config {
336 | ncpu_once.Do(func() {
337 | if runtime.NumCPU() < 2 {
338 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
339 | }
340 | rand.Seed(makeSeed())
341 | })
342 | runtime.GOMAXPROCS(4)
343 | cfg := &config{}
344 | cfg.t = t
345 | cfg.maxraftstate = maxraftstate
346 | cfg.net = labrpc.MakeNetwork()
347 | cfg.start = time.Now()
348 |
349 | // master
350 | cfg.nmasters = 3
351 | cfg.masterservers = make([]*shardmaster.ShardMaster, cfg.nmasters)
352 | for i := 0; i < cfg.nmasters; i++ {
353 | cfg.StartMasterServer(i)
354 | }
355 | cfg.mck = cfg.shardclerk()
356 |
357 | cfg.ngroups = 3
358 | cfg.groups = make([]*group, cfg.ngroups)
359 | cfg.n = n
360 | for gi := 0; gi < cfg.ngroups; gi++ {
361 | gg := &group{}
362 | cfg.groups[gi] = gg
363 | gg.gid = 100 + gi
364 | gg.servers = make([]*ShardKV, cfg.n)
365 | gg.saved = make([]*raft.Persister, cfg.n)
366 | gg.endnames = make([][]string, cfg.n)
367 | gg.mendnames = make([][]string, cfg.nmasters)
368 | for i := 0; i < cfg.n; i++ {
369 | cfg.StartServer(gi, i)
370 | }
371 | }
372 |
373 | cfg.clerks = make(map[*Clerk][]string)
374 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
375 |
376 | cfg.net.Reliable(!unreliable)
377 |
378 | return cfg
379 | }
380 |
--------------------------------------------------------------------------------
/src/shardkv/server.go:
--------------------------------------------------------------------------------
1 | package shardkv
2 |
3 |
4 | // import "../shardmaster"
5 | import "../labrpc"
6 | import "../raft"
7 | import "sync"
8 | import "../labgob"
9 |
10 |
11 |
12 | type Op struct {
13 | // Your definitions here.
14 | // Field names must start with capital letters,
15 | // otherwise RPC will break.
16 | }
17 |
18 | type ShardKV struct {
19 | mu sync.Mutex
20 | me int
21 | rf *raft.Raft
22 | applyCh chan raft.ApplyMsg
23 | make_end func(string) *labrpc.ClientEnd
24 | gid int
25 | masters []*labrpc.ClientEnd
26 | maxraftstate int // snapshot if log grows this big
27 |
28 | // Your definitions here.
29 | }
30 |
31 |
32 | func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) {
33 | // Your code here.
34 | }
35 |
36 | func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) {
37 | // Your code here.
38 | }
39 |
40 | //
41 | // the tester calls Kill() when a ShardKV instance won't
42 | // be needed again. you are not required to do anything
43 | // in Kill(), but it might be convenient to (for example)
44 | // turn off debug output from this instance.
45 | //
46 | func (kv *ShardKV) Kill() {
47 | kv.rf.Kill()
48 | // Your code here, if desired.
49 | }
50 |
51 |
52 | //
53 | // servers[] contains the ports of the servers in this group.
54 | //
55 | // me is the index of the current server in servers[].
56 | //
57 | // the k/v server should store snapshots through the underlying Raft
58 | // implementation, which should call persister.SaveStateAndSnapshot() to
59 | // atomically save the Raft state along with the snapshot.
60 | //
61 | // the k/v server should snapshot when Raft's saved state exceeds
62 | // maxraftstate bytes, in order to allow Raft to garbage-collect its
63 | // log. if maxraftstate is -1, you don't need to snapshot.
64 | //
65 | // gid is this group's GID, for interacting with the shardmaster.
66 | //
67 | // pass masters[] to shardmaster.MakeClerk() so you can send
68 | // RPCs to the shardmaster.
69 | //
70 | // make_end(servername) turns a server name from a
71 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can
72 | // send RPCs. You'll need this to send RPCs to other groups.
73 | //
74 | // look at client.go for examples of how to use masters[]
75 | // and make_end() to send RPCs to the group owning a specific shard.
76 | //
77 | // StartServer() must return quickly, so it should start goroutines
78 | // for any long-running work.
79 | //
80 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *ShardKV {
81 | // call labgob.Register on structures you want
82 | // Go's RPC library to marshall/unmarshall.
83 | labgob.Register(Op{})
84 |
85 | kv := new(ShardKV)
86 | kv.me = me
87 | kv.maxraftstate = maxraftstate
88 | kv.make_end = make_end
89 | kv.gid = gid
90 | kv.masters = masters
91 |
92 | // Your initialization code here.
93 |
94 | // Use something like this to talk to the shardmaster:
95 | // kv.mck = shardmaster.MakeClerk(kv.masters)
96 |
97 | kv.applyCh = make(chan raft.ApplyMsg)
98 | kv.rf = raft.Make(servers, me, persister, kv.applyCh)
99 |
100 |
101 | return kv
102 | }
103 |
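The comments above suggest creating a shardmaster clerk with shardmaster.MakeClerk(kv.masters) and doing long-running work in goroutines. One way such a goroutine could watch for new configurations is sketched below; it assumes the "time" and "../shardmaster" imports, and pollConfig, the stop channel, and the 100ms period are all illustrative rather than part of the repository.

// Sketch only: a background loop that watches the shardmaster for new
// configurations, using only the clerk API shown in client.go.
func pollConfig(mck *shardmaster.Clerk, stop <-chan struct{}) {
	for {
		select {
		case <-stop:
			return
		default:
		}
		cfg := mck.Query(-1) // -1 asks for the latest configuration
		_ = cfg.Num          // a real server would compare this to the config it serves
		time.Sleep(100 * time.Millisecond)
	}
}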
--------------------------------------------------------------------------------
/src/shardmaster/client.go:
--------------------------------------------------------------------------------
1 | package shardmaster
2 |
3 | //
4 | // Shardmaster clerk.
5 | //
6 |
7 | import "../labrpc"
8 | import "time"
9 | import "crypto/rand"
10 | import "math/big"
11 |
12 | type Clerk struct {
13 | servers []*labrpc.ClientEnd
14 | // Your data here.
15 | }
16 |
17 | func nrand() int64 {
18 | max := big.NewInt(int64(1) << 62)
19 | bigx, _ := rand.Int(rand.Reader, max)
20 | x := bigx.Int64()
21 | return x
22 | }
23 |
24 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk {
25 | ck := new(Clerk)
26 | ck.servers = servers
27 | // Your code here.
28 | return ck
29 | }
30 |
31 | func (ck *Clerk) Query(num int) Config {
32 | args := &QueryArgs{}
33 | // Your code here.
34 | args.Num = num
35 | for {
36 | // try each known server.
37 | for _, srv := range ck.servers {
38 | var reply QueryReply
39 | ok := srv.Call("ShardMaster.Query", args, &reply)
40 | if ok && reply.WrongLeader == false {
41 | return reply.Config
42 | }
43 | }
44 | time.Sleep(100 * time.Millisecond)
45 | }
46 | }
47 |
48 | func (ck *Clerk) Join(servers map[int][]string) {
49 | args := &JoinArgs{}
50 | // Your code here.
51 | args.Servers = servers
52 |
53 | for {
54 | // try each known server.
55 | for _, srv := range ck.servers {
56 | var reply JoinReply
57 | ok := srv.Call("ShardMaster.Join", args, &reply)
58 | if ok && reply.WrongLeader == false {
59 | return
60 | }
61 | }
62 | time.Sleep(100 * time.Millisecond)
63 | }
64 | }
65 |
66 | func (ck *Clerk) Leave(gids []int) {
67 | args := &LeaveArgs{}
68 | // Your code here.
69 | args.GIDs = gids
70 |
71 | for {
72 | // try each known server.
73 | for _, srv := range ck.servers {
74 | var reply LeaveReply
75 | ok := srv.Call("ShardMaster.Leave", args, &reply)
76 | if ok && reply.WrongLeader == false {
77 | return
78 | }
79 | }
80 | time.Sleep(100 * time.Millisecond)
81 | }
82 | }
83 |
84 | func (ck *Clerk) Move(shard int, gid int) {
85 | args := &MoveArgs{}
86 | // Your code here.
87 | args.Shard = shard
88 | args.GID = gid
89 |
90 | for {
91 | // try each known server.
92 | for _, srv := range ck.servers {
93 | var reply MoveReply
94 | ok := srv.Call("ShardMaster.Move", args, &reply)
95 | if ok && reply.WrongLeader == false {
96 | return
97 | }
98 | }
99 | time.Sleep(100 * time.Millisecond)
100 | }
101 | }
102 |
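Each clerk method retries every known server until a non-leader-rejected reply arrives, so callers can treat the operations as blocking administrative commands. A typical call sequence against this clerk; exampleClerkUsage and the server names are illustrative only.

// Sketch only: a typical administrative sequence using the Clerk above.
func exampleClerkUsage(ck *Clerk) Config {
	ck.Join(map[int][]string{1: {"server-1-0", "server-1-1", "server-1-2"}})
	ck.Move(0, 1)       // pin shard 0 onto group 1
	ck.Leave([]int{1})  // group 1 departs; its shards must be reassigned
	return ck.Query(-1) // -1 returns the latest configuration
}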
--------------------------------------------------------------------------------
/src/shardmaster/common.go:
--------------------------------------------------------------------------------
1 | package shardmaster
2 |
3 | //
4 | // Master shard server: assigns shards to replication groups.
5 | //
6 | // RPC interface:
7 | // Join(servers) -- add a set of groups (gid -> server-list mapping).
8 | // Leave(gids) -- delete a set of groups.
9 | // Move(shard, gid) -- hand off one shard from current owner to gid.
10 | // Query(num) -> fetch Config # num, or latest config if num==-1.
11 | //
12 | // A Config (configuration) describes a set of replica groups, and the
13 | // replica group responsible for each shard. Configs are numbered. Config
14 | // #0 is the initial configuration, with no groups and all shards
15 | // assigned to group 0 (the invalid group).
16 | //
17 | // You will need to add fields to the RPC argument structs.
18 | //
19 |
20 | // The number of shards.
21 | const NShards = 10
22 |
23 | // A configuration -- an assignment of shards to groups.
24 | // Please don't change this.
25 | type Config struct {
26 | Num int // config number
27 | Shards [NShards]int // shard -> gid
28 | Groups map[int][]string // gid -> servers[]
29 | }
30 |
31 | const (
32 | OK = "OK"
33 | )
34 |
35 | type Err string
36 |
37 | type JoinArgs struct {
38 | Servers map[int][]string // new GID -> servers mappings
39 | }
40 |
41 | type JoinReply struct {
42 | WrongLeader bool
43 | Err Err
44 | }
45 |
46 | type LeaveArgs struct {
47 | GIDs []int
48 | }
49 |
50 | type LeaveReply struct {
51 | WrongLeader bool
52 | Err Err
53 | }
54 |
55 | type MoveArgs struct {
56 | Shard int
57 | GID int
58 | }
59 |
60 | type MoveReply struct {
61 | WrongLeader bool
62 | Err Err
63 | }
64 |
65 | type QueryArgs struct {
66 | Num int // desired config number
67 | }
68 |
69 | type QueryReply struct {
70 | WrongLeader bool
71 | Err Err
72 | Config Config
73 | }
74 |
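A Config is just a numbered snapshot of the shard-to-group assignment plus the group membership map. A literal example of the kind of value Query returns once a single group (gid 1) has joined and owns every shard; exampleConfig and the server names are illustrative.

// Sketch only: a concrete Config value in the same package.
func exampleConfig() Config {
	return Config{
		Num:    1,
		Shards: [NShards]int{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
		Groups: map[int][]string{1: {"x", "y", "z"}},
	}
}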
--------------------------------------------------------------------------------
/src/shardmaster/config.go:
--------------------------------------------------------------------------------
1 | package shardmaster
2 |
3 | import "../labrpc"
4 | import "../raft"
5 | import "testing"
6 | import "os"
7 |
8 | // import "log"
9 | import crand "crypto/rand"
10 | import "math/rand"
11 | import "encoding/base64"
12 | import "sync"
13 | import "runtime"
14 | import "time"
15 |
16 | func randstring(n int) string {
17 | b := make([]byte, 2*n)
18 | crand.Read(b)
19 | s := base64.URLEncoding.EncodeToString(b)
20 | return s[0:n]
21 | }
22 |
23 | // Randomize server handles
24 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
25 | sa := make([]*labrpc.ClientEnd, len(kvh))
26 | copy(sa, kvh)
27 | for i := range sa {
28 | j := rand.Intn(i + 1)
29 | sa[i], sa[j] = sa[j], sa[i]
30 | }
31 | return sa
32 | }
33 |
34 | type config struct {
35 | mu sync.Mutex
36 | t *testing.T
37 | net *labrpc.Network
38 | n int
39 | servers []*ShardMaster
40 | saved []*raft.Persister
41 | endnames [][]string // names of each server's sending ClientEnds
42 | clerks map[*Clerk][]string
43 | nextClientId int
44 | start time.Time // time at which make_config() was called
45 | }
46 |
47 | func (cfg *config) checkTimeout() {
48 | // enforce a two minute real-time limit on each test
49 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
50 | cfg.t.Fatal("test took longer than 120 seconds")
51 | }
52 | }
53 |
54 | func (cfg *config) cleanup() {
55 | cfg.mu.Lock()
56 | defer cfg.mu.Unlock()
57 | for i := 0; i < len(cfg.servers); i++ {
58 | if cfg.servers[i] != nil {
59 | cfg.servers[i].Kill()
60 | }
61 | }
62 | cfg.net.Cleanup()
63 | cfg.checkTimeout()
64 | }
65 |
66 | // Maximum log size across all servers
67 | func (cfg *config) LogSize() int {
68 | logsize := 0
69 | for i := 0; i < cfg.n; i++ {
70 | n := cfg.saved[i].RaftStateSize()
71 | if n > logsize {
72 | logsize = n
73 | }
74 | }
75 | return logsize
76 | }
77 |
78 | // attach server i to servers listed in to
79 | // caller must hold cfg.mu
80 | func (cfg *config) connectUnlocked(i int, to []int) {
81 | // log.Printf("connect peer %d to %v\n", i, to)
82 |
83 | // outgoing socket files
84 | for j := 0; j < len(to); j++ {
85 | endname := cfg.endnames[i][to[j]]
86 | cfg.net.Enable(endname, true)
87 | }
88 |
89 | // incoming socket files
90 | for j := 0; j < len(to); j++ {
91 | endname := cfg.endnames[to[j]][i]
92 | cfg.net.Enable(endname, true)
93 | }
94 | }
95 |
96 | func (cfg *config) connect(i int, to []int) {
97 | cfg.mu.Lock()
98 | defer cfg.mu.Unlock()
99 | cfg.connectUnlocked(i, to)
100 | }
101 |
102 | // detach server i from the servers listed in from
103 | // caller must hold cfg.mu
104 | func (cfg *config) disconnectUnlocked(i int, from []int) {
105 | // log.Printf("disconnect peer %d from %v\n", i, from)
106 |
107 | // outgoing socket files
108 | for j := 0; j < len(from); j++ {
109 | if cfg.endnames[i] != nil {
110 | endname := cfg.endnames[i][from[j]]
111 | cfg.net.Enable(endname, false)
112 | }
113 | }
114 |
115 | // incoming socket files
116 | for j := 0; j < len(from); j++ {
117 | if cfg.endnames[j] != nil {
118 | endname := cfg.endnames[from[j]][i]
119 | cfg.net.Enable(endname, false)
120 | }
121 | }
122 | }
123 |
124 | func (cfg *config) disconnect(i int, from []int) {
125 | cfg.mu.Lock()
126 | defer cfg.mu.Unlock()
127 | cfg.disconnectUnlocked(i, from)
128 | }
129 |
130 | func (cfg *config) All() []int {
131 | all := make([]int, cfg.n)
132 | for i := 0; i < cfg.n; i++ {
133 | all[i] = i
134 | }
135 | return all
136 | }
137 |
138 | func (cfg *config) ConnectAll() {
139 | cfg.mu.Lock()
140 | defer cfg.mu.Unlock()
141 | for i := 0; i < cfg.n; i++ {
142 | cfg.connectUnlocked(i, cfg.All())
143 | }
144 | }
145 |
146 | // Sets up 2 partitions with connectivity between servers in each partition.
147 | func (cfg *config) partition(p1 []int, p2 []int) {
148 | cfg.mu.Lock()
149 | defer cfg.mu.Unlock()
150 | // log.Printf("partition servers into: %v %v\n", p1, p2)
151 | for i := 0; i < len(p1); i++ {
152 | cfg.disconnectUnlocked(p1[i], p2)
153 | cfg.connectUnlocked(p1[i], p1)
154 | }
155 | for i := 0; i < len(p2); i++ {
156 | cfg.disconnectUnlocked(p2[i], p1)
157 | cfg.connectUnlocked(p2[i], p2)
158 | }
159 | }
160 |
161 | // Create a clerk with clerk specific server names.
162 | // Give it connections to all of the servers, but for
163 | // now enable only connections to servers in to[].
164 | func (cfg *config) makeClient(to []int) *Clerk {
165 | cfg.mu.Lock()
166 | defer cfg.mu.Unlock()
167 |
168 | // a fresh set of ClientEnds.
169 | ends := make([]*labrpc.ClientEnd, cfg.n)
170 | endnames := make([]string, cfg.n)
171 | for j := 0; j < cfg.n; j++ {
172 | endnames[j] = randstring(20)
173 | ends[j] = cfg.net.MakeEnd(endnames[j])
174 | cfg.net.Connect(endnames[j], j)
175 | }
176 |
177 | ck := MakeClerk(random_handles(ends))
178 | cfg.clerks[ck] = endnames
179 | cfg.nextClientId++
180 | cfg.ConnectClientUnlocked(ck, to)
181 | return ck
182 | }
183 |
184 | func (cfg *config) deleteClient(ck *Clerk) {
185 | cfg.mu.Lock()
186 | defer cfg.mu.Unlock()
187 |
188 | v := cfg.clerks[ck]
189 | for i := 0; i < len(v); i++ {
190 | os.Remove(v[i])
191 | }
192 | delete(cfg.clerks, ck)
193 | }
194 |
195 | // caller should hold cfg.mu
196 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) {
197 | // log.Printf("ConnectClient %v to %v\n", ck, to)
198 | endnames := cfg.clerks[ck]
199 | for j := 0; j < len(to); j++ {
200 | s := endnames[to[j]]
201 | cfg.net.Enable(s, true)
202 | }
203 | }
204 |
205 | func (cfg *config) ConnectClient(ck *Clerk, to []int) {
206 | cfg.mu.Lock()
207 | defer cfg.mu.Unlock()
208 | cfg.ConnectClientUnlocked(ck, to)
209 | }
210 |
211 | // caller should hold cfg.mu
212 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) {
213 | // log.Printf("DisconnectClient %v from %v\n", ck, from)
214 | endnames := cfg.clerks[ck]
215 | for j := 0; j < len(from); j++ {
216 | s := endnames[from[j]]
217 | cfg.net.Enable(s, false)
218 | }
219 | }
220 |
221 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) {
222 | cfg.mu.Lock()
223 | defer cfg.mu.Unlock()
224 | cfg.DisconnectClientUnlocked(ck, from)
225 | }
226 |
227 | // Shutdown a server by isolating it
228 | func (cfg *config) ShutdownServer(i int) {
229 | cfg.mu.Lock()
230 | defer cfg.mu.Unlock()
231 |
232 | cfg.disconnectUnlocked(i, cfg.All())
233 |
234 | // disable client connections to the server.
235 | // it's important to do this before creating
236 | // the new Persister in saved[i], to avoid
237 | // the possibility of the server returning a
238 | // positive reply to an Append but persisting
239 | // the result in the superseded Persister.
240 | cfg.net.DeleteServer(i)
241 |
242 | // a fresh persister, in case old instance
243 | // continues to update the Persister.
244 | // but copy old persister's content so that we always
245 | // pass Make() the last persisted state.
246 | if cfg.saved[i] != nil {
247 | cfg.saved[i] = cfg.saved[i].Copy()
248 | }
249 |
250 | kv := cfg.servers[i]
251 | if kv != nil {
252 | cfg.mu.Unlock()
253 | kv.Kill()
254 | cfg.mu.Lock()
255 | cfg.servers[i] = nil
256 | }
257 | }
258 |
259 | // If restart servers, first call ShutdownServer
260 | func (cfg *config) StartServer(i int) {
261 | cfg.mu.Lock()
262 |
263 | // a fresh set of outgoing ClientEnd names.
264 | cfg.endnames[i] = make([]string, cfg.n)
265 | for j := 0; j < cfg.n; j++ {
266 | cfg.endnames[i][j] = randstring(20)
267 | }
268 |
269 | // a fresh set of ClientEnds.
270 | ends := make([]*labrpc.ClientEnd, cfg.n)
271 | for j := 0; j < cfg.n; j++ {
272 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
273 | cfg.net.Connect(cfg.endnames[i][j], j)
274 | }
275 |
276 | // a fresh persister, so old instance doesn't overwrite
277 | // new instance's persisted state.
278 | // give the fresh persister a copy of the old persister's
279 | // state, so that the spec is that we pass StartKVServer()
280 | // the last persisted state.
281 | if cfg.saved[i] != nil {
282 | cfg.saved[i] = cfg.saved[i].Copy()
283 | } else {
284 | cfg.saved[i] = raft.MakePersister()
285 | }
286 |
287 | cfg.mu.Unlock()
288 |
289 | cfg.servers[i] = StartServer(ends, i, cfg.saved[i])
290 |
291 | kvsvc := labrpc.MakeService(cfg.servers[i])
292 | rfsvc := labrpc.MakeService(cfg.servers[i].rf)
293 | srv := labrpc.MakeServer()
294 | srv.AddService(kvsvc)
295 | srv.AddService(rfsvc)
296 | cfg.net.AddServer(i, srv)
297 | }
298 |
299 | func (cfg *config) Leader() (bool, int) {
300 | cfg.mu.Lock()
301 | defer cfg.mu.Unlock()
302 |
303 | for i := 0; i < cfg.n; i++ {
304 | _, is_leader := cfg.servers[i].rf.GetState()
305 | if is_leader {
306 | return true, i
307 | }
308 | }
309 | return false, 0
310 | }
311 |
312 | // Partition servers into 2 groups and put current leader in minority
313 | func (cfg *config) make_partition() ([]int, []int) {
314 | _, l := cfg.Leader()
315 | p1 := make([]int, cfg.n/2+1)
316 | p2 := make([]int, cfg.n/2)
317 | j := 0
318 | for i := 0; i < cfg.n; i++ {
319 | if i != l {
320 | if j < len(p1) {
321 | p1[j] = i
322 | } else {
323 | p2[j-len(p1)] = i
324 | }
325 | j++
326 | }
327 | }
328 | p2[len(p2)-1] = l
329 | return p1, p2
330 | }
331 |
332 | func make_config(t *testing.T, n int, unreliable bool) *config {
333 | runtime.GOMAXPROCS(4)
334 | cfg := &config{}
335 | cfg.t = t
336 | cfg.net = labrpc.MakeNetwork()
337 | cfg.n = n
338 | cfg.servers = make([]*ShardMaster, cfg.n)
339 | cfg.saved = make([]*raft.Persister, cfg.n)
340 | cfg.endnames = make([][]string, cfg.n)
341 | cfg.clerks = make(map[*Clerk][]string)
342 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
343 | cfg.start = time.Now()
344 |
345 | // create a full set of KV servers.
346 | for i := 0; i < cfg.n; i++ {
347 | cfg.StartServer(i)
348 | }
349 |
350 | cfg.ConnectAll()
351 |
352 | cfg.net.Reliable(!unreliable)
353 |
354 | return cfg
355 | }
356 |
--------------------------------------------------------------------------------
/src/shardmaster/server.go:
--------------------------------------------------------------------------------
1 | package shardmaster
2 |
3 |
4 | import "../raft"
5 | import "../labrpc"
6 | import "sync"
7 | import "../labgob"
8 |
9 |
10 | type ShardMaster struct {
11 | mu sync.Mutex
12 | me int
13 | rf *raft.Raft
14 | applyCh chan raft.ApplyMsg
15 |
16 | // Your data here.
17 |
18 | configs []Config // indexed by config num
19 | }
20 |
21 |
22 | type Op struct {
23 | // Your data here.
24 | }
25 |
26 |
27 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) {
28 | // Your code here.
29 | }
30 |
31 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) {
32 | // Your code here.
33 | }
34 |
35 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) {
36 | // Your code here.
37 | }
38 |
39 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) {
40 | // Your code here.
41 | }
42 |
43 |
44 | //
45 | // the tester calls Kill() when a ShardMaster instance won't
46 | // be needed again. you are not required to do anything
47 | // in Kill(), but it might be convenient to (for example)
48 | // turn off debug output from this instance.
49 | //
50 | func (sm *ShardMaster) Kill() {
51 | sm.rf.Kill()
52 | // Your code here, if desired.
53 | }
54 |
55 | // needed by shardkv tester
56 | func (sm *ShardMaster) Raft() *raft.Raft {
57 | return sm.rf
58 | }
59 |
60 | //
61 | // servers[] contains the ports of the set of
62 | // servers that will cooperate via Raft to
63 | // form the fault-tolerant shardmaster service.
64 | // me is the index of the current server in servers[].
65 | //
66 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardMaster {
67 | sm := new(ShardMaster)
68 | sm.me = me
69 |
70 | sm.configs = make([]Config, 1)
71 | sm.configs[0].Groups = map[int][]string{}
72 |
73 | labgob.Register(Op{})
74 | sm.applyCh = make(chan raft.ApplyMsg)
75 | sm.rf = raft.Make(servers, me, persister, sm.applyCh)
76 |
77 | // Your code here.
78 |
79 | return sm
80 | }
81 |
--------------------------------------------------------------------------------
/src/shardmaster/test_test.go:
--------------------------------------------------------------------------------
1 | package shardmaster
2 |
3 | import (
4 | "sync"
5 | "testing"
6 | )
7 |
8 | // import "time"
9 | import "fmt"
10 |
11 | func check(t *testing.T, groups []int, ck *Clerk) {
12 | c := ck.Query(-1)
13 | if len(c.Groups) != len(groups) {
14 | t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups))
15 | }
16 |
17 | // are the groups as expected?
18 | for _, g := range groups {
19 | _, ok := c.Groups[g]
20 | if ok != true {
21 | t.Fatalf("missing group %v", g)
22 | }
23 | }
24 |
25 | // any un-allocated shards?
26 | if len(groups) > 0 {
27 | for s, g := range c.Shards {
28 | _, ok := c.Groups[g]
29 | if ok == false {
30 | t.Fatalf("shard %v -> invalid group %v", s, g)
31 | }
32 | }
33 | }
34 |
35 | // more or less balanced sharding?
36 | counts := map[int]int{}
37 | for _, g := range c.Shards {
38 | counts[g] += 1
39 | }
40 | min := 257
41 | max := 0
42 | for g, _ := range c.Groups {
43 | if counts[g] > max {
44 | max = counts[g]
45 | }
46 | if counts[g] < min {
47 | min = counts[g]
48 | }
49 | }
50 | if max > min+1 {
51 | t.Fatalf("max %v too much larger than min %v", max, min)
52 | }
53 | }
54 |
55 | func check_same_config(t *testing.T, c1 Config, c2 Config) {
56 | if c1.Num != c2.Num {
57 | t.Fatalf("Num wrong")
58 | }
59 | if c1.Shards != c2.Shards {
60 | t.Fatalf("Shards wrong")
61 | }
62 | if len(c1.Groups) != len(c2.Groups) {
63 | t.Fatalf("number of Groups is wrong")
64 | }
65 | for gid, sa := range c1.Groups {
66 | sa1, ok := c2.Groups[gid]
67 | if ok == false || len(sa1) != len(sa) {
68 | t.Fatalf("len(Groups) wrong")
69 | }
70 | if ok && len(sa1) == len(sa) {
71 | for j := 0; j < len(sa); j++ {
72 | if sa[j] != sa1[j] {
73 | t.Fatalf("Groups wrong")
74 | }
75 | }
76 | }
77 | }
78 | }
79 |
80 | func TestBasic(t *testing.T) {
81 | const nservers = 3
82 | cfg := make_config(t, nservers, false)
83 | defer cfg.cleanup()
84 |
85 | ck := cfg.makeClient(cfg.All())
86 |
87 | fmt.Printf("Test: Basic leave/join ...\n")
88 |
89 | cfa := make([]Config, 6)
90 | cfa[0] = ck.Query(-1)
91 |
92 | check(t, []int{}, ck)
93 |
94 | var gid1 int = 1
95 | ck.Join(map[int][]string{gid1: []string{"x", "y", "z"}})
96 | check(t, []int{gid1}, ck)
97 | cfa[1] = ck.Query(-1)
98 |
99 | var gid2 int = 2
100 | ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}})
101 | check(t, []int{gid1, gid2}, ck)
102 | cfa[2] = ck.Query(-1)
103 |
104 | cfx := ck.Query(-1)
105 | sa1 := cfx.Groups[gid1]
106 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" {
107 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1)
108 | }
109 | sa2 := cfx.Groups[gid2]
110 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
111 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
112 | }
113 |
114 | ck.Leave([]int{gid1})
115 | check(t, []int{gid2}, ck)
116 | cfa[4] = ck.Query(-1)
117 |
118 | ck.Leave([]int{gid2})
119 | cfa[5] = ck.Query(-1)
120 |
121 | fmt.Printf(" ... Passed\n")
122 |
123 | fmt.Printf("Test: Historical queries ...\n")
124 |
125 | for s := 0; s < nservers; s++ {
126 | cfg.ShutdownServer(s)
127 | for i := 0; i < len(cfa); i++ {
128 | c := ck.Query(cfa[i].Num)
129 | check_same_config(t, c, cfa[i])
130 | }
131 | cfg.StartServer(s)
132 | cfg.ConnectAll()
133 | }
134 |
135 | fmt.Printf(" ... Passed\n")
136 |
137 | fmt.Printf("Test: Move ...\n")
138 | {
139 | var gid3 int = 503
140 | ck.Join(map[int][]string{gid3: []string{"3a", "3b", "3c"}})
141 | var gid4 int = 504
142 | ck.Join(map[int][]string{gid4: []string{"4a", "4b", "4c"}})
143 | for i := 0; i < NShards; i++ {
144 | cf := ck.Query(-1)
145 | if i < NShards/2 {
146 | ck.Move(i, gid3)
147 | if cf.Shards[i] != gid3 {
148 | cf1 := ck.Query(-1)
149 | if cf1.Num <= cf.Num {
150 | t.Fatalf("Move should increase Config.Num")
151 | }
152 | }
153 | } else {
154 | ck.Move(i, gid4)
155 | if cf.Shards[i] != gid4 {
156 | cf1 := ck.Query(-1)
157 | if cf1.Num <= cf.Num {
158 | t.Fatalf("Move should increase Config.Num")
159 | }
160 | }
161 | }
162 | }
163 | cf2 := ck.Query(-1)
164 | for i := 0; i < NShards; i++ {
165 | if i < NShards/2 {
166 | if cf2.Shards[i] != gid3 {
167 | t.Fatalf("expected shard %v on gid %v actually %v",
168 | i, gid3, cf2.Shards[i])
169 | }
170 | } else {
171 | if cf2.Shards[i] != gid4 {
172 | t.Fatalf("expected shard %v on gid %v actually %v",
173 | i, gid4, cf2.Shards[i])
174 | }
175 | }
176 | }
177 | ck.Leave([]int{gid3})
178 | ck.Leave([]int{gid4})
179 | }
180 | fmt.Printf(" ... Passed\n")
181 |
182 | fmt.Printf("Test: Concurrent leave/join ...\n")
183 |
184 | const npara = 10
185 | var cka [npara]*Clerk
186 | for i := 0; i < len(cka); i++ {
187 | cka[i] = cfg.makeClient(cfg.All())
188 | }
189 | gids := make([]int, npara)
190 | ch := make(chan bool)
191 | for xi := 0; xi < npara; xi++ {
192 | gids[xi] = int((xi * 10) + 100)
193 | go func(i int) {
194 | defer func() { ch <- true }()
195 | var gid int = gids[i]
196 | var sid1 = fmt.Sprintf("s%da", gid)
197 | var sid2 = fmt.Sprintf("s%db", gid)
198 | cka[i].Join(map[int][]string{gid + 1000: []string{sid1}})
199 | cka[i].Join(map[int][]string{gid: []string{sid2}})
200 | cka[i].Leave([]int{gid + 1000})
201 | }(xi)
202 | }
203 | for i := 0; i < npara; i++ {
204 | <-ch
205 | }
206 | check(t, gids, ck)
207 |
208 | fmt.Printf(" ... Passed\n")
209 |
210 | fmt.Printf("Test: Minimal transfers after joins ...\n")
211 |
212 | c1 := ck.Query(-1)
213 | for i := 0; i < 5; i++ {
214 | var gid = int(npara + 1 + i)
215 | ck.Join(map[int][]string{gid: []string{
216 | fmt.Sprintf("%da", gid),
217 | fmt.Sprintf("%db", gid),
218 | fmt.Sprintf("%db", gid)}})
219 | }
220 | c2 := ck.Query(-1)
221 | for i := int(1); i <= npara; i++ {
222 | for j := 0; j < len(c1.Shards); j++ {
223 | if c2.Shards[j] == i {
224 | if c1.Shards[j] != i {
225 | t.Fatalf("non-minimal transfer after Join()s")
226 | }
227 | }
228 | }
229 | }
230 |
231 | fmt.Printf(" ... Passed\n")
232 |
233 | fmt.Printf("Test: Minimal transfers after leaves ...\n")
234 |
235 | for i := 0; i < 5; i++ {
236 | ck.Leave([]int{int(npara + 1 + i)})
237 | }
238 | c3 := ck.Query(-1)
239 | for i := int(1); i <= npara; i++ {
240 | for j := 0; j < len(c1.Shards); j++ {
241 | if c2.Shards[j] == i {
242 | if c3.Shards[j] != i {
243 | t.Fatalf("non-minimal transfer after Leave()s")
244 | }
245 | }
246 | }
247 | }
248 |
249 | fmt.Printf(" ... Passed\n")
250 | }
251 |
252 | func TestMulti(t *testing.T) {
253 | const nservers = 3
254 | cfg := make_config(t, nservers, false)
255 | defer cfg.cleanup()
256 |
257 | ck := cfg.makeClient(cfg.All())
258 |
259 | fmt.Printf("Test: Multi-group join/leave ...\n")
260 |
261 | cfa := make([]Config, 6)
262 | cfa[0] = ck.Query(-1)
263 |
264 | check(t, []int{}, ck)
265 |
266 | var gid1 int = 1
267 | var gid2 int = 2
268 | ck.Join(map[int][]string{
269 | gid1: []string{"x", "y", "z"},
270 | gid2: []string{"a", "b", "c"},
271 | })
272 | check(t, []int{gid1, gid2}, ck)
273 | cfa[1] = ck.Query(-1)
274 |
275 | var gid3 int = 3
276 | ck.Join(map[int][]string{gid3: []string{"j", "k", "l"}})
277 | check(t, []int{gid1, gid2, gid3}, ck)
278 | cfa[2] = ck.Query(-1)
279 |
280 | cfx := ck.Query(-1)
281 | sa1 := cfx.Groups[gid1]
282 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" {
283 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1)
284 | }
285 | sa2 := cfx.Groups[gid2]
286 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
287 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
288 | }
289 | sa3 := cfx.Groups[gid3]
290 | if len(sa3) != 3 || sa3[0] != "j" || sa3[1] != "k" || sa3[2] != "l" {
291 | t.Fatalf("wrong servers for gid %v: %v\n", gid3, sa3)
292 | }
293 |
294 | ck.Leave([]int{gid1, gid3})
295 | check(t, []int{gid2}, ck)
296 | cfa[3] = ck.Query(-1)
297 |
298 | cfx = ck.Query(-1)
299 | sa2 = cfx.Groups[gid2]
300 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
301 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
302 | }
303 |
304 | ck.Leave([]int{gid2})
305 |
306 | fmt.Printf(" ... Passed\n")
307 |
308 | fmt.Printf("Test: Concurrent multi leave/join ...\n")
309 |
310 | const npara = 10
311 | var cka [npara]*Clerk
312 | for i := 0; i < len(cka); i++ {
313 | cka[i] = cfg.makeClient(cfg.All())
314 | }
315 | gids := make([]int, npara)
316 | var wg sync.WaitGroup
317 | for xi := 0; xi < npara; xi++ {
318 | wg.Add(1)
319 | gids[xi] = int(xi + 1000)
320 | go func(i int) {
321 | defer wg.Done()
322 | var gid int = gids[i]
323 | cka[i].Join(map[int][]string{
324 | gid: []string{
325 | fmt.Sprintf("%da", gid),
326 | fmt.Sprintf("%db", gid),
327 | fmt.Sprintf("%dc", gid)},
328 | gid + 1000: []string{fmt.Sprintf("%da", gid+1000)},
329 | gid + 2000: []string{fmt.Sprintf("%da", gid+2000)},
330 | })
331 | cka[i].Leave([]int{gid + 1000, gid + 2000})
332 | }(xi)
333 | }
334 | wg.Wait()
335 | check(t, gids, ck)
336 |
337 | fmt.Printf(" ... Passed\n")
338 |
339 | fmt.Printf("Test: Minimal transfers after multijoins ...\n")
340 |
341 | c1 := ck.Query(-1)
342 | m := make(map[int][]string)
343 | for i := 0; i < 5; i++ {
344 | var gid = npara + 1 + i
345 | m[gid] = []string{fmt.Sprintf("%da", gid), fmt.Sprintf("%db", gid)}
346 | }
347 | ck.Join(m)
348 | c2 := ck.Query(-1)
349 | for i := int(1); i <= npara; i++ {
350 | for j := 0; j < len(c1.Shards); j++ {
351 | if c2.Shards[j] == i {
352 | if c1.Shards[j] != i {
353 | t.Fatalf("non-minimal transfer after Join()s")
354 | }
355 | }
356 | }
357 | }
358 |
359 | fmt.Printf(" ... Passed\n")
360 |
361 | fmt.Printf("Test: Minimal transfers after multileaves ...\n")
362 |
363 | var l []int
364 | for i := 0; i < 5; i++ {
365 | l = append(l, npara+1+i)
366 | }
367 | ck.Leave(l)
368 | c3 := ck.Query(-1)
369 | for i := int(1); i <= npara; i++ {
370 | for j := 0; j < len(c1.Shards); j++ {
371 | if c2.Shards[j] == i {
372 | if c3.Shards[j] != i {
373 | t.Fatalf("non-minimal transfer after Leave()s")
374 | }
375 | }
376 | }
377 | }
378 |
379 | fmt.Printf(" ... Passed\n")
380 | }
381 |
--------------------------------------------------------------------------------