├── .gitignore ├── Makefile ├── README.md ├── raft-by-bertyoung.pptx └── src ├── .gitignore ├── kvraft ├── client.go ├── common.go ├── config.go ├── faq ├── lab3 ├── server.go ├── snapshot_hints └── test_test.go ├── labgob ├── labgob.go └── test_test.go ├── labrpc ├── labrpc.go └── test_test.go ├── linearizability ├── bitset.go ├── linearizability.go ├── model.go └── models.go ├── main ├── diskvd.go ├── ii.go ├── lockc.go ├── lockd.go ├── mr-challenge.txt ├── mr-testout.txt ├── pbc.go ├── pbd.go ├── pg-being_ernest.txt ├── pg-dorian_gray.txt ├── pg-frankenstein.txt ├── pg-grimm.txt ├── pg-huckleberry_finn.txt ├── pg-metamorphosis.txt ├── pg-sherlock_holmes.txt ├── pg-tom_sawyer.txt ├── test-ii.sh ├── test-mr.sh ├── test-wc.sh ├── viewd.go └── wc.go ├── mapreduce ├── common.go ├── common_map.go ├── common_reduce.go ├── common_rpc.go ├── lab1 ├── master.go ├── master_rpc.go ├── master_splitmerge.go ├── schedule.go ├── test_test.go └── worker.go ├── raft ├── README.md ├── agreement-despite-follower-disconnection.log ├── config.go ├── lab2 ├── persister.go ├── raft.go ├── test_test.go └── util.go ├── shardkv ├── client.go ├── common.go ├── config.go ├── server.go └── test_test.go └── shardmaster ├── client.go ├── common.go ├── config.go ├── server.go └── test_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | pkg/ 2 | api.key 3 | *-handin.tar.gz 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # This is the Makefile helping you submit the labs. 2 | # Just create 6.824/api.key with your API key in it, 3 | # and submit your lab with the following command: 4 | # $ make [lab1|lab2a|lab2b|lab2c|lab3a|lab3b|lab4a|lab4b] 5 | 6 | LABS=" lab1 lab2a lab2b lab2c lab3a lab3b lab4a lab4b " 7 | 8 | %: 9 | @echo "Preparing $@-handin.tar.gz" 10 | @echo "Checking for committed temporary files..." 11 | @if git ls-files | grep -E 'mrtmp|mrinput' > /dev/null; then \ 12 | echo "" ; \ 13 | echo "OBS! You have committed some large temporary files:" ; \ 14 | echo "" ; \ 15 | git ls-files | grep -E 'mrtmp|mrinput' | sed 's/^/\t/' ; \ 16 | echo "" ; \ 17 | echo "Follow the instructions at http://stackoverflow.com/a/308684/472927" ; \ 18 | echo "to remove them, and then run make again." ; \ 19 | echo "" ; \ 20 | exit 1 ; \ 21 | fi 22 | @if echo $(LABS) | grep -q " $@ " ; then \ 23 | echo "Tarring up your submission..." ; \ 24 | tar cvzf $@-handin.tar.gz \ 25 | "--exclude=src/main/pg-*.txt" \ 26 | "--exclude=src/main/diskvd" \ 27 | "--exclude=src/mapreduce/824-mrinput-*.txt" \ 28 | "--exclude=mrtmp.*" \ 29 | "--exclude=src/main/diff.out" \ 30 | Makefile src; \ 31 | if ! test -e api.key ; then \ 32 | echo "Missing $(PWD)/api.key. Please create the file with your key in it or submit the $@-handin.tar.gz via the web interface."; \ 33 | else \ 34 | echo "Are you sure you want to submit $@? 
Enter 'yes' to continue:"; \ 35 | read line; \ 36 | if test "$$line" != "yes" ; then echo "Giving up submission"; exit; fi; \ 37 | if test `stat -c "%s" "$@-handin.tar.gz" 2>/dev/null || stat -f "%z" "$@-handin.tar.gz"` -ge 20971520 ; then echo "File exceeds 20MB."; exit; fi; \ 38 | mv api.key api.key.fix ; \ 39 | cat api.key.fix | tr -d '\n' > api.key ; \ 40 | rm api.key.fix ; \ 41 | curl -F file=@$@-handin.tar.gz -F "key= /dev/null || { \ 43 | echo ; \ 44 | echo "Submit seems to have failed."; \ 45 | echo "Please upload the tarball manually on the submission website."; } \ 46 | fi; \ 47 | else \ 48 | echo "Bad target $@. Usage: make [$(LABS)]"; \ 49 | fi 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 2017年春季MIT分布式系统课程labs 2 | 3 | - [x] Lab 1: [mapreduce](src/mapreduce) 4 | 5 | - [x] Lab 2: [raft](src/raft) 6 | 7 | - [x] Part 2A: Raft election 8 | 9 | - [x] Part 2B: Raft log replication 10 | 11 | - [x] Part 2C: Raft state persistence 12 | 13 | - [x] Lab 3: [kvraft](src/kvraft) 14 | 15 | - [x] Part 3A: Key/value service without log compaction 16 | 17 | - [x] Part 3B: Key/value service with log compaction 18 | 19 | - [x] Lab 4: Sharded Key/Value Service 20 | 21 | - [x] Part 4A: Shard Master 22 | 23 | - [x] Part 4B: Sharded Key/Value Server 24 | 25 | - [ ] Challenge: Garbage collection of state 26 | 27 | - [ ] Challenge: Client requests during configuration changes 28 | 29 | -------------------------------------------------------------------------------- /raft-by-bertyoung.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loveyacper/raft_for_dummies/30047f10b2e9851ac17cc006e2ce38486d37de8c/raft-by-bertyoung.pptx -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.*/ 2 | mrtmp.* 3 | 824-mrinput-*.txt 4 | /main/diff.out 5 | /mapreduce/x.txt 6 | /pbservice/x.txt 7 | /kvpaxos/x.txt 8 | -------------------------------------------------------------------------------- /src/kvraft/client.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "labrpc" 4 | import "crypto/rand" 5 | import "math/big" 6 | import "sync/atomic" 7 | 8 | import "time" 9 | 10 | var clientIdGen = int32(0) 11 | 12 | const maxTry = 3 13 | 14 | type Clerk struct { 15 | servers []*labrpc.ClientEnd 16 | // You will have to modify this struct. 17 | leader int // hint or probe, TODO: server no use this field 18 | fail int // successive fail calls for leader 19 | clientId int32 // client id, init by clientIdGen 20 | reqId int64 // req id 21 | } 22 | 23 | func nrand() int64 { 24 | // What's this??? I don't use it 25 | max := big.NewInt(int64(1) << 62) 26 | bigx, _ := rand.Int(rand.Reader, max) 27 | x := bigx.Int64() 28 | return x 29 | } 30 | 31 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 32 | ck := new(Clerk) 33 | ck.servers = servers 34 | // You'll have to add code here. 35 | ck.leader = 0 36 | ck.clientId = atomic.AddInt32(&clientIdGen, 1) 37 | ck.reqId = 1 38 | 39 | return ck 40 | } 41 | 42 | // 43 | // fetch the current value for a key. 44 | // returns "" if the key does not exist. 45 | // keeps trying forever in the face of all other errors. 
46 | // 47 | // you can send an RPC with code like this: 48 | // ok := ck.servers[i].Call("KVServer.Get", &args, &reply) 49 | // 50 | // the types of args and reply (including whether they are pointers) 51 | // must match the declared types of the RPC handler function's 52 | // arguments. and reply must be passed as a pointer. 53 | // 54 | func (ck *Clerk) Get(key string) string { 55 | // You will have to modify this function. 56 | args := new(GetArgs) 57 | args.Key = key 58 | args.ReqID = ck.reqId 59 | ck.reqId++ 60 | args.ID = ck.clientId 61 | 62 | fail := 0 63 | for { 64 | reply := new(GetReply) 65 | var done = make(chan bool) 66 | go func(leader int) { 67 | ok := ck.servers[leader].Call("KVServer.Get", args, reply) 68 | done<-ok 69 | }(ck.leader) 70 | 71 | var ok = true 72 | var timeout = false 73 | select { 74 | case <-time.After(RpcTimeout): 75 | timeout = true 76 | 77 | case ok = <-done: 78 | } 79 | 80 | if !timeout && ok && !reply.WrongLeader { 81 | DPrintf("[client %d] succ GET: %s = %s", ck.clientId, key, reply.Value) 82 | return reply.Value 83 | } else { 84 | fail++ 85 | if timeout || reply.WrongLeader || fail >= maxTry { 86 | fail = 0 87 | ck.leader++ 88 | if ck.leader >= len(ck.servers) { 89 | ck.leader = 0 90 | } 91 | } 92 | } 93 | 94 | time.Sleep(1 * time.Millisecond) 95 | if fail == 0 { 96 | DPrintf("[client %d] retry GET to another server %d\n", ck.clientId, ck.leader) 97 | } 98 | } 99 | } 100 | 101 | // 102 | // shared by Put and Append. 103 | // 104 | // you can send an RPC with code like this: 105 | // ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply) 106 | // 107 | // the types of args and reply (including whether they are pointers) 108 | // must match the declared types of the RPC handler function's 109 | // arguments. and reply must be passed as a pointer. 110 | // 111 | func (ck *Clerk) PutAppend(key string, value string, op string) { 112 | // You will have to modify this function. 
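	// same retry strategy as Get: keep a leader guess in ck.leader and rotate
	// to the next server after an RPC timeout, a WrongLeader reply, or maxTry
	// consecutive failures; ReqID is fixed before the retry loop so the
	// server-side duplicate table keeps retried writes exactly-once.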
113 | DPrintf("[client %d] try %s %s = %s to server %d\n", ck.clientId, op, key, value, ck.leader) 114 | args := new(PutAppendArgs) 115 | args.Key = key 116 | args.Value = value 117 | args.Op = op 118 | args.ReqID = ck.reqId 119 | ck.reqId++ 120 | args.ID = ck.clientId 121 | 122 | fail := 0 123 | for { 124 | reply := new(PutAppendReply) 125 | var done = make(chan bool) 126 | go func(leader int) { 127 | ok := ck.servers[leader].Call("KVServer.PutAppend", args, reply) 128 | done<-ok 129 | }(ck.leader) 130 | 131 | var ok = false 132 | var timeout = false 133 | select { 134 | case <-time.After(RpcTimeout): 135 | timeout = true 136 | 137 | case ok = <-done: 138 | } 139 | 140 | if !timeout && ok && !reply.WrongLeader { 141 | DPrintf("[client %d] succ %s %s = %s\n", ck.clientId, op, key, value) 142 | return 143 | } else { 144 | fail++ 145 | if timeout || reply.WrongLeader || fail >= maxTry { 146 | fail = 0 147 | ck.leader++ 148 | if ck.leader >= len(ck.servers) { 149 | ck.leader = 0 150 | } 151 | } 152 | } 153 | 154 | time.Sleep(1 * time.Millisecond) 155 | if fail == 0 { 156 | DPrintf("[client %d] retry PUT/APPEND to another server %d\n", ck.clientId, ck.leader) 157 | } 158 | } 159 | } 160 | 161 | func (ck *Clerk) Put(key string, value string) { 162 | ck.PutAppend(key, value, "Put") 163 | } 164 | 165 | func (ck *Clerk) Append(key string, value string) { 166 | ck.PutAppend(key, value, "Append") 167 | } 168 | 169 | -------------------------------------------------------------------------------- /src/kvraft/common.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "time" 4 | 5 | const ( 6 | OK = "OK" 7 | ErrNoKey = "ErrNoKey" 8 | ErrNotLeader = "ErrNotLeader" 9 | ErrInvalidOp = "ErrInvalidOp" 10 | ErrDuplicateReq = "ErrDuplicateReq" 11 | ) 12 | 13 | type Err string 14 | 15 | // Put or Append 16 | type PutAppendArgs struct { 17 | Key string 18 | Value string 19 | Op string // "Put" or "Append" 20 | // You'll have to add definitions here. 21 | // Field names must start with capital letters, 22 | // otherwise RPC will break. 23 | ID int32 // client id 24 | ReqID int64 25 | } 26 | 27 | type PutAppendReply struct { 28 | WrongLeader bool 29 | Err Err 30 | ID int32 31 | RspID int64 32 | } 33 | 34 | type GetArgs struct { 35 | Key string 36 | // You'll have to add definitions here. 
37 | ID int32 38 | ReqID int64 39 | } 40 | 41 | type GetReply struct { 42 | WrongLeader bool 43 | Err Err 44 | Value string 45 | ID int32 46 | RspID int64 47 | } 48 | 49 | const RpcTimeout time.Duration = 1000 * time.Millisecond 50 | 51 | -------------------------------------------------------------------------------- /src/kvraft/config.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "labrpc" 4 | import "testing" 5 | import "os" 6 | 7 | // import "log" 8 | import crand "crypto/rand" 9 | import "math/big" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | import "raft" 15 | import "fmt" 16 | import "time" 17 | import "sync/atomic" 18 | 19 | func randstring(n int) string { 20 | b := make([]byte, 2*n) 21 | crand.Read(b) 22 | s := base64.URLEncoding.EncodeToString(b) 23 | return s[0:n] 24 | } 25 | 26 | func makeSeed() int64 { 27 | max := big.NewInt(int64(1) << 62) 28 | bigx, _ := crand.Int(crand.Reader, max) 29 | x := bigx.Int64() 30 | return x 31 | } 32 | 33 | // Randomize server handles 34 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 35 | sa := make([]*labrpc.ClientEnd, len(kvh)) 36 | copy(sa, kvh) 37 | for i := range sa { 38 | j := rand.Intn(i + 1) 39 | sa[i], sa[j] = sa[j], sa[i] 40 | } 41 | return sa 42 | } 43 | 44 | type config struct { 45 | mu sync.Mutex 46 | t *testing.T 47 | net *labrpc.Network 48 | n int 49 | kvservers []*KVServer 50 | saved []*raft.Persister 51 | endnames [][]string // names of each server's sending ClientEnds 52 | clerks map[*Clerk][]string 53 | nextClientId int 54 | maxraftstate int 55 | start time.Time // time at which make_config() was called 56 | // begin()/end() statistics 57 | t0 time.Time // time at which test_test.go called cfg.begin() 58 | rpcs0 int // rpcTotal() at start of test 59 | ops int32 // number of clerk get/put/append method calls 60 | } 61 | 62 | func (cfg *config) checkTimeout() { 63 | // enforce a two minute real-time limit on each test 64 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 65 | cfg.t.Fatal("test took longer than 120 seconds") 66 | } 67 | } 68 | 69 | func (cfg *config) cleanup() { 70 | cfg.mu.Lock() 71 | defer cfg.mu.Unlock() 72 | for i := 0; i < len(cfg.kvservers); i++ { 73 | if cfg.kvservers[i] != nil { 74 | cfg.kvservers[i].Kill() 75 | } 76 | } 77 | cfg.net.Cleanup() 78 | cfg.checkTimeout() 79 | } 80 | 81 | // Maximum log size across all servers 82 | func (cfg *config) LogSize() int { 83 | logsize := 0 84 | for i := 0; i < cfg.n; i++ { 85 | n := cfg.saved[i].RaftStateSize() 86 | if n > logsize { 87 | logsize = n 88 | } 89 | } 90 | return logsize 91 | } 92 | 93 | // Maximum snapshot size across all servers 94 | func (cfg *config) SnapshotSize() int { 95 | snapshotsize := 0 96 | for i := 0; i < cfg.n; i++ { 97 | n := cfg.saved[i].SnapshotSize() 98 | if n > snapshotsize { 99 | snapshotsize = n 100 | } 101 | } 102 | return snapshotsize 103 | } 104 | 105 | // attach server i to servers listed in to 106 | // caller must hold cfg.mu 107 | func (cfg *config) connectUnlocked(i int, to []int) { 108 | // log.Printf("connect peer %d to %v\n", i, to) 109 | 110 | // outgoing socket files 111 | for j := 0; j < len(to); j++ { 112 | endname := cfg.endnames[i][to[j]] 113 | cfg.net.Enable(endname, true) 114 | } 115 | 116 | // incoming socket files 117 | for j := 0; j < len(to); j++ { 118 | endname := cfg.endnames[to[j]][i] 119 | cfg.net.Enable(endname, true) 120 | } 121 | } 122 | 123 | 
func (cfg *config) connect(i int, to []int) { 124 | cfg.mu.Lock() 125 | defer cfg.mu.Unlock() 126 | cfg.connectUnlocked(i, to) 127 | } 128 | 129 | // detach server i from the servers listed in from 130 | // caller must hold cfg.mu 131 | func (cfg *config) disconnectUnlocked(i int, from []int) { 132 | // log.Printf("disconnect peer %d from %v\n", i, from) 133 | 134 | // outgoing socket files 135 | for j := 0; j < len(from); j++ { 136 | if cfg.endnames[i] != nil { 137 | endname := cfg.endnames[i][from[j]] 138 | cfg.net.Enable(endname, false) 139 | } 140 | } 141 | 142 | // incoming socket files 143 | for j := 0; j < len(from); j++ { 144 | if cfg.endnames[j] != nil { 145 | endname := cfg.endnames[from[j]][i] 146 | cfg.net.Enable(endname, false) 147 | } 148 | } 149 | } 150 | 151 | func (cfg *config) disconnect(i int, from []int) { 152 | cfg.mu.Lock() 153 | defer cfg.mu.Unlock() 154 | cfg.disconnectUnlocked(i, from) 155 | } 156 | 157 | func (cfg *config) All() []int { 158 | all := make([]int, cfg.n) 159 | for i := 0; i < cfg.n; i++ { 160 | all[i] = i 161 | } 162 | return all 163 | } 164 | 165 | func (cfg *config) ConnectAll() { 166 | cfg.mu.Lock() 167 | defer cfg.mu.Unlock() 168 | for i := 0; i < cfg.n; i++ { 169 | cfg.connectUnlocked(i, cfg.All()) 170 | } 171 | } 172 | 173 | // Sets up 2 partitions with connectivity between servers in each partition. 174 | func (cfg *config) partition(p1 []int, p2 []int) { 175 | cfg.mu.Lock() 176 | defer cfg.mu.Unlock() 177 | // log.Printf("partition servers into: %v %v\n", p1, p2) 178 | for i := 0; i < len(p1); i++ { 179 | cfg.disconnectUnlocked(p1[i], p2) 180 | cfg.connectUnlocked(p1[i], p1) 181 | } 182 | for i := 0; i < len(p2); i++ { 183 | cfg.disconnectUnlocked(p2[i], p1) 184 | cfg.connectUnlocked(p2[i], p2) 185 | } 186 | } 187 | 188 | // Create a clerk with clerk specific server names. 189 | // Give it connections to all of the servers, but for 190 | // now enable only connections to servers in to[]. 191 | func (cfg *config) makeClient(to []int) *Clerk { 192 | cfg.mu.Lock() 193 | defer cfg.mu.Unlock() 194 | 195 | // a fresh set of ClientEnds. 
196 | ends := make([]*labrpc.ClientEnd, cfg.n) 197 | endnames := make([]string, cfg.n) 198 | for j := 0; j < cfg.n; j++ { 199 | endnames[j] = randstring(20) 200 | ends[j] = cfg.net.MakeEnd(endnames[j]) 201 | cfg.net.Connect(endnames[j], j) 202 | } 203 | 204 | ck := MakeClerk(random_handles(ends)) 205 | cfg.clerks[ck] = endnames 206 | cfg.nextClientId++ 207 | cfg.ConnectClientUnlocked(ck, to) 208 | return ck 209 | } 210 | 211 | func (cfg *config) deleteClient(ck *Clerk) { 212 | cfg.mu.Lock() 213 | defer cfg.mu.Unlock() 214 | 215 | v := cfg.clerks[ck] 216 | for i := 0; i < len(v); i++ { 217 | os.Remove(v[i]) 218 | } 219 | delete(cfg.clerks, ck) 220 | } 221 | 222 | // caller should hold cfg.mu 223 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 224 | // log.Printf("ConnectClient %v to %v\n", ck, to) 225 | endnames := cfg.clerks[ck] 226 | for j := 0; j < len(to); j++ { 227 | s := endnames[to[j]] 228 | cfg.net.Enable(s, true) 229 | } 230 | } 231 | 232 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 233 | cfg.mu.Lock() 234 | defer cfg.mu.Unlock() 235 | cfg.ConnectClientUnlocked(ck, to) 236 | } 237 | 238 | // caller should hold cfg.mu 239 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 240 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 241 | endnames := cfg.clerks[ck] 242 | for j := 0; j < len(from); j++ { 243 | s := endnames[from[j]] 244 | cfg.net.Enable(s, false) 245 | } 246 | } 247 | 248 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 249 | cfg.mu.Lock() 250 | defer cfg.mu.Unlock() 251 | cfg.DisconnectClientUnlocked(ck, from) 252 | } 253 | 254 | // Shutdown a server by isolating it 255 | func (cfg *config) ShutdownServer(i int) { 256 | cfg.mu.Lock() 257 | defer cfg.mu.Unlock() 258 | 259 | cfg.disconnectUnlocked(i, cfg.All()) 260 | 261 | // disable client connections to the server. 262 | // it's important to do this before creating 263 | // the new Persister in saved[i], to avoid 264 | // the possibility of the server returning a 265 | // positive reply to an Append but persisting 266 | // the result in the superseded Persister. 267 | cfg.net.DeleteServer(i) 268 | 269 | // a fresh persister, in case old instance 270 | // continues to update the Persister. 271 | // but copy old persister's content so that we always 272 | // pass Make() the last persisted state. 273 | if cfg.saved[i] != nil { 274 | cfg.saved[i] = cfg.saved[i].Copy() 275 | } 276 | 277 | kv := cfg.kvservers[i] 278 | if kv != nil { 279 | cfg.mu.Unlock() 280 | kv.Kill() 281 | cfg.mu.Lock() 282 | cfg.kvservers[i] = nil 283 | } 284 | } 285 | 286 | // If restart servers, first call ShutdownServer 287 | func (cfg *config) StartServer(i int) { 288 | cfg.mu.Lock() 289 | 290 | // a fresh set of outgoing ClientEnd names. 291 | cfg.endnames[i] = make([]string, cfg.n) 292 | for j := 0; j < cfg.n; j++ { 293 | cfg.endnames[i][j] = randstring(20) 294 | } 295 | 296 | // a fresh set of ClientEnds. 297 | ends := make([]*labrpc.ClientEnd, cfg.n) 298 | for j := 0; j < cfg.n; j++ { 299 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 300 | cfg.net.Connect(cfg.endnames[i][j], j) 301 | } 302 | 303 | // a fresh persister, so old instance doesn't overwrite 304 | // new instance's persisted state. 305 | // give the fresh persister a copy of the old persister's 306 | // state, so that the spec is that we pass StartKVServer() 307 | // the last persisted state. 
308 | if cfg.saved[i] != nil { 309 | cfg.saved[i] = cfg.saved[i].Copy() 310 | } else { 311 | cfg.saved[i] = raft.MakePersister() 312 | } 313 | cfg.mu.Unlock() 314 | 315 | cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate) 316 | 317 | kvsvc := labrpc.MakeService(cfg.kvservers[i]) 318 | rfsvc := labrpc.MakeService(cfg.kvservers[i].rf) 319 | srv := labrpc.MakeServer() 320 | srv.AddService(kvsvc) 321 | srv.AddService(rfsvc) 322 | cfg.net.AddServer(i, srv) 323 | } 324 | 325 | func (cfg *config) Leader() (bool, int) { 326 | cfg.mu.Lock() 327 | defer cfg.mu.Unlock() 328 | 329 | for i := 0; i < cfg.n; i++ { 330 | _, is_leader := cfg.kvservers[i].rf.GetState() 331 | if is_leader { 332 | return true, i 333 | } 334 | } 335 | return false, 0 336 | } 337 | 338 | // Partition servers into 2 groups and put current leader in minority 339 | func (cfg *config) make_partition() ([]int, []int) { 340 | _, l := cfg.Leader() 341 | p1 := make([]int, cfg.n/2+1) 342 | p2 := make([]int, cfg.n/2) 343 | j := 0 344 | for i := 0; i < cfg.n; i++ { 345 | if i != l { 346 | if j < len(p1) { 347 | p1[j] = i 348 | } else { 349 | p2[j-len(p1)] = i 350 | } 351 | j++ 352 | } 353 | } 354 | p2[len(p2)-1] = l 355 | return p1, p2 356 | } 357 | 358 | var ncpu_once sync.Once 359 | 360 | func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config { 361 | ncpu_once.Do(func() { 362 | if runtime.NumCPU() < 2 { 363 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 364 | } 365 | rand.Seed(makeSeed()) 366 | }) 367 | runtime.GOMAXPROCS(4) 368 | cfg := &config{} 369 | cfg.t = t 370 | cfg.net = labrpc.MakeNetwork() 371 | cfg.n = n 372 | cfg.kvservers = make([]*KVServer, cfg.n) 373 | cfg.saved = make([]*raft.Persister, cfg.n) 374 | cfg.endnames = make([][]string, cfg.n) 375 | cfg.clerks = make(map[*Clerk][]string) 376 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 377 | cfg.maxraftstate = maxraftstate 378 | cfg.start = time.Now() 379 | 380 | // create a full set of KV servers. 381 | for i := 0; i < cfg.n; i++ { 382 | cfg.StartServer(i) 383 | } 384 | 385 | cfg.ConnectAll() 386 | 387 | cfg.net.Reliable(!unreliable) 388 | 389 | return cfg 390 | } 391 | 392 | func (cfg *config) rpcTotal() int { 393 | return cfg.net.GetTotalCount() 394 | } 395 | 396 | // start a Test. 397 | // print the Test message. 398 | // e.g. cfg.begin("Test (2B): RPC counts aren't too high") 399 | func (cfg *config) begin(description string) { 400 | fmt.Printf("%s ...\n", description) 401 | cfg.t0 = time.Now() 402 | cfg.rpcs0 = cfg.rpcTotal() 403 | atomic.StoreInt32(&cfg.ops, 0) 404 | } 405 | 406 | func (cfg *config) op() { 407 | atomic.AddInt32(&cfg.ops, 1) 408 | } 409 | 410 | // end a Test -- the fact that we got here means there 411 | // was no failure. 412 | // print the Passed message, 413 | // and some performance numbers. 414 | func (cfg *config) end() { 415 | cfg.checkTimeout() 416 | if cfg.t.Failed() == false { 417 | t := time.Since(cfg.t0).Seconds() // real time 418 | npeers := cfg.n // number of Raft peers 419 | nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends 420 | ops := atomic.LoadInt32(&cfg.ops) // number of clerk get/put/append calls 421 | 422 | fmt.Printf(" ... 
Passed --") 423 | fmt.Printf(" %4.1f %d %5d %4d\n", t, npeers, nrpc, ops) 424 | } 425 | } 426 | -------------------------------------------------------------------------------- /src/kvraft/lab3: -------------------------------------------------------------------------------- 1 | Lab 3: 容错的KV服务 2 | 3 | 介绍 4 | 5 | 在这个lab你将使用lab2中的raft库构建一个容错的kv存储。这个kv服务将是一个复制状态机,由一组kv服务器组成,它们之间通过raft日志协调。只要有过半节点存活, 6 | 你的kv服务就应该能正常工作。 7 | 8 | 你的系统将由client和kv服务组成,每个kv服务也作为一个raft peer的角色。客户端向服务器发送Put,Append,Get,服务器将这些调用转存到raft日志,并按序执行; 9 | 一个客户端能够向任意的某个服务节点发送请求,但是应该重试其他服务器如果当前的不是leader。如果操作作为raft日志commit了,操作结果应该回复给客户端。如果 10 | 操作失败了,返回错误,客户端重试其他服务节点。 11 | 12 | 本实验由两部分。A部分,实现的时候不需要考虑日志快照压缩。实验B部分,需要实现snapshot。 13 | 14 | 你应该仔细读一下paper的7和8章节; 15 | 你可以给Raft ApplyMsg添加字段,也可以给Raft RPC比如AppendEntries添加字段。 16 | 17 | Part A: kv服务,不需要日志压缩 18 | 19 | 服务支持三种rpc: Put(k, v), Append(k, arg), Get(k); 若k不存在,Append相当于Put。 20 | 21 | 你的kvraft客户端代码(src/kvraft/client.go的clerk类)应该尝试不同的kv服务器直到返回成功;只要客户端可以和主分区的raft leader节点通信,操作 最终一定会成功。 22 | 23 | 你的第一个任务是实现一个在无消息丢失,无服务失败情况下的解决方案。你的服务必须保证Get,Put,Append返回linearizable的结果。即,调用put get append的变动对 24 | 所有客户端看上去是一致的,它们以一个特定的顺序执行,不管是否有服务失败或者leader变化。一个put之后执行的get方法一定能够看到最近一次put的值。完整的调用应该 25 | 有exactly-once语义。 26 | 27 | 一个可行的计划是,先用kvraft通过Raft去达成一致性的"value"信息去填充server.go中的Op结构体,然后实现putappend和get处理函数;代码在server.go。 28 | 处理函数应该使用Start将Op加入raft日志,当日志被commit时,回复客户端;注意,你不能执行一个操作,直到被commit到日志(即当它到达applyCh的时候); 29 | 30 | 当你通过第一个测试"one client"的时候就完成了任务。如果你实现的足够老练,那么"concurrent clients"也通过了。 31 | 32 | 你的kvraft服务之间不应该直接通信,而是通过raft日志通信。 33 | 34 | 调用Start()之后,你的kvraft服务需要等待Raft达成共识;达成共识的命令会到达applyCh;你应该仔细思考怎么组织代码让它持续读取applyCh,当putappend和get处理器 35 | 提交命令到raft日志的时候。小心kv服务和raft库之间的死锁。 36 | (实际就是等待applyCh上的消息,底层raft在commit日志后,会向applyCh发命令,通知将日志命令应用到用户的状态机) 37 | 38 | 你的方案需要处理leader调用了Start(),但是在请求被commit之前就失去了leader身份的情况。这种情况下,你应该让客户端重发请求到一个不同的服务进程, 39 | 直到发现新的leader。一个方法是leader通过发现一个与Start返回的index位置相同的不同请求来意识到自己失去了leader身份,或者Raft.GetState()返回的term 40 | 变化了。如果ex leader自己发生了分区,它不会知道新的leader;但是在相同分区的客户端也不能与新leader通信,所以让client永远等下去是合理的,直到 41 | 分区愈合。 42 | 43 | 你可能需要修改Clerk来记住最近的leader,避免浪费时间探测leader。 44 | 一个kvraft服务不应该完成一个Get RPC如果它不是majority部分的节点。所以它不会返回stale数据; 45 | 一个简单方案是,让每一个Get也提交Raft日志中,这样你不必要实现第8节介绍的关于只读操作的优化。 46 | 47 | 假定:请求是串行的,不允许pipeline请求。 48 | 49 | BERT YOUNG: 50 | 关于处理重复客户端请求的问题: 51 | 为了便于思考,将整个kv服务视作一个单机服务: 52 | 首先,与client直接交互的kvraft节点,每个节点视作一个worker线程; 53 | 每个线程有自己的raft实例维护一份raft日志,由于强一致性,我们将raft实例视为只有一个,多个 54 | worker线程往里提交命令(即调用raft.Start()).(我们可以将raft实例视作一个慢IO设备,毕竟达成共识需要时间:) 55 | raft实例会将commit的日志命令项提交给应用的状态机(通过applyCh); 56 | 为了避免重复请求,每个worker线程会记录自己apply过的每个客户端的最大命令ID(单调递增的), 57 | 小于等于它的认为是重复请求;命令只有落地到状态机,才会更新ID记录。 58 | 所以关于请求去重就比较好理解了: 59 | 当worker线程(kvraft 的rpc接收处理入口那)接收到客户端请求,先简单检查一下是否重复请求,这步检查 60 | 没有也可以,但检查了是个很大的优化。 61 | 不重复则提交请求给raft实例,当raft实例处理完成(请求命令日志项被commit),则通知我们应用层的状态机; 62 | 因此每个worker线程的状态机只需要此时尝试将命令ID记录,失败则是重复ID,不更改状态机; 63 | 否则更改状态机,ID记录也更新了; 64 | 65 | 思考一个场景:ABC三个worker线程,A是leader接收请求,准备放在raft实例的第一个槽,但是A发生了 66 | 分区,无法访问B和C了;所以这个请求无法commit,A陷入等待(等待请求被commit); 67 | 然后客户端自己重试发给了B,此时B已经称为leader,处理请求,也准备放在raft实例的第一个槽;最终成功; 68 | 当分区愈合时,A称为B的follower,最终收到log index 1的apply,A不再等待,但是发现返回的日志项并不是 69 | 自己投递的日志项,A知道自己是个stale leader,请求处理失败了;事实上该请求是第二次重试到B线程才成功的。 70 | 71 | 72 | 73 | Part B: kv服务,需要日志压缩 74 | 75 | kvraft服务需要经常将当前状态作为snapshot持久化,并丢弃持久化之前的日志。当服务重启时,服务首先读取snapshot,然后replay之后的日志。sec7介绍了快照机制。 76 | 77 | 你应该花一些时间设计raft库和你的服务之间的接口,让raft库能够丢弃日志。思考下当往log尾部存储的时候,你的raft将怎么操作,怎样丢弃旧日志。 78 | 79 | kvraft测试将maxraftstate传递给你的StartKVServer(), maxraftstate表示raft持久化状态的字节最大大小,包括log,但不包括快照。 80 | 
Your raft.go probably keeps the entire log in a Go slice. Modify it so that it can be given a log index, discard the entries before that index, and continue operating while storing only log entries after that index. Make sure you pass all the Raft tests after making these changes.

Modify your kvraft server so that it detects when the persisted Raft state grows too large, and then saves a snapshot and tells Raft that it can discard old log entries. Save each snapshot with persister.SaveSnapshot() (don't use files).

Modify your Raft leader code to send an InstallSnapshot RPC to a follower when the leader has discarded the log entries the follower needs. When a follower receives an InstallSnapshot RPC, your Raft code will need to send the included snapshot to its kvraft. You can use the applyCh for this purpose — see the UseSnapshot field. A kvraft instance should restore the snapshot from the persister when it re-starts. Your solution is complete when you pass the remaining tests reliably.

The maxraftstate limit applies to the GOB-encoded bytes your Raft passes to persister.SaveRaftState().

Remember that your kvserver must be able to detect duplicate client requests across checkpoints, so any state you are using to detect them must be included in the snapshots. Remember to capitalize all fields of structures stored in the snapshot.
Make sure you pass TestSnapshotRPC before moving on to the other Snapshot tests.
A common source of bugs is for the Raft state and the snapshot state to be inconsistent with each other, particularly after re-starts or InstallSnapshots. You are allowed to add methods to your Raft for kvserver to call to help handle InstallSnapshot RPCs.

-------------------------------------------------------------------------------- /src/kvraft/server.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | "labgob" 5 | "labrpc" 6 | "log" 7 | "raft" 8 | "sync" 9 | 10 | "bytes" 11 | ) 12 | 13 | const Debug = 0 14 | 15 | func DPrintf(format string, a ...interface{}) (n int, err error) { 16 | if Debug > 0 { 17 | log.Printf(format, a...) 18 | } 19 | return 20 | } 21 | 22 | // Op is the command that actually goes into the Raft log 23 | type Op struct { 24 | // Your definitions here. 25 | // Field names must start with capital letters, 26 | // otherwise RPC will break. 27 | Key string 28 | Value string 29 | Operation string // Get Put or Append 30 | 31 | // request context, used for duplicate detection 32 | ID int32 33 | ReqID int64 34 | } 35 | 36 | type KVServer struct { 37 | mu sync.Mutex 38 | me int 39 | // each kv server has a raft instance 40 | rf *raft.Raft 41 | 42 | // when raft commits a log entry, it eventually notifies us on applyCh, 43 | // so we listen on this applyCh 44 | applyCh chan raft.ApplyMsg 45 | 46 | maxraftstate int // snapshot if < persister.RaftStateSize() 47 | 48 | // Your definitions here. 
49 | 50 | // real kv data here 51 | data map[string]string 52 | 53 | // Notify chan for each log index 54 | notifyCh map[int]chan Op 55 | // request records 56 | requests map[int32]int64 // client -> last commited reqID 57 | 58 | // for exit 59 | shutdown chan interface{} 60 | } 61 | 62 | // check if repeated request 63 | func (kv *KVServer) isDuplicated(id int32, reqId int64) bool { 64 | kv.mu.Lock() 65 | defer kv.mu.Unlock() 66 | maxSeenReqId, ok := kv.requests[id] 67 | if ok { 68 | return reqId <= maxSeenReqId 69 | } 70 | return false 71 | } 72 | 73 | // true if update success, imply nonrepeat request can be applied to state machine: eg, data field 74 | func (kv *KVServer) updateIfNotDuplicated(id int32, reqId int64) bool { 75 | // must hold lock outside 76 | 77 | maxSeenReqId, ok := kv.requests[id] 78 | if ok { 79 | if reqId <= maxSeenReqId { 80 | return false 81 | } 82 | } 83 | 84 | kv.requests[id] = reqId 85 | return true 86 | } 87 | 88 | // call raft.Start to commit a command as log entry 89 | func (kv *KVServer) proposeCommand(cmd Op) bool { 90 | kv.mu.Lock() 91 | // lock kv first, think about: 92 | // If no lock with rf.Start, raft maybe very quick to agree. 93 | // Then applyRoutine will not find notifyCh on log index, 94 | // proposeCommand will block on notifyCh forever. 95 | logIndex, _, isLeader := kv.rf.Start(cmd) 96 | if !isLeader { 97 | kv.mu.Unlock() 98 | return false 99 | } 100 | 101 | // wait command to be commited 102 | 103 | // use logIndex because all servers agree on same log index 104 | ch, ok := kv.notifyCh[logIndex] 105 | if !ok { 106 | ch = make(chan Op, 1) 107 | kv.notifyCh[logIndex] = ch 108 | } 109 | kv.mu.Unlock() 110 | 111 | // check 112 | if ch == nil { 113 | panic("FATAL: chan is nil") 114 | } 115 | 116 | // wait on ch forever, because: 117 | // If I lose leadership before commit, may be partioned 118 | // I can't response, so wait until partion healed. 119 | // Eventually a log will be commited on index, then I'm 120 | // awaken, but cmd1 is different from cmd, return failed 121 | // to client. 122 | // If client retry another leader when I waiting, no matter. 123 | select { 124 | case cmd1 := <-ch: 125 | return cmd1 == cmd // if different log, me is not leader 126 | } 127 | 128 | return false 129 | } 130 | 131 | func (kv *KVServer) Get(args *GetArgs, reply *GetReply) { 132 | // Your code here. 133 | // check if leader, useless but efficient 134 | _, isLeader := kv.rf.GetState() 135 | if !isLeader { 136 | reply.WrongLeader = true 137 | reply.Err = ErrNotLeader 138 | return 139 | } 140 | 141 | DPrintf("[server %d] GetRPC isLeader %v, args %v", kv.me, isLeader, args) 142 | reply.WrongLeader = false 143 | reply.Err = "" 144 | reply.ID = args.ID 145 | reply.RspID = args.ReqID 146 | 147 | cmd := Op{} 148 | cmd.Key = args.Key 149 | cmd.Value = "" // no use for Get 150 | cmd.Operation = "Get" 151 | cmd.ID = args.ID 152 | cmd.ReqID = args.ReqID 153 | 154 | // try commit cmd to raft log 155 | succ := kv.proposeCommand(cmd) 156 | if succ { 157 | kv.mu.Lock() 158 | if v, ok := kv.data[args.Key]; ok { 159 | reply.Value = v 160 | } else { 161 | reply.Value = "" 162 | reply.Err = ErrNoKey 163 | } 164 | kv.mu.Unlock() 165 | } else { 166 | reply.WrongLeader = true 167 | reply.Err = ErrNotLeader 168 | } 169 | } 170 | 171 | func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 172 | // Your code here. 
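	// same pattern as Get above: bail out early if this replica is not the Raft
	// leader, reject requests the duplicate table has already seen, then propose
	// the command through Raft and reply only after it is committed and applied.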
173 | _, isLeader := kv.rf.GetState() 174 | DPrintf("[server %d] PutAppendRPC isLeader %v, args %v", kv.me, isLeader, args) 175 | 176 | reply.WrongLeader = false 177 | reply.Err = "" 178 | reply.ID = args.ID 179 | reply.RspID = args.ReqID 180 | 181 | if !isLeader { 182 | reply.WrongLeader = true 183 | reply.Err = ErrNotLeader 184 | } else { 185 | if args.Op != "Put" && args.Op != "Append" { 186 | reply.Err = ErrInvalidOp 187 | return 188 | } 189 | } 190 | 191 | // check if repeated request, useless but efficient 192 | duplicate := kv.isDuplicated(args.ID, args.ReqID) 193 | if duplicate { 194 | reply.Err = ErrDuplicateReq 195 | return 196 | } 197 | 198 | cmd := Op{} 199 | cmd.Key = args.Key 200 | cmd.Value = args.Value 201 | cmd.Operation = args.Op 202 | cmd.ID = args.ID 203 | cmd.ReqID = args.ReqID 204 | 205 | succ := kv.proposeCommand(cmd) 206 | if !succ { 207 | reply.WrongLeader = true 208 | reply.Err = ErrNotLeader 209 | } 210 | } 211 | 212 | // when raft commited a log entry, it'll notify me 213 | func (kv *KVServer) applyRoutine() { 214 | for { 215 | var op Op 216 | var applyMsg raft.ApplyMsg 217 | 218 | select { 219 | case <-kv.shutdown: 220 | DPrintf("[server %d] shutdown applyRoutine", kv.me) 221 | return 222 | 223 | case applyMsg = <-kv.applyCh: 224 | } 225 | 226 | if !applyMsg.CommandValid { 227 | kv.loadSnapshot(applyMsg.Snapshot) 228 | continue 229 | } 230 | 231 | op, _ = (applyMsg.Command).(Op) 232 | 233 | kv.mu.Lock() 234 | // Follower & Leader: try apply to state machine, fail if duplicated request 235 | if op.Operation == "Put" { 236 | update := kv.updateIfNotDuplicated(op.ID, op.ReqID) 237 | if update { 238 | DPrintf("[server %d] apply for client %d PUT key %s, value %s, logindex %d", kv.me, op.ID, op.Key, op.Value, applyMsg.CommandIndex) 239 | kv.data[op.Key] = op.Value 240 | } 241 | } else if op.Operation == "Append" { 242 | update := kv.updateIfNotDuplicated(op.ID, op.ReqID) 243 | if update { 244 | kv.data[op.Key] += op.Value 245 | DPrintf("[server %d] apply for client %d APPEND key %s, value %s, now %s, logindex %d", kv.me, op.ID, op.Key, op.Value, kv.data[op.Key], applyMsg.CommandIndex) 246 | } 247 | } else { 248 | // Do nothing for Get, should I cached reply? 249 | } 250 | 251 | ch, ok := kv.notifyCh[applyMsg.CommandIndex] 252 | if ok { 253 | //_, isLeader := kv.rf.GetState() 254 | // likely be leader 255 | /* 256 | select { 257 | case <-ch: 258 | default: 259 | } 260 | */ 261 | 262 | ch <- op 263 | } 264 | 265 | if kv.maxraftstate > 0 && kv.rf.RaftStateSize() >= kv.maxraftstate { 266 | DPrintf("(%d) state size %d", kv.me, kv.rf.RaftStateSize()) 267 | // If I keep mu.Lock, the startSnapshot will use raft's lock 268 | // But raft's applyRoutine is keeping lock and apply msg, he will be blocking with held lock. 269 | go kv.startSnapshot(applyMsg.CommandIndex) 270 | } 271 | 272 | kv.mu.Unlock() 273 | } 274 | } 275 | 276 | // for snapshot 277 | func (kv *KVServer) startSnapshot(lastIndex int) { 278 | w := new(bytes.Buffer) 279 | e := labgob.NewEncoder(w) 280 | 281 | kv.mu.Lock() 282 | DPrintf("[server %d] startSnapshot index %d with data %v", kv.me, lastIndex, kv.data) 283 | e.Encode(kv.data) 284 | e.Encode(kv.requests) 285 | kv.mu.Unlock() 286 | 287 | data := w.Bytes() 288 | kv.rf.StartSnapshot(data, lastIndex) 289 | } 290 | 291 | func (kv *KVServer) loadSnapshot(data []byte) { 292 | if data == nil || len(data) < 1 { // bootstrap without any state? 
293 | return 294 | } 295 | 296 | r := bytes.NewBuffer(data) 297 | d := labgob.NewDecoder(r) 298 | 299 | kv.mu.Lock() 300 | defer kv.mu.Unlock() 301 | kv.data = make(map[string]string) 302 | kv.requests = make(map[int32]int64) 303 | 304 | d.Decode(&kv.data) 305 | d.Decode(&kv.requests) 306 | DPrintf("[server %d] load snapshot data %v", kv.me, kv.data) 307 | } 308 | 309 | // 310 | // the tester calls Kill() when a KVServer instance won't 311 | // be needed again. you are not required to do anything 312 | // in Kill(), but it might be convenient to (for example) 313 | // turn off debug output from this instance. 314 | // 315 | func (kv *KVServer) Kill() { 316 | kv.rf.Kill() 317 | // Your code here, if desired. 318 | close(kv.shutdown) 319 | } 320 | 321 | // 322 | // servers[] contains the ports of the set of 323 | // servers that will cooperate via Raft to 324 | // form the fault-tolerant key/value service. 325 | // me is the index of the current server in servers[]. 326 | // the k/v server should store snapshots through the underlying Raft 327 | // implementation, which should call persister.SaveStateAndSnapshot() to 328 | // atomically save the Raft state along with the snapshot. 329 | // the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, 330 | // in order to allow Raft to garbage-collect its log. if maxraftstate is -1, 331 | // you don't need to snapshot. 332 | // StartKVServer() must return quickly, so it should start goroutines 333 | // for any long-running work. 334 | // 335 | func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { 336 | // call labgob.Register on structures you want 337 | // Go's RPC library to marshall/unmarshall. 338 | labgob.Register(Op{}) 339 | 340 | kv := new(KVServer) 341 | kv.me = me 342 | kv.maxraftstate = maxraftstate 343 | 344 | // You may need initialization code here. 345 | kv.data = make(map[string]string) 346 | kv.requests = make(map[int32]int64) 347 | kv.notifyCh = make(map[int]chan Op) 348 | kv.shutdown = make(chan interface{}, 1) 349 | 350 | kv.applyCh = make(chan raft.ApplyMsg, 1) 351 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 352 | 353 | // You may need initialization code here. 354 | go kv.applyRoutine() // listen on applyCh, apply op to state machine 355 | 356 | return kv 357 | } 358 | -------------------------------------------------------------------------------- /src/kvraft/snapshot_hints: -------------------------------------------------------------------------------- 1 | *** topic: log compaction and Snapshots (Lab 3B) 2 | 3 | problem: 4 | log will get to be huge -- much larger than state-machine state! 5 | will take a long time to re-play on reboot or send to a new server 6 | 7 | luckily: 8 | a server doesn't need *both* the complete log *and* the service state 9 | the executed part of the log is captured in the state 10 | clients only see the state, not the log 11 | service state usually much smaller, so let's keep just that 12 | 13 | what constrains how a server discards log entries? 14 | can't forget un-committed entries -- might be part of leader's majority 15 | can't forget un-executed entries -- not yet reflected in the state 16 | executed entries might be needed to bring other servers up to date 17 | 18 | solution: service periodically creates persistent "snapshot" 19 | [diagram: service with state, snapshot on disk, raft log, raft persistent] 20 | copy of entire state-machine state as of execution of a specific log entry 21 | e.g. 
k/v table 22 | service writes snapshot to persistent storage (disk) 23 | service tells Raft it is snapshotted through some log index 24 | Raft discards log before that index 25 | a server can create a snapshot and discard prefix of log at any time 26 | e.g. when log grows too long 27 | 28 | relation of snapshot and log 29 | snapshot reflects only executed log entries 30 | and thus only committed entries 31 | so server will only discard committed prefix of log 32 | anything not known to be committed will remain in log 33 | 34 | so a server's on-disk state consists of: 35 | service's snapshot up to a certain log entry 36 | Raft's persisted log w/ following log entries 37 | the combination is equivalent to the full log 38 | 39 | what happens on crash+restart? 40 | service reads snapshot from disk 41 | Raft reads persisted log from disk 42 | sends service entries that are committed but not in snapshot 43 | 44 | what if a follower lags and leader has discarded past end of follower's log? 45 | nextIndex[i] will back up to start of leader's log 46 | so leader can't repair that follower with AppendEntries RPCs 47 | thus the InstallSnapshot RPC 48 | (Q: why not have leader discard only entries that *all* servers have?) 49 | 50 | what's in an InstallSnapshot RPC? Figures 12, 13 51 | term 52 | lastIncludedIndex 53 | lastIncludedTerm 54 | snapshot data 55 | 56 | what does a follower do w/ InstallSnapshot? 57 | reject if term is old (not the current leader) 58 | reject (ignore) if follower already has last included index/term 59 | it's an old/delayed RPC 60 | empty the log, replace with fake "prev" entry 61 | set lastApplied to lastIncludedIndex 62 | replace service state (e.g. k/v table) with snapshot contents 63 | 64 | note that the state and the operation history are roughly equivalent 65 | designer can choose which to send 66 | e.g. last few operations (log entries) for lagging replica, 67 | but entire state (snapshot) for a replica that has lost its disk. 68 | still, replica repair can be very expensive, and warrants attention 69 | 70 | The Question: 71 | Could a received InstallSnapshot RPC cause the state machine to go 72 | backwards in time? That is, could step 8 in Figure 13 cause the state 73 | machine to be reset so that it reflects fewer executed operations? If 74 | yes, explain how this could happen. If no, explain why it can't 75 | happen. 76 | 77 | *** topic: configuration change (not needed for the labs) 78 | 79 | configuration change (Section 6) 80 | configuration = set of servers 81 | sometimes you need to 82 | move to a new set of servers, or 83 | increase/decrease the number of servers 84 | human initiates configuration change, Raft manages it 85 | we'd like Raft to cope correctly with failure during configuration change 86 | i.e. clients should not notice (except maybe dip in performance) 87 | 88 | why doesn't a straightforward approach work? 89 | suppose each server has the list of servers in the current config 90 | change configuration by telling each server the new list 91 | using some mechanism outside of Raft 92 | problem: they will learn new configuration at different times 93 | example: want to replace S3 with S4 94 | we get as far as telling S1 and S4 that the new config is 1,2,4 95 | S1: 1,2,3 1,2,4 96 | S2: 1,2,3 1,2,3 97 | S3: 1,2,3 1,2,3 98 | S4: 1,2,4 99 | OOPS! now *two* leaders could be elected! 
100 | S2 and S3 could elect S2 101 | S1 and S4 could elect S1 102 | 103 | Raft configuration change 104 | idea: "joint consensus" stage that includes *both* old and new configuration 105 | avoids any time when both old and new can choose leader independently 106 | system starts with Cold 107 | system administrator asks the leader to switch to Cnew 108 | Raft has special configuration log entries (sets of server addresses) 109 | each server uses the last configuration in its own log 110 | 1. leader commits Cold,new to a majority of both Cold and Cnew 111 | 2. after Cold,new commits, leader commits Cnew to servers in Cnew 112 | 113 | what if leader crashes at various points in this process? 114 | can we have two leaders for the next term? 115 | if that could happen, each leader must be one of these: 116 | A. in Cold, but does not have Cold,new in log 117 | B. in Cold or Cnew, has Cold,new in log 118 | C. in Cnew, has Cnew in log 119 | we know we can't have A+A or C+C by the usual rules of leader election 120 | A+B? no, since B needs majority from Cold as well as Cnew 121 | A+C? no, since can't proceed to Cnew until Cold,new committed to Cold 122 | B+B? no, since B needs majority from both Cold and Cnew 123 | B+C? no, since B needs majority from Cnew as well as Cold 124 | 125 | good! Raft can switch to a new set of servers w/o risk of two active leaders 126 | 127 | *** topic: performance 128 | 129 | Note: many situations don't require high performance. 130 | key/value store might. 131 | but GFS or MapReduce master might not. 132 | 133 | Most replication systems have similar common-case performance: 134 | One RPC exchange and one disk write per agreement. 135 | So Raft is pretty typical for message complexity. 136 | 137 | Raft makes a few design choices that sacrifice performance for simplicity: 138 | Follower rejects out-of-order AppendEntries RPCs. 139 | Rather than saving for use after hole is filled. 140 | Might be important if network re-orders packets a lot. 141 | No provision for batching or pipelining AppendEntries. 142 | Snapshotting is wasteful for big states. 143 | A slow leader may hurt Raft, e.g. in geo-replication. 144 | 145 | These have a big effect on performance: 146 | Disk writes for persistence. 147 | Message/packet/RPC overhead. 148 | Need to execute logged commands sequentially. 149 | Fast path for read-only operations. 150 | 151 | -------------------------------------------------------------------------------- /src/labgob/labgob.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | // 4 | // trying to send non-capitalized fields over RPC produces a range of 5 | // misbehavior, including both mysterious incorrect computation and 6 | // outright crashes. so this wrapper around Go's encoding/gob warns 7 | // about non-capitalized field names. 
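// (the root cause: encoding/gob serializes only exported, capitalized struct
// fields, so a lower-case field silently arrives as its zero value on the
// receiving side.)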
8 | // 9 | 10 | import "encoding/gob" 11 | import "io" 12 | import "reflect" 13 | import "fmt" 14 | import "sync" 15 | import "unicode" 16 | import "unicode/utf8" 17 | 18 | var mu sync.Mutex 19 | var errorCount int // for TestCapital 20 | var checked map[reflect.Type]bool 21 | 22 | type LabEncoder struct { 23 | gob *gob.Encoder 24 | } 25 | 26 | func NewEncoder(w io.Writer) *LabEncoder { 27 | enc := &LabEncoder{} 28 | enc.gob = gob.NewEncoder(w) 29 | return enc 30 | } 31 | 32 | func (enc *LabEncoder) Encode(e interface{}) error { 33 | checkValue(e) 34 | return enc.gob.Encode(e) 35 | } 36 | 37 | func (enc *LabEncoder) EncodeValue(value reflect.Value) error { 38 | checkValue(value.Interface()) 39 | return enc.gob.EncodeValue(value) 40 | } 41 | 42 | type LabDecoder struct { 43 | gob *gob.Decoder 44 | } 45 | 46 | func NewDecoder(r io.Reader) *LabDecoder { 47 | dec := &LabDecoder{} 48 | dec.gob = gob.NewDecoder(r) 49 | return dec 50 | } 51 | 52 | func (dec *LabDecoder) Decode(e interface{}) error { 53 | checkValue(e) 54 | checkDefault(e) 55 | return dec.gob.Decode(e) 56 | } 57 | 58 | func Register(value interface{}) { 59 | checkValue(value) 60 | gob.Register(value) 61 | } 62 | 63 | func RegisterName(name string, value interface{}) { 64 | checkValue(value) 65 | gob.RegisterName(name, value) 66 | } 67 | 68 | func checkValue(value interface{}) { 69 | checkType(reflect.TypeOf(value)) 70 | } 71 | 72 | func checkType(t reflect.Type) { 73 | k := t.Kind() 74 | 75 | mu.Lock() 76 | // only complain once, and avoid recursion. 77 | if checked == nil { 78 | checked = map[reflect.Type]bool{} 79 | } 80 | if checked[t] { 81 | mu.Unlock() 82 | return 83 | } 84 | checked[t] = true 85 | mu.Unlock() 86 | 87 | switch k { 88 | case reflect.Struct: 89 | for i := 0; i < t.NumField(); i++ { 90 | f := t.Field(i) 91 | rune, _ := utf8.DecodeRuneInString(f.Name) 92 | if unicode.IsUpper(rune) == false { 93 | // ta da 94 | fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n", 95 | f.Name, t.Name()) 96 | mu.Lock() 97 | errorCount += 1 98 | mu.Unlock() 99 | } 100 | checkType(f.Type) 101 | } 102 | return 103 | case reflect.Slice, reflect.Array, reflect.Ptr: 104 | checkType(t.Elem()) 105 | return 106 | case reflect.Map: 107 | checkType(t.Elem()) 108 | checkType(t.Key()) 109 | return 110 | default: 111 | return 112 | } 113 | } 114 | 115 | // 116 | // warn if the value contains non-default values, 117 | // as it would if one sent an RPC but the reply 118 | // struct was already modified. if the RPC reply 119 | // contains default values, GOB won't overwrite 120 | // the non-default value. 121 | // 122 | func checkDefault(value interface{}) { 123 | if value == nil { 124 | return 125 | } 126 | checkDefault1(reflect.ValueOf(value), 1, "") 127 | } 128 | 129 | func checkDefault1(value reflect.Value, depth int, name string) { 130 | if depth > 3 { 131 | return 132 | } 133 | 134 | t := value.Type() 135 | k := t.Kind() 136 | 137 | switch k { 138 | case reflect.Struct: 139 | for i := 0; i < t.NumField(); i++ { 140 | vv := value.Field(i) 141 | name1 := t.Field(i).Name 142 | if name != "" { 143 | name1 = name + "." 
+ name1 144 | } 145 | checkDefault1(vv, depth+1, name1) 146 | } 147 | return 148 | case reflect.Ptr: 149 | if value.IsNil() { 150 | return 151 | } 152 | checkDefault1(value.Elem(), depth+1, name) 153 | return 154 | case reflect.Bool, 155 | reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 156 | reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, 157 | reflect.Uintptr, reflect.Float32, reflect.Float64, 158 | reflect.String: 159 | if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false { 160 | mu.Lock() 161 | if errorCount < 1 { 162 | what := name 163 | if what == "" { 164 | what = t.Name() 165 | } 166 | // this warning typically arises if code re-uses the same RPC reply 167 | // variable for multiple RPC calls, or if code restores persisted 168 | // state into variable that already have non-default values. 169 | fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n", 170 | what) 171 | } 172 | errorCount += 1 173 | mu.Unlock() 174 | } 175 | return 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/labgob/test_test.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | import "testing" 4 | 5 | import "bytes" 6 | 7 | type T1 struct { 8 | T1int0 int 9 | T1int1 int 10 | T1string0 string 11 | T1string1 string 12 | } 13 | 14 | type T2 struct { 15 | T2slice []T1 16 | T2map map[int]*T1 17 | T2t3 interface{} 18 | } 19 | 20 | type T3 struct { 21 | T3int999 int 22 | } 23 | 24 | // 25 | // test that we didn't break GOB. 26 | // 27 | func TestGOB(t *testing.T) { 28 | e0 := errorCount 29 | 30 | w := new(bytes.Buffer) 31 | 32 | Register(T3{}) 33 | 34 | { 35 | x0 := 0 36 | x1 := 1 37 | t1 := T1{} 38 | t1.T1int1 = 1 39 | t1.T1string1 = "6.824" 40 | t2 := T2{} 41 | t2.T2slice = []T1{T1{}, t1} 42 | t2.T2map = map[int]*T1{} 43 | t2.T2map[99] = &T1{1, 2, "x", "y"} 44 | t2.T2t3 = T3{999} 45 | 46 | e := NewEncoder(w) 47 | e.Encode(x0) 48 | e.Encode(x1) 49 | e.Encode(t1) 50 | e.Encode(t2) 51 | } 52 | data := w.Bytes() 53 | 54 | { 55 | var x0 int 56 | var x1 int 57 | var t1 T1 58 | var t2 T2 59 | 60 | r := bytes.NewBuffer(data) 61 | d := NewDecoder(r) 62 | if d.Decode(&x0) != nil || 63 | d.Decode(&x1) != nil || 64 | d.Decode(&t1) != nil || 65 | d.Decode(&t2) != nil { 66 | t.Fatalf("Decode failed") 67 | } 68 | 69 | if x0 != 0 { 70 | t.Fatalf("wrong x0 %v\n", x0) 71 | } 72 | if x1 != 1 { 73 | t.Fatalf("wrong x1 %v\n", x1) 74 | } 75 | if t1.T1int0 != 0 { 76 | t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0) 77 | } 78 | if t1.T1int1 != 1 { 79 | t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1) 80 | } 81 | if t1.T1string0 != "" { 82 | t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0) 83 | } 84 | if t1.T1string1 != "6.824" { 85 | t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1) 86 | } 87 | if len(t2.T2slice) != 2 { 88 | t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice)) 89 | } 90 | if t2.T2slice[1].T1int1 != 1 { 91 | t.Fatalf("wrong slice value\n") 92 | } 93 | if len(t2.T2map) != 1 { 94 | t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map)) 95 | } 96 | if t2.T2map[99].T1string1 != "y" { 97 | t.Fatalf("wrong map value\n") 98 | } 99 | t3 := (t2.T2t3).(T3) 100 | if t3.T3int999 != 999 { 101 | t.Fatalf("wrong t2.T2t3.T3int999\n") 102 | } 103 | } 104 | 105 | if errorCount != e0 { 106 | t.Fatalf("there were errors, but should not have been") 107 | } 108 | } 109 | 110 | type T4 struct { 111 | Yes int 112 | no int 113 | } 
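// T4.no is deliberately lower-case: gob will encode only T4.Yes, and labgob's
// capitalization check should report it exactly once in TestCapital below.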
114 | 115 | // 116 | // make sure we check capitalization 117 | // labgob prints one warning during this test. 118 | // 119 | func TestCapital(t *testing.T) { 120 | e0 := errorCount 121 | 122 | v := []map[*T4]int{} 123 | 124 | w := new(bytes.Buffer) 125 | e := NewEncoder(w) 126 | e.Encode(v) 127 | data := w.Bytes() 128 | 129 | var v1 []map[T4]int 130 | r := bytes.NewBuffer(data) 131 | d := NewDecoder(r) 132 | d.Decode(&v1) 133 | 134 | if errorCount != e0+1 { 135 | t.Fatalf("failed to warn about lower-case field") 136 | } 137 | } 138 | 139 | // 140 | // check that we warn when someone sends a default value over 141 | // RPC but the target into which we're decoding holds a non-default 142 | // value, which GOB seems not to overwrite as you'd expect. 143 | // 144 | // labgob does not print a warning. 145 | // 146 | func TestDefault(t *testing.T) { 147 | e0 := errorCount 148 | 149 | type DD struct { 150 | X int 151 | } 152 | 153 | // send a default value... 154 | dd1 := DD{} 155 | 156 | w := new(bytes.Buffer) 157 | e := NewEncoder(w) 158 | e.Encode(dd1) 159 | data := w.Bytes() 160 | 161 | // and receive it into memory that already 162 | // holds non-default values. 163 | reply := DD{99} 164 | 165 | r := bytes.NewBuffer(data) 166 | d := NewDecoder(r) 167 | d.Decode(&reply) 168 | 169 | if errorCount != e0+1 { 170 | t.Fatalf("failed to warn about decoding into non-default value") 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/labrpc/test_test.go: -------------------------------------------------------------------------------- 1 | package labrpc 2 | 3 | import "testing" 4 | import "strconv" 5 | import "sync" 6 | import "runtime" 7 | import "time" 8 | import "fmt" 9 | 10 | type JunkArgs struct { 11 | X int 12 | } 13 | type JunkReply struct { 14 | X string 15 | } 16 | 17 | type JunkServer struct { 18 | mu sync.Mutex 19 | log1 []string 20 | log2 []int 21 | } 22 | 23 | func (js *JunkServer) Handler1(args string, reply *int) { 24 | js.mu.Lock() 25 | defer js.mu.Unlock() 26 | js.log1 = append(js.log1, args) 27 | *reply, _ = strconv.Atoi(args) 28 | } 29 | 30 | func (js *JunkServer) Handler2(args int, reply *string) { 31 | js.mu.Lock() 32 | defer js.mu.Unlock() 33 | js.log2 = append(js.log2, args) 34 | *reply = "handler2-" + strconv.Itoa(args) 35 | } 36 | 37 | func (js *JunkServer) Handler3(args int, reply *int) { 38 | js.mu.Lock() 39 | defer js.mu.Unlock() 40 | time.Sleep(20 * time.Second) 41 | *reply = -args 42 | } 43 | 44 | // args is a pointer 45 | func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) { 46 | reply.X = "pointer" 47 | } 48 | 49 | // args is a not pointer 50 | func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) { 51 | reply.X = "no pointer" 52 | } 53 | 54 | func TestBasic(t *testing.T) { 55 | runtime.GOMAXPROCS(4) 56 | 57 | rn := MakeNetwork() 58 | defer rn.Cleanup() 59 | 60 | e := rn.MakeEnd("end1-99") 61 | 62 | js := &JunkServer{} 63 | svc := MakeService(js) 64 | 65 | rs := MakeServer() 66 | rs.AddService(svc) 67 | rn.AddServer("server99", rs) 68 | 69 | rn.Connect("end1-99", "server99") 70 | rn.Enable("end1-99", true) 71 | 72 | { 73 | reply := "" 74 | e.Call("JunkServer.Handler2", 111, &reply) 75 | if reply != "handler2-111" { 76 | t.Fatalf("wrong reply from Handler2") 77 | } 78 | } 79 | 80 | { 81 | reply := 0 82 | e.Call("JunkServer.Handler1", "9099", &reply) 83 | if reply != 9099 { 84 | t.Fatalf("wrong reply from Handler1") 85 | } 86 | } 87 | } 88 | 89 | func TestTypes(t *testing.T) { 90 | 
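	// TestTypes checks that an RPC succeeds whether the handler takes its args
	// struct by pointer (Handler4) or by value (Handler5), as long as the
	// caller passes args of the matching kind.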
runtime.GOMAXPROCS(4) 91 | 92 | rn := MakeNetwork() 93 | defer rn.Cleanup() 94 | 95 | e := rn.MakeEnd("end1-99") 96 | 97 | js := &JunkServer{} 98 | svc := MakeService(js) 99 | 100 | rs := MakeServer() 101 | rs.AddService(svc) 102 | rn.AddServer("server99", rs) 103 | 104 | rn.Connect("end1-99", "server99") 105 | rn.Enable("end1-99", true) 106 | 107 | { 108 | var args JunkArgs 109 | var reply JunkReply 110 | // args must match type (pointer or not) of handler. 111 | e.Call("JunkServer.Handler4", &args, &reply) 112 | if reply.X != "pointer" { 113 | t.Fatalf("wrong reply from Handler4") 114 | } 115 | } 116 | 117 | { 118 | var args JunkArgs 119 | var reply JunkReply 120 | // args must match type (pointer or not) of handler. 121 | e.Call("JunkServer.Handler5", args, &reply) 122 | if reply.X != "no pointer" { 123 | t.Fatalf("wrong reply from Handler5") 124 | } 125 | } 126 | } 127 | 128 | // 129 | // does net.Enable(endname, false) really disconnect a client? 130 | // 131 | func TestDisconnect(t *testing.T) { 132 | runtime.GOMAXPROCS(4) 133 | 134 | rn := MakeNetwork() 135 | defer rn.Cleanup() 136 | 137 | e := rn.MakeEnd("end1-99") 138 | 139 | js := &JunkServer{} 140 | svc := MakeService(js) 141 | 142 | rs := MakeServer() 143 | rs.AddService(svc) 144 | rn.AddServer("server99", rs) 145 | 146 | rn.Connect("end1-99", "server99") 147 | 148 | { 149 | reply := "" 150 | e.Call("JunkServer.Handler2", 111, &reply) 151 | if reply != "" { 152 | t.Fatalf("unexpected reply from Handler2") 153 | } 154 | } 155 | 156 | rn.Enable("end1-99", true) 157 | 158 | { 159 | reply := 0 160 | e.Call("JunkServer.Handler1", "9099", &reply) 161 | if reply != 9099 { 162 | t.Fatalf("wrong reply from Handler1") 163 | } 164 | } 165 | } 166 | 167 | // 168 | // test net.GetCount() 169 | // 170 | func TestCounts(t *testing.T) { 171 | runtime.GOMAXPROCS(4) 172 | 173 | rn := MakeNetwork() 174 | defer rn.Cleanup() 175 | 176 | e := rn.MakeEnd("end1-99") 177 | 178 | js := &JunkServer{} 179 | svc := MakeService(js) 180 | 181 | rs := MakeServer() 182 | rs.AddService(svc) 183 | rn.AddServer(99, rs) 184 | 185 | rn.Connect("end1-99", 99) 186 | rn.Enable("end1-99", true) 187 | 188 | for i := 0; i < 17; i++ { 189 | reply := "" 190 | e.Call("JunkServer.Handler2", i, &reply) 191 | wanted := "handler2-" + strconv.Itoa(i) 192 | if reply != wanted { 193 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 194 | } 195 | } 196 | 197 | n := rn.GetCount(99) 198 | if n != 17 { 199 | t.Fatalf("wrong GetCount() %v, expected 17\n", n) 200 | } 201 | } 202 | 203 | // 204 | // test RPCs from concurrent ClientEnds 205 | // 206 | func TestConcurrentMany(t *testing.T) { 207 | runtime.GOMAXPROCS(4) 208 | 209 | rn := MakeNetwork() 210 | defer rn.Cleanup() 211 | 212 | js := &JunkServer{} 213 | svc := MakeService(js) 214 | 215 | rs := MakeServer() 216 | rs.AddService(svc) 217 | rn.AddServer(1000, rs) 218 | 219 | ch := make(chan int) 220 | 221 | nclients := 20 222 | nrpcs := 10 223 | for ii := 0; ii < nclients; ii++ { 224 | go func(i int) { 225 | n := 0 226 | defer func() { ch <- n }() 227 | 228 | e := rn.MakeEnd(i) 229 | rn.Connect(i, 1000) 230 | rn.Enable(i, true) 231 | 232 | for j := 0; j < nrpcs; j++ { 233 | arg := i*100 + j 234 | reply := "" 235 | e.Call("JunkServer.Handler2", arg, &reply) 236 | wanted := "handler2-" + strconv.Itoa(arg) 237 | if reply != wanted { 238 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 239 | } 240 | n += 1 241 | } 242 | }(ii) 243 | } 244 | 245 | total := 0 246 | for ii := 0; ii < nclients; ii++ 
{ 247 | x := <-ch 248 | total += x 249 | } 250 | 251 | if total != nclients*nrpcs { 252 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs) 253 | } 254 | 255 | n := rn.GetCount(1000) 256 | if n != total { 257 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 258 | } 259 | } 260 | 261 | // 262 | // test unreliable 263 | // 264 | func TestUnreliable(t *testing.T) { 265 | runtime.GOMAXPROCS(4) 266 | 267 | rn := MakeNetwork() 268 | defer rn.Cleanup() 269 | rn.Reliable(false) 270 | 271 | js := &JunkServer{} 272 | svc := MakeService(js) 273 | 274 | rs := MakeServer() 275 | rs.AddService(svc) 276 | rn.AddServer(1000, rs) 277 | 278 | ch := make(chan int) 279 | 280 | nclients := 300 281 | for ii := 0; ii < nclients; ii++ { 282 | go func(i int) { 283 | n := 0 284 | defer func() { ch <- n }() 285 | 286 | e := rn.MakeEnd(i) 287 | rn.Connect(i, 1000) 288 | rn.Enable(i, true) 289 | 290 | arg := i * 100 291 | reply := "" 292 | ok := e.Call("JunkServer.Handler2", arg, &reply) 293 | if ok { 294 | wanted := "handler2-" + strconv.Itoa(arg) 295 | if reply != wanted { 296 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 297 | } 298 | n += 1 299 | } 300 | }(ii) 301 | } 302 | 303 | total := 0 304 | for ii := 0; ii < nclients; ii++ { 305 | x := <-ch 306 | total += x 307 | } 308 | 309 | if total == nclients || total == 0 { 310 | t.Fatalf("all RPCs succeeded despite unreliable") 311 | } 312 | } 313 | 314 | // 315 | // test concurrent RPCs from a single ClientEnd 316 | // 317 | func TestConcurrentOne(t *testing.T) { 318 | runtime.GOMAXPROCS(4) 319 | 320 | rn := MakeNetwork() 321 | defer rn.Cleanup() 322 | 323 | js := &JunkServer{} 324 | svc := MakeService(js) 325 | 326 | rs := MakeServer() 327 | rs.AddService(svc) 328 | rn.AddServer(1000, rs) 329 | 330 | e := rn.MakeEnd("c") 331 | rn.Connect("c", 1000) 332 | rn.Enable("c", true) 333 | 334 | ch := make(chan int) 335 | 336 | nrpcs := 20 337 | for ii := 0; ii < nrpcs; ii++ { 338 | go func(i int) { 339 | n := 0 340 | defer func() { ch <- n }() 341 | 342 | arg := 100 + i 343 | reply := "" 344 | e.Call("JunkServer.Handler2", arg, &reply) 345 | wanted := "handler2-" + strconv.Itoa(arg) 346 | if reply != wanted { 347 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 348 | } 349 | n += 1 350 | }(ii) 351 | } 352 | 353 | total := 0 354 | for ii := 0; ii < nrpcs; ii++ { 355 | x := <-ch 356 | total += x 357 | } 358 | 359 | if total != nrpcs { 360 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs) 361 | } 362 | 363 | js.mu.Lock() 364 | defer js.mu.Unlock() 365 | if len(js.log2) != nrpcs { 366 | t.Fatalf("wrong number of RPCs delivered") 367 | } 368 | 369 | n := rn.GetCount(1000) 370 | if n != total { 371 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 372 | } 373 | } 374 | 375 | // 376 | // regression: an RPC that's delayed during Enabled=false 377 | // should not delay subsequent RPCs (e.g. after Enabled=true). 378 | // 379 | func TestRegression1(t *testing.T) { 380 | runtime.GOMAXPROCS(4) 381 | 382 | rn := MakeNetwork() 383 | defer rn.Cleanup() 384 | 385 | js := &JunkServer{} 386 | svc := MakeService(js) 387 | 388 | rs := MakeServer() 389 | rs.AddService(svc) 390 | rn.AddServer(1000, rs) 391 | 392 | e := rn.MakeEnd("c") 393 | rn.Connect("c", 1000) 394 | 395 | // start some RPCs while the ClientEnd is disabled. 396 | // they'll be delayed. 
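	// Aside (added note, not in the original test): in labrpc an RPC sent
	// while the end is disabled is held back and eventually fails, so
	// Call() reports delivery through its boolean result -- false means the
	// reply variable was never filled in. The checks below rely on exactly
	// that behavior.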
397 | rn.Enable("c", false) 398 | ch := make(chan bool) 399 | nrpcs := 20 400 | for ii := 0; ii < nrpcs; ii++ { 401 | go func(i int) { 402 | ok := false 403 | defer func() { ch <- ok }() 404 | 405 | arg := 100 + i 406 | reply := "" 407 | // this call ought to return false. 408 | e.Call("JunkServer.Handler2", arg, &reply) 409 | ok = true 410 | }(ii) 411 | } 412 | 413 | time.Sleep(100 * time.Millisecond) 414 | 415 | // now enable the ClientEnd and check that an RPC completes quickly. 416 | t0 := time.Now() 417 | rn.Enable("c", true) 418 | { 419 | arg := 99 420 | reply := "" 421 | e.Call("JunkServer.Handler2", arg, &reply) 422 | wanted := "handler2-" + strconv.Itoa(arg) 423 | if reply != wanted { 424 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 425 | } 426 | } 427 | dur := time.Since(t0).Seconds() 428 | 429 | if dur > 0.03 { 430 | t.Fatalf("RPC took too long (%v) after Enable", dur) 431 | } 432 | 433 | for ii := 0; ii < nrpcs; ii++ { 434 | <-ch 435 | } 436 | 437 | js.mu.Lock() 438 | defer js.mu.Unlock() 439 | if len(js.log2) != 1 { 440 | t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2)) 441 | } 442 | 443 | n := rn.GetCount(1000) 444 | if n != 1 { 445 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1) 446 | } 447 | } 448 | 449 | // 450 | // if an RPC is stuck in a server, and the server 451 | // is killed with DeleteServer(), does the RPC 452 | // get un-stuck? 453 | // 454 | func TestKilled(t *testing.T) { 455 | runtime.GOMAXPROCS(4) 456 | 457 | rn := MakeNetwork() 458 | defer rn.Cleanup() 459 | 460 | e := rn.MakeEnd("end1-99") 461 | 462 | js := &JunkServer{} 463 | svc := MakeService(js) 464 | 465 | rs := MakeServer() 466 | rs.AddService(svc) 467 | rn.AddServer("server99", rs) 468 | 469 | rn.Connect("end1-99", "server99") 470 | rn.Enable("end1-99", true) 471 | 472 | doneCh := make(chan bool) 473 | go func() { 474 | reply := 0 475 | ok := e.Call("JunkServer.Handler3", 99, &reply) 476 | doneCh <- ok 477 | }() 478 | 479 | time.Sleep(1000 * time.Millisecond) 480 | 481 | select { 482 | case <-doneCh: 483 | t.Fatalf("Handler3 should not have returned yet") 484 | case <-time.After(100 * time.Millisecond): 485 | } 486 | 487 | rn.DeleteServer("server99") 488 | 489 | select { 490 | case x := <-doneCh: 491 | if x != false { 492 | t.Fatalf("Handler3 returned successfully despite DeleteServer()") 493 | } 494 | case <-time.After(100 * time.Millisecond): 495 | t.Fatalf("Handler3 should return after DeleteServer()") 496 | } 497 | } 498 | 499 | func TestBenchmark(t *testing.T) { 500 | runtime.GOMAXPROCS(4) 501 | 502 | rn := MakeNetwork() 503 | defer rn.Cleanup() 504 | 505 | e := rn.MakeEnd("end1-99") 506 | 507 | js := &JunkServer{} 508 | svc := MakeService(js) 509 | 510 | rs := MakeServer() 511 | rs.AddService(svc) 512 | rn.AddServer("server99", rs) 513 | 514 | rn.Connect("end1-99", "server99") 515 | rn.Enable("end1-99", true) 516 | 517 | t0 := time.Now() 518 | n := 100000 519 | for iters := 0; iters < n; iters++ { 520 | reply := "" 521 | e.Call("JunkServer.Handler2", 111, &reply) 522 | if reply != "handler2-111" { 523 | t.Fatalf("wrong reply from Handler2") 524 | } 525 | } 526 | fmt.Printf("%v for %v\n", time.Since(t0), n) 527 | // march 2016, rtm laptop, 22 microseconds per RPC 528 | } 529 | -------------------------------------------------------------------------------- /src/linearizability/bitset.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | type bitset []uint64 4 | 5 | // data 
layout: 6 | // bits 0-63 are in data[0], the next are in data[1], etc. 7 | 8 | func newBitset(bits uint) bitset { 9 | extra := uint(0) 10 | if bits%64 != 0 { 11 | extra = 1 12 | } 13 | chunks := bits/64 + extra 14 | return bitset(make([]uint64, chunks)) 15 | } 16 | 17 | func (b bitset) clone() bitset { 18 | dataCopy := make([]uint64, len(b)) 19 | copy(dataCopy, b) 20 | return bitset(dataCopy) 21 | } 22 | 23 | func bitsetIndex(pos uint) (uint, uint) { 24 | return pos / 64, pos % 64 25 | } 26 | 27 | func (b bitset) set(pos uint) bitset { 28 | major, minor := bitsetIndex(pos) 29 | b[major] |= (1 << minor) 30 | return b 31 | } 32 | 33 | func (b bitset) clear(pos uint) bitset { 34 | major, minor := bitsetIndex(pos) 35 | b[major] &^= (1 << minor) 36 | return b 37 | } 38 | 39 | func (b bitset) get(pos uint) bool { 40 | major, minor := bitsetIndex(pos) 41 | return b[major]&(1<<minor) != 0 42 | } 43 | 44 | func (b bitset) popcnt() uint { 45 | total := uint(0) 46 | for _, v := range b { 47 | v = (v & 0x5555555555555555) + ((v & 0xAAAAAAAAAAAAAAAA) >> 1) 48 | v = (v & 0x3333333333333333) + ((v & 0xCCCCCCCCCCCCCCCC) >> 2) 49 | v = (v & 0x0F0F0F0F0F0F0F0F) + ((v & 0xF0F0F0F0F0F0F0F0) >> 4) 50 | v *= 0x0101010101010101 51 | total += uint((v >> 56) & 0xFF) 52 | } 53 | return total 54 | } 55 | 56 | func (b bitset) hash() uint64 { 57 | hash := uint64(b.popcnt()) 58 | for _, v := range b { 59 | hash ^= v 60 | } 61 | return hash 62 | } 63 | 64 | func (b bitset) equals(b2 bitset) bool { 65 | if len(b) != len(b2) { 66 | return false 67 | } 68 | for i := range b { 69 | if b[i] != b2[i] { 70 | return false 71 | } 72 | } 73 | return true 74 | } 75 | -------------------------------------------------------------------------------- /src/linearizability/linearizability.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | import ( 4 | "sort" 5 | "sync/atomic" 6 | "time" 7 | ) 8 | 9 | type entryKind bool 10 | 11 | const ( 12 | callEntry entryKind = false 13 | returnEntry = true 14 | ) 15 | 16 | type entry struct { 17 | kind entryKind 18 | value interface{} 19 | id uint 20 | time int64 21 | } 22 | 23 | type byTime []entry 24 | 25 | func (a byTime) Len() int { 26 | return len(a) 27 | } 28 | 29 | func (a byTime) Swap(i, j int) { 30 | a[i], a[j] = a[j], a[i] 31 | } 32 | 33 | func (a byTime) Less(i, j int) bool { 34 | return a[i].time < a[j].time 35 | } 36 | 37 | func makeEntries(history []Operation) []entry { 38 | var entries []entry = nil 39 | id := uint(0) 40 | for _, elem := range history { 41 | entries = append(entries, entry{ 42 | callEntry, elem.Input, id, elem.Call}) 43 | entries = append(entries, entry{ 44 | returnEntry, elem.Output, id, elem.Return}) 45 | id++ 46 | } 47 | sort.Sort(byTime(entries)) 48 | return entries 49 | } 50 | 51 | type node struct { 52 | value interface{} 53 | match *node // call if match is nil, otherwise return 54 | id uint 55 | next *node 56 | prev *node 57 | } 58 | 59 | func insertBefore(n *node, mark *node) *node { 60 | if mark != nil { 61 | beforeMark := mark.prev 62 | mark.prev = n 63 | n.next = mark 64 | if beforeMark != nil { 65 | n.prev = beforeMark 66 | beforeMark.next = n 67 | } 68 | } 69 | return n 70 | } 71 | 72 | func length(n *node) uint { 73 | l := uint(0) 74 | for n != nil { 75 | n = n.next 76 | l++ 77 | } 78 | return l 79 | } 80 | 81 | func renumber(events []Event) []Event { 82 | var e []Event 83 | m := make(map[uint]uint) // renumbering 84 | id := uint(0) 85 | for _, v := range events { 86 | if r, ok := m[v.Id]; ok { 87 | e = append(e, Event{v.Kind, v.Value, r}) 88 | } else { 89 | e = append(e, Event{v.Kind, v.Value, id}) 90 | m[v.Id] = id 91 | id++ 92 | } 93 | } 94 | return e
95 | } 96 | 97 | func convertEntries(events []Event) []entry { 98 | var entries []entry 99 | for _, elem := range events { 100 | kind := callEntry 101 | if elem.Kind == ReturnEvent { 102 | kind = returnEntry 103 | } 104 | entries = append(entries, entry{kind, elem.Value, elem.Id, -1}) 105 | } 106 | return entries 107 | } 108 | 109 | func makeLinkedEntries(entries []entry) *node { 110 | var root *node = nil 111 | match := make(map[uint]*node) 112 | for i := len(entries) - 1; i >= 0; i-- { 113 | elem := entries[i] 114 | if elem.kind == returnEntry { 115 | entry := &node{value: elem.value, match: nil, id: elem.id} 116 | match[elem.id] = entry 117 | insertBefore(entry, root) 118 | root = entry 119 | } else { 120 | entry := &node{value: elem.value, match: match[elem.id], id: elem.id} 121 | insertBefore(entry, root) 122 | root = entry 123 | } 124 | } 125 | return root 126 | } 127 | 128 | type cacheEntry struct { 129 | linearized bitset 130 | state interface{} 131 | } 132 | 133 | func cacheContains(model Model, cache map[uint64][]cacheEntry, entry cacheEntry) bool { 134 | for _, elem := range cache[entry.linearized.hash()] { 135 | if entry.linearized.equals(elem.linearized) && model.Equal(entry.state, elem.state) { 136 | return true 137 | } 138 | } 139 | return false 140 | } 141 | 142 | type callsEntry struct { 143 | entry *node 144 | state interface{} 145 | } 146 | 147 | func lift(entry *node) { 148 | entry.prev.next = entry.next 149 | entry.next.prev = entry.prev 150 | match := entry.match 151 | match.prev.next = match.next 152 | if match.next != nil { 153 | match.next.prev = match.prev 154 | } 155 | } 156 | 157 | func unlift(entry *node) { 158 | match := entry.match 159 | match.prev.next = match 160 | if match.next != nil { 161 | match.next.prev = match 162 | } 163 | entry.prev.next = entry 164 | entry.next.prev = entry 165 | } 166 | 167 | func checkSingle(model Model, subhistory *node, kill *int32) bool { 168 | n := length(subhistory) / 2 169 | linearized := newBitset(n) 170 | cache := make(map[uint64][]cacheEntry) // map from hash to cache entry 171 | var calls []callsEntry 172 | 173 | state := model.Init() 174 | headEntry := insertBefore(&node{value: nil, match: nil, id: ^uint(0)}, subhistory) 175 | entry := subhistory 176 | for headEntry.next != nil { 177 | if atomic.LoadInt32(kill) != 0 { 178 | return false 179 | } 180 | if entry.match != nil { 181 | matching := entry.match // the return entry 182 | ok, newState := model.Step(state, entry.value, matching.value) 183 | if ok { 184 | newLinearized := linearized.clone().set(entry.id) 185 | newCacheEntry := cacheEntry{newLinearized, newState} 186 | if !cacheContains(model, cache, newCacheEntry) { 187 | hash := newLinearized.hash() 188 | cache[hash] = append(cache[hash], newCacheEntry) 189 | calls = append(calls, callsEntry{entry, state}) 190 | state = newState 191 | linearized.set(entry.id) 192 | lift(entry) 193 | entry = headEntry.next 194 | } else { 195 | entry = entry.next 196 | } 197 | } else { 198 | entry = entry.next 199 | } 200 | } else { 201 | if len(calls) == 0 { 202 | return false 203 | } 204 | callsTop := calls[len(calls)-1] 205 | entry = callsTop.entry 206 | state = callsTop.state 207 | linearized.clear(entry.id) 208 | calls = calls[:len(calls)-1] 209 | unlift(entry) 210 | entry = entry.next 211 | } 212 | } 213 | return true 214 | } 215 | 216 | func fillDefault(model Model) Model { 217 | if model.Partition == nil { 218 | model.Partition = NoPartition 219 | } 220 | if model.PartitionEvent == nil { 221 | model.PartitionEvent = 
NoPartitionEvent 222 | } 223 | if model.Equal == nil { 224 | model.Equal = ShallowEqual 225 | } 226 | return model 227 | } 228 | 229 | func CheckOperations(model Model, history []Operation) bool { 230 | return CheckOperationsTimeout(model, history, 0) 231 | } 232 | 233 | // timeout = 0 means no timeout 234 | // if this operation times out, then a false positive is possible 235 | func CheckOperationsTimeout(model Model, history []Operation, timeout time.Duration) bool { 236 | model = fillDefault(model) 237 | partitions := model.Partition(history) 238 | ok := true 239 | results := make(chan bool) 240 | kill := int32(0) 241 | for _, subhistory := range partitions { 242 | l := makeLinkedEntries(makeEntries(subhistory)) 243 | go func() { 244 | results <- checkSingle(model, l, &kill) 245 | }() 246 | } 247 | var timeoutChan <-chan time.Time 248 | if timeout > 0 { 249 | timeoutChan = time.After(timeout) 250 | } 251 | count := 0 252 | loop: 253 | for { 254 | select { 255 | case result := <-results: 256 | ok = ok && result 257 | if !ok { 258 | atomic.StoreInt32(&kill, 1) 259 | break loop 260 | } 261 | count++ 262 | if count >= len(partitions) { 263 | break loop 264 | } 265 | case <-timeoutChan: 266 | break loop // if we time out, we might get a false positive 267 | } 268 | } 269 | return ok 270 | } 271 | 272 | func CheckEvents(model Model, history []Event) bool { 273 | return CheckEventsTimeout(model, history, 0) 274 | } 275 | 276 | // timeout = 0 means no timeout 277 | // if this operation times out, then a false positive is possible 278 | func CheckEventsTimeout(model Model, history []Event, timeout time.Duration) bool { 279 | model = fillDefault(model) 280 | partitions := model.PartitionEvent(history) 281 | ok := true 282 | results := make(chan bool) 283 | kill := int32(0) 284 | for _, subhistory := range partitions { 285 | l := makeLinkedEntries(convertEntries(renumber(subhistory))) 286 | go func() { 287 | results <- checkSingle(model, l, &kill) 288 | }() 289 | } 290 | var timeoutChan <-chan time.Time 291 | if timeout > 0 { 292 | timeoutChan = time.After(timeout) 293 | } 294 | count := 0 295 | loop: 296 | for { 297 | select { 298 | case result := <-results: 299 | ok = ok && result 300 | if !ok { 301 | atomic.StoreInt32(&kill, 1) 302 | break loop 303 | } 304 | count++ 305 | if count >= len(partitions) { 306 | break loop 307 | } 308 | case <-timeoutChan: 309 | break loop // if we time out, we might get a false positive 310 | } 311 | } 312 | return ok 313 | } 314 | -------------------------------------------------------------------------------- /src/linearizability/model.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | type Operation struct { 4 | Input interface{} 5 | Call int64 // invocation time 6 | Output interface{} 7 | Return int64 // response time 8 | } 9 | 10 | type EventKind bool 11 | 12 | const ( 13 | CallEvent EventKind = false 14 | ReturnEvent EventKind = true 15 | ) 16 | 17 | type Event struct { 18 | Kind EventKind 19 | Value interface{} 20 | Id uint 21 | } 22 | 23 | type Model struct { 24 | // Partition functions, such that a history is linearizable if an only 25 | // if each partition is linearizable. If you don't want to implement 26 | // this, you can always use the `NoPartition` functions implemented 27 | // below. 28 | Partition func(history []Operation) [][]Operation 29 | PartitionEvent func(history []Event) [][]Event 30 | // Initial state of the system. 
31 | Init func() interface{} 32 | // Step function for the system. Returns whether or not the system 33 | // could take this step with the given inputs and outputs and also 34 | // returns the new state. This should not mutate the existing state. 35 | Step func(state interface{}, input interface{}, output interface{}) (bool, interface{}) 36 | // Equality on states. If you are using a simple data type for states, 37 | // you can use the `ShallowEqual` function implemented below. 38 | Equal func(state1, state2 interface{}) bool 39 | } 40 | 41 | func NoPartition(history []Operation) [][]Operation { 42 | return [][]Operation{history} 43 | } 44 | 45 | func NoPartitionEvent(history []Event) [][]Event { 46 | return [][]Event{history} 47 | } 48 | 49 | func ShallowEqual(state1, state2 interface{}) bool { 50 | return state1 == state2 51 | } 52 | -------------------------------------------------------------------------------- /src/linearizability/models.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | // kv model 4 | 5 | type KvInput struct { 6 | Op uint8 // 0 => get, 1 => put, 2 => append 7 | Key string 8 | Value string 9 | } 10 | 11 | type KvOutput struct { 12 | Value string 13 | } 14 | 15 | func KvModel() Model { 16 | return Model { 17 | Partition: func(history []Operation) [][]Operation { 18 | m := make(map[string][]Operation) 19 | for _, v := range history { 20 | key := v.Input.(KvInput).Key 21 | m[key] = append(m[key], v) 22 | } 23 | var ret [][]Operation 24 | for _, v := range m { 25 | ret = append(ret, v) 26 | } 27 | return ret 28 | }, 29 | Init: func() interface{} { 30 | // note: we are modeling a single key's value here; 31 | // we're partitioning by key, so this is okay 32 | return "" 33 | }, 34 | Step: func(state, input, output interface{}) (bool, interface{}) { 35 | inp := input.(KvInput) 36 | out := output.(KvOutput) 37 | st := state.(string) 38 | if inp.Op == 0 { 39 | // get 40 | return out.Value == st, state 41 | } else if inp.Op == 1 { 42 | // put 43 | return true, inp.Value 44 | } else { 45 | // append 46 | return true, (st + inp.Value) 47 | } 48 | }, 49 | Equal: ShallowEqual, 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/diskvd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start a diskvd server. it's a member of some replica 5 | // group, which has other members, and it needs to know 6 | // how to talk to the members of the shardmaster service. 7 | // used by ../diskv/test_test.go 8 | // 9 | // arguments: 10 | // -g groupid 11 | // -m masterport1 -m masterport2 ... 12 | // -s replicaport1 -s replicaport2 ... 13 | // -i my-index-in-server-port-list 14 | // -u unreliable 15 | // -d directory 16 | // -r restart 17 | 18 | import "time" 19 | import "diskv" 20 | import "os" 21 | import "fmt" 22 | import "strconv" 23 | import "runtime" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: diskvd -g gid -m master... -s server... 
-i my-index -d dir\n") 27 | os.Exit(1) 28 | } 29 | 30 | func main() { 31 | var gid int64 = -1 // my replica group ID 32 | masters := []string{} // ports of shardmasters 33 | replicas := []string{} // ports of servers in my replica group 34 | me := -1 // my index in replicas[] 35 | unreliable := false 36 | dir := "" // store persistent data here 37 | restart := false 38 | 39 | for i := 1; i+1 < len(os.Args); i += 2 { 40 | a0 := os.Args[i] 41 | a1 := os.Args[i+1] 42 | if a0 == "-g" { 43 | gid, _ = strconv.ParseInt(a1, 10, 64) 44 | } else if a0 == "-m" { 45 | masters = append(masters, a1) 46 | } else if a0 == "-s" { 47 | replicas = append(replicas, a1) 48 | } else if a0 == "-i" { 49 | me, _ = strconv.Atoi(a1) 50 | } else if a0 == "-u" { 51 | unreliable, _ = strconv.ParseBool(a1) 52 | } else if a0 == "-d" { 53 | dir = a1 54 | } else if a0 == "-r" { 55 | restart, _ = strconv.ParseBool(a1) 56 | } else { 57 | usage() 58 | } 59 | } 60 | 61 | if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" { 62 | usage() 63 | } 64 | 65 | runtime.GOMAXPROCS(4) 66 | 67 | srv := diskv.StartServer(gid, masters, replicas, me, dir, restart) 68 | srv.Setunreliable(unreliable) 69 | 70 | // for safety, force quit after 10 minutes. 71 | time.Sleep(10 * 60 * time.Second) 72 | mep, _ := os.FindProcess(os.Getpid()) 73 | mep.Kill() 74 | } 75 | -------------------------------------------------------------------------------- /src/main/ii.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "fmt" 6 | "mapreduce" 7 | 8 | "strings" 9 | "sort" 10 | "strconv" 11 | "unicode" 12 | ) 13 | 14 | // The mapping function is called once for each piece of the input. 15 | // In this framework, the key is the name of the file that is being processed, 16 | // and the value is the file's contents. The return value should be a slice of 17 | // key/value pairs, each represented by a mapreduce.KeyValue. 18 | func mapF(document string, value string) (res []mapreduce.KeyValue) { 19 | // Your code here (Part V). 20 | fmt.Println("ii mapF " + document) 21 | words := strings.FieldsFunc(value, func(r rune) bool { 22 | return !unicode.IsLetter(r) 23 | }) 24 | 25 | // declare an empty unordered_set 26 | type Dummy struct { } 27 | var empty Dummy 28 | keys := make(map[string] Dummy) // non repeat keys 29 | 30 | for _, w := range words { 31 | if _, exist := keys[w]; !exist { 32 | keys[w] = empty 33 | res = append(res, mapreduce.KeyValue{Key: w, Value: document}) 34 | } 35 | } 36 | 37 | return 38 | } 39 | 40 | // The reduce function is called once for each key generated by Map, with a 41 | // list of that key's string value (merged across all inputs). The return value 42 | // should be a single output value for that key. 43 | func reduceF(key string, values []string) string { 44 | // Your code here (Part V). 45 | sort.Strings(values) 46 | return strconv.Itoa(len(values)) + " " + strings.Join(values, ",") 47 | } 48 | 49 | // Can be run in 3 ways: 50 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 51 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. 
xN.txt) 52 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 53 | func main() { 54 | if len(os.Args) < 4 { 55 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 56 | } else if os.Args[1] == "master" { 57 | var mr *mapreduce.Master 58 | if os.Args[2] == "sequential" { 59 | mr = mapreduce.Sequential("iiseq", os.Args[3:], 3, mapF, reduceF) 60 | } else { 61 | mr = mapreduce.Distributed("iiseq", os.Args[3:], 3, os.Args[2]) 62 | } 63 | mr.Wait() 64 | } else { 65 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100, nil) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/lockc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see comments in lockd.go 5 | // 6 | 7 | import "lockservice" 8 | import "os" 9 | import "fmt" 10 | 11 | func usage() { 12 | fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n") 13 | os.Exit(1) 14 | } 15 | 16 | func main() { 17 | if len(os.Args) == 5 { 18 | ck := lockservice.MakeClerk(os.Args[2], os.Args[3]) 19 | var ok bool 20 | if os.Args[1] == "-l" { 21 | ok = ck.Lock(os.Args[4]) 22 | } else if os.Args[1] == "-u" { 23 | ok = ck.Unlock(os.Args[4]) 24 | } else { 25 | usage() 26 | } 27 | fmt.Printf("reply: %v\n", ok) 28 | } else { 29 | usage() 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/lockd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // export GOPATH=~/6.824 4 | // go build lockd.go 5 | // go build lockc.go 6 | // ./lockd -p a b & 7 | // ./lockd -b a b & 8 | // ./lockc -l a b lx 9 | // ./lockc -u a b lx 10 | // 11 | // on Athena, use /tmp/myname-a and /tmp/myname-b 12 | // instead of a and b. 
13 | 14 | import "time" 15 | import "lockservice" 16 | import "os" 17 | import "fmt" 18 | 19 | func main() { 20 | if len(os.Args) == 4 && os.Args[1] == "-p" { 21 | lockservice.StartServer(os.Args[2], os.Args[3], true) 22 | } else if len(os.Args) == 4 && os.Args[1] == "-b" { 23 | lockservice.StartServer(os.Args[2], os.Args[3], false) 24 | } else { 25 | fmt.Printf("Usage: lockd -p|-b primaryport backupport\n") 26 | os.Exit(1) 27 | } 28 | for { 29 | time.Sleep(100 * time.Second) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/mr-challenge.txt: -------------------------------------------------------------------------------- 1 | www: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 2 | year: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 3 | years: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 4 | yesterday: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 5 | yet: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 6 | you: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 7 | young: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 8 | your: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 9 | yourself: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 10 | zip: 8 pg-being_ernest.txt,pg-dorian_gray.txt,pg-frankenstein.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-metamorphosis.txt,pg-sherlock_holmes.txt,pg-tom_sawyer.txt 11 | -------------------------------------------------------------------------------- /src/main/mr-testout.txt: -------------------------------------------------------------------------------- 1 | that: 7871 2 | it: 7987 3 | in: 8415 4 | was: 8578 5 | a: 13382 6 | of: 13536 7 | I: 14296 8 | to: 16079 9 | and: 23612 10 | the: 29748 11 | -------------------------------------------------------------------------------- /src/main/pbc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // pbservice client application 5 | // 6 | // export GOPATH=~/6.824 7 | // go build viewd.go 8 | // go build pbd.go 9 | // go build pbc.go 10 | // ./viewd /tmp/rtm-v & 11 | // ./pbd /tmp/rtm-v /tmp/rtm-1 & 12 | // ./pbd /tmp/rtm-v /tmp/rtm-2 & 13 | // ./pbc /tmp/rtm-v key1 value1 14 | // ./pbc /tmp/rtm-v key1 15 | // 16 | // change "rtm" to your user name. 17 | // start the pbd programs in separate windows and kill 18 | // and restart them to exercise fault tolerance. 
19 | // 20 | 21 | import "pbservice" 22 | import "os" 23 | import "fmt" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: pbc viewport key\n") 27 | fmt.Printf(" pbc viewport key value\n") 28 | os.Exit(1) 29 | } 30 | 31 | func main() { 32 | if len(os.Args) == 3 { 33 | // get 34 | ck := pbservice.MakeClerk(os.Args[1], "") 35 | v := ck.Get(os.Args[2]) 36 | fmt.Printf("%v\n", v) 37 | } else if len(os.Args) == 4 { 38 | // put 39 | ck := pbservice.MakeClerk(os.Args[1], "") 40 | ck.Put(os.Args[2], os.Args[3]) 41 | } else { 42 | usage() 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/pbd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "pbservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 3 { 14 | fmt.Printf("Usage: pbd viewport myport\n") 15 | os.Exit(1) 16 | } 17 | 18 | pbservice.StartServer(os.Args[1], os.Args[2]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/test-ii.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run ii.go master sequential pg-*.txt 3 | 4 | # cause sort to be case sensitive. 5 | # on Ubuntu (Athena) it's otherwise insensitive. 6 | LC_ALL=C 7 | export LC_ALL 8 | 9 | sort -k1,1 mrtmp.iiseq | sort -snk2,2 | grep -v '16' | tail -10 | diff - mr-challenge.txt > diff.out 10 | if [ -s diff.out ] 11 | then 12 | echo "Failed test. Output should be as in mr-challenge.txt. Your output differs as follows (from diff.out):" > /dev/stderr 13 | cat diff.out 14 | else 15 | echo "Passed test" > /dev/stderr 16 | fi 17 | 18 | -------------------------------------------------------------------------------- /src/main/test-mr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | here=$(dirname "$0") 3 | [[ "$here" = /* ]] || here="$PWD/$here" 4 | export GOPATH="$here/../../" 5 | echo "" 6 | echo "==> Part I" 7 | go test -run Sequential mapreduce/... 8 | echo "" 9 | echo "==> Part II" 10 | (cd "$here" && sh ./test-wc.sh > /dev/null) 11 | echo "" 12 | echo "==> Part III" 13 | go test -run TestParallel mapreduce/... 14 | echo "" 15 | echo "==> Part IV" 16 | go test -run Failure mapreduce/... 17 | echo "" 18 | echo "==> Part V (inverted index)" 19 | (cd "$here" && sh ./test-ii.sh > /dev/null) 20 | 21 | rm "$here"/mrtmp.* "$here"/diff.out 22 | -------------------------------------------------------------------------------- /src/main/test-wc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run wc.go master sequential pg-*.txt 3 | sort -n -k2 mrtmp.wcseq | tail -10 | diff - mr-testout.txt > diff.out 4 | if [ -s diff.out ] 5 | then 6 | echo "Failed test. Output should be as in mr-testout.txt. 
Your output differs as follows (from diff.out):" > /dev/stderr 7 | cat diff.out 8 | else 9 | echo "Passed test" > /dev/stderr 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /src/main/viewd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "viewservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 2 { 14 | fmt.Printf("Usage: viewd port\n") 15 | os.Exit(1) 16 | } 17 | 18 | viewservice.StartServer(os.Args[1]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/wc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "unicode" 6 | "mapreduce" 7 | "os" 8 | "strconv" 9 | "strings" 10 | ) 11 | 12 | // 13 | // The map function is called once for each file of input. The first 14 | // argument is the name of the input file, and the second is the 15 | // file's complete contents. You should ignore the input file name, 16 | // and look only at the contents argument. The return value is a slice 17 | // of key/value pairs. 18 | // 19 | // produce key/value pairs from file 20 | func mapF(filename string, contents string) []mapreduce.KeyValue { 21 | // Your code here (Part II). 22 | fmt.Println("mapF " + filename) 23 | words := strings.FieldsFunc(contents, func(r rune) bool { 24 | return !unicode.IsLetter(r) 25 | }) 26 | 27 | var res []mapreduce.KeyValue 28 | for _, w := range words { 29 | res = append(res, mapreduce.KeyValue{Key: w, Value: "1"}) 30 | } 31 | return res 32 | } 33 | 34 | // 35 | // The reduce function is called once for each key generated by the 36 | // map tasks, with a list of all the values created for that key by 37 | // any map task. 38 | // 39 | func reduceF(key string, values []string) string { 40 | // Your code here (Part II). 41 | count := len(values) 42 | fmt.Println("reduceF " + key + strconv.Itoa(count)) 43 | return strconv.Itoa(count) 44 | } 45 | 46 | // Can be run in 3 ways: 47 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 48 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. xN.txt) 49 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 50 | // 4...) File name list 51 | func main() { 52 | if len(os.Args) < 4 { 53 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 54 | } else if os.Args[1] == "master" { 55 | var mr *mapreduce.Master 56 | if os.Args[2] == "sequential" { 57 | mr = mapreduce.Sequential("wcseq", os.Args[3:], 3, mapF, reduceF) 58 | } else { 59 | mr = mapreduce.Distributed("wcseq", os.Args[3:], 3, os.Args[2]) 60 | } 61 | mr.Wait() 62 | } else { 63 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100, nil) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/mapreduce/common.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | ) 7 | 8 | // Debugging enabled? 9 | const debugEnabled = false 10 | 11 | // debug() will only print if debugEnabled is true 12 | func debug(format string, a ...interface{}) (n int, err error) { 13 | if debugEnabled { 14 | n, err = fmt.Printf(format, a...) 
15 | } 16 | return 17 | } 18 | 19 | // jobPhase indicates whether a task is scheduled as a map or reduce task. 20 | type jobPhase string 21 | 22 | const ( 23 | mapPhase jobPhase = "mapPhase" 24 | reducePhase = "reducePhase" 25 | ) 26 | 27 | // KeyValue is a type used to hold the key/value pairs passed to the map and 28 | // reduce functions. 29 | type KeyValue struct { 30 | Key string 31 | Value string 32 | } 33 | 34 | // reduceName constructs the name of the intermediate file which map task 35 | // produces for reduce task . 36 | func reduceName(jobName string, mapTask int, reduceTask int) string { 37 | return "mrtmp." + jobName + "-" + strconv.Itoa(mapTask) + "-" + strconv.Itoa(reduceTask) 38 | } 39 | 40 | // mergeName constructs the name of the output file of reduce task 41 | func mergeName(jobName string, reduceTask int) string { 42 | return "mrtmp." + jobName + "-res-" + strconv.Itoa(reduceTask) 43 | } 44 | -------------------------------------------------------------------------------- /src/mapreduce/common_map.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "hash/fnv" 5 | "encoding/json" 6 | "io/ioutil" 7 | "fmt" 8 | "os" 9 | ) 10 | 11 | // it is called for each input file 12 | func doMap( 13 | jobName string, // the name of the MapReduce job 14 | mapTask int, // which map task this is 15 | inFile string, 16 | nReduce int, // the number of reduce task that will be run ("R" in the paper) 17 | mapF func(filename string, contents string) []KeyValue, // use defined function 18 | ) { 19 | contents, err := ioutil.ReadFile(inFile) 20 | if err != nil { 21 | fmt.Println("ioutil.ReadFile error " + inFile) 22 | return 23 | } 24 | 25 | // produce key/value pairs from file 26 | kvs := mapF(inFile, string(contents)) 27 | for _ , kv := range kvs { 28 | reduceTask := ihash(kv.Key) % nReduce 29 | var intermediate string = reduceName(jobName, mapTask, reduceTask) // intermediate file 30 | f, err := os.OpenFile(intermediate, os.O_APPEND|os.O_CREATE|os.O_RDWR, 0666) 31 | if err != nil { 32 | fmt.Println("open err " + err.Error()) 33 | } 34 | 35 | enc := json.NewEncoder(f) 36 | enc.Encode(&kv) 37 | f.Close() 38 | } 39 | // 40 | // doMap manages one map task: it should read one of the input files 41 | // (inFile), call the user-defined map function (mapF) for that file's 42 | // contents, and partition mapF's output into nReduce intermediate files. 43 | // 44 | // There is one intermediate file per reduce task. The file name 45 | // includes both the map task number and the reduce task number. Use 46 | // the filename generated by reduceName(jobName, mapTask, r) 47 | // as the intermediate file for reduce task r. Call ihash() (see 48 | // below) on each key, mod nReduce, to pick r for a key/value pair. 49 | // 50 | // mapF() is the map function provided by the application. The first 51 | // argument should be the input file name, though the map function 52 | // typically ignores it. The second argument should be the entire 53 | // input file contents. mapF() returns a slice containing the 54 | // key/value pairs for reduce; see common.go for the definition of 55 | // KeyValue. 56 | // 57 | // Look at Go's ioutil and os packages for functions to read 58 | // and write files. 
59 | // 60 | // Coming up with a scheme for how to format the key/value pairs on 61 | // disk can be tricky, especially when taking into account that both 62 | // keys and values could contain newlines, quotes, and any other 63 | // character you can think of. 64 | // 65 | // One format often used for serializing data to a byte stream that the 66 | // other end can correctly reconstruct is JSON. You are not required to 67 | // use JSON, but as the output of the reduce tasks *must* be JSON, 68 | // familiarizing yourself with it here may prove useful. You can write 69 | // out a data structure as a JSON string to a file using the commented 70 | // code below. The corresponding decoding functions can be found in 71 | // common_reduce.go. 72 | // 73 | // enc := json.NewEncoder(file) 74 | // for _, kv := ... { 75 | // err := enc.Encode(&kv) 76 | // 77 | // Remember to close the file after you have written all the values! 78 | // 79 | // Your code here (Part I). 80 | // 81 | } 82 | 83 | func ihash(s string) int { 84 | h := fnv.New32a() 85 | h.Write([]byte(s)) 86 | return int(h.Sum32() & 0x7fffffff) 87 | } 88 | -------------------------------------------------------------------------------- /src/mapreduce/common_reduce.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "encoding/json" 7 | "os" 8 | ) 9 | 10 | // called for each reduce worker 11 | func doReduce( 12 | jobName string, // the name of the whole MapReduce job 13 | reduceTask int, // which reduce task this is 14 | outFile string, // write the output here 15 | nMap int, // the number of map tasks that were run ("M" in the paper) 16 | reduceF func(key string, values []string) string, 17 | ) { 18 | var kvs = make(map[string] []string) // unordered_map, key --> value list 19 | var keys []string // for sort keys 20 | 21 | for i := 0; i < nMap; i++ { 22 | var tmpFile string = reduceName(jobName, i, reduceTask) // the intermediate file from map task 23 | 24 | fmt.Println("doReduce read intermediate file:" + tmpFile) 25 | f, err := os.OpenFile(tmpFile, os.O_RDONLY, 0) 26 | if err != nil { 27 | fmt.Println(tmpFile + " opened error: " + err.Error()) 28 | } else { 29 | dec := json.NewDecoder(f) 30 | for { 31 | var kv KeyValue 32 | err = dec.Decode(&kv) 33 | if err != nil { 34 | break 35 | } 36 | 37 | if _, ok := kvs[kv.Key]; !ok { 38 | keys = append(keys, kv.Key) // new key 39 | } 40 | 41 | kvs[kv.Key] = append(kvs[kv.Key], kv.Value) // value list 42 | } 43 | } 44 | 45 | sort.Strings(keys) 46 | out, err := os.OpenFile(outFile, os.O_APPEND|os.O_CREATE|os.O_RDWR,0666) 47 | if err != nil { 48 | fmt.Println("Create file failed:" + outFile) 49 | return 50 | } 51 | 52 | enc := json.NewEncoder(out) 53 | for _, key := range keys { 54 | // call reduceF on each sorted key 55 | v := reduceF(key, kvs[key]) 56 | // output to reduce dest file 57 | if err = enc.Encode(KeyValue{key, v}); err != nil { 58 | fmt.Println("write [key: " + key + "] to file failed:" + outFile) 59 | } 60 | } 61 | out.Close() 62 | } 63 | fmt.Println("reduce Out file " + outFile) 64 | // 65 | // doReduce manages one reduce task: it should read the intermediate 66 | // files for the task, sort the intermediate key/value pairs by key, 67 | // call the user-defined reduce function (reduceF) for each key, and 68 | // write reduceF's output to disk. 
69 | // 70 | // You'll need to read one intermediate file from each map task; 71 | // reduceName(jobName, m, reduceTask) yields the file 72 | // name from map task m. 73 | // 74 | // Your doMap() encoded the key/value pairs in the intermediate 75 | // files, so you will need to decode them. If you used JSON, you can 76 | // read and decode by creating a decoder and repeatedly calling 77 | // .Decode(&kv) on it until it returns an error. 78 | // 79 | // You may find the first example in the golang sort package 80 | // documentation useful. 81 | // 82 | // reduceF() is the application's reduce function. You should 83 | // call it once per distinct key, with a slice of all the values 84 | // for that key. reduceF() returns the reduced value for that key. 85 | // 86 | // You should write the reduce output as JSON encoded KeyValue 87 | // objects to the file named outFile. We require you to use JSON 88 | // because that is what the merger than combines the output 89 | // from all the reduce tasks expects. There is nothing special about 90 | // JSON -- it is just the marshalling format we chose to use. Your 91 | // output code will look something like this: 92 | // 93 | // enc := json.NewEncoder(file) 94 | // for key := ... { 95 | // enc.Encode(KeyValue{key, reduceF(...)}) 96 | // } 97 | // file.Close() 98 | // 99 | // Your code here (Part I). 100 | // 101 | } 102 | -------------------------------------------------------------------------------- /src/mapreduce/common_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "net/rpc" 6 | ) 7 | 8 | // What follows are RPC types and methods. 9 | // Field names must start with capital letters, otherwise RPC will break. 10 | 11 | // DoTaskArgs holds the arguments that are passed to a worker when a job is 12 | // scheduled on it. 13 | type DoTaskArgs struct { 14 | JobName string 15 | File string // only for map, the input file 16 | Phase jobPhase // are we in mapPhase or reducePhase? 17 | TaskNumber int // this task's index in the current phase 18 | 19 | // NumOtherPhase is the total number of tasks in other phase; mappers 20 | // need this to compute the number of output bins, and reducers needs 21 | // this to know how many input files to collect. 22 | NumOtherPhase int 23 | } 24 | 25 | // ShutdownReply is the response to a WorkerShutdown. 26 | // It holds the number of tasks this worker has processed since it was started. 27 | type ShutdownReply struct { 28 | Ntasks int 29 | } 30 | 31 | // RegisterArgs is the argument passed when a worker registers with the master. 32 | type RegisterArgs struct { 33 | Worker string // the worker's UNIX-domain socket name, i.e. its RPC address 34 | } 35 | 36 | // call() sends an RPC to the rpcname handler on server srv 37 | // with arguments args, waits for the reply, and leaves the 38 | // reply in reply. the reply argument should be the address 39 | // of a reply structure. 40 | // 41 | // call() returns true if the server responded, and false if call() 42 | // received no reply from the server. reply's contents are valid if 43 | // and only if call() returned true. 44 | // 45 | // you should assume that call() will time out and return 46 | // false after a while if it doesn't get a reply from the server. 47 | // 48 | // please use call() to send all RPCs. please don't change this 49 | // function. 
50 | // 51 | func call(srv string, rpcname string, 52 | args interface{}, reply interface{}) bool { 53 | c, errx := rpc.Dial("unix", srv) 54 | if errx != nil { 55 | return false 56 | } 57 | defer c.Close() 58 | 59 | err := c.Call(rpcname, args, reply) 60 | if err == nil { 61 | return true 62 | } 63 | 64 | fmt.Println(err) 65 | return false 66 | } 67 | -------------------------------------------------------------------------------- /src/mapreduce/master.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | // 4 | // Please do not modify this file. 5 | // 6 | 7 | import ( 8 | "fmt" 9 | "net" 10 | "sync" 11 | ) 12 | 13 | // Master holds all the state that the master needs to keep track of. 14 | type Master struct { 15 | sync.Mutex 16 | 17 | address string 18 | doneChannel chan bool 19 | 20 | // protected by the mutex 21 | newCond *sync.Cond // signals when Register() adds to workers[] 22 | workers []string // each worker's UNIX-domain socket name -- its RPC address 23 | 24 | // Per-task information 25 | jobName string // Name of currently executing job 26 | files []string // Input files 27 | nReduce int // Number of reduce partitions 28 | 29 | shutdown chan struct{} 30 | l net.Listener 31 | stats []int 32 | } 33 | 34 | // Register is an RPC method that is called by workers after they have started 35 | // up to report that they are ready to receive tasks. 36 | func (mr *Master) Register(args *RegisterArgs, _ *struct{}) error { 37 | mr.Lock() 38 | defer mr.Unlock() 39 | debug("Register: worker %s\n", args.Worker) 40 | mr.workers = append(mr.workers, args.Worker) 41 | 42 | // tell forwardRegistrations() that there's a new workers[] entry. 43 | mr.newCond.Broadcast() 44 | 45 | return nil 46 | } 47 | 48 | // newMaster initializes a new Map/Reduce Master 49 | func newMaster(master string) (mr *Master) { 50 | mr = new(Master) 51 | mr.address = master 52 | mr.shutdown = make(chan struct{}) 53 | mr.newCond = sync.NewCond(mr) 54 | mr.doneChannel = make(chan bool) 55 | return 56 | } 57 | 58 | // Sequential runs map and reduce tasks sequentially, waiting for each task to 59 | // complete before running the next. 60 | func Sequential(jobName string, files []string, nreduce int, 61 | mapF func(string, string) []KeyValue, 62 | reduceF func(string, []string) string, 63 | ) (mr *Master) { // return mr as Master* 64 | mr = newMaster("master") 65 | go mr.run(jobName, files, nreduce, func(phase jobPhase) { // schedule function 66 | switch phase { 67 | case mapPhase: 68 | // for each file, call your mapF on it 69 | for i, f := range mr.files { 70 | doMap(mr.jobName, i, f, mr.nReduce, mapF) 71 | } 72 | case reducePhase: 73 | // nMap = len(mr.files), there are nMap * nReduce intermediate Files 74 | for i := 0; i < mr.nReduce; i++ { // i is the reduce Task ID 75 | doReduce(mr.jobName, i, mergeName(mr.jobName, i), len(mr.files), reduceF) 76 | } 77 | } 78 | }, func() { // finish func 79 | mr.stats = []int{len(files) + nreduce} 80 | }) 81 | return 82 | } 83 | 84 | // helper function that sends information about all existing 85 | // and newly registered workers to channel ch. schedule() 86 | // reads ch to learn about workers. 87 | func (mr *Master) forwardRegistrations(ch chan string) { 88 | i := 0 89 | for { 90 | mr.Lock() 91 | if len(mr.workers) > i { // Register RPC will append worker list 92 | // there's a worker that we haven't told schedule() about. 93 | w := mr.workers[i] 94 | go func() { ch <- w }() // send without holding the lock. 
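			// (added note) the send can block until schedule() is ready to
			// receive from ch; doing it in its own goroutine means
			// forwardRegistrations() never blocks on the channel while it
			// holds the master's mutex, so Register() RPCs from new workers
			// can still get through.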
95 | i = i + 1 96 | } else { 97 | // wait for Register() to add an entry to workers[] 98 | // in response to an RPC from a new worker. 99 | mr.newCond.Wait() 100 | } 101 | mr.Unlock() 102 | } 103 | } 104 | 105 | // Distributed schedules map and reduce tasks on workers that register with the 106 | // master over RPC. 107 | func Distributed(jobName string, files []string, nreduce int, master string) (mr *Master) { 108 | mr = newMaster(master) 109 | mr.startRPCServer() 110 | go mr.run(jobName, files, nreduce, 111 | func(phase jobPhase) { 112 | ch := make(chan string) 113 | go mr.forwardRegistrations(ch) 114 | schedule(mr.jobName, mr.files, mr.nReduce, phase, ch) 115 | }, 116 | func() { 117 | mr.stats = mr.killWorkers() 118 | mr.stopRPCServer() 119 | }) 120 | return 121 | } 122 | 123 | // run executes a mapreduce job on the given number of mappers and reducers. 124 | // 125 | // First, it divides up the input file among the given number of mappers, and 126 | // schedules each task on workers as they become available. Each map task bins 127 | // its output in a number of bins equal to the given number of reduce tasks. 128 | // Once all the mappers have finished, workers are assigned reduce tasks. 129 | // 130 | // When all tasks have been completed, the reducer outputs are merged, 131 | // statistics are collected, and the master is shut down. 132 | // 133 | // Note that this implementation assumes a shared file system. 134 | func (mr *Master) run(jobName string, files []string, nreduce int, 135 | schedule func(phase jobPhase), 136 | finish func(), 137 | ) { 138 | mr.jobName = jobName 139 | mr.files = files 140 | mr.nReduce = nreduce 141 | 142 | fmt.Printf("%s: Starting Map/Reduce task %s\n", mr.address, mr.jobName) 143 | 144 | schedule(mapPhase) 145 | schedule(reducePhase) 146 | finish() 147 | mr.merge() 148 | 149 | fmt.Printf("%s: Map/Reduce task completed\n", mr.address) 150 | 151 | mr.doneChannel <- true 152 | } 153 | 154 | // Wait blocks until the currently scheduled work has completed. 155 | // This happens when all tasks have scheduled and completed, the final output 156 | // have been computed, and all workers have been shut down. 157 | func (mr *Master) Wait() { 158 | <-mr.doneChannel 159 | } 160 | 161 | // killWorkers cleans up all workers by sending each one a Shutdown RPC. 162 | // It also collects and returns the number of tasks each worker has performed. 163 | func (mr *Master) killWorkers() []int { 164 | mr.Lock() 165 | defer mr.Unlock() 166 | ntasks := make([]int, 0, len(mr.workers)) 167 | for _, w := range mr.workers { 168 | debug("Master: shutdown worker %s\n", w) 169 | var reply ShutdownReply 170 | ok := call(w, "Worker.Shutdown", new(struct{}), &reply) 171 | if ok == false { 172 | fmt.Printf("Master: RPC %s shutdown error\n", w) 173 | } else { 174 | ntasks = append(ntasks, reply.Ntasks) 175 | } 176 | } 177 | return ntasks 178 | } 179 | -------------------------------------------------------------------------------- /src/mapreduce/master_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/rpc" 8 | "os" 9 | ) 10 | 11 | // Shutdown is an RPC method that shuts down the Master's RPC server. 12 | func (mr *Master) Shutdown(_, _ *struct{}) error { 13 | debug("Shutdown: registration server\n") 14 | close(mr.shutdown) 15 | mr.l.Close() // causes the Accept to fail 16 | return nil 17 | } 18 | 19 | // startRPCServer starts the Master's RPC server. 
It continues accepting RPC 20 | // calls (Register in particular) for as long as the worker is alive. 21 | func (mr *Master) startRPCServer() { 22 | rpcs := rpc.NewServer() 23 | rpcs.Register(mr) 24 | os.Remove(mr.address) // only needed for "unix" 25 | l, e := net.Listen("unix", mr.address) 26 | if e != nil { 27 | log.Fatal("RegstrationServer", mr.address, " error: ", e) 28 | } 29 | mr.l = l 30 | 31 | // now that we are listening on the master address, can fork off 32 | // accepting connections to another thread. 33 | go func() { 34 | loop: 35 | for { 36 | select { 37 | case <-mr.shutdown: 38 | break loop 39 | default: 40 | } 41 | conn, err := mr.l.Accept() 42 | if err == nil { 43 | go func() { 44 | rpcs.ServeConn(conn) // default: gob protocol 45 | conn.Close() 46 | }() 47 | } else { 48 | debug("RegistrationServer: accept error %v\n", err) 49 | break 50 | } 51 | } 52 | debug("RegistrationServer: done\n") 53 | }() 54 | } 55 | 56 | // stopRPCServer stops the master RPC server. 57 | // This must be done through an RPC to avoid race conditions between the RPC 58 | // server thread and the current thread. 59 | func (mr *Master) stopRPCServer() { 60 | var reply ShutdownReply 61 | ok := call(mr.address, "Master.Shutdown", new(struct{}), &reply) 62 | if ok == false { 63 | fmt.Printf("Cleanup: RPC %s error\n", mr.address) 64 | } 65 | debug("cleanupRegistration: done\n") 66 | } 67 | -------------------------------------------------------------------------------- /src/mapreduce/master_splitmerge.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "fmt" 7 | "log" 8 | "os" 9 | "sort" 10 | ) 11 | 12 | // merge combines the results of the many reduce jobs into a single output file 13 | // XXX use merge sort 14 | func (mr *Master) merge() { 15 | debug("Merge phase") 16 | kvs := make(map[string]string) 17 | for i := 0; i < mr.nReduce; i++ { 18 | p := mergeName(mr.jobName, i) 19 | fmt.Printf("Merge: read %s\n", p) 20 | file, err := os.Open(p) 21 | if err != nil { 22 | log.Fatal("Merge: ", err) 23 | } 24 | dec := json.NewDecoder(file) 25 | for { 26 | var kv KeyValue 27 | err = dec.Decode(&kv) 28 | if err != nil { 29 | break 30 | } 31 | kvs[kv.Key] = kv.Value 32 | } 33 | file.Close() 34 | } 35 | var keys []string 36 | for k := range kvs { 37 | keys = append(keys, k) 38 | } 39 | sort.Strings(keys) 40 | 41 | file, err := os.Create("mrtmp." + mr.jobName) 42 | if err != nil { 43 | log.Fatal("Merge: create ", err) 44 | } 45 | w := bufio.NewWriter(file) 46 | for _, k := range keys { 47 | fmt.Fprintf(w, "%s: %s\n", k, kvs[k]) 48 | } 49 | w.Flush() 50 | file.Close() 51 | } 52 | 53 | // removeFile is a simple wrapper around os.Remove that logs errors. 54 | func removeFile(n string) { 55 | err := os.Remove(n) 56 | if err != nil { 57 | log.Fatal("CleanupFiles ", err) 58 | } 59 | } 60 | 61 | // CleanupFiles removes all intermediate files produced by running mapreduce. 62 | func (mr *Master) CleanupFiles() { 63 | for i := range mr.files { 64 | for j := 0; j < mr.nReduce; j++ { 65 | removeFile(reduceName(mr.jobName, i, j)) 66 | } 67 | } 68 | for i := 0; i < mr.nReduce; i++ { 69 | removeFile(mergeName(mr.jobName, i)) 70 | } 71 | removeFile("mrtmp." 
+ mr.jobName) 72 | } 73 | -------------------------------------------------------------------------------- /src/mapreduce/schedule.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | 8 | // 9 | // schedule() starts and waits for all tasks in the given phase (mapPhase 10 | // or reducePhase). the mapFiles argument holds the names of the files that 11 | // are the inputs to the map phase, one per map task. nReduce is the 12 | // number of reduce tasks. the registerChan argument yields a stream 13 | // of registered workers; each item is the worker's RPC address, 14 | // suitable for passing to call(). registerChan will yield all 15 | // existing registered workers (if any) and new ones as they register. 16 | // 17 | func schedule(jobName string, mapFiles []string, nReduce int, phase jobPhase, registerChan chan string) { 18 | var ntasks int 19 | var n_other int // number of inputs (for reduce) or outputs (for map) 20 | switch phase { 21 | case mapPhase: 22 | ntasks = len(mapFiles) 23 | n_other = nReduce 24 | case reducePhase: 25 | ntasks = nReduce 26 | n_other = len(mapFiles) 27 | } 28 | 29 | fmt.Printf("Schedule: %v %v tasks (%d I/Os)\n", ntasks, phase, n_other) 30 | 31 | // All ntasks tasks have to be scheduled on workers. Once all tasks 32 | // have completed successfully, schedule() should return. 33 | // 34 | // Your code here (Part III, Part IV). 35 | // 36 | 37 | // Remember that workers may fail, and that any given worker may finish 38 | // multiple tasks. 39 | 40 | // schedule will wait until all worker has done their jobs 41 | var wg sync.WaitGroup 42 | 43 | // task id will get from this channel 44 | var taskChan = make(chan int) 45 | go func() { 46 | for i := 0; i < ntasks; i++ { 47 | wg.Add(1) 48 | taskChan <- i 49 | } 50 | // wait all workers have done their job, then close taskChan 51 | wg.Wait() 52 | close(taskChan) 53 | }() 54 | 55 | // RPC call parameter 56 | var task DoTaskArgs 57 | task.JobName = jobName 58 | task.NumOtherPhase = n_other 59 | task.Phase = phase 60 | 61 | // assign all task to worker 62 | for i := range taskChan { // wait a new task 63 | // get a worker from register channel 64 | worker := <-registerChan 65 | 66 | task.TaskNumber = i 67 | if phase == mapPhase { 68 | task.File = mapFiles[i] 69 | } 70 | 71 | // Note: must use parameter 72 | go func(worker string, task DoTaskArgs) { 73 | if call(worker, "Worker.DoTask", &task, nil) { 74 | // only successful call will call wg.Done() 75 | wg.Done() 76 | 77 | // put idle worker back to register channel 78 | registerChan <- worker; 79 | } else { 80 | fmt.Printf("Schedule: assign %s task %v to %s failed", phase, 81 | task.TaskNumber, worker) 82 | 83 | // put failed task back to task channel 84 | taskChan <- task.TaskNumber 85 | } 86 | }(worker, task) 87 | } 88 | fmt.Printf("Schedule: %v done\n", phase) 89 | } 90 | -------------------------------------------------------------------------------- /src/mapreduce/test_test.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "bufio" 9 | "log" 10 | "os" 11 | "sort" 12 | "strconv" 13 | "strings" 14 | ) 15 | 16 | const ( 17 | nNumber = 100000 18 | nMap = 20 19 | nReduce = 10 20 | ) 21 | 22 | // Create input file with N numbers 23 | // Check if we have N numbers in output file 24 | 25 | // Split in words to kv pairs 26 | func MapFunc(file string, value string) 
(res []KeyValue) { 27 | debug("MapFunc %s\n", value) 28 | words := strings.Fields(value) 29 | for _, w := range words { 30 | kv := KeyValue{w, ""} 31 | res = append(res, kv) 32 | } 33 | return 34 | } 35 | 36 | // Just return key 37 | func ReduceFunc(key string, values []string) string { 38 | for _, e := range values { 39 | debug("Reduce %s %v\n", key, e) 40 | } 41 | return "" 42 | } 43 | 44 | // Checks input file agaist output file: each input number should show up 45 | // in the output file in string sorted order 46 | func check(t *testing.T, files []string) { 47 | output, err := os.Open("mrtmp.test") 48 | if err != nil { 49 | log.Fatal("check: ", err) 50 | } 51 | defer output.Close() 52 | 53 | var lines []string 54 | for _, f := range files { 55 | input, err := os.Open(f) 56 | if err != nil { 57 | log.Fatal("check: ", err) 58 | } 59 | defer input.Close() 60 | inputScanner := bufio.NewScanner(input) 61 | for inputScanner.Scan() { 62 | lines = append(lines, inputScanner.Text()) 63 | } 64 | } 65 | 66 | sort.Strings(lines) 67 | 68 | outputScanner := bufio.NewScanner(output) 69 | i := 0 70 | for outputScanner.Scan() { 71 | var v1 int 72 | var v2 int 73 | text := outputScanner.Text() 74 | n, err := fmt.Sscanf(lines[i], "%d", &v1) 75 | if n == 1 && err == nil { 76 | n, err = fmt.Sscanf(text, "%d", &v2) 77 | } 78 | if err != nil || v1 != v2 { 79 | t.Fatalf("line %d: %d != %d err %v\n", i, v1, v2, err) 80 | } 81 | i++ 82 | } 83 | if i != nNumber { 84 | t.Fatalf("Expected %d lines in output\n", nNumber) 85 | } 86 | } 87 | 88 | // Workers report back how many RPCs they have processed in the Shutdown reply. 89 | // Check that they processed at least 1 DoTask RPC. 90 | func checkWorker(t *testing.T, l []int) { 91 | for _, tasks := range l { 92 | if tasks == 0 { 93 | t.Fatalf("A worker didn't do any work\n") 94 | } 95 | } 96 | } 97 | 98 | // Make input file 99 | func makeInputs(num int) []string { 100 | var names []string 101 | var i = 0 102 | for f := 0; f < num; f++ { 103 | names = append(names, fmt.Sprintf("824-mrinput-%d.txt", f)) 104 | file, err := os.Create(names[f]) 105 | if err != nil { 106 | log.Fatal("mkInput: ", err) 107 | } 108 | w := bufio.NewWriter(file) 109 | for i < (f+1)*(nNumber/num) { 110 | fmt.Fprintf(w, "%d\n", i) 111 | i++ 112 | } 113 | w.Flush() 114 | file.Close() 115 | } 116 | return names 117 | } 118 | 119 | // Cook up a unique-ish UNIX-domain socket name 120 | // in /var/tmp. can't use current directory since 121 | // AFS doesn't support UNIX-domain sockets. 
122 | func port(suffix string) string { 123 | s := "/var/tmp/824-" 124 | s += strconv.Itoa(os.Getuid()) + "/" 125 | os.Mkdir(s, 0777) 126 | s += "mr" 127 | s += strconv.Itoa(os.Getpid()) + "-" 128 | s += suffix 129 | return s 130 | } 131 | 132 | func setup() *Master { 133 | files := makeInputs(nMap) 134 | master := port("master") 135 | // start rpc and run master 136 | mr := Distributed("test", files, nReduce, master) 137 | return mr 138 | } 139 | 140 | func cleanup(mr *Master) { 141 | mr.CleanupFiles() 142 | for _, f := range mr.files { 143 | removeFile(f) 144 | } 145 | } 146 | 147 | // lab1.1 148 | func TestSequentialSingle(t *testing.T) { 149 | mr := Sequential("test", makeInputs(1), 1, MapFunc, ReduceFunc) 150 | mr.Wait() 151 | check(t, mr.files) 152 | checkWorker(t, mr.stats) 153 | cleanup(mr) 154 | } 155 | 156 | // lab1.1 157 | func TestSequentialMany(t *testing.T) { 158 | mr := Sequential("test", makeInputs(5), 3, MapFunc, ReduceFunc) 159 | mr.Wait() 160 | check(t, mr.files) 161 | checkWorker(t, mr.stats) 162 | cleanup(mr) 163 | } 164 | 165 | // lab 1.3 166 | func TestParallelBasic(t *testing.T) { 167 | mr := setup() 168 | for i := 0; i < 2; i++ { 169 | go RunWorker(mr.address, port("worker"+strconv.Itoa(i)), 170 | MapFunc, ReduceFunc, -1, nil) 171 | } 172 | mr.Wait() 173 | check(t, mr.files) 174 | checkWorker(t, mr.stats) 175 | cleanup(mr) 176 | } 177 | 178 | func TestParallelCheck(t *testing.T) { 179 | mr := setup() 180 | parallelism := &Parallelism{} 181 | for i := 0; i < 2; i++ { 182 | go RunWorker(mr.address, port("worker"+strconv.Itoa(i)), 183 | MapFunc, ReduceFunc, -1, parallelism) 184 | } 185 | mr.Wait() 186 | check(t, mr.files) 187 | checkWorker(t, mr.stats) 188 | 189 | parallelism.mu.Lock() 190 | if parallelism.max < 2 { 191 | t.Fatalf("workers did not execute in parallel") 192 | } 193 | parallelism.mu.Unlock() 194 | 195 | cleanup(mr) 196 | } 197 | 198 | func TestOneFailure(t *testing.T) { 199 | mr := setup() 200 | // Start 2 workers that fail after 10 tasks 201 | go RunWorker(mr.address, port("worker"+strconv.Itoa(0)), 202 | MapFunc, ReduceFunc, 10, nil) 203 | go RunWorker(mr.address, port("worker"+strconv.Itoa(1)), 204 | MapFunc, ReduceFunc, -1, nil) 205 | mr.Wait() 206 | check(t, mr.files) 207 | checkWorker(t, mr.stats) 208 | cleanup(mr) 209 | } 210 | 211 | func TestManyFailures(t *testing.T) { 212 | mr := setup() 213 | i := 0 214 | done := false 215 | for !done { 216 | select { 217 | case done = <-mr.doneChannel: 218 | check(t, mr.files) 219 | cleanup(mr) 220 | break 221 | default: 222 | // Start 2 workers each sec. The workers fail after 10 tasks 223 | w := port("worker" + strconv.Itoa(i)) 224 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10, nil) 225 | i++ 226 | w = port("worker" + strconv.Itoa(i)) 227 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10, nil) 228 | i++ 229 | time.Sleep(1 * time.Second) 230 | } 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /src/mapreduce/worker.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | // 4 | // Please do not modify this file. 5 | // 6 | 7 | import ( 8 | "fmt" 9 | "log" 10 | "net" 11 | "net/rpc" 12 | "os" 13 | "sync" 14 | "time" 15 | ) 16 | 17 | // track whether workers executed in parallel. 
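// now counts how many DoTask calls are currently executing across the workers
// that share this struct; max records the largest value ever observed.
// TestParallelCheck requires max to reach at least 2.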
18 | type Parallelism struct { 19 | mu sync.Mutex 20 | now int32 21 | max int32 22 | } 23 | 24 | // Worker holds the state for a server waiting for DoTask or Shutdown RPCs 25 | type Worker struct { 26 | sync.Mutex 27 | 28 | name string 29 | Map func(string, string) []KeyValue 30 | Reduce func(string, []string) string 31 | nRPC int // quit after this many RPCs; protected by mutex 32 | nTasks int // total tasks executed; protected by mutex 33 | concurrent int // number of parallel DoTasks in this worker; mutex 34 | l net.Listener 35 | parallelism *Parallelism 36 | } 37 | 38 | // DoTask is called by the master when a new task is being scheduled on this 39 | // worker. 40 | func (wk *Worker) DoTask(arg *DoTaskArgs, _ *struct{}) error { 41 | fmt.Printf("%s: given %v task #%d on file %s (nios: %d)\n", 42 | wk.name, arg.Phase, arg.TaskNumber, arg.File, arg.NumOtherPhase) 43 | 44 | wk.Lock() 45 | wk.nTasks += 1 46 | wk.concurrent += 1 47 | nc := wk.concurrent 48 | wk.Unlock() 49 | 50 | if nc > 1 { 51 | // schedule() should never issue more than one RPC at a 52 | // time to a given worker. 53 | log.Fatal("Worker.DoTask: more than one DoTask sent concurrently to a single worker\n") 54 | } 55 | 56 | pause := false 57 | if wk.parallelism != nil { 58 | wk.parallelism.mu.Lock() 59 | wk.parallelism.now += 1 60 | if wk.parallelism.now > wk.parallelism.max { 61 | wk.parallelism.max = wk.parallelism.now 62 | } 63 | if wk.parallelism.max < 2 { 64 | pause = true 65 | } 66 | wk.parallelism.mu.Unlock() 67 | } 68 | 69 | if pause { 70 | // give other workers a chance to prove that 71 | // they are executing in parallel. 72 | time.Sleep(time.Second) 73 | } 74 | 75 | switch arg.Phase { 76 | case mapPhase: 77 | doMap(arg.JobName, arg.TaskNumber, arg.File, arg.NumOtherPhase, wk.Map) 78 | case reducePhase: 79 | doReduce(arg.JobName, arg.TaskNumber, mergeName(arg.JobName, arg.TaskNumber), arg.NumOtherPhase, wk.Reduce) 80 | } 81 | 82 | wk.Lock() 83 | wk.concurrent -= 1 84 | wk.Unlock() 85 | 86 | if wk.parallelism != nil { 87 | wk.parallelism.mu.Lock() 88 | wk.parallelism.now -= 1 89 | wk.parallelism.mu.Unlock() 90 | } 91 | 92 | fmt.Printf("%s: %v task #%d done\n", wk.name, arg.Phase, arg.TaskNumber) 93 | return nil 94 | } 95 | 96 | // Shutdown is called by the master when all work has been completed. 97 | // We should respond with the number of tasks we have processed. 98 | func (wk *Worker) Shutdown(_ *struct{}, res *ShutdownReply) error { 99 | debug("Shutdown %s\n", wk.name) 100 | wk.Lock() 101 | defer wk.Unlock() 102 | res.Ntasks = wk.nTasks 103 | wk.nRPC = 1 104 | return nil 105 | } 106 | 107 | // Tell the master we exist and ready to work 108 | func (wk *Worker) register(master string) { 109 | args := new(RegisterArgs) 110 | args.Worker = wk.name 111 | ok := call(master, "Master.Register", args, new(struct{})) 112 | if ok == false { 113 | fmt.Printf("Register: RPC %s register error\n", master) 114 | } 115 | } 116 | 117 | // RunWorker sets up a connection with the master, registers its address, and 118 | // waits for tasks to be scheduled. 
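// The nRPC argument bounds how many RPC connections this worker will accept:
// the accept loop below decrements nRPC once per accepted connection and exits
// when it reaches zero, so the tests pass a small positive value (e.g. 10) to
// simulate a worker that fails after a fixed number of tasks, and -1 to keep
// the worker serving indefinitely.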
119 | func RunWorker(MasterAddress string, me string, 120 | MapFunc func(string, string) []KeyValue, 121 | ReduceFunc func(string, []string) string, 122 | nRPC int, parallelism *Parallelism, 123 | ) { 124 | debug("RunWorker %s\n", me) 125 | wk := new(Worker) 126 | wk.name = me 127 | wk.Map = MapFunc 128 | wk.Reduce = ReduceFunc 129 | wk.nRPC = nRPC 130 | wk.parallelism = parallelism 131 | rpcs := rpc.NewServer() 132 | rpcs.Register(wk) 133 | os.Remove(me) // only needed for "unix" 134 | l, e := net.Listen("unix", me) 135 | if e != nil { 136 | log.Fatal("RunWorker: worker ", me, " error: ", e) 137 | } 138 | wk.l = l 139 | wk.register(MasterAddress) 140 | 141 | // DON'T MODIFY CODE BELOW 142 | for { 143 | wk.Lock() 144 | if wk.nRPC == 0 { 145 | wk.Unlock() 146 | break 147 | } 148 | wk.Unlock() 149 | conn, err := wk.l.Accept() 150 | if err == nil { 151 | wk.Lock() 152 | wk.nRPC-- 153 | wk.Unlock() 154 | go rpcs.ServeConn(conn) 155 | } else { 156 | break 157 | } 158 | } 159 | wk.l.Close() 160 | debug("RunWorker %s exit\n", me) 161 | } 162 | -------------------------------------------------------------------------------- /src/raft/README.md: -------------------------------------------------------------------------------- 1 | # Lab2 Raft 2 | 3 | ## Lab Part 2A 4 | 5 | Implement leader election and heartbeats (AppendEntries RPCs with no log entries). The goal for Part 2A is for a single leader to be elected, for 6 | the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader 7 | are lost. Run go test -run 2A to test your 2A code. 8 | 9 | - raft.go 10 | 1. The initial state is Follower. 11 | 2. Three goroutines: checkHealthy, election, and heartDaemon. 12 | 3. checkHealthy runs in the Follower state; it checks whether the leader is still active. 13 | 4. If the leader is inactive, switch to Candidate and start the election goroutine. 14 | 5. The election routine sends RequestVote RPCs. 15 | 6. Handling RequestVote: if Candidate, switchToFollower; if Follower, check whether it 16 | has already voted or the term is the same; if Leader, check whether it is a stale leader. 17 | 7. Heartbeat routine: send heartbeats periodically; on receiving a bigger term, switchToFollower. 18 | 19 | ## Lab Part 2B 20 | 21 | Implement the leader and follower code to append new log entries. This will involve implementing Start(), completing the AppendEntries RPC 22 | structs, sending them, fleshing out the AppendEntry RPC handler, and advancing the commitIndex at the leader. Your first goal should be to pass the 23 | TestBasicAgree() test (in test_test.go). Once you have that working, you should get all the 2B tests to pass (go test -run 2B). 24 | 25 | - raft.go 26 | 1. If an AppendEntries RPC arrives with multiple new entries, say 3, and PrevLogIndex is 10: is it possible that rf.log[PrevLogIndex + 0].Term == Entries[0].Term 27 | but rf.log[PrevLogIndex + 1].Term != Entries[1].Term? 28 | 2. A call to Start() at the leader starts the process of adding a new operation to the log; the leader sends the new operation to the other servers in AppendEntries RPCs. 29 | Committed entries are sent on the applyCh in a separate goroutine. 30 | 3. Note that the log index is 1-based in the paper (see the sketch below).
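A minimal sketch of that 1-based convention, with hypothetical names (the actual types and helpers in raft.go may differ): pad index 0 with a dummy entry so that `rf.log[i]` lines up with the paper's index i.

    type LogEntry struct {
        Term    int
        Command interface{}
    }

    // makeLog places a dummy entry at index 0 so that real entries start at
    // index 1, matching the paper's 1-based log indexing.
    func makeLog() []LogEntry {
        return []LogEntry{{Term: 0}}
    }

    // lastLogIndex and lastLogTerm then read the tail of such a log directly.
    func lastLogIndex(log []LogEntry) int { return len(log) - 1 }
    func lastLogTerm(log []LogEntry) int  { return log[len(log)-1].Term }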
31 | 32 | At the moment the 2C unreliable figure 8 test times out with a probability of roughly 1%. 33 | Comparing a normal log with a timed-out log: the normal log has about 67 rejected-vote records, while the failing log has 160 of them: 34 | [me 0] RequestVoteReply from 2, term 123, grant false, votes 2 35 | The line above is the last record in the failing log, where the term has climbed to 123; in the normal log the highest term is only 80. 36 | This is almost certainly caused by the lack of a pre-vote mechanism; I will add one and re-test when I have time. Right now it fails on average once every 100-200 runs. 37 | In theory, with pre-vote the failure rate should drop to zero. 38 | One open question, though: there is no "bigger heartbeat reply term" record, i.e. no case where the leader sends a heartbeat and gets a rejection because a follower holds a larger term. 39 | -------------------------------------------------------------------------------- /src/raft/lab2: -------------------------------------------------------------------------------- 1 | 2 | Implement Raft by adding code to raft/raft.go. In that file you'll find a bit of skeleton code, plus examples of how to send and receive RPCs. 3 | Your implementation must support the following interface, which the tester and (eventually) your key/value server will use. You'll find more details in comments in raft.go. 4 | 5 | // create a new Raft server instance: 6 | rf := Make(peers, me, persister, applyCh) 7 | 8 | // start agreement on a new log entry: 9 | rf.Start(command interface{}) (index, term, isleader) 10 | 11 | // ask a Raft for its current term, and whether it thinks it is leader 12 | rf.GetState() (term, isLeader) 13 | 14 | // each time a new entry is committed to the log, each Raft peer 15 | // should send an ApplyMsg to the service (or tester). 16 | type ApplyMsg 17 | A service calls Make(peers,me,…) to create a Raft peer. The peers argument is an array of established RPC connections, one to each Raft peer (including this one). The me argument is the index of this peer in the peers array. Start(command) asks Raft to start the processing to append the command to the replicated log. Start() should return immediately, without waiting for this process to complete. The service expects your implementation to send an ApplyMsg for each new committed log entry to the applyCh argument to Make(). 18 | 19 | Your Raft peers should exchange RPCs using the labrpc Go package that we provide to you. It is modeled after Go's rpc library, but internally uses Go channels rather than sockets. raft.go contains some example code that sends an RPC (sendRequestVote()) and that handles an incoming RPC (RequestVote()). The reason you must use labrpc instead of Go's RPC package is that the tester tells labrpc to delay RPCs, re-order them, and delete them to simulate challenging network conditions under which your code should work correctly. Don't modify labrpc because we will test your code with the labrpc as handed out. 20 | 21 | This lab may be your first exposure to writing challenging concurrent code and your first implementation may not be clean enough that you can easily reason about its correctness. Give yourself enough time to rewrite your implementation so that you can easily reason about its correctness. Subsequent labs will build on this lab, so it is important to do a good job on your implementation. 22 | 23 | Part 2A 24 | 25 | Implement leader election and heartbeats (AppendEntries RPCs with no log entries). The goal for Part 2A is for a single leader to be elected, for the leader to remain the leader if there are no failures, and for a new leader to take over if the old leader fails or if packets to/from the old leader are lost. Run go test -run 2A to test your 2A code. 26 | 27 | Add any state you need to the Raft struct in raft.go. You'll also need to define a struct to hold information about each log entry. Your code should follow Figure 2 in the paper as closely as possible (see the sketch below).
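As a rough sketch of the per-server state that Figure 2 lists (the Go layout and comments below are an illustration, not part of the handout; only the field names come from the paper):

    type LogEntry struct {
        Term    int         // term in which the entry was created
        Command interface{} // client command for the service
    }

    type Raft struct {
        // Persistent state on all servers (saved to stable storage before responding to RPCs).
        currentTerm int
        votedFor    int // peer voted for in currentTerm, or -1 for none
        log         []LogEntry
        // Volatile state on all servers.
        commitIndex int
        lastApplied int
        // Volatile state on leaders (reinitialized after each election).
        nextIndex  []int
        matchIndex []int
    }

Of these, only currentTerm, votedFor and log need to survive a restart; the rest is rebuilt at runtime.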
28 | Go RPC sends only struct fields whose names start with capital letters. Sub-structures must also have capitalized field names (e.g. fields of log records in an array). Forgetting to capitalize field names sent by RPC is the single most frequent source of bugs in these labs. 29 | Fill in the RequestVoteArgs and RequestVoteReply structs. Modify Make() to create a background goroutine that will kick off leader election periodically by sending out RequestVote RPCs when it hasn't heard from another peer for a while. This way a peer will learn who is the leader, if there is already a leader, or become the leader itself. Implement the RequestVote() RPC handler so that servers will vote for one another. 30 | To implement heartbeats, define an AppendEntries RPC struct (though you may not need all the arguments yet), and have the leader send them out periodically. Write an AppendEntries RPC handler method that resets the election timeout so that other servers don't step forward as leaders when one has already been elected. 31 | Make sure the election timeouts in different peers don't always fire at the same time, or else all peers will vote only for themselves and no one will become the leader. 32 | The tester requires that the leader send heartbeat RPCs no more than ten times per second. 33 | The tester requires your Raft to elect a new leader within five seconds of the failure of the old leader (if a majority of peers can still communicate). Remember, however, that leader election may require multiple rounds in case of a split vote (which can happen if packets are lost or if candidates unluckily choose the same random backoff times). You must pick election timeouts (and thus heartbeat intervals) that are short enough that it's very likely that an election will complete in less than five seconds even if it requires multiple rounds. 34 | The paper's Section 5.2 mentions election timeouts in the range of 150 to 300 milliseconds. Such a range only makes sense if the leader sends heartbeats considerably more often than once per 150 milliseconds. Because the tester limits you to 10 heartbeats per second, you will have to use an election timeout larger than the paper's 150 to 300 milliseconds, but not too large, because then you may fail to elect a leader within five seconds. 35 | You may find Go's time.Sleep() and rand useful. 36 | You'll need to write code that takes actions periodically or after delays in time. The easiest way to do this is to create a goroutine with a loop that calls time.Sleep(). 37 | If your code has trouble passing the tests, read the paper's Figure 2 again; the full logic for leader election is spread over multiple parts of the figure. 38 | A good way to debug your code is to insert print statements when a peer sends or receives a message, and collect the output in a file with go test -run 2A > out. Then, by studying the trace of messages in the out file, you can identify where your implementation deviates from the desired protocol. You might find DPrintf in util.go useful to turn printing on and off as you debug different problems. 39 | You should check your code with go test -race, and fix any races it reports. 40 | Be sure you pass the 2A tests before submitting Part 2A. Note that the 2A tests test the basic operation of leader election. Parts B and C will test leader election in more challenging settings and may expose bugs in your leader election code which the 2A tests miss. 41 | 42 | Part 2B 43 | 44 | We want Raft to keep a consistent, replicated log of operations. 
A call to Start() at the leader starts the process of adding a new operation to the log; the leader sends the new operation to the other servers in AppendEntries RPCs. 45 | Implement the leader and follower code to append new log entries. This will involve implementing Start(), completing the AppendEntries RPC structs, sending them, fleshing out the AppendEntry RPC handler, and advancing the commitIndex at the leader. Your first goal should be to pass the TestBasicAgree() test (in test_test.go). Once you have that working, you should get all the 2B tests to pass (go test -run 2B). 46 | 47 | You will need to implement the election restriction (section 5.4.1 in the paper). 48 | While the Raft leader is the only server that initiates appends of new entries to the log, all the servers need to independently give each newly committed entry to their local service replica (via their own applyCh). You should try to keep the goroutines that implement the Raft protocol as separate as possible from the code that sends committed log entries on the applyCh (e.g., by using a separate goroutine for delivering committed messages). If you don't separate these activities cleanly, then it is easy to create deadlocks, either in this lab or in subsequent labs in which you implement services that use your Raft package. Without a clean separation, a common deadlock scenario is as follows: an RPC handler sends on the applyCh but it blocks because no goroutine is reading from the channel (e.g., perhaps because it called Start()). Now, the RPC handler is blocked while holding the mutex on the Raft structure. The reading goroutine is also blocked on the mutex because Start() needs to acquire it. Furthermore, no other RPC handler that needs the lock on the Raft structure can run. 49 | Give yourself enough time to rewrite your implementation because only after writing a first implementation will you realize how to organize your code cleanly. For example, only after writing one implementation will you understand how to write an implementation that makes it easy to argue that your implementation has no deadlocks. 50 | Figure out the minimum number of messages Raft should use when reaching agreement in non-failure cases and make your implementation use that minimum. 51 | You may need to write code that waits for certain events to occur. Do not write loops that execute continuously without pausing, since that will slow your implementation enough that it fails tests. You can wait efficiently with Go's channels, or Go's condition variables, or (if all else fails) by inserting a time.Sleep(10 * time.Millisecond) in each loop iteration. 52 | Be sure you pass the 2A and 2B tests before submitting Part 2B. 53 | 54 | Part 2C 55 | 56 | If a Raft-based server reboots it should resume service where it left off. This requires that Raft keep persistent state that survives a reboot. The paper's Figure 2 mentions which state should be persistent, and raft.go contains examples of how to save and restore persistent state. 57 | 58 | A “real” implementation would do this by writing Raft's persistent state to disk each time it changes, and reading the latest saved state from disk when restarting after a reboot. Your implementation won't use the disk; instead, it will save and restore persistent state from a Persister object (see persister.go). Whoever calls Raft.Make() supplies a Persister that initially holds Raft's most recently persisted state (if any). 
Raft should initialize its state from that Persister, and should use it to save its persistent state each time the state changes. Use the Persister's ReadRaftState() and SaveRaftState() methods. 59 | 60 | Implement persistence by first adding code that saves and restores persistent state to persist() and readPersist() in raft.go. You will need to encode (or "serialize") the state as an array of bytes in order to pass it to the Persister. Use Go's gob encoder to do this; see the comments in persist() and readPersist(). 61 | 62 | You now need to determine at what points in the Raft protocol your servers are required to persist their state, and insert calls to persist() in those places. You must also load persisted state in Raft.Make(). Once you've done this, you should pass the remaining tests. You may want to first try to pass the "basic persistence" test (go test -run 'TestPersist12C'), and then tackle the remaining ones (go test -run 2C). 63 | 64 | In order to avoid running out of memory, Raft must periodically discard old log entries, but you do not have to worry about this until the next lab. 65 | 66 | Many of the 2C tests involve servers failing and the network losing RPC requests or replies. 67 | The Go gob encoder you'll use to encode persistent state only saves fields whose names start with upper case letters. Using small caps for field names is a common source of mysterious bugs, since Go doesn't warn you that they won't be saved. 68 | In order to pass some of the challenging tests towards the end, such as those marked "unreliable", you will need to implement the optimization to allow a follower to back up the leader's nextIndex by more than one entry at a time. See the description in the extended Raft paper starting at the bottom of page 7 and top of page 8 (marked by a gray line). The paper is vague about the details; you will need to fill in the gaps, perhaps with the help of the 6.824 Raft lectures. 69 | Be sure you pass all the tests before submitting Part 2C. 70 | 71 | -------------------------------------------------------------------------------- /src/raft/persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft and kvraft to save persistent 5 | // Raft state (log &c) and k/v server snapshots. 6 | // 7 | // we will use the original persister.go to test your code for grading. 8 | // so, while you can modify this code to help you debug, please 9 | // test with the original before submitting. 10 | // 11 | 12 | import "sync" 13 | 14 | type Persister struct { 15 | mu sync.Mutex 16 | raftstate []byte 17 | snapshot []byte 18 | } 19 | 20 | func MakePersister() *Persister { 21 | return &Persister{} 22 | } 23 | 24 | func (ps *Persister) Copy() *Persister { 25 | ps.mu.Lock() 26 | defer ps.mu.Unlock() 27 | np := MakePersister() 28 | np.raftstate = ps.raftstate 29 | np.snapshot = ps.snapshot 30 | return np 31 | } 32 | 33 | func (ps *Persister) SaveRaftState(state []byte) { 34 | ps.mu.Lock() 35 | defer ps.mu.Unlock() 36 | ps.raftstate = state 37 | } 38 | 39 | func (ps *Persister) ReadRaftState() []byte { 40 | ps.mu.Lock() 41 | defer ps.mu.Unlock() 42 | return ps.raftstate 43 | } 44 | 45 | func (ps *Persister) RaftStateSize() int { 46 | ps.mu.Lock() 47 | defer ps.mu.Unlock() 48 | return len(ps.raftstate) 49 | } 50 | 51 | // Save both Raft state and K/V snapshot as a single atomic action, 52 | // to help avoid them getting out of sync. 
53 | func (ps *Persister) SaveStateAndSnapshot(state []byte, snapshot []byte) { 54 | ps.mu.Lock() 55 | defer ps.mu.Unlock() 56 | ps.raftstate = state 57 | ps.snapshot = snapshot 58 | } 59 | 60 | func (ps *Persister) ReadSnapshot() []byte { 61 | ps.mu.Lock() 62 | defer ps.mu.Unlock() 63 | return ps.snapshot 64 | } 65 | 66 | func (ps *Persister) SnapshotSize() int { 67 | ps.mu.Lock() 68 | defer ps.mu.Unlock() 69 | return len(ps.snapshot) 70 | } 71 | -------------------------------------------------------------------------------- /src/raft/util.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "log" 4 | 5 | // Debugging 6 | const Debug = 0 7 | 8 | func init() { 9 | log.SetFlags(log.Lmicroseconds) 10 | } 11 | 12 | func DPrintf(format string, a ...interface{}) (n int, err error) { 13 | if Debug > 0 { 14 | log.Printf(format, a...) 15 | } 16 | return 17 | } 18 | -------------------------------------------------------------------------------- /src/shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // client code to talk to a sharded key/value service. 5 | // 6 | // the client first talks to the shardmaster to find out 7 | // the assignment of shards (keys) to groups, and then 8 | // talks to the group that holds the key's shard. 9 | // 10 | 11 | import "labrpc" 12 | import "crypto/rand" 13 | import "math/big" 14 | import "shardmaster" 15 | import "time" 16 | import "sync/atomic" 17 | 18 | var clientIdGen = int32(0) 19 | 20 | const maxTry = 3 21 | 22 | // 23 | // which shard is a key in? 24 | // please use this function, 25 | // and please do not change it. 26 | // 27 | func key2shard(key string) int { 28 | shard := 0 29 | if len(key) > 0 { 30 | shard = int(key[0]) 31 | } 32 | shard %= shardmaster.NShards 33 | return shard 34 | } 35 | 36 | func nrand() int64 { 37 | max := big.NewInt(int64(1) << 62) 38 | bigx, _ := rand.Int(rand.Reader, max) 39 | x := bigx.Int64() 40 | return x 41 | } 42 | 43 | type Clerk struct { 44 | sm *shardmaster.Clerk // client to shardMaster 45 | config shardmaster.Config 46 | make_end func(string) *labrpc.ClientEnd 47 | // You will have to modify this struct. 48 | 49 | fail int // successive fail calls for leader 50 | clientId int32 // client id, init by clientIdGen 51 | reqId int64 // req id 52 | } 53 | 54 | // 55 | // the tester calls MakeClerk. 56 | // 57 | // masters[] is needed to call shardmaster.MakeClerk(). 58 | // 59 | // make_end(servername) turns a server name from a 60 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 61 | // send RPCs. 62 | // 63 | func MakeClerk(masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk { 64 | ck := new(Clerk) 65 | ck.sm = shardmaster.MakeClerk(masters) 66 | ck.make_end = make_end 67 | // You'll have to add code here. 68 | ck.clientId = atomic.AddInt32(&clientIdGen, 1) 69 | ck.reqId = 1 70 | 71 | // init config first 72 | ck.config = ck.sm.Query(-1) 73 | return ck 74 | } 75 | 76 | // 77 | // fetch the current value for a key. 78 | // returns "" if the key does not exist. 79 | // keeps trying forever in the face of all other errors. 80 | // You will have to modify this function. 
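// The retry strategy below: look up the shard's replica group in the cached
// config, try each server of that group with a per-RPC timeout (RpcTimeout),
// return as soon as one replies successfully, and on ErrWrongGroup (or after
// exhausting the group) sleep briefly, re-fetch the latest config from the
// shardmaster, and try again.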
81 | // 82 | func (ck *Clerk) Get(key string) string { 83 | args := GetArgs{} 84 | args.Key = key 85 | args.ReqID = ck.reqId 86 | ck.reqId++ 87 | args.ID = ck.clientId 88 | 89 | shard := key2shard(key) 90 | for { 91 | gid := ck.config.Shards[shard] 92 | if servers, ok := ck.config.Groups[gid]; ok { 93 | // try each server for the shard. 94 | for si := 0; si < len(servers); si++ { 95 | srv := ck.make_end(servers[si]) 96 | 97 | // start rpc call to server 98 | var reply GetReply 99 | done := make(chan bool) 100 | go func() { 101 | ok := srv.Call("ShardKV.Get", &args, &reply) 102 | done <- ok 103 | }() 104 | 105 | // wait rpc response 106 | ok := true 107 | timeout := false 108 | select { 109 | case <-time.After(RpcTimeout): 110 | timeout = true 111 | 112 | case ok = <-done: 113 | } 114 | //close(done) 115 | 116 | if !timeout && ok && !reply.WrongLeader && (reply.Err == "" || reply.Err == OK || reply.Err == ErrNoKey) { 117 | DPrintf("[client %d] succ GET: %s = %s", ck.clientId, key, reply.Value) 118 | return reply.Value 119 | } 120 | 121 | if ok && reply.Err == ErrWrongGroup { 122 | break 123 | } 124 | 125 | // not leader or timeout, try next server in this replica 126 | } 127 | } 128 | 129 | time.Sleep(100 * time.Millisecond) 130 | // ask master for the latest configuration. 131 | ck.config = ck.sm.Query(-1) 132 | } 133 | 134 | return "" 135 | } 136 | 137 | // 138 | // shared by Put and Append. 139 | // You will have to modify this function. 140 | // 141 | func (ck *Clerk) PutAppend(key string, value string, op string) { 142 | args := PutAppendArgs{} 143 | args.Key = key 144 | args.Value = value 145 | args.Op = op 146 | 147 | args.ReqID = ck.reqId 148 | ck.reqId++ 149 | args.ID = ck.clientId 150 | 151 | shard := key2shard(key) 152 | for { 153 | gid := ck.config.Shards[shard] 154 | if servers, ok := ck.config.Groups[gid]; ok { 155 | // try each server for the shard. 156 | for si := 0; si < len(servers); si++ { 157 | srv := ck.make_end(servers[si]) 158 | 159 | // start rpc call to server 160 | var reply PutAppendReply 161 | done := make(chan bool) 162 | go func() { 163 | ok := srv.Call("ShardKV.PutAppend", &args, &reply) 164 | done <- ok 165 | }() 166 | 167 | // wait rpc response 168 | ok := true 169 | timeout := false 170 | select { 171 | case <-time.After(RpcTimeout): 172 | timeout = true 173 | 174 | case ok = <-done: 175 | } 176 | //close(done) 177 | 178 | if !timeout && ok && !reply.WrongLeader && (reply.Err == "" || reply.Err == OK) { 179 | DPrintf("[client %d] succ PutAppend: %s = %s, %v", ck.clientId, key, value, reply.Err) 180 | return 181 | } 182 | if ok && reply.Err == ErrWrongGroup { 183 | break 184 | } 185 | // not leader or timeout, try next server in this replica 186 | } 187 | } 188 | 189 | time.Sleep(100 * time.Millisecond) 190 | // ask master for the latest configuration. 191 | ck.config = ck.sm.Query(-1) 192 | } 193 | } 194 | 195 | func (ck *Clerk) Put(key string, value string) { 196 | ck.PutAppend(key, value, "Put") 197 | } 198 | func (ck *Clerk) Append(key string, value string) { 199 | ck.PutAppend(key, value, "Append") 200 | } 201 | 202 | -------------------------------------------------------------------------------- /src/shardkv/common.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "time" 4 | 5 | // 6 | // Sharded key/value server. 7 | // Lots of replica groups, each running op-at-a-time paxos. 8 | // Shardmaster decides which group serves each shard. 
9 | // Shardmaster may change shard assignment from time to time. 10 | // 11 | // You will have to modify these definitions. 12 | // 13 | 14 | const ( 15 | OK = "OK" 16 | ErrNoKey = "ErrNoKey" 17 | ErrWrongGroup = "ErrWrongGroup" 18 | 19 | ErrNotLeader = "ErrNotLeader" 20 | ErrInvalidOp = "ErrInvalidOp" 21 | ErrDuplicateReq = "ErrDuplicateReq" 22 | ) 23 | 24 | type Err string 25 | 26 | // Put or Append 27 | type PutAppendArgs struct { 28 | // You'll have to add definitions here. 29 | Key string 30 | Value string 31 | Op string // "Put" or "Append" 32 | // You'll have to add definitions here. 33 | // Field names must start with capital letters, 34 | // otherwise RPC will break. 35 | ID int32 // client id 36 | ReqID int64 37 | } 38 | 39 | type PutAppendReply struct { 40 | WrongLeader bool 41 | Err Err 42 | 43 | ID int32 44 | RspID int64 45 | } 46 | 47 | type GetArgs struct { 48 | Key string 49 | // You'll have to add definitions here. 50 | ID int32 51 | ReqID int64 52 | } 53 | 54 | type GetReply struct { 55 | WrongLeader bool 56 | Err Err 57 | Value string 58 | 59 | ID int32 60 | RspID int64 61 | } 62 | 63 | const RpcTimeout time.Duration = 1000 * time.Millisecond 64 | 65 | -------------------------------------------------------------------------------- /src/shardkv/config.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "shardmaster" 4 | import "labrpc" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/big" 11 | import "math/rand" 12 | import "encoding/base64" 13 | import "sync" 14 | import "runtime" 15 | import "raft" 16 | import "strconv" 17 | import "fmt" 18 | import "time" 19 | 20 | func randstring(n int) string { 21 | b := make([]byte, 2*n) 22 | crand.Read(b) 23 | s := base64.URLEncoding.EncodeToString(b) 24 | return s[0:n] 25 | } 26 | 27 | func makeSeed() int64 { 28 | max := big.NewInt(int64(1) << 62) 29 | bigx, _ := crand.Int(crand.Reader, max) 30 | x := bigx.Int64() 31 | return x 32 | } 33 | 34 | // Randomize server handles 35 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 36 | sa := make([]*labrpc.ClientEnd, len(kvh)) 37 | copy(sa, kvh) 38 | for i := range sa { 39 | j := rand.Intn(i + 1) 40 | sa[i], sa[j] = sa[j], sa[i] 41 | } 42 | return sa 43 | } 44 | 45 | type group struct { 46 | gid int 47 | servers []*ShardKV 48 | saved []*raft.Persister 49 | endnames [][]string 50 | mendnames [][]string 51 | } 52 | 53 | type config struct { 54 | mu sync.Mutex 55 | t *testing.T 56 | net *labrpc.Network 57 | start time.Time // time at which make_config() was called 58 | 59 | nmasters int 60 | masterservers []*shardmaster.ShardMaster 61 | mck *shardmaster.Clerk 62 | 63 | ngroups int 64 | n int // servers per k/v group 65 | groups []*group 66 | 67 | clerks map[*Clerk][]string 68 | nextClientId int 69 | maxraftstate int 70 | } 71 | 72 | func (cfg *config) checkTimeout() { 73 | // enforce a two minute real-time limit on each test 74 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 75 | cfg.t.Fatal("test took longer than 120 seconds") 76 | } 77 | } 78 | 79 | func (cfg *config) cleanup() { 80 | for gi := 0; gi < cfg.ngroups; gi++ { 81 | cfg.ShutdownGroup(gi) 82 | } 83 | cfg.net.Cleanup() 84 | cfg.checkTimeout() 85 | } 86 | 87 | // check that no server's log is too big. 
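// With snapshotting enabled (maxraftstate >= 0) the persisted Raft state must
// stay below 2*maxraftstate; with maxraftstate == -1 no snapshot may be written.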
88 | func (cfg *config) checklogs() { 89 | for gi := 0; gi < cfg.ngroups; gi++ { 90 | for i := 0; i < cfg.n; i++ { 91 | raft := cfg.groups[gi].saved[i].RaftStateSize() 92 | snap := len(cfg.groups[gi].saved[i].ReadSnapshot()) 93 | if cfg.maxraftstate >= 0 && raft > 2*cfg.maxraftstate { 94 | cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v", 95 | raft, cfg.maxraftstate) 96 | } 97 | if cfg.maxraftstate < 0 && snap > 0 { 98 | cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!") 99 | } 100 | } 101 | } 102 | } 103 | 104 | // master server name for labrpc. 105 | func (cfg *config) mastername(i int) string { 106 | return "master" + strconv.Itoa(i) 107 | } 108 | 109 | // shard server name for labrpc. 110 | // i'th server of group gid. 111 | func (cfg *config) servername(gid int, i int) string { 112 | return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i) 113 | } 114 | 115 | func (cfg *config) makeClient() *Clerk { 116 | cfg.mu.Lock() 117 | defer cfg.mu.Unlock() 118 | 119 | // ClientEnds to talk to master service. 120 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 121 | endnames := make([]string, cfg.n) 122 | for j := 0; j < cfg.nmasters; j++ { 123 | endnames[j] = randstring(20) 124 | ends[j] = cfg.net.MakeEnd(endnames[j]) 125 | cfg.net.Connect(endnames[j], cfg.mastername(j)) 126 | cfg.net.Enable(endnames[j], true) 127 | } 128 | 129 | ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd { 130 | name := randstring(20) 131 | end := cfg.net.MakeEnd(name) 132 | cfg.net.Connect(name, servername) 133 | cfg.net.Enable(name, true) 134 | return end 135 | }) 136 | cfg.clerks[ck] = endnames 137 | cfg.nextClientId++ 138 | return ck 139 | } 140 | 141 | func (cfg *config) deleteClient(ck *Clerk) { 142 | cfg.mu.Lock() 143 | defer cfg.mu.Unlock() 144 | 145 | v := cfg.clerks[ck] 146 | for i := 0; i < len(v); i++ { 147 | os.Remove(v[i]) 148 | } 149 | delete(cfg.clerks, ck) 150 | } 151 | 152 | // Shutdown i'th server of gi'th group, by isolating it 153 | func (cfg *config) ShutdownServer(gi int, i int) { 154 | cfg.mu.Lock() 155 | defer cfg.mu.Unlock() 156 | 157 | gg := cfg.groups[gi] 158 | 159 | // prevent this server from sending 160 | for j := 0; j < len(gg.servers); j++ { 161 | name := gg.endnames[i][j] 162 | cfg.net.Enable(name, false) 163 | } 164 | for j := 0; j < len(gg.mendnames[i]); j++ { 165 | name := gg.mendnames[i][j] 166 | cfg.net.Enable(name, false) 167 | } 168 | 169 | // disable client connections to the server. 170 | // it's important to do this before creating 171 | // the new Persister in saved[i], to avoid 172 | // the possibility of the server returning a 173 | // positive reply to an Append but persisting 174 | // the result in the superseded Persister. 175 | cfg.net.DeleteServer(cfg.servername(gg.gid, i)) 176 | 177 | // a fresh persister, in case old instance 178 | // continues to update the Persister. 179 | // but copy old persister's content so that we always 180 | // pass Make() the last persisted state. 
181 | if gg.saved[i] != nil { 182 | gg.saved[i] = gg.saved[i].Copy() 183 | } 184 | 185 | kv := gg.servers[i] 186 | if kv != nil { 187 | cfg.mu.Unlock() 188 | kv.Kill() 189 | cfg.mu.Lock() 190 | gg.servers[i] = nil 191 | } 192 | } 193 | 194 | func (cfg *config) ShutdownGroup(gi int) { 195 | for i := 0; i < cfg.n; i++ { 196 | cfg.ShutdownServer(gi, i) 197 | } 198 | } 199 | 200 | // start i'th server in gi'th group 201 | func (cfg *config) StartServer(gi int, i int) { 202 | cfg.mu.Lock() 203 | 204 | gg := cfg.groups[gi] 205 | 206 | // a fresh set of outgoing ClientEnd names 207 | // to talk to other servers in this group. 208 | gg.endnames[i] = make([]string, cfg.n) 209 | for j := 0; j < cfg.n; j++ { 210 | gg.endnames[i][j] = randstring(20) 211 | } 212 | 213 | // and the connections to other servers in this group. 214 | ends := make([]*labrpc.ClientEnd, cfg.n) 215 | for j := 0; j < cfg.n; j++ { 216 | ends[j] = cfg.net.MakeEnd(gg.endnames[i][j]) 217 | cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j)) 218 | cfg.net.Enable(gg.endnames[i][j], true) 219 | } 220 | 221 | // ends to talk to shardmaster service 222 | mends := make([]*labrpc.ClientEnd, cfg.nmasters) 223 | gg.mendnames[i] = make([]string, cfg.nmasters) 224 | for j := 0; j < cfg.nmasters; j++ { 225 | gg.mendnames[i][j] = randstring(20) 226 | mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j]) 227 | cfg.net.Connect(gg.mendnames[i][j], cfg.mastername(j)) 228 | cfg.net.Enable(gg.mendnames[i][j], true) 229 | } 230 | 231 | // a fresh persister, so old instance doesn't overwrite 232 | // new instance's persisted state. 233 | // give the fresh persister a copy of the old persister's 234 | // state, so that the spec is that we pass StartKVServer() 235 | // the last persisted state. 236 | if gg.saved[i] != nil { 237 | gg.saved[i] = gg.saved[i].Copy() 238 | } else { 239 | gg.saved[i] = raft.MakePersister() 240 | } 241 | cfg.mu.Unlock() 242 | 243 | gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate, 244 | gg.gid, mends, 245 | func(servername string) *labrpc.ClientEnd { 246 | name := randstring(20) 247 | end := cfg.net.MakeEnd(name) 248 | cfg.net.Connect(name, servername) 249 | cfg.net.Enable(name, true) 250 | return end 251 | }) 252 | 253 | kvsvc := labrpc.MakeService(gg.servers[i]) 254 | rfsvc := labrpc.MakeService(gg.servers[i].rf) 255 | srv := labrpc.MakeServer() 256 | srv.AddService(kvsvc) 257 | srv.AddService(rfsvc) 258 | cfg.net.AddServer(cfg.servername(gg.gid, i), srv) 259 | } 260 | 261 | func (cfg *config) StartGroup(gi int) { 262 | for i := 0; i < cfg.n; i++ { 263 | cfg.StartServer(gi, i) 264 | } 265 | } 266 | 267 | func (cfg *config) StartMasterServer(i int) { 268 | // ClientEnds to talk to other master replicas. 269 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 270 | for j := 0; j < cfg.nmasters; j++ { 271 | endname := randstring(20) 272 | ends[j] = cfg.net.MakeEnd(endname) 273 | cfg.net.Connect(endname, cfg.mastername(j)) 274 | cfg.net.Enable(endname, true) 275 | } 276 | 277 | p := raft.MakePersister() 278 | 279 | cfg.masterservers[i] = shardmaster.StartServer(ends, i, p) 280 | 281 | msvc := labrpc.MakeService(cfg.masterservers[i]) 282 | rfsvc := labrpc.MakeService(cfg.masterservers[i].Raft()) 283 | srv := labrpc.MakeServer() 284 | srv.AddService(msvc) 285 | srv.AddService(rfsvc) 286 | cfg.net.AddServer(cfg.mastername(i), srv) 287 | } 288 | 289 | func (cfg *config) shardclerk() *shardmaster.Clerk { 290 | // ClientEnds to talk to master service. 
291 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 292 | for j := 0; j < cfg.nmasters; j++ { 293 | name := randstring(20) 294 | ends[j] = cfg.net.MakeEnd(name) 295 | cfg.net.Connect(name, cfg.mastername(j)) 296 | cfg.net.Enable(name, true) 297 | } 298 | 299 | return shardmaster.MakeClerk(ends) 300 | } 301 | 302 | // tell the shardmaster that a group is joining. 303 | func (cfg *config) join(gi int) { 304 | cfg.joinm([]int{gi}) 305 | } 306 | 307 | func (cfg *config) joinm(gis []int) { 308 | m := make(map[int][]string, len(gis)) 309 | for _, g := range gis { 310 | gid := cfg.groups[g].gid 311 | servernames := make([]string, cfg.n) 312 | for i := 0; i < cfg.n; i++ { 313 | servernames[i] = cfg.servername(gid, i) 314 | } 315 | m[gid] = servernames 316 | } 317 | cfg.mck.Join(m) 318 | } 319 | 320 | // tell the shardmaster that a group is leaving. 321 | func (cfg *config) leave(gi int) { 322 | cfg.leavem([]int{gi}) 323 | } 324 | 325 | func (cfg *config) leavem(gis []int) { 326 | gids := make([]int, 0, len(gis)) 327 | for _, g := range gis { 328 | gids = append(gids, cfg.groups[g].gid) 329 | } 330 | cfg.mck.Leave(gids) 331 | } 332 | 333 | var ncpu_once sync.Once 334 | 335 | func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config { 336 | ncpu_once.Do(func() { 337 | if runtime.NumCPU() < 2 { 338 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 339 | } 340 | rand.Seed(makeSeed()) 341 | }) 342 | runtime.GOMAXPROCS(4) 343 | cfg := &config{} 344 | cfg.t = t 345 | cfg.maxraftstate = maxraftstate 346 | cfg.net = labrpc.MakeNetwork() 347 | cfg.start = time.Now() 348 | 349 | // master 350 | cfg.nmasters = 3 351 | cfg.masterservers = make([]*shardmaster.ShardMaster, cfg.nmasters) 352 | for i := 0; i < cfg.nmasters; i++ { 353 | cfg.StartMasterServer(i) 354 | } 355 | cfg.mck = cfg.shardclerk() 356 | 357 | cfg.ngroups = 3 358 | cfg.groups = make([]*group, cfg.ngroups) 359 | cfg.n = n 360 | for gi := 0; gi < cfg.ngroups; gi++ { 361 | gg := &group{} 362 | cfg.groups[gi] = gg 363 | gg.gid = 100 + gi 364 | gg.servers = make([]*ShardKV, cfg.n) 365 | gg.saved = make([]*raft.Persister, cfg.n) 366 | gg.endnames = make([][]string, cfg.n) 367 | gg.mendnames = make([][]string, cfg.nmasters) 368 | for i := 0; i < cfg.n; i++ { 369 | cfg.StartServer(gi, i) 370 | } 371 | } 372 | 373 | cfg.clerks = make(map[*Clerk][]string) 374 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 375 | 376 | cfg.net.Reliable(!unreliable) 377 | 378 | return cfg 379 | } 380 | -------------------------------------------------------------------------------- /src/shardkv/server.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | 4 | import "shardmaster" 5 | import "labrpc" 6 | import "raft" 7 | import "sync" 8 | import "labgob" 9 | import "log" 10 | import "time" 11 | 12 | const Debug = 0 13 | 14 | func DPrintf(format string, a ...interface{}) (n int, err error) { 15 | if Debug > 0 { 16 | log.Printf(format, a...) 17 | } 18 | return 19 | } 20 | 21 | 22 | // real Command 23 | type Op struct { 24 | // Your definitions here. 25 | // Field names must start with capital letters, 26 | // otherwise RPC will break. 
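// An Op records one client request: the key, the value (unused for Get), the
// operation name, and the client/request IDs used for duplicate detection.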
27 | Key string 28 | Value string 29 | Operation string // Get Put or Append 30 | 31 | //Request context 32 | ID int32 33 | ReqID int64 34 | } 35 | 36 | type ShardInfo struct { 37 | shardID int 38 | // real kv data here 39 | data map[string]string 40 | // request records 41 | requests map[int32]int64 // client -> last commited reqID 42 | } 43 | 44 | type ShardKV struct { 45 | mu sync.Mutex 46 | me int 47 | rf *raft.Raft 48 | applyCh chan raft.ApplyMsg 49 | make_end func(string) *labrpc.ClientEnd 50 | gid int 51 | masters []*labrpc.ClientEnd 52 | maxraftstate int // snapshot if log grows this big 53 | 54 | // Your definitions here. 55 | // shards and data 56 | data map[int]*ShardInfo // shard id --> ShardInfo 57 | 58 | sm *shardmaster.Clerk // client to shardMaster 59 | config shardmaster.Config 60 | // Notify chan for each log index 61 | notifyCh map[int]chan Op 62 | // for exit 63 | shutdown chan interface{} 64 | } 65 | 66 | // check if repeated request 67 | func (kv *ShardKV) isDuplicated(shard int, id int32, reqId int64) bool { 68 | kv.mu.Lock() 69 | defer kv.mu.Unlock() 70 | maxSeenReqId, ok := kv.data[shard].requests[id] 71 | if ok { 72 | return reqId <= maxSeenReqId 73 | } 74 | return false 75 | } 76 | 77 | // true if update success, imply nonrepeat request can be applied to state machine: eg, data field 78 | func (kv *ShardKV) updateIfNotDuplicated(shard int, id int32, reqId int64) bool { 79 | // must hold lock outside 80 | maxSeenReqId, ok := kv.data[shard].requests[id] 81 | if ok { 82 | if reqId <= maxSeenReqId { 83 | return false 84 | } 85 | } 86 | 87 | kv.data[shard].requests[id] = reqId 88 | return true 89 | } 90 | 91 | 92 | // call raft.Start to commit a command as log entry 93 | func (kv *ShardKV) proposeCommand(cmd Op) bool { 94 | kv.mu.Lock() 95 | // lock kv first, think about: 96 | // If no lock with rf.Start, raft maybe very quick to agree. 97 | // Then applyRoutine will not find notifyCh on log index, 98 | // proposeCommand will block on notifyCh forever. 99 | logIndex, _, isLeader := kv.rf.Start(cmd) 100 | if !isLeader { 101 | kv.mu.Unlock() 102 | return false 103 | } 104 | 105 | // wait command to be commited 106 | 107 | // use logIndex because all servers agree on same log index 108 | ch, ok := kv.notifyCh[logIndex] 109 | if !ok { 110 | ch = make(chan Op, 1) 111 | kv.notifyCh[logIndex] = ch 112 | } 113 | kv.mu.Unlock() 114 | 115 | // check 116 | if ch == nil { 117 | panic("FATAL: chan is nil") 118 | } 119 | 120 | // wait on ch forever, because: 121 | // If I lose leadership before commit, may be partioned 122 | // I can't response, so wait until partion healed. 123 | // Eventually a log will be commited on index, then I'm 124 | // awaken, but cmd1 is different from cmd, return failed 125 | // to client. 126 | // If client retry another leader when I waiting, no matter. 127 | select { 128 | case cmd1 := <-ch: 129 | return cmd1 == cmd // if different log, me is not leader 130 | } 131 | 132 | return false 133 | } 134 | 135 | 136 | func (kv* ShardKV) checkGroup(key string) bool { 137 | kv.mu.Lock() 138 | defer kv.mu.Unlock() 139 | shard := key2shard(key) 140 | if len(kv.config.Shards) <= shard { 141 | return false 142 | } 143 | 144 | expectGid := kv.config.Shards[shard] 145 | return expectGid == kv.gid 146 | } 147 | 148 | func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) { 149 | // Your code here. 
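// Handler outline: reject keys whose shard this group does not own, fail fast
// if this server is not the Raft leader, drop duplicate client requests, then
// propose a Get command through Raft and, once it commits, read the value from
// the shard's map under the mutex.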
150 | DPrintf("[server %d] GetRPC args %v", kv.me, args) 151 | reply.WrongLeader = false 152 | reply.Err = OK 153 | reply.ID = args.ID 154 | reply.RspID = args.ReqID 155 | 156 | // check if wrong group 157 | if !kv.checkGroup(args.Key) { 158 | DPrintf("[server %d] GetRPC wrong group", kv.me) 159 | reply.Err = ErrWrongGroup 160 | return 161 | } 162 | 163 | // check if leader, useless but efficient 164 | _, isLeader := kv.rf.GetState() 165 | if !isLeader { 166 | reply.WrongLeader = true 167 | reply.Err = ErrNotLeader 168 | return 169 | } 170 | 171 | // check if repeated request, useless but efficient 172 | duplicate := kv.isDuplicated(key2shard(args.Key), args.ID, args.ReqID) 173 | if duplicate { 174 | reply.Err = ErrDuplicateReq 175 | return 176 | } 177 | 178 | cmd := Op{} 179 | cmd.Key = args.Key 180 | cmd.Value = "" // no use for Get 181 | cmd.Operation = "Get" 182 | cmd.ID = args.ID 183 | cmd.ReqID = args.ReqID 184 | 185 | // try commit cmd to raft log 186 | succ := kv.proposeCommand(cmd) 187 | if succ { 188 | shard := key2shard(args.Key) 189 | kv.mu.Lock() 190 | if v, ok := kv.data[shard].data[args.Key]; ok { 191 | reply.Value = v 192 | } else { 193 | reply.Value = "" 194 | reply.Err = ErrNoKey 195 | } 196 | kv.mu.Unlock() 197 | } else { 198 | reply.WrongLeader = true 199 | reply.Err = ErrNotLeader 200 | } 201 | } 202 | 203 | func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 204 | // Your code here. 205 | DPrintf("[server %d] PutAppendRPC args %v", kv.me, args) 206 | reply.WrongLeader = false 207 | reply.Err = OK 208 | reply.ID = args.ID 209 | reply.RspID = args.ReqID 210 | 211 | // check if wrong group 212 | if !kv.checkGroup(args.Key) { 213 | reply.Err = ErrWrongGroup 214 | return 215 | } 216 | 217 | // check if leader, useless but efficient 218 | _, isLeader := kv.rf.GetState() 219 | if !isLeader { 220 | reply.WrongLeader = true 221 | reply.Err = ErrNotLeader 222 | return 223 | } else { 224 | if args.Op != "Put" && args.Op != "Append" { 225 | reply.Err = ErrInvalidOp 226 | return 227 | } 228 | } 229 | 230 | // check if repeated request, useless but efficient 231 | duplicate := kv.isDuplicated(key2shard(args.Key), args.ID, args.ReqID) 232 | if duplicate { 233 | reply.Err = ErrDuplicateReq 234 | return 235 | } 236 | 237 | cmd := Op{} 238 | cmd.Key = args.Key 239 | cmd.Value = args.Value 240 | cmd.Operation = args.Op 241 | cmd.ID = args.ID 242 | cmd.ReqID = args.ReqID 243 | 244 | // try commit cmd to raft log 245 | succ := kv.proposeCommand(cmd) 246 | if !succ { 247 | reply.WrongLeader = true 248 | reply.Err = ErrNotLeader 249 | } 250 | } 251 | 252 | // 253 | // the tester calls Kill() when a ShardKV instance won't 254 | // be needed again. you are not required to do anything 255 | // in Kill(), but it might be convenient to (for example) 256 | // turn off debug output from this instance. 257 | // 258 | func (kv *ShardKV) Kill() { 259 | kv.rf.Kill() 260 | // Your code here, if desired. 
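// Closing the shutdown channel lets the long-running goroutines
// (applyRoutine and pollConfigRoutine) observe the kill and return.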
261 | close(kv.shutdown) 262 | } 263 | 264 | // when raft commited a log entry, it'll notify me 265 | func (kv *ShardKV) applyRoutine() { 266 | for { 267 | var op Op 268 | var applyMsg raft.ApplyMsg 269 | 270 | select { 271 | case <-kv.shutdown: 272 | DPrintf("[server %d] shutdown applyRoutine", kv.me) 273 | return 274 | 275 | case applyMsg = <-kv.applyCh: 276 | } 277 | 278 | if !applyMsg.CommandValid { 279 | // TODO 280 | //kv.loadSnapshot(applyMsg.Snapshot) 281 | continue 282 | } 283 | 284 | op, _ = (applyMsg.Command).(Op) 285 | 286 | shard := key2shard(op.Key) 287 | kv.mu.Lock() 288 | // Follower & Leader: try apply to state machine, fail if duplicated request 289 | if op.Operation == "Put" { 290 | update := kv.updateIfNotDuplicated(shard, op.ID, op.ReqID) 291 | if update { 292 | kv.data[shard].data[op.Key] = op.Value 293 | DPrintf("[server %d] apply for client %d PUT key %s, value %s, logindex %d", kv.me, op.ID, op.Key, op.Value, applyMsg.CommandIndex) 294 | } 295 | } else if op.Operation == "Append" { 296 | update := kv.updateIfNotDuplicated(shard, op.ID, op.ReqID) 297 | if update { 298 | kv.data[shard].data[op.Key] += op.Value 299 | DPrintf("[server %d] apply for client %d APPEND key %s, value %s, logindex %d", kv.me, op.ID, op.Key, op.Value, applyMsg.CommandIndex) 300 | } 301 | } else { 302 | // Do nothing for Get, should I cached reply? 303 | var val = "" 304 | if v, ok := kv.data[shard].data[op.Key]; ok { 305 | val = v 306 | } 307 | DPrintf("[server %d] apply for client %d GET key %s, value %s, logindex %d", kv.me, op.ID, op.Key, val, applyMsg.CommandIndex) 308 | } 309 | 310 | ch, ok := kv.notifyCh[applyMsg.CommandIndex] 311 | if ok { 312 | ch <- op 313 | } 314 | 315 | /* 316 | if kv.maxraftstate > 0 && kv.rf.RaftStateSize() >= kv.maxraftstate { 317 | DPrintf("(%d) state size %d", kv.me, kv.rf.RaftStateSize()) 318 | // If I keep mu.Lock, the startSnapshot will use raft's lock 319 | // But raft's applyRoutine is keeping lock and apply msg, he will be blocking with held lock. 320 | //go kv.startSnapshot(applyMsg.CommandIndex) 321 | kv.startSnapshot(applyMsg.CommandIndex) 322 | } 323 | */ 324 | 325 | kv.mu.Unlock() 326 | } 327 | } 328 | 329 | // for snapshot 330 | 331 | // poll shardMaster 332 | func (kv *ShardKV) pollConfigRoutine() { 333 | timer := time.After(time.Duration(1) * time.Nanosecond) 334 | period := time.Duration(50) * time.Millisecond 335 | for { 336 | select { 337 | case <-kv.shutdown: 338 | return 339 | 340 | case <-timer: 341 | timer = time.After(period) 342 | } 343 | kv.mu.Lock() 344 | kv.config = kv.sm.Query(-1) 345 | //DPrintf("[server %d] config %v", kv.me, kv.config) 346 | kv.mu.Unlock() 347 | } 348 | } 349 | 350 | // 351 | // servers[] contains the ports of the servers in this group. 352 | // 353 | // me is the index of the current server in servers[]. 354 | // 355 | // the k/v server should store snapshots through the underlying Raft 356 | // implementation, which should call persister.SaveStateAndSnapshot() to 357 | // atomically save the Raft state along with the snapshot. 358 | // 359 | // the k/v server should snapshot when Raft's saved state exceeds 360 | // maxraftstate bytes, in order to allow Raft to garbage-collect its 361 | // log. if maxraftstate is -1, you don't need to snapshot. 362 | // 363 | // gid is this group's GID, for interacting with the shardmaster. 364 | // 365 | // pass masters[] to shardmaster.MakeClerk() so you can send 366 | // RPCs to the shardmaster. 
367 | // 368 | // make_end(servername) turns a server name from a 369 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 370 | // send RPCs. You'll need this to send RPCs to other groups. 371 | // 372 | // look at client.go for examples of how to use masters[] 373 | // and make_end() to send RPCs to the group owning a specific shard. 374 | // 375 | // StartServer() must return quickly, so it should start goroutines 376 | // for any long-running work. 377 | // 378 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *ShardKV { 379 | DPrintf("[server %d] StartServer", me) 380 | // call labgob.Register on structures you want 381 | // Go's RPC library to marshall/unmarshall. 382 | labgob.Register(Op{}) 383 | 384 | kv := new(ShardKV) 385 | kv.me = me 386 | kv.maxraftstate = maxraftstate 387 | kv.make_end = make_end 388 | kv.gid = gid 389 | kv.masters = masters 390 | kv.sm = shardmaster.MakeClerk(kv.masters) 391 | 392 | // Your initialization code here. 393 | 394 | // Use something like this to talk to the shardmaster: 395 | // kv.mck = shardmaster.MakeClerk(kv.masters) 396 | 397 | kv.applyCh = make(chan raft.ApplyMsg) 398 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 399 | kv.shutdown = make(chan interface{}, 1) 400 | kv.notifyCh = make(map[int]chan Op) 401 | 402 | // init shard data 403 | kv.data = make(map[int]*ShardInfo) 404 | for i := 0; i < shardmaster.NShards; i++ { 405 | kv.data[i] = new(ShardInfo) 406 | kv.data[i].shardID = i 407 | kv.data[i].data = make(map[string]string) 408 | kv.data[i].requests = make(map[int32]int64) 409 | } 410 | 411 | go kv.pollConfigRoutine() 412 | //go kv.migrateRoutine() // when config changes, MakeClerk(), and send my data to dest gid 413 | // add a rpc interface for recv migrate data 414 | go kv.applyRoutine() 415 | 416 | return kv 417 | } 418 | -------------------------------------------------------------------------------- /src/shardmaster/client.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Shardmaster clerk. 5 | // 6 | 7 | import "labrpc" 8 | import "time" 9 | import "crypto/rand" 10 | import "math/big" 11 | import "sync/atomic" 12 | 13 | var clientIdGen = int32(0) 14 | 15 | const maxTry = 3 16 | 17 | type Clerk struct { 18 | servers []*labrpc.ClientEnd 19 | // Your data here. 20 | leader int // hint or probe, TODO: server no use this field 21 | fail int // successive fail calls for leader 22 | clientId int32 // client id, init by clientIdGen 23 | reqId int64 // req id 24 | } 25 | 26 | func nrand() int64 { 27 | max := big.NewInt(int64(1) << 62) 28 | bigx, _ := rand.Int(rand.Reader, max) 29 | x := bigx.Int64() 30 | return x 31 | } 32 | 33 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 34 | ck := new(Clerk) 35 | ck.servers = servers 36 | // Your code here. 37 | ck.leader = 0 38 | ck.clientId = atomic.AddInt32(&clientIdGen, 1) 39 | ck.reqId = 1 40 | return ck 41 | } 42 | 43 | func (ck *Clerk) Query(num int) Config { 44 | args := &QueryArgs{} 45 | // Your code here. 
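// The loop below implements the clerk's leader-probing protocol: fire the
// RPC in a goroutine, race it against RpcTimeout, and treat a timeout, an
// RPC failure, or a WrongLeader reply as a miss. A timeout or WrongLeader
// rotates ck.leader to the next server immediately; plain RPC failures only
// rotate after maxTry consecutive misses. Query is read-only, so retrying
// it forever is safe.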
46 | args.Num = num 47 | fail := 0 48 | for { 49 | reply := new(QueryReply) 50 | var done = make(chan bool) 51 | go func(leader int) { 52 | ok := ck.servers[leader].Call("ShardMaster.Query", args, reply) 53 | done<-ok 54 | }(ck.leader) 55 | 56 | var ok = true 57 | var timeout = false 58 | select { 59 | case <-time.After(RpcTimeout): 60 | timeout = true 61 | 62 | case ok = <-done: 63 | } 64 | 65 | if !timeout && ok && !reply.WrongLeader { 66 | return reply.Config 67 | } else { 68 | fail++ 69 | if timeout || reply.WrongLeader || fail >= maxTry { 70 | fail = 0 71 | ck.leader++ 72 | if ck.leader >= len(ck.servers) { 73 | ck.leader = 0 74 | } 75 | } 76 | } 77 | 78 | time.Sleep(50 * time.Millisecond) 79 | if fail == 0 { 80 | DPrintf("[client %d] retry QUERY to another server %d\n", ck.clientId, ck.leader) 81 | } 82 | } 83 | } 84 | 85 | func (ck *Clerk) Join(servers map[int][]string) { 86 | args := &JoinArgs{} 87 | // Your code here. 88 | args.Servers = servers 89 | args.ReqID = ck.reqId 90 | ck.reqId++ 91 | args.ID = ck.clientId 92 | 93 | fail := 0 94 | for { 95 | reply := new(QueryReply) 96 | var done = make(chan bool) 97 | go func(leader int) { 98 | ok := ck.servers[leader].Call("ShardMaster.Join", args, reply) 99 | done<-ok 100 | }(ck.leader) 101 | 102 | var ok = true 103 | var timeout = false 104 | select { 105 | case <-time.After(RpcTimeout): 106 | timeout = true 107 | 108 | case ok = <-done: 109 | } 110 | 111 | if !timeout && ok && !reply.WrongLeader { 112 | return 113 | } else { 114 | fail++ 115 | if timeout || reply.WrongLeader || fail >= maxTry { 116 | fail = 0 117 | ck.leader++ 118 | if ck.leader >= len(ck.servers) { 119 | ck.leader = 0 120 | } 121 | } 122 | } 123 | 124 | time.Sleep(50 * time.Millisecond) 125 | if fail == 0 { 126 | DPrintf("[client %d] retry QUERY to another server %d\n", ck.clientId, ck.leader) 127 | } 128 | } 129 | } 130 | 131 | func (ck *Clerk) Leave(gids []int) { 132 | args := &LeaveArgs{} 133 | // Your code here. 134 | args.GIDs = gids 135 | args.ReqID = ck.reqId 136 | ck.reqId++ 137 | args.ID = ck.clientId 138 | 139 | fail := 0 140 | for { 141 | reply := new(QueryReply) 142 | var done = make(chan bool) 143 | go func(leader int) { 144 | ok := ck.servers[leader].Call("ShardMaster.Leave", args, reply) 145 | done<-ok 146 | }(ck.leader) 147 | 148 | var ok = true 149 | var timeout = false 150 | select { 151 | case <-time.After(RpcTimeout): 152 | timeout = true 153 | 154 | case ok = <-done: 155 | } 156 | 157 | if !timeout && ok && !reply.WrongLeader { 158 | return 159 | } else { 160 | fail++ 161 | if timeout || reply.WrongLeader || fail >= maxTry { 162 | fail = 0 163 | ck.leader++ 164 | if ck.leader >= len(ck.servers) { 165 | ck.leader = 0 166 | } 167 | } 168 | } 169 | 170 | time.Sleep(50 * time.Millisecond) 171 | if fail == 0 { 172 | DPrintf("[client %d] retry QUERY to another server %d\n", ck.clientId, ck.leader) 173 | } 174 | } 175 | } 176 | 177 | func (ck *Clerk) Move(shard int, gid int) { 178 | args := &MoveArgs{} 179 | // Your code here. 
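// Query, Join and Leave above, and Move below, all repeat the same
// timeout-and-rotate retry loop. Purely as an illustration (callWithRetry is
// a hypothetical helper, not part of this package), the shared part could be
// factored out roughly like this, reusing the RpcTimeout and maxTry values
// defined in this package:
//
//	// callWithRetry keeps invoking attempt against the currently guessed
//	// leader until an attempt returns ok and not wrong-leader, rotating
//	// ck.leader on timeouts, RPC failures, and WrongLeader replies.
//	func (ck *Clerk) callWithRetry(attempt func(leader int) (ok bool, wrongLeader bool)) {
//		fail := 0
//		for {
//			type result struct{ ok, wrong bool }
//			done := make(chan result, 1) // buffered so a late attempt never blocks
//			go func(leader int) {
//				ok, wrong := attempt(leader)
//				done <- result{ok: ok, wrong: wrong}
//			}(ck.leader)
//
//			r := result{ok: true}
//			timeout := false
//			select {
//			case <-time.After(RpcTimeout):
//				timeout = true
//			case r = <-done:
//			}
//
//			if !timeout && r.ok && !r.wrong {
//				return
//			}
//			fail++
//			if timeout || r.wrong || fail >= maxTry {
//				fail = 0
//				ck.leader = (ck.leader + 1) % len(ck.servers)
//			}
//			time.Sleep(50 * time.Millisecond)
//		}
//	}
//
// Move, for example, would then reduce to building args and calling:
//
//	ck.callWithRetry(func(leader int) (bool, bool) {
//		reply := new(MoveReply)
//		ok := ck.servers[leader].Call("ShardMaster.Move", args, reply)
//		return ok, reply.WrongLeader
//	})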
180 | args.Shard = shard 181 | args.GID = gid 182 | args.ReqID = ck.reqId 183 | ck.reqId++ 184 | args.ID = ck.clientId 185 | 186 | fail := 0 187 | for { 188 | reply := new(QueryReply) 189 | var done = make(chan bool) 190 | go func(leader int) { 191 | ok := ck.servers[leader].Call("ShardMaster.Move", args, reply) 192 | done<-ok 193 | }(ck.leader) 194 | 195 | var ok = true 196 | var timeout = false 197 | select { 198 | case <-time.After(RpcTimeout): 199 | timeout = true 200 | 201 | case ok = <-done: 202 | } 203 | 204 | if !timeout && ok && !reply.WrongLeader { 205 | return 206 | } else { 207 | fail++ 208 | if timeout || reply.WrongLeader || fail >= maxTry { 209 | fail = 0 210 | ck.leader++ 211 | if ck.leader >= len(ck.servers) { 212 | ck.leader = 0 213 | } 214 | } 215 | } 216 | 217 | time.Sleep(50 * time.Millisecond) 218 | if fail == 0 { 219 | DPrintf("[client %d] retry QUERY to another server %d\n", ck.clientId, ck.leader) 220 | } 221 | } 222 | } 223 | 224 | -------------------------------------------------------------------------------- /src/shardmaster/common.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "time" 4 | import "strconv" 5 | 6 | // 7 | // Master shard server: assigns shards to replication groups. 8 | // 9 | // RPC interface: 10 | // Join(servers) -- add a set of groups (gid -> server-list mapping). 11 | // Leave(gids) -- delete a set of groups. 12 | // Move(shard, gid) -- hand off one shard from current owner to gid. 13 | // Query(num) -> fetch Config # num, or latest config if num==-1. 14 | // 15 | // A Config (configuration) describes a set of replica groups, and the 16 | // replica group responsible for each shard. Configs are numbered. Config 17 | // #0 is the initial configuration, with no groups and all shards 18 | // assigned to group 0 (the invalid group). 19 | // 20 | // You will need to add fields to the RPC argument structs. 21 | // 22 | 23 | // The number of shards. 24 | const NShards = 10 25 | 26 | // A configuration -- an assignment of shards to groups. 27 | // Please don't change this. 
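// For illustration (the concrete values below are made up), a configuration
// with two replica groups could look like:
//
//	Config{
//		Num:    2,
//		Shards: [NShards]int{1, 1, 1, 1, 1, 2, 2, 2, 2, 2},
//		Groups: map[int][]string{1: {"s1a", "s1b"}, 2: {"s2a", "s2b"}},
//	}
//
// i.e. shards 0-4 are served by group 1 and shards 5-9 by group 2.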
28 | type Config struct { 29 | Num int // config number 30 | Shards [NShards]int // shard -> gid 31 | Groups map[int][]string // gid -> servers[] 32 | } 33 | 34 | func (cfg *Config) String() string { 35 | s := "Num:" + strconv.Itoa(cfg.Num) + "\n" 36 | s += "shard->gid:\n" 37 | for shard, gid := range cfg.Shards { 38 | s += strconv.Itoa(shard) + "->" + strconv.Itoa(gid) + "\n" 39 | } 40 | s += "gid->nservers:\n" 41 | for gid, ss := range cfg.Groups { 42 | s += strconv.Itoa(gid) + " with nservers " + strconv.Itoa(len(ss)) + "\n" 43 | } 44 | return s 45 | } 46 | 47 | const ( 48 | OK = "OK" 49 | ErrNotLeader = "ErrNotLeader" 50 | ErrDuplicateReq = "ErrDuplicateReq" 51 | ) 52 | 53 | type Err string 54 | 55 | type JoinArgs struct { 56 | Servers map[int][]string // new GID -> servers mappings 57 | 58 | ID int32 // client id 59 | ReqID int64 60 | } 61 | 62 | type JoinReply struct { 63 | WrongLeader bool 64 | Err Err 65 | 66 | ID int32 // client id 67 | RspID int64 68 | } 69 | 70 | type LeaveArgs struct { 71 | GIDs []int 72 | 73 | ID int32 // client id 74 | ReqID int64 75 | } 76 | 77 | type LeaveReply struct { 78 | WrongLeader bool 79 | Err Err 80 | ID int32 // client id 81 | RspID int64 82 | } 83 | 84 | type MoveArgs struct { 85 | Shard int 86 | GID int 87 | 88 | ID int32 // client id 89 | ReqID int64 90 | } 91 | 92 | type MoveReply struct { 93 | WrongLeader bool 94 | Err Err 95 | 96 | ID int32 // client id 97 | RspID int64 98 | } 99 | 100 | type QueryArgs struct { 101 | Num int // desired config number 102 | } 103 | 104 | type QueryReply struct { 105 | WrongLeader bool 106 | Err Err 107 | Config Config 108 | } 109 | 110 | const RpcTimeout time.Duration = 1000 * time.Millisecond 111 | 112 | -------------------------------------------------------------------------------- /src/shardmaster/config.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "labrpc" 4 | import "raft" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | import "time" 15 | 16 | func randstring(n int) string { 17 | b := make([]byte, 2*n) 18 | crand.Read(b) 19 | s := base64.URLEncoding.EncodeToString(b) 20 | return s[0:n] 21 | } 22 | 23 | // Randomize server handles 24 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 25 | sa := make([]*labrpc.ClientEnd, len(kvh)) 26 | copy(sa, kvh) 27 | for i := range sa { 28 | j := rand.Intn(i + 1) 29 | sa[i], sa[j] = sa[j], sa[i] 30 | } 31 | return sa 32 | } 33 | 34 | type config struct { 35 | mu sync.Mutex 36 | t *testing.T 37 | net *labrpc.Network 38 | n int 39 | servers []*ShardMaster 40 | saved []*raft.Persister 41 | endnames [][]string // names of each server's sending ClientEnds 42 | clerks map[*Clerk][]string 43 | nextClientId int 44 | start time.Time // time at which make_config() was called 45 | } 46 | 47 | func (cfg *config) checkTimeout() { 48 | // enforce a two minute real-time limit on each test 49 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 50 | cfg.t.Fatal("test took longer than 120 seconds") 51 | } 52 | } 53 | 54 | func (cfg *config) cleanup() { 55 | cfg.mu.Lock() 56 | defer cfg.mu.Unlock() 57 | for i := 0; i < len(cfg.servers); i++ { 58 | if cfg.servers[i] != nil { 59 | cfg.servers[i].Kill() 60 | } 61 | } 62 | cfg.net.Cleanup() 63 | cfg.checkTimeout() 64 | } 65 | 66 | // Maximum log size across all servers 67 | func (cfg *config) 
LogSize() int { 68 | logsize := 0 69 | for i := 0; i < cfg.n; i++ { 70 | n := cfg.saved[i].RaftStateSize() 71 | if n > logsize { 72 | logsize = n 73 | } 74 | } 75 | return logsize 76 | } 77 | 78 | // attach server i to servers listed in to 79 | // caller must hold cfg.mu 80 | func (cfg *config) connectUnlocked(i int, to []int) { 81 | // log.Printf("connect peer %d to %v\n", i, to) 82 | 83 | // outgoing socket files 84 | for j := 0; j < len(to); j++ { 85 | endname := cfg.endnames[i][to[j]] 86 | cfg.net.Enable(endname, true) 87 | } 88 | 89 | // incoming socket files 90 | for j := 0; j < len(to); j++ { 91 | endname := cfg.endnames[to[j]][i] 92 | cfg.net.Enable(endname, true) 93 | } 94 | } 95 | 96 | func (cfg *config) connect(i int, to []int) { 97 | cfg.mu.Lock() 98 | defer cfg.mu.Unlock() 99 | cfg.connectUnlocked(i, to) 100 | } 101 | 102 | // detach server i from the servers listed in from 103 | // caller must hold cfg.mu 104 | func (cfg *config) disconnectUnlocked(i int, from []int) { 105 | // log.Printf("disconnect peer %d from %v\n", i, from) 106 | 107 | // outgoing socket files 108 | for j := 0; j < len(from); j++ { 109 | if cfg.endnames[i] != nil { 110 | endname := cfg.endnames[i][from[j]] 111 | cfg.net.Enable(endname, false) 112 | } 113 | } 114 | 115 | // incoming socket files 116 | for j := 0; j < len(from); j++ { 117 | if cfg.endnames[j] != nil { 118 | endname := cfg.endnames[from[j]][i] 119 | cfg.net.Enable(endname, false) 120 | } 121 | } 122 | } 123 | 124 | func (cfg *config) disconnect(i int, from []int) { 125 | cfg.mu.Lock() 126 | defer cfg.mu.Unlock() 127 | cfg.disconnectUnlocked(i, from) 128 | } 129 | 130 | func (cfg *config) All() []int { 131 | all := make([]int, cfg.n) 132 | for i := 0; i < cfg.n; i++ { 133 | all[i] = i 134 | } 135 | return all 136 | } 137 | 138 | func (cfg *config) ConnectAll() { 139 | cfg.mu.Lock() 140 | defer cfg.mu.Unlock() 141 | for i := 0; i < cfg.n; i++ { 142 | cfg.connectUnlocked(i, cfg.All()) 143 | } 144 | } 145 | 146 | // Sets up 2 partitions with connectivity between servers in each partition. 147 | func (cfg *config) partition(p1 []int, p2 []int) { 148 | cfg.mu.Lock() 149 | defer cfg.mu.Unlock() 150 | // log.Printf("partition servers into: %v %v\n", p1, p2) 151 | for i := 0; i < len(p1); i++ { 152 | cfg.disconnectUnlocked(p1[i], p2) 153 | cfg.connectUnlocked(p1[i], p1) 154 | } 155 | for i := 0; i < len(p2); i++ { 156 | cfg.disconnectUnlocked(p2[i], p1) 157 | cfg.connectUnlocked(p2[i], p2) 158 | } 159 | } 160 | 161 | // Create a clerk with clerk specific server names. 162 | // Give it connections to all of the servers, but for 163 | // now enable only connections to servers in to[]. 164 | func (cfg *config) makeClient(to []int) *Clerk { 165 | cfg.mu.Lock() 166 | defer cfg.mu.Unlock() 167 | 168 | // a fresh set of ClientEnds. 
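// Each clerk gets its own freshly named ClientEnds, recorded in cfg.clerks,
// so the tester can later enable or disable exactly this clerk's
// connections via ConnectClientUnlocked/DisconnectClientUnlocked.
// random_handles shuffles the ends so different clerks contact the servers
// in different orders.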
169 | ends := make([]*labrpc.ClientEnd, cfg.n) 170 | endnames := make([]string, cfg.n) 171 | for j := 0; j < cfg.n; j++ { 172 | endnames[j] = randstring(20) 173 | ends[j] = cfg.net.MakeEnd(endnames[j]) 174 | cfg.net.Connect(endnames[j], j) 175 | } 176 | 177 | ck := MakeClerk(random_handles(ends)) 178 | cfg.clerks[ck] = endnames 179 | cfg.nextClientId++ 180 | cfg.ConnectClientUnlocked(ck, to) 181 | return ck 182 | } 183 | 184 | func (cfg *config) deleteClient(ck *Clerk) { 185 | cfg.mu.Lock() 186 | defer cfg.mu.Unlock() 187 | 188 | v := cfg.clerks[ck] 189 | for i := 0; i < len(v); i++ { 190 | os.Remove(v[i]) 191 | } 192 | delete(cfg.clerks, ck) 193 | } 194 | 195 | // caller should hold cfg.mu 196 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 197 | // log.Printf("ConnectClient %v to %v\n", ck, to) 198 | endnames := cfg.clerks[ck] 199 | for j := 0; j < len(to); j++ { 200 | s := endnames[to[j]] 201 | cfg.net.Enable(s, true) 202 | } 203 | } 204 | 205 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 206 | cfg.mu.Lock() 207 | defer cfg.mu.Unlock() 208 | cfg.ConnectClientUnlocked(ck, to) 209 | } 210 | 211 | // caller should hold cfg.mu 212 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 213 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 214 | endnames := cfg.clerks[ck] 215 | for j := 0; j < len(from); j++ { 216 | s := endnames[from[j]] 217 | cfg.net.Enable(s, false) 218 | } 219 | } 220 | 221 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 222 | cfg.mu.Lock() 223 | defer cfg.mu.Unlock() 224 | cfg.DisconnectClientUnlocked(ck, from) 225 | } 226 | 227 | // Shutdown a server by isolating it 228 | func (cfg *config) ShutdownServer(i int) { 229 | cfg.mu.Lock() 230 | defer cfg.mu.Unlock() 231 | 232 | cfg.disconnectUnlocked(i, cfg.All()) 233 | 234 | // disable client connections to the server. 235 | // it's important to do this before creating 236 | // the new Persister in saved[i], to avoid 237 | // the possibility of the server returning a 238 | // positive reply to an Append but persisting 239 | // the result in the superseded Persister. 240 | cfg.net.DeleteServer(i) 241 | 242 | // a fresh persister, in case old instance 243 | // continues to update the Persister. 244 | // but copy old persister's content so that we always 245 | // pass Make() the last persisted state. 246 | if cfg.saved[i] != nil { 247 | cfg.saved[i] = cfg.saved[i].Copy() 248 | } 249 | 250 | kv := cfg.servers[i] 251 | if kv != nil { 252 | cfg.mu.Unlock() 253 | kv.Kill() 254 | cfg.mu.Lock() 255 | cfg.servers[i] = nil 256 | } 257 | } 258 | 259 | // If restart servers, first call ShutdownServer 260 | func (cfg *config) StartServer(i int) { 261 | cfg.mu.Lock() 262 | 263 | // a fresh set of outgoing ClientEnd names. 264 | cfg.endnames[i] = make([]string, cfg.n) 265 | for j := 0; j < cfg.n; j++ { 266 | cfg.endnames[i][j] = randstring(20) 267 | } 268 | 269 | // a fresh set of ClientEnds. 270 | ends := make([]*labrpc.ClientEnd, cfg.n) 271 | for j := 0; j < cfg.n; j++ { 272 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 273 | cfg.net.Connect(cfg.endnames[i][j], j) 274 | } 275 | 276 | // a fresh persister, so old instance doesn't overwrite 277 | // new instance's persisted state. 278 | // give the fresh persister a copy of the old persister's 279 | // state, so that the spec is that we pass StartKVServer() 280 | // the last persisted state. 
281 | if cfg.saved[i] != nil { 282 | cfg.saved[i] = cfg.saved[i].Copy() 283 | } else { 284 | cfg.saved[i] = raft.MakePersister() 285 | } 286 | 287 | cfg.mu.Unlock() 288 | 289 | cfg.servers[i] = StartServer(ends, i, cfg.saved[i]) 290 | 291 | kvsvc := labrpc.MakeService(cfg.servers[i]) 292 | rfsvc := labrpc.MakeService(cfg.servers[i].rf) 293 | srv := labrpc.MakeServer() 294 | srv.AddService(kvsvc) 295 | srv.AddService(rfsvc) 296 | cfg.net.AddServer(i, srv) 297 | } 298 | 299 | func (cfg *config) Leader() (bool, int) { 300 | cfg.mu.Lock() 301 | defer cfg.mu.Unlock() 302 | 303 | for i := 0; i < cfg.n; i++ { 304 | _, is_leader := cfg.servers[i].rf.GetState() 305 | if is_leader { 306 | return true, i 307 | } 308 | } 309 | return false, 0 310 | } 311 | 312 | // Partition servers into 2 groups and put current leader in minority 313 | func (cfg *config) make_partition() ([]int, []int) { 314 | _, l := cfg.Leader() 315 | p1 := make([]int, cfg.n/2+1) 316 | p2 := make([]int, cfg.n/2) 317 | j := 0 318 | for i := 0; i < cfg.n; i++ { 319 | if i != l { 320 | if j < len(p1) { 321 | p1[j] = i 322 | } else { 323 | p2[j-len(p1)] = i 324 | } 325 | j++ 326 | } 327 | } 328 | p2[len(p2)-1] = l 329 | return p1, p2 330 | } 331 | 332 | func make_config(t *testing.T, n int, unreliable bool) *config { 333 | runtime.GOMAXPROCS(4) 334 | cfg := &config{} 335 | cfg.t = t 336 | cfg.net = labrpc.MakeNetwork() 337 | cfg.n = n 338 | cfg.servers = make([]*ShardMaster, cfg.n) 339 | cfg.saved = make([]*raft.Persister, cfg.n) 340 | cfg.endnames = make([][]string, cfg.n) 341 | cfg.clerks = make(map[*Clerk][]string) 342 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 343 | cfg.start = time.Now() 344 | 345 | // create a full set of KV servers. 346 | for i := 0; i < cfg.n; i++ { 347 | cfg.StartServer(i) 348 | } 349 | 350 | cfg.ConnectAll() 351 | 352 | cfg.net.Reliable(!unreliable) 353 | 354 | return cfg 355 | } 356 | -------------------------------------------------------------------------------- /src/shardmaster/server.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | 4 | import "raft" 5 | import "labrpc" 6 | import "labgob" 7 | import "sync" 8 | import "log" 9 | import "strconv" 10 | 11 | const Debug = 0 12 | 13 | func DPrintf(format string, a ...interface{}) (n int, err error) { 14 | if Debug > 0 { 15 | log.Printf(format, a...) 16 | } 17 | return 18 | } 19 | 20 | 21 | type ShardMaster struct { 22 | mu sync.Mutex 23 | me int 24 | rf *raft.Raft 25 | applyCh chan raft.ApplyMsg 26 | 27 | configs []Config // indexed by config num 28 | 29 | // Your data here. 30 | 31 | // Notify chan for each log index 32 | notifyCh map[int]chan Response 33 | // request records 34 | requests map[int32]int64 // client -> last commited reqID 35 | 36 | // for exit 37 | shutdown chan interface{} 38 | } 39 | 40 | // real Command 41 | type Op struct { 42 | // Your data here. 
Works like a tagged union: only the fields matching Operation are meaningful. 43 | Operation string // join/leave/move/query 44 | 45 | //join 46 | Servers map[int][]string // new GID -> servers mappings 47 | 48 | //leave 49 | GIDs []int 50 | 51 | //move 52 | Shard int 53 | GID int 54 | 55 | // query 56 | Num int // desired config number 57 | 58 | ID int32 // client id 59 | ReqID int64 60 | } 61 | 62 | type Response struct { 63 | WrongLeader bool 64 | Err Err 65 | Config Config 66 | 67 | ID int32 // client id 68 | ReqID int64 69 | } 70 | 71 | func (sm *ShardMaster) String() string { 72 | s := "[master_" + strconv.Itoa(sm.me) + "]:\n" 73 | for _, cfg := range sm.configs { 74 | s += cfg.String() 75 | } 76 | return s 77 | } 78 | 79 | // check if repeated request 80 | func (sm *ShardMaster) isDuplicated(id int32, reqId int64) bool { 81 | sm.mu.Lock() 82 | defer sm.mu.Unlock() 83 | maxSeenReqId, ok := sm.requests[id] 84 | if ok { 85 | return reqId <= maxSeenReqId 86 | } 87 | return false 88 | } 89 | 90 | // returns true if the record was updated, i.e. the request is not a repeat and can be applied to the state machine 91 | func (sm *ShardMaster) updateIfNotDuplicated(id int32, reqId int64) bool { 92 | // must hold lock outside 93 | maxSeenReqId, ok := sm.requests[id] 94 | if ok { 95 | if reqId <= maxSeenReqId { 96 | return false 97 | } 98 | } 99 | 100 | sm.requests[id] = reqId 101 | return true 102 | } 103 | 104 | // call raft.Start to commit a command as a log entry 105 | func (sm *ShardMaster) proposeCommand(cmd Op) (bool, *Response) { 106 | logIndex, _, isLeader := sm.rf.Start(cmd) 107 | if !isLeader { 108 | //DPrintf("[master %d] proposeCommand %d but not leader", sm.me, cmd.ReqID) 109 | return false, nil 110 | } 111 | 112 | DPrintf("[master %d] proposeCommand %d, %s, logIdx %d", sm.me, cmd.ReqID, cmd.Operation, logIndex) 113 | 114 | // wait for the command to be committed 115 | sm.mu.Lock() 116 | // use logIndex because all servers agree on the same log index 117 | ch, ok := sm.notifyCh[logIndex] 118 | if !ok { 119 | ch = make(chan Response, 1) 120 | sm.notifyCh[logIndex] = ch 121 | } 122 | sm.mu.Unlock() 123 | 124 | // check 125 | if ch == nil { 126 | panic("FATAL: chan is nil") 127 | } 128 | 129 | // Wait on ch indefinitely: if this server loses leadership before the 130 | // entry commits (e.g. it is partitioned away), it cannot reply yet, so it 131 | // waits until the partition heals. Eventually some entry is committed at 132 | // this index and wakes us up; if that entry carries a different client or 133 | // request ID than cmd, report failure so the client retries. If the client 134 | // has already retried against another leader in the meantime, that is 135 | // harmless. 136 | select { 137 | case rsp := <-ch: 138 | return rsp.ID == cmd.ID && rsp.ReqID == cmd.ReqID, &rsp 139 | // a different entry at this index means this server is no longer leader 140 | } 141 | 142 | return false, nil 143 | } 144 | 145 | 146 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) { 147 | // Your code here.
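// Join (like Leave and Move below) follows one handler pattern: validate the
// arguments, do a fast-path duplicate check against sm.requests, then hand
// the command to proposeCommand and wait for the entry committed at that log
// index to come back carrying our (ID, ReqID). Anything else means this
// server lost leadership for that slot, so the reply sends the clerk to try
// another server.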
148 | reply.ID = args.ID 149 | reply.RspID = args.ReqID 150 | reply.WrongLeader = false 151 | reply.Err = "" 152 | 153 | if len(args.Servers) == 0 { 154 | return 155 | } 156 | 157 | for gid, _ := range args.Servers { 158 | if gid == 0 { 159 | return 160 | } 161 | } 162 | 163 | DPrintf("[master %d] JoinRPC args %v", sm.me, args) 164 | 165 | // check if repeated request, useless but efficient 166 | duplicate := sm.isDuplicated(args.ID, args.ReqID) 167 | if duplicate { 168 | reply.Err = ErrDuplicateReq 169 | return 170 | } 171 | 172 | cmd := Op{} 173 | cmd.Operation = "join" 174 | cmd.Servers = args.Servers 175 | cmd.ID = args.ID 176 | cmd.ReqID = args.ReqID 177 | 178 | succ, _ := sm.proposeCommand(cmd) 179 | if !succ { 180 | reply.WrongLeader = true 181 | reply.Err = ErrNotLeader 182 | } 183 | } 184 | 185 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) { 186 | // Your code here. 187 | reply.ID = args.ID 188 | reply.RspID = args.ReqID 189 | 190 | if len(args.GIDs) == 0 { 191 | return 192 | } 193 | 194 | reply.WrongLeader = false 195 | reply.Err = "" 196 | 197 | // check if repeated request, useless but efficient 198 | duplicate := sm.isDuplicated(args.ID, args.ReqID) 199 | if duplicate { 200 | reply.Err = ErrDuplicateReq 201 | return 202 | } 203 | 204 | DPrintf("[master %d] LeaveRPC args %v", sm.me, args) 205 | 206 | cmd := Op{} 207 | cmd.Operation = "leave" 208 | cmd.GIDs = args.GIDs 209 | cmd.ID = args.ID 210 | cmd.ReqID = args.ReqID 211 | 212 | succ, _ := sm.proposeCommand(cmd) 213 | if !succ { 214 | reply.WrongLeader = true 215 | reply.Err = ErrNotLeader 216 | } 217 | } 218 | 219 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) { 220 | // Your code here. 221 | reply.ID = args.ID 222 | reply.RspID = args.ReqID 223 | reply.WrongLeader = false 224 | reply.Err = "" 225 | 226 | // check if repeated request, useless but efficient 227 | duplicate := sm.isDuplicated(args.ID, args.ReqID) 228 | if duplicate { 229 | reply.Err = ErrDuplicateReq 230 | return 231 | } 232 | 233 | DPrintf("[master %d] MoveRPC args %v", sm.me, args) 234 | 235 | cmd := Op{} 236 | cmd.Operation = "move" 237 | cmd.Shard = args.Shard 238 | cmd.GID = args.GID 239 | cmd.ID = args.ID 240 | cmd.ReqID = args.ReqID 241 | 242 | succ, _ := sm.proposeCommand(cmd) 243 | if !succ { 244 | reply.WrongLeader = true 245 | reply.Err = ErrNotLeader 246 | } 247 | } 248 | 249 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) { 250 | // Your code here. 251 | reply.WrongLeader = false 252 | reply.Err = "" 253 | 254 | cmd := Op{} 255 | cmd.Operation = "query" 256 | cmd.Num = args.Num 257 | 258 | succ, rsp := sm.proposeCommand(cmd) 259 | if !succ { 260 | reply.WrongLeader = true 261 | reply.Err = ErrNotLeader 262 | } else { 263 | reply.Config = rsp.Config 264 | } 265 | } 266 | 267 | func (sm* ShardMaster) copyLastConfig() Config { 268 | cfg := Config{} 269 | cfg.Num = sm.configs[len(sm.configs)-1].Num + 1 270 | cfg.Shards = sm.configs[len(sm.configs)-1].Shards 271 | cfg.Groups = make(map[int][]string) 272 | for k, v := range sm.configs[len(sm.configs)-1].Groups { 273 | var servers = make([]string, len(v)) 274 | copy(servers, v) 275 | cfg.Groups[k] = servers 276 | } 277 | 278 | return cfg; 279 | } 280 | 281 | func (sm* ShardMaster) rebalance() { 282 | cfg := &sm.configs[len(sm.configs)-1] 283 | 284 | if len(cfg.Groups) == 0 { 285 | return 286 | } 287 | 288 | //1. 
make gid --> shard count 289 | gidShards := make(map[int]int) 290 | allShardsAllocated := true 291 | for _, gid := range cfg.Shards { 292 | if gid != 0 { 293 | gidShards[gid] += 1 294 | } else { 295 | allShardsAllocated = false 296 | } 297 | } 298 | 299 | minGid, min := 0, NShards + 1 300 | maxGid, max := 0, 0 301 | nGidAllocated := 0 302 | // 2. some gid is not allocated, set to 0 303 | for gid, _ := range cfg.Groups { 304 | if gidShards[gid] == 0 { 305 | gidShards[gid] += 0 306 | } else { 307 | nGidAllocated++ 308 | } 309 | 310 | if gidShards[gid] > max { 311 | max = gidShards[gid] 312 | maxGid = gid 313 | } 314 | if gidShards[gid] < min { 315 | min = gidShards[gid] 316 | minGid = gid 317 | } 318 | } 319 | 320 | allGidAllocated := false 321 | if nGidAllocated == len(cfg.Groups) || nGidAllocated == NShards { 322 | allGidAllocated = true 323 | } 324 | 325 | if allShardsAllocated && allGidAllocated && max - min <= 1 { 326 | return 327 | } 328 | 329 | if allShardsAllocated { 330 | for shard, gid := range cfg.Shards { 331 | if gid == maxGid { 332 | cfg.Shards[shard] = minGid 333 | sm.rebalance() 334 | return 335 | } 336 | } 337 | } else if allGidAllocated { 338 | // try alloc empty shards to minGid 339 | for shard, gid := range cfg.Shards { 340 | if gid == 0 { 341 | cfg.Shards[shard] = minGid 342 | sm.rebalance() 343 | return 344 | } 345 | } 346 | } else { 347 | // try alloc empty shards to minGid 348 | for shard, gid := range cfg.Shards { 349 | if gid == 0 { 350 | cfg.Shards[shard] = minGid 351 | sm.rebalance() 352 | return 353 | } 354 | } 355 | } 356 | } 357 | 358 | // when raft commited a log entry, it'll notify me 359 | func (sm *ShardMaster) applyRoutine() { 360 | for { 361 | var op Op 362 | var applyMsg raft.ApplyMsg 363 | 364 | select { 365 | case <-sm.shutdown: 366 | DPrintf("[master %d] shutdown applyRoutine", sm.me) 367 | return 368 | 369 | case applyMsg = <-sm.applyCh: 370 | } 371 | 372 | if !applyMsg.CommandValid { 373 | panic("no snapshot for ShardMaster") 374 | } 375 | 376 | op, _ = (applyMsg.Command).(Op) 377 | rebalance := false 378 | reply := Response{} 379 | reply.ID = op.ID 380 | reply.ReqID= op.ReqID 381 | 382 | sm.mu.Lock() 383 | // Follower & Leader: try apply to state machine, fail if duplicated request 384 | if op.Operation == "join" { 385 | update := sm.updateIfNotDuplicated(op.ID, op.ReqID) 386 | if update { 387 | cfg := sm.copyLastConfig() 388 | for k,v := range op.Servers { 389 | cfg.Groups[k] = v 390 | } 391 | 392 | DPrintf("[master %d] apply for client %d Join logindex %d, new cfg %v", sm.me, op.ID, applyMsg.CommandIndex, cfg) 393 | sm.configs = append(sm.configs, cfg) 394 | rebalance = true 395 | } 396 | } else if op.Operation == "leave" { 397 | update := sm.updateIfNotDuplicated(op.ID, op.ReqID) 398 | if update { 399 | DPrintf("[master %d] apply for client %d Leave logindex %d", sm.me, op.ID, applyMsg.CommandIndex) 400 | cfg := sm.copyLastConfig() 401 | for _, id := range op.GIDs { 402 | delete (cfg.Groups, id) 403 | for shard, gid := range cfg.Shards { 404 | if gid == id { 405 | cfg.Shards[shard] = 0 406 | } 407 | } 408 | } 409 | 410 | sm.configs = append(sm.configs, cfg) 411 | rebalance = true 412 | } 413 | } else if op.Operation == "move" { 414 | update := sm.updateIfNotDuplicated(op.ID, op.ReqID) 415 | if update { 416 | DPrintf("[master %d] apply for client %d Move logindex %d", sm.me, op.ID, applyMsg.CommandIndex) 417 | cfg := sm.copyLastConfig() 418 | cfg.Shards[op.Shard] = op.GID 419 | 420 | sm.configs = append(sm.configs, cfg) 421 | } 422 | } else if 
op.Operation == "query" { 423 | num := len(sm.configs) - 1 424 | if op.Num >= 0 && op.Num < len(sm.configs) { 425 | num = op.Num 426 | } 427 | reply.Config = sm.configs[num] 428 | } 429 | 430 | if rebalance { 431 | sm.rebalance() 432 | } 433 | 434 | ch, ok := sm.notifyCh[applyMsg.CommandIndex] 435 | if ok { 436 | ch <- reply 437 | } 438 | 439 | sm.mu.Unlock() 440 | } 441 | } 442 | 443 | 444 | // 445 | // the tester calls Kill() when a ShardMaster instance won't 446 | // be needed again. you are not required to do anything 447 | // in Kill(), but it might be convenient to (for example) 448 | // turn off debug output from this instance. 449 | // 450 | func (sm *ShardMaster) Kill() { 451 | sm.rf.Kill() 452 | // Your code here, if desired. 453 | close(sm.shutdown) 454 | } 455 | 456 | // needed by shardkv tester 457 | func (sm *ShardMaster) Raft() *raft.Raft { 458 | return sm.rf 459 | } 460 | 461 | // 462 | // servers[] contains the ports of the set of 463 | // servers that will cooperate via Paxos to 464 | // form the fault-tolerant shardmaster service. 465 | // me is the index of the current server in servers[]. 466 | // 467 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardMaster { 468 | DPrintf("[master %d] StartServer", me) 469 | sm := new(ShardMaster) 470 | sm.me = me 471 | 472 | sm.configs = make([]Config, 1) 473 | sm.configs[0].Groups = map[int][]string{} 474 | 475 | labgob.Register(Op{}) 476 | sm.applyCh = make(chan raft.ApplyMsg) 477 | sm.rf = raft.Make(servers, me, persister, sm.applyCh) 478 | 479 | // Your code here. 480 | sm.requests = make(map[int32]int64) 481 | sm.notifyCh = make(map[int]chan Response) 482 | sm.shutdown = make(chan interface{}, 1) 483 | 484 | go sm.applyRoutine() 485 | 486 | return sm 487 | } 488 | -------------------------------------------------------------------------------- /src/shardmaster/test_test.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | ) 7 | 8 | // import "time" 9 | import "fmt" 10 | 11 | func check(t *testing.T, groups []int, ck *Clerk) { 12 | c := ck.Query(-1) 13 | if len(c.Groups) != len(groups) { 14 | t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups)) 15 | } 16 | 17 | // are the groups as expected? 18 | for _, g := range groups { 19 | _, ok := c.Groups[g] 20 | if ok != true { 21 | t.Fatalf("missing group %v", g) 22 | } 23 | } 24 | 25 | // any un-allocated shards? 26 | if len(groups) > 0 { 27 | for s, g := range c.Shards { 28 | _, ok := c.Groups[g] 29 | if ok == false { 30 | t.Fatalf("shard %v -> invalid group %v", s, g) 31 | } 32 | } 33 | } 34 | 35 | // more or less balanced sharding? 
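// The block below counts how many shards each group owns and requires the
// spread between the most and least loaded group to be at most one. With
// NShards = 10 and three groups, for example, a 4/3/3 split passes while a
// 5/4/1 split fails.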
36 | counts := map[int]int{} 37 | for _, g := range c.Shards { 38 | counts[g] += 1 39 | } 40 | min := 257 41 | max := 0 42 | for g, _ := range c.Groups { 43 | if counts[g] > max { 44 | max = counts[g] 45 | } 46 | if counts[g] < min { 47 | min = counts[g] 48 | } 49 | } 50 | if max > min+1 { 51 | t.Fatalf("max %v too much larger than min %v", max, min) 52 | } 53 | } 54 | 55 | func check_same_config(t *testing.T, c1 Config, c2 Config) { 56 | if c1.Num != c2.Num { 57 | t.Fatalf("Num wrong") 58 | } 59 | if c1.Shards != c2.Shards { 60 | t.Fatalf("Shards wrong") 61 | } 62 | if len(c1.Groups) != len(c2.Groups) { 63 | t.Fatalf("number of Groups is wrong") 64 | } 65 | for gid, sa := range c1.Groups { 66 | sa1, ok := c2.Groups[gid] 67 | if ok == false || len(sa1) != len(sa) { 68 | t.Fatalf("len(Groups) wrong") 69 | } 70 | if ok && len(sa1) == len(sa) { 71 | for j := 0; j < len(sa); j++ { 72 | if sa[j] != sa1[j] { 73 | t.Fatalf("Groups wrong") 74 | } 75 | } 76 | } 77 | } 78 | } 79 | 80 | func TestBasic(t *testing.T) { 81 | const nservers = 3 82 | cfg := make_config(t, nservers, false) 83 | defer cfg.cleanup() 84 | 85 | ck := cfg.makeClient(cfg.All()) 86 | 87 | fmt.Printf("Test: Basic leave/join ...\n") 88 | 89 | cfa := make([]Config, 6) 90 | cfa[0] = ck.Query(-1) 91 | 92 | check(t, []int{}, ck) 93 | 94 | var gid1 int = 1 95 | ck.Join(map[int][]string{gid1: []string{"x", "y", "z"}}) 96 | check(t, []int{gid1}, ck) 97 | cfa[1] = ck.Query(-1) 98 | 99 | var gid2 int = 2 100 | ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}}) 101 | check(t, []int{gid1, gid2}, ck) 102 | cfa[2] = ck.Query(-1) 103 | 104 | cfx := ck.Query(-1) 105 | sa1 := cfx.Groups[gid1] 106 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 107 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 108 | } 109 | sa2 := cfx.Groups[gid2] 110 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 111 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 112 | } 113 | 114 | ck.Leave([]int{gid1}) 115 | check(t, []int{gid2}, ck) 116 | cfa[4] = ck.Query(-1) 117 | 118 | ck.Leave([]int{gid2}) 119 | cfa[5] = ck.Query(-1) 120 | 121 | fmt.Printf(" ... Passed\n") 122 | 123 | fmt.Printf("Test: Historical queries ...\n") 124 | 125 | for s := 0; s < nservers; s++ { 126 | cfg.ShutdownServer(s) 127 | for i := 0; i < len(cfa); i++ { 128 | c := ck.Query(cfa[i].Num) 129 | check_same_config(t, c, cfa[i]) 130 | } 131 | cfg.StartServer(s) 132 | cfg.ConnectAll() 133 | } 134 | 135 | fmt.Printf(" ... 
Passed\n") 136 | 137 | fmt.Printf("Test: Move ...\n") 138 | { 139 | var gid3 int = 503 140 | ck.Join(map[int][]string{gid3: []string{"3a", "3b", "3c"}}) 141 | var gid4 int = 504 142 | ck.Join(map[int][]string{gid4: []string{"4a", "4b", "4c"}}) 143 | for i := 0; i < NShards; i++ { 144 | cf := ck.Query(-1) 145 | if i < NShards/2 { 146 | ck.Move(i, gid3) 147 | if cf.Shards[i] != gid3 { 148 | cf1 := ck.Query(-1) 149 | if cf1.Num <= cf.Num { 150 | t.Fatalf("Move should increase Config.Num") 151 | } 152 | } 153 | } else { 154 | ck.Move(i, gid4) 155 | if cf.Shards[i] != gid4 { 156 | cf1 := ck.Query(-1) 157 | if cf1.Num <= cf.Num { 158 | t.Fatalf("Move should increase Config.Num") 159 | } 160 | } 161 | } 162 | } 163 | cf2 := ck.Query(-1) 164 | for i := 0; i < NShards; i++ { 165 | if i < NShards/2 { 166 | if cf2.Shards[i] != gid3 { 167 | t.Fatalf("expected shard %v on gid %v actually %v", 168 | i, gid3, cf2.Shards[i]) 169 | } 170 | } else { 171 | if cf2.Shards[i] != gid4 { 172 | t.Fatalf("expected shard %v on gid %v actually %v", 173 | i, gid4, cf2.Shards[i]) 174 | } 175 | } 176 | } 177 | ck.Leave([]int{gid3}) 178 | ck.Leave([]int{gid4}) 179 | } 180 | fmt.Printf(" ... Passed\n") 181 | 182 | fmt.Printf("Test: Concurrent leave/join ...\n") 183 | 184 | const npara = 10 185 | var cka [npara]*Clerk 186 | for i := 0; i < len(cka); i++ { 187 | cka[i] = cfg.makeClient(cfg.All()) 188 | } 189 | gids := make([]int, npara) 190 | ch := make(chan bool) 191 | for xi := 0; xi < npara; xi++ { 192 | gids[xi] = int((xi * 10) + 100) 193 | go func(i int) { 194 | defer func() { ch <- true }() 195 | var gid int = gids[i] 196 | var sid1 = fmt.Sprintf("s%da", gid) 197 | var sid2 = fmt.Sprintf("s%db", gid) 198 | cka[i].Join(map[int][]string{gid + 1000: []string{sid1}}) 199 | cka[i].Join(map[int][]string{gid: []string{sid2}}) 200 | cka[i].Leave([]int{gid + 1000}) 201 | }(xi) 202 | } 203 | for i := 0; i < npara; i++ { 204 | <-ch 205 | } 206 | check(t, gids, ck) 207 | 208 | fmt.Printf(" ... Passed\n") 209 | 210 | fmt.Printf("Test: Minimal transfers after joins ...\n") 211 | 212 | c1 := ck.Query(-1) 213 | for i := 0; i < 5; i++ { 214 | var gid = int(npara + 1 + i) 215 | ck.Join(map[int][]string{gid: []string{ 216 | fmt.Sprintf("%da", gid), 217 | fmt.Sprintf("%db", gid), 218 | fmt.Sprintf("%db", gid)}}) 219 | } 220 | c2 := ck.Query(-1) 221 | for i := int(1); i <= npara; i++ { 222 | for j := 0; j < len(c1.Shards); j++ { 223 | if c2.Shards[j] == i { 224 | if c1.Shards[j] != i { 225 | t.Fatalf("non-minimal transfer after Join()s") 226 | } 227 | } 228 | } 229 | } 230 | 231 | fmt.Printf(" ... Passed\n") 232 | 233 | fmt.Printf("Test: Minimal transfers after leaves ...\n") 234 | 235 | for i := 0; i < 5; i++ { 236 | ck.Leave([]int{int(npara + 1 + i)}) 237 | } 238 | c3 := ck.Query(-1) 239 | for i := int(1); i <= npara; i++ { 240 | for j := 0; j < len(c1.Shards); j++ { 241 | if c2.Shards[j] == i { 242 | if c3.Shards[j] != i { 243 | t.Fatalf("non-minimal transfer after Leave()s") 244 | } 245 | } 246 | } 247 | } 248 | 249 | fmt.Printf(" ... 
Passed\n") 250 | } 251 | 252 | func TestMulti(t *testing.T) { 253 | const nservers = 3 254 | cfg := make_config(t, nservers, false) 255 | defer cfg.cleanup() 256 | 257 | ck := cfg.makeClient(cfg.All()) 258 | 259 | fmt.Printf("Test: Multi-group join/leave ...\n") 260 | 261 | cfa := make([]Config, 6) 262 | cfa[0] = ck.Query(-1) 263 | 264 | check(t, []int{}, ck) 265 | 266 | var gid1 int = 1 267 | var gid2 int = 2 268 | ck.Join(map[int][]string{ 269 | gid1: []string{"x", "y", "z"}, 270 | gid2: []string{"a", "b", "c"}, 271 | }) 272 | check(t, []int{gid1, gid2}, ck) 273 | cfa[1] = ck.Query(-1) 274 | 275 | var gid3 int = 3 276 | ck.Join(map[int][]string{gid3: []string{"j", "k", "l"}}) 277 | check(t, []int{gid1, gid2, gid3}, ck) 278 | cfa[2] = ck.Query(-1) 279 | 280 | cfx := ck.Query(-1) 281 | sa1 := cfx.Groups[gid1] 282 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 283 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 284 | } 285 | sa2 := cfx.Groups[gid2] 286 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 287 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 288 | } 289 | sa3 := cfx.Groups[gid3] 290 | if len(sa3) != 3 || sa3[0] != "j" || sa3[1] != "k" || sa3[2] != "l" { 291 | t.Fatalf("wrong servers for gid %v: %v\n", gid3, sa3) 292 | } 293 | 294 | ck.Leave([]int{gid1, gid3}) 295 | check(t, []int{gid2}, ck) 296 | cfa[3] = ck.Query(-1) 297 | 298 | cfx = ck.Query(-1) 299 | sa2 = cfx.Groups[gid2] 300 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 301 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 302 | } 303 | 304 | ck.Leave([]int{gid2}) 305 | 306 | fmt.Printf(" ... Passed\n") 307 | 308 | fmt.Printf("Test: Concurrent multi leave/join ...\n") 309 | 310 | const npara = 10 311 | var cka [npara]*Clerk 312 | for i := 0; i < len(cka); i++ { 313 | cka[i] = cfg.makeClient(cfg.All()) 314 | } 315 | gids := make([]int, npara) 316 | var wg sync.WaitGroup 317 | for xi := 0; xi < npara; xi++ { 318 | wg.Add(1) 319 | gids[xi] = int(xi + 1000) 320 | go func(i int) { 321 | defer wg.Done() 322 | var gid int = gids[i] 323 | cka[i].Join(map[int][]string{ 324 | gid: []string{ 325 | fmt.Sprintf("%da", gid), 326 | fmt.Sprintf("%db", gid), 327 | fmt.Sprintf("%dc", gid)}, 328 | gid + 1000: []string{fmt.Sprintf("%da", gid+1000)}, 329 | gid + 2000: []string{fmt.Sprintf("%da", gid+2000)}, 330 | }) 331 | cka[i].Leave([]int{gid + 1000, gid + 2000}) 332 | }(xi) 333 | } 334 | wg.Wait() 335 | check(t, gids, ck) 336 | 337 | fmt.Printf(" ... Passed\n") 338 | 339 | fmt.Printf("Test: Minimal transfers after multijoins ...\n") 340 | 341 | c1 := ck.Query(-1) 342 | m := make(map[int][]string) 343 | for i := 0; i < 5; i++ { 344 | var gid = npara + 1 + i 345 | m[gid] = []string{fmt.Sprintf("%da", gid), fmt.Sprintf("%db", gid)} 346 | } 347 | ck.Join(m) 348 | c2 := ck.Query(-1) 349 | for i := int(1); i <= npara; i++ { 350 | for j := 0; j < len(c1.Shards); j++ { 351 | if c2.Shards[j] == i { 352 | if c1.Shards[j] != i { 353 | t.Fatalf("non-minimal transfer after Join()s") 354 | } 355 | } 356 | } 357 | } 358 | 359 | fmt.Printf(" ... 
Passed\n") 360 | 361 | fmt.Printf("Test: Minimal transfers after multileaves ...\n") 362 | 363 | var l []int 364 | for i := 0; i < 5; i++ { 365 | l = append(l, npara+1+i) 366 | } 367 | ck.Leave(l) 368 | c3 := ck.Query(-1) 369 | for i := int(1); i <= npara; i++ { 370 | for j := 0; j < len(c1.Shards); j++ { 371 | if c2.Shards[j] == i { 372 | if c3.Shards[j] != i { 373 | t.Fatalf("non-minimal transfer after Leave()s") 374 | } 375 | } 376 | } 377 | } 378 | 379 | fmt.Printf(" ... Passed\n") 380 | } 381 | --------------------------------------------------------------------------------