├── README.md
├── kvpaxos
│   ├── client.go
│   ├── common.go
│   ├── server.go
│   └── test_test.go
├── kvraft
│   ├── ClientRpc.go
│   ├── KVStateMachine.go
│   ├── client.go
│   ├── common.go
│   ├── config.go
│   └── server.go
├── labgob
│   ├── labgob.go
│   └── test_test.go
├── labrpc
│   ├── labrpc.go
│   └── test_test.go
├── linearizability
│   ├── bitset.go
│   ├── linearizability.go
│   ├── model.go
│   └── models.go
├── paxos
│   ├── paxos.go
│   └── test_test.go
├── pbservice
│   ├── client.go
│   ├── common.go
│   ├── server.go
│   └── test_test.go
├── raft
│   ├── config.go
│   ├── persister.go
│   ├── raft.go
│   ├── rpc.go
│   ├── test_test.go
│   └── util.go
└── viewservice
    ├── client.go
    ├── common.go
    ├── server.go
    └── test_test.go

/README.md:
--------------------------------------------------------------------------------

## Prototype labs for distributed algorithms, including Raft, Paxos, and more.

### 1. The Raft algorithm

A reproduction of the paper, culminating in a small distributed KV service.

### Service model:

![d03ea63de8357c2ac881e97b985f39a](https://user-images.githubusercontent.com/50191422/138548789-eefdc3d6-6b50-48dc-b14b-6fede57e86c9.png)

#### Leader election: when the current Leader dies, a new Leader must be elected from the peers in time. The role transitions follow the figure in the paper:

![61b3d0401befac813a40a6b5886b02e](https://user-images.githubusercontent.com/50191422/138554219-6a24ba8a-c0c9-466a-b056-e5c34c3251e9.png)

RSM design:

A Raft instance has two kinds of time-driven activities: (1) the leader must send heartbeats, and (2) the other peers must start an election if too much time has passed since hearing from the leader. It is best to drive each of these activities with its own dedicated, long-running goroutine, rather than combining several activities into a single goroutine.

1. Heartbeat-period check.

```go
// the leader checks whether the time since it last sent heartbeats (latestIssueTime)
// has exceeded the heartbeat period (heartbeatPeriod)
func (rf *Raft) heartbeatPeriodTick() {...}
```

2. Election-timeout check.

```go
// periodically check whether the time since the most recent AppendEntries RPC received
// from the leader (including heartbeats), or the most recent vote granted to a candidate's
// RequestVote RPC (latestHeardTime), has exceeded the election timeout (electionTimeout).
// If it has, write to electionTimeoutChan to signal that an election can be started.
func (rf *Raft) electionTimeoutTick() {...}
```

3. Main event loop, which handles these two mutually exclusive events.

```go
// implement;
func (rf *Raft) eventLoop() {...}
```

4. The Raft struct

```go
type Raft struct {
	mu        sync.Mutex          // Lock to protect shared access to this peer's state
	peers     []*labrpc.ClientEnd // RPC end points of all peers
	persister *Persister          // Object to hold this peer's persisted state
	me        int                 // this peer's index into peers[]

	dead     int32
	applyCh  chan ApplyMsg
	state    int
	leaderId int
	applyCond     *sync.Cond // signals newly committed entries when commitIndex advances
	leaderCond    *sync.Cond
	nonLeaderCond *sync.Cond
	electionTimeout     int
	heartbeatPeriod     int
	electionTimeoutChan chan bool
	heartbeatPeriodChan chan bool
	CurrentTerm int
	VoteFor     int
	Log         []LogEntry
	commitIndex int
	lastApplied int

	nVotes int

	nextIndex  []int
	matchIndex []int

	latestHeardTime int64 // time of the most recent AppendEntries RPC from the leader (including heartbeats) or vote granted to a candidate's RequestVote RPC
	latestIssueTime int64 // time at which the leader most recently sent heartbeats
}
```

![image](https://user-images.githubusercontent.com/50191422/138550122-72c2857b-672f-468a-801f-b2d9765d879d.png)
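To make the two tickers concrete, here is a minimal sketch of the election-timeout goroutine written against the struct fields above. The 10 ms polling interval, the assumption that electionTimeout is in milliseconds, that latestHeardTime holds a time.Now().UnixNano() value, and the use of the usual killed()/GetState() helpers are choices of this sketch, not something fixed by the lab code.

```go
// Minimal sketch of electionTimeoutTick (assumptions noted above).
func (rf *Raft) electionTimeoutTick() {
	for !rf.killed() {
		if _, isLeader := rf.GetState(); !isLeader {
			rf.mu.Lock()
			elapsedMs := (time.Now().UnixNano() - rf.latestHeardTime) / int64(time.Millisecond)
			timedOut := elapsedMs >= int64(rf.electionTimeout)
			rf.mu.Unlock()
			if timedOut {
				// wake eventLoop(), which resets the timer and calls startElection()
				rf.electionTimeoutChan <- true
			}
		}
		time.Sleep(10 * time.Millisecond)
	}
}
```

heartbeatPeriodTick() mirrors this loop, comparing latestIssueTime against heartbeatPeriod and signalling heartbeatPeriodChan instead.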
5. RPC handling:

To speed up RPCs they are sent in parallel: iterate over the peers and create a dedicated goroutine per peer to send its RPC; handling the RPC reply in that same goroutine is the simplest approach. Keep a vote count, and as soon as more than half of the peers have granted their vote, switch state immediately and send a heartbeat at once to stop new elections from being started.

```go
func (rf *Raft) startElection() {...}
```

RPC timeout handling: following the Rules for Servers in the Raft paper, if any RPC request or reply carries a term greater than the current term, update the current term to that larger term and convert to the Follower state.


#### Log replication: the leader appends each new command received from Start() to its local log as a log entry, and then issues RPCs to the other peers asking them to replicate the entries to the followers.

![image](https://user-images.githubusercontent.com/50191422/138549717-3b1fe5b2-cec5-4308-8bd3-32d24e5ce937.png)

1. After receiving a log entry from a client, the Leader first appends the entry to its own log, then sends RPCs to the other peers to replicate it, and finally completes the commit.
2. Before advancing commitIndex, the leader must make sure the index being committed is greater than the current commitIndex.
3. When the majority condition is met, the leader must also check that it is still in the leader state: a term in some request or reply larger than the current term may already have switched this peer to Follower, and a peer that then receives a majority of replies while in the Follower state must not mistakenly advance commitIndex.
4. After the Leader commits an entry locally, it must send a heartbeat so the peers advance their commitIndex; for peers that have not replicated the entry yet, this heartbeat carries the entries between the previous commit and the current one, bringing their state machines up to date.
5. Conflicting entries found by the consistency check are replaced: delete the conflicting entries and locate the first point of conflict by decrementing the index backwards until it is found. There is an optimization opportunity here: if many entries have to be walked back, several network round trips are needed, and compression or other techniques that cut down the number of RPCs can be applied.
6. Once the consistency check passes, the peer that received the RPC sets its state to Follower.
7. RPC handling: the extended Raft paper notes that, if desired, the protocol can be optimized to reduce the number of rejected AppendEntries RPCs. For example, when an AppendEntries request is rejected, the follower can include the term of the conflicting entry and the first index it stores for that term. With this information, the leader can decrement nextIndex past all of the conflicting entries of that term, so one AppendEntries RPC is needed per conflicting term rather than one per entry. In other words, the entries carried by AppendEntries must start at or before the conflict point, never after it.

```go
func (rf *Raft) Start(command interface{}) {...}
func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) {...}
...
```

8. Heartbeat handling: check whether nextIndex is greater than the position one past the end of the log. If it is, the heartbeat does not need to carry any entries; if it is not, it carries the entries in between. Once the consistency check passes, commitIndex is advanced.

```go
func (rf *Raft) broadcastHeartbeat() {...}
...
```

9. Add a long-running daemon goroutine that applies log entries. It repeatedly checks whether commitIndex is greater than lastApplied; if so, the goroutine is woken up and wraps each entry in between into an ApplyMsg sent on applyCh; otherwise it simply sleeps until it is woken again.

```go
// long-running goroutine that applies committed log entries
func (rf *Raft) applyEntries() {...}
```

10. Leader safety properties (must hold at all times)
10.1 Raft's safety property is what the state machine relies on: if any server has applied a given entry as committed at some position, every other peer must apply the same entry at that same position.
10.2 At most one Leader can be elected within a single term.

### Implementing the client-to-server KV storage service

![593511591328c8f9d62f1348e48889d](https://user-images.githubusercontent.com/50191422/138549897-6b159009-5569-4821-9d74-7cf6cdedc00a.png)


The client submits a command to the server as a log entry; once the server has committed it through the replicated state machine, it returns the execution result to the client. As long as a majority of the servers stay alive, client requests keep being served even in the face of network partitions and other failures.

![4833da889f13f66fec4e37c2d5b648f](https://user-images.githubusercontent.com/50191422/138549904-a92f7564-b7c9-40d2-9b44-3d9ab03ef393.png)

1. Linearizable semantics: even if a command ends up being executed more than once, the reply stays idempotent. The client supplies a unique clientId, and the server records, per clientId and commandId, whether the command has been applied and the result of applying it. A map on the server is enough: the key is the clientId and the value holds the command id and the state-machine result (see the sketch after the Clerk snippet below).

![6c549e9b25e1afa8cece94c1f1044d4](https://user-images.githubusercontent.com/50191422/138549988-7f1310c3-fdfe-414c-bc64-cc09c8d92f0a.png)

2. The size of this map has to be kept in mind; the cluster needs to agree on how stale entries are evicted.
3. If the client receives no RPC reply within the expected time window, it resends the command.

![1596cd1ca6bc817f2d4df5ac56c709e](https://user-images.githubusercontent.com/50191422/138549910-abc3760b-28f9-4965-b125-843d39d9c704.png)

4. Client:

```go
func (ck *Clerk) Get(key string) string {...}
func (ck *Clerk) Put(key string, value string) {...}
func (ck *Clerk) Append(key string, value string) {...}
...
```
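To go with points 1–3, below is a minimal sketch of that duplicate-detection table, shaped after the lastOperations map used by the kvraft server later in this repo; the recordResponse helper and the omission of locking are simplifications of this sketch, not part of the actual implementation.

```go
// One record per client: the largest commandId already applied and the
// response it produced. A request whose commandId is not larger than
// MaxAppliedCommandId has already been executed, so it is answered from
// LastResponse instead of being applied again.
type OperationContext struct {
	MaxAppliedCommandId int64
	LastResponse        *CommandResponse
}

func (kv *KVServer) isDuplicateRequest(clientId, commandId int64) bool {
	ctx, ok := kv.lastOperations[clientId]
	return ok && commandId <= ctx.MaxAppliedCommandId
}

// After applying a non-Get command, remember its result so that a retried
// request with the same commandId receives the same answer.
func (kv *KVServer) recordResponse(clientId, commandId int64, resp *CommandResponse) {
	kv.lastOperations[clientId] = OperationContext{commandId, resp}
}
```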
5. The server-side struct:

```go
type KVServer struct {
	mu           sync.RWMutex
	dead         int32
	rf           *raft.Raft
	applyCh      chan raft.ApplyMsg
	maxRaftState int            // snapshot if log grows this big
	lastApplied  int            // record the lastApplied to prevent stateMachine from rollback
	stateMachine KVStateMachine // KV stateMachine
	lastOperations map[int64]OperationContext
	notifyChans    map[int]chan *CommandResponse
}
...
```

##### More documentation to come...

### References:
Raft paper, original English version: https://raft.github.io/raft.pdf
Raft paper, Chinese translation: https://www.ulunwen.com/archives/229938
...

--------------------------------------------------------------------------------
/kvpaxos/client.go:
--------------------------------------------------------------------------------
1 | package kvpaxos
2 | 
3 | import "net/rpc"
4 | import "crypto/rand"
5 | import "math/big"
6 | 
7 | import "fmt"
8 | 
9 | type Clerk struct {
10 | servers []string
11 | // will have to modify this struct.
12 | }
13 | 
14 | func nrand() int64 {
15 | max := big.NewInt(int64(1) << 62)
16 | bigx, _ := rand.Int(rand.Reader, max)
17 | x := bigx.Int64()
18 | return x
19 | }
20 | 
21 | func MakeClerk(servers []string) *Clerk {
22 | ck := new(Clerk)
23 | ck.servers = servers
24 | // have to add code here.
25 | return ck
26 | }
27 | 
28 | //
29 | // call() sends an RPC to the rpcname handler on server srv
30 | // with arguments args, waits for the reply, and leaves the
31 | // reply in reply. the reply argument should be a pointer
32 | // to a reply structure.
33 | //
34 | // the return value is true if the server responded, and false
35 | // if call() was not able to contact the server. in particular,
36 | // the reply's contents are only valid if call() returned true.
37 | //
38 | // should assume that call() will return an
39 | // error after a while if the server is dead.
40 | // don't provide own time-out mechanism.
41 | //
42 | // please use call() to send all RPCs, in client.go and server.go.
43 | // please don't change this function.
44 | //
45 | func call(srv string, rpcname string,
46 | args interface{}, reply interface{}) bool {
47 | c, errx := rpc.Dial("unix", srv)
48 | if errx != nil {
49 | return false
50 | }
51 | defer c.Close()
52 | 
53 | err := c.Call(rpcname, args, reply)
54 | if err == nil {
55 | return true
56 | }
57 | 
58 | fmt.Println(err)
59 | return false
60 | }
61 | 
62 | //
63 | // fetch the current value for a key.
64 | // returns "" if the key does not exist.
65 | // keeps trying forever in the face of all other errors.
66 | //
67 | func (ck *Clerk) Get(key string) string {
68 | // will have to modify this function.
69 | return ""
70 | }
71 | 
72 | //
73 | // shared by Put and Append.
74 | //
75 | func (ck *Clerk) PutAppend(key string, value string, op string) {
76 | // will have to modify this function.
77 | }
78 | 
79 | func (ck *Clerk) Put(key string, value string) {
80 | ck.PutAppend(key, value, "Put")
81 | }
82 | func (ck *Clerk) Append(key string, value string) {
83 | ck.PutAppend(key, value, "Append")
84 | }
85 | 
--------------------------------------------------------------------------------
/kvpaxos/common.go:
--------------------------------------------------------------------------------
1 | package kvpaxos
2 | 
3 | const (
4 | OK = "OK"
5 | ErrNoKey = "ErrNoKey"
6 | )
7 | 
8 | type Err string
9 | 
10 | // Put or Append
11 | type PutAppendArgs struct {
12 | // have to add definitions here.
13 | Key string 14 | Value string 15 | Op string // "Put" or "Append" 16 | // have to add definitions here. 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | } 20 | 21 | type PutAppendReply struct { 22 | Err Err 23 | } 24 | 25 | type GetArgs struct { 26 | Key string 27 | // have to add definitions here. 28 | } 29 | 30 | type GetReply struct { 31 | Err Err 32 | Value string 33 | } 34 | -------------------------------------------------------------------------------- /kvpaxos/server.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "DS-Lab/paxos" 8 | import "sync" 9 | import "sync/atomic" 10 | import "os" 11 | import "syscall" 12 | import "encoding/gob" 13 | import "math/rand" 14 | 15 | const Debug = 0 16 | 17 | func DPrintf(format string, a ...interface{}) (n int, err error) { 18 | if Debug > 0 { 19 | log.Printf(format, a...) 20 | } 21 | return 22 | } 23 | 24 | type Op struct { 25 | // definitions here. 26 | // Field names must start with capital letters, 27 | // otherwise RPC will break. 28 | } 29 | 30 | type KVPaxos struct { 31 | mu sync.Mutex 32 | l net.Listener 33 | me int 34 | dead int32 // for testing 35 | unreliable int32 // for testing 36 | px *paxos.Paxos 37 | 38 | // definitions here. 39 | } 40 | 41 | func (kv *KVPaxos) Get(args *GetArgs, reply *GetReply) error { 42 | // code here. 43 | return nil 44 | } 45 | 46 | func (kv *KVPaxos) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 47 | // code here. 48 | 49 | return nil 50 | } 51 | 52 | // tell the server to shut itself down. 53 | // please do not change these two functions. 54 | func (kv *KVPaxos) kill() { 55 | DPrintf("Kill(%d): die\n", kv.me) 56 | atomic.StoreInt32(&kv.dead, 1) 57 | kv.l.Close() 58 | kv.px.Kill() 59 | } 60 | 61 | // call this to find out if the server is dead. 62 | func (kv *KVPaxos) isdead() bool { 63 | return atomic.LoadInt32(&kv.dead) != 0 64 | } 65 | 66 | // please do not change these two functions. 67 | func (kv *KVPaxos) setunreliable(what bool) { 68 | if what { 69 | atomic.StoreInt32(&kv.unreliable, 1) 70 | } else { 71 | atomic.StoreInt32(&kv.unreliable, 0) 72 | } 73 | } 74 | 75 | func (kv *KVPaxos) isunreliable() bool { 76 | return atomic.LoadInt32(&kv.unreliable) != 0 77 | } 78 | 79 | // 80 | // servers[] contains the ports of the set of 81 | // servers that will cooperate via Paxos to 82 | // form the fault-tolerant key/value service. 83 | // me is the index of the current server in servers[]. 84 | // 85 | func StartServer(servers []string, me int) *KVPaxos { 86 | // call gob.Register on structures want 87 | // Go's RPC library to marshall/unmarshall. 88 | gob.Register(Op{}) 89 | 90 | kv := new(KVPaxos) 91 | kv.me = me 92 | 93 | // initialization code here. 94 | 95 | rpcs := rpc.NewServer() 96 | rpcs.Register(kv) 97 | 98 | kv.px = paxos.Make(servers, me, rpcs) 99 | 100 | os.Remove(servers[me]) 101 | l, e := net.Listen("unix", servers[me]) 102 | if e != nil { 103 | log.Fatal("listen error: ", e) 104 | } 105 | kv.l = l 106 | 107 | // please do not change any of the following code, 108 | // or do anything to subvert it. 109 | 110 | go func() { 111 | for kv.isdead() == false { 112 | conn, err := kv.l.Accept() 113 | if err == nil && kv.isdead() == false { 114 | if kv.isunreliable() && (rand.Int63()%1000) < 100 { 115 | // discard the request. 
116 | conn.Close() 117 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 { 118 | // process the request but force discard of reply. 119 | c1 := conn.(*net.UnixConn) 120 | f, _ := c1.File() 121 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 122 | if err != nil { 123 | fmt.Printf("shutdown: %v\n", err) 124 | } 125 | go rpcs.ServeConn(conn) 126 | } else { 127 | go rpcs.ServeConn(conn) 128 | } 129 | } else if err == nil { 130 | conn.Close() 131 | } 132 | if err != nil && kv.isdead() == false { 133 | fmt.Printf("KVPaxos(%v) accept: %v\n", me, err.Error()) 134 | kv.kill() 135 | } 136 | } 137 | }() 138 | 139 | return kv 140 | } 141 | -------------------------------------------------------------------------------- /kvpaxos/test_test.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import "testing" 4 | import "runtime" 5 | import "strconv" 6 | import "os" 7 | import "time" 8 | import "fmt" 9 | import "math/rand" 10 | import "strings" 11 | import "sync/atomic" 12 | 13 | func check(t *testing.T, ck *Clerk, key string, value string) { 14 | v := ck.Get(key) 15 | if v != value { 16 | t.Fatalf("Get(%v) -> %v, expected %v", key, v, value) 17 | } 18 | } 19 | 20 | func port(tag string, host int) string { 21 | s := "/var/tmp/824-" 22 | s += strconv.Itoa(os.Getuid()) + "/" 23 | os.Mkdir(s, 0777) 24 | s += "kv-" 25 | s += strconv.Itoa(os.Getpid()) + "-" 26 | s += tag + "-" 27 | s += strconv.Itoa(host) 28 | return s 29 | } 30 | 31 | func cleanup(kva []*KVPaxos) { 32 | for i := 0; i < len(kva); i++ { 33 | if kva[i] != nil { 34 | kva[i].kill() 35 | } 36 | } 37 | } 38 | 39 | // predict effect of Append(k, val) if old value is prev. 40 | func NextValue(prev string, val string) string { 41 | return prev + val 42 | } 43 | 44 | func TestBasic(t *testing.T) { 45 | runtime.GOMAXPROCS(4) 46 | 47 | const nservers = 3 48 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 49 | var kvh []string = make([]string, nservers) 50 | defer cleanup(kva) 51 | 52 | for i := 0; i < nservers; i++ { 53 | kvh[i] = port("basic", i) 54 | } 55 | for i := 0; i < nservers; i++ { 56 | kva[i] = StartServer(kvh, i) 57 | } 58 | 59 | ck := MakeClerk(kvh) 60 | var cka [nservers]*Clerk 61 | for i := 0; i < nservers; i++ { 62 | cka[i] = MakeClerk([]string{kvh[i]}) 63 | } 64 | 65 | fmt.Printf("Test: Basic put/append/get ...\n") 66 | 67 | ck.Append("app", "x") 68 | ck.Append("app", "y") 69 | check(t, ck, "app", "xy") 70 | 71 | ck.Put("a", "aa") 72 | check(t, ck, "a", "aa") 73 | 74 | cka[1].Put("a", "aaa") 75 | 76 | check(t, cka[2], "a", "aaa") 77 | check(t, cka[1], "a", "aaa") 78 | check(t, ck, "a", "aaa") 79 | 80 | fmt.Printf(" ... Passed\n") 81 | 82 | fmt.Printf("Test: Concurrent clients ...\n") 83 | 84 | for iters := 0; iters < 20; iters++ { 85 | const npara = 15 86 | var ca [npara]chan bool 87 | for nth := 0; nth < npara; nth++ { 88 | ca[nth] = make(chan bool) 89 | go func(me int) { 90 | defer func() { ca[me] <- true }() 91 | ci := (rand.Int() % nservers) 92 | myck := MakeClerk([]string{kvh[ci]}) 93 | if (rand.Int() % 1000) < 500 { 94 | myck.Put("b", strconv.Itoa(rand.Int())) 95 | } else { 96 | myck.Get("b") 97 | } 98 | }(nth) 99 | } 100 | for nth := 0; nth < npara; nth++ { 101 | <-ca[nth] 102 | } 103 | var va [nservers]string 104 | for i := 0; i < nservers; i++ { 105 | va[i] = cka[i].Get("b") 106 | if va[i] != va[0] { 107 | t.Fatalf("mismatch") 108 | } 109 | } 110 | } 111 | 112 | fmt.Printf(" ... 
Passed\n") 113 | 114 | time.Sleep(1 * time.Second) 115 | } 116 | 117 | func TestDone(t *testing.T) { 118 | runtime.GOMAXPROCS(4) 119 | 120 | const nservers = 3 121 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 122 | var kvh []string = make([]string, nservers) 123 | defer cleanup(kva) 124 | 125 | for i := 0; i < nservers; i++ { 126 | kvh[i] = port("done", i) 127 | } 128 | for i := 0; i < nservers; i++ { 129 | kva[i] = StartServer(kvh, i) 130 | } 131 | ck := MakeClerk(kvh) 132 | var cka [nservers]*Clerk 133 | for pi := 0; pi < nservers; pi++ { 134 | cka[pi] = MakeClerk([]string{kvh[pi]}) 135 | } 136 | 137 | fmt.Printf("Test: server frees Paxos log memory...\n") 138 | 139 | ck.Put("a", "aa") 140 | check(t, ck, "a", "aa") 141 | 142 | runtime.GC() 143 | var m0 runtime.MemStats 144 | runtime.ReadMemStats(&m0) 145 | // rtm's m0.Alloc is 2 MB 146 | 147 | sz := 1000000 148 | items := 10 149 | 150 | for iters := 0; iters < 2; iters++ { 151 | for i := 0; i < items; i++ { 152 | key := strconv.Itoa(i) 153 | value := make([]byte, sz) 154 | for j := 0; j < len(value); j++ { 155 | value[j] = byte((rand.Int() % 100) + 1) 156 | } 157 | ck.Put(key, string(value)) 158 | check(t, cka[i%nservers], key, string(value)) 159 | } 160 | } 161 | 162 | // Put and Get to each of the replicas, in case 163 | // the Done information is piggybacked on 164 | // the Paxos proposer messages. 165 | for iters := 0; iters < 2; iters++ { 166 | for pi := 0; pi < nservers; pi++ { 167 | cka[pi].Put("a", "aa") 168 | check(t, cka[pi], "a", "aa") 169 | } 170 | } 171 | 172 | time.Sleep(1 * time.Second) 173 | 174 | runtime.GC() 175 | var m1 runtime.MemStats 176 | runtime.ReadMemStats(&m1) 177 | // rtm's m1.Alloc is 45 MB 178 | 179 | // fmt.Printf(" Memory: before %v, after %v\n", m0.Alloc, m1.Alloc) 180 | 181 | allowed := m0.Alloc + uint64(nservers*items*sz*2) 182 | if m1.Alloc > allowed { 183 | t.Fatalf("Memory use did not shrink enough (Used: %v, allowed: %v).\n", m1.Alloc, allowed) 184 | } 185 | 186 | fmt.Printf(" ... 
Passed\n") 187 | } 188 | 189 | func pp(tag string, src int, dst int) string { 190 | s := "/var/tmp/824-" 191 | s += strconv.Itoa(os.Getuid()) + "/" 192 | s += "kv-" + tag + "-" 193 | s += strconv.Itoa(os.Getpid()) + "-" 194 | s += strconv.Itoa(src) + "-" 195 | s += strconv.Itoa(dst) 196 | return s 197 | } 198 | 199 | func cleanpp(tag string, n int) { 200 | for i := 0; i < n; i++ { 201 | for j := 0; j < n; j++ { 202 | ij := pp(tag, i, j) 203 | os.Remove(ij) 204 | } 205 | } 206 | } 207 | 208 | func part(t *testing.T, tag string, npaxos int, p1 []int, p2 []int, p3 []int) { 209 | cleanpp(tag, npaxos) 210 | 211 | pa := [][]int{p1, p2, p3} 212 | for pi := 0; pi < len(pa); pi++ { 213 | p := pa[pi] 214 | for i := 0; i < len(p); i++ { 215 | for j := 0; j < len(p); j++ { 216 | ij := pp(tag, p[i], p[j]) 217 | pj := port(tag, p[j]) 218 | err := os.Link(pj, ij) 219 | if err != nil { 220 | t.Fatalf("os.Link(%v, %v): %v\n", pj, ij, err) 221 | } 222 | } 223 | } 224 | } 225 | } 226 | 227 | func TestPartition(t *testing.T) { 228 | runtime.GOMAXPROCS(4) 229 | 230 | tag := "partition" 231 | const nservers = 5 232 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 233 | defer cleanup(kva) 234 | defer cleanpp(tag, nservers) 235 | 236 | for i := 0; i < nservers; i++ { 237 | var kvh []string = make([]string, nservers) 238 | for j := 0; j < nservers; j++ { 239 | if j == i { 240 | kvh[j] = port(tag, i) 241 | } else { 242 | kvh[j] = pp(tag, i, j) 243 | } 244 | } 245 | kva[i] = StartServer(kvh, i) 246 | } 247 | defer part(t, tag, nservers, []int{}, []int{}, []int{}) 248 | 249 | var cka [nservers]*Clerk 250 | for i := 0; i < nservers; i++ { 251 | cka[i] = MakeClerk([]string{port(tag, i)}) 252 | } 253 | 254 | fmt.Printf("Test: No partition ...\n") 255 | 256 | part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 257 | cka[0].Put("1", "12") 258 | cka[2].Put("1", "13") 259 | check(t, cka[3], "1", "13") 260 | 261 | fmt.Printf(" ... Passed\n") 262 | 263 | fmt.Printf("Test: Progress in majority ...\n") 264 | 265 | part(t, tag, nservers, []int{2, 3, 4}, []int{0, 1}, []int{}) 266 | cka[2].Put("1", "14") 267 | check(t, cka[4], "1", "14") 268 | 269 | fmt.Printf(" ... Passed\n") 270 | 271 | fmt.Printf("Test: No progress in minority ...\n") 272 | 273 | done0 := make(chan bool) 274 | done1 := make(chan bool) 275 | go func() { 276 | cka[0].Put("1", "15") 277 | done0 <- true 278 | }() 279 | go func() { 280 | cka[1].Get("1") 281 | done1 <- true 282 | }() 283 | 284 | select { 285 | case <-done0: 286 | t.Fatalf("Put in minority completed") 287 | case <-done1: 288 | t.Fatalf("Get in minority completed") 289 | case <-time.After(time.Second): 290 | } 291 | 292 | check(t, cka[4], "1", "14") 293 | cka[3].Put("1", "16") 294 | check(t, cka[4], "1", "16") 295 | 296 | fmt.Printf(" ... Passed\n") 297 | 298 | fmt.Printf("Test: Completion after heal ...\n") 299 | 300 | part(t, tag, nservers, []int{0, 2, 3, 4}, []int{1}, []int{}) 301 | 302 | select { 303 | case <-done0: 304 | case <-time.After(30 * 100 * time.Millisecond): 305 | t.Fatalf("Put did not complete") 306 | } 307 | 308 | select { 309 | case <-done1: 310 | t.Fatalf("Get in minority completed") 311 | default: 312 | } 313 | 314 | check(t, cka[4], "1", "15") 315 | check(t, cka[0], "1", "15") 316 | 317 | part(t, tag, nservers, []int{0, 1, 2}, []int{3, 4}, []int{}) 318 | 319 | select { 320 | case <-done1: 321 | case <-time.After(100 * 100 * time.Millisecond): 322 | t.Fatalf("Get did not complete") 323 | } 324 | 325 | check(t, cka[1], "1", "15") 326 | 327 | fmt.Printf(" ... 
Passed\n") 328 | } 329 | 330 | func randclerk(kvh []string) *Clerk { 331 | sa := make([]string, len(kvh)) 332 | copy(sa, kvh) 333 | for i := range sa { 334 | j := rand.Intn(i + 1) 335 | sa[i], sa[j] = sa[j], sa[i] 336 | } 337 | return MakeClerk(sa) 338 | } 339 | 340 | // check that all known appends are present in a value, 341 | // and are in order for each concurrent client. 342 | func checkAppends(t *testing.T, v string, counts []int) { 343 | nclients := len(counts) 344 | for i := 0; i < nclients; i++ { 345 | lastoff := -1 346 | for j := 0; j < counts[i]; j++ { 347 | wanted := "x " + strconv.Itoa(i) + " " + strconv.Itoa(j) + " y" 348 | off := strings.Index(v, wanted) 349 | if off < 0 { 350 | t.Fatalf("missing element in Append result") 351 | } 352 | off1 := strings.LastIndex(v, wanted) 353 | if off1 != off { 354 | t.Fatalf("duplicate element in Append result") 355 | } 356 | if off <= lastoff { 357 | t.Fatalf("wrong order for element in Append result") 358 | } 359 | lastoff = off 360 | } 361 | } 362 | } 363 | 364 | func TestUnreliable(t *testing.T) { 365 | runtime.GOMAXPROCS(4) 366 | 367 | const nservers = 3 368 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 369 | var kvh []string = make([]string, nservers) 370 | defer cleanup(kva) 371 | 372 | for i := 0; i < nservers; i++ { 373 | kvh[i] = port("un", i) 374 | } 375 | for i := 0; i < nservers; i++ { 376 | kva[i] = StartServer(kvh, i) 377 | kva[i].setunreliable(true) 378 | } 379 | 380 | ck := MakeClerk(kvh) 381 | var cka [nservers]*Clerk 382 | for i := 0; i < nservers; i++ { 383 | cka[i] = MakeClerk([]string{kvh[i]}) 384 | } 385 | 386 | fmt.Printf("Test: Basic put/get, unreliable ...\n") 387 | 388 | ck.Put("a", "aa") 389 | check(t, ck, "a", "aa") 390 | 391 | cka[1].Put("a", "aaa") 392 | 393 | check(t, cka[2], "a", "aaa") 394 | check(t, cka[1], "a", "aaa") 395 | check(t, ck, "a", "aaa") 396 | 397 | fmt.Printf(" ... Passed\n") 398 | 399 | fmt.Printf("Test: Sequence of puts, unreliable ...\n") 400 | 401 | for iters := 0; iters < 6; iters++ { 402 | const ncli = 5 403 | var ca [ncli]chan bool 404 | for cli := 0; cli < ncli; cli++ { 405 | ca[cli] = make(chan bool) 406 | go func(me int) { 407 | ok := false 408 | defer func() { ca[me] <- ok }() 409 | myck := randclerk(kvh) 410 | key := strconv.Itoa(me) 411 | vv := myck.Get(key) 412 | myck.Append(key, "0") 413 | vv = NextValue(vv, "0") 414 | myck.Append(key, "1") 415 | vv = NextValue(vv, "1") 416 | myck.Append(key, "2") 417 | vv = NextValue(vv, "2") 418 | time.Sleep(100 * time.Millisecond) 419 | if myck.Get(key) != vv { 420 | t.Fatalf("wrong value") 421 | } 422 | if myck.Get(key) != vv { 423 | t.Fatalf("wrong value") 424 | } 425 | ok = true 426 | }(cli) 427 | } 428 | for cli := 0; cli < ncli; cli++ { 429 | x := <-ca[cli] 430 | if x == false { 431 | t.Fatalf("failure") 432 | } 433 | } 434 | } 435 | 436 | fmt.Printf(" ... 
Passed\n") 437 | 438 | fmt.Printf("Test: Concurrent clients, unreliable ...\n") 439 | 440 | for iters := 0; iters < 20; iters++ { 441 | const ncli = 15 442 | var ca [ncli]chan bool 443 | for cli := 0; cli < ncli; cli++ { 444 | ca[cli] = make(chan bool) 445 | go func(me int) { 446 | defer func() { ca[me] <- true }() 447 | myck := randclerk(kvh) 448 | if (rand.Int() % 1000) < 500 { 449 | myck.Put("b", strconv.Itoa(rand.Int())) 450 | } else { 451 | myck.Get("b") 452 | } 453 | }(cli) 454 | } 455 | for cli := 0; cli < ncli; cli++ { 456 | <-ca[cli] 457 | } 458 | 459 | var va [nservers]string 460 | for i := 0; i < nservers; i++ { 461 | va[i] = cka[i].Get("b") 462 | if va[i] != va[0] { 463 | t.Fatalf("mismatch; 0 got %v, %v got %v", va[0], i, va[i]) 464 | } 465 | } 466 | } 467 | 468 | fmt.Printf(" ... Passed\n") 469 | 470 | fmt.Printf("Test: Concurrent Append to same key, unreliable ...\n") 471 | 472 | ck.Put("k", "") 473 | 474 | ff := func(me int, ch chan int) { 475 | ret := -1 476 | defer func() { ch <- ret }() 477 | myck := randclerk(kvh) 478 | n := 0 479 | for n < 5 { 480 | myck.Append("k", "x "+strconv.Itoa(me)+" "+strconv.Itoa(n)+" y") 481 | n++ 482 | } 483 | ret = n 484 | } 485 | 486 | ncli := 5 487 | cha := []chan int{} 488 | for i := 0; i < ncli; i++ { 489 | cha = append(cha, make(chan int)) 490 | go ff(i, cha[i]) 491 | } 492 | 493 | counts := []int{} 494 | for i := 0; i < ncli; i++ { 495 | n := <-cha[i] 496 | if n < 0 { 497 | t.Fatal("client failed") 498 | } 499 | counts = append(counts, n) 500 | } 501 | 502 | vx := ck.Get("k") 503 | checkAppends(t, vx, counts) 504 | 505 | { 506 | for i := 0; i < nservers; i++ { 507 | vi := cka[i].Get("k") 508 | if vi != vx { 509 | t.Fatalf("mismatch; 0 got %v, %v got %v", vx, i, vi) 510 | } 511 | } 512 | } 513 | 514 | fmt.Printf(" ... 
Passed\n") 515 | 516 | time.Sleep(1 * time.Second) 517 | } 518 | 519 | func TestHole(t *testing.T) { 520 | runtime.GOMAXPROCS(4) 521 | 522 | fmt.Printf("Test: Tolerates holes in paxos sequence ...\n") 523 | 524 | tag := "hole" 525 | const nservers = 5 526 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 527 | defer cleanup(kva) 528 | defer cleanpp(tag, nservers) 529 | 530 | for i := 0; i < nservers; i++ { 531 | var kvh []string = make([]string, nservers) 532 | for j := 0; j < nservers; j++ { 533 | if j == i { 534 | kvh[j] = port(tag, i) 535 | } else { 536 | kvh[j] = pp(tag, i, j) 537 | } 538 | } 539 | kva[i] = StartServer(kvh, i) 540 | } 541 | defer part(t, tag, nservers, []int{}, []int{}, []int{}) 542 | 543 | for iters := 0; iters < 5; iters++ { 544 | part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 545 | 546 | ck2 := MakeClerk([]string{port(tag, 2)}) 547 | ck2.Put("q", "q") 548 | 549 | done := int32(0) 550 | const nclients = 10 551 | var ca [nclients]chan bool 552 | for xcli := 0; xcli < nclients; xcli++ { 553 | ca[xcli] = make(chan bool) 554 | go func(cli int) { 555 | ok := false 556 | defer func() { ca[cli] <- ok }() 557 | var cka [nservers]*Clerk 558 | for i := 0; i < nservers; i++ { 559 | cka[i] = MakeClerk([]string{port(tag, i)}) 560 | } 561 | key := strconv.Itoa(cli) 562 | last := "" 563 | cka[0].Put(key, last) 564 | for atomic.LoadInt32(&done) == 0 { 565 | ci := (rand.Int() % 2) 566 | if (rand.Int() % 1000) < 500 { 567 | nv := strconv.Itoa(rand.Int()) 568 | cka[ci].Put(key, nv) 569 | last = nv 570 | } else { 571 | v := cka[ci].Get(key) 572 | if v != last { 573 | t.Fatalf("%v: wrong value, key %v, wanted %v, got %v", 574 | cli, key, last, v) 575 | } 576 | } 577 | } 578 | ok = true 579 | }(xcli) 580 | } 581 | 582 | time.Sleep(3 * time.Second) 583 | 584 | part(t, tag, nservers, []int{2, 3, 4}, []int{0, 1}, []int{}) 585 | 586 | // can majority partition make progress even though 587 | // minority servers were interrupted in the middle of 588 | // paxos agreements? 589 | check(t, ck2, "q", "q") 590 | ck2.Put("q", "qq") 591 | check(t, ck2, "q", "qq") 592 | 593 | // restore network, wait for all threads to exit. 594 | part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 595 | atomic.StoreInt32(&done, 1) 596 | ok := true 597 | for i := 0; i < nclients; i++ { 598 | z := <-ca[i] 599 | ok = ok && z 600 | } 601 | if ok == false { 602 | t.Fatal("something is wrong") 603 | } 604 | check(t, ck2, "q", "qq") 605 | } 606 | 607 | fmt.Printf(" ... 
Passed\n") 608 | } 609 | 610 | func TestManyPartition(t *testing.T) { 611 | runtime.GOMAXPROCS(4) 612 | 613 | fmt.Printf("Test: Many clients, changing partitions ...\n") 614 | 615 | tag := "many" 616 | const nservers = 5 617 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 618 | defer cleanup(kva) 619 | defer cleanpp(tag, nservers) 620 | 621 | for i := 0; i < nservers; i++ { 622 | var kvh []string = make([]string, nservers) 623 | for j := 0; j < nservers; j++ { 624 | if j == i { 625 | kvh[j] = port(tag, i) 626 | } else { 627 | kvh[j] = pp(tag, i, j) 628 | } 629 | } 630 | kva[i] = StartServer(kvh, i) 631 | kva[i].setunreliable(true) 632 | } 633 | defer part(t, tag, nservers, []int{}, []int{}, []int{}) 634 | part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 635 | 636 | done := int32(0) 637 | 638 | // re-partition periodically 639 | ch1 := make(chan bool) 640 | go func() { 641 | defer func() { ch1 <- true }() 642 | for atomic.LoadInt32(&done) == 0 { 643 | var a [nservers]int 644 | for i := 0; i < nservers; i++ { 645 | a[i] = (rand.Int() % 3) 646 | } 647 | pa := make([][]int, 3) 648 | for i := 0; i < 3; i++ { 649 | pa[i] = make([]int, 0) 650 | for j := 0; j < nservers; j++ { 651 | if a[j] == i { 652 | pa[i] = append(pa[i], j) 653 | } 654 | } 655 | } 656 | part(t, tag, nservers, pa[0], pa[1], pa[2]) 657 | time.Sleep(time.Duration(rand.Int63()%200) * time.Millisecond) 658 | } 659 | }() 660 | 661 | const nclients = 10 662 | var ca [nclients]chan bool 663 | for xcli := 0; xcli < nclients; xcli++ { 664 | ca[xcli] = make(chan bool) 665 | go func(cli int) { 666 | ok := false 667 | defer func() { ca[cli] <- ok }() 668 | sa := make([]string, nservers) 669 | for i := 0; i < nservers; i++ { 670 | sa[i] = port(tag, i) 671 | } 672 | for i := range sa { 673 | j := rand.Intn(i + 1) 674 | sa[i], sa[j] = sa[j], sa[i] 675 | } 676 | myck := MakeClerk(sa) 677 | key := strconv.Itoa(cli) 678 | last := "" 679 | myck.Put(key, last) 680 | for atomic.LoadInt32(&done) == 0 { 681 | if (rand.Int() % 1000) < 500 { 682 | nv := strconv.Itoa(rand.Int()) 683 | myck.Append(key, nv) 684 | last = NextValue(last, nv) 685 | } else { 686 | v := myck.Get(key) 687 | if v != last { 688 | t.Fatalf("%v: get wrong value, key %v, wanted %v, got %v", 689 | cli, key, last, v) 690 | } 691 | } 692 | } 693 | ok = true 694 | }(xcli) 695 | } 696 | 697 | time.Sleep(20 * time.Second) 698 | atomic.StoreInt32(&done, 1) 699 | <-ch1 700 | part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 701 | 702 | ok := true 703 | for i := 0; i < nclients; i++ { 704 | z := <-ca[i] 705 | ok = ok && z 706 | } 707 | 708 | if ok { 709 | fmt.Printf(" ... 
Passed\n") 710 | } 711 | } 712 | -------------------------------------------------------------------------------- /kvraft/ClientRpc.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "fmt" 4 | 5 | type CommandRequest struct { 6 | Key string 7 | Value string 8 | Op OperationOp 9 | ClientId int64 10 | CommandId int64 11 | } 12 | 13 | func (request CommandRequest) String() string { 14 | return fmt.Sprintf("{Key:%v,Value:%v,Op:%v,ClientId:%v,CommandId:%v}", request.Key, request.Value, request.Op, request.ClientId, request.CommandId) 15 | } 16 | 17 | type CommandResponse struct { 18 | Err Err 19 | Value string 20 | } 21 | 22 | func (response CommandResponse) String() string { 23 | return fmt.Sprintf("{Err:%v,Value:%v}", response.Err, response.Value) 24 | } 25 | -------------------------------------------------------------------------------- /kvraft/KVStateMachine.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | type KVStateMachine interface { 4 | Get(key string) (string, Err) 5 | Put(key, value string) Err 6 | Append(key, value string) Err 7 | } 8 | 9 | type MemoryKV struct { 10 | KV map[string]string 11 | } 12 | 13 | func NewMemoryKV() *MemoryKV { 14 | return &MemoryKV{make(map[string]string)} 15 | } 16 | 17 | func (MemoryKV *MemoryKV) Get(key string) (string, Err) { 18 | if value, ok := MemoryKV.KV[key]; ok { 19 | return value, OK 20 | } 21 | return "", ErrNoKey 22 | } 23 | 24 | func (MemoryKV *MemoryKV) Put(key, value string) Err { 25 | MemoryKV.KV[key] += value 26 | return OK 27 | } 28 | 29 | func (MemoryKV *MemoryKV) Append(key, value string) Err { 30 | MemoryKV.KV[key] += value 31 | return OK 32 | } 33 | -------------------------------------------------------------------------------- /kvraft/client.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "DS-Lab/labrpc" 4 | import "crypto/rand" 5 | import "math/big" 6 | 7 | type Clerk struct { 8 | servers []*labrpc.ClientEnd 9 | // You will have to modify this struct. 10 | leaderId int64 11 | clientId int64 12 | commandId int64 13 | } 14 | 15 | func nrand() int64 { 16 | max := big.NewInt(int64(1) << 62) 17 | bigx, _ := rand.Int(rand.Reader, max) 18 | x := bigx.Int64() 19 | return x 20 | } 21 | 22 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 23 | 24 | return &Clerk{ 25 | servers: servers, 26 | leaderId: 0, 27 | clientId: nrand(), 28 | commandId: 0, 29 | } 30 | //ck := new(Clerk) 31 | //ck.servers = servers 32 | 33 | //return ck 34 | } 35 | 36 | // 37 | // fetch the current value for a key. 38 | // returns "" if the key does not exist. 39 | // keeps trying forever in the face of all other errors. 40 | // 41 | // you can send an RPC with code like this: 42 | // ok := ck.servers[i].Call("KVServer.Get", &args, &reply) 43 | // 44 | // the types of args and reply (including whether they are pointers) 45 | // must match the declared types of the RPC handler function's 46 | // arguments. and reply must be passed as a pointer. 47 | // 48 | 49 | func (ck *Clerk) Get(key string) string { 50 | 51 | // You will have to modify this function. 52 | //return "" 53 | return ck.Command(&CommandRequest{Key: key, Op: OpGet}) 54 | } 55 | 56 | // 57 | // shared by Put and Append. 
58 | // 59 | // you can send an RPC with code like this: 60 | // ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply) 61 | // 62 | // the types of args and reply (including whether they are pointers) 63 | // must match the declared types of the RPC handler function's 64 | // arguments. and reply must be passed as a pointer. 65 | // 66 | //func (ck *Clerk) PutAppend(key string, value string, op string) { 67 | // // You will have to modify this function. 68 | //} 69 | 70 | func (ck *Clerk) Put(key string, value string) { 71 | //ck.PutAppend(key, value, "Put") 72 | ck.Command(&CommandRequest{Key: key, Value: value, Op: OpPut}) 73 | } 74 | func (ck *Clerk) Append(key string, value string) { 75 | //ck.PutAppend(key, value, "Append") 76 | ck.Command(&CommandRequest{Key: key, Value: value, Op: OpAppend}) 77 | } 78 | 79 | func (ck *Clerk) Command(request *CommandRequest) string { 80 | request.ClientId, request.CommandId = ck.clientId, ck.commandId 81 | for { 82 | var response CommandResponse 83 | if !ck.servers[ck.leaderId].Call("KVServer.Command", request, &response) || response.Err == ErrWrongLeader || response.Err == ErrTimeout { 84 | ck.leaderId = (ck.leaderId + 1) % int64(len(ck.servers)) 85 | continue 86 | } 87 | ck.commandId++ 88 | return response.Value 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /kvraft/common.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "time" 7 | ) 8 | 9 | //const ( 10 | // OK = "OK" 11 | // ErrNoKey = "ErrNoKey" 12 | //) 13 | 14 | const ExecuteTimeout = 500 * time.Millisecond 15 | 16 | const DEBUG = false 17 | 18 | func DPrintf(format string, a ...interface{}) (n int, err error) { 19 | if DEBUG { 20 | log.Printf(format, a...) 21 | } 22 | return 23 | } 24 | 25 | //type Err string 26 | type Err uint8 27 | 28 | const ( 29 | OK Err = iota 30 | ErrNoKey 31 | ErrWrongLeader 32 | ErrTimeout 33 | ) 34 | 35 | func (err Err) string() string { 36 | switch err { 37 | case OK: 38 | return "OK" 39 | case ErrNoKey: 40 | return "ErrNoKey" 41 | case ErrWrongLeader: 42 | return "ErrWrongLeader" 43 | case ErrTimeout: 44 | return "ErrTimeout" 45 | } 46 | panic(fmt.Sprintf("unexpected Err %d", err)) 47 | } 48 | 49 | /*********************/ 50 | 51 | type Command struct { 52 | *CommandRequest 53 | } 54 | 55 | type OperationContext struct { 56 | MaxAppliedCommandId int64 57 | LastResponse *CommandResponse 58 | } 59 | 60 | type OperationOp uint8 61 | 62 | const ( 63 | OpPut OperationOp = iota 64 | OpAppend 65 | OpGet 66 | ) 67 | 68 | func (op OperationOp) String() string { 69 | switch op { 70 | case OpPut: 71 | return "OpPut" 72 | case OpAppend: 73 | return "OpAppend" 74 | case OpGet: 75 | return "OpGet" 76 | } 77 | panic(fmt.Sprintf("unexpected OperationOp %d", op)) 78 | } 79 | 80 | /*********************/ 81 | 82 | // Put or Append 83 | type PutAppendArgs struct { 84 | Key string 85 | Value string 86 | Op string // "Put" or "Append" 87 | // You'll have to add definitions here. 88 | // Field names must start with capital letters, 89 | // otherwise RPC will break. 90 | } 91 | 92 | type PutAppendReply struct { 93 | WrongLeader bool 94 | Err Err 95 | } 96 | 97 | type GetArgs struct { 98 | Key string 99 | // You'll have to add definitions here. 
100 | } 101 | 102 | type GetReply struct { 103 | WrongLeader bool 104 | Err Err 105 | Value string 106 | } 107 | 108 | /***********************/ 109 | -------------------------------------------------------------------------------- /kvraft/config.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "DS-Lab/labrpc" 4 | import "testing" 5 | import "os" 6 | 7 | // import "log" 8 | import crand "crypto/rand" 9 | import "math/big" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | import "DS-Lab/raft" 15 | import "fmt" 16 | import "time" 17 | import "sync/atomic" 18 | 19 | func randstring(n int) string { 20 | b := make([]byte, 2*n) 21 | crand.Read(b) 22 | s := base64.URLEncoding.EncodeToString(b) 23 | return s[0:n] 24 | } 25 | 26 | func makeSeed() int64 { 27 | max := big.NewInt(int64(1) << 62) 28 | bigx, _ := crand.Int(crand.Reader, max) 29 | x := bigx.Int64() 30 | return x 31 | } 32 | 33 | // Randomize server handles 34 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 35 | sa := make([]*labrpc.ClientEnd, len(kvh)) 36 | copy(sa, kvh) 37 | for i := range sa { 38 | j := rand.Intn(i + 1) 39 | sa[i], sa[j] = sa[j], sa[i] 40 | } 41 | return sa 42 | } 43 | 44 | type config struct { 45 | mu sync.Mutex 46 | t *testing.T 47 | net *labrpc.Network 48 | n int 49 | kvservers []*KVServer 50 | saved []*raft.Persister 51 | endnames [][]string // names of each server's sending ClientEnds 52 | clerks map[*Clerk][]string 53 | nextClientId int 54 | maxraftstate int 55 | start time.Time // time at which make_config() was called 56 | // begin()/end() statistics 57 | t0 time.Time // time at which test_test.go called cfg.begin() 58 | rpcs0 int // rpcTotal() at start of test 59 | ops int32 // number of clerk get/put/append method calls 60 | } 61 | 62 | func (cfg *config) checkTimeout() { 63 | // enforce a two minute real-time limit on each test 64 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 65 | cfg.t.Fatal("test took longer than 120 seconds") 66 | } 67 | } 68 | 69 | func (cfg *config) cleanup() { 70 | cfg.mu.Lock() 71 | defer cfg.mu.Unlock() 72 | for i := 0; i < len(cfg.kvservers); i++ { 73 | if cfg.kvservers[i] != nil { 74 | cfg.kvservers[i].Kill() 75 | } 76 | } 77 | cfg.net.Cleanup() 78 | cfg.checkTimeout() 79 | } 80 | 81 | // Maximum log size across all servers 82 | func (cfg *config) LogSize() int { 83 | logsize := 0 84 | for i := 0; i < cfg.n; i++ { 85 | n := cfg.saved[i].RaftStateSize() 86 | if n > logsize { 87 | logsize = n 88 | } 89 | } 90 | return logsize 91 | } 92 | 93 | // Maximum snapshot size across all servers 94 | func (cfg *config) SnapshotSize() int { 95 | snapshotsize := 0 96 | for i := 0; i < cfg.n; i++ { 97 | n := cfg.saved[i].SnapshotSize() 98 | if n > snapshotsize { 99 | snapshotsize = n 100 | } 101 | } 102 | return snapshotsize 103 | } 104 | 105 | // attach server i to servers listed in to 106 | // caller must hold cfg.mu 107 | func (cfg *config) connectUnlocked(i int, to []int) { 108 | // log.Printf("connect peer %d to %v\n", i, to) 109 | 110 | // outgoing socket files 111 | for j := 0; j < len(to); j++ { 112 | endname := cfg.endnames[i][to[j]] 113 | cfg.net.Enable(endname, true) 114 | } 115 | 116 | // incoming socket files 117 | for j := 0; j < len(to); j++ { 118 | endname := cfg.endnames[to[j]][i] 119 | cfg.net.Enable(endname, true) 120 | } 121 | } 122 | 123 | func (cfg *config) connect(i int, to []int) { 124 | cfg.mu.Lock() 125 | defer 
cfg.mu.Unlock() 126 | cfg.connectUnlocked(i, to) 127 | } 128 | 129 | // detach server i from the servers listed in from 130 | // caller must hold cfg.mu 131 | func (cfg *config) disconnectUnlocked(i int, from []int) { 132 | // log.Printf("disconnect peer %d from %v\n", i, from) 133 | 134 | // outgoing socket files 135 | for j := 0; j < len(from); j++ { 136 | if cfg.endnames[i] != nil { 137 | endname := cfg.endnames[i][from[j]] 138 | cfg.net.Enable(endname, false) 139 | } 140 | } 141 | 142 | // incoming socket files 143 | for j := 0; j < len(from); j++ { 144 | if cfg.endnames[j] != nil { 145 | endname := cfg.endnames[from[j]][i] 146 | cfg.net.Enable(endname, false) 147 | } 148 | } 149 | } 150 | 151 | func (cfg *config) disconnect(i int, from []int) { 152 | cfg.mu.Lock() 153 | defer cfg.mu.Unlock() 154 | cfg.disconnectUnlocked(i, from) 155 | } 156 | 157 | func (cfg *config) All() []int { 158 | all := make([]int, cfg.n) 159 | for i := 0; i < cfg.n; i++ { 160 | all[i] = i 161 | } 162 | return all 163 | } 164 | 165 | func (cfg *config) ConnectAll() { 166 | cfg.mu.Lock() 167 | defer cfg.mu.Unlock() 168 | for i := 0; i < cfg.n; i++ { 169 | cfg.connectUnlocked(i, cfg.All()) 170 | } 171 | } 172 | 173 | // Sets up 2 partitions with connectivity between servers in each partition. 174 | func (cfg *config) partition(p1 []int, p2 []int) { 175 | cfg.mu.Lock() 176 | defer cfg.mu.Unlock() 177 | // log.Printf("partition servers into: %v %v\n", p1, p2) 178 | for i := 0; i < len(p1); i++ { 179 | cfg.disconnectUnlocked(p1[i], p2) 180 | cfg.connectUnlocked(p1[i], p1) 181 | } 182 | for i := 0; i < len(p2); i++ { 183 | cfg.disconnectUnlocked(p2[i], p1) 184 | cfg.connectUnlocked(p2[i], p2) 185 | } 186 | } 187 | 188 | // Create a clerk with clerk specific server names. 189 | // Give it connections to all of the servers, but for 190 | // now enable only connections to servers in to[]. 191 | func (cfg *config) makeClient(to []int) *Clerk { 192 | cfg.mu.Lock() 193 | defer cfg.mu.Unlock() 194 | 195 | // a fresh set of ClientEnds. 
196 | ends := make([]*labrpc.ClientEnd, cfg.n) 197 | endnames := make([]string, cfg.n) 198 | for j := 0; j < cfg.n; j++ { 199 | endnames[j] = randstring(20) 200 | ends[j] = cfg.net.MakeEnd(endnames[j]) 201 | cfg.net.Connect(endnames[j], j) 202 | } 203 | 204 | ck := MakeClerk(random_handles(ends)) 205 | cfg.clerks[ck] = endnames 206 | cfg.nextClientId++ 207 | cfg.ConnectClientUnlocked(ck, to) 208 | return ck 209 | } 210 | 211 | func (cfg *config) deleteClient(ck *Clerk) { 212 | cfg.mu.Lock() 213 | defer cfg.mu.Unlock() 214 | 215 | v := cfg.clerks[ck] 216 | for i := 0; i < len(v); i++ { 217 | os.Remove(v[i]) 218 | } 219 | delete(cfg.clerks, ck) 220 | } 221 | 222 | // caller should hold cfg.mu 223 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 224 | // log.Printf("ConnectClient %v to %v\n", ck, to) 225 | endnames := cfg.clerks[ck] 226 | for j := 0; j < len(to); j++ { 227 | s := endnames[to[j]] 228 | cfg.net.Enable(s, true) 229 | } 230 | } 231 | 232 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 233 | cfg.mu.Lock() 234 | defer cfg.mu.Unlock() 235 | cfg.ConnectClientUnlocked(ck, to) 236 | } 237 | 238 | // caller should hold cfg.mu 239 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 240 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 241 | endnames := cfg.clerks[ck] 242 | for j := 0; j < len(from); j++ { 243 | s := endnames[from[j]] 244 | cfg.net.Enable(s, false) 245 | } 246 | } 247 | 248 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 249 | cfg.mu.Lock() 250 | defer cfg.mu.Unlock() 251 | cfg.DisconnectClientUnlocked(ck, from) 252 | } 253 | 254 | // Shutdown a server by isolating it 255 | func (cfg *config) ShutdownServer(i int) { 256 | cfg.mu.Lock() 257 | defer cfg.mu.Unlock() 258 | 259 | cfg.disconnectUnlocked(i, cfg.All()) 260 | 261 | // disable client connections to the server. 262 | // it's important to do this before creating 263 | // the new Persister in saved[i], to avoid 264 | // the possibility of the server returning a 265 | // positive reply to an Append but persisting 266 | // the result in the superseded Persister. 267 | cfg.net.DeleteServer(i) 268 | 269 | // a fresh persister, in case old instance 270 | // continues to update the Persister. 271 | // but copy old persister's content so that we always 272 | // pass Make() the last persisted state. 273 | if cfg.saved[i] != nil { 274 | cfg.saved[i] = cfg.saved[i].Copy() 275 | } 276 | 277 | kv := cfg.kvservers[i] 278 | if kv != nil { 279 | cfg.mu.Unlock() 280 | kv.Kill() 281 | cfg.mu.Lock() 282 | cfg.kvservers[i] = nil 283 | } 284 | } 285 | 286 | // If restart servers, first call ShutdownServer 287 | func (cfg *config) StartServer(i int) { 288 | cfg.mu.Lock() 289 | 290 | // a fresh set of outgoing ClientEnd names. 291 | cfg.endnames[i] = make([]string, cfg.n) 292 | for j := 0; j < cfg.n; j++ { 293 | cfg.endnames[i][j] = randstring(20) 294 | } 295 | 296 | // a fresh set of ClientEnds. 297 | ends := make([]*labrpc.ClientEnd, cfg.n) 298 | for j := 0; j < cfg.n; j++ { 299 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 300 | cfg.net.Connect(cfg.endnames[i][j], j) 301 | } 302 | 303 | // a fresh persister, so old instance doesn't overwrite 304 | // new instance's persisted state. 305 | // give the fresh persister a copy of the old persister's 306 | // state, so that the spec is that we pass StartKVServer() 307 | // the last persisted state. 
308 | if cfg.saved[i] != nil { 309 | cfg.saved[i] = cfg.saved[i].Copy() 310 | } else { 311 | cfg.saved[i] = raft.MakePersister() 312 | } 313 | cfg.mu.Unlock() 314 | 315 | cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate) 316 | 317 | kvsvc := labrpc.MakeService(cfg.kvservers[i]) 318 | rfsvc := labrpc.MakeService(cfg.kvservers[i].rf) 319 | srv := labrpc.MakeServer() 320 | srv.AddService(kvsvc) 321 | srv.AddService(rfsvc) 322 | cfg.net.AddServer(i, srv) 323 | } 324 | 325 | func (cfg *config) Leader() (bool, int) { 326 | cfg.mu.Lock() 327 | defer cfg.mu.Unlock() 328 | 329 | for i := 0; i < cfg.n; i++ { 330 | _, is_leader := cfg.kvservers[i].rf.GetState() 331 | if is_leader { 332 | return true, i 333 | } 334 | } 335 | return false, 0 336 | } 337 | 338 | // Partition servers into 2 groups and put current leader in minority 339 | func (cfg *config) make_partition() ([]int, []int) { 340 | _, l := cfg.Leader() 341 | p1 := make([]int, cfg.n/2+1) 342 | p2 := make([]int, cfg.n/2) 343 | j := 0 344 | for i := 0; i < cfg.n; i++ { 345 | if i != l { 346 | if j < len(p1) { 347 | p1[j] = i 348 | } else { 349 | p2[j-len(p1)] = i 350 | } 351 | j++ 352 | } 353 | } 354 | p2[len(p2)-1] = l 355 | return p1, p2 356 | } 357 | 358 | var ncpu_once sync.Once 359 | 360 | func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config { 361 | ncpu_once.Do(func() { 362 | if runtime.NumCPU() < 2 { 363 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 364 | } 365 | rand.Seed(makeSeed()) 366 | }) 367 | runtime.GOMAXPROCS(4) 368 | cfg := &config{} 369 | cfg.t = t 370 | cfg.net = labrpc.MakeNetwork() 371 | cfg.n = n 372 | cfg.kvservers = make([]*KVServer, cfg.n) 373 | cfg.saved = make([]*raft.Persister, cfg.n) 374 | cfg.endnames = make([][]string, cfg.n) 375 | cfg.clerks = make(map[*Clerk][]string) 376 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 377 | cfg.maxraftstate = maxraftstate 378 | cfg.start = time.Now() 379 | 380 | // create a full set of KV servers. 381 | for i := 0; i < cfg.n; i++ { 382 | cfg.StartServer(i) 383 | } 384 | 385 | cfg.ConnectAll() 386 | 387 | cfg.net.Reliable(!unreliable) 388 | 389 | return cfg 390 | } 391 | 392 | func (cfg *config) rpcTotal() int { 393 | return cfg.net.GetTotalCount() 394 | } 395 | 396 | // start a Test. 397 | // print the Test message. 398 | // e.g. cfg.begin("Test (2B): RPC counts aren't too high") 399 | func (cfg *config) begin(description string) { 400 | fmt.Printf("%s ...\n", description) 401 | cfg.t0 = time.Now() 402 | cfg.rpcs0 = cfg.rpcTotal() 403 | atomic.StoreInt32(&cfg.ops, 0) 404 | } 405 | 406 | func (cfg *config) op() { 407 | atomic.AddInt32(&cfg.ops, 1) 408 | } 409 | 410 | // end a Test -- the fact that we got here means there 411 | // was no failure. 412 | // print the Passed message, 413 | // and some performance numbers. 414 | func (cfg *config) end() { 415 | cfg.checkTimeout() 416 | if cfg.t.Failed() == false { 417 | t := time.Since(cfg.t0).Seconds() // real time 418 | npeers := cfg.n // number of Raft peers 419 | nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends 420 | ops := atomic.LoadInt32(&cfg.ops) // number of clerk get/put/append calls 421 | 422 | fmt.Printf(" ... 
Passed --") 423 | fmt.Printf(" %4.1f %d %5d %4d\n", t, npeers, nrpc, ops) 424 | } 425 | } 426 | -------------------------------------------------------------------------------- /kvraft/server.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "sync" 7 | "sync/atomic" 8 | "time" 9 | 10 | "DS-Lab/labgob" 11 | "DS-Lab/labrpc" 12 | "DS-Lab/raft" 13 | ) 14 | 15 | type KVServer struct { 16 | mu sync.RWMutex 17 | dead int32 18 | rf *raft.Raft 19 | applyCh chan raft.ApplyMsg 20 | 21 | maxRaftState int // snapshot if log grows this big 22 | lastApplied int // record the lastApplied to prevent stateMachine from rollback 23 | 24 | stateMachine KVStateMachine // KV stateMachine 25 | lastOperations map[int64]OperationContext // determine whether log is duplicated by recording the last commandId and response corresponding to the clientId 26 | notifyChans map[int]chan *CommandResponse // notify client goroutine by applier goroutine to response 27 | } 28 | 29 | func (kv *KVServer) Command(request *CommandRequest, response *CommandResponse) { 30 | defer DPrintf("{Node %v} processes CommandRequest %v with CommandResponse %v", kv.rf.Me(), request, response) 31 | // return result directly without raft layer's participation if request is duplicated 32 | kv.mu.RLock() 33 | if request.Op != OpGet && kv.isDuplicateRequest(request.ClientId, request.CommandId) { 34 | lastResponse := kv.lastOperations[request.ClientId].LastResponse 35 | response.Value, response.Err = lastResponse.Value, lastResponse.Err 36 | kv.mu.RUnlock() 37 | return 38 | } 39 | kv.mu.RUnlock() 40 | // do not hold lock to improve throughput 41 | // when KVServer holds the lock to take snapshot, underlying raft can still commit raft logs 42 | index, _, isLeader := kv.rf.Start(Command{request}) 43 | if !isLeader { 44 | response.Err = ErrWrongLeader 45 | return 46 | } 47 | kv.mu.Lock() 48 | ch := kv.getNotifyChan(index) 49 | kv.mu.Unlock() 50 | select { 51 | case result := <-ch: 52 | response.Value, response.Err = result.Value, result.Err 53 | case <-time.After(ExecuteTimeout): 54 | response.Err = ErrTimeout 55 | } 56 | // release notifyChan to reduce memory footprint 57 | // why asynchronously? to improve throughput, here is no need to block client request 58 | go func() { 59 | kv.mu.Lock() 60 | kv.removeOutdatedNotifyChan(index) 61 | kv.mu.Unlock() 62 | }() 63 | } 64 | 65 | // each RPC imply that the client has seen the reply for its previous RPC 66 | // therefore, we only need to determine whether the latest commandId of a clientId meets the criteria 67 | func (kv *KVServer) isDuplicateRequest(clientId int64, requestId int64) bool { 68 | operationContext, ok := kv.lastOperations[clientId] 69 | return ok && requestId <= operationContext.MaxAppliedCommandId 70 | } 71 | 72 | // 73 | // the tester calls Kill() when a KVServer instance won't 74 | // be needed again. for your convenience, we supply 75 | // code to set rf.dead (without needing a lock), 76 | // and a killed() method to test rf.dead in 77 | // long-running loops. you can also add your own 78 | // code to Kill(). you're not required to do anything 79 | // about this, but it may be convenient (for example) 80 | // to suppress debug output from a Kill()ed instance. 
81 | // 82 | func (kv *KVServer) Kill() { 83 | DPrintf("{Node %v} has been killed", kv.rf.Me()) 84 | atomic.StoreInt32(&kv.dead, 1) 85 | kv.rf.Kill() 86 | } 87 | 88 | func (kv *KVServer) killed() bool { 89 | return atomic.LoadInt32(&kv.dead) == 1 90 | } 91 | 92 | // a dedicated applier goroutine to apply committed entries to stateMachine, take snapshot and apply snapshot from raft 93 | func (kv *KVServer) applier() { 94 | for kv.killed() == false { 95 | select { 96 | case message := <-kv.applyCh: 97 | DPrintf("{Node %v} tries to apply message %v", kv.rf.Me(), message) 98 | if message.CommandValid { 99 | kv.mu.Lock() 100 | if message.CommandIndex <= kv.lastApplied { 101 | DPrintf("{Node %v} discards outdated message %v because a newer snapshot which lastApplied is %v has been restored", kv.rf.Me(), message, kv.lastApplied) 102 | kv.mu.Unlock() 103 | continue 104 | } 105 | kv.lastApplied = message.CommandIndex 106 | 107 | var response *CommandResponse 108 | command := message.Command.(Command) 109 | if command.Op != OpGet && kv.isDuplicateRequest(command.ClientId, command.CommandId) { 110 | DPrintf("{Node %v} doesn't apply duplicated message %v to stateMachine because maxAppliedCommandId is %v for client %v", kv.rf.Me(), message, kv.lastOperations[command.ClientId], command.ClientId) 111 | response = kv.lastOperations[command.ClientId].LastResponse 112 | } else { 113 | response = kv.applyLogToStateMachine(command) 114 | if command.Op != OpGet { 115 | kv.lastOperations[command.ClientId] = OperationContext{command.CommandId, response} 116 | } 117 | } 118 | 119 | // only notify related channel for currentTerm's log when node is leader 120 | if currentTerm, isLeader := kv.rf.GetState(); isLeader && message.CommitTerm == currentTerm { 121 | ch := kv.getNotifyChan(message.CommandIndex) 122 | ch <- response 123 | } 124 | 125 | needSnapshot := kv.needSnapshot() 126 | if needSnapshot { 127 | kv.takeSnapshot(message.CommandIndex) 128 | } 129 | kv.mu.Unlock() 130 | } else if message.SnapshotValid { 131 | kv.mu.Lock() 132 | if kv.rf.CondInstallSnapshot(message.SnapshotTerm, message.SnapshotIndex, message.Snapshot) { 133 | kv.restoreSnapshot(message.Snapshot) 134 | kv.lastApplied = message.SnapshotIndex 135 | } 136 | kv.mu.Unlock() 137 | } else { 138 | panic(fmt.Sprintf("unexpected Message %v", message)) 139 | } 140 | } 141 | } 142 | } 143 | 144 | func (kv *KVServer) needSnapshot() bool { 145 | return kv.maxRaftState != -1 && kv.rf.GetRaftStateSize() >= kv.maxRaftState 146 | } 147 | 148 | func (kv *KVServer) takeSnapshot(index int) { 149 | w := new(bytes.Buffer) 150 | e := labgob.NewEncoder(w) 151 | e.Encode(kv.stateMachine) 152 | e.Encode(kv.lastOperations) 153 | kv.rf.Snapshot(index, w.Bytes()) 154 | } 155 | 156 | func (kv *KVServer) restoreSnapshot(snapshot []byte) { 157 | if snapshot == nil || len(snapshot) == 0 { 158 | return 159 | } 160 | r := bytes.NewBuffer(snapshot) 161 | d := labgob.NewDecoder(r) 162 | var stateMachine MemoryKV 163 | var lastOperations map[int64]OperationContext 164 | if d.Decode(&stateMachine) != nil || 165 | d.Decode(&lastOperations) != nil { 166 | DPrintf("{Node %v} restores snapshot failed", kv.rf.Me()) 167 | } 168 | kv.stateMachine, kv.lastOperations = &stateMachine, lastOperations 169 | } 170 | 171 | func (kv *KVServer) getNotifyChan(index int) chan *CommandResponse { 172 | if _, ok := kv.notifyChans[index]; !ok { 173 | kv.notifyChans[index] = make(chan *CommandResponse, 1) 174 | } 175 | return kv.notifyChans[index] 176 | } 177 | 178 | func (kv *KVServer) 
removeOutdatedNotifyChan(index int) { 179 | delete(kv.notifyChans, index) 180 | } 181 | 182 | func (kv *KVServer) applyLogToStateMachine(command Command) *CommandResponse { 183 | var value string 184 | var err Err 185 | switch command.Op { 186 | case OpPut: 187 | err = kv.stateMachine.Put(command.Key, command.Value) 188 | case OpAppend: 189 | err = kv.stateMachine.Append(command.Key, command.Value) 190 | case OpGet: 191 | value, err = kv.stateMachine.Get(command.Key) 192 | } 193 | return &CommandResponse{err, value} 194 | } 195 | 196 | // 197 | // servers[] contains the ports of the set of 198 | // servers that will cooperate via Raft to 199 | // form the fault-tolerant key/value service. 200 | // me is the index of the current server in servers[]. 201 | // the k/v server should store snapshots through the underlying Raft 202 | // implementation, which should call persister.SaveStateAndSnapshot() to 203 | // atomically save the Raft state along with the snapshot. 204 | // the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, 205 | // in order to allow Raft to garbage-collect its log. if maxraftstate is -1, 206 | // you don't need to snapshot. 207 | // StartKVServer() must return quickly, so it should start goroutines 208 | // for any long-running work. 209 | // 210 | func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { 211 | // call labgob.Register on structures you want 212 | // Go's RPC library to marshall/unmarshall. 213 | labgob.Register(Command{}) 214 | applyCh := make(chan raft.ApplyMsg) 215 | 216 | kv := &KVServer{ 217 | maxRaftState: maxraftstate, 218 | applyCh: applyCh, 219 | dead: 0, 220 | lastApplied: 0, 221 | rf: raft.Make(servers, me, persister, applyCh), 222 | stateMachine: NewMemoryKV(), 223 | lastOperations: make(map[int64]OperationContext), 224 | notifyChans: make(map[int]chan *CommandResponse), 225 | } 226 | kv.restoreSnapshot(persister.ReadSnapshot()) 227 | // start applier goroutine to apply committed logs to stateMachine 228 | go kv.applier() 229 | 230 | DPrintf("{Node %v} has started", kv.rf.Me()) 231 | return kv 232 | } 233 | -------------------------------------------------------------------------------- /labgob/labgob.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | import "encoding/gob" 4 | import "io" 5 | import "reflect" 6 | import "fmt" 7 | import "sync" 8 | import "unicode" 9 | import "unicode/utf8" 10 | 11 | var mu sync.Mutex 12 | var errorCount int // for TestCapital 13 | var checked map[reflect.Type]bool 14 | 15 | type LabEncoder struct { 16 | gob *gob.Encoder 17 | } 18 | 19 | func NewEncoder(w io.Writer) *LabEncoder { 20 | enc := &LabEncoder{} 21 | enc.gob = gob.NewEncoder(w) 22 | return enc 23 | } 24 | 25 | func (enc *LabEncoder) Encode(e interface{}) error { 26 | checkValue(e) 27 | return enc.gob.Encode(e) 28 | } 29 | 30 | func (enc *LabEncoder) EncodeValue(value reflect.Value) error { 31 | checkValue(value.Interface()) 32 | return enc.gob.EncodeValue(value) 33 | } 34 | 35 | type LabDecoder struct { 36 | gob *gob.Decoder 37 | } 38 | 39 | func NewDecoder(r io.Reader) *LabDecoder { 40 | dec := &LabDecoder{} 41 | dec.gob = gob.NewDecoder(r) 42 | return dec 43 | } 44 | 45 | func (dec *LabDecoder) Decode(e interface{}) error { 46 | checkValue(e) 47 | checkDefault(e) 48 | return dec.gob.Decode(e) 49 | } 50 | 51 | func Register(value interface{}) { 52 | checkValue(value) 53 | gob.Register(value) 54 | } 55 | 56 | 
func RegisterName(name string, value interface{}) { 57 | checkValue(value) 58 | gob.RegisterName(name, value) 59 | } 60 | 61 | func checkValue(value interface{}) { 62 | checkType(reflect.TypeOf(value)) 63 | } 64 | 65 | func checkType(t reflect.Type) { 66 | k := t.Kind() 67 | 68 | mu.Lock() 69 | // only complain once, and avoid recursion. 70 | if checked == nil { 71 | checked = map[reflect.Type]bool{} 72 | } 73 | if checked[t] { 74 | mu.Unlock() 75 | return 76 | } 77 | checked[t] = true 78 | mu.Unlock() 79 | 80 | switch k { 81 | case reflect.Struct: 82 | for i := 0; i < t.NumField(); i++ { 83 | f := t.Field(i) 84 | rune, _ := utf8.DecodeRuneInString(f.Name) 85 | if unicode.IsUpper(rune) == false { 86 | // ta da 87 | fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n", 88 | f.Name, t.Name()) 89 | mu.Lock() 90 | errorCount += 1 91 | mu.Unlock() 92 | } 93 | checkType(f.Type) 94 | } 95 | return 96 | case reflect.Slice, reflect.Array, reflect.Ptr: 97 | checkType(t.Elem()) 98 | return 99 | case reflect.Map: 100 | checkType(t.Elem()) 101 | checkType(t.Key()) 102 | return 103 | default: 104 | return 105 | } 106 | } 107 | 108 | // 109 | // warn if the value contains non-default values, 110 | // as it would if one sent an RPC but the reply 111 | // struct was already modified. if the RPC reply 112 | // contains default values, GOB won't overwrite 113 | // the non-default value. 114 | // 115 | func checkDefault(value interface{}) { 116 | if value == nil { 117 | return 118 | } 119 | checkDefault1(reflect.ValueOf(value), 1, "") 120 | } 121 | 122 | func checkDefault1(value reflect.Value, depth int, name string) { 123 | if depth > 3 { 124 | return 125 | } 126 | 127 | t := value.Type() 128 | k := t.Kind() 129 | 130 | switch k { 131 | case reflect.Struct: 132 | for i := 0; i < t.NumField(); i++ { 133 | vv := value.Field(i) 134 | name1 := t.Field(i).Name 135 | if name != "" { 136 | name1 = name + "." + name1 137 | } 138 | checkDefault1(vv, depth+1, name1) 139 | } 140 | return 141 | case reflect.Ptr: 142 | if value.IsNil() { 143 | return 144 | } 145 | checkDefault1(value.Elem(), depth+1, name) 146 | return 147 | case reflect.Bool, 148 | reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 149 | reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, 150 | reflect.Uintptr, reflect.Float32, reflect.Float64, 151 | reflect.String: 152 | if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false { 153 | mu.Lock() 154 | if errorCount < 1 { 155 | what := name 156 | if what == "" { 157 | what = t.Name() 158 | } 159 | // this warning typically arises if code re-uses the same RPC reply 160 | // variable for multiple RPC calls, or if code restores persisted 161 | // state into variable that already have non-default values. 
162 | fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n", 163 | what) 164 | } 165 | errorCount += 1 166 | mu.Unlock() 167 | } 168 | return 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /labgob/test_test.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | import "testing" 4 | 5 | import "bytes" 6 | 7 | type T1 struct { 8 | T1int0 int 9 | T1int1 int 10 | T1string0 string 11 | T1string1 string 12 | } 13 | 14 | type T2 struct { 15 | T2slice []T1 16 | T2map map[int]*T1 17 | T2t3 interface{} 18 | } 19 | 20 | type T3 struct { 21 | T3int999 int 22 | } 23 | 24 | // 25 | // test that we didn't break GOB. 26 | // 27 | func TestGOB(t *testing.T) { 28 | e0 := errorCount 29 | 30 | w := new(bytes.Buffer) 31 | 32 | Register(T3{}) 33 | 34 | { 35 | x0 := 0 36 | x1 := 1 37 | t1 := T1{} 38 | t1.T1int1 = 1 39 | t1.T1string1 = "6.824" 40 | t2 := T2{} 41 | t2.T2slice = []T1{T1{}, t1} 42 | t2.T2map = map[int]*T1{} 43 | t2.T2map[99] = &T1{1, 2, "x", "y"} 44 | t2.T2t3 = T3{999} 45 | 46 | e := NewEncoder(w) 47 | e.Encode(x0) 48 | e.Encode(x1) 49 | e.Encode(t1) 50 | e.Encode(t2) 51 | } 52 | data := w.Bytes() 53 | 54 | { 55 | var x0 int 56 | var x1 int 57 | var t1 T1 58 | var t2 T2 59 | 60 | r := bytes.NewBuffer(data) 61 | d := NewDecoder(r) 62 | if d.Decode(&x0) != nil || 63 | d.Decode(&x1) != nil || 64 | d.Decode(&t1) != nil || 65 | d.Decode(&t2) != nil { 66 | t.Fatalf("Decode failed") 67 | } 68 | 69 | if x0 != 0 { 70 | t.Fatalf("wrong x0 %v\n", x0) 71 | } 72 | if x1 != 1 { 73 | t.Fatalf("wrong x1 %v\n", x1) 74 | } 75 | if t1.T1int0 != 0 { 76 | t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0) 77 | } 78 | if t1.T1int1 != 1 { 79 | t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1) 80 | } 81 | if t1.T1string0 != "" { 82 | t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0) 83 | } 84 | if t1.T1string1 != "6.824" { 85 | t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1) 86 | } 87 | if len(t2.T2slice) != 2 { 88 | t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice)) 89 | } 90 | if t2.T2slice[1].T1int1 != 1 { 91 | t.Fatalf("wrong slice value\n") 92 | } 93 | if len(t2.T2map) != 1 { 94 | t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map)) 95 | } 96 | if t2.T2map[99].T1string1 != "y" { 97 | t.Fatalf("wrong map value\n") 98 | } 99 | t3 := (t2.T2t3).(T3) 100 | if t3.T3int999 != 999 { 101 | t.Fatalf("wrong t2.T2t3.T3int999\n") 102 | } 103 | } 104 | 105 | if errorCount != e0 { 106 | t.Fatalf("there were errors, but should not have been") 107 | } 108 | } 109 | 110 | type T4 struct { 111 | Yes int 112 | no int 113 | } 114 | 115 | // 116 | // make sure we check capitalization 117 | // labgob prints one warning during this test. 118 | // 119 | func TestCapital(t *testing.T) { 120 | e0 := errorCount 121 | 122 | v := []map[*T4]int{} 123 | 124 | w := new(bytes.Buffer) 125 | e := NewEncoder(w) 126 | e.Encode(v) 127 | data := w.Bytes() 128 | 129 | var v1 []map[T4]int 130 | r := bytes.NewBuffer(data) 131 | d := NewDecoder(r) 132 | d.Decode(&v1) 133 | 134 | if errorCount != e0+1 { 135 | t.Fatalf("failed to warn about lower-case field") 136 | } 137 | } 138 | 139 | // 140 | // check that we warn when someone sends a default value over 141 | // RPC but the target into which we're decoding holds a non-default 142 | // value, which GOB seems not to overwrite as you'd expect. 143 | // 144 | // labgob does not print a warning. 
145 | // 146 | func TestDefault(t *testing.T) { 147 | e0 := errorCount 148 | 149 | type DD struct { 150 | X int 151 | } 152 | 153 | // send a default value... 154 | dd1 := DD{} 155 | 156 | w := new(bytes.Buffer) 157 | e := NewEncoder(w) 158 | e.Encode(dd1) 159 | data := w.Bytes() 160 | 161 | // and receive it into memory that already 162 | // holds non-default values. 163 | reply := DD{99} 164 | 165 | r := bytes.NewBuffer(data) 166 | d := NewDecoder(r) 167 | d.Decode(&reply) 168 | 169 | if errorCount != e0+1 { 170 | t.Fatalf("failed to warn about decoding into non-default value") 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /labrpc/labrpc.go: -------------------------------------------------------------------------------- 1 | package labrpc 2 | 3 | import "DS-Lab/labgob" 4 | import "bytes" 5 | import "reflect" 6 | import "sync" 7 | import "log" 8 | import "strings" 9 | import "math/rand" 10 | import "time" 11 | import "sync/atomic" 12 | 13 | type reqMsg struct { 14 | endname interface{} // name of sending ClientEnd 15 | svcMeth string // e.g. "Raft.AppendEntries" 16 | argsType reflect.Type 17 | args []byte 18 | replyCh chan replyMsg 19 | } 20 | 21 | type replyMsg struct { 22 | ok bool 23 | reply []byte 24 | } 25 | 26 | type ClientEnd struct { 27 | endname interface{} // this end-point's name 28 | ch chan reqMsg // copy of Network.endCh 29 | done chan struct{} // closed when Network is cleaned up 30 | } 31 | 32 | // send an RPC, wait for the reply. 33 | // the return value indicates success; false means that 34 | // no reply was received from the server. 35 | func (e *ClientEnd) Call(svcMeth string, args interface{}, reply interface{}) bool { 36 | req := reqMsg{} 37 | req.endname = e.endname 38 | req.svcMeth = svcMeth 39 | req.argsType = reflect.TypeOf(args) 40 | req.replyCh = make(chan replyMsg) 41 | 42 | qb := new(bytes.Buffer) 43 | qe := labgob.NewEncoder(qb) 44 | qe.Encode(args) 45 | req.args = qb.Bytes() 46 | 47 | select { 48 | case e.ch <- req: 49 | // ok 50 | case <-e.done: 51 | return false 52 | } 53 | 54 | rep := <-req.replyCh 55 | if rep.ok { 56 | rb := bytes.NewBuffer(rep.reply) 57 | rd := labgob.NewDecoder(rb) 58 | if err := rd.Decode(reply); err != nil { 59 | log.Fatalf("ClientEnd.Call(): decode reply: %v\n", err) 60 | } 61 | return true 62 | } else { 63 | return false 64 | } 65 | } 66 | 67 | type Network struct { 68 | mu sync.Mutex 69 | reliable bool 70 | longDelays bool // pause a long time on send on disabled connection 71 | longReordering bool // sometimes delay replies a long time 72 | ends map[interface{}]*ClientEnd // ends, by name 73 | enabled map[interface{}]bool // by end name 74 | servers map[interface{}]*Server // servers, by name 75 | connections map[interface{}]interface{} // endname -> servername 76 | endCh chan reqMsg 77 | done chan struct{} // closed when Network is cleaned up 78 | count int32 // total RPC count, for statistics 79 | } 80 | 81 | func MakeNetwork() *Network { 82 | rn := &Network{} 83 | rn.reliable = true 84 | rn.ends = map[interface{}]*ClientEnd{} 85 | rn.enabled = map[interface{}]bool{} 86 | rn.servers = map[interface{}]*Server{} 87 | rn.connections = map[interface{}](interface{}){} 88 | rn.endCh = make(chan reqMsg) 89 | rn.done = make(chan struct{}) 90 | 91 | // single goroutine to handle all ClientEnd.Call()s 92 | go func() { 93 | for { 94 | select { 95 | case xreq := <-rn.endCh: 96 | atomic.AddInt32(&rn.count, 1) 97 | go rn.ProcessReq(xreq) 98 | case <-rn.done: 99 | return 100 | } 101 
| } 102 | }() 103 | 104 | return rn 105 | } 106 | 107 | func (rn *Network) Cleanup() { 108 | close(rn.done) 109 | } 110 | 111 | func (rn *Network) Reliable(yes bool) { 112 | rn.mu.Lock() 113 | defer rn.mu.Unlock() 114 | 115 | rn.reliable = yes 116 | } 117 | 118 | func (rn *Network) LongReordering(yes bool) { 119 | rn.mu.Lock() 120 | defer rn.mu.Unlock() 121 | 122 | rn.longReordering = yes 123 | } 124 | 125 | func (rn *Network) LongDelays(yes bool) { 126 | rn.mu.Lock() 127 | defer rn.mu.Unlock() 128 | 129 | rn.longDelays = yes 130 | } 131 | 132 | func (rn *Network) ReadEndnameInfo(endname interface{}) (enabled bool, 133 | servername interface{}, server *Server, reliable bool, longreordering bool, 134 | ) { 135 | rn.mu.Lock() 136 | defer rn.mu.Unlock() 137 | 138 | enabled = rn.enabled[endname] 139 | servername = rn.connections[endname] 140 | if servername != nil { 141 | server = rn.servers[servername] 142 | } 143 | reliable = rn.reliable 144 | longreordering = rn.longReordering 145 | return 146 | } 147 | 148 | func (rn *Network) IsServerDead(endname interface{}, servername interface{}, server *Server) bool { 149 | rn.mu.Lock() 150 | defer rn.mu.Unlock() 151 | 152 | if rn.enabled[endname] == false || rn.servers[servername] != server { 153 | return true 154 | } 155 | return false 156 | } 157 | 158 | func (rn *Network) ProcessReq(req reqMsg) { 159 | enabled, servername, server, reliable, longreordering := rn.ReadEndnameInfo(req.endname) 160 | 161 | if enabled && servername != nil && server != nil { 162 | if reliable == false { 163 | // short delay 164 | ms := (rand.Int() % 27) 165 | time.Sleep(time.Duration(ms) * time.Millisecond) 166 | } 167 | 168 | if reliable == false && (rand.Int()%1000) < 100 { 169 | // drop the request, return as if timeout 170 | req.replyCh <- replyMsg{false, nil} 171 | return 172 | } 173 | 174 | // execute the request (call the RPC handler). 175 | // in a separate thread so that we can periodically check 176 | // if the server has been killed and the RPC should get a 177 | // failure reply. 178 | ech := make(chan replyMsg) 179 | go func() { 180 | r := server.dispatch(req) 181 | ech <- r 182 | }() 183 | 184 | // wait for handler to return, 185 | // but stop waiting if DeleteServer() has been called, 186 | // and return an error. 187 | var reply replyMsg 188 | replyOK := false 189 | serverDead := false 190 | for replyOK == false && serverDead == false { 191 | select { 192 | case reply = <-ech: 193 | replyOK = true 194 | case <-time.After(100 * time.Millisecond): 195 | serverDead = rn.IsServerDead(req.endname, servername, server) 196 | if serverDead { 197 | go func() { 198 | <-ech // drain channel to let the goroutine created earlier terminate 199 | }() 200 | } 201 | } 202 | } 203 | 204 | // do not reply if DeleteServer() has been called, i.e. 205 | // the server has been killed. this is needed to avoid 206 | // situation in which a client gets a positive reply 207 | // to an Append, but the server persisted the update 208 | // into the old Persister. config.go is careful to call 209 | // DeleteServer() before superseding the Persister. 210 | serverDead = rn.IsServerDead(req.endname, servername, server) 211 | 212 | if replyOK == false || serverDead == true { 213 | // server was killed while we were waiting; return error. 
214 | req.replyCh <- replyMsg{false, nil} 215 | } else if reliable == false && (rand.Int()%1000) < 100 { 216 | // drop the reply, return as if timeout 217 | req.replyCh <- replyMsg{false, nil} 218 | } else if longreordering == true && rand.Intn(900) < 600 { 219 | // delay the response for a while 220 | ms := 200 + rand.Intn(1+rand.Intn(2000)) 221 | // Russ points out that this timer arrangement will decrease 222 | // the number of goroutines, so that the race 223 | // detector is less likely to get upset. 224 | time.AfterFunc(time.Duration(ms)*time.Millisecond, func() { 225 | req.replyCh <- reply 226 | }) 227 | } else { 228 | req.replyCh <- reply 229 | } 230 | } else { 231 | // simulate no reply and eventual timeout. 232 | ms := 0 233 | if rn.longDelays { 234 | // let Raft tests check that leader doesn't send 235 | // RPCs synchronously. 236 | ms = (rand.Int() % 7000) 237 | } else { 238 | // many kv tests require the client to try each 239 | // server in fairly rapid succession. 240 | ms = (rand.Int() % 100) 241 | } 242 | time.AfterFunc(time.Duration(ms)*time.Millisecond, func() { 243 | req.replyCh <- replyMsg{false, nil} 244 | }) 245 | } 246 | 247 | } 248 | 249 | // create a client end-point. 250 | // start the thread that listens and delivers. 251 | func (rn *Network) MakeEnd(endname interface{}) *ClientEnd { 252 | rn.mu.Lock() 253 | defer rn.mu.Unlock() 254 | 255 | if _, ok := rn.ends[endname]; ok { 256 | log.Fatalf("MakeEnd: %v already exists\n", endname) 257 | } 258 | 259 | e := &ClientEnd{} 260 | e.endname = endname 261 | e.ch = rn.endCh 262 | e.done = rn.done 263 | rn.ends[endname] = e 264 | rn.enabled[endname] = false 265 | rn.connections[endname] = nil 266 | 267 | return e 268 | } 269 | 270 | func (rn *Network) AddServer(servername interface{}, rs *Server) { 271 | rn.mu.Lock() 272 | defer rn.mu.Unlock() 273 | 274 | rn.servers[servername] = rs 275 | } 276 | 277 | func (rn *Network) DeleteServer(servername interface{}) { 278 | rn.mu.Lock() 279 | defer rn.mu.Unlock() 280 | 281 | rn.servers[servername] = nil 282 | } 283 | 284 | // connect a ClientEnd to a server. 285 | // a ClientEnd can only be connected once in its lifetime. 286 | func (rn *Network) Connect(endname interface{}, servername interface{}) { 287 | rn.mu.Lock() 288 | defer rn.mu.Unlock() 289 | 290 | rn.connections[endname] = servername 291 | } 292 | 293 | // enable/disable a ClientEnd. 294 | func (rn *Network) Enable(endname interface{}, enabled bool) { 295 | rn.mu.Lock() 296 | defer rn.mu.Unlock() 297 | 298 | rn.enabled[endname] = enabled 299 | } 300 | 301 | // get a server's count of incoming RPCs. 302 | func (rn *Network) GetCount(servername interface{}) int { 303 | rn.mu.Lock() 304 | defer rn.mu.Unlock() 305 | 306 | svr := rn.servers[servername] 307 | return svr.GetCount() 308 | } 309 | 310 | func (rn *Network) GetTotalCount() int { 311 | x := atomic.LoadInt32(&rn.count) 312 | return int(x) 313 | } 314 | 315 | // 316 | // a server is a collection of services, all sharing 317 | // the same rpc dispatcher. so that e.g. both a Raft 318 | // and a k/v server can listen to the same rpc endpoint. 
319 | // 320 | type Server struct { 321 | mu sync.Mutex 322 | services map[string]*Service 323 | count int // incoming RPCs 324 | } 325 | 326 | func MakeServer() *Server { 327 | rs := &Server{} 328 | rs.services = map[string]*Service{} 329 | return rs 330 | } 331 | 332 | func (rs *Server) AddService(svc *Service) { 333 | rs.mu.Lock() 334 | defer rs.mu.Unlock() 335 | rs.services[svc.name] = svc 336 | } 337 | 338 | func (rs *Server) dispatch(req reqMsg) replyMsg { 339 | rs.mu.Lock() 340 | 341 | rs.count += 1 342 | 343 | // split Raft.AppendEntries into service and method 344 | dot := strings.LastIndex(req.svcMeth, ".") 345 | serviceName := req.svcMeth[:dot] 346 | methodName := req.svcMeth[dot+1:] 347 | 348 | service, ok := rs.services[serviceName] 349 | 350 | rs.mu.Unlock() 351 | 352 | if ok { 353 | return service.dispatch(methodName, req) 354 | } else { 355 | choices := []string{} 356 | for k, _ := range rs.services { 357 | choices = append(choices, k) 358 | } 359 | log.Fatalf("labrpc.Server.dispatch(): unknown service %v in %v.%v; expecting one of %v\n", 360 | serviceName, serviceName, methodName, choices) 361 | return replyMsg{false, nil} 362 | } 363 | } 364 | 365 | func (rs *Server) GetCount() int { 366 | rs.mu.Lock() 367 | defer rs.mu.Unlock() 368 | return rs.count 369 | } 370 | 371 | // an object with methods that can be called via RPC. 372 | // a single server may have more than one Service. 373 | type Service struct { 374 | name string 375 | rcvr reflect.Value 376 | typ reflect.Type 377 | methods map[string]reflect.Method 378 | } 379 | 380 | func MakeService(rcvr interface{}) *Service { 381 | svc := &Service{} 382 | svc.typ = reflect.TypeOf(rcvr) 383 | svc.rcvr = reflect.ValueOf(rcvr) 384 | svc.name = reflect.Indirect(svc.rcvr).Type().Name() 385 | svc.methods = map[string]reflect.Method{} 386 | 387 | for m := 0; m < svc.typ.NumMethod(); m++ { 388 | method := svc.typ.Method(m) 389 | mtype := method.Type 390 | mname := method.Name 391 | 392 | //fmt.Printf("%v pp %v ni %v 1k %v 2k %v no %v\n", 393 | // mname, method.PkgPath, mtype.NumIn(), mtype.In(1).Kind(), mtype.In(2).Kind(), mtype.NumOut()) 394 | 395 | if method.PkgPath != "" || // capitalized? 396 | mtype.NumIn() != 3 || 397 | //mtype.In(1).Kind() != reflect.Ptr || 398 | mtype.In(2).Kind() != reflect.Ptr || 399 | mtype.NumOut() != 0 { 400 | // the method is not suitable for a handler 401 | //fmt.Printf("bad method: %v\n", mname) 402 | } else { 403 | // the method looks like a handler 404 | svc.methods[mname] = method 405 | } 406 | } 407 | 408 | return svc 409 | } 410 | 411 | func (svc *Service) dispatch(methname string, req reqMsg) replyMsg { 412 | if method, ok := svc.methods[methname]; ok { 413 | // prepare space into which to read the argument. 414 | // the Value's type will be a pointer to req.argsType. 415 | args := reflect.New(req.argsType) 416 | 417 | // decode the argument. 418 | ab := bytes.NewBuffer(req.args) 419 | ad := labgob.NewDecoder(ab) 420 | ad.Decode(args.Interface()) 421 | 422 | // allocate space for the reply. 423 | replyType := method.Type.In(2) 424 | replyType = replyType.Elem() 425 | replyv := reflect.New(replyType) 426 | 427 | // call the method. 428 | function := method.Func 429 | function.Call([]reflect.Value{svc.rcvr, args.Elem(), replyv}) 430 | 431 | // encode the reply. 
432 | rb := new(bytes.Buffer) 433 | re := labgob.NewEncoder(rb) 434 | re.EncodeValue(replyv) 435 | 436 | return replyMsg{true, rb.Bytes()} 437 | } else { 438 | choices := []string{} 439 | for k, _ := range svc.methods { 440 | choices = append(choices, k) 441 | } 442 | log.Fatalf("labrpc.Service.dispatch(): unknown method %v in %v; expecting one of %v\n", 443 | methname, req.svcMeth, choices) 444 | return replyMsg{false, nil} 445 | } 446 | } 447 | -------------------------------------------------------------------------------- /labrpc/test_test.go: -------------------------------------------------------------------------------- 1 | package labrpc 2 | 3 | import "testing" 4 | import "strconv" 5 | import "sync" 6 | import "runtime" 7 | import "time" 8 | import "fmt" 9 | 10 | type JunkArgs struct { 11 | X int 12 | } 13 | type JunkReply struct { 14 | X string 15 | } 16 | 17 | type JunkServer struct { 18 | mu sync.Mutex 19 | log1 []string 20 | log2 []int 21 | } 22 | 23 | func (js *JunkServer) Handler1(args string, reply *int) { 24 | js.mu.Lock() 25 | defer js.mu.Unlock() 26 | js.log1 = append(js.log1, args) 27 | *reply, _ = strconv.Atoi(args) 28 | } 29 | 30 | func (js *JunkServer) Handler2(args int, reply *string) { 31 | js.mu.Lock() 32 | defer js.mu.Unlock() 33 | js.log2 = append(js.log2, args) 34 | *reply = "handler2-" + strconv.Itoa(args) 35 | } 36 | 37 | func (js *JunkServer) Handler3(args int, reply *int) { 38 | js.mu.Lock() 39 | defer js.mu.Unlock() 40 | time.Sleep(20 * time.Second) 41 | *reply = -args 42 | } 43 | 44 | // args is a pointer 45 | func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) { 46 | reply.X = "pointer" 47 | } 48 | 49 | // args is a not pointer 50 | func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) { 51 | reply.X = "no pointer" 52 | } 53 | 54 | func TestBasic(t *testing.T) { 55 | runtime.GOMAXPROCS(4) 56 | 57 | rn := MakeNetwork() 58 | defer rn.Cleanup() 59 | 60 | e := rn.MakeEnd("end1-99") 61 | 62 | js := &JunkServer{} 63 | svc := MakeService(js) 64 | 65 | rs := MakeServer() 66 | rs.AddService(svc) 67 | rn.AddServer("server99", rs) 68 | 69 | rn.Connect("end1-99", "server99") 70 | rn.Enable("end1-99", true) 71 | 72 | { 73 | reply := "" 74 | e.Call("JunkServer.Handler2", 111, &reply) 75 | if reply != "handler2-111" { 76 | t.Fatalf("wrong reply from Handler2") 77 | } 78 | } 79 | 80 | { 81 | reply := 0 82 | e.Call("JunkServer.Handler1", "9099", &reply) 83 | if reply != 9099 { 84 | t.Fatalf("wrong reply from Handler1") 85 | } 86 | } 87 | } 88 | 89 | func TestTypes(t *testing.T) { 90 | runtime.GOMAXPROCS(4) 91 | 92 | rn := MakeNetwork() 93 | defer rn.Cleanup() 94 | 95 | e := rn.MakeEnd("end1-99") 96 | 97 | js := &JunkServer{} 98 | svc := MakeService(js) 99 | 100 | rs := MakeServer() 101 | rs.AddService(svc) 102 | rn.AddServer("server99", rs) 103 | 104 | rn.Connect("end1-99", "server99") 105 | rn.Enable("end1-99", true) 106 | 107 | { 108 | var args JunkArgs 109 | var reply JunkReply 110 | // args must match type (pointer or not) of handler. 111 | e.Call("JunkServer.Handler4", &args, &reply) 112 | if reply.X != "pointer" { 113 | t.Fatalf("wrong reply from Handler4") 114 | } 115 | } 116 | 117 | { 118 | var args JunkArgs 119 | var reply JunkReply 120 | // args must match type (pointer or not) of handler. 121 | e.Call("JunkServer.Handler5", args, &reply) 122 | if reply.X != "no pointer" { 123 | t.Fatalf("wrong reply from Handler5") 124 | } 125 | } 126 | } 127 | 128 | // 129 | // does net.Enable(endname, false) really disconnect a client? 
130 | // 131 | func TestDisconnect(t *testing.T) { 132 | runtime.GOMAXPROCS(4) 133 | 134 | rn := MakeNetwork() 135 | defer rn.Cleanup() 136 | 137 | e := rn.MakeEnd("end1-99") 138 | 139 | js := &JunkServer{} 140 | svc := MakeService(js) 141 | 142 | rs := MakeServer() 143 | rs.AddService(svc) 144 | rn.AddServer("server99", rs) 145 | 146 | rn.Connect("end1-99", "server99") 147 | 148 | { 149 | reply := "" 150 | e.Call("JunkServer.Handler2", 111, &reply) 151 | if reply != "" { 152 | t.Fatalf("unexpected reply from Handler2") 153 | } 154 | } 155 | 156 | rn.Enable("end1-99", true) 157 | 158 | { 159 | reply := 0 160 | e.Call("JunkServer.Handler1", "9099", &reply) 161 | if reply != 9099 { 162 | t.Fatalf("wrong reply from Handler1") 163 | } 164 | } 165 | } 166 | 167 | // 168 | // test net.GetCount() 169 | // 170 | func TestCounts(t *testing.T) { 171 | runtime.GOMAXPROCS(4) 172 | 173 | rn := MakeNetwork() 174 | defer rn.Cleanup() 175 | 176 | e := rn.MakeEnd("end1-99") 177 | 178 | js := &JunkServer{} 179 | svc := MakeService(js) 180 | 181 | rs := MakeServer() 182 | rs.AddService(svc) 183 | rn.AddServer(99, rs) 184 | 185 | rn.Connect("end1-99", 99) 186 | rn.Enable("end1-99", true) 187 | 188 | for i := 0; i < 17; i++ { 189 | reply := "" 190 | e.Call("JunkServer.Handler2", i, &reply) 191 | wanted := "handler2-" + strconv.Itoa(i) 192 | if reply != wanted { 193 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 194 | } 195 | } 196 | 197 | n := rn.GetCount(99) 198 | if n != 17 { 199 | t.Fatalf("wrong GetCount() %v, expected 17\n", n) 200 | } 201 | } 202 | 203 | // 204 | // test RPCs from concurrent ClientEnds 205 | // 206 | func TestConcurrentMany(t *testing.T) { 207 | runtime.GOMAXPROCS(4) 208 | 209 | rn := MakeNetwork() 210 | defer rn.Cleanup() 211 | 212 | js := &JunkServer{} 213 | svc := MakeService(js) 214 | 215 | rs := MakeServer() 216 | rs.AddService(svc) 217 | rn.AddServer(1000, rs) 218 | 219 | ch := make(chan int) 220 | 221 | nclients := 20 222 | nrpcs := 10 223 | for ii := 0; ii < nclients; ii++ { 224 | go func(i int) { 225 | n := 0 226 | defer func() { ch <- n }() 227 | 228 | e := rn.MakeEnd(i) 229 | rn.Connect(i, 1000) 230 | rn.Enable(i, true) 231 | 232 | for j := 0; j < nrpcs; j++ { 233 | arg := i*100 + j 234 | reply := "" 235 | e.Call("JunkServer.Handler2", arg, &reply) 236 | wanted := "handler2-" + strconv.Itoa(arg) 237 | if reply != wanted { 238 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 239 | } 240 | n += 1 241 | } 242 | }(ii) 243 | } 244 | 245 | total := 0 246 | for ii := 0; ii < nclients; ii++ { 247 | x := <-ch 248 | total += x 249 | } 250 | 251 | if total != nclients*nrpcs { 252 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs) 253 | } 254 | 255 | n := rn.GetCount(1000) 256 | if n != total { 257 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 258 | } 259 | } 260 | 261 | // 262 | // test unreliable 263 | // 264 | func TestUnreliable(t *testing.T) { 265 | runtime.GOMAXPROCS(4) 266 | 267 | rn := MakeNetwork() 268 | defer rn.Cleanup() 269 | rn.Reliable(false) 270 | 271 | js := &JunkServer{} 272 | svc := MakeService(js) 273 | 274 | rs := MakeServer() 275 | rs.AddService(svc) 276 | rn.AddServer(1000, rs) 277 | 278 | ch := make(chan int) 279 | 280 | nclients := 300 281 | for ii := 0; ii < nclients; ii++ { 282 | go func(i int) { 283 | n := 0 284 | defer func() { ch <- n }() 285 | 286 | e := rn.MakeEnd(i) 287 | rn.Connect(i, 1000) 288 | rn.Enable(i, true) 289 | 290 | arg := i * 100 291 | reply := "" 
292 | ok := e.Call("JunkServer.Handler2", arg, &reply) 293 | if ok { 294 | wanted := "handler2-" + strconv.Itoa(arg) 295 | if reply != wanted { 296 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 297 | } 298 | n += 1 299 | } 300 | }(ii) 301 | } 302 | 303 | total := 0 304 | for ii := 0; ii < nclients; ii++ { 305 | x := <-ch 306 | total += x 307 | } 308 | 309 | if total == nclients || total == 0 { 310 | t.Fatalf("all RPCs succeeded despite unreliable") 311 | } 312 | } 313 | 314 | // 315 | // test concurrent RPCs from a single ClientEnd 316 | // 317 | func TestConcurrentOne(t *testing.T) { 318 | runtime.GOMAXPROCS(4) 319 | 320 | rn := MakeNetwork() 321 | defer rn.Cleanup() 322 | 323 | js := &JunkServer{} 324 | svc := MakeService(js) 325 | 326 | rs := MakeServer() 327 | rs.AddService(svc) 328 | rn.AddServer(1000, rs) 329 | 330 | e := rn.MakeEnd("c") 331 | rn.Connect("c", 1000) 332 | rn.Enable("c", true) 333 | 334 | ch := make(chan int) 335 | 336 | nrpcs := 20 337 | for ii := 0; ii < nrpcs; ii++ { 338 | go func(i int) { 339 | n := 0 340 | defer func() { ch <- n }() 341 | 342 | arg := 100 + i 343 | reply := "" 344 | e.Call("JunkServer.Handler2", arg, &reply) 345 | wanted := "handler2-" + strconv.Itoa(arg) 346 | if reply != wanted { 347 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 348 | } 349 | n += 1 350 | }(ii) 351 | } 352 | 353 | total := 0 354 | for ii := 0; ii < nrpcs; ii++ { 355 | x := <-ch 356 | total += x 357 | } 358 | 359 | if total != nrpcs { 360 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs) 361 | } 362 | 363 | js.mu.Lock() 364 | defer js.mu.Unlock() 365 | if len(js.log2) != nrpcs { 366 | t.Fatalf("wrong number of RPCs delivered") 367 | } 368 | 369 | n := rn.GetCount(1000) 370 | if n != total { 371 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 372 | } 373 | } 374 | 375 | // 376 | // regression: an RPC that's delayed during Enabled=false 377 | // should not delay subsequent RPCs (e.g. after Enabled=true). 378 | // 379 | func TestRegression1(t *testing.T) { 380 | runtime.GOMAXPROCS(4) 381 | 382 | rn := MakeNetwork() 383 | defer rn.Cleanup() 384 | 385 | js := &JunkServer{} 386 | svc := MakeService(js) 387 | 388 | rs := MakeServer() 389 | rs.AddService(svc) 390 | rn.AddServer(1000, rs) 391 | 392 | e := rn.MakeEnd("c") 393 | rn.Connect("c", 1000) 394 | 395 | // start some RPCs while the ClientEnd is disabled. 396 | // they'll be delayed. 397 | rn.Enable("c", false) 398 | ch := make(chan bool) 399 | nrpcs := 20 400 | for ii := 0; ii < nrpcs; ii++ { 401 | go func(i int) { 402 | ok := false 403 | defer func() { ch <- ok }() 404 | 405 | arg := 100 + i 406 | reply := "" 407 | // this call ought to return false. 408 | e.Call("JunkServer.Handler2", arg, &reply) 409 | ok = true 410 | }(ii) 411 | } 412 | 413 | time.Sleep(100 * time.Millisecond) 414 | 415 | // now enable the ClientEnd and check that an RPC completes quickly. 
416 | t0 := time.Now() 417 | rn.Enable("c", true) 418 | { 419 | arg := 99 420 | reply := "" 421 | e.Call("JunkServer.Handler2", arg, &reply) 422 | wanted := "handler2-" + strconv.Itoa(arg) 423 | if reply != wanted { 424 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 425 | } 426 | } 427 | dur := time.Since(t0).Seconds() 428 | 429 | if dur > 0.03 { 430 | t.Fatalf("RPC took too long (%v) after Enable", dur) 431 | } 432 | 433 | for ii := 0; ii < nrpcs; ii++ { 434 | <-ch 435 | } 436 | 437 | js.mu.Lock() 438 | defer js.mu.Unlock() 439 | if len(js.log2) != 1 { 440 | t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2)) 441 | } 442 | 443 | n := rn.GetCount(1000) 444 | if n != 1 { 445 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1) 446 | } 447 | } 448 | 449 | // 450 | // if an RPC is stuck in a server, and the server 451 | // is killed with DeleteServer(), does the RPC 452 | // get un-stuck? 453 | // 454 | func TestKilled(t *testing.T) { 455 | runtime.GOMAXPROCS(4) 456 | 457 | rn := MakeNetwork() 458 | defer rn.Cleanup() 459 | 460 | e := rn.MakeEnd("end1-99") 461 | 462 | js := &JunkServer{} 463 | svc := MakeService(js) 464 | 465 | rs := MakeServer() 466 | rs.AddService(svc) 467 | rn.AddServer("server99", rs) 468 | 469 | rn.Connect("end1-99", "server99") 470 | rn.Enable("end1-99", true) 471 | 472 | doneCh := make(chan bool) 473 | go func() { 474 | reply := 0 475 | ok := e.Call("JunkServer.Handler3", 99, &reply) 476 | doneCh <- ok 477 | }() 478 | 479 | time.Sleep(1000 * time.Millisecond) 480 | 481 | select { 482 | case <-doneCh: 483 | t.Fatalf("Handler3 should not have returned yet") 484 | case <-time.After(100 * time.Millisecond): 485 | } 486 | 487 | rn.DeleteServer("server99") 488 | 489 | select { 490 | case x := <-doneCh: 491 | if x != false { 492 | t.Fatalf("Handler3 returned successfully despite DeleteServer()") 493 | } 494 | case <-time.After(100 * time.Millisecond): 495 | t.Fatalf("Handler3 should return after DeleteServer()") 496 | } 497 | } 498 | 499 | func TestBenchmark(t *testing.T) { 500 | runtime.GOMAXPROCS(4) 501 | 502 | rn := MakeNetwork() 503 | defer rn.Cleanup() 504 | 505 | e := rn.MakeEnd("end1-99") 506 | 507 | js := &JunkServer{} 508 | svc := MakeService(js) 509 | 510 | rs := MakeServer() 511 | rs.AddService(svc) 512 | rn.AddServer("server99", rs) 513 | 514 | rn.Connect("end1-99", "server99") 515 | rn.Enable("end1-99", true) 516 | 517 | t0 := time.Now() 518 | n := 100000 519 | for iters := 0; iters < n; iters++ { 520 | reply := "" 521 | e.Call("JunkServer.Handler2", 111, &reply) 522 | if reply != "handler2-111" { 523 | t.Fatalf("wrong reply from Handler2") 524 | } 525 | } 526 | fmt.Printf("%v for %v\n", time.Since(t0), n) 527 | // march 2016, rtm laptop, 22 microseconds per RPC 528 | } 529 | -------------------------------------------------------------------------------- /linearizability/bitset.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | type bitset []uint64 4 | 5 | // data layout: 6 | // bits 0-63 are in data[0], the next are in data[1], etc. 
7 | 8 | func newBitset(bits uint) bitset { 9 | extra := uint(0) 10 | if bits%64 != 0 { 11 | extra = 1 12 | } 13 | chunks := bits/64 + extra 14 | return bitset(make([]uint64, chunks)) 15 | } 16 | 17 | func (b bitset) clone() bitset { 18 | dataCopy := make([]uint64, len(b)) 19 | copy(dataCopy, b) 20 | return bitset(dataCopy) 21 | } 22 | 23 | func bitsetIndex(pos uint) (uint, uint) { 24 | return pos / 64, pos % 64 25 | } 26 | 27 | func (b bitset) set(pos uint) bitset { 28 | major, minor := bitsetIndex(pos) 29 | b[major] |= (1 << minor) 30 | return b 31 | } 32 | 33 | func (b bitset) clear(pos uint) bitset { 34 | major, minor := bitsetIndex(pos) 35 | b[major] &^= (1 << minor) 36 | return b 37 | } 38 | 39 | func (b bitset) get(pos uint) bool { 40 | major, minor := bitsetIndex(pos) 41 | return b[major]&(1<<minor) != 0 42 | } 43 | 44 | func (b bitset) popcnt() uint { 45 | total := uint(0) 46 | for _, v := range b { 47 | v = (v & 0x5555555555555555) + ((v & 0xAAAAAAAAAAAAAAAA) >> 1) 48 | v = (v & 0x3333333333333333) + ((v & 0xCCCCCCCCCCCCCCCC) >> 2) 49 | v = (v & 0x0F0F0F0F0F0F0F0F) + ((v & 0xF0F0F0F0F0F0F0F0) >> 4) 50 | v *= 0x0101010101010101 51 | total += uint((v >> 56) & 0xFF) 52 | } 53 | return total 54 | } 55 | 56 | func (b bitset) hash() uint64 { 57 | hash := uint64(b.popcnt()) 58 | for _, v := range b { 59 | hash ^= v 60 | } 61 | return hash 62 | } 63 | 64 | func (b bitset) equals(b2 bitset) bool { 65 | if len(b) != len(b2) { 66 | return false 67 | } 68 | for i := range b { 69 | if b[i] != b2[i] { 70 | return false 71 | } 72 | } 73 | return true 74 | } 75 | -------------------------------------------------------------------------------- /linearizability/linearizability.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | import ( 4 | "sort" 5 | "sync/atomic" 6 | "time" 7 | ) 8 | 9 | type entryKind bool 10 | 11 | const ( 12 | callEntry entryKind = false 13 | returnEntry = true 14 | ) 15 | 16 | type entry struct { 17 | kind entryKind 18 | value interface{} 19 | id uint 20 | time int64 21 | } 22 | 23 | type byTime []entry 24 | 25 | func (a byTime) Len() int { 26 | return len(a) 27 | } 28 | 29 | func (a byTime) Swap(i, j int) { 30 | a[i], a[j] = a[j], a[i] 31 | } 32 | 33 | func (a byTime) Less(i, j int) bool { 34 | return a[i].time < a[j].time 35 | } 36 | 37 | func makeEntries(history []Operation) []entry { 38 | var entries []entry = nil 39 | id := uint(0) 40 | for _, elem := range history { 41 | entries = append(entries, entry{ 42 | callEntry, elem.Input, id, elem.Call}) 43 | entries = append(entries, entry{ 44 | returnEntry, elem.Output, id, elem.Return}) 45 | id++ 46 | } 47 | sort.Sort(byTime(entries)) 48 | return entries 49 | } 50 | 51 | type node struct { 52 | value interface{} 53 | match *node // call if match is nil, otherwise return 54 | id uint 55 | next *node 56 | prev *node 57 | } 58 | 59 | func insertBefore(n *node, mark *node) *node { 60 | if mark != nil { 61 | beforeMark := mark.prev 62 | mark.prev = n 63 | n.next = mark 64 | if beforeMark != nil { 65 | n.prev = beforeMark 66 | beforeMark.next = n 67 | } 68 | } 69 | return n 70 | } 71 | 72 | func length(n *node) uint { 73 | l := uint(0) 74 | for n != nil { 75 | n = n.next 76 | l++ 77 | } 78 | return l 79 | } 80 | 81 | func renumber(events []Event) []Event { 82 | var e []Event 83 | m := make(map[uint]uint) // renumbering 84 | id := uint(0) 85 | for _, v := range events { 86 | if r, ok := m[v.Id]; ok { 87 | e = append(e, Event{v.Kind, v.Value, r}) 88 | } else { 89 | e = append(e, Event{v.Kind, v.Value, id}) 90 | m[v.Id] = id 91 | id++ 92 | } 93 | } 94 | return e 95 | } 96 | 97 | func convertEntries(events []Event) []entry { 98 | var
entries []entry 99 | for _, elem := range events { 100 | kind := callEntry 101 | if elem.Kind == ReturnEvent { 102 | kind = returnEntry 103 | } 104 | entries = append(entries, entry{kind, elem.Value, elem.Id, -1}) 105 | } 106 | return entries 107 | } 108 | 109 | func makeLinkedEntries(entries []entry) *node { 110 | var root *node = nil 111 | match := make(map[uint]*node) 112 | for i := len(entries) - 1; i >= 0; i-- { 113 | elem := entries[i] 114 | if elem.kind == returnEntry { 115 | entry := &node{value: elem.value, match: nil, id: elem.id} 116 | match[elem.id] = entry 117 | insertBefore(entry, root) 118 | root = entry 119 | } else { 120 | entry := &node{value: elem.value, match: match[elem.id], id: elem.id} 121 | insertBefore(entry, root) 122 | root = entry 123 | } 124 | } 125 | return root 126 | } 127 | 128 | type cacheEntry struct { 129 | linearized bitset 130 | state interface{} 131 | } 132 | 133 | func cacheContains(model Model, cache map[uint64][]cacheEntry, entry cacheEntry) bool { 134 | for _, elem := range cache[entry.linearized.hash()] { 135 | if entry.linearized.equals(elem.linearized) && model.Equal(entry.state, elem.state) { 136 | return true 137 | } 138 | } 139 | return false 140 | } 141 | 142 | type callsEntry struct { 143 | entry *node 144 | state interface{} 145 | } 146 | 147 | func lift(entry *node) { 148 | entry.prev.next = entry.next 149 | entry.next.prev = entry.prev 150 | match := entry.match 151 | match.prev.next = match.next 152 | if match.next != nil { 153 | match.next.prev = match.prev 154 | } 155 | } 156 | 157 | func unlift(entry *node) { 158 | match := entry.match 159 | match.prev.next = match 160 | if match.next != nil { 161 | match.next.prev = match 162 | } 163 | entry.prev.next = entry 164 | entry.next.prev = entry 165 | } 166 | 167 | func checkSingle(model Model, subhistory *node, kill *int32) bool { 168 | n := length(subhistory) / 2 169 | linearized := newBitset(n) 170 | cache := make(map[uint64][]cacheEntry) // map from hash to cache entry 171 | var calls []callsEntry 172 | 173 | state := model.Init() 174 | headEntry := insertBefore(&node{value: nil, match: nil, id: ^uint(0)}, subhistory) 175 | entry := subhistory 176 | for headEntry.next != nil { 177 | if atomic.LoadInt32(kill) != 0 { 178 | return false 179 | } 180 | if entry.match != nil { 181 | matching := entry.match // the return entry 182 | ok, newState := model.Step(state, entry.value, matching.value) 183 | if ok { 184 | newLinearized := linearized.clone().set(entry.id) 185 | newCacheEntry := cacheEntry{newLinearized, newState} 186 | if !cacheContains(model, cache, newCacheEntry) { 187 | hash := newLinearized.hash() 188 | cache[hash] = append(cache[hash], newCacheEntry) 189 | calls = append(calls, callsEntry{entry, state}) 190 | state = newState 191 | linearized.set(entry.id) 192 | lift(entry) 193 | entry = headEntry.next 194 | } else { 195 | entry = entry.next 196 | } 197 | } else { 198 | entry = entry.next 199 | } 200 | } else { 201 | if len(calls) == 0 { 202 | return false 203 | } 204 | callsTop := calls[len(calls)-1] 205 | entry = callsTop.entry 206 | state = callsTop.state 207 | linearized.clear(entry.id) 208 | calls = calls[:len(calls)-1] 209 | unlift(entry) 210 | entry = entry.next 211 | } 212 | } 213 | return true 214 | } 215 | 216 | func fillDefault(model Model) Model { 217 | if model.Partition == nil { 218 | model.Partition = NoPartition 219 | } 220 | if model.PartitionEvent == nil { 221 | model.PartitionEvent = NoPartitionEvent 222 | } 223 | if model.Equal == nil { 224 | model.Equal = 
ShallowEqual 225 | } 226 | return model 227 | } 228 | 229 | func CheckOperations(model Model, history []Operation) bool { 230 | return CheckOperationsTimeout(model, history, 0) 231 | } 232 | 233 | // timeout = 0 means no timeout 234 | // if this operation times out, then a false positive is possible 235 | func CheckOperationsTimeout(model Model, history []Operation, timeout time.Duration) bool { 236 | model = fillDefault(model) 237 | partitions := model.Partition(history) 238 | ok := true 239 | results := make(chan bool) 240 | kill := int32(0) 241 | for _, subhistory := range partitions { 242 | l := makeLinkedEntries(makeEntries(subhistory)) 243 | go func() { 244 | results <- checkSingle(model, l, &kill) 245 | }() 246 | } 247 | var timeoutChan <-chan time.Time 248 | if timeout > 0 { 249 | timeoutChan = time.After(timeout) 250 | } 251 | count := 0 252 | loop: 253 | for { 254 | select { 255 | case result := <-results: 256 | ok = ok && result 257 | if !ok { 258 | atomic.StoreInt32(&kill, 1) 259 | break loop 260 | } 261 | count++ 262 | if count >= len(partitions) { 263 | break loop 264 | } 265 | case <-timeoutChan: 266 | break loop // if we time out, we might get a false positive 267 | } 268 | } 269 | return ok 270 | } 271 | 272 | func CheckEvents(model Model, history []Event) bool { 273 | return CheckEventsTimeout(model, history, 0) 274 | } 275 | 276 | // timeout = 0 means no timeout 277 | // if this operation times out, then a false positive is possible 278 | func CheckEventsTimeout(model Model, history []Event, timeout time.Duration) bool { 279 | model = fillDefault(model) 280 | partitions := model.PartitionEvent(history) 281 | ok := true 282 | results := make(chan bool) 283 | kill := int32(0) 284 | for _, subhistory := range partitions { 285 | l := makeLinkedEntries(convertEntries(renumber(subhistory))) 286 | go func() { 287 | results <- checkSingle(model, l, &kill) 288 | }() 289 | } 290 | var timeoutChan <-chan time.Time 291 | if timeout > 0 { 292 | timeoutChan = time.After(timeout) 293 | } 294 | count := 0 295 | loop: 296 | for { 297 | select { 298 | case result := <-results: 299 | ok = ok && result 300 | if !ok { 301 | atomic.StoreInt32(&kill, 1) 302 | break loop 303 | } 304 | count++ 305 | if count >= len(partitions) { 306 | break loop 307 | } 308 | case <-timeoutChan: 309 | break loop // if we time out, we might get a false positive 310 | } 311 | } 312 | return ok 313 | } 314 | -------------------------------------------------------------------------------- /linearizability/model.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | type Operation struct { 4 | Input interface{} 5 | Call int64 // invocation time 6 | Output interface{} 7 | Return int64 // response time 8 | } 9 | 10 | type EventKind bool 11 | 12 | const ( 13 | CallEvent EventKind = false 14 | ReturnEvent EventKind = true 15 | ) 16 | 17 | type Event struct { 18 | Kind EventKind 19 | Value interface{} 20 | Id uint 21 | } 22 | 23 | type Model struct { 24 | // Partition functions, such that a history is linearizable if an only 25 | // if each partition is linearizable. If you don't want to implement 26 | // this, you can always use the `NoPartition` functions implemented 27 | // below. 28 | Partition func(history []Operation) [][]Operation 29 | PartitionEvent func(history []Event) [][]Event 30 | // Initial state of the system. 31 | Init func() interface{} 32 | // Step function for the system. 
Returns whether or not the system 33 | // could take this step with the given inputs and outputs and also 34 | // returns the new state. This should not mutate the existing state. 35 | Step func(state interface{}, input interface{}, output interface{}) (bool, interface{}) 36 | // Equality on states. If you are using a simple data type for states, 37 | // you can use the `ShallowEqual` function implemented below. 38 | Equal func(state1, state2 interface{}) bool 39 | } 40 | 41 | func NoPartition(history []Operation) [][]Operation { 42 | return [][]Operation{history} 43 | } 44 | 45 | func NoPartitionEvent(history []Event) [][]Event { 46 | return [][]Event{history} 47 | } 48 | 49 | func ShallowEqual(state1, state2 interface{}) bool { 50 | return state1 == state2 51 | } 52 | -------------------------------------------------------------------------------- /linearizability/models.go: -------------------------------------------------------------------------------- 1 | package linearizability 2 | 3 | // kv model 4 | 5 | type KvInput struct { 6 | Op uint8 // 0 => get, 1 => put, 2 => append 7 | Key string 8 | Value string 9 | } 10 | 11 | type KvOutput struct { 12 | Value string 13 | } 14 | 15 | func KvModel() Model { 16 | return Model{ 17 | Partition: func(history []Operation) [][]Operation { 18 | m := make(map[string][]Operation) 19 | for _, v := range history { 20 | key := v.Input.(KvInput).Key 21 | m[key] = append(m[key], v) 22 | } 23 | var ret [][]Operation 24 | for _, v := range m { 25 | ret = append(ret, v) 26 | } 27 | return ret 28 | }, 29 | Init: func() interface{} { 30 | // note: we are modeling a single key's value here; 31 | // we're partitioning by key, so this is okay 32 | return "" 33 | }, 34 | Step: func(state, input, output interface{}) (bool, interface{}) { 35 | inp := input.(KvInput) 36 | out := output.(KvOutput) 37 | st := state.(string) 38 | if inp.Op == 0 { 39 | // get 40 | return out.Value == st, state 41 | } else if inp.Op == 1 { 42 | // put 43 | return true, inp.Value 44 | } else { 45 | // append 46 | return true, (st + inp.Value) 47 | } 48 | }, 49 | Equal: ShallowEqual, 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /paxos/paxos.go: -------------------------------------------------------------------------------- 1 | package paxos 2 | 3 | // 4 | // Paxos library, to be included in an application. 5 | // Multiple applications will run, each including 6 | // a Paxos peer. 7 | // 8 | // Manages a sequence of agreed-on values. 9 | // The set of peers is fixed. 10 | // Copes with network failures (partition, msg loss, &c). 11 | // Does not store anything persistently, so cannot handle crash+restart. 12 | // 13 | // The application interface: 14 | // 15 | // px = paxos.Make(peers []string, me string) 16 | // px.Start(seq int, v interface{}) -- start agreement on new instance 17 | // px.Status(seq int) (Fate, v interface{}) -- get info about an instance 18 | // px.Done(seq int) -- ok to forget all instances <= seq 19 | // px.Max() int -- highest instance seq known, or -1 20 | // px.Min() int -- instances before this seq have been forgotten 21 | // 22 | 23 | import "net" 24 | import "net/rpc" 25 | import "log" 26 | 27 | import "os" 28 | import "syscall" 29 | import "sync" 30 | import "sync/atomic" 31 | import "fmt" 32 | import "math/rand" 33 | 34 | // px.Status() return values, indicating 35 | // whether an agreement has been decided, 36 | // or Paxos has not yet reached agreement, 37 | // or it was agreed but forgotten (i.e. 
< Min()). 38 | type Fate int 39 | 40 | const ( 41 | Decided Fate = iota + 1 42 | Pending // not yet decided. 43 | Forgotten // decided but forgotten. 44 | ) 45 | 46 | type Paxos struct { 47 | mu sync.Mutex 48 | l net.Listener 49 | dead int32 // for testing 50 | unreliable int32 // for testing 51 | rpcCount int32 // for testing 52 | peers []string 53 | me int // index into peers[] 54 | 55 | // data here. 56 | } 57 | 58 | // 59 | // call() sends an RPC to the rpcname handler on server srv 60 | // with arguments args, waits for the reply, and leaves the 61 | // reply in reply. the reply argument should be a pointer 62 | // to a reply structure. 63 | // 64 | // the return value is true if the server responded, and false 65 | // if call() was not able to contact the server. in particular, 66 | // the replys contents are only valid if call() returned true. 67 | // 68 | // you should assume that call() will time out and return an 69 | // error after a while if it does not get a reply from the server. 70 | // 71 | // please use call() to send all RPCs, in client.go and server.go. 72 | // please do not change this function. 73 | // 74 | func call(srv string, name string, args interface{}, reply interface{}) bool { 75 | c, err := rpc.Dial("unix", srv) 76 | if err != nil { 77 | err1 := err.(*net.OpError) 78 | if err1.Err != syscall.ENOENT && err1.Err != syscall.ECONNREFUSED { 79 | fmt.Printf("paxos Dial() failed: %v\n", err1) 80 | } 81 | return false 82 | } 83 | defer c.Close() 84 | 85 | err = c.Call(name, args, reply) 86 | if err == nil { 87 | return true 88 | } 89 | 90 | fmt.Println(err) 91 | return false 92 | } 93 | 94 | // 95 | // the application wants paxos to start agreement on 96 | // instance seq, with proposed value v. 97 | // Start() returns right away; the application will 98 | // call Status() to find out if/when agreement 99 | // is reached. 100 | // 101 | func (px *Paxos) Start(seq int, v interface{}) { 102 | // code here. 103 | } 104 | 105 | // 106 | // the application on this machine is done with 107 | // all instances <= seq. 108 | // 109 | // see the comments for Min() for more explanation. 110 | // 111 | func (px *Paxos) Done(seq int) { 112 | // code here. 113 | } 114 | 115 | // 116 | // the application wants to know the 117 | // highest instance sequence known to 118 | // this peer. 119 | // 120 | func (px *Paxos) Max() int { 121 | // code here. 122 | return 0 123 | } 124 | 125 | // 126 | // Min() should return one more than the minimum among z_i, 127 | // where z_i is the highest number ever passed 128 | // to Done() on peer i. A peers z_i is -1 if it has 129 | // never called Done(). 130 | // 131 | // Paxos is required to have forgotten all information 132 | // about any instances it knows that are < Min(). 133 | // The point is to free up memory in long-running 134 | // Paxos-based servers. 135 | // 136 | // Paxos peers need to exchange their highest Done() 137 | // arguments in order to implement Min(). These 138 | // exchanges can be piggybacked on ordinary Paxos 139 | // agreement protocol messages, so it is OK if one 140 | // peers Min does not reflect another Peers Done() 141 | // until after the next instance is agreed to. 142 | // 143 | // The fact that Min() is defined as a minimum over 144 | // *all* Paxos peers means that Min() cannot increase until 145 | // all peers have been heard from. So if a peer is dead 146 | // or unreachable, other peers Min()s will not increase 147 | // even if all reachable peers call Done. 
The reason for 148 | // this is that when the unreachable peer comes back to 149 | // life, it will need to catch up on instances that it 150 | // missed -- the other peers therefor cannot forget these 151 | // instances. 152 | // 153 | func (px *Paxos) Min() int { 154 | // You code here. 155 | return 0 156 | } 157 | 158 | // 159 | // the application wants to know whether this 160 | // peer thinks an instance has been decided, 161 | // and if so what the agreed value is. Status() 162 | // should just inspect the local peer state; 163 | // it should not contact other Paxos peers. 164 | // 165 | func (px *Paxos) Status(seq int) (Fate, interface{}) { 166 | // code here. 167 | return Pending, nil 168 | } 169 | 170 | // 171 | // tell the peer to shut itself down. 172 | // for testing. 173 | // please do not change these two functions. 174 | // 175 | func (px *Paxos) Kill() { 176 | atomic.StoreInt32(&px.dead, 1) 177 | if px.l != nil { 178 | px.l.Close() 179 | } 180 | } 181 | 182 | // 183 | // has this peer been asked to shut down? 184 | // 185 | func (px *Paxos) isdead() bool { 186 | return atomic.LoadInt32(&px.dead) != 0 187 | } 188 | 189 | // please do not change these two functions. 190 | func (px *Paxos) setunreliable(what bool) { 191 | if what { 192 | atomic.StoreInt32(&px.unreliable, 1) 193 | } else { 194 | atomic.StoreInt32(&px.unreliable, 0) 195 | } 196 | } 197 | 198 | func (px *Paxos) isunreliable() bool { 199 | return atomic.LoadInt32(&px.unreliable) != 0 200 | } 201 | 202 | // 203 | // the application wants to create a paxos peer. 204 | // the ports of all the paxos peers (including this one) 205 | // are in peers[]. this servers port is peers[me]. 206 | // 207 | func Make(peers []string, me int, rpcs *rpc.Server) *Paxos { 208 | px := &Paxos{} 209 | px.peers = peers 210 | px.me = me 211 | 212 | // initialization code here. 213 | 214 | if rpcs != nil { 215 | // caller will create socket &c 216 | rpcs.Register(px) 217 | } else { 218 | rpcs = rpc.NewServer() 219 | rpcs.Register(px) 220 | 221 | // prepare to receive connections from clients. 222 | // change "unix" to "tcp" to use over a network. 223 | os.Remove(peers[me]) // only needed for "unix" 224 | l, e := net.Listen("unix", peers[me]) 225 | if e != nil { 226 | log.Fatal("listen error: ", e) 227 | } 228 | px.l = l 229 | 230 | // please do not change any of the following code, 231 | // or do anything to subvert it. 232 | 233 | // create a thread to accept RPC connections 234 | go func() { 235 | for px.isdead() == false { 236 | conn, err := px.l.Accept() 237 | if err == nil && px.isdead() == false { 238 | if px.isunreliable() && (rand.Int63()%1000) < 100 { 239 | // discard the request. 240 | conn.Close() 241 | } else if px.isunreliable() && (rand.Int63()%1000) < 200 { 242 | // process the request but force discard of reply. 
243 | c1 := conn.(*net.UnixConn) 244 | f, _ := c1.File() 245 | err := syscall.Shutdown(syscall.Handle(int(f.Fd())), syscall.SHUT_WR) 246 | if err != nil { 247 | fmt.Printf("shutdown: %v\n", err) 248 | } 249 | atomic.AddInt32(&px.rpcCount, 1) 250 | go rpcs.ServeConn(conn) 251 | } else { 252 | atomic.AddInt32(&px.rpcCount, 1) 253 | go rpcs.ServeConn(conn) 254 | } 255 | } else if err == nil { 256 | conn.Close() 257 | } 258 | if err != nil && px.isdead() == false { 259 | fmt.Printf("Paxos(%v) accept: %v\n", me, err.Error()) 260 | } 261 | } 262 | }() 263 | } 264 | 265 | return px 266 | } 267 | -------------------------------------------------------------------------------- /paxos/test_test.go: -------------------------------------------------------------------------------- 1 | package paxos 2 | 3 | import "testing" 4 | import "runtime" 5 | import "strconv" 6 | import "os" 7 | import "time" 8 | import "fmt" 9 | import "math/rand" 10 | import crand "crypto/rand" 11 | import "encoding/base64" 12 | import "sync/atomic" 13 | 14 | func randstring(n int) string { 15 | b := make([]byte, 2*n) 16 | crand.Read(b) 17 | s := base64.URLEncoding.EncodeToString(b) 18 | return s[0:n] 19 | } 20 | 21 | func port(tag string, host int) string { 22 | s := "/var/tmp/824-" 23 | s += strconv.Itoa(os.Getuid()) + "/" 24 | os.Mkdir(s, 0777) 25 | s += "px-" 26 | s += strconv.Itoa(os.Getpid()) + "-" 27 | s += tag + "-" 28 | s += strconv.Itoa(host) 29 | return s 30 | } 31 | 32 | func ndecided(t *testing.T, pxa []*Paxos, seq int) int { 33 | count := 0 34 | var v interface{} 35 | for i := 0; i < len(pxa); i++ { 36 | if pxa[i] != nil { 37 | decided, v1 := pxa[i].Status(seq) 38 | if decided == Decided { 39 | if count > 0 && v != v1 { 40 | t.Fatalf("decided values do not match; seq=%v i=%v v=%v v1=%v", 41 | seq, i, v, v1) 42 | } 43 | count++ 44 | v = v1 45 | } 46 | } 47 | } 48 | return count 49 | } 50 | 51 | func waitn(t *testing.T, pxa []*Paxos, seq int, wanted int) { 52 | to := 10 * time.Millisecond 53 | for iters := 0; iters < 30; iters++ { 54 | if ndecided(t, pxa, seq) >= wanted { 55 | break 56 | } 57 | time.Sleep(to) 58 | if to < time.Second { 59 | to *= 2 60 | } 61 | } 62 | nd := ndecided(t, pxa, seq) 63 | if nd < wanted { 64 | t.Fatalf("too few decided; seq=%v ndecided=%v wanted=%v", seq, nd, wanted) 65 | } 66 | } 67 | 68 | func waitmajority(t *testing.T, pxa []*Paxos, seq int) { 69 | waitn(t, pxa, seq, (len(pxa)/2)+1) 70 | } 71 | 72 | func checkmax(t *testing.T, pxa []*Paxos, seq int, max int) { 73 | time.Sleep(3 * time.Second) 74 | nd := ndecided(t, pxa, seq) 75 | if nd > max { 76 | t.Fatalf("too many decided; seq=%v ndecided=%v max=%v", seq, nd, max) 77 | } 78 | } 79 | 80 | func cleanup(pxa []*Paxos) { 81 | for i := 0; i < len(pxa); i++ { 82 | if pxa[i] != nil { 83 | pxa[i].Kill() 84 | } 85 | } 86 | } 87 | 88 | func noTestSpeed(t *testing.T) { 89 | runtime.GOMAXPROCS(4) 90 | 91 | const npaxos = 3 92 | var pxa []*Paxos = make([]*Paxos, npaxos) 93 | var pxh []string = make([]string, npaxos) 94 | defer cleanup(pxa) 95 | 96 | for i := 0; i < npaxos; i++ { 97 | pxh[i] = port("time", i) 98 | } 99 | for i := 0; i < npaxos; i++ { 100 | pxa[i] = Make(pxh, i, nil) 101 | } 102 | 103 | t0 := time.Now() 104 | 105 | for i := 0; i < 20; i++ { 106 | pxa[0].Start(i, "x") 107 | waitn(t, pxa, i, npaxos) 108 | } 109 | 110 | d := time.Since(t0) 111 | fmt.Printf("20 agreements %v seconds\n", d.Seconds()) 112 | } 113 | 114 | func TestBasic(t *testing.T) { 115 | runtime.GOMAXPROCS(4) 116 | 117 | const npaxos = 3 118 | var pxa []*Paxos = 
make([]*Paxos, npaxos) 119 | var pxh []string = make([]string, npaxos) 120 | defer cleanup(pxa) 121 | 122 | for i := 0; i < npaxos; i++ { 123 | pxh[i] = port("basic", i) 124 | } 125 | for i := 0; i < npaxos; i++ { 126 | pxa[i] = Make(pxh, i, nil) 127 | } 128 | 129 | fmt.Printf("Test: Single proposer ...\n") 130 | 131 | pxa[0].Start(0, "hello") 132 | waitn(t, pxa, 0, npaxos) 133 | 134 | fmt.Printf(" ... Passed\n") 135 | 136 | fmt.Printf("Test: Many proposers, same value ...\n") 137 | 138 | for i := 0; i < npaxos; i++ { 139 | pxa[i].Start(1, 77) 140 | } 141 | waitn(t, pxa, 1, npaxos) 142 | 143 | fmt.Printf(" ... Passed\n") 144 | 145 | fmt.Printf("Test: Many proposers, different values ...\n") 146 | 147 | pxa[0].Start(2, 100) 148 | pxa[1].Start(2, 101) 149 | pxa[2].Start(2, 102) 150 | waitn(t, pxa, 2, npaxos) 151 | 152 | fmt.Printf(" ... Passed\n") 153 | 154 | fmt.Printf("Test: Out-of-order instances ...\n") 155 | 156 | pxa[0].Start(7, 700) 157 | pxa[0].Start(6, 600) 158 | pxa[1].Start(5, 500) 159 | waitn(t, pxa, 7, npaxos) 160 | pxa[0].Start(4, 400) 161 | pxa[1].Start(3, 300) 162 | waitn(t, pxa, 6, npaxos) 163 | waitn(t, pxa, 5, npaxos) 164 | waitn(t, pxa, 4, npaxos) 165 | waitn(t, pxa, 3, npaxos) 166 | 167 | if pxa[0].Max() != 7 { 168 | t.Fatalf("wrong Max()") 169 | } 170 | 171 | fmt.Printf(" ... Passed\n") 172 | } 173 | 174 | func TestDeaf(t *testing.T) { 175 | runtime.GOMAXPROCS(4) 176 | 177 | const npaxos = 5 178 | var pxa []*Paxos = make([]*Paxos, npaxos) 179 | var pxh []string = make([]string, npaxos) 180 | defer cleanup(pxa) 181 | 182 | for i := 0; i < npaxos; i++ { 183 | pxh[i] = port("deaf", i) 184 | } 185 | for i := 0; i < npaxos; i++ { 186 | pxa[i] = Make(pxh, i, nil) 187 | } 188 | 189 | fmt.Printf("Test: Deaf proposer ...\n") 190 | 191 | pxa[0].Start(0, "hello") 192 | waitn(t, pxa, 0, npaxos) 193 | 194 | os.Remove(pxh[0]) 195 | os.Remove(pxh[npaxos-1]) 196 | 197 | pxa[1].Start(1, "goodbye") 198 | waitmajority(t, pxa, 1) 199 | time.Sleep(1 * time.Second) 200 | if ndecided(t, pxa, 1) != npaxos-2 { 201 | t.Fatalf("a deaf peer heard about a decision") 202 | } 203 | 204 | pxa[0].Start(1, "xxx") 205 | waitn(t, pxa, 1, npaxos-1) 206 | time.Sleep(1 * time.Second) 207 | if ndecided(t, pxa, 1) != npaxos-1 { 208 | t.Fatalf("a deaf peer heard about a decision") 209 | } 210 | 211 | pxa[npaxos-1].Start(1, "yyy") 212 | waitn(t, pxa, 1, npaxos) 213 | 214 | fmt.Printf(" ... Passed\n") 215 | } 216 | 217 | func TestForget(t *testing.T) { 218 | runtime.GOMAXPROCS(4) 219 | 220 | const npaxos = 6 221 | var pxa []*Paxos = make([]*Paxos, npaxos) 222 | var pxh []string = make([]string, npaxos) 223 | defer cleanup(pxa) 224 | 225 | for i := 0; i < npaxos; i++ { 226 | pxh[i] = port("gc", i) 227 | } 228 | for i := 0; i < npaxos; i++ { 229 | pxa[i] = Make(pxh, i, nil) 230 | } 231 | 232 | fmt.Printf("Test: Forgetting ...\n") 233 | 234 | // initial Min() correct? 235 | for i := 0; i < npaxos; i++ { 236 | m := pxa[i].Min() 237 | if m > 0 { 238 | t.Fatalf("wrong initial Min() %v", m) 239 | } 240 | } 241 | 242 | pxa[0].Start(0, "00") 243 | pxa[1].Start(1, "11") 244 | pxa[2].Start(2, "22") 245 | pxa[0].Start(6, "66") 246 | pxa[1].Start(7, "77") 247 | 248 | waitn(t, pxa, 0, npaxos) 249 | 250 | // Min() correct? 251 | for i := 0; i < npaxos; i++ { 252 | m := pxa[i].Min() 253 | if m != 0 { 254 | t.Fatalf("wrong Min() %v; expected 0", m) 255 | } 256 | } 257 | 258 | waitn(t, pxa, 1, npaxos) 259 | 260 | // Min() correct? 
261 | for i := 0; i < npaxos; i++ { 262 | m := pxa[i].Min() 263 | if m != 0 { 264 | t.Fatalf("wrong Min() %v; expected 0", m) 265 | } 266 | } 267 | 268 | // everyone Done() -> Min() changes? 269 | for i := 0; i < npaxos; i++ { 270 | pxa[i].Done(0) 271 | } 272 | for i := 1; i < npaxos; i++ { 273 | pxa[i].Done(1) 274 | } 275 | for i := 0; i < npaxos; i++ { 276 | pxa[i].Start(8+i, "xx") 277 | } 278 | allok := false 279 | for iters := 0; iters < 12; iters++ { 280 | allok = true 281 | for i := 0; i < npaxos; i++ { 282 | s := pxa[i].Min() 283 | if s != 1 { 284 | allok = false 285 | } 286 | } 287 | if allok { 288 | break 289 | } 290 | time.Sleep(1 * time.Second) 291 | } 292 | if allok != true { 293 | t.Fatalf("Min() did not advance after Done()") 294 | } 295 | 296 | fmt.Printf(" ... Passed\n") 297 | } 298 | 299 | func TestManyForget(t *testing.T) { 300 | runtime.GOMAXPROCS(4) 301 | 302 | const npaxos = 3 303 | var pxa []*Paxos = make([]*Paxos, npaxos) 304 | var pxh []string = make([]string, npaxos) 305 | defer cleanup(pxa) 306 | 307 | for i := 0; i < npaxos; i++ { 308 | pxh[i] = port("manygc", i) 309 | } 310 | for i := 0; i < npaxos; i++ { 311 | pxa[i] = Make(pxh, i, nil) 312 | pxa[i].setunreliable(true) 313 | } 314 | 315 | fmt.Printf("Test: Lots of forgetting ...\n") 316 | 317 | const maxseq = 20 318 | 319 | go func() { 320 | na := rand.Perm(maxseq) 321 | for i := 0; i < len(na); i++ { 322 | seq := na[i] 323 | j := (rand.Int() % npaxos) 324 | v := rand.Int() 325 | pxa[j].Start(seq, v) 326 | runtime.Gosched() 327 | } 328 | }() 329 | 330 | done := make(chan bool) 331 | go func() { 332 | for { 333 | select { 334 | case <-done: 335 | return 336 | default: 337 | } 338 | seq := (rand.Int() % maxseq) 339 | i := (rand.Int() % npaxos) 340 | if seq >= pxa[i].Min() { 341 | decided, _ := pxa[i].Status(seq) 342 | if decided == Decided { 343 | pxa[i].Done(seq) 344 | } 345 | } 346 | runtime.Gosched() 347 | } 348 | }() 349 | 350 | time.Sleep(5 * time.Second) 351 | done <- true 352 | for i := 0; i < npaxos; i++ { 353 | pxa[i].setunreliable(false) 354 | } 355 | time.Sleep(2 * time.Second) 356 | 357 | for seq := 0; seq < maxseq; seq++ { 358 | for i := 0; i < npaxos; i++ { 359 | if seq >= pxa[i].Min() { 360 | pxa[i].Status(seq) 361 | } 362 | } 363 | } 364 | 365 | fmt.Printf(" ... Passed\n") 366 | } 367 | 368 | // 369 | // does paxos forgetting actually free the memory? 
370 | // 371 | func TestForgetMem(t *testing.T) { 372 | runtime.GOMAXPROCS(4) 373 | 374 | fmt.Printf("Test: Paxos frees forgotten instance memory ...\n") 375 | 376 | const npaxos = 3 377 | var pxa []*Paxos = make([]*Paxos, npaxos) 378 | var pxh []string = make([]string, npaxos) 379 | defer cleanup(pxa) 380 | 381 | for i := 0; i < npaxos; i++ { 382 | pxh[i] = port("gcmem", i) 383 | } 384 | for i := 0; i < npaxos; i++ { 385 | pxa[i] = Make(pxh, i, nil) 386 | } 387 | 388 | pxa[0].Start(0, "x") 389 | waitn(t, pxa, 0, npaxos) 390 | 391 | runtime.GC() 392 | var m0 runtime.MemStats 393 | runtime.ReadMemStats(&m0) 394 | // m0.Alloc about a megabyte 395 | 396 | for i := 1; i <= 10; i++ { 397 | big := make([]byte, 1000000) 398 | for j := 0; j < len(big); j++ { 399 | big[j] = byte('a' + rand.Int()%26) 400 | } 401 | pxa[0].Start(i, string(big)) 402 | waitn(t, pxa, i, npaxos) 403 | } 404 | 405 | runtime.GC() 406 | var m1 runtime.MemStats 407 | runtime.ReadMemStats(&m1) 408 | // m1.Alloc about 90 megabytes 409 | 410 | for i := 0; i < npaxos; i++ { 411 | pxa[i].Done(10) 412 | } 413 | for i := 0; i < npaxos; i++ { 414 | pxa[i].Start(11+i, "z") 415 | } 416 | time.Sleep(3 * time.Second) 417 | for i := 0; i < npaxos; i++ { 418 | if pxa[i].Min() != 11 { 419 | t.Fatalf("expected Min() %v, got %v\n", 11, pxa[i].Min()) 420 | } 421 | } 422 | 423 | runtime.GC() 424 | var m2 runtime.MemStats 425 | runtime.ReadMemStats(&m2) 426 | // m2.Alloc about 10 megabytes 427 | 428 | if m2.Alloc > (m1.Alloc / 2) { 429 | t.Fatalf("memory use did not shrink enough") 430 | } 431 | 432 | again := make([]string, 10) 433 | for seq := 0; seq < npaxos && seq < 10; seq++ { 434 | again[seq] = randstring(20) 435 | for i := 0; i < npaxos; i++ { 436 | fate, _ := pxa[i].Status(seq) 437 | if fate != Forgotten { 438 | t.Fatalf("seq %d < Min() %d but not Forgotten", seq, pxa[i].Min()) 439 | } 440 | pxa[i].Start(seq, again[seq]) 441 | } 442 | } 443 | time.Sleep(1 * time.Second) 444 | for seq := 0; seq < npaxos && seq < 10; seq++ { 445 | for i := 0; i < npaxos; i++ { 446 | fate, v := pxa[i].Status(seq) 447 | if fate != Forgotten || v == again[seq] { 448 | t.Fatalf("seq %d < Min() %d but not Forgotten", seq, pxa[i].Min()) 449 | } 450 | } 451 | } 452 | 453 | fmt.Printf(" ... Passed\n") 454 | } 455 | 456 | // 457 | // does Max() work after Done()s? 458 | // 459 | func TestDoneMax(t *testing.T) { 460 | runtime.GOMAXPROCS(4) 461 | 462 | fmt.Printf("Test: Paxos Max() after Done()s ...\n") 463 | 464 | const npaxos = 3 465 | var pxa []*Paxos = make([]*Paxos, npaxos) 466 | var pxh []string = make([]string, npaxos) 467 | defer cleanup(pxa) 468 | 469 | for i := 0; i < npaxos; i++ { 470 | pxh[i] = port("donemax", i) 471 | } 472 | for i := 0; i < npaxos; i++ { 473 | pxa[i] = Make(pxh, i, nil) 474 | } 475 | 476 | pxa[0].Start(0, "x") 477 | waitn(t, pxa, 0, npaxos) 478 | 479 | for i := 1; i <= 10; i++ { 480 | pxa[0].Start(i, "y") 481 | waitn(t, pxa, i, npaxos) 482 | } 483 | 484 | for i := 0; i < npaxos; i++ { 485 | pxa[i].Done(10) 486 | } 487 | 488 | // Propagate messages so everyone knows about Done(10) 489 | for i := 0; i < npaxos; i++ { 490 | pxa[i].Start(10, "z") 491 | } 492 | time.Sleep(2 * time.Second) 493 | for i := 0; i < npaxos; i++ { 494 | mx := pxa[i].Max() 495 | if mx != 10 { 496 | t.Fatalf("Max() did not return correct result %d after calling Done(); returned %d", 10, mx) 497 | } 498 | } 499 | 500 | fmt.Printf(" ... 
Passed\n") 501 | } 502 | 503 | func TestRPCCount(t *testing.T) { 504 | runtime.GOMAXPROCS(4) 505 | 506 | fmt.Printf("Test: RPC counts aren't too high ...\n") 507 | 508 | const npaxos = 3 509 | var pxa []*Paxos = make([]*Paxos, npaxos) 510 | var pxh []string = make([]string, npaxos) 511 | defer cleanup(pxa) 512 | 513 | for i := 0; i < npaxos; i++ { 514 | pxh[i] = port("count", i) 515 | } 516 | for i := 0; i < npaxos; i++ { 517 | pxa[i] = Make(pxh, i, nil) 518 | } 519 | 520 | ninst1 := 5 521 | seq := 0 522 | for i := 0; i < ninst1; i++ { 523 | pxa[0].Start(seq, "x") 524 | waitn(t, pxa, seq, npaxos) 525 | seq++ 526 | } 527 | 528 | time.Sleep(2 * time.Second) 529 | 530 | total1 := int32(0) 531 | for j := 0; j < npaxos; j++ { 532 | total1 += atomic.LoadInt32(&pxa[j].rpcCount) 533 | } 534 | 535 | // per agreement: 536 | // 3 prepares 537 | // 3 accepts 538 | // 3 decides 539 | expected1 := int32(ninst1 * npaxos * npaxos) 540 | if total1 > expected1 { 541 | t.Fatalf("too many RPCs for serial Start()s; %v instances, got %v, expected %v", 542 | ninst1, total1, expected1) 543 | } 544 | 545 | ninst2 := 5 546 | for i := 0; i < ninst2; i++ { 547 | for j := 0; j < npaxos; j++ { 548 | go pxa[j].Start(seq, j+(i*10)) 549 | } 550 | waitn(t, pxa, seq, npaxos) 551 | seq++ 552 | } 553 | 554 | time.Sleep(2 * time.Second) 555 | 556 | total2 := int32(0) 557 | for j := 0; j < npaxos; j++ { 558 | total2 += atomic.LoadInt32(&pxa[j].rpcCount) 559 | } 560 | total2 -= total1 561 | 562 | // worst case per agreement: 563 | // Proposer 1: 3 prep, 3 acc, 3 decides. 564 | // Proposer 2: 3 prep, 3 acc, 3 prep, 3 acc, 3 decides. 565 | // Proposer 3: 3 prep, 3 acc, 3 prep, 3 acc, 3 prep, 3 acc, 3 decides. 566 | expected2 := int32(ninst2 * npaxos * 15) 567 | if total2 > expected2 { 568 | t.Fatalf("too many RPCs for concurrent Start()s; %v instances, got %v, expected %v", 569 | ninst2, total2, expected2) 570 | } 571 | 572 | fmt.Printf(" ... Passed\n") 573 | } 574 | 575 | // 576 | // many agreements (without failures) 577 | // 578 | func TestMany(t *testing.T) { 579 | runtime.GOMAXPROCS(4) 580 | 581 | fmt.Printf("Test: Many instances ...\n") 582 | 583 | const npaxos = 3 584 | var pxa []*Paxos = make([]*Paxos, npaxos) 585 | var pxh []string = make([]string, npaxos) 586 | defer cleanup(pxa) 587 | 588 | for i := 0; i < npaxos; i++ { 589 | pxh[i] = port("many", i) 590 | } 591 | for i := 0; i < npaxos; i++ { 592 | pxa[i] = Make(pxh, i, nil) 593 | pxa[i].Start(0, 0) 594 | } 595 | 596 | const ninst = 50 597 | for seq := 1; seq < ninst; seq++ { 598 | // only 5 active instances, to limit the 599 | // number of file descriptors. 600 | for seq >= 5 && ndecided(t, pxa, seq-5) < npaxos { 601 | time.Sleep(20 * time.Millisecond) 602 | } 603 | for i := 0; i < npaxos; i++ { 604 | pxa[i].Start(seq, (seq*10)+i) 605 | } 606 | } 607 | 608 | for { 609 | done := true 610 | for seq := 1; seq < ninst; seq++ { 611 | if ndecided(t, pxa, seq) < npaxos { 612 | done = false 613 | } 614 | } 615 | if done { 616 | break 617 | } 618 | time.Sleep(100 * time.Millisecond) 619 | } 620 | 621 | fmt.Printf(" ... Passed\n") 622 | } 623 | 624 | // 625 | // a peer starts up, with proposal, after others decide. 626 | // then another peer starts, without a proposal. 
627 | // 628 | func TestOld(t *testing.T) { 629 | runtime.GOMAXPROCS(4) 630 | 631 | fmt.Printf("Test: Minority proposal ignored ...\n") 632 | 633 | const npaxos = 5 634 | var pxa []*Paxos = make([]*Paxos, npaxos) 635 | var pxh []string = make([]string, npaxos) 636 | defer cleanup(pxa) 637 | 638 | for i := 0; i < npaxos; i++ { 639 | pxh[i] = port("old", i) 640 | } 641 | 642 | pxa[1] = Make(pxh, 1, nil) 643 | pxa[2] = Make(pxh, 2, nil) 644 | pxa[3] = Make(pxh, 3, nil) 645 | pxa[1].Start(1, 111) 646 | 647 | waitmajority(t, pxa, 1) 648 | 649 | pxa[0] = Make(pxh, 0, nil) 650 | pxa[0].Start(1, 222) 651 | 652 | waitn(t, pxa, 1, 4) 653 | 654 | if false { 655 | pxa[4] = Make(pxh, 4, nil) 656 | waitn(t, pxa, 1, npaxos) 657 | } 658 | 659 | fmt.Printf(" ... Passed\n") 660 | } 661 | 662 | // 663 | // many agreements, with unreliable RPC 664 | // 665 | func TestManyUnreliable(t *testing.T) { 666 | runtime.GOMAXPROCS(4) 667 | 668 | fmt.Printf("Test: Many instances, unreliable RPC ...\n") 669 | 670 | const npaxos = 3 671 | var pxa []*Paxos = make([]*Paxos, npaxos) 672 | var pxh []string = make([]string, npaxos) 673 | defer cleanup(pxa) 674 | 675 | for i := 0; i < npaxos; i++ { 676 | pxh[i] = port("manyun", i) 677 | } 678 | for i := 0; i < npaxos; i++ { 679 | pxa[i] = Make(pxh, i, nil) 680 | pxa[i].setunreliable(true) 681 | pxa[i].Start(0, 0) 682 | } 683 | 684 | const ninst = 50 685 | for seq := 1; seq < ninst; seq++ { 686 | // only 3 active instances, to limit the 687 | // number of file descriptors. 688 | for seq >= 3 && ndecided(t, pxa, seq-3) < npaxos { 689 | time.Sleep(20 * time.Millisecond) 690 | } 691 | for i := 0; i < npaxos; i++ { 692 | pxa[i].Start(seq, (seq*10)+i) 693 | } 694 | } 695 | 696 | for { 697 | done := true 698 | for seq := 1; seq < ninst; seq++ { 699 | if ndecided(t, pxa, seq) < npaxos { 700 | done = false 701 | } 702 | } 703 | if done { 704 | break 705 | } 706 | time.Sleep(100 * time.Millisecond) 707 | } 708 | 709 | fmt.Printf(" ... Passed\n") 710 | } 711 | 712 | func pp(tag string, src int, dst int) string { 713 | s := "/var/tmp/824-" 714 | s += strconv.Itoa(os.Getuid()) + "/" 715 | s += "px-" + tag + "-" 716 | s += strconv.Itoa(os.Getpid()) + "-" 717 | s += strconv.Itoa(src) + "-" 718 | s += strconv.Itoa(dst) 719 | return s 720 | } 721 | 722 | func cleanpp(tag string, n int) { 723 | for i := 0; i < n; i++ { 724 | for j := 0; j < n; j++ { 725 | ij := pp(tag, i, j) 726 | os.Remove(ij) 727 | } 728 | } 729 | } 730 | 731 | func part(t *testing.T, tag string, npaxos int, p1 []int, p2 []int, p3 []int) { 732 | cleanpp(tag, npaxos) 733 | 734 | pa := [][]int{p1, p2, p3} 735 | for pi := 0; pi < len(pa); pi++ { 736 | p := pa[pi] 737 | for i := 0; i < len(p); i++ { 738 | for j := 0; j < len(p); j++ { 739 | ij := pp(tag, p[i], p[j]) 740 | pj := port(tag, p[j]) 741 | err := os.Link(pj, ij) 742 | if err != nil { 743 | // one reason this link can fail is if the 744 | // corresponding Paxos peer has prematurely quit and 745 | // deleted its socket file (e.g., called px.Kill()). 
746 | t.Fatalf("os.Link(%v, %v): %v\n", pj, ij, err) 747 | } 748 | } 749 | } 750 | } 751 | } 752 | 753 | func TestPartition(t *testing.T) { 754 | runtime.GOMAXPROCS(4) 755 | 756 | tag := "partition" 757 | const npaxos = 5 758 | var pxa []*Paxos = make([]*Paxos, npaxos) 759 | defer cleanup(pxa) 760 | defer cleanpp(tag, npaxos) 761 | 762 | for i := 0; i < npaxos; i++ { 763 | var pxh []string = make([]string, npaxos) 764 | for j := 0; j < npaxos; j++ { 765 | if j == i { 766 | pxh[j] = port(tag, i) 767 | } else { 768 | pxh[j] = pp(tag, i, j) 769 | } 770 | } 771 | pxa[i] = Make(pxh, i, nil) 772 | } 773 | defer part(t, tag, npaxos, []int{}, []int{}, []int{}) 774 | 775 | seq := 0 776 | 777 | fmt.Printf("Test: No decision if partitioned ...\n") 778 | 779 | part(t, tag, npaxos, []int{0, 2}, []int{1, 3}, []int{4}) 780 | pxa[1].Start(seq, 111) 781 | checkmax(t, pxa, seq, 0) 782 | 783 | fmt.Printf(" ... Passed\n") 784 | 785 | fmt.Printf("Test: Decision in majority partition ...\n") 786 | 787 | part(t, tag, npaxos, []int{0}, []int{1, 2, 3}, []int{4}) 788 | time.Sleep(2 * time.Second) 789 | waitmajority(t, pxa, seq) 790 | 791 | fmt.Printf(" ... Passed\n") 792 | 793 | fmt.Printf("Test: All agree after full heal ...\n") 794 | 795 | pxa[0].Start(seq, 1000) // poke them 796 | pxa[4].Start(seq, 1004) 797 | part(t, tag, npaxos, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 798 | 799 | waitn(t, pxa, seq, npaxos) 800 | 801 | fmt.Printf(" ... Passed\n") 802 | 803 | fmt.Printf("Test: One peer switches partitions ...\n") 804 | 805 | for iters := 0; iters < 20; iters++ { 806 | seq++ 807 | 808 | part(t, tag, npaxos, []int{0, 1, 2}, []int{3, 4}, []int{}) 809 | pxa[0].Start(seq, seq*10) 810 | pxa[3].Start(seq, (seq*10)+1) 811 | waitmajority(t, pxa, seq) 812 | if ndecided(t, pxa, seq) > 3 { 813 | t.Fatalf("too many decided") 814 | } 815 | 816 | part(t, tag, npaxos, []int{0, 1}, []int{2, 3, 4}, []int{}) 817 | waitn(t, pxa, seq, npaxos) 818 | } 819 | 820 | fmt.Printf(" ... Passed\n") 821 | 822 | fmt.Printf("Test: One peer switches partitions, unreliable ...\n") 823 | 824 | for iters := 0; iters < 20; iters++ { 825 | seq++ 826 | 827 | for i := 0; i < npaxos; i++ { 828 | pxa[i].setunreliable(true) 829 | } 830 | 831 | part(t, tag, npaxos, []int{0, 1, 2}, []int{3, 4}, []int{}) 832 | for i := 0; i < npaxos; i++ { 833 | pxa[i].Start(seq, (seq*10)+i) 834 | } 835 | waitn(t, pxa, seq, 3) 836 | if ndecided(t, pxa, seq) > 3 { 837 | t.Fatalf("too many decided") 838 | } 839 | 840 | part(t, tag, npaxos, []int{0, 1}, []int{2, 3, 4}, []int{}) 841 | 842 | for i := 0; i < npaxos; i++ { 843 | pxa[i].setunreliable(false) 844 | } 845 | 846 | waitn(t, pxa, seq, 5) 847 | } 848 | 849 | fmt.Printf(" ... 
Passed\n") 850 | } 851 | 852 | func TestLots(t *testing.T) { 853 | runtime.GOMAXPROCS(4) 854 | 855 | fmt.Printf("Test: Many requests, changing partitions ...\n") 856 | 857 | tag := "lots" 858 | const npaxos = 5 859 | var pxa []*Paxos = make([]*Paxos, npaxos) 860 | defer cleanup(pxa) 861 | defer cleanpp(tag, npaxos) 862 | 863 | for i := 0; i < npaxos; i++ { 864 | var pxh []string = make([]string, npaxos) 865 | for j := 0; j < npaxos; j++ { 866 | if j == i { 867 | pxh[j] = port(tag, i) 868 | } else { 869 | pxh[j] = pp(tag, i, j) 870 | } 871 | } 872 | pxa[i] = Make(pxh, i, nil) 873 | pxa[i].setunreliable(true) 874 | } 875 | defer part(t, tag, npaxos, []int{}, []int{}, []int{}) 876 | 877 | done := int32(0) 878 | 879 | // re-partition periodically 880 | ch1 := make(chan bool) 881 | go func() { 882 | defer func() { ch1 <- true }() 883 | for atomic.LoadInt32(&done) == 0 { 884 | var a [npaxos]int 885 | for i := 0; i < npaxos; i++ { 886 | a[i] = (rand.Int() % 3) 887 | } 888 | pa := make([][]int, 3) 889 | for i := 0; i < 3; i++ { 890 | pa[i] = make([]int, 0) 891 | for j := 0; j < npaxos; j++ { 892 | if a[j] == i { 893 | pa[i] = append(pa[i], j) 894 | } 895 | } 896 | } 897 | part(t, tag, npaxos, pa[0], pa[1], pa[2]) 898 | time.Sleep(time.Duration(rand.Int63()%200) * time.Millisecond) 899 | } 900 | }() 901 | 902 | seq := int32(0) 903 | 904 | // periodically start a new instance 905 | ch2 := make(chan bool) 906 | go func() { 907 | defer func() { ch2 <- true }() 908 | for atomic.LoadInt32(&done) == 0 { 909 | // how many instances are in progress? 910 | nd := 0 911 | sq := int(atomic.LoadInt32(&seq)) 912 | for i := 0; i < sq; i++ { 913 | if ndecided(t, pxa, i) == npaxos { 914 | nd++ 915 | } 916 | } 917 | if sq-nd < 10 { 918 | for i := 0; i < npaxos; i++ { 919 | pxa[i].Start(sq, rand.Int()%10) 920 | } 921 | atomic.AddInt32(&seq, 1) 922 | } 923 | time.Sleep(time.Duration(rand.Int63()%300) * time.Millisecond) 924 | } 925 | }() 926 | 927 | // periodically check that decisions are consistent 928 | ch3 := make(chan bool) 929 | go func() { 930 | defer func() { ch3 <- true }() 931 | for atomic.LoadInt32(&done) == 0 { 932 | for i := 0; i < int(atomic.LoadInt32(&seq)); i++ { 933 | ndecided(t, pxa, i) 934 | } 935 | time.Sleep(time.Duration(rand.Int63()%300) * time.Millisecond) 936 | } 937 | }() 938 | 939 | time.Sleep(20 * time.Second) 940 | atomic.StoreInt32(&done, 1) 941 | <-ch1 942 | <-ch2 943 | <-ch3 944 | 945 | // repair, then check that all instances decided. 946 | for i := 0; i < npaxos; i++ { 947 | pxa[i].setunreliable(false) 948 | } 949 | part(t, tag, npaxos, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 950 | time.Sleep(5 * time.Second) 951 | 952 | for i := 0; i < int(atomic.LoadInt32(&seq)); i++ { 953 | waitmajority(t, pxa, i) 954 | } 955 | 956 | fmt.Printf(" ... Passed\n") 957 | } 958 | -------------------------------------------------------------------------------- /pbservice/client.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import "viewservice" 4 | import "net/rpc" 5 | import "fmt" 6 | 7 | import "crypto/rand" 8 | import "math/big" 9 | 10 | type Clerk struct { 11 | vs *viewservice.Clerk 12 | // declarations here 13 | } 14 | 15 | // this may come in handy. 
16 | func nrand() int64 { 17 | max := big.NewInt(int64(1) << 62) 18 | bigx, _ := rand.Int(rand.Reader, max) 19 | x := bigx.Int64() 20 | return x 21 | } 22 | 23 | func MakeClerk(vshost string, me string) *Clerk { 24 | ck := new(Clerk) 25 | ck.vs = viewservice.MakeClerk(me, vshost) 26 | // ck.* initializations here 27 | 28 | return ck 29 | } 30 | 31 | // 32 | // call() sends an RPC to the rpcname handler on server srv 33 | // with arguments args, waits for the reply, and leaves the 34 | // reply in reply. the reply argument should be a pointer 35 | // to a reply structure. 36 | // 37 | // the return value is true if the server responded, and false 38 | // if call() was not able to contact the server. in particular, 39 | // the reply's contents are only valid if call() returned true. 40 | // 41 | // you should assume that call() will return an 42 | // error after a while if the server is dead. 43 | // don't provide own time-out mechanism. 44 | // 45 | // please use call() to send all RPCs, in client.go and server.go. 46 | // please don't change this function. 47 | // 48 | func call(srv string, rpcname string, 49 | args interface{}, reply interface{}) bool { 50 | c, errx := rpc.Dial("unix", srv) 51 | if errx != nil { 52 | return false 53 | } 54 | defer c.Close() 55 | 56 | err := c.Call(rpcname, args, reply) 57 | if err == nil { 58 | return true 59 | } 60 | 61 | fmt.Println(err) 62 | return false 63 | } 64 | 65 | // 66 | // fetch a key's value from the current primary; 67 | // if they key has never been set, return "". 68 | // Get() must keep trying until it either the 69 | // primary replies with the value or the primary 70 | // says the key doesn't exist (has never been Put(). 71 | // 72 | func (ck *Clerk) Get(key string) string { 73 | 74 | // code here. 75 | 76 | return "???" 77 | } 78 | 79 | // 80 | // send a Put or Append RPC 81 | // 82 | func (ck *Clerk) PutAppend(key string, value string, op string) { 83 | 84 | // code here. 85 | } 86 | 87 | // 88 | // tell the primary to update key's value. 89 | // must keep trying until it succeeds. 90 | // 91 | func (ck *Clerk) Put(key string, value string) { 92 | ck.PutAppend(key, value, "Put") 93 | } 94 | 95 | // 96 | // tell the primary to append to key's value. 97 | // must keep trying until it succeeds. 98 | // 99 | func (ck *Clerk) Append(key string, value string) { 100 | ck.PutAppend(key, value, "Append") 101 | } 102 | -------------------------------------------------------------------------------- /pbservice/common.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ErrWrongServer = "ErrWrongServer" 7 | ) 8 | 9 | type Err string 10 | 11 | // Put or Append 12 | type PutAppendArgs struct { 13 | Key string 14 | Value string 15 | // You'll have to add definitions here. 16 | 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | } 20 | 21 | type PutAppendReply struct { 22 | Err Err 23 | } 24 | 25 | type GetArgs struct { 26 | Key string 27 | // have to add definitions here. 28 | } 29 | 30 | type GetReply struct { 31 | Err Err 32 | Value string 33 | } 34 | 35 | // RPC definitions here. 
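// A minimal sketch, kept as Go comments, of the duplicate-detection fields a
// Put/Append request typically carries so the primary can recognize client
// retries: a per-Clerk identifier plus a per-request sequence number. The
// names below (ClientID, ReqNum, Op) are illustrative assumptions, not
// definitions from this repository; like the fields above, they must start
// with capital letters so net/rpc can marshal them.
//
//	type PutAppendArgs struct {
//		Key      string
//		Value    string
//		Op       string // "Put" or "Append"
//		ClientID int64  // chosen once per Clerk, e.g. with nrand()
//		ReqNum   int64  // incremented for each new request from this Clerk
//	}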
36 | -------------------------------------------------------------------------------- /pbservice/server.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "time" 8 | import "viewservice" 9 | import "sync" 10 | import "sync/atomic" 11 | import "os" 12 | import "syscall" 13 | import "math/rand" 14 | 15 | type PBServer struct { 16 | mu sync.Mutex 17 | l net.Listener 18 | dead int32 // for testing 19 | unreliable int32 // for testing 20 | me string 21 | vs *viewservice.Clerk 22 | // declarations here. 23 | } 24 | 25 | func (pb *PBServer) Get(args *GetArgs, reply *GetReply) error { 26 | 27 | // code here. 28 | 29 | return nil 30 | } 31 | 32 | func (pb *PBServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 33 | 34 | // Your code here. 35 | 36 | return nil 37 | } 38 | 39 | // 40 | // ping the viewserver periodically. 41 | // if view changed: 42 | // transition to new view. 43 | // manage transfer of state from primary to new backup. 44 | // 45 | func (pb *PBServer) tick() { 46 | 47 | // code here. 48 | } 49 | 50 | // tell the server to shut itself down. 51 | // please do not change these two functions. 52 | func (pb *PBServer) kill() { 53 | atomic.StoreInt32(&pb.dead, 1) 54 | pb.l.Close() 55 | } 56 | 57 | // call this to find out if the server is dead. 58 | func (pb *PBServer) isdead() bool { 59 | return atomic.LoadInt32(&pb.dead) != 0 60 | } 61 | 62 | // please do not change these two functions. 63 | func (pb *PBServer) setunreliable(what bool) { 64 | if what { 65 | atomic.StoreInt32(&pb.unreliable, 1) 66 | } else { 67 | atomic.StoreInt32(&pb.unreliable, 0) 68 | } 69 | } 70 | 71 | func (pb *PBServer) isunreliable() bool { 72 | return atomic.LoadInt32(&pb.unreliable) != 0 73 | } 74 | 75 | func StartServer(vshost string, me string) *PBServer { 76 | pb := new(PBServer) 77 | pb.me = me 78 | pb.vs = viewservice.MakeClerk(me, vshost) 79 | // Your pb.* initializations here. 80 | 81 | rpcs := rpc.NewServer() 82 | rpcs.Register(pb) 83 | 84 | os.Remove(pb.me) 85 | l, e := net.Listen("unix", pb.me) 86 | if e != nil { 87 | log.Fatal("listen error: ", e) 88 | } 89 | pb.l = l 90 | 91 | // please do not change any of the following code, 92 | // or do anything to subvert it. 93 | 94 | go func() { 95 | for pb.isdead() == false { 96 | conn, err := pb.l.Accept() 97 | if err == nil && pb.isdead() == false { 98 | if pb.isunreliable() && (rand.Int63()%1000) < 100 { 99 | // discard the request. 100 | conn.Close() 101 | } else if pb.isunreliable() && (rand.Int63()%1000) < 200 { 102 | // process the request but force discard of reply. 
103 | c1 := conn.(*net.UnixConn) 104 | f, _ := c1.File() 105 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 106 | if err != nil { 107 | fmt.Printf("shutdown: %v\n", err) 108 | } 109 | go rpcs.ServeConn(conn) 110 | } else { 111 | go rpcs.ServeConn(conn) 112 | } 113 | } else if err == nil { 114 | conn.Close() 115 | } 116 | if err != nil && pb.isdead() == false { 117 | fmt.Printf("PBServer(%v) accept: %v\n", me, err.Error()) 118 | pb.kill() 119 | } 120 | } 121 | }() 122 | 123 | go func() { 124 | for pb.isdead() == false { 125 | pb.tick() 126 | time.Sleep(viewservice.PingInterval) 127 | } 128 | }() 129 | 130 | return pb 131 | } 132 | -------------------------------------------------------------------------------- /pbservice/test_test.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import "viewservice" 4 | import "fmt" 5 | import "io" 6 | import "net" 7 | import "testing" 8 | import "time" 9 | import "log" 10 | import "runtime" 11 | import "math/rand" 12 | import "os" 13 | import "sync" 14 | import "strconv" 15 | import "strings" 16 | import "sync/atomic" 17 | 18 | func check(ck *Clerk, key string, value string) { 19 | v := ck.Get(key) 20 | if v != value { 21 | log.Fatalf("Get(%v) -> %v, expected %v", key, v, value) 22 | } 23 | } 24 | 25 | func port(tag string, host int) string { 26 | s := "/var/tmp/824-" 27 | s += strconv.Itoa(os.Getuid()) + "/" 28 | os.Mkdir(s, 0777) 29 | s += "pb-" 30 | s += strconv.Itoa(os.Getpid()) + "-" 31 | s += tag + "-" 32 | s += strconv.Itoa(host) 33 | return s 34 | } 35 | 36 | func TestBasicFail(t *testing.T) { 37 | runtime.GOMAXPROCS(4) 38 | 39 | tag := "basic" 40 | vshost := port(tag+"v", 1) 41 | vs := viewservice.StartServer(vshost) 42 | time.Sleep(time.Second) 43 | vck := viewservice.MakeClerk("", vshost) 44 | 45 | ck := MakeClerk(vshost, "") 46 | 47 | fmt.Printf("Test: Single primary, no backup ...\n") 48 | 49 | s1 := StartServer(vshost, port(tag, 1)) 50 | 51 | deadtime := viewservice.PingInterval * viewservice.DeadPings 52 | time.Sleep(deadtime * 2) 53 | if vck.Primary() != s1.me { 54 | t.Fatal("first primary never formed view") 55 | } 56 | 57 | ck.Put("111", "v1") 58 | check(ck, "111", "v1") 59 | 60 | ck.Put("2", "v2") 61 | check(ck, "2", "v2") 62 | 63 | ck.Put("1", "v1a") 64 | check(ck, "1", "v1a") 65 | 66 | ck.Append("ak", "hello") 67 | check(ck, "ak", "hello") 68 | ck.Put("ak", "xx") 69 | ck.Append("ak", "yy") 70 | check(ck, "ak", "xxyy") 71 | 72 | fmt.Printf(" ... Passed\n") 73 | 74 | // add a backup 75 | 76 | fmt.Printf("Test: Add a backup ...\n") 77 | 78 | s2 := StartServer(vshost, port(tag, 2)) 79 | for i := 0; i < viewservice.DeadPings*2; i++ { 80 | v, _ := vck.Get() 81 | if v.Backup == s2.me { 82 | break 83 | } 84 | time.Sleep(viewservice.PingInterval) 85 | } 86 | v, _ := vck.Get() 87 | if v.Backup != s2.me { 88 | t.Fatal("backup never came up") 89 | } 90 | 91 | ck.Put("3", "33") 92 | check(ck, "3", "33") 93 | 94 | // give the backup time to initialize 95 | time.Sleep(3 * viewservice.PingInterval) 96 | 97 | ck.Put("4", "44") 98 | check(ck, "4", "44") 99 | 100 | fmt.Printf(" ... Passed\n") 101 | 102 | fmt.Printf("Test: Count RPCs to viewserver ...\n") 103 | 104 | // verify that the client or server doesn't contact the 105 | // viewserver for every request -- i.e. that both client 106 | // and servers cache the current view and only refresh 107 | // it when something seems to be wrong. this test allows 108 | // each server to Ping() the viewserver 10 times / second. 
109 | 110 | count1 := int(vs.GetRPCCount()) 111 | t1 := time.Now() 112 | for i := 0; i < 100; i++ { 113 | ck.Put("xk"+strconv.Itoa(i), strconv.Itoa(i)) 114 | } 115 | count2 := int(vs.GetRPCCount()) 116 | t2 := time.Now() 117 | dt := t2.Sub(t1) 118 | allowed := 2 * (dt / (100 * time.Millisecond)) // two servers tick()ing 10/second 119 | if (count2 - count1) > int(allowed)+20 { 120 | t.Fatal("too many viewserver RPCs") 121 | } 122 | 123 | fmt.Printf(" ... Passed\n") 124 | 125 | // kill the primary 126 | 127 | fmt.Printf("Test: Primary failure ...\n") 128 | 129 | s1.kill() 130 | for i := 0; i < viewservice.DeadPings*2; i++ { 131 | v, _ := vck.Get() 132 | if v.Primary == s2.me { 133 | break 134 | } 135 | time.Sleep(viewservice.PingInterval) 136 | } 137 | v, _ = vck.Get() 138 | if v.Primary != s2.me { 139 | t.Fatal("backup never switched to primary") 140 | } 141 | 142 | check(ck, "1", "v1a") 143 | check(ck, "3", "33") 144 | check(ck, "4", "44") 145 | 146 | fmt.Printf(" ... Passed\n") 147 | 148 | // kill solo server, start new server, check that 149 | // it does not start serving as primary 150 | 151 | fmt.Printf("Test: Kill last server, new one should not be active ...\n") 152 | 153 | s2.kill() 154 | s3 := StartServer(vshost, port(tag, 3)) 155 | time.Sleep(1 * time.Second) 156 | get_done := make(chan bool) 157 | go func() { 158 | ck.Get("1") 159 | get_done <- true 160 | }() 161 | 162 | select { 163 | case <-get_done: 164 | t.Fatalf("ck.Get() returned even though no initialized primary") 165 | case <-time.After(2 * time.Second): 166 | } 167 | 168 | fmt.Printf(" ... Passed\n") 169 | 170 | s1.kill() 171 | s2.kill() 172 | s3.kill() 173 | time.Sleep(time.Second) 174 | vs.Kill() 175 | time.Sleep(time.Second) 176 | } 177 | 178 | func TestAtMostOnce(t *testing.T) { 179 | runtime.GOMAXPROCS(4) 180 | 181 | tag := "tamo" 182 | vshost := port(tag+"v", 1) 183 | vs := viewservice.StartServer(vshost) 184 | time.Sleep(time.Second) 185 | vck := viewservice.MakeClerk("", vshost) 186 | 187 | fmt.Printf("Test: at-most-once Append; unreliable ...\n") 188 | 189 | const nservers = 1 190 | var sa [nservers]*PBServer 191 | for i := 0; i < nservers; i++ { 192 | sa[i] = StartServer(vshost, port(tag, i+1)) 193 | sa[i].setunreliable(true) 194 | } 195 | 196 | for iters := 0; iters < viewservice.DeadPings*2; iters++ { 197 | view, _ := vck.Get() 198 | if view.Primary != "" && view.Backup != "" { 199 | break 200 | } 201 | time.Sleep(viewservice.PingInterval) 202 | } 203 | 204 | // give p+b time to ack, initialize 205 | time.Sleep(viewservice.PingInterval * viewservice.DeadPings) 206 | 207 | ck := MakeClerk(vshost, "") 208 | k := "counter" 209 | val := "" 210 | for i := 0; i < 100; i++ { 211 | v := strconv.Itoa(i) 212 | ck.Append(k, v) 213 | val = val + v 214 | } 215 | 216 | v := ck.Get(k) 217 | if v != val { 218 | t.Fatalf("ck.Get() returned %v but expected %v\n", v, val) 219 | } 220 | 221 | fmt.Printf(" ... Passed\n") 222 | 223 | for i := 0; i < nservers; i++ { 224 | sa[i].kill() 225 | } 226 | time.Sleep(time.Second) 227 | vs.Kill() 228 | time.Sleep(time.Second) 229 | } 230 | 231 | // Put right after a backup dies. 
232 | func TestFailPut(t *testing.T) { 233 | runtime.GOMAXPROCS(4) 234 | 235 | tag := "failput" 236 | vshost := port(tag+"v", 1) 237 | vs := viewservice.StartServer(vshost) 238 | time.Sleep(time.Second) 239 | vck := viewservice.MakeClerk("", vshost) 240 | 241 | s1 := StartServer(vshost, port(tag, 1)) 242 | time.Sleep(time.Second) 243 | s2 := StartServer(vshost, port(tag, 2)) 244 | time.Sleep(time.Second) 245 | s3 := StartServer(vshost, port(tag, 3)) 246 | 247 | for i := 0; i < viewservice.DeadPings*3; i++ { 248 | v, _ := vck.Get() 249 | if v.Primary != "" && v.Backup != "" { 250 | break 251 | } 252 | time.Sleep(viewservice.PingInterval) 253 | } 254 | time.Sleep(time.Second) // wait for backup initializion 255 | v1, _ := vck.Get() 256 | if v1.Primary != s1.me || v1.Backup != s2.me { 257 | t.Fatalf("wrong primary or backup") 258 | } 259 | 260 | ck := MakeClerk(vshost, "") 261 | 262 | ck.Put("a", "aa") 263 | ck.Put("b", "bb") 264 | ck.Put("c", "cc") 265 | check(ck, "a", "aa") 266 | check(ck, "b", "bb") 267 | check(ck, "c", "cc") 268 | 269 | // kill backup, then immediate Put 270 | fmt.Printf("Test: Put() immediately after backup failure ...\n") 271 | s2.kill() 272 | ck.Put("a", "aaa") 273 | check(ck, "a", "aaa") 274 | 275 | for i := 0; i < viewservice.DeadPings*3; i++ { 276 | v, _ := vck.Get() 277 | if v.Viewnum > v1.Viewnum && v.Primary != "" && v.Backup != "" { 278 | break 279 | } 280 | time.Sleep(viewservice.PingInterval) 281 | } 282 | time.Sleep(time.Second) // wait for backup initialization 283 | v2, _ := vck.Get() 284 | if v2.Primary != s1.me || v2.Backup != s3.me { 285 | t.Fatal("wrong primary or backup") 286 | } 287 | 288 | check(ck, "a", "aaa") 289 | fmt.Printf(" ... Passed\n") 290 | 291 | // kill primary, then immediate Put 292 | fmt.Printf("Test: Put() immediately after primary failure ...\n") 293 | s1.kill() 294 | ck.Put("b", "bbb") 295 | check(ck, "b", "bbb") 296 | 297 | for i := 0; i < viewservice.DeadPings*3; i++ { 298 | v, _ := vck.Get() 299 | if v.Viewnum > v2.Viewnum && v.Primary != "" { 300 | break 301 | } 302 | time.Sleep(viewservice.PingInterval) 303 | } 304 | time.Sleep(time.Second) 305 | 306 | check(ck, "a", "aaa") 307 | check(ck, "b", "bbb") 308 | check(ck, "c", "cc") 309 | fmt.Printf(" ... Passed\n") 310 | 311 | s1.kill() 312 | s2.kill() 313 | s3.kill() 314 | time.Sleep(viewservice.PingInterval * 2) 315 | vs.Kill() 316 | } 317 | 318 | // do a bunch of concurrent Put()s on the same key, 319 | // then check that primary and backup have identical values. 320 | // i.e. that they processed the Put()s in the same order. 
321 | func TestConcurrentSame(t *testing.T) { 322 | runtime.GOMAXPROCS(4) 323 | 324 | tag := "cs" 325 | vshost := port(tag+"v", 1) 326 | vs := viewservice.StartServer(vshost) 327 | time.Sleep(time.Second) 328 | vck := viewservice.MakeClerk("", vshost) 329 | 330 | fmt.Printf("Test: Concurrent Put()s to the same key ...\n") 331 | 332 | const nservers = 2 333 | var sa [nservers]*PBServer 334 | for i := 0; i < nservers; i++ { 335 | sa[i] = StartServer(vshost, port(tag, i+1)) 336 | } 337 | 338 | for iters := 0; iters < viewservice.DeadPings*2; iters++ { 339 | view, _ := vck.Get() 340 | if view.Primary != "" && view.Backup != "" { 341 | break 342 | } 343 | time.Sleep(viewservice.PingInterval) 344 | } 345 | 346 | // give p+b time to ack, initialize 347 | time.Sleep(viewservice.PingInterval * viewservice.DeadPings) 348 | 349 | done := int32(0) 350 | 351 | view1, _ := vck.Get() 352 | const nclients = 3 353 | const nkeys = 2 354 | for xi := 0; xi < nclients; xi++ { 355 | go func(i int) { 356 | ck := MakeClerk(vshost, "") 357 | rr := rand.New(rand.NewSource(int64(os.Getpid() + i))) 358 | for atomic.LoadInt32(&done) == 0 { 359 | k := strconv.Itoa(rr.Int() % nkeys) 360 | v := strconv.Itoa(rr.Int()) 361 | ck.Put(k, v) 362 | } 363 | }(xi) 364 | } 365 | 366 | time.Sleep(5 * time.Second) 367 | atomic.StoreInt32(&done, 1) 368 | time.Sleep(time.Second) 369 | 370 | // read from primary 371 | ck := MakeClerk(vshost, "") 372 | var vals [nkeys]string 373 | for i := 0; i < nkeys; i++ { 374 | vals[i] = ck.Get(strconv.Itoa(i)) 375 | if vals[i] == "" { 376 | t.Fatalf("Get(%v) failed from primary", i) 377 | } 378 | } 379 | 380 | // kill the primary 381 | for i := 0; i < nservers; i++ { 382 | if view1.Primary == sa[i].me { 383 | sa[i].kill() 384 | break 385 | } 386 | } 387 | for iters := 0; iters < viewservice.DeadPings*2; iters++ { 388 | view, _ := vck.Get() 389 | if view.Primary == view1.Backup { 390 | break 391 | } 392 | time.Sleep(viewservice.PingInterval) 393 | } 394 | view2, _ := vck.Get() 395 | if view2.Primary != view1.Backup { 396 | t.Fatal("wrong Primary") 397 | } 398 | 399 | // read from old backup 400 | for i := 0; i < nkeys; i++ { 401 | z := ck.Get(strconv.Itoa(i)) 402 | if z != vals[i] { 403 | t.Fatalf("Get(%v) from backup; wanted %v, got %v", i, vals[i], z) 404 | } 405 | } 406 | 407 | fmt.Printf(" ... Passed\n") 408 | 409 | for i := 0; i < nservers; i++ { 410 | sa[i].kill() 411 | } 412 | time.Sleep(time.Second) 413 | vs.Kill() 414 | time.Sleep(time.Second) 415 | } 416 | 417 | // check that all known appends are present in a value, 418 | // and are in order for each concurrent client. 419 | func checkAppends(t *testing.T, v string, counts []int) { 420 | nclients := len(counts) 421 | for i := 0; i < nclients; i++ { 422 | lastoff := -1 423 | for j := 0; j < counts[i]; j++ { 424 | wanted := "x " + strconv.Itoa(i) + " " + strconv.Itoa(j) + " y" 425 | off := strings.Index(v, wanted) 426 | if off < 0 { 427 | t.Fatalf("missing element in Append result") 428 | } 429 | off1 := strings.LastIndex(v, wanted) 430 | if off1 != off { 431 | t.Fatalf("duplicate element in Append result") 432 | } 433 | if off <= lastoff { 434 | t.Fatalf("wrong order for element in Append result") 435 | } 436 | lastoff = off 437 | } 438 | } 439 | } 440 | 441 | // do a bunch of concurrent Append()s on the same key, 442 | // then check that primary and backup have identical values. 443 | // i.e. that they processed the Append()s in the same order. 
444 | func TestConcurrentSameAppend(t *testing.T) { 445 | runtime.GOMAXPROCS(4) 446 | 447 | tag := "csa" 448 | vshost := port(tag+"v", 1) 449 | vs := viewservice.StartServer(vshost) 450 | time.Sleep(time.Second) 451 | vck := viewservice.MakeClerk("", vshost) 452 | 453 | fmt.Printf("Test: Concurrent Append()s to the same key ...\n") 454 | 455 | const nservers = 2 456 | var sa [nservers]*PBServer 457 | for i := 0; i < nservers; i++ { 458 | sa[i] = StartServer(vshost, port(tag, i+1)) 459 | } 460 | 461 | for iters := 0; iters < viewservice.DeadPings*2; iters++ { 462 | view, _ := vck.Get() 463 | if view.Primary != "" && view.Backup != "" { 464 | break 465 | } 466 | time.Sleep(viewservice.PingInterval) 467 | } 468 | 469 | // give p+b time to ack, initialize 470 | time.Sleep(viewservice.PingInterval * viewservice.DeadPings) 471 | 472 | view1, _ := vck.Get() 473 | 474 | // code for i'th concurrent client thread. 475 | ff := func(i int, ch chan int) { 476 | ret := -1 477 | defer func() { ch <- ret }() 478 | ck := MakeClerk(vshost, "") 479 | n := 0 480 | for n < 50 { 481 | v := "x " + strconv.Itoa(i) + " " + strconv.Itoa(n) + " y" 482 | ck.Append("k", v) 483 | n += 1 484 | } 485 | ret = n 486 | } 487 | 488 | // start the concurrent clients 489 | const nclients = 3 490 | chans := []chan int{} 491 | for i := 0; i < nclients; i++ { 492 | chans = append(chans, make(chan int)) 493 | go ff(i, chans[i]) 494 | } 495 | 496 | // wait for the clients, accumulate Append counts. 497 | counts := []int{} 498 | for i := 0; i < nclients; i++ { 499 | n := <-chans[i] 500 | if n < 0 { 501 | t.Fatalf("child failed") 502 | } 503 | counts = append(counts, n) 504 | } 505 | 506 | ck := MakeClerk(vshost, "") 507 | 508 | // check that primary's copy of the value has all 509 | // the Append()s. 510 | primaryv := ck.Get("k") 511 | checkAppends(t, primaryv, counts) 512 | 513 | // kill the primary so we can check the backup 514 | for i := 0; i < nservers; i++ { 515 | if view1.Primary == sa[i].me { 516 | sa[i].kill() 517 | break 518 | } 519 | } 520 | for iters := 0; iters < viewservice.DeadPings*2; iters++ { 521 | view, _ := vck.Get() 522 | if view.Primary == view1.Backup { 523 | break 524 | } 525 | time.Sleep(viewservice.PingInterval) 526 | } 527 | view2, _ := vck.Get() 528 | if view2.Primary != view1.Backup { 529 | t.Fatal("wrong Primary") 530 | } 531 | 532 | // check that backup's copy of the value has all 533 | // the Append()s. 534 | backupv := ck.Get("k") 535 | checkAppends(t, backupv, counts) 536 | 537 | if backupv != primaryv { 538 | t.Fatal("primary and backup had different values") 539 | } 540 | 541 | fmt.Printf(" ... 
Passed\n") 542 | 543 | for i := 0; i < nservers; i++ { 544 | sa[i].kill() 545 | } 546 | time.Sleep(time.Second) 547 | vs.Kill() 548 | time.Sleep(time.Second) 549 | } 550 | 551 | func TestConcurrentSameUnreliable(t *testing.T) { 552 | runtime.GOMAXPROCS(4) 553 | 554 | tag := "csu" 555 | vshost := port(tag+"v", 1) 556 | vs := viewservice.StartServer(vshost) 557 | time.Sleep(time.Second) 558 | vck := viewservice.MakeClerk("", vshost) 559 | 560 | fmt.Printf("Test: Concurrent Put()s to the same key; unreliable ...\n") 561 | 562 | const nservers = 2 563 | var sa [nservers]*PBServer 564 | for i := 0; i < nservers; i++ { 565 | sa[i] = StartServer(vshost, port(tag, i+1)) 566 | sa[i].setunreliable(true) 567 | } 568 | 569 | for iters := 0; iters < viewservice.DeadPings*2; iters++ { 570 | view, _ := vck.Get() 571 | if view.Primary != "" && view.Backup != "" { 572 | break 573 | } 574 | time.Sleep(viewservice.PingInterval) 575 | } 576 | 577 | // give p+b time to ack, initialize 578 | time.Sleep(viewservice.PingInterval * viewservice.DeadPings) 579 | 580 | { 581 | ck := MakeClerk(vshost, "") 582 | ck.Put("0", "x") 583 | ck.Put("1", "x") 584 | } 585 | 586 | done := int32(0) 587 | 588 | view1, _ := vck.Get() 589 | const nclients = 3 590 | const nkeys = 2 591 | cha := []chan bool{} 592 | for xi := 0; xi < nclients; xi++ { 593 | cha = append(cha, make(chan bool)) 594 | go func(i int, ch chan bool) { 595 | ok := false 596 | defer func() { ch <- ok }() 597 | ck := MakeClerk(vshost, "") 598 | rr := rand.New(rand.NewSource(int64(os.Getpid() + i))) 599 | for atomic.LoadInt32(&done) == 0 { 600 | k := strconv.Itoa(rr.Int() % nkeys) 601 | v := strconv.Itoa(rr.Int()) 602 | ck.Put(k, v) 603 | } 604 | ok = true 605 | }(xi, cha[xi]) 606 | } 607 | 608 | time.Sleep(5 * time.Second) 609 | atomic.StoreInt32(&done, 1) 610 | 611 | for i := 0; i < len(cha); i++ { 612 | ok := <-cha[i] 613 | if ok == false { 614 | t.Fatalf("child failed") 615 | } 616 | } 617 | 618 | // read from primary 619 | ck := MakeClerk(vshost, "") 620 | var vals [nkeys]string 621 | for i := 0; i < nkeys; i++ { 622 | vals[i] = ck.Get(strconv.Itoa(i)) 623 | if vals[i] == "" { 624 | t.Fatalf("Get(%v) failed from primary", i) 625 | } 626 | } 627 | 628 | // kill the primary 629 | for i := 0; i < nservers; i++ { 630 | if view1.Primary == sa[i].me { 631 | sa[i].kill() 632 | break 633 | } 634 | } 635 | for iters := 0; iters < viewservice.DeadPings*2; iters++ { 636 | view, _ := vck.Get() 637 | if view.Primary == view1.Backup { 638 | break 639 | } 640 | time.Sleep(viewservice.PingInterval) 641 | } 642 | view2, _ := vck.Get() 643 | if view2.Primary != view1.Backup { 644 | t.Fatal("wrong Primary") 645 | } 646 | 647 | // read from old backup 648 | for i := 0; i < nkeys; i++ { 649 | z := ck.Get(strconv.Itoa(i)) 650 | if z != vals[i] { 651 | t.Fatalf("Get(%v) from backup; wanted %v, got %v", i, vals[i], z) 652 | } 653 | } 654 | 655 | fmt.Printf(" ... 
Passed\n") 656 | 657 | for i := 0; i < nservers; i++ { 658 | sa[i].kill() 659 | } 660 | time.Sleep(time.Second) 661 | vs.Kill() 662 | time.Sleep(time.Second) 663 | } 664 | 665 | // constant put/get while crashing and restarting servers 666 | func TestRepeatedCrash(t *testing.T) { 667 | runtime.GOMAXPROCS(4) 668 | 669 | tag := "rc" 670 | vshost := port(tag+"v", 1) 671 | vs := viewservice.StartServer(vshost) 672 | time.Sleep(time.Second) 673 | vck := viewservice.MakeClerk("", vshost) 674 | 675 | fmt.Printf("Test: Repeated failures/restarts ...\n") 676 | 677 | const nservers = 3 678 | var sa [nservers]*PBServer 679 | samu := sync.Mutex{} 680 | for i := 0; i < nservers; i++ { 681 | sa[i] = StartServer(vshost, port(tag, i+1)) 682 | } 683 | 684 | for i := 0; i < viewservice.DeadPings; i++ { 685 | v, _ := vck.Get() 686 | if v.Primary != "" && v.Backup != "" { 687 | break 688 | } 689 | time.Sleep(viewservice.PingInterval) 690 | } 691 | 692 | // wait a bit for primary to initialize backup 693 | time.Sleep(viewservice.DeadPings * viewservice.PingInterval) 694 | 695 | done := int32(0) 696 | 697 | go func() { 698 | // kill and restart servers 699 | rr := rand.New(rand.NewSource(int64(os.Getpid()))) 700 | for atomic.LoadInt32(&done) == 0 { 701 | i := rr.Int() % nservers 702 | // fmt.Printf("%v killing %v\n", ts(), 5001+i) 703 | sa[i].kill() 704 | 705 | // wait long enough for new view to form, backup to be initialized 706 | time.Sleep(2 * viewservice.PingInterval * viewservice.DeadPings) 707 | 708 | sss := StartServer(vshost, port(tag, i+1)) 709 | samu.Lock() 710 | sa[i] = sss 711 | samu.Unlock() 712 | 713 | // wait long enough for new view to form, backup to be initialized 714 | time.Sleep(2 * viewservice.PingInterval * viewservice.DeadPings) 715 | } 716 | }() 717 | 718 | const nth = 2 719 | var cha [nth]chan bool 720 | for xi := 0; xi < nth; xi++ { 721 | cha[xi] = make(chan bool) 722 | go func(i int) { 723 | ok := false 724 | defer func() { cha[i] <- ok }() 725 | ck := MakeClerk(vshost, "") 726 | data := map[string]string{} 727 | rr := rand.New(rand.NewSource(int64(os.Getpid() + i))) 728 | for atomic.LoadInt32(&done) == 0 { 729 | k := strconv.Itoa((i * 1000000) + (rr.Int() % 10)) 730 | wanted, ok := data[k] 731 | if ok { 732 | v := ck.Get(k) 733 | if v != wanted { 734 | t.Fatalf("key=%v wanted=%v got=%v", k, wanted, v) 735 | } 736 | } 737 | nv := strconv.Itoa(rr.Int()) 738 | ck.Put(k, nv) 739 | data[k] = nv 740 | // if no sleep here, then server tick() threads do not get 741 | // enough time to Ping the viewserver. 742 | time.Sleep(10 * time.Millisecond) 743 | } 744 | ok = true 745 | }(xi) 746 | } 747 | 748 | time.Sleep(20 * time.Second) 749 | atomic.StoreInt32(&done, 1) 750 | 751 | fmt.Printf(" ... Put/Gets done ... \n") 752 | 753 | for i := 0; i < nth; i++ { 754 | ok := <-cha[i] 755 | if ok == false { 756 | t.Fatal("child failed") 757 | } 758 | } 759 | 760 | ck := MakeClerk(vshost, "") 761 | ck.Put("aaa", "bbb") 762 | if v := ck.Get("aaa"); v != "bbb" { 763 | t.Fatalf("final Put/Get failed") 764 | } 765 | 766 | fmt.Printf(" ... 
Passed\n") 767 | 768 | for i := 0; i < nservers; i++ { 769 | samu.Lock() 770 | sa[i].kill() 771 | samu.Unlock() 772 | } 773 | time.Sleep(time.Second) 774 | vs.Kill() 775 | time.Sleep(time.Second) 776 | } 777 | 778 | func TestRepeatedCrashUnreliable(t *testing.T) { 779 | runtime.GOMAXPROCS(4) 780 | 781 | tag := "rcu" 782 | vshost := port(tag+"v", 1) 783 | vs := viewservice.StartServer(vshost) 784 | time.Sleep(time.Second) 785 | vck := viewservice.MakeClerk("", vshost) 786 | 787 | fmt.Printf("Test: Repeated failures/restarts with concurrent updates to same key; unreliable ...\n") 788 | 789 | const nservers = 3 790 | var sa [nservers]*PBServer 791 | samu := sync.Mutex{} 792 | for i := 0; i < nservers; i++ { 793 | sa[i] = StartServer(vshost, port(tag, i+1)) 794 | sa[i].setunreliable(true) 795 | } 796 | 797 | for i := 0; i < viewservice.DeadPings; i++ { 798 | v, _ := vck.Get() 799 | if v.Primary != "" && v.Backup != "" { 800 | break 801 | } 802 | time.Sleep(viewservice.PingInterval) 803 | } 804 | 805 | // wait a bit for primary to initialize backup 806 | time.Sleep(viewservice.DeadPings * viewservice.PingInterval) 807 | 808 | done := int32(0) 809 | 810 | go func() { 811 | // kill and restart servers 812 | rr := rand.New(rand.NewSource(int64(os.Getpid()))) 813 | for atomic.LoadInt32(&done) == 0 { 814 | i := rr.Int() % nservers 815 | // fmt.Printf("%v killing %v\n", ts(), 5001+i) 816 | sa[i].kill() 817 | 818 | // wait long enough for new view to form, backup to be initialized 819 | time.Sleep(2 * viewservice.PingInterval * viewservice.DeadPings) 820 | 821 | sss := StartServer(vshost, port(tag, i+1)) 822 | samu.Lock() 823 | sa[i] = sss 824 | samu.Unlock() 825 | 826 | // wait long enough for new view to form, backup to be initialized 827 | time.Sleep(2 * viewservice.PingInterval * viewservice.DeadPings) 828 | } 829 | }() 830 | 831 | // concurrent client thread. 832 | ff := func(i int, ch chan int) { 833 | ret := -1 834 | defer func() { ch <- ret }() 835 | ck := MakeClerk(vshost, "") 836 | n := 0 837 | for atomic.LoadInt32(&done) == 0 { 838 | v := "x " + strconv.Itoa(i) + " " + strconv.Itoa(n) + " y" 839 | ck.Append("0", v) 840 | // if no sleep here, then server tick() threads do not get 841 | // enough time to Ping the viewserver. 842 | time.Sleep(10 * time.Millisecond) 843 | n++ 844 | } 845 | ret = n 846 | } 847 | 848 | const nth = 2 849 | var cha [nth]chan int 850 | for i := 0; i < nth; i++ { 851 | cha[i] = make(chan int) 852 | go ff(i, cha[i]) 853 | } 854 | 855 | time.Sleep(20 * time.Second) 856 | atomic.StoreInt32(&done, 1) 857 | 858 | fmt.Printf(" ... Appends done ... \n") 859 | 860 | counts := []int{} 861 | for i := 0; i < nth; i++ { 862 | n := <-cha[i] 863 | if n < 0 { 864 | t.Fatal("child failed") 865 | } 866 | counts = append(counts, n) 867 | } 868 | 869 | ck := MakeClerk(vshost, "") 870 | 871 | checkAppends(t, ck.Get("0"), counts) 872 | 873 | ck.Put("aaa", "bbb") 874 | if v := ck.Get("aaa"); v != "bbb" { 875 | t.Fatalf("final Put/Get failed") 876 | } 877 | 878 | fmt.Printf(" ... 
Passed\n") 879 | 880 | for i := 0; i < nservers; i++ { 881 | samu.Lock() 882 | sa[i].kill() 883 | samu.Unlock() 884 | } 885 | time.Sleep(time.Second) 886 | vs.Kill() 887 | time.Sleep(time.Second) 888 | } 889 | 890 | func proxy(t *testing.T, port string, delay *int32) { 891 | portx := port + "x" 892 | os.Remove(portx) 893 | if os.Rename(port, portx) != nil { 894 | t.Fatalf("proxy rename failed") 895 | } 896 | l, err := net.Listen("unix", port) 897 | if err != nil { 898 | t.Fatalf("proxy listen failed: %v", err) 899 | } 900 | go func() { 901 | defer l.Close() 902 | defer os.Remove(portx) 903 | defer os.Remove(port) 904 | for { 905 | c1, err := l.Accept() 906 | if err != nil { 907 | t.Fatalf("proxy accept failed: %v\n", err) 908 | } 909 | time.Sleep(time.Duration(atomic.LoadInt32(delay)) * time.Second) 910 | c2, err := net.Dial("unix", portx) 911 | if err != nil { 912 | t.Fatalf("proxy dial failed: %v\n", err) 913 | } 914 | 915 | go func() { 916 | for { 917 | buf := make([]byte, 1000) 918 | n, _ := c2.Read(buf) 919 | if n == 0 { 920 | break 921 | } 922 | n1, _ := c1.Write(buf[0:n]) 923 | if n1 != n { 924 | break 925 | } 926 | } 927 | }() 928 | for { 929 | buf := make([]byte, 1000) 930 | n, err := c1.Read(buf) 931 | if err != nil && err != io.EOF { 932 | t.Fatalf("proxy c1.Read: %v\n", err) 933 | } 934 | if n == 0 { 935 | break 936 | } 937 | n1, err1 := c2.Write(buf[0:n]) 938 | if err1 != nil || n1 != n { 939 | t.Fatalf("proxy c2.Write: %v\n", err1) 940 | } 941 | } 942 | 943 | c1.Close() 944 | c2.Close() 945 | } 946 | }() 947 | } 948 | 949 | func TestPartition1(t *testing.T) { 950 | runtime.GOMAXPROCS(4) 951 | 952 | tag := "part1" 953 | vshost := port(tag+"v", 1) 954 | vs := viewservice.StartServer(vshost) 955 | time.Sleep(time.Second) 956 | vck := viewservice.MakeClerk("", vshost) 957 | 958 | ck1 := MakeClerk(vshost, "") 959 | 960 | fmt.Printf("Test: Old primary does not serve Gets ...\n") 961 | 962 | vshosta := vshost + "a" 963 | os.Link(vshost, vshosta) 964 | 965 | s1 := StartServer(vshosta, port(tag, 1)) 966 | delay := int32(0) 967 | proxy(t, port(tag, 1), &delay) 968 | 969 | deadtime := viewservice.PingInterval * viewservice.DeadPings 970 | time.Sleep(deadtime * 2) 971 | if vck.Primary() != s1.me { 972 | t.Fatal("primary never formed initial view") 973 | } 974 | 975 | s2 := StartServer(vshost, port(tag, 2)) 976 | time.Sleep(deadtime * 2) 977 | v1, _ := vck.Get() 978 | if v1.Primary != s1.me || v1.Backup != s2.me { 979 | t.Fatal("backup did not join view") 980 | } 981 | 982 | ck1.Put("a", "1") 983 | check(ck1, "a", "1") 984 | 985 | os.Remove(vshosta) 986 | 987 | // start a client Get(), but use proxy to delay it long 988 | // enough that it won't reach s1 until after s1 is no 989 | // longer the primary. 990 | atomic.StoreInt32(&delay, 4) 991 | stale_get := make(chan bool) 992 | go func() { 993 | local_stale := false 994 | defer func() { stale_get <- local_stale }() 995 | x := ck1.Get("a") 996 | if x == "1" { 997 | local_stale = true 998 | } 999 | }() 1000 | 1001 | // now s1 cannot talk to viewserver, so view will change, 1002 | // and s1 won't immediately realize. 1003 | 1004 | for iter := 0; iter < viewservice.DeadPings*3; iter++ { 1005 | if vck.Primary() == s2.me { 1006 | break 1007 | } 1008 | time.Sleep(viewservice.PingInterval) 1009 | } 1010 | if vck.Primary() != s2.me { 1011 | t.Fatalf("primary never changed") 1012 | } 1013 | 1014 | // wait long enough that s2 is guaranteed to have Pinged 1015 | // the viewservice, and thus that s2 must know about 1016 | // the new view. 
1017 | time.Sleep(2 * viewservice.PingInterval) 1018 | 1019 | // change the value (on s2) so it's no longer "1". 1020 | ck2 := MakeClerk(vshost, "") 1021 | ck2.Put("a", "111") 1022 | check(ck2, "a", "111") 1023 | 1024 | // wait for the background Get to s1 to be delivered. 1025 | select { 1026 | case x := <-stale_get: 1027 | if x { 1028 | t.Fatalf("Get to old primary succeeded and produced stale value") 1029 | } 1030 | case <-time.After(5 * time.Second): 1031 | } 1032 | 1033 | check(ck2, "a", "111") 1034 | 1035 | fmt.Printf(" ... Passed\n") 1036 | 1037 | s1.kill() 1038 | s2.kill() 1039 | vs.Kill() 1040 | } 1041 | 1042 | func TestPartition2(t *testing.T) { 1043 | runtime.GOMAXPROCS(4) 1044 | 1045 | tag := "part2" 1046 | vshost := port(tag+"v", 1) 1047 | vs := viewservice.StartServer(vshost) 1048 | time.Sleep(time.Second) 1049 | vck := viewservice.MakeClerk("", vshost) 1050 | 1051 | ck1 := MakeClerk(vshost, "") 1052 | 1053 | vshosta := vshost + "a" 1054 | os.Link(vshost, vshosta) 1055 | 1056 | s1 := StartServer(vshosta, port(tag, 1)) 1057 | delay := int32(0) 1058 | proxy(t, port(tag, 1), &delay) 1059 | 1060 | fmt.Printf("Test: Partitioned old primary does not complete Gets ...\n") 1061 | 1062 | deadtime := viewservice.PingInterval * viewservice.DeadPings 1063 | time.Sleep(deadtime * 2) 1064 | if vck.Primary() != s1.me { 1065 | t.Fatal("primary never formed initial view") 1066 | } 1067 | 1068 | s2 := StartServer(vshost, port(tag, 2)) 1069 | time.Sleep(deadtime * 2) 1070 | v1, _ := vck.Get() 1071 | if v1.Primary != s1.me || v1.Backup != s2.me { 1072 | t.Fatal("backup did not join view") 1073 | } 1074 | 1075 | ck1.Put("a", "1") 1076 | check(ck1, "a", "1") 1077 | 1078 | os.Remove(vshosta) 1079 | 1080 | // start a client Get(), but use proxy to delay it long 1081 | // enough that it won't reach s1 until after s1 is no 1082 | // longer the primary. 1083 | atomic.StoreInt32(&delay, 5) 1084 | stale_get := make(chan bool) 1085 | go func() { 1086 | local_stale := false 1087 | defer func() { stale_get <- local_stale }() 1088 | x := ck1.Get("a") 1089 | if x == "1" { 1090 | local_stale = true 1091 | } 1092 | }() 1093 | 1094 | // now s1 cannot talk to viewserver, so view will change. 1095 | 1096 | for iter := 0; iter < viewservice.DeadPings*3; iter++ { 1097 | if vck.Primary() == s2.me { 1098 | break 1099 | } 1100 | time.Sleep(viewservice.PingInterval) 1101 | } 1102 | if vck.Primary() != s2.me { 1103 | t.Fatalf("primary never changed") 1104 | } 1105 | 1106 | s3 := StartServer(vshost, port(tag, 3)) 1107 | for iter := 0; iter < viewservice.DeadPings*3; iter++ { 1108 | v, _ := vck.Get() 1109 | if v.Backup == s3.me && v.Primary == s2.me { 1110 | break 1111 | } 1112 | time.Sleep(viewservice.PingInterval) 1113 | } 1114 | v2, _ := vck.Get() 1115 | if v2.Primary != s2.me || v2.Backup != s3.me { 1116 | t.Fatalf("new backup never joined") 1117 | } 1118 | time.Sleep(2 * time.Second) 1119 | 1120 | ck2 := MakeClerk(vshost, "") 1121 | ck2.Put("a", "2") 1122 | check(ck2, "a", "2") 1123 | 1124 | s2.kill() 1125 | 1126 | // wait for delayed get to s1 to complete. 1127 | select { 1128 | case x := <-stale_get: 1129 | if x { 1130 | t.Fatalf("partitioned primary replied to a Get with a stale value") 1131 | } 1132 | case <-time.After(6 * time.Second): 1133 | } 1134 | 1135 | check(ck2, "a", "2") 1136 | 1137 | fmt.Printf(" ... 
Passed\n") 1138 | 1139 | s1.kill() 1140 | s2.kill() 1141 | s3.kill() 1142 | vs.Kill() 1143 | } 1144 | -------------------------------------------------------------------------------- /raft/config.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "DS-Lab/labrpc" 4 | import "log" 5 | import "sync" 6 | import "testing" 7 | import "runtime" 8 | import "math/rand" 9 | import crand "crypto/rand" 10 | import "math/big" 11 | import "encoding/base64" 12 | import "time" 13 | import "fmt" 14 | 15 | func randstring(n int) string { 16 | b := make([]byte, 2*n) 17 | crand.Read(b) 18 | s := base64.URLEncoding.EncodeToString(b) 19 | return s[0:n] 20 | } 21 | 22 | func makeSeed() int64 { 23 | max := big.NewInt(int64(1) << 62) 24 | bigx, _ := crand.Int(crand.Reader, max) 25 | x := bigx.Int64() 26 | return x 27 | } 28 | 29 | type config struct { 30 | mu sync.Mutex 31 | t *testing.T 32 | net *labrpc.Network 33 | n int 34 | rafts []*Raft 35 | applyErr []string // from apply channel readers 36 | connected []bool // whether each server is on the net 37 | saved []*Persister 38 | endnames [][]string // the port file names each sends to 39 | logs []map[int]int // copy of each server's committed entries 40 | start time.Time // time at which make_config() was called 41 | // begin()/end() statistics 42 | t0 time.Time // time at which test_test.go called cfg.begin() 43 | rpcs0 int // rpcTotal() at start of test 44 | cmds0 int // number of agreements 45 | maxIndex int 46 | maxIndex0 int 47 | } 48 | 49 | var ncpu_once sync.Once 50 | 51 | func make_config(t *testing.T, n int, unreliable bool) *config { 52 | ncpu_once.Do(func() { 53 | if runtime.NumCPU() < 2 { 54 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 55 | } 56 | rand.Seed(makeSeed()) 57 | }) 58 | runtime.GOMAXPROCS(4) 59 | cfg := &config{} 60 | cfg.t = t 61 | cfg.net = labrpc.MakeNetwork() 62 | cfg.n = n 63 | cfg.applyErr = make([]string, cfg.n) 64 | cfg.rafts = make([]*Raft, cfg.n) 65 | cfg.connected = make([]bool, cfg.n) 66 | cfg.saved = make([]*Persister, cfg.n) 67 | cfg.endnames = make([][]string, cfg.n) 68 | cfg.logs = make([]map[int]int, cfg.n) 69 | cfg.start = time.Now() 70 | 71 | cfg.setunreliable(unreliable) 72 | 73 | cfg.net.LongDelays(true) 74 | 75 | // create a full set of Rafts. 76 | for i := 0; i < cfg.n; i++ { 77 | cfg.logs[i] = map[int]int{} 78 | cfg.start1(i) 79 | } 80 | 81 | // connect everyone 82 | for i := 0; i < cfg.n; i++ { 83 | cfg.connect(i) 84 | } 85 | 86 | return cfg 87 | } 88 | 89 | // shut down a Raft server but save its persistent state. 90 | func (cfg *config) crash1(i int) { 91 | cfg.disconnect(i) 92 | cfg.net.DeleteServer(i) // disable client connections to the server. 93 | 94 | cfg.mu.Lock() 95 | defer cfg.mu.Unlock() 96 | 97 | // a fresh persister, in case old instance 98 | // continues to update the Persister. 99 | // but copy old persister's content so that we always 100 | // pass Make() the last persisted state. 101 | if cfg.saved[i] != nil { 102 | cfg.saved[i] = cfg.saved[i].Copy() 103 | } 104 | 105 | rf := cfg.rafts[i] 106 | if rf != nil { 107 | cfg.mu.Unlock() 108 | rf.Kill() 109 | cfg.mu.Lock() 110 | cfg.rafts[i] = nil 111 | } 112 | 113 | if cfg.saved[i] != nil { 114 | raftlog := cfg.saved[i].ReadRaftState() 115 | cfg.saved[i] = &Persister{} 116 | cfg.saved[i].SaveRaftState(raftlog) 117 | } 118 | } 119 | 120 | // 121 | // start or re-start a Raft. 122 | // if one already exists, "kill" it first. 
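// (start1 first calls crash1(), which disconnects the old instance, kills it, and keeps a copy of its persisted state.)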
123 | // allocate new outgoing port file names, and a new 124 | // state persister, to isolate previous instance of 125 | // this server. since we cannot really kill it. 126 | // 127 | func (cfg *config) start1(i int) { 128 | cfg.crash1(i) 129 | 130 | // a fresh set of outgoing ClientEnd names. 131 | // so that old crashed instance's ClientEnds can't send. 132 | cfg.endnames[i] = make([]string, cfg.n) 133 | for j := 0; j < cfg.n; j++ { 134 | cfg.endnames[i][j] = randstring(20) 135 | } 136 | 137 | // a fresh set of ClientEnds. 138 | ends := make([]*labrpc.ClientEnd, cfg.n) 139 | for j := 0; j < cfg.n; j++ { 140 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 141 | cfg.net.Connect(cfg.endnames[i][j], j) 142 | } 143 | 144 | cfg.mu.Lock() 145 | 146 | // a fresh persister, so old instance doesn't overwrite 147 | // new instance's persisted state. 148 | // but copy old persister's content so that we always 149 | // pass Make() the last persisted state. 150 | if cfg.saved[i] != nil { 151 | cfg.saved[i] = cfg.saved[i].Copy() 152 | } else { 153 | cfg.saved[i] = MakePersister() 154 | } 155 | 156 | cfg.mu.Unlock() 157 | 158 | // listen to messages from Raft indicating newly committed messages. 159 | applyCh := make(chan ApplyMsg) 160 | go func() { 161 | for m := range applyCh { 162 | err_msg := "" 163 | if m.CommandValid == false { 164 | // ignore other types of ApplyMsg 165 | } else if v, ok := (m.Command).(int); ok { 166 | cfg.mu.Lock() 167 | for j := 0; j < len(cfg.logs); j++ { 168 | if old, oldok := cfg.logs[j][m.CommandIndex]; oldok && old != v { 169 | // some server has already committed a different value for this entry! 170 | err_msg = fmt.Sprintf("commit index=%v server=%v %v != server=%v %v", 171 | m.CommandIndex, i, m.Command, j, old) 172 | } 173 | } 174 | _, prevok := cfg.logs[i][m.CommandIndex-1] 175 | cfg.logs[i][m.CommandIndex] = v 176 | if m.CommandIndex > cfg.maxIndex { 177 | cfg.maxIndex = m.CommandIndex 178 | } 179 | cfg.mu.Unlock() 180 | 181 | if m.CommandIndex > 1 && prevok == false { 182 | err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex) 183 | } 184 | } else { 185 | err_msg = fmt.Sprintf("committed command %v is not an int", m.Command) 186 | } 187 | 188 | if err_msg != "" { 189 | log.Fatalf("apply error: %v\n", err_msg) 190 | cfg.applyErr[i] = err_msg 191 | // keep reading after error so that Raft doesn't block 192 | // holding locks... 193 | } 194 | } 195 | }() 196 | 197 | rf := Make(ends, i, cfg.saved[i], applyCh) 198 | 199 | cfg.mu.Lock() 200 | cfg.rafts[i] = rf 201 | cfg.mu.Unlock() 202 | 203 | svc := labrpc.MakeService(rf) 204 | srv := labrpc.MakeServer() 205 | srv.AddService(svc) 206 | cfg.net.AddServer(i, srv) 207 | } 208 | 209 | func (cfg *config) checkTimeout() { 210 | // enforce a two minute real-time limit on each test 211 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 212 | cfg.t.Fatal("test took longer than 120 seconds") 213 | } 214 | } 215 | 216 | func (cfg *config) cleanup() { 217 | for i := 0; i < len(cfg.rafts); i++ { 218 | if cfg.rafts[i] != nil { 219 | cfg.rafts[i].Kill() 220 | } 221 | } 222 | cfg.net.Cleanup() 223 | cfg.checkTimeout() 224 | } 225 | 226 | // attach server i to the net. 
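// connect() marks server i as connected and enables its RPC endpoints to and from every peer that is currently connected.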
227 | func (cfg *config) connect(i int) { 228 | // fmt.Printf("connect(%d)\n", i) 229 | 230 | cfg.connected[i] = true 231 | 232 | // outgoing ClientEnds 233 | for j := 0; j < cfg.n; j++ { 234 | if cfg.connected[j] { 235 | endname := cfg.endnames[i][j] 236 | cfg.net.Enable(endname, true) 237 | } 238 | } 239 | 240 | // incoming ClientEnds 241 | for j := 0; j < cfg.n; j++ { 242 | if cfg.connected[j] { 243 | endname := cfg.endnames[j][i] 244 | cfg.net.Enable(endname, true) 245 | } 246 | } 247 | } 248 | 249 | // detach server i from the net. 250 | func (cfg *config) disconnect(i int) { 251 | // fmt.Printf("disconnect(%d)\n", i) 252 | 253 | cfg.connected[i] = false 254 | 255 | // outgoing ClientEnds 256 | for j := 0; j < cfg.n; j++ { 257 | if cfg.endnames[i] != nil { 258 | endname := cfg.endnames[i][j] 259 | cfg.net.Enable(endname, false) 260 | } 261 | } 262 | 263 | // incoming ClientEnds 264 | for j := 0; j < cfg.n; j++ { 265 | if cfg.endnames[j] != nil { 266 | endname := cfg.endnames[j][i] 267 | cfg.net.Enable(endname, false) 268 | } 269 | } 270 | } 271 | 272 | func (cfg *config) rpcCount(server int) int { 273 | return cfg.net.GetCount(server) 274 | } 275 | 276 | func (cfg *config) rpcTotal() int { 277 | return cfg.net.GetTotalCount() 278 | } 279 | 280 | func (cfg *config) setunreliable(unrel bool) { 281 | cfg.net.Reliable(!unrel) 282 | } 283 | 284 | func (cfg *config) setlongreordering(longrel bool) { 285 | cfg.net.LongReordering(longrel) 286 | } 287 | 288 | // check that there's exactly one leader. 289 | // try a few times in case re-elections are needed. 290 | func (cfg *config) checkOneLeader() int { 291 | for iters := 0; iters < 10; iters++ { 292 | ms := 450 + (rand.Int63() % 100) 293 | time.Sleep(time.Duration(ms) * time.Millisecond) 294 | 295 | leaders := make(map[int][]int) 296 | for i := 0; i < cfg.n; i++ { 297 | if cfg.connected[i] { 298 | if term, leader := cfg.rafts[i].GetState(); leader { 299 | leaders[term] = append(leaders[term], i) 300 | } 301 | } 302 | } 303 | 304 | lastTermWithLeader := -1 305 | for term, leaders := range leaders { 306 | if len(leaders) > 1 { 307 | cfg.t.Fatalf("term %d has %d (>1) leaders", term, len(leaders)) 308 | } 309 | if term > lastTermWithLeader { 310 | lastTermWithLeader = term 311 | } 312 | } 313 | 314 | if len(leaders) != 0 { 315 | return leaders[lastTermWithLeader][0] 316 | } 317 | } 318 | cfg.t.Fatalf("expected one leader, got none") 319 | return -1 320 | } 321 | 322 | // check that everyone agrees on the term. 323 | func (cfg *config) checkTerms() int { 324 | term := -1 325 | for i := 0; i < cfg.n; i++ { 326 | if cfg.connected[i] { 327 | xterm, _ := cfg.rafts[i].GetState() 328 | if term == -1 { 329 | term = xterm 330 | } else if term != xterm { 331 | cfg.t.Fatalf("servers disagree on term") 332 | } 333 | } 334 | } 335 | return term 336 | } 337 | 338 | // check that there's no leader 339 | func (cfg *config) checkNoLeader() { 340 | for i := 0; i < cfg.n; i++ { 341 | if cfg.connected[i] { 342 | _, is_leader := cfg.rafts[i].GetState() 343 | if is_leader { 344 | cfg.t.Fatalf("expected no leader, but %v claims to be leader", i) 345 | } 346 | } 347 | } 348 | } 349 | 350 | // how many servers think a log entry is committed? 
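// nCommitted() also cross-checks that every server holding this index committed the same value; a mismatch fails the test immediately.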
351 | func (cfg *config) nCommitted(index int) (int, interface{}) { 352 | count := 0 353 | cmd := -1 354 | for i := 0; i < len(cfg.rafts); i++ { 355 | if cfg.applyErr[i] != "" { 356 | cfg.t.Fatal(cfg.applyErr[i]) 357 | } 358 | 359 | cfg.mu.Lock() 360 | cmd1, ok := cfg.logs[i][index] 361 | cfg.mu.Unlock() 362 | 363 | if ok { 364 | if count > 0 && cmd != cmd1 { 365 | cfg.t.Fatalf("committed values do not match: index %v, %v, %v\n", 366 | index, cmd, cmd1) 367 | } 368 | count += 1 369 | cmd = cmd1 370 | } 371 | } 372 | return count, cmd 373 | } 374 | 375 | // wait for at least n servers to commit. 376 | // but don't wait forever. 377 | func (cfg *config) wait(index int, n int, startTerm int) interface{} { 378 | to := 10 * time.Millisecond 379 | for iters := 0; iters < 30; iters++ { 380 | nd, _ := cfg.nCommitted(index) 381 | if nd >= n { 382 | break 383 | } 384 | time.Sleep(to) 385 | if to < time.Second { 386 | to *= 2 387 | } 388 | if startTerm > -1 { 389 | for _, r := range cfg.rafts { 390 | if t, _ := r.GetState(); t > startTerm { 391 | // someone has moved on 392 | // can no longer guarantee that we'll "win" 393 | return -1 394 | } 395 | } 396 | } 397 | } 398 | nd, cmd := cfg.nCommitted(index) 399 | if nd < n { 400 | cfg.t.Fatalf("only %d decided for index %d; wanted %d\n", 401 | nd, index, n) 402 | } 403 | return cmd 404 | } 405 | 406 | // do a complete agreement. 407 | // it might choose the wrong leader initially, 408 | // and have to re-submit after giving up. 409 | // entirely gives up after about 10 seconds. 410 | // indirectly checks that the servers agree on the 411 | // same value, since nCommitted() checks this, 412 | // as do the threads that read from applyCh. 413 | // returns index. 414 | // if retry==true, may submit the command multiple 415 | // times, in case a leader fails just after Start(). 416 | // if retry==false, calls Start() only once, in order 417 | // to simplify the early Lab 2B tests. 418 | func (cfg *config) one(cmd int, expectedServers int, retry bool) int { 419 | t0 := time.Now() 420 | starts := 0 421 | for time.Since(t0).Seconds() < 10 { 422 | // try all the servers, maybe one is the leader. 423 | index := -1 424 | for si := 0; si < cfg.n; si++ { 425 | starts = (starts + 1) % cfg.n 426 | var rf *Raft 427 | cfg.mu.Lock() 428 | if cfg.connected[starts] { 429 | rf = cfg.rafts[starts] 430 | } 431 | cfg.mu.Unlock() 432 | if rf != nil { 433 | index1, _, ok := rf.Start(cmd) 434 | if ok { 435 | index = index1 436 | break 437 | } 438 | } 439 | } 440 | 441 | if index != -1 { 442 | // somebody claimed to be the leader and to have 443 | // submitted our command; wait a while for agreement. 444 | t1 := time.Now() 445 | for time.Since(t1).Seconds() < 2 { 446 | nd, cmd1 := cfg.nCommitted(index) 447 | if nd > 0 && nd >= expectedServers { 448 | // committed 449 | if cmd2, ok := cmd1.(int); ok && cmd2 == cmd { 450 | // and it was the command we submitted. 451 | return index 452 | } 453 | } 454 | time.Sleep(20 * time.Millisecond) 455 | } 456 | if retry == false { 457 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) 458 | } 459 | } else { 460 | time.Sleep(50 * time.Millisecond) 461 | } 462 | } 463 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) 464 | return -1 465 | } 466 | 467 | // start a Test. 468 | // print the Test message. 469 | // e.g. 
cfg.begin("Test (2B): RPC counts aren't too high") 470 | func (cfg *config) begin(description string) { 471 | fmt.Printf("%s ...\n", description) 472 | cfg.t0 = time.Now() 473 | cfg.rpcs0 = cfg.rpcTotal() 474 | cfg.cmds0 = 0 475 | cfg.maxIndex0 = cfg.maxIndex 476 | } 477 | 478 | // end a Test -- the fact that we got here means there 479 | // was no failure. 480 | // print the Passed message, 481 | // and some performance numbers. 482 | func (cfg *config) end() { 483 | cfg.checkTimeout() 484 | if cfg.t.Failed() == false { 485 | cfg.mu.Lock() 486 | t := time.Since(cfg.t0).Seconds() // real time 487 | npeers := cfg.n // number of Raft peers 488 | nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends 489 | ncmds := cfg.maxIndex - cfg.maxIndex0 // number of Raft agreements reported 490 | cfg.mu.Unlock() 491 | 492 | fmt.Printf(" ... Passed --") 493 | fmt.Printf(" %4.1f %d %4d %4d\n", t, npeers, nrpc, ncmds) 494 | } 495 | } 496 | -------------------------------------------------------------------------------- /raft/persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "sync" 4 | 5 | type Persister struct { 6 | mu sync.Mutex 7 | raftstate []byte 8 | snapshot []byte 9 | } 10 | 11 | func MakePersister() *Persister { 12 | return &Persister{} 13 | } 14 | 15 | func (ps *Persister) Copy() *Persister { 16 | ps.mu.Lock() 17 | defer ps.mu.Unlock() 18 | np := MakePersister() 19 | np.raftstate = ps.raftstate 20 | np.snapshot = ps.snapshot 21 | return np 22 | } 23 | 24 | func (ps *Persister) SaveRaftState(state []byte) { 25 | ps.mu.Lock() 26 | defer ps.mu.Unlock() 27 | ps.raftstate = state 28 | } 29 | 30 | func (ps *Persister) ReadRaftState() []byte { 31 | ps.mu.Lock() 32 | defer ps.mu.Unlock() 33 | return ps.raftstate 34 | } 35 | 36 | func (ps *Persister) RaftStateSize() int { 37 | ps.mu.Lock() 38 | defer ps.mu.Unlock() 39 | return len(ps.raftstate) 40 | } 41 | 42 | // Save both Raft state and K/V snapshot as a single atomic action, 43 | // to help avoid them getting out of sync. 
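// (a single lock acquisition covers both writes, so no other Save or Read call can interleave between them.)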
44 | func (ps *Persister) SaveStateAndSnapshot(state []byte, snapshot []byte) { 45 | ps.mu.Lock() 46 | defer ps.mu.Unlock() 47 | ps.raftstate = state 48 | ps.snapshot = snapshot 49 | } 50 | 51 | func (ps *Persister) ReadSnapshot() []byte { 52 | ps.mu.Lock() 53 | defer ps.mu.Unlock() 54 | return ps.snapshot 55 | } 56 | 57 | func (ps *Persister) SnapshotSize() int { 58 | ps.mu.Lock() 59 | defer ps.mu.Unlock() 60 | return len(ps.snapshot) 61 | } 62 | -------------------------------------------------------------------------------- /raft/rpc.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "fmt" 4 | 5 | type InstallSnapshotArgs struct { 6 | Term int 7 | LeaderId int 8 | LastIncludedIndex int 9 | LastIncludedTerm int 10 | Data []byte 11 | } 12 | 13 | func (args InstallSnapshotArgs) String() string { 14 | return fmt.Sprintf("{Term:%v,LeaderId:%v,LastIncludedIndex:%v,LastIncludedTerm:%v,DataSize:%v}", args.Term, args.LeaderId, args.LastIncludedIndex, args.LastIncludedTerm, len(args.Data)) 15 | } 16 | 17 | type InstallSnapshotReply struct { 18 | Term int 19 | } 20 | 21 | func (reply InstallSnapshotReply) String() string { 22 | return fmt.Sprintf("{Term:%v}", reply.Term) 23 | } 24 | -------------------------------------------------------------------------------- /raft/test_test.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // Raft tests. 5 | // 6 | // we will use the original test_test.go to test your code for grading. 7 | // so, while you can modify this code to help you debug, please 8 | // test with the original before submitting. 9 | // 10 | 11 | import "testing" 12 | import "fmt" 13 | import "time" 14 | import "math/rand" 15 | import "sync/atomic" 16 | import "sync" 17 | 18 | // The tester generously allows solutions to complete elections in one second 19 | // (much more than the paper's range of timeouts). 20 | const RaftElectionTimeout = 1000 * time.Millisecond 21 | 22 | func TestInitialElection2A(t *testing.T) { 23 | servers := 3 24 | cfg := make_config(t, servers, false) 25 | defer cfg.cleanup() 26 | 27 | cfg.begin("Test (2A): initial election") 28 | 29 | // is a leader elected? 30 | cfg.checkOneLeader() 31 | 32 | // sleep a bit to avoid racing with followers learning of the 33 | // election, then check that all peers agree on the term. 34 | time.Sleep(50 * time.Millisecond) 35 | term1 := cfg.checkTerms() 36 | 37 | // does the leader+term stay the same if there is no network failure? 38 | time.Sleep(2 * RaftElectionTimeout) 39 | term2 := cfg.checkTerms() 40 | if term1 != term2 { 41 | fmt.Printf("warning: term changed even though there were no failures") 42 | } 43 | 44 | // there should still be a leader. 45 | cfg.checkOneLeader() 46 | 47 | cfg.end() 48 | } 49 | 50 | func TestReElection2A(t *testing.T) { 51 | servers := 3 52 | cfg := make_config(t, servers, false) 53 | defer cfg.cleanup() 54 | 55 | cfg.begin("Test (2A): election after network failure") 56 | 57 | leader1 := cfg.checkOneLeader() 58 | 59 | // if the leader disconnects, a new one should be elected. 60 | cfg.disconnect(leader1) 61 | cfg.checkOneLeader() 62 | 63 | // if the old leader rejoins, that shouldn't 64 | // disturb the new leader. 65 | cfg.connect(leader1) 66 | leader2 := cfg.checkOneLeader() 67 | 68 | // if there's no quorum, no leader should 69 | // be elected. 
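// (disconnecting two of the three servers leaves a single server, which cannot gather a majority of votes.)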
70 | cfg.disconnect(leader2) 71 | cfg.disconnect((leader2 + 1) % servers) 72 | time.Sleep(2 * RaftElectionTimeout) 73 | cfg.checkNoLeader() 74 | 75 | // if a quorum arises, it should elect a leader. 76 | cfg.connect((leader2 + 1) % servers) 77 | cfg.checkOneLeader() 78 | 79 | // re-join of last node shouldn't prevent leader from existing. 80 | cfg.connect(leader2) 81 | cfg.checkOneLeader() 82 | 83 | cfg.end() 84 | } 85 | 86 | func TestBasicAgree2B(t *testing.T) { 87 | servers := 5 88 | cfg := make_config(t, servers, false) 89 | defer cfg.cleanup() 90 | 91 | cfg.begin("Test (2B): basic agreement") 92 | 93 | iters := 3 94 | for index := 1; index < iters+1; index++ { 95 | nd, _ := cfg.nCommitted(index) 96 | if nd > 0 { 97 | t.Fatalf("some have committed before Start()") 98 | } 99 | 100 | xindex := cfg.one(index*100, servers, false) 101 | if xindex != index { 102 | t.Fatalf("got index %v but expected %v", xindex, index) 103 | } 104 | } 105 | 106 | cfg.end() 107 | } 108 | 109 | func TestFailAgree2B(t *testing.T) { 110 | servers := 3 111 | cfg := make_config(t, servers, false) 112 | defer cfg.cleanup() 113 | 114 | cfg.begin("Test (2B): agreement despite follower disconnection") 115 | 116 | cfg.one(101, servers, false) 117 | 118 | // follower network disconnection 119 | leader := cfg.checkOneLeader() 120 | cfg.disconnect((leader + 1) % servers) 121 | 122 | // agree despite one disconnected server? 123 | cfg.one(102, servers-1, false) 124 | cfg.one(103, servers-1, false) 125 | time.Sleep(RaftElectionTimeout) 126 | cfg.one(104, servers-1, false) 127 | cfg.one(105, servers-1, false) 128 | 129 | // re-connect 130 | cfg.connect((leader + 1) % servers) 131 | 132 | // agree with full set of servers? 133 | cfg.one(106, servers, true) 134 | time.Sleep(RaftElectionTimeout) 135 | cfg.one(107, servers, true) 136 | 137 | cfg.end() 138 | } 139 | 140 | func TestFailNoAgree2B(t *testing.T) { 141 | servers := 5 142 | cfg := make_config(t, servers, false) 143 | defer cfg.cleanup() 144 | 145 | cfg.begin("Test (2B): no agreement if too many followers disconnect") 146 | 147 | cfg.one(10, servers, false) 148 | 149 | // 3 of 5 followers disconnect 150 | leader := cfg.checkOneLeader() 151 | cfg.disconnect((leader + 1) % servers) 152 | cfg.disconnect((leader + 2) % servers) 153 | cfg.disconnect((leader + 3) % servers) 154 | 155 | index, _, ok := cfg.rafts[leader].Start(20) 156 | if ok != true { 157 | t.Fatalf("leader rejected Start()") 158 | } 159 | if index != 2 { 160 | t.Fatalf("expected index 2, got %v", index) 161 | } 162 | 163 | time.Sleep(2 * RaftElectionTimeout) 164 | 165 | n, _ := cfg.nCommitted(index) 166 | if n > 0 { 167 | t.Fatalf("%v committed but no majority", n) 168 | } 169 | 170 | // repair 171 | cfg.connect((leader + 1) % servers) 172 | cfg.connect((leader + 2) % servers) 173 | cfg.connect((leader + 3) % servers) 174 | 175 | // the disconnected majority may have chosen a leader from 176 | // among their own ranks, forgetting index 2. 
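// so the next Start() may be assigned index 2 or index 3, depending on whether the old leader's uncommitted entry at index 2 survived.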
177 | leader2 := cfg.checkOneLeader() 178 | index2, _, ok2 := cfg.rafts[leader2].Start(30) 179 | if ok2 == false { 180 | t.Fatalf("leader2 rejected Start()") 181 | } 182 | if index2 < 2 || index2 > 3 { 183 | t.Fatalf("unexpected index %v", index2) 184 | } 185 | 186 | cfg.one(1000, servers, true) 187 | 188 | cfg.end() 189 | } 190 | 191 | func TestConcurrentStarts2B(t *testing.T) { 192 | servers := 3 193 | cfg := make_config(t, servers, false) 194 | defer cfg.cleanup() 195 | 196 | cfg.begin("Test (2B): concurrent Start()s") 197 | 198 | var success bool 199 | loop: 200 | for try := 0; try < 5; try++ { 201 | if try > 0 { 202 | // give solution some time to settle 203 | time.Sleep(3 * time.Second) 204 | } 205 | 206 | leader := cfg.checkOneLeader() 207 | _, term, ok := cfg.rafts[leader].Start(1) 208 | if !ok { 209 | // leader moved on really quickly 210 | continue 211 | } 212 | 213 | iters := 5 214 | var wg sync.WaitGroup 215 | is := make(chan int, iters) 216 | for ii := 0; ii < iters; ii++ { 217 | wg.Add(1) 218 | go func(i int) { 219 | defer wg.Done() 220 | i, term1, ok := cfg.rafts[leader].Start(100 + i) 221 | if term1 != term { 222 | return 223 | } 224 | if ok != true { 225 | return 226 | } 227 | is <- i 228 | }(ii) 229 | } 230 | 231 | wg.Wait() 232 | close(is) 233 | 234 | for j := 0; j < servers; j++ { 235 | if t, _ := cfg.rafts[j].GetState(); t != term { 236 | // term changed -- can't expect low RPC counts 237 | continue loop 238 | } 239 | } 240 | 241 | failed := false 242 | cmds := []int{} 243 | for index := range is { 244 | cmd := cfg.wait(index, servers, term) 245 | if ix, ok := cmd.(int); ok { 246 | if ix == -1 { 247 | // peers have moved on to later terms 248 | // so we can't expect all Start()s to 249 | // have succeeded 250 | failed = true 251 | break 252 | } 253 | cmds = append(cmds, ix) 254 | } else { 255 | t.Fatalf("value %v is not an int", cmd) 256 | } 257 | } 258 | 259 | if failed { 260 | // avoid leaking goroutines 261 | go func() { 262 | for range is { 263 | } 264 | }() 265 | continue 266 | } 267 | 268 | for ii := 0; ii < iters; ii++ { 269 | x := 100 + ii 270 | ok := false 271 | for j := 0; j < len(cmds); j++ { 272 | if cmds[j] == x { 273 | ok = true 274 | } 275 | } 276 | if ok == false { 277 | t.Fatalf("cmd %v missing in %v", x, cmds) 278 | } 279 | } 280 | 281 | success = true 282 | break 283 | } 284 | 285 | if !success { 286 | t.Fatalf("term changed too often") 287 | } 288 | 289 | cfg.end() 290 | } 291 | 292 | func TestRejoin2B(t *testing.T) { 293 | servers := 3 294 | cfg := make_config(t, servers, false) 295 | defer cfg.cleanup() 296 | 297 | cfg.begin("Test (2B): rejoin of partitioned leader") 298 | 299 | cfg.one(101, servers, true) 300 | 301 | // leader network failure 302 | leader1 := cfg.checkOneLeader() 303 | cfg.disconnect(leader1) 304 | 305 | // make old leader try to agree on some entries 306 | cfg.rafts[leader1].Start(102) 307 | cfg.rafts[leader1].Start(103) 308 | cfg.rafts[leader1].Start(104) 309 | 310 | // new leader commits, also for index=2 311 | cfg.one(103, 2, true) 312 | 313 | // new leader network failure 314 | leader2 := cfg.checkOneLeader() 315 | cfg.disconnect(leader2) 316 | 317 | // old leader connected again 318 | cfg.connect(leader1) 319 | 320 | cfg.one(104, 2, true) 321 | 322 | // all together now 323 | cfg.connect(leader2) 324 | 325 | cfg.one(105, servers, true) 326 | 327 | cfg.end() 328 | } 329 | 330 | func TestBackup2B(t *testing.T) { 331 | servers := 5 332 | cfg := make_config(t, servers, false) 333 | defer cfg.cleanup() 334 | 335 | 
cfg.begin("Test (2B): leader backs up quickly over incorrect follower logs") 336 | 337 | cfg.one(rand.Int(), servers, true) 338 | 339 | // put leader and one follower in a partition 340 | leader1 := cfg.checkOneLeader() 341 | cfg.disconnect((leader1 + 2) % servers) 342 | cfg.disconnect((leader1 + 3) % servers) 343 | cfg.disconnect((leader1 + 4) % servers) 344 | 345 | // submit lots of commands that won't commit 346 | for i := 0; i < 50; i++ { 347 | cfg.rafts[leader1].Start(rand.Int()) 348 | } 349 | 350 | time.Sleep(RaftElectionTimeout / 2) 351 | 352 | cfg.disconnect((leader1 + 0) % servers) 353 | cfg.disconnect((leader1 + 1) % servers) 354 | 355 | // allow other partition to recover 356 | cfg.connect((leader1 + 2) % servers) 357 | cfg.connect((leader1 + 3) % servers) 358 | cfg.connect((leader1 + 4) % servers) 359 | 360 | // lots of successful commands to new group. 361 | for i := 0; i < 50; i++ { 362 | cfg.one(rand.Int(), 3, true) 363 | } 364 | 365 | // now another partitioned leader and one follower 366 | leader2 := cfg.checkOneLeader() 367 | other := (leader1 + 2) % servers 368 | if leader2 == other { 369 | other = (leader2 + 1) % servers 370 | } 371 | cfg.disconnect(other) 372 | 373 | // lots more commands that won't commit 374 | for i := 0; i < 50; i++ { 375 | cfg.rafts[leader2].Start(rand.Int()) 376 | } 377 | 378 | time.Sleep(RaftElectionTimeout / 2) 379 | 380 | // bring original leader back to life, 381 | for i := 0; i < servers; i++ { 382 | cfg.disconnect(i) 383 | } 384 | cfg.connect((leader1 + 0) % servers) 385 | cfg.connect((leader1 + 1) % servers) 386 | cfg.connect(other) 387 | 388 | // lots of successful commands to new group. 389 | for i := 0; i < 50; i++ { 390 | cfg.one(rand.Int(), 3, true) 391 | } 392 | 393 | // now everyone 394 | for i := 0; i < servers; i++ { 395 | cfg.connect(i) 396 | } 397 | cfg.one(rand.Int(), servers, true) 398 | 399 | cfg.end() 400 | } 401 | 402 | func TestCount2B(t *testing.T) { 403 | servers := 3 404 | cfg := make_config(t, servers, false) 405 | defer cfg.cleanup() 406 | 407 | cfg.begin("Test (2B): RPC counts aren't too high") 408 | 409 | rpcs := func() (n int) { 410 | for j := 0; j < servers; j++ { 411 | n += cfg.rpcCount(j) 412 | } 413 | return 414 | } 415 | 416 | leader := cfg.checkOneLeader() 417 | 418 | total1 := rpcs() 419 | 420 | if total1 > 30 || total1 < 1 { 421 | t.Fatalf("too many or few RPCs (%v) to elect initial leader\n", total1) 422 | } 423 | 424 | var total2 int 425 | var success bool 426 | loop: 427 | for try := 0; try < 5; try++ { 428 | if try > 0 { 429 | // give solution some time to settle 430 | time.Sleep(3 * time.Second) 431 | } 432 | 433 | leader = cfg.checkOneLeader() 434 | total1 = rpcs() 435 | 436 | iters := 10 437 | starti, term, ok := cfg.rafts[leader].Start(1) 438 | if !ok { 439 | // leader moved on really quickly 440 | continue 441 | } 442 | cmds := []int{} 443 | for i := 1; i < iters+2; i++ { 444 | x := int(rand.Int31()) 445 | cmds = append(cmds, x) 446 | index1, term1, ok := cfg.rafts[leader].Start(x) 447 | if term1 != term { 448 | // Term changed while starting 449 | continue loop 450 | } 451 | if !ok { 452 | // No longer the leader, so term has changed 453 | continue loop 454 | } 455 | if starti+i != index1 { 456 | t.Fatalf("Start() failed") 457 | } 458 | } 459 | 460 | for i := 1; i < iters+1; i++ { 461 | cmd := cfg.wait(starti+i, servers, term) 462 | if ix, ok := cmd.(int); ok == false || ix != cmds[i-1] { 463 | if ix == -1 { 464 | // term changed -- try again 465 | continue loop 466 | } 467 | t.Fatalf("wrong value 
%v committed for index %v; expected %v\n", cmd, starti+i, cmds) 468 | } 469 | } 470 | 471 | failed := false 472 | total2 = 0 473 | for j := 0; j < servers; j++ { 474 | if t, _ := cfg.rafts[j].GetState(); t != term { 475 | // term changed -- can't expect low RPC counts 476 | // need to keep going to update total2 477 | failed = true 478 | } 479 | total2 += cfg.rpcCount(j) 480 | } 481 | 482 | if failed { 483 | continue loop 484 | } 485 | 486 | if total2-total1 > (iters+1+3)*3 { 487 | t.Fatalf("too many RPCs (%v) for %v entries\n", total2-total1, iters) 488 | } 489 | 490 | success = true 491 | break 492 | } 493 | 494 | if !success { 495 | t.Fatalf("term changed too often") 496 | } 497 | 498 | time.Sleep(RaftElectionTimeout) 499 | 500 | total3 := 0 501 | for j := 0; j < servers; j++ { 502 | total3 += cfg.rpcCount(j) 503 | } 504 | 505 | if total3-total2 > 3*20 { 506 | t.Fatalf("too many RPCs (%v) for 1 second of idleness\n", total3-total2) 507 | } 508 | 509 | cfg.end() 510 | } 511 | 512 | func TestPersist12C(t *testing.T) { 513 | servers := 3 514 | cfg := make_config(t, servers, false) 515 | defer cfg.cleanup() 516 | 517 | cfg.begin("Test (2C): basic persistence") 518 | 519 | cfg.one(11, servers, true) 520 | 521 | // crash and re-start all 522 | for i := 0; i < servers; i++ { 523 | cfg.start1(i) 524 | } 525 | for i := 0; i < servers; i++ { 526 | cfg.disconnect(i) 527 | cfg.connect(i) 528 | } 529 | 530 | cfg.one(12, servers, true) 531 | 532 | leader1 := cfg.checkOneLeader() 533 | cfg.disconnect(leader1) 534 | cfg.start1(leader1) 535 | cfg.connect(leader1) 536 | 537 | cfg.one(13, servers, true) 538 | 539 | leader2 := cfg.checkOneLeader() 540 | cfg.disconnect(leader2) 541 | cfg.one(14, servers-1, true) 542 | cfg.start1(leader2) 543 | cfg.connect(leader2) 544 | 545 | cfg.wait(4, servers, -1) // wait for leader2 to join before killing i3 546 | 547 | i3 := (cfg.checkOneLeader() + 1) % servers 548 | cfg.disconnect(i3) 549 | cfg.one(15, servers-1, true) 550 | cfg.start1(i3) 551 | cfg.connect(i3) 552 | 553 | cfg.one(16, servers, true) 554 | 555 | cfg.end() 556 | } 557 | 558 | func TestPersist22C(t *testing.T) { 559 | servers := 5 560 | cfg := make_config(t, servers, false) 561 | defer cfg.cleanup() 562 | 563 | cfg.begin("Test (2C): more persistence") 564 | 565 | index := 1 566 | for iters := 0; iters < 5; iters++ { 567 | cfg.one(10+index, servers, true) 568 | index++ 569 | 570 | leader1 := cfg.checkOneLeader() 571 | 572 | cfg.disconnect((leader1 + 1) % servers) 573 | cfg.disconnect((leader1 + 2) % servers) 574 | 575 | cfg.one(10+index, servers-2, true) 576 | index++ 577 | 578 | cfg.disconnect((leader1 + 0) % servers) 579 | cfg.disconnect((leader1 + 3) % servers) 580 | cfg.disconnect((leader1 + 4) % servers) 581 | 582 | cfg.start1((leader1 + 1) % servers) 583 | cfg.start1((leader1 + 2) % servers) 584 | cfg.connect((leader1 + 1) % servers) 585 | cfg.connect((leader1 + 2) % servers) 586 | 587 | time.Sleep(RaftElectionTimeout) 588 | 589 | cfg.start1((leader1 + 3) % servers) 590 | cfg.connect((leader1 + 3) % servers) 591 | 592 | cfg.one(10+index, servers-2, true) 593 | index++ 594 | 595 | cfg.connect((leader1 + 4) % servers) 596 | cfg.connect((leader1 + 0) % servers) 597 | } 598 | 599 | cfg.one(1000, servers, true) 600 | 601 | cfg.end() 602 | } 603 | 604 | func TestPersist32C(t *testing.T) { 605 | servers := 3 606 | cfg := make_config(t, servers, false) 607 | defer cfg.cleanup() 608 | 609 | cfg.begin("Test (2C): partitioned leader and one follower crash, leader restarts") 610 | 611 | cfg.one(101, 3, true) 
612 | 613 | leader := cfg.checkOneLeader() 614 | cfg.disconnect((leader + 2) % servers) 615 | 616 | cfg.one(102, 2, true) 617 | 618 | cfg.crash1((leader + 0) % servers) 619 | cfg.crash1((leader + 1) % servers) 620 | cfg.connect((leader + 2) % servers) 621 | cfg.start1((leader + 0) % servers) 622 | cfg.connect((leader + 0) % servers) 623 | 624 | cfg.one(103, 2, true) 625 | 626 | cfg.start1((leader + 1) % servers) 627 | cfg.connect((leader + 1) % servers) 628 | 629 | cfg.one(104, servers, true) 630 | 631 | cfg.end() 632 | } 633 | 634 | // 635 | // Test the scenarios described in Figure 8 of the extended Raft paper. Each 636 | // iteration asks a leader, if there is one, to insert a command in the Raft 637 | // log. If there is a leader, that leader will fail quickly with a high 638 | // probability (perhaps without committing the command), or crash after a while 639 | // with low probability (most likey committing the command). If the number of 640 | // alive servers isn't enough to form a majority, perhaps start a new server. 641 | // The leader in a new term may try to finish replicating log entries that 642 | // haven't been committed yet. 643 | // 644 | func TestFigure82C(t *testing.T) { 645 | servers := 5 646 | cfg := make_config(t, servers, false) 647 | defer cfg.cleanup() 648 | 649 | cfg.begin("Test (2C): Figure 8") 650 | 651 | cfg.one(rand.Int(), 1, true) 652 | 653 | nup := servers 654 | for iters := 0; iters < 1000; iters++ { 655 | leader := -1 656 | for i := 0; i < servers; i++ { 657 | if cfg.rafts[i] != nil { 658 | _, _, ok := cfg.rafts[i].Start(rand.Int()) 659 | if ok { 660 | leader = i 661 | } 662 | } 663 | } 664 | 665 | if (rand.Int() % 1000) < 100 { 666 | ms := rand.Int63() % (int64(RaftElectionTimeout/time.Millisecond) / 2) 667 | time.Sleep(time.Duration(ms) * time.Millisecond) 668 | } else { 669 | ms := (rand.Int63() % 13) 670 | time.Sleep(time.Duration(ms) * time.Millisecond) 671 | } 672 | 673 | if leader != -1 { 674 | cfg.crash1(leader) 675 | nup -= 1 676 | } 677 | 678 | if nup < 3 { 679 | s := rand.Int() % servers 680 | if cfg.rafts[s] == nil { 681 | cfg.start1(s) 682 | cfg.connect(s) 683 | nup += 1 684 | } 685 | } 686 | } 687 | 688 | for i := 0; i < servers; i++ { 689 | if cfg.rafts[i] == nil { 690 | cfg.start1(i) 691 | cfg.connect(i) 692 | } 693 | } 694 | 695 | cfg.one(rand.Int(), servers, true) 696 | 697 | cfg.end() 698 | } 699 | 700 | func TestUnreliableAgree2C(t *testing.T) { 701 | servers := 5 702 | cfg := make_config(t, servers, true) 703 | defer cfg.cleanup() 704 | 705 | cfg.begin("Test (2C): unreliable agreement") 706 | 707 | var wg sync.WaitGroup 708 | 709 | for iters := 1; iters < 50; iters++ { 710 | for j := 0; j < 4; j++ { 711 | wg.Add(1) 712 | go func(iters, j int) { 713 | defer wg.Done() 714 | cfg.one((100*iters)+j, 1, true) 715 | }(iters, j) 716 | } 717 | cfg.one(iters, 1, true) 718 | } 719 | 720 | cfg.setunreliable(false) 721 | 722 | wg.Wait() 723 | 724 | cfg.one(100, servers, true) 725 | 726 | cfg.end() 727 | } 728 | 729 | func TestFigure8Unreliable2C(t *testing.T) { 730 | servers := 5 731 | cfg := make_config(t, servers, true) 732 | defer cfg.cleanup() 733 | 734 | cfg.begin("Test (2C): Figure 8 (unreliable)") 735 | 736 | cfg.one(rand.Int()%10000, 1, true) 737 | 738 | nup := servers 739 | for iters := 0; iters < 1000; iters++ { 740 | if iters == 200 { 741 | cfg.setlongreordering(true) 742 | } 743 | leader := -1 744 | for i := 0; i < servers; i++ { 745 | _, _, ok := cfg.rafts[i].Start(rand.Int() % 10000) 746 | if ok && cfg.connected[i] { 747 | leader = i 
748 | } 749 | } 750 | 751 | if (rand.Int() % 1000) < 100 { 752 | ms := rand.Int63() % (int64(RaftElectionTimeout/time.Millisecond) / 2) 753 | time.Sleep(time.Duration(ms) * time.Millisecond) 754 | } else { 755 | ms := (rand.Int63() % 13) 756 | time.Sleep(time.Duration(ms) * time.Millisecond) 757 | } 758 | 759 | if leader != -1 && (rand.Int()%1000) < int(RaftElectionTimeout/time.Millisecond)/2 { 760 | cfg.disconnect(leader) 761 | nup -= 1 762 | } 763 | 764 | if nup < 3 { 765 | s := rand.Int() % servers 766 | if cfg.connected[s] == false { 767 | cfg.connect(s) 768 | nup += 1 769 | } 770 | } 771 | } 772 | 773 | for i := 0; i < servers; i++ { 774 | if cfg.connected[i] == false { 775 | cfg.connect(i) 776 | } 777 | } 778 | 779 | cfg.one(rand.Int()%10000, servers, true) 780 | 781 | cfg.end() 782 | } 783 | 784 | func internalChurn(t *testing.T, unreliable bool) { 785 | 786 | servers := 5 787 | cfg := make_config(t, servers, unreliable) 788 | defer cfg.cleanup() 789 | 790 | if unreliable { 791 | cfg.begin("Test (2C): unreliable churn") 792 | } else { 793 | cfg.begin("Test (2C): churn") 794 | } 795 | 796 | stop := int32(0) 797 | 798 | // create concurrent clients 799 | cfn := func(me int, ch chan []int) { 800 | var ret []int 801 | ret = nil 802 | defer func() { ch <- ret }() 803 | values := []int{} 804 | for atomic.LoadInt32(&stop) == 0 { 805 | x := rand.Int() 806 | index := -1 807 | ok := false 808 | for i := 0; i < servers; i++ { 809 | // try them all, maybe one of them is a leader 810 | cfg.mu.Lock() 811 | rf := cfg.rafts[i] 812 | cfg.mu.Unlock() 813 | if rf != nil { 814 | index1, _, ok1 := rf.Start(x) 815 | if ok1 { 816 | ok = ok1 817 | index = index1 818 | } 819 | } 820 | } 821 | if ok { 822 | // maybe leader will commit our value, maybe not. 823 | // but don't wait forever. 824 | for _, to := range []int{10, 20, 50, 100, 200} { 825 | nd, cmd := cfg.nCommitted(index) 826 | if nd > 0 { 827 | if xx, ok := cmd.(int); ok { 828 | if xx == x { 829 | values = append(values, x) 830 | } 831 | } else { 832 | cfg.t.Fatalf("wrong command type") 833 | } 834 | break 835 | } 836 | time.Sleep(time.Duration(to) * time.Millisecond) 837 | } 838 | } else { 839 | time.Sleep(time.Duration(79+me*17) * time.Millisecond) 840 | } 841 | } 842 | ret = values 843 | } 844 | 845 | ncli := 3 846 | cha := []chan []int{} 847 | for i := 0; i < ncli; i++ { 848 | cha = append(cha, make(chan []int)) 849 | go cfn(i, cha[i]) 850 | } 851 | 852 | for iters := 0; iters < 20; iters++ { 853 | if (rand.Int() % 1000) < 200 { 854 | i := rand.Int() % servers 855 | cfg.disconnect(i) 856 | } 857 | 858 | if (rand.Int() % 1000) < 500 { 859 | i := rand.Int() % servers 860 | if cfg.rafts[i] == nil { 861 | cfg.start1(i) 862 | } 863 | cfg.connect(i) 864 | } 865 | 866 | if (rand.Int() % 1000) < 200 { 867 | i := rand.Int() % servers 868 | if cfg.rafts[i] != nil { 869 | cfg.crash1(i) 870 | } 871 | } 872 | 873 | // Make crash/restart infrequent enough that the peers can often 874 | // keep up, but not so infrequent that everything has settled 875 | // down from one change to the next. Pick a value smaller than 876 | // the election timeout, but not hugely smaller. 
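// (7/10 of the 1-second election timeout, i.e. roughly 700ms per churn iteration.)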
877 | time.Sleep((RaftElectionTimeout * 7) / 10) 878 | } 879 | 880 | time.Sleep(RaftElectionTimeout) 881 | cfg.setunreliable(false) 882 | for i := 0; i < servers; i++ { 883 | if cfg.rafts[i] == nil { 884 | cfg.start1(i) 885 | } 886 | cfg.connect(i) 887 | } 888 | 889 | atomic.StoreInt32(&stop, 1) 890 | 891 | values := []int{} 892 | for i := 0; i < ncli; i++ { 893 | vv := <-cha[i] 894 | if vv == nil { 895 | t.Fatal("client failed") 896 | } 897 | values = append(values, vv...) 898 | } 899 | 900 | time.Sleep(RaftElectionTimeout) 901 | 902 | lastIndex := cfg.one(rand.Int(), servers, true) 903 | 904 | really := make([]int, lastIndex+1) 905 | for index := 1; index <= lastIndex; index++ { 906 | v := cfg.wait(index, servers, -1) 907 | if vi, ok := v.(int); ok { 908 | really = append(really, vi) 909 | } else { 910 | t.Fatalf("not an int") 911 | } 912 | } 913 | 914 | for _, v1 := range values { 915 | ok := false 916 | for _, v2 := range really { 917 | if v1 == v2 { 918 | ok = true 919 | } 920 | } 921 | if ok == false { 922 | cfg.t.Fatalf("didn't find a value") 923 | } 924 | } 925 | 926 | cfg.end() 927 | } 928 | 929 | func TestReliableChurn2C(t *testing.T) { 930 | internalChurn(t, false) 931 | } 932 | 933 | func TestUnreliableChurn2C(t *testing.T) { 934 | internalChurn(t, true) 935 | } 936 | -------------------------------------------------------------------------------- /raft/util.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | ) 7 | 8 | // Debugging 9 | const Debug = 0 10 | 11 | func DPrintf(format string, a ...interface{}) (n int, err error) { 12 | if Debug > 0 { 13 | log.Printf(format, a...) 14 | } 15 | return 16 | } 17 | 18 | type Entry struct { 19 | Index int 20 | Term int 21 | Command interface{} 22 | } 23 | 24 | func (entry Entry) String() string { 25 | return fmt.Sprintf("{Index:%v,Term:%v}", entry.Index, entry.Term) 26 | } 27 | 28 | // helper function to avoid memory leaks: shrink the log's backing array when most of it is unused 29 | func shrinkEntriesArray(entries []LogEntry) []LogEntry { 30 | // We replace the array if we're using less than half of the space in it. 31 | const lenMultiple = 2 32 | if len(entries)*lenMultiple < cap(entries) { 33 | newEntries := make([]LogEntry, len(entries)) 34 | copy(newEntries, entries) 35 | return newEntries 36 | } 37 | return entries 38 | } 39 | -------------------------------------------------------------------------------- /viewservice/client.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net/rpc" 4 | import "fmt" 5 | 6 | // 7 | // the viewservice Clerk lives in the client 8 | // and maintains a little state. 9 | // 10 | type Clerk struct { 11 | me string // client's name (host:port) 12 | server string // viewservice's host:port 13 | } 14 | 15 | func MakeClerk(me string, server string) *Clerk { 16 | ck := new(Clerk) 17 | ck.me = me 18 | ck.server = server 19 | return ck 20 | } 21 | 22 | // 23 | // call() sends an RPC to the rpcname handler on server srv 24 | // with arguments args, waits for the reply, and leaves the 25 | // reply in reply. the reply argument should be a pointer 26 | // to a reply structure. 27 | // 28 | // the return value is true if the server responded, and false 29 | // if call() was not able to contact the server. in particular, 30 | // the reply's contents are only valid if call() returned true. 31 | // 32 | // you should assume that call() will return an 33 | // error after a while if the server is dead.
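// (i.e. rely on the error returned by rpc.Dial / rpc.Client.Call;)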
34 | // don't provide own time-out mechanism. 35 | // 36 | // please use call() to send all RPCs, in client.go and server.go. 37 | // please don't change this function. 38 | // 39 | func call(srv string, rpcname string, 40 | args interface{}, reply interface{}) bool { 41 | c, errx := rpc.Dial("unix", srv) 42 | if errx != nil { 43 | return false 44 | } 45 | defer c.Close() 46 | 47 | err := c.Call(rpcname, args, reply) 48 | if err == nil { 49 | return true 50 | } 51 | 52 | fmt.Println(err) 53 | return false 54 | } 55 | 56 | func (ck *Clerk) Ping(viewnum uint) (View, error) { 57 | // prepare the arguments. 58 | args := &PingArgs{} 59 | args.Me = ck.me 60 | args.Viewnum = viewnum 61 | var reply PingReply 62 | 63 | // send an RPC request, wait for the reply. 64 | ok := call(ck.server, "ViewServer.Ping", args, &reply) 65 | if ok == false { 66 | return View{}, fmt.Errorf("Ping(%v) failed", viewnum) 67 | } 68 | 69 | return reply.View, nil 70 | } 71 | 72 | func (ck *Clerk) Get() (View, bool) { 73 | args := &GetArgs{} 74 | var reply GetReply 75 | ok := call(ck.server, "ViewServer.Get", args, &reply) 76 | if ok == false { 77 | return View{}, false 78 | } 79 | return reply.View, true 80 | } 81 | 82 | func (ck *Clerk) Primary() string { 83 | v, ok := ck.Get() 84 | if ok { 85 | return v.Primary 86 | } 87 | return "" 88 | } 89 | -------------------------------------------------------------------------------- /viewservice/common.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "time" 4 | 5 | // 6 | // This is a non-replicated view service for a simple 7 | // primary/backup system. 8 | // 9 | // The view service goes through a sequence of numbered 10 | // views, each with a primary and (if possible) a backup. 11 | // A view consists of a view number and the host:port of 12 | // the view's primary and backup p/b servers. 13 | // 14 | // The primary in a view is always either the primary 15 | // or the backup of the previous view (in order to ensure 16 | // that the p/b service's state is preserved). 17 | // 18 | // Each p/b server should send a Ping RPC once per PingInterval. 19 | // The view server replies with a description of the current 20 | // view. The Pings let the view server know that the p/b 21 | // server is still alive; inform the p/b server of the current 22 | // view; and inform the view server of the most recent view 23 | // that the p/b server knows about. 24 | // 25 | // The view server proceeds to a new view when either it hasn't 26 | // received a ping from the primary or backup for a while, or 27 | // if there was no backup and a new server starts Pinging. 28 | // 29 | // The view server will not proceed to a new view until 30 | // the primary from the current view acknowledges 31 | // that it is operating in the current view. This helps 32 | // ensure that there's at most one p/b primary operating at 33 | // a time. 34 | // 35 | 36 | type View struct { 37 | Viewnum uint 38 | Primary string 39 | Backup string 40 | } 41 | 42 | // clients should send a Ping RPC this often, 43 | // to tell the viewservice that the client is alive. 44 | const PingInterval = time.Millisecond * 100 45 | 46 | // the viewserver will declare a client dead if it misses 47 | // this many Ping RPCs in a row. 
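// (with PingInterval of 100ms, that means a server is declared dead after roughly 500ms of missed Pings.)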
48 | const DeadPings = 5 49 | 50 | // 51 | // Ping(): called by a primary/backup server to tell the 52 | // view service it is alive, to indicate whether p/b server 53 | // has seen the latest view, and for p/b server to learn 54 | // the latest view. 55 | // 56 | // If Viewnum is zero, the caller is signalling that it is 57 | // alive and could become backup if needed. 58 | // 59 | 60 | type PingArgs struct { 61 | Me string // "host:port" 62 | Viewnum uint // caller's notion of current view # 63 | } 64 | 65 | type PingReply struct { 66 | View View 67 | } 68 | 69 | // 70 | // Get(): fetch the current view, without volunteering 71 | // to be a server. mostly for clients of the p/b service, 72 | // and for testing. 73 | // 74 | 75 | type GetArgs struct { 76 | } 77 | 78 | type GetReply struct { 79 | View View 80 | } 81 | -------------------------------------------------------------------------------- /viewservice/server.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net" 4 | import "net/rpc" 5 | import "log" 6 | import "time" 7 | import "sync" 8 | import "fmt" 9 | import "os" 10 | import "sync/atomic" 11 | 12 | type ViewServer struct { 13 | mu sync.Mutex 14 | l net.Listener 15 | dead int32 // for testing 16 | rpccount int32 // for testing 17 | me string 18 | 19 | // declarations here. 20 | } 21 | 22 | // 23 | // server Ping RPC handler. 24 | // 25 | func (vs *ViewServer) Ping(args *PingArgs, reply *PingReply) error { 26 | 27 | // code here. 28 | 29 | return nil 30 | } 31 | 32 | // 33 | // server Get() RPC handler. 34 | // 35 | func (vs *ViewServer) Get(args *GetArgs, reply *GetReply) error { 36 | 37 | // code here. 38 | 39 | return nil 40 | } 41 | 42 | // 43 | // tick() is called once per PingInterval; it should notice 44 | // if servers have died or recovered, and change the view 45 | // accordingly. 46 | // 47 | func (vs *ViewServer) tick() { 48 | 49 | // code here. 50 | } 51 | 52 | // 53 | // tell the server to shut itself down. 54 | // for testing. 55 | // please don't change these two functions. 56 | // 57 | func (vs *ViewServer) Kill() { 58 | atomic.StoreInt32(&vs.dead, 1) 59 | vs.l.Close() 60 | } 61 | 62 | // 63 | // has this server been asked to shut down? 64 | // 65 | func (vs *ViewServer) isdead() bool { 66 | return atomic.LoadInt32(&vs.dead) != 0 67 | } 68 | 69 | // please don't change this function. 70 | func (vs *ViewServer) GetRPCCount() int32 { 71 | return atomic.LoadInt32(&vs.rpccount) 72 | } 73 | 74 | func StartServer(me string) *ViewServer { 75 | vs := new(ViewServer) 76 | vs.me = me 77 | // vs.* initializations here. 78 | 79 | // tell net/rpc about our RPC server and handlers. 80 | rpcs := rpc.NewServer() 81 | rpcs.Register(vs) 82 | 83 | // prepare to receive connections from clients. 84 | // change "unix" to "tcp" to use over a network. 85 | os.Remove(vs.me) // only needed for "unix" 86 | l, e := net.Listen("unix", vs.me) 87 | if e != nil { 88 | log.Fatal("listen error: ", e) 89 | } 90 | vs.l = l 91 | 92 | // please don't change any of the following code, 93 | // or do anything to subvert it. 94 | 95 | // create a thread to accept RPC connections from clients. 
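// the accept loop below also counts accepted connections toward GetRPCCount() and exits once Kill() closes the listener.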
96 | go func() { 97 | for vs.isdead() == false { 98 | conn, err := vs.l.Accept() 99 | if err == nil && vs.isdead() == false { 100 | atomic.AddInt32(&vs.rpccount, 1) 101 | go rpcs.ServeConn(conn) 102 | } else if err == nil { 103 | conn.Close() 104 | } 105 | if err != nil && vs.isdead() == false { 106 | fmt.Printf("ViewServer(%v) accept: %v\n", me, err.Error()) 107 | vs.Kill() 108 | } 109 | } 110 | }() 111 | 112 | // create a thread to call tick() periodically. 113 | go func() { 114 | for vs.isdead() == false { 115 | vs.tick() 116 | time.Sleep(PingInterval) 117 | } 118 | }() 119 | 120 | return vs 121 | } 122 | -------------------------------------------------------------------------------- /viewservice/test_test.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "testing" 4 | import "runtime" 5 | import "time" 6 | import "fmt" 7 | import "os" 8 | import "strconv" 9 | 10 | func check(t *testing.T, ck *Clerk, p string, b string, n uint) { 11 | view, _ := ck.Get() 12 | if view.Primary != p { 13 | t.Fatalf("wanted primary %v, got %v", p, view.Primary) 14 | } 15 | if view.Backup != b { 16 | t.Fatalf("wanted backup %v, got %v", b, view.Backup) 17 | } 18 | if n != 0 && n != view.Viewnum { 19 | t.Fatalf("wanted viewnum %v, got %v", n, view.Viewnum) 20 | } 21 | if ck.Primary() != p { 22 | t.Fatalf("wanted primary %v, got %v", p, ck.Primary()) 23 | } 24 | } 25 | 26 | func port(suffix string) string { 27 | s := "/var/tmp/824-" 28 | s += strconv.Itoa(os.Getuid()) + "/" 29 | os.Mkdir(s, 0777) 30 | s += "viewserver-" 31 | s += strconv.Itoa(os.Getpid()) + "-" 32 | s += suffix 33 | return s 34 | } 35 | 36 | func Test1(t *testing.T) { 37 | runtime.GOMAXPROCS(4) 38 | 39 | vshost := port("v") 40 | vs := StartServer(vshost) 41 | 42 | ck1 := MakeClerk(port("1"), vshost) 43 | ck2 := MakeClerk(port("2"), vshost) 44 | ck3 := MakeClerk(port("3"), vshost) 45 | 46 | // 47 | 48 | if ck1.Primary() != "" { 49 | t.Fatalf("there was a primary too soon") 50 | } 51 | 52 | // very first primary 53 | fmt.Printf("Test: First primary ...\n") 54 | 55 | for i := 0; i < DeadPings*2; i++ { 56 | view, _ := ck1.Ping(0) 57 | if view.Primary == ck1.me { 58 | break 59 | } 60 | time.Sleep(PingInterval) 61 | } 62 | check(t, ck1, ck1.me, "", 1) 63 | fmt.Printf(" ... Passed\n") 64 | 65 | // very first backup 66 | fmt.Printf("Test: First backup ...\n") 67 | 68 | { 69 | vx, _ := ck1.Get() 70 | for i := 0; i < DeadPings*2; i++ { 71 | ck1.Ping(1) 72 | view, _ := ck2.Ping(0) 73 | if view.Backup == ck2.me { 74 | break 75 | } 76 | time.Sleep(PingInterval) 77 | } 78 | check(t, ck1, ck1.me, ck2.me, vx.Viewnum+1) 79 | } 80 | fmt.Printf(" ... Passed\n") 81 | 82 | // primary dies, backup should take over 83 | fmt.Printf("Test: Backup takes over if primary fails ...\n") 84 | 85 | { 86 | ck1.Ping(2) 87 | vx, _ := ck2.Ping(2) 88 | for i := 0; i < DeadPings*2; i++ { 89 | v, _ := ck2.Ping(vx.Viewnum) 90 | if v.Primary == ck2.me && v.Backup == "" { 91 | break 92 | } 93 | time.Sleep(PingInterval) 94 | } 95 | check(t, ck2, ck2.me, "", vx.Viewnum+1) 96 | } 97 | fmt.Printf(" ... 
Passed\n") 98 | 99 | // revive ck1, should become backup 100 | fmt.Printf("Test: Restarted server becomes backup ...\n") 101 | 102 | { 103 | vx, _ := ck2.Get() 104 | ck2.Ping(vx.Viewnum) 105 | for i := 0; i < DeadPings*2; i++ { 106 | ck1.Ping(0) 107 | v, _ := ck2.Ping(vx.Viewnum) 108 | if v.Primary == ck2.me && v.Backup == ck1.me { 109 | break 110 | } 111 | time.Sleep(PingInterval) 112 | } 113 | check(t, ck2, ck2.me, ck1.me, vx.Viewnum+1) 114 | } 115 | fmt.Printf(" ... Passed\n") 116 | 117 | // start ck3, kill the primary (ck2), the previous backup (ck1) 118 | // should become the server, and ck3 the backup. 119 | // this should happen in a single view change, without 120 | // any period in which there's no backup. 121 | fmt.Printf("Test: Idle third server becomes backup if primary fails ...\n") 122 | 123 | { 124 | vx, _ := ck2.Get() 125 | ck2.Ping(vx.Viewnum) 126 | for i := 0; i < DeadPings*2; i++ { 127 | ck3.Ping(0) 128 | v, _ := ck1.Ping(vx.Viewnum) 129 | if v.Primary == ck1.me && v.Backup == ck3.me { 130 | break 131 | } 132 | vx = v 133 | time.Sleep(PingInterval) 134 | } 135 | check(t, ck1, ck1.me, ck3.me, vx.Viewnum+1) 136 | } 137 | fmt.Printf(" ... Passed\n") 138 | 139 | // kill and immediately restart the primary -- does viewservice 140 | // conclude primary is down even though it's pinging? 141 | fmt.Printf("Test: Restarted primary treated as dead ...\n") 142 | 143 | { 144 | vx, _ := ck1.Get() 145 | ck1.Ping(vx.Viewnum) 146 | for i := 0; i < DeadPings*2; i++ { 147 | ck1.Ping(0) 148 | ck3.Ping(vx.Viewnum) 149 | v, _ := ck3.Get() 150 | if v.Primary != ck1.me { 151 | break 152 | } 153 | time.Sleep(PingInterval) 154 | } 155 | vy, _ := ck3.Get() 156 | if vy.Primary != ck3.me { 157 | t.Fatalf("expected primary=%v, got %v\n", ck3.me, vy.Primary) 158 | } 159 | } 160 | fmt.Printf(" ... Passed\n") 161 | 162 | fmt.Printf("Test: Dead backup is removed from view ...\n") 163 | 164 | // set up a view with just 3 as primary, 165 | // to prepare for the next test. 166 | { 167 | for i := 0; i < DeadPings*3; i++ { 168 | vx, _ := ck3.Get() 169 | ck3.Ping(vx.Viewnum) 170 | time.Sleep(PingInterval) 171 | } 172 | v, _ := ck3.Get() 173 | if v.Primary != ck3.me || v.Backup != "" { 174 | t.Fatalf("wrong primary or backup") 175 | } 176 | } 177 | fmt.Printf(" ... Passed\n") 178 | 179 | // does viewserver wait for ack of previous view before 180 | // starting the next one? 181 | fmt.Printf("Test: Viewserver waits for primary to ack view ...\n") 182 | 183 | { 184 | // set up p=ck3 b=ck1, but 185 | // but do not ack 186 | vx, _ := ck1.Get() 187 | for i := 0; i < DeadPings*3; i++ { 188 | ck1.Ping(0) 189 | ck3.Ping(vx.Viewnum) 190 | v, _ := ck1.Get() 191 | if v.Viewnum > vx.Viewnum { 192 | break 193 | } 194 | time.Sleep(PingInterval) 195 | } 196 | check(t, ck1, ck3.me, ck1.me, vx.Viewnum+1) 197 | vy, _ := ck1.Get() 198 | // ck3 is the primary, but it never acked. 199 | // let ck3 die. check that ck1 is not promoted. 200 | for i := 0; i < DeadPings*3; i++ { 201 | v, _ := ck1.Ping(vy.Viewnum) 202 | if v.Viewnum > vy.Viewnum { 203 | break 204 | } 205 | time.Sleep(PingInterval) 206 | } 207 | check(t, ck2, ck3.me, ck1.me, vy.Viewnum) 208 | } 209 | fmt.Printf(" ... Passed\n") 210 | 211 | // if old servers die, check that a new (uninitialized) server 212 | // cannot take over. 
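// ck2 only ever Pings with Viewnum 0 here, signalling a freshly started, uninitialized server, so the viewservice must not promote it to primary.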
213 | fmt.Printf("Test: Uninitialized server can't become primary ...\n") 214 | 215 | { 216 | for i := 0; i < DeadPings*2; i++ { 217 | v, _ := ck1.Get() 218 | ck1.Ping(v.Viewnum) 219 | ck2.Ping(0) 220 | ck3.Ping(v.Viewnum) 221 | time.Sleep(PingInterval) 222 | } 223 | for i := 0; i < DeadPings*2; i++ { 224 | ck2.Ping(0) 225 | time.Sleep(PingInterval) 226 | } 227 | vz, _ := ck2.Get() 228 | if vz.Primary == ck2.me { 229 | t.Fatalf("uninitialized backup promoted to primary") 230 | } 231 | } 232 | fmt.Printf(" ... Passed\n") 233 | 234 | vs.Kill() 235 | } 236 | --------------------------------------------------------------------------------