├── .gitignore ├── env.sh ├── README.md ├── LICENSE └── src ├── paxos │ ├── common.go │ ├── paxos.go │ └── test_test.go └── kvpaxos ├── common.go ├── client.go ├── server.go └── test_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | -------------------------------------------------------------------------------- /env.sh: -------------------------------------------------------------------------------- 1 | export GOPATH=${GOPATH}:`pwd` 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | An example Paxos implementation and an application built on it, extracted from [MIT 6.824 lab3](http://nil.csail.mit.edu/6.824/2015/labs/lab-3.html). 2 | 3 | Under src, the paxos directory contains the Paxos algorithm and its test cases, and the kvpaxos directory contains a key/value storage system implemented on top of this Paxos library. 4 | 5 | You can enter either directory and run 6 | 7 | ``` 8 | go test 9 | ``` 10 | 11 | to run the test cases. Before running the cases in the kvpaxos directory, though, you first need to set up the Go environment so that the paxos package can be found: add the repository root to GOPATH, as the env.sh script does. 12 | 13 | For the complete description of this lab, see the MIT course link at the beginning of this README. -------------------------------------------------------------------------------- /src/paxos/common.go: -------------------------------------------------------------------------------- 1 | package paxos 2 | 3 | const ( 4 | OK = "OK" 5 | Reject = "Reject" 6 | ) 7 | 8 | type PrepareArgs struct { 9 | Seq int 10 | PNum string 11 | } 12 | 13 | type PrepareReply struct { 14 | Err string 15 | AcceptPnum string 16 | AcceptValue interface{} 17 | } 18 | 19 | type AcceptArgs struct { 20 | Seq int 21 | PNum string 22 | Value interface{} 23 | } 24 | 25 | type AcceptReply struct { 26 | Err string 27 | } 28 | 29 | type DecideArgs struct { 30 | Seq int 31 | Value interface{} 32 | PNum string 33 | Me int 34 | Done int 35 | } 36 | 37 | type DecideReply struct { 38 | 39 | } -------------------------------------------------------------------------------- /src/kvpaxos/common.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ) 7 | 8 | const ( 9 | PUT = "Put" 10 | APPEND = "Append" 11 | GET = "Get" 12 | ) 13 | type Err string 14 | 15 | // Put or Append 16 | type PutAppendArgs struct { 17 | // You'll have to add definitions here. 18 | Key string 19 | Value string 20 | Op string // "Put" or "Append" 21 | // You'll have to add definitions here. 22 | // Field names must start with capital letters, 23 | // otherwise RPC will break. 24 | Seq int64 25 | } 26 | 27 | type PutAppendReply struct { 28 | Err Err 29 | } 30 | 31 | type GetArgs struct { 32 | Key string 33 | // You'll have to add definitions here.
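// (Added note, based on server.go: Seq is a client-generated nonce identifying this request; the server uses it to tell whether the Op it proposed is the one Paxos actually decided — see ProcessOperation in server.go.)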
34 | Seq int64 35 | } 36 | 37 | type GetReply struct { 38 | Err Err 39 | Value string 40 | } 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 lichuang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/kvpaxos/client.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import "net/rpc" 4 | import "crypto/rand" 5 | import "math/big" 6 | 7 | import "fmt" 8 | import "time" 9 | 10 | type Clerk struct { 11 | servers []string 12 | // You will have to modify this struct. 13 | } 14 | 15 | func nrand() int64 { 16 | max := big.NewInt(int64(1) << 62) 17 | bigx, _ := rand.Int(rand.Reader, max) 18 | x := bigx.Int64() 19 | return x 20 | } 21 | 22 | func MakeClerk(servers []string) *Clerk { 23 | ck := new(Clerk) 24 | ck.servers = servers 25 | // You'll have to add code here. 26 | return ck 27 | } 28 | 29 | // 30 | // call() sends an RPC to the rpcname handler on server srv 31 | // with arguments args, waits for the reply, and leaves the 32 | // reply in reply. the reply argument should be a pointer 33 | // to a reply structure. 34 | // 35 | // the return value is true if the server responded, and false 36 | // if call() was not able to contact the server. in particular, 37 | // the reply's contents are only valid if call() returned true. 38 | // 39 | // you should assume that call() will return an 40 | // error after a while if the server is dead. 41 | // don't provide your own time-out mechanism. 42 | // 43 | // please use call() to send all RPCs, in client.go and server.go. 44 | // please don't change this function. 45 | // 46 | func call(srv string, rpcname string, 47 | args interface{}, reply interface{}) bool { 48 | c, errx := rpc.Dial("unix", srv) 49 | if errx != nil { 50 | return false 51 | } 52 | defer c.Close() 53 | 54 | err := c.Call(rpcname, args, reply) 55 | if err == nil { 56 | return true 57 | } 58 | 59 | fmt.Println(err) 60 | return false 61 | } 62 | 63 | // 64 | // fetch the current value for a key. 65 | // returns "" if the key does not exist. 66 | // keeps trying forever in the face of all other errors. 67 | // 68 | func (ck *Clerk) Get(key string) string { 69 | // You will have to modify this function. 
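// (Added note on the retry loop below: kvpaxos has no designated leader, so any reachable replica can serve a Get; if the chosen server does not respond, the clerk waits two seconds and moves on to the next one in turn.)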
70 | args := &GetArgs{} 71 | args.Key = key 72 | args.Seq = nrand() 73 | 74 | var reply GetReply 75 | index := 0 76 | for { 77 | DPrintf("client %d get %s\n", index, key) 78 | ok := call(ck.servers[index], "KVPaxos.Get", args, &reply) 79 | if ok { 80 | return reply.Value 81 | } 82 | time.Sleep(time.Second * 2) 83 | index = (index + 1) % len(ck.servers) 84 | } 85 | } 86 | 87 | // 88 | // shared by Put and Append. 89 | // 90 | func (ck *Clerk) PutAppend(key string, value string, op string) { 91 | // You will have to modify this function. 92 | args := PutAppendArgs{} 93 | args.Key = key 94 | args.Seq = nrand() 95 | args.Value = value 96 | args.Op = op 97 | 98 | var reply PutAppendReply 99 | index := 0 100 | for { 101 | DPrintf("client %d %s %s:%s\n", index, op, key, value) 102 | ok := call(ck.servers[index], "KVPaxos.PutAppend", args, &reply) 103 | if ok { 104 | break 105 | } 106 | time.Sleep(time.Second * 2) 107 | index = (index + 1) % len(ck.servers) 108 | } 109 | } 110 | 111 | func (ck *Clerk) Put(key string, value string) { 112 | ck.PutAppend(key, value, PUT) 113 | } 114 | func (ck *Clerk) Append(key string, value string) { 115 | ck.PutAppend(key, value, APPEND) 116 | } 117 | -------------------------------------------------------------------------------- /src/kvpaxos/server.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import ( 4 | "net" 5 | "fmt" 6 | "net/rpc" 7 | "log" 8 | "paxos" 9 | "sync" 10 | "sync/atomic" 11 | "os" 12 | "syscall" 13 | "encoding/gob" 14 | "math/rand" 15 | "time" 16 | ) 17 | 18 | 19 | const Debug = 0 20 | 21 | func DPrintf(format string, a ...interface{}) (n int, err error) { 22 | if Debug > 0 { 23 | log.Printf(format, a...) 24 | } 25 | return 26 | } 27 | 28 | 29 | type Op struct { 30 | // Your definitions here. 31 | // Field names must start with capital letters, 32 | // otherwise RPC will break. 33 | Op string 34 | ClientSeq int64 35 | Key string 36 | Value string 37 | } 38 | 39 | type KVPaxos struct { 40 | mu sync.Mutex 41 | l net.Listener 42 | me int 43 | dead int32 // for testing 44 | unreliable int32 // for testing 45 | px *paxos.Paxos 46 | 47 | // Your definitions here.
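// (Added note on the fields below: kv is the replicated key/value state; seq records client nonces that have already been applied, serving as a duplicate filter; logs keeps the applied non-Get ops; lastSeq is the next Paxos instance this server will fill; servers mirrors the peer list.)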
48 | kv map[string]string 49 | seq map[int64]bool 50 | logs []Op 51 | lastSeq int 52 | servers []string 53 | } 54 | 55 | // helper functions 56 | func (kv *KVPaxos) wait(seq int) Op { 57 | to := 10 * time.Millisecond 58 | for { 59 | status, v := kv.px.Status(seq) 60 | DPrintf("%d seq %d status: %d:%d\n", kv.me, seq, status, paxos.Decided) 61 | if status == paxos.Decided { 62 | kvlog, _ := v.(Op) 63 | return kvlog 64 | } 65 | time.Sleep(to) 66 | if to < 10 * time.Second { 67 | to *= 2 68 | } 69 | } 70 | } 71 | 72 | func (kv *KVPaxos) Apply(v Op) { 73 | // append to the log 74 | if v.Op != GET { 75 | kv.logs = append(kv.logs, v) 76 | 77 | // modify the k-v state 78 | if v.Op == PUT { 79 | kv.kv[v.Key] = v.Value 80 | } else if v.Op == APPEND { 81 | old, ok := kv.kv[v.Key] 82 | if ok { 83 | kv.kv[v.Key] = old + v.Value 84 | } else { 85 | kv.kv[v.Key] = v.Value 86 | } 87 | } 88 | } 89 | // record the client seq as applied 90 | kv.seq[v.ClientSeq] = true 91 | 92 | kv.px.Done(kv.lastSeq) 93 | kv.lastSeq += 1 94 | } 95 | 96 | func (kv *KVPaxos) ProcessOperation(v Op) { 97 | ok := false 98 | var log Op 99 | for !ok { 100 | 101 | seq := kv.lastSeq 102 | 103 | status, val := kv.px.Status(kv.lastSeq) 104 | 105 | // this instance is already decided: apply whatever was chosen 106 | if status == paxos.Decided { 107 | log = val.(Op) 108 | } else { 109 | // the instance is still free: propose our own value 110 | kv.px.Start(seq, v) 111 | log = kv.wait(seq) 112 | } 113 | 114 | ok = v.ClientSeq == log.ClientSeq 115 | kv.Apply(log) 116 | } 117 | } 118 | 119 | func (kv *KVPaxos) Get(args *GetArgs, reply *GetReply) error { 120 | // Your code here. 121 | kv.mu.Lock() 122 | defer kv.mu.Unlock() 123 | 124 | var v Op = Op{Op: GET, Key: args.Key, ClientSeq: args.Seq} 125 | kv.ProcessOperation(v) 126 | 127 | val, ok := kv.kv[v.Key] 128 | if !ok { 129 | reply.Err = ErrNoKey 130 | } else { 131 | reply.Err = OK 132 | reply.Value = val 133 | } 134 | 135 | return nil 136 | } 137 | 138 | func (kv *KVPaxos) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 139 | // Your code here. 140 | kv.mu.Lock() 141 | defer kv.mu.Unlock() 142 | 143 | _, exist := kv.seq[args.Seq] 144 | if exist { 145 | reply.Err = OK 146 | return nil 147 | } 148 | 149 | appendOp := Op{Op: args.Op, ClientSeq: args.Seq, Key: args.Key, Value: args.Value} 150 | kv.ProcessOperation(appendOp) 151 | 152 | DPrintf("%d %s %s:%s done\n", kv.me, args.Op, args.Key, args.Value) 153 | reply.Err = OK 154 | 155 | return nil 156 | } 157 | 158 | // tell the server to shut itself down. 159 | // please do not change these two functions. 160 | func (kv *KVPaxos) kill() { 161 | DPrintf("Kill(%d): die\n", kv.me) 162 | atomic.StoreInt32(&kv.dead, 1) 163 | kv.l.Close() 164 | kv.px.Kill() 165 | } 166 | 167 | // call this to find out if the server is dead. 168 | func (kv *KVPaxos) isdead() bool { 169 | return atomic.LoadInt32(&kv.dead) != 0 170 | } 171 | 172 | // please do not change these two functions. 173 | func (kv *KVPaxos) setunreliable(what bool) { 174 | if what { 175 | atomic.StoreInt32(&kv.unreliable, 1) 176 | } else { 177 | atomic.StoreInt32(&kv.unreliable, 0) 178 | } 179 | } 180 | 181 | func (kv *KVPaxos) isunreliable() bool { 182 | return atomic.LoadInt32(&kv.unreliable) != 0 183 | } 184 | 185 | // 186 | // servers[] contains the ports of the set of 187 | // servers that will cooperate via Paxos to 188 | // form the fault-tolerant key/value service. 189 | // me is the index of the current server in servers[].
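// (Added note: StartServer below registers Op with gob, creates the local Paxos peer, and serves RPCs on a Unix-domain socket; most of its body is test-harness plumbing that should not be changed.)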
190 | // 191 | func StartServer(servers []string, me int) *KVPaxos { 192 | // call gob.Register on structures you want 193 | // Go's RPC library to marshall/unmarshall. 194 | gob.Register(Op{}) 195 | 196 | kv := new(KVPaxos) 197 | kv.me = me 198 | 199 | // Your initialization code here. 200 | kv.kv = make(map[string] string) 201 | kv.seq = make(map[int64]bool) 202 | kv.logs = [] Op{} 203 | kv.lastSeq = 1 204 | kv.servers = servers 205 | 206 | rpcs := rpc.NewServer() 207 | rpcs.Register(kv) 208 | 209 | kv.px = paxos.Make(servers, me, rpcs) 210 | 211 | os.Remove(servers[me]) 212 | l, e := net.Listen("unix", servers[me]) 213 | if e != nil { 214 | log.Fatal("listen error: ", e) 215 | } 216 | kv.l = l 217 | 218 | 219 | // please do not change any of the following code, 220 | // or do anything to subvert it. 221 | 222 | go func() { 223 | for kv.isdead() == false { 224 | conn, err := kv.l.Accept() 225 | if err == nil && kv.isdead() == false { 226 | if kv.isunreliable() && (rand.Int63()%1000) < 100 { 227 | // discard the request. 228 | conn.Close() 229 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 { 230 | // process the request but force discard of reply. 231 | c1 := conn.(*net.UnixConn) 232 | f, _ := c1.File() 233 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 234 | if err != nil { 235 | fmt.Printf("shutdown: %v\n", err) 236 | } 237 | go rpcs.ServeConn(conn) 238 | } else { 239 | go rpcs.ServeConn(conn) 240 | } 241 | } else if err == nil { 242 | conn.Close() 243 | } 244 | if err != nil && kv.isdead() == false { 245 | fmt.Printf("KVPaxos(%v) accept: %v\n", me, err.Error()) 246 | kv.kill() 247 | } 248 | } 249 | }() 250 | 251 | return kv 252 | } 253 | -------------------------------------------------------------------------------- /src/paxos/paxos.go: -------------------------------------------------------------------------------- 1 | package paxos 2 | 3 | // 4 | // Paxos library, to be included in an application. 5 | // Multiple applications will run, each including 6 | // a Paxos peer. 7 | // 8 | // Manages a sequence of agreed-on values. 9 | // The set of peers is fixed. 10 | // Copes with network failures (partition, msg loss, &c). 11 | // Does not store anything persistently, so cannot handle crash+restart. 12 | // 13 | // The application interface: 14 | // 15 | // px = paxos.Make(peers []string, me string) 16 | // px.Start(seq int, v interface{}) -- start agreement on new instance 17 | // px.Status(seq int) (Fate, v interface{}) -- get info about an instance 18 | // px.Done(seq int) -- ok to forget all instances <= seq 19 | // px.Max() int -- highest instance seq known, or -1 20 | // px.Min() int -- instances before this seq have been forgotten 21 | // 22 | 23 | import "net" 24 | import "net/rpc" 25 | import "log" 26 | 27 | import "os" 28 | import "syscall" 29 | import "sync" 30 | import "sync/atomic" 31 | import "fmt" 32 | import ( 33 | "math/rand" 34 | "strconv" 35 | "time" 36 | ) 37 | 38 | // px.Status() return values, indicating 39 | // whether an agreement has been decided, 40 | // or Paxos has not yet reached agreement, 41 | // or it was agreed but forgotten (i.e. < Min()). 42 | type Fate int 43 | 44 | const ( 45 | Decided Fate = iota + 1 46 | Pending // not yet decided. 47 | Forgotten // decided but forgotten. 
48 | ) 49 | 50 | const ( 51 | PrintDebug = false 52 | ) 53 | 54 | type instance struct { 55 | state Fate // instance state 56 | n_p string // propose num 57 | n_a string // accept num 58 | v_a interface{} // accept value 59 | } 60 | 61 | type Paxos struct { 62 | mu sync.Mutex 63 | l net.Listener 64 | dead int32 // for testing 65 | unreliable int32 // for testing 66 | rpcCount int32 // for testing 67 | peers []string 68 | me int // index into peers[] 69 | 70 | // Your data here. 71 | dones []int 72 | instances map[int]*instance 73 | } 74 | 75 | // 76 | // call() sends an RPC to the rpcname handler on server srv 77 | // with arguments args, waits for the reply, and leaves the 78 | // reply in reply. the reply argument should be a pointer 79 | // to a reply structure. 80 | // 81 | // the return value is true if the server responded, and false 82 | // if call() was not able to contact the server. in particular, 83 | // the reply's contents are only valid if call() returned true. 84 | // 85 | // you should assume that call() will time out and return an 86 | // error after a while if it does not get a reply from the server. 87 | // 88 | // please use call() to send all RPCs, in client.go and server.go. 89 | // please do not change this function. 90 | // 91 | func call(srv string, name string, args interface{}, reply interface{}) bool { 92 | c, err := rpc.Dial("unix", srv) 93 | if err != nil { 94 | err1 := err.(*net.OpError) 95 | if err1.Err != syscall.ENOENT && err1.Err != syscall.ECONNREFUSED { 96 | //fmt.Printf("paxos Dial() failed: %v\n", err1) 97 | } 98 | return false 99 | } 100 | defer c.Close() 101 | 102 | err = c.Call(name, args, reply) 103 | if err == nil { 104 | return true 105 | } 106 | 107 | //fmt.Println(err) 108 | return false 109 | } 110 | 111 | // RPC handler 112 | func (px *Paxos) Prepare(args *PrepareArgs, reply *PrepareReply) error { 113 | px.mu.Lock() 114 | defer px.mu.Unlock() 115 | 116 | instance, exist := px.instances[args.Seq] 117 | if !exist { 118 | // no instance for this seq yet: create one and reply OK 119 | px.instances[args.Seq] = px.newInstance() 120 | instance = px.instances[args.Seq] 121 | reply.Err = OK 122 | } else { 123 | // otherwise only a proposal number higher than the promised n_p is accepted 124 | if args.PNum > instance.n_p { 125 | reply.Err = OK 126 | } else { 127 | reply.Err = Reject 128 | } 129 | } 130 | 131 | if reply.Err == OK { 132 | if PrintDebug { 133 | fmt.Printf("%s:%d accept prepare\n", px.peers[px.me], args.Seq) 134 | } 135 | // if OK, reply with the highest accepted num and value so far 136 | reply.AcceptPnum = instance.n_a 137 | reply.AcceptValue = instance.v_a 138 | 139 | // update the promised proposal number 140 | px.instances[args.Seq].n_p = args.PNum 141 | } else { 142 | if PrintDebug { 143 | fmt.Printf("%s:%d reject prepare\n", px.peers[px.me], args.Seq) 144 | } 145 | } 146 | 147 | return nil 148 | } 149 | 150 | func (px *Paxos) Accept(args *AcceptArgs, reply *AcceptReply) error { 151 | px.mu.Lock() 152 | defer px.mu.Unlock() 153 | 154 | instance, exist := px.instances[args.Seq] 155 | if !exist { 156 | // no instance for this seq yet: create one and reply OK 157 | px.instances[args.Seq] = px.newInstance() 158 | reply.Err = OK 159 | } else { 160 | // otherwise accept only a proposal number >= the promised n_p 161 | if args.PNum >= instance.n_p { 162 | reply.Err = OK 163 | } else { 164 | reply.Err = Reject 165 | } 166 | } 167 | 168 | if reply.Err == OK { 169 | if PrintDebug { 170 | fmt.Printf("%s:%d accept accept %v\n", px.peers[px.me], args.Seq, args.Value) 171 | } 172 | // update the promised number, accepted num and value 173 | px.instances[args.Seq].n_a = args.PNum 174 | px.instances[args.Seq].n_p = args.PNum
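// (raising n_p here as well means a later Prepare carrying a lower number will be rejected)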
175 | px.instances[args.Seq].v_a = args.Value 176 | } else { 177 | if PrintDebug { 178 | fmt.Printf("%s:%d reject accept %v\n", px.peers[px.me], args.Seq, args.Value) 179 | } 180 | } 181 | 182 | return nil 183 | } 184 | 185 | func (px *Paxos) Decide(args *DecideArgs, reply *DecideReply) error { 186 | px.mu.Lock() 187 | defer px.mu.Unlock() 188 | 189 | if PrintDebug { 190 | fmt.Printf("%s decide %d:%v\n", px.peers[px.me], args.Seq, args.Value) 191 | } 192 | _, exist := px.instances[args.Seq] 193 | if !exist { 194 | px.instances[args.Seq] = px.newInstance() 195 | } 196 | 197 | // update the proposal number, accepted num and value, and state 198 | px.instances[args.Seq].v_a = args.Value 199 | px.instances[args.Seq].n_a = args.PNum 200 | px.instances[args.Seq].n_p = args.PNum 201 | px.instances[args.Seq].state = Decided 202 | // update the sender's entry in the done array 203 | px.dones[args.Me] = args.Done 204 | return nil 205 | } 206 | 207 | // helper functions 208 | func (px *Paxos) newInstance() *instance { 209 | return &instance{n_a: "", n_p: "", v_a: nil, state: Pending} 210 | } 211 | 212 | func (px *Paxos) majority() int { 213 | return len(px.peers)/2 + 1 214 | } 215 | 216 | // generate a proposal number 217 | func (px *Paxos) generatePNum() string { 218 | begin := time.Date(2015, time.May, 6, 22, 0, 0, 0, time.UTC) 219 | duration := time.Now().Sub(begin) 220 | return strconv.FormatInt(duration.Nanoseconds(), 10) + "-" + strconv.Itoa(px.me) 221 | } 222 | 223 | func (px *Paxos) sendPrepare(seq int, v interface{}) (bool, string, interface{}) { 224 | pnum := px.generatePNum() 225 | 226 | if PrintDebug { 227 | fmt.Printf("%s send prepare %d:%v\n", px.peers[px.me], seq, v) 228 | } 229 | 230 | arg := PrepareArgs{Seq: seq, PNum: pnum} 231 | num := 0 232 | replyPnum := "" 233 | // default replyValue to our own v 234 | replyValue := v 235 | for i, peer := range px.peers { 236 | var reply = PrepareReply{AcceptValue: nil, AcceptPnum: "", Err: Reject} 237 | if i == px.me { 238 | // if this is our own server, just call the handler directly 239 | px.Prepare(&arg, &reply) 240 | } else { 241 | call(peer, "Paxos.Prepare", &arg, &reply) 242 | } 243 | 244 | if reply.Err == OK { 245 | num += 1 246 | // adopt the reply's accepted value if its accept num is the highest seen 247 | if reply.AcceptPnum > replyPnum { 248 | replyPnum = reply.AcceptPnum 249 | replyValue = reply.AcceptValue 250 | } 251 | } 252 | } 253 | 254 | // return our own pnum rather than replyPnum: 255 | // the accept phase proposes replyValue under our number pnum 256 | return num >= px.majority(), pnum, replyValue 257 | } 258 | 259 | func (px *Paxos) sendAccept(seq int, pnum string, v interface{}) bool { 260 | arg := AcceptArgs{Seq: seq, PNum: pnum, Value: v} 261 | num := 0 262 | 263 | if PrintDebug { 264 | fmt.Printf("%s send accept %d:%v\n", px.peers[px.me], seq, v) 265 | } 266 | for i, peer := range px.peers { 267 | var reply AcceptReply 268 | if i == px.me { 269 | // if this is our own server, just call the handler directly 270 | px.Accept(&arg, &reply) 271 | } else { 272 | call(peer, "Paxos.Accept", &arg, &reply) 273 | } 274 | 275 | if reply.Err == OK { 276 | num += 1 277 | } 278 | } 279 | 280 | // return whether a quorum accepted 281 | return num >= px.majority() 282 | } 283 | 284 | func (px *Paxos) sendDecide(seq int, pnum string, v interface{}) { 285 | // first update our own instance for this seq 286 | px.mu.Lock() 287 | px.instances[seq].state = Decided 288 | px.instances[seq].n_a = pnum 289 | px.instances[seq].n_p = pnum 290 | px.instances[seq].v_a = v 291 | px.mu.Unlock() 292 | 293 | if PrintDebug { 294 | fmt.Printf("%s send decide %d:%v\n", px.peers[px.me], seq, v)
%d:%v\n", px.peers[px.me], seq, v) 295 | } 296 | 297 | arg := DecideArgs{Seq: seq, PNum: pnum, Value: v, Me: px.me, Done: px.dones[px.me]} 298 | for i, peer := range px.peers { 299 | if i == px.me { 300 | // if the same server, just continue 301 | continue 302 | } 303 | var reply DecideReply 304 | call(peer, "Paxos.Decide", &arg, &reply) 305 | } 306 | } 307 | 308 | func (px *Paxos) proposer(seq int, v interface{}) { 309 | for { 310 | ok, pnum, value := px.sendPrepare(seq, v) 311 | if ok { 312 | ok = px.sendAccept(seq, pnum, value) 313 | } 314 | if ok { 315 | px.sendDecide(seq, pnum, value) 316 | break 317 | } 318 | 319 | state, _ := px.Status(seq) 320 | if state == Decided { 321 | break 322 | } 323 | } 324 | } 325 | 326 | // 327 | // the application wants paxos to start agreement on 328 | // instance seq, with proposed value v. 329 | // Start() returns right away; the application will 330 | // call Status() to find out if/when agreement 331 | // is reached. 332 | // 333 | func (px *Paxos) Start(seq int, v interface{}) { 334 | go func() { 335 | // if seq < min,just return 336 | if seq < px.Min() { 337 | return 338 | } 339 | px.proposer(seq, v) 340 | }() 341 | } 342 | 343 | // 344 | // the application on this machine is done with 345 | // all instances <= seq. 346 | // 347 | // see the comments for Min() for more explanation. 348 | // 349 | func (px *Paxos) Done(seq int) { 350 | // Your code here. 351 | px.mu.Lock() 352 | defer px.mu.Unlock() 353 | 354 | if seq > px.dones[px.me] { 355 | px.dones[px.me] = seq 356 | } 357 | } 358 | 359 | // 360 | // the application wants to know the 361 | // highest instance sequence known to 362 | // this peer. 363 | // 364 | func (px *Paxos) Max() int { 365 | // Your code here. 366 | px.mu.Lock() 367 | defer px.mu.Unlock() 368 | 369 | max := 0 370 | for k, _ := range px.instances { 371 | if k > max { 372 | max = k 373 | } 374 | } 375 | 376 | return max 377 | } 378 | 379 | // 380 | // Min() should return one more than the minimum among z_i, 381 | // where z_i is the highest number ever passed 382 | // to Done() on peer i. A peers z_i is -1 if it has 383 | // never called Done(). 384 | // 385 | // Paxos is required to have forgotten all information 386 | // about any instances it knows that are < Min(). 387 | // The point is to free up memory in long-running 388 | // Paxos-based servers. 389 | // 390 | // Paxos peers need to exchange their highest Done() 391 | // arguments in order to implement Min(). These 392 | // exchanges can be piggybacked on ordinary Paxos 393 | // agreement protocol messages, so it is OK if one 394 | // peers Min does not reflect another Peers Done() 395 | // until after the next instance is agreed to. 396 | // 397 | // The fact that Min() is defined as a minimum over 398 | // *all* Paxos peers means that Min() cannot increase until 399 | // all peers have been heard from. So if a peer is dead 400 | // or unreachable, other peers Min()s will not increase 401 | // even if all reachable peers call Done. The reason for 402 | // this is that when the unreachable peer comes back to 403 | // life, it will need to catch up on instances that it 404 | // missed -- the other peers therefor cannot forget these 405 | // instances. 406 | // 407 | func (px *Paxos) Min() int { 408 | // You code here. 
409 | 410 | px.mu.Lock() 411 | defer px.mu.Unlock() 412 | 413 | // iterate over the peers' Done values to find the minimum 414 | min := px.dones[px.me] 415 | for i := range px.dones { 416 | if px.dones[i] < min { 417 | min = px.dones[i] 418 | } 419 | } 420 | 421 | // delete every decided instance with seq <= min 422 | for k, instance := range px.instances { 423 | if k > min { 424 | continue 425 | } 426 | if instance.state != Decided { 427 | continue 428 | } 429 | 430 | delete(px.instances, k) 431 | } 432 | 433 | //fmt.Printf("min: %d\n", min) 434 | return min + 1 435 | } 436 | 437 | // 438 | // the application wants to know whether this 439 | // peer thinks an instance has been decided, 440 | // and if so what the agreed value is. Status() 441 | // should just inspect the local peer state; 442 | // it should not contact other Paxos peers. 443 | // 444 | func (px *Paxos) Status(seq int) (Fate, interface{}) { 445 | // Your code here. 446 | if seq < px.Min() { 447 | return Forgotten, nil 448 | } 449 | 450 | px.mu.Lock() 451 | defer px.mu.Unlock() 452 | 453 | instance, exist := px.instances[seq] 454 | if !exist { 455 | return Pending, nil 456 | } 457 | 458 | return instance.state, instance.v_a 459 | } 460 | 461 | // 462 | // tell the peer to shut itself down. 463 | // for testing. 464 | // please do not change these two functions. 465 | // 466 | func (px *Paxos) Kill() { 467 | atomic.StoreInt32(&px.dead, 1) 468 | if px.l != nil { 469 | px.l.Close() 470 | } 471 | } 472 | 473 | // 474 | // has this peer been asked to shut down? 475 | // 476 | func (px *Paxos) isdead() bool { 477 | return atomic.LoadInt32(&px.dead) != 0 478 | } 479 | 480 | // please do not change these two functions. 481 | func (px *Paxos) setunreliable(what bool) { 482 | if what { 483 | atomic.StoreInt32(&px.unreliable, 1) 484 | } else { 485 | atomic.StoreInt32(&px.unreliable, 0) 486 | } 487 | } 488 | 489 | func (px *Paxos) isunreliable() bool { 490 | return atomic.LoadInt32(&px.unreliable) != 0 491 | } 492 | 493 | // 494 | // the application wants to create a paxos peer. 495 | // the ports of all the paxos peers (including this one) 496 | // are in peers[]. this server's port is peers[me]. 497 | // 498 | func Make(peers []string, me int, rpcs *rpc.Server) *Paxos { 499 | px := &Paxos{} 500 | px.peers = peers 501 | px.me = me 502 | 503 | // Your initialization code here. 504 | px.instances = map[int]*instance{} 505 | px.dones = make([]int, len(px.peers)) 506 | for i := range px.peers { 507 | px.dones[i] = -1 508 | } 509 | 510 | if rpcs != nil { 511 | // caller will create socket &c 512 | rpcs.Register(px) 513 | } else { 514 | rpcs = rpc.NewServer() 515 | rpcs.Register(px) 516 | 517 | // prepare to receive connections from clients. 518 | // change "unix" to "tcp" to use over a network. 519 | os.Remove(peers[me]) // only needed for "unix" 520 | l, e := net.Listen("unix", peers[me]) 521 | if e != nil { 522 | log.Fatal("listen error: ", e) 523 | } 524 | px.l = l 525 | 526 | // please do not change any of the following code, 527 | // or do anything to subvert it. 528 | 529 | // create a thread to accept RPC connections 530 | go func() { 531 | for px.isdead() == false { 532 | conn, err := px.l.Accept() 533 | if err == nil && px.isdead() == false { 534 | if px.isunreliable() && (rand.Int63()%1000) < 100 { 535 | // discard the request. 536 | conn.Close() 537 | } else if px.isunreliable() && (rand.Int63()%1000) < 200 { 538 | // process the request but force discard of reply.
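// (shutting down only the write side of the socket lets the request be served while its reply is lost, simulating a dropped reply)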
539 | c1 := conn.(*net.UnixConn) 540 | f, _ := c1.File() 541 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 542 | if err != nil { 543 | fmt.Printf("shutdown: %v\n", err) 544 | } 545 | atomic.AddInt32(&px.rpcCount, 1) 546 | go rpcs.ServeConn(conn) 547 | } else { 548 | atomic.AddInt32(&px.rpcCount, 1) 549 | go rpcs.ServeConn(conn) 550 | } 551 | } else if err == nil { 552 | conn.Close() 553 | } 554 | if err != nil && px.isdead() == false { 555 | //fmt.Printf("Paxos(%v) accept: %v\n", me, err.Error()) 556 | } 557 | } 558 | }() 559 | } 560 | 561 | return px 562 | } 563 | -------------------------------------------------------------------------------- /src/kvpaxos/test_test.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import "testing" 4 | import "runtime" 5 | import "strconv" 6 | import "os" 7 | import "time" 8 | import "fmt" 9 | import "math/rand" 10 | import "strings" 11 | import "sync/atomic" 12 | 13 | func check(t *testing.T, ck *Clerk, key string, value string) { 14 | v := ck.Get(key) 15 | if v != value { 16 | t.Fatalf("Get(%v) -> %v, expected %v", key, v, value) 17 | } 18 | } 19 | 20 | func port(tag string, host int) string { 21 | s := "/var/tmp/824-" 22 | s += strconv.Itoa(os.Getuid()) + "/" 23 | os.Mkdir(s, 0777) 24 | s += "kv-" 25 | s += strconv.Itoa(os.Getpid()) + "-" 26 | s += tag + "-" 27 | s += strconv.Itoa(host) 28 | return s 29 | } 30 | 31 | func cleanup(kva []*KVPaxos) { 32 | for i := 0; i < len(kva); i++ { 33 | if kva[i] != nil { 34 | kva[i].kill() 35 | } 36 | } 37 | } 38 | 39 | // predict effect of Append(k, val) if old value is prev. 40 | func NextValue(prev string, val string) string { 41 | return prev + val 42 | } 43 | 44 | func TestBasic(t *testing.T) { 45 | if (true) { 46 | return 47 | } 48 | runtime.GOMAXPROCS(4) 49 | 50 | const nservers = 3 51 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 52 | var kvh []string = make([]string, nservers) 53 | defer cleanup(kva) 54 | 55 | for i := 0; i < nservers; i++ { 56 | kvh[i] = port("basic", i) 57 | } 58 | for i := 0; i < nservers; i++ { 59 | kva[i] = StartServer(kvh, i) 60 | } 61 | 62 | ck := MakeClerk(kvh) 63 | var cka [nservers]*Clerk 64 | for i := 0; i < nservers; i++ { 65 | cka[i] = MakeClerk([]string{kvh[i]}) 66 | } 67 | 68 | fmt.Printf("Test: Basic put/append/get ...\n") 69 | 70 | ck.Append("app", "x") 71 | ck.Append("app", "y") 72 | check(t, ck, "app", "xy") 73 | 74 | ck.Put("a", "aa") 75 | check(t, ck, "a", "aa") 76 | 77 | cka[1].Put("a", "aaa") 78 | 79 | check(t, cka[2], "a", "aaa") 80 | check(t, cka[1], "a", "aaa") 81 | check(t, ck, "a", "aaa") 82 | 83 | fmt.Printf(" ... Passed\n") 84 | 85 | fmt.Printf("Test: Concurrent clients ...\n") 86 | 87 | for iters := 0; iters < 20; iters++ { 88 | const npara = 15 89 | var ca [npara]chan bool 90 | for nth := 0; nth < npara; nth++ { 91 | ca[nth] = make(chan bool) 92 | go func(me int) { 93 | defer func() { ca[me] <- true }() 94 | ci := (rand.Int() % nservers) 95 | myck := MakeClerk([]string{kvh[ci]}) 96 | if (rand.Int() % 1000) < 500 { 97 | myck.Put("b", strconv.Itoa(rand.Int())) 98 | } else { 99 | myck.Get("b") 100 | } 101 | }(nth) 102 | } 103 | for nth := 0; nth < npara; nth++ { 104 | <-ca[nth] 105 | } 106 | var va [nservers]string 107 | for i := 0; i < nservers; i++ { 108 | va[i] = cka[i].Get("b") 109 | if va[i] != va[0] { 110 | t.Fatalf("mismatch") 111 | } 112 | } 113 | } 114 | 115 | fmt.Printf(" ... 
Passed\n") 116 | 117 | time.Sleep(1 * time.Second) 118 | } 119 | 120 | func TestDone(t *testing.T) { 121 | if true { 122 | return 123 | } 124 | runtime.GOMAXPROCS(4) 125 | 126 | const nservers = 3 127 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 128 | var kvh []string = make([]string, nservers) 129 | defer cleanup(kva) 130 | 131 | for i := 0; i < nservers; i++ { 132 | kvh[i] = port("done", i) 133 | } 134 | for i := 0; i < nservers; i++ { 135 | kva[i] = StartServer(kvh, i) 136 | } 137 | ck := MakeClerk(kvh) 138 | var cka [nservers]*Clerk 139 | for pi := 0; pi < nservers; pi++ { 140 | cka[pi] = MakeClerk([]string{kvh[pi]}) 141 | } 142 | 143 | fmt.Printf("Test: server frees Paxos log memory...\n") 144 | 145 | ck.Put("a", "aa") 146 | check(t, ck, "a", "aa") 147 | 148 | runtime.GC() 149 | var m0 runtime.MemStats 150 | runtime.ReadMemStats(&m0) 151 | // rtm's m0.Alloc is 2 MB 152 | 153 | sz := 1000000 154 | sz = 100 155 | items := 10 156 | 157 | for iters := 0; iters < 2; iters++ { 158 | for i := 0; i < items; i++ { 159 | key := strconv.Itoa(i) 160 | value := make([]byte, sz) 161 | for j := 0; j < len(value); j++ { 162 | value[j] = byte((rand.Int() % 100) + 1) 163 | } 164 | ck.Put(key, string(value)) 165 | check(t, cka[i%nservers], key, string(value)) 166 | } 167 | } 168 | 169 | // Put and Get to each of the replicas, in case 170 | // the Done information is piggybacked on 171 | // the Paxos proposer messages. 172 | for iters := 0; iters < 2; iters++ { 173 | for pi := 0; pi < nservers; pi++ { 174 | cka[pi].Put("a", "aa") 175 | check(t, cka[pi], "a", "aa") 176 | } 177 | } 178 | 179 | time.Sleep(1 * time.Second) 180 | 181 | runtime.GC() 182 | var m1 runtime.MemStats 183 | runtime.ReadMemStats(&m1) 184 | // rtm's m1.Alloc is 45 MB 185 | 186 | fmt.Printf(" Memory: before %v, after %v\n", m0.Alloc, m1.Alloc) 187 | 188 | allowed := m0.Alloc + uint64(nservers*items*sz*2) 189 | if m1.Alloc > allowed { 190 | t.Fatalf("Memory use did not shrink enough (Used: %v, allowed: %v).\n", m1.Alloc, allowed) 191 | } 192 | 193 | fmt.Printf(" ... 
Passed\n") 194 | } 195 | 196 | func pp(tag string, src int, dst int) string { 197 | s := "/var/tmp/824-" 198 | s += strconv.Itoa(os.Getuid()) + "/" 199 | s += "kv-" + tag + "-" 200 | s += strconv.Itoa(os.Getpid()) + "-" 201 | s += strconv.Itoa(src) + "-" 202 | s += strconv.Itoa(dst) 203 | return s 204 | } 205 | 206 | func cleanpp(tag string, n int) { 207 | for i := 0; i < n; i++ { 208 | for j := 0; j < n; j++ { 209 | ij := pp(tag, i, j) 210 | os.Remove(ij) 211 | } 212 | } 213 | } 214 | 215 | func part(t *testing.T, tag string, npaxos int, p1 []int, p2 []int, p3 []int) { 216 | cleanpp(tag, npaxos) 217 | 218 | pa := [][]int{p1, p2, p3} 219 | for pi := 0; pi < len(pa); pi++ { 220 | p := pa[pi] 221 | for i := 0; i < len(p); i++ { 222 | for j := 0; j < len(p); j++ { 223 | ij := pp(tag, p[i], p[j]) 224 | pj := port(tag, p[j]) 225 | err := os.Link(pj, ij) 226 | if err != nil { 227 | t.Fatalf("os.Link(%v, %v): %v\n", pj, ij, err) 228 | } 229 | } 230 | } 231 | } 232 | } 233 | 234 | func TestPartition(t *testing.T) { 235 | if true { 236 | return 237 | } 238 | runtime.GOMAXPROCS(4) 239 | 240 | tag := "partition" 241 | const nservers = 5 242 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 243 | defer cleanup(kva) 244 | defer cleanpp(tag, nservers) 245 | 246 | for i := 0; i < nservers; i++ { 247 | var kvh []string = make([]string, nservers) 248 | for j := 0; j < nservers; j++ { 249 | if j == i { 250 | kvh[j] = port(tag, i) 251 | } else { 252 | kvh[j] = pp(tag, i, j) 253 | } 254 | } 255 | kva[i] = StartServer(kvh, i) 256 | } 257 | defer part(t, tag, nservers, []int{}, []int{}, []int{}) 258 | 259 | var cka [nservers]*Clerk 260 | for i := 0; i < nservers; i++ { 261 | cka[i] = MakeClerk([]string{port(tag, i)}) 262 | } 263 | 264 | fmt.Printf("Test: No partition ...\n") 265 | 266 | part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 267 | cka[0].Put("1", "12") 268 | cka[2].Put("1", "13") 269 | check(t, cka[3], "1", "13") 270 | 271 | fmt.Printf(" ... Passed\n") 272 | 273 | fmt.Printf("Test: Progress in majority ...\n") 274 | 275 | part(t, tag, nservers, []int{2, 3, 4}, []int{0, 1}, []int{}) 276 | cka[2].Put("1", "14") 277 | check(t, cka[4], "1", "14") 278 | 279 | fmt.Printf(" ... Passed\n") 280 | 281 | fmt.Printf("Test: No progress in minority ...\n") 282 | 283 | done0 := make(chan bool) 284 | done1 := make(chan bool) 285 | go func() { 286 | cka[0].Put("1", "15") 287 | done0 <- true 288 | }() 289 | go func() { 290 | cka[1].Get("1") 291 | done1 <- true 292 | }() 293 | 294 | select { 295 | case <-done0: 296 | t.Fatalf("Put in minority completed") 297 | case <-done1: 298 | t.Fatalf("Get in minority completed") 299 | case <-time.After(time.Second): 300 | } 301 | 302 | check(t, cka[4], "1", "14") 303 | cka[3].Put("1", "16") 304 | check(t, cka[4], "1", "16") 305 | 306 | fmt.Printf(" ... 
Passed\n") 307 | 308 | fmt.Printf("Test: Completion after heal ...\n") 309 | 310 | part(t, tag, nservers, []int{0, 2, 3, 4}, []int{1}, []int{}) 311 | 312 | select { 313 | case <-done0: 314 | case <-time.After(30 * 100 * time.Millisecond): 315 | t.Fatalf("Put did not complete") 316 | } 317 | 318 | select { 319 | case <-done1: 320 | t.Fatalf("Get in minority completed") 321 | default: 322 | } 323 | 324 | check(t, cka[4], "1", "15") 325 | check(t, cka[0], "1", "15") 326 | 327 | part(t, tag, nservers, []int{0, 1, 2}, []int{3, 4}, []int{}) 328 | 329 | select { 330 | case <-done1: 331 | case <-time.After(100 * 100 * time.Millisecond): 332 | t.Fatalf("Get did not complete") 333 | } 334 | 335 | check(t, cka[1], "1", "15") 336 | 337 | fmt.Printf(" ... Passed\n") 338 | } 339 | 340 | func randclerk(kvh []string) *Clerk { 341 | sa := make([]string, len(kvh)) 342 | copy(sa, kvh) 343 | for i := range sa { 344 | j := rand.Intn(i + 1) 345 | sa[i], sa[j] = sa[j], sa[i] 346 | } 347 | return MakeClerk(sa) 348 | } 349 | 350 | // check that all known appends are present in a value, 351 | // and are in order for each concurrent client. 352 | func checkAppends(t *testing.T, v string, counts []int) { 353 | nclients := len(counts) 354 | for i := 0; i < nclients; i++ { 355 | lastoff := -1 356 | for j := 0; j < counts[i]; j++ { 357 | wanted := "x " + strconv.Itoa(i) + " " + strconv.Itoa(j) + " y" 358 | off := strings.Index(v, wanted) 359 | if off < 0 { 360 | t.Fatalf("missing element in Append result") 361 | } 362 | off1 := strings.LastIndex(v, wanted) 363 | if off1 != off { 364 | t.Fatalf("duplicate element in Append result") 365 | } 366 | if off <= lastoff { 367 | t.Fatalf("wrong order for element in Append result") 368 | } 369 | lastoff = off 370 | } 371 | } 372 | } 373 | 374 | func TestUnreliable(t *testing.T) { 375 | if true { 376 | return 377 | } 378 | runtime.GOMAXPROCS(4) 379 | 380 | const nservers = 3 381 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 382 | var kvh []string = make([]string, nservers) 383 | defer cleanup(kva) 384 | 385 | for i := 0; i < nservers; i++ { 386 | kvh[i] = port("un", i) 387 | } 388 | for i := 0; i < nservers; i++ { 389 | kva[i] = StartServer(kvh, i) 390 | kva[i].setunreliable(true) 391 | } 392 | 393 | ck := MakeClerk(kvh) 394 | var cka [nservers]*Clerk 395 | for i := 0; i < nservers; i++ { 396 | cka[i] = MakeClerk([]string{kvh[i]}) 397 | } 398 | 399 | fmt.Printf("Test: Basic put/get, unreliable ...\n") 400 | 401 | ck.Put("a", "aa") 402 | check(t, ck, "a", "aa") 403 | 404 | cka[1].Put("a", "aaa") 405 | 406 | check(t, cka[2], "a", "aaa") 407 | check(t, cka[1], "a", "aaa") 408 | check(t, ck, "a", "aaa") 409 | 410 | fmt.Printf(" ... 
Passed\n") 411 | 412 | fmt.Printf("Test: Sequence of puts, unreliable ...\n") 413 | 414 | for iters := 0; iters < 6; iters++ { 415 | const ncli = 5 416 | var ca [ncli]chan bool 417 | for cli := 0; cli < ncli; cli++ { 418 | ca[cli] = make(chan bool) 419 | go func(me int) { 420 | ok := false 421 | defer func() { ca[me] <- ok }() 422 | myck := randclerk(kvh) 423 | key := strconv.Itoa(me) 424 | vv := myck.Get(key) 425 | myck.Append(key, "0") 426 | vv = NextValue(vv, "0") 427 | myck.Append(key, "1") 428 | vv = NextValue(vv, "1") 429 | myck.Append(key, "2") 430 | vv = NextValue(vv, "2") 431 | time.Sleep(100 * time.Millisecond) 432 | if myck.Get(key) != vv { 433 | t.Fatalf("wrong value") 434 | } 435 | if myck.Get(key) != vv { 436 | t.Fatalf("wrong value") 437 | } 438 | ok = true 439 | }(cli) 440 | } 441 | for cli := 0; cli < ncli; cli++ { 442 | x := <-ca[cli] 443 | if x == false { 444 | t.Fatalf("failure") 445 | } 446 | } 447 | } 448 | 449 | fmt.Printf(" ... Passed\n") 450 | 451 | fmt.Printf("Test: Concurrent clients, unreliable ...\n") 452 | 453 | for iters := 0; iters < 20; iters++ { 454 | const ncli = 15 455 | var ca [ncli]chan bool 456 | for cli := 0; cli < ncli; cli++ { 457 | ca[cli] = make(chan bool) 458 | go func(me int) { 459 | defer func() { ca[me] <- true }() 460 | myck := randclerk(kvh) 461 | if (rand.Int() % 1000) < 500 { 462 | myck.Put("b", strconv.Itoa(rand.Int())) 463 | } else { 464 | myck.Get("b") 465 | } 466 | }(cli) 467 | } 468 | for cli := 0; cli < ncli; cli++ { 469 | <-ca[cli] 470 | } 471 | 472 | var va [nservers]string 473 | for i := 0; i < nservers; i++ { 474 | va[i] = cka[i].Get("b") 475 | if va[i] != va[0] { 476 | t.Fatalf("mismatch; 0 got %v, %v got %v", va[0], i, va[i]) 477 | } 478 | } 479 | } 480 | 481 | fmt.Printf(" ... Passed\n") 482 | 483 | fmt.Printf("Test: Concurrent Append to same key, unreliable ...\n") 484 | 485 | ck.Put("k", "") 486 | 487 | ff := func(me int, ch chan int) { 488 | ret := -1 489 | defer func() { ch <- ret }() 490 | myck := randclerk(kvh) 491 | n := 0 492 | for n < 5 { 493 | myck.Append("k", "x "+strconv.Itoa(me)+" "+strconv.Itoa(n)+" y") 494 | n++ 495 | } 496 | ret = n 497 | } 498 | 499 | ncli := 5 500 | cha := []chan int{} 501 | for i := 0; i < ncli; i++ { 502 | cha = append(cha, make(chan int)) 503 | go ff(i, cha[i]) 504 | } 505 | 506 | counts := []int{} 507 | for i := 0; i < ncli; i++ { 508 | n := <-cha[i] 509 | if n < 0 { 510 | t.Fatal("client failed") 511 | } 512 | counts = append(counts, n) 513 | } 514 | 515 | vx := ck.Get("k") 516 | checkAppends(t, vx, counts) 517 | 518 | { 519 | for i := 0; i < nservers; i++ { 520 | vi := cka[i].Get("k") 521 | if vi != vx { 522 | t.Fatalf("mismatch; 0 got %v, %v got %v", vx, i, vi) 523 | } 524 | } 525 | } 526 | 527 | fmt.Printf(" ... 
Passed\n") 528 | 529 | time.Sleep(1 * time.Second) 530 | } 531 | 532 | func TestHole(t *testing.T) { 533 | if true { 534 | //return 535 | } 536 | runtime.GOMAXPROCS(4) 537 | 538 | fmt.Printf("Test: Tolerates holes in paxos sequence ...\n") 539 | 540 | tag := "hole" 541 | const nservers = 5 542 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 543 | defer cleanup(kva) 544 | defer cleanpp(tag, nservers) 545 | 546 | for i := 0; i < nservers; i++ { 547 | var kvh []string = make([]string, nservers) 548 | for j := 0; j < nservers; j++ { 549 | if j == i { 550 | kvh[j] = port(tag, i) 551 | } else { 552 | kvh[j] = pp(tag, i, j) 553 | } 554 | } 555 | kva[i] = StartServer(kvh, i) 556 | } 557 | defer part(t, tag, nservers, []int{}, []int{}, []int{}) 558 | 559 | for iters := 0; iters < 5; iters++ { 560 | part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 561 | 562 | ck2 := MakeClerk([]string{port(tag, 2)}) 563 | ck2.Put("q", "q") 564 | 565 | done := int32(0) 566 | const nclients = 10 567 | var ca [nclients]chan bool 568 | for xcli := 0; xcli < nclients; xcli++ { 569 | ca[xcli] = make(chan bool) 570 | go func(cli int) { 571 | ok := false 572 | defer func() { ca[cli] <- ok }() 573 | var cka [nservers]*Clerk 574 | for i := 0; i < nservers; i++ { 575 | cka[i] = MakeClerk([]string{port(tag, i)}) 576 | } 577 | key := strconv.Itoa(cli) 578 | last := "" 579 | cka[0].Put(key, last) 580 | for atomic.LoadInt32(&done) == 0 { 581 | ci := (rand.Int() % 2) 582 | if (rand.Int() % 1000) < 500 { 583 | nv := strconv.Itoa(rand.Int()) 584 | cka[ci].Put(key, nv) 585 | last = nv 586 | } else { 587 | v := cka[ci].Get(key) 588 | if v != last { 589 | t.Fatalf("%v: wrong value, key %v, wanted %v, got %v", 590 | cli, key, last, v) 591 | } 592 | } 593 | } 594 | ok = true 595 | }(xcli) 596 | } 597 | 598 | time.Sleep(3 * time.Second) 599 | 600 | part(t, tag, nservers, []int{2, 3, 4}, []int{0, 1}, []int{}) 601 | 602 | // can majority partition make progress even though 603 | // minority servers were interrupted in the middle of 604 | // paxos agreements? 605 | check(t, ck2, "q", "q") 606 | ck2.Put("q", "qq") 607 | check(t, ck2, "q", "qq") 608 | 609 | // restore network, wait for all threads to exit. 610 | part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 611 | atomic.StoreInt32(&done, 1) 612 | ok := true 613 | for i := 0; i < nclients; i++ { 614 | z := <-ca[i] 615 | ok = ok && z 616 | } 617 | if ok == false { 618 | t.Fatal("something is wrong") 619 | } 620 | check(t, ck2, "q", "qq") 621 | } 622 | 623 | fmt.Printf(" ... 
Passed\n") 624 | } 625 | 626 | func TestManyPartition(t *testing.T) { 627 | if true { 628 | return 629 | } 630 | runtime.GOMAXPROCS(4) 631 | 632 | fmt.Printf("Test: Many clients, changing partitions ...\n") 633 | 634 | tag := "many" 635 | const nservers = 5 636 | var kva []*KVPaxos = make([]*KVPaxos, nservers) 637 | defer cleanup(kva) 638 | defer cleanpp(tag, nservers) 639 | 640 | for i := 0; i < nservers; i++ { 641 | var kvh []string = make([]string, nservers) 642 | for j := 0; j < nservers; j++ { 643 | if j == i { 644 | kvh[j] = port(tag, i) 645 | } else { 646 | kvh[j] = pp(tag, i, j) 647 | } 648 | } 649 | kva[i] = StartServer(kvh, i) 650 | kva[i].setunreliable(true) 651 | } 652 | defer part(t, tag, nservers, []int{}, []int{}, []int{}) 653 | part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 654 | 655 | done := int32(0) 656 | 657 | // re-partition periodically 658 | ch1 := make(chan bool) 659 | go func() { 660 | defer func() { ch1 <- true }() 661 | for atomic.LoadInt32(&done) == 0 { 662 | var a [nservers]int 663 | for i := 0; i < nservers; i++ { 664 | a[i] = (rand.Int() % 3) 665 | } 666 | pa := make([][]int, 3) 667 | for i := 0; i < 3; i++ { 668 | pa[i] = make([]int, 0) 669 | for j := 0; j < nservers; j++ { 670 | if a[j] == i { 671 | pa[i] = append(pa[i], j) 672 | } 673 | } 674 | } 675 | part(t, tag, nservers, pa[0], pa[1], pa[2]) 676 | time.Sleep(time.Duration(rand.Int63()%200) * time.Millisecond) 677 | } 678 | }() 679 | 680 | const nclients = 10 681 | var ca [nclients]chan bool 682 | for xcli := 0; xcli < nclients; xcli++ { 683 | ca[xcli] = make(chan bool) 684 | go func(cli int) { 685 | ok := false 686 | defer func() { ca[cli] <- ok }() 687 | sa := make([]string, nservers) 688 | for i := 0; i < nservers; i++ { 689 | sa[i] = port(tag, i) 690 | } 691 | for i := range sa { 692 | j := rand.Intn(i + 1) 693 | sa[i], sa[j] = sa[j], sa[i] 694 | } 695 | myck := MakeClerk(sa) 696 | key := strconv.Itoa(cli) 697 | last := "" 698 | myck.Put(key, last) 699 | for atomic.LoadInt32(&done) == 0 { 700 | if (rand.Int() % 1000) < 500 { 701 | nv := strconv.Itoa(rand.Int()) 702 | myck.Append(key, nv) 703 | last = NextValue(last, nv) 704 | } else { 705 | v := myck.Get(key) 706 | if v != last { 707 | t.Fatalf("%v: get wrong value, key %v, wanted %v, got %v", 708 | cli, key, last, v) 709 | } 710 | } 711 | } 712 | ok = true 713 | }(xcli) 714 | } 715 | 716 | time.Sleep(20 * time.Second) 717 | atomic.StoreInt32(&done, 1) 718 | <-ch1 719 | part(t, tag, nservers, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 720 | 721 | ok := true 722 | for i := 0; i < nclients; i++ { 723 | z := <-ca[i] 724 | ok = ok && z 725 | } 726 | 727 | if ok { 728 | fmt.Printf(" ... 
Passed\n") 729 | } 730 | } 731 | -------------------------------------------------------------------------------- /src/paxos/test_test.go: -------------------------------------------------------------------------------- 1 | package paxos 2 | 3 | import "testing" 4 | import "runtime" 5 | import "strconv" 6 | import "os" 7 | import "time" 8 | import "fmt" 9 | import "math/rand" 10 | import crand "crypto/rand" 11 | import "encoding/base64" 12 | import "sync/atomic" 13 | 14 | func randstring(n int) string { 15 | b := make([]byte, 2*n) 16 | crand.Read(b) 17 | s := base64.URLEncoding.EncodeToString(b) 18 | return s[0:n] 19 | } 20 | 21 | func port(tag string, host int) string { 22 | s := "/var/tmp/824-" 23 | s += strconv.Itoa(os.Getuid()) + "/" 24 | os.Mkdir(s, 0777) 25 | s += "px-" 26 | s += strconv.Itoa(os.Getpid()) + "-" 27 | s += tag + "-" 28 | s += strconv.Itoa(host) 29 | return s 30 | } 31 | 32 | func ndecided(t *testing.T, pxa []*Paxos, seq int) int { 33 | count := 0 34 | var v interface{} 35 | for i := 0; i < len(pxa); i++ { 36 | if pxa[i] != nil { 37 | decided, v1 := pxa[i].Status(seq) 38 | if decided == Decided { 39 | if count > 0 && v != v1 { 40 | t.Fatalf("decided values do not match; seq=%v i=%v v=%v v1=%v", 41 | seq, i, v, v1) 42 | } 43 | count++ 44 | v = v1 45 | } 46 | } 47 | } 48 | return count 49 | } 50 | 51 | func waitn(t *testing.T, pxa []*Paxos, seq int, wanted int) { 52 | to := 10 * time.Millisecond 53 | for iters := 0; iters < 30; iters++ { 54 | if ndecided(t, pxa, seq) >= wanted { 55 | break 56 | } 57 | time.Sleep(to) 58 | if to < time.Second { 59 | to *= 2 60 | } 61 | } 62 | nd := ndecided(t, pxa, seq) 63 | if nd < wanted { 64 | t.Fatalf("too few decided; seq=%v ndecided=%v wanted=%v", seq, nd, wanted) 65 | } 66 | } 67 | 68 | func waitmajority(t *testing.T, pxa []*Paxos, seq int) { 69 | waitn(t, pxa, seq, (len(pxa)/2)+1) 70 | } 71 | 72 | func checkmax(t *testing.T, pxa []*Paxos, seq int, max int) { 73 | time.Sleep(3 * time.Second) 74 | nd := ndecided(t, pxa, seq) 75 | if nd > max { 76 | t.Fatalf("too many decided; seq=%v ndecided=%v max=%v", seq, nd, max) 77 | } 78 | } 79 | 80 | func cleanup(pxa []*Paxos) { 81 | for i := 0; i < len(pxa); i++ { 82 | if pxa[i] != nil { 83 | pxa[i].Kill() 84 | } 85 | } 86 | } 87 | 88 | func noTestSpeed(t *testing.T) { 89 | runtime.GOMAXPROCS(4) 90 | 91 | const npaxos = 3 92 | var pxa []*Paxos = make([]*Paxos, npaxos) 93 | var pxh []string = make([]string, npaxos) 94 | defer cleanup(pxa) 95 | 96 | for i := 0; i < npaxos; i++ { 97 | pxh[i] = port("time", i) 98 | } 99 | for i := 0; i < npaxos; i++ { 100 | pxa[i] = Make(pxh, i, nil) 101 | } 102 | 103 | t0 := time.Now() 104 | 105 | for i := 0; i < 20; i++ { 106 | pxa[0].Start(i, "x") 107 | waitn(t, pxa, i, npaxos) 108 | } 109 | 110 | d := time.Since(t0) 111 | fmt.Printf("20 agreements %v seconds\n", d.Seconds()) 112 | } 113 | 114 | func TestBasic(t *testing.T) { 115 | now := time.Now().Second() 116 | runtime.GOMAXPROCS(4) 117 | 118 | const npaxos = 3 119 | var pxa []*Paxos = make([]*Paxos, npaxos) 120 | var pxh []string = make([]string, npaxos) 121 | defer cleanup(pxa) 122 | 123 | for i := 0; i < npaxos; i++ { 124 | pxh[i] = port("basic", i) 125 | } 126 | for i := 0; i < npaxos; i++ { 127 | pxa[i] = Make(pxh, i, nil) 128 | } 129 | 130 | fmt.Printf("Test: Single proposer ...\n") 131 | 132 | pxa[0].Start(0, "hello") 133 | waitn(t, pxa, 0, npaxos) 134 | 135 | fmt.Printf(" ... 
Passed\n") 136 | 137 | fmt.Printf("Test: Many proposers, same value ...\n") 138 | 139 | for i := 0; i < npaxos; i++ { 140 | pxa[i].Start(1, 77) 141 | } 142 | waitn(t, pxa, 1, npaxos) 143 | 144 | fmt.Printf(" ... Passed\n") 145 | 146 | fmt.Printf("Test: Many proposers, different values ...\n") 147 | 148 | pxa[0].Start(2, 100) 149 | pxa[1].Start(2, 101) 150 | pxa[2].Start(2, 102) 151 | waitn(t, pxa, 2, npaxos) 152 | 153 | fmt.Printf(" ... Passed\n") 154 | 155 | fmt.Printf("Test: Out-of-order instances ...\n") 156 | 157 | pxa[0].Start(7, 700) 158 | pxa[0].Start(6, 600) 159 | pxa[1].Start(5, 500) 160 | waitn(t, pxa, 7, npaxos) 161 | pxa[0].Start(4, 400) 162 | pxa[1].Start(3, 300) 163 | waitn(t, pxa, 6, npaxos) 164 | waitn(t, pxa, 5, npaxos) 165 | waitn(t, pxa, 4, npaxos) 166 | waitn(t, pxa, 3, npaxos) 167 | 168 | if pxa[0].Max() != 7 { 169 | t.Fatalf("wrong Max()") 170 | } 171 | 172 | fmt.Printf(" ... Passed\n") 173 | fmt.Printf("diff = %d\n", time.Now().Second() - now) 174 | } 175 | 176 | func TestDeaf(t *testing.T) { 177 | now := time.Now().Second() 178 | runtime.GOMAXPROCS(4) 179 | 180 | const npaxos = 5 181 | var pxa []*Paxos = make([]*Paxos, npaxos) 182 | var pxh []string = make([]string, npaxos) 183 | defer cleanup(pxa) 184 | 185 | for i := 0; i < npaxos; i++ { 186 | pxh[i] = port("deaf", i) 187 | } 188 | for i := 0; i < npaxos; i++ { 189 | pxa[i] = Make(pxh, i, nil) 190 | } 191 | 192 | fmt.Printf("Test: Deaf proposer ...\n") 193 | 194 | pxa[0].Start(0, "hello") 195 | waitn(t, pxa, 0, npaxos) 196 | 197 | os.Remove(pxh[0]) 198 | os.Remove(pxh[npaxos-1]) 199 | 200 | pxa[1].Start(1, "goodbye") 201 | waitmajority(t, pxa, 1) 202 | time.Sleep(1 * time.Second) 203 | if ndecided(t, pxa, 1) != npaxos-2 { 204 | t.Fatalf("a deaf peer heard about a decision") 205 | } 206 | 207 | pxa[0].Start(1, "xxx") 208 | waitn(t, pxa, 1, npaxos-1) 209 | time.Sleep(1 * time.Second) 210 | if ndecided(t, pxa, 1) != npaxos-1 { 211 | t.Fatalf("a deaf peer heard about a decision") 212 | } 213 | 214 | pxa[npaxos-1].Start(1, "yyy") 215 | waitn(t, pxa, 1, npaxos) 216 | 217 | fmt.Printf(" ... Passed\n") 218 | fmt.Printf("diff = %d\n", time.Now().Second() - now) 219 | } 220 | 221 | func TestForget(t *testing.T) { 222 | now := time.Now().Second() 223 | runtime.GOMAXPROCS(4) 224 | 225 | const npaxos = 6 226 | var pxa []*Paxos = make([]*Paxos, npaxos) 227 | var pxh []string = make([]string, npaxos) 228 | defer cleanup(pxa) 229 | 230 | for i := 0; i < npaxos; i++ { 231 | pxh[i] = port("gc", i) 232 | } 233 | for i := 0; i < npaxos; i++ { 234 | pxa[i] = Make(pxh, i, nil) 235 | } 236 | 237 | fmt.Printf("Test: Forgetting ...\n") 238 | 239 | // initial Min() correct? 240 | for i := 0; i < npaxos; i++ { 241 | m := pxa[i].Min() 242 | if m > 0 { 243 | t.Fatalf("wrong initial Min() %v", m) 244 | } 245 | } 246 | 247 | pxa[0].Start(0, "00") 248 | pxa[1].Start(1, "11") 249 | pxa[2].Start(2, "22") 250 | pxa[0].Start(6, "66") 251 | pxa[1].Start(7, "77") 252 | 253 | waitn(t, pxa, 0, npaxos) 254 | 255 | // Min() correct? 256 | for i := 0; i < npaxos; i++ { 257 | m := pxa[i].Min() 258 | if m != 0 { 259 | t.Fatalf("wrong Min() %v; expected 0", m) 260 | } 261 | } 262 | 263 | waitn(t, pxa, 1, npaxos) 264 | 265 | // Min() correct? 266 | for i := 0; i < npaxos; i++ { 267 | m := pxa[i].Min() 268 | if m != 0 { 269 | t.Fatalf("wrong Min() %v; expected 0", m) 270 | } 271 | } 272 | 273 | // everyone Done() -> Min() changes? 
274 | for i := 0; i < npaxos; i++ { 275 | pxa[i].Done(0) 276 | } 277 | for i := 1; i < npaxos; i++ { 278 | pxa[i].Done(1) 279 | } 280 | for i := 0; i < npaxos; i++ { 281 | pxa[i].Start(8+i, "xx") 282 | } 283 | allok := false 284 | for iters := 0; iters < 12; iters++ { 285 | allok = true 286 | for i := 0; i < npaxos; i++ { 287 | s := pxa[i].Min() 288 | if s != 1 { 289 | allok = false 290 | } 291 | } 292 | if allok { 293 | break 294 | } 295 | time.Sleep(1 * time.Second) 296 | } 297 | if allok != true { 298 | t.Fatalf("Min() did not advance after Done()") 299 | } 300 | 301 | fmt.Printf(" ... Passed\n") 302 | fmt.Printf("diff = %d\n", time.Now().Second() - now) 303 | } 304 | 305 | func TestManyForget(t *testing.T) { 306 | now := time.Now().Second() 307 | runtime.GOMAXPROCS(4) 308 | 309 | const npaxos = 3 310 | var pxa []*Paxos = make([]*Paxos, npaxos) 311 | var pxh []string = make([]string, npaxos) 312 | defer cleanup(pxa) 313 | 314 | for i := 0; i < npaxos; i++ { 315 | pxh[i] = port("manygc", i) 316 | } 317 | for i := 0; i < npaxos; i++ { 318 | pxa[i] = Make(pxh, i, nil) 319 | pxa[i].setunreliable(true) 320 | } 321 | 322 | fmt.Printf("Test: Lots of forgetting ...\n") 323 | 324 | const maxseq = 20 325 | 326 | go func() { 327 | na := rand.Perm(maxseq) 328 | for i := 0; i < len(na); i++ { 329 | seq := na[i] 330 | j := (rand.Int() % npaxos) 331 | v := rand.Int() 332 | pxa[j].Start(seq, v) 333 | runtime.Gosched() 334 | } 335 | }() 336 | 337 | done := make(chan bool) 338 | go func() { 339 | for { 340 | select { 341 | case <-done: 342 | return 343 | default: 344 | } 345 | seq := (rand.Int() % maxseq) 346 | i := (rand.Int() % npaxos) 347 | if seq >= pxa[i].Min() { 348 | decided, _ := pxa[i].Status(seq) 349 | if decided == Decided { 350 | pxa[i].Done(seq) 351 | } 352 | } 353 | runtime.Gosched() 354 | } 355 | }() 356 | 357 | time.Sleep(5 * time.Second) 358 | done <- true 359 | for i := 0; i < npaxos; i++ { 360 | pxa[i].setunreliable(false) 361 | } 362 | time.Sleep(2 * time.Second) 363 | 364 | for seq := 0; seq < maxseq; seq++ { 365 | for i := 0; i < npaxos; i++ { 366 | if seq >= pxa[i].Min() { 367 | pxa[i].Status(seq) 368 | } 369 | } 370 | } 371 | 372 | fmt.Printf(" ... Passed\n") 373 | fmt.Printf("diff = %d\n", time.Now().Second() - now) 374 | } 375 | 376 | // 377 | // does paxos forgetting actually free the memory? 
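// (it fills ten instances with ~1 MB values, calls Done(10) everywhere, and checks that allocations shrink once Min() advances)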
378 | //
379 | func TestForgetMem(t *testing.T) {
380 | now := time.Now()
381 | runtime.GOMAXPROCS(4)
382 | 
383 | fmt.Printf("Test: Paxos frees forgotten instance memory ...\n")
384 | 
385 | const npaxos = 3
386 | var pxa []*Paxos = make([]*Paxos, npaxos)
387 | var pxh []string = make([]string, npaxos)
388 | defer cleanup(pxa)
389 | 
390 | for i := 0; i < npaxos; i++ {
391 | pxh[i] = port("gcmem", i)
392 | }
393 | for i := 0; i < npaxos; i++ {
394 | pxa[i] = Make(pxh, i, nil)
395 | }
396 | 
397 | pxa[0].Start(0, "x")
398 | waitn(t, pxa, 0, npaxos)
399 | 
400 | runtime.GC()
401 | var m0 runtime.MemStats
402 | runtime.ReadMemStats(&m0)
403 | // m0.Alloc about a megabyte
404 | 
405 | for i := 1; i <= 10; i++ {
406 | big := make([]byte, 1000000)
407 | for j := 0; j < len(big); j++ {
408 | big[j] = byte('a' + rand.Int()%26)
409 | }
410 | pxa[0].Start(i, string(big))
411 | waitn(t, pxa, i, npaxos)
412 | }
413 | 
414 | runtime.GC()
415 | var m1 runtime.MemStats
416 | runtime.ReadMemStats(&m1)
417 | // m1.Alloc about 90 megabytes
418 | 
419 | for i := 0; i < npaxos; i++ {
420 | pxa[i].Done(10)
421 | }
422 | for i := 0; i < npaxos; i++ {
423 | pxa[i].Start(11+i, "z")
424 | }
425 | time.Sleep(3 * time.Second)
426 | for i := 0; i < npaxos; i++ {
427 | if pxa[i].Min() != 11 {
428 | t.Fatalf("expected Min() %v, got %v\n", 11, pxa[i].Min())
429 | }
430 | }
431 | 
432 | runtime.GC()
433 | var m2 runtime.MemStats
434 | runtime.ReadMemStats(&m2)
435 | // m2.Alloc about 10 megabytes
436 | 
437 | if m2.Alloc > (m1.Alloc / 2) {
438 | t.Fatalf("memory use did not shrink enough")
439 | }
440 | 
441 | again := make([]string, 10)
442 | for seq := 0; seq < npaxos && seq < 10; seq++ {
443 | again[seq] = randstring(20)
444 | for i := 0; i < npaxos; i++ {
445 | fate, _ := pxa[i].Status(seq)
446 | if fate != Forgotten {
447 | t.Fatalf("seq %d < Min() %d but not Forgotten", seq, pxa[i].Min())
448 | }
449 | pxa[i].Start(seq, again[seq])
450 | }
451 | }
452 | time.Sleep(1 * time.Second)
453 | for seq := 0; seq < npaxos && seq < 10; seq++ {
454 | for i := 0; i < npaxos; i++ {
455 | fate, v := pxa[i].Status(seq)
456 | if fate != Forgotten || v == again[seq] {
457 | t.Fatalf("seq %d < Min() %d but not Forgotten", seq, pxa[i].Min())
458 | }
459 | }
460 | }
461 | 
462 | fmt.Printf(" ... Passed\n")
463 | fmt.Printf("diff = %v\n", time.Since(now))
464 | }
465 | 
466 | //
467 | // does Max() work after Done()s?
468 | //
469 | func TestDoneMax(t *testing.T) {
470 | now := time.Now()
471 | runtime.GOMAXPROCS(4)
472 | 
473 | fmt.Printf("Test: Paxos Max() after Done()s ...\n")
474 | 
475 | const npaxos = 3
476 | var pxa []*Paxos = make([]*Paxos, npaxos)
477 | var pxh []string = make([]string, npaxos)
478 | defer cleanup(pxa)
479 | 
480 | for i := 0; i < npaxos; i++ {
481 | pxh[i] = port("donemax", i)
482 | }
483 | for i := 0; i < npaxos; i++ {
484 | pxa[i] = Make(pxh, i, nil)
485 | }
486 | 
487 | pxa[0].Start(0, "x")
488 | waitn(t, pxa, 0, npaxos)
489 | 
490 | for i := 1; i <= 10; i++ {
491 | pxa[0].Start(i, "y")
492 | waitn(t, pxa, i, npaxos)
493 | }
494 | 
495 | for i := 0; i < npaxos; i++ {
496 | pxa[i].Done(10)
497 | }
498 | 
499 | // Propagate messages so everyone knows about Done(10)
500 | for i := 0; i < npaxos; i++ {
501 | pxa[i].Start(10, "z")
502 | }
503 | time.Sleep(2 * time.Second)
504 | for i := 0; i < npaxos; i++ {
505 | mx := pxa[i].Max()
506 | if mx != 10 {
507 | t.Fatalf("Max() did not return correct result %d after calling Done(); returned %d", 10, mx)
508 | }
509 | }
510 | 
511 | fmt.Printf(" ... 
Passed\n") 512 | fmt.Printf("diff = %d\n", time.Now().Second() - now) 513 | } 514 | 515 | func TestRPCCount(t *testing.T) { 516 | now := time.Now().Second() 517 | runtime.GOMAXPROCS(4) 518 | 519 | fmt.Printf("Test: RPC counts aren't too high ...\n") 520 | 521 | const npaxos = 3 522 | var pxa []*Paxos = make([]*Paxos, npaxos) 523 | var pxh []string = make([]string, npaxos) 524 | defer cleanup(pxa) 525 | 526 | for i := 0; i < npaxos; i++ { 527 | pxh[i] = port("count", i) 528 | } 529 | for i := 0; i < npaxos; i++ { 530 | pxa[i] = Make(pxh, i, nil) 531 | } 532 | 533 | ninst1 := 5 534 | seq := 0 535 | for i := 0; i < ninst1; i++ { 536 | pxa[0].Start(seq, "x") 537 | waitn(t, pxa, seq, npaxos) 538 | seq++ 539 | } 540 | 541 | time.Sleep(2 * time.Second) 542 | 543 | total1 := int32(0) 544 | for j := 0; j < npaxos; j++ { 545 | total1 += atomic.LoadInt32(&pxa[j].rpcCount) 546 | } 547 | 548 | // per agreement: 549 | // 3 prepares 550 | // 3 accepts 551 | // 3 decides 552 | expected1 := int32(ninst1 * npaxos * npaxos) 553 | if total1 > expected1 { 554 | t.Fatalf("too many RPCs for serial Start()s; %v instances, got %v, expected %v", 555 | ninst1, total1, expected1) 556 | } 557 | 558 | ninst2 := 5 559 | for i := 0; i < ninst2; i++ { 560 | for j := 0; j < npaxos; j++ { 561 | go pxa[j].Start(seq, j+(i*10)) 562 | } 563 | waitn(t, pxa, seq, npaxos) 564 | seq++ 565 | } 566 | 567 | time.Sleep(2 * time.Second) 568 | 569 | total2 := int32(0) 570 | for j := 0; j < npaxos; j++ { 571 | total2 += atomic.LoadInt32(&pxa[j].rpcCount) 572 | } 573 | total2 -= total1 574 | 575 | // worst case per agreement: 576 | // Proposer 1: 3 prep, 3 acc, 3 decides. 577 | // Proposer 2: 3 prep, 3 acc, 3 prep, 3 acc, 3 decides. 578 | // Proposer 3: 3 prep, 3 acc, 3 prep, 3 acc, 3 prep, 3 acc, 3 decides. 579 | expected2 := int32(ninst2 * npaxos * 15) 580 | if total2 > expected2 { 581 | t.Fatalf("too many RPCs for concurrent Start()s; %v instances, got %v, expected %v", 582 | ninst2, total2, expected2) 583 | } 584 | 585 | fmt.Printf(" ... Passed\n") 586 | fmt.Printf("diff = %d\n", time.Now().Second() - now) 587 | } 588 | 589 | // 590 | // many agreements (without failures) 591 | // 592 | func TestMany(t *testing.T) { 593 | now := time.Now().Second() 594 | runtime.GOMAXPROCS(4) 595 | 596 | fmt.Printf("Test: Many instances ...\n") 597 | 598 | const npaxos = 3 599 | var pxa []*Paxos = make([]*Paxos, npaxos) 600 | var pxh []string = make([]string, npaxos) 601 | defer cleanup(pxa) 602 | 603 | for i := 0; i < npaxos; i++ { 604 | pxh[i] = port("many", i) 605 | } 606 | for i := 0; i < npaxos; i++ { 607 | pxa[i] = Make(pxh, i, nil) 608 | pxa[i].Start(0, 0) 609 | } 610 | 611 | const ninst = 50 612 | for seq := 1; seq < ninst; seq++ { 613 | // only 5 active instances, to limit the 614 | // number of file descriptors. 615 | for seq >= 5 && ndecided(t, pxa, seq-5) < npaxos { 616 | time.Sleep(20 * time.Millisecond) 617 | } 618 | for i := 0; i < npaxos; i++ { 619 | pxa[i].Start(seq, (seq*10)+i) 620 | } 621 | } 622 | 623 | for { 624 | done := true 625 | for seq := 1; seq < ninst; seq++ { 626 | if ndecided(t, pxa, seq) < npaxos { 627 | done = false 628 | } 629 | } 630 | if done { 631 | break 632 | } 633 | time.Sleep(100 * time.Millisecond) 634 | } 635 | 636 | fmt.Printf(" ... Passed\n") 637 | fmt.Printf("diff = %d\n", time.Now().Second() - now) 638 | } 639 | 640 | // 641 | // a peer starts up, with proposal, after others decide. 642 | // then another peer starts, without a proposal. 
643 | //
644 | func TestOld(t *testing.T) {
645 | now := time.Now()
646 | runtime.GOMAXPROCS(4)
647 | 
648 | fmt.Printf("Test: Minority proposal ignored ...\n")
649 | 
650 | const npaxos = 5
651 | var pxa []*Paxos = make([]*Paxos, npaxos)
652 | var pxh []string = make([]string, npaxos)
653 | defer cleanup(pxa)
654 | 
655 | for i := 0; i < npaxos; i++ {
656 | pxh[i] = port("old", i)
657 | }
658 | 
659 | pxa[1] = Make(pxh, 1, nil)
660 | pxa[2] = Make(pxh, 2, nil)
661 | pxa[3] = Make(pxh, 3, nil)
662 | pxa[1].Start(1, 111)
663 | 
664 | waitmajority(t, pxa, 1)
665 | 
666 | pxa[0] = Make(pxh, 0, nil)
667 | pxa[0].Start(1, 222)
668 | 
669 | waitn(t, pxa, 1, 4)
670 | 
671 | if false {
672 | pxa[4] = Make(pxh, 4, nil)
673 | waitn(t, pxa, 1, npaxos)
674 | }
675 | 
676 | fmt.Printf(" ... Passed\n")
677 | fmt.Printf("diff = %v\n", time.Since(now))
678 | }
679 | 
680 | //
681 | // many agreements, with unreliable RPC
682 | //
683 | func TestManyUnreliable(t *testing.T) {
684 | now := time.Now()
685 | 
686 | runtime.GOMAXPROCS(4)
687 | 
688 | fmt.Printf("Test: Many instances, unreliable RPC ...\n")
689 | 
690 | const npaxos = 3
691 | var pxa []*Paxos = make([]*Paxos, npaxos)
692 | var pxh []string = make([]string, npaxos)
693 | defer cleanup(pxa)
694 | 
695 | for i := 0; i < npaxos; i++ {
696 | pxh[i] = port("manyun", i)
697 | }
698 | for i := 0; i < npaxos; i++ {
699 | pxa[i] = Make(pxh, i, nil)
700 | pxa[i].setunreliable(true)
701 | pxa[i].Start(0, 0)
702 | }
703 | 
704 | const ninst = 50
705 | for seq := 1; seq < ninst; seq++ {
706 | // only 3 active instances, to limit the
707 | // number of file descriptors.
708 | for seq >= 3 && ndecided(t, pxa, seq-3) < npaxos {
709 | time.Sleep(20 * time.Millisecond)
710 | }
711 | for i := 0; i < npaxos; i++ {
712 | pxa[i].Start(seq, (seq*10)+i)
713 | }
714 | }
715 | 
716 | for {
717 | done := true
718 | for seq := 1; seq < ninst; seq++ {
719 | if ndecided(t, pxa, seq) < npaxos {
720 | done = false
721 | }
722 | }
723 | if done {
724 | break
725 | }
726 | time.Sleep(100 * time.Millisecond)
727 | }
728 | 
729 | fmt.Printf(" ... Passed\n")
730 | fmt.Printf("diff = %v\n", time.Since(now))
731 | }
732 | 
733 | func pp(tag string, src int, dst int) string {
734 | s := "/var/tmp/824-"
735 | s += strconv.Itoa(os.Getuid()) + "/"
736 | s += "px-" + tag + "-"
737 | s += strconv.Itoa(os.Getpid()) + "-"
738 | s += strconv.Itoa(src) + "-"
739 | s += strconv.Itoa(dst)
740 | return s
741 | }
742 | 
743 | func cleanpp(tag string, n int) {
744 | for i := 0; i < n; i++ {
745 | for j := 0; j < n; j++ {
746 | ij := pp(tag, i, j)
747 | os.Remove(ij)
748 | }
749 | }
750 | }
751 | 
752 | func part(t *testing.T, tag string, npaxos int, p1 []int, p2 []int, p3 []int) {
753 | cleanpp(tag, npaxos)
754 | 
755 | pa := [][]int{p1, p2, p3}
756 | for pi := 0; pi < len(pa); pi++ {
757 | p := pa[pi]
758 | for i := 0; i < len(p); i++ {
759 | for j := 0; j < len(p); j++ {
760 | ij := pp(tag, p[i], p[j])
761 | pj := port(tag, p[j])
762 | err := os.Link(pj, ij)
763 | if err != nil {
764 | // one reason this link can fail is if the
765 | // corresponding Paxos peer has prematurely quit and
766 | // deleted its socket file (e.g., called px.Kill()).
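// The hard links created above are what "connect" two peers: peer i
// dials pp(tag, i, j), so linking that path to peer j's real listening
// socket port(tag, j) makes the pair reachable, while peers placed in
// different groups never get a link and their RPCs simply fail to
// connect. Any other link error here is unexpected, so abort the test.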
767 | t.Fatalf("os.Link(%v, %v): %v\n", pj, ij, err)
768 | }
769 | }
770 | }
771 | }
772 | }
773 | 
774 | func TestPartition(t *testing.T) {
775 | now := time.Now()
776 | runtime.GOMAXPROCS(4)
777 | 
778 | tag := "partition"
779 | const npaxos = 5
780 | var pxa []*Paxos = make([]*Paxos, npaxos)
781 | defer cleanup(pxa)
782 | defer cleanpp(tag, npaxos)
783 | 
784 | for i := 0; i < npaxos; i++ {
785 | var pxh []string = make([]string, npaxos)
786 | for j := 0; j < npaxos; j++ {
787 | if j == i {
788 | pxh[j] = port(tag, i)
789 | } else {
790 | pxh[j] = pp(tag, i, j)
791 | }
792 | }
793 | pxa[i] = Make(pxh, i, nil)
794 | }
795 | defer part(t, tag, npaxos, []int{}, []int{}, []int{})
796 | 
797 | seq := 0
798 | 
799 | fmt.Printf("Test: No decision if partitioned ...\n")
800 | 
801 | part(t, tag, npaxos, []int{0, 2}, []int{1, 3}, []int{4})
802 | pxa[1].Start(seq, 111)
803 | checkmax(t, pxa, seq, 0)
804 | 
805 | fmt.Printf(" ... Passed\n")
806 | 
807 | fmt.Printf("Test: Decision in majority partition ...\n")
808 | 
809 | part(t, tag, npaxos, []int{0}, []int{1, 2, 3}, []int{4})
810 | time.Sleep(2 * time.Second)
811 | waitmajority(t, pxa, seq)
812 | 
813 | fmt.Printf(" ... Passed\n")
814 | 
815 | fmt.Printf("Test: All agree after full heal ...\n")
816 | 
817 | pxa[0].Start(seq, 1000) // poke them
818 | pxa[4].Start(seq, 1004)
819 | part(t, tag, npaxos, []int{0, 1, 2, 3, 4}, []int{}, []int{})
820 | 
821 | waitn(t, pxa, seq, npaxos)
822 | 
823 | fmt.Printf(" ... Passed\n")
824 | 
825 | fmt.Printf("Test: One peer switches partitions ...\n")
826 | 
827 | for iters := 0; iters < 20; iters++ {
828 | seq++
829 | 
830 | part(t, tag, npaxos, []int{0, 1, 2}, []int{3, 4}, []int{})
831 | pxa[0].Start(seq, seq*10)
832 | pxa[3].Start(seq, (seq*10)+1)
833 | waitmajority(t, pxa, seq)
834 | if ndecided(t, pxa, seq) > 3 {
835 | t.Fatalf("too many decided")
836 | }
837 | 
838 | part(t, tag, npaxos, []int{0, 1}, []int{2, 3, 4}, []int{})
839 | waitn(t, pxa, seq, npaxos)
840 | }
841 | 
842 | fmt.Printf(" ... Passed\n")
843 | 
844 | fmt.Printf("Test: One peer switches partitions, unreliable ...\n")
845 | 
846 | for iters := 0; iters < 20; iters++ {
847 | seq++
848 | 
849 | for i := 0; i < npaxos; i++ {
850 | pxa[i].setunreliable(true)
851 | }
852 | 
853 | part(t, tag, npaxos, []int{0, 1, 2}, []int{3, 4}, []int{})
854 | for i := 0; i < npaxos; i++ {
855 | pxa[i].Start(seq, (seq*10)+i)
856 | }
857 | waitn(t, pxa, seq, 3)
858 | if ndecided(t, pxa, seq) > 3 {
859 | t.Fatalf("too many decided")
860 | }
861 | 
862 | part(t, tag, npaxos, []int{0, 1}, []int{2, 3, 4}, []int{})
863 | 
864 | for i := 0; i < npaxos; i++ {
865 | pxa[i].setunreliable(false)
866 | }
867 | 
868 | waitn(t, pxa, seq, 5)
869 | }
870 | 
871 | fmt.Printf(" ... 
Passed\n") 872 | fmt.Printf("diff = %d\n", time.Now().Second() - now) 873 | } 874 | 875 | func TestLots(t *testing.T) { 876 | now := time.Now().Second() 877 | runtime.GOMAXPROCS(4) 878 | 879 | fmt.Printf("Test: Many requests, changing partitions ...\n") 880 | 881 | tag := "lots" 882 | const npaxos = 5 883 | var pxa []*Paxos = make([]*Paxos, npaxos) 884 | defer cleanup(pxa) 885 | defer cleanpp(tag, npaxos) 886 | 887 | for i := 0; i < npaxos; i++ { 888 | var pxh []string = make([]string, npaxos) 889 | for j := 0; j < npaxos; j++ { 890 | if j == i { 891 | pxh[j] = port(tag, i) 892 | } else { 893 | pxh[j] = pp(tag, i, j) 894 | } 895 | } 896 | pxa[i] = Make(pxh, i, nil) 897 | pxa[i].setunreliable(true) 898 | } 899 | defer part(t, tag, npaxos, []int{}, []int{}, []int{}) 900 | 901 | done := int32(0) 902 | 903 | // re-partition periodically 904 | ch1 := make(chan bool) 905 | go func() { 906 | defer func() { ch1 <- true }() 907 | for atomic.LoadInt32(&done) == 0 { 908 | var a [npaxos]int 909 | for i := 0; i < npaxos; i++ { 910 | a[i] = (rand.Int() % 3) 911 | } 912 | pa := make([][]int, 3) 913 | for i := 0; i < 3; i++ { 914 | pa[i] = make([]int, 0) 915 | for j := 0; j < npaxos; j++ { 916 | if a[j] == i { 917 | pa[i] = append(pa[i], j) 918 | } 919 | } 920 | } 921 | part(t, tag, npaxos, pa[0], pa[1], pa[2]) 922 | time.Sleep(time.Duration(rand.Int63()%200) * time.Millisecond) 923 | } 924 | }() 925 | 926 | seq := int32(0) 927 | 928 | // periodically start a new instance 929 | ch2 := make(chan bool) 930 | go func() { 931 | defer func() { ch2 <- true }() 932 | for atomic.LoadInt32(&done) == 0 { 933 | // how many instances are in progress? 934 | nd := 0 935 | sq := int(atomic.LoadInt32(&seq)) 936 | for i := 0; i < sq; i++ { 937 | if ndecided(t, pxa, i) == npaxos { 938 | nd++ 939 | } 940 | } 941 | if sq-nd < 10 { 942 | for i := 0; i < npaxos; i++ { 943 | pxa[i].Start(sq, rand.Int()%10) 944 | } 945 | atomic.AddInt32(&seq, 1) 946 | } 947 | time.Sleep(time.Duration(rand.Int63()%300) * time.Millisecond) 948 | } 949 | }() 950 | 951 | // periodically check that decisions are consistent 952 | ch3 := make(chan bool) 953 | go func() { 954 | defer func() { ch3 <- true }() 955 | for atomic.LoadInt32(&done) == 0 { 956 | for i := 0; i < int(atomic.LoadInt32(&seq)); i++ { 957 | ndecided(t, pxa, i) 958 | } 959 | time.Sleep(time.Duration(rand.Int63()%300) * time.Millisecond) 960 | } 961 | }() 962 | 963 | time.Sleep(20 * time.Second) 964 | atomic.StoreInt32(&done, 1) 965 | <-ch1 966 | <-ch2 967 | <-ch3 968 | 969 | // repair, then check that all instances decided. 970 | for i := 0; i < npaxos; i++ { 971 | pxa[i].setunreliable(false) 972 | } 973 | part(t, tag, npaxos, []int{0, 1, 2, 3, 4}, []int{}, []int{}) 974 | time.Sleep(5 * time.Second) 975 | 976 | for i := 0; i < int(atomic.LoadInt32(&seq)); i++ { 977 | waitmajority(t, pxa, i) 978 | } 979 | 980 | fmt.Printf(" ... Passed\n") 981 | fmt.Printf("diff = %d\n", time.Now().Second() - now) 982 | } 983 | --------------------------------------------------------------------------------