├── hw1 ├── pkg │ └── mod │ │ └── cache │ │ └── lock ├── .idea │ ├── .gitignore │ ├── misc.xml │ ├── vcs.xml │ ├── modules.xml │ └── CS188-hw1.iml ├── src │ ├── .DS_Store │ ├── main │ │ ├── .DS_Store │ │ ├── mr-testout.txt │ │ ├── test-wc.sh │ │ └── wc.go │ ├── .gitignore │ └── mapreduce │ │ ├── common.go │ │ ├── worker.go │ │ ├── master.go │ │ ├── test_test.go │ │ └── mapreduce.go ├── gitpush.sh └── README.md ├── hw2 ├── .gitignore ├── .idea │ ├── .gitignore │ ├── misc.xml │ ├── vcs.xml │ ├── modules.xml │ └── hw2.iml └── src │ ├── main │ ├── viewd.go │ ├── pbd.go │ └── pbc.go │ ├── pbservice │ ├── test.sh │ ├── common.go │ ├── client.go │ ├── server.go │ └── out.txt │ └── viewservice │ ├── client.go │ ├── common.go │ ├── test_test.go │ └── server.go ├── hw3 ├── .idea │ ├── .gitignore │ ├── misc.xml │ ├── vcs.xml │ ├── modules.xml │ └── hw3.iml └── src │ ├── .DS_Store │ ├── paxos │ ├── .DS_Store │ ├── test.sh │ └── common.go │ ├── kvpaxos │ ├── out.txt │ ├── test.sh │ ├── common.go │ ├── client.go │ └── server.go │ ├── main │ ├── viewd.go │ ├── pbd.go │ └── pbc.go │ ├── pbservice │ ├── common.go │ ├── client.go │ └── server.go │ └── viewservice │ ├── client.go │ ├── common.go │ ├── server.go │ └── test_test.go ├── hw4 ├── .idea │ ├── .gitignore │ ├── misc.xml │ ├── vcs.xml │ ├── modules.xml │ └── hw4.iml └── src │ ├── main │ ├── viewd.go │ ├── pbd.go │ └── pbc.go │ ├── shardkv │ ├── test_linux.sh │ ├── test.sh │ ├── common.go │ ├── client.go │ └── test_test.go │ ├── pbservice │ ├── common.go │ ├── client.go │ └── server.go │ ├── paxos │ └── common.go │ ├── kvpaxos │ ├── common.go │ ├── client.go │ └── server.go │ ├── shardmaster │ ├── common.go │ ├── client.go │ ├── test_test.go │ └── server.go │ └── viewservice │ ├── client.go │ ├── common.go │ ├── server.go │ └── test_test.go ├── gitpush.sh └── README.md /hw1/pkg/mod/cache/lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hw2/.gitignore: -------------------------------------------------------------------------------- 1 | */out.txt 2 | -------------------------------------------------------------------------------- /hw1/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /hw2/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /hw3/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /hw4/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /hw1/src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AdrianHsu/MIT-6.824-Lab/HEAD/hw1/src/.DS_Store -------------------------------------------------------------------------------- /hw3/src/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AdrianHsu/MIT-6.824-Lab/HEAD/hw3/src/.DS_Store -------------------------------------------------------------------------------- /hw1/src/main/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AdrianHsu/MIT-6.824-Lab/HEAD/hw1/src/main/.DS_Store -------------------------------------------------------------------------------- /hw3/src/paxos/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AdrianHsu/MIT-6.824-Lab/HEAD/hw3/src/paxos/.DS_Store -------------------------------------------------------------------------------- /hw1/src/.gitignore: -------------------------------------------------------------------------------- 1 | mrtmp.* 2 | /main/diff.out 3 | /mapreduce/x.txt 4 | /pbservice/x.txt 5 | /kvpaxos/x.txt 6 | -------------------------------------------------------------------------------- /hw3/src/kvpaxos/out.txt: -------------------------------------------------------------------------------- 1 | round: 1 2 | round: 2 3 | round: 3 4 | round: 4 5 | round: 5 6 | round: 6 7 | round: 7 8 | -------------------------------------------------------------------------------- /gitpush.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | DATE=$(date) 4 | #echo "$DATE" 5 | 6 | git add . 7 | git commit -m "[UPDATED] $DATE :bulb:" # :tada: 8 | git push 9 | -------------------------------------------------------------------------------- /hw1/gitpush.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | DATE=$(date) 4 | #echo "$DATE" 5 | 6 | git add . 7 | git commit -m "[UPDATED] $DATE :bulb:" # :tada: 8 | git push 9 | -------------------------------------------------------------------------------- /hw1/src/main/mr-testout.txt: -------------------------------------------------------------------------------- 1 | unto: 8940 2 | he: 9666 3 | shall: 9760 4 | in: 12334 5 | that: 12577 6 | And: 12846 7 | to: 13384 8 | of: 34434 9 | and: 38850 10 | the: 62075 11 | -------------------------------------------------------------------------------- /hw1/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | -------------------------------------------------------------------------------- /hw2/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | -------------------------------------------------------------------------------- /hw3/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | -------------------------------------------------------------------------------- /hw4/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | -------------------------------------------------------------------------------- /hw1/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /hw2/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /hw3/.idea/vcs.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /hw4/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /hw2/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /hw3/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /hw4/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /hw1/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /hw2/.idea/hw2.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /hw3/.idea/hw3.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /hw4/.idea/hw4.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /hw1/.idea/CS188-hw1.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /hw1/src/main/test-wc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run wc.go master kjv12.txt sequential 3 | sort -n -k2 mrtmp.kjv12.txt | tail -10 | diff - mr-testout.txt > diff.out 4 | if [ -s diff.out ] 5 | then 6 | echo "Failed test. Output should be as in mr-testout.txt. 
Your output differs as follows (from diff.out):" 7 | cat diff.out 8 | else 9 | echo "Passed test" 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /hw2/src/main/viewd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "viewservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 2 { 14 | fmt.Printf("Usage: viewd port\n") 15 | os.Exit(1) 16 | } 17 | 18 | viewservice.StartServer(os.Args[1]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /hw3/src/main/viewd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "viewservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 2 { 14 | fmt.Printf("Usage: viewd port\n") 15 | os.Exit(1) 16 | } 17 | 18 | viewservice.StartServer(os.Args[1]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /hw4/src/main/viewd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "viewservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 2 { 14 | fmt.Printf("Usage: viewd port\n") 15 | os.Exit(1) 16 | } 17 | 18 | viewservice.StartServer(os.Args[1]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /hw2/src/main/pbd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "pbservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 3 { 14 | fmt.Printf("Usage: pbd viewport myport\n") 15 | os.Exit(1) 16 | } 17 | 18 | pbservice.StartServer(os.Args[1], os.Args[2]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /hw3/src/main/pbd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "pbservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 3 { 14 | fmt.Printf("Usage: pbd viewport myport\n") 15 | os.Exit(1) 16 | } 17 | 18 | pbservice.StartServer(os.Args[1], os.Args[2]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /hw4/src/main/pbd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "pbservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 3 { 14 | fmt.Printf("Usage: pbd viewport myport\n") 15 | os.Exit(1) 16 | } 17 | 18 | pbservice.StartServer(os.Args[1], os.Args[2]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | 
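Editor's note: viewd.go and pbd.go above are thin command-line wrappers around StartServer. A minimal sketch of the same wiring done in a single process (editor's addition, not a file from this repository; the /tmp socket paths are placeholders):

package main

import (
	"time"

	"pbservice"
	"viewservice"
)

func main() {
	vshost := "/tmp/example-v"                      // placeholder unix-socket path for the view service
	viewservice.StartServer(vshost)                 // start the view service
	pbservice.StartServer(vshost, "/tmp/example-1") // first primary/backup server
	pbservice.StartServer(vshost, "/tmp/example-2") // second primary/backup server
	time.Sleep(10 * time.Second)                    // keep the process alive briefly so the servers can run
}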
-------------------------------------------------------------------------------- /hw3/src/kvpaxos/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm out.txt 4 | 5 | for i in {1..60} 6 | 7 | do 8 | echo "round: $i" | tee -a ./out.txt 9 | go test | tee -a out.txt 10 | sed -i '' '/unexpected EOF/d' ./out.txt 11 | sed -i '' '/write unix/d' ./out.txt 12 | sed -i '' '/read unix/d' ./out.txt 13 | sed -i '' '/connection is/d' ./out.txt 14 | sed -i '' '/rpc.Register/d' ./out.txt 15 | sed -i '' '/paxos Dial() failed/d' ./out.txt 16 | done 17 | cat out.txt 18 | -------------------------------------------------------------------------------- /hw3/src/paxos/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm out.txt 4 | 5 | for i in {1..60} 6 | 7 | do 8 | echo "round: $i" | tee -a ./out.txt 9 | go test | tee -a out.txt 10 | sed -i '' '/unexpected EOF/d' ./out.txt 11 | sed -i '' '/write unix/d' ./out.txt 12 | sed -i '' '/read unix/d' ./out.txt 13 | sed -i '' '/connection is/d' ./out.txt 14 | sed -i '' '/rpc.Register/d' ./out.txt 15 | sed -i '' '/paxos Dial() failed/d' ./out.txt 16 | done 17 | cat out.txt 18 | -------------------------------------------------------------------------------- /hw2/src/pbservice/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm out.txt 4 | 5 | for i in {1..10}; 6 | do go test | tee -a out.txt; done 7 | 8 | sed -i '' '/ForwardTest: NOT CURRENT PRIMARY/d' ./out.txt 9 | sed -i '' '/PutAppend: NOT THE PRIMARY YET/d' ./out.txt 10 | 11 | sed -i '' '/unexpected EOF/d' ./out.txt 12 | sed -i '' '/write unix/d' ./out.txt 13 | sed -i '' '/connection is shut down/d' ./out.txt 14 | sed -i '' '/rpc.Register/d' ./out.txt 15 | sed -i '' '/connection reset by peer/d' ./out.txt 16 | -------------------------------------------------------------------------------- /hw4/src/shardkv/test_linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm out.txt 4 | 5 | for i in {1..1} 6 | 7 | do 8 | echo "round: $i" | tee -a ./out.txt 9 | go test | tee -a out.txt 10 | sed -i '/unexpected EOF/d' ./out.txt 11 | sed -i '/write unix/d' ./out.txt 12 | sed -i '/read unix/d' ./out.txt 13 | sed -i '/connection is/d' ./out.txt 14 | sed -i '/rpc.Register/d' ./out.txt 15 | sed -i '/paxos Dial() failed/d' ./out.txt 16 | sed -i '/ShardKV:/d' ./out.txt 17 | sed -i '/ShardMaster:/d' ./out.txt 18 | sed -i '/Timeout:/d' ./out.txt 19 | done 20 | cat out.txt 21 | -------------------------------------------------------------------------------- /hw4/src/shardkv/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm out.txt 4 | 5 | for i in {1..1} 6 | 7 | do 8 | echo "round: $i" | tee -a ./out.txt 9 | go test | tee -a out.txt 10 | sed -i '' '/unexpected EOF/d' ./out.txt 11 | sed -i '' '/write unix/d' ./out.txt 12 | sed -i '' '/read unix/d' ./out.txt 13 | sed -i '' '/connection is/d' ./out.txt 14 | sed -i '' '/rpc.Register/d' ./out.txt 15 | sed -i '' '/paxos Dial() failed/d' ./out.txt 16 | sed -i '' '/ShardKV:/d' ./out.txt 17 | sed -i '' '/ShardMaster:/d' ./out.txt 18 | sed -i '' '/Timeout:/d' ./out.txt 19 | done 20 | cat out.txt 21 | -------------------------------------------------------------------------------- /hw3/src/pbservice/common.go: -------------------------------------------------------------------------------- 
1 | package pbservice 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ErrWrongServer = "ErrWrongServer" 7 | ) 8 | 9 | type Err string 10 | 11 | // Put or Append 12 | type PutAppendArgs struct { 13 | Key string 14 | Value string 15 | // You'll have to add definitions here. 16 | 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | } 20 | 21 | type PutAppendReply struct { 22 | Err Err 23 | } 24 | 25 | type GetArgs struct { 26 | Key string 27 | // You'll have to add definitions here. 28 | } 29 | 30 | type GetReply struct { 31 | Err Err 32 | Value string 33 | } 34 | 35 | 36 | // Your RPC definitions here. 37 | -------------------------------------------------------------------------------- /hw4/src/pbservice/common.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ErrWrongServer = "ErrWrongServer" 7 | ) 8 | 9 | type Err string 10 | 11 | // Put or Append 12 | type PutAppendArgs struct { 13 | Key string 14 | Value string 15 | // You'll have to add definitions here. 16 | 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | } 20 | 21 | type PutAppendReply struct { 22 | Err Err 23 | } 24 | 25 | type GetArgs struct { 26 | Key string 27 | // You'll have to add definitions here. 28 | } 29 | 30 | type GetReply struct { 31 | Err Err 32 | Value string 33 | } 34 | 35 | 36 | // Your RPC definitions here. 37 | -------------------------------------------------------------------------------- /hw3/src/kvpaxos/common.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ) 7 | 8 | type Err string 9 | 10 | // Put or Append 11 | type PutAppendArgs struct { 12 | // You'll have to add definitions here. 13 | Key string 14 | Value string 15 | Op string // "Put" or "Append" 16 | Hash int64 17 | // You'll have to add definitions here. 18 | // Field names must start with capital letters, 19 | // otherwise RPC will break. 20 | } 21 | 22 | type PutAppendReply struct { 23 | Err Err 24 | } 25 | 26 | type GetArgs struct { 27 | Key string 28 | Hash int64 29 | // You'll have to add definitions here. 
30 | } 31 | 32 | type GetReply struct { 33 | Err Err 34 | Value string 35 | } 36 | -------------------------------------------------------------------------------- /hw3/src/paxos/common.go: -------------------------------------------------------------------------------- 1 | package paxos 2 | // added by Adrian 3 | 4 | type Err string 5 | 6 | 7 | type PrepareArgs struct { 8 | Seq int 9 | N int 10 | } 11 | 12 | type PrepareReply struct { 13 | Err Err 14 | N int 15 | N_a int 16 | V_a interface{} 17 | Z_i int 18 | Higher_N int 19 | } 20 | 21 | type AcceptArgs struct { 22 | Seq int 23 | N int 24 | V_p interface{} // v prime 25 | } 26 | 27 | type AcceptReply struct { 28 | Err Err 29 | N int 30 | } 31 | 32 | type DecidedArgs struct { 33 | Seq int 34 | N int 35 | V_p interface{} 36 | } 37 | 38 | type DecidedReply struct { 39 | Err Err 40 | } -------------------------------------------------------------------------------- /hw4/src/paxos/common.go: -------------------------------------------------------------------------------- 1 | package paxos 2 | // added by Adrian 3 | 4 | type Err string 5 | 6 | 7 | type PrepareArgs struct { 8 | Seq int 9 | N int 10 | } 11 | 12 | type PrepareReply struct { 13 | Err Err 14 | N int 15 | N_a int 16 | V_a interface{} 17 | Z_i int 18 | Higher_N int 19 | } 20 | 21 | type AcceptArgs struct { 22 | Seq int 23 | N int 24 | V_p interface{} // v prime 25 | } 26 | 27 | type AcceptReply struct { 28 | Err Err 29 | N int 30 | } 31 | 32 | type DecidedArgs struct { 33 | Seq int 34 | N int 35 | V_p interface{} 36 | } 37 | 38 | type DecidedReply struct { 39 | Err Err 40 | } -------------------------------------------------------------------------------- /hw4/src/kvpaxos/common.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ) 7 | 8 | type Err string 9 | 10 | // Put or Append 11 | type PutAppendArgs struct { 12 | // You'll have to add definitions here. 13 | Key string 14 | Value string 15 | Op string // "Put" or "Append" 16 | // You'll have to add definitions here. 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | ClientID int64 20 | Seq int 21 | } 22 | 23 | type PutAppendReply struct { 24 | Err Err 25 | } 26 | 27 | type GetArgs struct { 28 | Key string 29 | Op string 30 | // You'll have to add definitions here. 31 | ClientID int64 32 | Seq int 33 | } 34 | 35 | type GetReply struct { 36 | Err Err 37 | Value string 38 | } 39 | -------------------------------------------------------------------------------- /hw2/src/main/pbc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // pbservice client application 5 | // 6 | // go build viewd.go 7 | // go build pbd.go 8 | // go build pbc.go 9 | // ./viewd /tmp/rtm-v & 10 | // ./pbd /tmp/rtm-v /tmp/rtm-1 & 11 | // ./pbd /tmp/rtm-v /tmp/rtm-2 & 12 | // ./pbc /tmp/rtm-v key1 value1 13 | // ./pbc /tmp/rtm-v key1 14 | // 15 | // change "rtm" to your user name. 16 | // start the pbd programs in separate windows, kill them 17 | // and then restart them to exercise fault tolerance. 
18 | // 19 | 20 | import "pbservice" 21 | import "os" 22 | import "fmt" 23 | 24 | func usage() { 25 | fmt.Printf("Usage: pbc viewport key\n") 26 | fmt.Printf(" pbc viewport key value\n") 27 | os.Exit(1) 28 | } 29 | 30 | func main() { 31 | if len(os.Args) == 3 { 32 | // get 33 | ck := pbservice.MakeClerk(os.Args[1], "") 34 | v := ck.Get(os.Args[2]) 35 | fmt.Printf("%v\n", v) 36 | } else if len(os.Args) == 4 { 37 | // put 38 | ck := pbservice.MakeClerk(os.Args[1], "") 39 | ck.Put(os.Args[2], os.Args[3]) 40 | } else { 41 | usage() 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /hw3/src/main/pbc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // pbservice client application 5 | // 6 | // go build viewd.go 7 | // go build pbd.go 8 | // go build pbc.go 9 | // ./viewd /tmp/rtm-v & 10 | // ./pbd /tmp/rtm-v /tmp/rtm-1 & 11 | // ./pbd /tmp/rtm-v /tmp/rtm-2 & 12 | // ./pbc /tmp/rtm-v key1 value1 13 | // ./pbc /tmp/rtm-v key1 14 | // 15 | // change "rtm" to your user name. 16 | // start the pbd programs in separate windows, kill them 17 | // and then restart them to exercise fault tolerance. 18 | // 19 | 20 | import "pbservice" 21 | import "os" 22 | import "fmt" 23 | 24 | func usage() { 25 | fmt.Printf("Usage: pbc viewport key\n") 26 | fmt.Printf(" pbc viewport key value\n") 27 | os.Exit(1) 28 | } 29 | 30 | func main() { 31 | if len(os.Args) == 3 { 32 | // get 33 | ck := pbservice.MakeClerk(os.Args[1], "") 34 | v := ck.Get(os.Args[2]) 35 | fmt.Printf("%v\n", v) 36 | } else if len(os.Args) == 4 { 37 | // put 38 | ck := pbservice.MakeClerk(os.Args[1], "") 39 | ck.Put(os.Args[2], os.Args[3]) 40 | } else { 41 | usage() 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /hw4/src/main/pbc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // pbservice client application 5 | // 6 | // go build viewd.go 7 | // go build pbd.go 8 | // go build pbc.go 9 | // ./viewd /tmp/rtm-v & 10 | // ./pbd /tmp/rtm-v /tmp/rtm-1 & 11 | // ./pbd /tmp/rtm-v /tmp/rtm-2 & 12 | // ./pbc /tmp/rtm-v key1 value1 13 | // ./pbc /tmp/rtm-v key1 14 | // 15 | // change "rtm" to your user name. 16 | // start the pbd programs in separate windows, kill them 17 | // and then restart them to exercise fault tolerance. 18 | // 19 | 20 | import "pbservice" 21 | import "os" 22 | import "fmt" 23 | 24 | func usage() { 25 | fmt.Printf("Usage: pbc viewport key\n") 26 | fmt.Printf(" pbc viewport key value\n") 27 | os.Exit(1) 28 | } 29 | 30 | func main() { 31 | if len(os.Args) == 3 { 32 | // get 33 | ck := pbservice.MakeClerk(os.Args[1], "") 34 | v := ck.Get(os.Args[2]) 35 | fmt.Printf("%v\n", v) 36 | } else if len(os.Args) == 4 { 37 | // put 38 | ck := pbservice.MakeClerk(os.Args[1], "") 39 | ck.Put(os.Args[2], os.Args[3]) 40 | } else { 41 | usage() 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /hw2/src/pbservice/common.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ErrWrongServer = "ErrWrongServer" 7 | ) 8 | 9 | type Err string 10 | 11 | // Put or Append 12 | type PutAppendArgs struct { 13 | Key string 14 | Value string 15 | // You'll have to add definitions here. 16 | // Field names must start with capital letters, 17 | // otherwise RPC will break. 
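	// (Editor's note, added for readability: the two fields below are the
	// author's additions to the skeleton. Op distinguishes "Put" from "Append",
	// matching the op argument of Clerk.PutAppend; HashVal appears to be a
	// per-request nonce, presumably generated with nrand(), that lets the
	// server filter duplicate or re-forwarded requests.)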
18 | Op string 19 | HashVal int64 20 | } 21 | 22 | type PutAppendReply struct { 23 | Err Err 24 | } 25 | 26 | // Put or Append 27 | type PutAppendSyncArgs struct { 28 | Key string 29 | Value string 30 | // You'll have to add definitions here. 31 | // Field names must start with capital letters, 32 | // otherwise RPC will break. 33 | Op string 34 | HashVal int64 35 | Primary string 36 | 37 | } 38 | 39 | type PutAppendSyncReply struct { 40 | Err Err 41 | } 42 | 43 | type GetArgs struct { 44 | Key string 45 | // You'll have to add definitions here. 46 | } 47 | 48 | type GetReply struct { 49 | Err Err 50 | Value string 51 | } 52 | 53 | type GetSyncArgs struct { 54 | Key string 55 | // You'll have to add definitions here. 56 | Primary string 57 | } 58 | 59 | type GetSyncReply struct { 60 | Err Err 61 | Value string 62 | } 63 | 64 | // Your RPC definitions here. 65 | type BootstrapArgs struct { 66 | Database map[string]string 67 | HashVals map[int64]bool 68 | } 69 | 70 | type BootstrapReply struct { 71 | Err Err 72 | } -------------------------------------------------------------------------------- /hw4/src/shardkv/common.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // Sharded key/value server. 5 | // Lots of replica groups, each running op-at-a-time paxos. 6 | // Shardmaster decides which group serves each shard. 7 | // Shardmaster may change shard assignment from time to time. 8 | // 9 | // You will have to modify these definitions. 10 | // 11 | 12 | const ( 13 | OK = "OK" 14 | ErrNoKey = "ErrNoKey" 15 | ErrWrongGroup = "ErrWrongGroup" 16 | ErrNotReady = "ErrNotReady" 17 | ) 18 | 19 | type Err string 20 | 21 | type PutAppendArgs struct { 22 | Key string 23 | Value string 24 | Op string // "Put" or "Append" 25 | 26 | ID int64 // client ID (each client has an unique id) 27 | // client's seq. everytime it performs put/get/append 28 | // its seq will += 1 29 | Seq int 30 | ConfigNum int // Number in the clients' config 31 | Shard int // from 0 ~ 9. the index of shards 32 | } 33 | 34 | type PutAppendReply struct { 35 | Err Err 36 | } 37 | 38 | type GetArgs struct { 39 | Key string 40 | 41 | ID int64 42 | Seq int 43 | ConfigNum int 44 | Shard int 45 | } 46 | 47 | type GetReply struct { 48 | Err Err 49 | Value string 50 | } 51 | 52 | type BootstrapArgs struct { 53 | Shard int 54 | ConfigNum int 55 | } 56 | 57 | type BootstrapReply struct { 58 | ShardState ShardState 59 | ProducerGID int64 60 | ConfigNum int 61 | Shard int 62 | Err string 63 | } 64 | 65 | type ReconfigureArgs struct { 66 | NewConfigNum int 67 | } -------------------------------------------------------------------------------- /hw4/src/shardmaster/common.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Master shard server: assigns shards to replication groups. 5 | // 6 | // RPC interface: 7 | // Join(gid, servers) -- replica group gid is joining, give it some shards. 8 | // Leave(gid) -- replica group gid is retiring, hand off all its shards. 9 | // Move(shard, gid) -- hand off one shard from current owner to gid. 10 | // Query(num) -> fetch Config # num, or latest config if num==-1. 11 | // 12 | // A Config (configuration) describes a set of replica groups, and the 13 | // replica group responsible for each shard. Configs are numbered. Config 14 | // #0 is the initial configuration, with no groups and all shards 15 | // assigned to group 0 (the invalid group). 
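// Editor's illustrative sketch (not part of the original handout file): a
// client wanting the latest configuration sends Query with Num == -1 and then
// reads the owning group for a shard out of the reply, e.g.
//
//	args := &QueryArgs{Num: -1}          // -1 means "give me the latest config"
//	var reply QueryReply
//	// ...send the Query RPC to a shardmaster replica (clerk code not shown)...
//	gid := reply.Config.Shards[shard]    // replica group ID serving this shard
//	servers := reply.Config.Groups[gid]  // that group's server ports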
16 | // 17 | // A GID is a replica group ID. GIDs must be uniqe and > 0. 18 | // Once a GID joins, and leaves, it should never join again. 19 | // 20 | // Please don't change this file. 21 | // 22 | 23 | const NShards = 10 24 | 25 | type Config struct { 26 | Num int // config number 27 | Shards [NShards]int64 // shard -> gid 28 | Groups map[int64][]string // gid -> servers[] 29 | } 30 | 31 | type JoinArgs struct { 32 | GID int64 // unique replica group ID 33 | Servers []string // group server ports 34 | } 35 | 36 | type JoinReply struct { 37 | } 38 | 39 | type LeaveArgs struct { 40 | GID int64 41 | } 42 | 43 | type LeaveReply struct { 44 | } 45 | 46 | type MoveArgs struct { 47 | Shard int 48 | GID int64 49 | } 50 | 51 | type MoveReply struct { 52 | } 53 | 54 | type QueryArgs struct { 55 | Num int // desired config number 56 | } 57 | 58 | type QueryReply struct { 59 | Config Config 60 | } 61 | -------------------------------------------------------------------------------- /hw1/src/main/wc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "strconv" 6 | "strings" 7 | "unicode" 8 | ) 9 | import "fmt" 10 | import "mapreduce" 11 | 12 | import "container/list" 13 | 14 | // our simplified version of MapReduce does not supply a 15 | // key to the Map function, as in the paper; only a value, 16 | // which is a part of the input file content. the return 17 | // value should be a list of key/value pairs, each represented 18 | // by a mapreduce.KeyValue. 19 | func Map(value string) *list.List { 20 | f := func(c rune) bool { 21 | return !unicode.IsLetter(c) 22 | } 23 | s := strings.FieldsFunc(value, f) 24 | 25 | li := list.New() 26 | for _, v := range s { 27 | //fmt.Printf("%v, %v\n", i, v) 28 | li.PushBack(mapreduce.KeyValue{v, "1"}) 29 | } 30 | return li 31 | } 32 | 33 | // called once for each key generated by Map, with a list 34 | // of that key's string value. should return a single 35 | // output value for that key. 36 | func Reduce(key string, values *list.List) string { 37 | total := values.Len() 38 | return strconv.Itoa(total) 39 | } 40 | 41 | // Can be run in 3 ways: 42 | // 1) Sequential (e.g., go run wc.go master x.txt sequential) 43 | // 2) Master (e.g., go run wc.go master x.txt localhost:7777) 44 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 45 | func main() { 46 | if len(os.Args) != 4 { 47 | fmt.Printf("%v", os.Args) 48 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 49 | } else if os.Args[1] == "master" { 50 | if os.Args[3] == "sequential" { 51 | mapreduce.RunSingle(5, 3, os.Args[2], Map, Reduce) 52 | } else { 53 | mr := mapreduce.MakeMapReduce(5, 3, os.Args[2], os.Args[3]) 54 | // Wait until MR is done 55 | <-mr.DoneChannel 56 | } 57 | } else { 58 | mapreduce.RunWorker(os.Args[2], os.Args[3], Map, Reduce, 100) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /hw1/src/mapreduce/common.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | import "net/rpc" 7 | 8 | const ( 9 | Map = "Map" 10 | Reduce = "Reduce" 11 | ) 12 | 13 | type JobType string 14 | 15 | // RPC arguments and replies. Field names must start with capital letters, 16 | // otherwise RPC will break. 
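// Editor's sketch, added for illustration (not in the original file): one way
// master.go might hand a single map task to a registered worker using call()
// and the structures defined below. "Worker.DoJob" matches the DoJob method
// on Worker in worker.go; the worker address and task numbers come from the
// caller.
func exampleAssignMapJob(worker string, file string, jobNumber int, nReduce int) bool {
	args := &DoJobArgs{
		File:          file,
		Operation:     Map,       // this phase runs Map tasks
		JobNumber:     jobNumber, // index of this map task
		NumOtherPhase: nReduce,   // number of tasks in the other (reduce) phase
	}
	var reply DoJobReply
	ok := call(worker, "Worker.DoJob", args, &reply)
	return ok && reply.OK // true only if the worker was reachable and reported success
}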
17 | 18 | type DoJobArgs struct { 19 | File string 20 | Operation JobType 21 | JobNumber int // this job's number 22 | NumOtherPhase int // total number of jobs in other phase (map or reduce) 23 | // for example, if we are in Map Phase, then NumOtherPhase = nReduce = 50 24 | } 25 | 26 | type DoJobReply struct { 27 | OK bool 28 | } 29 | 30 | type ShutdownArgs struct { 31 | } 32 | 33 | type ShutdownReply struct { 34 | Njobs int 35 | OK bool 36 | } 37 | 38 | type RegisterArgs struct { 39 | Worker string 40 | } 41 | 42 | type RegisterReply struct { 43 | OK bool 44 | } 45 | 46 | // 47 | // call() sends an RPC to the rpcname handler on server srv 48 | // with arguments args, waits for the reply, and leaves the 49 | // reply in reply. the reply argument should be the address 50 | // of a reply structure. 51 | // 52 | // call() returns true if the server responded, and false 53 | // if call() was not able to contact the server. in particular, 54 | // reply's contents are valid if and only if call() returned true. 55 | // 56 | // you should assume that call() will time out and return an 57 | // error after a while if it doesn't get a reply from the server. 58 | // 59 | // please use call() to send all RPCs, in master.go, mapreduce.go, 60 | // and worker.go. please don't change this function. 61 | // 62 | func call(srv string, rpcname string, 63 | args interface{}, reply interface{}) bool { 64 | c, errx := rpc.Dial("unix", srv) 65 | if errx != nil { 66 | return false 67 | } 68 | defer c.Close() 69 | 70 | err := c.Call(rpcname, args, reply) 71 | if err == nil { 72 | return true 73 | } 74 | 75 | fmt.Println(err) 76 | return false 77 | } 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## MIT 6.824: Distributed Systems 2 | 3 | > 🏃 MIT 6.824 is where my journey of Distributed Systems began. All projects are my own individual works. 4 | 5 | ### TODO 6 | 7 | 1. MapReduce 8 | * ✅ (Mar 23, 2020) 9 | * Write a simple MapReduce program, and then Build a MapReduce library of which the master hands out jobs to workers, and handles failures of workers. 10 | 2. Primary Backup Replication Key/Value Service 11 | * ✅ (Jun 27, 2020) 12 | * Uses primary/backup replication, assisted by a view service that decides which machines are alive. The view service allows the primary/backup service to work correctly in the presence of network partitions. The view service itself is not replicated, and is a single point of failure. 13 | 3. Paxos-based Key/Value Service 14 | * ✅ (Jul 3, 2020) 15 | * Uses Paxos protocol to replicate the key/value database with no single point of failure, and handles network partitions correctly. This key/value service is slower than a non-replicated key/value server would be, but is fault tolerant. 16 | 4. Sharded Key/Value Service based on Paxos 17 | * ✅ (Jul 14, 2020) 18 | * A sharded key/value database where each shard replicates its state using Paxos. This service can perform Put/Get operations in parallel on different shards. It also has a replicated configuration service, which tells the shards what key range they are responsible for. It can change the assignment of keys to shards in response to changing load. This project has the core of a real-world design for thousands of servers. 
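As a concrete illustration of how project 4 routes a request (an editor's sketch, not code taken from this repository), the clerk first maps a key to one of the `NShards = 10` shards defined in `src/shardmaster/common.go`, then looks up the owning replica group in the current configuration (`Config.Shards` maps shard to gid, `Config.Groups` maps gid to its servers). The byte-based hash below is only one plausible choice:

```go
// key2shard maps a key to a shard index in [0, NShards).
// NShards = 10 comes from shardmaster/common.go; the hashing scheme is illustrative.
func key2shard(key string) int {
	shard := 0
	if len(key) > 0 {
		shard = int(key[0]) // use the first byte of the key
	}
	return shard % NShards
}
```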
19 | 20 | ### Course Description 21 | 22 | MIT 6.824 is a core 12-unit graduate subject with lectures, readings, programming labs, an optional project, a mid-term exam, and a final exam. It will present abstractions and implementation techniques for engineering distributed systems. Major topics include fault tolerance, replication, and consistency. Much of the class consists of studying and discussing case studies of distributed systems. 23 | 24 | ### References 25 | * [MIT 6.824 Spring 2015](http://nil.csail.mit.edu/6.824/2015/) 26 | * [UCLA CS 134 Spring 2020](http://web.cs.ucla.edu/~ravi/CS134_S20/) 27 | -------------------------------------------------------------------------------- /hw2/src/viewservice/client.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net/rpc" 4 | import "fmt" 5 | 6 | // 7 | // the viewservice Clerk lives in the client 8 | // and maintains a little state. 9 | // 10 | type Clerk struct { 11 | me string // client's name (host:port) 12 | server string // viewservice's host:port 13 | } 14 | 15 | func MakeClerk(me string, server string) *Clerk { 16 | ck := new(Clerk) 17 | ck.me = me 18 | ck.server = server 19 | return ck 20 | } 21 | 22 | // 23 | // call() sends an RPC to the rpcname handler on server srv 24 | // with arguments args, waits for the reply, and leaves the 25 | // reply in reply. the reply argument should be a pointer 26 | // to a reply structure. 27 | // 28 | // the return value is true if the server responded, and false 29 | // if call() was not able to contact the server. in particular, 30 | // the reply's contents are only valid if call() returned true. 31 | // 32 | // you should assume that call() will return an 33 | // error after a while if the server is dead. 34 | // don't provide your own time-out mechanism. 35 | // 36 | // please use call() to send all RPCs, in client.go and server.go. 37 | // please don't change this function. 38 | // 39 | func call(srv string, rpcname string, 40 | args interface{}, reply interface{}) bool { 41 | c, errx := rpc.Dial("unix", srv) 42 | if errx != nil { 43 | return false 44 | } 45 | defer c.Close() 46 | 47 | err := c.Call(rpcname, args, reply) 48 | if err == nil { 49 | return true 50 | } 51 | 52 | fmt.Println(err) 53 | return false 54 | } 55 | 56 | func (ck *Clerk) Ping(viewnum uint) (View, error) { 57 | // prepare the arguments. 58 | args := &PingArgs{} 59 | args.Me = ck.me 60 | args.Viewnum = viewnum 61 | var reply PingReply 62 | 63 | // send an RPC request, wait for the reply. 64 | ok := call(ck.server, "ViewServer.Ping", args, &reply) 65 | if ok == false { 66 | return View{}, fmt.Errorf("Ping(%v) failed", viewnum) 67 | } 68 | 69 | return reply.View, nil 70 | } 71 | 72 | func (ck *Clerk) Get() (View, bool) { 73 | args := &GetArgs{} 74 | var reply GetReply 75 | ok := call(ck.server, "ViewServer.Get", args, &reply) 76 | if ok == false { 77 | return View{}, false 78 | } 79 | return reply.View, true 80 | } 81 | 82 | func (ck *Clerk) Primary() string { 83 | v, ok := ck.Get() 84 | if ok { 85 | return v.Primary 86 | } 87 | return "" 88 | } 89 | -------------------------------------------------------------------------------- /hw3/src/viewservice/client.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net/rpc" 4 | import "fmt" 5 | 6 | // 7 | // the viewservice Clerk lives in the client 8 | // and maintains a little state. 
9 | // 10 | type Clerk struct { 11 | me string // client's name (host:port) 12 | server string // viewservice's host:port 13 | } 14 | 15 | func MakeClerk(me string, server string) *Clerk { 16 | ck := new(Clerk) 17 | ck.me = me 18 | ck.server = server 19 | return ck 20 | } 21 | 22 | // 23 | // call() sends an RPC to the rpcname handler on server srv 24 | // with arguments args, waits for the reply, and leaves the 25 | // reply in reply. the reply argument should be a pointer 26 | // to a reply structure. 27 | // 28 | // the return value is true if the server responded, and false 29 | // if call() was not able to contact the server. in particular, 30 | // the reply's contents are only valid if call() returned true. 31 | // 32 | // you should assume that call() will return an 33 | // error after a while if the server is dead. 34 | // don't provide your own time-out mechanism. 35 | // 36 | // please use call() to send all RPCs, in client.go and server.go. 37 | // please don't change this function. 38 | // 39 | func call(srv string, rpcname string, 40 | args interface{}, reply interface{}) bool { 41 | c, errx := rpc.Dial("unix", srv) 42 | if errx != nil { 43 | return false 44 | } 45 | defer c.Close() 46 | 47 | err := c.Call(rpcname, args, reply) 48 | if err == nil { 49 | return true 50 | } 51 | 52 | fmt.Println(err) 53 | return false 54 | } 55 | 56 | func (ck *Clerk) Ping(viewnum uint) (View, error) { 57 | // prepare the arguments. 58 | args := &PingArgs{} 59 | args.Me = ck.me 60 | args.Viewnum = viewnum 61 | var reply PingReply 62 | 63 | // send an RPC request, wait for the reply. 64 | ok := call(ck.server, "ViewServer.Ping", args, &reply) 65 | if ok == false { 66 | return View{}, fmt.Errorf("Ping(%v) failed", viewnum) 67 | } 68 | 69 | return reply.View, nil 70 | } 71 | 72 | func (ck *Clerk) Get() (View, bool) { 73 | args := &GetArgs{} 74 | var reply GetReply 75 | ok := call(ck.server, "ViewServer.Get", args, &reply) 76 | if ok == false { 77 | return View{}, false 78 | } 79 | return reply.View, true 80 | } 81 | 82 | func (ck *Clerk) Primary() string { 83 | v, ok := ck.Get() 84 | if ok { 85 | return v.Primary 86 | } 87 | return "" 88 | } 89 | -------------------------------------------------------------------------------- /hw4/src/viewservice/client.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net/rpc" 4 | import "fmt" 5 | 6 | // 7 | // the viewservice Clerk lives in the client 8 | // and maintains a little state. 9 | // 10 | type Clerk struct { 11 | me string // client's name (host:port) 12 | server string // viewservice's host:port 13 | } 14 | 15 | func MakeClerk(me string, server string) *Clerk { 16 | ck := new(Clerk) 17 | ck.me = me 18 | ck.server = server 19 | return ck 20 | } 21 | 22 | // 23 | // call() sends an RPC to the rpcname handler on server srv 24 | // with arguments args, waits for the reply, and leaves the 25 | // reply in reply. the reply argument should be a pointer 26 | // to a reply structure. 27 | // 28 | // the return value is true if the server responded, and false 29 | // if call() was not able to contact the server. in particular, 30 | // the reply's contents are only valid if call() returned true. 31 | // 32 | // you should assume that call() will return an 33 | // error after a while if the server is dead. 34 | // don't provide your own time-out mechanism. 35 | // 36 | // please use call() to send all RPCs, in client.go and server.go. 37 | // please don't change this function. 
38 | // 39 | func call(srv string, rpcname string, 40 | args interface{}, reply interface{}) bool { 41 | c, errx := rpc.Dial("unix", srv) 42 | if errx != nil { 43 | return false 44 | } 45 | defer c.Close() 46 | 47 | err := c.Call(rpcname, args, reply) 48 | if err == nil { 49 | return true 50 | } 51 | 52 | fmt.Println(err) 53 | return false 54 | } 55 | 56 | func (ck *Clerk) Ping(viewnum uint) (View, error) { 57 | // prepare the arguments. 58 | args := &PingArgs{} 59 | args.Me = ck.me 60 | args.Viewnum = viewnum 61 | var reply PingReply 62 | 63 | // send an RPC request, wait for the reply. 64 | ok := call(ck.server, "ViewServer.Ping", args, &reply) 65 | if ok == false { 66 | return View{}, fmt.Errorf("Ping(%v) failed", viewnum) 67 | } 68 | 69 | return reply.View, nil 70 | } 71 | 72 | func (ck *Clerk) Get() (View, bool) { 73 | args := &GetArgs{} 74 | var reply GetReply 75 | ok := call(ck.server, "ViewServer.Get", args, &reply) 76 | if ok == false { 77 | return View{}, false 78 | } 79 | return reply.View, true 80 | } 81 | 82 | func (ck *Clerk) Primary() string { 83 | v, ok := ck.Get() 84 | if ok { 85 | return v.Primary 86 | } 87 | return "" 88 | } 89 | -------------------------------------------------------------------------------- /hw2/src/viewservice/common.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "time" 4 | 5 | // 6 | // This is a non-replicated view service for a simple 7 | // primary/backup system. 8 | // 9 | // The view service goes through a sequence of numbered 10 | // views, each with a primary and (if possible) a backup. 11 | // A view consists of a view number and the host:port of 12 | // the view's primary and backup p/b servers. 13 | // 14 | // The primary in a view is always either the primary 15 | // or the backup of the previous view (in order to ensure 16 | // that the p/b service's state is preserved). 17 | // 18 | // Each p/b server should send a Ping RPC once per PingInterval. 19 | // The view server replies with a description of the current 20 | // view. The Pings let the view server know that the p/b 21 | // server is still alive; inform the p/b server of the current 22 | // view; and inform the view server of the most recent view 23 | // that the p/b server knows about. 24 | // 25 | // The view server proceeds to a new view when either it hasn't 26 | // received a ping from the primary or backup for a while, or 27 | // if there was no backup and a new server starts Pinging. 28 | // 29 | // The view server will not proceed to a new view until 30 | // the primary from the current view acknowledges 31 | // that it is operating in the current view. This helps 32 | // ensure that there's at most one p/b primary operating at 33 | // a time. 34 | // 35 | 36 | type View struct { 37 | Viewnum uint 38 | Primary string 39 | Backup string 40 | } 41 | 42 | // clients should send a Ping RPC this often, 43 | // to tell the viewservice that the client is alive. 44 | const PingInterval = time.Millisecond * 100 45 | 46 | // the viewserver will declare a client dead if it misses 47 | // this many Ping RPCs in a row. 48 | const DeadPings = 5 49 | 50 | // 51 | // Ping(): called by a primary/backup server to tell the 52 | // view service it is alive, to indicate whether p/b server 53 | // has seen the latest view, and for p/b server to learn 54 | // the latest view. 55 | // 56 | // If Viewnum is zero, the caller is signalling that it is 57 | // alive and could become backup if needed. 
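// Editor's illustrative sketch (not part of the original file): a p/b server
// typically pings in a loop, once per PingInterval, adopting whatever view
// the view service hands back, e.g.
//
//	ck := MakeClerk(myAddr, vshost) // myAddr and vshost are placeholders
//	viewnum := uint(0)              // 0 signals "alive and available as backup"
//	for {
//		view, err := ck.Ping(viewnum)
//		if err == nil {
//			viewnum = view.Viewnum // acknowledge the view we were told about
//		}
//		time.Sleep(PingInterval)
//	}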
58 | // 59 | 60 | type PingArgs struct { 61 | Me string // "host:port" 62 | Viewnum uint // caller's notion of current view # 63 | } 64 | 65 | type PingReply struct { 66 | View View 67 | } 68 | 69 | // 70 | // Get(): fetch the current view, without volunteering 71 | // to be a server. mostly for clients of the p/b service, 72 | // and for testing. 73 | // 74 | 75 | type GetArgs struct { 76 | } 77 | 78 | type GetReply struct { 79 | View View 80 | } 81 | -------------------------------------------------------------------------------- /hw3/src/viewservice/common.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "time" 4 | 5 | // 6 | // This is a non-replicated view service for a simple 7 | // primary/backup system. 8 | // 9 | // The view service goes through a sequence of numbered 10 | // views, each with a primary and (if possible) a backup. 11 | // A view consists of a view number and the host:port of 12 | // the view's primary and backup p/b servers. 13 | // 14 | // The primary in a view is always either the primary 15 | // or the backup of the previous view (in order to ensure 16 | // that the p/b service's state is preserved). 17 | // 18 | // Each p/b server should send a Ping RPC once per PingInterval. 19 | // The view server replies with a description of the current 20 | // view. The Pings let the view server know that the p/b 21 | // server is still alive; inform the p/b server of the current 22 | // view; and inform the view server of the most recent view 23 | // that the p/b server knows about. 24 | // 25 | // The view server proceeds to a new view when either it hasn't 26 | // received a ping from the primary or backup for a while, or 27 | // if there was no backup and a new server starts Pinging. 28 | // 29 | // The view server will not proceed to a new view until 30 | // the primary from the current view acknowledges 31 | // that it is operating in the current view. This helps 32 | // ensure that there's at most one p/b primary operating at 33 | // a time. 34 | // 35 | 36 | type View struct { 37 | Viewnum uint 38 | Primary string 39 | Backup string 40 | } 41 | 42 | // clients should send a Ping RPC this often, 43 | // to tell the viewservice that the client is alive. 44 | const PingInterval = time.Millisecond * 100 45 | 46 | // the viewserver will declare a client dead if it misses 47 | // this many Ping RPCs in a row. 48 | const DeadPings = 5 49 | 50 | // 51 | // Ping(): called by a primary/backup server to tell the 52 | // view service it is alive, to indicate whether p/b server 53 | // has seen the latest view, and for p/b server to learn 54 | // the latest view. 55 | // 56 | // If Viewnum is zero, the caller is signalling that it is 57 | // alive and could become backup if needed. 58 | // 59 | 60 | type PingArgs struct { 61 | Me string // "host:port" 62 | Viewnum uint // caller's notion of current view # 63 | } 64 | 65 | type PingReply struct { 66 | View View 67 | } 68 | 69 | // 70 | // Get(): fetch the current view, without volunteering 71 | // to be a server. mostly for clients of the p/b service, 72 | // and for testing. 
73 | // 74 | 75 | type GetArgs struct { 76 | } 77 | 78 | type GetReply struct { 79 | View View 80 | } 81 | -------------------------------------------------------------------------------- /hw4/src/viewservice/common.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "time" 4 | 5 | // 6 | // This is a non-replicated view service for a simple 7 | // primary/backup system. 8 | // 9 | // The view service goes through a sequence of numbered 10 | // views, each with a primary and (if possible) a backup. 11 | // A view consists of a view number and the host:port of 12 | // the view's primary and backup p/b servers. 13 | // 14 | // The primary in a view is always either the primary 15 | // or the backup of the previous view (in order to ensure 16 | // that the p/b service's state is preserved). 17 | // 18 | // Each p/b server should send a Ping RPC once per PingInterval. 19 | // The view server replies with a description of the current 20 | // view. The Pings let the view server know that the p/b 21 | // server is still alive; inform the p/b server of the current 22 | // view; and inform the view server of the most recent view 23 | // that the p/b server knows about. 24 | // 25 | // The view server proceeds to a new view when either it hasn't 26 | // received a ping from the primary or backup for a while, or 27 | // if there was no backup and a new server starts Pinging. 28 | // 29 | // The view server will not proceed to a new view until 30 | // the primary from the current view acknowledges 31 | // that it is operating in the current view. This helps 32 | // ensure that there's at most one p/b primary operating at 33 | // a time. 34 | // 35 | 36 | type View struct { 37 | Viewnum uint 38 | Primary string 39 | Backup string 40 | } 41 | 42 | // clients should send a Ping RPC this often, 43 | // to tell the viewservice that the client is alive. 44 | const PingInterval = time.Millisecond * 100 45 | 46 | // the viewserver will declare a client dead if it misses 47 | // this many Ping RPCs in a row. 48 | const DeadPings = 5 49 | 50 | // 51 | // Ping(): called by a primary/backup server to tell the 52 | // view service it is alive, to indicate whether p/b server 53 | // has seen the latest view, and for p/b server to learn 54 | // the latest view. 55 | // 56 | // If Viewnum is zero, the caller is signalling that it is 57 | // alive and could become backup if needed. 58 | // 59 | 60 | type PingArgs struct { 61 | Me string // "host:port" 62 | Viewnum uint // caller's notion of current view # 63 | } 64 | 65 | type PingReply struct { 66 | View View 67 | } 68 | 69 | // 70 | // Get(): fetch the current view, without volunteering 71 | // to be a server. mostly for clients of the p/b service, 72 | // and for testing. 
73 | // 74 | 75 | type GetArgs struct { 76 | } 77 | 78 | type GetReply struct { 79 | View View 80 | } 81 | -------------------------------------------------------------------------------- /hw1/src/mapreduce/worker.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | import "os" 7 | import "log" 8 | import "net/rpc" 9 | import "net" 10 | import "container/list" 11 | 12 | // Worker is a server waiting for DoJob or Shutdown RPCs 13 | 14 | type Worker struct { 15 | name string 16 | Reduce func(string, *list.List) string 17 | Map func(string) *list.List 18 | nRPC int 19 | nJobs int 20 | l net.Listener 21 | } 22 | 23 | // The master sent us a job 24 | func (wk *Worker) DoJob(arg *DoJobArgs, res *DoJobReply) error { 25 | fmt.Printf("Dojob %s job %d file %s operation %v N %d\n", 26 | wk.name, arg.JobNumber, arg.File, arg.Operation, 27 | arg.NumOtherPhase) 28 | switch arg.Operation { 29 | case Map: 30 | DoMap(arg.JobNumber, arg.File, arg.NumOtherPhase, wk.Map) 31 | case Reduce: 32 | DoReduce(arg.JobNumber, arg.File, arg.NumOtherPhase, wk.Reduce) 33 | } 34 | res.OK = true 35 | return nil 36 | } 37 | 38 | // The master is telling us to shutdown. Report the number of Jobs we 39 | // have processed. 40 | func (wk *Worker) Shutdown(args *ShutdownArgs, res *ShutdownReply) error { 41 | DPrintf("Shutdown %s\n", wk.name) 42 | res.Njobs = wk.nJobs 43 | res.OK = true 44 | wk.nRPC = 1 // OK, because the same thread reads nRPC 45 | wk.nJobs-- // Don't count the shutdown RPC 46 | return nil 47 | } 48 | 49 | // Tell the master we exist and ready to work 50 | func Register(master string, me string) { 51 | args := &RegisterArgs{} 52 | args.Worker = me 53 | var reply RegisterReply 54 | ok := call(master, "MapReduce.Register", args, &reply) 55 | 56 | if ok == false { 57 | fmt.Printf("Register: RPC %s register error\n", master) 58 | } 59 | } 60 | 61 | // Set up a connection with the master, register with the master, 62 | // and wait for jobs from the master 63 | func RunWorker(MasterAddress string, me string, 64 | MapFunc func(string) *list.List, 65 | ReduceFunc func(string, *list.List) string, nRPC int) { 66 | DPrintf("RunWorker %s\n", me) 67 | wk := new(Worker) 68 | wk.name = me 69 | wk.Map = MapFunc 70 | wk.Reduce = ReduceFunc 71 | wk.nRPC = nRPC 72 | rpcs := rpc.NewServer() 73 | rpcs.Register(wk) 74 | os.Remove(me) // only needed for "unix" 75 | l, e := net.Listen("unix", me) 76 | if e != nil { 77 | log.Fatal("RunWorker: worker ", me, " error: ", e) 78 | } 79 | wk.l = l 80 | Register(MasterAddress, me) 81 | 82 | // DON'T MODIFY CODE BELOW 83 | for wk.nRPC != 0 { 84 | conn, err := wk.l.Accept() 85 | if err == nil { 86 | wk.nRPC -= 1 87 | go rpcs.ServeConn(conn) 88 | wk.nJobs += 1 89 | } else { 90 | break 91 | } 92 | } 93 | wk.l.Close() 94 | DPrintf("RunWorker %s exit\n", me) 95 | } 96 | -------------------------------------------------------------------------------- /hw1/README.md: -------------------------------------------------------------------------------- 1 | # CS188-hw1 2 | private repo 3 | 4 | # Setup 5 | 6 | ```sh 7 | (py3) adrianhsu:~/Desktop 8 | $ git clone --mirror https://github.com/S19-CS188/assignment1-skeleton.git 9 | Cloning into bare repository 'assignment1-skeleton.git'... 10 | remote: Enumerating objects: 15, done. 11 | remote: Total 15 (delta 0), reused 0 (delta 0), pack-reused 15 12 | Unpacking objects: 100% (15/15), done. 
13 | (py3) adrianhsu:~/Desktop 14 | $ cd assignment1-skeleton.git/ 15 | (py3) adrianhsu:~/Desktop/assignment1-skeleton.git (master) 16 | $ git push --mirror https://github.com/AdrianHsu/CS188-hw1.git 17 | Enumerating objects: 15, done. 18 | Counting objects: 100% (15/15), done. 19 | Delta compression using up to 4 threads 20 | Compressing objects: 100% (14/14), done. 21 | Writing objects: 100% (15/15), 1.37 MiB | 117.00 KiB/s, done. 22 | Total 15 (delta 0), reused 0 (delta 0) 23 | To https://github.com/AdrianHsu/CS188-hw1.git 24 | * [new branch] master -> master 25 | (py3) adrianhsu:~/Desktop/assignment1-skeleton.git (master) 26 | $ git remote set-url --push origin https://github.com/AdrianHsu/CS188-hw1.git 27 | (py3) adrianhsu:~/Desktop/assignment1-skeleton.git (master) 28 | $ cd .. 29 | (py3) adrianhsu:~/Desktop 30 | $ git clone https://github.com/AdrianHsu/CS188-hw1 31 | Cloning into 'CS188-hw1'... 32 | remote: Enumerating objects: 15, done. 33 | remote: Counting objects: 100% (15/15), done. 34 | remote: Compressing objects: 100% (14/14), done. 35 | remote: Total 15 (delta 0), reused 15 (delta 0), pack-reused 0 36 | Unpacking objects: 100% (15/15), done. 37 | (py3) adrianhsu:~/Desktop 38 | $ cd CS188-hw1/ 39 | (py3) adrianhsu:~/Desktop/CS188-hw1 (master) 40 | $ ls 41 | src 42 | (py3) adrianhsu:~/Desktop/CS188-hw1 (master) 43 | $ git remote -v 44 | origin https://github.com/AdrianHsu/CS188-hw1 (fetch) 45 | origin https://github.com/AdrianHsu/CS188-hw1 (push) 46 | ``` 47 | 48 | 49 | 50 | ## Compile 51 | 52 | ```sh 53 | (py3) adrianhsu:~/Desktop/CS188-hw1 (master) 54 | $ export GOPATH=$(pwd) 55 | (py3) adrianhsu:~/Desktop/CS188-hw1 (master) 56 | $ cd src/ma 57 | -bash: cd: src/ma: No such file or directory 58 | (py3) adrianhsu:~/Desktop/CS188-hw1 (master) 59 | $ cd src/main/ 60 | (py3) adrianhsu:~/Desktop/CS188-hw1/src/main (master) 61 | $ go run wc.go master kjv12.txt sequential 62 | # command-line-arguments 63 | ./wc.go:15:1: missing return at end of function 64 | ./wc.go:21:1: missing return at end of function 65 | ``` 66 | 67 | The compiler produces two errors, because the implementation of the `Map()` and `Reduce()` functions are incomplete. 68 | 69 | 70 | ## Submit 71 | 72 | ```sh 73 | ... Basic Test Passed 74 | ... One Failure Passed 75 | ... Many Failures Passed 76 | ``` 77 | 78 | 79 | -------------------------------------------------------------------------------- /hw3/src/pbservice/client.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import "viewservice" 4 | import "net/rpc" 5 | import "fmt" 6 | 7 | import "crypto/rand" 8 | import "math/big" 9 | 10 | 11 | type Clerk struct { 12 | vs *viewservice.Clerk 13 | // Your declarations here 14 | } 15 | 16 | // this may come in handy. 17 | func nrand() int64 { 18 | max := big.NewInt(int64(1) << 62) 19 | bigx, _ := rand.Int(rand.Reader, max) 20 | x := bigx.Int64() 21 | return x 22 | } 23 | 24 | func MakeClerk(vshost string, me string) *Clerk { 25 | ck := new(Clerk) 26 | ck.vs = viewservice.MakeClerk(me, vshost) 27 | // Your ck.* initializations here 28 | 29 | return ck 30 | } 31 | 32 | 33 | // 34 | // call() sends an RPC to the rpcname handler on server srv 35 | // with arguments args, waits for the reply, and leaves the 36 | // reply in reply. the reply argument should be a pointer 37 | // to a reply structure. 38 | // 39 | // the return value is true if the server responded, and false 40 | // if call() was not able to contact the server. 
in particular, 41 | // the reply's contents are only valid if call() returned true. 42 | // 43 | // you should assume that call() will return an 44 | // error after a while if the server is dead. 45 | // don't provide your own time-out mechanism. 46 | // 47 | // please use call() to send all RPCs, in client.go and server.go. 48 | // please don't change this function. 49 | // 50 | func call(srv string, rpcname string, 51 | args interface{}, reply interface{}) bool { 52 | c, errx := rpc.Dial("unix", srv) 53 | if errx != nil { 54 | return false 55 | } 56 | defer c.Close() 57 | 58 | err := c.Call(rpcname, args, reply) 59 | if err == nil { 60 | return true 61 | } 62 | 63 | fmt.Println(err) 64 | return false 65 | } 66 | 67 | // 68 | // fetch a key's value from the current primary; 69 | // if they key has never been set, return "". 70 | // Get() must keep trying until it either the 71 | // primary replies with the value or the primary 72 | // says the key doesn't exist (has never been Put(). 73 | // 74 | func (ck *Clerk) Get(key string) string { 75 | 76 | // Your code here. 77 | 78 | return "???" 79 | } 80 | 81 | // 82 | // send a Put or Append RPC 83 | // 84 | func (ck *Clerk) PutAppend(key string, value string, op string) { 85 | 86 | // Your code here. 87 | } 88 | 89 | // 90 | // tell the primary to update key's value. 91 | // must keep trying until it succeeds. 92 | // 93 | func (ck *Clerk) Put(key string, value string) { 94 | ck.PutAppend(key, value, "Put") 95 | } 96 | 97 | // 98 | // tell the primary to append to key's value. 99 | // must keep trying until it succeeds. 100 | // 101 | func (ck *Clerk) Append(key string, value string) { 102 | ck.PutAppend(key, value, "Append") 103 | } 104 | -------------------------------------------------------------------------------- /hw4/src/pbservice/client.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import "viewservice" 4 | import "net/rpc" 5 | import "fmt" 6 | 7 | import "crypto/rand" 8 | import "math/big" 9 | 10 | 11 | type Clerk struct { 12 | vs *viewservice.Clerk 13 | // Your declarations here 14 | } 15 | 16 | // this may come in handy. 17 | func nrand() int64 { 18 | max := big.NewInt(int64(1) << 62) 19 | bigx, _ := rand.Int(rand.Reader, max) 20 | x := bigx.Int64() 21 | return x 22 | } 23 | 24 | func MakeClerk(vshost string, me string) *Clerk { 25 | ck := new(Clerk) 26 | ck.vs = viewservice.MakeClerk(me, vshost) 27 | // Your ck.* initializations here 28 | 29 | return ck 30 | } 31 | 32 | 33 | // 34 | // call() sends an RPC to the rpcname handler on server srv 35 | // with arguments args, waits for the reply, and leaves the 36 | // reply in reply. the reply argument should be a pointer 37 | // to a reply structure. 38 | // 39 | // the return value is true if the server responded, and false 40 | // if call() was not able to contact the server. in particular, 41 | // the reply's contents are only valid if call() returned true. 42 | // 43 | // you should assume that call() will return an 44 | // error after a while if the server is dead. 45 | // don't provide your own time-out mechanism. 46 | // 47 | // please use call() to send all RPCs, in client.go and server.go. 48 | // please don't change this function. 
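// An illustrative usage sketch (not part of the handout), assuming GetArgs and
// GetReply expose the key and value, and with `primary` obtained from the
// viewservice -- the caller supplies its own retry loop around call():
//
//	args := &GetArgs{ /* key goes here */ }
//	var reply GetReply
//	for !call(primary, "PBServer.Get", args, &reply) {
//		time.Sleep(viewservice.PingInterval) // then re-ask the viewservice who the primary is
//	}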
49 | // 50 | func call(srv string, rpcname string, 51 | args interface{}, reply interface{}) bool { 52 | c, errx := rpc.Dial("unix", srv) 53 | if errx != nil { 54 | return false 55 | } 56 | defer c.Close() 57 | 58 | err := c.Call(rpcname, args, reply) 59 | if err == nil { 60 | return true 61 | } 62 | 63 | fmt.Println(err) 64 | return false 65 | } 66 | 67 | // 68 | // fetch a key's value from the current primary; 69 | // if they key has never been set, return "". 70 | // Get() must keep trying until it either the 71 | // primary replies with the value or the primary 72 | // says the key doesn't exist (has never been Put(). 73 | // 74 | func (ck *Clerk) Get(key string) string { 75 | 76 | // Your code here. 77 | 78 | return "???" 79 | } 80 | 81 | // 82 | // send a Put or Append RPC 83 | // 84 | func (ck *Clerk) PutAppend(key string, value string, op string) { 85 | 86 | // Your code here. 87 | } 88 | 89 | // 90 | // tell the primary to update key's value. 91 | // must keep trying until it succeeds. 92 | // 93 | func (ck *Clerk) Put(key string, value string) { 94 | ck.PutAppend(key, value, "Put") 95 | } 96 | 97 | // 98 | // tell the primary to append to key's value. 99 | // must keep trying until it succeeds. 100 | // 101 | func (ck *Clerk) Append(key string, value string) { 102 | ck.PutAppend(key, value, "Append") 103 | } 104 | -------------------------------------------------------------------------------- /hw3/src/viewservice/server.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net" 4 | import "net/rpc" 5 | import "log" 6 | import "time" 7 | import "sync" 8 | import "fmt" 9 | import "os" 10 | import "sync/atomic" 11 | 12 | type ViewServer struct { 13 | mu sync.Mutex 14 | l net.Listener 15 | dead int32 // for testing 16 | rpccount int32 // for testing 17 | me string 18 | 19 | 20 | // Your declarations here. 21 | } 22 | 23 | // 24 | // server Ping RPC handler. 25 | // 26 | func (vs *ViewServer) Ping(args *PingArgs, reply *PingReply) error { 27 | 28 | // Your code here. 29 | 30 | return nil 31 | } 32 | 33 | // 34 | // server Get() RPC handler. 35 | // 36 | func (vs *ViewServer) Get(args *GetArgs, reply *GetReply) error { 37 | 38 | // Your code here. 39 | 40 | return nil 41 | } 42 | 43 | 44 | // 45 | // tick() is called once per PingInterval; it should notice 46 | // if servers have died or recovered, and change the view 47 | // accordingly. 48 | // 49 | func (vs *ViewServer) tick() { 50 | 51 | // Your code here. 52 | } 53 | 54 | // 55 | // tell the server to shut itself down. 56 | // for testing. 57 | // please don't change these two functions. 58 | // 59 | func (vs *ViewServer) Kill() { 60 | atomic.StoreInt32(&vs.dead, 1) 61 | vs.l.Close() 62 | } 63 | 64 | // 65 | // has this server been asked to shut down? 66 | // 67 | func (vs *ViewServer) isdead() bool { 68 | return atomic.LoadInt32(&vs.dead) != 0 69 | } 70 | 71 | // please don't change this function. 72 | func (vs *ViewServer) GetRPCCount() int32 { 73 | return atomic.LoadInt32(&vs.rpccount) 74 | } 75 | 76 | func StartServer(me string) *ViewServer { 77 | vs := new(ViewServer) 78 | vs.me = me 79 | // Your vs.* initializations here. 80 | 81 | // tell net/rpc about our RPC server and handlers. 82 | rpcs := rpc.NewServer() 83 | rpcs.Register(vs) 84 | 85 | // prepare to receive connections from clients. 86 | // change "unix" to "tcp" to use over a network. 
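// Illustrative only: to run over a real network, the two lines below would
// become something like
//
//	l, e := net.Listen("tcp", vs.me)   // with vs.me a host:port such as "x.x.x.x:8080"
//
// and the os.Remove() call would be unnecessary -- it only cleans up a stale
// Unix-domain socket file left over from a previous run.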
87 | os.Remove(vs.me) // only needed for "unix" 88 | l, e := net.Listen("unix", vs.me) 89 | if e != nil { 90 | log.Fatal("listen error: ", e) 91 | } 92 | vs.l = l 93 | 94 | // please don't change any of the following code, 95 | // or do anything to subvert it. 96 | 97 | // create a thread to accept RPC connections from clients. 98 | go func() { 99 | for vs.isdead() == false { 100 | conn, err := vs.l.Accept() 101 | if err == nil && vs.isdead() == false { 102 | atomic.AddInt32(&vs.rpccount, 1) 103 | go rpcs.ServeConn(conn) 104 | } else if err == nil { 105 | conn.Close() 106 | } 107 | if err != nil && vs.isdead() == false { 108 | fmt.Printf("ViewServer(%v) accept: %v\n", me, err.Error()) 109 | vs.Kill() 110 | } 111 | } 112 | }() 113 | 114 | // create a thread to call tick() periodically. 115 | go func() { 116 | for vs.isdead() == false { 117 | vs.tick() 118 | time.Sleep(PingInterval) 119 | } 120 | }() 121 | 122 | return vs 123 | } 124 | -------------------------------------------------------------------------------- /hw4/src/viewservice/server.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net" 4 | import "net/rpc" 5 | import "log" 6 | import "time" 7 | import "sync" 8 | import "fmt" 9 | import "os" 10 | import "sync/atomic" 11 | 12 | type ViewServer struct { 13 | mu sync.Mutex 14 | l net.Listener 15 | dead int32 // for testing 16 | rpccount int32 // for testing 17 | me string 18 | 19 | 20 | // Your declarations here. 21 | } 22 | 23 | // 24 | // server Ping RPC handler. 25 | // 26 | func (vs *ViewServer) Ping(args *PingArgs, reply *PingReply) error { 27 | 28 | // Your code here. 29 | 30 | return nil 31 | } 32 | 33 | // 34 | // server Get() RPC handler. 35 | // 36 | func (vs *ViewServer) Get(args *GetArgs, reply *GetReply) error { 37 | 38 | // Your code here. 39 | 40 | return nil 41 | } 42 | 43 | 44 | // 45 | // tick() is called once per PingInterval; it should notice 46 | // if servers have died or recovered, and change the view 47 | // accordingly. 48 | // 49 | func (vs *ViewServer) tick() { 50 | 51 | // Your code here. 52 | } 53 | 54 | // 55 | // tell the server to shut itself down. 56 | // for testing. 57 | // please don't change these two functions. 58 | // 59 | func (vs *ViewServer) Kill() { 60 | atomic.StoreInt32(&vs.dead, 1) 61 | vs.l.Close() 62 | } 63 | 64 | // 65 | // has this server been asked to shut down? 66 | // 67 | func (vs *ViewServer) isdead() bool { 68 | return atomic.LoadInt32(&vs.dead) != 0 69 | } 70 | 71 | // please don't change this function. 72 | func (vs *ViewServer) GetRPCCount() int32 { 73 | return atomic.LoadInt32(&vs.rpccount) 74 | } 75 | 76 | func StartServer(me string) *ViewServer { 77 | vs := new(ViewServer) 78 | vs.me = me 79 | // Your vs.* initializations here. 80 | 81 | // tell net/rpc about our RPC server and handlers. 82 | rpcs := rpc.NewServer() 83 | rpcs.Register(vs) 84 | 85 | // prepare to receive connections from clients. 86 | // change "unix" to "tcp" to use over a network. 87 | os.Remove(vs.me) // only needed for "unix" 88 | l, e := net.Listen("unix", vs.me) 89 | if e != nil { 90 | log.Fatal("listen error: ", e) 91 | } 92 | vs.l = l 93 | 94 | // please don't change any of the following code, 95 | // or do anything to subvert it. 96 | 97 | // create a thread to accept RPC connections from clients. 
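// The loop below accepts connections until the server is killed: each
// connection is served on its own goroutine, rpccount is incremented so the
// tests can observe RPC traffic, connections that race with shutdown are
// simply closed, and any other accept error kills the server.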
98 | go func() { 99 | for vs.isdead() == false { 100 | conn, err := vs.l.Accept() 101 | if err == nil && vs.isdead() == false { 102 | atomic.AddInt32(&vs.rpccount, 1) 103 | go rpcs.ServeConn(conn) 104 | } else if err == nil { 105 | conn.Close() 106 | } 107 | if err != nil && vs.isdead() == false { 108 | fmt.Printf("ViewServer(%v) accept: %v\n", me, err.Error()) 109 | vs.Kill() 110 | } 111 | } 112 | }() 113 | 114 | // create a thread to call tick() periodically. 115 | go func() { 116 | for vs.isdead() == false { 117 | vs.tick() 118 | time.Sleep(PingInterval) 119 | } 120 | }() 121 | 122 | return vs 123 | } 124 | -------------------------------------------------------------------------------- /hw4/src/shardmaster/client.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Shardmaster clerk. 5 | // Please don't change this file. 6 | // 7 | 8 | import "net/rpc" 9 | import "time" 10 | import "fmt" 11 | 12 | type Clerk struct { 13 | servers []string // shardmaster replicas 14 | } 15 | 16 | func MakeClerk(servers []string) *Clerk { 17 | ck := new(Clerk) 18 | ck.servers = servers 19 | return ck 20 | } 21 | 22 | // 23 | // call() sends an RPC to the rpcname handler on server srv 24 | // with arguments args, waits for the reply, and leaves the 25 | // reply in reply. the reply argument should be a pointer 26 | // to a reply structure. 27 | // 28 | // the return value is true if the server responded, and false 29 | // if call() was not able to contact the server. in particular, 30 | // the reply's contents are only valid if call() returned true. 31 | // 32 | // you should assume that call() will return an 33 | // error after a while if the server is dead. 34 | // don't provide your own time-out mechanism. 35 | // 36 | // please use call() to send all RPCs, in client.go and server.go. 37 | // please don't change this function. 38 | // 39 | func call(srv string, rpcname string, 40 | args interface{}, reply interface{}) bool { 41 | c, errx := rpc.Dial("unix", srv) 42 | if errx != nil { 43 | return false 44 | } 45 | defer c.Close() 46 | 47 | err := c.Call(rpcname, args, reply) 48 | if err == nil { 49 | return true 50 | } 51 | 52 | fmt.Println(err) 53 | return false 54 | } 55 | 56 | func (ck *Clerk) Query(num int) Config { 57 | for { 58 | // try each known server. 59 | for _, srv := range ck.servers { 60 | args := &QueryArgs{} 61 | args.Num = num // num??? 62 | var reply QueryReply 63 | ok := call(srv, "ShardMaster.Query", args, &reply) 64 | if ok { 65 | return reply.Config 66 | } 67 | } 68 | time.Sleep(100 * time.Millisecond) 69 | } 70 | } 71 | 72 | func (ck *Clerk) Join(gid int64, servers []string) { 73 | for { 74 | // try each known server. 75 | for _, srv := range ck.servers { 76 | args := &JoinArgs{} 77 | args.GID = gid 78 | args.Servers = servers 79 | var reply JoinReply 80 | ok := call(srv, "ShardMaster.Join", args, &reply) 81 | if ok { 82 | return 83 | } 84 | } 85 | time.Sleep(100 * time.Millisecond) 86 | } 87 | } 88 | 89 | func (ck *Clerk) Leave(gid int64) { 90 | for { 91 | // try each known server. 92 | for _, srv := range ck.servers { 93 | args := &LeaveArgs{} 94 | args.GID = gid 95 | var reply LeaveReply 96 | ok := call(srv, "ShardMaster.Leave", args, &reply) 97 | if ok { 98 | return 99 | } 100 | } 101 | time.Sleep(100 * time.Millisecond) 102 | } 103 | } 104 | 105 | func (ck *Clerk) Move(shard int, gid int64) { 106 | for { 107 | // try each known server. 
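// As with Join/Leave/Query above: cycle through every shardmaster replica
// until one responds, sleeping 100ms between full passes.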
108 | for _, srv := range ck.servers { 109 | args := &MoveArgs{} 110 | args.Shard = shard 111 | args.GID = gid 112 | var reply MoveReply 113 | ok := call(srv, "ShardMaster.Move", args, &reply) 114 | if ok { 115 | return 116 | } 117 | } 118 | time.Sleep(100 * time.Millisecond) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /hw3/src/kvpaxos/client.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import ( 4 | "net/rpc" 5 | "time" 6 | ) 7 | import "crypto/rand" 8 | import "math/big" 9 | 10 | import "fmt" 11 | 12 | type Clerk struct { 13 | servers []string 14 | // You will have to modify this struct. 15 | } 16 | 17 | func nrand() int64 { 18 | max := big.NewInt(int64(1) << 62) 19 | bigx, _ := rand.Int(rand.Reader, max) 20 | x := bigx.Int64() 21 | return x 22 | } 23 | 24 | func MakeClerk(servers []string) *Clerk { 25 | ck := new(Clerk) 26 | ck.servers = servers 27 | // You'll have to add code here. 28 | return ck 29 | } 30 | 31 | // 32 | // call() sends an RPC to the rpcname handler on server srv 33 | // with arguments args, waits for the reply, and leaves the 34 | // reply in reply. the reply argument should be a pointer 35 | // to a reply structure. 36 | // 37 | // the return value is true if the server responded, and false 38 | // if call() was not able to contact the server. in particular, 39 | // the reply's contents are only valid if call() returned true. 40 | // 41 | // you should assume that call() will return an 42 | // error after a while if the server is dead. 43 | // don't provide your own time-out mechanism. 44 | // 45 | // please use call() to send all RPCs, in client.go and server.go. 46 | // please don't change this function. 47 | // 48 | func call(srv string, rpcname string, 49 | args interface{}, reply interface{}) bool { 50 | c, errx := rpc.Dial("unix", srv) 51 | if errx != nil { 52 | return false 53 | } 54 | defer c.Close() 55 | 56 | err := c.Call(rpcname, args, reply) 57 | if err == nil { 58 | return true 59 | } 60 | 61 | fmt.Println(err) 62 | return false 63 | } 64 | 65 | // 66 | // fetch the current value for a key. 67 | // returns "" if the key does not exist. 68 | // keeps trying forever in the face of all other errors. 69 | // 70 | func (ck *Clerk) Get(key string) string { 71 | // You will have to modify this function. 72 | args := &GetArgs{key, nrand()} 73 | reply := &GetReply{} 74 | 75 | var ok = false 76 | var i = 0 77 | for !ok { 78 | ok = call(ck.servers[i], "KVPaxos.Get", args, reply) 79 | if ok { 80 | break 81 | } else { 82 | //log.Printf("Get on server %v fails. change another one", reply.FailSrv) 83 | time.Sleep(100 * time.Millisecond) 84 | i += 1 85 | i %= len(ck.servers) 86 | } 87 | } 88 | return reply.Value 89 | } 90 | 91 | // 92 | // shared by Put and Append. 93 | // 94 | func (ck *Clerk) PutAppend(key string, value string, op string) { 95 | // You will have to modify this function. 96 | args := &PutAppendArgs{key, value, op, nrand()} 97 | reply := &PutAppendReply{} 98 | var ok = false 99 | var i = 0 100 | for !ok { 101 | ok = call(ck.servers[i], "KVPaxos.PutAppend", args, reply) 102 | if ok { 103 | break 104 | } else { 105 | //log.Printf("PutAppend on server %v fails. 
change another one", reply.FailSrv) 106 | time.Sleep(100 * time.Millisecond) 107 | i += 1 108 | i %= len(ck.servers) 109 | } 110 | } 111 | } 112 | 113 | func (ck *Clerk) Put(key string, value string) { 114 | ck.PutAppend(key, value, "Put") 115 | } 116 | func (ck *Clerk) Append(key string, value string) { 117 | ck.PutAppend(key, value, "Append") 118 | } 119 | -------------------------------------------------------------------------------- /hw1/src/mapreduce/master.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "container/list" 5 | ) 6 | import "fmt" 7 | 8 | 9 | type WorkerInfo struct { 10 | address string 11 | // You can add definitions here. 12 | 13 | } 14 | 15 | 16 | // Clean up all workers by sending a Shutdown RPC to each one of them Collect 17 | // the number of jobs each work has performed. 18 | func (mr *MapReduce) KillWorkers() *list.List { 19 | l := list.New() 20 | for _, w := range mr.Workers { 21 | DPrintf("DoWork: shutdown %s\n", w.address) 22 | args := &ShutdownArgs{} 23 | var reply ShutdownReply 24 | ok := call(w.address, "Worker.Shutdown", args, &reply) 25 | if ok == false { 26 | fmt.Printf("DoWork: RPC %s shutdown error\n", w.address) 27 | } else { 28 | l.PushBack(reply.Njobs) 29 | } 30 | } 31 | return l 32 | } 33 | 34 | func setupWorkers(mr *MapReduce) { 35 | for wkr := range mr.registerChannel { 36 | mr.Workers[wkr] = &WorkerInfo{address: wkr} 37 | mr.availableWorkers <- wkr 38 | } 39 | } 40 | 41 | func allocate(mr *MapReduce) { 42 | for i := 0; i < mr.nMap; i++ { 43 | mr.remainMapJobs <- i 44 | } 45 | for i := 0; i < mr.nReduce; i++ { 46 | mr.remainReduceJobs <- i 47 | } 48 | } 49 | 50 | func doMap(mr *MapReduce) { 51 | for job := range mr.remainMapJobs { // keep listening 52 | wkr := <-mr.availableWorkers 53 | go func(job int, wkr string) { 54 | 55 | args := &DoJobArgs{File: mr.file, Operation: Map, 56 | JobNumber: job, NumOtherPhase: mr.nReduce} 57 | 58 | var reply DoJobReply 59 | ok := call(wkr, "Worker.DoJob", args, &reply) 60 | if ok == false { 61 | fmt.Printf("DoMap: RPC %s do job failure! reassign the job...\n", wkr) 62 | mr.remainMapJobs <- job 63 | } else { 64 | mr.availableWorkers <- wkr 65 | mr.nCount <- true 66 | } 67 | }(job, wkr) 68 | } 69 | } 70 | 71 | 72 | func doReduce(mr *MapReduce) { 73 | for job := range mr.remainReduceJobs { // keep listening 74 | wkr := <-mr.availableWorkers 75 | go func(job int, wkr string) { 76 | 77 | args := &DoJobArgs{File: mr.file, Operation: Reduce, 78 | JobNumber: job, NumOtherPhase: mr.nMap} 79 | 80 | var reply DoJobReply 81 | ok := call(wkr, "Worker.DoJob", args, &reply) 82 | if ok == false { 83 | fmt.Printf("DoReduce: RPC %s do job failure! 
reassign the job...\n", wkr) 84 | mr.remainReduceJobs <- job 85 | } else { 86 | mr.availableWorkers <- wkr 87 | mr.nCount <- true 88 | } 89 | }(job, wkr) 90 | } 91 | } 92 | 93 | func mapCountFinishJobs(mr *MapReduce) { 94 | cnt := 0 95 | for range mr.nCount { 96 | cnt += 1 97 | if cnt == mr.nMap { 98 | break 99 | } 100 | } 101 | close(mr.remainMapJobs) 102 | mr.donePhase <- true 103 | } 104 | 105 | func reduceCountFinishJobs(mr *MapReduce) { 106 | cnt := 0 107 | for range mr.nCount { 108 | cnt += 1 109 | if cnt == mr.nReduce { 110 | break 111 | } 112 | } 113 | close(mr.remainReduceJobs) 114 | mr.donePhase <- true 115 | } 116 | 117 | 118 | func (mr *MapReduce) RunMaster() *list.List { 119 | // Your code here 120 | 121 | 122 | go setupWorkers(mr) 123 | go allocate(mr) 124 | 125 | // Map Phase 126 | go mapCountFinishJobs(mr) 127 | doMap(mr) 128 | <-mr.donePhase 129 | // Reduce Phase 130 | go reduceCountFinishJobs(mr) 131 | doReduce(mr) 132 | <-mr.donePhase 133 | return mr.KillWorkers() 134 | } 135 | -------------------------------------------------------------------------------- /hw3/src/pbservice/server.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "time" 8 | import "viewservice" 9 | import "sync" 10 | import "sync/atomic" 11 | import "os" 12 | import "syscall" 13 | import "math/rand" 14 | 15 | 16 | 17 | type PBServer struct { 18 | mu sync.Mutex 19 | l net.Listener 20 | dead int32 // for testing 21 | unreliable int32 // for testing 22 | me string 23 | vs *viewservice.Clerk 24 | // Your declarations here. 25 | } 26 | 27 | 28 | func (pb *PBServer) Get(args *GetArgs, reply *GetReply) error { 29 | 30 | // Your code here. 31 | 32 | return nil 33 | } 34 | 35 | 36 | func (pb *PBServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 37 | 38 | // Your code here. 39 | 40 | 41 | return nil 42 | } 43 | 44 | 45 | // 46 | // ping the viewserver periodically. 47 | // if view changed: 48 | // transition to new view. 49 | // manage transfer of state from primary to new backup. 50 | // 51 | func (pb *PBServer) tick() { 52 | 53 | // Your code here. 54 | } 55 | 56 | // tell the server to shut itself down. 57 | // please do not change these two functions. 58 | func (pb *PBServer) kill() { 59 | atomic.StoreInt32(&pb.dead, 1) 60 | pb.l.Close() 61 | } 62 | 63 | // call this to find out if the server is dead. 64 | func (pb *PBServer) isdead() bool { 65 | return atomic.LoadInt32(&pb.dead) != 0 66 | } 67 | 68 | // please do not change these two functions. 69 | func (pb *PBServer) setunreliable(what bool) { 70 | if what { 71 | atomic.StoreInt32(&pb.unreliable, 1) 72 | } else { 73 | atomic.StoreInt32(&pb.unreliable, 0) 74 | } 75 | } 76 | 77 | func (pb *PBServer) isunreliable() bool { 78 | return atomic.LoadInt32(&pb.unreliable) != 0 79 | } 80 | 81 | 82 | func StartServer(vshost string, me string) *PBServer { 83 | pb := new(PBServer) 84 | pb.me = me 85 | pb.vs = viewservice.MakeClerk(me, vshost) 86 | // Your pb.* initializations here. 87 | 88 | rpcs := rpc.NewServer() 89 | rpcs.Register(pb) 90 | 91 | os.Remove(pb.me) 92 | l, e := net.Listen("unix", pb.me) 93 | if e != nil { 94 | log.Fatal("listen error: ", e) 95 | } 96 | pb.l = l 97 | 98 | // please do not change any of the following code, 99 | // or do anything to subvert it. 
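// Rough behavior of the harness loop below when the server is unreliable:
// about 10% of incoming connections (rand%1000 < 100) are dropped before
// being served, and about 20% of the remainder are served but have their
// reply discarded by shutting down the write half of the Unix socket.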
100 | 101 | go func() { 102 | for pb.isdead() == false { 103 | conn, err := pb.l.Accept() 104 | if err == nil && pb.isdead() == false { 105 | if pb.isunreliable() && (rand.Int63()%1000) < 100 { 106 | // discard the request. 107 | conn.Close() 108 | } else if pb.isunreliable() && (rand.Int63()%1000) < 200 { 109 | // process the request but force discard of reply. 110 | c1 := conn.(*net.UnixConn) 111 | f, _ := c1.File() 112 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 113 | if err != nil { 114 | fmt.Printf("shutdown: %v\n", err) 115 | } 116 | go rpcs.ServeConn(conn) 117 | } else { 118 | go rpcs.ServeConn(conn) 119 | } 120 | } else if err == nil { 121 | conn.Close() 122 | } 123 | if err != nil && pb.isdead() == false { 124 | fmt.Printf("PBServer(%v) accept: %v\n", me, err.Error()) 125 | pb.kill() 126 | } 127 | } 128 | }() 129 | 130 | go func() { 131 | for pb.isdead() == false { 132 | pb.tick() 133 | time.Sleep(viewservice.PingInterval) 134 | } 135 | }() 136 | 137 | return pb 138 | } 139 | -------------------------------------------------------------------------------- /hw4/src/pbservice/server.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "time" 8 | import "viewservice" 9 | import "sync" 10 | import "sync/atomic" 11 | import "os" 12 | import "syscall" 13 | import "math/rand" 14 | 15 | 16 | 17 | type PBServer struct { 18 | mu sync.Mutex 19 | l net.Listener 20 | dead int32 // for testing 21 | unreliable int32 // for testing 22 | me string 23 | vs *viewservice.Clerk 24 | // Your declarations here. 25 | } 26 | 27 | 28 | func (pb *PBServer) Get(args *GetArgs, reply *GetReply) error { 29 | 30 | // Your code here. 31 | 32 | return nil 33 | } 34 | 35 | 36 | func (pb *PBServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 37 | 38 | // Your code here. 39 | 40 | 41 | return nil 42 | } 43 | 44 | 45 | // 46 | // ping the viewserver periodically. 47 | // if view changed: 48 | // transition to new view. 49 | // manage transfer of state from primary to new backup. 50 | // 51 | func (pb *PBServer) tick() { 52 | 53 | // Your code here. 54 | } 55 | 56 | // tell the server to shut itself down. 57 | // please do not change these two functions. 58 | func (pb *PBServer) kill() { 59 | atomic.StoreInt32(&pb.dead, 1) 60 | pb.l.Close() 61 | } 62 | 63 | // call this to find out if the server is dead. 64 | func (pb *PBServer) isdead() bool { 65 | return atomic.LoadInt32(&pb.dead) != 0 66 | } 67 | 68 | // please do not change these two functions. 69 | func (pb *PBServer) setunreliable(what bool) { 70 | if what { 71 | atomic.StoreInt32(&pb.unreliable, 1) 72 | } else { 73 | atomic.StoreInt32(&pb.unreliable, 0) 74 | } 75 | } 76 | 77 | func (pb *PBServer) isunreliable() bool { 78 | return atomic.LoadInt32(&pb.unreliable) != 0 79 | } 80 | 81 | 82 | func StartServer(vshost string, me string) *PBServer { 83 | pb := new(PBServer) 84 | pb.me = me 85 | pb.vs = viewservice.MakeClerk(me, vshost) 86 | // Your pb.* initializations here. 87 | 88 | rpcs := rpc.NewServer() 89 | rpcs.Register(pb) 90 | 91 | os.Remove(pb.me) 92 | l, e := net.Listen("unix", pb.me) 93 | if e != nil { 94 | log.Fatal("listen error: ", e) 95 | } 96 | pb.l = l 97 | 98 | // please do not change any of the following code, 99 | // or do anything to subvert it. 
100 | 101 | go func() { 102 | for pb.isdead() == false { 103 | conn, err := pb.l.Accept() 104 | if err == nil && pb.isdead() == false { 105 | if pb.isunreliable() && (rand.Int63()%1000) < 100 { 106 | // discard the request. 107 | conn.Close() 108 | } else if pb.isunreliable() && (rand.Int63()%1000) < 200 { 109 | // process the request but force discard of reply. 110 | c1 := conn.(*net.UnixConn) 111 | f, _ := c1.File() 112 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 113 | if err != nil { 114 | fmt.Printf("shutdown: %v\n", err) 115 | } 116 | go rpcs.ServeConn(conn) 117 | } else { 118 | go rpcs.ServeConn(conn) 119 | } 120 | } else if err == nil { 121 | conn.Close() 122 | } 123 | if err != nil && pb.isdead() == false { 124 | fmt.Printf("PBServer(%v) accept: %v\n", me, err.Error()) 125 | pb.kill() 126 | } 127 | } 128 | }() 129 | 130 | go func() { 131 | for pb.isdead() == false { 132 | pb.tick() 133 | time.Sleep(viewservice.PingInterval) 134 | } 135 | }() 136 | 137 | return pb 138 | } 139 | -------------------------------------------------------------------------------- /hw4/src/kvpaxos/client.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import ( 4 | "net/rpc" 5 | "time" 6 | ) 7 | import "crypto/rand" 8 | import "math/big" 9 | 10 | import "fmt" 11 | 12 | type Clerk struct { 13 | servers []string 14 | // You will have to modify this struct. 15 | ClientID int64 16 | Seq int 17 | } 18 | 19 | func nrand() int64 { 20 | max := big.NewInt(int64(1) << 62) 21 | bigx, _ := rand.Int(rand.Reader, max) 22 | x := bigx.Int64() 23 | return x 24 | } 25 | 26 | func MakeClerk(servers []string) *Clerk { 27 | ck := new(Clerk) 28 | ck.servers = servers 29 | // You'll have to add code here. 30 | ck.ClientID = nrand() 31 | ck.Seq = 0 32 | return ck 33 | } 34 | 35 | // 36 | // call() sends an RPC to the rpcname handler on server srv 37 | // with arguments args, waits for the reply, and leaves the 38 | // reply in reply. the reply argument should be a pointer 39 | // to a reply structure. 40 | // 41 | // the return value is true if the server responded, and false 42 | // if call() was not able to contact the server. in particular, 43 | // the reply's contents are only valid if call() returned true. 44 | // 45 | // you should assume that call() will return an 46 | // error after a while if the server is dead. 47 | // don't provide your own time-out mechanism. 48 | // 49 | // please use call() to send all RPCs, in client.go and server.go. 50 | // please don't change this function. 51 | // 52 | func call(srv string, rpcname string, 53 | args interface{}, reply interface{}) bool { 54 | c, errx := rpc.Dial("unix", srv) 55 | if errx != nil { 56 | return false 57 | } 58 | defer c.Close() 59 | 60 | err := c.Call(rpcname, args, reply) 61 | if err == nil { 62 | return true 63 | } 64 | 65 | fmt.Println(err) 66 | return false 67 | } 68 | 69 | // 70 | // fetch the current value for a key. 71 | // returns "" if the key does not exist. 72 | // keeps trying forever in the face of all other errors. 73 | // 74 | func (ck *Clerk) Get(key string) string { 75 | // You will have to modify this function. 76 | ck.Seq += 1 77 | args := &GetArgs{Key: key, Op: "Get", Seq: ck.Seq, ClientID: ck.ClientID} 78 | reply := &GetReply{} 79 | 80 | var ok = false 81 | var i = 0 82 | for !ok { 83 | ok = call(ck.servers[i], "KVPaxos.Get", args, reply) 84 | if ok && reply.Err == OK { 85 | break 86 | } else { 87 | //log.Printf("Get on server %v fails. 
change another one", reply.FailSrv) 88 | time.Sleep(100 * time.Millisecond) 89 | i += 1 90 | i %= len(ck.servers) 91 | } 92 | } 93 | return reply.Value 94 | } 95 | 96 | // 97 | // shared by Put and Append. 98 | // 99 | func (ck *Clerk) PutAppend(key string, value string, op string) { 100 | // You will have to modify this function. 101 | ck.Seq += 1 102 | args := &PutAppendArgs{Key: key, Value: value, Op: op, Seq: ck.Seq, ClientID: ck.ClientID} 103 | reply := &PutAppendReply{} 104 | var ok = false 105 | var i = 0 106 | for !ok { 107 | ok = call(ck.servers[i], "KVPaxos.PutAppend", args, reply) 108 | if ok && reply.Err == OK { 109 | break 110 | } else { 111 | //log.Printf("PutAppend on server %v fails. change another one", reply.FailSrv) 112 | time.Sleep(100 * time.Millisecond) 113 | i += 1 114 | i %= len(ck.servers) 115 | } 116 | } 117 | } 118 | 119 | func (ck *Clerk) Put(key string, value string) { 120 | ck.PutAppend(key, value, "Put") 121 | } 122 | func (ck *Clerk) Append(key string, value string) { 123 | ck.PutAppend(key, value, "Append") 124 | } 125 | -------------------------------------------------------------------------------- /hw2/src/pbservice/client.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import ( 4 | "time" 5 | "viewservice" 6 | ) 7 | import "net/rpc" 8 | import "fmt" 9 | 10 | import "crypto/rand" 11 | import "math/big" 12 | 13 | 14 | type Clerk struct { 15 | vs *viewservice.Clerk 16 | // Your declarations here 17 | currPrimary string 18 | } 19 | 20 | // this may come in handy. 21 | func nrand() int64 { 22 | max := big.NewInt(int64(1) << 62) 23 | bigx, _ := rand.Int(rand.Reader, max) 24 | x := bigx.Int64() 25 | return x 26 | } 27 | 28 | func MakeClerk(vshost string, me string) *Clerk { 29 | ck := new(Clerk) 30 | ck.vs = viewservice.MakeClerk(me, vshost) 31 | // Your ck.* initializations here 32 | ck.currPrimary = "" // initially, the current primary stored in cache is none 33 | 34 | return ck 35 | } 36 | 37 | 38 | // 39 | // call() sends an RPC to the rpcname handler on server srv 40 | // with arguments args, waits for the reply, and leaves the 41 | // reply in reply. the reply argument should be a pointer 42 | // to a reply structure. 43 | // 44 | // the return value is true if the server responded, and false 45 | // if call() was not able to contact the server. in particular, 46 | // the reply's contents are only valid if call() returned true. 47 | // 48 | // you should assume that call() will return an 49 | // error after a while if the server is dead. 50 | // don't provide your own time-out mechanism. 51 | // 52 | // please use call() to send all RPCs, in client.go and server.go. 53 | // please don't change this function. 54 | // 55 | func call(srv string, rpcname string, 56 | args interface{}, reply interface{}) bool { 57 | c, errx := rpc.Dial("unix", srv) 58 | if errx != nil { 59 | return false 60 | } 61 | defer c.Close() 62 | 63 | err := c.Call(rpcname, args, reply) 64 | if err == nil { 65 | return true 66 | } 67 | 68 | fmt.Println(err) 69 | return false 70 | } 71 | 72 | // 73 | // fetch a key's value from the current primary; 74 | // if they key has never been set, return "". 75 | // Get() must keep trying until it either the 76 | // primary replies with the value or the primary 77 | // says the key doesn't exist (has never been Put(). 78 | // 79 | func (ck *Clerk) Get(key string) string { 80 | 81 | // Your code here. 
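// Strategy used below: lazily fetch the primary from the viewservice and
// cache it in ck.currPrimary; on any RPC failure, wait one PingInterval,
// re-fetch the view, and retry against the (possibly new) primary.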
82 | // if the current view has no primary, keep asking the view service until a primary showed up 83 | for ck.currPrimary == "" { 84 | view, _ := ck.vs.Get() 85 | ck.currPrimary = view.Primary 86 | } 87 | // might need to set up lock for client. but as it passed all testcases so I ignored that 88 | 89 | args := &GetArgs{key} 90 | var reply GetReply 91 | 92 | ok := false 93 | // clients keep re-trying until they get an answer. 94 | for ok == false { 95 | //log.Printf("%v start %v", ck.currPrimary, args.Key) 96 | ok := call(ck.currPrimary, "PBServer.Get", args, &reply) 97 | //log.Printf("%v end %v", ck.currPrimary, args.Key) 98 | if ok { 99 | // everything works fine 100 | break 101 | } else { 102 | // case 1. if the current primary is dead 103 | // case 2. the network is unavailable temporarily 104 | // case 3. if the asked primary doesn't think itself as the primary 105 | // (ps. case 3 will show an error: NOT THE PRIMARY YET) 106 | 107 | // do the update view manually 108 | time.Sleep(viewservice.PingInterval) 109 | view, _ := ck.vs.Get() 110 | ck.currPrimary = view.Primary 111 | } 112 | } 113 | return reply.Value 114 | } 115 | 116 | // 117 | // send a Put or Append RPC 118 | // 119 | func (ck *Clerk) PutAppend(key string, value string, op string) { 120 | 121 | // Your code here. 122 | for ck.currPrimary == "" { 123 | view, _ := ck.vs.Get() 124 | ck.currPrimary = view.Primary 125 | } 126 | 127 | // nrand(): make the k/v service can detect duplicates. 128 | args := &PutAppendArgs{key, value, op, nrand()} 129 | var reply PutAppendReply 130 | 131 | ok := false 132 | for ok == false { 133 | //log.Printf("%v", ck.currPrimary) 134 | ok := call(ck.currPrimary, "PBServer.PutAppend", args, &reply) 135 | //log.Printf("%v, %v", ok, ck.currPrimary) 136 | if ok { 137 | // everything works fine 138 | break 139 | } else { 140 | time.Sleep(viewservice.PingInterval) 141 | view, _ := ck.vs.Get() 142 | ck.currPrimary = view.Primary 143 | } 144 | } 145 | } 146 | 147 | // 148 | // tell the primary to update key's value. 149 | // must keep trying until it succeeds. 150 | // 151 | func (ck *Clerk) Put(key string, value string) { 152 | ck.PutAppend(key, value, "Put") 153 | } 154 | 155 | // 156 | // tell the primary to append to key's value. 157 | // must keep trying until it succeeds. 158 | // 159 | func (ck *Clerk) Append(key string, value string) { 160 | ck.PutAppend(key, value, "Append") 161 | } 162 | -------------------------------------------------------------------------------- /hw4/src/shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "crypto/rand" 5 | "fmt" 6 | "math/big" 7 | "net/rpc" 8 | "shardmaster" 9 | "sync" 10 | "time" 11 | ) 12 | 13 | type Clerk struct { 14 | mu sync.Mutex // one RPC at a time 15 | sm *shardmaster.Clerk 16 | config shardmaster.Config 17 | clientID int64 18 | seq int 19 | } 20 | 21 | func nrand() int64 { 22 | max := big.NewInt(int64(1) << 62) 23 | bigx, _ := rand.Int(rand.Reader, max) 24 | x := bigx.Int64() 25 | return x 26 | } 27 | 28 | func MakeClerk(shardmasters []string) *Clerk { 29 | ck := new(Clerk) 30 | ck.sm = shardmaster.MakeClerk(shardmasters) 31 | ck.config = ck.sm.Query(-1) 32 | ck.clientID = nrand() 33 | ck.seq = 0 34 | return ck 35 | } 36 | 37 | // 38 | // call() sends an RPC to the rpcname handler on server srv 39 | // with arguments args, waits for the reply, and leaves the 40 | // reply in reply. the reply argument should be a pointer 41 | // to a reply structure. 
42 | // 43 | // the return value is true if the server responded, and false 44 | // if call() was not able to contact the server. in particular, 45 | // the reply's contents are only valid if call() returned true. 46 | // 47 | // you should assume that call() will return an 48 | // error after a while if the server is dead. 49 | // don't provide your own time-out mechanism. 50 | // 51 | // please use call() to send all RPCs, in client.go and server.go. 52 | // please don't change this function. 53 | // 54 | func call(srv string, rpcname string, 55 | args interface{}, reply interface{}) bool { 56 | c, errx := rpc.Dial("unix", srv) 57 | if errx != nil { 58 | return false 59 | } 60 | defer c.Close() 61 | 62 | err := c.Call(rpcname, args, reply) 63 | if err == nil { 64 | return true 65 | } 66 | 67 | fmt.Println(err) 68 | return false 69 | } 70 | 71 | // 72 | // which shard is a key in? 73 | // please use this function, 74 | // and please do not change it. 75 | // 76 | func key2shard(key string) int { 77 | shard := 0 78 | if len(key) > 0 { 79 | shard = int(key[0]) 80 | } 81 | shard %= shardmaster.NShards 82 | return shard 83 | } 84 | 85 | // 86 | // fetch the current value for a key. 87 | // returns "" if the key does not exist. 88 | // keeps trying forever in the face of all other errors. 89 | // 90 | func (ck *Clerk) Get(key string) string { 91 | ck.mu.Lock() 92 | defer ck.mu.Unlock() 93 | 94 | ck.seq++ 95 | for { 96 | shard := key2shard(key) 97 | 98 | gid := ck.config.Shards[shard] 99 | servers, ok := ck.config.Groups[gid] 100 | 101 | if ok { 102 | // try each server in the shard's replication group. 103 | for _, srv := range servers { 104 | args := &GetArgs{} 105 | args.Key = key 106 | args.ID = ck.clientID 107 | args.Seq = ck.seq 108 | args.ConfigNum = ck.config.Num 109 | args.Shard = shard 110 | var reply GetReply 111 | ok := call(srv, "ShardKV.Get", args, &reply) 112 | if ok && (reply.Err == OK || reply.Err == ErrNoKey) { 113 | return reply.Value 114 | } 115 | if ok && (reply.Err == ErrWrongGroup) { 116 | continue 117 | } 118 | } 119 | } 120 | 121 | time.Sleep(100 * time.Millisecond) 122 | 123 | // ask master for a new configuration. 124 | ck.config = ck.sm.Query(-1) 125 | } 126 | } 127 | 128 | // send a Put or Append request. 129 | func (ck *Clerk) PutAppend(key string, value string, op string) { 130 | ck.mu.Lock() 131 | defer ck.mu.Unlock() 132 | 133 | ck.seq++ 134 | for { 135 | shard := key2shard(key) 136 | 137 | gid := ck.config.Shards[shard] 138 | 139 | servers, ok := ck.config.Groups[gid] 140 | 141 | if ok { 142 | // try each server in the shard's replication group. 143 | for _, srv := range servers { 144 | args := &PutAppendArgs{} 145 | args.Key = key 146 | args.Value = value 147 | args.ID = ck.clientID 148 | // ck.seq is for dealing with duplicate client RPCs 149 | args.Seq = ck.seq 150 | args.Op = op 151 | args.Shard = shard 152 | args.ConfigNum = ck.config.Num 153 | var reply PutAppendReply 154 | ok := call(srv, "ShardKV.PutAppend", args, &reply) 155 | if ok && reply.Err == OK { 156 | return 157 | } 158 | // When the client received ErrWrongGroup -> we don't change ck.seq 159 | // instead, we just re-Query and change a group and request again 160 | if ok && (reply.Err == ErrWrongGroup) { 161 | continue 162 | } 163 | } 164 | } 165 | //It re-tries if the replica group says it is not responsible for the key; 166 | //in that case, the client code asks the shard master for the latest configuration and tries again. 
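// Note: ck.seq was incremented once at the top of PutAppend and is reused on
// every retry (including retries after ErrWrongGroup), so a server that keeps
// the highest seq seen per clientID can treat re-sends as duplicates.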
167 | time.Sleep(100 * time.Millisecond) 168 | 169 | // ask master for a new configuration. 170 | ck.config = ck.sm.Query(-1) 171 | } 172 | } 173 | 174 | func (ck *Clerk) Put(key string, value string) { 175 | ck.PutAppend(key, value, "Put") 176 | } 177 | func (ck *Clerk) Append(key string, value string) { 178 | ck.PutAppend(key, value, "Append") 179 | } -------------------------------------------------------------------------------- /hw1/src/mapreduce/test_test.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import "testing" 4 | import "fmt" 5 | import "time" 6 | import "container/list" 7 | import "strings" 8 | import "os" 9 | import "bufio" 10 | import "log" 11 | import "sort" 12 | import "strconv" 13 | 14 | const ( 15 | nNumber = 100000 16 | nMap = 100 17 | nReduce = 50 18 | ) 19 | 20 | // Create input file with N numbers 21 | // Check if we have N numbers in output file 22 | 23 | // Split in words 24 | func MapFunc(value string) *list.List { 25 | DPrintf("Map %v\n", value) 26 | res := list.New() 27 | words := strings.Fields(value) 28 | for _, w := range words { 29 | kv := KeyValue{w, ""} 30 | res.PushBack(kv) 31 | } 32 | return res 33 | } 34 | 35 | // Just return key 36 | func ReduceFunc(key string, values *list.List) string { 37 | for e := values.Front(); e != nil; e = e.Next() { 38 | DPrintf("Reduce %s %v\n", key, e.Value) 39 | } 40 | return "" 41 | } 42 | 43 | // Checks input file against output file: each input number should show up 44 | // in the output file in string sorted order 45 | func check(t *testing.T, file string) { 46 | input, err := os.Open(file) 47 | if err != nil { 48 | log.Fatal("check: ", err) 49 | } 50 | defer input.Close() 51 | output, err := os.Open("mrtmp." + file) 52 | if err != nil { 53 | log.Fatal("check: ", err) 54 | } 55 | defer output.Close() 56 | 57 | var lines []string 58 | inputScanner := bufio.NewScanner(input) 59 | for inputScanner.Scan() { 60 | lines = append(lines, inputScanner.Text()) 61 | } 62 | 63 | sort.Strings(lines) 64 | 65 | outputScanner := bufio.NewScanner(output) 66 | i := 0 67 | for outputScanner.Scan() { 68 | var v1 int 69 | var v2 int 70 | text := outputScanner.Text() 71 | n, err := fmt.Sscanf(lines[i], "%d", &v1) 72 | if n == 1 && err == nil { 73 | n, err = fmt.Sscanf(text, "%d", &v2) 74 | } 75 | if err != nil || v1 != v2 { 76 | t.Fatalf("line %d: %d != %d err %v\n", i, v1, v2, err) 77 | } 78 | i += 1 79 | } 80 | if i != nNumber { 81 | t.Fatalf("Expected %d lines in output\n", nNumber) 82 | } 83 | } 84 | 85 | // Workers report back how many RPCs they have processed in the Shutdown reply. 86 | // Check that they processed at least 1 RPC. 87 | func checkWorker(t *testing.T, l *list.List) { 88 | for e := l.Front(); e != nil; e = e.Next() { 89 | if e.Value == 0 { 90 | t.Fatalf("Some worker didn't do any work\n") 91 | } 92 | } 93 | } 94 | 95 | // Make input file 96 | func makeInput() string { 97 | name := "824-mrinput.txt" 98 | file, err := os.Create(name) 99 | if err != nil { 100 | log.Fatal("mkInput: ", err) 101 | } 102 | w := bufio.NewWriter(file) 103 | for i := 0; i < nNumber; i++ { 104 | fmt.Fprintf(w, "%d\n", i) 105 | } 106 | w.Flush() 107 | file.Close() 108 | return name 109 | } 110 | 111 | // Cook up a unique-ish UNIX-domain socket name 112 | // in /var/tmp. can't use current directory since 113 | // AFS doesn't support UNIX-domain sockets. 
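// The resulting name looks like /var/tmp/824-<uid>/mr<pid>-<suffix>.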
114 | func port(suffix string) string { 115 | s := "/var/tmp/824-" 116 | s += strconv.Itoa(os.Getuid()) + "/" 117 | os.Mkdir(s, 0777) 118 | s += "mr" 119 | s += strconv.Itoa(os.Getpid()) + "-" 120 | s += suffix 121 | return s 122 | } 123 | 124 | func setup() *MapReduce { 125 | file := makeInput() 126 | master := port("master") 127 | mr := MakeMapReduce(nMap, nReduce, file, master) 128 | return mr 129 | } 130 | 131 | func cleanup(mr *MapReduce) { 132 | mr.CleanupFiles() 133 | RemoveFile(mr.file) 134 | } 135 | 136 | func TestBasic(t *testing.T) { 137 | fmt.Printf("Test: Basic mapreduce ...\n") 138 | mr := setup() 139 | for i := 0; i < 2; i++ { 140 | go RunWorker(mr.MasterAddress, port("worker"+strconv.Itoa(i)), 141 | MapFunc, ReduceFunc, -1) 142 | } 143 | // Wait until MR is done 144 | <-mr.DoneChannel 145 | check(t, mr.file) 146 | checkWorker(t, mr.stats) 147 | cleanup(mr) 148 | fmt.Printf(" ... Basic Passed\n") 149 | } 150 | 151 | func TestOneFailure(t *testing.T) { 152 | fmt.Printf("Test: One Failure mapreduce ...\n") 153 | mr := setup() 154 | // Start 2 workers that fail after 10 jobs 155 | go RunWorker(mr.MasterAddress, port("worker"+strconv.Itoa(0)), 156 | MapFunc, ReduceFunc, 10) 157 | go RunWorker(mr.MasterAddress, port("worker"+strconv.Itoa(1)), 158 | MapFunc, ReduceFunc, -1) 159 | // Wait until MR is done 160 | <-mr.DoneChannel 161 | check(t, mr.file) 162 | checkWorker(t, mr.stats) 163 | cleanup(mr) 164 | fmt.Printf(" ... One Failure Passed\n") 165 | } 166 | 167 | func TestManyFailures(t *testing.T) { 168 | fmt.Printf("Test: One ManyFailures mapreduce ...\n") 169 | mr := setup() 170 | i := 0 171 | done := false 172 | for !done { 173 | select { 174 | case done = <-mr.DoneChannel: 175 | check(t, mr.file) 176 | cleanup(mr) 177 | break 178 | default: 179 | // Start 2 workers each sec. The workers fail after 10 jobs 180 | w := port("worker" + strconv.Itoa(i)) 181 | go RunWorker(mr.MasterAddress, w, MapFunc, ReduceFunc, 10) 182 | i++ 183 | w = port("worker" + strconv.Itoa(i)) 184 | go RunWorker(mr.MasterAddress, w, MapFunc, ReduceFunc, 10) 185 | i++ 186 | time.Sleep(1 * time.Second) 187 | } 188 | } 189 | 190 | fmt.Printf(" ... 
Many Failures Passed\n") 191 | } 192 | -------------------------------------------------------------------------------- /hw3/src/viewservice/test_test.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "testing" 4 | import "runtime" 5 | import "time" 6 | import "fmt" 7 | import "os" 8 | import "strconv" 9 | 10 | func check(t *testing.T, ck *Clerk, p string, b string, n uint) { 11 | view, _ := ck.Get() 12 | if view.Primary != p { 13 | t.Fatalf("wanted primary %v, got %v", p, view.Primary) 14 | } 15 | if view.Backup != b { 16 | t.Fatalf("wanted backup %v, got %v", b, view.Backup) 17 | } 18 | if n != 0 && n != view.Viewnum { 19 | t.Fatalf("wanted viewnum %v, got %v", n, view.Viewnum) 20 | } 21 | if ck.Primary() != p { 22 | t.Fatalf("wanted primary %v, got %v", p, ck.Primary()) 23 | } 24 | } 25 | 26 | func port(suffix string) string { 27 | s := "/var/tmp/824-" 28 | s += strconv.Itoa(os.Getuid()) + "/" 29 | os.Mkdir(s, 0777) 30 | s += "viewserver-" 31 | s += strconv.Itoa(os.Getpid()) + "-" 32 | s += suffix 33 | return s 34 | } 35 | 36 | func Test1(t *testing.T) { 37 | runtime.GOMAXPROCS(4) 38 | 39 | vshost := port("v") 40 | vs := StartServer(vshost) 41 | 42 | ck1 := MakeClerk(port("1"), vshost) 43 | ck2 := MakeClerk(port("2"), vshost) 44 | ck3 := MakeClerk(port("3"), vshost) 45 | 46 | // 47 | 48 | if ck1.Primary() != "" { 49 | t.Fatalf("there was a primary too soon") 50 | } 51 | 52 | // very first primary 53 | fmt.Printf("Test: First primary ...\n") 54 | 55 | for i := 0; i < DeadPings*2; i++ { 56 | view, _ := ck1.Ping(0) 57 | if view.Primary == ck1.me { 58 | break 59 | } 60 | time.Sleep(PingInterval) 61 | } 62 | check(t, ck1, ck1.me, "", 1) 63 | fmt.Printf(" ... Passed\n") 64 | 65 | // very first backup 66 | fmt.Printf("Test: First backup ...\n") 67 | 68 | { 69 | vx, _ := ck1.Get() 70 | for i := 0; i < DeadPings*2; i++ { 71 | ck1.Ping(1) 72 | view, _ := ck2.Ping(0) 73 | if view.Backup == ck2.me { 74 | break 75 | } 76 | time.Sleep(PingInterval) 77 | } 78 | check(t, ck1, ck1.me, ck2.me, vx.Viewnum+1) 79 | } 80 | fmt.Printf(" ... Passed\n") 81 | 82 | // primary dies, backup should take over 83 | fmt.Printf("Test: Backup takes over if primary fails ...\n") 84 | 85 | { 86 | ck1.Ping(2) 87 | vx, _ := ck2.Ping(2) 88 | for i := 0; i < DeadPings*2; i++ { 89 | v, _ := ck2.Ping(vx.Viewnum) 90 | if v.Primary == ck2.me && v.Backup == "" { 91 | break 92 | } 93 | time.Sleep(PingInterval) 94 | } 95 | check(t, ck2, ck2.me, "", vx.Viewnum+1) 96 | } 97 | fmt.Printf(" ... Passed\n") 98 | 99 | // revive ck1, should become backup 100 | fmt.Printf("Test: Restarted server becomes backup ...\n") 101 | 102 | { 103 | vx, _ := ck2.Get() 104 | ck2.Ping(vx.Viewnum) 105 | for i := 0; i < DeadPings*2; i++ { 106 | ck1.Ping(0) 107 | v, _ := ck2.Ping(vx.Viewnum) 108 | if v.Primary == ck2.me && v.Backup == ck1.me { 109 | break 110 | } 111 | time.Sleep(PingInterval) 112 | } 113 | check(t, ck2, ck2.me, ck1.me, vx.Viewnum+1) 114 | } 115 | fmt.Printf(" ... Passed\n") 116 | 117 | // start ck3, kill the primary (ck2), the previous backup (ck1) 118 | // should become the server, and ck3 the backup. 119 | // this should happen in a single view change, without 120 | // any period in which there's no backup. 
121 | fmt.Printf("Test: Idle third server becomes backup if primary fails ...\n") 122 | 123 | { 124 | vx, _ := ck2.Get() 125 | ck2.Ping(vx.Viewnum) 126 | for i := 0; i < DeadPings*2; i++ { 127 | ck3.Ping(0) 128 | v, _ := ck1.Ping(vx.Viewnum) 129 | if v.Primary == ck1.me && v.Backup == ck3.me { 130 | break 131 | } 132 | vx = v 133 | time.Sleep(PingInterval) 134 | } 135 | check(t, ck1, ck1.me, ck3.me, vx.Viewnum+1) 136 | } 137 | fmt.Printf(" ... Passed\n") 138 | 139 | // kill and immediately restart the primary -- does viewservice 140 | // conclude primary is down even though it's pinging? 141 | fmt.Printf("Test: Restarted primary treated as dead ...\n") 142 | 143 | { 144 | vx, _ := ck1.Get() 145 | ck1.Ping(vx.Viewnum) 146 | for i := 0; i < DeadPings*2; i++ { 147 | ck1.Ping(0) 148 | ck3.Ping(vx.Viewnum) 149 | v, _ := ck3.Get() 150 | if v.Primary != ck1.me { 151 | break 152 | } 153 | time.Sleep(PingInterval) 154 | } 155 | vy, _ := ck3.Get() 156 | if vy.Primary != ck3.me { 157 | t.Fatalf("expected primary=%v, got %v\n", ck3.me, vy.Primary) 158 | } 159 | } 160 | fmt.Printf(" ... Passed\n") 161 | 162 | fmt.Printf("Test: Dead backup is removed from view ...\n") 163 | 164 | // set up a view with just 3 as primary, 165 | // to prepare for the next test. 166 | { 167 | for i := 0; i < DeadPings*3; i++ { 168 | vx, _ := ck3.Get() 169 | ck3.Ping(vx.Viewnum) 170 | time.Sleep(PingInterval) 171 | } 172 | v, _ := ck3.Get() 173 | if v.Primary != ck3.me || v.Backup != "" { 174 | t.Fatalf("wrong primary or backup") 175 | } 176 | } 177 | fmt.Printf(" ... Passed\n") 178 | 179 | // does viewserver wait for ack of previous view before 180 | // starting the next one? 181 | fmt.Printf("Test: Viewserver waits for primary to ack view ...\n") 182 | 183 | { 184 | // set up p=ck3 b=ck1, but 185 | // but do not ack 186 | vx, _ := ck1.Get() 187 | for i := 0; i < DeadPings*3; i++ { 188 | ck1.Ping(0) 189 | ck3.Ping(vx.Viewnum) 190 | v, _ := ck1.Get() 191 | if v.Viewnum > vx.Viewnum { 192 | break 193 | } 194 | time.Sleep(PingInterval) 195 | } 196 | check(t, ck1, ck3.me, ck1.me, vx.Viewnum+1) 197 | vy, _ := ck1.Get() 198 | // ck3 is the primary, but it never acked. 199 | // let ck3 die. check that ck1 is not promoted. 200 | for i := 0; i < DeadPings*3; i++ { 201 | v, _ := ck1.Ping(vy.Viewnum) 202 | if v.Viewnum > vy.Viewnum { 203 | break 204 | } 205 | time.Sleep(PingInterval) 206 | } 207 | check(t, ck2, ck3.me, ck1.me, vy.Viewnum) 208 | } 209 | fmt.Printf(" ... Passed\n") 210 | 211 | // if old servers die, check that a new (uninitialized) server 212 | // cannot take over. 213 | fmt.Printf("Test: Uninitialized server can't become primary ...\n") 214 | 215 | { 216 | for i := 0; i < DeadPings*2; i++ { 217 | v, _ := ck1.Get() 218 | ck1.Ping(v.Viewnum) 219 | ck2.Ping(0) 220 | ck3.Ping(v.Viewnum) 221 | time.Sleep(PingInterval) 222 | } 223 | for i := 0; i < DeadPings*2; i++ { 224 | ck2.Ping(0) 225 | time.Sleep(PingInterval) 226 | } 227 | vz, _ := ck2.Get() 228 | if vz.Primary == ck2.me { 229 | t.Fatalf("uninitialized backup promoted to primary") 230 | } 231 | } 232 | fmt.Printf(" ... 
Passed\n") 233 | 234 | vs.Kill() 235 | } 236 | -------------------------------------------------------------------------------- /hw4/src/viewservice/test_test.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "testing" 4 | import "runtime" 5 | import "time" 6 | import "fmt" 7 | import "os" 8 | import "strconv" 9 | 10 | func check(t *testing.T, ck *Clerk, p string, b string, n uint) { 11 | view, _ := ck.Get() 12 | if view.Primary != p { 13 | t.Fatalf("wanted primary %v, got %v", p, view.Primary) 14 | } 15 | if view.Backup != b { 16 | t.Fatalf("wanted backup %v, got %v", b, view.Backup) 17 | } 18 | if n != 0 && n != view.Viewnum { 19 | t.Fatalf("wanted viewnum %v, got %v", n, view.Viewnum) 20 | } 21 | if ck.Primary() != p { 22 | t.Fatalf("wanted primary %v, got %v", p, ck.Primary()) 23 | } 24 | } 25 | 26 | func port(suffix string) string { 27 | s := "/var/tmp/824-" 28 | s += strconv.Itoa(os.Getuid()) + "/" 29 | os.Mkdir(s, 0777) 30 | s += "viewserver-" 31 | s += strconv.Itoa(os.Getpid()) + "-" 32 | s += suffix 33 | return s 34 | } 35 | 36 | func Test1(t *testing.T) { 37 | runtime.GOMAXPROCS(4) 38 | 39 | vshost := port("v") 40 | vs := StartServer(vshost) 41 | 42 | ck1 := MakeClerk(port("1"), vshost) 43 | ck2 := MakeClerk(port("2"), vshost) 44 | ck3 := MakeClerk(port("3"), vshost) 45 | 46 | // 47 | 48 | if ck1.Primary() != "" { 49 | t.Fatalf("there was a primary too soon") 50 | } 51 | 52 | // very first primary 53 | fmt.Printf("Test: First primary ...\n") 54 | 55 | for i := 0; i < DeadPings*2; i++ { 56 | view, _ := ck1.Ping(0) 57 | if view.Primary == ck1.me { 58 | break 59 | } 60 | time.Sleep(PingInterval) 61 | } 62 | check(t, ck1, ck1.me, "", 1) 63 | fmt.Printf(" ... Passed\n") 64 | 65 | // very first backup 66 | fmt.Printf("Test: First backup ...\n") 67 | 68 | { 69 | vx, _ := ck1.Get() 70 | for i := 0; i < DeadPings*2; i++ { 71 | ck1.Ping(1) 72 | view, _ := ck2.Ping(0) 73 | if view.Backup == ck2.me { 74 | break 75 | } 76 | time.Sleep(PingInterval) 77 | } 78 | check(t, ck1, ck1.me, ck2.me, vx.Viewnum+1) 79 | } 80 | fmt.Printf(" ... Passed\n") 81 | 82 | // primary dies, backup should take over 83 | fmt.Printf("Test: Backup takes over if primary fails ...\n") 84 | 85 | { 86 | ck1.Ping(2) 87 | vx, _ := ck2.Ping(2) 88 | for i := 0; i < DeadPings*2; i++ { 89 | v, _ := ck2.Ping(vx.Viewnum) 90 | if v.Primary == ck2.me && v.Backup == "" { 91 | break 92 | } 93 | time.Sleep(PingInterval) 94 | } 95 | check(t, ck2, ck2.me, "", vx.Viewnum+1) 96 | } 97 | fmt.Printf(" ... Passed\n") 98 | 99 | // revive ck1, should become backup 100 | fmt.Printf("Test: Restarted server becomes backup ...\n") 101 | 102 | { 103 | vx, _ := ck2.Get() 104 | ck2.Ping(vx.Viewnum) 105 | for i := 0; i < DeadPings*2; i++ { 106 | ck1.Ping(0) 107 | v, _ := ck2.Ping(vx.Viewnum) 108 | if v.Primary == ck2.me && v.Backup == ck1.me { 109 | break 110 | } 111 | time.Sleep(PingInterval) 112 | } 113 | check(t, ck2, ck2.me, ck1.me, vx.Viewnum+1) 114 | } 115 | fmt.Printf(" ... Passed\n") 116 | 117 | // start ck3, kill the primary (ck2), the previous backup (ck1) 118 | // should become the server, and ck3 the backup. 119 | // this should happen in a single view change, without 120 | // any period in which there's no backup. 
121 | fmt.Printf("Test: Idle third server becomes backup if primary fails ...\n") 122 | 123 | { 124 | vx, _ := ck2.Get() 125 | ck2.Ping(vx.Viewnum) 126 | for i := 0; i < DeadPings*2; i++ { 127 | ck3.Ping(0) 128 | v, _ := ck1.Ping(vx.Viewnum) 129 | if v.Primary == ck1.me && v.Backup == ck3.me { 130 | break 131 | } 132 | vx = v 133 | time.Sleep(PingInterval) 134 | } 135 | check(t, ck1, ck1.me, ck3.me, vx.Viewnum+1) 136 | } 137 | fmt.Printf(" ... Passed\n") 138 | 139 | // kill and immediately restart the primary -- does viewservice 140 | // conclude primary is down even though it's pinging? 141 | fmt.Printf("Test: Restarted primary treated as dead ...\n") 142 | 143 | { 144 | vx, _ := ck1.Get() 145 | ck1.Ping(vx.Viewnum) 146 | for i := 0; i < DeadPings*2; i++ { 147 | ck1.Ping(0) 148 | ck3.Ping(vx.Viewnum) 149 | v, _ := ck3.Get() 150 | if v.Primary != ck1.me { 151 | break 152 | } 153 | time.Sleep(PingInterval) 154 | } 155 | vy, _ := ck3.Get() 156 | if vy.Primary != ck3.me { 157 | t.Fatalf("expected primary=%v, got %v\n", ck3.me, vy.Primary) 158 | } 159 | } 160 | fmt.Printf(" ... Passed\n") 161 | 162 | fmt.Printf("Test: Dead backup is removed from view ...\n") 163 | 164 | // set up a view with just 3 as primary, 165 | // to prepare for the next test. 166 | { 167 | for i := 0; i < DeadPings*3; i++ { 168 | vx, _ := ck3.Get() 169 | ck3.Ping(vx.Viewnum) 170 | time.Sleep(PingInterval) 171 | } 172 | v, _ := ck3.Get() 173 | if v.Primary != ck3.me || v.Backup != "" { 174 | t.Fatalf("wrong primary or backup") 175 | } 176 | } 177 | fmt.Printf(" ... Passed\n") 178 | 179 | // does viewserver wait for ack of previous view before 180 | // starting the next one? 181 | fmt.Printf("Test: Viewserver waits for primary to ack view ...\n") 182 | 183 | { 184 | // set up p=ck3 b=ck1, but 185 | // but do not ack 186 | vx, _ := ck1.Get() 187 | for i := 0; i < DeadPings*3; i++ { 188 | ck1.Ping(0) 189 | ck3.Ping(vx.Viewnum) 190 | v, _ := ck1.Get() 191 | if v.Viewnum > vx.Viewnum { 192 | break 193 | } 194 | time.Sleep(PingInterval) 195 | } 196 | check(t, ck1, ck3.me, ck1.me, vx.Viewnum+1) 197 | vy, _ := ck1.Get() 198 | // ck3 is the primary, but it never acked. 199 | // let ck3 die. check that ck1 is not promoted. 200 | for i := 0; i < DeadPings*3; i++ { 201 | v, _ := ck1.Ping(vy.Viewnum) 202 | if v.Viewnum > vy.Viewnum { 203 | break 204 | } 205 | time.Sleep(PingInterval) 206 | } 207 | check(t, ck2, ck3.me, ck1.me, vy.Viewnum) 208 | } 209 | fmt.Printf(" ... Passed\n") 210 | 211 | // if old servers die, check that a new (uninitialized) server 212 | // cannot take over. 213 | fmt.Printf("Test: Uninitialized server can't become primary ...\n") 214 | 215 | { 216 | for i := 0; i < DeadPings*2; i++ { 217 | v, _ := ck1.Get() 218 | ck1.Ping(v.Viewnum) 219 | ck2.Ping(0) 220 | ck3.Ping(v.Viewnum) 221 | time.Sleep(PingInterval) 222 | } 223 | for i := 0; i < DeadPings*2; i++ { 224 | ck2.Ping(0) 225 | time.Sleep(PingInterval) 226 | } 227 | vz, _ := ck2.Get() 228 | if vz.Primary == ck2.me { 229 | t.Fatalf("uninitialized backup promoted to primary") 230 | } 231 | } 232 | fmt.Printf(" ... 
Passed\n") 233 | 234 | vs.Kill() 235 | } 236 | -------------------------------------------------------------------------------- /hw4/src/kvpaxos/server.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import ( 4 | "errors" 5 | "net" 6 | "reflect" 7 | "time" 8 | ) 9 | import "fmt" 10 | import "net/rpc" 11 | import "log" 12 | import "paxos" 13 | import "sync" 14 | import "sync/atomic" 15 | import "os" 16 | import "syscall" 17 | import "encoding/gob" 18 | import "math/rand" 19 | 20 | 21 | const Debug = 1 22 | 23 | func DPrintf(format string, a ...interface{}) (n int, err error) { 24 | if Debug > 0 { 25 | log.Printf(format, a...) 26 | } 27 | return 28 | } 29 | 30 | 31 | const ( 32 | Put = "Put" 33 | Append = "Append" 34 | Get = "Get" 35 | ) 36 | 37 | type Op struct { 38 | // Put, Get, Append 39 | Operation string 40 | Args interface{} 41 | } 42 | 43 | type KVPaxos struct { 44 | mu sync.Mutex 45 | l net.Listener 46 | me int 47 | dead int32 // for testing 48 | unreliable int32 // for testing 49 | px *paxos.Paxos 50 | 51 | lastApply int 52 | database map[string]string 53 | maxClientSeq map[int64]int 54 | } 55 | 56 | 57 | func (kv *KVPaxos) Apply(op Op) { 58 | if op.Operation == Get { 59 | args := op.Args.(GetArgs) 60 | if args.Seq > kv.maxClientSeq[args.ClientID] { 61 | kv.maxClientSeq[args.ClientID] = args.Seq 62 | } 63 | } else if op.Operation == Put { 64 | args := op.Args.(PutAppendArgs) 65 | kv.database[args.Key] = args.Value 66 | if args.Seq > kv.maxClientSeq[args.ClientID] { 67 | kv.maxClientSeq[args.ClientID] = args.Seq 68 | } 69 | } else if op.Operation == Append { 70 | args := op.Args.(PutAppendArgs) 71 | value, ok := kv.database[args.Key] 72 | if !ok { 73 | value = "" 74 | } 75 | kv.database[args.Key] = value + args.Value 76 | if args.Seq > kv.maxClientSeq[args.ClientID] { 77 | kv.maxClientSeq[args.ClientID] = args.Seq 78 | } 79 | } 80 | } 81 | 82 | func (kv *KVPaxos) Wait(seq int) (Op, error) { 83 | sleepTime := 10 * time.Microsecond 84 | for iters := 0; iters < 15; iters ++ { 85 | decided, op := kv.px.Status(seq) 86 | if decided == paxos.Decided { 87 | return op.(Op), nil 88 | } 89 | // as we correctly do `done()` forgetten one should not be shown 90 | //else if decided == paxos.Forgotten { 91 | // break 92 | //} 93 | time.Sleep(sleepTime) 94 | if sleepTime < 10 * time.Second { 95 | sleepTime *= 2 96 | } 97 | } 98 | return Op{}, errors.New("Wait for too long") 99 | } 100 | 101 | func (kv *KVPaxos) Propose(xop Op) error { 102 | for { 103 | kv.px.Start(kv.lastApply + 1, xop) 104 | op, err := kv.Wait(kv.lastApply + 1) 105 | if err != nil { 106 | return err 107 | } 108 | kv.Apply(op) 109 | kv.lastApply += 1 110 | 111 | if reflect.DeepEqual(op, xop) { 112 | break 113 | } 114 | // do this everytime lastApply +1 -> to prevent any possible mem overflow possibilities 115 | kv.px.Done(kv.lastApply) 116 | } 117 | kv.px.Done(kv.lastApply) 118 | return nil 119 | } 120 | 121 | func (kv *KVPaxos) Get(args *GetArgs, reply *GetReply) error { 122 | // Your code here. 
123 | kv.mu.Lock() 124 | defer kv.mu.Unlock() 125 | if args.Seq <= kv.maxClientSeq[args.ClientID] { 126 | reply.Err = OK 127 | reply.Value = kv.database[args.Key] 128 | return nil 129 | } 130 | op := Op{Operation: "Get", Args: *args} 131 | err := kv.Propose(op) 132 | if err != nil { 133 | return err 134 | } 135 | 136 | value, ok := kv.database[args.Key] 137 | if !ok { 138 | reply.Err = ErrNoKey 139 | reply.Value = "" 140 | } else { 141 | reply.Err = OK 142 | reply.Value = value 143 | } 144 | return nil 145 | 146 | } 147 | 148 | func (kv *KVPaxos) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 149 | // Your code here. 150 | kv.mu.Lock() 151 | defer kv.mu.Unlock() 152 | if args.Seq <= kv.maxClientSeq[args.ClientID] { 153 | reply.Err = OK 154 | return nil 155 | } 156 | 157 | op := Op{Args: *args, Operation: args.Op} 158 | err := kv.Propose(op) 159 | if err != nil { 160 | return err 161 | } 162 | reply.Err = OK 163 | return nil 164 | } 165 | 166 | // tell the server to shut itself down. 167 | // please do not change these two functions. 168 | func (kv *KVPaxos) kill() { 169 | //DPrintf("Kill(%d): die\n", kv.me) 170 | atomic.StoreInt32(&kv.dead, 1) 171 | kv.l.Close() 172 | kv.px.Kill() 173 | } 174 | 175 | // call this to find out if the server is dead. 176 | func (kv *KVPaxos) isdead() bool { 177 | return atomic.LoadInt32(&kv.dead) != 0 178 | } 179 | 180 | // please do not change these two functions. 181 | func (kv *KVPaxos) setunreliable(what bool) { 182 | if what { 183 | atomic.StoreInt32(&kv.unreliable, 1) 184 | } else { 185 | atomic.StoreInt32(&kv.unreliable, 0) 186 | } 187 | } 188 | 189 | func (kv *KVPaxos) isunreliable() bool { 190 | return atomic.LoadInt32(&kv.unreliable) != 0 191 | } 192 | 193 | // 194 | // servers[] contains the ports of the set of 195 | // servers that will cooperate via Paxos to 196 | // form the fault-tolerant key/value service. 197 | // me is the index of the current server in servers[]. 198 | // 199 | func StartServer(servers []string, me int) *KVPaxos { 200 | // call gob.Register on structures you want 201 | // Go's RPC library to marshall/unmarshall. 202 | gob.Register(Op{}) 203 | gob.Register(GetArgs{}) 204 | gob.Register(PutAppendArgs{}) 205 | 206 | kv := new(KVPaxos) 207 | kv.me = me 208 | 209 | // Your initialization code here. 210 | kv.database = make(map[string]string) 211 | kv.maxClientSeq = make(map[int64]int) 212 | 213 | rpcs := rpc.NewServer() 214 | rpcs.Register(kv) 215 | 216 | kv.px = paxos.Make(servers, me, rpcs) 217 | 218 | os.Remove(servers[me]) 219 | l, e := net.Listen("unix", servers[me]) 220 | if e != nil { 221 | log.Fatal("listen error: ", e) 222 | } 223 | kv.l = l 224 | 225 | 226 | // please do not change any of the following code, 227 | // or do anything to subvert it. 228 | 229 | go func() { 230 | for kv.isdead() == false { 231 | conn, err := kv.l.Accept() 232 | if err == nil && kv.isdead() == false { 233 | if kv.isunreliable() && (rand.Int63()%1000) < 100 { 234 | // discard the request. 235 | conn.Close() 236 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 { 237 | // process the request but force discard of reply. 
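// (Done by taking the connection's file descriptor and shutting down only the
// write half via syscall.Shutdown(fd, syscall.SHUT_WR): rpcs.ServeConn still
// processes the request, but the reply can no longer reach the client,
// simulating a lost response.)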
238 | c1 := conn.(*net.UnixConn) 239 | f, _ := c1.File() 240 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 241 | if err != nil { 242 | fmt.Printf("shutdown: %v\n", err) 243 | } 244 | go rpcs.ServeConn(conn) 245 | } else { 246 | go rpcs.ServeConn(conn) 247 | } 248 | } else if err == nil { 249 | conn.Close() 250 | } 251 | if err != nil && kv.isdead() == false { 252 | fmt.Printf("KVPaxos(%v) accept: %v\n", me, err.Error()) 253 | kv.kill() 254 | } 255 | } 256 | }() 257 | 258 | return kv 259 | } 260 | -------------------------------------------------------------------------------- /hw2/src/viewservice/test_test.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import ( 4 | "log" 5 | "testing" 6 | ) 7 | import "runtime" 8 | import "time" 9 | import "fmt" 10 | import "os" 11 | import "strconv" 12 | 13 | func check(t *testing.T, ck *Clerk, p string, b string, n uint) { 14 | view, _ := ck.Get() 15 | log.Printf("p=%v, b=%v, n=%v", view.Primary, view.Backup, view.Viewnum) 16 | 17 | if view.Primary != p { 18 | t.Fatalf("wanted primary %v, got %v", p, view.Primary) 19 | } 20 | if view.Backup != b { 21 | t.Fatalf("wanted backup %v, got %v", b, view.Backup) 22 | } 23 | if n != 0 && n != view.Viewnum { 24 | t.Fatalf("wanted viewnum %v, got %v", n, view.Viewnum) 25 | } 26 | if ck.Primary() != p { 27 | t.Fatalf("wanted primary %v, got %v", p, ck.Primary()) 28 | } 29 | } 30 | 31 | func port(suffix string) string { 32 | s := "/var/tmp/824-" 33 | s += strconv.Itoa(os.Getuid()) + "/" 34 | os.Mkdir(s, 0777) 35 | s += "viewserver-" 36 | s += strconv.Itoa(os.Getpid()) + "-" 37 | s += suffix 38 | return s 39 | } 40 | 41 | func Test1(t *testing.T) { 42 | runtime.GOMAXPROCS(4) 43 | 44 | vshost := port("v") 45 | vs := StartServer(vshost) 46 | 47 | ck1 := MakeClerk(port("1"), vshost) 48 | ck2 := MakeClerk(port("2"), vshost) 49 | ck3 := MakeClerk(port("3"), vshost) 50 | 51 | // 52 | 53 | if ck1.Primary() != "" { 54 | t.Fatalf("there was a primary too soon") 55 | } 56 | 57 | // very first primary 58 | fmt.Printf("Test: First primary ...\n") 59 | 60 | for i := 0; i < DeadPings*2; i++ { 61 | view, _ := ck1.Ping(0) 62 | if view.Primary == ck1.me { 63 | break 64 | } 65 | time.Sleep(PingInterval) 66 | } 67 | check(t, ck1, ck1.me, "", 1) 68 | fmt.Printf(" ... Passed\n") 69 | 70 | // very first backup 71 | fmt.Printf("Test: First backup ...\n") 72 | 73 | { 74 | vx, _ := ck1.Get() 75 | for i := 0; i < DeadPings*2; i++ { 76 | ck1.Ping(1) 77 | view, _ := ck2.Ping(0) 78 | if view.Backup == ck2.me { 79 | break 80 | } 81 | time.Sleep(PingInterval) 82 | } 83 | check(t, ck1, ck1.me, ck2.me, vx.Viewnum+1) 84 | } 85 | fmt.Printf(" ... Passed\n") 86 | 87 | // primary dies, backup should take over 88 | fmt.Printf("Test: Backup takes over if primary fails ...\n") 89 | 90 | { 91 | ck1.Ping(2) // view bound from 1 to 2 92 | vx, _ := ck2.Ping(2) // {2, ck1, ck2} 93 | for i := 0; i < DeadPings*2; i++ { 94 | v, _ := ck2.Ping(vx.Viewnum) 95 | if v.Primary == ck2.me && v.Backup == "" { 96 | break 97 | } 98 | time.Sleep(PingInterval) 99 | } 100 | check(t, ck2, ck2.me, "", vx.Viewnum+1) 101 | } 102 | fmt.Printf(" ... Passed\n") 103 | 104 | // revive ck1, should become backup 105 | fmt.Printf("Test: Restarted server becomes backup ...\n") 106 | 107 | { 108 | vx, _ := ck2.Get() // vx.viewnum = 3 109 | ck2.Ping(vx.Viewnum) // ck2 is the primary (so viewBound=3 after that) 110 | for i := 0; i < DeadPings*2; i++ { 111 | ck1.Ping(0) // ck1 is the restarted server. 
do `assignRole()` 112 | v, _ := ck2.Ping(vx.Viewnum) 113 | if v.Primary == ck2.me && v.Backup == ck1.me { 114 | break 115 | } 116 | time.Sleep(PingInterval) 117 | } 118 | check(t, ck2, ck2.me, ck1.me, vx.Viewnum+1) 119 | } 120 | fmt.Printf(" ... Passed\n") 121 | 122 | // start ck3, kill the primary (ck2), the previous backup (ck1) 123 | // should become the server, and ck3 the backup. 124 | // this should happen in a single view change, without 125 | // any period in which there's no backup. 126 | fmt.Printf("Test: Idle third server becomes backup if primary fails ...\n") 127 | 128 | { 129 | vx, _ := ck2.Get() // vx = {4, p=ck2, b=ck1} 130 | ck2.Ping(vx.Viewnum) 131 | for i := 0; i < DeadPings*2; i++ { 132 | ck3.Ping(0) // new server is added! 133 | v, _ := ck1.Ping(vx.Viewnum) 134 | if v.Primary == ck1.me && v.Backup == ck3.me { 135 | break 136 | } 137 | vx = v 138 | time.Sleep(PingInterval) 139 | } 140 | check(t, ck1, ck1.me, ck3.me, vx.Viewnum+1) 141 | } 142 | fmt.Printf(" ... Passed\n") 143 | 144 | // kill and immediately restart the primary -- does viewservice 145 | // conclude primary is down even though it's pinging? 146 | // Adrian: yes! it is dead as I saw its ping is zero 147 | fmt.Printf("Test: Restarted primary treated as dead ...\n") 148 | 149 | { 150 | vx, _ := ck1.Get() 151 | ck1.Ping(vx.Viewnum) // vx = {5, p=ck1, b=ck3} 152 | for i := 0; i < DeadPings*2; i++ { 153 | ck1.Ping(0) // by force do the `replace()` 154 | ck3.Ping(vx.Viewnum) 155 | v, _ := ck3.Get() 156 | if v.Primary != ck1.me { 157 | break 158 | } 159 | time.Sleep(PingInterval) 160 | } 161 | vy, _ := ck3.Get() // for my case it is vy={6, p=ck3, b=""} 162 | if vy.Primary != ck3.me { 163 | t.Fatalf("expected primary=%v, got %v\n", ck3.me, vy.Primary) 164 | } 165 | } 166 | fmt.Printf(" ... Passed\n") 167 | 168 | fmt.Printf("Test: Dead backup is removed from view ...\n") // i don't think this make sense 169 | // it should be `checking backup is now promoted to be the primary` 170 | 171 | // set up a view with just 3 as primary, 172 | // to prepare for the next test. 173 | { 174 | for i := 0; i < DeadPings*3; i++ { 175 | vx, _ := ck3.Get() // vx = {6, p=ck3, b=""} 176 | ck3.Ping(vx.Viewnum) // now viewBound will be 6 177 | time.Sleep(PingInterval) 178 | } 179 | v, _ := ck3.Get() 180 | if v.Primary != ck3.me || v.Backup != "" { 181 | t.Fatalf("wrong primary or backup") 182 | } 183 | } 184 | fmt.Printf(" ... Passed\n") 185 | // to here, vx.viewnum is still 6. Nothin changed 186 | 187 | // does viewserver wait for ack of previous view before 188 | // starting the next one? 189 | fmt.Printf("Test: Viewserver waits for primary to ack view ...\n") 190 | 191 | { 192 | // set up p=ck3 b=ck1, but 193 | // but do not ack 194 | vx, _ := ck1.Get() // vx = {6, ck3, _} -> ck1 is not even in the set!! 195 | for i := 0; i < DeadPings*3; i++ { 196 | ck1.Ping(0) // now ck1 ias assignRole(). v is changed to 6 -> 7 {ck3, ck1} 197 | ck3.Ping(vx.Viewnum) 198 | v, _ := ck1.Get() // v = 7, {ck3, ck1} 199 | if v.Viewnum > vx.Viewnum { // 7 > 6 200 | break 201 | } 202 | time.Sleep(PingInterval) 203 | } 204 | check(t, ck1, ck3.me, ck1.me, vx.Viewnum+1) 205 | 206 | vy, _ := ck1.Get() // vy = 7, {ck3, ck1} 207 | // ck3 is the primary, but it never acked. 208 | // let ck3 die. check that ck1 is not promoted. (IMPORTANT) 209 | for i := 0; i < DeadPings*3; i++ { 210 | v, _ := ck1.Ping(vy.Viewnum) 211 | //log.Printf("v: %v, %v", vy.Viewnum, v.Viewnum) 212 | if v.Viewnum > vy.Viewnum { // e.g., v = 8, 8 > 7 213 | break // should not happen! 
214 | } 215 | time.Sleep(PingInterval) 216 | } 217 | // we will NOT break the for loop. 218 | check(t, ck2, ck3.me, ck1.me, vy.Viewnum) // view doesn't change 219 | } 220 | fmt.Printf(" ... Passed\n") 221 | 222 | // if old servers die, check that a new (uninitialized) server 223 | // cannot take over. 224 | // Adrian: what is an `uninitialized server`? 225 | fmt.Printf("Test: Uninitialized server can't become primary ...\n") 226 | 227 | { 228 | for i := 0; i < DeadPings*2; i++ { 229 | v, _ := ck1.Get() // v = {7, ck3, ck1} 230 | ck1.Ping(v.Viewnum) // viewBound is already 7 231 | ck2.Ping(0) // a new server joined -> idle 232 | ck3.Ping(v.Viewnum) // primary ack! finally. viewnum = 6 -> 7 now. 233 | time.Sleep(PingInterval) 234 | } // v = {8, ck3, ck2} 235 | //v, _ := ck1.Get() 236 | //log.Printf(v.Backup) 237 | 238 | // wait for ck1, ck3 be dead... 239 | // who dies first? does it matter? 240 | // ck1.Ping(7) // UNCOMMENT this line will make ck1 dies later than ck3 as it has just pinged 241 | for i := 0; i < DeadPings*2; i++ { 242 | ck2.Ping(0) 243 | time.Sleep(PingInterval) 244 | } 245 | // v = {7, ck3, ck1}. viewnum=7. so we can proceed view by 1. 246 | // then if case 1. ck1 dies first 247 | // so we got v = {8, ck3, ck2} 248 | // then we know that as viewBound is 7. so we cannot change view anymore 249 | // now that ck3 dies. -> we want to be {9, ck2, _} 250 | // but we cannot do that according to ack rule. So we are keeping {8, ck3, ck2} 251 | 252 | // else if case 2. ck3 dies first 253 | // so we got v = {8, ck1, ck2} 254 | // then ck1 dies -> we want to change to {9, ck2, _} but in vein. So we got {8, ck1, ck2} 255 | vz, _ := ck2.Get() // vz = {8, ck3, ck2} 256 | if vz.Primary == ck2.me { 257 | t.Fatalf("uninitialized backup cannot be promote to primary") 258 | } 259 | } 260 | fmt.Printf(" ... Passed\n") 261 | 262 | vs.Kill() 263 | } 264 | -------------------------------------------------------------------------------- /hw2/src/viewservice/server.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net" 4 | import "net/rpc" 5 | import "log" 6 | import "time" 7 | import "sync" 8 | import "fmt" 9 | import "os" 10 | import "sync/atomic" 11 | 12 | type ViewServer struct { 13 | mu sync.Mutex 14 | l net.Listener 15 | dead int32 // for testing 16 | rpccount int32 // for testing 17 | me string 18 | 19 | // Your declarations here. 20 | 21 | // Hint #2: add field(s) to ViewServer to keep track of the current view. 22 | currview *View 23 | recentHeard map[string] time.Time 24 | // A read/write mutex allows all the readers to access 25 | // the map at the same time, but a writer will lock out everyone else. 26 | rwm sync.RWMutex 27 | // Hint #3: 28 | // keep track of whether the primary for the current view has acked the latest view X 29 | // viewBound = 7 means that the current primary has acked the view 7. **And the way 30 | // it did the ACK is by sending an Ping(7)** 31 | viewBound uint // last value view X of the primary Ping(X) 32 | idleServers map[string] bool 33 | } 34 | 35 | // 36 | // server Ping RPC handler. 37 | // 38 | func (vs *ViewServer) Ping(args *PingArgs, reply *PingReply) error { 39 | 40 | // Your code here. 41 | 42 | // why do we need lock here? 
43 | // even though the test didn't specified, but we should know that Ping() 44 | // can be called concurrently by many threads -> may cause concurrent writes on `recentHeard` 45 | vs.rwm.Lock() 46 | defer vs.rwm.Unlock() 47 | 48 | // Hint #1: you'll want to add field(s) to ViewServer in server.go 49 | // in order to keep track of the most recent time at which 50 | // the viewservice has heard a Ping from each server. 51 | vs.recentHeard[args.Me] = time.Now() 52 | 53 | // init, Ping(0) from ck1. only do this one time when vs get bootstrapped 54 | if vs.currview == nil { 55 | vs.viewBound = args.Viewnum // X is now 0 56 | vs.currview = &View{0, "", ""} 57 | // ps. as it now received a Ping(0) from primary => can proceed to Viewnum = 1 58 | } 59 | 60 | if args.Me == vs.currview.Primary { 61 | // deal with the ACK from the primary 62 | // if the incoming Ping(X'): its X' is larger than our view bound X 63 | // e.g., in the test case #2: Ping(1) from ck1: then 0 < 1 64 | // received a Ping(1) from the primary ck1 => can later proceed to Viewnum = 2 65 | 66 | // viewBound increases means that some new view was ack-ed by the primary 67 | // and the vs realized that now 68 | if vs.viewBound < args.Viewnum { 69 | vs.viewBound = args.Viewnum 70 | } 71 | // Hint #6: the viewservice needs a way to detect that 72 | // a primary or backup has failed and re-started. 73 | // Therefore, we set that when a server re-starts after a crash, 74 | // it should send one or more Pings with an argument of zero to 75 | // inform the view service that it crashed. 76 | if args.Viewnum == 0 { 77 | vs.replace(args.Me) // force replace 78 | } 79 | } else if args.Me == vs.currview.Backup { 80 | // same as above. 81 | if args.Viewnum == 0 { // just got crashed and restarted 82 | vs.replace(args.Me) // force replace 83 | } 84 | } else { 85 | // an idle server comes in. put into the waitlist 86 | vs.assignRole(args.Me) 87 | } 88 | 89 | reply.View = *vs.currview 90 | return nil 91 | } 92 | 93 | // 94 | // server Get() RPC handler. 95 | // 96 | func (vs *ViewServer) Get(args *GetArgs, reply *GetReply) error { 97 | 98 | // Your code here. 99 | // the clerk can ask for the latest view from the view service 100 | // without doing Ping(). it uses Get() to fetch the latest view. 101 | vs.rwm.Lock() 102 | defer vs.rwm.Unlock() 103 | 104 | if vs.currview != nil { 105 | reply.View = *vs.currview 106 | } 107 | return nil 108 | } 109 | 110 | // edited by Adrian 111 | // backed up by idle server 112 | func (vs *ViewServer) backupByIdleSrv() { 113 | // only when idleServers exists will the backup be filled in 114 | if len(vs.idleServers) > 0 { 115 | // pick either one of them 116 | for key, _ := range vs.idleServers { 117 | vs.currview.Backup = key // backup will be set 118 | delete(vs.idleServers, key) // to keep the size of map 119 | break 120 | } 121 | } 122 | } 123 | 124 | // edited by Adrian 125 | func (vs *ViewServer) replace(k string) { 126 | // IMPORTANT! 127 | // the view service may NOT proceed from view X to view X + 1 128 | // if it has not received a Ping(X) from the primary of the view X 129 | 130 | // vs.viewBound is the latest view number of which the current primary 131 | // has already send back an ack to the view service 132 | // e.g., viewBound = 6 means that the current primary ck_i has sent a 133 | // Ping(6) to the view service successfully. 
the View {p=cki, b=_, n=6} is acked 134 | 135 | // if current view's Viewnum in view service = 6, then 6 + 1 > 6 136 | // so you can do the vs.replace(k) and the Viewnum of vs will be 7 137 | // however, if current view's Viewnum in the vs = 7, then 6 + 1 > 7 doesn't hold 138 | // so you CANNOT do the replacement even though many rounds of tick() may have passed 139 | 140 | // X = 6, X+1 = 7: 141 | // the vs CANNOT proceed from view 7 to view 8 as it has not received a Ping(7) 142 | // from the primary of the view X. the current viewBound is still 7 143 | // see testcase: `Viewserver waits for primary to ack view` 144 | // if the current viewnum is 7 145 | // 6 + 1 > 7? NO! so you cannot proceed. skip this function. 146 | if vs.viewBound + 1 > vs.currview.Viewnum { 147 | 148 | if k == vs.currview.Primary { 149 | // if k is the current primary -> remove this primary 150 | vs.currview.Primary = vs.currview.Backup 151 | vs.currview.Backup = "" 152 | vs.backupByIdleSrv() 153 | vs.currview.Viewnum += 1 154 | } else if k == vs.currview.Backup { 155 | // if k is the current backup -> remove this backup 156 | vs.currview.Backup = "" 157 | vs.backupByIdleSrv() 158 | vs.currview.Viewnum += 1 159 | } // if k is neither of both -> we don't do anything 160 | } else { 161 | //log.Printf("cannot change view: current view not yet acked by primary:\n" + 162 | // "viewBound=%v, vs.currview.Viewnum=%v", vs.viewBound, vs.currview.Viewnum) 163 | } 164 | } 165 | 166 | // edited by Adrian 167 | func (vs *ViewServer) assignRole(me string) { 168 | 169 | // ack rule: same idea as the `replace()` function 170 | if vs.viewBound + 1 > vs.currview.Viewnum { 171 | // the current ping is from an arbitrary server (not primary, nor backup) 172 | // new server has joined! what job should it do? primary? backup? or idle? 173 | if vs.currview.Primary == "" { 174 | vs.currview.Primary = me 175 | vs.currview.Viewnum += 1 176 | } else if vs.currview.Backup == "" { 177 | vs.currview.Backup = me 178 | vs.currview.Viewnum += 1 179 | } else { 180 | vs.idleServers[me] = true 181 | // do not add the viewnum 182 | } 183 | } else { 184 | //log.Printf("cannot change view: current view not yet acked by primary:\n " + 185 | //"viewBound=%v, vs.currview.Viewnum=%v", vs.viewBound, vs.currview.Viewnum) 186 | } 187 | } 188 | 189 | // 190 | // tick() is called once per PingInterval; it should notice 191 | // if servers have died or recovered, and change the view 192 | // accordingly. 193 | // 194 | func (vs *ViewServer) tick() { 195 | // Your code here. 196 | 197 | // Hint #4: your viewservice needs to make periodic decisions, 198 | // for example to promote the backup if the viewservice has missed 199 | // DeadPings pings from the primary. 200 | vs.rwm.Lock() 201 | defer vs.rwm.Unlock() 202 | for k, v := range vs.recentHeard { 203 | // if current time time.Now() > (recentHeard time + some timeout) 204 | // then we need to replace this server `k` 205 | if time.Now().After(v.Add(DeadPings * PingInterval)) { 206 | vs.replace(k) 207 | } 208 | } 209 | } 210 | 211 | // 212 | // tell the server to shut itself down. 213 | // for testing. 214 | // please don't change these two functions. 215 | // 216 | func (vs *ViewServer) Kill() { 217 | atomic.StoreInt32(&vs.dead, 1) 218 | vs.l.Close() 219 | } 220 | 221 | // 222 | // has this server been asked to shut down? 223 | // 224 | func (vs *ViewServer) isdead() bool { 225 | return atomic.LoadInt32(&vs.dead) != 0 226 | } 227 | 228 | // please don't change this function. 
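// GetRPCCount reports how many RPC connections this viewserver has accepted;
// the accept loop in StartServer() increments vs.rpccount atomically for each
// connection, and the field is exposed here for testing only.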
229 | func (vs *ViewServer) GetRPCCount() int32 { 230 | return atomic.LoadInt32(&vs.rpccount) 231 | } 232 | 233 | func StartServer(me string) *ViewServer { 234 | vs := new(ViewServer) 235 | vs.me = me 236 | // Your vs.* initializations here. 237 | vs.currview = nil 238 | vs.recentHeard = make(map[string]time.Time) 239 | vs.viewBound = 0 240 | vs.idleServers = make(map[string]bool) 241 | 242 | // tell net/rpc about our RPC server and handlers. 243 | rpcs := rpc.NewServer() 244 | rpcs.Register(vs) 245 | 246 | // prepare to receive connections from clients. 247 | // change "unix" to "tcp" to use over a network. 248 | os.Remove(vs.me) // only needed for "unix" 249 | l, e := net.Listen("unix", vs.me) 250 | if e != nil { 251 | log.Fatal("listen error: ", e) 252 | } 253 | vs.l = l 254 | 255 | // please don't change any of the following code, 256 | // or do anything to subvert it. 257 | 258 | // create a thread to accept RPC connections from clients. 259 | go func() { 260 | for vs.isdead() == false { 261 | conn, err := vs.l.Accept() 262 | if err == nil && vs.isdead() == false { 263 | atomic.AddInt32(&vs.rpccount, 1) 264 | go rpcs.ServeConn(conn) 265 | } else if err == nil { 266 | conn.Close() 267 | } 268 | if err != nil && vs.isdead() == false { 269 | fmt.Printf("ViewServer(%v) accept: %v\n", me, err.Error()) 270 | vs.Kill() 271 | } 272 | } 273 | }() 274 | 275 | // create a thread to call tick() periodically. 276 | go func() { 277 | for vs.isdead() == false { 278 | vs.tick() 279 | time.Sleep(PingInterval) 280 | } 281 | }() 282 | 283 | return vs 284 | } 285 | -------------------------------------------------------------------------------- /hw4/src/shardmaster/test_test.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "testing" 4 | import "runtime" 5 | import "strconv" 6 | import "os" 7 | 8 | // import "time" 9 | import "fmt" 10 | import "math/rand" 11 | 12 | func port(tag string, host int) string { 13 | s := "/var/tmp/824-" 14 | s += strconv.Itoa(os.Getuid()) + "/" 15 | os.Mkdir(s, 0777) 16 | s += "sm-" 17 | s += strconv.Itoa(os.Getpid()) + "-" 18 | s += tag + "-" 19 | s += strconv.Itoa(host) 20 | return s 21 | } 22 | 23 | func cleanup(sma []*ShardMaster) { 24 | for i := 0; i < len(sma); i++ { 25 | if sma[i] != nil { 26 | sma[i].Kill() 27 | } 28 | } 29 | } 30 | 31 | // 32 | // maybe should take a cka[] and find the server with 33 | // the highest Num. 34 | // 35 | func check(t *testing.T, groups []int64, ck *Clerk) { 36 | c := ck.Query(-1) 37 | if len(c.Groups) != len(groups) { 38 | t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups)) 39 | } 40 | 41 | // are the groups as expected? 42 | for _, g := range groups { 43 | _, ok := c.Groups[g] 44 | if ok != true { 45 | t.Fatalf("missing group %v", g) 46 | } 47 | } 48 | 49 | // any un-allocated shards? 50 | if len(groups) > 0 { 51 | for s, g := range c.Shards { 52 | _, ok := c.Groups[g] 53 | if ok == false { 54 | t.Fatalf("shard %v -> invalid group %v", s, g) 55 | } 56 | } 57 | } 58 | 59 | // more or less balanced sharding? 
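// Illustrative check, assuming NShards were 10 with three joined groups: a
// balanced assignment has per-group shard counts such as {4, 3, 3}. The code
// below tallies how many shards each gid owns and fails if max > min+1.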
60 | counts := map[int64]int{} 61 | for _, g := range c.Shards { 62 | counts[g] += 1 63 | } 64 | min := 257 65 | max := 0 66 | for g, _ := range c.Groups { 67 | if counts[g] > max { 68 | max = counts[g] 69 | } 70 | if counts[g] < min { 71 | min = counts[g] 72 | } 73 | } 74 | if max > min+1 { 75 | t.Fatalf("max %v too much larger than min %v", max, min) 76 | } 77 | } 78 | 79 | func TestBasic(t *testing.T) { 80 | runtime.GOMAXPROCS(4) 81 | 82 | const nservers = 3 83 | var sma []*ShardMaster = make([]*ShardMaster, nservers) 84 | var kvh []string = make([]string, nservers) 85 | defer cleanup(sma) 86 | 87 | for i := 0; i < nservers; i++ { 88 | kvh[i] = port("basic", i) 89 | } 90 | for i := 0; i < nservers; i++ { 91 | sma[i] = StartServer(kvh, i) 92 | } 93 | 94 | ck := MakeClerk(kvh) 95 | var cka [nservers]*Clerk 96 | for i := 0; i < nservers; i++ { 97 | cka[i] = MakeClerk([]string{kvh[i]}) 98 | } 99 | 100 | fmt.Printf("Test: Basic leave/join ...\n") 101 | 102 | cfa := make([]Config, 6) 103 | cfa[0] = ck.Query(-1) 104 | 105 | check(t, []int64{}, ck) 106 | 107 | var gid1 int64 = 1 108 | ck.Join(gid1, []string{"x", "y", "z"}) 109 | check(t, []int64{gid1}, ck) 110 | cfa[1] = ck.Query(-1) 111 | 112 | var gid2 int64 = 2 113 | ck.Join(gid2, []string{"a", "b", "c"}) 114 | check(t, []int64{gid1, gid2}, ck) 115 | cfa[2] = ck.Query(-1) 116 | 117 | ck.Join(gid2, []string{"a", "b", "c"}) 118 | check(t, []int64{gid1, gid2}, ck) 119 | cfa[3] = ck.Query(-1) 120 | 121 | cfx := ck.Query(-1) 122 | sa1 := cfx.Groups[gid1] 123 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 124 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 125 | } 126 | sa2 := cfx.Groups[gid2] 127 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 128 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 129 | } 130 | 131 | ck.Leave(gid1) 132 | check(t, []int64{gid2}, ck) 133 | cfa[4] = ck.Query(-1) 134 | 135 | ck.Leave(gid1) 136 | check(t, []int64{gid2}, ck) 137 | cfa[5] = ck.Query(-1) 138 | 139 | fmt.Printf(" ... Passed\n") 140 | 141 | fmt.Printf("Test: Historical queries ...\n") 142 | 143 | for i := 0; i < len(cfa); i++ { 144 | c := ck.Query(cfa[i].Num) 145 | if c.Num != cfa[i].Num { 146 | t.Fatalf("historical Num wrong") 147 | } 148 | if c.Shards != cfa[i].Shards { 149 | t.Fatalf("historical Shards wrong") 150 | } 151 | if len(c.Groups) != len(cfa[i].Groups) { 152 | t.Fatalf("number of historical Groups is wrong") 153 | } 154 | for gid, sa := range c.Groups { 155 | sa1, ok := cfa[i].Groups[gid] 156 | if ok == false || len(sa1) != len(sa) { 157 | t.Fatalf("historical len(Groups) wrong") 158 | } 159 | if ok && len(sa1) == len(sa) { 160 | for j := 0; j < len(sa); j++ { 161 | if sa[j] != sa1[j] { 162 | t.Fatalf("historical Groups wrong") 163 | } 164 | } 165 | } 166 | } 167 | } 168 | 169 | fmt.Printf(" ... 
Passed\n") 170 | 171 | fmt.Printf("Test: Move ...\n") 172 | { 173 | var gid3 int64 = 503 174 | ck.Join(gid3, []string{"3a", "3b", "3c"}) 175 | var gid4 int64 = 504 176 | ck.Join(gid4, []string{"4a", "4b", "4c"}) 177 | for i := 0; i < NShards; i++ { 178 | cf := ck.Query(-1) 179 | if i < NShards/2 { 180 | ck.Move(i, gid3) 181 | if cf.Shards[i] != gid3 { 182 | cf1 := ck.Query(-1) 183 | if cf1.Num <= cf.Num { 184 | t.Fatalf("Move should increase Config.Num") 185 | } 186 | } 187 | } else { 188 | ck.Move(i, gid4) 189 | if cf.Shards[i] != gid4 { 190 | cf1 := ck.Query(-1) 191 | if cf1.Num <= cf.Num { 192 | t.Fatalf("Move should increase Config.Num") 193 | } 194 | } 195 | } 196 | } 197 | cf2 := ck.Query(-1) 198 | for i := 0; i < NShards; i++ { 199 | if i < NShards/2 { 200 | if cf2.Shards[i] != gid3 { 201 | t.Fatalf("expected shard %v on gid %v actually %v", 202 | i, gid3, cf2.Shards[i]) 203 | } 204 | } else { 205 | if cf2.Shards[i] != gid4 { 206 | t.Fatalf("expected shard %v on gid %v actually %v", 207 | i, gid4, cf2.Shards[i]) 208 | } 209 | } 210 | } 211 | ck.Leave(gid3) 212 | ck.Leave(gid4) 213 | } 214 | fmt.Printf(" ... Passed\n") 215 | 216 | fmt.Printf("Test: Concurrent leave/join ...\n") 217 | 218 | const npara = 10 219 | gids := make([]int64, npara) 220 | var ca [npara]chan bool 221 | for xi := 0; xi < npara; xi++ { 222 | gids[xi] = int64(xi + 1) 223 | ca[xi] = make(chan bool) 224 | go func(i int) { 225 | defer func() { ca[i] <- true }() 226 | var gid int64 = gids[i] 227 | cka[(i+0)%nservers].Join(gid + 1000, []string{"a", "b", "c"}) 228 | cka[(i+0)%nservers].Join(gid, []string{"a", "b", "c"}) 229 | cka[(i+1)%nservers].Leave(gid + 1000) 230 | }(xi) 231 | } 232 | for i := 0; i < npara; i++ { 233 | <-ca[i] 234 | } 235 | check(t, gids, ck) 236 | 237 | fmt.Printf(" ... Passed\n") 238 | 239 | fmt.Printf("Test: Min advances after joins ...\n") 240 | 241 | for i, sm := range sma { 242 | if sm.px.Min() <= 0 { 243 | t.Fatalf("Min() for %s did not advance", kvh[i]) 244 | } 245 | } 246 | 247 | fmt.Printf(" ... Passed\n") 248 | 249 | fmt.Printf("Test: Minimal transfers after joins ...\n") 250 | 251 | c1 := ck.Query(-1) 252 | for i := 0; i < 5; i++ { 253 | ck.Join(int64(npara+1+i), []string{"a", "b", "c"}) 254 | } 255 | c2 := ck.Query(-1) 256 | for i := int64(1); i <= npara; i++ { 257 | for j := 0; j < len(c1.Shards); j++ { 258 | if c2.Shards[j] == i { 259 | if c1.Shards[j] != i { 260 | t.Fatalf("non-minimal transfer after Join()s") 261 | } 262 | } 263 | } 264 | } 265 | 266 | fmt.Printf(" ... Passed\n") 267 | 268 | fmt.Printf("Test: Minimal transfers after leaves ...\n") 269 | 270 | for i := 0; i < 5; i++ { 271 | ck.Leave(int64(npara + 1 + i)) 272 | } 273 | c3 := ck.Query(-1) 274 | for i := int64(1); i <= npara; i++ { 275 | for j := 0; j < len(c1.Shards); j++ { 276 | if c2.Shards[j] == i { 277 | if c3.Shards[j] != i { 278 | t.Fatalf("non-minimal transfer after Leave()s") 279 | } 280 | } 281 | } 282 | } 283 | 284 | fmt.Printf(" ... Passed\n") 285 | } 286 | 287 | func TestUnreliable(t *testing.T) { 288 | runtime.GOMAXPROCS(4) 289 | 290 | const nservers = 3 291 | var sma []*ShardMaster = make([]*ShardMaster, nservers) 292 | var kvh []string = make([]string, nservers) 293 | defer cleanup(sma) 294 | 295 | for i := 0; i < nservers; i++ { 296 | kvh[i] = port("unrel", i) 297 | } 298 | for i := 0; i < nservers; i++ { 299 | sma[i] = StartServer(kvh, i) 300 | // don't turn on unreliable because the assignment 301 | // doesn't require the shardmaster to detect duplicate 302 | // client requests. 
303 | // sma[i].setunreliable(true) 304 | } 305 | 306 | ck := MakeClerk(kvh) 307 | var cka [nservers]*Clerk 308 | for i := 0; i < nservers; i++ { 309 | cka[i] = MakeClerk([]string{kvh[i]}) 310 | } 311 | 312 | fmt.Printf("Test: Concurrent leave/join, failure ...\n") 313 | 314 | const npara = 20 315 | gids := make([]int64, npara) 316 | var ca [npara]chan bool 317 | for xi := 0; xi < npara; xi++ { 318 | gids[xi] = int64(xi + 1) 319 | ca[xi] = make(chan bool) 320 | go func(i int) { 321 | defer func() { ca[i] <- true }() 322 | var gid int64 = gids[i] 323 | cka[1+(rand.Int()%2)].Join(gid+1000, []string{"a", "b", "c"}) 324 | cka[1+(rand.Int()%2)].Join(gid, []string{"a", "b", "c"}) 325 | cka[1+(rand.Int()%2)].Leave(gid + 1000) 326 | // server 0 won't be able to hear any RPCs. 327 | os.Remove(kvh[0]) 328 | }(xi) 329 | } 330 | for i := 0; i < npara; i++ { 331 | <-ca[i] 332 | } 333 | check(t, gids, ck) 334 | 335 | fmt.Printf(" ... Passed\n") 336 | } 337 | 338 | func TestFreshQuery(t *testing.T) { 339 | runtime.GOMAXPROCS(4) 340 | 341 | const nservers = 3 342 | var sma []*ShardMaster = make([]*ShardMaster, nservers) 343 | var kvh []string = make([]string, nservers) 344 | defer cleanup(sma) 345 | 346 | for i := 0; i < nservers; i++ { 347 | kvh[i] = port("fresh", i) 348 | } 349 | for i := 0; i < nservers; i++ { 350 | sma[i] = StartServer(kvh, i) 351 | } 352 | 353 | ck1 := MakeClerk([]string{kvh[1]}) 354 | 355 | fmt.Printf("Test: Query() returns latest configuration ...\n") 356 | 357 | portx := kvh[0] + strconv.Itoa(rand.Int()) 358 | if os.Rename(kvh[0], portx) != nil { 359 | t.Fatalf("os.Rename() failed") 360 | } 361 | ck0 := MakeClerk([]string{portx}) 362 | 363 | ck1.Join(1001, []string{"a", "b", "c"}) 364 | c := ck0.Query(-1) 365 | _, ok := c.Groups[1001] 366 | if ok == false { 367 | t.Fatalf("Query(-1) produced a stale configuration") 368 | } 369 | 370 | fmt.Printf(" ... Passed\n") 371 | os.Remove(portx) 372 | } 373 | -------------------------------------------------------------------------------- /hw4/src/shardmaster/server.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import ( 4 | "errors" 5 | "net" 6 | "reflect" 7 | "time" 8 | ) 9 | import "fmt" 10 | import "net/rpc" 11 | import "log" 12 | 13 | import "paxos" 14 | import "sync" 15 | import "sync/atomic" 16 | import "os" 17 | import "syscall" 18 | import "encoding/gob" 19 | import "math/rand" 20 | 21 | type ShardMaster struct { 22 | mu sync.Mutex 23 | l net.Listener 24 | me int 25 | dead int32 // for testing 26 | unreliable int32 // for testing 27 | px *paxos.Paxos 28 | 29 | configs []Config // indexed by config num 30 | lastApply int 31 | } 32 | 33 | 34 | const ( 35 | Join = "Join" 36 | Leave = "Leave" 37 | Move = "Move" 38 | Query = "Query" 39 | ) 40 | 41 | type Op struct { 42 | // Your data here. 43 | Operation string 44 | Args interface{} 45 | } 46 | 47 | func (sm *ShardMaster) Rebalance(config *Config, deleteGID int64) { 48 | nGroup := len(config.Groups) 49 | limit := NShards / nGroup 50 | 51 | for i := 0; i < NShards; i++ { 52 | if config.Shards[i] == deleteGID { 53 | // let's say we want to delete gid = 101 54 | // and Shards is now [101, 101, 100, 101, 102, ...] 55 | // then it becomes [0, 0, 100, 0, 102, ...] 56 | config.Shards[i] = 0 57 | } 58 | } 59 | gidCounts := make(map[int64]int) 60 | for i := 0; i < NShards; i++ { 61 | // occurrences of gids in these 10 shards 62 | 63 | // ps. 
the DELETED gid will also has a gidCounts 64 | // and our goal is just making it decrease to 0 (all distributed) 65 | gidCounts[config.Shards[i]] += 1 66 | } 67 | 68 | for i := 0; i < NShards; i++ { 69 | gid := config.Shards[i] 70 | // if `i`th shard's group is now deleted 71 | // OR if `i`th shard's group need to manage too many shards 72 | // -> find someone to replace it and to take care of `i`th shard 73 | // how do we find who is the best choice? 74 | if gid == 0 || gidCounts[gid] > limit { 75 | 76 | // bestGid is the best replacement gid that we could find now 77 | bestGid := int64(-1) // init value 78 | // minGidCount is the # of shards that the group `bestGid` 79 | // is taking care of. 80 | // e.g., [0, 0, 0, 101, 101, 102, 101, 0, 102, 101] 81 | // then bestGid = 102 as its minGidCount = 2 82 | // in contrast, gid 101 is not the best as it is already 83 | // taking care of 4 shards 84 | minGidCount := -1 // init value 85 | 86 | // enumerate all existing groups 87 | for currGid, _ := range config.Groups { 88 | // if init OR 89 | // group `currGid` is taking care of less # of shards 90 | // compared to minGidCount 91 | // update our best choice Gid (the one will MINIMUM count) 92 | if bestGid == -1 || gidCounts[currGid] < minGidCount { 93 | bestGid = currGid 94 | minGidCount = gidCounts[currGid] 95 | } 96 | } 97 | // if the current gid on shard `i` is deleted 98 | // we MUST need to give it a new gid 99 | // and so new the deleted group's gidCount will -= 1 100 | // and the replacement group will += 1 101 | if gid == 0 { 102 | gidCounts[gid] -= 1 103 | gidCounts[bestGid] += 1 104 | config.Shards[i] = bestGid 105 | } else { 106 | // if the current gid is not the deleted one 107 | // i.e., it is just `gid` group taking care of too many shards 108 | // then we should reduce its burden. But NOT all the time. When? 109 | 110 | // only if our replacement could be meaningful 111 | // e.g. [100, 100, 100, 100, 101, 101, 101, 102, 102, 102] 112 | // for gid = 100, it has now gidCount = 4 113 | // and for gid = 101, it has now gidCount = 3 114 | // then if we make gidCount[100] -= 1 and gidCount[101] += 1 115 | // they will be 3 and 4 respectively...it does not help at all 116 | // e.g. 
[100, 100, 100, 101, 101, 101, 101, 102, 102, 102] 117 | // so we will prefer doing nothing 118 | if gidCounts[gid] - gidCounts[bestGid] > 1 { 119 | gidCounts[gid] -= 1 120 | gidCounts[bestGid] += 1 121 | config.Shards[i] = bestGid 122 | } else { 123 | // do nothing 124 | } 125 | } 126 | } 127 | } 128 | } 129 | 130 | func (sm *ShardMaster) Apply(op Op) { 131 | lastConfig := sm.configs[sm.lastApply] 132 | var newConfig Config 133 | newConfig.Num = lastConfig.Num 134 | newConfig.Groups = make(map[int64][]string) 135 | for k, v := range lastConfig.Groups { 136 | newConfig.Groups[k] = v 137 | } 138 | for i := 0; i < NShards; i++ { 139 | newConfig.Shards[i] = lastConfig.Shards[i] 140 | } 141 | 142 | if op.Operation == Join { 143 | joinArgs := op.Args.(JoinArgs) 144 | newConfig.Groups[joinArgs.GID] = joinArgs.Servers 145 | newConfig.Num += 1 146 | sm.Rebalance(&newConfig, 0) 147 | } else if op.Operation == Leave { 148 | leaveArgs := op.Args.(LeaveArgs) 149 | delete(newConfig.Groups, leaveArgs.GID) 150 | newConfig.Num += 1 151 | sm.Rebalance(&newConfig, leaveArgs.GID) 152 | } else if op.Operation == Move { 153 | moveArgs := op.Args.(MoveArgs) 154 | newConfig.Shards[moveArgs.Shard] = moveArgs.GID 155 | newConfig.Num += 1 156 | } else if op.Operation == Query { 157 | // do nothin 158 | } 159 | 160 | sm.configs = append(sm.configs, newConfig) 161 | } 162 | 163 | func (sm *ShardMaster) Wait(seq int) (Op, error) { 164 | sleepTime := 10 * time.Millisecond 165 | for iters := 0; iters < 15; iters ++ { 166 | decided, op := sm.px.Status(seq) 167 | if decided == paxos.Decided { 168 | return op.(Op), nil 169 | } 170 | time.Sleep(sleepTime) 171 | if sleepTime < 10 * time.Second { 172 | sleepTime *= 2 173 | } 174 | } 175 | return Op{}, errors.New("ShardMaster: Wait for too long") 176 | } 177 | 178 | func (sm *ShardMaster) Propose(xop Op) error { 179 | for { 180 | sm.px.Start(sm.lastApply + 1, xop) 181 | op, err := sm.Wait(sm.lastApply + 1) 182 | if err != nil { 183 | return err 184 | } 185 | sm.Apply(op) 186 | sm.lastApply += 1 187 | if reflect.DeepEqual(op, xop) { 188 | break 189 | } 190 | sm.px.Done(sm.lastApply) 191 | } 192 | sm.px.Done(sm.lastApply) 193 | return nil 194 | } 195 | 196 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) error { 197 | // Your code here. 198 | sm.mu.Lock() 199 | defer sm.mu.Unlock() 200 | op := Op{Args: *args, Operation: Join} 201 | err := sm.Propose(op) 202 | if err != nil { 203 | return err 204 | } 205 | return nil 206 | } 207 | 208 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) error { 209 | sm.mu.Lock() 210 | defer sm.mu.Unlock() 211 | op := Op{Args: *args, Operation: Leave} 212 | err := sm.Propose(op) 213 | if err != nil { 214 | return err 215 | } 216 | return nil 217 | } 218 | 219 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) error { 220 | sm.mu.Lock() 221 | defer sm.mu.Unlock() 222 | op := Op{Args: *args, Operation: Move} 223 | err := sm.Propose(op) 224 | if err != nil { 225 | return err 226 | } 227 | return nil 228 | } 229 | 230 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) error { 231 | // Your code here. 232 | sm.mu.Lock() 233 | defer sm.mu.Unlock() 234 | 235 | op := Op{Args: *args, Operation: Query} 236 | err := sm.Propose(op) 237 | if err != nil { 238 | return err 239 | } 240 | 241 | // config.Num is not necessarily equal to its index in sm.configs 242 | // e.g., sm.configs[1203].Num -> this value could be != 1203 243 | // e.g., sm.configs[6].Num = 3, sm.configs[16].Num = 5 244 | // why? 
since that WE ONLY add Num when Join/Leave/Move 245 | // but we don't add Num when doing Query 246 | // however, sm.configs will be appended even if it was Query 247 | // thus, len of configs grows FASTER than Num 248 | for i := 0; i < sm.lastApply; i++ { 249 | if sm.configs[i].Num == args.Num { 250 | reply.Config = sm.configs[i] 251 | //log.Printf("i=%v, num=%v", i, args.Num) 252 | return nil 253 | } 254 | } 255 | // args.Num == -1 OR args.Num is larger than any other Num in configs 256 | reply.Config = sm.configs[sm.lastApply] 257 | return nil 258 | } 259 | 260 | // please don't change these two functions. 261 | func (sm *ShardMaster) Kill() { 262 | atomic.StoreInt32(&sm.dead, 1) 263 | sm.l.Close() 264 | sm.px.Kill() 265 | } 266 | 267 | // call this to find out if the server is dead. 268 | func (sm *ShardMaster) isdead() bool { 269 | return atomic.LoadInt32(&sm.dead) != 0 270 | } 271 | 272 | // please do not change these two functions. 273 | func (sm *ShardMaster) setunreliable(what bool) { 274 | if what { 275 | atomic.StoreInt32(&sm.unreliable, 1) 276 | } else { 277 | atomic.StoreInt32(&sm.unreliable, 0) 278 | } 279 | } 280 | 281 | func (sm *ShardMaster) isunreliable() bool { 282 | return atomic.LoadInt32(&sm.unreliable) != 0 283 | } 284 | 285 | // 286 | // servers[] contains the ports of the set of 287 | // servers that will cooperate via Paxos to 288 | // form the fault-tolerant shardmaster service. 289 | // me is the index of the current server in servers[]. 290 | // 291 | func StartServer(servers []string, me int) *ShardMaster { 292 | gob.Register(Op{}) 293 | gob.Register(JoinArgs{}) 294 | gob.Register(LeaveArgs{}) 295 | gob.Register(MoveArgs{}) 296 | gob.Register(QueryArgs{}) 297 | 298 | sm := new(ShardMaster) 299 | sm.me = me 300 | 301 | sm.configs = make([]Config, 1) 302 | sm.configs[0].Groups = map[int64][]string{} 303 | 304 | rpcs := rpc.NewServer() 305 | 306 | gob.Register(Op{}) 307 | rpcs.Register(sm) 308 | sm.px = paxos.Make(servers, me, rpcs) 309 | 310 | os.Remove(servers[me]) 311 | l, e := net.Listen("unix", servers[me]) 312 | if e != nil { 313 | log.Fatal("listen error: ", e) 314 | } 315 | sm.l = l 316 | 317 | // please do not change any of the following code, 318 | // or do anything to subvert it. 319 | 320 | go func() { 321 | for sm.isdead() == false { 322 | conn, err := sm.l.Accept() 323 | if err == nil && sm.isdead() == false { 324 | if sm.isunreliable() && (rand.Int63()%1000) < 100 { 325 | // discard the request. 326 | conn.Close() 327 | } else if sm.isunreliable() && (rand.Int63()%1000) < 200 { 328 | // process the request but force discard of reply. 
329 | c1 := conn.(*net.UnixConn) 330 | f, _ := c1.File() 331 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 332 | if err != nil { 333 | fmt.Printf("shutdown: %v\n", err) 334 | } 335 | go rpcs.ServeConn(conn) 336 | } else { 337 | go rpcs.ServeConn(conn) 338 | } 339 | } else if err == nil { 340 | conn.Close() 341 | } 342 | if err != nil && sm.isdead() == false { 343 | fmt.Printf("ShardMaster(%v) accept: %v\n", me, err.Error()) 344 | sm.Kill() 345 | } 346 | } 347 | }() 348 | 349 | return sm 350 | } -------------------------------------------------------------------------------- /hw4/src/shardkv/test_test.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "testing" 5 | ) 6 | import "shardmaster" 7 | import "runtime" 8 | import "strconv" 9 | import "os" 10 | import "time" 11 | import "fmt" 12 | import "sync" 13 | import "sync/atomic" 14 | import "math/rand" 15 | 16 | // information about the servers of one replica group. 17 | type tGroup struct { 18 | gid int64 19 | servers []*ShardKV 20 | ports []string 21 | } 22 | 23 | // information about all the servers of a k/v cluster. 24 | type tCluster struct { 25 | t *testing.T 26 | masters []*shardmaster.ShardMaster 27 | mck *shardmaster.Clerk 28 | masterports []string 29 | groups []*tGroup 30 | } 31 | 32 | func port(tag string, host int) string { 33 | s := "/var/tmp/824-" 34 | s += strconv.Itoa(os.Getuid()) + "/" 35 | os.Mkdir(s, 0777) 36 | s += "skv-" 37 | s += strconv.Itoa(os.Getpid()) + "-" 38 | s += tag + "-" 39 | s += strconv.Itoa(host) 40 | return s 41 | } 42 | 43 | // 44 | // start a k/v replica server thread. 45 | // 46 | func (tc *tCluster) start1(gi int, si int, unreliable bool) { 47 | s := StartServer(tc.groups[gi].gid, tc.masterports, tc.groups[gi].ports, si) 48 | tc.groups[gi].servers[si] = s 49 | s.Setunreliable(unreliable) 50 | } 51 | 52 | func (tc *tCluster) cleanup() { 53 | for gi := 0; gi < len(tc.groups); gi++ { 54 | g := tc.groups[gi] 55 | for si := 0; si < len(g.servers); si++ { 56 | if g.servers[si] != nil { 57 | g.servers[si].kill() 58 | } 59 | } 60 | } 61 | 62 | for i := 0; i < len(tc.masters); i++ { 63 | if tc.masters[i] != nil { 64 | tc.masters[i].Kill() 65 | } 66 | } 67 | } 68 | 69 | func (tc *tCluster) shardclerk() *shardmaster.Clerk { 70 | return shardmaster.MakeClerk(tc.masterports) 71 | } 72 | 73 | func (tc *tCluster) clerk() *Clerk { 74 | return MakeClerk(tc.masterports) 75 | } 76 | 77 | func (tc *tCluster) join(gi int) { 78 | tc.mck.Join(tc.groups[gi].gid, tc.groups[gi].ports) 79 | } 80 | 81 | func (tc *tCluster) leave(gi int) { 82 | tc.mck.Leave(tc.groups[gi].gid) 83 | } 84 | 85 | func setup(t *testing.T, tag string, unreliable bool) *tCluster { 86 | runtime.GOMAXPROCS(4) 87 | 88 | const nmasters = 3 89 | const ngroups = 3 // replica groups 90 | const nreplicas = 3 // servers per group 91 | 92 | tc := &tCluster{} 93 | tc.t = t 94 | tc.masters = make([]*shardmaster.ShardMaster, nmasters) 95 | tc.masterports = make([]string, nmasters) 96 | 97 | for i := 0; i < nmasters; i++ { 98 | tc.masterports[i] = port(tag+"m", i) 99 | } 100 | for i := 0; i < nmasters; i++ { 101 | tc.masters[i] = shardmaster.StartServer(tc.masterports, i) 102 | } 103 | tc.mck = tc.shardclerk() 104 | 105 | tc.groups = make([]*tGroup, ngroups) 106 | 107 | for i := 0; i < ngroups; i++ { 108 | tc.groups[i] = &tGroup{} 109 | tc.groups[i].gid = int64(i + 100) 110 | tc.groups[i].servers = make([]*ShardKV, nreplicas) 111 | tc.groups[i].ports = make([]string, nreplicas) 112 | 
for j := 0; j < nreplicas; j++ { 113 | tc.groups[i].ports[j] = port(tag+"s", (i*nreplicas)+j) 114 | } 115 | for j := 0; j < nreplicas; j++ { 116 | tc.start1(i, j, unreliable) 117 | } 118 | } 119 | 120 | // return smh, gids, ha, sa, clean 121 | return tc 122 | } 123 | 124 | func TestBasic(t *testing.T) { 125 | tc := setup(t, "basic", false) 126 | defer tc.cleanup() 127 | 128 | fmt.Printf("Test: Basic Join/Leave ...\n") 129 | time.Sleep(time.Second) 130 | 131 | tc.join(0) 132 | 133 | ck := tc.clerk() 134 | 135 | ck.Put("a", "x") 136 | ck.Append("a", "b") 137 | if ck.Get("a") != "xb" { 138 | t.Fatalf("Get got wrong value") 139 | } 140 | 141 | keys := make([]string, 10) 142 | vals := make([]string, len(keys)) 143 | for i := 0; i < len(keys); i++ { 144 | keys[i] = strconv.Itoa(rand.Int()) 145 | vals[i] = strconv.Itoa(rand.Int()) 146 | ck.Put(keys[i], vals[i]) 147 | } 148 | // are keys still there after joins? 149 | for g := 1; g < len(tc.groups); g++ { 150 | tc.join(g) 151 | time.Sleep(1 * time.Second) 152 | for i := 0; i < len(keys); i++ { 153 | v := ck.Get(keys[i]) 154 | if v != vals[i] { 155 | t.Fatalf("joining; wrong value; g=%v k=%v wanted=%v got=%v", 156 | g, keys[i], vals[i], v) 157 | } 158 | vals[i] = strconv.Itoa(rand.Int()) 159 | ck.Put(keys[i], vals[i]) 160 | } 161 | } 162 | //log.Printf("done first part") 163 | // are keys still there after leaves? 164 | for g := 0; g < len(tc.groups)-1; g++ { 165 | tc.leave(g) 166 | time.Sleep(1 * time.Second) 167 | for i := 0; i < len(keys); i++ { 168 | v := ck.Get(keys[i]) 169 | if v != vals[i] { 170 | t.Fatalf("leaving; wrong value; g=%v k=%v wanted=%v got=%v", 171 | g, keys[i], vals[i], v) 172 | } 173 | vals[i] = strconv.Itoa(rand.Int()) 174 | ck.Put(keys[i], vals[i]) 175 | } 176 | } 177 | 178 | fmt.Printf(" ... Passed\n") 179 | } 180 | 181 | func TestMove(t *testing.T) { 182 | tc := setup(t, "move", false) 183 | defer tc.cleanup() 184 | 185 | fmt.Printf("Test: Shards really move ...\n") 186 | 187 | tc.join(0) 188 | 189 | ck := tc.clerk() 190 | 191 | // insert one key per shard 192 | for i := 0; i < shardmaster.NShards; i++ { 193 | ck.Put(string('0'+i), string('0'+i)) 194 | } 195 | 196 | // add group 1. 197 | tc.join(1) 198 | time.Sleep(5 * time.Second) 199 | 200 | // check that keys are still there. 201 | for i := 0; i < shardmaster.NShards; i++ { 202 | if ck.Get(string('0'+i)) != string('0'+i) { 203 | t.Fatalf("missing key/value") 204 | } 205 | } 206 | 207 | // remove sockets from group 0. 208 | for _, port := range tc.groups[0].ports { 209 | os.Remove(port) 210 | } 211 | 212 | count := int32(0) 213 | var mu sync.Mutex 214 | for i := 0; i < shardmaster.NShards; i++ { 215 | go func(me int) { 216 | myck := tc.clerk() 217 | v := myck.Get(string('0' + me)) 218 | if v == string('0'+me) { 219 | mu.Lock() 220 | atomic.AddInt32(&count, 1) 221 | mu.Unlock() 222 | } else { 223 | t.Fatalf("Get(%v) yielded %v\n", me, v) 224 | } 225 | }(i) 226 | } 227 | 228 | time.Sleep(10 * time.Second) 229 | 230 | ccc := atomic.LoadInt32(&count) 231 | if ccc > shardmaster.NShards/3 && ccc < 2*(shardmaster.NShards/3) { 232 | fmt.Printf(" ... 
Passed\n") 233 | } else { 234 | t.Fatalf("%v keys worked after killing 1/2 of groups; wanted %v", 235 | ccc, shardmaster.NShards/2) 236 | } 237 | } 238 | 239 | func TestLimp(t *testing.T) { 240 | tc := setup(t, "limp", false) 241 | defer tc.cleanup() 242 | 243 | fmt.Printf("Test: Reconfiguration with some dead replicas ...\n") 244 | 245 | tc.join(0) 246 | 247 | ck := tc.clerk() 248 | 249 | ck.Put("a", "b") 250 | if ck.Get("a") != "b" { 251 | t.Fatalf("got wrong value") 252 | } 253 | 254 | // kill one server from each replica group. 255 | for gi := 0; gi < len(tc.groups); gi++ { 256 | sa := tc.groups[gi].servers 257 | ns := len(sa) 258 | sa[rand.Int()%ns].kill() 259 | } 260 | 261 | keys := make([]string, 10) 262 | vals := make([]string, len(keys)) 263 | for i := 0; i < len(keys); i++ { 264 | keys[i] = strconv.Itoa(rand.Int()) 265 | vals[i] = strconv.Itoa(rand.Int()) 266 | ck.Put(keys[i], vals[i]) 267 | } 268 | 269 | // are keys still there after joins? 270 | for g := 1; g < len(tc.groups); g++ { 271 | tc.join(g) 272 | time.Sleep(1 * time.Second) 273 | for i := 0; i < len(keys); i++ { 274 | v := ck.Get(keys[i]) 275 | if v != vals[i] { 276 | t.Fatalf("joining; wrong value; g=%v k=%v wanted=%v got=%v", 277 | g, keys[i], vals[i], v) 278 | } 279 | vals[i] = strconv.Itoa(rand.Int()) 280 | ck.Put(keys[i], vals[i]) 281 | } 282 | } 283 | 284 | // are keys still there after leaves? 285 | for gi := 0; gi < len(tc.groups)-1; gi++ { 286 | tc.leave(gi) 287 | time.Sleep(2 * time.Second) 288 | g := tc.groups[gi] 289 | for i := 0; i < len(g.servers); i++ { 290 | g.servers[i].kill() 291 | } 292 | for i := 0; i < len(keys); i++ { 293 | v := ck.Get(keys[i]) 294 | if v != vals[i] { 295 | t.Fatalf("leaving; wrong value; g=%v k=%v wanted=%v got=%v", 296 | g, keys[i], vals[i], v) 297 | } 298 | vals[i] = strconv.Itoa(rand.Int()) 299 | ck.Put(keys[i], vals[i]) 300 | } 301 | } 302 | 303 | fmt.Printf(" ... 
Passed\n") 304 | } 305 | 306 | func doConcurrent(t *testing.T, unreliable bool) { 307 | tc := setup(t, "concurrent-"+strconv.FormatBool(unreliable), unreliable) 308 | defer tc.cleanup() 309 | 310 | for i := 0; i < len(tc.groups); i++ { 311 | tc.join(i) 312 | } 313 | 314 | const npara = 11 315 | var ca [npara]chan bool 316 | for i := 0; i < npara; i++ { 317 | ca[i] = make(chan bool) 318 | go func(me int) { 319 | ok := true 320 | defer func() { ca[me] <- ok }() 321 | ck := tc.clerk() 322 | mymck := tc.shardclerk() 323 | key := strconv.Itoa(me) 324 | last := "" 325 | for iters := 0; iters < 3; iters++ { 326 | //for iters := 0; iters < 1; iters++ { 327 | nv := strconv.Itoa(rand.Int()) 328 | //nv := strconv.Itoa(me) 329 | //log.Printf("[test1] Append key: %v, nv %v", key, nv) 330 | ck.Append(key, nv) 331 | //log.Printf("[test1] done Append key: %v, nv %v", key, nv) 332 | 333 | last = last + nv 334 | v := ck.Get(key) 335 | 336 | if v != last { 337 | ok = false 338 | t.Fatalf("Get(%v) expected %v got %v\n", key, last, v) 339 | } 340 | 341 | gi := rand.Int() % len(tc.groups) 342 | gid := tc.groups[gi].gid 343 | which := rand.Int()%shardmaster.NShards 344 | //log.Printf("[test2] Move which %v, gid %v", which, gid) 345 | mymck.Move(which, gid) 346 | //log.Printf("[test2] done Move which %v, gid %v", which, gid) 347 | 348 | time.Sleep(time.Duration(rand.Int()%30) * time.Millisecond) 349 | } 350 | }(i) 351 | } 352 | 353 | for i := 0; i < npara; i++ { 354 | x := <-ca[i] 355 | if x == false { 356 | t.Fatalf("something is wrong") 357 | } 358 | } 359 | } 360 | 361 | //The two Concurrent test cases above test several clients sending Append and Get 362 | //operations to different shard groups concurrently while also periodically asking 363 | //the shard master to move shards between groups. To pass these test cases you must 364 | //design a correct protocol for handling concurrent operations in the presence of configuration changes. 365 | func TestConcurrent(t *testing.T) { 366 | fmt.Printf("Test: Concurrent Put/Get/Move ...\n") 367 | doConcurrent(t, false) 368 | fmt.Printf(" ... Passed\n") 369 | } 370 | //The second concurrent test case is the same as the first one, though the test code 371 | //drops requests and responses randomly. 372 | func TestConcurrentUnreliable(t *testing.T) { 373 | fmt.Printf("Test: Concurrent Put/Get/Move (unreliable) ...\n") 374 | doConcurrent(t, true) 375 | fmt.Printf(" ... Passed\n") 376 | } 377 | -------------------------------------------------------------------------------- /hw1/src/mapreduce/mapreduce.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | import "os" 8 | import "log" 9 | import "strconv" 10 | import "encoding/json" 11 | import "sort" 12 | import "container/list" 13 | import "net/rpc" 14 | import "net" 15 | import "bufio" 16 | import "hash/fnv" 17 | 18 | // import "os/exec" 19 | 20 | // A simple mapreduce library with a sequential implementation. 21 | // 22 | // The application provides an input file f, a Map and Reduce function, 23 | // and the number of nMap and nReduce tasks. 24 | // 25 | // Split() splits the file f in nMap input files: 26 | // f-0, f-1, ..., f- 27 | // one for each Map job. 28 | // 29 | // DoMap() runs Map on each map file and produces nReduce files for each 30 | // map file. Thus, there will be nMap x nReduce files after all map 31 | // jobs are done: 32 | // f-0-0, ..., f-0-0, f-0-, ..., 33 | // f--0, ... f--. 
34 | // 35 | // DoReduce() collects reduce files from each map (f-*-), 36 | // and runs Reduce on those files. This produces result files, 37 | // which Merge() merges into a single output. 38 | 39 | // Debugging 40 | const Debug = 0 41 | 42 | func DPrintf(format string, a ...interface{}) (n int, err error) { 43 | if Debug > 0 { 44 | n, err = fmt.Printf(format, a...) 45 | } 46 | return 47 | } 48 | 49 | // Map and Reduce deal with pairs: 50 | type KeyValue struct { 51 | Key string 52 | Value string 53 | } 54 | 55 | type MapReduce struct { 56 | nMap int // Number of Map jobs 57 | nReduce int // Number of Reduce jobs 58 | file string // Name of input file 59 | MasterAddress string 60 | registerChannel chan string 61 | DoneChannel chan bool 62 | alive bool 63 | l net.Listener 64 | stats *list.List 65 | 66 | // Map of registered workers that you need to keep up to date 67 | Workers map[string]*WorkerInfo 68 | 69 | // add any additional state here 70 | availableWorkers chan string 71 | wg sync.WaitGroup 72 | remainMapJobs chan int 73 | remainReduceJobs chan int 74 | nCount chan bool 75 | donePhase chan bool 76 | } 77 | 78 | func InitMapReduce(nmap int, nreduce int, 79 | file string, master string) *MapReduce { 80 | mr := new(MapReduce) 81 | mr.nMap = nmap 82 | mr.nReduce = nreduce 83 | mr.file = file 84 | mr.MasterAddress = master 85 | mr.alive = true 86 | mr.registerChannel = make(chan string) 87 | mr.DoneChannel = make(chan bool) 88 | 89 | // initialize any additional state here 90 | mr.Workers = make(map[string]*WorkerInfo) 91 | mr.availableWorkers = make(chan string, 484) // a lot of buffers 92 | mr.remainMapJobs = make(chan int, nmap) 93 | mr.remainReduceJobs = make(chan int, nreduce) 94 | mr.nCount = make(chan bool) 95 | mr.donePhase = make(chan bool) 96 | return mr 97 | } 98 | 99 | func MakeMapReduce(nmap int, nreduce int, 100 | file string, master string) *MapReduce { 101 | mr := InitMapReduce(nmap, nreduce, file, master) 102 | mr.StartRegistrationServer() 103 | go mr.Run() 104 | return mr 105 | } 106 | 107 | func (mr *MapReduce) Register(args *RegisterArgs, res *RegisterReply) error { 108 | DPrintf("Register: worker %s\n", args.Worker) 109 | mr.registerChannel <- args.Worker 110 | res.OK = true 111 | return nil 112 | } 113 | 114 | func (mr *MapReduce) Shutdown(args *ShutdownArgs, res *ShutdownReply) error { 115 | DPrintf("Shutdown: registration server\n") 116 | mr.alive = false 117 | mr.l.Close() // causes the Accept to fail 118 | return nil 119 | } 120 | 121 | func (mr *MapReduce) StartRegistrationServer() { 122 | rpcs := rpc.NewServer() 123 | rpcs.Register(mr) 124 | os.Remove(mr.MasterAddress) // only needed for "unix" 125 | l, e := net.Listen("unix", mr.MasterAddress) 126 | if e != nil { 127 | log.Fatal("RegistrationServer", mr.MasterAddress, " error: ", e) 128 | } 129 | mr.l = l 130 | 131 | // now that we are listening on the master address, can fork off 132 | // accepting connections to another thread. 133 | go func() { 134 | for mr.alive { 135 | conn, err := mr.l.Accept() 136 | if err == nil { 137 | go func() { 138 | rpcs.ServeConn(conn) 139 | conn.Close() 140 | }() 141 | } else { 142 | DPrintf("RegistrationServer: accept error %s", err) 143 | break 144 | } 145 | } 146 | DPrintf("RegistrationServer: done\n") 147 | }() 148 | } 149 | 150 | // Name of the file that is the input for map job 151 | func MapName(fileName string, MapJob int) string { 152 | return "mrtmp." 
+ fileName + "-" + strconv.Itoa(MapJob) 153 | } 154 | 155 | // Split bytes of input file into nMap splits, but split only on white space 156 | func (mr *MapReduce) Split(fileName string) { 157 | fmt.Printf("Split %s\n", fileName) 158 | infile, err := os.Open(fileName) 159 | if err != nil { 160 | log.Fatal("Split: ", err) 161 | } 162 | defer infile.Close() 163 | fi, err := infile.Stat() 164 | if err != nil { 165 | log.Fatal("Split: ", err) 166 | } 167 | size := fi.Size() 168 | nchunk := size / int64(mr.nMap) 169 | nchunk += 1 170 | 171 | outfile, err := os.Create(MapName(fileName, 0)) 172 | if err != nil { 173 | log.Fatal("Split: ", err) 174 | } 175 | writer := bufio.NewWriter(outfile) 176 | m := 1 177 | i := 0 178 | 179 | scanner := bufio.NewScanner(infile) 180 | for scanner.Scan() { 181 | if int64(i) > nchunk*int64(m) { 182 | writer.Flush() 183 | outfile.Close() 184 | outfile, err = os.Create(MapName(fileName, m)) 185 | writer = bufio.NewWriter(outfile) 186 | m += 1 187 | } 188 | line := scanner.Text() + "\n" 189 | writer.WriteString(line) 190 | i += len(line) 191 | } 192 | writer.Flush() 193 | outfile.Close() 194 | } 195 | 196 | func ReduceName(fileName string, MapJob int, ReduceJob int) string { 197 | return MapName(fileName, MapJob) + "-" + strconv.Itoa(ReduceJob) 198 | } 199 | 200 | func ihash(s string) uint32 { 201 | h := fnv.New32a() 202 | h.Write([]byte(s)) 203 | return h.Sum32() 204 | } 205 | 206 | // Read split for job, call Map for that split, and create nreduce 207 | // partitions. 208 | func DoMap(JobNumber int, fileName string, 209 | nreduce int, Map func(string) *list.List) { 210 | name := MapName(fileName, JobNumber) 211 | file, err := os.Open(name) 212 | if err != nil { 213 | log.Fatal("DoMap: ", err) 214 | } 215 | fi, err := file.Stat() 216 | if err != nil { 217 | log.Fatal("DoMap: ", err) 218 | } 219 | size := fi.Size() 220 | fmt.Printf("DoMap: read split %s %d\n", name, size) 221 | b := make([]byte, size) 222 | _, err = file.Read(b) 223 | if err != nil { 224 | log.Fatal("DoMap: ", err) 225 | } 226 | file.Close() 227 | res := Map(string(b)) 228 | // XXX a bit inefficient. could open r files and run over list once 229 | for r := 0; r < nreduce; r++ { 230 | file, err = os.Create(ReduceName(fileName, JobNumber, r)) 231 | if err != nil { 232 | log.Fatal("DoMap: create ", err) 233 | } 234 | enc := json.NewEncoder(file) 235 | for e := res.Front(); e != nil; e = e.Next() { 236 | kv := e.Value.(KeyValue) 237 | if ihash(kv.Key)%uint32(nreduce) == uint32(r) { 238 | err := enc.Encode(&kv) 239 | if err != nil { 240 | log.Fatal("DoMap: marshall ", err) 241 | } 242 | } 243 | } 244 | file.Close() 245 | } 246 | } 247 | 248 | func MergeName(fileName string, ReduceJob int) string { 249 | return "mrtmp." 
+ fileName + "-res-" + strconv.Itoa(ReduceJob) 250 | } 251 | 252 | // Read map outputs for partition job, sort them by key, call reduce for each 253 | // key 254 | func DoReduce(job int, fileName string, nmap int, 255 | Reduce func(string, *list.List) string) { 256 | kvs := make(map[string]*list.List) 257 | for i := 0; i < nmap; i++ { 258 | name := ReduceName(fileName, i, job) 259 | fmt.Printf("DoReduce: read %s\n", name) 260 | file, err := os.Open(name) 261 | if err != nil { 262 | log.Fatal("DoReduce: ", err) 263 | } 264 | dec := json.NewDecoder(file) 265 | for { 266 | var kv KeyValue 267 | err = dec.Decode(&kv) 268 | if err != nil { 269 | break 270 | } 271 | _, ok := kvs[kv.Key] 272 | if !ok { 273 | kvs[kv.Key] = list.New() 274 | } 275 | kvs[kv.Key].PushBack(kv.Value) 276 | } 277 | file.Close() 278 | } 279 | var keys []string 280 | for k := range kvs { 281 | keys = append(keys, k) 282 | } 283 | sort.Strings(keys) 284 | p := MergeName(fileName, job) 285 | file, err := os.Create(p) 286 | if err != nil { 287 | log.Fatal("DoReduce: create ", err) 288 | } 289 | enc := json.NewEncoder(file) 290 | for _, k := range keys { 291 | res := Reduce(k, kvs[k]) 292 | enc.Encode(KeyValue{k, res}) 293 | } 294 | file.Close() 295 | } 296 | 297 | // Merge the results of the reduce jobs 298 | // XXX use merge sort 299 | func (mr *MapReduce) Merge() { 300 | DPrintf("Merge phase") 301 | kvs := make(map[string]string) 302 | for i := 0; i < mr.nReduce; i++ { 303 | p := MergeName(mr.file, i) 304 | fmt.Printf("Merge: read %s\n", p) 305 | file, err := os.Open(p) 306 | if err != nil { 307 | log.Fatal("Merge: ", err) 308 | } 309 | dec := json.NewDecoder(file) 310 | for { 311 | var kv KeyValue 312 | err = dec.Decode(&kv) 313 | if err != nil { 314 | break 315 | } 316 | kvs[kv.Key] = kv.Value 317 | } 318 | file.Close() 319 | } 320 | var keys []string 321 | for k := range kvs { 322 | keys = append(keys, k) 323 | } 324 | sort.Strings(keys) 325 | 326 | file, err := os.Create("mrtmp." + mr.file) 327 | if err != nil { 328 | log.Fatal("Merge: create ", err) 329 | } 330 | w := bufio.NewWriter(file) 331 | for _, k := range keys { 332 | fmt.Fprintf(w, "%s: %s\n", k, kvs[k]) 333 | } 334 | w.Flush() 335 | file.Close() 336 | } 337 | 338 | func RemoveFile(n string) { 339 | err := os.Remove(n) 340 | if err != nil { 341 | log.Fatal("CleanupFiles ", err) 342 | } 343 | } 344 | 345 | func (mr *MapReduce) CleanupFiles() { 346 | for i := 0; i < mr.nMap; i++ { 347 | RemoveFile(MapName(mr.file, i)) 348 | for j := 0; j < mr.nReduce; j++ { 349 | RemoveFile(ReduceName(mr.file, i, j)) 350 | } 351 | } 352 | for i := 0; i < mr.nReduce; i++ { 353 | RemoveFile(MergeName(mr.file, i)) 354 | } 355 | RemoveFile("mrtmp." + mr.file) 356 | } 357 | 358 | // Run jobs sequentially. 
359 | func RunSingle(nMap int, nReduce int, file string, 360 | Map func(string) *list.List, 361 | Reduce func(string, *list.List) string) { 362 | mr := InitMapReduce(nMap, nReduce, file, "") 363 | mr.Split(mr.file) 364 | for i := 0; i < nMap; i++ { 365 | DoMap(i, mr.file, mr.nReduce, Map) 366 | } 367 | for i := 0; i < mr.nReduce; i++ { 368 | DoReduce(i, mr.file, mr.nMap, Reduce) 369 | } 370 | mr.Merge() 371 | } 372 | 373 | func (mr *MapReduce) CleanupRegistration() { 374 | args := &ShutdownArgs{} 375 | var reply ShutdownReply 376 | ok := call(mr.MasterAddress, "MapReduce.Shutdown", args, &reply) 377 | if ok == false { 378 | fmt.Printf("Cleanup: RPC %s error\n", mr.MasterAddress) 379 | } 380 | DPrintf("CleanupRegistration: done\n") 381 | } 382 | 383 | // Run jobs in parallel, assuming a shared file system 384 | func (mr *MapReduce) Run() { 385 | fmt.Printf("Run mapreduce job %s %s\n", mr.MasterAddress, mr.file) 386 | 387 | mr.Split(mr.file) 388 | mr.stats = mr.RunMaster() 389 | mr.Merge() 390 | mr.CleanupRegistration() 391 | 392 | fmt.Printf("%s: MapReduce done\n", mr.MasterAddress) 393 | 394 | mr.DoneChannel <- true 395 | } 396 | -------------------------------------------------------------------------------- /hw3/src/kvpaxos/server.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import ( 4 | "net" 5 | "time" 6 | ) 7 | import "fmt" 8 | import "net/rpc" 9 | import "log" 10 | import "paxos" 11 | import "sync" 12 | import "sync/atomic" 13 | import "os" 14 | import "syscall" 15 | import "encoding/gob" 16 | import "math/rand" 17 | 18 | 19 | const Debug = 1 20 | 21 | func DPrintf(format string, a ...interface{}) (n int, err error) { 22 | if Debug > 0 { 23 | log.Printf(format, a...) 24 | } 25 | return 26 | } 27 | 28 | 29 | type Op struct { 30 | // Your definitions here. 31 | // Field names must start with capital letters, 32 | // otherwise RPC will break. 33 | 34 | // OpID is a hash key attached by the client. Each time when client 35 | // retried an operation, it will always use a fixed OpID 36 | OpID int64 37 | // Put, Get, Append 38 | Operation string 39 | Key string 40 | Value string 41 | } 42 | 43 | type KVPaxos struct { 44 | mu sync.Mutex 45 | l net.Listener 46 | me int 47 | dead int32 // for testing 48 | unreliable int32 // for testing 49 | px *paxos.Paxos 50 | 51 | // Your definitions here. 52 | database sync.Map 53 | // hashVals acts as the state to filter duplicates 54 | // if an operation has already been performed on `database`, 55 | // it should not be performed again 56 | hashVals sync.Map 57 | // each KVPaxos has a seq number recording the current progress. 58 | // seq starts from 0 and gradually increase by 1 59 | // if seq = 12, it means that the paxos server instances 0 to 11 has all been decided 60 | // and also they should have been called Done() 61 | seq int 62 | } 63 | 64 | // added by Adrian 65 | // when every time clients sends a Get/PutAppend, 66 | // the first thing our KVPaxos replica do is NOT to perform the Get/PutAppend 67 | // on Paxos directly; instead, we will first make sure all the previous 68 | // operations that have been decided by some other majority (exclude me) 69 | // are successfully fetched into my database. The way we did that is to 70 | // Start() a new operation with the seq number. And since that n_a, v_a has been 71 | // decided, as a result we will get the value which all other majority has reached an agreement to. 
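// In outline (an added summary of the loop that follows):
//
//	for {
//		status, v := kv.px.Status(kv.seq)
//		if status == paxos.Decided {
//			if v.(Op).OpID == xop.OpID { break }   // my own xop reached the log; the caller applies it
//			// someone else's Put/Append: catch up by applying it via kv.doPutAppend(...)
//			// someone else's Get: nothing to apply locally
//			kv.seq += 1
//		} else {
//			// if not already proposing, kv.px.Start(kv.seq, xop); this either
//			// proposes xop or discovers the value already agreed on for kv.seq
//			// then sleep with a growing timeout and re-check Status
//		}
//	}
//	kv.px.Done(kv.seq); kv.seq += 1   // xop itself occupies this final slot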
72 | func (kv *KVPaxos) SyncUp(xop Op) { 73 | to := 10 * time.Millisecond 74 | doing := false 75 | // sync on all seq number instances that I have not yet recorded 76 | // and after they are all done, we perform our own xop by calling Start() 77 | for { 78 | status, op := kv.px.Status(kv.seq) 79 | // DPrintf("server %v, seq %v, status %v", kv.me, kv.seq, status) 80 | // KVPaxos servers interact with each other through the Paxos log. 81 | if status == paxos.Decided { 82 | // this Decided() could be 2 cases. 83 | // case 1. this kv.seq instances has been decided by others and thus when I called Start(), 84 | // the instance n_a, v_a is taken from some other majority's agreement. 85 | // case 2. I am the initiator. No one has reached an agreement (not Decided) on this seq number yet 86 | // and thus the xop.OpID == op.OpID 87 | op := op.(Op) 88 | 89 | if xop.OpID == op.OpID { 90 | // if it was case 2. then we don't do doPutAppend() as we will do it later out of this function 91 | break 92 | } else if op.Operation == "Put" || op.Operation == "Append" { 93 | // if it was case 1, then we have to make compensation. 94 | // we have to catch up on some others' progress. so we perform the PutAppend 95 | // according to the paxos log we have the consensus on 96 | kv.doPutAppend(op.Operation, op.Key, op.Value, op.OpID) 97 | } else { 98 | // if it was case 1, then even though it is a Get I was previously not aware of, 99 | // I still don't need to do anything as it will not affect my `database` 100 | //value, _ := kv.doGet(op.Key) 101 | //DPrintf("get: %v", value) 102 | } 103 | // we could do Done() here. but as it checks all seq num from 0 ~ kv.seq. 104 | // so we can elegantly do it outside of this for loop for simplicity. 105 | // kv.px.Done(kv.seq) 106 | 107 | // once we catched up on this instance, we can finally increase our seq num by 1 108 | kv.seq += 1 109 | // also we have to set that our Start() is over. We might need to initiate another Start() though 110 | doing = false 111 | } else { 112 | if !doing { 113 | // your server should try to assign the next available Paxos instance (sequence number) 114 | // to each incoming client RPC. However, some other kvpaxos replica may also be trying 115 | // to use that instance for a different client's operation. 116 | // e.g., KVPaxos server 1 do Put(1, 15) and server 2 do Put(1, 32). they are both seq=3 now 117 | // Acceptor 1: P1 x A1-15(ok) P2 A2-32(ok) // p.s., Proposal 22 arrives A1 a bit late 118 | // Acceptor 2: P1 P2 A1-15(fail) A2-32(ok) 119 | // Acceptor 3: P1 P2 A1-15(fail) A2-32(ok) 120 | // as a result, Put(1, 32) will be accepted instead of Put(1, 15)(got overrided) 121 | // although these 2 servers are both doing this on seq=3 122 | 123 | // Hint: if one of your kvpaxos servers falls behind (i.e. did not participate 124 | // in the agreement for some instance), it will later need to find out what (if anything) 125 | // was agree to. A reasonable way to to this is to call Start(), which will either 126 | // discover the previously agreed-to value, or cause agreement to happen 127 | 128 | // Think about what value would be reasonable to pass to Start() in this situation. 129 | // Ans. 
Just pass in the value we want to agree on previously (the `xop`) as a matter of fact 130 | // if the instance on seq = 3 has been Decided, then when we call Start() on the prepare phase 131 | // the V_a will definitely be replaced by the V_a that some other majority has agreed to 132 | // Let's say srv 0 are not at seq = 3, and it wants to do Put(1, 15) 133 | // and yet srv 1, 2, 3 has already reached seq = 8, that is, their seq 3 ~ 7 are all decided 134 | // thus, srv 0 will do Start() on seq 3 ~ 7 but the value will got substituded. 135 | // and finally the Put(1, 15) will only be accepted when seq = 8. 136 | kv.px.Start(kv.seq, xop) 137 | //DPrintf("%v: do start for seq: %v, value=%v", kv.me, kv.seq, xop.Value) 138 | // now I'm doing Start(). So don't call Start() again on the same seq, same xop. 139 | // not until I finished doing this xop will I initiate another Start() 140 | doing = true 141 | } 142 | time.Sleep(to) 143 | // your code will need to wait for Paxos instances to complete agreement. 144 | // A good plan is to check quickly at first, and then more slowly: 145 | to += 10 * time.Millisecond 146 | } 147 | } 148 | // don't forget to call the Paxos Done() method when a kvpaxos has processed 149 | // an instance and will no longer need it or any previous instance. 150 | // When will the px.Forget() to be called? when EVERY KVPaxos call Done() on seq = 3, 151 | // then Min() will be 4. -> when doing next Start() (as we are piggybacked() the proposer 152 | // will clean up those old instances by calling Forget() 153 | kv.px.Done(kv.seq) 154 | kv.seq += 1 155 | } 156 | // added by Adrian 157 | func (kv *KVPaxos) doGet(Key string) (string, bool) { 158 | val, ok := kv.database.Load(Key) 159 | // no effect for Get even the hashVal may has duplicates 160 | if !ok { 161 | return "", false 162 | } else { 163 | return val.(string), true 164 | } 165 | } 166 | // added by Adrian 167 | func (kv *KVPaxos) doPutAppend(Operation string, Key string, Value string, hash int64) { 168 | // first, we check if the key is already exists 169 | val, ok := kv.database.Load(Key) 170 | if !ok { // if not exists 171 | 172 | // init. are the same for either put / append 173 | kv.database.Store(Key, Value) 174 | 175 | } else { // load 176 | if Operation == "Put" { 177 | kv.database.Store(Key, Value) 178 | } else if Operation == "Append" { 179 | vals := val.(string) 180 | // you will need to uniquely identify client operations 181 | // to ensure that they execute just once. 182 | _, ok := kv.hashVals.Load(hash) 183 | if !ok { 184 | // check if the hashVals has been added 185 | kv.database.Store(Key, vals+Value) 186 | } 187 | } 188 | } 189 | // we have to store this hash whether it is the first time this pair was pushed or not 190 | // therefore I put this outside of the if-else branch condition 191 | kv.hashVals.Store(hash, 1) // an arbitrary value 1 192 | } 193 | 194 | func (kv *KVPaxos) Get(args *GetArgs, reply *GetReply) error { 195 | // Your code here. 196 | kv.mu.Lock() 197 | defer kv.mu.Unlock() 198 | // these values in Get Op are basically dummy value, 199 | // we will not use them when meeting one of them in SyncUp() 200 | op := Op{args.Hash, "Get", args.Key, ""} 201 | // a kvpaxos server should not complete a Get() RPC if it is not part of a majority 202 | // (so that it does not serve stale data). 
203 | // -> instead, it will endlessly try syncing and wait for it to be `Decided` 204 | kv.SyncUp(op) 205 | reply.Value, _ = kv.doGet(args.Key) 206 | return nil 207 | } 208 | 209 | func (kv *KVPaxos) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 210 | // Your code here. 211 | kv.mu.Lock() 212 | defer kv.mu.Unlock() 213 | // It should enter a Get Op in the Paxos log, and then "interpret" the the log **before that point** 214 | // to make sure its key/value database reflects all recent Put()s. 215 | // ps. An Append Paxos log entry should contain the Append's arguments, 216 | // but not the resulting value, since the result might be large. 217 | op := Op{args.Hash, args.Op, args.Key, args.Value} 218 | kv.SyncUp(op) 219 | kv.doPutAppend(args.Op, args.Key, args.Value, args.Hash) 220 | return nil 221 | } 222 | 223 | // tell the server to shut itself down. 224 | // please do not change these two functions. 225 | func (kv *KVPaxos) kill() { 226 | //DPrintf("Kill(%d): die\n", kv.me) 227 | atomic.StoreInt32(&kv.dead, 1) 228 | kv.l.Close() 229 | kv.px.Kill() 230 | } 231 | 232 | // call this to find out if the server is dead. 233 | func (kv *KVPaxos) isdead() bool { 234 | return atomic.LoadInt32(&kv.dead) != 0 235 | } 236 | 237 | // please do not change these two functions. 238 | func (kv *KVPaxos) setunreliable(what bool) { 239 | if what { 240 | atomic.StoreInt32(&kv.unreliable, 1) 241 | } else { 242 | atomic.StoreInt32(&kv.unreliable, 0) 243 | } 244 | } 245 | 246 | func (kv *KVPaxos) isunreliable() bool { 247 | return atomic.LoadInt32(&kv.unreliable) != 0 248 | } 249 | 250 | // 251 | // servers[] contains the ports of the set of 252 | // servers that will cooperate via Paxos to 253 | // form the fault-tolerant key/value service. 254 | // me is the index of the current server in servers[]. 255 | // 256 | func StartServer(servers []string, me int) *KVPaxos { 257 | // call gob.Register on structures you want 258 | // Go's RPC library to marshall/unmarshall. 259 | gob.Register(Op{}) 260 | 261 | kv := new(KVPaxos) 262 | kv.me = me 263 | 264 | // Your initialization code here. 265 | kv.seq = 0 266 | 267 | rpcs := rpc.NewServer() 268 | rpcs.Register(kv) 269 | 270 | kv.px = paxos.Make(servers, me, rpcs) 271 | 272 | os.Remove(servers[me]) 273 | l, e := net.Listen("unix", servers[me]) 274 | if e != nil { 275 | log.Fatal("listen error: ", e) 276 | } 277 | kv.l = l 278 | 279 | 280 | // please do not change any of the following code, 281 | // or do anything to subvert it. 282 | 283 | go func() { 284 | for kv.isdead() == false { 285 | conn, err := kv.l.Accept() 286 | if err == nil && kv.isdead() == false { 287 | if kv.isunreliable() && (rand.Int63()%1000) < 100 { 288 | // discard the request. 289 | conn.Close() 290 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 { 291 | // process the request but force discard of reply. 
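// (What the next few lines do: syscall.Shutdown(..., SHUT_WR) closes only the
// write half of the Unix socket, so the RPC handler can still read and execute
// the request, but the reply it writes back never reaches the client. That is
// the "processed, but reply lost" failure this branch simulates.)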
292 | c1 := conn.(*net.UnixConn) 293 | f, _ := c1.File() 294 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 295 | if err != nil { 296 | fmt.Printf("shutdown: %v\n", err) 297 | } 298 | go rpcs.ServeConn(conn) 299 | } else { 300 | go rpcs.ServeConn(conn) 301 | } 302 | } else if err == nil { 303 | conn.Close() 304 | } 305 | if err != nil && kv.isdead() == false { 306 | fmt.Printf("KVPaxos(%v) accept: %v\n", me, err.Error()) 307 | kv.kill() 308 | } 309 | } 310 | }() 311 | 312 | return kv 313 | } 314 | -------------------------------------------------------------------------------- /hw2/src/pbservice/server.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import ( 4 | "errors" 5 | "net" 6 | ) 7 | import "fmt" 8 | import "net/rpc" 9 | import "log" 10 | import "time" 11 | import "viewservice" 12 | import "sync" 13 | import "sync/atomic" 14 | import "os" 15 | import "syscall" 16 | import "math/rand" 17 | 18 | 19 | 20 | type PBServer struct { 21 | mu sync.Mutex 22 | l net.Listener 23 | dead int32 // for testing 24 | unreliable int32 // for testing 25 | me string 26 | // To the view service, this PBServer is acting like a clerk. 27 | // so we set up a clerk pointer to do Ping and some other stuff. 28 | vs *viewservice.Clerk 29 | 30 | // Your declarations here. 31 | currview *viewservice.View 32 | database map[string]string 33 | // hashVals acts as the state to filter duplicates 34 | hashVals map[int64]bool 35 | // A read/write mutex allows all the readers to access 36 | // the map at the same time, but a writer will lock out everyone else. 37 | rwm sync.RWMutex 38 | } 39 | 40 | // edited by Adrian 41 | // the new backup got bootstrapped. 42 | func (pb *PBServer) Bootstrapped(args *BootstrapArgs, reply *BootstrapReply) error { 43 | 44 | pb.rwm.Lock() 45 | defer pb.rwm.Unlock() 46 | for k, v := range args.Database { 47 | pb.database[k] = v 48 | } 49 | for k, v := range args.HashVals { 50 | pb.hashVals[k] = v 51 | } 52 | return nil 53 | } 54 | 55 | // edited by Adrian 56 | // initiate by the primary when it found that it's time to bootstrap the new backup 57 | // since that the current view has not yet changed. so we cannot use `pb.currview.Backup` 58 | // instead, we pass in a backup param 59 | func (pb *PBServer) Bootstrapping(backup string) error { 60 | 61 | args := &BootstrapArgs{pb.database, pb.hashVals} 62 | var reply BootstrapReply 63 | 64 | ok := false 65 | for ok == false { 66 | ok = call(backup, "PBServer.Bootstrapped", args, &reply) 67 | if ok { 68 | break 69 | } else { 70 | // network failure 71 | time.Sleep(viewservice.PingInterval) 72 | } 73 | } 74 | return nil 75 | } 76 | 77 | // edited by Adrian 78 | // to leverage determinism of the state machine 79 | // the backup got a Get request forwarded by the primary 80 | func (pb *PBServer) ForwardGet(sargs *GetSyncArgs, sreply *GetSyncReply) error { 81 | 82 | pb.rwm.Lock() 83 | defer pb.rwm.Unlock() 84 | 85 | if sargs.Primary != pb.currview.Primary { 86 | // the backup first need to check if the primary is still the current primary 87 | // e.g. 
split-brain: {s1, s3} -> s1 dies -> {s3, s2} -> s1 revokes 88 | // -> s1 still receives some requests from client -> so s1 forward to its cache backup, s3 89 | // -> s3 will tell s1 that "you are no longer the current primary now" 90 | // -> so finally s1 will reject the client's request 91 | sreply.Err = "ForwardTest: SENDER IS NOT CURRENT PRIMARY" 92 | return errors.New("ForwardTest: SENDER IS NOT CURRENT PRIMARY") 93 | } else { 94 | // if it is the primary, then we do Get normally 95 | sreply.Value = pb.database[sargs.Key] 96 | } 97 | return nil 98 | } 99 | // edited by Adrian 100 | // to leverage determinism of the state machine 101 | // forward any state necessary for backup to `mimic` the execution 102 | // do exactly the same PutAppend request on the backup 103 | func (pb *PBServer) Forward(sargs *PutAppendSyncArgs, sreply *PutAppendSyncReply) error { 104 | 105 | pb.rwm.Lock() 106 | defer pb.rwm.Unlock() 107 | 108 | if sargs.Primary != pb.currview.Primary { 109 | sreply.Err = "ForwardTest: SENDER IS NOT CURRENT PRIMARY" 110 | return errors.New("ForwardTest: SENDER IS NOT CURRENT PRIMARY") 111 | } else { 112 | pb.Update(sargs.Key, sargs.Value, sargs.Op, sargs.HashVal) 113 | } 114 | return nil 115 | } 116 | 117 | // edited by Adrian 118 | func (pb *PBServer) Update(key string, value string, op string, hashVal int64) { 119 | 120 | // no need to do lock. 121 | // Update() must be called by Forward() or PutAppend() and they both did acquire the lock 122 | if op == "Put" { 123 | pb.database[key] = value 124 | } else if op == "Append" { 125 | // detect duplicates 126 | if pb.hashVals[hashVal] != true { 127 | // Append should use an empty string for the previous value 128 | // if the key doesn't exist 129 | pb.database[key] += value 130 | pb.hashVals[hashVal] = true 131 | } 132 | } 133 | } 134 | 135 | func (pb *PBServer) Get(args *GetArgs, reply *GetReply) error { 136 | 137 | // Your code here. 138 | pb.rwm.Lock() 139 | defer pb.rwm.Unlock() 140 | 141 | if pb.me != pb.currview.Primary { 142 | reply.Err = "Get: NOT THE PRIMARY YET" 143 | // it might be possible that the primary dies and then the backup still not yet 144 | // realizes that it is now the new primary (the `p3`). 145 | 146 | // e.g., (p1, p3) -> (p3, _) 147 | // client: already know that p3 is now the new primary 148 | // p3: according to its cache, it still think (p1, p3) -> still dont think it is the primary 149 | // so it will return an error the client and tell it to try later 150 | // -> wait for tick() until it knows (p3, _) and problem solved 151 | 152 | // the backup (at least it thought by itself) should reject a direct client request 153 | return errors.New("GetTest: NOT THE PRIMARY YET") 154 | } 155 | 156 | reply.Value = pb.database[args.Key] 157 | 158 | sargs := GetSyncArgs{args.Key,pb.me} 159 | sreply := GetSyncReply{} 160 | 161 | // if there is no backup currently -> don't do Forward 162 | ok := pb.currview.Backup == "" 163 | 164 | for ok == false { 165 | //log.Printf("b get %v, %v, %v", pb.me, pb.currview.Backup, sargs.Key) 166 | ok = call(pb.currview.Backup, "PBServer.ForwardGet", sargs, &sreply) 167 | //log.Printf("get %v, %v, %v", pb.me, pb.currview.Backup, sargs.Key) 168 | if ok == true { 169 | // everything works well 170 | break 171 | } else { 172 | // case 1. 
you are no longer the primary 173 | if sreply.Err == "ForwardTest: SENDER IS NOT CURRENT PRIMARY" { 174 | reply.Err = sreply.Err 175 | return errors.New("GetTest: SENDER IS NOT CURRENT PRIMARY") // don't need to update anymore 176 | } 177 | 178 | time.Sleep(viewservice.PingInterval) 179 | // case 2. check if the backup was still alive 180 | // perform exactly the same as tick(). Cannot call it directly as we will acquire lock twice 181 | newview, _ := pb.vs.Ping(pb.currview.Viewnum) 182 | pb.checkNewBackup(newview) 183 | pb.changeView(newview) 184 | 185 | ok = pb.currview.Backup == "" 186 | } 187 | } 188 | 189 | return nil 190 | } 191 | 192 | func (pb *PBServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 193 | 194 | // Your code here. 195 | pb.rwm.Lock() 196 | defer pb.rwm.Unlock() 197 | 198 | if pb.me != pb.currview.Primary { 199 | reply.Err = "PutAppend: NOT THE PRIMARY YET" 200 | return errors.New("PutAppendTest: NOT THE PRIMARY YET") 201 | } 202 | 203 | // Step 1. Update the primary itself (note: should not update the backup first!) 204 | pb.Update(args.Key, args.Value, args.Op, args.HashVal) 205 | 206 | sargs := PutAppendSyncArgs{args.Key, args.Value, args.Op, args.HashVal, pb.me} 207 | sreply := PutAppendSyncReply{} 208 | 209 | // Step 2. Update the backup (if exists) 210 | 211 | // IMPORTANT: 212 | // only if the primary and the backup is `externally consistent` 213 | // will the primary respond to the client, i.e., to make this change `externally visible` 214 | ok := pb.currview.Backup == "" 215 | 216 | for ok == false { 217 | //log.Printf("b put %v, %v, %v", pb.me, pb.currview.Backup, sargs.Key) 218 | ok = call(pb.currview.Backup, "PBServer.Forward", sargs, &sreply) 219 | //log.Printf("put %v, %v, %v", pb.me, pb.currview.Backup, sargs.Key) 220 | if ok == true { 221 | // everything works fine 222 | break 223 | } else { 224 | // case 1. you are no longer the primary 225 | if sreply.Err == "ForwardTest: SENDER IS NOT CURRENT PRIMARY" { 226 | reply.Err = sreply.Err 227 | return errors.New("PutAppendTest: SENDER NOT CURRENT PRIMARY") // don't need to update anymore 228 | } 229 | 230 | time.Sleep(viewservice.PingInterval) 231 | // case 2. check if the backup was still alive 232 | // perform exactly the same as tick(). Cannot call it directly as we will acquire lock twice 233 | newview, _ := pb.vs.Ping(pb.currview.Viewnum) 234 | pb.checkNewBackup(newview) 235 | pb.changeView(newview) 236 | 237 | ok = pb.currview.Backup == "" 238 | } 239 | } 240 | 241 | return nil 242 | } 243 | 244 | // edited by Adrian 245 | // to detect if the backup has changed 246 | func (pb *PBServer) checkNewBackup(newview viewservice.View) { 247 | 248 | // case 1. {s1, _} -> {s1, s2} // s2 is the new backup. s1 is myself. 249 | // case 2. {s1, s2} -> s2 dies -> {s1, s3} // s3 is the new backup. s1 is myself. 250 | // note that in case 2, `b` will not be "" in that intermediate state since we called backupByIdleSrv() 251 | // -> it was already replaced when the primary got notified 252 | 253 | // case 3. {s1, s2} -> {s2, s3} // s3 is the new backup. 
s2 is me 254 | // -> therefore we use newview.Primary (s2) to do the bootstrap but not the pb.currview.Primary (s1) 255 | if newview.Primary == pb.me && pb.currview.Backup != newview.Backup && newview.Backup != "" { 256 | pb.Bootstrapping(newview.Backup) 257 | } 258 | } 259 | 260 | func (pb* PBServer) changeView(newview viewservice.View) { 261 | // no need to lock 262 | // the caller should already acquired a lock 263 | pb.currview = &newview 264 | } 265 | 266 | // 267 | // ping the viewserver periodically. 268 | // if view changed: 269 | // transition to new view. 270 | // manage transfer of state from primary to new backup. 271 | // 272 | func (pb *PBServer) tick() { 273 | 274 | // Your code here. 275 | pb.rwm.Lock() 276 | defer pb.rwm.Unlock() 277 | 278 | newview, _ := pb.vs.Ping(pb.currview.Viewnum) 279 | //log.Printf("me=%v, v=%v, p=%v, b=%v", pb.me, newview.Viewnum, newview.Primary, newview.Backup) 280 | pb.checkNewBackup(newview) 281 | pb.changeView(newview) 282 | } 283 | 284 | // tell the server to shut itself down. 285 | // please do not change these two functions. 286 | func (pb *PBServer) kill() { 287 | atomic.StoreInt32(&pb.dead, 1) 288 | pb.l.Close() 289 | } 290 | 291 | // call this to find out if the server is dead. 292 | func (pb *PBServer) isdead() bool { 293 | return atomic.LoadInt32(&pb.dead) != 0 294 | } 295 | 296 | // please do not change these two functions. 297 | func (pb *PBServer) setunreliable(what bool) { 298 | if what { 299 | atomic.StoreInt32(&pb.unreliable, 1) 300 | } else { 301 | atomic.StoreInt32(&pb.unreliable, 0) 302 | } 303 | } 304 | 305 | func (pb *PBServer) isunreliable() bool { 306 | return atomic.LoadInt32(&pb.unreliable) != 0 307 | } 308 | 309 | 310 | func StartServer(vshost string, me string) *PBServer { 311 | pb := new(PBServer) 312 | pb.me = me 313 | pb.vs = viewservice.MakeClerk(me, vshost) 314 | // Your pb.* initializations here. 315 | 316 | pb.currview = &viewservice.View{} 317 | pb.database = make(map[string]string) 318 | pb.hashVals = make(map[int64]bool) 319 | rpcs := rpc.NewServer() 320 | rpcs.Register(pb) 321 | 322 | os.Remove(pb.me) 323 | l, e := net.Listen("unix", pb.me) 324 | if e != nil { 325 | log.Fatal("listen error: ", e) 326 | } 327 | pb.l = l 328 | 329 | // please do not change any of the following code, 330 | // or do anything to subvert it. 331 | 332 | go func() { 333 | for pb.isdead() == false { 334 | conn, err := pb.l.Accept() 335 | if err == nil && pb.isdead() == false { 336 | if pb.isunreliable() && (rand.Int63()%1000) < 100 { 337 | // discard the request. 338 | conn.Close() 339 | } else if pb.isunreliable() && (rand.Int63()%1000) < 200 { 340 | // process the request but force discard of reply. 
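// (As in kvpaxos/server.go, shutting down the write half of the socket below
// lets the handler process the request while its reply is dropped, simulating
// a response lost on the network.)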
341 | c1 := conn.(*net.UnixConn) 342 | f, _ := c1.File() 343 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 344 | if err != nil { 345 | fmt.Printf("shutdown: %v\n", err) 346 | } 347 | go rpcs.ServeConn(conn) 348 | } else { 349 | go rpcs.ServeConn(conn) 350 | } 351 | } else if err == nil { 352 | conn.Close() 353 | } 354 | if err != nil && pb.isdead() == false { 355 | fmt.Printf("PBServer(%v) accept: %v\n", me, err.Error()) 356 | pb.kill() 357 | } 358 | } 359 | }() 360 | 361 | go func() { 362 | for pb.isdead() == false { 363 | pb.tick() 364 | time.Sleep(viewservice.PingInterval) 365 | } 366 | }() 367 | 368 | return pb 369 | } 370 | -------------------------------------------------------------------------------- /hw2/src/pbservice/out.txt: -------------------------------------------------------------------------------- 1 | Test: Single primary, no backup ... 2 | ... Passed 3 | Test: Add a backup ... 4 | ... Passed 5 | Test: Count RPCs to viewserver ... 6 | ... Passed 7 | Test: Primary failure ... 8 | ... Passed 9 | Test: Kill last server, new one should not be active ... 10 | ... Passed 11 | Test: at-most-once Append; unreliable ... 12 | ... Passed 13 | Test: Put() immediately after backup failure ... 14 | ... Passed 15 | Test: Put() immediately after primary failure ... 16 | PutAppendTest: NOT THE PRIMARY YET 17 | ... Passed 18 | Test: Concurrent Put()s to the same key ... 19 | ... Passed 20 | Test: Concurrent Append()s to the same key ... 21 | ... Passed 22 | Test: Concurrent Put()s to the same key; unreliable ... 23 | ... Passed 24 | Test: Repeated failures/restarts ... 25 | GetTest: NOT THE PRIMARY YET 26 | GetTest: NOT THE PRIMARY YET 27 | GetTest: NOT THE PRIMARY YET 28 | GetTest: NOT THE PRIMARY YET 29 | ... Put/Gets done ... 30 | ... Passed 31 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ... 32 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 33 | ... Appends done ... 34 | ... Passed 35 | Test: Old primary does not serve Gets ... 36 | GetTest: NOT THE PRIMARY YET 37 | ... Passed 38 | Test: Partitioned old primary does not complete Gets ... 39 | GetTest: NOT THE PRIMARY YET 40 | ... Passed 41 | PASS 42 | ok pbservice 111.928s 43 | Test: Single primary, no backup ... 44 | ... Passed 45 | Test: Add a backup ... 46 | ... Passed 47 | Test: Count RPCs to viewserver ... 48 | ... Passed 49 | Test: Primary failure ... 50 | ... Passed 51 | Test: Kill last server, new one should not be active ... 52 | ... Passed 53 | Test: at-most-once Append; unreliable ... 54 | ... Passed 55 | Test: Put() immediately after backup failure ... 56 | ... Passed 57 | Test: Put() immediately after primary failure ... 58 | PutAppendTest: NOT THE PRIMARY YET 59 | ... Passed 60 | Test: Concurrent Put()s to the same key ... 61 | ... Passed 62 | Test: Concurrent Append()s to the same key ... 63 | ... Passed 64 | Test: Concurrent Put()s to the same key; unreliable ... 65 | ... Passed 66 | Test: Repeated failures/restarts ... 67 | ... Put/Gets done ... 68 | ... Passed 69 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ... 70 | PutAppendTest: NOT THE PRIMARY YET 71 | ... Appends done ... 72 | ... Passed 73 | Test: Old primary does not serve Gets ... 74 | GetTest: NOT THE PRIMARY YET 75 | ... Passed 76 | Test: Partitioned old primary does not complete Gets ... 77 | GetTest: NOT THE PRIMARY YET 78 | ... Passed 79 | PASS 80 | ok pbservice 111.657s 81 | Test: Single primary, no backup ... 82 | ... Passed 83 | Test: Add a backup ... 84 | ... 
Passed 85 | Test: Count RPCs to viewserver ... 86 | ... Passed 87 | Test: Primary failure ... 88 | ... Passed 89 | Test: Kill last server, new one should not be active ... 90 | ... Passed 91 | Test: at-most-once Append; unreliable ... 92 | ... Passed 93 | Test: Put() immediately after backup failure ... 94 | ... Passed 95 | Test: Put() immediately after primary failure ... 96 | ... Passed 97 | Test: Concurrent Put()s to the same key ... 98 | ... Passed 99 | Test: Concurrent Append()s to the same key ... 100 | ... Passed 101 | Test: Concurrent Put()s to the same key; unreliable ... 102 | ... Passed 103 | Test: Repeated failures/restarts ... 104 | GetTest: NOT THE PRIMARY YET 105 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 106 | ... Put/Gets done ... 107 | ... Passed 108 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ... 109 | PutAppendTest: NOT THE PRIMARY YET 110 | ... Appends done ... 111 | ... Passed 112 | Test: Old primary does not serve Gets ... 113 | GetTest: NOT THE PRIMARY YET 114 | ... Passed 115 | Test: Partitioned old primary does not complete Gets ... 116 | GetTest: NOT THE PRIMARY YET 117 | ... Passed 118 | PASS 119 | ok pbservice 111.684s 120 | Test: Single primary, no backup ... 121 | ... Passed 122 | Test: Add a backup ... 123 | ... Passed 124 | Test: Count RPCs to viewserver ... 125 | ... Passed 126 | Test: Primary failure ... 127 | ... Passed 128 | Test: Kill last server, new one should not be active ... 129 | ... Passed 130 | Test: at-most-once Append; unreliable ... 131 | ... Passed 132 | Test: Put() immediately after backup failure ... 133 | ... Passed 134 | Test: Put() immediately after primary failure ... 135 | ... Passed 136 | Test: Concurrent Put()s to the same key ... 137 | ... Passed 138 | Test: Concurrent Append()s to the same key ... 139 | ... Passed 140 | Test: Concurrent Put()s to the same key; unreliable ... 141 | ... Passed 142 | Test: Repeated failures/restarts ... 143 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 144 | GetTest: NOT THE PRIMARY YET 145 | GetTest: NOT THE PRIMARY YET 146 | GetTest: NOT THE PRIMARY YET 147 | GetTest: NOT THE PRIMARY YET 148 | ... Put/Gets done ... 149 | ... Passed 150 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ... 151 | PutAppendTest: NOT THE PRIMARY YET 152 | PutAppendTest: NOT THE PRIMARY YET 153 | ... Appends done ... 154 | ... Passed 155 | Test: Old primary does not serve Gets ... 156 | GetTest: NOT THE PRIMARY YET 157 | ... Passed 158 | Test: Partitioned old primary does not complete Gets ... 159 | GetTest: NOT THE PRIMARY YET 160 | ... Passed 161 | PASS 162 | ok pbservice 111.359s 163 | Test: Single primary, no backup ... 164 | ... Passed 165 | Test: Add a backup ... 166 | ... Passed 167 | Test: Count RPCs to viewserver ... 168 | ... Passed 169 | Test: Primary failure ... 170 | ... Passed 171 | Test: Kill last server, new one should not be active ... 172 | ... Passed 173 | Test: at-most-once Append; unreliable ... 174 | ... Passed 175 | Test: Put() immediately after backup failure ... 176 | ... Passed 177 | Test: Put() immediately after primary failure ... 178 | PutAppendTest: NOT THE PRIMARY YET 179 | ... Passed 180 | Test: Concurrent Put()s to the same key ... 181 | ... Passed 182 | Test: Concurrent Append()s to the same key ... 183 | ... Passed 184 | Test: Concurrent Put()s to the same key; unreliable ... 185 | ... Passed 186 | Test: Repeated failures/restarts ... 187 | GetTest: NOT THE PRIMARY YET 188 | GetTest: NOT THE PRIMARY YET 189 | ... 
Put/Gets done ... 190 | ... Passed 191 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ... 192 | PutAppendTest: NOT THE PRIMARY YET 193 | PutAppendTest: NOT THE PRIMARY YET 194 | PutAppendTest: NOT THE PRIMARY YET 195 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 196 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 197 | ... Appends done ... 198 | ... Passed 199 | Test: Old primary does not serve Gets ... 200 | GetTest: NOT THE PRIMARY YET 201 | ... Passed 202 | Test: Partitioned old primary does not complete Gets ... 203 | GetTest: NOT THE PRIMARY YET 204 | ... Passed 205 | PASS 206 | ok pbservice 111.798s 207 | Test: Single primary, no backup ... 208 | ... Passed 209 | Test: Add a backup ... 210 | ... Passed 211 | Test: Count RPCs to viewserver ... 212 | ... Passed 213 | Test: Primary failure ... 214 | ... Passed 215 | Test: Kill last server, new one should not be active ... 216 | ... Passed 217 | Test: at-most-once Append; unreliable ... 218 | ... Passed 219 | Test: Put() immediately after backup failure ... 220 | ... Passed 221 | Test: Put() immediately after primary failure ... 222 | ... Passed 223 | Test: Concurrent Put()s to the same key ... 224 | ... Passed 225 | Test: Concurrent Append()s to the same key ... 226 | ... Passed 227 | Test: Concurrent Put()s to the same key; unreliable ... 228 | ... Passed 229 | Test: Repeated failures/restarts ... 230 | GetTest: NOT THE PRIMARY YET 231 | GetTest: NOT THE PRIMARY YET 232 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 233 | ... Put/Gets done ... 234 | ... Passed 235 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ... 236 | PutAppendTest: NOT THE PRIMARY YET 237 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 238 | ... Appends done ... 239 | ... Passed 240 | Test: Old primary does not serve Gets ... 241 | GetTest: NOT THE PRIMARY YET 242 | ... Passed 243 | Test: Partitioned old primary does not complete Gets ... 244 | GetTest: NOT THE PRIMARY YET 245 | ... Passed 246 | PASS 247 | ok pbservice 111.461s 248 | Test: Single primary, no backup ... 249 | ... Passed 250 | Test: Add a backup ... 251 | ... Passed 252 | Test: Count RPCs to viewserver ... 253 | ... Passed 254 | Test: Primary failure ... 255 | ... Passed 256 | Test: Kill last server, new one should not be active ... 257 | ... Passed 258 | Test: at-most-once Append; unreliable ... 259 | ... Passed 260 | Test: Put() immediately after backup failure ... 261 | ... Passed 262 | Test: Put() immediately after primary failure ... 263 | ... Passed 264 | Test: Concurrent Put()s to the same key ... 265 | ... Passed 266 | Test: Concurrent Append()s to the same key ... 267 | ... Passed 268 | Test: Concurrent Put()s to the same key; unreliable ... 269 | ... Passed 270 | Test: Repeated failures/restarts ... 271 | GetTest: NOT THE PRIMARY YET 272 | GetTest: NOT THE PRIMARY YET 273 | ... Put/Gets done ... 274 | ... Passed 275 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ... 276 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 277 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 278 | ... Appends done ... 279 | ... Passed 280 | Test: Old primary does not serve Gets ... 281 | GetTest: NOT THE PRIMARY YET 282 | ... Passed 283 | Test: Partitioned old primary does not complete Gets ... 284 | GetTest: NOT THE PRIMARY YET 285 | ... Passed 286 | PASS 287 | ok pbservice 111.965s 288 | Test: Single primary, no backup ... 289 | ... Passed 290 | Test: Add a backup ... 291 | ... 
Passed 292 | Test: Count RPCs to viewserver ... 293 | ... Passed 294 | Test: Primary failure ... 295 | ... Passed 296 | Test: Kill last server, new one should not be active ... 297 | ... Passed 298 | Test: at-most-once Append; unreliable ... 299 | ... Passed 300 | Test: Put() immediately after backup failure ... 301 | ... Passed 302 | Test: Put() immediately after primary failure ... 303 | ... Passed 304 | Test: Concurrent Put()s to the same key ... 305 | ... Passed 306 | Test: Concurrent Append()s to the same key ... 307 | ... Passed 308 | Test: Concurrent Put()s to the same key; unreliable ... 309 | ... Passed 310 | Test: Repeated failures/restarts ... 311 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 312 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 313 | GetTest: NOT THE PRIMARY YET 314 | GetTest: NOT THE PRIMARY YET 315 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 316 | ... Put/Gets done ... 317 | ... Passed 318 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ... 319 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 320 | PutAppendTest: NOT THE PRIMARY YET 321 | PutAppendTest: NOT THE PRIMARY YET 322 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 323 | PutAppendTest: NOT THE PRIMARY YET 324 | PutAppendTest: NOT THE PRIMARY YET 325 | PutAppendTest: NOT THE PRIMARY YET 326 | PutAppendTest: NOT THE PRIMARY YET 327 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 328 | ... Appends done ... 329 | ... Passed 330 | Test: Old primary does not serve Gets ... 331 | GetTest: NOT THE PRIMARY YET 332 | ... Passed 333 | Test: Partitioned old primary does not complete Gets ... 334 | GetTest: NOT THE PRIMARY YET 335 | ... Passed 336 | PASS 337 | ok pbservice 111.465s 338 | Test: Single primary, no backup ... 339 | ... Passed 340 | Test: Add a backup ... 341 | ... Passed 342 | Test: Count RPCs to viewserver ... 343 | ... Passed 344 | Test: Primary failure ... 345 | ... Passed 346 | Test: Kill last server, new one should not be active ... 347 | ... Passed 348 | Test: at-most-once Append; unreliable ... 349 | ... Passed 350 | Test: Put() immediately after backup failure ... 351 | ... Passed 352 | Test: Put() immediately after primary failure ... 353 | PutAppendTest: NOT THE PRIMARY YET 354 | ... Passed 355 | Test: Concurrent Put()s to the same key ... 356 | ... Passed 357 | Test: Concurrent Append()s to the same key ... 358 | ... Passed 359 | Test: Concurrent Put()s to the same key; unreliable ... 360 | ... Passed 361 | Test: Repeated failures/restarts ... 362 | PutAppendTest: NOT THE PRIMARY YET 363 | PutAppendTest: NOT THE PRIMARY YET 364 | GetTest: NOT THE PRIMARY YET 365 | GetTest: NOT THE PRIMARY YET 366 | GetTest: NOT THE PRIMARY YET 367 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 368 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 369 | ... Put/Gets done ... 370 | ... Passed 371 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ... 372 | PutAppendTest: NOT THE PRIMARY YET 373 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 374 | PutAppendTest: NOT THE PRIMARY YET 375 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 376 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 377 | ... Appends done ... 378 | ... Passed 379 | Test: Old primary does not serve Gets ... 380 | GetTest: NOT THE PRIMARY YET 381 | ... Passed 382 | Test: Partitioned old primary does not complete Gets ... 383 | GetTest: NOT THE PRIMARY YET 384 | ... Passed 385 | PASS 386 | ok pbservice 111.567s 387 | Test: Single primary, no backup ... 388 | ... Passed 389 | Test: Add a backup ... 390 | ... 
Passed 391 | Test: Count RPCs to viewserver ... 392 | ... Passed 393 | Test: Primary failure ... 394 | ... Passed 395 | Test: Kill last server, new one should not be active ... 396 | ... Passed 397 | Test: at-most-once Append; unreliable ... 398 | ... Passed 399 | Test: Put() immediately after backup failure ... 400 | ... Passed 401 | Test: Put() immediately after primary failure ... 402 | ... Passed 403 | Test: Concurrent Put()s to the same key ... 404 | ... Passed 405 | Test: Concurrent Append()s to the same key ... 406 | ... Passed 407 | Test: Concurrent Put()s to the same key; unreliable ... 408 | ... Passed 409 | Test: Repeated failures/restarts ... 410 | GetTest: NOT THE PRIMARY YET 411 | GetTest: NOT THE PRIMARY YET 412 | ... Put/Gets done ... 413 | ... Passed 414 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ... 415 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 416 | ForwardTest: SENDER IS NOT CURRENT PRIMARY 417 | ... Appends done ... 418 | ... Passed 419 | Test: Old primary does not serve Gets ... 420 | GetTest: NOT THE PRIMARY YET 421 | ... Passed 422 | Test: Partitioned old primary does not complete Gets ... 423 | GetTest: NOT THE PRIMARY YET 424 | ... Passed 425 | PASS 426 | ok pbservice 111.108s 427 | --------------------------------------------------------------------------------