├── hw1
│   ├── pkg
│   │   └── mod
│   │       └── cache
│   │           └── lock
│   ├── .idea
│   │   ├── .gitignore
│   │   ├── misc.xml
│   │   ├── vcs.xml
│   │   ├── modules.xml
│   │   └── CS188-hw1.iml
│   ├── src
│   │   ├── .DS_Store
│   │   ├── main
│   │   │   ├── .DS_Store
│   │   │   ├── mr-testout.txt
│   │   │   ├── test-wc.sh
│   │   │   └── wc.go
│   │   ├── .gitignore
│   │   └── mapreduce
│   │       ├── common.go
│   │       ├── worker.go
│   │       ├── master.go
│   │       ├── test_test.go
│   │       └── mapreduce.go
│   ├── gitpush.sh
│   └── README.md
├── hw2
│   ├── .gitignore
│   ├── .idea
│   │   ├── .gitignore
│   │   ├── misc.xml
│   │   ├── vcs.xml
│   │   ├── modules.xml
│   │   └── hw2.iml
│   └── src
│       ├── main
│       │   ├── viewd.go
│       │   ├── pbd.go
│       │   └── pbc.go
│       ├── pbservice
│       │   ├── test.sh
│       │   ├── common.go
│       │   ├── client.go
│       │   ├── server.go
│       │   └── out.txt
│       └── viewservice
│           ├── client.go
│           ├── common.go
│           ├── test_test.go
│           └── server.go
├── hw3
│   ├── .idea
│   │   ├── .gitignore
│   │   ├── misc.xml
│   │   ├── vcs.xml
│   │   ├── modules.xml
│   │   └── hw3.iml
│   └── src
│       ├── .DS_Store
│       ├── paxos
│       │   ├── .DS_Store
│       │   ├── test.sh
│       │   └── common.go
│       ├── kvpaxos
│       │   ├── out.txt
│       │   ├── test.sh
│       │   ├── common.go
│       │   ├── client.go
│       │   └── server.go
│       ├── main
│       │   ├── viewd.go
│       │   ├── pbd.go
│       │   └── pbc.go
│       ├── pbservice
│       │   ├── common.go
│       │   ├── client.go
│       │   └── server.go
│       └── viewservice
│           ├── client.go
│           ├── common.go
│           ├── server.go
│           └── test_test.go
├── hw4
│   ├── .idea
│   │   ├── .gitignore
│   │   ├── misc.xml
│   │   ├── vcs.xml
│   │   ├── modules.xml
│   │   └── hw4.iml
│   └── src
│       ├── main
│       │   ├── viewd.go
│       │   ├── pbd.go
│       │   └── pbc.go
│       ├── shardkv
│       │   ├── test_linux.sh
│       │   ├── test.sh
│       │   ├── common.go
│       │   ├── client.go
│       │   └── test_test.go
│       ├── pbservice
│       │   ├── common.go
│       │   ├── client.go
│       │   └── server.go
│       ├── paxos
│       │   └── common.go
│       ├── kvpaxos
│       │   ├── common.go
│       │   ├── client.go
│       │   └── server.go
│       ├── shardmaster
│       │   ├── common.go
│       │   ├── client.go
│       │   ├── test_test.go
│       │   └── server.go
│       └── viewservice
│           ├── client.go
│           ├── common.go
│           ├── server.go
│           └── test_test.go
├── gitpush.sh
└── README.md
/hw1/pkg/mod/cache/lock:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/hw2/.gitignore:
--------------------------------------------------------------------------------
1 | */out.txt
2 |
--------------------------------------------------------------------------------
/hw1/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
3 |
--------------------------------------------------------------------------------
/hw2/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
3 |
--------------------------------------------------------------------------------
/hw3/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
3 |
--------------------------------------------------------------------------------
/hw4/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
3 |
--------------------------------------------------------------------------------
/hw1/src/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AdrianHsu/MIT-6.824-Lab/HEAD/hw1/src/.DS_Store
--------------------------------------------------------------------------------
/hw3/src/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AdrianHsu/MIT-6.824-Lab/HEAD/hw3/src/.DS_Store
--------------------------------------------------------------------------------
/hw1/src/main/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AdrianHsu/MIT-6.824-Lab/HEAD/hw1/src/main/.DS_Store
--------------------------------------------------------------------------------
/hw3/src/paxos/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AdrianHsu/MIT-6.824-Lab/HEAD/hw3/src/paxos/.DS_Store
--------------------------------------------------------------------------------
/hw1/src/.gitignore:
--------------------------------------------------------------------------------
1 | mrtmp.*
2 | /main/diff.out
3 | /mapreduce/x.txt
4 | /pbservice/x.txt
5 | /kvpaxos/x.txt
6 |
--------------------------------------------------------------------------------
/hw3/src/kvpaxos/out.txt:
--------------------------------------------------------------------------------
1 | round: 1
2 | round: 2
3 | round: 3
4 | round: 4
5 | round: 5
6 | round: 6
7 | round: 7
8 |
--------------------------------------------------------------------------------
/gitpush.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | DATE=$(date)
4 | #echo "$DATE"
5 |
6 | git add .
7 | git commit -m "[UPDATED] $DATE :bulb:" # :tada:
8 | git push
9 |
--------------------------------------------------------------------------------
/hw1/gitpush.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | DATE=$(date)
4 | #echo "$DATE"
5 |
6 | git add .
7 | git commit -m "[UPDATED] $DATE :bulb:" # :tada:
8 | git push
9 |
--------------------------------------------------------------------------------
/hw1/src/main/mr-testout.txt:
--------------------------------------------------------------------------------
1 | unto: 8940
2 | he: 9666
3 | shall: 9760
4 | in: 12334
5 | that: 12577
6 | And: 12846
7 | to: 13384
8 | of: 34434
9 | and: 38850
10 | the: 62075
11 |
--------------------------------------------------------------------------------
/hw1/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw2/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw3/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw4/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw1/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw2/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw3/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw4/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw2/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/hw3/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/hw4/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/hw1/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/hw2/.idea/hw2.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/hw3/.idea/hw3.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/hw4/.idea/hw4.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/hw1/.idea/CS188-hw1.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/hw1/src/main/test-wc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | go run wc.go master kjv12.txt sequential
3 | sort -n -k2 mrtmp.kjv12.txt | tail -10 | diff - mr-testout.txt > diff.out
4 | if [ -s diff.out ]
5 | then
6 | echo "Failed test. Output should be as in mr-testout.txt. Your output differs as follows (from diff.out):"
7 | cat diff.out
8 | else
9 | echo "Passed test"
10 | fi
11 |
12 |
--------------------------------------------------------------------------------
/hw2/src/main/viewd.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // see directions in pbc.go
5 | //
6 |
7 | import "time"
8 | import "viewservice"
9 | import "os"
10 | import "fmt"
11 |
12 | func main() {
13 | if len(os.Args) != 2 {
14 | fmt.Printf("Usage: viewd port\n")
15 | os.Exit(1)
16 | }
17 |
18 | viewservice.StartServer(os.Args[1])
19 |
20 | for {
21 | time.Sleep(100 * time.Second)
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/hw3/src/main/viewd.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // see directions in pbc.go
5 | //
6 |
7 | import "time"
8 | import "viewservice"
9 | import "os"
10 | import "fmt"
11 |
12 | func main() {
13 | if len(os.Args) != 2 {
14 | fmt.Printf("Usage: viewd port\n")
15 | os.Exit(1)
16 | }
17 |
18 | viewservice.StartServer(os.Args[1])
19 |
20 | for {
21 | time.Sleep(100 * time.Second)
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/hw4/src/main/viewd.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // see directions in pbc.go
5 | //
6 |
7 | import "time"
8 | import "viewservice"
9 | import "os"
10 | import "fmt"
11 |
12 | func main() {
13 | if len(os.Args) != 2 {
14 | fmt.Printf("Usage: viewd port\n")
15 | os.Exit(1)
16 | }
17 |
18 | viewservice.StartServer(os.Args[1])
19 |
20 | for {
21 | time.Sleep(100 * time.Second)
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/hw2/src/main/pbd.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // see directions in pbc.go
5 | //
6 |
7 | import "time"
8 | import "pbservice"
9 | import "os"
10 | import "fmt"
11 |
12 | func main() {
13 | if len(os.Args) != 3 {
14 | fmt.Printf("Usage: pbd viewport myport\n")
15 | os.Exit(1)
16 | }
17 |
18 | pbservice.StartServer(os.Args[1], os.Args[2])
19 |
20 | for {
21 | time.Sleep(100 * time.Second)
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/hw3/src/main/pbd.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // see directions in pbc.go
5 | //
6 |
7 | import "time"
8 | import "pbservice"
9 | import "os"
10 | import "fmt"
11 |
12 | func main() {
13 | if len(os.Args) != 3 {
14 | fmt.Printf("Usage: pbd viewport myport\n")
15 | os.Exit(1)
16 | }
17 |
18 | pbservice.StartServer(os.Args[1], os.Args[2])
19 |
20 | for {
21 | time.Sleep(100 * time.Second)
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/hw4/src/main/pbd.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // see directions in pbc.go
5 | //
6 |
7 | import "time"
8 | import "pbservice"
9 | import "os"
10 | import "fmt"
11 |
12 | func main() {
13 | if len(os.Args) != 3 {
14 | fmt.Printf("Usage: pbd viewport myport\n")
15 | os.Exit(1)
16 | }
17 |
18 | pbservice.StartServer(os.Args[1], os.Args[2])
19 |
20 | for {
21 | time.Sleep(100 * time.Second)
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/hw3/src/kvpaxos/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash # the {1..N} brace expansion below needs bash, not POSIX sh
2 |
3 | rm out.txt
4 |
5 | for i in {1..60}
6 |
7 | do
8 | echo "round: $i" | tee -a ./out.txt
9 | go test | tee -a out.txt
10 | sed -i '' '/unexpected EOF/d' ./out.txt
11 | sed -i '' '/write unix/d' ./out.txt
12 | sed -i '' '/read unix/d' ./out.txt
13 | sed -i '' '/connection is/d' ./out.txt
14 | sed -i '' '/rpc.Register/d' ./out.txt
15 | sed -i '' '/paxos Dial() failed/d' ./out.txt
16 | done
17 | cat out.txt
18 |
--------------------------------------------------------------------------------
/hw3/src/paxos/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash # the {1..N} brace expansion below needs bash, not POSIX sh
2 |
3 | rm out.txt
4 |
5 | for i in {1..60}
6 |
7 | do
8 | echo "round: $i" | tee -a ./out.txt
9 | go test | tee -a out.txt
10 | sed -i '' '/unexpected EOF/d' ./out.txt
11 | sed -i '' '/write unix/d' ./out.txt
12 | sed -i '' '/read unix/d' ./out.txt
13 | sed -i '' '/connection is/d' ./out.txt
14 | sed -i '' '/rpc.Register/d' ./out.txt
15 | sed -i '' '/paxos Dial() failed/d' ./out.txt
16 | done
17 | cat out.txt
18 |
--------------------------------------------------------------------------------
/hw2/src/pbservice/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash # the {1..N} brace expansion below needs bash, not POSIX sh
2 |
3 | rm out.txt
4 |
5 | for i in {1..10};
6 | do go test | tee -a out.txt; done
7 |
8 | sed -i '' '/ForwardTest: NOT CURRENT PRIMARY/d' ./out.txt
9 | sed -i '' '/PutAppend: NOT THE PRIMARY YET/d' ./out.txt
10 |
11 | sed -i '' '/unexpected EOF/d' ./out.txt
12 | sed -i '' '/write unix/d' ./out.txt
13 | sed -i '' '/connection is shut down/d' ./out.txt
14 | sed -i '' '/rpc.Register/d' ./out.txt
15 | sed -i '' '/connection reset by peer/d' ./out.txt
16 |
--------------------------------------------------------------------------------
/hw4/src/shardkv/test_linux.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash # the {1..N} brace expansion below needs bash, not POSIX sh
2 |
3 | rm out.txt
4 |
5 | for i in {1..1}
6 |
7 | do
8 | echo "round: $i" | tee -a ./out.txt
9 | go test | tee -a out.txt
10 | sed -i '/unexpected EOF/d' ./out.txt
11 | sed -i '/write unix/d' ./out.txt
12 | sed -i '/read unix/d' ./out.txt
13 | sed -i '/connection is/d' ./out.txt
14 | sed -i '/rpc.Register/d' ./out.txt
15 | sed -i '/paxos Dial() failed/d' ./out.txt
16 | sed -i '/ShardKV:/d' ./out.txt
17 | sed -i '/ShardMaster:/d' ./out.txt
18 | sed -i '/Timeout:/d' ./out.txt
19 | done
20 | cat out.txt
21 |
--------------------------------------------------------------------------------
/hw4/src/shardkv/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash # the {1..N} brace expansion below needs bash, not POSIX sh
2 |
3 | rm out.txt
4 |
5 | for i in {1..1}
6 |
7 | do
8 | echo "round: $i" | tee -a ./out.txt
9 | go test | tee -a out.txt
10 | sed -i '' '/unexpected EOF/d' ./out.txt
11 | sed -i '' '/write unix/d' ./out.txt
12 | sed -i '' '/read unix/d' ./out.txt
13 | sed -i '' '/connection is/d' ./out.txt
14 | sed -i '' '/rpc.Register/d' ./out.txt
15 | sed -i '' '/paxos Dial() failed/d' ./out.txt
16 | sed -i '' '/ShardKV:/d' ./out.txt
17 | sed -i '' '/ShardMaster:/d' ./out.txt
18 | sed -i '' '/Timeout:/d' ./out.txt
19 | done
20 | cat out.txt
21 |
--------------------------------------------------------------------------------
/hw3/src/pbservice/common.go:
--------------------------------------------------------------------------------
1 | package pbservice
2 |
3 | const (
4 | OK = "OK"
5 | ErrNoKey = "ErrNoKey"
6 | ErrWrongServer = "ErrWrongServer"
7 | )
8 |
9 | type Err string
10 |
11 | // Put or Append
12 | type PutAppendArgs struct {
13 | Key string
14 | Value string
15 | // You'll have to add definitions here.
16 |
17 | // Field names must start with capital letters,
18 | // otherwise RPC will break.
19 | }
20 |
21 | type PutAppendReply struct {
22 | Err Err
23 | }
24 |
25 | type GetArgs struct {
26 | Key string
27 | // You'll have to add definitions here.
28 | }
29 |
30 | type GetReply struct {
31 | Err Err
32 | Value string
33 | }
34 |
35 |
36 | // Your RPC definitions here.
37 |
--------------------------------------------------------------------------------
/hw4/src/pbservice/common.go:
--------------------------------------------------------------------------------
1 | package pbservice
2 |
3 | const (
4 | OK = "OK"
5 | ErrNoKey = "ErrNoKey"
6 | ErrWrongServer = "ErrWrongServer"
7 | )
8 |
9 | type Err string
10 |
11 | // Put or Append
12 | type PutAppendArgs struct {
13 | Key string
14 | Value string
15 | // You'll have to add definitions here.
16 |
17 | // Field names must start with capital letters,
18 | // otherwise RPC will break.
19 | }
20 |
21 | type PutAppendReply struct {
22 | Err Err
23 | }
24 |
25 | type GetArgs struct {
26 | Key string
27 | // You'll have to add definitions here.
28 | }
29 |
30 | type GetReply struct {
31 | Err Err
32 | Value string
33 | }
34 |
35 |
36 | // Your RPC definitions here.
37 |
--------------------------------------------------------------------------------
/hw3/src/kvpaxos/common.go:
--------------------------------------------------------------------------------
1 | package kvpaxos
2 |
3 | const (
4 | OK = "OK"
5 | ErrNoKey = "ErrNoKey"
6 | )
7 |
8 | type Err string
9 |
10 | // Put or Append
11 | type PutAppendArgs struct {
12 | // You'll have to add definitions here.
13 | Key string
14 | Value string
15 | Op string // "Put" or "Append"
16 | Hash int64
17 | // You'll have to add definitions here.
18 | // Field names must start with capital letters,
19 | // otherwise RPC will break.
20 | }
21 |
22 | type PutAppendReply struct {
23 | Err Err
24 | }
25 |
26 | type GetArgs struct {
27 | Key string
28 | Hash int64
29 | // You'll have to add definitions here.
30 | }
31 |
32 | type GetReply struct {
33 | Err Err
34 | Value string
35 | }
36 |
--------------------------------------------------------------------------------
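The `Hash` field above is what lets the kvpaxos server filter duplicate client requests (the `client.go`/`server.go` that use it are listed in the tree but not reproduced in this dump). Below is a minimal sketch of that at-most-once idea, assuming the client tags each request with a random 62-bit id (like `nrand()` in `pbservice/client.go` later in this dump) and the server keeps a set of applied ids (cf. `HashVals map[int64]bool` in `hw2/src/pbservice/common.go`); the helper names are hypothetical, not the author's code:

```go
package kvpaxos

import (
	"crypto/rand"
	"math/big"
)

// newRequestID mirrors the nrand() helper shown in pbservice/client.go:
// a random 62-bit id used to tag one client request.
func newRequestID() int64 {
	max := big.NewInt(int64(1) << 62)
	bigx, _ := rand.Int(rand.Reader, max)
	return bigx.Int64()
}

// newPutAppendArgs is a hypothetical client-side sketch: tag each Put/Append
// with a fresh Hash so a retransmitted RPC can be recognized by the server.
func newPutAppendArgs(key, value, op string) *PutAppendArgs {
	return &PutAppendArgs{Key: key, Value: value, Op: op, Hash: newRequestID()}
}

// applyOnce is a hypothetical server-side sketch: remember every Hash that
// has already been applied and report duplicates so the operation is not
// re-executed (at-most-once semantics).
func applyOnce(applied map[int64]bool, hash int64) (duplicate bool) {
	if applied[hash] {
		return true
	}
	applied[hash] = true
	return false
}
```
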
/hw3/src/paxos/common.go:
--------------------------------------------------------------------------------
1 | package paxos
2 | // added by Adrian
3 |
4 | type Err string
5 |
6 |
7 | type PrepareArgs struct {
8 | Seq int
9 | N int
10 | }
11 |
12 | type PrepareReply struct {
13 | Err Err
14 | N int
15 | N_a int
16 | V_a interface{}
17 | Z_i int
18 | Higher_N int
19 | }
20 |
21 | type AcceptArgs struct {
22 | Seq int
23 | N int
24 | V_p interface{} // v prime
25 | }
26 |
27 | type AcceptReply struct {
28 | Err Err
29 | N int
30 | }
31 |
32 | type DecidedArgs struct {
33 | Seq int
34 | N int
35 | V_p interface{}
36 | }
37 |
38 | type DecidedReply struct {
39 | Err Err
40 | }
--------------------------------------------------------------------------------
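These structs define the wire format for the three Paxos phases (prepare, accept, decided); the proposer/acceptor logic lives in `paxos.go`, which is not included in this dump. Below is a minimal sketch of the proposer's prepare phase built only on `PrepareArgs`/`PrepareReply`; the `"Paxos.Prepare"` method name, the `"OK"` error value, and the `callPeer` helper are assumptions modeled on the rest of this repo, not the author's code:

```go
package paxos

import (
	"fmt"
	"net/rpc"
)

// callPeer mirrors the call() helpers shown elsewhere in this repo
// (e.g. viewservice/client.go): dial a unix-domain socket, issue one RPC,
// and report whether the peer responded.
func callPeer(srv string, rpcname string, args interface{}, reply interface{}) bool {
	c, errx := rpc.Dial("unix", srv)
	if errx != nil {
		return false
	}
	defer c.Close()
	if err := c.Call(rpcname, args, reply); err != nil {
		fmt.Println(err)
		return false
	}
	return true
}

// proposePrepare is a hypothetical illustration, not the lab's paxos.go:
// send Prepare(seq, n) to every peer, count promises, and remember the
// highest-numbered value already accepted so the accept phase can
// re-propose it. "Paxos.Prepare" is an assumed RPC method name.
func proposePrepare(peers []string, seq int, n int) (majority bool, vPrime interface{}) {
	promises := 0
	highestNa := -1
	for _, p := range peers {
		args := &PrepareArgs{Seq: seq, N: n}
		var reply PrepareReply
		if callPeer(p, "Paxos.Prepare", args, &reply) && reply.Err == "OK" { // "OK" is an assumed Err value
			promises++
			if reply.N_a > highestNa {
				highestNa = reply.N_a
				vPrime = reply.V_a
			}
		}
	}
	// the proposer may move to the accept phase only with a majority of promises
	return promises > len(peers)/2, vPrime
}
```
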
/hw4/src/paxos/common.go:
--------------------------------------------------------------------------------
1 | package paxos
2 | // added by Adrian
3 |
4 | type Err string
5 |
6 |
7 | type PrepareArgs struct {
8 | Seq int
9 | N int
10 | }
11 |
12 | type PrepareReply struct {
13 | Err Err
14 | N int
15 | N_a int
16 | V_a interface{}
17 | Z_i int
18 | Higher_N int
19 | }
20 |
21 | type AcceptArgs struct {
22 | Seq int
23 | N int
24 | V_p interface{} // v prime
25 | }
26 |
27 | type AcceptReply struct {
28 | Err Err
29 | N int
30 | }
31 |
32 | type DecidedArgs struct {
33 | Seq int
34 | N int
35 | V_p interface{}
36 | }
37 |
38 | type DecidedReply struct {
39 | Err Err
40 | }
--------------------------------------------------------------------------------
/hw4/src/kvpaxos/common.go:
--------------------------------------------------------------------------------
1 | package kvpaxos
2 |
3 | const (
4 | OK = "OK"
5 | ErrNoKey = "ErrNoKey"
6 | )
7 |
8 | type Err string
9 |
10 | // Put or Append
11 | type PutAppendArgs struct {
12 | // You'll have to add definitions here.
13 | Key string
14 | Value string
15 | Op string // "Put" or "Append"
16 | // You'll have to add definitions here.
17 | // Field names must start with capital letters,
18 | // otherwise RPC will break.
19 | ClientID int64
20 | Seq int
21 | }
22 |
23 | type PutAppendReply struct {
24 | Err Err
25 | }
26 |
27 | type GetArgs struct {
28 | Key string
29 | Op string
30 | // You'll have to add definitions here.
31 | ClientID int64
32 | Seq int
33 | }
34 |
35 | type GetReply struct {
36 | Err Err
37 | Value string
38 | }
39 |
--------------------------------------------------------------------------------
/hw2/src/main/pbc.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // pbservice client application
5 | //
6 | // go build viewd.go
7 | // go build pbd.go
8 | // go build pbc.go
9 | // ./viewd /tmp/rtm-v &
10 | // ./pbd /tmp/rtm-v /tmp/rtm-1 &
11 | // ./pbd /tmp/rtm-v /tmp/rtm-2 &
12 | // ./pbc /tmp/rtm-v key1 value1
13 | // ./pbc /tmp/rtm-v key1
14 | //
15 | // change "rtm" to your user name.
16 | // start the pbd programs in separate windows, kill them
17 | // and then restart them to exercise fault tolerance.
18 | //
19 |
20 | import "pbservice"
21 | import "os"
22 | import "fmt"
23 |
24 | func usage() {
25 | fmt.Printf("Usage: pbc viewport key\n")
26 | fmt.Printf(" pbc viewport key value\n")
27 | os.Exit(1)
28 | }
29 |
30 | func main() {
31 | if len(os.Args) == 3 {
32 | // get
33 | ck := pbservice.MakeClerk(os.Args[1], "")
34 | v := ck.Get(os.Args[2])
35 | fmt.Printf("%v\n", v)
36 | } else if len(os.Args) == 4 {
37 | // put
38 | ck := pbservice.MakeClerk(os.Args[1], "")
39 | ck.Put(os.Args[2], os.Args[3])
40 | } else {
41 | usage()
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/hw3/src/main/pbc.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // pbservice client application
5 | //
6 | // go build viewd.go
7 | // go build pbd.go
8 | // go build pbc.go
9 | // ./viewd /tmp/rtm-v &
10 | // ./pbd /tmp/rtm-v /tmp/rtm-1 &
11 | // ./pbd /tmp/rtm-v /tmp/rtm-2 &
12 | // ./pbc /tmp/rtm-v key1 value1
13 | // ./pbc /tmp/rtm-v key1
14 | //
15 | // change "rtm" to your user name.
16 | // start the pbd programs in separate windows, kill them
17 | // and then restart them to exercise fault tolerance.
18 | //
19 |
20 | import "pbservice"
21 | import "os"
22 | import "fmt"
23 |
24 | func usage() {
25 | fmt.Printf("Usage: pbc viewport key\n")
26 | fmt.Printf(" pbc viewport key value\n")
27 | os.Exit(1)
28 | }
29 |
30 | func main() {
31 | if len(os.Args) == 3 {
32 | // get
33 | ck := pbservice.MakeClerk(os.Args[1], "")
34 | v := ck.Get(os.Args[2])
35 | fmt.Printf("%v\n", v)
36 | } else if len(os.Args) == 4 {
37 | // put
38 | ck := pbservice.MakeClerk(os.Args[1], "")
39 | ck.Put(os.Args[2], os.Args[3])
40 | } else {
41 | usage()
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/hw4/src/main/pbc.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | //
4 | // pbservice client application
5 | //
6 | // go build viewd.go
7 | // go build pbd.go
8 | // go build pbc.go
9 | // ./viewd /tmp/rtm-v &
10 | // ./pbd /tmp/rtm-v /tmp/rtm-1 &
11 | // ./pbd /tmp/rtm-v /tmp/rtm-2 &
12 | // ./pbc /tmp/rtm-v key1 value1
13 | // ./pbc /tmp/rtm-v key1
14 | //
15 | // change "rtm" to your user name.
16 | // start the pbd programs in separate windows, kill them
17 | // and then restart them to exercise fault tolerance.
18 | //
19 |
20 | import "pbservice"
21 | import "os"
22 | import "fmt"
23 |
24 | func usage() {
25 | fmt.Printf("Usage: pbc viewport key\n")
26 | fmt.Printf(" pbc viewport key value\n")
27 | os.Exit(1)
28 | }
29 |
30 | func main() {
31 | if len(os.Args) == 3 {
32 | // get
33 | ck := pbservice.MakeClerk(os.Args[1], "")
34 | v := ck.Get(os.Args[2])
35 | fmt.Printf("%v\n", v)
36 | } else if len(os.Args) == 4 {
37 | // put
38 | ck := pbservice.MakeClerk(os.Args[1], "")
39 | ck.Put(os.Args[2], os.Args[3])
40 | } else {
41 | usage()
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/hw2/src/pbservice/common.go:
--------------------------------------------------------------------------------
1 | package pbservice
2 |
3 | const (
4 | OK = "OK"
5 | ErrNoKey = "ErrNoKey"
6 | ErrWrongServer = "ErrWrongServer"
7 | )
8 |
9 | type Err string
10 |
11 | // Put or Append
12 | type PutAppendArgs struct {
13 | Key string
14 | Value string
15 | // You'll have to add definitions here.
16 | // Field names must start with capital letters,
17 | // otherwise RPC will break.
18 | Op string
19 | HashVal int64
20 | }
21 |
22 | type PutAppendReply struct {
23 | Err Err
24 | }
25 |
26 | // Put or Append
27 | type PutAppendSyncArgs struct {
28 | Key string
29 | Value string
30 | // You'll have to add definitions here.
31 | // Field names must start with capital letters,
32 | // otherwise RPC will break.
33 | Op string
34 | HashVal int64
35 | Primary string
36 |
37 | }
38 |
39 | type PutAppendSyncReply struct {
40 | Err Err
41 | }
42 |
43 | type GetArgs struct {
44 | Key string
45 | // You'll have to add definitions here.
46 | }
47 |
48 | type GetReply struct {
49 | Err Err
50 | Value string
51 | }
52 |
53 | type GetSyncArgs struct {
54 | Key string
55 | // You'll have to add definitions here.
56 | Primary string
57 | }
58 |
59 | type GetSyncReply struct {
60 | Err Err
61 | Value string
62 | }
63 |
64 | // Your RPC definitions here.
65 | type BootstrapArgs struct {
66 | Database map[string]string
67 | HashVals map[int64]bool
68 | }
69 |
70 | type BootstrapReply struct {
71 | Err Err
72 | }
--------------------------------------------------------------------------------
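These are the argument/reply types exchanged between the pbservice clerk, primary, and backup (`client.go` and `server.go` for hw2 are listed in the tree but not reproduced in this dump). Below is a minimal sketch of a single clerk-side Get against the current primary; the `getOnce` helper and the `"PBServer.Get"` method name are assumptions, and `call()` is the unix-socket RPC helper defined in this package's `client.go` (the hw3 copy is shown later in this dump):

```go
package pbservice

import "viewservice"

// getOnce is a hypothetical sketch, not the lab's client.go: look up the
// current primary from the view service and issue a single Get RPC to it.
func getOnce(vs *viewservice.Clerk, key string) (string, Err) {
	primary := vs.Primary() // "" while the view service has no primary yet
	if primary == "" {
		return "", ErrWrongServer
	}
	args := &GetArgs{Key: key}
	var reply GetReply
	if !call(primary, "PBServer.Get", args, &reply) {
		return "", ErrWrongServer // unreachable primary; the real clerk keeps retrying
	}
	return reply.Value, reply.Err
}
```
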
/hw4/src/shardkv/common.go:
--------------------------------------------------------------------------------
1 | package shardkv
2 |
3 | //
4 | // Sharded key/value server.
5 | // Lots of replica groups, each running op-at-a-time paxos.
6 | // Shardmaster decides which group serves each shard.
7 | // Shardmaster may change shard assignment from time to time.
8 | //
9 | // You will have to modify these definitions.
10 | //
11 |
12 | const (
13 | OK = "OK"
14 | ErrNoKey = "ErrNoKey"
15 | ErrWrongGroup = "ErrWrongGroup"
16 | ErrNotReady = "ErrNotReady"
17 | )
18 |
19 | type Err string
20 |
21 | type PutAppendArgs struct {
22 | Key string
23 | Value string
24 | Op string // "Put" or "Append"
25 |
26 | ID int64 // client ID (each client has a unique id)
27 | // client's seq: every time it performs a Put/Get/Append,
28 | // its seq is incremented by 1
29 | Seq int
30 | ConfigNum int // the config number the client believes is current
31 | Shard int // 0 ~ 9: the index of the shard this key belongs to
32 | }
33 |
34 | type PutAppendReply struct {
35 | Err Err
36 | }
37 |
38 | type GetArgs struct {
39 | Key string
40 |
41 | ID int64
42 | Seq int
43 | ConfigNum int
44 | Shard int
45 | }
46 |
47 | type GetReply struct {
48 | Err Err
49 | Value string
50 | }
51 |
52 | type BootstrapArgs struct {
53 | Shard int
54 | ConfigNum int
55 | }
56 |
57 | type BootstrapReply struct {
58 | ShardState ShardState
59 | ProducerGID int64
60 | ConfigNum int
61 | Shard int
62 | Err string
63 | }
64 |
65 | type ReconfigureArgs struct {
66 | NewConfigNum int
67 | }
--------------------------------------------------------------------------------
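Every request above carries the client's `ConfigNum` and the `Shard` the key maps to, so a replica group can answer `ErrWrongGroup`/`ErrNotReady` when it does not (yet) own that shard. The key-to-shard mapping itself is not shown in this dump; the sketch below uses one common choice (first byte of the key modulo `shardmaster.NShards`), which is an assumption rather than the author's exact function:

```go
package shardkv

import "shardmaster"

// key2shard is a hypothetical sketch of a key -> shard mapping: bucket keys
// by their first byte modulo NShards (10). The lab's actual client.go may
// differ; the only requirement is that clients and servers agree on it.
func key2shard(key string) int {
	shard := 0
	if len(key) > 0 {
		shard = int(key[0])
	}
	return shard % shardmaster.NShards
}

// newGetArgs is a hypothetical sketch showing how a clerk would fill in the
// routing fields defined above: its own ID, a per-client Seq, the config
// number it believes is current, and the shard the key falls in.
func newGetArgs(id int64, seq int, configNum int, key string) *GetArgs {
	return &GetArgs{
		Key:       key,
		ID:        id,
		Seq:       seq,
		ConfigNum: configNum,
		Shard:     key2shard(key),
	}
}
```
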
/hw4/src/shardmaster/common.go:
--------------------------------------------------------------------------------
1 | package shardmaster
2 |
3 | //
4 | // Master shard server: assigns shards to replication groups.
5 | //
6 | // RPC interface:
7 | // Join(gid, servers) -- replica group gid is joining, give it some shards.
8 | // Leave(gid) -- replica group gid is retiring, hand off all its shards.
9 | // Move(shard, gid) -- hand off one shard from current owner to gid.
10 | // Query(num) -> fetch Config # num, or latest config if num==-1.
11 | //
12 | // A Config (configuration) describes a set of replica groups, and the
13 | // replica group responsible for each shard. Configs are numbered. Config
14 | // #0 is the initial configuration, with no groups and all shards
15 | // assigned to group 0 (the invalid group).
16 | //
17 | // A GID is a replica group ID. GIDs must be unique and > 0.
18 | // Once a GID joins, and leaves, it should never join again.
19 | //
20 | // Please don't change this file.
21 | //
22 |
23 | const NShards = 10
24 |
25 | type Config struct {
26 | Num int // config number
27 | Shards [NShards]int64 // shard -> gid
28 | Groups map[int64][]string // gid -> servers[]
29 | }
30 |
31 | type JoinArgs struct {
32 | GID int64 // unique replica group ID
33 | Servers []string // group server ports
34 | }
35 |
36 | type JoinReply struct {
37 | }
38 |
39 | type LeaveArgs struct {
40 | GID int64
41 | }
42 |
43 | type LeaveReply struct {
44 | }
45 |
46 | type MoveArgs struct {
47 | Shard int
48 | GID int64
49 | }
50 |
51 | type MoveReply struct {
52 | }
53 |
54 | type QueryArgs struct {
55 | Num int // desired config number
56 | }
57 |
58 | type QueryReply struct {
59 | Config Config
60 | }
61 |
--------------------------------------------------------------------------------
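The header comment spells out the Join/Leave/Move/Query RPC interface; the shardmaster's `client.go` and `server.go` are listed in the tree but not reproduced here. Below is a minimal sketch of the Query(-1) pattern ("fetch the latest config"); the `queryLatest` helper, the `"ShardMaster.Query"` method name, and the inline `callMaster` dialer are assumptions modeled on the `call()` helpers elsewhere in this repo:

```go
package shardmaster

import (
	"fmt"
	"net/rpc"
)

// callMaster mirrors the call() helpers shown elsewhere in this repo:
// dial a unix-domain socket, issue one RPC, report whether the peer answered.
func callMaster(srv string, rpcname string, args interface{}, reply interface{}) bool {
	c, errx := rpc.Dial("unix", srv)
	if errx != nil {
		return false
	}
	defer c.Close()
	if err := c.Call(rpcname, args, reply); err != nil {
		fmt.Println(err)
		return false
	}
	return true
}

// queryLatest is a hypothetical sketch, not the lab's client.go: ask one
// shardmaster replica for the latest configuration (Num == -1 means
// "latest", per the comment in common.go above).
func queryLatest(server string) (Config, bool) {
	args := &QueryArgs{Num: -1}
	var reply QueryReply
	if !callMaster(server, "ShardMaster.Query", args, &reply) {
		return Config{}, false // unreachable; the caller should try another replica
	}
	return reply.Config, true
}
```
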
/hw1/src/main/wc.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "os"
5 | "strconv"
6 | "strings"
7 | "unicode"
8 | )
9 | import "fmt"
10 | import "mapreduce"
11 |
12 | import "container/list"
13 |
14 | // our simplified version of MapReduce does not supply a
15 | // key to the Map function, as in the paper; only a value,
16 | // which is a part of the input file content. the return
17 | // value should be a list of key/value pairs, each represented
18 | // by a mapreduce.KeyValue.
19 | func Map(value string) *list.List {
20 | f := func(c rune) bool {
21 | return !unicode.IsLetter(c)
22 | }
23 | s := strings.FieldsFunc(value, f)
24 |
25 | li := list.New()
26 | for _, v := range s {
27 | //fmt.Printf("%v, %v\n", i, v)
28 | li.PushBack(mapreduce.KeyValue{v, "1"})
29 | }
30 | return li
31 | }
32 |
33 | // called once for each key generated by Map, with a list
34 | // of that key's string value. should return a single
35 | // output value for that key.
36 | func Reduce(key string, values *list.List) string {
37 | total := values.Len()
38 | return strconv.Itoa(total)
39 | }
40 |
41 | // Can be run in 3 ways:
42 | // 1) Sequential (e.g., go run wc.go master x.txt sequential)
43 | // 2) Master (e.g., go run wc.go master x.txt localhost:7777)
44 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &)
45 | func main() {
46 | if len(os.Args) != 4 {
47 | fmt.Printf("%v", os.Args)
48 | fmt.Printf("%s: see usage comments in file\n", os.Args[0])
49 | } else if os.Args[1] == "master" {
50 | if os.Args[3] == "sequential" {
51 | mapreduce.RunSingle(5, 3, os.Args[2], Map, Reduce)
52 | } else {
53 | mr := mapreduce.MakeMapReduce(5, 3, os.Args[2], os.Args[3])
54 | // Wait until MR is done
55 | <-mr.DoneChannel
56 | }
57 | } else {
58 | mapreduce.RunWorker(os.Args[2], os.Args[3], Map, Reduce, 100)
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/hw1/src/mapreduce/common.go:
--------------------------------------------------------------------------------
1 | package mapreduce
2 |
3 | import (
4 | "fmt"
5 | )
6 | import "net/rpc"
7 |
8 | const (
9 | Map = "Map"
10 | Reduce = "Reduce"
11 | )
12 |
13 | type JobType string
14 |
15 | // RPC arguments and replies. Field names must start with capital letters,
16 | // otherwise RPC will break.
17 |
18 | type DoJobArgs struct {
19 | File string
20 | Operation JobType
21 | JobNumber int // this job's number
22 | NumOtherPhase int // total number of jobs in other phase (map or reduce)
23 | // for example, if we are in Map Phase, then NumOtherPhase = nReduce = 50
24 | }
25 |
26 | type DoJobReply struct {
27 | OK bool
28 | }
29 |
30 | type ShutdownArgs struct {
31 | }
32 |
33 | type ShutdownReply struct {
34 | Njobs int
35 | OK bool
36 | }
37 |
38 | type RegisterArgs struct {
39 | Worker string
40 | }
41 |
42 | type RegisterReply struct {
43 | OK bool
44 | }
45 |
46 | //
47 | // call() sends an RPC to the rpcname handler on server srv
48 | // with arguments args, waits for the reply, and leaves the
49 | // reply in reply. the reply argument should be the address
50 | // of a reply structure.
51 | //
52 | // call() returns true if the server responded, and false
53 | // if call() was not able to contact the server. in particular,
54 | // reply's contents are valid if and only if call() returned true.
55 | //
56 | // you should assume that call() will time out and return an
57 | // error after a while if it doesn't get a reply from the server.
58 | //
59 | // please use call() to send all RPCs, in master.go, mapreduce.go,
60 | // and worker.go. please don't change this function.
61 | //
62 | func call(srv string, rpcname string,
63 | args interface{}, reply interface{}) bool {
64 | c, errx := rpc.Dial("unix", srv)
65 | if errx != nil {
66 | return false
67 | }
68 | defer c.Close()
69 |
70 | err := c.Call(rpcname, args, reply)
71 | if err == nil {
72 | return true
73 | }
74 |
75 | fmt.Println(err)
76 | return false
77 | }
78 |
--------------------------------------------------------------------------------
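The `DoJobArgs`/`DoJobReply` types and the `call()` helper above are everything the master needs to hand a task to a registered worker; the `master.go` that does this is listed in the tree but not reproduced in this dump. Below is a minimal sketch of that hand-off (the `assignMap` helper and its `workerAddr`/`nReduce` parameters are illustrative, not taken from the lab code):

```go
package mapreduce

// assignMap is a hypothetical helper, not the lab's master.go: hand one map
// task to a registered worker using the DoJobArgs/DoJobReply types and the
// call() helper defined in common.go above.
//
//	workerAddr: the worker's RPC address (as sent in RegisterArgs.Worker)
//	file:       the input file for this map task
//	jobNumber:  index of this map task
//	nReduce:    number of reduce tasks (NumOtherPhase during the map phase)
func assignMap(workerAddr string, file string, jobNumber int, nReduce int) bool {
	args := &DoJobArgs{
		File:          file,
		Operation:     Map, // the "Map" job type constant from common.go
		JobNumber:     jobNumber,
		NumOtherPhase: nReduce,
	}
	var reply DoJobReply
	ok := call(workerAddr, "Worker.DoJob", args, &reply)
	return ok && reply.OK
}
```

The `"Worker.DoJob"` method name follows from `worker.go` later in this dump, which registers a `*Worker` whose `DoJob` method serves this RPC.
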
/README.md:
--------------------------------------------------------------------------------
1 | ## MIT 6.824: Distributed Systems
2 |
3 | > 🏃 MIT 6.824 is where my journey into Distributed Systems began. All projects are my own individual work.
4 |
5 | ### TODO
6 |
7 | 1. MapReduce
8 | * ✅ (Mar 23, 2020)
9 | * Write a simple MapReduce program, then build a MapReduce library in which the master hands out jobs to workers and handles worker failures.
10 | 2. Primary/Backup Replication Key/Value Service
11 | * ✅ (Jun 27, 2020)
12 | * Uses primary/backup replication, assisted by a view service that decides which machines are alive. The view service allows the primary/backup service to work correctly in the presence of network partitions. The view service itself is not replicated, and is a single point of failure.
13 | 3. Paxos-based Key/Value Service
14 | * ✅ (Jul 3, 2020)
15 | * Uses the Paxos protocol to replicate the key/value database with no single point of failure, and handles network partitions correctly. This key/value service is slower than a non-replicated key/value server would be, but it is fault tolerant.
16 | 4. Sharded Key/Value Service based on Paxos
17 | * ✅ (Jul 14, 2020)
18 | * A sharded key/value database where each shard replicates its state using Paxos. This service can perform Put/Get operations in parallel on different shards. It also has a replicated configuration service, which tells the shards what key range they are responsible for. It can change the assignment of keys to shards in response to changing load. This project has the core of a real-world design for thousands of servers.
19 |
20 | ### Course Description
21 |
22 | MIT 6.824 is a core 12-unit graduate subject with lectures, readings, programming labs, an optional project, a mid-term exam, and a final exam. It will present abstractions and implementation techniques for engineering distributed systems. Major topics include fault tolerance, replication, and consistency. Much of the class consists of studying and discussing case studies of distributed systems.
23 |
24 | ### References
25 | * [MIT 6.824 Spring 2015](http://nil.csail.mit.edu/6.824/2015/)
26 | * [UCLA CS 134 Spring 2020](http://web.cs.ucla.edu/~ravi/CS134_S20/)
27 |
--------------------------------------------------------------------------------
/hw2/src/viewservice/client.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import "net/rpc"
4 | import "fmt"
5 |
6 | //
7 | // the viewservice Clerk lives in the client
8 | // and maintains a little state.
9 | //
10 | type Clerk struct {
11 | me string // client's name (host:port)
12 | server string // viewservice's host:port
13 | }
14 |
15 | func MakeClerk(me string, server string) *Clerk {
16 | ck := new(Clerk)
17 | ck.me = me
18 | ck.server = server
19 | return ck
20 | }
21 |
22 | //
23 | // call() sends an RPC to the rpcname handler on server srv
24 | // with arguments args, waits for the reply, and leaves the
25 | // reply in reply. the reply argument should be a pointer
26 | // to a reply structure.
27 | //
28 | // the return value is true if the server responded, and false
29 | // if call() was not able to contact the server. in particular,
30 | // the reply's contents are only valid if call() returned true.
31 | //
32 | // you should assume that call() will return an
33 | // error after a while if the server is dead.
34 | // don't provide your own time-out mechanism.
35 | //
36 | // please use call() to send all RPCs, in client.go and server.go.
37 | // please don't change this function.
38 | //
39 | func call(srv string, rpcname string,
40 | args interface{}, reply interface{}) bool {
41 | c, errx := rpc.Dial("unix", srv)
42 | if errx != nil {
43 | return false
44 | }
45 | defer c.Close()
46 |
47 | err := c.Call(rpcname, args, reply)
48 | if err == nil {
49 | return true
50 | }
51 |
52 | fmt.Println(err)
53 | return false
54 | }
55 |
56 | func (ck *Clerk) Ping(viewnum uint) (View, error) {
57 | // prepare the arguments.
58 | args := &PingArgs{}
59 | args.Me = ck.me
60 | args.Viewnum = viewnum
61 | var reply PingReply
62 |
63 | // send an RPC request, wait for the reply.
64 | ok := call(ck.server, "ViewServer.Ping", args, &reply)
65 | if ok == false {
66 | return View{}, fmt.Errorf("Ping(%v) failed", viewnum)
67 | }
68 |
69 | return reply.View, nil
70 | }
71 |
72 | func (ck *Clerk) Get() (View, bool) {
73 | args := &GetArgs{}
74 | var reply GetReply
75 | ok := call(ck.server, "ViewServer.Get", args, &reply)
76 | if ok == false {
77 | return View{}, false
78 | }
79 | return reply.View, true
80 | }
81 |
82 | func (ck *Clerk) Primary() string {
83 | v, ok := ck.Get()
84 | if ok {
85 | return v.Primary
86 | }
87 | return ""
88 | }
89 |
--------------------------------------------------------------------------------
/hw3/src/viewservice/client.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import "net/rpc"
4 | import "fmt"
5 |
6 | //
7 | // the viewservice Clerk lives in the client
8 | // and maintains a little state.
9 | //
10 | type Clerk struct {
11 | me string // client's name (host:port)
12 | server string // viewservice's host:port
13 | }
14 |
15 | func MakeClerk(me string, server string) *Clerk {
16 | ck := new(Clerk)
17 | ck.me = me
18 | ck.server = server
19 | return ck
20 | }
21 |
22 | //
23 | // call() sends an RPC to the rpcname handler on server srv
24 | // with arguments args, waits for the reply, and leaves the
25 | // reply in reply. the reply argument should be a pointer
26 | // to a reply structure.
27 | //
28 | // the return value is true if the server responded, and false
29 | // if call() was not able to contact the server. in particular,
30 | // the reply's contents are only valid if call() returned true.
31 | //
32 | // you should assume that call() will return an
33 | // error after a while if the server is dead.
34 | // don't provide your own time-out mechanism.
35 | //
36 | // please use call() to send all RPCs, in client.go and server.go.
37 | // please don't change this function.
38 | //
39 | func call(srv string, rpcname string,
40 | args interface{}, reply interface{}) bool {
41 | c, errx := rpc.Dial("unix", srv)
42 | if errx != nil {
43 | return false
44 | }
45 | defer c.Close()
46 |
47 | err := c.Call(rpcname, args, reply)
48 | if err == nil {
49 | return true
50 | }
51 |
52 | fmt.Println(err)
53 | return false
54 | }
55 |
56 | func (ck *Clerk) Ping(viewnum uint) (View, error) {
57 | // prepare the arguments.
58 | args := &PingArgs{}
59 | args.Me = ck.me
60 | args.Viewnum = viewnum
61 | var reply PingReply
62 |
63 | // send an RPC request, wait for the reply.
64 | ok := call(ck.server, "ViewServer.Ping", args, &reply)
65 | if ok == false {
66 | return View{}, fmt.Errorf("Ping(%v) failed", viewnum)
67 | }
68 |
69 | return reply.View, nil
70 | }
71 |
72 | func (ck *Clerk) Get() (View, bool) {
73 | args := &GetArgs{}
74 | var reply GetReply
75 | ok := call(ck.server, "ViewServer.Get", args, &reply)
76 | if ok == false {
77 | return View{}, false
78 | }
79 | return reply.View, true
80 | }
81 |
82 | func (ck *Clerk) Primary() string {
83 | v, ok := ck.Get()
84 | if ok {
85 | return v.Primary
86 | }
87 | return ""
88 | }
89 |
--------------------------------------------------------------------------------
/hw4/src/viewservice/client.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import "net/rpc"
4 | import "fmt"
5 |
6 | //
7 | // the viewservice Clerk lives in the client
8 | // and maintains a little state.
9 | //
10 | type Clerk struct {
11 | me string // client's name (host:port)
12 | server string // viewservice's host:port
13 | }
14 |
15 | func MakeClerk(me string, server string) *Clerk {
16 | ck := new(Clerk)
17 | ck.me = me
18 | ck.server = server
19 | return ck
20 | }
21 |
22 | //
23 | // call() sends an RPC to the rpcname handler on server srv
24 | // with arguments args, waits for the reply, and leaves the
25 | // reply in reply. the reply argument should be a pointer
26 | // to a reply structure.
27 | //
28 | // the return value is true if the server responded, and false
29 | // if call() was not able to contact the server. in particular,
30 | // the reply's contents are only valid if call() returned true.
31 | //
32 | // you should assume that call() will return an
33 | // error after a while if the server is dead.
34 | // don't provide your own time-out mechanism.
35 | //
36 | // please use call() to send all RPCs, in client.go and server.go.
37 | // please don't change this function.
38 | //
39 | func call(srv string, rpcname string,
40 | args interface{}, reply interface{}) bool {
41 | c, errx := rpc.Dial("unix", srv)
42 | if errx != nil {
43 | return false
44 | }
45 | defer c.Close()
46 |
47 | err := c.Call(rpcname, args, reply)
48 | if err == nil {
49 | return true
50 | }
51 |
52 | fmt.Println(err)
53 | return false
54 | }
55 |
56 | func (ck *Clerk) Ping(viewnum uint) (View, error) {
57 | // prepare the arguments.
58 | args := &PingArgs{}
59 | args.Me = ck.me
60 | args.Viewnum = viewnum
61 | var reply PingReply
62 |
63 | // send an RPC request, wait for the reply.
64 | ok := call(ck.server, "ViewServer.Ping", args, &reply)
65 | if ok == false {
66 | return View{}, fmt.Errorf("Ping(%v) failed", viewnum)
67 | }
68 |
69 | return reply.View, nil
70 | }
71 |
72 | func (ck *Clerk) Get() (View, bool) {
73 | args := &GetArgs{}
74 | var reply GetReply
75 | ok := call(ck.server, "ViewServer.Get", args, &reply)
76 | if ok == false {
77 | return View{}, false
78 | }
79 | return reply.View, true
80 | }
81 |
82 | func (ck *Clerk) Primary() string {
83 | v, ok := ck.Get()
84 | if ok {
85 | return v.Primary
86 | }
87 | return ""
88 | }
89 |
--------------------------------------------------------------------------------
/hw2/src/viewservice/common.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import "time"
4 |
5 | //
6 | // This is a non-replicated view service for a simple
7 | // primary/backup system.
8 | //
9 | // The view service goes through a sequence of numbered
10 | // views, each with a primary and (if possible) a backup.
11 | // A view consists of a view number and the host:port of
12 | // the view's primary and backup p/b servers.
13 | //
14 | // The primary in a view is always either the primary
15 | // or the backup of the previous view (in order to ensure
16 | // that the p/b service's state is preserved).
17 | //
18 | // Each p/b server should send a Ping RPC once per PingInterval.
19 | // The view server replies with a description of the current
20 | // view. The Pings let the view server know that the p/b
21 | // server is still alive; inform the p/b server of the current
22 | // view; and inform the view server of the most recent view
23 | // that the p/b server knows about.
24 | //
25 | // The view server proceeds to a new view when either it hasn't
26 | // received a ping from the primary or backup for a while, or
27 | // if there was no backup and a new server starts Pinging.
28 | //
29 | // The view server will not proceed to a new view until
30 | // the primary from the current view acknowledges
31 | // that it is operating in the current view. This helps
32 | // ensure that there's at most one p/b primary operating at
33 | // a time.
34 | //
35 |
36 | type View struct {
37 | Viewnum uint
38 | Primary string
39 | Backup string
40 | }
41 |
42 | // clients should send a Ping RPC this often,
43 | // to tell the viewservice that the client is alive.
44 | const PingInterval = time.Millisecond * 100
45 |
46 | // the viewserver will declare a client dead if it misses
47 | // this many Ping RPCs in a row.
48 | const DeadPings = 5
49 |
50 | //
51 | // Ping(): called by a primary/backup server to tell the
52 | // view service it is alive, to indicate whether p/b server
53 | // has seen the latest view, and for p/b server to learn
54 | // the latest view.
55 | //
56 | // If Viewnum is zero, the caller is signalling that it is
57 | // alive and could become backup if needed.
58 | //
59 |
60 | type PingArgs struct {
61 | Me string // "host:port"
62 | Viewnum uint // caller's notion of current view #
63 | }
64 |
65 | type PingReply struct {
66 | View View
67 | }
68 |
69 | //
70 | // Get(): fetch the current view, without volunteering
71 | // to be a server. mostly for clients of the p/b service,
72 | // and for testing.
73 | //
74 |
75 | type GetArgs struct {
76 | }
77 |
78 | type GetReply struct {
79 | View View
80 | }
81 |
--------------------------------------------------------------------------------
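The comments above describe the protocol: each primary/backup server pings once per `PingInterval`, reporting the view number it currently knows, and is declared dead after `DeadPings` missed pings. The clerk side (`MakeClerk`, `Ping`) appears in `viewservice/client.go` earlier in this dump; below is a minimal sketch of the ping loop a p/b server would run (the `pingLoop` function itself is illustrative, not taken from `pbservice/server.go`):

```go
package main

import (
	"fmt"
	"time"

	"viewservice"
)

// pingLoop is a minimal sketch of what a primary/backup server does with
// this interface: ping once per PingInterval with the view number it
// currently knows, and adopt whatever view the view service replies with.
func pingLoop(me string, vshost string) {
	ck := viewservice.MakeClerk(me, vshost) // me = this server's host:port
	viewnum := uint(0)                      // 0 = "alive, could become backup"
	for {
		view, err := ck.Ping(viewnum)
		if err != nil {
			fmt.Println(err) // view service unreachable; keep trying
		} else {
			viewnum = view.Viewnum // acknowledge the view we now know about
		}
		time.Sleep(viewservice.PingInterval)
	}
}
```
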
/hw3/src/viewservice/common.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import "time"
4 |
5 | //
6 | // This is a non-replicated view service for a simple
7 | // primary/backup system.
8 | //
9 | // The view service goes through a sequence of numbered
10 | // views, each with a primary and (if possible) a backup.
11 | // A view consists of a view number and the host:port of
12 | // the view's primary and backup p/b servers.
13 | //
14 | // The primary in a view is always either the primary
15 | // or the backup of the previous view (in order to ensure
16 | // that the p/b service's state is preserved).
17 | //
18 | // Each p/b server should send a Ping RPC once per PingInterval.
19 | // The view server replies with a description of the current
20 | // view. The Pings let the view server know that the p/b
21 | // server is still alive; inform the p/b server of the current
22 | // view; and inform the view server of the most recent view
23 | // that the p/b server knows about.
24 | //
25 | // The view server proceeds to a new view when either it hasn't
26 | // received a ping from the primary or backup for a while, or
27 | // if there was no backup and a new server starts Pinging.
28 | //
29 | // The view server will not proceed to a new view until
30 | // the primary from the current view acknowledges
31 | // that it is operating in the current view. This helps
32 | // ensure that there's at most one p/b primary operating at
33 | // a time.
34 | //
35 |
36 | type View struct {
37 | Viewnum uint
38 | Primary string
39 | Backup string
40 | }
41 |
42 | // clients should send a Ping RPC this often,
43 | // to tell the viewservice that the client is alive.
44 | const PingInterval = time.Millisecond * 100
45 |
46 | // the viewserver will declare a client dead if it misses
47 | // this many Ping RPCs in a row.
48 | const DeadPings = 5
49 |
50 | //
51 | // Ping(): called by a primary/backup server to tell the
52 | // view service it is alive, to indicate whether p/b server
53 | // has seen the latest view, and for p/b server to learn
54 | // the latest view.
55 | //
56 | // If Viewnum is zero, the caller is signalling that it is
57 | // alive and could become backup if needed.
58 | //
59 |
60 | type PingArgs struct {
61 | Me string // "host:port"
62 | Viewnum uint // caller's notion of current view #
63 | }
64 |
65 | type PingReply struct {
66 | View View
67 | }
68 |
69 | //
70 | // Get(): fetch the current view, without volunteering
71 | // to be a server. mostly for clients of the p/b service,
72 | // and for testing.
73 | //
74 |
75 | type GetArgs struct {
76 | }
77 |
78 | type GetReply struct {
79 | View View
80 | }
81 |
--------------------------------------------------------------------------------
/hw4/src/viewservice/common.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import "time"
4 |
5 | //
6 | // This is a non-replicated view service for a simple
7 | // primary/backup system.
8 | //
9 | // The view service goes through a sequence of numbered
10 | // views, each with a primary and (if possible) a backup.
11 | // A view consists of a view number and the host:port of
12 | // the view's primary and backup p/b servers.
13 | //
14 | // The primary in a view is always either the primary
15 | // or the backup of the previous view (in order to ensure
16 | // that the p/b service's state is preserved).
17 | //
18 | // Each p/b server should send a Ping RPC once per PingInterval.
19 | // The view server replies with a description of the current
20 | // view. The Pings let the view server know that the p/b
21 | // server is still alive; inform the p/b server of the current
22 | // view; and inform the view server of the most recent view
23 | // that the p/b server knows about.
24 | //
25 | // The view server proceeds to a new view when either it hasn't
26 | // received a ping from the primary or backup for a while, or
27 | // if there was no backup and a new server starts Pinging.
28 | //
29 | // The view server will not proceed to a new view until
30 | // the primary from the current view acknowledges
31 | // that it is operating in the current view. This helps
32 | // ensure that there's at most one p/b primary operating at
33 | // a time.
34 | //
35 |
36 | type View struct {
37 | Viewnum uint
38 | Primary string
39 | Backup string
40 | }
41 |
42 | // clients should send a Ping RPC this often,
43 | // to tell the viewservice that the client is alive.
44 | const PingInterval = time.Millisecond * 100
45 |
46 | // the viewserver will declare a client dead if it misses
47 | // this many Ping RPCs in a row.
48 | const DeadPings = 5
49 |
50 | //
51 | // Ping(): called by a primary/backup server to tell the
52 | // view service it is alive, to indicate whether p/b server
53 | // has seen the latest view, and for p/b server to learn
54 | // the latest view.
55 | //
56 | // If Viewnum is zero, the caller is signalling that it is
57 | // alive and could become backup if needed.
58 | //
59 |
60 | type PingArgs struct {
61 | Me string // "host:port"
62 | Viewnum uint // caller's notion of current view #
63 | }
64 |
65 | type PingReply struct {
66 | View View
67 | }
68 |
69 | //
70 | // Get(): fetch the current view, without volunteering
71 | // to be a server. mostly for clients of the p/b service,
72 | // and for testing.
73 | //
74 |
75 | type GetArgs struct {
76 | }
77 |
78 | type GetReply struct {
79 | View View
80 | }
81 |
--------------------------------------------------------------------------------
/hw1/src/mapreduce/worker.go:
--------------------------------------------------------------------------------
1 | package mapreduce
2 |
3 | import (
4 | "fmt"
5 | )
6 | import "os"
7 | import "log"
8 | import "net/rpc"
9 | import "net"
10 | import "container/list"
11 |
12 | // Worker is a server waiting for DoJob or Shutdown RPCs
13 |
14 | type Worker struct {
15 | name string
16 | Reduce func(string, *list.List) string
17 | Map func(string) *list.List
18 | nRPC int
19 | nJobs int
20 | l net.Listener
21 | }
22 |
23 | // The master sent us a job
24 | func (wk *Worker) DoJob(arg *DoJobArgs, res *DoJobReply) error {
25 | fmt.Printf("Dojob %s job %d file %s operation %v N %d\n",
26 | wk.name, arg.JobNumber, arg.File, arg.Operation,
27 | arg.NumOtherPhase)
28 | switch arg.Operation {
29 | case Map:
30 | DoMap(arg.JobNumber, arg.File, arg.NumOtherPhase, wk.Map)
31 | case Reduce:
32 | DoReduce(arg.JobNumber, arg.File, arg.NumOtherPhase, wk.Reduce)
33 | }
34 | res.OK = true
35 | return nil
36 | }
37 |
38 | // The master is telling us to shutdown. Report the number of Jobs we
39 | // have processed.
40 | func (wk *Worker) Shutdown(args *ShutdownArgs, res *ShutdownReply) error {
41 | DPrintf("Shutdown %s\n", wk.name)
42 | res.Njobs = wk.nJobs
43 | res.OK = true
44 | wk.nRPC = 1 // OK, because the same thread reads nRPC
45 | wk.nJobs-- // Don't count the shutdown RPC
46 | return nil
47 | }
48 |
49 | // Tell the master we exist and are ready to work
50 | func Register(master string, me string) {
51 | args := &RegisterArgs{}
52 | args.Worker = me
53 | var reply RegisterReply
54 | ok := call(master, "MapReduce.Register", args, &reply)
55 |
56 | if ok == false {
57 | fmt.Printf("Register: RPC %s register error\n", master)
58 | }
59 | }
60 |
61 | // Set up a connection with the master, register with the master,
62 | // and wait for jobs from the master
63 | func RunWorker(MasterAddress string, me string,
64 | MapFunc func(string) *list.List,
65 | ReduceFunc func(string, *list.List) string, nRPC int) {
66 | DPrintf("RunWorker %s\n", me)
67 | wk := new(Worker)
68 | wk.name = me
69 | wk.Map = MapFunc
70 | wk.Reduce = ReduceFunc
71 | wk.nRPC = nRPC
72 | rpcs := rpc.NewServer()
73 | rpcs.Register(wk)
74 | os.Remove(me) // only needed for "unix"
75 | l, e := net.Listen("unix", me)
76 | if e != nil {
77 | log.Fatal("RunWorker: worker ", me, " error: ", e)
78 | }
79 | wk.l = l
80 | Register(MasterAddress, me)
81 |
82 | // DON'T MODIFY CODE BELOW
83 | for wk.nRPC != 0 {
84 | conn, err := wk.l.Accept()
85 | if err == nil {
86 | wk.nRPC -= 1
87 | go rpcs.ServeConn(conn)
88 | wk.nJobs += 1
89 | } else {
90 | break
91 | }
92 | }
93 | wk.l.Close()
94 | DPrintf("RunWorker %s exit\n", me)
95 | }
96 |
--------------------------------------------------------------------------------
/hw1/README.md:
--------------------------------------------------------------------------------
1 | # CS188-hw1
2 | private repo
3 |
4 | # Setup
5 |
6 | ```sh
7 | (py3) adrianhsu:~/Desktop
8 | $ git clone --mirror https://github.com/S19-CS188/assignment1-skeleton.git
9 | Cloning into bare repository 'assignment1-skeleton.git'...
10 | remote: Enumerating objects: 15, done.
11 | remote: Total 15 (delta 0), reused 0 (delta 0), pack-reused 15
12 | Unpacking objects: 100% (15/15), done.
13 | (py3) adrianhsu:~/Desktop
14 | $ cd assignment1-skeleton.git/
15 | (py3) adrianhsu:~/Desktop/assignment1-skeleton.git (master)
16 | $ git push --mirror https://github.com/AdrianHsu/CS188-hw1.git
17 | Enumerating objects: 15, done.
18 | Counting objects: 100% (15/15), done.
19 | Delta compression using up to 4 threads
20 | Compressing objects: 100% (14/14), done.
21 | Writing objects: 100% (15/15), 1.37 MiB | 117.00 KiB/s, done.
22 | Total 15 (delta 0), reused 0 (delta 0)
23 | To https://github.com/AdrianHsu/CS188-hw1.git
24 | * [new branch] master -> master
25 | (py3) adrianhsu:~/Desktop/assignment1-skeleton.git (master)
26 | $ git remote set-url --push origin https://github.com/AdrianHsu/CS188-hw1.git
27 | (py3) adrianhsu:~/Desktop/assignment1-skeleton.git (master)
28 | $ cd ..
29 | (py3) adrianhsu:~/Desktop
30 | $ git clone https://github.com/AdrianHsu/CS188-hw1
31 | Cloning into 'CS188-hw1'...
32 | remote: Enumerating objects: 15, done.
33 | remote: Counting objects: 100% (15/15), done.
34 | remote: Compressing objects: 100% (14/14), done.
35 | remote: Total 15 (delta 0), reused 15 (delta 0), pack-reused 0
36 | Unpacking objects: 100% (15/15), done.
37 | (py3) adrianhsu:~/Desktop
38 | $ cd CS188-hw1/
39 | (py3) adrianhsu:~/Desktop/CS188-hw1 (master)
40 | $ ls
41 | src
42 | (py3) adrianhsu:~/Desktop/CS188-hw1 (master)
43 | $ git remote -v
44 | origin https://github.com/AdrianHsu/CS188-hw1 (fetch)
45 | origin https://github.com/AdrianHsu/CS188-hw1 (push)
46 | ```
47 |
48 |
49 |
50 | ## Compile
51 |
52 | ```sh
53 | (py3) adrianhsu:~/Desktop/CS188-hw1 (master)
54 | $ export GOPATH=$(pwd)
55 | (py3) adrianhsu:~/Desktop/CS188-hw1 (master)
56 | $ cd src/ma
57 | -bash: cd: src/ma: No such file or directory
58 | (py3) adrianhsu:~/Desktop/CS188-hw1 (master)
59 | $ cd src/main/
60 | (py3) adrianhsu:~/Desktop/CS188-hw1/src/main (master)
61 | $ go run wc.go master kjv12.txt sequential
62 | # command-line-arguments
63 | ./wc.go:15:1: missing return at end of function
64 | ./wc.go:21:1: missing return at end of function
65 | ```
66 |
67 | The compiler reports two errors because the implementations of the `Map()` and `Reduce()` functions are incomplete. A possible shape for the missing functions is sketched below.
68 |
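A minimal sketch of what the two missing functions could look like (a hypothetical word count, not the submitted solution). It assumes `wc.go` imports the lab's `mapreduce` and `container/list` packages and follows the `Map`/`Reduce` signatures used by `mapreduce/test_test.go`; the extra imports it would need are `strings`, `strconv`, and `unicode`.

```go
// Map splits one chunk of the input into words and emits a KeyValue per occurrence.
func Map(value string) *list.List {
	res := list.New()
	// Split on anything that is not a letter, so punctuation is stripped.
	words := strings.FieldsFunc(value, func(r rune) bool {
		return !unicode.IsLetter(r)
	})
	for _, w := range words {
		res.PushBack(mapreduce.KeyValue{w, "1"})
	}
	return res
}

// Reduce receives every value emitted for one word and returns its count as a string.
func Reduce(key string, values *list.List) string {
	count := 0
	for e := values.Front(); e != nil; e = e.Next() {
		count++
	}
	return strconv.Itoa(count)
}
```

With something along these lines in place, `go run wc.go master kjv12.txt sequential` should compile and produce an `mrtmp.`-prefixed output file, following the naming convention visible in `mapreduce/test_test.go`.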
69 |
70 | ## Submit
71 |
72 | ```sh
73 | ... Basic Test Passed
74 | ... One Failure Passed
75 | ... Many Failures Passed
76 | ```
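
The three lines above are printed by the tests in `src/mapreduce/test_test.go`. One way to reproduce them locally (a sketch, assuming `GOPATH` is still exported as in the Compile step) is:

```sh
cd src/mapreduce
go test
```

The repository also ships `src/main/test-wc.sh` next to `mr-testout.txt`, which presumably checks the word-count output itself.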
77 |
78 |
79 |
--------------------------------------------------------------------------------
/hw3/src/pbservice/client.go:
--------------------------------------------------------------------------------
1 | package pbservice
2 |
3 | import "viewservice"
4 | import "net/rpc"
5 | import "fmt"
6 |
7 | import "crypto/rand"
8 | import "math/big"
9 |
10 |
11 | type Clerk struct {
12 | vs *viewservice.Clerk
13 | // Your declarations here
14 | }
15 |
16 | // this may come in handy.
17 | func nrand() int64 {
18 | max := big.NewInt(int64(1) << 62)
19 | bigx, _ := rand.Int(rand.Reader, max)
20 | x := bigx.Int64()
21 | return x
22 | }
23 |
24 | func MakeClerk(vshost string, me string) *Clerk {
25 | ck := new(Clerk)
26 | ck.vs = viewservice.MakeClerk(me, vshost)
27 | // Your ck.* initializations here
28 |
29 | return ck
30 | }
31 |
32 |
33 | //
34 | // call() sends an RPC to the rpcname handler on server srv
35 | // with arguments args, waits for the reply, and leaves the
36 | // reply in reply. the reply argument should be a pointer
37 | // to a reply structure.
38 | //
39 | // the return value is true if the server responded, and false
40 | // if call() was not able to contact the server. in particular,
41 | // the reply's contents are only valid if call() returned true.
42 | //
43 | // you should assume that call() will return an
44 | // error after a while if the server is dead.
45 | // don't provide your own time-out mechanism.
46 | //
47 | // please use call() to send all RPCs, in client.go and server.go.
48 | // please don't change this function.
49 | //
50 | func call(srv string, rpcname string,
51 | args interface{}, reply interface{}) bool {
52 | c, errx := rpc.Dial("unix", srv)
53 | if errx != nil {
54 | return false
55 | }
56 | defer c.Close()
57 |
58 | err := c.Call(rpcname, args, reply)
59 | if err == nil {
60 | return true
61 | }
62 |
63 | fmt.Println(err)
64 | return false
65 | }
66 |
67 | //
68 | // fetch a key's value from the current primary;
69 | // if the key has never been set, return "".
70 | // Get() must keep trying until either the
71 | // primary replies with the value or the primary
72 | // says the key doesn't exist (has never been Put()).
73 | //
74 | func (ck *Clerk) Get(key string) string {
75 |
76 | // Your code here.
77 |
78 | return "???"
79 | }
80 |
81 | //
82 | // send a Put or Append RPC
83 | //
84 | func (ck *Clerk) PutAppend(key string, value string, op string) {
85 |
86 | // Your code here.
87 | }
88 |
89 | //
90 | // tell the primary to update key's value.
91 | // must keep trying until it succeeds.
92 | //
93 | func (ck *Clerk) Put(key string, value string) {
94 | ck.PutAppend(key, value, "Put")
95 | }
96 |
97 | //
98 | // tell the primary to append to key's value.
99 | // must keep trying until it succeeds.
100 | //
101 | func (ck *Clerk) Append(key string, value string) {
102 | ck.PutAppend(key, value, "Append")
103 | }
104 |
--------------------------------------------------------------------------------
/hw4/src/pbservice/client.go:
--------------------------------------------------------------------------------
1 | package pbservice
2 |
3 | import "viewservice"
4 | import "net/rpc"
5 | import "fmt"
6 |
7 | import "crypto/rand"
8 | import "math/big"
9 |
10 |
11 | type Clerk struct {
12 | vs *viewservice.Clerk
13 | // Your declarations here
14 | }
15 |
16 | // this may come in handy.
17 | func nrand() int64 {
18 | max := big.NewInt(int64(1) << 62)
19 | bigx, _ := rand.Int(rand.Reader, max)
20 | x := bigx.Int64()
21 | return x
22 | }
23 |
24 | func MakeClerk(vshost string, me string) *Clerk {
25 | ck := new(Clerk)
26 | ck.vs = viewservice.MakeClerk(me, vshost)
27 | // Your ck.* initializations here
28 |
29 | return ck
30 | }
31 |
32 |
33 | //
34 | // call() sends an RPC to the rpcname handler on server srv
35 | // with arguments args, waits for the reply, and leaves the
36 | // reply in reply. the reply argument should be a pointer
37 | // to a reply structure.
38 | //
39 | // the return value is true if the server responded, and false
40 | // if call() was not able to contact the server. in particular,
41 | // the reply's contents are only valid if call() returned true.
42 | //
43 | // you should assume that call() will return an
44 | // error after a while if the server is dead.
45 | // don't provide your own time-out mechanism.
46 | //
47 | // please use call() to send all RPCs, in client.go and server.go.
48 | // please don't change this function.
49 | //
50 | func call(srv string, rpcname string,
51 | args interface{}, reply interface{}) bool {
52 | c, errx := rpc.Dial("unix", srv)
53 | if errx != nil {
54 | return false
55 | }
56 | defer c.Close()
57 |
58 | err := c.Call(rpcname, args, reply)
59 | if err == nil {
60 | return true
61 | }
62 |
63 | fmt.Println(err)
64 | return false
65 | }
66 |
67 | //
68 | // fetch a key's value from the current primary;
69 | // if they key has never been set, return "".
70 | // Get() must keep trying until it either the
71 | // primary replies with the value or the primary
72 | // says the key doesn't exist (has never been Put().
73 | //
74 | func (ck *Clerk) Get(key string) string {
75 |
76 | // Your code here.
77 |
78 | return "???"
79 | }
80 |
81 | //
82 | // send a Put or Append RPC
83 | //
84 | func (ck *Clerk) PutAppend(key string, value string, op string) {
85 |
86 | // Your code here.
87 | }
88 |
89 | //
90 | // tell the primary to update key's value.
91 | // must keep trying until it succeeds.
92 | //
93 | func (ck *Clerk) Put(key string, value string) {
94 | ck.PutAppend(key, value, "Put")
95 | }
96 |
97 | //
98 | // tell the primary to append to key's value.
99 | // must keep trying until it succeeds.
100 | //
101 | func (ck *Clerk) Append(key string, value string) {
102 | ck.PutAppend(key, value, "Append")
103 | }
104 |
--------------------------------------------------------------------------------
/hw3/src/viewservice/server.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import "net"
4 | import "net/rpc"
5 | import "log"
6 | import "time"
7 | import "sync"
8 | import "fmt"
9 | import "os"
10 | import "sync/atomic"
11 |
12 | type ViewServer struct {
13 | mu sync.Mutex
14 | l net.Listener
15 | dead int32 // for testing
16 | rpccount int32 // for testing
17 | me string
18 |
19 |
20 | // Your declarations here.
21 | }
22 |
23 | //
24 | // server Ping RPC handler.
25 | //
26 | func (vs *ViewServer) Ping(args *PingArgs, reply *PingReply) error {
27 |
28 | // Your code here.
29 |
30 | return nil
31 | }
32 |
33 | //
34 | // server Get() RPC handler.
35 | //
36 | func (vs *ViewServer) Get(args *GetArgs, reply *GetReply) error {
37 |
38 | // Your code here.
39 |
40 | return nil
41 | }
42 |
43 |
44 | //
45 | // tick() is called once per PingInterval; it should notice
46 | // if servers have died or recovered, and change the view
47 | // accordingly.
48 | //
49 | func (vs *ViewServer) tick() {
50 |
51 | // Your code here.
52 | }
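// One possible shape for tick(), sketched as a comment (any bookkeeping maps or
// extra ViewServer fields would be additions beyond this skeleton):
//   - record the time of the most recent Ping from each server;
//   - if the primary has missed DeadPings PingIntervals, or restarted (Pinged with
//     Viewnum 0), and the primary has acked the current view, promote the backup
//     and fill the backup slot from an idle server in a single view change;
//   - never advance to view n+1 before the primary of view n has acked view n,
//     as the "Viewserver waits for primary to ack view" test requires.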
53 |
54 | //
55 | // tell the server to shut itself down.
56 | // for testing.
57 | // please don't change these two functions.
58 | //
59 | func (vs *ViewServer) Kill() {
60 | atomic.StoreInt32(&vs.dead, 1)
61 | vs.l.Close()
62 | }
63 |
64 | //
65 | // has this server been asked to shut down?
66 | //
67 | func (vs *ViewServer) isdead() bool {
68 | return atomic.LoadInt32(&vs.dead) != 0
69 | }
70 |
71 | // please don't change this function.
72 | func (vs *ViewServer) GetRPCCount() int32 {
73 | return atomic.LoadInt32(&vs.rpccount)
74 | }
75 |
76 | func StartServer(me string) *ViewServer {
77 | vs := new(ViewServer)
78 | vs.me = me
79 | // Your vs.* initializations here.
80 |
81 | // tell net/rpc about our RPC server and handlers.
82 | rpcs := rpc.NewServer()
83 | rpcs.Register(vs)
84 |
85 | // prepare to receive connections from clients.
86 | // change "unix" to "tcp" to use over a network.
87 | os.Remove(vs.me) // only needed for "unix"
88 | l, e := net.Listen("unix", vs.me)
89 | if e != nil {
90 | log.Fatal("listen error: ", e)
91 | }
92 | vs.l = l
93 |
94 | // please don't change any of the following code,
95 | // or do anything to subvert it.
96 |
97 | // create a thread to accept RPC connections from clients.
98 | go func() {
99 | for vs.isdead() == false {
100 | conn, err := vs.l.Accept()
101 | if err == nil && vs.isdead() == false {
102 | atomic.AddInt32(&vs.rpccount, 1)
103 | go rpcs.ServeConn(conn)
104 | } else if err == nil {
105 | conn.Close()
106 | }
107 | if err != nil && vs.isdead() == false {
108 | fmt.Printf("ViewServer(%v) accept: %v\n", me, err.Error())
109 | vs.Kill()
110 | }
111 | }
112 | }()
113 |
114 | // create a thread to call tick() periodically.
115 | go func() {
116 | for vs.isdead() == false {
117 | vs.tick()
118 | time.Sleep(PingInterval)
119 | }
120 | }()
121 |
122 | return vs
123 | }
124 |
--------------------------------------------------------------------------------
/hw4/src/viewservice/server.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import "net"
4 | import "net/rpc"
5 | import "log"
6 | import "time"
7 | import "sync"
8 | import "fmt"
9 | import "os"
10 | import "sync/atomic"
11 |
12 | type ViewServer struct {
13 | mu sync.Mutex
14 | l net.Listener
15 | dead int32 // for testing
16 | rpccount int32 // for testing
17 | me string
18 |
19 |
20 | // Your declarations here.
21 | }
22 |
23 | //
24 | // server Ping RPC handler.
25 | //
26 | func (vs *ViewServer) Ping(args *PingArgs, reply *PingReply) error {
27 |
28 | // Your code here.
29 |
30 | return nil
31 | }
32 |
33 | //
34 | // server Get() RPC handler.
35 | //
36 | func (vs *ViewServer) Get(args *GetArgs, reply *GetReply) error {
37 |
38 | // Your code here.
39 |
40 | return nil
41 | }
42 |
43 |
44 | //
45 | // tick() is called once per PingInterval; it should notice
46 | // if servers have died or recovered, and change the view
47 | // accordingly.
48 | //
49 | func (vs *ViewServer) tick() {
50 |
51 | // Your code here.
52 | }
53 |
54 | //
55 | // tell the server to shut itself down.
56 | // for testing.
57 | // please don't change these two functions.
58 | //
59 | func (vs *ViewServer) Kill() {
60 | atomic.StoreInt32(&vs.dead, 1)
61 | vs.l.Close()
62 | }
63 |
64 | //
65 | // has this server been asked to shut down?
66 | //
67 | func (vs *ViewServer) isdead() bool {
68 | return atomic.LoadInt32(&vs.dead) != 0
69 | }
70 |
71 | // please don't change this function.
72 | func (vs *ViewServer) GetRPCCount() int32 {
73 | return atomic.LoadInt32(&vs.rpccount)
74 | }
75 |
76 | func StartServer(me string) *ViewServer {
77 | vs := new(ViewServer)
78 | vs.me = me
79 | // Your vs.* initializations here.
80 |
81 | // tell net/rpc about our RPC server and handlers.
82 | rpcs := rpc.NewServer()
83 | rpcs.Register(vs)
84 |
85 | // prepare to receive connections from clients.
86 | // change "unix" to "tcp" to use over a network.
87 | os.Remove(vs.me) // only needed for "unix"
88 | l, e := net.Listen("unix", vs.me)
89 | if e != nil {
90 | log.Fatal("listen error: ", e)
91 | }
92 | vs.l = l
93 |
94 | // please don't change any of the following code,
95 | // or do anything to subvert it.
96 |
97 | // create a thread to accept RPC connections from clients.
98 | go func() {
99 | for vs.isdead() == false {
100 | conn, err := vs.l.Accept()
101 | if err == nil && vs.isdead() == false {
102 | atomic.AddInt32(&vs.rpccount, 1)
103 | go rpcs.ServeConn(conn)
104 | } else if err == nil {
105 | conn.Close()
106 | }
107 | if err != nil && vs.isdead() == false {
108 | fmt.Printf("ViewServer(%v) accept: %v\n", me, err.Error())
109 | vs.Kill()
110 | }
111 | }
112 | }()
113 |
114 | // create a thread to call tick() periodically.
115 | go func() {
116 | for vs.isdead() == false {
117 | vs.tick()
118 | time.Sleep(PingInterval)
119 | }
120 | }()
121 |
122 | return vs
123 | }
124 |
--------------------------------------------------------------------------------
/hw4/src/shardmaster/client.go:
--------------------------------------------------------------------------------
1 | package shardmaster
2 |
3 | //
4 | // Shardmaster clerk.
5 | // Please don't change this file.
6 | //
7 |
8 | import "net/rpc"
9 | import "time"
10 | import "fmt"
11 |
12 | type Clerk struct {
13 | servers []string // shardmaster replicas
14 | }
15 |
16 | func MakeClerk(servers []string) *Clerk {
17 | ck := new(Clerk)
18 | ck.servers = servers
19 | return ck
20 | }
21 |
22 | //
23 | // call() sends an RPC to the rpcname handler on server srv
24 | // with arguments args, waits for the reply, and leaves the
25 | // reply in reply. the reply argument should be a pointer
26 | // to a reply structure.
27 | //
28 | // the return value is true if the server responded, and false
29 | // if call() was not able to contact the server. in particular,
30 | // the reply's contents are only valid if call() returned true.
31 | //
32 | // you should assume that call() will return an
33 | // error after a while if the server is dead.
34 | // don't provide your own time-out mechanism.
35 | //
36 | // please use call() to send all RPCs, in client.go and server.go.
37 | // please don't change this function.
38 | //
39 | func call(srv string, rpcname string,
40 | args interface{}, reply interface{}) bool {
41 | c, errx := rpc.Dial("unix", srv)
42 | if errx != nil {
43 | return false
44 | }
45 | defer c.Close()
46 |
47 | err := c.Call(rpcname, args, reply)
48 | if err == nil {
49 | return true
50 | }
51 |
52 | fmt.Println(err)
53 | return false
54 | }
55 |
56 | func (ck *Clerk) Query(num int) Config {
57 | for {
58 | // try each known server.
59 | for _, srv := range ck.servers {
60 | args := &QueryArgs{}
61 | args.Num = num // the desired configuration number; -1 asks for the latest config
62 | var reply QueryReply
63 | ok := call(srv, "ShardMaster.Query", args, &reply)
64 | if ok {
65 | return reply.Config
66 | }
67 | }
68 | time.Sleep(100 * time.Millisecond)
69 | }
70 | }
71 |
72 | func (ck *Clerk) Join(gid int64, servers []string) {
73 | for {
74 | // try each known server.
75 | for _, srv := range ck.servers {
76 | args := &JoinArgs{}
77 | args.GID = gid
78 | args.Servers = servers
79 | var reply JoinReply
80 | ok := call(srv, "ShardMaster.Join", args, &reply)
81 | if ok {
82 | return
83 | }
84 | }
85 | time.Sleep(100 * time.Millisecond)
86 | }
87 | }
88 |
89 | func (ck *Clerk) Leave(gid int64) {
90 | for {
91 | // try each known server.
92 | for _, srv := range ck.servers {
93 | args := &LeaveArgs{}
94 | args.GID = gid
95 | var reply LeaveReply
96 | ok := call(srv, "ShardMaster.Leave", args, &reply)
97 | if ok {
98 | return
99 | }
100 | }
101 | time.Sleep(100 * time.Millisecond)
102 | }
103 | }
104 |
105 | func (ck *Clerk) Move(shard int, gid int64) {
106 | for {
107 | // try each known server.
108 | for _, srv := range ck.servers {
109 | args := &MoveArgs{}
110 | args.Shard = shard
111 | args.GID = gid
112 | var reply MoveReply
113 | ok := call(srv, "ShardMaster.Move", args, &reply)
114 | if ok {
115 | return
116 | }
117 | }
118 | time.Sleep(100 * time.Millisecond)
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/hw3/src/kvpaxos/client.go:
--------------------------------------------------------------------------------
1 | package kvpaxos
2 |
3 | import (
4 | "net/rpc"
5 | "time"
6 | )
7 | import "crypto/rand"
8 | import "math/big"
9 |
10 | import "fmt"
11 |
12 | type Clerk struct {
13 | servers []string
14 | // You will have to modify this struct.
15 | }
16 |
17 | func nrand() int64 {
18 | max := big.NewInt(int64(1) << 62)
19 | bigx, _ := rand.Int(rand.Reader, max)
20 | x := bigx.Int64()
21 | return x
22 | }
23 |
24 | func MakeClerk(servers []string) *Clerk {
25 | ck := new(Clerk)
26 | ck.servers = servers
27 | // You'll have to add code here.
28 | return ck
29 | }
30 |
31 | //
32 | // call() sends an RPC to the rpcname handler on server srv
33 | // with arguments args, waits for the reply, and leaves the
34 | // reply in reply. the reply argument should be a pointer
35 | // to a reply structure.
36 | //
37 | // the return value is true if the server responded, and false
38 | // if call() was not able to contact the server. in particular,
39 | // the reply's contents are only valid if call() returned true.
40 | //
41 | // you should assume that call() will return an
42 | // error after a while if the server is dead.
43 | // don't provide your own time-out mechanism.
44 | //
45 | // please use call() to send all RPCs, in client.go and server.go.
46 | // please don't change this function.
47 | //
48 | func call(srv string, rpcname string,
49 | args interface{}, reply interface{}) bool {
50 | c, errx := rpc.Dial("unix", srv)
51 | if errx != nil {
52 | return false
53 | }
54 | defer c.Close()
55 |
56 | err := c.Call(rpcname, args, reply)
57 | if err == nil {
58 | return true
59 | }
60 |
61 | fmt.Println(err)
62 | return false
63 | }
64 |
65 | //
66 | // fetch the current value for a key.
67 | // returns "" if the key does not exist.
68 | // keeps trying forever in the face of all other errors.
69 | //
70 | func (ck *Clerk) Get(key string) string {
71 | // You will have to modify this function.
72 | args := &GetArgs{key, nrand()}
73 | reply := &GetReply{}
74 |
75 | var ok = false
76 | var i = 0
77 | for !ok {
78 | ok = call(ck.servers[i], "KVPaxos.Get", args, reply)
79 | if ok {
80 | break
81 | } else {
82 | //log.Printf("Get on server %v fails. change another one", reply.FailSrv)
83 | time.Sleep(100 * time.Millisecond)
84 | i += 1
85 | i %= len(ck.servers)
86 | }
87 | }
88 | return reply.Value
89 | }
90 |
91 | //
92 | // shared by Put and Append.
93 | //
94 | func (ck *Clerk) PutAppend(key string, value string, op string) {
95 | // You will have to modify this function.
96 | args := &PutAppendArgs{key, value, op, nrand()}
97 | reply := &PutAppendReply{}
98 | var ok = false
99 | var i = 0
100 | for !ok {
101 | ok = call(ck.servers[i], "KVPaxos.PutAppend", args, reply)
102 | if ok {
103 | break
104 | } else {
105 | //log.Printf("PutAppend on server %v fails. change another one", reply.FailSrv)
106 | time.Sleep(100 * time.Millisecond)
107 | i += 1
108 | i %= len(ck.servers)
109 | }
110 | }
111 | }
112 |
113 | func (ck *Clerk) Put(key string, value string) {
114 | ck.PutAppend(key, value, "Put")
115 | }
116 | func (ck *Clerk) Append(key string, value string) {
117 | ck.PutAppend(key, value, "Append")
118 | }
119 |
--------------------------------------------------------------------------------
/hw1/src/mapreduce/master.go:
--------------------------------------------------------------------------------
1 | package mapreduce
2 |
3 | import (
4 | "container/list"
5 | )
6 | import "fmt"
7 |
8 |
9 | type WorkerInfo struct {
10 | address string
11 | // You can add definitions here.
12 |
13 | }
14 |
15 |
16 | // Clean up all workers by sending a Shutdown RPC to each one of them. Collect
17 | // the number of jobs each worker has performed.
18 | func (mr *MapReduce) KillWorkers() *list.List {
19 | l := list.New()
20 | for _, w := range mr.Workers {
21 | DPrintf("DoWork: shutdown %s\n", w.address)
22 | args := &ShutdownArgs{}
23 | var reply ShutdownReply
24 | ok := call(w.address, "Worker.Shutdown", args, &reply)
25 | if ok == false {
26 | fmt.Printf("DoWork: RPC %s shutdown error\n", w.address)
27 | } else {
28 | l.PushBack(reply.Njobs)
29 | }
30 | }
31 | return l
32 | }
33 |
34 | func setupWorkers(mr *MapReduce) {
35 | for wkr := range mr.registerChannel {
36 | mr.Workers[wkr] = &WorkerInfo{address: wkr}
37 | mr.availableWorkers <- wkr
38 | }
39 | }
40 |
41 | func allocate(mr *MapReduce) {
42 | for i := 0; i < mr.nMap; i++ {
43 | mr.remainMapJobs <- i
44 | }
45 | for i := 0; i < mr.nReduce; i++ {
46 | mr.remainReduceJobs <- i
47 | }
48 | }
49 |
50 | func doMap(mr *MapReduce) {
51 | for job := range mr.remainMapJobs { // keep listening
52 | wkr := <-mr.availableWorkers
53 | go func(job int, wkr string) {
54 |
55 | args := &DoJobArgs{File: mr.file, Operation: Map,
56 | JobNumber: job, NumOtherPhase: mr.nReduce}
57 |
58 | var reply DoJobReply
59 | ok := call(wkr, "Worker.DoJob", args, &reply)
60 | if ok == false {
61 | fmt.Printf("DoMap: RPC %s do job failure! reassign the job...\n", wkr)
62 | mr.remainMapJobs <- job
63 | } else {
64 | mr.availableWorkers <- wkr
65 | mr.nCount <- true
66 | }
67 | }(job, wkr)
68 | }
69 | }
70 |
71 |
72 | func doReduce(mr *MapReduce) {
73 | for job := range mr.remainReduceJobs { // keep listening
74 | wkr := <-mr.availableWorkers
75 | go func(job int, wkr string) {
76 |
77 | args := &DoJobArgs{File: mr.file, Operation: Reduce,
78 | JobNumber: job, NumOtherPhase: mr.nMap}
79 |
80 | var reply DoJobReply
81 | ok := call(wkr, "Worker.DoJob", args, &reply)
82 | if ok == false {
83 | fmt.Printf("DoReduce: RPC %s do job failure! reassign the job...\n", wkr)
84 | mr.remainReduceJobs <- job
85 | } else {
86 | mr.availableWorkers <- wkr
87 | mr.nCount <- true
88 | }
89 | }(job, wkr)
90 | }
91 | }
92 |
93 | func mapCountFinishJobs(mr *MapReduce) {
94 | cnt := 0
95 | for range mr.nCount {
96 | cnt += 1
97 | if cnt == mr.nMap {
98 | break
99 | }
100 | }
101 | close(mr.remainMapJobs)
102 | mr.donePhase <- true
103 | }
104 |
105 | func reduceCountFinishJobs(mr *MapReduce) {
106 | cnt := 0
107 | for range mr.nCount {
108 | cnt += 1
109 | if cnt == mr.nReduce {
110 | break
111 | }
112 | }
113 | close(mr.remainReduceJobs)
114 | mr.donePhase <- true
115 | }
116 |
117 |
118 | func (mr *MapReduce) RunMaster() *list.List {
119 | // Your code here
120 |
121 |
122 | go setupWorkers(mr)
123 | go allocate(mr)
124 |
125 | // Map Phase
126 | go mapCountFinishJobs(mr)
127 | doMap(mr)
128 | <-mr.donePhase
129 | // Reduce Phase
130 | go reduceCountFinishJobs(mr)
131 | doReduce(mr)
132 | <-mr.donePhase
133 | return mr.KillWorkers()
134 | }
135 |
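// Note: the coordination fields used above are assumed to be declared on the
// MapReduce struct in mapreduce.go; their shapes below are inferred from how this
// file uses them (buffer sizes and any other fields are not shown here):
//
//   Workers          map[string]*WorkerInfo // registered workers, keyed by address
//   registerChannel  chan string            // worker addresses arriving via Register RPCs
//   availableWorkers chan string            // idle workers ready for another job
//   remainMapJobs    chan int               // map job numbers still outstanding
//   remainReduceJobs chan int               // reduce job numbers still outstanding
//   nCount           chan bool              // one message per successfully finished job
//   donePhase        chan bool              // signals that a whole phase has completed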
--------------------------------------------------------------------------------
/hw3/src/pbservice/server.go:
--------------------------------------------------------------------------------
1 | package pbservice
2 |
3 | import "net"
4 | import "fmt"
5 | import "net/rpc"
6 | import "log"
7 | import "time"
8 | import "viewservice"
9 | import "sync"
10 | import "sync/atomic"
11 | import "os"
12 | import "syscall"
13 | import "math/rand"
14 |
15 |
16 |
17 | type PBServer struct {
18 | mu sync.Mutex
19 | l net.Listener
20 | dead int32 // for testing
21 | unreliable int32 // for testing
22 | me string
23 | vs *viewservice.Clerk
24 | // Your declarations here.
25 | }
26 |
27 |
28 | func (pb *PBServer) Get(args *GetArgs, reply *GetReply) error {
29 |
30 | // Your code here.
31 |
32 | return nil
33 | }
34 |
35 |
36 | func (pb *PBServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error {
37 |
38 | // Your code here.
39 |
40 |
41 | return nil
42 | }
43 |
44 |
45 | //
46 | // ping the viewserver periodically.
47 | // if view changed:
48 | // transition to new view.
49 | // manage transfer of state from primary to new backup.
50 | //
51 | func (pb *PBServer) tick() {
52 |
53 | // Your code here.
54 | }
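// A sketch of the usual tick() flow, kept as a comment (the cached-view field and
// any state-transfer RPC are assumptions beyond this skeleton):
//   view, _ := pb.vs.Ping(viewnumOfCachedView)
//   if view differs from the cached one:
//     - adopt it (and use its Viewnum on the next Ping), and
//     - if this server is the primary and the view names a new backup, push a full
//       copy of the key/value state to that backup before replying to new requests.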
55 |
56 | // tell the server to shut itself down.
57 | // please do not change these two functions.
58 | func (pb *PBServer) kill() {
59 | atomic.StoreInt32(&pb.dead, 1)
60 | pb.l.Close()
61 | }
62 |
63 | // call this to find out if the server is dead.
64 | func (pb *PBServer) isdead() bool {
65 | return atomic.LoadInt32(&pb.dead) != 0
66 | }
67 |
68 | // please do not change these two functions.
69 | func (pb *PBServer) setunreliable(what bool) {
70 | if what {
71 | atomic.StoreInt32(&pb.unreliable, 1)
72 | } else {
73 | atomic.StoreInt32(&pb.unreliable, 0)
74 | }
75 | }
76 |
77 | func (pb *PBServer) isunreliable() bool {
78 | return atomic.LoadInt32(&pb.unreliable) != 0
79 | }
80 |
81 |
82 | func StartServer(vshost string, me string) *PBServer {
83 | pb := new(PBServer)
84 | pb.me = me
85 | pb.vs = viewservice.MakeClerk(me, vshost)
86 | // Your pb.* initializations here.
87 |
88 | rpcs := rpc.NewServer()
89 | rpcs.Register(pb)
90 |
91 | os.Remove(pb.me)
92 | l, e := net.Listen("unix", pb.me)
93 | if e != nil {
94 | log.Fatal("listen error: ", e)
95 | }
96 | pb.l = l
97 |
98 | // please do not change any of the following code,
99 | // or do anything to subvert it.
100 |
101 | go func() {
102 | for pb.isdead() == false {
103 | conn, err := pb.l.Accept()
104 | if err == nil && pb.isdead() == false {
105 | if pb.isunreliable() && (rand.Int63()%1000) < 100 {
106 | // discard the request.
107 | conn.Close()
108 | } else if pb.isunreliable() && (rand.Int63()%1000) < 200 {
109 | // process the request but force discard of reply.
110 | c1 := conn.(*net.UnixConn)
111 | f, _ := c1.File()
112 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
113 | if err != nil {
114 | fmt.Printf("shutdown: %v\n", err)
115 | }
116 | go rpcs.ServeConn(conn)
117 | } else {
118 | go rpcs.ServeConn(conn)
119 | }
120 | } else if err == nil {
121 | conn.Close()
122 | }
123 | if err != nil && pb.isdead() == false {
124 | fmt.Printf("PBServer(%v) accept: %v\n", me, err.Error())
125 | pb.kill()
126 | }
127 | }
128 | }()
129 |
130 | go func() {
131 | for pb.isdead() == false {
132 | pb.tick()
133 | time.Sleep(viewservice.PingInterval)
134 | }
135 | }()
136 |
137 | return pb
138 | }
139 |
--------------------------------------------------------------------------------
/hw4/src/pbservice/server.go:
--------------------------------------------------------------------------------
1 | package pbservice
2 |
3 | import "net"
4 | import "fmt"
5 | import "net/rpc"
6 | import "log"
7 | import "time"
8 | import "viewservice"
9 | import "sync"
10 | import "sync/atomic"
11 | import "os"
12 | import "syscall"
13 | import "math/rand"
14 |
15 |
16 |
17 | type PBServer struct {
18 | mu sync.Mutex
19 | l net.Listener
20 | dead int32 // for testing
21 | unreliable int32 // for testing
22 | me string
23 | vs *viewservice.Clerk
24 | // Your declarations here.
25 | }
26 |
27 |
28 | func (pb *PBServer) Get(args *GetArgs, reply *GetReply) error {
29 |
30 | // Your code here.
31 |
32 | return nil
33 | }
34 |
35 |
36 | func (pb *PBServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error {
37 |
38 | // Your code here.
39 |
40 |
41 | return nil
42 | }
43 |
44 |
45 | //
46 | // ping the viewserver periodically.
47 | // if view changed:
48 | // transition to new view.
49 | // manage transfer of state from primary to new backup.
50 | //
51 | func (pb *PBServer) tick() {
52 |
53 | // Your code here.
54 | }
55 |
56 | // tell the server to shut itself down.
57 | // please do not change these two functions.
58 | func (pb *PBServer) kill() {
59 | atomic.StoreInt32(&pb.dead, 1)
60 | pb.l.Close()
61 | }
62 |
63 | // call this to find out if the server is dead.
64 | func (pb *PBServer) isdead() bool {
65 | return atomic.LoadInt32(&pb.dead) != 0
66 | }
67 |
68 | // please do not change these two functions.
69 | func (pb *PBServer) setunreliable(what bool) {
70 | if what {
71 | atomic.StoreInt32(&pb.unreliable, 1)
72 | } else {
73 | atomic.StoreInt32(&pb.unreliable, 0)
74 | }
75 | }
76 |
77 | func (pb *PBServer) isunreliable() bool {
78 | return atomic.LoadInt32(&pb.unreliable) != 0
79 | }
80 |
81 |
82 | func StartServer(vshost string, me string) *PBServer {
83 | pb := new(PBServer)
84 | pb.me = me
85 | pb.vs = viewservice.MakeClerk(me, vshost)
86 | // Your pb.* initializations here.
87 |
88 | rpcs := rpc.NewServer()
89 | rpcs.Register(pb)
90 |
91 | os.Remove(pb.me)
92 | l, e := net.Listen("unix", pb.me)
93 | if e != nil {
94 | log.Fatal("listen error: ", e)
95 | }
96 | pb.l = l
97 |
98 | // please do not change any of the following code,
99 | // or do anything to subvert it.
100 |
101 | go func() {
102 | for pb.isdead() == false {
103 | conn, err := pb.l.Accept()
104 | if err == nil && pb.isdead() == false {
105 | if pb.isunreliable() && (rand.Int63()%1000) < 100 {
106 | // discard the request.
107 | conn.Close()
108 | } else if pb.isunreliable() && (rand.Int63()%1000) < 200 {
109 | // process the request but force discard of reply.
110 | c1 := conn.(*net.UnixConn)
111 | f, _ := c1.File()
112 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
113 | if err != nil {
114 | fmt.Printf("shutdown: %v\n", err)
115 | }
116 | go rpcs.ServeConn(conn)
117 | } else {
118 | go rpcs.ServeConn(conn)
119 | }
120 | } else if err == nil {
121 | conn.Close()
122 | }
123 | if err != nil && pb.isdead() == false {
124 | fmt.Printf("PBServer(%v) accept: %v\n", me, err.Error())
125 | pb.kill()
126 | }
127 | }
128 | }()
129 |
130 | go func() {
131 | for pb.isdead() == false {
132 | pb.tick()
133 | time.Sleep(viewservice.PingInterval)
134 | }
135 | }()
136 |
137 | return pb
138 | }
139 |
--------------------------------------------------------------------------------
/hw4/src/kvpaxos/client.go:
--------------------------------------------------------------------------------
1 | package kvpaxos
2 |
3 | import (
4 | "net/rpc"
5 | "time"
6 | )
7 | import "crypto/rand"
8 | import "math/big"
9 |
10 | import "fmt"
11 |
12 | type Clerk struct {
13 | servers []string
14 | // You will have to modify this struct.
15 | ClientID int64
16 | Seq int
17 | }
18 |
19 | func nrand() int64 {
20 | max := big.NewInt(int64(1) << 62)
21 | bigx, _ := rand.Int(rand.Reader, max)
22 | x := bigx.Int64()
23 | return x
24 | }
25 |
26 | func MakeClerk(servers []string) *Clerk {
27 | ck := new(Clerk)
28 | ck.servers = servers
29 | // You'll have to add code here.
30 | ck.ClientID = nrand()
31 | ck.Seq = 0
32 | return ck
33 | }
34 |
35 | //
36 | // call() sends an RPC to the rpcname handler on server srv
37 | // with arguments args, waits for the reply, and leaves the
38 | // reply in reply. the reply argument should be a pointer
39 | // to a reply structure.
40 | //
41 | // the return value is true if the server responded, and false
42 | // if call() was not able to contact the server. in particular,
43 | // the reply's contents are only valid if call() returned true.
44 | //
45 | // you should assume that call() will return an
46 | // error after a while if the server is dead.
47 | // don't provide your own time-out mechanism.
48 | //
49 | // please use call() to send all RPCs, in client.go and server.go.
50 | // please don't change this function.
51 | //
52 | func call(srv string, rpcname string,
53 | args interface{}, reply interface{}) bool {
54 | c, errx := rpc.Dial("unix", srv)
55 | if errx != nil {
56 | return false
57 | }
58 | defer c.Close()
59 |
60 | err := c.Call(rpcname, args, reply)
61 | if err == nil {
62 | return true
63 | }
64 |
65 | fmt.Println(err)
66 | return false
67 | }
68 |
69 | //
70 | // fetch the current value for a key.
71 | // returns "" if the key does not exist.
72 | // keeps trying forever in the face of all other errors.
73 | //
74 | func (ck *Clerk) Get(key string) string {
75 | // You will have to modify this function.
76 | ck.Seq += 1
77 | args := &GetArgs{Key: key, Op: "Get", Seq: ck.Seq, ClientID: ck.ClientID}
78 | reply := &GetReply{}
79 |
80 | var ok = false
81 | var i = 0
82 | for !ok {
83 | ok = call(ck.servers[i], "KVPaxos.Get", args, reply)
84 | if ok && reply.Err == OK {
85 | break
86 | } else {
87 | //log.Printf("Get on server %v fails. change another one", reply.FailSrv)
88 | time.Sleep(100 * time.Millisecond)
89 | i += 1
90 | i %= len(ck.servers)
91 | }
92 | }
93 | return reply.Value
94 | }
95 |
96 | //
97 | // shared by Put and Append.
98 | //
99 | func (ck *Clerk) PutAppend(key string, value string, op string) {
100 | // You will have to modify this function.
101 | ck.Seq += 1
102 | args := &PutAppendArgs{Key: key, Value: value, Op: op, Seq: ck.Seq, ClientID: ck.ClientID}
103 | reply := &PutAppendReply{}
104 | var ok = false
105 | var i = 0
106 | for !ok {
107 | ok = call(ck.servers[i], "KVPaxos.PutAppend", args, reply)
108 | if ok && reply.Err == OK {
109 | break
110 | } else {
111 | //log.Printf("PutAppend on server %v fails. change another one", reply.FailSrv)
112 | time.Sleep(100 * time.Millisecond)
113 | i += 1
114 | i %= len(ck.servers)
115 | }
116 | }
117 | }
118 |
119 | func (ck *Clerk) Put(key string, value string) {
120 | ck.PutAppend(key, value, "Put")
121 | }
122 | func (ck *Clerk) Append(key string, value string) {
123 | ck.PutAppend(key, value, "Append")
124 | }
125 |
--------------------------------------------------------------------------------
/hw2/src/pbservice/client.go:
--------------------------------------------------------------------------------
1 | package pbservice
2 |
3 | import (
4 | "time"
5 | "viewservice"
6 | )
7 | import "net/rpc"
8 | import "fmt"
9 |
10 | import "crypto/rand"
11 | import "math/big"
12 |
13 |
14 | type Clerk struct {
15 | vs *viewservice.Clerk
16 | // Your declarations here
17 | currPrimary string
18 | }
19 |
20 | // this may come in handy.
21 | func nrand() int64 {
22 | max := big.NewInt(int64(1) << 62)
23 | bigx, _ := rand.Int(rand.Reader, max)
24 | x := bigx.Int64()
25 | return x
26 | }
27 |
28 | func MakeClerk(vshost string, me string) *Clerk {
29 | ck := new(Clerk)
30 | ck.vs = viewservice.MakeClerk(me, vshost)
31 | // Your ck.* initializations here
32 | ck.currPrimary = "" // initially, the current primary stored in cache is none
33 |
34 | return ck
35 | }
36 |
37 |
38 | //
39 | // call() sends an RPC to the rpcname handler on server srv
40 | // with arguments args, waits for the reply, and leaves the
41 | // reply in reply. the reply argument should be a pointer
42 | // to a reply structure.
43 | //
44 | // the return value is true if the server responded, and false
45 | // if call() was not able to contact the server. in particular,
46 | // the reply's contents are only valid if call() returned true.
47 | //
48 | // you should assume that call() will return an
49 | // error after a while if the server is dead.
50 | // don't provide your own time-out mechanism.
51 | //
52 | // please use call() to send all RPCs, in client.go and server.go.
53 | // please don't change this function.
54 | //
55 | func call(srv string, rpcname string,
56 | args interface{}, reply interface{}) bool {
57 | c, errx := rpc.Dial("unix", srv)
58 | if errx != nil {
59 | return false
60 | }
61 | defer c.Close()
62 |
63 | err := c.Call(rpcname, args, reply)
64 | if err == nil {
65 | return true
66 | }
67 |
68 | fmt.Println(err)
69 | return false
70 | }
71 |
72 | //
73 | // fetch a key's value from the current primary;
74 | // if the key has never been set, return "".
75 | // Get() must keep trying until either the
76 | // primary replies with the value or the primary
77 | // says the key doesn't exist (has never been Put()).
78 | //
79 | func (ck *Clerk) Get(key string) string {
80 |
81 | // Your code here.
82 | // if the current view has no primary, keep asking the view service until a primary shows up
83 | for ck.currPrimary == "" {
84 | view, _ := ck.vs.Get()
85 | ck.currPrimary = view.Primary
86 | }
87 | // a client-side lock might be needed here, but all test cases pass without one, so it is omitted
88 |
89 | args := &GetArgs{key}
90 | var reply GetReply
91 |
92 | ok := false
93 | // clients keep re-trying until they get an answer.
94 | for ok == false {
95 | //log.Printf("%v start %v", ck.currPrimary, args.Key)
96 | ok = call(ck.currPrimary, "PBServer.Get", args, &reply)
97 | //log.Printf("%v end %v", ck.currPrimary, args.Key)
98 | if ok {
99 | // everything works fine
100 | break
101 | } else {
102 | // case 1. if the current primary is dead
103 | // case 2. the network is unavailable temporarily
104 | // case 3. the contacted server doesn't consider itself the primary
105 | // (case 3 surfaces as a "NOT THE PRIMARY YET" error)
106 |
107 | // refresh the cached primary from the view service
108 | time.Sleep(viewservice.PingInterval)
109 | view, _ := ck.vs.Get()
110 | ck.currPrimary = view.Primary
111 | }
112 | }
113 | return reply.Value
114 | }
115 |
116 | //
117 | // send a Put or Append RPC
118 | //
119 | func (ck *Clerk) PutAppend(key string, value string, op string) {
120 |
121 | // Your code here.
122 | for ck.currPrimary == "" {
123 | view, _ := ck.vs.Get()
124 | ck.currPrimary = view.Primary
125 | }
126 |
127 | // nrand(): make the k/v service can detect duplicates.
128 | args := &PutAppendArgs{key, value, op, nrand()}
129 | var reply PutAppendReply
130 |
131 | ok := false
132 | for ok == false {
133 | //log.Printf("%v", ck.currPrimary)
134 | ok = call(ck.currPrimary, "PBServer.PutAppend", args, &reply)
135 | //log.Printf("%v, %v", ok, ck.currPrimary)
136 | if ok {
137 | // everything works fine
138 | break
139 | } else {
140 | time.Sleep(viewservice.PingInterval)
141 | view, _ := ck.vs.Get()
142 | ck.currPrimary = view.Primary
143 | }
144 | }
145 | }
146 |
147 | //
148 | // tell the primary to update key's value.
149 | // must keep trying until it succeeds.
150 | //
151 | func (ck *Clerk) Put(key string, value string) {
152 | ck.PutAppend(key, value, "Put")
153 | }
154 |
155 | //
156 | // tell the primary to append to key's value.
157 | // must keep trying until it succeeds.
158 | //
159 | func (ck *Clerk) Append(key string, value string) {
160 | ck.PutAppend(key, value, "Append")
161 | }
162 |
--------------------------------------------------------------------------------
/hw4/src/shardkv/client.go:
--------------------------------------------------------------------------------
1 | package shardkv
2 |
3 | import (
4 | "crypto/rand"
5 | "fmt"
6 | "math/big"
7 | "net/rpc"
8 | "shardmaster"
9 | "sync"
10 | "time"
11 | )
12 |
13 | type Clerk struct {
14 | mu sync.Mutex // one RPC at a time
15 | sm *shardmaster.Clerk
16 | config shardmaster.Config
17 | clientID int64
18 | seq int
19 | }
20 |
21 | func nrand() int64 {
22 | max := big.NewInt(int64(1) << 62)
23 | bigx, _ := rand.Int(rand.Reader, max)
24 | x := bigx.Int64()
25 | return x
26 | }
27 |
28 | func MakeClerk(shardmasters []string) *Clerk {
29 | ck := new(Clerk)
30 | ck.sm = shardmaster.MakeClerk(shardmasters)
31 | ck.config = ck.sm.Query(-1)
32 | ck.clientID = nrand()
33 | ck.seq = 0
34 | return ck
35 | }
36 |
37 | //
38 | // call() sends an RPC to the rpcname handler on server srv
39 | // with arguments args, waits for the reply, and leaves the
40 | // reply in reply. the reply argument should be a pointer
41 | // to a reply structure.
42 | //
43 | // the return value is true if the server responded, and false
44 | // if call() was not able to contact the server. in particular,
45 | // the reply's contents are only valid if call() returned true.
46 | //
47 | // you should assume that call() will return an
48 | // error after a while if the server is dead.
49 | // don't provide your own time-out mechanism.
50 | //
51 | // please use call() to send all RPCs, in client.go and server.go.
52 | // please don't change this function.
53 | //
54 | func call(srv string, rpcname string,
55 | args interface{}, reply interface{}) bool {
56 | c, errx := rpc.Dial("unix", srv)
57 | if errx != nil {
58 | return false
59 | }
60 | defer c.Close()
61 |
62 | err := c.Call(rpcname, args, reply)
63 | if err == nil {
64 | return true
65 | }
66 |
67 | fmt.Println(err)
68 | return false
69 | }
70 |
71 | //
72 | // which shard is a key in?
73 | // please use this function,
74 | // and please do not change it.
75 | //
76 | func key2shard(key string) int {
77 | shard := 0
78 | if len(key) > 0 {
79 | shard = int(key[0])
80 | }
81 | shard %= shardmaster.NShards
82 | return shard
83 | }
84 |
85 | //
86 | // fetch the current value for a key.
87 | // returns "" if the key does not exist.
88 | // keeps trying forever in the face of all other errors.
89 | //
90 | func (ck *Clerk) Get(key string) string {
91 | ck.mu.Lock()
92 | defer ck.mu.Unlock()
93 |
94 | ck.seq++
95 | for {
96 | shard := key2shard(key)
97 |
98 | gid := ck.config.Shards[shard]
99 | servers, ok := ck.config.Groups[gid]
100 |
101 | if ok {
102 | // try each server in the shard's replication group.
103 | for _, srv := range servers {
104 | args := &GetArgs{}
105 | args.Key = key
106 | args.ID = ck.clientID
107 | args.Seq = ck.seq
108 | args.ConfigNum = ck.config.Num
109 | args.Shard = shard
110 | var reply GetReply
111 | ok := call(srv, "ShardKV.Get", args, &reply)
112 | if ok && (reply.Err == OK || reply.Err == ErrNoKey) {
113 | return reply.Value
114 | }
115 | if ok && (reply.Err == ErrWrongGroup) {
116 | continue
117 | }
118 | }
119 | }
120 |
121 | time.Sleep(100 * time.Millisecond)
122 |
123 | // ask master for a new configuration.
124 | ck.config = ck.sm.Query(-1)
125 | }
126 | }
127 |
128 | // send a Put or Append request.
129 | func (ck *Clerk) PutAppend(key string, value string, op string) {
130 | ck.mu.Lock()
131 | defer ck.mu.Unlock()
132 |
133 | ck.seq++
134 | for {
135 | shard := key2shard(key)
136 |
137 | gid := ck.config.Shards[shard]
138 |
139 | servers, ok := ck.config.Groups[gid]
140 |
141 | if ok {
142 | // try each server in the shard's replication group.
143 | for _, srv := range servers {
144 | args := &PutAppendArgs{}
145 | args.Key = key
146 | args.Value = value
147 | args.ID = ck.clientID
148 | // ck.seq is for dealing with duplicate client RPCs
149 | args.Seq = ck.seq
150 | args.Op = op
151 | args.Shard = shard
152 | args.ConfigNum = ck.config.Num
153 | var reply PutAppendReply
154 | ok := call(srv, "ShardKV.PutAppend", args, &reply)
155 | if ok && reply.Err == OK {
156 | return
157 | }
158 | // When the client receives ErrWrongGroup we keep the same ck.seq;
159 | // we just re-Query for the latest config and retry with the new group
160 | if ok && (reply.Err == ErrWrongGroup) {
161 | continue
162 | }
163 | }
164 | }
165 | // Retry if the replica group says it is not responsible for the key;
166 | // in that case, ask the shard master for the latest configuration and try again.
167 | time.Sleep(100 * time.Millisecond)
168 |
169 | // ask master for a new configuration.
170 | ck.config = ck.sm.Query(-1)
171 | }
172 | }
173 |
174 | func (ck *Clerk) Put(key string, value string) {
175 | ck.PutAppend(key, value, "Put")
176 | }
177 | func (ck *Clerk) Append(key string, value string) {
178 | ck.PutAppend(key, value, "Append")
179 | }
--------------------------------------------------------------------------------
/hw1/src/mapreduce/test_test.go:
--------------------------------------------------------------------------------
1 | package mapreduce
2 |
3 | import "testing"
4 | import "fmt"
5 | import "time"
6 | import "container/list"
7 | import "strings"
8 | import "os"
9 | import "bufio"
10 | import "log"
11 | import "sort"
12 | import "strconv"
13 |
14 | const (
15 | nNumber = 100000
16 | nMap = 100
17 | nReduce = 50
18 | )
19 |
20 | // Create input file with N numbers
21 | // Check if we have N numbers in output file
22 |
23 | // Split in words
24 | func MapFunc(value string) *list.List {
25 | DPrintf("Map %v\n", value)
26 | res := list.New()
27 | words := strings.Fields(value)
28 | for _, w := range words {
29 | kv := KeyValue{w, ""}
30 | res.PushBack(kv)
31 | }
32 | return res
33 | }
34 |
35 | // Just return key
36 | func ReduceFunc(key string, values *list.List) string {
37 | for e := values.Front(); e != nil; e = e.Next() {
38 | DPrintf("Reduce %s %v\n", key, e.Value)
39 | }
40 | return ""
41 | }
42 |
43 | // Checks input file against output file: each input number should show up
44 | // in the output file in string sorted order
45 | func check(t *testing.T, file string) {
46 | input, err := os.Open(file)
47 | if err != nil {
48 | log.Fatal("check: ", err)
49 | }
50 | defer input.Close()
51 | output, err := os.Open("mrtmp." + file)
52 | if err != nil {
53 | log.Fatal("check: ", err)
54 | }
55 | defer output.Close()
56 |
57 | var lines []string
58 | inputScanner := bufio.NewScanner(input)
59 | for inputScanner.Scan() {
60 | lines = append(lines, inputScanner.Text())
61 | }
62 |
63 | sort.Strings(lines)
64 |
65 | outputScanner := bufio.NewScanner(output)
66 | i := 0
67 | for outputScanner.Scan() {
68 | var v1 int
69 | var v2 int
70 | text := outputScanner.Text()
71 | n, err := fmt.Sscanf(lines[i], "%d", &v1)
72 | if n == 1 && err == nil {
73 | n, err = fmt.Sscanf(text, "%d", &v2)
74 | }
75 | if err != nil || v1 != v2 {
76 | t.Fatalf("line %d: %d != %d err %v\n", i, v1, v2, err)
77 | }
78 | i += 1
79 | }
80 | if i != nNumber {
81 | t.Fatalf("Expected %d lines in output\n", nNumber)
82 | }
83 | }
84 |
85 | // Workers report back how many RPCs they have processed in the Shutdown reply.
86 | // Check that they processed at least 1 RPC.
87 | func checkWorker(t *testing.T, l *list.List) {
88 | for e := l.Front(); e != nil; e = e.Next() {
89 | if e.Value == 0 {
90 | t.Fatalf("Some worker didn't do any work\n")
91 | }
92 | }
93 | }
94 |
95 | // Make input file
96 | func makeInput() string {
97 | name := "824-mrinput.txt"
98 | file, err := os.Create(name)
99 | if err != nil {
100 | log.Fatal("mkInput: ", err)
101 | }
102 | w := bufio.NewWriter(file)
103 | for i := 0; i < nNumber; i++ {
104 | fmt.Fprintf(w, "%d\n", i)
105 | }
106 | w.Flush()
107 | file.Close()
108 | return name
109 | }
110 |
111 | // Cook up a unique-ish UNIX-domain socket name
112 | // in /var/tmp. can't use current directory since
113 | // AFS doesn't support UNIX-domain sockets.
114 | func port(suffix string) string {
115 | s := "/var/tmp/824-"
116 | s += strconv.Itoa(os.Getuid()) + "/"
117 | os.Mkdir(s, 0777)
118 | s += "mr"
119 | s += strconv.Itoa(os.Getpid()) + "-"
120 | s += suffix
121 | return s
122 | }
123 |
124 | func setup() *MapReduce {
125 | file := makeInput()
126 | master := port("master")
127 | mr := MakeMapReduce(nMap, nReduce, file, master)
128 | return mr
129 | }
130 |
131 | func cleanup(mr *MapReduce) {
132 | mr.CleanupFiles()
133 | RemoveFile(mr.file)
134 | }
135 |
136 | func TestBasic(t *testing.T) {
137 | fmt.Printf("Test: Basic mapreduce ...\n")
138 | mr := setup()
139 | for i := 0; i < 2; i++ {
140 | go RunWorker(mr.MasterAddress, port("worker"+strconv.Itoa(i)),
141 | MapFunc, ReduceFunc, -1)
142 | }
143 | // Wait until MR is done
144 | <-mr.DoneChannel
145 | check(t, mr.file)
146 | checkWorker(t, mr.stats)
147 | cleanup(mr)
148 | fmt.Printf(" ... Basic Passed\n")
149 | }
150 |
151 | func TestOneFailure(t *testing.T) {
152 | fmt.Printf("Test: One Failure mapreduce ...\n")
153 | mr := setup()
154 | // Start 2 workers that fail after 10 jobs
155 | go RunWorker(mr.MasterAddress, port("worker"+strconv.Itoa(0)),
156 | MapFunc, ReduceFunc, 10)
157 | go RunWorker(mr.MasterAddress, port("worker"+strconv.Itoa(1)),
158 | MapFunc, ReduceFunc, -1)
159 | // Wait until MR is done
160 | <-mr.DoneChannel
161 | check(t, mr.file)
162 | checkWorker(t, mr.stats)
163 | cleanup(mr)
164 | fmt.Printf(" ... One Failure Passed\n")
165 | }
166 |
167 | func TestManyFailures(t *testing.T) {
168 | fmt.Printf("Test: One ManyFailures mapreduce ...\n")
169 | mr := setup()
170 | i := 0
171 | done := false
172 | for !done {
173 | select {
174 | case done = <-mr.DoneChannel:
175 | check(t, mr.file)
176 | cleanup(mr)
177 | break
178 | default:
179 | // Start 2 workers each sec. The workers fail after 10 jobs
180 | w := port("worker" + strconv.Itoa(i))
181 | go RunWorker(mr.MasterAddress, w, MapFunc, ReduceFunc, 10)
182 | i++
183 | w = port("worker" + strconv.Itoa(i))
184 | go RunWorker(mr.MasterAddress, w, MapFunc, ReduceFunc, 10)
185 | i++
186 | time.Sleep(1 * time.Second)
187 | }
188 | }
189 |
190 | fmt.Printf(" ... Many Failures Passed\n")
191 | }
192 |
--------------------------------------------------------------------------------
/hw3/src/viewservice/test_test.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import "testing"
4 | import "runtime"
5 | import "time"
6 | import "fmt"
7 | import "os"
8 | import "strconv"
9 |
10 | func check(t *testing.T, ck *Clerk, p string, b string, n uint) {
11 | view, _ := ck.Get()
12 | if view.Primary != p {
13 | t.Fatalf("wanted primary %v, got %v", p, view.Primary)
14 | }
15 | if view.Backup != b {
16 | t.Fatalf("wanted backup %v, got %v", b, view.Backup)
17 | }
18 | if n != 0 && n != view.Viewnum {
19 | t.Fatalf("wanted viewnum %v, got %v", n, view.Viewnum)
20 | }
21 | if ck.Primary() != p {
22 | t.Fatalf("wanted primary %v, got %v", p, ck.Primary())
23 | }
24 | }
25 |
26 | func port(suffix string) string {
27 | s := "/var/tmp/824-"
28 | s += strconv.Itoa(os.Getuid()) + "/"
29 | os.Mkdir(s, 0777)
30 | s += "viewserver-"
31 | s += strconv.Itoa(os.Getpid()) + "-"
32 | s += suffix
33 | return s
34 | }
35 |
36 | func Test1(t *testing.T) {
37 | runtime.GOMAXPROCS(4)
38 |
39 | vshost := port("v")
40 | vs := StartServer(vshost)
41 |
42 | ck1 := MakeClerk(port("1"), vshost)
43 | ck2 := MakeClerk(port("2"), vshost)
44 | ck3 := MakeClerk(port("3"), vshost)
45 |
46 | //
47 |
48 | if ck1.Primary() != "" {
49 | t.Fatalf("there was a primary too soon")
50 | }
51 |
52 | // very first primary
53 | fmt.Printf("Test: First primary ...\n")
54 |
55 | for i := 0; i < DeadPings*2; i++ {
56 | view, _ := ck1.Ping(0)
57 | if view.Primary == ck1.me {
58 | break
59 | }
60 | time.Sleep(PingInterval)
61 | }
62 | check(t, ck1, ck1.me, "", 1)
63 | fmt.Printf(" ... Passed\n")
64 |
65 | // very first backup
66 | fmt.Printf("Test: First backup ...\n")
67 |
68 | {
69 | vx, _ := ck1.Get()
70 | for i := 0; i < DeadPings*2; i++ {
71 | ck1.Ping(1)
72 | view, _ := ck2.Ping(0)
73 | if view.Backup == ck2.me {
74 | break
75 | }
76 | time.Sleep(PingInterval)
77 | }
78 | check(t, ck1, ck1.me, ck2.me, vx.Viewnum+1)
79 | }
80 | fmt.Printf(" ... Passed\n")
81 |
82 | // primary dies, backup should take over
83 | fmt.Printf("Test: Backup takes over if primary fails ...\n")
84 |
85 | {
86 | ck1.Ping(2)
87 | vx, _ := ck2.Ping(2)
88 | for i := 0; i < DeadPings*2; i++ {
89 | v, _ := ck2.Ping(vx.Viewnum)
90 | if v.Primary == ck2.me && v.Backup == "" {
91 | break
92 | }
93 | time.Sleep(PingInterval)
94 | }
95 | check(t, ck2, ck2.me, "", vx.Viewnum+1)
96 | }
97 | fmt.Printf(" ... Passed\n")
98 |
99 | // revive ck1, should become backup
100 | fmt.Printf("Test: Restarted server becomes backup ...\n")
101 |
102 | {
103 | vx, _ := ck2.Get()
104 | ck2.Ping(vx.Viewnum)
105 | for i := 0; i < DeadPings*2; i++ {
106 | ck1.Ping(0)
107 | v, _ := ck2.Ping(vx.Viewnum)
108 | if v.Primary == ck2.me && v.Backup == ck1.me {
109 | break
110 | }
111 | time.Sleep(PingInterval)
112 | }
113 | check(t, ck2, ck2.me, ck1.me, vx.Viewnum+1)
114 | }
115 | fmt.Printf(" ... Passed\n")
116 |
117 | // start ck3, kill the primary (ck2), the previous backup (ck1)
118 | // should become the primary, and ck3 the backup.
119 | // this should happen in a single view change, without
120 | // any period in which there's no backup.
121 | fmt.Printf("Test: Idle third server becomes backup if primary fails ...\n")
122 |
123 | {
124 | vx, _ := ck2.Get()
125 | ck2.Ping(vx.Viewnum)
126 | for i := 0; i < DeadPings*2; i++ {
127 | ck3.Ping(0)
128 | v, _ := ck1.Ping(vx.Viewnum)
129 | if v.Primary == ck1.me && v.Backup == ck3.me {
130 | break
131 | }
132 | vx = v
133 | time.Sleep(PingInterval)
134 | }
135 | check(t, ck1, ck1.me, ck3.me, vx.Viewnum+1)
136 | }
137 | fmt.Printf(" ... Passed\n")
138 |
139 | // kill and immediately restart the primary -- does viewservice
140 | // conclude primary is down even though it's pinging?
141 | fmt.Printf("Test: Restarted primary treated as dead ...\n")
142 |
143 | {
144 | vx, _ := ck1.Get()
145 | ck1.Ping(vx.Viewnum)
146 | for i := 0; i < DeadPings*2; i++ {
147 | ck1.Ping(0)
148 | ck3.Ping(vx.Viewnum)
149 | v, _ := ck3.Get()
150 | if v.Primary != ck1.me {
151 | break
152 | }
153 | time.Sleep(PingInterval)
154 | }
155 | vy, _ := ck3.Get()
156 | if vy.Primary != ck3.me {
157 | t.Fatalf("expected primary=%v, got %v\n", ck3.me, vy.Primary)
158 | }
159 | }
160 | fmt.Printf(" ... Passed\n")
161 |
162 | fmt.Printf("Test: Dead backup is removed from view ...\n")
163 |
164 | // set up a view with just 3 as primary,
165 | // to prepare for the next test.
166 | {
167 | for i := 0; i < DeadPings*3; i++ {
168 | vx, _ := ck3.Get()
169 | ck3.Ping(vx.Viewnum)
170 | time.Sleep(PingInterval)
171 | }
172 | v, _ := ck3.Get()
173 | if v.Primary != ck3.me || v.Backup != "" {
174 | t.Fatalf("wrong primary or backup")
175 | }
176 | }
177 | fmt.Printf(" ... Passed\n")
178 |
179 | // does viewserver wait for ack of previous view before
180 | // starting the next one?
181 | fmt.Printf("Test: Viewserver waits for primary to ack view ...\n")
182 |
183 | {
184 | // set up p=ck3 b=ck1,
185 | // but do not ack
186 | vx, _ := ck1.Get()
187 | for i := 0; i < DeadPings*3; i++ {
188 | ck1.Ping(0)
189 | ck3.Ping(vx.Viewnum)
190 | v, _ := ck1.Get()
191 | if v.Viewnum > vx.Viewnum {
192 | break
193 | }
194 | time.Sleep(PingInterval)
195 | }
196 | check(t, ck1, ck3.me, ck1.me, vx.Viewnum+1)
197 | vy, _ := ck1.Get()
198 | // ck3 is the primary, but it never acked.
199 | // let ck3 die. check that ck1 is not promoted.
200 | for i := 0; i < DeadPings*3; i++ {
201 | v, _ := ck1.Ping(vy.Viewnum)
202 | if v.Viewnum > vy.Viewnum {
203 | break
204 | }
205 | time.Sleep(PingInterval)
206 | }
207 | check(t, ck2, ck3.me, ck1.me, vy.Viewnum)
208 | }
209 | fmt.Printf(" ... Passed\n")
210 |
211 | // if old servers die, check that a new (uninitialized) server
212 | // cannot take over.
213 | fmt.Printf("Test: Uninitialized server can't become primary ...\n")
214 |
215 | {
216 | for i := 0; i < DeadPings*2; i++ {
217 | v, _ := ck1.Get()
218 | ck1.Ping(v.Viewnum)
219 | ck2.Ping(0)
220 | ck3.Ping(v.Viewnum)
221 | time.Sleep(PingInterval)
222 | }
223 | for i := 0; i < DeadPings*2; i++ {
224 | ck2.Ping(0)
225 | time.Sleep(PingInterval)
226 | }
227 | vz, _ := ck2.Get()
228 | if vz.Primary == ck2.me {
229 | t.Fatalf("uninitialized backup promoted to primary")
230 | }
231 | }
232 | fmt.Printf(" ... Passed\n")
233 |
234 | vs.Kill()
235 | }
236 |
--------------------------------------------------------------------------------
/hw4/src/viewservice/test_test.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import "testing"
4 | import "runtime"
5 | import "time"
6 | import "fmt"
7 | import "os"
8 | import "strconv"
9 |
10 | func check(t *testing.T, ck *Clerk, p string, b string, n uint) {
11 | view, _ := ck.Get()
12 | if view.Primary != p {
13 | t.Fatalf("wanted primary %v, got %v", p, view.Primary)
14 | }
15 | if view.Backup != b {
16 | t.Fatalf("wanted backup %v, got %v", b, view.Backup)
17 | }
18 | if n != 0 && n != view.Viewnum {
19 | t.Fatalf("wanted viewnum %v, got %v", n, view.Viewnum)
20 | }
21 | if ck.Primary() != p {
22 | t.Fatalf("wanted primary %v, got %v", p, ck.Primary())
23 | }
24 | }
25 |
26 | func port(suffix string) string {
27 | s := "/var/tmp/824-"
28 | s += strconv.Itoa(os.Getuid()) + "/"
29 | os.Mkdir(s, 0777)
30 | s += "viewserver-"
31 | s += strconv.Itoa(os.Getpid()) + "-"
32 | s += suffix
33 | return s
34 | }
35 |
36 | func Test1(t *testing.T) {
37 | runtime.GOMAXPROCS(4)
38 |
39 | vshost := port("v")
40 | vs := StartServer(vshost)
41 |
42 | ck1 := MakeClerk(port("1"), vshost)
43 | ck2 := MakeClerk(port("2"), vshost)
44 | ck3 := MakeClerk(port("3"), vshost)
45 |
46 | //
47 |
48 | if ck1.Primary() != "" {
49 | t.Fatalf("there was a primary too soon")
50 | }
51 |
52 | // very first primary
53 | fmt.Printf("Test: First primary ...\n")
54 |
55 | for i := 0; i < DeadPings*2; i++ {
56 | view, _ := ck1.Ping(0)
57 | if view.Primary == ck1.me {
58 | break
59 | }
60 | time.Sleep(PingInterval)
61 | }
62 | check(t, ck1, ck1.me, "", 1)
63 | fmt.Printf(" ... Passed\n")
64 |
65 | // very first backup
66 | fmt.Printf("Test: First backup ...\n")
67 |
68 | {
69 | vx, _ := ck1.Get()
70 | for i := 0; i < DeadPings*2; i++ {
71 | ck1.Ping(1)
72 | view, _ := ck2.Ping(0)
73 | if view.Backup == ck2.me {
74 | break
75 | }
76 | time.Sleep(PingInterval)
77 | }
78 | check(t, ck1, ck1.me, ck2.me, vx.Viewnum+1)
79 | }
80 | fmt.Printf(" ... Passed\n")
81 |
82 | // primary dies, backup should take over
83 | fmt.Printf("Test: Backup takes over if primary fails ...\n")
84 |
85 | {
86 | ck1.Ping(2)
87 | vx, _ := ck2.Ping(2)
88 | for i := 0; i < DeadPings*2; i++ {
89 | v, _ := ck2.Ping(vx.Viewnum)
90 | if v.Primary == ck2.me && v.Backup == "" {
91 | break
92 | }
93 | time.Sleep(PingInterval)
94 | }
95 | check(t, ck2, ck2.me, "", vx.Viewnum+1)
96 | }
97 | fmt.Printf(" ... Passed\n")
98 |
99 | // revive ck1, should become backup
100 | fmt.Printf("Test: Restarted server becomes backup ...\n")
101 |
102 | {
103 | vx, _ := ck2.Get()
104 | ck2.Ping(vx.Viewnum)
105 | for i := 0; i < DeadPings*2; i++ {
106 | ck1.Ping(0)
107 | v, _ := ck2.Ping(vx.Viewnum)
108 | if v.Primary == ck2.me && v.Backup == ck1.me {
109 | break
110 | }
111 | time.Sleep(PingInterval)
112 | }
113 | check(t, ck2, ck2.me, ck1.me, vx.Viewnum+1)
114 | }
115 | fmt.Printf(" ... Passed\n")
116 |
117 | 	// start ck3, kill the primary (ck2), the previous backup (ck1)
118 | 	// should become the primary, and ck3 the backup.
119 | // this should happen in a single view change, without
120 | // any period in which there's no backup.
121 | fmt.Printf("Test: Idle third server becomes backup if primary fails ...\n")
122 |
123 | {
124 | vx, _ := ck2.Get()
125 | ck2.Ping(vx.Viewnum)
126 | for i := 0; i < DeadPings*2; i++ {
127 | ck3.Ping(0)
128 | v, _ := ck1.Ping(vx.Viewnum)
129 | if v.Primary == ck1.me && v.Backup == ck3.me {
130 | break
131 | }
132 | vx = v
133 | time.Sleep(PingInterval)
134 | }
135 | check(t, ck1, ck1.me, ck3.me, vx.Viewnum+1)
136 | }
137 | fmt.Printf(" ... Passed\n")
138 |
139 | // kill and immediately restart the primary -- does viewservice
140 | // conclude primary is down even though it's pinging?
141 | fmt.Printf("Test: Restarted primary treated as dead ...\n")
142 |
143 | {
144 | vx, _ := ck1.Get()
145 | ck1.Ping(vx.Viewnum)
146 | for i := 0; i < DeadPings*2; i++ {
147 | ck1.Ping(0)
148 | ck3.Ping(vx.Viewnum)
149 | v, _ := ck3.Get()
150 | if v.Primary != ck1.me {
151 | break
152 | }
153 | time.Sleep(PingInterval)
154 | }
155 | vy, _ := ck3.Get()
156 | if vy.Primary != ck3.me {
157 | t.Fatalf("expected primary=%v, got %v\n", ck3.me, vy.Primary)
158 | }
159 | }
160 | fmt.Printf(" ... Passed\n")
161 |
162 | fmt.Printf("Test: Dead backup is removed from view ...\n")
163 |
164 | // set up a view with just 3 as primary,
165 | // to prepare for the next test.
166 | {
167 | for i := 0; i < DeadPings*3; i++ {
168 | vx, _ := ck3.Get()
169 | ck3.Ping(vx.Viewnum)
170 | time.Sleep(PingInterval)
171 | }
172 | v, _ := ck3.Get()
173 | if v.Primary != ck3.me || v.Backup != "" {
174 | t.Fatalf("wrong primary or backup")
175 | }
176 | }
177 | fmt.Printf(" ... Passed\n")
178 |
179 | // does viewserver wait for ack of previous view before
180 | // starting the next one?
181 | fmt.Printf("Test: Viewserver waits for primary to ack view ...\n")
182 |
183 | {
184 | 		// set up p=ck3 b=ck1,
185 | // but do not ack
186 | vx, _ := ck1.Get()
187 | for i := 0; i < DeadPings*3; i++ {
188 | ck1.Ping(0)
189 | ck3.Ping(vx.Viewnum)
190 | v, _ := ck1.Get()
191 | if v.Viewnum > vx.Viewnum {
192 | break
193 | }
194 | time.Sleep(PingInterval)
195 | }
196 | check(t, ck1, ck3.me, ck1.me, vx.Viewnum+1)
197 | vy, _ := ck1.Get()
198 | // ck3 is the primary, but it never acked.
199 | // let ck3 die. check that ck1 is not promoted.
200 | for i := 0; i < DeadPings*3; i++ {
201 | v, _ := ck1.Ping(vy.Viewnum)
202 | if v.Viewnum > vy.Viewnum {
203 | break
204 | }
205 | time.Sleep(PingInterval)
206 | }
207 | check(t, ck2, ck3.me, ck1.me, vy.Viewnum)
208 | }
209 | fmt.Printf(" ... Passed\n")
210 |
211 | // if old servers die, check that a new (uninitialized) server
212 | // cannot take over.
213 | fmt.Printf("Test: Uninitialized server can't become primary ...\n")
214 |
215 | {
216 | for i := 0; i < DeadPings*2; i++ {
217 | v, _ := ck1.Get()
218 | ck1.Ping(v.Viewnum)
219 | ck2.Ping(0)
220 | ck3.Ping(v.Viewnum)
221 | time.Sleep(PingInterval)
222 | }
223 | for i := 0; i < DeadPings*2; i++ {
224 | ck2.Ping(0)
225 | time.Sleep(PingInterval)
226 | }
227 | vz, _ := ck2.Get()
228 | if vz.Primary == ck2.me {
229 | t.Fatalf("uninitialized backup promoted to primary")
230 | }
231 | }
232 | fmt.Printf(" ... Passed\n")
233 |
234 | vs.Kill()
235 | }
236 |
--------------------------------------------------------------------------------
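The tests above hinge on two liveness conventions from common.go: a server is declared dead once the view service has missed DeadPings of its Pings, and a Ping(0) from a server the view service already knows is treated as "crashed and restarted". Below is a minimal sketch of the timeout check, assuming the DeadPings and PingInterval constants from common.go (`lastHeard` is an illustrative stand-in for the server's own bookkeeping map, not a name from the lab code):

package viewservice

import "time"

// isDead reports whether the view service should consider srv failed:
// either it has never pinged, or its most recent Ping is older than
// DeadPings * PingInterval.
func isDead(lastHeard map[string]time.Time, srv string) bool {
	t, ok := lastHeard[srv]
	if !ok {
		return true
	}
	return time.Now().After(t.Add(DeadPings * PingInterval))
}

--------------------------------------------------------------------------------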
/hw4/src/kvpaxos/server.go:
--------------------------------------------------------------------------------
1 | package kvpaxos
2 |
3 | import (
4 | "errors"
5 | "net"
6 | "reflect"
7 | "time"
8 | )
9 | import "fmt"
10 | import "net/rpc"
11 | import "log"
12 | import "paxos"
13 | import "sync"
14 | import "sync/atomic"
15 | import "os"
16 | import "syscall"
17 | import "encoding/gob"
18 | import "math/rand"
19 |
20 |
21 | const Debug = 1
22 |
23 | func DPrintf(format string, a ...interface{}) (n int, err error) {
24 | if Debug > 0 {
25 | log.Printf(format, a...)
26 | }
27 | return
28 | }
29 |
30 |
31 | const (
32 | Put = "Put"
33 | Append = "Append"
34 | Get = "Get"
35 | )
36 |
37 | type Op struct {
38 | // Put, Get, Append
39 | Operation string
40 | Args interface{}
41 | }
42 |
43 | type KVPaxos struct {
44 | mu sync.Mutex
45 | l net.Listener
46 | me int
47 | dead int32 // for testing
48 | unreliable int32 // for testing
49 | px *paxos.Paxos
50 |
51 | lastApply int
52 | database map[string]string
53 | maxClientSeq map[int64]int
54 | }
55 |
56 |
57 | func (kv *KVPaxos) Apply(op Op) {
58 | if op.Operation == Get {
59 | args := op.Args.(GetArgs)
60 | if args.Seq > kv.maxClientSeq[args.ClientID] {
61 | kv.maxClientSeq[args.ClientID] = args.Seq
62 | }
63 | } else if op.Operation == Put {
64 | args := op.Args.(PutAppendArgs)
65 | kv.database[args.Key] = args.Value
66 | if args.Seq > kv.maxClientSeq[args.ClientID] {
67 | kv.maxClientSeq[args.ClientID] = args.Seq
68 | }
69 | } else if op.Operation == Append {
70 | args := op.Args.(PutAppendArgs)
71 | value, ok := kv.database[args.Key]
72 | if !ok {
73 | value = ""
74 | }
75 | kv.database[args.Key] = value + args.Value
76 | if args.Seq > kv.maxClientSeq[args.ClientID] {
77 | kv.maxClientSeq[args.ClientID] = args.Seq
78 | }
79 | }
80 | }
81 |
82 | func (kv *KVPaxos) Wait(seq int) (Op, error) {
83 | 	sleepTime := 10 * time.Millisecond
84 | 	for iters := 0; iters < 15; iters++ {
85 | decided, op := kv.px.Status(seq)
86 | if decided == paxos.Decided {
87 | return op.(Op), nil
88 | }
89 | 		// since we call Done() correctly, the Forgotten status should not show up here
90 | //else if decided == paxos.Forgotten {
91 | // break
92 | //}
93 | time.Sleep(sleepTime)
94 | if sleepTime < 10 * time.Second {
95 | sleepTime *= 2
96 | }
97 | }
98 | 	return Op{}, errors.New("KVPaxos: waited too long for a paxos decision")
99 | }
100 |
101 | func (kv *KVPaxos) Propose(xop Op) error {
102 | for {
103 | kv.px.Start(kv.lastApply + 1, xop)
104 | op, err := kv.Wait(kv.lastApply + 1)
105 | if err != nil {
106 | return err
107 | }
108 | kv.Apply(op)
109 | kv.lastApply += 1
110 |
111 | if reflect.DeepEqual(op, xop) {
112 | break
113 | }
114 | 		// call Done() every time lastApply advances, so Paxos can free memory for old instances
115 | kv.px.Done(kv.lastApply)
116 | }
117 | kv.px.Done(kv.lastApply)
118 | return nil
119 | }
120 |
121 | func (kv *KVPaxos) Get(args *GetArgs, reply *GetReply) error {
122 | // Your code here.
123 | kv.mu.Lock()
124 | defer kv.mu.Unlock()
125 | if args.Seq <= kv.maxClientSeq[args.ClientID] {
126 | reply.Err = OK
127 | reply.Value = kv.database[args.Key]
128 | return nil
129 | }
130 | 	op := Op{Operation: Get, Args: *args}
131 | err := kv.Propose(op)
132 | if err != nil {
133 | return err
134 | }
135 |
136 | value, ok := kv.database[args.Key]
137 | if !ok {
138 | reply.Err = ErrNoKey
139 | reply.Value = ""
140 | } else {
141 | reply.Err = OK
142 | reply.Value = value
143 | }
144 | return nil
145 |
146 | }
147 |
148 | func (kv *KVPaxos) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error {
149 | // Your code here.
150 | kv.mu.Lock()
151 | defer kv.mu.Unlock()
152 | if args.Seq <= kv.maxClientSeq[args.ClientID] {
153 | reply.Err = OK
154 | return nil
155 | }
156 |
157 | op := Op{Args: *args, Operation: args.Op}
158 | err := kv.Propose(op)
159 | if err != nil {
160 | return err
161 | }
162 | reply.Err = OK
163 | return nil
164 | }
165 |
166 | // tell the server to shut itself down.
167 | // please do not change these two functions.
168 | func (kv *KVPaxos) kill() {
169 | //DPrintf("Kill(%d): die\n", kv.me)
170 | atomic.StoreInt32(&kv.dead, 1)
171 | kv.l.Close()
172 | kv.px.Kill()
173 | }
174 |
175 | // call this to find out if the server is dead.
176 | func (kv *KVPaxos) isdead() bool {
177 | return atomic.LoadInt32(&kv.dead) != 0
178 | }
179 |
180 | // please do not change these two functions.
181 | func (kv *KVPaxos) setunreliable(what bool) {
182 | if what {
183 | atomic.StoreInt32(&kv.unreliable, 1)
184 | } else {
185 | atomic.StoreInt32(&kv.unreliable, 0)
186 | }
187 | }
188 |
189 | func (kv *KVPaxos) isunreliable() bool {
190 | return atomic.LoadInt32(&kv.unreliable) != 0
191 | }
192 |
193 | //
194 | // servers[] contains the ports of the set of
195 | // servers that will cooperate via Paxos to
196 | // form the fault-tolerant key/value service.
197 | // me is the index of the current server in servers[].
198 | //
199 | func StartServer(servers []string, me int) *KVPaxos {
200 | // call gob.Register on structures you want
201 | // Go's RPC library to marshall/unmarshall.
202 | gob.Register(Op{})
203 | gob.Register(GetArgs{})
204 | gob.Register(PutAppendArgs{})
205 |
206 | kv := new(KVPaxos)
207 | kv.me = me
208 |
209 | // Your initialization code here.
210 | kv.database = make(map[string]string)
211 | kv.maxClientSeq = make(map[int64]int)
212 |
213 | rpcs := rpc.NewServer()
214 | rpcs.Register(kv)
215 |
216 | kv.px = paxos.Make(servers, me, rpcs)
217 |
218 | os.Remove(servers[me])
219 | l, e := net.Listen("unix", servers[me])
220 | if e != nil {
221 | log.Fatal("listen error: ", e)
222 | }
223 | kv.l = l
224 |
225 |
226 | // please do not change any of the following code,
227 | // or do anything to subvert it.
228 |
229 | go func() {
230 | for kv.isdead() == false {
231 | conn, err := kv.l.Accept()
232 | if err == nil && kv.isdead() == false {
233 | if kv.isunreliable() && (rand.Int63()%1000) < 100 {
234 | // discard the request.
235 | conn.Close()
236 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 {
237 | // process the request but force discard of reply.
238 | c1 := conn.(*net.UnixConn)
239 | f, _ := c1.File()
240 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
241 | if err != nil {
242 | fmt.Printf("shutdown: %v\n", err)
243 | }
244 | go rpcs.ServeConn(conn)
245 | } else {
246 | go rpcs.ServeConn(conn)
247 | }
248 | } else if err == nil {
249 | conn.Close()
250 | }
251 | if err != nil && kv.isdead() == false {
252 | fmt.Printf("KVPaxos(%v) accept: %v\n", me, err.Error())
253 | kv.kill()
254 | }
255 | }
256 | }()
257 |
258 | return kv
259 | }
260 |
--------------------------------------------------------------------------------
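One detail of the server above worth calling out: its at-most-once behaviour comes entirely from the (ClientID, Seq) pair carried by GetArgs/PutAppendArgs plus the maxClientSeq map. Get() and PutAppend() perform this check before proposing anything, and Apply() bumps maxClientSeq, so a retransmitted RPC is answered from the current database instead of being re-executed. A minimal sketch of that check (the helper name is mine; server.go inlines the comparison):

// isDuplicate reports whether a request tagged (clientID, seq) has already
// been applied: Apply() raises maxClientSeq[clientID] to the highest Seq
// seen, so anything at or below it is a replay.
func isDuplicate(maxClientSeq map[int64]int, clientID int64, seq int) bool {
	return seq <= maxClientSeq[clientID]
}

This is why a Put/Append retried over the unreliable network is not applied twice: Paxos fixes the order once, and the Seq check filters out the replays.

--------------------------------------------------------------------------------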
/hw2/src/viewservice/test_test.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import (
4 | "log"
5 | "testing"
6 | )
7 | import "runtime"
8 | import "time"
9 | import "fmt"
10 | import "os"
11 | import "strconv"
12 |
13 | func check(t *testing.T, ck *Clerk, p string, b string, n uint) {
14 | view, _ := ck.Get()
15 | log.Printf("p=%v, b=%v, n=%v", view.Primary, view.Backup, view.Viewnum)
16 |
17 | if view.Primary != p {
18 | t.Fatalf("wanted primary %v, got %v", p, view.Primary)
19 | }
20 | if view.Backup != b {
21 | t.Fatalf("wanted backup %v, got %v", b, view.Backup)
22 | }
23 | if n != 0 && n != view.Viewnum {
24 | t.Fatalf("wanted viewnum %v, got %v", n, view.Viewnum)
25 | }
26 | if ck.Primary() != p {
27 | t.Fatalf("wanted primary %v, got %v", p, ck.Primary())
28 | }
29 | }
30 |
31 | func port(suffix string) string {
32 | s := "/var/tmp/824-"
33 | s += strconv.Itoa(os.Getuid()) + "/"
34 | os.Mkdir(s, 0777)
35 | s += "viewserver-"
36 | s += strconv.Itoa(os.Getpid()) + "-"
37 | s += suffix
38 | return s
39 | }
40 |
41 | func Test1(t *testing.T) {
42 | runtime.GOMAXPROCS(4)
43 |
44 | vshost := port("v")
45 | vs := StartServer(vshost)
46 |
47 | ck1 := MakeClerk(port("1"), vshost)
48 | ck2 := MakeClerk(port("2"), vshost)
49 | ck3 := MakeClerk(port("3"), vshost)
50 |
51 | //
52 |
53 | if ck1.Primary() != "" {
54 | t.Fatalf("there was a primary too soon")
55 | }
56 |
57 | // very first primary
58 | fmt.Printf("Test: First primary ...\n")
59 |
60 | for i := 0; i < DeadPings*2; i++ {
61 | view, _ := ck1.Ping(0)
62 | if view.Primary == ck1.me {
63 | break
64 | }
65 | time.Sleep(PingInterval)
66 | }
67 | check(t, ck1, ck1.me, "", 1)
68 | fmt.Printf(" ... Passed\n")
69 |
70 | // very first backup
71 | fmt.Printf("Test: First backup ...\n")
72 |
73 | {
74 | vx, _ := ck1.Get()
75 | for i := 0; i < DeadPings*2; i++ {
76 | ck1.Ping(1)
77 | view, _ := ck2.Ping(0)
78 | if view.Backup == ck2.me {
79 | break
80 | }
81 | time.Sleep(PingInterval)
82 | }
83 | check(t, ck1, ck1.me, ck2.me, vx.Viewnum+1)
84 | }
85 | fmt.Printf(" ... Passed\n")
86 |
87 | // primary dies, backup should take over
88 | fmt.Printf("Test: Backup takes over if primary fails ...\n")
89 |
90 | {
91 | ck1.Ping(2) // view bound from 1 to 2
92 | vx, _ := ck2.Ping(2) // {2, ck1, ck2}
93 | for i := 0; i < DeadPings*2; i++ {
94 | v, _ := ck2.Ping(vx.Viewnum)
95 | if v.Primary == ck2.me && v.Backup == "" {
96 | break
97 | }
98 | time.Sleep(PingInterval)
99 | }
100 | check(t, ck2, ck2.me, "", vx.Viewnum+1)
101 | }
102 | fmt.Printf(" ... Passed\n")
103 |
104 | // revive ck1, should become backup
105 | fmt.Printf("Test: Restarted server becomes backup ...\n")
106 |
107 | {
108 | vx, _ := ck2.Get() // vx.viewnum = 3
109 | ck2.Ping(vx.Viewnum) // ck2 is the primary (so viewBound=3 after that)
110 | for i := 0; i < DeadPings*2; i++ {
111 | ck1.Ping(0) // ck1 is the restarted server. do `assignRole()`
112 | v, _ := ck2.Ping(vx.Viewnum)
113 | if v.Primary == ck2.me && v.Backup == ck1.me {
114 | break
115 | }
116 | time.Sleep(PingInterval)
117 | }
118 | check(t, ck2, ck2.me, ck1.me, vx.Viewnum+1)
119 | }
120 | fmt.Printf(" ... Passed\n")
121 |
122 | 	// start ck3, kill the primary (ck2), the previous backup (ck1)
123 | 	// should become the primary, and ck3 the backup.
124 | // this should happen in a single view change, without
125 | // any period in which there's no backup.
126 | fmt.Printf("Test: Idle third server becomes backup if primary fails ...\n")
127 |
128 | {
129 | vx, _ := ck2.Get() // vx = {4, p=ck2, b=ck1}
130 | ck2.Ping(vx.Viewnum)
131 | for i := 0; i < DeadPings*2; i++ {
132 | ck3.Ping(0) // new server is added!
133 | v, _ := ck1.Ping(vx.Viewnum)
134 | if v.Primary == ck1.me && v.Backup == ck3.me {
135 | break
136 | }
137 | vx = v
138 | time.Sleep(PingInterval)
139 | }
140 | check(t, ck1, ck1.me, ck3.me, vx.Viewnum+1)
141 | }
142 | fmt.Printf(" ... Passed\n")
143 |
144 | // kill and immediately restart the primary -- does viewservice
145 | // conclude primary is down even though it's pinging?
146 | 	// Adrian: yes! it is treated as dead because its Ping carries viewnum zero
147 | fmt.Printf("Test: Restarted primary treated as dead ...\n")
148 |
149 | {
150 | vx, _ := ck1.Get()
151 | ck1.Ping(vx.Viewnum) // vx = {5, p=ck1, b=ck3}
152 | for i := 0; i < DeadPings*2; i++ {
153 | 			ck1.Ping(0) // force a `replace()` by pinging with viewnum 0
154 | ck3.Ping(vx.Viewnum)
155 | v, _ := ck3.Get()
156 | if v.Primary != ck1.me {
157 | break
158 | }
159 | time.Sleep(PingInterval)
160 | }
161 | 		vy, _ := ck3.Get() // in my case vy = {6, p=ck3, b=""}
162 | if vy.Primary != ck3.me {
163 | t.Fatalf("expected primary=%v, got %v\n", ck3.me, vy.Primary)
164 | }
165 | }
166 | fmt.Printf(" ... Passed\n")
167 |
168 | 	fmt.Printf("Test: Dead backup is removed from view ...\n") // I don't think this name makes sense;
169 | 	// it should be `check that the backup has been promoted to primary`
170 |
171 | // set up a view with just 3 as primary,
172 | // to prepare for the next test.
173 | {
174 | for i := 0; i < DeadPings*3; i++ {
175 | vx, _ := ck3.Get() // vx = {6, p=ck3, b=""}
176 | ck3.Ping(vx.Viewnum) // now viewBound will be 6
177 | time.Sleep(PingInterval)
178 | }
179 | v, _ := ck3.Get()
180 | if v.Primary != ck3.me || v.Backup != "" {
181 | t.Fatalf("wrong primary or backup")
182 | }
183 | }
184 | fmt.Printf(" ... Passed\n")
185 | 	// up to here, vx.Viewnum is still 6. Nothing changed
186 |
187 | // does viewserver wait for ack of previous view before
188 | // starting the next one?
189 | fmt.Printf("Test: Viewserver waits for primary to ack view ...\n")
190 |
191 | {
192 | 		// set up p=ck3 b=ck1,
193 | // but do not ack
194 | vx, _ := ck1.Get() // vx = {6, ck3, _} -> ck1 is not even in the set!!
195 | for i := 0; i < DeadPings*3; i++ {
196 | 			ck1.Ping(0) // now ck1 gets a role via assignRole(); the view goes from 6 to 7: {ck3, ck1}
197 | ck3.Ping(vx.Viewnum)
198 | v, _ := ck1.Get() // v = 7, {ck3, ck1}
199 | if v.Viewnum > vx.Viewnum { // 7 > 6
200 | break
201 | }
202 | time.Sleep(PingInterval)
203 | }
204 | check(t, ck1, ck3.me, ck1.me, vx.Viewnum+1)
205 |
206 | vy, _ := ck1.Get() // vy = 7, {ck3, ck1}
207 | // ck3 is the primary, but it never acked.
208 | // let ck3 die. check that ck1 is not promoted. (IMPORTANT)
209 | for i := 0; i < DeadPings*3; i++ {
210 | v, _ := ck1.Ping(vy.Viewnum)
211 | //log.Printf("v: %v, %v", vy.Viewnum, v.Viewnum)
212 | if v.Viewnum > vy.Viewnum { // e.g., v = 8, 8 > 7
213 | break // should not happen!
214 | }
215 | time.Sleep(PingInterval)
216 | }
217 | // we will NOT break the for loop.
218 | check(t, ck2, ck3.me, ck1.me, vy.Viewnum) // view doesn't change
219 | }
220 | fmt.Printf(" ... Passed\n")
221 |
222 | // if old servers die, check that a new (uninitialized) server
223 | // cannot take over.
224 | 	// Adrian: what is an `uninitialized server`? here: ck2, which has never been primary or backup, so it holds no replicated state
225 | fmt.Printf("Test: Uninitialized server can't become primary ...\n")
226 |
227 | {
228 | for i := 0; i < DeadPings*2; i++ {
229 | 		v, _ := ck1.Get() // v = {7, ck3, ck1}
230 | 		ck1.Ping(v.Viewnum) // ck1 is only the backup, so this Ping is not an ack
231 | 		ck2.Ping(0) // a new server joined -> idle
232 | 		ck3.Ping(v.Viewnum) // the primary finally acks view 7: viewBound goes 6 -> 7
233 | 		time.Sleep(PingInterval)
234 | 	} // after this loop: v = {7, ck3, ck1}, viewBound = 7, ck2 is idle
235 | //v, _ := ck1.Get()
236 | //log.Printf(v.Backup)
237 |
238 | 		// wait for ck1 and ck3 to die...
239 | 		// who dies first? does it matter?
240 | 		// ck1.Ping(7) // uncommenting this line makes ck1 die later than ck3, since it has just pinged
241 | for i := 0; i < DeadPings*2; i++ {
242 | ck2.Ping(0)
243 | time.Sleep(PingInterval)
244 | }
245 | 		// entering the loop above: v = {7, ck3, ck1} and viewBound = 7, so the view may advance by one.
246 | 		// case 1: ck1 dies first,
247 | 		// so we get v = {8, ck3, ck2}.
248 | 		// since viewBound is still 7, the view cannot change again:
249 | 		// when ck3 then dies we would like {9, ck2, _},
250 | 		// but the ack rule forbids it, so we keep {8, ck3, ck2}.
251 | 
252 | 		// case 2: ck3 dies first,
253 | 		// so we get v = {8, ck1, ck2};
254 | 		// then ck1 dies -> we want {9, ck2, _} but in vain, so we end up with {8, ck1, ck2}.
255 | 		vz, _ := ck2.Get() // vz = {8, ck3, ck2} in case 1, {8, ck1, ck2} in case 2
256 | if vz.Primary == ck2.me {
257 | 			t.Fatalf("uninitialized backup promoted to primary")
258 | }
259 | }
260 | fmt.Printf(" ... Passed\n")
261 |
262 | vs.Kill()
263 | }
264 |
--------------------------------------------------------------------------------
/hw2/src/viewservice/server.go:
--------------------------------------------------------------------------------
1 | package viewservice
2 |
3 | import "net"
4 | import "net/rpc"
5 | import "log"
6 | import "time"
7 | import "sync"
8 | import "fmt"
9 | import "os"
10 | import "sync/atomic"
11 |
12 | type ViewServer struct {
13 | mu sync.Mutex
14 | l net.Listener
15 | dead int32 // for testing
16 | rpccount int32 // for testing
17 | me string
18 |
19 | // Your declarations here.
20 |
21 | // Hint #2: add field(s) to ViewServer to keep track of the current view.
22 | currview *View
23 | recentHeard map[string] time.Time
24 | // A read/write mutex allows all the readers to access
25 | // the map at the same time, but a writer will lock out everyone else.
26 | rwm sync.RWMutex
27 | // Hint #3:
28 | // keep track of whether the primary for the current view has acked the latest view X
29 | // viewBound = 7 means that the current primary has acked the view 7. **And the way
30 | 	// it did the ACK is by sending a Ping(7)**
31 | viewBound uint // last value view X of the primary Ping(X)
32 | idleServers map[string] bool
33 | }
34 |
35 | //
36 | // server Ping RPC handler.
37 | //
38 | func (vs *ViewServer) Ping(args *PingArgs, reply *PingReply) error {
39 |
40 | // Your code here.
41 |
42 | 	// why do we need a lock here?
43 | 	// even though the tests don't say so explicitly, Ping() can be called
44 | 	// concurrently by many goroutines -> that may cause concurrent writes to `recentHeard`
45 | vs.rwm.Lock()
46 | defer vs.rwm.Unlock()
47 |
48 | // Hint #1: you'll want to add field(s) to ViewServer in server.go
49 | // in order to keep track of the most recent time at which
50 | // the viewservice has heard a Ping from each server.
51 | vs.recentHeard[args.Me] = time.Now()
52 |
53 | 	// init, Ping(0) from ck1. only do this once, when vs gets bootstrapped
54 | if vs.currview == nil {
55 | vs.viewBound = args.Viewnum // X is now 0
56 | vs.currview = &View{0, "", ""}
57 | // ps. as it now received a Ping(0) from primary => can proceed to Viewnum = 1
58 | }
59 |
60 | if args.Me == vs.currview.Primary {
61 | // deal with the ACK from the primary
62 | // if the incoming Ping(X'): its X' is larger than our view bound X
63 | // e.g., in the test case #2: Ping(1) from ck1: then 0 < 1
64 | // received a Ping(1) from the primary ck1 => can later proceed to Viewnum = 2
65 |
66 | 		// an increase in viewBound means that some newer view has been acked by the primary
67 | 		// and the view service now knows about it
68 | if vs.viewBound < args.Viewnum {
69 | vs.viewBound = args.Viewnum
70 | }
71 | // Hint #6: the viewservice needs a way to detect that
72 | // a primary or backup has failed and re-started.
73 | 		// Therefore, the protocol requires that when a server re-starts after a crash,
74 | 		// it should send one or more Pings with an argument of zero to
75 | 		// inform the view service that it crashed.
76 | if args.Viewnum == 0 {
77 | vs.replace(args.Me) // force replace
78 | }
79 | } else if args.Me == vs.currview.Backup {
80 | // same as above.
81 | if args.Viewnum == 0 { // just got crashed and restarted
82 | vs.replace(args.Me) // force replace
83 | }
84 | } else {
85 | 		// a server that is neither primary nor backup -> give it a role, or park it as an idle server
86 | vs.assignRole(args.Me)
87 | }
88 |
89 | reply.View = *vs.currview
90 | return nil
91 | }
92 |
93 | //
94 | // server Get() RPC handler.
95 | //
96 | func (vs *ViewServer) Get(args *GetArgs, reply *GetReply) error {
97 |
98 | // Your code here.
99 | // the clerk can ask for the latest view from the view service
100 | // without doing Ping(). it uses Get() to fetch the latest view.
101 | vs.rwm.Lock()
102 | defer vs.rwm.Unlock()
103 |
104 | if vs.currview != nil {
105 | reply.View = *vs.currview
106 | }
107 | return nil
108 | }
109 |
110 | // edited by Adrian
111 | // fill the empty backup slot by promoting an idle server
112 | func (vs *ViewServer) backupByIdleSrv() {
113 | 	// the backup is filled in only if at least one idle server exists
114 | if len(vs.idleServers) > 0 {
115 | // pick either one of them
116 | for key, _ := range vs.idleServers {
117 | vs.currview.Backup = key // backup will be set
118 | delete(vs.idleServers, key) // to keep the size of map
119 | break
120 | }
121 | }
122 | }
123 |
124 | // edited by Adrian
125 | func (vs *ViewServer) replace(k string) {
126 | // IMPORTANT!
127 | // the view service may NOT proceed from view X to view X + 1
128 | // if it has not received a Ping(X) from the primary of the view X
129 |
130 | 	// vs.viewBound is the latest view number for which the current primary
131 | 	// has already sent back an ack to the view service (see the worked example after this file)
132 | // e.g., viewBound = 6 means that the current primary ck_i has sent a
133 | // Ping(6) to the view service successfully. the View {p=cki, b=_, n=6} is acked
134 |
135 | // if current view's Viewnum in view service = 6, then 6 + 1 > 6
136 | // so you can do the vs.replace(k) and the Viewnum of vs will be 7
137 | // however, if current view's Viewnum in the vs = 7, then 6 + 1 > 7 doesn't hold
138 | // so you CANNOT do the replacement even though many rounds of tick() may have passed
139 |
140 | // X = 6, X+1 = 7:
141 | // the vs CANNOT proceed from view 7 to view 8 as it has not received a Ping(7)
142 | // from the primary of the view X. the current viewBound is still 7
143 | // see testcase: `Viewserver waits for primary to ack view`
144 | // if the current viewnum is 7
145 | // 6 + 1 > 7? NO! so you cannot proceed. skip this function.
146 | if vs.viewBound + 1 > vs.currview.Viewnum {
147 |
148 | if k == vs.currview.Primary {
149 | // if k is the current primary -> remove this primary
150 | vs.currview.Primary = vs.currview.Backup
151 | vs.currview.Backup = ""
152 | vs.backupByIdleSrv()
153 | vs.currview.Viewnum += 1
154 | } else if k == vs.currview.Backup {
155 | // if k is the current backup -> remove this backup
156 | vs.currview.Backup = ""
157 | vs.backupByIdleSrv()
158 | vs.currview.Viewnum += 1
159 | } // if k is neither of both -> we don't do anything
160 | } else {
161 | //log.Printf("cannot change view: current view not yet acked by primary:\n" +
162 | // "viewBound=%v, vs.currview.Viewnum=%v", vs.viewBound, vs.currview.Viewnum)
163 | }
164 | }
165 |
166 | // edited by Adrian
167 | func (vs *ViewServer) assignRole(me string) {
168 |
169 | // ack rule: same idea as the `replace()` function
170 | if vs.viewBound + 1 > vs.currview.Viewnum {
171 | // the current ping is from an arbitrary server (not primary, nor backup)
172 | // new server has joined! what job should it do? primary? backup? or idle?
173 | if vs.currview.Primary == "" {
174 | vs.currview.Primary = me
175 | vs.currview.Viewnum += 1
176 | } else if vs.currview.Backup == "" {
177 | vs.currview.Backup = me
178 | vs.currview.Viewnum += 1
179 | } else {
180 | vs.idleServers[me] = true
181 | 			// do not increment the viewnum
182 | }
183 | } else {
184 | //log.Printf("cannot change view: current view not yet acked by primary:\n " +
185 | //"viewBound=%v, vs.currview.Viewnum=%v", vs.viewBound, vs.currview.Viewnum)
186 | }
187 | }
188 |
189 | //
190 | // tick() is called once per PingInterval; it should notice
191 | // if servers have died or recovered, and change the view
192 | // accordingly.
193 | //
194 | func (vs *ViewServer) tick() {
195 | // Your code here.
196 |
197 | // Hint #4: your viewservice needs to make periodic decisions,
198 | // for example to promote the backup if the viewservice has missed
199 | // DeadPings pings from the primary.
200 | vs.rwm.Lock()
201 | defer vs.rwm.Unlock()
202 | for k, v := range vs.recentHeard {
203 | 		// if the current time is past (the recentHeard time + DeadPings * PingInterval),
204 | 		// then we need to replace this server `k`
205 | if time.Now().After(v.Add(DeadPings * PingInterval)) {
206 | vs.replace(k)
207 | }
208 | }
209 | }
210 |
211 | //
212 | // tell the server to shut itself down.
213 | // for testing.
214 | // please don't change these two functions.
215 | //
216 | func (vs *ViewServer) Kill() {
217 | atomic.StoreInt32(&vs.dead, 1)
218 | vs.l.Close()
219 | }
220 |
221 | //
222 | // has this server been asked to shut down?
223 | //
224 | func (vs *ViewServer) isdead() bool {
225 | return atomic.LoadInt32(&vs.dead) != 0
226 | }
227 |
228 | // please don't change this function.
229 | func (vs *ViewServer) GetRPCCount() int32 {
230 | return atomic.LoadInt32(&vs.rpccount)
231 | }
232 |
233 | func StartServer(me string) *ViewServer {
234 | vs := new(ViewServer)
235 | vs.me = me
236 | // Your vs.* initializations here.
237 | vs.currview = nil
238 | vs.recentHeard = make(map[string]time.Time)
239 | vs.viewBound = 0
240 | vs.idleServers = make(map[string]bool)
241 |
242 | // tell net/rpc about our RPC server and handlers.
243 | rpcs := rpc.NewServer()
244 | rpcs.Register(vs)
245 |
246 | // prepare to receive connections from clients.
247 | // change "unix" to "tcp" to use over a network.
248 | os.Remove(vs.me) // only needed for "unix"
249 | l, e := net.Listen("unix", vs.me)
250 | if e != nil {
251 | log.Fatal("listen error: ", e)
252 | }
253 | vs.l = l
254 |
255 | // please don't change any of the following code,
256 | // or do anything to subvert it.
257 |
258 | // create a thread to accept RPC connections from clients.
259 | go func() {
260 | for vs.isdead() == false {
261 | conn, err := vs.l.Accept()
262 | if err == nil && vs.isdead() == false {
263 | atomic.AddInt32(&vs.rpccount, 1)
264 | go rpcs.ServeConn(conn)
265 | } else if err == nil {
266 | conn.Close()
267 | }
268 | if err != nil && vs.isdead() == false {
269 | fmt.Printf("ViewServer(%v) accept: %v\n", me, err.Error())
270 | vs.Kill()
271 | }
272 | }
273 | }()
274 |
275 | // create a thread to call tick() periodically.
276 | go func() {
277 | for vs.isdead() == false {
278 | vs.tick()
279 | time.Sleep(PingInterval)
280 | }
281 | }()
282 |
283 | return vs
284 | }
285 |
--------------------------------------------------------------------------------
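To make the ack rule in replace() and assignRole() above concrete, here is the comparison pulled out into a tiny helper, using the same numbers as the comments (the helper is illustrative only; server.go inlines it):

// canChangeView is the ack rule: the view service may advance past the
// current view only if viewBound+1 > currViewnum, which in practice means
// the current primary has already acked the current view.
func canChangeView(viewBound, currViewnum uint) bool {
	return viewBound+1 > currViewnum
}

For example, with currview = {Viewnum: 7, Primary: ck3, Backup: ck1} and viewBound = 6 (ck3 has only acked view 6), canChangeView(6, 7) is false, so the view stays at 7 even if ck3 stops pinging; that is exactly the "Viewserver waits for primary to ack view" test. Once ck3 sends Ping(7), viewBound becomes 7 and the next failure is allowed to advance the view to 8.

--------------------------------------------------------------------------------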
/hw4/src/shardmaster/test_test.go:
--------------------------------------------------------------------------------
1 | package shardmaster
2 |
3 | import "testing"
4 | import "runtime"
5 | import "strconv"
6 | import "os"
7 |
8 | // import "time"
9 | import "fmt"
10 | import "math/rand"
11 |
12 | func port(tag string, host int) string {
13 | s := "/var/tmp/824-"
14 | s += strconv.Itoa(os.Getuid()) + "/"
15 | os.Mkdir(s, 0777)
16 | s += "sm-"
17 | s += strconv.Itoa(os.Getpid()) + "-"
18 | s += tag + "-"
19 | s += strconv.Itoa(host)
20 | return s
21 | }
22 |
23 | func cleanup(sma []*ShardMaster) {
24 | for i := 0; i < len(sma); i++ {
25 | if sma[i] != nil {
26 | sma[i].Kill()
27 | }
28 | }
29 | }
30 |
31 | //
32 | // maybe should take a cka[] and find the server with
33 | // the highest Num.
34 | //
35 | func check(t *testing.T, groups []int64, ck *Clerk) {
36 | c := ck.Query(-1)
37 | if len(c.Groups) != len(groups) {
38 | t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups))
39 | }
40 |
41 | // are the groups as expected?
42 | for _, g := range groups {
43 | _, ok := c.Groups[g]
44 | if ok != true {
45 | t.Fatalf("missing group %v", g)
46 | }
47 | }
48 |
49 | // any un-allocated shards?
50 | if len(groups) > 0 {
51 | for s, g := range c.Shards {
52 | _, ok := c.Groups[g]
53 | if ok == false {
54 | t.Fatalf("shard %v -> invalid group %v", s, g)
55 | }
56 | }
57 | }
58 |
59 | // more or less balanced sharding?
60 | counts := map[int64]int{}
61 | for _, g := range c.Shards {
62 | counts[g] += 1
63 | }
64 | min := 257
65 | max := 0
66 | for g, _ := range c.Groups {
67 | if counts[g] > max {
68 | max = counts[g]
69 | }
70 | if counts[g] < min {
71 | min = counts[g]
72 | }
73 | }
74 | if max > min+1 {
75 | t.Fatalf("max %v too much larger than min %v", max, min)
76 | }
77 | }
78 |
79 | func TestBasic(t *testing.T) {
80 | runtime.GOMAXPROCS(4)
81 |
82 | const nservers = 3
83 | var sma []*ShardMaster = make([]*ShardMaster, nservers)
84 | var kvh []string = make([]string, nservers)
85 | defer cleanup(sma)
86 |
87 | for i := 0; i < nservers; i++ {
88 | kvh[i] = port("basic", i)
89 | }
90 | for i := 0; i < nservers; i++ {
91 | sma[i] = StartServer(kvh, i)
92 | }
93 |
94 | ck := MakeClerk(kvh)
95 | var cka [nservers]*Clerk
96 | for i := 0; i < nservers; i++ {
97 | cka[i] = MakeClerk([]string{kvh[i]})
98 | }
99 |
100 | fmt.Printf("Test: Basic leave/join ...\n")
101 |
102 | cfa := make([]Config, 6)
103 | cfa[0] = ck.Query(-1)
104 |
105 | check(t, []int64{}, ck)
106 |
107 | var gid1 int64 = 1
108 | ck.Join(gid1, []string{"x", "y", "z"})
109 | check(t, []int64{gid1}, ck)
110 | cfa[1] = ck.Query(-1)
111 |
112 | var gid2 int64 = 2
113 | ck.Join(gid2, []string{"a", "b", "c"})
114 | check(t, []int64{gid1, gid2}, ck)
115 | cfa[2] = ck.Query(-1)
116 |
117 | ck.Join(gid2, []string{"a", "b", "c"})
118 | check(t, []int64{gid1, gid2}, ck)
119 | cfa[3] = ck.Query(-1)
120 |
121 | cfx := ck.Query(-1)
122 | sa1 := cfx.Groups[gid1]
123 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" {
124 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1)
125 | }
126 | sa2 := cfx.Groups[gid2]
127 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
128 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
129 | }
130 |
131 | ck.Leave(gid1)
132 | check(t, []int64{gid2}, ck)
133 | cfa[4] = ck.Query(-1)
134 |
135 | ck.Leave(gid1)
136 | check(t, []int64{gid2}, ck)
137 | cfa[5] = ck.Query(-1)
138 |
139 | fmt.Printf(" ... Passed\n")
140 |
141 | fmt.Printf("Test: Historical queries ...\n")
142 |
143 | for i := 0; i < len(cfa); i++ {
144 | c := ck.Query(cfa[i].Num)
145 | if c.Num != cfa[i].Num {
146 | t.Fatalf("historical Num wrong")
147 | }
148 | if c.Shards != cfa[i].Shards {
149 | t.Fatalf("historical Shards wrong")
150 | }
151 | if len(c.Groups) != len(cfa[i].Groups) {
152 | t.Fatalf("number of historical Groups is wrong")
153 | }
154 | for gid, sa := range c.Groups {
155 | sa1, ok := cfa[i].Groups[gid]
156 | if ok == false || len(sa1) != len(sa) {
157 | t.Fatalf("historical len(Groups) wrong")
158 | }
159 | if ok && len(sa1) == len(sa) {
160 | for j := 0; j < len(sa); j++ {
161 | if sa[j] != sa1[j] {
162 | t.Fatalf("historical Groups wrong")
163 | }
164 | }
165 | }
166 | }
167 | }
168 |
169 | fmt.Printf(" ... Passed\n")
170 |
171 | fmt.Printf("Test: Move ...\n")
172 | {
173 | var gid3 int64 = 503
174 | ck.Join(gid3, []string{"3a", "3b", "3c"})
175 | var gid4 int64 = 504
176 | ck.Join(gid4, []string{"4a", "4b", "4c"})
177 | for i := 0; i < NShards; i++ {
178 | cf := ck.Query(-1)
179 | if i < NShards/2 {
180 | ck.Move(i, gid3)
181 | if cf.Shards[i] != gid3 {
182 | cf1 := ck.Query(-1)
183 | if cf1.Num <= cf.Num {
184 | t.Fatalf("Move should increase Config.Num")
185 | }
186 | }
187 | } else {
188 | ck.Move(i, gid4)
189 | if cf.Shards[i] != gid4 {
190 | cf1 := ck.Query(-1)
191 | if cf1.Num <= cf.Num {
192 | t.Fatalf("Move should increase Config.Num")
193 | }
194 | }
195 | }
196 | }
197 | cf2 := ck.Query(-1)
198 | for i := 0; i < NShards; i++ {
199 | if i < NShards/2 {
200 | if cf2.Shards[i] != gid3 {
201 | t.Fatalf("expected shard %v on gid %v actually %v",
202 | i, gid3, cf2.Shards[i])
203 | }
204 | } else {
205 | if cf2.Shards[i] != gid4 {
206 | t.Fatalf("expected shard %v on gid %v actually %v",
207 | i, gid4, cf2.Shards[i])
208 | }
209 | }
210 | }
211 | ck.Leave(gid3)
212 | ck.Leave(gid4)
213 | }
214 | fmt.Printf(" ... Passed\n")
215 |
216 | fmt.Printf("Test: Concurrent leave/join ...\n")
217 |
218 | const npara = 10
219 | gids := make([]int64, npara)
220 | var ca [npara]chan bool
221 | for xi := 0; xi < npara; xi++ {
222 | gids[xi] = int64(xi + 1)
223 | ca[xi] = make(chan bool)
224 | go func(i int) {
225 | defer func() { ca[i] <- true }()
226 | var gid int64 = gids[i]
227 | cka[(i+0)%nservers].Join(gid + 1000, []string{"a", "b", "c"})
228 | cka[(i+0)%nservers].Join(gid, []string{"a", "b", "c"})
229 | cka[(i+1)%nservers].Leave(gid + 1000)
230 | }(xi)
231 | }
232 | for i := 0; i < npara; i++ {
233 | <-ca[i]
234 | }
235 | check(t, gids, ck)
236 |
237 | fmt.Printf(" ... Passed\n")
238 |
239 | fmt.Printf("Test: Min advances after joins ...\n")
240 |
241 | for i, sm := range sma {
242 | if sm.px.Min() <= 0 {
243 | t.Fatalf("Min() for %s did not advance", kvh[i])
244 | }
245 | }
246 |
247 | fmt.Printf(" ... Passed\n")
248 |
249 | fmt.Printf("Test: Minimal transfers after joins ...\n")
250 |
251 | c1 := ck.Query(-1)
252 | for i := 0; i < 5; i++ {
253 | ck.Join(int64(npara+1+i), []string{"a", "b", "c"})
254 | }
255 | c2 := ck.Query(-1)
256 | for i := int64(1); i <= npara; i++ {
257 | for j := 0; j < len(c1.Shards); j++ {
258 | if c2.Shards[j] == i {
259 | if c1.Shards[j] != i {
260 | t.Fatalf("non-minimal transfer after Join()s")
261 | }
262 | }
263 | }
264 | }
265 |
266 | fmt.Printf(" ... Passed\n")
267 |
268 | fmt.Printf("Test: Minimal transfers after leaves ...\n")
269 |
270 | for i := 0; i < 5; i++ {
271 | ck.Leave(int64(npara + 1 + i))
272 | }
273 | c3 := ck.Query(-1)
274 | for i := int64(1); i <= npara; i++ {
275 | for j := 0; j < len(c1.Shards); j++ {
276 | if c2.Shards[j] == i {
277 | if c3.Shards[j] != i {
278 | t.Fatalf("non-minimal transfer after Leave()s")
279 | }
280 | }
281 | }
282 | }
283 |
284 | fmt.Printf(" ... Passed\n")
285 | }
286 |
287 | func TestUnreliable(t *testing.T) {
288 | runtime.GOMAXPROCS(4)
289 |
290 | const nservers = 3
291 | var sma []*ShardMaster = make([]*ShardMaster, nservers)
292 | var kvh []string = make([]string, nservers)
293 | defer cleanup(sma)
294 |
295 | for i := 0; i < nservers; i++ {
296 | kvh[i] = port("unrel", i)
297 | }
298 | for i := 0; i < nservers; i++ {
299 | sma[i] = StartServer(kvh, i)
300 | // don't turn on unreliable because the assignment
301 | // doesn't require the shardmaster to detect duplicate
302 | // client requests.
303 | // sma[i].setunreliable(true)
304 | }
305 |
306 | ck := MakeClerk(kvh)
307 | var cka [nservers]*Clerk
308 | for i := 0; i < nservers; i++ {
309 | cka[i] = MakeClerk([]string{kvh[i]})
310 | }
311 |
312 | fmt.Printf("Test: Concurrent leave/join, failure ...\n")
313 |
314 | const npara = 20
315 | gids := make([]int64, npara)
316 | var ca [npara]chan bool
317 | for xi := 0; xi < npara; xi++ {
318 | gids[xi] = int64(xi + 1)
319 | ca[xi] = make(chan bool)
320 | go func(i int) {
321 | defer func() { ca[i] <- true }()
322 | var gid int64 = gids[i]
323 | cka[1+(rand.Int()%2)].Join(gid+1000, []string{"a", "b", "c"})
324 | cka[1+(rand.Int()%2)].Join(gid, []string{"a", "b", "c"})
325 | cka[1+(rand.Int()%2)].Leave(gid + 1000)
326 | // server 0 won't be able to hear any RPCs.
327 | os.Remove(kvh[0])
328 | }(xi)
329 | }
330 | for i := 0; i < npara; i++ {
331 | <-ca[i]
332 | }
333 | check(t, gids, ck)
334 |
335 | fmt.Printf(" ... Passed\n")
336 | }
337 |
338 | func TestFreshQuery(t *testing.T) {
339 | runtime.GOMAXPROCS(4)
340 |
341 | const nservers = 3
342 | var sma []*ShardMaster = make([]*ShardMaster, nservers)
343 | var kvh []string = make([]string, nservers)
344 | defer cleanup(sma)
345 |
346 | for i := 0; i < nservers; i++ {
347 | kvh[i] = port("fresh", i)
348 | }
349 | for i := 0; i < nservers; i++ {
350 | sma[i] = StartServer(kvh, i)
351 | }
352 |
353 | ck1 := MakeClerk([]string{kvh[1]})
354 |
355 | fmt.Printf("Test: Query() returns latest configuration ...\n")
356 |
357 | portx := kvh[0] + strconv.Itoa(rand.Int())
358 | if os.Rename(kvh[0], portx) != nil {
359 | t.Fatalf("os.Rename() failed")
360 | }
361 | ck0 := MakeClerk([]string{portx})
362 |
363 | ck1.Join(1001, []string{"a", "b", "c"})
364 | c := ck0.Query(-1)
365 | _, ok := c.Groups[1001]
366 | if ok == false {
367 | t.Fatalf("Query(-1) produced a stale configuration")
368 | }
369 |
370 | fmt.Printf(" ... Passed\n")
371 | os.Remove(portx)
372 | }
373 |
--------------------------------------------------------------------------------
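The check() helper in the test file above encodes the balance requirement as max > min+1 over the per-group shard counts. The same property written as a standalone predicate (a sketch; NShards = 10 in the example below is an assumption matching the standard lab common.go, which is not part of this file):

// isBalanced reports whether no group owns more than one shard above the
// least-loaded group. With 10 shards over 3 groups, 4/3/3 passes while
// 5/3/2 fails, mirroring the max > min+1 check in the test above.
func isBalanced(shards []int64, groups map[int64][]string) bool {
	counts := map[int64]int{}
	for _, g := range shards {
		counts[g]++
	}
	min, max := len(shards)+1, 0
	for g := range groups {
		if counts[g] > max {
			max = counts[g]
		}
		if counts[g] < min {
			min = counts[g]
		}
	}
	return max <= min+1
}

--------------------------------------------------------------------------------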
/hw4/src/shardmaster/server.go:
--------------------------------------------------------------------------------
1 | package shardmaster
2 |
3 | import (
4 | "errors"
5 | "net"
6 | "reflect"
7 | "time"
8 | )
9 | import "fmt"
10 | import "net/rpc"
11 | import "log"
12 |
13 | import "paxos"
14 | import "sync"
15 | import "sync/atomic"
16 | import "os"
17 | import "syscall"
18 | import "encoding/gob"
19 | import "math/rand"
20 |
21 | type ShardMaster struct {
22 | mu sync.Mutex
23 | l net.Listener
24 | me int
25 | dead int32 // for testing
26 | unreliable int32 // for testing
27 | px *paxos.Paxos
28 |
29 | configs []Config // indexed by config num
30 | lastApply int
31 | }
32 |
33 |
34 | const (
35 | Join = "Join"
36 | Leave = "Leave"
37 | Move = "Move"
38 | Query = "Query"
39 | )
40 |
41 | type Op struct {
42 | // Your data here.
43 | Operation string
44 | Args interface{}
45 | }
46 |
47 | func (sm *ShardMaster) Rebalance(config *Config, deleteGID int64) {
48 | 	nGroup := len(config.Groups) // note: assumes at least one group remains; a Leave of the last group would divide by zero below
49 | limit := NShards / nGroup
50 |
51 | for i := 0; i < NShards; i++ {
52 | if config.Shards[i] == deleteGID {
53 | // let's say we want to delete gid = 101
54 | // and Shards is now [101, 101, 100, 101, 102, ...]
55 | // then it becomes [0, 0, 100, 0, 102, ...]
56 | config.Shards[i] = 0
57 | }
58 | }
59 | gidCounts := make(map[int64]int)
60 | for i := 0; i < NShards; i++ {
61 | // occurrences of gids in these 10 shards
62 |
63 | 		// ps. the DELETED gid also has an entry in gidCounts,
64 | 		// and our goal is to drive it down to 0 (all of its shards redistributed)
65 | gidCounts[config.Shards[i]] += 1
66 | }
67 |
68 | for i := 0; i < NShards; i++ {
69 | gid := config.Shards[i]
70 | 		// if the `i`th shard's group has been deleted,
71 | 		// OR if the `i`th shard's group needs to manage too many shards,
72 | 		// -> find another group to take over the `i`th shard
73 | 		// how do we decide which group is the best choice? (see the worked sketch after this file)
74 | if gid == 0 || gidCounts[gid] > limit {
75 |
76 | // bestGid is the best replacement gid that we could find now
77 | bestGid := int64(-1) // init value
78 | // minGidCount is the # of shards that the group `bestGid`
79 | // is taking care of.
80 | // e.g., [0, 0, 0, 101, 101, 102, 101, 0, 102, 101]
81 | // then bestGid = 102 as its minGidCount = 2
82 | // in contrast, gid 101 is not the best as it is already
83 | // taking care of 4 shards
84 | minGidCount := -1 // init value
85 |
86 | // enumerate all existing groups
87 | for currGid, _ := range config.Groups {
88 | // if init OR
89 | // group `currGid` is taking care of less # of shards
90 | // compared to minGidCount
91 | 				// update our best choice Gid (the one with the MINIMUM count)
92 | if bestGid == -1 || gidCounts[currGid] < minGidCount {
93 | bestGid = currGid
94 | minGidCount = gidCounts[currGid]
95 | }
96 | }
97 | 			// if the current gid on shard `i` is the deleted one (now 0),
98 | 			// we must give it a new gid,
99 | 			// so the deleted group's gidCount will -= 1
100 | 			// and the replacement group's will += 1
101 | if gid == 0 {
102 | gidCounts[gid] -= 1
103 | gidCounts[bestGid] += 1
104 | config.Shards[i] = bestGid
105 | } else {
106 | 				// if the current gid is not the deleted one,
107 | 				// i.e., group `gid` is simply taking care of too many shards,
108 | 				// then we should reduce its burden. But NOT all the time. When?
109 | 
110 | 				// only if the replacement is actually meaningful:
111 | 				// e.g. [100, 100, 100, 100, 101, 101, 101, 102, 102, 102]
112 | 				// gid = 100 currently has gidCount = 4
113 | 				// and gid = 101 has gidCount = 3,
114 | 				// so if we did gidCount[100] -= 1 and gidCount[101] += 1
115 | 				// they would become 3 and 4 respectively... it does not help at all
116 | 				// (we would just end up with [100, 100, 100, 101, 101, 101, 101, 102, 102, 102]),
117 | 				// so we prefer to do nothing
118 | if gidCounts[gid] - gidCounts[bestGid] > 1 {
119 | gidCounts[gid] -= 1
120 | gidCounts[bestGid] += 1
121 | config.Shards[i] = bestGid
122 | } else {
123 | // do nothing
124 | }
125 | }
126 | }
127 | }
128 | }
129 |
130 | func (sm *ShardMaster) Apply(op Op) {
131 | lastConfig := sm.configs[sm.lastApply]
132 | var newConfig Config
133 | newConfig.Num = lastConfig.Num
134 | newConfig.Groups = make(map[int64][]string)
135 | for k, v := range lastConfig.Groups {
136 | newConfig.Groups[k] = v
137 | }
138 | for i := 0; i < NShards; i++ {
139 | newConfig.Shards[i] = lastConfig.Shards[i]
140 | }
141 |
142 | if op.Operation == Join {
143 | joinArgs := op.Args.(JoinArgs)
144 | newConfig.Groups[joinArgs.GID] = joinArgs.Servers
145 | newConfig.Num += 1
146 | sm.Rebalance(&newConfig, 0)
147 | } else if op.Operation == Leave {
148 | leaveArgs := op.Args.(LeaveArgs)
149 | delete(newConfig.Groups, leaveArgs.GID)
150 | newConfig.Num += 1
151 | sm.Rebalance(&newConfig, leaveArgs.GID)
152 | } else if op.Operation == Move {
153 | moveArgs := op.Args.(MoveArgs)
154 | newConfig.Shards[moveArgs.Shard] = moveArgs.GID
155 | newConfig.Num += 1
156 | } else if op.Operation == Query {
157 | 		// do nothing
158 | }
159 |
160 | sm.configs = append(sm.configs, newConfig)
161 | }
162 |
163 | func (sm *ShardMaster) Wait(seq int) (Op, error) {
164 | sleepTime := 10 * time.Millisecond
165 | 	for iters := 0; iters < 15; iters++ {
166 | decided, op := sm.px.Status(seq)
167 | if decided == paxos.Decided {
168 | return op.(Op), nil
169 | }
170 | time.Sleep(sleepTime)
171 | if sleepTime < 10 * time.Second {
172 | sleepTime *= 2
173 | }
174 | }
175 | 	return Op{}, errors.New("ShardMaster: waited too long for a paxos decision")
176 | }
177 |
178 | func (sm *ShardMaster) Propose(xop Op) error {
179 | for {
180 | sm.px.Start(sm.lastApply + 1, xop)
181 | op, err := sm.Wait(sm.lastApply + 1)
182 | if err != nil {
183 | return err
184 | }
185 | sm.Apply(op)
186 | sm.lastApply += 1
187 | if reflect.DeepEqual(op, xop) {
188 | break
189 | }
190 | sm.px.Done(sm.lastApply)
191 | }
192 | sm.px.Done(sm.lastApply)
193 | return nil
194 | }
195 |
196 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) error {
197 | // Your code here.
198 | sm.mu.Lock()
199 | defer sm.mu.Unlock()
200 | op := Op{Args: *args, Operation: Join}
201 | err := sm.Propose(op)
202 | if err != nil {
203 | return err
204 | }
205 | return nil
206 | }
207 |
208 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) error {
209 | sm.mu.Lock()
210 | defer sm.mu.Unlock()
211 | op := Op{Args: *args, Operation: Leave}
212 | err := sm.Propose(op)
213 | if err != nil {
214 | return err
215 | }
216 | return nil
217 | }
218 |
219 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) error {
220 | sm.mu.Lock()
221 | defer sm.mu.Unlock()
222 | op := Op{Args: *args, Operation: Move}
223 | err := sm.Propose(op)
224 | if err != nil {
225 | return err
226 | }
227 | return nil
228 | }
229 |
230 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) error {
231 | // Your code here.
232 | sm.mu.Lock()
233 | defer sm.mu.Unlock()
234 |
235 | op := Op{Args: *args, Operation: Query}
236 | err := sm.Propose(op)
237 | if err != nil {
238 | return err
239 | }
240 |
241 | // config.Num is not necessarily equal to its index in sm.configs
242 | // e.g., sm.configs[1203].Num -> this value could be != 1203
243 | // e.g., sm.configs[6].Num = 3, sm.configs[16].Num = 5
244 | 	// why? because we only increment Num on Join/Leave/Move,
245 | 	// not when doing a Query;
246 | 	// however, sm.configs is appended to even for a Query,
247 | 	// so len(sm.configs) grows FASTER than Num
248 | for i := 0; i < sm.lastApply; i++ {
249 | if sm.configs[i].Num == args.Num {
250 | reply.Config = sm.configs[i]
251 | //log.Printf("i=%v, num=%v", i, args.Num)
252 | return nil
253 | }
254 | }
255 | // args.Num == -1 OR args.Num is larger than any other Num in configs
256 | reply.Config = sm.configs[sm.lastApply]
257 | return nil
258 | }
259 |
260 | // please don't change these two functions.
261 | func (sm *ShardMaster) Kill() {
262 | atomic.StoreInt32(&sm.dead, 1)
263 | sm.l.Close()
264 | sm.px.Kill()
265 | }
266 |
267 | // call this to find out if the server is dead.
268 | func (sm *ShardMaster) isdead() bool {
269 | return atomic.LoadInt32(&sm.dead) != 0
270 | }
271 |
272 | // please do not change these two functions.
273 | func (sm *ShardMaster) setunreliable(what bool) {
274 | if what {
275 | atomic.StoreInt32(&sm.unreliable, 1)
276 | } else {
277 | atomic.StoreInt32(&sm.unreliable, 0)
278 | }
279 | }
280 |
281 | func (sm *ShardMaster) isunreliable() bool {
282 | return atomic.LoadInt32(&sm.unreliable) != 0
283 | }
284 |
285 | //
286 | // servers[] contains the ports of the set of
287 | // servers that will cooperate via Paxos to
288 | // form the fault-tolerant shardmaster service.
289 | // me is the index of the current server in servers[].
290 | //
291 | func StartServer(servers []string, me int) *ShardMaster {
292 | gob.Register(Op{})
293 | gob.Register(JoinArgs{})
294 | gob.Register(LeaveArgs{})
295 | gob.Register(MoveArgs{})
296 | gob.Register(QueryArgs{})
297 |
298 | sm := new(ShardMaster)
299 | sm.me = me
300 |
301 | sm.configs = make([]Config, 1)
302 | sm.configs[0].Groups = map[int64][]string{}
303 |
304 | rpcs := rpc.NewServer()
305 |
306 | gob.Register(Op{})
307 | rpcs.Register(sm)
308 | sm.px = paxos.Make(servers, me, rpcs)
309 |
310 | os.Remove(servers[me])
311 | l, e := net.Listen("unix", servers[me])
312 | if e != nil {
313 | log.Fatal("listen error: ", e)
314 | }
315 | sm.l = l
316 |
317 | // please do not change any of the following code,
318 | // or do anything to subvert it.
319 |
320 | go func() {
321 | for sm.isdead() == false {
322 | conn, err := sm.l.Accept()
323 | if err == nil && sm.isdead() == false {
324 | if sm.isunreliable() && (rand.Int63()%1000) < 100 {
325 | // discard the request.
326 | conn.Close()
327 | } else if sm.isunreliable() && (rand.Int63()%1000) < 200 {
328 | // process the request but force discard of reply.
329 | c1 := conn.(*net.UnixConn)
330 | f, _ := c1.File()
331 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
332 | if err != nil {
333 | fmt.Printf("shutdown: %v\n", err)
334 | }
335 | go rpcs.ServeConn(conn)
336 | } else {
337 | go rpcs.ServeConn(conn)
338 | }
339 | } else if err == nil {
340 | conn.Close()
341 | }
342 | if err != nil && sm.isdead() == false {
343 | fmt.Printf("ShardMaster(%v) accept: %v\n", me, err.Error())
344 | sm.Kill()
345 | }
346 | }
347 | }()
348 |
349 | return sm
350 | }
--------------------------------------------------------------------------------
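The worked sketch referred to in the Rebalance() comments above. The kernel of the algorithm is simply "hand each orphaned or excess shard to whichever surviving group currently owns the fewest shards"; pulled out on its own it looks like this (illustrative helper, mirroring the bestGid/minGidCount loop in server.go):

// pickLeastLoaded returns the gid of the surviving group that currently owns
// the fewest shards; gidCounts maps gid -> number of shards it owns.
func pickLeastLoaded(groups map[int64][]string, gidCounts map[int64]int) int64 {
	bestGid := int64(-1)
	minGidCount := -1
	for gid := range groups {
		if bestGid == -1 || gidCounts[gid] < minGidCount {
			bestGid = gid
			minGidCount = gidCounts[gid]
		}
	}
	return bestGid
}

Concretely, assuming NShards = 10: starting from Shards = [101 101 100 101 102 100 102 100 102 100], a Leave of gid 101 zeroes its three shards and re-assigns them one at a time to the least-loaded survivor, ending with five shards each for 100 and 102. The exact placement depends on Go map iteration order, but the final counts are always 5 and 5, which is what the shardmaster tests check.

--------------------------------------------------------------------------------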
/hw4/src/shardkv/test_test.go:
--------------------------------------------------------------------------------
1 | package shardkv
2 |
3 | import (
4 | "testing"
5 | )
6 | import "shardmaster"
7 | import "runtime"
8 | import "strconv"
9 | import "os"
10 | import "time"
11 | import "fmt"
12 | import "sync"
13 | import "sync/atomic"
14 | import "math/rand"
15 |
16 | // information about the servers of one replica group.
17 | type tGroup struct {
18 | gid int64
19 | servers []*ShardKV
20 | ports []string
21 | }
22 |
23 | // information about all the servers of a k/v cluster.
24 | type tCluster struct {
25 | t *testing.T
26 | masters []*shardmaster.ShardMaster
27 | mck *shardmaster.Clerk
28 | masterports []string
29 | groups []*tGroup
30 | }
31 |
32 | func port(tag string, host int) string {
33 | s := "/var/tmp/824-"
34 | s += strconv.Itoa(os.Getuid()) + "/"
35 | os.Mkdir(s, 0777)
36 | s += "skv-"
37 | s += strconv.Itoa(os.Getpid()) + "-"
38 | s += tag + "-"
39 | s += strconv.Itoa(host)
40 | return s
41 | }
42 |
43 | //
44 | // start a k/v replica server thread.
45 | //
46 | func (tc *tCluster) start1(gi int, si int, unreliable bool) {
47 | s := StartServer(tc.groups[gi].gid, tc.masterports, tc.groups[gi].ports, si)
48 | tc.groups[gi].servers[si] = s
49 | s.Setunreliable(unreliable)
50 | }
51 |
52 | func (tc *tCluster) cleanup() {
53 | for gi := 0; gi < len(tc.groups); gi++ {
54 | g := tc.groups[gi]
55 | for si := 0; si < len(g.servers); si++ {
56 | if g.servers[si] != nil {
57 | g.servers[si].kill()
58 | }
59 | }
60 | }
61 |
62 | for i := 0; i < len(tc.masters); i++ {
63 | if tc.masters[i] != nil {
64 | tc.masters[i].Kill()
65 | }
66 | }
67 | }
68 |
69 | func (tc *tCluster) shardclerk() *shardmaster.Clerk {
70 | return shardmaster.MakeClerk(tc.masterports)
71 | }
72 |
73 | func (tc *tCluster) clerk() *Clerk {
74 | return MakeClerk(tc.masterports)
75 | }
76 |
77 | func (tc *tCluster) join(gi int) {
78 | tc.mck.Join(tc.groups[gi].gid, tc.groups[gi].ports)
79 | }
80 |
81 | func (tc *tCluster) leave(gi int) {
82 | tc.mck.Leave(tc.groups[gi].gid)
83 | }
84 |
85 | func setup(t *testing.T, tag string, unreliable bool) *tCluster {
86 | runtime.GOMAXPROCS(4)
87 |
88 | const nmasters = 3
89 | const ngroups = 3 // replica groups
90 | const nreplicas = 3 // servers per group
91 |
92 | tc := &tCluster{}
93 | tc.t = t
94 | tc.masters = make([]*shardmaster.ShardMaster, nmasters)
95 | tc.masterports = make([]string, nmasters)
96 |
97 | for i := 0; i < nmasters; i++ {
98 | tc.masterports[i] = port(tag+"m", i)
99 | }
100 | for i := 0; i < nmasters; i++ {
101 | tc.masters[i] = shardmaster.StartServer(tc.masterports, i)
102 | }
103 | tc.mck = tc.shardclerk()
104 |
105 | tc.groups = make([]*tGroup, ngroups)
106 |
107 | for i := 0; i < ngroups; i++ {
108 | tc.groups[i] = &tGroup{}
109 | tc.groups[i].gid = int64(i + 100)
110 | tc.groups[i].servers = make([]*ShardKV, nreplicas)
111 | tc.groups[i].ports = make([]string, nreplicas)
112 | for j := 0; j < nreplicas; j++ {
113 | tc.groups[i].ports[j] = port(tag+"s", (i*nreplicas)+j)
114 | }
115 | for j := 0; j < nreplicas; j++ {
116 | tc.start1(i, j, unreliable)
117 | }
118 | }
119 |
120 | // return smh, gids, ha, sa, clean
121 | return tc
122 | }
123 |
124 | func TestBasic(t *testing.T) {
125 | tc := setup(t, "basic", false)
126 | defer tc.cleanup()
127 |
128 | fmt.Printf("Test: Basic Join/Leave ...\n")
129 | time.Sleep(time.Second)
130 |
131 | tc.join(0)
132 |
133 | ck := tc.clerk()
134 |
135 | ck.Put("a", "x")
136 | ck.Append("a", "b")
137 | if ck.Get("a") != "xb" {
138 | t.Fatalf("Get got wrong value")
139 | }
140 |
141 | keys := make([]string, 10)
142 | vals := make([]string, len(keys))
143 | for i := 0; i < len(keys); i++ {
144 | keys[i] = strconv.Itoa(rand.Int())
145 | vals[i] = strconv.Itoa(rand.Int())
146 | ck.Put(keys[i], vals[i])
147 | }
148 | // are keys still there after joins?
149 | for g := 1; g < len(tc.groups); g++ {
150 | tc.join(g)
151 | time.Sleep(1 * time.Second)
152 | for i := 0; i < len(keys); i++ {
153 | v := ck.Get(keys[i])
154 | if v != vals[i] {
155 | t.Fatalf("joining; wrong value; g=%v k=%v wanted=%v got=%v",
156 | g, keys[i], vals[i], v)
157 | }
158 | vals[i] = strconv.Itoa(rand.Int())
159 | ck.Put(keys[i], vals[i])
160 | }
161 | }
162 | //log.Printf("done first part")
163 | // are keys still there after leaves?
164 | for g := 0; g < len(tc.groups)-1; g++ {
165 | tc.leave(g)
166 | time.Sleep(1 * time.Second)
167 | for i := 0; i < len(keys); i++ {
168 | v := ck.Get(keys[i])
169 | if v != vals[i] {
170 | t.Fatalf("leaving; wrong value; g=%v k=%v wanted=%v got=%v",
171 | g, keys[i], vals[i], v)
172 | }
173 | vals[i] = strconv.Itoa(rand.Int())
174 | ck.Put(keys[i], vals[i])
175 | }
176 | }
177 |
178 | fmt.Printf(" ... Passed\n")
179 | }
180 |
181 | func TestMove(t *testing.T) {
182 | tc := setup(t, "move", false)
183 | defer tc.cleanup()
184 |
185 | fmt.Printf("Test: Shards really move ...\n")
186 |
187 | tc.join(0)
188 |
189 | ck := tc.clerk()
190 |
191 | // insert one key per shard
192 | for i := 0; i < shardmaster.NShards; i++ {
193 | ck.Put(string('0'+i), string('0'+i))
194 | }
195 |
196 | // add group 1.
197 | tc.join(1)
198 | time.Sleep(5 * time.Second)
199 |
200 | // check that keys are still there.
201 | for i := 0; i < shardmaster.NShards; i++ {
202 | if ck.Get(string('0'+i)) != string('0'+i) {
203 | t.Fatalf("missing key/value")
204 | }
205 | }
206 |
207 | // remove sockets from group 0.
208 | for _, port := range tc.groups[0].ports {
209 | os.Remove(port)
210 | }
211 |
212 | count := int32(0)
213 | var mu sync.Mutex
214 | for i := 0; i < shardmaster.NShards; i++ {
215 | go func(me int) {
216 | myck := tc.clerk()
217 | v := myck.Get(string('0' + me))
218 | if v == string('0'+me) {
219 | mu.Lock()
220 | atomic.AddInt32(&count, 1)
221 | mu.Unlock()
222 | } else {
223 | t.Fatalf("Get(%v) yielded %v\n", me, v)
224 | }
225 | }(i)
226 | }
227 |
228 | time.Sleep(10 * time.Second)
229 |
230 | ccc := atomic.LoadInt32(&count)
231 | if ccc > shardmaster.NShards/3 && ccc < 2*(shardmaster.NShards/3) {
232 | fmt.Printf(" ... Passed\n")
233 | } else {
234 | t.Fatalf("%v keys worked after killing 1/2 of groups; wanted %v",
235 | ccc, shardmaster.NShards/2)
236 | }
237 | }
238 |
239 | func TestLimp(t *testing.T) {
240 | tc := setup(t, "limp", false)
241 | defer tc.cleanup()
242 |
243 | fmt.Printf("Test: Reconfiguration with some dead replicas ...\n")
244 |
245 | tc.join(0)
246 |
247 | ck := tc.clerk()
248 |
249 | ck.Put("a", "b")
250 | if ck.Get("a") != "b" {
251 | t.Fatalf("got wrong value")
252 | }
253 |
254 | // kill one server from each replica group.
255 | for gi := 0; gi < len(tc.groups); gi++ {
256 | sa := tc.groups[gi].servers
257 | ns := len(sa)
258 | sa[rand.Int()%ns].kill()
259 | }
260 |
261 | keys := make([]string, 10)
262 | vals := make([]string, len(keys))
263 | for i := 0; i < len(keys); i++ {
264 | keys[i] = strconv.Itoa(rand.Int())
265 | vals[i] = strconv.Itoa(rand.Int())
266 | ck.Put(keys[i], vals[i])
267 | }
268 |
269 | // are keys still there after joins?
270 | for g := 1; g < len(tc.groups); g++ {
271 | tc.join(g)
272 | time.Sleep(1 * time.Second)
273 | for i := 0; i < len(keys); i++ {
274 | v := ck.Get(keys[i])
275 | if v != vals[i] {
276 | t.Fatalf("joining; wrong value; g=%v k=%v wanted=%v got=%v",
277 | g, keys[i], vals[i], v)
278 | }
279 | vals[i] = strconv.Itoa(rand.Int())
280 | ck.Put(keys[i], vals[i])
281 | }
282 | }
283 |
284 | // are keys still there after leaves?
285 | for gi := 0; gi < len(tc.groups)-1; gi++ {
286 | tc.leave(gi)
287 | time.Sleep(2 * time.Second)
288 | g := tc.groups[gi]
289 | for i := 0; i < len(g.servers); i++ {
290 | g.servers[i].kill()
291 | }
292 | for i := 0; i < len(keys); i++ {
293 | v := ck.Get(keys[i])
294 | if v != vals[i] {
295 | t.Fatalf("leaving; wrong value; g=%v k=%v wanted=%v got=%v",
296 | g, keys[i], vals[i], v)
297 | }
298 | vals[i] = strconv.Itoa(rand.Int())
299 | ck.Put(keys[i], vals[i])
300 | }
301 | }
302 |
303 | fmt.Printf(" ... Passed\n")
304 | }
305 |
306 | func doConcurrent(t *testing.T, unreliable bool) {
307 | tc := setup(t, "concurrent-"+strconv.FormatBool(unreliable), unreliable)
308 | defer tc.cleanup()
309 |
310 | for i := 0; i < len(tc.groups); i++ {
311 | tc.join(i)
312 | }
313 |
314 | const npara = 11
315 | var ca [npara]chan bool
316 | for i := 0; i < npara; i++ {
317 | ca[i] = make(chan bool)
318 | go func(me int) {
319 | ok := true
320 | defer func() { ca[me] <- ok }()
321 | ck := tc.clerk()
322 | mymck := tc.shardclerk()
323 | key := strconv.Itoa(me)
324 | last := ""
325 | for iters := 0; iters < 3; iters++ {
326 | //for iters := 0; iters < 1; iters++ {
327 | nv := strconv.Itoa(rand.Int())
328 | //nv := strconv.Itoa(me)
329 | //log.Printf("[test1] Append key: %v, nv %v", key, nv)
330 | ck.Append(key, nv)
331 | //log.Printf("[test1] done Append key: %v, nv %v", key, nv)
332 |
333 | last = last + nv
334 | v := ck.Get(key)
335 |
336 | if v != last {
337 | ok = false
338 | t.Fatalf("Get(%v) expected %v got %v\n", key, last, v)
339 | }
340 |
341 | gi := rand.Int() % len(tc.groups)
342 | gid := tc.groups[gi].gid
343 | which := rand.Int() % shardmaster.NShards
344 | //log.Printf("[test2] Move which %v, gid %v", which, gid)
345 | mymck.Move(which, gid)
346 | //log.Printf("[test2] done Move which %v, gid %v", which, gid)
347 |
348 | time.Sleep(time.Duration(rand.Int()%30) * time.Millisecond)
349 | }
350 | }(i)
351 | }
352 |
353 | for i := 0; i < npara; i++ {
354 | x := <-ca[i]
355 | if x == false {
356 | t.Fatalf("something is wrong")
357 | }
358 | }
359 | }
360 |
361 | // The two Concurrent test cases below exercise several clients sending Append and Get
362 | // operations to different shard groups concurrently, while also periodically asking
363 | // the shard master to move shards between groups. To pass these test cases you must
364 | // design a correct protocol for handling concurrent operations in the presence of configuration changes.
365 | func TestConcurrent(t *testing.T) {
366 | fmt.Printf("Test: Concurrent Put/Get/Move ...\n")
367 | doConcurrent(t, false)
368 | fmt.Printf(" ... Passed\n")
369 | }
370 | // The second Concurrent test case is the same as the first, except that the test code
371 | // randomly drops requests and responses.
372 | func TestConcurrentUnreliable(t *testing.T) {
373 | fmt.Printf("Test: Concurrent Put/Get/Move (unreliable) ...\n")
374 | doConcurrent(t, true)
375 | fmt.Printf(" ... Passed\n")
376 | }
377 |
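
The fan-out/fan-in harness that doConcurrent uses above (one result channel per worker goroutine, all collected at the end) is a standard Go pattern. Here is a minimal, self-contained sketch of just that pattern, with a sleep standing in for the per-client Append/Get/Move work; everything in it is illustrative, not code from the repository:

package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	const npara = 11
	var ca [npara]chan bool
	for i := 0; i < npara; i++ {
		ca[i] = make(chan bool)
		go func(me int) {
			ok := true
			defer func() { ca[me] <- ok }() // always report, even on an early return
			// stand-in for the per-client work done in doConcurrent
			time.Sleep(time.Duration(rand.Intn(30)) * time.Millisecond)
		}(i)
	}
	for i := 0; i < npara; i++ {
		if !<-ca[i] { // block until worker i reports
			fmt.Printf("worker %d failed\n", i)
		}
	}
	fmt.Println("all workers done")
}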
--------------------------------------------------------------------------------
/hw1/src/mapreduce/mapreduce.go:
--------------------------------------------------------------------------------
1 | package mapreduce
2 |
3 | import (
4 | "fmt"
5 | "sync"
6 | )
7 | import "os"
8 | import "log"
9 | import "strconv"
10 | import "encoding/json"
11 | import "sort"
12 | import "container/list"
13 | import "net/rpc"
14 | import "net"
15 | import "bufio"
16 | import "hash/fnv"
17 |
18 | // import "os/exec"
19 |
20 | // A simple mapreduce library with a sequential implementation.
21 | //
22 | // The application provides an input file f, a Map and Reduce function,
23 | // and the number of nMap and nReduce tasks.
24 | //
25 | // Split() splits the file f into nMap input files:
26 | //    f-0, f-1, ..., f-<nMap-1>
27 | // one for each Map job.
28 | //
29 | // DoMap() runs Map on each map file and produces nReduce files for each
30 | // map file. Thus, there will be nMap x nReduce files after all map
31 | // jobs are done:
32 | //    f-0-0, ..., f-0-<nReduce-1>, f-1-0, ...,
33 | //    f-<nMap-1>-0, ..., f-<nMap-1>-<nReduce-1>.
34 | //
35 | // DoReduce() collects the nReduce reduce files from each map (f-*-<reduce>),
36 | // and runs Reduce on those files. This produces nReduce result files,
37 | // which Merge() merges into a single output.
38 |
39 | // Debugging
40 | const Debug = 0
41 |
42 | func DPrintf(format string, a ...interface{}) (n int, err error) {
43 | if Debug > 0 {
44 | n, err = fmt.Printf(format, a...)
45 | }
46 | return
47 | }
48 |
49 | // Map and Reduce deal with <key, value> pairs:
50 | type KeyValue struct {
51 | Key string
52 | Value string
53 | }
54 |
55 | type MapReduce struct {
56 | nMap int // Number of Map jobs
57 | nReduce int // Number of Reduce jobs
58 | file string // Name of input file
59 | MasterAddress string
60 | registerChannel chan string
61 | DoneChannel chan bool
62 | alive bool
63 | l net.Listener
64 | stats *list.List
65 |
66 | // Map of registered workers that you need to keep up to date
67 | Workers map[string]*WorkerInfo
68 |
69 | // add any additional state here
70 | availableWorkers chan string
71 | wg sync.WaitGroup
72 | remainMapJobs chan int
73 | remainReduceJobs chan int
74 | nCount chan bool
75 | donePhase chan bool
76 | }
77 |
78 | func InitMapReduce(nmap int, nreduce int,
79 | file string, master string) *MapReduce {
80 | mr := new(MapReduce)
81 | mr.nMap = nmap
82 | mr.nReduce = nreduce
83 | mr.file = file
84 | mr.MasterAddress = master
85 | mr.alive = true
86 | mr.registerChannel = make(chan string)
87 | mr.DoneChannel = make(chan bool)
88 |
89 | // initialize any additional state here
90 | mr.Workers = make(map[string]*WorkerInfo)
91 | mr.availableWorkers = make(chan string, 484) // a generously sized buffer
92 | mr.remainMapJobs = make(chan int, nmap)
93 | mr.remainReduceJobs = make(chan int, nreduce)
94 | mr.nCount = make(chan bool)
95 | mr.donePhase = make(chan bool)
96 | return mr
97 | }
98 |
99 | func MakeMapReduce(nmap int, nreduce int,
100 | file string, master string) *MapReduce {
101 | mr := InitMapReduce(nmap, nreduce, file, master)
102 | mr.StartRegistrationServer()
103 | go mr.Run()
104 | return mr
105 | }
106 |
107 | func (mr *MapReduce) Register(args *RegisterArgs, res *RegisterReply) error {
108 | DPrintf("Register: worker %s\n", args.Worker)
109 | mr.registerChannel <- args.Worker
110 | res.OK = true
111 | return nil
112 | }
113 |
114 | func (mr *MapReduce) Shutdown(args *ShutdownArgs, res *ShutdownReply) error {
115 | DPrintf("Shutdown: registration server\n")
116 | mr.alive = false
117 | mr.l.Close() // causes the Accept to fail
118 | return nil
119 | }
120 |
121 | func (mr *MapReduce) StartRegistrationServer() {
122 | rpcs := rpc.NewServer()
123 | rpcs.Register(mr)
124 | os.Remove(mr.MasterAddress) // only needed for "unix"
125 | l, e := net.Listen("unix", mr.MasterAddress)
126 | if e != nil {
127 | log.Fatal("RegistrationServer", mr.MasterAddress, " error: ", e)
128 | }
129 | mr.l = l
130 |
131 | // now that we are listening on the master address, we can hand off
132 | // accepting connections to another goroutine.
133 | go func() {
134 | for mr.alive {
135 | conn, err := mr.l.Accept()
136 | if err == nil {
137 | go func() {
138 | rpcs.ServeConn(conn)
139 | conn.Close()
140 | }()
141 | } else {
142 | DPrintf("RegistrationServer: accept error %s", err)
143 | break
144 | }
145 | }
146 | DPrintf("RegistrationServer: done\n")
147 | }()
148 | }
149 |
150 | // Name of the file that is the input for map job
151 | func MapName(fileName string, MapJob int) string {
152 | return "mrtmp." + fileName + "-" + strconv.Itoa(MapJob)
153 | }
154 |
155 | // Split bytes of input file into nMap splits, but split only on white space
156 | func (mr *MapReduce) Split(fileName string) {
157 | fmt.Printf("Split %s\n", fileName)
158 | infile, err := os.Open(fileName)
159 | if err != nil {
160 | log.Fatal("Split: ", err)
161 | }
162 | defer infile.Close()
163 | fi, err := infile.Stat()
164 | if err != nil {
165 | log.Fatal("Split: ", err)
166 | }
167 | size := fi.Size()
168 | nchunk := size / int64(mr.nMap)
169 | nchunk += 1
170 |
171 | outfile, err := os.Create(MapName(fileName, 0))
172 | if err != nil {
173 | log.Fatal("Split: ", err)
174 | }
175 | writer := bufio.NewWriter(outfile)
176 | m := 1
177 | i := 0
178 |
179 | scanner := bufio.NewScanner(infile)
180 | for scanner.Scan() {
181 | if int64(i) > nchunk*int64(m) {
182 | writer.Flush()
183 | outfile.Close()
184 | outfile, err = os.Create(MapName(fileName, m))
185 | writer = bufio.NewWriter(outfile)
186 | m += 1
187 | }
188 | line := scanner.Text() + "\n"
189 | writer.WriteString(line)
190 | i += len(line)
191 | }
192 | writer.Flush()
193 | outfile.Close()
194 | }
195 |
196 | func ReduceName(fileName string, MapJob int, ReduceJob int) string {
197 | return MapName(fileName, MapJob) + "-" + strconv.Itoa(ReduceJob)
198 | }
199 |
200 | func ihash(s string) uint32 {
201 | h := fnv.New32a()
202 | h.Write([]byte(s))
203 | return h.Sum32()
204 | }
205 |
206 | // Read split for job, call Map for that split, and create nreduce
207 | // partitions.
208 | func DoMap(JobNumber int, fileName string,
209 | nreduce int, Map func(string) *list.List) {
210 | name := MapName(fileName, JobNumber)
211 | file, err := os.Open(name)
212 | if err != nil {
213 | log.Fatal("DoMap: ", err)
214 | }
215 | fi, err := file.Stat()
216 | if err != nil {
217 | log.Fatal("DoMap: ", err)
218 | }
219 | size := fi.Size()
220 | fmt.Printf("DoMap: read split %s %d\n", name, size)
221 | b := make([]byte, size)
222 | _, err = file.Read(b)
223 | if err != nil {
224 | log.Fatal("DoMap: ", err)
225 | }
226 | file.Close()
227 | res := Map(string(b))
228 | // XXX a bit inefficient. could open r files and run over list once
229 | for r := 0; r < nreduce; r++ {
230 | file, err = os.Create(ReduceName(fileName, JobNumber, r))
231 | if err != nil {
232 | log.Fatal("DoMap: create ", err)
233 | }
234 | enc := json.NewEncoder(file)
235 | for e := res.Front(); e != nil; e = e.Next() {
236 | kv := e.Value.(KeyValue)
237 | if ihash(kv.Key)%uint32(nreduce) == uint32(r) {
238 | err := enc.Encode(&kv)
239 | if err != nil {
240 | log.Fatal("DoMap: marshall ", err)
241 | }
242 | }
243 | }
244 | file.Close()
245 | }
246 | }
247 |
248 | func MergeName(fileName string, ReduceJob int) string {
249 | return "mrtmp." + fileName + "-res-" + strconv.Itoa(ReduceJob)
250 | }
251 |
252 | // Read map outputs for partition job, sort them by key, call reduce for each
253 | // key
254 | func DoReduce(job int, fileName string, nmap int,
255 | Reduce func(string, *list.List) string) {
256 | kvs := make(map[string]*list.List)
257 | for i := 0; i < nmap; i++ {
258 | name := ReduceName(fileName, i, job)
259 | fmt.Printf("DoReduce: read %s\n", name)
260 | file, err := os.Open(name)
261 | if err != nil {
262 | log.Fatal("DoReduce: ", err)
263 | }
264 | dec := json.NewDecoder(file)
265 | for {
266 | var kv KeyValue
267 | err = dec.Decode(&kv)
268 | if err != nil {
269 | break
270 | }
271 | _, ok := kvs[kv.Key]
272 | if !ok {
273 | kvs[kv.Key] = list.New()
274 | }
275 | kvs[kv.Key].PushBack(kv.Value)
276 | }
277 | file.Close()
278 | }
279 | var keys []string
280 | for k := range kvs {
281 | keys = append(keys, k)
282 | }
283 | sort.Strings(keys)
284 | p := MergeName(fileName, job)
285 | file, err := os.Create(p)
286 | if err != nil {
287 | log.Fatal("DoReduce: create ", err)
288 | }
289 | enc := json.NewEncoder(file)
290 | for _, k := range keys {
291 | res := Reduce(k, kvs[k])
292 | enc.Encode(KeyValue{k, res})
293 | }
294 | file.Close()
295 | }
296 |
297 | // Merge the results of the reduce jobs
298 | // XXX use merge sort
299 | func (mr *MapReduce) Merge() {
300 | DPrintf("Merge phase")
301 | kvs := make(map[string]string)
302 | for i := 0; i < mr.nReduce; i++ {
303 | p := MergeName(mr.file, i)
304 | fmt.Printf("Merge: read %s\n", p)
305 | file, err := os.Open(p)
306 | if err != nil {
307 | log.Fatal("Merge: ", err)
308 | }
309 | dec := json.NewDecoder(file)
310 | for {
311 | var kv KeyValue
312 | err = dec.Decode(&kv)
313 | if err != nil {
314 | break
315 | }
316 | kvs[kv.Key] = kv.Value
317 | }
318 | file.Close()
319 | }
320 | var keys []string
321 | for k := range kvs {
322 | keys = append(keys, k)
323 | }
324 | sort.Strings(keys)
325 |
326 | file, err := os.Create("mrtmp." + mr.file)
327 | if err != nil {
328 | log.Fatal("Merge: create ", err)
329 | }
330 | w := bufio.NewWriter(file)
331 | for _, k := range keys {
332 | fmt.Fprintf(w, "%s: %s\n", k, kvs[k])
333 | }
334 | w.Flush()
335 | file.Close()
336 | }
337 |
338 | func RemoveFile(n string) {
339 | err := os.Remove(n)
340 | if err != nil {
341 | log.Fatal("CleanupFiles ", err)
342 | }
343 | }
344 |
345 | func (mr *MapReduce) CleanupFiles() {
346 | for i := 0; i < mr.nMap; i++ {
347 | RemoveFile(MapName(mr.file, i))
348 | for j := 0; j < mr.nReduce; j++ {
349 | RemoveFile(ReduceName(mr.file, i, j))
350 | }
351 | }
352 | for i := 0; i < mr.nReduce; i++ {
353 | RemoveFile(MergeName(mr.file, i))
354 | }
355 | RemoveFile("mrtmp." + mr.file)
356 | }
357 |
358 | // Run jobs sequentially.
359 | func RunSingle(nMap int, nReduce int, file string,
360 | Map func(string) *list.List,
361 | Reduce func(string, *list.List) string) {
362 | mr := InitMapReduce(nMap, nReduce, file, "")
363 | mr.Split(mr.file)
364 | for i := 0; i < nMap; i++ {
365 | DoMap(i, mr.file, mr.nReduce, Map)
366 | }
367 | for i := 0; i < mr.nReduce; i++ {
368 | DoReduce(i, mr.file, mr.nMap, Reduce)
369 | }
370 | mr.Merge()
371 | }
372 |
373 | func (mr *MapReduce) CleanupRegistration() {
374 | args := &ShutdownArgs{}
375 | var reply ShutdownReply
376 | ok := call(mr.MasterAddress, "MapReduce.Shutdown", args, &reply)
377 | if ok == false {
378 | fmt.Printf("Cleanup: RPC %s error\n", mr.MasterAddress)
379 | }
380 | DPrintf("CleanupRegistration: done\n")
381 | }
382 |
383 | // Run jobs in parallel, assuming a shared file system
384 | func (mr *MapReduce) Run() {
385 | fmt.Printf("Run mapreduce job %s %s\n", mr.MasterAddress, mr.file)
386 |
387 | mr.Split(mr.file)
388 | mr.stats = mr.RunMaster()
389 | mr.Merge()
390 | mr.CleanupRegistration()
391 |
392 | fmt.Printf("%s: MapReduce done\n", mr.MasterAddress)
393 |
394 | mr.DoneChannel <- true
395 | }
396 |
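
The header comment above describes the Split -> DoMap -> DoReduce -> Merge flow, which is driven entirely by the application's Map and Reduce functions. Below is a hedged word-count sketch of how an application might call RunSingle; it is not a file from this repository, the `mapreduce` import path and the input file name are assumptions, and only the RunSingle/KeyValue signatures are taken from the code above:

package main

import (
	"container/list"
	"strconv"
	"strings"
	"unicode"

	"mapreduce" // assumed GOPATH-style import of the package above
)

// Map emits one KeyValue per word in its input split.
func Map(value string) *list.List {
	res := list.New()
	words := strings.FieldsFunc(value, func(r rune) bool { return !unicode.IsLetter(r) })
	for _, w := range words {
		res.PushBack(mapreduce.KeyValue{Key: w, Value: "1"})
	}
	return res
}

// Reduce sums the "1"s emitted for a single word.
func Reduce(key string, values *list.List) string {
	n := 0
	for e := values.Front(); e != nil; e = e.Next() {
		c, _ := strconv.Atoi(e.Value.(string))
		n += c
	}
	return strconv.Itoa(n)
}

func main() {
	// 5 map tasks and 3 reduce tasks over a hypothetical input file;
	// RunSingle runs Split, DoMap, DoReduce and Merge sequentially.
	mapreduce.RunSingle(5, 3, "input.txt", Map, Reduce)
}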
--------------------------------------------------------------------------------
/hw3/src/kvpaxos/server.go:
--------------------------------------------------------------------------------
1 | package kvpaxos
2 |
3 | import (
4 | "net"
5 | "time"
6 | )
7 | import "fmt"
8 | import "net/rpc"
9 | import "log"
10 | import "paxos"
11 | import "sync"
12 | import "sync/atomic"
13 | import "os"
14 | import "syscall"
15 | import "encoding/gob"
16 | import "math/rand"
17 |
18 |
19 | const Debug = 1
20 |
21 | func DPrintf(format string, a ...interface{}) (n int, err error) {
22 | if Debug > 0 {
23 | log.Printf(format, a...)
24 | }
25 | return
26 | }
27 |
28 |
29 | type Op struct {
30 | // Your definitions here.
31 | // Field names must start with capital letters,
32 | // otherwise RPC will break.
33 |
34 | // OpID is a hash key attached by the client. Each time the client
35 | // retries an operation, it reuses the same fixed OpID
36 | OpID int64
37 | // Put, Get, Append
38 | Operation string
39 | Key string
40 | Value string
41 | }
42 |
43 | type KVPaxos struct {
44 | mu sync.Mutex
45 | l net.Listener
46 | me int
47 | dead int32 // for testing
48 | unreliable int32 // for testing
49 | px *paxos.Paxos
50 |
51 | // Your definitions here.
52 | database sync.Map
53 | // hashVals acts as the state to filter duplicates
54 | // if an operation has already been performed on `database`,
55 | // it should not be performed again
56 | hashVals sync.Map
57 | // each KVPaxos has a seq number recording its current progress.
58 | // seq starts from 0 and increases by 1 at a time.
59 | // if seq = 12, it means that Paxos instances 0 to 11 have all been decided
60 | // and Done() should already have been called on them
61 | seq int
62 | }
63 |
64 | // added by Adrian
65 | // every time a client sends a Get/PutAppend,
66 | // the first thing our KVPaxos replica does is NOT to perform the Get/PutAppend
67 | // on Paxos directly; instead, we first make sure all the previous
68 | // operations that have been decided by some other majority (excluding me)
69 | // are fetched into my database. The way we do that is to
70 | // Start() an operation at each missing seq number. Since that instance's n_a, v_a has
71 | // already been decided, we will get back the value that majority agreed on.
72 | func (kv *KVPaxos) SyncUp(xop Op) {
73 | to := 10 * time.Millisecond
74 | doing := false
75 | // sync on all seq number instances that I have not yet recorded
76 | // and after they are all done, we perform our own xop by calling Start()
77 | for {
78 | status, op := kv.px.Status(kv.seq)
79 | // DPrintf("server %v, seq %v, status %v", kv.me, kv.seq, status)
80 | // KVPaxos servers interact with each other through the Paxos log.
81 | if status == paxos.Decided {
82 | // this Decided status covers 2 cases.
83 | // case 1. this kv.seq instance has been decided by others, so when I called Start(),
84 | // the instance's n_a, v_a was taken from some other majority's agreement.
85 | // case 2. I am the initiator. No one had reached agreement (not Decided) on this seq number yet,
86 | // and thus xop.OpID == op.OpID
87 | op := op.(Op)
88 |
89 | if xop.OpID == op.OpID {
90 | // if it is case 2, we don't call doPutAppend() here, as we will do it later, outside this function
91 | break
92 | } else if op.Operation == "Put" || op.Operation == "Append" {
93 | // if it is case 1, we have to compensate:
94 | // we have to catch up on the others' progress, so we perform the PutAppend
95 | // according to the Paxos log entry we have consensus on
96 | kv.doPutAppend(op.Operation, op.Key, op.Value, op.OpID)
97 | } else {
98 | // if it is case 1, then even though it is a Get I was previously unaware of,
99 | // I still don't need to do anything, as it will not affect my `database`
100 | //value, _ := kv.doGet(op.Key)
101 | //DPrintf("get: %v", value)
102 | }
103 | // we could call Done() here, but since it covers all seq numbers from 0 ~ kv.seq,
104 | // we can do it once outside this for loop for simplicity.
105 | // kv.px.Done(kv.seq)
106 |
107 | // once we have caught up on this instance, we can finally increase our seq number by 1
108 | kv.seq += 1
109 | // also mark that our Start() is over. We might need to initiate another Start() though
110 | doing = false
111 | } else {
112 | if !doing {
113 | // your server should try to assign the next available Paxos instance (sequence number)
114 | // to each incoming client RPC. However, some other kvpaxos replica may also be trying
115 | // to use that instance for a different client's operation.
116 | // e.g., KVPaxos server 1 does Put(1, 15) and server 2 does Put(1, 32); they are both at seq=3 now
117 | // Acceptor 1: P1 x A1-15(ok) P2 A2-32(ok) // p.s., Proposal 22 arrives A1 a bit late
118 | // Acceptor 2: P1 P2 A1-15(fail) A2-32(ok)
119 | // Acceptor 3: P1 P2 A1-15(fail) A2-32(ok)
120 | // as a result, Put(1, 32) will be accepted instead of Put(1, 15) (which gets overridden)
121 | // although these 2 servers are both doing this on seq=3
122 |
123 | // Hint: if one of your kvpaxos servers falls behind (i.e. did not participate
124 | // in the agreement for some instance), it will later need to find out what (if anything)
125 | // was agreed to. A reasonable way to do this is to call Start(), which will either
126 | // discover the previously agreed-to value, or cause agreement to happen
127 |
128 | // Think about what value would be reasonable to pass to Start() in this situation.
129 | // Ans. Just pass in the value we wanted to agree on in the first place (the `xop`).
130 | // if the instance at seq = 3 has already been Decided, then when we call Start(), in the prepare phase
131 | // our V_a will be replaced by the V_a that some other majority has agreed to
132 | // Let's say srv 0 is still at seq = 3, and it wants to do Put(1, 15),
133 | // and yet srv 1, 2, 3 have already reached seq = 8, that is, their seq 3 ~ 7 are all decided
134 | // thus, srv 0 will call Start() on seq 3 ~ 7, but the value will be substituted,
135 | // and finally the Put(1, 15) will only be accepted at seq = 8.
136 | kv.px.Start(kv.seq, xop)
137 | //DPrintf("%v: do start for seq: %v, value=%v", kv.me, kv.seq, xop.Value)
138 | // now I'm doing Start(), so don't call Start() again on the same seq with the same xop.
139 | // not until I have finished this xop will I initiate another Start()
140 | doing = true
141 | }
142 | time.Sleep(to)
143 | // your code will need to wait for Paxos instances to complete agreement.
144 | // A good plan is to check quickly at first, and then more slowly:
145 | to += 10 * time.Millisecond
146 | }
147 | }
148 | // don't forget to call the Paxos Done() method when a kvpaxos has processed
149 | // an instance and will no longer need it or any previous instance.
150 | // When will px.Forget() be called? once EVERY KVPaxos has called Done() on seq = 3,
151 | // Min() will become 4 -> then, on the next Start() (since Done values are piggybacked), the proposer
152 | // will clean up those old instances by calling Forget()
153 | kv.px.Done(kv.seq)
154 | kv.seq += 1
155 | }
156 | // added by Adrian
157 | func (kv *KVPaxos) doGet(Key string) (string, bool) {
158 | val, ok := kv.database.Load(Key)
159 | // no effect for Get even if the hashVal has duplicates
160 | if !ok {
161 | return "", false
162 | } else {
163 | return val.(string), true
164 | }
165 | }
166 | // added by Adrian
167 | func (kv *KVPaxos) doPutAppend(Operation string, Key string, Value string, hash int64) {
168 | // first, we check whether the key already exists
169 | val, ok := kv.database.Load(Key)
170 | if !ok { // if not exists
171 |
172 | // initialization is the same for either Put or Append
173 | kv.database.Store(Key, Value)
174 |
175 | } else { // load
176 | if Operation == "Put" {
177 | kv.database.Store(Key, Value)
178 | } else if Operation == "Append" {
179 | vals := val.(string)
180 | // you will need to uniquely identify client operations
181 | // to ensure that they execute just once.
182 | _, ok := kv.hashVals.Load(hash)
183 | if !ok {
184 | // only perform the Append if this hash has not been seen before
185 | kv.database.Store(Key, vals+Value)
186 | }
187 | }
188 | }
189 | // we have to store this hash whether or not this is the first time the pair was pushed;
190 | // that is why this store is outside the if-else branches above
191 | kv.hashVals.Store(hash, 1) // an arbitrary value 1
192 | }
193 |
194 | func (kv *KVPaxos) Get(args *GetArgs, reply *GetReply) error {
195 | // Your code here.
196 | kv.mu.Lock()
197 | defer kv.mu.Unlock()
198 | // the Operation and (empty) Value in a Get Op are essentially dummy values;
199 | // we will not use them when we meet such an Op in SyncUp()
200 | op := Op{args.Hash, "Get", args.Key, ""}
201 | // a kvpaxos server should not complete a Get() RPC if it is not part of a majority
202 | // (so that it does not serve stale data).
203 | // -> instead, it will endlessly try syncing and wait for it to be `Decided`
204 | kv.SyncUp(op)
205 | reply.Value, _ = kv.doGet(args.Key)
206 | return nil
207 | }
208 |
209 | func (kv *KVPaxos) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error {
210 | // Your code here.
211 | kv.mu.Lock()
212 | defer kv.mu.Unlock()
213 | // It should enter a Get Op in the Paxos log, and then "interpret" the log **before that point**
214 | // to make sure its key/value database reflects all recent Put()s.
215 | // ps. An Append Paxos log entry should contain the Append's arguments,
216 | // but not the resulting value, since the result might be large.
217 | op := Op{args.Hash, args.Op, args.Key, args.Value}
218 | kv.SyncUp(op)
219 | kv.doPutAppend(args.Op, args.Key, args.Value, args.Hash)
220 | return nil
221 | }
222 |
223 | // tell the server to shut itself down.
224 | // please do not change these two functions.
225 | func (kv *KVPaxos) kill() {
226 | //DPrintf("Kill(%d): die\n", kv.me)
227 | atomic.StoreInt32(&kv.dead, 1)
228 | kv.l.Close()
229 | kv.px.Kill()
230 | }
231 |
232 | // call this to find out if the server is dead.
233 | func (kv *KVPaxos) isdead() bool {
234 | return atomic.LoadInt32(&kv.dead) != 0
235 | }
236 |
237 | // please do not change these two functions.
238 | func (kv *KVPaxos) setunreliable(what bool) {
239 | if what {
240 | atomic.StoreInt32(&kv.unreliable, 1)
241 | } else {
242 | atomic.StoreInt32(&kv.unreliable, 0)
243 | }
244 | }
245 |
246 | func (kv *KVPaxos) isunreliable() bool {
247 | return atomic.LoadInt32(&kv.unreliable) != 0
248 | }
249 |
250 | //
251 | // servers[] contains the ports of the set of
252 | // servers that will cooperate via Paxos to
253 | // form the fault-tolerant key/value service.
254 | // me is the index of the current server in servers[].
255 | //
256 | func StartServer(servers []string, me int) *KVPaxos {
257 | // call gob.Register on structures you want
258 | // Go's RPC library to marshall/unmarshall.
259 | gob.Register(Op{})
260 |
261 | kv := new(KVPaxos)
262 | kv.me = me
263 |
264 | // Your initialization code here.
265 | kv.seq = 0
266 |
267 | rpcs := rpc.NewServer()
268 | rpcs.Register(kv)
269 |
270 | kv.px = paxos.Make(servers, me, rpcs)
271 |
272 | os.Remove(servers[me])
273 | l, e := net.Listen("unix", servers[me])
274 | if e != nil {
275 | log.Fatal("listen error: ", e)
276 | }
277 | kv.l = l
278 |
279 |
280 | // please do not change any of the following code,
281 | // or do anything to subvert it.
282 |
283 | go func() {
284 | for kv.isdead() == false {
285 | conn, err := kv.l.Accept()
286 | if err == nil && kv.isdead() == false {
287 | if kv.isunreliable() && (rand.Int63()%1000) < 100 {
288 | // discard the request.
289 | conn.Close()
290 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 {
291 | // process the request but force discard of reply.
292 | c1 := conn.(*net.UnixConn)
293 | f, _ := c1.File()
294 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
295 | if err != nil {
296 | fmt.Printf("shutdown: %v\n", err)
297 | }
298 | go rpcs.ServeConn(conn)
299 | } else {
300 | go rpcs.ServeConn(conn)
301 | }
302 | } else if err == nil {
303 | conn.Close()
304 | }
305 | if err != nil && kv.isdead() == false {
306 | fmt.Printf("KVPaxos(%v) accept: %v\n", me, err.Error())
307 | kv.kill()
308 | }
309 | }
310 | }()
311 |
312 | return kv
313 | }
314 |
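
The duplicate filtering above relies on the client attaching one fixed, effectively unique OpID (the `hash` stored in `hashVals`) to each logical operation and resending that same ID on every retry. Here is a minimal, self-contained sketch of generating such an ID; the helper name `nrand` and its use are illustrative assumptions, not code from this repository:

package main

import (
	crand "crypto/rand"
	"fmt"
	"math/big"
)

// nrand returns a random 62-bit ID. Picking one such ID per logical client
// operation, and reusing it on every retry, is what lets the server-side
// hashVals map execute each Append at most once.
func nrand() int64 {
	max := big.NewInt(int64(1) << 62)
	x, _ := crand.Int(crand.Reader, max)
	return x.Int64()
}

func main() {
	opID := nrand() // chosen once per Put/Append/Get
	fmt.Println("OpID for this operation (resent unchanged on every retry):", opID)
}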
--------------------------------------------------------------------------------
/hw2/src/pbservice/server.go:
--------------------------------------------------------------------------------
1 | package pbservice
2 |
3 | import (
4 | "errors"
5 | "net"
6 | )
7 | import "fmt"
8 | import "net/rpc"
9 | import "log"
10 | import "time"
11 | import "viewservice"
12 | import "sync"
13 | import "sync/atomic"
14 | import "os"
15 | import "syscall"
16 | import "math/rand"
17 |
18 |
19 |
20 | type PBServer struct {
21 | mu sync.Mutex
22 | l net.Listener
23 | dead int32 // for testing
24 | unreliable int32 // for testing
25 | me string
26 | // To the view service, this PBServer is acting like a clerk,
27 | // so we set up a Clerk pointer to do Ping and some other calls.
28 | vs *viewservice.Clerk
29 |
30 | // Your declarations here.
31 | currview *viewservice.View
32 | database map[string]string
33 | // hashVals acts as the state to filter duplicates
34 | hashVals map[int64]bool
35 | // A read/write mutex allows all the readers to access
36 | // the map at the same time, but a writer will lock out everyone else.
37 | rwm sync.RWMutex
38 | }
39 |
40 | // edited by Adrian
41 | // the new backup got bootstrapped.
42 | func (pb *PBServer) Bootstrapped(args *BootstrapArgs, reply *BootstrapReply) error {
43 |
44 | pb.rwm.Lock()
45 | defer pb.rwm.Unlock()
46 | for k, v := range args.Database {
47 | pb.database[k] = v
48 | }
49 | for k, v := range args.HashVals {
50 | pb.hashVals[k] = v
51 | }
52 | return nil
53 | }
54 |
55 | // edited by Adrian
56 | // initiated by the primary when it finds that it's time to bootstrap the new backup.
57 | // since the current view has not yet changed, we cannot use `pb.currview.Backup`;
58 | // instead, we pass in a backup parameter
59 | func (pb *PBServer) Bootstrapping(backup string) error {
60 |
61 | args := &BootstrapArgs{pb.database, pb.hashVals}
62 | var reply BootstrapReply
63 |
64 | ok := false
65 | for ok == false {
66 | ok = call(backup, "PBServer.Bootstrapped", args, &reply)
67 | if ok {
68 | break
69 | } else {
70 | // network failure
71 | time.Sleep(viewservice.PingInterval)
72 | }
73 | }
74 | return nil
75 | }
76 |
77 | // edited by Adrian
78 | // to leverage determinism of the state machine
79 | // the backup got a Get request forwarded by the primary
80 | func (pb *PBServer) ForwardGet(sargs *GetSyncArgs, sreply *GetSyncReply) error {
81 |
82 | pb.rwm.Lock()
83 | defer pb.rwm.Unlock()
84 |
85 | if sargs.Primary != pb.currview.Primary {
86 | // the backup first needs to check whether the sender is still the current primary
87 | // e.g. split-brain: {s1, s3} -> s1 dies -> {s3, s2} -> s1 comes back
88 | // -> s1 still receives some requests from clients -> so s1 forwards to its cached backup, s3
89 | // -> s3 will tell s1 that "you are no longer the current primary now"
90 | // -> so finally s1 will reject the client's request
91 | sreply.Err = "ForwardTest: SENDER IS NOT CURRENT PRIMARY"
92 | return errors.New("ForwardTest: SENDER IS NOT CURRENT PRIMARY")
93 | } else {
94 | // if it is the primary, then we do Get normally
95 | sreply.Value = pb.database[sargs.Key]
96 | }
97 | return nil
98 | }
99 | // edited by Adrian
100 | // to leverage determinism of the state machine
101 | // forward any state necessary for the backup to `mimic` the execution:
102 | // do exactly the same PutAppend request on the backup
103 | func (pb *PBServer) Forward(sargs *PutAppendSyncArgs, sreply *PutAppendSyncReply) error {
104 |
105 | pb.rwm.Lock()
106 | defer pb.rwm.Unlock()
107 |
108 | if sargs.Primary != pb.currview.Primary {
109 | sreply.Err = "ForwardTest: SENDER IS NOT CURRENT PRIMARY"
110 | return errors.New("ForwardTest: SENDER IS NOT CURRENT PRIMARY")
111 | } else {
112 | pb.Update(sargs.Key, sargs.Value, sargs.Op, sargs.HashVal)
113 | }
114 | return nil
115 | }
116 |
117 | // edited by Adrian
118 | func (pb *PBServer) Update(key string, value string, op string, hashVal int64) {
119 |
120 | // no need to lock:
121 | // Update() is only called by Forward() or PutAppend(), and both have already acquired the lock
122 | if op == "Put" {
123 | pb.database[key] = value
124 | } else if op == "Append" {
125 | // detect duplicates
126 | if pb.hashVals[hashVal] != true {
127 | // Append should use an empty string for the previous value
128 | // if the key doesn't exist
129 | pb.database[key] += value
130 | pb.hashVals[hashVal] = true
131 | }
132 | }
133 | }
134 |
135 | func (pb *PBServer) Get(args *GetArgs, reply *GetReply) error {
136 |
137 | // Your code here.
138 | pb.rwm.Lock()
139 | defer pb.rwm.Unlock()
140 |
141 | if pb.me != pb.currview.Primary {
142 | reply.Err = "Get: NOT THE PRIMARY YET"
143 | // it might be possible that the primary dies and the backup has not yet
144 | // realized that it is now the new primary (the `p3`).
145 |
146 | // e.g., (p1, p3) -> (p3, _)
147 | // client: already knows that p3 is now the new primary
148 | // p3: according to its cache, it still sees (p1, p3) -> it still doesn't think it is the primary
149 | // so it will return an error to the client and tell it to try again later
150 | // -> wait for tick() until it knows (p3, _) and the problem is solved
151 |
152 | // the backup (at least, it still believes it is the backup) should reject a direct client request
153 | return errors.New("GetTest: NOT THE PRIMARY YET")
154 | }
155 |
156 | reply.Value = pb.database[args.Key]
157 |
158 | sargs := GetSyncArgs{args.Key,pb.me}
159 | sreply := GetSyncReply{}
160 |
161 | // if there is no backup currently -> don't do Forward
162 | ok := pb.currview.Backup == ""
163 |
164 | for ok == false {
165 | //log.Printf("b get %v, %v, %v", pb.me, pb.currview.Backup, sargs.Key)
166 | ok = call(pb.currview.Backup, "PBServer.ForwardGet", sargs, &sreply)
167 | //log.Printf("get %v, %v, %v", pb.me, pb.currview.Backup, sargs.Key)
168 | if ok == true {
169 | // everything works well
170 | break
171 | } else {
172 | // case 1. you are no longer the primary
173 | if sreply.Err == "ForwardTest: SENDER IS NOT CURRENT PRIMARY" {
174 | reply.Err = sreply.Err
175 | return errors.New("GetTest: SENDER IS NOT CURRENT PRIMARY") // don't need to update anymore
176 | }
177 |
178 | time.Sleep(viewservice.PingInterval)
179 | // case 2. check whether the backup is still alive
180 | // this performs exactly the same steps as tick(); we cannot call tick() directly, as we would acquire the lock twice
181 | newview, _ := pb.vs.Ping(pb.currview.Viewnum)
182 | pb.checkNewBackup(newview)
183 | pb.changeView(newview)
184 |
185 | ok = pb.currview.Backup == ""
186 | }
187 | }
188 |
189 | return nil
190 | }
191 |
192 | func (pb *PBServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error {
193 |
194 | // Your code here.
195 | pb.rwm.Lock()
196 | defer pb.rwm.Unlock()
197 |
198 | if pb.me != pb.currview.Primary {
199 | reply.Err = "PutAppend: NOT THE PRIMARY YET"
200 | return errors.New("PutAppendTest: NOT THE PRIMARY YET")
201 | }
202 |
203 | // Step 1. Update the primary itself (note: should not update the backup first!)
204 | pb.Update(args.Key, args.Value, args.Op, args.HashVal)
205 |
206 | sargs := PutAppendSyncArgs{args.Key, args.Value, args.Op, args.HashVal, pb.me}
207 | sreply := PutAppendSyncReply{}
208 |
209 | // Step 2. Update the backup (if exists)
210 |
211 | // IMPORTANT:
212 | // only if the primary and the backup are `externally consistent`
213 | // will the primary respond to the client, i.e., only then does this change become `externally visible`
214 | ok := pb.currview.Backup == ""
215 |
216 | for ok == false {
217 | //log.Printf("b put %v, %v, %v", pb.me, pb.currview.Backup, sargs.Key)
218 | ok = call(pb.currview.Backup, "PBServer.Forward", sargs, &sreply)
219 | //log.Printf("put %v, %v, %v", pb.me, pb.currview.Backup, sargs.Key)
220 | if ok == true {
221 | // everything works fine
222 | break
223 | } else {
224 | // case 1. you are no longer the primary
225 | if sreply.Err == "ForwardTest: SENDER IS NOT CURRENT PRIMARY" {
226 | reply.Err = sreply.Err
227 | return errors.New("PutAppendTest: SENDER NOT CURRENT PRIMARY") // don't need to update anymore
228 | }
229 |
230 | time.Sleep(viewservice.PingInterval)
231 | // case 2. check whether the backup is still alive
232 | // this performs exactly the same steps as tick(); we cannot call tick() directly, as we would acquire the lock twice
233 | newview, _ := pb.vs.Ping(pb.currview.Viewnum)
234 | pb.checkNewBackup(newview)
235 | pb.changeView(newview)
236 |
237 | ok = pb.currview.Backup == ""
238 | }
239 | }
240 |
241 | return nil
242 | }
243 |
244 | // edited by Adrian
245 | // to detect if the backup has changed
246 | func (pb *PBServer) checkNewBackup(newview viewservice.View) {
247 |
248 | // case 1. {s1, _} -> {s1, s2} // s2 is the new backup. s1 is myself.
249 | // case 2. {s1, s2} -> s2 dies -> {s1, s3} // s3 is the new backup. s1 is myself.
250 | // note that in case 2, `b` will not be "" in that intermediate state, since we called backupByIdleSrv()
251 | // -> it was already replaced by the time the primary got notified
252 |
253 | // case 3. {s1, s2} -> {s2, s3} // s3 is the new backup. s2 is me
254 | // -> therefore we use newview.Primary (s2) when deciding who does the bootstrap, not pb.currview.Primary (s1)
255 | if newview.Primary == pb.me && pb.currview.Backup != newview.Backup && newview.Backup != "" {
256 | pb.Bootstrapping(newview.Backup)
257 | }
258 | }
259 |
260 | func (pb *PBServer) changeView(newview viewservice.View) {
261 | // no need to lock:
262 | // the caller should have already acquired the lock
263 | pb.currview = &newview
264 | }
265 |
266 | //
267 | // ping the viewserver periodically.
268 | // if view changed:
269 | // transition to new view.
270 | // manage transfer of state from primary to new backup.
271 | //
272 | func (pb *PBServer) tick() {
273 |
274 | // Your code here.
275 | pb.rwm.Lock()
276 | defer pb.rwm.Unlock()
277 |
278 | newview, _ := pb.vs.Ping(pb.currview.Viewnum)
279 | //log.Printf("me=%v, v=%v, p=%v, b=%v", pb.me, newview.Viewnum, newview.Primary, newview.Backup)
280 | pb.checkNewBackup(newview)
281 | pb.changeView(newview)
282 | }
283 |
284 | // tell the server to shut itself down.
285 | // please do not change these two functions.
286 | func (pb *PBServer) kill() {
287 | atomic.StoreInt32(&pb.dead, 1)
288 | pb.l.Close()
289 | }
290 |
291 | // call this to find out if the server is dead.
292 | func (pb *PBServer) isdead() bool {
293 | return atomic.LoadInt32(&pb.dead) != 0
294 | }
295 |
296 | // please do not change these two functions.
297 | func (pb *PBServer) setunreliable(what bool) {
298 | if what {
299 | atomic.StoreInt32(&pb.unreliable, 1)
300 | } else {
301 | atomic.StoreInt32(&pb.unreliable, 0)
302 | }
303 | }
304 |
305 | func (pb *PBServer) isunreliable() bool {
306 | return atomic.LoadInt32(&pb.unreliable) != 0
307 | }
308 |
309 |
310 | func StartServer(vshost string, me string) *PBServer {
311 | pb := new(PBServer)
312 | pb.me = me
313 | pb.vs = viewservice.MakeClerk(me, vshost)
314 | // Your pb.* initializations here.
315 |
316 | pb.currview = &viewservice.View{}
317 | pb.database = make(map[string]string)
318 | pb.hashVals = make(map[int64]bool)
319 | rpcs := rpc.NewServer()
320 | rpcs.Register(pb)
321 |
322 | os.Remove(pb.me)
323 | l, e := net.Listen("unix", pb.me)
324 | if e != nil {
325 | log.Fatal("listen error: ", e)
326 | }
327 | pb.l = l
328 |
329 | // please do not change any of the following code,
330 | // or do anything to subvert it.
331 |
332 | go func() {
333 | for pb.isdead() == false {
334 | conn, err := pb.l.Accept()
335 | if err == nil && pb.isdead() == false {
336 | if pb.isunreliable() && (rand.Int63()%1000) < 100 {
337 | // discard the request.
338 | conn.Close()
339 | } else if pb.isunreliable() && (rand.Int63()%1000) < 200 {
340 | // process the request but force discard of reply.
341 | c1 := conn.(*net.UnixConn)
342 | f, _ := c1.File()
343 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR)
344 | if err != nil {
345 | fmt.Printf("shutdown: %v\n", err)
346 | }
347 | go rpcs.ServeConn(conn)
348 | } else {
349 | go rpcs.ServeConn(conn)
350 | }
351 | } else if err == nil {
352 | conn.Close()
353 | }
354 | if err != nil && pb.isdead() == false {
355 | fmt.Printf("PBServer(%v) accept: %v\n", me, err.Error())
356 | pb.kill()
357 | }
358 | }
359 | }()
360 |
361 | go func() {
362 | for pb.isdead() == false {
363 | pb.tick()
364 | time.Sleep(viewservice.PingInterval)
365 | }
366 | }()
367 |
368 | return pb
369 | }
370 |
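
A side note on the `rwm sync.RWMutex` field declared above: it is a read/write mutex (many concurrent readers, one exclusive writer), although the handlers in this file all take the exclusive Lock(). A minimal, self-contained sketch of the RLock/Lock distinction the struct comment refers to; the names here are illustrative and not from the repository:

package main

import (
	"fmt"
	"sync"
)

type store struct {
	rwm  sync.RWMutex
	data map[string]string
}

// get takes the shared read lock: many gets may run concurrently.
func (s *store) get(k string) string {
	s.rwm.RLock()
	defer s.rwm.RUnlock()
	return s.data[k]
}

// put takes the exclusive write lock: it blocks all readers and other writers.
func (s *store) put(k, v string) {
	s.rwm.Lock()
	defer s.rwm.Unlock()
	s.data[k] = v
}

func main() {
	s := &store{data: make(map[string]string)}
	var wg sync.WaitGroup
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			s.put("k", fmt.Sprint(i))
			_ = s.get("k")
		}(i)
	}
	wg.Wait()
	fmt.Println("final value of k:", s.get("k"))
}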
--------------------------------------------------------------------------------
/hw2/src/pbservice/out.txt:
--------------------------------------------------------------------------------
1 | Test: Single primary, no backup ...
2 | ... Passed
3 | Test: Add a backup ...
4 | ... Passed
5 | Test: Count RPCs to viewserver ...
6 | ... Passed
7 | Test: Primary failure ...
8 | ... Passed
9 | Test: Kill last server, new one should not be active ...
10 | ... Passed
11 | Test: at-most-once Append; unreliable ...
12 | ... Passed
13 | Test: Put() immediately after backup failure ...
14 | ... Passed
15 | Test: Put() immediately after primary failure ...
16 | PutAppendTest: NOT THE PRIMARY YET
17 | ... Passed
18 | Test: Concurrent Put()s to the same key ...
19 | ... Passed
20 | Test: Concurrent Append()s to the same key ...
21 | ... Passed
22 | Test: Concurrent Put()s to the same key; unreliable ...
23 | ... Passed
24 | Test: Repeated failures/restarts ...
25 | GetTest: NOT THE PRIMARY YET
26 | GetTest: NOT THE PRIMARY YET
27 | GetTest: NOT THE PRIMARY YET
28 | GetTest: NOT THE PRIMARY YET
29 | ... Put/Gets done ...
30 | ... Passed
31 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ...
32 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
33 | ... Appends done ...
34 | ... Passed
35 | Test: Old primary does not serve Gets ...
36 | GetTest: NOT THE PRIMARY YET
37 | ... Passed
38 | Test: Partitioned old primary does not complete Gets ...
39 | GetTest: NOT THE PRIMARY YET
40 | ... Passed
41 | PASS
42 | ok pbservice 111.928s
43 | Test: Single primary, no backup ...
44 | ... Passed
45 | Test: Add a backup ...
46 | ... Passed
47 | Test: Count RPCs to viewserver ...
48 | ... Passed
49 | Test: Primary failure ...
50 | ... Passed
51 | Test: Kill last server, new one should not be active ...
52 | ... Passed
53 | Test: at-most-once Append; unreliable ...
54 | ... Passed
55 | Test: Put() immediately after backup failure ...
56 | ... Passed
57 | Test: Put() immediately after primary failure ...
58 | PutAppendTest: NOT THE PRIMARY YET
59 | ... Passed
60 | Test: Concurrent Put()s to the same key ...
61 | ... Passed
62 | Test: Concurrent Append()s to the same key ...
63 | ... Passed
64 | Test: Concurrent Put()s to the same key; unreliable ...
65 | ... Passed
66 | Test: Repeated failures/restarts ...
67 | ... Put/Gets done ...
68 | ... Passed
69 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ...
70 | PutAppendTest: NOT THE PRIMARY YET
71 | ... Appends done ...
72 | ... Passed
73 | Test: Old primary does not serve Gets ...
74 | GetTest: NOT THE PRIMARY YET
75 | ... Passed
76 | Test: Partitioned old primary does not complete Gets ...
77 | GetTest: NOT THE PRIMARY YET
78 | ... Passed
79 | PASS
80 | ok pbservice 111.657s
81 | Test: Single primary, no backup ...
82 | ... Passed
83 | Test: Add a backup ...
84 | ... Passed
85 | Test: Count RPCs to viewserver ...
86 | ... Passed
87 | Test: Primary failure ...
88 | ... Passed
89 | Test: Kill last server, new one should not be active ...
90 | ... Passed
91 | Test: at-most-once Append; unreliable ...
92 | ... Passed
93 | Test: Put() immediately after backup failure ...
94 | ... Passed
95 | Test: Put() immediately after primary failure ...
96 | ... Passed
97 | Test: Concurrent Put()s to the same key ...
98 | ... Passed
99 | Test: Concurrent Append()s to the same key ...
100 | ... Passed
101 | Test: Concurrent Put()s to the same key; unreliable ...
102 | ... Passed
103 | Test: Repeated failures/restarts ...
104 | GetTest: NOT THE PRIMARY YET
105 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
106 | ... Put/Gets done ...
107 | ... Passed
108 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ...
109 | PutAppendTest: NOT THE PRIMARY YET
110 | ... Appends done ...
111 | ... Passed
112 | Test: Old primary does not serve Gets ...
113 | GetTest: NOT THE PRIMARY YET
114 | ... Passed
115 | Test: Partitioned old primary does not complete Gets ...
116 | GetTest: NOT THE PRIMARY YET
117 | ... Passed
118 | PASS
119 | ok pbservice 111.684s
120 | Test: Single primary, no backup ...
121 | ... Passed
122 | Test: Add a backup ...
123 | ... Passed
124 | Test: Count RPCs to viewserver ...
125 | ... Passed
126 | Test: Primary failure ...
127 | ... Passed
128 | Test: Kill last server, new one should not be active ...
129 | ... Passed
130 | Test: at-most-once Append; unreliable ...
131 | ... Passed
132 | Test: Put() immediately after backup failure ...
133 | ... Passed
134 | Test: Put() immediately after primary failure ...
135 | ... Passed
136 | Test: Concurrent Put()s to the same key ...
137 | ... Passed
138 | Test: Concurrent Append()s to the same key ...
139 | ... Passed
140 | Test: Concurrent Put()s to the same key; unreliable ...
141 | ... Passed
142 | Test: Repeated failures/restarts ...
143 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
144 | GetTest: NOT THE PRIMARY YET
145 | GetTest: NOT THE PRIMARY YET
146 | GetTest: NOT THE PRIMARY YET
147 | GetTest: NOT THE PRIMARY YET
148 | ... Put/Gets done ...
149 | ... Passed
150 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ...
151 | PutAppendTest: NOT THE PRIMARY YET
152 | PutAppendTest: NOT THE PRIMARY YET
153 | ... Appends done ...
154 | ... Passed
155 | Test: Old primary does not serve Gets ...
156 | GetTest: NOT THE PRIMARY YET
157 | ... Passed
158 | Test: Partitioned old primary does not complete Gets ...
159 | GetTest: NOT THE PRIMARY YET
160 | ... Passed
161 | PASS
162 | ok pbservice 111.359s
163 | Test: Single primary, no backup ...
164 | ... Passed
165 | Test: Add a backup ...
166 | ... Passed
167 | Test: Count RPCs to viewserver ...
168 | ... Passed
169 | Test: Primary failure ...
170 | ... Passed
171 | Test: Kill last server, new one should not be active ...
172 | ... Passed
173 | Test: at-most-once Append; unreliable ...
174 | ... Passed
175 | Test: Put() immediately after backup failure ...
176 | ... Passed
177 | Test: Put() immediately after primary failure ...
178 | PutAppendTest: NOT THE PRIMARY YET
179 | ... Passed
180 | Test: Concurrent Put()s to the same key ...
181 | ... Passed
182 | Test: Concurrent Append()s to the same key ...
183 | ... Passed
184 | Test: Concurrent Put()s to the same key; unreliable ...
185 | ... Passed
186 | Test: Repeated failures/restarts ...
187 | GetTest: NOT THE PRIMARY YET
188 | GetTest: NOT THE PRIMARY YET
189 | ... Put/Gets done ...
190 | ... Passed
191 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ...
192 | PutAppendTest: NOT THE PRIMARY YET
193 | PutAppendTest: NOT THE PRIMARY YET
194 | PutAppendTest: NOT THE PRIMARY YET
195 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
196 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
197 | ... Appends done ...
198 | ... Passed
199 | Test: Old primary does not serve Gets ...
200 | GetTest: NOT THE PRIMARY YET
201 | ... Passed
202 | Test: Partitioned old primary does not complete Gets ...
203 | GetTest: NOT THE PRIMARY YET
204 | ... Passed
205 | PASS
206 | ok pbservice 111.798s
207 | Test: Single primary, no backup ...
208 | ... Passed
209 | Test: Add a backup ...
210 | ... Passed
211 | Test: Count RPCs to viewserver ...
212 | ... Passed
213 | Test: Primary failure ...
214 | ... Passed
215 | Test: Kill last server, new one should not be active ...
216 | ... Passed
217 | Test: at-most-once Append; unreliable ...
218 | ... Passed
219 | Test: Put() immediately after backup failure ...
220 | ... Passed
221 | Test: Put() immediately after primary failure ...
222 | ... Passed
223 | Test: Concurrent Put()s to the same key ...
224 | ... Passed
225 | Test: Concurrent Append()s to the same key ...
226 | ... Passed
227 | Test: Concurrent Put()s to the same key; unreliable ...
228 | ... Passed
229 | Test: Repeated failures/restarts ...
230 | GetTest: NOT THE PRIMARY YET
231 | GetTest: NOT THE PRIMARY YET
232 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
233 | ... Put/Gets done ...
234 | ... Passed
235 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ...
236 | PutAppendTest: NOT THE PRIMARY YET
237 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
238 | ... Appends done ...
239 | ... Passed
240 | Test: Old primary does not serve Gets ...
241 | GetTest: NOT THE PRIMARY YET
242 | ... Passed
243 | Test: Partitioned old primary does not complete Gets ...
244 | GetTest: NOT THE PRIMARY YET
245 | ... Passed
246 | PASS
247 | ok pbservice 111.461s
248 | Test: Single primary, no backup ...
249 | ... Passed
250 | Test: Add a backup ...
251 | ... Passed
252 | Test: Count RPCs to viewserver ...
253 | ... Passed
254 | Test: Primary failure ...
255 | ... Passed
256 | Test: Kill last server, new one should not be active ...
257 | ... Passed
258 | Test: at-most-once Append; unreliable ...
259 | ... Passed
260 | Test: Put() immediately after backup failure ...
261 | ... Passed
262 | Test: Put() immediately after primary failure ...
263 | ... Passed
264 | Test: Concurrent Put()s to the same key ...
265 | ... Passed
266 | Test: Concurrent Append()s to the same key ...
267 | ... Passed
268 | Test: Concurrent Put()s to the same key; unreliable ...
269 | ... Passed
270 | Test: Repeated failures/restarts ...
271 | GetTest: NOT THE PRIMARY YET
272 | GetTest: NOT THE PRIMARY YET
273 | ... Put/Gets done ...
274 | ... Passed
275 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ...
276 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
277 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
278 | ... Appends done ...
279 | ... Passed
280 | Test: Old primary does not serve Gets ...
281 | GetTest: NOT THE PRIMARY YET
282 | ... Passed
283 | Test: Partitioned old primary does not complete Gets ...
284 | GetTest: NOT THE PRIMARY YET
285 | ... Passed
286 | PASS
287 | ok pbservice 111.965s
288 | Test: Single primary, no backup ...
289 | ... Passed
290 | Test: Add a backup ...
291 | ... Passed
292 | Test: Count RPCs to viewserver ...
293 | ... Passed
294 | Test: Primary failure ...
295 | ... Passed
296 | Test: Kill last server, new one should not be active ...
297 | ... Passed
298 | Test: at-most-once Append; unreliable ...
299 | ... Passed
300 | Test: Put() immediately after backup failure ...
301 | ... Passed
302 | Test: Put() immediately after primary failure ...
303 | ... Passed
304 | Test: Concurrent Put()s to the same key ...
305 | ... Passed
306 | Test: Concurrent Append()s to the same key ...
307 | ... Passed
308 | Test: Concurrent Put()s to the same key; unreliable ...
309 | ... Passed
310 | Test: Repeated failures/restarts ...
311 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
312 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
313 | GetTest: NOT THE PRIMARY YET
314 | GetTest: NOT THE PRIMARY YET
315 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
316 | ... Put/Gets done ...
317 | ... Passed
318 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ...
319 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
320 | PutAppendTest: NOT THE PRIMARY YET
321 | PutAppendTest: NOT THE PRIMARY YET
322 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
323 | PutAppendTest: NOT THE PRIMARY YET
324 | PutAppendTest: NOT THE PRIMARY YET
325 | PutAppendTest: NOT THE PRIMARY YET
326 | PutAppendTest: NOT THE PRIMARY YET
327 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
328 | ... Appends done ...
329 | ... Passed
330 | Test: Old primary does not serve Gets ...
331 | GetTest: NOT THE PRIMARY YET
332 | ... Passed
333 | Test: Partitioned old primary does not complete Gets ...
334 | GetTest: NOT THE PRIMARY YET
335 | ... Passed
336 | PASS
337 | ok pbservice 111.465s
338 | Test: Single primary, no backup ...
339 | ... Passed
340 | Test: Add a backup ...
341 | ... Passed
342 | Test: Count RPCs to viewserver ...
343 | ... Passed
344 | Test: Primary failure ...
345 | ... Passed
346 | Test: Kill last server, new one should not be active ...
347 | ... Passed
348 | Test: at-most-once Append; unreliable ...
349 | ... Passed
350 | Test: Put() immediately after backup failure ...
351 | ... Passed
352 | Test: Put() immediately after primary failure ...
353 | PutAppendTest: NOT THE PRIMARY YET
354 | ... Passed
355 | Test: Concurrent Put()s to the same key ...
356 | ... Passed
357 | Test: Concurrent Append()s to the same key ...
358 | ... Passed
359 | Test: Concurrent Put()s to the same key; unreliable ...
360 | ... Passed
361 | Test: Repeated failures/restarts ...
362 | PutAppendTest: NOT THE PRIMARY YET
363 | PutAppendTest: NOT THE PRIMARY YET
364 | GetTest: NOT THE PRIMARY YET
365 | GetTest: NOT THE PRIMARY YET
366 | GetTest: NOT THE PRIMARY YET
367 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
368 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
369 | ... Put/Gets done ...
370 | ... Passed
371 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ...
372 | PutAppendTest: NOT THE PRIMARY YET
373 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
374 | PutAppendTest: NOT THE PRIMARY YET
375 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
376 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
377 | ... Appends done ...
378 | ... Passed
379 | Test: Old primary does not serve Gets ...
380 | GetTest: NOT THE PRIMARY YET
381 | ... Passed
382 | Test: Partitioned old primary does not complete Gets ...
383 | GetTest: NOT THE PRIMARY YET
384 | ... Passed
385 | PASS
386 | ok pbservice 111.567s
387 | Test: Single primary, no backup ...
388 | ... Passed
389 | Test: Add a backup ...
390 | ... Passed
391 | Test: Count RPCs to viewserver ...
392 | ... Passed
393 | Test: Primary failure ...
394 | ... Passed
395 | Test: Kill last server, new one should not be active ...
396 | ... Passed
397 | Test: at-most-once Append; unreliable ...
398 | ... Passed
399 | Test: Put() immediately after backup failure ...
400 | ... Passed
401 | Test: Put() immediately after primary failure ...
402 | ... Passed
403 | Test: Concurrent Put()s to the same key ...
404 | ... Passed
405 | Test: Concurrent Append()s to the same key ...
406 | ... Passed
407 | Test: Concurrent Put()s to the same key; unreliable ...
408 | ... Passed
409 | Test: Repeated failures/restarts ...
410 | GetTest: NOT THE PRIMARY YET
411 | GetTest: NOT THE PRIMARY YET
412 | ... Put/Gets done ...
413 | ... Passed
414 | Test: Repeated failures/restarts with concurrent updates to same key; unreliable ...
415 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
416 | ForwardTest: SENDER IS NOT CURRENT PRIMARY
417 | ... Appends done ...
418 | ... Passed
419 | Test: Old primary does not serve Gets ...
420 | GetTest: NOT THE PRIMARY YET
421 | ... Passed
422 | Test: Partitioned old primary does not complete Gets ...
423 | GetTest: NOT THE PRIMARY YET
424 | ... Passed
425 | PASS
426 | ok pbservice 111.108s
427 |
--------------------------------------------------------------------------------