├── .DS_Store ├── src ├── .DS_Store ├── .gitignore ├── main │ ├── mr-testout.txt │ ├── test-wc.sh │ ├── viewd.go │ ├── pbd.go │ ├── test-ii.sh │ ├── test-mr.sh │ ├── lockc.go │ ├── lockd.go │ ├── cli.go │ ├── pbc.go │ ├── ii.go │ ├── wc.go │ ├── diskvd.go │ └── mr-challenge.txt ├── raft │ ├── util.go │ ├── persister.go │ └── config.go ├── raftkv │ ├── common.go │ ├── cmd.go │ ├── client.go │ ├── server.go │ ├── cmd_config.go │ ├── config.go │ └── test_test.go ├── shardkv │ ├── common.go │ ├── server.go │ ├── client.go │ ├── config.go │ └── test_test.go ├── mapreduce │ ├── common.go │ ├── master_splitmerge.go │ ├── master_rpc.go │ ├── common_reduce.go │ ├── schedule.go │ ├── common_rpc.go │ ├── common_map.go │ ├── worker.go │ ├── test_test.go │ └── master.go ├── shardmaster │ ├── server.go │ ├── common.go │ ├── client.go │ ├── config.go │ └── test_test.go └── labrpc │ ├── test_test.go │ └── labrpc.go ├── img ├── test_raft.png └── test_raftkv.png ├── test_6824.sh ├── LICENSE ├── README.md └── PROPOSAL.md /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hoanhan101/hstore/HEAD/.DS_Store -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hoanhan101/hstore/HEAD/src/.DS_Store -------------------------------------------------------------------------------- /img/test_raft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hoanhan101/hstore/HEAD/img/test_raft.png -------------------------------------------------------------------------------- /img/test_raftkv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hoanhan101/hstore/HEAD/img/test_raftkv.png -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.*/ 2 | mrtmp.* 3 | 824-mrinput-*.txt 4 | /main/diff.out 5 | /mapreduce/x.txt 6 | /pbservice/x.txt 7 | /kvpaxos/x.txt 8 | -------------------------------------------------------------------------------- /src/main/mr-testout.txt: -------------------------------------------------------------------------------- 1 | he: 34077 2 | was: 37044 3 | that: 37495 4 | I: 44502 5 | in: 46092 6 | a: 60558 7 | to: 74357 8 | of: 79727 9 | and: 93990 10 | the: 154024 11 | -------------------------------------------------------------------------------- /src/raft/util.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "log" 4 | 5 | // Debug option 6 | const Debug = 0 7 | 8 | // DPrintf prints debugging options 9 | func DPrintf(format string, a ...interface{}) (n int, err error) { 10 | if Debug > 0 { 11 | log.Printf(format, a...) 12 | } 13 | return 14 | } 15 | -------------------------------------------------------------------------------- /src/main/test-wc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run wc.go master sequential pg-*.txt 3 | sort -n -k2 mrtmp.wcseq | tail -10 | diff - mr-testout.txt > diff.out 4 | if [ -s diff.out ] 5 | then 6 | echo "Failed test. Output should be as in mr-testout.txt. 
Your output differs as follows (from diff.out):" > /dev/stderr 7 | cat diff.out 8 | else 9 | echo "Passed test" > /dev/stderr 10 | fi 11 | rm mrtmp.* 12 | -------------------------------------------------------------------------------- /src/main/viewd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "viewservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 2 { 14 | fmt.Printf("Usage: viewd port\n") 15 | os.Exit(1) 16 | } 17 | 18 | viewservice.StartServer(os.Args[1]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/pbd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "pbservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 3 { 14 | fmt.Printf("Usage: pbd viewport myport\n") 15 | os.Exit(1) 16 | } 17 | 18 | pbservice.StartServer(os.Args[1], os.Args[2]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/test-ii.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run ii.go master sequential pg-*.txt 3 | sort -k1,1 mrtmp.iiseq | sort -snk2,2 | grep -v '16' | tail -10 | diff - mr-challenge.txt > diff.out 4 | if [ -s diff.out ] 5 | then 6 | echo "Failed test. Output should be as in mr-challenge.txt. Your output differs as follows (from diff.out):" > /dev/stderr 7 | cat diff.out 8 | else 9 | echo "Passed test" > /dev/stderr 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /src/main/test-mr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | here=$(dirname "$0") 3 | [[ "$here" = /* ]] || here="$PWD/$here" 4 | export GOPATH="$here/../../" 5 | echo "" 6 | echo "==> Part I" 7 | go test -run Sequential mapreduce/... 8 | echo "" 9 | echo "==> Part II" 10 | (cd "$here" && sh ./test-wc.sh > /dev/null) 11 | echo "" 12 | echo "==> Part III" 13 | go test -run TestBasic mapreduce/... 14 | echo "" 15 | echo "==> Part IV" 16 | go test -run Failure mapreduce/... 
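# (Individual test runs like the ones above can also be launched from the
#  repository root with ./test_6824.sh <package> [test], e.g.
#  ./test_6824.sh mapreduce TestBasic; see test_6824.sh for its usage notes.)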
17 | echo "" 18 | echo "==> Part V (challenge)" 19 | (cd "$here" && sh ./test-ii.sh > /dev/null) 20 | 21 | rm "$here"/mrtmp.* "$here"/diff.out 22 | -------------------------------------------------------------------------------- /test_6824.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # test_6824.sh - Run MIT's 6.824 tests for a specific package 5 | # Author: Hoanh An (hoanhan@bennington.edu) 6 | # Date: 04/15/18 7 | # 8 | # Usage: 9 | # ./test_6824.sh [] 10 | # 11 | # Example: 12 | # ./test_6824.sh kvraft: runs all tests for kvraft package 13 | # ./test_6824.sh raft TestInitialElection2A: runs only initial election test for raft 14 | # 15 | 16 | # Correct GOPATH 17 | export "GOPATH=$PWD" 18 | cd "$GOPATH/src/$1" 19 | 20 | # Execute test 21 | if [ -z "$2" ]; then 22 | go test 23 | else 24 | go test -run $2 25 | fi 26 | -------------------------------------------------------------------------------- /src/main/lockc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see comments in lockd.go 5 | // 6 | 7 | import "lockservice" 8 | import "os" 9 | import "fmt" 10 | 11 | func usage() { 12 | fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n") 13 | os.Exit(1) 14 | } 15 | 16 | func main() { 17 | if len(os.Args) == 5 { 18 | ck := lockservice.MakeClerk(os.Args[2], os.Args[3]) 19 | var ok bool 20 | if os.Args[1] == "-l" { 21 | ok = ck.Lock(os.Args[4]) 22 | } else if os.Args[1] == "-u" { 23 | ok = ck.Unlock(os.Args[4]) 24 | } else { 25 | usage() 26 | } 27 | fmt.Printf("reply: %v\n", ok) 28 | } else { 29 | usage() 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/lockd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // export GOPATH=~/6.824 4 | // go build lockd.go 5 | // go build lockc.go 6 | // ./lockd -p a b & 7 | // ./lockd -b a b & 8 | // ./lockc -l a b lx 9 | // ./lockc -u a b lx 10 | // 11 | // on Athena, use /tmp/myname-a and /tmp/myname-b 12 | // instead of a and b. 
13 | 14 | import "time" 15 | import "lockservice" 16 | import "os" 17 | import "fmt" 18 | 19 | func main() { 20 | if len(os.Args) == 4 && os.Args[1] == "-p" { 21 | lockservice.StartServer(os.Args[2], os.Args[3], true) 22 | } else if len(os.Args) == 4 && os.Args[1] == "-b" { 23 | lockservice.StartServer(os.Args[2], os.Args[3], false) 24 | } else { 25 | fmt.Printf("Usage: lockd -p|-b primaryport backupport\n") 26 | os.Exit(1) 27 | } 28 | for { 29 | time.Sleep(100 * time.Second) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/raftkv/common.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | // OK and ErrNoKey constants 4 | const ( 5 | OK = "OK" 6 | ErrNoKey = "ErrNoKey" 7 | ) 8 | 9 | // Err string type 10 | type Err string 11 | 12 | // PutAppendArgs structure for Put or Append Argument 13 | type PutAppendArgs struct { 14 | Key string 15 | Value string 16 | Op string 17 | ClientID int64 18 | RequestID int 19 | } 20 | 21 | // PutAppendReply structure for Put or Append Reply 22 | type PutAppendReply struct { 23 | WrongLeader bool 24 | Err Err 25 | } 26 | 27 | // GetArgs structure for Get Argument 28 | type GetArgs struct { 29 | Key string 30 | ClientID int64 31 | RequestID int 32 | } 33 | 34 | // GetReply structure for Get Reply 35 | type GetReply struct { 36 | WrongLeader bool 37 | Err Err 38 | Value string 39 | } 40 | -------------------------------------------------------------------------------- /src/main/cli.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "kvraft" 7 | "os" 8 | "strings" 9 | ) 10 | 11 | func main() { 12 | user := raftkv.Command{} 13 | 14 | fmt.Printf("Enter number of servers: ") 15 | var nservers int 16 | fmt.Scan(&nservers) 17 | 18 | user.Setup(nservers) 19 | user.Put("foo", "bar") 20 | user.Get("foo") 21 | 22 | scanner := bufio.NewScanner(os.Stdin) 23 | for scanner.Scan() { 24 | rawString := strings.Split(scanner.Text(), " ") 25 | 26 | if rawString[0] == "GET" { 27 | user.Get(rawString[1]) 28 | } else if rawString[0] == "PUT" { 29 | if len(rawString) != 3 || rawString[2] == "" { 30 | fmt.Println("Cannot PUT empty value") 31 | continue 32 | } else { 33 | user.Put(rawString[1], rawString[2]) 34 | } 35 | } else if rawString[0] == "APPEND" { 36 | user.Append(rawString[1], rawString[2]) 37 | } else { 38 | fmt.Println("Not supported method") 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/raftkv/cmd.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | // Command structure 8 | type Command struct { 9 | clerk *Clerk 10 | } 11 | 12 | // Setup without log compaction 13 | func (cmd *Command) Setup(nservers int) *Clerk { 14 | cfg := makeCmdConfig(nservers, -1) 15 | fmt.Printf("Boot up with %v servers\n", nservers) 16 | 17 | ck := cfg.makeClient(cfg.All()) 18 | cmd.clerk = ck 19 | 20 | return ck 21 | } 22 | 23 | // Put a key-value pair 24 | func (cmd *Command) Put(key string, value string) string { 25 | cmd.clerk.Put(key, value) 26 | fmt.Printf("Put(%v, %v)\n", key, value) 27 | 28 | return key 29 | } 30 | 31 | // Append a key-value pair 32 | func (cmd *Command) Append(key string, value string) string { 33 | cmd.clerk.Append(key, value) 34 | fmt.Printf("Append(%v, %v)\n", key, value) 35 | 36 | return key 37 | } 38 | 39 | // 
Get a value for a key 40 | func (cmd *Command) Get(key string) string { 41 | result := cmd.clerk.Get(key) 42 | fmt.Printf("Get(%v) -> %v\n", key, result) 43 | 44 | return result 45 | } 46 | -------------------------------------------------------------------------------- /src/main/pbc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // pbservice client application 5 | // 6 | // export GOPATH=~/6.824 7 | // go build viewd.go 8 | // go build pbd.go 9 | // go build pbc.go 10 | // ./viewd /tmp/rtm-v & 11 | // ./pbd /tmp/rtm-v /tmp/rtm-1 & 12 | // ./pbd /tmp/rtm-v /tmp/rtm-2 & 13 | // ./pbc /tmp/rtm-v key1 value1 14 | // ./pbc /tmp/rtm-v key1 15 | // 16 | // change "rtm" to your user name. 17 | // start the pbd programs in separate windows and kill 18 | // and restart them to exercise fault tolerance. 19 | // 20 | 21 | import "pbservice" 22 | import "os" 23 | import "fmt" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: pbc viewport key\n") 27 | fmt.Printf(" pbc viewport key value\n") 28 | os.Exit(1) 29 | } 30 | 31 | func main() { 32 | if len(os.Args) == 3 { 33 | // get 34 | ck := pbservice.MakeClerk(os.Args[1], "") 35 | v := ck.Get(os.Args[2]) 36 | fmt.Printf("%v\n", v) 37 | } else if len(os.Args) == 4 { 38 | // put 39 | ck := pbservice.MakeClerk(os.Args[1], "") 40 | ck.Put(os.Args[2], os.Args[3]) 41 | } else { 42 | usage() 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Hoanh An 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/shardkv/common.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // Sharded key/value server. 5 | // Lots of replica groups, each running op-at-a-time paxos. 6 | // Shardmaster decides which group serves each shard. 7 | // Shardmaster may change shard assignment from time to time. 8 | // 9 | // You will have to modify these definitions. 10 | // 11 | 12 | // Constants 13 | const ( 14 | OK = "OK" 15 | ErrNoKey = "ErrNoKey" 16 | ErrWrongGroup = "ErrWrongGroup" 17 | ) 18 | 19 | // Err string type 20 | type Err string 21 | 22 | // PutAppendArgs structure 23 | type PutAppendArgs struct { 24 | // You'll have to add definitions here. 
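	// (One option, mirroring raftkv's PutAppendArgs: add ClientID and
	// RequestID fields here so servers can detect duplicate requests.)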
25 | Key string 26 | Value string 27 | Op string // "Put" or "Append" 28 | // You'll have to add definitions here. 29 | // Field names must start with capital letters, 30 | // otherwise RPC will break. 31 | } 32 | 33 | // PutAppendReply structure 34 | type PutAppendReply struct { 35 | WrongLeader bool 36 | Err Err 37 | } 38 | 39 | // GetArgs structure 40 | type GetArgs struct { 41 | Key string 42 | // You'll have to add definitions here. 43 | } 44 | 45 | // GetReply structure 46 | type GetReply struct { 47 | WrongLeader bool 48 | Err Err 49 | Value string 50 | } 51 | -------------------------------------------------------------------------------- /src/mapreduce/common.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | ) 7 | 8 | // Debugging enabled? 9 | const debugEnabled = false 10 | 11 | // debug() will only print if debugEnabled is true 12 | func debug(format string, a ...interface{}) (n int, err error) { 13 | if debugEnabled { 14 | n, err = fmt.Printf(format, a...) 15 | } 16 | return 17 | } 18 | 19 | // jobPhase indicates whether a task is scheduled as a map or reduce task. 20 | type jobPhase string 21 | 22 | const ( 23 | mapPhase jobPhase = "Map" 24 | reducePhase = "Reduce" 25 | ) 26 | 27 | // KeyValue is a type used to hold the key/value pairs passed to the map and 28 | // reduce functions. 29 | type KeyValue struct { 30 | Key string 31 | Value string 32 | } 33 | 34 | // reduceName constructs the name of the intermediate file which map task 35 | // produces for reduce task . 36 | func reduceName(jobName string, mapTask int, reduceTask int) string { 37 | return "mrtmp." + jobName + "-" + strconv.Itoa(mapTask) + "-" + strconv.Itoa(reduceTask) 38 | } 39 | 40 | // mergeName constructs the name of the output file of reduce task 41 | func mergeName(jobName string, reduceTask int) string { 42 | return "mrtmp." + jobName + "-res-" + strconv.Itoa(reduceTask) 43 | } 44 | -------------------------------------------------------------------------------- /src/main/ii.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "os" 4 | import "fmt" 5 | import "mapreduce" 6 | 7 | // The mapping function is called once for each piece of the input. 8 | // In this framework, the key is the name of the file that is being processed, 9 | // and the value is the file's contents. The return value should be a slice of 10 | // key/value pairs, each represented by a mapreduce.KeyValue. 11 | func mapF(document string, value string) (res []mapreduce.KeyValue) { 12 | // TODO: you should complete this to do the inverted index challenge 13 | } 14 | 15 | // The reduce function is called once for each key generated by Map, with a 16 | // list of that key's string value (merged across all inputs). The return value 17 | // should be a single output value for that key. 18 | func reduceF(key string, values []string) string { 19 | // TODO: you should complete this to do the inverted index challenge 20 | } 21 | 22 | // Can be run in 3 ways: 23 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 24 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. 
xN.txt) 25 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 26 | func main() { 27 | if len(os.Args) < 4 { 28 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 29 | } else if os.Args[1] == "master" { 30 | var mr *mapreduce.Master 31 | if os.Args[2] == "sequential" { 32 | mr = mapreduce.Sequential("iiseq", os.Args[3:], 3, mapF, reduceF) 33 | } else { 34 | mr = mapreduce.Distributed("iiseq", os.Args[3:], 3, os.Args[2]) 35 | } 36 | mr.Wait() 37 | } else { 38 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/mapreduce/master_splitmerge.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "fmt" 7 | "log" 8 | "os" 9 | "sort" 10 | ) 11 | 12 | // merge combines the results of the many reduce jobs into a single output file 13 | // XXX use merge sort 14 | func (mr *Master) merge() { 15 | debug("Merge phase") 16 | kvs := make(map[string]string) 17 | for i := 0; i < mr.nReduce; i++ { 18 | p := mergeName(mr.jobName, i) 19 | fmt.Printf("Merge: read %s\n", p) 20 | file, err := os.Open(p) 21 | if err != nil { 22 | log.Fatal("Merge: ", err) 23 | } 24 | dec := json.NewDecoder(file) 25 | for { 26 | var kv KeyValue 27 | err = dec.Decode(&kv) 28 | if err != nil { 29 | break 30 | } 31 | kvs[kv.Key] = kv.Value 32 | } 33 | file.Close() 34 | } 35 | var keys []string 36 | for k := range kvs { 37 | keys = append(keys, k) 38 | } 39 | sort.Strings(keys) 40 | 41 | file, err := os.Create("mrtmp." + mr.jobName) 42 | if err != nil { 43 | log.Fatal("Merge: create ", err) 44 | } 45 | w := bufio.NewWriter(file) 46 | for _, k := range keys { 47 | fmt.Fprintf(w, "%s: %s\n", k, kvs[k]) 48 | } 49 | w.Flush() 50 | file.Close() 51 | } 52 | 53 | // removeFile is a simple wrapper around os.Remove that logs errors. 54 | func removeFile(n string) { 55 | err := os.Remove(n) 56 | if err != nil { 57 | log.Fatal("CleanupFiles ", err) 58 | } 59 | } 60 | 61 | // CleanupFiles removes all intermediate files produced by running mapreduce. 62 | func (mr *Master) CleanupFiles() { 63 | for i := range mr.files { 64 | for j := 0; j < mr.nReduce; j++ { 65 | removeFile(reduceName(mr.jobName, i, j)) 66 | } 67 | } 68 | for i := 0; i < mr.nReduce; i++ { 69 | removeFile(mergeName(mr.jobName, i)) 70 | } 71 | removeFile("mrtmp." + mr.jobName) 72 | } 73 | -------------------------------------------------------------------------------- /src/mapreduce/master_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/rpc" 8 | "os" 9 | ) 10 | 11 | // Shutdown is an RPC method that shuts down the Master's RPC server. 12 | func (mr *Master) Shutdown(_, _ *struct{}) error { 13 | debug("Shutdown: registration server\n") 14 | close(mr.shutdown) 15 | mr.l.Close() // causes the Accept to fail 16 | return nil 17 | } 18 | 19 | // startRPCServer starts the Master's RPC server. It continues accepting RPC 20 | // calls (Register in particular) for as long as the worker is alive. 
21 | func (mr *Master) startRPCServer() { 22 | rpcs := rpc.NewServer() 23 | rpcs.Register(mr) 24 | os.Remove(mr.address) // only needed for "unix" 25 | l, e := net.Listen("unix", mr.address) 26 | if e != nil { 27 | log.Fatal("RegstrationServer", mr.address, " error: ", e) 28 | } 29 | mr.l = l 30 | 31 | // now that we are listening on the master address, can fork off 32 | // accepting connections to another thread. 33 | go func() { 34 | loop: 35 | for { 36 | select { 37 | case <-mr.shutdown: 38 | break loop 39 | default: 40 | } 41 | conn, err := mr.l.Accept() 42 | if err == nil { 43 | go func() { 44 | rpcs.ServeConn(conn) 45 | conn.Close() 46 | }() 47 | } else { 48 | debug("RegistrationServer: accept error", err) 49 | break 50 | } 51 | } 52 | debug("RegistrationServer: done\n") 53 | }() 54 | } 55 | 56 | // stopRPCServer stops the master RPC server. 57 | // This must be done through an RPC to avoid race conditions between the RPC 58 | // server thread and the current thread. 59 | func (mr *Master) stopRPCServer() { 60 | var reply ShutdownReply 61 | ok := call(mr.address, "Master.Shutdown", new(struct{}), &reply) 62 | if ok == false { 63 | fmt.Printf("Cleanup: RPC %s error\n", mr.address) 64 | } 65 | debug("cleanupRegistration: done\n") 66 | } 67 | -------------------------------------------------------------------------------- /src/main/wc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "mapreduce" 6 | "os" 7 | "strconv" 8 | "strings" 9 | "unicode" 10 | ) 11 | 12 | // 13 | // The map function is called once for each file of input. The first 14 | // argument is the name of the input file, and the second is the 15 | // file's complete contents. You should ignore the input file name, 16 | // and look only at the contents argument. The return value is a slice 17 | // of key/value pairs. 18 | // 19 | func mapF(filename string, contents string) []mapreduce.KeyValue { 20 | // This is the condition for splitting string of contents 21 | f := func(r rune) bool { 22 | return !unicode.IsLetter(r) 23 | } 24 | 25 | // Split the string s at each run of Unicode code points 26 | words := strings.FieldsFunc(contents, f) 27 | 28 | // Create a temporary slice that holds key-value pairs 29 | kvs := make([]mapreduce.KeyValue, 1) 30 | for _, word := range words { 31 | kvs = append(kvs, mapreduce.KeyValue{word, "1"}) 32 | } 33 | return kvs 34 | } 35 | 36 | // 37 | // The reduce function is called once for each key generated by the 38 | // map tasks, with a list of all the values created for that key by 39 | // any map task. 40 | // 41 | func reduceF(key string, values []string) string { 42 | return strconv.Itoa(len(values)) 43 | } 44 | 45 | // Can be run in 3 ways: 46 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 47 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. 
xN.txt) 48 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 49 | func main() { 50 | if len(os.Args) < 4 { 51 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 52 | } else if os.Args[1] == "master" { 53 | var mr *mapreduce.Master 54 | if os.Args[2] == "sequential" { 55 | mr = mapreduce.Sequential("wcseq", os.Args[3:], 3, mapF, reduceF) 56 | } else { 57 | mr = mapreduce.Distributed("wcseq", os.Args[3:], 3, os.Args[2]) 58 | } 59 | mr.Wait() 60 | } else { 61 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/diskvd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start a diskvd server. it's a member of some replica 5 | // group, which has other members, and it needs to know 6 | // how to talk to the members of the shardmaster service. 7 | // used by ../diskv/test_test.go 8 | // 9 | // arguments: 10 | // -g groupid 11 | // -m masterport1 -m masterport2 ... 12 | // -s replicaport1 -s replicaport2 ... 13 | // -i my-index-in-server-port-list 14 | // -u unreliable 15 | // -d directory 16 | // -r restart 17 | 18 | import "time" 19 | import "diskv" 20 | import "os" 21 | import "fmt" 22 | import "strconv" 23 | import "runtime" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: diskvd -g gid -m master... -s server... -i my-index -d dir\n") 27 | os.Exit(1) 28 | } 29 | 30 | func main() { 31 | var gid int64 = -1 // my replica group ID 32 | masters := []string{} // ports of shardmasters 33 | replicas := []string{} // ports of servers in my replica group 34 | me := -1 // my index in replicas[] 35 | unreliable := false 36 | dir := "" // store persistent data here 37 | restart := false 38 | 39 | for i := 1; i+1 < len(os.Args); i += 2 { 40 | a0 := os.Args[i] 41 | a1 := os.Args[i+1] 42 | if a0 == "-g" { 43 | gid, _ = strconv.ParseInt(a1, 10, 64) 44 | } else if a0 == "-m" { 45 | masters = append(masters, a1) 46 | } else if a0 == "-s" { 47 | replicas = append(replicas, a1) 48 | } else if a0 == "-i" { 49 | me, _ = strconv.Atoi(a1) 50 | } else if a0 == "-u" { 51 | unreliable, _ = strconv.ParseBool(a1) 52 | } else if a0 == "-d" { 53 | dir = a1 54 | } else if a0 == "-r" { 55 | restart, _ = strconv.ParseBool(a1) 56 | } else { 57 | usage() 58 | } 59 | } 60 | 61 | if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" { 62 | usage() 63 | } 64 | 65 | runtime.GOMAXPROCS(4) 66 | 67 | srv := diskv.StartServer(gid, masters, replicas, me, dir, restart) 68 | srv.Setunreliable(unreliable) 69 | 70 | // for safety, force quit after 10 minutes. 71 | time.Sleep(10 * 60 * time.Second) 72 | mep, _ := os.FindProcess(os.Getpid()) 73 | mep.Kill() 74 | } 75 | -------------------------------------------------------------------------------- /src/shardmaster/server.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "raft" 4 | import "labrpc" 5 | import "sync" 6 | import "encoding/gob" 7 | 8 | // ShardMaster structure 9 | type ShardMaster struct { 10 | mu sync.Mutex 11 | me int 12 | rf *raft.Raft 13 | applyCh chan raft.ApplyMsg 14 | 15 | // Your data here. 16 | 17 | configs []Config // indexed by config num 18 | } 19 | 20 | // Op structure 21 | type Op struct { 22 | // Your data here. 23 | } 24 | 25 | // Join Group 26 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) { 27 | // Your code here. 
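	// (Sketch only, not implemented here: wrap the request in an Op, hand it
	// to the Raft instance sm.rf for agreement, wait for it on sm.applyCh,
	// then append a new Config to sm.configs that includes the joined groups
	// and redistributes shards among the groups.)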
28 | } 29 | 30 | // Leave Group 31 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) { 32 | // Your code here. 33 | } 34 | 35 | // Move Group 36 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) { 37 | // Your code here. 38 | } 39 | 40 | // Query Group 41 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) { 42 | // Your code here. 43 | } 44 | 45 | // Kill is called by the tester calls when a ShardMaster instance won't 46 | // be needed again. you are not required to do anything 47 | // in Kill(), but it might be convenient to (for example) 48 | // turn off debug output from this instance. 49 | func (sm *ShardMaster) Kill() { 50 | sm.rf.Kill() 51 | // Your code here, if desired. 52 | } 53 | 54 | // Raft is needed by shardkv tester 55 | func (sm *ShardMaster) Raft() *raft.Raft { 56 | return sm.rf 57 | } 58 | 59 | // StartServer initializes a ShardMaster server 60 | // servers[] contains the ports of the set of 61 | // servers that will cooperate via Paxos to 62 | // form the fault-tolerant shardmaster service. 63 | // me is the index of the current server in servers[]. 64 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardMaster { 65 | sm := new(ShardMaster) 66 | sm.me = me 67 | 68 | sm.configs = make([]Config, 1) 69 | sm.configs[0].Groups = map[int][]string{} 70 | 71 | gob.Register(Op{}) 72 | sm.applyCh = make(chan raft.ApplyMsg) 73 | sm.rf = raft.Make(servers, me, persister, sm.applyCh) 74 | 75 | // Your code here. 76 | 77 | return sm 78 | } 79 | -------------------------------------------------------------------------------- /src/mapreduce/common_reduce.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "encoding/json" 5 | "log" 6 | "os" 7 | "sort" 8 | ) 9 | 10 | // doReduce manages one reduce task: it should read the intermediate 11 | // files for the task, sort the intermediate key/value pairs by key, 12 | // call the user-defined reduce function (reduceF) for each key, and 13 | // write reduceF's output to disk. 14 | func doReduce( 15 | jobName string, // the name of the whole MapReduce job 16 | reduceTask int, // which reduce task this is 17 | outputFile string, // write the output here 18 | nMap int, // the number of map tasks that were run ("M" in the paper) 19 | reduceF func(key string, values []string) string, 20 | ) { 21 | // Make a slice that hold nMap number of Decoders 22 | var decoders = make([]*json.Decoder, nMap) 23 | 24 | // Open an intermediate file for each map task and give it a Decoder. 25 | for i := 0; i < nMap; i++ { 26 | // reduceName(jobName, m, reduceTask) yields the file name from map task m. 
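		// (For example, with jobName "test", map task 3 and reduce task 1,
		// reduceName returns "mrtmp.test-3-1"; see reduceName in common.go.)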
27 | fileName := reduceName(jobName, i, reduceTask) 28 | 29 | fd, err := os.OpenFile(fileName, os.O_RDONLY, 0600) 30 | defer fd.Close() 31 | if err != nil { 32 | log.Fatal(err) 33 | return 34 | } 35 | 36 | decoders[i] = json.NewDecoder(fd) 37 | } 38 | 39 | // Unmarshal all intermediate files and collate key-values 40 | kvs := make(map[string][]string) 41 | for i := 0; i < nMap; i++ { 42 | var kv *KeyValue 43 | for { 44 | err := decoders[i].Decode(&kv) 45 | if err != nil { 46 | break 47 | } 48 | kvs[kv.Key] = append(kvs[kv.Key], kv.Value) 49 | } 50 | } 51 | 52 | // Sort the intermediate key/value pairs by key, 53 | var keys []string 54 | for k := range kvs { 55 | keys = append(keys, k) 56 | } 57 | sort.Strings(keys) 58 | 59 | // Create an output file 60 | fd, err := os.OpenFile(outputFile, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600) 61 | defer fd.Close() 62 | if err != nil { 63 | log.Fatal(err) 64 | return 65 | } 66 | 67 | // Call the reduce function (reduceF) for each key and write its output to disk 68 | encoder := json.NewEncoder(fd) 69 | for _, key := range keys { 70 | encoder.Encode(KeyValue{key, reduceF(key, kvs[key])}) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/shardmaster/common.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Master shard server: assigns shards to replication groups. 5 | // 6 | // RPC interface: 7 | // Join(servers) -- add a set of groups (gid -> server-list mapping). 8 | // Leave(gids) -- delete a set of groups. 9 | // Move(shard, gid) -- hand off one shard from current owner to gid. 10 | // Query(num) -> fetch Config # num, or latest config if num==-1. 11 | // 12 | // A Config (configuration) describes a set of replica groups, and the 13 | // replica group responsible for each shard. Configs are numbered. Config 14 | // #0 is the initial configuration, with no groups and all shards 15 | // assigned to group 0 (the invalid group). 16 | // 17 | // A GID is a replica group ID. GIDs must be uniqe and > 0. 18 | // Once a GID joins, and leaves, it should never join again. 19 | // 20 | // You will need to add fields to the RPC arguments. 21 | // 22 | 23 | // NShards is the number of shards. 24 | const NShards = 10 25 | 26 | // Config structure: a configuration -- an assignment of shards to groups. 27 | // Please don't change this. 
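// (Illustration only, not part of the required interface: with NShards = 10
// and two joined groups 1 and 2, a balanced Config could have
// Shards = [10]int{1, 1, 1, 1, 1, 2, 2, 2, 2, 2} and Groups mapping each
// gid to its servers.)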
28 | type Config struct { 29 | Num int // config number 30 | Shards [NShards]int // shard -> gid 31 | Groups map[int][]string // gid -> servers[] 32 | } 33 | 34 | // Constant 35 | const ( 36 | OK = "OK" 37 | ) 38 | 39 | // Err string type 40 | type Err string 41 | 42 | // JoinArgs structure 43 | type JoinArgs struct { 44 | Servers map[int][]string // new GID -> servers mappings 45 | } 46 | 47 | // JoinReply structure 48 | type JoinReply struct { 49 | WrongLeader bool 50 | Err Err 51 | } 52 | 53 | // LeaveArgs structure 54 | type LeaveArgs struct { 55 | GIDs []int 56 | } 57 | 58 | // LeaveReply structure 59 | type LeaveReply struct { 60 | WrongLeader bool 61 | Err Err 62 | } 63 | 64 | // MoveArgs structure 65 | type MoveArgs struct { 66 | Shard int 67 | GID int 68 | } 69 | 70 | // MoveReply structure 71 | type MoveReply struct { 72 | WrongLeader bool 73 | Err Err 74 | } 75 | 76 | // QueryArgs structure 77 | type QueryArgs struct { 78 | Num int // desired config number 79 | } 80 | 81 | // QueryReply structure 82 | type QueryReply struct { 83 | WrongLeader bool 84 | Err Err 85 | Config Config 86 | } 87 | -------------------------------------------------------------------------------- /src/mapreduce/schedule.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | 8 | // 9 | // schedule() starts and waits for all tasks in the given phase (mapPhase 10 | // or reducePhase). the mapFiles argument holds the names of the files that 11 | // are the inputs to the map phase, one per map task. nReduce is the 12 | // number of reduce tasks. the registerChan argument yields a stream 13 | // of registered workers; each item is the worker's RPC address, 14 | // suitable for passing to call(). registerChan will yield all 15 | // existing registered workers (if any) and new ones as they register. 16 | // 17 | func schedule( 18 | jobName string, 19 | mapFiles []string, 20 | nReduce int, 21 | phase jobPhase, 22 | registerChan chan string, 23 | ) { 24 | var ntasks int 25 | var nOther int // number of inputs (for reduce) or outputs (for map) 26 | switch phase { 27 | case mapPhase: 28 | ntasks = len(mapFiles) 29 | nOther = nReduce 30 | case reducePhase: 31 | ntasks = nReduce 32 | nOther = len(mapFiles) 33 | } 34 | 35 | fmt.Printf("Schedule: %v %v tasks (%d I/Os)\n", ntasks, phase, nOther) 36 | 37 | // Make 10 channels of workers that receive string 38 | workers := make(chan string, 10) 39 | done := make(chan bool) 40 | 41 | // Define a WaitGroup 42 | var wg sync.WaitGroup 43 | 44 | // Send informations to channels concurrently 45 | go func() { 46 | for { 47 | select { 48 | case work := <-registerChan: 49 | workers <- work 50 | case <-done: 51 | break 52 | } 53 | } 54 | }() 55 | 56 | // Schedule tasks to free workers 57 | for i := 0; i < ntasks; i++ { 58 | select { 59 | case work := <-workers: 60 | doTaskArgs := DoTaskArgs{jobName, mapFiles[i], phase, i, nOther} 61 | wg.Add(1) 62 | var taskFunc func(string) 63 | 64 | // DoTask on each worker. 65 | // If worker fails, reassign to other free worker. 
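			// (taskFunc retries recursively: if the RPC fails, it blocks on the
			// workers channel for another free worker and re-issues the task.)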
66 | taskFunc = func(work string) { 67 | if call(work, "Worker.DoTask", doTaskArgs, nil) { 68 | workers <- work 69 | wg.Done() 70 | } else { 71 | taskFunc(<-workers) 72 | } 73 | } 74 | go taskFunc(work) 75 | } 76 | } 77 | 78 | // Wait for all tasks to completed then return 79 | wg.Wait() 80 | done <- true 81 | fmt.Printf("Schedule: %v phase done\n", phase) 82 | } 83 | -------------------------------------------------------------------------------- /src/mapreduce/common_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "net/rpc" 6 | ) 7 | 8 | // What follows are RPC types and methods. 9 | // Field names must start with capital letters, otherwise RPC will break. 10 | 11 | // DoTaskArgs holds the arguments that are passed to a worker when a job is 12 | // scheduled on it. 13 | type DoTaskArgs struct { 14 | JobName string 15 | File string // only for map, the input file 16 | Phase jobPhase // are we in mapPhase or reducePhase? 17 | TaskNumber int // this task's index in the current phase 18 | 19 | // NumOtherPhase is the total number of tasks in other phase; mappers 20 | // need this to compute the number of output bins, and reducers needs 21 | // this to know how many input files to collect. 22 | NumOtherPhase int 23 | } 24 | 25 | // ShutdownReply is the response to a WorkerShutdown. 26 | // It holds the number of tasks this worker has processed since it was started. 27 | type ShutdownReply struct { 28 | Ntasks int 29 | } 30 | 31 | // RegisterArgs is the argument passed when a worker registers with the master. 32 | type RegisterArgs struct { 33 | Worker string // the worker's UNIX-domain socket name, i.e. its RPC address 34 | } 35 | 36 | // call() sends an RPC to the rpcname handler on server srv 37 | // with arguments args, waits for the reply, and leaves the 38 | // reply in reply. the reply argument should be the address 39 | // of a reply structure. 40 | // 41 | // call() returns true if the server responded, and false 42 | // if call() was not able to contact the server. in particular, 43 | // reply's contents are valid if and only if call() returned true. 44 | // 45 | // you should assume that call() will time out and return an 46 | // error after a while if it doesn't get a reply from the server. 47 | // 48 | // please use call() to send all RPCs, in master.go, mapreduce.go, 49 | // and worker.go. please don't change this function. 50 | // 51 | func call(srv string, rpcname string, 52 | args interface{}, reply interface{}) bool { 53 | c, errx := rpc.Dial("unix", srv) 54 | if errx != nil { 55 | return false 56 | } 57 | defer c.Close() 58 | 59 | err := c.Call(rpcname, args, reply) 60 | if err == nil { 61 | return true 62 | } 63 | 64 | fmt.Println(err) 65 | return false 66 | } 67 | -------------------------------------------------------------------------------- /src/raft/persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft and kvraft to save persistent 5 | // Raft state (log &c) and k/v server snapshots. 6 | // 7 | // a “real” implementation would do this by writing Raft's persistent state 8 | // to disk each time it changes, and reading the latest saved state from disk 9 | // when restarting after a reboot. 10 | // this implementation won't use the disk; instead, it will save and restore 11 | // persistent state from a Persister object. 
Whoever calls Raft.Make() 12 | // supplies a Persister that initially holds Raft's most recently persisted state (if any). 13 | // Raft should initialize its state from that Persister, and should use it to 14 | // save its persistent state each time the state changes. 15 | // 16 | // we will use the original persister.go to test your code for grading. 17 | // so, while you can modify this code to help you debug, please 18 | // test with the original before submitting. 19 | // 20 | 21 | import "sync" 22 | 23 | // Persister structure 24 | type Persister struct { 25 | mu sync.Mutex 26 | raftstate []byte 27 | snapshot []byte 28 | } 29 | 30 | // MakePersister create a Persister instance 31 | func MakePersister() *Persister { 32 | return &Persister{} 33 | } 34 | 35 | // Copy a Persister 36 | func (ps *Persister) Copy() *Persister { 37 | ps.mu.Lock() 38 | defer ps.mu.Unlock() 39 | np := MakePersister() 40 | np.raftstate = ps.raftstate 41 | np.snapshot = ps.snapshot 42 | return np 43 | } 44 | 45 | // SaveRaftState save data in a list of byte 46 | func (ps *Persister) SaveRaftState(data []byte) { 47 | ps.mu.Lock() 48 | defer ps.mu.Unlock() 49 | ps.raftstate = data 50 | } 51 | 52 | // ReadRaftState return a list of byte 53 | func (ps *Persister) ReadRaftState() []byte { 54 | ps.mu.Lock() 55 | defer ps.mu.Unlock() 56 | return ps.raftstate 57 | } 58 | 59 | // RaftStateSize return state size in int 60 | func (ps *Persister) RaftStateSize() int { 61 | ps.mu.Lock() 62 | defer ps.mu.Unlock() 63 | return len(ps.raftstate) 64 | } 65 | 66 | // SaveSnapshot save a snapshot data in a list of byte 67 | func (ps *Persister) SaveSnapshot(snapshot []byte) { 68 | ps.mu.Lock() 69 | defer ps.mu.Unlock() 70 | ps.snapshot = snapshot 71 | } 72 | 73 | // ReadSnapshot read data in list of byte 74 | func (ps *Persister) ReadSnapshot() []byte { 75 | ps.mu.Lock() 76 | defer ps.mu.Unlock() 77 | return ps.snapshot 78 | } 79 | 80 | // SnapshotSize return the value in int 81 | func (ps *Persister) SnapshotSize() int { 82 | ps.mu.Lock() 83 | defer ps.mu.Unlock() 84 | return len(ps.snapshot) 85 | } 86 | -------------------------------------------------------------------------------- /src/shardmaster/client.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Shardmaster clerk. 5 | // 6 | 7 | import ( 8 | "crypto/rand" 9 | "labrpc" 10 | "math/big" 11 | "time" 12 | ) 13 | 14 | // Clerk structure 15 | type Clerk struct { 16 | servers []*labrpc.ClientEnd 17 | // Your data here. 18 | } 19 | 20 | // Generate random string 21 | func nrand() int64 { 22 | max := big.NewInt(int64(1) << 62) 23 | bigx, _ := rand.Int(rand.Reader, max) 24 | x := bigx.Int64() 25 | return x 26 | } 27 | 28 | // MakeClerk create a Clerk instance 29 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 30 | ck := new(Clerk) 31 | ck.servers = servers 32 | // Your code here. 33 | return ck 34 | } 35 | 36 | // Query Group 37 | func (ck *Clerk) Query(num int) Config { 38 | args := &QueryArgs{} 39 | // Your code here. 40 | args.Num = num 41 | for { 42 | // try each known server. 43 | for _, srv := range ck.servers { 44 | var reply QueryReply 45 | ok := srv.Call("ShardMaster.Query", args, &reply) 46 | if ok && reply.WrongLeader == false { 47 | return reply.Config 48 | } 49 | } 50 | time.Sleep(100 * time.Millisecond) 51 | } 52 | } 53 | 54 | // Join Group 55 | func (ck *Clerk) Join(servers map[int][]string) { 56 | args := &JoinArgs{} 57 | // Your code here. 
58 | args.Servers = servers 59 | 60 | for { 61 | // try each known server. 62 | for _, srv := range ck.servers { 63 | var reply JoinReply 64 | ok := srv.Call("ShardMaster.Join", args, &reply) 65 | if ok && reply.WrongLeader == false { 66 | return 67 | } 68 | } 69 | time.Sleep(100 * time.Millisecond) 70 | } 71 | } 72 | 73 | // Leave Group 74 | func (ck *Clerk) Leave(gids []int) { 75 | args := &LeaveArgs{} 76 | // Your code here. 77 | args.GIDs = gids 78 | 79 | for { 80 | // try each known server. 81 | for _, srv := range ck.servers { 82 | var reply LeaveReply 83 | ok := srv.Call("ShardMaster.Leave", args, &reply) 84 | if ok && reply.WrongLeader == false { 85 | return 86 | } 87 | } 88 | time.Sleep(100 * time.Millisecond) 89 | } 90 | } 91 | 92 | // Move Group 93 | func (ck *Clerk) Move(shard int, gid int) { 94 | args := &MoveArgs{} 95 | // Your code here. 96 | args.Shard = shard 97 | args.GID = gid 98 | 99 | for { 100 | // try each known server. 101 | for _, srv := range ck.servers { 102 | var reply MoveReply 103 | ok := srv.Call("ShardMaster.Move", args, &reply) 104 | if ok && reply.WrongLeader == false { 105 | return 106 | } 107 | } 108 | time.Sleep(100 * time.Millisecond) 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/mapreduce/common_map.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "encoding/json" 5 | "hash/fnv" 6 | "io/ioutil" 7 | "log" 8 | "os" 9 | ) 10 | 11 | // doMap manages one map task: it should read one of the input files 12 | // (inFile), call the user-defined map function (mapF) for that file's 13 | // contents, and partition mapF's output into nReduce intermediate files. 14 | func doMap( 15 | jobName string, // the name of the MapReduce job 16 | mapTask int, // which map task this is 17 | inputFile string, 18 | nReduce int, // the number of reduce task that will be run ("R" in the paper) 19 | 20 | // mapF() is the map function provided by the application. 21 | // The first argument should be the input file name, 22 | // though the map function typically ignores it. 23 | // The second argument should be the entire input file contents. 24 | // mapF() returns a slice containing the key/value pairs for reduce 25 | mapF func(filename string, contents string) []KeyValue, 26 | ) { 27 | // Read the input file. 28 | data, err := ioutil.ReadFile(inputFile) 29 | if err != nil { 30 | log.Fatal(err) 31 | return 32 | } 33 | 34 | // Call mapF function for that file's content. 35 | // Need to convert data to string because returned data is []uint8 36 | kvs := mapF(inputFile, string(data)) 37 | 38 | // Do the partition mapF's output into nReduce intermediate files. 39 | // Create a slice with nReduce number of Encoders . 40 | var encoders = make([]*json.Encoder, nReduce) 41 | var fd *os.File 42 | 43 | // Open an intermediate file for each reduce task and give it an Encoder. 44 | for i := 0; i < nReduce; i++ { 45 | // The file name includes both the map task number and the reduce task number. 46 | // Use the filename generated by reduceName(jobName, mapTask, r) 47 | // as the intermediate file for reduce task r. 48 | fileName := reduceName(jobName, mapTask, i) 49 | 50 | // Create a file if none exists, open it write-only, append while writing. 
51 | fd, err = os.OpenFile(fileName, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600) 52 | defer fd.Close() 53 | if err != nil { 54 | log.Fatal(err) 55 | return 56 | } 57 | 58 | encoders[i] = json.NewEncoder(fd) 59 | } 60 | 61 | // For every key-value pair of mapF output, 62 | // call ihash() on each key and mod nReduce to pick the immediate file r. 63 | // Use that file to encode the key value content 64 | for _, kv := range kvs { 65 | r := ihash(kv.Key) % nReduce 66 | err = encoders[r].Encode(kv) 67 | if err != nil { 68 | log.Fatal(err) 69 | return 70 | } 71 | } 72 | } 73 | 74 | func ihash(s string) int { 75 | h := fnv.New32a() 76 | h.Write([]byte(s)) 77 | return int(h.Sum32() & 0x7fffffff) 78 | } 79 | -------------------------------------------------------------------------------- /src/raftkv/client.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | "crypto/rand" 5 | "labrpc" 6 | "math/big" 7 | "sync" 8 | ) 9 | 10 | // Clerk structure 11 | type Clerk struct { 12 | servers []*labrpc.ClientEnd 13 | 14 | id int64 15 | requestID int 16 | mu sync.Mutex 17 | preLeader int 18 | } 19 | 20 | // nrand generates random int64 number 21 | func nrand() int64 { 22 | max := big.NewInt(int64(1) << 62) 23 | bigx, _ := rand.Int(rand.Reader, max) 24 | x := bigx.Int64() 25 | return x 26 | } 27 | 28 | // MakeClerk makes a Clerk instance 29 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 30 | ck := new(Clerk) 31 | ck.servers = servers 32 | 33 | ck.id = nrand() 34 | ck.preLeader = 0 35 | ck.requestID = 0 36 | 37 | return ck 38 | } 39 | 40 | // Get fetches the current value for a key. 41 | // Returns "" if the key does not exist. 42 | // Keeps trying forever in the face of all other errors. 43 | // 44 | // You can send an RPC with code like this: 45 | // ok := ck.servers[i].Call("RaftKV.Get", &args, &reply) 46 | // 47 | // The types of args and reply (including whether they are pointers) 48 | // Must match the declared types of the RPC handler function's 49 | // arguments; reply must be passed as a pointer. 50 | func (ck *Clerk) Get(key string) string { 51 | ck.mu.Lock() 52 | args := GetArgs{Key: key, ClientID: ck.id} 53 | args.RequestID = ck.requestID 54 | ck.requestID++ 55 | ck.mu.Unlock() 56 | 57 | for { 58 | reply := GetReply{} 59 | ok := ck.servers[ck.preLeader].Call("RaftKV.Get", &args, &reply) 60 | if ok && reply.WrongLeader == false { 61 | return reply.Value 62 | } 63 | ck.preLeader = (ck.preLeader + 1) % len(ck.servers) 64 | } 65 | } 66 | 67 | // PutAppend is shared shared by Put and Append. 68 | // 69 | // You can send an RPC with code like this: 70 | // ok := ck.servers[i].Call("RaftKV.PutAppend", &args, &reply) 71 | // 72 | // The types of args and reply (including whether they are pointers) 73 | // Must match the declared types of the RPC handler function's 74 | // arguments; reply must be passed as a pointer. 
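// Like Get above, the clerk starts from the cached preLeader and retries the
// RPC round-robin across servers until a leader accepts the request.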
75 | func (ck *Clerk) PutAppend(key string, value string, op string) { 76 | ck.mu.Lock() 77 | args := PutAppendArgs{Key: key, Value: value, Op: op, ClientID: ck.id} 78 | args.RequestID = ck.requestID 79 | ck.requestID++ 80 | ck.mu.Unlock() 81 | 82 | for { 83 | reply := PutAppendReply{} 84 | ok := ck.servers[ck.preLeader].Call("RaftKV.PutAppend", &args, &reply) 85 | if ok && reply.WrongLeader == false { 86 | return 87 | } 88 | ck.preLeader = (ck.preLeader + 1) % len(ck.servers) 89 | } 90 | } 91 | 92 | // Put asks to put a key-value pair 93 | func (ck *Clerk) Put(key string, value string) { 94 | ck.PutAppend(key, value, "Put") 95 | } 96 | 97 | // Append asks to append a key-value pari 98 | func (ck *Clerk) Append(key string, value string) { 99 | ck.PutAppend(key, value, "Append") 100 | } 101 | -------------------------------------------------------------------------------- /src/main/mr-challenge.txt: -------------------------------------------------------------------------------- 1 | women: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 2 | won: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 3 | wonderful: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 4 | words: 15 pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 5 | worked: 15 pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 6 | worse: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 7 | wounded: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 8 | yes: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 9 | younger: 15 
pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 10 | yours: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 11 | -------------------------------------------------------------------------------- /src/shardkv/server.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // import "shardmaster" 4 | import "labrpc" 5 | import "raft" 6 | import "sync" 7 | import "encoding/gob" 8 | 9 | // Op structure 10 | type Op struct { 11 | // Your definitions here. 12 | // Field names must start with capital letters, 13 | // otherwise RPC will break. 14 | } 15 | 16 | // ShardKV structure 17 | type ShardKV struct { 18 | mu sync.Mutex 19 | me int 20 | rf *raft.Raft 21 | applyCh chan raft.ApplyMsg 22 | makeEnd func(string) *labrpc.ClientEnd 23 | gid int 24 | masters []*labrpc.ClientEnd 25 | maxraftstate int // snapshot if log grows this big 26 | 27 | // Your definitions here. 28 | } 29 | 30 | // Get RPC 31 | func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) { 32 | // Your code here. 33 | } 34 | 35 | // PutAppend RPC 36 | func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 37 | // Your code here. 38 | } 39 | 40 | // Kill is called by the tester when a ShardKV instance won't 41 | // be needed again. you are not required to do anything 42 | // in Kill(), but it might be convenient to (for example) 43 | // turn off debug output from this instance. 44 | func (kv *ShardKV) Kill() { 45 | kv.rf.Kill() 46 | // Your code here, if desired. 47 | } 48 | 49 | // StartServer initializes a ShardKV 50 | // 51 | // servers[] contains the ports of the servers in this group. 52 | // 53 | // me is the index of the current server in servers[]. 54 | // 55 | // the k/v server should store snapshots with 56 | // persister.SaveSnapshot(), and Raft should save its state (including 57 | // log) with persister.SaveRaftState(). 58 | // 59 | // the k/v server should snapshot when Raft's saved state exceeds 60 | // maxraftstate bytes, in order to allow Raft to garbage-collect its 61 | // log. if maxraftstate is -1, you don't need to snapshot. 62 | // 63 | // gid is this group's GID, for interacting with the shardmaster. 64 | // 65 | // pass masters[] to shardmaster.MakeClerk() so you can send 66 | // RPCs to the shardmaster. 67 | // 68 | // makeEnd(servername) turns a server name from a 69 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 70 | // send RPCs. You'll need this to send RPCs to other groups. 71 | // 72 | // look at client.go for examples of how to use masters[] 73 | // and makeEnd() to send RPCs to the group owning a specific shard. 74 | // 75 | // StartServer() must return quickly, so it should start goroutines 76 | // for any long-running work. 77 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, masters []*labrpc.ClientEnd, makeEnd func(string) *labrpc.ClientEnd) *ShardKV { 78 | // call gob.Register on structures you want 79 | // Go's RPC library to marshall/unmarshall. 
80 | gob.Register(Op{}) 81 | 82 | kv := new(ShardKV) 83 | kv.me = me 84 | kv.maxraftstate = maxraftstate 85 | kv.makeEnd = makeEnd 86 | kv.gid = gid 87 | kv.masters = masters 88 | 89 | // Your initialization code here. 90 | 91 | // Use something like this to talk to the shardmaster: 92 | // kv.mck = shardmaster.MakeClerk(kv.masters) 93 | 94 | kv.applyCh = make(chan raft.ApplyMsg) 95 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 96 | 97 | return kv 98 | } 99 | -------------------------------------------------------------------------------- /src/mapreduce/worker.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | // 4 | // Please do not modify this file. 5 | // 6 | 7 | import ( 8 | "fmt" 9 | "log" 10 | "net" 11 | "net/rpc" 12 | "os" 13 | "sync" 14 | ) 15 | 16 | // Worker holds the state for a server waiting for DoTask or Shutdown RPCs 17 | type Worker struct { 18 | sync.Mutex 19 | 20 | name string 21 | Map func(string, string) []KeyValue 22 | Reduce func(string, []string) string 23 | nRPC int // quit after this many RPCs; protected by mutex 24 | nTasks int // total tasks executed; protected by mutex 25 | concurrent int // number of parallel DoTasks in this worker; mutex 26 | l net.Listener 27 | } 28 | 29 | // DoTask is called by the master when a new task is being scheduled on this 30 | // worker. 31 | func (wk *Worker) DoTask(arg *DoTaskArgs, _ *struct{}) error { 32 | fmt.Printf("%s: given %v task #%d on file %s (nios: %d)\n", 33 | wk.name, arg.Phase, arg.TaskNumber, arg.File, arg.NumOtherPhase) 34 | 35 | wk.Lock() 36 | wk.nTasks += 1 37 | wk.concurrent += 1 38 | nc := wk.concurrent 39 | wk.Unlock() 40 | 41 | if nc > 1 { 42 | // schedule() should never issue more than one RPC at a 43 | // time to a given worker. 44 | log.Fatal("Worker.DoTask: more than one DoTask sent concurrently to a single worker\n") 45 | } 46 | 47 | switch arg.Phase { 48 | case mapPhase: 49 | doMap(arg.JobName, arg.TaskNumber, arg.File, arg.NumOtherPhase, wk.Map) 50 | case reducePhase: 51 | doReduce(arg.JobName, arg.TaskNumber, mergeName(arg.JobName, arg.TaskNumber), arg.NumOtherPhase, wk.Reduce) 52 | } 53 | 54 | wk.Lock() 55 | wk.concurrent -= 1 56 | wk.Unlock() 57 | 58 | fmt.Printf("%s: %v task #%d done\n", wk.name, arg.Phase, arg.TaskNumber) 59 | return nil 60 | } 61 | 62 | // Shutdown is called by the master when all work has been completed. 63 | // We should respond with the number of tasks we have processed. 64 | func (wk *Worker) Shutdown(_ *struct{}, res *ShutdownReply) error { 65 | debug("Shutdown %s\n", wk.name) 66 | wk.Lock() 67 | defer wk.Unlock() 68 | res.Ntasks = wk.nTasks 69 | wk.nRPC = 1 70 | return nil 71 | } 72 | 73 | // Tell the master we exist and ready to work 74 | func (wk *Worker) register(master string) { 75 | args := new(RegisterArgs) 76 | args.Worker = wk.name 77 | ok := call(master, "Master.Register", args, new(struct{})) 78 | if ok == false { 79 | fmt.Printf("Register: RPC %s register error\n", master) 80 | } 81 | } 82 | 83 | // RunWorker sets up a connection with the master, registers its address, and 84 | // waits for tasks to be scheduled. 
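// (Typical call, as in main/wc.go:
//  mapreduce.RunWorker("localhost:7777", "localhost:7778", mapF, reduceF, 100),
//  i.e. the master's address, this worker's address, and an nRPC budget of 100.)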
85 | func RunWorker(MasterAddress string, me string, 86 | MapFunc func(string, string) []KeyValue, 87 | ReduceFunc func(string, []string) string, 88 | nRPC int, 89 | ) { 90 | debug("RunWorker %s\n", me) 91 | wk := new(Worker) 92 | wk.name = me 93 | wk.Map = MapFunc 94 | wk.Reduce = ReduceFunc 95 | wk.nRPC = nRPC 96 | rpcs := rpc.NewServer() 97 | rpcs.Register(wk) 98 | os.Remove(me) // only needed for "unix" 99 | l, e := net.Listen("unix", me) 100 | if e != nil { 101 | log.Fatal("RunWorker: worker ", me, " error: ", e) 102 | } 103 | wk.l = l 104 | wk.register(MasterAddress) 105 | 106 | // DON'T MODIFY CODE BELOW 107 | for { 108 | wk.Lock() 109 | if wk.nRPC == 0 { 110 | wk.Unlock() 111 | break 112 | } 113 | wk.Unlock() 114 | conn, err := wk.l.Accept() 115 | if err == nil { 116 | wk.Lock() 117 | wk.nRPC-- 118 | wk.Unlock() 119 | go rpcs.ServeConn(conn) 120 | } else { 121 | break 122 | } 123 | } 124 | wk.l.Close() 125 | debug("RunWorker %s exit\n", me) 126 | } 127 | -------------------------------------------------------------------------------- /src/shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // client code to talk to a sharded key/value service. 5 | // 6 | // the client first talks to the shardmaster to find out 7 | // the assignment of shards (keys) to groups, and then 8 | // talks to the group that holds the key's shard. 9 | // 10 | 11 | import ( 12 | "crypto/rand" 13 | "labrpc" 14 | "math/big" 15 | "shardmaster" 16 | "time" 17 | ) 18 | 19 | // please use this function, 20 | // and please do not change it. 21 | func key2shard(key string) int { 22 | shard := 0 23 | if len(key) > 0 { 24 | shard = int(key[0]) 25 | } 26 | shard %= shardmaster.NShards 27 | return shard 28 | } 29 | 30 | func nrand() int64 { 31 | max := big.NewInt(int64(1) << 62) 32 | bigx, _ := rand.Int(rand.Reader, max) 33 | x := bigx.Int64() 34 | return x 35 | } 36 | 37 | // Clerk structure 38 | type Clerk struct { 39 | sm *shardmaster.Clerk 40 | config shardmaster.Config 41 | makeEnd func(string) *labrpc.ClientEnd 42 | // You will have to modify this struct. 43 | } 44 | 45 | // MakeClerk is called by the tester 46 | // 47 | // masters[] is needed to call shardmaster.MakeClerk(). 48 | // 49 | // makeEnd(servername) turns a server name from a 50 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 51 | // send RPCs. 52 | func MakeClerk(masters []*labrpc.ClientEnd, makeEnd func(string) *labrpc.ClientEnd) *Clerk { 53 | ck := new(Clerk) 54 | ck.sm = shardmaster.MakeClerk(masters) 55 | ck.makeEnd = makeEnd 56 | // You'll have to add code here. 57 | return ck 58 | } 59 | 60 | // Get fetch the current value for a key. 61 | // returns "" if the key does not exist. 62 | // keeps trying forever in the face of all other errors. 63 | // You will have to modify this function. 64 | func (ck *Clerk) Get(key string) string { 65 | args := GetArgs{} 66 | args.Key = key 67 | 68 | for { 69 | shard := key2shard(key) 70 | gid := ck.config.Shards[shard] 71 | if servers, ok := ck.config.Groups[gid]; ok { 72 | // try each server for the shard. 
73 | for si := 0; si < len(servers); si++ { 74 | srv := ck.makeEnd(servers[si]) 75 | var reply GetReply 76 | ok := srv.Call("ShardKV.Get", &args, &reply) 77 | if ok && reply.WrongLeader == false && (reply.Err == OK || reply.Err == ErrNoKey) { 78 | return reply.Value 79 | } 80 | if ok && (reply.Err == ErrWrongGroup) { 81 | break 82 | } 83 | } 84 | } 85 | time.Sleep(100 * time.Millisecond) 86 | // ask master for the latest configuration. 87 | ck.config = ck.sm.Query(-1) 88 | } 89 | 90 | return "" 91 | } 92 | 93 | // PutAppend shared by Put and Append. 94 | // You will have to modify this function. 95 | func (ck *Clerk) PutAppend(key string, value string, op string) { 96 | args := PutAppendArgs{} 97 | args.Key = key 98 | args.Value = value 99 | args.Op = op 100 | 101 | for { 102 | shard := key2shard(key) 103 | gid := ck.config.Shards[shard] 104 | if servers, ok := ck.config.Groups[gid]; ok { 105 | for si := 0; si < len(servers); si++ { 106 | srv := ck.makeEnd(servers[si]) 107 | var reply PutAppendReply 108 | ok := srv.Call("ShardKV.PutAppend", &args, &reply) 109 | if ok && reply.WrongLeader == false && reply.Err == OK { 110 | return 111 | } 112 | if ok && reply.Err == ErrWrongGroup { 113 | break 114 | } 115 | } 116 | } 117 | time.Sleep(100 * time.Millisecond) 118 | // ask master for the latest configuration. 119 | ck.config = ck.sm.Query(-1) 120 | } 121 | } 122 | 123 | // Put by a Clerk 124 | func (ck *Clerk) Put(key string, value string) { 125 | ck.PutAppend(key, value, "Put") 126 | } 127 | 128 | // Append by a Clerk 129 | func (ck *Clerk) Append(key string, value string) { 130 | ck.PutAppend(key, value, "Append") 131 | } 132 | -------------------------------------------------------------------------------- /src/mapreduce/test_test.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "bufio" 9 | "log" 10 | "os" 11 | "sort" 12 | "strconv" 13 | "strings" 14 | ) 15 | 16 | const ( 17 | nNumber = 100000 18 | nMap = 100 19 | nReduce = 50 20 | ) 21 | 22 | // Create input file with N numbers 23 | // Check if we have N numbers in output file 24 | 25 | // Split in words 26 | func MapFunc(file string, value string) (res []KeyValue) { 27 | debug("Map %v\n", value) 28 | words := strings.Fields(value) 29 | for _, w := range words { 30 | kv := KeyValue{w, ""} 31 | res = append(res, kv) 32 | } 33 | return 34 | } 35 | 36 | // Just return key 37 | func ReduceFunc(key string, values []string) string { 38 | for _, e := range values { 39 | debug("Reduce %s %v\n", key, e) 40 | } 41 | return "" 42 | } 43 | 44 | // Checks input file agaist output file: each input number should show up 45 | // in the output file in string sorted order 46 | func check(t *testing.T, files []string) { 47 | output, err := os.Open("mrtmp.test") 48 | if err != nil { 49 | log.Fatal("check: ", err) 50 | } 51 | defer output.Close() 52 | 53 | var lines []string 54 | for _, f := range files { 55 | input, err := os.Open(f) 56 | if err != nil { 57 | log.Fatal("check: ", err) 58 | } 59 | defer input.Close() 60 | inputScanner := bufio.NewScanner(input) 61 | for inputScanner.Scan() { 62 | lines = append(lines, inputScanner.Text()) 63 | } 64 | } 65 | 66 | sort.Strings(lines) 67 | 68 | outputScanner := bufio.NewScanner(output) 69 | i := 0 70 | for outputScanner.Scan() { 71 | var v1 int 72 | var v2 int 73 | text := outputScanner.Text() 74 | n, err := fmt.Sscanf(lines[i], "%d", &v1) 75 | if n == 1 && err == nil { 76 | _, err = 
fmt.Sscanf(text, "%d", &v2) 77 | } 78 | if err != nil || v1 != v2 { 79 | t.Fatalf("line %d: %d != %d err %v\n", i, v1, v2, err) 80 | } 81 | i++ 82 | } 83 | if i != nNumber { 84 | t.Fatalf("Expected %d lines in output\n", nNumber) 85 | } 86 | } 87 | 88 | // Workers report back how many RPCs they have processed in the Shutdown reply. 89 | // Check that they processed at least 1 DoTask RPC. 90 | func checkWorker(t *testing.T, l []int) { 91 | for _, tasks := range l { 92 | if tasks == 0 { 93 | t.Fatalf("A worker didn't do any work\n") 94 | } 95 | } 96 | } 97 | 98 | // Make input file 99 | func makeInputs(num int) []string { 100 | var names []string 101 | var i = 0 102 | for f := 0; f < num; f++ { 103 | names = append(names, fmt.Sprintf("824-mrinput-%d.txt", f)) 104 | file, err := os.Create(names[f]) 105 | if err != nil { 106 | log.Fatal("mkInput: ", err) 107 | } 108 | w := bufio.NewWriter(file) 109 | for i < (f+1)*(nNumber/num) { 110 | fmt.Fprintf(w, "%d\n", i) 111 | i++ 112 | } 113 | w.Flush() 114 | file.Close() 115 | } 116 | return names 117 | } 118 | 119 | // Cook up a unique-ish UNIX-domain socket name 120 | // in /var/tmp. can't use current directory since 121 | // AFS doesn't support UNIX-domain sockets. 122 | func port(suffix string) string { 123 | s := "/var/tmp/824-" 124 | s += strconv.Itoa(os.Getuid()) + "/" 125 | os.Mkdir(s, 0777) 126 | s += "mr" 127 | s += strconv.Itoa(os.Getpid()) + "-" 128 | s += suffix 129 | return s 130 | } 131 | 132 | func setup() *Master { 133 | files := makeInputs(nMap) 134 | master := port("master") 135 | mr := Distributed("test", files, nReduce, master) 136 | return mr 137 | } 138 | 139 | func cleanup(mr *Master) { 140 | mr.CleanupFiles() 141 | for _, f := range mr.files { 142 | removeFile(f) 143 | } 144 | } 145 | 146 | func TestSequentialSingle(t *testing.T) { 147 | mr := Sequential("test", makeInputs(1), 1, MapFunc, ReduceFunc) 148 | mr.Wait() 149 | check(t, mr.files) 150 | checkWorker(t, mr.stats) 151 | cleanup(mr) 152 | } 153 | 154 | func TestSequentialMany(t *testing.T) { 155 | mr := Sequential("test", makeInputs(5), 3, MapFunc, ReduceFunc) 156 | mr.Wait() 157 | check(t, mr.files) 158 | checkWorker(t, mr.stats) 159 | cleanup(mr) 160 | } 161 | 162 | func TestBasic(t *testing.T) { 163 | mr := setup() 164 | for i := 0; i < 2; i++ { 165 | go RunWorker(mr.address, port("worker"+strconv.Itoa(i)), 166 | MapFunc, ReduceFunc, -1) 167 | } 168 | mr.Wait() 169 | check(t, mr.files) 170 | checkWorker(t, mr.stats) 171 | cleanup(mr) 172 | } 173 | 174 | func TestOneFailure(t *testing.T) { 175 | mr := setup() 176 | // Start 2 workers that fail after 10 tasks 177 | go RunWorker(mr.address, port("worker"+strconv.Itoa(0)), 178 | MapFunc, ReduceFunc, 10) 179 | go RunWorker(mr.address, port("worker"+strconv.Itoa(1)), 180 | MapFunc, ReduceFunc, -1) 181 | mr.Wait() 182 | check(t, mr.files) 183 | checkWorker(t, mr.stats) 184 | cleanup(mr) 185 | } 186 | 187 | func TestManyFailures(t *testing.T) { 188 | mr := setup() 189 | i := 0 190 | done := false 191 | for !done { 192 | select { 193 | case done = <-mr.doneChannel: 194 | check(t, mr.files) 195 | cleanup(mr) 196 | break 197 | default: 198 | // Start 2 workers each sec. 
The workers fail after 10 tasks 199 | w := port("worker" + strconv.Itoa(i)) 200 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10) 201 | i++ 202 | w = port("worker" + strconv.Itoa(i)) 203 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10) 204 | i++ 205 | time.Sleep(1 * time.Second) 206 | } 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/mapreduce/master.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | // 4 | // Please do not modify this file. 5 | // 6 | 7 | import ( 8 | "fmt" 9 | "net" 10 | "sync" 11 | ) 12 | 13 | // Master holds all the state that the master needs to keep track of. 14 | type Master struct { 15 | sync.Mutex 16 | 17 | address string 18 | doneChannel chan bool 19 | 20 | // protected by the mutex 21 | newCond *sync.Cond // signals when Register() adds to workers[] 22 | workers []string // each worker's UNIX-domain socket name -- its RPC address 23 | 24 | // Per-task information 25 | jobName string // Name of currently executing job 26 | files []string // Input files 27 | nReduce int // Number of reduce partitions 28 | 29 | shutdown chan struct{} 30 | l net.Listener 31 | stats []int 32 | } 33 | 34 | // Register is an RPC method that is called by workers after they have started 35 | // up to report that they are ready to receive tasks. 36 | func (mr *Master) Register(args *RegisterArgs, _ *struct{}) error { 37 | mr.Lock() 38 | defer mr.Unlock() 39 | debug("Register: worker %s\n", args.Worker) 40 | mr.workers = append(mr.workers, args.Worker) 41 | 42 | // tell forwardRegistrations() that there's a new workers[] entry. 43 | mr.newCond.Broadcast() 44 | 45 | return nil 46 | } 47 | 48 | // newMaster initializes a new Map/Reduce Master 49 | func newMaster(master string) (mr *Master) { 50 | mr = new(Master) 51 | mr.address = master 52 | mr.shutdown = make(chan struct{}) 53 | mr.newCond = sync.NewCond(mr) 54 | mr.doneChannel = make(chan bool) 55 | return 56 | } 57 | 58 | // Sequential runs map and reduce tasks sequentially, waiting for each task to 59 | // complete before running the next. 60 | func Sequential(jobName string, files []string, nreduce int, 61 | mapF func(string, string) []KeyValue, 62 | reduceF func(string, []string) string, 63 | ) (mr *Master) { 64 | mr = newMaster("master") 65 | go mr.run(jobName, files, nreduce, func(phase jobPhase) { 66 | switch phase { 67 | case mapPhase: 68 | for i, f := range mr.files { 69 | doMap(mr.jobName, i, f, mr.nReduce, mapF) 70 | } 71 | case reducePhase: 72 | for i := 0; i < mr.nReduce; i++ { 73 | doReduce(mr.jobName, i, mergeName(mr.jobName, i), len(mr.files), reduceF) 74 | } 75 | } 76 | }, func() { 77 | mr.stats = []int{len(files) + nreduce} 78 | }) 79 | return 80 | } 81 | 82 | // helper function that sends information about all existing 83 | // and newly registered workers to channel ch. schedule() 84 | // reads ch to learn about workers. 85 | func (mr *Master) forwardRegistrations(ch chan string) { 86 | i := 0 87 | for { 88 | mr.Lock() 89 | if len(mr.workers) > i { 90 | // there's a worker that we haven't told schedule() about. 91 | w := mr.workers[i] 92 | go func() { ch <- w }() // send without holding the lock. 93 | i = i + 1 94 | } else { 95 | // wait for Register() to add an entry to workers[] 96 | // in response to an RPC from a new worker. 
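// (note: sync.Cond.Wait atomically releases the Master's mutex while it blocks and re-acquires it before returning, which is why this loop can safely hold the lock around the wait.)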
97 | mr.newCond.Wait() 98 | } 99 | mr.Unlock() 100 | } 101 | } 102 | 103 | // Distributed schedules map and reduce tasks on workers that register with the 104 | // master over RPC. 105 | func Distributed(jobName string, files []string, nreduce int, master string) (mr *Master) { 106 | mr = newMaster(master) 107 | mr.startRPCServer() 108 | go mr.run(jobName, files, nreduce, 109 | func(phase jobPhase) { 110 | ch := make(chan string) 111 | go mr.forwardRegistrations(ch) 112 | schedule(mr.jobName, mr.files, mr.nReduce, phase, ch) 113 | }, 114 | func() { 115 | mr.stats = mr.killWorkers() 116 | mr.stopRPCServer() 117 | }) 118 | return 119 | } 120 | 121 | // run executes a mapreduce job on the given number of mappers and reducers. 122 | // 123 | // First, it divides up the input file among the given number of mappers, and 124 | // schedules each task on workers as they become available. Each map task bins 125 | // its output in a number of bins equal to the given number of reduce tasks. 126 | // Once all the mappers have finished, workers are assigned reduce tasks. 127 | // 128 | // When all tasks have been completed, the reducer outputs are merged, 129 | // statistics are collected, and the master is shut down. 130 | // 131 | // Note that this implementation assumes a shared file system. 132 | func (mr *Master) run(jobName string, files []string, nreduce int, 133 | schedule func(phase jobPhase), 134 | finish func(), 135 | ) { 136 | mr.jobName = jobName 137 | mr.files = files 138 | mr.nReduce = nreduce 139 | 140 | fmt.Printf("%s: Starting Map/Reduce task %s\n", mr.address, mr.jobName) 141 | 142 | schedule(mapPhase) 143 | schedule(reducePhase) 144 | finish() 145 | mr.merge() 146 | 147 | fmt.Printf("%s: Map/Reduce task completed\n", mr.address) 148 | 149 | mr.doneChannel <- true 150 | } 151 | 152 | // Wait blocks until the currently scheduled work has completed. 153 | // This happens when all tasks have scheduled and completed, the final output 154 | // have been computed, and all workers have been shut down. 155 | func (mr *Master) Wait() { 156 | <-mr.doneChannel 157 | } 158 | 159 | // killWorkers cleans up all workers by sending each one a Shutdown RPC. 160 | // It also collects and returns the number of tasks each worker has performed. 161 | func (mr *Master) killWorkers() []int { 162 | mr.Lock() 163 | defer mr.Unlock() 164 | ntasks := make([]int, 0, len(mr.workers)) 165 | for _, w := range mr.workers { 166 | debug("Master: shutdown worker %s\n", w) 167 | var reply ShutdownReply 168 | ok := call(w, "Worker.Shutdown", new(struct{}), &reply) 169 | if ok == false { 170 | fmt.Printf("Master: RPC %s shutdown error\n", w) 171 | } else { 172 | ntasks = append(ntasks, reply.Ntasks) 173 | } 174 | } 175 | return ntasks 176 | } 177 | -------------------------------------------------------------------------------- /src/raftkv/server.go: -------------------------------------------------------------------------------- 1 | // adapted from ZiyueHuang's implementation: 2 | // https://github.com/ZiyueHuang/Distributed-Systems/blob/master/src/kvraft/server.go 3 | 4 | package raftkv 5 | 6 | import ( 7 | "bytes" 8 | "encoding/gob" 9 | "labrpc" 10 | "log" 11 | "raft" 12 | "sync" 13 | "time" 14 | ) 15 | 16 | // Debug enabled or not 17 | const Debug = 0 18 | 19 | // DPrintf prints debugging message 20 | func DPrintf(format string, a ...interface{}) (n int, err error) { 21 | if Debug > 0 { 22 | log.Printf(format, a...) 
23 | } 24 | return 25 | } 26 | 27 | // Op structure 28 | type Op struct { 29 | Type string 30 | Key string 31 | Value string 32 | ClientID int64 33 | RequestID int 34 | } 35 | 36 | // RaftKV structure that holds Raft instance 37 | type RaftKV struct { 38 | mu sync.Mutex 39 | me int 40 | rf *raft.Raft 41 | applyCh chan raft.ApplyMsg 42 | 43 | maxraftstate int // snapshot if log grows this big 44 | 45 | kvDB map[string]string 46 | dup map[int64]int 47 | result map[int]chan Op 48 | killCh chan bool 49 | } 50 | 51 | // AppendEntry appends an entry Op and returns a boolean 52 | func (kv *RaftKV) AppendEntry(entry Op) bool { 53 | index, _, isLeader := kv.rf.Start(entry) 54 | if !isLeader { 55 | return false 56 | } 57 | 58 | kv.mu.Lock() 59 | ch, ok := kv.result[index] 60 | 61 | if !ok { 62 | ch = make(chan Op, 1) 63 | kv.result[index] = ch 64 | } 65 | kv.mu.Unlock() 66 | 67 | select { 68 | case op := <-ch: 69 | return op.ClientID == entry.ClientID && op.RequestID == entry.RequestID 70 | case <-time.After(800 * time.Millisecond): 71 | return false 72 | } 73 | return false 74 | } 75 | 76 | // Get RPC 77 | func (kv *RaftKV) Get(args *GetArgs, reply *GetReply) { 78 | entry := Op{Type: "Get", Key: args.Key, ClientID: args.ClientID, RequestID: args.RequestID} 79 | 80 | ok := kv.AppendEntry(entry) 81 | if !ok { 82 | reply.WrongLeader = true 83 | } else { 84 | reply.WrongLeader = false 85 | reply.Err = OK 86 | 87 | kv.mu.Lock() 88 | reply.Value, ok = kv.kvDB[args.Key] 89 | if !ok { 90 | reply.Value = "" 91 | } 92 | kv.dup[args.ClientID] = args.RequestID 93 | kv.mu.Unlock() 94 | } 95 | } 96 | 97 | // PutAppend RPC 98 | func (kv *RaftKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 99 | entry := Op{ 100 | Type: args.Op, 101 | Key: args.Key, 102 | Value: args.Value, 103 | ClientID: args.ClientID, 104 | RequestID: args.RequestID} 105 | 106 | ok := kv.AppendEntry(entry) 107 | if !ok { 108 | reply.WrongLeader = true 109 | } else { 110 | reply.WrongLeader = false 111 | reply.Err = OK 112 | } 113 | } 114 | 115 | // Kill is called by the tester when a RaftKV instance won't 116 | // be needed again. you are not required to do anything 117 | // in Kill, but it might be convenient to (for example) 118 | // turn off debug output from this instance. 119 | func (kv *RaftKV) Kill() { 120 | kv.rf.Kill() 121 | close(kv.killCh) 122 | } 123 | 124 | // StartKVServer return a RaftKV 125 | // servers[] contains the ports of the set of 126 | // servers that will cooperate via Raft to 127 | // form the fault-tolerant key/value service. 128 | // me is the index of the current server in servers[]. 129 | // the k/v server should store snapshots with persister.SaveSnapshot(), 130 | // and Raft should save its state (including log) with persister.SaveRaftState(). 131 | // the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, 132 | // in order to allow Raft to garbage-collect its log. if maxraftstate is -1, 133 | // you don't need to snapshot. 134 | // StartKVServer() must return quickly, so it should start goroutines 135 | // for any long-running work. 136 | func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *RaftKV { 137 | // call gob.Register on structures you want 138 | // Go's RPC library to marshall/unmarshall. 
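// Note: Op is the concrete type stored behind Raft's interface{} command field; registering it here lets gob encode/decode Op values when log entries are shipped over labrpc.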
139 | gob.Register(Op{}) 140 | 141 | kv := new(RaftKV) 142 | kv.me = me 143 | kv.maxraftstate = maxraftstate 144 | 145 | kv.applyCh = make(chan raft.ApplyMsg, 100) 146 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 147 | kv.kvDB = make(map[string]string) 148 | kv.result = make(map[int]chan Op) 149 | kv.dup = make(map[int64]int) 150 | kv.killCh = make(chan bool) 151 | 152 | go kv.run() 153 | 154 | return kv 155 | } 156 | 157 | // Run RaftKV 158 | func (kv *RaftKV) run() { 159 | for { 160 | select { 161 | case msg := <-kv.applyCh: 162 | if msg.UseSnapshot { 163 | var LastIncludedIndex int 164 | var LastIncludedTerm int 165 | r := bytes.NewBuffer(msg.Snapshot) 166 | d := gob.NewDecoder(r) 167 | kv.mu.Lock() 168 | d.Decode(&LastIncludedIndex) 169 | d.Decode(&LastIncludedTerm) 170 | kv.kvDB = make(map[string]string) 171 | kv.dup = make(map[int64]int) 172 | d.Decode(&kv.kvDB) 173 | d.Decode(&kv.dup) 174 | kv.mu.Unlock() 175 | } else { 176 | index := msg.Index 177 | op := msg.Command.(Op) 178 | kv.mu.Lock() 179 | if !kv.isDup(&op) { 180 | switch op.Type { 181 | case "Put": 182 | kv.kvDB[op.Key] = op.Value 183 | case "Append": 184 | kv.kvDB[op.Key] += op.Value 185 | } 186 | kv.dup[op.ClientID] = op.RequestID 187 | } 188 | ch, ok := kv.result[index] 189 | if ok { 190 | select { 191 | case <-kv.result[index]: 192 | case <-kv.killCh: 193 | return 194 | default: 195 | } 196 | ch <- op 197 | } else { 198 | kv.result[index] = make(chan Op, 1) 199 | } 200 | if kv.maxraftstate != -1 && kv.rf.GetPersistSize() > kv.maxraftstate { 201 | w := new(bytes.Buffer) 202 | e := gob.NewEncoder(w) 203 | e.Encode(kv.kvDB) 204 | e.Encode(kv.dup) 205 | data := w.Bytes() 206 | go kv.rf.StartSnapshot(data, msg.Index) 207 | } 208 | kv.mu.Unlock() 209 | } 210 | case <-kv.killCh: 211 | return 212 | } 213 | } 214 | } 215 | 216 | // Check duplication 217 | func (kv *RaftKV) isDup(op *Op) bool { 218 | v, ok := kv.dup[op.ClientID] 219 | if ok { 220 | return v >= op.RequestID 221 | } 222 | 223 | return false 224 | } 225 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hstore 2 | 3 | [![Go Report Card](https://goreportcard.com/badge/github.com/hoanhan101/hstore)](https://goreportcard.com/report/github.com/hoanhan101/hstore) 4 | 5 | **hstore** is a fault-tolerant distributed key-value store inspired by 6 | [MIT's 6.824: Distributed System Spring 2017 Lab](http://nil.csail.mit.edu/6.824/2017/). 7 | The goal of the project is to build a simple, fast and reliable database on top 8 | of Raft, a replicated state machine protocol. 9 | 10 | ## Project Status 11 | 12 | It is still a work in progress. Here is the initial project's [proposal](PROPOSAL.md). 13 | Other parts will be updated as soon as it is live and ready. 
14 |  15 | ### Tasks 16 |  17 | - [x] Implement Raft Consensus Algorithm 18 | - [x] Pass all lab tests 19 | - [x] Adopt a clear and understandable structure 20 | - [x] Follow the paper closely and comment heavily on the code 21 | - [x] Implement Fault-tolerant Key-Value Service 22 | - [x] Pass all lab tests 23 | - [x] Build a simple stdin-based client 24 | - [x] Be able to specify the number of servers to boot up 25 | - [x] Implement GET, PUT, APPEND 26 | - [x] Add Go report card 27 | - [x] Clean up code and fix Golint warnings, aiming for an A+ 28 |  29 | ### Ideas 30 |  31 | - [ ] Enable network I/O so that every server in the quorum has a public host and port, instead of just 32 | communicating through goroutines 33 | - Details: 34 | - net/rpc doesn't have a Network object, so servers cannot be added/removed 35 | - labrpc doesn't have an option for network I/O (e.g.: Client.Dial and Server.ServeConn) 36 | - Proposed solutions (either one of these): 37 | - [ ] Adapt labrpc to net/rpc, add more functions and rewrite the package to use websockets 38 | - [ ] Use net/rpc and adapt the labrpc library's functionality 39 | - [ ] Keep the labrpc code, wrap it with Go net. 40 | - [ ] Be able to start RaftKV servers one by one and watch the leader election as well as 41 | log replication in real time (with the key-value service, of course) 42 | - [ ] Implement RESTful APIs to query each server's kv store 43 | - [ ] Implement logging 44 | - [ ] Make sure things are configurable 45 | - [ ] Build CLI for server and client (e.g.: [redis demo](http://try.redis.io/)) 46 | - [ ] Make Persister read/write Raft's snapshot on/to disk (instead of holding it in memory) 47 | - [ ] How to do service discovery? (e.g.: [consul demo](https://youtu.be/huvBEB3suoo)) 48 | - [ ] Dockerize + automate build 49 | - [ ] Continuous Integration and Delivery 50 | - [ ] Godoc 51 | - [ ] Code coverage 52 |  53 | ### Issues 54 |  55 | - [ ] raftkv tests fail on Docker Ubuntu 56 | - Details: 57 | - All tests pass on Mac, but not on Docker on Mac 58 | - Mac OS Sierra 10.12.6, Docker version 17.06.1-ce, build 874a737, golang 1.10.1 59 | - Either something is wrong with networking inside the container, or with the labrpc package 60 | - Proposed solutions: 61 | - [ ] Test on a real Ubuntu machine and go from there 62 |  63 | ## Table of Contents 64 |  65 | - [Getting Started](#getting-started) 66 | - [Installing](#installing) 67 | - [Running](#running) 68 | - [Testing](#testing) 69 | - [Example](#example) 70 | - [MIT's 6.824](#mits-6824) 71 | - [raftkv](#raftkv) 72 | - [raft](#raft) 73 | - [mapreduce](#mapreduce) 74 | - [word-count](#word-count) 75 | - [References](#references) 76 |  77 | ## Getting Started 78 |  79 | ### Installing 80 |  81 | ``` 82 | git clone https://github.com/hoanhan101/hstore.git && cd hstore 83 | ``` 84 |  85 | ### Running 86 |  87 | > TODO 88 |  89 | ### Testing 90 |  91 | > Refer to MIT's 6.824 raftkv test below 92 |  93 | ### Example 94 |  95 | > TODO 96 |  97 | ## MIT's 6.824 98 |  99 | Assuming that the `GOPATH` is set correctly, one can follow these instructions 100 | to run tests for different programs. If not, here is 101 | [an example](https://github.com/hoanhan101/go-playground) on how to do it. 102 |  103 | ### [raftkv](src/raftkv) 104 |  105 | **raftkv** is a fault-tolerant key-value storage service built on top of Raft. It is a replicated 106 | state machine, consisting of several key-value servers that coordinate their activities through 107 | the Raft log.
It should continue to process client requests as long as a majority of the servers 108 | are alive and can communicate, in spite of other failures or network partitions. 109 |  110 | One can test the program by running: 111 | ``` 112 | $ ./test_6824.sh raftkv 113 | ``` 114 |  115 | In order to execute a specific test in a package: 116 | ``` 117 | $ ./test_6824.sh raftkv TestConcurrent 118 | ``` 119 |  120 | > More information about the test script can be found in [test_6824](test_6824.sh). 121 |  122 | Here is an example of the test output: 123 |  124 | ![raftkv's test output](img/test_raftkv.png) 125 |  126 | ### [raft](src/raft) 127 |  128 | **raft** is a replicated state machine protocol. It achieves fault tolerance by storing copies of 129 | its data on multiple replica servers. Replication allows the service to continue operating even if 130 | some of its servers experience failures. 131 |  132 | One can test the program by running: 133 | ``` 134 | $ ./test_6824.sh raft 135 | ``` 136 |  137 | This will run all the tests for Raft. To test features separately, run the following inside the raft 138 | package directory: 139 | - `go test -run 2A` checks leader election and heartbeats 140 | - `go test -run 2B` checks log replication 141 | - `go test -run 2C` checks persistent state 142 |  143 | One can also check how much real time and CPU time the tests take with the `time` command: 144 | ``` 145 | time go test 146 | ``` 147 |  148 | Here is an example of the test output: 149 |  150 | ![raft's test output](img/test_raft.png) 151 |  152 | ### [mapreduce](src/mapreduce) 153 |  154 | From the MapReduce white paper: 155 | > MapReduce is a programming model and an associated implementation for processing and generating 156 | > large data sets. Users specify a map function that processes a key/value pair to generate a set 157 | > of intermediate key/value pairs, and a reduce function that merges all intermediate values 158 | > associated with the same intermediate key. 159 |  160 | One can test the program by running: 161 | ``` 162 | $ ./test_6824.sh mapreduce 163 | ``` 164 |  165 | ### [word-count](src/main/wc.go) 166 |  167 | **word count** is a simple MapReduce example. It reports the number of occurrences of each word 168 | in its input. 169 |  170 | One can test the program by running: 171 | ``` 172 | $ cd hstore 173 | $ export "GOPATH=$PWD" 174 | $ cd "$GOPATH/src/main" 175 | $ ./test-wc.sh 176 | ``` 177 |  178 | This will run the test and clean up all intermediate files afterward.
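To give a feel for the map/reduce interface, here is a minimal sketch of a word-count program written against the `mapreduce` package in this repository. It is illustrative only: the real implementation lives in [wc.go](src/main/wc.go), and the function names, word-splitting rule, and command-line handling below are simplified assumptions rather than copies of that code.

```
package main

import (
	"os"
	"strconv"
	"strings"
	"unicode"

	"mapreduce" // resolved through the GOPATH setup shown above
)

// mapF emits one KeyValue per word in a file's contents;
// anything that is not a letter is treated as a word separator.
func mapF(filename string, contents string) []mapreduce.KeyValue {
	words := strings.FieldsFunc(contents, func(r rune) bool {
		return !unicode.IsLetter(r)
	})
	kvs := make([]mapreduce.KeyValue, 0, len(words))
	for _, w := range words {
		kvs = append(kvs, mapreduce.KeyValue{Key: w, Value: "1"})
	}
	return kvs
}

// reduceF receives every value emitted for one word and returns its count.
func reduceF(key string, values []string) string {
	return strconv.Itoa(len(values))
}

func main() {
	// run the job sequentially over the input files given on the command line
	mr := mapreduce.Sequential("wcseq", os.Args[1:], 3, mapF, reduceF)
	mr.Wait()
}
```

Under this setup, `mapreduce.Sequential` runs every map task and then every reduce task in order, and its merged result is written to a `mrtmp.*` file named after the job.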
179 | 180 | ## References 181 | 182 | - [6.824: Distributed Systems Spring 2017](http://nil.csail.mit.edu/6.824/2017/) 183 | - [ZiyueHuang's raft implementation](https://github.com/ZiyueHuang/Distributed-Systems) 184 | -------------------------------------------------------------------------------- /src/shardmaster/config.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "labrpc" 4 | import "raft" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | 15 | func randstring(n int) string { 16 | b := make([]byte, 2*n) 17 | crand.Read(b) 18 | s := base64.URLEncoding.EncodeToString(b) 19 | return s[0:n] 20 | } 21 | 22 | // Randomize server handles 23 | func randomHandles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 24 | sa := make([]*labrpc.ClientEnd, len(kvh)) 25 | copy(sa, kvh) 26 | for i := range sa { 27 | j := rand.Intn(i + 1) 28 | sa[i], sa[j] = sa[j], sa[i] 29 | } 30 | return sa 31 | } 32 | 33 | type config struct { 34 | mu sync.Mutex 35 | t *testing.T 36 | net *labrpc.Network 37 | n int 38 | servers []*ShardMaster 39 | saved []*raft.Persister 40 | endnames [][]string // names of each server's sending ClientEnds 41 | clerks map[*Clerk][]string 42 | nextClientId int 43 | } 44 | 45 | func (cfg *config) cleanup() { 46 | cfg.mu.Lock() 47 | defer cfg.mu.Unlock() 48 | for i := 0; i < len(cfg.servers); i++ { 49 | if cfg.servers[i] != nil { 50 | cfg.servers[i].Kill() 51 | } 52 | } 53 | } 54 | 55 | // Maximum log size across all servers 56 | func (cfg *config) LogSize() int { 57 | logsize := 0 58 | for i := 0; i < cfg.n; i++ { 59 | n := cfg.saved[i].RaftStateSize() 60 | if n > logsize { 61 | logsize = n 62 | } 63 | } 64 | return logsize 65 | } 66 | 67 | // attach server i to servers listed in to 68 | // caller must hold cfg.mu 69 | func (cfg *config) connectUnlocked(i int, to []int) { 70 | // log.Printf("connect peer %d to %v\n", i, to) 71 | 72 | // outgoing socket files 73 | for j := 0; j < len(to); j++ { 74 | endname := cfg.endnames[i][to[j]] 75 | cfg.net.Enable(endname, true) 76 | } 77 | 78 | // incoming socket files 79 | for j := 0; j < len(to); j++ { 80 | endname := cfg.endnames[to[j]][i] 81 | cfg.net.Enable(endname, true) 82 | } 83 | } 84 | 85 | func (cfg *config) connect(i int, to []int) { 86 | cfg.mu.Lock() 87 | defer cfg.mu.Unlock() 88 | cfg.connectUnlocked(i, to) 89 | } 90 | 91 | // detach server i from the servers listed in from 92 | // caller must hold cfg.mu 93 | func (cfg *config) disconnectUnlocked(i int, from []int) { 94 | // log.Printf("disconnect peer %d from %v\n", i, from) 95 | 96 | // outgoing socket files 97 | for j := 0; j < len(from); j++ { 98 | if cfg.endnames[i] != nil { 99 | endname := cfg.endnames[i][from[j]] 100 | cfg.net.Enable(endname, false) 101 | } 102 | } 103 | 104 | // incoming socket files 105 | for j := 0; j < len(from); j++ { 106 | if cfg.endnames[j] != nil { 107 | endname := cfg.endnames[from[j]][i] 108 | cfg.net.Enable(endname, false) 109 | } 110 | } 111 | } 112 | 113 | func (cfg *config) disconnect(i int, from []int) { 114 | cfg.mu.Lock() 115 | defer cfg.mu.Unlock() 116 | cfg.disconnectUnlocked(i, from) 117 | } 118 | 119 | func (cfg *config) All() []int { 120 | all := make([]int, cfg.n) 121 | for i := 0; i < cfg.n; i++ { 122 | all[i] = i 123 | } 124 | return all 125 | } 126 | 127 | func (cfg *config) ConnectAll() { 128 | cfg.mu.Lock() 129 | defer 
cfg.mu.Unlock() 130 | for i := 0; i < cfg.n; i++ { 131 | cfg.connectUnlocked(i, cfg.All()) 132 | } 133 | } 134 | 135 | // Sets up 2 partitions with connectivity between servers in each partition. 136 | func (cfg *config) partition(p1 []int, p2 []int) { 137 | cfg.mu.Lock() 138 | defer cfg.mu.Unlock() 139 | // log.Printf("partition servers into: %v %v\n", p1, p2) 140 | for i := 0; i < len(p1); i++ { 141 | cfg.disconnectUnlocked(p1[i], p2) 142 | cfg.connectUnlocked(p1[i], p1) 143 | } 144 | for i := 0; i < len(p2); i++ { 145 | cfg.disconnectUnlocked(p2[i], p1) 146 | cfg.connectUnlocked(p2[i], p2) 147 | } 148 | } 149 | 150 | // Create a clerk with clerk specific server names. 151 | // Give it connections to all of the servers, but for 152 | // now enable only connections to servers in to[]. 153 | func (cfg *config) makeClient(to []int) *Clerk { 154 | cfg.mu.Lock() 155 | defer cfg.mu.Unlock() 156 | 157 | // a fresh set of ClientEnds. 158 | ends := make([]*labrpc.ClientEnd, cfg.n) 159 | endnames := make([]string, cfg.n) 160 | for j := 0; j < cfg.n; j++ { 161 | endnames[j] = randstring(20) 162 | ends[j] = cfg.net.MakeEnd(endnames[j]) 163 | cfg.net.Connect(endnames[j], j) 164 | } 165 | 166 | ck := MakeClerk(randomHandles(ends)) 167 | cfg.clerks[ck] = endnames 168 | cfg.nextClientId++ 169 | cfg.ConnectClientUnlocked(ck, to) 170 | return ck 171 | } 172 | 173 | func (cfg *config) deleteClient(ck *Clerk) { 174 | cfg.mu.Lock() 175 | defer cfg.mu.Unlock() 176 | 177 | v := cfg.clerks[ck] 178 | for i := 0; i < len(v); i++ { 179 | os.Remove(v[i]) 180 | } 181 | delete(cfg.clerks, ck) 182 | } 183 | 184 | // caller should hold cfg.mu 185 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 186 | // log.Printf("ConnectClient %v to %v\n", ck, to) 187 | endnames := cfg.clerks[ck] 188 | for j := 0; j < len(to); j++ { 189 | s := endnames[to[j]] 190 | cfg.net.Enable(s, true) 191 | } 192 | } 193 | 194 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 195 | cfg.mu.Lock() 196 | defer cfg.mu.Unlock() 197 | cfg.ConnectClientUnlocked(ck, to) 198 | } 199 | 200 | // caller should hold cfg.mu 201 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 202 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 203 | endnames := cfg.clerks[ck] 204 | for j := 0; j < len(from); j++ { 205 | s := endnames[from[j]] 206 | cfg.net.Enable(s, false) 207 | } 208 | } 209 | 210 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 211 | cfg.mu.Lock() 212 | defer cfg.mu.Unlock() 213 | cfg.DisconnectClientUnlocked(ck, from) 214 | } 215 | 216 | // Shutdown a server by isolating it 217 | func (cfg *config) ShutdownServer(i int) { 218 | cfg.mu.Lock() 219 | defer cfg.mu.Unlock() 220 | 221 | cfg.disconnectUnlocked(i, cfg.All()) 222 | 223 | // disable client connections to the server. 224 | // it's important to do this before creating 225 | // the new Persister in saved[i], to avoid 226 | // the possibility of the server returning a 227 | // positive reply to an Append but persisting 228 | // the result in the superseded Persister. 229 | cfg.net.DeleteServer(i) 230 | 231 | // a fresh persister, in case old instance 232 | // continues to update the Persister. 233 | // but copy old persister's content so that we always 234 | // pass Make() the last persisted state. 
235 | if cfg.saved[i] != nil { 236 | cfg.saved[i] = cfg.saved[i].Copy() 237 | } 238 | 239 | kv := cfg.servers[i] 240 | if kv != nil { 241 | cfg.mu.Unlock() 242 | kv.Kill() 243 | cfg.mu.Lock() 244 | cfg.servers[i] = nil 245 | } 246 | } 247 | 248 | // If restart servers, first call ShutdownServer 249 | func (cfg *config) StartServer(i int) { 250 | cfg.mu.Lock() 251 | 252 | // a fresh set of outgoing ClientEnd names. 253 | cfg.endnames[i] = make([]string, cfg.n) 254 | for j := 0; j < cfg.n; j++ { 255 | cfg.endnames[i][j] = randstring(20) 256 | } 257 | 258 | // a fresh set of ClientEnds. 259 | ends := make([]*labrpc.ClientEnd, cfg.n) 260 | for j := 0; j < cfg.n; j++ { 261 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 262 | cfg.net.Connect(cfg.endnames[i][j], j) 263 | } 264 | 265 | // a fresh persister, so old instance doesn't overwrite 266 | // new instance's persisted state. 267 | // give the fresh persister a copy of the old persister's 268 | // state, so that the spec is that we pass StartKVServer() 269 | // the last persisted state. 270 | if cfg.saved[i] != nil { 271 | cfg.saved[i] = cfg.saved[i].Copy() 272 | } else { 273 | cfg.saved[i] = raft.MakePersister() 274 | } 275 | 276 | cfg.mu.Unlock() 277 | 278 | cfg.servers[i] = StartServer(ends, i, cfg.saved[i]) 279 | 280 | kvsvc := labrpc.MakeService(cfg.servers[i]) 281 | rfsvc := labrpc.MakeService(cfg.servers[i].rf) 282 | srv := labrpc.MakeServer() 283 | srv.AddService(kvsvc) 284 | srv.AddService(rfsvc) 285 | cfg.net.AddServer(i, srv) 286 | } 287 | 288 | func (cfg *config) Leader() (bool, int) { 289 | cfg.mu.Lock() 290 | defer cfg.mu.Unlock() 291 | 292 | for i := 0; i < cfg.n; i++ { 293 | _, isLeader := cfg.servers[i].rf.GetState() 294 | if isLeader { 295 | return true, i 296 | } 297 | } 298 | return false, 0 299 | } 300 | 301 | // Partition servers into 2 groups and put current leader in minority 302 | func (cfg *config) makePartition() ([]int, []int) { 303 | _, l := cfg.Leader() 304 | p1 := make([]int, cfg.n/2+1) 305 | p2 := make([]int, cfg.n/2) 306 | j := 0 307 | for i := 0; i < cfg.n; i++ { 308 | if i != l { 309 | if j < len(p1) { 310 | p1[j] = i 311 | } else { 312 | p2[j-len(p1)] = i 313 | } 314 | j++ 315 | } 316 | } 317 | p2[len(p2)-1] = l 318 | return p1, p2 319 | } 320 | 321 | func makeConfig(t *testing.T, n int, unreliable bool) *config { 322 | runtime.GOMAXPROCS(4) 323 | cfg := &config{} 324 | cfg.t = t 325 | cfg.net = labrpc.MakeNetwork() 326 | cfg.n = n 327 | cfg.servers = make([]*ShardMaster, cfg.n) 328 | cfg.saved = make([]*raft.Persister, cfg.n) 329 | cfg.endnames = make([][]string, cfg.n) 330 | cfg.clerks = make(map[*Clerk][]string) 331 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 332 | 333 | // create a full set of KV servers. 
334 | for i := 0; i < cfg.n; i++ { 335 | cfg.StartServer(i) 336 | } 337 | 338 | cfg.ConnectAll() 339 | 340 | cfg.net.Reliable(!unreliable) 341 | 342 | return cfg 343 | } 344 | -------------------------------------------------------------------------------- /src/raftkv/cmd_config.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | "fmt" 5 | "labrpc" 6 | "os" 7 | "raft" 8 | "runtime" 9 | "sync" 10 | ) 11 | 12 | // 13 | // cmdConfig structure 14 | // 15 | type cmdConfig struct { 16 | mu sync.Mutex 17 | net *labrpc.Network 18 | n int 19 | kvservers []*RaftKV 20 | saved []*raft.Persister 21 | endnames [][]string // names of each server's sending ClientEnds 22 | clerks map[*Clerk][]string 23 | nextClientId int 24 | maxraftstate int 25 | } 26 | 27 | // 28 | // clean up cmdConfig 29 | // 30 | func (cfg *cmdConfig) cleanup() { 31 | cfg.mu.Lock() 32 | defer cfg.mu.Unlock() 33 | for i := 0; i < len(cfg.kvservers); i++ { 34 | if cfg.kvservers[i] != nil { 35 | cfg.kvservers[i].Kill() 36 | } 37 | } 38 | } 39 | 40 | // 41 | // return maximum log size across all servers 42 | // 43 | func (cfg *cmdConfig) LogSize() int { 44 | logsize := 0 45 | for i := 0; i < cfg.n; i++ { 46 | n := cfg.saved[i].RaftStateSize() 47 | if n > logsize { 48 | logsize = n 49 | } 50 | } 51 | return logsize 52 | } 53 | 54 | // 55 | // return maximum snapshot size across all servers 56 | // 57 | func (cfg *cmdConfig) SnapshotSize() int { 58 | snapshotsize := 0 59 | for i := 0; i < cfg.n; i++ { 60 | n := cfg.saved[i].SnapshotSize() 61 | if n > snapshotsize { 62 | snapshotsize = n 63 | } 64 | } 65 | return snapshotsize 66 | } 67 | 68 | // 69 | // attach server i to servers listed in to 70 | // caller must hold cfg.mu 71 | // 72 | func (cfg *cmdConfig) connectUnlocked(i int, to []int) { 73 | // log.Printf("connect peer %d to %v\n", i, to) 74 | 75 | // outgoing socket files 76 | for j := 0; j < len(to); j++ { 77 | endname := cfg.endnames[i][to[j]] 78 | cfg.net.Enable(endname, true) 79 | } 80 | 81 | // incoming socket files 82 | for j := 0; j < len(to); j++ { 83 | endname := cfg.endnames[to[j]][i] 84 | cfg.net.Enable(endname, true) 85 | } 86 | } 87 | 88 | // 89 | // connect server 90 | // 91 | func (cfg *cmdConfig) connect(i int, to []int) { 92 | cfg.mu.Lock() 93 | defer cfg.mu.Unlock() 94 | cfg.connectUnlocked(i, to) 95 | } 96 | 97 | // 98 | // detach server i from the servers listed in from 99 | // caller must hold cfg.mu 100 | // 101 | func (cfg *cmdConfig) disconnectUnlocked(i int, from []int) { 102 | // log.Printf("disconnect peer %d from %v\n", i, from) 103 | 104 | // outgoing socket files 105 | for j := 0; j < len(from); j++ { 106 | if cfg.endnames[i] != nil { 107 | endname := cfg.endnames[i][from[j]] 108 | cfg.net.Enable(endname, false) 109 | } 110 | } 111 | 112 | // incoming socket files 113 | for j := 0; j < len(from); j++ { 114 | if cfg.endnames[j] != nil { 115 | endname := cfg.endnames[from[j]][i] 116 | cfg.net.Enable(endname, false) 117 | } 118 | } 119 | } 120 | 121 | // 122 | // disconnect server 123 | // 124 | func (cfg *cmdConfig) disconnect(i int, from []int) { 125 | cfg.mu.Lock() 126 | defer cfg.mu.Unlock() 127 | cfg.disconnectUnlocked(i, from) 128 | } 129 | 130 | // 131 | // return all cmdConfig servers 132 | // 133 | func (cfg *cmdConfig) All() []int { 134 | all := make([]int, cfg.n) 135 | for i := 0; i < cfg.n; i++ { 136 | all[i] = i 137 | } 138 | return all 139 | } 140 | 141 | // 142 | // connect all servers 143 | // 144 | func (cfg *cmdConfig) 
ConnectAll() { 145 | cfg.mu.Lock() 146 | defer cfg.mu.Unlock() 147 | for i := 0; i < cfg.n; i++ { 148 | cfg.connectUnlocked(i, cfg.All()) 149 | } 150 | } 151 | 152 | // 153 | // set up 2 partitions with connectivity between servers in each partition. 154 | // 155 | func (cfg *cmdConfig) partition(p1 []int, p2 []int) { 156 | cfg.mu.Lock() 157 | defer cfg.mu.Unlock() 158 | // log.Printf("partition servers into: %v %v\n", p1, p2) 159 | for i := 0; i < len(p1); i++ { 160 | cfg.disconnectUnlocked(p1[i], p2) 161 | cfg.connectUnlocked(p1[i], p1) 162 | } 163 | for i := 0; i < len(p2); i++ { 164 | cfg.disconnectUnlocked(p2[i], p1) 165 | cfg.connectUnlocked(p2[i], p2) 166 | } 167 | } 168 | 169 | // 170 | // create a clerk with clerk specific server names. 171 | // Give it connections to all of the servers, but for 172 | // now enable only connections to servers in to[]. 173 | // 174 | func (cfg *cmdConfig) makeClient(to []int) *Clerk { 175 | cfg.mu.Lock() 176 | defer cfg.mu.Unlock() 177 | 178 | // a fresh set of ClientEnds. 179 | ends := make([]*labrpc.ClientEnd, cfg.n) 180 | endnames := make([]string, cfg.n) 181 | for j := 0; j < cfg.n; j++ { 182 | endnames[j] = randstring(20) 183 | ends[j] = cfg.net.MakeEnd(endnames[j]) 184 | cfg.net.Connect(endnames[j], j) 185 | } 186 | 187 | ck := MakeClerk(randomHandles(ends)) 188 | cfg.clerks[ck] = endnames 189 | cfg.nextClientId++ 190 | cfg.ConnectClientUnlocked(ck, to) 191 | return ck 192 | } 193 | 194 | // 195 | // delete a client 196 | // 197 | func (cfg *cmdConfig) deleteClient(ck *Clerk) { 198 | cfg.mu.Lock() 199 | defer cfg.mu.Unlock() 200 | 201 | v := cfg.clerks[ck] 202 | for i := 0; i < len(v); i++ { 203 | os.Remove(v[i]) 204 | } 205 | delete(cfg.clerks, ck) 206 | } 207 | 208 | // 209 | // caller should hold cfg.mu 210 | // 211 | func (cfg *cmdConfig) ConnectClientUnlocked(ck *Clerk, to []int) { 212 | // log.Printf("ConnectClient %v to %v\n", ck, to) 213 | endnames := cfg.clerks[ck] 214 | for j := 0; j < len(to); j++ { 215 | s := endnames[to[j]] 216 | cfg.net.Enable(s, true) 217 | } 218 | } 219 | 220 | // 221 | // connect a client 222 | // 223 | func (cfg *cmdConfig) ConnectClient(ck *Clerk, to []int) { 224 | cfg.mu.Lock() 225 | defer cfg.mu.Unlock() 226 | cfg.ConnectClientUnlocked(ck, to) 227 | } 228 | 229 | // 230 | // caller should hold cfg.mu 231 | // 232 | func (cfg *cmdConfig) DisconnectClientUnlocked(ck *Clerk, from []int) { 233 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 234 | endnames := cfg.clerks[ck] 235 | for j := 0; j < len(from); j++ { 236 | s := endnames[from[j]] 237 | cfg.net.Enable(s, false) 238 | } 239 | } 240 | 241 | // 242 | // disconnect a client 243 | // 244 | func (cfg *cmdConfig) DisconnectClient(ck *Clerk, from []int) { 245 | cfg.mu.Lock() 246 | defer cfg.mu.Unlock() 247 | cfg.DisconnectClientUnlocked(ck, from) 248 | } 249 | 250 | // 251 | // shutdown a server by isolating it 252 | // 253 | func (cfg *cmdConfig) ShutdownServer(i int) { 254 | cfg.mu.Lock() 255 | defer cfg.mu.Unlock() 256 | 257 | cfg.disconnectUnlocked(i, cfg.All()) 258 | 259 | // disable client connections to the server. 260 | // it's important to do this before creating 261 | // the new Persister in saved[i], to avoid 262 | // the possibility of the server returning a 263 | // positive reply to an Append but persisting 264 | // the result in the superseded Persister. 265 | cfg.net.DeleteServer(i) 266 | 267 | // a fresh persister, in case old instance 268 | // continues to update the Persister. 
269 | // but copy old persister's content so that we always 270 | // pass Make() the last persisted state. 271 | if cfg.saved[i] != nil { 272 | cfg.saved[i] = cfg.saved[i].Copy() 273 | } 274 | 275 | kv := cfg.kvservers[i] 276 | if kv != nil { 277 | cfg.mu.Unlock() 278 | kv.Kill() 279 | cfg.mu.Lock() 280 | cfg.kvservers[i] = nil 281 | } 282 | } 283 | 284 | // 285 | // ff restart servers, first call ShutdownServer 286 | // 287 | func (cfg *cmdConfig) StartServer(i int) { 288 | cfg.mu.Lock() 289 | 290 | // a fresh set of outgoing ClientEnd names. 291 | cfg.endnames[i] = make([]string, cfg.n) 292 | for j := 0; j < cfg.n; j++ { 293 | cfg.endnames[i][j] = randstring(20) 294 | } 295 | 296 | // a fresh set of ClientEnds. 297 | ends := make([]*labrpc.ClientEnd, cfg.n) 298 | for j := 0; j < cfg.n; j++ { 299 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 300 | cfg.net.Connect(cfg.endnames[i][j], j) 301 | } 302 | 303 | // a fresh persister, so old instance doesn't overwrite 304 | // new instance's persisted state. 305 | // give the fresh persister a copy of the old persister's 306 | // state, so that the spec is that we pass StartKVServer() 307 | // the last persisted state. 308 | if cfg.saved[i] != nil { 309 | cfg.saved[i] = cfg.saved[i].Copy() 310 | } else { 311 | cfg.saved[i] = raft.MakePersister() 312 | } 313 | cfg.mu.Unlock() 314 | 315 | cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate) 316 | 317 | kvsvc := labrpc.MakeService(cfg.kvservers[i]) 318 | rfsvc := labrpc.MakeService(cfg.kvservers[i].rf) 319 | srv := labrpc.MakeServer() 320 | srv.AddService(kvsvc) 321 | srv.AddService(rfsvc) 322 | cfg.net.AddServer(i, srv) 323 | } 324 | 325 | // 326 | // if leader? 327 | // 328 | func (cfg *cmdConfig) Leader() (bool, int) { 329 | cfg.mu.Lock() 330 | defer cfg.mu.Unlock() 331 | 332 | for i := 0; i < cfg.n; i++ { 333 | _, isLeader := cfg.kvservers[i].rf.GetState() 334 | if isLeader { 335 | return true, i 336 | } 337 | } 338 | return false, 0 339 | } 340 | 341 | // 342 | // partition servers into 2 groups and put current leader in minority 343 | // 344 | func (cfg *cmdConfig) makePartition() ([]int, []int) { 345 | _, l := cfg.Leader() 346 | p1 := make([]int, cfg.n/2+1) 347 | p2 := make([]int, cfg.n/2) 348 | j := 0 349 | for i := 0; i < cfg.n; i++ { 350 | if i != l { 351 | if j < len(p1) { 352 | p1[j] = i 353 | } else { 354 | p2[j-len(p1)] = i 355 | } 356 | j++ 357 | } 358 | } 359 | p2[len(p2)-1] = l 360 | return p1, p2 361 | } 362 | 363 | var cmdNcpuOnce sync.Once 364 | 365 | // 366 | // make cmdConfig 367 | // 368 | func makeCmdConfig(n int, maxraftstate int) *cmdConfig { 369 | cmdNcpuOnce.Do(func() { 370 | if runtime.NumCPU() < 2 { 371 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 372 | } 373 | }) 374 | runtime.GOMAXPROCS(4) 375 | cfg := &cmdConfig{} 376 | cfg.net = labrpc.MakeNetwork() 377 | cfg.n = n 378 | cfg.kvservers = make([]*RaftKV, cfg.n) 379 | cfg.saved = make([]*raft.Persister, cfg.n) 380 | cfg.endnames = make([][]string, cfg.n) 381 | cfg.clerks = make(map[*Clerk][]string) 382 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 383 | cfg.maxraftstate = maxraftstate 384 | 385 | // create a full set of KV servers. 
386 | for i := 0; i < cfg.n; i++ { 387 | cfg.StartServer(i) 388 | } 389 | 390 | cfg.ConnectAll() 391 | 392 | return cfg 393 | } 394 | -------------------------------------------------------------------------------- /src/shardmaster/test_test.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "testing" 7 | ) 8 | 9 | // General checking 10 | func check(t *testing.T, groups []int, ck *Clerk) { 11 | c := ck.Query(-1) 12 | if len(c.Groups) != len(groups) { 13 | t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups)) 14 | } 15 | 16 | // are the groups as expected? 17 | for _, g := range groups { 18 | _, ok := c.Groups[g] 19 | if ok != true { 20 | t.Fatalf("missing group %v", g) 21 | } 22 | } 23 | 24 | // any un-allocated shards? 25 | if len(groups) > 0 { 26 | for s, g := range c.Shards { 27 | _, ok := c.Groups[g] 28 | if ok == false { 29 | t.Fatalf("shard %v -> invalid group %v", s, g) 30 | } 31 | } 32 | } 33 | 34 | // more or less balanced sharding? 35 | counts := map[int]int{} 36 | for _, g := range c.Shards { 37 | counts[g] += 1 38 | } 39 | min := 257 40 | max := 0 41 | for g := range c.Groups { 42 | if counts[g] > max { 43 | max = counts[g] 44 | } 45 | if counts[g] < min { 46 | min = counts[g] 47 | } 48 | } 49 | if max > min+1 { 50 | t.Fatalf("max %v too much larger than min %v", max, min) 51 | } 52 | } 53 | 54 | // Check if same config 55 | func checkSameConfig(t *testing.T, c1 Config, c2 Config) { 56 | if c1.Num != c2.Num { 57 | t.Fatalf("Num wrong") 58 | } 59 | if c1.Shards != c2.Shards { 60 | t.Fatalf("Shards wrong") 61 | } 62 | if len(c1.Groups) != len(c2.Groups) { 63 | t.Fatalf("number of Groups is wrong") 64 | } 65 | for gid, sa := range c1.Groups { 66 | sa1, ok := c2.Groups[gid] 67 | if ok == false || len(sa1) != len(sa) { 68 | t.Fatalf("len(Groups) wrong") 69 | } 70 | if ok && len(sa1) == len(sa) { 71 | for j := 0; j < len(sa); j++ { 72 | if sa[j] != sa1[j] { 73 | t.Fatalf("Groups wrong") 74 | } 75 | } 76 | } 77 | } 78 | } 79 | 80 | // Test Basic 81 | func TestBasic(t *testing.T) { 82 | const nservers = 3 83 | cfg := makeConfig(t, nservers, false) 84 | defer cfg.cleanup() 85 | 86 | ck := cfg.makeClient(cfg.All()) 87 | 88 | fmt.Printf("Test: Basic leave/join ...\n") 89 | 90 | cfa := make([]Config, 6) 91 | cfa[0] = ck.Query(-1) 92 | 93 | check(t, []int{}, ck) 94 | 95 | var gid1 int = 1 96 | ck.Join(map[int][]string{gid1: []string{"x", "y", "z"}}) 97 | check(t, []int{gid1}, ck) 98 | cfa[1] = ck.Query(-1) 99 | 100 | var gid2 int = 2 101 | ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}}) 102 | check(t, []int{gid1, gid2}, ck) 103 | cfa[2] = ck.Query(-1) 104 | 105 | ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}}) 106 | check(t, []int{gid1, gid2}, ck) 107 | cfa[3] = ck.Query(-1) 108 | 109 | cfx := ck.Query(-1) 110 | sa1 := cfx.Groups[gid1] 111 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 112 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 113 | } 114 | sa2 := cfx.Groups[gid2] 115 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 116 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 117 | } 118 | 119 | ck.Leave([]int{gid1}) 120 | check(t, []int{gid2}, ck) 121 | cfa[4] = ck.Query(-1) 122 | 123 | ck.Leave([]int{gid1}) 124 | check(t, []int{gid2}, ck) 125 | cfa[5] = ck.Query(-1) 126 | 127 | fmt.Printf(" ... 
Passed\n") 128 | 129 | fmt.Printf("Test: Historical queries ...\n") 130 | 131 | for s := 0; s < nservers; s++ { 132 | cfg.ShutdownServer(s) 133 | for i := 0; i < len(cfa); i++ { 134 | c := ck.Query(cfa[i].Num) 135 | checkSameConfig(t, c, cfa[i]) 136 | } 137 | cfg.StartServer(s) 138 | cfg.ConnectAll() 139 | } 140 | 141 | fmt.Printf(" ... Passed\n") 142 | 143 | fmt.Printf("Test: Move ...\n") 144 | { 145 | var gid3 int = 503 146 | ck.Join(map[int][]string{gid3: []string{"3a", "3b", "3c"}}) 147 | var gid4 int = 504 148 | ck.Join(map[int][]string{gid4: []string{"4a", "4b", "4c"}}) 149 | for i := 0; i < NShards; i++ { 150 | cf := ck.Query(-1) 151 | if i < NShards/2 { 152 | ck.Move(i, gid3) 153 | if cf.Shards[i] != gid3 { 154 | cf1 := ck.Query(-1) 155 | if cf1.Num <= cf.Num { 156 | t.Fatalf("Move should increase Config.Num") 157 | } 158 | } 159 | } else { 160 | ck.Move(i, gid4) 161 | if cf.Shards[i] != gid4 { 162 | cf1 := ck.Query(-1) 163 | if cf1.Num <= cf.Num { 164 | t.Fatalf("Move should increase Config.Num") 165 | } 166 | } 167 | } 168 | } 169 | cf2 := ck.Query(-1) 170 | for i := 0; i < NShards; i++ { 171 | if i < NShards/2 { 172 | if cf2.Shards[i] != gid3 { 173 | t.Fatalf("expected shard %v on gid %v actually %v", 174 | i, gid3, cf2.Shards[i]) 175 | } 176 | } else { 177 | if cf2.Shards[i] != gid4 { 178 | t.Fatalf("expected shard %v on gid %v actually %v", 179 | i, gid4, cf2.Shards[i]) 180 | } 181 | } 182 | } 183 | ck.Leave([]int{gid3}) 184 | ck.Leave([]int{gid4}) 185 | } 186 | fmt.Printf(" ... Passed\n") 187 | 188 | fmt.Printf("Test: Concurrent leave/join ...\n") 189 | 190 | const npara = 10 191 | var cka [npara]*Clerk 192 | for i := 0; i < len(cka); i++ { 193 | cka[i] = cfg.makeClient(cfg.All()) 194 | } 195 | gids := make([]int, npara) 196 | ch := make(chan bool) 197 | for xi := 0; xi < npara; xi++ { 198 | gids[xi] = int(xi + 1) 199 | go func(i int) { 200 | defer func() { ch <- true }() 201 | var gid int = gids[i] 202 | cka[i].Join(map[int][]string{gid + 1000: []string{"a", "b", "c"}}) 203 | cka[i].Join(map[int][]string{gid: []string{"a", "b", "c"}}) 204 | cka[i].Leave([]int{gid + 1000}) 205 | }(xi) 206 | } 207 | for i := 0; i < npara; i++ { 208 | <-ch 209 | } 210 | check(t, gids, ck) 211 | 212 | fmt.Printf(" ... Passed\n") 213 | 214 | fmt.Printf("Test: Minimal transfers after joins ...\n") 215 | 216 | c1 := ck.Query(-1) 217 | for i := 0; i < 5; i++ { 218 | ck.Join(map[int][]string{int(npara + 1 + i): []string{"a", "b", "c"}}) 219 | } 220 | c2 := ck.Query(-1) 221 | for i := int(1); i <= npara; i++ { 222 | for j := 0; j < len(c1.Shards); j++ { 223 | if c2.Shards[j] == i { 224 | if c1.Shards[j] != i { 225 | t.Fatalf("non-minimal transfer after Join()s") 226 | } 227 | } 228 | } 229 | } 230 | 231 | fmt.Printf(" ... Passed\n") 232 | 233 | fmt.Printf("Test: Minimal transfers after leaves ...\n") 234 | 235 | for i := 0; i < 5; i++ { 236 | ck.Leave([]int{int(npara + 1 + i)}) 237 | } 238 | c3 := ck.Query(-1) 239 | for i := int(1); i <= npara; i++ { 240 | for j := 0; j < len(c1.Shards); j++ { 241 | if c2.Shards[j] == i { 242 | if c3.Shards[j] != i { 243 | t.Fatalf("non-minimal transfer after Leave()s") 244 | } 245 | } 246 | } 247 | } 248 | 249 | fmt.Printf(" ... 
Passed\n") 250 | } 251 | 252 | // Test multiple servers 253 | func TestMulti(t *testing.T) { 254 | const nservers = 3 255 | cfg := makeConfig(t, nservers, false) 256 | defer cfg.cleanup() 257 | 258 | ck := cfg.makeClient(cfg.All()) 259 | 260 | fmt.Printf("Test: Multi-group join/leave ...\n") 261 | 262 | cfa := make([]Config, 6) 263 | cfa[0] = ck.Query(-1) 264 | 265 | check(t, []int{}, ck) 266 | 267 | var gid1 int = 1 268 | var gid2 int = 2 269 | ck.Join(map[int][]string{ 270 | gid1: []string{"x", "y", "z"}, 271 | gid2: []string{"a", "b", "c"}, 272 | }) 273 | check(t, []int{gid1, gid2}, ck) 274 | cfa[1] = ck.Query(-1) 275 | 276 | var gid3 int = 3 277 | ck.Join(map[int][]string{gid3: []string{"j", "k", "l"}}) 278 | check(t, []int{gid1, gid2, gid3}, ck) 279 | cfa[2] = ck.Query(-1) 280 | 281 | ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}}) 282 | check(t, []int{gid1, gid2, gid3}, ck) 283 | cfa[3] = ck.Query(-1) 284 | 285 | cfx := ck.Query(-1) 286 | sa1 := cfx.Groups[gid1] 287 | if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" { 288 | t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1) 289 | } 290 | sa2 := cfx.Groups[gid2] 291 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 292 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 293 | } 294 | sa3 := cfx.Groups[gid3] 295 | if len(sa3) != 3 || sa3[0] != "j" || sa3[1] != "k" || sa3[2] != "l" { 296 | t.Fatalf("wrong servers for gid %v: %v\n", gid3, sa3) 297 | } 298 | 299 | ck.Leave([]int{gid1, gid3}) 300 | check(t, []int{gid2}, ck) 301 | cfa[4] = ck.Query(-1) 302 | 303 | cfx = ck.Query(-1) 304 | sa2 = cfx.Groups[gid2] 305 | if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" { 306 | t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2) 307 | } 308 | 309 | fmt.Printf(" ... Passed\n") 310 | 311 | fmt.Printf("Test: Concurrent multi leave/join ...\n") 312 | 313 | const npara = 10 314 | var cka [npara]*Clerk 315 | for i := 0; i < len(cka); i++ { 316 | cka[i] = cfg.makeClient(cfg.All()) 317 | } 318 | gids := make([]int, npara) 319 | var wg sync.WaitGroup 320 | for xi := 0; xi < npara; xi++ { 321 | wg.Add(1) 322 | gids[xi] = int(xi + 1) 323 | go func(i int) { 324 | defer wg.Done() 325 | var gid int = gids[i] 326 | cka[i].Join(map[int][]string{ 327 | gid: []string{"a", "b", "c"}, 328 | gid + 1000: []string{"a", "b", "c"}, 329 | gid + 2000: []string{"a", "b", "c"}, 330 | }) 331 | cka[i].Leave([]int{gid + 1000, gid + 2000}) 332 | }(xi) 333 | } 334 | wg.Wait() 335 | check(t, gids, ck) 336 | 337 | fmt.Printf(" ... Passed\n") 338 | 339 | fmt.Printf("Test: Minimal transfers after multijoins ...\n") 340 | 341 | c1 := ck.Query(-1) 342 | m := make(map[int][]string) 343 | for i := 0; i < 5; i++ { 344 | m[npara+1+i] = []string{"a", "b", "c"} 345 | } 346 | ck.Join(m) 347 | c2 := ck.Query(-1) 348 | for i := int(1); i <= npara; i++ { 349 | for j := 0; j < len(c1.Shards); j++ { 350 | if c2.Shards[j] == i { 351 | if c1.Shards[j] != i { 352 | t.Fatalf("non-minimal transfer after Join()s") 353 | } 354 | } 355 | } 356 | } 357 | 358 | fmt.Printf(" ... 
Passed\n") 359 | 360 | fmt.Printf("Test: Minimal transfers after multileaves ...\n") 361 | 362 | var l []int 363 | for i := 0; i < 5; i++ { 364 | l = append(l, npara+1+i) 365 | } 366 | ck.Leave(l) 367 | c3 := ck.Query(-1) 368 | for i := int(1); i <= npara; i++ { 369 | for j := 0; j < len(c1.Shards); j++ { 370 | if c2.Shards[j] == i { 371 | if c3.Shards[j] != i { 372 | t.Fatalf("non-minimal transfer after Leave()s") 373 | } 374 | } 375 | } 376 | } 377 | 378 | fmt.Printf(" ... Passed\n") 379 | } 380 | -------------------------------------------------------------------------------- /src/shardkv/config.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "shardmaster" 4 | import "labrpc" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | import "raft" 15 | import "strconv" 16 | import "fmt" 17 | 18 | func randstring(n int) string { 19 | b := make([]byte, 2*n) 20 | crand.Read(b) 21 | s := base64.URLEncoding.EncodeToString(b) 22 | return s[0:n] 23 | } 24 | 25 | // Randomize server handles 26 | func randomHandles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 27 | sa := make([]*labrpc.ClientEnd, len(kvh)) 28 | copy(sa, kvh) 29 | for i := range sa { 30 | j := rand.Intn(i + 1) 31 | sa[i], sa[j] = sa[j], sa[i] 32 | } 33 | return sa 34 | } 35 | 36 | type group struct { 37 | gid int 38 | servers []*ShardKV 39 | saved []*raft.Persister 40 | endnames [][]string 41 | mendnames [][]string 42 | } 43 | 44 | type config struct { 45 | mu sync.Mutex 46 | t *testing.T 47 | net *labrpc.Network 48 | 49 | nmasters int 50 | masterservers []*shardmaster.ShardMaster 51 | mck *shardmaster.Clerk 52 | 53 | ngroups int 54 | n int // servers per k/v group 55 | groups []*group 56 | 57 | clerks map[*Clerk][]string 58 | nextClientId int 59 | maxraftstate int 60 | } 61 | 62 | func (cfg *config) cleanup() { 63 | for gi := 0; gi < cfg.ngroups; gi++ { 64 | cfg.ShutdownGroup(gi) 65 | } 66 | } 67 | 68 | // check that no server's log is too big. 69 | func (cfg *config) checklogs() { 70 | for gi := 0; gi < cfg.ngroups; gi++ { 71 | for i := 0; i < cfg.n; i++ { 72 | raft := cfg.groups[gi].saved[i].RaftStateSize() 73 | snap := len(cfg.groups[gi].saved[i].ReadSnapshot()) 74 | if cfg.maxraftstate >= 0 && raft > 2*cfg.maxraftstate { 75 | cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v", 76 | raft, cfg.maxraftstate) 77 | } 78 | if cfg.maxraftstate < 0 && snap > 0 { 79 | cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!") 80 | } 81 | } 82 | } 83 | } 84 | 85 | // master server name for labrpc. 86 | func (cfg *config) mastername(i int) string { 87 | return "master" + strconv.Itoa(i) 88 | } 89 | 90 | // shard server name for labrpc. 91 | // i'th server of group gid. 92 | func (cfg *config) servername(gid int, i int) string { 93 | return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i) 94 | } 95 | 96 | func (cfg *config) makeClient() *Clerk { 97 | cfg.mu.Lock() 98 | defer cfg.mu.Unlock() 99 | 100 | // ClientEnds to talk to master service. 
101 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 102 | endnames := make([]string, cfg.n) 103 | for j := 0; j < cfg.nmasters; j++ { 104 | endnames[j] = randstring(20) 105 | ends[j] = cfg.net.MakeEnd(endnames[j]) 106 | cfg.net.Connect(endnames[j], cfg.mastername(j)) 107 | cfg.net.Enable(endnames[j], true) 108 | } 109 | 110 | ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd { 111 | name := randstring(20) 112 | end := cfg.net.MakeEnd(name) 113 | cfg.net.Connect(name, servername) 114 | cfg.net.Enable(name, true) 115 | return end 116 | }) 117 | cfg.clerks[ck] = endnames 118 | cfg.nextClientId++ 119 | return ck 120 | } 121 | 122 | func (cfg *config) deleteClient(ck *Clerk) { 123 | cfg.mu.Lock() 124 | defer cfg.mu.Unlock() 125 | 126 | v := cfg.clerks[ck] 127 | for i := 0; i < len(v); i++ { 128 | os.Remove(v[i]) 129 | } 130 | delete(cfg.clerks, ck) 131 | } 132 | 133 | // Shutdown i'th server of gi'th group, by isolating it 134 | func (cfg *config) ShutdownServer(gi int, i int) { 135 | cfg.mu.Lock() 136 | defer cfg.mu.Unlock() 137 | 138 | gg := cfg.groups[gi] 139 | 140 | // prevent this server from sending 141 | for j := 0; j < len(gg.servers); j++ { 142 | name := gg.endnames[i][j] 143 | cfg.net.Enable(name, false) 144 | } 145 | for j := 0; j < len(gg.mendnames[i]); j++ { 146 | name := gg.mendnames[i][j] 147 | cfg.net.Enable(name, false) 148 | } 149 | 150 | // disable client connections to the server. 151 | // it's important to do this before creating 152 | // the new Persister in saved[i], to avoid 153 | // the possibility of the server returning a 154 | // positive reply to an Append but persisting 155 | // the result in the superseded Persister. 156 | cfg.net.DeleteServer(cfg.servername(gg.gid, i)) 157 | 158 | // a fresh persister, in case old instance 159 | // continues to update the Persister. 160 | // but copy old persister's content so that we always 161 | // pass Make() the last persisted state. 162 | if gg.saved[i] != nil { 163 | gg.saved[i] = gg.saved[i].Copy() 164 | } 165 | 166 | kv := gg.servers[i] 167 | if kv != nil { 168 | cfg.mu.Unlock() 169 | kv.Kill() 170 | cfg.mu.Lock() 171 | gg.servers[i] = nil 172 | } 173 | } 174 | 175 | func (cfg *config) ShutdownGroup(gi int) { 176 | for i := 0; i < cfg.n; i++ { 177 | cfg.ShutdownServer(gi, i) 178 | } 179 | } 180 | 181 | // start i'th server in gi'th group 182 | func (cfg *config) StartServer(gi int, i int) { 183 | cfg.mu.Lock() 184 | 185 | gg := cfg.groups[gi] 186 | 187 | // a fresh set of outgoing ClientEnd names 188 | // to talk to other servers in this group. 189 | gg.endnames[i] = make([]string, cfg.n) 190 | for j := 0; j < cfg.n; j++ { 191 | gg.endnames[i][j] = randstring(20) 192 | } 193 | 194 | // and the connections to other servers in this group. 195 | ends := make([]*labrpc.ClientEnd, cfg.n) 196 | for j := 0; j < cfg.n; j++ { 197 | ends[j] = cfg.net.MakeEnd(gg.endnames[i][j]) 198 | cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j)) 199 | cfg.net.Enable(gg.endnames[i][j], true) 200 | } 201 | 202 | // ends to talk to shardmaster service 203 | mends := make([]*labrpc.ClientEnd, cfg.nmasters) 204 | gg.mendnames[i] = make([]string, cfg.nmasters) 205 | for j := 0; j < cfg.nmasters; j++ { 206 | gg.mendnames[i][j] = randstring(20) 207 | mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j]) 208 | cfg.net.Connect(gg.mendnames[i][j], cfg.mastername(j)) 209 | cfg.net.Enable(gg.mendnames[i][j], true) 210 | } 211 | 212 | // a fresh persister, so old instance doesn't overwrite 213 | // new instance's persisted state. 
214 | // give the fresh persister a copy of the old persister's 215 | // state, so that the spec is that we pass StartKVServer() 216 | // the last persisted state. 217 | if gg.saved[i] != nil { 218 | gg.saved[i] = gg.saved[i].Copy() 219 | } else { 220 | gg.saved[i] = raft.MakePersister() 221 | } 222 | cfg.mu.Unlock() 223 | 224 | gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate, 225 | gg.gid, mends, 226 | func(servername string) *labrpc.ClientEnd { 227 | name := randstring(20) 228 | end := cfg.net.MakeEnd(name) 229 | cfg.net.Connect(name, servername) 230 | cfg.net.Enable(name, true) 231 | return end 232 | }) 233 | 234 | kvsvc := labrpc.MakeService(gg.servers[i]) 235 | rfsvc := labrpc.MakeService(gg.servers[i].rf) 236 | srv := labrpc.MakeServer() 237 | srv.AddService(kvsvc) 238 | srv.AddService(rfsvc) 239 | cfg.net.AddServer(cfg.servername(gg.gid, i), srv) 240 | } 241 | 242 | func (cfg *config) StartGroup(gi int) { 243 | for i := 0; i < cfg.n; i++ { 244 | cfg.StartServer(gi, i) 245 | } 246 | } 247 | 248 | func (cfg *config) StartMasterServer(i int) { 249 | // ClientEnds to talk to other master replicas. 250 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 251 | for j := 0; j < cfg.nmasters; j++ { 252 | endname := randstring(20) 253 | ends[j] = cfg.net.MakeEnd(endname) 254 | cfg.net.Connect(endname, cfg.mastername(j)) 255 | cfg.net.Enable(endname, true) 256 | } 257 | 258 | p := raft.MakePersister() 259 | 260 | cfg.masterservers[i] = shardmaster.StartServer(ends, i, p) 261 | 262 | msvc := labrpc.MakeService(cfg.masterservers[i]) 263 | rfsvc := labrpc.MakeService(cfg.masterservers[i].Raft()) 264 | srv := labrpc.MakeServer() 265 | srv.AddService(msvc) 266 | srv.AddService(rfsvc) 267 | cfg.net.AddServer(cfg.mastername(i), srv) 268 | } 269 | 270 | func (cfg *config) shardclerk() *shardmaster.Clerk { 271 | // ClientEnds to talk to master service. 272 | ends := make([]*labrpc.ClientEnd, cfg.nmasters) 273 | for j := 0; j < cfg.nmasters; j++ { 274 | name := randstring(20) 275 | ends[j] = cfg.net.MakeEnd(name) 276 | cfg.net.Connect(name, cfg.mastername(j)) 277 | cfg.net.Enable(name, true) 278 | } 279 | 280 | return shardmaster.MakeClerk(ends) 281 | } 282 | 283 | // tell the shardmaster that a group is joining. 284 | func (cfg *config) join(gi int) { 285 | cfg.joinm([]int{gi}) 286 | } 287 | 288 | func (cfg *config) joinm(gis []int) { 289 | m := make(map[int][]string, len(gis)) 290 | for _, g := range gis { 291 | gid := cfg.groups[g].gid 292 | servernames := make([]string, cfg.n) 293 | for i := 0; i < cfg.n; i++ { 294 | servernames[i] = cfg.servername(gid, i) 295 | } 296 | m[gid] = servernames 297 | } 298 | cfg.mck.Join(m) 299 | } 300 | 301 | // tell the shardmaster that a group is leaving. 
302 | func (cfg *config) leave(gi int) { 303 | cfg.leavem([]int{gi}) 304 | } 305 | 306 | func (cfg *config) leavem(gis []int) { 307 | gids := make([]int, 0, len(gis)) 308 | for _, g := range gis { 309 | gids = append(gids, cfg.groups[g].gid) 310 | } 311 | cfg.mck.Leave(gids) 312 | } 313 | 314 | var ncpuOnce sync.Once 315 | 316 | func makeConfig(t *testing.T, n int, unreliable bool, maxraftstate int) *config { 317 | ncpuOnce.Do(func() { 318 | if runtime.NumCPU() < 2 { 319 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 320 | } 321 | }) 322 | runtime.GOMAXPROCS(4) 323 | cfg := &config{} 324 | cfg.t = t 325 | cfg.maxraftstate = maxraftstate 326 | cfg.net = labrpc.MakeNetwork() 327 | 328 | // master 329 | cfg.nmasters = 3 330 | cfg.masterservers = make([]*shardmaster.ShardMaster, cfg.nmasters) 331 | for i := 0; i < cfg.nmasters; i++ { 332 | cfg.StartMasterServer(i) 333 | } 334 | cfg.mck = cfg.shardclerk() 335 | 336 | cfg.ngroups = 3 337 | cfg.groups = make([]*group, cfg.ngroups) 338 | cfg.n = n 339 | for gi := 0; gi < cfg.ngroups; gi++ { 340 | gg := &group{} 341 | cfg.groups[gi] = gg 342 | gg.gid = 100 + gi 343 | gg.servers = make([]*ShardKV, cfg.n) 344 | gg.saved = make([]*raft.Persister, cfg.n) 345 | gg.endnames = make([][]string, cfg.n) 346 | gg.mendnames = make([][]string, cfg.nmasters) 347 | for i := 0; i < cfg.n; i++ { 348 | cfg.StartServer(gi, i) 349 | } 350 | } 351 | 352 | cfg.clerks = make(map[*Clerk][]string) 353 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 354 | 355 | cfg.net.Reliable(!unreliable) 356 | 357 | return cfg 358 | } 359 | -------------------------------------------------------------------------------- /src/raftkv/config.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | crand "crypto/rand" 5 | "encoding/base64" 6 | "fmt" 7 | "labrpc" 8 | "math/rand" 9 | "os" 10 | "raft" 11 | "runtime" 12 | "sync" 13 | "testing" 14 | ) 15 | 16 | // Generate a random string 17 | func randstring(n int) string { 18 | b := make([]byte, 2*n) 19 | crand.Read(b) 20 | s := base64.URLEncoding.EncodeToString(b) 21 | return s[0:n] 22 | } 23 | 24 | // Randomize server handles 25 | func randomHandles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 26 | sa := make([]*labrpc.ClientEnd, len(kvh)) 27 | copy(sa, kvh) 28 | for i := range sa { 29 | j := rand.Intn(i + 1) 30 | sa[i], sa[j] = sa[j], sa[i] 31 | } 32 | return sa 33 | } 34 | 35 | // Config structure 36 | type config struct { 37 | mu sync.Mutex 38 | t *testing.T 39 | tag string 40 | net *labrpc.Network 41 | n int 42 | kvservers []*RaftKV 43 | saved []*raft.Persister 44 | endnames [][]string // names of each server's sending ClientEnds 45 | clerks map[*Clerk][]string 46 | nextClientId int 47 | maxraftstate int 48 | } 49 | 50 | // Clean up config 51 | func (cfg *config) cleanup() { 52 | cfg.mu.Lock() 53 | defer cfg.mu.Unlock() 54 | for i := 0; i < len(cfg.kvservers); i++ { 55 | if cfg.kvservers[i] != nil { 56 | cfg.kvservers[i].Kill() 57 | } 58 | } 59 | } 60 | 61 | // LogSize() return maximum log size across all servers 62 | func (cfg *config) LogSize() int { 63 | logsize := 0 64 | for i := 0; i < cfg.n; i++ { 65 | n := cfg.saved[i].RaftStateSize() 66 | if n > logsize { 67 | logsize = n 68 | } 69 | } 70 | return logsize 71 | } 72 | 73 | // SnapshotSize() return maximum snapshot size across all servers 74 | func (cfg *config) SnapshotSize() int { 75 | snapshotsize := 0 76 | for i := 0; i < cfg.n; i++ { 77 | n := 
cfg.saved[i].SnapshotSize() 78 | if n > snapshotsize { 79 | snapshotsize = n 80 | } 81 | } 82 | return snapshotsize 83 | } 84 | 85 | // Attach server i to servers listed in to 86 | // caller must hold cfg.mu 87 | func (cfg *config) connectUnlocked(i int, to []int) { 88 | // log.Printf("connect peer %d to %v\n", i, to) 89 | 90 | // outgoing socket files 91 | for j := 0; j < len(to); j++ { 92 | endname := cfg.endnames[i][to[j]] 93 | cfg.net.Enable(endname, true) 94 | } 95 | 96 | // incoming socket files 97 | for j := 0; j < len(to); j++ { 98 | endname := cfg.endnames[to[j]][i] 99 | cfg.net.Enable(endname, true) 100 | } 101 | } 102 | 103 | // Connect server 104 | func (cfg *config) connect(i int, to []int) { 105 | cfg.mu.Lock() 106 | defer cfg.mu.Unlock() 107 | cfg.connectUnlocked(i, to) 108 | } 109 | 110 | // Detach server i from the servers listed in from 111 | // caller must hold cfg.mu 112 | func (cfg *config) disconnectUnlocked(i int, from []int) { 113 | // log.Printf("disconnect peer %d from %v\n", i, from) 114 | 115 | // outgoing socket files 116 | for j := 0; j < len(from); j++ { 117 | if cfg.endnames[i] != nil { 118 | endname := cfg.endnames[i][from[j]] 119 | cfg.net.Enable(endname, false) 120 | } 121 | } 122 | 123 | // incoming socket files 124 | for j := 0; j < len(from); j++ { 125 | if cfg.endnames[j] != nil { 126 | endname := cfg.endnames[from[j]][i] 127 | cfg.net.Enable(endname, false) 128 | } 129 | } 130 | } 131 | 132 | // Disconnect server 133 | func (cfg *config) disconnect(i int, from []int) { 134 | cfg.mu.Lock() 135 | defer cfg.mu.Unlock() 136 | cfg.disconnectUnlocked(i, from) 137 | } 138 | 139 | // All() return all config servers 140 | func (cfg *config) All() []int { 141 | all := make([]int, cfg.n) 142 | for i := 0; i < cfg.n; i++ { 143 | all[i] = i 144 | } 145 | return all 146 | } 147 | 148 | // ConnectAll() connect all servers 149 | func (cfg *config) ConnectAll() { 150 | cfg.mu.Lock() 151 | defer cfg.mu.Unlock() 152 | for i := 0; i < cfg.n; i++ { 153 | cfg.connectUnlocked(i, cfg.All()) 154 | } 155 | } 156 | 157 | // Set up 2 partitions with connectivity between servers in each partition. 158 | func (cfg *config) partition(p1 []int, p2 []int) { 159 | cfg.mu.Lock() 160 | defer cfg.mu.Unlock() 161 | // log.Printf("partition servers into: %v %v\n", p1, p2) 162 | for i := 0; i < len(p1); i++ { 163 | cfg.disconnectUnlocked(p1[i], p2) 164 | cfg.connectUnlocked(p1[i], p1) 165 | } 166 | for i := 0; i < len(p2); i++ { 167 | cfg.disconnectUnlocked(p2[i], p1) 168 | cfg.connectUnlocked(p2[i], p2) 169 | } 170 | } 171 | 172 | // Create a clerk with clerk specific server names. 173 | // Give it connections to all of the servers, but for 174 | // now enable only connections to servers in to[]. 175 | func (cfg *config) makeClient(to []int) *Clerk { 176 | cfg.mu.Lock() 177 | defer cfg.mu.Unlock() 178 | 179 | // a fresh set of ClientEnds. 
180 | ends := make([]*labrpc.ClientEnd, cfg.n) 181 | endnames := make([]string, cfg.n) 182 | for j := 0; j < cfg.n; j++ { 183 | endnames[j] = randstring(20) 184 | ends[j] = cfg.net.MakeEnd(endnames[j]) 185 | cfg.net.Connect(endnames[j], j) 186 | } 187 | 188 | ck := MakeClerk(randomHandles(ends)) 189 | cfg.clerks[ck] = endnames 190 | cfg.nextClientId++ 191 | cfg.ConnectClientUnlocked(ck, to) 192 | return ck 193 | } 194 | 195 | // Delete a client 196 | func (cfg *config) deleteClient(ck *Clerk) { 197 | cfg.mu.Lock() 198 | defer cfg.mu.Unlock() 199 | 200 | v := cfg.clerks[ck] 201 | for i := 0; i < len(v); i++ { 202 | os.Remove(v[i]) 203 | } 204 | delete(cfg.clerks, ck) 205 | } 206 | 207 | // ConnectClientUnlocked: caller should hold cfg.mu 208 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 209 | // log.Printf("ConnectClient %v to %v\n", ck, to) 210 | endnames := cfg.clerks[ck] 211 | for j := 0; j < len(to); j++ { 212 | s := endnames[to[j]] 213 | cfg.net.Enable(s, true) 214 | } 215 | } 216 | 217 | // ConnectClient connect a client to a group of peers 218 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 219 | cfg.mu.Lock() 220 | defer cfg.mu.Unlock() 221 | cfg.ConnectClientUnlocked(ck, to) 222 | } 223 | 224 | // DisconnectClientUnlocked: caller should hold cfg.mu 225 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 226 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 227 | endnames := cfg.clerks[ck] 228 | for j := 0; j < len(from); j++ { 229 | s := endnames[from[j]] 230 | cfg.net.Enable(s, false) 231 | } 232 | } 233 | 234 | // DisconnectClient 235 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 236 | cfg.mu.Lock() 237 | defer cfg.mu.Unlock() 238 | cfg.DisconnectClientUnlocked(ck, from) 239 | } 240 | 241 | // ShutdownServer by isolating it 242 | func (cfg *config) ShutdownServer(i int) { 243 | cfg.mu.Lock() 244 | defer cfg.mu.Unlock() 245 | 246 | cfg.disconnectUnlocked(i, cfg.All()) 247 | 248 | // disable client connections to the server. 249 | // it's important to do this before creating 250 | // the new Persister in saved[i], to avoid 251 | // the possibility of the server returning a 252 | // positive reply to an Append but persisting 253 | // the result in the superseded Persister. 254 | cfg.net.DeleteServer(i) 255 | 256 | // a fresh persister, in case old instance 257 | // continues to update the Persister. 258 | // but copy old persister's content so that we always 259 | // pass Make() the last persisted state. 260 | if cfg.saved[i] != nil { 261 | cfg.saved[i] = cfg.saved[i].Copy() 262 | } 263 | 264 | kv := cfg.kvservers[i] 265 | if kv != nil { 266 | cfg.mu.Unlock() 267 | kv.Kill() 268 | cfg.mu.Lock() 269 | cfg.kvservers[i] = nil 270 | } 271 | } 272 | 273 | // StartServer restart servers, first call ShutdownServer 274 | func (cfg *config) StartServer(i int) { 275 | cfg.mu.Lock() 276 | 277 | // a fresh set of outgoing ClientEnd names. 278 | cfg.endnames[i] = make([]string, cfg.n) 279 | for j := 0; j < cfg.n; j++ { 280 | cfg.endnames[i][j] = randstring(20) 281 | } 282 | 283 | // a fresh set of ClientEnds. 284 | ends := make([]*labrpc.ClientEnd, cfg.n) 285 | for j := 0; j < cfg.n; j++ { 286 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 287 | cfg.net.Connect(cfg.endnames[i][j], j) 288 | } 289 | 290 | // a fresh persister, so old instance doesn't overwrite 291 | // new instance's persisted state. 
292 | // give the fresh persister a copy of the old persister's 293 | // state, so that the spec is that we pass StartKVServer() 294 | // the last persisted state. 295 | if cfg.saved[i] != nil { 296 | cfg.saved[i] = cfg.saved[i].Copy() 297 | } else { 298 | cfg.saved[i] = raft.MakePersister() 299 | } 300 | cfg.mu.Unlock() 301 | 302 | cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate) 303 | 304 | kvsvc := labrpc.MakeService(cfg.kvservers[i]) 305 | rfsvc := labrpc.MakeService(cfg.kvservers[i].rf) 306 | srv := labrpc.MakeServer() 307 | srv.AddService(kvsvc) 308 | srv.AddService(rfsvc) 309 | cfg.net.AddServer(i, srv) 310 | } 311 | 312 | // Leader or not 313 | func (cfg *config) Leader() (bool, int) { 314 | cfg.mu.Lock() 315 | defer cfg.mu.Unlock() 316 | 317 | for i := 0; i < cfg.n; i++ { 318 | _, isLeader := cfg.kvservers[i].rf.GetState() 319 | if isLeader { 320 | return true, i 321 | } 322 | } 323 | return false, 0 324 | } 325 | 326 | // Partition servers into 2 groups and put current leader in minority 327 | func (cfg *config) makePartition() ([]int, []int) { 328 | _, l := cfg.Leader() 329 | p1 := make([]int, cfg.n/2+1) 330 | p2 := make([]int, cfg.n/2) 331 | j := 0 332 | for i := 0; i < cfg.n; i++ { 333 | if i != l { 334 | if j < len(p1) { 335 | p1[j] = i 336 | } else { 337 | p2[j-len(p1)] = i 338 | } 339 | j++ 340 | } 341 | } 342 | p2[len(p2)-1] = l 343 | return p1, p2 344 | } 345 | 346 | // Make a Once object 347 | var ncpuOnce sync.Once 348 | 349 | // Make configuration 350 | func makeConfig(t *testing.T, tag string, n int, unreliable bool, maxraftstate int) *config { 351 | ncpuOnce.Do(func() { 352 | if runtime.NumCPU() < 2 { 353 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 354 | } 355 | }) 356 | runtime.GOMAXPROCS(4) 357 | cfg := &config{} 358 | cfg.t = t 359 | cfg.tag = tag 360 | cfg.net = labrpc.MakeNetwork() 361 | cfg.n = n 362 | cfg.kvservers = make([]*RaftKV, cfg.n) 363 | cfg.saved = make([]*raft.Persister, cfg.n) 364 | cfg.endnames = make([][]string, cfg.n) 365 | cfg.clerks = make(map[*Clerk][]string) 366 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 367 | cfg.maxraftstate = maxraftstate 368 | 369 | // create a full set of KV servers. 
370 | for i := 0; i < cfg.n; i++ { 371 | cfg.StartServer(i) 372 | } 373 | 374 | cfg.ConnectAll() 375 | 376 | cfg.net.Reliable(!unreliable) 377 | 378 | return cfg 379 | } 380 | -------------------------------------------------------------------------------- /src/labrpc/test_test.go: -------------------------------------------------------------------------------- 1 | package labrpc 2 | 3 | import "testing" 4 | import "strconv" 5 | import "sync" 6 | import "runtime" 7 | import "time" 8 | import "fmt" 9 | 10 | type JunkArgs struct { 11 | X int 12 | } 13 | type JunkReply struct { 14 | X string 15 | } 16 | 17 | type JunkServer struct { 18 | mu sync.Mutex 19 | log1 []string 20 | log2 []int 21 | } 22 | 23 | func (js *JunkServer) Handler1(args string, reply *int) { 24 | js.mu.Lock() 25 | defer js.mu.Unlock() 26 | js.log1 = append(js.log1, args) 27 | *reply, _ = strconv.Atoi(args) 28 | } 29 | 30 | func (js *JunkServer) Handler2(args int, reply *string) { 31 | js.mu.Lock() 32 | defer js.mu.Unlock() 33 | js.log2 = append(js.log2, args) 34 | *reply = "handler2-" + strconv.Itoa(args) 35 | } 36 | 37 | func (js *JunkServer) Handler3(args int, reply *int) { 38 | js.mu.Lock() 39 | defer js.mu.Unlock() 40 | time.Sleep(20 * time.Second) 41 | *reply = -args 42 | } 43 | 44 | // args is a pointer 45 | func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) { 46 | reply.X = "pointer" 47 | } 48 | 49 | // args is a not pointer 50 | func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) { 51 | reply.X = "no pointer" 52 | } 53 | 54 | func TestBasic(t *testing.T) { 55 | runtime.GOMAXPROCS(4) 56 | 57 | rn := MakeNetwork() 58 | 59 | e := rn.MakeEnd("end1-99") 60 | 61 | js := &JunkServer{} 62 | svc := MakeService(js) 63 | 64 | rs := MakeServer() 65 | rs.AddService(svc) 66 | rn.AddServer("server99", rs) 67 | 68 | rn.Connect("end1-99", "server99") 69 | rn.Enable("end1-99", true) 70 | 71 | { 72 | reply := "" 73 | e.Call("JunkServer.Handler2", 111, &reply) 74 | if reply != "handler2-111" { 75 | t.Fatalf("wrong reply from Handler2") 76 | } 77 | } 78 | 79 | { 80 | reply := 0 81 | e.Call("JunkServer.Handler1", "9099", &reply) 82 | if reply != 9099 { 83 | t.Fatalf("wrong reply from Handler1") 84 | } 85 | } 86 | } 87 | 88 | func TestTypes(t *testing.T) { 89 | runtime.GOMAXPROCS(4) 90 | 91 | rn := MakeNetwork() 92 | 93 | e := rn.MakeEnd("end1-99") 94 | 95 | js := &JunkServer{} 96 | svc := MakeService(js) 97 | 98 | rs := MakeServer() 99 | rs.AddService(svc) 100 | rn.AddServer("server99", rs) 101 | 102 | rn.Connect("end1-99", "server99") 103 | rn.Enable("end1-99", true) 104 | 105 | { 106 | var args JunkArgs 107 | var reply JunkReply 108 | // args must match type (pointer or not) of handler. 109 | e.Call("JunkServer.Handler4", &args, &reply) 110 | if reply.X != "pointer" { 111 | t.Fatalf("wrong reply from Handler4") 112 | } 113 | } 114 | 115 | { 116 | var args JunkArgs 117 | var reply JunkReply 118 | // args must match type (pointer or not) of handler. 119 | e.Call("JunkServer.Handler5", args, &reply) 120 | if reply.X != "no pointer" { 121 | t.Fatalf("wrong reply from Handler5") 122 | } 123 | } 124 | } 125 | 126 | // 127 | // does net.Enable(endname, false) really disconnect a client? 
128 | // 129 | func TestDisconnect(t *testing.T) { 130 | runtime.GOMAXPROCS(4) 131 | 132 | rn := MakeNetwork() 133 | 134 | e := rn.MakeEnd("end1-99") 135 | 136 | js := &JunkServer{} 137 | svc := MakeService(js) 138 | 139 | rs := MakeServer() 140 | rs.AddService(svc) 141 | rn.AddServer("server99", rs) 142 | 143 | rn.Connect("end1-99", "server99") 144 | 145 | { 146 | reply := "" 147 | e.Call("JunkServer.Handler2", 111, &reply) 148 | if reply != "" { 149 | t.Fatalf("unexpected reply from Handler2") 150 | } 151 | } 152 | 153 | rn.Enable("end1-99", true) 154 | 155 | { 156 | reply := 0 157 | e.Call("JunkServer.Handler1", "9099", &reply) 158 | if reply != 9099 { 159 | t.Fatalf("wrong reply from Handler1") 160 | } 161 | } 162 | } 163 | 164 | // 165 | // test net.GetCount() 166 | // 167 | func TestCounts(t *testing.T) { 168 | runtime.GOMAXPROCS(4) 169 | 170 | rn := MakeNetwork() 171 | 172 | e := rn.MakeEnd("end1-99") 173 | 174 | js := &JunkServer{} 175 | svc := MakeService(js) 176 | 177 | rs := MakeServer() 178 | rs.AddService(svc) 179 | rn.AddServer(99, rs) 180 | 181 | rn.Connect("end1-99", 99) 182 | rn.Enable("end1-99", true) 183 | 184 | for i := 0; i < 17; i++ { 185 | reply := "" 186 | e.Call("JunkServer.Handler2", i, &reply) 187 | wanted := "handler2-" + strconv.Itoa(i) 188 | if reply != wanted { 189 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 190 | } 191 | } 192 | 193 | n := rn.GetCount(99) 194 | if n != 17 { 195 | t.Fatalf("wrong GetCount() %v, expected 17\n", n) 196 | } 197 | } 198 | 199 | // 200 | // test RPCs from concurrent ClientEnds 201 | // 202 | func TestConcurrentMany(t *testing.T) { 203 | runtime.GOMAXPROCS(4) 204 | 205 | rn := MakeNetwork() 206 | 207 | js := &JunkServer{} 208 | svc := MakeService(js) 209 | 210 | rs := MakeServer() 211 | rs.AddService(svc) 212 | rn.AddServer(1000, rs) 213 | 214 | ch := make(chan int) 215 | 216 | nclients := 20 217 | nrpcs := 10 218 | for ii := 0; ii < nclients; ii++ { 219 | go func(i int) { 220 | n := 0 221 | defer func() { ch <- n }() 222 | 223 | e := rn.MakeEnd(i) 224 | rn.Connect(i, 1000) 225 | rn.Enable(i, true) 226 | 227 | for j := 0; j < nrpcs; j++ { 228 | arg := i*100 + j 229 | reply := "" 230 | e.Call("JunkServer.Handler2", arg, &reply) 231 | wanted := "handler2-" + strconv.Itoa(arg) 232 | if reply != wanted { 233 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 234 | } 235 | n += 1 236 | } 237 | }(ii) 238 | } 239 | 240 | total := 0 241 | for ii := 0; ii < nclients; ii++ { 242 | x := <-ch 243 | total += x 244 | } 245 | 246 | if total != nclients*nrpcs { 247 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs) 248 | } 249 | 250 | n := rn.GetCount(1000) 251 | if n != total { 252 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 253 | } 254 | } 255 | 256 | // 257 | // test unreliable 258 | // 259 | func TestUnreliable(t *testing.T) { 260 | runtime.GOMAXPROCS(4) 261 | 262 | rn := MakeNetwork() 263 | rn.Reliable(false) 264 | 265 | js := &JunkServer{} 266 | svc := MakeService(js) 267 | 268 | rs := MakeServer() 269 | rs.AddService(svc) 270 | rn.AddServer(1000, rs) 271 | 272 | ch := make(chan int) 273 | 274 | nclients := 300 275 | for ii := 0; ii < nclients; ii++ { 276 | go func(i int) { 277 | n := 0 278 | defer func() { ch <- n }() 279 | 280 | e := rn.MakeEnd(i) 281 | rn.Connect(i, 1000) 282 | rn.Enable(i, true) 283 | 284 | arg := i * 100 285 | reply := "" 286 | ok := e.Call("JunkServer.Handler2", arg, &reply) 287 | if ok { 288 | wanted := "handler2-" + 
strconv.Itoa(arg) 289 | if reply != wanted { 290 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 291 | } 292 | n += 1 293 | } 294 | }(ii) 295 | } 296 | 297 | total := 0 298 | for ii := 0; ii < nclients; ii++ { 299 | x := <-ch 300 | total += x 301 | } 302 | 303 | if total == nclients || total == 0 { 304 | t.Fatalf("all RPCs succeeded despite unreliable") 305 | } 306 | } 307 | 308 | // 309 | // test concurrent RPCs from a single ClientEnd 310 | // 311 | func TestConcurrentOne(t *testing.T) { 312 | runtime.GOMAXPROCS(4) 313 | 314 | rn := MakeNetwork() 315 | 316 | js := &JunkServer{} 317 | svc := MakeService(js) 318 | 319 | rs := MakeServer() 320 | rs.AddService(svc) 321 | rn.AddServer(1000, rs) 322 | 323 | e := rn.MakeEnd("c") 324 | rn.Connect("c", 1000) 325 | rn.Enable("c", true) 326 | 327 | ch := make(chan int) 328 | 329 | nrpcs := 20 330 | for ii := 0; ii < nrpcs; ii++ { 331 | go func(i int) { 332 | n := 0 333 | defer func() { ch <- n }() 334 | 335 | arg := 100 + i 336 | reply := "" 337 | e.Call("JunkServer.Handler2", arg, &reply) 338 | wanted := "handler2-" + strconv.Itoa(arg) 339 | if reply != wanted { 340 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 341 | } 342 | n += 1 343 | }(ii) 344 | } 345 | 346 | total := 0 347 | for ii := 0; ii < nrpcs; ii++ { 348 | x := <-ch 349 | total += x 350 | } 351 | 352 | if total != nrpcs { 353 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs) 354 | } 355 | 356 | js.mu.Lock() 357 | defer js.mu.Unlock() 358 | if len(js.log2) != nrpcs { 359 | t.Fatalf("wrong number of RPCs delivered") 360 | } 361 | 362 | n := rn.GetCount(1000) 363 | if n != total { 364 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 365 | } 366 | } 367 | 368 | // 369 | // regression: an RPC that's delayed during Enabled=false 370 | // should not delay subsequent RPCs (e.g. after Enabled=true). 371 | // 372 | func TestRegression1(t *testing.T) { 373 | runtime.GOMAXPROCS(4) 374 | 375 | rn := MakeNetwork() 376 | 377 | js := &JunkServer{} 378 | svc := MakeService(js) 379 | 380 | rs := MakeServer() 381 | rs.AddService(svc) 382 | rn.AddServer(1000, rs) 383 | 384 | e := rn.MakeEnd("c") 385 | rn.Connect("c", 1000) 386 | 387 | // start some RPCs while the ClientEnd is disabled. 388 | // they'll be delayed. 389 | rn.Enable("c", false) 390 | ch := make(chan bool) 391 | nrpcs := 20 392 | for ii := 0; ii < nrpcs; ii++ { 393 | go func(i int) { 394 | ok := false 395 | defer func() { ch <- ok }() 396 | 397 | arg := 100 + i 398 | reply := "" 399 | // this call ought to return false. 400 | e.Call("JunkServer.Handler2", arg, &reply) 401 | ok = true 402 | }(ii) 403 | } 404 | 405 | time.Sleep(100 * time.Millisecond) 406 | 407 | // now enable the ClientEnd and check that an RPC completes quickly. 
408 | t0 := time.Now() 409 | rn.Enable("c", true) 410 | { 411 | arg := 99 412 | reply := "" 413 | e.Call("JunkServer.Handler2", arg, &reply) 414 | wanted := "handler2-" + strconv.Itoa(arg) 415 | if reply != wanted { 416 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 417 | } 418 | } 419 | dur := time.Since(t0).Seconds() 420 | 421 | if dur > 0.03 { 422 | t.Fatalf("RPC took too long (%v) after Enable", dur) 423 | } 424 | 425 | for ii := 0; ii < nrpcs; ii++ { 426 | <-ch 427 | } 428 | 429 | js.mu.Lock() 430 | defer js.mu.Unlock() 431 | if len(js.log2) != 1 { 432 | t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2)) 433 | } 434 | 435 | n := rn.GetCount(1000) 436 | if n != 1 { 437 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1) 438 | } 439 | } 440 | 441 | // 442 | // if an RPC is stuck in a server, and the server 443 | // is killed with DeleteServer(), does the RPC 444 | // get un-stuck? 445 | // 446 | func TestKilled(t *testing.T) { 447 | runtime.GOMAXPROCS(4) 448 | 449 | rn := MakeNetwork() 450 | 451 | e := rn.MakeEnd("end1-99") 452 | 453 | js := &JunkServer{} 454 | svc := MakeService(js) 455 | 456 | rs := MakeServer() 457 | rs.AddService(svc) 458 | rn.AddServer("server99", rs) 459 | 460 | rn.Connect("end1-99", "server99") 461 | rn.Enable("end1-99", true) 462 | 463 | doneCh := make(chan bool) 464 | go func() { 465 | reply := 0 466 | ok := e.Call("JunkServer.Handler3", 99, &reply) 467 | doneCh <- ok 468 | }() 469 | 470 | time.Sleep(1000 * time.Millisecond) 471 | 472 | select { 473 | case <-doneCh: 474 | t.Fatalf("Handler3 should not have returned yet") 475 | case <-time.After(100 * time.Millisecond): 476 | } 477 | 478 | rn.DeleteServer("server99") 479 | 480 | select { 481 | case x := <-doneCh: 482 | if x != false { 483 | t.Fatalf("Handler3 returned successfully despite DeleteServer()") 484 | } 485 | case <-time.After(100 * time.Millisecond): 486 | t.Fatalf("Handler3 should return after DeleteServer()") 487 | } 488 | } 489 | 490 | func TestBenchmark(t *testing.T) { 491 | runtime.GOMAXPROCS(4) 492 | 493 | rn := MakeNetwork() 494 | 495 | e := rn.MakeEnd("end1-99") 496 | 497 | js := &JunkServer{} 498 | svc := MakeService(js) 499 | 500 | rs := MakeServer() 501 | rs.AddService(svc) 502 | rn.AddServer("server99", rs) 503 | 504 | rn.Connect("end1-99", "server99") 505 | rn.Enable("end1-99", true) 506 | 507 | t0 := time.Now() 508 | n := 100000 509 | for iters := 0; iters < n; iters++ { 510 | reply := "" 511 | e.Call("JunkServer.Handler2", 111, &reply) 512 | if reply != "handler2-111" { 513 | t.Fatalf("wrong reply from Handler2") 514 | } 515 | } 516 | fmt.Printf("%v for %v\n", time.Since(t0), n) 517 | // march 2016, rtm laptop, 22 microseconds per RPC 518 | } 519 | -------------------------------------------------------------------------------- /PROPOSAL.md: -------------------------------------------------------------------------------- 1 | # Implementing a distributed key-value store 2 | 3 | 4 | ## Introduction 5 | 6 | By [Wikipedia](https://en.wikipedia.org/wiki/Distributed_data_store), 7 | a distributed key-value store is a computer network where information is 8 | stored on more than one node, often in a replicated fashion. 9 | Examples of existing systems are 10 | [Apache Cassandra](http://cassandra.apache.org/), 11 | [Google's Bigtable](https://cloud.google.com/bigtable/), 12 | [Amazon's Dynamo](https://aws.amazon.com/dynamodb/), 13 | [MongoDB](https://www.mongodb.com/), [etcd](https://coreos.com/etcd/). 
14 | 15 | It is used in production by many companies that need to solve big data 16 | problems. It can also be found in different parts of a distributed system, acting 17 | as a configuration control center. More interestingly, there are some 18 | implementations that take the idea of a distributed key-value store and 19 | add more functionality and features on top of it. One interesting 20 | implementation is [HashiCorp's Consul](https://www.consul.io/), where 21 | they build a distributed, highly available, and data-center-aware solution 22 | to connect and configure applications across dynamic, distributed infrastructure. 23 | There is also [Redis](https://redis.io/), which is an in-memory data structure store 24 | used as a database, cache, and message broker. 25 | 26 | My goal for this project is to implement a distributed key-value store 27 | from scratch as well as to gain a better understanding of some specific topics in 28 | distributed systems, such as consensus algorithms, distributed hash tables, 29 | RPC, and the CAP theorem. Another goal is to become familiar with building distributed systems 30 | in [Go](https://golang.org/). 31 | 32 | In the sections below, I will discuss the design/architecture of the 33 | project, the final product, and testing. I will also provide a timeline 34 | with tasks, approaches, and deliverables on a bi-weekly basis. 35 | 36 | 37 | ## Design 38 | 39 | There are three big components in the system: a key-value storage layer, a consensus 40 | algorithm, and routing/service discovery. 41 | 42 | ### A key-value storage 43 | 44 | The most straightforward way is to use a hash table to store key-value pairs. 45 | It allows users to read and write in constant time, and it is very easy to use. 46 | In modern programming languages, the hash table data structure is normally built in 47 | as a map (or dictionary) with supported CRUD (Create, Read, Update, 48 | Delete) operations. 49 | 50 | However, using a hash table means that I need to store everything in memory, which 51 | is not great when the data gets big. One way to solve this is to store the data on 52 | disk and use a cache system (something like LRU): frequently visited data is kept in 53 | memory and the rest stays on disk. 54 | 55 | > There might be other caching systems to learn from, such as Redis and Memcached. 56 |
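Here is a rough sketch of what this map-based storage layer might look like in Go. The `Store` type and its methods below are illustrative placeholders only, not final hstore code; disk spill-over and an LRU cache could later be layered behind the same interface.

```go
package store

import "sync"

// Store is a minimal in-memory key-value store: a plain map guarded by a
// read-write mutex so that concurrent readers do not block each other.
// This is an illustrative sketch, not the actual hstore implementation.
type Store struct {
	mu   sync.RWMutex
	data map[string]string
}

// NewStore creates an empty store.
func NewStore() *Store {
	return &Store{data: make(map[string]string)}
}

// Read returns the value for key and whether the key exists.
func (s *Store) Read(key string) (string, bool) {
	s.mu.RLock()
	defer s.mu.RUnlock()
	v, ok := s.data[key]
	return v, ok
}

// Write creates or updates a key.
func (s *Store) Write(key, value string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.data[key] = value
}

// Delete removes a key.
func (s *Store) Delete(key string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.data, key)
}
```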
57 | ### Consensus algorithm 58 | 59 | A consensus algorithm is critical in a distributed system because it allows a 60 | collection of machines to work as a coherent group that can survive failures of 61 | some of its members. Paxos is undeniably the most popular algorithm. However, it is also 62 | known for its complexity. Understanding Paxos is hard. Last term I attempted to 63 | implement Paxos, but it did not turn out very well. There were still a lot of 64 | aspects that I was uncertain about. Therefore, I want to try something new this 65 | term. 66 | 67 | [Raft](https://raft.github.io/) seems like a good fit. 68 | It is designed to solve Paxos's understandability problem. It has been used by a 69 | lot of systems, such as etcd, HashiCorp's Consul, and Docker Swarm, 70 | and continues to gain popularity. 71 | 72 | > HashiCorp even provides a nice implementation of Raft, and it is 73 | > imported by [many systems](https://godoc.org/github.com/hashicorp/raft?importers). 74 | 75 | ### Routing/Service Discovery 76 | 77 | The last piece of the system is routing/service discovery. At the moment, 78 | I am not sure how to do this yet. I know that HashiCorp's Consul achieves this 79 | by using a DNS routing mechanism, but I am not familiar with its implementation. 80 | In the [final product section](#final-product), I will give an example of 81 | the service discovery behavior that I want. 82 | 83 | > **TODO:** Spend more time looking at HashiCorp's Consul documentation. 84 | 85 | 86 | ## Flow 87 | 88 | > There should be some images in this section for the reader to visualize the 89 | > system easily. It would also help form the flow of the system. I don't have one 90 | > yet and I can't think of any at the moment. I will update this as I take a 91 | > closer look at Raft as well as other documents. 92 | 93 | After looking at some similar systems, I realize that getting the consensus 94 | algorithm right in the first place is the most important job. 95 | As long as I have all the nodes performing resiliently using the protocol, 96 | building a key-value store on top seems much more natural. 97 | In other words, Raft does most of the heavy lifting for the system. 98 | 99 | ### Step by step 100 | 101 | - I first start with a seed node as a server. 102 | - I use other nodes to join the seed node, or I can choose to join any other node 103 | that is available in the system if I know its host. However, if I am able to 104 | implement the service discovery feature, I just need to tell a node to `join` 105 | without explicitly specifying the host. It automatically knows how to route 106 | to the right cluster. 107 | 108 | > I think this is how service discovery should work. However, I am not sure. 109 | 110 | - When there are 3 nodes, leader election will occur. One is the leader, the 111 | rest are followers. 112 | - Using the client machine, I can read, write, update, or delete key-values in any 113 | of these nodes. 114 | - The key-values should be replicated among the nodes, no matter where I put 115 | them. If I put a key-value pair in the master node, then it will eventually be 116 | propagated to the other nodes. However, if I put a key-value pair in a 117 | non-master node, it should redirect the request to the master and do the 118 | consensus checking there (see the client-side sketch after this list). 119 | 120 | > In this situation, is it better to put a load balancer in front of 121 | > these non-master nodes? 122 | 123 | - If I choose to stop a node or multiple nodes, the system must still work. 124 | - If I stop the master, leader election will start again if and only if 125 | the quorum size is big enough. Everything should behave the same way. 126 | 127 |
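To make the redirect-to-master behavior above concrete, here is one possible sketch of the client side in Go, in the spirit of the course's Clerk: the client does not know which node is the leader, so it simply cycles through the servers until one of them accepts the request. The `server` interface, `PutReply` fields, and `clerk` type below are assumptions made for illustration, not the actual hstore or lab API.

```go
package client

// PutReply is an illustrative reply type; a follower that is not the
// leader sets WrongLeader so the client knows to try another node.
type PutReply struct {
	WrongLeader bool
	Err         string
}

// server is a placeholder for however the client ends up talking to a
// node (labrpc, net/rpc, or HTTP).
type server interface {
	Put(key, value string) PutReply
}

type clerk struct {
	servers []server
	leader  int // index of the last node that behaved like the leader
}

// Put keeps retrying until the current leader accepts and replicates the
// write; any non-leader reply just moves the client on to the next node.
func (ck *clerk) Put(key, value string) {
	for {
		reply := ck.servers[ck.leader].Put(key, value)
		if !reply.WrongLeader && reply.Err == "" {
			return // the leader committed the write through Raft
		}
		ck.leader = (ck.leader + 1) % len(ck.servers) // try the next node
	}
}
```

Whether this retry/redirect logic lives in the client (as sketched) or behind a load balancer in front of the non-master nodes is exactly the open question raised in the list above.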
128 | ## Timeline 129 | 130 | I am using [MIT's 6.824 Distributed Systems 131 | Spring 2017](http://nil.csail.mit.edu/6.824/2017/) 132 | as a guideline for my implementation. Their labs provide pointers and 133 | resources on how to build a fault-tolerant key-value store, which is exactly 134 | what I want to accomplish for this project. They start with the Raft implementation, 135 | add a distributed key-value service on top of it, and eventually explore 136 | the idea of sharding. 137 | 138 | My timeline is also largely dependent on the course's schedule. 139 | 140 | ### Week 1-2 141 | - Task: 142 | - Finish the first draft of the proposal. 143 | - Approach: 144 | - Read about similar systems and learn how they implement them. 145 | - Come up with a solution myself that fits the scope of the project. 146 | - Deliverables: 147 | - A reasonably well-written first draft. 148 | 149 | ### Week 3-4 150 | - Task: 151 | - Get familiar with Go by going through the MapReduce implementation. 152 | - Start thinking about the Raft implementation and update the proposal along the way. 153 | - Approach: 154 | - [Lab 1: MapReduce](http://nil.csail.mit.edu/6.824/2017/labs/lab-1.html) 155 | - Deliverables: 156 | - A working MapReduce library. 157 | 158 | ### Week 5-6 159 | - Task: 160 | - Implement a minimal version of Raft. 161 | - Approach: 162 | - [Lab 2: Raft](http://nil.csail.mit.edu/6.824/2017/labs/lab-raft.html) 163 | - Deliverables: 164 | - A working version of Raft. 165 | 166 | ### Week 7-8 167 | - Task: 168 | - Build a key-value store using the Raft library. 169 | - Approach: 170 | - [Lab 3: Fault-tolerant Key/Value 171 | Service](http://nil.csail.mit.edu/6.824/2017/labs/lab-kvraft.html) 172 | - Deliverables: 173 | - A robust key-value store library that passes all of the lab's tests. 174 | 175 | ### Week 9-10 176 | - Task: 177 | - Build a simple client stdin interface. 178 | - Use Go net/rpc instead of the course's custom labrpc for network I/O. 179 | - Approach: 180 | - Either modify labrpc or use Go net/rpc. 181 | - Deliverables: 182 | - A workable client program. 183 | - A workable networked RaftKV program. 184 | 185 | ### Week 11-12 186 | - Task: 187 | - Implement RESTful APIs to query each server's key-value store. 188 | - Build a CLI for the server and the client. 189 | - Approach: 190 | - If the networking works as expected, it's pretty straightforward from here. 191 | - Deliverables: 192 | - A workable networked RaftKV program. 193 | 194 | ### Week 13-14 195 | - Task: 196 | - Automate the build. 197 | - Add Docker and CI. 198 | - Approach: 199 | - See how other projects do it and model after them. 200 | - Deliverables: 201 | - Everything is set up correctly. 202 | 203 | ## Final Product 204 | 205 | This is how I see it working as the final product. 206 | 207 | ### CLI 208 | 209 | ``` 210 | NAME: 211 | hstore - hstore shell 212 | 213 | USAGE: 214 | hstore [global options] role [role options] command [command options] [-h <host>] [-k <key>] [-v <value>] 215 | VERSION: 216 | 0.1.0 217 | 218 | AUTHORS: 219 | Hoanh An 220 | 221 | COMMANDS: 222 | start Start a node 223 | join Join a node to another node 224 | list List all available nodes 225 | kill Kill a node 226 | stop Stop a node 227 | restart Restart a node 228 | read Read a value for a key 229 | write Write a value to a key 230 | update Update a value for a key 231 | delete Delete a key-value pair 232 | help, h Shows a list of commands or help for one command 233 | 234 | GLOBAL OPTIONS: 235 | --help, -h show help 236 | --version, -v print the version 237 | ``` 238 | 239 | Here is a list of example commands: 240 | 241 | Commands | Description 242 | -- | -- 243 | `hstore server start -h <host>` | Start a seed node with a given host and prompt the user into the shell. 244 | `hstore server join [-h <host>]` | Join a node to the cluster and prompt the user into the shell. 245 | `hstore server list` | List all available nodes, showing their name, address, health status, and type. 246 | `hstore server kill -h <host>` | Kill a node with a given host. 247 | `hstore server stop -h <host>` | Stop a node with a given host. 248 | `hstore server restart -h <host>` | Restart a node with a given host. 249 | `hstore client read -h <host>` | Get all the keys and values for a given host. 250 | `hstore client read -h <host> -k <key>` | Read a value for a given key, for a given host. 251 | `hstore client write -h <host> -k <key> -v <value>` | Write a value to a key for a given host. 252 | `hstore client update -h <host> -k <key> -v <value>` | Update a value for a key for a given host. 253 | `hstore client delete -h <host> -k <key>` | Delete a key for a given host.
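For illustration, a typical session with these commands might look like the following; the hosts, key, and value shown here are made up for the example, not defaults of the tool:

```
# start a seed node, then join two more nodes to the cluster via the seed's host
hstore server start -h 127.0.0.1:5000
hstore server join -h 127.0.0.1:5000
hstore server join -h 127.0.0.1:5000

# write a key through one node, then read it back
hstore client write -h 127.0.0.1:5000 -k greeting -v hello
hstore client read -h 127.0.0.1:5000 -k greeting
```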
254 | 255 | ### APIs 256 | 257 | List of exposed APIs for each node. 258 | 259 | Method | Endpoint | Description 260 | -- | -- | -- 261 | `GET` | `/read` | Read all keys and values. 262 | `GET` | `/read/` | Read a value for a given key in a node. 263 | `POST` | `/write` | Write a value to a key in a node. 264 | `POST` | `/update` | Update a value for a key in a node. 265 | `GET` | `/delete/` | Delete a key in a node. 266 | 267 | 268 | ## Testing 269 | 270 | > **Idea:** Unit test, integration test, end-to-end test 271 | 272 | > Other than unit test, integration test (if needed), end to end test (if 273 | > needed), how to introduce failure injections/exercises for the system, 274 | > exploring its behavior in the face of crashes and network partitioning? 275 | 276 | > If the system fails in such a way that it can not function properly anymore, 277 | how would I recover/bring everything back up gracefully? 278 | 279 | 280 | ## Report 281 | 282 | > **Idea:** A dashboard and a write-up paper with discussion. 283 | 284 | > Gather data and do different types of analysis for the system here 285 | 286 | 287 | ## UI 288 | 289 | > **Idea:** A interactive webpage. 290 | 291 | > Something like [etcd's playground](http://play.etcd.io/play) is nice to have 292 | > for better visualization. 293 | 294 | ## Future features 295 | 296 | - Full text search like Elasticsearch 297 | -------------------------------------------------------------------------------- /src/raft/config.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft tester. 5 | // 6 | // we will use the original config.go to test your code for grading. 7 | // so, while you can modify this code to help you debug, please 8 | // test with the original before submitting. 9 | // 10 | 11 | import ( 12 | crand "crypto/rand" 13 | "encoding/base64" 14 | "fmt" 15 | "labrpc" 16 | "log" 17 | "runtime" 18 | "sync" 19 | "sync/atomic" 20 | "testing" 21 | "time" 22 | ) 23 | 24 | // Generate random string 25 | func randstring(n int) string { 26 | b := make([]byte, 2*n) 27 | crand.Read(b) 28 | s := base64.URLEncoding.EncodeToString(b) 29 | return s[0:n] 30 | } 31 | 32 | // Config structure 33 | type config struct { 34 | mu sync.Mutex 35 | t *testing.T 36 | net *labrpc.Network 37 | n int 38 | done int32 // tell internal threads to die 39 | rafts []*Raft 40 | applyErr []string // from apply channel readers 41 | connected []bool // whether each server is on the net 42 | saved []*Persister 43 | endnames [][]string // list of list string: the port file names each sends to 44 | logs []map[int]int // list of map[int]int: copy of each server's committed entries 45 | } 46 | 47 | // Define Once object 48 | var ncpuOnce sync.Once 49 | 50 | // Make configuration 51 | func makeConfig(t *testing.T, n int, unreliable bool) *config { 52 | ncpuOnce.Do(func() { 53 | if runtime.NumCPU() < 2 { 54 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 55 | } 56 | }) 57 | runtime.GOMAXPROCS(4) 58 | cfg := &config{} 59 | cfg.t = t 60 | cfg.net = labrpc.MakeNetwork() // holds network, clients and servers 61 | cfg.n = n 62 | cfg.applyErr = make([]string, cfg.n) 63 | cfg.rafts = make([]*Raft, cfg.n) 64 | cfg.connected = make([]bool, cfg.n) 65 | cfg.saved = make([]*Persister, cfg.n) 66 | cfg.endnames = make([][]string, cfg.n) 67 | cfg.logs = make([]map[int]int, cfg.n) 68 | 69 | cfg.setunreliable(unreliable) 70 | 71 | cfg.net.LongDelays(true) 72 | 73 | // create a full set of Rafts. 
74 | for i := 0; i < cfg.n; i++ { 75 | cfg.logs[i] = map[int]int{} 76 | cfg.start1(i) 77 | } 78 | 79 | // connect everyone 80 | for i := 0; i < cfg.n; i++ { 81 | cfg.connect(i) 82 | } 83 | 84 | return cfg 85 | } 86 | 87 | // Shut down a Raft server but save its persistent state. 88 | func (cfg *config) crash1(i int) { 89 | cfg.disconnect(i) 90 | cfg.net.DeleteServer(i) // disable client connections to the server. 91 | 92 | cfg.mu.Lock() 93 | defer cfg.mu.Unlock() 94 | 95 | // a fresh persister, in case old instance 96 | // continues to update the Persister. 97 | // but copy old persister's content so that we always 98 | // pass Make() the last persisted state. 99 | if cfg.saved[i] != nil { 100 | cfg.saved[i] = cfg.saved[i].Copy() 101 | } 102 | 103 | rf := cfg.rafts[i] 104 | if rf != nil { 105 | cfg.mu.Unlock() 106 | rf.Kill() 107 | cfg.mu.Lock() 108 | cfg.rafts[i] = nil 109 | } 110 | 111 | if cfg.saved[i] != nil { 112 | raftlog := cfg.saved[i].ReadRaftState() 113 | cfg.saved[i] = &Persister{} 114 | cfg.saved[i].SaveRaftState(raftlog) 115 | } 116 | } 117 | 118 | // Start or re-start a Raft. 119 | // if one already exists, "kill" it first. 120 | // allocate new outgoing port file names, and a new 121 | // state persister, to isolate previous instance of 122 | // this server. since we cannot really kill it. 123 | func (cfg *config) start1(i int) { 124 | cfg.crash1(i) 125 | 126 | // for server i, create a fresh set of outgoing ClientEnd with random names. 127 | // so that old crashed instance's ClientEnds can't send. 128 | cfg.endnames[i] = make([]string, cfg.n) 129 | for j := 0; j < cfg.n; j++ { 130 | cfg.endnames[i][j] = randstring(20) 131 | } 132 | 133 | // make a fresh set of ClientEnds given these name 134 | ends := make([]*labrpc.ClientEnd, cfg.n) 135 | for j := 0; j < cfg.n; j++ { 136 | // create a client end point to talk to 137 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 138 | 139 | // connect a client to a server j 140 | cfg.net.Connect(cfg.endnames[i][j], j) 141 | } 142 | 143 | cfg.mu.Lock() 144 | 145 | // a fresh persister, so old instance doesn't overwrite 146 | // new instance's persisted state. 147 | // but copy old persister's content so that we always 148 | // pass Make() the last persisted state. 149 | if cfg.saved[i] != nil { 150 | cfg.saved[i] = cfg.saved[i].Copy() 151 | } else { 152 | cfg.saved[i] = MakePersister() 153 | } 154 | 155 | cfg.mu.Unlock() 156 | 157 | // listen to messages from Raft indicating newly committed messages. 158 | applyCh := make(chan ApplyMsg) 159 | go func() { 160 | for m := range applyCh { 161 | errMsg := "" 162 | if m.UseSnapshot { 163 | // ignore the snapshot 164 | } else if v, ok := (m.Command).(int); ok { 165 | cfg.mu.Lock() 166 | for j := 0; j < len(cfg.logs); j++ { 167 | if old, oldok := cfg.logs[j][m.Index]; oldok && old != v { 168 | // some server has already committed a different value for this entry! 169 | errMsg = fmt.Sprintf("commit index=%v server=%v %v != server=%v %v", 170 | m.Index, i, m.Command, j, old) 171 | } 172 | } 173 | _, prevok := cfg.logs[i][m.Index-1] 174 | cfg.logs[i][m.Index] = v 175 | cfg.mu.Unlock() 176 | 177 | if m.Index > 1 && prevok == false { 178 | errMsg = fmt.Sprintf("server %v apply out of order %v", i, m.Index) 179 | } 180 | } else { 181 | errMsg = fmt.Sprintf("committed command %v is not an int", m.Command) 182 | } 183 | 184 | if errMsg != "" { 185 | log.Fatalf("apply error: %v\n", errMsg) 186 | cfg.applyErr[i] = errMsg 187 | // keep reading after error so that Raft doesn't block 188 | // holding locks... 
189 | } 190 | } 191 | }() 192 | 193 | // Make a Raft instance 194 | rf := Make(ends, i, cfg.saved[i], applyCh) 195 | 196 | cfg.mu.Lock() 197 | cfg.rafts[i] = rf 198 | cfg.mu.Unlock() 199 | 200 | // Register a Raft service 201 | svc := labrpc.MakeService(rf) 202 | 203 | // Make a server and add the Raft service 204 | srv := labrpc.MakeServer() 205 | srv.AddService(svc) 206 | 207 | // Add that server to the network 208 | cfg.net.AddServer(i, srv) 209 | } 210 | 211 | // Clean up configuration setup 212 | func (cfg *config) cleanup() { 213 | for i := 0; i < len(cfg.rafts); i++ { 214 | if cfg.rafts[i] != nil { 215 | cfg.rafts[i].Kill() 216 | } 217 | } 218 | atomic.StoreInt32(&cfg.done, 1) 219 | } 220 | 221 | // Attach server i to the net. 222 | func (cfg *config) connect(i int) { 223 | // fmt.Printf("connect(%d)\n", i) 224 | 225 | cfg.connected[i] = true 226 | 227 | // outgoing ClientEnds 228 | for j := 0; j < cfg.n; j++ { 229 | if cfg.connected[j] { 230 | endname := cfg.endnames[i][j] 231 | cfg.net.Enable(endname, true) 232 | } 233 | } 234 | 235 | // incoming ClientEnds 236 | for j := 0; j < cfg.n; j++ { 237 | if cfg.connected[j] { 238 | endname := cfg.endnames[j][i] 239 | cfg.net.Enable(endname, true) 240 | } 241 | } 242 | } 243 | 244 | // Detach server i from the net. 245 | func (cfg *config) disconnect(i int) { 246 | // fmt.Printf("disconnect(%d)\n", i) 247 | 248 | cfg.connected[i] = false 249 | 250 | // outgoing ClientEnds 251 | for j := 0; j < cfg.n; j++ { 252 | if cfg.endnames[i] != nil { 253 | endname := cfg.endnames[i][j] 254 | cfg.net.Enable(endname, false) 255 | } 256 | } 257 | 258 | // incoming ClientEnds 259 | for j := 0; j < cfg.n; j++ { 260 | if cfg.endnames[j] != nil { 261 | endname := cfg.endnames[j][i] 262 | cfg.net.Enable(endname, false) 263 | } 264 | } 265 | } 266 | 267 | // Count the number of RPC messages 268 | func (cfg *config) rpcCount(server int) int { 269 | return cfg.net.GetCount(server) 270 | } 271 | 272 | // Set unreliable network 273 | func (cfg *config) setunreliable(unrel bool) { 274 | cfg.net.Reliable(!unrel) 275 | } 276 | 277 | // Set long ordering order 278 | func (cfg *config) setlongreordering(longrel bool) { 279 | cfg.net.LongReordering(longrel) 280 | } 281 | 282 | // Check that there's exactly one leader. 283 | // Try a few times in case re-elections are needed. 284 | func (cfg *config) checkOneLeader() int { 285 | for iters := 0; iters < 10; iters++ { 286 | time.Sleep(500 * time.Millisecond) 287 | leaders := make(map[int][]int) 288 | for i := 0; i < cfg.n; i++ { 289 | if cfg.connected[i] { 290 | if t, leader := cfg.rafts[i].GetState(); leader { 291 | leaders[t] = append(leaders[t], i) 292 | } 293 | } 294 | } 295 | 296 | lastTermWithLeader := -1 297 | for t, leaders := range leaders { 298 | if len(leaders) > 1 { 299 | cfg.t.Fatalf("term %d has %d (>1) leaders", t, len(leaders)) 300 | } 301 | if t > lastTermWithLeader { 302 | lastTermWithLeader = t 303 | } 304 | } 305 | 306 | if len(leaders) != 0 { 307 | return leaders[lastTermWithLeader][0] 308 | } 309 | } 310 | cfg.t.Fatalf("expected one leader, got none") 311 | return -1 312 | } 313 | 314 | // Check that everyone agrees on the term. 
315 | func (cfg *config) checkTerms() int { 316 | term := -1 317 | for i := 0; i < cfg.n; i++ { 318 | if cfg.connected[i] { 319 | xterm, _ := cfg.rafts[i].GetState() 320 | if term == -1 { 321 | term = xterm 322 | } else if term != xterm { 323 | cfg.t.Fatalf("servers disagree on term") 324 | } 325 | } 326 | } 327 | return term 328 | } 329 | 330 | // Check that there's no leader 331 | func (cfg *config) checkNoLeader() { 332 | for i := 0; i < cfg.n; i++ { 333 | if cfg.connected[i] { 334 | _, isLeader := cfg.rafts[i].GetState() 335 | if isLeader { 336 | cfg.t.Fatalf("expected no leader, but %v claims to be leader", i) 337 | } 338 | } 339 | } 340 | } 341 | 342 | // How many servers think a log entry is committed? 343 | func (cfg *config) nCommitted(index int) (int, interface{}) { 344 | count := 0 345 | cmd := -1 346 | for i := 0; i < len(cfg.rafts); i++ { 347 | if cfg.applyErr[i] != "" { 348 | cfg.t.Fatal(cfg.applyErr[i]) 349 | } 350 | 351 | cfg.mu.Lock() 352 | cmd1, ok := cfg.logs[i][index] 353 | cfg.mu.Unlock() 354 | 355 | if ok { 356 | if count > 0 && cmd != cmd1 { 357 | cfg.t.Fatalf("committed values do not match: index %v, %v, %v\n", 358 | index, cmd, cmd1) 359 | } 360 | count += 1 361 | cmd = cmd1 362 | } 363 | } 364 | return count, cmd 365 | } 366 | 367 | // Wait for at least n servers to commit. 368 | // But don't wait forever. 369 | func (cfg *config) wait(index int, n int, startTerm int) interface{} { 370 | to := 10 * time.Millisecond 371 | for iters := 0; iters < 30; iters++ { 372 | nd, _ := cfg.nCommitted(index) 373 | if nd >= n { 374 | break 375 | } 376 | time.Sleep(to) 377 | if to < time.Second { 378 | to *= 2 379 | } 380 | if startTerm > -1 { 381 | for _, r := range cfg.rafts { 382 | if t, _ := r.GetState(); t > startTerm { 383 | // someone has moved on 384 | // can no longer guarantee that we'll "win" 385 | return -1 386 | } 387 | } 388 | } 389 | } 390 | nd, cmd := cfg.nCommitted(index) 391 | if nd < n { 392 | cfg.t.Fatalf("only %d decided for index %d; wanted %d\n", 393 | nd, index, n) 394 | } 395 | return cmd 396 | } 397 | 398 | // Do a complete agreement. 399 | // it might choose the wrong leader initially, 400 | // and have to re-submit after giving up. 401 | // entirely gives up after about 10 seconds. 402 | // indirectly checks that the servers agree on the 403 | // same value, since nCommitted() checks this, 404 | // as do the threads that read from applyCh. 405 | // returns index. 406 | func (cfg *config) one(cmd int, expectedServers int) int { 407 | t0 := time.Now() 408 | starts := 0 409 | for time.Since(t0).Seconds() < 10 { 410 | // try all the servers, maybe one is the leader. 411 | index := -1 412 | for si := 0; si < cfg.n; si++ { 413 | starts = (starts + 1) % cfg.n 414 | var rf *Raft 415 | cfg.mu.Lock() 416 | if cfg.connected[starts] { 417 | rf = cfg.rafts[starts] 418 | } 419 | cfg.mu.Unlock() 420 | if rf != nil { 421 | index1, _, ok := rf.Start(cmd) 422 | if ok { 423 | index = index1 424 | break 425 | } 426 | } 427 | } 428 | 429 | if index != -1 { 430 | // somebody claimed to be the leader and to have 431 | // submitted our command; wait a while for agreement. 432 | t1 := time.Now() 433 | for time.Since(t1).Seconds() < 2 { 434 | nd, cmd1 := cfg.nCommitted(index) 435 | if nd > 0 && nd >= expectedServers { 436 | // committed 437 | if cmd2, ok := cmd1.(int); ok && cmd2 == cmd { 438 | // and it was the command we submitted. 
439 | return index 440 | } 441 | } 442 | time.Sleep(20 * time.Millisecond) 443 | } 444 | } else { 445 | time.Sleep(50 * time.Millisecond) 446 | } 447 | } 448 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) 449 | return -1 450 | } 451 | -------------------------------------------------------------------------------- /src/labrpc/labrpc.go: -------------------------------------------------------------------------------- 1 | package labrpc 2 | 3 | // 4 | // channel-based RPC, for 824 labs. 5 | // 6 | // simulates a network that can lose requests, lose replies, 7 | // delay messages, and entirely disconnect particular hosts. 8 | // 9 | // we will use the original labrpc.go to test your code for grading. 10 | // so, while you can modify this code to help you debug, please 11 | // test against the original before submitting. 12 | // 13 | // adapted from Go net/rpc/server.go. 14 | // 15 | // sends gob-encoded values to ensure that RPCs 16 | // don't include references to program objects. 17 | // 18 | // net := MakeNetwork() -- holds network, clients, servers. 19 | // end := net.MakeEnd(endname) -- create a client end-point, to talk to one server. 20 | // net.AddServer(servername, server) -- adds a named server to network. 21 | // net.DeleteServer(servername) -- eliminate the named server. 22 | // net.Connect(endname, servername) -- connect a client to a server. 23 | // net.Enable(endname, enabled) -- enable/disable a client. 24 | // net.Reliable(bool) -- false means drop/delay messages 25 | // 26 | // end.Call("Raft.AppendEntries", &args, &reply) -- send an RPC, wait for reply. 27 | // the "Raft" is the name of the server struct to be called. 28 | // the "AppendEntries" is the name of the method to be called. 29 | // Call() returns true to indicate that the server executed the request 30 | // and the reply is valid. 31 | // Call() returns false if the network lost the request or reply 32 | // or the server is down. 33 | // It is OK to have multiple Call()s in progress at the same time on the 34 | // same ClientEnd. 35 | // Concurrent calls to Call() may be delivered to the server out of order, 36 | // since the network may re-order messages. 37 | // Call() is guaranteed to return (perhaps after a delay) *except* if the 38 | // handler function on the server side does not return. That is, there 39 | // is no need to implement your own timeouts around Call(). 40 | // the server RPC handler function must declare its args and reply arguments 41 | // as pointers, so that their types exactly match the types of the arguments 42 | // to Call(). 43 | // 44 | // srv := MakeServer() 45 | // srv.AddService(svc) -- a server can have multiple services, e.g. Raft and k/v 46 | // pass srv to net.AddServer() 47 | // 48 | // svc := MakeService(receiverObject) -- obj's methods will handle RPCs 49 | // much like Go's rpcs.Register() 50 | // pass svc to srv.AddService() 51 | // 52 | 53 | import ( 54 | "bytes" 55 | "encoding/gob" 56 | "log" 57 | "math/rand" 58 | "reflect" 59 | "strings" 60 | "sync" 61 | "time" 62 | ) 63 | 64 | // redMsg structure for requesting message 65 | type reqMsg struct { 66 | endname interface{} // name of sending ClientEnd 67 | svcMeth string // e.g. 
"Raft.AppendEntries" 68 | argsType reflect.Type 69 | args []byte 70 | replyCh chan replyMsg 71 | } 72 | 73 | // replyMsg structure for reply message 74 | type replyMsg struct { 75 | ok bool 76 | reply []byte 77 | } 78 | 79 | // ClientEnd structure 80 | type ClientEnd struct { 81 | endname interface{} // this end-point's name 82 | ch chan reqMsg // copy of Network.endCh 83 | } 84 | 85 | // Call sends an RPC, waits for the reply. 86 | // The return value indicates success; false means that 87 | // no reply was received from the server. 88 | func (e *ClientEnd) Call(svcMeth string, args interface{}, reply interface{}) bool { 89 | req := reqMsg{} 90 | req.endname = e.endname 91 | req.svcMeth = svcMeth 92 | req.argsType = reflect.TypeOf(args) 93 | req.replyCh = make(chan replyMsg) 94 | 95 | qb := new(bytes.Buffer) 96 | qe := gob.NewEncoder(qb) 97 | qe.Encode(args) 98 | req.args = qb.Bytes() 99 | 100 | e.ch <- req 101 | 102 | rep := <-req.replyCh 103 | if rep.ok { 104 | rb := bytes.NewBuffer(rep.reply) 105 | rd := gob.NewDecoder(rb) 106 | if err := rd.Decode(reply); err != nil { 107 | log.Fatalf("ClientEnd.Call(): decode reply: %v\n", err) 108 | } 109 | return true 110 | } 111 | 112 | return false 113 | } 114 | 115 | // Network structure 116 | type Network struct { 117 | mu sync.Mutex 118 | reliable bool 119 | longDelays bool // pause a long time on send on disabled connection 120 | longReordering bool // sometimes delay replies a long time 121 | ends map[interface{}]*ClientEnd // ends, by name 122 | enabled map[interface{}]bool // by end name 123 | servers map[interface{}]*Server // servers, by name 124 | connections map[interface{}]interface{} // endname -> servername 125 | endCh chan reqMsg 126 | } 127 | 128 | // MakeNetwork initializes Network object 129 | func MakeNetwork() *Network { 130 | rn := &Network{} 131 | rn.reliable = true 132 | rn.ends = map[interface{}]*ClientEnd{} 133 | rn.enabled = map[interface{}]bool{} 134 | rn.servers = map[interface{}]*Server{} 135 | rn.connections = map[interface{}](interface{}){} 136 | rn.endCh = make(chan reqMsg) 137 | 138 | // single goroutine to handle all ClientEnd.Call()s 139 | go func() { 140 | for xreq := range rn.endCh { 141 | go rn.ProcessReq(xreq) 142 | } 143 | }() 144 | 145 | return rn 146 | } 147 | 148 | // Reliable or not 149 | func (rn *Network) Reliable(yes bool) { 150 | rn.mu.Lock() 151 | defer rn.mu.Unlock() 152 | 153 | rn.reliable = yes 154 | } 155 | 156 | // LongReordering or not 157 | func (rn *Network) LongReordering(yes bool) { 158 | rn.mu.Lock() 159 | defer rn.mu.Unlock() 160 | 161 | rn.longReordering = yes 162 | } 163 | 164 | // LongDelays or not 165 | func (rn *Network) LongDelays(yes bool) { 166 | rn.mu.Lock() 167 | defer rn.mu.Unlock() 168 | 169 | rn.longDelays = yes 170 | } 171 | 172 | // ReadEndnameInfo reads ClientEnd name information 173 | func (rn *Network) ReadEndnameInfo(endname interface{}) (enabled bool, 174 | servername interface{}, server *Server, reliable bool, longreordering bool, 175 | ) { 176 | rn.mu.Lock() 177 | defer rn.mu.Unlock() 178 | 179 | enabled = rn.enabled[endname] 180 | servername = rn.connections[endname] 181 | if servername != nil { 182 | server = rn.servers[servername] 183 | } 184 | reliable = rn.reliable 185 | longreordering = rn.longReordering 186 | return 187 | } 188 | 189 | // IsServerDead checks if a server is dead 190 | func (rn *Network) IsServerDead(endname interface{}, servername interface{}, server *Server) bool { 191 | rn.mu.Lock() 192 | defer rn.mu.Unlock() 193 | 194 | if 
rn.enabled[endname] == false || rn.servers[servername] != server { 195 | return true 196 | } 197 | return false 198 | } 199 | 200 | // ProcessReq processes request message 201 | func (rn *Network) ProcessReq(req reqMsg) { 202 | enabled, servername, server, reliable, longreordering := rn.ReadEndnameInfo(req.endname) 203 | 204 | if enabled && servername != nil && server != nil { 205 | if reliable == false { 206 | // short delay 207 | ms := (rand.Int() % 27) 208 | time.Sleep(time.Duration(ms) * time.Millisecond) 209 | } 210 | 211 | if reliable == false && (rand.Int()%1000) < 100 { 212 | // drop the request, return as if timeout 213 | req.replyCh <- replyMsg{false, nil} 214 | return 215 | } 216 | 217 | // execute the request (call the RPC handler). 218 | // in a separate thread so that we can periodically check 219 | // if the server has been killed and the RPC should get a 220 | // failure reply. 221 | ech := make(chan replyMsg) 222 | go func() { 223 | r := server.dispatch(req) 224 | ech <- r 225 | }() 226 | 227 | // wait for handler to return, 228 | // but stop waiting if DeleteServer() has been called, 229 | // and return an error. 230 | var reply replyMsg 231 | replyOK := false 232 | serverDead := false 233 | for replyOK == false && serverDead == false { 234 | select { 235 | case reply = <-ech: 236 | replyOK = true 237 | case <-time.After(100 * time.Millisecond): 238 | serverDead = rn.IsServerDead(req.endname, servername, server) 239 | } 240 | } 241 | 242 | // do not reply if DeleteServer() has been called, i.e. 243 | // the server has been killed. this is needed to avoid 244 | // situation in which a client gets a positive reply 245 | // to an Append, but the server persisted the update 246 | // into the old Persister. config.go is careful to call 247 | // DeleteServer() before superseding the Persister. 248 | serverDead = rn.IsServerDead(req.endname, servername, server) 249 | 250 | if replyOK == false || serverDead == true { 251 | // server was killed while we were waiting; return error. 252 | req.replyCh <- replyMsg{false, nil} 253 | } else if reliable == false && (rand.Int()%1000) < 100 { 254 | // drop the reply, return as if timeout 255 | req.replyCh <- replyMsg{false, nil} 256 | } else if longreordering == true && rand.Intn(900) < 600 { 257 | // delay the response for a while 258 | ms := 200 + rand.Intn(1+rand.Intn(2000)) 259 | time.Sleep(time.Duration(ms) * time.Millisecond) 260 | req.replyCh <- reply 261 | } else { 262 | req.replyCh <- reply 263 | } 264 | } else { 265 | // simulate no reply and eventual timeout. 266 | ms := 0 267 | if rn.longDelays { 268 | // let Raft tests check that leader doesn't send 269 | // RPCs synchronously. 270 | ms = (rand.Int() % 7000) 271 | } else { 272 | // many kv tests require the client to try each 273 | // server in fairly rapid succession. 274 | ms = (rand.Int() % 100) 275 | } 276 | time.Sleep(time.Duration(ms) * time.Millisecond) 277 | req.replyCh <- replyMsg{false, nil} 278 | } 279 | 280 | } 281 | 282 | // MakeEnd creates a client end-point. 283 | // Start the thread that listens and delivers. 
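//
// A minimal wiring sketch (the service type KVServer, its Get method, and
// the args/reply values are assumptions for illustration; everything else
// below exists in this file):
//
//	net := MakeNetwork()
//	end := net.MakeEnd("client-1")        // client-side end-point
//	srv := MakeServer()
//	srv.AddService(MakeService(kvServer)) // kvServer is a hypothetical *KVServer
//	net.AddServer("server-1", srv)
//	net.Connect("client-1", "server-1")   // bind the end to that server
//	net.Enable("client-1", true)          // allow traffic on the end
//	ok := end.Call("KVServer.Get", &args, &reply)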
284 | func (rn *Network) MakeEnd(endname interface{}) *ClientEnd { 285 | rn.mu.Lock() 286 | defer rn.mu.Unlock() 287 | 288 | if _, ok := rn.ends[endname]; ok { 289 | log.Fatalf("MakeEnd: %v already exists\n", endname) 290 | } 291 | 292 | e := &ClientEnd{} 293 | e.endname = endname 294 | e.ch = rn.endCh 295 | rn.ends[endname] = e 296 | rn.enabled[endname] = false 297 | rn.connections[endname] = nil 298 | 299 | return e 300 | } 301 | 302 | // AddServer adds a server to the network 303 | func (rn *Network) AddServer(servername interface{}, rs *Server) { 304 | rn.mu.Lock() 305 | defer rn.mu.Unlock() 306 | 307 | rn.servers[servername] = rs 308 | } 309 | 310 | // DeleteServer deletes a server in the network 311 | func (rn *Network) DeleteServer(servername interface{}) { 312 | rn.mu.Lock() 313 | defer rn.mu.Unlock() 314 | 315 | rn.servers[servername] = nil 316 | } 317 | 318 | // Connect connects a ClientEnd to a server. 319 | // A ClientEnd can only be connected once in its lifetime. 320 | func (rn *Network) Connect(endname interface{}, servername interface{}) { 321 | rn.mu.Lock() 322 | defer rn.mu.Unlock() 323 | 324 | rn.connections[endname] = servername 325 | } 326 | 327 | // Enable a ClientEnd. 328 | func (rn *Network) Enable(endname interface{}, enabled bool) { 329 | rn.mu.Lock() 330 | defer rn.mu.Unlock() 331 | 332 | rn.enabled[endname] = enabled 333 | } 334 | 335 | // GetCount of RPCs. 336 | func (rn *Network) GetCount(servername interface{}) int { 337 | rn.mu.Lock() 338 | defer rn.mu.Unlock() 339 | 340 | svr := rn.servers[servername] 341 | return svr.GetCount() 342 | } 343 | 344 | // Server is a collection of services, all sharing 345 | // the same rpc dispatcher. so that e.g. both a Raft 346 | // and a k/v server can listen to the same rpc endpoint. 347 | type Server struct { 348 | mu sync.Mutex 349 | services map[string]*Service 350 | count int // incoming RPCs 351 | } 352 | 353 | // MakeServer initialize a Server object 354 | func MakeServer() *Server { 355 | rs := &Server{} 356 | rs.services = map[string]*Service{} 357 | return rs 358 | } 359 | 360 | // AddService to a Server 361 | func (rs *Server) AddService(svc *Service) { 362 | rs.mu.Lock() 363 | defer rs.mu.Unlock() 364 | rs.services[svc.name] = svc 365 | } 366 | 367 | // dispatch request message 368 | func (rs *Server) dispatch(req reqMsg) replyMsg { 369 | rs.mu.Lock() 370 | 371 | rs.count += 1 372 | 373 | // split Raft.AppendEntries into service and method 374 | dot := strings.LastIndex(req.svcMeth, ".") 375 | serviceName := req.svcMeth[:dot] 376 | methodName := req.svcMeth[dot+1:] 377 | 378 | service, ok := rs.services[serviceName] 379 | 380 | rs.mu.Unlock() 381 | 382 | if ok { 383 | return service.dispatch(methodName, req) 384 | } else { 385 | choices := []string{} 386 | for k := range rs.services { 387 | choices = append(choices, k) 388 | } 389 | log.Fatalf("labrpc.Server.dispatch(): unknown service %v in %v.%v; expecting one of %v\n", 390 | serviceName, serviceName, methodName, choices) 391 | return replyMsg{false, nil} 392 | } 393 | } 394 | 395 | // GetCount return number of servers 396 | func (rs *Server) GetCount() int { 397 | rs.mu.Lock() 398 | defer rs.mu.Unlock() 399 | return rs.count 400 | } 401 | 402 | // Service is an object with methods that can be called via RPC. 403 | // a single server may have more than one Service. 
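//
// Illustrative note (an observation based on the reflection checks in
// MakeService below, not an official spec): a receiver method is picked up
// as an RPC handler when it is exported, takes exactly two arguments with a
// pointer reply, and returns nothing, e.g.
//
//	func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply)
//
// AppendEntriesArgs/Reply are hypothetical names used only for this example.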
404 | type Service struct { 405 | name string 406 | rcvr reflect.Value 407 | typ reflect.Type 408 | methods map[string]reflect.Method 409 | } 410 | 411 | // MakeService initializes a Service object 412 | func MakeService(rcvr interface{}) *Service { 413 | svc := &Service{} 414 | svc.typ = reflect.TypeOf(rcvr) 415 | svc.rcvr = reflect.ValueOf(rcvr) 416 | svc.name = reflect.Indirect(svc.rcvr).Type().Name() 417 | svc.methods = map[string]reflect.Method{} 418 | 419 | for m := 0; m < svc.typ.NumMethod(); m++ { 420 | method := svc.typ.Method(m) 421 | mtype := method.Type 422 | mname := method.Name 423 | 424 | //fmt.Printf("%v pp %v ni %v 1k %v 2k %v no %v\n", 425 | // mname, method.PkgPath, mtype.NumIn(), mtype.In(1).Kind(), mtype.In(2).Kind(), mtype.NumOut()) 426 | 427 | if method.PkgPath != "" || // capitalized? 428 | mtype.NumIn() != 3 || 429 | //mtype.In(1).Kind() != reflect.Ptr || 430 | mtype.In(2).Kind() != reflect.Ptr || 431 | mtype.NumOut() != 0 { 432 | // the method is not suitable for a handler 433 | //fmt.Printf("bad method: %v\n", mname) 434 | } else { 435 | // the method looks like a handler 436 | svc.methods[mname] = method 437 | } 438 | } 439 | 440 | return svc 441 | } 442 | 443 | // dispatch a service 444 | func (svc *Service) dispatch(methname string, req reqMsg) replyMsg { 445 | if method, ok := svc.methods[methname]; ok { 446 | // prepare space into which to read the argument. 447 | // the Value's type will be a pointer to req.argsType. 448 | args := reflect.New(req.argsType) 449 | 450 | // decode the argument. 451 | ab := bytes.NewBuffer(req.args) 452 | ad := gob.NewDecoder(ab) 453 | ad.Decode(args.Interface()) 454 | 455 | // allocate space for the reply. 456 | replyType := method.Type.In(2) 457 | replyType = replyType.Elem() 458 | replyv := reflect.New(replyType) 459 | 460 | // call the method. 461 | function := method.Func 462 | function.Call([]reflect.Value{svc.rcvr, args.Elem(), replyv}) 463 | 464 | // encode the reply. 465 | rb := new(bytes.Buffer) 466 | re := gob.NewEncoder(rb) 467 | re.EncodeValue(replyv) 468 | 469 | return replyMsg{true, rb.Bytes()} 470 | } else { 471 | choices := []string{} 472 | for k := range svc.methods { 473 | choices = append(choices, k) 474 | } 475 | log.Fatalf("labrpc.Service.dispatch(): unknown method %v in %v; expecting one of %v\n", 476 | methname, req.svcMeth, choices) 477 | return replyMsg{false, nil} 478 | } 479 | } 480 | -------------------------------------------------------------------------------- /src/raftkv/test_test.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "math/rand" 7 | "strconv" 8 | "strings" 9 | "sync/atomic" 10 | "testing" 11 | "time" 12 | ) 13 | 14 | // The tester generously allows solutions to complete elections in one second 15 | // (much more than the paper's range of timeouts). 
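// (For reference: the Raft paper suggests randomized election timeouts on
// the order of 150-300 milliseconds, so one second leaves ample headroom.)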
16 | const electionTimeout = 1 * time.Second 17 | 18 | // Check a value for a key 19 | func check(t *testing.T, ck *Clerk, key string, value string) { 20 | v := ck.Get(key) 21 | if v != value { 22 | t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v) 23 | } 24 | } 25 | 26 | // A client runs the function f and then signals it is done 27 | func runClient(t *testing.T, cfg *config, me int, ca chan bool, fn func(me int, ck *Clerk, t *testing.T)) { 28 | ok := false 29 | defer func() { ca <- ok }() 30 | ck := cfg.makeClient(cfg.All()) 31 | fn(me, ck, t) 32 | ok = true 33 | cfg.deleteClient(ck) 34 | } 35 | 36 | // Spawn ncli clients and wait until they are all done 37 | func spawnClientsAndWait(t *testing.T, cfg *config, ncli int, fn func(me int, ck *Clerk, t *testing.T)) { 38 | ca := make([]chan bool, ncli) 39 | for cli := 0; cli < ncli; cli++ { 40 | ca[cli] = make(chan bool) 41 | go runClient(t, cfg, cli, ca[cli], fn) 42 | } 43 | // log.Printf("spawnClientsAndWait: waiting for clients") 44 | for cli := 0; cli < ncli; cli++ { 45 | ok := <-ca[cli] 46 | // log.Printf("spawnClientsAndWait: client %d is done\n", cli) 47 | if ok == false { 48 | t.Fatalf("failure") 49 | } 50 | } 51 | } 52 | 53 | // NextValue predicts effect of Append(k, val) if old value is prev. 54 | func NextValue(prev string, val string) string { 55 | return prev + val 56 | } 57 | 58 | // Check that for a specific client all known appends are present in a value, 59 | // and in order 60 | func checkClntAppends(t *testing.T, clnt int, v string, count int) { 61 | lastoff := -1 62 | for j := 0; j < count; j++ { 63 | wanted := "x " + strconv.Itoa(clnt) + " " + strconv.Itoa(j) + " y" 64 | off := strings.Index(v, wanted) 65 | if off < 0 { 66 | t.Fatalf("%v missing element %v in Append result %v", clnt, wanted, v) 67 | } 68 | off1 := strings.LastIndex(v, wanted) 69 | if off1 != off { 70 | t.Fatalf("duplicate element %v in Append result", wanted) 71 | } 72 | if off <= lastoff { 73 | t.Fatalf("wrong order for element %v in Append result", wanted) 74 | } 75 | lastoff = off 76 | } 77 | } 78 | 79 | // Check that all known appends are present in a value, 80 | // and are in order for each concurrent client. 
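//
// Illustration (made-up value): with two clients that each appended twice,
// a passing value could be
//
//	"x 0 0 yx 1 0 yx 0 1 yx 1 1 y"
//
// each client's entries appear exactly once and in per-client order, while
// entries from different clients may interleave arbitrarily.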
81 | func checkConcurrentAppends(t *testing.T, v string, counts []int) { 82 | nclients := len(counts) 83 | for i := 0; i < nclients; i++ { 84 | lastoff := -1 85 | for j := 0; j < counts[i]; j++ { 86 | wanted := "x " + strconv.Itoa(i) + " " + strconv.Itoa(j) + " y" 87 | off := strings.Index(v, wanted) 88 | if off < 0 { 89 | t.Fatalf("%v missing element %v in Append result %v", i, wanted, v) 90 | } 91 | off1 := strings.LastIndex(v, wanted) 92 | if off1 != off { 93 | t.Fatalf("duplicate element %v in Append result", wanted) 94 | } 95 | if off <= lastoff { 96 | t.Fatalf("wrong order for element %v in Append result", wanted) 97 | } 98 | lastoff = off 99 | } 100 | } 101 | } 102 | 103 | // Repartition the servers periodically 104 | func partitioner(t *testing.T, cfg *config, ch chan bool, done *int32) { 105 | defer func() { ch <- true }() 106 | for atomic.LoadInt32(done) == 0 { 107 | a := make([]int, cfg.n) 108 | for i := 0; i < cfg.n; i++ { 109 | a[i] = (rand.Int() % 2) 110 | } 111 | pa := make([][]int, 2) 112 | for i := 0; i < 2; i++ { 113 | pa[i] = make([]int, 0) 114 | for j := 0; j < cfg.n; j++ { 115 | if a[j] == i { 116 | pa[i] = append(pa[i], j) 117 | } 118 | } 119 | } 120 | cfg.partition(pa[0], pa[1]) 121 | time.Sleep(electionTimeout + time.Duration(rand.Int63()%200)*time.Millisecond) 122 | } 123 | } 124 | 125 | // Basic test is as follows: one or more clients submitting Append/Get 126 | // operations to set of servers for some period of time. After the period is 127 | // over, test checks that all appended values are present and in order for a 128 | // particular key. If unreliable is set, RPCs may fail. If crash is set, the 129 | // servers crash after the period is over and restart. If partitions is set, 130 | // the test repartitions the network concurrently with the clients and servers. If 131 | // maxraftstate is a positive number, the size of the state for Raft (i.e., log 132 | // size) shouldn't exceed 2*maxraftstate. 
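//
// For example, the call used by TestPersistPartitionUnreliable further down
// exercises every option at once:
//
//	GenericTest(t, "persistpartunreliable", 5, true, true, true, -1)
//
// i.e. five clients, an unreliable network, crashing servers, concurrent
// repartitioning, and snapshotting disabled (-1).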
133 | func GenericTest(t *testing.T, tag string, nclients int, unreliable bool, crash bool, partitions bool, maxraftstate int) { 134 | const nservers = 5 135 | cfg := makeConfig(t, tag, nservers, unreliable, maxraftstate) 136 | defer cfg.cleanup() 137 | 138 | ck := cfg.makeClient(cfg.All()) 139 | 140 | donePartitioner := int32(0) 141 | doneClients := int32(0) 142 | chPartitioner := make(chan bool) 143 | clnts := make([]chan int, nclients) 144 | for i := 0; i < nclients; i++ { 145 | clnts[i] = make(chan int) 146 | } 147 | for i := 0; i < 3; i++ { 148 | // log.Printf("Iteration %v\n", i) 149 | atomic.StoreInt32(&doneClients, 0) 150 | atomic.StoreInt32(&donePartitioner, 0) 151 | go spawnClientsAndWait(t, cfg, nclients, func(cli int, myck *Clerk, t *testing.T) { 152 | j := 0 153 | defer func() { 154 | clnts[cli] <- j 155 | }() 156 | last := "" 157 | key := strconv.Itoa(cli) 158 | myck.Put(key, last) 159 | for atomic.LoadInt32(&doneClients) == 0 { 160 | if (rand.Int() % 1000) < 500 { 161 | nv := "x " + strconv.Itoa(cli) + " " + strconv.Itoa(j) + " y" 162 | // log.Printf("%d: client new append %v\n", cli, nv) 163 | myck.Append(key, nv) 164 | last = NextValue(last, nv) 165 | j++ 166 | } else { 167 | // log.Printf("%d: client new get %v\n", cli, key) 168 | v := myck.Get(key) 169 | if v != last { 170 | log.Fatalf("get wrong value, key %v, wanted:\n%v\n, got\n%v\n", key, last, v) 171 | } 172 | } 173 | } 174 | }) 175 | 176 | if partitions { 177 | // Allow the clients to perform some operations without interruption 178 | time.Sleep(1 * time.Second) 179 | go partitioner(t, cfg, chPartitioner, &donePartitioner) 180 | } 181 | time.Sleep(5 * time.Second) 182 | 183 | atomic.StoreInt32(&doneClients, 1) // tell clients to quit 184 | atomic.StoreInt32(&donePartitioner, 1) // tell partitioner to quit 185 | 186 | if partitions { 187 | // log.Printf("wait for partitioner\n") 188 | <-chPartitioner 189 | // reconnect network and submit a request. A client may 190 | // have submitted a request in a minority. That request 191 | // won't return until that server discovers a new term 192 | // has started. 193 | cfg.ConnectAll() 194 | // wait for a while so that we have a new term 195 | time.Sleep(electionTimeout) 196 | } 197 | 198 | if crash { 199 | // log.Printf("shutdown servers\n") 200 | for i := 0; i < nservers; i++ { 201 | cfg.ShutdownServer(i) 202 | } 203 | // Wait for a while for servers to shutdown, since 204 | // shutdown isn't a real crash and isn't instantaneous 205 | time.Sleep(electionTimeout) 206 | // log.Printf("restart servers\n") 207 | // crash and re-start all 208 | for i := 0; i < nservers; i++ { 209 | cfg.StartServer(i) 210 | } 211 | cfg.ConnectAll() 212 | } 213 | 214 | // log.Printf("wait for clients\n") 215 | for i := 0; i < nclients; i++ { 216 | // log.Printf("read from clients %d\n", i) 217 | j := <-clnts[i] 218 | if j < 10 { 219 | // log.Printf("Warning: client %d managed to perform only %d put operations in 1 sec?\n", i, j) 220 | } 221 | key := strconv.Itoa(i) 222 | // log.Printf("Check %v for client %d\n", j, i) 223 | v := ck.Get(key) 224 | checkClntAppends(t, i, v, j) 225 | } 226 | 227 | if maxraftstate > 0 { 228 | // Check maximum after the servers have processed all client 229 | // requests and had time to checkpoint 230 | if cfg.LogSize() > 2*maxraftstate { 231 | t.Fatalf("logs were not trimmed (%v > 2*%v)", cfg.LogSize(), maxraftstate) 232 | } 233 | } 234 | } 235 | 236 | fmt.Printf(" ... 
Passed\n") 237 | } 238 | 239 | func TestBasic(t *testing.T) { 240 | fmt.Printf("Test: One client ...\n") 241 | GenericTest(t, "basic", 1, false, false, false, -1) 242 | } 243 | 244 | func TestConcurrent(t *testing.T) { 245 | fmt.Printf("Test: concurrent clients ...\n") 246 | GenericTest(t, "concur", 5, false, false, false, -1) 247 | } 248 | 249 | func TestUnreliable(t *testing.T) { 250 | fmt.Printf("Test: unreliable ...\n") 251 | GenericTest(t, "unreliable", 5, true, false, false, -1) 252 | } 253 | 254 | func TestUnreliableOneKey(t *testing.T) { 255 | const nservers = 3 256 | cfg := makeConfig(t, "onekey", nservers, true, -1) 257 | defer cfg.cleanup() 258 | 259 | ck := cfg.makeClient(cfg.All()) 260 | 261 | fmt.Printf("Test: Concurrent Append to same key, unreliable ...\n") 262 | 263 | ck.Put("k", "") 264 | 265 | const nclient = 5 266 | const upto = 10 267 | spawnClientsAndWait(t, cfg, nclient, func(me int, myck *Clerk, t *testing.T) { 268 | n := 0 269 | for n < upto { 270 | myck.Append("k", "x "+strconv.Itoa(me)+" "+strconv.Itoa(n)+" y") 271 | n++ 272 | } 273 | }) 274 | 275 | var counts []int 276 | for i := 0; i < nclient; i++ { 277 | counts = append(counts, upto) 278 | } 279 | 280 | vx := ck.Get("k") 281 | checkConcurrentAppends(t, vx, counts) 282 | 283 | fmt.Printf(" ... Passed\n") 284 | } 285 | 286 | // Submit a request in the minority partition and check that the requests 287 | // doesn't go through until the partition heals. The leader in the original 288 | // network ends up in the minority partition. 289 | func TestOnePartition(t *testing.T) { 290 | const nservers = 5 291 | cfg := makeConfig(t, "partition", nservers, false, -1) 292 | defer cfg.cleanup() 293 | ck := cfg.makeClient(cfg.All()) 294 | 295 | ck.Put("1", "13") 296 | 297 | fmt.Printf("Test: Progress in majority ...\n") 298 | 299 | p1, p2 := cfg.makePartition() 300 | cfg.partition(p1, p2) 301 | 302 | ckp1 := cfg.makeClient(p1) // connect ckp1 to p1 303 | ckp2a := cfg.makeClient(p2) // connect ckp2a to p2 304 | ckp2b := cfg.makeClient(p2) // connect ckp2b to p2 305 | 306 | ckp1.Put("1", "14") 307 | check(t, ckp1, "1", "14") 308 | 309 | fmt.Printf(" ... Passed\n") 310 | 311 | done0 := make(chan bool) 312 | done1 := make(chan bool) 313 | 314 | fmt.Printf("Test: No progress in minority ...\n") 315 | go func() { 316 | ckp2a.Put("1", "15") 317 | done0 <- true 318 | }() 319 | go func() { 320 | ckp2b.Get("1") // different clerk in p2 321 | done1 <- true 322 | }() 323 | 324 | select { 325 | case <-done0: 326 | t.Fatalf("Put in minority completed") 327 | case <-done1: 328 | t.Fatalf("Get in minority completed") 329 | case <-time.After(time.Second): 330 | } 331 | 332 | check(t, ckp1, "1", "14") 333 | ckp1.Put("1", "16") 334 | check(t, ckp1, "1", "16") 335 | 336 | fmt.Printf(" ... Passed\n") 337 | 338 | fmt.Printf("Test: Completion after heal ...\n") 339 | 340 | cfg.ConnectAll() 341 | cfg.ConnectClient(ckp2a, cfg.All()) 342 | cfg.ConnectClient(ckp2b, cfg.All()) 343 | 344 | time.Sleep(electionTimeout) 345 | 346 | select { 347 | case <-done0: 348 | case <-time.After(30 * 100 * time.Millisecond): 349 | t.Fatalf("Put did not complete") 350 | } 351 | 352 | select { 353 | case <-done1: 354 | case <-time.After(30 * 100 * time.Millisecond): 355 | t.Fatalf("Get did not complete") 356 | default: 357 | } 358 | 359 | check(t, ck, "1", "15") 360 | 361 | fmt.Printf(" ... 
Passed\n") 362 | } 363 | 364 | func TestManyPartitionsOneClient(t *testing.T) { 365 | fmt.Printf("Test: many partitions ...\n") 366 | GenericTest(t, "manypartitions", 1, false, false, true, -1) 367 | } 368 | 369 | func TestManyPartitionsManyClients(t *testing.T) { 370 | fmt.Printf("Test: many partitions, many clients ...\n") 371 | GenericTest(t, "manypartitionsclnts", 5, false, false, true, -1) 372 | } 373 | 374 | func TestPersistOneClient(t *testing.T) { 375 | fmt.Printf("Test: persistence with one client ...\n") 376 | GenericTest(t, "persistone", 1, false, true, false, -1) 377 | } 378 | 379 | func TestPersistConcurrent(t *testing.T) { 380 | fmt.Printf("Test: persistence with concurrent clients ...\n") 381 | GenericTest(t, "persistconcur", 5, false, true, false, -1) 382 | } 383 | 384 | func TestPersistConcurrentUnreliable(t *testing.T) { 385 | fmt.Printf("Test: persistence with concurrent clients, unreliable ...\n") 386 | GenericTest(t, "persistconcurunreliable", 5, true, true, false, -1) 387 | } 388 | 389 | func TestPersistPartition(t *testing.T) { 390 | fmt.Printf("Test: persistence with concurrent clients and repartitioning servers...\n") 391 | GenericTest(t, "persistpart", 5, false, true, true, -1) 392 | } 393 | 394 | func TestPersistPartitionUnreliable(t *testing.T) { 395 | fmt.Printf("Test: persistence with concurrent clients and repartitioning servers, unreliable...\n") 396 | GenericTest(t, "persistpartunreliable", 5, true, true, true, -1) 397 | } 398 | 399 | // 400 | // if one server falls behind, then rejoins, does it 401 | // recover by using the InstallSnapshot RPC? 402 | // also checks that majority discards committed log entries 403 | // even if minority doesn't respond. 404 | // 405 | func TestSnapshotRPC(t *testing.T) { 406 | const nservers = 3 407 | maxraftstate := 1000 408 | cfg := makeConfig(t, "snapshotrpc", nservers, false, maxraftstate) 409 | defer cfg.cleanup() 410 | 411 | ck := cfg.makeClient(cfg.All()) 412 | 413 | fmt.Printf("Test: InstallSnapshot RPC ...\n") 414 | 415 | ck.Put("a", "A") 416 | check(t, ck, "a", "A") 417 | 418 | // a bunch of puts into the majority partition. 419 | cfg.partition([]int{0, 1}, []int{2}) 420 | { 421 | ck1 := cfg.makeClient([]int{0, 1}) 422 | for i := 0; i < 50; i++ { 423 | ck1.Put(strconv.Itoa(i), strconv.Itoa(i)) 424 | } 425 | time.Sleep(electionTimeout) 426 | ck1.Put("b", "B") 427 | } 428 | 429 | // check that the majority partition has thrown away 430 | // most of its log entries. 431 | if cfg.LogSize() > 2*maxraftstate { 432 | t.Fatalf("logs were not trimmed (%v > 2*%v)", cfg.LogSize(), maxraftstate) 433 | } 434 | 435 | // now make group that requires participation of 436 | // lagging server, so that it has to catch up. 437 | cfg.partition([]int{0, 2}, []int{1}) 438 | { 439 | ck1 := cfg.makeClient([]int{0, 2}) 440 | ck1.Put("c", "C") 441 | ck1.Put("d", "D") 442 | check(t, ck1, "a", "A") 443 | check(t, ck1, "b", "B") 444 | check(t, ck1, "1", "1") 445 | check(t, ck1, "49", "49") 446 | } 447 | 448 | // now everybody 449 | cfg.partition([]int{0, 1, 2}, []int{}) 450 | 451 | ck.Put("e", "E") 452 | check(t, ck, "c", "C") 453 | check(t, ck, "e", "E") 454 | check(t, ck, "1", "1") 455 | 456 | fmt.Printf(" ... Passed\n") 457 | } 458 | 459 | // are the snapshots not too huge? 500 bytes is a generous bound for the 460 | // operations we're doing here. 
461 | func TestSnapshotSize(t *testing.T) { 462 | const nservers = 3 463 | maxraftstate := 1000 464 | maxsnapshotstate := 500 465 | cfg := makeConfig(t, "snapshotsize", nservers, false, maxraftstate) 466 | defer cfg.cleanup() 467 | 468 | ck := cfg.makeClient(cfg.All()) 469 | 470 | fmt.Printf("Test: snapshot size is reasonable ...\n") 471 | 472 | for i := 0; i < 200; i++ { 473 | ck.Put("x", "0") 474 | check(t, ck, "x", "0") 475 | ck.Put("x", "1") 476 | check(t, ck, "x", "1") 477 | } 478 | 479 | // check that servers have thrown away most of their log entries 480 | if cfg.LogSize() > 2*maxraftstate { 481 | t.Fatalf("logs were not trimmed (%v > 2*%v)", cfg.LogSize(), maxraftstate) 482 | } 483 | 484 | // check that the snapshots are not unreasonably large 485 | if cfg.SnapshotSize() > maxsnapshotstate { 486 | t.Fatalf("snapshot too large (%v > %v)", cfg.SnapshotSize(), maxsnapshotstate) 487 | } 488 | 489 | fmt.Printf(" ... Passed\n") 490 | } 491 | 492 | func TestSnapshotRecover(t *testing.T) { 493 | fmt.Printf("Test: persistence with one client and snapshots ...\n") 494 | GenericTest(t, "snapshot", 1, false, true, false, 1000) 495 | } 496 | 497 | func TestSnapshotRecoverManyClients(t *testing.T) { 498 | fmt.Printf("Test: persistence with several clients and snapshots ...\n") 499 | GenericTest(t, "snapshotunreliable", 20, false, true, false, 1000) 500 | } 501 | 502 | func TestSnapshotUnreliable(t *testing.T) { 503 | fmt.Printf("Test: persistence with several clients, snapshots, unreliable ...\n") 504 | GenericTest(t, "snapshotunreliable", 5, true, false, false, 1000) 505 | } 506 | 507 | func TestSnapshotUnreliableRecover(t *testing.T) { 508 | fmt.Printf("Test: persistence with several clients, failures, and snapshots, unreliable ...\n") 509 | GenericTest(t, "snapshotunreliablecrash", 5, true, true, false, 1000) 510 | } 511 | 512 | func TestSnapshotUnreliableRecoverConcurrentPartition(t *testing.T) { 513 | fmt.Printf("Test: persistence with several clients, failures, and snapshots, unreliable and partitions ...\n") 514 | GenericTest(t, "snapshotunreliableconcurpartitions", 5, true, true, true, 1000) 515 | } 516 | -------------------------------------------------------------------------------- /src/shardkv/test_test.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "testing" 4 | import "strconv" 5 | import "time" 6 | import "fmt" 7 | import "sync/atomic" 8 | import "math/rand" 9 | 10 | func check(t *testing.T, ck *Clerk, key string, value string) { 11 | v := ck.Get(key) 12 | if v != value { 13 | t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v) 14 | } 15 | } 16 | 17 | // 18 | // test static 2-way sharding, without shard movement. 19 | // 20 | func TestStaticShards(t *testing.T) { 21 | fmt.Printf("Test: static shards ...\n") 22 | 23 | cfg := makeConfig(t, 3, false, -1) 24 | defer cfg.cleanup() 25 | 26 | ck := cfg.makeClient() 27 | 28 | cfg.join(0) 29 | cfg.join(1) 30 | 31 | n := 10 32 | ka := make([]string, n) 33 | va := make([]string, n) 34 | for i := 0; i < n; i++ { 35 | ka[i] = strconv.Itoa(i) // ensure multiple shards 36 | va[i] = randstring(20) 37 | ck.Put(ka[i], va[i]) 38 | } 39 | for i := 0; i < n; i++ { 40 | check(t, ck, ka[i], va[i]) 41 | } 42 | 43 | // make sure that the data really is sharded by 44 | // shutting down one shard and checking that some 45 | // Get()s don't succeed. 
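// (With two groups joined, the shardmaster should split the 10 shards
// evenly, and the keys "0".."9" used here should land on distinct shards,
// so the test expects exactly 5 of the 10 Gets below to complete while
// group 1 is down; the rest should block until that group restarts.)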
46 | cfg.ShutdownGroup(1) 47 | cfg.checklogs() // forbid snapshots 48 | 49 | ch := make(chan bool) 50 | for xi := 0; xi < n; xi++ { 51 | ck1 := cfg.makeClient() // only one call allowed per client 52 | go func(i int) { 53 | defer func() { ch <- true }() 54 | check(t, ck1, ka[i], va[i]) 55 | }(xi) 56 | } 57 | 58 | // wait a bit, only about half the Gets should succeed. 59 | ndone := 0 60 | done := false 61 | for done == false { 62 | select { 63 | case <-ch: 64 | ndone += 1 65 | case <-time.After(time.Second * 2): 66 | done = true 67 | break 68 | } 69 | } 70 | 71 | if ndone != 5 { 72 | t.Fatalf("expected 5 completions with one shard dead; got %v\n", ndone) 73 | } 74 | 75 | // bring the crashed shard/group back to life. 76 | cfg.StartGroup(1) 77 | for i := 0; i < n; i++ { 78 | check(t, ck, ka[i], va[i]) 79 | } 80 | 81 | fmt.Printf(" ... Passed\n") 82 | } 83 | 84 | func TestJoinLeave(t *testing.T) { 85 | fmt.Printf("Test: join then leave ...\n") 86 | 87 | cfg := makeConfig(t, 3, false, -1) 88 | defer cfg.cleanup() 89 | 90 | ck := cfg.makeClient() 91 | 92 | cfg.join(0) 93 | 94 | n := 10 95 | ka := make([]string, n) 96 | va := make([]string, n) 97 | for i := 0; i < n; i++ { 98 | ka[i] = strconv.Itoa(i) // ensure multiple shards 99 | va[i] = randstring(5) 100 | ck.Put(ka[i], va[i]) 101 | } 102 | for i := 0; i < n; i++ { 103 | check(t, ck, ka[i], va[i]) 104 | } 105 | 106 | cfg.join(1) 107 | 108 | for i := 0; i < n; i++ { 109 | check(t, ck, ka[i], va[i]) 110 | x := randstring(5) 111 | ck.Append(ka[i], x) 112 | va[i] += x 113 | } 114 | 115 | cfg.leave(0) 116 | 117 | for i := 0; i < n; i++ { 118 | check(t, ck, ka[i], va[i]) 119 | x := randstring(5) 120 | ck.Append(ka[i], x) 121 | va[i] += x 122 | } 123 | 124 | // allow time for shards to transfer. 125 | time.Sleep(1 * time.Second) 126 | 127 | cfg.checklogs() 128 | cfg.ShutdownGroup(0) 129 | 130 | for i := 0; i < n; i++ { 131 | check(t, ck, ka[i], va[i]) 132 | } 133 | 134 | fmt.Printf(" ... Passed\n") 135 | } 136 | 137 | func TestSnapshot(t *testing.T) { 138 | fmt.Printf("Test: snapshots, join, and leave ...\n") 139 | 140 | cfg := makeConfig(t, 3, false, 1000) 141 | defer cfg.cleanup() 142 | 143 | ck := cfg.makeClient() 144 | 145 | cfg.join(0) 146 | 147 | n := 30 148 | ka := make([]string, n) 149 | va := make([]string, n) 150 | for i := 0; i < n; i++ { 151 | ka[i] = strconv.Itoa(i) // ensure multiple shards 152 | va[i] = randstring(20) 153 | ck.Put(ka[i], va[i]) 154 | } 155 | for i := 0; i < n; i++ { 156 | check(t, ck, ka[i], va[i]) 157 | } 158 | 159 | cfg.join(1) 160 | cfg.join(2) 161 | cfg.leave(0) 162 | 163 | for i := 0; i < n; i++ { 164 | check(t, ck, ka[i], va[i]) 165 | x := randstring(20) 166 | ck.Append(ka[i], x) 167 | va[i] += x 168 | } 169 | 170 | cfg.leave(1) 171 | cfg.join(0) 172 | 173 | for i := 0; i < n; i++ { 174 | check(t, ck, ka[i], va[i]) 175 | x := randstring(20) 176 | ck.Append(ka[i], x) 177 | va[i] += x 178 | } 179 | 180 | time.Sleep(1 * time.Second) 181 | 182 | for i := 0; i < n; i++ { 183 | check(t, ck, ka[i], va[i]) 184 | } 185 | 186 | time.Sleep(1 * time.Second) 187 | 188 | cfg.checklogs() 189 | 190 | cfg.ShutdownGroup(0) 191 | cfg.ShutdownGroup(1) 192 | cfg.ShutdownGroup(2) 193 | 194 | cfg.StartGroup(0) 195 | cfg.StartGroup(1) 196 | cfg.StartGroup(2) 197 | 198 | for i := 0; i < n; i++ { 199 | check(t, ck, ka[i], va[i]) 200 | } 201 | 202 | fmt.Printf(" ... 
Passed\n") 203 | } 204 | 205 | func TestMissChange(t *testing.T) { 206 | fmt.Printf("Test: servers miss configuration changes...\n") 207 | 208 | cfg := makeConfig(t, 3, false, 1000) 209 | defer cfg.cleanup() 210 | 211 | ck := cfg.makeClient() 212 | 213 | cfg.join(0) 214 | 215 | n := 10 216 | ka := make([]string, n) 217 | va := make([]string, n) 218 | for i := 0; i < n; i++ { 219 | ka[i] = strconv.Itoa(i) // ensure multiple shards 220 | va[i] = randstring(20) 221 | ck.Put(ka[i], va[i]) 222 | } 223 | for i := 0; i < n; i++ { 224 | check(t, ck, ka[i], va[i]) 225 | } 226 | 227 | cfg.join(1) 228 | 229 | cfg.ShutdownServer(0, 0) 230 | cfg.ShutdownServer(1, 0) 231 | cfg.ShutdownServer(2, 0) 232 | 233 | cfg.join(2) 234 | cfg.leave(1) 235 | cfg.leave(0) 236 | 237 | for i := 0; i < n; i++ { 238 | check(t, ck, ka[i], va[i]) 239 | x := randstring(20) 240 | ck.Append(ka[i], x) 241 | va[i] += x 242 | } 243 | 244 | cfg.join(1) 245 | 246 | for i := 0; i < n; i++ { 247 | check(t, ck, ka[i], va[i]) 248 | x := randstring(20) 249 | ck.Append(ka[i], x) 250 | va[i] += x 251 | } 252 | 253 | cfg.StartServer(0, 0) 254 | cfg.StartServer(1, 0) 255 | cfg.StartServer(2, 0) 256 | 257 | for i := 0; i < n; i++ { 258 | check(t, ck, ka[i], va[i]) 259 | x := randstring(20) 260 | ck.Append(ka[i], x) 261 | va[i] += x 262 | } 263 | 264 | time.Sleep(2 * time.Second) 265 | 266 | cfg.ShutdownServer(0, 1) 267 | cfg.ShutdownServer(1, 1) 268 | cfg.ShutdownServer(2, 1) 269 | 270 | cfg.join(0) 271 | cfg.leave(2) 272 | 273 | for i := 0; i < n; i++ { 274 | check(t, ck, ka[i], va[i]) 275 | x := randstring(20) 276 | ck.Append(ka[i], x) 277 | va[i] += x 278 | } 279 | 280 | cfg.StartServer(0, 1) 281 | cfg.StartServer(1, 1) 282 | cfg.StartServer(2, 1) 283 | 284 | for i := 0; i < n; i++ { 285 | check(t, ck, ka[i], va[i]) 286 | } 287 | 288 | fmt.Printf(" ... 
Passed\n") 289 | } 290 | 291 | func TestConcurrent1(t *testing.T) { 292 | fmt.Printf("Test: concurrent puts and configuration changes...\n") 293 | 294 | cfg := makeConfig(t, 3, false, 100) 295 | defer cfg.cleanup() 296 | 297 | ck := cfg.makeClient() 298 | 299 | cfg.join(0) 300 | 301 | n := 10 302 | ka := make([]string, n) 303 | va := make([]string, n) 304 | for i := 0; i < n; i++ { 305 | ka[i] = strconv.Itoa(i) // ensure multiple shards 306 | va[i] = randstring(5) 307 | ck.Put(ka[i], va[i]) 308 | } 309 | 310 | var done int32 311 | ch := make(chan bool) 312 | 313 | ff := func(i int) { 314 | defer func() { ch <- true }() 315 | ck1 := cfg.makeClient() 316 | for atomic.LoadInt32(&done) == 0 { 317 | x := randstring(5) 318 | ck1.Append(ka[i], x) 319 | va[i] += x 320 | time.Sleep(10 * time.Millisecond) 321 | } 322 | } 323 | 324 | for i := 0; i < n; i++ { 325 | go ff(i) 326 | } 327 | 328 | time.Sleep(150 * time.Millisecond) 329 | cfg.join(1) 330 | time.Sleep(500 * time.Millisecond) 331 | cfg.join(2) 332 | time.Sleep(500 * time.Millisecond) 333 | cfg.leave(0) 334 | 335 | cfg.ShutdownGroup(0) 336 | time.Sleep(100 * time.Millisecond) 337 | cfg.ShutdownGroup(1) 338 | time.Sleep(100 * time.Millisecond) 339 | cfg.ShutdownGroup(2) 340 | 341 | cfg.leave(2) 342 | 343 | time.Sleep(100 * time.Millisecond) 344 | cfg.StartGroup(0) 345 | cfg.StartGroup(1) 346 | cfg.StartGroup(2) 347 | 348 | time.Sleep(100 * time.Millisecond) 349 | cfg.join(0) 350 | cfg.leave(1) 351 | time.Sleep(500 * time.Millisecond) 352 | cfg.join(1) 353 | 354 | time.Sleep(1 * time.Second) 355 | 356 | atomic.StoreInt32(&done, 1) 357 | for i := 0; i < n; i++ { 358 | <-ch 359 | } 360 | 361 | for i := 0; i < n; i++ { 362 | check(t, ck, ka[i], va[i]) 363 | } 364 | 365 | fmt.Printf(" ... Passed\n") 366 | } 367 | 368 | // 369 | // this tests the various sources from which a re-starting 370 | // group might need to fetch shard contents. 371 | // 372 | func TestConcurrent2(t *testing.T) { 373 | fmt.Printf("Test: more concurrent puts and configuration changes...\n") 374 | 375 | cfg := makeConfig(t, 3, false, -1) 376 | defer cfg.cleanup() 377 | 378 | ck := cfg.makeClient() 379 | 380 | cfg.join(1) 381 | cfg.join(0) 382 | cfg.join(2) 383 | 384 | n := 10 385 | ka := make([]string, n) 386 | va := make([]string, n) 387 | for i := 0; i < n; i++ { 388 | ka[i] = strconv.Itoa(i) // ensure multiple shards 389 | va[i] = randstring(1) 390 | ck.Put(ka[i], va[i]) 391 | } 392 | 393 | var done int32 394 | ch := make(chan bool) 395 | 396 | ff := func(i int, ck1 *Clerk) { 397 | defer func() { ch <- true }() 398 | for atomic.LoadInt32(&done) == 0 { 399 | x := randstring(1) 400 | ck1.Append(ka[i], x) 401 | va[i] += x 402 | time.Sleep(50 * time.Millisecond) 403 | } 404 | } 405 | 406 | for i := 0; i < n; i++ { 407 | ck1 := cfg.makeClient() 408 | go ff(i, ck1) 409 | } 410 | 411 | cfg.leave(0) 412 | cfg.leave(2) 413 | time.Sleep(3000 * time.Millisecond) 414 | cfg.join(0) 415 | cfg.join(2) 416 | cfg.leave(1) 417 | time.Sleep(3000 * time.Millisecond) 418 | cfg.join(1) 419 | cfg.leave(0) 420 | cfg.leave(2) 421 | time.Sleep(3000 * time.Millisecond) 422 | 423 | cfg.ShutdownGroup(1) 424 | cfg.ShutdownGroup(2) 425 | time.Sleep(1000 * time.Millisecond) 426 | cfg.StartGroup(1) 427 | cfg.StartGroup(2) 428 | 429 | time.Sleep(2 * time.Second) 430 | 431 | atomic.StoreInt32(&done, 1) 432 | for i := 0; i < n; i++ { 433 | <-ch 434 | } 435 | 436 | for i := 0; i < n; i++ { 437 | check(t, ck, ka[i], va[i]) 438 | } 439 | 440 | fmt.Printf(" ... 
Passed\n") 441 | } 442 | 443 | func TestUnreliable1(t *testing.T) { 444 | fmt.Printf("Test: unreliable 1...\n") 445 | 446 | cfg := makeConfig(t, 3, true, 100) 447 | defer cfg.cleanup() 448 | 449 | ck := cfg.makeClient() 450 | 451 | cfg.join(0) 452 | 453 | n := 10 454 | ka := make([]string, n) 455 | va := make([]string, n) 456 | for i := 0; i < n; i++ { 457 | ka[i] = strconv.Itoa(i) // ensure multiple shards 458 | va[i] = randstring(5) 459 | ck.Put(ka[i], va[i]) 460 | } 461 | 462 | cfg.join(1) 463 | cfg.join(2) 464 | cfg.leave(0) 465 | 466 | for ii := 0; ii < n*2; ii++ { 467 | i := ii % n 468 | check(t, ck, ka[i], va[i]) 469 | x := randstring(5) 470 | ck.Append(ka[i], x) 471 | va[i] += x 472 | } 473 | 474 | cfg.join(0) 475 | cfg.leave(1) 476 | 477 | for ii := 0; ii < n*2; ii++ { 478 | i := ii % n 479 | check(t, ck, ka[i], va[i]) 480 | } 481 | 482 | fmt.Printf(" ... Passed\n") 483 | } 484 | 485 | func TestUnreliable2(t *testing.T) { 486 | fmt.Printf("Test: unreliable 2...\n") 487 | 488 | cfg := makeConfig(t, 3, true, 100) 489 | defer cfg.cleanup() 490 | 491 | ck := cfg.makeClient() 492 | 493 | cfg.join(0) 494 | 495 | n := 10 496 | ka := make([]string, n) 497 | va := make([]string, n) 498 | for i := 0; i < n; i++ { 499 | ka[i] = strconv.Itoa(i) // ensure multiple shards 500 | va[i] = randstring(5) 501 | ck.Put(ka[i], va[i]) 502 | } 503 | 504 | var done int32 505 | ch := make(chan bool) 506 | 507 | ff := func(i int) { 508 | defer func() { ch <- true }() 509 | ck1 := cfg.makeClient() 510 | for atomic.LoadInt32(&done) == 0 { 511 | x := randstring(5) 512 | ck1.Append(ka[i], x) 513 | va[i] += x 514 | } 515 | } 516 | 517 | for i := 0; i < n; i++ { 518 | go ff(i) 519 | } 520 | 521 | time.Sleep(150 * time.Millisecond) 522 | cfg.join(1) 523 | time.Sleep(500 * time.Millisecond) 524 | cfg.join(2) 525 | time.Sleep(500 * time.Millisecond) 526 | cfg.leave(0) 527 | time.Sleep(500 * time.Millisecond) 528 | cfg.leave(1) 529 | time.Sleep(500 * time.Millisecond) 530 | cfg.join(1) 531 | cfg.join(0) 532 | 533 | time.Sleep(2 * time.Second) 534 | 535 | atomic.StoreInt32(&done, 1) 536 | cfg.net.Reliable(true) 537 | for i := 0; i < n; i++ { 538 | <-ch 539 | } 540 | 541 | for i := 0; i < n; i++ { 542 | check(t, ck, ka[i], va[i]) 543 | } 544 | 545 | fmt.Printf(" ... Passed\n") 546 | } 547 | 548 | // 549 | // optional test to see whether servers are deleting 550 | // shards for which they are no longer responsible. 551 | // 552 | func TestChallenge1Delete(t *testing.T) { 553 | fmt.Printf("Test: shard deletion (challenge 1) ...\n") 554 | 555 | // "1" means force snapshot after every log entry. 556 | cfg := makeConfig(t, 3, false, 1) 557 | defer cfg.cleanup() 558 | 559 | ck := cfg.makeClient() 560 | 561 | cfg.join(0) 562 | 563 | // 30,000 bytes of total values. 
564 | n := 30 565 | ka := make([]string, n) 566 | va := make([]string, n) 567 | for i := 0; i < n; i++ { 568 | ka[i] = strconv.Itoa(i) 569 | va[i] = randstring(1000) 570 | ck.Put(ka[i], va[i]) 571 | } 572 | for i := 0; i < 3; i++ { 573 | check(t, ck, ka[i], va[i]) 574 | } 575 | 576 | for iters := 0; iters < 2; iters++ { 577 | cfg.join(1) 578 | cfg.leave(0) 579 | cfg.join(2) 580 | time.Sleep(3 * time.Second) 581 | for i := 0; i < 3; i++ { 582 | check(t, ck, ka[i], va[i]) 583 | } 584 | cfg.leave(1) 585 | cfg.join(0) 586 | cfg.leave(2) 587 | time.Sleep(3 * time.Second) 588 | for i := 0; i < 3; i++ { 589 | check(t, ck, ka[i], va[i]) 590 | } 591 | } 592 | 593 | cfg.join(1) 594 | cfg.join(2) 595 | time.Sleep(1 * time.Second) 596 | for i := 0; i < 3; i++ { 597 | check(t, ck, ka[i], va[i]) 598 | } 599 | time.Sleep(1 * time.Second) 600 | for i := 0; i < 3; i++ { 601 | check(t, ck, ka[i], va[i]) 602 | } 603 | time.Sleep(1 * time.Second) 604 | for i := 0; i < 3; i++ { 605 | check(t, ck, ka[i], va[i]) 606 | } 607 | 608 | total := 0 609 | for gi := 0; gi < cfg.ngroups; gi++ { 610 | for i := 0; i < cfg.n; i++ { 611 | raft := cfg.groups[gi].saved[i].RaftStateSize() 612 | snap := len(cfg.groups[gi].saved[i].ReadSnapshot()) 613 | total += raft + snap 614 | } 615 | } 616 | 617 | // 27 keys should be stored once. 618 | // 3 keys should also be stored in client dup tables. 619 | // everything on 3 replicas. 620 | // plus slop. 621 | expected := 3 * (((n - 3) * 1000) + 2*3*1000 + 6000) 622 | if total > expected { 623 | t.Fatalf("snapshot + persisted Raft state are too big: %v > %v\n", total, expected) 624 | } 625 | 626 | for i := 0; i < n; i++ { 627 | check(t, ck, ka[i], va[i]) 628 | } 629 | 630 | fmt.Printf(" ... Passed\n") 631 | } 632 | 633 | func TestChallenge1Concurrent(t *testing.T) { 634 | fmt.Printf("Test: concurrent configuration change and restart (challenge 1)...\n") 635 | 636 | cfg := makeConfig(t, 3, false, 300) 637 | defer cfg.cleanup() 638 | 639 | ck := cfg.makeClient() 640 | 641 | cfg.join(0) 642 | 643 | n := 10 644 | ka := make([]string, n) 645 | va := make([]string, n) 646 | for i := 0; i < n; i++ { 647 | ka[i] = strconv.Itoa(i) 648 | va[i] = randstring(1) 649 | ck.Put(ka[i], va[i]) 650 | } 651 | 652 | var done int32 653 | ch := make(chan bool) 654 | 655 | ff := func(i int, ck1 *Clerk) { 656 | defer func() { ch <- true }() 657 | for atomic.LoadInt32(&done) == 0 { 658 | x := randstring(1) 659 | ck1.Append(ka[i], x) 660 | va[i] += x 661 | } 662 | } 663 | 664 | for i := 0; i < n; i++ { 665 | ck1 := cfg.makeClient() 666 | go ff(i, ck1) 667 | } 668 | 669 | t0 := time.Now() 670 | for time.Since(t0) < 12*time.Second { 671 | cfg.join(2) 672 | cfg.join(1) 673 | time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond) 674 | cfg.ShutdownGroup(0) 675 | cfg.ShutdownGroup(1) 676 | cfg.ShutdownGroup(2) 677 | cfg.StartGroup(0) 678 | cfg.StartGroup(1) 679 | cfg.StartGroup(2) 680 | 681 | time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond) 682 | cfg.leave(1) 683 | cfg.leave(2) 684 | time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond) 685 | } 686 | 687 | time.Sleep(2 * time.Second) 688 | 689 | atomic.StoreInt32(&done, 1) 690 | for i := 0; i < n; i++ { 691 | <-ch 692 | } 693 | 694 | for i := 0; i < n; i++ { 695 | check(t, ck, ka[i], va[i]) 696 | } 697 | 698 | fmt.Printf(" ... 
Passed\n") 699 | } 700 | 701 | // 702 | // optional test to see whether servers can handle 703 | // shards that are not affected by a config change 704 | // while the config change is underway 705 | // 706 | func TestChallenge2Unaffected(t *testing.T) { 707 | fmt.Printf("Test: unaffected shard access (challenge 2) ...\n") 708 | 709 | cfg := makeConfig(t, 3, true, 100) 710 | defer cfg.cleanup() 711 | 712 | ck := cfg.makeClient() 713 | 714 | // JOIN 100 715 | cfg.join(0) 716 | 717 | // Do a bunch of puts to keys in all shards 718 | n := 10 719 | ka := make([]string, n) 720 | va := make([]string, n) 721 | for i := 0; i < n; i++ { 722 | ka[i] = strconv.Itoa(i) // ensure multiple shards 723 | va[i] = "100" 724 | ck.Put(ka[i], va[i]) 725 | } 726 | 727 | // JOIN 101 728 | cfg.join(1) 729 | 730 | // QUERY to find shards now owned by 101 731 | c := cfg.mck.Query(-1) 732 | owned := make(map[int]bool, n) 733 | for s, gid := range c.Shards { 734 | owned[s] = gid == cfg.groups[1].gid 735 | } 736 | 737 | // Wait for migration to new config to complete, and for clients to 738 | // start using this updated config. Gets to any key k such that 739 | // owned[shard(k)] == true should now be served by group 101. 740 | <-time.After(1 * time.Second) 741 | for i := 0; i < n; i++ { 742 | if owned[i] { 743 | va[i] = "101" 744 | ck.Put(ka[i], va[i]) 745 | } 746 | } 747 | 748 | // KILL 100 749 | cfg.ShutdownGroup(0) 750 | 751 | // LEAVE 100 752 | // 101 doesn't get a chance to migrate things previously owned by 100 753 | cfg.leave(0) 754 | 755 | // Wait to make sure clients see new config 756 | <-time.After(1 * time.Second) 757 | 758 | // And finally: check that gets/puts for 101-owned keys still complete 759 | for i := 0; i < n; i++ { 760 | shard := int(ka[i][0]) % 10 761 | if owned[shard] { 762 | check(t, ck, ka[i], va[i]) 763 | ck.Put(ka[i], va[i]+"-1") 764 | check(t, ck, ka[i], va[i]+"-1") 765 | } 766 | } 767 | 768 | fmt.Printf(" ... Passed\n") 769 | } 770 | 771 | // 772 | // optional test to see whether servers can handle operations on shards that 773 | // have been received as a part of a config migration when the entire migration 774 | // has not yet completed. 775 | // 776 | func TestChallenge2Partial(t *testing.T) { 777 | fmt.Printf("Test: partial migration shard access (challenge 2) ...\n") 778 | 779 | cfg := makeConfig(t, 3, true, 100) 780 | defer cfg.cleanup() 781 | 782 | ck := cfg.makeClient() 783 | 784 | // JOIN 100 + 101 + 102 785 | cfg.joinm([]int{0, 1, 2}) 786 | 787 | // Give the implementation some time to reconfigure 788 | <-time.After(1 * time.Second) 789 | 790 | // Do a bunch of puts to keys in all shards 791 | n := 10 792 | ka := make([]string, n) 793 | va := make([]string, n) 794 | for i := 0; i < n; i++ { 795 | ka[i] = strconv.Itoa(i) // ensure multiple shards 796 | va[i] = "100" 797 | ck.Put(ka[i], va[i]) 798 | } 799 | 800 | // QUERY to find shards owned by 102 801 | c := cfg.mck.Query(-1) 802 | owned := make(map[int]bool, n) 803 | for s, gid := range c.Shards { 804 | owned[s] = gid == cfg.groups[2].gid 805 | } 806 | 807 | // KILL 100 808 | cfg.ShutdownGroup(0) 809 | 810 | // LEAVE 100 + 102 811 | // 101 can get old shards from 102, but not from 100. 
101 should start 812 | // serving shards that used to belong to 102 as soon as possible 813 | cfg.leavem([]int{0, 2}) 814 | 815 | // Give the implementation some time to start reconfiguration 816 | // And to migrate 102 -> 101 817 | <-time.After(1 * time.Second) 818 | 819 | // And finally: check that gets/puts for 101-owned keys now complete 820 | for i := 0; i < n; i++ { 821 | shard := key2shard(ka[i]) 822 | if owned[shard] { 823 | check(t, ck, ka[i], va[i]) 824 | ck.Put(ka[i], va[i]+"-2") 825 | check(t, ck, ka[i], va[i]+"-2") 826 | } 827 | } 828 | 829 | fmt.Printf(" ... Passed\n") 830 | } 831 | --------------------------------------------------------------------------------