├── src ├── go.sum ├── go.mod ├── .gitignore ├── shardkv │ ├── empty.go │ ├── utils.go │ ├── monitor.go │ ├── commandDef.go │ ├── execute.go │ ├── shard.go │ ├── common.go │ ├── commandHandler.go │ ├── gc.go │ ├── apply.go │ ├── snapshot.go │ ├── configure.go │ ├── migration.go │ ├── client.go │ ├── server.go │ ├── dslogs │ └── dstest ├── raft │ ├── constant.go │ ├── snapshotHandler.go │ ├── snapshot.go │ ├── status.go │ ├── election.go │ ├── apply.go │ ├── ticker.go │ ├── persist.go │ ├── persister.go │ ├── electionHandler.go │ ├── log.go │ ├── appendEntries.go │ ├── appendEntriesHandler.go │ ├── interface.go │ ├── rpc.go │ └── raft.go ├── main │ ├── viewd.go │ ├── pbd.go │ ├── test-mr-many.sh │ ├── mrcoordinator.go │ ├── lockc.go │ ├── lockd.go │ ├── pbc.go │ ├── mrworker.go │ ├── diskvd.go │ ├── mrsequential.go │ └── test-mr.sh ├── shardctrler │ ├── utils.go │ ├── common.go │ ├── apply.go │ ├── client.go │ ├── server.go │ ├── configModel.go │ └── config.go ├── kvraft │ ├── kv.go │ ├── common.go │ ├── snapshot.go │ ├── apply.go │ ├── client.go │ └── server.go ├── mrapps │ ├── early_exit.go │ ├── nocrash.go │ ├── jobcount.go │ ├── indexer.go │ ├── wc.go │ ├── crash.go │ ├── rtiming.go │ └── mtiming.go ├── utils │ ├── utils.go │ ├── dslogs │ └── dstest ├── mr │ ├── rpc.go │ ├── coordinator.go │ └── worker.go ├── porcupine │ ├── porcupine.go │ ├── bitset.go │ ├── model.go │ └── checker.go ├── models │ └── kv.go └── labgob │ ├── test_test.go │ └── labgob.go ├── .gitignore ├── README.md ├── Makefile └── .check-build /src/go.sum: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/go.mod: -------------------------------------------------------------------------------- 1 | module 6.824 2 | 3 | go 1.15 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | pkg/ 2 | api.key 3 | .api.key.trimmed 4 | *-handin.tar.gz 5 | *.log 6 | .DS_Store 7 | .idea -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.*/ 2 | main/mr-tmp/ 3 | mrtmp.* 4 | 824-mrinput-*.txt 5 | /main/diff.out 6 | /mapreduce/x.txt 7 | /pbservice/x.txt 8 | /kvpaxos/x.txt 9 | *.so 10 | /main/mrcoordinator 11 | /main/mrsequential 12 | /main/mrworker 13 | -------------------------------------------------------------------------------- /src/shardkv/empty.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | func (kv *ShardKV) checkEntryInCurrentTermAction() { 4 | if !kv.rf.HasLogInCurrentTerm() { 5 | kv.Execute(NewEmptyEntryCommand(), &OpResp{}) 6 | } 7 | } 8 | 9 | func (kv *ShardKV) applyEmptyEntry() *OpResp { 10 | return &OpResp{OK, ""} 11 | } 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mit6.824-2021Spring 2 | 3 | Because bugs in distributed systems are random and non-deterministic, I cannot guarantee the code is absolutely bug-free, but I have tracked down every bug I could find. (fingers crossed) 4 | 5 | lab1: MapReduce built on top of the Linux file system. (100+ test runs, no bugs observed) 6 | 7 | lab2: the Raft algorithm, including leader election, log replication, persistence, and snapshots. (5000+ test runs, no bugs observed) 8 | 9 | lab3: a replicated KV store (multiple KV servers hold the same data). (1000+ test runs) 10 | 11 | lab4: a sharded KV store (shardKV): each group holds a different slice of the data (keys are partitioned horizontally by hash), while the KV servers within a group hold the same data. (1000+ test runs) 12 | 
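To make the lab 4 description concrete, here is a minimal, self-contained sketch of the kind of hash-based key-to-shard mapping it refers to: each key is reduced to a shard number, and the shard controller decides which replica group currently serves that shard. The repository's real mapping lives under src/shardkv and src/shardctrler (not shown in this excerpt); the `NShards` constant and the first-byte scheme below are assumptions borrowed from the standard 6.824 skeleton.

```go
package main

import "fmt"

// NShards is the total number of shards; 10 is assumed here.
const NShards = 10

// key2shard reduces a key to a shard number. All keys that map to the
// same shard are served by whichever replica group owns that shard in
// the current configuration.
func key2shard(key string) int {
	shard := 0
	if len(key) > 0 {
		shard = int(key[0]) // hash on the first byte of the key
	}
	return shard % NShards
}

func main() {
	for _, key := range []string{"apple", "banana", "counter"} {
		fmt.Printf("key %q -> shard %d\n", key, key2shard(key))
	}
}
```

Within a group, every replica applies the same Raft log and therefore converges on the same data for the shards it owns; handing a shard from one group to another is what the migration and GC code in src/shardkv deals with.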
-------------------------------------------------------------------------------- /src/raft/constant.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // election 4 | const ( 5 | // magic number 6 | voted_nil int = -12345 7 | ) 8 | 9 | // appendEntries 10 | const ( 11 | magic_index int = 0 12 | magic_term int = -1 13 | ) 14 | 15 | // ticker 16 | const ( 17 | gap_time int = 3 18 | election_base_time int = 300 19 | election_range_time int = 100 20 | heartbeat_time int = 50 21 | ) 22 | -------------------------------------------------------------------------------- /src/main/viewd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "6.824/viewservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 2 { 14 | fmt.Printf("Usage: viewd port\n") 15 | os.Exit(1) 16 | } 17 | 18 | viewservice.StartServer(os.Args[1]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/pbd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "6.824/pbservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 3 { 14 | fmt.Printf("Usage: pbd viewport myport\n") 15 | os.Exit(1) 16 | } 17 | 18 | pbservice.StartServer(os.Args[1], os.Args[2]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/test-mr-many.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ $# -ne 1 ]; then 4 | echo "Usage: $0 numTrials" 5 | exit 1 6 | fi 7 | 8 | trap 'kill -INT -$pid; exit 1' INT 9 | 10 | # Note: because the socketID is based on the current userID, 11 | # ./test-mr.sh cannot be run in parallel 12 | runs=$1 13 | chmod +x test-mr.sh 14 | 15 | for i in $(seq 1 $runs); do 16 | timeout -k 2s 900s ./test-mr.sh & 17 | pid=$! 18 | if ! wait $pid; then 19 | echo '***' FAILED TESTS IN TRIAL $i 20 | exit 1 21 | fi 22 | done 23 | echo '***' PASSED ALL $i TESTING TRIALS 24 | -------------------------------------------------------------------------------- /src/main/mrcoordinator.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start the coordinator process, which is implemented 5 | // in ../mr/coordinator.go 6 | // 7 | // go run mrcoordinator.go pg*.txt 8 | // 9 | // Please do not change this file. 
10 | // 11 | 12 | import "6.824/mr" 13 | import "time" 14 | import "os" 15 | import "fmt" 16 | 17 | func main() { 18 | if len(os.Args) < 2 { 19 | fmt.Fprintf(os.Stderr, "Usage: mrcoordinator inputfiles...\n") 20 | os.Exit(1) 21 | } 22 | 23 | m := mr.MakeCoordinator(os.Args[1:], 10) 24 | for m.Done() == false { 25 | time.Sleep(time.Second) 26 | } 27 | 28 | time.Sleep(time.Second) 29 | } 30 | -------------------------------------------------------------------------------- /src/main/lockc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see comments in lockd.go 5 | // 6 | 7 | import "6.824/lockservice" 8 | import "os" 9 | import "fmt" 10 | 11 | func usage() { 12 | fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n") 13 | os.Exit(1) 14 | } 15 | 16 | func main() { 17 | if len(os.Args) == 5 { 18 | ck := lockservice.MakeClerk(os.Args[2], os.Args[3]) 19 | var ok bool 20 | if os.Args[1] == "-l" { 21 | ok = ck.Lock(os.Args[4]) 22 | } else if os.Args[1] == "-u" { 23 | ok = ck.Unlock(os.Args[4]) 24 | } else { 25 | usage() 26 | } 27 | fmt.Printf("reply: %v\n", ok) 28 | } else { 29 | usage() 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/lockd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // export GOPATH=~/6.824 4 | // go build lockd.go 5 | // go build lockc.go 6 | // ./lockd -p a b & 7 | // ./lockd -b a b & 8 | // ./lockc -l a b lx 9 | // ./lockc -u a b lx 10 | // 11 | // on Athena, use /tmp/myname-a and /tmp/myname-b 12 | // instead of a and b. 13 | 14 | import "time" 15 | import "6.824/lockservice" 16 | import "os" 17 | import "fmt" 18 | 19 | func main() { 20 | if len(os.Args) == 4 && os.Args[1] == "-p" { 21 | lockservice.StartServer(os.Args[2], os.Args[3], true) 22 | } else if len(os.Args) == 4 && os.Args[1] == "-b" { 23 | lockservice.StartServer(os.Args[2], os.Args[3], false) 24 | } else { 25 | fmt.Printf("Usage: lockd -p|-b primaryport backupport\n") 26 | os.Exit(1) 27 | } 28 | for { 29 | time.Sleep(100 * time.Second) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/shardkv/utils.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "time" 7 | ) 8 | 9 | // Debugging 10 | const debug = true 11 | 12 | type logTopic string 13 | 14 | const ( 15 | dClient logTopic = "CLNT" 16 | dError logTopic = "ERRO" 17 | dInfo logTopic = "INFO" 18 | dLog logTopic = "LOG1" 19 | dLog2 logTopic = "LOG2" 20 | dTest logTopic = "TEST" 21 | dTrace logTopic = "TRCE" 22 | dWarn logTopic = "WARN" 23 | dServer logTopic = "SEVR" 24 | ) 25 | 26 | var debugStart time.Time 27 | 28 | func init() { 29 | debugStart = time.Now() 30 | 31 | log.SetFlags(log.Flags() &^ (log.Ldate | log.Ltime)) 32 | } 33 | 34 | func Debug(topic logTopic, format string, a ...interface{}) { 35 | if debug { 36 | time := time.Since(debugStart).Microseconds() 37 | time /= 100 38 | prefix := fmt.Sprintf("%06d %v ", time, string(topic)) 39 | format = prefix + format 40 | log.Printf(format, a...) 
41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/shardctrler/utils.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "time" 7 | ) 8 | 9 | // Debugging 10 | const debug = false 11 | 12 | type logTopic string 13 | 14 | const ( 15 | dClient logTopic = "CLNT" 16 | dError logTopic = "ERRO" 17 | dInfo logTopic = "INFO" 18 | dLog logTopic = "LOG1" 19 | dLog2 logTopic = "LOG2" 20 | dTest logTopic = "TEST" 21 | dTrace logTopic = "TRCE" 22 | dWarn logTopic = "WARN" 23 | dServer logTopic = "SEVR" 24 | ) 25 | 26 | var debugStart time.Time 27 | 28 | func init() { 29 | debugStart = time.Now() 30 | 31 | log.SetFlags(log.Flags() &^ (log.Ldate | log.Ltime)) 32 | } 33 | 34 | func Debug(topic logTopic, format string, a ...interface{}) { 35 | if debug { 36 | time := time.Since(debugStart).Microseconds() 37 | time /= 100 38 | prefix := fmt.Sprintf("%06d %v ", time, string(topic)) 39 | format = prefix + format 40 | log.Printf(format, a...) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/shardkv/monitor.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "time" 4 | 5 | const ( 6 | ConfigureMonitorTimeout time.Duration = time.Duration(50) * time.Millisecond 7 | MigrationMonitorTimeout time.Duration = time.Duration(50) * time.Millisecond 8 | GCMonitorTimeout time.Duration = time.Duration(50) * time.Millisecond 9 | checkEntryInCurrentTermTimeout time.Duration = time.Duration(100) * time.Millisecond 10 | ) 11 | 12 | func (kv *ShardKV) startMonitor() { 13 | go kv.monitor(kv.configureAction, ConfigureMonitorTimeout) 14 | go kv.monitor(kv.migrationAction, MigrationMonitorTimeout) 15 | go kv.monitor(kv.gcAction, GCMonitorTimeout) 16 | go kv.monitor(kv.checkEntryInCurrentTermAction, checkEntryInCurrentTermTimeout) 17 | } 18 | 19 | func (kv *ShardKV) monitor(action func(), timeout time.Duration) { 20 | for kv.killed() == false { 21 | if _, isLeader := kv.rf.GetState(); isLeader { 22 | action() 23 | } 24 | time.Sleep(timeout) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/kvraft/kv.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | type KV struct { 4 | Kvmap map[string]string 5 | } 6 | 7 | func NewKV() *KV { 8 | return &KV{make(map[string]string)} 9 | } 10 | 11 | func (kv *KV) Put(key string, value string) Err { 12 | kv.Kvmap[key] = value 13 | return OK 14 | } 15 | 16 | func (kv *KV) Append(key string, value string) Err { 17 | if value_ori, ok := kv.Kvmap[key]; ok { 18 | kv.Kvmap[key] = value_ori + value 19 | return OK 20 | } 21 | kv.Kvmap[key] = value 22 | return OK 23 | } 24 | 25 | func (kv *KV) Get(key string) (string, Err) { 26 | if value, ok := kv.Kvmap[key]; ok { 27 | return value, OK 28 | } 29 | return "", ErrNoKey 30 | } 31 | 32 | func (kv *KVServer) Opt(cmd Op) (string, Err) { 33 | switch cmd.OpType { 34 | case OpGet: 35 | value, err := kv.KvMap.Get(cmd.Key) 36 | return value, err 37 | case OpPut: 38 | err := kv.KvMap.Put(cmd.Key, cmd.Value) 39 | return "", err 40 | case OpAppend: 41 | err := kv.KvMap.Append(cmd.Key, cmd.Value) 42 | return "", err 43 | default: 44 | return "", OK 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/shardkv/commandDef.go: 
-------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "fmt" 5 | 6 | "6.824/shardctrler" 7 | ) 8 | 9 | type Command struct { 10 | Op CommandType 11 | Data interface{} 12 | } 13 | 14 | func (command Command) String() string { 15 | return fmt.Sprintf("{Type:%v,Data:%v}", command.Op, command.Data) 16 | } 17 | 18 | func NewOperationCommand(args *CmdArgs) Command { 19 | return Command{Operation, *args} 20 | } 21 | 22 | func NewConfigurationCommand(config *shardctrler.Config) Command { 23 | return Command{Configuration, *config} 24 | } 25 | 26 | func NewInsertShardsCommand(pullReply *PullDataReply) Command { 27 | return Command{InsertShards, *pullReply} 28 | } 29 | 30 | func NewDeleteShardsCommand(pullArgs *PullDataArgs) Command { 31 | return Command{DeleteShards, *pullArgs} 32 | } 33 | 34 | func NewEmptyEntryCommand() Command { 35 | return Command{EmptyEntry, nil} 36 | } 37 | 38 | type CommandType uint8 39 | 40 | const ( 41 | Operation CommandType = iota 42 | Configuration 43 | InsertShards 44 | DeleteShards 45 | EmptyEntry 46 | ) 47 | -------------------------------------------------------------------------------- /src/main/pbc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // pbservice client application 5 | // 6 | // export GOPATH=~/6.824 7 | // go build viewd.go 8 | // go build pbd.go 9 | // go build pbc.go 10 | // ./viewd /tmp/rtm-v & 11 | // ./pbd /tmp/rtm-v /tmp/rtm-1 & 12 | // ./pbd /tmp/rtm-v /tmp/rtm-2 & 13 | // ./pbc /tmp/rtm-v key1 value1 14 | // ./pbc /tmp/rtm-v key1 15 | // 16 | // change "rtm" to your user name. 17 | // start the pbd programs in separate windows and kill 18 | // and restart them to exercise fault tolerance. 19 | // 20 | 21 | import "6.824/pbservice" 22 | import "os" 23 | import "fmt" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: pbc viewport key\n") 27 | fmt.Printf(" pbc viewport key value\n") 28 | os.Exit(1) 29 | } 30 | 31 | func main() { 32 | if len(os.Args) == 3 { 33 | // get 34 | ck := pbservice.MakeClerk(os.Args[1], "") 35 | v := ck.Get(os.Args[2]) 36 | fmt.Printf("%v\n", v) 37 | } else if len(os.Args) == 4 { 38 | // put 39 | ck := pbservice.MakeClerk(os.Args[1], "") 40 | ck.Put(os.Args[2], os.Args[3]) 41 | } else { 42 | usage() 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/mrapps/early_exit.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a word-count application "plugin" for MapReduce. 5 | // 6 | // go build -buildmode=plugin wc_long.go 7 | // 8 | 9 | import ( 10 | "strconv" 11 | "strings" 12 | "time" 13 | 14 | "6.824/mr" 15 | ) 16 | 17 | // 18 | // The map function is called once for each file of input. 19 | // This map function just returns 1 for each file 20 | // 21 | func Map(filename string, contents string) []mr.KeyValue { 22 | kva := []mr.KeyValue{} 23 | kva = append(kva, mr.KeyValue{filename, "1"}) 24 | return kva 25 | } 26 | 27 | // 28 | // The reduce function is called once for each key generated by the 29 | // map tasks, with a list of all the values created for that key by 30 | // any map task. 
31 | // 32 | func Reduce(key string, values []string) string { 33 | // some reduce tasks sleep for a long time; potentially seeing if 34 | // a worker will accidentally exit early 35 | if strings.Contains(key, "sherlock") || strings.Contains(key, "tom") { 36 | time.Sleep(time.Duration(3 * time.Second)) 37 | } 38 | // return the number of occurrences of this file. 39 | return strconv.Itoa(len(values)) 40 | } 41 | -------------------------------------------------------------------------------- /src/mrapps/nocrash.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // same as crash.go but doesn't actually crash. 5 | // 6 | // go build -buildmode=plugin nocrash.go 7 | // 8 | 9 | import "6.824/mr" 10 | import crand "crypto/rand" 11 | import "math/big" 12 | import "strings" 13 | import "os" 14 | import "sort" 15 | import "strconv" 16 | 17 | func maybeCrash() { 18 | max := big.NewInt(1000) 19 | rr, _ := crand.Int(crand.Reader, max) 20 | if false && rr.Int64() < 500 { 21 | // crash! 22 | os.Exit(1) 23 | } 24 | } 25 | 26 | func Map(filename string, contents string) []mr.KeyValue { 27 | maybeCrash() 28 | 29 | kva := []mr.KeyValue{} 30 | kva = append(kva, mr.KeyValue{"a", filename}) 31 | kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))}) 32 | kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))}) 33 | kva = append(kva, mr.KeyValue{"d", "xyzzy"}) 34 | return kva 35 | } 36 | 37 | func Reduce(key string, values []string) string { 38 | maybeCrash() 39 | 40 | // sort values to ensure deterministic output. 41 | vv := make([]string, len(values)) 42 | copy(vv, values) 43 | sort.Strings(vv) 44 | 45 | val := strings.Join(vv, " ") 46 | return val 47 | } 48 | -------------------------------------------------------------------------------- /src/raft/snapshotHandler.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { 6 | rf.mu.Lock() 7 | defer rf.mu.Unlock() 8 | 9 | utils.Debug(utils.DSnap, "S%d S%d installSnapshot", rf.me, args.LeaderId) 10 | defer utils.Debug(utils.DSnap, "S%d arg: %+v reply: %+v", rf.me, args, reply) 11 | 12 | if args.Term < rf.currentTerm { 13 | reply.Term = rf.currentTerm 14 | return 15 | } 16 | 17 | if args.Term > rf.currentTerm { 18 | rf.currentTerm, rf.votedFor = args.Term, voted_nil 19 | rf.persist() 20 | rf.TurnTo(follower) 21 | } 22 | 23 | if rf.status != follower { 24 | rf.TurnTo(follower) 25 | } 26 | 27 | reply.Term = rf.currentTerm 28 | rf.resetElectionTime() 29 | 30 | if args.LastIncludedIndex <= rf.commitIndex { 31 | utils.Debug(utils.DSnap, "S%d args's snapshot too old(%d < %d)", rf.me, args.LastIncludedIndex, rf.commitIndex) 32 | return 33 | } 34 | 35 | go func() { 36 | rf.applyCh <- ApplyMsg{ 37 | SnapshotValid: true, 38 | Snapshot: args.Data, 39 | SnapshotTerm: args.LastIncludedTerm, 40 | SnapshotIndex: args.LastIncludedIndex, 41 | } 42 | }() 43 | } 44 | -------------------------------------------------------------------------------- /src/utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "time" 7 | ) 8 | 9 | // Debugging 10 | const debug = false 11 | 12 | type logTopic string 13 | 14 | const ( 15 | DClient logTopic = "CLNT" 16 | DCommit logTopic = "CMIT" 17 | DDrop logTopic = "DROP" 18 | DError logTopic = "ERRO" 
19 | DInfo logTopic = "INFO" 20 | DLeader logTopic = "LEAD" 21 | DLog logTopic = "LOG1" 22 | DLog2 logTopic = "LOG2" 23 | DPersist logTopic = "PERS" 24 | DSnap logTopic = "SNAP" 25 | DTerm logTopic = "TERM" 26 | dTest logTopic = "TEST" 27 | DTimer logTopic = "TIMR" 28 | DTrace logTopic = "TRCE" 29 | DVote logTopic = "VOTE" 30 | DWarn logTopic = "WARN" 31 | DServer logTopic = "SEVR" 32 | ) 33 | 34 | var debugStart time.Time 35 | 36 | func init() { 37 | debugStart = time.Now() 38 | 39 | log.SetFlags(log.Flags() &^ (log.Ldate | log.Ltime)) 40 | } 41 | 42 | func Debug(topic logTopic, format string, a ...interface{}) { 43 | if debug { 44 | time := time.Since(debugStart).Microseconds() 45 | time /= 100 46 | prefix := fmt.Sprintf("%06d %v ", time, string(topic)) 47 | format = prefix + format 48 | log.Printf(format, a...) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/mr/rpc.go: -------------------------------------------------------------------------------- 1 | package mr 2 | 3 | // 4 | // RPC definitions. 5 | // 6 | // remember to capitalize all names. 7 | // 8 | 9 | import ( 10 | "os" 11 | "strconv" 12 | ) 13 | 14 | // 15 | // example to show how to declare the arguments 16 | // and reply for an RPC. 17 | // 18 | 19 | type Errno int 20 | 21 | const ( 22 | SuccessCode Errno = iota 23 | ServiceErrCode 24 | ParaErrCode 25 | ) 26 | 27 | type TaskType int 28 | 29 | const ( 30 | MAP TaskType = iota 31 | REDUCE 32 | WAIT 33 | STOP 34 | ) 35 | 36 | // Add your RPC definitions here. 37 | type GetTaskArgs struct { 38 | } 39 | 40 | type GetTaskReply struct { 41 | Type TaskType 42 | Filenames []string 43 | Task_no int 44 | NReduce int 45 | Err Errno 46 | } 47 | 48 | type FinishTaskArgs struct { 49 | Type TaskType 50 | Task_no int 51 | } 52 | 53 | type FinishTaskReply struct { 54 | Err Errno 55 | } 56 | 57 | // Cook up a unique-ish UNIX-domain socket name 58 | // in /var/tmp, for the coordinator. 59 | // Can't use the current directory since 60 | // Athena AFS doesn't support UNIX-domain sockets. 61 | func coordinatorSock() string { 62 | s := "/var/tmp/824-mr-" 63 | s += strconv.Itoa(os.Getuid()) 64 | return s 65 | } 66 | -------------------------------------------------------------------------------- /src/mrapps/jobcount.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a MapReduce pseudo-application that counts the number of times map/reduce 5 | // tasks are run, to test whether jobs are assigned multiple times even when 6 | // there is no failure. 
7 | // 8 | // go build -buildmode=plugin crash.go 9 | // 10 | 11 | import "6.824/mr" 12 | import "math/rand" 13 | import "strings" 14 | import "strconv" 15 | import "time" 16 | import "fmt" 17 | import "os" 18 | import "io/ioutil" 19 | 20 | var count int 21 | 22 | func Map(filename string, contents string) []mr.KeyValue { 23 | me := os.Getpid() 24 | f := fmt.Sprintf("mr-worker-jobcount-%d-%d", me, count) 25 | count++ 26 | err := ioutil.WriteFile(f, []byte("x"), 0666) 27 | if err != nil { 28 | panic(err) 29 | } 30 | time.Sleep(time.Duration(2000+rand.Intn(3000)) * time.Millisecond) 31 | return []mr.KeyValue{mr.KeyValue{"a", "x"}} 32 | } 33 | 34 | func Reduce(key string, values []string) string { 35 | files, err := ioutil.ReadDir(".") 36 | if err != nil { 37 | panic(err) 38 | } 39 | invocations := 0 40 | for _, f := range files { 41 | if strings.HasPrefix(f.Name(), "mr-worker-jobcount") { 42 | invocations++ 43 | } 44 | } 45 | return strconv.Itoa(invocations) 46 | } 47 | -------------------------------------------------------------------------------- /src/shardkv/execute.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "time" 4 | 5 | func (kv *ShardKV) Execute(cmd Command, reply *OpResp) { 6 | index, term, is_leader := kv.rf.Start(cmd) 7 | if !is_leader { 8 | reply.Value, reply.Err = "", ErrWrongLeader 9 | return 10 | } 11 | 12 | kv.mu.Lock() 13 | it := IndexAndTerm{index, term} 14 | ch := make(chan OpResp, 1) 15 | kv.cmdRespChans[it] = ch 16 | kv.mu.Unlock() 17 | 18 | defer func() { 19 | kv.mu.Lock() 20 | // close(kv.cmdRespChans[index]) 21 | delete(kv.cmdRespChans, it) 22 | kv.mu.Unlock() 23 | close(ch) 24 | }() 25 | 26 | t := time.NewTimer(cmd_timeout) 27 | defer t.Stop() 28 | 29 | for { 30 | kv.mu.Lock() 31 | select { 32 | case resp := <-ch: 33 | reply.Value, reply.Err = resp.Value, resp.Err 34 | kv.mu.Unlock() 35 | return 36 | case <-t.C: 37 | priority: 38 | for { 39 | select { 40 | case resp := <-ch: 41 | reply.Value, reply.Err = resp.Value, resp.Err 42 | kv.mu.Unlock() 43 | return 44 | default: 45 | break priority 46 | } 47 | } 48 | reply.Value, reply.Err = "", ErrTimeout 49 | kv.mu.Unlock() 50 | return 51 | default: 52 | kv.mu.Unlock() 53 | time.Sleep(gap_time) 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/mrapps/indexer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // an indexing application "plugin" for MapReduce. 5 | // 6 | // go build -buildmode=plugin indexer.go 7 | // 8 | 9 | import "fmt" 10 | import "6.824/mr" 11 | 12 | import "strings" 13 | import "unicode" 14 | import "sort" 15 | 16 | // The mapping function is called once for each piece of the input. 17 | // In this framework, the key is the name of the file that is being processed, 18 | // and the value is the file's contents. The return value should be a slice of 19 | // key/value pairs, each represented by a mr.KeyValue. 
20 | func Map(document string, value string) (res []mr.KeyValue) { 21 | m := make(map[string]bool) 22 | words := strings.FieldsFunc(value, func(x rune) bool { return !unicode.IsLetter(x) }) 23 | for _, w := range words { 24 | m[w] = true 25 | } 26 | for w := range m { 27 | kv := mr.KeyValue{w, document} 28 | res = append(res, kv) 29 | } 30 | return 31 | } 32 | 33 | // The reduce function is called once for each key generated by Map, with a 34 | // list of that key's string value (merged across all inputs). The return value 35 | // should be a single output value for that key. 36 | func Reduce(key string, values []string) string { 37 | sort.Strings(values) 38 | return fmt.Sprintf("%d %s", len(values), strings.Join(values, ",")) 39 | } 40 | -------------------------------------------------------------------------------- /src/mrapps/wc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a word-count application "plugin" for MapReduce. 5 | // 6 | // go build -buildmode=plugin wc.go 7 | // 8 | 9 | import "6.824/mr" 10 | import "unicode" 11 | import "strings" 12 | import "strconv" 13 | 14 | // 15 | // The map function is called once for each file of input. The first 16 | // argument is the name of the input file, and the second is the 17 | // file's complete contents. You should ignore the input file name, 18 | // and look only at the contents argument. The return value is a slice 19 | // of key/value pairs. 20 | // 21 | func Map(filename string, contents string) []mr.KeyValue { 22 | // function to detect word separators. 23 | ff := func(r rune) bool { return !unicode.IsLetter(r) } 24 | 25 | // split contents into an array of words. 26 | words := strings.FieldsFunc(contents, ff) 27 | 28 | kva := []mr.KeyValue{} 29 | for _, w := range words { 30 | kv := mr.KeyValue{w, "1"} 31 | kva = append(kva, kv) 32 | } 33 | return kva 34 | } 35 | 36 | // 37 | // The reduce function is called once for each key generated by the 38 | // map tasks, with a list of all the values created for that key by 39 | // any map task. 40 | // 41 | func Reduce(key string, values []string) string { 42 | // return the number of occurrences of this word. 43 | return strconv.Itoa(len(values)) 44 | } 45 | -------------------------------------------------------------------------------- /src/main/mrworker.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start a worker process, which is implemented 5 | // in ../mr/worker.go. typically there will be 6 | // multiple worker processes, talking to one coordinator. 7 | // 8 | // go run mrworker.go wc.so 9 | // 10 | // Please do not change this file. 11 | // 12 | 13 | import "6.824/mr" 14 | import "plugin" 15 | import "os" 16 | import "fmt" 17 | import "log" 18 | 19 | func main() { 20 | if len(os.Args) != 2 { 21 | fmt.Fprintf(os.Stderr, "Usage: mrworker xxx.so\n") 22 | os.Exit(1) 23 | } 24 | 25 | mapf, reducef := loadPlugin(os.Args[1]) 26 | 27 | mr.Worker(mapf, reducef) 28 | } 29 | 30 | // 31 | // load the application Map and Reduce functions 32 | // from a plugin file, e.g. 
../mrapps/wc.so 33 | // 34 | func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) { 35 | p, err := plugin.Open(filename) 36 | if err != nil { 37 | log.Fatalf("cannot load plugin %v", filename) 38 | } 39 | xmapf, err := p.Lookup("Map") 40 | if err != nil { 41 | log.Fatalf("cannot find Map in %v", filename) 42 | } 43 | mapf := xmapf.(func(string, string) []mr.KeyValue) 44 | xreducef, err := p.Lookup("Reduce") 45 | if err != nil { 46 | log.Fatalf("cannot find Reduce in %v", filename) 47 | } 48 | reducef := xreducef.(func(string, []string) string) 49 | 50 | return mapf, reducef 51 | } 52 | -------------------------------------------------------------------------------- /src/shardctrler/common.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import "time" 4 | 5 | const ( 6 | OK = "OK" 7 | ErrWrongLeader = "ErrWrongLeader" 8 | ErrOpt = "ErrOpt" 9 | ErrTimeout = "ErrTimeout" 10 | ) 11 | 12 | type Err string 13 | 14 | type OpType string 15 | 16 | const ( 17 | OpJoin OpType = "join" 18 | OpLeave OpType = "leave" 19 | OpMove OpType = "move" 20 | OpQuery OpType = "query" 21 | ) 22 | 23 | type CommandArgs struct { 24 | Op OpType 25 | ClientId int64 26 | SeqId int64 27 | Servers map[int][]string // for Join 28 | GIDs []int // for Leave 29 | Shard int // for Move 30 | GID int // for Move 31 | Num int // for Query 32 | } 33 | 34 | type CommandReply struct { 35 | Err Err 36 | Config Config 37 | } 38 | 39 | type Op CommandArgs 40 | 41 | type OpResp struct { 42 | Err Err 43 | Config Config 44 | } 45 | 46 | type OpContext struct { 47 | SeqId int64 48 | Reply OpResp 49 | } 50 | 51 | type IndexAndTerm struct { 52 | index int 53 | term int 54 | } 55 | 56 | const ( 57 | retry_timeout time.Duration = time.Duration(1) * time.Millisecond 58 | cmd_timeout time.Duration = time.Duration(2) * time.Second 59 | gap_time time.Duration = time.Duration(5) * time.Millisecond 60 | snapshot_gap_time time.Duration = time.Duration(10) * time.Millisecond 61 | ) 62 | -------------------------------------------------------------------------------- /src/kvraft/common.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | import "time" 4 | 5 | const ( 6 | OK = "OK" 7 | ErrNoKey = "ErrNoKey" 8 | ErrWrongLeader = "ErrWrongLeader" 9 | ErrTimeout = "ErrTimeout" 10 | ErrTimeoutReq = "ErrTimeoutReq" 11 | ) 12 | 13 | type Err string 14 | 15 | // Put or Append 16 | type CmdArgs struct { 17 | OpType OPType 18 | Key string 19 | Value string 20 | ClientId int64 21 | SeqId int64 22 | } 23 | 24 | type CmdReply struct { 25 | Err Err 26 | Value string 27 | } 28 | 29 | type OPType string 30 | 31 | const ( 32 | OpGet OPType = "Get" 33 | OpPut OPType = "Put" 34 | OpAppend OPType = "Append" 35 | ) 36 | 37 | type Op struct { 38 | // Your definitions here. 39 | // Field names must start with capital letters, 40 | // otherwise RPC will break. 
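	// ClientId and SeqId identify each client request so the server can detect and drop duplicate (retried) commands.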
41 | OpType OPType 42 | Key string 43 | Value string 44 | ClientId int64 45 | SeqId int64 46 | } 47 | 48 | type OpResp struct { 49 | Err Err 50 | Value string 51 | } 52 | 53 | type OpContext struct { 54 | SeqId int64 55 | Reply OpResp 56 | } 57 | 58 | type IndexAndTerm struct { 59 | index int 60 | term int 61 | } 62 | 63 | const ( 64 | retry_timeout time.Duration = time.Duration(1) * time.Millisecond 65 | cmd_timeout time.Duration = time.Duration(2) * time.Second 66 | gap_time time.Duration = time.Duration(5) * time.Millisecond 67 | snapshot_gap_time time.Duration = time.Duration(10) * time.Millisecond 68 | ) 69 | -------------------------------------------------------------------------------- /src/mrapps/crash.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a MapReduce pseudo-application that sometimes crashes, 5 | // and sometimes takes a long time, 6 | // to test MapReduce's ability to recover. 7 | // 8 | // go build -buildmode=plugin crash.go 9 | // 10 | 11 | import "6.824/mr" 12 | import crand "crypto/rand" 13 | import "math/big" 14 | import "strings" 15 | import "os" 16 | import "sort" 17 | import "strconv" 18 | import "time" 19 | 20 | func maybeCrash() { 21 | max := big.NewInt(1000) 22 | rr, _ := crand.Int(crand.Reader, max) 23 | if rr.Int64() < 330 { 24 | // crash! 25 | os.Exit(1) 26 | } else if rr.Int64() < 660 { 27 | // delay for a while. 28 | maxms := big.NewInt(10 * 1000) 29 | ms, _ := crand.Int(crand.Reader, maxms) 30 | time.Sleep(time.Duration(ms.Int64()) * time.Millisecond) 31 | } 32 | } 33 | 34 | func Map(filename string, contents string) []mr.KeyValue { 35 | maybeCrash() 36 | 37 | kva := []mr.KeyValue{} 38 | kva = append(kva, mr.KeyValue{"a", filename}) 39 | kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))}) 40 | kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))}) 41 | kva = append(kva, mr.KeyValue{"d", "xyzzy"}) 42 | return kva 43 | } 44 | 45 | func Reduce(key string, values []string) string { 46 | maybeCrash() 47 | 48 | // sort values to ensure deterministic output. 
49 | vv := make([]string, len(values)) 50 | copy(vv, values) 51 | sort.Strings(vv) 52 | 53 | val := strings.Join(vv, " ") 54 | return val 55 | } 56 | -------------------------------------------------------------------------------- /src/raft/snapshot.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | func (rf *Raft) doInstallSnapshot(peer int) { 6 | rf.mu.Lock() 7 | if rf.status != leader { 8 | utils.Debug(utils.DWarn, "S%d status change, it is not leader", rf.me) 9 | rf.mu.Unlock() 10 | return 11 | } 12 | args := InstallSnapshotArgs{ 13 | Term: rf.currentTerm, 14 | LeaderId: rf.me, 15 | LastIncludedIndex: rf.frontLog().Index, 16 | LastIncludedTerm: rf.frontLog().Term, 17 | } 18 | 19 | args.Data = make([]byte, rf.persister.SnapshotSize()) 20 | copy(args.Data, rf.persister.ReadSnapshot()) 21 | rf.mu.Unlock() 22 | 23 | reply := InstallSnapshotReply{} 24 | 25 | ok := rf.sendInstallSnapshot(peer, &args, &reply) 26 | if !ok { 27 | return 28 | } 29 | 30 | rf.mu.Lock() 31 | defer rf.mu.Unlock() 32 | 33 | // status changed or outdue data, ignore 34 | if rf.currentTerm != args.Term || rf.status != leader || reply.Term < rf.currentTerm { 35 | // overdue, ignore 36 | utils.Debug(utils.DInfo, "S%d old response from C%d, ignore it", rf.me, peer) 37 | return 38 | } 39 | 40 | if reply.Term > rf.currentTerm { 41 | utils.Debug(utils.DTerm, "S%d S%d term larger(%d > %d)", rf.me, peer, args.Term, rf.currentTerm) 42 | rf.currentTerm, rf.votedFor = reply.Term, voted_nil 43 | rf.persist() 44 | rf.TurnTo(follower) 45 | return 46 | } 47 | 48 | rf.nextIndex[peer] = args.LastIncludedIndex + 1 49 | 50 | utils.Debug(utils.DInfo, "S%d send snapshot to C%d success!", rf.me, peer) 51 | } 52 | -------------------------------------------------------------------------------- /src/raft/status.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | type ServerStatus string 6 | 7 | const ( 8 | follower ServerStatus = "Follower" 9 | candidate ServerStatus = "Candidate" 10 | leader ServerStatus = "Leader" 11 | ) 12 | 13 | // return currentTerm and whether this server 14 | // believes it is the leader. 15 | func (rf *Raft) GetState() (int, bool) { 16 | // Your code here (2A). 17 | rf.mu.Lock() 18 | defer rf.mu.Unlock() 19 | term := rf.currentTerm 20 | isleader := (rf.status == leader) 21 | return term, isleader 22 | } 23 | 24 | // without lock 25 | // if have a new goroutine, must lock it !!! 
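// (i.e. TurnTo assumes the caller already holds rf.mu; any code that calls it from a new goroutine must acquire the lock first.)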
26 | func (rf *Raft) TurnTo(status ServerStatus) { 27 | switch status { 28 | case follower: 29 | rf.status = follower 30 | utils.Debug(utils.DTerm, "S%d converting to %v in T(%d)", rf.me, rf.status, rf.currentTerm) 31 | case candidate: 32 | // • Increment currentTerm 33 | rf.currentTerm++ 34 | // • Vote for self 35 | rf.votedFor = rf.me 36 | rf.persist() 37 | rf.status = candidate 38 | utils.Debug(utils.DTerm, "S%d converting to %v in T(%d)", rf.me, rf.status, rf.currentTerm) 39 | case leader: 40 | rf.status = leader 41 | rf.leaderInit() 42 | // print before sending heartbeat 43 | utils.Debug(utils.DTerm, "S%d converting to %v in T(%d)", rf.me, rf.status, rf.currentTerm) 44 | // Upon election: send initial empty AppendEntries RPCs (heartbeat) to each server; 45 | // repeat during idle periods to prevent election timeouts (§5.2) 46 | rf.doAppendEntries() 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/porcupine/porcupine.go: -------------------------------------------------------------------------------- 1 | package porcupine 2 | 3 | import "time" 4 | 5 | func CheckOperations(model Model, history []Operation) bool { 6 | res, _ := checkOperations(model, history, false, 0) 7 | return res == Ok 8 | } 9 | 10 | // timeout = 0 means no timeout 11 | // if this operation times out, then a false positive is possible 12 | func CheckOperationsTimeout(model Model, history []Operation, timeout time.Duration) CheckResult { 13 | res, _ := checkOperations(model, history, false, timeout) 14 | return res 15 | } 16 | 17 | // timeout = 0 means no timeout 18 | // if this operation times out, then a false positive is possible 19 | func CheckOperationsVerbose(model Model, history []Operation, timeout time.Duration) (CheckResult, linearizationInfo) { 20 | return checkOperations(model, history, true, timeout) 21 | } 22 | 23 | func CheckEvents(model Model, history []Event) bool { 24 | res, _ := checkEvents(model, history, false, 0) 25 | return res == Ok 26 | } 27 | 28 | // timeout = 0 means no timeout 29 | // if this operation times out, then a false positive is possible 30 | func CheckEventsTimeout(model Model, history []Event, timeout time.Duration) CheckResult { 31 | res, _ := checkEvents(model, history, false, timeout) 32 | return res 33 | } 34 | 35 | // timeout = 0 means no timeout 36 | // if this operation times out, then a false positive is possible 37 | func CheckEventsVerbose(model Model, history []Event, timeout time.Duration) (CheckResult, linearizationInfo) { 38 | return checkEvents(model, history, true, timeout) 39 | } 40 | -------------------------------------------------------------------------------- /src/raft/election.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // ticker() call doElection(), ticker() hold lock 6 | func (rf *Raft) doElection() { 7 | votedcount := 1 8 | entry := rf.lastLog() 9 | args := RequestVoteArgs{ 10 | Term: rf.currentTerm, 11 | CandidateId: rf.me, 12 | LastLogIndex: entry.Index, 13 | LastLogTerm: entry.Term, 14 | } 15 | 16 | for i := 0; i < len(rf.peers); i++ { 17 | if i == rf.me { 18 | continue 19 | } 20 | 21 | go func(i int) { 22 | reply := RequestVoteReply{} 23 | ok := rf.sendRequestVote(i, &args, &reply) 24 | if !ok { 25 | return 26 | } 27 | 28 | rf.mu.Lock() 29 | defer rf.mu.Unlock() 30 | 31 | if rf.currentTerm != args.Term || rf.status != candidate { 32 | // election timeout, re-election 33 | // ignore it 34 | return 35 | 
} 36 | 37 | // If RPC request or response contains term T > currentTerm: 38 | // set currentTerm = T, convert to follower (§5.1) 39 | if reply.Term > rf.currentTerm { 40 | utils.Debug(utils.DTerm, "S%d S%d term larger(%d > %d)", rf.me, i, args.Term, rf.currentTerm) 41 | // turn to follower 42 | rf.currentTerm, rf.votedFor = reply.Term, voted_nil 43 | rf.persist() 44 | rf.TurnTo(follower) 45 | return 46 | } 47 | 48 | if reply.VoteGranted { 49 | votedcount++ 50 | // If votes received from majority of servers: become leader 51 | if votedcount > len(rf.peers)/2 && rf.status == candidate { 52 | rf.TurnTo(leader) 53 | } 54 | } 55 | }(i) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/porcupine/bitset.go: -------------------------------------------------------------------------------- 1 | package porcupine 2 | 3 | import "math/bits" 4 | 5 | type bitset []uint64 6 | 7 | // data layout: 8 | // bits 0-63 are in data[0], the next are in data[1], etc. 9 | 10 | func newBitset(bits uint) bitset { 11 | extra := uint(0) 12 | if bits%64 != 0 { 13 | extra = 1 14 | } 15 | chunks := bits/64 + extra 16 | return bitset(make([]uint64, chunks)) 17 | } 18 | 19 | func (b bitset) clone() bitset { 20 | dataCopy := make([]uint64, len(b)) 21 | copy(dataCopy, b) 22 | return bitset(dataCopy) 23 | } 24 | 25 | func bitsetIndex(pos uint) (uint, uint) { 26 | return pos / 64, pos % 64 27 | } 28 | 29 | func (b bitset) set(pos uint) bitset { 30 | major, minor := bitsetIndex(pos) 31 | b[major] |= (1 << minor) 32 | return b 33 | } 34 | 35 | func (b bitset) clear(pos uint) bitset { 36 | major, minor := bitsetIndex(pos) 37 | b[major] &^= (1 << minor) 38 | return b 39 | } 40 | 41 | func (b bitset) get(pos uint) bool { 42 | major, minor := bitsetIndex(pos) 43 | return b[major]&(1< kv.lastSnapshot + snapshotLogGap { 19 | kv.doSnapshot(kv.lastApplied) 20 | kv.lastSnapshot = kv.lastApplied 21 | } 22 | kv.mu.Unlock() 23 | time.Sleep(snapshot_gap_time) 24 | } 25 | } 26 | 27 | func (kv *KVServer) isNeedSnapshot() bool { 28 | if kv.maxraftstate != -1 && kv.rf.RaftPersistSize() > int(threshold*float32(kv.maxraftstate)) { 29 | return true 30 | } 31 | return false 32 | } 33 | 34 | func (kv *KVServer) doSnapshot(commandIndex int) { 35 | utils.Debug(utils.DServer, "S%d doSnapshot", kv.me) 36 | w := new(bytes.Buffer) 37 | e := labgob.NewEncoder(w) 38 | if e.Encode(*kv.KvMap) != nil || 39 | e.Encode(kv.LastCmdContext) != nil { 40 | panic("server doSnapshot encode error") 41 | } 42 | kv.rf.Snapshot(commandIndex, w.Bytes()) 43 | } 44 | 45 | func (kv *KVServer) setSnapshot(snapshot []byte) { 46 | if snapshot == nil || len(snapshot) < 1 { // bootstrap without any state? 
47 | return 48 | } 49 | 50 | utils.Debug(utils.DServer, "S%d setSnapshot", kv.me) 51 | r := bytes.NewBuffer(snapshot) 52 | d := labgob.NewDecoder(r) 53 | 54 | var kvMap KV 55 | var lastCmdContext map[int64]OpContext 56 | 57 | if d.Decode(&kvMap) != nil || 58 | d.Decode(&lastCmdContext) != nil { 59 | log.Fatalf("server setSnapshot decode error\n") 60 | } else { 61 | kv.KvMap = &kvMap 62 | kv.LastCmdContext = lastCmdContext 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/raft/apply.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // 6 | // as each Raft peer becomes aware that successive log entries are 7 | // committed, the peer should send an ApplyMsg to the service (or 8 | // tester) on the same server, via the applyCh passed to Make(). set 9 | // CommandValid to true to indicate that the ApplyMsg contains a newly 10 | // committed log entry. 11 | // 12 | // in part 2D you'll want to send other kinds of messages (e.g., 13 | // snapshots) on the applyCh, but set CommandValid to false for these 14 | // other uses. 15 | // 16 | type ApplyMsg struct { 17 | CommandValid bool 18 | Command interface{} 19 | CommandIndex int 20 | CommandTerm int 21 | 22 | // For 2D: 23 | SnapshotValid bool 24 | Snapshot []byte 25 | SnapshotTerm int 26 | SnapshotIndex int 27 | } 28 | 29 | // a new goroutine to run it 30 | func (rf *Raft) applyLog() { 31 | for rf.killed() == false { 32 | rf.mu.Lock() 33 | for rf.lastApplied >= rf.commitIndex { 34 | rf.applyCond.Wait() 35 | } 36 | commitIndex := rf.commitIndex 37 | commit, _ := rf.transfer(rf.commitIndex) 38 | applied, _ := rf.transfer(rf.lastApplied) 39 | entries := make([]Entry, commit-applied) 40 | copy(entries, rf.log[applied+1:commit+1]) 41 | rf.mu.Unlock() 42 | 43 | for _, entry := range entries { 44 | rf.applyCh <- ApplyMsg{ 45 | CommandValid: true, 46 | Command: entry.Cmd, 47 | CommandIndex: entry.Index, 48 | CommandTerm: entry.Term, 49 | } 50 | } 51 | 52 | rf.mu.Lock() 53 | utils.Debug(utils.DCommit, "S%d apply %v - %v", rf.me, rf.lastApplied, commitIndex) 54 | if commitIndex > rf.lastApplied { 55 | rf.lastApplied = commitIndex 56 | } 57 | rf.mu.Unlock() 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/shardctrler/apply.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | func (sc *ShardCtrler) applier() { 8 | for sc.killed() == false { 9 | select { 10 | case msg := <-sc.applyCh: 11 | Debug(dServer, "S%d apply msg: %+v", sc.me, msg) 12 | if msg.CommandValid { 13 | sc.mu.Lock() 14 | 15 | if msg.CommandIndex <= sc.lastApplied { 16 | Debug(dWarn, "S%d out time apply(%d <= %d): %+v", sc.me, msg.CommandIndex, sc.lastApplied, msg) 17 | sc.mu.Unlock() 18 | continue 19 | } 20 | sc.lastApplied = msg.CommandIndex 21 | 22 | var resp OpResp 23 | cmd := msg.Command.(Op) 24 | 25 | if cmd.Op != OpQuery && sc.isDuplicate(cmd.ClientId, cmd.SeqId) { 26 | context := sc.LastCmdContext[cmd.ClientId] 27 | resp = context.Reply 28 | } else { 29 | resp.Config, resp.Err = sc.configs.Opt(cmd) 30 | sc.LastCmdContext[cmd.ClientId] = OpContext{ 31 | SeqId: cmd.SeqId, 32 | Reply: resp, 33 | } 34 | } 35 | 36 | term, isLeader := sc.rf.GetState() 37 | 38 | if !isLeader || term != msg.CommandTerm { 39 | sc.mu.Unlock() 40 | continue 41 | } 42 | 43 | it := IndexAndTerm{msg.CommandIndex, term} 44 | 
ch, ok := sc.cmdRespChans[it] 45 | if ok { 46 | select { 47 | case ch <- resp: 48 | case <-time.After(10 * time.Millisecond): 49 | } 50 | } 51 | 52 | sc.mu.Unlock() 53 | } else { 54 | // ignore 55 | } 56 | default: 57 | time.Sleep(gap_time) 58 | } 59 | } 60 | } 61 | 62 | func (sc *ShardCtrler) isDuplicate(clientId int64, seqId int64) bool { 63 | context, ok := sc.LastCmdContext[clientId] 64 | if !ok { 65 | return false 66 | } 67 | if seqId <= context.SeqId { 68 | return true 69 | } 70 | return false 71 | } 72 | -------------------------------------------------------------------------------- /src/shardkv/commandHandler.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "6.824/raft" 4 | 5 | // Handler 6 | func (kv *ShardKV) Command(args *CmdArgs, reply *CmdReply) { 7 | defer Debug(dTrace, "G%+v {S%+v} args: %+v reply: %+v", kv.gid, kv.me, args, reply) 8 | 9 | kv.mu.Lock() 10 | shardID := key2shard(args.Key) 11 | if !kv.canServe(shardID) { 12 | Debug(dWarn, "G%+v {S%+v} shard %d is %+v, can't servering(%+v)", kv.gid, kv.me, shardID, kv.shards[shardID], kv.currentConfig.Shards[shardID]) 13 | reply.Err = ErrWrongGroup 14 | kv.mu.Unlock() 15 | return 16 | } 17 | if args.OpType != OpGet && kv.isDuplicate(shardID, args.ClientId, args.SeqId) { 18 | context := kv.shards[shardID].LastCmdContext[args.ClientId] 19 | reply.Value, reply.Err = context.Reply.Value, context.Reply.Err 20 | kv.mu.Unlock() 21 | return 22 | } 23 | kv.mu.Unlock() 24 | 25 | var resp OpResp 26 | kv.Execute(NewOperationCommand(args), &resp) 27 | reply.Value, reply.Err = resp.Value, resp.Err 28 | } 29 | 30 | func (kv *ShardKV) canServe(shardID int) bool { 31 | return kv.currentConfig.Shards[shardID] == kv.gid && (kv.shards[shardID].Status == Serving || kv.shards[shardID].Status == GCing) 32 | } 33 | 34 | func (kv *ShardKV) applyOperation(msg *raft.ApplyMsg, cmd *CmdArgs) *OpResp { 35 | shardID := key2shard(cmd.Key) 36 | if kv.canServe(shardID) { 37 | if cmd.OpType != OpGet && kv.isDuplicate(shardID, cmd.ClientId, cmd.SeqId) { 38 | context := kv.shards[shardID].LastCmdContext[cmd.ClientId] 39 | return &context.Reply 40 | } else { 41 | var resp OpResp 42 | resp.Value, resp.Err = kv.Opt(cmd, shardID) 43 | kv.shards[shardID].LastCmdContext[cmd.ClientId] = OpContext{ 44 | SeqId: cmd.SeqId, 45 | Reply: resp, 46 | } 47 | return &resp 48 | } 49 | } 50 | return &OpResp{ErrWrongGroup, ""} 51 | } 52 | -------------------------------------------------------------------------------- /src/shardkv/gc.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "sync" 4 | 5 | func (kv *ShardKV) gcAction() { 6 | kv.mu.Lock() 7 | gid2shardIDs := kv.getShardIDsByStatus(GCing, &kv.lastConfig) 8 | var wg sync.WaitGroup 9 | for gid, shardIDs := range gid2shardIDs { 10 | wg.Add(1) 11 | servers := kv.lastConfig.Groups[gid] 12 | go func(servers []string, configNum int, shardIDs []int) { 13 | defer wg.Done() 14 | args := PullDataArgs{configNum, shardIDs} 15 | for _, server := range servers { 16 | var reply PullDataReply 17 | srv := kv.makeEnd(server) 18 | if srv.Call("ShardKV.DeleteShardsData", &args, &reply) && reply.Err == OK { 19 | kv.Execute(NewDeleteShardsCommand(&args), &OpResp{}) 20 | } 21 | } 22 | }(servers, kv.currentConfig.Num, shardIDs) 23 | } 24 | kv.mu.Unlock() 25 | wg.Wait() 26 | } 27 | 28 | func (kv *ShardKV) DeleteShardsData(args *PullDataArgs, reply *PullDataReply) { 29 | // only delete shards when role is leader 30 
| if _, isLeader := kv.rf.GetState(); !isLeader { 31 | reply.Err = ErrWrongLeader 32 | return 33 | } 34 | 35 | kv.mu.Lock() 36 | if kv.currentConfig.Num > args.ConfNum { 37 | reply.Err = OK 38 | kv.mu.Unlock() 39 | return 40 | } 41 | kv.mu.Unlock() 42 | 43 | var resp OpResp 44 | kv.Execute(NewDeleteShardsCommand(args), &resp) 45 | 46 | reply.Err = resp.Err 47 | } 48 | 49 | func (kv *ShardKV) applyDeleteShards(shardsInfo *PullDataArgs) *OpResp { 50 | if shardsInfo.ConfNum == kv.currentConfig.Num { 51 | for _, shardId := range shardsInfo.ShardIds { 52 | shard := kv.shards[shardId] 53 | if shard.Status == GCing { 54 | shard.Status = Serving 55 | } else if shard.Status == BePulling { 56 | kv.shards[shardId] = NewShard(Serving) 57 | } else { 58 | break 59 | } 60 | } 61 | return &OpResp{OK, ""} 62 | } 63 | return &OpResp{OK, ""} 64 | } 65 | -------------------------------------------------------------------------------- /src/models/kv.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import "6.824/porcupine" 4 | import "fmt" 5 | import "sort" 6 | 7 | type KvInput struct { 8 | Op uint8 // 0 => get, 1 => put, 2 => append 9 | Key string 10 | Value string 11 | } 12 | 13 | type KvOutput struct { 14 | Value string 15 | } 16 | 17 | var KvModel = porcupine.Model{ 18 | Partition: func(history []porcupine.Operation) [][]porcupine.Operation { 19 | m := make(map[string][]porcupine.Operation) 20 | for _, v := range history { 21 | key := v.Input.(KvInput).Key 22 | m[key] = append(m[key], v) 23 | } 24 | keys := make([]string, 0, len(m)) 25 | for k := range m { 26 | keys = append(keys, k) 27 | } 28 | sort.Strings(keys) 29 | ret := make([][]porcupine.Operation, 0, len(keys)) 30 | for _, k := range keys { 31 | ret = append(ret, m[k]) 32 | } 33 | return ret 34 | }, 35 | Init: func() interface{} { 36 | // note: we are modeling a single key's value here; 37 | // we're partitioning by key, so this is okay 38 | return "" 39 | }, 40 | Step: func(state, input, output interface{}) (bool, interface{}) { 41 | inp := input.(KvInput) 42 | out := output.(KvOutput) 43 | st := state.(string) 44 | if inp.Op == 0 { 45 | // get 46 | return out.Value == st, state 47 | } else if inp.Op == 1 { 48 | // put 49 | return true, inp.Value 50 | } else { 51 | // append 52 | return true, (st + inp.Value) 53 | } 54 | }, 55 | DescribeOperation: func(input, output interface{}) string { 56 | inp := input.(KvInput) 57 | out := output.(KvOutput) 58 | switch inp.Op { 59 | case 0: 60 | return fmt.Sprintf("get('%s') -> '%s'", inp.Key, out.Value) 61 | case 1: 62 | return fmt.Sprintf("put('%s', '%s')", inp.Key, inp.Value) 63 | case 2: 64 | return fmt.Sprintf("append('%s', '%s')", inp.Key, inp.Value) 65 | default: 66 | return "" 67 | } 68 | }, 69 | } 70 | -------------------------------------------------------------------------------- /src/raft/ticker.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "math/rand" 5 | "time" 6 | 7 | "6.824/utils" 8 | ) 9 | 10 | func (rf *Raft) electionTimeout() bool { 11 | return time.Now().After(rf.electionTime) 12 | } 13 | 14 | func (rf *Raft) heartbeatTimeout() bool { 15 | return time.Now().After(rf.heartbeatTime) 16 | } 17 | 18 | func (rf *Raft) resetElectionTime() { 19 | sleep_time := rand.Intn(election_range_time) + election_base_time 20 | rf.electionTime = time.Now().Add(time.Duration(sleep_time) * time.Millisecond) 21 | } 22 | 23 | func (rf *Raft) resetHeartbeatTime() { 24 | 
rf.heartbeatTime = time.Now().Add(time.Duration(heartbeat_time) * time.Millisecond) 25 | } 26 | 27 | // The ticker go routine starts a new election if this peer hasn't received 28 | // heartsbeats recently. 29 | func (rf *Raft) ticker() { 30 | for rf.killed() == false { 31 | // Your code here to check if a leader election should 32 | // be started and to randomize sleeping time using 33 | // time.Sleep(). 34 | rf.mu.Lock() 35 | switch rf.status { 36 | case follower: 37 | if rf.electionTimeout() { 38 | rf.TurnTo(candidate) 39 | utils.Debug(utils.DTimer, "S%d Election timeout, Start election, T%d", rf.me, rf.currentTerm) 40 | rf.doElection() 41 | rf.resetElectionTime() 42 | } 43 | case candidate: 44 | if rf.electionTimeout() { 45 | rf.TurnTo(candidate) 46 | utils.Debug(utils.DTimer, "S%d Election timeout, re-start election, T%d", rf.me, rf.currentTerm) 47 | rf.doElection() 48 | rf.resetElectionTime() 49 | } 50 | case leader: 51 | if rf.heartbeatTimeout() { 52 | utils.Debug(utils.DTimer, "S%d Heartbeat timeout, send heartbeat boardcast, T%d", rf.me, rf.currentTerm) 53 | rf.doAppendEntries() 54 | rf.resetHeartbeatTime() 55 | } 56 | } 57 | rf.mu.Unlock() 58 | time.Sleep(time.Duration(gap_time) * time.Millisecond) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/raft/persist.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "bytes" 5 | 6 | "6.824/labgob" 7 | "6.824/utils" 8 | ) 9 | 10 | func (rf *Raft) RaftPersistSize() int { 11 | rf.mu.Lock() 12 | defer rf.mu.Unlock() 13 | return rf.persister.RaftStateSize() 14 | } 15 | 16 | func (rf *Raft) raftState() []byte { 17 | w := new(bytes.Buffer) 18 | e := labgob.NewEncoder(w) 19 | 20 | if e.Encode(rf.log) != nil || 21 | e.Encode(rf.currentTerm) != nil || 22 | e.Encode(rf.votedFor) != nil { 23 | utils.Debug(utils.DError, "S%d encode fail", rf.me) 24 | panic("encode fail") 25 | } 26 | data := w.Bytes() 27 | return data 28 | } 29 | 30 | // 31 | // save Raft's persistent state to stable storage, 32 | // where it can later be retrieved after a crash and restart. 33 | // see paper's Figure 2 for a description of what should be persistent. 34 | // 35 | func (rf *Raft) persist() { 36 | // Your code here (2C). 37 | rf.persister.SaveRaftState(rf.raftState()) 38 | } 39 | 40 | // 41 | // restore previously persisted state. 42 | // 43 | func (rf *Raft) readPersist(data []byte) { 44 | if data == nil || len(data) < 1 { // bootstrap without any state? 45 | return 46 | } 47 | // Your code here (2C). 
48 | r := bytes.NewBuffer(data) 49 | d := labgob.NewDecoder(r) 50 | 51 | var log []Entry 52 | var currentTerm, votedFor int 53 | 54 | if d.Decode(&log) != nil || 55 | d.Decode(¤tTerm) != nil || 56 | d.Decode(&votedFor) != nil { 57 | utils.Debug(utils.DError, "S%d decode fail", rf.me) 58 | panic("encode fail") 59 | } 60 | 61 | // log at least is 1 62 | rf.log = make([]Entry, len(log)) 63 | copy(rf.log, log) 64 | rf.lastApplied = rf.frontLogIndex() 65 | rf.commitIndex = rf.frontLogIndex() 66 | rf.currentTerm = currentTerm 67 | rf.votedFor = votedFor 68 | } 69 | 70 | func (rf *Raft) persistSnapshot(snapshot []byte) { 71 | rf.persister.SaveStateAndSnapshot(rf.raftState(), snapshot) 72 | } 73 | -------------------------------------------------------------------------------- /src/raft/persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft and kvraft to save persistent 5 | // Raft state (log &c) and k/v server snapshots. 6 | // 7 | // we will use the original persister.go to test your code for grading. 8 | // so, while you can modify this code to help you debug, please 9 | // test with the original before submitting. 10 | // 11 | 12 | import "sync" 13 | 14 | type Persister struct { 15 | mu sync.Mutex 16 | raftstate []byte 17 | snapshot []byte 18 | } 19 | 20 | func MakePersister() *Persister { 21 | return &Persister{} 22 | } 23 | 24 | func clone(orig []byte) []byte { 25 | x := make([]byte, len(orig)) 26 | copy(x, orig) 27 | return x 28 | } 29 | 30 | func (ps *Persister) Copy() *Persister { 31 | ps.mu.Lock() 32 | defer ps.mu.Unlock() 33 | np := MakePersister() 34 | np.raftstate = ps.raftstate 35 | np.snapshot = ps.snapshot 36 | return np 37 | } 38 | 39 | func (ps *Persister) SaveRaftState(state []byte) { 40 | ps.mu.Lock() 41 | defer ps.mu.Unlock() 42 | ps.raftstate = clone(state) 43 | } 44 | 45 | func (ps *Persister) ReadRaftState() []byte { 46 | ps.mu.Lock() 47 | defer ps.mu.Unlock() 48 | return clone(ps.raftstate) 49 | } 50 | 51 | func (ps *Persister) RaftStateSize() int { 52 | ps.mu.Lock() 53 | defer ps.mu.Unlock() 54 | return len(ps.raftstate) 55 | } 56 | 57 | // Save both Raft state and K/V snapshot as a single atomic action, 58 | // to help avoid them getting out of sync. 59 | func (ps *Persister) SaveStateAndSnapshot(state []byte, snapshot []byte) { 60 | ps.mu.Lock() 61 | defer ps.mu.Unlock() 62 | ps.raftstate = clone(state) 63 | ps.snapshot = clone(snapshot) 64 | } 65 | 66 | func (ps *Persister) ReadSnapshot() []byte { 67 | ps.mu.Lock() 68 | defer ps.mu.Unlock() 69 | return clone(ps.snapshot) 70 | } 71 | 72 | func (ps *Persister) SnapshotSize() int { 73 | ps.mu.Lock() 74 | defer ps.mu.Unlock() 75 | return len(ps.snapshot) 76 | } 77 | -------------------------------------------------------------------------------- /src/shardctrler/client.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | // 4 | // Shardctrler clerk. 5 | // 6 | 7 | import ( 8 | "crypto/rand" 9 | "math/big" 10 | "time" 11 | 12 | "6.824/labrpc" 13 | ) 14 | 15 | type Clerk struct { 16 | servers []*labrpc.ClientEnd 17 | // Your data here. 
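	// leaderId caches the last server that answered successfully; clientId and seqId tag each request for duplicate detection on the servers.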
18 | leaderId int 19 | clientId int64 20 | seqId int64 21 | } 22 | 23 | func nrand() int64 { 24 | max := big.NewInt(int64(1) << 62) 25 | bigx, _ := rand.Int(rand.Reader, max) 26 | x := bigx.Int64() 27 | return x 28 | } 29 | 30 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 31 | ck := new(Clerk) 32 | ck.servers = servers 33 | // Your code here. 34 | ck.clientId = nrand() 35 | ck.seqId = 0 36 | ck.leaderId = 0 37 | return ck 38 | } 39 | 40 | func (ck *Clerk) sendCmd(args CommandArgs) CommandReply { 41 | ck.seqId += 1 42 | args.SeqId = ck.seqId 43 | args.ClientId = ck.clientId 44 | 45 | for { 46 | reply := CommandReply{} 47 | 48 | ok := ck.servers[ck.leaderId].Call("ShardCtrler.Command", &args, &reply) 49 | 50 | if !ok { 51 | ck.leaderId = (ck.leaderId + 1) % len(ck.servers) 52 | time.Sleep(retry_timeout) 53 | continue 54 | } 55 | 56 | if reply.Err == OK { 57 | return reply 58 | } 59 | 60 | ck.leaderId = (ck.leaderId + 1) % len(ck.servers) 61 | time.Sleep(retry_timeout) 62 | } 63 | } 64 | 65 | func (ck *Clerk) Query(num int) Config { 66 | args := CommandArgs{ 67 | Op: OpQuery, 68 | Num: num, 69 | } 70 | reply := ck.sendCmd(args) 71 | return reply.Config 72 | } 73 | 74 | func (ck *Clerk) Join(servers map[int][]string) { 75 | args := CommandArgs{ 76 | Op: OpJoin, 77 | Servers: servers, 78 | } 79 | // reply := ck.sendCmd(args) 80 | ck.sendCmd(args) 81 | } 82 | 83 | func (ck *Clerk) Leave(gids []int) { 84 | args := CommandArgs{ 85 | Op: OpLeave, 86 | GIDs: gids, 87 | } 88 | ck.sendCmd(args) 89 | } 90 | 91 | func (ck *Clerk) Move(shard int, gid int) { 92 | args := CommandArgs{ 93 | Op: OpMove, 94 | Shard: shard, 95 | GID: gid, 96 | } 97 | ck.sendCmd(args) 98 | } 99 | -------------------------------------------------------------------------------- /src/kvraft/apply.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | import "6.824/utils" 4 | import "time" 5 | 6 | func (kv *KVServer) applier() { 7 | for kv.killed() == false { 8 | select { 9 | case msg := <-kv.applyCh: 10 | utils.Debug(utils.DServer, "S%d apply msg: %+v", kv.me, msg) 11 | if msg.SnapshotValid { 12 | kv.mu.Lock() 13 | if kv.rf.CondInstallSnapshot(msg.SnapshotTerm, msg.SnapshotIndex, msg.Snapshot) { 14 | kv.setSnapshot(msg.Snapshot) 15 | kv.lastApplied = msg.SnapshotIndex 16 | } 17 | kv.mu.Unlock() 18 | } else if msg.CommandValid { 19 | 20 | kv.mu.Lock() 21 | 22 | if msg.CommandIndex <= kv.lastApplied { 23 | utils.Debug(utils.DWarn, "S%d out time apply(%d <= %d): %+v", kv.me, msg.CommandIndex, kv.lastApplied, msg) 24 | kv.mu.Unlock() 25 | continue 26 | } 27 | kv.lastApplied = msg.CommandIndex 28 | 29 | var resp OpResp 30 | cmd := msg.Command.(Op) 31 | 32 | if cmd.OpType != OpGet && kv.isDuplicate(cmd.ClientId, cmd.SeqId) { 33 | context := kv.LastCmdContext[cmd.ClientId] 34 | resp = context.Reply 35 | } else { 36 | resp.Value, resp.Err = kv.Opt(cmd) 37 | kv.LastCmdContext[cmd.ClientId] = OpContext{ 38 | SeqId: cmd.SeqId, 39 | Reply: resp, 40 | } 41 | } 42 | 43 | term, isLeader := kv.rf.GetState() 44 | 45 | if !isLeader || term != msg.CommandTerm { 46 | kv.mu.Unlock() 47 | continue 48 | } 49 | 50 | it := IndexAndTerm{msg.CommandIndex, term} 51 | ch, ok := kv.cmdRespChans[it] 52 | if ok { 53 | select { 54 | case ch <- resp: 55 | case <- time.After(10 * time.Millisecond): 56 | } 57 | } 58 | 59 | kv.mu.Unlock() 60 | } else { 61 | // ignore 62 | } 63 | default: 64 | time.Sleep(gap_time) 65 | } 66 | } 67 | } 68 | 69 | func (kv *KVServer) isDuplicate(clientId int64, seqId 
int64) bool { 70 | context, ok := kv.LastCmdContext[clientId] 71 | if !ok { 72 | return false 73 | } 74 | if seqId <= context.SeqId { 75 | return true 76 | } 77 | return false 78 | } 79 | -------------------------------------------------------------------------------- /src/raft/electionHandler.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // handler need to require lock 6 | func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { 7 | // Your code here (2A, 2B). 8 | // fmt.Printf("vote request: term %d; %d request to be voted\n", args.Term, args.CandidateId) 9 | rf.mu.Lock() 10 | defer rf.mu.Unlock() 11 | 12 | utils.Debug(utils.DVote, "S%d C%d asking vote", rf.me, args.CandidateId) 13 | 14 | defer rf.persist() 15 | 16 | if args.Term < rf.currentTerm { // ignore 17 | reply.VoteGranted = false 18 | reply.Term = rf.currentTerm 19 | utils.Debug(utils.DVote, "S%d Term is higher than C%d, refuse it", rf.me, args.CandidateId) 20 | return 21 | } 22 | 23 | if args.Term > rf.currentTerm { 24 | // If RPC request or response contains term T > currentTerm: 25 | // set currentTerm = T, convert to follower (§5.1) 26 | rf.currentTerm, rf.votedFor = args.Term, voted_nil 27 | utils.Debug(utils.DVote, "S%d Term is lower than C%d, turn to follower && reset voted_for", rf.me, args.CandidateId) 28 | rf.TurnTo(follower) 29 | // can vote now 30 | } 31 | 32 | if rf.votedFor == voted_nil || rf.votedFor == args.CandidateId { // haven't voted 33 | // log judge 34 | if !rf.isUpToDate(args.LastLogIndex, args.LastLogTerm) { 35 | reply.VoteGranted, reply.Term = false, rf.currentTerm 36 | utils.Debug(utils.DVote, "S%d C%d not up-to-date, refuse it{arg:%+v, index:%d term:%d}", rf.me, args.CandidateId, args, rf.lastLogIndex(), rf.lastLog().Term) 37 | return 38 | } 39 | 40 | rf.votedFor = args.CandidateId 41 | reply.VoteGranted = true 42 | reply.Term = rf.currentTerm 43 | // prevent election timeouts (§5.2) 44 | utils.Debug(utils.DVote, "S%d Granting Vote to S%d at T%d", rf.me, rf.votedFor, rf.currentTerm) 45 | rf.resetElectionTime() 46 | return 47 | } 48 | 49 | // have voted 50 | reply.VoteGranted = false 51 | reply.Term = rf.currentTerm 52 | utils.Debug(utils.DVote, "S%d Have voted to S%d at T%d, refuse S%d", rf.me, rf.votedFor, rf.currentTerm, args.CandidateId) 53 | return 54 | } 55 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # This is the Makefile helping you submit the labs. 2 | # Just create 6.824/api.key with your API key in it, 3 | # and submit your lab with the following command: 4 | # $ make [lab1|lab2a|lab2b|lab2c|lab2d|lab3a|lab3b|lab4a|lab4b] 5 | 6 | LABS=" lab1 lab2a lab2b lab2c lab2d lab3a lab3b lab4a lab4b " 7 | 8 | %: check-% 9 | @echo "Preparing $@-handin.tar.gz" 10 | @if echo $(LABS) | grep -q " $@ " ; then \ 11 | echo "Tarring up your submission..." ; \ 12 | COPYFILE_DISABLE=1 tar cvzf $@-handin.tar.gz \ 13 | "--exclude=src/main/pg-*.txt" \ 14 | "--exclude=src/main/diskvd" \ 15 | "--exclude=src/mapreduce/824-mrinput-*.txt" \ 16 | "--exclude=src/main/mr-*" \ 17 | "--exclude=mrtmp.*" \ 18 | "--exclude=src/main/diff.out" \ 19 | "--exclude=src/main/mrmaster" \ 20 | "--exclude=src/main/mrsequential" \ 21 | "--exclude=src/main/mrworker" \ 22 | "--exclude=*.so" \ 23 | Makefile src; \ 24 | if ! test -e api.key ; then \ 25 | echo "Missing $(PWD)/api.key. 
Please create the file with your key in it or submit the $@-handin.tar.gz via the web interface."; \ 26 | else \ 27 | echo "Are you sure you want to submit $@? Enter 'yes' to continue:"; \ 28 | read line; \ 29 | if test "$$line" != "yes" ; then echo "Giving up submission"; exit; fi; \ 30 | if test `stat -c "%s" "$@-handin.tar.gz" 2>/dev/null || stat -f "%z" "$@-handin.tar.gz"` -ge 20971520 ; then echo "File exceeds 20MB."; exit; fi; \ 31 | cat api.key | tr -d '\n' > .api.key.trimmed ; \ 32 | curl --silent --fail --show-error -F file=@$@-handin.tar.gz -F "key=<.api.key.trimmed" \ 33 | https://6824.scripts.mit.edu/2021/handin.py/upload > /dev/null || { \ 34 | echo ; \ 35 | echo "Submit seems to have failed."; \ 36 | echo "Please upload the tarball manually on the submission website."; } \ 37 | fi; \ 38 | else \ 39 | echo "Bad target $@. Usage: make [$(LABS)]"; \ 40 | fi 41 | 42 | .PHONY: check-% 43 | check-%: 44 | @echo "Checking that your submission builds correctly..." 45 | @./.check-build git://g.csail.mit.edu/6.824-golabs-2021 $(patsubst check-%,%,$@) 46 | -------------------------------------------------------------------------------- /src/shardkv/apply.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "time" 5 | 6 | "6.824/shardctrler" 7 | ) 8 | 9 | func (kv *ShardKV) isDuplicate(shardId int, clientId int64, seqId int64) bool { 10 | context, ok := kv.shards[shardId].LastCmdContext[clientId] 11 | if !ok { 12 | return false 13 | } 14 | if seqId <= context.SeqId { 15 | return true 16 | } 17 | return false 18 | } 19 | 20 | func (kv *ShardKV) applier() { 21 | for kv.killed() == false { 22 | select { 23 | case msg := <-kv.applyCh: 24 | if msg.SnapshotValid { 25 | kv.mu.Lock() 26 | if kv.rf.CondInstallSnapshot(msg.SnapshotTerm, msg.SnapshotIndex, msg.Snapshot) { 27 | kv.setSnapshot(msg.Snapshot) 28 | kv.lastApplied = msg.SnapshotIndex 29 | } 30 | kv.mu.Unlock() 31 | } else if msg.CommandValid { 32 | 33 | kv.mu.Lock() 34 | 35 | if msg.CommandIndex <= kv.lastApplied { 36 | kv.mu.Unlock() 37 | continue 38 | } 39 | kv.lastApplied = msg.CommandIndex 40 | 41 | var resp OpResp 42 | command := msg.Command.(Command) 43 | switch command.Op { 44 | case Operation: 45 | cmd := command.Data.(CmdArgs) 46 | resp = *kv.applyOperation(&msg, &cmd) 47 | case Configuration: 48 | nextConfig := command.Data.(shardctrler.Config) 49 | resp = *kv.applyConfiguration(&nextConfig) 50 | case InsertShards: 51 | insertResp := command.Data.(PullDataReply) 52 | resp = *kv.applyInsertShards(&insertResp) 53 | case DeleteShards: 54 | deleteResp := command.Data.(PullDataArgs) 55 | resp = *kv.applyDeleteShards(&deleteResp) 56 | } 57 | 58 | term, isLeader := kv.rf.GetState() 59 | 60 | if !isLeader || term != msg.CommandTerm { 61 | kv.mu.Unlock() 62 | continue 63 | } 64 | 65 | it := IndexAndTerm{msg.CommandIndex, term} 66 | ch, ok := kv.cmdRespChans[it] 67 | if ok { 68 | select { 69 | case ch <- resp: 70 | case <-time.After(10 * time.Millisecond): 71 | } 72 | } 73 | 74 | kv.mu.Unlock() 75 | } else { 76 | // ignore 77 | } 78 | default: 79 | time.Sleep(gap_time) 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/shardkv/snapshot.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "log" 7 | "time" 8 | 9 | "6.824/labgob" 10 | "6.824/shardctrler" 11 | ) 12 | 13 | const threshold float32 = 0.8 14 | const 
snapshotLogGap int = 10 15 | 16 | func (kv *ShardKV) snapshoter() { 17 | for kv.killed() == false { 18 | kv.mu.Lock() 19 | if kv.isNeedSnapshot() { 20 | kv.doSnapshot(kv.lastApplied) 21 | kv.lastSnapshot = kv.lastApplied 22 | } 23 | kv.mu.Unlock() 24 | time.Sleep(snapshot_gap_time) 25 | } 26 | } 27 | 28 | func (kv *ShardKV) isNeedSnapshot() bool { 29 | for _, shard := range kv.shards { 30 | if shard.Status == BePulling { 31 | return false 32 | } 33 | } 34 | 35 | if kv.maxraftstate != -1 { 36 | if kv.rf.RaftPersistSize() > int(threshold*float32(kv.maxraftstate)) || 37 | kv.lastApplied > kv.lastSnapshot+snapshotLogGap { 38 | return true 39 | } 40 | } 41 | return false 42 | } 43 | 44 | func (kv *ShardKV) doSnapshot(commandIndex int) { 45 | w := new(bytes.Buffer) 46 | e := labgob.NewEncoder(w) 47 | if e.Encode(kv.shards) != nil || 48 | e.Encode(kv.lastConfig) != nil || 49 | e.Encode(kv.currentConfig) != nil { 50 | panic("server doSnapshot encode error") 51 | } 52 | kv.rf.Snapshot(commandIndex, w.Bytes()) 53 | } 54 | 55 | func (kv *ShardKV) setSnapshot(snapshot []byte) { 56 | if snapshot == nil || len(snapshot) < 1 { // bootstrap without any state? 57 | return 58 | } 59 | 60 | r := bytes.NewBuffer(snapshot) 61 | d := labgob.NewDecoder(r) 62 | 63 | var shards map[int]*Shard 64 | var lastconfig, currentConfig shardctrler.Config 65 | 66 | if d.Decode(&shards) != nil || 67 | d.Decode(&lastconfig) != nil || 68 | d.Decode(¤tConfig) != nil { 69 | log.Fatalf("server setSnapshot decode error\n") 70 | } else { 71 | var str string 72 | for shardID, shard := range shards { 73 | desc := fmt.Sprintf("[%d : %+v]\n ", shardID, shard) 74 | str += desc 75 | } 76 | Debug(dWarn, "G%+v {S%+v} snapshot read: %+v", kv.gid, kv.me, str) 77 | kv.shards = shards 78 | kv.lastConfig = lastconfig 79 | kv.currentConfig = currentConfig 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/main/diskvd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start a diskvd server. it's a member of some replica 5 | // group, which has other members, and it needs to know 6 | // how to talk to the members of the shardmaster service. 7 | // used by ../diskv/test_test.go 8 | // 9 | // arguments: 10 | // -g groupid 11 | // -m masterport1 -m masterport2 ... 12 | // -s replicaport1 -s replicaport2 ... 13 | // -i my-index-in-server-port-list 14 | // -u unreliable 15 | // -d directory 16 | // -r restart 17 | 18 | import "time" 19 | import "6.824/diskv" 20 | import "os" 21 | import "fmt" 22 | import "strconv" 23 | import "runtime" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: diskvd -g gid -m master... -s server... 
-i my-index -d dir\n") 27 | os.Exit(1) 28 | } 29 | 30 | func main() { 31 | var gid int64 = -1 // my replica group ID 32 | masters := []string{} // ports of shardmasters 33 | replicas := []string{} // ports of servers in my replica group 34 | me := -1 // my index in replicas[] 35 | unreliable := false 36 | dir := "" // store persistent data here 37 | restart := false 38 | 39 | for i := 1; i+1 < len(os.Args); i += 2 { 40 | a0 := os.Args[i] 41 | a1 := os.Args[i+1] 42 | if a0 == "-g" { 43 | gid, _ = strconv.ParseInt(a1, 10, 64) 44 | } else if a0 == "-m" { 45 | masters = append(masters, a1) 46 | } else if a0 == "-s" { 47 | replicas = append(replicas, a1) 48 | } else if a0 == "-i" { 49 | me, _ = strconv.Atoi(a1) 50 | } else if a0 == "-u" { 51 | unreliable, _ = strconv.ParseBool(a1) 52 | } else if a0 == "-d" { 53 | dir = a1 54 | } else if a0 == "-r" { 55 | restart, _ = strconv.ParseBool(a1) 56 | } else { 57 | usage() 58 | } 59 | } 60 | 61 | if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" { 62 | usage() 63 | } 64 | 65 | runtime.GOMAXPROCS(4) 66 | 67 | srv := diskv.StartServer(gid, masters, replicas, me, dir, restart) 68 | srv.Setunreliable(unreliable) 69 | 70 | // for safety, force quit after 10 minutes. 71 | time.Sleep(10 * 60 * time.Second) 72 | mep, _ := os.FindProcess(os.Getpid()) 73 | mep.Kill() 74 | } 75 | -------------------------------------------------------------------------------- /src/mrapps/rtiming.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a MapReduce pseudo-application to test that workers 5 | // execute reduce tasks in parallel. 6 | // 7 | // go build -buildmode=plugin rtiming.go 8 | // 9 | 10 | import "6.824/mr" 11 | import "fmt" 12 | import "os" 13 | import "syscall" 14 | import "time" 15 | import "io/ioutil" 16 | 17 | func nparallel(phase string) int { 18 | // create a file so that other workers will see that 19 | // we're running at the same time as them. 20 | pid := os.Getpid() 21 | myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid) 22 | err := ioutil.WriteFile(myfilename, []byte("x"), 0666) 23 | if err != nil { 24 | panic(err) 25 | } 26 | 27 | // are any other workers running? 28 | // find their PIDs by scanning directory for mr-worker-XXX files. 29 | dd, err := os.Open(".") 30 | if err != nil { 31 | panic(err) 32 | } 33 | names, err := dd.Readdirnames(1000000) 34 | if err != nil { 35 | panic(err) 36 | } 37 | ret := 0 38 | for _, name := range names { 39 | var xpid int 40 | pat := fmt.Sprintf("mr-worker-%s-%%d", phase) 41 | n, err := fmt.Sscanf(name, pat, &xpid) 42 | if n == 1 && err == nil { 43 | err := syscall.Kill(xpid, 0) 44 | if err == nil { 45 | // if err == nil, xpid is alive. 
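// (signal 0 does not actually deliver a signal; syscall.Kill(xpid, 0) only
// performs the existence and permission check, so this is a cheap way to
// probe whether another worker process is still running)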
46 | ret += 1 47 | } 48 | } 49 | } 50 | dd.Close() 51 | 52 | time.Sleep(1 * time.Second) 53 | 54 | err = os.Remove(myfilename) 55 | if err != nil { 56 | panic(err) 57 | } 58 | 59 | return ret 60 | } 61 | 62 | func Map(filename string, contents string) []mr.KeyValue { 63 | 64 | kva := []mr.KeyValue{} 65 | kva = append(kva, mr.KeyValue{"a", "1"}) 66 | kva = append(kva, mr.KeyValue{"b", "1"}) 67 | kva = append(kva, mr.KeyValue{"c", "1"}) 68 | kva = append(kva, mr.KeyValue{"d", "1"}) 69 | kva = append(kva, mr.KeyValue{"e", "1"}) 70 | kva = append(kva, mr.KeyValue{"f", "1"}) 71 | kva = append(kva, mr.KeyValue{"g", "1"}) 72 | kva = append(kva, mr.KeyValue{"h", "1"}) 73 | kva = append(kva, mr.KeyValue{"i", "1"}) 74 | kva = append(kva, mr.KeyValue{"j", "1"}) 75 | return kva 76 | } 77 | 78 | func Reduce(key string, values []string) string { 79 | n := nparallel("reduce") 80 | 81 | val := fmt.Sprintf("%d", n) 82 | 83 | return val 84 | } 85 | -------------------------------------------------------------------------------- /src/mrapps/mtiming.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a MapReduce pseudo-application to test that workers 5 | // execute map tasks in parallel. 6 | // 7 | // go build -buildmode=plugin mtiming.go 8 | // 9 | 10 | import "6.824/mr" 11 | import "strings" 12 | import "fmt" 13 | import "os" 14 | import "syscall" 15 | import "time" 16 | import "sort" 17 | import "io/ioutil" 18 | 19 | func nparallel(phase string) int { 20 | // create a file so that other workers will see that 21 | // we're running at the same time as them. 22 | pid := os.Getpid() 23 | myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid) 24 | err := ioutil.WriteFile(myfilename, []byte("x"), 0666) 25 | if err != nil { 26 | panic(err) 27 | } 28 | 29 | // are any other workers running? 30 | // find their PIDs by scanning directory for mr-worker-XXX files. 31 | dd, err := os.Open(".") 32 | if err != nil { 33 | panic(err) 34 | } 35 | names, err := dd.Readdirnames(1000000) 36 | if err != nil { 37 | panic(err) 38 | } 39 | ret := 0 40 | for _, name := range names { 41 | var xpid int 42 | pat := fmt.Sprintf("mr-worker-%s-%%d", phase) 43 | n, err := fmt.Sscanf(name, pat, &xpid) 44 | if n == 1 && err == nil { 45 | err := syscall.Kill(xpid, 0) 46 | if err == nil { 47 | // if err == nil, xpid is alive. 48 | ret += 1 49 | } 50 | } 51 | } 52 | dd.Close() 53 | 54 | time.Sleep(1 * time.Second) 55 | 56 | err = os.Remove(myfilename) 57 | if err != nil { 58 | panic(err) 59 | } 60 | 61 | return ret 62 | } 63 | 64 | func Map(filename string, contents string) []mr.KeyValue { 65 | t0 := time.Now() 66 | ts := float64(t0.Unix()) + (float64(t0.Nanosecond()) / 1000000000.0) 67 | pid := os.Getpid() 68 | 69 | n := nparallel("map") 70 | 71 | kva := []mr.KeyValue{} 72 | kva = append(kva, mr.KeyValue{ 73 | fmt.Sprintf("times-%v", pid), 74 | fmt.Sprintf("%.1f", ts)}) 75 | kva = append(kva, mr.KeyValue{ 76 | fmt.Sprintf("parallel-%v", pid), 77 | fmt.Sprintf("%d", n)}) 78 | return kva 79 | } 80 | 81 | func Reduce(key string, values []string) string { 82 | //n := nparallel("reduce") 83 | 84 | // sort values to ensure deterministic output. 
85 | vv := make([]string, len(values)) 86 | copy(vv, values) 87 | sort.Strings(vv) 88 | 89 | val := strings.Join(vv, " ") 90 | return val 91 | } 92 | -------------------------------------------------------------------------------- /src/kvraft/client.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | import ( 4 | "crypto/rand" 5 | "math/big" 6 | "time" 7 | 8 | "6.824/labrpc" 9 | ) 10 | 11 | type Clerk struct { 12 | servers []*labrpc.ClientEnd 13 | // You will have to modify this struct. 14 | leaderId int 15 | seqId int64 16 | clientId int64 17 | } 18 | 19 | func nrand() int64 { 20 | max := big.NewInt(int64(1) << 62) 21 | bigx, _ := rand.Int(rand.Reader, max) 22 | x := bigx.Int64() 23 | return x 24 | } 25 | 26 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 27 | ck := new(Clerk) 28 | ck.servers = servers 29 | // You'll have to add code here. 30 | ck.leaderId = 0 31 | ck.clientId = nrand() 32 | ck.seqId = 0 33 | return ck 34 | } 35 | 36 | // 37 | // fetch the current value for a key. 38 | // returns "" if the key does not exist. 39 | // keeps trying forever in the face of all other errors. 40 | // 41 | // you can send an RPC with code like this: 42 | // ok := ck.servers[i].Call("KVServer.Get", &args, &reply) 43 | // 44 | // the types of args and reply (including whether they are pointers) 45 | // must match the declared types of the RPC handler function's 46 | // arguments. and reply must be passed as a pointer. 47 | // 48 | func (ck *Clerk) sendCmd(key string, value string, OpType OPType) string { 49 | ck.seqId += 1 50 | args := CmdArgs{ 51 | SeqId: ck.seqId, 52 | ClientId: ck.clientId, 53 | Key: key, 54 | Value: value, 55 | OpType: OpType, 56 | } 57 | 58 | t0 := time.Now() 59 | for time.Since(t0).Seconds() < 10 { 60 | reply := CmdReply{} 61 | 62 | ok := ck.servers[ck.leaderId].Call("KVServer.Command", &args, &reply) 63 | 64 | if !ok { 65 | ck.leaderId = (ck.leaderId + 1) % len(ck.servers) 66 | time.Sleep(retry_timeout) 67 | continue 68 | } 69 | 70 | if reply.Err == OK { 71 | return reply.Value 72 | } else if reply.Err == ErrNoKey { 73 | return "" 74 | } 75 | 76 | ck.leaderId = (ck.leaderId + 1) % len(ck.servers) 77 | time.Sleep(retry_timeout) 78 | } 79 | panic("10s not reply") 80 | return "" 81 | } 82 | 83 | func (ck *Clerk) Get(key string) string { 84 | return ck.sendCmd(key, "", OpGet) 85 | } 86 | 87 | func (ck *Clerk) Put(key string, value string) { 88 | ck.sendCmd(key, value, OpPut) 89 | } 90 | 91 | func (ck *Clerk) Append(key string, value string) { 92 | ck.sendCmd(key, value, OpAppend) 93 | } 94 | -------------------------------------------------------------------------------- /src/porcupine/model.go: -------------------------------------------------------------------------------- 1 | package porcupine 2 | 3 | import "fmt" 4 | 5 | type Operation struct { 6 | ClientId int // optional, unless you want a visualization; zero-indexed 7 | Input interface{} 8 | Call int64 // invocation time 9 | Output interface{} 10 | Return int64 // response time 11 | } 12 | 13 | type EventKind bool 14 | 15 | const ( 16 | CallEvent EventKind = false 17 | ReturnEvent EventKind = true 18 | ) 19 | 20 | type Event struct { 21 | ClientId int // optional, unless you want a visualization; zero-indexed 22 | Kind EventKind 23 | Value interface{} 24 | Id int 25 | } 26 | 27 | type Model struct { 28 | // Partition functions, such that a history is linearizable if and only 29 | // if each partition is linearizable. 
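// For a key/value model, a natural partition is per key, since operations on
// different keys never interact and each key's sub-history can be checked
// independently. A rough, hypothetical sketch (a KvInput type with a Key
// field is assumed here; it is not defined in this file):
//
//	Partition: func(history []Operation) [][]Operation {
//		byKey := map[string][]Operation{}
//		for _, op := range history {
//			in := op.Input.(KvInput) // assumed input type
//			byKey[in.Key] = append(byKey[in.Key], op)
//		}
//		parts := [][]Operation{}
//		for _, ops := range byKey {
//			parts = append(parts, ops)
//		}
//		return parts
//	},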
If you don't want to implement 30 | // this, you can always use the `NoPartition` functions implemented 31 | // below. 32 | Partition func(history []Operation) [][]Operation 33 | PartitionEvent func(history []Event) [][]Event 34 | // Initial state of the system. 35 | Init func() interface{} 36 | // Step function for the system. Returns whether or not the system 37 | // could take this step with the given inputs and outputs and also 38 | // returns the new state. This should not mutate the existing state. 39 | Step func(state interface{}, input interface{}, output interface{}) (bool, interface{}) 40 | // Equality on states. If you are using a simple data type for states, 41 | // you can use the `ShallowEqual` function implemented below. 42 | Equal func(state1, state2 interface{}) bool 43 | // For visualization, describe an operation as a string. 44 | // For example, "Get('x') -> 'y'". 45 | DescribeOperation func(input interface{}, output interface{}) string 46 | // For visualization purposes, describe a state as a string. 47 | // For example, "{'x' -> 'y', 'z' -> 'w'}" 48 | DescribeState func(state interface{}) string 49 | } 50 | 51 | func NoPartition(history []Operation) [][]Operation { 52 | return [][]Operation{history} 53 | } 54 | 55 | func NoPartitionEvent(history []Event) [][]Event { 56 | return [][]Event{history} 57 | } 58 | 59 | func ShallowEqual(state1, state2 interface{}) bool { 60 | return state1 == state2 61 | } 62 | 63 | func DefaultDescribeOperation(input interface{}, output interface{}) string { 64 | return fmt.Sprintf("%v -> %v", input, output) 65 | } 66 | 67 | func DefaultDescribeState(state interface{}) string { 68 | return fmt.Sprintf("%v", state) 69 | } 70 | 71 | type CheckResult string 72 | 73 | const ( 74 | Unknown CheckResult = "Unknown" // timed out 75 | Ok = "Ok" 76 | Illegal = "Illegal" 77 | ) 78 | -------------------------------------------------------------------------------- /src/shardkv/configure.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "6.824/shardctrler" 5 | ) 6 | 7 | func (kv *ShardKV) configureAction() { 8 | canPerformNextConfig := true 9 | kv.mu.Lock() 10 | for _, shard := range kv.shards { 11 | if shard.Status != Serving { 12 | Debug(dWarn, "G%+v S%d shard: %+v", kv.gid, kv.me, shard) 13 | canPerformNextConfig = false 14 | break 15 | } 16 | } 17 | currentConfigNum := kv.currentConfig.Num 18 | kv.mu.Unlock() 19 | if canPerformNextConfig { 20 | nextConfig := kv.sc.Query(currentConfigNum + 1) 21 | if nextConfig.Num == currentConfigNum+1 { 22 | kv.Execute(NewConfigurationCommand(&nextConfig), &OpResp{}) 23 | } 24 | } else { 25 | Debug(dWarn, "G%+v {S%+v} don't need fetch config!", kv.gid, kv.me) 26 | } 27 | } 28 | 29 | func (kv *ShardKV) applyConfiguration(nextConfig *shardctrler.Config) *OpResp { 30 | if nextConfig.Num == kv.currentConfig.Num+1 { 31 | kv.updateShardStatus(nextConfig) 32 | kv.lastConfig = kv.currentConfig.DeepCopy() 33 | kv.currentConfig = nextConfig.DeepCopy() 34 | Debug(dWarn, "G%+v {S%+v} applyConfiguration %d is %+v", kv.gid, kv.me, nextConfig.Num, nextConfig) 35 | return &OpResp{OK, ""} 36 | } 37 | return &OpResp{ErrTimeoutReq, ""} 38 | } 39 | 40 | func (kv *ShardKV) updateShardStatus(nextConfig *shardctrler.Config) { 41 | // special judge 42 | if nextConfig.Num == 1 { 43 | shards := kv.getAllShards(nextConfig) 44 | for _, shard := range shards { 45 | kv.shards[shard] = NewShard(Serving) 46 | } 47 | return 48 | } 49 | 50 | newShards := 
kv.getAllShards(nextConfig) 51 | nowShards := kv.getAllShards(&kv.currentConfig) 52 | // loss shard 53 | for _, nowShard := range nowShards { 54 | if nextConfig.Shards[nowShard] != kv.gid { 55 | // BePulling 56 | kv.shards[nowShard].Status = BePulling 57 | } 58 | } 59 | // get shard 60 | for _, newShard := range newShards { 61 | if kv.currentConfig.Shards[newShard] != kv.gid { 62 | // Pulling 63 | kv.shards[newShard] = NewShard(Pulling) 64 | } 65 | } 66 | } 67 | 68 | func (kv *ShardKV) getAllShards(nextConfig *shardctrler.Config) []int { 69 | var shards []int 70 | for shard, gid := range nextConfig.Shards { 71 | if gid == kv.gid { 72 | shards = append(shards, shard) 73 | } 74 | } 75 | return shards 76 | } 77 | 78 | func (kv *ShardKV) getShardIDsByStatus(status ShardStatus, config *shardctrler.Config) map[int][]int { 79 | gid2shardIDs := make(map[int][]int) 80 | for shard, _ := range kv.shards { 81 | if kv.shards[shard].Status == status { 82 | gid := config.Shards[shard] 83 | if _, ok := gid2shardIDs[gid]; !ok { 84 | vec := [1]int{shard} 85 | gid2shardIDs[gid] = vec[:] 86 | } else { 87 | gid2shardIDs[gid] = append(gid2shardIDs[gid], shard) 88 | } 89 | } 90 | } 91 | return gid2shardIDs 92 | } 93 | -------------------------------------------------------------------------------- /src/shardkv/migration.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | type PullDataArgs struct { 8 | ConfNum int 9 | ShardIds []int 10 | } 11 | 12 | type PullDataReply struct { 13 | Err Err 14 | ConfNum int 15 | Shards map[int]*Shard 16 | } 17 | 18 | func (kv *ShardKV) migrationAction() { 19 | kv.mu.Lock() 20 | gid2shardIDs := kv.getShardIDsByStatus(Pulling, &kv.lastConfig) 21 | if len(gid2shardIDs) == 0 { 22 | kv.mu.Unlock() 23 | return 24 | } 25 | var wg sync.WaitGroup 26 | for gid, shardIDs := range gid2shardIDs { 27 | wg.Add(1) 28 | servers := kv.lastConfig.Groups[gid] 29 | go func(servers []string, configNum int, shardIDs []int) { 30 | defer wg.Done() 31 | args := PullDataArgs{ 32 | ConfNum: configNum, 33 | ShardIds: shardIDs, 34 | } 35 | for _, server := range servers { 36 | var resp PullDataReply 37 | srv := kv.makeEnd(server) 38 | if srv.Call("ShardKV.GetShardsData", &args, &resp) && resp.Err == OK { 39 | kv.Execute(NewInsertShardsCommand(&resp), &OpResp{}) 40 | } 41 | } 42 | }(servers, kv.currentConfig.Num, shardIDs) 43 | } 44 | kv.mu.Unlock() 45 | Debug(dServer, "G%+v {S%+v} migrationAction wait", kv.gid, kv.me) 46 | wg.Wait() 47 | Debug(dServer, "G%+v {S%+v} migrationAction done", kv.gid, kv.me) 48 | } 49 | 50 | func (kv *ShardKV) GetShardsData(args *PullDataArgs, reply *PullDataReply) { 51 | defer Debug(dServer, "G%+v {S%+v} GetShardsData: args: %+v reply: %+v", kv.gid, kv.me, args, reply) 52 | // only pull shards from leader 53 | if _, isLeader := kv.rf.GetState(); !isLeader { 54 | reply.Err = ErrWrongLeader 55 | return 56 | } 57 | kv.mu.Lock() 58 | 59 | if kv.currentConfig.Num < args.ConfNum { 60 | reply.Err = ErrNotReady 61 | kv.mu.Unlock() 62 | kv.configureAction() 63 | return 64 | } 65 | 66 | reply.Shards = make(map[int]*Shard) 67 | for _, shardID := range args.ShardIds { 68 | reply.Shards[shardID] = kv.shards[shardID].deepCopy() 69 | } 70 | 71 | reply.ConfNum, reply.Err = args.ConfNum, OK 72 | kv.mu.Unlock() 73 | } 74 | 75 | func (kv *ShardKV) applyInsertShards(shardsInfo *PullDataReply) *OpResp { 76 | Debug(dServer, "G%+v {S%+v} before applyInsertShards: %+v", kv.gid, kv.me, kv.shards) 77 | if 
shardsInfo.ConfNum == kv.currentConfig.Num { 78 | for shardId, shardData := range shardsInfo.Shards { 79 | if kv.shards[shardId].Status == Pulling { 80 | kv.shards[shardId] = shardData.deepCopy() 81 | kv.shards[shardId].Status = GCing 82 | } else { 83 | Debug(dWarn, "G%+v {S%+v} shard %d is not Pulling: %+v", kv.gid, kv.me, shardId, kv.shards[shardId]) 84 | break 85 | } 86 | } 87 | Debug(dServer, "G%+v {S%+v} after applyInsertShards: %+v", kv.gid, kv.me, kv.shards) 88 | return &OpResp{OK, ""} 89 | } 90 | return &OpResp{ErrOutDated, ""} 91 | } 92 | -------------------------------------------------------------------------------- /src/raft/log.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | type Entry struct { 6 | Index int 7 | Term int 8 | Cmd interface{} 9 | } 10 | 11 | func (rf *Raft) frontLog() Entry { 12 | return rf.log[0] 13 | } 14 | 15 | func (rf *Raft) frontLogIndex() int { 16 | return rf.log[0].Index 17 | } 18 | 19 | func (rf *Raft) lastLog() Entry { 20 | return rf.log[len(rf.log)-1] 21 | } 22 | 23 | func (rf *Raft) lastLogIndex() int { 24 | return rf.log[len(rf.log)-1].Index 25 | } 26 | 27 | // nextIndex can be as large as len(log), which would be out of range 28 | func (rf *Raft) transfer(index int) (int, int) { 29 | begin := rf.frontLogIndex() 30 | end := rf.lastLogIndex() 31 | // valid indices lie in the closed interval [begin, end]; 32 | // anything outside it is rejected below 33 | if index < begin || index > end { 34 | utils.Debug(utils.DWarn, "S%d log out of range: %d, [%d, %d]", rf.me, index, begin, end) 35 | return 0, -1 36 | } 37 | return index - begin, 0 38 | } 39 | 40 | func (rf *Raft) getEntry(index int) (Entry, int) { 41 | begin := rf.frontLogIndex() 42 | end := rf.lastLogIndex() 43 | // valid indices lie in the closed interval [begin, end]; 44 | // anything outside it is rejected below
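// e.g. if the snapshot covers everything up to and including global index 5,
// then log[0].Index == 5 (the boundary/dummy entry) and global index 7 lives
// at log[7-5] == log[2]; both transfer() above and the check below rely on
// that offset arithmetic.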
45 | if index < begin || index > end { 46 | utils.Debug(utils.DWarn, "S%d log out of range: %d, [%d, %d]", rf.me, index, begin, end) 47 | return Entry{magic_index, magic_term, nil}, -1 48 | } 49 | return rf.log[index-begin], 0 50 | } 51 | 52 | func (rf *Raft) isUpToDate(lastLogIndex int, lastLogTerm int) bool { 53 | entry := rf.lastLog() 54 | index := entry.Index 55 | term := entry.Term 56 | if term == lastLogTerm { 57 | return lastLogIndex >= index 58 | } 59 | return lastLogTerm > term 60 | } 61 | 62 | func (rf *Raft) toCommit() { 63 | // append entries before commit 64 | if rf.commitIndex >= rf.lastLogIndex() { 65 | return 66 | } 67 | 68 | for i := rf.lastLogIndex(); i > rf.commitIndex; i-- { 69 | entry, err := rf.getEntry(i) 70 | if err < 0 { 71 | continue 72 | } 73 | 74 | if entry.Term != rf.currentTerm { 75 | return 76 | } 77 | 78 | cnt := 1 // 1 => self 79 | for j, match := range rf.matchIndex { 80 | if j != rf.me && match >= i { 81 | cnt++ 82 | } 83 | if cnt > len(rf.peers)/2 { 84 | rf.commitIndex = i 85 | utils.Debug(utils.DCommit, "S%d commit to %v", rf.me, rf.commitIndex) 86 | rf.applyCond.Signal() 87 | return 88 | } 89 | } 90 | } 91 | 92 | utils.Debug(utils.DCommit, "S%d don't have half replicated from %v to %v now", rf.me, rf.commitIndex, rf.lastLogIndex()) 93 | } 94 | 95 | func (rf *Raft) HasLogInCurrentTerm() bool { 96 | rf.mu.Lock() 97 | defer rf.mu.Unlock() 98 | 99 | for i := len(rf.log) - 1; i > 0; i-- { 100 | if rf.log[i].Term > rf.currentTerm { 101 | continue 102 | } 103 | if rf.log[i].Term == rf.currentTerm { 104 | return true 105 | } 106 | if rf.log[i].Term < rf.currentTerm { 107 | break 108 | } 109 | } 110 | return false 111 | } 112 | -------------------------------------------------------------------------------- /src/main/mrsequential.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // simple sequential MapReduce. 5 | // 6 | // go run mrsequential.go wc.so pg*.txt 7 | // 8 | 9 | import "fmt" 10 | import "6.824/mr" 11 | import "plugin" 12 | import "os" 13 | import "log" 14 | import "io/ioutil" 15 | import "sort" 16 | 17 | // for sorting by key. 18 | type ByKey []mr.KeyValue 19 | 20 | // for sorting by key. 21 | func (a ByKey) Len() int { return len(a) } 22 | func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 23 | func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } 24 | 25 | func main() { 26 | if len(os.Args) < 3 { 27 | fmt.Fprintf(os.Stderr, "Usage: mrsequential xxx.so inputfiles...\n") 28 | os.Exit(1) 29 | } 30 | 31 | mapf, reducef := loadPlugin(os.Args[1]) 32 | 33 | // 34 | // read each input file, 35 | // pass it to Map, 36 | // accumulate the intermediate Map output. 37 | // 38 | intermediate := []mr.KeyValue{} 39 | for _, filename := range os.Args[2:] { 40 | file, err := os.Open(filename) 41 | if err != nil { 42 | log.Fatalf("cannot open %v", filename) 43 | } 44 | content, err := ioutil.ReadAll(file) 45 | if err != nil { 46 | log.Fatalf("cannot read %v", filename) 47 | } 48 | file.Close() 49 | kva := mapf(filename, string(content)) 50 | intermediate = append(intermediate, kva...) 51 | } 52 | 53 | // 54 | // a big difference from real MapReduce is that all the 55 | // intermediate data is in one place, intermediate[], 56 | // rather than being partitioned into NxM buckets. 
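// (in the distributed version the usual approach is to bucket each
// intermediate key with something like ihash(key) % nReduce and write one
// intermediate file per map task and reduce bucket, so each reducer reads
// only its own bucket; the helper names here are illustrative, not prescribed)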
57 | // 58 | 59 | sort.Sort(ByKey(intermediate)) 60 | 61 | oname := "mr-out-0" 62 | ofile, _ := os.Create(oname) 63 | 64 | // 65 | // call Reduce on each distinct key in intermediate[], 66 | // and print the result to mr-out-0. 67 | // 68 | i := 0 69 | for i < len(intermediate) { 70 | j := i + 1 71 | for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { 72 | j++ 73 | } 74 | values := []string{} 75 | for k := i; k < j; k++ { 76 | values = append(values, intermediate[k].Value) 77 | } 78 | output := reducef(intermediate[i].Key, values) 79 | 80 | // this is the correct format for each line of Reduce output. 81 | fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) 82 | 83 | i = j 84 | } 85 | 86 | ofile.Close() 87 | } 88 | 89 | // 90 | // load the application Map and Reduce functions 91 | // from a plugin file, e.g. ../mrapps/wc.so 92 | // 93 | func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) { 94 | p, err := plugin.Open(filename) 95 | if err != nil { 96 | log.Fatalf("cannot load plugin %v", filename) 97 | } 98 | xmapf, err := p.Lookup("Map") 99 | if err != nil { 100 | log.Fatalf("cannot find Map in %v", filename) 101 | } 102 | mapf := xmapf.(func(string, string) []mr.KeyValue) 103 | xreducef, err := p.Lookup("Reduce") 104 | if err != nil { 105 | log.Fatalf("cannot find Reduce in %v", filename) 106 | } 107 | reducef := xreducef.(func(string, []string) string) 108 | 109 | return mapf, reducef 110 | } 111 | -------------------------------------------------------------------------------- /src/shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // client code to talk to a sharded key/value service. 5 | // 6 | // the client first talks to the shardctrler to find out 7 | // the assignment of shards (keys) to groups, and then 8 | // talks to the group that holds the key's shard. 9 | // 10 | 11 | import ( 12 | "crypto/rand" 13 | "math/big" 14 | "time" 15 | 16 | "6.824/labrpc" 17 | "6.824/shardctrler" 18 | ) 19 | 20 | // 21 | // which shard is a key in? 22 | // please use this function, 23 | // and please do not change it. 24 | // 25 | func key2shard(key string) int { 26 | shard := 0 27 | if len(key) > 0 { 28 | shard = int(key[0]) 29 | } 30 | shard %= shardctrler.NShards 31 | return shard 32 | } 33 | 34 | func nrand() int64 { 35 | max := big.NewInt(int64(1) << 62) 36 | bigx, _ := rand.Int(rand.Reader, max) 37 | x := bigx.Int64() 38 | return x 39 | } 40 | 41 | type Clerk struct { 42 | sm *shardctrler.Clerk 43 | config shardctrler.Config 44 | make_end func(string) *labrpc.ClientEnd 45 | // You will have to modify this struct. 46 | seqId int64 47 | clientId int64 48 | } 49 | 50 | // 51 | // the tester calls MakeClerk. 52 | // 53 | // ctrlers[] is needed to call shardctrler.MakeClerk(). 54 | // 55 | // make_end(servername) turns a server name from a 56 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 57 | // send RPCs. 58 | // 59 | func MakeClerk(ctrlers []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk { 60 | ck := new(Clerk) 61 | ck.sm = shardctrler.MakeClerk(ctrlers) 62 | ck.make_end = make_end 63 | // You'll have to add code here. 64 | ck.clientId = nrand() 65 | ck.seqId = 0 66 | ck.config = ck.sm.Query(-1) 67 | return ck 68 | } 69 | 70 | // 71 | // fetch the current value for a key. 72 | // returns "" if the key does not exist. 73 | // keeps trying forever in the face of all other errors. 
74 | // You will have to modify this function. 75 | // 76 | func (ck *Clerk) sendCmd(key string, value string, OpType OPType) string { 77 | ck.seqId += 1 78 | args := CmdArgs{ 79 | SeqId: ck.seqId, 80 | ClientId: ck.clientId, 81 | Key: key, 82 | Value: value, 83 | OpType: OpType, 84 | } 85 | 86 | t0 := time.Now() 87 | for time.Since(t0).Seconds() < 15 { 88 | shard := key2shard(key) 89 | gid := ck.config.Shards[shard] 90 | if servers, ok := ck.config.Groups[gid]; ok { 91 | // try each server for the shard. 92 | for si := 0; si < len(servers); si++ { 93 | srv := ck.make_end(servers[si]) 94 | var reply CmdReply 95 | ok := srv.Call("ShardKV.Command", &args, &reply) 96 | if ok && (reply.Err == OK || reply.Err == ErrNoKey) { 97 | return reply.Value 98 | } 99 | if ok && (reply.Err == ErrWrongGroup) { 100 | break 101 | } 102 | // ... not ok, or ErrWrongLeader 103 | } 104 | } 105 | time.Sleep(100 * time.Millisecond) 106 | // ask controler for the latest configuration. 107 | ck.config = ck.sm.Query(-1) 108 | } 109 | 110 | panic("15s not reply") 111 | return "" 112 | } 113 | 114 | func (ck *Clerk) Get(key string) string { 115 | return ck.sendCmd(key, "", OpGet) 116 | } 117 | 118 | func (ck *Clerk) Put(key string, value string) { 119 | ck.sendCmd(key, value, OpPut) 120 | } 121 | 122 | func (ck *Clerk) Append(key string, value string) { 123 | ck.sendCmd(key, value, OpAppend) 124 | } 125 | -------------------------------------------------------------------------------- /.check-build: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eu 4 | 5 | REFERENCE_FILES=( 6 | # lab 1 7 | src/mrapps/crash.go 8 | src/mrapps/indexer.go 9 | src/mrapps/mtiming.go 10 | src/mrapps/nocrash.go 11 | src/mrapps/rtiming.go 12 | src/mrapps/wc.go 13 | src/main/mrsequential.go 14 | src/main/mrcoordinator.go 15 | src/main/mrworker.go 16 | 17 | # lab 2 18 | src/raft/persister.go 19 | src/raft/test_test.go 20 | src/raft/config.go 21 | src/labrpc/labrpc.go 22 | 23 | # lab 3 24 | src/kvraft/test_test.go 25 | src/kvraft/config.go 26 | 27 | # lab 4a 28 | src/shardctrler/test_test.go 29 | src/shardctrler/config.go 30 | 31 | # lab 4b 32 | src/shardkv/test_test.go 33 | src/shardkv/config.go 34 | ) 35 | 36 | main() { 37 | upstream="$1" 38 | labnum="$2" 39 | 40 | # make sure we have reference copy of lab, in FETCH_HEAD 41 | git fetch "$upstream" 2>/dev/null || die "unable to git fetch $upstream" 42 | 43 | # copy existing directory 44 | tmpdir="$(mktemp -d)" 45 | find src -type s -delete # cp can't copy sockets 46 | cp -r src "$tmpdir" 47 | orig="$PWD" 48 | cd "$tmpdir" 49 | 50 | # check out reference files 51 | for f in ${REFERENCE_FILES[@]}; do 52 | mkdir -p "$(dirname $f)" 53 | git --git-dir="$orig/.git" show "FETCH_HEAD:$f" > "$f" 54 | done 55 | 56 | case $labnum in 57 | "lab1") check_lab1;; 58 | "lab2a"|"lab2b"|"lab2c"|"lab2d") check_lab2;; 59 | "lab3a"|"lab3b") check_lab3;; 60 | "lab4a") check_lab4a;; 61 | "lab4b") check_lab4b;; 62 | *) die "unknown lab: $labnum";; 63 | esac 64 | 65 | cd 66 | rm -rf "$tmpdir" 67 | } 68 | 69 | check_lab1() { 70 | check_cmd cd src/mrapps 71 | check_cmd go build -buildmode=plugin wc.go 72 | check_cmd go build -buildmode=plugin indexer.go 73 | check_cmd go build -buildmode=plugin mtiming.go 74 | check_cmd go build -buildmode=plugin rtiming.go 75 | check_cmd go build -buildmode=plugin crash.go 76 | check_cmd go build -buildmode=plugin nocrash.go 77 | check_cmd cd ../main 78 | check_cmd go build mrcoordinator.go 79 | check_cmd go build 
mrworker.go 80 | check_cmd go build mrsequential.go 81 | } 82 | 83 | check_lab2() { 84 | check_cmd cd src/raft 85 | check_cmd go test -c 86 | } 87 | 88 | check_lab3() { 89 | check_cmd cd src/kvraft 90 | check_cmd go test -c 91 | } 92 | 93 | check_lab4a() { 94 | check_cmd cd src/shardctrler 95 | check_cmd go test -c 96 | } 97 | 98 | check_lab4b() { 99 | check_cmd cd src/shardkv 100 | check_cmd go test -c 101 | # also check other labs/parts 102 | cd "$tmpdir" 103 | check_lab4a 104 | cd "$tmpdir" 105 | check_lab3 106 | cd "$tmpdir" 107 | check_lab2 108 | } 109 | 110 | check_cmd() { 111 | if ! "$@" >/dev/null 2>&1; then 112 | echo "We tried building your source code with testing-related files reverted to original versions, and the build failed. This copy of your code is preserved in $tmpdir for debugging purposes. Please make sure the code you are trying to hand in does not make changes to test code." >&2 113 | echo >&2 114 | echo "The build failed while trying to run the following command:" >&2 115 | echo >&2 116 | echo "$ $@" >&2 117 | echo " (cwd: ${PWD#$tmpdir/})" >&2 118 | exit 1 119 | fi 120 | } 121 | 122 | die() { 123 | echo "$1" >&2 124 | exit 1 125 | } 126 | 127 | main "$@" 128 | -------------------------------------------------------------------------------- /src/raft/appendEntries.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // ticker() call doAppendEntries(), ticker() hold lock 6 | // if a node turn to leader, leader will call doAppendEntries() to send a heartbeat 7 | func (rf *Raft) doAppendEntries() { 8 | for i := 0; i < len(rf.peers); i++ { 9 | if i == rf.me { 10 | continue 11 | } 12 | 13 | wantSendIndex := rf.nextIndex[i] - 1 14 | if wantSendIndex < rf.frontLogIndex() { 15 | go rf.doInstallSnapshot(i) 16 | } else { 17 | go rf.appendTo(i) 18 | } 19 | } 20 | } 21 | 22 | func (rf *Raft) appendTo(peer int) { 23 | rf.mu.Lock() 24 | if rf.status != leader { 25 | utils.Debug(utils.DWarn, "S%d status change, it is not leader", rf.me) 26 | rf.mu.Unlock() 27 | return 28 | } 29 | args := AppendEntriesArgs{ 30 | Term: rf.currentTerm, 31 | LeaderId: rf.me, 32 | PrevLogIndex: magic_index, 33 | PrevLogTerm: magic_term, 34 | LeaderCommit: rf.commitIndex, 35 | } 36 | 37 | // utils.Debug(utils.DTrace, "S%d log length: %d, nextIndex:{%+v}", rf.me, len(rf.log), rf.nextIndex) 38 | // 0 <= prevLogIndex <= len(log) - 1 39 | prevLogIndex := rf.nextIndex[peer] - 1 40 | idx, err := rf.transfer(prevLogIndex) 41 | if err < 0 { 42 | rf.mu.Unlock() 43 | return 44 | } 45 | 46 | args.PrevLogIndex = rf.log[idx].Index 47 | args.PrevLogTerm = rf.log[idx].Term 48 | 49 | // must copy in here 50 | entries := rf.log[idx+1:] 51 | args.Entries = make([]Entry, len(entries)) 52 | copy(args.Entries, entries) 53 | rf.mu.Unlock() 54 | 55 | reply := AppendEntriesReply{} 56 | 57 | ok := rf.sendAppendEntries(peer, &args, &reply) 58 | if !ok { 59 | return 60 | } 61 | 62 | rf.mu.Lock() 63 | defer rf.mu.Unlock() 64 | 65 | // status changed or outdue data, ignore 66 | if rf.currentTerm != args.Term || rf.status != leader || reply.Term < rf.currentTerm { 67 | // overdue, ignore 68 | utils.Debug(utils.DInfo, "S%d old response from C%d, ignore it", rf.me, peer) 69 | return 70 | } 71 | 72 | // If RPC request or response contains term T > currentTerm: 73 | // set currentTerm = T, convert to follower (§5.1) 74 | if reply.Term > rf.currentTerm { 75 | utils.Debug(utils.DTerm, "S%d S%d term larger(%d > %d)", rf.me, peer, reply.Term, 
rf.currentTerm) 76 | rf.currentTerm, rf.votedFor = reply.Term, voted_nil 77 | rf.persist() 78 | rf.TurnTo(follower) 79 | return 80 | } 81 | 82 | if reply.Success { 83 | // utils.Debug(utils.DTrace, "S%d before nextIndex:{%+v} ", rf.me, rf.nextIndex) 84 | rf.nextIndex[peer] = args.PrevLogIndex + len(args.Entries) + 1 85 | // utils.Debug(utils.DTrace, "S%d after nextIndex:{%+v}", rf.me, rf.nextIndex) 86 | rf.matchIndex[peer] = args.PrevLogIndex + len(args.Entries) 87 | rf.toCommit() 88 | return 89 | } 90 | 91 | if reply.XTerm == -1 { // null slot 92 | rf.nextIndex[peer] -= reply.XLen 93 | } else if reply.XTerm >= 0 { 94 | termNotExit := true 95 | for index := rf.nextIndex[peer] - 1; index >= 1; index-- { 96 | entry, err := rf.getEntry(index) 97 | if err < 0 { 98 | continue 99 | } 100 | 101 | if entry.Term > reply.XTerm { 102 | continue 103 | } 104 | 105 | if entry.Term == reply.XTerm { 106 | rf.nextIndex[peer] = index + 1 107 | termNotExit = false 108 | break 109 | } 110 | if entry.Term < reply.XTerm { 111 | break 112 | } 113 | } 114 | if termNotExit { 115 | rf.nextIndex[peer] = reply.XIndex 116 | } 117 | } else { 118 | rf.nextIndex[peer] = reply.XIndex 119 | } 120 | 121 | // utils.Debug(utils.DTrace, "S%d nextIndex:{%+v}", rf.me, rf.nextIndex) 122 | // the smallest nextIndex is 1 123 | // otherwise, it will cause out of range error 124 | if rf.nextIndex[peer] < 1 { 125 | rf.nextIndex[peer] = 1 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/raft/appendEntriesHandler.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // handler need to require lock 6 | func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { 7 | rf.mu.Lock() 8 | defer rf.mu.Unlock() 9 | 10 | utils.Debug(utils.DLog, "S%d S%d appendEntries", rf.me, args.LeaderId) 11 | defer utils.Debug(utils.DLog, "S%d arg: %+v reply: %+v", rf.me, args, reply) 12 | 13 | defer rf.persist() 14 | 15 | if args.Term < rf.currentTerm { // leader out, refuse 16 | reply.Term = rf.currentTerm 17 | reply.Success = false 18 | utils.Debug(utils.DTerm, "S%d S%d term less(%d < %d)", rf.me, args.LeaderId, args.Term, rf.currentTerm) 19 | return 20 | } 21 | 22 | if args.Term > rf.currentTerm { 23 | // If RPC request or response contains term T > currentTerm: 24 | // set currentTerm = T, convert to follower (§5.1) 25 | rf.currentTerm, rf.votedFor = args.Term, voted_nil 26 | utils.Debug(utils.DTerm, "S%d S%d term larger(%d > %d)", rf.me, args.LeaderId, args.Term, rf.currentTerm) 27 | rf.TurnTo(follower) 28 | } 29 | 30 | if rf.status != follower { 31 | // If AppendEntries RPC received from new leader: 32 | // convert to follower 33 | rf.TurnTo(follower) 34 | } 35 | 36 | reply.Success = true 37 | reply.Term = rf.currentTerm 38 | // prevent election timeouts (§5.2) 39 | rf.resetElectionTime() 40 | 41 | // heartbeat, return 42 | // if args.PrevLogIndex == magic_index && args.PrevLogTerm == magic_term { 43 | // return 44 | // } 45 | 46 | if args.PrevLogIndex < rf.frontLogIndex() { 47 | reply.XTerm, reply.XIndex, reply.Success = -2, rf.frontLogIndex() + 1, false 48 | utils.Debug(utils.DInfo, "S%d args's prevLogIndex too smaller(%v < %v)", rf.me, args.PrevLogIndex, rf.frontLogIndex()) 49 | return 50 | } 51 | 52 | if args.PrevLogIndex > rf.lastLogIndex() { 53 | reply.Success = false 54 | reply.XTerm = -1 55 | reply.XLen = args.PrevLogIndex - rf.lastLogIndex() 56 | return 57 | } 58 | 59 | idx, err 
:= rf.transfer(args.PrevLogIndex) 60 | if err < 0 { 61 | return 62 | } 63 | 64 | if rf.log[idx].Term != args.PrevLogTerm { 65 | reply.Success = false 66 | reply.XTerm = rf.log[idx].Term 67 | reply.XIndex = args.PrevLogIndex 68 | // 0 is a dummy entry => quit in index is 1 69 | // binary search is better than this way 70 | for index := idx; index >= 1; index-- { 71 | if rf.log[index-1].Term != reply.XTerm { 72 | reply.XIndex = index 73 | break 74 | } 75 | } 76 | return 77 | } 78 | 79 | if args.Entries != nil && len(args.Entries) != 0 { 80 | if rf.isConflict(args) { 81 | rf.log = rf.log[:idx+1] 82 | entries := make([]Entry, len(args.Entries)) 83 | copy(entries, args.Entries) 84 | rf.log = append(rf.log, entries...) 85 | // utils.Debug(utils.DInfo, "S%d conflict, truncate log: %+v", rf.me, rf.log) 86 | } else { 87 | // utils.Debug(utils.DInfo, "S%d no conflict, log: %+v", rf.me, rf.log) 88 | } 89 | } else { 90 | utils.Debug(utils.DInfo, "S%d args entries nil or length is 0: %v", rf.me, args.Entries) 91 | } 92 | 93 | if args.LeaderCommit > rf.commitIndex { 94 | rf.commitIndex = args.LeaderCommit 95 | if args.LeaderCommit > rf.lastLogIndex() { 96 | rf.commitIndex = rf.lastLogIndex() 97 | } 98 | utils.Debug(utils.DCommit, "S%d commit to %v(lastLogIndex: %d)", rf.me, rf.commitIndex, rf.lastLogIndex()) 99 | rf.applyCond.Signal() 100 | } 101 | // utils.Debug(utils.DInfo, "S%d log: %+v", rf.me, rf.log) 102 | } 103 | 104 | func (rf *Raft) isConflict(args *AppendEntriesArgs) bool { 105 | base_index := args.PrevLogIndex + 1 106 | for i, entry := range args.Entries { 107 | entry_rf, err := rf.getEntry(i + base_index) 108 | if err < 0 { 109 | return true 110 | } 111 | if entry_rf.Term != entry.Term { 112 | return true 113 | } 114 | } 115 | return false 116 | } 117 | -------------------------------------------------------------------------------- /src/shardctrler/server.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | "time" 7 | 8 | "6.824/labgob" 9 | "6.824/labrpc" 10 | "6.824/raft" 11 | ) 12 | 13 | type ShardCtrler struct { 14 | mu sync.Mutex 15 | me int 16 | rf *raft.Raft 17 | applyCh chan raft.ApplyMsg 18 | dead int32 19 | 20 | // Your data here. 21 | configs *ConfigModel 22 | cmdRespChans map[IndexAndTerm]chan OpResp 23 | LastCmdContext map[int64]OpContext 24 | lastApplied int 25 | } 26 | 27 | // 28 | // the tester calls Kill() when a ShardCtrler instance won't 29 | // be needed again. you are not required to do anything 30 | // in Kill(), but it might be convenient to (for example) 31 | // turn off debug output from this instance. 32 | // 33 | func (sc *ShardCtrler) Kill() { 34 | atomic.StoreInt32(&sc.dead, 1) 35 | sc.rf.Kill() 36 | // Your code here, if desired. 37 | } 38 | 39 | func (sc *ShardCtrler) killed() bool { 40 | z := atomic.LoadInt32(&sc.dead) 41 | return z == 1 42 | } 43 | 44 | // needed by shardsc tester 45 | func (sc *ShardCtrler) Raft() *raft.Raft { 46 | return sc.rf 47 | } 48 | 49 | // 50 | // servers[] contains the ports of the set of 51 | // servers that will cooperate via Raft to 52 | // form the fault-tolerant shardctrler service. 53 | // me is the index of the current server in servers[]. 
54 | // 55 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardCtrler { 56 | sc := new(ShardCtrler) 57 | sc.me = me 58 | 59 | labgob.Register(Op{}) 60 | sc.applyCh = make(chan raft.ApplyMsg) 61 | sc.rf = raft.Make(servers, me, persister, sc.applyCh) 62 | 63 | // Your code here. 64 | sc.configs = NewConfigModel(me) 65 | sc.cmdRespChans = make(map[IndexAndTerm]chan OpResp) 66 | sc.LastCmdContext = make(map[int64]OpContext) 67 | sc.lastApplied = 0 68 | 69 | // long-time goroutines 70 | go sc.applier() 71 | 72 | return sc 73 | } 74 | 75 | // Handler 76 | func (sc *ShardCtrler) Command(args *CommandArgs, reply *CommandReply) { 77 | defer Debug(dWarn, "S%d args: %+v reply: %+v", sc.me, args, reply) 78 | 79 | if !sc.configs.isLegal(args.Op) { 80 | reply.Config, reply.Err = Config{}, ErrOpt 81 | } 82 | 83 | sc.mu.Lock() 84 | if args.Op != OpQuery && sc.isDuplicate(args.ClientId, args.SeqId) { 85 | context := sc.LastCmdContext[args.ClientId] 86 | reply.Config, reply.Err = context.Reply.Config, context.Reply.Err 87 | sc.mu.Unlock() 88 | return 89 | } 90 | sc.mu.Unlock() 91 | 92 | index, term, is_leader := sc.rf.Start(Op(*args)) 93 | if !is_leader { 94 | reply.Config, reply.Err = Config{}, ErrWrongLeader 95 | return 96 | } 97 | 98 | sc.mu.Lock() 99 | it := IndexAndTerm{index, term} 100 | ch := make(chan OpResp, 1) 101 | sc.cmdRespChans[it] = ch 102 | sc.mu.Unlock() 103 | 104 | defer func() { 105 | sc.mu.Lock() 106 | // close(sc.cmdRespChans[index]) 107 | delete(sc.cmdRespChans, it) 108 | sc.mu.Unlock() 109 | close(ch) 110 | }() 111 | 112 | t := time.NewTimer(cmd_timeout) 113 | defer t.Stop() 114 | 115 | for { 116 | sc.mu.Lock() 117 | select { 118 | case resp := <-ch: 119 | Debug(dServer, "S%d have applied, resp: %+v", sc.me, resp) 120 | reply.Config, reply.Err = resp.Config, resp.Err 121 | sc.mu.Unlock() 122 | return 123 | case <-t.C: 124 | priority: 125 | for { 126 | select { 127 | case resp := <-ch: 128 | Debug(dServer, "S%d have applied, resp: %+v", sc.me, resp) 129 | reply.Config, reply.Err = resp.Config, resp.Err 130 | sc.mu.Unlock() 131 | return 132 | default: 133 | break priority 134 | } 135 | } 136 | Debug(dServer, "S%d timeout", sc.me) 137 | reply.Config, reply.Err = Config{}, ErrTimeout 138 | sc.mu.Unlock() 139 | return 140 | default: 141 | sc.mu.Unlock() 142 | time.Sleep(gap_time) 143 | } 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/labgob/test_test.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | import "testing" 4 | 5 | import "bytes" 6 | 7 | type T1 struct { 8 | T1int0 int 9 | T1int1 int 10 | T1string0 string 11 | T1string1 string 12 | } 13 | 14 | type T2 struct { 15 | T2slice []T1 16 | T2map map[int]*T1 17 | T2t3 interface{} 18 | } 19 | 20 | type T3 struct { 21 | T3int999 int 22 | } 23 | 24 | // 25 | // test that we didn't break GOB. 
26 | // 27 | func TestGOB(t *testing.T) { 28 | e0 := errorCount 29 | 30 | w := new(bytes.Buffer) 31 | 32 | Register(T3{}) 33 | 34 | { 35 | x0 := 0 36 | x1 := 1 37 | t1 := T1{} 38 | t1.T1int1 = 1 39 | t1.T1string1 = "6.824" 40 | t2 := T2{} 41 | t2.T2slice = []T1{T1{}, t1} 42 | t2.T2map = map[int]*T1{} 43 | t2.T2map[99] = &T1{1, 2, "x", "y"} 44 | t2.T2t3 = T3{999} 45 | 46 | e := NewEncoder(w) 47 | e.Encode(x0) 48 | e.Encode(x1) 49 | e.Encode(t1) 50 | e.Encode(t2) 51 | } 52 | data := w.Bytes() 53 | 54 | { 55 | var x0 int 56 | var x1 int 57 | var t1 T1 58 | var t2 T2 59 | 60 | r := bytes.NewBuffer(data) 61 | d := NewDecoder(r) 62 | if d.Decode(&x0) != nil || 63 | d.Decode(&x1) != nil || 64 | d.Decode(&t1) != nil || 65 | d.Decode(&t2) != nil { 66 | t.Fatalf("Decode failed") 67 | } 68 | 69 | if x0 != 0 { 70 | t.Fatalf("wrong x0 %v\n", x0) 71 | } 72 | if x1 != 1 { 73 | t.Fatalf("wrong x1 %v\n", x1) 74 | } 75 | if t1.T1int0 != 0 { 76 | t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0) 77 | } 78 | if t1.T1int1 != 1 { 79 | t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1) 80 | } 81 | if t1.T1string0 != "" { 82 | t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0) 83 | } 84 | if t1.T1string1 != "6.824" { 85 | t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1) 86 | } 87 | if len(t2.T2slice) != 2 { 88 | t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice)) 89 | } 90 | if t2.T2slice[1].T1int1 != 1 { 91 | t.Fatalf("wrong slice value\n") 92 | } 93 | if len(t2.T2map) != 1 { 94 | t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map)) 95 | } 96 | if t2.T2map[99].T1string1 != "y" { 97 | t.Fatalf("wrong map value\n") 98 | } 99 | t3 := (t2.T2t3).(T3) 100 | if t3.T3int999 != 999 { 101 | t.Fatalf("wrong t2.T2t3.T3int999\n") 102 | } 103 | } 104 | 105 | if errorCount != e0 { 106 | t.Fatalf("there were errors, but should not have been") 107 | } 108 | } 109 | 110 | type T4 struct { 111 | Yes int 112 | no int 113 | } 114 | 115 | // 116 | // make sure we check capitalization 117 | // labgob prints one warning during this test. 118 | // 119 | func TestCapital(t *testing.T) { 120 | e0 := errorCount 121 | 122 | v := []map[*T4]int{} 123 | 124 | w := new(bytes.Buffer) 125 | e := NewEncoder(w) 126 | e.Encode(v) 127 | data := w.Bytes() 128 | 129 | var v1 []map[T4]int 130 | r := bytes.NewBuffer(data) 131 | d := NewDecoder(r) 132 | d.Decode(&v1) 133 | 134 | if errorCount != e0+1 { 135 | t.Fatalf("failed to warn about lower-case field") 136 | } 137 | } 138 | 139 | // 140 | // check that we warn when someone sends a default value over 141 | // RPC but the target into which we're decoding holds a non-default 142 | // value, which GOB seems not to overwrite as you'd expect. 143 | // 144 | // labgob does not print a warning. 145 | // 146 | func TestDefault(t *testing.T) { 147 | e0 := errorCount 148 | 149 | type DD struct { 150 | X int 151 | } 152 | 153 | // send a default value... 154 | dd1 := DD{} 155 | 156 | w := new(bytes.Buffer) 157 | e := NewEncoder(w) 158 | e.Encode(dd1) 159 | data := w.Bytes() 160 | 161 | // and receive it into memory that already 162 | // holds non-default values. 
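// (encoding/gob omits zero-valued fields when encoding, so decoding dd1 into
// reply below leaves reply.X at 99 instead of resetting it to 0; this is why
// reply structs handed to RPC calls should start out as zero values rather
// than being reused)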
163 | reply := DD{99} 164 | 165 | r := bytes.NewBuffer(data) 166 | d := NewDecoder(r) 167 | d.Decode(&reply) 168 | 169 | if errorCount != e0+1 { 170 | t.Fatalf("failed to warn about decoding into non-default value") 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/shardkv/server.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | 7 | "6.824/labgob" 8 | "6.824/labrpc" 9 | "6.824/raft" 10 | "6.824/shardctrler" 11 | ) 12 | 13 | type ShardKV struct { 14 | mu sync.Mutex 15 | me int 16 | rf *raft.Raft 17 | applyCh chan raft.ApplyMsg 18 | dead int32 // set by Kill() 19 | makeEnd func(string) *labrpc.ClientEnd 20 | gid int 21 | // ctrlers []*labrpc.ClientEnd 22 | maxraftstate int // snapshot if log grows this big 23 | 24 | // Your definitions here. 25 | shards map[int]*Shard 26 | cmdRespChans map[IndexAndTerm]chan OpResp 27 | lastApplied int 28 | lastSnapshot int 29 | 30 | lastConfig shardctrler.Config 31 | currentConfig shardctrler.Config 32 | sc *shardctrler.Clerk 33 | } 34 | 35 | // 36 | // the tester calls Kill() when a ShardKV instance won't 37 | // be needed again. you are not required to do anything 38 | // in Kill(), but it might be convenient to (for example) 39 | // turn off debug output from this instance. 40 | // 41 | func (kv *ShardKV) Kill() { 42 | atomic.StoreInt32(&kv.dead, 1) 43 | // Your code here, if desired. 44 | kv.mu.Lock() 45 | defer kv.mu.Unlock() 46 | //fmt.Printf("---kill\n") 47 | kv.doSnapshot(kv.lastApplied) 48 | kv.rf.Kill() 49 | Debug(dWarn, "G%+v {S%+v} close shards: %+v config: %+v", kv.gid, kv.me, kv.shards, kv.currentConfig) 50 | } 51 | 52 | func (kv *ShardKV) killed() bool { 53 | z := atomic.LoadInt32(&kv.dead) 54 | return z == 1 55 | } 56 | 57 | // 58 | // servers[] contains the ports of the servers in this group. 59 | // 60 | // me is the index of the current server in servers[]. 61 | // 62 | // the k/v server should store snapshots through the underlying Raft 63 | // implementation, which should call persister.SaveStateAndSnapshot() to 64 | // atomically save the Raft state along with the snapshot. 65 | // 66 | // the k/v server should snapshot when Raft's saved state exceeds 67 | // maxraftstate bytes, in order to allow Raft to garbage-collect its 68 | // log. if maxraftstate is -1, you don't need to snapshot. 69 | // 70 | // gid is this group's GID, for interacting with the shardctrler. 71 | // 72 | // pass ctrlers[] to shardctrler.MakeClerk() so you can send 73 | // RPCs to the shardctrler. 74 | // 75 | // make_end(servername) turns a server name from a 76 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 77 | // send RPCs. You'll need this to send RPCs to other groups. 78 | // 79 | // look at client.go for examples of how to use ctrlers[] 80 | // and make_end() to send RPCs to the group owning a specific shard. 81 | // 82 | // StartServer() must return quickly, so it should start goroutines 83 | // for any long-running work. 84 | // 85 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, ctrlers []*labrpc.ClientEnd, makeEnd func(string) *labrpc.ClientEnd) *ShardKV { 86 | // call labgob.Register on structures you want 87 | // Go's RPC library to marshall/unmarshall. 
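// every concrete type that travels through the Raft log or over RPC here
// (commands, shardctrler configs, shard-migration payloads) must be
// registered, or gob will fail to decode it on the applying side.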
88 | labgob.Register(Command{}) 89 | labgob.Register(CmdArgs{}) 90 | labgob.Register(shardctrler.Config{}) 91 | labgob.Register(PullDataReply{}) 92 | labgob.Register(PullDataArgs{}) 93 | 94 | kv := new(ShardKV) 95 | kv.me = me 96 | kv.maxraftstate = maxraftstate 97 | kv.makeEnd = makeEnd 98 | kv.gid = gid 99 | // kv.ctrlers = ctrlers 100 | 101 | // Use something like this to talk to the shardctrler: 102 | // kv.mck = shardctrler.MakeClerk(kv.ctrlers) 103 | kv.applyCh = make(chan raft.ApplyMsg, 5) 104 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 105 | kv.sc = shardctrler.MakeClerk(ctrlers) 106 | 107 | // Your initialization code here. 108 | kv.shards = make(map[int]*Shard) 109 | kv.cmdRespChans = make(map[IndexAndTerm]chan OpResp) 110 | kv.lastApplied = 0 111 | kv.lastSnapshot = 0 112 | 113 | // load data from persister 114 | kv.setSnapshot(persister.ReadSnapshot()) 115 | 116 | // long-time goroutines 117 | go kv.applier() 118 | go kv.snapshoter() 119 | kv.startMonitor() 120 | 121 | return kv 122 | } 123 | -------------------------------------------------------------------------------- /src/utils/dslogs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Script for pretty printing logs of MIT 6.824 labs 3 | import sys 4 | import shutil 5 | from typing import Optional, List, Tuple, Dict 6 | 7 | import typer 8 | from rich import print 9 | from rich.columns import Columns 10 | from rich.console import Console 11 | from rich.traceback import install 12 | 13 | # fmt: off 14 | # Mapping from topics to colors 15 | TOPICS = { 16 | "TIMR": "#9a9a99", 17 | "VOTE": "#67a0b2", 18 | "LEAD": "#d0b343", 19 | "TERM": "#70c43f", 20 | "LOG1": "#4878bc", 21 | "LOG2": "#398280", 22 | "CMIT": "#98719f", 23 | "PERS": "#d08341", 24 | "SNAP": "#FD971F", 25 | "DROP": "#ff615c", 26 | "CLNT": "#00813c", 27 | "TEST": "#fe2c79", 28 | "INFO": "#ffffff", 29 | "WARN": "#d08341", 30 | "ERRO": "#fe2626", 31 | "TRCE": "#fe2626", 32 | "SEVR": "#00813c", 33 | } 34 | # fmt: on 35 | 36 | 37 | def list_topics(value: Optional[str]): 38 | if value is None: 39 | return value 40 | topics = value.split(",") 41 | for topic in topics: 42 | if topic not in TOPICS: 43 | raise typer.BadParameter(f"topic {topic} not recognized") 44 | return topics 45 | 46 | 47 | def main( 48 | file: typer.FileText = typer.Argument(None, help="File to read, stdin otherwise"), 49 | colorize: bool = typer.Option(True, "--no-color"), 50 | n_columns: Optional[int] = typer.Option(None, "--columns", "-c"), 51 | ignore: Optional[str] = typer.Option(None, "--ignore", "-i", callback=list_topics), 52 | just: Optional[str] = typer.Option(None, "--just", "-j", callback=list_topics), 53 | ): 54 | topics = list(TOPICS) 55 | 56 | # We can take input from a stdin (pipes) or from a file 57 | input_ = file if file else sys.stdin 58 | # Print just some topics or exclude some topics (good for avoiding verbose ones) 59 | if just: 60 | topics = just 61 | if ignore: 62 | topics = [lvl for lvl in topics if lvl not in set(ignore)] 63 | 64 | topics = set(topics) 65 | console = Console() 66 | width = console.size.width 67 | 68 | panic = False 69 | for line in input_: 70 | try: 71 | time, topic, *msg = line.strip().split(" ") 72 | # To ignore some topics 73 | if topic not in topics: 74 | continue 75 | 76 | msg = " ".join(msg) 77 | 78 | # utils.Debug calls from the test suite aren't associated with 79 | # any particular peer. 
Otherwise we can treat second column 80 | # as peer id 81 | if topic != "TEST": 82 | i = int(msg[1]) 83 | 84 | # Colorize output by using rich syntax when needed 85 | if colorize and topic in TOPICS: 86 | color = TOPICS[topic] 87 | msg = f"[{color}]{msg}[/{color}]" 88 | 89 | # Single column printing. Always the case for debug stmts in tests 90 | if n_columns is None or topic == "TEST": 91 | print(time, msg) 92 | # Multi column printing, timing is dropped to maximize horizontal 93 | # space. Heavylifting is done through rich.column.Columns object 94 | else: 95 | cols = ["" for _ in range(n_columns)] 96 | msg = "" + msg 97 | cols[i] = msg 98 | col_width = int(width / n_columns) 99 | cols = Columns(cols, width=col_width - 1, equal=True, expand=True) 100 | print(cols) 101 | except: 102 | # Code from tests or panics does not follow format 103 | # so we print it as is 104 | if line.startswith("panic"): 105 | panic = True 106 | # Output from tests is usually important so add a 107 | # horizontal line with hashes to make it more obvious 108 | if not panic: 109 | print("#" * console.width) 110 | print(line, end="") 111 | 112 | 113 | if __name__ == "__main__": 114 | typer.run(main) -------------------------------------------------------------------------------- /src/shardkv/dslogs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Script for pretty printing logs of MIT 6.824 labs 3 | import sys 4 | import shutil 5 | from typing import Optional, List, Tuple, Dict 6 | 7 | import typer 8 | from rich import print 9 | from rich.columns import Columns 10 | from rich.console import Console 11 | from rich.traceback import install 12 | 13 | # fmt: off 14 | # Mapping from topics to colors 15 | TOPICS = { 16 | "TIMR": "#9a9a99", 17 | "VOTE": "#67a0b2", 18 | "LEAD": "#d0b343", 19 | "TERM": "#70c43f", 20 | "LOG1": "#4878bc", 21 | "LOG2": "#398280", 22 | "CMIT": "#98719f", 23 | "PERS": "#d08341", 24 | "SNAP": "#FD971F", 25 | "DROP": "#ff615c", 26 | "CLNT": "#00813c", 27 | "TEST": "#fe2c79", 28 | "INFO": "#ffffff", 29 | "WARN": "#d08341", 30 | "ERRO": "#fe2626", 31 | "TRCE": "#fe2626", 32 | "SEVR": "#00813c", 33 | } 34 | # fmt: on 35 | 36 | 37 | def list_topics(value: Optional[str]): 38 | if value is None: 39 | return value 40 | topics = value.split(",") 41 | for topic in topics: 42 | if topic not in TOPICS: 43 | raise typer.BadParameter(f"topic {topic} not recognized") 44 | return topics 45 | 46 | 47 | def main( 48 | file: typer.FileText = typer.Argument(None, help="File to read, stdin otherwise"), 49 | colorize: bool = typer.Option(True, "--no-color"), 50 | n_columns: Optional[int] = typer.Option(None, "--columns", "-c"), 51 | ignore: Optional[str] = typer.Option(None, "--ignore", "-i", callback=list_topics), 52 | just: Optional[str] = typer.Option(None, "--just", "-j", callback=list_topics), 53 | ): 54 | topics = list(TOPICS) 55 | 56 | # We can take input from a stdin (pipes) or from a file 57 | input_ = file if file else sys.stdin 58 | # Print just some topics or exclude some topics (good for avoiding verbose ones) 59 | if just: 60 | topics = just 61 | if ignore: 62 | topics = [lvl for lvl in topics if lvl not in set(ignore)] 63 | 64 | topics = set(topics) 65 | console = Console() 66 | width = console.size.width 67 | 68 | panic = False 69 | for line in input_: 70 | try: 71 | time, topic, *msg = line.strip().split(" ") 72 | # To ignore some topics 73 | if topic not in topics: 74 | continue 75 | 76 | msg = " ".join(msg) 77 | 78 | # utils.Debug calls 
from the test suite aren't associated with 79 | # any particular peer. Otherwise we can treat second column 80 | # as peer id 81 | if topic != "TEST": 82 | i = int(msg[3]) 83 | 84 | # Colorize output by using rich syntax when needed 85 | if colorize and topic in TOPICS: 86 | color = TOPICS[topic] 87 | msg = f"[{color}]{msg}[/{color}]" 88 | 89 | # Single column printing. Always the case for debug stmts in tests 90 | if n_columns is None or topic == "TEST": 91 | print(time, msg) 92 | # Multi column printing, timing is dropped to maximize horizontal 93 | # space. Heavylifting is done through rich.column.Columns object 94 | else: 95 | cols = ["" for _ in range(n_columns)] 96 | msg = "" + msg 97 | cols[i] = msg 98 | col_width = int(width / n_columns) 99 | cols = Columns(cols, width=col_width - 1, equal=True, expand=True) 100 | print(cols) 101 | except: 102 | # Code from tests or panics does not follow format 103 | # so we print it as is 104 | if line.startswith("panic"): 105 | panic = True 106 | # Output from tests is usually important so add a 107 | # horizontal line with hashes to make it more obvious 108 | if not panic: 109 | print("#" * console.width) 110 | print(line, end="") 111 | 112 | 113 | if __name__ == "__main__": 114 | typer.run(main) -------------------------------------------------------------------------------- /src/raft/interface.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // 6 | // the service using Raft (e.g. a k/v server) wants to start 7 | // agreement on the next command to be appended to Raft's log. if this 8 | // server isn't the leader, returns false. otherwise start the 9 | // agreement and return immediately. there is no guarantee that this 10 | // command will ever be committed to the Raft log, since the leader 11 | // may fail or lose an election. even if the Raft instance has been killed, 12 | // this function should return gracefully. 13 | // 14 | // the first return value is the index that the command will appear at 15 | // if it's ever committed. the second return value is the current 16 | // term. the third return value is true if this server believes it is 17 | // the leader. 18 | // 19 | func (rf *Raft) Start(command interface{}) (int, int, bool) { 20 | rf.mu.Lock() 21 | defer rf.mu.Unlock() 22 | 23 | if rf.status != leader { 24 | utils.Debug(utils.DClient, "S%d Not leader cmd: %+v", rf.me, command) 25 | return -1, -1, false 26 | } 27 | 28 | index := rf.lastLogIndex() + 1 29 | rf.log = append(rf.log, Entry{index, rf.currentTerm, command}) 30 | rf.persist() 31 | 32 | // defer utils.Debug(utils.DLog2, "S%d append log: %+v", rf.me, rf.log) 33 | utils.Debug(utils.DClient, "S%d cmd: %+v, logIndex: %d", rf.me, command, rf.lastLogIndex()) 34 | 35 | rf.doAppendEntries() 36 | 37 | return rf.lastLogIndex(), rf.currentTerm, true 38 | } 39 | 40 | // 41 | // A service wants to switch to snapshot. Only do so if Raft hasn't 42 | // have more recent info since it communicate the snapshot on applyCh. 43 | // 44 | func (rf *Raft) CondInstallSnapshot(lastIncludedTerm int, lastIncludedIndex int, snapshot []byte) bool { 45 | // Your code here (2D). 
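// outline of the code below: refuse the snapshot if it is no newer than
// commitIndex; otherwise trim the log so that entry 0 becomes a dummy
// carrying lastIncludedIndex/lastIncludedTerm, persist the snapshot, and
// advance lastApplied and commitIndex up to lastIncludedIndex.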
46 | rf.mu.Lock() 47 | defer rf.mu.Unlock() 48 | 49 | utils.Debug(utils.DSnap, "S%d CondInstallSnapshot(lastIncludedTerm: %d lastIncludedIndex: %d lastApplied: %d commitIndex: %d)", rf.me, lastIncludedTerm, lastIncludedIndex, rf.lastApplied, rf.commitIndex) 50 | 51 | if lastIncludedIndex <= rf.commitIndex { 52 | utils.Debug(utils.DSnap, "S%d refuse, snapshot too old(%d <= %d)", rf.me, lastIncludedIndex, rf.frontLogIndex()) 53 | return false 54 | } 55 | 56 | if lastIncludedIndex > rf.lastLogIndex() { 57 | rf.log = make([]Entry, 1) 58 | } else { 59 | // in range, ignore out of range error 60 | idx, _ := rf.transfer(lastIncludedIndex) 61 | rf.log = rf.log[idx:] 62 | } 63 | // dummy node 64 | rf.log[0].Term = lastIncludedTerm 65 | rf.log[0].Index = lastIncludedIndex 66 | rf.log[0].Cmd = nil 67 | 68 | rf.persistSnapshot(snapshot) 69 | 70 | // reset commit 71 | if lastIncludedIndex > rf.lastApplied { 72 | rf.lastApplied = lastIncludedIndex 73 | } 74 | if lastIncludedIndex > rf.commitIndex { 75 | rf.commitIndex = lastIncludedIndex 76 | } 77 | 78 | // utils.Debug(utils.DSnap, "S%d after CondInstallSnapshot(lastApplied: %d commitIndex: %d) {%+v}", rf.me, rf.lastApplied, rf.commitIndex, rf.log) 79 | 80 | return true 81 | } 82 | 83 | // the service says it has created a snapshot that has 84 | // all info up to and including index. this means the 85 | // service no longer needs the log through (and including) 86 | // that index. Raft should now trim its log as much as possible. 87 | func (rf *Raft) Snapshot(index int, snapshot []byte) { 88 | // Your code here (2D). 89 | rf.mu.Lock() 90 | defer rf.mu.Unlock() 91 | 92 | utils.Debug(utils.DSnap, "S%d call Snapshot, index: %d", rf.me, index) 93 | 94 | // refuse to install a snapshot 95 | if rf.frontLogIndex() >= index { 96 | utils.Debug(utils.DSnap, "S%d refuse, have received %d snapshot", rf.me, index) 97 | return 98 | } 99 | 100 | idx, err := rf.transfer(index) 101 | if err < 0 { 102 | idx = len(rf.log) - 1 103 | } 104 | //before := len(rf.log) 105 | // let last snapshot node as dummy node 106 | rf.log = rf.log[idx:] 107 | rf.log[0].Cmd = nil // dummy node 108 | rf.persistSnapshot(snapshot) 109 | //fmt.Printf("S%d idx: %d log len before: %d after: %d\n", rf.me, idx, before, len(rf.log)) 110 | // utils.Debug(utils.DSnap, "S%d call Snapshot success, index: %d {%+v}", rf.me, index, rf.log) 111 | } 112 | -------------------------------------------------------------------------------- /src/raft/rpc.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // 6 | // example RequestVote RPC arguments structure. 7 | // field names must start with capital letters! 8 | // 9 | type RequestVoteArgs struct { 10 | // Your data here (2A, 2B). 11 | Term int 12 | CandidateId int 13 | LastLogIndex int 14 | LastLogTerm int 15 | } 16 | 17 | type RequestVoteReply struct { 18 | // Your data here (2A). 19 | Term int 20 | VoteGranted bool 21 | } 22 | 23 | type AppendEntriesArgs struct { 24 | Term int 25 | LeaderId int 26 | PrevLogIndex int 27 | PrevLogTerm int 28 | Entries []Entry 29 | LeaderCommit int 30 | } 31 | 32 | type AppendEntriesReply struct { 33 | Term int 34 | Success bool 35 | XTerm int // for fast backup 36 | XIndex int 37 | XLen int 38 | } 39 | 40 | type InstallSnapshotArgs struct { 41 | // Your data here (2A, 2B). 
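// the fields mirror the Raft paper's InstallSnapshot RPC; offset and done
// are omitted because this implementation ships the snapshot in one RPC.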
42 | Term int 43 | LeaderId int 44 | LastIncludedIndex int 45 | LastIncludedTerm int 46 | Data []byte 47 | } 48 | 49 | type InstallSnapshotReply struct { 50 | Term int 51 | } 52 | 53 | // 54 | // example code to send a RequestVote RPC to a server. 55 | // server is the index of the target server in rf.peers[]. 56 | // expects RPC arguments in args. 57 | // fills in *reply with RPC reply, so caller should 58 | // pass &reply. 59 | // the types of the args and reply passed to Call() must be 60 | // the same as the types of the arguments declared in the 61 | // handler function (including whether they are pointers). 62 | // 63 | // The labrpc package simulates a lossy network, in which servers 64 | // may be unreachable, and in which requests and replies may be lost. 65 | // Call() sends a request and waits for a reply. If a reply arrives 66 | // within a timeout interval, Call() returns true; otherwise 67 | // Call() returns false. Thus Call() may not return for a while. 68 | // A false return can be caused by a dead server, a live server that 69 | // can't be reached, a lost request, or a lost reply. 70 | // 71 | // Call() is guaranteed to return (perhaps after a delay) *except* if the 72 | // handler function on the server side does not return. Thus there 73 | // is no need to implement your own timeouts around Call(). 74 | // 75 | // look at the comments in ../labrpc/labrpc.go for more details. 76 | // 77 | // if you're having trouble getting RPC to work, check that you've 78 | // capitalized all field names in structs passed over RPC, and 79 | // that the caller passes the address of the reply struct with &, not 80 | // the struct itself. 81 | // 82 | func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, reply *RequestVoteReply) bool { 83 | utils.Debug(utils.DInfo, "S%d send RequestVote request to %d {%+v}", rf.me, server, args) 84 | ok := rf.peers[server].Call("Raft.RequestVote", args, reply) 85 | if !ok { 86 | utils.Debug(utils.DWarn, "S%d call (RequestVote)rpc to C%d error", rf.me, server) 87 | return ok 88 | } 89 | utils.Debug(utils.DInfo, "S%d get RequestVote response from %d {%+v}", rf.me, server, reply) 90 | return ok 91 | } 92 | 93 | func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs, reply *AppendEntriesReply) bool { 94 | utils.Debug(utils.DInfo, "S%d send AppendEntries request to %d {%+v}", rf.me, server, args) 95 | ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) 96 | if !ok { 97 | utils.Debug(utils.DWarn, "S%d call (AppendEntries)rpc to C%d error", rf.me, server) 98 | return ok 99 | } 100 | utils.Debug(utils.DInfo, "S%d get AppendEntries response from %d {%+v}", rf.me, server, reply) 101 | return ok 102 | } 103 | 104 | func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs, reply *InstallSnapshotReply) bool { 105 | utils.Debug(utils.DInfo, "S%d send InstallSnapshot request to %d {%+v}", rf.me, server, args) 106 | ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) 107 | if !ok { 108 | utils.Debug(utils.DWarn, "S%d call (InstallSnapshot)rpc to C%d error", rf.me, server) 109 | return ok 110 | } 111 | utils.Debug(utils.DInfo, "S%d get InstallSnapshot response from %d {%+v}", rf.me, server, reply) 112 | return ok 113 | } 114 | -------------------------------------------------------------------------------- /src/labgob/labgob.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | // 4 | // trying to send non-capitalized fields over RPC produces a 
range of 5 | // misbehavior, including both mysterious incorrect computation and 6 | // outright crashes. so this wrapper around Go's encoding/gob warns 7 | // about non-capitalized field names. 8 | // 9 | 10 | import "encoding/gob" 11 | import "io" 12 | import "reflect" 13 | import "fmt" 14 | import "sync" 15 | import "unicode" 16 | import "unicode/utf8" 17 | 18 | var mu sync.Mutex 19 | var errorCount int // for TestCapital 20 | var checked map[reflect.Type]bool 21 | 22 | type LabEncoder struct { 23 | gob *gob.Encoder 24 | } 25 | 26 | func NewEncoder(w io.Writer) *LabEncoder { 27 | enc := &LabEncoder{} 28 | enc.gob = gob.NewEncoder(w) 29 | return enc 30 | } 31 | 32 | func (enc *LabEncoder) Encode(e interface{}) error { 33 | checkValue(e) 34 | return enc.gob.Encode(e) 35 | } 36 | 37 | func (enc *LabEncoder) EncodeValue(value reflect.Value) error { 38 | checkValue(value.Interface()) 39 | return enc.gob.EncodeValue(value) 40 | } 41 | 42 | type LabDecoder struct { 43 | gob *gob.Decoder 44 | } 45 | 46 | func NewDecoder(r io.Reader) *LabDecoder { 47 | dec := &LabDecoder{} 48 | dec.gob = gob.NewDecoder(r) 49 | return dec 50 | } 51 | 52 | func (dec *LabDecoder) Decode(e interface{}) error { 53 | checkValue(e) 54 | checkDefault(e) 55 | return dec.gob.Decode(e) 56 | } 57 | 58 | func Register(value interface{}) { 59 | checkValue(value) 60 | gob.Register(value) 61 | } 62 | 63 | func RegisterName(name string, value interface{}) { 64 | checkValue(value) 65 | gob.RegisterName(name, value) 66 | } 67 | 68 | func checkValue(value interface{}) { 69 | checkType(reflect.TypeOf(value)) 70 | } 71 | 72 | func checkType(t reflect.Type) { 73 | k := t.Kind() 74 | 75 | mu.Lock() 76 | // only complain once, and avoid recursion. 77 | if checked == nil { 78 | checked = map[reflect.Type]bool{} 79 | } 80 | if checked[t] { 81 | mu.Unlock() 82 | return 83 | } 84 | checked[t] = true 85 | mu.Unlock() 86 | 87 | switch k { 88 | case reflect.Struct: 89 | for i := 0; i < t.NumField(); i++ { 90 | f := t.Field(i) 91 | rune, _ := utf8.DecodeRuneInString(f.Name) 92 | if unicode.IsUpper(rune) == false { 93 | // ta da 94 | fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n", 95 | f.Name, t.Name()) 96 | mu.Lock() 97 | errorCount += 1 98 | mu.Unlock() 99 | } 100 | checkType(f.Type) 101 | } 102 | return 103 | case reflect.Slice, reflect.Array, reflect.Ptr: 104 | checkType(t.Elem()) 105 | return 106 | case reflect.Map: 107 | checkType(t.Elem()) 108 | checkType(t.Key()) 109 | return 110 | default: 111 | return 112 | } 113 | } 114 | 115 | // 116 | // warn if the value contains non-default values, 117 | // as it would if one sent an RPC but the reply 118 | // struct was already modified. if the RPC reply 119 | // contains default values, GOB won't overwrite 120 | // the non-default value. 121 | // 122 | func checkDefault(value interface{}) { 123 | if value == nil { 124 | return 125 | } 126 | checkDefault1(reflect.ValueOf(value), 1, "") 127 | } 128 | 129 | func checkDefault1(value reflect.Value, depth int, name string) { 130 | if depth > 3 { 131 | return 132 | } 133 | 134 | t := value.Type() 135 | k := t.Kind() 136 | 137 | switch k { 138 | case reflect.Struct: 139 | for i := 0; i < t.NumField(); i++ { 140 | vv := value.Field(i) 141 | name1 := t.Field(i).Name 142 | if name != "" { 143 | name1 = name + "." 
+ name1 144 | } 145 | checkDefault1(vv, depth+1, name1) 146 | } 147 | return 148 | case reflect.Ptr: 149 | if value.IsNil() { 150 | return 151 | } 152 | checkDefault1(value.Elem(), depth+1, name) 153 | return 154 | case reflect.Bool, 155 | reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 156 | reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, 157 | reflect.Uintptr, reflect.Float32, reflect.Float64, 158 | reflect.String: 159 | if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false { 160 | mu.Lock() 161 | if errorCount < 1 { 162 | what := name 163 | if what == "" { 164 | what = t.Name() 165 | } 166 | // this warning typically arises if code re-uses the same RPC reply 167 | // variable for multiple RPC calls, or if code restores persisted 168 | // state into variable that already have non-default values. 169 | fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n", 170 | what) 171 | } 172 | errorCount += 1 173 | mu.Unlock() 174 | } 175 | return 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/raft/raft.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // this is an outline of the API that raft must expose to 5 | // the service (or tester). see comments below for 6 | // each of these functions for more details. 7 | // 8 | // rf = Make(...) 9 | // create a new Raft server. 10 | // rf.Start(command interface{}) (index, term, isleader) 11 | // start agreement on a new log entry 12 | // rf.GetState() (term, isLeader) 13 | // ask a Raft for its current term, and whether it thinks it is leader 14 | // ApplyMsg 15 | // each time a new entry is committed to the log, each Raft peer 16 | // should send an ApplyMsg to the service (or tester) 17 | // in the same server. 18 | // 19 | 20 | import ( 21 | "sync" 22 | "sync/atomic" 23 | "time" 24 | 25 | "6.824/labrpc" 26 | "6.824/utils" 27 | ) 28 | 29 | // 30 | // A Go object implementing a single Raft peer. 31 | // 32 | type Raft struct { 33 | mu sync.Mutex // Lock to protect shared access to this peer's state 34 | applyCond *sync.Cond // haven't used now, it seem can be used for apply 35 | peers []*labrpc.ClientEnd // RPC end points of all peers 36 | persister *Persister // Object to hold this peer's persisted state 37 | me int // this peer's index into peers[] 38 | dead int32 // set by Kill() 39 | status ServerStatus 40 | applyCh chan ApplyMsg 41 | 42 | // Your data here (2A, 2B, 2C). 43 | // Look at the paper's Figure 2 for a description of what 44 | // state a Raft server must maintain. 45 | 46 | // persistent for all servers 47 | currentTerm int 48 | votedFor int 49 | log []Entry 50 | 51 | // volatile for all servers 52 | commitIndex int 53 | lastApplied int 54 | 55 | // volatile for leaders 56 | nextIndex []int 57 | matchIndex []int 58 | 59 | // private 60 | electionTime time.Time 61 | heartbeatTime time.Time 62 | } 63 | 64 | // 65 | // the tester doesn't halt goroutines created by Raft after each test, 66 | // but it does call the Kill() method. your code can use killed() to 67 | // check whether Kill() has been called. the use of atomic avoids the 68 | // need for a lock. 69 | // 70 | // the issue is that long-running goroutines use memory and may chew 71 | // up CPU time, perhaps causing later tests to fail and generating 72 | // confusing debug output. 
any goroutine with a long-running loop 73 | // should call killed() to check whether it should stop. 74 | // 75 | func (rf *Raft) Kill() { 76 | atomic.StoreInt32(&rf.dead, 1) 77 | // Your code here, if desired. 78 | } 79 | 80 | func (rf *Raft) killed() bool { 81 | z := atomic.LoadInt32(&rf.dead) 82 | return z == 1 83 | } 84 | 85 | func (rf *Raft) leaderInit() { 86 | rf.nextIndex = make([]int, len(rf.peers)) 87 | rf.matchIndex = make([]int, len(rf.peers)) 88 | 89 | for i := range rf.nextIndex { 90 | rf.nextIndex[i] = rf.lastLogIndex() + 1 91 | rf.matchIndex[i] = 0 92 | } 93 | 94 | rf.resetHeartbeatTime() 95 | } 96 | 97 | func (rf *Raft) init() { 98 | rf.status = follower 99 | rf.applyCond = sync.NewCond(&rf.mu) 100 | // persistent for all servers 101 | rf.currentTerm = 0 102 | rf.votedFor = voted_nil // means that vote for nobody 103 | rf.log = make([]Entry, 0) 104 | // use first log entry as last snapshot index 105 | // also it's dummy node!! 106 | rf.log = append(rf.log, Entry{magic_index, magic_term, nil}) 107 | // volatile for all servers, will be changed in persister read 108 | rf.commitIndex = 0 109 | rf.lastApplied = 0 110 | // private 111 | // begin with follower, set election time 112 | rf.resetElectionTime() 113 | } 114 | 115 | // 116 | // the service or tester wants to create a Raft server. the ports 117 | // of all the Raft servers (including this one) are in peers[]. this 118 | // server's port is peers[me]. all the servers' peers[] arrays 119 | // have the same order. persister is a place for this server to 120 | // save its persistent state, and also initially holds the most 121 | // recent saved state, if any. applyCh is a channel on which the 122 | // tester or service expects Raft to send ApplyMsg messages. 123 | // Make() must return quickly, so it should start goroutines 124 | // for any long-running work. 125 | // 126 | func Make(peers []*labrpc.ClientEnd, me int, 127 | persister *Persister, applyCh chan ApplyMsg) *Raft { 128 | rf := &Raft{ 129 | peers: peers, 130 | persister: persister, 131 | me: me, 132 | applyCh: applyCh, 133 | } 134 | 135 | // Your initialization code here (2A, 2B, 2C). 136 | rf.init() 137 | 138 | utils.Debug(utils.DClient, "S%d Started && init success", rf.me) 139 | 140 | // initialize from state persisted before a crash 141 | rf.readPersist(persister.ReadRaftState()) 142 | 143 | // start ticker goroutine to start elections 144 | go rf.ticker() 145 | go rf.applyLog() 146 | 147 | return rf 148 | } 149 | -------------------------------------------------------------------------------- /src/kvraft/server.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | "time" 7 | 8 | "6.824/labgob" 9 | "6.824/labrpc" 10 | "6.824/raft" 11 | "6.824/utils" 12 | ) 13 | 14 | type KVServer struct { 15 | mu sync.Mutex 16 | me int 17 | rf *raft.Raft 18 | applyCh chan raft.ApplyMsg 19 | dead int32 // set by Kill() 20 | 21 | maxraftstate int // snapshot if log grows this big 22 | 23 | // Your definitions here. 24 | KvMap *KV 25 | cmdRespChans map[IndexAndTerm]chan OpResp 26 | LastCmdContext map[int64]OpContext 27 | lastApplied int 28 | lastSnapshot int 29 | } 30 | 31 | // 32 | // the tester calls Kill() when a KVServer instance won't 33 | // be needed again. for your convenience, we supply 34 | // code to set rf.dead (without needing a lock), 35 | // and a killed() method to test rf.dead in 36 | // long-running loops. you can also add your own 37 | // code to Kill(). 
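// this server's Kill() additionally writes a final snapshot of the
// applied state before killing the underlying Raft instance.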
you're not required to do anything 38 | // about this, but it may be convenient (for example) 39 | // to suppress debug output from a Kill()ed instance. 40 | // 41 | func (kv *KVServer) Kill() { 42 | atomic.StoreInt32(&kv.dead, 1) 43 | // Your code here, if desired. 44 | kv.mu.Lock() 45 | defer kv.mu.Unlock() 46 | //fmt.Printf("---kill\n") 47 | kv.doSnapshot(kv.lastApplied) 48 | kv.rf.Kill() 49 | } 50 | 51 | func (kv *KVServer) killed() bool { 52 | z := atomic.LoadInt32(&kv.dead) 53 | return z == 1 54 | } 55 | 56 | // 57 | // servers[] contains the ports of the set of 58 | // servers that will cooperate via Raft to 59 | // form the fault-tolerant key/value service. 60 | // me is the index of the current server in servers[]. 61 | // the k/v server should store snapshots through the underlying Raft 62 | // implementation, which should call persister.SaveStateAndSnapshot() to 63 | // atomically save the Raft state along with the snapshot. 64 | // the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, 65 | // in order to allow Raft to garbage-collect its log. if maxraftstate is -1, 66 | // you don't need to snapshot. 67 | // StartKVServer() must return quickly, so it should start goroutines 68 | // for any long-running work. 69 | // 70 | func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { 71 | // call labgob.Register on structures you want 72 | // Go's RPC library to marshall/unmarshall. 73 | labgob.Register(Op{}) 74 | 75 | kv := new(KVServer) 76 | kv.me = me 77 | kv.maxraftstate = maxraftstate 78 | 79 | // You may need initialization code here. 80 | 81 | kv.applyCh = make(chan raft.ApplyMsg, 5) 82 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 83 | 84 | // You may need initialization code here. 
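// state set up below: the in-memory KV store, one response channel per
// (index, term) so RPC handlers can wait for their command to be applied,
// and a per-client record of the last executed request for deduplication.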
85 | kv.KvMap = NewKV() 86 | kv.cmdRespChans = make(map[IndexAndTerm]chan OpResp) 87 | kv.LastCmdContext = make(map[int64]OpContext) 88 | kv.lastApplied = 0 89 | kv.lastSnapshot = 0 90 | 91 | // load data from persister 92 | kv.setSnapshot(persister.ReadSnapshot()) 93 | 94 | // long-time goroutines 95 | go kv.applier() 96 | go kv.snapshoter() 97 | 98 | return kv 99 | } 100 | 101 | // Handler 102 | func (kv *KVServer) Command(args *CmdArgs, reply *CmdReply) { 103 | defer utils.Debug(utils.DWarn, "S%d args: %+v reply: %+v", kv.me, args, reply) 104 | 105 | kv.mu.Lock() 106 | if args.OpType != OpGet && kv.isDuplicate(args.ClientId, args.SeqId) { 107 | context := kv.LastCmdContext[args.ClientId] 108 | reply.Value, reply.Err = context.Reply.Value, context.Reply.Err 109 | kv.mu.Unlock() 110 | return 111 | } 112 | kv.mu.Unlock() 113 | 114 | cmd := Op{ 115 | ClientId: args.ClientId, 116 | SeqId: args.SeqId, 117 | OpType: args.OpType, 118 | Key: args.Key, 119 | Value: args.Value, 120 | } 121 | index, term, is_leader := kv.rf.Start(cmd) 122 | if !is_leader { 123 | reply.Value, reply.Err = "", ErrWrongLeader 124 | return 125 | } 126 | 127 | kv.mu.Lock() 128 | it := IndexAndTerm{index, term} 129 | ch := make(chan OpResp, 1) 130 | kv.cmdRespChans[it] = ch 131 | kv.mu.Unlock() 132 | 133 | defer func() { 134 | kv.mu.Lock() 135 | // close(kv.cmdRespChans[index]) 136 | delete(kv.cmdRespChans, it) 137 | kv.mu.Unlock() 138 | close(ch) 139 | }() 140 | 141 | t := time.NewTimer(cmd_timeout) 142 | defer t.Stop() 143 | 144 | for { 145 | kv.mu.Lock() 146 | select { 147 | case resp := <-ch: 148 | utils.Debug(utils.DServer, "S%d have applied, resp: %+v", kv.me, resp) 149 | reply.Value, reply.Err = resp.Value, resp.Err 150 | kv.mu.Unlock() 151 | return 152 | case <-t.C: 153 | priority: 154 | for { 155 | select { 156 | case resp := <-ch: 157 | utils.Debug(utils.DServer, "S%d have applied, resp: %+v", kv.me, resp) 158 | reply.Value, reply.Err = resp.Value, resp.Err 159 | kv.mu.Unlock() 160 | return 161 | default: 162 | break priority 163 | } 164 | } 165 | utils.Debug(utils.DServer, "S%d timeout", kv.me) 166 | reply.Value, reply.Err = "", ErrTimeout 167 | kv.mu.Unlock() 168 | return 169 | default: 170 | kv.mu.Unlock() 171 | time.Sleep(gap_time) 172 | } 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/mr/coordinator.go: -------------------------------------------------------------------------------- 1 | package mr 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/http" 8 | "net/rpc" 9 | "os" 10 | "sync" 11 | "time" 12 | ) 13 | 14 | type TaskStatus int 15 | 16 | const ( 17 | idle TaskStatus = iota 18 | in_progress 19 | completed 20 | ) 21 | 22 | type Task struct { 23 | tno int 24 | filenames []string 25 | status TaskStatus 26 | startTime time.Time 27 | } 28 | 29 | type CoordinatorStatus int 30 | 31 | const ( 32 | MAP_PHASE CoordinatorStatus = iota 33 | REDUCE_PHASE 34 | FINISH_PHASE 35 | ) 36 | 37 | type Coordinator struct { 38 | // Your definitions here. 39 | tasks []Task 40 | nReduce int 41 | nMap int 42 | status CoordinatorStatus 43 | mu sync.Mutex 44 | } 45 | 46 | // Your code here -- RPC handlers for the worker to call. 47 | 48 | // 49 | // an example RPC handler. 50 | // 51 | // the RPC argument and reply types are defined in rpc.go. 
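// GetTask hands out the first idle task, re-issues any in-progress task
// that has run for more than ten seconds (its worker is presumed dead),
// and otherwise tells the worker to WAIT.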
52 | // 53 | func (c *Coordinator) GetTask(args *GetTaskArgs, reply *GetTaskReply) error { 54 | c.mu.Lock() 55 | defer c.mu.Unlock() 56 | 57 | finish_flag := c.IsAllFinish() 58 | if finish_flag { 59 | c.NextPhase() 60 | } 61 | for i := 0; i < len(c.tasks); i++ { 62 | if c.tasks[i].status == idle { 63 | log.Printf("send task %d to worker\n", i) 64 | reply.Err = SuccessCode 65 | reply.Task_no = i 66 | reply.Filenames = c.tasks[i].filenames 67 | if c.status == MAP_PHASE { 68 | reply.Type = MAP 69 | reply.NReduce = c.nReduce 70 | } else if c.status == REDUCE_PHASE { 71 | reply.NReduce = 0 72 | reply.Type = REDUCE 73 | } else { 74 | log.Fatal("unexpected status") 75 | } 76 | c.tasks[i].startTime = time.Now() 77 | c.tasks[i].status = in_progress 78 | return nil 79 | } else if c.tasks[i].status == in_progress { 80 | curr := time.Now() 81 | if curr.Sub(c.tasks[i].startTime) > time.Second*10 { 82 | log.Printf("resend task %d to worker\n", i) 83 | reply.Err = SuccessCode 84 | reply.Task_no = i 85 | reply.Filenames = c.tasks[i].filenames 86 | if c.status == MAP_PHASE { 87 | reply.Type = MAP 88 | reply.NReduce = c.nReduce 89 | } else if c.status == REDUCE_PHASE { 90 | reply.NReduce = 0 91 | reply.Type = REDUCE 92 | } else { 93 | log.Fatal("unexpected status") 94 | } 95 | c.tasks[i].startTime = time.Now() 96 | return nil 97 | } 98 | } 99 | } 100 | reply.Err = SuccessCode 101 | reply.Type = WAIT 102 | return nil 103 | } 104 | 105 | func (c *Coordinator) FinishTask(args *FinishTaskArgs, reply *FinishTaskReply) error { 106 | c.mu.Lock() 107 | defer c.mu.Unlock() 108 | if args.Task_no >= len(c.tasks) || args.Task_no < 0 { 109 | reply.Err = ParaErrCode 110 | return nil 111 | } 112 | c.tasks[args.Task_no].status = completed 113 | if c.IsAllFinish() { 114 | c.NextPhase() 115 | } 116 | return nil 117 | } 118 | 119 | // 120 | // start a thread that listens for RPCs from worker.go 121 | // 122 | func (c *Coordinator) server() { 123 | rpc.Register(c) 124 | rpc.HandleHTTP() 125 | //l, e := net.Listen("tcp", ":1234") 126 | sockname := coordinatorSock() 127 | os.Remove(sockname) 128 | l, e := net.Listen("unix", sockname) 129 | if e != nil { 130 | log.Fatal("listen error:", e) 131 | } 132 | go http.Serve(l, nil) 133 | } 134 | 135 | // coordinator init code 136 | func (c *Coordinator) Init(files []string, nReduce int) { 137 | c.mu.Lock() 138 | defer c.mu.Unlock() 139 | log.Println("init coordinator") 140 | 141 | // make map tasks 142 | log.Println("make map tasks") 143 | tasks := make([]Task, len(files)) 144 | for i, file := range files { 145 | tasks[i].tno = i 146 | tasks[i].filenames = []string{file} 147 | tasks[i].status = idle 148 | } 149 | 150 | // init coordinator 151 | c.tasks = tasks 152 | c.nReduce = nReduce 153 | c.nMap = len(files) 154 | c.status = MAP_PHASE 155 | } 156 | 157 | func (c *Coordinator) MakeReduceTasks() { 158 | // make reduce tasks 159 | log.Println("make reduce tasks") 160 | tasks := make([]Task, c.nReduce) 161 | for i := 0; i < c.nReduce; i++ { 162 | tasks[i].tno = i 163 | files := make([]string, c.nMap) 164 | for j := 0; j < c.nMap; j++ { 165 | filename := fmt.Sprintf("mr-%d-%d", j, i) 166 | files[j] = filename 167 | } 168 | tasks[i].filenames = files 169 | tasks[i].status = idle 170 | } 171 | c.tasks = tasks 172 | } 173 | 174 | func (c *Coordinator) IsAllFinish() bool { 175 | for i := len(c.tasks) - 1; i >= 0; i-- { 176 | if c.tasks[i].status != completed { 177 | return false 178 | } 179 | } 180 | return true 181 | } 182 | 183 | func (c *Coordinator) NextPhase() { 184 | if c.status == 
MAP_PHASE { 185 | log.Println("change to REDUCE_PHASE") 186 | c.MakeReduceTasks() 187 | c.status = REDUCE_PHASE 188 | } else if c.status == REDUCE_PHASE { 189 | log.Println("change to FINISH_PHASE") 190 | c.status = FINISH_PHASE 191 | } else { 192 | log.Println("unexpected status change!") 193 | } 194 | } 195 | 196 | // 197 | // main/mrcoordinator.go calls Done() periodically to find out 198 | // if the entire job has finished. 199 | // 200 | func (c *Coordinator) Done() bool { 201 | c.mu.Lock() 202 | defer c.mu.Unlock() 203 | if c.status == FINISH_PHASE { 204 | return true 205 | } 206 | return false 207 | } 208 | 209 | // 210 | // create a Coordinator. 211 | // main/mrcoordinator.go calls this function. 212 | // nReduce is the number of reduce tasks to use. 213 | // 214 | func MakeCoordinator(files []string, nReduce int) *Coordinator { 215 | c := Coordinator{} 216 | 217 | // Your code here. 218 | c.Init(files, nReduce) 219 | 220 | c.server() 221 | return &c 222 | } 223 | -------------------------------------------------------------------------------- /src/mr/worker.go: -------------------------------------------------------------------------------- 1 | package mr 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "hash/fnv" 7 | "io/ioutil" 8 | "log" 9 | "net/rpc" 10 | "os" 11 | "sort" 12 | "time" 13 | ) 14 | 15 | // 16 | // Map functions return a slice of KeyValue. 17 | // 18 | type KeyValue struct { 19 | Key string 20 | Value string 21 | } 22 | 23 | // for sorting by key. 24 | type ByKey []KeyValue 25 | 26 | // for sorting by key. 27 | func (a ByKey) Len() int { return len(a) } 28 | func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 29 | func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } 30 | 31 | // 32 | // use ihash(key) % NReduce to choose the reduce 33 | // task number for each KeyValue emitted by Map. 34 | // 35 | func ihash(key string) int { 36 | h := fnv.New32a() 37 | h.Write([]byte(key)) 38 | return int(h.Sum32() & 0x7fffffff) 39 | } 40 | 41 | // 42 | // main/mrworker.go calls this function. 43 | // 44 | func Worker(mapf func(string, string) []KeyValue, 45 | reducef func(string, []string) string) { 46 | 47 | // Your worker implementation here. 
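// loop: request a task from the coordinator, run the map or reduce
// function on it, report completion, and exit once the coordinator is
// unreachable or replies with STOP.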
48 | for { 49 | args := GetTaskArgs{} 50 | reply := GetTaskReply{} 51 | log.Printf("get task request: %v\n", args) 52 | ok := CallGetTask(&args, &reply) 53 | log.Printf("recv get task reply: %v\n", reply) 54 | if !ok || reply.Type == STOP { 55 | break 56 | } 57 | 58 | // handle map fynction 59 | switch reply.Type { 60 | case MAP: 61 | if len(reply.Filenames) < 1 { 62 | log.Fatalf("don't have filename") 63 | } 64 | DoMAP(reply.Filenames[0], reply.Task_no, reply.NReduce, mapf) 65 | // map complete, send msg to master 66 | finish_args := FinishTaskArgs{ 67 | Type: MAP, 68 | Task_no: reply.Task_no, 69 | } 70 | finish_reply := FinishTaskReply{} 71 | log.Printf("finish request: %v\n", finish_args) 72 | CallFinishTask(&finish_args, &finish_reply) 73 | log.Printf("recv finish reply: %v\n", finish_reply) 74 | // time.Sleep(time.Second) 75 | case REDUCE: 76 | if len(reply.Filenames) < 1 { 77 | log.Fatalf("don't have filenames") 78 | } 79 | DoReduce(reply.Filenames, reply.Task_no, reducef) 80 | // reduce complete, send msg to master 81 | finish_args := FinishTaskArgs{ 82 | Type: REDUCE, 83 | Task_no: reply.Task_no, 84 | } 85 | finish_reply := FinishTaskReply{} 86 | log.Printf("finish request: %v\n", finish_args) 87 | CallFinishTask(&finish_args, &finish_reply) 88 | log.Printf("recv finish reply: %v\n", finish_reply) 89 | // time.Sleep(time.Second) 90 | case WAIT: 91 | log.Printf("wait task\n") 92 | time.Sleep(time.Second) 93 | default: 94 | time.Sleep(time.Second) 95 | } 96 | } 97 | } 98 | 99 | func DoMAP(filename string, task_no int, nReduce int, mapf func(string, string) []KeyValue) { 100 | file, err := os.Open(filename) 101 | if err != nil { 102 | log.Fatalf("cannot open %v", filename) 103 | } 104 | content, err := ioutil.ReadAll(file) 105 | if err != nil { 106 | log.Fatalf("cannot read %v", filename) 107 | } 108 | file.Close() 109 | 110 | kva := mapf(filename, string(content)) 111 | 112 | sort.Sort(ByKey(kva)) 113 | 114 | log.Println("encode to json") 115 | files := make([]*os.File, nReduce) 116 | encoders := make([]*json.Encoder, nReduce) 117 | for i := 0; i < nReduce; i++ { 118 | ofile, err := ioutil.TempFile("", "mr-tmp*") 119 | if err != nil { 120 | log.Fatalf("cannot create temp file") 121 | } 122 | defer ofile.Close() 123 | 124 | encoder := json.NewEncoder(ofile) 125 | encoders[i] = encoder 126 | files[i] = ofile 127 | } 128 | 129 | var index int 130 | for _, kv := range kva { 131 | index = ihash(kv.Key) % nReduce 132 | err = encoders[index].Encode(&kv) 133 | if err != nil { 134 | log.Fatalf("cannot encode %v", kv) 135 | } 136 | } 137 | 138 | // atomically rename 139 | for i := 0; i < nReduce; i++ { 140 | filename_tmp := fmt.Sprintf("mr-%d-%d", task_no, i) 141 | err := os.Rename(files[i].Name(), filename_tmp) 142 | if err != nil { 143 | log.Fatalf("cannot rename %v to %v", files[i].Name(), filename_tmp) 144 | } 145 | } 146 | } 147 | 148 | func DoReduce(filenames []string, task_no int, reducef func(string, []string) string) { 149 | // read data from mid-file 150 | kva := make([]KeyValue, 0) 151 | for _, filename := range filenames { 152 | file, err := os.Open(filename) 153 | if err != nil { 154 | log.Fatalf("cannot open %v", filename) 155 | } 156 | defer file.Close() 157 | dec := json.NewDecoder(file) 158 | for { 159 | var kv KeyValue 160 | if err := dec.Decode(&kv); err != nil { 161 | break 162 | } 163 | kva = append(kva, kv) 164 | } 165 | } 166 | 167 | sort.Sort(ByKey(kva)) 168 | 169 | // call Reduce on each distinct key in kva[], 170 | // and print the result to mr-out-0. 
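// write to a temp file and rename it to mr-out-<task_no> only at the end,
// so a worker that crashes part-way never leaves a truncated output file.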
171 | ofile, err := ioutil.TempFile("", "mr-out-tmp*") 172 | if err != nil { 173 | log.Fatalf("cannot create temp file") 174 | } 175 | defer ofile.Close() 176 | 177 | i := 0 178 | for i < len(kva) { 179 | j := i + 1 180 | for j < len(kva) && kva[j].Key == kva[i].Key { 181 | j++ 182 | } 183 | values := []string{} 184 | for k := i; k < j; k++ { 185 | values = append(values, kva[k].Value) 186 | } 187 | output := reducef(kva[i].Key, values) 188 | 189 | // this is the correct format for each line of Reduce output. 190 | fmt.Fprintf(ofile, "%v %v\n", kva[i].Key, output) 191 | 192 | i = j 193 | } 194 | 195 | output_filename := fmt.Sprintf("mr-out-%d", task_no) 196 | err = os.Rename(ofile.Name(), output_filename) 197 | if err != nil { 198 | log.Fatalf("cannot rename %v to %v", ofile.Name(), output_filename) 199 | } 200 | } 201 | 202 | // rpc interface 203 | func CallGetTask(args *GetTaskArgs, reply *GetTaskReply) bool { 204 | // send the RPC request, wait for the reply. 205 | return call("Coordinator.GetTask", args, reply) 206 | } 207 | 208 | func CallFinishTask(args *FinishTaskArgs, reply *FinishTaskReply) bool { 209 | return call("Coordinator.FinishTask", args, reply) 210 | } 211 | 212 | // 213 | // send an RPC request to the coordinator, wait for the response. 214 | // usually returns true. 215 | // returns false if something goes wrong. 216 | // 217 | func call(rpcname string, args interface{}, reply interface{}) bool { 218 | // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") 219 | sockname := coordinatorSock() 220 | c, err := rpc.DialHTTP("unix", sockname) 221 | if err != nil { 222 | log.Println("dialing: ", err) 223 | return false 224 | } 225 | defer c.Close() 226 | 227 | err = c.Call(rpcname, args, reply) 228 | if err == nil { 229 | return true 230 | } 231 | 232 | fmt.Println(err) 233 | return false 234 | } 235 | -------------------------------------------------------------------------------- /src/shardctrler/configModel.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import ( 4 | "sort" 5 | ) 6 | 7 | // 8 | // Shard controler: assigns shards to replication groups. 9 | // 10 | // RPC interface: 11 | // Join(servers) -- add a set of groups (gid -> server-list mapping). 12 | // Leave(gids) -- delete a set of groups. 13 | // Move(shard, gid) -- hand off one shard from current owner to gid. 14 | // Query(num) -> fetch Config # num, or latest config if num==-1. 15 | // 16 | // A Config (configuration) describes a set of replica groups, and the 17 | // replica group responsible for each shard. Configs are numbered. Config 18 | // #0 is the initial configuration, with no groups and all shards 19 | // assigned to group 0 (the invalid group). 20 | // 21 | // You will need to add fields to the RPC argument structs. 22 | // 23 | 24 | // The number of shards. 25 | const NShards = 10 26 | 27 | // A configuration -- an assignment of shards to groups. 28 | // Please don't change this. 
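// illustrative example (hypothetical): with NShards = 10 and two groups
// 1 and 2, a balanced config could have Shards = [1 1 1 1 1 2 2 2 2 2];
// any 5/5 split is equally valid.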
29 | type Config struct { 30 | Num int // config number 31 | Shards [NShards]int // shard -> gid 32 | Groups map[int][]string // gid -> servers[] 33 | } 34 | 35 | func (cfg *Config) DeepCopy() Config { 36 | ret := Config{ 37 | Num: cfg.Num, 38 | Shards: [NShards]int{}, 39 | Groups: make(map[int][]string), 40 | } 41 | 42 | for k, v := range cfg.Groups { 43 | ret.Groups[k] = v 44 | } 45 | for i := range cfg.Shards { 46 | ret.Shards[i] = cfg.Shards[i] 47 | } 48 | return ret 49 | } 50 | 51 | // --------------------------------------------------------------------- // 52 | 53 | const magicNullGid = 0 54 | 55 | type ConfigModel struct { 56 | configs []Config // indexed by config num 57 | me int // for debug 58 | } 59 | 60 | func NewConfigModel(me int) *ConfigModel { 61 | cfg := ConfigModel{make([]Config, 1), me} 62 | cfg.configs[0] = Config{ 63 | Num: 0, 64 | Shards: [NShards]int{}, 65 | Groups: make(map[int][]string), 66 | } 67 | for i := range cfg.configs[0].Shards { 68 | cfg.configs[0].Shards[i] = magicNullGid 69 | } 70 | return &cfg 71 | } 72 | 73 | func (cm *ConfigModel) getGroup2Shards(config *Config) map[int][]int { 74 | group2shard := map[int][]int{} 75 | for gid, _ := range config.Groups { 76 | group2shard[gid] = []int{} 77 | } 78 | group2shard[magicNullGid] = []int{} 79 | 80 | for shard, gid := range config.Shards { 81 | group2shard[gid] = append(group2shard[gid], shard) 82 | } 83 | return group2shard 84 | } 85 | 86 | func (cm *ConfigModel) getMinShards(group2shard map[int][]int) int { 87 | var keys []int 88 | for k := range group2shard { 89 | keys = append(keys, k) 90 | } 91 | sort.Ints(keys) 92 | 93 | gidRet, minn := -1, NShards+1 94 | for _, gid := range keys { 95 | if gid != magicNullGid && len(group2shard[gid]) < minn { 96 | gidRet, minn = gid, len(group2shard[gid]) 97 | } 98 | } 99 | if gidRet == -1 { 100 | return magicNullGid 101 | } 102 | return gidRet 103 | } 104 | 105 | func (cm *ConfigModel) getMaxShards(group2shard map[int][]int) int { 106 | if shards, ok := group2shard[magicNullGid]; ok && len(shards) > 0 { 107 | return magicNullGid 108 | } 109 | 110 | var keys []int 111 | for k := range group2shard { 112 | keys = append(keys, k) 113 | } 114 | sort.Ints(keys) 115 | 116 | gidRet, maxn := -1, -1 117 | for _, gid := range keys { 118 | if len(group2shard[gid]) > maxn { 119 | gidRet, maxn = gid, len(group2shard[gid]) 120 | } 121 | } 122 | return gidRet 123 | } 124 | 125 | func (cm *ConfigModel) reBalance(config *Config) { 126 | // special judge 127 | if len(config.Groups) == 0 { // if none group, init shards 128 | for i := range config.Shards { 129 | config.Shards[i] = 0 130 | } 131 | return 132 | } 133 | 134 | // 1 shard - 1 group, 1 group - n shards 135 | group2shard := cm.getGroup2Shards(config) 136 | for { 137 | src := cm.getMaxShards(group2shard) 138 | dst := cm.getMinShards(group2shard) 139 | if src != magicNullGid && len(group2shard[src])-len(group2shard[dst]) <= 1 { 140 | break 141 | } 142 | 143 | group2shard[dst] = append(group2shard[dst], group2shard[src][0]) 144 | group2shard[src] = group2shard[src][1:] 145 | } 146 | 147 | // reset shard 148 | for gid, shards := range group2shard { 149 | for _, shard := range shards { 150 | config.Shards[shard] = gid 151 | } 152 | } 153 | } 154 | 155 | func (cm *ConfigModel) join(servers map[int][]string) Err { 156 | newConfig := cm.configs[len(cm.configs)-1].DeepCopy() 157 | newConfig.Num = len(cm.configs) 158 | 159 | for gid, servers_iter := range servers { 160 | newServers := make([]string, len(servers_iter)) 161 | copy(newServers, 
servers_iter) 162 | if _, ok := newConfig.Groups[gid]; !ok { 163 | newConfig.Groups[gid] = newServers 164 | } else { 165 | newConfig.Groups[gid] = append(newConfig.Groups[gid], newServers...) 166 | } 167 | } 168 | 169 | cm.reBalance(&newConfig) 170 | cm.configs = append(cm.configs, newConfig) 171 | return OK 172 | } 173 | 174 | func (cm *ConfigModel) leave(GIDs []int) Err { 175 | newConfig := cm.configs[len(cm.configs)-1].DeepCopy() 176 | newConfig.Num = len(cm.configs) 177 | 178 | group2shard := cm.getGroup2Shards(&newConfig) 179 | for _, gid := range GIDs { 180 | if _, ok := newConfig.Groups[gid]; ok { 181 | delete(newConfig.Groups, gid) 182 | } 183 | if shards, ok := group2shard[gid]; ok { 184 | for _, shard := range shards { 185 | newConfig.Shards[shard] = magicNullGid 186 | } 187 | } 188 | } 189 | 190 | cm.reBalance(&newConfig) 191 | cm.configs = append(cm.configs, newConfig) 192 | return OK 193 | } 194 | 195 | func (cm *ConfigModel) move(shard int, gid int) Err { 196 | newConfig := cm.configs[len(cm.configs)-1].DeepCopy() 197 | newConfig.Num = len(cm.configs) 198 | newConfig.Shards[shard] = gid 199 | cm.configs = append(cm.configs, newConfig) 200 | return OK 201 | } 202 | 203 | func (cm *ConfigModel) query(num int) (Config, Err) { 204 | if num < 0 || num >= len(cm.configs) { 205 | return cm.configs[len(cm.configs)-1].DeepCopy(), OK 206 | } 207 | return cm.configs[num].DeepCopy(), OK 208 | } 209 | 210 | func (cm *ConfigModel) isLegal(opType OpType) bool { 211 | switch opType { 212 | case OpJoin: 213 | case OpLeave: 214 | case OpMove: 215 | case OpQuery: 216 | default: 217 | return false 218 | } 219 | return true 220 | } 221 | 222 | func (cm *ConfigModel) Opt(cmd Op) (Config, Err) { 223 | switch cmd.Op { 224 | case OpJoin: 225 | err := cm.join(cmd.Servers) 226 | return Config{}, err 227 | case OpLeave: 228 | err := cm.leave(cmd.GIDs) 229 | return Config{}, err 230 | case OpMove: 231 | err := cm.move(cmd.Shard, cmd.GID) 232 | return Config{}, err 233 | case OpQuery: 234 | config, err := cm.query(cmd.Num) 235 | return config, err 236 | default: 237 | return Config{}, ErrOpt 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /src/main/test-mr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # basic map-reduce test 5 | # 6 | 7 | #RACE= 8 | 9 | # comment this to run the tests without the Go race detector. 10 | RACE=-race 11 | 12 | # run the test in a fresh sub-directory. 13 | rm -rf mr-tmp 14 | mkdir mr-tmp || exit 1 15 | cd mr-tmp || exit 1 16 | rm -f mr-* 17 | 18 | # make sure software is freshly built. 19 | (cd ../../mrapps && go build $RACE -buildmode=plugin wc.go) || exit 1 20 | (cd ../../mrapps && go build $RACE -buildmode=plugin indexer.go) || exit 1 21 | (cd ../../mrapps && go build $RACE -buildmode=plugin mtiming.go) || exit 1 22 | (cd ../../mrapps && go build $RACE -buildmode=plugin rtiming.go) || exit 1 23 | (cd ../../mrapps && go build $RACE -buildmode=plugin jobcount.go) || exit 1 24 | (cd ../../mrapps && go build $RACE -buildmode=plugin early_exit.go) || exit 1 25 | (cd ../../mrapps && go build $RACE -buildmode=plugin crash.go) || exit 1 26 | (cd ../../mrapps && go build $RACE -buildmode=plugin nocrash.go) || exit 1 27 | (cd .. && go build $RACE mrcoordinator.go) || exit 1 28 | (cd .. && go build $RACE mrworker.go) || exit 1 29 | (cd .. 
&& go build $RACE mrsequential.go) || exit 1 30 | 31 | failed_any=0 32 | 33 | ######################################################### 34 | # first word-count 35 | 36 | # generate the correct output 37 | ../mrsequential ../../mrapps/wc.so ../pg*txt || exit 1 38 | sort mr-out-0 > mr-correct-wc.txt 39 | rm -f mr-out* 40 | 41 | echo '***' Starting wc test. 42 | 43 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 44 | pid=$! 45 | 46 | # give the coordinator time to create the sockets. 47 | sleep 1 48 | 49 | # start multiple workers. 50 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so & 51 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so & 52 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so & 53 | 54 | # wait for the coordinator to exit. 55 | wait $pid 56 | 57 | # since workers are required to exit when a job is completely finished, 58 | # and not before, that means the job has finished. 59 | sort mr-out* | grep . > mr-wc-all 60 | if cmp mr-wc-all mr-correct-wc.txt 61 | then 62 | echo '---' wc test: PASS 63 | else 64 | echo '---' wc output is not the same as mr-correct-wc.txt 65 | echo '---' wc test: FAIL 66 | failed_any=1 67 | fi 68 | 69 | # wait for remaining workers and coordinator to exit. 70 | wait 71 | 72 | ######################################################### 73 | # now indexer 74 | rm -f mr-* 75 | 76 | # generate the correct output 77 | ../mrsequential ../../mrapps/indexer.so ../pg*txt || exit 1 78 | sort mr-out-0 > mr-correct-indexer.txt 79 | rm -f mr-out* 80 | 81 | echo '***' Starting indexer test. 82 | 83 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 84 | sleep 1 85 | 86 | # start multiple workers 87 | timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so & 88 | timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so 89 | 90 | sort mr-out* | grep . > mr-indexer-all 91 | if cmp mr-indexer-all mr-correct-indexer.txt 92 | then 93 | echo '---' indexer test: PASS 94 | else 95 | echo '---' indexer output is not the same as mr-correct-indexer.txt 96 | echo '---' indexer test: FAIL 97 | failed_any=1 98 | fi 99 | 100 | wait 101 | 102 | ######################################################### 103 | echo '***' Starting map parallelism test. 104 | 105 | rm -f mr-* 106 | 107 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 108 | sleep 1 109 | 110 | timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so & 111 | timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so 112 | 113 | NT=`cat mr-out* | grep '^times-' | wc -l | sed 's/ //g'` 114 | if [ "$NT" != "2" ] 115 | then 116 | echo '---' saw "$NT" workers rather than 2 117 | echo '---' map parallelism test: FAIL 118 | failed_any=1 119 | fi 120 | 121 | if cat mr-out* | grep '^parallel.* 2' > /dev/null 122 | then 123 | echo '---' map parallelism test: PASS 124 | else 125 | echo '---' map workers did not run in parallel 126 | echo '---' map parallelism test: FAIL 127 | failed_any=1 128 | fi 129 | 130 | wait 131 | 132 | 133 | ######################################################### 134 | echo '***' Starting reduce parallelism test. 135 | 136 | rm -f mr-* 137 | 138 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 139 | sleep 1 140 | 141 | timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so & 142 | timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so 143 | 144 | NT=`cat mr-out* | grep '^[a-z] 2' | wc -l | sed 's/ //g'` 145 | if [ "$NT" -lt "2" ] 146 | then 147 | echo '---' too few parallel reduces. 
148 | echo '---' reduce parallelism test: FAIL 149 | failed_any=1 150 | else 151 | echo '---' reduce parallelism test: PASS 152 | fi 153 | 154 | wait 155 | 156 | ######################################################### 157 | echo '***' Starting job count test. 158 | 159 | rm -f mr-* 160 | 161 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 162 | sleep 1 163 | 164 | timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so & 165 | timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so 166 | timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so & 167 | timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so 168 | 169 | NT=`cat mr-out* | awk '{print $2}'` 170 | if [ "$NT" -ne "8" ] 171 | then 172 | echo '---' map jobs ran incorrect number of times "($NT != 8)" 173 | echo '---' job count test: FAIL 174 | failed_any=1 175 | else 176 | echo '---' job count test: PASS 177 | fi 178 | 179 | wait 180 | 181 | ######################################################### 182 | # test whether any worker or coordinator exits before the 183 | # task has completed (i.e., all output files have been finalized) 184 | rm -f mr-* 185 | 186 | echo '***' Starting early exit test. 187 | 188 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 189 | 190 | # give the coordinator time to create the sockets. 191 | sleep 1 192 | 193 | # start multiple workers. 194 | timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so & 195 | timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so & 196 | timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so & 197 | 198 | # wait for any of the coord or workers to exit 199 | # `jobs` ensures that any completed old processes from other tests 200 | # are not waited upon 201 | jobs &> /dev/null 202 | wait -n 203 | 204 | # a process has exited. this means that the output should be finalized 205 | # otherwise, either a worker or the coordinator exited early 206 | sort mr-out* | grep . > mr-wc-all-initial 207 | 208 | # wait for remaining workers and coordinator to exit. 209 | wait 210 | 211 | # compare initial and final outputs 212 | sort mr-out* | grep . > mr-wc-all-final 213 | if cmp mr-wc-all-final mr-wc-all-initial 214 | then 215 | echo '---' early exit test: PASS 216 | else 217 | echo '---' output changed after first worker exited 218 | echo '---' early exit test: FAIL 219 | failed_any=1 220 | fi 221 | rm -f mr-* 222 | 223 | ######################################################### 224 | echo '***' Starting crash test. 225 | 226 | # generate the correct output 227 | ../mrsequential ../../mrapps/nocrash.so ../pg*txt || exit 1 228 | sort mr-out-0 > mr-correct-crash.txt 229 | rm -f mr-out* 230 | 231 | rm -f mr-done 232 | (timeout -k 2s 180s ../mrcoordinator ../pg*txt ; touch mr-done ) & 233 | sleep 1 234 | 235 | # start multiple workers 236 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so & 237 | 238 | # mimic rpc.go's coordinatorSock() 239 | SOCKNAME=/var/tmp/824-mr-`id -u` 240 | 241 | ( while [ -e $SOCKNAME -a ! -f mr-done ] 242 | do 243 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so 244 | sleep 1 245 | done ) & 246 | 247 | ( while [ -e $SOCKNAME -a ! -f mr-done ] 248 | do 249 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so 250 | sleep 1 251 | done ) & 252 | 253 | while [ -e $SOCKNAME -a ! -f mr-done ] 254 | do 255 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so 256 | sleep 1 257 | done 258 | 259 | wait 260 | 261 | rm $SOCKNAME 262 | sort mr-out* | grep . 
> mr-crash-all 263 | if cmp mr-crash-all mr-correct-crash.txt 264 | then 265 | echo '---' crash test: PASS 266 | else 267 | echo '---' crash output is not the same as mr-correct-crash.txt 268 | echo '---' crash test: FAIL 269 | failed_any=1 270 | fi 271 | 272 | ######################################################### 273 | if [ $failed_any -eq 0 ]; then 274 | echo '***' PASSED ALL TESTS 275 | else 276 | echo '***' FAILED SOME TESTS 277 | exit 1 278 | fi 279 | -------------------------------------------------------------------------------- /src/utils/dstest: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import itertools 4 | import math 5 | import signal 6 | import subprocess 7 | import tempfile 8 | import shutil 9 | import time 10 | import os 11 | import sys 12 | import datetime 13 | from collections import defaultdict 14 | from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED 15 | from dataclasses import dataclass 16 | from pathlib import Path 17 | from typing import List, Optional, Dict, DefaultDict, Tuple 18 | 19 | import typer 20 | import rich 21 | from rich import print 22 | from rich.table import Table 23 | from rich.progress import ( 24 | Progress, 25 | TimeElapsedColumn, 26 | TimeRemainingColumn, 27 | TextColumn, 28 | BarColumn, 29 | SpinnerColumn, 30 | ) 31 | from rich.live import Live 32 | from rich.panel import Panel 33 | from rich.traceback import install 34 | 35 | install(show_locals=True) 36 | 37 | 38 | @dataclass 39 | class StatsMeter: 40 | """ 41 | Auxiliary class to keep track of online stats including: count, mean, variance 42 | Uses Welford's algorithm to compute sample mean and sample variance incrementally. 43 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm 44 | """ 45 | 46 | n: int = 0 47 | mean: float = 0.0 48 | S: float = 0.0 49 | 50 | def add(self, datum): 51 | self.n += 1 52 | delta = datum - self.mean 53 | # Mk = Mk-1+ (xk – Mk-1)/k 54 | self.mean += delta / self.n 55 | # Sk = Sk-1 + (xk – Mk-1)*(xk – Mk).
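# Welford update: delta uses the old mean and (datum - self.mean) the new one, so S accumulates the running sum of squared deviations and the variance property below is simply S / n.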
56 | self.S += delta * (datum - self.mean) 57 | 58 | @property 59 | def variance(self): 60 | return self.S / self.n 61 | 62 | @property 63 | def std(self): 64 | return math.sqrt(self.variance) 65 | 66 | 67 | def print_results(results: Dict[str, Dict[str, StatsMeter]], timing=False): 68 | table = Table(show_header=True, header_style="bold") 69 | table.add_column("Test") 70 | table.add_column("Failed", justify="right") 71 | table.add_column("Total", justify="right") 72 | if not timing: 73 | table.add_column("Time", justify="right") 74 | else: 75 | table.add_column("Real Time", justify="right") 76 | table.add_column("User Time", justify="right") 77 | table.add_column("System Time", justify="right") 78 | 79 | for test, stats in results.items(): 80 | if stats["completed"].n == 0: 81 | continue 82 | color = "green" if stats["failed"].n == 0 else "red" 83 | row = [ 84 | f"[{color}]{test}[/{color}]", 85 | str(stats["failed"].n), 86 | str(stats["completed"].n), 87 | ] 88 | if not timing: 89 | row.append(f"{stats['time'].mean:.2f} ± {stats['time'].std:.2f}") 90 | else: 91 | row.extend( 92 | [ 93 | f"{stats['real_time'].mean:.2f} ± {stats['real_time'].std:.2f}", 94 | f"{stats['user_time'].mean:.2f} ± {stats['user_time'].std:.2f}", 95 | f"{stats['system_time'].mean:.2f} ± {stats['system_time'].std:.2f}", 96 | ] 97 | ) 98 | table.add_row(*row) 99 | 100 | print(table) 101 | 102 | 103 | def run_test(test: str, race: bool, timing: bool): 104 | test_cmd = ["go", "test", f"-run={test}"] 105 | if race: 106 | test_cmd.append("-race") 107 | if timing: 108 | test_cmd = ["time"] + test_cmd 109 | f, path = tempfile.mkstemp() 110 | start = time.time() 111 | proc = subprocess.run(test_cmd, stdout=f, stderr=f) 112 | runtime = time.time() - start 113 | os.close(f) 114 | return test, path, proc.returncode, runtime 115 | 116 | 117 | def last_line(file: str) -> str: 118 | with open(file, "rb") as f: 119 | f.seek(-2, os.SEEK_END) 120 | while f.read(1) != b"\n": 121 | f.seek(-2, os.SEEK_CUR) 122 | line = f.readline().decode() 123 | return line 124 | 125 | 126 | # fmt: off 127 | def run_tests( 128 | tests: List[str], 129 | sequential: bool = typer.Option(False, '--sequential', '-s', help='Run all tests of each group in order'), 130 | workers: int = typer.Option(1, '--workers', '-p', help='Number of parallel tasks'), 131 | iterations: int = typer.Option(10, '--iter', '-n', help='Number of iterations to run'), 132 | output: Optional[Path] = typer.Option(None, '--output', '-o', help='Output path to use'), 133 | verbose: int = typer.Option(0, '--verbose', '-v', help='Verbosity level', count=True), 134 | archive: bool = typer.Option(False, '--archive', '-a', help='Save all logs instead of only failed ones'), 135 | race: bool = typer.Option(False, '--race/--no-race', '-r/-R', help='Run with race checker'), 136 | loop: bool = typer.Option(False, '--loop', '-l', help='Run continuously'), 137 | growth: int = typer.Option(10, '--growth', '-g', help='Growth ratio of iterations when using --loop'), 138 | timing: bool = typer.Option(False, '--timing', '-t', help='Report timing, only works on macOS'), 139 | # fmt: on 140 | ): 141 | 142 | if output is None: 143 | timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 144 | output = Path(timestamp) 145 | 146 | if race: 147 | print("[yellow]Running with the race detector\n[/yellow]") 148 | 149 | if verbose > 0: 150 | print(f"[yellow] Verbosity level set to {verbose}[/yellow]") 151 | os.environ['VERBOSE'] = str(verbose) 152 | 153 | while True: 154 | 155 | total = iterations * len(tests) 156
| completed = 0 157 | 158 | results = {test: defaultdict(StatsMeter) for test in tests} 159 | 160 | if sequential: 161 | test_instances = itertools.chain.from_iterable(itertools.repeat(test, iterations) for test in tests) 162 | else: 163 | test_instances = itertools.chain.from_iterable(itertools.repeat(tests, iterations)) 164 | test_instances = iter(test_instances) 165 | 166 | total_progress = Progress( 167 | "[progress.description]{task.description}", 168 | BarColumn(), 169 | TimeRemainingColumn(), 170 | "[progress.percentage]{task.percentage:>3.0f}%", 171 | TimeElapsedColumn(), 172 | ) 173 | total_task = total_progress.add_task("[yellow]Tests[/yellow]", total=total) 174 | 175 | task_progress = Progress( 176 | "[progress.description]{task.description}", 177 | SpinnerColumn(), 178 | BarColumn(), 179 | "{task.completed}/{task.total}", 180 | ) 181 | tasks = {test: task_progress.add_task(test, total=iterations) for test in tests} 182 | 183 | progress_table = Table.grid() 184 | progress_table.add_row(total_progress) 185 | progress_table.add_row(Panel.fit(task_progress)) 186 | 187 | with Live(progress_table, transient=True) as live: 188 | 189 | def handler(_, frame): 190 | live.stop() 191 | print('\n') 192 | print_results(results) 193 | sys.exit(1) 194 | 195 | signal.signal(signal.SIGINT, handler) 196 | 197 | with ThreadPoolExecutor(max_workers=workers) as executor: 198 | 199 | futures = [] 200 | while completed < total: 201 | n = len(futures) 202 | if n < workers: 203 | for test in itertools.islice(test_instances, workers-n): 204 | futures.append(executor.submit(run_test, test, race, timing)) 205 | 206 | done, not_done = wait(futures, return_when=FIRST_COMPLETED) 207 | 208 | for future in done: 209 | test, path, rc, runtime = future.result() 210 | 211 | results[test]['completed'].add(1) 212 | results[test]['time'].add(runtime) 213 | task_progress.update(tasks[test], advance=1) 214 | dest = (output / f"{test}_{completed}.log").as_posix() 215 | if rc != 0: 216 | print(f"Failed test {test} - {dest}") 217 | task_progress.update(tasks[test], description=f"[red]{test}[/red]") 218 | results[test]['failed'].add(1) 219 | else: 220 | if results[test]['completed'].n == iterations and results[test]['failed'].n == 0: 221 | task_progress.update(tasks[test], description=f"[green]{test}[/green]") 222 | 223 | if rc != 0 or archive: 224 | output.mkdir(exist_ok=True, parents=True) 225 | shutil.copy(path, dest) 226 | 227 | if timing: 228 | line = last_line(path) 229 | real, _, user, _, system, _ = line.replace(' '*8, '').split(' ') 230 | results[test]['real_time'].add(float(real)) 231 | results[test]['user_time'].add(float(user)) 232 | results[test]['system_time'].add(float(system)) 233 | 234 | os.remove(path) 235 | 236 | completed += 1 237 | total_progress.update(total_task, advance=1) 238 | 239 | futures = list(not_done) 240 | 241 | print_results(results, timing) 242 | 243 | if loop: 244 | iterations *= growth 245 | print(f"[yellow]Increasing iterations to {iterations}[/yellow]") 246 | else: 247 | break 248 | 249 | 250 | if __name__ == "__main__": 251 | typer.run(run_tests) -------------------------------------------------------------------------------- /src/shardkv/dstest: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import itertools 4 | import math 5 | import signal 6 | import subprocess 7 | import tempfile 8 | import shutil 9 | import time 10 | import os 11 | import sys 12 | import datetime 13 | from collections import defaultdict 14 | 
from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED 15 | from dataclasses import dataclass 16 | from pathlib import Path 17 | from typing import List, Optional, Dict, DefaultDict, Tuple 18 | 19 | import typer 20 | import rich 21 | from rich import print 22 | from rich.table import Table 23 | from rich.progress import ( 24 | Progress, 25 | TimeElapsedColumn, 26 | TimeRemainingColumn, 27 | TextColumn, 28 | BarColumn, 29 | SpinnerColumn, 30 | ) 31 | from rich.live import Live 32 | from rich.panel import Panel 33 | from rich.traceback import install 34 | 35 | install(show_locals=True) 36 | 37 | 38 | @dataclass 39 | class StatsMeter: 40 | """ 41 | Auxiliary class to keep track of online stats including: count, mean, variance 42 | Uses Welford's algorithm to compute sample mean and sample variance incrementally. 43 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm 44 | """ 45 | 46 | n: int = 0 47 | mean: float = 0.0 48 | S: float = 0.0 49 | 50 | def add(self, datum): 51 | self.n += 1 52 | delta = datum - self.mean 53 | # Mk = Mk-1+ (xk – Mk-1)/k 54 | self.mean += delta / self.n 55 | # Sk = Sk-1 + (xk – Mk-1)*(xk – Mk). 56 | self.S += delta * (datum - self.mean) 57 | 58 | @property 59 | def variance(self): 60 | return self.S / self.n 61 | 62 | @property 63 | def std(self): 64 | return math.sqrt(self.variance) 65 | 66 | 67 | def print_results(results: Dict[str, Dict[str, StatsMeter]], timing=False): 68 | table = Table(show_header=True, header_style="bold") 69 | table.add_column("Test") 70 | table.add_column("Failed", justify="right") 71 | table.add_column("Total", justify="right") 72 | if not timing: 73 | table.add_column("Time", justify="right") 74 | else: 75 | table.add_column("Real Time", justify="right") 76 | table.add_column("User Time", justify="right") 77 | table.add_column("System Time", justify="right") 78 | 79 | for test, stats in results.items(): 80 | if stats["completed"].n == 0: 81 | continue 82 | color = "green" if stats["failed"].n == 0 else "red" 83 | row = [ 84 | f"[{color}]{test}[/{color}]", 85 | str(stats["failed"].n), 86 | str(stats["completed"].n), 87 | ] 88 | if not timing: 89 | row.append(f"{stats['time'].mean:.2f} ± {stats['time'].std:.2f}") 90 | else: 91 | row.extend( 92 | [ 93 | f"{stats['real_time'].mean:.2f} ± {stats['real_time'].std:.2f}", 94 | f"{stats['user_time'].mean:.2f} ± {stats['user_time'].std:.2f}", 95 | f"{stats['system_time'].mean:.2f} ± {stats['system_time'].std:.2f}", 96 | ] 97 | ) 98 | table.add_row(*row) 99 | 100 | print(table) 101 | 102 | 103 | def run_test(test: str, race: bool, timing: bool): 104 | test_cmd = ["go", "test", f"-run={test}"] 105 | if race: 106 | test_cmd.append("-race") 107 | if timing: 108 | test_cmd = ["time"] + test_cmd 109 | f, path = tempfile.mkstemp() 110 | start = time.time() 111 | proc = subprocess.run(test_cmd, stdout=f, stderr=f) 112 | runtime = time.time() - start 113 | os.close(f) 114 | return test, path, proc.returncode, runtime 115 | 116 | 117 | def last_line(file: str) -> str: 118 | with open(file, "rb") as f: 119 | f.seek(-2, os.SEEK_END) 120 | while f.read(1) != b"\n": 121 | f.seek(-2, os.SEEK_CUR) 122 | line = f.readline().decode() 123 | return line 124 | 125 | 126 | # fmt: off 127 | def run_tests( 128 | tests: List[str], 129 | sequential: bool = typer.Option(False, '--sequential', '-s', help='Run all tests of each group in order'), 130 | workers: int = typer.Option(1, '--workers', '-p', help='Number of parallel tasks'), 131 | iterations: int = typer.Option(10,
'--iter', '-n', help='Number of iterations to run'), 132 | output: Optional[Path] = typer.Option(None, '--output', '-o', help='Output path to use'), 133 | verbose: int = typer.Option(0, '--verbose', '-v', help='Verbosity level', count=True), 134 | archive: bool = typer.Option(False, '--archive', '-a', help='Save all logs instead of only failed ones'), 135 | race: bool = typer.Option(False, '--race/--no-race', '-r/-R', help='Run with race checker'), 136 | loop: bool = typer.Option(False, '--loop', '-l', help='Run continuously'), 137 | growth: int = typer.Option(10, '--growth', '-g', help='Growth ratio of iterations when using --loop'), 138 | timing: bool = typer.Option(False, '--timing', '-t', help='Report timing, only works on macOS'), 139 | # fmt: on 140 | ): 141 | 142 | if output is None: 143 | timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 144 | output = Path(timestamp) 145 | 146 | if race: 147 | print("[yellow]Running with the race detector\n[/yellow]") 148 | 149 | if verbose > 0: 150 | print(f"[yellow] Verbosity level set to {verbose}[/yellow]") 151 | os.environ['VERBOSE'] = str(verbose) 152 | 153 | while True: 154 | 155 | total = iterations * len(tests) 156 | completed = 0 157 | 158 | results = {test: defaultdict(StatsMeter) for test in tests} 159 | 160 | if sequential: 161 | test_instances = itertools.chain.from_iterable(itertools.repeat(test, iterations) for test in tests) 162 | else: 163 | test_instances = itertools.chain.from_iterable(itertools.repeat(tests, iterations)) 164 | test_instances = iter(test_instances) 165 | 166 | total_progress = Progress( 167 | "[progress.description]{task.description}", 168 | BarColumn(), 169 | TimeRemainingColumn(), 170 | "[progress.percentage]{task.percentage:>3.0f}%", 171 | TimeElapsedColumn(), 172 | ) 173 | total_task = total_progress.add_task("[yellow]Tests[/yellow]", total=total) 174 | 175 | task_progress = Progress( 176 | "[progress.description]{task.description}", 177 | SpinnerColumn(), 178 | BarColumn(), 179 | "{task.completed}/{task.total}", 180 | ) 181 | tasks = {test: task_progress.add_task(test, total=iterations) for test in tests} 182 | 183 | progress_table = Table.grid() 184 | progress_table.add_row(total_progress) 185 | progress_table.add_row(Panel.fit(task_progress)) 186 | 187 | with Live(progress_table, transient=True) as live: 188 | 189 | def handler(_, frame): 190 | live.stop() 191 | print('\n') 192 | print_results(results) 193 | sys.exit(1) 194 | 195 | signal.signal(signal.SIGINT, handler) 196 | 197 | with ThreadPoolExecutor(max_workers=workers) as executor: 198 | 199 | futures = [] 200 | while completed < total: 201 | n = len(futures) 202 | if n < workers: 203 | for test in itertools.islice(test_instances, workers-n): 204 | futures.append(executor.submit(run_test, test, race, timing)) 205 | 206 | done, not_done = wait(futures, return_when=FIRST_COMPLETED) 207 | 208 | for future in done: 209 | test, path, rc, runtime = future.result() 210 | 211 | results[test]['completed'].add(1) 212 | results[test]['time'].add(runtime) 213 | task_progress.update(tasks[test], advance=1) 214 | dest = (output / f"{test}_{completed}.log").as_posix() 215 | if rc != 0: 216 | print(f"Failed test {test} - {dest}") 217 | task_progress.update(tasks[test], description=f"[red]{test}[/red]") 218 | results[test]['failed'].add(1) 219 | else: 220 | if results[test]['completed'].n == iterations and results[test]['failed'].n == 0: 221 | task_progress.update(tasks[test], description=f"[green]{test}[/green]") 222 | 223 | if rc != 0 or archive: 224 |
output.mkdir(exist_ok=True, parents=True) 225 | shutil.copy(path, dest) 226 | 227 | if timing: 228 | line = last_line(path) 229 | real, _, user, _, system, _ = line.replace(' '*8, '').split(' ') 230 | results[test]['real_time'].add(float(real)) 231 | results[test]['user_time'].add(float(user)) 232 | results[test]['system_time'].add(float(system)) 233 | 234 | os.remove(path) 235 | 236 | completed += 1 237 | total_progress.update(total_task, advance=1) 238 | 239 | futures = list(not_done) 240 | 241 | print_results(results, timing) 242 | 243 | if loop: 244 | iterations *= growth 245 | print(f"[yellow]Increasing iterations to {iterations}[/yellow]") 246 | else: 247 | break 248 | 249 | 250 | if __name__ == "__main__": 251 | typer.run(run_tests) -------------------------------------------------------------------------------- /src/shardctrler/config.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import "6.824/labrpc" 4 | import "6.824/raft" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | import "time" 15 | 16 | func randstring(n int) string { 17 | b := make([]byte, 2*n) 18 | crand.Read(b) 19 | s := base64.URLEncoding.EncodeToString(b) 20 | return s[0:n] 21 | } 22 | 23 | // Randomize server handles 24 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 25 | sa := make([]*labrpc.ClientEnd, len(kvh)) 26 | copy(sa, kvh) 27 | for i := range sa { 28 | j := rand.Intn(i + 1) 29 | sa[i], sa[j] = sa[j], sa[i] 30 | } 31 | return sa 32 | } 33 | 34 | type config struct { 35 | mu sync.Mutex 36 | t *testing.T 37 | net *labrpc.Network 38 | n int 39 | servers []*ShardCtrler 40 | saved []*raft.Persister 41 | endnames [][]string // names of each server's sending ClientEnds 42 | clerks map[*Clerk][]string 43 | nextClientId int 44 | start time.Time // time at which make_config() was called 45 | } 46 | 47 | func (cfg *config) checkTimeout() { 48 | // enforce a two minute real-time limit on each test 49 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 50 | cfg.t.Fatal("test took longer than 120 seconds") 51 | } 52 | } 53 | 54 | func (cfg *config) cleanup() { 55 | cfg.mu.Lock() 56 | defer cfg.mu.Unlock() 57 | for i := 0; i < len(cfg.servers); i++ { 58 | if cfg.servers[i] != nil { 59 | cfg.servers[i].Kill() 60 | } 61 | } 62 | cfg.net.Cleanup() 63 | cfg.checkTimeout() 64 | } 65 | 66 | // Maximum log size across all servers 67 | func (cfg *config) LogSize() int { 68 | logsize := 0 69 | for i := 0; i < cfg.n; i++ { 70 | n := cfg.saved[i].RaftStateSize() 71 | if n > logsize { 72 | logsize = n 73 | } 74 | } 75 | return logsize 76 | } 77 | 78 | // attach server i to servers listed in to 79 | // caller must hold cfg.mu 80 | func (cfg *config) connectUnlocked(i int, to []int) { 81 | // log.Printf("connect peer %d to %v\n", i, to) 82 | 83 | // outgoing socket files 84 | for j := 0; j < len(to); j++ { 85 | endname := cfg.endnames[i][to[j]] 86 | cfg.net.Enable(endname, true) 87 | } 88 | 89 | // incoming socket files 90 | for j := 0; j < len(to); j++ { 91 | endname := cfg.endnames[to[j]][i] 92 | cfg.net.Enable(endname, true) 93 | } 94 | } 95 | 96 | func (cfg *config) connect(i int, to []int) { 97 | cfg.mu.Lock() 98 | defer cfg.mu.Unlock() 99 | cfg.connectUnlocked(i, to) 100 | } 101 | 102 | // detach server i from the servers listed in from 103 | // caller must hold cfg.mu 104 | func (cfg 
*config) disconnectUnlocked(i int, from []int) { 105 | // log.Printf("disconnect peer %d from %v\n", i, from) 106 | 107 | // outgoing socket files 108 | for j := 0; j < len(from); j++ { 109 | if cfg.endnames[i] != nil { 110 | endname := cfg.endnames[i][from[j]] 111 | cfg.net.Enable(endname, false) 112 | } 113 | } 114 | 115 | // incoming socket files 116 | for j := 0; j < len(from); j++ { 117 | if cfg.endnames[j] != nil { 118 | endname := cfg.endnames[from[j]][i] 119 | cfg.net.Enable(endname, false) 120 | } 121 | } 122 | } 123 | 124 | func (cfg *config) disconnect(i int, from []int) { 125 | cfg.mu.Lock() 126 | defer cfg.mu.Unlock() 127 | cfg.disconnectUnlocked(i, from) 128 | } 129 | 130 | func (cfg *config) All() []int { 131 | all := make([]int, cfg.n) 132 | for i := 0; i < cfg.n; i++ { 133 | all[i] = i 134 | } 135 | return all 136 | } 137 | 138 | func (cfg *config) ConnectAll() { 139 | cfg.mu.Lock() 140 | defer cfg.mu.Unlock() 141 | for i := 0; i < cfg.n; i++ { 142 | cfg.connectUnlocked(i, cfg.All()) 143 | } 144 | } 145 | 146 | // Sets up 2 partitions with connectivity between servers in each partition. 147 | func (cfg *config) partition(p1 []int, p2 []int) { 148 | cfg.mu.Lock() 149 | defer cfg.mu.Unlock() 150 | // log.Printf("partition servers into: %v %v\n", p1, p2) 151 | for i := 0; i < len(p1); i++ { 152 | cfg.disconnectUnlocked(p1[i], p2) 153 | cfg.connectUnlocked(p1[i], p1) 154 | } 155 | for i := 0; i < len(p2); i++ { 156 | cfg.disconnectUnlocked(p2[i], p1) 157 | cfg.connectUnlocked(p2[i], p2) 158 | } 159 | } 160 | 161 | // Create a clerk with clerk specific server names. 162 | // Give it connections to all of the servers, but for 163 | // now enable only connections to servers in to[]. 164 | func (cfg *config) makeClient(to []int) *Clerk { 165 | cfg.mu.Lock() 166 | defer cfg.mu.Unlock() 167 | 168 | // a fresh set of ClientEnds. 
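// each clerk gets its own randomly named ends, recorded in cfg.clerks, so the harness can enable or disable this client's connections independently via ConnectClientUnlocked/DisconnectClientUnlocked.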
169 | ends := make([]*labrpc.ClientEnd, cfg.n) 170 | endnames := make([]string, cfg.n) 171 | for j := 0; j < cfg.n; j++ { 172 | endnames[j] = randstring(20) 173 | ends[j] = cfg.net.MakeEnd(endnames[j]) 174 | cfg.net.Connect(endnames[j], j) 175 | } 176 | 177 | ck := MakeClerk(random_handles(ends)) 178 | cfg.clerks[ck] = endnames 179 | cfg.nextClientId++ 180 | cfg.ConnectClientUnlocked(ck, to) 181 | return ck 182 | } 183 | 184 | func (cfg *config) deleteClient(ck *Clerk) { 185 | cfg.mu.Lock() 186 | defer cfg.mu.Unlock() 187 | 188 | v := cfg.clerks[ck] 189 | for i := 0; i < len(v); i++ { 190 | os.Remove(v[i]) 191 | } 192 | delete(cfg.clerks, ck) 193 | } 194 | 195 | // caller should hold cfg.mu 196 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 197 | // log.Printf("ConnectClient %v to %v\n", ck, to) 198 | endnames := cfg.clerks[ck] 199 | for j := 0; j < len(to); j++ { 200 | s := endnames[to[j]] 201 | cfg.net.Enable(s, true) 202 | } 203 | } 204 | 205 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 206 | cfg.mu.Lock() 207 | defer cfg.mu.Unlock() 208 | cfg.ConnectClientUnlocked(ck, to) 209 | } 210 | 211 | // caller should hold cfg.mu 212 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 213 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 214 | endnames := cfg.clerks[ck] 215 | for j := 0; j < len(from); j++ { 216 | s := endnames[from[j]] 217 | cfg.net.Enable(s, false) 218 | } 219 | } 220 | 221 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 222 | cfg.mu.Lock() 223 | defer cfg.mu.Unlock() 224 | cfg.DisconnectClientUnlocked(ck, from) 225 | } 226 | 227 | // Shutdown a server by isolating it 228 | func (cfg *config) ShutdownServer(i int) { 229 | cfg.mu.Lock() 230 | defer cfg.mu.Unlock() 231 | 232 | cfg.disconnectUnlocked(i, cfg.All()) 233 | 234 | // disable client connections to the server. 235 | // it's important to do this before creating 236 | // the new Persister in saved[i], to avoid 237 | // the possibility of the server returning a 238 | // positive reply to an Append but persisting 239 | // the result in the superseded Persister. 240 | cfg.net.DeleteServer(i) 241 | 242 | // a fresh persister, in case old instance 243 | // continues to update the Persister. 244 | // but copy old persister's content so that we always 245 | // pass Make() the last persisted state. 246 | if cfg.saved[i] != nil { 247 | cfg.saved[i] = cfg.saved[i].Copy() 248 | } 249 | 250 | kv := cfg.servers[i] 251 | if kv != nil { 252 | cfg.mu.Unlock() 253 | kv.Kill() 254 | cfg.mu.Lock() 255 | cfg.servers[i] = nil 256 | } 257 | } 258 | 259 | // If restart servers, first call ShutdownServer 260 | func (cfg *config) StartServer(i int) { 261 | cfg.mu.Lock() 262 | 263 | // a fresh set of outgoing ClientEnd names. 264 | cfg.endnames[i] = make([]string, cfg.n) 265 | for j := 0; j < cfg.n; j++ { 266 | cfg.endnames[i][j] = randstring(20) 267 | } 268 | 269 | // a fresh set of ClientEnds. 270 | ends := make([]*labrpc.ClientEnd, cfg.n) 271 | for j := 0; j < cfg.n; j++ { 272 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 273 | cfg.net.Connect(cfg.endnames[i][j], j) 274 | } 275 | 276 | // a fresh persister, so old instance doesn't overwrite 277 | // new instance's persisted state. 278 | // give the fresh persister a copy of the old persister's 279 | // state, so that the spec is that we pass StartKVServer() 280 | // the last persisted state. 
281 | if cfg.saved[i] != nil { 282 | cfg.saved[i] = cfg.saved[i].Copy() 283 | } else { 284 | cfg.saved[i] = raft.MakePersister() 285 | } 286 | 287 | cfg.mu.Unlock() 288 | 289 | cfg.servers[i] = StartServer(ends, i, cfg.saved[i]) 290 | 291 | kvsvc := labrpc.MakeService(cfg.servers[i]) 292 | rfsvc := labrpc.MakeService(cfg.servers[i].rf) 293 | srv := labrpc.MakeServer() 294 | srv.AddService(kvsvc) 295 | srv.AddService(rfsvc) 296 | cfg.net.AddServer(i, srv) 297 | } 298 | 299 | func (cfg *config) Leader() (bool, int) { 300 | cfg.mu.Lock() 301 | defer cfg.mu.Unlock() 302 | 303 | for i := 0; i < cfg.n; i++ { 304 | if cfg.servers[i] != nil { 305 | _, is_leader := cfg.servers[i].rf.GetState() 306 | if is_leader { 307 | return true, i 308 | } 309 | } 310 | } 311 | return false, 0 312 | } 313 | 314 | // Partition servers into 2 groups and put current leader in minority 315 | func (cfg *config) make_partition() ([]int, []int) { 316 | _, l := cfg.Leader() 317 | p1 := make([]int, cfg.n/2+1) 318 | p2 := make([]int, cfg.n/2) 319 | j := 0 320 | for i := 0; i < cfg.n; i++ { 321 | if i != l { 322 | if j < len(p1) { 323 | p1[j] = i 324 | } else { 325 | p2[j-len(p1)] = i 326 | } 327 | j++ 328 | } 329 | } 330 | p2[len(p2)-1] = l 331 | return p1, p2 332 | } 333 | 334 | func make_config(t *testing.T, n int, unreliable bool) *config { 335 | runtime.GOMAXPROCS(4) 336 | cfg := &config{} 337 | cfg.t = t 338 | cfg.net = labrpc.MakeNetwork() 339 | cfg.n = n 340 | cfg.servers = make([]*ShardCtrler, cfg.n) 341 | cfg.saved = make([]*raft.Persister, cfg.n) 342 | cfg.endnames = make([][]string, cfg.n) 343 | cfg.clerks = make(map[*Clerk][]string) 344 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 345 | cfg.start = time.Now() 346 | 347 | // create a full set of KV servers. 
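// start every replica first, then connect all endpoints, and finally set the network's reliability from the test's unreliable flag.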
348 | for i := 0; i < cfg.n; i++ { 349 | cfg.StartServer(i) 350 | } 351 | 352 | cfg.ConnectAll() 353 | 354 | cfg.net.Reliable(!unreliable) 355 | 356 | return cfg 357 | } 358 | -------------------------------------------------------------------------------- /src/porcupine/checker.go: -------------------------------------------------------------------------------- 1 | package porcupine 2 | 3 | import ( 4 | "sort" 5 | "sync/atomic" 6 | "time" 7 | ) 8 | 9 | type entryKind bool 10 | 11 | const ( 12 | callEntry entryKind = false 13 | returnEntry = true 14 | ) 15 | 16 | type entry struct { 17 | kind entryKind 18 | value interface{} 19 | id int 20 | time int64 21 | clientId int 22 | } 23 | 24 | type linearizationInfo struct { 25 | history [][]entry // for each partition, a list of entries 26 | partialLinearizations [][][]int // for each partition, a set of histories (list of ids) 27 | } 28 | 29 | type byTime []entry 30 | 31 | func (a byTime) Len() int { 32 | return len(a) 33 | } 34 | 35 | func (a byTime) Swap(i, j int) { 36 | a[i], a[j] = a[j], a[i] 37 | } 38 | 39 | func (a byTime) Less(i, j int) bool { 40 | if a[i].time != a[j].time { 41 | return a[i].time < a[j].time 42 | } 43 | // if the timestamps are the same, we need to make sure we order calls 44 | // before returns 45 | return a[i].kind == callEntry && a[j].kind == returnEntry 46 | } 47 | 48 | func makeEntries(history []Operation) []entry { 49 | var entries []entry = nil 50 | id := 0 51 | for _, elem := range history { 52 | entries = append(entries, entry{ 53 | callEntry, elem.Input, id, elem.Call, elem.ClientId}) 54 | entries = append(entries, entry{ 55 | returnEntry, elem.Output, id, elem.Return, elem.ClientId}) 56 | id++ 57 | } 58 | sort.Sort(byTime(entries)) 59 | return entries 60 | } 61 | 62 | type node struct { 63 | value interface{} 64 | match *node // call if match is nil, otherwise return 65 | id int 66 | next *node 67 | prev *node 68 | } 69 | 70 | func insertBefore(n *node, mark *node) *node { 71 | if mark != nil { 72 | beforeMark := mark.prev 73 | mark.prev = n 74 | n.next = mark 75 | if beforeMark != nil { 76 | n.prev = beforeMark 77 | beforeMark.next = n 78 | } 79 | } 80 | return n 81 | } 82 | 83 | func length(n *node) int { 84 | l := 0 85 | for n != nil { 86 | n = n.next 87 | l++ 88 | } 89 | return l 90 | } 91 | 92 | func renumber(events []Event) []Event { 93 | var e []Event 94 | m := make(map[int]int) // renumbering 95 | id := 0 96 | for _, v := range events { 97 | if r, ok := m[v.Id]; ok { 98 | e = append(e, Event{v.ClientId, v.Kind, v.Value, r}) 99 | } else { 100 | e = append(e, Event{v.ClientId, v.Kind, v.Value, id}) 101 | m[v.Id] = id 102 | id++ 103 | } 104 | } 105 | return e 106 | } 107 | 108 | func convertEntries(events []Event) []entry { 109 | var entries []entry 110 | for i, elem := range events { 111 | kind := callEntry 112 | if elem.Kind == ReturnEvent { 113 | kind = returnEntry 114 | } 115 | // use index as "time" 116 | entries = append(entries, entry{kind, elem.Value, elem.Id, int64(i), elem.ClientId}) 117 | } 118 | return entries 119 | } 120 | 121 | func makeLinkedEntries(entries []entry) *node { 122 | var root *node = nil 123 | match := make(map[int]*node) 124 | for i := len(entries) - 1; i >= 0; i-- { 125 | elem := entries[i] 126 | if elem.kind == returnEntry { 127 | entry := &node{value: elem.value, match: nil, id: elem.id} 128 | match[elem.id] = entry 129 | insertBefore(entry, root) 130 | root = entry 131 | } else { 132 | entry := &node{value: elem.value, match: match[elem.id], id: elem.id} 133 | 
insertBefore(entry, root) 134 | root = entry 135 | } 136 | } 137 | return root 138 | } 139 | 140 | type cacheEntry struct { 141 | linearized bitset 142 | state interface{} 143 | } 144 | 145 | func cacheContains(model Model, cache map[uint64][]cacheEntry, entry cacheEntry) bool { 146 | for _, elem := range cache[entry.linearized.hash()] { 147 | if entry.linearized.equals(elem.linearized) && model.Equal(entry.state, elem.state) { 148 | return true 149 | } 150 | } 151 | return false 152 | } 153 | 154 | type callsEntry struct { 155 | entry *node 156 | state interface{} 157 | } 158 | 159 | func lift(entry *node) { 160 | entry.prev.next = entry.next 161 | entry.next.prev = entry.prev 162 | match := entry.match 163 | match.prev.next = match.next 164 | if match.next != nil { 165 | match.next.prev = match.prev 166 | } 167 | } 168 | 169 | func unlift(entry *node) { 170 | match := entry.match 171 | match.prev.next = match 172 | if match.next != nil { 173 | match.next.prev = match 174 | } 175 | entry.prev.next = entry 176 | entry.next.prev = entry 177 | } 178 | 179 | func checkSingle(model Model, history []entry, computePartial bool, kill *int32) (bool, []*[]int) { 180 | entry := makeLinkedEntries(history) 181 | n := length(entry) / 2 182 | linearized := newBitset(uint(n)) 183 | cache := make(map[uint64][]cacheEntry) // map from hash to cache entry 184 | var calls []callsEntry 185 | // longest linearizable prefix that includes the given entry 186 | longest := make([]*[]int, n) 187 | 188 | state := model.Init() 189 | headEntry := insertBefore(&node{value: nil, match: nil, id: -1}, entry) 190 | for headEntry.next != nil { 191 | if atomic.LoadInt32(kill) != 0 { 192 | return false, longest 193 | } 194 | if entry.match != nil { 195 | matching := entry.match // the return entry 196 | ok, newState := model.Step(state, entry.value, matching.value) 197 | if ok { 198 | newLinearized := linearized.clone().set(uint(entry.id)) 199 | newCacheEntry := cacheEntry{newLinearized, newState} 200 | if !cacheContains(model, cache, newCacheEntry) { 201 | hash := newLinearized.hash() 202 | cache[hash] = append(cache[hash], newCacheEntry) 203 | calls = append(calls, callsEntry{entry, state}) 204 | state = newState 205 | linearized.set(uint(entry.id)) 206 | lift(entry) 207 | entry = headEntry.next 208 | } else { 209 | entry = entry.next 210 | } 211 | } else { 212 | entry = entry.next 213 | } 214 | } else { 215 | if len(calls) == 0 { 216 | return false, longest 217 | } 218 | // longest 219 | if computePartial { 220 | callsLen := len(calls) 221 | var seq []int = nil 222 | for _, v := range calls { 223 | if longest[v.entry.id] == nil || callsLen > len(*longest[v.entry.id]) { 224 | // create seq lazily 225 | if seq == nil { 226 | seq = make([]int, len(calls)) 227 | for i, v := range calls { 228 | seq[i] = v.entry.id 229 | } 230 | } 231 | longest[v.entry.id] = &seq 232 | } 233 | } 234 | } 235 | callsTop := calls[len(calls)-1] 236 | entry = callsTop.entry 237 | state = callsTop.state 238 | linearized.clear(uint(entry.id)) 239 | calls = calls[:len(calls)-1] 240 | unlift(entry) 241 | entry = entry.next 242 | } 243 | } 244 | // longest linearization is the complete linearization, which is calls 245 | seq := make([]int, len(calls)) 246 | for i, v := range calls { 247 | seq[i] = v.entry.id 248 | } 249 | for i := 0; i < n; i++ { 250 | longest[i] = &seq 251 | } 252 | return true, longest 253 | } 254 | 255 | func fillDefault(model Model) Model { 256 | if model.Partition == nil { 257 | model.Partition = NoPartition 258 | } 259 | if 
model.PartitionEvent == nil { 260 | model.PartitionEvent = NoPartitionEvent 261 | } 262 | if model.Equal == nil { 263 | model.Equal = ShallowEqual 264 | } 265 | if model.DescribeOperation == nil { 266 | model.DescribeOperation = DefaultDescribeOperation 267 | } 268 | if model.DescribeState == nil { 269 | model.DescribeState = DefaultDescribeState 270 | } 271 | return model 272 | } 273 | 274 | func checkParallel(model Model, history [][]entry, computeInfo bool, timeout time.Duration) (CheckResult, linearizationInfo) { 275 | ok := true 276 | timedOut := false 277 | results := make(chan bool, len(history)) 278 | longest := make([][]*[]int, len(history)) 279 | kill := int32(0) 280 | for i, subhistory := range history { 281 | go func(i int, subhistory []entry) { 282 | ok, l := checkSingle(model, subhistory, computeInfo, &kill) 283 | longest[i] = l 284 | results <- ok 285 | }(i, subhistory) 286 | } 287 | var timeoutChan <-chan time.Time 288 | if timeout > 0 { 289 | timeoutChan = time.After(timeout) 290 | } 291 | count := 0 292 | loop: 293 | for { 294 | select { 295 | case result := <-results: 296 | count++ 297 | ok = ok && result 298 | if !ok && !computeInfo { 299 | atomic.StoreInt32(&kill, 1) 300 | break loop 301 | } 302 | if count >= len(history) { 303 | break loop 304 | } 305 | case <-timeoutChan: 306 | timedOut = true 307 | atomic.StoreInt32(&kill, 1) 308 | break loop // if we time out, we might get a false positive 309 | } 310 | } 311 | var info linearizationInfo 312 | if computeInfo { 313 | // make sure we've waited for all goroutines to finish, 314 | // otherwise we might race on access to longest[] 315 | for count < len(history) { 316 | <-results 317 | count++ 318 | } 319 | // return longest linearizable prefixes that include each history element 320 | partialLinearizations := make([][][]int, len(history)) 321 | for i := 0; i < len(history); i++ { 322 | var partials [][]int 323 | // turn longest into a set of unique linearizations 324 | set := make(map[*[]int]struct{}) 325 | for _, v := range longest[i] { 326 | if v != nil { 327 | set[v] = struct{}{} 328 | } 329 | } 330 | for k := range set { 331 | arr := make([]int, len(*k)) 332 | for i, v := range *k { 333 | arr[i] = v 334 | } 335 | partials = append(partials, arr) 336 | } 337 | partialLinearizations[i] = partials 338 | } 339 | info.history = history 340 | info.partialLinearizations = partialLinearizations 341 | } 342 | var result CheckResult 343 | if !ok { 344 | result = Illegal 345 | } else { 346 | if timedOut { 347 | result = Unknown 348 | } else { 349 | result = Ok 350 | } 351 | } 352 | return result, info 353 | } 354 | 355 | func checkEvents(model Model, history []Event, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) { 356 | model = fillDefault(model) 357 | partitions := model.PartitionEvent(history) 358 | l := make([][]entry, len(partitions)) 359 | for i, subhistory := range partitions { 360 | l[i] = convertEntries(renumber(subhistory)) 361 | } 362 | return checkParallel(model, l, verbose, timeout) 363 | } 364 | 365 | func checkOperations(model Model, history []Operation, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) { 366 | model = fillDefault(model) 367 | partitions := model.Partition(history) 368 | l := make([][]entry, len(partitions)) 369 | for i, subhistory := range partitions { 370 | l[i] = makeEntries(subhistory) 371 | } 372 | return checkParallel(model, l, verbose, timeout) 373 | } 374 | --------------------------------------------------------------------------------