├── src ├── go.sum ├── go.mod ├── .gitignore ├── shardkv │ ├── empty.go │ ├── utils.go │ ├── monitor.go │ ├── commandDef.go │ ├── execute.go │ ├── shard.go │ ├── common.go │ ├── commandHandler.go │ ├── gc.go │ ├── apply.go │ ├── snapshot.go │ ├── configure.go │ ├── migration.go │ ├── client.go │ ├── server.go │ ├── dslogs │ └── dstest ├── raft │ ├── constant.go │ ├── snapshotHandler.go │ ├── snapshot.go │ ├── status.go │ ├── election.go │ ├── apply.go │ ├── ticker.go │ ├── persist.go │ ├── persister.go │ ├── electionHandler.go │ ├── log.go │ ├── appendEntries.go │ ├── appendEntriesHandler.go │ ├── interface.go │ ├── rpc.go │ └── raft.go ├── main │ ├── viewd.go │ ├── pbd.go │ ├── test-mr-many.sh │ ├── mrcoordinator.go │ ├── lockc.go │ ├── lockd.go │ ├── pbc.go │ ├── mrworker.go │ ├── diskvd.go │ ├── mrsequential.go │ └── test-mr.sh ├── shardctrler │ ├── utils.go │ ├── common.go │ ├── apply.go │ ├── client.go │ ├── server.go │ ├── configModel.go │ └── config.go ├── kvraft │ ├── kv.go │ ├── common.go │ ├── snapshot.go │ ├── apply.go │ ├── client.go │ └── server.go ├── mrapps │ ├── early_exit.go │ ├── nocrash.go │ ├── jobcount.go │ ├── indexer.go │ ├── wc.go │ ├── crash.go │ ├── rtiming.go │ └── mtiming.go ├── utils │ ├── utils.go │ ├── dslogs │ └── dstest ├── mr │ ├── rpc.go │ ├── coordinator.go │ └── worker.go ├── porcupine │ ├── porcupine.go │ ├── bitset.go │ ├── model.go │ └── checker.go ├── models │ └── kv.go └── labgob │ ├── test_test.go │ └── labgob.go ├── .gitignore ├── README.md ├── Makefile └── .check-build /src/go.sum: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/go.mod: -------------------------------------------------------------------------------- 1 | module 6.824 2 | 3 | go 1.15 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | pkg/ 2 | api.key 3 | .api.key.trimmed 4 | *-handin.tar.gz 5 | *.log 6 | .DS_Store 7 | .idea -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.*/ 2 | main/mr-tmp/ 3 | mrtmp.* 4 | 824-mrinput-*.txt 5 | /main/diff.out 6 | /mapreduce/x.txt 7 | /pbservice/x.txt 8 | /kvpaxos/x.txt 9 | *.so 10 | /main/mrcoordinator 11 | /main/mrsequential 12 | /main/mrworker 13 | -------------------------------------------------------------------------------- /src/shardkv/empty.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | func (kv *ShardKV) checkEntryInCurrentTermAction() { 4 | if !kv.rf.HasLogInCurrentTerm() { 5 | kv.Execute(NewEmptyEntryCommand(), &OpResp{}) 6 | } 7 | } 8 | 9 | func (kv *ShardKV) applyEmptyEntry() *OpResp { 10 | return &OpResp{OK, ""} 11 | } 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mit6.824-2021Spring 2 | 3 | Because bugs in distributed systems are random and non-deterministic, I cannot guarantee the code is absolutely bug-free, but I have tracked down every bug I could find. (fingers crossed) 4 | 5 | lab1: MapReduce built on top of the Linux file system. (100+ test runs, no bugs observed) 6 | 7 | lab2: the Raft algorithm, including leader election, log replication, persistence, and snapshots. (5000+ test runs, no bugs observed) 8 | 9 | lab3: a replicated KV store (multiple KV servers hold the same data). (1000+ test runs) 10 | 11 | lab4: a sharded KV store (shardKV): each group holds a different slice of the data (keys are partitioned horizontally by hash), while the KV servers within a group hold the same data. (1000+ test runs) 12 | 
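To make the lab 4 description concrete, here is a minimal, self-contained sketch of the kind of hash-based key-to-shard mapping it refers to: each key is reduced to a shard number, and the shard controller decides which replica group currently serves that shard. The repository's real mapping lives under src/shardkv and src/shardctrler (not shown in this excerpt); the `NShards` constant and the first-byte scheme below are assumptions borrowed from the standard 6.824 skeleton.

```go
package main

import "fmt"

// NShards is the total number of shards; 10 is assumed here.
const NShards = 10

// key2shard reduces a key to a shard number. All keys that map to the
// same shard are served by whichever replica group owns that shard in
// the current configuration.
func key2shard(key string) int {
	shard := 0
	if len(key) > 0 {
		shard = int(key[0]) // hash on the first byte of the key
	}
	return shard % NShards
}

func main() {
	for _, key := range []string{"apple", "banana", "counter"} {
		fmt.Printf("key %q -> shard %d\n", key, key2shard(key))
	}
}
```

Within a group, every replica applies the same Raft log and therefore converges on the same data for the shards it owns; handing a shard from one group to another is what the migration and GC code in src/shardkv deals with.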
-------------------------------------------------------------------------------- /src/raft/constant.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // election 4 | const ( 5 | // magic number 6 | voted_nil int = -12345 7 | ) 8 | 9 | // appendEntries 10 | const ( 11 | magic_index int = 0 12 | magic_term int = -1 13 | ) 14 | 15 | // ticker 16 | const ( 17 | gap_time int = 3 18 | election_base_time int = 300 19 | election_range_time int = 100 20 | heartbeat_time int = 50 21 | ) 22 | -------------------------------------------------------------------------------- /src/main/viewd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "6.824/viewservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 2 { 14 | fmt.Printf("Usage: viewd port\n") 15 | os.Exit(1) 16 | } 17 | 18 | viewservice.StartServer(os.Args[1]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/pbd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "6.824/pbservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 3 { 14 | fmt.Printf("Usage: pbd viewport myport\n") 15 | os.Exit(1) 16 | } 17 | 18 | pbservice.StartServer(os.Args[1], os.Args[2]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/test-mr-many.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ $# -ne 1 ]; then 4 | echo "Usage: $0 numTrials" 5 | exit 1 6 | fi 7 | 8 | trap 'kill -INT -$pid; exit 1' INT 9 | 10 | # Note: because the socketID is based on the current userID, 11 | # ./test-mr.sh cannot be run in parallel 12 | runs=$1 13 | chmod +x test-mr.sh 14 | 15 | for i in $(seq 1 $runs); do 16 | timeout -k 2s 900s ./test-mr.sh & 17 | pid=$! 18 | if ! wait $pid; then 19 | echo '***' FAILED TESTS IN TRIAL $i 20 | exit 1 21 | fi 22 | done 23 | echo '***' PASSED ALL $i TESTING TRIALS 24 | -------------------------------------------------------------------------------- /src/main/mrcoordinator.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start the coordinator process, which is implemented 5 | // in ../mr/coordinator.go 6 | // 7 | // go run mrcoordinator.go pg*.txt 8 | // 9 | // Please do not change this file. 
10 | // 11 | 12 | import "6.824/mr" 13 | import "time" 14 | import "os" 15 | import "fmt" 16 | 17 | func main() { 18 | if len(os.Args) < 2 { 19 | fmt.Fprintf(os.Stderr, "Usage: mrcoordinator inputfiles...\n") 20 | os.Exit(1) 21 | } 22 | 23 | m := mr.MakeCoordinator(os.Args[1:], 10) 24 | for m.Done() == false { 25 | time.Sleep(time.Second) 26 | } 27 | 28 | time.Sleep(time.Second) 29 | } 30 | -------------------------------------------------------------------------------- /src/main/lockc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see comments in lockd.go 5 | // 6 | 7 | import "6.824/lockservice" 8 | import "os" 9 | import "fmt" 10 | 11 | func usage() { 12 | fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n") 13 | os.Exit(1) 14 | } 15 | 16 | func main() { 17 | if len(os.Args) == 5 { 18 | ck := lockservice.MakeClerk(os.Args[2], os.Args[3]) 19 | var ok bool 20 | if os.Args[1] == "-l" { 21 | ok = ck.Lock(os.Args[4]) 22 | } else if os.Args[1] == "-u" { 23 | ok = ck.Unlock(os.Args[4]) 24 | } else { 25 | usage() 26 | } 27 | fmt.Printf("reply: %v\n", ok) 28 | } else { 29 | usage() 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/lockd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // export GOPATH=~/6.824 4 | // go build lockd.go 5 | // go build lockc.go 6 | // ./lockd -p a b & 7 | // ./lockd -b a b & 8 | // ./lockc -l a b lx 9 | // ./lockc -u a b lx 10 | // 11 | // on Athena, use /tmp/myname-a and /tmp/myname-b 12 | // instead of a and b. 13 | 14 | import "time" 15 | import "6.824/lockservice" 16 | import "os" 17 | import "fmt" 18 | 19 | func main() { 20 | if len(os.Args) == 4 && os.Args[1] == "-p" { 21 | lockservice.StartServer(os.Args[2], os.Args[3], true) 22 | } else if len(os.Args) == 4 && os.Args[1] == "-b" { 23 | lockservice.StartServer(os.Args[2], os.Args[3], false) 24 | } else { 25 | fmt.Printf("Usage: lockd -p|-b primaryport backupport\n") 26 | os.Exit(1) 27 | } 28 | for { 29 | time.Sleep(100 * time.Second) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/shardkv/utils.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "time" 7 | ) 8 | 9 | // Debugging 10 | const debug = true 11 | 12 | type logTopic string 13 | 14 | const ( 15 | dClient logTopic = "CLNT" 16 | dError logTopic = "ERRO" 17 | dInfo logTopic = "INFO" 18 | dLog logTopic = "LOG1" 19 | dLog2 logTopic = "LOG2" 20 | dTest logTopic = "TEST" 21 | dTrace logTopic = "TRCE" 22 | dWarn logTopic = "WARN" 23 | dServer logTopic = "SEVR" 24 | ) 25 | 26 | var debugStart time.Time 27 | 28 | func init() { 29 | debugStart = time.Now() 30 | 31 | log.SetFlags(log.Flags() &^ (log.Ldate | log.Ltime)) 32 | } 33 | 34 | func Debug(topic logTopic, format string, a ...interface{}) { 35 | if debug { 36 | time := time.Since(debugStart).Microseconds() 37 | time /= 100 38 | prefix := fmt.Sprintf("%06d %v ", time, string(topic)) 39 | format = prefix + format 40 | log.Printf(format, a...) 
41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/shardctrler/utils.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "time" 7 | ) 8 | 9 | // Debugging 10 | const debug = false 11 | 12 | type logTopic string 13 | 14 | const ( 15 | dClient logTopic = "CLNT" 16 | dError logTopic = "ERRO" 17 | dInfo logTopic = "INFO" 18 | dLog logTopic = "LOG1" 19 | dLog2 logTopic = "LOG2" 20 | dTest logTopic = "TEST" 21 | dTrace logTopic = "TRCE" 22 | dWarn logTopic = "WARN" 23 | dServer logTopic = "SEVR" 24 | ) 25 | 26 | var debugStart time.Time 27 | 28 | func init() { 29 | debugStart = time.Now() 30 | 31 | log.SetFlags(log.Flags() &^ (log.Ldate | log.Ltime)) 32 | } 33 | 34 | func Debug(topic logTopic, format string, a ...interface{}) { 35 | if debug { 36 | time := time.Since(debugStart).Microseconds() 37 | time /= 100 38 | prefix := fmt.Sprintf("%06d %v ", time, string(topic)) 39 | format = prefix + format 40 | log.Printf(format, a...) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/shardkv/monitor.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "time" 4 | 5 | const ( 6 | ConfigureMonitorTimeout time.Duration = time.Duration(50) * time.Millisecond 7 | MigrationMonitorTimeout time.Duration = time.Duration(50) * time.Millisecond 8 | GCMonitorTimeout time.Duration = time.Duration(50) * time.Millisecond 9 | checkEntryInCurrentTermTimeout time.Duration = time.Duration(100) * time.Millisecond 10 | ) 11 | 12 | func (kv *ShardKV) startMonitor() { 13 | go kv.monitor(kv.configureAction, ConfigureMonitorTimeout) 14 | go kv.monitor(kv.migrationAction, MigrationMonitorTimeout) 15 | go kv.monitor(kv.gcAction, GCMonitorTimeout) 16 | go kv.monitor(kv.checkEntryInCurrentTermAction, checkEntryInCurrentTermTimeout) 17 | } 18 | 19 | func (kv *ShardKV) monitor(action func(), timeout time.Duration) { 20 | for kv.killed() == false { 21 | if _, isLeader := kv.rf.GetState(); isLeader { 22 | action() 23 | } 24 | time.Sleep(timeout) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/kvraft/kv.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | type KV struct { 4 | Kvmap map[string]string 5 | } 6 | 7 | func NewKV() *KV { 8 | return &KV{make(map[string]string)} 9 | } 10 | 11 | func (kv *KV) Put(key string, value string) Err { 12 | kv.Kvmap[key] = value 13 | return OK 14 | } 15 | 16 | func (kv *KV) Append(key string, value string) Err { 17 | if value_ori, ok := kv.Kvmap[key]; ok { 18 | kv.Kvmap[key] = value_ori + value 19 | return OK 20 | } 21 | kv.Kvmap[key] = value 22 | return OK 23 | } 24 | 25 | func (kv *KV) Get(key string) (string, Err) { 26 | if value, ok := kv.Kvmap[key]; ok { 27 | return value, OK 28 | } 29 | return "", ErrNoKey 30 | } 31 | 32 | func (kv *KVServer) Opt(cmd Op) (string, Err) { 33 | switch cmd.OpType { 34 | case OpGet: 35 | value, err := kv.KvMap.Get(cmd.Key) 36 | return value, err 37 | case OpPut: 38 | err := kv.KvMap.Put(cmd.Key, cmd.Value) 39 | return "", err 40 | case OpAppend: 41 | err := kv.KvMap.Append(cmd.Key, cmd.Value) 42 | return "", err 43 | default: 44 | return "", OK 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/shardkv/commandDef.go: 
-------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "fmt" 5 | 6 | "6.824/shardctrler" 7 | ) 8 | 9 | type Command struct { 10 | Op CommandType 11 | Data interface{} 12 | } 13 | 14 | func (command Command) String() string { 15 | return fmt.Sprintf("{Type:%v,Data:%v}", command.Op, command.Data) 16 | } 17 | 18 | func NewOperationCommand(args *CmdArgs) Command { 19 | return Command{Operation, *args} 20 | } 21 | 22 | func NewConfigurationCommand(config *shardctrler.Config) Command { 23 | return Command{Configuration, *config} 24 | } 25 | 26 | func NewInsertShardsCommand(pullReply *PullDataReply) Command { 27 | return Command{InsertShards, *pullReply} 28 | } 29 | 30 | func NewDeleteShardsCommand(pullArgs *PullDataArgs) Command { 31 | return Command{DeleteShards, *pullArgs} 32 | } 33 | 34 | func NewEmptyEntryCommand() Command { 35 | return Command{EmptyEntry, nil} 36 | } 37 | 38 | type CommandType uint8 39 | 40 | const ( 41 | Operation CommandType = iota 42 | Configuration 43 | InsertShards 44 | DeleteShards 45 | EmptyEntry 46 | ) 47 | -------------------------------------------------------------------------------- /src/main/pbc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // pbservice client application 5 | // 6 | // export GOPATH=~/6.824 7 | // go build viewd.go 8 | // go build pbd.go 9 | // go build pbc.go 10 | // ./viewd /tmp/rtm-v & 11 | // ./pbd /tmp/rtm-v /tmp/rtm-1 & 12 | // ./pbd /tmp/rtm-v /tmp/rtm-2 & 13 | // ./pbc /tmp/rtm-v key1 value1 14 | // ./pbc /tmp/rtm-v key1 15 | // 16 | // change "rtm" to your user name. 17 | // start the pbd programs in separate windows and kill 18 | // and restart them to exercise fault tolerance. 19 | // 20 | 21 | import "6.824/pbservice" 22 | import "os" 23 | import "fmt" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: pbc viewport key\n") 27 | fmt.Printf(" pbc viewport key value\n") 28 | os.Exit(1) 29 | } 30 | 31 | func main() { 32 | if len(os.Args) == 3 { 33 | // get 34 | ck := pbservice.MakeClerk(os.Args[1], "") 35 | v := ck.Get(os.Args[2]) 36 | fmt.Printf("%v\n", v) 37 | } else if len(os.Args) == 4 { 38 | // put 39 | ck := pbservice.MakeClerk(os.Args[1], "") 40 | ck.Put(os.Args[2], os.Args[3]) 41 | } else { 42 | usage() 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/mrapps/early_exit.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a word-count application "plugin" for MapReduce. 5 | // 6 | // go build -buildmode=plugin wc_long.go 7 | // 8 | 9 | import ( 10 | "strconv" 11 | "strings" 12 | "time" 13 | 14 | "6.824/mr" 15 | ) 16 | 17 | // 18 | // The map function is called once for each file of input. 19 | // This map function just returns 1 for each file 20 | // 21 | func Map(filename string, contents string) []mr.KeyValue { 22 | kva := []mr.KeyValue{} 23 | kva = append(kva, mr.KeyValue{filename, "1"}) 24 | return kva 25 | } 26 | 27 | // 28 | // The reduce function is called once for each key generated by the 29 | // map tasks, with a list of all the values created for that key by 30 | // any map task. 
31 | // 32 | func Reduce(key string, values []string) string { 33 | // some reduce tasks sleep for a long time; potentially seeing if 34 | // a worker will accidentally exit early 35 | if strings.Contains(key, "sherlock") || strings.Contains(key, "tom") { 36 | time.Sleep(time.Duration(3 * time.Second)) 37 | } 38 | // return the number of occurrences of this file. 39 | return strconv.Itoa(len(values)) 40 | } 41 | -------------------------------------------------------------------------------- /src/mrapps/nocrash.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // same as crash.go but doesn't actually crash. 5 | // 6 | // go build -buildmode=plugin nocrash.go 7 | // 8 | 9 | import "6.824/mr" 10 | import crand "crypto/rand" 11 | import "math/big" 12 | import "strings" 13 | import "os" 14 | import "sort" 15 | import "strconv" 16 | 17 | func maybeCrash() { 18 | max := big.NewInt(1000) 19 | rr, _ := crand.Int(crand.Reader, max) 20 | if false && rr.Int64() < 500 { 21 | // crash! 22 | os.Exit(1) 23 | } 24 | } 25 | 26 | func Map(filename string, contents string) []mr.KeyValue { 27 | maybeCrash() 28 | 29 | kva := []mr.KeyValue{} 30 | kva = append(kva, mr.KeyValue{"a", filename}) 31 | kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))}) 32 | kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))}) 33 | kva = append(kva, mr.KeyValue{"d", "xyzzy"}) 34 | return kva 35 | } 36 | 37 | func Reduce(key string, values []string) string { 38 | maybeCrash() 39 | 40 | // sort values to ensure deterministic output. 41 | vv := make([]string, len(values)) 42 | copy(vv, values) 43 | sort.Strings(vv) 44 | 45 | val := strings.Join(vv, " ") 46 | return val 47 | } 48 | -------------------------------------------------------------------------------- /src/raft/snapshotHandler.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) { 6 | rf.mu.Lock() 7 | defer rf.mu.Unlock() 8 | 9 | utils.Debug(utils.DSnap, "S%d S%d installSnapshot", rf.me, args.LeaderId) 10 | defer utils.Debug(utils.DSnap, "S%d arg: %+v reply: %+v", rf.me, args, reply) 11 | 12 | if args.Term < rf.currentTerm { 13 | reply.Term = rf.currentTerm 14 | return 15 | } 16 | 17 | if args.Term > rf.currentTerm { 18 | rf.currentTerm, rf.votedFor = args.Term, voted_nil 19 | rf.persist() 20 | rf.TurnTo(follower) 21 | } 22 | 23 | if rf.status != follower { 24 | rf.TurnTo(follower) 25 | } 26 | 27 | reply.Term = rf.currentTerm 28 | rf.resetElectionTime() 29 | 30 | if args.LastIncludedIndex <= rf.commitIndex { 31 | utils.Debug(utils.DSnap, "S%d args's snapshot too old(%d < %d)", rf.me, args.LastIncludedIndex, rf.commitIndex) 32 | return 33 | } 34 | 35 | go func() { 36 | rf.applyCh <- ApplyMsg{ 37 | SnapshotValid: true, 38 | Snapshot: args.Data, 39 | SnapshotTerm: args.LastIncludedTerm, 40 | SnapshotIndex: args.LastIncludedIndex, 41 | } 42 | }() 43 | } 44 | -------------------------------------------------------------------------------- /src/utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "time" 7 | ) 8 | 9 | // Debugging 10 | const debug = false 11 | 12 | type logTopic string 13 | 14 | const ( 15 | DClient logTopic = "CLNT" 16 | DCommit logTopic = "CMIT" 17 | DDrop logTopic = "DROP" 18 | DError logTopic = "ERRO" 
19 | DInfo logTopic = "INFO" 20 | DLeader logTopic = "LEAD" 21 | DLog logTopic = "LOG1" 22 | DLog2 logTopic = "LOG2" 23 | DPersist logTopic = "PERS" 24 | DSnap logTopic = "SNAP" 25 | DTerm logTopic = "TERM" 26 | dTest logTopic = "TEST" 27 | DTimer logTopic = "TIMR" 28 | DTrace logTopic = "TRCE" 29 | DVote logTopic = "VOTE" 30 | DWarn logTopic = "WARN" 31 | DServer logTopic = "SEVR" 32 | ) 33 | 34 | var debugStart time.Time 35 | 36 | func init() { 37 | debugStart = time.Now() 38 | 39 | log.SetFlags(log.Flags() &^ (log.Ldate | log.Ltime)) 40 | } 41 | 42 | func Debug(topic logTopic, format string, a ...interface{}) { 43 | if debug { 44 | time := time.Since(debugStart).Microseconds() 45 | time /= 100 46 | prefix := fmt.Sprintf("%06d %v ", time, string(topic)) 47 | format = prefix + format 48 | log.Printf(format, a...) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/mr/rpc.go: -------------------------------------------------------------------------------- 1 | package mr 2 | 3 | // 4 | // RPC definitions. 5 | // 6 | // remember to capitalize all names. 7 | // 8 | 9 | import ( 10 | "os" 11 | "strconv" 12 | ) 13 | 14 | // 15 | // example to show how to declare the arguments 16 | // and reply for an RPC. 17 | // 18 | 19 | type Errno int 20 | 21 | const ( 22 | SuccessCode Errno = iota 23 | ServiceErrCode 24 | ParaErrCode 25 | ) 26 | 27 | type TaskType int 28 | 29 | const ( 30 | MAP TaskType = iota 31 | REDUCE 32 | WAIT 33 | STOP 34 | ) 35 | 36 | // Add your RPC definitions here. 37 | type GetTaskArgs struct { 38 | } 39 | 40 | type GetTaskReply struct { 41 | Type TaskType 42 | Filenames []string 43 | Task_no int 44 | NReduce int 45 | Err Errno 46 | } 47 | 48 | type FinishTaskArgs struct { 49 | Type TaskType 50 | Task_no int 51 | } 52 | 53 | type FinishTaskReply struct { 54 | Err Errno 55 | } 56 | 57 | // Cook up a unique-ish UNIX-domain socket name 58 | // in /var/tmp, for the coordinator. 59 | // Can't use the current directory since 60 | // Athena AFS doesn't support UNIX-domain sockets. 61 | func coordinatorSock() string { 62 | s := "/var/tmp/824-mr-" 63 | s += strconv.Itoa(os.Getuid()) 64 | return s 65 | } 66 | -------------------------------------------------------------------------------- /src/mrapps/jobcount.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a MapReduce pseudo-application that counts the number of times map/reduce 5 | // tasks are run, to test whether jobs are assigned multiple times even when 6 | // there is no failure. 
7 | // 8 | // go build -buildmode=plugin crash.go 9 | // 10 | 11 | import "6.824/mr" 12 | import "math/rand" 13 | import "strings" 14 | import "strconv" 15 | import "time" 16 | import "fmt" 17 | import "os" 18 | import "io/ioutil" 19 | 20 | var count int 21 | 22 | func Map(filename string, contents string) []mr.KeyValue { 23 | me := os.Getpid() 24 | f := fmt.Sprintf("mr-worker-jobcount-%d-%d", me, count) 25 | count++ 26 | err := ioutil.WriteFile(f, []byte("x"), 0666) 27 | if err != nil { 28 | panic(err) 29 | } 30 | time.Sleep(time.Duration(2000+rand.Intn(3000)) * time.Millisecond) 31 | return []mr.KeyValue{mr.KeyValue{"a", "x"}} 32 | } 33 | 34 | func Reduce(key string, values []string) string { 35 | files, err := ioutil.ReadDir(".") 36 | if err != nil { 37 | panic(err) 38 | } 39 | invocations := 0 40 | for _, f := range files { 41 | if strings.HasPrefix(f.Name(), "mr-worker-jobcount") { 42 | invocations++ 43 | } 44 | } 45 | return strconv.Itoa(invocations) 46 | } 47 | -------------------------------------------------------------------------------- /src/shardkv/execute.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "time" 4 | 5 | func (kv *ShardKV) Execute(cmd Command, reply *OpResp) { 6 | index, term, is_leader := kv.rf.Start(cmd) 7 | if !is_leader { 8 | reply.Value, reply.Err = "", ErrWrongLeader 9 | return 10 | } 11 | 12 | kv.mu.Lock() 13 | it := IndexAndTerm{index, term} 14 | ch := make(chan OpResp, 1) 15 | kv.cmdRespChans[it] = ch 16 | kv.mu.Unlock() 17 | 18 | defer func() { 19 | kv.mu.Lock() 20 | // close(kv.cmdRespChans[index]) 21 | delete(kv.cmdRespChans, it) 22 | kv.mu.Unlock() 23 | close(ch) 24 | }() 25 | 26 | t := time.NewTimer(cmd_timeout) 27 | defer t.Stop() 28 | 29 | for { 30 | kv.mu.Lock() 31 | select { 32 | case resp := <-ch: 33 | reply.Value, reply.Err = resp.Value, resp.Err 34 | kv.mu.Unlock() 35 | return 36 | case <-t.C: 37 | priority: 38 | for { 39 | select { 40 | case resp := <-ch: 41 | reply.Value, reply.Err = resp.Value, resp.Err 42 | kv.mu.Unlock() 43 | return 44 | default: 45 | break priority 46 | } 47 | } 48 | reply.Value, reply.Err = "", ErrTimeout 49 | kv.mu.Unlock() 50 | return 51 | default: 52 | kv.mu.Unlock() 53 | time.Sleep(gap_time) 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/mrapps/indexer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // an indexing application "plugin" for MapReduce. 5 | // 6 | // go build -buildmode=plugin indexer.go 7 | // 8 | 9 | import "fmt" 10 | import "6.824/mr" 11 | 12 | import "strings" 13 | import "unicode" 14 | import "sort" 15 | 16 | // The mapping function is called once for each piece of the input. 17 | // In this framework, the key is the name of the file that is being processed, 18 | // and the value is the file's contents. The return value should be a slice of 19 | // key/value pairs, each represented by a mr.KeyValue. 
20 | func Map(document string, value string) (res []mr.KeyValue) { 21 | m := make(map[string]bool) 22 | words := strings.FieldsFunc(value, func(x rune) bool { return !unicode.IsLetter(x) }) 23 | for _, w := range words { 24 | m[w] = true 25 | } 26 | for w := range m { 27 | kv := mr.KeyValue{w, document} 28 | res = append(res, kv) 29 | } 30 | return 31 | } 32 | 33 | // The reduce function is called once for each key generated by Map, with a 34 | // list of that key's string value (merged across all inputs). The return value 35 | // should be a single output value for that key. 36 | func Reduce(key string, values []string) string { 37 | sort.Strings(values) 38 | return fmt.Sprintf("%d %s", len(values), strings.Join(values, ",")) 39 | } 40 | -------------------------------------------------------------------------------- /src/mrapps/wc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a word-count application "plugin" for MapReduce. 5 | // 6 | // go build -buildmode=plugin wc.go 7 | // 8 | 9 | import "6.824/mr" 10 | import "unicode" 11 | import "strings" 12 | import "strconv" 13 | 14 | // 15 | // The map function is called once for each file of input. The first 16 | // argument is the name of the input file, and the second is the 17 | // file's complete contents. You should ignore the input file name, 18 | // and look only at the contents argument. The return value is a slice 19 | // of key/value pairs. 20 | // 21 | func Map(filename string, contents string) []mr.KeyValue { 22 | // function to detect word separators. 23 | ff := func(r rune) bool { return !unicode.IsLetter(r) } 24 | 25 | // split contents into an array of words. 26 | words := strings.FieldsFunc(contents, ff) 27 | 28 | kva := []mr.KeyValue{} 29 | for _, w := range words { 30 | kv := mr.KeyValue{w, "1"} 31 | kva = append(kva, kv) 32 | } 33 | return kva 34 | } 35 | 36 | // 37 | // The reduce function is called once for each key generated by the 38 | // map tasks, with a list of all the values created for that key by 39 | // any map task. 40 | // 41 | func Reduce(key string, values []string) string { 42 | // return the number of occurrences of this word. 43 | return strconv.Itoa(len(values)) 44 | } 45 | -------------------------------------------------------------------------------- /src/main/mrworker.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start a worker process, which is implemented 5 | // in ../mr/worker.go. typically there will be 6 | // multiple worker processes, talking to one coordinator. 7 | // 8 | // go run mrworker.go wc.so 9 | // 10 | // Please do not change this file. 11 | // 12 | 13 | import "6.824/mr" 14 | import "plugin" 15 | import "os" 16 | import "fmt" 17 | import "log" 18 | 19 | func main() { 20 | if len(os.Args) != 2 { 21 | fmt.Fprintf(os.Stderr, "Usage: mrworker xxx.so\n") 22 | os.Exit(1) 23 | } 24 | 25 | mapf, reducef := loadPlugin(os.Args[1]) 26 | 27 | mr.Worker(mapf, reducef) 28 | } 29 | 30 | // 31 | // load the application Map and Reduce functions 32 | // from a plugin file, e.g. 
../mrapps/wc.so 33 | // 34 | func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) { 35 | p, err := plugin.Open(filename) 36 | if err != nil { 37 | log.Fatalf("cannot load plugin %v", filename) 38 | } 39 | xmapf, err := p.Lookup("Map") 40 | if err != nil { 41 | log.Fatalf("cannot find Map in %v", filename) 42 | } 43 | mapf := xmapf.(func(string, string) []mr.KeyValue) 44 | xreducef, err := p.Lookup("Reduce") 45 | if err != nil { 46 | log.Fatalf("cannot find Reduce in %v", filename) 47 | } 48 | reducef := xreducef.(func(string, []string) string) 49 | 50 | return mapf, reducef 51 | } 52 | -------------------------------------------------------------------------------- /src/shardctrler/common.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import "time" 4 | 5 | const ( 6 | OK = "OK" 7 | ErrWrongLeader = "ErrWrongLeader" 8 | ErrOpt = "ErrOpt" 9 | ErrTimeout = "ErrTimeout" 10 | ) 11 | 12 | type Err string 13 | 14 | type OpType string 15 | 16 | const ( 17 | OpJoin OpType = "join" 18 | OpLeave OpType = "leave" 19 | OpMove OpType = "move" 20 | OpQuery OpType = "query" 21 | ) 22 | 23 | type CommandArgs struct { 24 | Op OpType 25 | ClientId int64 26 | SeqId int64 27 | Servers map[int][]string // for Join 28 | GIDs []int // for Leave 29 | Shard int // for Move 30 | GID int // for Move 31 | Num int // for Query 32 | } 33 | 34 | type CommandReply struct { 35 | Err Err 36 | Config Config 37 | } 38 | 39 | type Op CommandArgs 40 | 41 | type OpResp struct { 42 | Err Err 43 | Config Config 44 | } 45 | 46 | type OpContext struct { 47 | SeqId int64 48 | Reply OpResp 49 | } 50 | 51 | type IndexAndTerm struct { 52 | index int 53 | term int 54 | } 55 | 56 | const ( 57 | retry_timeout time.Duration = time.Duration(1) * time.Millisecond 58 | cmd_timeout time.Duration = time.Duration(2) * time.Second 59 | gap_time time.Duration = time.Duration(5) * time.Millisecond 60 | snapshot_gap_time time.Duration = time.Duration(10) * time.Millisecond 61 | ) 62 | -------------------------------------------------------------------------------- /src/kvraft/common.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | import "time" 4 | 5 | const ( 6 | OK = "OK" 7 | ErrNoKey = "ErrNoKey" 8 | ErrWrongLeader = "ErrWrongLeader" 9 | ErrTimeout = "ErrTimeout" 10 | ErrTimeoutReq = "ErrTimeoutReq" 11 | ) 12 | 13 | type Err string 14 | 15 | // Put or Append 16 | type CmdArgs struct { 17 | OpType OPType 18 | Key string 19 | Value string 20 | ClientId int64 21 | SeqId int64 22 | } 23 | 24 | type CmdReply struct { 25 | Err Err 26 | Value string 27 | } 28 | 29 | type OPType string 30 | 31 | const ( 32 | OpGet OPType = "Get" 33 | OpPut OPType = "Put" 34 | OpAppend OPType = "Append" 35 | ) 36 | 37 | type Op struct { 38 | // Your definitions here. 39 | // Field names must start with capital letters, 40 | // otherwise RPC will break. 
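	// ClientId and SeqId identify each client request so the server can detect and drop duplicate (retried) commands.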
41 | OpType OPType 42 | Key string 43 | Value string 44 | ClientId int64 45 | SeqId int64 46 | } 47 | 48 | type OpResp struct { 49 | Err Err 50 | Value string 51 | } 52 | 53 | type OpContext struct { 54 | SeqId int64 55 | Reply OpResp 56 | } 57 | 58 | type IndexAndTerm struct { 59 | index int 60 | term int 61 | } 62 | 63 | const ( 64 | retry_timeout time.Duration = time.Duration(1) * time.Millisecond 65 | cmd_timeout time.Duration = time.Duration(2) * time.Second 66 | gap_time time.Duration = time.Duration(5) * time.Millisecond 67 | snapshot_gap_time time.Duration = time.Duration(10) * time.Millisecond 68 | ) 69 | -------------------------------------------------------------------------------- /src/mrapps/crash.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a MapReduce pseudo-application that sometimes crashes, 5 | // and sometimes takes a long time, 6 | // to test MapReduce's ability to recover. 7 | // 8 | // go build -buildmode=plugin crash.go 9 | // 10 | 11 | import "6.824/mr" 12 | import crand "crypto/rand" 13 | import "math/big" 14 | import "strings" 15 | import "os" 16 | import "sort" 17 | import "strconv" 18 | import "time" 19 | 20 | func maybeCrash() { 21 | max := big.NewInt(1000) 22 | rr, _ := crand.Int(crand.Reader, max) 23 | if rr.Int64() < 330 { 24 | // crash! 25 | os.Exit(1) 26 | } else if rr.Int64() < 660 { 27 | // delay for a while. 28 | maxms := big.NewInt(10 * 1000) 29 | ms, _ := crand.Int(crand.Reader, maxms) 30 | time.Sleep(time.Duration(ms.Int64()) * time.Millisecond) 31 | } 32 | } 33 | 34 | func Map(filename string, contents string) []mr.KeyValue { 35 | maybeCrash() 36 | 37 | kva := []mr.KeyValue{} 38 | kva = append(kva, mr.KeyValue{"a", filename}) 39 | kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))}) 40 | kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))}) 41 | kva = append(kva, mr.KeyValue{"d", "xyzzy"}) 42 | return kva 43 | } 44 | 45 | func Reduce(key string, values []string) string { 46 | maybeCrash() 47 | 48 | // sort values to ensure deterministic output. 
49 | vv := make([]string, len(values)) 50 | copy(vv, values) 51 | sort.Strings(vv) 52 | 53 | val := strings.Join(vv, " ") 54 | return val 55 | } 56 | -------------------------------------------------------------------------------- /src/raft/snapshot.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | func (rf *Raft) doInstallSnapshot(peer int) { 6 | rf.mu.Lock() 7 | if rf.status != leader { 8 | utils.Debug(utils.DWarn, "S%d status change, it is not leader", rf.me) 9 | rf.mu.Unlock() 10 | return 11 | } 12 | args := InstallSnapshotArgs{ 13 | Term: rf.currentTerm, 14 | LeaderId: rf.me, 15 | LastIncludedIndex: rf.frontLog().Index, 16 | LastIncludedTerm: rf.frontLog().Term, 17 | } 18 | 19 | args.Data = make([]byte, rf.persister.SnapshotSize()) 20 | copy(args.Data, rf.persister.ReadSnapshot()) 21 | rf.mu.Unlock() 22 | 23 | reply := InstallSnapshotReply{} 24 | 25 | ok := rf.sendInstallSnapshot(peer, &args, &reply) 26 | if !ok { 27 | return 28 | } 29 | 30 | rf.mu.Lock() 31 | defer rf.mu.Unlock() 32 | 33 | // status changed or outdue data, ignore 34 | if rf.currentTerm != args.Term || rf.status != leader || reply.Term < rf.currentTerm { 35 | // overdue, ignore 36 | utils.Debug(utils.DInfo, "S%d old response from C%d, ignore it", rf.me, peer) 37 | return 38 | } 39 | 40 | if reply.Term > rf.currentTerm { 41 | utils.Debug(utils.DTerm, "S%d S%d term larger(%d > %d)", rf.me, peer, args.Term, rf.currentTerm) 42 | rf.currentTerm, rf.votedFor = reply.Term, voted_nil 43 | rf.persist() 44 | rf.TurnTo(follower) 45 | return 46 | } 47 | 48 | rf.nextIndex[peer] = args.LastIncludedIndex + 1 49 | 50 | utils.Debug(utils.DInfo, "S%d send snapshot to C%d success!", rf.me, peer) 51 | } 52 | -------------------------------------------------------------------------------- /src/raft/status.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | type ServerStatus string 6 | 7 | const ( 8 | follower ServerStatus = "Follower" 9 | candidate ServerStatus = "Candidate" 10 | leader ServerStatus = "Leader" 11 | ) 12 | 13 | // return currentTerm and whether this server 14 | // believes it is the leader. 15 | func (rf *Raft) GetState() (int, bool) { 16 | // Your code here (2A). 17 | rf.mu.Lock() 18 | defer rf.mu.Unlock() 19 | term := rf.currentTerm 20 | isleader := (rf.status == leader) 21 | return term, isleader 22 | } 23 | 24 | // without lock 25 | // if have a new goroutine, must lock it !!! 
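// (i.e. TurnTo assumes the caller already holds rf.mu; any code that calls it from a new goroutine must acquire the lock first.)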
26 | func (rf *Raft) TurnTo(status ServerStatus) { 27 | switch status { 28 | case follower: 29 | rf.status = follower 30 | utils.Debug(utils.DTerm, "S%d converting to %v in T(%d)", rf.me, rf.status, rf.currentTerm) 31 | case candidate: 32 | // • Increment currentTerm 33 | rf.currentTerm++ 34 | // • Vote for self 35 | rf.votedFor = rf.me 36 | rf.persist() 37 | rf.status = candidate 38 | utils.Debug(utils.DTerm, "S%d converting to %v in T(%d)", rf.me, rf.status, rf.currentTerm) 39 | case leader: 40 | rf.status = leader 41 | rf.leaderInit() 42 | // print before sending heartbeat 43 | utils.Debug(utils.DTerm, "S%d converting to %v in T(%d)", rf.me, rf.status, rf.currentTerm) 44 | // Upon election: send initial empty AppendEntries RPCs (heartbeat) to each server; 45 | // repeat during idle periods to prevent election timeouts (§5.2) 46 | rf.doAppendEntries() 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/porcupine/porcupine.go: -------------------------------------------------------------------------------- 1 | package porcupine 2 | 3 | import "time" 4 | 5 | func CheckOperations(model Model, history []Operation) bool { 6 | res, _ := checkOperations(model, history, false, 0) 7 | return res == Ok 8 | } 9 | 10 | // timeout = 0 means no timeout 11 | // if this operation times out, then a false positive is possible 12 | func CheckOperationsTimeout(model Model, history []Operation, timeout time.Duration) CheckResult { 13 | res, _ := checkOperations(model, history, false, timeout) 14 | return res 15 | } 16 | 17 | // timeout = 0 means no timeout 18 | // if this operation times out, then a false positive is possible 19 | func CheckOperationsVerbose(model Model, history []Operation, timeout time.Duration) (CheckResult, linearizationInfo) { 20 | return checkOperations(model, history, true, timeout) 21 | } 22 | 23 | func CheckEvents(model Model, history []Event) bool { 24 | res, _ := checkEvents(model, history, false, 0) 25 | return res == Ok 26 | } 27 | 28 | // timeout = 0 means no timeout 29 | // if this operation times out, then a false positive is possible 30 | func CheckEventsTimeout(model Model, history []Event, timeout time.Duration) CheckResult { 31 | res, _ := checkEvents(model, history, false, timeout) 32 | return res 33 | } 34 | 35 | // timeout = 0 means no timeout 36 | // if this operation times out, then a false positive is possible 37 | func CheckEventsVerbose(model Model, history []Event, timeout time.Duration) (CheckResult, linearizationInfo) { 38 | return checkEvents(model, history, true, timeout) 39 | } 40 | -------------------------------------------------------------------------------- /src/raft/election.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // ticker() call doElection(), ticker() hold lock 6 | func (rf *Raft) doElection() { 7 | votedcount := 1 8 | entry := rf.lastLog() 9 | args := RequestVoteArgs{ 10 | Term: rf.currentTerm, 11 | CandidateId: rf.me, 12 | LastLogIndex: entry.Index, 13 | LastLogTerm: entry.Term, 14 | } 15 | 16 | for i := 0; i < len(rf.peers); i++ { 17 | if i == rf.me { 18 | continue 19 | } 20 | 21 | go func(i int) { 22 | reply := RequestVoteReply{} 23 | ok := rf.sendRequestVote(i, &args, &reply) 24 | if !ok { 25 | return 26 | } 27 | 28 | rf.mu.Lock() 29 | defer rf.mu.Unlock() 30 | 31 | if rf.currentTerm != args.Term || rf.status != candidate { 32 | // election timeout, re-election 33 | // ignore it 34 | return 35 | 
} 36 | 37 | // If RPC request or response contains term T > currentTerm: 38 | // set currentTerm = T, convert to follower (§5.1) 39 | if reply.Term > rf.currentTerm { 40 | utils.Debug(utils.DTerm, "S%d S%d term larger(%d > %d)", rf.me, i, args.Term, rf.currentTerm) 41 | // turn to follower 42 | rf.currentTerm, rf.votedFor = reply.Term, voted_nil 43 | rf.persist() 44 | rf.TurnTo(follower) 45 | return 46 | } 47 | 48 | if reply.VoteGranted { 49 | votedcount++ 50 | // If votes received from majority of servers: become leader 51 | if votedcount > len(rf.peers)/2 && rf.status == candidate { 52 | rf.TurnTo(leader) 53 | } 54 | } 55 | }(i) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/porcupine/bitset.go: -------------------------------------------------------------------------------- 1 | package porcupine 2 | 3 | import "math/bits" 4 | 5 | type bitset []uint64 6 | 7 | // data layout: 8 | // bits 0-63 are in data[0], the next are in data[1], etc. 9 | 10 | func newBitset(bits uint) bitset { 11 | extra := uint(0) 12 | if bits%64 != 0 { 13 | extra = 1 14 | } 15 | chunks := bits/64 + extra 16 | return bitset(make([]uint64, chunks)) 17 | } 18 | 19 | func (b bitset) clone() bitset { 20 | dataCopy := make([]uint64, len(b)) 21 | copy(dataCopy, b) 22 | return bitset(dataCopy) 23 | } 24 | 25 | func bitsetIndex(pos uint) (uint, uint) { 26 | return pos / 64, pos % 64 27 | } 28 | 29 | func (b bitset) set(pos uint) bitset { 30 | major, minor := bitsetIndex(pos) 31 | b[major] |= (1 << minor) 32 | return b 33 | } 34 | 35 | func (b bitset) clear(pos uint) bitset { 36 | major, minor := bitsetIndex(pos) 37 | b[major] &^= (1 << minor) 38 | return b 39 | } 40 | 41 | func (b bitset) get(pos uint) bool { 42 | major, minor := bitsetIndex(pos) 43 | return b[major]&(1< kv.lastSnapshot + snapshotLogGap { 19 | kv.doSnapshot(kv.lastApplied) 20 | kv.lastSnapshot = kv.lastApplied 21 | } 22 | kv.mu.Unlock() 23 | time.Sleep(snapshot_gap_time) 24 | } 25 | } 26 | 27 | func (kv *KVServer) isNeedSnapshot() bool { 28 | if kv.maxraftstate != -1 && kv.rf.RaftPersistSize() > int(threshold*float32(kv.maxraftstate)) { 29 | return true 30 | } 31 | return false 32 | } 33 | 34 | func (kv *KVServer) doSnapshot(commandIndex int) { 35 | utils.Debug(utils.DServer, "S%d doSnapshot", kv.me) 36 | w := new(bytes.Buffer) 37 | e := labgob.NewEncoder(w) 38 | if e.Encode(*kv.KvMap) != nil || 39 | e.Encode(kv.LastCmdContext) != nil { 40 | panic("server doSnapshot encode error") 41 | } 42 | kv.rf.Snapshot(commandIndex, w.Bytes()) 43 | } 44 | 45 | func (kv *KVServer) setSnapshot(snapshot []byte) { 46 | if snapshot == nil || len(snapshot) < 1 { // bootstrap without any state? 
47 | return 48 | } 49 | 50 | utils.Debug(utils.DServer, "S%d setSnapshot", kv.me) 51 | r := bytes.NewBuffer(snapshot) 52 | d := labgob.NewDecoder(r) 53 | 54 | var kvMap KV 55 | var lastCmdContext map[int64]OpContext 56 | 57 | if d.Decode(&kvMap) != nil || 58 | d.Decode(&lastCmdContext) != nil { 59 | log.Fatalf("server setSnapshot decode error\n") 60 | } else { 61 | kv.KvMap = &kvMap 62 | kv.LastCmdContext = lastCmdContext 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/raft/apply.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // 6 | // as each Raft peer becomes aware that successive log entries are 7 | // committed, the peer should send an ApplyMsg to the service (or 8 | // tester) on the same server, via the applyCh passed to Make(). set 9 | // CommandValid to true to indicate that the ApplyMsg contains a newly 10 | // committed log entry. 11 | // 12 | // in part 2D you'll want to send other kinds of messages (e.g., 13 | // snapshots) on the applyCh, but set CommandValid to false for these 14 | // other uses. 15 | // 16 | type ApplyMsg struct { 17 | CommandValid bool 18 | Command interface{} 19 | CommandIndex int 20 | CommandTerm int 21 | 22 | // For 2D: 23 | SnapshotValid bool 24 | Snapshot []byte 25 | SnapshotTerm int 26 | SnapshotIndex int 27 | } 28 | 29 | // a new goroutine to run it 30 | func (rf *Raft) applyLog() { 31 | for rf.killed() == false { 32 | rf.mu.Lock() 33 | for rf.lastApplied >= rf.commitIndex { 34 | rf.applyCond.Wait() 35 | } 36 | commitIndex := rf.commitIndex 37 | commit, _ := rf.transfer(rf.commitIndex) 38 | applied, _ := rf.transfer(rf.lastApplied) 39 | entries := make([]Entry, commit-applied) 40 | copy(entries, rf.log[applied+1:commit+1]) 41 | rf.mu.Unlock() 42 | 43 | for _, entry := range entries { 44 | rf.applyCh <- ApplyMsg{ 45 | CommandValid: true, 46 | Command: entry.Cmd, 47 | CommandIndex: entry.Index, 48 | CommandTerm: entry.Term, 49 | } 50 | } 51 | 52 | rf.mu.Lock() 53 | utils.Debug(utils.DCommit, "S%d apply %v - %v", rf.me, rf.lastApplied, commitIndex) 54 | if commitIndex > rf.lastApplied { 55 | rf.lastApplied = commitIndex 56 | } 57 | rf.mu.Unlock() 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/shardctrler/apply.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | func (sc *ShardCtrler) applier() { 8 | for sc.killed() == false { 9 | select { 10 | case msg := <-sc.applyCh: 11 | Debug(dServer, "S%d apply msg: %+v", sc.me, msg) 12 | if msg.CommandValid { 13 | sc.mu.Lock() 14 | 15 | if msg.CommandIndex <= sc.lastApplied { 16 | Debug(dWarn, "S%d out time apply(%d <= %d): %+v", sc.me, msg.CommandIndex, sc.lastApplied, msg) 17 | sc.mu.Unlock() 18 | continue 19 | } 20 | sc.lastApplied = msg.CommandIndex 21 | 22 | var resp OpResp 23 | cmd := msg.Command.(Op) 24 | 25 | if cmd.Op != OpQuery && sc.isDuplicate(cmd.ClientId, cmd.SeqId) { 26 | context := sc.LastCmdContext[cmd.ClientId] 27 | resp = context.Reply 28 | } else { 29 | resp.Config, resp.Err = sc.configs.Opt(cmd) 30 | sc.LastCmdContext[cmd.ClientId] = OpContext{ 31 | SeqId: cmd.SeqId, 32 | Reply: resp, 33 | } 34 | } 35 | 36 | term, isLeader := sc.rf.GetState() 37 | 38 | if !isLeader || term != msg.CommandTerm { 39 | sc.mu.Unlock() 40 | continue 41 | } 42 | 43 | it := IndexAndTerm{msg.CommandIndex, term} 44 | 
ch, ok := sc.cmdRespChans[it] 45 | if ok { 46 | select { 47 | case ch <- resp: 48 | case <-time.After(10 * time.Millisecond): 49 | } 50 | } 51 | 52 | sc.mu.Unlock() 53 | } else { 54 | // ignore 55 | } 56 | default: 57 | time.Sleep(gap_time) 58 | } 59 | } 60 | } 61 | 62 | func (sc *ShardCtrler) isDuplicate(clientId int64, seqId int64) bool { 63 | context, ok := sc.LastCmdContext[clientId] 64 | if !ok { 65 | return false 66 | } 67 | if seqId <= context.SeqId { 68 | return true 69 | } 70 | return false 71 | } 72 | -------------------------------------------------------------------------------- /src/shardkv/commandHandler.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "6.824/raft" 4 | 5 | // Handler 6 | func (kv *ShardKV) Command(args *CmdArgs, reply *CmdReply) { 7 | defer Debug(dTrace, "G%+v {S%+v} args: %+v reply: %+v", kv.gid, kv.me, args, reply) 8 | 9 | kv.mu.Lock() 10 | shardID := key2shard(args.Key) 11 | if !kv.canServe(shardID) { 12 | Debug(dWarn, "G%+v {S%+v} shard %d is %+v, can't servering(%+v)", kv.gid, kv.me, shardID, kv.shards[shardID], kv.currentConfig.Shards[shardID]) 13 | reply.Err = ErrWrongGroup 14 | kv.mu.Unlock() 15 | return 16 | } 17 | if args.OpType != OpGet && kv.isDuplicate(shardID, args.ClientId, args.SeqId) { 18 | context := kv.shards[shardID].LastCmdContext[args.ClientId] 19 | reply.Value, reply.Err = context.Reply.Value, context.Reply.Err 20 | kv.mu.Unlock() 21 | return 22 | } 23 | kv.mu.Unlock() 24 | 25 | var resp OpResp 26 | kv.Execute(NewOperationCommand(args), &resp) 27 | reply.Value, reply.Err = resp.Value, resp.Err 28 | } 29 | 30 | func (kv *ShardKV) canServe(shardID int) bool { 31 | return kv.currentConfig.Shards[shardID] == kv.gid && (kv.shards[shardID].Status == Serving || kv.shards[shardID].Status == GCing) 32 | } 33 | 34 | func (kv *ShardKV) applyOperation(msg *raft.ApplyMsg, cmd *CmdArgs) *OpResp { 35 | shardID := key2shard(cmd.Key) 36 | if kv.canServe(shardID) { 37 | if cmd.OpType != OpGet && kv.isDuplicate(shardID, cmd.ClientId, cmd.SeqId) { 38 | context := kv.shards[shardID].LastCmdContext[cmd.ClientId] 39 | return &context.Reply 40 | } else { 41 | var resp OpResp 42 | resp.Value, resp.Err = kv.Opt(cmd, shardID) 43 | kv.shards[shardID].LastCmdContext[cmd.ClientId] = OpContext{ 44 | SeqId: cmd.SeqId, 45 | Reply: resp, 46 | } 47 | return &resp 48 | } 49 | } 50 | return &OpResp{ErrWrongGroup, ""} 51 | } 52 | -------------------------------------------------------------------------------- /src/shardkv/gc.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "sync" 4 | 5 | func (kv *ShardKV) gcAction() { 6 | kv.mu.Lock() 7 | gid2shardIDs := kv.getShardIDsByStatus(GCing, &kv.lastConfig) 8 | var wg sync.WaitGroup 9 | for gid, shardIDs := range gid2shardIDs { 10 | wg.Add(1) 11 | servers := kv.lastConfig.Groups[gid] 12 | go func(servers []string, configNum int, shardIDs []int) { 13 | defer wg.Done() 14 | args := PullDataArgs{configNum, shardIDs} 15 | for _, server := range servers { 16 | var reply PullDataReply 17 | srv := kv.makeEnd(server) 18 | if srv.Call("ShardKV.DeleteShardsData", &args, &reply) && reply.Err == OK { 19 | kv.Execute(NewDeleteShardsCommand(&args), &OpResp{}) 20 | } 21 | } 22 | }(servers, kv.currentConfig.Num, shardIDs) 23 | } 24 | kv.mu.Unlock() 25 | wg.Wait() 26 | } 27 | 28 | func (kv *ShardKV) DeleteShardsData(args *PullDataArgs, reply *PullDataReply) { 29 | // only delete shards when role is leader 30 
| if _, isLeader := kv.rf.GetState(); !isLeader { 31 | reply.Err = ErrWrongLeader 32 | return 33 | } 34 | 35 | kv.mu.Lock() 36 | if kv.currentConfig.Num > args.ConfNum { 37 | reply.Err = OK 38 | kv.mu.Unlock() 39 | return 40 | } 41 | kv.mu.Unlock() 42 | 43 | var resp OpResp 44 | kv.Execute(NewDeleteShardsCommand(args), &resp) 45 | 46 | reply.Err = resp.Err 47 | } 48 | 49 | func (kv *ShardKV) applyDeleteShards(shardsInfo *PullDataArgs) *OpResp { 50 | if shardsInfo.ConfNum == kv.currentConfig.Num { 51 | for _, shardId := range shardsInfo.ShardIds { 52 | shard := kv.shards[shardId] 53 | if shard.Status == GCing { 54 | shard.Status = Serving 55 | } else if shard.Status == BePulling { 56 | kv.shards[shardId] = NewShard(Serving) 57 | } else { 58 | break 59 | } 60 | } 61 | return &OpResp{OK, ""} 62 | } 63 | return &OpResp{OK, ""} 64 | } 65 | -------------------------------------------------------------------------------- /src/models/kv.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import "6.824/porcupine" 4 | import "fmt" 5 | import "sort" 6 | 7 | type KvInput struct { 8 | Op uint8 // 0 => get, 1 => put, 2 => append 9 | Key string 10 | Value string 11 | } 12 | 13 | type KvOutput struct { 14 | Value string 15 | } 16 | 17 | var KvModel = porcupine.Model{ 18 | Partition: func(history []porcupine.Operation) [][]porcupine.Operation { 19 | m := make(map[string][]porcupine.Operation) 20 | for _, v := range history { 21 | key := v.Input.(KvInput).Key 22 | m[key] = append(m[key], v) 23 | } 24 | keys := make([]string, 0, len(m)) 25 | for k := range m { 26 | keys = append(keys, k) 27 | } 28 | sort.Strings(keys) 29 | ret := make([][]porcupine.Operation, 0, len(keys)) 30 | for _, k := range keys { 31 | ret = append(ret, m[k]) 32 | } 33 | return ret 34 | }, 35 | Init: func() interface{} { 36 | // note: we are modeling a single key's value here; 37 | // we're partitioning by key, so this is okay 38 | return "" 39 | }, 40 | Step: func(state, input, output interface{}) (bool, interface{}) { 41 | inp := input.(KvInput) 42 | out := output.(KvOutput) 43 | st := state.(string) 44 | if inp.Op == 0 { 45 | // get 46 | return out.Value == st, state 47 | } else if inp.Op == 1 { 48 | // put 49 | return true, inp.Value 50 | } else { 51 | // append 52 | return true, (st + inp.Value) 53 | } 54 | }, 55 | DescribeOperation: func(input, output interface{}) string { 56 | inp := input.(KvInput) 57 | out := output.(KvOutput) 58 | switch inp.Op { 59 | case 0: 60 | return fmt.Sprintf("get('%s') -> '%s'", inp.Key, out.Value) 61 | case 1: 62 | return fmt.Sprintf("put('%s', '%s')", inp.Key, inp.Value) 63 | case 2: 64 | return fmt.Sprintf("append('%s', '%s')", inp.Key, inp.Value) 65 | default: 66 | return "" 67 | } 68 | }, 69 | } 70 | -------------------------------------------------------------------------------- /src/raft/ticker.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "math/rand" 5 | "time" 6 | 7 | "6.824/utils" 8 | ) 9 | 10 | func (rf *Raft) electionTimeout() bool { 11 | return time.Now().After(rf.electionTime) 12 | } 13 | 14 | func (rf *Raft) heartbeatTimeout() bool { 15 | return time.Now().After(rf.heartbeatTime) 16 | } 17 | 18 | func (rf *Raft) resetElectionTime() { 19 | sleep_time := rand.Intn(election_range_time) + election_base_time 20 | rf.electionTime = time.Now().Add(time.Duration(sleep_time) * time.Millisecond) 21 | } 22 | 23 | func (rf *Raft) resetHeartbeatTime() { 24 | 
rf.heartbeatTime = time.Now().Add(time.Duration(heartbeat_time) * time.Millisecond) 25 | } 26 | 27 | // The ticker go routine starts a new election if this peer hasn't received 28 | // heartsbeats recently. 29 | func (rf *Raft) ticker() { 30 | for rf.killed() == false { 31 | // Your code here to check if a leader election should 32 | // be started and to randomize sleeping time using 33 | // time.Sleep(). 34 | rf.mu.Lock() 35 | switch rf.status { 36 | case follower: 37 | if rf.electionTimeout() { 38 | rf.TurnTo(candidate) 39 | utils.Debug(utils.DTimer, "S%d Election timeout, Start election, T%d", rf.me, rf.currentTerm) 40 | rf.doElection() 41 | rf.resetElectionTime() 42 | } 43 | case candidate: 44 | if rf.electionTimeout() { 45 | rf.TurnTo(candidate) 46 | utils.Debug(utils.DTimer, "S%d Election timeout, re-start election, T%d", rf.me, rf.currentTerm) 47 | rf.doElection() 48 | rf.resetElectionTime() 49 | } 50 | case leader: 51 | if rf.heartbeatTimeout() { 52 | utils.Debug(utils.DTimer, "S%d Heartbeat timeout, send heartbeat boardcast, T%d", rf.me, rf.currentTerm) 53 | rf.doAppendEntries() 54 | rf.resetHeartbeatTime() 55 | } 56 | } 57 | rf.mu.Unlock() 58 | time.Sleep(time.Duration(gap_time) * time.Millisecond) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/raft/persist.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "bytes" 5 | 6 | "6.824/labgob" 7 | "6.824/utils" 8 | ) 9 | 10 | func (rf *Raft) RaftPersistSize() int { 11 | rf.mu.Lock() 12 | defer rf.mu.Unlock() 13 | return rf.persister.RaftStateSize() 14 | } 15 | 16 | func (rf *Raft) raftState() []byte { 17 | w := new(bytes.Buffer) 18 | e := labgob.NewEncoder(w) 19 | 20 | if e.Encode(rf.log) != nil || 21 | e.Encode(rf.currentTerm) != nil || 22 | e.Encode(rf.votedFor) != nil { 23 | utils.Debug(utils.DError, "S%d encode fail", rf.me) 24 | panic("encode fail") 25 | } 26 | data := w.Bytes() 27 | return data 28 | } 29 | 30 | // 31 | // save Raft's persistent state to stable storage, 32 | // where it can later be retrieved after a crash and restart. 33 | // see paper's Figure 2 for a description of what should be persistent. 34 | // 35 | func (rf *Raft) persist() { 36 | // Your code here (2C). 37 | rf.persister.SaveRaftState(rf.raftState()) 38 | } 39 | 40 | // 41 | // restore previously persisted state. 42 | // 43 | func (rf *Raft) readPersist(data []byte) { 44 | if data == nil || len(data) < 1 { // bootstrap without any state? 45 | return 46 | } 47 | // Your code here (2C). 
48 | r := bytes.NewBuffer(data) 49 | d := labgob.NewDecoder(r) 50 | 51 | var log []Entry 52 | var currentTerm, votedFor int 53 | 54 | if d.Decode(&log) != nil || 55 | d.Decode(¤tTerm) != nil || 56 | d.Decode(&votedFor) != nil { 57 | utils.Debug(utils.DError, "S%d decode fail", rf.me) 58 | panic("encode fail") 59 | } 60 | 61 | // log at least is 1 62 | rf.log = make([]Entry, len(log)) 63 | copy(rf.log, log) 64 | rf.lastApplied = rf.frontLogIndex() 65 | rf.commitIndex = rf.frontLogIndex() 66 | rf.currentTerm = currentTerm 67 | rf.votedFor = votedFor 68 | } 69 | 70 | func (rf *Raft) persistSnapshot(snapshot []byte) { 71 | rf.persister.SaveStateAndSnapshot(rf.raftState(), snapshot) 72 | } 73 | -------------------------------------------------------------------------------- /src/raft/persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft and kvraft to save persistent 5 | // Raft state (log &c) and k/v server snapshots. 6 | // 7 | // we will use the original persister.go to test your code for grading. 8 | // so, while you can modify this code to help you debug, please 9 | // test with the original before submitting. 10 | // 11 | 12 | import "sync" 13 | 14 | type Persister struct { 15 | mu sync.Mutex 16 | raftstate []byte 17 | snapshot []byte 18 | } 19 | 20 | func MakePersister() *Persister { 21 | return &Persister{} 22 | } 23 | 24 | func clone(orig []byte) []byte { 25 | x := make([]byte, len(orig)) 26 | copy(x, orig) 27 | return x 28 | } 29 | 30 | func (ps *Persister) Copy() *Persister { 31 | ps.mu.Lock() 32 | defer ps.mu.Unlock() 33 | np := MakePersister() 34 | np.raftstate = ps.raftstate 35 | np.snapshot = ps.snapshot 36 | return np 37 | } 38 | 39 | func (ps *Persister) SaveRaftState(state []byte) { 40 | ps.mu.Lock() 41 | defer ps.mu.Unlock() 42 | ps.raftstate = clone(state) 43 | } 44 | 45 | func (ps *Persister) ReadRaftState() []byte { 46 | ps.mu.Lock() 47 | defer ps.mu.Unlock() 48 | return clone(ps.raftstate) 49 | } 50 | 51 | func (ps *Persister) RaftStateSize() int { 52 | ps.mu.Lock() 53 | defer ps.mu.Unlock() 54 | return len(ps.raftstate) 55 | } 56 | 57 | // Save both Raft state and K/V snapshot as a single atomic action, 58 | // to help avoid them getting out of sync. 59 | func (ps *Persister) SaveStateAndSnapshot(state []byte, snapshot []byte) { 60 | ps.mu.Lock() 61 | defer ps.mu.Unlock() 62 | ps.raftstate = clone(state) 63 | ps.snapshot = clone(snapshot) 64 | } 65 | 66 | func (ps *Persister) ReadSnapshot() []byte { 67 | ps.mu.Lock() 68 | defer ps.mu.Unlock() 69 | return clone(ps.snapshot) 70 | } 71 | 72 | func (ps *Persister) SnapshotSize() int { 73 | ps.mu.Lock() 74 | defer ps.mu.Unlock() 75 | return len(ps.snapshot) 76 | } 77 | -------------------------------------------------------------------------------- /src/shardctrler/client.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | // 4 | // Shardctrler clerk. 5 | // 6 | 7 | import ( 8 | "crypto/rand" 9 | "math/big" 10 | "time" 11 | 12 | "6.824/labrpc" 13 | ) 14 | 15 | type Clerk struct { 16 | servers []*labrpc.ClientEnd 17 | // Your data here. 
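	// leaderId caches the last server that answered successfully; clientId and seqId tag each request for duplicate detection on the servers.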
18 | leaderId int 19 | clientId int64 20 | seqId int64 21 | } 22 | 23 | func nrand() int64 { 24 | max := big.NewInt(int64(1) << 62) 25 | bigx, _ := rand.Int(rand.Reader, max) 26 | x := bigx.Int64() 27 | return x 28 | } 29 | 30 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 31 | ck := new(Clerk) 32 | ck.servers = servers 33 | // Your code here. 34 | ck.clientId = nrand() 35 | ck.seqId = 0 36 | ck.leaderId = 0 37 | return ck 38 | } 39 | 40 | func (ck *Clerk) sendCmd(args CommandArgs) CommandReply { 41 | ck.seqId += 1 42 | args.SeqId = ck.seqId 43 | args.ClientId = ck.clientId 44 | 45 | for { 46 | reply := CommandReply{} 47 | 48 | ok := ck.servers[ck.leaderId].Call("ShardCtrler.Command", &args, &reply) 49 | 50 | if !ok { 51 | ck.leaderId = (ck.leaderId + 1) % len(ck.servers) 52 | time.Sleep(retry_timeout) 53 | continue 54 | } 55 | 56 | if reply.Err == OK { 57 | return reply 58 | } 59 | 60 | ck.leaderId = (ck.leaderId + 1) % len(ck.servers) 61 | time.Sleep(retry_timeout) 62 | } 63 | } 64 | 65 | func (ck *Clerk) Query(num int) Config { 66 | args := CommandArgs{ 67 | Op: OpQuery, 68 | Num: num, 69 | } 70 | reply := ck.sendCmd(args) 71 | return reply.Config 72 | } 73 | 74 | func (ck *Clerk) Join(servers map[int][]string) { 75 | args := CommandArgs{ 76 | Op: OpJoin, 77 | Servers: servers, 78 | } 79 | // reply := ck.sendCmd(args) 80 | ck.sendCmd(args) 81 | } 82 | 83 | func (ck *Clerk) Leave(gids []int) { 84 | args := CommandArgs{ 85 | Op: OpLeave, 86 | GIDs: gids, 87 | } 88 | ck.sendCmd(args) 89 | } 90 | 91 | func (ck *Clerk) Move(shard int, gid int) { 92 | args := CommandArgs{ 93 | Op: OpMove, 94 | Shard: shard, 95 | GID: gid, 96 | } 97 | ck.sendCmd(args) 98 | } 99 | -------------------------------------------------------------------------------- /src/kvraft/apply.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | import "6.824/utils" 4 | import "time" 5 | 6 | func (kv *KVServer) applier() { 7 | for kv.killed() == false { 8 | select { 9 | case msg := <-kv.applyCh: 10 | utils.Debug(utils.DServer, "S%d apply msg: %+v", kv.me, msg) 11 | if msg.SnapshotValid { 12 | kv.mu.Lock() 13 | if kv.rf.CondInstallSnapshot(msg.SnapshotTerm, msg.SnapshotIndex, msg.Snapshot) { 14 | kv.setSnapshot(msg.Snapshot) 15 | kv.lastApplied = msg.SnapshotIndex 16 | } 17 | kv.mu.Unlock() 18 | } else if msg.CommandValid { 19 | 20 | kv.mu.Lock() 21 | 22 | if msg.CommandIndex <= kv.lastApplied { 23 | utils.Debug(utils.DWarn, "S%d out time apply(%d <= %d): %+v", kv.me, msg.CommandIndex, kv.lastApplied, msg) 24 | kv.mu.Unlock() 25 | continue 26 | } 27 | kv.lastApplied = msg.CommandIndex 28 | 29 | var resp OpResp 30 | cmd := msg.Command.(Op) 31 | 32 | if cmd.OpType != OpGet && kv.isDuplicate(cmd.ClientId, cmd.SeqId) { 33 | context := kv.LastCmdContext[cmd.ClientId] 34 | resp = context.Reply 35 | } else { 36 | resp.Value, resp.Err = kv.Opt(cmd) 37 | kv.LastCmdContext[cmd.ClientId] = OpContext{ 38 | SeqId: cmd.SeqId, 39 | Reply: resp, 40 | } 41 | } 42 | 43 | term, isLeader := kv.rf.GetState() 44 | 45 | if !isLeader || term != msg.CommandTerm { 46 | kv.mu.Unlock() 47 | continue 48 | } 49 | 50 | it := IndexAndTerm{msg.CommandIndex, term} 51 | ch, ok := kv.cmdRespChans[it] 52 | if ok { 53 | select { 54 | case ch <- resp: 55 | case <- time.After(10 * time.Millisecond): 56 | } 57 | } 58 | 59 | kv.mu.Unlock() 60 | } else { 61 | // ignore 62 | } 63 | default: 64 | time.Sleep(gap_time) 65 | } 66 | } 67 | } 68 | 69 | func (kv *KVServer) isDuplicate(clientId int64, seqId 
int64) bool { 70 | context, ok := kv.LastCmdContext[clientId] 71 | if !ok { 72 | return false 73 | } 74 | if seqId <= context.SeqId { 75 | return true 76 | } 77 | return false 78 | } 79 | -------------------------------------------------------------------------------- /src/raft/electionHandler.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // handler need to require lock 6 | func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { 7 | // Your code here (2A, 2B). 8 | // fmt.Printf("vote request: term %d; %d request to be voted\n", args.Term, args.CandidateId) 9 | rf.mu.Lock() 10 | defer rf.mu.Unlock() 11 | 12 | utils.Debug(utils.DVote, "S%d C%d asking vote", rf.me, args.CandidateId) 13 | 14 | defer rf.persist() 15 | 16 | if args.Term < rf.currentTerm { // ignore 17 | reply.VoteGranted = false 18 | reply.Term = rf.currentTerm 19 | utils.Debug(utils.DVote, "S%d Term is higher than C%d, refuse it", rf.me, args.CandidateId) 20 | return 21 | } 22 | 23 | if args.Term > rf.currentTerm { 24 | // If RPC request or response contains term T > currentTerm: 25 | // set currentTerm = T, convert to follower (§5.1) 26 | rf.currentTerm, rf.votedFor = args.Term, voted_nil 27 | utils.Debug(utils.DVote, "S%d Term is lower than C%d, turn to follower && reset voted_for", rf.me, args.CandidateId) 28 | rf.TurnTo(follower) 29 | // can vote now 30 | } 31 | 32 | if rf.votedFor == voted_nil || rf.votedFor == args.CandidateId { // haven't voted 33 | // log judge 34 | if !rf.isUpToDate(args.LastLogIndex, args.LastLogTerm) { 35 | reply.VoteGranted, reply.Term = false, rf.currentTerm 36 | utils.Debug(utils.DVote, "S%d C%d not up-to-date, refuse it{arg:%+v, index:%d term:%d}", rf.me, args.CandidateId, args, rf.lastLogIndex(), rf.lastLog().Term) 37 | return 38 | } 39 | 40 | rf.votedFor = args.CandidateId 41 | reply.VoteGranted = true 42 | reply.Term = rf.currentTerm 43 | // prevent election timeouts (§5.2) 44 | utils.Debug(utils.DVote, "S%d Granting Vote to S%d at T%d", rf.me, rf.votedFor, rf.currentTerm) 45 | rf.resetElectionTime() 46 | return 47 | } 48 | 49 | // have voted 50 | reply.VoteGranted = false 51 | reply.Term = rf.currentTerm 52 | utils.Debug(utils.DVote, "S%d Have voted to S%d at T%d, refuse S%d", rf.me, rf.votedFor, rf.currentTerm, args.CandidateId) 53 | return 54 | } 55 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # This is the Makefile helping you submit the labs. 2 | # Just create 6.824/api.key with your API key in it, 3 | # and submit your lab with the following command: 4 | # $ make [lab1|lab2a|lab2b|lab2c|lab2d|lab3a|lab3b|lab4a|lab4b] 5 | 6 | LABS=" lab1 lab2a lab2b lab2c lab2d lab3a lab3b lab4a lab4b " 7 | 8 | %: check-% 9 | @echo "Preparing $@-handin.tar.gz" 10 | @if echo $(LABS) | grep -q " $@ " ; then \ 11 | echo "Tarring up your submission..." ; \ 12 | COPYFILE_DISABLE=1 tar cvzf $@-handin.tar.gz \ 13 | "--exclude=src/main/pg-*.txt" \ 14 | "--exclude=src/main/diskvd" \ 15 | "--exclude=src/mapreduce/824-mrinput-*.txt" \ 16 | "--exclude=src/main/mr-*" \ 17 | "--exclude=mrtmp.*" \ 18 | "--exclude=src/main/diff.out" \ 19 | "--exclude=src/main/mrmaster" \ 20 | "--exclude=src/main/mrsequential" \ 21 | "--exclude=src/main/mrworker" \ 22 | "--exclude=*.so" \ 23 | Makefile src; \ 24 | if ! test -e api.key ; then \ 25 | echo "Missing $(PWD)/api.key. 
Please create the file with your key in it or submit the $@-handin.tar.gz via the web interface."; \ 26 | else \ 27 | echo "Are you sure you want to submit $@? Enter 'yes' to continue:"; \ 28 | read line; \ 29 | if test "$$line" != "yes" ; then echo "Giving up submission"; exit; fi; \ 30 | if test `stat -c "%s" "$@-handin.tar.gz" 2>/dev/null || stat -f "%z" "$@-handin.tar.gz"` -ge 20971520 ; then echo "File exceeds 20MB."; exit; fi; \ 31 | cat api.key | tr -d '\n' > .api.key.trimmed ; \ 32 | curl --silent --fail --show-error -F file=@$@-handin.tar.gz -F "key=<.api.key.trimmed" \ 33 | https://6824.scripts.mit.edu/2021/handin.py/upload > /dev/null || { \ 34 | echo ; \ 35 | echo "Submit seems to have failed."; \ 36 | echo "Please upload the tarball manually on the submission website."; } \ 37 | fi; \ 38 | else \ 39 | echo "Bad target $@. Usage: make [$(LABS)]"; \ 40 | fi 41 | 42 | .PHONY: check-% 43 | check-%: 44 | @echo "Checking that your submission builds correctly..." 45 | @./.check-build git://g.csail.mit.edu/6.824-golabs-2021 $(patsubst check-%,%,$@) 46 | -------------------------------------------------------------------------------- /src/shardkv/apply.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "time" 5 | 6 | "6.824/shardctrler" 7 | ) 8 | 9 | func (kv *ShardKV) isDuplicate(shardId int, clientId int64, seqId int64) bool { 10 | context, ok := kv.shards[shardId].LastCmdContext[clientId] 11 | if !ok { 12 | return false 13 | } 14 | if seqId <= context.SeqId { 15 | return true 16 | } 17 | return false 18 | } 19 | 20 | func (kv *ShardKV) applier() { 21 | for kv.killed() == false { 22 | select { 23 | case msg := <-kv.applyCh: 24 | if msg.SnapshotValid { 25 | kv.mu.Lock() 26 | if kv.rf.CondInstallSnapshot(msg.SnapshotTerm, msg.SnapshotIndex, msg.Snapshot) { 27 | kv.setSnapshot(msg.Snapshot) 28 | kv.lastApplied = msg.SnapshotIndex 29 | } 30 | kv.mu.Unlock() 31 | } else if msg.CommandValid { 32 | 33 | kv.mu.Lock() 34 | 35 | if msg.CommandIndex <= kv.lastApplied { 36 | kv.mu.Unlock() 37 | continue 38 | } 39 | kv.lastApplied = msg.CommandIndex 40 | 41 | var resp OpResp 42 | command := msg.Command.(Command) 43 | switch command.Op { 44 | case Operation: 45 | cmd := command.Data.(CmdArgs) 46 | resp = *kv.applyOperation(&msg, &cmd) 47 | case Configuration: 48 | nextConfig := command.Data.(shardctrler.Config) 49 | resp = *kv.applyConfiguration(&nextConfig) 50 | case InsertShards: 51 | insertResp := command.Data.(PullDataReply) 52 | resp = *kv.applyInsertShards(&insertResp) 53 | case DeleteShards: 54 | deleteResp := command.Data.(PullDataArgs) 55 | resp = *kv.applyDeleteShards(&deleteResp) 56 | } 57 | 58 | term, isLeader := kv.rf.GetState() 59 | 60 | if !isLeader || term != msg.CommandTerm { 61 | kv.mu.Unlock() 62 | continue 63 | } 64 | 65 | it := IndexAndTerm{msg.CommandIndex, term} 66 | ch, ok := kv.cmdRespChans[it] 67 | if ok { 68 | select { 69 | case ch <- resp: 70 | case <-time.After(10 * time.Millisecond): 71 | } 72 | } 73 | 74 | kv.mu.Unlock() 75 | } else { 76 | // ignore 77 | } 78 | default: 79 | time.Sleep(gap_time) 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/shardkv/snapshot.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "log" 7 | "time" 8 | 9 | "6.824/labgob" 10 | "6.824/shardctrler" 11 | ) 12 | 13 | const threshold float32 = 0.8 14 | const 
snapshotLogGap int = 10 15 | 16 | func (kv *ShardKV) snapshoter() { 17 | for kv.killed() == false { 18 | kv.mu.Lock() 19 | if kv.isNeedSnapshot() { 20 | kv.doSnapshot(kv.lastApplied) 21 | kv.lastSnapshot = kv.lastApplied 22 | } 23 | kv.mu.Unlock() 24 | time.Sleep(snapshot_gap_time) 25 | } 26 | } 27 | 28 | func (kv *ShardKV) isNeedSnapshot() bool { 29 | for _, shard := range kv.shards { 30 | if shard.Status == BePulling { 31 | return false 32 | } 33 | } 34 | 35 | if kv.maxraftstate != -1 { 36 | if kv.rf.RaftPersistSize() > int(threshold*float32(kv.maxraftstate)) || 37 | kv.lastApplied > kv.lastSnapshot+snapshotLogGap { 38 | return true 39 | } 40 | } 41 | return false 42 | } 43 | 44 | func (kv *ShardKV) doSnapshot(commandIndex int) { 45 | w := new(bytes.Buffer) 46 | e := labgob.NewEncoder(w) 47 | if e.Encode(kv.shards) != nil || 48 | e.Encode(kv.lastConfig) != nil || 49 | e.Encode(kv.currentConfig) != nil { 50 | panic("server doSnapshot encode error") 51 | } 52 | kv.rf.Snapshot(commandIndex, w.Bytes()) 53 | } 54 | 55 | func (kv *ShardKV) setSnapshot(snapshot []byte) { 56 | if snapshot == nil || len(snapshot) < 1 { // bootstrap without any state? 57 | return 58 | } 59 | 60 | r := bytes.NewBuffer(snapshot) 61 | d := labgob.NewDecoder(r) 62 | 63 | var shards map[int]*Shard 64 | var lastconfig, currentConfig shardctrler.Config 65 | 66 | if d.Decode(&shards) != nil || 67 | d.Decode(&lastconfig) != nil || 68 | d.Decode(¤tConfig) != nil { 69 | log.Fatalf("server setSnapshot decode error\n") 70 | } else { 71 | var str string 72 | for shardID, shard := range shards { 73 | desc := fmt.Sprintf("[%d : %+v]\n ", shardID, shard) 74 | str += desc 75 | } 76 | Debug(dWarn, "G%+v {S%+v} snapshot read: %+v", kv.gid, kv.me, str) 77 | kv.shards = shards 78 | kv.lastConfig = lastconfig 79 | kv.currentConfig = currentConfig 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/main/diskvd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start a diskvd server. it's a member of some replica 5 | // group, which has other members, and it needs to know 6 | // how to talk to the members of the shardmaster service. 7 | // used by ../diskv/test_test.go 8 | // 9 | // arguments: 10 | // -g groupid 11 | // -m masterport1 -m masterport2 ... 12 | // -s replicaport1 -s replicaport2 ... 13 | // -i my-index-in-server-port-list 14 | // -u unreliable 15 | // -d directory 16 | // -r restart 17 | 18 | import "time" 19 | import "6.824/diskv" 20 | import "os" 21 | import "fmt" 22 | import "strconv" 23 | import "runtime" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: diskvd -g gid -m master... -s server... 
-i my-index -d dir\n") 27 | os.Exit(1) 28 | } 29 | 30 | func main() { 31 | var gid int64 = -1 // my replica group ID 32 | masters := []string{} // ports of shardmasters 33 | replicas := []string{} // ports of servers in my replica group 34 | me := -1 // my index in replicas[] 35 | unreliable := false 36 | dir := "" // store persistent data here 37 | restart := false 38 | 39 | for i := 1; i+1 < len(os.Args); i += 2 { 40 | a0 := os.Args[i] 41 | a1 := os.Args[i+1] 42 | if a0 == "-g" { 43 | gid, _ = strconv.ParseInt(a1, 10, 64) 44 | } else if a0 == "-m" { 45 | masters = append(masters, a1) 46 | } else if a0 == "-s" { 47 | replicas = append(replicas, a1) 48 | } else if a0 == "-i" { 49 | me, _ = strconv.Atoi(a1) 50 | } else if a0 == "-u" { 51 | unreliable, _ = strconv.ParseBool(a1) 52 | } else if a0 == "-d" { 53 | dir = a1 54 | } else if a0 == "-r" { 55 | restart, _ = strconv.ParseBool(a1) 56 | } else { 57 | usage() 58 | } 59 | } 60 | 61 | if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" { 62 | usage() 63 | } 64 | 65 | runtime.GOMAXPROCS(4) 66 | 67 | srv := diskv.StartServer(gid, masters, replicas, me, dir, restart) 68 | srv.Setunreliable(unreliable) 69 | 70 | // for safety, force quit after 10 minutes. 71 | time.Sleep(10 * 60 * time.Second) 72 | mep, _ := os.FindProcess(os.Getpid()) 73 | mep.Kill() 74 | } 75 | -------------------------------------------------------------------------------- /src/mrapps/rtiming.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a MapReduce pseudo-application to test that workers 5 | // execute reduce tasks in parallel. 6 | // 7 | // go build -buildmode=plugin rtiming.go 8 | // 9 | 10 | import "6.824/mr" 11 | import "fmt" 12 | import "os" 13 | import "syscall" 14 | import "time" 15 | import "io/ioutil" 16 | 17 | func nparallel(phase string) int { 18 | // create a file so that other workers will see that 19 | // we're running at the same time as them. 20 | pid := os.Getpid() 21 | myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid) 22 | err := ioutil.WriteFile(myfilename, []byte("x"), 0666) 23 | if err != nil { 24 | panic(err) 25 | } 26 | 27 | // are any other workers running? 28 | // find their PIDs by scanning directory for mr-worker-XXX files. 29 | dd, err := os.Open(".") 30 | if err != nil { 31 | panic(err) 32 | } 33 | names, err := dd.Readdirnames(1000000) 34 | if err != nil { 35 | panic(err) 36 | } 37 | ret := 0 38 | for _, name := range names { 39 | var xpid int 40 | pat := fmt.Sprintf("mr-worker-%s-%%d", phase) 41 | n, err := fmt.Sscanf(name, pat, &xpid) 42 | if n == 1 && err == nil { 43 | err := syscall.Kill(xpid, 0) 44 | if err == nil { 45 | // if err == nil, xpid is alive. 
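// (signal 0 does not actually deliver a signal; syscall.Kill(xpid, 0) only
// performs the existence and permission check, so this is a cheap way to
// probe whether another worker process is still running)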
46 | ret += 1 47 | } 48 | } 49 | } 50 | dd.Close() 51 | 52 | time.Sleep(1 * time.Second) 53 | 54 | err = os.Remove(myfilename) 55 | if err != nil { 56 | panic(err) 57 | } 58 | 59 | return ret 60 | } 61 | 62 | func Map(filename string, contents string) []mr.KeyValue { 63 | 64 | kva := []mr.KeyValue{} 65 | kva = append(kva, mr.KeyValue{"a", "1"}) 66 | kva = append(kva, mr.KeyValue{"b", "1"}) 67 | kva = append(kva, mr.KeyValue{"c", "1"}) 68 | kva = append(kva, mr.KeyValue{"d", "1"}) 69 | kva = append(kva, mr.KeyValue{"e", "1"}) 70 | kva = append(kva, mr.KeyValue{"f", "1"}) 71 | kva = append(kva, mr.KeyValue{"g", "1"}) 72 | kva = append(kva, mr.KeyValue{"h", "1"}) 73 | kva = append(kva, mr.KeyValue{"i", "1"}) 74 | kva = append(kva, mr.KeyValue{"j", "1"}) 75 | return kva 76 | } 77 | 78 | func Reduce(key string, values []string) string { 79 | n := nparallel("reduce") 80 | 81 | val := fmt.Sprintf("%d", n) 82 | 83 | return val 84 | } 85 | -------------------------------------------------------------------------------- /src/mrapps/mtiming.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // a MapReduce pseudo-application to test that workers 5 | // execute map tasks in parallel. 6 | // 7 | // go build -buildmode=plugin mtiming.go 8 | // 9 | 10 | import "6.824/mr" 11 | import "strings" 12 | import "fmt" 13 | import "os" 14 | import "syscall" 15 | import "time" 16 | import "sort" 17 | import "io/ioutil" 18 | 19 | func nparallel(phase string) int { 20 | // create a file so that other workers will see that 21 | // we're running at the same time as them. 22 | pid := os.Getpid() 23 | myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid) 24 | err := ioutil.WriteFile(myfilename, []byte("x"), 0666) 25 | if err != nil { 26 | panic(err) 27 | } 28 | 29 | // are any other workers running? 30 | // find their PIDs by scanning directory for mr-worker-XXX files. 31 | dd, err := os.Open(".") 32 | if err != nil { 33 | panic(err) 34 | } 35 | names, err := dd.Readdirnames(1000000) 36 | if err != nil { 37 | panic(err) 38 | } 39 | ret := 0 40 | for _, name := range names { 41 | var xpid int 42 | pat := fmt.Sprintf("mr-worker-%s-%%d", phase) 43 | n, err := fmt.Sscanf(name, pat, &xpid) 44 | if n == 1 && err == nil { 45 | err := syscall.Kill(xpid, 0) 46 | if err == nil { 47 | // if err == nil, xpid is alive. 48 | ret += 1 49 | } 50 | } 51 | } 52 | dd.Close() 53 | 54 | time.Sleep(1 * time.Second) 55 | 56 | err = os.Remove(myfilename) 57 | if err != nil { 58 | panic(err) 59 | } 60 | 61 | return ret 62 | } 63 | 64 | func Map(filename string, contents string) []mr.KeyValue { 65 | t0 := time.Now() 66 | ts := float64(t0.Unix()) + (float64(t0.Nanosecond()) / 1000000000.0) 67 | pid := os.Getpid() 68 | 69 | n := nparallel("map") 70 | 71 | kva := []mr.KeyValue{} 72 | kva = append(kva, mr.KeyValue{ 73 | fmt.Sprintf("times-%v", pid), 74 | fmt.Sprintf("%.1f", ts)}) 75 | kva = append(kva, mr.KeyValue{ 76 | fmt.Sprintf("parallel-%v", pid), 77 | fmt.Sprintf("%d", n)}) 78 | return kva 79 | } 80 | 81 | func Reduce(key string, values []string) string { 82 | //n := nparallel("reduce") 83 | 84 | // sort values to ensure deterministic output. 
85 | vv := make([]string, len(values)) 86 | copy(vv, values) 87 | sort.Strings(vv) 88 | 89 | val := strings.Join(vv, " ") 90 | return val 91 | } 92 | -------------------------------------------------------------------------------- /src/kvraft/client.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | import ( 4 | "crypto/rand" 5 | "math/big" 6 | "time" 7 | 8 | "6.824/labrpc" 9 | ) 10 | 11 | type Clerk struct { 12 | servers []*labrpc.ClientEnd 13 | // You will have to modify this struct. 14 | leaderId int 15 | seqId int64 16 | clientId int64 17 | } 18 | 19 | func nrand() int64 { 20 | max := big.NewInt(int64(1) << 62) 21 | bigx, _ := rand.Int(rand.Reader, max) 22 | x := bigx.Int64() 23 | return x 24 | } 25 | 26 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 27 | ck := new(Clerk) 28 | ck.servers = servers 29 | // You'll have to add code here. 30 | ck.leaderId = 0 31 | ck.clientId = nrand() 32 | ck.seqId = 0 33 | return ck 34 | } 35 | 36 | // 37 | // fetch the current value for a key. 38 | // returns "" if the key does not exist. 39 | // keeps trying forever in the face of all other errors. 40 | // 41 | // you can send an RPC with code like this: 42 | // ok := ck.servers[i].Call("KVServer.Get", &args, &reply) 43 | // 44 | // the types of args and reply (including whether they are pointers) 45 | // must match the declared types of the RPC handler function's 46 | // arguments. and reply must be passed as a pointer. 47 | // 48 | func (ck *Clerk) sendCmd(key string, value string, OpType OPType) string { 49 | ck.seqId += 1 50 | args := CmdArgs{ 51 | SeqId: ck.seqId, 52 | ClientId: ck.clientId, 53 | Key: key, 54 | Value: value, 55 | OpType: OpType, 56 | } 57 | 58 | t0 := time.Now() 59 | for time.Since(t0).Seconds() < 10 { 60 | reply := CmdReply{} 61 | 62 | ok := ck.servers[ck.leaderId].Call("KVServer.Command", &args, &reply) 63 | 64 | if !ok { 65 | ck.leaderId = (ck.leaderId + 1) % len(ck.servers) 66 | time.Sleep(retry_timeout) 67 | continue 68 | } 69 | 70 | if reply.Err == OK { 71 | return reply.Value 72 | } else if reply.Err == ErrNoKey { 73 | return "" 74 | } 75 | 76 | ck.leaderId = (ck.leaderId + 1) % len(ck.servers) 77 | time.Sleep(retry_timeout) 78 | } 79 | panic("10s not reply") 80 | return "" 81 | } 82 | 83 | func (ck *Clerk) Get(key string) string { 84 | return ck.sendCmd(key, "", OpGet) 85 | } 86 | 87 | func (ck *Clerk) Put(key string, value string) { 88 | ck.sendCmd(key, value, OpPut) 89 | } 90 | 91 | func (ck *Clerk) Append(key string, value string) { 92 | ck.sendCmd(key, value, OpAppend) 93 | } 94 | -------------------------------------------------------------------------------- /src/porcupine/model.go: -------------------------------------------------------------------------------- 1 | package porcupine 2 | 3 | import "fmt" 4 | 5 | type Operation struct { 6 | ClientId int // optional, unless you want a visualization; zero-indexed 7 | Input interface{} 8 | Call int64 // invocation time 9 | Output interface{} 10 | Return int64 // response time 11 | } 12 | 13 | type EventKind bool 14 | 15 | const ( 16 | CallEvent EventKind = false 17 | ReturnEvent EventKind = true 18 | ) 19 | 20 | type Event struct { 21 | ClientId int // optional, unless you want a visualization; zero-indexed 22 | Kind EventKind 23 | Value interface{} 24 | Id int 25 | } 26 | 27 | type Model struct { 28 | // Partition functions, such that a history is linearizable if and only 29 | // if each partition is linearizable. 
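// For a key/value model, a natural partition is per key, since operations on
// different keys never interact and each key's sub-history can be checked
// independently. A rough, hypothetical sketch (a KvInput type with a Key
// field is assumed here; it is not defined in this file):
//
//	Partition: func(history []Operation) [][]Operation {
//		byKey := map[string][]Operation{}
//		for _, op := range history {
//			in := op.Input.(KvInput) // assumed input type
//			byKey[in.Key] = append(byKey[in.Key], op)
//		}
//		parts := [][]Operation{}
//		for _, ops := range byKey {
//			parts = append(parts, ops)
//		}
//		return parts
//	},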
If you don't want to implement 30 | // this, you can always use the `NoPartition` functions implemented 31 | // below. 32 | Partition func(history []Operation) [][]Operation 33 | PartitionEvent func(history []Event) [][]Event 34 | // Initial state of the system. 35 | Init func() interface{} 36 | // Step function for the system. Returns whether or not the system 37 | // could take this step with the given inputs and outputs and also 38 | // returns the new state. This should not mutate the existing state. 39 | Step func(state interface{}, input interface{}, output interface{}) (bool, interface{}) 40 | // Equality on states. If you are using a simple data type for states, 41 | // you can use the `ShallowEqual` function implemented below. 42 | Equal func(state1, state2 interface{}) bool 43 | // For visualization, describe an operation as a string. 44 | // For example, "Get('x') -> 'y'". 45 | DescribeOperation func(input interface{}, output interface{}) string 46 | // For visualization purposes, describe a state as a string. 47 | // For example, "{'x' -> 'y', 'z' -> 'w'}" 48 | DescribeState func(state interface{}) string 49 | } 50 | 51 | func NoPartition(history []Operation) [][]Operation { 52 | return [][]Operation{history} 53 | } 54 | 55 | func NoPartitionEvent(history []Event) [][]Event { 56 | return [][]Event{history} 57 | } 58 | 59 | func ShallowEqual(state1, state2 interface{}) bool { 60 | return state1 == state2 61 | } 62 | 63 | func DefaultDescribeOperation(input interface{}, output interface{}) string { 64 | return fmt.Sprintf("%v -> %v", input, output) 65 | } 66 | 67 | func DefaultDescribeState(state interface{}) string { 68 | return fmt.Sprintf("%v", state) 69 | } 70 | 71 | type CheckResult string 72 | 73 | const ( 74 | Unknown CheckResult = "Unknown" // timed out 75 | Ok = "Ok" 76 | Illegal = "Illegal" 77 | ) 78 | -------------------------------------------------------------------------------- /src/shardkv/configure.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "6.824/shardctrler" 5 | ) 6 | 7 | func (kv *ShardKV) configureAction() { 8 | canPerformNextConfig := true 9 | kv.mu.Lock() 10 | for _, shard := range kv.shards { 11 | if shard.Status != Serving { 12 | Debug(dWarn, "G%+v S%d shard: %+v", kv.gid, kv.me, shard) 13 | canPerformNextConfig = false 14 | break 15 | } 16 | } 17 | currentConfigNum := kv.currentConfig.Num 18 | kv.mu.Unlock() 19 | if canPerformNextConfig { 20 | nextConfig := kv.sc.Query(currentConfigNum + 1) 21 | if nextConfig.Num == currentConfigNum+1 { 22 | kv.Execute(NewConfigurationCommand(&nextConfig), &OpResp{}) 23 | } 24 | } else { 25 | Debug(dWarn, "G%+v {S%+v} don't need fetch config!", kv.gid, kv.me) 26 | } 27 | } 28 | 29 | func (kv *ShardKV) applyConfiguration(nextConfig *shardctrler.Config) *OpResp { 30 | if nextConfig.Num == kv.currentConfig.Num+1 { 31 | kv.updateShardStatus(nextConfig) 32 | kv.lastConfig = kv.currentConfig.DeepCopy() 33 | kv.currentConfig = nextConfig.DeepCopy() 34 | Debug(dWarn, "G%+v {S%+v} applyConfiguration %d is %+v", kv.gid, kv.me, nextConfig.Num, nextConfig) 35 | return &OpResp{OK, ""} 36 | } 37 | return &OpResp{ErrTimeoutReq, ""} 38 | } 39 | 40 | func (kv *ShardKV) updateShardStatus(nextConfig *shardctrler.Config) { 41 | // special judge 42 | if nextConfig.Num == 1 { 43 | shards := kv.getAllShards(nextConfig) 44 | for _, shard := range shards { 45 | kv.shards[shard] = NewShard(Serving) 46 | } 47 | return 48 | } 49 | 50 | newShards := 
kv.getAllShards(nextConfig) 51 | nowShards := kv.getAllShards(&kv.currentConfig) 52 | // loss shard 53 | for _, nowShard := range nowShards { 54 | if nextConfig.Shards[nowShard] != kv.gid { 55 | // BePulling 56 | kv.shards[nowShard].Status = BePulling 57 | } 58 | } 59 | // get shard 60 | for _, newShard := range newShards { 61 | if kv.currentConfig.Shards[newShard] != kv.gid { 62 | // Pulling 63 | kv.shards[newShard] = NewShard(Pulling) 64 | } 65 | } 66 | } 67 | 68 | func (kv *ShardKV) getAllShards(nextConfig *shardctrler.Config) []int { 69 | var shards []int 70 | for shard, gid := range nextConfig.Shards { 71 | if gid == kv.gid { 72 | shards = append(shards, shard) 73 | } 74 | } 75 | return shards 76 | } 77 | 78 | func (kv *ShardKV) getShardIDsByStatus(status ShardStatus, config *shardctrler.Config) map[int][]int { 79 | gid2shardIDs := make(map[int][]int) 80 | for shard, _ := range kv.shards { 81 | if kv.shards[shard].Status == status { 82 | gid := config.Shards[shard] 83 | if _, ok := gid2shardIDs[gid]; !ok { 84 | vec := [1]int{shard} 85 | gid2shardIDs[gid] = vec[:] 86 | } else { 87 | gid2shardIDs[gid] = append(gid2shardIDs[gid], shard) 88 | } 89 | } 90 | } 91 | return gid2shardIDs 92 | } 93 | -------------------------------------------------------------------------------- /src/shardkv/migration.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | type PullDataArgs struct { 8 | ConfNum int 9 | ShardIds []int 10 | } 11 | 12 | type PullDataReply struct { 13 | Err Err 14 | ConfNum int 15 | Shards map[int]*Shard 16 | } 17 | 18 | func (kv *ShardKV) migrationAction() { 19 | kv.mu.Lock() 20 | gid2shardIDs := kv.getShardIDsByStatus(Pulling, &kv.lastConfig) 21 | if len(gid2shardIDs) == 0 { 22 | kv.mu.Unlock() 23 | return 24 | } 25 | var wg sync.WaitGroup 26 | for gid, shardIDs := range gid2shardIDs { 27 | wg.Add(1) 28 | servers := kv.lastConfig.Groups[gid] 29 | go func(servers []string, configNum int, shardIDs []int) { 30 | defer wg.Done() 31 | args := PullDataArgs{ 32 | ConfNum: configNum, 33 | ShardIds: shardIDs, 34 | } 35 | for _, server := range servers { 36 | var resp PullDataReply 37 | srv := kv.makeEnd(server) 38 | if srv.Call("ShardKV.GetShardsData", &args, &resp) && resp.Err == OK { 39 | kv.Execute(NewInsertShardsCommand(&resp), &OpResp{}) 40 | } 41 | } 42 | }(servers, kv.currentConfig.Num, shardIDs) 43 | } 44 | kv.mu.Unlock() 45 | Debug(dServer, "G%+v {S%+v} migrationAction wait", kv.gid, kv.me) 46 | wg.Wait() 47 | Debug(dServer, "G%+v {S%+v} migrationAction done", kv.gid, kv.me) 48 | } 49 | 50 | func (kv *ShardKV) GetShardsData(args *PullDataArgs, reply *PullDataReply) { 51 | defer Debug(dServer, "G%+v {S%+v} GetShardsData: args: %+v reply: %+v", kv.gid, kv.me, args, reply) 52 | // only pull shards from leader 53 | if _, isLeader := kv.rf.GetState(); !isLeader { 54 | reply.Err = ErrWrongLeader 55 | return 56 | } 57 | kv.mu.Lock() 58 | 59 | if kv.currentConfig.Num < args.ConfNum { 60 | reply.Err = ErrNotReady 61 | kv.mu.Unlock() 62 | kv.configureAction() 63 | return 64 | } 65 | 66 | reply.Shards = make(map[int]*Shard) 67 | for _, shardID := range args.ShardIds { 68 | reply.Shards[shardID] = kv.shards[shardID].deepCopy() 69 | } 70 | 71 | reply.ConfNum, reply.Err = args.ConfNum, OK 72 | kv.mu.Unlock() 73 | } 74 | 75 | func (kv *ShardKV) applyInsertShards(shardsInfo *PullDataReply) *OpResp { 76 | Debug(dServer, "G%+v {S%+v} before applyInsertShards: %+v", kv.gid, kv.me, kv.shards) 77 | if 
shardsInfo.ConfNum == kv.currentConfig.Num { 78 | for shardId, shardData := range shardsInfo.Shards { 79 | if kv.shards[shardId].Status == Pulling { 80 | kv.shards[shardId] = shardData.deepCopy() 81 | kv.shards[shardId].Status = GCing 82 | } else { 83 | Debug(dWarn, "G%+v {S%+v} shard %d is not Pulling: %+v", kv.gid, kv.me, shardId, kv.shards[shardId]) 84 | break 85 | } 86 | } 87 | Debug(dServer, "G%+v {S%+v} after applyInsertShards: %+v", kv.gid, kv.me, kv.shards) 88 | return &OpResp{OK, ""} 89 | } 90 | return &OpResp{ErrOutDated, ""} 91 | } 92 | -------------------------------------------------------------------------------- /src/raft/log.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | type Entry struct { 6 | Index int 7 | Term int 8 | Cmd interface{} 9 | } 10 | 11 | func (rf *Raft) frontLog() Entry { 12 | return rf.log[0] 13 | } 14 | 15 | func (rf *Raft) frontLogIndex() int { 16 | return rf.log[0].Index 17 | } 18 | 19 | func (rf *Raft) lastLog() Entry { 20 | return rf.log[len(rf.log)-1] 21 | } 22 | 23 | func (rf *Raft) lastLogIndex() int { 24 | return rf.log[len(rf.log)-1].Index 25 | } 26 | 27 | // nextIndex can be as large as len(log), which would be out of range 28 | func (rf *Raft) transfer(index int) (int, int) { 29 | begin := rf.frontLogIndex() 30 | end := rf.lastLogIndex() 31 | // valid indices lie in the closed interval [begin, end]; 32 | // anything outside it is rejected below 33 | if index < begin || index > end { 34 | utils.Debug(utils.DWarn, "S%d log out of range: %d, [%d, %d]", rf.me, index, begin, end) 35 | return 0, -1 36 | } 37 | return index - begin, 0 38 | } 39 | 40 | func (rf *Raft) getEntry(index int) (Entry, int) { 41 | begin := rf.frontLogIndex() 42 | end := rf.lastLogIndex() 43 | // valid indices lie in the closed interval [begin, end]; 44 | // anything outside it is rejected below
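// e.g. if the snapshot covers everything up to and including global index 5,
// then log[0].Index == 5 (the boundary/dummy entry) and global index 7 lives
// at log[7-5] == log[2]; both transfer() above and the check below rely on
// that offset arithmetic.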
45 | if index < begin || index > end { 46 | utils.Debug(utils.DWarn, "S%d log out of range: %d, [%d, %d]", rf.me, index, begin, end) 47 | return Entry{magic_index, magic_term, nil}, -1 48 | } 49 | return rf.log[index-begin], 0 50 | } 51 | 52 | func (rf *Raft) isUpToDate(lastLogIndex int, lastLogTerm int) bool { 53 | entry := rf.lastLog() 54 | index := entry.Index 55 | term := entry.Term 56 | if term == lastLogTerm { 57 | return lastLogIndex >= index 58 | } 59 | return lastLogTerm > term 60 | } 61 | 62 | func (rf *Raft) toCommit() { 63 | // append entries before commit 64 | if rf.commitIndex >= rf.lastLogIndex() { 65 | return 66 | } 67 | 68 | for i := rf.lastLogIndex(); i > rf.commitIndex; i-- { 69 | entry, err := rf.getEntry(i) 70 | if err < 0 { 71 | continue 72 | } 73 | 74 | if entry.Term != rf.currentTerm { 75 | return 76 | } 77 | 78 | cnt := 1 // 1 => self 79 | for j, match := range rf.matchIndex { 80 | if j != rf.me && match >= i { 81 | cnt++ 82 | } 83 | if cnt > len(rf.peers)/2 { 84 | rf.commitIndex = i 85 | utils.Debug(utils.DCommit, "S%d commit to %v", rf.me, rf.commitIndex) 86 | rf.applyCond.Signal() 87 | return 88 | } 89 | } 90 | } 91 | 92 | utils.Debug(utils.DCommit, "S%d don't have half replicated from %v to %v now", rf.me, rf.commitIndex, rf.lastLogIndex()) 93 | } 94 | 95 | func (rf *Raft) HasLogInCurrentTerm() bool { 96 | rf.mu.Lock() 97 | defer rf.mu.Unlock() 98 | 99 | for i := len(rf.log) - 1; i > 0; i-- { 100 | if rf.log[i].Term > rf.currentTerm { 101 | continue 102 | } 103 | if rf.log[i].Term == rf.currentTerm { 104 | return true 105 | } 106 | if rf.log[i].Term < rf.currentTerm { 107 | break 108 | } 109 | } 110 | return false 111 | } 112 | -------------------------------------------------------------------------------- /src/main/mrsequential.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // simple sequential MapReduce. 5 | // 6 | // go run mrsequential.go wc.so pg*.txt 7 | // 8 | 9 | import "fmt" 10 | import "6.824/mr" 11 | import "plugin" 12 | import "os" 13 | import "log" 14 | import "io/ioutil" 15 | import "sort" 16 | 17 | // for sorting by key. 18 | type ByKey []mr.KeyValue 19 | 20 | // for sorting by key. 21 | func (a ByKey) Len() int { return len(a) } 22 | func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 23 | func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } 24 | 25 | func main() { 26 | if len(os.Args) < 3 { 27 | fmt.Fprintf(os.Stderr, "Usage: mrsequential xxx.so inputfiles...\n") 28 | os.Exit(1) 29 | } 30 | 31 | mapf, reducef := loadPlugin(os.Args[1]) 32 | 33 | // 34 | // read each input file, 35 | // pass it to Map, 36 | // accumulate the intermediate Map output. 37 | // 38 | intermediate := []mr.KeyValue{} 39 | for _, filename := range os.Args[2:] { 40 | file, err := os.Open(filename) 41 | if err != nil { 42 | log.Fatalf("cannot open %v", filename) 43 | } 44 | content, err := ioutil.ReadAll(file) 45 | if err != nil { 46 | log.Fatalf("cannot read %v", filename) 47 | } 48 | file.Close() 49 | kva := mapf(filename, string(content)) 50 | intermediate = append(intermediate, kva...) 51 | } 52 | 53 | // 54 | // a big difference from real MapReduce is that all the 55 | // intermediate data is in one place, intermediate[], 56 | // rather than being partitioned into NxM buckets. 
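// (in the distributed version the usual approach is to bucket each
// intermediate key with something like ihash(key) % nReduce and write one
// intermediate file per map task and reduce bucket, so each reducer reads
// only its own bucket; the helper names here are illustrative, not prescribed)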
57 | // 58 | 59 | sort.Sort(ByKey(intermediate)) 60 | 61 | oname := "mr-out-0" 62 | ofile, _ := os.Create(oname) 63 | 64 | // 65 | // call Reduce on each distinct key in intermediate[], 66 | // and print the result to mr-out-0. 67 | // 68 | i := 0 69 | for i < len(intermediate) { 70 | j := i + 1 71 | for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { 72 | j++ 73 | } 74 | values := []string{} 75 | for k := i; k < j; k++ { 76 | values = append(values, intermediate[k].Value) 77 | } 78 | output := reducef(intermediate[i].Key, values) 79 | 80 | // this is the correct format for each line of Reduce output. 81 | fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) 82 | 83 | i = j 84 | } 85 | 86 | ofile.Close() 87 | } 88 | 89 | // 90 | // load the application Map and Reduce functions 91 | // from a plugin file, e.g. ../mrapps/wc.so 92 | // 93 | func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) { 94 | p, err := plugin.Open(filename) 95 | if err != nil { 96 | log.Fatalf("cannot load plugin %v", filename) 97 | } 98 | xmapf, err := p.Lookup("Map") 99 | if err != nil { 100 | log.Fatalf("cannot find Map in %v", filename) 101 | } 102 | mapf := xmapf.(func(string, string) []mr.KeyValue) 103 | xreducef, err := p.Lookup("Reduce") 104 | if err != nil { 105 | log.Fatalf("cannot find Reduce in %v", filename) 106 | } 107 | reducef := xreducef.(func(string, []string) string) 108 | 109 | return mapf, reducef 110 | } 111 | -------------------------------------------------------------------------------- /src/shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // client code to talk to a sharded key/value service. 5 | // 6 | // the client first talks to the shardctrler to find out 7 | // the assignment of shards (keys) to groups, and then 8 | // talks to the group that holds the key's shard. 9 | // 10 | 11 | import ( 12 | "crypto/rand" 13 | "math/big" 14 | "time" 15 | 16 | "6.824/labrpc" 17 | "6.824/shardctrler" 18 | ) 19 | 20 | // 21 | // which shard is a key in? 22 | // please use this function, 23 | // and please do not change it. 24 | // 25 | func key2shard(key string) int { 26 | shard := 0 27 | if len(key) > 0 { 28 | shard = int(key[0]) 29 | } 30 | shard %= shardctrler.NShards 31 | return shard 32 | } 33 | 34 | func nrand() int64 { 35 | max := big.NewInt(int64(1) << 62) 36 | bigx, _ := rand.Int(rand.Reader, max) 37 | x := bigx.Int64() 38 | return x 39 | } 40 | 41 | type Clerk struct { 42 | sm *shardctrler.Clerk 43 | config shardctrler.Config 44 | make_end func(string) *labrpc.ClientEnd 45 | // You will have to modify this struct. 46 | seqId int64 47 | clientId int64 48 | } 49 | 50 | // 51 | // the tester calls MakeClerk. 52 | // 53 | // ctrlers[] is needed to call shardctrler.MakeClerk(). 54 | // 55 | // make_end(servername) turns a server name from a 56 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 57 | // send RPCs. 58 | // 59 | func MakeClerk(ctrlers []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk { 60 | ck := new(Clerk) 61 | ck.sm = shardctrler.MakeClerk(ctrlers) 62 | ck.make_end = make_end 63 | // You'll have to add code here. 64 | ck.clientId = nrand() 65 | ck.seqId = 0 66 | ck.config = ck.sm.Query(-1) 67 | return ck 68 | } 69 | 70 | // 71 | // fetch the current value for a key. 72 | // returns "" if the key does not exist. 73 | // keeps trying forever in the face of all other errors. 
74 | // You will have to modify this function. 75 | // 76 | func (ck *Clerk) sendCmd(key string, value string, OpType OPType) string { 77 | ck.seqId += 1 78 | args := CmdArgs{ 79 | SeqId: ck.seqId, 80 | ClientId: ck.clientId, 81 | Key: key, 82 | Value: value, 83 | OpType: OpType, 84 | } 85 | 86 | t0 := time.Now() 87 | for time.Since(t0).Seconds() < 15 { 88 | shard := key2shard(key) 89 | gid := ck.config.Shards[shard] 90 | if servers, ok := ck.config.Groups[gid]; ok { 91 | // try each server for the shard. 92 | for si := 0; si < len(servers); si++ { 93 | srv := ck.make_end(servers[si]) 94 | var reply CmdReply 95 | ok := srv.Call("ShardKV.Command", &args, &reply) 96 | if ok && (reply.Err == OK || reply.Err == ErrNoKey) { 97 | return reply.Value 98 | } 99 | if ok && (reply.Err == ErrWrongGroup) { 100 | break 101 | } 102 | // ... not ok, or ErrWrongLeader 103 | } 104 | } 105 | time.Sleep(100 * time.Millisecond) 106 | // ask controler for the latest configuration. 107 | ck.config = ck.sm.Query(-1) 108 | } 109 | 110 | panic("15s not reply") 111 | return "" 112 | } 113 | 114 | func (ck *Clerk) Get(key string) string { 115 | return ck.sendCmd(key, "", OpGet) 116 | } 117 | 118 | func (ck *Clerk) Put(key string, value string) { 119 | ck.sendCmd(key, value, OpPut) 120 | } 121 | 122 | func (ck *Clerk) Append(key string, value string) { 123 | ck.sendCmd(key, value, OpAppend) 124 | } 125 | -------------------------------------------------------------------------------- /.check-build: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eu 4 | 5 | REFERENCE_FILES=( 6 | # lab 1 7 | src/mrapps/crash.go 8 | src/mrapps/indexer.go 9 | src/mrapps/mtiming.go 10 | src/mrapps/nocrash.go 11 | src/mrapps/rtiming.go 12 | src/mrapps/wc.go 13 | src/main/mrsequential.go 14 | src/main/mrcoordinator.go 15 | src/main/mrworker.go 16 | 17 | # lab 2 18 | src/raft/persister.go 19 | src/raft/test_test.go 20 | src/raft/config.go 21 | src/labrpc/labrpc.go 22 | 23 | # lab 3 24 | src/kvraft/test_test.go 25 | src/kvraft/config.go 26 | 27 | # lab 4a 28 | src/shardctrler/test_test.go 29 | src/shardctrler/config.go 30 | 31 | # lab 4b 32 | src/shardkv/test_test.go 33 | src/shardkv/config.go 34 | ) 35 | 36 | main() { 37 | upstream="$1" 38 | labnum="$2" 39 | 40 | # make sure we have reference copy of lab, in FETCH_HEAD 41 | git fetch "$upstream" 2>/dev/null || die "unable to git fetch $upstream" 42 | 43 | # copy existing directory 44 | tmpdir="$(mktemp -d)" 45 | find src -type s -delete # cp can't copy sockets 46 | cp -r src "$tmpdir" 47 | orig="$PWD" 48 | cd "$tmpdir" 49 | 50 | # check out reference files 51 | for f in ${REFERENCE_FILES[@]}; do 52 | mkdir -p "$(dirname $f)" 53 | git --git-dir="$orig/.git" show "FETCH_HEAD:$f" > "$f" 54 | done 55 | 56 | case $labnum in 57 | "lab1") check_lab1;; 58 | "lab2a"|"lab2b"|"lab2c"|"lab2d") check_lab2;; 59 | "lab3a"|"lab3b") check_lab3;; 60 | "lab4a") check_lab4a;; 61 | "lab4b") check_lab4b;; 62 | *) die "unknown lab: $labnum";; 63 | esac 64 | 65 | cd 66 | rm -rf "$tmpdir" 67 | } 68 | 69 | check_lab1() { 70 | check_cmd cd src/mrapps 71 | check_cmd go build -buildmode=plugin wc.go 72 | check_cmd go build -buildmode=plugin indexer.go 73 | check_cmd go build -buildmode=plugin mtiming.go 74 | check_cmd go build -buildmode=plugin rtiming.go 75 | check_cmd go build -buildmode=plugin crash.go 76 | check_cmd go build -buildmode=plugin nocrash.go 77 | check_cmd cd ../main 78 | check_cmd go build mrcoordinator.go 79 | check_cmd go build 
mrworker.go 80 | check_cmd go build mrsequential.go 81 | } 82 | 83 | check_lab2() { 84 | check_cmd cd src/raft 85 | check_cmd go test -c 86 | } 87 | 88 | check_lab3() { 89 | check_cmd cd src/kvraft 90 | check_cmd go test -c 91 | } 92 | 93 | check_lab4a() { 94 | check_cmd cd src/shardctrler 95 | check_cmd go test -c 96 | } 97 | 98 | check_lab4b() { 99 | check_cmd cd src/shardkv 100 | check_cmd go test -c 101 | # also check other labs/parts 102 | cd "$tmpdir" 103 | check_lab4a 104 | cd "$tmpdir" 105 | check_lab3 106 | cd "$tmpdir" 107 | check_lab2 108 | } 109 | 110 | check_cmd() { 111 | if ! "$@" >/dev/null 2>&1; then 112 | echo "We tried building your source code with testing-related files reverted to original versions, and the build failed. This copy of your code is preserved in $tmpdir for debugging purposes. Please make sure the code you are trying to hand in does not make changes to test code." >&2 113 | echo >&2 114 | echo "The build failed while trying to run the following command:" >&2 115 | echo >&2 116 | echo "$ $@" >&2 117 | echo " (cwd: ${PWD#$tmpdir/})" >&2 118 | exit 1 119 | fi 120 | } 121 | 122 | die() { 123 | echo "$1" >&2 124 | exit 1 125 | } 126 | 127 | main "$@" 128 | -------------------------------------------------------------------------------- /src/raft/appendEntries.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // ticker() call doAppendEntries(), ticker() hold lock 6 | // if a node turn to leader, leader will call doAppendEntries() to send a heartbeat 7 | func (rf *Raft) doAppendEntries() { 8 | for i := 0; i < len(rf.peers); i++ { 9 | if i == rf.me { 10 | continue 11 | } 12 | 13 | wantSendIndex := rf.nextIndex[i] - 1 14 | if wantSendIndex < rf.frontLogIndex() { 15 | go rf.doInstallSnapshot(i) 16 | } else { 17 | go rf.appendTo(i) 18 | } 19 | } 20 | } 21 | 22 | func (rf *Raft) appendTo(peer int) { 23 | rf.mu.Lock() 24 | if rf.status != leader { 25 | utils.Debug(utils.DWarn, "S%d status change, it is not leader", rf.me) 26 | rf.mu.Unlock() 27 | return 28 | } 29 | args := AppendEntriesArgs{ 30 | Term: rf.currentTerm, 31 | LeaderId: rf.me, 32 | PrevLogIndex: magic_index, 33 | PrevLogTerm: magic_term, 34 | LeaderCommit: rf.commitIndex, 35 | } 36 | 37 | // utils.Debug(utils.DTrace, "S%d log length: %d, nextIndex:{%+v}", rf.me, len(rf.log), rf.nextIndex) 38 | // 0 <= prevLogIndex <= len(log) - 1 39 | prevLogIndex := rf.nextIndex[peer] - 1 40 | idx, err := rf.transfer(prevLogIndex) 41 | if err < 0 { 42 | rf.mu.Unlock() 43 | return 44 | } 45 | 46 | args.PrevLogIndex = rf.log[idx].Index 47 | args.PrevLogTerm = rf.log[idx].Term 48 | 49 | // must copy in here 50 | entries := rf.log[idx+1:] 51 | args.Entries = make([]Entry, len(entries)) 52 | copy(args.Entries, entries) 53 | rf.mu.Unlock() 54 | 55 | reply := AppendEntriesReply{} 56 | 57 | ok := rf.sendAppendEntries(peer, &args, &reply) 58 | if !ok { 59 | return 60 | } 61 | 62 | rf.mu.Lock() 63 | defer rf.mu.Unlock() 64 | 65 | // status changed or outdue data, ignore 66 | if rf.currentTerm != args.Term || rf.status != leader || reply.Term < rf.currentTerm { 67 | // overdue, ignore 68 | utils.Debug(utils.DInfo, "S%d old response from C%d, ignore it", rf.me, peer) 69 | return 70 | } 71 | 72 | // If RPC request or response contains term T > currentTerm: 73 | // set currentTerm = T, convert to follower (§5.1) 74 | if reply.Term > rf.currentTerm { 75 | utils.Debug(utils.DTerm, "S%d S%d term larger(%d > %d)", rf.me, peer, reply.Term, 
rf.currentTerm) 76 | rf.currentTerm, rf.votedFor = reply.Term, voted_nil 77 | rf.persist() 78 | rf.TurnTo(follower) 79 | return 80 | } 81 | 82 | if reply.Success { 83 | // utils.Debug(utils.DTrace, "S%d before nextIndex:{%+v} ", rf.me, rf.nextIndex) 84 | rf.nextIndex[peer] = args.PrevLogIndex + len(args.Entries) + 1 85 | // utils.Debug(utils.DTrace, "S%d after nextIndex:{%+v}", rf.me, rf.nextIndex) 86 | rf.matchIndex[peer] = args.PrevLogIndex + len(args.Entries) 87 | rf.toCommit() 88 | return 89 | } 90 | 91 | if reply.XTerm == -1 { // null slot 92 | rf.nextIndex[peer] -= reply.XLen 93 | } else if reply.XTerm >= 0 { 94 | termNotExit := true 95 | for index := rf.nextIndex[peer] - 1; index >= 1; index-- { 96 | entry, err := rf.getEntry(index) 97 | if err < 0 { 98 | continue 99 | } 100 | 101 | if entry.Term > reply.XTerm { 102 | continue 103 | } 104 | 105 | if entry.Term == reply.XTerm { 106 | rf.nextIndex[peer] = index + 1 107 | termNotExit = false 108 | break 109 | } 110 | if entry.Term < reply.XTerm { 111 | break 112 | } 113 | } 114 | if termNotExit { 115 | rf.nextIndex[peer] = reply.XIndex 116 | } 117 | } else { 118 | rf.nextIndex[peer] = reply.XIndex 119 | } 120 | 121 | // utils.Debug(utils.DTrace, "S%d nextIndex:{%+v}", rf.me, rf.nextIndex) 122 | // the smallest nextIndex is 1 123 | // otherwise, it will cause out of range error 124 | if rf.nextIndex[peer] < 1 { 125 | rf.nextIndex[peer] = 1 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/raft/appendEntriesHandler.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // handler need to require lock 6 | func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { 7 | rf.mu.Lock() 8 | defer rf.mu.Unlock() 9 | 10 | utils.Debug(utils.DLog, "S%d S%d appendEntries", rf.me, args.LeaderId) 11 | defer utils.Debug(utils.DLog, "S%d arg: %+v reply: %+v", rf.me, args, reply) 12 | 13 | defer rf.persist() 14 | 15 | if args.Term < rf.currentTerm { // leader out, refuse 16 | reply.Term = rf.currentTerm 17 | reply.Success = false 18 | utils.Debug(utils.DTerm, "S%d S%d term less(%d < %d)", rf.me, args.LeaderId, args.Term, rf.currentTerm) 19 | return 20 | } 21 | 22 | if args.Term > rf.currentTerm { 23 | // If RPC request or response contains term T > currentTerm: 24 | // set currentTerm = T, convert to follower (§5.1) 25 | rf.currentTerm, rf.votedFor = args.Term, voted_nil 26 | utils.Debug(utils.DTerm, "S%d S%d term larger(%d > %d)", rf.me, args.LeaderId, args.Term, rf.currentTerm) 27 | rf.TurnTo(follower) 28 | } 29 | 30 | if rf.status != follower { 31 | // If AppendEntries RPC received from new leader: 32 | // convert to follower 33 | rf.TurnTo(follower) 34 | } 35 | 36 | reply.Success = true 37 | reply.Term = rf.currentTerm 38 | // prevent election timeouts (§5.2) 39 | rf.resetElectionTime() 40 | 41 | // heartbeat, return 42 | // if args.PrevLogIndex == magic_index && args.PrevLogTerm == magic_term { 43 | // return 44 | // } 45 | 46 | if args.PrevLogIndex < rf.frontLogIndex() { 47 | reply.XTerm, reply.XIndex, reply.Success = -2, rf.frontLogIndex() + 1, false 48 | utils.Debug(utils.DInfo, "S%d args's prevLogIndex too smaller(%v < %v)", rf.me, args.PrevLogIndex, rf.frontLogIndex()) 49 | return 50 | } 51 | 52 | if args.PrevLogIndex > rf.lastLogIndex() { 53 | reply.Success = false 54 | reply.XTerm = -1 55 | reply.XLen = args.PrevLogIndex - rf.lastLogIndex() 56 | return 57 | } 58 | 59 | idx, err 
:= rf.transfer(args.PrevLogIndex) 60 | if err < 0 { 61 | return 62 | } 63 | 64 | if rf.log[idx].Term != args.PrevLogTerm { 65 | reply.Success = false 66 | reply.XTerm = rf.log[idx].Term 67 | reply.XIndex = args.PrevLogIndex 68 | // 0 is a dummy entry => quit in index is 1 69 | // binary search is better than this way 70 | for index := idx; index >= 1; index-- { 71 | if rf.log[index-1].Term != reply.XTerm { 72 | reply.XIndex = index 73 | break 74 | } 75 | } 76 | return 77 | } 78 | 79 | if args.Entries != nil && len(args.Entries) != 0 { 80 | if rf.isConflict(args) { 81 | rf.log = rf.log[:idx+1] 82 | entries := make([]Entry, len(args.Entries)) 83 | copy(entries, args.Entries) 84 | rf.log = append(rf.log, entries...) 85 | // utils.Debug(utils.DInfo, "S%d conflict, truncate log: %+v", rf.me, rf.log) 86 | } else { 87 | // utils.Debug(utils.DInfo, "S%d no conflict, log: %+v", rf.me, rf.log) 88 | } 89 | } else { 90 | utils.Debug(utils.DInfo, "S%d args entries nil or length is 0: %v", rf.me, args.Entries) 91 | } 92 | 93 | if args.LeaderCommit > rf.commitIndex { 94 | rf.commitIndex = args.LeaderCommit 95 | if args.LeaderCommit > rf.lastLogIndex() { 96 | rf.commitIndex = rf.lastLogIndex() 97 | } 98 | utils.Debug(utils.DCommit, "S%d commit to %v(lastLogIndex: %d)", rf.me, rf.commitIndex, rf.lastLogIndex()) 99 | rf.applyCond.Signal() 100 | } 101 | // utils.Debug(utils.DInfo, "S%d log: %+v", rf.me, rf.log) 102 | } 103 | 104 | func (rf *Raft) isConflict(args *AppendEntriesArgs) bool { 105 | base_index := args.PrevLogIndex + 1 106 | for i, entry := range args.Entries { 107 | entry_rf, err := rf.getEntry(i + base_index) 108 | if err < 0 { 109 | return true 110 | } 111 | if entry_rf.Term != entry.Term { 112 | return true 113 | } 114 | } 115 | return false 116 | } 117 | -------------------------------------------------------------------------------- /src/shardctrler/server.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | "time" 7 | 8 | "6.824/labgob" 9 | "6.824/labrpc" 10 | "6.824/raft" 11 | ) 12 | 13 | type ShardCtrler struct { 14 | mu sync.Mutex 15 | me int 16 | rf *raft.Raft 17 | applyCh chan raft.ApplyMsg 18 | dead int32 19 | 20 | // Your data here. 21 | configs *ConfigModel 22 | cmdRespChans map[IndexAndTerm]chan OpResp 23 | LastCmdContext map[int64]OpContext 24 | lastApplied int 25 | } 26 | 27 | // 28 | // the tester calls Kill() when a ShardCtrler instance won't 29 | // be needed again. you are not required to do anything 30 | // in Kill(), but it might be convenient to (for example) 31 | // turn off debug output from this instance. 32 | // 33 | func (sc *ShardCtrler) Kill() { 34 | atomic.StoreInt32(&sc.dead, 1) 35 | sc.rf.Kill() 36 | // Your code here, if desired. 37 | } 38 | 39 | func (sc *ShardCtrler) killed() bool { 40 | z := atomic.LoadInt32(&sc.dead) 41 | return z == 1 42 | } 43 | 44 | // needed by shardsc tester 45 | func (sc *ShardCtrler) Raft() *raft.Raft { 46 | return sc.rf 47 | } 48 | 49 | // 50 | // servers[] contains the ports of the set of 51 | // servers that will cooperate via Raft to 52 | // form the fault-tolerant shardctrler service. 53 | // me is the index of the current server in servers[]. 
54 | // 55 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardCtrler { 56 | sc := new(ShardCtrler) 57 | sc.me = me 58 | 59 | labgob.Register(Op{}) 60 | sc.applyCh = make(chan raft.ApplyMsg) 61 | sc.rf = raft.Make(servers, me, persister, sc.applyCh) 62 | 63 | // Your code here. 64 | sc.configs = NewConfigModel(me) 65 | sc.cmdRespChans = make(map[IndexAndTerm]chan OpResp) 66 | sc.LastCmdContext = make(map[int64]OpContext) 67 | sc.lastApplied = 0 68 | 69 | // long-time goroutines 70 | go sc.applier() 71 | 72 | return sc 73 | } 74 | 75 | // Handler 76 | func (sc *ShardCtrler) Command(args *CommandArgs, reply *CommandReply) { 77 | defer Debug(dWarn, "S%d args: %+v reply: %+v", sc.me, args, reply) 78 | 79 | if !sc.configs.isLegal(args.Op) { 80 | reply.Config, reply.Err = Config{}, ErrOpt 81 | } 82 | 83 | sc.mu.Lock() 84 | if args.Op != OpQuery && sc.isDuplicate(args.ClientId, args.SeqId) { 85 | context := sc.LastCmdContext[args.ClientId] 86 | reply.Config, reply.Err = context.Reply.Config, context.Reply.Err 87 | sc.mu.Unlock() 88 | return 89 | } 90 | sc.mu.Unlock() 91 | 92 | index, term, is_leader := sc.rf.Start(Op(*args)) 93 | if !is_leader { 94 | reply.Config, reply.Err = Config{}, ErrWrongLeader 95 | return 96 | } 97 | 98 | sc.mu.Lock() 99 | it := IndexAndTerm{index, term} 100 | ch := make(chan OpResp, 1) 101 | sc.cmdRespChans[it] = ch 102 | sc.mu.Unlock() 103 | 104 | defer func() { 105 | sc.mu.Lock() 106 | // close(sc.cmdRespChans[index]) 107 | delete(sc.cmdRespChans, it) 108 | sc.mu.Unlock() 109 | close(ch) 110 | }() 111 | 112 | t := time.NewTimer(cmd_timeout) 113 | defer t.Stop() 114 | 115 | for { 116 | sc.mu.Lock() 117 | select { 118 | case resp := <-ch: 119 | Debug(dServer, "S%d have applied, resp: %+v", sc.me, resp) 120 | reply.Config, reply.Err = resp.Config, resp.Err 121 | sc.mu.Unlock() 122 | return 123 | case <-t.C: 124 | priority: 125 | for { 126 | select { 127 | case resp := <-ch: 128 | Debug(dServer, "S%d have applied, resp: %+v", sc.me, resp) 129 | reply.Config, reply.Err = resp.Config, resp.Err 130 | sc.mu.Unlock() 131 | return 132 | default: 133 | break priority 134 | } 135 | } 136 | Debug(dServer, "S%d timeout", sc.me) 137 | reply.Config, reply.Err = Config{}, ErrTimeout 138 | sc.mu.Unlock() 139 | return 140 | default: 141 | sc.mu.Unlock() 142 | time.Sleep(gap_time) 143 | } 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/labgob/test_test.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | import "testing" 4 | 5 | import "bytes" 6 | 7 | type T1 struct { 8 | T1int0 int 9 | T1int1 int 10 | T1string0 string 11 | T1string1 string 12 | } 13 | 14 | type T2 struct { 15 | T2slice []T1 16 | T2map map[int]*T1 17 | T2t3 interface{} 18 | } 19 | 20 | type T3 struct { 21 | T3int999 int 22 | } 23 | 24 | // 25 | // test that we didn't break GOB. 
26 | // 27 | func TestGOB(t *testing.T) { 28 | e0 := errorCount 29 | 30 | w := new(bytes.Buffer) 31 | 32 | Register(T3{}) 33 | 34 | { 35 | x0 := 0 36 | x1 := 1 37 | t1 := T1{} 38 | t1.T1int1 = 1 39 | t1.T1string1 = "6.824" 40 | t2 := T2{} 41 | t2.T2slice = []T1{T1{}, t1} 42 | t2.T2map = map[int]*T1{} 43 | t2.T2map[99] = &T1{1, 2, "x", "y"} 44 | t2.T2t3 = T3{999} 45 | 46 | e := NewEncoder(w) 47 | e.Encode(x0) 48 | e.Encode(x1) 49 | e.Encode(t1) 50 | e.Encode(t2) 51 | } 52 | data := w.Bytes() 53 | 54 | { 55 | var x0 int 56 | var x1 int 57 | var t1 T1 58 | var t2 T2 59 | 60 | r := bytes.NewBuffer(data) 61 | d := NewDecoder(r) 62 | if d.Decode(&x0) != nil || 63 | d.Decode(&x1) != nil || 64 | d.Decode(&t1) != nil || 65 | d.Decode(&t2) != nil { 66 | t.Fatalf("Decode failed") 67 | } 68 | 69 | if x0 != 0 { 70 | t.Fatalf("wrong x0 %v\n", x0) 71 | } 72 | if x1 != 1 { 73 | t.Fatalf("wrong x1 %v\n", x1) 74 | } 75 | if t1.T1int0 != 0 { 76 | t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0) 77 | } 78 | if t1.T1int1 != 1 { 79 | t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1) 80 | } 81 | if t1.T1string0 != "" { 82 | t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0) 83 | } 84 | if t1.T1string1 != "6.824" { 85 | t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1) 86 | } 87 | if len(t2.T2slice) != 2 { 88 | t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice)) 89 | } 90 | if t2.T2slice[1].T1int1 != 1 { 91 | t.Fatalf("wrong slice value\n") 92 | } 93 | if len(t2.T2map) != 1 { 94 | t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map)) 95 | } 96 | if t2.T2map[99].T1string1 != "y" { 97 | t.Fatalf("wrong map value\n") 98 | } 99 | t3 := (t2.T2t3).(T3) 100 | if t3.T3int999 != 999 { 101 | t.Fatalf("wrong t2.T2t3.T3int999\n") 102 | } 103 | } 104 | 105 | if errorCount != e0 { 106 | t.Fatalf("there were errors, but should not have been") 107 | } 108 | } 109 | 110 | type T4 struct { 111 | Yes int 112 | no int 113 | } 114 | 115 | // 116 | // make sure we check capitalization 117 | // labgob prints one warning during this test. 118 | // 119 | func TestCapital(t *testing.T) { 120 | e0 := errorCount 121 | 122 | v := []map[*T4]int{} 123 | 124 | w := new(bytes.Buffer) 125 | e := NewEncoder(w) 126 | e.Encode(v) 127 | data := w.Bytes() 128 | 129 | var v1 []map[T4]int 130 | r := bytes.NewBuffer(data) 131 | d := NewDecoder(r) 132 | d.Decode(&v1) 133 | 134 | if errorCount != e0+1 { 135 | t.Fatalf("failed to warn about lower-case field") 136 | } 137 | } 138 | 139 | // 140 | // check that we warn when someone sends a default value over 141 | // RPC but the target into which we're decoding holds a non-default 142 | // value, which GOB seems not to overwrite as you'd expect. 143 | // 144 | // labgob does not print a warning. 145 | // 146 | func TestDefault(t *testing.T) { 147 | e0 := errorCount 148 | 149 | type DD struct { 150 | X int 151 | } 152 | 153 | // send a default value... 154 | dd1 := DD{} 155 | 156 | w := new(bytes.Buffer) 157 | e := NewEncoder(w) 158 | e.Encode(dd1) 159 | data := w.Bytes() 160 | 161 | // and receive it into memory that already 162 | // holds non-default values. 
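// (encoding/gob omits zero-valued fields when encoding, so decoding dd1 into
// reply below leaves reply.X at 99 instead of resetting it to 0; this is why
// reply structs handed to RPC calls should start out as zero values rather
// than being reused)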
163 | reply := DD{99} 164 | 165 | r := bytes.NewBuffer(data) 166 | d := NewDecoder(r) 167 | d.Decode(&reply) 168 | 169 | if errorCount != e0+1 { 170 | t.Fatalf("failed to warn about decoding into non-default value") 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/shardkv/server.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | 7 | "6.824/labgob" 8 | "6.824/labrpc" 9 | "6.824/raft" 10 | "6.824/shardctrler" 11 | ) 12 | 13 | type ShardKV struct { 14 | mu sync.Mutex 15 | me int 16 | rf *raft.Raft 17 | applyCh chan raft.ApplyMsg 18 | dead int32 // set by Kill() 19 | makeEnd func(string) *labrpc.ClientEnd 20 | gid int 21 | // ctrlers []*labrpc.ClientEnd 22 | maxraftstate int // snapshot if log grows this big 23 | 24 | // Your definitions here. 25 | shards map[int]*Shard 26 | cmdRespChans map[IndexAndTerm]chan OpResp 27 | lastApplied int 28 | lastSnapshot int 29 | 30 | lastConfig shardctrler.Config 31 | currentConfig shardctrler.Config 32 | sc *shardctrler.Clerk 33 | } 34 | 35 | // 36 | // the tester calls Kill() when a ShardKV instance won't 37 | // be needed again. you are not required to do anything 38 | // in Kill(), but it might be convenient to (for example) 39 | // turn off debug output from this instance. 40 | // 41 | func (kv *ShardKV) Kill() { 42 | atomic.StoreInt32(&kv.dead, 1) 43 | // Your code here, if desired. 44 | kv.mu.Lock() 45 | defer kv.mu.Unlock() 46 | //fmt.Printf("---kill\n") 47 | kv.doSnapshot(kv.lastApplied) 48 | kv.rf.Kill() 49 | Debug(dWarn, "G%+v {S%+v} close shards: %+v config: %+v", kv.gid, kv.me, kv.shards, kv.currentConfig) 50 | } 51 | 52 | func (kv *ShardKV) killed() bool { 53 | z := atomic.LoadInt32(&kv.dead) 54 | return z == 1 55 | } 56 | 57 | // 58 | // servers[] contains the ports of the servers in this group. 59 | // 60 | // me is the index of the current server in servers[]. 61 | // 62 | // the k/v server should store snapshots through the underlying Raft 63 | // implementation, which should call persister.SaveStateAndSnapshot() to 64 | // atomically save the Raft state along with the snapshot. 65 | // 66 | // the k/v server should snapshot when Raft's saved state exceeds 67 | // maxraftstate bytes, in order to allow Raft to garbage-collect its 68 | // log. if maxraftstate is -1, you don't need to snapshot. 69 | // 70 | // gid is this group's GID, for interacting with the shardctrler. 71 | // 72 | // pass ctrlers[] to shardctrler.MakeClerk() so you can send 73 | // RPCs to the shardctrler. 74 | // 75 | // make_end(servername) turns a server name from a 76 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 77 | // send RPCs. You'll need this to send RPCs to other groups. 78 | // 79 | // look at client.go for examples of how to use ctrlers[] 80 | // and make_end() to send RPCs to the group owning a specific shard. 81 | // 82 | // StartServer() must return quickly, so it should start goroutines 83 | // for any long-running work. 84 | // 85 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, ctrlers []*labrpc.ClientEnd, makeEnd func(string) *labrpc.ClientEnd) *ShardKV { 86 | // call labgob.Register on structures you want 87 | // Go's RPC library to marshall/unmarshall. 
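// every concrete type that travels through the Raft log or over RPC here
// (commands, shardctrler configs, shard-migration payloads) must be
// registered, or gob will fail to decode it on the applying side.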
88 | labgob.Register(Command{}) 89 | labgob.Register(CmdArgs{}) 90 | labgob.Register(shardctrler.Config{}) 91 | labgob.Register(PullDataReply{}) 92 | labgob.Register(PullDataArgs{}) 93 | 94 | kv := new(ShardKV) 95 | kv.me = me 96 | kv.maxraftstate = maxraftstate 97 | kv.makeEnd = makeEnd 98 | kv.gid = gid 99 | // kv.ctrlers = ctrlers 100 | 101 | // Use something like this to talk to the shardctrler: 102 | // kv.mck = shardctrler.MakeClerk(kv.ctrlers) 103 | kv.applyCh = make(chan raft.ApplyMsg, 5) 104 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 105 | kv.sc = shardctrler.MakeClerk(ctrlers) 106 | 107 | // Your initialization code here. 108 | kv.shards = make(map[int]*Shard) 109 | kv.cmdRespChans = make(map[IndexAndTerm]chan OpResp) 110 | kv.lastApplied = 0 111 | kv.lastSnapshot = 0 112 | 113 | // load data from persister 114 | kv.setSnapshot(persister.ReadSnapshot()) 115 | 116 | // long-time goroutines 117 | go kv.applier() 118 | go kv.snapshoter() 119 | kv.startMonitor() 120 | 121 | return kv 122 | } 123 | -------------------------------------------------------------------------------- /src/utils/dslogs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Script for pretty printing logs of MIT 6.824 labs 3 | import sys 4 | import shutil 5 | from typing import Optional, List, Tuple, Dict 6 | 7 | import typer 8 | from rich import print 9 | from rich.columns import Columns 10 | from rich.console import Console 11 | from rich.traceback import install 12 | 13 | # fmt: off 14 | # Mapping from topics to colors 15 | TOPICS = { 16 | "TIMR": "#9a9a99", 17 | "VOTE": "#67a0b2", 18 | "LEAD": "#d0b343", 19 | "TERM": "#70c43f", 20 | "LOG1": "#4878bc", 21 | "LOG2": "#398280", 22 | "CMIT": "#98719f", 23 | "PERS": "#d08341", 24 | "SNAP": "#FD971F", 25 | "DROP": "#ff615c", 26 | "CLNT": "#00813c", 27 | "TEST": "#fe2c79", 28 | "INFO": "#ffffff", 29 | "WARN": "#d08341", 30 | "ERRO": "#fe2626", 31 | "TRCE": "#fe2626", 32 | "SEVR": "#00813c", 33 | } 34 | # fmt: on 35 | 36 | 37 | def list_topics(value: Optional[str]): 38 | if value is None: 39 | return value 40 | topics = value.split(",") 41 | for topic in topics: 42 | if topic not in TOPICS: 43 | raise typer.BadParameter(f"topic {topic} not recognized") 44 | return topics 45 | 46 | 47 | def main( 48 | file: typer.FileText = typer.Argument(None, help="File to read, stdin otherwise"), 49 | colorize: bool = typer.Option(True, "--no-color"), 50 | n_columns: Optional[int] = typer.Option(None, "--columns", "-c"), 51 | ignore: Optional[str] = typer.Option(None, "--ignore", "-i", callback=list_topics), 52 | just: Optional[str] = typer.Option(None, "--just", "-j", callback=list_topics), 53 | ): 54 | topics = list(TOPICS) 55 | 56 | # We can take input from a stdin (pipes) or from a file 57 | input_ = file if file else sys.stdin 58 | # Print just some topics or exclude some topics (good for avoiding verbose ones) 59 | if just: 60 | topics = just 61 | if ignore: 62 | topics = [lvl for lvl in topics if lvl not in set(ignore)] 63 | 64 | topics = set(topics) 65 | console = Console() 66 | width = console.size.width 67 | 68 | panic = False 69 | for line in input_: 70 | try: 71 | time, topic, *msg = line.strip().split(" ") 72 | # To ignore some topics 73 | if topic not in topics: 74 | continue 75 | 76 | msg = " ".join(msg) 77 | 78 | # utils.Debug calls from the test suite aren't associated with 79 | # any particular peer. 
Otherwise we can treat second column 80 | # as peer id 81 | if topic != "TEST": 82 | i = int(msg[1]) 83 | 84 | # Colorize output by using rich syntax when needed 85 | if colorize and topic in TOPICS: 86 | color = TOPICS[topic] 87 | msg = f"[{color}]{msg}[/{color}]" 88 | 89 | # Single column printing. Always the case for debug stmts in tests 90 | if n_columns is None or topic == "TEST": 91 | print(time, msg) 92 | # Multi column printing, timing is dropped to maximize horizontal 93 | # space. Heavylifting is done through rich.column.Columns object 94 | else: 95 | cols = ["" for _ in range(n_columns)] 96 | msg = "" + msg 97 | cols[i] = msg 98 | col_width = int(width / n_columns) 99 | cols = Columns(cols, width=col_width - 1, equal=True, expand=True) 100 | print(cols) 101 | except: 102 | # Code from tests or panics does not follow format 103 | # so we print it as is 104 | if line.startswith("panic"): 105 | panic = True 106 | # Output from tests is usually important so add a 107 | # horizontal line with hashes to make it more obvious 108 | if not panic: 109 | print("#" * console.width) 110 | print(line, end="") 111 | 112 | 113 | if __name__ == "__main__": 114 | typer.run(main) -------------------------------------------------------------------------------- /src/shardkv/dslogs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Script for pretty printing logs of MIT 6.824 labs 3 | import sys 4 | import shutil 5 | from typing import Optional, List, Tuple, Dict 6 | 7 | import typer 8 | from rich import print 9 | from rich.columns import Columns 10 | from rich.console import Console 11 | from rich.traceback import install 12 | 13 | # fmt: off 14 | # Mapping from topics to colors 15 | TOPICS = { 16 | "TIMR": "#9a9a99", 17 | "VOTE": "#67a0b2", 18 | "LEAD": "#d0b343", 19 | "TERM": "#70c43f", 20 | "LOG1": "#4878bc", 21 | "LOG2": "#398280", 22 | "CMIT": "#98719f", 23 | "PERS": "#d08341", 24 | "SNAP": "#FD971F", 25 | "DROP": "#ff615c", 26 | "CLNT": "#00813c", 27 | "TEST": "#fe2c79", 28 | "INFO": "#ffffff", 29 | "WARN": "#d08341", 30 | "ERRO": "#fe2626", 31 | "TRCE": "#fe2626", 32 | "SEVR": "#00813c", 33 | } 34 | # fmt: on 35 | 36 | 37 | def list_topics(value: Optional[str]): 38 | if value is None: 39 | return value 40 | topics = value.split(",") 41 | for topic in topics: 42 | if topic not in TOPICS: 43 | raise typer.BadParameter(f"topic {topic} not recognized") 44 | return topics 45 | 46 | 47 | def main( 48 | file: typer.FileText = typer.Argument(None, help="File to read, stdin otherwise"), 49 | colorize: bool = typer.Option(True, "--no-color"), 50 | n_columns: Optional[int] = typer.Option(None, "--columns", "-c"), 51 | ignore: Optional[str] = typer.Option(None, "--ignore", "-i", callback=list_topics), 52 | just: Optional[str] = typer.Option(None, "--just", "-j", callback=list_topics), 53 | ): 54 | topics = list(TOPICS) 55 | 56 | # We can take input from a stdin (pipes) or from a file 57 | input_ = file if file else sys.stdin 58 | # Print just some topics or exclude some topics (good for avoiding verbose ones) 59 | if just: 60 | topics = just 61 | if ignore: 62 | topics = [lvl for lvl in topics if lvl not in set(ignore)] 63 | 64 | topics = set(topics) 65 | console = Console() 66 | width = console.size.width 67 | 68 | panic = False 69 | for line in input_: 70 | try: 71 | time, topic, *msg = line.strip().split(" ") 72 | # To ignore some topics 73 | if topic not in topics: 74 | continue 75 | 76 | msg = " ".join(msg) 77 | 78 | # utils.Debug calls 
from the test suite aren't associated with 79 | # any particular peer. Otherwise we can treat second column 80 | # as peer id 81 | if topic != "TEST": 82 | i = int(msg[3]) 83 | 84 | # Colorize output by using rich syntax when needed 85 | if colorize and topic in TOPICS: 86 | color = TOPICS[topic] 87 | msg = f"[{color}]{msg}[/{color}]" 88 | 89 | # Single column printing. Always the case for debug stmts in tests 90 | if n_columns is None or topic == "TEST": 91 | print(time, msg) 92 | # Multi column printing, timing is dropped to maximize horizontal 93 | # space. Heavylifting is done through rich.column.Columns object 94 | else: 95 | cols = ["" for _ in range(n_columns)] 96 | msg = "" + msg 97 | cols[i] = msg 98 | col_width = int(width / n_columns) 99 | cols = Columns(cols, width=col_width - 1, equal=True, expand=True) 100 | print(cols) 101 | except: 102 | # Code from tests or panics does not follow format 103 | # so we print it as is 104 | if line.startswith("panic"): 105 | panic = True 106 | # Output from tests is usually important so add a 107 | # horizontal line with hashes to make it more obvious 108 | if not panic: 109 | print("#" * console.width) 110 | print(line, end="") 111 | 112 | 113 | if __name__ == "__main__": 114 | typer.run(main) -------------------------------------------------------------------------------- /src/raft/interface.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // 6 | // the service using Raft (e.g. a k/v server) wants to start 7 | // agreement on the next command to be appended to Raft's log. if this 8 | // server isn't the leader, returns false. otherwise start the 9 | // agreement and return immediately. there is no guarantee that this 10 | // command will ever be committed to the Raft log, since the leader 11 | // may fail or lose an election. even if the Raft instance has been killed, 12 | // this function should return gracefully. 13 | // 14 | // the first return value is the index that the command will appear at 15 | // if it's ever committed. the second return value is the current 16 | // term. the third return value is true if this server believes it is 17 | // the leader. 18 | // 19 | func (rf *Raft) Start(command interface{}) (int, int, bool) { 20 | rf.mu.Lock() 21 | defer rf.mu.Unlock() 22 | 23 | if rf.status != leader { 24 | utils.Debug(utils.DClient, "S%d Not leader cmd: %+v", rf.me, command) 25 | return -1, -1, false 26 | } 27 | 28 | index := rf.lastLogIndex() + 1 29 | rf.log = append(rf.log, Entry{index, rf.currentTerm, command}) 30 | rf.persist() 31 | 32 | // defer utils.Debug(utils.DLog2, "S%d append log: %+v", rf.me, rf.log) 33 | utils.Debug(utils.DClient, "S%d cmd: %+v, logIndex: %d", rf.me, command, rf.lastLogIndex()) 34 | 35 | rf.doAppendEntries() 36 | 37 | return rf.lastLogIndex(), rf.currentTerm, true 38 | } 39 | 40 | // 41 | // A service wants to switch to snapshot. Only do so if Raft hasn't 42 | // have more recent info since it communicate the snapshot on applyCh. 43 | // 44 | func (rf *Raft) CondInstallSnapshot(lastIncludedTerm int, lastIncludedIndex int, snapshot []byte) bool { 45 | // Your code here (2D). 
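// outline of the code below: refuse the snapshot if it is no newer than
// commitIndex; otherwise trim the log so that entry 0 becomes a dummy
// carrying lastIncludedIndex/lastIncludedTerm, persist the snapshot, and
// advance lastApplied and commitIndex up to lastIncludedIndex.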
46 | rf.mu.Lock() 47 | defer rf.mu.Unlock() 48 | 49 | utils.Debug(utils.DSnap, "S%d CondInstallSnapshot(lastIncludedTerm: %d lastIncludedIndex: %d lastApplied: %d commitIndex: %d)", rf.me, lastIncludedTerm, lastIncludedIndex, rf.lastApplied, rf.commitIndex) 50 | 51 | if lastIncludedIndex <= rf.commitIndex { 52 | utils.Debug(utils.DSnap, "S%d refuse, snapshot too old(%d <= %d)", rf.me, lastIncludedIndex, rf.frontLogIndex()) 53 | return false 54 | } 55 | 56 | if lastIncludedIndex > rf.lastLogIndex() { 57 | rf.log = make([]Entry, 1) 58 | } else { 59 | // in range, ignore out of range error 60 | idx, _ := rf.transfer(lastIncludedIndex) 61 | rf.log = rf.log[idx:] 62 | } 63 | // dummy node 64 | rf.log[0].Term = lastIncludedTerm 65 | rf.log[0].Index = lastIncludedIndex 66 | rf.log[0].Cmd = nil 67 | 68 | rf.persistSnapshot(snapshot) 69 | 70 | // reset commit 71 | if lastIncludedIndex > rf.lastApplied { 72 | rf.lastApplied = lastIncludedIndex 73 | } 74 | if lastIncludedIndex > rf.commitIndex { 75 | rf.commitIndex = lastIncludedIndex 76 | } 77 | 78 | // utils.Debug(utils.DSnap, "S%d after CondInstallSnapshot(lastApplied: %d commitIndex: %d) {%+v}", rf.me, rf.lastApplied, rf.commitIndex, rf.log) 79 | 80 | return true 81 | } 82 | 83 | // the service says it has created a snapshot that has 84 | // all info up to and including index. this means the 85 | // service no longer needs the log through (and including) 86 | // that index. Raft should now trim its log as much as possible. 87 | func (rf *Raft) Snapshot(index int, snapshot []byte) { 88 | // Your code here (2D). 89 | rf.mu.Lock() 90 | defer rf.mu.Unlock() 91 | 92 | utils.Debug(utils.DSnap, "S%d call Snapshot, index: %d", rf.me, index) 93 | 94 | // refuse to install a snapshot 95 | if rf.frontLogIndex() >= index { 96 | utils.Debug(utils.DSnap, "S%d refuse, have received %d snapshot", rf.me, index) 97 | return 98 | } 99 | 100 | idx, err := rf.transfer(index) 101 | if err < 0 { 102 | idx = len(rf.log) - 1 103 | } 104 | //before := len(rf.log) 105 | // let last snapshot node as dummy node 106 | rf.log = rf.log[idx:] 107 | rf.log[0].Cmd = nil // dummy node 108 | rf.persistSnapshot(snapshot) 109 | //fmt.Printf("S%d idx: %d log len before: %d after: %d\n", rf.me, idx, before, len(rf.log)) 110 | // utils.Debug(utils.DSnap, "S%d call Snapshot success, index: %d {%+v}", rf.me, index, rf.log) 111 | } 112 | -------------------------------------------------------------------------------- /src/raft/rpc.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "6.824/utils" 4 | 5 | // 6 | // example RequestVote RPC arguments structure. 7 | // field names must start with capital letters! 8 | // 9 | type RequestVoteArgs struct { 10 | // Your data here (2A, 2B). 11 | Term int 12 | CandidateId int 13 | LastLogIndex int 14 | LastLogTerm int 15 | } 16 | 17 | type RequestVoteReply struct { 18 | // Your data here (2A). 19 | Term int 20 | VoteGranted bool 21 | } 22 | 23 | type AppendEntriesArgs struct { 24 | Term int 25 | LeaderId int 26 | PrevLogIndex int 27 | PrevLogTerm int 28 | Entries []Entry 29 | LeaderCommit int 30 | } 31 | 32 | type AppendEntriesReply struct { 33 | Term int 34 | Success bool 35 | XTerm int // for fast backup 36 | XIndex int 37 | XLen int 38 | } 39 | 40 | type InstallSnapshotArgs struct { 41 | // Your data here (2A, 2B). 
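// the fields mirror the Raft paper's InstallSnapshot RPC; offset and done
// are omitted because this implementation ships the snapshot in one RPC.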
42 | Term int 43 | LeaderId int 44 | LastIncludedIndex int 45 | LastIncludedTerm int 46 | Data []byte 47 | } 48 | 49 | type InstallSnapshotReply struct { 50 | Term int 51 | } 52 | 53 | // 54 | // example code to send a RequestVote RPC to a server. 55 | // server is the index of the target server in rf.peers[]. 56 | // expects RPC arguments in args. 57 | // fills in *reply with RPC reply, so caller should 58 | // pass &reply. 59 | // the types of the args and reply passed to Call() must be 60 | // the same as the types of the arguments declared in the 61 | // handler function (including whether they are pointers). 62 | // 63 | // The labrpc package simulates a lossy network, in which servers 64 | // may be unreachable, and in which requests and replies may be lost. 65 | // Call() sends a request and waits for a reply. If a reply arrives 66 | // within a timeout interval, Call() returns true; otherwise 67 | // Call() returns false. Thus Call() may not return for a while. 68 | // A false return can be caused by a dead server, a live server that 69 | // can't be reached, a lost request, or a lost reply. 70 | // 71 | // Call() is guaranteed to return (perhaps after a delay) *except* if the 72 | // handler function on the server side does not return. Thus there 73 | // is no need to implement your own timeouts around Call(). 74 | // 75 | // look at the comments in ../labrpc/labrpc.go for more details. 76 | // 77 | // if you're having trouble getting RPC to work, check that you've 78 | // capitalized all field names in structs passed over RPC, and 79 | // that the caller passes the address of the reply struct with &, not 80 | // the struct itself. 81 | // 82 | func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, reply *RequestVoteReply) bool { 83 | utils.Debug(utils.DInfo, "S%d send RequestVote request to %d {%+v}", rf.me, server, args) 84 | ok := rf.peers[server].Call("Raft.RequestVote", args, reply) 85 | if !ok { 86 | utils.Debug(utils.DWarn, "S%d call (RequestVote)rpc to C%d error", rf.me, server) 87 | return ok 88 | } 89 | utils.Debug(utils.DInfo, "S%d get RequestVote response from %d {%+v}", rf.me, server, reply) 90 | return ok 91 | } 92 | 93 | func (rf *Raft) sendAppendEntries(server int, args *AppendEntriesArgs, reply *AppendEntriesReply) bool { 94 | utils.Debug(utils.DInfo, "S%d send AppendEntries request to %d {%+v}", rf.me, server, args) 95 | ok := rf.peers[server].Call("Raft.AppendEntries", args, reply) 96 | if !ok { 97 | utils.Debug(utils.DWarn, "S%d call (AppendEntries)rpc to C%d error", rf.me, server) 98 | return ok 99 | } 100 | utils.Debug(utils.DInfo, "S%d get AppendEntries response from %d {%+v}", rf.me, server, reply) 101 | return ok 102 | } 103 | 104 | func (rf *Raft) sendInstallSnapshot(server int, args *InstallSnapshotArgs, reply *InstallSnapshotReply) bool { 105 | utils.Debug(utils.DInfo, "S%d send InstallSnapshot request to %d {%+v}", rf.me, server, args) 106 | ok := rf.peers[server].Call("Raft.InstallSnapshot", args, reply) 107 | if !ok { 108 | utils.Debug(utils.DWarn, "S%d call (InstallSnapshot)rpc to C%d error", rf.me, server) 109 | return ok 110 | } 111 | utils.Debug(utils.DInfo, "S%d get InstallSnapshot response from %d {%+v}", rf.me, server, reply) 112 | return ok 113 | } 114 | -------------------------------------------------------------------------------- /src/labgob/labgob.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | // 4 | // trying to send non-capitalized fields over RPC produces a 
range of 5 | // misbehavior, including both mysterious incorrect computation and 6 | // outright crashes. so this wrapper around Go's encoding/gob warns 7 | // about non-capitalized field names. 8 | // 9 | 10 | import "encoding/gob" 11 | import "io" 12 | import "reflect" 13 | import "fmt" 14 | import "sync" 15 | import "unicode" 16 | import "unicode/utf8" 17 | 18 | var mu sync.Mutex 19 | var errorCount int // for TestCapital 20 | var checked map[reflect.Type]bool 21 | 22 | type LabEncoder struct { 23 | gob *gob.Encoder 24 | } 25 | 26 | func NewEncoder(w io.Writer) *LabEncoder { 27 | enc := &LabEncoder{} 28 | enc.gob = gob.NewEncoder(w) 29 | return enc 30 | } 31 | 32 | func (enc *LabEncoder) Encode(e interface{}) error { 33 | checkValue(e) 34 | return enc.gob.Encode(e) 35 | } 36 | 37 | func (enc *LabEncoder) EncodeValue(value reflect.Value) error { 38 | checkValue(value.Interface()) 39 | return enc.gob.EncodeValue(value) 40 | } 41 | 42 | type LabDecoder struct { 43 | gob *gob.Decoder 44 | } 45 | 46 | func NewDecoder(r io.Reader) *LabDecoder { 47 | dec := &LabDecoder{} 48 | dec.gob = gob.NewDecoder(r) 49 | return dec 50 | } 51 | 52 | func (dec *LabDecoder) Decode(e interface{}) error { 53 | checkValue(e) 54 | checkDefault(e) 55 | return dec.gob.Decode(e) 56 | } 57 | 58 | func Register(value interface{}) { 59 | checkValue(value) 60 | gob.Register(value) 61 | } 62 | 63 | func RegisterName(name string, value interface{}) { 64 | checkValue(value) 65 | gob.RegisterName(name, value) 66 | } 67 | 68 | func checkValue(value interface{}) { 69 | checkType(reflect.TypeOf(value)) 70 | } 71 | 72 | func checkType(t reflect.Type) { 73 | k := t.Kind() 74 | 75 | mu.Lock() 76 | // only complain once, and avoid recursion. 77 | if checked == nil { 78 | checked = map[reflect.Type]bool{} 79 | } 80 | if checked[t] { 81 | mu.Unlock() 82 | return 83 | } 84 | checked[t] = true 85 | mu.Unlock() 86 | 87 | switch k { 88 | case reflect.Struct: 89 | for i := 0; i < t.NumField(); i++ { 90 | f := t.Field(i) 91 | rune, _ := utf8.DecodeRuneInString(f.Name) 92 | if unicode.IsUpper(rune) == false { 93 | // ta da 94 | fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n", 95 | f.Name, t.Name()) 96 | mu.Lock() 97 | errorCount += 1 98 | mu.Unlock() 99 | } 100 | checkType(f.Type) 101 | } 102 | return 103 | case reflect.Slice, reflect.Array, reflect.Ptr: 104 | checkType(t.Elem()) 105 | return 106 | case reflect.Map: 107 | checkType(t.Elem()) 108 | checkType(t.Key()) 109 | return 110 | default: 111 | return 112 | } 113 | } 114 | 115 | // 116 | // warn if the value contains non-default values, 117 | // as it would if one sent an RPC but the reply 118 | // struct was already modified. if the RPC reply 119 | // contains default values, GOB won't overwrite 120 | // the non-default value. 121 | // 122 | func checkDefault(value interface{}) { 123 | if value == nil { 124 | return 125 | } 126 | checkDefault1(reflect.ValueOf(value), 1, "") 127 | } 128 | 129 | func checkDefault1(value reflect.Value, depth int, name string) { 130 | if depth > 3 { 131 | return 132 | } 133 | 134 | t := value.Type() 135 | k := t.Kind() 136 | 137 | switch k { 138 | case reflect.Struct: 139 | for i := 0; i < t.NumField(); i++ { 140 | vv := value.Field(i) 141 | name1 := t.Field(i).Name 142 | if name != "" { 143 | name1 = name + "." 
+ name1 144 | } 145 | checkDefault1(vv, depth+1, name1) 146 | } 147 | return 148 | case reflect.Ptr: 149 | if value.IsNil() { 150 | return 151 | } 152 | checkDefault1(value.Elem(), depth+1, name) 153 | return 154 | case reflect.Bool, 155 | reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 156 | reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, 157 | reflect.Uintptr, reflect.Float32, reflect.Float64, 158 | reflect.String: 159 | if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false { 160 | mu.Lock() 161 | if errorCount < 1 { 162 | what := name 163 | if what == "" { 164 | what = t.Name() 165 | } 166 | // this warning typically arises if code re-uses the same RPC reply 167 | // variable for multiple RPC calls, or if code restores persisted 168 | // state into variable that already have non-default values. 169 | fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n", 170 | what) 171 | } 172 | errorCount += 1 173 | mu.Unlock() 174 | } 175 | return 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/raft/raft.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // this is an outline of the API that raft must expose to 5 | // the service (or tester). see comments below for 6 | // each of these functions for more details. 7 | // 8 | // rf = Make(...) 9 | // create a new Raft server. 10 | // rf.Start(command interface{}) (index, term, isleader) 11 | // start agreement on a new log entry 12 | // rf.GetState() (term, isLeader) 13 | // ask a Raft for its current term, and whether it thinks it is leader 14 | // ApplyMsg 15 | // each time a new entry is committed to the log, each Raft peer 16 | // should send an ApplyMsg to the service (or tester) 17 | // in the same server. 18 | // 19 | 20 | import ( 21 | "sync" 22 | "sync/atomic" 23 | "time" 24 | 25 | "6.824/labrpc" 26 | "6.824/utils" 27 | ) 28 | 29 | // 30 | // A Go object implementing a single Raft peer. 31 | // 32 | type Raft struct { 33 | mu sync.Mutex // Lock to protect shared access to this peer's state 34 | applyCond *sync.Cond // haven't used now, it seem can be used for apply 35 | peers []*labrpc.ClientEnd // RPC end points of all peers 36 | persister *Persister // Object to hold this peer's persisted state 37 | me int // this peer's index into peers[] 38 | dead int32 // set by Kill() 39 | status ServerStatus 40 | applyCh chan ApplyMsg 41 | 42 | // Your data here (2A, 2B, 2C). 43 | // Look at the paper's Figure 2 for a description of what 44 | // state a Raft server must maintain. 45 | 46 | // persistent for all servers 47 | currentTerm int 48 | votedFor int 49 | log []Entry 50 | 51 | // volatile for all servers 52 | commitIndex int 53 | lastApplied int 54 | 55 | // volatile for leaders 56 | nextIndex []int 57 | matchIndex []int 58 | 59 | // private 60 | electionTime time.Time 61 | heartbeatTime time.Time 62 | } 63 | 64 | // 65 | // the tester doesn't halt goroutines created by Raft after each test, 66 | // but it does call the Kill() method. your code can use killed() to 67 | // check whether Kill() has been called. the use of atomic avoids the 68 | // need for a lock. 69 | // 70 | // the issue is that long-running goroutines use memory and may chew 71 | // up CPU time, perhaps causing later tests to fail and generating 72 | // confusing debug output. 
any goroutine with a long-running loop 73 | // should call killed() to check whether it should stop. 74 | // 75 | func (rf *Raft) Kill() { 76 | atomic.StoreInt32(&rf.dead, 1) 77 | // Your code here, if desired. 78 | } 79 | 80 | func (rf *Raft) killed() bool { 81 | z := atomic.LoadInt32(&rf.dead) 82 | return z == 1 83 | } 84 | 85 | func (rf *Raft) leaderInit() { 86 | rf.nextIndex = make([]int, len(rf.peers)) 87 | rf.matchIndex = make([]int, len(rf.peers)) 88 | 89 | for i := range rf.nextIndex { 90 | rf.nextIndex[i] = rf.lastLogIndex() + 1 91 | rf.matchIndex[i] = 0 92 | } 93 | 94 | rf.resetHeartbeatTime() 95 | } 96 | 97 | func (rf *Raft) init() { 98 | rf.status = follower 99 | rf.applyCond = sync.NewCond(&rf.mu) 100 | // persistent for all servers 101 | rf.currentTerm = 0 102 | rf.votedFor = voted_nil // means that vote for nobody 103 | rf.log = make([]Entry, 0) 104 | // use first log entry as last snapshot index 105 | // also it's dummy node!! 106 | rf.log = append(rf.log, Entry{magic_index, magic_term, nil}) 107 | // volatile for all servers, will be changed in persister read 108 | rf.commitIndex = 0 109 | rf.lastApplied = 0 110 | // private 111 | // begin with follower, set election time 112 | rf.resetElectionTime() 113 | } 114 | 115 | // 116 | // the service or tester wants to create a Raft server. the ports 117 | // of all the Raft servers (including this one) are in peers[]. this 118 | // server's port is peers[me]. all the servers' peers[] arrays 119 | // have the same order. persister is a place for this server to 120 | // save its persistent state, and also initially holds the most 121 | // recent saved state, if any. applyCh is a channel on which the 122 | // tester or service expects Raft to send ApplyMsg messages. 123 | // Make() must return quickly, so it should start goroutines 124 | // for any long-running work. 125 | // 126 | func Make(peers []*labrpc.ClientEnd, me int, 127 | persister *Persister, applyCh chan ApplyMsg) *Raft { 128 | rf := &Raft{ 129 | peers: peers, 130 | persister: persister, 131 | me: me, 132 | applyCh: applyCh, 133 | } 134 | 135 | // Your initialization code here (2A, 2B, 2C). 136 | rf.init() 137 | 138 | utils.Debug(utils.DClient, "S%d Started && init success", rf.me) 139 | 140 | // initialize from state persisted before a crash 141 | rf.readPersist(persister.ReadRaftState()) 142 | 143 | // start ticker goroutine to start elections 144 | go rf.ticker() 145 | go rf.applyLog() 146 | 147 | return rf 148 | } 149 | -------------------------------------------------------------------------------- /src/kvraft/server.go: -------------------------------------------------------------------------------- 1 | package kvraft 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | "time" 7 | 8 | "6.824/labgob" 9 | "6.824/labrpc" 10 | "6.824/raft" 11 | "6.824/utils" 12 | ) 13 | 14 | type KVServer struct { 15 | mu sync.Mutex 16 | me int 17 | rf *raft.Raft 18 | applyCh chan raft.ApplyMsg 19 | dead int32 // set by Kill() 20 | 21 | maxraftstate int // snapshot if log grows this big 22 | 23 | // Your definitions here. 24 | KvMap *KV 25 | cmdRespChans map[IndexAndTerm]chan OpResp 26 | LastCmdContext map[int64]OpContext 27 | lastApplied int 28 | lastSnapshot int 29 | } 30 | 31 | // 32 | // the tester calls Kill() when a KVServer instance won't 33 | // be needed again. for your convenience, we supply 34 | // code to set rf.dead (without needing a lock), 35 | // and a killed() method to test rf.dead in 36 | // long-running loops. you can also add your own 37 | // code to Kill(). 
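// this server's Kill() additionally writes a final snapshot of the
// applied state before killing the underlying Raft instance.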
you're not required to do anything 38 | // about this, but it may be convenient (for example) 39 | // to suppress debug output from a Kill()ed instance. 40 | // 41 | func (kv *KVServer) Kill() { 42 | atomic.StoreInt32(&kv.dead, 1) 43 | // Your code here, if desired. 44 | kv.mu.Lock() 45 | defer kv.mu.Unlock() 46 | //fmt.Printf("---kill\n") 47 | kv.doSnapshot(kv.lastApplied) 48 | kv.rf.Kill() 49 | } 50 | 51 | func (kv *KVServer) killed() bool { 52 | z := atomic.LoadInt32(&kv.dead) 53 | return z == 1 54 | } 55 | 56 | // 57 | // servers[] contains the ports of the set of 58 | // servers that will cooperate via Raft to 59 | // form the fault-tolerant key/value service. 60 | // me is the index of the current server in servers[]. 61 | // the k/v server should store snapshots through the underlying Raft 62 | // implementation, which should call persister.SaveStateAndSnapshot() to 63 | // atomically save the Raft state along with the snapshot. 64 | // the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, 65 | // in order to allow Raft to garbage-collect its log. if maxraftstate is -1, 66 | // you don't need to snapshot. 67 | // StartKVServer() must return quickly, so it should start goroutines 68 | // for any long-running work. 69 | // 70 | func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer { 71 | // call labgob.Register on structures you want 72 | // Go's RPC library to marshall/unmarshall. 73 | labgob.Register(Op{}) 74 | 75 | kv := new(KVServer) 76 | kv.me = me 77 | kv.maxraftstate = maxraftstate 78 | 79 | // You may need initialization code here. 80 | 81 | kv.applyCh = make(chan raft.ApplyMsg, 5) 82 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 83 | 84 | // You may need initialization code here. 
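// state set up below: the in-memory KV store, one response channel per
// (index, term) so RPC handlers can wait for their command to be applied,
// and a per-client record of the last executed request for deduplication.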
85 | kv.KvMap = NewKV() 86 | kv.cmdRespChans = make(map[IndexAndTerm]chan OpResp) 87 | kv.LastCmdContext = make(map[int64]OpContext) 88 | kv.lastApplied = 0 89 | kv.lastSnapshot = 0 90 | 91 | // load data from persister 92 | kv.setSnapshot(persister.ReadSnapshot()) 93 | 94 | // long-time goroutines 95 | go kv.applier() 96 | go kv.snapshoter() 97 | 98 | return kv 99 | } 100 | 101 | // Handler 102 | func (kv *KVServer) Command(args *CmdArgs, reply *CmdReply) { 103 | defer utils.Debug(utils.DWarn, "S%d args: %+v reply: %+v", kv.me, args, reply) 104 | 105 | kv.mu.Lock() 106 | if args.OpType != OpGet && kv.isDuplicate(args.ClientId, args.SeqId) { 107 | context := kv.LastCmdContext[args.ClientId] 108 | reply.Value, reply.Err = context.Reply.Value, context.Reply.Err 109 | kv.mu.Unlock() 110 | return 111 | } 112 | kv.mu.Unlock() 113 | 114 | cmd := Op{ 115 | ClientId: args.ClientId, 116 | SeqId: args.SeqId, 117 | OpType: args.OpType, 118 | Key: args.Key, 119 | Value: args.Value, 120 | } 121 | index, term, is_leader := kv.rf.Start(cmd) 122 | if !is_leader { 123 | reply.Value, reply.Err = "", ErrWrongLeader 124 | return 125 | } 126 | 127 | kv.mu.Lock() 128 | it := IndexAndTerm{index, term} 129 | ch := make(chan OpResp, 1) 130 | kv.cmdRespChans[it] = ch 131 | kv.mu.Unlock() 132 | 133 | defer func() { 134 | kv.mu.Lock() 135 | // close(kv.cmdRespChans[index]) 136 | delete(kv.cmdRespChans, it) 137 | kv.mu.Unlock() 138 | close(ch) 139 | }() 140 | 141 | t := time.NewTimer(cmd_timeout) 142 | defer t.Stop() 143 | 144 | for { 145 | kv.mu.Lock() 146 | select { 147 | case resp := <-ch: 148 | utils.Debug(utils.DServer, "S%d have applied, resp: %+v", kv.me, resp) 149 | reply.Value, reply.Err = resp.Value, resp.Err 150 | kv.mu.Unlock() 151 | return 152 | case <-t.C: 153 | priority: 154 | for { 155 | select { 156 | case resp := <-ch: 157 | utils.Debug(utils.DServer, "S%d have applied, resp: %+v", kv.me, resp) 158 | reply.Value, reply.Err = resp.Value, resp.Err 159 | kv.mu.Unlock() 160 | return 161 | default: 162 | break priority 163 | } 164 | } 165 | utils.Debug(utils.DServer, "S%d timeout", kv.me) 166 | reply.Value, reply.Err = "", ErrTimeout 167 | kv.mu.Unlock() 168 | return 169 | default: 170 | kv.mu.Unlock() 171 | time.Sleep(gap_time) 172 | } 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/mr/coordinator.go: -------------------------------------------------------------------------------- 1 | package mr 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/http" 8 | "net/rpc" 9 | "os" 10 | "sync" 11 | "time" 12 | ) 13 | 14 | type TaskStatus int 15 | 16 | const ( 17 | idle TaskStatus = iota 18 | in_progress 19 | completed 20 | ) 21 | 22 | type Task struct { 23 | tno int 24 | filenames []string 25 | status TaskStatus 26 | startTime time.Time 27 | } 28 | 29 | type CoordinatorStatus int 30 | 31 | const ( 32 | MAP_PHASE CoordinatorStatus = iota 33 | REDUCE_PHASE 34 | FINISH_PHASE 35 | ) 36 | 37 | type Coordinator struct { 38 | // Your definitions here. 39 | tasks []Task 40 | nReduce int 41 | nMap int 42 | status CoordinatorStatus 43 | mu sync.Mutex 44 | } 45 | 46 | // Your code here -- RPC handlers for the worker to call. 47 | 48 | // 49 | // an example RPC handler. 50 | // 51 | // the RPC argument and reply types are defined in rpc.go. 
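// GetTask hands out the first idle task, re-issues any in-progress task
// that has run for more than ten seconds (its worker is presumed dead),
// and otherwise tells the worker to WAIT.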
52 | // 53 | func (c *Coordinator) GetTask(args *GetTaskArgs, reply *GetTaskReply) error { 54 | c.mu.Lock() 55 | defer c.mu.Unlock() 56 | 57 | finish_flag := c.IsAllFinish() 58 | if finish_flag { 59 | c.NextPhase() 60 | } 61 | for i := 0; i < len(c.tasks); i++ { 62 | if c.tasks[i].status == idle { 63 | log.Printf("send task %d to worker\n", i) 64 | reply.Err = SuccessCode 65 | reply.Task_no = i 66 | reply.Filenames = c.tasks[i].filenames 67 | if c.status == MAP_PHASE { 68 | reply.Type = MAP 69 | reply.NReduce = c.nReduce 70 | } else if c.status == REDUCE_PHASE { 71 | reply.NReduce = 0 72 | reply.Type = REDUCE 73 | } else { 74 | log.Fatal("unexpected status") 75 | } 76 | c.tasks[i].startTime = time.Now() 77 | c.tasks[i].status = in_progress 78 | return nil 79 | } else if c.tasks[i].status == in_progress { 80 | curr := time.Now() 81 | if curr.Sub(c.tasks[i].startTime) > time.Second*10 { 82 | log.Printf("resend task %d to worker\n", i) 83 | reply.Err = SuccessCode 84 | reply.Task_no = i 85 | reply.Filenames = c.tasks[i].filenames 86 | if c.status == MAP_PHASE { 87 | reply.Type = MAP 88 | reply.NReduce = c.nReduce 89 | } else if c.status == REDUCE_PHASE { 90 | reply.NReduce = 0 91 | reply.Type = REDUCE 92 | } else { 93 | log.Fatal("unexpected status") 94 | } 95 | c.tasks[i].startTime = time.Now() 96 | return nil 97 | } 98 | } 99 | } 100 | reply.Err = SuccessCode 101 | reply.Type = WAIT 102 | return nil 103 | } 104 | 105 | func (c *Coordinator) FinishTask(args *FinishTaskArgs, reply *FinishTaskReply) error { 106 | c.mu.Lock() 107 | defer c.mu.Unlock() 108 | if args.Task_no >= len(c.tasks) || args.Task_no < 0 { 109 | reply.Err = ParaErrCode 110 | return nil 111 | } 112 | c.tasks[args.Task_no].status = completed 113 | if c.IsAllFinish() { 114 | c.NextPhase() 115 | } 116 | return nil 117 | } 118 | 119 | // 120 | // start a thread that listens for RPCs from worker.go 121 | // 122 | func (c *Coordinator) server() { 123 | rpc.Register(c) 124 | rpc.HandleHTTP() 125 | //l, e := net.Listen("tcp", ":1234") 126 | sockname := coordinatorSock() 127 | os.Remove(sockname) 128 | l, e := net.Listen("unix", sockname) 129 | if e != nil { 130 | log.Fatal("listen error:", e) 131 | } 132 | go http.Serve(l, nil) 133 | } 134 | 135 | // coordinator init code 136 | func (c *Coordinator) Init(files []string, nReduce int) { 137 | c.mu.Lock() 138 | defer c.mu.Unlock() 139 | log.Println("init coordinator") 140 | 141 | // make map tasks 142 | log.Println("make map tasks") 143 | tasks := make([]Task, len(files)) 144 | for i, file := range files { 145 | tasks[i].tno = i 146 | tasks[i].filenames = []string{file} 147 | tasks[i].status = idle 148 | } 149 | 150 | // init coordinator 151 | c.tasks = tasks 152 | c.nReduce = nReduce 153 | c.nMap = len(files) 154 | c.status = MAP_PHASE 155 | } 156 | 157 | func (c *Coordinator) MakeReduceTasks() { 158 | // make reduce tasks 159 | log.Println("make reduce tasks") 160 | tasks := make([]Task, c.nReduce) 161 | for i := 0; i < c.nReduce; i++ { 162 | tasks[i].tno = i 163 | files := make([]string, c.nMap) 164 | for j := 0; j < c.nMap; j++ { 165 | filename := fmt.Sprintf("mr-%d-%d", j, i) 166 | files[j] = filename 167 | } 168 | tasks[i].filenames = files 169 | tasks[i].status = idle 170 | } 171 | c.tasks = tasks 172 | } 173 | 174 | func (c *Coordinator) IsAllFinish() bool { 175 | for i := len(c.tasks) - 1; i >= 0; i-- { 176 | if c.tasks[i].status != completed { 177 | return false 178 | } 179 | } 180 | return true 181 | } 182 | 183 | func (c *Coordinator) NextPhase() { 184 | if c.status == 
MAP_PHASE { 185 | log.Println("change to REDUCE_PHASE") 186 | c.MakeReduceTasks() 187 | c.status = REDUCE_PHASE 188 | } else if c.status == REDUCE_PHASE { 189 | log.Println("change to FINISH_PHASE") 190 | c.status = FINISH_PHASE 191 | } else { 192 | log.Println("unexpected status change!") 193 | } 194 | } 195 | 196 | // 197 | // main/mrcoordinator.go calls Done() periodically to find out 198 | // if the entire job has finished. 199 | // 200 | func (c *Coordinator) Done() bool { 201 | c.mu.Lock() 202 | defer c.mu.Unlock() 203 | if c.status == FINISH_PHASE { 204 | return true 205 | } 206 | return false 207 | } 208 | 209 | // 210 | // create a Coordinator. 211 | // main/mrcoordinator.go calls this function. 212 | // nReduce is the number of reduce tasks to use. 213 | // 214 | func MakeCoordinator(files []string, nReduce int) *Coordinator { 215 | c := Coordinator{} 216 | 217 | // Your code here. 218 | c.Init(files, nReduce) 219 | 220 | c.server() 221 | return &c 222 | } 223 | -------------------------------------------------------------------------------- /src/mr/worker.go: -------------------------------------------------------------------------------- 1 | package mr 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "hash/fnv" 7 | "io/ioutil" 8 | "log" 9 | "net/rpc" 10 | "os" 11 | "sort" 12 | "time" 13 | ) 14 | 15 | // 16 | // Map functions return a slice of KeyValue. 17 | // 18 | type KeyValue struct { 19 | Key string 20 | Value string 21 | } 22 | 23 | // for sorting by key. 24 | type ByKey []KeyValue 25 | 26 | // for sorting by key. 27 | func (a ByKey) Len() int { return len(a) } 28 | func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 29 | func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } 30 | 31 | // 32 | // use ihash(key) % NReduce to choose the reduce 33 | // task number for each KeyValue emitted by Map. 34 | // 35 | func ihash(key string) int { 36 | h := fnv.New32a() 37 | h.Write([]byte(key)) 38 | return int(h.Sum32() & 0x7fffffff) 39 | } 40 | 41 | // 42 | // main/mrworker.go calls this function. 43 | // 44 | func Worker(mapf func(string, string) []KeyValue, 45 | reducef func(string, []string) string) { 46 | 47 | // Your worker implementation here. 
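// loop: request a task from the coordinator, run the map or reduce
// function on it, report completion, and exit once the coordinator is
// unreachable or replies with STOP.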
48 | for { 49 | args := GetTaskArgs{} 50 | reply := GetTaskReply{} 51 | log.Printf("get task request: %v\n", args) 52 | ok := CallGetTask(&args, &reply) 53 | log.Printf("recv get task reply: %v\n", reply) 54 | if !ok || reply.Type == STOP { 55 | break 56 | } 57 | 58 | // handle map fynction 59 | switch reply.Type { 60 | case MAP: 61 | if len(reply.Filenames) < 1 { 62 | log.Fatalf("don't have filename") 63 | } 64 | DoMAP(reply.Filenames[0], reply.Task_no, reply.NReduce, mapf) 65 | // map complete, send msg to master 66 | finish_args := FinishTaskArgs{ 67 | Type: MAP, 68 | Task_no: reply.Task_no, 69 | } 70 | finish_reply := FinishTaskReply{} 71 | log.Printf("finish request: %v\n", finish_args) 72 | CallFinishTask(&finish_args, &finish_reply) 73 | log.Printf("recv finish reply: %v\n", finish_reply) 74 | // time.Sleep(time.Second) 75 | case REDUCE: 76 | if len(reply.Filenames) < 1 { 77 | log.Fatalf("don't have filenames") 78 | } 79 | DoReduce(reply.Filenames, reply.Task_no, reducef) 80 | // reduce complete, send msg to master 81 | finish_args := FinishTaskArgs{ 82 | Type: REDUCE, 83 | Task_no: reply.Task_no, 84 | } 85 | finish_reply := FinishTaskReply{} 86 | log.Printf("finish request: %v\n", finish_args) 87 | CallFinishTask(&finish_args, &finish_reply) 88 | log.Printf("recv finish reply: %v\n", finish_reply) 89 | // time.Sleep(time.Second) 90 | case WAIT: 91 | log.Printf("wait task\n") 92 | time.Sleep(time.Second) 93 | default: 94 | time.Sleep(time.Second) 95 | } 96 | } 97 | } 98 | 99 | func DoMAP(filename string, task_no int, nReduce int, mapf func(string, string) []KeyValue) { 100 | file, err := os.Open(filename) 101 | if err != nil { 102 | log.Fatalf("cannot open %v", filename) 103 | } 104 | content, err := ioutil.ReadAll(file) 105 | if err != nil { 106 | log.Fatalf("cannot read %v", filename) 107 | } 108 | file.Close() 109 | 110 | kva := mapf(filename, string(content)) 111 | 112 | sort.Sort(ByKey(kva)) 113 | 114 | log.Println("encode to json") 115 | files := make([]*os.File, nReduce) 116 | encoders := make([]*json.Encoder, nReduce) 117 | for i := 0; i < nReduce; i++ { 118 | ofile, err := ioutil.TempFile("", "mr-tmp*") 119 | if err != nil { 120 | log.Fatalf("cannot create temp file") 121 | } 122 | defer ofile.Close() 123 | 124 | encoder := json.NewEncoder(ofile) 125 | encoders[i] = encoder 126 | files[i] = ofile 127 | } 128 | 129 | var index int 130 | for _, kv := range kva { 131 | index = ihash(kv.Key) % nReduce 132 | err = encoders[index].Encode(&kv) 133 | if err != nil { 134 | log.Fatalf("cannot encode %v", kv) 135 | } 136 | } 137 | 138 | // atomically rename 139 | for i := 0; i < nReduce; i++ { 140 | filename_tmp := fmt.Sprintf("mr-%d-%d", task_no, i) 141 | err := os.Rename(files[i].Name(), filename_tmp) 142 | if err != nil { 143 | log.Fatalf("cannot rename %v to %v", files[i].Name(), filename_tmp) 144 | } 145 | } 146 | } 147 | 148 | func DoReduce(filenames []string, task_no int, reducef func(string, []string) string) { 149 | // read data from mid-file 150 | kva := make([]KeyValue, 0) 151 | for _, filename := range filenames { 152 | file, err := os.Open(filename) 153 | if err != nil { 154 | log.Fatalf("cannot open %v", filename) 155 | } 156 | defer file.Close() 157 | dec := json.NewDecoder(file) 158 | for { 159 | var kv KeyValue 160 | if err := dec.Decode(&kv); err != nil { 161 | break 162 | } 163 | kva = append(kva, kv) 164 | } 165 | } 166 | 167 | sort.Sort(ByKey(kva)) 168 | 169 | // call Reduce on each distinct key in kva[], 170 | // and print the result to mr-out-0. 
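// write to a temp file and rename it to mr-out-<task_no> only at the end,
// so a worker that crashes part-way never leaves a truncated output file.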
171 | ofile, err := ioutil.TempFile("", "mr-out-tmp*") 172 | if err != nil { 173 | log.Fatalf("cannot create temp file") 174 | } 175 | defer ofile.Close() 176 | 177 | i := 0 178 | for i < len(kva) { 179 | j := i + 1 180 | for j < len(kva) && kva[j].Key == kva[i].Key { 181 | j++ 182 | } 183 | values := []string{} 184 | for k := i; k < j; k++ { 185 | values = append(values, kva[k].Value) 186 | } 187 | output := reducef(kva[i].Key, values) 188 | 189 | // this is the correct format for each line of Reduce output. 190 | fmt.Fprintf(ofile, "%v %v\n", kva[i].Key, output) 191 | 192 | i = j 193 | } 194 | 195 | output_filename := fmt.Sprintf("mr-out-%d", task_no) 196 | err = os.Rename(ofile.Name(), output_filename) 197 | if err != nil { 198 | log.Fatalf("cannot rename %v to %v", ofile.Name(), output_filename) 199 | } 200 | } 201 | 202 | // rpc interface 203 | func CallGetTask(args *GetTaskArgs, reply *GetTaskReply) bool { 204 | // send the RPC request, wait for the reply. 205 | return call("Coordinator.GetTask", args, reply) 206 | } 207 | 208 | func CallFinishTask(args *FinishTaskArgs, reply *FinishTaskReply) bool { 209 | return call("Coordinator.FinishTask", args, reply) 210 | } 211 | 212 | // 213 | // send an RPC request to the coordinator, wait for the response. 214 | // usually returns true. 215 | // returns false if something goes wrong. 216 | // 217 | func call(rpcname string, args interface{}, reply interface{}) bool { 218 | // c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234") 219 | sockname := coordinatorSock() 220 | c, err := rpc.DialHTTP("unix", sockname) 221 | if err != nil { 222 | log.Println("dialing: ", err) 223 | return false 224 | } 225 | defer c.Close() 226 | 227 | err = c.Call(rpcname, args, reply) 228 | if err == nil { 229 | return true 230 | } 231 | 232 | fmt.Println(err) 233 | return false 234 | } 235 | -------------------------------------------------------------------------------- /src/shardctrler/configModel.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import ( 4 | "sort" 5 | ) 6 | 7 | // 8 | // Shard controler: assigns shards to replication groups. 9 | // 10 | // RPC interface: 11 | // Join(servers) -- add a set of groups (gid -> server-list mapping). 12 | // Leave(gids) -- delete a set of groups. 13 | // Move(shard, gid) -- hand off one shard from current owner to gid. 14 | // Query(num) -> fetch Config # num, or latest config if num==-1. 15 | // 16 | // A Config (configuration) describes a set of replica groups, and the 17 | // replica group responsible for each shard. Configs are numbered. Config 18 | // #0 is the initial configuration, with no groups and all shards 19 | // assigned to group 0 (the invalid group). 20 | // 21 | // You will need to add fields to the RPC argument structs. 22 | // 23 | 24 | // The number of shards. 25 | const NShards = 10 26 | 27 | // A configuration -- an assignment of shards to groups. 28 | // Please don't change this. 
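// illustrative example (hypothetical): with NShards = 10 and two groups
// 1 and 2, a balanced config could have Shards = [1 1 1 1 1 2 2 2 2 2];
// any 5/5 split is equally valid.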
29 | type Config struct { 30 | Num int // config number 31 | Shards [NShards]int // shard -> gid 32 | Groups map[int][]string // gid -> servers[] 33 | } 34 | 35 | func (cfg *Config) DeepCopy() Config { 36 | ret := Config{ 37 | Num: cfg.Num, 38 | Shards: [NShards]int{}, 39 | Groups: make(map[int][]string), 40 | } 41 | 42 | for k, v := range cfg.Groups { 43 | ret.Groups[k] = v 44 | } 45 | for i := range cfg.Shards { 46 | ret.Shards[i] = cfg.Shards[i] 47 | } 48 | return ret 49 | } 50 | 51 | // --------------------------------------------------------------------- // 52 | 53 | const magicNullGid = 0 54 | 55 | type ConfigModel struct { 56 | configs []Config // indexed by config num 57 | me int // for debug 58 | } 59 | 60 | func NewConfigModel(me int) *ConfigModel { 61 | cfg := ConfigModel{make([]Config, 1), me} 62 | cfg.configs[0] = Config{ 63 | Num: 0, 64 | Shards: [NShards]int{}, 65 | Groups: make(map[int][]string), 66 | } 67 | for i := range cfg.configs[0].Shards { 68 | cfg.configs[0].Shards[i] = magicNullGid 69 | } 70 | return &cfg 71 | } 72 | 73 | func (cm *ConfigModel) getGroup2Shards(config *Config) map[int][]int { 74 | group2shard := map[int][]int{} 75 | for gid, _ := range config.Groups { 76 | group2shard[gid] = []int{} 77 | } 78 | group2shard[magicNullGid] = []int{} 79 | 80 | for shard, gid := range config.Shards { 81 | group2shard[gid] = append(group2shard[gid], shard) 82 | } 83 | return group2shard 84 | } 85 | 86 | func (cm *ConfigModel) getMinShards(group2shard map[int][]int) int { 87 | var keys []int 88 | for k := range group2shard { 89 | keys = append(keys, k) 90 | } 91 | sort.Ints(keys) 92 | 93 | gidRet, minn := -1, NShards+1 94 | for _, gid := range keys { 95 | if gid != magicNullGid && len(group2shard[gid]) < minn { 96 | gidRet, minn = gid, len(group2shard[gid]) 97 | } 98 | } 99 | if gidRet == -1 { 100 | return magicNullGid 101 | } 102 | return gidRet 103 | } 104 | 105 | func (cm *ConfigModel) getMaxShards(group2shard map[int][]int) int { 106 | if shards, ok := group2shard[magicNullGid]; ok && len(shards) > 0 { 107 | return magicNullGid 108 | } 109 | 110 | var keys []int 111 | for k := range group2shard { 112 | keys = append(keys, k) 113 | } 114 | sort.Ints(keys) 115 | 116 | gidRet, maxn := -1, -1 117 | for _, gid := range keys { 118 | if len(group2shard[gid]) > maxn { 119 | gidRet, maxn = gid, len(group2shard[gid]) 120 | } 121 | } 122 | return gidRet 123 | } 124 | 125 | func (cm *ConfigModel) reBalance(config *Config) { 126 | // special judge 127 | if len(config.Groups) == 0 { // if none group, init shards 128 | for i := range config.Shards { 129 | config.Shards[i] = 0 130 | } 131 | return 132 | } 133 | 134 | // 1 shard - 1 group, 1 group - n shards 135 | group2shard := cm.getGroup2Shards(config) 136 | for { 137 | src := cm.getMaxShards(group2shard) 138 | dst := cm.getMinShards(group2shard) 139 | if src != magicNullGid && len(group2shard[src])-len(group2shard[dst]) <= 1 { 140 | break 141 | } 142 | 143 | group2shard[dst] = append(group2shard[dst], group2shard[src][0]) 144 | group2shard[src] = group2shard[src][1:] 145 | } 146 | 147 | // reset shard 148 | for gid, shards := range group2shard { 149 | for _, shard := range shards { 150 | config.Shards[shard] = gid 151 | } 152 | } 153 | } 154 | 155 | func (cm *ConfigModel) join(servers map[int][]string) Err { 156 | newConfig := cm.configs[len(cm.configs)-1].DeepCopy() 157 | newConfig.Num = len(cm.configs) 158 | 159 | for gid, servers_iter := range servers { 160 | newServers := make([]string, len(servers_iter)) 161 | copy(newServers, 
servers_iter) 162 | if _, ok := newConfig.Groups[gid]; !ok { 163 | newConfig.Groups[gid] = newServers 164 | } else { 165 | newConfig.Groups[gid] = append(newConfig.Groups[gid], newServers...) 166 | } 167 | } 168 | 169 | cm.reBalance(&newConfig) 170 | cm.configs = append(cm.configs, newConfig) 171 | return OK 172 | } 173 | 174 | func (cm *ConfigModel) leave(GIDs []int) Err { 175 | newConfig := cm.configs[len(cm.configs)-1].DeepCopy() 176 | newConfig.Num = len(cm.configs) 177 | 178 | group2shard := cm.getGroup2Shards(&newConfig) 179 | for _, gid := range GIDs { 180 | if _, ok := newConfig.Groups[gid]; ok { 181 | delete(newConfig.Groups, gid) 182 | } 183 | if shards, ok := group2shard[gid]; ok { 184 | for _, shard := range shards { 185 | newConfig.Shards[shard] = magicNullGid 186 | } 187 | } 188 | } 189 | 190 | cm.reBalance(&newConfig) 191 | cm.configs = append(cm.configs, newConfig) 192 | return OK 193 | } 194 | 195 | func (cm *ConfigModel) move(shard int, gid int) Err { 196 | newConfig := cm.configs[len(cm.configs)-1].DeepCopy() 197 | newConfig.Num = len(cm.configs) 198 | newConfig.Shards[shard] = gid 199 | cm.configs = append(cm.configs, newConfig) 200 | return OK 201 | } 202 | 203 | func (cm *ConfigModel) query(num int) (Config, Err) { 204 | if num < 0 || num >= len(cm.configs) { 205 | return cm.configs[len(cm.configs)-1].DeepCopy(), OK 206 | } 207 | return cm.configs[num].DeepCopy(), OK 208 | } 209 | 210 | func (cm *ConfigModel) isLegal(opType OpType) bool { 211 | switch opType { 212 | case OpJoin: 213 | case OpLeave: 214 | case OpMove: 215 | case OpQuery: 216 | default: 217 | return false 218 | } 219 | return true 220 | } 221 | 222 | func (cm *ConfigModel) Opt(cmd Op) (Config, Err) { 223 | switch cmd.Op { 224 | case OpJoin: 225 | err := cm.join(cmd.Servers) 226 | return Config{}, err 227 | case OpLeave: 228 | err := cm.leave(cmd.GIDs) 229 | return Config{}, err 230 | case OpMove: 231 | err := cm.move(cmd.Shard, cmd.GID) 232 | return Config{}, err 233 | case OpQuery: 234 | config, err := cm.query(cmd.Num) 235 | return config, err 236 | default: 237 | return Config{}, ErrOpt 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /src/main/test-mr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # basic map-reduce test 5 | # 6 | 7 | #RACE= 8 | 9 | # comment this to run the tests without the Go race detector. 10 | RACE=-race 11 | 12 | # run the test in a fresh sub-directory. 13 | rm -rf mr-tmp 14 | mkdir mr-tmp || exit 1 15 | cd mr-tmp || exit 1 16 | rm -f mr-* 17 | 18 | # make sure software is freshly built. 19 | (cd ../../mrapps && go build $RACE -buildmode=plugin wc.go) || exit 1 20 | (cd ../../mrapps && go build $RACE -buildmode=plugin indexer.go) || exit 1 21 | (cd ../../mrapps && go build $RACE -buildmode=plugin mtiming.go) || exit 1 22 | (cd ../../mrapps && go build $RACE -buildmode=plugin rtiming.go) || exit 1 23 | (cd ../../mrapps && go build $RACE -buildmode=plugin jobcount.go) || exit 1 24 | (cd ../../mrapps && go build $RACE -buildmode=plugin early_exit.go) || exit 1 25 | (cd ../../mrapps && go build $RACE -buildmode=plugin crash.go) || exit 1 26 | (cd ../../mrapps && go build $RACE -buildmode=plugin nocrash.go) || exit 1 27 | (cd .. && go build $RACE mrcoordinator.go) || exit 1 28 | (cd .. && go build $RACE mrworker.go) || exit 1 29 | (cd .. 
&& go build $RACE mrsequential.go) || exit 1 30 | 31 | failed_any=0 32 | 33 | ######################################################### 34 | # first word-count 35 | 36 | # generate the correct output 37 | ../mrsequential ../../mrapps/wc.so ../pg*txt || exit 1 38 | sort mr-out-0 > mr-correct-wc.txt 39 | rm -f mr-out* 40 | 41 | echo '***' Starting wc test. 42 | 43 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 44 | pid=$! 45 | 46 | # give the coordinator time to create the sockets. 47 | sleep 1 48 | 49 | # start multiple workers. 50 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so & 51 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so & 52 | timeout -k 2s 180s ../mrworker ../../mrapps/wc.so & 53 | 54 | # wait for the coordinator to exit. 55 | wait $pid 56 | 57 | # since workers are required to exit when a job is completely finished, 58 | # and not before, that means the job has finished. 59 | sort mr-out* | grep . > mr-wc-all 60 | if cmp mr-wc-all mr-correct-wc.txt 61 | then 62 | echo '---' wc test: PASS 63 | else 64 | echo '---' wc output is not the same as mr-correct-wc.txt 65 | echo '---' wc test: FAIL 66 | failed_any=1 67 | fi 68 | 69 | # wait for remaining workers and coordinator to exit. 70 | wait 71 | 72 | ######################################################### 73 | # now indexer 74 | rm -f mr-* 75 | 76 | # generate the correct output 77 | ../mrsequential ../../mrapps/indexer.so ../pg*txt || exit 1 78 | sort mr-out-0 > mr-correct-indexer.txt 79 | rm -f mr-out* 80 | 81 | echo '***' Starting indexer test. 82 | 83 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 84 | sleep 1 85 | 86 | # start multiple workers 87 | timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so & 88 | timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so 89 | 90 | sort mr-out* | grep . > mr-indexer-all 91 | if cmp mr-indexer-all mr-correct-indexer.txt 92 | then 93 | echo '---' indexer test: PASS 94 | else 95 | echo '---' indexer output is not the same as mr-correct-indexer.txt 96 | echo '---' indexer test: FAIL 97 | failed_any=1 98 | fi 99 | 100 | wait 101 | 102 | ######################################################### 103 | echo '***' Starting map parallelism test. 104 | 105 | rm -f mr-* 106 | 107 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 108 | sleep 1 109 | 110 | timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so & 111 | timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so 112 | 113 | NT=`cat mr-out* | grep '^times-' | wc -l | sed 's/ //g'` 114 | if [ "$NT" != "2" ] 115 | then 116 | echo '---' saw "$NT" workers rather than 2 117 | echo '---' map parallelism test: FAIL 118 | failed_any=1 119 | fi 120 | 121 | if cat mr-out* | grep '^parallel.* 2' > /dev/null 122 | then 123 | echo '---' map parallelism test: PASS 124 | else 125 | echo '---' map workers did not run in parallel 126 | echo '---' map parallelism test: FAIL 127 | failed_any=1 128 | fi 129 | 130 | wait 131 | 132 | 133 | ######################################################### 134 | echo '***' Starting reduce parallelism test. 135 | 136 | rm -f mr-* 137 | 138 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 139 | sleep 1 140 | 141 | timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so & 142 | timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so 143 | 144 | NT=`cat mr-out* | grep '^[a-z] 2' | wc -l | sed 's/ //g'` 145 | if [ "$NT" -lt "2" ] 146 | then 147 | echo '---' too few parallel reduces. 
148 | echo '---' reduce parallelism test: FAIL 149 | failed_any=1 150 | else 151 | echo '---' reduce parallelism test: PASS 152 | fi 153 | 154 | wait 155 | 156 | ######################################################### 157 | echo '***' Starting job count test. 158 | 159 | rm -f mr-* 160 | 161 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 162 | sleep 1 163 | 164 | timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so & 165 | timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so 166 | timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so & 167 | timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so 168 | 169 | NT=`cat mr-out* | awk '{print $2}'` 170 | if [ "$NT" -ne "8" ] 171 | then 172 | echo '---' map jobs ran incorrect number of times "($NT != 8)" 173 | echo '---' job count test: FAIL 174 | failed_any=1 175 | else 176 | echo '---' job count test: PASS 177 | fi 178 | 179 | wait 180 | 181 | ######################################################### 182 | # test whether any worker or coordinator exits before the 183 | # task has completed (i.e., all output files have been finalized) 184 | rm -f mr-* 185 | 186 | echo '***' Starting early exit test. 187 | 188 | timeout -k 2s 180s ../mrcoordinator ../pg*txt & 189 | 190 | # give the coordinator time to create the sockets. 191 | sleep 1 192 | 193 | # start multiple workers. 194 | timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so & 195 | timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so & 196 | timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so & 197 | 198 | # wait for any of the coord or workers to exit 199 | # `jobs` ensures that any completed old processes from other tests 200 | # are not waited upon 201 | jobs &> /dev/null 202 | wait -n 203 | 204 | # a process has exited. this means that the output should be finalized 205 | # otherwise, either a worker or the coordinator exited early 206 | sort mr-out* | grep . > mr-wc-all-initial 207 | 208 | # wait for remaining workers and coordinator to exit. 209 | wait 210 | 211 | # compare initial and final outputs 212 | sort mr-out* | grep . > mr-wc-all-final 213 | if cmp mr-wc-all-final mr-wc-all-initial 214 | then 215 | echo '---' early exit test: PASS 216 | else 217 | echo '---' output changed after first worker exited 218 | echo '---' early exit test: FAIL 219 | failed_any=1 220 | fi 221 | rm -f mr-* 222 | 223 | ######################################################### 224 | echo '***' Starting crash test. 225 | 226 | # generate the correct output 227 | ../mrsequential ../../mrapps/nocrash.so ../pg*txt || exit 1 228 | sort mr-out-0 > mr-correct-crash.txt 229 | rm -f mr-out* 230 | 231 | rm -f mr-done 232 | (timeout -k 2s 180s ../mrcoordinator ../pg*txt ; touch mr-done ) & 233 | sleep 1 234 | 235 | # start multiple workers 236 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so & 237 | 238 | # mimic rpc.go's coordinatorSock() 239 | SOCKNAME=/var/tmp/824-mr-`id -u` 240 | 241 | ( while [ -e $SOCKNAME -a ! -f mr-done ] 242 | do 243 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so 244 | sleep 1 245 | done ) & 246 | 247 | ( while [ -e $SOCKNAME -a ! -f mr-done ] 248 | do 249 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so 250 | sleep 1 251 | done ) & 252 | 253 | while [ -e $SOCKNAME -a ! -f mr-done ] 254 | do 255 | timeout -k 2s 180s ../mrworker ../../mrapps/crash.so 256 | sleep 1 257 | done 258 | 259 | wait 260 | 261 | rm $SOCKNAME 262 | sort mr-out* | grep . 
> mr-crash-all 263 | if cmp mr-crash-all mr-correct-crash.txt 264 | then 265 | echo '---' crash test: PASS 266 | else 267 | echo '---' crash output is not the same as mr-correct-crash.txt 268 | echo '---' crash test: FAIL 269 | failed_any=1 270 | fi 271 | 272 | ######################################################### 273 | if [ $failed_any -eq 0 ]; then 274 | echo '***' PASSED ALL TESTS 275 | else 276 | echo '***' FAILED SOME TESTS 277 | exit 1 278 | fi 279 | -------------------------------------------------------------------------------- /src/utils/dstest: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import itertools 4 | import math 5 | import signal 6 | import subprocess 7 | import tempfile 8 | import shutil 9 | import time 10 | import os 11 | import sys 12 | import datetime 13 | from collections import defaultdict 14 | from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED 15 | from dataclasses import dataclass 16 | from pathlib import Path 17 | from typing import List, Optional, Dict, DefaultDict, Tuple 18 | 19 | import typer 20 | import rich 21 | from rich import print 22 | from rich.table import Table 23 | from rich.progress import ( 24 | Progress, 25 | TimeElapsedColumn, 26 | TimeRemainingColumn, 27 | TextColumn, 28 | BarColumn, 29 | SpinnerColumn, 30 | ) 31 | from rich.live import Live 32 | from rich.panel import Panel 33 | from rich.traceback import install 34 | 35 | install(show_locals=True) 36 | 37 | 38 | @dataclass 39 | class StatsMeter: 40 | """ 41 | Auxiliary class to keep track of online stats including: count, mean, variance 42 | Uses Welford's algorithm to compute sample mean and sample variance incrementally. 43 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm 44 | """ 45 | 46 | n: int = 0 47 | mean: float = 0.0 48 | S: float = 0.0 49 | 50 | def add(self, datum): 51 | self.n += 1 52 | delta = datum - self.mean 53 | # Mk = Mk-1+ (xk – Mk-1)/k 54 | self.mean += delta / self.n 55 | # Sk = Sk-1 + (xk – Mk-1)*(xk – Mk).
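# Welford update: delta uses the old mean and (datum - self.mean) the new one, so S accumulates the running sum of squared deviations and the variance property below is simply S / n.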
56 | self.S += delta * (datum - self.mean) 57 | 58 | @property 59 | def variance(self): 60 | return self.S / self.n 61 | 62 | @property 63 | def std(self): 64 | return math.sqrt(self.variance) 65 | 66 | 67 | def print_results(results: Dict[str, Dict[str, StatsMeter]], timing=False): 68 | table = Table(show_header=True, header_style="bold") 69 | table.add_column("Test") 70 | table.add_column("Failed", justify="right") 71 | table.add_column("Total", justify="right") 72 | if not timing: 73 | table.add_column("Time", justify="right") 74 | else: 75 | table.add_column("Real Time", justify="right") 76 | table.add_column("User Time", justify="right") 77 | table.add_column("System Time", justify="right") 78 | 79 | for test, stats in results.items(): 80 | if stats["completed"].n == 0: 81 | continue 82 | color = "green" if stats["failed"].n == 0 else "red" 83 | row = [ 84 | f"[{color}]{test}[/{color}]", 85 | str(stats["failed"].n), 86 | str(stats["completed"].n), 87 | ] 88 | if not timing: 89 | row.append(f"{stats['time'].mean:.2f} ± {stats['time'].std:.2f}") 90 | else: 91 | row.extend( 92 | [ 93 | f"{stats['real_time'].mean:.2f} ± {stats['real_time'].std:.2f}", 94 | f"{stats['user_time'].mean:.2f} ± {stats['user_time'].std:.2f}", 95 | f"{stats['system_time'].mean:.2f} ± {stats['system_time'].std:.2f}", 96 | ] 97 | ) 98 | table.add_row(*row) 99 | 100 | print(table) 101 | 102 | 103 | def run_test(test: str, race: bool, timing: bool): 104 | test_cmd = ["go", "test", f"-run={test}"] 105 | if race: 106 | test_cmd.append("-race") 107 | if timing: 108 | test_cmd = ["time"] + test_cmd 109 | f, path = tempfile.mkstemp() 110 | start = time.time() 111 | proc = subprocess.run(test_cmd, stdout=f, stderr=f) 112 | runtime = time.time() - start 113 | os.close(f) 114 | return test, path, proc.returncode, runtime 115 | 116 | 117 | def last_line(file: str) -> str: 118 | with open(file, "rb") as f: 119 | f.seek(-2, os.SEEK_END) 120 | while f.read(1) != b"\n": 121 | f.seek(-2, os.SEEK_CUR) 122 | line = f.readline().decode() 123 | return line 124 | 125 | 126 | # fmt: off 127 | def run_tests( 128 | tests: List[str], 129 | sequential: bool = typer.Option(False, '--sequential', '-s', help='Run all tests of each group in order'), 130 | workers: int = typer.Option(1, '--workers', '-p', help='Number of parallel tasks'), 131 | iterations: int = typer.Option(10, '--iter', '-n', help='Number of iterations to run'), 132 | output: Optional[Path] = typer.Option(None, '--output', '-o', help='Output path to use'), 133 | verbose: int = typer.Option(0, '--verbose', '-v', help='Verbosity level', count=True), 134 | archive: bool = typer.Option(False, '--archive', '-a', help='Save all logs instead of only failed ones'), 135 | race: bool = typer.Option(False, '--race/--no-race', '-r/-R', help='Run with race checker'), 136 | loop: bool = typer.Option(False, '--loop', '-l', help='Run continuously'), 137 | growth: int = typer.Option(10, '--growth', '-g', help='Growth ratio of iterations when using --loop'), 138 | timing: bool = typer.Option(False, '--timing', '-t', help='Report timing, only works on macOS'), 139 | # fmt: on 140 | ): 141 | 142 | if output is None: 143 | timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 144 | output = Path(timestamp) 145 | 146 | if race: 147 | print("[yellow]Running with the race detector\n[/yellow]") 148 | 149 | if verbose > 0: 150 | print(f"[yellow] Verbosity level set to {verbose}[/yellow]") 151 | os.environ['VERBOSE'] = str(verbose) 152 | 153 | while True: 154 | 155 | total = iterations * len(tests) 156
| completed = 0 157 | 158 | results = {test: defaultdict(StatsMeter) for test in tests} 159 | 160 | if sequential: 161 | test_instances = itertools.chain.from_iterable(itertools.repeat(test, iterations) for test in tests) 162 | else: 163 | test_instances = itertools.chain.from_iterable(itertools.repeat(tests, iterations)) 164 | test_instances = iter(test_instances) 165 | 166 | total_progress = Progress( 167 | "[progress.description]{task.description}", 168 | BarColumn(), 169 | TimeRemainingColumn(), 170 | "[progress.percentage]{task.percentage:>3.0f}%", 171 | TimeElapsedColumn(), 172 | ) 173 | total_task = total_progress.add_task("[yellow]Tests[/yellow]", total=total) 174 | 175 | task_progress = Progress( 176 | "[progress.description]{task.description}", 177 | SpinnerColumn(), 178 | BarColumn(), 179 | "{task.completed}/{task.total}", 180 | ) 181 | tasks = {test: task_progress.add_task(test, total=iterations) for test in tests} 182 | 183 | progress_table = Table.grid() 184 | progress_table.add_row(total_progress) 185 | progress_table.add_row(Panel.fit(task_progress)) 186 | 187 | with Live(progress_table, transient=True) as live: 188 | 189 | def handler(_, frame): 190 | live.stop() 191 | print('\n') 192 | print_results(results) 193 | sys.exit(1) 194 | 195 | signal.signal(signal.SIGINT, handler) 196 | 197 | with ThreadPoolExecutor(max_workers=workers) as executor: 198 | 199 | futures = [] 200 | while completed < total: 201 | n = len(futures) 202 | if n < workers: 203 | for test in itertools.islice(test_instances, workers-n): 204 | futures.append(executor.submit(run_test, test, race, timing)) 205 | 206 | done, not_done = wait(futures, return_when=FIRST_COMPLETED) 207 | 208 | for future in done: 209 | test, path, rc, runtime = future.result() 210 | 211 | results[test]['completed'].add(1) 212 | results[test]['time'].add(runtime) 213 | task_progress.update(tasks[test], advance=1) 214 | dest = (output / f"{test}_{completed}.log").as_posix() 215 | if rc != 0: 216 | print(f"Failed test {test} - {dest}") 217 | task_progress.update(tasks[test], description=f"[red]{test}[/red]") 218 | results[test]['failed'].add(1) 219 | else: 220 | if results[test]['completed'].n == iterations and results[test]['failed'].n == 0: 221 | task_progress.update(tasks[test], description=f"[green]{test}[/green]") 222 | 223 | if rc != 0 or archive: 224 | output.mkdir(exist_ok=True, parents=True) 225 | shutil.copy(path, dest) 226 | 227 | if timing: 228 | line = last_line(path) 229 | real, _, user, _, system, _ = line.replace(' '*8, '').split(' ') 230 | results[test]['real_time'].add(float(real)) 231 | results[test]['user_time'].add(float(user)) 232 | results[test]['system_time'].add(float(system)) 233 | 234 | os.remove(path) 235 | 236 | completed += 1 237 | total_progress.update(total_task, advance=1) 238 | 239 | futures = list(not_done) 240 | 241 | print_results(results, timing) 242 | 243 | if loop: 244 | iterations *= growth 245 | print(f"[yellow]Increasing iterations to {iterations}[/yellow]") 246 | else: 247 | break 248 | 249 | 250 | if __name__ == "__main__": 251 | typer.run(run_tests) -------------------------------------------------------------------------------- /src/shardkv/dstest: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import itertools 4 | import math 5 | import signal 6 | import subprocess 7 | import tempfile 8 | import shutil 9 | import time 10 | import os 11 | import sys 12 | import datetime 13 | from collections import defaultdict 14 | 
from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED 15 | from dataclasses import dataclass 16 | from pathlib import Path 17 | from typing import List, Optional, Dict, DefaultDict, Tuple 18 | 19 | import typer 20 | import rich 21 | from rich import print 22 | from rich.table import Table 23 | from rich.progress import ( 24 | Progress, 25 | TimeElapsedColumn, 26 | TimeRemainingColumn, 27 | TextColumn, 28 | BarColumn, 29 | SpinnerColumn, 30 | ) 31 | from rich.live import Live 32 | from rich.panel import Panel 33 | from rich.traceback import install 34 | 35 | install(show_locals=True) 36 | 37 | 38 | @dataclass 39 | class StatsMeter: 40 | """ 41 | Auxiliary class to keep track of online stats including: count, mean, variance 42 | Uses Welford's algorithm to compute sample mean and sample variance incrementally. 43 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm 44 | """ 45 | 46 | n: int = 0 47 | mean: float = 0.0 48 | S: float = 0.0 49 | 50 | def add(self, datum): 51 | self.n += 1 52 | delta = datum - self.mean 53 | # Mk = Mk-1+ (xk – Mk-1)/k 54 | self.mean += delta / self.n 55 | # Sk = Sk-1 + (xk – Mk-1)*(xk – Mk). 56 | self.S += delta * (datum - self.mean) 57 | 58 | @property 59 | def variance(self): 60 | return self.S / self.n 61 | 62 | @property 63 | def std(self): 64 | return math.sqrt(self.variance) 65 | 66 | 67 | def print_results(results: Dict[str, Dict[str, StatsMeter]], timing=False): 68 | table = Table(show_header=True, header_style="bold") 69 | table.add_column("Test") 70 | table.add_column("Failed", justify="right") 71 | table.add_column("Total", justify="right") 72 | if not timing: 73 | table.add_column("Time", justify="right") 74 | else: 75 | table.add_column("Real Time", justify="right") 76 | table.add_column("User Time", justify="right") 77 | table.add_column("System Time", justify="right") 78 | 79 | for test, stats in results.items(): 80 | if stats["completed"].n == 0: 81 | continue 82 | color = "green" if stats["failed"].n == 0 else "red" 83 | row = [ 84 | f"[{color}]{test}[/{color}]", 85 | str(stats["failed"].n), 86 | str(stats["completed"].n), 87 | ] 88 | if not timing: 89 | row.append(f"{stats['time'].mean:.2f} ± {stats['time'].std:.2f}") 90 | else: 91 | row.extend( 92 | [ 93 | f"{stats['real_time'].mean:.2f} ± {stats['real_time'].std:.2f}", 94 | f"{stats['user_time'].mean:.2f} ± {stats['user_time'].std:.2f}", 95 | f"{stats['system_time'].mean:.2f} ± {stats['system_time'].std:.2f}", 96 | ] 97 | ) 98 | table.add_row(*row) 99 | 100 | print(table) 101 | 102 | 103 | def run_test(test: str, race: bool, timing: bool): 104 | test_cmd = ["go", "test", f"-run={test}"] 105 | if race: 106 | test_cmd.append("-race") 107 | if timing: 108 | test_cmd = ["time"] + test_cmd 109 | f, path = tempfile.mkstemp() 110 | start = time.time() 111 | proc = subprocess.run(test_cmd, stdout=f, stderr=f) 112 | runtime = time.time() - start 113 | os.close(f) 114 | return test, path, proc.returncode, runtime 115 | 116 | 117 | def last_line(file: str) -> str: 118 | with open(file, "rb") as f: 119 | f.seek(-2, os.SEEK_END) 120 | while f.read(1) != b"\n": 121 | f.seek(-2, os.SEEK_CUR) 122 | line = f.readline().decode() 123 | return line 124 | 125 | 126 | # fmt: off 127 | def run_tests( 128 | tests: List[str], 129 | sequential: bool = typer.Option(False, '--sequential', '-s', help='Run all tests of each group in order'), 130 | workers: int = typer.Option(1, '--workers', '-p', help='Number of parallel tasks'), 131 | iterations: int = typer.Option(10,
'--iter', '-n', help='Number of iterations to run'), 132 | output: Optional[Path] = typer.Option(None, '--output', '-o', help='Output path to use'), 133 | verbose: int = typer.Option(0, '--verbose', '-v', help='Verbosity level', count=True), 134 | archive: bool = typer.Option(False, '--archive', '-a', help='Save all logs instead of only failed ones'), 135 | race: bool = typer.Option(False, '--race/--no-race', '-r/-R', help='Run with race checker'), 136 | loop: bool = typer.Option(False, '--loop', '-l', help='Run continuously'), 137 | growth: int = typer.Option(10, '--growth', '-g', help='Growth ratio of iterations when using --loop'), 138 | timing: bool = typer.Option(False, '--timing', '-t', help='Report timing, only works on macOS'), 139 | # fmt: on 140 | ): 141 | 142 | if output is None: 143 | timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 144 | output = Path(timestamp) 145 | 146 | if race: 147 | print("[yellow]Running with the race detector\n[/yellow]") 148 | 149 | if verbose > 0: 150 | print(f"[yellow] Verbosity level set to {verbose}[/yellow]") 151 | os.environ['VERBOSE'] = str(verbose) 152 | 153 | while True: 154 | 155 | total = iterations * len(tests) 156 | completed = 0 157 | 158 | results = {test: defaultdict(StatsMeter) for test in tests} 159 | 160 | if sequential: 161 | test_instances = itertools.chain.from_iterable(itertools.repeat(test, iterations) for test in tests) 162 | else: 163 | test_instances = itertools.chain.from_iterable(itertools.repeat(tests, iterations)) 164 | test_instances = iter(test_instances) 165 | 166 | total_progress = Progress( 167 | "[progress.description]{task.description}", 168 | BarColumn(), 169 | TimeRemainingColumn(), 170 | "[progress.percentage]{task.percentage:>3.0f}%", 171 | TimeElapsedColumn(), 172 | ) 173 | total_task = total_progress.add_task("[yellow]Tests[/yellow]", total=total) 174 | 175 | task_progress = Progress( 176 | "[progress.description]{task.description}", 177 | SpinnerColumn(), 178 | BarColumn(), 179 | "{task.completed}/{task.total}", 180 | ) 181 | tasks = {test: task_progress.add_task(test, total=iterations) for test in tests} 182 | 183 | progress_table = Table.grid() 184 | progress_table.add_row(total_progress) 185 | progress_table.add_row(Panel.fit(task_progress)) 186 | 187 | with Live(progress_table, transient=True) as live: 188 | 189 | def handler(_, frame): 190 | live.stop() 191 | print('\n') 192 | print_results(results) 193 | sys.exit(1) 194 | 195 | signal.signal(signal.SIGINT, handler) 196 | 197 | with ThreadPoolExecutor(max_workers=workers) as executor: 198 | 199 | futures = [] 200 | while completed < total: 201 | n = len(futures) 202 | if n < workers: 203 | for test in itertools.islice(test_instances, workers-n): 204 | futures.append(executor.submit(run_test, test, race, timing)) 205 | 206 | done, not_done = wait(futures, return_when=FIRST_COMPLETED) 207 | 208 | for future in done: 209 | test, path, rc, runtime = future.result() 210 | 211 | results[test]['completed'].add(1) 212 | results[test]['time'].add(runtime) 213 | task_progress.update(tasks[test], advance=1) 214 | dest = (output / f"{test}_{completed}.log").as_posix() 215 | if rc != 0: 216 | print(f"Failed test {test} - {dest}") 217 | task_progress.update(tasks[test], description=f"[red]{test}[/red]") 218 | results[test]['failed'].add(1) 219 | else: 220 | if results[test]['completed'].n == iterations and results[test]['failed'].n == 0: 221 | task_progress.update(tasks[test], description=f"[green]{test}[/green]") 222 | 223 | if rc != 0 or archive: 224 |
output.mkdir(exist_ok=True, parents=True) 225 | shutil.copy(path, dest) 226 | 227 | if timing: 228 | line = last_line(path) 229 | real, _, user, _, system, _ = line.replace(' '*8, '').split(' ') 230 | results[test]['real_time'].add(float(real)) 231 | results[test]['user_time'].add(float(user)) 232 | results[test]['system_time'].add(float(system)) 233 | 234 | os.remove(path) 235 | 236 | completed += 1 237 | total_progress.update(total_task, advance=1) 238 | 239 | futures = list(not_done) 240 | 241 | print_results(results, timing) 242 | 243 | if loop: 244 | iterations *= growth 245 | print(f"[yellow]Increasing iterations to {iterations}[/yellow]") 246 | else: 247 | break 248 | 249 | 250 | if __name__ == "__main__": 251 | typer.run(run_tests) -------------------------------------------------------------------------------- /src/shardctrler/config.go: -------------------------------------------------------------------------------- 1 | package shardctrler 2 | 3 | import "6.824/labrpc" 4 | import "6.824/raft" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | import "time" 15 | 16 | func randstring(n int) string { 17 | b := make([]byte, 2*n) 18 | crand.Read(b) 19 | s := base64.URLEncoding.EncodeToString(b) 20 | return s[0:n] 21 | } 22 | 23 | // Randomize server handles 24 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 25 | sa := make([]*labrpc.ClientEnd, len(kvh)) 26 | copy(sa, kvh) 27 | for i := range sa { 28 | j := rand.Intn(i + 1) 29 | sa[i], sa[j] = sa[j], sa[i] 30 | } 31 | return sa 32 | } 33 | 34 | type config struct { 35 | mu sync.Mutex 36 | t *testing.T 37 | net *labrpc.Network 38 | n int 39 | servers []*ShardCtrler 40 | saved []*raft.Persister 41 | endnames [][]string // names of each server's sending ClientEnds 42 | clerks map[*Clerk][]string 43 | nextClientId int 44 | start time.Time // time at which make_config() was called 45 | } 46 | 47 | func (cfg *config) checkTimeout() { 48 | // enforce a two minute real-time limit on each test 49 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 50 | cfg.t.Fatal("test took longer than 120 seconds") 51 | } 52 | } 53 | 54 | func (cfg *config) cleanup() { 55 | cfg.mu.Lock() 56 | defer cfg.mu.Unlock() 57 | for i := 0; i < len(cfg.servers); i++ { 58 | if cfg.servers[i] != nil { 59 | cfg.servers[i].Kill() 60 | } 61 | } 62 | cfg.net.Cleanup() 63 | cfg.checkTimeout() 64 | } 65 | 66 | // Maximum log size across all servers 67 | func (cfg *config) LogSize() int { 68 | logsize := 0 69 | for i := 0; i < cfg.n; i++ { 70 | n := cfg.saved[i].RaftStateSize() 71 | if n > logsize { 72 | logsize = n 73 | } 74 | } 75 | return logsize 76 | } 77 | 78 | // attach server i to servers listed in to 79 | // caller must hold cfg.mu 80 | func (cfg *config) connectUnlocked(i int, to []int) { 81 | // log.Printf("connect peer %d to %v\n", i, to) 82 | 83 | // outgoing socket files 84 | for j := 0; j < len(to); j++ { 85 | endname := cfg.endnames[i][to[j]] 86 | cfg.net.Enable(endname, true) 87 | } 88 | 89 | // incoming socket files 90 | for j := 0; j < len(to); j++ { 91 | endname := cfg.endnames[to[j]][i] 92 | cfg.net.Enable(endname, true) 93 | } 94 | } 95 | 96 | func (cfg *config) connect(i int, to []int) { 97 | cfg.mu.Lock() 98 | defer cfg.mu.Unlock() 99 | cfg.connectUnlocked(i, to) 100 | } 101 | 102 | // detach server i from the servers listed in from 103 | // caller must hold cfg.mu 104 | func (cfg 
*config) disconnectUnlocked(i int, from []int) { 105 | // log.Printf("disconnect peer %d from %v\n", i, from) 106 | 107 | // outgoing socket files 108 | for j := 0; j < len(from); j++ { 109 | if cfg.endnames[i] != nil { 110 | endname := cfg.endnames[i][from[j]] 111 | cfg.net.Enable(endname, false) 112 | } 113 | } 114 | 115 | // incoming socket files 116 | for j := 0; j < len(from); j++ { 117 | if cfg.endnames[j] != nil { 118 | endname := cfg.endnames[from[j]][i] 119 | cfg.net.Enable(endname, false) 120 | } 121 | } 122 | } 123 | 124 | func (cfg *config) disconnect(i int, from []int) { 125 | cfg.mu.Lock() 126 | defer cfg.mu.Unlock() 127 | cfg.disconnectUnlocked(i, from) 128 | } 129 | 130 | func (cfg *config) All() []int { 131 | all := make([]int, cfg.n) 132 | for i := 0; i < cfg.n; i++ { 133 | all[i] = i 134 | } 135 | return all 136 | } 137 | 138 | func (cfg *config) ConnectAll() { 139 | cfg.mu.Lock() 140 | defer cfg.mu.Unlock() 141 | for i := 0; i < cfg.n; i++ { 142 | cfg.connectUnlocked(i, cfg.All()) 143 | } 144 | } 145 | 146 | // Sets up 2 partitions with connectivity between servers in each partition. 147 | func (cfg *config) partition(p1 []int, p2 []int) { 148 | cfg.mu.Lock() 149 | defer cfg.mu.Unlock() 150 | // log.Printf("partition servers into: %v %v\n", p1, p2) 151 | for i := 0; i < len(p1); i++ { 152 | cfg.disconnectUnlocked(p1[i], p2) 153 | cfg.connectUnlocked(p1[i], p1) 154 | } 155 | for i := 0; i < len(p2); i++ { 156 | cfg.disconnectUnlocked(p2[i], p1) 157 | cfg.connectUnlocked(p2[i], p2) 158 | } 159 | } 160 | 161 | // Create a clerk with clerk specific server names. 162 | // Give it connections to all of the servers, but for 163 | // now enable only connections to servers in to[]. 164 | func (cfg *config) makeClient(to []int) *Clerk { 165 | cfg.mu.Lock() 166 | defer cfg.mu.Unlock() 167 | 168 | // a fresh set of ClientEnds. 
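// each clerk gets its own randomly named ends, recorded in cfg.clerks, so the harness can enable or disable this client's connections independently via ConnectClientUnlocked/DisconnectClientUnlocked.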
169 | ends := make([]*labrpc.ClientEnd, cfg.n) 170 | endnames := make([]string, cfg.n) 171 | for j := 0; j < cfg.n; j++ { 172 | endnames[j] = randstring(20) 173 | ends[j] = cfg.net.MakeEnd(endnames[j]) 174 | cfg.net.Connect(endnames[j], j) 175 | } 176 | 177 | ck := MakeClerk(random_handles(ends)) 178 | cfg.clerks[ck] = endnames 179 | cfg.nextClientId++ 180 | cfg.ConnectClientUnlocked(ck, to) 181 | return ck 182 | } 183 | 184 | func (cfg *config) deleteClient(ck *Clerk) { 185 | cfg.mu.Lock() 186 | defer cfg.mu.Unlock() 187 | 188 | v := cfg.clerks[ck] 189 | for i := 0; i < len(v); i++ { 190 | os.Remove(v[i]) 191 | } 192 | delete(cfg.clerks, ck) 193 | } 194 | 195 | // caller should hold cfg.mu 196 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 197 | // log.Printf("ConnectClient %v to %v\n", ck, to) 198 | endnames := cfg.clerks[ck] 199 | for j := 0; j < len(to); j++ { 200 | s := endnames[to[j]] 201 | cfg.net.Enable(s, true) 202 | } 203 | } 204 | 205 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 206 | cfg.mu.Lock() 207 | defer cfg.mu.Unlock() 208 | cfg.ConnectClientUnlocked(ck, to) 209 | } 210 | 211 | // caller should hold cfg.mu 212 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 213 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 214 | endnames := cfg.clerks[ck] 215 | for j := 0; j < len(from); j++ { 216 | s := endnames[from[j]] 217 | cfg.net.Enable(s, false) 218 | } 219 | } 220 | 221 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 222 | cfg.mu.Lock() 223 | defer cfg.mu.Unlock() 224 | cfg.DisconnectClientUnlocked(ck, from) 225 | } 226 | 227 | // Shutdown a server by isolating it 228 | func (cfg *config) ShutdownServer(i int) { 229 | cfg.mu.Lock() 230 | defer cfg.mu.Unlock() 231 | 232 | cfg.disconnectUnlocked(i, cfg.All()) 233 | 234 | // disable client connections to the server. 235 | // it's important to do this before creating 236 | // the new Persister in saved[i], to avoid 237 | // the possibility of the server returning a 238 | // positive reply to an Append but persisting 239 | // the result in the superseded Persister. 240 | cfg.net.DeleteServer(i) 241 | 242 | // a fresh persister, in case old instance 243 | // continues to update the Persister. 244 | // but copy old persister's content so that we always 245 | // pass Make() the last persisted state. 246 | if cfg.saved[i] != nil { 247 | cfg.saved[i] = cfg.saved[i].Copy() 248 | } 249 | 250 | kv := cfg.servers[i] 251 | if kv != nil { 252 | cfg.mu.Unlock() 253 | kv.Kill() 254 | cfg.mu.Lock() 255 | cfg.servers[i] = nil 256 | } 257 | } 258 | 259 | // If restart servers, first call ShutdownServer 260 | func (cfg *config) StartServer(i int) { 261 | cfg.mu.Lock() 262 | 263 | // a fresh set of outgoing ClientEnd names. 264 | cfg.endnames[i] = make([]string, cfg.n) 265 | for j := 0; j < cfg.n; j++ { 266 | cfg.endnames[i][j] = randstring(20) 267 | } 268 | 269 | // a fresh set of ClientEnds. 270 | ends := make([]*labrpc.ClientEnd, cfg.n) 271 | for j := 0; j < cfg.n; j++ { 272 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 273 | cfg.net.Connect(cfg.endnames[i][j], j) 274 | } 275 | 276 | // a fresh persister, so old instance doesn't overwrite 277 | // new instance's persisted state. 278 | // give the fresh persister a copy of the old persister's 279 | // state, so that the spec is that we pass StartKVServer() 280 | // the last persisted state. 
281 | if cfg.saved[i] != nil { 282 | cfg.saved[i] = cfg.saved[i].Copy() 283 | } else { 284 | cfg.saved[i] = raft.MakePersister() 285 | } 286 | 287 | cfg.mu.Unlock() 288 | 289 | cfg.servers[i] = StartServer(ends, i, cfg.saved[i]) 290 | 291 | kvsvc := labrpc.MakeService(cfg.servers[i]) 292 | rfsvc := labrpc.MakeService(cfg.servers[i].rf) 293 | srv := labrpc.MakeServer() 294 | srv.AddService(kvsvc) 295 | srv.AddService(rfsvc) 296 | cfg.net.AddServer(i, srv) 297 | } 298 | 299 | func (cfg *config) Leader() (bool, int) { 300 | cfg.mu.Lock() 301 | defer cfg.mu.Unlock() 302 | 303 | for i := 0; i < cfg.n; i++ { 304 | if cfg.servers[i] != nil { 305 | _, is_leader := cfg.servers[i].rf.GetState() 306 | if is_leader { 307 | return true, i 308 | } 309 | } 310 | } 311 | return false, 0 312 | } 313 | 314 | // Partition servers into 2 groups and put current leader in minority 315 | func (cfg *config) make_partition() ([]int, []int) { 316 | _, l := cfg.Leader() 317 | p1 := make([]int, cfg.n/2+1) 318 | p2 := make([]int, cfg.n/2) 319 | j := 0 320 | for i := 0; i < cfg.n; i++ { 321 | if i != l { 322 | if j < len(p1) { 323 | p1[j] = i 324 | } else { 325 | p2[j-len(p1)] = i 326 | } 327 | j++ 328 | } 329 | } 330 | p2[len(p2)-1] = l 331 | return p1, p2 332 | } 333 | 334 | func make_config(t *testing.T, n int, unreliable bool) *config { 335 | runtime.GOMAXPROCS(4) 336 | cfg := &config{} 337 | cfg.t = t 338 | cfg.net = labrpc.MakeNetwork() 339 | cfg.n = n 340 | cfg.servers = make([]*ShardCtrler, cfg.n) 341 | cfg.saved = make([]*raft.Persister, cfg.n) 342 | cfg.endnames = make([][]string, cfg.n) 343 | cfg.clerks = make(map[*Clerk][]string) 344 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 345 | cfg.start = time.Now() 346 | 347 | // create a full set of KV servers. 
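// start every replica first, then connect all endpoints, and finally set the network's reliability from the test's unreliable flag.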
348 | for i := 0; i < cfg.n; i++ { 349 | cfg.StartServer(i) 350 | } 351 | 352 | cfg.ConnectAll() 353 | 354 | cfg.net.Reliable(!unreliable) 355 | 356 | return cfg 357 | } 358 | -------------------------------------------------------------------------------- /src/porcupine/checker.go: -------------------------------------------------------------------------------- 1 | package porcupine 2 | 3 | import ( 4 | "sort" 5 | "sync/atomic" 6 | "time" 7 | ) 8 | 9 | type entryKind bool 10 | 11 | const ( 12 | callEntry entryKind = false 13 | returnEntry = true 14 | ) 15 | 16 | type entry struct { 17 | kind entryKind 18 | value interface{} 19 | id int 20 | time int64 21 | clientId int 22 | } 23 | 24 | type linearizationInfo struct { 25 | history [][]entry // for each partition, a list of entries 26 | partialLinearizations [][][]int // for each partition, a set of histories (list of ids) 27 | } 28 | 29 | type byTime []entry 30 | 31 | func (a byTime) Len() int { 32 | return len(a) 33 | } 34 | 35 | func (a byTime) Swap(i, j int) { 36 | a[i], a[j] = a[j], a[i] 37 | } 38 | 39 | func (a byTime) Less(i, j int) bool { 40 | if a[i].time != a[j].time { 41 | return a[i].time < a[j].time 42 | } 43 | // if the timestamps are the same, we need to make sure we order calls 44 | // before returns 45 | return a[i].kind == callEntry && a[j].kind == returnEntry 46 | } 47 | 48 | func makeEntries(history []Operation) []entry { 49 | var entries []entry = nil 50 | id := 0 51 | for _, elem := range history { 52 | entries = append(entries, entry{ 53 | callEntry, elem.Input, id, elem.Call, elem.ClientId}) 54 | entries = append(entries, entry{ 55 | returnEntry, elem.Output, id, elem.Return, elem.ClientId}) 56 | id++ 57 | } 58 | sort.Sort(byTime(entries)) 59 | return entries 60 | } 61 | 62 | type node struct { 63 | value interface{} 64 | match *node // call if match is nil, otherwise return 65 | id int 66 | next *node 67 | prev *node 68 | } 69 | 70 | func insertBefore(n *node, mark *node) *node { 71 | if mark != nil { 72 | beforeMark := mark.prev 73 | mark.prev = n 74 | n.next = mark 75 | if beforeMark != nil { 76 | n.prev = beforeMark 77 | beforeMark.next = n 78 | } 79 | } 80 | return n 81 | } 82 | 83 | func length(n *node) int { 84 | l := 0 85 | for n != nil { 86 | n = n.next 87 | l++ 88 | } 89 | return l 90 | } 91 | 92 | func renumber(events []Event) []Event { 93 | var e []Event 94 | m := make(map[int]int) // renumbering 95 | id := 0 96 | for _, v := range events { 97 | if r, ok := m[v.Id]; ok { 98 | e = append(e, Event{v.ClientId, v.Kind, v.Value, r}) 99 | } else { 100 | e = append(e, Event{v.ClientId, v.Kind, v.Value, id}) 101 | m[v.Id] = id 102 | id++ 103 | } 104 | } 105 | return e 106 | } 107 | 108 | func convertEntries(events []Event) []entry { 109 | var entries []entry 110 | for i, elem := range events { 111 | kind := callEntry 112 | if elem.Kind == ReturnEvent { 113 | kind = returnEntry 114 | } 115 | // use index as "time" 116 | entries = append(entries, entry{kind, elem.Value, elem.Id, int64(i), elem.ClientId}) 117 | } 118 | return entries 119 | } 120 | 121 | func makeLinkedEntries(entries []entry) *node { 122 | var root *node = nil 123 | match := make(map[int]*node) 124 | for i := len(entries) - 1; i >= 0; i-- { 125 | elem := entries[i] 126 | if elem.kind == returnEntry { 127 | entry := &node{value: elem.value, match: nil, id: elem.id} 128 | match[elem.id] = entry 129 | insertBefore(entry, root) 130 | root = entry 131 | } else { 132 | entry := &node{value: elem.value, match: match[elem.id], id: elem.id} 133 | 
insertBefore(entry, root) 134 | root = entry 135 | } 136 | } 137 | return root 138 | } 139 | 140 | type cacheEntry struct { 141 | linearized bitset 142 | state interface{} 143 | } 144 | 145 | func cacheContains(model Model, cache map[uint64][]cacheEntry, entry cacheEntry) bool { 146 | for _, elem := range cache[entry.linearized.hash()] { 147 | if entry.linearized.equals(elem.linearized) && model.Equal(entry.state, elem.state) { 148 | return true 149 | } 150 | } 151 | return false 152 | } 153 | 154 | type callsEntry struct { 155 | entry *node 156 | state interface{} 157 | } 158 | 159 | func lift(entry *node) { 160 | entry.prev.next = entry.next 161 | entry.next.prev = entry.prev 162 | match := entry.match 163 | match.prev.next = match.next 164 | if match.next != nil { 165 | match.next.prev = match.prev 166 | } 167 | } 168 | 169 | func unlift(entry *node) { 170 | match := entry.match 171 | match.prev.next = match 172 | if match.next != nil { 173 | match.next.prev = match 174 | } 175 | entry.prev.next = entry 176 | entry.next.prev = entry 177 | } 178 | 179 | func checkSingle(model Model, history []entry, computePartial bool, kill *int32) (bool, []*[]int) { 180 | entry := makeLinkedEntries(history) 181 | n := length(entry) / 2 182 | linearized := newBitset(uint(n)) 183 | cache := make(map[uint64][]cacheEntry) // map from hash to cache entry 184 | var calls []callsEntry 185 | // longest linearizable prefix that includes the given entry 186 | longest := make([]*[]int, n) 187 | 188 | state := model.Init() 189 | headEntry := insertBefore(&node{value: nil, match: nil, id: -1}, entry) 190 | for headEntry.next != nil { 191 | if atomic.LoadInt32(kill) != 0 { 192 | return false, longest 193 | } 194 | if entry.match != nil { 195 | matching := entry.match // the return entry 196 | ok, newState := model.Step(state, entry.value, matching.value) 197 | if ok { 198 | newLinearized := linearized.clone().set(uint(entry.id)) 199 | newCacheEntry := cacheEntry{newLinearized, newState} 200 | if !cacheContains(model, cache, newCacheEntry) { 201 | hash := newLinearized.hash() 202 | cache[hash] = append(cache[hash], newCacheEntry) 203 | calls = append(calls, callsEntry{entry, state}) 204 | state = newState 205 | linearized.set(uint(entry.id)) 206 | lift(entry) 207 | entry = headEntry.next 208 | } else { 209 | entry = entry.next 210 | } 211 | } else { 212 | entry = entry.next 213 | } 214 | } else { 215 | if len(calls) == 0 { 216 | return false, longest 217 | } 218 | // longest 219 | if computePartial { 220 | callsLen := len(calls) 221 | var seq []int = nil 222 | for _, v := range calls { 223 | if longest[v.entry.id] == nil || callsLen > len(*longest[v.entry.id]) { 224 | // create seq lazily 225 | if seq == nil { 226 | seq = make([]int, len(calls)) 227 | for i, v := range calls { 228 | seq[i] = v.entry.id 229 | } 230 | } 231 | longest[v.entry.id] = &seq 232 | } 233 | } 234 | } 235 | callsTop := calls[len(calls)-1] 236 | entry = callsTop.entry 237 | state = callsTop.state 238 | linearized.clear(uint(entry.id)) 239 | calls = calls[:len(calls)-1] 240 | unlift(entry) 241 | entry = entry.next 242 | } 243 | } 244 | // longest linearization is the complete linearization, which is calls 245 | seq := make([]int, len(calls)) 246 | for i, v := range calls { 247 | seq[i] = v.entry.id 248 | } 249 | for i := 0; i < n; i++ { 250 | longest[i] = &seq 251 | } 252 | return true, longest 253 | } 254 | 255 | func fillDefault(model Model) Model { 256 | if model.Partition == nil { 257 | model.Partition = NoPartition 258 | } 259 | if 
model.PartitionEvent == nil { 260 | model.PartitionEvent = NoPartitionEvent 261 | } 262 | if model.Equal == nil { 263 | model.Equal = ShallowEqual 264 | } 265 | if model.DescribeOperation == nil { 266 | model.DescribeOperation = DefaultDescribeOperation 267 | } 268 | if model.DescribeState == nil { 269 | model.DescribeState = DefaultDescribeState 270 | } 271 | return model 272 | } 273 | 274 | func checkParallel(model Model, history [][]entry, computeInfo bool, timeout time.Duration) (CheckResult, linearizationInfo) { 275 | ok := true 276 | timedOut := false 277 | results := make(chan bool, len(history)) 278 | longest := make([][]*[]int, len(history)) 279 | kill := int32(0) 280 | for i, subhistory := range history { 281 | go func(i int, subhistory []entry) { 282 | ok, l := checkSingle(model, subhistory, computeInfo, &kill) 283 | longest[i] = l 284 | results <- ok 285 | }(i, subhistory) 286 | } 287 | var timeoutChan <-chan time.Time 288 | if timeout > 0 { 289 | timeoutChan = time.After(timeout) 290 | } 291 | count := 0 292 | loop: 293 | for { 294 | select { 295 | case result := <-results: 296 | count++ 297 | ok = ok && result 298 | if !ok && !computeInfo { 299 | atomic.StoreInt32(&kill, 1) 300 | break loop 301 | } 302 | if count >= len(history) { 303 | break loop 304 | } 305 | case <-timeoutChan: 306 | timedOut = true 307 | atomic.StoreInt32(&kill, 1) 308 | break loop // if we time out, we might get a false positive 309 | } 310 | } 311 | var info linearizationInfo 312 | if computeInfo { 313 | // make sure we've waited for all goroutines to finish, 314 | // otherwise we might race on access to longest[] 315 | for count < len(history) { 316 | <-results 317 | count++ 318 | } 319 | // return longest linearizable prefixes that include each history element 320 | partialLinearizations := make([][][]int, len(history)) 321 | for i := 0; i < len(history); i++ { 322 | var partials [][]int 323 | // turn longest into a set of unique linearizations 324 | set := make(map[*[]int]struct{}) 325 | for _, v := range longest[i] { 326 | if v != nil { 327 | set[v] = struct{}{} 328 | } 329 | } 330 | for k := range set { 331 | arr := make([]int, len(*k)) 332 | for i, v := range *k { 333 | arr[i] = v 334 | } 335 | partials = append(partials, arr) 336 | } 337 | partialLinearizations[i] = partials 338 | } 339 | info.history = history 340 | info.partialLinearizations = partialLinearizations 341 | } 342 | var result CheckResult 343 | if !ok { 344 | result = Illegal 345 | } else { 346 | if timedOut { 347 | result = Unknown 348 | } else { 349 | result = Ok 350 | } 351 | } 352 | return result, info 353 | } 354 | 355 | func checkEvents(model Model, history []Event, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) { 356 | model = fillDefault(model) 357 | partitions := model.PartitionEvent(history) 358 | l := make([][]entry, len(partitions)) 359 | for i, subhistory := range partitions { 360 | l[i] = convertEntries(renumber(subhistory)) 361 | } 362 | return checkParallel(model, l, verbose, timeout) 363 | } 364 | 365 | func checkOperations(model Model, history []Operation, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) { 366 | model = fillDefault(model) 367 | partitions := model.Partition(history) 368 | l := make([][]entry, len(partitions)) 369 | for i, subhistory := range partitions { 370 | l[i] = makeEntries(subhistory) 371 | } 372 | return checkParallel(model, l, verbose, timeout) 373 | } 374 | --------------------------------------------------------------------------------