├── 6.824
│   ├── .gitignore
│   ├── Makefile
│   └── src
│       ├── diskv
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── kvpaxos
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── kvraft
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── config.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── labrpc
│       │   ├── labrpc.go
│       │   └── test_test.go
│       ├── lockservice
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── main
│       │   ├── diskvd
│       │   ├── diskvd.go
│       │   ├── ii.go
│       │   ├── lockc.go
│       │   ├── lockd.go
│       │   ├── mr-challenge.txt
│       │   ├── mr-testout.txt
│       │   ├── pbc.go
│       │   ├── pbd.go
│       │   ├── pg-being_ernest.txt
│       │   ├── pg-dorian_gray.txt
│       │   ├── pg-dracula.txt
│       │   ├── pg-emma.txt
│       │   ├── pg-frankenstein.txt
│       │   ├── pg-great_expectations.txt
│       │   ├── pg-grimm.txt
│       │   ├── pg-huckleberry_finn.txt
│       │   ├── pg-les_miserables.txt
│       │   ├── pg-metamorphosis.txt
│       │   ├── pg-moby_dick.txt
│       │   ├── pg-sherlock_holmes.txt
│       │   ├── pg-tale_of_two_cities.txt
│       │   ├── pg-tom_sawyer.txt
│       │   ├── pg-ulysses.txt
│       │   ├── pg-war_and_peace.txt
│       │   ├── test-ii.sh
│       │   ├── test-mr.sh
│       │   ├── test-wc.sh
│       │   ├── viewd.go
│       │   └── wc.go
│       ├── mapreduce
│       │   ├── common.go
│       │   ├── common_map.go
│       │   ├── common_reduce.go
│       │   ├── common_rpc.go
│       │   ├── master.go
│       │   ├── master_rpc.go
│       │   ├── master_splitmerge.go
│       │   ├── readme.go
│       │   ├── schedule.go
│       │   ├── test_test.go
│       │   └── worker.go
│       ├── paxos-shardkv
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── paxos-shardmaster
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── paxos
│       │   ├── paxos.go
│       │   └── test_test.go
│       ├── pbservice
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── raft
│       │   ├── config.go
│       │   ├── persister.go
│       │   ├── raft.go
│       │   ├── test_test.go
│       │   └── util.go
│       ├── shardkv
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── config.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── shardmaster
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── config.go
│       │   ├── server.go
│       │   └── test_test.go
│       └── viewservice
│           ├── client.go
│           ├── common.go
│           ├── server.go
│           └── test_test.go
├── Lec01_Introduction
│   ├── l01.md
│   ├── l01.txt
│   ├── lab1.md
│   └── mapreduce.pdf
├── Lec02_RPC_and_Threads
│   ├── l-rpc.md
│   └── l-rpc.txt
├── Lec03_GFS
│   ├── Bolosky.pdf
│   ├── GFS.md
│   ├── Question.md
│   ├── gfs.pdf
│   └── l-gfs.txt
├── Lec04_Primary_Backup_Replication
│   ├── l-vm-ft.txt
│   └── vm-ft.pdf
├── Lec05_Fault_Tolerance_Raft
│   ├── l-raft.txt
│   ├── lab2_Raft.md
│   ├── raft-extended.pdf
│   ├── raft-zh
│   │   ├── .gitignore
│   │   ├── README.md
│   │   └── raft-zh_cn.md
│   ├── raft.md
│   └── 寻找一种易于理解的一致性算法.doc
├── Lec06_Fault_Tolerance_Raft
│   └── l-raft2.txt
├── Lec07_Guest_lecturer_on_Go
│   └── gomem.pdf
├── Lec08_Zookeeper
│   ├── l-zookeeper.txt
│   └── zookeeper.pdf
├── Lec09_Distributed_Transactions
│   ├── l-2pc.txt
│   └── thor95.pdf
├── Lec10_Optimistic_Concurrency_Control
│   └── l-occ.txt
├── Lec11_FaRM
│   ├── farm-2015.pdf
│   └── l-farm.txt
├── Lec13_Disconnected_Operation_Eventual_Consistency
│   ├── bayou-conflicts.pdf
│   └── l-bayou.txt
├── Lec14_Case Studs_Relaxed_Consistency
│   ├── cooper-pnuts.pdf
│   └── l-pnuts.txt
├── Lec15_Case_Studis_Dynamo
│   ├── dynamo.pdf
│   └── l-dynamo.txt
├── Lec16_Wide-Area Publish_Subscribe
│   ├── l-wormhole.txt
│   └── wormhole.pdf
├── Lec17_Measuring_Consistency
│   ├── fb-consistency.pdf
│   └── l-existential.txt
├── Lec18_Case_Studies_Spark
│   ├── l-spark.txt
│   └── zaharia-spark.pdf
├── Lec19_Cluster_Management
│   ├── borg.pdf
│   └── l-borg.txt
├── Lec20_Peer-to-peer_Trackerless_Bittorrent_and_DHTs
│   ├── bep_0005.html
│   ├── bep_0005_files
│   │   └── bep.css
│   ├── l-dht.txt
│   └── stoica-chord.pdf
├── Lec21_Peer-to-peer_Bitcoin
│   ├── bitcoin.pdf
│   └── l-bitcoin.txt
├── Lec23_Project_demos
│   └── katabi-analogicfs.pdf
└── README.md
/6.824/.gitignore: -------------------------------------------------------------------------------- 1 | pkg/ 2 | api.key 3 | *-handin.tar.gz 4 | -------------------------------------------------------------------------------- /6.824/Makefile: -------------------------------------------------------------------------------- 1 | # This is the Makefile helping you submit the labs. 2 | # Just create 6.824/api.key with your API key in it, 3 | # and submit your lab with the following command: 4 | # $ make [lab1|lab2|lab3a|lab3b|lab4a|lab4b|lab5] 5 | 6 | LABS=" lab1 lab2 lab3a lab3b lab4a lab4b lab5 " 7 | 8 | %: 9 | @echo "Preparing $@-handin.tar.gz" 10 | @echo "Checking for committed temporary files..." 11 | @if git ls-files | grep -E 'mrtmp|mrinput' > /dev/null; then \ 12 | echo "" ; \ 13 | echo "OBS! You have committed some large temporary files:" ; \ 14 | echo "" ; \ 15 | git ls-files | grep -E 'mrtmp|mrinput' | sed 's/^/\t/' ; \ 16 | echo "" ; \ 17 | echo "Follow the instructions at http://stackoverflow.com/a/308684/472927" ; \ 18 | echo "to remove them, and then run make again." ; \ 19 | echo "" ; \ 20 | exit 1 ; \ 21 | fi 22 | @if echo $(LABS) | grep -q " $@ " ; then \ 23 | echo "Tarring up your submission..." ; \ 24 | tar cvzf $@-handin.tar.gz \ 25 | "--exclude=src/main/pg-*.txt" \ 26 | "--exclude=src/main/diskvd" \ 27 | "--exclude=src/mapreduce/824-mrinput-*.txt" \ 28 | "--exclude=mrtmp.*" \ 29 | "--exclude=src/main/diff.out" \ 30 | Makefile src; \ 31 | if ! test -e api.key ; then \ 32 | echo "Missing $(PWD)/api.key. Please create the file with your key in it or submit the $@-handin.tar.gz via the web interface."; \ 33 | else \ 34 | echo "Are you sure you want to submit $@? Enter 'yes' to continue:"; \ 35 | read line; \ 36 | if test "$$line" != "yes" ; then echo "Giving up submission"; exit; fi; \ 37 | if test `stat -c "%s" "$@-handin.tar.gz" 2>/dev/null || stat -f "%z" "$@-handin.tar.gz"` -ge 20971520 ; then echo "File exceeds 20MB."; exit; fi; \ 38 | mv api.key api.key.fix ; \ 39 | cat api.key.fix | tr -d '\n' > api.key ; \ 40 | rm api.key.fix ; \ 41 | curl -F file=@$@-handin.tar.gz -F "key= 0 { 74 | shard = int(key[0]) 75 | } 76 | shard %= shardmaster.NShards 77 | return shard 78 | } 79 | 80 | // 81 | // fetch the current value for a key. 82 | // returns "" if the key does not exist. 83 | // keeps trying forever in the face of all other errors. 84 | // 85 | func (ck *Clerk) Get(key string) string { 86 | ck.mu.Lock() 87 | defer ck.mu.Unlock() 88 | 89 | // You'll have to modify Get(). 90 | 91 | for { 92 | shard := key2shard(key) 93 | 94 | gid := ck.config.Shards[shard] 95 | 96 | servers, ok := ck.config.Groups[gid] 97 | 98 | if ok { 99 | // try each server in the shard's replication group. 100 | for _, srv := range servers { 101 | args := &GetArgs{} 102 | args.Key = key 103 | var reply GetReply 104 | ok := call(srv, "DisKV.Get", args, &reply) 105 | if ok && (reply.Err == OK || reply.Err == ErrNoKey) { 106 | return reply.Value 107 | } 108 | if ok && (reply.Err == ErrWrongGroup) { 109 | break 110 | } 111 | } 112 | } 113 | 114 | time.Sleep(100 * time.Millisecond) 115 | 116 | // ask master for a new configuration. 117 | ck.config = ck.sm.Query(-1) 118 | } 119 | } 120 | 121 | // send a Put or Append request. 122 | func (ck *Clerk) PutAppend(key string, value string, op string) { 123 | ck.mu.Lock() 124 | defer ck.mu.Unlock() 125 | 126 | // You'll have to modify PutAppend(). 
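	// One common refinement here (illustrative only, not part of the skeleton):
	// tag every request with a client ID and a per-client sequence number so
	// that servers can recognize and drop duplicate PutAppends produced by the
	// retry loop below, e.g.
	//
	//   args.ClientID = ck.id   // hypothetical fields you might add to Clerk / PutAppendArgs
	//   args.Seq = ck.seq
	//   ck.seq++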
127 | 128 | for { 129 | shard := key2shard(key) 130 | 131 | gid := ck.config.Shards[shard] 132 | 133 | servers, ok := ck.config.Groups[gid] 134 | 135 | if ok { 136 | // try each server in the shard's replication group. 137 | for _, srv := range servers { 138 | args := &PutAppendArgs{} 139 | args.Key = key 140 | args.Value = value 141 | args.Op = op 142 | var reply PutAppendReply 143 | ok := call(srv, "DisKV.PutAppend", args, &reply) 144 | if ok && reply.Err == OK { 145 | return 146 | } 147 | if ok && (reply.Err == ErrWrongGroup) { 148 | break 149 | } 150 | } 151 | } 152 | 153 | time.Sleep(100 * time.Millisecond) 154 | 155 | // ask master for a new configuration. 156 | ck.config = ck.sm.Query(-1) 157 | } 158 | } 159 | 160 | func (ck *Clerk) Put(key string, value string) { 161 | ck.PutAppend(key, value, "Put") 162 | } 163 | func (ck *Clerk) Append(key string, value string) { 164 | ck.PutAppend(key, value, "Append") 165 | } 166 | -------------------------------------------------------------------------------- /6.824/src/diskv/common.go: -------------------------------------------------------------------------------- 1 | package diskv 2 | 3 | // 4 | // Sharded key/value server. 5 | // Lots of replica groups, each running op-at-a-time paxos. 6 | // Shardmaster decides which group serves each shard. 7 | // Shardmaster may change shard assignment from time to time. 8 | // 9 | // You will have to modify these definitions. 10 | // 11 | 12 | const ( 13 | OK = "OK" 14 | ErrNoKey = "ErrNoKey" 15 | ErrWrongGroup = "ErrWrongGroup" 16 | ) 17 | 18 | type Err string 19 | 20 | type PutAppendArgs struct { 21 | Key string 22 | Value string 23 | Op string // "Put" or "Append" 24 | // You'll have to add definitions here. 25 | // Field names must start with capital letters, 26 | // otherwise RPC will break. 27 | 28 | } 29 | 30 | type PutAppendReply struct { 31 | Err Err 32 | } 33 | 34 | type GetArgs struct { 35 | Key string 36 | // You'll have to add definitions here. 37 | } 38 | 39 | type GetReply struct { 40 | Err Err 41 | Value string 42 | } 43 | 44 | -------------------------------------------------------------------------------- /6.824/src/diskv/server.go: -------------------------------------------------------------------------------- 1 | package diskv 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "time" 8 | import "paxos" 9 | import "sync" 10 | import "sync/atomic" 11 | import "os" 12 | import "syscall" 13 | import "encoding/gob" 14 | import "encoding/base32" 15 | import "math/rand" 16 | import "shardmaster" 17 | import "io/ioutil" 18 | import "strconv" 19 | 20 | 21 | const Debug = 0 22 | 23 | func DPrintf(format string, a ...interface{}) (n int, err error) { 24 | if Debug > 0 { 25 | log.Printf(format, a...) 26 | } 27 | return 28 | } 29 | 30 | 31 | type Op struct { 32 | // Your definitions here. 33 | } 34 | 35 | 36 | type DisKV struct { 37 | mu sync.Mutex 38 | l net.Listener 39 | me int 40 | dead int32 // for testing 41 | unreliable int32 // for testing 42 | sm *shardmaster.Clerk 43 | px *paxos.Paxos 44 | dir string // each replica has its own data directory 45 | 46 | gid int64 // my replica group ID 47 | 48 | // Your definitions here. 49 | } 50 | 51 | // 52 | // these are handy functions that might be useful 53 | // for reading and writing key/value files, and 54 | // for reading and writing entire shards. 55 | // puts the key files for each shard in a separate 56 | // directory. 
57 | // 58 | 59 | func (kv *DisKV) shardDir(shard int) string { 60 | d := kv.dir + "/shard-" + strconv.Itoa(shard) + "/" 61 | // create directory if needed. 62 | _, err := os.Stat(d) 63 | if err != nil { 64 | if err := os.Mkdir(d, 0777); err != nil { 65 | log.Fatalf("Mkdir(%v): %v", d, err) 66 | } 67 | } 68 | return d 69 | } 70 | 71 | // cannot use keys in file names directly, since 72 | // they might contain troublesome characters like /. 73 | // base32-encode the key to get a file name. 74 | // base32 rather than base64 b/c Mac has case-insensitive 75 | // file names. 76 | func (kv *DisKV) encodeKey(key string) string { 77 | return base32.StdEncoding.EncodeToString([]byte(key)) 78 | } 79 | 80 | func (kv *DisKV) decodeKey(filename string) (string, error) { 81 | key, err := base32.StdEncoding.DecodeString(filename) 82 | return string(key), err 83 | } 84 | 85 | // read the content of a key's file. 86 | func (kv *DisKV) fileGet(shard int, key string) (string, error) { 87 | fullname := kv.shardDir(shard) + "/key-" + kv.encodeKey(key) 88 | content, err := ioutil.ReadFile(fullname) 89 | return string(content), err 90 | } 91 | 92 | // replace the content of a key's file. 93 | // uses rename() to make the replacement atomic with 94 | // respect to crashes. 95 | func (kv *DisKV) filePut(shard int, key string, content string) error { 96 | fullname := kv.shardDir(shard) + "/key-" + kv.encodeKey(key) 97 | tempname := kv.shardDir(shard) + "/temp-" + kv.encodeKey(key) 98 | if err := ioutil.WriteFile(tempname, []byte(content), 0666); err != nil { 99 | return err 100 | } 101 | if err := os.Rename(tempname, fullname); err != nil { 102 | return err 103 | } 104 | return nil 105 | } 106 | 107 | // return content of every key file in a given shard. 108 | func (kv *DisKV) fileReadShard(shard int) map[string]string { 109 | m := map[string]string{} 110 | d := kv.shardDir(shard) 111 | files, err := ioutil.ReadDir(d) 112 | if err != nil { 113 | log.Fatalf("fileReadShard could not read %v: %v", d, err) 114 | } 115 | for _, fi := range files { 116 | n1 := fi.Name() 117 | if n1[0:4] == "key-" { 118 | key, err := kv.decodeKey(n1[4:]) 119 | if err != nil { 120 | log.Fatalf("fileReadShard bad file name %v: %v", n1, err) 121 | } 122 | content, err := kv.fileGet(shard, key) 123 | if err != nil { 124 | log.Fatalf("fileReadShard fileGet failed for %v: %v", key, err) 125 | } 126 | m[key] = content 127 | } 128 | } 129 | return m 130 | } 131 | 132 | // replace an entire shard directory. 133 | func (kv *DisKV) fileReplaceShard(shard int, m map[string]string) { 134 | d := kv.shardDir(shard) 135 | os.RemoveAll(d) // remove all existing files from shard. 136 | for k, v := range m { 137 | kv.filePut(shard, k, v) 138 | } 139 | } 140 | 141 | 142 | func (kv *DisKV) Get(args *GetArgs, reply *GetReply) error { 143 | // Your code here. 144 | return nil 145 | } 146 | 147 | // RPC handler for client Put and Append requests 148 | func (kv *DisKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 149 | // Your code here. 150 | return nil 151 | } 152 | 153 | // 154 | // Ask the shardmaster if there's a new configuration; 155 | // if so, re-configure. 156 | // 157 | func (kv *DisKV) tick() { 158 | // Your code here. 159 | } 160 | 161 | // tell the server to shut itself down. 162 | // please don't change these two functions. 163 | func (kv *DisKV) kill() { 164 | atomic.StoreInt32(&kv.dead, 1) 165 | kv.l.Close() 166 | kv.px.Kill() 167 | } 168 | 169 | // call this to find out if the server is dead. 
170 | func (kv *DisKV) isdead() bool { 171 | return atomic.LoadInt32(&kv.dead) != 0 172 | } 173 | 174 | // please do not change these two functions. 175 | func (kv *DisKV) Setunreliable(what bool) { 176 | if what { 177 | atomic.StoreInt32(&kv.unreliable, 1) 178 | } else { 179 | atomic.StoreInt32(&kv.unreliable, 0) 180 | } 181 | } 182 | 183 | func (kv *DisKV) isunreliable() bool { 184 | return atomic.LoadInt32(&kv.unreliable) != 0 185 | } 186 | 187 | // 188 | // Start a shardkv server. 189 | // gid is the ID of the server's replica group. 190 | // shardmasters[] contains the ports of the 191 | // servers that implement the shardmaster. 192 | // servers[] contains the ports of the servers 193 | // in this replica group. 194 | // Me is the index of this server in servers[]. 195 | // dir is the directory name under which this 196 | // replica should store all its files. 197 | // each replica is passed a different directory. 198 | // restart is false the very first time this server 199 | // is started, and true to indicate a re-start 200 | // after a crash or after a crash with disk loss. 201 | // 202 | func StartServer(gid int64, shardmasters []string, 203 | servers []string, me int, dir string, restart bool) *DisKV { 204 | 205 | kv := new(DisKV) 206 | kv.me = me 207 | kv.gid = gid 208 | kv.sm = shardmaster.MakeClerk(shardmasters) 209 | kv.dir = dir 210 | 211 | // Your initialization code here. 212 | // Don't call Join(). 213 | 214 | // log.SetOutput(ioutil.Discard) 215 | 216 | gob.Register(Op{}) 217 | 218 | rpcs := rpc.NewServer() 219 | rpcs.Register(kv) 220 | 221 | kv.px = paxos.Make(servers, me, rpcs) 222 | 223 | // log.SetOutput(os.Stdout) 224 | 225 | 226 | 227 | os.Remove(servers[me]) 228 | l, e := net.Listen("unix", servers[me]) 229 | if e != nil { 230 | log.Fatal("listen error: ", e) 231 | } 232 | kv.l = l 233 | 234 | // please do not change any of the following code, 235 | // or do anything to subvert it. 236 | 237 | go func() { 238 | for kv.isdead() == false { 239 | conn, err := kv.l.Accept() 240 | if err == nil && kv.isdead() == false { 241 | if kv.isunreliable() && (rand.Int63()%1000) < 100 { 242 | // discard the request. 243 | conn.Close() 244 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 { 245 | // process the request but force discard of reply. 246 | c1 := conn.(*net.UnixConn) 247 | f, _ := c1.File() 248 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 249 | if err != nil { 250 | fmt.Printf("shutdown: %v\n", err) 251 | } 252 | go rpcs.ServeConn(conn) 253 | } else { 254 | go rpcs.ServeConn(conn) 255 | } 256 | } else if err == nil { 257 | conn.Close() 258 | } 259 | if err != nil && kv.isdead() == false { 260 | fmt.Printf("DisKV(%v) accept: %v\n", me, err.Error()) 261 | kv.kill() 262 | } 263 | } 264 | }() 265 | 266 | go func() { 267 | for kv.isdead() == false { 268 | kv.tick() 269 | time.Sleep(250 * time.Millisecond) 270 | } 271 | }() 272 | 273 | return kv 274 | } 275 | -------------------------------------------------------------------------------- /6.824/src/kvpaxos/client.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import "net/rpc" 4 | import "crypto/rand" 5 | import "math/big" 6 | 7 | import "fmt" 8 | 9 | type Clerk struct { 10 | servers []string 11 | // You will have to modify this struct. 
12 | } 13 | 14 | func nrand() int64 { 15 | max := big.NewInt(int64(1) << 62) 16 | bigx, _ := rand.Int(rand.Reader, max) 17 | x := bigx.Int64() 18 | return x 19 | } 20 | 21 | func MakeClerk(servers []string) *Clerk { 22 | ck := new(Clerk) 23 | ck.servers = servers 24 | // You'll have to add code here. 25 | return ck 26 | } 27 | 28 | // 29 | // call() sends an RPC to the rpcname handler on server srv 30 | // with arguments args, waits for the reply, and leaves the 31 | // reply in reply. the reply argument should be a pointer 32 | // to a reply structure. 33 | // 34 | // the return value is true if the server responded, and false 35 | // if call() was not able to contact the server. in particular, 36 | // the reply's contents are only valid if call() returned true. 37 | // 38 | // you should assume that call() will return an 39 | // error after a while if the server is dead. 40 | // don't provide your own time-out mechanism. 41 | // 42 | // please use call() to send all RPCs, in client.go and server.go. 43 | // please don't change this function. 44 | // 45 | func call(srv string, rpcname string, 46 | args interface{}, reply interface{}) bool { 47 | c, errx := rpc.Dial("unix", srv) 48 | if errx != nil { 49 | return false 50 | } 51 | defer c.Close() 52 | 53 | err := c.Call(rpcname, args, reply) 54 | if err == nil { 55 | return true 56 | } 57 | 58 | fmt.Println(err) 59 | return false 60 | } 61 | 62 | // 63 | // fetch the current value for a key. 64 | // returns "" if the key does not exist. 65 | // keeps trying forever in the face of all other errors. 66 | // 67 | func (ck *Clerk) Get(key string) string { 68 | // You will have to modify this function. 69 | return "" 70 | } 71 | 72 | // 73 | // shared by Put and Append. 74 | // 75 | func (ck *Clerk) PutAppend(key string, value string, op string) { 76 | // You will have to modify this function. 77 | } 78 | 79 | func (ck *Clerk) Put(key string, value string) { 80 | ck.PutAppend(key, value, "Put") 81 | } 82 | func (ck *Clerk) Append(key string, value string) { 83 | ck.PutAppend(key, value, "Append") 84 | } 85 | -------------------------------------------------------------------------------- /6.824/src/kvpaxos/common.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ) 7 | 8 | type Err string 9 | 10 | // Put or Append 11 | type PutAppendArgs struct { 12 | // You'll have to add definitions here. 13 | Key string 14 | Value string 15 | Op string // "Put" or "Append" 16 | // You'll have to add definitions here. 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | } 20 | 21 | type PutAppendReply struct { 22 | Err Err 23 | } 24 | 25 | type GetArgs struct { 26 | Key string 27 | // You'll have to add definitions here. 28 | } 29 | 30 | type GetReply struct { 31 | Err Err 32 | Value string 33 | } 34 | -------------------------------------------------------------------------------- /6.824/src/kvpaxos/server.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "paxos" 8 | import "sync" 9 | import "sync/atomic" 10 | import "os" 11 | import "syscall" 12 | import "encoding/gob" 13 | import "math/rand" 14 | 15 | 16 | const Debug = 0 17 | 18 | func DPrintf(format string, a ...interface{}) (n int, err error) { 19 | if Debug > 0 { 20 | log.Printf(format, a...) 
21 | } 22 | return 23 | } 24 | 25 | 26 | type Op struct { 27 | // Your definitions here. 28 | // Field names must start with capital letters, 29 | // otherwise RPC will break. 30 | } 31 | 32 | type KVPaxos struct { 33 | mu sync.Mutex 34 | l net.Listener 35 | me int 36 | dead int32 // for testing 37 | unreliable int32 // for testing 38 | px *paxos.Paxos 39 | 40 | // Your definitions here. 41 | } 42 | 43 | 44 | func (kv *KVPaxos) Get(args *GetArgs, reply *GetReply) error { 45 | // Your code here. 46 | return nil 47 | } 48 | 49 | func (kv *KVPaxos) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 50 | // Your code here. 51 | 52 | return nil 53 | } 54 | 55 | // tell the server to shut itself down. 56 | // please do not change these two functions. 57 | func (kv *KVPaxos) kill() { 58 | DPrintf("Kill(%d): die\n", kv.me) 59 | atomic.StoreInt32(&kv.dead, 1) 60 | kv.l.Close() 61 | kv.px.Kill() 62 | } 63 | 64 | // call this to find out if the server is dead. 65 | func (kv *KVPaxos) isdead() bool { 66 | return atomic.LoadInt32(&kv.dead) != 0 67 | } 68 | 69 | // please do not change these two functions. 70 | func (kv *KVPaxos) setunreliable(what bool) { 71 | if what { 72 | atomic.StoreInt32(&kv.unreliable, 1) 73 | } else { 74 | atomic.StoreInt32(&kv.unreliable, 0) 75 | } 76 | } 77 | 78 | func (kv *KVPaxos) isunreliable() bool { 79 | return atomic.LoadInt32(&kv.unreliable) != 0 80 | } 81 | 82 | // 83 | // servers[] contains the ports of the set of 84 | // servers that will cooperate via Paxos to 85 | // form the fault-tolerant key/value service. 86 | // me is the index of the current server in servers[]. 87 | // 88 | func StartServer(servers []string, me int) *KVPaxos { 89 | // call gob.Register on structures you want 90 | // Go's RPC library to marshall/unmarshall. 91 | gob.Register(Op{}) 92 | 93 | kv := new(KVPaxos) 94 | kv.me = me 95 | 96 | // Your initialization code here. 97 | 98 | rpcs := rpc.NewServer() 99 | rpcs.Register(kv) 100 | 101 | kv.px = paxos.Make(servers, me, rpcs) 102 | 103 | os.Remove(servers[me]) 104 | l, e := net.Listen("unix", servers[me]) 105 | if e != nil { 106 | log.Fatal("listen error: ", e) 107 | } 108 | kv.l = l 109 | 110 | 111 | // please do not change any of the following code, 112 | // or do anything to subvert it. 113 | 114 | go func() { 115 | for kv.isdead() == false { 116 | conn, err := kv.l.Accept() 117 | if err == nil && kv.isdead() == false { 118 | if kv.isunreliable() && (rand.Int63()%1000) < 100 { 119 | // discard the request. 120 | conn.Close() 121 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 { 122 | // process the request but force discard of reply. 123 | c1 := conn.(*net.UnixConn) 124 | f, _ := c1.File() 125 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 126 | if err != nil { 127 | fmt.Printf("shutdown: %v\n", err) 128 | } 129 | go rpcs.ServeConn(conn) 130 | } else { 131 | go rpcs.ServeConn(conn) 132 | } 133 | } else if err == nil { 134 | conn.Close() 135 | } 136 | if err != nil && kv.isdead() == false { 137 | fmt.Printf("KVPaxos(%v) accept: %v\n", me, err.Error()) 138 | kv.kill() 139 | } 140 | } 141 | }() 142 | 143 | return kv 144 | } 145 | -------------------------------------------------------------------------------- /6.824/src/kvraft/client.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "labrpc" 4 | import "crypto/rand" 5 | import "math/big" 6 | 7 | 8 | type Clerk struct { 9 | servers []*labrpc.ClientEnd 10 | // You will have to modify this struct. 
11 | } 12 | 13 | func nrand() int64 { 14 | max := big.NewInt(int64(1) << 62) 15 | bigx, _ := rand.Int(rand.Reader, max) 16 | x := bigx.Int64() 17 | return x 18 | } 19 | 20 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 21 | ck := new(Clerk) 22 | ck.servers = servers 23 | // You'll have to add code here. 24 | return ck 25 | } 26 | 27 | // 28 | // fetch the current value for a key. 29 | // returns "" if the key does not exist. 30 | // keeps trying forever in the face of all other errors. 31 | // 32 | // you can send an RPC with code like this: 33 | // ok := ck.servers[i].Call("RaftKV.Get", &args, &reply) 34 | // 35 | // the types of args and reply (including whether they are pointers) 36 | // must match the declared types of the RPC handler function's 37 | // arguments. and reply must be passed as a pointer. 38 | // 39 | func (ck *Clerk) Get(key string) string { 40 | 41 | // You will have to modify this function. 42 | return "" 43 | } 44 | 45 | // 46 | // shared by Put and Append. 47 | // 48 | // you can send an RPC with code like this: 49 | // ok := ck.servers[i].Call("RaftKV.PutAppend", &args, &reply) 50 | // 51 | // the types of args and reply (including whether they are pointers) 52 | // must match the declared types of the RPC handler function's 53 | // arguments. and reply must be passed as a pointer. 54 | // 55 | func (ck *Clerk) PutAppend(key string, value string, op string) { 56 | // You will have to modify this function. 57 | } 58 | 59 | func (ck *Clerk) Put(key string, value string) { 60 | ck.PutAppend(key, value, "Put") 61 | } 62 | func (ck *Clerk) Append(key string, value string) { 63 | ck.PutAppend(key, value, "Append") 64 | } 65 | -------------------------------------------------------------------------------- /6.824/src/kvraft/common.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ) 7 | 8 | type Err string 9 | 10 | // Put or Append 11 | type PutAppendArgs struct { 12 | // You'll have to add definitions here. 13 | Key string 14 | Value string 15 | Op string // "Put" or "Append" 16 | // You'll have to add definitions here. 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | } 20 | 21 | type PutAppendReply struct { 22 | WrongLeader bool 23 | Err Err 24 | } 25 | 26 | type GetArgs struct { 27 | Key string 28 | // You'll have to add definitions here. 29 | } 30 | 31 | type GetReply struct { 32 | WrongLeader bool 33 | Err Err 34 | Value string 35 | } 36 | -------------------------------------------------------------------------------- /6.824/src/kvraft/server.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | "encoding/gob" 5 | "labrpc" 6 | "log" 7 | "raft" 8 | "sync" 9 | ) 10 | 11 | const Debug = 0 12 | 13 | func DPrintf(format string, a ...interface{}) (n int, err error) { 14 | if Debug > 0 { 15 | log.Printf(format, a...) 16 | } 17 | return 18 | } 19 | 20 | 21 | type Op struct { 22 | // Your definitions here. 23 | // Field names must start with capital letters, 24 | // otherwise RPC will break. 25 | } 26 | 27 | type RaftKV struct { 28 | mu sync.Mutex 29 | me int 30 | rf *raft.Raft 31 | applyCh chan raft.ApplyMsg 32 | 33 | maxraftstate int // snapshot if log grows this big 34 | 35 | // Your definitions here. 36 | } 37 | 38 | 39 | func (kv *RaftKV) Get(args *GetArgs, reply *GetReply) { 40 | // Your code here. 
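	// One widely used shape for this handler (an illustrative sketch, not a
	// required design): wrap the request in an Op, hand it to Raft, and only
	// answer once the committed Op comes back on kv.applyCh, e.g.
	//
	//   index, _, isLeader := kv.rf.Start(Op{ /* key, request ids, ... */ })
	//   if !isLeader {
	//       reply.WrongLeader = true
	//       return
	//   }
	//   // block until the entry at `index` is applied, then fill reply.Value
	//   // (or give up and let the Clerk retry if a different op shows up there).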
41 | } 42 | 43 | func (kv *RaftKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 44 | // Your code here. 45 | } 46 | 47 | // 48 | // the tester calls Kill() when a RaftKV instance won't 49 | // be needed again. you are not required to do anything 50 | // in Kill(), but it might be convenient to (for example) 51 | // turn off debug output from this instance. 52 | // 53 | func (kv *RaftKV) Kill() { 54 | kv.rf.Kill() 55 | // Your code here, if desired. 56 | } 57 | 58 | // 59 | // servers[] contains the ports of the set of 60 | // servers that will cooperate via Raft to 61 | // form the fault-tolerant key/value service. 62 | // me is the index of the current server in servers[]. 63 | // the k/v server should store snapshots with persister.SaveSnapshot(), 64 | // and Raft should save its state (including log) with persister.SaveRaftState(). 65 | // the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, 66 | // in order to allow Raft to garbage-collect its log. if maxraftstate is -1, 67 | // you don't need to snapshot. 68 | // StartKVServer() must return quickly, so it should start goroutines 69 | // for any long-running work. 70 | // 71 | func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *RaftKV { 72 | // call gob.Register on structures you want 73 | // Go's RPC library to marshall/unmarshall. 74 | gob.Register(Op{}) 75 | 76 | kv := new(RaftKV) 77 | kv.me = me 78 | kv.maxraftstate = maxraftstate 79 | 80 | // Your initialization code here. 81 | 82 | kv.applyCh = make(chan raft.ApplyMsg) 83 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 84 | 85 | 86 | return kv 87 | } 88 | -------------------------------------------------------------------------------- /6.824/src/lockservice/client.go: -------------------------------------------------------------------------------- 1 | package lockservice 2 | 3 | import "net/rpc" 4 | import "fmt" 5 | 6 | 7 | // 8 | // the lockservice Clerk lives in the client 9 | // and maintains a little state. 10 | // 11 | type Clerk struct { 12 | servers [2]string // primary port, backup port 13 | // Your definitions here. 14 | } 15 | 16 | 17 | func MakeClerk(primary string, backup string) *Clerk { 18 | ck := new(Clerk) 19 | ck.servers[0] = primary 20 | ck.servers[1] = backup 21 | // Your initialization code here. 22 | return ck 23 | } 24 | 25 | // 26 | // call() sends an RPC to the rpcname handler on server srv 27 | // with arguments args, waits for the reply, and leaves the 28 | // reply in reply. the reply argument should be the address 29 | // of a reply structure. 30 | // 31 | // call() returns true if the server responded, and false 32 | // if call() was not able to contact the server. in particular, 33 | // reply's contents are valid if and only if call() returned true. 34 | // 35 | // you should assume that call() will return an 36 | // error after a while if the server is dead. 37 | // don't provide your own time-out mechanism. 38 | // 39 | // please use call() to send all RPCs, in client.go and server.go. 40 | // please don't change this function. 41 | // 42 | func call(srv string, rpcname string, 43 | args interface{}, reply interface{}) bool { 44 | c, errx := rpc.Dial("unix", srv) 45 | if errx != nil { 46 | return false 47 | } 48 | defer c.Close() 49 | 50 | err := c.Call(rpcname, args, reply) 51 | if err == nil { 52 | return true 53 | } 54 | 55 | fmt.Println(err) 56 | return false 57 | } 58 | 59 | // 60 | // ask the lock service for a lock. 
61 | // returns true if the lock service 62 | // granted the lock, false otherwise. 63 | // 64 | // you will have to modify this function. 65 | // 66 | func (ck *Clerk) Lock(lockname string) bool { 67 | // prepare the arguments. 68 | args := &LockArgs{} 69 | args.Lockname = lockname 70 | var reply LockReply 71 | 72 | // send an RPC request, wait for the reply. 73 | ok := call(ck.servers[0], "LockServer.Lock", args, &reply) 74 | if ok == false { 75 | return false 76 | } 77 | 78 | return reply.OK 79 | } 80 | 81 | 82 | // 83 | // ask the lock service to unlock a lock. 84 | // returns true if the lock was previously held, 85 | // false otherwise. 86 | // 87 | 88 | func (ck *Clerk) Unlock(lockname string) bool { 89 | 90 | // Your code here. 91 | 92 | return false 93 | } 94 | -------------------------------------------------------------------------------- /6.824/src/lockservice/common.go: -------------------------------------------------------------------------------- 1 | package lockservice 2 | 3 | // 4 | // RPC definitions for a simple lock service. 5 | // 6 | // You will need to modify this file. 7 | // 8 | 9 | // 10 | // Lock(lockname) returns OK=true if the lock is not held. 11 | // If it is held, it returns OK=false immediately. 12 | // 13 | type LockArgs struct { 14 | // Go's net/rpc requires that these field 15 | // names start with upper case letters! 16 | Lockname string // lock name 17 | } 18 | 19 | type LockReply struct { 20 | OK bool 21 | } 22 | 23 | // 24 | // Unlock(lockname) returns OK=true if the lock was held. 25 | // It returns OK=false if the lock was not held. 26 | // 27 | type UnlockArgs struct { 28 | Lockname string 29 | } 30 | 31 | type UnlockReply struct { 32 | OK bool 33 | } 34 | -------------------------------------------------------------------------------- /6.824/src/lockservice/server.go: -------------------------------------------------------------------------------- 1 | package lockservice 2 | 3 | import "net" 4 | import "net/rpc" 5 | import "log" 6 | import "sync" 7 | import "fmt" 8 | import "os" 9 | import "io" 10 | import "time" 11 | 12 | type LockServer struct { 13 | mu sync.Mutex 14 | l net.Listener 15 | dead bool // for test_test.go 16 | dying bool // for test_test.go 17 | 18 | am_primary bool // am I the primary? 19 | backup string // backup's port 20 | 21 | // for each lock name, is it locked? 22 | locks map[string]bool 23 | } 24 | 25 | 26 | // 27 | // server Lock RPC handler. 28 | // 29 | // you will have to modify this function 30 | // 31 | func (ls *LockServer) Lock(args *LockArgs, reply *LockReply) error { 32 | ls.mu.Lock() 33 | defer ls.mu.Unlock() 34 | 35 | 36 | locked, _ := ls.locks[args.Lockname] 37 | 38 | if locked { 39 | reply.OK = false 40 | } else { 41 | reply.OK = true 42 | ls.locks[args.Lockname] = true 43 | } 44 | 45 | return nil 46 | } 47 | 48 | // 49 | // server Unlock RPC handler. 50 | // 51 | func (ls *LockServer) Unlock(args *UnlockArgs, reply *UnlockReply) error { 52 | 53 | // Your code here. 54 | 55 | return nil 56 | } 57 | 58 | // 59 | // tell the server to shut itself down. 60 | // for testing. 61 | // please don't change this. 62 | // 63 | func (ls *LockServer) kill() { 64 | ls.dead = true 65 | ls.l.Close() 66 | } 67 | 68 | // 69 | // hack to allow test_test.go to have primary process 70 | // an RPC but not send a reply. can't use the shutdown() 71 | // trick b/c that causes client to immediately get an 72 | // error and send to backup before primary does. 73 | // please don't change anything to do with DeafConn. 
74 | // 75 | type DeafConn struct { 76 | c io.ReadWriteCloser 77 | } 78 | 79 | func (dc DeafConn) Write(p []byte) (n int, err error) { 80 | return len(p), nil 81 | } 82 | func (dc DeafConn) Close() error { 83 | return dc.c.Close() 84 | } 85 | func (dc DeafConn) Read(p []byte) (n int, err error) { 86 | return dc.c.Read(p) 87 | } 88 | 89 | func StartServer(primary string, backup string, am_primary bool) *LockServer { 90 | ls := new(LockServer) 91 | ls.backup = backup 92 | ls.am_primary = am_primary 93 | ls.locks = map[string]bool{} 94 | 95 | // Your initialization code here. 96 | 97 | 98 | me := "" 99 | if am_primary { 100 | me = primary 101 | } else { 102 | me = backup 103 | } 104 | 105 | // tell net/rpc about our RPC server and handlers. 106 | rpcs := rpc.NewServer() 107 | rpcs.Register(ls) 108 | 109 | // prepare to receive connections from clients. 110 | // change "unix" to "tcp" to use over a network. 111 | os.Remove(me) // only needed for "unix" 112 | l, e := net.Listen("unix", me) 113 | if e != nil { 114 | log.Fatal("listen error: ", e) 115 | } 116 | ls.l = l 117 | 118 | // please don't change any of the following code, 119 | // or do anything to subvert it. 120 | 121 | // create a thread to accept RPC connections from clients. 122 | go func() { 123 | for ls.dead == false { 124 | conn, err := ls.l.Accept() 125 | if err == nil && ls.dead == false { 126 | if ls.dying { 127 | // process the request but force discard of reply. 128 | 129 | // without this the connection is never closed, 130 | // b/c ServeConn() is waiting for more requests. 131 | // test_test.go depends on this two seconds. 132 | go func() { 133 | time.Sleep(2 * time.Second) 134 | conn.Close() 135 | }() 136 | ls.l.Close() 137 | 138 | // this object has the type ServeConn expects, 139 | // but discards writes (i.e. discards the RPC reply). 140 | deaf_conn := DeafConn{c: conn} 141 | 142 | rpcs.ServeConn(deaf_conn) 143 | 144 | ls.dead = true 145 | } else { 146 | go rpcs.ServeConn(conn) 147 | } 148 | } else if err == nil { 149 | conn.Close() 150 | } 151 | if err != nil && ls.dead == false { 152 | fmt.Printf("LockServer(%v) accept: %v\n", me, err.Error()) 153 | ls.kill() 154 | } 155 | } 156 | }() 157 | 158 | return ls 159 | } 160 | -------------------------------------------------------------------------------- /6.824/src/main/diskvd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/6.824/src/main/diskvd -------------------------------------------------------------------------------- /6.824/src/main/diskvd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start a diskvd server. it's a member of some replica 5 | // group, which has other members, and it needs to know 6 | // how to talk to the members of the shardmaster service. 7 | // used by ../diskv/test_test.go 8 | // 9 | // arguments: 10 | // -g groupid 11 | // -m masterport1 -m masterport2 ... 12 | // -s replicaport1 -s replicaport2 ... 13 | // -i my-index-in-server-port-list 14 | // -u unreliable 15 | // -d directory 16 | // -r restart 17 | 18 | import "time" 19 | import "diskv" 20 | import "os" 21 | import "fmt" 22 | import "strconv" 23 | import "runtime" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: diskvd -g gid -m master... -s server... 
-i my-index -d dir\n") 27 | os.Exit(1) 28 | } 29 | 30 | func main() { 31 | var gid int64 = -1 // my replica group ID 32 | masters := []string{} // ports of shardmasters 33 | replicas := []string{} // ports of servers in my replica group 34 | me := -1 // my index in replicas[] 35 | unreliable := false 36 | dir := "" // store persistent data here 37 | restart := false 38 | 39 | for i := 1; i+1 < len(os.Args); i += 2 { 40 | a0 := os.Args[i] 41 | a1 := os.Args[i+1] 42 | if a0 == "-g" { 43 | gid, _ = strconv.ParseInt(a1, 10, 64) 44 | } else if a0 == "-m" { 45 | masters = append(masters, a1) 46 | } else if a0 == "-s" { 47 | replicas = append(replicas, a1) 48 | } else if a0 == "-i" { 49 | me, _ = strconv.Atoi(a1) 50 | } else if a0 == "-u" { 51 | unreliable, _ = strconv.ParseBool(a1) 52 | } else if a0 == "-d" { 53 | dir = a1 54 | } else if a0 == "-r" { 55 | restart, _ = strconv.ParseBool(a1) 56 | } else { 57 | usage() 58 | } 59 | } 60 | 61 | if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" { 62 | usage() 63 | } 64 | 65 | runtime.GOMAXPROCS(4) 66 | 67 | srv := diskv.StartServer(gid, masters, replicas, me, dir, restart) 68 | srv.Setunreliable(unreliable) 69 | 70 | // for safety, force quit after 10 minutes. 71 | time.Sleep(10 * 60 * time.Second) 72 | mep, _ := os.FindProcess(os.Getpid()) 73 | mep.Kill() 74 | } 75 | -------------------------------------------------------------------------------- /6.824/src/main/ii.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "os" 4 | import "fmt" 5 | import "mapreduce" 6 | 7 | // The mapping function is called once for each piece of the input. 8 | // In this framework, the key is the name of the file that is being processed, 9 | // and the value is the file's contents. The return value should be a slice of 10 | // key/value pairs, each represented by a mapreduce.KeyValue. 11 | func mapF(document string, value string) (res []mapreduce.KeyValue) { 12 | // TODO: you should complete this to do the inverted index challenge 13 | } 14 | 15 | // The reduce function is called once for each key generated by Map, with a 16 | // list of that key's string value (merged across all inputs). The return value 17 | // should be a single output value for that key. 18 | func reduceF(key string, values []string) string { 19 | // TODO: you should complete this to do the inverted index challenge 20 | } 21 | 22 | // Can be run in 3 ways: 23 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 24 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. 
xN.txt) 25 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 26 | func main() { 27 | if len(os.Args) < 4 { 28 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 29 | } else if os.Args[1] == "master" { 30 | var mr *mapreduce.Master 31 | if os.Args[2] == "sequential" { 32 | mr = mapreduce.Sequential("iiseq", os.Args[3:], 3, mapF, reduceF) 33 | } else { 34 | mr = mapreduce.Distributed("iiseq", os.Args[3:], 3, os.Args[2]) 35 | } 36 | mr.Wait() 37 | } else { 38 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /6.824/src/main/lockc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see comments in lockd.go 5 | // 6 | 7 | import "lockservice" 8 | import "os" 9 | import "fmt" 10 | 11 | func usage() { 12 | fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n") 13 | os.Exit(1) 14 | } 15 | 16 | func main() { 17 | if len(os.Args) == 5 { 18 | ck := lockservice.MakeClerk(os.Args[2], os.Args[3]) 19 | var ok bool 20 | if os.Args[1] == "-l" { 21 | ok = ck.Lock(os.Args[4]) 22 | } else if os.Args[1] == "-u" { 23 | ok = ck.Unlock(os.Args[4]) 24 | } else { 25 | usage() 26 | } 27 | fmt.Printf("reply: %v\n", ok) 28 | } else { 29 | usage() 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /6.824/src/main/lockd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // export GOPATH=~/6.824 4 | // go build lockd.go 5 | // go build lockc.go 6 | // ./lockd -p a b & 7 | // ./lockd -b a b & 8 | // ./lockc -l a b lx 9 | // ./lockc -u a b lx 10 | // 11 | // on Athena, use /tmp/myname-a and /tmp/myname-b 12 | // instead of a and b. 
13 | 14 | import "time" 15 | import "lockservice" 16 | import "os" 17 | import "fmt" 18 | 19 | func main() { 20 | if len(os.Args) == 4 && os.Args[1] == "-p" { 21 | lockservice.StartServer(os.Args[2], os.Args[3], true) 22 | } else if len(os.Args) == 4 && os.Args[1] == "-b" { 23 | lockservice.StartServer(os.Args[2], os.Args[3], false) 24 | } else { 25 | fmt.Printf("Usage: lockd -p|-b primaryport backupport\n") 26 | os.Exit(1) 27 | } 28 | for { 29 | time.Sleep(100 * time.Second) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /6.824/src/main/mr-challenge.txt: -------------------------------------------------------------------------------- 1 | women: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 2 | won: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 3 | wonderful: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 4 | words: 15 pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 5 | worked: 15 pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 6 | worse: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 7 | wounded: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 8 | yes: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 9 | younger: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 10 | yours: 15 
pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 11 | -------------------------------------------------------------------------------- /6.824/src/main/mr-testout.txt: -------------------------------------------------------------------------------- 1 | he: 34077 2 | was: 37044 3 | that: 37495 4 | I: 44502 5 | in: 46092 6 | a: 60558 7 | to: 74357 8 | of: 79727 9 | and: 93990 10 | the: 154024 11 | -------------------------------------------------------------------------------- /6.824/src/main/pbc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // pbservice client application 5 | // 6 | // export GOPATH=~/6.824 7 | // go build viewd.go 8 | // go build pbd.go 9 | // go build pbc.go 10 | // ./viewd /tmp/rtm-v & 11 | // ./pbd /tmp/rtm-v /tmp/rtm-1 & 12 | // ./pbd /tmp/rtm-v /tmp/rtm-2 & 13 | // ./pbc /tmp/rtm-v key1 value1 14 | // ./pbc /tmp/rtm-v key1 15 | // 16 | // change "rtm" to your user name. 17 | // start the pbd programs in separate windows and kill 18 | // and restart them to exercise fault tolerance. 19 | // 20 | 21 | import "pbservice" 22 | import "os" 23 | import "fmt" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: pbc viewport key\n") 27 | fmt.Printf(" pbc viewport key value\n") 28 | os.Exit(1) 29 | } 30 | 31 | func main() { 32 | if len(os.Args) == 3 { 33 | // get 34 | ck := pbservice.MakeClerk(os.Args[1], "") 35 | v := ck.Get(os.Args[2]) 36 | fmt.Printf("%v\n", v) 37 | } else if len(os.Args) == 4 { 38 | // put 39 | ck := pbservice.MakeClerk(os.Args[1], "") 40 | ck.Put(os.Args[2], os.Args[3]) 41 | } else { 42 | usage() 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /6.824/src/main/pbd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "pbservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 3 { 14 | fmt.Printf("Usage: pbd viewport myport\n") 15 | os.Exit(1) 16 | } 17 | 18 | pbservice.StartServer(os.Args[1], os.Args[2]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /6.824/src/main/test-ii.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run ii.go master sequential pg-*.txt 3 | sort -k1,1 mrtmp.iiseq | sort -snk2,2 | grep -v '16' | tail -10 | diff - mr-challenge.txt > diff.out 4 | if [ -s diff.out ] 5 | then 6 | echo "Failed test. Output should be as in mr-challenge.txt. Your output differs as follows (from diff.out):" > /dev/stderr 7 | cat diff.out 8 | else 9 | echo "Passed test" > /dev/stderr 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /6.824/src/main/test-mr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | here=$(dirname "$0") 3 | [[ "$here" = /* ]] || here="$PWD/$here" 4 | export GOPATH="$here/../../" 5 | echo "" 6 | echo "==> Part I" 7 | go test -run Sequential mapreduce/... 
8 | echo "" 9 | echo "==> Part II" 10 | (cd "$here" && ./test-wc.sh > /dev/null) 11 | echo "" 12 | echo "==> Part III" 13 | go test -run TestBasic mapreduce/... 14 | echo "" 15 | echo "==> Part IV" 16 | go test -run Failure mapreduce/... 17 | echo "" 18 | echo "==> Part V (challenge)" 19 | (cd "$here" && ./test-ii.sh > /dev/null) 20 | 21 | rm "$here"/mrtmp.* "$here"/diff.out 22 | -------------------------------------------------------------------------------- /6.824/src/main/test-wc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run wc.go master sequential pg-*.txt 3 | sort -n -k2 mrtmp.wcseq | tail -10 | diff - mr-testout.txt > diff.out 4 | if [ -s diff.out ] 5 | then 6 | echo "Failed test. Output should be as in mr-testout.txt. Your output differs as follows (from diff.out):" > /dev/stderr 7 | cat diff.out 8 | else 9 | echo "Passed test" > /dev/stderr 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /6.824/src/main/viewd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "viewservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 2 { 14 | fmt.Printf("Usage: viewd port\n") 15 | os.Exit(1) 16 | } 17 | 18 | viewservice.StartServer(os.Args[1]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /6.824/src/main/wc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "../mapreduce" 6 | "os" 7 | "strings" 8 | "strconv" 9 | "log" 10 | "unicode" 11 | ) 12 | 13 | // The mapping function is called once for each piece of the input. 14 | // In this framework, the key is the name of the file that is being processed, 15 | // and the value is the file's contents. The return value should be a slice of 16 | // key/value pairs, each represented by a mapreduce.KeyValue. 17 | func mapF(document string, value string) (res []mapreduce.KeyValue) { 18 | // 在wordcount的例子中mapF的功能应该是string中获取到单词(关注下strings.FieldsFunc打用法吧), 19 | // 返回的结构应该类似KeyValue{w, "1"} 20 | } 21 | 22 | // The reduce function is called once for each key generated by Map, with a 23 | // list of that key's string value (merged across all inputs). The return value 24 | // should be a single output value for that key. 25 | func reduceF(key string, values []string) string { 26 | // TODO: you also have to write this function 27 | // reduceF对每个key调用,然后处理values,在这个例子中,相加全部的1就是单词出现打次数来 28 | } 29 | 30 | // Can be run in 3 ways: 31 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 32 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. 
xN.txt) 33 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 34 | func main() { 35 | if len(os.Args) < 4 { 36 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 37 | } else if os.Args[1] == "master" { 38 | var mr *mapreduce.Master 39 | if os.Args[2] == "sequential" { 40 | mr = mapreduce.Sequential("wcseq", os.Args[3:], 3, mapF, reduceF) 41 | } else { 42 | mr = mapreduce.Distributed("wcseq", os.Args[3:], 3, os.Args[2]) 43 | } 44 | mr.Wait() 45 | } else { 46 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/common.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | ) 7 | 8 | // Debugging enabled? 9 | const debugEnabled = false 10 | 11 | // DPrintf will only print if the debugEnabled const has been set to true 12 | func debug(format string, a ...interface{}) (n int, err error) { 13 | if debugEnabled { 14 | n, err = fmt.Printf(format, a...) 15 | } 16 | return 17 | } 18 | 19 | // jobPhase indicates whether a task is scheduled as a map or reduce task. 20 | type jobPhase string 21 | 22 | const ( 23 | mapPhase jobPhase = "Map" 24 | reducePhase = "Reduce" 25 | ) 26 | 27 | // KeyValue is a type used to hold the key/value pairs passed to the map and 28 | // reduce functions. 29 | type KeyValue struct { 30 | Key string 31 | Value string 32 | } 33 | 34 | // reduceName constructs the name of the intermediate file which map task 35 | // produces for reduce task . 36 | func reduceName(jobName string, mapTask int, reduceTask int) string { 37 | return "mrtmp." + jobName + "-" + strconv.Itoa(mapTask) + "-" + strconv.Itoa(reduceTask) 38 | } 39 | 40 | // mergeName constructs the name of the output file of reduce task 41 | func mergeName(jobName string, reduceTask int) string { 42 | return "mrtmp." + jobName + "-res-" + strconv.Itoa(reduceTask) 43 | } 44 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/common_map.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "hash/fnv" 5 | "io/ioutil" 6 | "os" 7 | "encoding/json" 8 | "fmt" 9 | ) 10 | 11 | // doMap does the job of a map worker: it reads one of the input files 12 | // (inFile), calls the user-defined map function (mapF) for that file's 13 | // contents, and partitions the output into nReduce intermediate files. 14 | // doMap的工作内容如下:读取输入文件(inFile), 调用用户自己定义的map函数mapF处理文件内容, 15 | // 分割输出到nReduce份中间文件。 16 | func doMap( 17 | jobName string, // the name of the MapReduce job 18 | mapTaskNumber int, // which map task this is 19 | inFile string, 20 | nReduce int, // the number of reduce task that will be run ("R" in the paper) 21 | mapF func(file string, contents string) []KeyValue, 22 | ) { 23 | // TODO: 24 | // You will need to write this function. 25 | // You can find the filename for this map task's input to reduce task number 26 | // r using reduceName(jobName, mapTaskNumber, r). The ihash function (given 27 | // below doMap) should be used to decide which file a given key belongs into. 28 | // 29 | // The intermediate output of a map task is stored in the file 30 | // system as multiple files whose name indicates which map task produced 31 | // them, as well as which reduce task they are for. 
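	// For example, following the hints above, assigning a key/value pair to an
	// intermediate file could look like this (illustrative sketch):
	//
	//   r := int(ihash(kv.Key)) % nReduce              // which reduce task gets this key
	//   fname := reduceName(jobName, mapTaskNumber, r) // e.g. "mrtmp.job-3-7"
	//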
Coming up with a 32 | // scheme for how to store the key/value pairs on disk can be tricky, 33 | // especially when taking into account that both keys and values could 34 | // contain newlines, quotes, and any other character you can think of. 35 | // 36 | // One format often used for serializing data to a byte stream that the 37 | // other end can correctly reconstruct is JSON. You are not required to 38 | // use JSON, but as the output of the reduce tasks *must* be JSON, 39 | // familiarizing yourself with it here may prove useful. You can write 40 | // out a data structure as a JSON string to a file using the commented 41 | // code below. The corresponding decoding functions can be found in 42 | // common_reduce.go. 43 | // 44 | // enc := json.NewEncoder(file) 45 | // for _, kv := ... { 46 | // err := enc.Encode(&kv) 47 | // 48 | // Remember to close the file after you have written all the values! 49 | 50 | // 你需要重写这个函数。你可以通过reduceName获取文件名,使用map任务的输入为reduce任务提供输出。 51 | // 下面给出的ihash函数应该被用于决定每个key属于的文件。 52 | // 53 | // map任务的中间输入以多文件的形式保存在文件系统上,它们的文件名说明是哪个map任务产生的,同时也说明哪个reduce任务会处理它们。 54 | // 想出如何存储键/值对在磁盘上的方案可能会非常棘手,特别地, 当我们考虑到key和value都包含新行(newlines),引用(quotes),或者其他 55 | // 你想到的字符。 56 | // 57 | // 有一种格式经常被用来序列化数据到字节流,然后可以通过字节流进行重建,这种格式是json。你没有被强制使用JSON,但是reduce任务的输出 58 | // 必须是JSON格式,熟悉JSON数据格式会对你有所帮助。你可以使用下面的代码将数据结构以JSON字符串的形式输出。对应的解码函数在common_reduce.go 59 | // 可以找到。 60 | // 61 | // enc := json.NewEncoder(file) 62 | // for _, kv := ... { 63 | // err := enc.Encode(&kv) 64 | // 65 | // 记得关闭文件当你写完全部的数据之后。 66 | 67 | 68 | // 注:Map的大致流程如下(官方教材建议不上传代码,所以去除) 69 | //  S1:  打开输入文件,并且读取全部数据 70 | // S2: 调用用户自定义的mapF函数,分检数据,在word count的案例中分割成单词 71 | // S3: 将mapF返回的数据根据key分类,跟文件名对应(reduceName获取文件名) 72 | //  S4:  将分类好的数据分别写入不同文件 73 | 74 | } 75 | 76 | func ihash(s string) uint32 { 77 | h := fnv.New32a() 78 | h.Write([]byte(s)) 79 | return h.Sum32() 80 | } 81 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/common_reduce.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "encoding/json" 7 | "io" 8 | "log" 9 | ) 10 | 11 | // doReduce does the job of a reduce worker: it reads the intermediate 12 | // key/value pairs (produced by the map phase) for this task, sorts the 13 | // intermediate key/value pairs by key, calls the user-defined reduce function 14 | // (reduceF) for each key, and writes the output to disk. 15 | func doReduce( 16 | jobName string, // the name of the whole MapReduce job 17 | reduceTaskNumber int, // which reduce task this is 18 | nMap int, // the number of map tasks that were run ("M" in the paper) 19 | reduceF func(key string, values []string) string, 20 | ) { 21 | // TODO: 22 | // You will need to write this function. 23 | // You can find the intermediate file for this reduce task from map task number 24 | // m using reduceName(jobName, m, reduceTaskNumber). 25 | // Remember that you've encoded the values in the intermediate files, so you 26 | // will need to decode them. If you chose to use JSON, you can read out 27 | // multiple decoded values by creating a decoder, and then repeatedly calling 28 | // .Decode() on it until Decode() returns an error. 29 | // 30 | // You should write the reduced output in as JSON encoded KeyValue 31 | // objects to a file named mergeName(jobName, reduceTaskNumber). 
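	// For example, collecting the decoded intermediate values described above
	// might look like this (an illustrative sketch, not the required solution):
	//
	//   kvs := map[string][]string{}
	//   for m := 0; m < nMap; m++ {
	//       f, err := os.Open(reduceName(jobName, m, reduceTaskNumber))
	//       if err != nil {
	//           log.Fatal(err)
	//       }
	//       dec := json.NewDecoder(f)
	//       var kv KeyValue
	//       for dec.Decode(&kv) == nil {
	//           kvs[kv.Key] = append(kvs[kv.Key], kv.Value)
	//       }
	//       f.Close()
	//   }
	//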
We require 32 | // you to use JSON here because that is what the merger than combines the 33 | // output from all the reduce tasks expects. There is nothing "special" about 34 | // JSON -- it is just the marshalling format we chose to use. It will look 35 | // something like this: 36 | // 37 | // enc := json.NewEncoder(mergeFile) 38 | // for key in ... { 39 | // enc.Encode(KeyValue{key, reduceF(...)}) 40 | // } 41 | // file.Close() 42 | 43 | // 你需要完成这个函数。你可与获取到来自map任务生产的中间数据,通过reduceName获取到文件名。 44 | // 记住你应该编码了值到中间文件,所以你需要解码它们。如果你选择了使用JSON,你通过创建decoder读取到多个 45 | // 解码之后的值,直接调用Decode直到返回错误。 46 | // 47 | // 你应该将reduce输出以JSON编码的方式保存到文件,文件名通过mergeName获取。我们建议你在这里使用JSON, 48 | 49 | // key是中间文件里面键值,value是字符串,这个map用于存储相同键值元素的合并 50 | 51 | // Reduce的过程如下: 52 | // S1: 获取到Map产生的文件并打开(reduceName获取文件名) 53 | //  S2:获取中间文件的数据(对多个map产生的文件更加值合并) 54 | //  S3:打开文件(mergeName获取文件名),将用于存储Reduce任务的结果 55 | //  S4:合并结果之后(S2),进行reduceF操作, work count的操作将结果累加,也就是word出现在这个文件中出现的次数 56 | } 57 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/common_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "net/rpc" 6 | ) 7 | 8 | // What follows are RPC types and methods. 9 | // Field names must start with capital letters, otherwise RPC will break. 10 | 11 | // DoTaskArgs holds the arguments that are passed to a worker when a job is scheduled on it. 12 | // DoTaskArgs保存保护参数,用于为worker分配工作。 13 | type DoTaskArgs struct { 14 | JobName string // 工作的名字 15 | File string // 待处理的文件名 16 | Phase jobPhase // 工作类型是map还是reduce 17 | TaskNumber int // 任务的索引? 18 | 19 | // NumOtherPhase is the total number of tasks in other phase; mappers 20 | // need this to compute the number of output bins, and reducers needs 21 | // this to know how many input files to collect. 22 | // 全部任务数量,mapper需要这个数字去计算输出的数量, 同时reducer需要知道有多少输入文件需要收集。 23 | NumOtherPhase int 24 | } 25 | 26 | // ShutdownReply is the response to a WorkerShutdown. 27 | // It holds the number of tasks this worker has processed since it was started. 28 | // ShutdownReply是WorkerShutdown的回应,Ntasks表示worker从启动开始已经处理的任务。 29 | type ShutdownReply struct { 30 | Ntasks int 31 | } 32 | 33 | // RegisterArgs is the argument passed when a worker registers with the master. 34 | // worker注册到master的时候,传递的参数。 35 | type RegisterArgs struct { 36 | Worker string 37 | } 38 | 39 | // call() sends an RPC to the rpcname handler on server srv 40 | // with arguments args, waits for the reply, and leaves the 41 | // reply in reply. the reply argument should be the address 42 | // of a reply structure. 43 | // 44 | // call() returns true if the server responded, and false 45 | // if call() was not able to contact the server. in particular, 46 | // reply's contents are valid if and only if call() returned true. 47 | // 48 | // you should assume that call() will time out and return an 49 | // error after a while if it doesn't get a reply from the server. 50 | // 51 | // please use call() to send all RPCs, in master.go, mapreduce.go, 52 | // and worker.go. please don't change this function. 
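//
// Usage sketch (illustrative, not part of the required code): this is how
// schedule() in schedule.go can hand a map task to a worker it has taken
// from mr.registerChannel, using the DoTaskArgs type defined above.
// workerAddress and taskNumber stand in for values that schedule() tracks
// itself.
//
//	args := DoTaskArgs{
//		JobName:       mr.jobName,
//		File:          mr.files[taskNumber],
//		Phase:         mapPhase,
//		TaskNumber:    taskNumber,
//		NumOtherPhase: mr.nReduce,
//	}
//	ok := call(workerAddress, "Worker.DoTask", &args, new(struct{}))
//	if !ok {
//		// the worker is unreachable or crashed: give the task to another
//		// worker and set this one aside
//	}
//
// A common structure is to run one such call per task in its own goroutine
// and wait on a sync.WaitGroup until every task of the phase has succeeded.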
53 | // 54 | 55 | // 本地的rpc调用,使用是unix套接字 56 | func call(srv string, rpcname string, 57 | args interface{}, reply interface{}) bool { 58 | 59 | c, errx := rpc.Dial("unix", srv) 60 | if errx != nil { 61 | return false 62 | } 63 | defer c.Close() 64 | 65 | err := c.Call(rpcname, args, reply) 66 | if err == nil { 67 | return true 68 | } 69 | 70 | fmt.Println(err) 71 | return false 72 | } 73 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/master.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "sync" 7 | ) 8 | 9 | // Master holds all the state that the master needs to keep track of. Of 10 | // particular importance is registerChannel, the channel that notifies the 11 | // master of workers that have gone idle and are in need of new work. 12 | type Master struct { 13 | sync.Mutex 14 | 15 | address string 16 | registerChannel chan string // 通知master那些worker处于空闲状态。 17 | doneChannel chan bool 18 | workers []string // protected by the mutex, master下面含有的worker的名字 19 | 20 | // Per-task information 21 | jobName string // Name of currently executing job 22 | files []string // Input files 23 | nReduce int // Number of reduce partitions 24 | 25 | shutdown chan struct{} 26 | l net.Listener 27 | stats []int 28 | } 29 | 30 | // Register is an RPC method that is called by workers after they have started 31 | // up to report that they are ready to receive tasks. 32 | // 一个供worker调用的rpc方法, 告诉master它们已经准备好接受任务。 33 | func (mr *Master) Register(args *RegisterArgs, _ *struct{}) error { 34 | mr.Lock() 35 | defer mr.Unlock() 36 | debug("Register: worker %s\n", args.Worker) 37 | mr.workers = append(mr.workers, args.Worker) 38 | go func() { 39 | mr.registerChannel <- args.Worker // 通知master那些worker处于空闲状态。 40 | }() 41 | return nil 42 | } 43 | 44 | // newMaster initializes a new Map/Reduce Master 45 | // 创建初始化master 46 | func newMaster(master string) (mr *Master) { 47 | mr = new(Master) 48 | mr.address = master 49 | mr.shutdown = make(chan struct{}) 50 | mr.registerChannel = make(chan string) 51 | mr.doneChannel = make(chan bool) 52 | return 53 | } 54 | 55 | // Sequential runs map and reduce tasks sequentially, waiting for each task to 56 | // complete before scheduling the next. 57 | // Sequential方法顺序的执行map和reduce任务,在分配下一个任务前需要前面的任务完成。 58 | func Sequential(jobName string, files []string, nreduce int, 59 | mapF func(string, string) []KeyValue, 60 | reduceF func(string, []string) string, 61 | ) (mr *Master) { 62 | mr = newMaster("master") 63 | // 两个匿名函数 64 | go mr.run(jobName, files, nreduce, func(phase jobPhase) { 65 | switch phase { 66 | case mapPhase: 67 | for i, f := range mr.files { 68 | doMap(mr.jobName, i, f, mr.nReduce, mapF) 69 | } 70 | case reducePhase: 71 | for i := 0; i < mr.nReduce; i++ { 72 | doReduce(mr.jobName, i, len(mr.files), reduceF) 73 | } 74 | } 75 | }, func() { 76 | mr.stats = []int{len(files) + nreduce} 77 | }) 78 | return 79 | } 80 | 81 | // Distributed schedules map and reduce tasks on workers that register with the master over RPC. 
82 | // 将map和reduc任务分布到通过rpc注册到master的worker。 83 | func Distributed(jobName string, files []string, nreduce int, master string) (mr *Master) { 84 | mr = newMaster(master) 85 | mr.startRPCServer() 86 | go mr.run(jobName, files, nreduce, mr.schedule, func() { 87 | mr.stats = mr.killWorkers() 88 | mr.stopRPCServer() 89 | }) 90 | return 91 | } 92 | 93 | // run executes a mapreduce job on the given number of mappers and reducers. 94 | // 95 | // First, it divides up the input file among the given number of mappers, and 96 | // schedules each task on workers as they become available. Each map task bins 97 | // its output in a number of bins equal to the given number of reduce tasks. 98 | // Once all the mappers have finished, workers are assigned reduce tasks. 99 | // 100 | // When all tasks have been completed, the reducer outputs are merged, 101 | // statistics are collected, and the master is shut down. 102 | // 103 | // Note that this implementation assumes a shared file system. 104 | 105 | // 在指定的mapper和reducer数量上面执行mapreduce工作. 106 | // 首先,在指定数量的mapper上面分配输入文件,然后分配每个任务到可用的worker。每个map任务将它的输出 107 | // 放置在一些“箱子”, 数量等于给定的reduce任务的数量。一旦全部的mapper工作完成,worker开始安排reduce任务。 108 | // 109 | // 当全部的任务完成的时候,reducer的输出被合并,统计被收集,然后master关闭退出。 110 | // 111 | // 注意:实现假设在一个共享的文件系统之上。 112 | func (mr *Master) run(jobName string, files []string, nreduce int, 113 | schedule func(phase jobPhase), 114 | finish func(), 115 | ) { 116 | mr.jobName = jobName // job的名字 117 | mr.files = files // 输入的文件 118 | mr.nReduce = nreduce // reduce任务的数量限制 119 | 120 | fmt.Printf("%s: Starting Map/Reduce task %s\n", mr.address, mr.jobName) 121 | 122 | // 这两个函数都需要外面传入 123 | schedule(mapPhase) // 安排map任务 schedule即master.go 64行传入的函数 124 | schedule(reducePhase) // 安排reduce任务 125 | finish() // 任务完成 126 | 127 | mr.merge() // 合并结果 128 | 129 | fmt.Printf("%s: Map/Reduce task completed\n", mr.address) 130 | 131 | mr.doneChannel <- true 132 | } 133 | 134 | // Wait blocks until the currently scheduled work has completed. 135 | // This happens when all tasks have scheduled and completed, the final output 136 | // have been computed, and all workers have been shut down. 137 | func (mr *Master) Wait() { 138 | <-mr.doneChannel // 等待run运行完成 139 | } 140 | 141 | // killWorkers cleans up all workers by sending each one a Shutdown RPC. 142 | // It also collects and returns the number of tasks each worker has performed. 143 | func (mr *Master) killWorkers() []int { 144 | mr.Lock() 145 | defer mr.Unlock() 146 | ntasks := make([]int, 0, len(mr.workers)) 147 | for _, w := range mr.workers { 148 | debug("Master: shutdown worker %s\n", w) 149 | var reply ShutdownReply 150 | ok := call(w, "Worker.Shutdown", new(struct{}), &reply) 151 | if ok == false { 152 | fmt.Printf("Master: RPC %s shutdown error\n", w) 153 | } else { 154 | ntasks = append(ntasks, reply.Ntasks) 155 | } 156 | } 157 | return ntasks 158 | } 159 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/master_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/rpc" 8 | "os" 9 | ) 10 | 11 | // Shutdown is an RPC method that shuts down the Master's RPC server. 
12 | // shotdown是一个RPC方法,用于关闭master rpc服务器。 13 | func (mr *Master) Shutdown(_, _ *struct{}) error { 14 | debug("Shutdown: registration server\n") 15 | close(mr.shutdown) 16 | mr.l.Close() // causes the Accept to fail 17 | return nil 18 | } 19 | 20 | // startRPCServer starts the Master's RPC server. It continues accepting RPC 21 | // calls (Register in particular) for as long as the worker is alive. 22 | // startServer是开启master rpc服务。不断的接受来自worker的rpc调用。 23 | func (mr *Master) startRPCServer() { 24 | rpcs := rpc.NewServer() 25 | rpcs.Register(mr) // 注册自己的方法 26 | os.Remove(mr.address) // only needed for "unix" 27 | l, e := net.Listen("unix", mr.address) // unix套接字,套接字和文件绑定 28 | if e != nil { 29 | log.Fatal("RegstrationServer", mr.address, " error: ", e) 30 | } 31 | mr.l = l 32 | 33 | // now that we are listening on the master address, can fork off 34 | // accepting connections to another thread. 35 | go func() { 36 | loop: 37 | for { 38 | select { 39 | case <-mr.shutdown: // close的时候退出 40 | break loop 41 | default: 42 | } 43 | conn, err := mr.l.Accept() 44 | if err == nil { 45 | go func() { 46 | rpcs.ServeConn(conn) 47 | conn.Close() 48 | }() 49 | } else { 50 | debug("RegistrationServer: accept error", err) 51 | break 52 | } 53 | } 54 | debug("RegistrationServer: done\n") 55 | }() 56 | } 57 | 58 | // stopRPCServer stops the master RPC server. 59 | // This must be done through an RPC to avoid race conditions between the RPC 60 | // server thread and the current thread. 61 | // 关闭rpc 服务的方法。 62 | func (mr *Master) stopRPCServer() { 63 | var reply ShutdownReply 64 | ok := call(mr.address, "Master.Shutdown", new(struct{}), &reply) 65 | if ok == false { 66 | fmt.Printf("Cleanup: RPC %s error\n", mr.address) 67 | } 68 | debug("cleanupRegistration: done\n") 69 | } 70 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/master_splitmerge.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "fmt" 7 | "log" 8 | "os" 9 | "sort" 10 | ) 11 | 12 | // merge combines the results of the many reduce jobs into a single output file 13 | // XXX use merge sort 14 | // 将reduce工作产生的结果合并成一个输出文件。 15 | func (mr *Master) merge() { 16 | debug("Merge phase") 17 | kvs := make(map[string]string) 18 | // nReduce个reduce任务,有nReduce个任务。 19 | for i := 0; i < mr.nReduce; i++ { 20 | p := mergeName(mr.jobName, i) // 获取输出文件,类似mrtmp.test-res-0 21 | fmt.Printf("Merge: read %s\n", p) 22 | file, err := os.Open(p) 23 | if err != nil { 24 | log.Fatal("Merge: ", err) 25 | } 26 | dec := json.NewDecoder(file) // json数据的流式读写 27 | for { 28 | var kv KeyValue 29 | err = dec.Decode(&kv) 30 | if err != nil { 31 | break 32 | } 33 | kvs[kv.Key] = kv.Value 34 | } 35 | file.Close() 36 | } 37 | 38 | // key排序 39 | var keys []string 40 | for k := range kvs { 41 | keys = append(keys, k) 42 | } 43 | sort.Strings(keys) 44 | 45 | // 将排序后的结果写入文件mrtmp.* 46 | file, err := os.Create("mrtmp." + mr.jobName) 47 | if err != nil { 48 | log.Fatal("Merge: create ", err) 49 | } 50 | w := bufio.NewWriter(file) 51 | for _, k := range keys { 52 | fmt.Fprintf(w, "%s: %s\n", k, kvs[k]) 53 | } 54 | w.Flush() 55 | file.Close() 56 | } 57 | 58 | // removeFile is a simple wrapper around os.Remove that logs errors. 
59 | func removeFile(n string) { 60 | err := os.Remove(n) 61 | if err != nil { 62 | log.Fatal("CleanupFiles ", err) 63 | } 64 | } 65 | 66 | // CleanupFiles removes all intermediate files produced by running mapreduce. 67 | // This covers the mrtmp.<job>-m-r map outputs, the mrtmp.<job>-res-r reduce outputs, and the merged mrtmp.<job> file. 68 | func (mr *Master) CleanupFiles() { 69 | for i := range mr.files { 70 | for j := 0; j < mr.nReduce; j++ { 71 | removeFile(reduceName(mr.jobName, i, j)) 72 | } 73 | } 74 | for i := 0; i < mr.nReduce; i++ { 75 | removeFile(mergeName(mr.jobName, i)) 76 | } 77 | removeFile("mrtmp." + mr.jobName) 78 | } 79 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/readme.go: -------------------------------------------------------------------------------- 1 | // Package mapreduce provides a simple mapreduce library with a sequential 2 | // implementation. Applications should normally call Distributed() [located in 3 | // master.go] to start a job, but may instead call Sequential() [also in 4 | // master.go] to get a sequential execution for debugging purposes. 5 | // 6 | // The flow of the mapreduce implementation is as follows: 7 | // 8 | // 1. The application provides a number of input files, a map function, a 9 | // reduce function, and the number of reduce tasks (nReduce). 10 | // 2. A master is created with this knowledge. It spins up an RPC server (see 11 | // master_rpc.go), and waits for workers to register (using the RPC call 12 | // Register() [defined in master.go]). As tasks become available (in steps 13 | // 4 and 5), schedule() [schedule.go] decides how to assign those tasks to 14 | // workers, and how to handle worker failures. 15 | // 3. The master considers each input file one map task, and makes a call to 16 | // doMap() [common_map.go] at least once for each task. It does so either 17 | // directly (when using Sequential()) or by issuing the DoTask RPC on a 18 | // worker [worker.go]. Each call to doMap() reads the appropriate file, 19 | // calls the map function on that file's contents, and produces nReduce 20 | // files for each map file. Thus, there will be #files x nReduce files 21 | // after all map tasks are done: 22 | // 23 | // f0-0, ..., f0-<nReduce-1>, ..., 24 | // f<#files-1>-0, ..., f<#files-1>-<nReduce-1>. 25 | // 26 | // 4. The master next makes a call to doReduce() [common_reduce.go] at least 27 | // once for each reduce task. As for doMap(), it does so either directly or 28 | // through a worker. doReduce() collects one intermediate file per map task 29 | // (f<m>-<r> for its own reduce number r), and runs the reduce function on 30 | // those files. This produces nReduce result files in total. 31 | // 5. The master calls mr.merge() [master_splitmerge.go], which merges all 32 | // the nReduce files produced by the previous step into a single output. 33 | // 6. The master sends a Shutdown RPC to each of its workers, and then shuts 34 | // down its own RPC server. 35 | // 36 | // TODO: 37 | // You will have to write/modify doMap, doReduce, and schedule yourself. These 38 | // are located in common_map.go, common_reduce.go, and schedule.go 39 | // respectively. You will also have to write the map and reduce functions in 40 | // ../main/wc.go. 41 | // 42 | // You should not need to modify any other files, but reading them might be 43 | // useful in order to understand how the other methods fit into the overall 44 | // architecture of the system.
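//
// Usage sketch (illustrative; the file names and socket paths below are
// made up, and the real entry point is main/wc.go): a distributed run
// looks roughly like this.
//
//	mr := mapreduce.Distributed("wcseq", []string{"pg-1.txt", "pg-2.txt"}, 3, "/var/tmp/824-master")
//	go mapreduce.RunWorker("/var/tmp/824-master", "/var/tmp/824-worker0", mapF, reduceF, -1)
//	go mapreduce.RunWorker("/var/tmp/824-master", "/var/tmp/824-worker1", mapF, reduceF, -1)
//	mr.Wait() // returns once the merged output mrtmp.wcseq has been written
//
// For debugging, mapreduce.Sequential("wcseq", files, 3, mapF, reduceF)
// runs the same job in-process, with no RPC involved.
//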
45 | package mapreduce 46 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/schedule.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import "fmt" 4 | 5 | // schedule starts and waits for all tasks in the given phase (Map or Reduce). 6 | func (mr *Master) schedule(phase jobPhase) { 7 | var ntasks int 8 | var nios int // number of inputs (for reduce) or outputs (for map) 9 | switch phase { 10 | case mapPhase: 11 | ntasks = len(mr.files) 12 | nios = mr.nReduce 13 | case reducePhase: 14 | ntasks = mr.nReduce 15 | nios = len(mr.files) 16 | } 17 | 18 | fmt.Printf("Schedule: %v %v tasks (%d I/Os)\n", ntasks, phase, nios) 19 | 20 | // All ntasks tasks have to be scheduled on workers, and only once all of 21 | // them have been completed successfully should the function return. 22 | // Remember that workers may fail, and that any given worker may finish 23 | // multiple tasks. 24 | // 25 | // TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO 26 | // 27 | fmt.Printf("Schedule: %v phase done\n", phase) 28 | } 29 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/test_test.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "bufio" 9 | "log" 10 | "os" 11 | "sort" 12 | "strconv" 13 | "strings" 14 | ) 15 | 16 | const ( 17 | nNumber = 100000 18 | nMap = 100 19 | nReduce = 50 20 | ) 21 | 22 | // Create input file with N numbers 23 | // Check if we have N numbers in output file 24 | 25 | // Split in words 26 | // 分割单词 27 | func MapFunc(file string, value string) (res []KeyValue) { 28 | words := strings.Fields(value) // 分隔空格隔开的单词 29 | for _, w := range words { 30 | kv := KeyValue{w, ""} 31 | res = append(res, kv) 32 | } 33 | return 34 | } 35 | 36 | // Just return key 37 | func ReduceFunc(key string, values []string) string { 38 | for _, e := range values { 39 | debug("Reduce %s %v\n", key, e) 40 | } 41 | return "" 42 | } 43 | 44 | // Checks input file agaist output file: each input number should show up 45 | // in the output file in string sorted order 46 | func check(t *testing.T, files []string) { 47 | output, err := os.Open("mrtmp.test") 48 | if err != nil { 49 | log.Fatal("check: ", err) 50 | } 51 | defer output.Close() 52 | 53 | var lines []string 54 | for _, f := range files { 55 | input, err := os.Open(f) 56 | if err != nil { 57 | log.Fatal("check: ", err) 58 | } 59 | defer input.Close() 60 | inputScanner := bufio.NewScanner(input) 61 | for inputScanner.Scan() { 62 | lines = append(lines, inputScanner.Text()) 63 | } 64 | } 65 | 66 | sort.Strings(lines) 67 | 68 | outputScanner := bufio.NewScanner(output) 69 | i := 0 70 | for outputScanner.Scan() { 71 | var v1 int 72 | var v2 int 73 | text := outputScanner.Text() 74 | n, err := fmt.Sscanf(lines[i], "%d", &v1) 75 | if n == 1 && err == nil { 76 | n, err = fmt.Sscanf(text, "%d", &v2) 77 | } 78 | if err != nil || v1 != v2 { 79 | t.Fatalf("line %d: %d != %d err %v\n", i, v1, v2, err) 80 | } 81 | i++ 82 | } 83 | if i != nNumber { 84 | t.Fatalf("Expected %d lines in output\n", nNumber) 85 | } 86 | } 87 | 88 | // Workers report back how many RPCs they have processed in the Shutdown reply. 89 | // Check that they processed at least 1 RPC. 
90 | func checkWorker(t *testing.T, l []int) { 91 | for _, tasks := range l { 92 | if tasks == 0 { 93 | t.Fatalf("Some worker didn't do any work\n") 94 | } 95 | } 96 | } 97 | 98 | // Make input file 99 | func makeInputs(num int) []string { 100 | var names []string 101 | var i = 0 102 | for f := 0; f < num; f++ { 103 | names = append(names, fmt.Sprintf("824-mrinput-%d.txt", f)) 104 | file, err := os.Create(names[f]) 105 | if err != nil { 106 | log.Fatal("mkInput: ", err) 107 | } 108 | w := bufio.NewWriter(file) 109 | for i < (f+1)*(nNumber/num) { 110 | fmt.Fprintf(w, "%d\n", i) 111 | i++ 112 | } 113 | w.Flush() 114 | file.Close() 115 | } 116 | return names 117 | } 118 | 119 | // Cook up a unique-ish UNIX-domain socket name 120 | // in /var/tmp. can't use current directory since 121 | // AFS doesn't support UNIX-domain sockets. 122 | func port(suffix string) string { 123 | s := "/var/tmp/824-" 124 | s += strconv.Itoa(os.Getuid()) + "/" 125 | os.Mkdir(s, 0777) 126 | s += "mr" 127 | s += strconv.Itoa(os.Getpid()) + "-" 128 | s += suffix 129 | return s 130 | } 131 | 132 | func setup() *Master { 133 | files := makeInputs(nMap) 134 | master := port("master") 135 | mr := Distributed("test", files, nReduce, master) 136 | return mr 137 | } 138 | 139 | func cleanup(mr *Master) { 140 | mr.CleanupFiles() 141 | for _, f := range mr.files { 142 | removeFile(f) 143 | } 144 | } 145 | 146 | func TestSequentialSingle(t *testing.T) { 147 | //mr := Sequential("test", makeInputs(1), 1, MapFunc, ReduceFunc) 148 | mr := Sequential("test", makeInputs(1), 3, MapFunc, ReduceFunc) 149 | mr.Wait() 150 | check(t, mr.files) 151 | checkWorker(t, mr.stats) 152 | cleanup(mr) 153 | } 154 | 155 | func TestSequentialMany(t *testing.T) { 156 | mr := Sequential("test", makeInputs(5), 3, MapFunc, ReduceFunc) 157 | mr.Wait() 158 | check(t, mr.files) 159 | checkWorker(t, mr.stats) 160 | cleanup(mr) 161 | } 162 | 163 | func TestBasic(t *testing.T) { 164 | mr := setup() 165 | for i := 0; i < 2; i++ { 166 | go RunWorker(mr.address, port("worker"+strconv.Itoa(i)), 167 | MapFunc, ReduceFunc, -1) 168 | } 169 | mr.Wait() 170 | check(t, mr.files) 171 | checkWorker(t, mr.stats) 172 | cleanup(mr) 173 | } 174 | 175 | func TestOneFailure(t *testing.T) { 176 | mr := setup() 177 | // Start 2 workers that fail after 10 tasks 178 | go RunWorker(mr.address, port("worker"+strconv.Itoa(0)), 179 | MapFunc, ReduceFunc, 10) 180 | go RunWorker(mr.address, port("worker"+strconv.Itoa(1)), 181 | MapFunc, ReduceFunc, -1) 182 | mr.Wait() 183 | check(t, mr.files) 184 | checkWorker(t, mr.stats) 185 | cleanup(mr) 186 | } 187 | 188 | func TestManyFailures(t *testing.T) { 189 | mr := setup() 190 | i := 0 191 | done := false 192 | for !done { 193 | select { 194 | case done = <-mr.doneChannel: 195 | check(t, mr.files) 196 | cleanup(mr) 197 | break 198 | default: 199 | // Start 2 workers each sec. 
The workers fail after 10 tasks 200 | w := port("worker" + strconv.Itoa(i)) 201 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10) 202 | i++ 203 | w = port("worker" + strconv.Itoa(i)) 204 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10) 205 | i++ 206 | time.Sleep(1 * time.Second) 207 | } 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/worker.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/rpc" 8 | "os" 9 | "sync" 10 | ) 11 | 12 | // Worker holds the state for a server waiting for DoTask or Shutdown RPCs 13 | type Worker struct { 14 | sync.Mutex 15 | 16 | name string // worker的名字 17 | Map func(string, string) []KeyValue 18 | Reduce func(string, []string) string 19 | nRPC int // protected by mutex 20 | nTasks int // protected by mutex 21 | l net.Listener 22 | } 23 | 24 | // DoTask is called by the master when a new task is being scheduled on this worker. 25 | func (wk *Worker) DoTask(arg *DoTaskArgs, _ *struct{}) error { 26 | fmt.Printf("%s: given %v task #%d on file %s (nios: %d)\n", 27 | wk.name, arg.Phase, arg.TaskNumber, arg.File, arg.NumOtherPhase) 28 | 29 | switch arg.Phase { 30 | case mapPhase: 31 | doMap(arg.JobName, arg.TaskNumber, arg.File, arg.NumOtherPhase, wk.Map) 32 | case reducePhase: 33 | doReduce(arg.JobName, arg.TaskNumber, arg.NumOtherPhase, wk.Reduce) 34 | } 35 | 36 | fmt.Printf("%s: %v task #%d done\n", wk.name, arg.Phase, arg.TaskNumber) 37 | return nil 38 | } 39 | 40 | // Shutdown is called by the master when all work has been completed. 41 | // We should respond with the number of tasks we have processed. 42 | func (wk *Worker) Shutdown(_ *struct{}, res *ShutdownReply) error { 43 | debug("Shutdown %s\n", wk.name) 44 | wk.Lock() 45 | defer wk.Unlock() 46 | res.Ntasks = wk.nTasks 47 | wk.nRPC = 1 48 | wk.nTasks-- // Don't count the shutdown RPC 49 | return nil 50 | } 51 | 52 | // Tell the master we exist and ready to work 53 | func (wk *Worker) register(master string) { 54 | args := new(RegisterArgs) 55 | args.Worker = wk.name 56 | ok := call(master, "Master.Register", args, new(struct{})) 57 | if ok == false { 58 | fmt.Printf("Register: RPC %s register error\n", master) 59 | } 60 | } 61 | 62 | // RunWorker sets up a connection with the master, registers its address, and 63 | // waits for tasks to be scheduled. 
64 | func RunWorker(MasterAddress string, me string, 65 | MapFunc func(string, string) []KeyValue, 66 | ReduceFunc func(string, []string) string, 67 | nRPC int, 68 | ) { 69 | debug("RunWorker %s\n", me) 70 | wk := new(Worker) 71 | wk.name = me 72 | wk.Map = MapFunc 73 | wk.Reduce = ReduceFunc 74 | wk.nRPC = nRPC 75 | rpcs := rpc.NewServer() 76 | rpcs.Register(wk) 77 | os.Remove(me) // only needed for "unix" 78 | l, e := net.Listen("unix", me) 79 | if e != nil { 80 | log.Fatal("RunWorker: worker ", me, " error: ", e) 81 | } 82 | wk.l = l 83 | wk.register(MasterAddress) 84 | 85 | // DON'T MODIFY CODE BELOW 86 | for { 87 | wk.Lock() 88 | if wk.nRPC == 0 { 89 | wk.Unlock() 90 | break 91 | } 92 | wk.Unlock() 93 | conn, err := wk.l.Accept() 94 | if err == nil { 95 | wk.Lock() 96 | wk.nRPC-- 97 | wk.Unlock() 98 | go rpcs.ServeConn(conn) 99 | wk.Lock() 100 | wk.nTasks++ 101 | wk.Unlock() 102 | } else { 103 | break 104 | } 105 | } 106 | wk.l.Close() 107 | debug("RunWorker %s exit\n", me) 108 | } 109 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "shardmaster" 4 | import "net/rpc" 5 | import "time" 6 | import "sync" 7 | import "fmt" 8 | import "crypto/rand" 9 | import "math/big" 10 | 11 | type Clerk struct { 12 | mu sync.Mutex // one RPC at a time 13 | sm *shardmaster.Clerk 14 | config shardmaster.Config 15 | // You'll have to modify Clerk. 16 | } 17 | 18 | func nrand() int64 { 19 | max := big.NewInt(int64(1) << 62) 20 | bigx, _ := rand.Int(rand.Reader, max) 21 | x := bigx.Int64() 22 | return x 23 | } 24 | 25 | func MakeClerk(shardmasters []string) *Clerk { 26 | ck := new(Clerk) 27 | ck.sm = shardmaster.MakeClerk(shardmasters) 28 | // You'll have to modify MakeClerk. 29 | return ck 30 | } 31 | 32 | // 33 | // call() sends an RPC to the rpcname handler on server srv 34 | // with arguments args, waits for the reply, and leaves the 35 | // reply in reply. the reply argument should be a pointer 36 | // to a reply structure. 37 | // 38 | // the return value is true if the server responded, and false 39 | // if call() was not able to contact the server. in particular, 40 | // the reply's contents are only valid if call() returned true. 41 | // 42 | // you should assume that call() will return an 43 | // error after a while if the server is dead. 44 | // don't provide your own time-out mechanism. 45 | // 46 | // please use call() to send all RPCs, in client.go and server.go. 47 | // please don't change this function. 48 | // 49 | func call(srv string, rpcname string, 50 | args interface{}, reply interface{}) bool { 51 | c, errx := rpc.Dial("unix", srv) 52 | if errx != nil { 53 | return false 54 | } 55 | defer c.Close() 56 | 57 | err := c.Call(rpcname, args, reply) 58 | if err == nil { 59 | return true 60 | } 61 | 62 | fmt.Println(err) 63 | return false 64 | } 65 | 66 | // 67 | // which shard is a key in? 68 | // please use this function, 69 | // and please do not change it. 70 | // 71 | func key2shard(key string) int { 72 | shard := 0 73 | if len(key) > 0 { 74 | shard = int(key[0]) 75 | } 76 | shard %= shardmaster.NShards 77 | return shard 78 | } 79 | 80 | // 81 | // fetch the current value for a key. 82 | // returns "" if the key does not exist. 83 | // keeps trying forever in the face of all other errors. 
84 | // 85 | func (ck *Clerk) Get(key string) string { 86 | ck.mu.Lock() 87 | defer ck.mu.Unlock() 88 | 89 | // You'll have to modify Get(). 90 | 91 | for { 92 | shard := key2shard(key) 93 | 94 | gid := ck.config.Shards[shard] 95 | 96 | servers, ok := ck.config.Groups[gid] 97 | 98 | if ok { 99 | // try each server in the shard's replication group. 100 | for _, srv := range servers { 101 | args := &GetArgs{} 102 | args.Key = key 103 | var reply GetReply 104 | ok := call(srv, "ShardKV.Get", args, &reply) 105 | if ok && (reply.Err == OK || reply.Err == ErrNoKey) { 106 | return reply.Value 107 | } 108 | if ok && (reply.Err == ErrWrongGroup) { 109 | break 110 | } 111 | } 112 | } 113 | 114 | time.Sleep(100 * time.Millisecond) 115 | 116 | // ask master for a new configuration. 117 | ck.config = ck.sm.Query(-1) 118 | } 119 | } 120 | 121 | // send a Put or Append request. 122 | func (ck *Clerk) PutAppend(key string, value string, op string) { 123 | ck.mu.Lock() 124 | defer ck.mu.Unlock() 125 | 126 | // You'll have to modify PutAppend(). 127 | 128 | for { 129 | shard := key2shard(key) 130 | 131 | gid := ck.config.Shards[shard] 132 | 133 | servers, ok := ck.config.Groups[gid] 134 | 135 | if ok { 136 | // try each server in the shard's replication group. 137 | for _, srv := range servers { 138 | args := &PutAppendArgs{} 139 | args.Key = key 140 | args.Value = value 141 | args.Op = op 142 | var reply PutAppendReply 143 | ok := call(srv, "ShardKV.PutAppend", args, &reply) 144 | if ok && reply.Err == OK { 145 | return 146 | } 147 | if ok && (reply.Err == ErrWrongGroup) { 148 | break 149 | } 150 | } 151 | } 152 | 153 | time.Sleep(100 * time.Millisecond) 154 | 155 | // ask master for a new configuration. 156 | ck.config = ck.sm.Query(-1) 157 | } 158 | } 159 | 160 | func (ck *Clerk) Put(key string, value string) { 161 | ck.PutAppend(key, value, "Put") 162 | } 163 | func (ck *Clerk) Append(key string, value string) { 164 | ck.PutAppend(key, value, "Append") 165 | } 166 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardkv/common.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // Sharded key/value server. 5 | // Lots of replica groups, each running op-at-a-time paxos. 6 | // Shardmaster decides which group serves each shard. 7 | // Shardmaster may change shard assignment from time to time. 8 | // 9 | // You will have to modify these definitions. 10 | // 11 | 12 | const ( 13 | OK = "OK" 14 | ErrNoKey = "ErrNoKey" 15 | ErrWrongGroup = "ErrWrongGroup" 16 | ) 17 | 18 | type Err string 19 | 20 | type PutAppendArgs struct { 21 | Key string 22 | Value string 23 | Op string // "Put" or "Append" 24 | // You'll have to add definitions here. 25 | // Field names must start with capital letters, 26 | // otherwise RPC will break. 27 | 28 | } 29 | 30 | type PutAppendReply struct { 31 | Err Err 32 | } 33 | 34 | type GetArgs struct { 35 | Key string 36 | // You'll have to add definitions here. 
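	// One common approach (illustrative, not required): tag every request
	// so the servers can detect duplicates when an RPC is retried, e.g.
	//
	//	ClientID int64 // which clerk sent this request
	//	SeqNum   int   // per-clerk sequence number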
37 | } 38 | 39 | type GetReply struct { 40 | Err Err 41 | Value string 42 | } 43 | 44 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardkv/server.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "time" 8 | import "paxos" 9 | import "sync" 10 | import "sync/atomic" 11 | import "os" 12 | import "syscall" 13 | import "encoding/gob" 14 | import "math/rand" 15 | import "shardmaster" 16 | 17 | 18 | const Debug = 0 19 | 20 | func DPrintf(format string, a ...interface{}) (n int, err error) { 21 | if Debug > 0 { 22 | log.Printf(format, a...) 23 | } 24 | return 25 | } 26 | 27 | 28 | type Op struct { 29 | // Your definitions here. 30 | } 31 | 32 | 33 | type ShardKV struct { 34 | mu sync.Mutex 35 | l net.Listener 36 | me int 37 | dead int32 // for testing 38 | unreliable int32 // for testing 39 | sm *shardmaster.Clerk 40 | px *paxos.Paxos 41 | 42 | gid int64 // my replica group ID 43 | 44 | // Your definitions here. 45 | } 46 | 47 | 48 | func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) error { 49 | // Your code here. 50 | return nil 51 | } 52 | 53 | // RPC handler for client Put and Append requests 54 | func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 55 | // Your code here. 56 | return nil 57 | } 58 | 59 | // 60 | // Ask the shardmaster if there's a new configuration; 61 | // if so, re-configure. 62 | // 63 | func (kv *ShardKV) tick() { 64 | } 65 | 66 | // tell the server to shut itself down. 67 | // please don't change these two functions. 68 | func (kv *ShardKV) kill() { 69 | atomic.StoreInt32(&kv.dead, 1) 70 | kv.l.Close() 71 | kv.px.Kill() 72 | } 73 | 74 | // call this to find out if the server is dead. 75 | func (kv *ShardKV) isdead() bool { 76 | return atomic.LoadInt32(&kv.dead) != 0 77 | } 78 | 79 | // please do not change these two functions. 80 | func (kv *ShardKV) Setunreliable(what bool) { 81 | if what { 82 | atomic.StoreInt32(&kv.unreliable, 1) 83 | } else { 84 | atomic.StoreInt32(&kv.unreliable, 0) 85 | } 86 | } 87 | 88 | func (kv *ShardKV) isunreliable() bool { 89 | return atomic.LoadInt32(&kv.unreliable) != 0 90 | } 91 | 92 | // 93 | // Start a shardkv server. 94 | // gid is the ID of the server's replica group. 95 | // shardmasters[] contains the ports of the 96 | // servers that implement the shardmaster. 97 | // servers[] contains the ports of the servers 98 | // in this replica group. 99 | // Me is the index of this server in servers[]. 100 | // 101 | func StartServer(gid int64, shardmasters []string, 102 | servers []string, me int) *ShardKV { 103 | gob.Register(Op{}) 104 | 105 | kv := new(ShardKV) 106 | kv.me = me 107 | kv.gid = gid 108 | kv.sm = shardmaster.MakeClerk(shardmasters) 109 | 110 | // Your initialization code here. 111 | // Don't call Join(). 112 | 113 | rpcs := rpc.NewServer() 114 | rpcs.Register(kv) 115 | 116 | kv.px = paxos.Make(servers, me, rpcs) 117 | 118 | 119 | os.Remove(servers[me]) 120 | l, e := net.Listen("unix", servers[me]) 121 | if e != nil { 122 | log.Fatal("listen error: ", e) 123 | } 124 | kv.l = l 125 | 126 | // please do not change any of the following code, 127 | // or do anything to subvert it. 128 | 129 | go func() { 130 | for kv.isdead() == false { 131 | conn, err := kv.l.Accept() 132 | if err == nil && kv.isdead() == false { 133 | if kv.isunreliable() && (rand.Int63()%1000) < 100 { 134 | // discard the request. 
135 | conn.Close() 136 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 { 137 | // process the request but force discard of reply. 138 | c1 := conn.(*net.UnixConn) 139 | f, _ := c1.File() 140 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 141 | if err != nil { 142 | fmt.Printf("shutdown: %v\n", err) 143 | } 144 | go rpcs.ServeConn(conn) 145 | } else { 146 | go rpcs.ServeConn(conn) 147 | } 148 | } else if err == nil { 149 | conn.Close() 150 | } 151 | if err != nil && kv.isdead() == false { 152 | fmt.Printf("ShardKV(%v) accept: %v\n", me, err.Error()) 153 | kv.kill() 154 | } 155 | } 156 | }() 157 | 158 | go func() { 159 | for kv.isdead() == false { 160 | kv.tick() 161 | time.Sleep(250 * time.Millisecond) 162 | } 163 | }() 164 | 165 | return kv 166 | } 167 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardkv/test_test.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "testing" 4 | import "shardmaster" 5 | import "runtime" 6 | import "strconv" 7 | import "os" 8 | import "time" 9 | import "fmt" 10 | import "sync" 11 | import "sync/atomic" 12 | import "math/rand" 13 | 14 | // information about the servers of one replica group. 15 | type tGroup struct { 16 | gid int64 17 | servers []*ShardKV 18 | ports []string 19 | } 20 | 21 | // information about all the servers of a k/v cluster. 22 | type tCluster struct { 23 | t *testing.T 24 | masters []*shardmaster.ShardMaster 25 | mck *shardmaster.Clerk 26 | masterports []string 27 | groups []*tGroup 28 | } 29 | 30 | func port(tag string, host int) string { 31 | s := "/var/tmp/824-" 32 | s += strconv.Itoa(os.Getuid()) + "/" 33 | os.Mkdir(s, 0777) 34 | s += "skv-" 35 | s += strconv.Itoa(os.Getpid()) + "-" 36 | s += tag + "-" 37 | s += strconv.Itoa(host) 38 | return s 39 | } 40 | 41 | // 42 | // start a k/v replica server thread. 
43 | // 44 | func (tc *tCluster) start1(gi int, si int, unreliable bool) { 45 | s := StartServer(tc.groups[gi].gid, tc.masterports, tc.groups[gi].ports, si) 46 | tc.groups[gi].servers[si] = s 47 | s.Setunreliable(unreliable) 48 | } 49 | 50 | func (tc *tCluster) cleanup() { 51 | for gi := 0; gi < len(tc.groups); gi++ { 52 | g := tc.groups[gi] 53 | for si := 0; si < len(g.servers); si++ { 54 | if g.servers[si] != nil { 55 | g.servers[si].kill() 56 | } 57 | } 58 | } 59 | 60 | for i := 0; i < len(tc.masters); i++ { 61 | if tc.masters[i] != nil { 62 | tc.masters[i].Kill() 63 | } 64 | } 65 | } 66 | 67 | func (tc *tCluster) shardclerk() *shardmaster.Clerk { 68 | return shardmaster.MakeClerk(tc.masterports) 69 | } 70 | 71 | func (tc *tCluster) clerk() *Clerk { 72 | return MakeClerk(tc.masterports) 73 | } 74 | 75 | func (tc *tCluster) join(gi int) { 76 | tc.mck.Join(tc.groups[gi].gid, tc.groups[gi].ports) 77 | } 78 | 79 | func (tc *tCluster) leave(gi int) { 80 | tc.mck.Leave(tc.groups[gi].gid) 81 | } 82 | 83 | func setup(t *testing.T, tag string, unreliable bool) *tCluster { 84 | runtime.GOMAXPROCS(4) 85 | 86 | const nmasters = 3 87 | const ngroups = 3 // replica groups 88 | const nreplicas = 3 // servers per group 89 | 90 | tc := &tCluster{} 91 | tc.t = t 92 | tc.masters = make([]*shardmaster.ShardMaster, nmasters) 93 | tc.masterports = make([]string, nmasters) 94 | 95 | for i := 0; i < nmasters; i++ { 96 | tc.masterports[i] = port(tag+"m", i) 97 | } 98 | for i := 0; i < nmasters; i++ { 99 | tc.masters[i] = shardmaster.StartServer(tc.masterports, i) 100 | } 101 | tc.mck = tc.shardclerk() 102 | 103 | tc.groups = make([]*tGroup, ngroups) 104 | 105 | for i := 0; i < ngroups; i++ { 106 | tc.groups[i] = &tGroup{} 107 | tc.groups[i].gid = int64(i + 100) 108 | tc.groups[i].servers = make([]*ShardKV, nreplicas) 109 | tc.groups[i].ports = make([]string, nreplicas) 110 | for j := 0; j < nreplicas; j++ { 111 | tc.groups[i].ports[j] = port(tag+"s", (i*nreplicas)+j) 112 | } 113 | for j := 0; j < nreplicas; j++ { 114 | tc.start1(i, j, unreliable) 115 | } 116 | } 117 | 118 | // return smh, gids, ha, sa, clean 119 | return tc 120 | } 121 | 122 | func TestBasic(t *testing.T) { 123 | tc := setup(t, "basic", false) 124 | defer tc.cleanup() 125 | 126 | fmt.Printf("Test: Basic Join/Leave ...\n") 127 | 128 | tc.join(0) 129 | 130 | ck := tc.clerk() 131 | 132 | ck.Put("a", "x") 133 | ck.Append("a", "b") 134 | if ck.Get("a") != "xb" { 135 | t.Fatalf("Get got wrong value") 136 | } 137 | 138 | keys := make([]string, 10) 139 | vals := make([]string, len(keys)) 140 | for i := 0; i < len(keys); i++ { 141 | keys[i] = strconv.Itoa(rand.Int()) 142 | vals[i] = strconv.Itoa(rand.Int()) 143 | ck.Put(keys[i], vals[i]) 144 | } 145 | 146 | // are keys still there after joins? 147 | for g := 1; g < len(tc.groups); g++ { 148 | tc.join(g) 149 | time.Sleep(1 * time.Second) 150 | for i := 0; i < len(keys); i++ { 151 | v := ck.Get(keys[i]) 152 | if v != vals[i] { 153 | t.Fatalf("joining; wrong value; g=%v k=%v wanted=%v got=%v", 154 | g, keys[i], vals[i], v) 155 | } 156 | vals[i] = strconv.Itoa(rand.Int()) 157 | ck.Put(keys[i], vals[i]) 158 | } 159 | } 160 | 161 | // are keys still there after leaves? 
162 | for g := 0; g < len(tc.groups)-1; g++ { 163 | tc.leave(g) 164 | time.Sleep(1 * time.Second) 165 | for i := 0; i < len(keys); i++ { 166 | v := ck.Get(keys[i]) 167 | if v != vals[i] { 168 | t.Fatalf("leaving; wrong value; g=%v k=%v wanted=%v got=%v", 169 | g, keys[i], vals[i], v) 170 | } 171 | vals[i] = strconv.Itoa(rand.Int()) 172 | ck.Put(keys[i], vals[i]) 173 | } 174 | } 175 | 176 | fmt.Printf(" ... Passed\n") 177 | } 178 | 179 | func TestMove(t *testing.T) { 180 | tc := setup(t, "move", false) 181 | defer tc.cleanup() 182 | 183 | fmt.Printf("Test: Shards really move ...\n") 184 | 185 | tc.join(0) 186 | 187 | ck := tc.clerk() 188 | 189 | // insert one key per shard 190 | for i := 0; i < shardmaster.NShards; i++ { 191 | ck.Put(string('0'+i), string('0'+i)) 192 | } 193 | 194 | // add group 1. 195 | tc.join(1) 196 | time.Sleep(5 * time.Second) 197 | 198 | // check that keys are still there. 199 | for i := 0; i < shardmaster.NShards; i++ { 200 | if ck.Get(string('0'+i)) != string('0'+i) { 201 | t.Fatalf("missing key/value") 202 | } 203 | } 204 | 205 | // remove sockets from group 0. 206 | for _, port := range tc.groups[0].ports { 207 | os.Remove(port) 208 | } 209 | 210 | count := int32(0) 211 | var mu sync.Mutex 212 | for i := 0; i < shardmaster.NShards; i++ { 213 | go func(me int) { 214 | myck := tc.clerk() 215 | v := myck.Get(string('0' + me)) 216 | if v == string('0'+me) { 217 | mu.Lock() 218 | atomic.AddInt32(&count, 1) 219 | mu.Unlock() 220 | } else { 221 | t.Fatalf("Get(%v) yielded %v\n", me, v) 222 | } 223 | }(i) 224 | } 225 | 226 | time.Sleep(10 * time.Second) 227 | 228 | ccc := atomic.LoadInt32(&count) 229 | if ccc > shardmaster.NShards/3 && ccc < 2*(shardmaster.NShards/3) { 230 | fmt.Printf(" ... Passed\n") 231 | } else { 232 | t.Fatalf("%v keys worked after killing 1/2 of groups; wanted %v", 233 | ccc, shardmaster.NShards/2) 234 | } 235 | } 236 | 237 | func TestLimp(t *testing.T) { 238 | tc := setup(t, "limp", false) 239 | defer tc.cleanup() 240 | 241 | fmt.Printf("Test: Reconfiguration with some dead replicas ...\n") 242 | 243 | tc.join(0) 244 | 245 | ck := tc.clerk() 246 | 247 | ck.Put("a", "b") 248 | if ck.Get("a") != "b" { 249 | t.Fatalf("got wrong value") 250 | } 251 | 252 | // kill one server from each replica group. 253 | for gi := 0; gi < len(tc.groups); gi++ { 254 | sa := tc.groups[gi].servers 255 | ns := len(sa) 256 | sa[rand.Int()%ns].kill() 257 | } 258 | 259 | keys := make([]string, 10) 260 | vals := make([]string, len(keys)) 261 | for i := 0; i < len(keys); i++ { 262 | keys[i] = strconv.Itoa(rand.Int()) 263 | vals[i] = strconv.Itoa(rand.Int()) 264 | ck.Put(keys[i], vals[i]) 265 | } 266 | 267 | // are keys still there after joins? 268 | for g := 1; g < len(tc.groups); g++ { 269 | tc.join(g) 270 | time.Sleep(1 * time.Second) 271 | for i := 0; i < len(keys); i++ { 272 | v := ck.Get(keys[i]) 273 | if v != vals[i] { 274 | t.Fatalf("joining; wrong value; g=%v k=%v wanted=%v got=%v", 275 | g, keys[i], vals[i], v) 276 | } 277 | vals[i] = strconv.Itoa(rand.Int()) 278 | ck.Put(keys[i], vals[i]) 279 | } 280 | } 281 | 282 | // are keys still there after leaves? 
283 | for gi := 0; gi < len(tc.groups)-1; gi++ { 284 | tc.leave(gi) 285 | time.Sleep(2 * time.Second) 286 | g := tc.groups[gi] 287 | for i := 0; i < len(g.servers); i++ { 288 | g.servers[i].kill() 289 | } 290 | for i := 0; i < len(keys); i++ { 291 | v := ck.Get(keys[i]) 292 | if v != vals[i] { 293 | t.Fatalf("leaving; wrong value; g=%v k=%v wanted=%v got=%v", 294 | g, keys[i], vals[i], v) 295 | } 296 | vals[i] = strconv.Itoa(rand.Int()) 297 | ck.Put(keys[i], vals[i]) 298 | } 299 | } 300 | 301 | fmt.Printf(" ... Passed\n") 302 | } 303 | 304 | func doConcurrent(t *testing.T, unreliable bool) { 305 | tc := setup(t, "concurrent-"+strconv.FormatBool(unreliable), unreliable) 306 | defer tc.cleanup() 307 | 308 | for i := 0; i < len(tc.groups); i++ { 309 | tc.join(i) 310 | } 311 | 312 | const npara = 11 313 | var ca [npara]chan bool 314 | for i := 0; i < npara; i++ { 315 | ca[i] = make(chan bool) 316 | go func(me int) { 317 | ok := true 318 | defer func() { ca[me] <- ok }() 319 | ck := tc.clerk() 320 | mymck := tc.shardclerk() 321 | key := strconv.Itoa(me) 322 | last := "" 323 | for iters := 0; iters < 3; iters++ { 324 | nv := strconv.Itoa(rand.Int()) 325 | ck.Append(key, nv) 326 | last = last + nv 327 | v := ck.Get(key) 328 | if v != last { 329 | ok = false 330 | t.Fatalf("Get(%v) expected %v got %v\n", key, last, v) 331 | } 332 | 333 | gi := rand.Int() % len(tc.groups) 334 | gid := tc.groups[gi].gid 335 | mymck.Move(rand.Int()%shardmaster.NShards, gid) 336 | 337 | time.Sleep(time.Duration(rand.Int()%30) * time.Millisecond) 338 | } 339 | }(i) 340 | } 341 | 342 | for i := 0; i < npara; i++ { 343 | x := <-ca[i] 344 | if x == false { 345 | t.Fatalf("something is wrong") 346 | } 347 | } 348 | } 349 | 350 | func TestConcurrent(t *testing.T) { 351 | fmt.Printf("Test: Concurrent Put/Get/Move ...\n") 352 | doConcurrent(t, false) 353 | fmt.Printf(" ... Passed\n") 354 | } 355 | 356 | func TestConcurrentUnreliable(t *testing.T) { 357 | fmt.Printf("Test: Concurrent Put/Get/Move (unreliable) ...\n") 358 | doConcurrent(t, true) 359 | fmt.Printf(" ... Passed\n") 360 | } 361 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardmaster/client.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Shardmaster clerk. 5 | // Please don't change this file. 6 | // 7 | 8 | import "net/rpc" 9 | import "time" 10 | import "fmt" 11 | 12 | type Clerk struct { 13 | servers []string // shardmaster replicas 14 | } 15 | 16 | func MakeClerk(servers []string) *Clerk { 17 | ck := new(Clerk) 18 | ck.servers = servers 19 | return ck 20 | } 21 | 22 | // 23 | // call() sends an RPC to the rpcname handler on server srv 24 | // with arguments args, waits for the reply, and leaves the 25 | // reply in reply. the reply argument should be a pointer 26 | // to a reply structure. 27 | // 28 | // the return value is true if the server responded, and false 29 | // if call() was not able to contact the server. in particular, 30 | // the reply's contents are only valid if call() returned true. 31 | // 32 | // you should assume that call() will return an 33 | // error after a while if the server is dead. 34 | // don't provide your own time-out mechanism. 35 | // 36 | // please use call() to send all RPCs, in client.go and server.go. 37 | // please don't change this function. 
38 | // 39 | func call(srv string, rpcname string, 40 | args interface{}, reply interface{}) bool { 41 | c, errx := rpc.Dial("unix", srv) 42 | if errx != nil { 43 | return false 44 | } 45 | defer c.Close() 46 | 47 | err := c.Call(rpcname, args, reply) 48 | if err == nil { 49 | return true 50 | } 51 | 52 | fmt.Println(err) 53 | return false 54 | } 55 | 56 | func (ck *Clerk) Query(num int) Config { 57 | for { 58 | // try each known server. 59 | for _, srv := range ck.servers { 60 | args := &QueryArgs{} 61 | args.Num = num 62 | var reply QueryReply 63 | ok := call(srv, "ShardMaster.Query", args, &reply) 64 | if ok { 65 | return reply.Config 66 | } 67 | } 68 | time.Sleep(100 * time.Millisecond) 69 | } 70 | } 71 | 72 | func (ck *Clerk) Join(gid int64, servers []string) { 73 | for { 74 | // try each known server. 75 | for _, srv := range ck.servers { 76 | args := &JoinArgs{} 77 | args.GID = gid 78 | args.Servers = servers 79 | var reply JoinReply 80 | ok := call(srv, "ShardMaster.Join", args, &reply) 81 | if ok { 82 | return 83 | } 84 | } 85 | time.Sleep(100 * time.Millisecond) 86 | } 87 | } 88 | 89 | func (ck *Clerk) Leave(gid int64) { 90 | for { 91 | // try each known server. 92 | for _, srv := range ck.servers { 93 | args := &LeaveArgs{} 94 | args.GID = gid 95 | var reply LeaveReply 96 | ok := call(srv, "ShardMaster.Leave", args, &reply) 97 | if ok { 98 | return 99 | } 100 | } 101 | time.Sleep(100 * time.Millisecond) 102 | } 103 | } 104 | 105 | func (ck *Clerk) Move(shard int, gid int64) { 106 | for { 107 | // try each known server. 108 | for _, srv := range ck.servers { 109 | args := &MoveArgs{} 110 | args.Shard = shard 111 | args.GID = gid 112 | var reply MoveReply 113 | ok := call(srv, "ShardMaster.Move", args, &reply) 114 | if ok { 115 | return 116 | } 117 | } 118 | time.Sleep(100 * time.Millisecond) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardmaster/common.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Master shard server: assigns shards to replication groups. 5 | // 6 | // RPC interface: 7 | // Join(gid, servers) -- replica group gid is joining, give it some shards. 8 | // Leave(gid) -- replica group gid is retiring, hand off all its shards. 9 | // Move(shard, gid) -- hand off one shard from current owner to gid. 10 | // Query(num) -> fetch Config # num, or latest config if num==-1. 11 | // 12 | // A Config (configuration) describes a set of replica groups, and the 13 | // replica group responsible for each shard. Configs are numbered. Config 14 | // #0 is the initial configuration, with no groups and all shards 15 | // assigned to group 0 (the invalid group). 16 | // 17 | // A GID is a replica group ID. GIDs must be uniqe and > 0. 18 | // Once a GID joins, and leaves, it should never join again. 19 | // 20 | // Please don't change this file. 
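// Illustrative example (not part of the interface): with NShards = 10 and
// two replica groups 100 and 101 that have both joined, a balanced
// configuration could look like
//
//	Config{
//		Num:    2,
//		Shards: [NShards]int64{100, 100, 100, 100, 100, 101, 101, 101, 101, 101},
//		Groups: map[int64][]string{100: {/* ports */}, 101: {/* ports */}},
//	}
//
// Clients map a key to a shard number, index Shards with it to get a gid,
// and then look that gid up in Groups to find the servers to contact.
//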
21 | // 22 | 23 | const NShards = 10 24 | 25 | type Config struct { 26 | Num int // config number 27 | Shards [NShards]int64 // shard -> gid 28 | Groups map[int64][]string // gid -> servers[] 29 | } 30 | 31 | type JoinArgs struct { 32 | GID int64 // unique replica group ID 33 | Servers []string // group server ports 34 | } 35 | 36 | type JoinReply struct { 37 | } 38 | 39 | type LeaveArgs struct { 40 | GID int64 41 | } 42 | 43 | type LeaveReply struct { 44 | } 45 | 46 | type MoveArgs struct { 47 | Shard int 48 | GID int64 49 | } 50 | 51 | type MoveReply struct { 52 | } 53 | 54 | type QueryArgs struct { 55 | Num int // desired config number 56 | } 57 | 58 | type QueryReply struct { 59 | Config Config 60 | } 61 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardmaster/server.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | 8 | import "paxos" 9 | import "sync" 10 | import "sync/atomic" 11 | import "os" 12 | import "syscall" 13 | import "encoding/gob" 14 | import "math/rand" 15 | 16 | type ShardMaster struct { 17 | mu sync.Mutex 18 | l net.Listener 19 | me int 20 | dead int32 // for testing 21 | unreliable int32 // for testing 22 | px *paxos.Paxos 23 | 24 | configs []Config // indexed by config num 25 | } 26 | 27 | 28 | type Op struct { 29 | // Your data here. 30 | } 31 | 32 | 33 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) error { 34 | // Your code here. 35 | 36 | return nil 37 | } 38 | 39 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) error { 40 | // Your code here. 41 | 42 | return nil 43 | } 44 | 45 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) error { 46 | // Your code here. 47 | 48 | return nil 49 | } 50 | 51 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) error { 52 | // Your code here. 53 | 54 | return nil 55 | } 56 | 57 | // please don't change these two functions. 58 | func (sm *ShardMaster) Kill() { 59 | atomic.StoreInt32(&sm.dead, 1) 60 | sm.l.Close() 61 | sm.px.Kill() 62 | } 63 | 64 | // call this to find out if the server is dead. 65 | func (sm *ShardMaster) isdead() bool { 66 | return atomic.LoadInt32(&sm.dead) != 0 67 | } 68 | 69 | // please do not change these two functions. 70 | func (sm *ShardMaster) setunreliable(what bool) { 71 | if what { 72 | atomic.StoreInt32(&sm.unreliable, 1) 73 | } else { 74 | atomic.StoreInt32(&sm.unreliable, 0) 75 | } 76 | } 77 | 78 | func (sm *ShardMaster) isunreliable() bool { 79 | return atomic.LoadInt32(&sm.unreliable) != 0 80 | } 81 | 82 | // 83 | // servers[] contains the ports of the set of 84 | // servers that will cooperate via Paxos to 85 | // form the fault-tolerant shardmaster service. 86 | // me is the index of the current server in servers[]. 87 | // 88 | func StartServer(servers []string, me int) *ShardMaster { 89 | sm := new(ShardMaster) 90 | sm.me = me 91 | 92 | sm.configs = make([]Config, 1) 93 | sm.configs[0].Groups = map[int64][]string{} 94 | 95 | rpcs := rpc.NewServer() 96 | 97 | gob.Register(Op{}) 98 | rpcs.Register(sm) 99 | sm.px = paxos.Make(servers, me, rpcs) 100 | 101 | os.Remove(servers[me]) 102 | l, e := net.Listen("unix", servers[me]) 103 | if e != nil { 104 | log.Fatal("listen error: ", e) 105 | } 106 | sm.l = l 107 | 108 | // please do not change any of the following code, 109 | // or do anything to subvert it. 
110 | 111 | go func() { 112 | for sm.isdead() == false { 113 | conn, err := sm.l.Accept() 114 | if err == nil && sm.isdead() == false { 115 | if sm.isunreliable() && (rand.Int63()%1000) < 100 { 116 | // discard the request. 117 | conn.Close() 118 | } else if sm.isunreliable() && (rand.Int63()%1000) < 200 { 119 | // process the request but force discard of reply. 120 | c1 := conn.(*net.UnixConn) 121 | f, _ := c1.File() 122 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 123 | if err != nil { 124 | fmt.Printf("shutdown: %v\n", err) 125 | } 126 | go rpcs.ServeConn(conn) 127 | } else { 128 | go rpcs.ServeConn(conn) 129 | } 130 | } else if err == nil { 131 | conn.Close() 132 | } 133 | if err != nil && sm.isdead() == false { 134 | fmt.Printf("ShardMaster(%v) accept: %v\n", me, err.Error()) 135 | sm.Kill() 136 | } 137 | } 138 | }() 139 | 140 | return sm 141 | } 142 | -------------------------------------------------------------------------------- /6.824/src/paxos/paxos.go: -------------------------------------------------------------------------------- 1 | package paxos 2 | 3 | // 4 | // Paxos library, to be included in an application. 5 | // Multiple applications will run, each including 6 | // a Paxos peer. 7 | // 8 | // Manages a sequence of agreed-on values. 9 | // The set of peers is fixed. 10 | // Copes with network failures (partition, msg loss, &c). 11 | // Does not store anything persistently, so cannot handle crash+restart. 12 | // 13 | // The application interface: 14 | // 15 | // px = paxos.Make(peers []string, me int) 16 | // px.Start(seq int, v interface{}) -- start agreement on new instance 17 | // px.Status(seq int) (Fate, v interface{}) -- get info about an instance 18 | // px.Done(seq int) -- ok to forget all instances <= seq 19 | // px.Max() int -- highest instance seq known, or -1 20 | // px.Min() int -- instances before this seq have been forgotten 21 | // 22 | 23 | import "net" 24 | import "net/rpc" 25 | import "log" 26 | 27 | import "os" 28 | import "syscall" 29 | import "sync" 30 | import "sync/atomic" 31 | import "fmt" 32 | import "math/rand" 33 | 34 | 35 | // px.Status() return values, indicating 36 | // whether an agreement has been decided, 37 | // or Paxos has not yet reached agreement, 38 | // or it was agreed but forgotten (i.e. < Min()). 39 | type Fate int 40 | 41 | const ( 42 | Decided Fate = iota + 1 43 | Pending // not yet decided. 44 | Forgotten // decided but forgotten. 45 | ) 46 | 47 | type Paxos struct { 48 | mu sync.Mutex 49 | l net.Listener 50 | dead int32 // for testing 51 | unreliable int32 // for testing 52 | rpcCount int32 // for testing 53 | peers []string 54 | me int // index into peers[] 55 | 56 | 57 | // Your data here. 58 | } 59 | 60 | // 61 | // call() sends an RPC to the rpcname handler on server srv 62 | // with arguments args, waits for the reply, and leaves the 63 | // reply in reply. the reply argument should be a pointer 64 | // to a reply structure. 65 | // 66 | // the return value is true if the server responded, and false 67 | // if call() was not able to contact the server. in particular, 68 | // the replys contents are only valid if call() returned true. 69 | // 70 | // you should assume that call() will time out and return an 71 | // error after a while if it does not get a reply from the server. 72 | // 73 | // please use call() to send all RPCs, in client.go and server.go. 74 | // please do not change this function. 
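//
// Illustrative usage sketch (not part of the required code): an application
// such as kvpaxos typically drives one round of agreement like this, where
// op is whatever gob-registered value it wants the peers to agree on:
//
//	seq := px.Max() + 1 // one simple way to pick an instance number
//	px.Start(seq, op)
//	to := 10 * time.Millisecond
//	for {
//		fate, v := px.Status(seq)
//		if fate == Decided {
//			// v is the decided value; it may be some other peer's
//			// proposal, not necessarily op
//			_ = v
//			break
//		}
//		time.Sleep(to)
//		if to < 10*time.Second {
//			to *= 2 // back off while waiting for agreement
//		}
//	}
//	px.Done(seq) // this peer no longer needs instances <= seq
//
// ("time" would have to be imported wherever this runs.)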
75 | // 76 | func call(srv string, name string, args interface{}, reply interface{}) bool { 77 | c, err := rpc.Dial("unix", srv) 78 | if err != nil { 79 | err1 := err.(*net.OpError) 80 | if err1.Err != syscall.ENOENT && err1.Err != syscall.ECONNREFUSED { 81 | fmt.Printf("paxos Dial() failed: %v\n", err1) 82 | } 83 | return false 84 | } 85 | defer c.Close() 86 | 87 | err = c.Call(name, args, reply) 88 | if err == nil { 89 | return true 90 | } 91 | 92 | fmt.Println(err) 93 | return false 94 | } 95 | 96 | 97 | // 98 | // the application wants paxos to start agreement on 99 | // instance seq, with proposed value v. 100 | // Start() returns right away; the application will 101 | // call Status() to find out if/when agreement 102 | // is reached. 103 | // 104 | func (px *Paxos) Start(seq int, v interface{}) { 105 | // Your code here. 106 | } 107 | 108 | // 109 | // the application on this machine is done with 110 | // all instances <= seq. 111 | // 112 | // see the comments for Min() for more explanation. 113 | // 114 | func (px *Paxos) Done(seq int) { 115 | // Your code here. 116 | } 117 | 118 | // 119 | // the application wants to know the 120 | // highest instance sequence known to 121 | // this peer. 122 | // 123 | func (px *Paxos) Max() int { 124 | // Your code here. 125 | return 0 126 | } 127 | 128 | // 129 | // Min() should return one more than the minimum among z_i, 130 | // where z_i is the highest number ever passed 131 | // to Done() on peer i. A peers z_i is -1 if it has 132 | // never called Done(). 133 | // 134 | // Paxos is required to have forgotten all information 135 | // about any instances it knows that are < Min(). 136 | // The point is to free up memory in long-running 137 | // Paxos-based servers. 138 | // 139 | // Paxos peers need to exchange their highest Done() 140 | // arguments in order to implement Min(). These 141 | // exchanges can be piggybacked on ordinary Paxos 142 | // agreement protocol messages, so it is OK if one 143 | // peers Min does not reflect another Peers Done() 144 | // until after the next instance is agreed to. 145 | // 146 | // The fact that Min() is defined as a minimum over 147 | // *all* Paxos peers means that Min() cannot increase until 148 | // all peers have been heard from. So if a peer is dead 149 | // or unreachable, other peers Min()s will not increase 150 | // even if all reachable peers call Done. The reason for 151 | // this is that when the unreachable peer comes back to 152 | // life, it will need to catch up on instances that it 153 | // missed -- the other peers therefor cannot forget these 154 | // instances. 155 | // 156 | func (px *Paxos) Min() int { 157 | // You code here. 158 | return 0 159 | } 160 | 161 | // 162 | // the application wants to know whether this 163 | // peer thinks an instance has been decided, 164 | // and if so what the agreed value is. Status() 165 | // should just inspect the local peer state; 166 | // it should not contact other Paxos peers. 167 | // 168 | func (px *Paxos) Status(seq int) (Fate, interface{}) { 169 | // Your code here. 170 | return Pending, nil 171 | } 172 | 173 | 174 | 175 | // 176 | // tell the peer to shut itself down. 177 | // for testing. 178 | // please do not change these two functions. 179 | // 180 | func (px *Paxos) Kill() { 181 | atomic.StoreInt32(&px.dead, 1) 182 | if px.l != nil { 183 | px.l.Close() 184 | } 185 | } 186 | 187 | // 188 | // has this peer been asked to shut down? 
189 | // 190 | func (px *Paxos) isdead() bool { 191 | return atomic.LoadInt32(&px.dead) != 0 192 | } 193 | 194 | // please do not change these two functions. 195 | func (px *Paxos) setunreliable(what bool) { 196 | if what { 197 | atomic.StoreInt32(&px.unreliable, 1) 198 | } else { 199 | atomic.StoreInt32(&px.unreliable, 0) 200 | } 201 | } 202 | 203 | func (px *Paxos) isunreliable() bool { 204 | return atomic.LoadInt32(&px.unreliable) != 0 205 | } 206 | 207 | // 208 | // the application wants to create a paxos peer. 209 | // the ports of all the paxos peers (including this one) 210 | // are in peers[]. this servers port is peers[me]. 211 | // 212 | func Make(peers []string, me int, rpcs *rpc.Server) *Paxos { 213 | px := &Paxos{} 214 | px.peers = peers 215 | px.me = me 216 | 217 | 218 | // Your initialization code here. 219 | 220 | if rpcs != nil { 221 | // caller will create socket &c 222 | rpcs.Register(px) 223 | } else { 224 | rpcs = rpc.NewServer() 225 | rpcs.Register(px) 226 | 227 | // prepare to receive connections from clients. 228 | // change "unix" to "tcp" to use over a network. 229 | os.Remove(peers[me]) // only needed for "unix" 230 | l, e := net.Listen("unix", peers[me]) 231 | if e != nil { 232 | log.Fatal("listen error: ", e) 233 | } 234 | px.l = l 235 | 236 | // please do not change any of the following code, 237 | // or do anything to subvert it. 238 | 239 | // create a thread to accept RPC connections 240 | go func() { 241 | for px.isdead() == false { 242 | conn, err := px.l.Accept() 243 | if err == nil && px.isdead() == false { 244 | if px.isunreliable() && (rand.Int63()%1000) < 100 { 245 | // discard the request. 246 | conn.Close() 247 | } else if px.isunreliable() && (rand.Int63()%1000) < 200 { 248 | // process the request but force discard of reply. 249 | c1 := conn.(*net.UnixConn) 250 | f, _ := c1.File() 251 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 252 | if err != nil { 253 | fmt.Printf("shutdown: %v\n", err) 254 | } 255 | atomic.AddInt32(&px.rpcCount, 1) 256 | go rpcs.ServeConn(conn) 257 | } else { 258 | atomic.AddInt32(&px.rpcCount, 1) 259 | go rpcs.ServeConn(conn) 260 | } 261 | } else if err == nil { 262 | conn.Close() 263 | } 264 | if err != nil && px.isdead() == false { 265 | fmt.Printf("Paxos(%v) accept: %v\n", me, err.Error()) 266 | } 267 | } 268 | }() 269 | } 270 | 271 | 272 | return px 273 | } 274 | -------------------------------------------------------------------------------- /6.824/src/pbservice/client.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import "viewservice" 4 | import "net/rpc" 5 | import "fmt" 6 | 7 | import "crypto/rand" 8 | import "math/big" 9 | 10 | 11 | type Clerk struct { 12 | vs *viewservice.Clerk 13 | // Your declarations here 14 | } 15 | 16 | // this may come in handy. 17 | func nrand() int64 { 18 | max := big.NewInt(int64(1) << 62) 19 | bigx, _ := rand.Int(rand.Reader, max) 20 | x := bigx.Int64() 21 | return x 22 | } 23 | 24 | func MakeClerk(vshost string, me string) *Clerk { 25 | ck := new(Clerk) 26 | ck.vs = viewservice.MakeClerk(me, vshost) 27 | // Your ck.* initializations here 28 | 29 | return ck 30 | } 31 | 32 | 33 | // 34 | // call() sends an RPC to the rpcname handler on server srv 35 | // with arguments args, waits for the reply, and leaves the 36 | // reply in reply. the reply argument should be a pointer 37 | // to a reply structure. 
38 | // 39 | // the return value is true if the server responded, and false 40 | // if call() was not able to contact the server. in particular, 41 | // the reply's contents are only valid if call() returned true. 42 | // 43 | // you should assume that call() will return an 44 | // error after a while if the server is dead. 45 | // don't provide your own time-out mechanism. 46 | // 47 | // please use call() to send all RPCs, in client.go and server.go. 48 | // please don't change this function. 49 | // 50 | func call(srv string, rpcname string, 51 | args interface{}, reply interface{}) bool { 52 | c, errx := rpc.Dial("unix", srv) 53 | if errx != nil { 54 | return false 55 | } 56 | defer c.Close() 57 | 58 | err := c.Call(rpcname, args, reply) 59 | if err == nil { 60 | return true 61 | } 62 | 63 | fmt.Println(err) 64 | return false 65 | } 66 | 67 | // 68 | // fetch a key's value from the current primary; 69 | // if they key has never been set, return "". 70 | // Get() must keep trying until it either the 71 | // primary replies with the value or the primary 72 | // says the key doesn't exist (has never been Put(). 73 | // 74 | func (ck *Clerk) Get(key string) string { 75 | 76 | // Your code here. 77 | 78 | return "???" 79 | } 80 | 81 | // 82 | // send a Put or Append RPC 83 | // 84 | func (ck *Clerk) PutAppend(key string, value string, op string) { 85 | 86 | // Your code here. 87 | } 88 | 89 | // 90 | // tell the primary to update key's value. 91 | // must keep trying until it succeeds. 92 | // 93 | func (ck *Clerk) Put(key string, value string) { 94 | ck.PutAppend(key, value, "Put") 95 | } 96 | 97 | // 98 | // tell the primary to append to key's value. 99 | // must keep trying until it succeeds. 100 | // 101 | func (ck *Clerk) Append(key string, value string) { 102 | ck.PutAppend(key, value, "Append") 103 | } 104 | -------------------------------------------------------------------------------- /6.824/src/pbservice/common.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ErrWrongServer = "ErrWrongServer" 7 | ) 8 | 9 | type Err string 10 | 11 | // Put or Append 12 | type PutAppendArgs struct { 13 | Key string 14 | Value string 15 | // You'll have to add definitions here. 16 | 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | } 20 | 21 | type PutAppendReply struct { 22 | Err Err 23 | } 24 | 25 | type GetArgs struct { 26 | Key string 27 | // You'll have to add definitions here. 28 | } 29 | 30 | type GetReply struct { 31 | Err Err 32 | Value string 33 | } 34 | 35 | 36 | // Your RPC definitions here. 37 | -------------------------------------------------------------------------------- /6.824/src/pbservice/server.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "time" 8 | import "viewservice" 9 | import "sync" 10 | import "sync/atomic" 11 | import "os" 12 | import "syscall" 13 | import "math/rand" 14 | 15 | 16 | 17 | type PBServer struct { 18 | mu sync.Mutex 19 | l net.Listener 20 | dead int32 // for testing 21 | unreliable int32 // for testing 22 | me string 23 | vs *viewservice.Clerk 24 | // Your declarations here. 25 | } 26 | 27 | 28 | func (pb *PBServer) Get(args *GetArgs, reply *GetReply) error { 29 | 30 | // Your code here. 
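	// a minimal sketch of one possible shape for Get(), assuming hypothetical
	// pb.view (latest viewservice.View) and pb.kv (map[string]string) fields
	// added to PBServer -- not the only correct design:
	//
	//	pb.mu.Lock()
	//	defer pb.mu.Unlock()
	//	if pb.view.Primary != pb.me {
	//		reply.Err = ErrWrongServer // only the primary answers reads
	//		return nil
	//	}
	//	value, ok := pb.kv[args.Key]
	//	if !ok {
	//		reply.Err = ErrNoKey
	//		reply.Value = ""
	//		return nil
	//	}
	//	reply.Err = OK
	//	reply.Value = value
	//	return nil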
31 | 32 | return nil 33 | } 34 | 35 | 36 | func (pb *PBServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 37 | 38 | // Your code here. 39 | 40 | 41 | return nil 42 | } 43 | 44 | 45 | // 46 | // ping the viewserver periodically. 47 | // if view changed: 48 | // transition to new view. 49 | // manage transfer of state from primary to new backup. 50 | // 51 | func (pb *PBServer) tick() { 52 | 53 | // Your code here. 54 | } 55 | 56 | // tell the server to shut itself down. 57 | // please do not change these two functions. 58 | func (pb *PBServer) kill() { 59 | atomic.StoreInt32(&pb.dead, 1) 60 | pb.l.Close() 61 | } 62 | 63 | // call this to find out if the server is dead. 64 | func (pb *PBServer) isdead() bool { 65 | return atomic.LoadInt32(&pb.dead) != 0 66 | } 67 | 68 | // please do not change these two functions. 69 | func (pb *PBServer) setunreliable(what bool) { 70 | if what { 71 | atomic.StoreInt32(&pb.unreliable, 1) 72 | } else { 73 | atomic.StoreInt32(&pb.unreliable, 0) 74 | } 75 | } 76 | 77 | func (pb *PBServer) isunreliable() bool { 78 | return atomic.LoadInt32(&pb.unreliable) != 0 79 | } 80 | 81 | 82 | func StartServer(vshost string, me string) *PBServer { 83 | pb := new(PBServer) 84 | pb.me = me 85 | pb.vs = viewservice.MakeClerk(me, vshost) 86 | // Your pb.* initializations here. 87 | 88 | rpcs := rpc.NewServer() 89 | rpcs.Register(pb) 90 | 91 | os.Remove(pb.me) 92 | l, e := net.Listen("unix", pb.me) 93 | if e != nil { 94 | log.Fatal("listen error: ", e) 95 | } 96 | pb.l = l 97 | 98 | // please do not change any of the following code, 99 | // or do anything to subvert it. 100 | 101 | go func() { 102 | for pb.isdead() == false { 103 | conn, err := pb.l.Accept() 104 | if err == nil && pb.isdead() == false { 105 | if pb.isunreliable() && (rand.Int63()%1000) < 100 { 106 | // discard the request. 107 | conn.Close() 108 | } else if pb.isunreliable() && (rand.Int63()%1000) < 200 { 109 | // process the request but force discard of reply. 110 | c1 := conn.(*net.UnixConn) 111 | f, _ := c1.File() 112 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 113 | if err != nil { 114 | fmt.Printf("shutdown: %v\n", err) 115 | } 116 | go rpcs.ServeConn(conn) 117 | } else { 118 | go rpcs.ServeConn(conn) 119 | } 120 | } else if err == nil { 121 | conn.Close() 122 | } 123 | if err != nil && pb.isdead() == false { 124 | fmt.Printf("PBServer(%v) accept: %v\n", me, err.Error()) 125 | pb.kill() 126 | } 127 | } 128 | }() 129 | 130 | go func() { 131 | for pb.isdead() == false { 132 | pb.tick() 133 | time.Sleep(viewservice.PingInterval) 134 | } 135 | }() 136 | 137 | return pb 138 | } 139 | -------------------------------------------------------------------------------- /6.824/src/raft/persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft and kvraft to save persistent 5 | // Raft state (log &c) and k/v server snapshots. 6 | // 7 | // we will use the original persister.go to test your code for grading. 8 | // so, while you can modify this code to help you debug, please 9 | // test with the original before submitting. 
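//
// minimal usage example, using only the API defined in this file:
//
//	ps := MakePersister()
//	ps.SaveRaftState([]byte("encoded raft state"))
//	ps.SaveSnapshot([]byte("encoded k/v snapshot"))
//	state := ps.ReadRaftState() // the same bytes after a simulated restart
//	snap := ps.ReadSnapshot()
//	size := ps.RaftStateSize()  // == len(state)
//
// note that Copy() below is a shallow copy: the byte slices are shared with
// the original Persister.
//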
10 | // 11 | 12 | import "sync" 13 | 14 | // 持久化对象 15 | type Persister struct { 16 | mu sync.Mutex // 锁保护 17 | raftstate []byte // Raft状态值 18 | snapshot []byte // 快照数据 19 | } 20 | 21 | // 创建 22 | func MakePersister() *Persister { 23 | return &Persister{} 24 | } 25 | 26 | // 拷贝持久化对象 27 | func (ps *Persister) Copy() *Persister { 28 | ps.mu.Lock() 29 | defer ps.mu.Unlock() 30 | np := MakePersister() 31 | // 居然是浅拷贝,数据变化相互影响 32 | np.raftstate = ps.raftstate 33 | np.snapshot = ps.snapshot 34 | return np 35 | } 36 | 37 | // 保存数据到持久化对象 38 | func (ps *Persister) SaveRaftState(data []byte) { 39 | ps.mu.Lock() 40 | defer ps.mu.Unlock() 41 | ps.raftstate = data 42 | } 43 | 44 | // 获取持久化数据 45 | func (ps *Persister) ReadRaftState() []byte { 46 | ps.mu.Lock() 47 | defer ps.mu.Unlock() 48 | return ps.raftstate 49 | } 50 | 51 | // 获取Raft状态数据的大小 52 | func (ps *Persister) RaftStateSize() int { 53 | ps.mu.Lock() 54 | defer ps.mu.Unlock() 55 | return len(ps.raftstate) 56 | } 57 | 58 | // 保存快照数据 59 | func (ps *Persister) SaveSnapshot(snapshot []byte) { 60 | ps.mu.Lock() 61 | defer ps.mu.Unlock() 62 | ps.snapshot = snapshot 63 | } 64 | 65 | // 获取快照数据 66 | func (ps *Persister) ReadSnapshot() []byte { 67 | ps.mu.Lock() 68 | defer ps.mu.Unlock() 69 | return ps.snapshot 70 | } 71 | -------------------------------------------------------------------------------- /6.824/src/raft/raft.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // this is an outline of the API that raft must expose to 5 | // the service (or tester). see comments below for 6 | // each of these functions for more details. 7 | // 8 | // rf = Make(...) 9 | // create a new Raft server. 10 | // rf.Start(command interface{}) (index, term, isleader) 11 | // start agreement on a new log entry 12 | // rf.GetState() (term, isLeader) 13 | // ask a Raft for its current term, and whether it thinks it is leader 14 | // ApplyMsg 15 | // each time a new entry is committed to the log, each Raft peer 16 | // should send an ApplyMsg to the service (or tester) 17 | // in the same server. 18 | // 19 | 20 | import "sync" 21 | import "labrpc" 22 | 23 | // import "bytes" 24 | // import "encoding/gob" 25 | 26 | 27 | 28 | // 29 | // as each Raft peer becomes aware that successive log entries are 30 | // committed, the peer should send an ApplyMsg to the service (or 31 | // tester) on the same server, via the applyCh passed to Make(). 32 | // 33 | type ApplyMsg struct { 34 | Index int 35 | Command interface{} 36 | UseSnapshot bool // ignore for lab2; only used in lab3 37 | Snapshot []byte // ignore for lab2; only used in lab3 38 | } 39 | 40 | // 41 | // A Go object implementing a single Raft peer. 42 | // 43 | type Raft struct { 44 | mu sync.Mutex 45 | peers []*labrpc.ClientEnd 46 | persister *Persister 47 | me int // index into peers[] 48 | 49 | // Your data here. 50 | // Look at the paper's Figure 2 for a description of what 51 | // state a Raft server must maintain. 
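	// a minimal sketch of the Figure 2 state, with conventional (not mandated)
	// field names; LogEntry is a hypothetical type such as
	// struct { Term int; Command interface{} }:
	//
	//	currentTerm int        // latest term this server has seen
	//	votedFor    int        // candidate voted for in currentTerm, or -1 if none
	//	log         []LogEntry // the replicated log
	//
	//	commitIndex int        // highest log index known to be committed
	//	lastApplied int        // highest log index applied to the state machine
	//
	//	nextIndex   []int      // leader only: next log index to send to each peer
	//	matchIndex  []int      // leader only: highest index known replicated on each peer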
52 | // 查看论文的图2部分,可知 53 | 54 | /* 55 | * 全部服务器上面的可持久化状态: 56 | * currentTerm 服务器看到的最近Term(第一次启动的时候为0,后面单调递增) 57 | * votedFor 当前Term收到的投票候选 (如果没有就为null) 58 | * log[] 日志项; 每个日志项包含机器状态和被leader接收的Term(first index is 1) 59 | */ 60 | // 删除代码部分 61 | /* 62 | * 全部服务器上面的不稳定状态: 63 | * commitIndex 已经被提交的最新的日志索引(第一次为0,后面单调递增) 64 | * lastApplied 已经应用到服务器状态的最新的日志索引(第一次为0,后面单调递增) 65 | */ 66 | // 删除代码部分 67 | 68 | /* 69 | * leader上面使用的不稳定状态(完成选举之后需要重新初始化) 70 | * nextIndex[] 71 | * 72 | * 73 | */ 74 | 75 | } 76 | 77 | // return currentTerm and whether this server 78 | // believes it is the leader. 79 | func (rf *Raft) GetState() (int, bool) { 80 | var term int 81 | var isleader bool 82 | // Your code here. 83 | return term, isleader 84 | } 85 | 86 | // 87 | // save Raft's persistent state to stable storage, 88 | // where it can later be retrieved after a crash and restart. 89 | // see paper's Figure 2 for a description of what should be persistent. 90 | // 91 | func (rf *Raft) persist() { 92 | // Your code here. 93 | // Example: 94 | // w := new(bytes.Buffer) 95 | // e := gob.NewEncoder(w) 96 | // e.Encode(rf.xxx) 97 | // e.Encode(rf.yyy) 98 | // data := w.Bytes() 99 | // rf.persister.SaveRaftState(data) 100 | } 101 | 102 | // 103 | // restore previously persisted state. 104 | // 105 | func (rf *Raft) readPersist(data []byte) { 106 | // Your code here. 107 | // Example: 108 | // r := bytes.NewBuffer(data) 109 | // d := gob.NewDecoder(r) 110 | // d.Decode(&rf.xxx) 111 | // d.Decode(&rf.yyy) 112 | } 113 | 114 | 115 | 116 | 117 | // 118 | // example RequestVote RPC arguments structure. 119 | // 120 | type RequestVoteArgs struct { 121 | // Your data here. 122 | } 123 | 124 | // 125 | // example RequestVote RPC reply structure. 126 | // 127 | type RequestVoteReply struct { 128 | // Your data here. 129 | } 130 | 131 | // 132 | // example RequestVote RPC handler. 133 | // 134 | func (rf *Raft) RequestVote(args RequestVoteArgs, reply *RequestVoteReply) { 135 | // Your code here. 136 | } 137 | 138 | // 139 | // example code to send a RequestVote RPC to a server. 140 | // server is the index of the target server in rf.peers[]. 141 | // expects RPC arguments in args. 142 | // fills in *reply with RPC reply, so caller should 143 | // pass &reply. 144 | // the types of the args and reply passed to Call() must be 145 | // the same as the types of the arguments declared in the 146 | // handler function (including whether they are pointers). 147 | // 148 | // returns true if labrpc says the RPC was delivered. 149 | // 150 | // if you're having trouble getting RPC to work, check that you've 151 | // capitalized all field names in structs passed over RPC, and 152 | // that the caller passes the address of the reply struct with &, not 153 | // the struct itself. 154 | // 155 | func (rf *Raft) sendRequestVote(server int, args RequestVoteArgs, reply *RequestVoteReply) bool { 156 | ok := rf.peers[server].Call("Raft.RequestVote", args, reply) 157 | return ok 158 | } 159 | 160 | 161 | // 162 | // the service using Raft (e.g. a k/v server) wants to start 163 | // agreement on the next command to be appended to Raft's log. if this 164 | // server isn't the leader, returns false. otherwise start the 165 | // agreement and return immediately. there is no guarantee that this 166 | // command will ever be committed to the Raft log, since the leader 167 | // may fail or lose an election. 168 | // 169 | // the first return value is the index that the command will appear at 170 | // if it's ever committed. 
the second return value is the current 171 | // term. the third return value is true if this server believes it is 172 | // the leader. 173 | // 174 | func (rf *Raft) Start(command interface{}) (int, int, bool) { 175 | index := -1 176 | term := -1 177 | isLeader := true 178 | 179 | 180 | return index, term, isLeader 181 | } 182 | 183 | // 184 | // the tester calls Kill() when a Raft instance won't 185 | // be needed again. you are not required to do anything 186 | // in Kill(), but it might be convenient to (for example) 187 | // turn off debug output from this instance. 188 | // 189 | func (rf *Raft) Kill() { 190 | // Your code here, if desired. 191 | } 192 | 193 | // 194 | // the service or tester wants to create a Raft server. the ports 195 | // of all the Raft servers (including this one) are in peers[]. this 196 | // server's port is peers[me]. all the servers' peers[] arrays 197 | // have the same order. persister is a place for this server to 198 | // save its persistent state, and also initially holds the most 199 | // recent saved state, if any. applyCh is a channel on which the 200 | // tester or service expects Raft to send ApplyMsg messages. 201 | // Make() must return quickly, so it should start goroutines 202 | // for any long-running work. 203 | // 204 | // 建一个Raft端点。 205 | // peers参数是通往其他Raft端点处于连接状态下的RPC连接。 206 | // me参数是自己在端点数组中的索引。 207 | func Make(peers []*labrpc.ClientEnd, me int, 208 | persister *Persister, applyCh chan ApplyMsg) *Raft { 209 | rf := &Raft{} 210 | rf.peers = peers 211 | rf.persister = persister 212 | rf.me = me 213 | 214 | // Your initialization code here. 215 | 216 | // initialize from state persisted before a crash 217 | rf.readPersist(persister.ReadRaftState()) 218 | 219 | 220 | return rf 221 | } 222 | -------------------------------------------------------------------------------- /6.824/src/raft/util.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "log" 4 | 5 | // Debugging 6 | const Debug = 0 7 | 8 | func DPrintf(format string, a ...interface{}) (n int, err error) { 9 | if Debug > 0 { 10 | log.Printf(format, a...) 11 | } 12 | return 13 | } 14 | -------------------------------------------------------------------------------- /6.824/src/shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // client code to talk to a sharded key/value service. 5 | // 6 | // the client first talks to the shardmaster to find out 7 | // the assignment of shards (keys) to groups, and then 8 | // talks to the group that holds the key's shard. 9 | // 10 | 11 | import "labrpc" 12 | import "crypto/rand" 13 | import "math/big" 14 | import "shardmaster" 15 | import "time" 16 | 17 | // 18 | // which shard is a key in? 19 | // please use this function, 20 | // and please do not change it. 21 | // 22 | func key2shard(key string) int { 23 | shard := 0 24 | if len(key) > 0 { 25 | shard = int(key[0]) 26 | } 27 | shard %= shardmaster.NShards 28 | return shard 29 | } 30 | 31 | func nrand() int64 { 32 | max := big.NewInt(int64(1) << 62) 33 | bigx, _ := rand.Int(rand.Reader, max) 34 | x := bigx.Int64() 35 | return x 36 | } 37 | 38 | type Clerk struct { 39 | sm *shardmaster.Clerk 40 | config shardmaster.Config 41 | make_end func(string) *labrpc.ClientEnd 42 | // You will have to modify this struct. 43 | } 44 | 45 | // 46 | // the tester calls MakeClerk. 47 | // 48 | // masters[] is needed to call shardmaster.MakeClerk(). 
49 | // 50 | // make_end(servername) turns a server name from a 51 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 52 | // send RPCs. 53 | // 54 | func MakeClerk(masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk { 55 | ck := new(Clerk) 56 | ck.sm = shardmaster.MakeClerk(masters) 57 | ck.make_end = make_end 58 | // You'll have to add code here. 59 | return ck 60 | } 61 | 62 | // 63 | // fetch the current value for a key. 64 | // returns "" if the key does not exist. 65 | // keeps trying forever in the face of all other errors. 66 | // You will have to modify this function. 67 | // 68 | func (ck *Clerk) Get(key string) string { 69 | args := GetArgs{} 70 | args.Key = key 71 | 72 | for { 73 | shard := key2shard(key) 74 | gid := ck.config.Shards[shard] 75 | if servers, ok := ck.config.Groups[gid]; ok { 76 | // try each server for the shard. 77 | for si := 0; si < len(servers); si++ { 78 | srv := ck.make_end(servers[si]) 79 | var reply GetReply 80 | ok := srv.Call("ShardKV.Get", &args, &reply) 81 | if ok && reply.WrongLeader == false && (reply.Err == OK || reply.Err == ErrNoKey) { 82 | return reply.Value 83 | } 84 | if ok && (reply.Err == ErrWrongGroup) { 85 | break 86 | } 87 | } 88 | } 89 | time.Sleep(100 * time.Millisecond) 90 | // ask master for the latest configuration. 91 | ck.config = ck.sm.Query(-1) 92 | } 93 | 94 | return "" 95 | } 96 | 97 | // 98 | // shared by Put and Append. 99 | // You will have to modify this function. 100 | // 101 | func (ck *Clerk) PutAppend(key string, value string, op string) { 102 | args := PutAppendArgs{} 103 | args.Key = key 104 | args.Value = value 105 | args.Op = op 106 | 107 | 108 | for { 109 | shard := key2shard(key) 110 | gid := ck.config.Shards[shard] 111 | if servers, ok := ck.config.Groups[gid]; ok { 112 | for si := 0; si < len(servers); si++ { 113 | srv := ck.make_end(servers[si]) 114 | var reply PutAppendReply 115 | ok := srv.Call("ShardKV.PutAppend", &args, &reply) 116 | if ok && reply.WrongLeader == false && reply.Err == OK { 117 | return 118 | } 119 | if ok && reply.Err == ErrWrongGroup { 120 | break 121 | } 122 | } 123 | } 124 | time.Sleep(100 * time.Millisecond) 125 | // ask master for the latest configuration. 126 | ck.config = ck.sm.Query(-1) 127 | } 128 | } 129 | 130 | func (ck *Clerk) Put(key string, value string) { 131 | ck.PutAppend(key, value, "Put") 132 | } 133 | func (ck *Clerk) Append(key string, value string) { 134 | ck.PutAppend(key, value, "Append") 135 | } 136 | -------------------------------------------------------------------------------- /6.824/src/shardkv/common.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // Sharded key/value server. 5 | // Lots of replica groups, each running op-at-a-time paxos. 6 | // Shardmaster decides which group serves each shard. 7 | // Shardmaster may change shard assignment from time to time. 8 | // 9 | // You will have to modify these definitions. 10 | // 11 | 12 | const ( 13 | OK = "OK" 14 | ErrNoKey = "ErrNoKey" 15 | ErrWrongGroup = "ErrWrongGroup" 16 | ) 17 | 18 | type Err string 19 | 20 | // Put or Append 21 | type PutAppendArgs struct { 22 | // You'll have to add definitions here. 23 | Key string 24 | Value string 25 | Op string // "Put" or "Append" 26 | // You'll have to add definitions here. 27 | // Field names must start with capital letters, 28 | // otherwise RPC will break. 
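	// a minimal sketch of fields commonly added here for duplicate detection;
	// the names are hypothetical, not required by the lab:
	//
	//	ClientId  int64 // unique client identifier (e.g. from nrand())
	//	RequestId int64 // per-client sequence number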
29 | } 30 | 31 | type PutAppendReply struct { 32 | WrongLeader bool 33 | Err Err 34 | } 35 | 36 | type GetArgs struct { 37 | Key string 38 | // You'll have to add definitions here. 39 | } 40 | 41 | type GetReply struct { 42 | WrongLeader bool 43 | Err Err 44 | Value string 45 | } 46 | -------------------------------------------------------------------------------- /6.824/src/shardkv/server.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | 4 | // import "shardmaster" 5 | import "labrpc" 6 | import "raft" 7 | import "sync" 8 | import "encoding/gob" 9 | 10 | 11 | 12 | type Op struct { 13 | // Your definitions here. 14 | // Field names must start with capital letters, 15 | // otherwise RPC will break. 16 | } 17 | 18 | type ShardKV struct { 19 | mu sync.Mutex 20 | me int 21 | rf *raft.Raft 22 | applyCh chan raft.ApplyMsg 23 | make_end func(string) *labrpc.ClientEnd 24 | gid int 25 | masters []*labrpc.ClientEnd 26 | maxraftstate int // snapshot if log grows this big 27 | 28 | // Your definitions here. 29 | } 30 | 31 | 32 | func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) { 33 | // Your code here. 34 | } 35 | 36 | func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 37 | // Your code here. 38 | } 39 | 40 | // 41 | // the tester calls Kill() when a ShardKV instance won't 42 | // be needed again. you are not required to do anything 43 | // in Kill(), but it might be convenient to (for example) 44 | // turn off debug output from this instance. 45 | // 46 | func (kv *ShardKV) Kill() { 47 | kv.rf.Kill() 48 | // Your code here, if desired. 49 | } 50 | 51 | 52 | // 53 | // servers[] contains the ports of the servers in this group. 54 | // 55 | // me is the index of the current server in servers[]. 56 | // 57 | // the k/v server should store snapshots with 58 | // persister.SaveSnapshot(), and Raft should save its state (including 59 | // log) with persister.SaveRaftState(). 60 | // 61 | // the k/v server should snapshot when Raft's saved state exceeds 62 | // maxraftstate bytes, in order to allow Raft to garbage-collect its 63 | // log. if maxraftstate is -1, you don't need to snapshot. 64 | // 65 | // gid is this group's GID, for interacting with the shardmaster. 66 | // 67 | // pass masters[] to shardmaster.MakeClerk() so you can send 68 | // RPCs to the shardmaster. 69 | // 70 | // make_end(servername) turns a server name from a 71 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 72 | // send RPCs. You'll need this to send RPCs to other groups. 73 | // 74 | // look at client.go for examples of how to use masters[] 75 | // and make_end() to send RPCs to the group owning a specific shard. 76 | // 77 | // StartServer() must return quickly, so it should start goroutines 78 | // for any long-running work. 79 | // 80 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *ShardKV { 81 | // call gob.Register on structures you want 82 | // Go's RPC library to marshall/unmarshall. 83 | gob.Register(Op{}) 84 | 85 | kv := new(ShardKV) 86 | kv.me = me 87 | kv.maxraftstate = maxraftstate 88 | kv.make_end = make_end 89 | kv.gid = gid 90 | kv.masters = masters 91 | 92 | // Your initialization code here. 
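	// a minimal sketch of typical initialization, assuming hypothetical
	// kv.kvstore and kv.lastSeen fields added to ShardKV:
	//
	//	kv.kvstore = make(map[string]string)
	//	kv.lastSeen = make(map[int64]int64) // client id -> last applied request id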
93 | 94 | // Use something like this to talk to the shardmaster: 95 | // kv.mck = shardmaster.MakeClerk(kv.masters) 96 | 97 | kv.applyCh = make(chan raft.ApplyMsg) 98 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 99 | 100 | 101 | return kv 102 | } 103 | -------------------------------------------------------------------------------- /6.824/src/shardmaster/client.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Shardmaster clerk. 5 | // 6 | 7 | import "labrpc" 8 | import "time" 9 | import "crypto/rand" 10 | import "math/big" 11 | 12 | type Clerk struct { 13 | servers []*labrpc.ClientEnd 14 | // Your data here. 15 | } 16 | 17 | func nrand() int64 { 18 | max := big.NewInt(int64(1) << 62) 19 | bigx, _ := rand.Int(rand.Reader, max) 20 | x := bigx.Int64() 21 | return x 22 | } 23 | 24 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 25 | ck := new(Clerk) 26 | ck.servers = servers 27 | // Your code here. 28 | return ck 29 | } 30 | 31 | func (ck *Clerk) Query(num int) Config { 32 | args := &QueryArgs{} 33 | // Your code here. 34 | args.Num = num 35 | for { 36 | // try each known server. 37 | for _, srv := range ck.servers { 38 | var reply QueryReply 39 | ok := srv.Call("ShardMaster.Query", args, &reply) 40 | if ok && reply.WrongLeader == false { 41 | return reply.Config 42 | } 43 | } 44 | time.Sleep(100 * time.Millisecond) 45 | } 46 | } 47 | 48 | func (ck *Clerk) Join(servers map[int][]string) { 49 | args := &JoinArgs{} 50 | // Your code here. 51 | args.Servers = servers 52 | 53 | for { 54 | // try each known server. 55 | for _, srv := range ck.servers { 56 | var reply JoinReply 57 | ok := srv.Call("ShardMaster.Join", args, &reply) 58 | if ok && reply.WrongLeader == false { 59 | return 60 | } 61 | } 62 | time.Sleep(100 * time.Millisecond) 63 | } 64 | } 65 | 66 | func (ck *Clerk) Leave(gids []int) { 67 | args := &LeaveArgs{} 68 | // Your code here. 69 | args.GIDs = gids 70 | 71 | for { 72 | // try each known server. 73 | for _, srv := range ck.servers { 74 | var reply LeaveReply 75 | ok := srv.Call("ShardMaster.Leave", args, &reply) 76 | if ok && reply.WrongLeader == false { 77 | return 78 | } 79 | } 80 | time.Sleep(100 * time.Millisecond) 81 | } 82 | } 83 | 84 | func (ck *Clerk) Move(shard int, gid int) { 85 | args := &MoveArgs{} 86 | // Your code here. 87 | args.Shard = shard 88 | args.GID = gid 89 | 90 | for { 91 | // try each known server. 92 | for _, srv := range ck.servers { 93 | var reply MoveReply 94 | ok := srv.Call("ShardMaster.Move", args, &reply) 95 | if ok && reply.WrongLeader == false { 96 | return 97 | } 98 | } 99 | time.Sleep(100 * time.Millisecond) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /6.824/src/shardmaster/common.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Master shard server: assigns shards to replication groups. 5 | // 6 | // RPC interface: 7 | // Join(servers) -- add a set of groups (gid -> server-list mapping). 8 | // Leave(gids) -- delete a set of groups. 9 | // Move(shard, gid) -- hand off one shard from current owner to gid. 10 | // Query(num) -> fetch Config # num, or latest config if num==-1. 11 | // 12 | // A Config (configuration) describes a set of replica groups, and the 13 | // replica group responsible for each shard. Configs are numbered. 
Config 14 | // #0 is the initial configuration, with no groups and all shards 15 | // assigned to group 0 (the invalid group). 16 | // 17 | // A GID is a replica group ID. GIDs must be uniqe and > 0. 18 | // Once a GID joins, and leaves, it should never join again. 19 | // 20 | // You will need to add fields to the RPC arguments. 21 | // 22 | 23 | // The number of shards. 24 | const NShards = 10 25 | 26 | // A configuration -- an assignment of shards to groups. 27 | // Please don't change this. 28 | type Config struct { 29 | Num int // config number 30 | Shards [NShards]int // shard -> gid 31 | Groups map[int][]string // gid -> servers[] 32 | } 33 | 34 | const ( 35 | OK = "OK" 36 | ) 37 | 38 | type Err string 39 | 40 | type JoinArgs struct { 41 | Servers map[int][]string // new GID -> servers mappings 42 | } 43 | 44 | type JoinReply struct { 45 | WrongLeader bool 46 | Err Err 47 | } 48 | 49 | type LeaveArgs struct { 50 | GIDs []int 51 | } 52 | 53 | type LeaveReply struct { 54 | WrongLeader bool 55 | Err Err 56 | } 57 | 58 | type MoveArgs struct { 59 | Shard int 60 | GID int 61 | } 62 | 63 | type MoveReply struct { 64 | WrongLeader bool 65 | Err Err 66 | } 67 | 68 | type QueryArgs struct { 69 | Num int // desired config number 70 | } 71 | 72 | type QueryReply struct { 73 | WrongLeader bool 74 | Err Err 75 | Config Config 76 | } 77 | -------------------------------------------------------------------------------- /6.824/src/shardmaster/config.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "labrpc" 4 | import "raft" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | 15 | func randstring(n int) string { 16 | b := make([]byte, 2*n) 17 | crand.Read(b) 18 | s := base64.URLEncoding.EncodeToString(b) 19 | return s[0:n] 20 | } 21 | 22 | // Randomize server handles 23 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 24 | sa := make([]*labrpc.ClientEnd, len(kvh)) 25 | copy(sa, kvh) 26 | for i := range sa { 27 | j := rand.Intn(i + 1) 28 | sa[i], sa[j] = sa[j], sa[i] 29 | } 30 | return sa 31 | } 32 | 33 | type config struct { 34 | mu sync.Mutex 35 | t *testing.T 36 | net *labrpc.Network 37 | n int 38 | servers []*ShardMaster 39 | saved []*raft.Persister 40 | endnames [][]string // names of each server's sending ClientEnds 41 | clerks map[*Clerk][]string 42 | nextClientId int 43 | } 44 | 45 | func (cfg *config) cleanup() { 46 | cfg.mu.Lock() 47 | defer cfg.mu.Unlock() 48 | for i := 0; i < len(cfg.servers); i++ { 49 | if cfg.servers[i] != nil { 50 | cfg.servers[i].Kill() 51 | } 52 | } 53 | } 54 | 55 | // Maximum log size across all servers 56 | func (cfg *config) LogSize() int { 57 | logsize := 0 58 | for i := 0; i < cfg.n; i++ { 59 | n := cfg.saved[i].RaftStateSize() 60 | if n > logsize { 61 | logsize = n 62 | } 63 | } 64 | return logsize 65 | } 66 | 67 | // attach server i to servers listed in to 68 | // caller must hold cfg.mu 69 | func (cfg *config) connectUnlocked(i int, to []int) { 70 | // log.Printf("connect peer %d to %v\n", i, to) 71 | 72 | // outgoing socket files 73 | for j := 0; j < len(to); j++ { 74 | endname := cfg.endnames[i][to[j]] 75 | cfg.net.Enable(endname, true) 76 | } 77 | 78 | // incoming socket files 79 | for j := 0; j < len(to); j++ { 80 | endname := cfg.endnames[to[j]][i] 81 | cfg.net.Enable(endname, true) 82 | } 83 | } 84 | 85 | func (cfg 
*config) connect(i int, to []int) { 86 | cfg.mu.Lock() 87 | defer cfg.mu.Unlock() 88 | cfg.connectUnlocked(i, to) 89 | } 90 | 91 | // detach server i from the servers listed in from 92 | // caller must hold cfg.mu 93 | func (cfg *config) disconnectUnlocked(i int, from []int) { 94 | // log.Printf("disconnect peer %d from %v\n", i, from) 95 | 96 | // outgoing socket files 97 | for j := 0; j < len(from); j++ { 98 | if cfg.endnames[i] != nil { 99 | endname := cfg.endnames[i][from[j]] 100 | cfg.net.Enable(endname, false) 101 | } 102 | } 103 | 104 | // incoming socket files 105 | for j := 0; j < len(from); j++ { 106 | if cfg.endnames[j] != nil { 107 | endname := cfg.endnames[from[j]][i] 108 | cfg.net.Enable(endname, false) 109 | } 110 | } 111 | } 112 | 113 | func (cfg *config) disconnect(i int, from []int) { 114 | cfg.mu.Lock() 115 | defer cfg.mu.Unlock() 116 | cfg.disconnectUnlocked(i, from) 117 | } 118 | 119 | func (cfg *config) All() []int { 120 | all := make([]int, cfg.n) 121 | for i := 0; i < cfg.n; i++ { 122 | all[i] = i 123 | } 124 | return all 125 | } 126 | 127 | func (cfg *config) ConnectAll() { 128 | cfg.mu.Lock() 129 | defer cfg.mu.Unlock() 130 | for i := 0; i < cfg.n; i++ { 131 | cfg.connectUnlocked(i, cfg.All()) 132 | } 133 | } 134 | 135 | // Sets up 2 partitions with connectivity between servers in each partition. 136 | func (cfg *config) partition(p1 []int, p2 []int) { 137 | cfg.mu.Lock() 138 | defer cfg.mu.Unlock() 139 | // log.Printf("partition servers into: %v %v\n", p1, p2) 140 | for i := 0; i < len(p1); i++ { 141 | cfg.disconnectUnlocked(p1[i], p2) 142 | cfg.connectUnlocked(p1[i], p1) 143 | } 144 | for i := 0; i < len(p2); i++ { 145 | cfg.disconnectUnlocked(p2[i], p1) 146 | cfg.connectUnlocked(p2[i], p2) 147 | } 148 | } 149 | 150 | // Create a clerk with clerk specific server names. 151 | // Give it connections to all of the servers, but for 152 | // now enable only connections to servers in to[]. 153 | func (cfg *config) makeClient(to []int) *Clerk { 154 | cfg.mu.Lock() 155 | defer cfg.mu.Unlock() 156 | 157 | // a fresh set of ClientEnds. 
158 | ends := make([]*labrpc.ClientEnd, cfg.n) 159 | endnames := make([]string, cfg.n) 160 | for j := 0; j < cfg.n; j++ { 161 | endnames[j] = randstring(20) 162 | ends[j] = cfg.net.MakeEnd(endnames[j]) 163 | cfg.net.Connect(endnames[j], j) 164 | } 165 | 166 | ck := MakeClerk(random_handles(ends)) 167 | cfg.clerks[ck] = endnames 168 | cfg.nextClientId++ 169 | cfg.ConnectClientUnlocked(ck, to) 170 | return ck 171 | } 172 | 173 | func (cfg *config) deleteClient(ck *Clerk) { 174 | cfg.mu.Lock() 175 | defer cfg.mu.Unlock() 176 | 177 | v := cfg.clerks[ck] 178 | for i := 0; i < len(v); i++ { 179 | os.Remove(v[i]) 180 | } 181 | delete(cfg.clerks, ck) 182 | } 183 | 184 | // caller should hold cfg.mu 185 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 186 | // log.Printf("ConnectClient %v to %v\n", ck, to) 187 | endnames := cfg.clerks[ck] 188 | for j := 0; j < len(to); j++ { 189 | s := endnames[to[j]] 190 | cfg.net.Enable(s, true) 191 | } 192 | } 193 | 194 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 195 | cfg.mu.Lock() 196 | defer cfg.mu.Unlock() 197 | cfg.ConnectClientUnlocked(ck, to) 198 | } 199 | 200 | // caller should hold cfg.mu 201 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 202 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 203 | endnames := cfg.clerks[ck] 204 | for j := 0; j < len(from); j++ { 205 | s := endnames[from[j]] 206 | cfg.net.Enable(s, false) 207 | } 208 | } 209 | 210 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 211 | cfg.mu.Lock() 212 | defer cfg.mu.Unlock() 213 | cfg.DisconnectClientUnlocked(ck, from) 214 | } 215 | 216 | // Shutdown a server by isolating it 217 | func (cfg *config) ShutdownServer(i int) { 218 | cfg.mu.Lock() 219 | defer cfg.mu.Unlock() 220 | 221 | cfg.disconnectUnlocked(i, cfg.All()) 222 | 223 | // disable client connections to the server. 224 | // it's important to do this before creating 225 | // the new Persister in saved[i], to avoid 226 | // the possibility of the server returning a 227 | // positive reply to an Append but persisting 228 | // the result in the superseded Persister. 229 | cfg.net.DeleteServer(i) 230 | 231 | // a fresh persister, in case old instance 232 | // continues to update the Persister. 233 | // but copy old persister's content so that we always 234 | // pass Make() the last persisted state. 235 | if cfg.saved[i] != nil { 236 | cfg.saved[i] = cfg.saved[i].Copy() 237 | } 238 | 239 | kv := cfg.servers[i] 240 | if kv != nil { 241 | cfg.mu.Unlock() 242 | kv.Kill() 243 | cfg.mu.Lock() 244 | cfg.servers[i] = nil 245 | } 246 | } 247 | 248 | // If restart servers, first call ShutdownServer 249 | func (cfg *config) StartServer(i int) { 250 | cfg.mu.Lock() 251 | 252 | // a fresh set of outgoing ClientEnd names. 253 | cfg.endnames[i] = make([]string, cfg.n) 254 | for j := 0; j < cfg.n; j++ { 255 | cfg.endnames[i][j] = randstring(20) 256 | } 257 | 258 | // a fresh set of ClientEnds. 259 | ends := make([]*labrpc.ClientEnd, cfg.n) 260 | for j := 0; j < cfg.n; j++ { 261 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 262 | cfg.net.Connect(cfg.endnames[i][j], j) 263 | } 264 | 265 | // a fresh persister, so old instance doesn't overwrite 266 | // new instance's persisted state. 267 | // give the fresh persister a copy of the old persister's 268 | // state, so that the spec is that we pass StartKVServer() 269 | // the last persisted state. 
270 | if cfg.saved[i] != nil { 271 | cfg.saved[i] = cfg.saved[i].Copy() 272 | } else { 273 | cfg.saved[i] = raft.MakePersister() 274 | } 275 | 276 | cfg.mu.Unlock() 277 | 278 | cfg.servers[i] = StartServer(ends, i, cfg.saved[i]) 279 | 280 | kvsvc := labrpc.MakeService(cfg.servers[i]) 281 | rfsvc := labrpc.MakeService(cfg.servers[i].rf) 282 | srv := labrpc.MakeServer() 283 | srv.AddService(kvsvc) 284 | srv.AddService(rfsvc) 285 | cfg.net.AddServer(i, srv) 286 | } 287 | 288 | func (cfg *config) Leader() (bool, int) { 289 | cfg.mu.Lock() 290 | defer cfg.mu.Unlock() 291 | 292 | for i := 0; i < cfg.n; i++ { 293 | _, is_leader := cfg.servers[i].rf.GetState() 294 | if is_leader { 295 | return true, i 296 | } 297 | } 298 | return false, 0 299 | } 300 | 301 | // Partition servers into 2 groups and put current leader in minority 302 | func (cfg *config) make_partition() ([]int, []int) { 303 | _, l := cfg.Leader() 304 | p1 := make([]int, cfg.n/2+1) 305 | p2 := make([]int, cfg.n/2) 306 | j := 0 307 | for i := 0; i < cfg.n; i++ { 308 | if i != l { 309 | if j < len(p1) { 310 | p1[j] = i 311 | } else { 312 | p2[j-len(p1)] = i 313 | } 314 | j++ 315 | } 316 | } 317 | p2[len(p2)-1] = l 318 | return p1, p2 319 | } 320 | 321 | func make_config(t *testing.T, n int, unreliable bool) *config { 322 | runtime.GOMAXPROCS(4) 323 | cfg := &config{} 324 | cfg.t = t 325 | cfg.net = labrpc.MakeNetwork() 326 | cfg.n = n 327 | cfg.servers = make([]*ShardMaster, cfg.n) 328 | cfg.saved = make([]*raft.Persister, cfg.n) 329 | cfg.endnames = make([][]string, cfg.n) 330 | cfg.clerks = make(map[*Clerk][]string) 331 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 332 | 333 | // create a full set of KV servers. 334 | for i := 0; i < cfg.n; i++ { 335 | cfg.StartServer(i) 336 | } 337 | 338 | cfg.ConnectAll() 339 | 340 | cfg.net.Reliable(!unreliable) 341 | 342 | return cfg 343 | } 344 | -------------------------------------------------------------------------------- /6.824/src/shardmaster/server.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | 4 | import "raft" 5 | import "labrpc" 6 | import "sync" 7 | import "encoding/gob" 8 | 9 | 10 | type ShardMaster struct { 11 | mu sync.Mutex 12 | me int 13 | rf *raft.Raft 14 | applyCh chan raft.ApplyMsg 15 | 16 | // Your data here. 17 | 18 | configs []Config // indexed by config num 19 | } 20 | 21 | 22 | type Op struct { 23 | // Your data here. 24 | } 25 | 26 | 27 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) { 28 | // Your code here. 29 | } 30 | 31 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) { 32 | // Your code here. 33 | } 34 | 35 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) { 36 | // Your code here. 37 | } 38 | 39 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) { 40 | // Your code here. 41 | } 42 | 43 | 44 | // 45 | // the tester calls Kill() when a ShardMaster instance won't 46 | // be needed again. you are not required to do anything 47 | // in Kill(), but it might be convenient to (for example) 48 | // turn off debug output from this instance. 49 | // 50 | func (sm *ShardMaster) Kill() { 51 | sm.rf.Kill() 52 | // Your code here, if desired. 
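	// for example (assuming a hypothetical sm.dead int32 field and a
	// "sync/atomic" import):
	//
	//	atomic.StoreInt32(&sm.dead, 1)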
53 | } 54 | 55 | // needed by shardkv tester 56 | func (sm *ShardMaster) Raft() *raft.Raft { 57 | return sm.rf 58 | } 59 | 60 | // 61 | // servers[] contains the ports of the set of 62 | // servers that will cooperate via Paxos to 63 | // form the fault-tolerant shardmaster service. 64 | // me is the index of the current server in servers[]. 65 | // 66 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardMaster { 67 | sm := new(ShardMaster) 68 | sm.me = me 69 | 70 | sm.configs = make([]Config, 1) 71 | sm.configs[0].Groups = map[int][]string{} 72 | 73 | gob.Register(Op{}) 74 | sm.applyCh = make(chan raft.ApplyMsg) 75 | sm.rf = raft.Make(servers, me, persister, sm.applyCh) 76 | 77 | // Your code here. 78 | 79 | return sm 80 | } 81 | -------------------------------------------------------------------------------- /6.824/src/viewservice/client.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net/rpc" 4 | import "fmt" 5 | 6 | // 7 | // the viewservice Clerk lives in the client 8 | // and maintains a little state. 9 | // 10 | type Clerk struct { 11 | me string // client's name (host:port) 12 | server string // viewservice's host:port 13 | } 14 | 15 | func MakeClerk(me string, server string) *Clerk { 16 | ck := new(Clerk) 17 | ck.me = me 18 | ck.server = server 19 | return ck 20 | } 21 | 22 | // 23 | // call() sends an RPC to the rpcname handler on server srv 24 | // with arguments args, waits for the reply, and leaves the 25 | // reply in reply. the reply argument should be a pointer 26 | // to a reply structure. 27 | // 28 | // the return value is true if the server responded, and false 29 | // if call() was not able to contact the server. in particular, 30 | // the reply's contents are only valid if call() returned true. 31 | // 32 | // you should assume that call() will return an 33 | // error after a while if the server is dead. 34 | // don't provide your own time-out mechanism. 35 | // 36 | // please use call() to send all RPCs, in client.go and server.go. 37 | // please don't change this function. 38 | // 39 | func call(srv string, rpcname string, 40 | args interface{}, reply interface{}) bool { 41 | c, errx := rpc.Dial("unix", srv) 42 | if errx != nil { 43 | return false 44 | } 45 | defer c.Close() 46 | 47 | err := c.Call(rpcname, args, reply) 48 | if err == nil { 49 | return true 50 | } 51 | 52 | fmt.Println(err) 53 | return false 54 | } 55 | 56 | func (ck *Clerk) Ping(viewnum uint) (View, error) { 57 | // prepare the arguments. 58 | args := &PingArgs{} 59 | args.Me = ck.me 60 | args.Viewnum = viewnum 61 | var reply PingReply 62 | 63 | // send an RPC request, wait for the reply. 
64 | ok := call(ck.server, "ViewServer.Ping", args, &reply) 65 | if ok == false { 66 | return View{}, fmt.Errorf("Ping(%v) failed", viewnum) 67 | } 68 | 69 | return reply.View, nil 70 | } 71 | 72 | func (ck *Clerk) Get() (View, bool) { 73 | args := &GetArgs{} 74 | var reply GetReply 75 | ok := call(ck.server, "ViewServer.Get", args, &reply) 76 | if ok == false { 77 | return View{}, false 78 | } 79 | return reply.View, true 80 | } 81 | 82 | func (ck *Clerk) Primary() string { 83 | v, ok := ck.Get() 84 | if ok { 85 | return v.Primary 86 | } 87 | return "" 88 | } 89 | -------------------------------------------------------------------------------- /6.824/src/viewservice/common.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "time" 4 | 5 | // 6 | // This is a non-replicated view service for a simple 7 | // primary/backup system. 8 | // 9 | // The view service goes through a sequence of numbered 10 | // views, each with a primary and (if possible) a backup. 11 | // A view consists of a view number and the host:port of 12 | // the view's primary and backup p/b servers. 13 | // 14 | // The primary in a view is always either the primary 15 | // or the backup of the previous view (in order to ensure 16 | // that the p/b service's state is preserved). 17 | // 18 | // Each p/b server should send a Ping RPC once per PingInterval. 19 | // The view server replies with a description of the current 20 | // view. The Pings let the view server know that the p/b 21 | // server is still alive; inform the p/b server of the current 22 | // view; and inform the view server of the most recent view 23 | // that the p/b server knows about. 24 | // 25 | // The view server proceeds to a new view when either it hasn't 26 | // received a ping from the primary or backup for a while, or 27 | // if there was no backup and a new server starts Pinging. 28 | // 29 | // The view server will not proceed to a new view until 30 | // the primary from the current view acknowledges 31 | // that it is operating in the current view. This helps 32 | // ensure that there's at most one p/b primary operating at 33 | // a time. 34 | // 35 | 36 | type View struct { 37 | Viewnum uint 38 | Primary string 39 | Backup string 40 | } 41 | 42 | // clients should send a Ping RPC this often, 43 | // to tell the viewservice that the client is alive. 44 | const PingInterval = time.Millisecond * 100 45 | 46 | // the viewserver will declare a client dead if it misses 47 | // this many Ping RPCs in a row. 48 | const DeadPings = 5 49 | 50 | // 51 | // Ping(): called by a primary/backup server to tell the 52 | // view service it is alive, to indicate whether p/b server 53 | // has seen the latest view, and for p/b server to learn 54 | // the latest view. 55 | // 56 | // If Viewnum is zero, the caller is signalling that it is 57 | // alive and could become backup if needed. 58 | // 59 | 60 | type PingArgs struct { 61 | Me string // "host:port" 62 | Viewnum uint // caller's notion of current view # 63 | } 64 | 65 | type PingReply struct { 66 | View View 67 | } 68 | 69 | // 70 | // Get(): fetch the current view, without volunteering 71 | // to be a server. mostly for clients of the p/b service, 72 | // and for testing. 
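//
// a worked example of the view progression described at the top of this file,
// with hypothetical servers S1 and S2:
//
//	view 1: Primary=S1, Backup=""   -- S1 was the first to Ping(0)
//	view 2: Primary=S1, Backup=S2   -- S2 began pinging and S1 acked view 1
//	view 3: Primary=S2, Backup=""   -- S1 missed DeadPings pings in a row
//
// the view service will not advance past view N until the primary of view N
// has acknowledged it by sending Ping(N).
//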
73 | // 74 | 75 | type GetArgs struct { 76 | } 77 | 78 | type GetReply struct { 79 | View View 80 | } 81 | -------------------------------------------------------------------------------- /6.824/src/viewservice/server.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net" 4 | import "net/rpc" 5 | import "log" 6 | import "time" 7 | import "sync" 8 | import "fmt" 9 | import "os" 10 | import "sync/atomic" 11 | 12 | type ViewServer struct { 13 | mu sync.Mutex 14 | l net.Listener 15 | dead int32 // for testing 16 | rpccount int32 // for testing 17 | me string 18 | 19 | 20 | // Your declarations here. 21 | } 22 | 23 | // 24 | // server Ping RPC handler. 25 | // 26 | func (vs *ViewServer) Ping(args *PingArgs, reply *PingReply) error { 27 | 28 | // Your code here. 29 | 30 | return nil 31 | } 32 | 33 | // 34 | // server Get() RPC handler. 35 | // 36 | func (vs *ViewServer) Get(args *GetArgs, reply *GetReply) error { 37 | 38 | // Your code here. 39 | 40 | return nil 41 | } 42 | 43 | 44 | // 45 | // tick() is called once per PingInterval; it should notice 46 | // if servers have died or recovered, and change the view 47 | // accordingly. 48 | // 49 | func (vs *ViewServer) tick() { 50 | 51 | // Your code here. 52 | } 53 | 54 | // 55 | // tell the server to shut itself down. 56 | // for testing. 57 | // please don't change these two functions. 58 | // 59 | func (vs *ViewServer) Kill() { 60 | atomic.StoreInt32(&vs.dead, 1) 61 | vs.l.Close() 62 | } 63 | 64 | // 65 | // has this server been asked to shut down? 66 | // 67 | func (vs *ViewServer) isdead() bool { 68 | return atomic.LoadInt32(&vs.dead) != 0 69 | } 70 | 71 | // please don't change this function. 72 | func (vs *ViewServer) GetRPCCount() int32 { 73 | return atomic.LoadInt32(&vs.rpccount) 74 | } 75 | 76 | func StartServer(me string) *ViewServer { 77 | vs := new(ViewServer) 78 | vs.me = me 79 | // Your vs.* initializations here. 80 | 81 | // tell net/rpc about our RPC server and handlers. 82 | rpcs := rpc.NewServer() 83 | rpcs.Register(vs) 84 | 85 | // prepare to receive connections from clients. 86 | // change "unix" to "tcp" to use over a network. 87 | os.Remove(vs.me) // only needed for "unix" 88 | l, e := net.Listen("unix", vs.me) 89 | if e != nil { 90 | log.Fatal("listen error: ", e) 91 | } 92 | vs.l = l 93 | 94 | // please don't change any of the following code, 95 | // or do anything to subvert it. 96 | 97 | // create a thread to accept RPC connections from clients. 98 | go func() { 99 | for vs.isdead() == false { 100 | conn, err := vs.l.Accept() 101 | if err == nil && vs.isdead() == false { 102 | atomic.AddInt32(&vs.rpccount, 1) 103 | go rpcs.ServeConn(conn) 104 | } else if err == nil { 105 | conn.Close() 106 | } 107 | if err != nil && vs.isdead() == false { 108 | fmt.Printf("ViewServer(%v) accept: %v\n", me, err.Error()) 109 | vs.Kill() 110 | } 111 | } 112 | }() 113 | 114 | // create a thread to call tick() periodically. 
115 | go func() { 116 | for vs.isdead() == false { 117 | vs.tick() 118 | time.Sleep(PingInterval) 119 | } 120 | }() 121 | 122 | return vs 123 | } 124 | -------------------------------------------------------------------------------- /6.824/src/viewservice/test_test.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "testing" 4 | import "runtime" 5 | import "time" 6 | import "fmt" 7 | import "os" 8 | import "strconv" 9 | 10 | func check(t *testing.T, ck *Clerk, p string, b string, n uint) { 11 | view, _ := ck.Get() 12 | if view.Primary != p { 13 | t.Fatalf("wanted primary %v, got %v", p, view.Primary) 14 | } 15 | if view.Backup != b { 16 | t.Fatalf("wanted backup %v, got %v", b, view.Backup) 17 | } 18 | if n != 0 && n != view.Viewnum { 19 | t.Fatalf("wanted viewnum %v, got %v", n, view.Viewnum) 20 | } 21 | if ck.Primary() != p { 22 | t.Fatalf("wanted primary %v, got %v", p, ck.Primary()) 23 | } 24 | } 25 | 26 | func port(suffix string) string { 27 | s := "/var/tmp/824-" 28 | s += strconv.Itoa(os.Getuid()) + "/" 29 | os.Mkdir(s, 0777) 30 | s += "viewserver-" 31 | s += strconv.Itoa(os.Getpid()) + "-" 32 | s += suffix 33 | return s 34 | } 35 | 36 | func Test1(t *testing.T) { 37 | runtime.GOMAXPROCS(4) 38 | 39 | vshost := port("v") 40 | vs := StartServer(vshost) 41 | 42 | ck1 := MakeClerk(port("1"), vshost) 43 | ck2 := MakeClerk(port("2"), vshost) 44 | ck3 := MakeClerk(port("3"), vshost) 45 | 46 | // 47 | 48 | if ck1.Primary() != "" { 49 | t.Fatalf("there was a primary too soon") 50 | } 51 | 52 | // very first primary 53 | fmt.Printf("Test: First primary ...\n") 54 | 55 | for i := 0; i < DeadPings*2; i++ { 56 | view, _ := ck1.Ping(0) 57 | if view.Primary == ck1.me { 58 | break 59 | } 60 | time.Sleep(PingInterval) 61 | } 62 | check(t, ck1, ck1.me, "", 1) 63 | fmt.Printf(" ... Passed\n") 64 | 65 | // very first backup 66 | fmt.Printf("Test: First backup ...\n") 67 | 68 | { 69 | vx, _ := ck1.Get() 70 | for i := 0; i < DeadPings*2; i++ { 71 | ck1.Ping(1) 72 | view, _ := ck2.Ping(0) 73 | if view.Backup == ck2.me { 74 | break 75 | } 76 | time.Sleep(PingInterval) 77 | } 78 | check(t, ck1, ck1.me, ck2.me, vx.Viewnum+1) 79 | } 80 | fmt.Printf(" ... Passed\n") 81 | 82 | // primary dies, backup should take over 83 | fmt.Printf("Test: Backup takes over if primary fails ...\n") 84 | 85 | { 86 | ck1.Ping(2) 87 | vx, _ := ck2.Ping(2) 88 | for i := 0; i < DeadPings*2; i++ { 89 | v, _ := ck2.Ping(vx.Viewnum) 90 | if v.Primary == ck2.me && v.Backup == "" { 91 | break 92 | } 93 | time.Sleep(PingInterval) 94 | } 95 | check(t, ck2, ck2.me, "", vx.Viewnum+1) 96 | } 97 | fmt.Printf(" ... Passed\n") 98 | 99 | // revive ck1, should become backup 100 | fmt.Printf("Test: Restarted server becomes backup ...\n") 101 | 102 | { 103 | vx, _ := ck2.Get() 104 | ck2.Ping(vx.Viewnum) 105 | for i := 0; i < DeadPings*2; i++ { 106 | ck1.Ping(0) 107 | v, _ := ck2.Ping(vx.Viewnum) 108 | if v.Primary == ck2.me && v.Backup == ck1.me { 109 | break 110 | } 111 | time.Sleep(PingInterval) 112 | } 113 | check(t, ck2, ck2.me, ck1.me, vx.Viewnum+1) 114 | } 115 | fmt.Printf(" ... Passed\n") 116 | 117 | // start ck3, kill the primary (ck2), the previous backup (ck1) 118 | // should become the server, and ck3 the backup. 119 | // this should happen in a single view change, without 120 | // any period in which there's no backup. 
121 | fmt.Printf("Test: Idle third server becomes backup if primary fails ...\n") 122 | 123 | { 124 | vx, _ := ck2.Get() 125 | ck2.Ping(vx.Viewnum) 126 | for i := 0; i < DeadPings*2; i++ { 127 | ck3.Ping(0) 128 | v, _ := ck1.Ping(vx.Viewnum) 129 | if v.Primary == ck1.me && v.Backup == ck3.me { 130 | break 131 | } 132 | vx = v 133 | time.Sleep(PingInterval) 134 | } 135 | check(t, ck1, ck1.me, ck3.me, vx.Viewnum+1) 136 | } 137 | fmt.Printf(" ... Passed\n") 138 | 139 | // kill and immediately restart the primary -- does viewservice 140 | // conclude primary is down even though it's pinging? 141 | fmt.Printf("Test: Restarted primary treated as dead ...\n") 142 | 143 | { 144 | vx, _ := ck1.Get() 145 | ck1.Ping(vx.Viewnum) 146 | for i := 0; i < DeadPings*2; i++ { 147 | ck1.Ping(0) 148 | ck3.Ping(vx.Viewnum) 149 | v, _ := ck3.Get() 150 | if v.Primary != ck1.me { 151 | break 152 | } 153 | time.Sleep(PingInterval) 154 | } 155 | vy, _ := ck3.Get() 156 | if vy.Primary != ck3.me { 157 | t.Fatalf("expected primary=%v, got %v\n", ck3.me, vy.Primary) 158 | } 159 | } 160 | fmt.Printf(" ... Passed\n") 161 | 162 | fmt.Printf("Test: Dead backup is removed from view ...\n") 163 | 164 | // set up a view with just 3 as primary, 165 | // to prepare for the next test. 166 | { 167 | for i := 0; i < DeadPings*3; i++ { 168 | vx, _ := ck3.Get() 169 | ck3.Ping(vx.Viewnum) 170 | time.Sleep(PingInterval) 171 | } 172 | v, _ := ck3.Get() 173 | if v.Primary != ck3.me || v.Backup != "" { 174 | t.Fatalf("wrong primary or backup") 175 | } 176 | } 177 | fmt.Printf(" ... Passed\n") 178 | 179 | // does viewserver wait for ack of previous view before 180 | // starting the next one? 181 | fmt.Printf("Test: Viewserver waits for primary to ack view ...\n") 182 | 183 | { 184 | // set up p=ck3 b=ck1, but 185 | // but do not ack 186 | vx, _ := ck1.Get() 187 | for i := 0; i < DeadPings*3; i++ { 188 | ck1.Ping(0) 189 | ck3.Ping(vx.Viewnum) 190 | v, _ := ck1.Get() 191 | if v.Viewnum > vx.Viewnum { 192 | break 193 | } 194 | time.Sleep(PingInterval) 195 | } 196 | check(t, ck1, ck3.me, ck1.me, vx.Viewnum+1) 197 | vy, _ := ck1.Get() 198 | // ck3 is the primary, but it never acked. 199 | // let ck3 die. check that ck1 is not promoted. 200 | for i := 0; i < DeadPings*3; i++ { 201 | v, _ := ck1.Ping(vy.Viewnum) 202 | if v.Viewnum > vy.Viewnum { 203 | break 204 | } 205 | time.Sleep(PingInterval) 206 | } 207 | check(t, ck2, ck3.me, ck1.me, vy.Viewnum) 208 | } 209 | fmt.Printf(" ... Passed\n") 210 | 211 | // if old servers die, check that a new (uninitialized) server 212 | // cannot take over. 213 | fmt.Printf("Test: Uninitialized server can't become primary ...\n") 214 | 215 | { 216 | for i := 0; i < DeadPings*2; i++ { 217 | v, _ := ck1.Get() 218 | ck1.Ping(v.Viewnum) 219 | ck2.Ping(0) 220 | ck3.Ping(v.Viewnum) 221 | time.Sleep(PingInterval) 222 | } 223 | for i := 0; i < DeadPings*2; i++ { 224 | ck2.Ping(0) 225 | time.Sleep(PingInterval) 226 | } 227 | vz, _ := ck2.Get() 228 | if vz.Primary == ck2.me { 229 | t.Fatalf("uninitialized backup promoted to primary") 230 | } 231 | } 232 | fmt.Printf(" ... Passed\n") 233 | 234 | vs.Kill() 235 | } 236 | -------------------------------------------------------------------------------- /Lec01_Introduction/l01.md: -------------------------------------------------------------------------------- 1 | ### 6.824 2016 第1课:介绍 2 | 3 | #### 6.824: 分布式系统工程 4 | 5 | ##### 什么是分布式系统 ? 6 | + 多台机器共同协作 7 | + 如DNS域名解析, P2P文件分享, 大的数据库(big databases), MapReduce, &c 8 | + 很多关键基础设施是分布式的! 9 | 10 | 11 | ##### 为什么需要分布式 ? 
12 | + 为了连接物理上相互分离的实体 13 | + 为了通过隔离(isolation)实现安全性 14 | + 为了通过复制(replication)实现容错 15 | + 为了使CPUs/mem/disk/net可以实现扩容 16 | 17 | ##### 然而 18 | + 复杂性: 多个并发的部分 19 | + 必须处理部分失败的情况 20 | + 难以实现的性能潜力 21 | 22 | ##### 为什么选这门课? 23 | + 兴趣 -- 难题, 非显而易见的解决方案(non-obvious solutions) 24 | + 被实际系统使用 -- 被大网站的崛起而驱动大网站的崛起 25 | + 活跃的研究领域 -- 快速进步的领域 和 有大量问题没有解决的领域 26 | + 动手做 -- 你讲通过实现建立多个系统 27 | 28 | #### 课程结构 29 | 30 | + http://pdos.csail.mit.edu/6.824 31 | 32 | ##### Course staff(课程工作人员): 33 | + Robert Morris, lecturer 34 | + Frans Kaashoek, lecturer 35 | + Steven Allen, TA 36 | + Stephanie Wang, TA 37 | + Jon Gjengset, TA 38 | + Daniel Ziegler, TA 39 | 40 | ##### 课程组成: 41 | + 课程 42 | + 阅读 43 | + 两个考试 44 | + 实验 45 | + 项目 46 | 47 | #### 课程涉及大的想法,阅读和实验 48 | 49 | ##### 阅读: 研究论文作为案例研究 50 | 51 | + 请课前阅读研究论文,否则你会觉得上课内容很无聊,而且你无法不费力地学会, 52 | 每篇论文都有为你准备的小问题,请务必给我们发送你阅读论文的时候存在的疑问, 53 | 晚上十点前给我们发送问题和答案。 54 | 55 | + 实验目标 56 | + 深入理解一些重要的技术 57 | + 掌握分布式编程的经验 58 | + 第一个实验的时间安排是从周五起的一周时间 59 | 60 | + 实验安排 61 | + Lab 1: MapReduce 62 | + Lab 2: replication for fault-tolerance 63 | + Lab 3: fault-tolerant key/value store 64 | + Lab 4: sharded key/value store 65 | 66 | 最后的项目,我们将会分成2到3组完成,你可以设想一个项目,然后和我们一起将他搞明白,或者你也可以做我们默认指定的项目。 67 | 实验的成绩基于你通过了多少测试案例,我们会给你测验,然后你就可以知道自己是否很小心的完成,如果它通常通过,但有时失败了,它有可能会失败,当我们运行它。 68 | 69 | + 实验代码审查 70 | 查看其它人的解决方案,发送反馈给我们,可能自己能学到其它方法。 71 | 72 | 73 | #### 主题 74 | 75 | + 这是一门关于会被应用程序抵用的基础设施的课程,它会对应用程序隐藏分布式系统的复杂性而进行抽象,包括下面的三个抽象: 76 | + 存储(Storage) 77 | + 通讯(Communication) 78 | + 计算(Computation) 79 | 80 | 两个主题将反复出现。 81 | 82 | ##### 主题:实现(implementation) 83 | + RPC, threads, concurrency control. 84 | 85 | ##### 主题: 性能(performance) 86 | + 理想:可伸缩的吞吐量。 87 | 通过购买更多的机器处理更高的负载。 88 | + 扩展变得越来越困难: 89 | 负载均衡,straggler问题。 90 | "Small" non-parallelizable parts。 91 | 隐藏共享资源等,还有网络问题。 92 | 93 | 94 | ##### 主题:容错(fault tolerance) 95 | + 上千的服务器,复杂的网络 ————> 总会有东西出错 96 | 我们需要对应用程序隐藏这些错误。 97 | 我们经常希望: 98 | 可用性: 即使出错我也希望可以使用我们的文件。 99 | 耐用性:当故障修复之后,我的数据可以恢复。 100 | 101 | 重要理念:复制服务器。 102 | 如果一个服务器故障了,客户们可以使用其他的服务器。 103 | 104 | ##### 主题:一致性(consistency) 105 | + 通用的基础设施需求定义良好的行为。 106 | 例如: Get(k) 获取到的值应该是最近的 Put(k,v)设置的。 107 | + 实现良好的行为是很困难的! 108 | + 客户提交的并发操作。 109 | + 服务器崩溃在尴尬的时刻。 110 | + 网络可能会使存活的服务器看起来跟挂了一样;存在“脑裂的风险“ 111 | + 一致性和性能不能兼得 112 | + 一致性需要沟通,如获取最新的Put()。 113 | + 带有严格同步语义的系统往往是缓慢的。 114 | + 快速系统通常使应用程序应对复杂(“放松”)的行为。 115 | + People have pursued many design points in this spectrum. 116 | 117 | #### 案例学习: MapReduce 118 | + 让我们将MR作为一个案例进行讨论。 119 | MR是课程6.284主题的一个很好的例子,也是实验1的主要关注点。 120 | 121 | + MapReduce概要 122 | + 背景: 几个小时处理完TB基本的数据集 123 | 例如:实验分析爬行网页的结构,通常不是由分布式系统开发的爱好者开发的这就会非常痛苦,如如何处理错误。 124 | + 总体目标: 非专业程序员可以轻松的在合理的效率下解决的巨大的数据处理问题。程序员定义Map函数和Reduce函数、顺序代码一般都比较简单。 125 | MR在成千的机器上面运行处理大量的数据输入,隐藏全部分布式的细节。 126 | 127 | + MapReduce的抽象试图 128 | 输入会被分配到不同的分片(splits) 129 | Input Map -> a,1 b,1 c,1 130 | Input Map -> b,1 131 | Input Map -> a,1 c,1 132 | | | | 133 | | -> Reduce -> c,2 134 | -----> Reduce -> b,2 135 | MR调用在每个分片上调用Map()函数,产生中间数据集k2,v2,然后MR将会收集相同k2的值v2,然后将v2分别传输给Reduce函数, 136 | 最后的输出是数据集 137 | 138 | + 例子: word count 139 | 输入时成千上万的文件文件 140 | Map(k, v) 141 | split v into words 142 | for each word w 143 | emit(w, "1") 144 | Reduce(k, v) 145 | emit(len(v)) 146 | 147 | + 这个模式很容易编程,隐藏了很多让人痛苦的细节 148 | + 并发: 顺序执行相同的结果 149 | + starting s/w on servers ??? 150 | + 数据移动 151 | + 失败 152 | 153 | 154 | + 这个模型容易扩展 155 | Nx台计算机可以同时执行nx个Map函数和Reduce函数,Map函数不需要相互等待或者共享数据,完全可以并行的执行。 156 | 在一定程度上,你可以通过购买更多的计算机来获取更大的吞吐量。而不是每个应用程序专用的高效并行。电脑是比程序员更便宜! 157 | 158 | + 哪些为成为现在性能的限制因素? 
159 | + 我们关心的正是需要优化的那个瓶颈。CPU?内存?硬盘?网络?它们一般会受到网络的限制,网络的总容量通常远小于全部主机网络链路速度之和。一般情况下 160 | 很难建立一个比单机快1000倍的网络,所以设计者们关心尽量减少需要在网络上移动的数据。 161 | 162 | 163 | + 容错呢? 164 | 165 | 比如:如果服务器在执行MR工作时崩溃怎么办?隐藏这个错误非常困难,为什么不重新执行这个工作呢? 166 | 167 | MR重新执行失败的Map函数和Reduce函数,它们是纯函数——它们不会修改输入数据、不会保持状态、不共享内存,map和map之间、reduce和reduce之间也不存在相互依赖, 168 | 169 | 所以重新执行也会产生相同的输出。纯函数的这个需求是MR相对于其他并行编程方案的主要限制,但也正是这个需求使得MR非常简单。 170 | 171 | + 更多细节: 172 | + master:给workers分配工作,记录中间输出的位置。 173 | 174 | + 输入分割:输入存储在GFS,每个分片有三份拷贝,所有机器都同时运行GFS和MR workers,输入分片的数量远远多于worker的数量。 175 | 176 | + master在每台机器上调度Map任务,当一台机器完成手头的任务之后再分给它新的任务;worker把Map输出按key散列划分成R个分区,保存在本地磁盘上。 177 | 178 | + 当没有Map任务还在执行时,Reduce才开始执行。master告诉Reducers去获取Map workers产生的中间数据分区,Reduce worker将最终的结果 179 | 180 | 输出到GFS。 181 | 182 | + 有哪些具体的设计帮助提升网络性能? 183 | + Map的输入来自本地的硬盘而非网络。 184 | + 中间数据只在网络上面传输一次,保存在本地硬盘,而不是GFS。 185 | + 中间数据通过key被划分到多个文件,"大的网络传输"更加有效。 186 | 187 | + 它们是怎么很好地处理负载均衡的? 188 | + 负载均衡是扩展的关键 -- otherwise Nx servers -> no gain. 189 | 不同的大小、不同的内容和不同的服务器硬件,导致处理各个分片或分区所需的时间并不一致。 190 | + 解决方案: 分片的数量要远多于worker的数量。 191 | Master不断地把分片分配给那些已经完成之前任务的worker进行处理。这样就没有哪个分片大到能左右整体的完成时间, 192 | 同时速度更快的服务器会处理更多的工作,最后大家几乎同时完成。 193 | 194 | + MR怎么应对worker崩溃? 195 | + Map Worker崩溃: 196 | + master会重新执行这些任务,并把它们调度到存有相应输入数据其他GFS副本的机器上;即使该worker已经完成了任务也要重新执行,因为它本地磁盘上的中间数据仍然被需要、却已经拿不到了。 197 | 有些Reduce workers也许已经读取过故障worker产生的中间数据,这里我们依赖Map函数是纯函数并且是确定性的(重新执行会产生相同的数据)。 198 | + master怎么知道worker崩溃?(通过ping) 199 | + 如果Reduces已经获取全部的中间数据,那么master不需要重新运行Map;如果之后有Reduce崩溃,那么必须等待Map再次运行。 200 | + Reduce worker在输出结果前崩溃,master必须在其他worker上面重新开始该任务。 201 | + Reduce worker在输出结果的过程中崩溃,GFS的原子重命名会使输出保持不可见,直到Reduce完成,所以master在其他地方再次运行Reduce worker将会是安全的。 202 | 203 | + 其他错误和问题: 204 | + 假如master意外地开启两个Map worker处理同一个输入会怎么样? 205 | master只会把其中一个的输出告诉Reduce workers。 206 | + 假如两个Reduce worker处理中间数据的同一个分区会怎么样? 207 | 它们会往GFS写同一个输出文件!GFS的原子重命名操作避免了内容混杂,最终只有一份完整的输出可见(先完成重命名的获胜)。 208 | + 假如一个worker非常慢怎么办——一个掉队者(straggler)? 209 | 原因可能是硬件出了问题。 210 | master会为这些收尾阶段的任务创建第二份备份任务一起执行。 211 | + 假如一个worker因为软件或者硬件的问题导致计算结果错误怎么办? 212 | 太糟糕了!MR建立在CPU和软件都是"fail-stop"的这一假设之上。 213 | + 假如master崩溃怎么办? 214 | 215 | + 哪些应用是MapReduce不能很好执行的? 216 | + 并不是所有工作都适合map/shuffle/reduce这种模式 217 | + 小规模的数据,因为管理开销太高,比如它并不适合用作网站后端 218 | + 大数据集上的小更新,比如添加一些文件到大的索引 219 | + 不可预知的读(Map 和 Reduce都不能选择输入) 220 | + Multiple shuffles, e.g. page-rank (can use multiple MR but not very efficient) 221 | + 更灵活的系统也能表达MR,但是使用的编程模型要复杂得多 222 | 223 | + 总结 224 | 225 | Conclusion 226 | MapReduce single-handedly made big cluster computation popular. 227 | - Not the most efficient or flexible. 228 | + Scales well. 229 | + Easy to program -- failures and data movement are hidden. 230 | These were good trade-offs in practice. 231 | We'll see some more advanced successors later in the course. 232 | -------------------------------------------------------------------------------- /Lec01_Introduction/mapreduce.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec01_Introduction/mapreduce.pdf -------------------------------------------------------------------------------- /Lec02_RPC_and_Threads/l-rpc.md: -------------------------------------------------------------------------------- 1 | ## 6.824 2016 Lecture 2: 基础设施: RPC和多线程 2 | 3 | [原文地址](https://pdos.csail.mit.edu/6.824/notes/l-rpc.txt) 4 | 5 | ##### 被问的最多的问题? 为什么这门课程选择了Go?
6 | + 6.824这门课程过去选择使用C++ 7 | 8 | 过去学生们花费了太多时间修改跟分布式系统无关的bug,比如他们释放了还在使用的对象。 9 | 10 | + Go拥有一些特性可以让你更加在集中注意力在分布式系统而不是语言细节 11 | 12 | + 类型安全 13 | + 垃圾回收(这样就不存在释放后使用的问题了) 14 | + 很好的支持并发 15 | + 很好的支持RPC 16 | 17 | 18 | + 我们喜欢使用Go编程 19 | 20 | 一门非常容易学习的语言,可以使用教程[effective_go](https://golang.org/doc/effective_go.html) 21 | 22 | ###### Remote Procedure Call (RPC) 23 | + 分布式系统的关键部分,全面的实验都使用RPC. 24 | + RPC的目的: 25 | + 容易编写网络通信程序 26 | + 隐藏客户端服务器通信的细节 27 | + 客户端调用更加像传统的过程调用 28 | + 服务端处理更加像传统的过程调用 29 | + RPC被广泛的使用! 30 | 31 | 32 | ###### RPC理想上想把网络通信做的跟函数调用一样 33 | + Client: 34 | 35 | z = fn(x, y) 36 | 37 | + Server: 38 | 39 | fn(x, y) { 40 | compute 41 | return z 42 | } 43 | RPC设计目标是这种水平的透明度。 44 | 45 | ###### Go example: 46 | + [https://golang.org/pkg/net/rpc/](https://golang.org/pkg/net/rpc/) 47 | 48 | ###### RPC消息流程图: 49 | 50 | Client Server 51 | request---> 52 | <---response 53 | 54 | ###### 软件架构 55 | 56 | client app handlers 57 | stubs dispatcher 58 | RPC lib RPC lib 59 |   net ------------ net 60 | 61 | ###### 一些细节: 62 | + 应该调用哪个服务器函数(handler)? 63 | + 序列化:格式化数据到包中 64 | + 棘手的数组,指针,对象等。 65 | + Go的RPC库非常强大。 66 | + 有些东西你不能传递:比如channels和function。 67 | + 绑定:客户端怎么知道应该跟谁通信? 68 | + 也许客户端使用服务器的hostname。 69 | + 也许使用命名服务,讲服务名字映射到最好的服务器。 70 | + 线程: 71 | + 客户端可能使用多线程,所以多于一个调用没有被处理,对应的处理器可能会是否缓慢,所以 72 | 服务器经常将每个请求放置在独立的线程中处理。 73 | 74 | ###### RPC问题:怎么处理失败? 75 | + 比如:丢包,网络断线,服务器运行缓慢,服务器崩溃。 76 | 77 | ###### 错误对RPC客户端意味着什么? 78 | + 客户端没有获取到服务器的回复。 79 | + 客户端不知道服务器是否接收到请求!也许服务器的网络在发生请求前就失败了。 80 | 81 | ###### 简单的方案:“最少一次”行为 82 | + RPC库等待回复一段时间,如果还是没有回复到达,重新发生请求。重复多次,如果还是没有回复,那么返回错误给应用程序。 83 | 84 | ###### Q: "至少一次"容易被应用程序处理吗? 85 | + 至少一次写的简单问题: 86 | 客户端发送"deduct $10 from bank account" 87 | 88 | ###### Q: 这个客户端程序会出现什么错误? 89 | + Put("k",10) -- 一个RPC调用在数据库服务器中设置键值对。 90 | + Put("k",20) -- 客户端对同一个键设置其他值。 91 | 92 | 93 | ###### Q: 至少一次每次都可以很好的工作吗? 94 | + 是的:如果回复操作的是OK,比如,只读操作。 95 | + 是的:如果应该程序有自己处理多个写副本的计划。 96 | 97 | ###### 更好的RPC行为:“最多一次” 98 | + idea:服务器的RPC代码发现重复的请求,返回之前的回复,而不是重写运行。 99 | + Q:如何发现相同的请求? 100 | client让每一个请求带有唯一标示码XID(unique ID),相同请求使用相同的XID重新发送。 101 | server: 102 | if seen[xid]: 103 | r = old[xid] 104 | else 105 | r = handler() 106 | old[xid] = r 107 | seen[xid] = true 108 | 109 | 110 | ###### 一些关于“最多一次”的复杂性 111 | 这些都会断断续续地出现在实验二中 112 | + 怎么确认xid是唯一的? 113 | + 很大的随机数? 114 | + 将唯一的客户端ID(ip address?)和序列号组合起来? 115 | + 服务器最后必须丢弃老的RPC信息? 116 | + 什么时候丢弃是安全的? 117 | + idea: 118 | + 唯一的客户端id 119 | + 上一个rpc请求的序列号 120 | + 客户端的每一个RPC请求包含"seen all replies <=X" 121 | + 类似tcp中的seq和ack 122 | + 或者每次只允许一个RPC调用,到达的是seq+1,那么忽略其他小于seq 123 | + 客户端最多可以尝试5次,服务器会忽略大于5次的请求。 124 | + 当原来的请求还在执行,怎么样处理相同seq的请求? 125 | + 服务器不想运行两次,也不想回复。 126 | + 想法:给每个执行的RPC,pending标识;等待或者忽略。 127 | 128 | ###### 如果“至多一次”服务器奔溃或者重启会怎么样? 129 | + 如果服务器将副本信息保存在内存中,服务器会忘记请求,同时在重启之后接受相同的请求。 130 | + 也许,你应该将副本信息保存到磁盘? 131 | + 也许,副本服务器应该保存副本信息? 132 | 133 | ###### 关于“至少执行一次”? 134 | + 至多一次+无限重试+容错服务 135 | 136 | ###### Go RPC实现的”最多一次“? 137 | + 打开TCP连接 138 | + 向TCP连接写入请求 139 | + TCP也许会重传,但是服务器的TCP协议栈会过滤重复的信息 140 | + 在Go代码里面不会有重试(即:不会创建第二个TCP连接) 141 | + Go RPC代码当没有获取到回复之后将返回错误 142 | + 也许是TCP连接的超时 143 | + 也许是服务器没有看到请求 144 | + 也许服务器处理了请求,但是在返回回复之前服务器的网络故障 145 | 146 | ###### 线程 147 | + 线程是基本的服务器构建工具 148 | + 你将会在实验中经常使用 149 | + 线程非常“狡猾” 150 | + 对RPC非常有用 151 | + Go中使用goroutines代替线程 152 | 153 | ###### 线程 = “控制线程” 154 | + 线程可以使一个程序同时执行很多事情 155 | + 线程共享内存 156 | + 每个线程包含额线程状态:程序计数器、寄存器、栈 157 | 158 | ###### 线程挑战 159 | + 共享数据 160 | + 两个线程在同一个时间修改同一个变量? 161 | + 当一个线程读取数据,同时另一个线程正在修改这个数据? 
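下面给出一个把前文"最多一次"的重复检测表(seen/old)和共享数据保护结合起来的最小 Go 示意。类型名、字段名都是假设的,并非课程框架里的代码,只用来说明思路:

```go
package main

import (
	"fmt"
	"sync"
)

// 每个 RPC 请求带一个唯一的 xid;服务器记录每个 xid 的处理结果,
// 重复到达的请求直接返回旧结果("最多一次")。
// 多个处理请求的 goroutine 会并发读写 seen/old 这两个 map,
// 所以必须用互斥锁保护,否则就是典型的数据竞争。
type DedupServer struct {
	mu   sync.Mutex
	seen map[int64]bool   // xid -> 是否处理过
	old  map[int64]string // xid -> 上一次的回复
}

func (s *DedupServer) Handle(xid int64, handler func() string) string {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.seen[xid] {
		return s.old[xid] // 重复请求:返回上次的回复,而不是再执行一次
	}
	r := handler() // 简化:持有锁执行 handler,属于"粗粒度"加锁
	s.seen[xid] = true
	s.old[xid] = r
	return r
}

func main() {
	s := &DedupServer{seen: map[int64]bool{}, old: map[int64]string{}}
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ { // 模拟客户端重试:同一个 xid 并发到达两次
		wg.Add(1)
		go func() {
			defer wg.Done()
			fmt.Println(s.Handle(42, func() string { return "reply-for-42" }))
		}()
	}
	wg.Wait() // handler 只会真正执行一次,两次打印的是同一个回复
}
```

这里为了简单,整个处理过程都持有锁;下文"锁粒度"一节讨论的正是这种粗粒度做法和更细粒度做法之间的取舍。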
162 | 163 | 上面的问题经常被称为竞争,需要在共享数据上面使用Go中的sync.Mutex保护变量 164 | + 线程协调 165 | + 比如:使用Go中的channels等待全部相关的线程完成工作 166 | + 死锁 167 | + 线程1等待线程2 168 | + 线程2等待线程1 169 | + 比竞争容易诊断 170 | + 锁粒度 171 | + 粗粒度 --> 简单,但是更小的并发/并行 172 | + 细粒度 --> 更好的并发,更容易数据竞争和死锁 173 | + 让我们一起看看名为labrpc的RPC包说明这些问题 174 | 175 | ###### 看看今天的讲义 -- labrpc.go 176 | + 它很像Go的RPC系统,但是带有模拟网络 177 | + 这个模拟的网络会延迟请求和回复 178 | + 这个模拟的网络会丢失请求和回复 179 | + 这个模拟的网络会重新排序请求和回复 180 | + 对之后的实验二测试非常有用 181 | + 说明线程、互斥锁、通道 182 | + 完整的RPC包完全使用Go语言编写 183 | 184 | ###### 结构 185 | + 网络结构 186 | + 网络描述 187 | + 服务器 188 | + 客户端节点 189 | + 每个Network结构都持有一个sync.Mutex 190 | 191 | ###### RPC概述 192 | + 更多的例子在test_test.go中 193 | 比如: TestBasic()函数 194 | + 应用程序调用Call()函数,发生一个RPC请求并等待结果 195 | reply := end.Call("Raft.AppendEntries", args, &reply) 196 | + 服务器端 197 | srv := MakeServer() 198 | srv.AddService(svc) // 一个服务器含有多个服务, 比如. Raft and k/v 199 | svc := MakeService(receiverObject) // 对象的方法将会处理RPQ请求 200 | 201 | ###### 服务器结构 202 | + 一个服务器程序支持多个服务 203 | 204 | ###### AddService 205 | + 添加一个服务名字 206 | + Q: 为什么上锁? 207 | AddService可能在多个goroutine中被调用 208 | + Q: defer() 函数的作用? 209 | deter的作用是在函数退出前,调用之后的代码,在里面就是添加完新服务后,做解锁操作。 210 | 211 | ###### Dispatch 212 | + 分发请求到正确的服务 213 | + Q: 为什么持有锁? 214 | 这里应该指的是Server::dispatch函数, 215 | + Q: 为什么不在函数的结尾处持有锁? 216 | 217 | ###### Call(): 218 | + 使用反射查找参数类型 219 | + 使用“gob”序列化参数(译注:gob是Golang包自带的一个数据结构序列化的编码/解码工具。) 220 | + e.ch是用于发生请求的通道 221 | + 需要一个通道从网上接收回复(<- req.replyCh) 222 | 223 | ###### MakeEnd(): 224 | + 使用一个线程或者goroutine模拟网络 225 | + 从e.ch中获取请求,然后处理请求 226 | + 每个请求分别在不同的goroutine处理 227 | Q: 一个端点是否可以拥有多个未处理的请求 228 | + Q:为什么使用rn.mu.Lock()? 229 | + Q:锁保护了什么? 230 | 231 | ###### ProcessReq(): 232 | + 查看服务器端 233 | + 如果网络不可靠,可能会延迟或者丢失请求,在一个新的线程中分发请求。 234 | + 通过读取e.ch等待回复直到时间过去100毫秒。100毫秒只是来看看服务器是否崩溃。 235 | + 最后返回回复 236 | Q: 谁会读取回复? 237 | + Q:ProcessReq没有持有rn锁,是否安全? 238 | 239 | ###### Service.dispatch(): 240 | + 为请求找到合适的处理方法 241 | + 反序列化参数 242 | + 调用方法 243 | + 序列化回复 244 | + 返回回复 245 | 246 | ###### Go的内存模型需要明确的同步去通信 247 | + 下面的代码是错误的 248 | var x int 249 | done := false 250 | go func() { x = f(...); done = true } 251 | while done == false { } 252 | 代码很容易写成上面,但是Go的语法会说没有定义,使用channel或者sync.WaitGroup替换 253 | 254 | ###### 学习Go关于goroutine和channel的教程 255 | + 使用Go的竞争诊断器 256 | + [https://golang.org/doc/articles/race_detector.html](https://golang.org/doc/articles/race_detector.html) 257 | go test --race mypkg 258 | 259 | 260 | -------------------------------------------------------------------------------- /Lec03_GFS/Bolosky.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec03_GFS/Bolosky.pdf -------------------------------------------------------------------------------- /Lec03_GFS/GFS.md: -------------------------------------------------------------------------------- 1 | ### 6.824 2014 Lecture 3: GFS案例学习 2 | 3 | #### 初稿,后面会修改,建议大家看gfs的论文和《大规模分布式存储系统》中的GFS部分 4 | 5 | ##### 为什么我们阅读这个论文? 6 | + Map/reduce使用这种文件系统 7 | + 处理存储出错的案例学习 8 | + 用一致性换取简单和性能(trading consistency for simplicity and performance) 9 | + 后续设计的动机(motivation for subsequent designs) 10 | + 好的性能 —— 良好的并行I/O性能 11 | + 好的系统论文 —— 从apps到网络都有细节说明 12 | + 关于课程6.284的全部主题都会在这个论文中出现,性能、容错、一致性 13 | 14 | ##### 一致性是什么? 15 | + 正确性条件 16 | + 当数据存在副本,同时被应该程序并发访问的时候,正确性非常重要。 17 | + 如果一个应用程序进行写操作,那么之后的读操作可以观察到什么?如果这个读操作来自其他应用程序又会看到什么? 
18 | + 弱一致性 19 | + read()可能返回不新鲜的数据 ———— 不是最近写操作的结果 20 | + 强一致性 21 | + read()返回的结果数据是最近一次的写操作结果 22 | + 一般的权衡: 23 | + 强一致性对程序的写操作(application writers)表现不错 24 | + 强一致性将会影响性能 25 | + 更多的正确性条件(通常被称为一致性模型) 26 | 27 | ##### 一致性模型的历史 28 | + 在架构,系统和数据库社区存在独立发展 29 | + 带有私有缓存的并行处理器访问共享内存 30 | + 并行客户端访问分布式文件系统 31 | + 分布式数据库之上的并行事务 32 | + 不同的模型考虑不同的权衡 33 | + 可串行性(serializability) 34 | + 顺序一致性(sequential consistency) 35 | + 线性一致性(linearizability) 36 | + 单项一致性模型(entry consistency) 37 | + 松散一致性(release consistency) 38 | + ...... 39 | 40 | ##### “理想”的一致性模型 41 | + 一个存在副本的文件表现的跟单一文件系统一样,就像很多客户端访问存在同一个机器的单一磁盘的文件 42 | + 如果一个程序写操作,之后的读操作会获取到这个写的结果。 43 | + 如果两个程序同时写同一份文件会怎么样? 44 | + 在文件系统中这种行为经常是未定义的 —— 文件也许会混合两个写操作的内容 45 | + 如果两个应用程序并发写同一个目录会怎么样? 46 | + 一个一个顺序执行 47 | 48 | ##### 不一致的来源 49 | + 并发 50 | + 机器失败 51 | + 网络割裂 52 | 53 | ##### 来自GFS论文的例子 54 | + 主节点是备份分区B,客户端添加1,主节点将1发送给自己和分区备份A, 告诉客户端失败,同时客户端访问分区B,可能获取到老的值 55 | 56 | ##### 为什么理想中的一致性模型在分布式文件系统中的实现这么困难? 57 | + 理想的一致性模型协议非常复杂 —— 后面的课程我们会看到很难实现正确的系统 58 | + 协议需要客户端和服务器进行通信,这样会消耗性能 59 | 60 | 61 | ###### GFS的设计者为了最求更好的性能和更简单的设计而放弃理想的一致性模型 62 | + 能否使应用程序开发人员的生活困难 63 | + 在一个理想的系统中应用程序的行为不容易被观察到 64 | + 如:获取到过期的数据 65 | + 如:重复添加记录 66 | + 应用数据不是你的银行账号,所以这样可能不存在问题 67 | + 今天的论文是展现下面因素权衡的一个例子 68 | + 一致性 69 | + 容错性 70 | + 性能 71 | + 简单的设计 72 | 73 | ##### GFS的目标 74 | + 创建共享文件系统 75 | + (管理)成千上万的物理机器 76 | + 存储大量的数据集 77 | 78 | ##### GFS存储什么? 79 | + 作者没有说明,我们可以根据论文猜猜,可能包括如下部分 80 | + 搜索索引和数据库 81 | + Web上面的全部HTML文件 82 | + Web上面的全部图片文件 83 | + ...... 84 | 85 | ##### 文件属性: 86 | + TB级别的数据集 87 | + 很多文件巨大 88 | + 作者在2003的时候建议存储100 MB大小的文件1M份, 100 TB的数据量 89 | + 文件只支持追加方式 90 | 91 | ##### 主要挑战: 92 | + 因为存在很多机器,所以出错的情况很常见,假设一台机器一年出错一次,那么当存在1000台机器的时候,每天都有三台机器出现问题。 93 | + 高性能:很多并发的读写操作,Map/Reduce工作会从GF读取数据,然后保存最后的结果,注意:保存的不是中间临时文件。 94 | + 有效的使用网络 95 | 96 | ##### 高层次的设计 97 | + 定义目录、文件、命名、打开/读/写操作,但是不是符合posix标准 98 | + 成百上千带有硬盘的Linux块服务器 99 | + 存储64MB的块(an ordinary Linux file for each chunk) 100 | + 每个块在三台服务器上面做备份 101 | + Q: 为什么是3个备份? 102 | + Q: 除了数据可用,三备份方案给我们带来了什么? 负载均衡热文件的读取 103 | + Q: 为什么不把每一份文件存储在RAID硬盘? 104 | RAID不是常用品,我们想给整台机器做容错,而不是仅仅针对存储系统。 105 | + Q: 为什么chunk这么大? 106 | 107 | + GFS的主控服务器知道目录层次 108 | + 对于目录而言,知道里面有哪些文件 109 | + 对于文件而言,知道哪些数据块服务器存储了相关的64MB大小数据块 110 | + 主控服务器在内存中保存状态信息,每个chunk在主控服务器上面只保存64bytes大小的元数据 111 | + 主控服务器有为元数据准备的可回收数据库,可以从断电故障后快速恢复。 112 | + 同时存在备份的主控服务器(shadow master),数据略比主控服务器服务器延迟,可以被提升为主控服务器 113 | 114 | ##### 基本操作 115 | + 客户端读操作: 116 | + 向主控服务器发送文件名和偏移量 117 | + 主控服务器回复带有相关chunk的数据块服务器集合,客户端临时缓存这些信息,然后访问最近的数据块服务器 118 | + 客户端写操作: 119 | + 询问主控服务器,我应该往哪里写文件 120 | + 如果文件大小超过64MB,主控服务器也许会选择一些新的数据块服务器,one chunk server is primary it chooses order of updates and forwards to two backups 121 | 122 | ##### 两种不同的容错计划 123 | + 一种为了主控服务器设计 124 | + 一种为了数据块服务器设计 125 | 126 | ##### 主控服务器容错 127 | + 单台主控服务器 128 | + 客户端都是跟主控服务器交互 129 | + 主控服务器整理全部的操作 130 | + 长期存储有限的信息 131 | + 命名空间(目录) 132 | + 文件到chunk的映射 133 | + 操作日志会改变他们(上面的命名空间和映射?) 
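下面用一小段 Go 代码示意"先追加操作日志、再修改内存元数据、之后才答复客户端",以及"检查点 + 重放日志"的恢复思路。其中的类型和字段都是为说明而假设的极简模型,并不是 GFS 的实际实现:

```go
package gfsmaster

// 一条元数据变更记录(字段仅为示意)。
type LogRecord struct {
	Op, Path string
}

// 极简的 master 元数据:内存状态 + 已持久化的操作日志。
type Master struct {
	log          []LogRecord
	fileToChunks map[string][]int64 // 文件 -> chunk handle 列表
}

func NewMaster() *Master {
	return &Master{fileToChunks: make(map[string][]int64)}
}

// Mutate 示意一次元数据变更的顺序:
// 1) 先把记录追加到操作日志(真实系统中还要刷盘、并复制给日志副本);
// 2) 再修改内存中的命名空间 / 文件到 chunk 的映射;
// 3) 这之后才向客户端返回成功。
func (m *Master) Mutate(rec LogRecord) {
	m.log = append(m.log, rec)
	if rec.Op == "create" {
		m.fileToChunks[rec.Path] = nil
	}
}

// Recover 示意重启恢复:先装载最近的检查点,再重放检查点之后的日志。
func (m *Master) Recover(checkpoint map[string][]int64, tail []LogRecord) {
	m.fileToChunks = checkpoint
	for _, rec := range tail {
		if rec.Op == "create" {
			m.fileToChunks[rec.Path] = nil
		}
	}
	m.log = tail
}
```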
134 | + 操作日志存在多个备份 135 | + 客户端进行修改操作,状态在修改数据记录到操作日志之后才返回 136 | + 我们可以看到操作日志在多数系统中都发挥核心作用 137 | + 操作日志在实验中发挥核心作用 138 | + 限制操作日志的文件大小 139 | + 为主控服务器的状态创建检查点 140 | + 删除操作日志中检查点之前的全部操作 141 | + 检查点复制备份 142 | + 恢复 143 | + 操作日志从最新的检查点进行恢复 144 | + chunk的位置信息则通过询问数据块服务器获取 145 | + 主控服务器单点故障 146 | + 恢复很快,因为主控服务器的状态文件很小,也许会有很小时间的不可用 147 | + 影子服务器,它数据落后于主控服务器,它们用log中备份的数据进行回复。服务器执行只读操作,返回的数据也许不是最新的。 148 | + 如果主控服务器不能恢复,然后主控服务器有重新启动,系统必须避免出现两台主控服务器的出现。 149 | + 我们将会在后面的一些课程看到强一致性的方案,同时将会更加复杂。 150 | 151 | ###### 数据块服务器容错 152 | + 主控服务器授予一个备份服务器契约,这个备份成为主块服务器,将确定的操作顺序。 153 | + 客户端将数据发生给副本 154 | + Replicas form a chain 155 | + Chain respects network topology 156 | + Allows fast replication 157 | + 客户端发生写请求给主Chunk服务器 158 | + 主Chunk服务器分配序列号 159 | + 主Chunk服务器在本地应用修改 160 | + 主Chunk服务器向副本发送修改数据的请求 161 | + 主Chunk服务器接收到全部副本的ack消息之后,回复客户端 162 | + 如果一个副本没有回复,那么客户端会重试 163 | + 如果副本的数量少于某个值,master服务器会备份chunks,重新负载备份 164 | 165 | 166 | ##### chunk数据的持久化 167 | + 有些数据因为错过了更新,所以过时了。 168 | + 通过chunk的版本号判断数据是否不新鲜的,在发生租约前,增加chunk版本号码,将数据发送到主数据块服务器,同时在其他数据块服务器中备份,主服务器和数据块服务器长久的存储版本信息。 169 | + 发送版本号给客户端 170 | + 版本号帮助主控服务器和客户端判断备份是否不新鲜 171 | 172 | ##### 并发的写/追加 173 | + 客户端们也许会并发的同时写文件的同一个区域。 174 | + 结果是这些写操作的混合--no guarantees 175 | few applications do this anyway, so it is fine 176 | + 在Unix系统上面的并发写也会导致奇怪的输出 177 | + 很多客户端也许想并发的往一个长文件里面添加 178 | + GFS支持原子操作,保证至少一次添加,主Chunk服务器选择记录需要添加到的文件位置,然后发送给其他副本。如果和一个副本的联系失败,那么主Chunk服务器会告诉客户端重试,如果重试成功,有些副本会出现追加两次的情况(因为这个副本追加成功两次)。当GFS要去填塞chunk的边缘时,如果追加操作跨越chunk的边缘,那么文件也可能存在空洞。 179 | 180 | ##### 一致性模型 181 | + 目录操作的强一致性 182 | + Master服务器原子的修改元数据,目录操作发生在理想情况 183 | + 如果Master服务器下线,只剩下备份服务器,这时只允许只读操作,同时返回的数据可能不新鲜。 184 | + chunk操作的弱一致性 185 | + 一个失败的突变使的chunk变得不一致 186 | + 主chunk服务器更新chunk文件,但是同步给副本时失败,这时副本的数据就过时了, 187 | + 客户端可能读到的数据不是最新的,当刷新获取新的租约的时候,客户端会获取到新的版本 188 | + 作者主张弱一致性对app而言不是什么大问题 189 | + 大多数文件更新操作只是追加 190 | + 应用程序可以使用添加记录中的uid判断是否重复 191 | + 应用程序也许只是读取到少量的数据(而不是不新鲜的数据) 192 | + 应用程序可以使用临时文件和原子的重命名操作 193 | 194 | ##### 性能 195 | + 巨大的读操作总吞吐量(3个副本,striping ???) 196 | + 125 MB/sec 197 | + 接近网络饱和状态 198 | + 写入不同的文件低于可能的最大值 199 | + 作者怪网络堆栈 200 | + chunk直接的复制操作会引起延迟 201 | + 并发追加同一份文件 202 | + 被服务器存在的最新的chunk所限制 203 | 204 | 205 | ##### 总结 206 | + GFS使用的比较重要的容错技术riz 207 | + 操作日志、检查点 208 | + chunk之间的主备备份(but with consistencies??) 209 | + 我们将会在其他系统中也看到这里 210 | + 哪些在GFS中工作很好 211 | + 巨大的顺序读写操作 212 | + 追加 213 | + 巨大的吞吐量 214 | + 数据之间的容错 215 | + 哪些在GFS中做的不怎么好 216 | + master服务器的容错 217 | + 小文件(master服务器的瓶颈) 218 | + 多个客户端并发的向同一份文件更新操作(除了追加) 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | -------------------------------------------------------------------------------- /Lec03_GFS/Question.md: -------------------------------------------------------------------------------- 1 | ## 阅读GFS论文时,可以尝试思考如下问题 2 | + 来自《大规模分布式存储系统》 3 | 4 | 5 | ##### 1)为什么存储三个副本?而不是两个或者四个? 6 | 7 | ##### 2)Chunk的大小为何选择64MB?这个选择主要基于哪些考虑? 8 | 9 | ##### 3)GFS主要支持追加(append)、改写(overwrite)操作比较少。为什么这样设计?如何基于一个仅支持追加操作的文件系统构建分布式表格系统Bigtable? 10 | 11 | ##### 4)为什么要将数据流和控制流分开?如果不分开,如何实现追加流程? 12 | 13 | ##### 5)GFS有时会出现重复记录或者补零记录(padding),为什么? 14 | 15 | ##### 6)租约(Lease)是什么?在GFS起什么作用?它与心跳(heartbeat)有何区别? 16 | 17 | ##### 7)GFS追加操作过程中如果备副本(Secondary)出现故障,如何处理?如果主副本(Primary)出现故障,如何处理? 18 | 19 | ##### 8)GFS Master需要存储哪些信息?Master数据结构如何设计? 20 | 21 | ##### 9)假设服务一千万个文件,每个文件1GB,Master中存储的元数据大概占用多少内存? 22 | 23 | ##### 10)Master如何实现高可用性? 24 | 25 | ##### 11)负载的影响因素有哪些?如何计算一台机器的负载值? 26 | 27 | ##### 12)Master新建chunk时如何选择ChunkServer?如果新机器上线,负载值特别低,如何避免其他ChunkServer同时往这台机器迁移chunk? 
28 | 29 | ##### 13)如果某台ChunkServer报废,GFS如何处理? 30 | 31 | ##### 14)如果ChunkServer下线后过一会重新上线,GFS如何处理? 32 | 33 | ##### 15)如何实现分布式文件系统的快照操作? 34 | 35 | ##### 16)ChunkServer数据结构如何设计? 36 | 37 | ##### 17)磁盘可能出现“位翻转”错误,ChunkServer如何应对? 38 | 39 | ##### 18)ChunkServer重启后可能有一些过期的chunk,Master如何能够发现? -------------------------------------------------------------------------------- /Lec03_GFS/gfs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec03_GFS/gfs.pdf -------------------------------------------------------------------------------- /Lec04_Primary_Backup_Replication/vm-ft.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec04_Primary_Backup_Replication/vm-ft.pdf -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/lab2_Raft.md: -------------------------------------------------------------------------------- 1 | ## 6.824 Lab 2: Raft 2 | 3 | [原文地址](https://pdos.csail.mit.edu/6.824/labs/lab-raft.html) 4 | 5 | ### 简介 6 | + 下面的一系列实验,你们会构建一个具有容错功能的kv存储系统,这是一系列实验的第一个。你们将会从实现Raft算法开始,a replicated state machine protocol(一个复制状态机协议?). 在下一个实验中,你们将会在Raft算法之后构建KV服务。然后你们会分散你们的服务以换取更高的性能,最后实现分布式事务操作。 7 | + 一个复杂服务使用Raft协议有利于管理众多备份服务器。正是基于有备份服务器这一点,服务器在副本出错的情况(崩溃、a broken、糟糕的网络环境)也能继续操作。挑战也在这里,就是因为这种错误情况的存在,副本们不是总是保持数据一致性;Raft帮助服务挑选出哪些数据是正确的。 8 | + Raft基本的方法是实现了一个复杂的状态机。Raft将客户端请求组织成一个序列,称为日志,然后保证全部的副本同意日志的内容。每个副本按照日志中的请求顺序的执行,将这些日志里面的情况应用到本机服务。因为全部活着的副本看到一样的日志内容,它们都是顺序的执行一样的请求,因此它们有相同的服务状态。如果一个服务器失败了,之后又恢复了,Raft会小心翼翼地把它的日志更新到最新。只要多数的服务器可以工作,同时它们直接可以相互通信,Raft就可以工作。如果存活的服务不多了,那么Raft毫无进展,但是会等待多数服务存活的情况下继续工作。 9 | + 在这个实验中你们将会带有方法的Go对象实现Raft,这意味着将会作为一个更大服务的一个模块使用。一系列Raft实例通过RPC相互通信维护日志副本。你们的Raft接口将支持无顺序编号的命令,同时这些命令被称为日志节点( log entries)。节点被使用数字索引。带有索引的日志节点最终将会被提交。在那个阶段,也就是你们的Raft应该发生日志节点到更大的服务去执行。 10 | 11 | 注意: 不同Raft实例直接的交互我们只使用RPC。举例,不同的Raft实例直接不允许通过共享Go变量的方式交互。当然你们的实现也不能使用文件。 12 | 13 | + 在这个实验中你们的实现[《extended Raft paper》](https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf)中描述的大多数设计,包括持久化状态,然后当服务器失败重启的时候读取持久化数据。你们不会实现集群关系更改或者日志压缩/快照。 14 | + 你们应该总结[《extended Raft paper》](https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf)和Raft课程讲稿。你们也许会发现[《illustrated Raft guide 》](http://thesecretlivesofdata.com/raft/)有利于高层次的理解Raft的工作。为了更广阔的视角,应该去了解 Paxos, Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication,和[Bolosky et al](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf). 15 | 16 | + 提示: 尽早开始。虽然实现部分代码不是很多,但是让它正常的工作将会是一个挑战。算法和代码都非常狡猾,同时还有很多偏僻的个案需要你们考虑。当一个测试失败的时候,也许比较费解到底是哪个场景让你们的解决方案不正确,怎么去修改你们的解决方案。 17 | + 提示: 在你开始之前阅读理解[《extended Raft paper》](https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf)和Raft课堂讲稿。你们的实现应该贴近论文的描述,因为那也是测试因为的。Figure 2部分的伪代码应该会有所帮助。 18 | 19 | ### 合作政策 20 | + 你们必须编写课程6.824出来我们提供的的全部代码,不能查看其他人的解决方案,也不能查看上一届的代码实现,也不允许查看其他Raft的实现。你们也许会跟其他学习讨论,反射不能查看或者直接复制他们的代码。请不要公开你的代码而被这门课程的学生所使用。比如,不要将你的代码上传到Github。[我这样不传代码应该没事吧]() 21 | 22 | ### 开始 23 | + 使用git pull命令获取最新的实验代码。我们在src/raft目录下面为你们提供框架代码和测试,在src/labrpc目录下面提供了一个简单的类rpc系统。 24 | + 获取代码,然后运行,执行下面的命令。 25 | 26 | $ setup ggo_v1.5 27 | $ cd ~/6.824 28 | $ git pull 29 | ... 30 | $ cd src/raft 31 | $ GOPATH=~/6.824 // 根据实际情况填写 32 | $ export GOPATH 33 | $ go test 34 | Test: initial election ... 
35 | --- FAIL: TestInitialElection (5.03s) 36 | config.go:270: expected one leader, got 0 37 | Test: election after network failure ... 38 | --- FAIL: TestReElection (5.03s) 39 | config.go:270: expected one leader, got 0 40 | ... 41 | $ 42 | 43 | + 当你们全部完成的时候,你们的实现应该全部src/raft目录下面的测试: 44 | 45 | $ go test 46 | Test: initial election ... 47 | ... Passed 48 | Test: election after network failure ... 49 | ... Passed 50 | ... 51 | PASS 52 | ok raft 162.413s 53 | 54 | 55 | ### 你们的工作 56 | + 你们通过在raft/raft.go文件里面添加代码实现Raft。在那个文件里面,你们会发现一些框架代码,添加发生和接收RPC请求的例子,添加保存恢复状态的例子代码。 57 | + 你们的实现必须支持下面的接口,这些接口会在测试例子和你们最终的key/value服务器中使用。你们可以在raft.go里面获取更多的细节。 58 | 59 | // create a new Raft server instance: 60 | rf := Make(peers, me, persister, applyCh) 61 | 62 | // start agreement on a new log entry: 63 | rf.Start(command interface{}) (index, term, isleader) 64 | 65 | // ask a Raft for its current term, and whether it thinks it is leader 66 | rf.GetState() (term, isLeader) 67 | 68 | // each time a new entry is committed to the log, each Raft peer 69 | // should send an ApplyMsg to the service (or tester). 70 | type ApplyMsg 71 | 72 | + 一个服务通过调用Make(peers,me,…)创建一个Raft端点。peers参数是通往其他Raft端点处于连接状态下的RPC连接。me参数是自己在端点数组中的索引。Start(command)要求Raft开始将command命令追加到日志备份中。Start()函数马上返回,不等待处理完成。服务期待你们的实现发生一个ApplyMsg结构给每个完全提交的日志,通过applyCh通道。 73 | + 你们的Raft端点应该使用我们提供的librpc包来交换RPC调用。它是仿照Go的rpc库完成的,但是内部使用Go channles而不是socket。raft.go里面包含了一些发生RPC(sendRequestVote())和处理RPC请求(RequestVote())的例子代码。 74 | 75 | + 任务: 76 | 实现领导选举和心跳(empty AppendEntries calls). 这应该是足够一个领导人当选,并在出错的情况下保持领导者。一旦你们让下面的正常工作,你们就可以通过第一二个测试。 77 | 78 | + 提示:在raft.go文件中的Raft结构体中添加任何你想要保存的状态。论文中的Figure 2部分也许会成为很好的参考。你们需要定义一个结构体保存每个日志节点的信息。记住字段的名字,任何你打算通过RPC发生的结构都需要以大写字母开头,结构体里面的字段名字都会通过RPC传递。 79 | + 提示:你们应该先实现Raft的领导选举。补充RequestVoteArgs和RequestVoteReply结构体,然后修改Make()函数创建一个后台的goroutine,当长时间接收不到其他节点的信息时开始选举(通过对外发送RequestVote请求)。为了能让选举工作,你们需要实现RequestVote()请求的处理函数,这样服务器们就可以给其他服务器投票。 80 | + 提示: 为了实现心跳,你们将会定义一个AppendEntries结构(虽然你们可能用不到全部的从参数),有领导人定期发送出来。你们同时也需要实现AppendEntries请求的处理函数,重置选举超时,当有领导选举产生的时候其他服务器就不会想成为领导。 81 | + 提示:确保定时器在不同的Raft端点没有同步。尤其是确保选举的超时不是同时触发的,否则全部的端点都会要求会自己投票,然后没有服务器能够成为领导。 82 | 83 | + 当我们的代码可以完成领导选举之后,我们想要使用Raft保存一致,复杂日志操作。为了做到这些,我们需要通过Start()让服务器接受客户端的操作,然后将操作插入到日志中。在Raft中,只有领导者被允许追加日志,然后通过AppendEntries调用通过其他服务器增加新条目。 84 | 85 | 86 | + 任务: 87 | 88 | 实现领导者和随从者代码达到追加新的日志节点的目标。这里将会包含实现Start(),完成AppendEntries RPC结构体,发送他们,完成AppendEntry RPC调用的处理函数.你们的目标是通过test_test.go文件中的TestBasicAgree()测试。一旦这些工作之后,你们可以在"basic persistence" 测试完成之前,通过其他全部的测试。 89 | 90 | + 提示:这个实验的一大部分是让你们处理各种各样的错误。你们需要实现选举的限制(论文 secion 5.4.1描述)。下面的一系列测试也是处理各种各样的错误的例子,比如一些服务器接收不到一些RPC调用,一些服务器偶尔崩溃重启。 91 | + 提示:当领导者是仅存的服务器的时候,这会导致条目被添加到日志中,全部的服务器需要独立地给他们的本地服务副本提交的新条目(通过它们自己的applyCh)。因此,你们应该保持这两个活动尽可能独立。 92 | + 提示:在没有错误的情况下需要指出Raft应该使用的最小数量的消息,这样让你们的实现最小值。 93 | 94 | + 基于Raft的服务器必须有能力知道自己什么时候退出的,然后如果机器重启可以继续。这就需要Raft保存状态这样就可以经受得住重启。 95 | + 一个“真“的实现会在每一次改变状态的时候将状态值写到磁盘,然后在重启之后读取上次保存的最新的状态值。你们的实现不会使用磁盘;作为替代,你们将会使用可持久化的对象(查看persister.go)保存和恢复状态。不论是谁对可持久化对象调用Make(),如果有的话先持有Raft最近持久化状态。Raft将会从可持久化对象初始化自己的状态,每一次的状态更改的时候使用它保存自己的状态信息。你们应该使用ReadRaftState()和SaveRaftState() 方法分别处理读取和存储的操作。 96 | 97 | + 任务: 98 | 99 | 通过添加代码去序列化那些需要在persist()函数中需要保存的状态达到实现持久化状态的作用,在readPersist()函数中反序列化相同的状态。你们需要觉得在Raft协议栈中你们的完全在哪些关键点需要持久化它们的状态,然后在这些代码插入persist()函数。一旦这些代码完成,你们就可以通过剩下的测试了。你们也许想第一个尝试通过"basic persistence" 测试(go test -run 'TestPersist1$'),然后解决剩下的其他测试。 100 | 101 | + 注意:你们需要讲状态值编码成字节数组,为了将他传递给Persister;在raft.go中包含了在 persist()和readPersist()使用的例子代码 102 | + 注意:为了避免运行期间出现OOM(out of 
memory),Raft必须定期的忽略老的日志,你们在这个实验中不需要考虑日志的垃圾回收机制,你们会在下一个实验中实现。 103 | + 提示:跟RPC系统为什么只发生一大写字母开始的字段,忽略那些一小写字母开头的字段一样,你们持久化过程中使用的GOB编码也只会保存那些以大写字母开始的自段。这是一个常见的神秘的错误来源,但是Go不会警告你这些错误。 104 | + 提示:为了在实验接近尾声的时候可以通过一些具有挑战性的测试,比如那些被标记”不可靠的“,你们需要优先允许一个跟随者备份领导者的nextIndex(???)而不是一次只备份一个。可以在《the extended Raft 》的7,8页上面看到描述。 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | + 123 | 124 | -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/raft-extended.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec05_Fault_Tolerance_Raft/raft-extended.pdf -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/raft-zh/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/raft-zh/README.md: -------------------------------------------------------------------------------- 1 | # raft-zh_cn 2 | Raft一致性算法论文的中文翻译 3 | 4 | 英文[论文地址](https://ramcloud.atlassian.net/wiki/download/attachments/6586375/raft.pdf) 5 | 6 | 中文[翻译地址](https://github.com/maemual/raft-zh_cn/blob/master/raft-zh_cn.md) 7 | -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/raft.md: -------------------------------------------------------------------------------- 1 | ### 6.824 2014 Lecture 5: Raft(1) 2 | 3 | [《Lecture 5: Raft (1)》原文地址](https://pdos.csail.mit.edu/6.824/notes/l-raft.txt) 4 | 5 | #### 本课程 6 | + 今天:Raft(lab2) 7 | + 下一步计划:在kv服务中使用Raft(lab3) 8 | 9 | #### 整体的主题:使用复制状态机实现容错 (SRM ??) 10 | + [客户端、副本服务器] 11 | + 每一个副本服务器以相同的顺序执行一样的操作 12 | + 他们让副本的执行就行自己执行一样,当如果有失败的情况任何副本可以接管工作,比如:在失败的时候,客户端会切换到其他服务器,GFS和VMware FT都有这种“方式”(flavor) 13 | 14 | #### 典型的“状态机”是怎么样的? 15 | + 应用程序/服务的内部状态、输入命令序列、输出都被复制 16 | + 顺序意味着没有并行性,必须是确定的 17 | + 除了通过输入和输出不能和外界的状态机进行通讯 18 | + 在这里我们将讨论相当高层次的服务 19 | + 例子:配置服务,比如MapReduce或者GFS master 20 | + 例子:键值存储,get/put操作(lab3) 21 | 22 | #### 一个关键的问题:怎么避免脑裂? 23 | + 建设客户端可以连接副本A但是不能连接副本B,客户端可以只跟副本A交互吗? 24 | + 如果B已经崩溃,我们必须在处理的时候离开B,不然我们不能容错! 25 | + 如果B启动,但是网络阻止我们连接副本B,也许我们不应该在处理的时候离开B, 26 | 因为它可能存活同时在为其他客户端提供服务---存在脑裂网络分区的风险 27 | + 在一般情况下我们不能区分脑裂和崩溃 28 | 29 | #### 使用单个主节点的方式可以避免脑裂吗? 30 | + 主服务器计算决定A和B哪个会成为主节点,因为这边只有一个主节点所以它不会出现不同意自己决定的情况。 31 | + 客户端和主节点交互,如果主节点失败了怎么办?这是个单节点故障问题---这种方式不是很好 32 | 33 | #### 我们想使用复制状态机解决如下问题: 34 | + 不存在单节点故障(single point of failure) --可以处理任何一台机器挂掉的情况 35 | + 处理脑裂问题(partition w/o split brain) 36 | 37 | #### The big insight for coping w/ partition: majority vote ?? 38 | + 2f+1个服务器实例,比如3,5 39 | + 必须获取到不少于f+1票之后才能处理,比如:成为主节点,因此即使f个服务实例失败我们还是可以继续工作,这样就避免了单点故障问题。 40 | + 为什么这样做就不存在脑裂了? 
41 | + 多数情况下,一个分区可以拥有多数实例 42 | 注意:多数是指2f+1中的多数,不是现在存活的实例中的多数,任何两个交叉的实例(intersect servers in the intersection)在投票的时候只能投一次这是非常重要的事情,我们在后面会看到交叉(intersection)传达着其他信息。 43 | 44 | #### 两个多数复制方案发明在1990年左右 45 | + 这两个方案是Paxos和 View-Stamped Replication 46 | + 在过去的十几年间,这些方案在现实世界中多次使用,Raft是这个想法的一个非常不错的描述。 47 | 48 | #### MapReduce、GFS和VMware FT都从SRM中获益 49 | + MapReduce的master节点并没有复制 50 | + GFS的master虽然被复制,但是没有自动切换到备份 51 | + VMware FT shared disk atomic test-and-set was (apparently) not replicated 52 | 53 | #### Raft实现的状态机复制 54 | + Raft选择一个实例充当领导者 55 | + 客户端通过RPC请求发送Put、Get、Append命令给领导者的键值层(k/v layer) 56 | + 领导者将这些请求全部发送给其他的副本 57 | + 每个追随者往后追加日志 58 | + 目标是含有相同的日志 59 | + 将并发的客户端请求Put(a,1) Put(a,2) 转换成顺序的请求 60 | + 如果多数将它添加到自己的日志,而且是持久化的,那么这个条目被“确认”,多数意味着即使少数服务器失败还是可以正常处理 61 | + 服务器当领导者说条目被确认之后执行一次,键值层将put操作应用到数据库,或者提取得到的结果,然后领导者返回给客户端写的结果 62 | 63 | #### 为什么是日志? 64 | + 为什么服务器不是通过其他方式,比如数据库来保证状态机状态呢? 65 | + 如果追随者错过了一些领导者的命令会怎么样? 66 | + 如果有效的更新到最新?回答:重发错过的命令 67 | + 到目前为止,日志是一些顺序的命令,它相对于状态---开始的状态 + 日志 = 最终的状态 68 | + 日志经常提供一个方便的编号方案,给操作排队同时日志处于隐藏区域直到我们确认命令被提交 69 | 70 | #### Raft的日志总是会被精确的复制吗? 71 | + 不:有些副本也许会滞后 72 | + 不:我们可以看到他们会拥有不同的条目 73 | + 好消息是: 74 | + 如果一个服务器已经执行了给定条目中的命令,那么没有其他服务器会执行这个条目中的其他命令.比如:the servers will agree on the command for each entry.State Machine Safety (Figure 3) 75 | 76 | 77 | #### 实验2:Raft接口 78 | + rf.Start(command) (index, term, isleader) 79 | + 启动新日志条目的协议 80 | + 成功还是失败都会马上返回 81 | + 如果服务器在提交命令之前失去领导者 82 | + index表示要观察的日志条目 83 | + ApplyMsg, with Index and Command 84 | + 当服务(k/v server)需要执行一个新命令的时候,Raft会在通道上面产生一个消息。它同时通知客户端的rpc处理器,所以同时可以给客户端回复。 85 | 86 | + 注意:领导者不需要等待给the AppendEntries RPCs的回复 87 | + 不希望被失败的服务所阻塞 88 | + 所以会在各自的goroutine中发送 89 | + 这样(??)意味着很多RPC请求到达的时候是无序的 90 | 91 | #### Raft的设计主要包括两个部分: 92 | + 领导者选举 93 | + 在故障后确保相同的日志 94 | 95 | #### Raft给领导者编号 96 | + 新的领导者--->新的项目(new term) 97 | + 一个项目至少有一个领导者;在某些情况下没有领导者(a term has at most one leader; might have no leader) 98 | + 编号会帮助服务器选择最新的领导,而不是取得领导 99 | 100 | #### Raft什么时候开启领导者选举? 101 | + 其他服务器在一段时间内感受不到领导者的的时候 102 | + 他们增加本地currentTerm,成为候选人,开始选举 103 | 104 | #### 怎么保证在一个项目(term)中只有一个领导者? 105 | + (Figure 2 RequestVote RPC and Rules for Servers) 106 | + 领导者必须获取到大多数服务器的投票 107 | + 每个服务器可以为每一项投一次,投票给请求的第一个服务器(within Figure 2 rules) 108 | + 最多一个服务器可以获得给定项目的多数票 109 | + 最多一个leader,即使网络分区 110 | + 即使一些服务器发生故障,选举也可以成功 111 | 112 | #### 服务器如何知道选举成功了? 113 | + 赢家获得是多数票 114 | + 其他人看到AppendEntries从胜利者的心跳 115 | 116 | #### 选举可能不成功 117 | + 大于3的候选人分裂投票,没有获得多数票 118 | + even # of live servers,两个候选人每个得到一半,少于大多数服务器可达 119 | 120 | #### 选举失败后会发生什么? 121 | + 另一个超时,增量currentTerm,成为候选 122 | + 较高的项目优先,候选的较旧的项目退出 123 | 124 | #### Raft如何减少分裂投票导致选举失败的机会? 125 | + 每个服务器在开始候选前,延迟一个随机时间 126 | + 延迟的作用是? 127 | + [diagram of times at which servers' delays expire] ??? 128 | + 一台服务器将选择最低的随机延迟 129 | + 希望有足够的时间在下一个超时到期之前选择 130 | + 其他人将看到新领导者的AppendEntries心跳,而不会成为候选人 131 | 132 | #### 如何选择随机延迟范围? 133 | + 时间太短:第二个候选在第一个结束前开始 134 | + 时间太长:系统在引导器故障后闲置太长时间 135 | + 粗略指南: 136 | + 假设完成一个无人选举需要10毫秒,同时我们有5台服务器 137 | + 我们希望延迟被20ms分开,因此随机延迟从0到100 ms 138 | 139 | #### Raft选举遵循一个共同的模式:安全与进展的分离 140 | + 硬机制排除> 1领导,以避免裂脑,但可能不是领导者,或未知的结果 141 | + 软机制试图确保进展,总是安全地在一个新的期限开始新的选举 142 | + 软机制试图避免不必要的选举 143 | + 来自领导者的心跳(提醒服务器不开始选举) 144 | + 超时时间(不要开始选择太快) 145 | + 随机延迟(给一个领导者时间被选举) 146 | 147 | #### 如果老领导不知道一个新的选举产生了怎么办? 148 | + 也许老领导接收不到新领导的信息 149 | + 新领导的产生意味着在大多数服务器已经增加了currentTerm 150 | + 所以老领导(w/ old term)不能获取到大多数AppendEntries 151 | + 所以老领导不会提交或执行任何新的日志条目 152 | + 从而没有分裂脑,尽管分裂 153 | + 但少数可以接受旧服务器的AppendEntries,因此日志可能在旧期限结束时分歧 154 | 155 | #### 现在让我们谈谈在失败后同步日志 156 | #### 我们想要确认什么? 
157 | + 也许:每个服务器以相同的顺序执行相同的客户端命令,失败的服务器可能无法执行任何操作 158 | + 因此:if any server executes, then no server executes something 159 | else for that log entry 160 | Figure 3's State Machine Safety 161 | + 只要是单个领导者,这样很容易阻止日志中不一致的情况 162 | 163 | #### 日志怎么样会不一致? 164 | + 日志可能会缺少---在term的结尾处缺少 165 | + 在发送所有AppendEntries之前,term 3的领导者崩溃 166 | 167 | S1: 3 168 | S2: 3 3 169 | S3: 3 3 170 | + 日志可能在同一条目中具有不同的命令! 171 | + 在一系列领导者崩溃后,10 11 12 13 <- log entry # 172 | 173 | S1: 3 174 | S2: 3 3 4 175 | S3: 3 3 5 176 | 177 | #### 的领导者将强制其追随者使用自己的日志;比如 178 | + S3被选为term6的新领导者 179 | + S3发送新命令,entry 13, term 6 AppendEntries, previous entry 12, previous term 5 180 | + S2回复false(AppendEntries step 2) 181 | + S3 将nextIndex[S2]增加到12 182 | + S3 sends AppendEntries, prev entry 11, prev term 3 183 | + S2删除entry 12 (AppendEntries step 3) 184 | + S1的行为类似,但必须再回一个更远 185 | 186 | #### 回滚的结果 187 | + 每个存活的跟踪者删除不同于领导的尾部 188 | + 因此实时追踪者的日志是领导者日志的前缀日志 189 | + 存活的追随者将与领导者保持相同的日志,除非他们可能缺少最近的几个条目 -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/寻找一种易于理解的一致性算法.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec05_Fault_Tolerance_Raft/寻找一种易于理解的一致性算法.doc -------------------------------------------------------------------------------- /Lec07_Guest_lecturer_on_Go/gomem.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec07_Guest_lecturer_on_Go/gomem.pdf -------------------------------------------------------------------------------- /Lec08_Zookeeper/zookeeper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec08_Zookeeper/zookeeper.pdf -------------------------------------------------------------------------------- /Lec09_Distributed_Transactions/thor95.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec09_Distributed_Transactions/thor95.pdf -------------------------------------------------------------------------------- /Lec11_FaRM/farm-2015.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec11_FaRM/farm-2015.pdf -------------------------------------------------------------------------------- /Lec13_Disconnected_Operation_Eventual_Consistency/bayou-conflicts.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec13_Disconnected_Operation_Eventual_Consistency/bayou-conflicts.pdf -------------------------------------------------------------------------------- /Lec14_Case Studs_Relaxed_Consistency/cooper-pnuts.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec14_Case Studs_Relaxed_Consistency/cooper-pnuts.pdf -------------------------------------------------------------------------------- /Lec15_Case_Studis_Dynamo/dynamo.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec15_Case_Studis_Dynamo/dynamo.pdf -------------------------------------------------------------------------------- /Lec16_Wide-Area Publish_Subscribe/l-wormhole.txt: -------------------------------------------------------------------------------- 1 | 6.824 2016 Lecture 16: 2 | 3 | Wormhole: Reliable Pub-Sub to support Geo-replicated Internet Services, Sharma 4 | et al, 2015. 5 | 6 | why are we reading this paper? 7 | pub-sub common building block in distributed systems 8 | YMB, FAB, Kafka 9 | case study: Facebook's Wormhole 10 | motivated by memcache 11 | 12 | how do web sites scale up with growing load? 13 | a typical story of evolution over time: 14 | 1. one machine, web server, application, DB 15 | DB stores on disk, crash recovery, transactions, SQL 16 | application queries DB, formats, HTML, &c 17 | but the load grows, your PHP application takes too much CPU time 18 | 2. many web FEs, one shared DB 19 | an easy change, since web server + app already separate from storage 20 | FEs are stateless, all sharing (and concurrency control) via DB 21 | but the load grows; add more FEs; soon single DB server is bottleneck 22 | 3. many web FEs, data sharded over cluster of DBs 23 | partition data by key over the DBs 24 | app looks at key (e.g. user), chooses the right DB 25 | good DB parallelism if no data is super-popular 26 | painful -- cross-shard transactions and queries probably don't work 27 | hard to partition too finely 28 | but DBs are slow, even for reads, why not cache read requests? 29 | 4. many web FEs, many caches for reads, many DBs for writes 30 | cost-effective b/c read-heavy and memcached 10x faster than a DB 31 | memcached just an in-memory hash table, very simple 32 | complex b/c DB and memcacheds can get out of sync 33 | (next bottleneck will be DB writes -- hard to solve) 34 | 35 | the big facebook infrastructure picture 36 | lots of users, friend lists, status, posts, likes, photos 37 | fresh/consistent data apparently not critical 38 | because humans are tolerant? 39 | high load: billions of operations per second 40 | that's 10,000x the throughput of one DB server 41 | multiple data centers (at least west and east coast) 42 | each data center -- "region": 43 | "real" data sharded over MySQL DBs 44 | memcached layer (mc) 45 | web servers (clients of memcached) 46 | each data center's DBs contain full replica 47 | west coast is master, others are slaves via MySQL async log replication 48 | 49 | what does FB store in mc? 50 | maybe userID -> name; userID -> friend list; postID -> text; URL -> likes 51 | basically copies of data from DB 52 | 53 | how do FB apps use mc? 54 | read: 55 | v = get(k) (computes hash(k) to choose mc server) 56 | if v is nil { 57 | v = fetch from local DB 58 | set(k, v) 59 | } 60 | write: 61 | v = new value 62 | send k,v to master DB # maybe in remote region 63 | 64 | how to arrange that DB in different regions get updated? 65 | master DB receives all writes (similar to PNUTS) 66 | adds entry to transaction log 67 | replicates transaction log to slaves 68 | 69 | how to arrange that mc in different regions learn about writes 70 | need to invalidate/update mc entry after write 71 | eval section suggests it is important to avoid stale data 72 | option 1: mc in remote region polls its local DB 73 | increases read load on DB 74 | what is the poll interval? 
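Before turning to option 2 below, here is a rough Go sketch of the look-aside
pattern described above under "how do FB apps use mc?". The Cache/DB interfaces
and function names are illustrative stand-ins, not Facebook's actual APIs; the
write path is what creates the stale-cache problem that both options try to solve:

  type Cache interface {
      Get(k string) (string, bool)
      Set(k, v string)
      Delete(k string)
  }
  type DB interface {
      Query(k string) string
      Update(k, v string)
  }

  // read path: look-aside cache
  func read(mc Cache, db DB, k string) string {
      if v, ok := mc.Get(k); ok {
          return v // hit
      }
      v := db.Query(k) // miss: read the local DB replica
      mc.Set(k, v)     // repopulate the cache
      return v
  }

  // write path: send the write to the master DB (maybe in another region);
  // every region's mc still holds the old value until that key is
  // invalidated or updated
  func write(masterDB DB, localMC Cache, k, v string) {
      masterDB.Update(k, v)
      localMC.Delete(k) // local invalidate; remote mc's remain stale
  }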
75 | option 2: wormhole pub/sub 76 | 77 | pub/sub 78 | a common building block in distributed systems 79 | facebook use case 80 | subscriber is mc 81 | publisher is DB 82 | subscribers link we a library 83 | update configuration file to express interest in updates 84 | stored in zookeeper 85 | publishers read a configuration file to find subscribers 86 | establishes a flow with each subscriber 87 | send wormhole updates on each flow asynchronously 88 | set of key-value pairs 89 | filters 90 | subscribers tell publishers about a filter 91 | filter is a query over keys in wormhole update 92 | publishers send only updates that pass filter 93 | 94 | delivery semantics 95 | all updates on a flow are delivered in order 96 | publisher maintains per subscriber a "data marker" 97 | sequence number of an update in the transaction log 98 | records what a subscriber has received 99 | publishers ask subscriber periodically for what it has received 100 | i.e., marker is a lower bound what subscriber has received 101 | updates are delivered at least once 102 | publisher persists marker 103 | if publisher fails start sending from last marker 104 | => subscribers may receive update twice 105 | Q: how do subscribers deal with an update delivered several times 106 | A: no problem for caches 107 | A: application can do duplicate filtering 108 | Q: can an update be never delivered? 109 | A: yes, because transaction log may have been truncated 110 | data in log is present for 1-2 days 111 | 112 | Q: why does subscriber not keep track of marker? 113 | A: FB wants subscribers to be stateless 114 | 115 | Q: why are markers periodically acked by subscribers 116 | A: Expensive to ack each update 117 | A: Uses TCP for delivery 118 | They don't have to worry about packet loss 119 | 120 | Where to store the marker? 121 | SCRD: publisher stores in local persistent storage 122 | if storage is unavailable, cannot fail over to new publisher 123 | caches will be stale 124 | if storage fails, lose marker 125 | opportunity: storage/log is often replicated 126 | MCRD: publishers stores marker in Zookeeper 127 | if publisher fails, another publisher can take over 128 | read last marker from zookeeper 129 | implementation challenge: format of marker 130 | replicas of same log have different binary format 131 | solution: "logical" positions 132 | Q: isn't it expensive to update marker in Zookeeper? 
133 | A: yes, but only done periodically 134 | 135 | Implementation challenge: many different DBs 136 | don't want to modify any of them to support flows 137 | idea: publishers read transaction log of DB 138 | read library to read different log formats 139 | convert updates in standard format (Wormhole update) 140 | one key is a shard identifier 141 | 142 | Optimization 1: caravan 143 | design 1: one reader per flow 144 | puts too much load on DB 145 | in steady state, all readers read same updates 146 | design 2: one reader for all flows 147 | bad performance on recovery 148 | on recovery each flow may have to read from different point in log 149 | solution: one reader per cluster of flows ("caravan") 150 | in practice, number of caravans is small (~1) 151 | that one caravan is called the "lead caravan" 152 | 153 | Optimization 2: load balancing flows 154 | a single application has several subscribers 155 | application data is sharded too 156 | N DB shards 157 | M application machines 158 | -> an MC machine may have more than 1 subscriber 159 | e.g., when it stores 2 DB shards 160 | Q: on creation of a new flow which app machine is the subscriber? 161 | two plans: 162 | - weighted random selection 163 | - subscribers use zookeeper 164 | 165 | Optimization 3: one TCP connection 166 | multiplexes several flows for same subscriber 167 | subscriber may have several shards 168 | one flow for each shard 169 | 170 | Deployment 171 | in use at FB 172 | readers for several DBs (MySQL, HDFS, ...) 173 | 35 Gbyte/s of updates per day 174 | # caravans = 1.06 175 | 176 | Performance 177 | Publisher bottleneck: 350 Mbyte/s 178 | Subscriber bottleneck: 600,000 updates/s 179 | Enough for production workloads 180 | 181 | References 182 | Kafka (http://research.microsoft.com/en-us/um/people/srikanth/netdb11/netdb11papers/netdb11-final12.pdf) 183 | 184 | -------------------------------------------------------------------------------- /Lec16_Wide-Area Publish_Subscribe/wormhole.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec16_Wide-Area Publish_Subscribe/wormhole.pdf -------------------------------------------------------------------------------- /Lec17_Measuring_Consistency/fb-consistency.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec17_Measuring_Consistency/fb-consistency.pdf -------------------------------------------------------------------------------- /Lec18_Case_Studies_Spark/zaharia-spark.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec18_Case_Studies_Spark/zaharia-spark.pdf -------------------------------------------------------------------------------- /Lec19_Cluster_Management/borg.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec19_Cluster_Management/borg.pdf -------------------------------------------------------------------------------- /Lec20_Peer-to-peer_Trackerless_Bittorrent_and_DHTs/bep_0005_files/bep.css: -------------------------------------------------------------------------------- 1 | /* 2 | BitTorrent.org BEP CSS 3 | 
*/ 4 | 5 | body { 6 | margin:0; 7 | padding:0; 8 | color:#333; 9 | background-color:#666; 10 | font:10px/1.5em "Trebuchet MS",sans-serif; 11 | text-align:center; 12 | } 13 | 14 | #upper { 15 | margin:0; 16 | padding:60px 0; 17 | background:#fff url("../img/bg.gif") repeat-x 0 0; 18 | } 19 | 20 | #wrap { 21 | margin:0 auto; 22 | width:700px; 23 | text-align:left; 24 | } 25 | 26 | #header { 27 | margin:0; 28 | width:100%; 29 | border-bottom:2px solid #eee; 30 | } 31 | 32 | #nav { 33 | margin:0 0 40px 0; 34 | padding:.5em 0; 35 | text-align:right; 36 | } 37 | 38 | #nav ul { 39 | list-style:none; 40 | padding:0; 41 | margin:0; 42 | } 43 | 44 | #nav li { 45 | margin-left:1em; 46 | padding:0; 47 | display:inline; 48 | font-size:1.5em; 49 | font-weight:bold; 50 | } 51 | 52 | #nav li span { 53 | color:#f60; 54 | } 55 | 56 | #home-l ul, 57 | #home-c ul, 58 | #home-r ul { 59 | margin:0 0 1.5em 0; 60 | padding:0 0 10px 2px; 61 | } 62 | 63 | #home-l li, 64 | #home-c li, 65 | #home-r li { 66 | list-style-type:none; 67 | margin:1em 0; 68 | padding:0 0 0 25px; 69 | font-size:1.5em; 70 | line-height:1.2em; 71 | background:url("../img/li.gif") no-repeat 0 0; 72 | } 73 | 74 | #first p { 75 | font-size:3pt; 76 | line-height:80pt; 77 | } 78 | 79 | 80 | #second li, 81 | #second dt { 82 | margin:.4em 0; 83 | font-size:10pt; 84 | line-height:12pt; 85 | } 86 | 87 | #second dd { 88 | margin-bottom:7px; 89 | font-size:1.2em; 90 | line-height:1.5em; 91 | } 92 | 93 | #intro { 94 | margin:0 0 2.5em 0; 95 | padding:0 3em; 96 | font-size:2.5em; 97 | line-height:1.2em; 98 | color:#000; 99 | } 100 | 101 | #footer { 102 | margin:0; 103 | padding:0; 104 | height:60px; 105 | background:url("../img/fbg.gif") repeat-x 0 0; 106 | } 107 | 108 | #home-l, 109 | #home-c, 110 | #home-r { 111 | margin:0 10px; 112 | width:210px; 113 | float:left; 114 | } 115 | 116 | #blog { 117 | padding:0 7em; 118 | } 119 | 120 | .field-name, 121 | .docinfo-name { 122 | margin:0; 123 | font-size: 1.2em; 124 | text-align: right; 125 | vertical-align: middle; 126 | } 127 | 128 | h1 { 129 | margin-top:1em; 130 | margin-bottom:.1em; 131 | margin-right: 0; 132 | margin-left: 0; 133 | font-size:2em; 134 | font-weight:normal; 135 | letter-spacing:-1px; 136 | } 137 | 138 | h1 span { 139 | font-weight:bold; 140 | color:#09f; 141 | } 142 | 143 | h2 { 144 | margin: .5em 0 0 0;; 145 | padding:0 0 .5em 0; 146 | font-size:1.6em; 147 | //border-bottom:2px solid #ffc; 148 | } 149 | 150 | #second h2, 151 | #blog h2 { 152 | margin:0; 153 | padding:0 0 .5em 0; 154 | font-size:1.6em; 155 | font-weight:normal; 156 | border:0; 157 | } 158 | 159 | h3 { 160 | font-size:1.3em; 161 | line-height:1.0em; 162 | } 163 | 164 | h4 { 165 | font-size:1.1em; 166 | text-transform:uppercase; 167 | } 168 | 169 | h5 { 170 | font-size:1.1em; 171 | color:#666; 172 | } 173 | 174 | #second p, 175 | #blog p { 176 | font-size:1.3em; 177 | line-height:1.5em; 178 | } 179 | 180 | #footer p { 181 | margin:0; 182 | padding:2em 0; 183 | color:#fff; 184 | } 185 | 186 | table { 187 | border:0; 188 | border-style:2; 189 | margin-left:4em; 190 | //margin-top:4em; 191 | //margin-bottom:4em; 192 | } 193 | 194 | td { 195 | font-size:9pt; 196 | padding:10px; 197 | } 198 | 199 | td.shade { 200 | background-color:#eee; 201 | } 202 | 203 | #blog .post { 204 | padding:5px 0; 205 | border-top:2px solid #ffc; 206 | font-size:1em; 207 | color:#999; 208 | } 209 | 210 | .clear:after { 211 | content:"."; 212 | display:block; 213 | height:0; 214 | font-size:0; 215 | clear:both; 216 | visibility:hidden; 217 | } 218 | 
219 | .clear { 220 | display:inline-table; 221 | } 222 | 223 | /* \*/ 224 | * html .clear { 225 | height:1%; 226 | } 227 | 228 | .clear { 229 | display:block; 230 | } 231 | /* */ 232 | 233 | .img-r { 234 | float:right; 235 | width:300px; 236 | padding-bottom:10px; 237 | margin:0 0 0 20px; 238 | } 239 | 240 | .img-l { 241 | float:left; 242 | width:300px; 243 | padding-bottom:10px; 244 | margin:0 20px 0 0; 245 | } 246 | 247 | #second .img-l p, 248 | #second .img-r p { 249 | color:#09f; 250 | font-size:1.5em; 251 | text-align:center; 252 | } 253 | 254 | hr { 255 | display:none; 256 | } 257 | 258 | code { 259 | color:#963; 260 | } 261 | 262 | /* links */ 263 | 264 | a { 265 | color:#345; 266 | text-decoration:none; 267 | border-bottom:1px solid #eee; 268 | } 269 | 270 | a:visited { 271 | color:#678; 272 | } 273 | 274 | #nav a:visited { 275 | color:#345; 276 | } 277 | 278 | a:hover, 279 | #nav a:hover { 280 | color:#f60; 281 | } 282 | 283 | h1 a, 284 | h1 a:visited, 285 | h1 a:hover { 286 | color:#666; 287 | border:0; 288 | } 289 | 290 | a img { 291 | border:0; 292 | } 293 | 294 | .literal-block { 295 | margin-left:4em; 296 | } 297 | 298 | div.figure { 299 | margin:2em 4em 1em 4em; 300 | text-align: center; 301 | font-size: 7pt; 302 | } 303 | 304 | p.caption { 305 | text-align: left; 306 | } 307 | 308 | -------------------------------------------------------------------------------- /Lec20_Peer-to-peer_Trackerless_Bittorrent_and_DHTs/stoica-chord.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec20_Peer-to-peer_Trackerless_Bittorrent_and_DHTs/stoica-chord.pdf -------------------------------------------------------------------------------- /Lec21_Peer-to-peer_Bitcoin/bitcoin.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec21_Peer-to-peer_Bitcoin/bitcoin.pdf -------------------------------------------------------------------------------- /Lec23_Project_demos/katabi-analogicfs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec23_Project_demos/katabi-analogicfs.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Distributed-Systems 2 | MIT课程[《Distributed Systems 》](http://nil.csail.mit.edu/6.824/2016/schedule.html)学习和翻译 3 | + 翻译和完成课程的实验代码,之后在代码里添加了注释说明,去掉了代码实现 4 | + 整理课程,编写简单的分布式入门教程 5 | 6 | 7 | #### 资料推荐 8 | + [《大规模分布式存储系统》](https://book.douban.com/subject/25723658/) 9 | + [《分布式系统原理介绍》](http://pan.baidu.com/s/1geU1XAz) 10 | + [awesome-distributed-systems](https://github.com/kevinxhuang/awesome-distributed-systems) 11 | + [一名分布式存储工程师的技能树是怎样的?](https://www.zhihu.com/question/43687427/answer/96306564) 12 | + [袖珍分布式系统](http://www.jianshu.com/c/0cf64976a481) 13 | --------------------------------------------------------------------------------