├── 6.824
│   ├── .gitignore
│   ├── Makefile
│   └── src
│       ├── diskv
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── kvpaxos
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── kvraft
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── config.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── labrpc
│       │   ├── labrpc.go
│       │   └── test_test.go
│       ├── lockservice
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── main
│       │   ├── diskvd
│       │   ├── diskvd.go
│       │   ├── ii.go
│       │   ├── lockc.go
│       │   ├── lockd.go
│       │   ├── mr-challenge.txt
│       │   ├── mr-testout.txt
│       │   ├── pbc.go
│       │   ├── pbd.go
│       │   ├── pg-being_ernest.txt
│       │   ├── pg-dorian_gray.txt
│       │   ├── pg-dracula.txt
│       │   ├── pg-emma.txt
│       │   ├── pg-frankenstein.txt
│       │   ├── pg-great_expectations.txt
│       │   ├── pg-grimm.txt
│       │   ├── pg-huckleberry_finn.txt
│       │   ├── pg-les_miserables.txt
│       │   ├── pg-metamorphosis.txt
│       │   ├── pg-moby_dick.txt
│       │   ├── pg-sherlock_holmes.txt
│       │   ├── pg-tale_of_two_cities.txt
│       │   ├── pg-tom_sawyer.txt
│       │   ├── pg-ulysses.txt
│       │   ├── pg-war_and_peace.txt
│       │   ├── test-ii.sh
│       │   ├── test-mr.sh
│       │   ├── test-wc.sh
│       │   ├── viewd.go
│       │   └── wc.go
│       ├── mapreduce
│       │   ├── common.go
│       │   ├── common_map.go
│       │   ├── common_reduce.go
│       │   ├── common_rpc.go
│       │   ├── master.go
│       │   ├── master_rpc.go
│       │   ├── master_splitmerge.go
│       │   ├── readme.go
│       │   ├── schedule.go
│       │   ├── test_test.go
│       │   └── worker.go
│       ├── paxos-shardkv
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── paxos-shardmaster
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── paxos
│       │   ├── paxos.go
│       │   └── test_test.go
│       ├── pbservice
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── raft
│       │   ├── config.go
│       │   ├── persister.go
│       │   ├── raft.go
│       │   ├── test_test.go
│       │   └── util.go
│       ├── shardkv
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── config.go
│       │   ├── server.go
│       │   └── test_test.go
│       ├── shardmaster
│       │   ├── client.go
│       │   ├── common.go
│       │   ├── config.go
│       │   ├── server.go
│       │   └── test_test.go
│       └── viewservice
│           ├── client.go
│           ├── common.go
│           ├── server.go
│           └── test_test.go
├── Lec01_Introduction
│   ├── l01.md
│   ├── l01.txt
│   ├── lab1.md
│   └── mapreduce.pdf
├── Lec02_RPC_and_Threads
│   ├── l-rpc.md
│   └── l-rpc.txt
├── Lec03_GFS
│   ├── Bolosky.pdf
│   ├── GFS.md
│   ├── Question.md
│   ├── gfs.pdf
│   └── l-gfs.txt
├── Lec04_Primary_Backup_Replication
│   ├── l-vm-ft.txt
│   └── vm-ft.pdf
├── Lec05_Fault_Tolerance_Raft
│   ├── l-raft.txt
│   ├── lab2_Raft.md
│   ├── raft-extended.pdf
│   ├── raft-zh
│   │   ├── .gitignore
│   │   ├── README.md
│   │   └── raft-zh_cn.md
│   ├── raft.md
│   └── 寻找一种易于理解的一致性算法.doc
├── Lec06_Fault_Tolerance_Raft
│   └── l-raft2.txt
├── Lec07_Guest_lecturer_on_Go
│   └── gomem.pdf
├── Lec08_Zookeeper
│   ├── l-zookeeper.txt
│   └── zookeeper.pdf
├── Lec09_Distributed_Transactions
│   ├── l-2pc.txt
│   └── thor95.pdf
├── Lec10_Optimistic_Concurrency_Control
│   └── l-occ.txt
├── Lec11_FaRM
│   ├── farm-2015.pdf
│   └── l-farm.txt
├── Lec13_Disconnected_Operation_Eventual_Consistency
│   ├── bayou-conflicts.pdf
│   └── l-bayou.txt
├── Lec14_Case Studs_Relaxed_Consistency
│   ├── cooper-pnuts.pdf
│   └── l-pnuts.txt
├── Lec15_Case_Studis_Dynamo
│   ├── dynamo.pdf
│   └── l-dynamo.txt
├── Lec16_Wide-Area Publish_Subscribe
│   ├── l-wormhole.txt
│   └── wormhole.pdf
├── Lec17_Measuring_Consistency
│   ├── fb-consistency.pdf
│   └── l-existential.txt
├── Lec18_Case_Studies_Spark
│   ├── l-spark.txt
│   └── zaharia-spark.pdf
├── Lec19_Cluster_Management
│   ├── borg.pdf
│   └── l-borg.txt
├── Lec20_Peer-to-peer_Trackerless_Bittorrent_and_DHTs
│   ├── bep_0005.html
│   ├── bep_0005_files
│   │   └── bep.css
│   ├── l-dht.txt
│   └── stoica-chord.pdf
├── Lec21_Peer-to-peer_Bitcoin
│   ├── bitcoin.pdf
│   └── l-bitcoin.txt
├── Lec23_Project_demos
│   └── katabi-analogicfs.pdf
└── README.md
/6.824/.gitignore: -------------------------------------------------------------------------------- 1 | pkg/ 2 | api.key 3 | *-handin.tar.gz 4 | -------------------------------------------------------------------------------- /6.824/Makefile: -------------------------------------------------------------------------------- 1 | # This is the Makefile helping you submit the labs. 2 | # Just create 6.824/api.key with your API key in it, 3 | # and submit your lab with the following command: 4 | # $ make [lab1|lab2|lab3a|lab3b|lab4a|lab4b|lab5] 5 | 6 | LABS=" lab1 lab2 lab3a lab3b lab4a lab4b lab5 " 7 | 8 | %: 9 | @echo "Preparing $@-handin.tar.gz" 10 | @echo "Checking for committed temporary files..." 11 | @if git ls-files | grep -E 'mrtmp|mrinput' > /dev/null; then \ 12 | echo "" ; \ 13 | echo "OBS! You have committed some large temporary files:" ; \ 14 | echo "" ; \ 15 | git ls-files | grep -E 'mrtmp|mrinput' | sed 's/^/\t/' ; \ 16 | echo "" ; \ 17 | echo "Follow the instructions at http://stackoverflow.com/a/308684/472927" ; \ 18 | echo "to remove them, and then run make again." ; \ 19 | echo "" ; \ 20 | exit 1 ; \ 21 | fi 22 | @if echo $(LABS) | grep -q " $@ " ; then \ 23 | echo "Tarring up your submission..." ; \ 24 | tar cvzf $@-handin.tar.gz \ 25 | "--exclude=src/main/pg-*.txt" \ 26 | "--exclude=src/main/diskvd" \ 27 | "--exclude=src/mapreduce/824-mrinput-*.txt" \ 28 | "--exclude=mrtmp.*" \ 29 | "--exclude=src/main/diff.out" \ 30 | Makefile src; \ 31 | if ! test -e api.key ; then \ 32 | echo "Missing $(PWD)/api.key. Please create the file with your key in it or submit the $@-handin.tar.gz via the web interface."; \ 33 | else \ 34 | echo "Are you sure you want to submit $@? Enter 'yes' to continue:"; \ 35 | read line; \ 36 | if test "$$line" != "yes" ; then echo "Giving up submission"; exit; fi; \ 37 | if test `stat -c "%s" "$@-handin.tar.gz" 2>/dev/null || stat -f "%z" "$@-handin.tar.gz"` -ge 20971520 ; then echo "File exceeds 20MB."; exit; fi; \ 38 | mv api.key api.key.fix ; \ 39 | cat api.key.fix | tr -d '\n' > api.key ; \ 40 | rm api.key.fix ; \ 41 | curl -F file=@$@-handin.tar.gz -F "key= 0 { 74 | shard = int(key[0]) 75 | } 76 | shard %= shardmaster.NShards 77 | return shard 78 | } 79 | 80 | // 81 | // fetch the current value for a key. 82 | // returns "" if the key does not exist. 83 | // keeps trying forever in the face of all other errors. 84 | // 85 | func (ck *Clerk) Get(key string) string { 86 | ck.mu.Lock() 87 | defer ck.mu.Unlock() 88 | 89 | // You'll have to modify Get(). 90 | 91 | for { 92 | shard := key2shard(key) 93 | 94 | gid := ck.config.Shards[shard] 95 | 96 | servers, ok := ck.config.Groups[gid] 97 | 98 | if ok { 99 | // try each server in the shard's replication group. 100 | for _, srv := range servers { 101 | args := &GetArgs{} 102 | args.Key = key 103 | var reply GetReply 104 | ok := call(srv, "DisKV.Get", args, &reply) 105 | if ok && (reply.Err == OK || reply.Err == ErrNoKey) { 106 | return reply.Value 107 | } 108 | if ok && (reply.Err == ErrWrongGroup) { 109 | break 110 | } 111 | } 112 | } 113 | 114 | time.Sleep(100 * time.Millisecond) 115 | 116 | // ask master for a new configuration. 117 | ck.config = ck.sm.Query(-1) 118 | } 119 | } 120 | 121 | // send a Put or Append request. 122 | func (ck *Clerk) PutAppend(key string, value string, op string) { 123 | ck.mu.Lock() 124 | defer ck.mu.Unlock() 125 | 126 | // You'll have to modify PutAppend(). 
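	// One common refinement here (illustrative only, not part of the skeleton):
	// tag every request with a client ID and a per-client sequence number so
	// that servers can recognize and drop duplicate PutAppends produced by the
	// retry loop below, e.g.
	//
	//   args.ClientID = ck.id   // hypothetical fields you might add to Clerk / PutAppendArgs
	//   args.Seq = ck.seq
	//   ck.seq++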
127 | 128 | for { 129 | shard := key2shard(key) 130 | 131 | gid := ck.config.Shards[shard] 132 | 133 | servers, ok := ck.config.Groups[gid] 134 | 135 | if ok { 136 | // try each server in the shard's replication group. 137 | for _, srv := range servers { 138 | args := &PutAppendArgs{} 139 | args.Key = key 140 | args.Value = value 141 | args.Op = op 142 | var reply PutAppendReply 143 | ok := call(srv, "DisKV.PutAppend", args, &reply) 144 | if ok && reply.Err == OK { 145 | return 146 | } 147 | if ok && (reply.Err == ErrWrongGroup) { 148 | break 149 | } 150 | } 151 | } 152 | 153 | time.Sleep(100 * time.Millisecond) 154 | 155 | // ask master for a new configuration. 156 | ck.config = ck.sm.Query(-1) 157 | } 158 | } 159 | 160 | func (ck *Clerk) Put(key string, value string) { 161 | ck.PutAppend(key, value, "Put") 162 | } 163 | func (ck *Clerk) Append(key string, value string) { 164 | ck.PutAppend(key, value, "Append") 165 | } 166 | -------------------------------------------------------------------------------- /6.824/src/diskv/common.go: -------------------------------------------------------------------------------- 1 | package diskv 2 | 3 | // 4 | // Sharded key/value server. 5 | // Lots of replica groups, each running op-at-a-time paxos. 6 | // Shardmaster decides which group serves each shard. 7 | // Shardmaster may change shard assignment from time to time. 8 | // 9 | // You will have to modify these definitions. 10 | // 11 | 12 | const ( 13 | OK = "OK" 14 | ErrNoKey = "ErrNoKey" 15 | ErrWrongGroup = "ErrWrongGroup" 16 | ) 17 | 18 | type Err string 19 | 20 | type PutAppendArgs struct { 21 | Key string 22 | Value string 23 | Op string // "Put" or "Append" 24 | // You'll have to add definitions here. 25 | // Field names must start with capital letters, 26 | // otherwise RPC will break. 27 | 28 | } 29 | 30 | type PutAppendReply struct { 31 | Err Err 32 | } 33 | 34 | type GetArgs struct { 35 | Key string 36 | // You'll have to add definitions here. 37 | } 38 | 39 | type GetReply struct { 40 | Err Err 41 | Value string 42 | } 43 | 44 | -------------------------------------------------------------------------------- /6.824/src/diskv/server.go: -------------------------------------------------------------------------------- 1 | package diskv 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "time" 8 | import "paxos" 9 | import "sync" 10 | import "sync/atomic" 11 | import "os" 12 | import "syscall" 13 | import "encoding/gob" 14 | import "encoding/base32" 15 | import "math/rand" 16 | import "shardmaster" 17 | import "io/ioutil" 18 | import "strconv" 19 | 20 | 21 | const Debug = 0 22 | 23 | func DPrintf(format string, a ...interface{}) (n int, err error) { 24 | if Debug > 0 { 25 | log.Printf(format, a...) 26 | } 27 | return 28 | } 29 | 30 | 31 | type Op struct { 32 | // Your definitions here. 33 | } 34 | 35 | 36 | type DisKV struct { 37 | mu sync.Mutex 38 | l net.Listener 39 | me int 40 | dead int32 // for testing 41 | unreliable int32 // for testing 42 | sm *shardmaster.Clerk 43 | px *paxos.Paxos 44 | dir string // each replica has its own data directory 45 | 46 | gid int64 // my replica group ID 47 | 48 | // Your definitions here. 49 | } 50 | 51 | // 52 | // these are handy functions that might be useful 53 | // for reading and writing key/value files, and 54 | // for reading and writing entire shards. 55 | // puts the key files for each shard in a separate 56 | // directory. 
57 | // 58 | 59 | func (kv *DisKV) shardDir(shard int) string { 60 | d := kv.dir + "/shard-" + strconv.Itoa(shard) + "/" 61 | // create directory if needed. 62 | _, err := os.Stat(d) 63 | if err != nil { 64 | if err := os.Mkdir(d, 0777); err != nil { 65 | log.Fatalf("Mkdir(%v): %v", d, err) 66 | } 67 | } 68 | return d 69 | } 70 | 71 | // cannot use keys in file names directly, since 72 | // they might contain troublesome characters like /. 73 | // base32-encode the key to get a file name. 74 | // base32 rather than base64 b/c Mac has case-insensitive 75 | // file names. 76 | func (kv *DisKV) encodeKey(key string) string { 77 | return base32.StdEncoding.EncodeToString([]byte(key)) 78 | } 79 | 80 | func (kv *DisKV) decodeKey(filename string) (string, error) { 81 | key, err := base32.StdEncoding.DecodeString(filename) 82 | return string(key), err 83 | } 84 | 85 | // read the content of a key's file. 86 | func (kv *DisKV) fileGet(shard int, key string) (string, error) { 87 | fullname := kv.shardDir(shard) + "/key-" + kv.encodeKey(key) 88 | content, err := ioutil.ReadFile(fullname) 89 | return string(content), err 90 | } 91 | 92 | // replace the content of a key's file. 93 | // uses rename() to make the replacement atomic with 94 | // respect to crashes. 95 | func (kv *DisKV) filePut(shard int, key string, content string) error { 96 | fullname := kv.shardDir(shard) + "/key-" + kv.encodeKey(key) 97 | tempname := kv.shardDir(shard) + "/temp-" + kv.encodeKey(key) 98 | if err := ioutil.WriteFile(tempname, []byte(content), 0666); err != nil { 99 | return err 100 | } 101 | if err := os.Rename(tempname, fullname); err != nil { 102 | return err 103 | } 104 | return nil 105 | } 106 | 107 | // return content of every key file in a given shard. 108 | func (kv *DisKV) fileReadShard(shard int) map[string]string { 109 | m := map[string]string{} 110 | d := kv.shardDir(shard) 111 | files, err := ioutil.ReadDir(d) 112 | if err != nil { 113 | log.Fatalf("fileReadShard could not read %v: %v", d, err) 114 | } 115 | for _, fi := range files { 116 | n1 := fi.Name() 117 | if n1[0:4] == "key-" { 118 | key, err := kv.decodeKey(n1[4:]) 119 | if err != nil { 120 | log.Fatalf("fileReadShard bad file name %v: %v", n1, err) 121 | } 122 | content, err := kv.fileGet(shard, key) 123 | if err != nil { 124 | log.Fatalf("fileReadShard fileGet failed for %v: %v", key, err) 125 | } 126 | m[key] = content 127 | } 128 | } 129 | return m 130 | } 131 | 132 | // replace an entire shard directory. 133 | func (kv *DisKV) fileReplaceShard(shard int, m map[string]string) { 134 | d := kv.shardDir(shard) 135 | os.RemoveAll(d) // remove all existing files from shard. 136 | for k, v := range m { 137 | kv.filePut(shard, k, v) 138 | } 139 | } 140 | 141 | 142 | func (kv *DisKV) Get(args *GetArgs, reply *GetReply) error { 143 | // Your code here. 144 | return nil 145 | } 146 | 147 | // RPC handler for client Put and Append requests 148 | func (kv *DisKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 149 | // Your code here. 150 | return nil 151 | } 152 | 153 | // 154 | // Ask the shardmaster if there's a new configuration; 155 | // if so, re-configure. 156 | // 157 | func (kv *DisKV) tick() { 158 | // Your code here. 159 | } 160 | 161 | // tell the server to shut itself down. 162 | // please don't change these two functions. 163 | func (kv *DisKV) kill() { 164 | atomic.StoreInt32(&kv.dead, 1) 165 | kv.l.Close() 166 | kv.px.Kill() 167 | } 168 | 169 | // call this to find out if the server is dead. 
170 | func (kv *DisKV) isdead() bool { 171 | return atomic.LoadInt32(&kv.dead) != 0 172 | } 173 | 174 | // please do not change these two functions. 175 | func (kv *DisKV) Setunreliable(what bool) { 176 | if what { 177 | atomic.StoreInt32(&kv.unreliable, 1) 178 | } else { 179 | atomic.StoreInt32(&kv.unreliable, 0) 180 | } 181 | } 182 | 183 | func (kv *DisKV) isunreliable() bool { 184 | return atomic.LoadInt32(&kv.unreliable) != 0 185 | } 186 | 187 | // 188 | // Start a shardkv server. 189 | // gid is the ID of the server's replica group. 190 | // shardmasters[] contains the ports of the 191 | // servers that implement the shardmaster. 192 | // servers[] contains the ports of the servers 193 | // in this replica group. 194 | // Me is the index of this server in servers[]. 195 | // dir is the directory name under which this 196 | // replica should store all its files. 197 | // each replica is passed a different directory. 198 | // restart is false the very first time this server 199 | // is started, and true to indicate a re-start 200 | // after a crash or after a crash with disk loss. 201 | // 202 | func StartServer(gid int64, shardmasters []string, 203 | servers []string, me int, dir string, restart bool) *DisKV { 204 | 205 | kv := new(DisKV) 206 | kv.me = me 207 | kv.gid = gid 208 | kv.sm = shardmaster.MakeClerk(shardmasters) 209 | kv.dir = dir 210 | 211 | // Your initialization code here. 212 | // Don't call Join(). 213 | 214 | // log.SetOutput(ioutil.Discard) 215 | 216 | gob.Register(Op{}) 217 | 218 | rpcs := rpc.NewServer() 219 | rpcs.Register(kv) 220 | 221 | kv.px = paxos.Make(servers, me, rpcs) 222 | 223 | // log.SetOutput(os.Stdout) 224 | 225 | 226 | 227 | os.Remove(servers[me]) 228 | l, e := net.Listen("unix", servers[me]) 229 | if e != nil { 230 | log.Fatal("listen error: ", e) 231 | } 232 | kv.l = l 233 | 234 | // please do not change any of the following code, 235 | // or do anything to subvert it. 236 | 237 | go func() { 238 | for kv.isdead() == false { 239 | conn, err := kv.l.Accept() 240 | if err == nil && kv.isdead() == false { 241 | if kv.isunreliable() && (rand.Int63()%1000) < 100 { 242 | // discard the request. 243 | conn.Close() 244 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 { 245 | // process the request but force discard of reply. 246 | c1 := conn.(*net.UnixConn) 247 | f, _ := c1.File() 248 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 249 | if err != nil { 250 | fmt.Printf("shutdown: %v\n", err) 251 | } 252 | go rpcs.ServeConn(conn) 253 | } else { 254 | go rpcs.ServeConn(conn) 255 | } 256 | } else if err == nil { 257 | conn.Close() 258 | } 259 | if err != nil && kv.isdead() == false { 260 | fmt.Printf("DisKV(%v) accept: %v\n", me, err.Error()) 261 | kv.kill() 262 | } 263 | } 264 | }() 265 | 266 | go func() { 267 | for kv.isdead() == false { 268 | kv.tick() 269 | time.Sleep(250 * time.Millisecond) 270 | } 271 | }() 272 | 273 | return kv 274 | } 275 | -------------------------------------------------------------------------------- /6.824/src/kvpaxos/client.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import "net/rpc" 4 | import "crypto/rand" 5 | import "math/big" 6 | 7 | import "fmt" 8 | 9 | type Clerk struct { 10 | servers []string 11 | // You will have to modify this struct. 
12 | } 13 | 14 | func nrand() int64 { 15 | max := big.NewInt(int64(1) << 62) 16 | bigx, _ := rand.Int(rand.Reader, max) 17 | x := bigx.Int64() 18 | return x 19 | } 20 | 21 | func MakeClerk(servers []string) *Clerk { 22 | ck := new(Clerk) 23 | ck.servers = servers 24 | // You'll have to add code here. 25 | return ck 26 | } 27 | 28 | // 29 | // call() sends an RPC to the rpcname handler on server srv 30 | // with arguments args, waits for the reply, and leaves the 31 | // reply in reply. the reply argument should be a pointer 32 | // to a reply structure. 33 | // 34 | // the return value is true if the server responded, and false 35 | // if call() was not able to contact the server. in particular, 36 | // the reply's contents are only valid if call() returned true. 37 | // 38 | // you should assume that call() will return an 39 | // error after a while if the server is dead. 40 | // don't provide your own time-out mechanism. 41 | // 42 | // please use call() to send all RPCs, in client.go and server.go. 43 | // please don't change this function. 44 | // 45 | func call(srv string, rpcname string, 46 | args interface{}, reply interface{}) bool { 47 | c, errx := rpc.Dial("unix", srv) 48 | if errx != nil { 49 | return false 50 | } 51 | defer c.Close() 52 | 53 | err := c.Call(rpcname, args, reply) 54 | if err == nil { 55 | return true 56 | } 57 | 58 | fmt.Println(err) 59 | return false 60 | } 61 | 62 | // 63 | // fetch the current value for a key. 64 | // returns "" if the key does not exist. 65 | // keeps trying forever in the face of all other errors. 66 | // 67 | func (ck *Clerk) Get(key string) string { 68 | // You will have to modify this function. 69 | return "" 70 | } 71 | 72 | // 73 | // shared by Put and Append. 74 | // 75 | func (ck *Clerk) PutAppend(key string, value string, op string) { 76 | // You will have to modify this function. 77 | } 78 | 79 | func (ck *Clerk) Put(key string, value string) { 80 | ck.PutAppend(key, value, "Put") 81 | } 82 | func (ck *Clerk) Append(key string, value string) { 83 | ck.PutAppend(key, value, "Append") 84 | } 85 | -------------------------------------------------------------------------------- /6.824/src/kvpaxos/common.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ) 7 | 8 | type Err string 9 | 10 | // Put or Append 11 | type PutAppendArgs struct { 12 | // You'll have to add definitions here. 13 | Key string 14 | Value string 15 | Op string // "Put" or "Append" 16 | // You'll have to add definitions here. 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | } 20 | 21 | type PutAppendReply struct { 22 | Err Err 23 | } 24 | 25 | type GetArgs struct { 26 | Key string 27 | // You'll have to add definitions here. 28 | } 29 | 30 | type GetReply struct { 31 | Err Err 32 | Value string 33 | } 34 | -------------------------------------------------------------------------------- /6.824/src/kvpaxos/server.go: -------------------------------------------------------------------------------- 1 | package kvpaxos 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "paxos" 8 | import "sync" 9 | import "sync/atomic" 10 | import "os" 11 | import "syscall" 12 | import "encoding/gob" 13 | import "math/rand" 14 | 15 | 16 | const Debug = 0 17 | 18 | func DPrintf(format string, a ...interface{}) (n int, err error) { 19 | if Debug > 0 { 20 | log.Printf(format, a...) 
21 | } 22 | return 23 | } 24 | 25 | 26 | type Op struct { 27 | // Your definitions here. 28 | // Field names must start with capital letters, 29 | // otherwise RPC will break. 30 | } 31 | 32 | type KVPaxos struct { 33 | mu sync.Mutex 34 | l net.Listener 35 | me int 36 | dead int32 // for testing 37 | unreliable int32 // for testing 38 | px *paxos.Paxos 39 | 40 | // Your definitions here. 41 | } 42 | 43 | 44 | func (kv *KVPaxos) Get(args *GetArgs, reply *GetReply) error { 45 | // Your code here. 46 | return nil 47 | } 48 | 49 | func (kv *KVPaxos) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 50 | // Your code here. 51 | 52 | return nil 53 | } 54 | 55 | // tell the server to shut itself down. 56 | // please do not change these two functions. 57 | func (kv *KVPaxos) kill() { 58 | DPrintf("Kill(%d): die\n", kv.me) 59 | atomic.StoreInt32(&kv.dead, 1) 60 | kv.l.Close() 61 | kv.px.Kill() 62 | } 63 | 64 | // call this to find out if the server is dead. 65 | func (kv *KVPaxos) isdead() bool { 66 | return atomic.LoadInt32(&kv.dead) != 0 67 | } 68 | 69 | // please do not change these two functions. 70 | func (kv *KVPaxos) setunreliable(what bool) { 71 | if what { 72 | atomic.StoreInt32(&kv.unreliable, 1) 73 | } else { 74 | atomic.StoreInt32(&kv.unreliable, 0) 75 | } 76 | } 77 | 78 | func (kv *KVPaxos) isunreliable() bool { 79 | return atomic.LoadInt32(&kv.unreliable) != 0 80 | } 81 | 82 | // 83 | // servers[] contains the ports of the set of 84 | // servers that will cooperate via Paxos to 85 | // form the fault-tolerant key/value service. 86 | // me is the index of the current server in servers[]. 87 | // 88 | func StartServer(servers []string, me int) *KVPaxos { 89 | // call gob.Register on structures you want 90 | // Go's RPC library to marshall/unmarshall. 91 | gob.Register(Op{}) 92 | 93 | kv := new(KVPaxos) 94 | kv.me = me 95 | 96 | // Your initialization code here. 97 | 98 | rpcs := rpc.NewServer() 99 | rpcs.Register(kv) 100 | 101 | kv.px = paxos.Make(servers, me, rpcs) 102 | 103 | os.Remove(servers[me]) 104 | l, e := net.Listen("unix", servers[me]) 105 | if e != nil { 106 | log.Fatal("listen error: ", e) 107 | } 108 | kv.l = l 109 | 110 | 111 | // please do not change any of the following code, 112 | // or do anything to subvert it. 113 | 114 | go func() { 115 | for kv.isdead() == false { 116 | conn, err := kv.l.Accept() 117 | if err == nil && kv.isdead() == false { 118 | if kv.isunreliable() && (rand.Int63()%1000) < 100 { 119 | // discard the request. 120 | conn.Close() 121 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 { 122 | // process the request but force discard of reply. 123 | c1 := conn.(*net.UnixConn) 124 | f, _ := c1.File() 125 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 126 | if err != nil { 127 | fmt.Printf("shutdown: %v\n", err) 128 | } 129 | go rpcs.ServeConn(conn) 130 | } else { 131 | go rpcs.ServeConn(conn) 132 | } 133 | } else if err == nil { 134 | conn.Close() 135 | } 136 | if err != nil && kv.isdead() == false { 137 | fmt.Printf("KVPaxos(%v) accept: %v\n", me, err.Error()) 138 | kv.kill() 139 | } 140 | } 141 | }() 142 | 143 | return kv 144 | } 145 | -------------------------------------------------------------------------------- /6.824/src/kvraft/client.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import "labrpc" 4 | import "crypto/rand" 5 | import "math/big" 6 | 7 | 8 | type Clerk struct { 9 | servers []*labrpc.ClientEnd 10 | // You will have to modify this struct. 
11 | } 12 | 13 | func nrand() int64 { 14 | max := big.NewInt(int64(1) << 62) 15 | bigx, _ := rand.Int(rand.Reader, max) 16 | x := bigx.Int64() 17 | return x 18 | } 19 | 20 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 21 | ck := new(Clerk) 22 | ck.servers = servers 23 | // You'll have to add code here. 24 | return ck 25 | } 26 | 27 | // 28 | // fetch the current value for a key. 29 | // returns "" if the key does not exist. 30 | // keeps trying forever in the face of all other errors. 31 | // 32 | // you can send an RPC with code like this: 33 | // ok := ck.servers[i].Call("RaftKV.Get", &args, &reply) 34 | // 35 | // the types of args and reply (including whether they are pointers) 36 | // must match the declared types of the RPC handler function's 37 | // arguments. and reply must be passed as a pointer. 38 | // 39 | func (ck *Clerk) Get(key string) string { 40 | 41 | // You will have to modify this function. 42 | return "" 43 | } 44 | 45 | // 46 | // shared by Put and Append. 47 | // 48 | // you can send an RPC with code like this: 49 | // ok := ck.servers[i].Call("RaftKV.PutAppend", &args, &reply) 50 | // 51 | // the types of args and reply (including whether they are pointers) 52 | // must match the declared types of the RPC handler function's 53 | // arguments. and reply must be passed as a pointer. 54 | // 55 | func (ck *Clerk) PutAppend(key string, value string, op string) { 56 | // You will have to modify this function. 57 | } 58 | 59 | func (ck *Clerk) Put(key string, value string) { 60 | ck.PutAppend(key, value, "Put") 61 | } 62 | func (ck *Clerk) Append(key string, value string) { 63 | ck.PutAppend(key, value, "Append") 64 | } 65 | -------------------------------------------------------------------------------- /6.824/src/kvraft/common.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ) 7 | 8 | type Err string 9 | 10 | // Put or Append 11 | type PutAppendArgs struct { 12 | // You'll have to add definitions here. 13 | Key string 14 | Value string 15 | Op string // "Put" or "Append" 16 | // You'll have to add definitions here. 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | } 20 | 21 | type PutAppendReply struct { 22 | WrongLeader bool 23 | Err Err 24 | } 25 | 26 | type GetArgs struct { 27 | Key string 28 | // You'll have to add definitions here. 29 | } 30 | 31 | type GetReply struct { 32 | WrongLeader bool 33 | Err Err 34 | Value string 35 | } 36 | -------------------------------------------------------------------------------- /6.824/src/kvraft/server.go: -------------------------------------------------------------------------------- 1 | package raftkv 2 | 3 | import ( 4 | "encoding/gob" 5 | "labrpc" 6 | "log" 7 | "raft" 8 | "sync" 9 | ) 10 | 11 | const Debug = 0 12 | 13 | func DPrintf(format string, a ...interface{}) (n int, err error) { 14 | if Debug > 0 { 15 | log.Printf(format, a...) 16 | } 17 | return 18 | } 19 | 20 | 21 | type Op struct { 22 | // Your definitions here. 23 | // Field names must start with capital letters, 24 | // otherwise RPC will break. 25 | } 26 | 27 | type RaftKV struct { 28 | mu sync.Mutex 29 | me int 30 | rf *raft.Raft 31 | applyCh chan raft.ApplyMsg 32 | 33 | maxraftstate int // snapshot if log grows this big 34 | 35 | // Your definitions here. 36 | } 37 | 38 | 39 | func (kv *RaftKV) Get(args *GetArgs, reply *GetReply) { 40 | // Your code here. 
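	// One widely used shape for this handler (an illustrative sketch, not a
	// required design): wrap the request in an Op, hand it to Raft, and only
	// answer once the committed Op comes back on kv.applyCh, e.g.
	//
	//   index, _, isLeader := kv.rf.Start(Op{ /* key, request ids, ... */ })
	//   if !isLeader {
	//       reply.WrongLeader = true
	//       return
	//   }
	//   // block until the entry at `index` is applied, then fill reply.Value
	//   // (or give up and let the Clerk retry if a different op shows up there).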
41 | } 42 | 43 | func (kv *RaftKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 44 | // Your code here. 45 | } 46 | 47 | // 48 | // the tester calls Kill() when a RaftKV instance won't 49 | // be needed again. you are not required to do anything 50 | // in Kill(), but it might be convenient to (for example) 51 | // turn off debug output from this instance. 52 | // 53 | func (kv *RaftKV) Kill() { 54 | kv.rf.Kill() 55 | // Your code here, if desired. 56 | } 57 | 58 | // 59 | // servers[] contains the ports of the set of 60 | // servers that will cooperate via Raft to 61 | // form the fault-tolerant key/value service. 62 | // me is the index of the current server in servers[]. 63 | // the k/v server should store snapshots with persister.SaveSnapshot(), 64 | // and Raft should save its state (including log) with persister.SaveRaftState(). 65 | // the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes, 66 | // in order to allow Raft to garbage-collect its log. if maxraftstate is -1, 67 | // you don't need to snapshot. 68 | // StartKVServer() must return quickly, so it should start goroutines 69 | // for any long-running work. 70 | // 71 | func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *RaftKV { 72 | // call gob.Register on structures you want 73 | // Go's RPC library to marshall/unmarshall. 74 | gob.Register(Op{}) 75 | 76 | kv := new(RaftKV) 77 | kv.me = me 78 | kv.maxraftstate = maxraftstate 79 | 80 | // Your initialization code here. 81 | 82 | kv.applyCh = make(chan raft.ApplyMsg) 83 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 84 | 85 | 86 | return kv 87 | } 88 | -------------------------------------------------------------------------------- /6.824/src/lockservice/client.go: -------------------------------------------------------------------------------- 1 | package lockservice 2 | 3 | import "net/rpc" 4 | import "fmt" 5 | 6 | 7 | // 8 | // the lockservice Clerk lives in the client 9 | // and maintains a little state. 10 | // 11 | type Clerk struct { 12 | servers [2]string // primary port, backup port 13 | // Your definitions here. 14 | } 15 | 16 | 17 | func MakeClerk(primary string, backup string) *Clerk { 18 | ck := new(Clerk) 19 | ck.servers[0] = primary 20 | ck.servers[1] = backup 21 | // Your initialization code here. 22 | return ck 23 | } 24 | 25 | // 26 | // call() sends an RPC to the rpcname handler on server srv 27 | // with arguments args, waits for the reply, and leaves the 28 | // reply in reply. the reply argument should be the address 29 | // of a reply structure. 30 | // 31 | // call() returns true if the server responded, and false 32 | // if call() was not able to contact the server. in particular, 33 | // reply's contents are valid if and only if call() returned true. 34 | // 35 | // you should assume that call() will return an 36 | // error after a while if the server is dead. 37 | // don't provide your own time-out mechanism. 38 | // 39 | // please use call() to send all RPCs, in client.go and server.go. 40 | // please don't change this function. 41 | // 42 | func call(srv string, rpcname string, 43 | args interface{}, reply interface{}) bool { 44 | c, errx := rpc.Dial("unix", srv) 45 | if errx != nil { 46 | return false 47 | } 48 | defer c.Close() 49 | 50 | err := c.Call(rpcname, args, reply) 51 | if err == nil { 52 | return true 53 | } 54 | 55 | fmt.Println(err) 56 | return false 57 | } 58 | 59 | // 60 | // ask the lock service for a lock. 
61 | // returns true if the lock service 62 | // granted the lock, false otherwise. 63 | // 64 | // you will have to modify this function. 65 | // 66 | func (ck *Clerk) Lock(lockname string) bool { 67 | // prepare the arguments. 68 | args := &LockArgs{} 69 | args.Lockname = lockname 70 | var reply LockReply 71 | 72 | // send an RPC request, wait for the reply. 73 | ok := call(ck.servers[0], "LockServer.Lock", args, &reply) 74 | if ok == false { 75 | return false 76 | } 77 | 78 | return reply.OK 79 | } 80 | 81 | 82 | // 83 | // ask the lock service to unlock a lock. 84 | // returns true if the lock was previously held, 85 | // false otherwise. 86 | // 87 | 88 | func (ck *Clerk) Unlock(lockname string) bool { 89 | 90 | // Your code here. 91 | 92 | return false 93 | } 94 | -------------------------------------------------------------------------------- /6.824/src/lockservice/common.go: -------------------------------------------------------------------------------- 1 | package lockservice 2 | 3 | // 4 | // RPC definitions for a simple lock service. 5 | // 6 | // You will need to modify this file. 7 | // 8 | 9 | // 10 | // Lock(lockname) returns OK=true if the lock is not held. 11 | // If it is held, it returns OK=false immediately. 12 | // 13 | type LockArgs struct { 14 | // Go's net/rpc requires that these field 15 | // names start with upper case letters! 16 | Lockname string // lock name 17 | } 18 | 19 | type LockReply struct { 20 | OK bool 21 | } 22 | 23 | // 24 | // Unlock(lockname) returns OK=true if the lock was held. 25 | // It returns OK=false if the lock was not held. 26 | // 27 | type UnlockArgs struct { 28 | Lockname string 29 | } 30 | 31 | type UnlockReply struct { 32 | OK bool 33 | } 34 | -------------------------------------------------------------------------------- /6.824/src/lockservice/server.go: -------------------------------------------------------------------------------- 1 | package lockservice 2 | 3 | import "net" 4 | import "net/rpc" 5 | import "log" 6 | import "sync" 7 | import "fmt" 8 | import "os" 9 | import "io" 10 | import "time" 11 | 12 | type LockServer struct { 13 | mu sync.Mutex 14 | l net.Listener 15 | dead bool // for test_test.go 16 | dying bool // for test_test.go 17 | 18 | am_primary bool // am I the primary? 19 | backup string // backup's port 20 | 21 | // for each lock name, is it locked? 22 | locks map[string]bool 23 | } 24 | 25 | 26 | // 27 | // server Lock RPC handler. 28 | // 29 | // you will have to modify this function 30 | // 31 | func (ls *LockServer) Lock(args *LockArgs, reply *LockReply) error { 32 | ls.mu.Lock() 33 | defer ls.mu.Unlock() 34 | 35 | 36 | locked, _ := ls.locks[args.Lockname] 37 | 38 | if locked { 39 | reply.OK = false 40 | } else { 41 | reply.OK = true 42 | ls.locks[args.Lockname] = true 43 | } 44 | 45 | return nil 46 | } 47 | 48 | // 49 | // server Unlock RPC handler. 50 | // 51 | func (ls *LockServer) Unlock(args *UnlockArgs, reply *UnlockReply) error { 52 | 53 | // Your code here. 54 | 55 | return nil 56 | } 57 | 58 | // 59 | // tell the server to shut itself down. 60 | // for testing. 61 | // please don't change this. 62 | // 63 | func (ls *LockServer) kill() { 64 | ls.dead = true 65 | ls.l.Close() 66 | } 67 | 68 | // 69 | // hack to allow test_test.go to have primary process 70 | // an RPC but not send a reply. can't use the shutdown() 71 | // trick b/c that causes client to immediately get an 72 | // error and send to backup before primary does. 73 | // please don't change anything to do with DeafConn. 
74 | // 75 | type DeafConn struct { 76 | c io.ReadWriteCloser 77 | } 78 | 79 | func (dc DeafConn) Write(p []byte) (n int, err error) { 80 | return len(p), nil 81 | } 82 | func (dc DeafConn) Close() error { 83 | return dc.c.Close() 84 | } 85 | func (dc DeafConn) Read(p []byte) (n int, err error) { 86 | return dc.c.Read(p) 87 | } 88 | 89 | func StartServer(primary string, backup string, am_primary bool) *LockServer { 90 | ls := new(LockServer) 91 | ls.backup = backup 92 | ls.am_primary = am_primary 93 | ls.locks = map[string]bool{} 94 | 95 | // Your initialization code here. 96 | 97 | 98 | me := "" 99 | if am_primary { 100 | me = primary 101 | } else { 102 | me = backup 103 | } 104 | 105 | // tell net/rpc about our RPC server and handlers. 106 | rpcs := rpc.NewServer() 107 | rpcs.Register(ls) 108 | 109 | // prepare to receive connections from clients. 110 | // change "unix" to "tcp" to use over a network. 111 | os.Remove(me) // only needed for "unix" 112 | l, e := net.Listen("unix", me) 113 | if e != nil { 114 | log.Fatal("listen error: ", e) 115 | } 116 | ls.l = l 117 | 118 | // please don't change any of the following code, 119 | // or do anything to subvert it. 120 | 121 | // create a thread to accept RPC connections from clients. 122 | go func() { 123 | for ls.dead == false { 124 | conn, err := ls.l.Accept() 125 | if err == nil && ls.dead == false { 126 | if ls.dying { 127 | // process the request but force discard of reply. 128 | 129 | // without this the connection is never closed, 130 | // b/c ServeConn() is waiting for more requests. 131 | // test_test.go depends on this two seconds. 132 | go func() { 133 | time.Sleep(2 * time.Second) 134 | conn.Close() 135 | }() 136 | ls.l.Close() 137 | 138 | // this object has the type ServeConn expects, 139 | // but discards writes (i.e. discards the RPC reply). 140 | deaf_conn := DeafConn{c: conn} 141 | 142 | rpcs.ServeConn(deaf_conn) 143 | 144 | ls.dead = true 145 | } else { 146 | go rpcs.ServeConn(conn) 147 | } 148 | } else if err == nil { 149 | conn.Close() 150 | } 151 | if err != nil && ls.dead == false { 152 | fmt.Printf("LockServer(%v) accept: %v\n", me, err.Error()) 153 | ls.kill() 154 | } 155 | } 156 | }() 157 | 158 | return ls 159 | } 160 | -------------------------------------------------------------------------------- /6.824/src/main/diskvd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/6.824/src/main/diskvd -------------------------------------------------------------------------------- /6.824/src/main/diskvd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // start a diskvd server. it's a member of some replica 5 | // group, which has other members, and it needs to know 6 | // how to talk to the members of the shardmaster service. 7 | // used by ../diskv/test_test.go 8 | // 9 | // arguments: 10 | // -g groupid 11 | // -m masterport1 -m masterport2 ... 12 | // -s replicaport1 -s replicaport2 ... 13 | // -i my-index-in-server-port-list 14 | // -u unreliable 15 | // -d directory 16 | // -r restart 17 | 18 | import "time" 19 | import "diskv" 20 | import "os" 21 | import "fmt" 22 | import "strconv" 23 | import "runtime" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: diskvd -g gid -m master... -s server... 
-i my-index -d dir\n") 27 | os.Exit(1) 28 | } 29 | 30 | func main() { 31 | var gid int64 = -1 // my replica group ID 32 | masters := []string{} // ports of shardmasters 33 | replicas := []string{} // ports of servers in my replica group 34 | me := -1 // my index in replicas[] 35 | unreliable := false 36 | dir := "" // store persistent data here 37 | restart := false 38 | 39 | for i := 1; i+1 < len(os.Args); i += 2 { 40 | a0 := os.Args[i] 41 | a1 := os.Args[i+1] 42 | if a0 == "-g" { 43 | gid, _ = strconv.ParseInt(a1, 10, 64) 44 | } else if a0 == "-m" { 45 | masters = append(masters, a1) 46 | } else if a0 == "-s" { 47 | replicas = append(replicas, a1) 48 | } else if a0 == "-i" { 49 | me, _ = strconv.Atoi(a1) 50 | } else if a0 == "-u" { 51 | unreliable, _ = strconv.ParseBool(a1) 52 | } else if a0 == "-d" { 53 | dir = a1 54 | } else if a0 == "-r" { 55 | restart, _ = strconv.ParseBool(a1) 56 | } else { 57 | usage() 58 | } 59 | } 60 | 61 | if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" { 62 | usage() 63 | } 64 | 65 | runtime.GOMAXPROCS(4) 66 | 67 | srv := diskv.StartServer(gid, masters, replicas, me, dir, restart) 68 | srv.Setunreliable(unreliable) 69 | 70 | // for safety, force quit after 10 minutes. 71 | time.Sleep(10 * 60 * time.Second) 72 | mep, _ := os.FindProcess(os.Getpid()) 73 | mep.Kill() 74 | } 75 | -------------------------------------------------------------------------------- /6.824/src/main/ii.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "os" 4 | import "fmt" 5 | import "mapreduce" 6 | 7 | // The mapping function is called once for each piece of the input. 8 | // In this framework, the key is the name of the file that is being processed, 9 | // and the value is the file's contents. The return value should be a slice of 10 | // key/value pairs, each represented by a mapreduce.KeyValue. 11 | func mapF(document string, value string) (res []mapreduce.KeyValue) { 12 | // TODO: you should complete this to do the inverted index challenge 13 | } 14 | 15 | // The reduce function is called once for each key generated by Map, with a 16 | // list of that key's string value (merged across all inputs). The return value 17 | // should be a single output value for that key. 18 | func reduceF(key string, values []string) string { 19 | // TODO: you should complete this to do the inverted index challenge 20 | } 21 | 22 | // Can be run in 3 ways: 23 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 24 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. 
xN.txt) 25 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 26 | func main() { 27 | if len(os.Args) < 4 { 28 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 29 | } else if os.Args[1] == "master" { 30 | var mr *mapreduce.Master 31 | if os.Args[2] == "sequential" { 32 | mr = mapreduce.Sequential("iiseq", os.Args[3:], 3, mapF, reduceF) 33 | } else { 34 | mr = mapreduce.Distributed("iiseq", os.Args[3:], 3, os.Args[2]) 35 | } 36 | mr.Wait() 37 | } else { 38 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /6.824/src/main/lockc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see comments in lockd.go 5 | // 6 | 7 | import "lockservice" 8 | import "os" 9 | import "fmt" 10 | 11 | func usage() { 12 | fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n") 13 | os.Exit(1) 14 | } 15 | 16 | func main() { 17 | if len(os.Args) == 5 { 18 | ck := lockservice.MakeClerk(os.Args[2], os.Args[3]) 19 | var ok bool 20 | if os.Args[1] == "-l" { 21 | ok = ck.Lock(os.Args[4]) 22 | } else if os.Args[1] == "-u" { 23 | ok = ck.Unlock(os.Args[4]) 24 | } else { 25 | usage() 26 | } 27 | fmt.Printf("reply: %v\n", ok) 28 | } else { 29 | usage() 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /6.824/src/main/lockd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // export GOPATH=~/6.824 4 | // go build lockd.go 5 | // go build lockc.go 6 | // ./lockd -p a b & 7 | // ./lockd -b a b & 8 | // ./lockc -l a b lx 9 | // ./lockc -u a b lx 10 | // 11 | // on Athena, use /tmp/myname-a and /tmp/myname-b 12 | // instead of a and b. 
13 | 14 | import "time" 15 | import "lockservice" 16 | import "os" 17 | import "fmt" 18 | 19 | func main() { 20 | if len(os.Args) == 4 && os.Args[1] == "-p" { 21 | lockservice.StartServer(os.Args[2], os.Args[3], true) 22 | } else if len(os.Args) == 4 && os.Args[1] == "-b" { 23 | lockservice.StartServer(os.Args[2], os.Args[3], false) 24 | } else { 25 | fmt.Printf("Usage: lockd -p|-b primaryport backupport\n") 26 | os.Exit(1) 27 | } 28 | for { 29 | time.Sleep(100 * time.Second) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /6.824/src/main/mr-challenge.txt: -------------------------------------------------------------------------------- 1 | women: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 2 | won: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 3 | wonderful: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 4 | words: 15 pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 5 | worked: 15 pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 6 | worse: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 7 | wounded: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 8 | yes: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-metamorphosis.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 9 | younger: 15 pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 10 | yours: 15 
pg-being_ernest.txt,pg-dorian_gray.txt,pg-dracula.txt,pg-emma.txt,pg-frankenstein.txt,pg-great_expectations.txt,pg-grimm.txt,pg-huckleberry_finn.txt,pg-les_miserables.txt,pg-moby_dick.txt,pg-sherlock_holmes.txt,pg-tale_of_two_cities.txt,pg-tom_sawyer.txt,pg-ulysses.txt,pg-war_and_peace.txt 11 | -------------------------------------------------------------------------------- /6.824/src/main/mr-testout.txt: -------------------------------------------------------------------------------- 1 | he: 34077 2 | was: 37044 3 | that: 37495 4 | I: 44502 5 | in: 46092 6 | a: 60558 7 | to: 74357 8 | of: 79727 9 | and: 93990 10 | the: 154024 11 | -------------------------------------------------------------------------------- /6.824/src/main/pbc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // pbservice client application 5 | // 6 | // export GOPATH=~/6.824 7 | // go build viewd.go 8 | // go build pbd.go 9 | // go build pbc.go 10 | // ./viewd /tmp/rtm-v & 11 | // ./pbd /tmp/rtm-v /tmp/rtm-1 & 12 | // ./pbd /tmp/rtm-v /tmp/rtm-2 & 13 | // ./pbc /tmp/rtm-v key1 value1 14 | // ./pbc /tmp/rtm-v key1 15 | // 16 | // change "rtm" to your user name. 17 | // start the pbd programs in separate windows and kill 18 | // and restart them to exercise fault tolerance. 19 | // 20 | 21 | import "pbservice" 22 | import "os" 23 | import "fmt" 24 | 25 | func usage() { 26 | fmt.Printf("Usage: pbc viewport key\n") 27 | fmt.Printf(" pbc viewport key value\n") 28 | os.Exit(1) 29 | } 30 | 31 | func main() { 32 | if len(os.Args) == 3 { 33 | // get 34 | ck := pbservice.MakeClerk(os.Args[1], "") 35 | v := ck.Get(os.Args[2]) 36 | fmt.Printf("%v\n", v) 37 | } else if len(os.Args) == 4 { 38 | // put 39 | ck := pbservice.MakeClerk(os.Args[1], "") 40 | ck.Put(os.Args[2], os.Args[3]) 41 | } else { 42 | usage() 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /6.824/src/main/pbd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "pbservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 3 { 14 | fmt.Printf("Usage: pbd viewport myport\n") 15 | os.Exit(1) 16 | } 17 | 18 | pbservice.StartServer(os.Args[1], os.Args[2]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /6.824/src/main/test-ii.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run ii.go master sequential pg-*.txt 3 | sort -k1,1 mrtmp.iiseq | sort -snk2,2 | grep -v '16' | tail -10 | diff - mr-challenge.txt > diff.out 4 | if [ -s diff.out ] 5 | then 6 | echo "Failed test. Output should be as in mr-challenge.txt. Your output differs as follows (from diff.out):" > /dev/stderr 7 | cat diff.out 8 | else 9 | echo "Passed test" > /dev/stderr 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /6.824/src/main/test-mr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | here=$(dirname "$0") 3 | [[ "$here" = /* ]] || here="$PWD/$here" 4 | export GOPATH="$here/../../" 5 | echo "" 6 | echo "==> Part I" 7 | go test -run Sequential mapreduce/... 
8 | echo "" 9 | echo "==> Part II" 10 | (cd "$here" && ./test-wc.sh > /dev/null) 11 | echo "" 12 | echo "==> Part III" 13 | go test -run TestBasic mapreduce/... 14 | echo "" 15 | echo "==> Part IV" 16 | go test -run Failure mapreduce/... 17 | echo "" 18 | echo "==> Part V (challenge)" 19 | (cd "$here" && ./test-ii.sh > /dev/null) 20 | 21 | rm "$here"/mrtmp.* "$here"/diff.out 22 | -------------------------------------------------------------------------------- /6.824/src/main/test-wc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | go run wc.go master sequential pg-*.txt 3 | sort -n -k2 mrtmp.wcseq | tail -10 | diff - mr-testout.txt > diff.out 4 | if [ -s diff.out ] 5 | then 6 | echo "Failed test. Output should be as in mr-testout.txt. Your output differs as follows (from diff.out):" > /dev/stderr 7 | cat diff.out 8 | else 9 | echo "Passed test" > /dev/stderr 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /6.824/src/main/viewd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // 4 | // see directions in pbc.go 5 | // 6 | 7 | import "time" 8 | import "viewservice" 9 | import "os" 10 | import "fmt" 11 | 12 | func main() { 13 | if len(os.Args) != 2 { 14 | fmt.Printf("Usage: viewd port\n") 15 | os.Exit(1) 16 | } 17 | 18 | viewservice.StartServer(os.Args[1]) 19 | 20 | for { 21 | time.Sleep(100 * time.Second) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /6.824/src/main/wc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "../mapreduce" 6 | "os" 7 | "strings" 8 | "strconv" 9 | "log" 10 | "unicode" 11 | ) 12 | 13 | // The mapping function is called once for each piece of the input. 14 | // In this framework, the key is the name of the file that is being processed, 15 | // and the value is the file's contents. The return value should be a slice of 16 | // key/value pairs, each represented by a mapreduce.KeyValue. 17 | func mapF(document string, value string) (res []mapreduce.KeyValue) { 18 | // 在wordcount的例子中mapF的功能应该是string中获取到单词(关注下strings.FieldsFunc打用法吧), 19 | // 返回的结构应该类似KeyValue{w, "1"} 20 | } 21 | 22 | // The reduce function is called once for each key generated by Map, with a 23 | // list of that key's string value (merged across all inputs). The return value 24 | // should be a single output value for that key. 25 | func reduceF(key string, values []string) string { 26 | // TODO: you also have to write this function 27 | // reduceF对每个key调用,然后处理values,在这个例子中,相加全部的1就是单词出现打次数来 28 | } 29 | 30 | // Can be run in 3 ways: 31 | // 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt) 32 | // 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. 
xN.txt) 33 | // 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &) 34 | func main() { 35 | if len(os.Args) < 4 { 36 | fmt.Printf("%s: see usage comments in file\n", os.Args[0]) 37 | } else if os.Args[1] == "master" { 38 | var mr *mapreduce.Master 39 | if os.Args[2] == "sequential" { 40 | mr = mapreduce.Sequential("wcseq", os.Args[3:], 3, mapF, reduceF) 41 | } else { 42 | mr = mapreduce.Distributed("wcseq", os.Args[3:], 3, os.Args[2]) 43 | } 44 | mr.Wait() 45 | } else { 46 | mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/common.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | ) 7 | 8 | // Debugging enabled? 9 | const debugEnabled = false 10 | 11 | // DPrintf will only print if the debugEnabled const has been set to true 12 | func debug(format string, a ...interface{}) (n int, err error) { 13 | if debugEnabled { 14 | n, err = fmt.Printf(format, a...) 15 | } 16 | return 17 | } 18 | 19 | // jobPhase indicates whether a task is scheduled as a map or reduce task. 20 | type jobPhase string 21 | 22 | const ( 23 | mapPhase jobPhase = "Map" 24 | reducePhase = "Reduce" 25 | ) 26 | 27 | // KeyValue is a type used to hold the key/value pairs passed to the map and 28 | // reduce functions. 29 | type KeyValue struct { 30 | Key string 31 | Value string 32 | } 33 | 34 | // reduceName constructs the name of the intermediate file which map task 35 | // produces for reduce task . 36 | func reduceName(jobName string, mapTask int, reduceTask int) string { 37 | return "mrtmp." + jobName + "-" + strconv.Itoa(mapTask) + "-" + strconv.Itoa(reduceTask) 38 | } 39 | 40 | // mergeName constructs the name of the output file of reduce task 41 | func mergeName(jobName string, reduceTask int) string { 42 | return "mrtmp." + jobName + "-res-" + strconv.Itoa(reduceTask) 43 | } 44 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/common_map.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "hash/fnv" 5 | "io/ioutil" 6 | "os" 7 | "encoding/json" 8 | "fmt" 9 | ) 10 | 11 | // doMap does the job of a map worker: it reads one of the input files 12 | // (inFile), calls the user-defined map function (mapF) for that file's 13 | // contents, and partitions the output into nReduce intermediate files. 14 | // doMap的工作内容如下:读取输入文件(inFile), 调用用户自己定义的map函数mapF处理文件内容, 15 | // 分割输出到nReduce份中间文件。 16 | func doMap( 17 | jobName string, // the name of the MapReduce job 18 | mapTaskNumber int, // which map task this is 19 | inFile string, 20 | nReduce int, // the number of reduce task that will be run ("R" in the paper) 21 | mapF func(file string, contents string) []KeyValue, 22 | ) { 23 | // TODO: 24 | // You will need to write this function. 25 | // You can find the filename for this map task's input to reduce task number 26 | // r using reduceName(jobName, mapTaskNumber, r). The ihash function (given 27 | // below doMap) should be used to decide which file a given key belongs into. 28 | // 29 | // The intermediate output of a map task is stored in the file 30 | // system as multiple files whose name indicates which map task produced 31 | // them, as well as which reduce task they are for. 
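	// For example, following the hints above, assigning a key/value pair to an
	// intermediate file could look like this (illustrative sketch):
	//
	//   r := int(ihash(kv.Key)) % nReduce              // which reduce task gets this key
	//   fname := reduceName(jobName, mapTaskNumber, r) // e.g. "mrtmp.job-3-7"
	//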
Coming up with a 32 | // scheme for how to store the key/value pairs on disk can be tricky, 33 | // especially when taking into account that both keys and values could 34 | // contain newlines, quotes, and any other character you can think of. 35 | // 36 | // One format often used for serializing data to a byte stream that the 37 | // other end can correctly reconstruct is JSON. You are not required to 38 | // use JSON, but as the output of the reduce tasks *must* be JSON, 39 | // familiarizing yourself with it here may prove useful. You can write 40 | // out a data structure as a JSON string to a file using the commented 41 | // code below. The corresponding decoding functions can be found in 42 | // common_reduce.go. 43 | // 44 | // enc := json.NewEncoder(file) 45 | // for _, kv := ... { 46 | // err := enc.Encode(&kv) 47 | // 48 | // Remember to close the file after you have written all the values! 49 | 50 | // 你需要重写这个函数。你可以通过reduceName获取文件名,使用map任务的输入为reduce任务提供输出。 51 | // 下面给出的ihash函数应该被用于决定每个key属于的文件。 52 | // 53 | // map任务的中间输入以多文件的形式保存在文件系统上,它们的文件名说明是哪个map任务产生的,同时也说明哪个reduce任务会处理它们。 54 | // 想出如何存储键/值对在磁盘上的方案可能会非常棘手,特别地, 当我们考虑到key和value都包含新行(newlines),引用(quotes),或者其他 55 | // 你想到的字符。 56 | // 57 | // 有一种格式经常被用来序列化数据到字节流,然后可以通过字节流进行重建,这种格式是json。你没有被强制使用JSON,但是reduce任务的输出 58 | // 必须是JSON格式,熟悉JSON数据格式会对你有所帮助。你可以使用下面的代码将数据结构以JSON字符串的形式输出。对应的解码函数在common_reduce.go 59 | // 可以找到。 60 | // 61 | // enc := json.NewEncoder(file) 62 | // for _, kv := ... { 63 | // err := enc.Encode(&kv) 64 | // 65 | // 记得关闭文件当你写完全部的数据之后。 66 | 67 | 68 | // 注:Map的大致流程如下(官方教材建议不上传代码,所以去除) 69 | //  S1:  打开输入文件,并且读取全部数据 70 | // S2: 调用用户自定义的mapF函数,分检数据,在word count的案例中分割成单词 71 | // S3: 将mapF返回的数据根据key分类,跟文件名对应(reduceName获取文件名) 72 | //  S4:  将分类好的数据分别写入不同文件 73 | 74 | } 75 | 76 | func ihash(s string) uint32 { 77 | h := fnv.New32a() 78 | h.Write([]byte(s)) 79 | return h.Sum32() 80 | } 81 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/common_reduce.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "encoding/json" 7 | "io" 8 | "log" 9 | ) 10 | 11 | // doReduce does the job of a reduce worker: it reads the intermediate 12 | // key/value pairs (produced by the map phase) for this task, sorts the 13 | // intermediate key/value pairs by key, calls the user-defined reduce function 14 | // (reduceF) for each key, and writes the output to disk. 15 | func doReduce( 16 | jobName string, // the name of the whole MapReduce job 17 | reduceTaskNumber int, // which reduce task this is 18 | nMap int, // the number of map tasks that were run ("M" in the paper) 19 | reduceF func(key string, values []string) string, 20 | ) { 21 | // TODO: 22 | // You will need to write this function. 23 | // You can find the intermediate file for this reduce task from map task number 24 | // m using reduceName(jobName, m, reduceTaskNumber). 25 | // Remember that you've encoded the values in the intermediate files, so you 26 | // will need to decode them. If you chose to use JSON, you can read out 27 | // multiple decoded values by creating a decoder, and then repeatedly calling 28 | // .Decode() on it until Decode() returns an error. 29 | // 30 | // You should write the reduced output in as JSON encoded KeyValue 31 | // objects to a file named mergeName(jobName, reduceTaskNumber). 
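	// For example, collecting the decoded intermediate values described above
	// might look like this (an illustrative sketch, not the required solution):
	//
	//   kvs := map[string][]string{}
	//   for m := 0; m < nMap; m++ {
	//       f, err := os.Open(reduceName(jobName, m, reduceTaskNumber))
	//       if err != nil {
	//           log.Fatal(err)
	//       }
	//       dec := json.NewDecoder(f)
	//       var kv KeyValue
	//       for dec.Decode(&kv) == nil {
	//           kvs[kv.Key] = append(kvs[kv.Key], kv.Value)
	//       }
	//       f.Close()
	//   }
	//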
We require 32 | // you to use JSON here because that is what the merger than combines the 33 | // output from all the reduce tasks expects. There is nothing "special" about 34 | // JSON -- it is just the marshalling format we chose to use. It will look 35 | // something like this: 36 | // 37 | // enc := json.NewEncoder(mergeFile) 38 | // for key in ... { 39 | // enc.Encode(KeyValue{key, reduceF(...)}) 40 | // } 41 | // file.Close() 42 | 43 | // 你需要完成这个函数。你可与获取到来自map任务生产的中间数据,通过reduceName获取到文件名。 44 | // 记住你应该编码了值到中间文件,所以你需要解码它们。如果你选择了使用JSON,你通过创建decoder读取到多个 45 | // 解码之后的值,直接调用Decode直到返回错误。 46 | // 47 | // 你应该将reduce输出以JSON编码的方式保存到文件,文件名通过mergeName获取。我们建议你在这里使用JSON, 48 | 49 | // key是中间文件里面键值,value是字符串,这个map用于存储相同键值元素的合并 50 | 51 | // Reduce的过程如下: 52 | // S1: 获取到Map产生的文件并打开(reduceName获取文件名) 53 | //  S2:获取中间文件的数据(对多个map产生的文件更加值合并) 54 | //  S3:打开文件(mergeName获取文件名),将用于存储Reduce任务的结果 55 | //  S4:合并结果之后(S2),进行reduceF操作, work count的操作将结果累加,也就是word出现在这个文件中出现的次数 56 | } 57 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/common_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "net/rpc" 6 | ) 7 | 8 | // What follows are RPC types and methods. 9 | // Field names must start with capital letters, otherwise RPC will break. 10 | 11 | // DoTaskArgs holds the arguments that are passed to a worker when a job is scheduled on it. 12 | // DoTaskArgs保存保护参数,用于为worker分配工作。 13 | type DoTaskArgs struct { 14 | JobName string // 工作的名字 15 | File string // 待处理的文件名 16 | Phase jobPhase // 工作类型是map还是reduce 17 | TaskNumber int // 任务的索引? 18 | 19 | // NumOtherPhase is the total number of tasks in other phase; mappers 20 | // need this to compute the number of output bins, and reducers needs 21 | // this to know how many input files to collect. 22 | // 全部任务数量,mapper需要这个数字去计算输出的数量, 同时reducer需要知道有多少输入文件需要收集。 23 | NumOtherPhase int 24 | } 25 | 26 | // ShutdownReply is the response to a WorkerShutdown. 27 | // It holds the number of tasks this worker has processed since it was started. 28 | // ShutdownReply是WorkerShutdown的回应,Ntasks表示worker从启动开始已经处理的任务。 29 | type ShutdownReply struct { 30 | Ntasks int 31 | } 32 | 33 | // RegisterArgs is the argument passed when a worker registers with the master. 34 | // worker注册到master的时候,传递的参数。 35 | type RegisterArgs struct { 36 | Worker string 37 | } 38 | 39 | // call() sends an RPC to the rpcname handler on server srv 40 | // with arguments args, waits for the reply, and leaves the 41 | // reply in reply. the reply argument should be the address 42 | // of a reply structure. 43 | // 44 | // call() returns true if the server responded, and false 45 | // if call() was not able to contact the server. in particular, 46 | // reply's contents are valid if and only if call() returned true. 47 | // 48 | // you should assume that call() will time out and return an 49 | // error after a while if it doesn't get a reply from the server. 50 | // 51 | // please use call() to send all RPCs, in master.go, mapreduce.go, 52 | // and worker.go. please don't change this function. 
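//
// Usage sketch (illustrative, not part of the required code): this is how
// schedule() in schedule.go can hand a map task to a worker it has taken
// from mr.registerChannel, using the DoTaskArgs type defined above.
// workerAddress and taskNumber stand in for values that schedule() tracks
// itself.
//
//	args := DoTaskArgs{
//		JobName:       mr.jobName,
//		File:          mr.files[taskNumber],
//		Phase:         mapPhase,
//		TaskNumber:    taskNumber,
//		NumOtherPhase: mr.nReduce,
//	}
//	ok := call(workerAddress, "Worker.DoTask", &args, new(struct{}))
//	if !ok {
//		// the worker is unreachable or crashed: give the task to another
//		// worker and set this one aside
//	}
//
// A common structure is to run one such call per task in its own goroutine
// and wait on a sync.WaitGroup until every task of the phase has succeeded.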
53 | // 54 | 55 | // 本地的rpc调用,使用是unix套接字 56 | func call(srv string, rpcname string, 57 | args interface{}, reply interface{}) bool { 58 | 59 | c, errx := rpc.Dial("unix", srv) 60 | if errx != nil { 61 | return false 62 | } 63 | defer c.Close() 64 | 65 | err := c.Call(rpcname, args, reply) 66 | if err == nil { 67 | return true 68 | } 69 | 70 | fmt.Println(err) 71 | return false 72 | } 73 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/master.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "sync" 7 | ) 8 | 9 | // Master holds all the state that the master needs to keep track of. Of 10 | // particular importance is registerChannel, the channel that notifies the 11 | // master of workers that have gone idle and are in need of new work. 12 | type Master struct { 13 | sync.Mutex 14 | 15 | address string 16 | registerChannel chan string // 通知master那些worker处于空闲状态。 17 | doneChannel chan bool 18 | workers []string // protected by the mutex, master下面含有的worker的名字 19 | 20 | // Per-task information 21 | jobName string // Name of currently executing job 22 | files []string // Input files 23 | nReduce int // Number of reduce partitions 24 | 25 | shutdown chan struct{} 26 | l net.Listener 27 | stats []int 28 | } 29 | 30 | // Register is an RPC method that is called by workers after they have started 31 | // up to report that they are ready to receive tasks. 32 | // 一个供worker调用的rpc方法, 告诉master它们已经准备好接受任务。 33 | func (mr *Master) Register(args *RegisterArgs, _ *struct{}) error { 34 | mr.Lock() 35 | defer mr.Unlock() 36 | debug("Register: worker %s\n", args.Worker) 37 | mr.workers = append(mr.workers, args.Worker) 38 | go func() { 39 | mr.registerChannel <- args.Worker // 通知master那些worker处于空闲状态。 40 | }() 41 | return nil 42 | } 43 | 44 | // newMaster initializes a new Map/Reduce Master 45 | // 创建初始化master 46 | func newMaster(master string) (mr *Master) { 47 | mr = new(Master) 48 | mr.address = master 49 | mr.shutdown = make(chan struct{}) 50 | mr.registerChannel = make(chan string) 51 | mr.doneChannel = make(chan bool) 52 | return 53 | } 54 | 55 | // Sequential runs map and reduce tasks sequentially, waiting for each task to 56 | // complete before scheduling the next. 57 | // Sequential方法顺序的执行map和reduce任务,在分配下一个任务前需要前面的任务完成。 58 | func Sequential(jobName string, files []string, nreduce int, 59 | mapF func(string, string) []KeyValue, 60 | reduceF func(string, []string) string, 61 | ) (mr *Master) { 62 | mr = newMaster("master") 63 | // 两个匿名函数 64 | go mr.run(jobName, files, nreduce, func(phase jobPhase) { 65 | switch phase { 66 | case mapPhase: 67 | for i, f := range mr.files { 68 | doMap(mr.jobName, i, f, mr.nReduce, mapF) 69 | } 70 | case reducePhase: 71 | for i := 0; i < mr.nReduce; i++ { 72 | doReduce(mr.jobName, i, len(mr.files), reduceF) 73 | } 74 | } 75 | }, func() { 76 | mr.stats = []int{len(files) + nreduce} 77 | }) 78 | return 79 | } 80 | 81 | // Distributed schedules map and reduce tasks on workers that register with the master over RPC. 
82 | // 将map和reduc任务分布到通过rpc注册到master的worker。 83 | func Distributed(jobName string, files []string, nreduce int, master string) (mr *Master) { 84 | mr = newMaster(master) 85 | mr.startRPCServer() 86 | go mr.run(jobName, files, nreduce, mr.schedule, func() { 87 | mr.stats = mr.killWorkers() 88 | mr.stopRPCServer() 89 | }) 90 | return 91 | } 92 | 93 | // run executes a mapreduce job on the given number of mappers and reducers. 94 | // 95 | // First, it divides up the input file among the given number of mappers, and 96 | // schedules each task on workers as they become available. Each map task bins 97 | // its output in a number of bins equal to the given number of reduce tasks. 98 | // Once all the mappers have finished, workers are assigned reduce tasks. 99 | // 100 | // When all tasks have been completed, the reducer outputs are merged, 101 | // statistics are collected, and the master is shut down. 102 | // 103 | // Note that this implementation assumes a shared file system. 104 | 105 | // 在指定的mapper和reducer数量上面执行mapreduce工作. 106 | // 首先,在指定数量的mapper上面分配输入文件,然后分配每个任务到可用的worker。每个map任务将它的输出 107 | // 放置在一些“箱子”, 数量等于给定的reduce任务的数量。一旦全部的mapper工作完成,worker开始安排reduce任务。 108 | // 109 | // 当全部的任务完成的时候,reducer的输出被合并,统计被收集,然后master关闭退出。 110 | // 111 | // 注意:实现假设在一个共享的文件系统之上。 112 | func (mr *Master) run(jobName string, files []string, nreduce int, 113 | schedule func(phase jobPhase), 114 | finish func(), 115 | ) { 116 | mr.jobName = jobName // job的名字 117 | mr.files = files // 输入的文件 118 | mr.nReduce = nreduce // reduce任务的数量限制 119 | 120 | fmt.Printf("%s: Starting Map/Reduce task %s\n", mr.address, mr.jobName) 121 | 122 | // 这两个函数都需要外面传入 123 | schedule(mapPhase) // 安排map任务 schedule即master.go 64行传入的函数 124 | schedule(reducePhase) // 安排reduce任务 125 | finish() // 任务完成 126 | 127 | mr.merge() // 合并结果 128 | 129 | fmt.Printf("%s: Map/Reduce task completed\n", mr.address) 130 | 131 | mr.doneChannel <- true 132 | } 133 | 134 | // Wait blocks until the currently scheduled work has completed. 135 | // This happens when all tasks have scheduled and completed, the final output 136 | // have been computed, and all workers have been shut down. 137 | func (mr *Master) Wait() { 138 | <-mr.doneChannel // 等待run运行完成 139 | } 140 | 141 | // killWorkers cleans up all workers by sending each one a Shutdown RPC. 142 | // It also collects and returns the number of tasks each worker has performed. 143 | func (mr *Master) killWorkers() []int { 144 | mr.Lock() 145 | defer mr.Unlock() 146 | ntasks := make([]int, 0, len(mr.workers)) 147 | for _, w := range mr.workers { 148 | debug("Master: shutdown worker %s\n", w) 149 | var reply ShutdownReply 150 | ok := call(w, "Worker.Shutdown", new(struct{}), &reply) 151 | if ok == false { 152 | fmt.Printf("Master: RPC %s shutdown error\n", w) 153 | } else { 154 | ntasks = append(ntasks, reply.Ntasks) 155 | } 156 | } 157 | return ntasks 158 | } 159 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/master_rpc.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/rpc" 8 | "os" 9 | ) 10 | 11 | // Shutdown is an RPC method that shuts down the Master's RPC server. 
12 | // shotdown是一个RPC方法,用于关闭master rpc服务器。 13 | func (mr *Master) Shutdown(_, _ *struct{}) error { 14 | debug("Shutdown: registration server\n") 15 | close(mr.shutdown) 16 | mr.l.Close() // causes the Accept to fail 17 | return nil 18 | } 19 | 20 | // startRPCServer starts the Master's RPC server. It continues accepting RPC 21 | // calls (Register in particular) for as long as the worker is alive. 22 | // startServer是开启master rpc服务。不断的接受来自worker的rpc调用。 23 | func (mr *Master) startRPCServer() { 24 | rpcs := rpc.NewServer() 25 | rpcs.Register(mr) // 注册自己的方法 26 | os.Remove(mr.address) // only needed for "unix" 27 | l, e := net.Listen("unix", mr.address) // unix套接字,套接字和文件绑定 28 | if e != nil { 29 | log.Fatal("RegstrationServer", mr.address, " error: ", e) 30 | } 31 | mr.l = l 32 | 33 | // now that we are listening on the master address, can fork off 34 | // accepting connections to another thread. 35 | go func() { 36 | loop: 37 | for { 38 | select { 39 | case <-mr.shutdown: // close的时候退出 40 | break loop 41 | default: 42 | } 43 | conn, err := mr.l.Accept() 44 | if err == nil { 45 | go func() { 46 | rpcs.ServeConn(conn) 47 | conn.Close() 48 | }() 49 | } else { 50 | debug("RegistrationServer: accept error", err) 51 | break 52 | } 53 | } 54 | debug("RegistrationServer: done\n") 55 | }() 56 | } 57 | 58 | // stopRPCServer stops the master RPC server. 59 | // This must be done through an RPC to avoid race conditions between the RPC 60 | // server thread and the current thread. 61 | // 关闭rpc 服务的方法。 62 | func (mr *Master) stopRPCServer() { 63 | var reply ShutdownReply 64 | ok := call(mr.address, "Master.Shutdown", new(struct{}), &reply) 65 | if ok == false { 66 | fmt.Printf("Cleanup: RPC %s error\n", mr.address) 67 | } 68 | debug("cleanupRegistration: done\n") 69 | } 70 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/master_splitmerge.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "fmt" 7 | "log" 8 | "os" 9 | "sort" 10 | ) 11 | 12 | // merge combines the results of the many reduce jobs into a single output file 13 | // XXX use merge sort 14 | // 将reduce工作产生的结果合并成一个输出文件。 15 | func (mr *Master) merge() { 16 | debug("Merge phase") 17 | kvs := make(map[string]string) 18 | // nReduce个reduce任务,有nReduce个任务。 19 | for i := 0; i < mr.nReduce; i++ { 20 | p := mergeName(mr.jobName, i) // 获取输出文件,类似mrtmp.test-res-0 21 | fmt.Printf("Merge: read %s\n", p) 22 | file, err := os.Open(p) 23 | if err != nil { 24 | log.Fatal("Merge: ", err) 25 | } 26 | dec := json.NewDecoder(file) // json数据的流式读写 27 | for { 28 | var kv KeyValue 29 | err = dec.Decode(&kv) 30 | if err != nil { 31 | break 32 | } 33 | kvs[kv.Key] = kv.Value 34 | } 35 | file.Close() 36 | } 37 | 38 | // key排序 39 | var keys []string 40 | for k := range kvs { 41 | keys = append(keys, k) 42 | } 43 | sort.Strings(keys) 44 | 45 | // 将排序后的结果写入文件mrtmp.* 46 | file, err := os.Create("mrtmp." + mr.jobName) 47 | if err != nil { 48 | log.Fatal("Merge: create ", err) 49 | } 50 | w := bufio.NewWriter(file) 51 | for _, k := range keys { 52 | fmt.Fprintf(w, "%s: %s\n", k, kvs[k]) 53 | } 54 | w.Flush() 55 | file.Close() 56 | } 57 | 58 | // removeFile is a simple wrapper around os.Remove that logs errors. 
59 | func removeFile(n string) { 60 | err := os.Remove(n) 61 | if err != nil { 62 | log.Fatal("CleanupFiles ", err) 63 | } 64 | } 65 | 66 | // CleanupFiles removes all intermediate files produced by running mapreduce. 67 | // This covers the mrtmp.<job>-m-r map outputs, the mrtmp.<job>-res-r reduce outputs, and the merged mrtmp.<job> file. 68 | func (mr *Master) CleanupFiles() { 69 | for i := range mr.files { 70 | for j := 0; j < mr.nReduce; j++ { 71 | removeFile(reduceName(mr.jobName, i, j)) 72 | } 73 | } 74 | for i := 0; i < mr.nReduce; i++ { 75 | removeFile(mergeName(mr.jobName, i)) 76 | } 77 | removeFile("mrtmp." + mr.jobName) 78 | } 79 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/readme.go: -------------------------------------------------------------------------------- 1 | // Package mapreduce provides a simple mapreduce library with a sequential 2 | // implementation. Applications should normally call Distributed() [located in 3 | // master.go] to start a job, but may instead call Sequential() [also in 4 | // master.go] to get a sequential execution for debugging purposes. 5 | // 6 | // The flow of the mapreduce implementation is as follows: 7 | // 8 | // 1. The application provides a number of input files, a map function, a 9 | // reduce function, and the number of reduce tasks (nReduce). 10 | // 2. A master is created with this knowledge. It spins up an RPC server (see 11 | // master_rpc.go), and waits for workers to register (using the RPC call 12 | // Register() [defined in master.go]). As tasks become available (in steps 13 | // 4 and 5), schedule() [schedule.go] decides how to assign those tasks to 14 | // workers, and how to handle worker failures. 15 | // 3. The master considers each input file one map task, and makes a call to 16 | // doMap() [common_map.go] at least once for each task. It does so either 17 | // directly (when using Sequential()) or by issuing the DoTask RPC on a 18 | // worker [worker.go]. Each call to doMap() reads the appropriate file, 19 | // calls the map function on that file's contents, and produces nReduce 20 | // files for each map file. Thus, there will be #files x nReduce files 21 | // after all map tasks are done: 22 | // 23 | // f0-0, ..., f0-<nReduce-1>, ..., 24 | // f<#files-1>-0, ..., f<#files-1>-<nReduce-1>. 25 | // 26 | // 4. The master next makes a call to doReduce() [common_reduce.go] at least 27 | // once for each reduce task. As for doMap(), it does so either directly or 28 | // through a worker. doReduce() collects one intermediate file per map task 29 | // (f<m>-<r> for its own reduce number r), and runs the reduce function on 30 | // those files. This produces nReduce result files in total. 31 | // 5. The master calls mr.merge() [master_splitmerge.go], which merges all 32 | // the nReduce files produced by the previous step into a single output. 33 | // 6. The master sends a Shutdown RPC to each of its workers, and then shuts 34 | // down its own RPC server. 35 | // 36 | // TODO: 37 | // You will have to write/modify doMap, doReduce, and schedule yourself. These 38 | // are located in common_map.go, common_reduce.go, and schedule.go 39 | // respectively. You will also have to write the map and reduce functions in 40 | // ../main/wc.go. 41 | // 42 | // You should not need to modify any other files, but reading them might be 43 | // useful in order to understand how the other methods fit into the overall 44 | // architecture of the system.
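//
// Usage sketch (illustrative; the file names and socket paths below are
// made up, and the real entry point is main/wc.go): a distributed run
// looks roughly like this.
//
//	mr := mapreduce.Distributed("wcseq", []string{"pg-1.txt", "pg-2.txt"}, 3, "/var/tmp/824-master")
//	go mapreduce.RunWorker("/var/tmp/824-master", "/var/tmp/824-worker0", mapF, reduceF, -1)
//	go mapreduce.RunWorker("/var/tmp/824-master", "/var/tmp/824-worker1", mapF, reduceF, -1)
//	mr.Wait() // returns once the merged output mrtmp.wcseq has been written
//
// For debugging, mapreduce.Sequential("wcseq", files, 3, mapF, reduceF)
// runs the same job in-process, with no RPC involved.
//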
45 | package mapreduce 46 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/schedule.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import "fmt" 4 | 5 | // schedule starts and waits for all tasks in the given phase (Map or Reduce). 6 | func (mr *Master) schedule(phase jobPhase) { 7 | var ntasks int 8 | var nios int // number of inputs (for reduce) or outputs (for map) 9 | switch phase { 10 | case mapPhase: 11 | ntasks = len(mr.files) 12 | nios = mr.nReduce 13 | case reducePhase: 14 | ntasks = mr.nReduce 15 | nios = len(mr.files) 16 | } 17 | 18 | fmt.Printf("Schedule: %v %v tasks (%d I/Os)\n", ntasks, phase, nios) 19 | 20 | // All ntasks tasks have to be scheduled on workers, and only once all of 21 | // them have been completed successfully should the function return. 22 | // Remember that workers may fail, and that any given worker may finish 23 | // multiple tasks. 24 | // 25 | // TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO 26 | // 27 | fmt.Printf("Schedule: %v phase done\n", phase) 28 | } 29 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/test_test.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "bufio" 9 | "log" 10 | "os" 11 | "sort" 12 | "strconv" 13 | "strings" 14 | ) 15 | 16 | const ( 17 | nNumber = 100000 18 | nMap = 100 19 | nReduce = 50 20 | ) 21 | 22 | // Create input file with N numbers 23 | // Check if we have N numbers in output file 24 | 25 | // Split in words 26 | // 分割单词 27 | func MapFunc(file string, value string) (res []KeyValue) { 28 | words := strings.Fields(value) // 分隔空格隔开的单词 29 | for _, w := range words { 30 | kv := KeyValue{w, ""} 31 | res = append(res, kv) 32 | } 33 | return 34 | } 35 | 36 | // Just return key 37 | func ReduceFunc(key string, values []string) string { 38 | for _, e := range values { 39 | debug("Reduce %s %v\n", key, e) 40 | } 41 | return "" 42 | } 43 | 44 | // Checks input file agaist output file: each input number should show up 45 | // in the output file in string sorted order 46 | func check(t *testing.T, files []string) { 47 | output, err := os.Open("mrtmp.test") 48 | if err != nil { 49 | log.Fatal("check: ", err) 50 | } 51 | defer output.Close() 52 | 53 | var lines []string 54 | for _, f := range files { 55 | input, err := os.Open(f) 56 | if err != nil { 57 | log.Fatal("check: ", err) 58 | } 59 | defer input.Close() 60 | inputScanner := bufio.NewScanner(input) 61 | for inputScanner.Scan() { 62 | lines = append(lines, inputScanner.Text()) 63 | } 64 | } 65 | 66 | sort.Strings(lines) 67 | 68 | outputScanner := bufio.NewScanner(output) 69 | i := 0 70 | for outputScanner.Scan() { 71 | var v1 int 72 | var v2 int 73 | text := outputScanner.Text() 74 | n, err := fmt.Sscanf(lines[i], "%d", &v1) 75 | if n == 1 && err == nil { 76 | n, err = fmt.Sscanf(text, "%d", &v2) 77 | } 78 | if err != nil || v1 != v2 { 79 | t.Fatalf("line %d: %d != %d err %v\n", i, v1, v2, err) 80 | } 81 | i++ 82 | } 83 | if i != nNumber { 84 | t.Fatalf("Expected %d lines in output\n", nNumber) 85 | } 86 | } 87 | 88 | // Workers report back how many RPCs they have processed in the Shutdown reply. 89 | // Check that they processed at least 1 RPC. 
90 | func checkWorker(t *testing.T, l []int) { 91 | for _, tasks := range l { 92 | if tasks == 0 { 93 | t.Fatalf("Some worker didn't do any work\n") 94 | } 95 | } 96 | } 97 | 98 | // Make input file 99 | func makeInputs(num int) []string { 100 | var names []string 101 | var i = 0 102 | for f := 0; f < num; f++ { 103 | names = append(names, fmt.Sprintf("824-mrinput-%d.txt", f)) 104 | file, err := os.Create(names[f]) 105 | if err != nil { 106 | log.Fatal("mkInput: ", err) 107 | } 108 | w := bufio.NewWriter(file) 109 | for i < (f+1)*(nNumber/num) { 110 | fmt.Fprintf(w, "%d\n", i) 111 | i++ 112 | } 113 | w.Flush() 114 | file.Close() 115 | } 116 | return names 117 | } 118 | 119 | // Cook up a unique-ish UNIX-domain socket name 120 | // in /var/tmp. can't use current directory since 121 | // AFS doesn't support UNIX-domain sockets. 122 | func port(suffix string) string { 123 | s := "/var/tmp/824-" 124 | s += strconv.Itoa(os.Getuid()) + "/" 125 | os.Mkdir(s, 0777) 126 | s += "mr" 127 | s += strconv.Itoa(os.Getpid()) + "-" 128 | s += suffix 129 | return s 130 | } 131 | 132 | func setup() *Master { 133 | files := makeInputs(nMap) 134 | master := port("master") 135 | mr := Distributed("test", files, nReduce, master) 136 | return mr 137 | } 138 | 139 | func cleanup(mr *Master) { 140 | mr.CleanupFiles() 141 | for _, f := range mr.files { 142 | removeFile(f) 143 | } 144 | } 145 | 146 | func TestSequentialSingle(t *testing.T) { 147 | //mr := Sequential("test", makeInputs(1), 1, MapFunc, ReduceFunc) 148 | mr := Sequential("test", makeInputs(1), 3, MapFunc, ReduceFunc) 149 | mr.Wait() 150 | check(t, mr.files) 151 | checkWorker(t, mr.stats) 152 | cleanup(mr) 153 | } 154 | 155 | func TestSequentialMany(t *testing.T) { 156 | mr := Sequential("test", makeInputs(5), 3, MapFunc, ReduceFunc) 157 | mr.Wait() 158 | check(t, mr.files) 159 | checkWorker(t, mr.stats) 160 | cleanup(mr) 161 | } 162 | 163 | func TestBasic(t *testing.T) { 164 | mr := setup() 165 | for i := 0; i < 2; i++ { 166 | go RunWorker(mr.address, port("worker"+strconv.Itoa(i)), 167 | MapFunc, ReduceFunc, -1) 168 | } 169 | mr.Wait() 170 | check(t, mr.files) 171 | checkWorker(t, mr.stats) 172 | cleanup(mr) 173 | } 174 | 175 | func TestOneFailure(t *testing.T) { 176 | mr := setup() 177 | // Start 2 workers that fail after 10 tasks 178 | go RunWorker(mr.address, port("worker"+strconv.Itoa(0)), 179 | MapFunc, ReduceFunc, 10) 180 | go RunWorker(mr.address, port("worker"+strconv.Itoa(1)), 181 | MapFunc, ReduceFunc, -1) 182 | mr.Wait() 183 | check(t, mr.files) 184 | checkWorker(t, mr.stats) 185 | cleanup(mr) 186 | } 187 | 188 | func TestManyFailures(t *testing.T) { 189 | mr := setup() 190 | i := 0 191 | done := false 192 | for !done { 193 | select { 194 | case done = <-mr.doneChannel: 195 | check(t, mr.files) 196 | cleanup(mr) 197 | break 198 | default: 199 | // Start 2 workers each sec. 
The workers fail after 10 tasks 200 | w := port("worker" + strconv.Itoa(i)) 201 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10) 202 | i++ 203 | w = port("worker" + strconv.Itoa(i)) 204 | go RunWorker(mr.address, w, MapFunc, ReduceFunc, 10) 205 | i++ 206 | time.Sleep(1 * time.Second) 207 | } 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /6.824/src/mapreduce/worker.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/rpc" 8 | "os" 9 | "sync" 10 | ) 11 | 12 | // Worker holds the state for a server waiting for DoTask or Shutdown RPCs 13 | type Worker struct { 14 | sync.Mutex 15 | 16 | name string // worker的名字 17 | Map func(string, string) []KeyValue 18 | Reduce func(string, []string) string 19 | nRPC int // protected by mutex 20 | nTasks int // protected by mutex 21 | l net.Listener 22 | } 23 | 24 | // DoTask is called by the master when a new task is being scheduled on this worker. 25 | func (wk *Worker) DoTask(arg *DoTaskArgs, _ *struct{}) error { 26 | fmt.Printf("%s: given %v task #%d on file %s (nios: %d)\n", 27 | wk.name, arg.Phase, arg.TaskNumber, arg.File, arg.NumOtherPhase) 28 | 29 | switch arg.Phase { 30 | case mapPhase: 31 | doMap(arg.JobName, arg.TaskNumber, arg.File, arg.NumOtherPhase, wk.Map) 32 | case reducePhase: 33 | doReduce(arg.JobName, arg.TaskNumber, arg.NumOtherPhase, wk.Reduce) 34 | } 35 | 36 | fmt.Printf("%s: %v task #%d done\n", wk.name, arg.Phase, arg.TaskNumber) 37 | return nil 38 | } 39 | 40 | // Shutdown is called by the master when all work has been completed. 41 | // We should respond with the number of tasks we have processed. 42 | func (wk *Worker) Shutdown(_ *struct{}, res *ShutdownReply) error { 43 | debug("Shutdown %s\n", wk.name) 44 | wk.Lock() 45 | defer wk.Unlock() 46 | res.Ntasks = wk.nTasks 47 | wk.nRPC = 1 48 | wk.nTasks-- // Don't count the shutdown RPC 49 | return nil 50 | } 51 | 52 | // Tell the master we exist and ready to work 53 | func (wk *Worker) register(master string) { 54 | args := new(RegisterArgs) 55 | args.Worker = wk.name 56 | ok := call(master, "Master.Register", args, new(struct{})) 57 | if ok == false { 58 | fmt.Printf("Register: RPC %s register error\n", master) 59 | } 60 | } 61 | 62 | // RunWorker sets up a connection with the master, registers its address, and 63 | // waits for tasks to be scheduled. 
64 | func RunWorker(MasterAddress string, me string, 65 | MapFunc func(string, string) []KeyValue, 66 | ReduceFunc func(string, []string) string, 67 | nRPC int, 68 | ) { 69 | debug("RunWorker %s\n", me) 70 | wk := new(Worker) 71 | wk.name = me 72 | wk.Map = MapFunc 73 | wk.Reduce = ReduceFunc 74 | wk.nRPC = nRPC 75 | rpcs := rpc.NewServer() 76 | rpcs.Register(wk) 77 | os.Remove(me) // only needed for "unix" 78 | l, e := net.Listen("unix", me) 79 | if e != nil { 80 | log.Fatal("RunWorker: worker ", me, " error: ", e) 81 | } 82 | wk.l = l 83 | wk.register(MasterAddress) 84 | 85 | // DON'T MODIFY CODE BELOW 86 | for { 87 | wk.Lock() 88 | if wk.nRPC == 0 { 89 | wk.Unlock() 90 | break 91 | } 92 | wk.Unlock() 93 | conn, err := wk.l.Accept() 94 | if err == nil { 95 | wk.Lock() 96 | wk.nRPC-- 97 | wk.Unlock() 98 | go rpcs.ServeConn(conn) 99 | wk.Lock() 100 | wk.nTasks++ 101 | wk.Unlock() 102 | } else { 103 | break 104 | } 105 | } 106 | wk.l.Close() 107 | debug("RunWorker %s exit\n", me) 108 | } 109 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "shardmaster" 4 | import "net/rpc" 5 | import "time" 6 | import "sync" 7 | import "fmt" 8 | import "crypto/rand" 9 | import "math/big" 10 | 11 | type Clerk struct { 12 | mu sync.Mutex // one RPC at a time 13 | sm *shardmaster.Clerk 14 | config shardmaster.Config 15 | // You'll have to modify Clerk. 16 | } 17 | 18 | func nrand() int64 { 19 | max := big.NewInt(int64(1) << 62) 20 | bigx, _ := rand.Int(rand.Reader, max) 21 | x := bigx.Int64() 22 | return x 23 | } 24 | 25 | func MakeClerk(shardmasters []string) *Clerk { 26 | ck := new(Clerk) 27 | ck.sm = shardmaster.MakeClerk(shardmasters) 28 | // You'll have to modify MakeClerk. 29 | return ck 30 | } 31 | 32 | // 33 | // call() sends an RPC to the rpcname handler on server srv 34 | // with arguments args, waits for the reply, and leaves the 35 | // reply in reply. the reply argument should be a pointer 36 | // to a reply structure. 37 | // 38 | // the return value is true if the server responded, and false 39 | // if call() was not able to contact the server. in particular, 40 | // the reply's contents are only valid if call() returned true. 41 | // 42 | // you should assume that call() will return an 43 | // error after a while if the server is dead. 44 | // don't provide your own time-out mechanism. 45 | // 46 | // please use call() to send all RPCs, in client.go and server.go. 47 | // please don't change this function. 48 | // 49 | func call(srv string, rpcname string, 50 | args interface{}, reply interface{}) bool { 51 | c, errx := rpc.Dial("unix", srv) 52 | if errx != nil { 53 | return false 54 | } 55 | defer c.Close() 56 | 57 | err := c.Call(rpcname, args, reply) 58 | if err == nil { 59 | return true 60 | } 61 | 62 | fmt.Println(err) 63 | return false 64 | } 65 | 66 | // 67 | // which shard is a key in? 68 | // please use this function, 69 | // and please do not change it. 70 | // 71 | func key2shard(key string) int { 72 | shard := 0 73 | if len(key) > 0 { 74 | shard = int(key[0]) 75 | } 76 | shard %= shardmaster.NShards 77 | return shard 78 | } 79 | 80 | // 81 | // fetch the current value for a key. 82 | // returns "" if the key does not exist. 83 | // keeps trying forever in the face of all other errors. 
84 | // 85 | func (ck *Clerk) Get(key string) string { 86 | ck.mu.Lock() 87 | defer ck.mu.Unlock() 88 | 89 | // You'll have to modify Get(). 90 | 91 | for { 92 | shard := key2shard(key) 93 | 94 | gid := ck.config.Shards[shard] 95 | 96 | servers, ok := ck.config.Groups[gid] 97 | 98 | if ok { 99 | // try each server in the shard's replication group. 100 | for _, srv := range servers { 101 | args := &GetArgs{} 102 | args.Key = key 103 | var reply GetReply 104 | ok := call(srv, "ShardKV.Get", args, &reply) 105 | if ok && (reply.Err == OK || reply.Err == ErrNoKey) { 106 | return reply.Value 107 | } 108 | if ok && (reply.Err == ErrWrongGroup) { 109 | break 110 | } 111 | } 112 | } 113 | 114 | time.Sleep(100 * time.Millisecond) 115 | 116 | // ask master for a new configuration. 117 | ck.config = ck.sm.Query(-1) 118 | } 119 | } 120 | 121 | // send a Put or Append request. 122 | func (ck *Clerk) PutAppend(key string, value string, op string) { 123 | ck.mu.Lock() 124 | defer ck.mu.Unlock() 125 | 126 | // You'll have to modify PutAppend(). 127 | 128 | for { 129 | shard := key2shard(key) 130 | 131 | gid := ck.config.Shards[shard] 132 | 133 | servers, ok := ck.config.Groups[gid] 134 | 135 | if ok { 136 | // try each server in the shard's replication group. 137 | for _, srv := range servers { 138 | args := &PutAppendArgs{} 139 | args.Key = key 140 | args.Value = value 141 | args.Op = op 142 | var reply PutAppendReply 143 | ok := call(srv, "ShardKV.PutAppend", args, &reply) 144 | if ok && reply.Err == OK { 145 | return 146 | } 147 | if ok && (reply.Err == ErrWrongGroup) { 148 | break 149 | } 150 | } 151 | } 152 | 153 | time.Sleep(100 * time.Millisecond) 154 | 155 | // ask master for a new configuration. 156 | ck.config = ck.sm.Query(-1) 157 | } 158 | } 159 | 160 | func (ck *Clerk) Put(key string, value string) { 161 | ck.PutAppend(key, value, "Put") 162 | } 163 | func (ck *Clerk) Append(key string, value string) { 164 | ck.PutAppend(key, value, "Append") 165 | } 166 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardkv/common.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // Sharded key/value server. 5 | // Lots of replica groups, each running op-at-a-time paxos. 6 | // Shardmaster decides which group serves each shard. 7 | // Shardmaster may change shard assignment from time to time. 8 | // 9 | // You will have to modify these definitions. 10 | // 11 | 12 | const ( 13 | OK = "OK" 14 | ErrNoKey = "ErrNoKey" 15 | ErrWrongGroup = "ErrWrongGroup" 16 | ) 17 | 18 | type Err string 19 | 20 | type PutAppendArgs struct { 21 | Key string 22 | Value string 23 | Op string // "Put" or "Append" 24 | // You'll have to add definitions here. 25 | // Field names must start with capital letters, 26 | // otherwise RPC will break. 27 | 28 | } 29 | 30 | type PutAppendReply struct { 31 | Err Err 32 | } 33 | 34 | type GetArgs struct { 35 | Key string 36 | // You'll have to add definitions here. 
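	// One common approach (illustrative, not required): tag every request
	// so the servers can detect duplicates when an RPC is retried, e.g.
	//
	//	ClientID int64 // which clerk sent this request
	//	SeqNum   int   // per-clerk sequence number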
37 | } 38 | 39 | type GetReply struct { 40 | Err Err 41 | Value string 42 | } 43 | 44 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardkv/server.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "time" 8 | import "paxos" 9 | import "sync" 10 | import "sync/atomic" 11 | import "os" 12 | import "syscall" 13 | import "encoding/gob" 14 | import "math/rand" 15 | import "shardmaster" 16 | 17 | 18 | const Debug = 0 19 | 20 | func DPrintf(format string, a ...interface{}) (n int, err error) { 21 | if Debug > 0 { 22 | log.Printf(format, a...) 23 | } 24 | return 25 | } 26 | 27 | 28 | type Op struct { 29 | // Your definitions here. 30 | } 31 | 32 | 33 | type ShardKV struct { 34 | mu sync.Mutex 35 | l net.Listener 36 | me int 37 | dead int32 // for testing 38 | unreliable int32 // for testing 39 | sm *shardmaster.Clerk 40 | px *paxos.Paxos 41 | 42 | gid int64 // my replica group ID 43 | 44 | // Your definitions here. 45 | } 46 | 47 | 48 | func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) error { 49 | // Your code here. 50 | return nil 51 | } 52 | 53 | // RPC handler for client Put and Append requests 54 | func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 55 | // Your code here. 56 | return nil 57 | } 58 | 59 | // 60 | // Ask the shardmaster if there's a new configuration; 61 | // if so, re-configure. 62 | // 63 | func (kv *ShardKV) tick() { 64 | } 65 | 66 | // tell the server to shut itself down. 67 | // please don't change these two functions. 68 | func (kv *ShardKV) kill() { 69 | atomic.StoreInt32(&kv.dead, 1) 70 | kv.l.Close() 71 | kv.px.Kill() 72 | } 73 | 74 | // call this to find out if the server is dead. 75 | func (kv *ShardKV) isdead() bool { 76 | return atomic.LoadInt32(&kv.dead) != 0 77 | } 78 | 79 | // please do not change these two functions. 80 | func (kv *ShardKV) Setunreliable(what bool) { 81 | if what { 82 | atomic.StoreInt32(&kv.unreliable, 1) 83 | } else { 84 | atomic.StoreInt32(&kv.unreliable, 0) 85 | } 86 | } 87 | 88 | func (kv *ShardKV) isunreliable() bool { 89 | return atomic.LoadInt32(&kv.unreliable) != 0 90 | } 91 | 92 | // 93 | // Start a shardkv server. 94 | // gid is the ID of the server's replica group. 95 | // shardmasters[] contains the ports of the 96 | // servers that implement the shardmaster. 97 | // servers[] contains the ports of the servers 98 | // in this replica group. 99 | // Me is the index of this server in servers[]. 100 | // 101 | func StartServer(gid int64, shardmasters []string, 102 | servers []string, me int) *ShardKV { 103 | gob.Register(Op{}) 104 | 105 | kv := new(ShardKV) 106 | kv.me = me 107 | kv.gid = gid 108 | kv.sm = shardmaster.MakeClerk(shardmasters) 109 | 110 | // Your initialization code here. 111 | // Don't call Join(). 112 | 113 | rpcs := rpc.NewServer() 114 | rpcs.Register(kv) 115 | 116 | kv.px = paxos.Make(servers, me, rpcs) 117 | 118 | 119 | os.Remove(servers[me]) 120 | l, e := net.Listen("unix", servers[me]) 121 | if e != nil { 122 | log.Fatal("listen error: ", e) 123 | } 124 | kv.l = l 125 | 126 | // please do not change any of the following code, 127 | // or do anything to subvert it. 128 | 129 | go func() { 130 | for kv.isdead() == false { 131 | conn, err := kv.l.Accept() 132 | if err == nil && kv.isdead() == false { 133 | if kv.isunreliable() && (rand.Int63()%1000) < 100 { 134 | // discard the request. 
135 | conn.Close() 136 | } else if kv.isunreliable() && (rand.Int63()%1000) < 200 { 137 | // process the request but force discard of reply. 138 | c1 := conn.(*net.UnixConn) 139 | f, _ := c1.File() 140 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 141 | if err != nil { 142 | fmt.Printf("shutdown: %v\n", err) 143 | } 144 | go rpcs.ServeConn(conn) 145 | } else { 146 | go rpcs.ServeConn(conn) 147 | } 148 | } else if err == nil { 149 | conn.Close() 150 | } 151 | if err != nil && kv.isdead() == false { 152 | fmt.Printf("ShardKV(%v) accept: %v\n", me, err.Error()) 153 | kv.kill() 154 | } 155 | } 156 | }() 157 | 158 | go func() { 159 | for kv.isdead() == false { 160 | kv.tick() 161 | time.Sleep(250 * time.Millisecond) 162 | } 163 | }() 164 | 165 | return kv 166 | } 167 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardkv/test_test.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | import "testing" 4 | import "shardmaster" 5 | import "runtime" 6 | import "strconv" 7 | import "os" 8 | import "time" 9 | import "fmt" 10 | import "sync" 11 | import "sync/atomic" 12 | import "math/rand" 13 | 14 | // information about the servers of one replica group. 15 | type tGroup struct { 16 | gid int64 17 | servers []*ShardKV 18 | ports []string 19 | } 20 | 21 | // information about all the servers of a k/v cluster. 22 | type tCluster struct { 23 | t *testing.T 24 | masters []*shardmaster.ShardMaster 25 | mck *shardmaster.Clerk 26 | masterports []string 27 | groups []*tGroup 28 | } 29 | 30 | func port(tag string, host int) string { 31 | s := "/var/tmp/824-" 32 | s += strconv.Itoa(os.Getuid()) + "/" 33 | os.Mkdir(s, 0777) 34 | s += "skv-" 35 | s += strconv.Itoa(os.Getpid()) + "-" 36 | s += tag + "-" 37 | s += strconv.Itoa(host) 38 | return s 39 | } 40 | 41 | // 42 | // start a k/v replica server thread. 
43 | // 44 | func (tc *tCluster) start1(gi int, si int, unreliable bool) { 45 | s := StartServer(tc.groups[gi].gid, tc.masterports, tc.groups[gi].ports, si) 46 | tc.groups[gi].servers[si] = s 47 | s.Setunreliable(unreliable) 48 | } 49 | 50 | func (tc *tCluster) cleanup() { 51 | for gi := 0; gi < len(tc.groups); gi++ { 52 | g := tc.groups[gi] 53 | for si := 0; si < len(g.servers); si++ { 54 | if g.servers[si] != nil { 55 | g.servers[si].kill() 56 | } 57 | } 58 | } 59 | 60 | for i := 0; i < len(tc.masters); i++ { 61 | if tc.masters[i] != nil { 62 | tc.masters[i].Kill() 63 | } 64 | } 65 | } 66 | 67 | func (tc *tCluster) shardclerk() *shardmaster.Clerk { 68 | return shardmaster.MakeClerk(tc.masterports) 69 | } 70 | 71 | func (tc *tCluster) clerk() *Clerk { 72 | return MakeClerk(tc.masterports) 73 | } 74 | 75 | func (tc *tCluster) join(gi int) { 76 | tc.mck.Join(tc.groups[gi].gid, tc.groups[gi].ports) 77 | } 78 | 79 | func (tc *tCluster) leave(gi int) { 80 | tc.mck.Leave(tc.groups[gi].gid) 81 | } 82 | 83 | func setup(t *testing.T, tag string, unreliable bool) *tCluster { 84 | runtime.GOMAXPROCS(4) 85 | 86 | const nmasters = 3 87 | const ngroups = 3 // replica groups 88 | const nreplicas = 3 // servers per group 89 | 90 | tc := &tCluster{} 91 | tc.t = t 92 | tc.masters = make([]*shardmaster.ShardMaster, nmasters) 93 | tc.masterports = make([]string, nmasters) 94 | 95 | for i := 0; i < nmasters; i++ { 96 | tc.masterports[i] = port(tag+"m", i) 97 | } 98 | for i := 0; i < nmasters; i++ { 99 | tc.masters[i] = shardmaster.StartServer(tc.masterports, i) 100 | } 101 | tc.mck = tc.shardclerk() 102 | 103 | tc.groups = make([]*tGroup, ngroups) 104 | 105 | for i := 0; i < ngroups; i++ { 106 | tc.groups[i] = &tGroup{} 107 | tc.groups[i].gid = int64(i + 100) 108 | tc.groups[i].servers = make([]*ShardKV, nreplicas) 109 | tc.groups[i].ports = make([]string, nreplicas) 110 | for j := 0; j < nreplicas; j++ { 111 | tc.groups[i].ports[j] = port(tag+"s", (i*nreplicas)+j) 112 | } 113 | for j := 0; j < nreplicas; j++ { 114 | tc.start1(i, j, unreliable) 115 | } 116 | } 117 | 118 | // return smh, gids, ha, sa, clean 119 | return tc 120 | } 121 | 122 | func TestBasic(t *testing.T) { 123 | tc := setup(t, "basic", false) 124 | defer tc.cleanup() 125 | 126 | fmt.Printf("Test: Basic Join/Leave ...\n") 127 | 128 | tc.join(0) 129 | 130 | ck := tc.clerk() 131 | 132 | ck.Put("a", "x") 133 | ck.Append("a", "b") 134 | if ck.Get("a") != "xb" { 135 | t.Fatalf("Get got wrong value") 136 | } 137 | 138 | keys := make([]string, 10) 139 | vals := make([]string, len(keys)) 140 | for i := 0; i < len(keys); i++ { 141 | keys[i] = strconv.Itoa(rand.Int()) 142 | vals[i] = strconv.Itoa(rand.Int()) 143 | ck.Put(keys[i], vals[i]) 144 | } 145 | 146 | // are keys still there after joins? 147 | for g := 1; g < len(tc.groups); g++ { 148 | tc.join(g) 149 | time.Sleep(1 * time.Second) 150 | for i := 0; i < len(keys); i++ { 151 | v := ck.Get(keys[i]) 152 | if v != vals[i] { 153 | t.Fatalf("joining; wrong value; g=%v k=%v wanted=%v got=%v", 154 | g, keys[i], vals[i], v) 155 | } 156 | vals[i] = strconv.Itoa(rand.Int()) 157 | ck.Put(keys[i], vals[i]) 158 | } 159 | } 160 | 161 | // are keys still there after leaves? 
162 | for g := 0; g < len(tc.groups)-1; g++ { 163 | tc.leave(g) 164 | time.Sleep(1 * time.Second) 165 | for i := 0; i < len(keys); i++ { 166 | v := ck.Get(keys[i]) 167 | if v != vals[i] { 168 | t.Fatalf("leaving; wrong value; g=%v k=%v wanted=%v got=%v", 169 | g, keys[i], vals[i], v) 170 | } 171 | vals[i] = strconv.Itoa(rand.Int()) 172 | ck.Put(keys[i], vals[i]) 173 | } 174 | } 175 | 176 | fmt.Printf(" ... Passed\n") 177 | } 178 | 179 | func TestMove(t *testing.T) { 180 | tc := setup(t, "move", false) 181 | defer tc.cleanup() 182 | 183 | fmt.Printf("Test: Shards really move ...\n") 184 | 185 | tc.join(0) 186 | 187 | ck := tc.clerk() 188 | 189 | // insert one key per shard 190 | for i := 0; i < shardmaster.NShards; i++ { 191 | ck.Put(string('0'+i), string('0'+i)) 192 | } 193 | 194 | // add group 1. 195 | tc.join(1) 196 | time.Sleep(5 * time.Second) 197 | 198 | // check that keys are still there. 199 | for i := 0; i < shardmaster.NShards; i++ { 200 | if ck.Get(string('0'+i)) != string('0'+i) { 201 | t.Fatalf("missing key/value") 202 | } 203 | } 204 | 205 | // remove sockets from group 0. 206 | for _, port := range tc.groups[0].ports { 207 | os.Remove(port) 208 | } 209 | 210 | count := int32(0) 211 | var mu sync.Mutex 212 | for i := 0; i < shardmaster.NShards; i++ { 213 | go func(me int) { 214 | myck := tc.clerk() 215 | v := myck.Get(string('0' + me)) 216 | if v == string('0'+me) { 217 | mu.Lock() 218 | atomic.AddInt32(&count, 1) 219 | mu.Unlock() 220 | } else { 221 | t.Fatalf("Get(%v) yielded %v\n", me, v) 222 | } 223 | }(i) 224 | } 225 | 226 | time.Sleep(10 * time.Second) 227 | 228 | ccc := atomic.LoadInt32(&count) 229 | if ccc > shardmaster.NShards/3 && ccc < 2*(shardmaster.NShards/3) { 230 | fmt.Printf(" ... Passed\n") 231 | } else { 232 | t.Fatalf("%v keys worked after killing 1/2 of groups; wanted %v", 233 | ccc, shardmaster.NShards/2) 234 | } 235 | } 236 | 237 | func TestLimp(t *testing.T) { 238 | tc := setup(t, "limp", false) 239 | defer tc.cleanup() 240 | 241 | fmt.Printf("Test: Reconfiguration with some dead replicas ...\n") 242 | 243 | tc.join(0) 244 | 245 | ck := tc.clerk() 246 | 247 | ck.Put("a", "b") 248 | if ck.Get("a") != "b" { 249 | t.Fatalf("got wrong value") 250 | } 251 | 252 | // kill one server from each replica group. 253 | for gi := 0; gi < len(tc.groups); gi++ { 254 | sa := tc.groups[gi].servers 255 | ns := len(sa) 256 | sa[rand.Int()%ns].kill() 257 | } 258 | 259 | keys := make([]string, 10) 260 | vals := make([]string, len(keys)) 261 | for i := 0; i < len(keys); i++ { 262 | keys[i] = strconv.Itoa(rand.Int()) 263 | vals[i] = strconv.Itoa(rand.Int()) 264 | ck.Put(keys[i], vals[i]) 265 | } 266 | 267 | // are keys still there after joins? 268 | for g := 1; g < len(tc.groups); g++ { 269 | tc.join(g) 270 | time.Sleep(1 * time.Second) 271 | for i := 0; i < len(keys); i++ { 272 | v := ck.Get(keys[i]) 273 | if v != vals[i] { 274 | t.Fatalf("joining; wrong value; g=%v k=%v wanted=%v got=%v", 275 | g, keys[i], vals[i], v) 276 | } 277 | vals[i] = strconv.Itoa(rand.Int()) 278 | ck.Put(keys[i], vals[i]) 279 | } 280 | } 281 | 282 | // are keys still there after leaves? 
283 | for gi := 0; gi < len(tc.groups)-1; gi++ { 284 | tc.leave(gi) 285 | time.Sleep(2 * time.Second) 286 | g := tc.groups[gi] 287 | for i := 0; i < len(g.servers); i++ { 288 | g.servers[i].kill() 289 | } 290 | for i := 0; i < len(keys); i++ { 291 | v := ck.Get(keys[i]) 292 | if v != vals[i] { 293 | t.Fatalf("leaving; wrong value; g=%v k=%v wanted=%v got=%v", 294 | g, keys[i], vals[i], v) 295 | } 296 | vals[i] = strconv.Itoa(rand.Int()) 297 | ck.Put(keys[i], vals[i]) 298 | } 299 | } 300 | 301 | fmt.Printf(" ... Passed\n") 302 | } 303 | 304 | func doConcurrent(t *testing.T, unreliable bool) { 305 | tc := setup(t, "concurrent-"+strconv.FormatBool(unreliable), unreliable) 306 | defer tc.cleanup() 307 | 308 | for i := 0; i < len(tc.groups); i++ { 309 | tc.join(i) 310 | } 311 | 312 | const npara = 11 313 | var ca [npara]chan bool 314 | for i := 0; i < npara; i++ { 315 | ca[i] = make(chan bool) 316 | go func(me int) { 317 | ok := true 318 | defer func() { ca[me] <- ok }() 319 | ck := tc.clerk() 320 | mymck := tc.shardclerk() 321 | key := strconv.Itoa(me) 322 | last := "" 323 | for iters := 0; iters < 3; iters++ { 324 | nv := strconv.Itoa(rand.Int()) 325 | ck.Append(key, nv) 326 | last = last + nv 327 | v := ck.Get(key) 328 | if v != last { 329 | ok = false 330 | t.Fatalf("Get(%v) expected %v got %v\n", key, last, v) 331 | } 332 | 333 | gi := rand.Int() % len(tc.groups) 334 | gid := tc.groups[gi].gid 335 | mymck.Move(rand.Int()%shardmaster.NShards, gid) 336 | 337 | time.Sleep(time.Duration(rand.Int()%30) * time.Millisecond) 338 | } 339 | }(i) 340 | } 341 | 342 | for i := 0; i < npara; i++ { 343 | x := <-ca[i] 344 | if x == false { 345 | t.Fatalf("something is wrong") 346 | } 347 | } 348 | } 349 | 350 | func TestConcurrent(t *testing.T) { 351 | fmt.Printf("Test: Concurrent Put/Get/Move ...\n") 352 | doConcurrent(t, false) 353 | fmt.Printf(" ... Passed\n") 354 | } 355 | 356 | func TestConcurrentUnreliable(t *testing.T) { 357 | fmt.Printf("Test: Concurrent Put/Get/Move (unreliable) ...\n") 358 | doConcurrent(t, true) 359 | fmt.Printf(" ... Passed\n") 360 | } 361 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardmaster/client.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Shardmaster clerk. 5 | // Please don't change this file. 6 | // 7 | 8 | import "net/rpc" 9 | import "time" 10 | import "fmt" 11 | 12 | type Clerk struct { 13 | servers []string // shardmaster replicas 14 | } 15 | 16 | func MakeClerk(servers []string) *Clerk { 17 | ck := new(Clerk) 18 | ck.servers = servers 19 | return ck 20 | } 21 | 22 | // 23 | // call() sends an RPC to the rpcname handler on server srv 24 | // with arguments args, waits for the reply, and leaves the 25 | // reply in reply. the reply argument should be a pointer 26 | // to a reply structure. 27 | // 28 | // the return value is true if the server responded, and false 29 | // if call() was not able to contact the server. in particular, 30 | // the reply's contents are only valid if call() returned true. 31 | // 32 | // you should assume that call() will return an 33 | // error after a while if the server is dead. 34 | // don't provide your own time-out mechanism. 35 | // 36 | // please use call() to send all RPCs, in client.go and server.go. 37 | // please don't change this function. 
38 | // 39 | func call(srv string, rpcname string, 40 | args interface{}, reply interface{}) bool { 41 | c, errx := rpc.Dial("unix", srv) 42 | if errx != nil { 43 | return false 44 | } 45 | defer c.Close() 46 | 47 | err := c.Call(rpcname, args, reply) 48 | if err == nil { 49 | return true 50 | } 51 | 52 | fmt.Println(err) 53 | return false 54 | } 55 | 56 | func (ck *Clerk) Query(num int) Config { 57 | for { 58 | // try each known server. 59 | for _, srv := range ck.servers { 60 | args := &QueryArgs{} 61 | args.Num = num 62 | var reply QueryReply 63 | ok := call(srv, "ShardMaster.Query", args, &reply) 64 | if ok { 65 | return reply.Config 66 | } 67 | } 68 | time.Sleep(100 * time.Millisecond) 69 | } 70 | } 71 | 72 | func (ck *Clerk) Join(gid int64, servers []string) { 73 | for { 74 | // try each known server. 75 | for _, srv := range ck.servers { 76 | args := &JoinArgs{} 77 | args.GID = gid 78 | args.Servers = servers 79 | var reply JoinReply 80 | ok := call(srv, "ShardMaster.Join", args, &reply) 81 | if ok { 82 | return 83 | } 84 | } 85 | time.Sleep(100 * time.Millisecond) 86 | } 87 | } 88 | 89 | func (ck *Clerk) Leave(gid int64) { 90 | for { 91 | // try each known server. 92 | for _, srv := range ck.servers { 93 | args := &LeaveArgs{} 94 | args.GID = gid 95 | var reply LeaveReply 96 | ok := call(srv, "ShardMaster.Leave", args, &reply) 97 | if ok { 98 | return 99 | } 100 | } 101 | time.Sleep(100 * time.Millisecond) 102 | } 103 | } 104 | 105 | func (ck *Clerk) Move(shard int, gid int64) { 106 | for { 107 | // try each known server. 108 | for _, srv := range ck.servers { 109 | args := &MoveArgs{} 110 | args.Shard = shard 111 | args.GID = gid 112 | var reply MoveReply 113 | ok := call(srv, "ShardMaster.Move", args, &reply) 114 | if ok { 115 | return 116 | } 117 | } 118 | time.Sleep(100 * time.Millisecond) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardmaster/common.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Master shard server: assigns shards to replication groups. 5 | // 6 | // RPC interface: 7 | // Join(gid, servers) -- replica group gid is joining, give it some shards. 8 | // Leave(gid) -- replica group gid is retiring, hand off all its shards. 9 | // Move(shard, gid) -- hand off one shard from current owner to gid. 10 | // Query(num) -> fetch Config # num, or latest config if num==-1. 11 | // 12 | // A Config (configuration) describes a set of replica groups, and the 13 | // replica group responsible for each shard. Configs are numbered. Config 14 | // #0 is the initial configuration, with no groups and all shards 15 | // assigned to group 0 (the invalid group). 16 | // 17 | // A GID is a replica group ID. GIDs must be uniqe and > 0. 18 | // Once a GID joins, and leaves, it should never join again. 19 | // 20 | // Please don't change this file. 
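// Illustrative example (not part of the interface): with NShards = 10 and
// two replica groups 100 and 101 that have both joined, a balanced
// configuration could look like
//
//	Config{
//		Num:    2,
//		Shards: [NShards]int64{100, 100, 100, 100, 100, 101, 101, 101, 101, 101},
//		Groups: map[int64][]string{100: {/* ports */}, 101: {/* ports */}},
//	}
//
// Clients map a key to a shard number, index Shards with it to get a gid,
// and then look that gid up in Groups to find the servers to contact.
//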
21 | // 22 | 23 | const NShards = 10 24 | 25 | type Config struct { 26 | Num int // config number 27 | Shards [NShards]int64 // shard -> gid 28 | Groups map[int64][]string // gid -> servers[] 29 | } 30 | 31 | type JoinArgs struct { 32 | GID int64 // unique replica group ID 33 | Servers []string // group server ports 34 | } 35 | 36 | type JoinReply struct { 37 | } 38 | 39 | type LeaveArgs struct { 40 | GID int64 41 | } 42 | 43 | type LeaveReply struct { 44 | } 45 | 46 | type MoveArgs struct { 47 | Shard int 48 | GID int64 49 | } 50 | 51 | type MoveReply struct { 52 | } 53 | 54 | type QueryArgs struct { 55 | Num int // desired config number 56 | } 57 | 58 | type QueryReply struct { 59 | Config Config 60 | } 61 | -------------------------------------------------------------------------------- /6.824/src/paxos-shardmaster/server.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | 8 | import "paxos" 9 | import "sync" 10 | import "sync/atomic" 11 | import "os" 12 | import "syscall" 13 | import "encoding/gob" 14 | import "math/rand" 15 | 16 | type ShardMaster struct { 17 | mu sync.Mutex 18 | l net.Listener 19 | me int 20 | dead int32 // for testing 21 | unreliable int32 // for testing 22 | px *paxos.Paxos 23 | 24 | configs []Config // indexed by config num 25 | } 26 | 27 | 28 | type Op struct { 29 | // Your data here. 30 | } 31 | 32 | 33 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) error { 34 | // Your code here. 35 | 36 | return nil 37 | } 38 | 39 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) error { 40 | // Your code here. 41 | 42 | return nil 43 | } 44 | 45 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) error { 46 | // Your code here. 47 | 48 | return nil 49 | } 50 | 51 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) error { 52 | // Your code here. 53 | 54 | return nil 55 | } 56 | 57 | // please don't change these two functions. 58 | func (sm *ShardMaster) Kill() { 59 | atomic.StoreInt32(&sm.dead, 1) 60 | sm.l.Close() 61 | sm.px.Kill() 62 | } 63 | 64 | // call this to find out if the server is dead. 65 | func (sm *ShardMaster) isdead() bool { 66 | return atomic.LoadInt32(&sm.dead) != 0 67 | } 68 | 69 | // please do not change these two functions. 70 | func (sm *ShardMaster) setunreliable(what bool) { 71 | if what { 72 | atomic.StoreInt32(&sm.unreliable, 1) 73 | } else { 74 | atomic.StoreInt32(&sm.unreliable, 0) 75 | } 76 | } 77 | 78 | func (sm *ShardMaster) isunreliable() bool { 79 | return atomic.LoadInt32(&sm.unreliable) != 0 80 | } 81 | 82 | // 83 | // servers[] contains the ports of the set of 84 | // servers that will cooperate via Paxos to 85 | // form the fault-tolerant shardmaster service. 86 | // me is the index of the current server in servers[]. 87 | // 88 | func StartServer(servers []string, me int) *ShardMaster { 89 | sm := new(ShardMaster) 90 | sm.me = me 91 | 92 | sm.configs = make([]Config, 1) 93 | sm.configs[0].Groups = map[int64][]string{} 94 | 95 | rpcs := rpc.NewServer() 96 | 97 | gob.Register(Op{}) 98 | rpcs.Register(sm) 99 | sm.px = paxos.Make(servers, me, rpcs) 100 | 101 | os.Remove(servers[me]) 102 | l, e := net.Listen("unix", servers[me]) 103 | if e != nil { 104 | log.Fatal("listen error: ", e) 105 | } 106 | sm.l = l 107 | 108 | // please do not change any of the following code, 109 | // or do anything to subvert it. 
110 | 111 | go func() { 112 | for sm.isdead() == false { 113 | conn, err := sm.l.Accept() 114 | if err == nil && sm.isdead() == false { 115 | if sm.isunreliable() && (rand.Int63()%1000) < 100 { 116 | // discard the request. 117 | conn.Close() 118 | } else if sm.isunreliable() && (rand.Int63()%1000) < 200 { 119 | // process the request but force discard of reply. 120 | c1 := conn.(*net.UnixConn) 121 | f, _ := c1.File() 122 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 123 | if err != nil { 124 | fmt.Printf("shutdown: %v\n", err) 125 | } 126 | go rpcs.ServeConn(conn) 127 | } else { 128 | go rpcs.ServeConn(conn) 129 | } 130 | } else if err == nil { 131 | conn.Close() 132 | } 133 | if err != nil && sm.isdead() == false { 134 | fmt.Printf("ShardMaster(%v) accept: %v\n", me, err.Error()) 135 | sm.Kill() 136 | } 137 | } 138 | }() 139 | 140 | return sm 141 | } 142 | -------------------------------------------------------------------------------- /6.824/src/paxos/paxos.go: -------------------------------------------------------------------------------- 1 | package paxos 2 | 3 | // 4 | // Paxos library, to be included in an application. 5 | // Multiple applications will run, each including 6 | // a Paxos peer. 7 | // 8 | // Manages a sequence of agreed-on values. 9 | // The set of peers is fixed. 10 | // Copes with network failures (partition, msg loss, &c). 11 | // Does not store anything persistently, so cannot handle crash+restart. 12 | // 13 | // The application interface: 14 | // 15 | // px = paxos.Make(peers []string, me int) 16 | // px.Start(seq int, v interface{}) -- start agreement on new instance 17 | // px.Status(seq int) (Fate, v interface{}) -- get info about an instance 18 | // px.Done(seq int) -- ok to forget all instances <= seq 19 | // px.Max() int -- highest instance seq known, or -1 20 | // px.Min() int -- instances before this seq have been forgotten 21 | // 22 | 23 | import "net" 24 | import "net/rpc" 25 | import "log" 26 | 27 | import "os" 28 | import "syscall" 29 | import "sync" 30 | import "sync/atomic" 31 | import "fmt" 32 | import "math/rand" 33 | 34 | 35 | // px.Status() return values, indicating 36 | // whether an agreement has been decided, 37 | // or Paxos has not yet reached agreement, 38 | // or it was agreed but forgotten (i.e. < Min()). 39 | type Fate int 40 | 41 | const ( 42 | Decided Fate = iota + 1 43 | Pending // not yet decided. 44 | Forgotten // decided but forgotten. 45 | ) 46 | 47 | type Paxos struct { 48 | mu sync.Mutex 49 | l net.Listener 50 | dead int32 // for testing 51 | unreliable int32 // for testing 52 | rpcCount int32 // for testing 53 | peers []string 54 | me int // index into peers[] 55 | 56 | 57 | // Your data here. 58 | } 59 | 60 | // 61 | // call() sends an RPC to the rpcname handler on server srv 62 | // with arguments args, waits for the reply, and leaves the 63 | // reply in reply. the reply argument should be a pointer 64 | // to a reply structure. 65 | // 66 | // the return value is true if the server responded, and false 67 | // if call() was not able to contact the server. in particular, 68 | // the replys contents are only valid if call() returned true. 69 | // 70 | // you should assume that call() will time out and return an 71 | // error after a while if it does not get a reply from the server. 72 | // 73 | // please use call() to send all RPCs, in client.go and server.go. 74 | // please do not change this function. 
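//
// Illustrative usage sketch (not part of the required code): an application
// such as kvpaxos typically drives one round of agreement like this, where
// op is whatever gob-registered value it wants the peers to agree on:
//
//	seq := px.Max() + 1 // one simple way to pick an instance number
//	px.Start(seq, op)
//	to := 10 * time.Millisecond
//	for {
//		fate, v := px.Status(seq)
//		if fate == Decided {
//			// v is the decided value; it may be some other peer's
//			// proposal, not necessarily op
//			_ = v
//			break
//		}
//		time.Sleep(to)
//		if to < 10*time.Second {
//			to *= 2 // back off while waiting for agreement
//		}
//	}
//	px.Done(seq) // this peer no longer needs instances <= seq
//
// ("time" would have to be imported wherever this runs.)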
75 | // 76 | func call(srv string, name string, args interface{}, reply interface{}) bool { 77 | c, err := rpc.Dial("unix", srv) 78 | if err != nil { 79 | err1 := err.(*net.OpError) 80 | if err1.Err != syscall.ENOENT && err1.Err != syscall.ECONNREFUSED { 81 | fmt.Printf("paxos Dial() failed: %v\n", err1) 82 | } 83 | return false 84 | } 85 | defer c.Close() 86 | 87 | err = c.Call(name, args, reply) 88 | if err == nil { 89 | return true 90 | } 91 | 92 | fmt.Println(err) 93 | return false 94 | } 95 | 96 | 97 | // 98 | // the application wants paxos to start agreement on 99 | // instance seq, with proposed value v. 100 | // Start() returns right away; the application will 101 | // call Status() to find out if/when agreement 102 | // is reached. 103 | // 104 | func (px *Paxos) Start(seq int, v interface{}) { 105 | // Your code here. 106 | } 107 | 108 | // 109 | // the application on this machine is done with 110 | // all instances <= seq. 111 | // 112 | // see the comments for Min() for more explanation. 113 | // 114 | func (px *Paxos) Done(seq int) { 115 | // Your code here. 116 | } 117 | 118 | // 119 | // the application wants to know the 120 | // highest instance sequence known to 121 | // this peer. 122 | // 123 | func (px *Paxos) Max() int { 124 | // Your code here. 125 | return 0 126 | } 127 | 128 | // 129 | // Min() should return one more than the minimum among z_i, 130 | // where z_i is the highest number ever passed 131 | // to Done() on peer i. A peers z_i is -1 if it has 132 | // never called Done(). 133 | // 134 | // Paxos is required to have forgotten all information 135 | // about any instances it knows that are < Min(). 136 | // The point is to free up memory in long-running 137 | // Paxos-based servers. 138 | // 139 | // Paxos peers need to exchange their highest Done() 140 | // arguments in order to implement Min(). These 141 | // exchanges can be piggybacked on ordinary Paxos 142 | // agreement protocol messages, so it is OK if one 143 | // peers Min does not reflect another Peers Done() 144 | // until after the next instance is agreed to. 145 | // 146 | // The fact that Min() is defined as a minimum over 147 | // *all* Paxos peers means that Min() cannot increase until 148 | // all peers have been heard from. So if a peer is dead 149 | // or unreachable, other peers Min()s will not increase 150 | // even if all reachable peers call Done. The reason for 151 | // this is that when the unreachable peer comes back to 152 | // life, it will need to catch up on instances that it 153 | // missed -- the other peers therefor cannot forget these 154 | // instances. 155 | // 156 | func (px *Paxos) Min() int { 157 | // You code here. 158 | return 0 159 | } 160 | 161 | // 162 | // the application wants to know whether this 163 | // peer thinks an instance has been decided, 164 | // and if so what the agreed value is. Status() 165 | // should just inspect the local peer state; 166 | // it should not contact other Paxos peers. 167 | // 168 | func (px *Paxos) Status(seq int) (Fate, interface{}) { 169 | // Your code here. 170 | return Pending, nil 171 | } 172 | 173 | 174 | 175 | // 176 | // tell the peer to shut itself down. 177 | // for testing. 178 | // please do not change these two functions. 179 | // 180 | func (px *Paxos) Kill() { 181 | atomic.StoreInt32(&px.dead, 1) 182 | if px.l != nil { 183 | px.l.Close() 184 | } 185 | } 186 | 187 | // 188 | // has this peer been asked to shut down? 
189 | // 190 | func (px *Paxos) isdead() bool { 191 | return atomic.LoadInt32(&px.dead) != 0 192 | } 193 | 194 | // please do not change these two functions. 195 | func (px *Paxos) setunreliable(what bool) { 196 | if what { 197 | atomic.StoreInt32(&px.unreliable, 1) 198 | } else { 199 | atomic.StoreInt32(&px.unreliable, 0) 200 | } 201 | } 202 | 203 | func (px *Paxos) isunreliable() bool { 204 | return atomic.LoadInt32(&px.unreliable) != 0 205 | } 206 | 207 | // 208 | // the application wants to create a paxos peer. 209 | // the ports of all the paxos peers (including this one) 210 | // are in peers[]. this servers port is peers[me]. 211 | // 212 | func Make(peers []string, me int, rpcs *rpc.Server) *Paxos { 213 | px := &Paxos{} 214 | px.peers = peers 215 | px.me = me 216 | 217 | 218 | // Your initialization code here. 219 | 220 | if rpcs != nil { 221 | // caller will create socket &c 222 | rpcs.Register(px) 223 | } else { 224 | rpcs = rpc.NewServer() 225 | rpcs.Register(px) 226 | 227 | // prepare to receive connections from clients. 228 | // change "unix" to "tcp" to use over a network. 229 | os.Remove(peers[me]) // only needed for "unix" 230 | l, e := net.Listen("unix", peers[me]) 231 | if e != nil { 232 | log.Fatal("listen error: ", e) 233 | } 234 | px.l = l 235 | 236 | // please do not change any of the following code, 237 | // or do anything to subvert it. 238 | 239 | // create a thread to accept RPC connections 240 | go func() { 241 | for px.isdead() == false { 242 | conn, err := px.l.Accept() 243 | if err == nil && px.isdead() == false { 244 | if px.isunreliable() && (rand.Int63()%1000) < 100 { 245 | // discard the request. 246 | conn.Close() 247 | } else if px.isunreliable() && (rand.Int63()%1000) < 200 { 248 | // process the request but force discard of reply. 249 | c1 := conn.(*net.UnixConn) 250 | f, _ := c1.File() 251 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 252 | if err != nil { 253 | fmt.Printf("shutdown: %v\n", err) 254 | } 255 | atomic.AddInt32(&px.rpcCount, 1) 256 | go rpcs.ServeConn(conn) 257 | } else { 258 | atomic.AddInt32(&px.rpcCount, 1) 259 | go rpcs.ServeConn(conn) 260 | } 261 | } else if err == nil { 262 | conn.Close() 263 | } 264 | if err != nil && px.isdead() == false { 265 | fmt.Printf("Paxos(%v) accept: %v\n", me, err.Error()) 266 | } 267 | } 268 | }() 269 | } 270 | 271 | 272 | return px 273 | } 274 | -------------------------------------------------------------------------------- /6.824/src/pbservice/client.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import "viewservice" 4 | import "net/rpc" 5 | import "fmt" 6 | 7 | import "crypto/rand" 8 | import "math/big" 9 | 10 | 11 | type Clerk struct { 12 | vs *viewservice.Clerk 13 | // Your declarations here 14 | } 15 | 16 | // this may come in handy. 17 | func nrand() int64 { 18 | max := big.NewInt(int64(1) << 62) 19 | bigx, _ := rand.Int(rand.Reader, max) 20 | x := bigx.Int64() 21 | return x 22 | } 23 | 24 | func MakeClerk(vshost string, me string) *Clerk { 25 | ck := new(Clerk) 26 | ck.vs = viewservice.MakeClerk(me, vshost) 27 | // Your ck.* initializations here 28 | 29 | return ck 30 | } 31 | 32 | 33 | // 34 | // call() sends an RPC to the rpcname handler on server srv 35 | // with arguments args, waits for the reply, and leaves the 36 | // reply in reply. the reply argument should be a pointer 37 | // to a reply structure. 
38 | // 39 | // the return value is true if the server responded, and false 40 | // if call() was not able to contact the server. in particular, 41 | // the reply's contents are only valid if call() returned true. 42 | // 43 | // you should assume that call() will return an 44 | // error after a while if the server is dead. 45 | // don't provide your own time-out mechanism. 46 | // 47 | // please use call() to send all RPCs, in client.go and server.go. 48 | // please don't change this function. 49 | // 50 | func call(srv string, rpcname string, 51 | args interface{}, reply interface{}) bool { 52 | c, errx := rpc.Dial("unix", srv) 53 | if errx != nil { 54 | return false 55 | } 56 | defer c.Close() 57 | 58 | err := c.Call(rpcname, args, reply) 59 | if err == nil { 60 | return true 61 | } 62 | 63 | fmt.Println(err) 64 | return false 65 | } 66 | 67 | // 68 | // fetch a key's value from the current primary; 69 | // if they key has never been set, return "". 70 | // Get() must keep trying until it either the 71 | // primary replies with the value or the primary 72 | // says the key doesn't exist (has never been Put(). 73 | // 74 | func (ck *Clerk) Get(key string) string { 75 | 76 | // Your code here. 77 | 78 | return "???" 79 | } 80 | 81 | // 82 | // send a Put or Append RPC 83 | // 84 | func (ck *Clerk) PutAppend(key string, value string, op string) { 85 | 86 | // Your code here. 87 | } 88 | 89 | // 90 | // tell the primary to update key's value. 91 | // must keep trying until it succeeds. 92 | // 93 | func (ck *Clerk) Put(key string, value string) { 94 | ck.PutAppend(key, value, "Put") 95 | } 96 | 97 | // 98 | // tell the primary to append to key's value. 99 | // must keep trying until it succeeds. 100 | // 101 | func (ck *Clerk) Append(key string, value string) { 102 | ck.PutAppend(key, value, "Append") 103 | } 104 | -------------------------------------------------------------------------------- /6.824/src/pbservice/common.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | const ( 4 | OK = "OK" 5 | ErrNoKey = "ErrNoKey" 6 | ErrWrongServer = "ErrWrongServer" 7 | ) 8 | 9 | type Err string 10 | 11 | // Put or Append 12 | type PutAppendArgs struct { 13 | Key string 14 | Value string 15 | // You'll have to add definitions here. 16 | 17 | // Field names must start with capital letters, 18 | // otherwise RPC will break. 19 | } 20 | 21 | type PutAppendReply struct { 22 | Err Err 23 | } 24 | 25 | type GetArgs struct { 26 | Key string 27 | // You'll have to add definitions here. 28 | } 29 | 30 | type GetReply struct { 31 | Err Err 32 | Value string 33 | } 34 | 35 | 36 | // Your RPC definitions here. 37 | -------------------------------------------------------------------------------- /6.824/src/pbservice/server.go: -------------------------------------------------------------------------------- 1 | package pbservice 2 | 3 | import "net" 4 | import "fmt" 5 | import "net/rpc" 6 | import "log" 7 | import "time" 8 | import "viewservice" 9 | import "sync" 10 | import "sync/atomic" 11 | import "os" 12 | import "syscall" 13 | import "math/rand" 14 | 15 | 16 | 17 | type PBServer struct { 18 | mu sync.Mutex 19 | l net.Listener 20 | dead int32 // for testing 21 | unreliable int32 // for testing 22 | me string 23 | vs *viewservice.Clerk 24 | // Your declarations here. 25 | } 26 | 27 | 28 | func (pb *PBServer) Get(args *GetArgs, reply *GetReply) error { 29 | 30 | // Your code here. 
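	// a minimal sketch of one possible shape for Get(), assuming hypothetical
	// pb.view (latest viewservice.View) and pb.kv (map[string]string) fields
	// added to PBServer -- not the only correct design:
	//
	//	pb.mu.Lock()
	//	defer pb.mu.Unlock()
	//	if pb.view.Primary != pb.me {
	//		reply.Err = ErrWrongServer // only the primary answers reads
	//		return nil
	//	}
	//	value, ok := pb.kv[args.Key]
	//	if !ok {
	//		reply.Err = ErrNoKey
	//		reply.Value = ""
	//		return nil
	//	}
	//	reply.Err = OK
	//	reply.Value = value
	//	return nil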
31 | 32 | return nil 33 | } 34 | 35 | 36 | func (pb *PBServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) error { 37 | 38 | // Your code here. 39 | 40 | 41 | return nil 42 | } 43 | 44 | 45 | // 46 | // ping the viewserver periodically. 47 | // if view changed: 48 | // transition to new view. 49 | // manage transfer of state from primary to new backup. 50 | // 51 | func (pb *PBServer) tick() { 52 | 53 | // Your code here. 54 | } 55 | 56 | // tell the server to shut itself down. 57 | // please do not change these two functions. 58 | func (pb *PBServer) kill() { 59 | atomic.StoreInt32(&pb.dead, 1) 60 | pb.l.Close() 61 | } 62 | 63 | // call this to find out if the server is dead. 64 | func (pb *PBServer) isdead() bool { 65 | return atomic.LoadInt32(&pb.dead) != 0 66 | } 67 | 68 | // please do not change these two functions. 69 | func (pb *PBServer) setunreliable(what bool) { 70 | if what { 71 | atomic.StoreInt32(&pb.unreliable, 1) 72 | } else { 73 | atomic.StoreInt32(&pb.unreliable, 0) 74 | } 75 | } 76 | 77 | func (pb *PBServer) isunreliable() bool { 78 | return atomic.LoadInt32(&pb.unreliable) != 0 79 | } 80 | 81 | 82 | func StartServer(vshost string, me string) *PBServer { 83 | pb := new(PBServer) 84 | pb.me = me 85 | pb.vs = viewservice.MakeClerk(me, vshost) 86 | // Your pb.* initializations here. 87 | 88 | rpcs := rpc.NewServer() 89 | rpcs.Register(pb) 90 | 91 | os.Remove(pb.me) 92 | l, e := net.Listen("unix", pb.me) 93 | if e != nil { 94 | log.Fatal("listen error: ", e) 95 | } 96 | pb.l = l 97 | 98 | // please do not change any of the following code, 99 | // or do anything to subvert it. 100 | 101 | go func() { 102 | for pb.isdead() == false { 103 | conn, err := pb.l.Accept() 104 | if err == nil && pb.isdead() == false { 105 | if pb.isunreliable() && (rand.Int63()%1000) < 100 { 106 | // discard the request. 107 | conn.Close() 108 | } else if pb.isunreliable() && (rand.Int63()%1000) < 200 { 109 | // process the request but force discard of reply. 110 | c1 := conn.(*net.UnixConn) 111 | f, _ := c1.File() 112 | err := syscall.Shutdown(int(f.Fd()), syscall.SHUT_WR) 113 | if err != nil { 114 | fmt.Printf("shutdown: %v\n", err) 115 | } 116 | go rpcs.ServeConn(conn) 117 | } else { 118 | go rpcs.ServeConn(conn) 119 | } 120 | } else if err == nil { 121 | conn.Close() 122 | } 123 | if err != nil && pb.isdead() == false { 124 | fmt.Printf("PBServer(%v) accept: %v\n", me, err.Error()) 125 | pb.kill() 126 | } 127 | } 128 | }() 129 | 130 | go func() { 131 | for pb.isdead() == false { 132 | pb.tick() 133 | time.Sleep(viewservice.PingInterval) 134 | } 135 | }() 136 | 137 | return pb 138 | } 139 | -------------------------------------------------------------------------------- /6.824/src/raft/persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft and kvraft to save persistent 5 | // Raft state (log &c) and k/v server snapshots. 6 | // 7 | // we will use the original persister.go to test your code for grading. 8 | // so, while you can modify this code to help you debug, please 9 | // test with the original before submitting. 
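//
// minimal usage example, using only the API defined in this file:
//
//	ps := MakePersister()
//	ps.SaveRaftState([]byte("encoded raft state"))
//	ps.SaveSnapshot([]byte("encoded k/v snapshot"))
//	state := ps.ReadRaftState() // the same bytes after a simulated restart
//	snap := ps.ReadSnapshot()
//	size := ps.RaftStateSize()  // == len(state)
//
// note that Copy() below is a shallow copy: the byte slices are shared with
// the original Persister.
//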
10 | // 11 | 12 | import "sync" 13 | 14 | // 持久化对象 15 | type Persister struct { 16 | mu sync.Mutex // 锁保护 17 | raftstate []byte // Raft状态值 18 | snapshot []byte // 快照数据 19 | } 20 | 21 | // 创建 22 | func MakePersister() *Persister { 23 | return &Persister{} 24 | } 25 | 26 | // 拷贝持久化对象 27 | func (ps *Persister) Copy() *Persister { 28 | ps.mu.Lock() 29 | defer ps.mu.Unlock() 30 | np := MakePersister() 31 | // 居然是浅拷贝,数据变化相互影响 32 | np.raftstate = ps.raftstate 33 | np.snapshot = ps.snapshot 34 | return np 35 | } 36 | 37 | // 保存数据到持久化对象 38 | func (ps *Persister) SaveRaftState(data []byte) { 39 | ps.mu.Lock() 40 | defer ps.mu.Unlock() 41 | ps.raftstate = data 42 | } 43 | 44 | // 获取持久化数据 45 | func (ps *Persister) ReadRaftState() []byte { 46 | ps.mu.Lock() 47 | defer ps.mu.Unlock() 48 | return ps.raftstate 49 | } 50 | 51 | // 获取Raft状态数据的大小 52 | func (ps *Persister) RaftStateSize() int { 53 | ps.mu.Lock() 54 | defer ps.mu.Unlock() 55 | return len(ps.raftstate) 56 | } 57 | 58 | // 保存快照数据 59 | func (ps *Persister) SaveSnapshot(snapshot []byte) { 60 | ps.mu.Lock() 61 | defer ps.mu.Unlock() 62 | ps.snapshot = snapshot 63 | } 64 | 65 | // 获取快照数据 66 | func (ps *Persister) ReadSnapshot() []byte { 67 | ps.mu.Lock() 68 | defer ps.mu.Unlock() 69 | return ps.snapshot 70 | } 71 | -------------------------------------------------------------------------------- /6.824/src/raft/raft.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // this is an outline of the API that raft must expose to 5 | // the service (or tester). see comments below for 6 | // each of these functions for more details. 7 | // 8 | // rf = Make(...) 9 | // create a new Raft server. 10 | // rf.Start(command interface{}) (index, term, isleader) 11 | // start agreement on a new log entry 12 | // rf.GetState() (term, isLeader) 13 | // ask a Raft for its current term, and whether it thinks it is leader 14 | // ApplyMsg 15 | // each time a new entry is committed to the log, each Raft peer 16 | // should send an ApplyMsg to the service (or tester) 17 | // in the same server. 18 | // 19 | 20 | import "sync" 21 | import "labrpc" 22 | 23 | // import "bytes" 24 | // import "encoding/gob" 25 | 26 | 27 | 28 | // 29 | // as each Raft peer becomes aware that successive log entries are 30 | // committed, the peer should send an ApplyMsg to the service (or 31 | // tester) on the same server, via the applyCh passed to Make(). 32 | // 33 | type ApplyMsg struct { 34 | Index int 35 | Command interface{} 36 | UseSnapshot bool // ignore for lab2; only used in lab3 37 | Snapshot []byte // ignore for lab2; only used in lab3 38 | } 39 | 40 | // 41 | // A Go object implementing a single Raft peer. 42 | // 43 | type Raft struct { 44 | mu sync.Mutex 45 | peers []*labrpc.ClientEnd 46 | persister *Persister 47 | me int // index into peers[] 48 | 49 | // Your data here. 50 | // Look at the paper's Figure 2 for a description of what 51 | // state a Raft server must maintain. 
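	// a minimal sketch of the Figure 2 state, with conventional (not mandated)
	// field names; LogEntry is a hypothetical type such as
	// struct { Term int; Command interface{} }:
	//
	//	currentTerm int        // latest term this server has seen
	//	votedFor    int        // candidate voted for in currentTerm, or -1 if none
	//	log         []LogEntry // the replicated log
	//
	//	commitIndex int        // highest log index known to be committed
	//	lastApplied int        // highest log index applied to the state machine
	//
	//	nextIndex   []int      // leader only: next log index to send to each peer
	//	matchIndex  []int      // leader only: highest index known replicated on each peer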
52 | // 查看论文的图2部分,可知 53 | 54 | /* 55 | * 全部服务器上面的可持久化状态: 56 | * currentTerm 服务器看到的最近Term(第一次启动的时候为0,后面单调递增) 57 | * votedFor 当前Term收到的投票候选 (如果没有就为null) 58 | * log[] 日志项; 每个日志项包含机器状态和被leader接收的Term(first index is 1) 59 | */ 60 | // 删除代码部分 61 | /* 62 | * 全部服务器上面的不稳定状态: 63 | * commitIndex 已经被提交的最新的日志索引(第一次为0,后面单调递增) 64 | * lastApplied 已经应用到服务器状态的最新的日志索引(第一次为0,后面单调递增) 65 | */ 66 | // 删除代码部分 67 | 68 | /* 69 | * leader上面使用的不稳定状态(完成选举之后需要重新初始化) 70 | * nextIndex[] 71 | * 72 | * 73 | */ 74 | 75 | } 76 | 77 | // return currentTerm and whether this server 78 | // believes it is the leader. 79 | func (rf *Raft) GetState() (int, bool) { 80 | var term int 81 | var isleader bool 82 | // Your code here. 83 | return term, isleader 84 | } 85 | 86 | // 87 | // save Raft's persistent state to stable storage, 88 | // where it can later be retrieved after a crash and restart. 89 | // see paper's Figure 2 for a description of what should be persistent. 90 | // 91 | func (rf *Raft) persist() { 92 | // Your code here. 93 | // Example: 94 | // w := new(bytes.Buffer) 95 | // e := gob.NewEncoder(w) 96 | // e.Encode(rf.xxx) 97 | // e.Encode(rf.yyy) 98 | // data := w.Bytes() 99 | // rf.persister.SaveRaftState(data) 100 | } 101 | 102 | // 103 | // restore previously persisted state. 104 | // 105 | func (rf *Raft) readPersist(data []byte) { 106 | // Your code here. 107 | // Example: 108 | // r := bytes.NewBuffer(data) 109 | // d := gob.NewDecoder(r) 110 | // d.Decode(&rf.xxx) 111 | // d.Decode(&rf.yyy) 112 | } 113 | 114 | 115 | 116 | 117 | // 118 | // example RequestVote RPC arguments structure. 119 | // 120 | type RequestVoteArgs struct { 121 | // Your data here. 122 | } 123 | 124 | // 125 | // example RequestVote RPC reply structure. 126 | // 127 | type RequestVoteReply struct { 128 | // Your data here. 129 | } 130 | 131 | // 132 | // example RequestVote RPC handler. 133 | // 134 | func (rf *Raft) RequestVote(args RequestVoteArgs, reply *RequestVoteReply) { 135 | // Your code here. 136 | } 137 | 138 | // 139 | // example code to send a RequestVote RPC to a server. 140 | // server is the index of the target server in rf.peers[]. 141 | // expects RPC arguments in args. 142 | // fills in *reply with RPC reply, so caller should 143 | // pass &reply. 144 | // the types of the args and reply passed to Call() must be 145 | // the same as the types of the arguments declared in the 146 | // handler function (including whether they are pointers). 147 | // 148 | // returns true if labrpc says the RPC was delivered. 149 | // 150 | // if you're having trouble getting RPC to work, check that you've 151 | // capitalized all field names in structs passed over RPC, and 152 | // that the caller passes the address of the reply struct with &, not 153 | // the struct itself. 154 | // 155 | func (rf *Raft) sendRequestVote(server int, args RequestVoteArgs, reply *RequestVoteReply) bool { 156 | ok := rf.peers[server].Call("Raft.RequestVote", args, reply) 157 | return ok 158 | } 159 | 160 | 161 | // 162 | // the service using Raft (e.g. a k/v server) wants to start 163 | // agreement on the next command to be appended to Raft's log. if this 164 | // server isn't the leader, returns false. otherwise start the 165 | // agreement and return immediately. there is no guarantee that this 166 | // command will ever be committed to the Raft log, since the leader 167 | // may fail or lose an election. 168 | // 169 | // the first return value is the index that the command will appear at 170 | // if it's ever committed. 
the second return value is the current 171 | // term. the third return value is true if this server believes it is 172 | // the leader. 173 | // 174 | func (rf *Raft) Start(command interface{}) (int, int, bool) { 175 | index := -1 176 | term := -1 177 | isLeader := true 178 | 179 | 180 | return index, term, isLeader 181 | } 182 | 183 | // 184 | // the tester calls Kill() when a Raft instance won't 185 | // be needed again. you are not required to do anything 186 | // in Kill(), but it might be convenient to (for example) 187 | // turn off debug output from this instance. 188 | // 189 | func (rf *Raft) Kill() { 190 | // Your code here, if desired. 191 | } 192 | 193 | // 194 | // the service or tester wants to create a Raft server. the ports 195 | // of all the Raft servers (including this one) are in peers[]. this 196 | // server's port is peers[me]. all the servers' peers[] arrays 197 | // have the same order. persister is a place for this server to 198 | // save its persistent state, and also initially holds the most 199 | // recent saved state, if any. applyCh is a channel on which the 200 | // tester or service expects Raft to send ApplyMsg messages. 201 | // Make() must return quickly, so it should start goroutines 202 | // for any long-running work. 203 | // 204 | // 建一个Raft端点。 205 | // peers参数是通往其他Raft端点处于连接状态下的RPC连接。 206 | // me参数是自己在端点数组中的索引。 207 | func Make(peers []*labrpc.ClientEnd, me int, 208 | persister *Persister, applyCh chan ApplyMsg) *Raft { 209 | rf := &Raft{} 210 | rf.peers = peers 211 | rf.persister = persister 212 | rf.me = me 213 | 214 | // Your initialization code here. 215 | 216 | // initialize from state persisted before a crash 217 | rf.readPersist(persister.ReadRaftState()) 218 | 219 | 220 | return rf 221 | } 222 | -------------------------------------------------------------------------------- /6.824/src/raft/util.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "log" 4 | 5 | // Debugging 6 | const Debug = 0 7 | 8 | func DPrintf(format string, a ...interface{}) (n int, err error) { 9 | if Debug > 0 { 10 | log.Printf(format, a...) 11 | } 12 | return 13 | } 14 | -------------------------------------------------------------------------------- /6.824/src/shardkv/client.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // client code to talk to a sharded key/value service. 5 | // 6 | // the client first talks to the shardmaster to find out 7 | // the assignment of shards (keys) to groups, and then 8 | // talks to the group that holds the key's shard. 9 | // 10 | 11 | import "labrpc" 12 | import "crypto/rand" 13 | import "math/big" 14 | import "shardmaster" 15 | import "time" 16 | 17 | // 18 | // which shard is a key in? 19 | // please use this function, 20 | // and please do not change it. 21 | // 22 | func key2shard(key string) int { 23 | shard := 0 24 | if len(key) > 0 { 25 | shard = int(key[0]) 26 | } 27 | shard %= shardmaster.NShards 28 | return shard 29 | } 30 | 31 | func nrand() int64 { 32 | max := big.NewInt(int64(1) << 62) 33 | bigx, _ := rand.Int(rand.Reader, max) 34 | x := bigx.Int64() 35 | return x 36 | } 37 | 38 | type Clerk struct { 39 | sm *shardmaster.Clerk 40 | config shardmaster.Config 41 | make_end func(string) *labrpc.ClientEnd 42 | // You will have to modify this struct. 43 | } 44 | 45 | // 46 | // the tester calls MakeClerk. 47 | // 48 | // masters[] is needed to call shardmaster.MakeClerk(). 
49 | // 50 | // make_end(servername) turns a server name from a 51 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 52 | // send RPCs. 53 | // 54 | func MakeClerk(masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk { 55 | ck := new(Clerk) 56 | ck.sm = shardmaster.MakeClerk(masters) 57 | ck.make_end = make_end 58 | // You'll have to add code here. 59 | return ck 60 | } 61 | 62 | // 63 | // fetch the current value for a key. 64 | // returns "" if the key does not exist. 65 | // keeps trying forever in the face of all other errors. 66 | // You will have to modify this function. 67 | // 68 | func (ck *Clerk) Get(key string) string { 69 | args := GetArgs{} 70 | args.Key = key 71 | 72 | for { 73 | shard := key2shard(key) 74 | gid := ck.config.Shards[shard] 75 | if servers, ok := ck.config.Groups[gid]; ok { 76 | // try each server for the shard. 77 | for si := 0; si < len(servers); si++ { 78 | srv := ck.make_end(servers[si]) 79 | var reply GetReply 80 | ok := srv.Call("ShardKV.Get", &args, &reply) 81 | if ok && reply.WrongLeader == false && (reply.Err == OK || reply.Err == ErrNoKey) { 82 | return reply.Value 83 | } 84 | if ok && (reply.Err == ErrWrongGroup) { 85 | break 86 | } 87 | } 88 | } 89 | time.Sleep(100 * time.Millisecond) 90 | // ask master for the latest configuration. 91 | ck.config = ck.sm.Query(-1) 92 | } 93 | 94 | return "" 95 | } 96 | 97 | // 98 | // shared by Put and Append. 99 | // You will have to modify this function. 100 | // 101 | func (ck *Clerk) PutAppend(key string, value string, op string) { 102 | args := PutAppendArgs{} 103 | args.Key = key 104 | args.Value = value 105 | args.Op = op 106 | 107 | 108 | for { 109 | shard := key2shard(key) 110 | gid := ck.config.Shards[shard] 111 | if servers, ok := ck.config.Groups[gid]; ok { 112 | for si := 0; si < len(servers); si++ { 113 | srv := ck.make_end(servers[si]) 114 | var reply PutAppendReply 115 | ok := srv.Call("ShardKV.PutAppend", &args, &reply) 116 | if ok && reply.WrongLeader == false && reply.Err == OK { 117 | return 118 | } 119 | if ok && reply.Err == ErrWrongGroup { 120 | break 121 | } 122 | } 123 | } 124 | time.Sleep(100 * time.Millisecond) 125 | // ask master for the latest configuration. 126 | ck.config = ck.sm.Query(-1) 127 | } 128 | } 129 | 130 | func (ck *Clerk) Put(key string, value string) { 131 | ck.PutAppend(key, value, "Put") 132 | } 133 | func (ck *Clerk) Append(key string, value string) { 134 | ck.PutAppend(key, value, "Append") 135 | } 136 | -------------------------------------------------------------------------------- /6.824/src/shardkv/common.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | // 4 | // Sharded key/value server. 5 | // Lots of replica groups, each running op-at-a-time paxos. 6 | // Shardmaster decides which group serves each shard. 7 | // Shardmaster may change shard assignment from time to time. 8 | // 9 | // You will have to modify these definitions. 10 | // 11 | 12 | const ( 13 | OK = "OK" 14 | ErrNoKey = "ErrNoKey" 15 | ErrWrongGroup = "ErrWrongGroup" 16 | ) 17 | 18 | type Err string 19 | 20 | // Put or Append 21 | type PutAppendArgs struct { 22 | // You'll have to add definitions here. 23 | Key string 24 | Value string 25 | Op string // "Put" or "Append" 26 | // You'll have to add definitions here. 27 | // Field names must start with capital letters, 28 | // otherwise RPC will break. 
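	// a minimal sketch of fields commonly added here for duplicate detection;
	// the names are hypothetical, not required by the lab:
	//
	//	ClientId  int64 // unique client identifier (e.g. from nrand())
	//	RequestId int64 // per-client sequence number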
29 | } 30 | 31 | type PutAppendReply struct { 32 | WrongLeader bool 33 | Err Err 34 | } 35 | 36 | type GetArgs struct { 37 | Key string 38 | // You'll have to add definitions here. 39 | } 40 | 41 | type GetReply struct { 42 | WrongLeader bool 43 | Err Err 44 | Value string 45 | } 46 | -------------------------------------------------------------------------------- /6.824/src/shardkv/server.go: -------------------------------------------------------------------------------- 1 | package shardkv 2 | 3 | 4 | // import "shardmaster" 5 | import "labrpc" 6 | import "raft" 7 | import "sync" 8 | import "encoding/gob" 9 | 10 | 11 | 12 | type Op struct { 13 | // Your definitions here. 14 | // Field names must start with capital letters, 15 | // otherwise RPC will break. 16 | } 17 | 18 | type ShardKV struct { 19 | mu sync.Mutex 20 | me int 21 | rf *raft.Raft 22 | applyCh chan raft.ApplyMsg 23 | make_end func(string) *labrpc.ClientEnd 24 | gid int 25 | masters []*labrpc.ClientEnd 26 | maxraftstate int // snapshot if log grows this big 27 | 28 | // Your definitions here. 29 | } 30 | 31 | 32 | func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) { 33 | // Your code here. 34 | } 35 | 36 | func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) { 37 | // Your code here. 38 | } 39 | 40 | // 41 | // the tester calls Kill() when a ShardKV instance won't 42 | // be needed again. you are not required to do anything 43 | // in Kill(), but it might be convenient to (for example) 44 | // turn off debug output from this instance. 45 | // 46 | func (kv *ShardKV) Kill() { 47 | kv.rf.Kill() 48 | // Your code here, if desired. 49 | } 50 | 51 | 52 | // 53 | // servers[] contains the ports of the servers in this group. 54 | // 55 | // me is the index of the current server in servers[]. 56 | // 57 | // the k/v server should store snapshots with 58 | // persister.SaveSnapshot(), and Raft should save its state (including 59 | // log) with persister.SaveRaftState(). 60 | // 61 | // the k/v server should snapshot when Raft's saved state exceeds 62 | // maxraftstate bytes, in order to allow Raft to garbage-collect its 63 | // log. if maxraftstate is -1, you don't need to snapshot. 64 | // 65 | // gid is this group's GID, for interacting with the shardmaster. 66 | // 67 | // pass masters[] to shardmaster.MakeClerk() so you can send 68 | // RPCs to the shardmaster. 69 | // 70 | // make_end(servername) turns a server name from a 71 | // Config.Groups[gid][i] into a labrpc.ClientEnd on which you can 72 | // send RPCs. You'll need this to send RPCs to other groups. 73 | // 74 | // look at client.go for examples of how to use masters[] 75 | // and make_end() to send RPCs to the group owning a specific shard. 76 | // 77 | // StartServer() must return quickly, so it should start goroutines 78 | // for any long-running work. 79 | // 80 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, masters []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *ShardKV { 81 | // call gob.Register on structures you want 82 | // Go's RPC library to marshall/unmarshall. 83 | gob.Register(Op{}) 84 | 85 | kv := new(ShardKV) 86 | kv.me = me 87 | kv.maxraftstate = maxraftstate 88 | kv.make_end = make_end 89 | kv.gid = gid 90 | kv.masters = masters 91 | 92 | // Your initialization code here. 
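	// a minimal sketch of typical initialization, assuming hypothetical
	// kv.kvstore and kv.lastSeen fields added to ShardKV:
	//
	//	kv.kvstore = make(map[string]string)
	//	kv.lastSeen = make(map[int64]int64) // client id -> last applied request id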
93 | 94 | // Use something like this to talk to the shardmaster: 95 | // kv.mck = shardmaster.MakeClerk(kv.masters) 96 | 97 | kv.applyCh = make(chan raft.ApplyMsg) 98 | kv.rf = raft.Make(servers, me, persister, kv.applyCh) 99 | 100 | 101 | return kv 102 | } 103 | -------------------------------------------------------------------------------- /6.824/src/shardmaster/client.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Shardmaster clerk. 5 | // 6 | 7 | import "labrpc" 8 | import "time" 9 | import "crypto/rand" 10 | import "math/big" 11 | 12 | type Clerk struct { 13 | servers []*labrpc.ClientEnd 14 | // Your data here. 15 | } 16 | 17 | func nrand() int64 { 18 | max := big.NewInt(int64(1) << 62) 19 | bigx, _ := rand.Int(rand.Reader, max) 20 | x := bigx.Int64() 21 | return x 22 | } 23 | 24 | func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { 25 | ck := new(Clerk) 26 | ck.servers = servers 27 | // Your code here. 28 | return ck 29 | } 30 | 31 | func (ck *Clerk) Query(num int) Config { 32 | args := &QueryArgs{} 33 | // Your code here. 34 | args.Num = num 35 | for { 36 | // try each known server. 37 | for _, srv := range ck.servers { 38 | var reply QueryReply 39 | ok := srv.Call("ShardMaster.Query", args, &reply) 40 | if ok && reply.WrongLeader == false { 41 | return reply.Config 42 | } 43 | } 44 | time.Sleep(100 * time.Millisecond) 45 | } 46 | } 47 | 48 | func (ck *Clerk) Join(servers map[int][]string) { 49 | args := &JoinArgs{} 50 | // Your code here. 51 | args.Servers = servers 52 | 53 | for { 54 | // try each known server. 55 | for _, srv := range ck.servers { 56 | var reply JoinReply 57 | ok := srv.Call("ShardMaster.Join", args, &reply) 58 | if ok && reply.WrongLeader == false { 59 | return 60 | } 61 | } 62 | time.Sleep(100 * time.Millisecond) 63 | } 64 | } 65 | 66 | func (ck *Clerk) Leave(gids []int) { 67 | args := &LeaveArgs{} 68 | // Your code here. 69 | args.GIDs = gids 70 | 71 | for { 72 | // try each known server. 73 | for _, srv := range ck.servers { 74 | var reply LeaveReply 75 | ok := srv.Call("ShardMaster.Leave", args, &reply) 76 | if ok && reply.WrongLeader == false { 77 | return 78 | } 79 | } 80 | time.Sleep(100 * time.Millisecond) 81 | } 82 | } 83 | 84 | func (ck *Clerk) Move(shard int, gid int) { 85 | args := &MoveArgs{} 86 | // Your code here. 87 | args.Shard = shard 88 | args.GID = gid 89 | 90 | for { 91 | // try each known server. 92 | for _, srv := range ck.servers { 93 | var reply MoveReply 94 | ok := srv.Call("ShardMaster.Move", args, &reply) 95 | if ok && reply.WrongLeader == false { 96 | return 97 | } 98 | } 99 | time.Sleep(100 * time.Millisecond) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /6.824/src/shardmaster/common.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | // 4 | // Master shard server: assigns shards to replication groups. 5 | // 6 | // RPC interface: 7 | // Join(servers) -- add a set of groups (gid -> server-list mapping). 8 | // Leave(gids) -- delete a set of groups. 9 | // Move(shard, gid) -- hand off one shard from current owner to gid. 10 | // Query(num) -> fetch Config # num, or latest config if num==-1. 11 | // 12 | // A Config (configuration) describes a set of replica groups, and the 13 | // replica group responsible for each shard. Configs are numbered. 
Config 14 | // #0 is the initial configuration, with no groups and all shards 15 | // assigned to group 0 (the invalid group). 16 | // 17 | // A GID is a replica group ID. GIDs must be uniqe and > 0. 18 | // Once a GID joins, and leaves, it should never join again. 19 | // 20 | // You will need to add fields to the RPC arguments. 21 | // 22 | 23 | // The number of shards. 24 | const NShards = 10 25 | 26 | // A configuration -- an assignment of shards to groups. 27 | // Please don't change this. 28 | type Config struct { 29 | Num int // config number 30 | Shards [NShards]int // shard -> gid 31 | Groups map[int][]string // gid -> servers[] 32 | } 33 | 34 | const ( 35 | OK = "OK" 36 | ) 37 | 38 | type Err string 39 | 40 | type JoinArgs struct { 41 | Servers map[int][]string // new GID -> servers mappings 42 | } 43 | 44 | type JoinReply struct { 45 | WrongLeader bool 46 | Err Err 47 | } 48 | 49 | type LeaveArgs struct { 50 | GIDs []int 51 | } 52 | 53 | type LeaveReply struct { 54 | WrongLeader bool 55 | Err Err 56 | } 57 | 58 | type MoveArgs struct { 59 | Shard int 60 | GID int 61 | } 62 | 63 | type MoveReply struct { 64 | WrongLeader bool 65 | Err Err 66 | } 67 | 68 | type QueryArgs struct { 69 | Num int // desired config number 70 | } 71 | 72 | type QueryReply struct { 73 | WrongLeader bool 74 | Err Err 75 | Config Config 76 | } 77 | -------------------------------------------------------------------------------- /6.824/src/shardmaster/config.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | import "labrpc" 4 | import "raft" 5 | import "testing" 6 | import "os" 7 | 8 | // import "log" 9 | import crand "crypto/rand" 10 | import "math/rand" 11 | import "encoding/base64" 12 | import "sync" 13 | import "runtime" 14 | 15 | func randstring(n int) string { 16 | b := make([]byte, 2*n) 17 | crand.Read(b) 18 | s := base64.URLEncoding.EncodeToString(b) 19 | return s[0:n] 20 | } 21 | 22 | // Randomize server handles 23 | func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd { 24 | sa := make([]*labrpc.ClientEnd, len(kvh)) 25 | copy(sa, kvh) 26 | for i := range sa { 27 | j := rand.Intn(i + 1) 28 | sa[i], sa[j] = sa[j], sa[i] 29 | } 30 | return sa 31 | } 32 | 33 | type config struct { 34 | mu sync.Mutex 35 | t *testing.T 36 | net *labrpc.Network 37 | n int 38 | servers []*ShardMaster 39 | saved []*raft.Persister 40 | endnames [][]string // names of each server's sending ClientEnds 41 | clerks map[*Clerk][]string 42 | nextClientId int 43 | } 44 | 45 | func (cfg *config) cleanup() { 46 | cfg.mu.Lock() 47 | defer cfg.mu.Unlock() 48 | for i := 0; i < len(cfg.servers); i++ { 49 | if cfg.servers[i] != nil { 50 | cfg.servers[i].Kill() 51 | } 52 | } 53 | } 54 | 55 | // Maximum log size across all servers 56 | func (cfg *config) LogSize() int { 57 | logsize := 0 58 | for i := 0; i < cfg.n; i++ { 59 | n := cfg.saved[i].RaftStateSize() 60 | if n > logsize { 61 | logsize = n 62 | } 63 | } 64 | return logsize 65 | } 66 | 67 | // attach server i to servers listed in to 68 | // caller must hold cfg.mu 69 | func (cfg *config) connectUnlocked(i int, to []int) { 70 | // log.Printf("connect peer %d to %v\n", i, to) 71 | 72 | // outgoing socket files 73 | for j := 0; j < len(to); j++ { 74 | endname := cfg.endnames[i][to[j]] 75 | cfg.net.Enable(endname, true) 76 | } 77 | 78 | // incoming socket files 79 | for j := 0; j < len(to); j++ { 80 | endname := cfg.endnames[to[j]][i] 81 | cfg.net.Enable(endname, true) 82 | } 83 | } 84 | 85 | func (cfg 
*config) connect(i int, to []int) { 86 | cfg.mu.Lock() 87 | defer cfg.mu.Unlock() 88 | cfg.connectUnlocked(i, to) 89 | } 90 | 91 | // detach server i from the servers listed in from 92 | // caller must hold cfg.mu 93 | func (cfg *config) disconnectUnlocked(i int, from []int) { 94 | // log.Printf("disconnect peer %d from %v\n", i, from) 95 | 96 | // outgoing socket files 97 | for j := 0; j < len(from); j++ { 98 | if cfg.endnames[i] != nil { 99 | endname := cfg.endnames[i][from[j]] 100 | cfg.net.Enable(endname, false) 101 | } 102 | } 103 | 104 | // incoming socket files 105 | for j := 0; j < len(from); j++ { 106 | if cfg.endnames[j] != nil { 107 | endname := cfg.endnames[from[j]][i] 108 | cfg.net.Enable(endname, false) 109 | } 110 | } 111 | } 112 | 113 | func (cfg *config) disconnect(i int, from []int) { 114 | cfg.mu.Lock() 115 | defer cfg.mu.Unlock() 116 | cfg.disconnectUnlocked(i, from) 117 | } 118 | 119 | func (cfg *config) All() []int { 120 | all := make([]int, cfg.n) 121 | for i := 0; i < cfg.n; i++ { 122 | all[i] = i 123 | } 124 | return all 125 | } 126 | 127 | func (cfg *config) ConnectAll() { 128 | cfg.mu.Lock() 129 | defer cfg.mu.Unlock() 130 | for i := 0; i < cfg.n; i++ { 131 | cfg.connectUnlocked(i, cfg.All()) 132 | } 133 | } 134 | 135 | // Sets up 2 partitions with connectivity between servers in each partition. 136 | func (cfg *config) partition(p1 []int, p2 []int) { 137 | cfg.mu.Lock() 138 | defer cfg.mu.Unlock() 139 | // log.Printf("partition servers into: %v %v\n", p1, p2) 140 | for i := 0; i < len(p1); i++ { 141 | cfg.disconnectUnlocked(p1[i], p2) 142 | cfg.connectUnlocked(p1[i], p1) 143 | } 144 | for i := 0; i < len(p2); i++ { 145 | cfg.disconnectUnlocked(p2[i], p1) 146 | cfg.connectUnlocked(p2[i], p2) 147 | } 148 | } 149 | 150 | // Create a clerk with clerk specific server names. 151 | // Give it connections to all of the servers, but for 152 | // now enable only connections to servers in to[]. 153 | func (cfg *config) makeClient(to []int) *Clerk { 154 | cfg.mu.Lock() 155 | defer cfg.mu.Unlock() 156 | 157 | // a fresh set of ClientEnds. 
158 | ends := make([]*labrpc.ClientEnd, cfg.n) 159 | endnames := make([]string, cfg.n) 160 | for j := 0; j < cfg.n; j++ { 161 | endnames[j] = randstring(20) 162 | ends[j] = cfg.net.MakeEnd(endnames[j]) 163 | cfg.net.Connect(endnames[j], j) 164 | } 165 | 166 | ck := MakeClerk(random_handles(ends)) 167 | cfg.clerks[ck] = endnames 168 | cfg.nextClientId++ 169 | cfg.ConnectClientUnlocked(ck, to) 170 | return ck 171 | } 172 | 173 | func (cfg *config) deleteClient(ck *Clerk) { 174 | cfg.mu.Lock() 175 | defer cfg.mu.Unlock() 176 | 177 | v := cfg.clerks[ck] 178 | for i := 0; i < len(v); i++ { 179 | os.Remove(v[i]) 180 | } 181 | delete(cfg.clerks, ck) 182 | } 183 | 184 | // caller should hold cfg.mu 185 | func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) { 186 | // log.Printf("ConnectClient %v to %v\n", ck, to) 187 | endnames := cfg.clerks[ck] 188 | for j := 0; j < len(to); j++ { 189 | s := endnames[to[j]] 190 | cfg.net.Enable(s, true) 191 | } 192 | } 193 | 194 | func (cfg *config) ConnectClient(ck *Clerk, to []int) { 195 | cfg.mu.Lock() 196 | defer cfg.mu.Unlock() 197 | cfg.ConnectClientUnlocked(ck, to) 198 | } 199 | 200 | // caller should hold cfg.mu 201 | func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) { 202 | // log.Printf("DisconnectClient %v from %v\n", ck, from) 203 | endnames := cfg.clerks[ck] 204 | for j := 0; j < len(from); j++ { 205 | s := endnames[from[j]] 206 | cfg.net.Enable(s, false) 207 | } 208 | } 209 | 210 | func (cfg *config) DisconnectClient(ck *Clerk, from []int) { 211 | cfg.mu.Lock() 212 | defer cfg.mu.Unlock() 213 | cfg.DisconnectClientUnlocked(ck, from) 214 | } 215 | 216 | // Shutdown a server by isolating it 217 | func (cfg *config) ShutdownServer(i int) { 218 | cfg.mu.Lock() 219 | defer cfg.mu.Unlock() 220 | 221 | cfg.disconnectUnlocked(i, cfg.All()) 222 | 223 | // disable client connections to the server. 224 | // it's important to do this before creating 225 | // the new Persister in saved[i], to avoid 226 | // the possibility of the server returning a 227 | // positive reply to an Append but persisting 228 | // the result in the superseded Persister. 229 | cfg.net.DeleteServer(i) 230 | 231 | // a fresh persister, in case old instance 232 | // continues to update the Persister. 233 | // but copy old persister's content so that we always 234 | // pass Make() the last persisted state. 235 | if cfg.saved[i] != nil { 236 | cfg.saved[i] = cfg.saved[i].Copy() 237 | } 238 | 239 | kv := cfg.servers[i] 240 | if kv != nil { 241 | cfg.mu.Unlock() 242 | kv.Kill() 243 | cfg.mu.Lock() 244 | cfg.servers[i] = nil 245 | } 246 | } 247 | 248 | // If restart servers, first call ShutdownServer 249 | func (cfg *config) StartServer(i int) { 250 | cfg.mu.Lock() 251 | 252 | // a fresh set of outgoing ClientEnd names. 253 | cfg.endnames[i] = make([]string, cfg.n) 254 | for j := 0; j < cfg.n; j++ { 255 | cfg.endnames[i][j] = randstring(20) 256 | } 257 | 258 | // a fresh set of ClientEnds. 259 | ends := make([]*labrpc.ClientEnd, cfg.n) 260 | for j := 0; j < cfg.n; j++ { 261 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 262 | cfg.net.Connect(cfg.endnames[i][j], j) 263 | } 264 | 265 | // a fresh persister, so old instance doesn't overwrite 266 | // new instance's persisted state. 267 | // give the fresh persister a copy of the old persister's 268 | // state, so that the spec is that we pass StartKVServer() 269 | // the last persisted state. 
270 | if cfg.saved[i] != nil { 271 | cfg.saved[i] = cfg.saved[i].Copy() 272 | } else { 273 | cfg.saved[i] = raft.MakePersister() 274 | } 275 | 276 | cfg.mu.Unlock() 277 | 278 | cfg.servers[i] = StartServer(ends, i, cfg.saved[i]) 279 | 280 | kvsvc := labrpc.MakeService(cfg.servers[i]) 281 | rfsvc := labrpc.MakeService(cfg.servers[i].rf) 282 | srv := labrpc.MakeServer() 283 | srv.AddService(kvsvc) 284 | srv.AddService(rfsvc) 285 | cfg.net.AddServer(i, srv) 286 | } 287 | 288 | func (cfg *config) Leader() (bool, int) { 289 | cfg.mu.Lock() 290 | defer cfg.mu.Unlock() 291 | 292 | for i := 0; i < cfg.n; i++ { 293 | _, is_leader := cfg.servers[i].rf.GetState() 294 | if is_leader { 295 | return true, i 296 | } 297 | } 298 | return false, 0 299 | } 300 | 301 | // Partition servers into 2 groups and put current leader in minority 302 | func (cfg *config) make_partition() ([]int, []int) { 303 | _, l := cfg.Leader() 304 | p1 := make([]int, cfg.n/2+1) 305 | p2 := make([]int, cfg.n/2) 306 | j := 0 307 | for i := 0; i < cfg.n; i++ { 308 | if i != l { 309 | if j < len(p1) { 310 | p1[j] = i 311 | } else { 312 | p2[j-len(p1)] = i 313 | } 314 | j++ 315 | } 316 | } 317 | p2[len(p2)-1] = l 318 | return p1, p2 319 | } 320 | 321 | func make_config(t *testing.T, n int, unreliable bool) *config { 322 | runtime.GOMAXPROCS(4) 323 | cfg := &config{} 324 | cfg.t = t 325 | cfg.net = labrpc.MakeNetwork() 326 | cfg.n = n 327 | cfg.servers = make([]*ShardMaster, cfg.n) 328 | cfg.saved = make([]*raft.Persister, cfg.n) 329 | cfg.endnames = make([][]string, cfg.n) 330 | cfg.clerks = make(map[*Clerk][]string) 331 | cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid 332 | 333 | // create a full set of KV servers. 334 | for i := 0; i < cfg.n; i++ { 335 | cfg.StartServer(i) 336 | } 337 | 338 | cfg.ConnectAll() 339 | 340 | cfg.net.Reliable(!unreliable) 341 | 342 | return cfg 343 | } 344 | -------------------------------------------------------------------------------- /6.824/src/shardmaster/server.go: -------------------------------------------------------------------------------- 1 | package shardmaster 2 | 3 | 4 | import "raft" 5 | import "labrpc" 6 | import "sync" 7 | import "encoding/gob" 8 | 9 | 10 | type ShardMaster struct { 11 | mu sync.Mutex 12 | me int 13 | rf *raft.Raft 14 | applyCh chan raft.ApplyMsg 15 | 16 | // Your data here. 17 | 18 | configs []Config // indexed by config num 19 | } 20 | 21 | 22 | type Op struct { 23 | // Your data here. 24 | } 25 | 26 | 27 | func (sm *ShardMaster) Join(args *JoinArgs, reply *JoinReply) { 28 | // Your code here. 29 | } 30 | 31 | func (sm *ShardMaster) Leave(args *LeaveArgs, reply *LeaveReply) { 32 | // Your code here. 33 | } 34 | 35 | func (sm *ShardMaster) Move(args *MoveArgs, reply *MoveReply) { 36 | // Your code here. 37 | } 38 | 39 | func (sm *ShardMaster) Query(args *QueryArgs, reply *QueryReply) { 40 | // Your code here. 41 | } 42 | 43 | 44 | // 45 | // the tester calls Kill() when a ShardMaster instance won't 46 | // be needed again. you are not required to do anything 47 | // in Kill(), but it might be convenient to (for example) 48 | // turn off debug output from this instance. 49 | // 50 | func (sm *ShardMaster) Kill() { 51 | sm.rf.Kill() 52 | // Your code here, if desired. 
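	// for example (assuming a hypothetical sm.dead int32 field and a
	// "sync/atomic" import):
	//
	//	atomic.StoreInt32(&sm.dead, 1)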
53 | } 54 | 55 | // needed by shardkv tester 56 | func (sm *ShardMaster) Raft() *raft.Raft { 57 | return sm.rf 58 | } 59 | 60 | // 61 | // servers[] contains the ports of the set of 62 | // servers that will cooperate via Paxos to 63 | // form the fault-tolerant shardmaster service. 64 | // me is the index of the current server in servers[]. 65 | // 66 | func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardMaster { 67 | sm := new(ShardMaster) 68 | sm.me = me 69 | 70 | sm.configs = make([]Config, 1) 71 | sm.configs[0].Groups = map[int][]string{} 72 | 73 | gob.Register(Op{}) 74 | sm.applyCh = make(chan raft.ApplyMsg) 75 | sm.rf = raft.Make(servers, me, persister, sm.applyCh) 76 | 77 | // Your code here. 78 | 79 | return sm 80 | } 81 | -------------------------------------------------------------------------------- /6.824/src/viewservice/client.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net/rpc" 4 | import "fmt" 5 | 6 | // 7 | // the viewservice Clerk lives in the client 8 | // and maintains a little state. 9 | // 10 | type Clerk struct { 11 | me string // client's name (host:port) 12 | server string // viewservice's host:port 13 | } 14 | 15 | func MakeClerk(me string, server string) *Clerk { 16 | ck := new(Clerk) 17 | ck.me = me 18 | ck.server = server 19 | return ck 20 | } 21 | 22 | // 23 | // call() sends an RPC to the rpcname handler on server srv 24 | // with arguments args, waits for the reply, and leaves the 25 | // reply in reply. the reply argument should be a pointer 26 | // to a reply structure. 27 | // 28 | // the return value is true if the server responded, and false 29 | // if call() was not able to contact the server. in particular, 30 | // the reply's contents are only valid if call() returned true. 31 | // 32 | // you should assume that call() will return an 33 | // error after a while if the server is dead. 34 | // don't provide your own time-out mechanism. 35 | // 36 | // please use call() to send all RPCs, in client.go and server.go. 37 | // please don't change this function. 38 | // 39 | func call(srv string, rpcname string, 40 | args interface{}, reply interface{}) bool { 41 | c, errx := rpc.Dial("unix", srv) 42 | if errx != nil { 43 | return false 44 | } 45 | defer c.Close() 46 | 47 | err := c.Call(rpcname, args, reply) 48 | if err == nil { 49 | return true 50 | } 51 | 52 | fmt.Println(err) 53 | return false 54 | } 55 | 56 | func (ck *Clerk) Ping(viewnum uint) (View, error) { 57 | // prepare the arguments. 58 | args := &PingArgs{} 59 | args.Me = ck.me 60 | args.Viewnum = viewnum 61 | var reply PingReply 62 | 63 | // send an RPC request, wait for the reply. 
64 | ok := call(ck.server, "ViewServer.Ping", args, &reply) 65 | if ok == false { 66 | return View{}, fmt.Errorf("Ping(%v) failed", viewnum) 67 | } 68 | 69 | return reply.View, nil 70 | } 71 | 72 | func (ck *Clerk) Get() (View, bool) { 73 | args := &GetArgs{} 74 | var reply GetReply 75 | ok := call(ck.server, "ViewServer.Get", args, &reply) 76 | if ok == false { 77 | return View{}, false 78 | } 79 | return reply.View, true 80 | } 81 | 82 | func (ck *Clerk) Primary() string { 83 | v, ok := ck.Get() 84 | if ok { 85 | return v.Primary 86 | } 87 | return "" 88 | } 89 | -------------------------------------------------------------------------------- /6.824/src/viewservice/common.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "time" 4 | 5 | // 6 | // This is a non-replicated view service for a simple 7 | // primary/backup system. 8 | // 9 | // The view service goes through a sequence of numbered 10 | // views, each with a primary and (if possible) a backup. 11 | // A view consists of a view number and the host:port of 12 | // the view's primary and backup p/b servers. 13 | // 14 | // The primary in a view is always either the primary 15 | // or the backup of the previous view (in order to ensure 16 | // that the p/b service's state is preserved). 17 | // 18 | // Each p/b server should send a Ping RPC once per PingInterval. 19 | // The view server replies with a description of the current 20 | // view. The Pings let the view server know that the p/b 21 | // server is still alive; inform the p/b server of the current 22 | // view; and inform the view server of the most recent view 23 | // that the p/b server knows about. 24 | // 25 | // The view server proceeds to a new view when either it hasn't 26 | // received a ping from the primary or backup for a while, or 27 | // if there was no backup and a new server starts Pinging. 28 | // 29 | // The view server will not proceed to a new view until 30 | // the primary from the current view acknowledges 31 | // that it is operating in the current view. This helps 32 | // ensure that there's at most one p/b primary operating at 33 | // a time. 34 | // 35 | 36 | type View struct { 37 | Viewnum uint 38 | Primary string 39 | Backup string 40 | } 41 | 42 | // clients should send a Ping RPC this often, 43 | // to tell the viewservice that the client is alive. 44 | const PingInterval = time.Millisecond * 100 45 | 46 | // the viewserver will declare a client dead if it misses 47 | // this many Ping RPCs in a row. 48 | const DeadPings = 5 49 | 50 | // 51 | // Ping(): called by a primary/backup server to tell the 52 | // view service it is alive, to indicate whether p/b server 53 | // has seen the latest view, and for p/b server to learn 54 | // the latest view. 55 | // 56 | // If Viewnum is zero, the caller is signalling that it is 57 | // alive and could become backup if needed. 58 | // 59 | 60 | type PingArgs struct { 61 | Me string // "host:port" 62 | Viewnum uint // caller's notion of current view # 63 | } 64 | 65 | type PingReply struct { 66 | View View 67 | } 68 | 69 | // 70 | // Get(): fetch the current view, without volunteering 71 | // to be a server. mostly for clients of the p/b service, 72 | // and for testing. 
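//
// a worked example of the view progression described at the top of this file,
// with hypothetical servers S1 and S2:
//
//	view 1: Primary=S1, Backup=""   -- S1 was the first to Ping(0)
//	view 2: Primary=S1, Backup=S2   -- S2 began pinging and S1 acked view 1
//	view 3: Primary=S2, Backup=""   -- S1 missed DeadPings pings in a row
//
// the view service will not advance past view N until the primary of view N
// has acknowledged it by sending Ping(N).
//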
73 | // 74 | 75 | type GetArgs struct { 76 | } 77 | 78 | type GetReply struct { 79 | View View 80 | } 81 | -------------------------------------------------------------------------------- /6.824/src/viewservice/server.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "net" 4 | import "net/rpc" 5 | import "log" 6 | import "time" 7 | import "sync" 8 | import "fmt" 9 | import "os" 10 | import "sync/atomic" 11 | 12 | type ViewServer struct { 13 | mu sync.Mutex 14 | l net.Listener 15 | dead int32 // for testing 16 | rpccount int32 // for testing 17 | me string 18 | 19 | 20 | // Your declarations here. 21 | } 22 | 23 | // 24 | // server Ping RPC handler. 25 | // 26 | func (vs *ViewServer) Ping(args *PingArgs, reply *PingReply) error { 27 | 28 | // Your code here. 29 | 30 | return nil 31 | } 32 | 33 | // 34 | // server Get() RPC handler. 35 | // 36 | func (vs *ViewServer) Get(args *GetArgs, reply *GetReply) error { 37 | 38 | // Your code here. 39 | 40 | return nil 41 | } 42 | 43 | 44 | // 45 | // tick() is called once per PingInterval; it should notice 46 | // if servers have died or recovered, and change the view 47 | // accordingly. 48 | // 49 | func (vs *ViewServer) tick() { 50 | 51 | // Your code here. 52 | } 53 | 54 | // 55 | // tell the server to shut itself down. 56 | // for testing. 57 | // please don't change these two functions. 58 | // 59 | func (vs *ViewServer) Kill() { 60 | atomic.StoreInt32(&vs.dead, 1) 61 | vs.l.Close() 62 | } 63 | 64 | // 65 | // has this server been asked to shut down? 66 | // 67 | func (vs *ViewServer) isdead() bool { 68 | return atomic.LoadInt32(&vs.dead) != 0 69 | } 70 | 71 | // please don't change this function. 72 | func (vs *ViewServer) GetRPCCount() int32 { 73 | return atomic.LoadInt32(&vs.rpccount) 74 | } 75 | 76 | func StartServer(me string) *ViewServer { 77 | vs := new(ViewServer) 78 | vs.me = me 79 | // Your vs.* initializations here. 80 | 81 | // tell net/rpc about our RPC server and handlers. 82 | rpcs := rpc.NewServer() 83 | rpcs.Register(vs) 84 | 85 | // prepare to receive connections from clients. 86 | // change "unix" to "tcp" to use over a network. 87 | os.Remove(vs.me) // only needed for "unix" 88 | l, e := net.Listen("unix", vs.me) 89 | if e != nil { 90 | log.Fatal("listen error: ", e) 91 | } 92 | vs.l = l 93 | 94 | // please don't change any of the following code, 95 | // or do anything to subvert it. 96 | 97 | // create a thread to accept RPC connections from clients. 98 | go func() { 99 | for vs.isdead() == false { 100 | conn, err := vs.l.Accept() 101 | if err == nil && vs.isdead() == false { 102 | atomic.AddInt32(&vs.rpccount, 1) 103 | go rpcs.ServeConn(conn) 104 | } else if err == nil { 105 | conn.Close() 106 | } 107 | if err != nil && vs.isdead() == false { 108 | fmt.Printf("ViewServer(%v) accept: %v\n", me, err.Error()) 109 | vs.Kill() 110 | } 111 | } 112 | }() 113 | 114 | // create a thread to call tick() periodically. 
115 | go func() { 116 | for vs.isdead() == false { 117 | vs.tick() 118 | time.Sleep(PingInterval) 119 | } 120 | }() 121 | 122 | return vs 123 | } 124 | -------------------------------------------------------------------------------- /6.824/src/viewservice/test_test.go: -------------------------------------------------------------------------------- 1 | package viewservice 2 | 3 | import "testing" 4 | import "runtime" 5 | import "time" 6 | import "fmt" 7 | import "os" 8 | import "strconv" 9 | 10 | func check(t *testing.T, ck *Clerk, p string, b string, n uint) { 11 | view, _ := ck.Get() 12 | if view.Primary != p { 13 | t.Fatalf("wanted primary %v, got %v", p, view.Primary) 14 | } 15 | if view.Backup != b { 16 | t.Fatalf("wanted backup %v, got %v", b, view.Backup) 17 | } 18 | if n != 0 && n != view.Viewnum { 19 | t.Fatalf("wanted viewnum %v, got %v", n, view.Viewnum) 20 | } 21 | if ck.Primary() != p { 22 | t.Fatalf("wanted primary %v, got %v", p, ck.Primary()) 23 | } 24 | } 25 | 26 | func port(suffix string) string { 27 | s := "/var/tmp/824-" 28 | s += strconv.Itoa(os.Getuid()) + "/" 29 | os.Mkdir(s, 0777) 30 | s += "viewserver-" 31 | s += strconv.Itoa(os.Getpid()) + "-" 32 | s += suffix 33 | return s 34 | } 35 | 36 | func Test1(t *testing.T) { 37 | runtime.GOMAXPROCS(4) 38 | 39 | vshost := port("v") 40 | vs := StartServer(vshost) 41 | 42 | ck1 := MakeClerk(port("1"), vshost) 43 | ck2 := MakeClerk(port("2"), vshost) 44 | ck3 := MakeClerk(port("3"), vshost) 45 | 46 | // 47 | 48 | if ck1.Primary() != "" { 49 | t.Fatalf("there was a primary too soon") 50 | } 51 | 52 | // very first primary 53 | fmt.Printf("Test: First primary ...\n") 54 | 55 | for i := 0; i < DeadPings*2; i++ { 56 | view, _ := ck1.Ping(0) 57 | if view.Primary == ck1.me { 58 | break 59 | } 60 | time.Sleep(PingInterval) 61 | } 62 | check(t, ck1, ck1.me, "", 1) 63 | fmt.Printf(" ... Passed\n") 64 | 65 | // very first backup 66 | fmt.Printf("Test: First backup ...\n") 67 | 68 | { 69 | vx, _ := ck1.Get() 70 | for i := 0; i < DeadPings*2; i++ { 71 | ck1.Ping(1) 72 | view, _ := ck2.Ping(0) 73 | if view.Backup == ck2.me { 74 | break 75 | } 76 | time.Sleep(PingInterval) 77 | } 78 | check(t, ck1, ck1.me, ck2.me, vx.Viewnum+1) 79 | } 80 | fmt.Printf(" ... Passed\n") 81 | 82 | // primary dies, backup should take over 83 | fmt.Printf("Test: Backup takes over if primary fails ...\n") 84 | 85 | { 86 | ck1.Ping(2) 87 | vx, _ := ck2.Ping(2) 88 | for i := 0; i < DeadPings*2; i++ { 89 | v, _ := ck2.Ping(vx.Viewnum) 90 | if v.Primary == ck2.me && v.Backup == "" { 91 | break 92 | } 93 | time.Sleep(PingInterval) 94 | } 95 | check(t, ck2, ck2.me, "", vx.Viewnum+1) 96 | } 97 | fmt.Printf(" ... Passed\n") 98 | 99 | // revive ck1, should become backup 100 | fmt.Printf("Test: Restarted server becomes backup ...\n") 101 | 102 | { 103 | vx, _ := ck2.Get() 104 | ck2.Ping(vx.Viewnum) 105 | for i := 0; i < DeadPings*2; i++ { 106 | ck1.Ping(0) 107 | v, _ := ck2.Ping(vx.Viewnum) 108 | if v.Primary == ck2.me && v.Backup == ck1.me { 109 | break 110 | } 111 | time.Sleep(PingInterval) 112 | } 113 | check(t, ck2, ck2.me, ck1.me, vx.Viewnum+1) 114 | } 115 | fmt.Printf(" ... Passed\n") 116 | 117 | // start ck3, kill the primary (ck2), the previous backup (ck1) 118 | // should become the server, and ck3 the backup. 119 | // this should happen in a single view change, without 120 | // any period in which there's no backup. 
121 | fmt.Printf("Test: Idle third server becomes backup if primary fails ...\n") 122 | 123 | { 124 | vx, _ := ck2.Get() 125 | ck2.Ping(vx.Viewnum) 126 | for i := 0; i < DeadPings*2; i++ { 127 | ck3.Ping(0) 128 | v, _ := ck1.Ping(vx.Viewnum) 129 | if v.Primary == ck1.me && v.Backup == ck3.me { 130 | break 131 | } 132 | vx = v 133 | time.Sleep(PingInterval) 134 | } 135 | check(t, ck1, ck1.me, ck3.me, vx.Viewnum+1) 136 | } 137 | fmt.Printf(" ... Passed\n") 138 | 139 | // kill and immediately restart the primary -- does viewservice 140 | // conclude primary is down even though it's pinging? 141 | fmt.Printf("Test: Restarted primary treated as dead ...\n") 142 | 143 | { 144 | vx, _ := ck1.Get() 145 | ck1.Ping(vx.Viewnum) 146 | for i := 0; i < DeadPings*2; i++ { 147 | ck1.Ping(0) 148 | ck3.Ping(vx.Viewnum) 149 | v, _ := ck3.Get() 150 | if v.Primary != ck1.me { 151 | break 152 | } 153 | time.Sleep(PingInterval) 154 | } 155 | vy, _ := ck3.Get() 156 | if vy.Primary != ck3.me { 157 | t.Fatalf("expected primary=%v, got %v\n", ck3.me, vy.Primary) 158 | } 159 | } 160 | fmt.Printf(" ... Passed\n") 161 | 162 | fmt.Printf("Test: Dead backup is removed from view ...\n") 163 | 164 | // set up a view with just 3 as primary, 165 | // to prepare for the next test. 166 | { 167 | for i := 0; i < DeadPings*3; i++ { 168 | vx, _ := ck3.Get() 169 | ck3.Ping(vx.Viewnum) 170 | time.Sleep(PingInterval) 171 | } 172 | v, _ := ck3.Get() 173 | if v.Primary != ck3.me || v.Backup != "" { 174 | t.Fatalf("wrong primary or backup") 175 | } 176 | } 177 | fmt.Printf(" ... Passed\n") 178 | 179 | // does viewserver wait for ack of previous view before 180 | // starting the next one? 181 | fmt.Printf("Test: Viewserver waits for primary to ack view ...\n") 182 | 183 | { 184 | // set up p=ck3 b=ck1, but 185 | // but do not ack 186 | vx, _ := ck1.Get() 187 | for i := 0; i < DeadPings*3; i++ { 188 | ck1.Ping(0) 189 | ck3.Ping(vx.Viewnum) 190 | v, _ := ck1.Get() 191 | if v.Viewnum > vx.Viewnum { 192 | break 193 | } 194 | time.Sleep(PingInterval) 195 | } 196 | check(t, ck1, ck3.me, ck1.me, vx.Viewnum+1) 197 | vy, _ := ck1.Get() 198 | // ck3 is the primary, but it never acked. 199 | // let ck3 die. check that ck1 is not promoted. 200 | for i := 0; i < DeadPings*3; i++ { 201 | v, _ := ck1.Ping(vy.Viewnum) 202 | if v.Viewnum > vy.Viewnum { 203 | break 204 | } 205 | time.Sleep(PingInterval) 206 | } 207 | check(t, ck2, ck3.me, ck1.me, vy.Viewnum) 208 | } 209 | fmt.Printf(" ... Passed\n") 210 | 211 | // if old servers die, check that a new (uninitialized) server 212 | // cannot take over. 213 | fmt.Printf("Test: Uninitialized server can't become primary ...\n") 214 | 215 | { 216 | for i := 0; i < DeadPings*2; i++ { 217 | v, _ := ck1.Get() 218 | ck1.Ping(v.Viewnum) 219 | ck2.Ping(0) 220 | ck3.Ping(v.Viewnum) 221 | time.Sleep(PingInterval) 222 | } 223 | for i := 0; i < DeadPings*2; i++ { 224 | ck2.Ping(0) 225 | time.Sleep(PingInterval) 226 | } 227 | vz, _ := ck2.Get() 228 | if vz.Primary == ck2.me { 229 | t.Fatalf("uninitialized backup promoted to primary") 230 | } 231 | } 232 | fmt.Printf(" ... Passed\n") 233 | 234 | vs.Kill() 235 | } 236 | -------------------------------------------------------------------------------- /Lec01_Introduction/l01.md: -------------------------------------------------------------------------------- 1 | ### 6.824 2016 第1课:介绍 2 | 3 | #### 6.824: 分布式系统工程 4 | 5 | ##### 什么是分布式系统 ? 6 | + 多台机器共同协作 7 | + 如DNS域名解析, P2P文件分享, 大的数据库(big databases), MapReduce, &c 8 | + 很多关键基础设施是分布式的! 9 | 10 | 11 | ##### 为什么需要分布式 ? 
12 | + 为了连接物理上相互分离的实体 13 | + 为了通过隔离(isolation)实现安全性 14 | + 为了通过复制(replication)实现容错 15 | + 为了使CPUs/mem/disk/net可以实现扩容 16 | 17 | ##### 然而 18 | + 复杂性: 多个并发的部分 19 | + 必须处理部分失败的情况 20 | + 难以实现的性能潜力 21 | 22 | ##### 为什么选这门课? 23 | + 兴趣 -- 难题, 非显而易见的解决方案(non-obvious solutions) 24 | + 被实际系统使用 -- 被大网站的崛起而驱动大网站的崛起 25 | + 活跃的研究领域 -- 快速进步的领域 和 有大量问题没有解决的领域 26 | + 动手做 -- 你讲通过实现建立多个系统 27 | 28 | #### 课程结构 29 | 30 | + http://pdos.csail.mit.edu/6.824 31 | 32 | ##### Course staff(课程工作人员): 33 | + Robert Morris, lecturer 34 | + Frans Kaashoek, lecturer 35 | + Steven Allen, TA 36 | + Stephanie Wang, TA 37 | + Jon Gjengset, TA 38 | + Daniel Ziegler, TA 39 | 40 | ##### 课程组成: 41 | + 课程 42 | + 阅读 43 | + 两个考试 44 | + 实验 45 | + 项目 46 | 47 | #### 课程涉及大的想法,阅读和实验 48 | 49 | ##### 阅读: 研究论文作为案例研究 50 | 51 | + 请课前阅读研究论文,否则你会觉得上课内容很无聊,而且你无法不费力地学会, 52 | 每篇论文都有为你准备的小问题,请务必给我们发送你阅读论文的时候存在的疑问, 53 | 晚上十点前给我们发送问题和答案。 54 | 55 | + 实验目标 56 | + 深入理解一些重要的技术 57 | + 掌握分布式编程的经验 58 | + 第一个实验的时间安排是从周五起的一周时间 59 | 60 | + 实验安排 61 | + Lab 1: MapReduce 62 | + Lab 2: replication for fault-tolerance 63 | + Lab 3: fault-tolerant key/value store 64 | + Lab 4: sharded key/value store 65 | 66 | 最后的项目,我们将会分成2到3组完成,你可以设想一个项目,然后和我们一起将他搞明白,或者你也可以做我们默认指定的项目。 67 | 实验的成绩基于你通过了多少测试案例,我们会给你测验,然后你就可以知道自己是否很小心的完成,如果它通常通过,但有时失败了,它有可能会失败,当我们运行它。 68 | 69 | + 实验代码审查 70 | 查看其它人的解决方案,发送反馈给我们,可能自己能学到其它方法。 71 | 72 | 73 | #### 主题 74 | 75 | + 这是一门关于会被应用程序抵用的基础设施的课程,它会对应用程序隐藏分布式系统的复杂性而进行抽象,包括下面的三个抽象: 76 | + 存储(Storage) 77 | + 通讯(Communication) 78 | + 计算(Computation) 79 | 80 | 两个主题将反复出现。 81 | 82 | ##### 主题:实现(implementation) 83 | + RPC, threads, concurrency control. 84 | 85 | ##### 主题: 性能(performance) 86 | + 理想:可伸缩的吞吐量。 87 | 通过购买更多的机器处理更高的负载。 88 | + 扩展变得越来越困难: 89 | 负载均衡,straggler问题。 90 | "Small" non-parallelizable parts。 91 | 隐藏共享资源等,还有网络问题。 92 | 93 | 94 | ##### 主题:容错(fault tolerance) 95 | + 上千的服务器,复杂的网络 ————> 总会有东西出错 96 | 我们需要对应用程序隐藏这些错误。 97 | 我们经常希望: 98 | 可用性: 即使出错我也希望可以使用我们的文件。 99 | 耐用性:当故障修复之后,我的数据可以恢复。 100 | 101 | 重要理念:复制服务器。 102 | 如果一个服务器故障了,客户们可以使用其他的服务器。 103 | 104 | ##### 主题:一致性(consistency) 105 | + 通用的基础设施需求定义良好的行为。 106 | 例如: Get(k) 获取到的值应该是最近的 Put(k,v)设置的。 107 | + 实现良好的行为是很困难的! 108 | + 客户提交的并发操作。 109 | + 服务器崩溃在尴尬的时刻。 110 | + 网络可能会使存活的服务器看起来跟挂了一样;存在“脑裂的风险“ 111 | + 一致性和性能不能兼得 112 | + 一致性需要沟通,如获取最新的Put()。 113 | + 带有严格同步语义的系统往往是缓慢的。 114 | + 快速系统通常使应用程序应对复杂(“放松”)的行为。 115 | + People have pursued many design points in this spectrum. 116 | 117 | #### 案例学习: MapReduce 118 | + 让我们将MR作为一个案例进行讨论。 119 | MR是课程6.284主题的一个很好的例子,也是实验1的主要关注点。 120 | 121 | + MapReduce概要 122 | + 背景: 几个小时处理完TB基本的数据集 123 | 例如:实验分析爬行网页的结构,通常不是由分布式系统开发的爱好者开发的这就会非常痛苦,如如何处理错误。 124 | + 总体目标: 非专业程序员可以轻松的在合理的效率下解决的巨大的数据处理问题。程序员定义Map函数和Reduce函数、顺序代码一般都比较简单。 125 | MR在成千的机器上面运行处理大量的数据输入,隐藏全部分布式的细节。 126 | 127 | + MapReduce的抽象试图 128 | 输入会被分配到不同的分片(splits) 129 | Input Map -> a,1 b,1 c,1 130 | Input Map -> b,1 131 | Input Map -> a,1 c,1 132 | | | | 133 | | -> Reduce -> c,2 134 | -----> Reduce -> b,2 135 | MR调用在每个分片上调用Map()函数,产生中间数据集k2,v2,然后MR将会收集相同k2的值v2,然后将v2分别传输给Reduce函数, 136 | 最后的输出是数据集 137 | 138 | + 例子: word count 139 | 输入时成千上万的文件文件 140 | Map(k, v) 141 | split v into words 142 | for each word w 143 | emit(w, "1") 144 | Reduce(k, v) 145 | emit(len(v)) 146 | 147 | + 这个模式很容易编程,隐藏了很多让人痛苦的细节 148 | + 并发: 顺序执行相同的结果 149 | + starting s/w on servers ??? 150 | + 数据移动 151 | + 失败 152 | 153 | 154 | + 这个模型容易扩展 155 | Nx台计算机可以同时执行nx个Map函数和Reduce函数,Map函数不需要相互等待或者共享数据,完全可以并行的执行。 156 | 在一定程度上,你可以通过购买更多的计算机来获取更大的吞吐量。而不是每个应用程序专用的高效并行。电脑是比程序员更便宜! 157 | 158 | + 哪些为成为现在性能的限制因素? 
159 | + 我们关心的正是需要优化的那个瓶颈。CPU?内存?硬盘?网络?它们一般会受到网络的限制,网络的总容量通常远小于全部主机网络链路速度之和。一般情况下 160 | 很难建立一个比单机快1000倍的网络,所以设计者们关心尽量减少需要在网络上移动的数据。 161 | 162 | 163 | + 容错呢? 164 | 165 | 比如:如果服务器在执行MR工作时崩溃怎么办?隐藏这个错误非常困难,为什么不重新执行这个工作呢? 166 | 167 | MR重新执行失败的Map函数和Reduce函数,它们是纯函数——它们不会修改输入数据、不会保持状态、不共享内存,map和map之间、reduce和reduce之间也不存在相互依赖, 168 | 169 | 所以重新执行也会产生相同的输出。纯函数的这个需求是MR相对于其他并行编程方案的主要限制,但也正是这个需求使得MR非常简单。 170 | 171 | + 更多细节: 172 | + master:给workers分配工作,记录中间输出的位置。 173 | 174 | + 输入分割:输入存储在GFS,每个分片有三份拷贝,所有机器都同时运行GFS和MR workers,输入分片的数量远远多于worker的数量。 175 | 176 | + master在每台机器上调度Map任务,当一台机器完成手头的任务之后再分给它新的任务;worker把Map输出按key散列划分成R个分区,保存在本地磁盘上。 177 | 178 | + 当没有Map任务还在执行时,Reduce才开始执行。master告诉Reducers去获取Map workers产生的中间数据分区,Reduce worker将最终的结果 179 | 180 | 输出到GFS。 181 | 182 | + 有哪些具体的设计帮助提升网络性能? 183 | + Map的输入来自本地的硬盘而非网络。 184 | + 中间数据只在网络上面传输一次,保存在本地硬盘,而不是GFS。 185 | + 中间数据通过key被划分到多个文件,"大的网络传输"更加有效。 186 | 187 | + 它们是怎么很好地处理负载均衡的? 188 | + 负载均衡是扩展的关键 -- otherwise Nx servers -> no gain. 189 | 不同的大小、不同的内容和不同的服务器硬件,导致处理各个分片或分区所需的时间并不一致。 190 | + 解决方案: 分片的数量要远多于worker的数量。 191 | Master不断地把分片分配给那些已经完成之前任务的worker进行处理。这样就没有哪个分片大到能左右整体的完成时间, 192 | 同时速度更快的服务器会处理更多的工作,最后大家几乎同时完成。 193 | 194 | + MR怎么应对worker崩溃? 195 | + Map Worker崩溃: 196 | + master会重新执行这些任务,并把它们调度到存有相应输入数据其他GFS副本的机器上;即使该worker已经完成了任务也要重新执行,因为它本地磁盘上的中间数据仍然被需要、却已经拿不到了。 197 | 有些Reduce workers也许已经读取过故障worker产生的中间数据,这里我们依赖Map函数是纯函数并且是确定性的(重新执行会产生相同的数据)。 198 | + master怎么知道worker崩溃?(通过ping) 199 | + 如果Reduces已经获取全部的中间数据,那么master不需要重新运行Map;如果之后有Reduce崩溃,那么必须等待Map再次运行。 200 | + Reduce worker在输出结果前崩溃,master必须在其他worker上面重新开始该任务。 201 | + Reduce worker在输出结果的过程中崩溃,GFS的原子重命名会使输出保持不可见,直到Reduce完成,所以master在其他地方再次运行Reduce worker将会是安全的。 202 | 203 | + 其他错误和问题: 204 | + 假如master意外地开启两个Map worker处理同一个输入会怎么样? 205 | master只会把其中一个的输出告诉Reduce workers。 206 | + 假如两个Reduce worker处理中间数据的同一个分区会怎么样? 207 | 它们会往GFS写同一个输出文件!GFS的原子重命名操作避免了内容混杂,最终只有一份完整的输出可见(先完成重命名的获胜)。 208 | + 假如一个worker非常慢怎么办——一个掉队者(straggler)? 209 | 原因可能是硬件出了问题。 210 | master会为这些收尾阶段的任务创建第二份备份任务一起执行。 211 | + 假如一个worker因为软件或者硬件的问题导致计算结果错误怎么办? 212 | 太糟糕了!MR建立在CPU和软件都是"fail-stop"的这一假设之上。 213 | + 假如master崩溃怎么办? 214 | 215 | + 哪些应用是MapReduce不能很好执行的? 216 | + 并不是所有工作都适合map/shuffle/reduce这种模式 217 | + 小规模的数据,因为管理开销太高,比如它并不适合用作网站后端 218 | + 大数据集上的小更新,比如添加一些文件到大的索引 219 | + 不可预知的读(Map 和 Reduce都不能选择输入) 220 | + Multiple shuffles, e.g. page-rank (can use multiple MR but not very efficient) 221 | + 更灵活的系统也能表达MR,但是使用的编程模型要复杂得多 222 | 223 | + 总结 224 | 225 | Conclusion 226 | MapReduce single-handedly made big cluster computation popular. 227 | - Not the most efficient or flexible. 228 | + Scales well. 229 | + Easy to program -- failures and data movement are hidden. 230 | These were good trade-offs in practice. 231 | We'll see some more advanced successors later in the course. 232 | -------------------------------------------------------------------------------- /Lec01_Introduction/mapreduce.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec01_Introduction/mapreduce.pdf -------------------------------------------------------------------------------- /Lec02_RPC_and_Threads/l-rpc.md: -------------------------------------------------------------------------------- 1 | ## 6.824 2016 Lecture 2: 基础设施: RPC和多线程 2 | 3 | [原文地址](https://pdos.csail.mit.edu/6.824/notes/l-rpc.txt) 4 | 5 | ##### 被问的最多的问题? 为什么这门课程选择了Go?
6 | + 6.824这门课程过去选择使用C++ 7 | 8 | 过去学生们花费了太多时间修改跟分布式系统无关的bug,比如他们释放了还在使用的对象。 9 | 10 | + Go拥有一些特性可以让你更加在集中注意力在分布式系统而不是语言细节 11 | 12 | + 类型安全 13 | + 垃圾回收(这样就不存在释放后使用的问题了) 14 | + 很好的支持并发 15 | + 很好的支持RPC 16 | 17 | 18 | + 我们喜欢使用Go编程 19 | 20 | 一门非常容易学习的语言,可以使用教程[effective_go](https://golang.org/doc/effective_go.html) 21 | 22 | ###### Remote Procedure Call (RPC) 23 | + 分布式系统的关键部分,全面的实验都使用RPC. 24 | + RPC的目的: 25 | + 容易编写网络通信程序 26 | + 隐藏客户端服务器通信的细节 27 | + 客户端调用更加像传统的过程调用 28 | + 服务端处理更加像传统的过程调用 29 | + RPC被广泛的使用! 30 | 31 | 32 | ###### RPC理想上想把网络通信做的跟函数调用一样 33 | + Client: 34 | 35 | z = fn(x, y) 36 | 37 | + Server: 38 | 39 | fn(x, y) { 40 | compute 41 | return z 42 | } 43 | RPC设计目标是这种水平的透明度。 44 | 45 | ###### Go example: 46 | + [https://golang.org/pkg/net/rpc/](https://golang.org/pkg/net/rpc/) 47 | 48 | ###### RPC消息流程图: 49 | 50 | Client Server 51 | request---> 52 | <---response 53 | 54 | ###### 软件架构 55 | 56 | client app handlers 57 | stubs dispatcher 58 | RPC lib RPC lib 59 |   net ------------ net 60 | 61 | ###### 一些细节: 62 | + 应该调用哪个服务器函数(handler)? 63 | + 序列化:格式化数据到包中 64 | + 棘手的数组,指针,对象等。 65 | + Go的RPC库非常强大。 66 | + 有些东西你不能传递:比如channels和function。 67 | + 绑定:客户端怎么知道应该跟谁通信? 68 | + 也许客户端使用服务器的hostname。 69 | + 也许使用命名服务,讲服务名字映射到最好的服务器。 70 | + 线程: 71 | + 客户端可能使用多线程,所以多于一个调用没有被处理,对应的处理器可能会是否缓慢,所以 72 | 服务器经常将每个请求放置在独立的线程中处理。 73 | 74 | ###### RPC问题:怎么处理失败? 75 | + 比如:丢包,网络断线,服务器运行缓慢,服务器崩溃。 76 | 77 | ###### 错误对RPC客户端意味着什么? 78 | + 客户端没有获取到服务器的回复。 79 | + 客户端不知道服务器是否接收到请求!也许服务器的网络在发生请求前就失败了。 80 | 81 | ###### 简单的方案:“最少一次”行为 82 | + RPC库等待回复一段时间,如果还是没有回复到达,重新发生请求。重复多次,如果还是没有回复,那么返回错误给应用程序。 83 | 84 | ###### Q: "至少一次"容易被应用程序处理吗? 85 | + 至少一次写的简单问题: 86 | 客户端发送"deduct $10 from bank account" 87 | 88 | ###### Q: 这个客户端程序会出现什么错误? 89 | + Put("k",10) -- 一个RPC调用在数据库服务器中设置键值对。 90 | + Put("k",20) -- 客户端对同一个键设置其他值。 91 | 92 | 93 | ###### Q: 至少一次每次都可以很好的工作吗? 94 | + 是的:如果回复操作的是OK,比如,只读操作。 95 | + 是的:如果应该程序有自己处理多个写副本的计划。 96 | 97 | ###### 更好的RPC行为:“最多一次” 98 | + idea:服务器的RPC代码发现重复的请求,返回之前的回复,而不是重写运行。 99 | + Q:如何发现相同的请求? 100 | client让每一个请求带有唯一标示码XID(unique ID),相同请求使用相同的XID重新发送。 101 | server: 102 | if seen[xid]: 103 | r = old[xid] 104 | else 105 | r = handler() 106 | old[xid] = r 107 | seen[xid] = true 108 | 109 | 110 | ###### 一些关于“最多一次”的复杂性 111 | 这些都会断断续续地出现在实验二中 112 | + 怎么确认xid是唯一的? 113 | + 很大的随机数? 114 | + 将唯一的客户端ID(ip address?)和序列号组合起来? 115 | + 服务器最后必须丢弃老的RPC信息? 116 | + 什么时候丢弃是安全的? 117 | + idea: 118 | + 唯一的客户端id 119 | + 上一个rpc请求的序列号 120 | + 客户端的每一个RPC请求包含"seen all replies <=X" 121 | + 类似tcp中的seq和ack 122 | + 或者每次只允许一个RPC调用,到达的是seq+1,那么忽略其他小于seq 123 | + 客户端最多可以尝试5次,服务器会忽略大于5次的请求。 124 | + 当原来的请求还在执行,怎么样处理相同seq的请求? 125 | + 服务器不想运行两次,也不想回复。 126 | + 想法:给每个执行的RPC,pending标识;等待或者忽略。 127 | 128 | ###### 如果“至多一次”服务器奔溃或者重启会怎么样? 129 | + 如果服务器将副本信息保存在内存中,服务器会忘记请求,同时在重启之后接受相同的请求。 130 | + 也许,你应该将副本信息保存到磁盘? 131 | + 也许,副本服务器应该保存副本信息? 132 | 133 | ###### 关于“至少执行一次”? 134 | + 至多一次+无限重试+容错服务 135 | 136 | ###### Go RPC实现的”最多一次“? 137 | + 打开TCP连接 138 | + 向TCP连接写入请求 139 | + TCP也许会重传,但是服务器的TCP协议栈会过滤重复的信息 140 | + 在Go代码里面不会有重试(即:不会创建第二个TCP连接) 141 | + Go RPC代码当没有获取到回复之后将返回错误 142 | + 也许是TCP连接的超时 143 | + 也许是服务器没有看到请求 144 | + 也许服务器处理了请求,但是在返回回复之前服务器的网络故障 145 | 146 | ###### 线程 147 | + 线程是基本的服务器构建工具 148 | + 你将会在实验中经常使用 149 | + 线程非常“狡猾” 150 | + 对RPC非常有用 151 | + Go中使用goroutines代替线程 152 | 153 | ###### 线程 = “控制线程” 154 | + 线程可以使一个程序同时执行很多事情 155 | + 线程共享内存 156 | + 每个线程包含额线程状态:程序计数器、寄存器、栈 157 | 158 | ###### 线程挑战 159 | + 共享数据 160 | + 两个线程在同一个时间修改同一个变量? 161 | + 当一个线程读取数据,同时另一个线程正在修改这个数据? 
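下面给出一个把前文"最多一次"的重复检测表(seen/old)和共享数据保护结合起来的最小 Go 示意。类型名、字段名都是假设的,并非课程框架里的代码,只用来说明思路:

```go
package main

import (
	"fmt"
	"sync"
)

// 每个 RPC 请求带一个唯一的 xid;服务器记录每个 xid 的处理结果,
// 重复到达的请求直接返回旧结果("最多一次")。
// 多个处理请求的 goroutine 会并发读写 seen/old 这两个 map,
// 所以必须用互斥锁保护,否则就是典型的数据竞争。
type DedupServer struct {
	mu   sync.Mutex
	seen map[int64]bool   // xid -> 是否处理过
	old  map[int64]string // xid -> 上一次的回复
}

func (s *DedupServer) Handle(xid int64, handler func() string) string {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.seen[xid] {
		return s.old[xid] // 重复请求:返回上次的回复,而不是再执行一次
	}
	r := handler() // 简化:持有锁执行 handler,属于"粗粒度"加锁
	s.seen[xid] = true
	s.old[xid] = r
	return r
}

func main() {
	s := &DedupServer{seen: map[int64]bool{}, old: map[int64]string{}}
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ { // 模拟客户端重试:同一个 xid 并发到达两次
		wg.Add(1)
		go func() {
			defer wg.Done()
			fmt.Println(s.Handle(42, func() string { return "reply-for-42" }))
		}()
	}
	wg.Wait() // handler 只会真正执行一次,两次打印的是同一个回复
}
```

这里为了简单,整个处理过程都持有锁;下文"锁粒度"一节讨论的正是这种粗粒度做法和更细粒度做法之间的取舍。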
162 | 163 | 上面的问题经常被称为竞争,需要在共享数据上面使用Go中的sync.Mutex保护变量 164 | + 线程协调 165 | + 比如:使用Go中的channels等待全部相关的线程完成工作 166 | + 死锁 167 | + 线程1等待线程2 168 | + 线程2等待线程1 169 | + 比竞争容易诊断 170 | + 锁粒度 171 | + 粗粒度 --> 简单,但是更小的并发/并行 172 | + 细粒度 --> 更好的并发,更容易数据竞争和死锁 173 | + 让我们一起看看名为labrpc的RPC包说明这些问题 174 | 175 | ###### 看看今天的讲义 -- labrpc.go 176 | + 它很像Go的RPC系统,但是带有模拟网络 177 | + 这个模拟的网络会延迟请求和回复 178 | + 这个模拟的网络会丢失请求和回复 179 | + 这个模拟的网络会重新排序请求和回复 180 | + 对之后的实验二测试非常有用 181 | + 说明线程、互斥锁、通道 182 | + 完整的RPC包完全使用Go语言编写 183 | 184 | ###### 结构 185 | + 网络结构 186 | + 网络描述 187 | + 服务器 188 | + 客户端节点 189 | + 每个Network结构都持有一个sync.Mutex 190 | 191 | ###### RPC概述 192 | + 更多的例子在test_test.go中 193 | 比如: TestBasic()函数 194 | + 应用程序调用Call()函数,发生一个RPC请求并等待结果 195 | reply := end.Call("Raft.AppendEntries", args, &reply) 196 | + 服务器端 197 | srv := MakeServer() 198 | srv.AddService(svc) // 一个服务器含有多个服务, 比如. Raft and k/v 199 | svc := MakeService(receiverObject) // 对象的方法将会处理RPQ请求 200 | 201 | ###### 服务器结构 202 | + 一个服务器程序支持多个服务 203 | 204 | ###### AddService 205 | + 添加一个服务名字 206 | + Q: 为什么上锁? 207 | AddService可能在多个goroutine中被调用 208 | + Q: defer() 函数的作用? 209 | deter的作用是在函数退出前,调用之后的代码,在里面就是添加完新服务后,做解锁操作。 210 | 211 | ###### Dispatch 212 | + 分发请求到正确的服务 213 | + Q: 为什么持有锁? 214 | 这里应该指的是Server::dispatch函数, 215 | + Q: 为什么不在函数的结尾处持有锁? 216 | 217 | ###### Call(): 218 | + 使用反射查找参数类型 219 | + 使用“gob”序列化参数(译注:gob是Golang包自带的一个数据结构序列化的编码/解码工具。) 220 | + e.ch是用于发生请求的通道 221 | + 需要一个通道从网上接收回复(<- req.replyCh) 222 | 223 | ###### MakeEnd(): 224 | + 使用一个线程或者goroutine模拟网络 225 | + 从e.ch中获取请求,然后处理请求 226 | + 每个请求分别在不同的goroutine处理 227 | Q: 一个端点是否可以拥有多个未处理的请求 228 | + Q:为什么使用rn.mu.Lock()? 229 | + Q:锁保护了什么? 230 | 231 | ###### ProcessReq(): 232 | + 查看服务器端 233 | + 如果网络不可靠,可能会延迟或者丢失请求,在一个新的线程中分发请求。 234 | + 通过读取e.ch等待回复直到时间过去100毫秒。100毫秒只是来看看服务器是否崩溃。 235 | + 最后返回回复 236 | Q: 谁会读取回复? 237 | + Q:ProcessReq没有持有rn锁,是否安全? 238 | 239 | ###### Service.dispatch(): 240 | + 为请求找到合适的处理方法 241 | + 反序列化参数 242 | + 调用方法 243 | + 序列化回复 244 | + 返回回复 245 | 246 | ###### Go的内存模型需要明确的同步去通信 247 | + 下面的代码是错误的 248 | var x int 249 | done := false 250 | go func() { x = f(...); done = true } 251 | while done == false { } 252 | 代码很容易写成上面,但是Go的语法会说没有定义,使用channel或者sync.WaitGroup替换 253 | 254 | ###### 学习Go关于goroutine和channel的教程 255 | + 使用Go的竞争诊断器 256 | + [https://golang.org/doc/articles/race_detector.html](https://golang.org/doc/articles/race_detector.html) 257 | go test --race mypkg 258 | 259 | 260 | -------------------------------------------------------------------------------- /Lec03_GFS/Bolosky.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec03_GFS/Bolosky.pdf -------------------------------------------------------------------------------- /Lec03_GFS/GFS.md: -------------------------------------------------------------------------------- 1 | ### 6.824 2014 Lecture 3: GFS案例学习 2 | 3 | #### 初稿,后面会修改,建议大家看gfs的论文和《大规模分布式存储系统》中的GFS部分 4 | 5 | ##### 为什么我们阅读这个论文? 6 | + Map/reduce使用这种文件系统 7 | + 处理存储出错的案例学习 8 | + 用一致性换取简单和性能(trading consistency for simplicity and performance) 9 | + 后续设计的动机(motivation for subsequent designs) 10 | + 好的性能 —— 良好的并行I/O性能 11 | + 好的系统论文 —— 从apps到网络都有细节说明 12 | + 关于课程6.284的全部主题都会在这个论文中出现,性能、容错、一致性 13 | 14 | ##### 一致性是什么? 15 | + 正确性条件 16 | + 当数据存在副本,同时被应该程序并发访问的时候,正确性非常重要。 17 | + 如果一个应用程序进行写操作,那么之后的读操作可以观察到什么?如果这个读操作来自其他应用程序又会看到什么? 
18 | + 弱一致性 19 | + read()可能返回不新鲜的数据 ———— 不是最近写操作的结果 20 | + 强一致性 21 | + read()返回的结果数据是最近一次的写操作结果 22 | + 一般的权衡: 23 | + 强一致性对程序的写操作(application writers)表现不错 24 | + 强一致性将会影响性能 25 | + 更多的正确性条件(通常被称为一致性模型) 26 | 27 | ##### 一致性模型的历史 28 | + 在架构,系统和数据库社区存在独立发展 29 | + 带有私有缓存的并行处理器访问共享内存 30 | + 并行客户端访问分布式文件系统 31 | + 分布式数据库之上的并行事务 32 | + 不同的模型考虑不同的权衡 33 | + 可串行性(serializability) 34 | + 顺序一致性(sequential consistency) 35 | + 线性一致性(linearizability) 36 | + 单项一致性模型(entry consistency) 37 | + 松散一致性(release consistency) 38 | + ...... 39 | 40 | ##### “理想”的一致性模型 41 | + 一个存在副本的文件表现的跟单一文件系统一样,就像很多客户端访问存在同一个机器的单一磁盘的文件 42 | + 如果一个程序写操作,之后的读操作会获取到这个写的结果。 43 | + 如果两个程序同时写同一份文件会怎么样? 44 | + 在文件系统中这种行为经常是未定义的 —— 文件也许会混合两个写操作的内容 45 | + 如果两个应用程序并发写同一个目录会怎么样? 46 | + 一个一个顺序执行 47 | 48 | ##### 不一致的来源 49 | + 并发 50 | + 机器失败 51 | + 网络割裂 52 | 53 | ##### 来自GFS论文的例子 54 | + 主节点是备份分区B,客户端添加1,主节点将1发送给自己和分区备份A, 告诉客户端失败,同时客户端访问分区B,可能获取到老的值 55 | 56 | ##### 为什么理想中的一致性模型在分布式文件系统中的实现这么困难? 57 | + 理想的一致性模型协议非常复杂 —— 后面的课程我们会看到很难实现正确的系统 58 | + 协议需要客户端和服务器进行通信,这样会消耗性能 59 | 60 | 61 | ###### GFS的设计者为了最求更好的性能和更简单的设计而放弃理想的一致性模型 62 | + 能否使应用程序开发人员的生活困难 63 | + 在一个理想的系统中应用程序的行为不容易被观察到 64 | + 如:获取到过期的数据 65 | + 如:重复添加记录 66 | + 应用数据不是你的银行账号,所以这样可能不存在问题 67 | + 今天的论文是展现下面因素权衡的一个例子 68 | + 一致性 69 | + 容错性 70 | + 性能 71 | + 简单的设计 72 | 73 | ##### GFS的目标 74 | + 创建共享文件系统 75 | + (管理)成千上万的物理机器 76 | + 存储大量的数据集 77 | 78 | ##### GFS存储什么? 79 | + 作者没有说明,我们可以根据论文猜猜,可能包括如下部分 80 | + 搜索索引和数据库 81 | + Web上面的全部HTML文件 82 | + Web上面的全部图片文件 83 | + ...... 84 | 85 | ##### 文件属性: 86 | + TB级别的数据集 87 | + 很多文件巨大 88 | + 作者在2003的时候建议存储100 MB大小的文件1M份, 100 TB的数据量 89 | + 文件只支持追加方式 90 | 91 | ##### 主要挑战: 92 | + 因为存在很多机器,所以出错的情况很常见,假设一台机器一年出错一次,那么当存在1000台机器的时候,每天都有三台机器出现问题。 93 | + 高性能:很多并发的读写操作,Map/Reduce工作会从GF读取数据,然后保存最后的结果,注意:保存的不是中间临时文件。 94 | + 有效的使用网络 95 | 96 | ##### 高层次的设计 97 | + 定义目录、文件、命名、打开/读/写操作,但是不是符合posix标准 98 | + 成百上千带有硬盘的Linux块服务器 99 | + 存储64MB的块(an ordinary Linux file for each chunk) 100 | + 每个块在三台服务器上面做备份 101 | + Q: 为什么是3个备份? 102 | + Q: 除了数据可用,三备份方案给我们带来了什么? 负载均衡热文件的读取 103 | + Q: 为什么不把每一份文件存储在RAID硬盘? 104 | RAID不是常用品,我们想给整台机器做容错,而不是仅仅针对存储系统。 105 | + Q: 为什么chunk这么大? 106 | 107 | + GFS的主控服务器知道目录层次 108 | + 对于目录而言,知道里面有哪些文件 109 | + 对于文件而言,知道哪些数据块服务器存储了相关的64MB大小数据块 110 | + 主控服务器在内存中保存状态信息,每个chunk在主控服务器上面只保存64bytes大小的元数据 111 | + 主控服务器有为元数据准备的可回收数据库,可以从断电故障后快速恢复。 112 | + 同时存在备份的主控服务器(shadow master),数据略比主控服务器服务器延迟,可以被提升为主控服务器 113 | 114 | ##### 基本操作 115 | + 客户端读操作: 116 | + 向主控服务器发送文件名和偏移量 117 | + 主控服务器回复带有相关chunk的数据块服务器集合,客户端临时缓存这些信息,然后访问最近的数据块服务器 118 | + 客户端写操作: 119 | + 询问主控服务器,我应该往哪里写文件 120 | + 如果文件大小超过64MB,主控服务器也许会选择一些新的数据块服务器,one chunk server is primary it chooses order of updates and forwards to two backups 121 | 122 | ##### 两种不同的容错计划 123 | + 一种为了主控服务器设计 124 | + 一种为了数据块服务器设计 125 | 126 | ##### 主控服务器容错 127 | + 单台主控服务器 128 | + 客户端都是跟主控服务器交互 129 | + 主控服务器整理全部的操作 130 | + 长期存储有限的信息 131 | + 命名空间(目录) 132 | + 文件到chunk的映射 133 | + 操作日志会改变他们(上面的命名空间和映射?) 
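下面用一小段 Go 代码示意"先追加操作日志、再修改内存元数据、之后才答复客户端",以及"检查点 + 重放日志"的恢复思路。其中的类型和字段都是为说明而假设的极简模型,并不是 GFS 的实际实现:

```go
package gfsmaster

// 一条元数据变更记录(字段仅为示意)。
type LogRecord struct {
	Op, Path string
}

// 极简的 master 元数据:内存状态 + 已持久化的操作日志。
type Master struct {
	log          []LogRecord
	fileToChunks map[string][]int64 // 文件 -> chunk handle 列表
}

func NewMaster() *Master {
	return &Master{fileToChunks: make(map[string][]int64)}
}

// Mutate 示意一次元数据变更的顺序:
// 1) 先把记录追加到操作日志(真实系统中还要刷盘、并复制给日志副本);
// 2) 再修改内存中的命名空间 / 文件到 chunk 的映射;
// 3) 这之后才向客户端返回成功。
func (m *Master) Mutate(rec LogRecord) {
	m.log = append(m.log, rec)
	if rec.Op == "create" {
		m.fileToChunks[rec.Path] = nil
	}
}

// Recover 示意重启恢复:先装载最近的检查点,再重放检查点之后的日志。
func (m *Master) Recover(checkpoint map[string][]int64, tail []LogRecord) {
	m.fileToChunks = checkpoint
	for _, rec := range tail {
		if rec.Op == "create" {
			m.fileToChunks[rec.Path] = nil
		}
	}
	m.log = tail
}
```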
134 | + 操作日志存在多个备份 135 | + 客户端进行修改操作,状态在修改数据记录到操作日志之后才返回 136 | + 我们可以看到操作日志在多数系统中都发挥核心作用 137 | + 操作日志在实验中发挥核心作用 138 | + 限制操作日志的文件大小 139 | + 为主控服务器的状态创建检查点 140 | + 删除操作日志中检查点之前的全部操作 141 | + 检查点复制备份 142 | + 恢复 143 | + 操作日志从最新的检查点进行恢复 144 | + chunk的位置信息则通过询问数据块服务器获取 145 | + 主控服务器单点故障 146 | + 恢复很快,因为主控服务器的状态文件很小,也许会有很小时间的不可用 147 | + 影子服务器,它数据落后于主控服务器,它们用log中备份的数据进行回复。服务器执行只读操作,返回的数据也许不是最新的。 148 | + 如果主控服务器不能恢复,然后主控服务器有重新启动,系统必须避免出现两台主控服务器的出现。 149 | + 我们将会在后面的一些课程看到强一致性的方案,同时将会更加复杂。 150 | 151 | ###### 数据块服务器容错 152 | + 主控服务器授予一个备份服务器契约,这个备份成为主块服务器,将确定的操作顺序。 153 | + 客户端将数据发生给副本 154 | + Replicas form a chain 155 | + Chain respects network topology 156 | + Allows fast replication 157 | + 客户端发生写请求给主Chunk服务器 158 | + 主Chunk服务器分配序列号 159 | + 主Chunk服务器在本地应用修改 160 | + 主Chunk服务器向副本发送修改数据的请求 161 | + 主Chunk服务器接收到全部副本的ack消息之后,回复客户端 162 | + 如果一个副本没有回复,那么客户端会重试 163 | + 如果副本的数量少于某个值,master服务器会备份chunks,重新负载备份 164 | 165 | 166 | ##### chunk数据的持久化 167 | + 有些数据因为错过了更新,所以过时了。 168 | + 通过chunk的版本号判断数据是否不新鲜的,在发生租约前,增加chunk版本号码,将数据发送到主数据块服务器,同时在其他数据块服务器中备份,主服务器和数据块服务器长久的存储版本信息。 169 | + 发送版本号给客户端 170 | + 版本号帮助主控服务器和客户端判断备份是否不新鲜 171 | 172 | ##### 并发的写/追加 173 | + 客户端们也许会并发的同时写文件的同一个区域。 174 | + 结果是这些写操作的混合--no guarantees 175 | few applications do this anyway, so it is fine 176 | + 在Unix系统上面的并发写也会导致奇怪的输出 177 | + 很多客户端也许想并发的往一个长文件里面添加 178 | + GFS支持原子操作,保证至少一次添加,主Chunk服务器选择记录需要添加到的文件位置,然后发送给其他副本。如果和一个副本的联系失败,那么主Chunk服务器会告诉客户端重试,如果重试成功,有些副本会出现追加两次的情况(因为这个副本追加成功两次)。当GFS要去填塞chunk的边缘时,如果追加操作跨越chunk的边缘,那么文件也可能存在空洞。 179 | 180 | ##### 一致性模型 181 | + 目录操作的强一致性 182 | + Master服务器原子的修改元数据,目录操作发生在理想情况 183 | + 如果Master服务器下线,只剩下备份服务器,这时只允许只读操作,同时返回的数据可能不新鲜。 184 | + chunk操作的弱一致性 185 | + 一个失败的突变使的chunk变得不一致 186 | + 主chunk服务器更新chunk文件,但是同步给副本时失败,这时副本的数据就过时了, 187 | + 客户端可能读到的数据不是最新的,当刷新获取新的租约的时候,客户端会获取到新的版本 188 | + 作者主张弱一致性对app而言不是什么大问题 189 | + 大多数文件更新操作只是追加 190 | + 应用程序可以使用添加记录中的uid判断是否重复 191 | + 应用程序也许只是读取到少量的数据(而不是不新鲜的数据) 192 | + 应用程序可以使用临时文件和原子的重命名操作 193 | 194 | ##### 性能 195 | + 巨大的读操作总吞吐量(3个副本,striping ???) 196 | + 125 MB/sec 197 | + 接近网络饱和状态 198 | + 写入不同的文件低于可能的最大值 199 | + 作者怪网络堆栈 200 | + chunk直接的复制操作会引起延迟 201 | + 并发追加同一份文件 202 | + 被服务器存在的最新的chunk所限制 203 | 204 | 205 | ##### 总结 206 | + GFS使用的比较重要的容错技术riz 207 | + 操作日志、检查点 208 | + chunk之间的主备备份(but with consistencies??) 209 | + 我们将会在其他系统中也看到这里 210 | + 哪些在GFS中工作很好 211 | + 巨大的顺序读写操作 212 | + 追加 213 | + 巨大的吞吐量 214 | + 数据之间的容错 215 | + 哪些在GFS中做的不怎么好 216 | + master服务器的容错 217 | + 小文件(master服务器的瓶颈) 218 | + 多个客户端并发的向同一份文件更新操作(除了追加) 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | -------------------------------------------------------------------------------- /Lec03_GFS/Question.md: -------------------------------------------------------------------------------- 1 | ## 阅读GFS论文时,可以尝试思考如下问题 2 | + 来自《大规模分布式存储系统》 3 | 4 | 5 | ##### 1)为什么存储三个副本?而不是两个或者四个? 6 | 7 | ##### 2)Chunk的大小为何选择64MB?这个选择主要基于哪些考虑? 8 | 9 | ##### 3)GFS主要支持追加(append)、改写(overwrite)操作比较少。为什么这样设计?如何基于一个仅支持追加操作的文件系统构建分布式表格系统Bigtable? 10 | 11 | ##### 4)为什么要将数据流和控制流分开?如果不分开,如何实现追加流程? 12 | 13 | ##### 5)GFS有时会出现重复记录或者补零记录(padding),为什么? 14 | 15 | ##### 6)租约(Lease)是什么?在GFS起什么作用?它与心跳(heartbeat)有何区别? 16 | 17 | ##### 7)GFS追加操作过程中如果备副本(Secondary)出现故障,如何处理?如果主副本(Primary)出现故障,如何处理? 18 | 19 | ##### 8)GFS Master需要存储哪些信息?Master数据结构如何设计? 20 | 21 | ##### 9)假设服务一千万个文件,每个文件1GB,Master中存储的元数据大概占用多少内存? 22 | 23 | ##### 10)Master如何实现高可用性? 24 | 25 | ##### 11)负载的影响因素有哪些?如何计算一台机器的负载值? 26 | 27 | ##### 12)Master新建chunk时如何选择ChunkServer?如果新机器上线,负载值特别低,如何避免其他ChunkServer同时往这台机器迁移chunk? 
28 | 29 | ##### 13)如果某台ChunkServer报废,GFS如何处理? 30 | 31 | ##### 14)如果ChunkServer下线后过一会重新上线,GFS如何处理? 32 | 33 | ##### 15)如何实现分布式文件系统的快照操作? 34 | 35 | ##### 16)ChunkServer数据结构如何设计? 36 | 37 | ##### 17)磁盘可能出现“位翻转”错误,ChunkServer如何应对? 38 | 39 | ##### 18)ChunkServer重启后可能有一些过期的chunk,Master如何能够发现? -------------------------------------------------------------------------------- /Lec03_GFS/gfs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec03_GFS/gfs.pdf -------------------------------------------------------------------------------- /Lec04_Primary_Backup_Replication/vm-ft.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec04_Primary_Backup_Replication/vm-ft.pdf -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/lab2_Raft.md: -------------------------------------------------------------------------------- 1 | ## 6.824 Lab 2: Raft 2 | 3 | [原文地址](https://pdos.csail.mit.edu/6.824/labs/lab-raft.html) 4 | 5 | ### 简介 6 | + 下面的一系列实验,你们会构建一个具有容错功能的kv存储系统,这是一系列实验的第一个。你们将会从实现Raft算法开始,a replicated state machine protocol(一个复制状态机协议?). 在下一个实验中,你们将会在Raft算法之后构建KV服务。然后你们会分散你们的服务以换取更高的性能,最后实现分布式事务操作。 7 | + 一个复杂服务使用Raft协议有利于管理众多备份服务器。正是基于有备份服务器这一点,服务器在副本出错的情况(崩溃、a broken、糟糕的网络环境)也能继续操作。挑战也在这里,就是因为这种错误情况的存在,副本们不是总是保持数据一致性;Raft帮助服务挑选出哪些数据是正确的。 8 | + Raft基本的方法是实现了一个复杂的状态机。Raft将客户端请求组织成一个序列,称为日志,然后保证全部的副本同意日志的内容。每个副本按照日志中的请求顺序的执行,将这些日志里面的情况应用到本机服务。因为全部活着的副本看到一样的日志内容,它们都是顺序的执行一样的请求,因此它们有相同的服务状态。如果一个服务器失败了,之后又恢复了,Raft会小心翼翼地把它的日志更新到最新。只要多数的服务器可以工作,同时它们直接可以相互通信,Raft就可以工作。如果存活的服务不多了,那么Raft毫无进展,但是会等待多数服务存活的情况下继续工作。 9 | + 在这个实验中你们将会带有方法的Go对象实现Raft,这意味着将会作为一个更大服务的一个模块使用。一系列Raft实例通过RPC相互通信维护日志副本。你们的Raft接口将支持无顺序编号的命令,同时这些命令被称为日志节点( log entries)。节点被使用数字索引。带有索引的日志节点最终将会被提交。在那个阶段,也就是你们的Raft应该发生日志节点到更大的服务去执行。 10 | 11 | 注意: 不同Raft实例直接的交互我们只使用RPC。举例,不同的Raft实例直接不允许通过共享Go变量的方式交互。当然你们的实现也不能使用文件。 12 | 13 | + 在这个实验中你们的实现[《extended Raft paper》](https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf)中描述的大多数设计,包括持久化状态,然后当服务器失败重启的时候读取持久化数据。你们不会实现集群关系更改或者日志压缩/快照。 14 | + 你们应该总结[《extended Raft paper》](https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf)和Raft课程讲稿。你们也许会发现[《illustrated Raft guide 》](http://thesecretlivesofdata.com/raft/)有利于高层次的理解Raft的工作。为了更广阔的视角,应该去了解 Paxos, Chubby, Paxos Made Live, Spanner, Zookeeper, Harp, Viewstamped Replication,和[Bolosky et al](http://static.usenix.org/event/nsdi11/tech/full_papers/Bolosky.pdf). 15 | 16 | + 提示: 尽早开始。虽然实现部分代码不是很多,但是让它正常的工作将会是一个挑战。算法和代码都非常狡猾,同时还有很多偏僻的个案需要你们考虑。当一个测试失败的时候,也许比较费解到底是哪个场景让你们的解决方案不正确,怎么去修改你们的解决方案。 17 | + 提示: 在你开始之前阅读理解[《extended Raft paper》](https://pdos.csail.mit.edu/6.824/papers/raft-extended.pdf)和Raft课堂讲稿。你们的实现应该贴近论文的描述,因为那也是测试因为的。Figure 2部分的伪代码应该会有所帮助。 18 | 19 | ### 合作政策 20 | + 你们必须编写课程6.824出来我们提供的的全部代码,不能查看其他人的解决方案,也不能查看上一届的代码实现,也不允许查看其他Raft的实现。你们也许会跟其他学习讨论,反射不能查看或者直接复制他们的代码。请不要公开你的代码而被这门课程的学生所使用。比如,不要将你的代码上传到Github。[我这样不传代码应该没事吧]() 21 | 22 | ### 开始 23 | + 使用git pull命令获取最新的实验代码。我们在src/raft目录下面为你们提供框架代码和测试,在src/labrpc目录下面提供了一个简单的类rpc系统。 24 | + 获取代码,然后运行,执行下面的命令。 25 | 26 | $ setup ggo_v1.5 27 | $ cd ~/6.824 28 | $ git pull 29 | ... 30 | $ cd src/raft 31 | $ GOPATH=~/6.824 // 根据实际情况填写 32 | $ export GOPATH 33 | $ go test 34 | Test: initial election ... 
35 | --- FAIL: TestInitialElection (5.03s) 36 | config.go:270: expected one leader, got 0 37 | Test: election after network failure ... 38 | --- FAIL: TestReElection (5.03s) 39 | config.go:270: expected one leader, got 0 40 | ... 41 | $ 42 | 43 | + 当你们全部完成的时候,你们的实现应该全部src/raft目录下面的测试: 44 | 45 | $ go test 46 | Test: initial election ... 47 | ... Passed 48 | Test: election after network failure ... 49 | ... Passed 50 | ... 51 | PASS 52 | ok raft 162.413s 53 | 54 | 55 | ### 你们的工作 56 | + 你们通过在raft/raft.go文件里面添加代码实现Raft。在那个文件里面,你们会发现一些框架代码,添加发生和接收RPC请求的例子,添加保存恢复状态的例子代码。 57 | + 你们的实现必须支持下面的接口,这些接口会在测试例子和你们最终的key/value服务器中使用。你们可以在raft.go里面获取更多的细节。 58 | 59 | // create a new Raft server instance: 60 | rf := Make(peers, me, persister, applyCh) 61 | 62 | // start agreement on a new log entry: 63 | rf.Start(command interface{}) (index, term, isleader) 64 | 65 | // ask a Raft for its current term, and whether it thinks it is leader 66 | rf.GetState() (term, isLeader) 67 | 68 | // each time a new entry is committed to the log, each Raft peer 69 | // should send an ApplyMsg to the service (or tester). 70 | type ApplyMsg 71 | 72 | + 一个服务通过调用Make(peers,me,…)创建一个Raft端点。peers参数是通往其他Raft端点处于连接状态下的RPC连接。me参数是自己在端点数组中的索引。Start(command)要求Raft开始将command命令追加到日志备份中。Start()函数马上返回,不等待处理完成。服务期待你们的实现发生一个ApplyMsg结构给每个完全提交的日志,通过applyCh通道。 73 | + 你们的Raft端点应该使用我们提供的librpc包来交换RPC调用。它是仿照Go的rpc库完成的,但是内部使用Go channles而不是socket。raft.go里面包含了一些发生RPC(sendRequestVote())和处理RPC请求(RequestVote())的例子代码。 74 | 75 | + 任务: 76 | 实现领导选举和心跳(empty AppendEntries calls). 这应该是足够一个领导人当选,并在出错的情况下保持领导者。一旦你们让下面的正常工作,你们就可以通过第一二个测试。 77 | 78 | + 提示:在raft.go文件中的Raft结构体中添加任何你想要保存的状态。论文中的Figure 2部分也许会成为很好的参考。你们需要定义一个结构体保存每个日志节点的信息。记住字段的名字,任何你打算通过RPC发生的结构都需要以大写字母开头,结构体里面的字段名字都会通过RPC传递。 79 | + 提示:你们应该先实现Raft的领导选举。补充RequestVoteArgs和RequestVoteReply结构体,然后修改Make()函数创建一个后台的goroutine,当长时间接收不到其他节点的信息时开始选举(通过对外发送RequestVote请求)。为了能让选举工作,你们需要实现RequestVote()请求的处理函数,这样服务器们就可以给其他服务器投票。 80 | + 提示: 为了实现心跳,你们将会定义一个AppendEntries结构(虽然你们可能用不到全部的从参数),有领导人定期发送出来。你们同时也需要实现AppendEntries请求的处理函数,重置选举超时,当有领导选举产生的时候其他服务器就不会想成为领导。 81 | + 提示:确保定时器在不同的Raft端点没有同步。尤其是确保选举的超时不是同时触发的,否则全部的端点都会要求会自己投票,然后没有服务器能够成为领导。 82 | 83 | + 当我们的代码可以完成领导选举之后,我们想要使用Raft保存一致,复杂日志操作。为了做到这些,我们需要通过Start()让服务器接受客户端的操作,然后将操作插入到日志中。在Raft中,只有领导者被允许追加日志,然后通过AppendEntries调用通过其他服务器增加新条目。 84 | 85 | 86 | + 任务: 87 | 88 | 实现领导者和随从者代码达到追加新的日志节点的目标。这里将会包含实现Start(),完成AppendEntries RPC结构体,发送他们,完成AppendEntry RPC调用的处理函数.你们的目标是通过test_test.go文件中的TestBasicAgree()测试。一旦这些工作之后,你们可以在"basic persistence" 测试完成之前,通过其他全部的测试。 89 | 90 | + 提示:这个实验的一大部分是让你们处理各种各样的错误。你们需要实现选举的限制(论文 secion 5.4.1描述)。下面的一系列测试也是处理各种各样的错误的例子,比如一些服务器接收不到一些RPC调用,一些服务器偶尔崩溃重启。 91 | + 提示:当领导者是仅存的服务器的时候,这会导致条目被添加到日志中,全部的服务器需要独立地给他们的本地服务副本提交的新条目(通过它们自己的applyCh)。因此,你们应该保持这两个活动尽可能独立。 92 | + 提示:在没有错误的情况下需要指出Raft应该使用的最小数量的消息,这样让你们的实现最小值。 93 | 94 | + 基于Raft的服务器必须有能力知道自己什么时候退出的,然后如果机器重启可以继续。这就需要Raft保存状态这样就可以经受得住重启。 95 | + 一个“真“的实现会在每一次改变状态的时候将状态值写到磁盘,然后在重启之后读取上次保存的最新的状态值。你们的实现不会使用磁盘;作为替代,你们将会使用可持久化的对象(查看persister.go)保存和恢复状态。不论是谁对可持久化对象调用Make(),如果有的话先持有Raft最近持久化状态。Raft将会从可持久化对象初始化自己的状态,每一次的状态更改的时候使用它保存自己的状态信息。你们应该使用ReadRaftState()和SaveRaftState() 方法分别处理读取和存储的操作。 96 | 97 | + 任务: 98 | 99 | 通过添加代码去序列化那些需要在persist()函数中需要保存的状态达到实现持久化状态的作用,在readPersist()函数中反序列化相同的状态。你们需要觉得在Raft协议栈中你们的完全在哪些关键点需要持久化它们的状态,然后在这些代码插入persist()函数。一旦这些代码完成,你们就可以通过剩下的测试了。你们也许想第一个尝试通过"basic persistence" 测试(go test -run 'TestPersist1$'),然后解决剩下的其他测试。 100 | 101 | + 注意:你们需要讲状态值编码成字节数组,为了将他传递给Persister;在raft.go中包含了在 persist()和readPersist()使用的例子代码 102 | + 注意:为了避免运行期间出现OOM(out of 
memory),Raft必须定期的忽略老的日志,你们在这个实验中不需要考虑日志的垃圾回收机制,你们会在下一个实验中实现。 103 | + 提示:跟RPC系统为什么只发生一大写字母开始的字段,忽略那些一小写字母开头的字段一样,你们持久化过程中使用的GOB编码也只会保存那些以大写字母开始的自段。这是一个常见的神秘的错误来源,但是Go不会警告你这些错误。 104 | + 提示:为了在实验接近尾声的时候可以通过一些具有挑战性的测试,比如那些被标记”不可靠的“,你们需要优先允许一个跟随者备份领导者的nextIndex(???)而不是一次只备份一个。可以在《the extended Raft 》的7,8页上面看到描述。 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | + 123 | 124 | -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/raft-extended.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec05_Fault_Tolerance_Raft/raft-extended.pdf -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/raft-zh/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/raft-zh/README.md: -------------------------------------------------------------------------------- 1 | # raft-zh_cn 2 | Raft一致性算法论文的中文翻译 3 | 4 | 英文[论文地址](https://ramcloud.atlassian.net/wiki/download/attachments/6586375/raft.pdf) 5 | 6 | 中文[翻译地址](https://github.com/maemual/raft-zh_cn/blob/master/raft-zh_cn.md) 7 | -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/raft.md: -------------------------------------------------------------------------------- 1 | ### 6.824 2014 Lecture 5: Raft(1) 2 | 3 | [《Lecture 5: Raft (1)》原文地址](https://pdos.csail.mit.edu/6.824/notes/l-raft.txt) 4 | 5 | #### 本课程 6 | + 今天:Raft(lab2) 7 | + 下一步计划:在kv服务中使用Raft(lab3) 8 | 9 | #### 整体的主题:使用复制状态机实现容错 (SRM ??) 10 | + [客户端、副本服务器] 11 | + 每一个副本服务器以相同的顺序执行一样的操作 12 | + 他们让副本的执行就行自己执行一样,当如果有失败的情况任何副本可以接管工作,比如:在失败的时候,客户端会切换到其他服务器,GFS和VMware FT都有这种“方式”(flavor) 13 | 14 | #### 典型的“状态机”是怎么样的? 15 | + 应用程序/服务的内部状态、输入命令序列、输出都被复制 16 | + 顺序意味着没有并行性,必须是确定的 17 | + 除了通过输入和输出不能和外界的状态机进行通讯 18 | + 在这里我们将讨论相当高层次的服务 19 | + 例子:配置服务,比如MapReduce或者GFS master 20 | + 例子:键值存储,get/put操作(lab3) 21 | 22 | #### 一个关键的问题:怎么避免脑裂? 23 | + 建设客户端可以连接副本A但是不能连接副本B,客户端可以只跟副本A交互吗? 24 | + 如果B已经崩溃,我们必须在处理的时候离开B,不然我们不能容错! 25 | + 如果B启动,但是网络阻止我们连接副本B,也许我们不应该在处理的时候离开B, 26 | 因为它可能存活同时在为其他客户端提供服务---存在脑裂网络分区的风险 27 | + 在一般情况下我们不能区分脑裂和崩溃 28 | 29 | #### 使用单个主节点的方式可以避免脑裂吗? 30 | + 主服务器计算决定A和B哪个会成为主节点,因为这边只有一个主节点所以它不会出现不同意自己决定的情况。 31 | + 客户端和主节点交互,如果主节点失败了怎么办?这是个单节点故障问题---这种方式不是很好 32 | 33 | #### 我们想使用复制状态机解决如下问题: 34 | + 不存在单节点故障(single point of failure) --可以处理任何一台机器挂掉的情况 35 | + 处理脑裂问题(partition w/o split brain) 36 | 37 | #### The big insight for coping w/ partition: majority vote ?? 38 | + 2f+1个服务器实例,比如3,5 39 | + 必须获取到不少于f+1票之后才能处理,比如:成为主节点,因此即使f个服务实例失败我们还是可以继续工作,这样就避免了单点故障问题。 40 | + 为什么这样做就不存在脑裂了? 
41 | + 多数情况下,一个分区可以拥有多数实例 42 | 注意:多数是指2f+1中的多数,不是现在存活的实例中的多数,任何两个交叉的实例(intersect servers in the intersection)在投票的时候只能投一次这是非常重要的事情,我们在后面会看到交叉(intersection)传达着其他信息。 43 | 44 | #### 两个多数复制方案发明在1990年左右 45 | + 这两个方案是Paxos和 View-Stamped Replication 46 | + 在过去的十几年间,这些方案在现实世界中多次使用,Raft是这个想法的一个非常不错的描述。 47 | 48 | #### MapReduce、GFS和VMware FT都从SRM中获益 49 | + MapReduce的master节点并没有复制 50 | + GFS的master虽然被复制,但是没有自动切换到备份 51 | + VMware FT shared disk atomic test-and-set was (apparently) not replicated 52 | 53 | #### Raft实现的状态机复制 54 | + Raft选择一个实例充当领导者 55 | + 客户端通过RPC请求发送Put、Get、Append命令给领导者的键值层(k/v layer) 56 | + 领导者将这些请求全部发送给其他的副本 57 | + 每个追随者往后追加日志 58 | + 目标是含有相同的日志 59 | + 将并发的客户端请求Put(a,1) Put(a,2) 转换成顺序的请求 60 | + 如果多数将它添加到自己的日志,而且是持久化的,那么这个条目被“确认”,多数意味着即使少数服务器失败还是可以正常处理 61 | + 服务器当领导者说条目被确认之后执行一次,键值层将put操作应用到数据库,或者提取得到的结果,然后领导者返回给客户端写的结果 62 | 63 | #### 为什么是日志? 64 | + 为什么服务器不是通过其他方式,比如数据库来保证状态机状态呢? 65 | + 如果追随者错过了一些领导者的命令会怎么样? 66 | + 如果有效的更新到最新?回答:重发错过的命令 67 | + 到目前为止,日志是一些顺序的命令,它相对于状态---开始的状态 + 日志 = 最终的状态 68 | + 日志经常提供一个方便的编号方案,给操作排队同时日志处于隐藏区域直到我们确认命令被提交 69 | 70 | #### Raft的日志总是会被精确的复制吗? 71 | + 不:有些副本也许会滞后 72 | + 不:我们可以看到他们会拥有不同的条目 73 | + 好消息是: 74 | + 如果一个服务器已经执行了给定条目中的命令,那么没有其他服务器会执行这个条目中的其他命令.比如:the servers will agree on the command for each entry.State Machine Safety (Figure 3) 75 | 76 | 77 | #### 实验2:Raft接口 78 | + rf.Start(command) (index, term, isleader) 79 | + 启动新日志条目的协议 80 | + 成功还是失败都会马上返回 81 | + 如果服务器在提交命令之前失去领导者 82 | + index表示要观察的日志条目 83 | + ApplyMsg, with Index and Command 84 | + 当服务(k/v server)需要执行一个新命令的时候,Raft会在通道上面产生一个消息。它同时通知客户端的rpc处理器,所以同时可以给客户端回复。 85 | 86 | + 注意:领导者不需要等待给the AppendEntries RPCs的回复 87 | + 不希望被失败的服务所阻塞 88 | + 所以会在各自的goroutine中发送 89 | + 这样(??)意味着很多RPC请求到达的时候是无序的 90 | 91 | #### Raft的设计主要包括两个部分: 92 | + 领导者选举 93 | + 在故障后确保相同的日志 94 | 95 | #### Raft给领导者编号 96 | + 新的领导者--->新的项目(new term) 97 | + 一个项目至少有一个领导者;在某些情况下没有领导者(a term has at most one leader; might have no leader) 98 | + 编号会帮助服务器选择最新的领导,而不是取得领导 99 | 100 | #### Raft什么时候开启领导者选举? 101 | + 其他服务器在一段时间内感受不到领导者的的时候 102 | + 他们增加本地currentTerm,成为候选人,开始选举 103 | 104 | #### 怎么保证在一个项目(term)中只有一个领导者? 105 | + (Figure 2 RequestVote RPC and Rules for Servers) 106 | + 领导者必须获取到大多数服务器的投票 107 | + 每个服务器可以为每一项投一次,投票给请求的第一个服务器(within Figure 2 rules) 108 | + 最多一个服务器可以获得给定项目的多数票 109 | + 最多一个leader,即使网络分区 110 | + 即使一些服务器发生故障,选举也可以成功 111 | 112 | #### 服务器如何知道选举成功了? 113 | + 赢家获得是多数票 114 | + 其他人看到AppendEntries从胜利者的心跳 115 | 116 | #### 选举可能不成功 117 | + 大于3的候选人分裂投票,没有获得多数票 118 | + even # of live servers,两个候选人每个得到一半,少于大多数服务器可达 119 | 120 | #### 选举失败后会发生什么? 121 | + 另一个超时,增量currentTerm,成为候选 122 | + 较高的项目优先,候选的较旧的项目退出 123 | 124 | #### Raft如何减少分裂投票导致选举失败的机会? 125 | + 每个服务器在开始候选前,延迟一个随机时间 126 | + 延迟的作用是? 127 | + [diagram of times at which servers' delays expire] ??? 128 | + 一台服务器将选择最低的随机延迟 129 | + 希望有足够的时间在下一个超时到期之前选择 130 | + 其他人将看到新领导者的AppendEntries心跳,而不会成为候选人 131 | 132 | #### 如何选择随机延迟范围? 133 | + 时间太短:第二个候选在第一个结束前开始 134 | + 时间太长:系统在引导器故障后闲置太长时间 135 | + 粗略指南: 136 | + 假设完成一个无人选举需要10毫秒,同时我们有5台服务器 137 | + 我们希望延迟被20ms分开,因此随机延迟从0到100 ms 138 | 139 | #### Raft选举遵循一个共同的模式:安全与进展的分离 140 | + 硬机制排除> 1领导,以避免裂脑,但可能不是领导者,或未知的结果 141 | + 软机制试图确保进展,总是安全地在一个新的期限开始新的选举 142 | + 软机制试图避免不必要的选举 143 | + 来自领导者的心跳(提醒服务器不开始选举) 144 | + 超时时间(不要开始选择太快) 145 | + 随机延迟(给一个领导者时间被选举) 146 | 147 | #### 如果老领导不知道一个新的选举产生了怎么办? 148 | + 也许老领导接收不到新领导的信息 149 | + 新领导的产生意味着在大多数服务器已经增加了currentTerm 150 | + 所以老领导(w/ old term)不能获取到大多数AppendEntries 151 | + 所以老领导不会提交或执行任何新的日志条目 152 | + 从而没有分裂脑,尽管分裂 153 | + 但少数可以接受旧服务器的AppendEntries,因此日志可能在旧期限结束时分歧 154 | 155 | #### 现在让我们谈谈在失败后同步日志 156 | #### 我们想要确认什么? 
157 | + 也许:每个服务器以相同的顺序执行相同的客户端命令,失败的服务器可能无法执行任何操作 158 | + 因此:if any server executes, then no server executes something 159 | else for that log entry 160 | Figure 3's State Machine Safety 161 | + 只要是单个领导者,这样很容易阻止日志中不一致的情况 162 | 163 | #### 日志怎么样会不一致? 164 | + 日志可能会缺少---在term的结尾处缺少 165 | + 在发送所有AppendEntries之前,term 3的领导者崩溃 166 | 167 | S1: 3 168 | S2: 3 3 169 | S3: 3 3 170 | + 日志可能在同一条目中具有不同的命令! 171 | + 在一系列领导者崩溃后,10 11 12 13 <- log entry # 172 | 173 | S1: 3 174 | S2: 3 3 4 175 | S3: 3 3 5 176 | 177 | #### 的领导者将强制其追随者使用自己的日志;比如 178 | + S3被选为term6的新领导者 179 | + S3发送新命令,entry 13, term 6 AppendEntries, previous entry 12, previous term 5 180 | + S2回复false(AppendEntries step 2) 181 | + S3 将nextIndex[S2]增加到12 182 | + S3 sends AppendEntries, prev entry 11, prev term 3 183 | + S2删除entry 12 (AppendEntries step 3) 184 | + S1的行为类似,但必须再回一个更远 185 | 186 | #### 回滚的结果 187 | + 每个存活的跟踪者删除不同于领导的尾部 188 | + 因此实时追踪者的日志是领导者日志的前缀日志 189 | + 存活的追随者将与领导者保持相同的日志,除非他们可能缺少最近的几个条目 -------------------------------------------------------------------------------- /Lec05_Fault_Tolerance_Raft/寻找一种易于理解的一致性算法.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec05_Fault_Tolerance_Raft/寻找一种易于理解的一致性算法.doc -------------------------------------------------------------------------------- /Lec07_Guest_lecturer_on_Go/gomem.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec07_Guest_lecturer_on_Go/gomem.pdf -------------------------------------------------------------------------------- /Lec08_Zookeeper/zookeeper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec08_Zookeeper/zookeeper.pdf -------------------------------------------------------------------------------- /Lec09_Distributed_Transactions/thor95.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec09_Distributed_Transactions/thor95.pdf -------------------------------------------------------------------------------- /Lec11_FaRM/farm-2015.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec11_FaRM/farm-2015.pdf -------------------------------------------------------------------------------- /Lec13_Disconnected_Operation_Eventual_Consistency/bayou-conflicts.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec13_Disconnected_Operation_Eventual_Consistency/bayou-conflicts.pdf -------------------------------------------------------------------------------- /Lec14_Case Studs_Relaxed_Consistency/cooper-pnuts.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec14_Case Studs_Relaxed_Consistency/cooper-pnuts.pdf -------------------------------------------------------------------------------- /Lec15_Case_Studis_Dynamo/dynamo.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec15_Case_Studis_Dynamo/dynamo.pdf -------------------------------------------------------------------------------- /Lec16_Wide-Area Publish_Subscribe/l-wormhole.txt: -------------------------------------------------------------------------------- 1 | 6.824 2016 Lecture 16: 2 | 3 | Wormhole: Reliable Pub-Sub to support Geo-replicated Internet Services, Sharma 4 | et al, 2015. 5 | 6 | why are we reading this paper? 7 | pub-sub common building block in distributed systems 8 | YMB, FAB, Kafka 9 | case study: Facebook's Wormhole 10 | motivated by memcache 11 | 12 | how do web sites scale up with growing load? 13 | a typical story of evolution over time: 14 | 1. one machine, web server, application, DB 15 | DB stores on disk, crash recovery, transactions, SQL 16 | application queries DB, formats, HTML, &c 17 | but the load grows, your PHP application takes too much CPU time 18 | 2. many web FEs, one shared DB 19 | an easy change, since web server + app already separate from storage 20 | FEs are stateless, all sharing (and concurrency control) via DB 21 | but the load grows; add more FEs; soon single DB server is bottleneck 22 | 3. many web FEs, data sharded over cluster of DBs 23 | partition data by key over the DBs 24 | app looks at key (e.g. user), chooses the right DB 25 | good DB parallelism if no data is super-popular 26 | painful -- cross-shard transactions and queries probably don't work 27 | hard to partition too finely 28 | but DBs are slow, even for reads, why not cache read requests? 29 | 4. many web FEs, many caches for reads, many DBs for writes 30 | cost-effective b/c read-heavy and memcached 10x faster than a DB 31 | memcached just an in-memory hash table, very simple 32 | complex b/c DB and memcacheds can get out of sync 33 | (next bottleneck will be DB writes -- hard to solve) 34 | 35 | the big facebook infrastructure picture 36 | lots of users, friend lists, status, posts, likes, photos 37 | fresh/consistent data apparently not critical 38 | because humans are tolerant? 39 | high load: billions of operations per second 40 | that's 10,000x the throughput of one DB server 41 | multiple data centers (at least west and east coast) 42 | each data center -- "region": 43 | "real" data sharded over MySQL DBs 44 | memcached layer (mc) 45 | web servers (clients of memcached) 46 | each data center's DBs contain full replica 47 | west coast is master, others are slaves via MySQL async log replication 48 | 49 | what does FB store in mc? 50 | maybe userID -> name; userID -> friend list; postID -> text; URL -> likes 51 | basically copies of data from DB 52 | 53 | how do FB apps use mc? 54 | read: 55 | v = get(k) (computes hash(k) to choose mc server) 56 | if v is nil { 57 | v = fetch from local DB 58 | set(k, v) 59 | } 60 | write: 61 | v = new value 62 | send k,v to master DB # maybe in remote region 63 | 64 | how to arrange that DB in different regions get updated? 65 | master DB receives all writes (similar to PNUTS) 66 | adds entry to transaction log 67 | replicates transaction log to slaves 68 | 69 | how to arrange that mc in different regions learn about writes 70 | need to invalidate/update mc entry after write 71 | eval section suggests it is important to avoid stale data 72 | option 1: mc in remote region polls its local DB 73 | increases read load on DB 74 | what is the poll interval? 
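Before turning to option 2 below, here is a rough Go sketch of the look-aside
pattern described above under "how do FB apps use mc?". The Cache/DB interfaces
and function names are illustrative stand-ins, not Facebook's actual APIs; the
write path is what creates the stale-cache problem that both options try to solve:

  type Cache interface {
      Get(k string) (string, bool)
      Set(k, v string)
      Delete(k string)
  }
  type DB interface {
      Query(k string) string
      Update(k, v string)
  }

  // read path: look-aside cache
  func read(mc Cache, db DB, k string) string {
      if v, ok := mc.Get(k); ok {
          return v // hit
      }
      v := db.Query(k) // miss: read the local DB replica
      mc.Set(k, v)     // repopulate the cache
      return v
  }

  // write path: send the write to the master DB (maybe in another region);
  // every region's mc still holds the old value until that key is
  // invalidated or updated
  func write(masterDB DB, localMC Cache, k, v string) {
      masterDB.Update(k, v)
      localMC.Delete(k) // local invalidate; remote mc's remain stale
  }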
75 | option 2: wormhole pub/sub 76 | 77 | pub/sub 78 | a common building block in distributed systems 79 | facebook use case 80 | subscriber is mc 81 | publisher is DB 82 | subscribers link we a library 83 | update configuration file to express interest in updates 84 | stored in zookeeper 85 | publishers read a configuration file to find subscribers 86 | establishes a flow with each subscriber 87 | send wormhole updates on each flow asynchronously 88 | set of key-value pairs 89 | filters 90 | subscribers tell publishers about a filter 91 | filter is a query over keys in wormhole update 92 | publishers send only updates that pass filter 93 | 94 | delivery semantics 95 | all updates on a flow are delivered in order 96 | publisher maintains per subscriber a "data marker" 97 | sequence number of an update in the transaction log 98 | records what a subscriber has received 99 | publishers ask subscriber periodically for what it has received 100 | i.e., marker is a lower bound what subscriber has received 101 | updates are delivered at least once 102 | publisher persists marker 103 | if publisher fails start sending from last marker 104 | => subscribers may receive update twice 105 | Q: how do subscribers deal with an update delivered several times 106 | A: no problem for caches 107 | A: application can do duplicate filtering 108 | Q: can an update be never delivered? 109 | A: yes, because transaction log may have been truncated 110 | data in log is present for 1-2 days 111 | 112 | Q: why does subscriber not keep track of marker? 113 | A: FB wants subscribers to be stateless 114 | 115 | Q: why are markers periodically acked by subscribers 116 | A: Expensive to ack each update 117 | A: Uses TCP for delivery 118 | They don't have to worry about packet loss 119 | 120 | Where to store the marker? 121 | SCRD: publisher stores in local persistent storage 122 | if storage is unavailable, cannot fail over to new publisher 123 | caches will be stale 124 | if storage fails, lose marker 125 | opportunity: storage/log is often replicated 126 | MCRD: publishers stores marker in Zookeeper 127 | if publisher fails, another publisher can take over 128 | read last marker from zookeeper 129 | implementation challenge: format of marker 130 | replicas of same log have different binary format 131 | solution: "logical" positions 132 | Q: isn't it expensive to update marker in Zookeeper? 
133 | A: yes, but only done periodically 134 | 135 | Implementation challenge: many different DBs 136 | don't want to modify any of them to support flows 137 | idea: publishers read transaction log of DB 138 | read library to read different log formats 139 | convert updates in standard format (Wormhole update) 140 | one key is a shard identifier 141 | 142 | Optimization 1: caravan 143 | design 1: one reader per flow 144 | puts too much load on DB 145 | in steady state, all readers read same updates 146 | design 2: one reader for all flows 147 | bad performance on recovery 148 | on recovery each flow may have to read from different point in log 149 | solution: one reader per cluster of flows ("caravan") 150 | in practice, number of caravans is small (~1) 151 | that one caravan is called the "lead caravan" 152 | 153 | Optimization 2: load balancing flows 154 | a single application has several subscribers 155 | application data is sharded too 156 | N DB shards 157 | M application machines 158 | -> an MC machine may have more than 1 subscriber 159 | e.g., when it stores 2 DB shards 160 | Q: on creation of a new flow which app machine is the subscriber? 161 | two plans: 162 | - weighted random selection 163 | - subscribers use zookeeper 164 | 165 | Optimization 3: one TCP connection 166 | multiplexes several flows for same subscriber 167 | subscriber may have several shards 168 | one flow for each shard 169 | 170 | Deployment 171 | in use at FB 172 | readers for several DBs (MySQL, HDFS, ...) 173 | 35 Gbyte/s of updates per day 174 | # caravans = 1.06 175 | 176 | Performance 177 | Publisher bottleneck: 350 Mbyte/s 178 | Subscriber bottleneck: 600,000 updates/s 179 | Enough for production workloads 180 | 181 | References 182 | Kafka (http://research.microsoft.com/en-us/um/people/srikanth/netdb11/netdb11papers/netdb11-final12.pdf) 183 | 184 | -------------------------------------------------------------------------------- /Lec16_Wide-Area Publish_Subscribe/wormhole.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec16_Wide-Area Publish_Subscribe/wormhole.pdf -------------------------------------------------------------------------------- /Lec17_Measuring_Consistency/fb-consistency.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec17_Measuring_Consistency/fb-consistency.pdf -------------------------------------------------------------------------------- /Lec18_Case_Studies_Spark/zaharia-spark.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec18_Case_Studies_Spark/zaharia-spark.pdf -------------------------------------------------------------------------------- /Lec19_Cluster_Management/borg.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec19_Cluster_Management/borg.pdf -------------------------------------------------------------------------------- /Lec20_Peer-to-peer_Trackerless_Bittorrent_and_DHTs/bep_0005_files/bep.css: -------------------------------------------------------------------------------- 1 | /* 2 | BitTorrent.org BEP CSS 3 | 
*/ 4 | 5 | body { 6 | margin:0; 7 | padding:0; 8 | color:#333; 9 | background-color:#666; 10 | font:10px/1.5em "Trebuchet MS",sans-serif; 11 | text-align:center; 12 | } 13 | 14 | #upper { 15 | margin:0; 16 | padding:60px 0; 17 | background:#fff url("../img/bg.gif") repeat-x 0 0; 18 | } 19 | 20 | #wrap { 21 | margin:0 auto; 22 | width:700px; 23 | text-align:left; 24 | } 25 | 26 | #header { 27 | margin:0; 28 | width:100%; 29 | border-bottom:2px solid #eee; 30 | } 31 | 32 | #nav { 33 | margin:0 0 40px 0; 34 | padding:.5em 0; 35 | text-align:right; 36 | } 37 | 38 | #nav ul { 39 | list-style:none; 40 | padding:0; 41 | margin:0; 42 | } 43 | 44 | #nav li { 45 | margin-left:1em; 46 | padding:0; 47 | display:inline; 48 | font-size:1.5em; 49 | font-weight:bold; 50 | } 51 | 52 | #nav li span { 53 | color:#f60; 54 | } 55 | 56 | #home-l ul, 57 | #home-c ul, 58 | #home-r ul { 59 | margin:0 0 1.5em 0; 60 | padding:0 0 10px 2px; 61 | } 62 | 63 | #home-l li, 64 | #home-c li, 65 | #home-r li { 66 | list-style-type:none; 67 | margin:1em 0; 68 | padding:0 0 0 25px; 69 | font-size:1.5em; 70 | line-height:1.2em; 71 | background:url("../img/li.gif") no-repeat 0 0; 72 | } 73 | 74 | #first p { 75 | font-size:3pt; 76 | line-height:80pt; 77 | } 78 | 79 | 80 | #second li, 81 | #second dt { 82 | margin:.4em 0; 83 | font-size:10pt; 84 | line-height:12pt; 85 | } 86 | 87 | #second dd { 88 | margin-bottom:7px; 89 | font-size:1.2em; 90 | line-height:1.5em; 91 | } 92 | 93 | #intro { 94 | margin:0 0 2.5em 0; 95 | padding:0 3em; 96 | font-size:2.5em; 97 | line-height:1.2em; 98 | color:#000; 99 | } 100 | 101 | #footer { 102 | margin:0; 103 | padding:0; 104 | height:60px; 105 | background:url("../img/fbg.gif") repeat-x 0 0; 106 | } 107 | 108 | #home-l, 109 | #home-c, 110 | #home-r { 111 | margin:0 10px; 112 | width:210px; 113 | float:left; 114 | } 115 | 116 | #blog { 117 | padding:0 7em; 118 | } 119 | 120 | .field-name, 121 | .docinfo-name { 122 | margin:0; 123 | font-size: 1.2em; 124 | text-align: right; 125 | vertical-align: middle; 126 | } 127 | 128 | h1 { 129 | margin-top:1em; 130 | margin-bottom:.1em; 131 | margin-right: 0; 132 | margin-left: 0; 133 | font-size:2em; 134 | font-weight:normal; 135 | letter-spacing:-1px; 136 | } 137 | 138 | h1 span { 139 | font-weight:bold; 140 | color:#09f; 141 | } 142 | 143 | h2 { 144 | margin: .5em 0 0 0;; 145 | padding:0 0 .5em 0; 146 | font-size:1.6em; 147 | //border-bottom:2px solid #ffc; 148 | } 149 | 150 | #second h2, 151 | #blog h2 { 152 | margin:0; 153 | padding:0 0 .5em 0; 154 | font-size:1.6em; 155 | font-weight:normal; 156 | border:0; 157 | } 158 | 159 | h3 { 160 | font-size:1.3em; 161 | line-height:1.0em; 162 | } 163 | 164 | h4 { 165 | font-size:1.1em; 166 | text-transform:uppercase; 167 | } 168 | 169 | h5 { 170 | font-size:1.1em; 171 | color:#666; 172 | } 173 | 174 | #second p, 175 | #blog p { 176 | font-size:1.3em; 177 | line-height:1.5em; 178 | } 179 | 180 | #footer p { 181 | margin:0; 182 | padding:2em 0; 183 | color:#fff; 184 | } 185 | 186 | table { 187 | border:0; 188 | border-style:2; 189 | margin-left:4em; 190 | //margin-top:4em; 191 | //margin-bottom:4em; 192 | } 193 | 194 | td { 195 | font-size:9pt; 196 | padding:10px; 197 | } 198 | 199 | td.shade { 200 | background-color:#eee; 201 | } 202 | 203 | #blog .post { 204 | padding:5px 0; 205 | border-top:2px solid #ffc; 206 | font-size:1em; 207 | color:#999; 208 | } 209 | 210 | .clear:after { 211 | content:"."; 212 | display:block; 213 | height:0; 214 | font-size:0; 215 | clear:both; 216 | visibility:hidden; 217 | } 218 | 
219 | .clear { 220 | display:inline-table; 221 | } 222 | 223 | /* \*/ 224 | * html .clear { 225 | height:1%; 226 | } 227 | 228 | .clear { 229 | display:block; 230 | } 231 | /* */ 232 | 233 | .img-r { 234 | float:right; 235 | width:300px; 236 | padding-bottom:10px; 237 | margin:0 0 0 20px; 238 | } 239 | 240 | .img-l { 241 | float:left; 242 | width:300px; 243 | padding-bottom:10px; 244 | margin:0 20px 0 0; 245 | } 246 | 247 | #second .img-l p, 248 | #second .img-r p { 249 | color:#09f; 250 | font-size:1.5em; 251 | text-align:center; 252 | } 253 | 254 | hr { 255 | display:none; 256 | } 257 | 258 | code { 259 | color:#963; 260 | } 261 | 262 | /* links */ 263 | 264 | a { 265 | color:#345; 266 | text-decoration:none; 267 | border-bottom:1px solid #eee; 268 | } 269 | 270 | a:visited { 271 | color:#678; 272 | } 273 | 274 | #nav a:visited { 275 | color:#345; 276 | } 277 | 278 | a:hover, 279 | #nav a:hover { 280 | color:#f60; 281 | } 282 | 283 | h1 a, 284 | h1 a:visited, 285 | h1 a:hover { 286 | color:#666; 287 | border:0; 288 | } 289 | 290 | a img { 291 | border:0; 292 | } 293 | 294 | .literal-block { 295 | margin-left:4em; 296 | } 297 | 298 | div.figure { 299 | margin:2em 4em 1em 4em; 300 | text-align: center; 301 | font-size: 7pt; 302 | } 303 | 304 | p.caption { 305 | text-align: left; 306 | } 307 | 308 | -------------------------------------------------------------------------------- /Lec20_Peer-to-peer_Trackerless_Bittorrent_and_DHTs/stoica-chord.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec20_Peer-to-peer_Trackerless_Bittorrent_and_DHTs/stoica-chord.pdf -------------------------------------------------------------------------------- /Lec21_Peer-to-peer_Bitcoin/bitcoin.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec21_Peer-to-peer_Bitcoin/bitcoin.pdf -------------------------------------------------------------------------------- /Lec23_Project_demos/katabi-analogicfs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feixiao/Distributed-Systems/3aba6d5eacdf18c25b661c914accf17106cd9f9d/Lec23_Project_demos/katabi-analogicfs.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Distributed-Systems 2 | MIT课程[《Distributed Systems 》](http://nil.csail.mit.edu/6.824/2016/schedule.html)学习和翻译 3 | + 翻译和完成课程的实验代码,之后在代码里添加了注释说明,去掉了代码实现 4 | + 整理课程,编写简单的分布式入门教程 5 | 6 | 7 | #### 资料推荐 8 | + [《大规模分布式存储系统》](https://book.douban.com/subject/25723658/) 9 | + [《分布式系统原理介绍》](http://pan.baidu.com/s/1geU1XAz) 10 | + [awesome-distributed-systems](https://github.com/kevinxhuang/awesome-distributed-systems) 11 | + [一名分布式存储工程师的技能树是怎样的?](https://www.zhihu.com/question/43687427/answer/96306564) 12 | + [袖珍分布式系统](http://www.jianshu.com/c/0cf64976a481) 13 | --------------------------------------------------------------------------------