├── .gitignore
├── .travis.yml
├── .vscode
    ├── bookmarks.json
    └── settings.json
├── LICENSE
├── Mutual-Exclusion
    ├── README.md
    ├── code
    │   ├── clock.go
    │   ├── clock_test.go
    │   ├── message.go
    │   ├── message_test.go
    │   ├── process.go
    │   ├── process_test.go
    │   ├── receivedTime.go
    │   ├── receivedTime_test.go
    │   ├── requestQueue.go
    │   ├── requestQueue_test.go
    │   ├── resource.go
    │   ├── resource_test.go
    │   ├── timestamp.go
    │   ├── timestamp_test.go
    │   ├── util.go
    │   └── util_test.go
    ├── mutual_exclusion_request_period.pptx
    ├── qna.md
    ├── request_period.png
    ├── spanner-osdi2012.pdf
    └── time-clocks.pdf
├── PoW.zip
├── README.md
├── Raft
    ├── 6.824-2018.zip
    ├── 6.824Lab2_Raft.html
    ├── 6.824Lab2_Raft_files
    │   └── style.css
    ├── README.md
    ├── code
    │   ├── config.go
    │   ├── labgob
    │   │   ├── labgob.go
    │   │   └── test_test.go
    │   ├── labrpc
    │   │   ├── labrpc.go
    │   │   └── test_test.go
    │   ├── persister.go
    │   ├── raft-API.go
    │   ├── raft-AppendEntries.go
    │   ├── raft-LogEntry.go
    │   ├── raft-Raft.go
    │   ├── raft-RequestVote.go
    │   ├── raft-method.go
    │   ├── raft-persist.go
    │   ├── raft-settings.go
    │   ├── raft-settings_test.go
    │   ├── raft-state.go
    │   ├── raft-state_test.go
    │   ├── test_test.go
    │   └── util.go
    └── raft-extended.pdf
└── test.sh
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Binaries for programs and plugins
 2 | *.exe
 3 | *.dll
 4 | *.so
 5 | *.dylib
 6 | 
 7 | # Test binary, build with `go test -c`
 8 | *.test
 9 | 
10 | # Output of the go coverage tool, specifically when used with LiteIDE
11 | *.out
12 | 
13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
14 | .glide/
15 | 
16 | output.*.txt
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: go
 2 | 
 3 | go:
 4 |   - 1.13.x
 5 | 
 6 | # whitelist
 7 | branches:
 8 |   only:
 9 |     - master
10 |     - stable
11 | 
12 | script:
13 |   - go get -t -v ./...
14 |   - go vet ./...
15 | - bash ./test.sh 16 | 17 | after_success: 18 | - bash <(curl -s https://codecov.io/bash) -------------------------------------------------------------------------------- /.vscode/bookmarks.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "path": "$ROOTPATH$/output.test.txt", 4 | "bookmarks": [ 5 | { 6 | "line": 3088, 7 | "column": 1, 8 | "label": "" 9 | }, 10 | { 11 | "line": 3154, 12 | "column": 1, 13 | "label": "" 14 | }, 15 | { 16 | "line": 11967, 17 | "column": 1, 18 | "label": "" 19 | }, 20 | { 21 | "line": 12053, 22 | "column": 1, 23 | "label": "" 24 | } 25 | ] 26 | }, 27 | { 28 | "path": "$ROOTPATH$/Raft/code/config.go", 29 | "bookmarks": [ 30 | { 31 | "line": 470, 32 | "column": 11, 33 | "label": "" 34 | } 35 | ] 36 | } 37 | ] -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "Deserialize", 4 | "Jeiwan", 5 | "TXOs", 6 | "Txid", 7 | "UTXOs", 8 | "Vout", 9 | "abcdefghijklmn", 10 | "blockchain", 11 | "boltdb", 12 | "cbtx", 13 | "deserializes", 14 | "labrpc", 15 | "priv", 16 | "rcvr", 17 | "replyv" 18 | ] 19 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 aQua Yi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Mutual-Exclusion/README.md: -------------------------------------------------------------------------------- 1 | # Mutual Exclusion Algorithm Demo 2 | 3 | 使用 Go 语言实现了 Lamport 在论文 [《Time, Clocks and the Ordering of Events in a Distributed System》](time-clocks.pdf)中提到的 Mutual Exclusion 算法。 4 | 5 | ## 问题 6 | 7 | 多个 process 组成分享同一个 resource,但 resource 最多只能被一个 process 占用。由于 process 是分布式的,只能通过各自的 clock 读取时间值,这些 clock 的时间值不一定同步,没有办法通过时间上的编排来分别占用 resource。需要靠算法满足以下要求: 8 | 9 | 1. 对于 resource,一定要先释放,再占用。 10 | 1. 对于 process, 先申请,先占用。 11 | 1. 如果 process 占用 resource 的时间有限,那么,所有占用 resource 的申请,都会被满足。 12 | 13 | 为了简化问题,还存在以下假设: 14 | 15 | 1. 任意两个 process 都可以直接相互发送消息 16 | 1. 对于任意两个 process Pi 和 Pj 而言,从 Pi 发往 Pj 的消息,满足先发送先到达的原则 17 | 1. process 间发送的消息,一定会收到 18 | 19 | ## 从局部排序到全局排序 20 | 21 | 在展开之前,先强调几个定义: 22 | 23 | 1. 多个 process 中第 i 个 process 标记为 Pi 24 | 1. 
process 由一系列 event 组成,第 j 个 event 标记为 Ej 25 | 1. 每个 process 都有一个 clock 用于标记 event 发生的时间。第 i 个 process 发生第 j 个 event 的时间,标记为 Ci(Ej) 26 | 1. 每个 process 都是 **串行** 的 27 | 1. process 之间可以通过 send 和 receive message 来直接通信。send 和 receive 是两个 process 的单独事件。 28 | 29 | ### "happened before" 30 | 31 | "happened before" 表示一个局部排序关系,有两种情况下成立 32 | 33 | 1. 串行的 Pm 中, Ei 比 Ej 早发生。 Ei "happened before" Ej,所以有 Cm(Ei) < Cm(Ej)。 34 | 1. 从 Pm 发送到 Pn 中的消息 message,Pm 中 Ei 是发送 message, Pn 中 Ej 是接受 message。Ei "happened before" Ej,所以有 Cm(Ei) < Cn(Ej) 35 | 36 | 以上两条,在论文中被称为 `Clock Condition`。 37 | 38 | ### Lamport timestamps 39 | 40 | 为了让 system 中的 clocks 满足 `Clock Condition`,论文上的规定了 IR1 和 IR2,并在最后演变成了 [Lamport timestamps](https://en.wikipedia.org/wiki/Lamport_timestamps) 规则: 41 | 42 | 1. 进程在每做一件事情之前,计数器+1 43 | 1. 当进程发送消息的时候,需要带上计数器的值 44 | 1. 当进程接收消息的时候,需要根据消息中的值,更新自己的计数器。更新规则为 max(自身值,消息值)+1 45 | 46 | 以下是规则的伪代码 47 | 48 | ```code 49 | // 在进程内 50 | time_stamp = time_stamp + 1 51 | doOneEvent() 52 | 53 | // 进程发现消息时 54 | time = time + 1 55 | time_stamp = time 56 | send(message, time_stamp) 57 | 58 | // 进程接收消息时 59 | (message, time_stamp) = receive() 60 | time = max(time_stamp, time) + 1 61 | ``` 62 | 63 | > 基维百科上的说法和论文的说法,略有不同。我的代码以论文为准。 64 | 65 | ### 全局排序 66 | 67 | `Ei => Ej` 表示,在全局排序中, Ei 排在 Ej 前面。 68 | 69 | 对于 system 中的任意一个 event,可以使用其所在的 process P 和发生的 timestamp T 进行编号为: ``。 70 | 71 | 任意两个事件 Ei`` 和 Ej``, 若要使得 `Ei => Ej` 成立,需要以下两个条件之一成立: 72 | 73 | 1. Tm < Tn 74 | 1. Tm == Tn 且 Pa < Pb 75 | 76 | 其中 Pa < Pb 的含义是, system 中 process 中也存在一种排序方式。我在代码中选择使用 process 的代号,对其进行排序。 77 | 78 | ## mutual exclusion 算法 79 | 80 | mutual exclusion 算法需要每个 process 维护自己的 request queue。 由 5 个规则组成 81 | 82 | 1. 为了申请 resource,process Pi 需要 83 | 1. 生成 request `` 84 | 1. 发送 request message `` 到所有其他的 process 85 | 1. 把 `` 放入自己的 request queue 86 | 1. 当 Pj 收到 request message `` 后 87 | 1. 把 `` 放入自己的 request queue 88 | 1. 回复 Pi 一条 acknowledge message,确认收到。 89 | 1. 为了释放 resource,process Pi 需要 90 | 1. 释放 resource 91 | 1. 把 `` 移出自己的 request queue 92 | 1. 发送 release message `` 到所有其他的 process 93 | 1. 当 Pj 收到 release message `` 后 94 | 1. 把 `` 移出自己的 request queue 95 | 1. 当以下全部条件满足时,Pi 可以占用 resource: 96 | 1. 在 Pi 的 request queue 中,`` 与其他 event 都是 `=>` 关系。 97 | 1. Pi 收到所有其他 process 消息的最新时间中的最小值 > Tm 98 | 99 | 每个 process 只需要独立平等地处理这 5 种 event,就可以避免 process 同时占用 resource 的情况。 100 | 101 | 以上 5 个规则,是从 process 之间交互的角度来规定的。如果把 request `` 的整个生命周期放在 Pi 的时间轴上。如下图所示 102 | 103 | ![request周期](request_period.png) 104 | 105 | 从图中可以看到 106 | 107 | 1. 资源占用期两边是 `` 加入和退出 Pi.requestQueue 108 | 1. 操作 Pi.requestQueue 的两边是,给其他 process 发送消息的时间 109 | 110 | 这个顺序很重要,因为这个顺序保证了 `` 在 Pi 中满足 Rule5 的时候,`` 在所有的 process 的 request queue 中都是全局排序排在最前面的。所以 Pi 才能大胆地占用 resource,而不担心重复。 111 | 112 | ## 总结 113 | 114 | 由 lamport timestamps 规则和 process 排序,可以得到 system 内所有 event 的一种全局排序。request event 是全部 event 的子集,因此也可以全局排序。resource 占用顺序与其排序顺序一致。因此 mutual exclusion 算法能够满足要求。 115 | 116 | ## 思考问题 117 | 118 | 1. 为什么会出现多种全局排序?请举例说明。 119 | 1. 
真实时间上先 request 的 process 会不会后得到 resource?如果会的话,能不能说明 mutual exclusion 算法失败了?请说明理由。 120 | 121 | [参考答案](qna.md) 122 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/clock.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "math/rand" 5 | "sync" 6 | ) 7 | 8 | // Clock 是系统的逻辑时钟接口 9 | type Clock interface { 10 | // Update 根据输入参数更新自身的值 11 | Update(int) 12 | // Tick 时钟跳动一次,并返回最新的时间值 13 | Tick() int 14 | // Now 返回当前的时间值 15 | Now() int 16 | } 17 | 18 | type clock struct { 19 | time int 20 | rwmu sync.RWMutex 21 | } 22 | 23 | // 每个 process 的 clock 的 initial time,都是随机的 24 | func newClock() Clock { 25 | return &clock{ 26 | time: 1 + rand.Intn(100), 27 | } 28 | } 29 | 30 | func (c *clock) Update(t int) { 31 | c.rwmu.Lock() 32 | c.time = max(c.time, t+1) 33 | c.rwmu.Unlock() 34 | } 35 | 36 | func (c *clock) Tick() int { 37 | c.rwmu.Lock() 38 | c.time++ 39 | t := c.time 40 | c.rwmu.Unlock() 41 | return t 42 | } 43 | 44 | func (c *clock) Now() int { 45 | c.rwmu.RLock() 46 | t := c.time 47 | c.rwmu.RUnlock() 48 | return t 49 | } 50 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/clock_test.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_clock_update(t *testing.T) { 10 | ast := assert.New(t) 11 | // 12 | c := newClock() 13 | newTime := 1000 14 | ast.True(newTime+1 >= c.Now()) 15 | // 16 | c.Update(newTime) 17 | // 18 | expected := newTime + 1 19 | actual := c.Now() 20 | ast.Equal(expected, actual) 21 | } 22 | 23 | func Test_clock_tick(t *testing.T) { 24 | ast := assert.New(t) 25 | // 26 | c := newClock() 27 | expected := c.Now() + 1 28 | actual := c.Tick() 29 | ast.Equal(expected, actual) 30 | } 31 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/message.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import "fmt" 4 | 5 | type message struct { 6 | msgType msgType 7 | from int // message 发送方的 ID 8 | to int // message 接收方的 ID, 当值为 OTHERS 的时候,表示接收方为除 from 外的所有 9 | timestamp Timestamp 10 | msgTime int 11 | } 12 | 13 | func newMessage(mt msgType, msgTime, from, to int, ts Timestamp) *message { 14 | return &message{ 15 | msgType: mt, 16 | msgTime: msgTime, 17 | from: from, 18 | to: to, 19 | timestamp: ts, 20 | } 21 | } 22 | 23 | func (m *message) String() string { 24 | return fmt.Sprintf("{%s, Time:%d, From:%d, To:%2d, %s}", m.msgType, m.msgTime, m.from, m.to, m.timestamp) 25 | } 26 | 27 | type msgType int 28 | 29 | // 枚举了 message 的所有类型 30 | const ( 31 | // REQUEST_RESOURCE 请求资源 32 | requestResource msgType = iota 33 | releaseResource 34 | acknowledgment 35 | ) 36 | 37 | func (mt msgType) String() string { 38 | switch mt { 39 | case requestResource: 40 | return "申请" 41 | case releaseResource: 42 | return "释放" 43 | default: 44 | return "确认" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/message_test.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_Message(t *testing.T) { 10 | ast := 
assert.New(t) 11 | // 12 | ts := newTimestamp(0, 0) 13 | m := newMessage(requestResource, 0, 0, OTHERS, ts) 14 | // 15 | expected := "{申请, Time:0, From:0, To:-1, }" 16 | actual := m.String() 17 | ast.Equal(expected, actual) 18 | // 19 | m.msgType = releaseResource 20 | expected = "{释放, Time:0, From:0, To:-1, }" 21 | actual = m.String() 22 | ast.Equal(expected, actual) 23 | // 24 | m.msgType = acknowledgment 25 | expected = "{确认, Time:0, From:0, To:-1, }" 26 | actual = m.String() 27 | ast.Equal(expected, actual) 28 | // 29 | } 30 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/process.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | 7 | "github.com/aQuaYi/observer" 8 | ) 9 | 10 | // OTHERS 表示信息接收方为其他所有 process 11 | const OTHERS = -1 12 | 13 | // Process 是进程的接口 14 | type Process interface { 15 | // Request 会申请占用资源 16 | // 如果上次 Request 后,还没有占用并释放资源,会发生阻塞 17 | // 非线程安全 18 | Request() 19 | } 20 | 21 | type process struct { 22 | me int // process 的 ID 23 | wg sync.WaitGroup // 阻塞 Request() 用 24 | 25 | clock Clock 26 | resource Resource 27 | receivedTime ReceivedTime 28 | requestQueue RequestQueue 29 | 30 | mutex sync.Mutex 31 | // 为了保证发送消息的原子性, 32 | // 从生成 timestamp 开始到 prop.update 完成,这个过程需要上锁 33 | prop observer.Property 34 | // 操作以下属性,需要加锁 35 | isOccupying bool 36 | requestTimestamp Timestamp 37 | } 38 | 39 | func (p *process) String() string { 40 | return fmt.Sprintf("[%d]P%d", p.clock.Now(), p.me) 41 | } 42 | 43 | func newProcess(all, me int, r Resource, prop observer.Property) Process { 44 | p := &process{ 45 | me: me, 46 | resource: r, 47 | prop: prop, 48 | clock: newClock(), 49 | requestQueue: newRequestQueue(), 50 | receivedTime: newReceivedTime(all, me), 51 | } 52 | 53 | p.Listening() 54 | 55 | debugPrintf("%s 完成创建工作", p) 56 | 57 | return p 58 | } 59 | 60 | func (p *process) Listening() { 61 | // stream 的观察起点位置,由上层调用 newProcess 的方式决定 62 | // 在生成完所有的 process 后,再更新 prop, 63 | // 才能保证所有的 process 都能收到全部消息 64 | stream := p.prop.Observe() 65 | 66 | debugPrintf("%s 获取了 stream 开始监听", p) 67 | 68 | go func() { 69 | for { 70 | msg := stream.Next().(*message) 71 | if msg.from == p.me || 72 | (msg.msgType == acknowledgment && msg.to != p.me) { 73 | // 忽略不该看见的消息 74 | continue 75 | } 76 | 77 | p.updateTime(msg.from, msg.msgTime) 78 | 79 | switch msg.msgType { 80 | // case acknowledgment: 收到此类消息只用更新时钟,前面已经做了 81 | case requestResource: 82 | p.handleRequestMessage(msg) 83 | case releaseResource: 84 | p.handleReleaseMessage(msg) 85 | } 86 | p.checkRule5() 87 | } 88 | }() 89 | } 90 | 91 | func (p *process) updateTime(from, time int) { 92 | p.mutex.Lock() 93 | 94 | // 收到消息的第一件,更新自己的 clock 95 | p.clock.Update(time) 96 | // 然后为了 Rule5(ii) 记录收到消息的时间 97 | // NOTICE: 接收时间一定要是对方发出的时间 98 | p.receivedTime.Update(from, time) 99 | 100 | p.mutex.Unlock() 101 | } 102 | 103 | func (p *process) handleRequestMessage(msg *message) { 104 | 105 | // rule 2.1: 把 msg.timestamp 放入自己的 requestQueue 当中 106 | p.requestQueue.Push(msg.timestamp) 107 | 108 | debugPrintf("%s 添加了 %s 后的 request queue 是 %s", p, msg.timestamp, p.requestQueue) 109 | 110 | p.mutex.Lock() 111 | 112 | // rule 2.2: 给对方发送一条 acknowledge 消息 113 | p.prop.Update(newMessage( 114 | acknowledgment, 115 | p.clock.Tick(), 116 | p.me, 117 | msg.from, 118 | msg.timestamp, 119 | )) 120 | 121 | p.mutex.Unlock() 122 | } 123 | 124 | func (p *process) handleReleaseMessage(msg *message) { 125 | // rule 4: 从 request 
queue 中删除相应的申请 126 | p.requestQueue.Remove(msg.timestamp) 127 | debugPrintf("%s 删除了 %s 后的 request queue 是 %s", p, msg.timestamp, p.requestQueue) 128 | } 129 | 130 | func (p *process) checkRule5() { 131 | p.mutex.Lock() 132 | if p.isSatisfiedRule5() { 133 | p.occupyResource() 134 | go func() { 135 | // process 释放资源的时机交给 goroutine 调度 136 | p.releaseResource() 137 | }() 138 | } 139 | p.mutex.Unlock() 140 | } 141 | 142 | func (p *process) isSatisfiedRule5() bool { 143 | // 利用 checkRule5 的锁进行锁定 144 | return !p.isOccupying && // 还没有占领资源 145 | p.requestTimestamp != nil && // 已经申请资源 146 | p.requestTimestamp.IsEqual(p.requestQueue.Min()) && // Rule5.1 申请排在第一位 147 | p.requestTimestamp.IsBefore(p.receivedTime.Min()) // Rule5.2: 申请后,收到全部回复 148 | } 149 | 150 | func (p *process) occupyResource() { 151 | // 利用 checkRule5 的锁进行锁定 152 | debugPrintf("%s 准备占用资源 %s", p, p.requestQueue) 153 | p.isOccupying = true 154 | p.resource.Occupy(p.requestTimestamp) 155 | } 156 | 157 | func (p *process) releaseResource() { 158 | p.mutex.Lock() 159 | 160 | ts := p.requestTimestamp 161 | // rule 3: 先释放资源 162 | p.resource.Release(ts) 163 | // rule 3: 在 requestQueue 中删除 ts 164 | p.requestQueue.Remove(ts) 165 | // rule 3: 把释放的消息发送给其他 process 166 | msg := newMessage(releaseResource, p.clock.Tick(), p.me, OTHERS, ts) 167 | p.prop.Update(msg) 168 | p.isOccupying = false 169 | p.requestTimestamp = nil 170 | 171 | p.mutex.Unlock() 172 | 173 | p.wg.Done() 174 | } 175 | 176 | func (p *process) Request() { 177 | p.wg.Wait() 178 | p.wg.Add(1) 179 | 180 | p.mutex.Lock() 181 | 182 | p.clock.Tick() // 做事之前,先更新 clock 183 | ts := newTimestamp(p.clock.Now(), p.me) 184 | msg := newMessage(requestResource, p.clock.Now(), p.me, OTHERS, ts) 185 | // Rule 1.1: 发送申请信息给其他的 process 186 | p.prop.Update(msg) 187 | // Rule 1.2: 把申请消息放入自己的 request queue 188 | p.requestQueue.Push(ts) 189 | // 修改辅助属性,便于后续检查 190 | p.requestTimestamp = ts 191 | 192 | p.mutex.Unlock() 193 | } 194 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/process_test.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "testing" 7 | 8 | "github.com/aQuaYi/observer" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func run(all, occupyTimesPerProcess int) { 13 | rsc := newResource(all * occupyTimesPerProcess) 14 | 15 | prop := observer.NewProperty(nil) 16 | 17 | ps := make([]Process, all) 18 | // 需要一口气同时生成,保证所有的 stream 都能从同样的位置开始观察 19 | for i := range ps { 20 | p := newProcess(all, i, rsc, prop) 21 | ps[i] = p 22 | } 23 | debugPrintf("~~~ 已经成功创建了 %d 个 Process ~~~", all) 24 | 25 | stream := prop.Observe() 26 | go func() { 27 | for { 28 | msg := stream.Next().(*message) 29 | debugPrintf(" ## %s", msg) 30 | } 31 | }() 32 | 33 | for _, p := range ps { 34 | go func(p Process, times int) { 35 | i := 0 36 | debugPrintf("%s 开始申请资源", p) 37 | for i < times { 38 | p.Request() 39 | i++ 40 | } 41 | }(p, occupyTimesPerProcess) 42 | } 43 | 44 | rsc.wait() 45 | 46 | log.Println(rsc.report()) 47 | } 48 | 49 | func Test_process(t *testing.T) { 50 | ast := assert.New(t) 51 | // 52 | amount := 131072 53 | for all := 2; all <= 64; all *= 2 { 54 | times := amount / all 55 | name := fmt.Sprintf("%d Process × %d 次 = 共计 %d 次", all, times, amount) 56 | t.Run(name, func(t *testing.T) { 57 | ast.NotPanics(func() { 58 | run(all, times) 59 | }) 60 | }) 61 | } 62 | } 63 | 64 | func Test_process_String(t *testing.T) { 65 | ast := assert.New(t) 66 | // 
67 | me := 1 68 | clock := newClock() 69 | p := &process{ 70 | me: me, 71 | clock: clock, 72 | } 73 | time := 999 74 | p.clock.Update(time) 75 | expected := fmt.Sprintf("[%d]P%d", time+1, me) 76 | actual := p.String() 77 | ast.Equal(expected, actual) 78 | } 79 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/receivedTime.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "container/heap" 5 | "sync" 6 | ) 7 | 8 | // ReceivedTime 是最新接受时间的操作接口 9 | // 因为 Rule5(ii) 需要用到最小的接受时间 10 | type ReceivedTime interface { 11 | // Update 更新从 process 接收到的最新时间 12 | Update(process, time int) 13 | // Min 返回从各个 process 接收时间的最小值 14 | Min() int 15 | } 16 | 17 | type receivedTime struct { 18 | trq *timeRecordQueue 19 | trs []*timeRecord 20 | mutex sync.Mutex 21 | } 22 | 23 | func newReceivedTime(all, me int) ReceivedTime { 24 | trq := new(timeRecordQueue) 25 | trs := make([]*timeRecord, all) 26 | for i := range trs { 27 | if i == me { 28 | continue 29 | } 30 | trs[i] = &timeRecord{} 31 | heap.Push(trq, trs[i]) 32 | } 33 | return &receivedTime{ 34 | trq: trq, 35 | trs: trs, 36 | } 37 | } 38 | 39 | func (rt *receivedTime) Update(id, time int) { 40 | rt.mutex.Lock() 41 | rt.trq.update(rt.trs[id], time) 42 | rt.mutex.Unlock() 43 | } 44 | 45 | // 返回 rt 中的最小值 46 | func (rt *receivedTime) Min() int { 47 | rt.mutex.Lock() 48 | defer rt.mutex.Unlock() 49 | return (*rt.trq)[0].time 50 | } 51 | 52 | // timeRecord 是 priorityQueue 中的元素 53 | type timeRecord struct { 54 | time int 55 | index int 56 | } 57 | 58 | type timeRecordQueue []*timeRecord 59 | 60 | func (trq timeRecordQueue) Len() int { return len(trq) } 61 | 62 | func (trq timeRecordQueue) Less(i, j int) bool { 63 | return trq[i].time < trq[j].time 64 | } 65 | 66 | func (trq timeRecordQueue) Swap(i, j int) { 67 | trq[i], trq[j] = trq[j], trq[i] 68 | trq[i].index = i 69 | trq[j].index = j 70 | } 71 | 72 | // Push 往 pq 中放 entry 73 | func (trq *timeRecordQueue) Push(x interface{}) { 74 | temp := x.(*timeRecord) 75 | temp.index = len(*trq) 76 | *trq = append(*trq, temp) 77 | } 78 | 79 | // Pop 从 pq 中取出最优先的 entry 80 | func (trq *timeRecordQueue) Pop() interface{} { 81 | temp := (*trq)[len(*trq)-1] 82 | temp.index = -1 // for safety 83 | *trq = (*trq)[0 : len(*trq)-1] 84 | return temp 85 | } 86 | 87 | func (trq *timeRecordQueue) update(tr *timeRecord, time int) { 88 | tr.time = time 89 | heap.Fix(trq, tr.index) 90 | } 91 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/receivedTime_test.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "container/heap" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func Test_receivedTime_update(t *testing.T) { 11 | ast := assert.New(t) 12 | all, me := 10, 0 13 | rt := newReceivedTime(all, me) 14 | // 把所有的接受值调整到较大的值 15 | for i := 1; i < all; i++ { 16 | rt.Update(i, all+1) 17 | } 18 | // 依次按照以最小值更新第 i 个时间值 19 | for i := all - 1; i > me; i-- { 20 | expected := i 21 | rt.Update(i, i) 22 | actual := rt.Min() 23 | ast.Equal(expected, actual) 24 | } 25 | } 26 | 27 | func Test_receivedTime_updateItselfWillPanic(t *testing.T) { 28 | ast := assert.New(t) 29 | all, me := 10, 0 30 | rt := newReceivedTime(all, me) 31 | ast.Panics(func() { rt.Update(me, 1) }) 32 | } 33 | 34 | func Test_timeRecordQueue_Pop(t *testing.T) { 35 | ast := assert.New(t) 36 | 
trq := new(timeRecordQueue) 37 | expected := &timeRecord{ 38 | time: 1, 39 | } 40 | heap.Push(trq, expected) 41 | actual := heap.Pop(trq).(*timeRecord) 42 | ast.Equal(expected.time, actual.time) 43 | } 44 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/requestQueue.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "container/heap" 5 | "strings" 6 | "sync" 7 | ) 8 | 9 | // RequestQueue 提供了操作 request queue 的接口 10 | type RequestQueue interface { 11 | // Min 返回最小的 Timestamp 值 12 | Min() Less 13 | // Push 把元素加入 RequestQueue 中 14 | Push(Less) 15 | // Remove 在 RequestQueue 中删除 Less 16 | Remove(Less) 17 | // String 输出 RequestQueue 的细节 18 | String() string 19 | } 20 | 21 | type requestQueue struct { 22 | rpq *requestPriorityQueue 23 | requestOf map[Less]*request 24 | mutex sync.Mutex 25 | } 26 | 27 | func newRequestQueue() RequestQueue { 28 | return &requestQueue{ 29 | rpq: new(requestPriorityQueue), 30 | requestOf: make(map[Less]*request, 1024), 31 | } 32 | } 33 | 34 | func (rq *requestQueue) Min() Less { 35 | rq.mutex.Lock() 36 | defer rq.mutex.Unlock() 37 | if len(*rq.rpq) == 0 { 38 | return nil 39 | } 40 | return (*rq.rpq)[0].ls 41 | } 42 | 43 | func (rq *requestQueue) Push(ls Less) { 44 | rq.mutex.Lock() 45 | r := &request{ 46 | ls: ls, 47 | } 48 | 49 | rq.requestOf[ls] = r 50 | heap.Push(rq.rpq, r) 51 | rq.mutex.Unlock() 52 | } 53 | 54 | func (rq *requestQueue) Remove(ls Less) { 55 | rq.mutex.Lock() 56 | rq.rpq.remove(rq.requestOf[ls]) 57 | delete(rq.requestOf, ls) 58 | rq.mutex.Unlock() 59 | } 60 | 61 | func (rq *requestQueue) String() string { 62 | return rq.rpq.String() 63 | } 64 | 65 | // Less 是 rpq 元素中的主要成分 66 | type Less interface { 67 | // Less 比较两个接口的值 68 | Less(interface{}) bool 69 | // String() 输出内容 70 | String() string 71 | } 72 | 73 | // request 是 priorityQueue 中的元素 74 | type request struct { 75 | ls Less 76 | index int 77 | } 78 | 79 | // rpq implements heap.Interface and holds entries. 
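// The queue is kept as a min-heap ordered by each request's ls.Less method;
// every request records its current heap position in index so that remove()
// can delete an arbitrary entry via heap.Remove in O(log n).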
80 | type requestPriorityQueue []*request 81 | 82 | func (q *requestPriorityQueue) String() string { 83 | var b strings.Builder 84 | b.WriteString("{request queue:") 85 | for i := range *q { 86 | b.WriteString((*q)[i].ls.String()) 87 | } 88 | b.WriteString("}") 89 | return b.String() 90 | } 91 | 92 | func (q requestPriorityQueue) Len() int { return len(q) } 93 | 94 | func (q requestPriorityQueue) Less(i, j int) bool { 95 | return q[i].ls.Less(q[j].ls) 96 | } 97 | 98 | func (q requestPriorityQueue) Swap(i, j int) { 99 | q[i], q[j] = q[j], q[i] 100 | q[i].index = i 101 | q[j].index = j 102 | } 103 | 104 | // Push 往 pq 中放 entry 105 | func (q *requestPriorityQueue) Push(x interface{}) { 106 | temp := x.(*request) 107 | temp.index = len(*q) 108 | *q = append(*q, temp) 109 | } 110 | 111 | // Pop 从 pq 中取出最优先的 entry 112 | func (q *requestPriorityQueue) Pop() interface{} { 113 | temp := (*q)[len(*q)-1] 114 | temp.index = -1 // for safety 115 | *q = (*q)[0 : len(*q)-1] 116 | return temp 117 | } 118 | 119 | func (q *requestPriorityQueue) remove(r *request) { 120 | heap.Remove(q, r.index) 121 | } 122 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/requestQueue_test.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func makeIncreasingTimestamps(half int) []Timestamp { 11 | res := make([]Timestamp, 0, half*2) 12 | for i := 0; i < half; i++ { 13 | res = append(res, 14 | newTimestamp(i, i*2), 15 | newTimestamp(i, i*2+1), 16 | ) 17 | } 18 | return res 19 | } 20 | 21 | func Test_requestQueue(t *testing.T) { 22 | ast := assert.New(t) 23 | // 24 | half := 10 25 | size := half * 2 26 | tss := makeIncreasingTimestamps(half) 27 | rq := newRequestQueue() 28 | // 29 | for i := size - 1; i >= 0; i-- { 30 | ts := tss[i] 31 | rq.Push(ts) // 每次放入到都是新的最小值 32 | expected := ts 33 | actual := rq.Min() 34 | ast.Equal(expected, actual) 35 | } 36 | // 37 | for i := 0; i+1 < size; i++ { 38 | rq.Remove(tss[i]) 39 | expected := tss[i+1] // 删除了最小值后,下个就是新的最小值 40 | actual := rq.Min() 41 | ast.Equal(expected, actual) 42 | } 43 | } 44 | 45 | func Test_requestQueue_remove(t *testing.T) { 46 | ast := assert.New(t) 47 | // 48 | half := 10 49 | size := half * 2 50 | tss := makeIncreasingTimestamps(half) 51 | rq := newRequestQueue() 52 | // 53 | for i := 0; i < size; i++ { 54 | ts := tss[i] 55 | rq.Push(ts) 56 | } 57 | // 58 | expected := tss[0] 59 | for i, j := 1, size-1; i < j; i, j = i+1, j-1 { 60 | rq.Remove(tss[i]) 61 | actual := rq.Min() 62 | ast.Equal(expected, actual) 63 | // 64 | rq.Remove(tss[j]) 65 | actual = rq.Min() 66 | ast.Equal(expected, actual) 67 | } 68 | } 69 | 70 | func Test_requestQueue_MinOfEmpty(t *testing.T) { 71 | ast := assert.New(t) 72 | rq := newRequestQueue() 73 | ast.Nil(rq.Min()) 74 | } 75 | 76 | func Test_requestQueue_String(t *testing.T) { 77 | ast := assert.New(t) 78 | size := 100 79 | // 创建 timestamps 80 | timestamps := make([]Timestamp, 0, size) 81 | for i := 1; i < size; i++ { 82 | timestamps = append(timestamps, newTimestamp(i, i)) 83 | } 84 | // 创建 requestQueue,并添加 timestamp 85 | rq := newRequestQueue() 86 | for i := range timestamps { 87 | rq.Push(timestamps[i]) 88 | } 89 | // 获取 rq 的字符输出 90 | rqs := rq.String() 91 | // 验证 rqs 中的内容 92 | for i := range timestamps { 93 | tss := timestamps[i].String() 94 | ast.True(strings.Contains(rqs, tss)) 95 | } 96 | } 97 | 
-------------------------------------------------------------------------------- /Mutual-Exclusion/code/resource.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "sync" 7 | "time" 8 | 9 | "github.com/montanaflynn/stats" 10 | ) 11 | 12 | // Resource 是 Process 占用资源的接口 13 | type Resource interface { 14 | // Occupy 表示占用资源 15 | Occupy(Timestamp) 16 | // Release 表示释放资源 17 | Release(Timestamp) 18 | } 19 | 20 | type resource struct { 21 | lastOccupiedBy Timestamp // 记录上次占用资源的 timestamp 22 | occupiedBy Timestamp // 记录当前占用资源的 timestamp, nil 表示资源未被占用 23 | timestamps []Timestamp // 按顺序保存占用资源的 timestamp 24 | times []time.Time // 记录每次占用资源的起止时间,用于分析算法的效率 25 | wg sync.WaitGroup // 完成全部占用前,阻塞主 goroutine 26 | } 27 | 28 | func newResource(times int) *resource { 29 | r := &resource{ 30 | lastOccupiedBy: newTimestamp(-1, -1), 31 | } 32 | r.wg.Add(times) 33 | return r 34 | } 35 | 36 | func (r *resource) wait() { 37 | r.wg.Wait() 38 | } 39 | 40 | func (r *resource) Occupy(ts Timestamp) { 41 | r.times = append(r.times, time.Now()) 42 | 43 | if r.occupiedBy != nil { 44 | msg := fmt.Sprintf("资源正在被 %s 占据,%s 却想获取资源。", r.occupiedBy, ts) 45 | panic(msg) 46 | } 47 | 48 | if !r.lastOccupiedBy.Less(ts) { 49 | msg := fmt.Sprintf("资源上次被 %s 占据,这次 %s 却想占据资源。", r.lastOccupiedBy, ts) 50 | panic(msg) 51 | } 52 | 53 | r.occupiedBy = ts 54 | r.timestamps = append(r.timestamps, ts) 55 | debugPrintf("~~~ @resource: %s occupied ~~~ ", ts) 56 | } 57 | 58 | func (r *resource) Release(ts Timestamp) { 59 | if !r.occupiedBy.IsEqual(ts) { 60 | msg := fmt.Sprintf("%s 想要释放正在被 P%s 占据的资源。", ts, r.occupiedBy) 61 | panic(msg) 62 | } 63 | 64 | r.lastOccupiedBy, r.occupiedBy = ts, nil 65 | r.times = append(r.times, time.Now()) 66 | debugPrintf("~~~ @resource: %s released ~~~ ", ts) 67 | 68 | r.wg.Done() // 完成一次占用 69 | } 70 | 71 | func (r *resource) report() string { 72 | var b strings.Builder 73 | size := len(r.times) 74 | 75 | // 统计资源被占用的时间 76 | totalTime := r.times[size-1].Sub(r.times[0]) 77 | format := "resource 被占用了 %s, " 78 | fmt.Fprintf(&b, format, totalTime) 79 | 80 | // 计算占用率 81 | busys := make([]float64, 0, size/2) 82 | idles := make([]float64, 0, size/2) 83 | 84 | var i int 85 | for i = 0; i+2 < size; i += 2 { 86 | busys = append(busys, float64(r.times[i+1].Sub(r.times[i]).Nanoseconds())) 87 | idles = append(idles, float64(r.times[i+2].Sub(r.times[i+1]).Nanoseconds())) 88 | } 89 | busys = append(busys, float64(r.times[i+1].Sub(r.times[i]).Nanoseconds())) 90 | 91 | busy, _ := stats.Sum(busys) 92 | idle, _ := stats.Sum(idles) 93 | total := busy + idle 94 | rate := busy * 100 / total 95 | 96 | format = "占用比率为 %4.2f%%。\n" 97 | fmt.Fprintf(&b, format, rate) 98 | 99 | // 计算资源占用时间的均值和方差 100 | format = "资源占用: %s\n" 101 | fmt.Fprintf(&b, format, statisticAnalyze(busys)) 102 | 103 | // 计算资源空闲间隙的均值和方差 104 | format = "资源空闲: %s\n" 105 | fmt.Fprintf(&b, format, statisticAnalyze(idles)) 106 | 107 | return b.String() 108 | } 109 | 110 | func statisticAnalyze(floats []float64) string { 111 | format := "min %8.2fus, max %8.2fus, mean %8.2fus, sd %8.2f" 112 | min, _ := stats.Min(floats) 113 | max, _ := stats.Max(floats) 114 | mean, _ := stats.Mean(floats) 115 | sd, _ := stats.StandardDeviation(floats) 116 | return fmt.Sprintf(format, min/1000, max/1000, mean/1000, sd/1000) 117 | } 118 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/resource_test.go: 
-------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func Test_resource_occupyAndRelease(t *testing.T) { 13 | ast := assert.New(t) 14 | // 15 | p := 0 16 | ts := newTimestamp(0, p) 17 | r := newResource(1) 18 | // 占用 19 | r.Occupy(ts) 20 | ast.Equal(ts, r.occupiedBy) 21 | // 释放 22 | r.Release(ts) 23 | r.wait() 24 | ast.Equal(ts, r.lastOccupiedBy) 25 | ast.Equal(ts, r.timestamps[0]) 26 | } 27 | 28 | func Test_resource_occupy_occupyInvalidResource(t *testing.T) { 29 | ast := assert.New(t) 30 | // 31 | p0 := 0 32 | p1 := 1 33 | ts0 := newTimestamp(0, p0) 34 | ts1 := newTimestamp(1, p1) 35 | r := newResource(1) 36 | r.Occupy(ts0) 37 | // 38 | expected := fmt.Sprintf("资源正在被 %s 占据,%s 却想获取资源。", ts0, ts1) 39 | ast.PanicsWithValue(expected, func() { r.Occupy(ts1) }) 40 | } 41 | 42 | func Test_resource_occupy_panicOfEarlyTimestampWantToOccupy(t *testing.T) { 43 | ast := assert.New(t) 44 | // 45 | ts0 := newTimestamp(0, 1) 46 | ts1 := newTimestamp(1, 1) 47 | r := newResource(2) 48 | r.Occupy(ts1) 49 | r.Release(ts1) 50 | // 51 | expected := fmt.Sprintf("资源上次被 %s 占据,这次 %s 却想占据资源。", ts1, ts0) 52 | ast.PanicsWithValue(expected, func() { r.Occupy(ts0) }) 53 | } 54 | 55 | func Test_resource_report(t *testing.T) { 56 | ast := assert.New(t) 57 | // 58 | p := 0 59 | ts0 := newTimestamp(0, p) 60 | ts1 := newTimestamp(1, p) 61 | r := newResource(3) 62 | r.Occupy(ts0) 63 | r.Release(ts0) 64 | r.Occupy(ts1) 65 | r.Release(ts1) 66 | now := time.Now() 67 | r.times[0] = now 68 | r.times[1] = now.Add(100 * time.Second) 69 | r.times[2] = now.Add(200 * time.Second) 70 | r.times[3] = now.Add(400 * time.Second) 71 | // 72 | report := r.report() 73 | ast.True(strings.Contains(report, "75.00%"), report) 74 | // 75 | ast.Equal(4, len(r.times), "资源被占用了 2 次,但是 r.times 的长度不等于 4") 76 | } 77 | 78 | func Test_resource_Occupy_lenOfTimes(t *testing.T) { 79 | ast := assert.New(t) 80 | // 81 | times := 100 82 | r := newResource(times) 83 | go func(max int) { 84 | time, p := 0, 0 85 | for i := 0; i < max; i++ { 86 | if i%2 == 0 { 87 | time++ 88 | } else { 89 | p++ 90 | } 91 | ts := newTimestamp(time, p) 92 | r.Occupy(ts) 93 | r.Release(ts) 94 | } 95 | }(times) 96 | r.wait() 97 | expected := times * 2 98 | actual := len(r.times) 99 | ast.Equal(expected, actual) 100 | } 101 | 102 | func Test_resource_Release_panicOfReleaseByOther(t *testing.T) { 103 | ast := assert.New(t) 104 | // 105 | r := newResource(1) 106 | ts0 := newTimestamp(0, 1) 107 | ts1 := newTimestamp(1, 1) 108 | r.Occupy(ts0) 109 | expected := fmt.Sprintf("%s 想要释放正在被 P%s 占据的资源。", ts1, ts0) 110 | ast.PanicsWithValue(expected, func() { r.Release(ts1) }) 111 | } 112 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/timestamp.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import "fmt" 4 | 5 | // Timestamp 是用于全局排序的接口 6 | type Timestamp interface { 7 | // Less 比较两个 Timestamp 的大小 8 | Less(interface{}) bool 9 | // Equal 判断两个 Timestamp 是否相等 10 | IsEqual(interface{}) bool 11 | // IsBefore 在比较同一个 clock 的时间,所以,不需要 process 12 | IsBefore(int) bool 13 | // String 输出 Timestamp 的内容 14 | String() string 15 | } 16 | 17 | type timestamp struct { 18 | time, process int 19 | } 20 | 21 | func newTimestamp(time, process int) Timestamp { 22 | return ×tamp{ 23 | time: time, 24 | process: 
process, 25 | } 26 | } 27 | 28 | func (ts *timestamp) String() string { 29 | return fmt.Sprintf("", ts.time, ts.process) 30 | } 31 | 32 | func (ts *timestamp) Less(tsi interface{}) bool { 33 | ts2 := tsi.(*timestamp) 34 | // 这就是将局部顺序推广到全局顺序的关键 35 | if ts.time == ts2.time { 36 | return ts.process < ts2.process 37 | } 38 | return ts.time < ts2.time 39 | } 40 | 41 | func (ts *timestamp) IsEqual(tsi interface{}) bool { 42 | if tsi == nil { 43 | return false 44 | } 45 | ts2 := tsi.(*timestamp) 46 | return ts.time == ts2.time && ts.process == ts2.process 47 | } 48 | 49 | func (ts *timestamp) IsBefore(t int) bool { 50 | return ts.time < t 51 | } 52 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/timestamp_test.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_timestamp_String(t *testing.T) { 10 | ast := assert.New(t) 11 | ts := newTimestamp(0, 1) 12 | actual := ts.String() 13 | expected := "" 14 | ast.Equal(expected, actual) 15 | } 16 | 17 | func Test_timestamp_Less(t *testing.T) { 18 | ast := assert.New(t) 19 | 20 | // a < b < c 21 | a := newTimestamp(1, 1) 22 | b := newTimestamp(1, 2) 23 | c := newTimestamp(2, 3) 24 | 25 | ast.True(a.Less(b)) 26 | ast.True(a.Less(c)) 27 | ast.True(b.Less(c)) 28 | 29 | ast.False(b.Less(a)) 30 | ast.False(c.Less(a)) 31 | ast.False(c.Less(b)) 32 | } 33 | 34 | func Test_timestamp_IsEqual_nil_false(t *testing.T) { 35 | ast := assert.New(t) 36 | ts := newTimestamp(0, 0) 37 | ast.False(ts.IsEqual(nil)) 38 | } 39 | 40 | func Test_timestamp_IsEqual_same_true(t *testing.T) { 41 | ast := assert.New(t) 42 | time, process := 0, 0 43 | ts := newTimestamp(time, process) 44 | tsi := newTimestamp(time, process) 45 | ast.True(ts.IsEqual(tsi)) 46 | } 47 | 48 | func Test_timestamp_IsBefore(t *testing.T) { 49 | ast := assert.New(t) 50 | time, process := 1, 0 51 | ts := newTimestamp(time, process) 52 | ast.False(ts.IsBefore(0)) 53 | ast.True(ts.IsBefore(2)) 54 | } 55 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/util.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "log" 5 | "math/rand" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | func init() { 11 | log.SetFlags(log.LstdFlags | log.Lmicroseconds) 12 | debugPrintf("程序开始运行") 13 | rand.Seed(time.Now().UnixNano()) 14 | } 15 | 16 | var needDebug = false 17 | 18 | // 读取和修改 needebug 前需要上锁 19 | var rwm sync.RWMutex 20 | 21 | // debugPrintf 根据设置打印输出 22 | func debugPrintf(format string, a ...interface{}) { 23 | rwm.RLock() 24 | if needDebug { 25 | log.Printf(format, a...) 
26 | } 27 | rwm.RUnlock() 28 | } 29 | 30 | func max(a, b int) int { 31 | if a > b { 32 | return a 33 | } 34 | return b 35 | } 36 | -------------------------------------------------------------------------------- /Mutual-Exclusion/code/util_test.go: -------------------------------------------------------------------------------- 1 | package mutualexclusion 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func Test_debugPrintf_toPrint(t *testing.T) { 13 | rwm.Lock() 14 | temp := needDebug 15 | needDebug = true 16 | rwm.Unlock() 17 | // 18 | var sb strings.Builder 19 | log.SetOutput(&sb) 20 | defer log.SetOutput(os.Stderr) 21 | // 22 | ast := assert.New(t) 23 | // 24 | words := "众鸟高飞尽,孤云独去闲。" 25 | // 26 | debugPrintf("%s", words) 27 | // 28 | ast.True(strings.Contains(sb.String(), words)) 29 | // 还原 needDebug 30 | rwm.Lock() 31 | needDebug = temp 32 | rwm.Unlock() 33 | } 34 | 35 | func Test_debugPrintf_notToPrint(t *testing.T) { 36 | rwm.Lock() 37 | temp := needDebug 38 | needDebug = false 39 | rwm.Unlock() 40 | // 41 | var b strings.Builder 42 | log.SetOutput(&b) 43 | defer log.SetOutput(os.Stderr) 44 | // 45 | ast := assert.New(t) 46 | // 47 | words := "众鸟高飞尽,孤云独去闲。" 48 | // 49 | debugPrintf("%s", words) 50 | // 51 | ast.False(strings.Contains(b.String(), words)) 52 | // 还原 needDebug 53 | rwm.Lock() 54 | needDebug = temp 55 | rwm.Unlock() 56 | } 57 | 58 | func Test_max(t *testing.T) { 59 | type args struct { 60 | a int 61 | b int 62 | } 63 | tests := []struct { 64 | name string 65 | args args 66 | want int 67 | }{ 68 | 69 | { 70 | "a < b", 71 | args{ 72 | 1, 73 | 2, 74 | }, 75 | 2, 76 | }, 77 | 78 | { 79 | "a > b", 80 | args{ 81 | 2, 82 | 1, 83 | }, 84 | 2, 85 | }, 86 | 87 | { 88 | "a = b", 89 | args{ 90 | 2, 91 | 2, 92 | }, 93 | 2, 94 | }, 95 | } 96 | for _, tt := range tests { 97 | t.Run(tt.name, func(t *testing.T) { 98 | if got := max(tt.args.a, tt.args.b); got != tt.want { 99 | t.Errorf("max() = %v, want %v", got, tt.want) 100 | } 101 | }) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /Mutual-Exclusion/mutual_exclusion_request_period.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aQuaYi/Distributed-Algorithms/8c27e1220fb1c467da999f30244d40f520365522/Mutual-Exclusion/mutual_exclusion_request_period.pptx -------------------------------------------------------------------------------- /Mutual-Exclusion/qna.md: -------------------------------------------------------------------------------- 1 | # 思考问题 2 | 3 | 1.为什么会出现多种全局排序?请举例说明。 4 | 5 | ```text 6 | 由于不同 process 的 event 可能会有相同的 timestamp,例如 E1 = `<3:7>`, E2 = `<3:5>`。 7 | 如果 P7 < P5 的话, E1 => E2。 8 | 如果 P5 < P7 的话, E2 => E1。 9 | ``` 10 | 11 | 2.真实时间上先 request 的 process 会不会后得到 resource?如果会的话,能不能说明 mutual exclusion 算法失败了?请说明理由。 12 | 13 | ```text 14 | 会,但不是全局排序的失败。 15 | 16 | 我在编程的时候,就遇到了这个问题。开始以为是程序的 bug 。后来重新阅读的论文,才发现是自己的理解的不够。 17 | 18 | 首先区分一下时间(time)和时刻(timestamp),时间是一个物理量,时刻是这个物理量的值,2018年05月15日15:20:55 是现在的时刻。就好像温度是一个物理量,33℃是温度的一个值。但是如果33℃的物体比44℃的物体摸起来要热,只能说明这个物体不是使用同一个温度计测量的温度,并且两个温度计的基准差别还蛮大。 19 | 第二,时刻(timestamp)的作用是给 event 一个标记,多个 event 可以利用这个时间标记进行排序。例如,同一天中,E1(15:31:31) 排在 E2(15:31:51) 前面。但这包含了一个隐含前提,这两个 event 的时刻,是由同一个可靠的 clock 标记的。 20 | 第三,mutual exclusion 是一个分布式算法。每个 process 都有自己单独的 clock。不同 process 中的 event 的时间标记都是不同的 clock 标记的。考虑到程序运行的速度,这些 clock 与真实时间之间的偏差,绝对不能忽略不计。 21 | 第四,为了 process 间的局部排序,引入了 message 机制,并制定了 lamport 
timestamp 规则。为了全局排序,再引入 process 排序。 22 | 23 | 再解释一下题意,存在一个观察者,拿着同一个 clock 去分别标记每个 process 的 request,结果发现某个先标记的 request 却后得到了 resource。 24 | 这不能说明 mutual exclusion 算法失败的原因是,这个算法就是为了解决分布式系统中,不可能存在同一个 clock 去分别标记每个 process 的 request 的问题而提出的。 25 | 26 | 如果能像 Google 在 [Spanner](spanner-osdi2012.pdf) 里面,引入 True Time 一样,使得各个 process 的 clock 之间的偏差,相对于程序的速度可以忽略不计。就可以保证真实时间上先 request 的 process 先占用 resource。那样的话,也不需要 mutual exclusion 算法了。 27 | ``` 28 | -------------------------------------------------------------------------------- /Mutual-Exclusion/request_period.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aQuaYi/Distributed-Algorithms/8c27e1220fb1c467da999f30244d40f520365522/Mutual-Exclusion/request_period.png -------------------------------------------------------------------------------- /Mutual-Exclusion/spanner-osdi2012.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aQuaYi/Distributed-Algorithms/8c27e1220fb1c467da999f30244d40f520365522/Mutual-Exclusion/spanner-osdi2012.pdf -------------------------------------------------------------------------------- /Mutual-Exclusion/time-clocks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aQuaYi/Distributed-Algorithms/8c27e1220fb1c467da999f30244d40f520365522/Mutual-Exclusion/time-clocks.pdf -------------------------------------------------------------------------------- /PoW.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aQuaYi/Distributed-Algorithms/8c27e1220fb1c467da999f30244d40f520365522/PoW.zip -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 分布式系统原型 2 | 3 | [![License](http://img.shields.io/badge/license-MIT-red.svg?style=flat)](https://github.com/aQuaYi/Distributed-Algorithms/blob/master/LICENSE) 4 | [![Build Status](https://www.travis-ci.org/aQuaYi/Distributed-Algorithms.svg?branch=master)](https://www.travis-ci.org/aQuaYi/Distributed-Algorithms) 5 | [![codecov](https://codecov.io/gh/aQuaYi/Distributed-Algorithms/branch/master/graph/badge.svg)](https://codecov.io/gh/aQuaYi/Distributed-Algorithms) 6 | [![Go Report Card](https://goreportcard.com/badge/github.com/aQuaYi/Distributed-Algorithms)](https://goreportcard.com/report/github.com/aQuaYi/Distributed-Algorithms) 7 | 8 | ## [Mutual Exclusion](Mutual-Exclusion) 9 | 10 | Lamport 在论文《Time, Clocks and the Ordering of Events in a Distributed System》中提到的 Mutual Exclusion 算法。 11 | 12 | ## [Raft](Raft) 13 | 14 | Diego Ongaro 和 John Ousterhout 认为 Paxos 难以理解, 于是在 [《In Search of an Understandable Consensus Algorithm (Extended Version)》](Raft/raft-extended.pdf) 中以可理解为目标,提出了一种新的共识算法——Raft。 15 | 16 | ## [PoW](PoW) 17 | 18 | 为了实现去中心化的数字货币--[Bitcoin](https://github.com/bitcoin/bitcoin) 19 | ,[中本聪](https://zh.wikipedia.org/zh-hans/%E4%B8%AD%E6%9C%AC%E8%81%AA)利用 PoW (Proof of Work) 算法来解决系统中的拜占庭将军问题。 20 | 21 | ## PoS 22 | 23 | ## DPoS 24 | 25 | ## PBFT 26 | 27 | TODO: 总结分布式系统的特点 -------------------------------------------------------------------------------- /Raft/6.824-2018.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aQuaYi/Distributed-Algorithms/8c27e1220fb1c467da999f30244d40f520365522/Raft/6.824-2018.zip 
-------------------------------------------------------------------------------- /Raft/6.824Lab2_Raft.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aQuaYi/Distributed-Algorithms/8c27e1220fb1c467da999f30244d40f520365522/Raft/6.824Lab2_Raft.html -------------------------------------------------------------------------------- /Raft/6.824Lab2_Raft_files/style.css: -------------------------------------------------------------------------------- 1 | body { max-width: 45em; } 2 | body pre { overflow-x: auto; } 3 | 4 | body { 5 | color: black; 6 | background-color: white; 7 | font-family: sans-serif; 8 | } 9 | 10 | .title { 11 | text-align: center 12 | } 13 | .subtitle { 14 | text-align: center; 15 | font-style: italic; 16 | } 17 | .author { 18 | text-align: center; 19 | } 20 | 21 | ul.hints, .note, .challenge, .todo, pre { 22 | margin: 1em; 23 | border: 1px dashed; 24 | padding: 1em; 25 | } 26 | 27 | ul.hints { color: #50A02D; } 28 | ul.hints li { margin-left: 1em; } 29 | ul.hints li::before { 30 | content: "Hint: "; 31 | font-weight: bold; 32 | } 33 | 34 | .important { 35 | margin: 1em; 36 | padding: 1em; 37 | background-color: #990000; 38 | color: #fff; 39 | } 40 | .important::before { 41 | content: "Important: "; 42 | background-color: #550000; 43 | width: 100%; 44 | display: block; 45 | margin: -1em -1em 1em -1em; 46 | padding: 1em; 47 | font-weight: bold; 48 | } 49 | .note { color: #4682B4; } 50 | .note::before { 51 | content: "Note: "; 52 | font-weight: bold; 53 | } 54 | 55 | .challenge, .todo { border-style: solid; } 56 | .challenge::before, .todo::before { 57 | float: right; 58 | font-weight: bold; 59 | color: white; 60 | margin-right: -1em; 61 | margin-top: -1em; 62 | margin-bottom: .5em; 63 | margin-left: 1em; 64 | padding: .5em 1em; 65 | } 66 | .todo { color: #B22222; } 67 | .todo::before { 68 | content: "TASK"; 69 | background: #B22222; 70 | } 71 | .challenge { color: #8B4513; } 72 | .challenge::before { 73 | content: "CHALLENGE"; 74 | background: #8B4513; 75 | } 76 | 77 | 78 | tt, code { 79 | font-family: monospace; 80 | border-radius: 3px; 81 | font-size: 110%; 82 | color: #657b83; 83 | background-color: #fdf6e3; 84 | padding: 0.2em; 85 | word-wrap: break-word; 86 | } 87 | 88 | pre { 89 | font-size: 100%; 90 | color: #839496; 91 | background: #002b36; 92 | } 93 | 94 | .classic { 95 | color: black; 96 | } 97 | 98 | 99 | div.required .header { 100 | font-weight: bold; 101 | } 102 | 103 | div.challenge .header { 104 | font-style: italic; 105 | } 106 | 107 | div.required { 108 | background-color: #eeeeff; 109 | } 110 | 111 | -------------------------------------------------------------------------------- /Raft/README.md: -------------------------------------------------------------------------------- 1 | # Raft: 一个比 Paxos 好懂的共识算法 2 | 3 | Diego Ongaro 和 John Ousterhout 认为 Paxos 难以理解, 于是在 [《In Search of an Understandable Consensus Algorithm (Extended Version)》](raft-extended.pdf) 中以可理解为目标,提出了一种新的共识算法——Raft。 4 | 5 | 本 demo 的测试代码及其辅助库来源于 [MIT 6.824 课程](https://pdos.csail.mit.edu/6.824/) 的课程实验。原始代码[点击这里下载](6.824-2018.zip),代码说明在[此页面](6.824Lab2_Raft.html)。 6 | 7 | 相关资料: 8 | 9 | - [《In Search of an Understandable Consensus Algorithm (Extended Version)》](raft-extended.pdf) 10 | - [Raft 算法演示](http://thesecretlivesofdata.com/raft/) 11 | -------------------------------------------------------------------------------- /Raft/code/config.go: -------------------------------------------------------------------------------- 1 | 
package raft 2 | 3 | // 4 | // support for Raft tester. 5 | // 6 | // we will use the original config.go to test your code for grading. 7 | // so, while you can modify this code to help you debug, please 8 | // test with the original before submitting. 9 | // 10 | 11 | import ( 12 | "log" 13 | "math/rand" 14 | "runtime" 15 | "sync" 16 | "testing" 17 | 18 | crand "crypto/rand" 19 | "encoding/base64" 20 | "fmt" 21 | "math/big" 22 | "time" 23 | 24 | "github.com/aQuaYi/Distributed-Algorithms/Raft/code/labrpc" 25 | ) 26 | 27 | func randstring(n int) string { 28 | b := make([]byte, 2*n) 29 | crand.Read(b) 30 | s := base64.URLEncoding.EncodeToString(b) 31 | return s[0:n] 32 | } 33 | 34 | func makeSeed() int64 { 35 | max := big.NewInt(int64(1) << 62) 36 | bigx, _ := crand.Int(crand.Reader, max) 37 | x := bigx.Int64() 38 | return x 39 | } 40 | 41 | type config struct { 42 | mu sync.Mutex 43 | t *testing.T 44 | net *labrpc.Network 45 | n int 46 | rafts []*Raft 47 | applyErr []string // from apply channel readers 48 | connected []bool // whether each server is on the net 49 | saved []*Persister 50 | endnames [][]string // the port file names each sends to 51 | logs []map[int]int // copy of each server's committed entries 52 | start time.Time // time at which make_config() was called 53 | // begin()/end() statistics 54 | beginTime time.Time // time at which test_test.go called cfg.begin() 55 | rpcs0 int // rpcTotal() at start of test 56 | cmds0 int // number of agreements 57 | maxIndex int 58 | maxIndex0 int 59 | } 60 | 61 | var ncpuOnce sync.Once 62 | 63 | func makeConfig(t *testing.T, n int, unreliable bool) *config { 64 | ncpuOnce.Do(func() { 65 | if runtime.NumCPU() < 2 { 66 | fmt.Printf("warning: only one CPU, which may conceal locking bugs\n") 67 | } 68 | rand.Seed(makeSeed()) 69 | }) 70 | runtime.GOMAXPROCS(4) 71 | cfg := &config{} 72 | cfg.t = t 73 | cfg.net = labrpc.MakeNetwork() 74 | cfg.n = n 75 | cfg.applyErr = make([]string, cfg.n) 76 | cfg.rafts = make([]*Raft, cfg.n) 77 | cfg.connected = make([]bool, cfg.n) 78 | cfg.saved = make([]*Persister, cfg.n) 79 | cfg.endnames = make([][]string, cfg.n) 80 | cfg.logs = make([]map[int]int, cfg.n) 81 | cfg.start = time.Now() 82 | 83 | cfg.setunreliable(unreliable) 84 | 85 | cfg.net.LongDelays(true) 86 | 87 | // create a full set of Rafts. 88 | for i := 0; i < cfg.n; i++ { 89 | cfg.logs[i] = map[int]int{} 90 | cfg.start1(i) 91 | } 92 | 93 | // connect everyone 94 | for i := 0; i < cfg.n; i++ { 95 | cfg.connect(i) 96 | } 97 | 98 | return cfg 99 | } 100 | 101 | // shut down a Raft server but save its persistent state. 102 | func (cfg *config) crash1(i int) { 103 | cfg.disconnect(i) 104 | cfg.net.DeleteServer(i) // disable client connections to the server. 105 | 106 | cfg.mu.Lock() 107 | defer cfg.mu.Unlock() 108 | 109 | // a fresh persister, in case old instance 110 | // continues to update the Persister. 111 | // but copy old persister's content so that we always 112 | // pass Make() the last persisted state. 113 | if cfg.saved[i] != nil { 114 | cfg.saved[i] = cfg.saved[i].Copy() 115 | } 116 | 117 | rf := cfg.rafts[i] 118 | if rf != nil { 119 | cfg.mu.Unlock() 120 | rf.Kill() 121 | cfg.mu.Lock() 122 | cfg.rafts[i] = nil 123 | } 124 | 125 | if cfg.saved[i] != nil { 126 | raftlog := cfg.saved[i].ReadRaftState() 127 | cfg.saved[i] = &Persister{} 128 | cfg.saved[i].SaveRaftState(raftlog) 129 | } 130 | } 131 | 132 | // 133 | // start or re-start a Raft. 134 | // if one already exists, "kill" it first. 
135 | // allocate new outgoing port file names, and a new 136 | // state persister, to isolate previous instance of 137 | // this server. since we cannot really kill it. 138 | // 139 | func (cfg *config) start1(i int) { 140 | cfg.crash1(i) 141 | 142 | // a fresh set of outgoing ClientEnd names. 143 | // so that old crashed instance's ClientEnds can't send. 144 | cfg.endnames[i] = make([]string, cfg.n) 145 | for j := 0; j < cfg.n; j++ { 146 | cfg.endnames[i][j] = randstring(20) 147 | } 148 | 149 | // a fresh set of ClientEnds. 150 | ends := make([]*labrpc.ClientEnd, cfg.n) 151 | for j := 0; j < cfg.n; j++ { 152 | ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j]) 153 | cfg.net.Connect(cfg.endnames[i][j], j) 154 | } 155 | 156 | cfg.mu.Lock() 157 | 158 | // a fresh persister, so old instance doesn't overwrite 159 | // new instance's persisted state. 160 | // but copy old persister's content so that we always 161 | // pass Make() the last persisted state. 162 | if cfg.saved[i] != nil { 163 | cfg.saved[i] = cfg.saved[i].Copy() 164 | } else { 165 | cfg.saved[i] = MakePersister() 166 | } 167 | 168 | cfg.mu.Unlock() 169 | 170 | // listen to messages from Raft indicating newly committed messages. 171 | applyCh := make(chan ApplyMsg) 172 | go func() { 173 | for m := range applyCh { 174 | errMsg := "" 175 | if m.CommandValid == false { 176 | // ignore other types of ApplyMsg 177 | } else if v, ok := (m.Command).(int); ok { 178 | cfg.mu.Lock() 179 | for j := 0; j < len(cfg.logs); j++ { 180 | if old, oldOK := cfg.logs[j][m.CommandIndex]; oldOK && old != v { 181 | // some server has already committed a different value for this entry! 182 | errMsg = fmt.Sprintf("commit index=%v server=%v %v != server=%v %v", 183 | m.CommandIndex, i, m.Command, j, old) 184 | } 185 | } 186 | _, prevOK := cfg.logs[i][m.CommandIndex-1] 187 | cfg.logs[i][m.CommandIndex] = v 188 | if m.CommandIndex > cfg.maxIndex { 189 | cfg.maxIndex = m.CommandIndex 190 | } 191 | cfg.mu.Unlock() 192 | 193 | if m.CommandIndex > 1 && prevOK == false { 194 | errMsg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex) 195 | } 196 | } else { 197 | errMsg = fmt.Sprintf("committed command %v is not an int", m.Command) 198 | } 199 | 200 | if errMsg != "" { 201 | log.Fatalf("apply error: %v\n", errMsg) 202 | cfg.applyErr[i] = errMsg 203 | // keep reading after error so that Raft doesn't block 204 | // holding locks... 205 | } 206 | } 207 | }() 208 | 209 | rf := Make(ends, i, cfg.saved[i], applyCh) 210 | 211 | cfg.mu.Lock() 212 | cfg.rafts[i] = rf 213 | cfg.mu.Unlock() 214 | 215 | svc := labrpc.MakeService(rf) 216 | srv := labrpc.MakeServer() 217 | srv.AddService(svc) 218 | cfg.net.AddServer(i, srv) 219 | } 220 | 221 | func (cfg *config) checkTimeout() { 222 | // enforce a two minute real-time limit on each test 223 | if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second { 224 | cfg.t.Fatal("test took longer than 120 seconds") 225 | } 226 | } 227 | 228 | func (cfg *config) cleanup() { 229 | for i := 0; i < len(cfg.rafts); i++ { 230 | if cfg.rafts[i] != nil { 231 | cfg.rafts[i].Kill() 232 | } 233 | } 234 | cfg.net.Cleanup() 235 | cfg.checkTimeout() 236 | } 237 | 238 | // attach server i to the net. 
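// It enables both the outgoing ClientEnds from server i and the incoming
// ClientEnds from every peer that is currently connected.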
239 | func (cfg *config) connect(i int) { 240 | // fmt.Printf("connect(%d)\n", i) 241 | 242 | cfg.connected[i] = true 243 | 244 | // outgoing ClientEnds 245 | for j := 0; j < cfg.n; j++ { 246 | if cfg.connected[j] { 247 | endname := cfg.endnames[i][j] 248 | cfg.net.Enable(endname, true) 249 | } 250 | } 251 | 252 | // incoming ClientEnds 253 | for j := 0; j < cfg.n; j++ { 254 | if cfg.connected[j] { 255 | endname := cfg.endnames[j][i] 256 | cfg.net.Enable(endname, true) 257 | } 258 | } 259 | } 260 | 261 | // detach server i from the net. 262 | func (cfg *config) disconnect(i int) { 263 | // fmt.Printf("disconnect(%d)\n", i) 264 | 265 | cfg.connected[i] = false 266 | 267 | // outgoing ClientEnds 268 | for j := 0; j < cfg.n; j++ { 269 | if cfg.endnames[i] != nil { 270 | endname := cfg.endnames[i][j] 271 | cfg.net.Enable(endname, false) 272 | } 273 | } 274 | 275 | // incoming ClientEnds 276 | for j := 0; j < cfg.n; j++ { 277 | if cfg.endnames[j] != nil { 278 | endname := cfg.endnames[j][i] 279 | cfg.net.Enable(endname, false) 280 | } 281 | } 282 | } 283 | 284 | func (cfg *config) rpcCount(server int) int { 285 | return cfg.net.GetCount(server) 286 | } 287 | 288 | func (cfg *config) rpcTotal() int { 289 | return cfg.net.GetTotalCount() 290 | } 291 | 292 | func (cfg *config) setunreliable(unRel bool) { 293 | cfg.net.Reliable(!unRel) 294 | } 295 | 296 | func (cfg *config) setlongreordering(longrel bool) { 297 | cfg.net.LongReordering(longrel) 298 | } 299 | 300 | // check that there's exactly one leader. 301 | // try a few times in case re-elections are needed. 302 | func (cfg *config) checkOneLeader() int { 303 | for iters := 0; iters < 10; iters++ { 304 | ms := 450 + (rand.Int63() % 100) 305 | time.Sleep(time.Duration(ms) * time.Millisecond) 306 | 307 | leaders := make(map[int][]int) 308 | for i := 0; i < cfg.n; i++ { 309 | if cfg.connected[i] { 310 | if term, leader := cfg.rafts[i].GetState(); leader { 311 | leaders[term] = append(leaders[term], i) 312 | } 313 | } 314 | } 315 | 316 | lastTermWithLeader := -1 317 | for term, leaders := range leaders { 318 | if len(leaders) > 1 { 319 | cfg.t.Fatalf("term %d has %d (>1) leaders", term, len(leaders)) 320 | } 321 | if term > lastTermWithLeader { 322 | lastTermWithLeader = term 323 | } 324 | } 325 | 326 | if len(leaders) != 0 { 327 | return leaders[lastTermWithLeader][0] 328 | } 329 | } 330 | cfg.t.Fatalf("expected one leader, got none") 331 | return -1 332 | } 333 | 334 | // check that everyone agrees on the term. 335 | func (cfg *config) checkTerms() int { 336 | term := -1 337 | for i := 0; i < cfg.n; i++ { 338 | if cfg.connected[i] { 339 | xterm, _ := cfg.rafts[i].GetState() 340 | if term == -1 { 341 | term = xterm 342 | } else if term != xterm { 343 | cfg.t.Fatalf("servers disagree on term") 344 | } 345 | } 346 | } 347 | return term 348 | } 349 | 350 | // check that there's no leader 351 | func (cfg *config) checkNoLeader() { 352 | for i := 0; i < cfg.n; i++ { 353 | if cfg.connected[i] { 354 | _, isLeader := cfg.rafts[i].GetState() 355 | if isLeader { 356 | cfg.t.Fatalf("expected no leader, but %v claims to be leader", i) 357 | } 358 | } 359 | } 360 | } 361 | 362 | // how many servers think a log entry is committed? 
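// nCommitted returns the number of servers whose recorded log (cfg.logs) has
// an entry at the given index, together with the command stored there; it
// fails the test if two servers committed different commands at that index.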
363 | func (cfg *config) nCommitted(index int) (int, interface{}) { 364 | count := 0 365 | cmd := -1 366 | for i := 0; i < len(cfg.rafts); i++ { 367 | if cfg.applyErr[i] != "" { 368 | cfg.t.Fatal(cfg.applyErr[i]) 369 | } 370 | 371 | cfg.mu.Lock() 372 | cmd1, ok := cfg.logs[i][index] 373 | cfg.mu.Unlock() 374 | 375 | if ok { 376 | if count > 0 && cmd != cmd1 { 377 | cfg.t.Fatalf("committed values do not match: index %v, %v, %v\n", 378 | index, cmd, cmd1) 379 | } 380 | count++ 381 | cmd = cmd1 382 | } 383 | } 384 | return count, cmd 385 | } 386 | 387 | // wait for at least n servers to commit. 388 | // but don't wait forever. 389 | func (cfg *config) wait(index int, n int, startTerm int) interface{} { 390 | to := 10 * time.Millisecond 391 | for iters := 0; iters < 30; iters++ { 392 | nd, _ := cfg.nCommitted(index) 393 | if nd >= n { 394 | break 395 | } 396 | time.Sleep(to) 397 | if to < time.Second { 398 | to *= 2 399 | } 400 | if startTerm > -1 { 401 | for _, r := range cfg.rafts { 402 | if t, _ := r.GetState(); t > startTerm { 403 | // someone has moved on 404 | // can no longer guarantee that we'll "win" 405 | return -1 406 | } 407 | } 408 | } 409 | } 410 | nd, cmd := cfg.nCommitted(index) 411 | if nd < n { 412 | cfg.t.Fatalf("only %d decided for index %d; wanted %d\n", 413 | nd, index, n) 414 | } 415 | return cmd 416 | } 417 | 418 | // do a complete agreement. 419 | // it might choose the wrong leader initially, 420 | // and have to re-submit after giving up. 421 | // entirely gives up after about 10 seconds. 422 | // indirectly checks that the servers agree on the 423 | // same value, since nCommitted() checks this, 424 | // as do the threads that read from applyCh. 425 | // returns index. 426 | // if retry==true, may submit the command multiple 427 | // times, in case a leader fails just after Start(). 428 | // if retry==false, calls Start() only once, in order 429 | // to simplify the early Lab 2B tests. 430 | func (cfg *config) one(cmd int, expectedServers int, retry bool) int { 431 | t0 := time.Now() 432 | starts := 0 433 | for time.Since(t0).Seconds() < 10 { 434 | // try all the servers, maybe one is the leader. 435 | index := -1 436 | for si := 0; si < cfg.n; si++ { 437 | starts = (starts + 1) % cfg.n 438 | var rf *Raft 439 | cfg.mu.Lock() 440 | if cfg.connected[starts] { 441 | rf = cfg.rafts[starts] 442 | } 443 | cfg.mu.Unlock() 444 | if rf != nil { 445 | index1, _, ok := rf.Start(cmd) 446 | if ok { 447 | index = index1 448 | cfg.t.Logf(" ## %v 的 logIndex: %d, %s %s ", cmd, index, rf, rf.details()) 449 | break 450 | } 451 | } 452 | } 453 | 454 | if index != -1 { 455 | // somebody claimed to be the leader and to have 456 | // submitted our command; wait a while for agreement. 457 | t1 := time.Now() 458 | for time.Since(t1).Seconds() < 2 { 459 | nd, cmd1 := cfg.nCommitted(index) 460 | cfg.t.Logf(" ## %d/%d raft 的 logIndex:%d 的值为 %v", nd, len(cfg.logs), index, cmd1) 461 | if nd > 0 && nd >= expectedServers { 462 | // committed 463 | if cmd2, ok := cmd1.(int); ok && cmd2 == cmd { 464 | // and it was the command we submitted. 465 | return index 466 | } 467 | } 468 | time.Sleep(20 * time.Millisecond) 469 | } 470 | if retry == false { 471 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) 472 | } 473 | } else { 474 | time.Sleep(50 * time.Millisecond) 475 | } 476 | } 477 | cfg.t.Fatalf("one(%v) failed to reach agreement", cmd) 478 | return -1 479 | } 480 | 481 | // start a Test. 482 | // print the Test message. 483 | // e.g. 
cfg.begin("Test (2B): RPC counts aren't too high") 484 | func (cfg *config) begin(description string) { 485 | fmt.Printf("%s ...\n", description) 486 | cfg.beginTime = time.Now() 487 | cfg.rpcs0 = cfg.rpcTotal() 488 | cfg.cmds0 = 0 489 | cfg.maxIndex0 = cfg.maxIndex 490 | } 491 | 492 | // end a Test -- the fact that we got here means there 493 | // was no failure. 494 | // print the Passed message, 495 | // and some performance numbers. 496 | func (cfg *config) end() { 497 | cfg.checkTimeout() 498 | if cfg.t.Failed() == false { 499 | cfg.mu.Lock() 500 | t := time.Since(cfg.beginTime).Seconds() // real time 501 | nPeers := cfg.n // number of Raft peers 502 | nRPC := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends 503 | nCMDs := cfg.maxIndex - cfg.maxIndex0 // number of Raft agreements reported 504 | cfg.mu.Unlock() 505 | 506 | fmt.Printf(" ... Passed --") 507 | fmt.Printf(" %4.1f %d %4d %4d\n", t, nPeers, nRPC, nCMDs) 508 | } 509 | } 510 | -------------------------------------------------------------------------------- /Raft/code/labgob/labgob.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | // 4 | // trying to send non-capitalized fields over RPC produces a range of 5 | // misbehavior, including both mysterious incorrect computation and 6 | // outright crashes. so this wrapper around Go's encoding/gob warns 7 | // about non-capitalized field names. 8 | // 9 | 10 | import "encoding/gob" 11 | import "io" 12 | import "reflect" 13 | import "fmt" 14 | import "sync" 15 | import "unicode" 16 | import "unicode/utf8" 17 | 18 | var mu sync.Mutex 19 | var errorCount int // for TestCapital 20 | var checked map[reflect.Type]bool 21 | 22 | // LabEncoder is 23 | type LabEncoder struct { 24 | gob *gob.Encoder 25 | } 26 | 27 | // NewEncoder is 28 | func NewEncoder(w io.Writer) *LabEncoder { 29 | enc := &LabEncoder{} 30 | enc.gob = gob.NewEncoder(w) 31 | return enc 32 | } 33 | 34 | // Encode is 35 | func (enc *LabEncoder) Encode(e interface{}) error { 36 | checkValue(e) 37 | return enc.gob.Encode(e) 38 | } 39 | 40 | // EncodeValue is 41 | func (enc *LabEncoder) EncodeValue(value reflect.Value) error { 42 | checkValue(value.Interface()) 43 | return enc.gob.EncodeValue(value) 44 | } 45 | 46 | // LabDecoder is 47 | type LabDecoder struct { 48 | gob *gob.Decoder 49 | } 50 | 51 | // NewDecoder is 52 | func NewDecoder(r io.Reader) *LabDecoder { 53 | dec := &LabDecoder{} 54 | dec.gob = gob.NewDecoder(r) 55 | return dec 56 | } 57 | 58 | // Decode is 59 | func (dec *LabDecoder) Decode(e interface{}) error { 60 | checkValue(e) 61 | checkDefault(e) 62 | return dec.gob.Decode(e) 63 | } 64 | 65 | // Register is 66 | func Register(value interface{}) { 67 | checkValue(value) 68 | gob.Register(value) 69 | } 70 | 71 | // RegisterName is 72 | func RegisterName(name string, value interface{}) { 73 | checkValue(value) 74 | gob.RegisterName(name, value) 75 | } 76 | 77 | func checkValue(value interface{}) { 78 | checkType(reflect.TypeOf(value)) 79 | } 80 | 81 | func checkType(t reflect.Type) { 82 | k := t.Kind() 83 | 84 | mu.Lock() 85 | // only complain once, and avoid recursion. 
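// Illustrative note (added, not from the original): the `checked` map below
// memoizes types, so each problematic type is reported at most once per
// process. For example, encoding a hypothetical struct such as
//
//   type BadArgs struct {
//       Term int
//       term int // lower-case: encoding/gob silently drops it, hence the warning
//   }
//
// prints the "lower-case field" message a single time, no matter how many
// BadArgs values are encoded.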
86 | if checked == nil { 87 | checked = map[reflect.Type]bool{} 88 | } 89 | if checked[t] { 90 | mu.Unlock() 91 | return 92 | } 93 | checked[t] = true 94 | mu.Unlock() 95 | 96 | switch k { 97 | case reflect.Struct: 98 | for i := 0; i < t.NumField(); i++ { 99 | f := t.Field(i) 100 | rune, _ := utf8.DecodeRuneInString(f.Name) 101 | if unicode.IsUpper(rune) == false { 102 | // ta da 103 | fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n", 104 | f.Name, t.Name()) 105 | mu.Lock() 106 | errorCount++ 107 | mu.Unlock() 108 | } 109 | checkType(f.Type) 110 | } 111 | return 112 | case reflect.Slice, reflect.Array, reflect.Ptr: 113 | checkType(t.Elem()) 114 | return 115 | case reflect.Map: 116 | checkType(t.Elem()) 117 | checkType(t.Key()) 118 | return 119 | default: 120 | return 121 | } 122 | } 123 | 124 | // 125 | // warn if the value contains non-default values, 126 | // as it would if one sent an RPC but the reply 127 | // struct was already modified. if the RPC reply 128 | // contains default values, GOB won't overwrite 129 | // the non-default value. 130 | // 131 | func checkDefault(value interface{}) { 132 | if value == nil { 133 | return 134 | } 135 | checkDefault1(reflect.ValueOf(value), 1, "") 136 | } 137 | 138 | func checkDefault1(value reflect.Value, depth int, name string) { 139 | if depth > 3 { 140 | return 141 | } 142 | 143 | t := value.Type() 144 | k := t.Kind() 145 | 146 | switch k { 147 | case reflect.Struct: 148 | for i := 0; i < t.NumField(); i++ { 149 | vv := value.Field(i) 150 | name1 := t.Field(i).Name 151 | if name != "" { 152 | name1 = name + "." + name1 153 | } 154 | checkDefault1(vv, depth+1, name1) 155 | } 156 | return 157 | case reflect.Ptr: 158 | if value.IsNil() { 159 | return 160 | } 161 | checkDefault1(value.Elem(), depth+1, name) 162 | return 163 | case reflect.Bool, 164 | reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 165 | reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, 166 | reflect.Uintptr, reflect.Float32, reflect.Float64, 167 | reflect.String: 168 | if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false { 169 | mu.Lock() 170 | if errorCount < 1 { 171 | what := name 172 | if what == "" { 173 | what = t.Name() 174 | } 175 | // this warning typically arises if code re-uses the same RPC reply 176 | // variable for multiple RPC calls, or if code restores persisted 177 | // state into variable that already have non-default values. 178 | fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n", 179 | what) 180 | } 181 | errorCount++ 182 | mu.Unlock() 183 | } 184 | return 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /Raft/code/labgob/test_test.go: -------------------------------------------------------------------------------- 1 | package labgob 2 | 3 | import "testing" 4 | 5 | import "bytes" 6 | 7 | type T1 struct { 8 | T1int0 int 9 | T1int1 int 10 | T1string0 string 11 | T1string1 string 12 | } 13 | 14 | type T2 struct { 15 | T2slice []T1 16 | T2map map[int]*T1 17 | T2t3 interface{} 18 | } 19 | 20 | type T3 struct { 21 | T3int999 int 22 | } 23 | 24 | // 25 | // test that we didn't break GOB. 
26 | // 27 | func TestGOB(t *testing.T) { 28 | e0 := errorCount 29 | 30 | w := new(bytes.Buffer) 31 | 32 | Register(T3{}) 33 | 34 | { 35 | x0 := 0 36 | x1 := 1 37 | t1 := T1{} 38 | t1.T1int1 = 1 39 | t1.T1string1 = "6.824" 40 | t2 := T2{} 41 | t2.T2slice = []T1{T1{}, t1} 42 | t2.T2map = map[int]*T1{} 43 | t2.T2map[99] = &T1{1, 2, "x", "y"} 44 | t2.T2t3 = T3{999} 45 | 46 | e := NewEncoder(w) 47 | e.Encode(x0) 48 | e.Encode(x1) 49 | e.Encode(t1) 50 | e.Encode(t2) 51 | } 52 | data := w.Bytes() 53 | 54 | { 55 | var x0 int 56 | var x1 int 57 | var t1 T1 58 | var t2 T2 59 | 60 | r := bytes.NewBuffer(data) 61 | d := NewDecoder(r) 62 | if d.Decode(&x0) != nil || 63 | d.Decode(&x1) != nil || 64 | d.Decode(&t1) != nil || 65 | d.Decode(&t2) != nil { 66 | t.Fatalf("Decode failed") 67 | } 68 | 69 | if x0 != 0 { 70 | t.Fatalf("wrong x0 %v\n", x0) 71 | } 72 | if x1 != 1 { 73 | t.Fatalf("wrong x1 %v\n", x1) 74 | } 75 | if t1.T1int0 != 0 { 76 | t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0) 77 | } 78 | if t1.T1int1 != 1 { 79 | t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1) 80 | } 81 | if t1.T1string0 != "" { 82 | t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0) 83 | } 84 | if t1.T1string1 != "6.824" { 85 | t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1) 86 | } 87 | if len(t2.T2slice) != 2 { 88 | t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice)) 89 | } 90 | if t2.T2slice[1].T1int1 != 1 { 91 | t.Fatalf("wrong slice value\n") 92 | } 93 | if len(t2.T2map) != 1 { 94 | t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map)) 95 | } 96 | if t2.T2map[99].T1string1 != "y" { 97 | t.Fatalf("wrong map value\n") 98 | } 99 | t3 := (t2.T2t3).(T3) 100 | if t3.T3int999 != 999 { 101 | t.Fatalf("wrong t2.T2t3.T3int999\n") 102 | } 103 | } 104 | 105 | if errorCount != e0 { 106 | t.Fatalf("there were errors, but should not have been") 107 | } 108 | } 109 | 110 | type T4 struct { 111 | Yes int 112 | no int 113 | } 114 | 115 | // 116 | // make sure we check capitalization 117 | // labgob prints one warning during this test. 118 | // 119 | func TestCapital(t *testing.T) { 120 | e0 := errorCount 121 | 122 | v := []map[*T4]int{} 123 | 124 | w := new(bytes.Buffer) 125 | e := NewEncoder(w) 126 | e.Encode(v) 127 | data := w.Bytes() 128 | 129 | var v1 []map[T4]int 130 | r := bytes.NewBuffer(data) 131 | d := NewDecoder(r) 132 | d.Decode(&v1) 133 | 134 | if errorCount != e0+1 { 135 | t.Fatalf("failed to warn about lower-case field") 136 | } 137 | } 138 | 139 | // 140 | // check that we warn when someone sends a default value over 141 | // RPC but the target into which we're decoding holds a non-default 142 | // value, which GOB seems not to overwrite as you'd expect. 143 | // 144 | // labgob does not print a warning. 145 | // 146 | func TestDefault(t *testing.T) { 147 | e0 := errorCount 148 | 149 | type DD struct { 150 | X int 151 | } 152 | 153 | // send a default value... 154 | dd1 := DD{} 155 | 156 | w := new(bytes.Buffer) 157 | e := NewEncoder(w) 158 | e.Encode(dd1) 159 | data := w.Bytes() 160 | 161 | // and receive it into memory that already 162 | // holds non-default values. 
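// Note added for clarity: encoding/gob does not transmit zero-valued fields, so
// the decode below leaves reply.X at 99 rather than overwriting it with dd1's
// zero; the checkDefault pass in labgob.go exists to warn about exactly this
// reply-reuse pattern.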
163 | reply := DD{99} 164 | 165 | r := bytes.NewBuffer(data) 166 | d := NewDecoder(r) 167 | d.Decode(&reply) 168 | 169 | if errorCount != e0+1 { 170 | t.Fatalf("failed to warn about decoding into non-default value") 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /Raft/code/labrpc/labrpc.go: -------------------------------------------------------------------------------- 1 | package labrpc 2 | 3 | // 4 | // channel-based RPC, for 824 labs. 5 | // 6 | // simulates a network that can lose requests, lose replies, 7 | // delay messages, and entirely disconnect particular hosts. 8 | // 9 | // we will use the original labrpc.go to test your code for grading. 10 | // so, while you can modify this code to help you debug, please 11 | // test against the original before submitting. 12 | // 13 | // adapted from Go net/rpc/server.go. 14 | // 15 | // sends labgob-encoded values to ensure that RPCs 16 | // don't include references to program objects. 17 | // 18 | // net := MakeNetwork() -- holds network, clients, servers. 19 | // end := net.MakeEnd(endname) -- create a client end-point, to talk to one server. 20 | // net.AddServer(servername, server) -- adds a named server to network. 21 | // net.DeleteServer(servername) -- eliminate the named server. 22 | // net.Connect(endname, servername) -- connect a client to a server. 23 | // net.Enable(endname, enabled) -- enable/disable a client. 24 | // net.Reliable(bool) -- false means drop/delay messages 25 | // 26 | // end.Call("Raft.AppendEntries", &args, &reply) -- send an RPC, wait for reply. 27 | // the "Raft" is the name of the server struct to be called. 28 | // the "AppendEntries" is the name of the method to be called. 29 | // Call() returns true to indicate that the server executed the request 30 | // and the reply is valid. 31 | // Call() returns false if the network lost the request or reply 32 | // or the server is down. 33 | // It is OK to have multiple Call()s in progress at the same time on the 34 | // same ClientEnd. 35 | // Concurrent calls to Call() may be delivered to the server out of order, 36 | // since the network may re-order messages. 37 | // Call() is guaranteed to return (perhaps after a delay) *except* if the 38 | // handler function on the server side does not return. 39 | // the server RPC handler function must declare its args and reply arguments 40 | // as pointers, so that their types exactly match the types of the arguments 41 | // to Call(). 42 | // 43 | // srv := MakeServer() 44 | // srv.AddService(svc) -- a server can have multiple services, e.g. Raft and k/v 45 | // pass srv to net.AddServer() 46 | // 47 | // svc := MakeService(receiverObject) -- obj's methods will handle RPCs 48 | // much like Go's rpcs.Register() 49 | // pass svc to srv.AddService() 50 | // 51 | 52 | import "github.com/aQuaYi/Distributed-Algorithms/Raft/code/labgob" 53 | import "bytes" 54 | import "reflect" 55 | import "sync" 56 | import "log" 57 | import "strings" 58 | import "math/rand" 59 | import "time" 60 | import "sync/atomic" 61 | 62 | type reqMsg struct { 63 | endname interface{} // name of sending ClientEnd 64 | svcMeth string // e.g. 
"Raft.AppendEntries" 65 | argsType reflect.Type 66 | args []byte 67 | replyCh chan replyMsg 68 | } 69 | 70 | type replyMsg struct { 71 | ok bool 72 | reply []byte 73 | } 74 | 75 | // ClientEnd is 76 | type ClientEnd struct { 77 | endname interface{} // this end-point's name 78 | ch chan reqMsg // copy of Network.endCh 79 | done chan struct{} // closed when Network is cleaned up 80 | } 81 | 82 | // Call is 83 | // send an RPC, wait for the reply. 84 | // the return value indicates success; false means that 85 | // no reply was received from the server. 86 | func (e *ClientEnd) Call(svcMeth string, args interface{}, reply interface{}) bool { 87 | req := reqMsg{} 88 | req.endname = e.endname 89 | req.svcMeth = svcMeth 90 | req.argsType = reflect.TypeOf(args) 91 | req.replyCh = make(chan replyMsg) 92 | 93 | qb := new(bytes.Buffer) 94 | qe := labgob.NewEncoder(qb) 95 | qe.Encode(args) 96 | req.args = qb.Bytes() 97 | 98 | select { 99 | case e.ch <- req: 100 | // ok 101 | case <-e.done: 102 | return false 103 | } 104 | 105 | rep := <-req.replyCh 106 | if rep.ok { 107 | rb := bytes.NewBuffer(rep.reply) 108 | rd := labgob.NewDecoder(rb) 109 | if err := rd.Decode(reply); err != nil { 110 | log.Fatalf("ClientEnd.Call(): decode reply: %v\n", err) 111 | } 112 | return true 113 | } else { 114 | return false 115 | } 116 | } 117 | 118 | type Network struct { 119 | mu sync.Mutex 120 | reliable bool 121 | longDelays bool // pause a long time on send on disabled connection 122 | longReordering bool // sometimes delay replies a long time 123 | ends map[interface{}]*ClientEnd // ends, by name 124 | enabled map[interface{}]bool // by end name 125 | servers map[interface{}]*Server // servers, by name 126 | connections map[interface{}]interface{} // endname -> servername 127 | endCh chan reqMsg 128 | done chan struct{} // closed when Network is cleaned up 129 | count int32 // total RPC count, for statistics 130 | } 131 | 132 | func MakeNetwork() *Network { 133 | rn := &Network{} 134 | rn.reliable = true 135 | rn.ends = map[interface{}]*ClientEnd{} 136 | rn.enabled = map[interface{}]bool{} 137 | rn.servers = map[interface{}]*Server{} 138 | rn.connections = map[interface{}](interface{}){} 139 | rn.endCh = make(chan reqMsg) 140 | rn.done = make(chan struct{}) 141 | 142 | // single goroutine to handle all ClientEnd.Call()s 143 | go func() { 144 | for { 145 | select { 146 | case xReq := <-rn.endCh: 147 | atomic.AddInt32(&rn.count, 1) 148 | go rn.ProcessReq(xReq) 149 | case <-rn.done: 150 | return 151 | } 152 | } 153 | }() 154 | 155 | return rn 156 | } 157 | 158 | func (rn *Network) Cleanup() { 159 | close(rn.done) 160 | } 161 | 162 | func (rn *Network) Reliable(yes bool) { 163 | rn.mu.Lock() 164 | defer rn.mu.Unlock() 165 | 166 | rn.reliable = yes 167 | } 168 | 169 | func (rn *Network) LongReordering(yes bool) { 170 | rn.mu.Lock() 171 | defer rn.mu.Unlock() 172 | 173 | rn.longReordering = yes 174 | } 175 | 176 | func (rn *Network) LongDelays(yes bool) { 177 | rn.mu.Lock() 178 | defer rn.mu.Unlock() 179 | 180 | rn.longDelays = yes 181 | } 182 | 183 | func (rn *Network) ReadEndnameInfo(endname interface{}) (enabled bool, 184 | servername interface{}, server *Server, reliable bool, longreordering bool, 185 | ) { 186 | rn.mu.Lock() 187 | defer rn.mu.Unlock() 188 | 189 | enabled = rn.enabled[endname] 190 | servername = rn.connections[endname] 191 | if servername != nil { 192 | server = rn.servers[servername] 193 | } 194 | reliable = rn.reliable 195 | longreordering = rn.longReordering 196 | return 197 | } 198 | 199 | func 
(rn *Network) IsServerDead(endname interface{}, servername interface{}, server *Server) bool { 200 | rn.mu.Lock() 201 | defer rn.mu.Unlock() 202 | 203 | if rn.enabled[endname] == false || rn.servers[servername] != server { 204 | return true 205 | } 206 | return false 207 | } 208 | 209 | func (rn *Network) ProcessReq(req reqMsg) { 210 | enabled, servername, server, reliable, longreordering := rn.ReadEndnameInfo(req.endname) 211 | 212 | if enabled && servername != nil && server != nil { 213 | if reliable == false { 214 | // short delay 215 | ms := (rand.Int() % 27) 216 | time.Sleep(time.Duration(ms) * time.Millisecond) 217 | } 218 | 219 | if reliable == false && (rand.Int()%1000) < 100 { 220 | // drop the request, return as if timeout 221 | req.replyCh <- replyMsg{false, nil} 222 | return 223 | } 224 | 225 | // execute the request (call the RPC handler). 226 | // in a separate thread so that we can periodically check 227 | // if the server has been killed and the RPC should get a 228 | // failure reply. 229 | ech := make(chan replyMsg) 230 | go func() { 231 | r := server.dispatch(req) 232 | ech <- r 233 | }() 234 | 235 | // wait for handler to return, 236 | // but stop waiting if DeleteServer() has been called, 237 | // and return an error. 238 | var reply replyMsg 239 | replyOK := false 240 | serverDead := false 241 | for replyOK == false && serverDead == false { 242 | select { 243 | case reply = <-ech: 244 | replyOK = true 245 | case <-time.After(100 * time.Millisecond): 246 | serverDead = rn.IsServerDead(req.endname, servername, server) 247 | if serverDead { 248 | go func() { 249 | <-ech // drain channel to let the goroutine created earlier terminate 250 | }() 251 | } 252 | } 253 | } 254 | 255 | // do not reply if DeleteServer() has been called, i.e. 256 | // the server has been killed. this is needed to avoid 257 | // situation in which a client gets a positive reply 258 | // to an Append, but the server persisted the update 259 | // into the old Persister. config.go is careful to call 260 | // DeleteServer() before superseding the Persister. 261 | serverDead = rn.IsServerDead(req.endname, servername, server) 262 | 263 | if replyOK == false || serverDead == true { 264 | // server was killed while we were waiting; return error. 265 | req.replyCh <- replyMsg{false, nil} 266 | } else if reliable == false && (rand.Int()%1000) < 100 { 267 | // drop the reply, return as if timeout 268 | req.replyCh <- replyMsg{false, nil} 269 | } else if longreordering == true && rand.Intn(900) < 600 { 270 | // delay the response for a while 271 | ms := 200 + rand.Intn(1+rand.Intn(2000)) 272 | // Russ points out that this timer arrangement will decrease 273 | // the number of goroutines, so that the race 274 | // detector is less likely to get upset. 275 | time.AfterFunc(time.Duration(ms)*time.Millisecond, func() { 276 | req.replyCh <- reply 277 | }) 278 | } else { 279 | req.replyCh <- reply 280 | } 281 | } else { 282 | // simulate no reply and eventual timeout. 283 | ms := 0 284 | if rn.longDelays { 285 | // let Raft tests check that leader doesn't send 286 | // RPCs synchronously. 287 | ms = (rand.Int() % 7000) 288 | } else { 289 | // many kv tests require the client to try each 290 | // server in fairly rapid succession. 291 | ms = (rand.Int() % 100) 292 | } 293 | time.AfterFunc(time.Duration(ms)*time.Millisecond, func() { 294 | req.replyCh <- replyMsg{false, nil} 295 | }) 296 | } 297 | 298 | } 299 | 300 | // create a client end-point. 301 | // start the thread that listens and delivers. 
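// Illustrative sketch of the whole client/server wiring (it mirrors the usage
// outline at the top of this file; the names "end1" and "srv1" are arbitrary,
// and JunkServer comes from labrpc's test_test.go):
//
//   net := MakeNetwork()
//   end := net.MakeEnd("end1")
//   srv := MakeServer()
//   srv.AddService(MakeService(&JunkServer{}))
//   net.AddServer("srv1", srv)
//   net.Connect("end1", "srv1")
//   net.Enable("end1", true)
//   reply := ""
//   ok := end.Call("JunkServer.Handler2", 111, &reply) // false: request/reply lost, or server down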
302 | func (rn *Network) MakeEnd(endname interface{}) *ClientEnd { 303 | rn.mu.Lock() 304 | defer rn.mu.Unlock() 305 | 306 | if _, ok := rn.ends[endname]; ok { 307 | log.Fatalf("MakeEnd: %v already exists\n", endname) 308 | } 309 | 310 | e := &ClientEnd{} 311 | e.endname = endname 312 | e.ch = rn.endCh 313 | e.done = rn.done 314 | rn.ends[endname] = e 315 | rn.enabled[endname] = false 316 | rn.connections[endname] = nil 317 | 318 | return e 319 | } 320 | 321 | func (rn *Network) AddServer(servername interface{}, rs *Server) { 322 | rn.mu.Lock() 323 | defer rn.mu.Unlock() 324 | 325 | rn.servers[servername] = rs 326 | } 327 | 328 | func (rn *Network) DeleteServer(servername interface{}) { 329 | rn.mu.Lock() 330 | defer rn.mu.Unlock() 331 | 332 | rn.servers[servername] = nil 333 | } 334 | 335 | // connect a ClientEnd to a server. 336 | // a ClientEnd can only be connected once in its lifetime. 337 | func (rn *Network) Connect(endname interface{}, servername interface{}) { 338 | rn.mu.Lock() 339 | defer rn.mu.Unlock() 340 | 341 | rn.connections[endname] = servername 342 | } 343 | 344 | // enable/disable a ClientEnd. 345 | func (rn *Network) Enable(endname interface{}, enabled bool) { 346 | rn.mu.Lock() 347 | defer rn.mu.Unlock() 348 | 349 | rn.enabled[endname] = enabled 350 | } 351 | 352 | // get a server's count of incoming RPCs. 353 | func (rn *Network) GetCount(servername interface{}) int { 354 | rn.mu.Lock() 355 | defer rn.mu.Unlock() 356 | 357 | svr := rn.servers[servername] 358 | return svr.GetCount() 359 | } 360 | 361 | func (rn *Network) GetTotalCount() int { 362 | x := atomic.LoadInt32(&rn.count) 363 | return int(x) 364 | } 365 | 366 | // 367 | // a server is a collection of services, all sharing 368 | // the same rpc dispatcher. so that e.g. both a Raft 369 | // and a k/v server can listen to the same rpc endpoint. 370 | // 371 | type Server struct { 372 | mu sync.Mutex 373 | services map[string]*Service 374 | count int // incoming RPCs 375 | } 376 | 377 | func MakeServer() *Server { 378 | rs := &Server{} 379 | rs.services = map[string]*Service{} 380 | return rs 381 | } 382 | 383 | func (rs *Server) AddService(svc *Service) { 384 | rs.mu.Lock() 385 | defer rs.mu.Unlock() 386 | rs.services[svc.name] = svc 387 | } 388 | 389 | func (rs *Server) dispatch(req reqMsg) replyMsg { 390 | rs.mu.Lock() 391 | 392 | rs.count += 1 393 | 394 | // split Raft.AppendEntries into service and method 395 | dot := strings.LastIndex(req.svcMeth, ".") 396 | serviceName := req.svcMeth[:dot] 397 | methodName := req.svcMeth[dot+1:] 398 | 399 | service, ok := rs.services[serviceName] 400 | 401 | rs.mu.Unlock() 402 | 403 | if ok { 404 | return service.dispatch(methodName, req) 405 | } else { 406 | choices := []string{} 407 | for k, _ := range rs.services { 408 | choices = append(choices, k) 409 | } 410 | log.Fatalf("labrpc.Server.dispatch(): unknown service %v in %v.%v; expecting one of %v\n", 411 | serviceName, serviceName, methodName, choices) 412 | return replyMsg{false, nil} 413 | } 414 | } 415 | 416 | // GetCount is 417 | func (rs *Server) GetCount() int { 418 | rs.mu.Lock() 419 | defer rs.mu.Unlock() 420 | return rs.count 421 | } 422 | 423 | // Service is 424 | // an object with methods that can be called via RPC. 425 | // a single server may have more than one Service. 
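// Note added for clarity: MakeService below uses reflection to register only
// those methods of the receiver that look like RPC handlers, i.e.
//
//   func (t *T) MethodName(args ArgsType, reply *ReplyType)
//
// exported, exactly two parameters after the receiver, a pointer reply, and no
// return values; methods with any other shape are silently skipped.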
426 | type Service struct { 427 | name string 428 | rcvr reflect.Value 429 | typ reflect.Type 430 | methods map[string]reflect.Method 431 | } 432 | 433 | // MakeService is 434 | func MakeService(rcvr interface{}) *Service { 435 | svc := &Service{} 436 | svc.typ = reflect.TypeOf(rcvr) 437 | svc.rcvr = reflect.ValueOf(rcvr) 438 | svc.name = reflect.Indirect(svc.rcvr).Type().Name() 439 | svc.methods = map[string]reflect.Method{} 440 | 441 | for m := 0; m < svc.typ.NumMethod(); m++ { 442 | method := svc.typ.Method(m) 443 | mType := method.Type 444 | mName := method.Name 445 | 446 | //fmt.Printf("%v pp %v ni %v 1k %v 2k %v no %v\n", 447 | // mName, method.PkgPath, mType.NumIn(), mType.In(1).Kind(), mType.In(2).Kind(), mType.NumOut()) 448 | 449 | if method.PkgPath != "" || // capitalized? 450 | mType.NumIn() != 3 || 451 | //mType.In(1).Kind() != reflect.Ptr || 452 | mType.In(2).Kind() != reflect.Ptr || 453 | mType.NumOut() != 0 { 454 | // the method is not suitable for a handler 455 | //fmt.Printf("bad method: %v\n", mName) 456 | } else { 457 | // the method looks like a handler 458 | svc.methods[mName] = method 459 | } 460 | } 461 | 462 | return svc 463 | } 464 | 465 | func (svc *Service) dispatch(methname string, req reqMsg) replyMsg { 466 | if method, ok := svc.methods[methname]; ok { 467 | // prepare space into which to read the argument. 468 | // the Value's type will be a pointer to req.argsType. 469 | args := reflect.New(req.argsType) 470 | 471 | // decode the argument. 472 | ab := bytes.NewBuffer(req.args) 473 | ad := labgob.NewDecoder(ab) 474 | ad.Decode(args.Interface()) 475 | 476 | // allocate space for the reply. 477 | replyType := method.Type.In(2) 478 | replyType = replyType.Elem() 479 | replyv := reflect.New(replyType) 480 | 481 | // call the method. 482 | function := method.Func 483 | function.Call([]reflect.Value{svc.rcvr, args.Elem(), replyv}) 484 | 485 | // encode the reply. 
486 | rb := new(bytes.Buffer) 487 | re := labgob.NewEncoder(rb) 488 | re.EncodeValue(replyv) 489 | 490 | return replyMsg{true, rb.Bytes()} 491 | } else { 492 | choices := []string{} 493 | for k := range svc.methods { 494 | choices = append(choices, k) 495 | } 496 | log.Fatalf("labrpc.Service.dispatch(): unknown method %v in %v; expecting one of %v\n", 497 | methname, req.svcMeth, choices) 498 | return replyMsg{false, nil} 499 | } 500 | } 501 | -------------------------------------------------------------------------------- /Raft/code/labrpc/test_test.go: -------------------------------------------------------------------------------- 1 | package labrpc 2 | 3 | import "testing" 4 | import "strconv" 5 | import "sync" 6 | import "runtime" 7 | import "time" 8 | import "fmt" 9 | 10 | type JunkArgs struct { 11 | X int 12 | } 13 | type JunkReply struct { 14 | X string 15 | } 16 | 17 | type JunkServer struct { 18 | mu sync.Mutex 19 | log1 []string 20 | log2 []int 21 | } 22 | 23 | func (js *JunkServer) Handler1(args string, reply *int) { 24 | js.mu.Lock() 25 | defer js.mu.Unlock() 26 | js.log1 = append(js.log1, args) 27 | *reply, _ = strconv.Atoi(args) 28 | } 29 | 30 | func (js *JunkServer) Handler2(args int, reply *string) { 31 | js.mu.Lock() 32 | defer js.mu.Unlock() 33 | js.log2 = append(js.log2, args) 34 | *reply = "handler2-" + strconv.Itoa(args) 35 | } 36 | 37 | func (js *JunkServer) Handler3(args int, reply *int) { 38 | js.mu.Lock() 39 | defer js.mu.Unlock() 40 | time.Sleep(20 * time.Second) 41 | *reply = -args 42 | } 43 | 44 | // args is a pointer 45 | func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) { 46 | reply.X = "pointer" 47 | } 48 | 49 | // args is a not pointer 50 | func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) { 51 | reply.X = "no pointer" 52 | } 53 | 54 | func TestBasic(t *testing.T) { 55 | runtime.GOMAXPROCS(4) 56 | 57 | rn := MakeNetwork() 58 | defer rn.Cleanup() 59 | 60 | e := rn.MakeEnd("end1-99") 61 | 62 | js := &JunkServer{} 63 | svc := MakeService(js) 64 | 65 | rs := MakeServer() 66 | rs.AddService(svc) 67 | rn.AddServer("server99", rs) 68 | 69 | rn.Connect("end1-99", "server99") 70 | rn.Enable("end1-99", true) 71 | 72 | { 73 | reply := "" 74 | e.Call("JunkServer.Handler2", 111, &reply) 75 | if reply != "handler2-111" { 76 | t.Fatalf("wrong reply from Handler2") 77 | } 78 | } 79 | 80 | { 81 | reply := 0 82 | e.Call("JunkServer.Handler1", "9099", &reply) 83 | if reply != 9099 { 84 | t.Fatalf("wrong reply from Handler1") 85 | } 86 | } 87 | } 88 | 89 | func TestTypes(t *testing.T) { 90 | runtime.GOMAXPROCS(4) 91 | 92 | rn := MakeNetwork() 93 | defer rn.Cleanup() 94 | 95 | e := rn.MakeEnd("end1-99") 96 | 97 | js := &JunkServer{} 98 | svc := MakeService(js) 99 | 100 | rs := MakeServer() 101 | rs.AddService(svc) 102 | rn.AddServer("server99", rs) 103 | 104 | rn.Connect("end1-99", "server99") 105 | rn.Enable("end1-99", true) 106 | 107 | { 108 | var args JunkArgs 109 | var reply JunkReply 110 | // args must match type (pointer or not) of handler. 111 | e.Call("JunkServer.Handler4", &args, &reply) 112 | if reply.X != "pointer" { 113 | t.Fatalf("wrong reply from Handler4") 114 | } 115 | } 116 | 117 | { 118 | var args JunkArgs 119 | var reply JunkReply 120 | // args must match type (pointer or not) of handler. 121 | e.Call("JunkServer.Handler5", args, &reply) 122 | if reply.X != "no pointer" { 123 | t.Fatalf("wrong reply from Handler5") 124 | } 125 | } 126 | } 127 | 128 | // 129 | // does net.Enable(endname, false) really disconnect a client? 
130 | // 131 | func TestDisconnect(t *testing.T) { 132 | runtime.GOMAXPROCS(4) 133 | 134 | rn := MakeNetwork() 135 | defer rn.Cleanup() 136 | 137 | e := rn.MakeEnd("end1-99") 138 | 139 | js := &JunkServer{} 140 | svc := MakeService(js) 141 | 142 | rs := MakeServer() 143 | rs.AddService(svc) 144 | rn.AddServer("server99", rs) 145 | 146 | rn.Connect("end1-99", "server99") 147 | 148 | { 149 | reply := "" 150 | e.Call("JunkServer.Handler2", 111, &reply) 151 | if reply != "" { 152 | t.Fatalf("unexpected reply from Handler2") 153 | } 154 | } 155 | 156 | rn.Enable("end1-99", true) 157 | 158 | { 159 | reply := 0 160 | e.Call("JunkServer.Handler1", "9099", &reply) 161 | if reply != 9099 { 162 | t.Fatalf("wrong reply from Handler1") 163 | } 164 | } 165 | } 166 | 167 | // 168 | // test net.GetCount() 169 | // 170 | func TestCounts(t *testing.T) { 171 | runtime.GOMAXPROCS(4) 172 | 173 | rn := MakeNetwork() 174 | defer rn.Cleanup() 175 | 176 | e := rn.MakeEnd("end1-99") 177 | 178 | js := &JunkServer{} 179 | svc := MakeService(js) 180 | 181 | rs := MakeServer() 182 | rs.AddService(svc) 183 | rn.AddServer(99, rs) 184 | 185 | rn.Connect("end1-99", 99) 186 | rn.Enable("end1-99", true) 187 | 188 | for i := 0; i < 17; i++ { 189 | reply := "" 190 | e.Call("JunkServer.Handler2", i, &reply) 191 | wanted := "handler2-" + strconv.Itoa(i) 192 | if reply != wanted { 193 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 194 | } 195 | } 196 | 197 | n := rn.GetCount(99) 198 | if n != 17 { 199 | t.Fatalf("wrong GetCount() %v, expected 17\n", n) 200 | } 201 | } 202 | 203 | // 204 | // test RPCs from concurrent ClientEnds 205 | // 206 | func TestConcurrentMany(t *testing.T) { 207 | runtime.GOMAXPROCS(4) 208 | 209 | rn := MakeNetwork() 210 | defer rn.Cleanup() 211 | 212 | js := &JunkServer{} 213 | svc := MakeService(js) 214 | 215 | rs := MakeServer() 216 | rs.AddService(svc) 217 | rn.AddServer(1000, rs) 218 | 219 | ch := make(chan int) 220 | 221 | nClients := 20 222 | nRPCs := 10 223 | for ii := 0; ii < nClients; ii++ { 224 | go func(i int) { 225 | n := 0 226 | defer func() { ch <- n }() 227 | 228 | e := rn.MakeEnd(i) 229 | rn.Connect(i, 1000) 230 | rn.Enable(i, true) 231 | 232 | for j := 0; j < nRPCs; j++ { 233 | arg := i*100 + j 234 | reply := "" 235 | e.Call("JunkServer.Handler2", arg, &reply) 236 | wanted := "handler2-" + strconv.Itoa(arg) 237 | if reply != wanted { 238 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 239 | } 240 | n++ 241 | } 242 | }(ii) 243 | } 244 | 245 | total := 0 246 | for ii := 0; ii < nClients; ii++ { 247 | x := <-ch 248 | total += x 249 | } 250 | 251 | if total != nClients*nRPCs { 252 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nClients*nRPCs) 253 | } 254 | 255 | n := rn.GetCount(1000) 256 | if n != total { 257 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 258 | } 259 | } 260 | 261 | // 262 | // test unreliable 263 | // 264 | func TestUnreliable(t *testing.T) { 265 | runtime.GOMAXPROCS(4) 266 | 267 | rn := MakeNetwork() 268 | defer rn.Cleanup() 269 | rn.Reliable(false) 270 | 271 | js := &JunkServer{} 272 | svc := MakeService(js) 273 | 274 | rs := MakeServer() 275 | rs.AddService(svc) 276 | rn.AddServer(1000, rs) 277 | 278 | ch := make(chan int) 279 | 280 | nClients := 300 281 | for ii := 0; ii < nClients; ii++ { 282 | go func(i int) { 283 | n := 0 284 | defer func() { ch <- n }() 285 | 286 | e := rn.MakeEnd(i) 287 | rn.Connect(i, 1000) 288 | rn.Enable(i, true) 289 | 290 | arg := i * 100 291 | reply := "" 
292 | ok := e.Call("JunkServer.Handler2", arg, &reply) 293 | if ok { 294 | wanted := "handler2-" + strconv.Itoa(arg) 295 | if reply != wanted { 296 | t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted) 297 | } 298 | n++ 299 | } 300 | }(ii) 301 | } 302 | 303 | total := 0 304 | for ii := 0; ii < nClients; ii++ { 305 | x := <-ch 306 | total += x 307 | } 308 | 309 | if total == nClients || total == 0 { 310 | t.Fatalf("all RPCs succeeded despite unreliable") 311 | } 312 | } 313 | 314 | // 315 | // test concurrent RPCs from a single ClientEnd 316 | // 317 | func TestConcurrentOne(t *testing.T) { 318 | runtime.GOMAXPROCS(4) 319 | 320 | rn := MakeNetwork() 321 | defer rn.Cleanup() 322 | 323 | js := &JunkServer{} 324 | svc := MakeService(js) 325 | 326 | rs := MakeServer() 327 | rs.AddService(svc) 328 | rn.AddServer(1000, rs) 329 | 330 | e := rn.MakeEnd("c") 331 | rn.Connect("c", 1000) 332 | rn.Enable("c", true) 333 | 334 | ch := make(chan int) 335 | 336 | nRPCs := 20 337 | for ii := 0; ii < nRPCs; ii++ { 338 | go func(i int) { 339 | n := 0 340 | defer func() { ch <- n }() 341 | 342 | arg := 100 + i 343 | reply := "" 344 | e.Call("JunkServer.Handler2", arg, &reply) 345 | wanted := "handler2-" + strconv.Itoa(arg) 346 | if reply != wanted { 347 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 348 | } 349 | n++ 350 | }(ii) 351 | } 352 | 353 | total := 0 354 | for ii := 0; ii < nRPCs; ii++ { 355 | x := <-ch 356 | total += x 357 | } 358 | 359 | if total != nRPCs { 360 | t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nRPCs) 361 | } 362 | 363 | js.mu.Lock() 364 | defer js.mu.Unlock() 365 | if len(js.log2) != nRPCs { 366 | t.Fatalf("wrong number of RPCs delivered") 367 | } 368 | 369 | n := rn.GetCount(1000) 370 | if n != total { 371 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, total) 372 | } 373 | } 374 | 375 | // 376 | // regression: an RPC that's delayed during Enabled=false 377 | // should not delay subsequent RPCs (e.g. after Enabled=true). 378 | // 379 | func TestRegression1(t *testing.T) { 380 | runtime.GOMAXPROCS(4) 381 | 382 | rn := MakeNetwork() 383 | defer rn.Cleanup() 384 | 385 | js := &JunkServer{} 386 | svc := MakeService(js) 387 | 388 | rs := MakeServer() 389 | rs.AddService(svc) 390 | rn.AddServer(1000, rs) 391 | 392 | e := rn.MakeEnd("c") 393 | rn.Connect("c", 1000) 394 | 395 | // start some RPCs while the ClientEnd is disabled. 396 | // they'll be delayed. 397 | rn.Enable("c", false) 398 | ch := make(chan bool) 399 | nRPCs := 20 400 | for ii := 0; ii < nRPCs; ii++ { 401 | go func(i int) { 402 | ok := false 403 | defer func() { ch <- ok }() 404 | 405 | arg := 100 + i 406 | reply := "" 407 | // this call ought to return false. 408 | e.Call("JunkServer.Handler2", arg, &reply) 409 | ok = true 410 | }(ii) 411 | } 412 | 413 | time.Sleep(100 * time.Millisecond) 414 | 415 | // now enable the ClientEnd and check that an RPC completes quickly. 
416 | t0 := time.Now() 417 | rn.Enable("c", true) 418 | { 419 | arg := 99 420 | reply := "" 421 | e.Call("JunkServer.Handler2", arg, &reply) 422 | wanted := "handler2-" + strconv.Itoa(arg) 423 | if reply != wanted { 424 | t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted) 425 | } 426 | } 427 | dur := time.Since(t0).Seconds() 428 | 429 | if dur > 0.03 { 430 | t.Fatalf("RPC took too long (%v) after Enable", dur) 431 | } 432 | 433 | for ii := 0; ii < nRPCs; ii++ { 434 | <-ch 435 | } 436 | 437 | js.mu.Lock() 438 | defer js.mu.Unlock() 439 | if len(js.log2) != 1 { 440 | t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2)) 441 | } 442 | 443 | n := rn.GetCount(1000) 444 | if n != 1 { 445 | t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1) 446 | } 447 | } 448 | 449 | // 450 | // if an RPC is stuck in a server, and the server 451 | // is killed with DeleteServer(), does the RPC 452 | // get un-stuck? 453 | // 454 | func TestKilled(t *testing.T) { 455 | runtime.GOMAXPROCS(4) 456 | 457 | rn := MakeNetwork() 458 | defer rn.Cleanup() 459 | 460 | e := rn.MakeEnd("end1-99") 461 | 462 | js := &JunkServer{} 463 | svc := MakeService(js) 464 | 465 | rs := MakeServer() 466 | rs.AddService(svc) 467 | rn.AddServer("server99", rs) 468 | 469 | rn.Connect("end1-99", "server99") 470 | rn.Enable("end1-99", true) 471 | 472 | doneCh := make(chan bool) 473 | go func() { 474 | reply := 0 475 | ok := e.Call("JunkServer.Handler3", 99, &reply) 476 | doneCh <- ok 477 | }() 478 | 479 | time.Sleep(1000 * time.Millisecond) 480 | 481 | select { 482 | case <-doneCh: 483 | t.Fatalf("Handler3 should not have returned yet") 484 | case <-time.After(100 * time.Millisecond): 485 | } 486 | 487 | rn.DeleteServer("server99") 488 | 489 | select { 490 | case x := <-doneCh: 491 | if x != false { 492 | t.Fatalf("Handler3 returned successfully despite DeleteServer()") 493 | } 494 | case <-time.After(100 * time.Millisecond): 495 | t.Fatalf("Handler3 should return after DeleteServer()") 496 | } 497 | } 498 | 499 | func TestBenchmark(t *testing.T) { 500 | runtime.GOMAXPROCS(4) 501 | 502 | rn := MakeNetwork() 503 | defer rn.Cleanup() 504 | 505 | e := rn.MakeEnd("end1-99") 506 | 507 | js := &JunkServer{} 508 | svc := MakeService(js) 509 | 510 | rs := MakeServer() 511 | rs.AddService(svc) 512 | rn.AddServer("server99", rs) 513 | 514 | rn.Connect("end1-99", "server99") 515 | rn.Enable("end1-99", true) 516 | 517 | t0 := time.Now() 518 | n := 100000 519 | for iters := 0; iters < n; iters++ { 520 | reply := "" 521 | e.Call("JunkServer.Handler2", 111, &reply) 522 | if reply != "handler2-111" { 523 | t.Fatalf("wrong reply from Handler2") 524 | } 525 | } 526 | fmt.Printf("%v for %v\n", time.Since(t0), n) 527 | // march 2016, rtm laptop, 22 microseconds per RPC 528 | } 529 | -------------------------------------------------------------------------------- /Raft/code/persister.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // support for Raft and kvraft to save persistent 5 | // Raft state (log &c) and k/v server snapshots. 6 | // 7 | // we will use the original persister.go to test your code for grading. 8 | // so, while you can modify this code to help you debug, please 9 | // test with the original before submitting. 
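// Illustrative sketch (an assumption about raft-persist.go, which is not shown
// here): Raft code typically combines this Persister with labgob roughly as
//
//   w := new(bytes.Buffer)
//   e := labgob.NewEncoder(w)
//   e.Encode(rf.currentTerm)
//   e.Encode(rf.votedFor)
//   e.Encode(rf.logs)
//   rf.persister.SaveRaftState(w.Bytes())
//
// and restores the same fields with labgob.NewDecoder inside readPersist().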
10 | // 11 | 12 | import "sync" 13 | 14 | // Persister is 15 | type Persister struct { 16 | mu sync.Mutex 17 | raftstate []byte 18 | snapshot []byte 19 | } 20 | 21 | // MakePersister is 22 | func MakePersister() *Persister { 23 | return &Persister{} 24 | } 25 | 26 | // Copy is 27 | func (ps *Persister) Copy() *Persister { 28 | ps.mu.Lock() 29 | defer ps.mu.Unlock() 30 | np := MakePersister() 31 | np.raftstate = ps.raftstate 32 | np.snapshot = ps.snapshot 33 | return np 34 | } 35 | 36 | // SaveRaftState is 37 | func (ps *Persister) SaveRaftState(state []byte) { 38 | ps.mu.Lock() 39 | defer ps.mu.Unlock() 40 | ps.raftstate = state 41 | } 42 | 43 | // ReadRaftState is 44 | func (ps *Persister) ReadRaftState() []byte { 45 | ps.mu.Lock() 46 | defer ps.mu.Unlock() 47 | return ps.raftstate 48 | } 49 | 50 | // RaftStateSize is 51 | func (ps *Persister) RaftStateSize() int { 52 | ps.mu.Lock() 53 | defer ps.mu.Unlock() 54 | return len(ps.raftstate) 55 | } 56 | 57 | // SaveStateAndSnapshot is 58 | // Save both Raft state and K/V snapshot as a single atomic action, 59 | // to help avoid them getting out of sync. 60 | func (ps *Persister) SaveStateAndSnapshot(state []byte, snapshot []byte) { 61 | ps.mu.Lock() 62 | defer ps.mu.Unlock() 63 | ps.raftstate = state 64 | ps.snapshot = snapshot 65 | } 66 | 67 | // ReadSnapshot is 68 | func (ps *Persister) ReadSnapshot() []byte { 69 | ps.mu.Lock() 70 | defer ps.mu.Unlock() 71 | return ps.snapshot 72 | } 73 | 74 | // SnapshotSize is 75 | func (ps *Persister) SnapshotSize() int { 76 | ps.mu.Lock() 77 | defer ps.mu.Unlock() 78 | return len(ps.snapshot) 79 | } 80 | -------------------------------------------------------------------------------- /Raft/code/raft-API.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/aQuaYi/Distributed-Algorithms/Raft/code/labrpc" 8 | ) 9 | 10 | /** 11 | * // create a new Raft server instance: 12 | * rf := Make(peers, me, persister, applyCh) 13 | * 14 | * // start agreement on a new log entry: 15 | * rf.Start(command interface{}) (index, term, isLeader) 16 | * 17 | * // ask a Raft for its current term, and whether it thinks it is leader 18 | * rf.GetState() (term, isLeader) 19 | * 20 | * // each time a new entry is committed to the log, each Raft peer 21 | * // should send an ApplyMsg to the service (or tester). 22 | * type ApplyMsg 23 | * 24 | */ 25 | 26 | // Make is 27 | // the service or tester wants to create a Raft server. the ports 28 | // of all the Raft servers (including this one) are in peers[]. this 29 | // server's port is peers[me]. all the servers' peers[] arrays 30 | // have the same order. persister is a place for this server to 31 | // save its persistent state, and also initially holds the most 32 | // recent saved state, if any. applyCh is a channel on which the 33 | // tester or service expects Raft to send ApplyMsg messages. 34 | // Make() must return quickly, so it should start goroutines 35 | // for any long-running work. 
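// Added note: this matches how the tester creates an instance in config.go's
// start1():
//
//   applyCh := make(chan ApplyMsg)
//   rf := Make(ends, me, persister, applyCh)
//   // ...while a separate goroutine drains applyCh and checks each committed entry.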
36 | // 37 | func Make(peers []*labrpc.ClientEnd, me int, 38 | persister *Persister, applyCh chan ApplyMsg) *Raft { 39 | rf := &Raft{} 40 | 41 | // Make 函数参数的去处 42 | rf.peers = peers 43 | rf.me = me 44 | rf.persister = persister 45 | rf.chanApply = applyCh 46 | 47 | // 需要 persist 的参数 48 | rf.currentTerm = 0 49 | rf.votedFor = NOBODY 50 | le := LogEntry{LogIndex: 0, LogTerm: 0, Command: 0} 51 | rf.logs = append(rf.logs, le) // 在 logs 预先放入一个,方便 Raft.getLastIndex() 52 | 53 | // 私有状态 54 | rf.state = FOLLOWER 55 | // REVIEW: 把这些通道的设置成非缓冲的,看看会不会出错 56 | rf.chanCommit = make(chan struct{}, 100) 57 | rf.chanHeartBeat = make(chan struct{}, 100) 58 | rf.chanBeElected = make(chan struct{}, 100) 59 | 60 | // initialize from state persisted before a crash 61 | rf.readPersist(persister.ReadRaftState()) 62 | 63 | go rf.statesLoop() 64 | 65 | go rf.applyLoop() 66 | 67 | return rf 68 | } 69 | 70 | func (rf *Raft) statesLoop() { 71 | for { 72 | switch rf.state { 73 | case FOLLOWER: 74 | select { 75 | case <-time.After(electionTimeout()): 76 | rf.state = CANDIDATE 77 | case <-rf.chanHeartBeat: 78 | } 79 | case CANDIDATE: 80 | rf.newElection() 81 | case LEADER: 82 | rf.newHeartBeat() 83 | } 84 | } 85 | } 86 | 87 | func (rf *Raft) newElection() { 88 | rf.mu.Lock() 89 | 90 | rf.currentTerm++ 91 | rf.votedFor = rf.me 92 | rf.voteCount = 1 93 | 94 | rf.persist() 95 | rf.mu.Unlock() 96 | 97 | DPrintf("%s begin new election\n", rf) 98 | 99 | go rf.broadcastRequestVote() 100 | 101 | select { 102 | case <-time.After(electionTimeout()): 103 | case <-rf.chanHeartBeat: 104 | rf.state = FOLLOWER 105 | DPrintf("%s receives chanHeartbeat", rf) 106 | case <-rf.chanBeElected: 107 | rf.comeToPower() 108 | } 109 | } 110 | 111 | func (rf *Raft) comeToPower() { 112 | rf.mu.Lock() 113 | rf.state = LEADER 114 | DPrintf("%s is Leader now", rf) 115 | rf.nextIndex = make([]int, len(rf.peers)) 116 | rf.matchIndex = make([]int, len(rf.peers)) 117 | for i := range rf.peers { 118 | rf.nextIndex[i] = rf.getLastIndex() + 1 119 | rf.matchIndex[i] = 0 120 | } 121 | rf.mu.Unlock() 122 | } 123 | 124 | func (rf *Raft) newHeartBeat() { 125 | DPrintf("%s broadcastAppendEntries", rf) 126 | rf.broadcastAppendEntries() 127 | <-time.After(heartBeat) 128 | } 129 | 130 | func (rf *Raft) applyLoop() { 131 | for { 132 | <-rf.chanCommit 133 | DPrintf("%s COMMITTED %s", rf, rf.details()) 134 | // 135 | rf.mu.Lock() 136 | // 137 | commitIndex := rf.commitIndex 138 | baseIndex := rf.getBaseIndex() 139 | for i := rf.lastApplied + 1; i <= commitIndex; i++ { 140 | msg := ApplyMsg{ 141 | CommandValid: true, 142 | CommandIndex: i, 143 | Command: rf.logs[i-baseIndex].Command, 144 | } 145 | rf.chanApply <- msg 146 | DPrintf("%s ApplyMSG: %s %s", rf, msg, rf.details()) 147 | rf.lastApplied = i 148 | } 149 | // 150 | rf.mu.Unlock() 151 | } 152 | } 153 | 154 | // Start is 155 | // the service using Raft (e.g. a k/v server) wants to start 156 | // agreement on the next command to be appended to Raft's log. if this 157 | // server isn't the leader, returns false. otherwise start the 158 | // agreement and 159 | // ** return immediately, without waiting for the log appends to complete. ** 160 | // there is no guarantee that this 161 | // command will ever be committed to the Raft log, since the leader 162 | // may fail or lose an election. even if the Raft instance has been killed, 163 | // this function should return gracefully. 164 | // 165 | // the first return value is the index that the command will appear at 166 | // if it's ever committed. 
the second return value is the current 167 | // term. the third return value is true if this server believes it is 168 | // the leader. 169 | // 170 | func (rf *Raft) Start(command interface{}) (int, int, bool) { 171 | // Your code here (2B). 172 | rf.mu.Lock() 173 | defer rf.mu.Unlock() 174 | 175 | if !rf.isLeader() { 176 | return -1, -1, false 177 | } 178 | 179 | DPrintf("%s Start %v", rf, command) 180 | 181 | logIndex := rf.getLastIndex() + 1 182 | term := rf.currentTerm 183 | isLeader := rf.isLeader() 184 | 185 | rf.logs = append(rf.logs, 186 | LogEntry{ 187 | LogIndex: logIndex, 188 | LogTerm: term, 189 | Command: command, 190 | }) // append new entry from client 191 | 192 | rf.persist() 193 | 194 | // Your code above 195 | return logIndex, term, isLeader 196 | } 197 | 198 | // GetState is 199 | // return currentTerm and whether this server 200 | // believes it is the leader. 201 | func (rf *Raft) GetState() (int, bool) { 202 | 203 | var term int 204 | var isLeader bool 205 | // Your code here (2A). 206 | 207 | term = rf.currentTerm 208 | isLeader = rf.isLeader() 209 | 210 | // Your code above (2A) 211 | return term, isLeader 212 | } 213 | 214 | // ApplyMsg is 215 | // as each Raft peer becomes aware that successive log entries are 216 | // committed, the peer should send an ApplyMsg to the service (or 217 | // tester) on the same server, via the applyCh passed to Make(). set 218 | // CommandValid to true to indicate that the ApplyMsg contains a newly 219 | // committed log entry. 220 | // 221 | // in Lab 3 you'll want to send other kinds of messages (e.g., 222 | // snapshots) on the applyCh; at that point you can add fields to 223 | // ApplyMsg, but set CommandValid to false for these other uses. 224 | // 225 | type ApplyMsg struct { 226 | CommandValid bool // CommandValid = true 表示, 此消息是用于应用 Command 227 | CommandIndex int // Command 所在的 logEntry.logIndex 值 228 | Command interface{} 229 | } 230 | 231 | func (m ApplyMsg) String() string { 232 | return fmt.Sprintf("ApplyMsg{Valid:%t,Index:%d,Command:%v}", m.CommandValid, m.CommandIndex, m.Command) 233 | } 234 | 235 | // Kill is 236 | // the tester calls Kill() when a Raft instance won't 237 | // be needed again. you are not required to do anything 238 | // in Kill(), but it might be convenient to (for example) 239 | // turn off debug output from this instance. 240 | // 241 | func (rf *Raft) Kill() { 242 | // Your code here, if desired. 
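	// Sketch of one possible implementation (hypothetical, not present in this
	// code base): add a `dead int32` field to Raft, set it atomically here with
	//   atomic.StoreInt32(&rf.dead, 1)
	// and have statesLoop()/applyLoop() check atomic.LoadInt32(&rf.dead) and return.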
243 | } 244 | -------------------------------------------------------------------------------- /Raft/code/raft-AppendEntries.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "fmt" 4 | 5 | // AppendEntriesArgs 是添加 log 的参数 6 | type AppendEntriesArgs struct { 7 | Term int // leader.currentTerm 8 | LeaderID int // leader.me 9 | PrevLogIndex int // index of log entry immediately preceding new ones 10 | PrevLogTerm int // term of prevLogIndex entry 11 | LeaderCommit int // leader.commitIndex 12 | Entries []LogEntry // 需要添加的 log 单元 13 | } 14 | 15 | func (a AppendEntriesArgs) String() string { 16 | return fmt.Sprintf("appendEntriesArgs{R%d:T%d, PrevLogIndex:%d, PrevLogTerm:%d, LeaderCommit:%d, entries:%v}", 17 | a.LeaderID, a.Term, a.PrevLogIndex, a.PrevLogTerm, a.LeaderCommit, a.Entries) 18 | } 19 | 20 | func (rf *Raft) newAppendEntriesArgs(server int) AppendEntriesArgs { 21 | prevLogIndex := rf.nextIndex[server] - 1 22 | baseIndex := rf.getBaseIndex() 23 | return AppendEntriesArgs{ 24 | Term: rf.currentTerm, 25 | LeaderID: rf.me, 26 | PrevLogIndex: prevLogIndex, 27 | PrevLogTerm: rf.logs[prevLogIndex-baseIndex].LogTerm, 28 | Entries: rf.logs[prevLogIndex+1-baseIndex:], 29 | LeaderCommit: rf.commitIndex, 30 | } 31 | } 32 | 33 | // AppendEntriesReply 是 flower 回复 leader 的内容 34 | type AppendEntriesReply struct { 35 | Term int // 回复者的 term 36 | Success bool // 返回 true,如果被调用的 rf.logs 真的 append 了 entries 37 | NextIndex int // 下一次发送的 AppendEntriesArgs.Entries[0] 在 Leader.logs 中的索引号 38 | } 39 | 40 | func (r AppendEntriesReply) String() string { 41 | return fmt.Sprintf("appendEntriesReply{T%d, Success:%t, NextIndex:%d}", 42 | r.Term, r.Success, r.NextIndex) 43 | } 44 | 45 | func (rf *Raft) sendAppendEntries(server int, args AppendEntriesArgs, reply *AppendEntriesReply) bool { 46 | return rf.peers[server].Call("Raft.AppendEntries", args, reply) 47 | } 48 | 49 | // 广播 AppendEntries 有两个作用 50 | // 1. heart beat: 阻止其他 server 发起选举 51 | // 2. 
同步 log 到其他 server 52 | func (rf *Raft) broadcastAppendEntries() { 53 | rf.mu.Lock() 54 | defer rf.mu.Unlock() 55 | 56 | lastIndex := rf.getLastIndex() 57 | baseIndex := rf.getBaseIndex() 58 | 59 | newCommitIndex := 0 60 | // 统计 leader 的此 term 的已复制 log 数量,超过半数,就可以 commit 了 61 | for idx := rf.commitIndex + 1; idx <= lastIndex; idx++ { 62 | count := 1 // 1 是 rf 自己的一票 63 | for id := range rf.peers { 64 | if id != rf.me && 65 | rf.matchIndex[id] >= idx && 66 | rf.logs[idx-baseIndex].LogTerm == rf.currentTerm { 67 | count++ 68 | } 69 | } 70 | if 2*count > len(rf.peers) { 71 | newCommitIndex = idx 72 | } 73 | } 74 | if newCommitIndex > rf.commitIndex { 75 | rf.commitIndex = newCommitIndex 76 | rf.chanCommit <- struct{}{} 77 | DPrintf("%s COMMITTED %s", rf, rf.details()) 78 | } 79 | 80 | for id := range rf.peers { 81 | if id != rf.me && rf.isLeader() { 82 | args := rf.newAppendEntriesArgs(id) 83 | go rf.sendAppendEntriesAndDealReply(id, args) 84 | } 85 | } 86 | } 87 | 88 | func (rf *Raft) sendAppendEntriesAndDealReply(id int, args AppendEntriesArgs) { 89 | var reply AppendEntriesReply 90 | 91 | DPrintf("%s AppendEntries to R%d with %s", rf, id, args) 92 | 93 | ok := rf.sendAppendEntries(id, args, &reply) 94 | if !ok { 95 | return 96 | } 97 | 98 | rf.mu.Lock() 99 | defer rf.mu.Unlock() 100 | 101 | if reply.Term > rf.currentTerm { 102 | rf.currentTerm = reply.Term 103 | rf.state = FOLLOWER 104 | rf.votedFor = NOBODY 105 | rf.persist() 106 | return 107 | } 108 | 109 | if rf.currentTerm != args.Term { 110 | // term 已经改变 111 | return 112 | } 113 | 114 | if !reply.Success { 115 | rf.nextIndex[id] = reply.NextIndex 116 | return 117 | } 118 | 119 | if len(args.Entries) == 0 { 120 | // 纯 heartBeat 就无需进一步处理了 121 | return 122 | } 123 | 124 | lastArgsLogIndex := args.Entries[len(args.Entries)-1].LogIndex 125 | rf.matchIndex[id] = lastArgsLogIndex 126 | rf.nextIndex[id] = lastArgsLogIndex + 1 127 | } 128 | 129 | // AppendEntries 会处理收到 AppendEntries RPC 130 | func (rf *Raft) AppendEntries(args AppendEntriesArgs, reply *AppendEntriesReply) { 131 | rf.mu.Lock() 132 | defer rf.mu.Unlock() 133 | 134 | // REVIEW: 按照 figure 2 中的内容来,重新编写此函数 135 | 136 | reply.Success = false 137 | 138 | // 1. Replay false at once if term < currentTerm 139 | if args.Term < rf.currentTerm { 140 | reply.Term = rf.currentTerm 141 | DPrintf("%s rejected %s", rf, args) 142 | return 143 | } 144 | 145 | defer rf.persist() 146 | 147 | rf.chanHeartBeat <- struct{}{} 148 | 149 | DPrintf("%s 收到了真实有效的信号 %s", rf, args) 150 | 151 | if args.Term > rf.currentTerm { 152 | rf.currentTerm = args.Term 153 | rf.state = FOLLOWER 154 | rf.votedFor = NOBODY 155 | } 156 | 157 | reply.Term = rf.currentTerm 158 | 159 | if args.PrevLogIndex > rf.getLastIndex() { 160 | reply.NextIndex = rf.getLastIndex() + 1 161 | return 162 | } 163 | 164 | baseIndex := rf.getBaseIndex() 165 | 166 | if args.PrevLogIndex > baseIndex { 167 | term := rf.logs[args.PrevLogIndex-baseIndex].LogTerm 168 | if args.PrevLogTerm != term { 169 | for i := args.PrevLogIndex - 1; i >= baseIndex; i-- { 170 | if rf.logs[i-baseIndex].LogTerm != term { 171 | reply.NextIndex = i + 1 172 | break 173 | } 174 | } 175 | return 176 | } 177 | } 178 | 179 | if args.PrevLogIndex >= baseIndex { 180 | rf.logs = rf.logs[:args.PrevLogIndex+1-baseIndex] 181 | rf.logs = append(rf.logs, args.Entries...) 182 | reply.Success = true 183 | reply.NextIndex = rf.getLastIndex() + 1 184 | } 185 | 186 | // 5. 
if leadercommit > commitIndex, set commitIndex = min(leaderCommit, index of last new entry) 187 | if args.LeaderCommit > rf.commitIndex { 188 | rf.commitIndex = min(args.LeaderCommit, rf.getLastIndex()) 189 | rf.chanCommit <- struct{}{} 190 | } 191 | 192 | } 193 | -------------------------------------------------------------------------------- /Raft/code/raft-LogEntry.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // LogEntry is log entry 4 | type LogEntry struct { 5 | LogIndex int // raft.logs 会被压缩裁剪,需要保存此 log 在原本的索引号 6 | LogTerm int // LEADER 在生成此 log 时的 LEADER.currentTerm 7 | Command interface{} // 具体的命令内容 8 | } 9 | -------------------------------------------------------------------------------- /Raft/code/raft-Raft.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | 7 | "github.com/aQuaYi/Distributed-Algorithms/Raft/code/labrpc" 8 | ) 9 | 10 | const ( 11 | // NOBODY used for Raft.votedFor, means vote for none 12 | NOBODY = -1 13 | ) 14 | 15 | // Raft is 16 | // A Go object implementing a single Raft peer. 17 | // 18 | type Raft struct { 19 | mu sync.Mutex // Lock to protect shared access to this peer's state 20 | peers []*labrpc.ClientEnd // RPC end points of all peers 21 | persister *Persister // Object to hold this peer's persisted state 22 | me int // this peer's index into peers[] 23 | 24 | // Your data here (2A, 2B, 2C). 25 | // Look at the paper's Figure 2 for a description of what 26 | // state a Raft server must maintain. 27 | 28 | /* ↓ state of raft on Figure 2 ↓ */ 29 | 30 | // Persistent state on all servers: 31 | // "Persistent" 的意思是,一旦被修改,就要运行 rf.persist() 32 | currentTerm int // latest term server has seen. Initialized to 0. 
33 | votedFor int // candidateID that received vote in current Term 34 | logs []LogEntry // NOTICE: first LogEntry.LogIndex is 1 35 | 36 | // Volatile state on all servers: initialized to 0, increase monotonically 37 | commitIndex int // index of highest log entry known to be committed 38 | lastApplied int // index of highest log entry known to be applied to state machine 39 | 40 | // Volatile state on leader: 41 | // nextIndex : for each server, index of the next log entry to send to that server 42 | // initialized to leader last LogIndex+1 43 | nextIndex []int 44 | // matchIndex : for each server, index of highest log entry known to be replicated on server 45 | // initialized to 0, increases monotonically 46 | matchIndex []int 47 | 48 | /* ↑ state of raft on Figure 2 ↑ */ 49 | 50 | state state 51 | voteCount int 52 | 53 | chanApply chan ApplyMsg 54 | 55 | //channel 56 | chanCommit chan struct{} 57 | chanHeartBeat chan struct{} 58 | chanBeElected chan struct{} 59 | } 60 | 61 | func (rf *Raft) String() string { 62 | return fmt.Sprintf(" ", 63 | rf.me, rf.currentTerm, rf.state, rf.commitIndex, rf.lastApplied) 64 | } 65 | 66 | func (rf *Raft) details() string { 67 | postfix := "" 68 | if rf.state == LEADER { 69 | postfix = fmt.Sprintf(", nextIndex%v, matchIndex%v", rf.nextIndex, rf.matchIndex) 70 | } 71 | return fmt.Sprintf("@@ votedFor:%2d, commitIndex:%d, lastApplied:%d, logs:%v%s @@", 72 | rf.votedFor, rf.commitIndex, rf.lastApplied, rf.logs, postfix) 73 | } 74 | -------------------------------------------------------------------------------- /Raft/code/raft-RequestVote.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "fmt" 4 | 5 | // RequestVoteArgs 获取投票参数 6 | // example RequestVote RPC arguments structure. 7 | // field names must start with capital letters! 8 | // 9 | type RequestVoteArgs struct { 10 | Term int // candidate's term 11 | CandidateID int // candidate requesting vote 12 | LastLogIndex int // index of candidate's last log entry 13 | LastLogTerm int // term of candidate's last log entry 14 | } 15 | 16 | func (a RequestVoteArgs) String() string { 17 | return fmt.Sprintf("voteArgs{R%d:T%d;LastIndex:%d;LastTerm:%d}", 18 | a.CandidateID, a.Term, a.LastLogIndex, a.LastLogTerm) 19 | } 20 | 21 | // RequestVoteReply is 22 | // example RequestVote RPC reply structure. 23 | // field names must start with capital letters! 24 | // 25 | type RequestVoteReply struct { 26 | Term int 27 | VoteGranted bool 28 | } 29 | 30 | func (reply RequestVoteReply) String() string { 31 | return fmt.Sprintf("voteReply{T%d,Granted:%t}", reply.Term, reply.VoteGranted) 32 | } 33 | 34 | // RequestVote is 35 | // example RequestVote RPC handler. 36 | // 37 | func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { 38 | DPrintf("%s 收到投票请求 [%s]", rf, args) 39 | 40 | rf.mu.Lock() 41 | defer rf.mu.Unlock() 42 | 43 | // 1. replay false if term < currentTerm 44 | if args.Term < rf.currentTerm { 45 | reply.Term = rf.currentTerm 46 | reply.VoteGranted = false 47 | return 48 | } 49 | 50 | defer rf.persist() 51 | 52 | if args.Term > rf.currentTerm { 53 | rf.currentTerm = args.Term 54 | rf.state = FOLLOWER 55 | rf.votedFor = NOBODY 56 | } 57 | 58 | reply.Term = rf.currentTerm 59 | 60 | // 2. 
votedFor is null or candidateId and 61 | // candidate's log is at least as up-to-date as receiver's log, then grant vote 62 | // If the logs have last entries with different terms, then the log with the later term is more up-to-date 63 | // If the logs end with the same term, then whichever log is longer is more up-to-date 64 | 65 | if isValidArgs(rf, args) { 66 | reply.VoteGranted = true 67 | rf.chanHeartBeat <- struct{}{} 68 | rf.votedFor = args.CandidateID 69 | DPrintf("%s voted for %s", rf, args) 70 | return 71 | } 72 | DPrintf("%s **NOT** voted for %s", rf, args) 73 | } 74 | 75 | func isValidArgs(rf *Raft, args *RequestVoteArgs) bool { 76 | term := rf.getLastTerm() 77 | index := rf.getLastIndex() 78 | return (rf.votedFor == NOBODY || rf.votedFor == args.CandidateID) && 79 | isUpToDate(args, term, index) 80 | } 81 | 82 | func isUpToDate(args *RequestVoteArgs, term, index int) bool { 83 | return (args.LastLogTerm > term) || 84 | (args.LastLogTerm == term && args.LastLogIndex >= index) 85 | } 86 | 87 | func (rf *Raft) broadcastRequestVote() { 88 | var args RequestVoteArgs 89 | 90 | rf.mu.Lock() 91 | args.Term = rf.currentTerm 92 | args.CandidateID = rf.me 93 | args.LastLogTerm = rf.getLastTerm() 94 | args.LastLogIndex = rf.getLastIndex() 95 | rf.mu.Unlock() 96 | 97 | for i := range rf.peers { 98 | if i != rf.me && rf.isCandidate() { 99 | go rf.sendRequestVoteAndDealReply(i, args) 100 | } 101 | } 102 | } 103 | 104 | func (rf *Raft) sendRequestVoteAndDealReply(i int, args RequestVoteArgs) { 105 | var reply RequestVoteReply 106 | 107 | DPrintf("%s RequestVote to %d", rf, i) 108 | 109 | ok := rf.sendRequestVote(i, &args, &reply) 110 | if !ok { 111 | return 112 | } 113 | 114 | rf.mu.Lock() 115 | defer rf.mu.Unlock() 116 | 117 | if reply.Term > rf.currentTerm { 118 | rf.currentTerm = reply.Term 119 | rf.state = FOLLOWER 120 | rf.votedFor = NOBODY 121 | rf.persist() 122 | return 123 | } 124 | 125 | if rf.currentTerm != args.Term || !reply.VoteGranted { 126 | // term 已经改变 或 没有投我的票 127 | return 128 | } 129 | 130 | rf.voteCount++ 131 | if 2*rf.voteCount > len(rf.peers) && rf.isCandidate() { 132 | rf.chanBeElected <- struct{}{} 133 | } 134 | } 135 | 136 | // 137 | // example code to send a RequestVote RPC to a server. 138 | // server is the index of the target server in rf.peers[]. 139 | // expects RPC arguments in args. 140 | // fills in *reply with RPC reply, so caller should 141 | // pass &reply. 142 | // the types of the args and reply passed to Call() must be 143 | // the same as the types of the arguments declared in the 144 | // handler function (including whether they are pointers). 145 | // 146 | // The labrpc package simulates a lossy network, in which servers 147 | // may be unreachable, and in which requests and replies may be lost. 148 | // Call() sends a request and waits for a reply. If a reply arrives 149 | // within a timeout interval, Call() returns true; otherwise 150 | // Call() returns false. Thus Call() may not return for a while. 151 | // A false return can be caused by a dead server, a live server that 152 | // can't be reached, a lost request, or a lost reply. 153 | // 154 | // Call() is guaranteed to return (perhaps after a delay) *except* if the 155 | // handler function on the server side does not return. Thus there 156 | // is no need to implement your own timeouts around Call(). 157 | // 158 | // look at the comments in ../labrpc/labrpc.go for more details. 
159 | // 160 | // if you're having trouble getting RPC to work, check that you've 161 | // capitalized all field names in struts passed over RPC, and 162 | // that the caller passes the address of the reply struct with &, not 163 | // the struct itself. 164 | // 165 | func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, reply *RequestVoteReply) bool { 166 | return rf.peers[server].Call("Raft.RequestVote", args, reply) 167 | } 168 | -------------------------------------------------------------------------------- /Raft/code/raft-method.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 这里的方法都是被内部引用的,所以无需加锁 4 | 5 | func (rf *Raft) getLastIndex() int { 6 | return rf.logs[len(rf.logs)-1].LogIndex 7 | } 8 | 9 | func (rf *Raft) getBaseIndex() int { 10 | return rf.logs[0].LogIndex 11 | } 12 | 13 | func (rf *Raft) getLastTerm() int { 14 | return rf.logs[len(rf.logs)-1].LogTerm 15 | } 16 | 17 | func (rf *Raft) isLeader() bool { 18 | return rf.state == LEADER 19 | } 20 | 21 | func (rf *Raft) isCandidate() bool { 22 | return rf.state == CANDIDATE 23 | } 24 | 25 | func (rf *Raft) isFollower() bool { 26 | return rf.state == FOLLOWER 27 | } 28 | -------------------------------------------------------------------------------- /Raft/code/raft-persist.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | 7 | "github.com/aQuaYi/Distributed-Algorithms/Raft/code/labgob" 8 | ) 9 | 10 | // 11 | // save Raft's persistent state to stable storage, 12 | // where it can later be retrieved after a crash and restart. 13 | // see paper's Figure 2 for a description of what should be persistent. 14 | // 15 | func (rf *Raft) persist() { 16 | // Your code here (2C). 17 | // Example: 18 | w := new(bytes.Buffer) 19 | e := labgob.NewEncoder(w) 20 | e.Encode(rf.currentTerm) 21 | e.Encode(rf.votedFor) 22 | e.Encode(rf.logs) 23 | data := w.Bytes() 24 | rf.persister.SaveRaftState(data) 25 | 26 | DPrintf("%s PERSISTED", rf) 27 | } 28 | 29 | // 30 | // restore previously persisted state. 31 | // 32 | func (rf *Raft) readPersist(data []byte) { 33 | if data == nil || len(data) < 1 { // bootstrap without any state? 
34 | return 35 | } 36 | r := bytes.NewBuffer(data) 37 | d := gob.NewDecoder(r) 38 | d.Decode(&rf.currentTerm) 39 | d.Decode(&rf.votedFor) 40 | d.Decode(&rf.logs) 41 | } 42 | -------------------------------------------------------------------------------- /Raft/code/raft-settings.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "log" 5 | "math/rand" 6 | "time" 7 | ) 8 | 9 | func init() { 10 | log.SetFlags(log.LstdFlags | log.Lmicroseconds) 11 | DPrintf("程序开始运行") 12 | } 13 | 14 | const ( 15 | // heartBeat 发送心跳的时间间隔,ms 16 | heartBeat = 50 * time.Millisecond 17 | // minElection 选举过期的最小时间间隔,ms 18 | minElection = heartBeat * 10 19 | // minElectionInterval 选举过期的最大时间间隔,ms 20 | maxElection = minElection * 8 / 5 21 | 22 | // 按照论文 5.6 Timing and availability 的要求 23 | // heartBeat 和 minElection 需要相差了一个数量级 24 | ) 25 | 26 | func electionTimeout() time.Duration { 27 | interval := int(minElection) + 28 | rand.Intn(int(maxElection-minElection)) 29 | return time.Duration(interval) 30 | } 31 | -------------------------------------------------------------------------------- /Raft/code/raft-settings_test.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func Test_ElectionTimeout(t *testing.T) { 11 | ast := assert.New(t) 12 | for i := 0; i < 1000; i++ { 13 | rate := electionTimeout() / heartBeat 14 | ast.True(rate >= 10, "electionTimeout 没有比 heartBeatInterval 大 10 倍") 15 | } 16 | } 17 | 18 | func Test_heartBeat_isInRange(t *testing.T) { 19 | ast := assert.New(t) 20 | minInterval := 30 * time.Millisecond 21 | maxInterval := 100 * time.Millisecond 22 | isInRange := minInterval <= heartBeat && heartBeat <= maxInterval 23 | ast.True(isInRange, " heartBeat 设置的过大或过小") 24 | } 25 | -------------------------------------------------------------------------------- /Raft/code/raft-state.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | type state int 4 | 5 | // 规定了 server 所需的 3 种状态 6 | const ( 7 | LEADER state = iota 8 | CANDIDATE 9 | FOLLOWER 10 | ) 11 | 12 | func (s state) String() string { 13 | switch s { 14 | case LEADER: 15 | return "Leader" 16 | case CANDIDATE: 17 | return "Candidate" 18 | case FOLLOWER: 19 | return "Follower" 20 | default: 21 | panic("出现了第4种 server state") 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /Raft/code/raft-state_test.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "testing" 4 | 5 | func Test_state_String(t *testing.T) { 6 | tests := []struct { 7 | name string 8 | s state 9 | want string 10 | }{ 11 | 12 | { 13 | "Follower", 14 | FOLLOWER, 15 | "Follower", 16 | }, 17 | 18 | { 19 | "Candidate", 20 | CANDIDATE, 21 | "Candidate", 22 | }, 23 | 24 | { 25 | "Leader", 26 | LEADER, 27 | "Leader", 28 | }, 29 | } 30 | for _, tt := range tests { 31 | t.Run(tt.name, func(t *testing.T) { 32 | if got := tt.s.String(); got != tt.want { 33 | t.Errorf("state.String() = %v, want %v", got, tt.want) 34 | } 35 | }) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /Raft/code/test_test.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | // 4 | // Raft tests. 
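//
// Assuming the standard Go toolchain, subsets of these tests can be selected
// by name pattern, for example:
//
//	go test -race -run 2A    // leader-election tests
//	go test -race -run 2B    // log replication / agreement tests
//	go test -race -run 2C    // persistence and Figure 8 tests
//
// (-race is optional, but it helps catch locking mistakes in the raft-*.go files.)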
5 | // 6 | // we will use the original test_test.go to test your code for grading. 7 | // so, while you can modify this code to help you debug, please 8 | // test with the original before submitting. 9 | // 10 | 11 | import "testing" 12 | import "fmt" 13 | import "time" 14 | import "math/rand" 15 | import "sync/atomic" 16 | import "sync" 17 | 18 | // The tester generously allows solutions to complete elections in one second 19 | // (much more than the paper's range of timeouts). 20 | const RaftElectionTimeout = 1000 * time.Millisecond 21 | 22 | func TestInitialElection2A(t *testing.T) { 23 | servers := 3 24 | cfg := makeConfig(t, servers, false) 25 | defer cfg.cleanup() 26 | 27 | cfg.begin("Test (2A): InitialElection2A initial election") 28 | // is a leader elected? 29 | cfg.checkOneLeader() 30 | 31 | // sleep a bit to avoid racing with followers learning of the 32 | // election, then check that all peers agree on the term. 33 | time.Sleep(50 * time.Millisecond) 34 | term1 := cfg.checkTerms() 35 | 36 | // does the leader+term stay the same if there is no network failure? 37 | time.Sleep(2 * RaftElectionTimeout) 38 | term2 := cfg.checkTerms() 39 | if term1 != term2 { 40 | fmt.Printf("warning: term changed even though there were no failures") 41 | } 42 | 43 | // there should still be a leader. 44 | cfg.checkOneLeader() 45 | 46 | cfg.end() 47 | } 48 | 49 | func TestReElection2A(t *testing.T) { 50 | servers := 3 51 | cfg := makeConfig(t, servers, false) 52 | defer cfg.cleanup() 53 | 54 | cfg.begin("Test (2A): ReElection2A election after network failure") 55 | 56 | leader1 := cfg.checkOneLeader() 57 | 58 | // if the leader disconnects, a new one should be elected. 59 | cfg.disconnect(leader1) 60 | cfg.checkOneLeader() 61 | 62 | // if the old leader rejoins, that shouldn't 63 | // disturb the new leader. 64 | cfg.connect(leader1) 65 | leader2 := cfg.checkOneLeader() 66 | 67 | // if there's no quorum, no leader should 68 | // be elected. 69 | cfg.disconnect(leader2) 70 | cfg.disconnect((leader2 + 1) % servers) 71 | time.Sleep(2 * RaftElectionTimeout) 72 | cfg.checkNoLeader() 73 | 74 | // if a quorum arises, it should elect a leader. 75 | cfg.connect((leader2 + 1) % servers) 76 | cfg.checkOneLeader() 77 | 78 | // re-join of last node shouldn't prevent leader from existing. 79 | cfg.connect(leader2) 80 | cfg.checkOneLeader() 81 | 82 | cfg.end() 83 | } 84 | 85 | func TestBasicAgree2B(t *testing.T) { 86 | servers := 5 87 | cfg := makeConfig(t, servers, false) 88 | defer cfg.cleanup() 89 | 90 | cfg.begin("Test (2B): BasicAgree2B basic agreement") 91 | 92 | iters := 3 93 | for index := 1; index < iters+1; index++ { 94 | nd, _ := cfg.nCommitted(index) 95 | if nd > 0 { 96 | t.Fatalf("some have committed before Start()") 97 | } 98 | 99 | xindex := cfg.one(index*100, servers, false) 100 | if xindex != index { 101 | t.Fatalf("got index %v but expected %v", xindex, index) 102 | } 103 | } 104 | 105 | cfg.end() 106 | } 107 | 108 | func TestFailAgree2B(t *testing.T) { 109 | servers := 3 110 | cfg := makeConfig(t, servers, false) 111 | defer cfg.cleanup() 112 | 113 | cfg.begin("Test (2B): FailAgree2B agreement despite follower disconnection") 114 | 115 | cfg.one(101, servers, false) 116 | 117 | // follower network disconnection 118 | leader := cfg.checkOneLeader() 119 | cfg.disconnect((leader + 1) % servers) 120 | 121 | // agree despite one disconnected server? 
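	// (With 2 of the 3 servers still connected, a majority can still commit,
	// which is why these cfg.one() calls only expect servers-1 replicas.)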
122 | cfg.one(102, servers-1, false) 123 | cfg.one(103, servers-1, false) 124 | time.Sleep(RaftElectionTimeout) 125 | cfg.one(104, servers-1, false) 126 | cfg.one(105, servers-1, false) 127 | 128 | // re-connect 129 | cfg.connect((leader + 1) % servers) 130 | 131 | // agree with full set of servers? 132 | cfg.one(106, servers, true) 133 | time.Sleep(RaftElectionTimeout) 134 | cfg.one(107, servers, true) 135 | 136 | cfg.end() 137 | } 138 | 139 | func TestFailNoAgree2B(t *testing.T) { 140 | servers := 5 141 | cfg := makeConfig(t, servers, false) 142 | defer cfg.cleanup() 143 | 144 | cfg.begin("Test (2B): FailNoAgree2B no agreement if too many followers disconnect") 145 | 146 | cfg.one(10, servers, false) 147 | 148 | // 3 of 5 followers disconnect 149 | leader := cfg.checkOneLeader() 150 | cfg.disconnect((leader + 1) % servers) 151 | cfg.disconnect((leader + 2) % servers) 152 | cfg.disconnect((leader + 3) % servers) 153 | 154 | index, _, ok := cfg.rafts[leader].Start(20) 155 | if ok != true { 156 | t.Fatalf("leader rejected Start()") 157 | } 158 | if index != 2 { 159 | t.Fatalf("expected index 2, got %v", index) 160 | } 161 | 162 | time.Sleep(2 * RaftElectionTimeout) 163 | 164 | n, _ := cfg.nCommitted(index) 165 | if n > 0 { 166 | t.Fatalf("%v committed but no majority", n) 167 | } 168 | 169 | // repair 170 | cfg.connect((leader + 1) % servers) 171 | cfg.connect((leader + 2) % servers) 172 | cfg.connect((leader + 3) % servers) 173 | 174 | // the disconnected majority may have chosen a leader from 175 | // among their own ranks, forgetting index 2. 176 | leader2 := cfg.checkOneLeader() 177 | index2, _, ok2 := cfg.rafts[leader2].Start(30) 178 | if ok2 == false { 179 | t.Fatalf("leader2 rejected Start()") 180 | } 181 | if index2 < 2 || index2 > 3 { 182 | t.Fatalf("unexpected index %v", index2) 183 | } 184 | 185 | cfg.one(1000, servers, true) 186 | 187 | cfg.end() 188 | } 189 | 190 | func TestConcurrentStarts2B(t *testing.T) { 191 | servers := 3 192 | cfg := makeConfig(t, servers, false) 193 | defer cfg.cleanup() 194 | 195 | cfg.begin("Test (2B): ConcurrentStarts2B concurrent Start()s") 196 | 197 | var success bool 198 | loop: 199 | for try := 0; try < 5; try++ { 200 | if try > 0 { 201 | // give solution some time to settle 202 | time.Sleep(3 * time.Second) 203 | } 204 | 205 | leader := cfg.checkOneLeader() 206 | _, term, ok := cfg.rafts[leader].Start(1) 207 | if !ok { 208 | // leader moved on really quickly 209 | continue 210 | } 211 | 212 | iters := 5 213 | var wg sync.WaitGroup 214 | is := make(chan int, iters) 215 | for ii := 0; ii < iters; ii++ { 216 | wg.Add(1) 217 | go func(i int) { 218 | defer wg.Done() 219 | i, term1, ok := cfg.rafts[leader].Start(100 + i) 220 | if term1 != term { 221 | return 222 | } 223 | if ok != true { 224 | return 225 | } 226 | is <- i 227 | }(ii) 228 | } 229 | 230 | wg.Wait() 231 | close(is) 232 | 233 | for j := 0; j < servers; j++ { 234 | if t, _ := cfg.rafts[j].GetState(); t != term { 235 | // term changed -- can't expect low RPC counts 236 | continue loop 237 | } 238 | } 239 | 240 | failed := false 241 | cmds := []int{} 242 | for index := range is { 243 | cmd := cfg.wait(index, servers, term) 244 | if ix, ok := cmd.(int); ok { 245 | if ix == -1 { 246 | // peers have moved on to later terms 247 | // so we can't expect all Start()s to 248 | // have succeeded 249 | failed = true 250 | break 251 | } 252 | cmds = append(cmds, ix) 253 | } else { 254 | t.Fatalf("value %v is not an int", cmd) 255 | } 256 | } 257 | 258 | if failed { 259 | // avoid leaking goroutines 260 
| go func() { 261 | for range is { 262 | } 263 | }() 264 | continue 265 | } 266 | 267 | for ii := 0; ii < iters; ii++ { 268 | x := 100 + ii 269 | ok := false 270 | for j := 0; j < len(cmds); j++ { 271 | if cmds[j] == x { 272 | ok = true 273 | } 274 | } 275 | if ok == false { 276 | t.Fatalf("cmd %v missing in %v", x, cmds) 277 | } 278 | } 279 | 280 | success = true 281 | break 282 | } 283 | 284 | if !success { 285 | t.Fatalf("term changed too often") 286 | } 287 | 288 | cfg.end() 289 | } 290 | 291 | func TestRejoin2B(t *testing.T) { 292 | servers := 3 293 | cfg := makeConfig(t, servers, false) 294 | defer cfg.cleanup() 295 | 296 | cfg.begin("Test (2B): Rejoin2B rejoin of partitioned leader") 297 | 298 | cfg.one(101, servers, true) 299 | 300 | // leader network failure 301 | leader1 := cfg.checkOneLeader() 302 | cfg.disconnect(leader1) 303 | 304 | // make old leader try to agree on some entries 305 | cfg.rafts[leader1].Start(102) 306 | cfg.rafts[leader1].Start(103) 307 | cfg.rafts[leader1].Start(104) 308 | 309 | // new leader commits, also for index=2 310 | cfg.one(103, 2, true) 311 | 312 | // new leader network failure 313 | leader2 := cfg.checkOneLeader() 314 | cfg.disconnect(leader2) 315 | 316 | // old leader connected again 317 | cfg.connect(leader1) 318 | 319 | cfg.one(104, 2, true) 320 | 321 | // all together now 322 | cfg.connect(leader2) 323 | 324 | cfg.one(105, servers, true) 325 | 326 | cfg.end() 327 | } 328 | 329 | func TestBackup2B(t *testing.T) { 330 | servers := 5 331 | cfg := makeConfig(t, servers, false) 332 | defer cfg.cleanup() 333 | 334 | cfg.begin("Test (2B): Backup2B leader backs up quickly over incorrect follower logs") 335 | 336 | cfg.one(rand.Int(), servers, true) 337 | 338 | // put leader and one follower in a partition 339 | leader1 := cfg.checkOneLeader() 340 | cfg.disconnect((leader1 + 2) % servers) 341 | cfg.disconnect((leader1 + 3) % servers) 342 | cfg.disconnect((leader1 + 4) % servers) 343 | 344 | // submit lots of commands that won't commit 345 | for i := 0; i < 50; i++ { 346 | cfg.rafts[leader1].Start(rand.Int()) 347 | } 348 | 349 | time.Sleep(RaftElectionTimeout / 2) 350 | 351 | cfg.disconnect((leader1 + 0) % servers) 352 | cfg.disconnect((leader1 + 1) % servers) 353 | 354 | // allow other partition to recover 355 | cfg.connect((leader1 + 2) % servers) 356 | cfg.connect((leader1 + 3) % servers) 357 | cfg.connect((leader1 + 4) % servers) 358 | 359 | // lots of successful commands to new group. 360 | for i := 0; i < 50; i++ { 361 | cfg.one(rand.Int(), 3, true) 362 | } 363 | 364 | // now another partitioned leader and one follower 365 | leader2 := cfg.checkOneLeader() 366 | other := (leader1 + 2) % servers 367 | if leader2 == other { 368 | other = (leader2 + 1) % servers 369 | } 370 | cfg.disconnect(other) 371 | 372 | // lots more commands that won't commit 373 | for i := 0; i < 50; i++ { 374 | cfg.rafts[leader2].Start(rand.Int()) 375 | } 376 | 377 | time.Sleep(RaftElectionTimeout / 2) 378 | 379 | // bring original leader back to life, 380 | for i := 0; i < servers; i++ { 381 | cfg.disconnect(i) 382 | } 383 | cfg.connect((leader1 + 0) % servers) 384 | cfg.connect((leader1 + 1) % servers) 385 | cfg.connect(other) 386 | 387 | // lots of successful commands to new group. 
388 | for i := 0; i < 50; i++ { 389 | cfg.one(rand.Int(), 3, true) 390 | } 391 | 392 | // now everyone 393 | for i := 0; i < servers; i++ { 394 | cfg.connect(i) 395 | } 396 | cfg.one(rand.Int(), servers, true) 397 | 398 | cfg.end() 399 | } 400 | 401 | func TestCount2B(t *testing.T) { 402 | servers := 3 403 | cfg := makeConfig(t, servers, false) 404 | defer cfg.cleanup() 405 | 406 | cfg.begin("Test (2B): Count2B RPC counts aren't too high") 407 | 408 | rpcs := func() (n int) { 409 | for j := 0; j < servers; j++ { 410 | n += cfg.rpcCount(j) 411 | } 412 | return 413 | } 414 | 415 | leader := cfg.checkOneLeader() 416 | 417 | total1 := rpcs() 418 | 419 | if total1 > 30 || total1 < 1 { 420 | t.Fatalf("too many or few RPCs (%v) to elect initial leader\n", total1) 421 | } 422 | 423 | var total2 int 424 | var success bool 425 | loop: 426 | for try := 0; try < 5; try++ { 427 | if try > 0 { 428 | // give solution some time to settle 429 | time.Sleep(3 * time.Second) 430 | } 431 | 432 | leader = cfg.checkOneLeader() 433 | total1 = rpcs() 434 | 435 | iters := 10 436 | starti, term, ok := cfg.rafts[leader].Start(1) 437 | if !ok { 438 | // leader moved on really quickly 439 | continue 440 | } 441 | cmds := []int{} 442 | for i := 1; i < iters+2; i++ { 443 | x := int(rand.Int31()) 444 | cmds = append(cmds, x) 445 | index1, term1, ok := cfg.rafts[leader].Start(x) 446 | if term1 != term { 447 | // Term changed while starting 448 | continue loop 449 | } 450 | if !ok { 451 | // No longer the leader, so term has changed 452 | continue loop 453 | } 454 | if starti+i != index1 { 455 | t.Fatalf("Start() failed") 456 | } 457 | } 458 | 459 | for i := 1; i < iters+1; i++ { 460 | cmd := cfg.wait(starti+i, servers, term) 461 | if ix, ok := cmd.(int); ok == false || ix != cmds[i-1] { 462 | if ix == -1 { 463 | // term changed -- try again 464 | continue loop 465 | } 466 | t.Fatalf("wrong value %v committed for index %v; expected %v\n", cmd, starti+i, cmds) 467 | } 468 | } 469 | 470 | failed := false 471 | total2 = 0 472 | for j := 0; j < servers; j++ { 473 | if t, _ := cfg.rafts[j].GetState(); t != term { 474 | // term changed -- can't expect low RPC counts 475 | // need to keep going to update total2 476 | failed = true 477 | } 478 | total2 += cfg.rpcCount(j) 479 | } 480 | 481 | if failed { 482 | continue loop 483 | } 484 | 485 | if total2-total1 > (iters+1+3)*3 { 486 | t.Fatalf("too many RPCs (%v) for %v entries\n", total2-total1, iters) 487 | } 488 | 489 | success = true 490 | break 491 | } 492 | 493 | if !success { 494 | t.Fatalf("term changed too often") 495 | } 496 | 497 | time.Sleep(RaftElectionTimeout) 498 | 499 | total3 := 0 500 | for j := 0; j < servers; j++ { 501 | total3 += cfg.rpcCount(j) 502 | } 503 | 504 | if total3-total2 > 3*20 { 505 | t.Fatalf("too many RPCs (%v) for 1 second of idleness\n", total3-total2) 506 | } 507 | 508 | cfg.end() 509 | } 510 | 511 | func TestPersist12C(t *testing.T) { 512 | servers := 3 513 | cfg := makeConfig(t, servers, false) 514 | defer cfg.cleanup() 515 | 516 | cfg.begin("Test (2C): Persist12C basic persistence") 517 | 518 | cfg.one(11, servers, true) 519 | 520 | // crash and re-start all 521 | for i := 0; i < servers; i++ { 522 | cfg.start1(i) 523 | } 524 | for i := 0; i < servers; i++ { 525 | cfg.disconnect(i) 526 | cfg.connect(i) 527 | } 528 | 529 | cfg.one(12, servers, true) 530 | 531 | leader1 := cfg.checkOneLeader() 532 | cfg.disconnect(leader1) 533 | cfg.start1(leader1) 534 | cfg.connect(leader1) 535 | 536 | cfg.one(13, servers, true) 537 | 538 | leader2 := 
cfg.checkOneLeader() 539 | cfg.disconnect(leader2) 540 | cfg.one(14, servers-1, true) 541 | cfg.start1(leader2) 542 | cfg.connect(leader2) 543 | 544 | cfg.wait(4, servers, -1) // wait for leader2 to join before killing i3 545 | 546 | i3 := (cfg.checkOneLeader() + 1) % servers 547 | cfg.disconnect(i3) 548 | cfg.one(15, servers-1, true) 549 | cfg.start1(i3) 550 | cfg.connect(i3) 551 | 552 | cfg.one(16, servers, true) 553 | 554 | cfg.end() 555 | } 556 | 557 | func TestPersist22C(t *testing.T) { 558 | servers := 5 559 | cfg := makeConfig(t, servers, false) 560 | defer cfg.cleanup() 561 | 562 | cfg.begin("Test (2C): Persist22C more persistence") 563 | 564 | index := 1 565 | for iters := 0; iters < 5; iters++ { 566 | cfg.one(10+index, servers, true) 567 | index++ 568 | 569 | leader1 := cfg.checkOneLeader() 570 | 571 | cfg.disconnect((leader1 + 1) % servers) 572 | cfg.disconnect((leader1 + 2) % servers) 573 | 574 | cfg.one(10+index, servers-2, true) 575 | index++ 576 | 577 | cfg.disconnect((leader1 + 0) % servers) 578 | cfg.disconnect((leader1 + 3) % servers) 579 | cfg.disconnect((leader1 + 4) % servers) 580 | 581 | cfg.start1((leader1 + 1) % servers) 582 | cfg.start1((leader1 + 2) % servers) 583 | cfg.connect((leader1 + 1) % servers) 584 | cfg.connect((leader1 + 2) % servers) 585 | 586 | time.Sleep(RaftElectionTimeout) 587 | 588 | cfg.start1((leader1 + 3) % servers) 589 | cfg.connect((leader1 + 3) % servers) 590 | 591 | cfg.one(10+index, servers-2, true) 592 | index++ 593 | 594 | cfg.connect((leader1 + 4) % servers) 595 | cfg.connect((leader1 + 0) % servers) 596 | } 597 | 598 | cfg.one(1000, servers, true) 599 | 600 | cfg.end() 601 | } 602 | 603 | func TestPersist32C(t *testing.T) { 604 | servers := 3 605 | cfg := makeConfig(t, servers, false) 606 | defer cfg.cleanup() 607 | 608 | cfg.begin("Test (2C): Persist32C partitioned leader and one follower crash, leader restarts") 609 | 610 | cfg.one(101, 3, true) 611 | 612 | leader := cfg.checkOneLeader() 613 | cfg.disconnect((leader + 2) % servers) 614 | 615 | cfg.one(102, 2, true) 616 | 617 | cfg.crash1((leader + 0) % servers) 618 | cfg.crash1((leader + 1) % servers) 619 | cfg.connect((leader + 2) % servers) 620 | cfg.start1((leader + 0) % servers) 621 | cfg.connect((leader + 0) % servers) 622 | 623 | cfg.one(103, 2, true) 624 | 625 | cfg.start1((leader + 1) % servers) 626 | cfg.connect((leader + 1) % servers) 627 | 628 | cfg.one(104, servers, true) 629 | 630 | cfg.end() 631 | } 632 | 633 | // 634 | // Test the scenarios described in Figure 8 of the extended Raft paper. Each 635 | // iteration asks a leader, if there is one, to insert a command in the Raft 636 | // log. If there is a leader, that leader will fail quickly with a high 637 | // probability (perhaps without committing the command), or crash after a while 638 | // with low probability (most likely committing the command). If the number of 639 | // alive servers isn't enough to form a majority, perhaps start a new server. 640 | // The leader in a new term may try to finish replicating log entries that 641 | // haven't been committed yet. 
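// The scenario exercises the commit rule from §5.4.2 of the paper: in this
// implementation broadcastAppendEntries only advances commitIndex by counting
// replicas of entries whose LogTerm equals the leader's currentTerm, so
// entries from earlier terms are only committed indirectly.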
642 | // 643 | func TestFigure82C(t *testing.T) { 644 | servers := 5 645 | cfg := makeConfig(t, servers, false) 646 | defer cfg.cleanup() 647 | 648 | cfg.begin("Test (2C): Figure82C Figure 8") 649 | 650 | cfg.one(rand.Int(), 1, true) 651 | 652 | nup := servers 653 | for iters := 0; iters < 1000; iters++ { 654 | leader := -1 655 | for i := 0; i < servers; i++ { 656 | if cfg.rafts[i] != nil { 657 | _, _, ok := cfg.rafts[i].Start(rand.Int()) 658 | if ok { 659 | leader = i 660 | } 661 | } 662 | } 663 | 664 | if (rand.Int() % 1000) < 100 { 665 | ms := rand.Int63() % (int64(RaftElectionTimeout/time.Millisecond) / 2) 666 | time.Sleep(time.Duration(ms) * time.Millisecond) 667 | } else { 668 | ms := (rand.Int63() % 13) 669 | time.Sleep(time.Duration(ms) * time.Millisecond) 670 | } 671 | 672 | if leader != -1 { 673 | cfg.crash1(leader) 674 | nup-- 675 | } 676 | 677 | if nup < 3 { 678 | s := rand.Int() % servers 679 | if cfg.rafts[s] == nil { 680 | cfg.start1(s) 681 | cfg.connect(s) 682 | nup++ 683 | } 684 | } 685 | } 686 | 687 | for i := 0; i < servers; i++ { 688 | if cfg.rafts[i] == nil { 689 | cfg.start1(i) 690 | cfg.connect(i) 691 | } 692 | } 693 | 694 | cfg.one(rand.Int(), servers, true) 695 | 696 | cfg.end() 697 | } 698 | 699 | func TestUnreliableAgree2C(t *testing.T) { 700 | servers := 5 701 | cfg := makeConfig(t, servers, true) 702 | defer cfg.cleanup() 703 | 704 | cfg.begin("Test (2C): UnreliableAgree2C unreliable agreement") 705 | 706 | var wg sync.WaitGroup 707 | 708 | for iters := 1; iters < 50; iters++ { 709 | for j := 0; j < 4; j++ { 710 | wg.Add(1) 711 | go func(iters, j int) { 712 | defer wg.Done() 713 | cfg.one((100*iters)+j, 1, true) 714 | }(iters, j) 715 | } 716 | cfg.one(iters, 1, true) 717 | } 718 | 719 | cfg.setunreliable(false) 720 | 721 | wg.Wait() 722 | 723 | cfg.one(100, servers, true) 724 | 725 | cfg.end() 726 | } 727 | 728 | func TestFigure8Unreliable2C(t *testing.T) { 729 | servers := 5 730 | cfg := makeConfig(t, servers, true) 731 | defer cfg.cleanup() 732 | 733 | cfg.begin("Test (2C): Figure8Unreliable2C Figure 8 (unreliable)") 734 | 735 | cfg.one(rand.Int()%10000, 1, true) 736 | 737 | nup := servers 738 | for iters := 0; iters < 1000; iters++ { 739 | if iters == 200 { 740 | cfg.setlongreordering(true) 741 | } 742 | leader := -1 743 | for i := 0; i < servers; i++ { 744 | _, _, ok := cfg.rafts[i].Start(rand.Int() % 10000) 745 | if ok && cfg.connected[i] { 746 | leader = i 747 | } 748 | } 749 | 750 | if (rand.Int() % 1000) < 100 { 751 | ms := rand.Int63() % (int64(RaftElectionTimeout/time.Millisecond) / 2) 752 | time.Sleep(time.Duration(ms) * time.Millisecond) 753 | } else { 754 | ms := (rand.Int63() % 13) 755 | time.Sleep(time.Duration(ms) * time.Millisecond) 756 | } 757 | 758 | if leader != -1 && (rand.Int()%1000) < int(RaftElectionTimeout/time.Millisecond)/2 { 759 | cfg.disconnect(leader) 760 | nup-- 761 | } 762 | 763 | if nup < 3 { 764 | s := rand.Int() % servers 765 | if cfg.connected[s] == false { 766 | cfg.connect(s) 767 | nup++ 768 | } 769 | } 770 | } 771 | 772 | for i := 0; i < servers; i++ { 773 | if cfg.connected[i] == false { 774 | cfg.connect(i) 775 | } 776 | } 777 | 778 | cfg.one(rand.Int()%10000, servers, true) 779 | 780 | cfg.end() 781 | } 782 | 783 | func internalChurn(t *testing.T, unreliable bool) { 784 | 785 | servers := 5 786 | cfg := makeConfig(t, servers, unreliable) 787 | defer cfg.cleanup() 788 | 789 | if unreliable { 790 | cfg.begin("Test (2C): unreliable churn") 791 | } else { 792 | cfg.begin("Test (2C): churn") 793 | } 794 | 795 | stop := 
int32(0) 796 | 797 | // create concurrent clients 798 | cfn := func(me int, ch chan []int) { 799 | var ret []int 800 | ret = nil 801 | defer func() { ch <- ret }() 802 | values := []int{} 803 | for atomic.LoadInt32(&stop) == 0 { 804 | x := rand.Int() 805 | index := -1 806 | ok := false 807 | for i := 0; i < servers; i++ { 808 | // try them all, maybe one of them is a leader 809 | cfg.mu.Lock() 810 | rf := cfg.rafts[i] 811 | cfg.mu.Unlock() 812 | if rf != nil { 813 | index1, _, ok1 := rf.Start(x) 814 | if ok1 { 815 | ok = ok1 816 | index = index1 817 | } 818 | } 819 | } 820 | if ok { 821 | // maybe leader will commit our value, maybe not. 822 | // but don't wait forever. 823 | for _, to := range []int{10, 20, 50, 100, 200} { 824 | nd, cmd := cfg.nCommitted(index) 825 | if nd > 0 { 826 | if xx, ok := cmd.(int); ok { 827 | if xx == x { 828 | values = append(values, x) 829 | } 830 | } else { 831 | cfg.t.Fatalf("wrong command type") 832 | } 833 | break 834 | } 835 | time.Sleep(time.Duration(to) * time.Millisecond) 836 | } 837 | } else { 838 | time.Sleep(time.Duration(79+me*17) * time.Millisecond) 839 | } 840 | } 841 | ret = values 842 | } 843 | 844 | ncli := 3 845 | cha := []chan []int{} 846 | for i := 0; i < ncli; i++ { 847 | cha = append(cha, make(chan []int)) 848 | go cfn(i, cha[i]) 849 | } 850 | 851 | for iters := 0; iters < 20; iters++ { 852 | if (rand.Int() % 1000) < 200 { 853 | i := rand.Int() % servers 854 | cfg.disconnect(i) 855 | } 856 | 857 | if (rand.Int() % 1000) < 500 { 858 | i := rand.Int() % servers 859 | if cfg.rafts[i] == nil { 860 | cfg.start1(i) 861 | } 862 | cfg.connect(i) 863 | } 864 | 865 | if (rand.Int() % 1000) < 200 { 866 | i := rand.Int() % servers 867 | if cfg.rafts[i] != nil { 868 | cfg.crash1(i) 869 | } 870 | } 871 | 872 | // Make crash/restart infrequent enough that the peers can often 873 | // keep up, but not so infrequent that everything has settled 874 | // down from one change to the next. Pick a value smaller than 875 | // the election timeout, but not hugely smaller. 876 | time.Sleep((RaftElectionTimeout * 7) / 10) 877 | } 878 | 879 | time.Sleep(RaftElectionTimeout) 880 | cfg.setunreliable(false) 881 | for i := 0; i < servers; i++ { 882 | if cfg.rafts[i] == nil { 883 | cfg.start1(i) 884 | } 885 | cfg.connect(i) 886 | } 887 | 888 | atomic.StoreInt32(&stop, 1) 889 | 890 | values := []int{} 891 | for i := 0; i < ncli; i++ { 892 | vv := <-cha[i] 893 | if vv == nil { 894 | t.Fatal("client failed") 895 | } 896 | values = append(values, vv...) 
897 | } 898 | 899 | time.Sleep(RaftElectionTimeout) 900 | 901 | lastIndex := cfg.one(rand.Int(), servers, true) 902 | 903 | really := make([]int, lastIndex+1) 904 | for index := 1; index <= lastIndex; index++ { 905 | v := cfg.wait(index, servers, -1) 906 | if vi, ok := v.(int); ok { 907 | really = append(really, vi) 908 | } else { 909 | t.Fatalf("not an int") 910 | } 911 | } 912 | 913 | for _, v1 := range values { 914 | ok := false 915 | for _, v2 := range really { 916 | if v1 == v2 { 917 | ok = true 918 | } 919 | } 920 | if ok == false { 921 | cfg.t.Fatalf("didn't find a value") 922 | } 923 | } 924 | 925 | cfg.end() 926 | } 927 | 928 | func TestReliableChurn2C(t *testing.T) { 929 | internalChurn(t, false) 930 | } 931 | 932 | func TestUnreliableChurn2C(t *testing.T) { 933 | internalChurn(t, true) 934 | } 935 | -------------------------------------------------------------------------------- /Raft/code/util.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import "log" 4 | 5 | // Debugging is 6 | const Debugging = 0 7 | 8 | // DPrintf is 9 | func DPrintf(format string, a ...interface{}) { 10 | if Debugging > 0 { 11 | log.Printf(format, a...) 12 | } 13 | } 14 | 15 | func min(a, b int) int { 16 | if a < b { 17 | return a 18 | } 19 | return b 20 | } 21 | -------------------------------------------------------------------------------- /Raft/raft-extended.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aQuaYi/Distributed-Algorithms/8c27e1220fb1c467da999f30244d40f520365522/Raft/raft-extended.pdf -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | echo "" > coverage.txt 5 | 6 | for d in $(go list ./... | grep -v vendor); do 7 | echo $d 8 | go test -coverprofile=profile.out -covermode=atomic $d 9 | if [ -f profile.out ]; then 10 | cat profile.out >> coverage.txt 11 | rm profile.out 12 | fi 13 | done --------------------------------------------------------------------------------