├── .gitignore
├── .vscode
└── tasks.json
├── LICENSE
├── benchmark
├── binary-serialize
│ ├── binary-serialize.md
│ └── binary-serialize_test.go
├── generate
│ ├── generate-data.go
│ └── generate_test.go
├── multi-raft
│ ├── client
│ │ ├── client.go
│ │ └── main
│ │ │ └── main.go
│ ├── raft-server-benchmark.md
│ ├── raft_client
│ │ ├── main
│ │ │ └── main.go
│ │ └── raft_client.go
│ ├── raft_server
│ │ ├── main
│ │ │ └── main.go
│ │ └── raft_server.go
│ ├── server
│ │ ├── main
│ │ │ └── main.go
│ │ └── server.go
│ └── simple-server-benchmark.md
└── thrift-serialize
│ ├── thrift-serialize.md
│ └── thrift-serialize_test.go
├── config
└── config.go
├── experiment
├── ondisk
│ ├── db.go
│ ├── engine
│ │ ├── engine.go
│ │ └── mraft_router.go
│ ├── fsm.go
│ ├── main
│ │ └── app.go
│ ├── metrics.go
│ ├── ondisk.go
│ └── raftd
│ │ └── mraft.go
├── simpleondisk
│ ├── db.go
│ ├── fsm.go
│ ├── httpengine
│ │ └── engine.go
│ ├── main
│ │ └── main.go
│ ├── ondisk.go
│ └── test
│ │ └── test.go
└── store
│ ├── kv.go
│ └── kvstore.go
├── go.mod
├── go.sum
├── gossip
├── config.go
├── coordinate
│ ├── client.go
│ ├── client_test.go
│ ├── config.go
│ ├── coordinate.go
│ ├── coordinate_test.go
│ ├── performance_test.go
│ ├── phantom.go
│ └── util_test.go
├── delegate.go
├── event.go
├── gossip.go
├── gossip_test.go
├── message.go
└── ping_delegate.go
├── logger
└── zaplog.go
├── productready
├── README.md
├── config
│ └── config.go
├── engine.go
├── httpd
│ └── handle.go
├── ilogger
│ └── logger.go
├── main
│ └── app.go
├── router.go
├── storage
│ ├── command.go
│ ├── config.go
│ ├── del.go
│ ├── event.go
│ ├── get.go
│ ├── op.go
│ ├── put.go
│ ├── sm.go
│ ├── storage.go
│ └── store
│ │ ├── pebbledb.go
│ │ ├── store.go
│ │ └── utils.go
└── utils
│ └── utils.go
├── readme.md
└── test
├── metrics
└── main.go
├── serialize
└── serialize.go
└── test.go
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Created by https://www.gitignore.io/api/windows,osx,linux,code
3 |
4 | ### Code ###
5 | # Visual Studio Code - https://code.visualstudio.com/
6 | /vendor
7 | .settings/
8 | tsconfig.json
9 | jsconfig.json
10 | idl/
11 | rpc/
12 | productready/cmd/main/data
13 | productready/cmd/main/main
14 |
15 | ### Linux ###
16 | *~
17 |
18 | # temporary files which can be created if a process still has a handle open of a deleted file
19 | .fuse_hidden*
20 |
21 | # KDE directory preferences
22 | .directory
23 |
24 | # Linux trash folder which might appear on any partition or disk
25 | .Trash-*
26 |
27 | # .nfs files are created when an open file is removed but is still being accessed
28 | .nfs*
29 |
30 | ### OSX ###
31 | *.DS_Store
32 | .AppleDouble
33 | .LSOverride
34 |
35 | # Icon must end with two \r
36 | Icon
37 |
38 | # Thumbnails
39 | ._*
40 |
41 | # Files that might appear in the root of a volume
42 | .DocumentRevisions-V100
43 | .fseventsd
44 | .Spotlight-V100
45 | .TemporaryItems
46 | .Trashes
47 | .VolumeIcon.icns
48 | .com.apple.timemachine.donotpresent
49 |
50 | # Directories potentially created on remote AFP share
51 | .AppleDB
52 | .AppleDesktop
53 | Network Trash Folder
54 | Temporary Items
55 | .apdisk
56 |
57 | ### Windows ###
58 | # Windows thumbnail cache files
59 | Thumbs.db
60 | ehthumbs.db
61 | ehthumbs_vista.db
62 |
63 | # Folder config file
64 | Desktop.ini
65 |
66 | # Recycle Bin used on file shares
67 | $RECYCLE.BIN/
68 |
69 | # Windows Installer files
70 | *.cab
71 | *.msi
72 | *.msm
73 | *.msp
74 |
75 | # Windows shortcuts
76 | *.lnk
77 |
78 |
79 | # End of https://www.gitignore.io/api/windows,osx,linux,code
80 | mraft
81 | simpleondisk/test/test
82 |
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0.0",
3 | "command": "go",
4 | "type": "shell",
5 | "presentation" : {
6 | "reveal": "always"
7 | },
8 | "options":{
9 | "cwd": "${fileDirname}"
10 | },
11 | "problemMatcher":[],
12 | "tasks": [
13 | {
14 | "label": "run",
15 | "options": {
16 | "env": {
17 | "CGO_CFLAGS": "-I/usr/local/include/rocksdb",
18 | "CGO_LDFLAGS":"-L/usr/local/lib -lrocksdb -lstdc++ -lm -lz -lbz2 -lsnappy -llz4",
19 | "http_proxy": "",
20 | "https_proxy": "",
21 | "all_proxy": ""
22 | }
23 | },
24 | "osx": {
25 | "args": [
26 | "run",
27 | "${workspaceRoot}/app.go"
28 | ]
29 | }
30 | },
31 | {
32 | "label": "build",
33 | "options": {
34 | "cwd": "${fileDirname}",
35 | "env": {
36 | "CGO_CFLAGS": "-I/usr/local/include/rocksdb",
37 | "CGO_LDFLAGS":"-L/usr/local/lib -lrocksdb -lstdc++ -lm -lz -lbz2 -lsnappy -llz4",
38 | "http_proxy": "",
39 | "https_proxy": "",
40 | "all_proxy": ""
41 | }
42 | },
43 | "args":[
44 | "build",
45 | "-v",
46 | //"-x",
47 | "."
48 | ]
49 | }
50 | ]
51 | }
--------------------------------------------------------------------------------
/benchmark/binary-serialize/binary-serialize.md:
--------------------------------------------------------------------------------
1 | go test -bench=. -benchmem
2 |
3 |
4 |
5 | ```
6 | goos: darwin
7 | goarch: amd64
8 | pkg: github.com/xkeyideal/mraft/benchmark/binary-serialize
9 | BenchmarkBinaryEncode-4 10000000 214 ns/op 149 B/op 3 allocs/op
10 | BenchmarkBinaryDecode-4 50000000 29.9 ns/op 8 B/op 1 allocs/op
11 | PASS
12 | ok github.com/xkeyideal/mraft/benchmark/binary-serialize 4.059s
13 | ```
--------------------------------------------------------------------------------
/benchmark/binary-serialize/binary-serialize_test.go:
--------------------------------------------------------------------------------
1 | package serialize
2 |
3 | import (
4 | "bytes"
5 | "encoding/binary"
6 | "errors"
7 | "io"
8 | "testing"
9 | )
10 |
11 | func encode(key, val []byte, w io.Writer) error {
12 | dataSize := make([]byte, 8)
13 | keySize := make([]byte, 8)
14 | valSize := make([]byte, 8)
15 |
16 | kl := len(key)
17 | vl := len(val)
18 |
19 | binary.LittleEndian.PutUint64(dataSize, uint64(kl+vl+8+8))
20 | if _, err := w.Write(dataSize); err != nil {
21 | return err
22 | }
23 |
24 | binary.LittleEndian.PutUint64(keySize, uint64(kl))
25 | if _, err := w.Write(keySize); err != nil {
26 | return err
27 | }
28 |
29 | if _, err := w.Write(key); err != nil {
30 | return err
31 | }
32 |
33 | binary.LittleEndian.PutUint64(valSize, uint64(vl))
34 | if _, err := w.Write(valSize); err != nil {
35 | return err
36 | }
37 |
38 | if _, err := w.Write(val); err != nil {
39 | return err
40 | }
41 |
42 | return nil
43 | }
44 |
45 | func decode(r io.Reader) ([]byte, []byte, error) {
46 | sz := make([]byte, 8)
47 | if _, err := io.ReadFull(r, sz); err != nil {
48 | return nil, nil, err
49 | }
50 | dataSize := binary.LittleEndian.Uint64(sz)
51 | data := make([]byte, dataSize)
52 | if _, err := io.ReadFull(r, data); err != nil {
53 | return nil, nil, err
54 | }
55 |
56 | kl := binary.LittleEndian.Uint64(data[:8])
57 | key := data[8 : kl+8]
58 | vl := binary.LittleEndian.Uint64(data[kl+8 : kl+16])
59 | val := data[kl+16:]
60 | if uint64(len(val)) != vl {
61 | return nil, nil, errors.New("size isn't equal")
62 | }
63 |
64 | return key, val, nil
65 | }
66 |
67 | func TestBinarySerialize(t *testing.T) {
68 | key := []byte("multi-raft-key")
69 | val := []byte("multi-raft-value")
70 |
71 | buf := &bytes.Buffer{}
72 | err := encode(key, val, buf)
73 | if err != nil {
74 | t.Fatalf("binary marshal fatal, %+v", err)
75 | return
76 | }
77 |
78 | key1, val1, err := decode(buf)
79 | if err != nil {
80 | t.Fatalf("binary unmarshal fatal, %+v", err)
81 | return
82 | }
83 |
84 | if !bytes.Equal(key1, key) {
85 | t.Fatalf("binary unmarshal expected %v, got %v", key, key1)
86 | return
87 | }
88 |
89 | if !bytes.Equal(val1, val) {
90 | t.Fatalf("binary unmarshal expected %v, got %v", val, val1)
91 | return
92 | }
93 | }
94 |
95 | func BenchmarkBinaryEncode(b *testing.B) {
96 | key := []byte("multi-raft-key")
97 | val := []byte("multi-raft-value")
98 |
99 | buf := &bytes.Buffer{}
100 | b.ResetTimer()
101 | for i := 0; i < b.N; i++ {
102 | encode(key, val, buf)
103 | }
104 | }
105 |
106 | func BenchmarkBinaryDecode(b *testing.B) {
107 | key := []byte("multi-raft-key")
108 | val := []byte("multi-raft-value")
109 |
110 | buf := &bytes.Buffer{}
111 | err := encode(key, val, buf)
112 | if err != nil {
113 | b.Fatalf("binary marshal fatal, %+v", err)
114 | return
115 | }
116 |
117 | b.ResetTimer()
118 | for i := 0; i < b.N; i++ {
119 | decode(buf)
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/benchmark/generate/generate-data.go:
--------------------------------------------------------------------------------
1 | package generate
2 |
3 | import (
4 | "crypto/rand"
5 | "io"
6 | mrand "math/rand"
7 | "time"
8 |
9 | "github.com/xkeyideal/mraft/experiment/store"
10 | )
11 |
12 | var idChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
13 |
14 | const idLen = 20
15 |
16 | func GenerateData() *store.RaftAttribute {
17 | mrand.Seed(time.Now().UnixNano())
18 |
19 | attr := &store.RaftAttribute{
20 | AttrID: uint64(mrand.Int31n(1000000) + 1000000),
21 | AttrName: randomId(),
22 | Ages: []int32{},
23 | Locations: make(map[string]string),
24 | Timestamp: time.Now().UnixNano(),
25 | }
26 |
27 | l := mrand.Intn(100) + 1
28 | for i := 0; i < l; i++ {
29 | attr.Ages = append(attr.Ages, mrand.Int31n(200)+1)
30 | }
31 |
32 | n := mrand.Intn(50) + 1
33 | for i := 0; i < n; i++ {
34 | attr.Locations[randomId()] = randomId()
35 | }
36 |
37 | return attr
38 | }
39 |
40 | // randomId returns a new random id string.
41 | func randomId() string {
42 | b := randomBytesMod(idLen, byte(len(idChars)))
43 | for i, c := range b {
44 | b[i] = idChars[c]
45 | }
46 | return string(b)
47 | }
48 |
// randomBytes returns length cryptographically random bytes.
// rand.Read is documented to be equivalent to io.ReadFull(rand.Reader, b);
// as in the original, a read error is ignored and leaves zero bytes behind.
func randomBytes(length int) (b []byte) {
	b = make([]byte, length)
	rand.Read(b)
	return
}
54 |
55 | func randomBytesMod(length int, mod byte) (b []byte) {
56 | maxrb := 255 - byte(256%int(mod))
57 | b = make([]byte, length)
58 | i := 0
59 | for {
60 | r := randomBytes(length + (length / 4))
61 | for _, c := range r {
62 | if c > maxrb {
63 | continue
64 | }
65 | b[i] = c % mod
66 | i++
67 | if i == length {
68 | return b
69 | }
70 | }
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/benchmark/generate/generate_test.go:
--------------------------------------------------------------------------------
1 | package generate
2 |
3 | import "testing"
4 |
5 | func TestGenerateAttr(t *testing.T) {
6 | attr := GenerateData()
7 | t.Logf("%+v", attr)
8 | }
9 |
10 | func BenchmarkGenerateAttr(b *testing.B) {
11 | for i := 0; i < b.N; i++ {
12 | GenerateData()
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/benchmark/multi-raft/client/client.go:
--------------------------------------------------------------------------------
1 | package client
2 |
3 | import (
4 | "bufio"
5 | "encoding/binary"
6 | "fmt"
7 | "io"
8 | "net"
9 | "sync"
10 | "time"
11 |
12 | "github.com/xkeyideal/mraft/experiment/store"
13 | )
14 |
15 | const defaultBufferSize = 5 * 1024
16 |
// SimpleClient is a benchmark TCP client: attributes queued via SendMessage
// are framed and written by a background send loop, while a background recv
// loop emits one struct{} on recv for every complete response frame read.
type SimpleClient struct {
	mu      *sync.Mutex               // serializes framed writes and flushes
	message chan *store.RaftAttribute // outbound payload queue (cap 1000)
	conn    net.Conn
	reader  *bufio.Reader
	writer  *bufio.Writer

	recv chan struct{} // one signal per fully-read response frame
}
26 |
27 | func NewSimpleClient(address string, recv chan struct{}) (*SimpleClient, error) {
28 | conn, err := net.Dial("tcp", address)
29 | if err != nil {
30 | return nil, err
31 | }
32 |
33 | sc := &SimpleClient{
34 | mu: &sync.Mutex{},
35 | message: make(chan *store.RaftAttribute, 1000),
36 | conn: conn,
37 | writer: bufio.NewWriterSize(conn, defaultBufferSize),
38 | reader: bufio.NewReaderSize(conn, defaultBufferSize),
39 |
40 | recv: recv,
41 | }
42 |
43 | go sc.handleSend()
44 | go sc.handleRecv()
45 |
46 | return sc, nil
47 | }
48 |
// Flush writes all buffered data to the underlying TCP connection.
func (sc *SimpleClient) Flush() error {
	return sc.writer.Flush()
}
53 |
// Write buffers b; bytes reach the socket only after Flush. Implementing
// io.Writer lets store payloads frame themselves straight into the client.
func (sc *SimpleClient) Write(b []byte) (int, error) {
	return sc.writer.Write(b)
}
57 |
58 | func (sc *SimpleClient) writeCommand(attr *store.RaftAttribute) error {
59 | sc.mu.Lock()
60 |
61 | _, err := attr.WriteTo2(sc)
62 | if err != nil {
63 | fmt.Println("Write failed,", err.Error())
64 | sc.mu.Unlock()
65 | return err
66 | }
67 |
68 | sc.conn.SetWriteDeadline(time.Now().Add(3 * time.Second))
69 |
70 | sc.Flush()
71 |
72 | sc.mu.Unlock()
73 |
74 | return nil
75 | }
76 |
// SendMessage queues attr for the background send loop; it blocks only
// when the 1000-slot message buffer is full.
func (sc *SimpleClient) SendMessage(attr *store.RaftAttribute) {
	sc.message <- attr
}
80 |
81 | func (sc *SimpleClient) handleSend() {
82 |
83 | for {
84 | select {
85 | case msg := <-sc.message:
86 | err := sc.writeCommand(msg)
87 | if err != nil {
88 | fmt.Println("Error to send message because of ", err.Error())
89 | break
90 | }
91 | }
92 | }
93 | }
94 |
95 | func (sc *SimpleClient) handleRecv() {
96 | var err error
97 | sz := make([]byte, 8)
98 |
99 | for {
100 | sc.conn.SetReadDeadline(time.Now().Add(3 * time.Second))
101 | _, err = sc.reader.Read(sz)
102 | fmt.Println("Read:", err)
103 | if err != nil {
104 | if err == io.EOF {
105 | err = nil
106 | } else {
107 | err = fmt.Errorf("failed to read datasize - %s", err)
108 | }
109 | break
110 | }
111 |
112 | dataSize := binary.LittleEndian.Uint64(sz)
113 | body := make([]byte, dataSize)
114 |
115 | _, err = io.ReadFull(sc.reader, body)
116 | if err != nil {
117 | err = fmt.Errorf("failed to read databody - %s", err)
118 | break
119 | }
120 |
121 | sc.recv <- struct{}{}
122 |
123 | // fmt.Println(string(body))
124 | }
125 | }
126 |
// Stop closes the TCP connection; the close error is ignored.
// NOTE(review): there is no explicit shutdown signal for the background
// goroutines — they are expected to exit via the resulting I/O errors.
func (sc *SimpleClient) Stop() {
	sc.conn.Close()
}
130 |
--------------------------------------------------------------------------------
/benchmark/multi-raft/client/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "sync"
6 | "time"
7 |
8 | "github.com/xkeyideal/mraft/benchmark/generate"
9 | "github.com/xkeyideal/mraft/benchmark/multi-raft/client"
10 | "github.com/xkeyideal/mraft/experiment/store"
11 | )
12 |
// TestClient drives one benchmark connection: Gen produces random
// attributes into send, Send pushes them to the TCP client, and Recv
// releases wg once per server response signal.
type TestClient struct {
	client *client.SimpleClient
	send   chan *store.RaftAttribute // generated payloads awaiting send
	recv   chan struct{}             // one signal per server response

	exitchan chan struct{} // closed by Stop to end the loops
	wg       sync.WaitGroup
}
21 |
22 | func (tc *TestClient) Gen(n int) {
23 | for i := 0; i < n; i++ {
24 | attr := generate.GenerateData()
25 | tc.send <- attr
26 | }
27 | }
28 |
29 | func (tc *TestClient) Send() {
30 | for {
31 | select {
32 | case <-tc.exitchan:
33 | return
34 | case attr := <-tc.send:
35 | tc.client.SendMessage(attr)
36 | }
37 | }
38 | }
39 |
40 | func (tc *TestClient) Recv() {
41 | for {
42 | select {
43 | case <-tc.exitchan:
44 | return
45 | case <-tc.recv:
46 | tc.wg.Done()
47 | }
48 | }
49 | }
50 |
// Stop ends the Send/Recv loops and closes the underlying connection.
func (tc *TestClient) Stop() {
	close(tc.exitchan)
	tc.client.Stop()
}
55 |
56 | func makeClient(connections, taskNum int, wg *sync.WaitGroup) {
57 |
58 | cwg := &sync.WaitGroup{}
59 | cwg.Add(connections)
60 |
61 | for i := 0; i < connections; i++ {
62 | go func(cwg *sync.WaitGroup) {
63 | tc := &TestClient{
64 | send: make(chan *store.RaftAttribute, 1000),
65 | recv: make(chan struct{}, 1000),
66 | exitchan: make(chan struct{}),
67 | wg: sync.WaitGroup{},
68 | }
69 |
70 | client, err := client.NewSimpleClient("10.101.44.4:25701", tc.recv)
71 | if err != nil {
72 | panic(err)
73 | }
74 |
75 | tc.client = client
76 |
77 | tc.wg.Add(taskNum)
78 |
79 | go tc.Gen(taskNum)
80 | go tc.Send()
81 | go tc.Recv()
82 |
83 | tc.wg.Wait()
84 |
85 | cwg.Done()
86 | }(cwg)
87 | }
88 |
89 | cwg.Wait()
90 |
91 | wg.Done()
92 | }
93 |
94 | func main() {
95 | st := time.Now()
96 |
97 | g, c, n := 1, 1, 1
98 |
99 | wg := &sync.WaitGroup{}
100 |
101 | wg.Add(g)
102 |
103 | for i := 0; i < g; i++ {
104 | go makeClient(c, n, wg)
105 | }
106 |
107 | wg.Wait()
108 |
109 | ed := time.Now()
110 | op := float64(ed.UnixNano()-st.UnixNano()) / float64(n*1000)
111 |
112 | fmt.Printf("线程数:%d, 每个线程连接数:%d, 请求次数:%d, 平均耗时:%.1f us/op\n", g, c, n, op)
113 | }
114 |
--------------------------------------------------------------------------------
/benchmark/multi-raft/raft-server-benchmark.md:
--------------------------------------------------------------------------------
1 | ## multi-raft 压测结果
2 |
3 | ```json
4 | 总次数: 6000, 错误数: 0, 线程数: 6, 每个线程连接数: 1, 请求次数: 1000
5 | 最小值: 663us, 最大值: 203700us, 中间值: 6182.6us
6 | 75百分位: 3939.0us, 90百分位: 5404.0us, 95百分位: 11294.0us, 99百分位: 93239.6us
7 | ```
8 |
9 | ### 压测程序Server端
10 |
11 | [server端主程序](https://github.com/xkeyideal/mraft/blob/master/benchmark/multi-raft/raft_server/raft_server.go)注意修改ip地址
12 | [server端启动程序](https://github.com/xkeyideal/mraft/blob/master/benchmark/multi-raft/raft_server/main/main.go) 注意启动参数,同时注意修改raft的存储目录地址
13 |
14 | 第一个参数为raft的NodeID,第二个参数为应用程序的TCP端口号
15 |
16 | ```go
17 | go run main.go 10000 25700
18 | go run main.go 10001 25800
19 | go run main.go 10002 25900
20 | ```
21 |
22 | server端总计配置了10个cluster
23 |
24 | ### 压测程序Client端
25 |
26 | [client端主程序](https://github.com/xkeyideal/mraft/blob/master/benchmark/multi-raft/raft_client/raft_client.go)
27 | [client端启动程序](https://github.com/xkeyideal/mraft/blob/master/benchmark/multi-raft/raft_client/main/main.go) 注意ip地址和端口号要与server对应
28 |
29 | ### 压测环境
30 |
31 | 机器采用的是开发环境的机器,操作系统macOS High Sierra,Darwin Kernel Version 18.6.0 root:xnu-4903.261.4~2/RELEASE_X86_64 x86_64 i386 iMac14,2 Darwin
32 |
33 | CPU:3.29 GHz Intel Core i5
34 |
35 | 内存:20 GB 1600 MHz DDR3
36 |
37 | 磁盘:256GB SATA SSD
--------------------------------------------------------------------------------
/benchmark/multi-raft/raft_client/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "sync"
6 | "time"
7 |
8 | "github.com/xkeyideal/mraft/benchmark/generate"
9 | "github.com/xkeyideal/mraft/benchmark/multi-raft/raft_client"
10 | "github.com/xkeyideal/mraft/experiment/store"
11 |
12 | "github.com/rcrowley/go-metrics"
13 | )
14 |
15 | var histogram metrics.Histogram
16 | var counter metrics.Counter
17 |
18 | var servers = []string{"10.101.44.4:25700", "10.101.44.4:25800", "10.101.44.4:25900"}
19 |
// TestClient drives one raft benchmark connection: Gen produces random
// attributes, SendWriteCommand proposes them through the raft client, and
// wg tracks completion of all requests.
type TestClient struct {
	client *raft_client.RaftSimpleClient
	send   chan *store.RaftAttribute // generated write payloads
	query  chan *store.ReadArgument  // follow-up reads (currently disabled)

	exitchan chan struct{} // closed by Stop to end the loops
	wg       sync.WaitGroup
}
28 |
29 | func (tc *TestClient) Gen(n int) {
30 | for i := 0; i < n; i++ {
31 | attr := generate.GenerateData()
32 | tc.send <- attr
33 | }
34 | }
35 |
// SendWriteCommand publishes each generated attribute through the raft
// client, recording per-request latency (nanoseconds) in the global
// histogram and counting failed publishes. It exits when exitchan closes.
func (tc *TestClient) SendWriteCommand() {
	for {
		select {
		case <-tc.exitchan:
			return
		case attr := <-tc.send:
			st := time.Now().UnixNano()
			_, err := tc.client.PublishCommand(attr)
			if err != nil {
				counter.Inc(1)
			}

			// Round-trip latency for this publish, in nanoseconds.
			x := time.Now().UnixNano() - st
			histogram.Update(x)

			// Disabled: optionally route a fraction of writes into the
			// query path for mixed read/write benchmarking.
			// n := rand.Intn(10)
			// fmt.Println("random_n:", n)
			// if n < 10 { // route a share of requests into the query path
			// tc.query <- &store.ReadArgument{
			// Key: fmt.Sprintf("%d_%s", attr.AttrID, attr.AttrName),
			// HashKey: attr.AttrID,
			// Sync: true,
			// }
			// } else {
			tc.wg.Done()
			// }
		}
	}
}
65 |
66 | func (tc *TestClient) SendQueryCommand() {
67 | for {
68 | select {
69 | case <-tc.exitchan:
70 | return
71 | case arg := <-tc.query:
72 | attr, err := tc.client.PublishCommand(arg)
73 | fmt.Println(attr, err)
74 | tc.wg.Done()
75 | }
76 | }
77 | }
78 |
// Stop ends the command loops and closes the raft client connection.
func (tc *TestClient) Stop() {
	close(tc.exitchan)
	tc.client.Stop()
}
83 |
84 | func makeClient(index, connections, taskNum int, wg *sync.WaitGroup) {
85 |
86 | cwg := &sync.WaitGroup{}
87 | cwg.Add(connections)
88 |
89 | for i := 0; i < connections; i++ {
90 | go func(index int, cwg *sync.WaitGroup) {
91 | tc := &TestClient{
92 | send: make(chan *store.RaftAttribute, 100),
93 | query: make(chan *store.ReadArgument, 30),
94 | exitchan: make(chan struct{}),
95 | wg: sync.WaitGroup{},
96 | }
97 |
98 | client, err := raft_client.NewRaftSimpleClient(servers[index%3])
99 | if err != nil {
100 | panic(err)
101 | }
102 |
103 | tc.client = client
104 |
105 | tc.wg.Add(taskNum)
106 |
107 | go tc.Gen(taskNum)
108 | go tc.SendWriteCommand()
109 | go tc.SendQueryCommand()
110 |
111 | tc.wg.Wait()
112 |
113 | cwg.Done()
114 | }(index, cwg)
115 | }
116 |
117 | cwg.Wait()
118 |
119 | wg.Done()
120 | }
121 |
// main runs the raft write benchmark: g goroutines x c connections x n
// requests each, collecting latencies in a histogram and printing summary
// statistics (values converted from nanoseconds to microseconds).
func main() {
	g, c, n := 6, 1, 1000

	// Exponentially-decaying sample biased toward recent measurements.
	s := metrics.NewExpDecaySample(10240, 0.015) // or metrics.NewUniformSample(1028)
	histogram = metrics.NewHistogram(s)

	counter = metrics.NewCounter()

	wg := &sync.WaitGroup{}

	wg.Add(g)

	for i := 0; i < g; i++ {
		go makeClient(i, c, n, wg)
	}

	wg.Wait()

	fmt.Printf("总次数: %d, 错误数: %d, 线程数: %d, 每个线程连接数: %d, 请求次数: %d\n", histogram.Count(), counter.Count(), g, c, n)
	fmt.Printf("最小值: %dus, 最大值: %dus, 中间值: %.1fus\n", histogram.Min()/1e3, histogram.Max()/1e3, histogram.Mean()/1e3)
	fmt.Printf("75百分位: %.1fus, 90百分位: %.1fus, 95百分位: %.1fus, 99百分位: %.1fus\n", histogram.Percentile(0.75)/1e3, histogram.Percentile(0.9)/1e3, histogram.Percentile(0.95)/1e3, histogram.Percentile(0.99)/1e3)
}
144 |
--------------------------------------------------------------------------------
/benchmark/multi-raft/raft_client/raft_client.go:
--------------------------------------------------------------------------------
1 | package raft_client
2 |
3 | import (
4 | "bufio"
5 | "encoding/binary"
6 | "errors"
7 | "fmt"
8 | "io"
9 | "net"
10 | "sync"
11 | "time"
12 |
13 | "github.com/xkeyideal/mraft/experiment/store"
14 | )
15 |
16 | const defaultBufferSize = 5 * 1024
17 |
18 | var ErrStopped = errors.New("stopped")
19 |
// ProducerTransaction pairs an in-flight command with the channel used to
// deliver its result; responses are matched to requests strictly in FIFO
// order by popTransaction.
type ProducerTransaction struct {
	cmd      interface{}               // *store.ReadArgument or *store.RaftAttribute
	doneChan chan *ProducerTransaction // receives this transaction once completed
	Resp     *store.RaftAttribute      // decoded response payload for reads
	Error    error                     // server-reported or decode error
}
26 |
// RaftSimpleClient is a pipelined benchmark client: commands enter through
// PublishCommand, are framed by the send loop, and response frames are
// matched to pending transactions in FIFO order.
type RaftSimpleClient struct {
	mu *sync.Mutex // serializes framed writes and flushes

	conn net.Conn

	reader *bufio.Reader
	writer *bufio.Writer

	// responseChan carries complete response bodies from handleRecv to
	// handleSend, which owns the transactions queue.
	responseChan chan []byte

	transactionChan chan *ProducerTransaction
	transactions    []*ProducerTransaction // pending requests, oldest first

	exitChan chan struct{} // closed by Stop
}
42 |
43 | func NewRaftSimpleClient(address string) (*RaftSimpleClient, error) {
44 | conn, err := net.Dial("tcp", address)
45 | if err != nil {
46 | return nil, err
47 | }
48 |
49 | sc := &RaftSimpleClient{
50 | mu: &sync.Mutex{},
51 | conn: conn,
52 | writer: bufio.NewWriterSize(conn, defaultBufferSize),
53 | reader: bufio.NewReaderSize(conn, defaultBufferSize),
54 | responseChan: make(chan []byte, 10),
55 | transactionChan: make(chan *ProducerTransaction),
56 | exitChan: make(chan struct{}),
57 | }
58 |
59 | go sc.handleSend()
60 | go sc.handleRecv()
61 |
62 | return sc, nil
63 | }
64 |
// Flush writes all buffered data to the underlying TCP connection.
func (sc *RaftSimpleClient) Flush() error {
	return sc.writer.Flush()
}
69 |
// Write buffers b; bytes reach the socket only after Flush. Implementing
// io.Writer lets store payloads frame themselves straight into the client.
func (sc *RaftSimpleClient) Write(b []byte) (int, error) {
	return sc.writer.Write(b)
}
73 |
74 | func (sc *RaftSimpleClient) readCommand(arg *store.ReadArgument) error {
75 | sc.mu.Lock()
76 |
77 | _, err := arg.WriteTo(store.CommandRead, sc)
78 | if err != nil {
79 | sc.mu.Unlock()
80 | return err
81 | }
82 |
83 | sc.conn.SetWriteDeadline(time.Now().Add(3 * time.Second))
84 |
85 | sc.Flush()
86 |
87 | sc.mu.Unlock()
88 |
89 | return nil
90 | }
91 |
92 | func (sc *RaftSimpleClient) writeCommand(attr *store.RaftAttribute) error {
93 | sc.mu.Lock()
94 |
95 | _, err := attr.WriteTo(store.CommandUpsert, sc)
96 | if err != nil {
97 | sc.mu.Unlock()
98 | return err
99 | }
100 |
101 | sc.conn.SetWriteDeadline(time.Now().Add(3 * time.Second))
102 |
103 | sc.Flush()
104 |
105 | sc.mu.Unlock()
106 |
107 | return nil
108 | }
109 |
110 | func (sc *RaftSimpleClient) PublishCommand(cmd interface{}) (*store.RaftAttribute, error) {
111 | doneChan := make(chan *ProducerTransaction)
112 | err := sc.sendCommandAsync(cmd, doneChan)
113 | if err != nil {
114 | close(doneChan)
115 | return nil, err
116 | }
117 |
118 | // 阻塞
119 | t := <-doneChan
120 | return t.Resp, t.Error
121 | }
122 |
123 | func (sc *RaftSimpleClient) sendCommandAsync(cmd interface{}, doneChan chan *ProducerTransaction) error {
124 | t := &ProducerTransaction{
125 | cmd: cmd,
126 | doneChan: doneChan,
127 | }
128 |
129 | select {
130 | case sc.transactionChan <- t:
131 | case <-sc.exitChan:
132 | return ErrStopped
133 | }
134 |
135 | return nil
136 | }
137 |
138 | func (sc *RaftSimpleClient) popTransaction(data []byte) {
139 | t := sc.transactions[0]
140 | sc.transactions = sc.transactions[1:]
141 |
142 | cmdSize := binary.LittleEndian.Uint32(data[:4])
143 | cmd := string(data[4 : 4+cmdSize])
144 | errSignal := string(data[4+cmdSize : 4+cmdSize+1])
145 | switch cmd {
146 | case store.CommandUpsert:
147 | if errSignal == "0" {
148 | t.Error = errors.New(string(data[4+cmdSize+1:]))
149 | }
150 | case store.CommandRead:
151 | if errSignal == "0" {
152 | t.Error = errors.New(string(data[4+cmdSize+1:]))
153 | } else {
154 | attr := &store.RaftAttribute{}
155 | t.Error = attr.Unmarshal(data[4+cmdSize+1:])
156 | t.Resp = attr
157 | }
158 | }
159 |
160 | t.doneChan <- t
161 | }
162 |
163 | func (sc *RaftSimpleClient) handleSend() {
164 | for {
165 | select {
166 | case t := <-sc.transactionChan:
167 | sc.transactions = append(sc.transactions, t)
168 | switch t.cmd.(type) {
169 | case *store.ReadArgument:
170 | sc.readCommand(t.cmd.(*store.ReadArgument))
171 | case *store.RaftAttribute:
172 | sc.writeCommand(t.cmd.(*store.RaftAttribute))
173 | }
174 | case data := <-sc.responseChan:
175 | sc.popTransaction(data)
176 | case <-sc.exitChan:
177 | return
178 | }
179 | }
180 | }
181 |
182 | func (sc *RaftSimpleClient) handleRecv() {
183 | var err error
184 | sz := make([]byte, 8)
185 |
186 | for {
187 | sc.conn.SetReadDeadline(time.Now().Add(6 * time.Second))
188 | _, err = sc.reader.Read(sz)
189 | if err != nil {
190 | if err == io.EOF {
191 | err = nil
192 | } else {
193 | err = fmt.Errorf("failed to read datasize - %s", err)
194 | }
195 | break
196 | }
197 |
198 | dataSize := binary.LittleEndian.Uint64(sz)
199 | body := make([]byte, dataSize)
200 |
201 | _, err = io.ReadFull(sc.reader, body)
202 | if err != nil {
203 | err = fmt.Errorf("failed to read databody - %s", err)
204 | break
205 | }
206 |
207 | sc.responseChan <- body
208 | }
209 | }
210 |
// Stop signals both background goroutines to exit and closes the TCP
// connection; closing the connection also unblocks any in-flight reads.
func (sc *RaftSimpleClient) Stop() {
	close(sc.exitChan)
	sc.conn.Close()
}
215 |
--------------------------------------------------------------------------------
/benchmark/multi-raft/raft_server/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "os/signal"
7 | "strconv"
8 | "syscall"
9 |
10 | "github.com/xkeyideal/mraft/benchmark/multi-raft/raft_server"
11 | )
12 |
13 | func main() {
14 | nodeID, err := strconv.ParseUint(os.Args[1], 10, 64)
15 | if err != nil {
16 | fmt.Println(err.Error())
17 | os.Exit(1)
18 | }
19 |
20 | port := os.Args[2]
21 |
22 | // nodeID: 10000, 10001, 10002
23 | // port: 25700, 25800, 25900
24 | server, err := raft_server.NewRaftSimpleServer(fmt.Sprintf("10.101.44.4:%s", port), nodeID)
25 | if err != nil {
26 | panic(err)
27 | }
28 |
29 | signals := make(chan os.Signal, 1)
30 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGKILL)
31 | <-signals
32 |
33 | server.Stop()
34 | }
35 |
--------------------------------------------------------------------------------
/benchmark/multi-raft/raft_server/raft_server.go:
--------------------------------------------------------------------------------
1 | package raft_server
2 |
3 | import (
4 | "bufio"
5 | "encoding/binary"
6 | "fmt"
7 | "io"
8 | "net"
9 | "runtime"
10 | "strings"
11 | "sync"
12 | "time"
13 |
14 | "github.com/xkeyideal/mraft/experiment/ondisk"
15 | "github.com/xkeyideal/mraft/experiment/store"
16 | )
17 |
18 | const defaultBufferSize = 5 * 1024
19 |
var (
	// raftDataDir is the on-disk root for raft logs and state machine data.
	// NOTE(review): hard-coded to a developer machine path for the benchmark.
	raftDataDir = "/Users/xkey/raftlab/mraft-server-ondisk"
	// raftNodePeers maps raft NodeID -> raft transport address; all three
	// benchmark nodes live on the same LAN host.
	raftNodePeers = map[uint64]string{
		10000: "10.101.44.4:54000",
		10001: "10.101.44.4:54100",
		10002: "10.101.44.4:54200",
	}
	// raftClusterIDs = []uint64{254000, 254100, 254200}
)
29 |
// clientConn bundles a raw TCP connection with buffered reader/writer and
// a lock serializing response writes from concurrent handlers.
type clientConn struct {
	writeLock sync.RWMutex // guards Writer; only Lock/Unlock are used
	net.Conn

	Reader *bufio.Reader
	Writer *bufio.Writer
}
37 |
38 | func newClientConn(conn net.Conn) *clientConn {
39 | return &clientConn{
40 | Conn: conn,
41 | Reader: bufio.NewReaderSize(conn, defaultBufferSize),
42 | Writer: bufio.NewWriterSize(conn, defaultBufferSize),
43 | }
44 | }
45 |
// RaftSimpleServer accepts framed TCP requests and applies them to the
// embedded multi-group raft node.
type RaftSimpleServer struct {
	writelock *sync.Mutex // NOTE(review): initialized but never used in this file

	nh *ondisk.OnDiskRaft // multi-raft node serving reads and proposals

	tcpListener net.Listener
}
53 |
54 | func NewRaftSimpleServer(address string, nodeID uint64) (*RaftSimpleServer, error) {
55 | l, err := net.Listen("tcp", address)
56 | if err != nil {
57 | return nil, err
58 | }
59 |
60 | raftClusterIDs := []uint64{}
61 | var clusterID uint64 = 250000
62 | var i uint64
63 | for i = 0; i < 10; i++ {
64 | raftClusterIDs = append(raftClusterIDs, clusterID+i)
65 | }
66 |
67 | ss := &RaftSimpleServer{
68 | writelock: &sync.Mutex{},
69 | nh: ondisk.NewOnDiskRaft(raftNodePeers, raftClusterIDs),
70 | tcpListener: l,
71 | }
72 |
73 | ss.nh.Start(raftDataDir, nodeID, "", false)
74 |
75 | go ss.RaftTCPServer()
76 |
77 | return ss, nil
78 | }
79 |
// RaftTCPServer runs the accept loop, spawning one handler goroutine per
// connection. It returns nil once the listener is closed via Stop, and an
// error for any other accept failure.
func (ss *RaftSimpleServer) RaftTCPServer() error {
	for {
		conn, err := ss.tcpListener.Accept()
		if err != nil {
			// Yield and retry on transient accept errors.
			// NOTE(review): net.Error.Temporary is deprecated since Go 1.18;
			// consider an explicit backoff/retry policy.
			if nerr, ok := err.(net.Error); ok && nerr.Temporary() {
				runtime.Gosched()
				continue
			}
			// "Listener closed" is detected by matching the error text;
			// NOTE(review): errors.Is(err, net.ErrClosed) is the direct check
			// on Go 1.16+.
			if !strings.Contains(err.Error(), "use of closed network connection") {
				return fmt.Errorf("listener.Accept() error - %s", err)
			}
			break
		}

		go ss.handle(conn)
	}

	return nil
}
100 |
101 | func (ss *RaftSimpleServer) handle(conn net.Conn) error {
102 |
103 | var err error
104 |
105 | client := newClientConn(conn)
106 |
107 | sz := make([]byte, 8)
108 |
109 | for {
110 | client.SetReadDeadline(time.Now().Add(3 * time.Second))
111 |
112 | _, err = client.Reader.Read(sz)
113 | if err != nil {
114 | if err == io.EOF {
115 | err = nil
116 | } else {
117 | err = fmt.Errorf("failed to read datasize - %s", err)
118 | }
119 | break
120 | }
121 |
122 | dataSize := binary.LittleEndian.Uint64(sz)
123 | body := make([]byte, dataSize)
124 |
125 | _, err = io.ReadFull(client.Reader, body)
126 | if err != nil {
127 | err = fmt.Errorf("failed to read databody - %s", err)
128 | break
129 | }
130 |
131 | cmdSize := binary.LittleEndian.Uint32(body[:4])
132 |
133 | command := string(body[4 : 4+cmdSize])
134 |
135 | data := body[4+cmdSize:]
136 |
137 | attr, err := ss.execCommand(command, data)
138 |
139 | ss.sendResponse(client, command, attr, err)
140 | }
141 |
142 | return err
143 | }
144 |
// execCommand dispatches one decoded request.
//
// CommandRead unmarshals a ReadArgument and serves either a linearizable
// read (SyncRead) or a local state-machine read, returning the attribute.
// CommandUpsert unmarshals a RaftAttribute and proposes it through raft,
// returning a nil attribute on success. Unknown commands yield (nil, nil).
func (ss *RaftSimpleServer) execCommand(command string, data []byte) (*store.RaftAttribute, error) {
	switch command {
	case store.CommandRead:
		arg := &store.ReadArgument{}
		err := arg.Unmarshal(data)

		if err != nil {
			return nil, err
		}

		// Sync selects a linearizable read through the raft quorum;
		// otherwise the read is served from the local replica.
		if arg.Sync {
			return ss.nh.SyncRead(arg.Key, arg.HashKey)
		}
		return ss.nh.ReadLocal(arg.Key, arg.HashKey)
	case store.CommandUpsert:
		attr := &store.RaftAttribute{}
		err := attr.Unmarshal(data)
		if err != nil {
			return nil, err
		}

		cmd, _ := attr.GenerateCommand(store.CommandUpsert)
		return nil, ss.nh.AdvanceWrite(cmd)
	}

	return nil, nil
}
172 |
173 | func (ss *RaftSimpleServer) sendResponse(client *clientConn, command string, attr *store.RaftAttribute, err error) (int, error) {
174 | client.writeLock.Lock()
175 |
176 | var e error
177 | var n int
178 |
179 | if err != nil {
180 | n, e = sendFramedResponse(client.Writer, command, []byte("0"), []byte(err.Error()))
181 | } else {
182 | b, _ := attr.Marshal()
183 | n, e = sendFramedResponse(client.Writer, command, []byte("1"), b)
184 | }
185 |
186 | e = client.SetWriteDeadline(time.Now().Add(3 * time.Second))
187 |
188 | e = client.Writer.Flush()
189 |
190 | client.writeLock.Unlock()
191 |
192 | return n, e
193 | }
194 |
// Stop closes the TCP listener, which unblocks the Accept loop in the
// serving goroutine and lets it terminate.
func (ss *RaftSimpleServer) Stop() {
	ss.tcpListener.Close()
}
198 |
199 | func sendFramedResponse(w io.Writer, command string, errSignal, b []byte) (int, error) {
200 |
201 | dataSize := make([]byte, 8)
202 |
203 | l := len(b) + 4 + len(command) + 1
204 |
205 | binary.LittleEndian.PutUint64(dataSize, uint64(l))
206 | if _, err := w.Write(dataSize); err != nil {
207 | return 0, err
208 | }
209 |
210 | cmdSize := make([]byte, 4)
211 | binary.LittleEndian.PutUint32(cmdSize, uint32(len(command)))
212 | if _, err := w.Write(cmdSize); err != nil {
213 | return 0, err
214 | }
215 |
216 | if _, err := w.Write([]byte(command)); err != nil {
217 | return 0, err
218 | }
219 |
220 | if _, err := w.Write(errSignal); err != nil {
221 | return 0, err
222 | }
223 |
224 | if _, err := w.Write(b); err != nil {
225 | return 0, err
226 | }
227 |
228 | return l + 8, nil
229 | }
230 |
--------------------------------------------------------------------------------
/benchmark/multi-raft/server/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "os"
5 | "os/signal"
6 | "syscall"
7 |
8 | "github.com/xkeyideal/mraft/benchmark/multi-raft/server"
9 | )
10 |
11 | func main() {
12 | server, err := server.NewSimpleServer("10.101.44.4:25701")
13 | if err != nil {
14 | panic(err)
15 | }
16 |
17 | signals := make(chan os.Signal, 1)
18 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGKILL)
19 | <-signals
20 |
21 | server.Stop()
22 | }
23 |
--------------------------------------------------------------------------------
/benchmark/multi-raft/server/server.go:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import (
4 | "bufio"
5 | "bytes"
6 | "encoding/binary"
7 | "fmt"
8 | "io"
9 | "net"
10 | "runtime"
11 | "strings"
12 | "sync"
13 | "time"
14 |
15 | "github.com/xkeyideal/mraft/experiment/store"
16 | )
17 |
// defaultBufferSize is the size (5 KiB) of the per-connection bufio
// reader and writer buffers.
const defaultBufferSize = 5 * 1024

// SimpleServer is a minimal framed-TCP server used as the baseline for
// the multi-raft benchmark: it accepts frames and acks each with "done".
type SimpleServer struct {
	// writelock serializes response writes; note it is shared by all
	// connections, not held per connection.
	writelock *sync.Mutex

	// tcpListener accepts benchmark client connections; closed by Stop.
	tcpListener net.Listener
}
25 |
26 | func NewSimpleServer(address string) (*SimpleServer, error) {
27 | l, err := net.Listen("tcp", address)
28 | if err != nil {
29 | return nil, err
30 | }
31 |
32 | ss := &SimpleServer{
33 | writelock: &sync.Mutex{},
34 | tcpListener: l,
35 | }
36 |
37 | go ss.TCPServer()
38 |
39 | return ss, nil
40 | }
41 |
42 | func (ss *SimpleServer) TCPServer() error {
43 | for {
44 | conn, err := ss.tcpListener.Accept()
45 | if err != nil {
46 | if nerr, ok := err.(net.Error); ok && nerr.Temporary() {
47 | runtime.Gosched()
48 | continue
49 | }
50 | // theres no direct way to detect this error because it is not exposed
51 | if !strings.Contains(err.Error(), "use of closed network connection") {
52 | return fmt.Errorf("listener.Accept() error - %s", err)
53 | }
54 | fmt.Println(err)
55 | break
56 | }
57 |
58 | go ss.handle(conn)
59 | }
60 |
61 | return nil
62 | }
63 |
64 | func (ss *SimpleServer) handle(conn net.Conn) error {
65 |
66 | var err error
67 |
68 | reader := bufio.NewReaderSize(conn, defaultBufferSize)
69 | writer := bufio.NewWriterSize(conn, defaultBufferSize)
70 |
71 | sz := make([]byte, 8)
72 |
73 | for {
74 | conn.SetReadDeadline(time.Now().Add(3 * time.Second))
75 |
76 | _, err = reader.Read(sz)
77 | if err != nil {
78 | if err == io.EOF {
79 | err = nil
80 | } else {
81 | err = fmt.Errorf("failed to read datasize - %s", err)
82 | }
83 | break
84 | }
85 |
86 | dataSize := binary.LittleEndian.Uint64(sz)
87 | body := make([]byte, dataSize)
88 |
89 | _, err = io.ReadFull(reader, body)
90 | if err != nil {
91 | err = fmt.Errorf("failed to read databody - %s", err)
92 | break
93 | }
94 |
95 | attr := &store.RaftAttribute{}
96 | err = attr.Unmarshal(body)
97 | if err != nil {
98 | err = fmt.Errorf("failed to unmarshal databody - %s", err)
99 | break
100 | }
101 |
102 | ss.writelock.Lock()
103 |
104 | wd, e := encode([]byte("done"))
105 | if e != nil {
106 | fmt.Println(e)
107 | }
108 |
109 | writer.Write(wd)
110 | //fmt.Println("send:", n, e)
111 |
112 | conn.SetWriteDeadline(time.Now().Add(3 * time.Second))
113 | writer.Flush()
114 |
115 | ss.writelock.Unlock()
116 | }
117 |
118 | return err
119 | }
120 |
// Stop closes the listener, terminating the TCPServer accept loop.
func (ss *SimpleServer) Stop() {
	ss.tcpListener.Close()
}
124 |
// encode prefixes b with its length as a little-endian uint64 and
// returns the resulting frame.
func encode(b []byte) ([]byte, error) {
	frame := make([]byte, 8+len(b))
	binary.LittleEndian.PutUint64(frame[:8], uint64(len(b)))
	copy(frame[8:], b)
	return frame, nil
}
143 |
--------------------------------------------------------------------------------
/benchmark/multi-raft/simple-server-benchmark.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | TCPServer的压测是multi-group raft压测的前提, multi-group raft的网络协议和数据格式均与simple-server服务一致, 下述是simple-server的简单压测数据
4 |
5 | ```
6 | 线程数:5, 每个线程连接数:5, 请求次数:1000, 平均耗时:2062.8 us/op
7 | 线程数:1, 每个线程连接数:1, 请求次数:1000, 平均耗时:99.3 us/op
8 | 线程数:10, 每个线程连接数:1, 请求次数:1000, 平均耗时:812.8 us/op
9 | 线程数:10, 每个线程连接数:4, 请求次数:1000, 平均耗时:3441.0 us/op
10 | ```
--------------------------------------------------------------------------------
/benchmark/thrift-serialize/thrift-serialize.md:
--------------------------------------------------------------------------------
1 | go test -bench=. -benchmem
2 |
3 |
4 |
5 | ```
6 | goos: darwin
7 | goarch: amd64
8 | pkg: github.com/xkeyideal/mraft/benchmark/thrift-serialize
9 | BenchmarkMarshalByThrift-4 3000000 413 ns/op 208 B/op 6 allocs/op
10 | BenchmarkUnmarshalByThrift-4 3000000 418 ns/op 152 B/op 5 allocs/op
11 | PASS
12 | ok github.com/xkeyideal/mraft/benchmark/thrift-serialize 3.343s
13 | ```
--------------------------------------------------------------------------------
/benchmark/thrift-serialize/thrift-serialize_test.go:
--------------------------------------------------------------------------------
1 | package serialize
2 |
3 | import (
4 | "reflect"
5 | "testing"
6 |
7 | "github.com/xkeyideal/mraft/experiment/store"
8 | )
9 |
10 | func TestMarshalByThrift(t *testing.T) {
11 | cmd := store.NewCommand("command", "key", "value", 10000)
12 | b, err := cmd.Marshal()
13 | if err != nil {
14 | t.Fatalf("thrift marshal fatal, %+v", err)
15 | return
16 | }
17 |
18 | cmd2 := &store.Command{}
19 | err = cmd2.Unmarshal(b)
20 | if err != nil {
21 | t.Fatalf("thrift unmarshal fatal, %+v", err)
22 | return
23 | }
24 |
25 | if !reflect.DeepEqual(cmd, cmd2) {
26 | t.Fatalf("thrift unmarshal expected %v, got %v", cmd, cmd2)
27 | }
28 |
29 | t.Logf("%+v, %+v", cmd, cmd2)
30 | }
31 |
32 | func BenchmarkMarshalByThrift(b *testing.B) {
33 | cmd := store.NewCommand("command", "key", "value", 10000)
34 |
35 | for i := 0; i < b.N; i++ {
36 | cmd.Marshal()
37 | }
38 | }
39 |
40 | func BenchmarkUnmarshalByThrift(b *testing.B) {
41 | cmd := store.NewCommand("command", "key", "value", 10000)
42 | bs, err := cmd.Marshal()
43 | if err != nil {
44 | b.Fatalf("thrift marshal fatal, %+v", err)
45 | return
46 | }
47 |
48 | cmd2 := &store.Command{}
49 | b.ResetTimer()
50 | for i := 0; i < b.N; i++ {
51 | cmd2.Unmarshal(bs)
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/config/config.go:
--------------------------------------------------------------------------------
1 | package config
2 |
// OnDiskRaftConfig bundles the static settings for the on-disk raft
// experiment: where raft data lives, the initial peer set, and the raft
// cluster IDs hosted by each node.
type OnDiskRaftConfig struct {
	// RaftDataDir is the root directory for raft data on disk.
	RaftDataDir string

	// RaftNodePeers maps node ID to its raft address.
	RaftNodePeers map[uint64]string

	// RaftClusterIDs lists the raft cluster IDs every node joins.
	RaftClusterIDs []uint64
}

// NewOnDiskRaftConfig returns the hard-coded development defaults: three
// peers on 10.181.20.34 serving clusters 14000, 14100 and 14200.
func NewOnDiskRaftConfig() *OnDiskRaftConfig {
	peers := map[uint64]string{
		10000: "10.181.20.34:11000",
		10001: "10.181.20.34:11100",
		10002: "10.181.20.34:11200",
		//10004: "10.181.20.34:11400",
	}

	return &OnDiskRaftConfig{
		RaftDataDir:    "/Users/xkey/test/mraft-ondisk1",
		RaftNodePeers:  peers,
		RaftClusterIDs: []uint64{14000, 14100, 14200},
	}
}
23 |
--------------------------------------------------------------------------------
/experiment/ondisk/db.go:
--------------------------------------------------------------------------------
1 | package ondisk
2 |
3 | import (
4 | "bytes"
5 | "crypto/md5"
6 | "errors"
7 | "fmt"
8 | "io"
9 | "io/ioutil"
10 | "math/rand"
11 | "os"
12 | "path/filepath"
13 | "runtime"
14 | "time"
15 | )
16 |
17 | const (
18 | mraftDBDirName string = "/Users/xkey/raftlab/mraft-rocksdb"
19 | currentDBFilename string = "current"
20 | updatingDBFilename string = "current.updating"
21 | )
22 |
23 | func isNewRun(dir string) bool {
24 | fp := filepath.Join(dir, currentDBFilename)
25 | if _, err := os.Stat(fp); os.IsNotExist(err) {
26 | return true
27 | }
28 | return false
29 | }
30 |
// getNodeDBDirName returns the per-replica DB root,
// <mraftDBDirName>/<clusterID>_<nodeID>.
func getNodeDBDirName(clusterID uint64, nodeID uint64) string {
	return filepath.Join(mraftDBDirName, fmt.Sprintf("%d_%d", clusterID, nodeID))
}
34 |
// getNewRandomDBDirName returns a fresh, effectively unique DB directory
// name under dir, built from a random uint64 and the current time in
// nanoseconds.
func getNewRandomDBDirName(dir string) string {
	name := fmt.Sprintf("%d_%d", rand.Uint64(), time.Now().UnixNano())
	return filepath.Join(dir, name)
}
41 |
// createNodeDataDir creates dir (and any missing parents) with mode 0755.
func createNodeDataDir(dir string) error {
	return os.MkdirAll(dir, 0755)
}
45 |
46 | func getCurrentDBDirName(dir string) (string, error) {
47 | fp := filepath.Join(dir, currentDBFilename)
48 | f, err := os.OpenFile(fp, os.O_RDONLY, 0755)
49 | if err != nil {
50 | return "", err
51 | }
52 | defer f.Close()
53 |
54 | data, err := io.ReadAll(f)
55 | if err != nil {
56 | return "", err
57 | }
58 | if len(data) <= 8 {
59 | return "", errors.New("corrupted content")
60 | }
61 | crc := data[:8]
62 | content := data[8:]
63 | h := md5.New()
64 | if _, err := h.Write(content); err != nil {
65 | return "", err
66 | }
67 | if !bytes.Equal(crc, h.Sum(nil)[:8]) {
68 | return "", errors.New("corrupted content with not matched crc")
69 | }
70 | return string(content), nil
71 | }
72 |
73 | func cleanupNodeDataDir(dir string) error {
74 | os.RemoveAll(filepath.Join(dir, updatingDBFilename))
75 |
76 | dbdir, err := getCurrentDBDirName(dir)
77 | if err != nil {
78 | return err
79 | }
80 |
81 | files, err := ioutil.ReadDir(dir)
82 | if err != nil {
83 | return err
84 | }
85 |
86 | for _, fi := range files {
87 | if !fi.IsDir() {
88 | continue
89 | }
90 | //fmt.Printf("dbdir %s, fi.name %s, dir %s\n", dbdir, fi.Name(), dir)
91 | toDelete := filepath.Join(dir, fi.Name())
92 | if toDelete != dbdir {
93 | //fmt.Printf("removing %s\n", toDelete)
94 | if err := os.RemoveAll(toDelete); err != nil {
95 | return err
96 | }
97 | }
98 | }
99 |
100 | return nil
101 | }
102 |
103 | func replaceCurrentDBFile(dir string) error {
104 | fp := filepath.Join(dir, currentDBFilename)
105 | tmpFp := filepath.Join(dir, updatingDBFilename)
106 | if err := os.Rename(tmpFp, fp); err != nil {
107 | return err
108 | }
109 | return SyncDir(dir)
110 | }
111 |
// saveCurrentDBDirName stages dbdir as the next current DB directory:
// it writes an 8-byte md5-derived checksum followed by the directory
// name into "current.updating", fsyncs the file, and fsyncs the parent
// directory on return. replaceCurrentDBFile later renames the staging
// file into place.
func saveCurrentDBDirName(dir string, dbdir string) error {
	h := md5.New()
	if _, err := h.Write([]byte(dbdir)); err != nil {
		return err
	}

	fp := filepath.Join(dir, updatingDBFilename)
	f, err := os.Create(fp)
	if err != nil {
		return err
	}

	// close and directory-sync regardless of which write failed
	defer func() {
		f.Close()
		SyncDir(dir)
	}()

	// checksum first, then the payload; getCurrentDBDirName expects
	// exactly this layout
	if _, err := f.Write(h.Sum(nil)[:8]); err != nil {
		return err
	}
	if _, err := f.Write([]byte(dbdir)); err != nil {
		return err
	}

	if err := f.Sync(); err != nil {
		return err
	}

	return nil
}
142 |
const (
	// DefaultFileMode is the default file mode for files generated by
	// Dragonboat.
	DefaultFileMode = 0640
	// defaultDirFileMode is the mode used for directories created by
	// Mkdir/MkdirAll below.
	defaultDirFileMode = 0750
	// deleteFilename marks a directory as logically deleted; consumers
	// of this marker are outside this file.
	deleteFilename = "DELETED.dragonboat"
)
150 |
// Exist returns whether the specified filesystem entry exists.
func Exist(name string) (bool, error) {
	if _, err := os.Stat(name); err != nil {
		if os.IsNotExist(err) {
			return false, nil
		}
		return false, err
	}
	return true, nil
}
162 |
163 | // MkdirAll creates the specified dir along with any necessary parents.
164 | func MkdirAll(dir string) error {
165 | exist, err := Exist(dir)
166 | if err != nil {
167 | return err
168 | }
169 | if exist {
170 | return nil
171 | }
172 | parent := filepath.Dir(dir)
173 | exist, err = Exist(parent)
174 | if err != nil {
175 | return err
176 | }
177 | if !exist {
178 | if err := MkdirAll(parent); err != nil {
179 | return err
180 | }
181 | }
182 | return Mkdir(dir)
183 | }
184 |
// Mkdir creates the specified dir with defaultDirFileMode and fsyncs its
// parent so the new directory entry is durable.
func Mkdir(dir string) error {
	if err := os.Mkdir(dir, defaultDirFileMode); err != nil {
		return err
	}
	return SyncDir(filepath.Dir(dir))
}
192 |
// SyncDir calls fsync on the specified directory. It is a no-op on
// Windows, where directories cannot be opened for fsync. It panics when
// dir is not a directory (programmer error).
func SyncDir(dir string) (err error) {
	if runtime.GOOS == "windows" {
		return nil
	}
	fileInfo, err := os.Stat(dir)
	if err != nil {
		return err
	}
	if !fileInfo.IsDir() {
		panic("not a dir")
	}
	df, err := os.Open(filepath.Clean(dir))
	if err != nil {
		return err
	}
	// surface the Close error only when Sync itself succeeded; the named
	// result makes this defer able to override the return value
	defer func() {
		if cerr := df.Close(); err == nil {
			err = cerr
		}
	}()
	return df.Sync()
}
216 |
--------------------------------------------------------------------------------
/experiment/ondisk/engine/engine.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "log"
7 | "net/http"
8 | "time"
9 |
10 | "github.com/xkeyideal/mraft/config"
11 | "github.com/xkeyideal/mraft/experiment/ondisk"
12 | "github.com/xkeyideal/mraft/experiment/ondisk/raftd"
13 |
14 | "github.com/gin-gonic/gin"
15 | )
16 |
// Engine wires the multi-group raft node to an HTTP API: it owns the gin
// router, the http.Server, and the raft node host.
type Engine struct {
	// prefix is the URL prefix for all raft routes ("/mraft").
	prefix string

	// nodeID is this node's raft node ID.
	nodeID uint64
	// raftDataDir is the root directory for raft data on disk.
	raftDataDir string

	server *http.Server
	router *gin.Engine

	// nh drives the underlying multi-group raft node host.
	nh *ondisk.OnDiskRaft

	// mraftHandle exposes the HTTP handlers backed by nh.
	mraftHandle *raftd.MRaftHandle
}
30 |
31 | func NewEngine(nodeID uint64, port string) *Engine {
32 |
33 | cfg := config.NewOnDiskRaftConfig()
34 |
35 | router := gin.New()
36 | router.Use(gin.Recovery())
37 |
38 | var nh *ondisk.OnDiskRaft
39 | if nodeID == 10003 || nodeID == 10004 || nodeID == 5 {
40 | nh = ondisk.NewOnDiskRaft(map[uint64]string{}, cfg.RaftClusterIDs)
41 | } else {
42 | nh = ondisk.NewOnDiskRaft(cfg.RaftNodePeers, cfg.RaftClusterIDs)
43 | }
44 |
45 | engine := &Engine{
46 | nodeID: nodeID,
47 | raftDataDir: cfg.RaftDataDir,
48 | prefix: "/mraft",
49 | router: router,
50 | server: &http.Server{
51 | Addr: fmt.Sprintf("0.0.0.0:%s", port), //"9080"
52 | Handler: router,
53 | ReadTimeout: 20 * time.Second,
54 | WriteTimeout: 40 * time.Second,
55 | },
56 | nh: nh,
57 | mraftHandle: raftd.NewMRaftHandle(nh),
58 | }
59 |
60 | engine.registerMraftRouter(router)
61 |
62 | return engine
63 | }
64 |
65 | func (engine *Engine) Start() {
66 | join := false
67 | nodeAddr := ""
68 | if engine.nodeID == 10003 {
69 | join = true
70 | nodeAddr = "10.181.20.34:11300"
71 | } else if engine.nodeID == 10004 {
72 | join = true
73 | nodeAddr = "10.181.20.34:11400"
74 | } else if engine.nodeID == 10005 {
75 | join = true
76 | nodeAddr = "10.181.20.34:11500"
77 | }
78 |
79 | engine.nh.Start(engine.raftDataDir, engine.nodeID, nodeAddr, join)
80 |
81 | // 等待raft集群ready
82 | for {
83 | if engine.nh.ClusterAllReady() {
84 | break
85 | }
86 | time.Sleep(2 * time.Second)
87 | }
88 |
89 | log.Println("cluster all ready")
90 |
91 | if err := engine.server.ListenAndServe(); err != nil {
92 | panic(err.Error())
93 | }
94 | }
95 |
96 | func (engine *Engine) Stop() {
97 | if engine.server != nil {
98 | if err := engine.server.Shutdown(context.Background()); err != nil {
99 | fmt.Println("Server Shutdown: ", err)
100 | }
101 | }
102 |
103 | engine.nh.Stop()
104 | }
105 |
--------------------------------------------------------------------------------
/experiment/ondisk/engine/mraft_router.go:
--------------------------------------------------------------------------------
1 | package engine
2 |
3 | import "github.com/gin-gonic/gin"
4 |
// registerMraftRouter mounts all raft HTTP endpoints under the engine's
// prefix: node info and metrics, key read/upsert/delete, and cluster
// membership changes (join/del).
func (engine *Engine) registerMraftRouter(router *gin.Engine) {
	group := router.Group(engine.prefix)
	{
		group.GET("/info", engine.mraftHandle.Info)
		group.GET("/metrics", engine.mraftHandle.RaftMetrics)

		group.GET("/key", engine.mraftHandle.Query)
		group.POST("/key", engine.mraftHandle.Upsert)
		group.DELETE("/key", engine.mraftHandle.Delete)

		group.GET("/join", engine.mraftHandle.JoinNode)
		group.GET("/del", engine.mraftHandle.DelNode)
	}
}
19 |
--------------------------------------------------------------------------------
/experiment/ondisk/fsm.go:
--------------------------------------------------------------------------------
1 | package ondisk
2 |
3 | import (
4 | "encoding/binary"
5 | "errors"
6 | "fmt"
7 | "io"
8 | "os"
9 | "sync/atomic"
10 |
11 | "github.com/cockroachdb/pebble"
12 | "github.com/xkeyideal/mraft/experiment/store"
13 |
14 | sm "github.com/lni/dragonboat/v3/statemachine"
15 | )
16 |
const (
	// appliedIndexKey is the DB key under which the last applied raft
	// log index is persisted (written by Update, read on Open).
	appliedIndexKey = "disk_kv_applied_index"
	// endSignal is a sentinel marker value; its consumers are outside
	// this file.
	endSignal = "mraft-end-signal"
)
21 |
// DiskKV is an on-disk raft state machine backed by pebble. It keeps two
// store slots and flips between them (via the atomically accessed
// dbIndex) when a snapshot is restored, so readers never see a
// half-replaced DB.
type DiskKV struct {
	clusterID uint64
	nodeID uint64

	// dbIndex selects the active entry of stores; read and written with
	// sync/atomic.
	dbIndex uint32
	stores []*store.Store
	// lastApplied is the raft index of the last applied entry.
	lastApplied uint64
}
30 |
// NewDiskKV creates the on-disk state machine for one raft replica. Two
// store slots are reserved so a snapshot restore can swap in a new DB
// without replacing the slice.
func NewDiskKV(cluserID uint64, nodeID uint64) sm.IOnDiskStateMachine {
	return &DiskKV{
		clusterID: cluserID,
		nodeID: nodeID,
		stores: make([]*store.Store, 2),
	}
}
38 |
// queryAppliedIndex reads the persisted last-applied raft index from the
// currently active store slot.
func (d *DiskKV) queryAppliedIndex() (uint64, error) {
	idx := atomic.LoadUint32(&d.dbIndex)

	return d.stores[idx].LookupAppliedIndex([]byte(appliedIndexKey))
}
44 |
45 | func (d *DiskKV) Open(stopc <-chan struct{}) (uint64, error) {
46 | select {
47 | case <-stopc:
48 | return 0, sm.ErrOpenStopped
49 | default:
50 | dir := getNodeDBDirName(d.clusterID, d.nodeID)
51 | if err := createNodeDataDir(dir); err != nil {
52 | return 0, nil
53 | }
54 |
55 | var dbdir string
56 | if !isNewRun(dir) {
57 | if err := cleanupNodeDataDir(dir); err != nil {
58 | return 0, err
59 | }
60 | var err error
61 | dbdir, err = getCurrentDBDirName(dir)
62 | if err != nil {
63 | return 0, err
64 | }
65 | if _, err := os.Stat(dbdir); err != nil {
66 | if os.IsNotExist(err) {
67 | return 0, err
68 | }
69 | }
70 | } else {
71 | dbdir = getNewRandomDBDirName(dir)
72 | if err := saveCurrentDBDirName(dir, dbdir); err != nil {
73 | return 0, err
74 | }
75 | if err := replaceCurrentDBFile(dir); err != nil {
76 | return 0, err
77 | }
78 | }
79 |
80 | store, err := store.NewStore(dbdir)
81 | if err != nil {
82 | return 0, err
83 | }
84 |
85 | d.dbIndex = 0
86 |
87 | d.stores[d.dbIndex] = store
88 | appliedIndex, err := d.queryAppliedIndex()
89 | if err != nil {
90 | return 0, err
91 | }
92 |
93 | d.lastApplied = appliedIndex
94 |
95 | return appliedIndex, nil
96 | }
97 | }
98 |
// Update is safe to call concurrently with Lookup and SaveSnapshot.
//
// It applies a batch of raft log entries to the active store inside a
// single pebble batch: delete/upsert commands mutate the DB, entries at
// or below lastApplied are skipped as replays, and entries that fail to
// unmarshal are silently dropped. The index of the batch's final entry
// is persisted under appliedIndexKey in the same batch and becomes the
// new lastApplied.
func (d *DiskKV) Update(ents []sm.Entry) ([]sm.Entry, error) {

	if len(ents) == 0 {
		return ents, nil
	}

	dbIndex := atomic.LoadUint32(&d.dbIndex)
	db := d.stores[dbIndex]

	batch := db.Batch()
	defer batch.Close()

	for index, entry := range ents {
		// already applied before a restart/snapshot — skip the replay
		if entry.Index <= d.lastApplied {
			continue
		}

		cmd := &store.Command{}
		err := cmd.Unmarshal(entry.Cmd)
		if err != nil {
			// undecodable entry: dropped without a Result
			continue
		}

		switch cmd.Cmd {
		case store.CommandDelete:
			batch.Delete([]byte(cmd.Key), db.GetWo())
		case store.CommandUpsert:
			batch.Set([]byte(cmd.Key), []byte(cmd.Val), db.GetWo())
		default:
		}

		ents[index].Result = sm.Result{Value: uint64(len(ents[index].Cmd))}
	}

	// persist the applied index atomically with the data writes
	idx := fmt.Sprintf("%d", ents[len(ents)-1].Index)
	batch.Set([]byte(appliedIndexKey), []byte(idx), db.GetWo())

	if err := db.Write(batch); err != nil {
		return nil, err
	}

	d.lastApplied = ents[len(ents)-1].Index

	return ents, nil
}
145 |
// Lookup is safe to call concurrently with Update and
// RecoverFromSnapshot. key must be a []byte; the value is read from
// whichever store slot is currently active.
func (d *DiskKV) Lookup(key interface{}) (interface{}, error) {
	dbIndex := atomic.LoadUint32(&d.dbIndex)
	if d.stores[dbIndex] != nil {
		v, err := d.stores[dbIndex].Lookup(key.([]byte))
		return v, err
	}
	return nil, errors.New("db is nil")
}
155 |
// NALookup is the non-interface variant of Lookup: it reads key from the
// active store slot without boxing the key through interface{}.
func (d *DiskKV) NALookup(key []byte) ([]byte, error) {
	dbIndex := atomic.LoadUint32(&d.dbIndex)
	if d.stores[dbIndex] != nil {
		return d.stores[dbIndex].NALookup(key)
	}
	return nil, errors.New("db is nil")
}
163 |
// diskKVCtx carries the store and its point-in-time pebble snapshot
// between PrepareSnapshot and SaveSnapshot.
type diskKVCtx struct {
	store *store.Store
	snapshot *pebble.Snapshot
}
168 |
// PrepareSnapshot captures the active store together with a
// point-in-time pebble snapshot of it; the pair is handed to
// SaveSnapshot for streaming without blocking writers.
func (d *DiskKV) PrepareSnapshot() (interface{}, error) {
	dbIndex := atomic.LoadUint32(&d.dbIndex)
	store := d.stores[dbIndex]

	return &diskKVCtx{
		store: store,
		snapshot: store.NewSnapshot(),
	}, nil
}
178 |
// saveToWriter streams every key/value pair visible in snapshot to w as
// [4-byte LE key size][key][4-byte LE value size][value] records — the
// format RecoverFromSnapshot parses on the restoring node. The
// appliedIndexKey record is included, so the restored DB carries its
// applied index.
func (d *DiskKV) saveToWriter(store *store.Store, snapshot *pebble.Snapshot, w io.Writer) error {
	iter := snapshot.NewIter(store.GetRo())
	defer iter.Close()

	// reusable 4-byte scratch buffers for the length prefixes
	keySize := make([]byte, 4)
	valSize := make([]byte, 4)
	for iter.First(); iter.Valid(); iter.Next() {
		key := iter.Key()
		val := iter.Value()

		kl := len(key)
		vl := len(val)

		binary.LittleEndian.PutUint32(keySize, uint32(kl))
		if _, err := w.Write(keySize); err != nil {
			return err
		}

		if _, err := w.Write(key); err != nil {
			return err
		}

		binary.LittleEndian.PutUint32(valSize, uint32(vl))
		if _, err := w.Write(valSize); err != nil {
			return err
		}

		if _, err := w.Write(val); err != nil {
			return err
		}
	}

	return nil
}
213 |
// SaveSnapshot streams the snapshot prepared by PrepareSnapshot to w,
// closing the pebble snapshot when done. It aborts with
// sm.ErrSnapshotStopped when the node is shutting down.
func (d *DiskKV) SaveSnapshot(ctx interface{}, w io.Writer, done <-chan struct{}) error {
	select {
	case <-done:
		return sm.ErrSnapshotStopped
	default:
		ctxdata := ctx.(*diskKVCtx)

		store := ctxdata.store
		ss := ctxdata.snapshot
		defer ss.Close()

		return d.saveToWriter(store, ss, w)
	}
}
228 |
229 | // RecoverFromSnapshot 执行时,sm 的其他接口不会被同时执行
230 | func (d *DiskKV) RecoverFromSnapshot(reader io.Reader, done <-chan struct{}) error {
231 | dir := getNodeDBDirName(d.clusterID, d.nodeID)
232 | dbdir := getNewRandomDBDirName(dir)
233 | oldDirName, err := getCurrentDBDirName(dir)
234 | if err != nil {
235 | return err
236 | }
237 |
238 | store, err := store.NewStore(dbdir)
239 | if err != nil {
240 | return err
241 | }
242 |
243 | sz := make([]byte, 4)
244 | for {
245 | if isStop(done) {
246 | return sm.ErrSnapshotStopped
247 | }
248 |
249 | // 先读key
250 | _, err := io.ReadFull(reader, sz) // key size
251 | if err == io.EOF {
252 | break
253 | }
254 |
255 | if err != nil {
256 | return err
257 | }
258 |
259 | toRead := binary.LittleEndian.Uint64(sz)
260 | kdata := make([]byte, toRead)
261 | _, err = io.ReadFull(reader, kdata) // key data
262 | if err == io.EOF {
263 | break
264 | }
265 | if err != nil {
266 | return err
267 | }
268 |
269 | // 再读val
270 | _, err = io.ReadFull(reader, sz) // val size
271 | if err == io.EOF {
272 | break
273 | }
274 | if err != nil {
275 | return err
276 | }
277 |
278 | toRead = binary.LittleEndian.Uint64(sz)
279 | vdata := make([]byte, toRead)
280 | _, err = io.ReadFull(reader, vdata) // val data
281 | if err == io.EOF {
282 | break
283 | }
284 | if err != nil {
285 | return err
286 | }
287 |
288 | store.SetKv(kdata, vdata)
289 | }
290 |
291 | store.Flush() // db 刷盘
292 |
293 | if err := saveCurrentDBDirName(dir, dbdir); err != nil {
294 | return err
295 | }
296 | if err := replaceCurrentDBFile(dir); err != nil {
297 | return err
298 | }
299 |
300 | oldDbIndex := atomic.LoadUint32(&d.dbIndex)
301 | newDbIndex := 1 - oldDbIndex
302 | atomic.StoreUint32(&d.dbIndex, newDbIndex)
303 | d.stores[newDbIndex] = store
304 |
305 | newLastApplied, err := d.queryAppliedIndex()
306 | if err != nil {
307 | return err
308 | }
309 |
310 | d.stores[oldDbIndex].Close()
311 |
312 | d.lastApplied = newLastApplied
313 |
314 | return os.RemoveAll(oldDirName)
315 | }
316 |
317 | func (d *DiskKV) Close() error {
318 | for i := 0; i < 2; i++ {
319 | if d.stores[i] != nil {
320 | d.stores[i].Close()
321 | }
322 | }
323 |
324 | return nil
325 | }
326 |
// Sync is a no-op for this state machine; durability is presumably
// provided by the store's pebble write options — NOTE(review): confirm.
func (d *DiskKV) Sync() error {
	return nil
}
330 |
// isStop reports, without blocking, whether ch has been closed (or
// signaled).
func isStop(ch <-chan struct{}) bool {
	select {
	case <-ch:
		return true
	default:
	}
	return false
}
339 |
--------------------------------------------------------------------------------
/experiment/ondisk/main/app.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "os/signal"
7 | "strconv"
8 | "syscall"
9 |
10 | "github.com/xkeyideal/mraft/experiment/ondisk/engine"
11 | )
12 |
13 | // CGO_CFLAGS="-I/usr/local/include/rocksdb" CGO_LDFLAGS="-L/usr/local/lib -lrocksdb -lstdc++ -lm -lz -lbz2 -lsnappy -llz4" go run app.go 10000 9800
14 | func main() {
15 | if len(os.Args) <= 2 {
16 | fmt.Println("input arg $1 nodeID, arg $2 port")
17 | os.Exit(1)
18 | }
19 |
20 | nodeID, err := strconv.ParseUint(os.Args[1], 10, 64)
21 | if err != nil {
22 | fmt.Println(err.Error())
23 | os.Exit(1)
24 | }
25 |
26 | port := os.Args[2]
27 |
28 | engine := engine.NewEngine(nodeID, port)
29 |
30 | go engine.Start()
31 |
32 | signals := make(chan os.Signal, 1)
33 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGKILL)
34 | <-signals
35 |
36 | engine.Stop()
37 | }
38 |
--------------------------------------------------------------------------------
/experiment/ondisk/metrics.go:
--------------------------------------------------------------------------------
1 | package ondisk
2 |
3 | import "github.com/rcrowley/go-metrics"
4 |
// ondiskMetrics tracks request counters for the on-disk raft node: the
// total number of operations and the subset that errored.
type ondiskMetrics struct {
	total metrics.Counter
	err metrics.Counter
}
9 |
// newOndiskMetrics returns freshly zeroed total/error counters.
func newOndiskMetrics() *ondiskMetrics {
	return &ondiskMetrics{
		total: metrics.NewCounter(),
		err: metrics.NewCounter(),
	}
}
16 |
17 | func (m *ondiskMetrics) add(delta int64, err bool) {
18 | m.total.Inc(delta)
19 |
20 | if err {
21 | m.err.Inc(delta)
22 | }
23 | }
24 |
// clear resets both counters to zero.
func (m *ondiskMetrics) clear() {
	m.total.Clear()
	m.err.Clear()
}
29 |
--------------------------------------------------------------------------------
/experiment/ondisk/raftd/mraft.go:
--------------------------------------------------------------------------------
1 | package raftd
2 |
3 | import (
4 | "encoding/json"
5 | "io/ioutil"
6 | "net/http"
7 | "strconv"
8 |
9 | "github.com/xkeyideal/mraft/experiment/ondisk"
10 | "github.com/xkeyideal/mraft/experiment/store"
11 |
12 | "github.com/gin-gonic/gin"
13 | )
14 |
// MRaftHandle adapts the on-disk raft node to gin HTTP handlers.
type MRaftHandle struct {
	raft *ondisk.OnDiskRaft
}

// NewMRaftHandle wraps raft in an HTTP handler set.
func NewMRaftHandle(raft *ondisk.OnDiskRaft) *MRaftHandle {
	return &MRaftHandle{
		raft: raft,
	}
}
24 |
// Info returns the raft node's info payload in the standard envelope.
func (mh *MRaftHandle) Info(c *gin.Context) {
	SetStrResp(http.StatusOK, 0, "", mh.raft.Info(), c)
}
28 |
29 | func (mh *MRaftHandle) Query(c *gin.Context) {
30 | key := c.Query("key")
31 | sync := c.Query("sync")
32 | hashKey, err := strconv.ParseUint(c.Query("hashKey"), 10, 64)
33 | if err != nil {
34 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
35 | return
36 | }
37 |
38 | if sync == "true" {
39 | val, err := mh.raft.SyncRead(key, hashKey)
40 | if err != nil {
41 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
42 | return
43 | }
44 | SetStrResp(http.StatusOK, 0, "", val, c)
45 | return
46 | }
47 |
48 | val, err := mh.raft.ReadLocal(key, hashKey)
49 | if err != nil {
50 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
51 | return
52 | }
53 |
54 | SetStrResp(http.StatusOK, 0, "", val, c)
55 | }
56 |
57 | func (mh *MRaftHandle) Upsert(c *gin.Context) {
58 | bytes, err := ioutil.ReadAll(c.Request.Body)
59 | if err != nil {
60 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
61 | return
62 | }
63 |
64 | attr := &store.RaftAttribute{}
65 | err = json.Unmarshal(bytes, attr)
66 | if err != nil {
67 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
68 | return
69 | }
70 |
71 | cmd, err := attr.GenerateCommand(store.CommandUpsert)
72 | if err != nil {
73 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
74 | return
75 | }
76 |
77 | mh.raft.Write(cmd)
78 |
79 | SetStrResp(http.StatusOK, 0, "", "OK", c)
80 | }
81 |
82 | func (mh *MRaftHandle) Delete(c *gin.Context) {
83 | bytes, err := ioutil.ReadAll(c.Request.Body)
84 | if err != nil {
85 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
86 | return
87 | }
88 |
89 | attr := &store.RaftAttribute{}
90 | err = json.Unmarshal(bytes, attr)
91 | if err != nil {
92 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
93 | return
94 | }
95 |
96 | cmd, err := attr.GenerateCommand(store.CommandDelete)
97 | if err != nil {
98 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
99 | return
100 | }
101 |
102 | mh.raft.Write(cmd)
103 |
104 | SetStrResp(http.StatusOK, 0, "", "OK", c)
105 | }
106 |
107 | func (mh *MRaftHandle) JoinNode(c *gin.Context) {
108 | nodeID, err := strconv.ParseUint(c.Query("nodeID"), 10, 64)
109 | if err != nil {
110 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
111 | return
112 | }
113 |
114 | nodeAddr := c.Query("nodeAddr")
115 |
116 | err = mh.raft.RaftAddNode(nodeID, nodeAddr)
117 | if err != nil {
118 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
119 | return
120 | }
121 | SetStrResp(http.StatusOK, 0, "", "OK", c)
122 | }
123 |
124 | func (mh *MRaftHandle) DelNode(c *gin.Context) {
125 | nodeID, err := strconv.ParseUint(c.Query("nodeID"), 10, 64)
126 | if err != nil {
127 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
128 | return
129 | }
130 |
131 | err = mh.raft.RaftRemoveNode(nodeID)
132 | if err != nil {
133 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
134 | return
135 | }
136 | SetStrResp(http.StatusOK, 0, "", "OK", c)
137 | }
138 |
// RaftMetrics returns the raft node's metrics payload in the standard
// envelope.
func (mh *MRaftHandle) RaftMetrics(c *gin.Context) {
	SetStrResp(http.StatusOK, 0, "", mh.raft.MetricsInfo(), c)
}
142 |
143 | func SetStrResp(httpCode, code int, msg string, result interface{}, c *gin.Context) {
144 |
145 | m := msg
146 |
147 | if code == 0 {
148 | c.JSON(httpCode, gin.H{
149 | "code": code,
150 | "msg": m,
151 | "result": result,
152 | })
153 | } else {
154 | c.JSON(httpCode, gin.H{
155 | "code": code,
156 | "msg": m,
157 | })
158 | }
159 | }
160 |
--------------------------------------------------------------------------------
/experiment/simpleondisk/db.go:
--------------------------------------------------------------------------------
1 | package simpleondisk
2 |
3 | import (
4 | "bytes"
5 | "crypto/md5"
6 | "errors"
7 | "fmt"
8 | "io/ioutil"
9 | "math/rand"
10 | "os"
11 | "path/filepath"
12 | "runtime"
13 | "time"
14 | )
15 |
const (
	// mraftDBDirName is the root under which every (cluster, node) pair keeps
	// its pebble data. NOTE(review): hard-coded absolute path — looks
	// machine-specific; confirm before deploying elsewhere.
	mraftDBDirName string = "/Volumes/ST1000/mraft-simplerocksdb"
	// currentDBFilename names the checksummed pointer file recording the
	// active DB directory.
	currentDBFilename string = "current"
	// updatingDBFilename is the staging file that is atomically renamed over
	// currentDBFilename (see replaceCurrentDBFile).
	updatingDBFilename string = "current.updating"
)
21 |
22 | func isNewRun(dir string) bool {
23 | fp := filepath.Join(dir, currentDBFilename)
24 | if _, err := os.Stat(fp); os.IsNotExist(err) {
25 | return true
26 | }
27 | return false
28 | }
29 |
// getNodeDBDirName returns the per-(cluster, node) data directory under the
// fixed mraftDBDirName root, i.e. <root>/<clusterID>_<nodeID>.
func getNodeDBDirName(clusterID uint64, nodeID uint64) string {
	return filepath.Join(mraftDBDirName, fmt.Sprintf("%d_%d", clusterID, nodeID))
}
33 |
// getNewRandomDBDirName returns a fresh directory path under dir, made
// unique by a random uint64 combined with the current time in nanoseconds.
func getNewRandomDBDirName(dir string) string {
	name := fmt.Sprintf("%d_%d", rand.Uint64(), time.Now().UnixNano())
	return filepath.Join(dir, name)
}
40 |
// createNodeDataDir makes dir (and any missing parents) with mode 0755.
func createNodeDataDir(dir string) error {
	return os.MkdirAll(dir, 0755)
}
44 |
45 | func getCurrentDBDirName(dir string) (string, error) {
46 | fp := filepath.Join(dir, currentDBFilename)
47 | f, err := os.OpenFile(fp, os.O_RDONLY, 0755)
48 | if err != nil {
49 | return "", err
50 | }
51 | defer f.Close()
52 |
53 | data, err := ioutil.ReadAll(f)
54 | if err != nil {
55 | return "", err
56 | }
57 | if len(data) <= 8 {
58 | return "", errors.New("corrupted content")
59 | }
60 | crc := data[:8]
61 | content := data[8:]
62 | h := md5.New()
63 | if _, err := h.Write(content); err != nil {
64 | return "", err
65 | }
66 | if !bytes.Equal(crc, h.Sum(nil)[:8]) {
67 | return "", errors.New("corrupted content with not matched crc")
68 | }
69 | return string(content), nil
70 | }
71 |
// cleanupNodeDataDir removes the leftover "current.updating" staging file
// and every sub-directory of dir except the one the "current" file points
// at. It is called on restart to drop DB directories left behind by
// interrupted snapshot recoveries.
func cleanupNodeDataDir(dir string) error {
	// Best-effort: the staging file may legitimately not exist, so the
	// RemoveAll error is deliberately ignored.
	os.RemoveAll(filepath.Join(dir, updatingDBFilename))

	dbdir, err := getCurrentDBDirName(dir)
	if err != nil {
		return err
	}

	files, err := ioutil.ReadDir(dir)
	if err != nil {
		return err
	}

	for _, fi := range files {
		// Only directories are DB candidates; pointer files are kept.
		if !fi.IsDir() {
			continue
		}
		//fmt.Printf("dbdir %s, fi.name %s, dir %s\n", dbdir, fi.Name(), dir)
		toDelete := filepath.Join(dir, fi.Name())
		if toDelete != dbdir {
			//fmt.Printf("removing %s\n", toDelete)
			if err := os.RemoveAll(toDelete); err != nil {
				return err
			}
		}
	}

	return nil
}
101 |
102 | func replaceCurrentDBFile(dir string) error {
103 | fp := filepath.Join(dir, currentDBFilename)
104 | tmpFp := filepath.Join(dir, updatingDBFilename)
105 | if err := os.Rename(tmpFp, fp); err != nil {
106 | return err
107 | }
108 | return SyncDir(dir)
109 | }
110 |
// saveCurrentDBDirName records dbdir as the pending "current" DB directory.
// It writes an 8-byte MD5-prefix checksum followed by dbdir into the
// staging file "current.updating"; replaceCurrentDBFile later renames it
// into place. The file is fsynced before returning and the directory is
// fsynced from the deferred cleanup.
func saveCurrentDBDirName(dir string, dbdir string) error {
	h := md5.New()
	if _, err := h.Write([]byte(dbdir)); err != nil {
		return err
	}

	fp := filepath.Join(dir, updatingDBFilename)
	f, err := os.Create(fp)
	if err != nil {
		return err
	}

	defer func() {
		// NOTE(review): Close/SyncDir errors here are discarded; the
		// explicit f.Sync below already covers data durability.
		f.Close()
		SyncDir(dir)
	}()

	// Checksum first, then the payload (see getCurrentDBDirName).
	if _, err := f.Write(h.Sum(nil)[:8]); err != nil {
		return err
	}
	if _, err := f.Write([]byte(dbdir)); err != nil {
		return err
	}

	if err := f.Sync(); err != nil {
		return err
	}

	return nil
}
141 |
const (
	// DefaultFileMode is the default file mode for files generated by
	// Dragonboat.
	DefaultFileMode = 0640
	// defaultDirFileMode is the mode Mkdir uses for new directories.
	defaultDirFileMode = 0750
	// deleteFilename marks a directory as logically deleted; unused in this
	// file — presumably consumed elsewhere. TODO confirm.
	deleteFilename = "DELETED.dragonboat"
)
149 |
// Exist returns whether the specified filesystem entry exists. A missing
// entry is reported as (false, nil); any other stat failure is returned.
func Exist(name string) (bool, error) {
	switch _, err := os.Stat(name); {
	case err == nil:
		return true, nil
	case os.IsNotExist(err):
		return false, nil
	default:
		return false, err
	}
}
161 |
162 | // MkdirAll creates the specified dir along with any necessary parents.
163 | func MkdirAll(dir string) error {
164 | exist, err := Exist(dir)
165 | if err != nil {
166 | return err
167 | }
168 | if exist {
169 | return nil
170 | }
171 | parent := filepath.Dir(dir)
172 | exist, err = Exist(parent)
173 | if err != nil {
174 | return err
175 | }
176 | if !exist {
177 | if err := MkdirAll(parent); err != nil {
178 | return err
179 | }
180 | }
181 | return Mkdir(dir)
182 | }
183 |
// Mkdir creates the specified dir with defaultDirFileMode and fsyncs its
// parent directory so the new entry is durable.
func Mkdir(dir string) error {
	if err := os.Mkdir(dir, defaultDirFileMode); err != nil {
		return err
	}
	return SyncDir(filepath.Dir(dir))
}
191 |
// SyncDir calls fsync on the specified directory so directory-entry
// mutations (creates, renames) are durable. It is a no-op on Windows,
// which cannot fsync directories, and panics if dir is not a directory.
func SyncDir(dir string) (err error) {
	if runtime.GOOS == "windows" {
		return nil
	}

	info, statErr := os.Stat(dir)
	if statErr != nil {
		return statErr
	}
	if !info.IsDir() {
		panic("not a dir")
	}

	df, openErr := os.Open(filepath.Clean(dir))
	if openErr != nil {
		return openErr
	}
	defer func() {
		// Surface the Close error only if Sync itself succeeded.
		if cerr := df.Close(); err == nil {
			err = cerr
		}
	}()

	return df.Sync()
}
215 |
--------------------------------------------------------------------------------
/experiment/simpleondisk/fsm.go:
--------------------------------------------------------------------------------
1 | package simpleondisk
2 |
3 | import (
4 | "encoding/binary"
5 | "encoding/json"
6 | "errors"
7 | "fmt"
8 | "io"
9 | "os"
10 | "strconv"
11 | "sync/atomic"
12 |
13 | "github.com/cockroachdb/pebble"
14 | "github.com/xkeyideal/mraft/experiment/store"
15 |
16 | sm "github.com/lni/dragonboat/v3/statemachine"
17 | )
18 |
const (
	// appliedIndexKey is the reserved store key holding the last applied
	// raft log index as a decimal string (see Update / queryAppliedIndex).
	appliedIndexKey = "disk_kv_applied_index"
	// endSignal is unused in this file; presumably a stream terminator used
	// elsewhere. TODO confirm.
	endSignal = "mraft-end-signal"
)

// kv is the JSON command payload proposed through raft. Update adds Val to
// any integer already stored under Key.
type kv struct {
	Key string `json:"key"`
	Val int    `json:"val"`
}
28 |
// SimpleDiskKV is an on-disk state machine (sm.IOnDiskStateMachine) backed
// by pebble. Two store slots are kept so snapshot recovery can build a new
// DB and atomically flip dbIndex to it while concurrent readers keep using
// the old one.
type SimpleDiskKV struct {
	clusterID uint64
	nodeID    uint64

	// dbIndex selects the active stores slot; accessed with sync/atomic so
	// Lookup can race safely with RecoverFromSnapshot. lastApplied caches
	// the raft index of the last applied entry.
	dbIndex     uint32
	stores      []*store.Store
	lastApplied uint64
}

// NewSimpleDiskKV is the factory passed to nodehost.StartOnDiskCluster.
func NewSimpleDiskKV(cluserID uint64, nodeID uint64) sm.IOnDiskStateMachine {
	return &SimpleDiskKV{
		clusterID: cluserID,
		nodeID:    nodeID,
		stores:    make([]*store.Store, 2),
	}
}
45 |
// queryAppliedIndex reads the persisted applied-index value from the
// currently active store.
func (d *SimpleDiskKV) queryAppliedIndex() (uint64, error) {
	idx := atomic.LoadUint32(&d.dbIndex)

	return d.stores[idx].LookupAppliedIndex([]byte(appliedIndexKey))
}
51 |
52 | func (d *SimpleDiskKV) Open(stopc <-chan struct{}) (uint64, error) {
53 | select {
54 | case <-stopc:
55 | return 0, sm.ErrOpenStopped
56 | default:
57 | dir := getNodeDBDirName(d.clusterID, d.nodeID)
58 | if err := createNodeDataDir(dir); err != nil {
59 | return 0, nil
60 | }
61 |
62 | var dbdir string
63 | if !isNewRun(dir) {
64 | if err := cleanupNodeDataDir(dir); err != nil {
65 | return 0, err
66 | }
67 | var err error
68 | dbdir, err = getCurrentDBDirName(dir)
69 | if err != nil {
70 | return 0, err
71 | }
72 | if _, err := os.Stat(dbdir); err != nil {
73 | if os.IsNotExist(err) {
74 | return 0, err
75 | }
76 | }
77 | } else {
78 | dbdir = getNewRandomDBDirName(dir)
79 | if err := saveCurrentDBDirName(dir, dbdir); err != nil {
80 | return 0, err
81 | }
82 | if err := replaceCurrentDBFile(dir); err != nil {
83 | return 0, err
84 | }
85 | }
86 |
87 | store, err := store.NewStore(dbdir)
88 | if err != nil {
89 | return 0, err
90 | }
91 |
92 | d.dbIndex = 0
93 |
94 | d.stores[d.dbIndex] = store
95 | appliedIndex, err := d.queryAppliedIndex()
96 | if err != nil {
97 | return 0, err
98 | }
99 |
100 | d.lastApplied = appliedIndex
101 |
102 | return appliedIndex, nil
103 | }
104 | }
105 |
106 | // Update 与 LookUp, SaveSnapshot的调用是并发安全的
107 | func (d *SimpleDiskKV) Update(ents []sm.Entry) ([]sm.Entry, error) {
108 |
109 | fmt.Println("SimpleDiskKV Entry length: ", len(ents))
110 |
111 | if len(ents) == 0 {
112 | return ents, nil
113 | }
114 |
115 | dbIndex := atomic.LoadUint32(&d.dbIndex)
116 |
117 | for index, entry := range ents {
118 | if entry.Index <= d.lastApplied {
119 | continue
120 | }
121 |
122 | data := &kv{}
123 | json.Unmarshal(entry.Cmd, data)
124 |
125 | oldVal, err := d.NALookup([]byte(data.Key))
126 |
127 | if err != nil {
128 | d.stores[dbIndex].SetKv([]byte(data.Key), []byte(strconv.Itoa(data.Val)))
129 | } else {
130 | v, err := strconv.ParseInt(string(oldVal), 10, 32)
131 | if err != nil {
132 | fmt.Printf("%s ParseInt %s", string(oldVal), err.Error())
133 | continue
134 | }
135 |
136 | d.stores[dbIndex].SetKv([]byte(data.Key), []byte(strconv.Itoa(data.Val+int(v))))
137 | }
138 |
139 | ents[index].Result = sm.Result{Value: uint64(len(ents[index].Cmd))}
140 | }
141 |
142 | idx := fmt.Sprintf("%d", ents[len(ents)-1].Index)
143 | d.stores[dbIndex].SetKv([]byte(appliedIndexKey), []byte(idx))
144 |
145 | d.lastApplied = ents[len(ents)-1].Index
146 |
147 | return ents, nil
148 | }
149 |
150 | // Lookup 与 Update and RecoverFromSnapshot 是并发安全的
151 | func (d *SimpleDiskKV) Lookup(key interface{}) (interface{}, error) {
152 | dbIndex := atomic.LoadUint32(&d.dbIndex)
153 | if d.stores[dbIndex] != nil {
154 | v, err := d.stores[dbIndex].NALookup(key.([]byte))
155 | return v, err
156 | }
157 | return nil, errors.New("db is nil")
158 | }
159 |
160 | func (d *SimpleDiskKV) NALookup(key []byte) ([]byte, error) {
161 | dbIndex := atomic.LoadUint32(&d.dbIndex)
162 | if d.stores[dbIndex] != nil {
163 | return d.stores[dbIndex].NALookup(key)
164 | }
165 | return nil, errors.New("db is nil")
166 | }
167 |
// diskKVCtx carries the store and its point-in-time pebble snapshot from
// PrepareSnapshot to SaveSnapshot.
type diskKVCtx struct {
	store    *store.Store
	snapshot *pebble.Snapshot
}

// PrepareSnapshot captures a consistent pebble snapshot of the active store
// so SaveSnapshot can stream it later without blocking writers.
func (d *SimpleDiskKV) PrepareSnapshot() (interface{}, error) {
	dbIndex := atomic.LoadUint32(&d.dbIndex)
	store := d.stores[dbIndex]

	return &diskKVCtx{
		store:    store,
		snapshot: store.NewSnapshot(),
	}, nil
}
182 |
// saveToWriter streams every key/value pair from the pebble snapshot to w
// in the framing RecoverFromSnapshot reads back:
//
//	[4B LE key len][key][4B LE val len][val], repeated until stream end.
func (d *SimpleDiskKV) saveToWriter(store *store.Store, snapshot *pebble.Snapshot, w io.Writer) error {
	iter := snapshot.NewIter(store.GetRo())
	defer iter.Close()

	// Reused 4-byte length prefixes; sizes are little-endian uint32.
	keySize := make([]byte, 4)
	valSize := make([]byte, 4)
	for iter.First(); iter.Valid(); iter.Next() {
		key := iter.Key()
		val := iter.Value()

		kl := len(key)
		vl := len(val)

		binary.LittleEndian.PutUint32(keySize, uint32(kl))
		if _, err := w.Write(keySize); err != nil {
			return err
		}

		if _, err := w.Write(key); err != nil {
			return err
		}

		binary.LittleEndian.PutUint32(valSize, uint32(vl))
		if _, err := w.Write(valSize); err != nil {
			return err
		}

		if _, err := w.Write(val); err != nil {
			return err
		}
	}

	return nil
}
217 |
// SaveSnapshot streams the snapshot prepared by PrepareSnapshot into w; it
// may run concurrently with Update and Lookup. The done channel aborts the
// save when dragonboat no longer needs the snapshot.
func (d *SimpleDiskKV) SaveSnapshot(ctx interface{}, w io.Writer, done <-chan struct{}) error {
	select {
	case <-done:
		return sm.ErrSnapshotStopped
	default:
		ctxdata := ctx.(*diskKVCtx)

		store := ctxdata.store
		ss := ctxdata.snapshot

		// Release the pebble snapshot once the copy finishes (or fails).
		defer ss.Close()

		return d.saveToWriter(store, ss, w)
	}
}
233 |
234 | // RecoverFromSnapshot 执行时,sm 的其他接口不会被同时执行
235 | func (d *SimpleDiskKV) RecoverFromSnapshot(reader io.Reader, done <-chan struct{}) error {
236 | dir := getNodeDBDirName(d.clusterID, d.nodeID)
237 | dbdir := getNewRandomDBDirName(dir)
238 | oldDirName, err := getCurrentDBDirName(dir)
239 | if err != nil {
240 | return err
241 | }
242 |
243 | store, err := store.NewStore(dbdir)
244 | if err != nil {
245 | return err
246 | }
247 |
248 | sz := make([]byte, 4)
249 | for {
250 | if isStop(done) {
251 | return sm.ErrSnapshotStopped
252 | }
253 |
254 | // 先读key
255 | _, err := io.ReadFull(reader, sz) // key size
256 | if err == io.EOF {
257 | break
258 | }
259 |
260 | if err != nil {
261 | return err
262 | }
263 |
264 | toRead := binary.LittleEndian.Uint64(sz)
265 | kdata := make([]byte, toRead)
266 | _, err = io.ReadFull(reader, kdata) // key data
267 | if err == io.EOF {
268 | break
269 | }
270 | if err != nil {
271 | return err
272 | }
273 |
274 | // 再读val
275 | _, err = io.ReadFull(reader, sz) // val size
276 | if err == io.EOF {
277 | break
278 | }
279 | if err != nil {
280 | return err
281 | }
282 |
283 | toRead = binary.LittleEndian.Uint64(sz)
284 | vdata := make([]byte, toRead)
285 | _, err = io.ReadFull(reader, vdata) // val data
286 | if err == io.EOF {
287 | break
288 | }
289 | if err != nil {
290 | return err
291 | }
292 |
293 | store.SetKv(kdata, vdata)
294 | }
295 |
296 | store.Flush() // db 刷盘
297 |
298 | if err := saveCurrentDBDirName(dir, dbdir); err != nil {
299 | return err
300 | }
301 | if err := replaceCurrentDBFile(dir); err != nil {
302 | return err
303 | }
304 |
305 | oldDbIndex := atomic.LoadUint32(&d.dbIndex)
306 | newDbIndex := 1 - oldDbIndex
307 | atomic.StoreUint32(&d.dbIndex, newDbIndex)
308 | d.stores[newDbIndex] = store
309 |
310 | newLastApplied, err := d.queryAppliedIndex()
311 | if err != nil {
312 | return err
313 | }
314 |
315 | d.stores[oldDbIndex].Close()
316 |
317 | d.lastApplied = newLastApplied
318 |
319 | return os.RemoveAll(oldDirName)
320 | }
321 |
// Close releases both store slots — the active one and, after a snapshot
// recovery, the retired one. Part of sm.IOnDiskStateMachine.
func (d *SimpleDiskKV) Close() error {
	for i := 0; i < 2; i++ {
		if d.stores[i] != nil {
			d.stores[i].Close()
		}
	}

	return nil
}

// Sync is a no-op: durability is decided per-write at the pebble layer
// (see store.SetKv/Flush), so there is nothing extra to fsync here.
func (d *SimpleDiskKV) Sync() error {
	return nil
}
335 |
// isStop reports, without blocking, whether ch is closed (or has a pending
// value).
func isStop(ch <-chan struct{}) bool {
	select {
	case <-ch:
	default:
		return false
	}
	return true
}
344 |
--------------------------------------------------------------------------------
/experiment/simpleondisk/httpengine/engine.go:
--------------------------------------------------------------------------------
1 | package httpengine
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "net/http"
7 | "strconv"
8 | "time"
9 |
10 | "github.com/gin-gonic/gin"
11 | "github.com/xkeyideal/mraft/experiment/simpleondisk"
12 | )
13 |
var (
	// RaftDataDir is the nodehost/WAL root. NOTE(review): hard-coded
	// absolute path — machine-specific; confirm before reuse.
	RaftDataDir = "/Volumes/ST1000/mraft-simpleondisk"
	// RaftNodePeers maps raft nodeID -> raft transport address for the
	// initial three-node membership.
	RaftNodePeers = map[uint64]string{
		10000: "10.101.44.4:34000",
		10001: "10.101.44.4:34100",
		10002: "10.101.44.4:34200",
	}
	// RaftClusterIDs lists the raft groups each node participates in;
	// requests are sharded across them by hashKey.
	RaftClusterIDs = []uint64{234000, 234100, 234200}
)

// Engine ties one raft node to a gin HTTP API exposing query and upsert.
type Engine struct {
	nodeID      uint64
	raftDataDir string

	server *http.Server
	router *gin.Engine

	nh *simpleondisk.SimpleOnDiskRaft
}
33 |
34 | func NewEngine(nodeID uint64, port string) *Engine {
35 |
36 | router := gin.New()
37 | router.Use(gin.Recovery())
38 |
39 | nh := simpleondisk.NewSimpleOnDiskRaft(RaftNodePeers, RaftClusterIDs)
40 |
41 | engine := &Engine{
42 | nodeID: nodeID,
43 | raftDataDir: RaftDataDir,
44 | router: router,
45 | server: &http.Server{
46 | Addr: fmt.Sprintf("0.0.0.0:%s", port), //"9080"
47 | Handler: router,
48 | ReadTimeout: 20 * time.Second,
49 | WriteTimeout: 40 * time.Second,
50 | },
51 | nh: nh,
52 | }
53 |
54 | engine.router.GET("/msimpleraft/key", engine.Query)
55 | engine.router.POST("/msimpleraft/key", engine.Upsert)
56 |
57 | return engine
58 | }
59 |
60 | func (engine *Engine) Start() {
61 |
62 | engine.nh.Start(engine.raftDataDir, engine.nodeID, "", false)
63 |
64 | if err := engine.server.ListenAndServe(); err != nil {
65 | panic(err.Error())
66 | }
67 | }
68 |
69 | func (engine *Engine) Stop() {
70 | if engine.server != nil {
71 | if err := engine.server.Shutdown(context.Background()); err != nil {
72 | fmt.Println("Server Shutdown: ", err)
73 | }
74 | }
75 |
76 | engine.nh.Stop()
77 | }
78 |
79 | func (engine *Engine) Query(c *gin.Context) {
80 | key := c.Query("key")
81 | hashKey, err := strconv.ParseUint(c.Query("hashKey"), 10, 64)
82 | if err != nil {
83 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
84 | return
85 | }
86 |
87 | val, err := engine.nh.SyncRead(key, hashKey)
88 | if err != nil {
89 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
90 | return
91 | }
92 | SetStrResp(http.StatusOK, 0, "", string(val), c)
93 | }
94 |
95 | func (engine *Engine) Upsert(c *gin.Context) {
96 | key := c.Query("key")
97 | hashKey, err := strconv.ParseUint(c.Query("hashKey"), 10, 64)
98 | if err != nil {
99 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
100 | return
101 | }
102 |
103 | val, err := strconv.ParseUint(c.Query("val"), 10, 64)
104 | if err != nil {
105 | SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
106 | return
107 | }
108 |
109 | engine.nh.Write(key, hashKey, int(val))
110 |
111 | SetStrResp(http.StatusOK, 0, "", "OK", c)
112 | }
113 |
114 | func SetStrResp(httpCode, code int, msg string, result interface{}, c *gin.Context) {
115 |
116 | m := msg
117 |
118 | if code == 0 {
119 | c.JSON(httpCode, gin.H{
120 | "code": code,
121 | "msg": m,
122 | "result": result,
123 | })
124 | } else {
125 | c.JSON(httpCode, gin.H{
126 | "code": code,
127 | "msg": m,
128 | })
129 | }
130 | }
131 |
--------------------------------------------------------------------------------
/experiment/simpleondisk/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "os/signal"
7 | "strconv"
8 | "syscall"
9 |
10 | "github.com/xkeyideal/mraft/experiment/simpleondisk/httpengine"
11 | )
12 |
13 | // CGO_CFLAGS="-I/usr/local/include/rocksdb" CGO_LDFLAGS="-L/usr/local/lib -lrocksdb -lstdc++ -lm -lz -lbz2 -lsnappy -llz4" go run app.go 10000 9800
14 | func main() {
15 | if len(os.Args) <= 2 {
16 | fmt.Println("input arg $1 nodeID, arg $2 port")
17 | os.Exit(1)
18 | }
19 |
20 | nodeID, err := strconv.ParseUint(os.Args[1], 10, 64)
21 | if err != nil {
22 | fmt.Println(err.Error())
23 | os.Exit(1)
24 | }
25 |
26 | port := os.Args[2]
27 |
28 | engine := httpengine.NewEngine(nodeID, port)
29 |
30 | go engine.Start()
31 |
32 | signals := make(chan os.Signal, 1)
33 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGKILL)
34 | <-signals
35 |
36 | engine.Stop()
37 | }
38 |
--------------------------------------------------------------------------------
/experiment/simpleondisk/ondisk.go:
--------------------------------------------------------------------------------
1 | package simpleondisk
2 |
3 | import (
4 | "context"
5 | "encoding/json"
6 | "fmt"
7 | "path/filepath"
8 | "sync"
9 | "time"
10 |
11 | "github.com/lni/dragonboat/v3"
12 | "github.com/lni/dragonboat/v3/client"
13 | "github.com/lni/dragonboat/v3/config"
14 | "github.com/lni/dragonboat/v3/logger"
15 | )
16 |
// SimpleOnDiskRaft bundles a dragonboat NodeHost with the static peer map
// and the list of raft groups this process serves.
type SimpleOnDiskRaft struct {
	RaftNodePeers  map[uint64]string // raft nodeID -> transport address
	RaftClusterIDs []uint64

	nodehost       *dragonboat.NodeHost
	clusterSession map[uint64]*client.Session // cached no-op session per cluster
	lock           sync.RWMutex
}

// NewSimpleOnDiskRaft builds the wrapper; Start must be called before use.
func NewSimpleOnDiskRaft(peers map[uint64]string, clusterIDs []uint64) *SimpleOnDiskRaft {

	dr := &SimpleOnDiskRaft{
		RaftNodePeers:  peers,
		RaftClusterIDs: clusterIDs,
		clusterSession: make(map[uint64]*client.Session),
		lock:           sync.RWMutex{},
	}

	return dr
}
37 |
// Start creates the NodeHost under raftDataDir/node<nodeID> and launches an
// on-disk raft cluster for every ID in RaftClusterIDs. When join is true,
// nodeAddr overrides the peer-map address and the initial membership is
// left empty (the node is added via membership change instead).
func (disk *SimpleOnDiskRaft) Start(raftDataDir string, nodeID uint64, nodeAddr string, join bool) error {

	datadir := filepath.Join(raftDataDir, fmt.Sprintf("node%d", nodeID))

	// Quiet down dragonboat's subsystem loggers to warnings and above.
	logger.GetLogger("raft").SetLevel(logger.ERROR)
	logger.GetLogger("rsm").SetLevel(logger.WARNING)
	logger.GetLogger("transport").SetLevel(logger.WARNING)
	logger.GetLogger("grpc").SetLevel(logger.WARNING)
	logger.GetLogger("dragonboat").SetLevel(logger.WARNING)
	logger.GetLogger("logdb").SetLevel(logger.WARNING)

	raftAddress := disk.RaftNodePeers[nodeID]
	peers := disk.RaftNodePeers
	if join {
		raftAddress = nodeAddr
		peers = make(map[uint64]string)
	}

	nhc := config.NodeHostConfig{
		DeploymentID:   20,
		WALDir:         datadir,
		NodeHostDir:    datadir,
		RTTMillisecond: 100,
		RaftAddress:    raftAddress,
	}

	nh, err := dragonboat.NewNodeHost(nhc)
	if err != nil {
		return err
	}

	disk.nodehost = nh

	for _, clusterID := range disk.RaftClusterIDs {
		rc := config.Config{
			NodeID:             nodeID,
			ClusterID:          clusterID,
			ElectionRTT:        10,
			HeartbeatRTT:       1,
			CheckQuorum:        true,
			SnapshotEntries:    1000,
			CompactionOverhead: 100,
		}

		// NOTE(review): a cluster-start failure panics while every other
		// failure in this method returns an error — inconsistent, but the
		// current caller ignores the returned error, so changing this to
		// `return err` alone would silently swallow the failure.
		if err := nh.StartOnDiskCluster(peers, join, NewSimpleDiskKV, rc); err != nil {
			panic(err)
		}

		disk.clusterSession[clusterID] = disk.nodehost.GetNoOPSession(clusterID)
	}

	return nil
}
91 |
92 | func (disk *SimpleOnDiskRaft) Write(key string, hashKey uint64, value int) error {
93 | idx := hashKey % uint64(len(disk.RaftClusterIDs))
94 | clusterID := disk.RaftClusterIDs[idx]
95 | cs := disk.clusterSession[clusterID]
96 | ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
97 |
98 | d := kv{key, value}
99 | b, _ := json.Marshal(d)
100 |
101 | _, err := disk.nodehost.SyncPropose(ctx, cs, b)
102 |
103 | cancel()
104 | return err
105 | }
106 |
107 | // SyncRead 线性读
108 | func (disk *SimpleOnDiskRaft) SyncRead(key string, hashKey uint64) ([]byte, error) {
109 | idx := hashKey % uint64(len(disk.RaftClusterIDs))
110 | clusterID := disk.RaftClusterIDs[idx]
111 | ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
112 | result, err := disk.nodehost.SyncRead(ctx, clusterID, []byte(key))
113 | cancel()
114 |
115 | if err != nil {
116 | return nil, err
117 | }
118 |
119 | return result.([]byte), nil
120 | }
121 |
122 | // ReadLocal 读本地
123 | func (disk *SimpleOnDiskRaft) ReadLocal(key string, hashKey uint64) ([]byte, error) {
124 | idx := hashKey % uint64(len(disk.RaftClusterIDs))
125 | clusterID := disk.RaftClusterIDs[idx]
126 | result, err := disk.nodehost.StaleRead(clusterID, []byte(key))
127 |
128 | if err != nil {
129 | return nil, err
130 | }
131 |
132 | return result.([]byte), nil
133 | }
134 |
// Stop shuts the NodeHost down and drops all cached no-op sessions.
func (disk *SimpleOnDiskRaft) Stop() {
	disk.nodehost.Stop()

	disk.clusterSession = make(map[uint64]*client.Session)
}

// Info reports NodeHostInfo, including per-cluster raft log details.
func (disk *SimpleOnDiskRaft) Info() *dragonboat.NodeHostInfo {
	return disk.nodehost.GetNodeHostInfo(dragonboat.NodeHostInfoOption{SkipLogInfo: false})
}
145 |
--------------------------------------------------------------------------------
/experiment/simpleondisk/test/test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "strconv"
6 | "sync"
7 | "time"
8 |
9 | "github.com/xkeyideal/gokit/httpkit"
10 | )
11 |
// HTTP client tuning for this benchmark driver.
const (
	HttpConnTimeout      = 3 * time.Second
	HttpReadWriteTimeout = 5 * time.Second
	HttpRetry            = 0
	HttpRetryInterval    = 2 * time.Second
)
18 |
19 | func doHttp(addr string, val int, wg *sync.WaitGroup) {
20 |
21 | defer wg.Done()
22 |
23 | client := httpkit.NewHttpClient(HttpReadWriteTimeout, HttpRetry, HttpRetryInterval, HttpConnTimeout, nil)
24 |
25 | client = client.SetParam("key", "simple_4").SetParam("hashKey", "123").SetParam("val", strconv.Itoa(val))
26 |
27 | resp, err := client.Post(fmt.Sprintf("http://%s/msimpleraft/key", addr))
28 |
29 | if err != nil {
30 | fmt.Println(addr, err)
31 | return
32 | }
33 |
34 | if resp.StatusCode != 200 {
35 | fmt.Println(resp.StatusCode)
36 | }
37 | }
38 |
39 | func doQuery(addr string) {
40 | client := httpkit.NewHttpClient(HttpReadWriteTimeout, HttpRetry, HttpRetryInterval, HttpConnTimeout, nil)
41 |
42 | client = client.SetParam("key", "simple_4").SetParam("hashKey", "123")
43 |
44 | resp, err := client.Get(fmt.Sprintf("http://%s/msimpleraft/key", addr))
45 |
46 | if err != nil {
47 | fmt.Println(addr, err)
48 | return
49 | }
50 |
51 | if resp.StatusCode != 200 {
52 | fmt.Println(resp.StatusCode)
53 | return
54 | }
55 |
56 | fmt.Println(string(resp.Body))
57 | }
58 |
// main fires n concurrent upserts at the three-node cluster, waits for all
// of them, then reads the key back once to show the accumulated value.
func main() {
	addrs := []string{"10.101.44.4:10100", "10.101.44.4:10101", "10.101.44.4:10102"}

	n := 10

	wg := &sync.WaitGroup{}
	wg.Add(n)

	// Round-robin the writes across the three nodes.
	for i := 0; i < n; i++ {
		go doHttp(addrs[i%3], i, wg)
	}

	wg.Wait()

	doQuery(addrs[0])
}
75 |
--------------------------------------------------------------------------------
/experiment/store/kv.go:
--------------------------------------------------------------------------------
1 | package store
2 |
3 | import (
4 | "encoding/binary"
5 | "fmt"
6 | "io"
7 |
8 | thrifter "github.com/thrift-iterator/go"
9 | )
10 |
// Command verbs understood by the raft state machine.
const (
	CommandRead   = "read"
	CommandDelete = "delete"
	CommandUpsert = "upsert"
)

// Command is the thrift-encoded operation proposed through raft; HashKey
// routes it to a cluster shard.
type Command struct {
	Cmd     string `thrift:"Cmd,1"`
	HashKey uint64 `thrift:"HashKey,2"`
	Key     string `thrift:"Key,3"`
	Val     string `thrift:"Val,4"`
}

// NewCommand builds a Command value; no validation is performed here.
func NewCommand(cmd, key, val string, hashKey uint64) *Command {
	return &Command{
		Cmd:     cmd,
		HashKey: hashKey,
		Key:     key,
		Val:     val,
	}
}
32 |
// Unmarshal decodes b (thrift-iterator encoding) into cmd in place.
func (cmd *Command) Unmarshal(b []byte) error {
	return thrifter.Unmarshal(b, cmd)
}

// Marshal encodes cmd with thrift-iterator.
func (cmd *Command) Marshal() ([]byte, error) {
	return thrifter.Marshal(cmd)
}
40 |
// RaftAttribute is the benchmark payload stored in the state machine.
type RaftAttribute struct {
	AttrID    uint64            `thrift:"AttrID,1" db:"AttrID" json:"AttrID"`
	AttrName  string            `thrift:"AttrName,2" db:"AttrName" json:"AttrName"`
	Ages      []int32           `thrift:"Ages,3" db:"Ages" json:"Ages"`
	Locations map[string]string `thrift:"Locations,4" db:"Locations" json:"Locations"`
	Timestamp int64             `thrift:"Timestamp,5" db:"Timestamp" json:"Timestamp"`
}

// Marshal thrift-encodes attr; a nil receiver yields an empty slice rather
// than an error.
func (attr *RaftAttribute) Marshal() ([]byte, error) {
	if attr == nil {
		return []byte{}, nil
	}
	return thrifter.Marshal(attr)
}

// GenerateCommand wraps the encoded attribute in a Command whose key is
// "<AttrID>_<AttrName>" and whose hash key is AttrID.
func (attr *RaftAttribute) GenerateCommand(cmd string) (*Command, error) {
	b, err := attr.Marshal()
	if err != nil {
		return nil, err
	}

	return NewCommand(cmd, fmt.Sprintf("%d_%s", attr.AttrID, attr.AttrName), string(b), attr.AttrID), nil
}

// Unmarshal decodes b into attr in place.
func (attr *RaftAttribute) Unmarshal(b []byte) error {
	return thrifter.Unmarshal(b, attr)
}
68 |
69 | func (attr *RaftAttribute) WriteTo(command string, w io.Writer) (int64, error) {
70 | dataSize := make([]byte, 8)
71 |
72 | b, err := attr.Marshal()
73 | if err != nil {
74 | return 0, err
75 | }
76 |
77 | l := len(b) + 4 + len(command)
78 |
79 | binary.LittleEndian.PutUint64(dataSize, uint64(l))
80 | if _, err := w.Write(dataSize); err != nil {
81 | return 0, err
82 | }
83 |
84 | cmdSize := make([]byte, 4)
85 | binary.LittleEndian.PutUint32(cmdSize, uint32(len(command)))
86 | if _, err := w.Write(cmdSize); err != nil {
87 | return 0, err
88 | }
89 |
90 | if _, err := w.Write([]byte(command)); err != nil {
91 | return 0, err
92 | }
93 |
94 | if _, err := w.Write(b); err != nil {
95 | return 0, err
96 | }
97 |
98 | return int64(8 + l), nil
99 | }
100 |
101 | func (attr *RaftAttribute) WriteTo2(w io.Writer) (int64, error) {
102 | dataSize := make([]byte, 8)
103 |
104 | b, err := attr.Marshal()
105 | if err != nil {
106 | return 0, err
107 | }
108 |
109 | l := len(b)
110 |
111 | binary.LittleEndian.PutUint64(dataSize, uint64(l))
112 | if _, err := w.Write(dataSize); err != nil {
113 | return 0, err
114 | }
115 |
116 | if _, err := w.Write(b); err != nil {
117 | return 0, err
118 | }
119 |
120 | return int64(8 + l), nil
121 | }
122 |
// ReadArgument describes a read request: Sync selects a linearizable read
// over a stale local read; HashKey picks the cluster shard.
type ReadArgument struct {
	Key     string `thrift:"Key,1"`
	HashKey uint64 `thrift:"HashKey,2"`
	Sync    bool   `thrift:"Sync,3"`
}

// Marshal thrift-encodes arg.
func (arg *ReadArgument) Marshal() ([]byte, error) {
	return thrifter.Marshal(arg)
}

// Unmarshal decodes b into arg in place.
func (arg *ReadArgument) Unmarshal(b []byte) error {
	return thrifter.Unmarshal(b, arg)
}
136 |
137 | func (arg *ReadArgument) WriteTo(command string, w io.Writer) (int64, error) {
138 | dataSize := make([]byte, 8)
139 |
140 | b, err := arg.Marshal()
141 | if err != nil {
142 | return 0, err
143 | }
144 |
145 | l := 4 + len(command) + len(b)
146 |
147 | binary.LittleEndian.PutUint64(dataSize, uint64(l))
148 | if _, err := w.Write(dataSize); err != nil {
149 | return 0, err
150 | }
151 |
152 | cmdSize := make([]byte, 4)
153 | binary.LittleEndian.PutUint32(cmdSize, uint32(len(command)))
154 | if _, err := w.Write(cmdSize); err != nil {
155 | return 0, err
156 | }
157 |
158 | if _, err := w.Write([]byte(command)); err != nil {
159 | return 0, err
160 | }
161 |
162 | if _, err := w.Write(b); err != nil {
163 | return 0, err
164 | }
165 |
166 | return int64(8 + l), nil
167 | }
168 |
--------------------------------------------------------------------------------
/experiment/store/kvstore.go:
--------------------------------------------------------------------------------
1 | package store
2 |
3 | import (
4 | "os"
5 | "path/filepath"
6 | "strconv"
7 | "sync"
8 |
9 | "github.com/cockroachdb/pebble"
10 | "go.uber.org/atomic"
11 | )
12 |
// Store wraps a pebble.DB with cached read/write options and a closed flag
// so lookups can fail fast with pebble.ErrClosed after Close.
type Store struct {
	mu     sync.RWMutex
	db     *pebble.DB
	ro     *pebble.IterOptions
	wo     *pebble.WriteOptions // async writes (Sync: false)
	syncwo *pebble.WriteOptions // durable writes (Sync: true)
	closed *atomic.Bool
}

// NewStore opens (creating if needed) a pebble DB under dbdir using the
// package's default tuning (see getDefaultPebbleDBConfig).
func NewStore(dbdir string) (*Store, error) {
	cfg := getDefaultPebbleDBConfig()

	db, err := openPebbleDB(cfg, dbdir)
	if err != nil {
		return nil, err
	}

	return &Store{
		db:     db,
		ro:     &pebble.IterOptions{},
		wo:     &pebble.WriteOptions{Sync: false},
		syncwo: &pebble.WriteOptions{Sync: true},
		closed: atomic.NewBool(false),
	}, nil
}
38 |
39 | func (db *Store) LookupAppliedIndex(key []byte) (uint64, error) {
40 | if db.closed.Load() {
41 | return 0, pebble.ErrClosed
42 | }
43 |
44 | db.mu.RLock()
45 | defer db.mu.RUnlock()
46 |
47 | val, closer, err := db.db.Get(key)
48 | if err != nil {
49 | return 0, err
50 | }
51 |
52 | // 这里需要copy
53 | data := make([]byte, len(val))
54 | copy(data, val)
55 |
56 | if err := closer.Close(); err != nil {
57 | return 0, err
58 | }
59 |
60 | return strconv.ParseUint(string(data), 10, 64)
61 | }
62 |
63 | func (db *Store) Lookup(key []byte) (*RaftAttribute, error) {
64 | if db.closed.Load() {
65 | return nil, pebble.ErrClosed
66 | }
67 |
68 | db.mu.RLock()
69 | defer db.mu.RUnlock()
70 |
71 | val, closer, err := db.db.Get(key)
72 | if err != nil {
73 | return nil, err
74 | }
75 |
76 | // 这里需要copy
77 | data := make([]byte, len(val))
78 | copy(data, val)
79 |
80 | if err := closer.Close(); err != nil {
81 | return nil, err
82 | }
83 |
84 | attr := &RaftAttribute{}
85 | err = attr.Unmarshal(data)
86 |
87 | return attr, err
88 | }
89 |
90 | func (db *Store) NALookup(key []byte) ([]byte, error) {
91 | db.mu.RLock()
92 | defer db.mu.RUnlock()
93 |
94 | val, closer, err := db.db.Get(key)
95 | if err != nil {
96 | return nil, err
97 | }
98 |
99 | // 这里需要copy
100 | data := make([]byte, len(val))
101 | copy(data, val)
102 |
103 | if err := closer.Close(); err != nil {
104 | return nil, err
105 | }
106 |
107 | return data, err
108 | }
109 |
// Batch returns a new, uncommitted pebble write batch.
func (db *Store) Batch() *pebble.Batch {
	return db.db.NewBatch()
}

// GetWo exposes the async (non-fsync) write options.
func (db *Store) GetWo() *pebble.WriteOptions {
	return db.wo
}

// GetRo exposes the shared iterator options.
func (db *Store) GetRo() *pebble.IterOptions {
	return db.ro
}

// Write commits b using the async write options.
func (db *Store) Write(b *pebble.Batch) error {
	return b.Commit(db.wo)
}

// SetKv stores key=val asynchronously.
// NOTE(review): the pebble error is discarded — a failed Set is silent;
// confirm callers can tolerate that.
func (db *Store) SetKv(key, val []byte) {
	db.db.Set(key, val, db.wo)
}

// Flush forces memtables to disk (error discarded).
func (db *Store) Flush() {
	db.db.Flush()
}

// NewSnapshot captures a point-in-time pebble snapshot under the read lock.
func (db *Store) NewSnapshot() *pebble.Snapshot {
	db.mu.RLock()
	defer db.mu.RUnlock()

	return db.db.NewSnapshot()
}

// ReleaseSnapshot closes snap (error discarded).
func (db *Store) ReleaseSnapshot(snap *pebble.Snapshot) {
	snap.Close()
}

// GetIterator returns a fresh iterator over the whole keyspace.
func (db *Store) GetIterator() *pebble.Iterator {
	return db.db.NewIter(db.ro)
}
148 |
// Close flips the closed flag (so concurrent lookups return
// pebble.ErrClosed), then flushes and closes the underlying DB. It is safe
// on a nil receiver and idempotent.
func (db *Store) Close() error {
	if db == nil {
		return nil
	}

	db.mu.Lock()
	defer db.mu.Unlock()

	db.closed.Store(true) // set pebbledb closed

	if db.db != nil {
		db.db.Flush()
		db.db.Close()
		db.db = nil
	}

	return nil
}
167 |
// PebbleDBConfig collects the pebble tuning knobs consumed by
// openPebbleDB. Field comments describe the pebble.Options (or
// per-level option) each value feeds.
type PebbleDBConfig struct {
	KVLRUCacheSize                   int64 // block cache size in bytes (pebble.NewCache)
	KVWriteBufferSize                int   // MemTableSize
	KVMaxWriteBufferNumber           int   // MemTableStopWritesThreshold
	KVLevel0FileNumCompactionTrigger int   // L0CompactionThreshold
	KVLevel0StopWritesTrigger        int   // L0StopWritesThreshold
	KVMaxBytesForLevelBase           int64 // LBaseMaxBytes
	KVTargetFileSizeBase             int64 // level 0 TargetFileSize
	KVTargetFileSizeMultiplier       int64 // per-level TargetFileSize growth factor
	KVNumOfLevels                    int   // number of LSM levels configured
	KVMaxOpenFiles                   int   // MaxOpenFiles
	KVMaxConcurrentCompactions       int   // MaxConcurrentCompactions
	KVBlockSize                      int   // per-level BlockSize
	KVMaxManifestFileSize            int64 // MaxManifestFileSize
	KVBytesPerSync                   int   // BytesPerSync
	KVWALBytesPerSync                int   // WALBytesPerSync
}
185 |
// getDefaultPebbleDBConfig returns the stock tuning used when no
// explicit PebbleDBConfig is supplied.
func getDefaultPebbleDBConfig() PebbleDBConfig {
	return PebbleDBConfig{
		KVLRUCacheSize:                   128 * 1024 * 1024, // 128MB
		KVWriteBufferSize:                32 * 1024 * 1024,  // 32MB
		KVMaxWriteBufferNumber:           4,
		KVLevel0FileNumCompactionTrigger: 1,
		KVLevel0StopWritesTrigger:        24,
		KVMaxBytesForLevelBase:           512 * 1024 * 1024, // 512MB
		KVTargetFileSizeBase:             128 * 1024 * 1024, // 128MB
		KVTargetFileSizeMultiplier:       1, // every level keeps the same target file size
		KVNumOfLevels:                    7,
		KVMaxOpenFiles:                   102400,
		KVMaxConcurrentCompactions:       8,
		KVBlockSize:                      64 * 1024,         // 64KB
		KVMaxManifestFileSize:            128 * 1024 * 1024, // 128MB
		KVBytesPerSync:                   2 * 1024 * 1024,   // 2MB
		KVWALBytesPerSync:                2 * 1024 * 1024,   // 2MB
	}
}
205 |
// openPebbleDB opens (creating directories as needed) a pebble database
// rooted at dir, with per-level options derived from config. SST data
// lives under dir/data and the write-ahead log under dir/wal.
func openPebbleDB(config PebbleDBConfig, dir string) (*pebble.DB, error) {
	blockSize := config.KVBlockSize
	levelSizeMultiplier := config.KVTargetFileSizeMultiplier
	sz := config.KVTargetFileSizeBase
	lopts := make([]pebble.LevelOptions, 0)

	// Build one LevelOptions per LSM level; each level's target file
	// size grows by the configured multiplier.
	for l := 0; l < config.KVNumOfLevels; l++ {
		opt := pebble.LevelOptions{
			Compression:    pebble.DefaultCompression,
			BlockSize:      blockSize,
			TargetFileSize: sz,
		}
		sz = sz * levelSizeMultiplier
		lopts = append(lopts, opt)
	}

	dataPath := filepath.Join(dir, "data")
	if err := os.MkdirAll(dataPath, os.ModePerm); err != nil {
		return nil, err
	}

	// Keep the WAL on a separate path from the SSTs.
	walPath := filepath.Join(dir, "wal")
	if err := os.MkdirAll(walPath, os.ModePerm); err != nil {
		return nil, err
	}

	cache := pebble.NewCache(config.KVLRUCacheSize)
	opts := &pebble.Options{
		BytesPerSync:                config.KVBytesPerSync,
		Levels:                      lopts,
		MaxManifestFileSize:         config.KVMaxManifestFileSize,
		MemTableSize:                config.KVWriteBufferSize,
		MemTableStopWritesThreshold: config.KVMaxWriteBufferNumber,
		LBaseMaxBytes:               config.KVMaxBytesForLevelBase,
		L0CompactionThreshold:       config.KVLevel0FileNumCompactionTrigger,
		L0StopWritesThreshold:       config.KVLevel0StopWritesTrigger,
		Cache:                       cache,
		WALDir:                      walPath,
		MaxOpenFiles:                config.KVMaxOpenFiles,
		MaxConcurrentCompactions:    config.KVMaxConcurrentCompactions,
		WALBytesPerSync:             config.KVWALBytesPerSync,
	}

	db, err := pebble.Open(dataPath, opts)
	if err != nil {
		return nil, err
	}
	// pebble.Open takes its own reference to the cache; drop ours so the
	// cache memory is released once the DB is closed.
	cache.Unref()

	return db, nil
}
257 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/xkeyideal/mraft
2 |
3 | go 1.21.0
4 |
5 | require (
6 | github.com/cockroachdb/pebble v0.0.0-20210331181633-27fc006b8bfb
7 | github.com/gin-gonic/gin v1.9.1
8 | github.com/lni/dragonboat/v3 v3.3.7
9 | github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475
10 | github.com/spf13/cobra v1.7.0
11 | github.com/thrift-iterator/go v0.0.0-20190402154806-9b5a67519118
12 | github.com/ugorji/go/codec v1.2.11
13 | github.com/xkeyideal/gokit v1.4.2
14 | go.uber.org/atomic v1.11.0
15 | go.uber.org/zap v1.25.0
16 | gopkg.in/natefinch/lumberjack.v2 v2.2.1
17 | )
18 |
19 | require (
20 | github.com/VictoriaMetrics/metrics v1.6.2 // indirect
21 | github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da // indirect
22 | github.com/bytedance/sonic v1.9.1 // indirect
23 | github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
24 | github.com/cockroachdb/errors v1.8.1 // indirect
25 | github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f // indirect
26 | github.com/cockroachdb/redact v1.0.8 // indirect
27 | github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 // indirect
28 | github.com/gabriel-vasile/mimetype v1.4.2 // indirect
29 | github.com/gin-contrib/sse v0.1.0 // indirect
30 | github.com/go-playground/locales v0.14.1 // indirect
31 | github.com/go-playground/universal-translator v0.18.1 // indirect
32 | github.com/go-playground/validator/v10 v10.14.0 // indirect
33 | github.com/goccy/go-json v0.10.2 // indirect
34 | github.com/gogo/protobuf v1.3.2 // indirect
35 | github.com/golang/protobuf v1.5.2 // indirect
36 | github.com/golang/snappy v0.0.4 // indirect
37 | github.com/google/btree v1.0.0 // indirect
38 | github.com/hashicorp/errwrap v1.0.0 // indirect
39 | github.com/hashicorp/go-immutable-radix v1.0.0 // indirect
40 | github.com/hashicorp/go-msgpack v0.5.3 // indirect
41 | github.com/hashicorp/go-multierror v1.0.0 // indirect
42 | github.com/hashicorp/go-sockaddr v1.0.0 // indirect
43 | github.com/hashicorp/golang-lru v0.5.1 // indirect
44 | github.com/hashicorp/memberlist v0.2.2 // indirect
45 | github.com/inconshreveable/mousetrap v1.1.0 // indirect
46 | github.com/json-iterator/go v1.1.12 // indirect
47 | github.com/juju/ratelimit v1.0.2-0.20191002062651-f60b32039441 // indirect
48 | github.com/klauspost/cpuid/v2 v2.2.4 // indirect
49 | github.com/kr/pretty v0.2.1 // indirect
50 | github.com/kr/text v0.2.0 // indirect
51 | github.com/leodido/go-urn v1.2.4 // indirect
52 | github.com/lni/goutils v1.3.0 // indirect
53 | github.com/mattn/go-isatty v0.0.19 // indirect
54 | github.com/miekg/dns v1.1.26 // indirect
55 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
56 | github.com/modern-go/reflect2 v1.0.2 // indirect
57 | github.com/pelletier/go-toml/v2 v2.0.8 // indirect
58 | github.com/pkg/errors v0.9.1 // indirect
59 | github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect
60 | github.com/spf13/pflag v1.0.5 // indirect
61 | github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
62 | github.com/v2pro/plz v0.0.0-20221028024117-e5f9aec5b631 // indirect
63 | github.com/v2pro/quokka v0.0.0-20171201153428-382cb39c6ee6 // indirect
64 | github.com/v2pro/wombat v0.0.0-20180402055224-a56dbdcddef2 // indirect
65 | github.com/valyala/fastrand v1.0.0 // indirect
66 | github.com/valyala/histogram v1.0.1 // indirect
67 | go.uber.org/multierr v1.10.0 // indirect
68 | golang.org/x/arch v0.3.0 // indirect
69 | golang.org/x/crypto v0.9.0 // indirect
70 | golang.org/x/exp v0.0.0-20200513190911-00229845015e // indirect
71 | golang.org/x/net v0.10.0 // indirect
72 | golang.org/x/sync v0.0.0-20201207232520-09787c993a3a // indirect
73 | golang.org/x/sys v0.8.0 // indirect
74 | golang.org/x/text v0.9.0 // indirect
75 | google.golang.org/protobuf v1.30.0 // indirect
76 | gopkg.in/yaml.v3 v3.0.1 // indirect
77 | )
78 |
--------------------------------------------------------------------------------
/gossip/config.go:
--------------------------------------------------------------------------------
1 | package gossip
2 |
3 | import (
4 | "errors"
5 | "net"
6 | "strconv"
7 |
8 | "github.com/lni/goutils/stringutil"
9 | "go.uber.org/zap/zapcore"
10 | )
11 |
// GossipOptions carries the runtime settings used to construct the
// local gossip service (peer gRPC endpoints, fan-out and logging).
type GossipOptions struct {
	// Name identifies this node; presumably unique per cluster — confirm
	// against the caller that builds it.
	Name string
	// MoveToGrpcAddr / RubikGrpcAddr are gRPC endpoints advertised with
	// the node (semantics not visible here — TODO confirm).
	MoveToGrpcAddr string
	RubikGrpcAddr  string
	// GossipNodes appears to be a node/fan-out count — TODO confirm.
	GossipNodes int
	LogDir      string
	LogLevel    zapcore.Level
	// DisableCoordinates turns off network-coordinate tracking.
	DisableCoordinates bool
}
21 |
// GossipConfig describes how the local gossip service binds, which seed
// nodes it bootstraps from, and how updated cluster data is persisted.
type GossipConfig struct {
	// GossipProbeInterval define the probe interval used by the gossip
	// service in tests.
	// GossipProbeInterval time.Duration `json:"gossipProbeInterval"`
	// BindAddress is the address for the gossip service to bind to and listen on.
	// Both UDP and TCP ports are used by the gossip service. The local gossip
	// service should be able to receive gossip service related messages by
	// binding to and listening on this address. BindAddress is usually in the
	// format of IP:Port, Hostname:Port or DNS Name:Port.
	BindAddress string `json:"-"`
	BindPort    uint16 `json:"bindPort"`
	// AdvertiseAddress is the address to advertise to other NodeHost instances
	// used for NAT traversal. Gossip services running on remote NodeHost
	// instances will use AdvertiseAddress to exchange gossip service related
	// messages. AdvertiseAddress is in the format of IP:Port.
	// AdvertiseAddress string
	// Seed is a list of AdvertiseAddress of remote NodeHost instances. Local
	// NodeHost instance will try to contact all of them to bootstrap the gossip
	// service. At least one reachable NodeHost instance is required to
	// successfully bootstrap the gossip service. Each seed address is in the
	// format of IP:Port, Hostname:Port or DNS Name:Port.
	//
	// It is ok to include seed addresses that are temporarily unreachable, e.g.
	// when launching the first NodeHost instance in your deployment, you can
	// include AdvertiseAddresses from other NodeHost instances that you plan to
	// launch shortly afterwards.
	Seeds []string `json:"seeds"`

	// clusterCallback is invoked to persist cluster data to a file after
	// it has been updated inside gossip.
	clusterCallback ClusterCallback `json:"-"`
}
53 |
// IsEmpty returns a boolean flag indicating whether the GossipConfig
// instance is empty, i.e. neither a bind address nor any seed nodes are
// configured.
func (g *GossipConfig) IsEmpty() bool {
	return len(g.BindAddress) == 0 && len(g.Seeds) == 0
}
59 |
// SetClusterCallback installs the callback used to persist cluster data
// after gossip updates it. Validate requires the callback to be non-nil.
func (g *GossipConfig) SetClusterCallback(fn ClusterCallback) {
	g.clusterCallback = fn
}
63 |
64 | // Validate validates the GossipConfig instance.
65 | func (g *GossipConfig) Validate() error {
66 | if len(g.BindAddress) > 0 && !stringutil.IsValidAddress(g.BindAddress) {
67 | return errors.New("invalid GossipConfig.BindAddress")
68 | } else if len(g.BindAddress) == 0 {
69 | return errors.New("BindAddress not set")
70 | }
71 |
72 | if g.clusterCallback == nil {
73 | return errors.New("clusterCallback not set")
74 | }
75 |
76 | // if len(g.AdvertiseAddress) > 0 && !isValidAdvertiseAddress(g.AdvertiseAddress) {
77 | // return errors.New("invalid GossipConfig.AdvertiseAddress")
78 | // }
79 | if len(g.Seeds) == 0 {
80 | return errors.New("seed nodes not set")
81 | }
82 | count := 0
83 | for _, v := range g.Seeds {
84 | if v != g.BindAddress /*&& v != g.AdvertiseAddress*/ {
85 | count++
86 | }
87 | if !stringutil.IsValidAddress(v) {
88 | return errors.New("invalid GossipConfig.Seed value")
89 | }
90 | }
91 | if count == 0 {
92 | return errors.New("no valid seed node")
93 | }
94 | return nil
95 | }
96 |
97 | func isValidAdvertiseAddress(addr string) bool {
98 | host, sp, err := net.SplitHostPort(addr)
99 | if err != nil {
100 | return false
101 | }
102 | port, err := strconv.ParseUint(sp, 10, 16)
103 | if err != nil {
104 | return false
105 | }
106 | if port > 65535 {
107 | return false
108 | }
109 | // the memberlist package doesn't allow hostname or DNS name to be used in
110 | // advertise address
111 | return stringutil.IPV4Regex.MatchString(host)
112 | }
113 |
// parseAddress splits a "host:port" string into its host and numeric
// port, rejecting malformed input and ports outside the uint16 range.
func parseAddress(addr string) (string, int, error) {
	host, portStr, err := net.SplitHostPort(addr)
	if err != nil {
		return "", 0, err
	}

	portNum, perr := strconv.ParseUint(portStr, 10, 16)
	if perr != nil {
		return "", 0, perr
	}

	return host, int(portNum), nil
}
125 |
--------------------------------------------------------------------------------
/gossip/coordinate/client.go:
--------------------------------------------------------------------------------
1 | package coordinate
2 |
3 | import (
4 | "fmt"
5 | "math"
6 | "sort"
7 | "sync"
8 | "time"
9 | )
10 |
// Client manages the estimated network coordinate for a given node, and adjusts
// it as the node observes round trip times and estimated coordinates from other
// nodes. The core algorithm is based on Vivaldi, see the documentation for Config
// for more details.
type Client struct {
	// coord is the current estimate of the client's network coordinate.
	coord *Coordinate

	// origin is a coordinate sitting at the origin; used by updateGravity
	// to pull the estimate back toward the center.
	origin *Coordinate

	// config contains the tuning parameters that govern the performance of
	// the algorithm.
	config *Config

	// adjustmentIndex is the current index into the adjustmentSamples slice
	// (it is used as a ring buffer).
	adjustmentIndex uint

	// adjustmentSamples stores the sliding window of samples for the
	// adjustment calculation.
	adjustmentSamples []float64

	// latencyFilterSamples is used to store the last several RTT samples,
	// keyed by node name. We will use the config's LatencyFilterSize
	// value to determine how many samples we keep, per node.
	latencyFilterSamples map[string][]float64

	// stats is used to record events that occur when updating coordinates.
	stats ClientStats

	// mutex enables safe concurrent access to the client.
	mutex sync.RWMutex
}
43 |
// ClientStats is used to record events that occur when updating coordinates.
type ClientStats struct {
	// Resets is incremented any time we reset our local coordinate because
	// our calculations have resulted in an invalid state (NaN/Inf).
	Resets int
}
50 |
51 | // NewClient creates a new Client and verifies the configuration is valid.
52 | func NewClient(config *Config) (*Client, error) {
53 | if !(config.Dimensionality > 0) {
54 | return nil, fmt.Errorf("dimensionality must be >0")
55 | }
56 |
57 | return &Client{
58 | coord: NewCoordinate(config),
59 | origin: NewCoordinate(config),
60 | config: config,
61 | adjustmentIndex: 0,
62 | adjustmentSamples: make([]float64, config.AdjustmentWindowSize),
63 | latencyFilterSamples: make(map[string][]float64),
64 | }, nil
65 | }
66 |
// GetCoordinate returns a copy of the coordinate for this client, so the
// caller cannot mutate the client's internal state through it.
func (c *Client) GetCoordinate() *Coordinate {
	c.mutex.RLock()
	defer c.mutex.RUnlock()

	return c.coord.Clone()
}
74 |
// SetCoordinate forces the client's coordinate to a known state. The
// coordinate must be valid and dimensionally compatible with the current
// one; a private clone is stored so the caller can't mutate it later.
func (c *Client) SetCoordinate(coord *Coordinate) error {
	c.mutex.Lock()
	defer c.mutex.Unlock()

	if err := c.checkCoordinate(coord); err != nil {
		return err
	}

	c.coord = coord.Clone()
	return nil
}
87 |
// ForgetNode removes any client state for the given node (currently just
// its latency-filter window), so departed nodes don't leak memory.
func (c *Client) ForgetNode(node string) {
	c.mutex.Lock()
	defer c.mutex.Unlock()

	delete(c.latencyFilterSamples, node)
}
95 |
96 | // Stats returns a copy of stats for the client.
97 | func (c *Client) Stats() ClientStats {
98 | c.mutex.Lock()
99 | defer c.mutex.Unlock()
100 |
101 | return c.stats
102 | }
103 |
104 | // checkCoordinate returns an error if the coordinate isn't compatible with
105 | // this client, or if the coordinate itself isn't valid. This assumes the mutex
106 | // has been locked already.
107 | func (c *Client) checkCoordinate(coord *Coordinate) error {
108 | if !c.coord.IsCompatibleWith(coord) {
109 | return fmt.Errorf("dimensions aren't compatible")
110 | }
111 |
112 | if !coord.IsValid() {
113 | return fmt.Errorf("coordinate is invalid")
114 | }
115 |
116 | return nil
117 | }
118 |
119 | // latencyFilter applies a simple moving median filter with a new sample for
120 | // a node. This assumes that the mutex has been locked already.
121 | func (c *Client) latencyFilter(node string, rttSeconds float64) float64 {
122 | samples, ok := c.latencyFilterSamples[node]
123 | if !ok {
124 | samples = make([]float64, 0, c.config.LatencyFilterSize)
125 | }
126 |
127 | // Add the new sample and trim the list, if needed.
128 | samples = append(samples, rttSeconds)
129 | if len(samples) > int(c.config.LatencyFilterSize) {
130 | samples = samples[1:]
131 | }
132 | c.latencyFilterSamples[node] = samples
133 |
134 | // Sort a copy of the samples and return the median.
135 | sorted := make([]float64, len(samples))
136 | copy(sorted, samples)
137 | sort.Float64s(sorted)
138 | return sorted[len(sorted)/2]
139 | }
140 |
// updateVivaldi updates the Vivaldi portion of the client's coordinate. This
// assumes that the mutex has been locked already.
func (c *Client) updateVivaldi(other *Coordinate, rttSeconds float64) {
	const zeroThreshold = 1.0e-6

	// Compare the estimated distance with the observed RTT; clamp the
	// RTT away from zero so the relative error below can't divide by zero.
	dist := c.coord.DistanceTo(other).Seconds()
	if rttSeconds < zeroThreshold {
		rttSeconds = zeroThreshold
	}
	wrongness := math.Abs(dist-rttSeconds) / rttSeconds

	// Weight this observation by our confidence relative to the other
	// node's (a larger share of the total error means we trust our own
	// estimate less); guard the sum against zero as well.
	totalError := c.coord.Error + other.Error
	if totalError < zeroThreshold {
		totalError = zeroThreshold
	}
	weight := c.coord.Error / totalError

	// Exponentially-weighted update of our error estimate, capped at the
	// configured maximum.
	c.coord.Error = c.config.VivaldiCE*weight*wrongness + c.coord.Error*(1.0-c.config.VivaldiCE*weight)
	if c.coord.Error > c.config.VivaldiErrorMax {
		c.coord.Error = c.config.VivaldiErrorMax
	}

	// Nudge the coordinate toward (or away from) the other node in
	// proportion to how wrong the current estimate is.
	delta := c.config.VivaldiCC * weight
	force := delta * (rttSeconds - dist)
	c.coord = c.coord.ApplyForce(c.config, force, other)
}
167 |
168 | // updateAdjustment updates the adjustment portion of the client's coordinate, if
169 | // the feature is enabled. This assumes that the mutex has been locked already.
170 | func (c *Client) updateAdjustment(other *Coordinate, rttSeconds float64) {
171 | if c.config.AdjustmentWindowSize == 0 {
172 | return
173 | }
174 |
175 | // Note that the existing adjustment factors don't figure in to this
176 | // calculation so we use the raw distance here.
177 | dist := c.coord.rawDistanceTo(other)
178 | c.adjustmentSamples[c.adjustmentIndex] = rttSeconds - dist
179 | c.adjustmentIndex = (c.adjustmentIndex + 1) % c.config.AdjustmentWindowSize
180 |
181 | sum := 0.0
182 | for _, sample := range c.adjustmentSamples {
183 | sum += sample
184 | }
185 | c.coord.Adjustment = sum / (2.0 * float64(c.config.AdjustmentWindowSize))
186 | }
187 |
// updateGravity applies a small amount of gravity to pull coordinates towards
// the center of the coordinate system to combat drift. The pull grows
// quadratically with distance from the origin, scaled by GravityRho. This
// assumes that the mutex is locked already.
func (c *Client) updateGravity() {
	dist := c.origin.DistanceTo(c.coord).Seconds()
	force := -1.0 * math.Pow(dist/c.config.GravityRho, 2.0)
	c.coord = c.coord.ApplyForce(c.config, force, c.origin)
}
196 |
197 | // Update takes other, a coordinate for another node, and rtt, a round trip
198 | // time observation for a ping to that node, and updates the estimated position of
199 | // the client's coordinate. Returns the updated coordinate.
200 | func (c *Client) Update(node string, other *Coordinate, rtt time.Duration) (*Coordinate, error) {
201 | c.mutex.Lock()
202 | defer c.mutex.Unlock()
203 |
204 | if err := c.checkCoordinate(other); err != nil {
205 | return nil, err
206 | }
207 |
208 | // The code down below can handle zero RTTs, which we have seen in
209 | // https://github.com/hashicorp/consul/issues/3789, presumably in
210 | // environments with coarse-grained monotonic clocks (we are still
211 | // trying to pin this down). In any event, this is ok from a code PoV
212 | // so we don't need to alert operators with spammy messages. We did
213 | // add a counter so this is still observable, though.
214 | const maxRTT = 10 * time.Second
215 | if rtt < 0 || rtt > maxRTT {
216 | return nil, fmt.Errorf("round trip time not in valid range, duration %v is not a positive value less than %v ", rtt, maxRTT)
217 | }
218 | if rtt == 0 {
219 | //metrics.IncrCounter([]string{"serf", "coordinate", "zero-rtt"}, 1)
220 | }
221 |
222 | rttSeconds := c.latencyFilter(node, rtt.Seconds())
223 | c.updateVivaldi(other, rttSeconds)
224 | c.updateAdjustment(other, rttSeconds)
225 | c.updateGravity()
226 | if !c.coord.IsValid() {
227 | c.stats.Resets++
228 | c.coord = NewCoordinate(c.config)
229 | }
230 |
231 | return c.coord.Clone(), nil
232 | }
233 |
// DistanceTo returns the estimated RTT from the client's coordinate to other,
// the coordinate for another node.
func (c *Client) DistanceTo(other *Coordinate) time.Duration {
	c.mutex.RLock()
	defer c.mutex.RUnlock()

	return c.coord.DistanceTo(other)
}
242 |
--------------------------------------------------------------------------------
/gossip/coordinate/client_test.go:
--------------------------------------------------------------------------------
1 | package coordinate
2 |
3 | import (
4 | "fmt"
5 | "math"
6 | "reflect"
7 | "strings"
8 | "testing"
9 | "time"
10 | )
11 |
// TestClient_NewClient checks constructor validation: zero dimensionality
// is rejected with a descriptive error, and a freshly built client sits
// at the origin coordinate.
func TestClient_NewClient(t *testing.T) {
	config := DefaultConfig()

	config.Dimensionality = 0
	client, err := NewClient(config)
	if err == nil || !strings.Contains(err.Error(), "dimensionality") {
		t.Fatal(err)
	}

	config.Dimensionality = 7
	client, err = NewClient(config)
	if err != nil {
		t.Fatal(err)
	}

	origin := NewCoordinate(config)
	if !reflect.DeepEqual(client.GetCoordinate(), origin) {
		t.Fatalf("fresh client should be located at the origin")
	}
}
32 |
// TestClient_Update verifies that an RTT observation moves the client's
// coordinate away from the observed node, and that SetCoordinate can
// force a known state afterwards.
func TestClient_Update(t *testing.T) {
	config := DefaultConfig()
	config.Dimensionality = 3

	client, err := NewClient(config)
	if err != nil {
		t.Fatal(err)
	}

	// Make sure the Euclidean part of our coordinate is what we expect.
	c := client.GetCoordinate()
	verifyEqualVectors(t, c.Vec, []float64{0.0, 0.0, 0.0})

	// Place a node right above the client and observe an RTT longer than the
	// client expects, given its distance.
	other := NewCoordinate(config)
	other.Vec[2] = 0.001
	rtt := time.Duration(2.0 * other.Vec[2] * secondsToNanoseconds)
	c, err = client.Update("node", other, rtt)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// The client should have scooted down to get away from it.
	if !(c.Vec[2] < 0.0) {
		t.Fatalf("client z coordinate %9.6f should be < 0.0", c.Vec[2])
	}

	// Set the coordinate to a known state.
	// (SetCoordinate's error is deliberately ignored here; c is known valid.)
	c.Vec[2] = 99.0
	client.SetCoordinate(c)
	c = client.GetCoordinate()
	verifyEqualFloats(t, c.Vec[2], 99.0)
}
67 |
// TestClient_InvalidInPingValues feeds out-of-range RTTs (overflowing,
// negative, and above the 10s cap) into Update and checks that each is
// rejected and leaves the distance estimate untouched.
func TestClient_InvalidInPingValues(t *testing.T) {
	config := DefaultConfig()
	config.Dimensionality = 3

	client, err := NewClient(config)
	if err != nil {
		t.Fatal(err)
	}

	// Place another node
	other := NewCoordinate(config)
	other.Vec[2] = 0.001
	dist := client.DistanceTo(other)

	// Update with a series of invalid ping periods, should return an error and estimated rtt remains unchanged
	// (1<<63 - 1 deliberately overflows when multiplied below; -35 is negative; 11 exceeds the 10s cap).
	pings := []int{1<<63 - 1, -35, 11}

	for _, ping := range pings {
		// expectedErr is only used in the failure message; the check
		// itself is just that Update returned some error.
		expectedErr := fmt.Errorf("round trip time not in valid range, duration %v is not a positive value less than %v", ping, 10*time.Second)
		_, err = client.Update("node", other, time.Duration(ping*secondsToNanoseconds))
		if err == nil {
			t.Fatalf("Unexpected error, wanted %v but got %v", expectedErr, err)
		}

		dist_new := client.DistanceTo(other)
		if dist_new != dist {
			t.Fatalf("distance estimate %v not equal to %v", dist_new, dist)
		}
	}

}
99 |
// TestClient_DistanceTo pins the RTT estimate for a coordinate placed a
// known number of seconds away (height disabled via HeightMin = 0).
func TestClient_DistanceTo(t *testing.T) {
	config := DefaultConfig()
	config.Dimensionality = 3
	config.HeightMin = 0

	client, err := NewClient(config)
	if err != nil {
		t.Fatal(err)
	}

	// Fiddle a raw coordinate to put it a specific number of seconds away.
	other := NewCoordinate(config)
	other.Vec[2] = 12.345
	expected := time.Duration(other.Vec[2] * secondsToNanoseconds)
	dist := client.DistanceTo(other)
	if dist != expected {
		t.Fatalf("distance doesn't match %9.6f != %9.6f", dist.Seconds(), expected.Seconds())
	}
}
119 |
// TestClient_latencyFilter exercises the moving-median filter: median
// selection, window aging, per-node isolation, and state cleanup via
// ForgetNode.
func TestClient_latencyFilter(t *testing.T) {
	config := DefaultConfig()
	config.LatencyFilterSize = 3

	client, err := NewClient(config)
	if err != nil {
		t.Fatal(err)
	}

	// Make sure we get the median, and that things age properly.
	verifyEqualFloats(t, client.latencyFilter("alice", 0.201), 0.201)
	verifyEqualFloats(t, client.latencyFilter("alice", 0.200), 0.201)
	verifyEqualFloats(t, client.latencyFilter("alice", 0.207), 0.201)

	// This glitch will get median-ed out and never seen by Vivaldi.
	verifyEqualFloats(t, client.latencyFilter("alice", 1.9), 0.207)
	verifyEqualFloats(t, client.latencyFilter("alice", 0.203), 0.207)
	verifyEqualFloats(t, client.latencyFilter("alice", 0.199), 0.203)
	verifyEqualFloats(t, client.latencyFilter("alice", 0.211), 0.203)

	// Make sure different nodes are not coupled.
	verifyEqualFloats(t, client.latencyFilter("bob", 0.310), 0.310)

	// Make sure we don't leak coordinates for nodes that leave.
	client.ForgetNode("alice")
	verifyEqualFloats(t, client.latencyFilter("alice", 0.888), 0.888)
}
147 |
// TestClient_NaN_Defense verifies that invalid (NaN) and incompatible
// coordinates are rejected by both Update and SetCoordinate, and that a
// poisoned internal coordinate triggers a counted reset on Update.
func TestClient_NaN_Defense(t *testing.T) {
	config := DefaultConfig()
	config.Dimensionality = 3

	client, err := NewClient(config)
	if err != nil {
		t.Fatal(err)
	}

	// Block a bad coordinate from coming in.
	other := NewCoordinate(config)
	other.Vec[0] = math.NaN()
	if other.IsValid() {
		t.Fatalf("bad: %#v", *other)
	}
	rtt := 250 * time.Millisecond
	c, err := client.Update("node", other, rtt)
	if err == nil || !strings.Contains(err.Error(), "coordinate is invalid") {
		t.Fatalf("err: %v", err)
	}
	if c := client.GetCoordinate(); !c.IsValid() {
		t.Fatalf("bad: %#v", *c)
	}

	// Block setting an invalid coordinate directly.
	err = client.SetCoordinate(other)
	if err == nil || !strings.Contains(err.Error(), "coordinate is invalid") {
		t.Fatalf("err: %v", err)
	}
	if c := client.GetCoordinate(); !c.IsValid() {
		t.Fatalf("bad: %#v", *c)
	}

	// Block an incompatible coordinate.
	other.Vec = make([]float64, 2*len(other.Vec))
	c, err = client.Update("node", other, rtt)
	if err == nil || !strings.Contains(err.Error(), "dimensions aren't compatible") {
		t.Fatalf("err: %v", err)
	}
	if c := client.GetCoordinate(); !c.IsValid() {
		t.Fatalf("bad: %#v", *c)
	}

	// Block setting an incompatible coordinate directly.
	err = client.SetCoordinate(other)
	if err == nil || !strings.Contains(err.Error(), "dimensions aren't compatible") {
		t.Fatalf("err: %v", err)
	}
	if c := client.GetCoordinate(); !c.IsValid() {
		t.Fatalf("bad: %#v", *c)
	}

	// Poison the internal state and make sure we reset on an update.
	client.coord.Vec[0] = math.NaN()
	other = NewCoordinate(config)
	c, err = client.Update("node", other, rtt)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if !c.IsValid() {
		t.Fatalf("bad: %#v", *c)
	}
	if got, want := client.Stats().Resets, 1; got != want {
		t.Fatalf("got %d want %d", got, want)
	}
}
214 |
--------------------------------------------------------------------------------
/gossip/coordinate/config.go:
--------------------------------------------------------------------------------
1 | package coordinate
2 |
// Config is used to set the parameters of the Vivaldi-based coordinate mapping
// algorithm.
//
// The following references are called out at various points in the documentation
// here:
//
// [1] Dabek, Frank, et al. "Vivaldi: A decentralized network coordinate system."
//     ACM SIGCOMM Computer Communication Review. Vol. 34. No. 4. ACM, 2004.
// [2] Ledlie, Jonathan, Paul Gardner, and Margo I. Seltzer. "Network Coordinates
//     in the Wild." NSDI. Vol. 7. 2007.
// [3] Lee, Sanghwan, et al. "On suitability of Euclidean embedding for
//     host-based network coordinate systems." Networking, IEEE/ACM Transactions
//     on 18.1 (2010): 27-40.
type Config struct {
	// The dimensionality of the coordinate system. As discussed in [2], more
	// dimensions improves the accuracy of the estimates up to a point. Per [2]
	// we chose 8 dimensions plus a non-Euclidean height.
	Dimensionality uint

	// VivaldiErrorMax is the default error value when a node hasn't yet made
	// any observations. It also serves as an upper limit on the error value in
	// case observations cause the error value to increase without bound.
	VivaldiErrorMax float64

	// VivaldiCE is a tuning factor that controls the maximum impact an
	// observation can have on a node's confidence. See [1] for more details.
	VivaldiCE float64

	// VivaldiCC is a tuning factor that controls the maximum impact an
	// observation can have on a node's coordinate. See [1] for more details.
	VivaldiCC float64

	// AdjustmentWindowSize is a tuning factor that determines how many samples
	// we retain to calculate the adjustment factor as discussed in [3]. Setting
	// this to zero disables this feature.
	AdjustmentWindowSize uint

	// HeightMin is the minimum value of the height parameter. Since this
	// always must be positive, it will introduce a small amount error, so
	// the chosen value should be relatively small compared to "normal"
	// coordinates.
	HeightMin float64

	// LatencyFilterSize is the maximum number of samples that are retained
	// per node, in order to compute a median. The intent is to ride out blips
	// but still keep the delay low, since our time to probe any given node is
	// pretty infrequent. See [2] for more details.
	LatencyFilterSize uint

	// GravityRho is a tuning factor that sets how much gravity has an effect
	// to try to re-center coordinates. See [2] for more details.
	GravityRho float64
}
56 |
// DefaultConfig returns a Config that has some default values suitable for
// basic testing of the algorithm, but not tuned to any particular type of
// cluster.
func DefaultConfig() *Config {
	return &Config{
		Dimensionality:       8,
		VivaldiErrorMax:      1.5,
		VivaldiCE:            0.25,
		VivaldiCC:            0.25,
		AdjustmentWindowSize: 20,
		HeightMin:            10.0e-6,
		LatencyFilterSize:    3,
		GravityRho:           150.0,
	}
}
71 |
--------------------------------------------------------------------------------
/gossip/coordinate/coordinate.go:
--------------------------------------------------------------------------------
1 | package coordinate
2 |
3 | import (
4 | "math"
5 | "math/rand"
6 | "time"
7 | )
8 |
// Coordinate is a specialized structure for holding network coordinates for the
// Vivaldi-based coordinate mapping algorithm. All of the fields should be public
// to enable this to be serialized. All values in here are in units of seconds.
type Coordinate struct {
	// Vec is the Euclidean portion of the coordinate. This is used along
	// with the other fields to provide an overall distance estimate. The
	// units here are seconds.
	Vec []float64

	// Error reflects the confidence in the given coordinate and is updated
	// dynamically by the Vivaldi Client. This is dimensionless.
	Error float64

	// Adjustment is a distance offset computed based on a calculation over
	// observations from all other nodes over a fixed window and is updated
	// dynamically by the Vivaldi Client. The units here are seconds.
	Adjustment float64

	// Height is a distance offset that accounts for non-Euclidean effects
	// which model the access links from nodes to the core Internet. The access
	// links are usually set by bandwidth and congestion, and the core links
	// usually follow distance based on geography.
	Height float64
}
33 |
const (
	// secondsToNanoseconds is used to convert float seconds to nanoseconds.
	secondsToNanoseconds = 1.0e9

	// zeroThreshold is used to decide if two coordinates are on top of each
	// other; distances below it are treated as effectively zero.
	zeroThreshold = 1.0e-6
)
42 |
// DimensionalityConflictError is the value panicked when an operation is
// attempted on two coordinates whose dimensions do not match.
type DimensionalityConflictError struct{}

// Error implements the error interface.
func (e DimensionalityConflictError) Error() string {
	const msg = "coordinate dimensionality does not match"
	return msg
}
51 |
52 | // NewCoordinate creates a new coordinate at the origin, using the given config
53 | // to supply key initial values.
54 | func NewCoordinate(config *Config) *Coordinate {
55 | return &Coordinate{
56 | Vec: make([]float64, config.Dimensionality),
57 | Error: config.VivaldiErrorMax,
58 | Adjustment: 0.0,
59 | Height: config.HeightMin,
60 | }
61 | }
62 |
63 | // Clone creates an independent copy of this coordinate.
64 | func (c *Coordinate) Clone() *Coordinate {
65 | vec := make([]float64, len(c.Vec))
66 | copy(vec, c.Vec)
67 | return &Coordinate{
68 | Vec: vec,
69 | Error: c.Error,
70 | Adjustment: c.Adjustment,
71 | Height: c.Height,
72 | }
73 | }
74 |
// componentIsValid reports whether f is a usable floating point value,
// i.e. neither NaN nor an infinity.
func componentIsValid(f float64) bool {
	if math.IsNaN(f) {
		return false
	}
	return !math.IsInf(f, 0)
}
80 |
81 | // IsValid returns false if any component of a coordinate isn't valid, per the
82 | // componentIsValid() helper above.
83 | func (c *Coordinate) IsValid() bool {
84 | for i := range c.Vec {
85 | if !componentIsValid(c.Vec[i]) {
86 | return false
87 | }
88 | }
89 |
90 | return componentIsValid(c.Error) &&
91 | componentIsValid(c.Adjustment) &&
92 | componentIsValid(c.Height)
93 | }
94 |
95 | // IsCompatibleWith checks to see if the two coordinates are compatible
96 | // dimensionally. If this returns true then you are guaranteed to not get
97 | // any runtime errors operating on them.
98 | func (c *Coordinate) IsCompatibleWith(other *Coordinate) bool {
99 | return len(c.Vec) == len(other.Vec)
100 | }
101 |
102 | // ApplyForce returns the result of applying the force from the direction of the
103 | // other coordinate.
104 | func (c *Coordinate) ApplyForce(config *Config, force float64, other *Coordinate) *Coordinate {
105 | if !c.IsCompatibleWith(other) {
106 | panic(DimensionalityConflictError{})
107 | }
108 |
109 | ret := c.Clone()
110 | unit, mag := unitVectorAt(c.Vec, other.Vec)
111 | ret.Vec = add(ret.Vec, mul(unit, force))
112 | if mag > zeroThreshold {
113 | ret.Height = (ret.Height+other.Height)*force/mag + ret.Height
114 | ret.Height = math.Max(ret.Height, config.HeightMin)
115 | }
116 | return ret
117 | }
118 |
119 | // DistanceTo returns the distance between this coordinate and the other
120 | // coordinate, including adjustments.
121 | func (c *Coordinate) DistanceTo(other *Coordinate) time.Duration {
122 | if !c.IsCompatibleWith(other) {
123 | panic(DimensionalityConflictError{})
124 | }
125 |
126 | dist := c.rawDistanceTo(other)
127 | adjustedDist := dist + c.Adjustment + other.Adjustment
128 | if adjustedDist > 0.0 {
129 | dist = adjustedDist
130 | }
131 | return time.Duration(dist * secondsToNanoseconds)
132 | }
133 |
134 | // rawDistanceTo returns the Vivaldi distance between this coordinate and the
135 | // other coordinate in seconds, not including adjustments. This assumes the
136 | // dimensions have already been checked to be compatible.
137 | func (c *Coordinate) rawDistanceTo(other *Coordinate) float64 {
138 | return magnitude(diff(c.Vec, other.Vec)) + c.Height + other.Height
139 | }
140 |
// add returns the element-wise sum of vec1 and vec2, which must already be
// of equal length.
func add(vec1 []float64, vec2 []float64) []float64 {
	sum := make([]float64, len(vec1))
	for i, v := range vec1 {
		sum[i] = v + vec2[i]
	}
	return sum
}
150 |
// diff returns the element-wise difference vec1 - vec2; the slices must
// already be of equal length.
func diff(vec1 []float64, vec2 []float64) []float64 {
	delta := make([]float64, len(vec1))
	for i, v := range vec1 {
		delta[i] = v - vec2[i]
	}
	return delta
}
160 |
// mul returns a copy of vec with every element scaled by factor.
func mul(vec []float64, factor float64) []float64 {
	scaled := make([]float64, len(vec))
	for i, v := range vec {
		scaled[i] = v * factor
	}
	return scaled
}
169 |
// magnitude computes the Euclidean norm of vec; a nil or empty vector has
// magnitude zero.
func magnitude(vec []float64) float64 {
	sumsq := 0.0
	for _, v := range vec {
		sumsq += v * v
	}
	return math.Sqrt(sumsq)
}
178 |
179 | // unitVectorAt returns a unit vector pointing at vec1 from vec2. If the two
180 | // positions are the same then a random unit vector is returned. We also return
181 | // the distance between the points for use in the later height calculation.
182 | func unitVectorAt(vec1 []float64, vec2 []float64) ([]float64, float64) {
183 | ret := diff(vec1, vec2)
184 |
185 | // If the coordinates aren't on top of each other we can normalize.
186 | if mag := magnitude(ret); mag > zeroThreshold {
187 | return mul(ret, 1.0/mag), mag
188 | }
189 |
190 | // Otherwise, just return a random unit vector.
191 | for i := range ret {
192 | ret[i] = rand.Float64() - 0.5
193 | }
194 | if mag := magnitude(ret); mag > zeroThreshold {
195 | return mul(ret, 1.0/mag), 0.0
196 | }
197 |
198 | // And finally just give up and make a unit vector along the first
199 | // dimension. This should be exceedingly rare.
200 | ret = make([]float64, len(ret))
201 | ret[0] = 1.0
202 | return ret, 0.0
203 | }
204 |
--------------------------------------------------------------------------------
/gossip/coordinate/performance_test.go:
--------------------------------------------------------------------------------
1 | package coordinate
2 |
3 | import (
4 | "math"
5 | "testing"
6 | "time"
7 | )
8 |
9 | func TestPerformance_Line(t *testing.T) {
10 | const spacing = 10 * time.Millisecond
11 | const nodes, cycles = 10, 1000
12 | config := DefaultConfig()
13 | clients, err := GenerateClients(nodes, config)
14 | if err != nil {
15 | t.Fatal(err)
16 | }
17 | truth := GenerateLine(nodes, spacing)
18 | Simulate(clients, truth, cycles)
19 | stats := Evaluate(clients, truth)
20 | if stats.ErrorAvg > 0.0018 || stats.ErrorMax > 0.0092 {
21 | t.Fatalf("performance stats are out of spec: %v", stats)
22 | }
23 | }
24 |
25 | func TestPerformance_Grid(t *testing.T) {
26 | const spacing = 10 * time.Millisecond
27 | const nodes, cycles = 25, 1000
28 | config := DefaultConfig()
29 | clients, err := GenerateClients(nodes, config)
30 | if err != nil {
31 | t.Fatal(err)
32 | }
33 | truth := GenerateGrid(nodes, spacing)
34 | Simulate(clients, truth, cycles)
35 | stats := Evaluate(clients, truth)
36 | if stats.ErrorAvg > 0.0015 || stats.ErrorMax > 0.022 {
37 | t.Fatalf("performance stats are out of spec: %v", stats)
38 | }
39 | }
40 |
41 | func TestPerformance_Split(t *testing.T) {
42 | const lan, wan = 1 * time.Millisecond, 10 * time.Millisecond
43 | const nodes, cycles = 25, 1000
44 | config := DefaultConfig()
45 | clients, err := GenerateClients(nodes, config)
46 | if err != nil {
47 | t.Fatal(err)
48 | }
49 | truth := GenerateSplit(nodes, lan, wan)
50 | Simulate(clients, truth, cycles)
51 | stats := Evaluate(clients, truth)
52 | if stats.ErrorAvg > 0.000060 || stats.ErrorMax > 0.00048 {
53 | t.Fatalf("performance stats are out of spec: %v", stats)
54 | }
55 | }
56 |
57 | func TestPerformance_Height(t *testing.T) {
58 | const radius = 100 * time.Millisecond
59 | const nodes, cycles = 25, 1000
60 |
61 | // Constrain us to two dimensions so that we can just exactly represent
62 | // the circle.
63 | config := DefaultConfig()
64 | config.Dimensionality = 2
65 | clients, err := GenerateClients(nodes, config)
66 | if err != nil {
67 | t.Fatal(err)
68 | }
69 |
70 | // Generate truth where the first coordinate is in the "middle" because
71 | // it's equidistant from all the nodes, but it will have an extra radius
72 | // added to the distance, so it should come out above all the others.
73 | truth := GenerateCircle(nodes, radius)
74 | Simulate(clients, truth, cycles)
75 |
76 | // Make sure the height looks reasonable with the regular nodes all in a
77 | // plane, and the center node up above.
78 | for i := range clients {
79 | coord := clients[i].GetCoordinate()
80 | if i == 0 {
81 | if coord.Height < 0.97*radius.Seconds() {
82 | t.Fatalf("height is out of spec: %9.6f", coord.Height)
83 | }
84 | } else {
85 | if coord.Height > 0.03*radius.Seconds() {
86 | t.Fatalf("height is out of spec: %9.6f", coord.Height)
87 | }
88 | }
89 | }
90 | stats := Evaluate(clients, truth)
91 | if stats.ErrorAvg > 0.0025 || stats.ErrorMax > 0.064 {
92 | t.Fatalf("performance stats are out of spec: %v", stats)
93 | }
94 | }
95 |
96 | func TestPerformance_Drift(t *testing.T) {
97 | const dist = 500 * time.Millisecond
98 | const nodes = 4
99 | config := DefaultConfig()
100 | config.Dimensionality = 2
101 | clients, err := GenerateClients(nodes, config)
102 | if err != nil {
103 | t.Fatal(err)
104 | }
105 |
106 | // Do some icky surgery on the clients to put them into a square, up in
107 | // the first quadrant.
108 | clients[0].coord.Vec = []float64{0.0, 0.0}
109 | clients[1].coord.Vec = []float64{0.0, dist.Seconds()}
110 | clients[2].coord.Vec = []float64{dist.Seconds(), dist.Seconds()}
111 | clients[3].coord.Vec = []float64{dist.Seconds(), dist.Seconds()}
112 |
113 | // Make a corresponding truth matrix. The nodes are laid out like this
114 | // so the distances are all equal, except for the diagonal:
115 | //
116 | // (1) <- dist -> (2)
117 | //
118 | // | <- dist |
119 | // | |
120 | // | dist -> |
121 | //
122 | // (0) <- dist -> (3)
123 | //
124 | truth := make([][]time.Duration, nodes)
125 | for i := range truth {
126 | truth[i] = make([]time.Duration, nodes)
127 | }
128 | for i := 0; i < nodes; i++ {
129 | for j := i + 1; j < nodes; j++ {
130 | rtt := dist
131 | if (i%2 == 0) && (j%2 == 0) {
132 | rtt = time.Duration(math.Sqrt2 * float64(rtt))
133 | }
134 | truth[i][j], truth[j][i] = rtt, rtt
135 | }
136 | }
137 |
138 | calcCenterError := func() float64 {
139 | min, max := clients[0].GetCoordinate(), clients[0].GetCoordinate()
140 | for i := 1; i < nodes; i++ {
141 | coord := clients[i].GetCoordinate()
142 | for j, v := range coord.Vec {
143 | min.Vec[j] = math.Min(min.Vec[j], v)
144 | max.Vec[j] = math.Max(max.Vec[j], v)
145 | }
146 | }
147 |
148 | mid := make([]float64, config.Dimensionality)
149 | for i := range mid {
150 | mid[i] = min.Vec[i] + (max.Vec[i]-min.Vec[i])/2
151 | }
152 | return magnitude(mid)
153 | }
154 |
155 | // Let the simulation run for a while to stabilize, then snap a baseline
156 | // for the center error.
157 | Simulate(clients, truth, 1000)
158 | baseline := calcCenterError()
159 |
160 | // Now run for a bunch more cycles and see if gravity pulls the center
161 | // in the right direction.
162 | Simulate(clients, truth, 10000)
163 | if error := calcCenterError(); error > 0.8*baseline {
164 | t.Fatalf("drift performance out of spec: %9.6f -> %9.6f", baseline, error)
165 | }
166 | }
167 |
168 | func TestPerformance_Random(t *testing.T) {
169 | const mean, deviation = 100 * time.Millisecond, 10 * time.Millisecond
170 | const nodes, cycles = 25, 1000
171 | config := DefaultConfig()
172 | clients, err := GenerateClients(nodes, config)
173 | if err != nil {
174 | t.Fatal(err)
175 | }
176 | truth := GenerateRandom(nodes, mean, deviation)
177 | Simulate(clients, truth, cycles)
178 | stats := Evaluate(clients, truth)
179 | if stats.ErrorAvg > 0.075 || stats.ErrorMax > 0.33 {
180 | t.Fatalf("performance stats are out of spec: %v", stats)
181 | }
182 | }
183 |
--------------------------------------------------------------------------------
/gossip/coordinate/phantom.go:
--------------------------------------------------------------------------------
1 | package coordinate
2 |
3 | import (
4 | "fmt"
5 | "math"
6 | "math/rand"
7 | "time"
8 | )
9 |
10 | // GenerateClients returns a slice with nodes number of clients, all with the
11 | // given config.
12 | func GenerateClients(nodes int, config *Config) ([]*Client, error) {
13 | clients := make([]*Client, nodes)
14 | for i := range clients {
15 | client, err := NewClient(config)
16 | if err != nil {
17 | return nil, err
18 | }
19 |
20 | clients[i] = client
21 | }
22 | return clients, nil
23 | }
24 |
// GenerateLine returns a truth matrix as if all the nodes are in a straight
// line with the given spacing between them.
func GenerateLine(nodes int, spacing time.Duration) [][]time.Duration {
	truth := make([][]time.Duration, nodes)
	for i := range truth {
		truth[i] = make([]time.Duration, nodes)
	}

	for i := 0; i < nodes; i++ {
		for j := i + 1; j < nodes; j++ {
			// RTT grows linearly with the number of hops between i and j.
			rtt := time.Duration(j-i) * spacing
			truth[i][j] = rtt
			truth[j][i] = rtt
		}
	}
	return truth
}
41 |
// GenerateGrid returns a truth matrix as if all the nodes are in a two
// dimensional grid with the given spacing between them.
func GenerateGrid(nodes int, spacing time.Duration) [][]time.Duration {
	truth := make([][]time.Duration, nodes)
	for i := range truth {
		truth[i] = make([]time.Duration, nodes)
	}

	// Nodes are laid out row-major on a side x side grid.
	side := int(math.Sqrt(float64(nodes)))
	for i := 0; i < nodes; i++ {
		for j := i + 1; j < nodes; j++ {
			dx := float64(j%side) - float64(i%side)
			dy := float64(j/side) - float64(i/side)
			dist := math.Sqrt(dx*dx + dy*dy)
			rtt := time.Duration(dist * float64(spacing))
			truth[i][j] = rtt
			truth[j][i] = rtt
		}
	}
	return truth
}
63 |
// GenerateSplit returns a truth matrix as if half the nodes are close
// together in one location and half the nodes are close together in
// another. The lan factor separates nodes locally and the wan factor
// represents the split between the two sides.
func GenerateSplit(nodes int, lan time.Duration, wan time.Duration) [][]time.Duration {
	truth := make([][]time.Duration, nodes)
	for i := range truth {
		truth[i] = make([]time.Duration, nodes)
	}

	half := nodes / 2
	for i := 0; i < nodes; i++ {
		for j := i + 1; j < nodes; j++ {
			rtt := lan
			// Pairs on opposite sides of the split pay the WAN cost too.
			if (i <= half) != (j <= half) {
				rtt += wan
			}
			truth[i][j] = rtt
			truth[j][i] = rtt
		}
	}
	return truth
}
86 |
// GenerateCircle returns a truth matrix for a set of nodes, evenly
// distributed around a circle with the given radius. The first node is at
// the "center" of the circle because it's equidistant from all the other
// nodes, but we place it at double the radius, so it should show up above
// all the other nodes in height.
func GenerateCircle(nodes int, radius time.Duration) [][]time.Duration {
	truth := make([][]time.Duration, nodes)
	for i := range truth {
		truth[i] = make([]time.Duration, nodes)
	}

	// position returns the unit-circle location of node k.
	position := func(k int) (float64, float64) {
		theta := 2.0 * math.Pi * float64(k) / float64(nodes)
		return math.Cos(theta), math.Sin(theta)
	}

	for i := 0; i < nodes; i++ {
		for j := i + 1; j < nodes; j++ {
			var rtt time.Duration
			if i == 0 {
				// The center node carries an extra radius on every link.
				rtt = 2 * radius
			} else {
				x1, y1 := position(i)
				x2, y2 := position(j)
				dx, dy := x2-x1, y2-y1
				dist := math.Sqrt(dx*dx + dy*dy)
				rtt = time.Duration(dist * float64(radius))
			}
			truth[i][j] = rtt
			truth[j][i] = rtt
		}
	}
	return truth
}
116 |
117 | // GenerateRandom returns a truth matrix for a set of nodes with normally
118 | // distributed delays, with the given mean and deviation. The RNG is re-seeded
119 | // so you always get the same matrix for a given size.
120 | func GenerateRandom(nodes int, mean time.Duration, deviation time.Duration) [][]time.Duration {
121 | rand.Seed(1)
122 |
123 | truth := make([][]time.Duration, nodes)
124 | for i := range truth {
125 | truth[i] = make([]time.Duration, nodes)
126 | }
127 |
128 | for i := 0; i < nodes; i++ {
129 | for j := i + 1; j < nodes; j++ {
130 | rttSeconds := rand.NormFloat64()*deviation.Seconds() + mean.Seconds()
131 | rtt := time.Duration(rttSeconds * secondsToNanoseconds)
132 | truth[i][j], truth[j][i] = rtt, rtt
133 | }
134 | }
135 | return truth
136 | }
137 |
138 | // Simulate runs the given number of cycles using the given list of clients and
139 | // truth matrix. On each cycle, each client will pick a random node and observe
140 | // the truth RTT, updating its coordinate estimate. The RNG is re-seeded for
141 | // each simulation run to get deterministic results (for this algorithm and the
142 | // underlying algorithm which will use random numbers for position vectors when
143 | // starting out with everything at the origin).
144 | func Simulate(clients []*Client, truth [][]time.Duration, cycles int) {
145 | rand.Seed(1)
146 |
147 | nodes := len(clients)
148 | for cycle := 0; cycle < cycles; cycle++ {
149 | for i := range clients {
150 | if j := rand.Intn(nodes); j != i {
151 | c := clients[j].GetCoordinate()
152 | rtt := truth[i][j]
153 | node := fmt.Sprintf("node_%d", j)
154 | clients[i].Update(node, c, rtt)
155 | }
156 | }
157 | }
158 | }
159 |
// Stats is returned from the Evaluate function with a summary of the
// algorithm performance.
type Stats struct {
	// ErrorMax is the largest relative estimation error over all pairs.
	ErrorMax float64
	// ErrorAvg is the mean relative estimation error over all pairs.
	ErrorAvg float64
}
166 |
167 | // Evaluate uses the coordinates of the given clients to calculate estimated
168 | // distances and compares them with the given truth matrix, returning summary
169 | // stats.
170 | func Evaluate(clients []*Client, truth [][]time.Duration) (stats Stats) {
171 | nodes := len(clients)
172 | count := 0
173 | for i := 0; i < nodes; i++ {
174 | for j := i + 1; j < nodes; j++ {
175 | est := clients[i].DistanceTo(clients[j].GetCoordinate()).Seconds()
176 | actual := truth[i][j].Seconds()
177 | error := math.Abs(est-actual) / actual
178 | stats.ErrorMax = math.Max(stats.ErrorMax, error)
179 | stats.ErrorAvg += error
180 | count += 1
181 | }
182 | }
183 |
184 | stats.ErrorAvg /= float64(count)
185 | fmt.Printf("Error avg=%9.6f max=%9.6f\n", stats.ErrorAvg, stats.ErrorMax)
186 | return
187 | }
188 |
--------------------------------------------------------------------------------
/gossip/coordinate/util_test.go:
--------------------------------------------------------------------------------
1 | package coordinate
2 |
3 | import (
4 | "math"
5 | "testing"
6 | )
7 |
// verifyEqualFloats fails the test when f1 and f2 differ by more than a
// small fixed threshold.
func verifyEqualFloats(t *testing.T, f1 float64, f2 float64) {
	const zeroThreshold = 1.0e-6
	delta := math.Abs(f1 - f2)
	if delta > zeroThreshold {
		t.Fatalf("equal assertion fail, %9.6f != %9.6f", f1, f2)
	}
}
16 |
17 | // verifyEqualVectors will compare vec1 and vec2 and fail if they are not
18 | // "equal" within a threshold.
19 | func verifyEqualVectors(t *testing.T, vec1 []float64, vec2 []float64) {
20 | if len(vec1) != len(vec2) {
21 | t.Fatalf("vector length mismatch, %d != %d", len(vec1), len(vec2))
22 | }
23 |
24 | for i := range vec1 {
25 | verifyEqualFloats(t, vec1[i], vec2[i])
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/gossip/event.go:
--------------------------------------------------------------------------------
1 | package gossip
2 |
3 | import (
4 | "sync"
5 |
6 | "github.com/hashicorp/memberlist"
7 | "github.com/lni/goutils/syncutil"
8 | "go.uber.org/zap"
9 | )
10 |
// eventDelegate receives memberlist node events over a channel and applies
// them to the GossipManager's view of live instances. It embeds
// memberlist.ChannelEventDelegate so memberlist writes events into ch.
type eventDelegate struct {
	g *GossipManager
	memberlist.ChannelEventDelegate
	// ch is the channel the embedded ChannelEventDelegate feeds; start()
	// drains it until the stopper fires.
	ch chan memberlist.NodeEvent
	stopper *syncutil.Stopper
	// nodes maps node name -> raw meta string for currently known nodes.
	nodes sync.Map
}
18 |
// aliveInstance tracks which moveTo / rubik instances are currently alive,
// as learned from gossip node events. All maps are guarded by mu.
type aliveInstance struct {
	mu sync.RWMutex

	// moveToRubik maps a moveTo grpc address to the grpc address the rubik
	// service exposes for it.
	moveToRubik map[string]string

	// Liveness keyed by moveTo grpc address and rubik grpc address.
	moveToInstances map[string]bool
	rubikInstances map[string]bool
}
28 |
29 | func newAliveInstance() *aliveInstance {
30 | return &aliveInstance{
31 | moveToRubik: make(map[string]string),
32 | moveToInstances: make(map[string]bool),
33 | rubikInstances: make(map[string]bool),
34 | }
35 | }
36 |
37 | func (ai *aliveInstance) updateInstance(meta *Meta, alive bool) {
38 | if meta == nil {
39 | return
40 | }
41 |
42 | ai.mu.Lock()
43 | if alive {
44 | ai.moveToRubik[meta.MoveToGrpcAddr] = meta.RubikGrpcAddr
45 | } else {
46 | delete(ai.moveToRubik, meta.MoveToGrpcAddr)
47 | }
48 | ai.moveToInstances[meta.MoveToGrpcAddr] = alive
49 | ai.rubikInstances[meta.RubikGrpcAddr] = alive
50 | ai.mu.Unlock()
51 | }
52 |
53 | func (ai *aliveInstance) getMoveToInstances() map[string]bool {
54 | ai.mu.RLock()
55 | defer ai.mu.RUnlock()
56 |
57 | return ai.moveToInstances
58 | }
59 |
60 | func (ai *aliveInstance) getRubikInstances() map[string]bool {
61 | ai.mu.RLock()
62 | defer ai.mu.RUnlock()
63 |
64 | return ai.rubikInstances
65 | }
66 |
67 | func (ai *aliveInstance) getMoveToRubik() map[string]string {
68 | ai.mu.RLock()
69 | defer ai.mu.RUnlock()
70 |
71 | return ai.moveToRubik
72 | }
73 |
74 | func newEventDelegate(s *syncutil.Stopper, g *GossipManager) *eventDelegate {
75 | ch := make(chan memberlist.NodeEvent, 10)
76 | ed := &eventDelegate{
77 | g: g,
78 | stopper: s,
79 | ch: ch,
80 | ChannelEventDelegate: memberlist.ChannelEventDelegate{Ch: ch},
81 | }
82 | return ed
83 | }
84 |
85 | func (d *eventDelegate) decodeMeta(e memberlist.NodeEvent, fields []zap.Field) *Meta {
86 | if len(e.Node.Meta) == 0 || messageType(e.Node.Meta[0]) != tagMagicByte {
87 | d.g.log.Warn("[multiraft] [self-gossip-user] [eventdelegate] [metaType]",
88 | append(fields,
89 | zap.Int("event", int(e.Event)),
90 | zap.String("nodename", e.Node.Name),
91 | zap.String("nodeaddress", e.Node.Address()),
92 | zap.String("meta", string(e.Node.Meta)),
93 | zap.String("err", "meta messageType error"),
94 | )...)
95 | return nil
96 | }
97 |
98 | meta := &Meta{}
99 | if err := decodeMessage(e.Node.Meta[1:], &meta); err != nil {
100 | d.g.log.Warn("[multiraft] [self-gossip-user] [eventdelegate] [metaDecode]",
101 | append(fields,
102 | zap.Int("event", int(e.Event)),
103 | zap.String("nodename", e.Node.Name),
104 | zap.String("nodeaddress", e.Node.Address()),
105 | zap.String("meta", string(e.Node.Meta)),
106 | zap.Error(err),
107 | )...)
108 | return nil
109 | }
110 |
111 | return meta
112 | }
113 |
114 | func (d *eventDelegate) start() {
115 | localNode := d.g.list.LocalNode()
116 | fields := []zap.Field{
117 | zap.String("name", localNode.Name),
118 | zap.String("address", localNode.Address()),
119 | }
120 |
121 | d.stopper.RunWorker(func() {
122 | for {
123 | select {
124 | case <-d.stopper.ShouldStop():
125 | return
126 | case e := <-d.ch:
127 | meta := d.decodeMeta(e, fields)
128 | if e.Event == memberlist.NodeJoin || e.Event == memberlist.NodeUpdate {
129 | d.g.log.Info("[multiraft] [self-gossip-user] [eventdelegate] [update]",
130 | append(fields,
131 | zap.Int("event", int(e.Event)),
132 | zap.String("nodename", e.Node.Name),
133 | zap.String("nodeaddress", e.Node.Address()),
134 | zap.String("meta", string(e.Node.Meta)),
135 | )...,
136 | )
137 | d.nodes.Store(e.Node.Name, string(e.Node.Meta))
138 | d.g.aliveInstance.updateInstance(meta, true)
139 | } else if e.Event == memberlist.NodeLeave {
140 | d.g.log.Info("[multiraft] [self-gossip-user] [eventdelegate] [delete]",
141 | append(fields,
142 | zap.Int("event", int(e.Event)),
143 | zap.String("nodename", e.Node.Name),
144 | zap.String("nodeaddress", e.Node.Address()),
145 | zap.String("meta", string(e.Node.Meta)),
146 | )...,
147 | )
148 | d.nodes.Delete(e.Node.Name)
149 | d.g.aliveInstance.updateInstance(meta, false)
150 | }
151 | }
152 | }
153 | })
154 | }
155 |
--------------------------------------------------------------------------------
/gossip/message.go:
--------------------------------------------------------------------------------
1 | package gossip
2 |
3 | import (
4 | "bytes"
5 | "compress/gzip"
6 | "encoding/json"
7 | "io"
8 |
9 | "github.com/hashicorp/go-msgpack/codec"
10 | "github.com/hashicorp/memberlist"
11 | )
12 |
// messageType identifies the kind of gossip message sent along memberlist;
// it is written as the first byte of every encoded message.
type messageType uint8

const (
	// messageClusterType carries a RaftClusterMessage.
	messageClusterType messageType = iota
	// messageMembershipType carries a RaftMembershipMessage.
	messageMembershipType
	// messagePushPullType carries a PushPullMessage.
	messagePushPullType
)
22 |
// TargetClusterId pairs a grpc address with the raft cluster ids it serves.
type TargetClusterId struct {
	GrpcAddr string `json:"grpcAddr"`
	ClusterIds []uint64 `json:"clusterIds"`
}
27 |
// RaftClusterMessage describes the cluster topology gossiped between nodes.
type RaftClusterMessage struct {
	// Revision versions this view of the topology.
	Revision int64 `json:"revision"`

	// Targets maps a machine identifier to its moveTo GRPC address info.
	// Key: the machine ID when raft runs with nodehostid=true, otherwise
	// the raftAddr when raft uses fixed addresses.
	Targets map[string]TargetClusterId `json:"targets"`

	// Clusters maps each raft cluster to its machine IDs (or raftAddrs).
	Clusters map[uint64][]string `json:"clusters"`

	// InitialMembers holds the initial members of every raft cluster.
	// Outer key: clusterId; inner key: nodeId; value: raftAddr or nodeHostID.
	InitialMembers map[uint64]map[uint64]string `json:"initial_members"`
	Join map[uint64]map[uint64]bool `json:"join"`
}
43 |
44 | func (rm *RaftClusterMessage) String() string {
45 | b, _ := json.Marshal(rm)
46 | return string(b)
47 | }
48 |
// MemberInfo summarizes the membership of a single raft cluster, including
// voting nodes, observers, and the current leader (when known).
type MemberInfo struct {
	ClusterId uint64
	ConfigChangeId uint64
	Nodes map[uint64]string
	Observers map[uint64]string
	LeaderId uint64
	// LeaderValid reports whether LeaderId is meaningful.
	LeaderValid bool
}
57 |
58 | func (mi *MemberInfo) String() string {
59 | b, _ := json.Marshal(mi)
60 | return string(b)
61 | }
62 |
// RaftMembershipMessage carries the membership of every known raft cluster.
type RaftMembershipMessage struct {
	// MemberInfos is keyed by clusterId.
	MemberInfos map[uint64]*MemberInfo
}
67 |
68 | func (rm *RaftMembershipMessage) String() string {
69 | b, _ := json.Marshal(rm)
70 | return string(b)
71 | }
72 |
// PushPullMessage bundles the cluster topology and membership views that
// are exchanged during a memberlist push/pull state sync.
type PushPullMessage struct {
	Cluster *RaftClusterMessage
	Membership *RaftMembershipMessage
}
77 |
78 | func (pp *PushPullMessage) String() string {
79 | b, _ := json.Marshal(pp)
80 | return string(b)
81 | }
82 |
83 | func decodeMessage(buf []byte, out interface{}) error {
84 | bbuf, err := GZipDecode(buf)
85 | if err != nil {
86 | return err
87 | }
88 |
89 | var handle codec.MsgpackHandle
90 | return codec.NewDecoder(bytes.NewReader(bbuf), &handle).Decode(out)
91 | }
92 |
93 | func encodeMessage(t messageType, msg interface{}) ([]byte, error) {
94 | buf := bytes.NewBuffer(nil)
95 |
96 | handle := codec.MsgpackHandle{}
97 | encoder := codec.NewEncoder(buf, &handle)
98 | err := encoder.Encode(msg)
99 | if err != nil {
100 | return nil, err
101 | }
102 |
103 | gbuf, err := GZipEncode(buf.Bytes())
104 | if err != nil {
105 | return nil, err
106 | }
107 |
108 | return append([]byte{uint8(t)}, gbuf...), nil
109 | }
110 |
// broadcast adapts a raw gossip message to the memberlist.Broadcast
// interface; notify, when non-nil, is closed once the message has been
// transmitted.
type broadcast struct {
	msg []byte
	notify chan<- struct{}
}
115 |
116 | func newBroadcast(msg []byte) *broadcast {
117 | return &broadcast{
118 | msg: msg,
119 | notify: make(chan struct{}),
120 | }
121 | }
122 |
// Invalidates implements memberlist.Broadcast; no broadcast supersedes
// another here, so every queued message is delivered.
func (b *broadcast) Invalidates(other memberlist.Broadcast) bool {
	return false
}

// Message implements memberlist.Broadcast, returning the raw payload.
func (b *broadcast) Message() []byte {
	return b.msg
}

// Finished implements memberlist.Broadcast; it signals completion by
// closing the notify channel, if one was set.
func (b *broadcast) Finished() {
	if b.notify != nil {
		close(b.notify)
	}
}
136 |
// GZipEncode compresses content with gzip and returns the compressed bytes.
func GZipEncode(content []byte) ([]byte, error) {
	var compressed bytes.Buffer
	zw := gzip.NewWriter(&compressed)

	if _, err := zw.Write(content); err != nil {
		return nil, err
	}
	// Flush then Close, matching the original stream layout exactly.
	if err := zw.Flush(); err != nil {
		return nil, err
	}
	if err := zw.Close(); err != nil {
		return nil, err
	}

	return compressed.Bytes(), nil
}
154 |
// GZipDecode decompresses the gzip stream in buf and returns the original
// uncompressed bytes.
func GZipDecode(buf []byte) ([]byte, error) {
	source := bytes.NewReader(buf)
	zr, err := gzip.NewReader(source)
	if err != nil {
		return nil, err
	}
	defer zr.Close()

	return io.ReadAll(zr)
}
164 |
--------------------------------------------------------------------------------
/gossip/ping_delegate.go:
--------------------------------------------------------------------------------
1 | package gossip
2 |
3 | import (
4 | "bytes"
5 | "time"
6 |
7 | "github.com/xkeyideal/mraft/gossip/coordinate"
8 |
9 | "github.com/hashicorp/go-msgpack/codec"
10 | "github.com/hashicorp/memberlist"
11 | "go.uber.org/zap"
12 | )
13 |
// pingDelegate is notified when memberlist successfully completes a direct ping
// of a peer node. We use this to update our estimated network coordinate, as
// well as cache the coordinate of the peer.
type pingDelegate struct {
	// g provides the coordinate client, coordinate cache, logger, and options.
	g *GossipManager
}
20 |
const (
	// PingVersion is an internal version for the ping message, above the normal
	// versioning we get from the protocol version. This enables small updates
	// to the ping message without a full protocol bump. It is the first byte
	// of every ack payload.
	PingVersion = 1
)
27 |
28 | // AckPayload is called to produce a payload to send back in response to a ping
29 | // request.
30 | func (p *pingDelegate) AckPayload() []byte {
31 | var buf bytes.Buffer
32 |
33 | // The first byte is the version number, forming a simple header.
34 | version := []byte{PingVersion}
35 | buf.Write(version)
36 |
37 | // The rest of the message is the serialized coordinate.
38 | enc := codec.NewEncoder(&buf, &codec.MsgpackHandle{})
39 | if err := enc.Encode(p.g.coordClient.GetCoordinate()); err != nil {
40 | p.g.log.Error("[multiraft] [self-gossip-user] [ping-delegate] [AckPayload] [encode]", zap.Error(err))
41 | }
42 | return buf.Bytes()
43 | }
44 |
45 | // NotifyPingComplete is called when this node successfully completes a direct ping
46 | // of a peer node.
47 | func (p *pingDelegate) NotifyPingComplete(other *memberlist.Node, rtt time.Duration, payload []byte) {
48 | if payload == nil || len(payload) == 0 {
49 | return
50 | }
51 |
52 | // Verify ping version in the header.
53 | version := payload[0]
54 | if version != PingVersion {
55 | p.g.log.Error("[multiraft] [self-gossip-user] [ping-delegate] [NotifyPingComplete] [version]", zap.Uint8("version", version))
56 | return
57 | }
58 |
59 | // Process the remainder of the message as a coordinate.
60 | r := bytes.NewReader(payload[1:])
61 | dec := codec.NewDecoder(r, &codec.MsgpackHandle{})
62 | var coord coordinate.Coordinate
63 | if err := dec.Decode(&coord); err != nil {
64 | p.g.log.Error("[multiraft] [self-gossip-user] [ping-delegate] [NotifyPingComplete] [decode]", zap.Error(err))
65 | return
66 | }
67 |
68 | // Apply the update.
69 | before := p.g.coordClient.GetCoordinate()
70 | after, err := p.g.coordClient.Update(other.Name, &coord, rtt)
71 | if err != nil {
72 | //metrics.IncrCounter([]string{"serf", "coordinate", "rejected"}, 1)
73 | p.g.log.Error("[multiraft] [self-gossip-user] [ping-delegate] [NotifyPingComplete] [rejected]", zap.Error(err))
74 | return
75 | }
76 |
77 | // Publish some metrics to give us an idea of how much we are
78 | // adjusting each time we update.
79 | d := float32(before.DistanceTo(after).Seconds() * 1.0e3)
80 | //metrics.AddSample([]string{"serf", "coordinate", "adjustment-ms"}, d)
81 | if d >= 100.0 {
82 | p.g.log.Warn("[multiraft] [self-gossip-user] [ping-delegate] [NotifyPingComplete] [DistanceTo]",
83 | zap.String("src", p.g.opts.Name),
84 | zap.String("dest", other.Name),
85 | zap.Int64("rtt", int64(rtt)),
86 | zap.Float32("adjustment-ms", d),
87 | )
88 | } else {
89 | p.g.log.Info("[multiraft] [self-gossip-user] [ping-delegate] [NotifyPingComplete] [DistanceTo]",
90 | zap.String("src", p.g.opts.Name),
91 | zap.String("dest", other.Name),
92 | zap.Int64("rtt", int64(rtt)),
93 | zap.Float32("adjustment-ms", d),
94 | )
95 | }
96 |
97 | // Cache the coordinate for the other node, and add our own
98 | // to the cache as well since it just got updated. This lets
99 | // users call GetCachedCoordinate with our node name, which is
100 | // more friendly.
101 | p.g.coordCacheLock.Lock()
102 | p.g.coordCache[other.Name] = &coord
103 | p.g.coordCache[p.g.opts.Name] = p.g.coordClient.GetCoordinate()
104 | p.g.coordCacheLock.Unlock()
105 | }
106 |
--------------------------------------------------------------------------------
/logger/zaplog.go:
--------------------------------------------------------------------------------
1 | package logger
2 |
3 | import (
4 | "os"
5 | "time"
6 |
7 | "go.uber.org/zap"
8 | "go.uber.org/zap/zapcore"
9 | "gopkg.in/natefinch/lumberjack.v2"
10 | )
11 |
var (
	// cstLocal is the China Standard Time location used to render log
	// timestamps; loaded once at package init.
	cstLocal *time.Location
	// err is package-level only so init can assign both results of
	// LoadLocation; it is not read after init.
	err error
)

// init loads the Asia/Shanghai timezone. Timestamps cannot be rendered
// correctly without it, so failure to load is fatal.
func init() {
	cstLocal, err = time.LoadLocation("Asia/Shanghai")
	if err != nil {
		panic(err)
	}
}
23 |
24 | func encodeTimeLayout(t time.Time, layout string, enc zapcore.PrimitiveArrayEncoder) {
25 | type appendTimeEncoder interface {
26 | AppendTimeLayout(time.Time, string)
27 | }
28 |
29 | if enc, ok := enc.(appendTimeEncoder); ok {
30 | enc.AppendTimeLayout(t, layout)
31 | return
32 | }
33 |
34 | enc.AppendString(t.Format(layout))
35 | }
36 |
// CSTTimeEncoder is a zapcore time encoder that renders timestamps in
// China Standard Time using the "2006-01-02 15:04:05.000" layout.
func CSTTimeEncoder(t time.Time, enc zapcore.PrimitiveArrayEncoder) {
	encodeTimeLayout(t.In(cstLocal), "2006-01-02 15:04:05.000", enc)
}
40 |
// NewLogger builds a zap.Logger that writes console-encoded entries at
// the given level to a size-rotated file (via lumberjack), and optionally
// also to stdout.
func NewLogger(logFilename string, level zapcore.Level, stdout bool) *zap.Logger {
	encoderConfig := zapcore.EncoderConfig{
		TimeKey:        "time",
		LevelKey:       "level",
		NameKey:        "logger",
		CallerKey:      "caller",
		MessageKey:     "msg",
		StacktraceKey:  "stacktrace",
		LineEnding:     zapcore.DefaultLineEnding,
		EncodeLevel:    zapcore.CapitalLevelEncoder, // uppercase level names, e.g. "INFO"
		EncodeTime:     CSTTimeEncoder,
		EncodeDuration: zapcore.SecondsDurationEncoder,
		EncodeCaller:   zapcore.FullCallerEncoder, // full file path in the caller field
	}

	// lumberjack performs size-based log rotation.
	hook := lumberjack.Logger{
		Filename:   logFilename, // log file path
		MaxSize:    512,         // max size per log file, in MB
		MaxBackups: 300,         // max number of rotated backups to keep
		MaxAge:     30,          // max days to retain old files
		Compress:   true,        // compress rotated files
	}

	// Hold the level in an AtomicLevel so it could be adjusted at runtime.
	atomicLevel := zap.NewAtomicLevel()
	atomicLevel.SetLevel(level)

	writeSyncer := []zapcore.WriteSyncer{zapcore.AddSync(&hook)}
	if stdout {
		writeSyncer = append(writeSyncer, zapcore.Lock(os.Stdout))
	}

	core := zapcore.NewCore(
		zapcore.NewConsoleEncoder(encoderConfig),    // encoder configuration
		zapcore.NewMultiWriteSyncer(writeSyncer...), // write to file (and stdout when enabled)
		atomicLevel, // minimum enabled level
	)

	logger := zap.New(core)

	return logger
}
83 |
--------------------------------------------------------------------------------
/productready/README.md:
--------------------------------------------------------------------------------
1 | ### 启动方式
2 |
3 | 程序启动入口: productready/main/app.go httpPort, raftPort
4 |
5 | ### 启动配置项
6 |
7 | ```go
8 | type DynamicConfig struct {
9 | // raft数据存储目录
10 | RaftDir string `json:"raftDir"`
11 |
12 | // 日志存储目录
13 | LogDir string `json:"logDir"`
14 |
15 | // 每个raft节点的Id,一旦生成且加入集群,再次启动后,不能变动
16 | NodeId uint64 `json:"nodeId"`
17 |
18 | // key: clusterId, key:nodeId
19 | Join map[uint64]map[uint64]bool `json:"join"`
20 |
21 | // key: clusterId, key:nodeId
22 | // val: 根据dragonboat的启动方式决定
23 | // gossip方式: NodeHostId
24 | // 常规方式: raftAddr
25 | InitialMembers map[uint64]map[uint64]string `json:"initial_members"`
26 |
27 | // 本机的地址
28 | IP string `json:"ip"`
29 |
30 | // raft port
31 | RaftPort uint16 `json:"raft_port"`
32 |
33 | // http port
34 | HttpPort uint16 `json:"http_port"`
35 | }
36 | ```
37 |
38 | ### dragonboat raft的启动方式
39 |
40 | 该raft框架的启动方式有两种
41 |
42 | 1. 采用常规的ip:port方式,此方式限制了重启后ip:port不能发生改变
43 | 2. 采用gossip方式启动,此方式只需要保证raft config里的
44 | ```go
45 | Expert: config.ExpertConfig{
46 | TestNodeHostID: nodeId,
47 | },
48 | ```
49 | TestNodeHostID不变即可,可以实现重启后的ip改动,但数据文件和raft文件不能丢失,本质上仅支持机器换ip
50 |
51 | 本示例采用的是第一种方式.
52 |
53 | raft集群里的每台集群管理的`clusterIds`不一定必须完全一致,即每台机器没必要存储全量的数据,可以写一个管理端程序来管理clusterId的每台机器分配情况。
54 | 如果每台机器不存全量的`clusterIds`,那么业务请求的key到来,可能该机器并不存在此key的value,解决办法可以采用该机器找到该key实际存在在哪台机器,
55 | 然后帮助业务完成请求并返回。
56 |
57 | ### 本示例目前无法直接启动
58 |
59 | 由于需要配置`DynamicConfig` 里的 `Join` 和 `InitialMembers` 字段后才能顺利启动,本人使用的真实生产环境是将启动配置写入服务器文件,每次重启时读取该文件获取上述必备的启动数据;如果不存在本地文件,则等待管理端推送该份配置。
60 |
61 | 因此想启动,可以先研究一下代码,然后将 `Join` 和 `InitialMembers` 字段写死在`productready/main/app.go`文件的配置里,然后尝试启动
62 |
63 | 刚开始第一步,将所有的`clusterId`和`nodeId`全部以原始节点启动,目前在`productready/engine.go`里系统是将所有`clusterIds`写死的`clusterIds = []uint64{0, 1, 2}`,真实的生产环境`clusterIds`的个数也是预先配置好的,均存储在管理端程序中。
64 |
65 | 如果节点是以`join`的方式加入,那么先启动该节点,然后调用集群接口,将此节点加入进集群,然后重新生成 `Join` 和 `InitialMembers` 字段,推送给新节点,等待即可;该份新配置也应该推送给集群中原本已存在的机器,本人真实使用的情况是对此份配置加上版本号来进行控制的。
66 |
67 | **根据[dragonboat](https://github.com/lni/dragonboat/blob/master/docs/overview.CHS.md)节点启动的文档,当一个节点重启时,不论该节点是一个初始节点还是后续通过成员变更添加的节点,均无需再次提供初始成员信息,也不再需要设置join参数为true**。
68 |
69 | 配置解释:
70 |
71 | 1. raft集群中每个节点ID(NodeID),采用该节点IP+port生成的48位uint64整型值
72 | 2. 若未配置raftPort,则控制中心采用默认的raftPort端口 `13890`启动
73 | 3. 样例提供的http端口采用raftPort的整型值加1作为HttpPort,无需用户配置
74 | 4. `join`字段,每个clusterId下对应存在哪些nodeId,且这些nodeId是集群的原始节点或后加入的节点
75 | 5. `initial_members`字段,告知集群每个clusterId下对应存在哪些nodeId,每个nodeId的raftAddr
76 |
77 | ### 节点加入raft集群
78 |
79 | 根据上述配置的说明,当节点主动加入raft集群时节点无需配置上述配置里的参数 `native` 和 `raftPeers`,
80 | 已在集群里的节点不能再次加入,**之前被删除的节点不能再次加入集群**
81 |
82 | 1. 调用样例加入节点的http接口,通知控制中心的raft集群,有新的节点需要加入集群, 待接口返回加入集群成功
83 | 2. 启动待加入集群的节点,此时原raft集群会自动寻址该新节点并同步数据
84 | 3. 新节点成功加入集群后,请立即去webapi的控制面板里修改raft集群的节点数据
85 |
86 | ### NodeHostConfig
87 |
88 | ```
89 | config.NodeHostConfig{
90 | // DeploymentID用于确定两个NodeHost实例是否属于同一部署,并因此允许彼此通信。
91 | // 通过将上下文消息发送到不相关的Raft节点,这有助于防止意外配置错误的NodeHost实例导致数据损坏错误。
92 | // 对于特定的基于Dragonboat的应用程序,可以在所有生产NodeHost实例上将DeploymentID设置为相同的uint64值,
93 | // 然后在登台和开发环境中使用不同的DeploymentID值。 对于不同的基于Dragonboat的应用程序,也建议使用不同的DeploymentID值。
94 | // 如果未设置,则默认值0将用作部署ID,从而允许所有具有部署ID 0的NodeHost实例相互通信。
95 | DeploymentID: deploymentId,
96 |
97 | // WALDir是用于存储所有Raft日志的WAL的目录,这仅用于存储Raft日志的WAL,它的大小通常很小,
98 | // 每个NodeHost的64GB通常绰绰有余。如果不设置,则所有内容会存储在NodeHostDir中
99 | WALDir: raftDir,
100 |
101 | // NodeHostDir存储所有需要存储的信息
102 | NodeHostDir: raftDir,
103 |
104 | // RTTMillisecond定义了两个NodeHost实例之间的平均往返时间(RTT),以毫秒为单位
105 | // 这样的RTT间隔在内部用作逻辑时钟滴答,raft的心跳和选举间隔都根据有多少这样的RTT间隔来定义
106 | // 请注意,RTTMillisecond是两个NodeHost实例之间的组合延迟,包括由网络传输引起的所有延迟,NodeHost排队和处理引起的所有延迟。
107 | // 例如,在满载时,我们用于基准测试的两个NodeHost实例之间的平均往返时间最多为500微秒,而它们之间的ping时间为100微秒。
108 | // 当您的环境中的RTTMillisecond小于1毫秒时,请将其设置为1。
109 | RTTMillisecond: 200,
110 |
111 | //当前节点对外的IP和端口,其他raft节点需要通过这个信息获得
112 | RaftAddress: addr,
113 |
114 | // ListenAddress是Raft RPC模块用于侦听Raft消息和快照的IP:端口地址。
115 | // 如果未设置ListenAddress字段,则Raft RPC模块将使用RaftAddress。
116 | // 如果将0.0.0.0指定为ListenAddress的IP,则Dragonboat将侦听所有接口上的指定端口。
117 | // 指定主机名或域名后,它将首先在本地解析为IP地址,而Dragonboat会侦听所有解析的IP地址。
118 | // 一般不指定这个,和RaftAddress保持一致就好了,收发就用一个端口,没有必要分开
119 | ListenAddress: listenAddr,
120 |
121 | //是否使用TLS进行安全认证,整个程序都是部署在内网中,可以认为是安全的,就不打开这个了
122 | MutualTLS: false,
123 |
124 | //当配置了TLS时,需要指定CA文件的地址
125 | //当配置了TLS时,需要指定CertFile的地址
126 | //CertFile string
127 | //当配置了TLS时,需要指定KeyFile的地址
128 | //KeyFile string
129 | //MaxReceiveQueueSize是每个接收队列的最大大小(以字节为单位)。 一旦达到最大大小,将删除更多复制消息以限制内存使用。 设置为0时,表示队列大小不受限制。
130 | //暂时先设置为128M
131 | MaxSendQueueSize: 128 * 1024 * 1024,
132 |
133 | // EnableMetrics确定是否应启用Prometheus格式的健康度量。
134 | EnableMetrics: false,
135 |
136 | //MaxSnapshotSendBytesPerSecond定义了NodeHost实例管理的所有Raft群集每秒可发送多少快照数据。默认值0表示没有为快照流设置限制。
137 | //每秒最多传输256M数据
138 | MaxSnapshotSendBytesPerSecond: 256 * 1024 * 1024,
139 |
140 | // MaxSnapshotRecvBytesPerSecond定义可以存储多少快照数据由NodeHost实例管理的所有Raft群集每秒收到一次。默认值0表示接收快照数据没有限制。
141 | //目前不限制接受的大小,由发送端决定
142 | MaxSnapshotRecvBytesPerSecond: 0,
143 | }
144 | ```
145 |
146 | ### RaftConfig
147 |
148 | ```
149 | config.Config{
150 | //当前节点的ID
151 | NodeID: nodeId,
152 |
153 | //当前节点的分片ID,如果当前raft是多组的,那么这个地方是指定当前组的ID
154 | ClusterID: clusterId,
155 |
156 | //领导节点是否应定期检查非领导者节点的状态,并在其不再具有法定人数时退出成为跟随者节点
157 | //当有5台机器,挂了3台,法定人数不够,则主节点退出,不再是主节点了,所有的写操作和同步读操作应该都不能执行了
158 | //各个节点只能读取本地的数据
159 | CheckQuorum: false,
160 |
161 | // ElectionRTT是两次选举之间的消息RTT的最小数量。 消息RTT由NodeHostConfig.RTTMillisecond定义。
162 | // Raft论文建议其幅度大于HeartbeatRTT(因为是先发现不健康,才会进行选举),即两个心跳之间的间隔。
163 | // 在Raft中,选举之间的实际间隔被随机分配在ElectionRTT和2 * ElectionRTT之间。例如,假设NodeHostConfig.RTTMillisecond为100毫秒,
164 | // 要将选举间隔设置为1秒,则应该将ElectionRTT设置为10。启用CheckQuorum后,ElectionRTT还将定义检查领导者定额的时间间隔。
165 | // 这个值是个比例,具体的RTT时间大小是RTTMillisecond*ElectionRTT,当需要选举主节点时,各个节点的随机间隔在ElectionRTT和2 * ElectionRTT,
166 | // 当CheckQuorum为true,主也会每隔这个时间检查下从机数据是否符合法定人数
167 | ElectionRTT: 60,
168 |
169 | // HeartbeatRTT是两次心跳之间的消息RTT数。 消息RTT由NodeHostConfig.RTTMillisecond定义。 Raft论文建议心跳间隔应接近节点之间的平均RTT。
170 | // 例如,假设NodeHostConfig.RTTMillisecond为100毫秒,要将心跳间隔设置为每200毫秒,则应将HeartbeatRTT设置为2。
171 | HeartbeatRTT: 6,
172 |
173 | // SnapshotEntries定义应自动对状态机进行快照的频率,可以将SnapshotEntries设置为0以禁用此类自动快照。
174 | // 当SnapshotEntries设置为N时,意味着大约每N条Raft日志创建一个快照。这也意味着向跟踪者发送N个日志条目比发送快照要昂贵。
175 | // 生成快照后,可以压缩新快照覆盖的Raft日志条目。这涉及两个步骤,冗余日志条目首先被标记为已删除,然后在稍后发布 LogDB 压缩时将其从基础存储中物理删除。
176 | // 有关在生成快照后实际删除和压缩哪些日志条目的详细信息,请参见CompactionOverhead,通过将SnapshotEntries字段设置为0禁用自动快照后,
177 | // 用户仍然可以使用NodeHost的RequestSnapshot或SyncRequestSnapshot方法手动请求快照。
178 | SnapshotEntries: 25 * 10000 * 10,
179 |
180 | // CompactionOverhead定义每次Raft日志压缩后要保留的最新条目数。
181 | // 假设当前的日志为10000,开始创建快照,那么快照创建完成后,<=10000的日志都会被清理,
182 | // 如果想获得9000这样的日志,那么就得先完全加载快照,再从快照中读取,如果设置了CompactionOverhead为3000,
183 | // 那么就算创建了快照,我们仍然能获得10000-7000之间的日志记录,只有小于7000的,才需要重新加载日志获取
184 | CompactionOverhead: 25 * 10000,
185 |
186 | //确定是否使用ChangeID的顺序强制执行Raft成员资格更改。
187 | OrderedConfigChange: true,
188 |
189 | // MaxInMemLogSize是允许在每个Raft节点上的Raft日志存储在内存中的目标大小(以字节为单位)。 内存中的筏日志是尚未应用的日志。
190 | // MaxInMemLogSize是为防止内存无限增长而实现的目标值,并非用于精确限制确切的内存使用量。
191 | // 当MaxInMemLogSize为0时,目标设置为math.MaxUint64。 设置MaxInMemLogSize并达到目标后,客户端尝试提出新建议时将返回错误。
192 | // 建议将MaxInMemLogSize大于要使用的最大建议。
193 | //内存中未应用的日志大小,暂定为256M,超过256M的大小后会返回错误
194 | MaxInMemLogSize: 256 * 1024 * 1024,
195 |
196 | // SnapshotCompressionType是用于压缩生成的快照数据的压缩类型。 默认情况下不使用压缩。
197 | // 快照数据本身由rocksdb生成,采用了LZ4压缩,所以这边就不再继续压缩了
198 | SnapshotCompressionType: config.NoCompression,
199 |
200 | // EntryCompressionType是用于压缩用户日志。 使用Snappy时,允许的最大建议有效负载大致限制为3.42GB。
201 | EntryCompressionType: config.Snappy,
202 |
203 | // DisableAutoCompactions禁用用于回收Raft条目存储空间的自动压缩。
204 | // 默认情况下,每次捕获快照时都会执行压缩,这有助于以较高的IO开销为代价,尽快回收磁盘空间。
205 | // 用户可以禁用此类自动压缩,并在必要时使用NodeHost.RequestCompaction手动请求此类压缩。
206 | DisableAutoCompactions: false,
207 |
208 | // IsObserver指示当前节点是否是Observer节点,(观察者节点通常用于允许新节点加入群集并追赶其他日志,而不会影响可用性。 还可以引入额外的观察者节点来满足只读请求,而不会影响系统的写吞吐量)
209 | IsObserver: false,
210 |
211 | // IsWitness指示这是否是没有实际日志复制且没有状态机的见证Raft节点,见证节点支持目前处于试验阶段。
212 | IsWitness: false,
213 |
214 | //停顿指定在没有群集活动时是否让Raft群集进入停顿模式。 静默模式下的群集不交换心跳消息以最小化带宽消耗。当前处于试验阶段
215 | Quiesce: false,
216 | }
217 | ```
--------------------------------------------------------------------------------
/productready/config/config.go:
--------------------------------------------------------------------------------
1 | package config
2 |
// DynamicConfig is the runtime configuration a node reads (or has pushed
// to it) at startup.
type DynamicConfig struct {
	// RaftDir is the directory where raft data is stored.
	RaftDir string `json:"raftDir"`

	// LogDir is the directory where log files are written.
	LogDir string `json:"logDir"`

	// NodeId is this raft node's id; once generated and joined to the
	// cluster it must never change across restarts.
	NodeId uint64 `json:"nodeId"`

	// Join maps clusterId -> nodeId -> whether that node joins as a new member.
	Join map[uint64]map[uint64]bool `json:"join"`

	// InitialMembers maps clusterId -> nodeId -> member value, whose
	// meaning depends on the dragonboat startup mode:
	//   gossip mode:  NodeHostId
	//   regular mode: raftAddr
	InitialMembers map[uint64]map[uint64]string `json:"initial_members"`

	// IP is this host's address.
	IP string `json:"ip"`

	// RaftPort is the raft transport port.
	RaftPort uint16 `json:"raft_port"`

	// HttpPort is the HTTP API port.
	HttpPort uint16 `json:"http_port"`
}
31 |
--------------------------------------------------------------------------------
/productready/engine.go:
--------------------------------------------------------------------------------
1 | package productready
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "log"
7 | "net/http"
8 | "time"
9 |
10 | "github.com/xkeyideal/mraft/productready/config"
11 | "github.com/xkeyideal/mraft/productready/httpd"
12 | "github.com/xkeyideal/mraft/productready/storage"
13 | "go.uber.org/zap/zapcore"
14 |
15 | "github.com/gin-gonic/gin"
16 | )
17 |
// Engine ties the HTTP API server to the raft-backed key-value storage.
type Engine struct {
	// prefix is the URL prefix all routes are registered under.
	prefix string

	server *http.Server
	router *gin.Engine

	// raftStorage is the raft-replicated key-value storage backend.
	raftStorage *storage.Storage

	// kvHandle serves the HTTP key-value endpoints on top of raftStorage.
	kvHandle *httpd.KVHandle
}

var (
	// clusterIds is the fixed set of raft cluster (shard) ids this demo
	// node manages; a production deployment would obtain these from a
	// management service instead of hard-coding them (see README).
	clusterIds = []uint64{0, 1, 2}
)
32 |
33 | func NewEngine(cfg *config.DynamicConfig) *Engine {
34 | raftCfg := &storage.RaftConfig{
35 | LogDir: cfg.LogDir,
36 | LogLevel: zapcore.DebugLevel,
37 | HostIP: cfg.IP,
38 | NodeId: cfg.NodeId,
39 | ClusterIds: clusterIds,
40 | RaftAddr: fmt.Sprintf("%s:%d", cfg.IP, cfg.RaftPort),
41 | MultiGroupSize: uint32(len(clusterIds)),
42 | StorageDir: cfg.RaftDir,
43 | Join: cfg.Join,
44 | InitialMembers: cfg.InitialMembers,
45 | // Gossip: metadata.Gossip,
46 | // GossipPort: metadata.GossipPort,
47 | // GossipSeeds: metadata.GossipSeeds,
48 | Metrics: false,
49 | // BindAddress: fmt.Sprintf("%s:%d", engine.cfg.IP, metadata.GossipConfig.BindPort),
50 | // BindPort: uint16(metadata.GossipConfig.BindPort),
51 | // Seeds: metadata.GossipConfig.Seeds,
52 | }
53 |
54 | raftStorage, err := storage.NewStorage(raftCfg)
55 | if err != nil {
56 | log.Fatal(err)
57 | }
58 |
59 | log.Println("raft started, waiting raft cluster ready")
60 |
61 | // 等待raft集群ready
62 | err = raftStorage.RaftReady()
63 | if err != nil {
64 | log.Fatalf("[ERROR] raft ready %s\n", err.Error())
65 | }
66 |
67 | router := gin.New()
68 | router.Use(gin.Recovery())
69 |
70 | engine := &Engine{
71 | prefix: "/raft",
72 | router: router,
73 | server: &http.Server{
74 | Addr: fmt.Sprintf("0.0.0.0:%s", cfg.HttpPort),
75 | Handler: router,
76 | ReadTimeout: 20 * time.Second,
77 | WriteTimeout: 40 * time.Second,
78 | },
79 | raftStorage: raftStorage,
80 | kvHandle: httpd.NewKVHandle("kvstorage", raftStorage),
81 | }
82 |
83 | engine.registerRouter(router)
84 |
85 | go func() {
86 | if err := engine.server.ListenAndServe(); err != nil {
87 | panic(err.Error())
88 | }
89 | }()
90 |
91 | return engine
92 | }
93 |
94 | func (engine *Engine) Stop() {
95 | if engine.server != nil {
96 | if err := engine.server.Shutdown(context.Background()); err != nil {
97 | fmt.Println("Server Shutdown: ", err)
98 | }
99 | }
100 |
101 | engine.raftStorage.StopRaftNode()
102 | }
103 |
--------------------------------------------------------------------------------
/productready/httpd/handle.go:
--------------------------------------------------------------------------------
1 | package httpd
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "net/http"
7 | "time"
8 |
9 | "github.com/xkeyideal/mraft/productready/storage"
10 | "github.com/xkeyideal/mraft/productready/utils"
11 |
12 | "github.com/gin-gonic/gin"
13 | )
14 |
// KVHandle bundles the HTTP handlers for the key-value API. All handlers
// operate on a single column family (cf) of the raft storage.
type KVHandle struct {
	cf          string
	raftStorage *storage.Storage
}

// NewKVHandle creates a KVHandle serving column family cf on top of
// raftStorage.
func NewKVHandle(cf string, raftStorage *storage.Storage) *KVHandle {
	return &KVHandle{
		cf:          cf,
		raftStorage: raftStorage,
	}
}
26 |
// Info returns the raft cluster membership with a 2s timeout.
// NOTE(review): the GetMembership error is deliberately discarded, so a
// lookup failure is reported as empty membership with HTTP 200 — confirm
// this best-effort behavior is intended.
func (mh *KVHandle) Info(c *gin.Context) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	info, _ := mh.raftStorage.GetMembership(ctx)
	utils.SetStrResp(http.StatusOK, 0, "", info, c)
}
34 |
35 | func (mh *KVHandle) Query(c *gin.Context) {
36 | key := c.Query("key")
37 | sync := c.Query("sync")
38 |
39 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
40 | defer cancel()
41 |
42 | val, err := mh.raftStorage.Get(ctx, mh.cf, key, sync == "true", []byte(key))
43 | if err != nil {
44 | utils.SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
45 | return
46 | }
47 |
48 | utils.SetStrResp(http.StatusOK, 0, "", string(val), c)
49 | }
50 |
51 | func (mh *KVHandle) Upsert(c *gin.Context) {
52 | key := c.Query("key")
53 | val := c.Query("val")
54 |
55 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
56 | defer cancel()
57 |
58 | err := mh.raftStorage.Put(ctx, mh.cf, key, []byte(key), []byte(val))
59 | if err != nil {
60 | utils.SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
61 | return
62 | }
63 |
64 | utils.SetStrResp(http.StatusOK, 0, "", "OK", c)
65 | }
66 |
67 | func (mh *KVHandle) Delete(c *gin.Context) {
68 | key := c.Query("key")
69 |
70 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
71 | defer cancel()
72 |
73 | err := mh.raftStorage.Del(ctx, mh.cf, key, []byte(key))
74 | if err != nil {
75 | utils.SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
76 | return
77 | }
78 |
79 | utils.SetStrResp(http.StatusOK, 0, "", "OK", c)
80 | }
81 |
82 | func (mh *KVHandle) JoinNode(c *gin.Context) {
83 | nodeAddr := c.Query("addr")
84 |
85 | raftAddrs := mh.raftStorage.GetNodeHost()
86 | for _, raftAddr := range raftAddrs {
87 | if nodeAddr == raftAddr {
88 | utils.SetStrResp(http.StatusOK, 1, fmt.Sprintf("%s 待加入的节点已经在集群raft节点中", nodeAddr), "OK", c)
89 | }
90 | }
91 |
92 | err := mh.raftStorage.AddRaftNode(utils.Addr2RaftNodeID(nodeAddr), nodeAddr)
93 | if err != nil {
94 | utils.SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
95 | return
96 | }
97 | utils.SetStrResp(http.StatusOK, 0, "", "OK", c)
98 | }
99 |
100 | func (mh *KVHandle) DelNode(c *gin.Context) {
101 | nodeAddr := c.Query("addr")
102 |
103 | err := mh.raftStorage.RemoveRaftNode(utils.Addr2RaftNodeID(nodeAddr))
104 | if err != nil {
105 | utils.SetStrResp(http.StatusBadRequest, -1, err.Error(), "", c)
106 | return
107 | }
108 | utils.SetStrResp(http.StatusOK, 0, "", "OK", c)
109 | }
110 |
--------------------------------------------------------------------------------
/productready/ilogger/logger.go:
--------------------------------------------------------------------------------
1 | package ilogger
2 |
3 | import (
4 | "fmt"
5 | "path/filepath"
6 |
7 | zlog "github.com/xkeyideal/mraft/logger"
8 |
9 | "github.com/lni/dragonboat/v3/logger"
10 | "go.uber.org/zap"
11 | "go.uber.org/zap/zapcore"
12 | )
13 |
// LogDir is the default directory for dragonboat log files.
const LogDir = "/tmp/logs/mraft/"

// LoggerOptions holds process-wide logging context: the log directory
// plus the node id / target tags stamped onto every dragonboat log line.
type LoggerOptions struct {
	logDir string
	nodeId uint64
	target string
}

// Lo is the shared package-level options used by every raftLogger
// created through RaftFactory.
var Lo *LoggerOptions = &LoggerOptions{
	logDir: LogDir,
}

// SetLogDir overrides the directory log files are written to.
func (lo *LoggerOptions) SetLogDir(dir string) {
	lo.logDir = dir
}

// SetNodeId records the node id tagged onto every log entry.
func (lo *LoggerOptions) SetNodeId(nodeId uint64) {
	lo.nodeId = nodeId
}

// SetTarget records the target string tagged onto every log entry.
func (lo *LoggerOptions) SetTarget(target string) {
	lo.target = target
}
37 |
// init installs RaftFactory as dragonboat's logger factory and tunes the
// per-subsystem levels so the noisier packages stay quiet.
func init() {
	logger.SetLoggerFactory(RaftFactory)
	logger.GetLogger("raft").SetLevel(logger.WARNING)
	logger.GetLogger("rsm").SetLevel(logger.ERROR)
	logger.GetLogger("transport").SetLevel(logger.WARNING)
	logger.GetLogger("gossip").SetLevel(logger.ERROR)
	logger.GetLogger("grpc").SetLevel(logger.ERROR)
	logger.GetLogger("dragonboat").SetLevel(logger.WARNING)
	logger.GetLogger("logdb").SetLevel(logger.ERROR)
	logger.GetLogger("raftpb").SetLevel(logger.ERROR)
	logger.GetLogger("config").SetLevel(logger.ERROR)
	logger.GetLogger("settings").SetLevel(logger.INFO)
}
51 |
// raftLogger adapts a zap.Logger to dragonboat's logger.ILogger interface
// for a single dragonboat subsystem (pkgName).
type raftLogger struct {
	pkgName string
	logDir  string
	log     *zap.Logger
}
57 |
58 | func NewRaftLogger(logDir, pkgName string, level zapcore.Level) *raftLogger {
59 | name := fmt.Sprintf("%s.log", pkgName)
60 | return &raftLogger{
61 | pkgName: pkgName,
62 | logDir: logDir,
63 | log: zlog.NewLogger(filepath.Join(logDir, name), level, false),
64 | }
65 | }
66 |
// RaftFactory creates the logger dragonboat uses for pkgName, taking the
// log directory from the shared Lo options.
// NOTE(review): the returned logger's zap handle is nil until SetLevel is
// called (init above sets a level for every subsystem), but a log call
// before SetLevel would nil-panic — confirm dragonboat never does that.
func RaftFactory(pkgName string) logger.ILogger {
	return &raftLogger{
		logDir:  Lo.logDir,
		pkgName: pkgName,
	}
}

// Compile-time check that raftLogger satisfies dragonboat's ILogger.
var _ logger.ILogger = (*raftLogger)(nil)
75 |
76 | func (c *raftLogger) SetLevel(level logger.LogLevel) {
77 | var cl zapcore.Level
78 | if level == logger.CRITICAL {
79 | cl = zapcore.PanicLevel
80 | } else if level == logger.ERROR {
81 | cl = zapcore.ErrorLevel
82 | } else if level == logger.WARNING {
83 | cl = zapcore.WarnLevel
84 | } else if level == logger.INFO {
85 | cl = zapcore.InfoLevel
86 | } else if level == logger.DEBUG {
87 | cl = zapcore.DebugLevel
88 | } else {
89 | panic("unexpected level")
90 | }
91 |
92 | name := fmt.Sprintf("dragonboat-%s.log", c.pkgName)
93 | c.log = zlog.NewLogger(filepath.Join(c.logDir, name), cl, false)
94 | }
95 |
96 | func (c *raftLogger) fmsg() string {
97 | return "[multiraft] [" + c.pkgName + "]"
98 | }
99 |
// Debugf logs a formatted message at debug level, tagged with the shared
// target and node id from Lo.
func (c *raftLogger) Debugf(format string, args ...interface{}) {
	c.log.Debug(c.fmsg(),
		zap.String("target", Lo.target),
		zap.Uint64("nodeId", Lo.nodeId),
		zap.String("msg", fmt.Sprintf(format, args...)),
	)
}
107 |
// Infof logs a formatted message at info level, tagged with the shared
// target and node id from Lo.
func (c *raftLogger) Infof(format string, args ...interface{}) {
	c.log.Info(c.fmsg(), zap.String("target", Lo.target),
		zap.Uint64("nodeId", Lo.nodeId),
		zap.String("msg", fmt.Sprintf(format, args...)),
	)
}
114 |
// Warningf logs a formatted message at warn level, tagged with the shared
// target and node id from Lo.
func (c *raftLogger) Warningf(format string, args ...interface{}) {
	c.log.Warn(c.fmsg(),
		zap.String("target", Lo.target),
		zap.Uint64("nodeId", Lo.nodeId),
		zap.String("msg", fmt.Sprintf(format, args...)),
	)
}
122 |
// Errorf logs a formatted message at error level, tagged with the shared
// target and node id from Lo.
func (c *raftLogger) Errorf(format string, args ...interface{}) {
	c.log.Error(c.fmsg(),
		zap.String("target", Lo.target),
		zap.Uint64("nodeId", Lo.nodeId),
		zap.String("msg", fmt.Sprintf(format, args...)),
	)
}
130 |
// Panicf logs a formatted message at panic level (and then panics, per
// zap semantics), tagged with the shared target and node id from Lo.
func (c *raftLogger) Panicf(format string, args ...interface{}) {
	c.log.Panic(c.fmsg(),
		zap.String("target", Lo.target),
		zap.Uint64("nodeId", Lo.nodeId),
		zap.String("msg", fmt.Sprintf(format, args...)),
	)
}
138 |
--------------------------------------------------------------------------------
/productready/main/app.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "log"
5 | "os"
6 | "os/signal"
7 | "strconv"
8 | "syscall"
9 |
10 | "github.com/xkeyideal/mraft/productready"
11 | "github.com/xkeyideal/mraft/productready/config"
12 | )
13 |
// DevelopDefaultDynamicConfig is the development-only base configuration;
// the HTTP and raft ports are filled in from the command line in main.
var DevelopDefaultDynamicConfig = &config.DynamicConfig{
	RaftDir: "/Users/xkey/test/raftdata",
	LogDir:  "/tmp/",
	IP:      "127.0.0.1",
}
19 |
20 | func main() {
21 | if len(os.Args) <= 2 {
22 | log.Fatal("input arg $1 httpPort, $2 raftPort")
23 | }
24 |
25 | config := DevelopDefaultDynamicConfig
26 |
27 | httpPort, err := strconv.ParseUint(os.Args[1], 10, 64)
28 | if err != nil {
29 | log.Fatal("[ERROR]", err)
30 | }
31 |
32 | config.HttpPort = uint16(httpPort)
33 |
34 | raftPort, err := strconv.ParseUint(os.Args[1], 10, 64)
35 | if err != nil {
36 | log.Fatal("[ERROR]", err)
37 | }
38 |
39 | config.RaftPort = uint16(raftPort)
40 |
41 | eg := productready.NewEngine(config)
42 |
43 | signals := make(chan os.Signal, 1)
44 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGKILL)
45 |
46 | log.Println(<-signals)
47 |
48 | eg.Stop()
49 | }
50 |
--------------------------------------------------------------------------------
/productready/router.go:
--------------------------------------------------------------------------------
1 | package productready
2 |
3 | import "github.com/gin-gonic/gin"
4 |
5 | func (engine *Engine) registerRouter(router *gin.Engine) {
6 | group := router.Group(engine.prefix)
7 | {
8 | group.GET("/info", engine.kvHandle.Info)
9 |
10 | group.GET("/key", engine.kvHandle.Query)
11 | group.POST("/key", engine.kvHandle.Upsert)
12 | group.DELETE("/key", engine.kvHandle.Delete)
13 |
14 | group.GET("/join", engine.kvHandle.JoinNode)
15 | group.GET("/del", engine.kvHandle.DelNode)
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/productready/storage/command.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "errors"
7 | "strconv"
8 |
9 | "github.com/xkeyideal/mraft/productready/storage/store"
10 |
11 | "github.com/lni/dragonboat/v3"
12 | "github.com/lni/dragonboat/v3/client"
13 | "github.com/ugorji/go/codec"
14 | )
15 |
var (
	// revisionKey is the prefix under which per-key revision numbers are
	// stored (see buildRevisionKey).
	revisionKey = []byte("__RAFT_KEY_REVISION__")
)

// CommandType is the one-byte discriminator prepended to every encoded
// raft command (see EncodeCmd/DecodeCmd).
type CommandType byte

const (
	DELETE CommandType = 0
	PUT    CommandType = 1
	GET    CommandType = 2
)
27 |
// WriteOptions carries per-write metadata for storage operations.
type WriteOptions struct {
	// Revision is the revision number recorded for the key being written.
	Revision uint64
}

// mergeWriteOptions collapses a variadic list of options into one
// WriteOptions value: nil entries are skipped and, per field, the last
// non-nil option wins. With no usable options the zero value is returned.
func mergeWriteOptions(opts ...*WriteOptions) *WriteOptions {
	merged := &WriteOptions{}
	for _, o := range opts {
		if o != nil {
			merged.Revision = o.Revision
		}
	}
	return merged
}
45 |
// RaftCommand is the common interface of all replicated storage commands.
// A command is either proposed through raft (RaftInvoke) or applied
// directly against the local store (LocalInvoke); GetResp returns the
// serialized result of the last invocation.
type RaftCommand interface {
	GetType() CommandType
	RaftInvoke(ctx context.Context, nh *dragonboat.NodeHost, clusterId uint64, session *client.Session) error
	LocalInvoke(s *store.Store, opts ...*WriteOptions) error
	GetResp() []byte
}
52 |
53 | func Decode(buf []byte, e interface{}) error {
54 | handle := codec.MsgpackHandle{}
55 | return codec.NewDecoder(bytes.NewReader(buf), &handle).Decode(e)
56 | }
57 |
58 | func DecodeCmd(data []byte) (RaftCommand, error) {
59 | var cmd RaftCommand
60 | switch CommandType(data[0]) {
61 | case DELETE:
62 | cmd = &DelCommand{}
63 | case PUT:
64 | cmd = &PutCommand{}
65 | case GET:
66 | cmd = &GetCommand{}
67 | default:
68 | return nil, errors.New("can not find command type:" + strconv.Itoa(int(data[0])))
69 | }
70 |
71 | handle := codec.MsgpackHandle{}
72 | return cmd, codec.NewDecoder(bytes.NewReader(data[1:]), &handle).Decode(cmd)
73 | }
74 |
75 | func EncodeCmd(cmd RaftCommand) ([]byte, error) {
76 | buf := bytes.NewBuffer(nil)
77 | buf.WriteByte(byte(cmd.GetType()))
78 |
79 | handle := codec.MsgpackHandle{}
80 | encoder := codec.NewEncoder(buf, &handle)
81 | err := encoder.Encode(cmd)
82 | return buf.Bytes(), err
83 | }
84 |
// syncWrite encodes cmd and submits it to raft as a synchronous proposal
// through session, returning the state machine's response payload.
func syncWrite(ctx context.Context, nh *dragonboat.NodeHost, session *client.Session, cmd RaftCommand) ([]byte, error) {
	b, err := EncodeCmd(cmd)
	if err != nil {
		return nil, err
	}

	result, err := nh.SyncPropose(ctx, session, b)
	if err != nil {
		return nil, err
	}

	return result.Data, nil
}
98 |
// syncRead encodes cmd and performs a linearizable read on clusterId,
// returning the state machine's response payload.
// NOTE(review): result is asserted to []byte without a check — a state
// machine Lookup returning any other type would panic here; confirm the
// lookup path always returns []byte.
func syncRead(ctx context.Context, nh *dragonboat.NodeHost, clusterId uint64, cmd RaftCommand) ([]byte, error) {
	b, err := EncodeCmd(cmd)
	if err != nil {
		return nil, err
	}

	result, err := nh.SyncRead(ctx, clusterId, b)
	if err != nil {
		return nil, err
	}

	return result.([]byte), nil
}
112 |
113 | func buildRevisionKey(key []byte) []byte {
114 | return append(revisionKey, key...)
115 | }
116 |
--------------------------------------------------------------------------------
/productready/storage/config.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "github.com/lni/dragonboat/v3/config"
5 | "go.uber.org/zap/zapcore"
6 | )
7 |
// deploymentId distinguishes this deployment's NodeHost instances from
// unrelated dragonboat deployments (see NodeHostConfig.DeploymentID).
const deploymentId = 2023082513

// RaftConfig collects everything needed to start the raft storage node.
type RaftConfig struct {
	LogDir string
	// StorageDir is the data storage directory.
	StorageDir string
	LogLevel   zapcore.Level

	// HostIP is this machine's IP address.
	HostIP string

	// NodeId must never change once generated.
	NodeId uint64

	// ClusterIds are the raft cluster (shard) ids assigned to this node.
	ClusterIds []uint64

	// RaftAddr is the raft transport address.
	RaftAddr string

	// GrpcPort would be the peer's grpc port used by the moveTo command.
	// GrpcPort uint16

	// MultiGroupSize is the number of raft cluster groups; used to hash a
	// key onto its clusterId.
	MultiGroupSize uint32

	// Join records whether each node joins the cluster as a new member.
	// key: clusterId, key: nodeId
	Join map[uint64]map[uint64]bool

	// InitialMembers describes the initial membership. In gossip startup
	// mode the value is the NodeHostId (see the dragonboat docs; one can
	// initially be generated with dragonboat id.NewNodeHostID(id uint64));
	// with fixed raft addresses the value is the raftAddr.
	// key: clusterId, key: nodeId
	InitialMembers map[uint64]map[uint64]string

	// Gossip settings, required only when the cluster is started in
	// gossip (mutable-IP) mode.
	Gossip      bool
	GossipPort  uint16
	GossipSeeds []string

	// Metrics toggles dragonboat's metrics collection.
	Metrics bool
}
53 |
// buildNodeHostConfig assembles the dragonboat NodeHostConfig for this
// process, wiring in the raft and system event listeners.
func buildNodeHostConfig(raftDir string, raftAddr string, metrics bool, re *raftEvent, se *systemEvent) config.NodeHostConfig {
	return config.NodeHostConfig{
		// DeploymentID decides whether two NodeHost instances belong to the
		// same deployment and so are allowed to communicate, guarding
		// against misconfigured instances corrupting each other's data.
		// Unset instances default to 0 and can all talk to each other.
		DeploymentID: deploymentId,

		// WALDir stores only the raft-log WAL; it is usually small (64GB
		// per NodeHost is plenty). Defaults to NodeHostDir when unset.
		WALDir: raftDir,

		// NodeHostDir stores everything else that needs persisting.
		NodeHostDir: raftDir,

		// RTTMillisecond is the assumed average round-trip time between two
		// NodeHost instances in milliseconds (including network, queueing
		// and processing delay). It is the logical clock tick that the raft
		// heartbeat and election intervals are expressed in. Set it to 1
		// when the real RTT is below one millisecond.
		RTTMillisecond: 200,

		// RaftAddress is this node's externally visible IP:port, used by
		// the other raft nodes to reach it.
		RaftAddress: raftAddr,

		// ListenAddress would let the raft RPC module listen on a separate
		// IP:port (0.0.0.0 listens on all interfaces; hostnames are
		// resolved locally first). Left unset it reuses RaftAddress, which
		// is fine here — one port for both send and receive.
		// ListenAddress: listenAddr,

		// The whole system runs inside a trusted internal network, so
		// mutual TLS stays off.
		MutualTLS: false,

		// With TLS enabled, CAFile/CertFile/KeyFile would be required.
		// MaxSendQueueSize caps each send queue in bytes; once the cap is
		// reached further replication messages are dropped to bound memory
		// use (0 = unlimited). 128MB for now.
		MaxSendQueueSize: 128 * 1024 * 1024,

		// EnableMetrics toggles Prometheus-format health metrics.
		EnableMetrics: metrics,

		// Cap outgoing snapshot traffic at 256MB/s across all raft clusters
		// managed by this NodeHost (0 = unlimited).
		MaxSnapshotSendBytesPerSecond: 256 * 1024 * 1024,

		// Incoming snapshot traffic is left unlimited; the sender's limit
		// governs the rate.
		MaxSnapshotRecvBytesPerSecond: 0,

		// RaftEventListener receives user-visible raft events (e.g. leader
		// changes) on a dedicated goroutine; CPU- or IO-heavy handling
		// should be offloaded to user-managed workers.
		RaftEventListener: re,

		// SystemEventListener is notified of system events such as snapshot
		// creation, log compaction and snapshot streaming.
		SystemEventListener: se,
	}
}
119 |
120 | func buildRaftConfig(nodeId, clusterId uint64) config.Config {
121 | return config.Config{
122 | //当前节点的ID
123 | NodeID: nodeId,
124 |
125 | //当前节点的分片ID,如果当前raft是多组的,那么这个地方是指定当前组的ID
126 | ClusterID: clusterId,
127 |
128 | //领导节点是否应定期检查非领导者节点的状态,并在其不再具有法定人数时退出成为跟随者节点
129 | //当有5台机器,挂了3台,法定人数不够,则主节点退出,不再是主节点了,所有的写操作和同步读操作应该都不能执行了
130 | //各个节点只能读取本地的数据
131 | CheckQuorum: false,
132 |
133 | // ElectionRTT是两次选举之间的消息RTT的最小数量。 消息RTT由NodeHostConfig.RTTMillisecond定义。
134 | // Raft论文建议其幅度大于HeartbeatRTT(因为是先发现不健康,才会进行选举),即两个心跳之间的间隔。
135 | // 在Raft中,选举之间的实际间隔被随机分配在ElectionRTT和2 * ElectionRTT之间。例如,假设NodeHostConfig.RTTMillisecond为100毫秒,
136 | // 要将选举间隔设置为1秒,则应该将ElectionRTT设置为10。启用CheckQuorum后,ElectionRTT还将定义检查领导者定额的时间间隔。
137 | // 这个值是个比例,具体的RTT时间大小是RTTMillisecond*ElectionRTT,当需要选举主节点时,各个节点的随机间隔在ElectionRTT和2 * ElectionRTT,
138 | // 当CheckQuorum为true,主也会每隔这个时间检查下从机数据是否符合法定人数
139 | ElectionRTT: 60,
140 |
141 | // HeartbeatRTT是两次心跳之间的消息RTT数。 消息RTT由NodeHostConfig.RTTMillisecond定义。 Raft论文建议心跳间隔应接近节点之间的平均RTT。
142 | // 例如,假设NodeHostConfig.RTTMillisecond为100毫秒,要将心跳间隔设置为每200毫秒,则应将HeartbeatRTT设置为2。
143 | HeartbeatRTT: 6,
144 |
145 | // SnapshotEntries定义应自动对状态机进行快照的频率,可以将SnapshotEntries设置为0以禁用此类自动快照。
146 | // 当SnapshotEntries设置为N时,意味着大约每N条Raft日志创建一个快照。这也意味着向跟踪者发送N个日志条目比发送快照要昂贵。
147 | // 生成快照后,可以压缩新快照覆盖的Raft日志条目。这涉及两个步骤,冗余日志条目首先被标记为已删除,然后在稍后发布 LogDB 压缩时将其从基础存储中物理删除。
148 | // 有关在生成快照后实际删除和压缩哪些日志条目的详细信息,请参见CompactionOverhead,通过将SnapshotEntries字段设置为0禁用自动快照后,
149 | // 用户仍然可以使用NodeHost的RequestSnapshot或SyncRequestSnapshot方法手动请求快照。
150 | SnapshotEntries: 25 * 10000 * 10,
151 |
152 | // CompactionOverhead定义每次Raft日志压缩后要保留的最新条目数。
153 | // 假设当前的日志为10000,开始创建快照,那么快照创建完成后,<=10000的日志都会被清理,
154 | // 如果想获得9000这样的日志,那么就得先完全加载快照,再从快照中读取,如果设置了CompactionOverhead为3000,
155 | // 那么就算创建了快照,我们仍然能获得10000-7000之间的日志记录,只有小于7000的,才需要重新加载日志获取
156 | CompactionOverhead: 25 * 10000,
157 |
158 | //确定是否使用ChangeID的顺序强制执行Raft成员资格更改。
159 | OrderedConfigChange: true,
160 |
161 | // MaxInMemLogSize是允许在每个Raft节点上的Raft日志存储在内存中的目标大小(以字节为单位)。 内存中的筏日志是尚未应用的日志。
162 | // MaxInMemLogSize是为防止内存无限增长而实现的目标值,并非用于精确限制确切的内存使用量。
163 | // 当MaxInMemLogSize为0时,目标设置为math.MaxUint64。 设置MaxInMemLogSize并达到目标后,客户端尝试提出新建议时将返回错误。
164 | // 建议将MaxInMemLogSize大于要使用的最大建议。
165 | //内存中未应用的日志大小,暂定为256M,超过256M的大小后会返回错误
166 | MaxInMemLogSize: 256 * 1024 * 1024,
167 |
168 | // SnapshotCompressionType是用于压缩生成的快照数据的压缩类型。 默认情况下不使用压缩。
169 | // 快照数据本身由rocksdb生成,采用了LZ4压缩,所以这边就不再继续压缩了
170 | SnapshotCompressionType: config.NoCompression,
171 |
172 | // EntryCompressionType是用于压缩用户日志。 使用Snappy时,允许的最大建议有效负载大致限制为3.42GB。
173 | EntryCompressionType: config.Snappy,
174 |
175 | // DisableAutoCompactions禁用用于回收Raft条目存储空间的自动压缩。
176 | // 默认情况下,每次捕获快照时都会执行压缩,这有助于以较高的IO开销为代价,尽快回收磁盘空间。
177 | // 用户可以禁用此类自动压缩,并在必要时使用NodeHost.RequestCompaction手动请求此类压缩。
178 | DisableAutoCompactions: false,
179 |
180 | // IsObserver指示当前节点是否是Observer节点,(观察者节点通常用于允许新节点加入群集并追赶其他日志,而不会影响可用性。 还可以引入额外的观察者节点来满足只读请求,而不会影响系统的写吞吐量)
181 | IsObserver: false,
182 |
183 | // IsWitness指示这是否是没有实际日志复制且没有状态机的见证Raft节点,见证节点支持目前处于试验阶段。
184 | IsWitness: false,
185 |
186 | //停顿指定在没有群集活动时是否让Raft群集进入停顿模式。 静默模式下的群集不交换心跳消息以最小化带宽消耗。当前处于试验阶段
187 | Quiesce: false,
188 | }
189 | }
190 |
--------------------------------------------------------------------------------
/productready/storage/del.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/cockroachdb/pebble"
7 | "github.com/xkeyideal/mraft/productready/storage/store"
8 |
9 | "github.com/lni/dragonboat/v3"
10 | "github.com/lni/dragonboat/v3/client"
11 | )
12 |
// DelCommand deletes a key (and its companion revision entry) from column
// family CfName.
type DelCommand struct {
	CfName string
	Key    []byte
}

// NewDelCommand builds a delete command for key in column family cfName.
func NewDelCommand(cfName string, key []byte) *DelCommand {
	return &DelCommand{CfName: cfName, Key: key}
}

// GetType identifies this command as a DELETE.
func (c *DelCommand) GetType() CommandType {
	return DELETE
}

// RaftInvoke proposes this delete through raft using the group's client
// session; the state machine applies it on every replica. The clusterId
// parameter is unused because the session already identifies the group.
func (c *DelCommand) RaftInvoke(ctx context.Context, nh *dragonboat.NodeHost, _ uint64, session *client.Session) error {
	_, err := syncWrite(ctx, nh, session, c)
	return err
}
30 |
31 | func (c *DelCommand) LocalInvoke(s *store.Store, opts ...*WriteOptions) error {
32 | batch := s.Batch()
33 | defer batch.Close()
34 |
35 | cf := s.GetColumnFamily(c.CfName)
36 |
37 | batch.Delete(s.BuildColumnFamilyKey(cf, c.Key), pebble.Sync)
38 |
39 | // 删除revision
40 | revisionKey := buildRevisionKey(c.Key)
41 | batch.Delete(s.BuildColumnFamilyKey(cf, revisionKey), pebble.Sync)
42 |
43 | return s.Write(batch)
44 | }
45 |
// GetResp returns nil: a DELETE produces no response payload.
func (c *DelCommand) GetResp() []byte {
	return nil
}
49 |
--------------------------------------------------------------------------------
/productready/storage/event.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "sync/atomic"
5 | "time"
6 |
7 | "github.com/lni/dragonboat/v3/raftio"
8 | "go.uber.org/zap"
9 | )
10 |
// raftEvent forwards dragonboat raft-level events into the owning Storage.
type raftEvent struct {
	s *Storage
}

// LeaderUpdated is invoked by dragonboat whenever the leader of a raft
// group changes. Once the Storage has reached the ready state, changes
// that elected an actual leader (LeaderID != 0) are forwarded to
// s.leaderc, where handleEvents refreshes the membership cache.
func (e *raftEvent) LeaderUpdated(info raftio.LeaderInfo) {
	e.s.log.Warn("[raftstorage] [event] [LeaderUpdated]",
		zap.String("target", e.s.target),
		zap.Any("info", info),
	)

	if atomic.LoadUint32(&e.s.status) == ready && info.LeaderID != 0 {
		e.s.leaderc <- info
	}
}
25 |
// systemEvent receives dragonboat system-level notifications. Only
// MembershipChanged feeds back into Storage (via s.memberc, once ready);
// every other callback is log-only observability.
type systemEvent struct {
	s *Storage
}

func (e *systemEvent) NodeHostShuttingDown() {
	e.s.log.Warn("[raftstorage] [event] [NodeHostShuttingDown]", zap.String("target", e.s.target))
}

func (e *systemEvent) NodeUnloaded(info raftio.NodeInfo) {
	e.s.log.Warn("[raftstorage] [event] [NodeUnloaded]", zap.String("target", e.s.target), zap.Any("info", info))
}

func (e *systemEvent) NodeReady(info raftio.NodeInfo) {
	e.s.log.Info("[raftstorage] [event] [NodeReady]", zap.String("target", e.s.target), zap.Any("info", info))
}

// MembershipChanged forwards the change to handleEvents so the membership
// cache for the affected cluster can be refreshed.
func (e *systemEvent) MembershipChanged(info raftio.NodeInfo) {
	e.s.log.Warn("[raftstorage] [event] [MembershipChanged]", zap.String("target", e.s.target), zap.Any("info", info))
	if atomic.LoadUint32(&e.s.status) == ready {
		e.s.memberc <- info
	}
}
func (e *systemEvent) ConnectionEstablished(info raftio.ConnectionInfo) {
	e.s.log.Info("[raftstorage] [event] [ConnectionEstablished]", zap.String("target", e.s.target), zap.Any("info", info))
}
func (e *systemEvent) ConnectionFailed(info raftio.ConnectionInfo) {
	e.s.log.Warn("[raftstorage] [event] [ConnectionFailed]", zap.String("target", e.s.target), zap.Any("info", info))
}
func (e *systemEvent) SendSnapshotStarted(info raftio.SnapshotInfo) {
	e.s.log.Info("[raftstorage] [event] [SendSnapshotStarted]", zap.String("target", e.s.target), zap.Any("info", info))
}
func (e *systemEvent) SendSnapshotCompleted(info raftio.SnapshotInfo) {
	e.s.log.Info("[raftstorage] [event] [SendSnapshotCompleted]", zap.String("target", e.s.target), zap.Any("info", info))
}
func (e *systemEvent) SendSnapshotAborted(info raftio.SnapshotInfo) {
	e.s.log.Info("[raftstorage] [event] [SendSnapshotAborted]", zap.String("target", e.s.target), zap.Any("info", info))
}
func (e *systemEvent) SnapshotReceived(info raftio.SnapshotInfo) {
	e.s.log.Info("[raftstorage] [event] [SnapshotReceived]", zap.String("target", e.s.target), zap.Any("info", info))
}
func (e *systemEvent) SnapshotRecovered(info raftio.SnapshotInfo) {
	e.s.log.Info("[raftstorage] [event] [SnapshotRecovered]", zap.String("target", e.s.target), zap.Any("info", info))
}
func (e *systemEvent) SnapshotCreated(info raftio.SnapshotInfo) {
	e.s.log.Warn("[raftstorage] [event] [SnapshotCreated]", zap.String("target", e.s.target), zap.Any("info", info))
}
func (e *systemEvent) SnapshotCompacted(info raftio.SnapshotInfo) {
	e.s.log.Warn("[raftstorage] [event] [SnapshotCompacted]", zap.String("target", e.s.target), zap.Any("info", info))
}
func (e *systemEvent) LogCompacted(info raftio.EntryInfo) {
	e.s.log.Info("[raftstorage] [event] [LogCompacted]", zap.String("target", e.s.target), zap.Any("info", info))
}
func (e *systemEvent) LogDBCompacted(info raftio.EntryInfo) {
	e.s.log.Info("[raftstorage] [event] [LogDBCompacted]", zap.String("target", e.s.target), zap.Any("info", info))
}
80 |
81 | func (s *Storage) handleEvents() {
82 | ticker := time.NewTicker(2 * time.Second)
83 | for {
84 | select {
85 | case info := <-s.memberc:
86 | if info.NodeID == s.cfg.NodeId {
87 | m, err := s.getClusterMembership(info.ClusterID)
88 | if err != nil {
89 | continue
90 | }
91 |
92 | s.cmu.Lock()
93 | s.memberCache[info.ClusterID] = m
94 | s.cmu.Unlock()
95 | }
96 | case info := <-s.leaderc:
97 | if info.NodeID == s.cfg.NodeId {
98 | m, err := s.getClusterMembership(info.ClusterID)
99 | if err != nil {
100 | continue
101 | }
102 |
103 | s.cmu.Lock()
104 | s.memberCache[info.ClusterID] = m
105 | s.cmu.Unlock()
106 | }
107 | case <-ticker.C:
108 | s.cmu.Lock()
109 | if len(s.memberCache) > 0 {
110 | // mc := s.memberCache
111 | // s.gossip.UpdateMembershipMessage(&gossip.RaftMembershipMessage{
112 | // MemberInfos: mc,
113 | // })
114 |
115 | s.memberCache = make(map[uint64]*MemberInfo)
116 | }
117 | s.cmu.Unlock()
118 | case <-s.stopper.ShouldStop():
119 | return
120 | }
121 | }
122 | }
123 |
--------------------------------------------------------------------------------
/productready/storage/get.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "context"
5 | "encoding/binary"
6 |
7 | "github.com/xkeyideal/mraft/productready/storage/store"
8 |
9 | "github.com/lni/dragonboat/v3"
10 | "github.com/lni/dragonboat/v3/client"
11 | )
12 |
// GetCommand reads a single key from column family Cf. After invocation,
// resp holds an 8-byte big-endian revision followed by the value bytes.
type GetCommand struct {
	Cf   string
	Key  []byte
	resp []byte
}

// GetResp returns the raw response produced by the last invocation.
func (c *GetCommand) GetResp() []byte {
	return c.resp
}

// NewGetCommand builds a read command for key in column family cf.
func NewGetCommand(cf string, key []byte) *GetCommand {
	return &GetCommand{Cf: cf, Key: key}
}

// GetType identifies this command as a GET.
func (c *GetCommand) GetType() CommandType {
	return GET
}

// RaftInvoke performs a linearizable read through the raft group
// identified by clusterId and stores the result in c.resp.
func (c *GetCommand) RaftInvoke(ctx context.Context, nh *dragonboat.NodeHost, clusterId uint64, _ *client.Session) (err error) {
	c.resp, err = syncRead(ctx, nh, clusterId, c)
	return err
}
35 |
36 | func (c *GetCommand) LocalInvoke(s *store.Store, opts ...*WriteOptions) error {
37 | cf := s.GetColumnFamily(c.Cf)
38 |
39 | // get revision
40 | v, err := s.GetBytes(s.BuildColumnFamilyKey(cf, buildRevisionKey(c.Key)))
41 | if err != nil {
42 | return err
43 | }
44 |
45 | if len(v) == 0 {
46 | v = make([]byte, 8)
47 | binary.BigEndian.PutUint64(v, 0)
48 | }
49 |
50 | // get value
51 | d, err := s.GetBytes(s.BuildColumnFamilyKey(cf, c.Key))
52 | if err != nil {
53 | return err
54 | }
55 |
56 | c.resp = append(v, d...)
57 | return nil
58 | }
59 |
// GetResult returns the combined revision+value bytes read by the last
// invocation (same data as GetResp).
func (c *GetCommand) GetResult() []byte {
	return c.resp
}
63 |
--------------------------------------------------------------------------------
/productready/storage/op.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "context"
5 | "time"
6 | )
7 |
8 | func (s *Storage) Get(ctx context.Context, cf string, hashKey string, linearizable bool, key []byte) ([]byte, error) {
9 | var (
10 | clusterId = s.getClusterId(hashKey)
11 | cmd = NewGetCommand(cf, key)
12 | err error
13 | )
14 |
15 | if linearizable {
16 | err = cmd.RaftInvoke(ctx, s.nh, clusterId, s.csMap[clusterId])
17 | } else {
18 | err = cmd.LocalInvoke(s.smMap[clusterId])
19 | }
20 | return cmd.GetResult(), err
21 | }
22 |
// Put proposes a PUT of key/val in column family cf through the raft group
// chosen by hashing hashKey.
func (s *Storage) Put(ctx context.Context, cf string, hashKey string, key, val []byte) error {
	cmd := NewPutCommand(cf, key, val)
	clusterId := s.getClusterId(hashKey)
	return cmd.RaftInvoke(ctx, s.nh, clusterId, s.csMap[clusterId])
}

// Del proposes a DELETE of key in column family cf through the raft group
// chosen by hashing hashKey.
func (s *Storage) Del(ctx context.Context, cf string, hashKey string, key []byte) error {
	cmd := NewDelCommand(cf, key)
	clusterId := s.getClusterId(hashKey)
	return cmd.RaftInvoke(ctx, s.nh, clusterId, s.csMap[clusterId])
}
34 |
35 | func (s *Storage) AddRaftNode(nodeId uint64, target string) error {
36 | for _, clusterId := range s.cfg.ClusterIds {
37 | ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
38 | ms, err := s.nh.SyncGetClusterMembership(ctx, uint64(clusterId))
39 | cancel()
40 | if err != nil {
41 | return err
42 | }
43 |
44 | ctx, cancel = context.WithTimeout(context.Background(), 3*time.Second)
45 | err = s.nh.SyncRequestAddNode(ctx, uint64(clusterId), nodeId, target, ms.ConfigChangeID)
46 | cancel()
47 | if err != nil {
48 | return err
49 | }
50 | }
51 |
52 | return nil
53 | }
54 |
55 | func (s *Storage) AddRaftObserver(nodeId uint64, addr string) error {
56 | for _, clusterId := range s.cfg.ClusterIds {
57 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
58 | ms, err := s.nh.SyncGetClusterMembership(ctx, uint64(clusterId))
59 | cancel()
60 | if err != nil {
61 | return err
62 | }
63 |
64 | ctx, cancel = context.WithTimeout(context.Background(), 4*time.Second)
65 | err = s.nh.SyncRequestAddObserver(ctx, uint64(clusterId), nodeId, addr, ms.ConfigChangeID)
66 | cancel()
67 | if err != nil {
68 | return err
69 | }
70 | }
71 |
72 | return nil
73 | }
74 |
75 | func (s *Storage) RemoveRaftNode(nodeId uint64) error {
76 | for _, clusterId := range s.cfg.ClusterIds {
77 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
78 | ms, err := s.nh.SyncGetClusterMembership(ctx, uint64(clusterId))
79 | cancel()
80 | if err != nil {
81 | return err
82 | }
83 |
84 | ctx, cancel = context.WithTimeout(context.Background(), 4*time.Second)
85 | err = s.nh.SyncRequestDeleteNode(ctx, uint64(clusterId), nodeId, ms.ConfigChangeID)
86 | cancel()
87 | if err != nil {
88 | return err
89 | }
90 | }
91 |
92 | return nil
93 | }
94 |
--------------------------------------------------------------------------------
/productready/storage/put.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
import (
	"context"
	"encoding/binary"

	"github.com/cockroachdb/pebble"
	"github.com/xkeyideal/mraft/productready/storage/store"

	"github.com/lni/dragonboat/v3"
	"github.com/lni/dragonboat/v3/client"
)
12 |
// PutCommand writes Value under Key in column family Cf.
type PutCommand struct {
	Cf    string
	Key   []byte
	Value []byte
}

// GetResp returns nil: a PUT produces no response payload.
func (c *PutCommand) GetResp() []byte {
	return nil
}

// NewPutCommand builds a write command for key/value in column family cf.
func NewPutCommand(cf string, key, value []byte) *PutCommand {
	return &PutCommand{Cf: cf, Key: key, Value: value}
}

// GetType identifies this command as a PUT.
func (c *PutCommand) GetType() CommandType {
	return PUT
}
30 |
// RaftInvoke proposes this PUT through raft using the group's client
// session; the state machine applies it on every replica. The clusterId
// parameter is unused because the session already identifies the group.
func (c *PutCommand) RaftInvoke(ctx context.Context, nh *dragonboat.NodeHost, _ uint64, session *client.Session) error {
	_, err := syncWrite(ctx, nh, session, c)
	return err
}
35 |
36 | func (c *PutCommand) LocalInvoke(s *store.Store, opts ...*WriteOptions) error {
37 | batch := s.Batch()
38 | defer batch.Close()
39 |
40 | cf := s.GetColumnFamily(c.Cf)
41 |
42 | batch.Delete(s.BuildColumnFamilyKey(cf, c.Key), pebble.Sync)
43 |
44 | // 删除revision
45 | revisionKey := buildRevisionKey(c.Key)
46 | batch.Delete(s.BuildColumnFamilyKey(cf, revisionKey), pebble.Sync)
47 |
48 | return s.Write(batch)
49 |
50 | return s.Write(batch)
51 | }
52 |
--------------------------------------------------------------------------------
/productready/storage/sm.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "encoding/binary"
5 | "errors"
6 | "io"
7 | "log"
8 |
9 | "github.com/xkeyideal/mraft/productready/storage/store"
10 |
11 | "github.com/cockroachdb/pebble"
12 | sm "github.com/lni/dragonboat/v3/statemachine"
13 | )
14 |
var (
	// indexKeyPrefix followed by a big-endian cluster id is the db key
	// that stores a group's last applied raft index (see newStateMachine).
	indexKeyPrefix = []byte("__RAFT_APPLIED_INDEX__")
	// nodeReadyKey/nodeReadyVal mark a node as ready in the db.
	nodeReadyKey = []byte("__RAFT_NODE_READY__")
	nodeReadyVal = []byte("^_^")

	moveToErr       = errors.New("MoveTo Jump Exceed")
	sessionNotFound = errors.New("Raft clusterId session not found")
	storeNotFound   = errors.New("Raft clusterId store not found")
)
24 |
// StateMachine is the dragonboat on-disk state machine for one raft group.
// It applies committed entries to a pebble-backed store and persists the
// last applied raft index under indexKey.
type StateMachine struct {
	raftAddr  string
	target    string
	ClusterID uint64
	NodeID    uint64
	store     *store.Store

	// indexKey is the db key holding this group's last applied raft index
	// (indexKeyPrefix + big-endian ClusterID).
	indexKey []byte
}
35 |
36 | func newStateMachine(raftAddr, target string, clusterid uint64, nodeId uint64, s *store.Store) *StateMachine {
37 | // 生成存储revision的key
38 | smIndexKey := make([]byte, len(indexKeyPrefix)+8)
39 | copy(smIndexKey, indexKeyPrefix)
40 | binary.BigEndian.PutUint64(smIndexKey[len(indexKeyPrefix):], clusterid)
41 |
42 | return &StateMachine{
43 | raftAddr: raftAddr,
44 | target: target,
45 | ClusterID: clusterid,
46 | NodeID: nodeId,
47 | indexKey: smIndexKey,
48 | store: s,
49 | }
50 | }
51 |
52 | func (r *StateMachine) Open(stopChan <-chan struct{}) (uint64, error) {
53 | select {
54 | case <-stopChan:
55 | return 0, sm.ErrOpenStopped
56 | default:
57 | val, err := r.store.GetBytes(r.indexKey)
58 | if err != nil {
59 | return 0, err
60 | }
61 |
62 | // 系统初次启动时,全局revision应该是不存在的,db里查不到,此时返回0
63 | if len(val) == 0 {
64 | return 0, nil
65 | }
66 |
67 | return binary.BigEndian.Uint64(val), nil
68 | }
69 | }
70 |
71 | func (r *StateMachine) Update(entries []sm.Entry) ([]sm.Entry, error) {
72 | if r.store.Closed() {
73 | return entries, nil
74 | }
75 |
76 | resultEntries := make([]sm.Entry, 0, len(entries))
77 |
78 | // 将raft的日志转换为db要执行的命令
79 | for _, e := range entries {
80 | r, err := r.processEntry(e)
81 | if err != nil {
82 | return nil, err
83 | }
84 |
85 | resultEntries = append(resultEntries, r)
86 | }
87 |
88 | idx := entries[len(entries)-1].Index
89 | idxByte := make([]byte, 8)
90 | binary.BigEndian.PutUint64(idxByte, idx)
91 |
92 | batch := r.store.Batch()
93 | defer batch.Close()
94 |
95 | // 更新revision的值
96 | batch.Set(r.indexKey, idxByte, pebble.Sync)
97 | if err := r.store.Write(batch); err != nil {
98 | return nil, err
99 | }
100 |
101 | return resultEntries, nil
102 | }
103 |
// processEntry decodes one raft log entry into a command, executes it
// against the local store (passing the entry's index as the revision), and
// fills in the entry's Result with the command's response.
func (r *StateMachine) processEntry(e sm.Entry) (sm.Entry, error) {
	cmd, err := DecodeCmd(e.Cmd)
	if err != nil {
		return e, err
	}

	// The raft entry index doubles as the key's revision on writes.
	opts := &WriteOptions{
		Revision: e.Index,
	}

	if err := cmd.LocalInvoke(r.store, opts); err != nil {
		return e, err
	}

	// Result.Value reports the consumed command size; Data carries the
	// command's response payload back to the proposer.
	resp := cmd.GetResp()
	e.Result = sm.Result{Value: uint64(len(e.Cmd)), Data: resp}

	return e, nil
}
123 |
124 | func (r *StateMachine) Lookup(query interface{}) (interface{}, error) {
125 | if r.store.Closed() {
126 | return nil, pebble.ErrClosed
127 | }
128 |
129 | cmd, err := DecodeCmd(query.([]byte))
130 | if err != nil {
131 | return nil, err
132 | }
133 |
134 | if err := cmd.LocalInvoke(r.store); err != nil {
135 | return nil, err
136 | }
137 |
138 | return cmd.GetResp(), nil
139 | }
140 |
// Sync is a no-op: writes in this state machine already go to the store
// with pebble.Sync, so there is nothing extra to flush here.
func (r *StateMachine) Sync() error {
	return nil
}
144 |
// stateMachineStoreCtx carries the point-in-time pebble snapshot taken in
// PrepareSnapshot over to SaveSnapshot.
type stateMachineStoreCtx struct {
	snapshot *pebble.Snapshot
}

// PrepareSnapshot captures a consistent view of the store; the actual
// serialization happens later in SaveSnapshot, off the raft apply path.
func (r *StateMachine) PrepareSnapshot() (interface{}, error) {
	if r.store.Closed() {
		return nil, pebble.ErrClosed
	}

	return &stateMachineStoreCtx{
		snapshot: r.store.GetSnapshot(),
	}, nil
}
158 |
// SaveSnapshot streams the pebble snapshot captured by PrepareSnapshot
// into writer; stopChan aborts the stream when the node shuts down. The
// snapshot is closed before returning.
func (r *StateMachine) SaveSnapshot(snapshot interface{}, writer io.Writer, stopChan <-chan struct{}) error {
	if r.store.Closed() {
		return pebble.ErrClosed
	}

	log.Println("SaveSnapshot", r.target, r.raftAddr, r.NodeID, r.ClusterID)
	ctxData := snapshot.(*stateMachineStoreCtx)

	ss := ctxData.snapshot
	defer ss.Close()

	return r.store.SaveSnapshotToWriter(r.target, r.raftAddr, ss, writer, stopChan)
}
172 |
// RecoverFromSnapshot rebuilds the local store from a snapshot stream sent
// by another replica; stopChan aborts the load when the node shuts down.
func (r *StateMachine) RecoverFromSnapshot(reader io.Reader, stopChan <-chan struct{}) error {
	if r.store.Closed() {
		return pebble.ErrClosed
	}

	log.Println("RecoverFromSnapshot", r.target, r.raftAddr, r.NodeID, r.ClusterID)
	return r.store.LoadSnapShotFromReader(r.target, r.raftAddr, reader, stopChan)
}
181 |
// Close shuts down the underlying store; closing an already-closed store
// is a no-op. Note the store may be shared by several groups — see the
// store package for its close semantics.
func (r *StateMachine) Close() error {
	if r.store.Closed() {
		return nil
	}

	return r.store.Close()
}
189 |
--------------------------------------------------------------------------------
/productready/storage/store/utils.go:
--------------------------------------------------------------------------------
1 | package store
2 |
3 | import (
4 | "bytes"
5 | "crypto/md5"
6 | "errors"
7 | "fmt"
8 | "io/ioutil"
9 | "math/rand"
10 | "os"
11 | "path/filepath"
12 | "runtime"
13 | "time"
14 | )
15 |
const (
	// currentDBFilename is the pointer file recording the active db dir.
	currentDBFilename string = "pebble.running"
	// updatingDBFilename is the temporary pointer written first and then
	// renamed over currentDBFilename to commit a new db dir atomically.
	updatingDBFilename string = "pebble.updating"
)
20 |
// GetPebbleDBDir resolves the directory pebble should open under dir.
//
// First run (no pebble.running pointer file): a random subdirectory name
// is generated and committed by writing pebble.updating and renaming it to
// pebble.running. Subsequent runs: stale sibling directories are removed
// and the directory named by the pointer file is returned; it is an error
// for that directory to have been deleted out from under us.
func GetPebbleDBDir(dir string) (string, error) {
	var dbdir string

	// Check whether dir/pebble.running exists.
	newRunning := isNewRun(dir)

	// Fresh start of the program.
	if newRunning { // no pebble.running pointer file yet
		// Kept for compatibility: existing data may already live under a
		// layout like data_node11772876503705/1/current.
		// fp := filepath.Join(dir, "current")
		// if existFilePath(fp) {
		// 	return fp, nil
		// }

		// Generate a random directory for pebble and commit it via the
		// write-updating-then-rename protocol.
		dbdir = getNewRandomDBDirName(dir)
		if err := saveCurrentDBDirName(dir, dbdir); err != nil {
			return "", err
		}
		if err := replaceCurrentDBFile(dir); err != nil {
			return "", err
		}

		return dbdir, nil
	}

	if err := cleanupNodeDataDir(dir); err != nil {
		return "", err
	}

	var err error
	dbdir, err = getCurrentDBDirName(dir)
	if err != nil {
		return "", err
	}
	if _, err := os.Stat(dbdir); err != nil {
		if os.IsNotExist(err) {
			return "", errors.New("db dir unexpectedly deleted")
		}
	}

	return dbdir, nil
}
64 |
65 | // functions below are used to manage the current data directory of Pebble DB.
66 | func isNewRun(dir string) bool {
67 | fp := filepath.Join(dir, currentDBFilename)
68 | if _, err := os.Stat(fp); os.IsNotExist(err) {
69 | return true
70 | }
71 | return false
72 | }
73 |
// getNewRandomDBDirName returns a fresh directory name under dir of the
// form "<random-uint64>_<unix-nanos>", keeping names unique across runs.
func getNewRandomDBDirName(dir string) string {
	name := fmt.Sprintf("%d_%d", rand.Uint64(), time.Now().UnixNano())
	return filepath.Join(dir, name)
}
80 |
// replaceCurrentDBFile promotes the updating pointer file to the current
// one via rename (atomic on POSIX filesystems) and fsyncs the directory so
// the rename is durable.
func replaceCurrentDBFile(dir string) error {
	fp := filepath.Join(dir, currentDBFilename)
	tmpFp := filepath.Join(dir, updatingDBFilename)
	if err := os.Rename(tmpFp, fp); err != nil {
		return err
	}
	return syncDir(dir)
}
89 |
// saveCurrentDBDirName writes dbdir into the updating pointer file under
// dir, prefixed with the first 8 bytes of dbdir's MD5 digest so readers
// can detect corruption (see getCurrentDBDirName). The deferred
// close+dir-fsync panics on failure: a half-committed pointer file would
// leave startup state unrecoverable.
func saveCurrentDBDirName(dir string, dbdir string) error {
	h := md5.New()
	if _, err := h.Write([]byte(dbdir)); err != nil {
		return err
	}
	fp := filepath.Join(dir, updatingDBFilename)
	f, err := os.Create(fp)
	if err != nil {
		return err
	}
	defer func() {
		if err := f.Close(); err != nil {
			panic(err)
		}
		if err := syncDir(dir); err != nil {
			panic(err)
		}
	}()
	// Layout: 8 checksum bytes, then the directory path itself.
	if _, err := f.Write(h.Sum(nil)[:8]); err != nil {
		return err
	}
	if _, err := f.Write([]byte(dbdir)); err != nil {
		return err
	}
	if err := f.Sync(); err != nil {
		return err
	}
	return nil
}
119 |
// getCurrentDBDirName reads the current pointer file and returns the db
// directory path it names, after verifying the 8-byte MD5 checksum prefix
// written by saveCurrentDBDirName.
func getCurrentDBDirName(dir string) (string, error) {
	fp := filepath.Join(dir, currentDBFilename)
	f, err := os.OpenFile(fp, os.O_RDONLY, 0755)
	if err != nil {
		return "", err
	}

	defer func() {
		f.Close()
	}()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		return "", err
	}
	// Layout: 8 checksum bytes followed by at least one path byte.
	if len(data) <= 8 {
		return "", errors.New("corrupted content")
	}
	crc := data[:8]
	content := data[8:]
	h := md5.New()
	if _, err := h.Write(content); err != nil {
		return "", err
	}
	if !bytes.Equal(crc, h.Sum(nil)[:8]) {
		return "", errors.New("corrupted content with not matched crc")
	}
	return string(content), nil
}
149 |
// createNodeDataDir creates dir (and any missing parents) and fsyncs the
// parent directory so the new entry is durable.
func createNodeDataDir(dir string) error {
	if err := os.MkdirAll(dir, 0755); err != nil {
		return err
	}
	return syncDir(filepath.Dir(dir))
}
156 |
157 | func cleanupNodeDataDir(dir string) error {
158 | os.RemoveAll(filepath.Join(dir, updatingDBFilename))
159 | dbdir, err := getCurrentDBDirName(dir)
160 | if err != nil {
161 | return err
162 | }
163 | files, err := ioutil.ReadDir(dir)
164 | if err != nil {
165 | return err
166 | }
167 | for _, fi := range files {
168 | if !fi.IsDir() {
169 | continue
170 | }
171 |
172 | toDelete := filepath.Join(dir, fi.Name())
173 | if toDelete != dbdir {
174 | if err := os.RemoveAll(toDelete); err != nil {
175 | return err
176 | }
177 | }
178 | }
179 |
180 | return nil
181 | }
182 |
// syncDir fsyncs a directory so metadata changes inside it (renames, file
// creation) become durable. It is a no-op on Windows, which does not
// support syncing directories, and for paths that are not directories.
func syncDir(dir string) (err error) {
	if runtime.GOOS == "windows" {
		return nil
	}

	info, statErr := os.Stat(dir)
	if statErr != nil {
		return statErr
	}
	if !info.IsDir() {
		return nil
	}

	d, openErr := os.Open(filepath.Clean(dir))
	if openErr != nil {
		return openErr
	}
	defer func() {
		// Surface the Close error only when nothing failed before it.
		if cerr := d.Close(); err == nil {
			err = cerr
		}
	}()

	return d.Sync()
}
210 |
// existFilePath reports whether path exists on disk.
//
// Bug fixed: the previous implementation tested os.IsExist(err), but
// os.Stat never returns an "already exists" error, so any stat failure
// (e.g. permission denied) was misreported as "does not exist". Only a
// confirmed not-exist error now means the path is absent.
func existFilePath(path string) bool {
	_, err := os.Stat(path)
	return !os.IsNotExist(err)
}
223 |
--------------------------------------------------------------------------------
/productready/utils/utils.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "strconv"
5 | "strings"
6 |
7 | "github.com/gin-gonic/gin"
8 | )
9 |
// Addr2RaftNodeID deterministically derives a raft node id from an IPv4
// "ip:port" address: the four octets occupy the high bits and the port the
// low 16 bits. Parse errors are ignored, so malformed octets contribute 0.
func Addr2RaftNodeID(addr string) uint64 {
	parts := strings.Split(addr, ":")
	octets := strings.Split(parts[0], ".")

	// Fold the four octets in Horner style:
	// id = b0<<24 + b1<<16 + b2<<8 + b3.
	var id uint64
	for i := 0; i < 4; i++ {
		n, _ := strconv.Atoi(octets[i])
		id = id<<8 + uint64(n)
	}

	// Append the port in the low 16 bits.
	port, _ := strconv.Atoi(parts[1])
	return id<<16 + uint64(port)
}
32 |
33 | func SetStrResp(httpCode, code int, msg string, result interface{}, c *gin.Context) {
34 | m := msg
35 |
36 | if code == 0 {
37 | c.JSON(httpCode, gin.H{
38 | "code": code,
39 | "msg": m,
40 | "result": result,
41 | })
42 | } else {
43 | c.JSON(httpCode, gin.H{
44 | "code": code,
45 | "msg": m,
46 | })
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | ## dragonboat multi-group raft simple example
2 |
3 | multi-group raft的简单使用示例,由于对[dragonboat](https://github.com/lni/dragonboat)的理解有限,可能存在部分错误,还望指出。
4 |
5 | ### 生产ready的样例
6 |
7 | 提供生产ready的样例,[productready](https://github.com/xkeyideal/mraft/blob/master/productready/README.md)
8 |
9 | 1. 提供了完整的采用`pebbledb`作为业务数据存储的状态机代码,此代码已用于生产环境。
10 | 2. 提供了支持动态配置的启动方式,提供了`dragonboat`配置需处理节点ID等问题的一个解决思路
11 | 3. 程序化的提供了新增raft节点的方案
12 |
13 | ### 示例说明
14 |
15 | 本示例是对[dragonboat-example](https://github.com/lni/dragonboat-example)中ondisk示例的重写,改变其代码结构,状态机的数据协议采用自定义的二进制协议,尽可能的提高读写性能。
16 |
17 | 本示例[dragonboat](https://github.com/lni/dragonboat) 使用的是v3.3.7版本, [pebbledb](https://github.com/cockroachdb/pebble) 使用的是跟随`dragonboat`所使用的版本
18 |
19 | ### 序列化工具
20 |
21 | 本示例为了兼容后续项目的需要,业务上只能使用 `thrift` 作为序列化方式,`thrift` 序列化库未采用官方库,使用的是[thrifter](https://github.com/thrift-iterator/go),压测结果详见[thrifter-benchmark](https://github.com/xkeyideal/mraft/blob/master/benchmark/thrift-serialize/thrift-serialize.md)
22 |
23 |
24 |
25 | 在Raft SaveSnapshot与RecoverFromSnapshot时,采用的是自定义二进制协议,详细见[fsm.go](https://github.com/xkeyideal/mraft/blob/master/ondisk/fsm.go#L233),压测结果详见[binary-benchmark](https://github.com/xkeyideal/mraft/blob/master/benchmark/binary-serialize/binary-serialize.md)
26 |
27 | ### TCPServer压测结果
28 |
29 | multi-raft的网络协议与数据格式均使用simple-server中相同的方式,压测结果详见[simple-server-benchmark](https://github.com/xkeyideal/mraft/blob/master/benchmark/multi-raft/simple-server-benchmark.md)
30 |
31 | ### RaftServer压测结果
32 |
33 | multi-raft的压测协议与数据格式均使用simple-server中相同的方式,压测结果详见[raft-server-benchmark](https://github.com/xkeyideal/mraft/blob/master/benchmark/multi-raft/raft-server-benchmark.md)
34 |
35 | 压测数据用例使用的是[代码自动化数据生成工具](https://github.com/xkeyideal/mraft/blob/master/benchmark/generate/generate-data.go),每条数据的数据量大约在2KB以上,具体未做统计。
36 |
37 | ### 压测机器说明
38 |
39 | 机器采用的是开发环境的机器,操作系统macOS High Sierra,`Darwin Kernel Version 18.6.0 root:xnu-4903.261.4~2/RELEASE_X86_64 x86_64 i386 iMac14,2 Darwin`
40 |
41 | CPU:3.29 GHz Intel Core i5
42 |
43 | 内存:20 GB 1600 MHz DDR3
44 |
45 | 磁盘:256GB Intel SATA SSD
46 |
47 | 参考了[dragonboat](https://github.com/lni/dragonboat)作者的文章[从共识算法开谈 - 硬盘性能的最大几个误解](https://zhuanlan.zhihu.com/p/55658164),
48 | 特对开发环境的磁盘的fsync()落盘写性能使用**pg_test_fsync**工具进行测试
49 |
50 | ```
51 | 5 seconds per test
52 | Direct I/O is not supported on this platform.
53 |
54 | Compare file sync methods using one 8kB write:
55 | (in wal_sync_method preference order, except fdatasync is Linux's default)
56 | open_datasync 15293.184 ops/sec 65 usecs/op
57 | fdatasync 15042.152 ops/sec 66 usecs/op
58 | fsync 15062.644 ops/sec 66 usecs/op
59 | fsync_writethrough 87.954 ops/sec 11370 usecs/op
60 | open_sync 15060.335 ops/sec 66 usecs/op
61 |
62 | Compare file sync methods using two 8kB writes:
63 | (in wal_sync_method preference order, except fdatasync is Linux's default)
64 | open_datasync 7342.068 ops/sec 136 usecs/op
65 | fdatasync 11375.823 ops/sec 88 usecs/op
66 | fsync 11035.212 ops/sec 91 usecs/op
67 | fsync_writethrough 87.290 ops/sec 11456 usecs/op
68 | open_sync 6943.205 ops/sec 144 usecs/op
69 |
70 | Compare open_sync with different write sizes:
71 | (This is designed to compare the cost of writing 16kB in different write
72 | open_sync sizes.)
73 | 1 * 16kB open_sync write 11774.650 ops/sec 85 usecs/op
74 | 2 * 8kB open_sync writes 7335.006 ops/sec 136 usecs/op
75 | 4 * 4kB open_sync writes 4147.836 ops/sec 241 usecs/op
76 | 8 * 2kB open_sync writes 2048.232 ops/sec 488 usecs/op
77 | 16 * 1kB open_sync writes 1015.277 ops/sec 985 usecs/op
78 |
79 | Test if fsync on non-write file descriptor is honored:
80 | (If the times are similar, fsync() can sync data written on a different
81 | descriptor.)
82 | write, fsync, close 9232.970 ops/sec 108 usecs/op
83 | write, close, fsync 11632.603 ops/sec 86 usecs/op
84 |
85 | Non-sync'ed 8kB writes:
86 | write 14077.617 ops/sec 71 usecs/op
87 | ```
88 |
89 | ### 启动方式
90 |
91 | 示例代码已经放弃使用`rocksdb`作为存储,已经是纯`go`实现
92 |
93 | `go run app.go 10000 9800`
94 |
95 | **10000** 是NodeID,已经在代码里限定了(代码中的NodeID分别是10000,10001,10002),不能修改.
96 | **9800**是HTTP的端口号,随意设定即可
97 |
98 | ```go
99 | peers := map[uint64]string{
100 | 10000: "10.101.44.4:54000",
101 | 10001: "10.101.44.4:54100",
102 | 10002: "10.101.44.4:54200",
103 | }
104 |
105 | clusters := []uint64{254000, 254100, 254200}
106 | ```
107 |
108 | ### HTTP服务
109 |
110 | 示例的核心入口代码在engine/engine.go中,由于是示例,很多参数直接在代码中写死了。
111 |
112 | HTTP服务采用[gin](https://github.com/gin-gonic/gin)
113 |
114 | ### RequestAddNode 向集群添加节点的注意事项
115 |
116 | 详细的`dragonboat raft` 添加集群节点的示例请参考[productready](https://github.com/xkeyideal/mraft/blob/master/productready/README.md)
117 |
118 | 1. 先在集群中调用添加节点的命令RequestAddNode
119 | 2. 启动新增的节点,注意join节点的启动参数, nh.StartOnDiskCluster(map[uint64]string{}, true, NewDiskKV, rc)
120 | 3. 新增节点成功后,机器会通过Snapshot将数据同步给join节点
121 | 4. 新增节点与集群原有节点的启动顺序不影响集群的工作
122 | 5. 若新的集群需要重启,那么不能改变原有的peers(将新节点加入到peers),否则集群启动不起来,报错如下:
123 |
124 | ```json
125 | join节点的报错
126 |
127 | 2019-08-30 15:29:09.597258 E | raftpb: restarting previously joined node, member list map[10000:10.101.44.4:54000 10001:10.101.44.4:54100 10002:10.101.44.4:54200 10003:10.101.44.4:54300]
128 | 2019-08-30 15:29:09.597454 E | dragonboat: bootstrap validation failed, [54000:10003], map[], true, map[10000:10.101.44.4:54000 10001:10.101.44.4:54100 10002:10.101.44.4:54200 10003:10.101.44.4:54300], false
129 | panic: cluster settings are invalid
130 | ```
131 |
132 | ```json
133 | 集群原来节点的报错
134 |
135 | 2019-08-30 15:29:06.590245 E | raftpb: inconsistent node list, bootstrap map[10000:10.101.44.4:54000 10001:10.101.44.4:54100 10002:10.101.44.4:54200], incoming map[10000:10.101.44.4:54000 10001:10.101.44.4:54100 10002:10.101.44.4:54200 10003:10.101.44.4:54300]
136 | 2019-08-30 15:29:06.590289 E | dragonboat: bootstrap validation failed, [54000:10002], map[10000:10.101.44.4:54000 10001:10.101.44.4:54100 10002:10.101.44.4:54200], false, map[10000:10.101.44.4:54000 10001:10.101.44.4:54100 10002:10.101.44.4:54200 10003:10.101.44.4:54300], false
137 | panic: cluster settings are invalid
138 | ```
139 |
140 | ```text
141 | 原来的集群节点
142 | map[uint64]string{
143 | 10000: "10.101.44.4:54000",
144 | 10001: "10.101.44.4:54100",
145 | 10002: "10.101.44.4:54200",
146 | }
147 |
148 | 新增的节点:10003: "10.101.44.4:54300"
149 | ```
150 |
151 | ```go
152 | 正确join或重启的方式
153 | join := false
154 | nodeAddr := ""
155 | if engine.nodeID == 10003 {
156 | join = true
157 | nodeAddr = "10.101.44.4:54300"
158 | }
159 |
160 | engine.nh.Start(engine.raftDataDir, engine.nodeID, nodeAddr, join)
161 | ```
--------------------------------------------------------------------------------
/test/metrics/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | )
7 |
8 | // kv is a minimal key/value pair used to exercise JSON
9 | // (un)marshalling in main; the tags map Key/Val to the
10 | // lowercase "key"/"val" JSON fields.
11 | type kv struct {
12 | 	Key string `json:"key"`
13 | 	Val int    `json:"val"`
14 | }
12 |
13 | // main decodes a small JSON document into a kv value and prints it.
14 | // Earlier scratch experiments (metrics histograms, random numbers,
15 | // JSON encoding) are kept below, commented out.
16 | func main() {
17 | 	// s := metrics.NewExpDecaySample(2028, 0.015) // or metrics.NewUniformSample(1028)
18 | 	// h := metrics.NewHistogram(s)
19 | 	// metrics.Register("baz", h)
20 |
21 | 	// for i := 90; i < 105; i++ {
22 | 	// 	h.Update(int64(i))
23 | 	// }
24 |
25 | 	// fmt.Println(h.Min(), h.Max(), h.Mean(), h.Percentiles([]float64{0.9, 0.95, 0.99}))
26 |
27 | 	// for i := 0; i < 10; i++ {
28 | 	// 	rand.Seed(time.Now().UnixNano())
29 | 	// 	fmt.Println(rand.Int31n(1000000))
30 | 	// }
31 |
32 | 	// d := kv{"2", 2}
33 | 	// b, _ := json.Marshal(d)
34 | 	// fmt.Println(string(b))
35 |
36 | 	b := `{"key":"2","val":2}`
37 | 	d := kv{}
38 | 	// An ignored Unmarshal error would silently leave d at its zero
39 | 	// value, so fail loudly instead.
40 | 	if err := json.Unmarshal([]byte(b), &d); err != nil {
41 | 		panic(err)
42 | 	}
43 | 	fmt.Println(d)
44 | }
38 |
--------------------------------------------------------------------------------
/test/serialize/serialize.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "encoding/binary"
6 | "errors"
7 | "fmt"
8 | "io"
9 | )
10 |
11 | // encode writes one length-prefixed key/value frame to w.
12 | //
13 | // Wire format (all integers are little-endian uint64):
14 | //
15 | //	[frame size = len(key)+len(val)+16] [key size] [key] [val size] [val]
16 | //
17 | // The frame size covers everything after its own 8 bytes.
18 | func encode(key, val []byte, w io.Writer) error {
19 | 	// writeU64 emits a single little-endian uint64 field.
20 | 	writeU64 := func(n uint64) error {
21 | 		field := make([]byte, 8)
22 | 		binary.LittleEndian.PutUint64(field, n)
23 | 		_, err := w.Write(field)
24 | 		return err
25 | 	}
26 |
27 | 	// Frame header: total payload size (key + val + two size fields).
28 | 	if err := writeU64(uint64(len(key) + len(val) + 16)); err != nil {
29 | 		return err
30 | 	}
31 |
32 | 	// Key size, then the key bytes.
33 | 	if err := writeU64(uint64(len(key))); err != nil {
34 | 		return err
35 | 	}
36 | 	if _, err := w.Write(key); err != nil {
37 | 		return err
38 | 	}
39 |
40 | 	// Value size, then the value bytes.
41 | 	if err := writeU64(uint64(len(val))); err != nil {
42 | 		return err
43 | 	}
44 | 	_, err := w.Write(val)
45 | 	return err
46 | }
44 |
45 | // decode reads one frame produced by encode from r and returns the
46 | // key and value. The returned slices alias the internal frame buffer.
47 | //
48 | // Unlike the value-size check, the original code did not validate the
49 | // embedded key size, so a corrupt frame could panic with a slice
50 | // out-of-range; malformed frames now return an error instead.
51 | // NOTE(review): dataSize is allocated as read from the wire with no
52 | // upper bound — fine for this demo, but an untrusted peer could force
53 | // a huge allocation; cap it if this ever handles external input.
54 | func decode(r io.Reader) ([]byte, []byte, error) {
55 | 	sz := make([]byte, 8)
56 | 	if _, err := io.ReadFull(r, sz); err != nil {
57 | 		return nil, nil, err
58 | 	}
59 | 	dataSize := binary.LittleEndian.Uint64(sz)
60 | 	// A frame must at least hold the two embedded 8-byte size fields.
61 | 	if dataSize < 16 {
62 | 		return nil, nil, errors.New("frame too short")
63 | 	}
64 | 	data := make([]byte, dataSize)
65 | 	if _, err := io.ReadFull(r, data); err != nil {
66 | 		return nil, nil, err
67 | 	}
68 |
69 | 	kl := binary.LittleEndian.Uint64(data[:8])
70 | 	// Validate the key length before slicing to avoid a panic on
71 | 	// corrupt input (kl+16 must fit inside the frame).
72 | 	if kl > dataSize-16 {
73 | 		return nil, nil, errors.New("key size out of range")
74 | 	}
75 | 	key := data[8 : kl+8]
76 | 	vl := binary.LittleEndian.Uint64(data[kl+8 : kl+16])
77 | 	val := data[kl+16:]
78 | 	if uint64(len(val)) != vl {
79 | 		return nil, nil, errors.New("size isn't equal")
80 | 	}
81 |
82 | 	return key, val, nil
83 | }
66 |
67 | // main round-trips a sample key/value pair through encode/decode and
68 | // prints the result.
69 | func main() {
70 | 	key := []byte("multi-raft-key")
71 | 	val := []byte("multi-raft-value")
72 |
73 | 	buf := &bytes.Buffer{}
74 | 	err := encode(key, val, buf)
75 | 	if err != nil {
76 | 		panic(err)
77 | 	}
78 |
79 | 	key1, val1, err := decode(buf)
80 | 	// Handle the decode error the same way as the encode error above,
81 | 	// instead of silently printing it alongside the payload.
82 | 	if err != nil {
83 | 		panic(err)
84 | 	}
85 | 	fmt.Println(string(key1), string(val1))
86 | }
80 |
--------------------------------------------------------------------------------
/test/test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | )
6 |
7 | // main demonstrates that copy produces an independent byte slice:
8 | // mutating the source afterwards does not affect the duplicate.
9 | func main() {
10 | 	src := []byte("1234")
11 | 	dup := make([]byte, len(src))
12 |
13 | 	copy(dup, src)
14 | 	fmt.Println(string(dup))
15 |
16 | 	// Mutate the source only; dup keeps its own backing array.
17 | 	src[2] = '5'
18 |
19 | 	fmt.Println(string(dup), string(src))
20 | }
18 |
--------------------------------------------------------------------------------