├── .gitignore ├── .gitmodules ├── .travis.yml ├── CMakeLists.txt ├── README.md ├── build.sh ├── example ├── CMakeLists.txt └── demo.cc └── raft ├── CMakeLists.txt ├── Callback.h ├── Config.h ├── Log.cc ├── Log.h ├── Node.cc ├── Node.h ├── Raft.cc ├── Raft.h ├── RaftPeer.cc ├── RaftPeer.h ├── RaftService.cc ├── RaftService.h ├── Random.h ├── Storage.cc ├── Storage.h ├── Struct.h └── spec.json /.gitignore: -------------------------------------------------------------------------------- 1 | cmake-build-debug 2 | cmake-build-release 3 | .idea -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "3rd/jrpc"] 2 | path = 3rd/jrpc 3 | url = https://github.com/guangqianpeng/jrpc.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | sudo: required 3 | dist: trusty 4 | compiler: 5 | - gcc 6 | os: 7 | - linux 8 | addons: 9 | apt: 10 | sources: 11 | - ubuntu-toolchain-r-test 12 | packages: 13 | - g++-7 14 | env: 15 | - BUILD_TYPE=Debug 16 | - BUILD_TYPE=Release 17 | script: 18 | - sudo unlink /usr/bin/gcc 19 | - sudo ln -s /usr/bin/gcc-7 /usr/bin/gcc 20 | - sudo unlink /usr/bin/g++ 21 | - sudo ln -s /usr/bin/g++-7 /usr/bin/g++ 22 | - g++ -v 23 | - sudo apt install libleveldb-dev 24 | - ./build.sh && ./build.sh install 25 | notifications: 26 | email: never -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(raft) 3 | 4 | enable_testing() 5 | 6 | if(NOT CMAKE_BUILD_TYPE) 7 | set(CMAKE_BUILD_TYPE "Release") 8 | endif() 9 | 10 | if(NOT CMAKE_STUB_FORMATTER) 11 | set(CMAKE_STUB_FORMATTER echo) 12 | endif() 13 | 14 | set(CXX_FLAGS 15 | -fno-omit-frame-pointer # linux perf 16 | -Wall 17 | -Wextra 18 | -Werror 19 | -Wconversion 20 | -Wno-unused-parameter 21 | -Wold-style-cast 22 | -Woverloaded-virtual 23 | -Wpointer-arith 24 | -Wshadow 25 | -Wwrite-strings 26 | -std=c++17 27 | -march=native 28 | -rdynamic) 29 | string(REPLACE ";" " " CMAKE_CXX_FLAGS "${CXX_FLAGS}") 30 | 31 | set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) 32 | set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) 33 | 34 | find_path(LEVELDB_INCLUDE_PATH NAMES leveldb/db.h) 35 | find_library(LEVELDB_LIB NAMES leveldb) 36 | if ((NOT LEVELDB_INCLUDE_PATH) OR (NOT LEVELDB_LIB)) 37 | message(FATAL_ERROR "Fail to find leveldb") 38 | endif() 39 | 40 | include_directories( 41 | raft 42 | 3rd/jrpc 43 | 3rd/jrpc/3rd/tinyev 44 | 3rd/jrpc/3rd/jackson 45 | ${LEVELDB_INCLUDE_PATH} 46 | ${PROJECT_SOURCE_DIR} 47 | ${PROJECT_BINARY_DIR}) 48 | add_subdirectory(3rd/jrpc) 49 | add_subdirectory(raft) 50 | add_subdirectory(example) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/guangqianpeng/raft.svg?branch=master)](https://travis-ci.org/guangqianpeng/raft) 2 | 3 | # 简介 4 | 5 | 该版本的raft实现源自[MIT 6.824](http://nil.csail.mit.edu/6.824/2017/),做完lab以后我尝试用C++实现一遍。相比于go语言的版本,这个版本的特点有: 6 | 7 | - RPC框架使用了我自己写的[jrpc](https://github.com/guangqianpeng/jrpc),请求是异步的,而lab中是同步的 8 | - 使用多线程 + EventLoop的并发模型,而不是goroutine 9 | - 使用EventLoop, CountdownLatch之类的线程同步设施,拒绝直接使用mutex 10 | - 使用JSON格式的序列化/反序列化,使用LevelDB持久化JSON文本(应该用protobuf ?) 11 | 12 | # 功能 13 | 14 | - Leader election 15 | - Log replication 16 | - Persistence 17 | 18 | # TODO 19 | 20 | - Log compaction 21 | - Test 22 | - Benchmark 23 | 24 | # 实现 25 | 26 | 一个raft节点有两个线程(即两个EventLoop),一个跑rpc server,另一个跑raft算法以及rpc client。若将这两部分放在一个线程里面固然可以简化代码(单线程编程),但是由于rpc框架调度的延迟不确定,可能导致心跳发送不及时。也许应该把rpc client单独放在一个线程,在jrpc的支持下这点不难做到。 27 | 28 | 核心部分是raft的纯算法实现([Raft.h](raft/Raft.h)/[Raft.cc](raft/Raft.cc)),它的rpc请求、回复以及时钟都需要外部输入,这些输入具体包括: 29 | 30 | - rpc server收到的请求(`Raft::RequestVote()`,`Raft::AppendEntries()`) 31 | - rpc client收到的回复 (`Raft::OnRequestVoteReply()`, `Raft::OnAppendEntriesReply()`) 32 | - 固定频率的时钟激励(`Raft::Tick()`) 33 | - raft用户尝试提交log(`Raft::Propose()`) 34 | 35 | 我并没有将rpc请求和回复关联起来,而是当成独立的消息输入来处理,这样方便处理 expired/duplicate/out-of-order rpc 消息。用户并不直接使用Raft类,而是使用Node类([Node.h](raft/Node.h)/[Node.cc](raft/Node.cc))。Node类封装了rpc通信、时钟、多线程等内容。 36 | 37 | # 玩一下 38 | 39 | ## 安装 40 | 41 | **首先,你需要gcc 7.x或者更高的版本,可以手动安装或者直接上Ubuntu 18.04。。。** 42 | 43 | ```sh 44 | sudo apt install libleveldb-dev 45 | git clone https://github.com/guangqianpeng/raft.git 46 | cd raft 47 | git submodule update --init --recursive 48 | ./build.sh 49 | ./build.sh install 50 | cd ../raft-build/Release-install/bin 51 | ``` 52 | 53 | ## 单节点 54 | 55 | 开一个单节点的raft,server端口号是`9877`,虽然这个server没什么用: 56 | 57 | ```sh 58 | ./raft_demo 0 9877 | grep 'raft\[' 59 | ``` 60 | 61 | 你可以看到运行的流程,每隔1s,leader就会propose一条log,最后一行是每隔5s输出一次的统计信息: 62 | 63 | ``` 64 | 20180504 07:06:39.926993 raft[0] follower, peerNum = 1 starting... 65 | 20180504 07:06:40.427085 raft[0] follower -> candidate 66 | 20180504 07:06:40.427127 raft[0] candidate -> leader 67 | 20180504 07:06:40.927118 raft[0] leader, term 1, start log 1 68 | 20180504 07:06:40.927164 raft[0] leader, term 1, apply log [1] 69 | 20180504 07:06:41.927097 raft[0] leader, term 1, start log 2 70 | 20180504 07:06:41.927145 raft[0] leader, term 1, apply log [2] 71 | 20180504 07:06:42.927097 raft[0] leader, term 1, start log 3 72 | 20180504 07:06:42.927144 raft[0] leader, term 1, apply log [3] 73 | 20180504 07:06:43.927027 raft[0] leader, term 1, start log 4 74 | 20180504 07:06:43.927082 raft[0] leader, term 1, apply log [4] 75 | 20180504 07:06:44.927069 raft[0] leader, term 1, #votes 1, commit 4 76 | ``` 77 | 78 | ## 多节点 79 | 80 | 开3个节点的raft集群,需要起3个进程,server端口号分别是`9877,9878,9879`分别运行: 81 | 82 | ```shell 83 | ./raft_demo 0 9877 9878 9879 | grep 'raft\[' 84 | ./raft_demo 1 9877 9878 9879 | grep 'raft\[' 85 | ./raft_demo 2 9877 9878 9879 | grep 'raft\[' 86 | ``` 87 | 88 | 你可以看到leader election的过程,然后重启一个进程,看看会发生什么?Have fun :-) -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -x 4 | 5 | SOURCE_DIR=`pwd` 6 | BUILD_DIR=${BUILD_DIR:-../raft-build} 7 | BUILD_TYPE=${BUILD_TYPE:-Release} 8 | INSTALL_DIR=${INSTALL_DIR:-../${BUILD_TYPE}-install} 9 | BUILD_NO_EXAMPLES=${BUILD_NO_EXAMPLES:-1} 10 | STUB_FORMATTER="echo" 11 | 12 | mkdir -p $BUILD_DIR/$BUILD_TYPE \ 13 | && cd $BUILD_DIR/$BUILD_TYPE \ 14 | && cmake \ 15 | -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ 16 | -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ 17 | -DCMAKE_BUILD_NO_EXAMPLES=$BUILD_NO_EXAMPLES \ 18 | -DCMAKE_STUB_FORMATTER=$STUB_FORMATTER \ 19 | $SOURCE_DIR \ 20 | && make $* -------------------------------------------------------------------------------- /example/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(raft_demo demo.cc) 2 | target_link_libraries(raft_demo raft) 3 | install(TARGETS raft_demo DESTINATION bin) -------------------------------------------------------------------------------- /example/demo.cc: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std::chrono_literals; 8 | 9 | void usage() 10 | { 11 | printf("usage: ./raft id address1 address2..."); 12 | exit(EXIT_FAILURE); 13 | } 14 | 15 | int main(int argc, char** argv) 16 | { 17 | if (argc < 3) 18 | usage(); 19 | 20 | setLogLevel(LOG_LEVEL_DEBUG); 21 | 22 | int id = std::stoi(argv[1]); 23 | 24 | std::vector 25 | peerAddresses; 26 | 27 | if (id + 2 >= argc) { 28 | usage(); 29 | } 30 | 31 | for (int i = 2; i < argc; i++) { 32 | peerAddresses.emplace_back(std::stoi(argv[i])); 33 | } 34 | 35 | raft::Config config; 36 | config.id = id; 37 | config.storagePath = "./raft." + std::to_string(id); 38 | config.heartbeatTimeout = 1; 39 | config.electionTimeout = 5; 40 | config.timeUnit = 100ms; 41 | config.serverAddress = peerAddresses[id]; 42 | config.peerAddresses = peerAddresses; 43 | config.applyCallback = [](const raft::ApplyMsg& msg) { 44 | assert(msg.command.getStringView() == "raft example"); 45 | }; 46 | config.snapshotCallback = [](const json::Value& snapshot) { 47 | FATAL("not implemented yet"); 48 | }; 49 | 50 | ev::EventLoopThread loopThread; 51 | ev::EventLoop* raftServerLoop = loopThread.startLoop(); 52 | raft::Node raftNode(config, raftServerLoop); 53 | 54 | ev::EventLoop loop; 55 | loop.runEvery(1s, [&](){ 56 | auto ret = raftNode.GetState(); 57 | if (ret.isLeader) { 58 | raftNode.Propose(json::Value("raft example")); 59 | } 60 | }); 61 | 62 | raftNode.Start(); 63 | loop.loop(); 64 | } -------------------------------------------------------------------------------- /raft/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_custom_command( 2 | OUTPUT raw_header 3 | COMMAND jrpcstub 4 | ARGS -o -i ${CMAKE_CURRENT_SOURCE_DIR}/spec.json 5 | MAIN_DEPENDENCY spec.json 6 | DEPENDS jrpcstub 7 | COMMENT "Generating Server/Client Stub..." 8 | VERBATIM 9 | ) 10 | 11 | set(stub_dir ${PROJECT_BINARY_DIR}/raft) 12 | 13 | add_custom_command( 14 | OUTPUT RaftServiceStub.h RaftClientStub.h 15 | COMMAND ${CMAKE_STUB_FORMATTER} 16 | ARGS -i ${stub_dir}/RaftServiceStub.h ${stub_dir}/RaftClientStub.h 17 | DEPENDS raw_header 18 | COMMENT "clang format Stub..." 19 | VERBATIM 20 | ) 21 | 22 | add_library(raft STATIC 23 | Raft.cc Raft.h 24 | RaftPeer.cc RaftPeer.h 25 | RaftService.h RaftService.cc 26 | RaftServiceStub.h RaftClientStub.h 27 | Log.h Log.cc 28 | Storage.cc Storage.h 29 | Node.cc Node.h 30 | Config.h Struct.h Random.h Callback.h) 31 | target_link_libraries(raft jrpc leveldb) 32 | install(TARGETS raft DESTINATION lib) -------------------------------------------------------------------------------- /raft/Callback.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-4-20. 3 | // 4 | 5 | #ifndef RAFT_CALLBACK_H 6 | #define RAFT_CALLBACK_H 7 | 8 | #include 9 | #include 10 | 11 | namespace raft 12 | { 13 | 14 | struct RequestVoteArgs; 15 | struct RequestVoteReply; 16 | 17 | struct AppendEntriesArgs; 18 | struct AppendEntriesReply; 19 | 20 | struct ApplyMsg; 21 | 22 | typedef std::function RequestVoteDoneCallback; 23 | typedef std::function AppendEntriesDoneCallback; 24 | typedef std::function DoRequestVoteCallback; 26 | typedef std::function DoAppendEntriesCallback; 28 | typedef std::function RequestVoteReplyCallback; 31 | typedef std::function AppendEntriesReplyCallback; 34 | typedef std::function ApplyCallback; 35 | typedef std::function SnapshotCallback; 36 | 37 | } 38 | 39 | #endif //RAFT_CALLBACK_H 40 | -------------------------------------------------------------------------------- /raft/Config.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-5-15. 3 | // 4 | 5 | #ifndef RAFT_CONFIG_H 6 | #define RAFT_CONFIG_H 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace raft 15 | { 16 | 17 | struct Config 18 | { 19 | // 20 | // unique id of the raft Node, starting from 0 21 | // e.g. 0, 1, 2... 22 | // 23 | int id; 24 | 25 | // 26 | // leveldb persistence data path for each raftNode 27 | // e.g. /tmp/raft.0, /tmp/raft.1 ... 28 | // 29 | std::string storagePath; 30 | 31 | // 32 | // leader heartbeat timeout, the unit is config.timeUnit 33 | // 34 | int heartbeatTimeout = 1; 35 | 36 | // 37 | // assert(heartbeat < electionTimeout) 38 | // default = 5 39 | // 40 | int electionTimeout = 5; 41 | 42 | // 43 | // tick frequency of the Node 44 | // 45 | std::chrono::milliseconds timeUnit { 100 }; 46 | 47 | // 48 | // RPC server address of this Node 49 | // 50 | ev::InetAddress serverAddress; 51 | 52 | // 53 | // all peer addresses of this Node 54 | // assert(peerAddresses[id] == serverAddress) 55 | // 56 | std::vector peerAddresses; 57 | 58 | // 59 | // user callback of a newly applied log 60 | // 61 | ApplyCallback applyCallback; 62 | 63 | // 64 | // user callback of a newly installed snapshot 65 | // 66 | SnapshotCallback snapshotCallback; 67 | }; 68 | 69 | } 70 | 71 | #endif //RAFT_CONFIG_H 72 | -------------------------------------------------------------------------------- /raft/Log.cc: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-5-10. 3 | // 4 | 5 | #include 6 | 7 | #include 8 | 9 | using namespace raft; 10 | 11 | Log::Log(Storage* storage) 12 | : storage_(storage) 13 | , firstIndex_(storage->GetFirstIndex()) 14 | , lastIndex_(storage->GetLastIndex()) 15 | { 16 | assert(firstIndex_ <= lastIndex_); 17 | size_t entryNum = lastIndex_ - firstIndex_ + 1; 18 | log_.reserve(entryNum); 19 | for (auto& entry: storage->GetEntries()) { 20 | PutEntryFromJson(entry); 21 | } 22 | 23 | assert(entryNum == log_.size()); 24 | } 25 | 26 | IndexAndTerm Log::LastIndexInTerm(int startIndex, int term) const 27 | { 28 | int index = std::min(startIndex, lastIndex_); 29 | for (; index >= FirstIndex(); index--) { 30 | if (TermAt(index) <= term) 31 | break; 32 | } 33 | return { index, TermAt(index) }; 34 | } 35 | 36 | bool Log::IsUpToDate(int index, int term) const 37 | { 38 | int lastLogTerm_ = LastTerm(); 39 | if (lastLogTerm_ != term) 40 | return lastLogTerm_ < term; 41 | return lastIndex_ <= index; 42 | } 43 | 44 | void Log::Append(int term, const json::Value& command) 45 | { 46 | log_.emplace_back(term, command); 47 | lastIndex_++; 48 | 49 | auto entry = GetEntryAsJson(lastIndex_); 50 | storage_->PrepareEntry(lastIndex_, entry); 51 | storage_->PutPreparedEntries(); 52 | storage_->PutLastIndex(lastIndex_); 53 | } 54 | 55 | void Log::Overwrite(int firstIndex, const json::Value& entries) 56 | { 57 | assert(firstIndex <= lastIndex_ + 1); 58 | 59 | log_.resize(firstIndex); 60 | for (const json::Value& entry: entries.getArray()) { 61 | PutEntryFromJson(entry); 62 | storage_->PrepareEntry(firstIndex++, entry); 63 | } 64 | if (entries.getSize() > 0) { 65 | storage_->PutPreparedEntries(); 66 | } 67 | lastIndex_ = static_cast(log_.size()) - 1; 68 | storage_->PutLastIndex(lastIndex_); 69 | } 70 | 71 | json::Value Log::GetEntriesAsJson(int firstIndex, int maxEntries) const 72 | { 73 | json::Value entries(json::TYPE_ARRAY); 74 | 75 | int lastIndex = std::min(lastIndex_, firstIndex + maxEntries - 1); 76 | for (int i = firstIndex; i <= lastIndex; i++) 77 | { 78 | auto element = GetEntryAsJson(i); 79 | entries.addValue(element); 80 | } 81 | return entries; 82 | } 83 | 84 | bool Log::Contain(int index, int term) const 85 | { 86 | if (index > lastIndex_) 87 | return false; 88 | return log_[index].term == term; 89 | } 90 | 91 | json::Value Log::GetEntryAsJson(int index) const 92 | { 93 | json::Value entry(json::TYPE_OBJECT); 94 | entry.addMember("term", log_[index].term); 95 | entry.addMember("command", log_[index].command); 96 | return entry; 97 | } 98 | 99 | void Log::PutEntryFromJson(const json::Value& entry) 100 | { 101 | int term = entry["term"].getInt32(); 102 | auto& command = entry["command"]; 103 | log_.emplace_back(term, command); 104 | } 105 | -------------------------------------------------------------------------------- /raft/Log.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-4-20. 3 | // 4 | 5 | #ifndef RAFT_LOG_H 6 | #define RAFT_LOG_H 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace raft 17 | { 18 | 19 | class Log : ev::noncopyable 20 | { 21 | public: 22 | explicit 23 | Log(Storage* storage); 24 | 25 | int FirstIndex() const 26 | { return firstIndex_; } 27 | 28 | int FirstTerm() const 29 | { return log_[firstIndex_].term; } 30 | 31 | int LastIndex() const 32 | { return lastIndex_; } 33 | 34 | int LastTerm() const 35 | { return log_[lastIndex_].term; } 36 | 37 | int TermAt(int index) const 38 | { return log_[index].term; } 39 | 40 | const json::Value& CommandAt(int index) const 41 | { return log_[index].command; } 42 | 43 | IndexAndTerm LastIndexInTerm(int startIndex, int term) const; 44 | 45 | bool IsUpToDate(int index, int term) const; 46 | 47 | void Append(int term, const json::Value& command); 48 | 49 | void Overwrite(int firstIndex, const json::Value& entries); 50 | 51 | json::Value GetEntriesAsJson(int firstIndex, int maxEntries) const; 52 | 53 | bool Contain(int index, int term) const; 54 | 55 | private: 56 | 57 | json::Value GetEntryAsJson(int index) const; 58 | 59 | void PutEntryFromJson(const json::Value& entry); 60 | 61 | struct Entry 62 | { 63 | Entry(int term_, const json::Value& command_) 64 | : term(term_), command(command_) 65 | {} 66 | 67 | Entry() 68 | : term(0), command(json::TYPE_NULL) 69 | {} 70 | 71 | int term; 72 | json::Value command; // from raft user or raft peer 73 | }; 74 | 75 | Storage* storage_; 76 | int firstIndex_; 77 | int lastIndex_; 78 | std::vector log_; 79 | }; 80 | 81 | } 82 | 83 | #endif //RAFT_LOG_H 84 | -------------------------------------------------------------------------------- /raft/Node.cc: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-5-15. 3 | // 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | using std::placeholders::_1; 11 | using std::placeholders::_2; 12 | using std::placeholders::_3; 13 | 14 | using namespace raft; 15 | 16 | namespace 17 | { 18 | 19 | void CheckConfig(const Config& c) 20 | { 21 | // todo 22 | } 23 | 24 | } 25 | 26 | Node::Node(const Config& c, ev::EventLoop* serverLoop) 27 | : id_(c.id) 28 | , peerNum_(static_cast(c.peerAddresses.size())) 29 | , tickInterval_(c.timeUnit) 30 | , rpcServer_(serverLoop, c.serverAddress) 31 | , raftService_(rpcServer_) 32 | , loop_(loopThread_.startLoop()) 33 | { 34 | CheckConfig(c); 35 | 36 | std::vector rawPeers; 37 | for (int i = 0; i < peerNum_; i++) { 38 | // fixme: pass serverLoop or loop_ ??? 39 | auto ptr = new RaftPeer(i, loop_, c.peerAddresses[i]); 40 | rawPeers.push_back(ptr); 41 | peers_.emplace_back(ptr); 42 | } 43 | raft_ = std::make_unique(c, rawPeers); 44 | 45 | for (auto peer: rawPeers) { 46 | peer->SetRequestVoteReplyCallback( 47 | std::bind(&Node::OnRequestVoteReply, this, _1, _2, _3)); 48 | peer->SetAppendEntriesReplyCallback( 49 | std::bind(&Node::OnAppendEntriesReply, this, _1, _2, _3)); 50 | } 51 | 52 | raftService_.SetDoRequestVoteCallback( 53 | std::bind(&Node::RequestVote, this, _1, _2)); 54 | raftService_.SetDoAppendEntriesCallback( 55 | std::bind(&Node::AppendEntries, this, _1, _2)); 56 | } 57 | 58 | void Node::Start() 59 | { 60 | RunTaskInLoopAndWait([=]() { 61 | StartInLoop(); 62 | }); 63 | } 64 | 65 | void Node::StartInLoop() 66 | { 67 | AssertInLoop(); 68 | 69 | if (started_.exchange(true)) 70 | return; 71 | 72 | // start rpc server 73 | rpcServer_.start(); 74 | 75 | // connect other peerAddresses, non-blocking! 76 | for (int i = 0; i < peerNum_; i++) { 77 | if (i != id_) { 78 | peers_[i]->Start(); 79 | } 80 | } 81 | 82 | DEBUG("raft[%d] peerNum = %d starting...", id_, peerNum_); 83 | 84 | loop_->runEvery(std::chrono::seconds(3), [this](){ raft_->DebugOutput(); }); 85 | loop_->runEvery(tickInterval_, [this](){ raft_->Tick(); }); 86 | } 87 | 88 | RaftState Node::GetState() 89 | { 90 | AssertStarted(); 91 | 92 | RaftState state; 93 | RunTaskInLoopAndWait([&, this]() { 94 | AssertStarted(); 95 | state = raft_->GetState(); 96 | }); 97 | 98 | return state; 99 | } 100 | 101 | ProposeResult Node::Propose(const json::Value& command) 102 | { 103 | AssertStarted(); 104 | 105 | ProposeResult result; 106 | RunTaskInLoopAndWait([&, this]() { 107 | AssertStarted(); 108 | result = raft_->Propose(command); 109 | }); 110 | return result; 111 | } 112 | 113 | void Node::RequestVote(const RequestVoteArgs& args, 114 | const RequestVoteDoneCallback& done) 115 | { 116 | AssertStarted(); 117 | 118 | RunTaskInLoop([=]() { 119 | RequestVoteReply reply; 120 | raft_->RequestVote(args, reply); 121 | done(reply); 122 | }); 123 | } 124 | 125 | // 126 | // RequestVote done callback, thread safe. 127 | // In current implementation, it is only called in Raft thread 128 | // 129 | void Node::OnRequestVoteReply(int peer, 130 | const RequestVoteArgs& args, 131 | const RequestVoteReply& reply) 132 | { 133 | AssertStarted(); 134 | 135 | RunTaskInLoop([=]() { 136 | raft_->OnRequestVoteReply(peer, args, reply); 137 | }); 138 | } 139 | 140 | // 141 | // AppendEntries RPC handler, thread safe 142 | // 143 | void Node::AppendEntries(const AppendEntriesArgs& args, 144 | const AppendEntriesDoneCallback& done) 145 | { 146 | AssertStarted(); 147 | 148 | RunTaskInLoop([=]() { 149 | AppendEntriesReply reply; 150 | raft_->AppendEntries(args, reply); 151 | done(reply); 152 | }); 153 | } 154 | 155 | // 156 | // AppendEntries RPC handler, thread safe 157 | // In current implementation, it is only called in Raft thread 158 | // 159 | void Node::OnAppendEntriesReply(int peer, 160 | const AppendEntriesArgs& args, 161 | const AppendEntriesReply& reply) 162 | { 163 | AssertStarted(); 164 | 165 | RunTaskInLoop([=]() { 166 | raft_->OnAppendEntriesReply(peer, args, reply); 167 | }); 168 | } 169 | 170 | template 171 | void Node::RunTaskInLoop(Task&& task) 172 | { 173 | loop_->runInLoop(std::forward(task)); 174 | } 175 | 176 | template 177 | void Node::QueueTaskInLoop(Task&& task) 178 | { 179 | loop_->queueInLoop(std::forward(task)); 180 | } 181 | 182 | template 183 | void Node::RunTaskInLoopAndWait(Task&& task) 184 | { 185 | ev::CountDownLatch latch(1); 186 | RunTaskInLoop([&, this]() { 187 | task(); 188 | latch.count(); 189 | }); 190 | latch.wait(); 191 | } -------------------------------------------------------------------------------- /raft/Node.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-5-15. 3 | // 4 | 5 | #ifndef RAFT_NODE_H 6 | #define RAFT_NODE_H 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | 23 | namespace raft 24 | { 25 | 26 | class Node : ev::noncopyable 27 | { 28 | public: 29 | Node(const Config& config, ev::EventLoop* serverLoop); 30 | 31 | // 32 | // start the node instance, thread safe 33 | // 34 | void Start(); 35 | 36 | // 37 | // wrapper of Raft::GetState(), thread safe 38 | // 39 | RaftState GetState(); 40 | 41 | // 42 | // wrapper of Raft::Propose(), thread safe 43 | // 44 | ProposeResult Propose(const json::Value& command); 45 | 46 | private: 47 | 48 | void StartInLoop(); 49 | 50 | // 51 | // wrapper of Raft::RequestVote(), thread safe 52 | // 53 | void RequestVote(const RequestVoteArgs& args, 54 | const RequestVoteDoneCallback& done); 55 | 56 | // 57 | // wrapper of Raft::OnRequestVoteReply(), thread safe 58 | // 59 | void OnRequestVoteReply(int peer, 60 | const RequestVoteArgs& args, 61 | const RequestVoteReply& reply); 62 | 63 | // 64 | // wrapper of Raft::AppendEntries(), thread safe 65 | // 66 | void AppendEntries(const AppendEntriesArgs& args, 67 | const AppendEntriesDoneCallback& done); 68 | 69 | // 70 | // Wrapper of Raft::OnAppendEntriesReply(), thread safe 71 | // 72 | void OnAppendEntriesReply(int peer, 73 | const AppendEntriesArgs& args, 74 | const AppendEntriesReply& reply); 75 | 76 | private: 77 | // 78 | // three kinds of eventloop schedulers 79 | // 80 | template 81 | void RunTaskInLoop(Task&& task); 82 | 83 | template 84 | void QueueTaskInLoop(Task&& task); 85 | 86 | template 87 | void RunTaskInLoopAndWait(Task&& task); 88 | 89 | void AssertInLoop() const 90 | { loop_->assertInLoopThread(); } 91 | 92 | void AssertStarted() const 93 | { assert(started_); } 94 | 95 | void AssertNotStarted() const 96 | { assert(!started_); } 97 | 98 | private: 99 | typedef std::unique_ptr RaftPtr; 100 | typedef std::unique_ptr RaftPeerPtr; 101 | typedef std::vector RaftPeerList; 102 | 103 | std::atomic_bool started_ = false; 104 | RaftPtr raft_; 105 | RaftPeerList peers_; 106 | 107 | const int id_; 108 | const int peerNum_; 109 | 110 | std::chrono::milliseconds tickInterval_; 111 | 112 | jrpc::RpcServer rpcServer_; 113 | RaftService raftService_; 114 | 115 | ev::EventLoopThread loopThread_; 116 | ev::EventLoop* loop_; 117 | }; 118 | 119 | } 120 | 121 | #endif //RAFT_NODE_H 122 | -------------------------------------------------------------------------------- /raft/Raft.cc: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-4-19. 3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | using namespace raft; 12 | 13 | Raft::Raft(const Config& c, const std::vector& peers) 14 | : id_(c.id) 15 | , peerNum_(static_cast(peers.size())) 16 | , storage_(c.storagePath) 17 | , currentTerm_(storage_.GetCurrentTerm()) 18 | , votedFor_(storage_.GetVotedFor()) 19 | , log_(&storage_) 20 | , heartbeatTimeout_(c.heartbeatTimeout) 21 | , electionTimeout_(c.electionTimeout) 22 | , randomGen_(id_, electionTimeout_, 2 * electionTimeout_) 23 | , peers_(peers) 24 | , applyCallback_(c.applyCallback) 25 | , snapshotCallback_(c.snapshotCallback) 26 | { 27 | ResetTimer(); 28 | DEBUG("raft[%d] %s, term %d, first_index %d, last_index %d", 29 | id_, RoleString(), 30 | currentTerm_, 31 | log_.FirstIndex(), 32 | log_.LastIndex()); 33 | } 34 | 35 | RaftState Raft::GetState() const 36 | { 37 | return { currentTerm_, role_ == kLeader }; 38 | } 39 | 40 | ProposeResult Raft::Propose(const json::Value& command) 41 | { 42 | int index = log_.LastIndex() + 1; 43 | int currentTerm = currentTerm_; 44 | bool isLeader = (role_ == kLeader); 45 | 46 | if (isLeader) { 47 | log_.Append(currentTerm_, command); 48 | DEBUG("raft[%d] %s, term %d, propose log %d", 49 | id_, RoleString(), currentTerm_, index); 50 | } 51 | 52 | if (IsStandalone()) { 53 | // 54 | // there is only one node in raft cluster, 55 | // log proposed can be committed and applied right now 56 | // 57 | commitIndex_ = index; 58 | ApplyLog(); 59 | } 60 | 61 | return { index, currentTerm, isLeader }; 62 | } 63 | 64 | void Raft::StartRequestVote() 65 | { 66 | RequestVoteArgs args; 67 | args.term = currentTerm_; 68 | args.candidateId = id_; 69 | args.lastLogIndex = log_.LastIndex(); 70 | args.lastLogTerm = log_.LastTerm(); 71 | 72 | for (int i = 0; i < peerNum_; i++) { 73 | if (i != id_) { 74 | peers_[i]->RequestVote(args); 75 | } 76 | } 77 | } 78 | 79 | void Raft::RequestVote(const RequestVoteArgs& args, 80 | RequestVoteReply& reply) 81 | { 82 | OnNewInputTerm(args.term); 83 | ResetTimer(); 84 | 85 | reply.term = currentTerm_; 86 | 87 | if (args.term == currentTerm_ && 88 | (votedFor_ == kVotedForNull || votedFor_ == args.candidateId) && 89 | log_.IsUpToDate(args.lastLogIndex, args.lastLogTerm)) 90 | { 91 | DEBUG("raft[%d] -> raft[%d]", id_, args.candidateId); 92 | SetVotedFor(args.candidateId); 93 | reply.voteGranted = true; 94 | } 95 | else 96 | { 97 | reply.voteGranted = false; 98 | } 99 | } 100 | 101 | 102 | void Raft::OnRequestVoteReply(int peer, 103 | const RequestVoteArgs& args, 104 | const RequestVoteReply& reply) 105 | { 106 | OnNewInputTerm(reply.term); 107 | 108 | if (role_ != kCandidate || // not a candidate anymore 109 | !reply.voteGranted || // vote not granted 110 | currentTerm_ > reply.term) // expired vote 111 | { 112 | return; 113 | } 114 | 115 | DEBUG("raft[%d] <- raft[%d]", id_, peer); 116 | 117 | votesGot_++; 118 | if (votesGot_ > peerNum_ / 2) { 119 | ToLeader(); 120 | } 121 | } 122 | 123 | void Raft::StartAppendEntries() 124 | { 125 | for (int i = 0; i < peerNum_; i++) { 126 | if (i == id_) 127 | continue; 128 | 129 | AppendEntriesArgs args; 130 | args.term = currentTerm_; 131 | args.prevLogIndex = nextIndex_[i] - 1; 132 | args.prevLogTerm = log_.TermAt(args.prevLogIndex); 133 | args.entries = log_.GetEntriesAsJson(nextIndex_[i], kMaxEntriesSendOneTime); 134 | args.leaderCommit = commitIndex_; 135 | peers_[i]->AppendEntries(args); 136 | } 137 | } 138 | 139 | void Raft::AppendEntries(const AppendEntriesArgs& args, 140 | AppendEntriesReply& reply) 141 | { 142 | OnNewInputTerm(args.term); 143 | ResetTimer(); 144 | 145 | reply.term = currentTerm_; 146 | 147 | if (currentTerm_ > args.term) { 148 | // expired heartbeat 149 | reply.success = false; 150 | return; 151 | } 152 | else if (role_ == kCandidate) { 153 | // lose leader election 154 | ToFollower(currentTerm_); 155 | } 156 | else if (role_ == kLeader) { 157 | FATAL("multiple leaders in term %d", currentTerm_); 158 | } 159 | 160 | // 161 | // invariant here: 162 | // 1. role == kFollower 163 | // 2. args.term == currentTerm 164 | // 165 | if (log_.Contain(args.prevLogIndex, args.prevLogTerm)) { 166 | log_.Overwrite(args.prevLogIndex + 1, args.entries); 167 | 168 | // 169 | // update commit index monotonically 170 | // 171 | int possibleCommit = std::min(args.leaderCommit, log_.LastIndex()); 172 | if (commitIndex_ < possibleCommit) { 173 | commitIndex_ = possibleCommit; 174 | ApplyLog(); 175 | } 176 | reply.success = true; 177 | } 178 | else { 179 | auto p = log_.LastIndexInTerm(args.prevLogIndex, args.prevLogTerm); 180 | reply.expectIndex = p.index; 181 | reply.expectTerm = p.term; 182 | reply.success = false; 183 | } 184 | } 185 | 186 | 187 | void Raft::OnAppendEntriesReply(int peer, 188 | const AppendEntriesArgs& args, 189 | const AppendEntriesReply& reply) 190 | { 191 | OnNewInputTerm(reply.term); 192 | 193 | if (role_ != kLeader || currentTerm_ > reply.term) { 194 | // 1. not a leader anymore 195 | // 2. expired RPC(return too late) 196 | return; 197 | } 198 | 199 | if (!reply.success) { 200 | // 201 | // log replication failed, back nexIndex_[peer] quickly!!! 202 | // 203 | int nextIndex = nextIndex_[peer]; 204 | 205 | if (reply.expectTerm == args.prevLogTerm) { 206 | assert(reply.expectIndex < args.prevLogIndex); 207 | nextIndex = reply.expectIndex; 208 | } 209 | else { 210 | assert(reply.expectTerm < args.prevLogTerm); 211 | auto p = log_.LastIndexInTerm(nextIndex, reply.expectTerm); 212 | nextIndex = p.index; 213 | } 214 | 215 | // 216 | // take care of duplicate & out-of-order & expired reply 217 | // 218 | if (nextIndex > nextIndex_[peer]) { 219 | nextIndex = nextIndex_[peer] - 1; 220 | } 221 | if (nextIndex <= matchIndex_[peer]) { 222 | DEBUG("raft[%d] %s, nextIndex <= matchIndex_[%d], set to %d", 223 | id_, RoleString(), peer, matchIndex_[peer] + 1); 224 | nextIndex = matchIndex_[peer] + 1; 225 | } 226 | 227 | nextIndex_[peer] = nextIndex; 228 | return; 229 | } 230 | 231 | // 232 | // log replication succeed 233 | // 234 | int startIndex = args.prevLogIndex + 1; 235 | int entryNum = static_cast(args.entries.getSize()); 236 | int endIndex = startIndex + entryNum - 1; 237 | 238 | for (int i = endIndex; i >= startIndex; i--) { 239 | 240 | // 241 | // log[i] has already replicated on peer, 242 | // duplicate reply takes no effects 243 | // 244 | if (i <= matchIndex_[peer]) 245 | break; 246 | 247 | // 248 | // a leader cannot immediately conclude that a 249 | // entry from previous term is committed once it is 250 | // stored on majority of servers, so, just don't count #replica 251 | // 252 | if (log_.TermAt(i) < currentTerm_) 253 | break; 254 | assert(log_.TermAt(i) == currentTerm_); 255 | 256 | // 257 | // logs already committed 258 | // 259 | if (i <= commitIndex_) 260 | break; 261 | 262 | // 263 | // initial replica is 2, one for id_, one for peer 264 | // 265 | int replica = 2; 266 | for (int p = 0; p < peerNum_; p++) { 267 | if (i <= matchIndex_[p]) 268 | replica++; 269 | } 270 | 271 | // 272 | // update commitIndex monotonically 273 | // 274 | if (replica > peerNum_ / 2) { 275 | commitIndex_ = i; 276 | break; 277 | } 278 | } 279 | 280 | ApplyLog(); 281 | if (nextIndex_[peer] <= endIndex) { 282 | nextIndex_[peer] = endIndex + 1; 283 | matchIndex_[peer] = endIndex; 284 | } 285 | } 286 | 287 | void Raft::Tick() 288 | { 289 | switch (role_) 290 | { 291 | case kFollower: 292 | case kCandidate: 293 | TickOnElection(); 294 | break; 295 | case kLeader: 296 | TickOnHeartbeat(); 297 | break; 298 | default: 299 | assert(false && "bad role"); 300 | } 301 | } 302 | 303 | void Raft::DebugOutput() const 304 | { 305 | DEBUG("raft[%d] %s, term %d, #votes %d, commit %d", 306 | id_, RoleString(), currentTerm_, votesGot_, commitIndex_); 307 | } 308 | 309 | void Raft::ApplyLog() 310 | { 311 | assert(lastApplied_ <= commitIndex_); 312 | 313 | if (commitIndex_ != lastApplied_) { 314 | if (lastApplied_ + 1 == commitIndex_) { 315 | DEBUG("raft[%d] %s, term %d, apply log [%d]", 316 | id_, RoleString(), currentTerm_, commitIndex_); 317 | } 318 | else { 319 | DEBUG("raft[%d] %s, term %d, apply log (%d, %d]", 320 | id_, RoleString(), currentTerm_, lastApplied_, commitIndex_); 321 | } 322 | } 323 | 324 | for (int i = lastApplied_ + 1; i <= commitIndex_; i++) { 325 | ApplyMsg msg(i, log_.CommandAt(i)); 326 | applyCallback_(msg); 327 | } 328 | lastApplied_ = commitIndex_; 329 | } 330 | 331 | void Raft::TickOnElection() 332 | { 333 | timeElapsed_++; 334 | if (timeElapsed_ >= randomizedElectionTimeout_) { 335 | ToCandidate(); // candidate -> candidate is OK 336 | } 337 | } 338 | 339 | void Raft::TickOnHeartbeat() 340 | { 341 | timeElapsed_++; 342 | if (timeElapsed_ >= heartbeatTimeout_) { 343 | StartAppendEntries(); 344 | ResetTimer(); 345 | } 346 | } 347 | 348 | void Raft::SetCurrentTerm(int term) 349 | { 350 | currentTerm_ = term; 351 | storage_.PutCurrentTerm(currentTerm_); 352 | } 353 | 354 | void Raft::SetVotedFor(int votedFor) 355 | { 356 | votedFor_ = votedFor; 357 | storage_.PutVotedFor(votedFor_); 358 | } 359 | 360 | void Raft::ToFollower(int targetTerm) 361 | { 362 | if (role_ != kFollower) { 363 | DEBUG("raft[%d] %s -> follower", id_, RoleString()); 364 | } 365 | 366 | assert(currentTerm_ <= targetTerm); 367 | 368 | role_ = kFollower; 369 | if (currentTerm_ < targetTerm) { 370 | SetCurrentTerm(targetTerm); 371 | SetVotedFor(kVotedForNull); 372 | votesGot_ = 0; 373 | } 374 | ResetTimer(); 375 | } 376 | 377 | void Raft::ToCandidate() 378 | { 379 | if (role_ != kCandidate) { 380 | DEBUG("raft[%d] %s -> candidate", id_, RoleString()); 381 | } 382 | 383 | role_ = kCandidate; 384 | SetCurrentTerm(currentTerm_+1); 385 | SetVotedFor(id_); // vote myself 386 | votesGot_ = 1; 387 | 388 | if (IsStandalone()) { 389 | ToLeader(); 390 | } 391 | else { 392 | ResetTimer(); 393 | StartRequestVote(); 394 | } 395 | } 396 | 397 | void Raft::ToLeader() 398 | { 399 | DEBUG("raft[%d] %s -> leader", id_, RoleString()); 400 | 401 | nextIndex_.assign(peerNum_, log_.LastIndex() + 1); 402 | matchIndex_.assign(peerNum_, kInitialMatchIndex); 403 | role_ = kLeader; 404 | ResetTimer(); 405 | } 406 | 407 | void Raft::OnNewInputTerm(int term) 408 | { 409 | if (currentTerm_ < term) { 410 | ToFollower(term); 411 | } 412 | } 413 | 414 | void Raft::ResetTimer() 415 | { 416 | timeElapsed_ = 0; 417 | if (role_ != kLeader) 418 | randomizedElectionTimeout_ = randomGen_.Generate(); 419 | } 420 | -------------------------------------------------------------------------------- /raft/Raft.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-4-19. 3 | // 4 | 5 | #ifndef RAFT_RAFT_H 6 | #define RAFT_RAFT_H 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace raft 23 | { 24 | 25 | class Raft : ev::noncopyable 26 | { 27 | public: 28 | Raft(const Config& config, 29 | const std::vector& peers); 30 | 31 | // 32 | // return 33 | // struct RaftState 34 | // { 35 | // int currentTerm; // current term 36 | // bool isLeader; // whether this node believes it is the leader 37 | // }; 38 | // 39 | RaftState GetState() const; 40 | 41 | // 42 | // the service using Raft (e.g. a k/v serverAddress) wants to start 43 | // agreement on the next command to be appended to Raft's log. if this 44 | // serverAddress isn't the leader, returns false. Otherwise propose the 45 | // agreement and return immediately. there is no guarantee that this 46 | // command will ever be committed to the Raft log, since the leader 47 | // may fail or lose an election. 48 | // 49 | // Thread safe, return 50 | // struct ProposeResult 51 | // { 52 | // int expectIndex; // the index that the command will appear if it's ever committed. 53 | // int currentTerm; // current term 54 | // bool isLeader; // true if this node believes it is the leader 55 | // }; 56 | // 57 | ProposeResult Propose(const json::Value& command); 58 | 59 | // 60 | // RequestVote RPC handler 61 | // 62 | void RequestVote(const RequestVoteArgs& args, 63 | RequestVoteReply& reply); 64 | 65 | // 66 | // RequestVote reply callback 67 | // 68 | void OnRequestVoteReply(int peer, 69 | const RequestVoteArgs& args, 70 | const RequestVoteReply& reply); 71 | 72 | // 73 | // AppendEntries RPC handler 74 | // 75 | void AppendEntries(const AppendEntriesArgs& args, 76 | AppendEntriesReply& reply); 77 | 78 | // 79 | // AppendEntries reply callback 80 | // 81 | void OnAppendEntriesReply(int peer, 82 | const AppendEntriesArgs& args, 83 | const AppendEntriesReply& reply); 84 | 85 | // 86 | // external timer input, the frequency is determined by config.timeUnit 87 | // 88 | void Tick(); 89 | 90 | void DebugOutput() const; 91 | 92 | private: 93 | enum Role 94 | { 95 | kLeader, 96 | kCandidate, 97 | kFollower, 98 | }; 99 | 100 | void TickOnElection(); 101 | 102 | void TickOnHeartbeat(); 103 | 104 | void ToFollower(int targetTerm); 105 | 106 | void ToCandidate(); 107 | 108 | void ToLeader(); 109 | 110 | void OnNewInputTerm(int term); 111 | 112 | void ResetTimer(); 113 | 114 | void StartRequestVote(); 115 | 116 | void StartAppendEntries(); 117 | 118 | void ApplyLog(); 119 | 120 | bool IsStandalone() const 121 | { return peerNum_ == 1; } 122 | 123 | void SetCurrentTerm(int term); 124 | 125 | void SetVotedFor(int votedFor); 126 | 127 | const char* RoleString() const 128 | { 129 | return role_ == kLeader ? "leader" : 130 | role_ == kFollower ? "follower" : 131 | "candidate"; 132 | } 133 | 134 | private: 135 | constexpr static int kVotedForNull = -1; 136 | constexpr static int kInitialTerm = 0; 137 | constexpr static int kInitialCommitIndex = 0; 138 | constexpr static int kInitialLastApplied = 0; 139 | constexpr static int kInitialMatchIndex = 0; 140 | constexpr static int kMaxEntriesSendOneTime = 100; 141 | 142 | const int id_; 143 | const int peerNum_; 144 | 145 | Storage storage_; 146 | int currentTerm_ = kInitialTerm; // persistent 147 | Role role_ = kFollower; 148 | int votedFor_ = kVotedForNull; // persistent 149 | int votesGot_ = 0; 150 | 151 | int commitIndex_ = kInitialCommitIndex; 152 | int lastApplied_ = kInitialLastApplied; 153 | Log log_; // persistent 154 | 155 | int timeElapsed_ = 0; 156 | const int heartbeatTimeout_; 157 | const int electionTimeout_; 158 | int randomizedElectionTimeout_ = 0; 159 | Random randomGen_; 160 | 161 | std::vector nextIndex_; 162 | std::vector matchIndex_; 163 | 164 | typedef std::vector RaftPeerList; 165 | const RaftPeerList peers_; 166 | 167 | ApplyCallback applyCallback_; 168 | SnapshotCallback snapshotCallback_; 169 | }; 170 | 171 | } 172 | 173 | #endif //RAFT_RAFT_H 174 | -------------------------------------------------------------------------------- /raft/RaftPeer.cc: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-4-19. 3 | // 4 | 5 | #include 6 | #include 7 | 8 | using namespace raft; 9 | 10 | RaftPeer::RaftPeer(int peer, ev::EventLoop* loop, const ev::InetAddress& serverAddress) 11 | : peer_(peer) 12 | , loop_(loop) 13 | , serverAddress_(serverAddress) 14 | , rpcClient(new jrpc::RaftClientStub(loop_, serverAddress)) 15 | { 16 | SetConnectionCallback(); 17 | } 18 | 19 | RaftPeer::~RaftPeer() = default; 20 | 21 | void RaftPeer::Start() 22 | { 23 | AssertInLoop(); 24 | rpcClient->start(); 25 | } 26 | 27 | void RaftPeer::SetConnectionCallback() 28 | { 29 | rpcClient->setConnectionCallback( 30 | [this](const ev::TcpConnectionPtr& conn) { 31 | bool connected = conn->connected(); 32 | loop_->runInLoop([=](){ 33 | OnConnection(connected); 34 | }); 35 | }); 36 | } 37 | 38 | void RaftPeer::OnConnection(bool connected) 39 | { 40 | AssertInLoop(); 41 | 42 | connected_ = connected; 43 | if (!connected_) { 44 | rpcClient.reset(new jrpc::RaftClientStub(loop_, serverAddress_)); 45 | SetConnectionCallback(); 46 | rpcClient->start(); 47 | } 48 | } 49 | 50 | 51 | void RaftPeer::RequestVote(const RequestVoteArgs& args) 52 | { 53 | AssertInLoop(); 54 | 55 | if (!connected_) 56 | return; 57 | 58 | auto cb = [=](json::Value response, bool isError, bool timeout){ 59 | if (isError || timeout) 60 | return; 61 | 62 | int term = response["term"].getInt32(); 63 | bool voteGranted = response["voteGranted"].getBool(); 64 | 65 | RequestVoteReply reply; 66 | reply.term = term; 67 | reply.voteGranted = voteGranted; 68 | requestVoteReply_(peer_, args, reply); 69 | }; 70 | 71 | rpcClient->RequestVote(args.term, 72 | args.candidateId, 73 | args.lastLogIndex, 74 | args.lastLogTerm, 75 | std::move(cb)); 76 | } 77 | 78 | void RaftPeer::AppendEntries(const AppendEntriesArgs& args) 79 | { 80 | AssertInLoop(); 81 | 82 | if (!connected_) 83 | return; 84 | 85 | auto cb = [=](json::Value response, bool isError, bool timeout) { 86 | if (isError || timeout) 87 | return; 88 | 89 | int term = response["term"].getInt32(); 90 | bool success = response["success"].getBool(); 91 | int expectIndex = response["expectIndex"].getInt32(); 92 | int expectTerm = response["expectTerm"].getInt32(); 93 | 94 | loop_->runInLoop([=](){ 95 | AppendEntriesReply reply; 96 | reply.term = term; 97 | reply.success = success; 98 | reply.expectIndex = expectIndex; 99 | reply.expectTerm = expectTerm; 100 | appendEntriesReply_(peer_, args, reply); 101 | }); 102 | }; 103 | 104 | rpcClient->AppendEntries(args.term, 105 | args.prevLogIndex, 106 | args.prevLogTerm, 107 | args.entries, 108 | args.leaderCommit, 109 | std::move(cb)); 110 | } 111 | -------------------------------------------------------------------------------- /raft/RaftPeer.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-4-19. 3 | // 4 | 5 | #ifndef RAFT_RAFTPEER_H 6 | #define RAFT_RAFTPEER_H 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace raft 13 | { 14 | 15 | class RaftPeer: ev::noncopyable 16 | { 17 | public: 18 | RaftPeer(int peer, ev::EventLoop* loop, const ev::InetAddress& serverAddress); 19 | 20 | ~RaftPeer(); 21 | 22 | void Start(); 23 | 24 | void RequestVote(const RequestVoteArgs& args); 25 | 26 | void AppendEntries(const AppendEntriesArgs& args); 27 | 28 | void SetRequestVoteReplyCallback(const RequestVoteReplyCallback& cb) 29 | { requestVoteReply_ = cb; } 30 | 31 | void SetAppendEntriesReplyCallback(const AppendEntriesReplyCallback& cb) 32 | { appendEntriesReply_ = cb; } 33 | 34 | private: 35 | void AssertInLoop() 36 | { loop_->assertInLoopThread(); } 37 | 38 | void SetConnectionCallback(); 39 | 40 | void OnConnection(bool connected); 41 | 42 | private: 43 | const int peer_; 44 | ev::EventLoop* loop_; 45 | ev::InetAddress serverAddress_; 46 | bool connected_ = false; 47 | 48 | RequestVoteReplyCallback requestVoteReply_; 49 | AppendEntriesReplyCallback appendEntriesReply_; 50 | 51 | typedef std::unique_ptr ClientPtr; 52 | ClientPtr rpcClient; 53 | }; 54 | 55 | } 56 | 57 | #endif //RAFT_RAFTPEER_H 58 | -------------------------------------------------------------------------------- /raft/RaftService.cc: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-4-19. 3 | // 4 | 5 | #include 6 | #include 7 | 8 | using namespace jrpc; 9 | using namespace raft; 10 | 11 | RaftService::RaftService(jrpc::RpcServer& server) 12 | : RaftServiceStub(server) 13 | { 14 | 15 | } 16 | 17 | void RaftService::RequestVote(int term, 18 | int candidateId, 19 | int lastLogIndex, 20 | int lastLogTerm, 21 | const UserDoneCallback& done) 22 | { 23 | RequestVoteArgs args; 24 | args.term = term; 25 | args.candidateId = candidateId; 26 | args.lastLogIndex = lastLogIndex; 27 | args.lastLogTerm = lastLogTerm; 28 | 29 | doRequestVote_(args, [=] (const RequestVoteReply& reply) { 30 | json::Value value(json::TYPE_OBJECT); 31 | value.addMember("term", reply.term); 32 | value.addMember("voteGranted", reply.voteGranted); 33 | done(std::move(value)); 34 | }); 35 | } 36 | 37 | void RaftService::AppendEntries(int term, 38 | int prevLogIndex, 39 | int prevLogTerm, 40 | json::Value entries, 41 | int leaderCommit, 42 | const UserDoneCallback& done) 43 | { 44 | AppendEntriesArgs args; 45 | args.term = term; 46 | args.prevLogIndex = prevLogIndex; 47 | args.prevLogTerm = prevLogTerm; 48 | args.entries = std::move(entries); 49 | args.leaderCommit = leaderCommit; 50 | 51 | doAppendEntries_(args, [=](const AppendEntriesReply& reply) { 52 | json::Value value(json::TYPE_OBJECT); 53 | value.addMember("term", reply.term); 54 | value.addMember("success", reply.success); 55 | value.addMember("expectIndex", reply.expectIndex); 56 | value.addMember("expectTerm", reply.expectTerm); 57 | done(std::move(value)); 58 | }); 59 | } -------------------------------------------------------------------------------- /raft/RaftService.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-4-19. 3 | // 4 | 5 | #ifndef RAFT_RAFTSERVICE_H 6 | #define RAFT_RAFTSERVICE_H 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | class RaftService: public jrpc::RaftServiceStub 14 | { 15 | public: 16 | explicit 17 | RaftService(jrpc::RpcServer& server); 18 | 19 | void SetDoRequestVoteCallback(const raft::DoRequestVoteCallback& cb) 20 | { doRequestVote_ = cb; } 21 | 22 | void SetDoAppendEntriesCallback(const raft::DoAppendEntriesCallback& cb) 23 | { doAppendEntries_ = cb; } 24 | 25 | void RequestVote(int term, 26 | int candidateId, 27 | int lastLogIndex, 28 | int lastLogTerm, 29 | const jrpc::UserDoneCallback& done); 30 | void AppendEntries(int term, 31 | int prevLogIndex, 32 | int prevLogTerm, 33 | json::Value entries, 34 | int leaderCommit, 35 | const jrpc::UserDoneCallback& done); 36 | 37 | private: 38 | raft::DoRequestVoteCallback doRequestVote_; 39 | raft::DoAppendEntriesCallback doAppendEntries_; 40 | }; 41 | 42 | 43 | #endif //RAFT_RAFTSERVICE_H 44 | -------------------------------------------------------------------------------- /raft/Random.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-4-20. 3 | // 4 | 5 | #ifndef RAFT_RANDOM_H 6 | #define RAFT_RANDOM_H 7 | 8 | #include 9 | 10 | namespace raft 11 | { 12 | 13 | class Random 14 | { 15 | public: 16 | Random(int seed, int left, int right) 17 | : engine_(seed), dist_(left, right) 18 | {} 19 | 20 | int Generate() 21 | { 22 | return dist_(engine_); 23 | } 24 | 25 | private: 26 | std::default_random_engine engine_; 27 | std::uniform_int_distribution<> dist_; 28 | }; 29 | 30 | } 31 | 32 | #endif //RAFT_RANDOM_H 33 | -------------------------------------------------------------------------------- /raft/Storage.cc: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-5-10. 3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | using namespace raft; 13 | 14 | namespace 15 | { 16 | 17 | const int kInitialTerm = 0; 18 | const int kVotedForNull = -1; 19 | const int kInitialIndex = 0; 20 | 21 | // leading space makes the keys < index 22 | const char* kCurrentTermKey = " currentTerm"; 23 | const char* kVotedForKey = " votedFor"; 24 | const char* kFirstIndexKey = " firstIndex"; 25 | const char* kLastIndexKey = " lastIndex"; 26 | 27 | json::Value ParseSlice(const leveldb::Slice& slice) 28 | { 29 | json::Document doc; 30 | std::string_view view(slice.data(), slice.size()); 31 | json::ParseError ret = doc.parse(view); 32 | assert(ret == json::PARSE_OK); (void)ret; 33 | return doc; 34 | } 35 | 36 | } 37 | 38 | Storage::Storage(const std::string& path) 39 | { 40 | leveldb::Options options; 41 | leveldb::Status status = leveldb::DB::Open(options, path, &db_); 42 | if (status.ok()) { 43 | InitNoneEmptyDB(); 44 | } 45 | else { 46 | INFO("creating new database..."); 47 | options.create_if_missing = true; 48 | status = leveldb::DB::Open(options, path, &db_); 49 | if (!status.ok()) 50 | FATAL("leveldb create error: %s", status.ToString().c_str()); 51 | InitEmptyDB(); 52 | } 53 | } 54 | 55 | Storage::~Storage() 56 | { delete db_; } 57 | 58 | std::vector 59 | Storage::GetEntries() const 60 | { 61 | char first[11], last[11]; 62 | snprintf(first, sizeof first, "%010d", firstIndex_); 63 | snprintf(last, sizeof last, "%010d", lastIndex_); 64 | 65 | auto it = db_->NewIterator(leveldb::ReadOptions()); 66 | it->Seek(first); 67 | 68 | std::vector vec; 69 | for (; it->Valid(); it->Next()) { 70 | auto key = it->key().ToString(); 71 | if (key > last) 72 | break; 73 | vec.push_back(ParseSlice(it->value())); 74 | } 75 | delete it; 76 | assert(!vec.empty()); 77 | return vec; 78 | } 79 | 80 | void Storage::PutCurrentTerm(int currentTerm) 81 | { 82 | if (currentTerm_ != currentTerm) { 83 | currentTerm_ = currentTerm; 84 | Put(kCurrentTermKey, currentTerm); 85 | } 86 | } 87 | 88 | void Storage::PutVotedFor(int votedFor) 89 | { 90 | if (votedFor_ != votedFor) { 91 | votedFor_ = votedFor; 92 | Put(kVotedForKey, votedFor); 93 | } 94 | } 95 | 96 | void Storage::PutFirstIndex(int firstIndex) 97 | { 98 | if (firstIndex_ != firstIndex) { 99 | firstIndex_ = firstIndex; 100 | Put(kFirstIndexKey, firstIndex); 101 | } 102 | } 103 | 104 | void Storage::PutLastIndex(int lastIndex) 105 | { 106 | if (lastIndex_ != lastIndex) { 107 | lastIndex_ = lastIndex; 108 | Put(kLastIndexKey, lastIndex); 109 | } 110 | } 111 | 112 | void Storage::PrepareEntry(int index, const json::Value& entry) 113 | { 114 | // 115 | // add leading zero, so we can iterate keys in order 116 | // fixme: snprintf() may be very slow!!! 117 | // 118 | char key[11]; 119 | snprintf(key, sizeof key, "%010d", index); 120 | 121 | json::StringWriteStream os; 122 | json::Writer writer(os); 123 | entry.writeTo(writer); 124 | auto value = os.get(); 125 | 126 | batch_.Put(key, leveldb::Slice(value.data(), value.size())); 127 | preparing = true; 128 | } 129 | 130 | void Storage::PutPreparedEntries() 131 | { 132 | assert(preparing); 133 | db_->Write(leveldb::WriteOptions(), &batch_); 134 | batch_.Clear(); 135 | preparing = false; 136 | } 137 | 138 | void Storage::InitEmptyDB() 139 | { 140 | currentTerm_ = kInitialTerm; 141 | votedFor_ = kVotedForNull; 142 | firstIndex_ = kInitialIndex; 143 | lastIndex_ = kInitialIndex; 144 | 145 | Put(kCurrentTermKey, currentTerm_); 146 | Put(kVotedForKey, votedFor_); 147 | Put(kFirstIndexKey, firstIndex_); 148 | Put(kLastIndexKey, lastIndex_); 149 | 150 | json::Value entry(json::TYPE_OBJECT); 151 | entry.addMember("term", kInitialTerm); 152 | entry.addMember("command", "leveldb initialized"); 153 | PrepareEntry(kInitialIndex, entry); 154 | PutPreparedEntries(); 155 | } 156 | 157 | void Storage::InitNoneEmptyDB() 158 | { 159 | currentTerm_ = Get(kCurrentTermKey); 160 | votedFor_ = Get(kVotedForKey); 161 | firstIndex_ = Get(kFirstIndexKey); 162 | lastIndex_ = Get(kLastIndexKey); 163 | } 164 | 165 | void Storage::Put(const leveldb::Slice& key, int value) 166 | { 167 | auto status = db_->Put(leveldb::WriteOptions(), key, 168 | std::to_string(value)); 169 | if (!status.ok()) { 170 | FATAL("levedb::Put failed: %s", status.ToString().c_str()); 171 | } 172 | } 173 | 174 | int Storage::Get(const leveldb::Slice& key) 175 | { 176 | std::string value; 177 | auto status = db_->Get(leveldb::ReadOptions(), key, &value); 178 | if (!status.ok()) { 179 | FATAL("leveldb::Get failed: %s", status.ToString().c_str()); 180 | } 181 | return std::stoi(value); 182 | } 183 | 184 | -------------------------------------------------------------------------------- /raft/Storage.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-5-10. 3 | // 4 | 5 | #ifndef RAFT_STORAGE_H 6 | #define RAFT_STORAGE_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace raft 15 | { 16 | 17 | class Storage : ev::noncopyable 18 | { 19 | public: 20 | explicit 21 | Storage(const std::string& path); 22 | 23 | ~Storage(); 24 | 25 | void PutCurrentTerm(int currentTerm); 26 | 27 | void PutVotedFor(int votedFor); 28 | 29 | void PutFirstIndex(int firstIndex); 30 | 31 | void PutLastIndex(int lastIndex); 32 | 33 | void PrepareEntry(int index, const json::Value& entry); 34 | 35 | void PutPreparedEntries(); 36 | 37 | int GetCurrentTerm() const 38 | { return currentTerm_; } 39 | 40 | int GetVotedFor() const 41 | { return votedFor_; } 42 | 43 | int GetFirstIndex() const 44 | { return firstIndex_; } 45 | 46 | int GetLastIndex() const 47 | { return lastIndex_; } 48 | 49 | std::vector GetEntries() const; 50 | 51 | private: 52 | void InitEmptyDB(); 53 | 54 | void InitNoneEmptyDB(); 55 | 56 | void Put(const leveldb::Slice& key, int value); 57 | 58 | int Get(const leveldb::Slice& key); 59 | 60 | private: 61 | int currentTerm_; 62 | int votedFor_; 63 | int firstIndex_; 64 | int lastIndex_; 65 | leveldb::DB* db_; 66 | leveldb::WriteBatch batch_; 67 | bool preparing = false; 68 | }; 69 | 70 | } 71 | 72 | #endif //RAFT_STORAGE_H 73 | -------------------------------------------------------------------------------- /raft/Struct.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by frank on 18-5-15. 3 | // 4 | 5 | #ifndef RAFT_STRUCT_H 6 | #define RAFT_STRUCT_H 7 | 8 | #include 9 | 10 | namespace raft 11 | { 12 | 13 | struct RequestVoteArgs 14 | { 15 | int term = -1; 16 | int candidateId = -1; 17 | int lastLogIndex = -1; 18 | int lastLogTerm = -1; 19 | }; 20 | 21 | struct RequestVoteReply 22 | { 23 | int term = -1; 24 | bool voteGranted = false; 25 | }; 26 | 27 | struct AppendEntriesArgs 28 | { 29 | int term = -1; 30 | int prevLogIndex = -1; 31 | int prevLogTerm = -1; 32 | json::Value entries; 33 | int leaderCommit = -1; 34 | }; 35 | 36 | struct AppendEntriesReply 37 | { 38 | int term = -1; 39 | bool success = false; 40 | int expectIndex = -1; 41 | int expectTerm = -1; 42 | }; 43 | 44 | struct ProposeResult 45 | { 46 | int expectIndex = -1; 47 | int currentTerm = -1; 48 | bool isLeader = false; 49 | }; 50 | 51 | struct RaftState 52 | { 53 | int currentTerm = -1; 54 | bool isLeader = false; 55 | }; 56 | 57 | struct IndexAndTerm 58 | { 59 | int index; 60 | int term; 61 | }; 62 | 63 | struct ApplyMsg 64 | { 65 | ApplyMsg(int index_, const json::Value& command_) 66 | : index(index_), command(command_) 67 | {} 68 | 69 | int index; 70 | json::Value command; 71 | }; 72 | 73 | } 74 | 75 | #endif //RAFT_STRUCT_H 76 | -------------------------------------------------------------------------------- /raft/spec.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Raft", 3 | "rpc": [ 4 | { 5 | "name": "RequestVote", 6 | "params": { 7 | "term": 0i32, 8 | "candidateId": 0i32, 9 | "lastLogIndex": 0i32, 10 | "lastLogTerm": 0i32 11 | }, 12 | "returns": { 13 | "term": 0i32, 14 | "voteGranted": true 15 | } 16 | }, 17 | { 18 | "name": "AppendEntries", 19 | "params": { 20 | "term": 0i32, 21 | "prevLogIndex": 0i32, 22 | "prevLogTerm": 0i32, 23 | "entries": [], 24 | "leaderCommit": 0i32 25 | }, 26 | "returns": { 27 | "term": 0i32, 28 | "success": true, 29 | "expectIndex": 0i32, 30 | "expectTerm": 0i32 31 | } 32 | } 33 | ] 34 | } --------------------------------------------------------------------------------