├── .gitignore ├── META ├── Makefile ├── Makefile.opamlib ├── README.md ├── _tags ├── doc ├── introduction.md └── replication_performance.md ├── opam ├── src ├── raft.mllib ├── raft.odocl ├── raft_helper.ml ├── raft_helper.mli ├── raft_log.ml ├── raft_log.mli ├── raft_protocol.ml ├── raft_protocol.mli ├── raft_types.ml └── raft_types.mli └── tests └── test.ml /.gitignore: -------------------------------------------------------------------------------- 1 | *.cm* 2 | *.tsk 3 | *.a 4 | *.o 5 | 6 | *.sw* 7 | -------------------------------------------------------------------------------- /META: -------------------------------------------------------------------------------- 1 | version = "0.1.0" 2 | description = "Raft Protocol Logical Library" 3 | archive(byte) = "raft.cma" 4 | archive(native) = "raft.cmxa" 5 | exists_if = "raft.cma" 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | LIB_NAME=raft 2 | 3 | LIB_FILES+=raft_helper 4 | LIB_FILES+=raft_protocol 5 | LIB_FILES+=raft_types 6 | LIB_FILES+=raft_log 7 | 8 | LIB_DEPS= 9 | 10 | test: 11 | $(OCB) test.native 12 | export OCAMLRUNPARAM="b" && ./test.native 13 | 14 | doc-gen: 15 | $(OCB) src/$(LIB_NAME).docdir/index.html 16 | 17 | ## Generic library makefile ## 18 | include Makefile.opamlib 19 | 20 | -------------------------------------------------------------------------------- /Makefile.opamlib: -------------------------------------------------------------------------------- 1 | ifneq ($(LIB_DEPS),) 2 | LIB_DEPS:=-pkgs $(LIB_DEPS) 3 | endif 4 | 5 | OCB_INC = -I src -I tests 6 | OCB_FLAGS = -use-ocamlfind $(LIB_DEPS) 7 | OCB = ocamlbuild $(OCB_FLAGS) $(OCB_INC) 8 | 9 | .PHONY: lib.native lib.byte lib.install lib.uninstall clean 10 | 11 | lib.native: 12 | $(OCB) $(LIB_NAME).cmxa 13 | $(OCB) $(LIB_NAME).cmxs 14 | 15 | lib.byte: 16 | $(OCB) $(LIB_NAME).cma 17 | 18 | LIB_BUILD =_build/src/ 19 | LIB_INSTALL = META 20 | LIB_INSTALL +=$(patsubst %,$(LIB_BUILD)/%.mli,$(LIB_FILES)) 21 | LIB_INSTALL +=$(patsubst %,$(LIB_BUILD)/%.cmi,$(LIB_FILES)) 22 | LIB_INSTALL +=$(patsubst %,$(LIB_BUILD)/%.annot,$(LIB_FILES)) 23 | LIB_INSTALL +=$(patsubst %,$(LIB_BUILD)/%.cmo,$(LIB_FILES)) 24 | LIB_INSTALL +=$(LIB_BUILD)/$(LIB_NAME).cma 25 | 26 | LIB_INSTALL +=-optional 27 | LIB_INSTALL +=$(patsubst %,$(LIB_BUILD)/%.cmx,$(LIB_FILES)) 28 | LIB_INSTALL +=$(patsubst %,$(LIB_BUILD)/%.cmt,$(LIB_FILES)) 29 | LIB_INSTALL +=$(LIB_BUILD)/$(LIB_NAME).cmxa 30 | LIB_INSTALL +=$(LIB_BUILD)/$(LIB_NAME).cmxs 31 | LIB_INSTALL +=$(LIB_BUILD)/$(LIB_NAME).a 32 | 33 | lib.install: 34 | ocamlfind install $(LIB_NAME) $(LIB_INSTALL) 35 | 36 | lib.uninstall: 37 | ocamlfind remove $(LIB_NAME) 38 | 39 | clean: 40 | $(OCB) -clean 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Raft Protocol ## 2 | 3 | > This repo implements the [Raft consensus protocol](https://raft.github.io/) 4 | > in a functional way using the OCaml language. 5 | 6 | The library solely implements the pure logical part of the protocol. It is 7 | **agnostic** of the transport mechanism, server identification and 8 | payload (ie log data) format. 
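For instance, handling an incoming vote request boils down to calling a pure function: the library computes the new state and the response message, while the application stays in charge of the clock and of actually sending bytes on the wire. The sketch below is purely illustrative (the `send` callback and the surrounding driver are not part of this library); `Raft_protocol.handle_request_vote_request` is the function defined in [src/raft_protocol.ml](src/raft_protocol.ml).

```OCaml
(* Illustrative host-application code: the application owns the
 * transport ([send]) and the clock ([now]); the library performs
 * no I/O of its own. *)
let on_request_vote_request ~send ~now state request =
  let state, response =
    Raft_protocol.handle_request_vote_request state request now
  in
  send response;
  state
```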
9 | 10 | ## Project structure ## 11 | 12 | #### [Types](src/raft.proto) #### 13 | 14 | All the types involved in the RAFT protocoal are defined in the 15 | [protobuf](https://developers.google.com/protocol-buffers/) format (see [raft.proto](src/raft.proto)). 16 | [ocaml-protoc](https://github.com/mransan/ocaml-protoc/) compiler is then used to 17 | generate [the equivalent types](src/raft_pb.mli) in OCaml as well as serialization functions. 18 | 19 | 20 | #### [Helper Functions](src/raft_helper.mli) #### 21 | 22 | The module [Raft_helper](src/raft_helper.mli) provides convenience routines to manipulate the 23 | generated [types](src/raft_pb.mli). 24 | 25 | 26 | #### [Protocol Logic](src/raft_logic.mli) #### 27 | 28 | The protocol logic is implemented in the [Raft_logic](src/raft_logic.mli) module. The protcol implementation 29 | is divided into 2 subsections for each of the 2 request/response of the protocol: 30 | * Request Vote : For leader election 31 | * Append log entry: For state replication 32 | 33 | Each subsection implements the following logic: 34 | 35 | **Make Request** 36 | 37 | > This function creates a request value given the current state of the server and optionally which 38 | > server the request is sent to. 39 | 40 | **Handle Request** 41 | 42 | > This function computes the effect of the received request to the server state. The implementaiton being 43 | > functional, a new state is returned. 44 | 45 | **Handle Response** 46 | 47 | > This function computes the effect of a received response to the server. Along with a new state, a 48 | > **Follow Up Action** is returned to let the application perform the expected actions. 49 | 50 | 51 | #### Example 52 | 53 | The example below demonstrates how to use the library to implement an Append Entry request/response 54 | communication between a leader (server 0) and a follower (server 1). In this example the 55 | leader has a log which needs to be replicated on the follower. 56 | 57 | ```OCaml 58 | let () = 59 | (* Example *) 60 | 61 | (* Create a 3 server configuration. 62 | *) 63 | let configuration = Raft_pb.( { 64 | nb_of_server = 3; 65 | election_timeout = 0.1; 66 | }) in 67 | 68 | (* Create a leader state by simulating a (rigged) election 69 | *) 70 | let leader_0 = 71 | Raft_helper.Follower.create ~configuration ~id:0 () 72 | |> Raft_helper.Candidate.become ~now:0.0 73 | |> Raft_helper.Leader.become 74 | |> Raft_helper.Leader.add_log (Bytes.of_string "Foo") 75 | in 76 | 77 | (* Create a follower 78 | *) 79 | let follower_1 = 80 | Raft_helper.Follower.create ~configuration ~id:1 () 81 | in 82 | 83 | (* First create an 'Append Entries' request from the 84 | leader to server 1. 85 | *) 86 | match Raft_logic.Append_entries.make leader_0 1 with 87 | | Some request -> ( 88 | 89 | (* 'Update' server 1 (ie follower) by applying the request. This returns 90 | the response to send to the leader. 91 | *) 92 | let follower_1, response = Raft_logic.Append_entries.handle_request follower_1 request in 93 | 94 | (* 'Update' server 0 (ie leader) by applying the response. This returns 95 | the new state as well as a follow up action to take. 
96 | *) 97 | let leader_0, action = Raft_logic.Append_entries.handle_response leader_0 response in 98 | 99 | (* Check that the follower has successfully replicated the leader's single 100 | log 101 | *) 102 | match follower_1.log with 103 | | {data; _ } :: [] -> 104 | if "Foo" = Bytes.to_string data 105 | then print_endline "Log successfully replicated in follower" 106 | else print_endline "Log replication was corrupted" 107 | | _ -> print_endline "Log replication failure" 108 | ) 109 | | None -> () 110 | ``` 111 | 112 | ## Notes on implementing Log size limit ## 113 | 114 | **Problem with current implementation** 115 | 116 | > The current implementation keeps on adding `log entry`s to an in-memory 117 | > log data structure and never deletes them. 118 | > Eventually the process will run out of memory. 119 | 120 | The idea would be to keep only the most recent `log entry`s in memory; keep in 121 | mind that all `log entry`s are permanently recorded on disk. 122 | 123 | **Problem with Limiting the log size** 124 | 125 | In normal mode of operation, followers replicate the leader log 126 | quickly and are never too far behind. Problems arise when one of the servers is 127 | taken offline for a long time. In this case the follower has a lot of 128 | `log entry`s to catch up on. This creates 2 major issues: 129 | 130 | * The leader does a lot of work to make the follower catch up. The leader 131 | keeps sending Append Entries requests one after the other and this usually 132 | slows down the leader process, which also needs to keep appending new 133 | `log entry`s. 134 | 135 | * If we limit the log size as proposed above, then it is likely that if the 136 | follower has been taken offline for an extended period of time the logs 137 | it needs to replicate next are no longer stored in the in-memory log data 138 | structure of the leader. 139 | 140 | **Solution overview** 141 | 142 | First, the leader can easily detect when the follower is too far behind 143 | (see notes on backlog situation detection). The next Append Entries request 144 | it sends can indicate to the follower that no logs can be replicated 145 | until the follower has caught up. Additionally, through heartbeat Append Entries 146 | requests the leader can also indicate the minimum log index to be replicated 147 | on the follower for it to resume the RAFT replication. 148 | 149 | In order not to disrupt the leader, a separate process called `backlog` can 150 | run on each of the RAFT servers. Its sole purpose is streaming logs back to the 151 | follower which is lagging behind. This multi-process approach offers several 152 | advantages: 153 | 154 | * The backlog server protocol could be optimized for sending large quantities 155 | of logs. For instance compression could be used and TCP is likely more 156 | suited as well. 157 | 158 | * The backlog process won't affect the raft server (leader); it will use 159 | limited memory, and can run on a separate CPU. Furthermore it will only 160 | read past `log entry`s from the permanent storage, which are immutable. 161 | Databases like RocksDB allow a separate process to open the DB in read-only 162 | mode, and our design could fully leverage this feature. 163 | 164 | * The follower which is behind could use the backlog server of another follower; 165 | in general the leader process is the one which consumes the most resources, 166 | so it could be better to use a follower. 167 | 168 | **Solution Details** 169 | 170 | * 2 new fields in configuration: max_log_size\_\{upper_bound|lower_bound}.
When 171 | adding new log entries will make the log size go over upper bound, then 172 | truncate the log to lower bound prior to adding the new log entries. 173 | 174 | * Implement the enforcement of the upper and lower bound value in 175 | [Raft_log](src/raft_log.mli) module 176 | 177 | * Leader detecs the backlog situation when the follower sends back an 178 | Append Entries Response with `prev log index` which is smaller than the 179 | earliest `log entry` in the in-memory data structure. 180 | 181 | * Append Entries Request needs to be modified to notify the follower that it is 182 | in a backlog situation. 183 | 184 | * Follower state needs to keep track of the backlog requirement (ie the 185 | earlies `log entry` to be replicated. 186 | 187 | * `Raft_logic.result` should inform the app of the backlog requirement. This 188 | will then leveraged by the application to contact a backlog server for 189 | streaming back the missing logs. 190 | 191 | * Add `Raft_logic.handle_add_backlog` for the application to fill back the 192 | in memory log data structure of the follower 193 | 194 | * Make sure to read the lower bound entries backward from the permanent storage 195 | when starting a server 196 | -------------------------------------------------------------------------------- /_tags: -------------------------------------------------------------------------------- 1 | true: bin_annot 2 | true: annot 3 | true: safe_string 4 | true: keep_locs 5 | true: warn(A-4-42-41) 6 | true: debug 7 | true: no_alias_deps 8 | -------------------------------------------------------------------------------- /doc/introduction.md: -------------------------------------------------------------------------------- 1 | # A Functional Implementation of the RAFT protocol 2 | 3 | ## Introduction 4 | 5 | > In this serie of blog posts we will implement the consensus protocol RAFT in 6 | > a purely functional style. 7 | 8 | **Consensus algorithms** are interesting protocols which are particularly relevant with modern distributed architetures. The RAFT protocol proposes a relatively simple to understand approach and offers a great introduction to the consensus problem. 9 | 10 | The benefit of **Functional Programming** have been discussed (and argued over) numerous time, the focus here is rather to provide a concrete implementation of a protocol with a functional approach. 11 | 12 | **OCaml** language is elegant, well proven and really fast. No OCaml knowledge is required for these blog posts and if you are interested in learning this language through a concrete application then stay tuned. We'll only cover a very small fraction of the language and its ecosystem and hopefuly it will make you want to lean more. 13 | 14 | Here are the main technology we'll be using: 15 | 16 | * [Google Protobuf](https://developers.google.com/protocol-buffers): Language agnostic message speficications. We'll use that format for both the protocol messages and state specifications. 17 | * [OCaml-protoc](https://github.com/mransan/ocaml-protoc): OCaml compiler for protobuf messages. 18 | * [OCaml](http://ocaml.org/) core language 19 | * [Lwt](http://ocsigen.org/lwt/):OCaml library for concurrent programming (with futures and promises) 20 | * Unix UDP for the transport protocol 21 | 22 | ## Consensus Protocols 23 | 24 | The goal of a consensus protocols is to ensure that participating servers will eventually be consistent even if certain failure happened. 
Such protocol can differ with regards to the state they manage as well as the type of failure they are resilient to. 25 | 26 | **RAFT** protocol ensures the consistent execution of a `state machine` and is resilient to `fail-stop` failures. `fail-stop` failures are essentially server crashes or a server not receiving messages. **RAFT** protocol does not support Byzantine failures which is when a server is acting maliciously. 27 | 28 | In the next section we will look into details about what is a state machine with a concrete and simple example implemented in OCaml. 29 | 30 | #### State Machine 31 | 32 | A `state machine` is composed of a state and a series of command to be executed on the state. Each command has a payload and executing the command will modify the state. 33 | 34 | For instance if we want to represent a (limited) algebra state machine we could have the following: 35 | ```OCaml 36 | (** Named variable *) 37 | type var = { 38 | name : string; 39 | value : float; 40 | } 41 | 42 | (** State of the state machine *) 43 | type state = var list 44 | 45 | (** Command of the state machine *) 46 | type cmd = 47 | | Store of var 48 | | Add_and_assign of string * string * string 49 | (* (lhs , rhs , new_variable_name) *) 50 | ``` 51 | 52 | Let's stop here for a second and look at our first OCaml code. The code above demonstrates the use of the 3 most widely used OCaml types 53 | 54 | * **Records** (`type var = {...}`): Similar to a C `struct` 55 | * **List** (`type state = var list`): List is a builtin type in OCaml and represents a singly linked list. It's an immutable data structure. 56 | * **Variant** (`type cmd = |... |... `): Type to represent a choice. `cmd` can either be `Store` or `Add_and_assign`. Each choice is called a constructor. A constructor can have zero or many arguments. 57 | 58 | `Store x` adds the given variable `x` to the state. 59 | 60 | `Add_and_assign ("x", "y", "z")` sums the values associated with `"x"` and `"y"` and stores the result in a variable with name `"z"`. 61 | 62 | Here is an example of a sequence of commands: 63 | 64 | ```OCaml 65 | let cmds = [ 66 | Store {name = "x"; value = 1.}; 67 | Store {name = "y"; value = 4.}; 68 | Add_and_assign ("x", "y", "z"); 69 | ] 70 | ``` 71 | After exectution of the above commands on an initial empty list state, we would then expect the resulting state: 72 | ```OCaml 73 | let expected_state = [ 74 | {name = "z"; value = 5.}; 75 | {name = "y"; value = 4.}; 76 | {name = "x"; value = 1.}; 77 | ] 78 | ``` 79 | As far as OCaml is concerned we've just learned how to create values of the types we previously defined. 80 | 81 | Let's now write our first function in OCaml which will perform the execution of the state machine command: 82 | 83 | ```OCaml 84 | let execute cmd state = 85 | match cmd with 86 | | Store v -> v::state 87 | | Add_and_assign (xname, yname, zname) -> 88 | let xvar = List.find (fun v -> v.name = xname) state in 89 | let yvar = List.find (fun v -> v.name = yname) state in 90 | {name = zname; value = xvar.value +. yvar.value} :: state 91 | ``` 92 | 93 | Let's look into more details at a few constructs that the OCaml language offers: 94 | 95 | **`match cmd with | Store ..-> ... | ...`** 96 | 97 | This `match with` expression performs a proof by case logic. The OCaml compiler has special support to detect missing cases which helps a lot for finding bugs early. This construct is called **pattern matching** and is heavily used in OCaml. 
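To see this check in action, here is a small, deliberately incomplete function over the `cmd` type defined above (`is_store` is just an illustrative helper, not part of the state machine):

```OCaml
(* Deliberately incomplete: the [Add_and_assign] case is not handled. *)
let is_store cmd =
  match cmd with
  | Store _ -> true
```

The compiler accepts it but immediately emits warning 8 (non-exhaustive pattern matching), together with an example of a value that is not matched, here `Add_and_assign (_, _, _)`.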
98 | 99 | **`v::state`** 100 | 101 | The expression `v::state` is simply the builtin syntax for to append a value (`v`) at the head of the list (`state`). 102 | 103 | **`fun x -> ...`** 104 | 105 | OCaml is a functional language; you can create anonymous function using `(fun x -> ...)` expression. 106 | 107 | **`yvar.value`** 108 | 109 | Record fields access using the classic `.` (dot) syntax. (`yvar.value`). 110 | 111 | > Notice the lack of type annotation in the above code! In fact the OCaml compiler infer all the types and guarantees type safety. The syntax is minimal without sacrifying program correctness. 112 | 113 | ## Reaching consensus for a State machine 114 | 115 | Because a state machine has a deterministic execution, in order to get a replicated state on all the servers, the consensus protocol must solely ensure the correct ordered replication of the commands. 116 | 117 | The **RAFT** protocol is agnostic of the type of command; in fact for our implementation we will define the command as a byte sequence. Each command is wrapped by the RAFT protocol into a `log entry` data structure which uniquely index and stricly order each command. 118 | 119 | This `log entry` is a fundamental part of the RAFT protocol and used throughout messages and state specification. Let's therefore introduce our first Protobuf message: 120 | 121 | ```JavaScript 122 | message LogEntry { 123 | required int32 index = 1; 124 | required int32 term = 2; 125 | required bytes data = 3; 126 | } 127 | ``` 128 | 129 | The [term] field can be ignored for now as it will be explained later. The [index] field is a unique and strickly increasing value for each command. [data] is a placeholder field the application state machine commands. 130 | -------------------------------------------------------------------------------- /doc/replication_performance.md: -------------------------------------------------------------------------------- 1 | ### Functional log replication in RAFT 2 | 3 | The RAFT protocol is a consensus protocol for a log data structure. Each server 4 | in a RAFT cluster will eventually have the same log data structure. 5 | 6 | The RAFT protocol is based on the leader election mechanism; the initial phase of the protocol consists 7 | in deciding which server will be the leader. From then on, the leader server is responsible to append 8 | new log entries to its internal log as well as replicating the new entries in all the follower servers. 9 | 10 | In this post we will look at a practical performance problem related to implementing log replication 11 | both efficiently using solely functional data structures. 12 | 13 | **Log Entry data structure** 14 | 15 | ```OCaml 16 | type log_entry = { 17 | index : int; 18 | term : int; 19 | data : bytes; 20 | } 21 | ``` 22 | 23 | The `index` is monotically increasing (by 1) and starting at 1. 24 | The `term` corresponds to the election term that this `log entry` was created at. 25 | 26 | It's important to notice that both index and term are needed to uniquely identify a log 27 | entry. In fact, in certain circumstances where a previous leader crashed without fully 28 | replicating a log entry, it is possible that 2 servers will have log entries with 29 | the same index but different terms. (RAFT protocol ensure that only one is eventually persisted). 30 | 31 | **Log data structure** 32 | 33 | The log is the collection of `log_entry`s: 34 | 35 | ```OCaml 36 | {index = 10;term = 2}::{index = 9; term=1}:: .... 
::{index = 1; term=1}::[] 37 | ``` 38 | 39 | The log ordering is from latest to earliest log. 40 | 41 | **Replication protocol** 42 | 43 | During log replication the leader must replicate its `log` to all the followers and the RAFT protocol 44 | defines the following (simplified) request to be sent from the leader to each follower. 45 | 46 | ```OCaml 47 | type request = { 48 | leader_id : int; 49 | prev_log_index : int; 50 | prev_log_term : int; 51 | rev_log_entries : log_entry list; 52 | } 53 | ``` 54 | 55 | Besides its `id` and the `log_entry`s to be replicated, the leader also sends 56 | the `index` and `term` of the last log it believes was replicated on the follower. This extra 57 | information helps both parties to synchronize. 58 | 59 | The (simplified) response replied by the follower to its leader is: 60 | 61 | ```OCaml 62 | type response = 63 | | Success of {last_log_index : int} 64 | | Failure of {last_log_index : int; last_log_term : int} 65 | ``` 66 | 67 | In case of successful replication the follower server sends the index of the log it replicated (in general this would 68 | be the latest log sent in the request). In case of failure the follower additionally sends the corresponding term. 69 | 70 | A failure happens in the follower when the `index`/`term` information sent by the leader does not match 71 | the follower state. There are 2 main reasons: 72 | a) The entry is simply not in the follower log. The follower is lagging behind and 73 | needs earlier log entries than the ones sent in that request. 74 | b) The entry has a different term. This could happen during a leader crash. That log index was 75 | replicated on the follower, however it was never committed. 76 | 77 | **Normal operation** 78 | 79 | Let's look at what the server is doing when computing the replication request to be 80 | sent to a follower. We assume here that the leader is keeping track of the `prev_log_index` 81 | for each of the followers. 82 | 83 | ```OCaml 84 | let collect (prev_log_index:int) (leader_id:int) (leader_log:log_entry list) = 85 | 86 | let rec aux rev_log_entries = function 87 | | [] -> ( 88 | (* This can happen when no logs have previously been 89 | * replicated on the follower. 90 | * By convention the first log index is 1 and therefore 91 | * the expected `prev_log_index` should be 0. 92 | *) 93 | assert(prev_log_index = 0); 94 | {leader_id; prev_log_index = 0; prev_log_term = 0; rev_log_entries} 95 | ) 96 | 97 | | {index; term; _}::_ when index = prev_log_index -> 98 | (* The prev_log_index is successfully found 99 | * in the leader log. 100 | *) 101 | {leader_id; prev_log_index; prev_log_term = term; rev_log_entries} 102 | 103 | | log_entry::tl -> 104 | (* More logs need to be accumulated. 105 | *) 106 | aux (log_entry::rev_log_entries) tl 107 | in 108 | aux [] leader_log 109 | ``` 110 | 111 | The code above iterates from latest to earliest `log entry`s until the `prev_log_index` is found. Log entries 112 | are accumulated in a list in reverse order (hence the name `rev_log_entries`). 113 | 114 | In normal mode of operation, the follower is not lagging too much behind the leader; consequently only a 115 | small number of iterations should be required to compute the request. 116 | The `list` data structure used to store the logs is particularly efficient: adding a new log entry at the head of the 117 | `list` is constant time and computing the replication request is linear in the number of logs to replicate. 118 | 119 | **Corner cases (1)** 120 | 121 | From time to time servers will go offline.
This can happen in case of a machine failure or if a machine needs to 122 | go through maintenance. No matter the reason, when the follower rejoins the replication process it will be lagging 123 | far behind. 124 | 125 | The first problem is that in practice replication requests have a `max` number of log entries. It's impractical to send 126 | gigantic messages, and the RAFT protocol accommodates this nicely by letting the leader send a limited number of log entries. 127 | 128 | Here is the code to get the subset of logs for the replication request: 129 | 130 | ```OCaml 131 | let rec keep_first_n l = function 132 | | 0 -> [] 133 | | n -> 134 | begin match l with 135 | | hd::tl -> hd::(keep_first_n tl (n - 1)) 136 | | [] -> [] 137 | end 138 | 139 | let collect_with_max max prev_log_index leader_id leader_log = 140 | let request = collect prev_log_index leader_id leader_log in 141 | {request with 142 | rev_log_entries = keep_first_n request.rev_log_entries max} 143 | ``` 144 | 145 | Let's define: 146 | * `n` the number of log entries to be replicated 147 | * `m` the max number of log entries that can be sent in a single replication request 148 | 149 | Since each request walks the whole leader log down to `prev_log_index`, the total cost of the catch-up is therefore 150 | > n + (n - m) + (n - 2\*m) + ... + (n - (n/m)\*m) 151 | > ≈ n^2 / (2\*m) 152 | 153 | which is quadratic in `n`. Note that during normal operation `m` does not play a role since it is likely `> n`, 154 | which makes the complexity `O(n)` as previously mentioned. 155 | 156 | The impact of such slow performance is quite large. Not only is the replication for one of the 157 | followers slow, but the entire leader also gets slower. 158 | 159 | The RAFT protocol relies on heartbeat messages from the leader to 160 | the followers to notify them that it is still active; if the replication request 161 | takes too long then followers will not receive a heartbeat and they 162 | will start a new leader election.
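This quadratic behaviour is what motivates replacing the plain list by a data structure indexed by log index. This is the route taken by this repository: in [src/raft_log.ml](src/raft_log.ml) the recent entries are kept in a standard OCaml `Map` keyed by index, and `Map.split` is used to jump directly to `prev_log_index`. The sketch below only illustrates the idea; `Log_map`, `Enough` and `collect_from_map` are illustrative names, not the library's API (the library uses its own `IntMap` and a `Done` exception to stop early in the same way).

```OCaml
module Log_map = Map.Make(struct
  type t = int
  let compare (x:int) (y:int) = compare x y
end)

exception Enough of log_entry list

(* The log is keyed by index: finding [prev_log_index] no longer
 * requires scanning every entry appended since the follower went
 * offline. *)
let collect_from_map ~prev_log_index ~max (log:log_entry Log_map.t) =
  (* [Map.split] is logarithmic: [after] contains exactly the entries
   * with an index strictly greater than [prev_log_index]. *)
  let _before, _at, after = Log_map.split prev_log_index log in
  (* Accumulate at most [max] entries; the exception stops the fold
   * early, similar to what raft_log.ml does with its [Done] exception. *)
  let rev_entries =
    try
      Log_map.fold (fun _ e (acc, n) ->
        if n >= max then raise (Enough acc) else (e :: acc, n + 1)
      ) after ([], 0) |> fst
    with Enough acc -> acc
  in
  (* [Map.fold] visits keys in increasing order, so reversing the
   * accumulator yields the entries in chronological order. *)
  List.rev rev_entries
```

With this representation each request costs `O(log n + m)` instead of `O(n)`, so the full catch-up drops from roughly `n^2 / (2\*m)` to roughly `n + (n/m) \* log n` operations.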
163 | -------------------------------------------------------------------------------- /opam: -------------------------------------------------------------------------------- 1 | opam-version: "1.2" 2 | name: "raft" 3 | version: "0.1.0" 4 | maintainer: "Maxime Ransan " 5 | authors:[ 6 | "Maxime Ransan " 7 | ] 8 | homepage: "https://github.com/mransan/raft" 9 | bug-reports:"https://github.com/mransan/raft/issues" 10 | dev-repo:"https://github.com/mransan/raft.git" 11 | license: "MIT" 12 | build: [ 13 | [make "lib.byte"] 14 | [make "lib.native"] { ocaml-native } 15 | ] 16 | install: [ 17 | [make "lib.install" ] 18 | ] 19 | remove: [make "lib.uninstall"] 20 | depends: [ 21 | "ocamlfind" {build} 22 | "ocamlbuild" {build} 23 | ] 24 | -------------------------------------------------------------------------------- /src/raft.mllib: -------------------------------------------------------------------------------- 1 | Raft_helper 2 | Raft_protocol 3 | Raft_types 4 | Raft_log 5 | -------------------------------------------------------------------------------- /src/raft.odocl: -------------------------------------------------------------------------------- 1 | Raft_protocol 2 | Raft_types 3 | Raft_log 4 | Raft_helper 5 | -------------------------------------------------------------------------------- /src/raft_helper.ml: -------------------------------------------------------------------------------- 1 | module Types = Raft_types 2 | module Log = Raft_log 3 | 4 | module Configuration = struct 5 | 6 | let is_majority {Types.nb_of_server; _} nb = 7 | nb > (nb_of_server / 2) 8 | 9 | let election_timeout configuration = 10 | let { 11 | Types.election_timeout = t; 12 | election_timeout_range = r; _ } = configuration in 13 | t +. (Random.float r -. (r /. 2.)) 14 | 15 | end 16 | 17 | module Follower = struct 18 | 19 | let make ?log ?commit_index ?current_term ~configuration ~now ~server_id () = 20 | 21 | let current_term = match current_term with 22 | | None -> 0 23 | | Some current_term -> current_term 24 | in 25 | 26 | let log = match log with 27 | | None -> Log.empty configuration.Types.max_log_size 28 | | Some log -> log 29 | in 30 | 31 | let commit_index = match commit_index with 32 | | None -> 0 33 | | Some commit_index -> commit_index 34 | in 35 | 36 | let timeout = Configuration.election_timeout configuration in 37 | { 38 | Types.current_term; 39 | server_id; 40 | log; 41 | commit_index; 42 | role = Types.(Follower { 43 | voted_for = None; 44 | current_leader = None; 45 | election_deadline = now +. timeout 46 | }); 47 | configuration; 48 | } 49 | 50 | let become ?current_leader ~now ~term state = 51 | let election_deadline = 52 | now +. 
Configuration.election_timeout state.Types.configuration 53 | in 54 | 55 | let role = match state.Types.role with 56 | | Types.Follower follower_state -> 57 | let voted_for = 58 | if term = state.Types.current_term 59 | then follower_state.Types.voted_for 60 | else None 61 | in 62 | Types.Follower { 63 | Types.current_leader; 64 | Types.election_deadline; 65 | Types.voted_for; 66 | } 67 | | Types.Candidate _ when state.Types.current_term = term -> 68 | Types.Follower { 69 | Types.voted_for = Some state.Types.server_id; 70 | Types.current_leader; 71 | Types.election_deadline; 72 | } 73 | | _ -> Types.Follower { 74 | Types.voted_for = None; 75 | Types.current_leader; 76 | Types.election_deadline; 77 | } 78 | in 79 | { state with Types.current_term = term; role } 80 | 81 | end 82 | 83 | module Candidate = struct 84 | 85 | let become ~now state = 86 | let timeout = Configuration.election_timeout state.Types.configuration in 87 | let role = Types.Candidate { 88 | Types.vote_count = 1; 89 | Types.election_deadline = now +. timeout; 90 | } in 91 | {state with 92 | Types.role; 93 | Types.current_term = state.Types.current_term + 1; 94 | } 95 | 96 | let increment_vote_count ({Types.vote_count; _ } as candidate_state) = 97 | {candidate_state with Types.vote_count = vote_count + 1} 98 | 99 | end 100 | 101 | module Leader = struct 102 | 103 | let become ~now state = 104 | 105 | let last_log_index = Log.last_log_index state.Types.log in 106 | 107 | let { 108 | Types.nb_of_server; 109 | hearbeat_timeout; _} = state.Types.configuration in 110 | 111 | let rec aux followers = function 112 | | (-1) -> followers 113 | | i -> 114 | if i = state.Types.server_id 115 | then 116 | aux followers (i -1) 117 | else 118 | let next_index = last_log_index + 1 in 119 | let match_index = 0 in 120 | 121 | let follower:Types.follower_info = { 122 | Types.follower_id = i; 123 | next_index; 124 | match_index; 125 | outstanding_request = false; 126 | heartbeat_deadline = now +. hearbeat_timeout; 127 | (* 128 | * Here the expectation is that after becoming a leader 129 | * the client application will send a message to all the 130 | * receivers and therefore the heartbeat_deadline is set 131 | * to [now + timeout] rather than [now]. *) 132 | } in 133 | aux (follower::followers) (i - 1) 134 | in 135 | let followers = aux [] (nb_of_server - 1) in 136 | 137 | {state with Types.role = Types.(Leader followers)} 138 | 139 | (* Reusable function to update the index of a particular 140 | * receiver id. *) 141 | let update_follower ~follower_id ~f leader_state = 142 | 143 | List.map (fun follower -> 144 | if follower.Types.follower_id = follower_id 145 | then (f follower) 146 | else follower 147 | ) leader_state 148 | 149 | let update_follower_last_log_index ~follower_id ~index followers = 150 | 151 | let followers = update_follower ~follower_id ~f:(fun follower -> 152 | let {Types.next_index; match_index; _} = follower in 153 | let upd_next_index = index + 1 in 154 | let upd_match_index = index in 155 | if upd_match_index > match_index && upd_next_index > next_index 156 | then 157 | {follower with Types.next_index = index + 1; match_index = index} 158 | else 159 | (* It is possible to receive out of order responses from the other 160 | * raft servers. 161 | * 162 | * In such a case we don't want to decrement the next index 163 | * of the server since the server is expected to never remove 164 | * previously saved log entries. 
*) 165 | follower 166 | ) followers 167 | in 168 | 169 | (* Calculate the number of server which also have replicated that 170 | log entry *) 171 | let nb_of_replications = List.fold_left (fun n {Types.match_index; _ } -> 172 | if match_index >= index 173 | then n + 1 174 | else n 175 | ) 0 followers in 176 | 177 | (followers, nb_of_replications) 178 | 179 | let decrement_next_index 180 | ~follower_last_log_index ~follower_id state followers = 181 | let latest_log_index = Log.last_log_index state.Types.log in 182 | 183 | assert(follower_last_log_index <= latest_log_index); 184 | (* This is an invariant. The server cannot have replicated more logs 185 | * than the Leader. 186 | * 187 | * However due to message re-ordering it is possible to receive a 188 | * [LogFailure] with the receiver_last_log_index equal to 189 | * the latest_log_index. 190 | * 191 | * Consider the following scenario 192 | * - [Leader] Append_entry prev_index = x rev_log_entries [x+1] 193 | * - [Server] receives the request and correctly replicate x + 1 194 | * - !! RESPONSE IS LOST !! 195 | * - [Leader] Append_entry prev_index = x rev_log_entries [x+1] 196 | * - [Server] return a failure since it has replicated x + 1 and cannot 197 | * remove that log entry since it is coming from the current term 198 | * leader. *) 199 | update_follower ~follower_id ~f:(fun index -> 200 | {index with 201 | Types.next_index = follower_last_log_index + 1; 202 | Types.match_index = follower_last_log_index} 203 | ) followers 204 | 205 | let record_response_received ~follower_id followers = 206 | 207 | update_follower 208 | ~follower_id 209 | ~f:(fun index -> 210 | {index with Types.outstanding_request = false;} 211 | ) 212 | followers 213 | 214 | let min_heartbeat_timout ~now followers = 215 | 216 | let min_heartbeat_deadline = List.fold_left (fun min_deadline f -> 217 | let {Types.heartbeat_deadline; _ } = f in 218 | if heartbeat_deadline < min_deadline 219 | then heartbeat_deadline 220 | else min_deadline 221 | ) max_float followers 222 | in 223 | min_heartbeat_deadline -. now 224 | 225 | end (* Leader *) 226 | 227 | module Timeout_event = struct 228 | 229 | let existing_election_wait election_deadline now = 230 | Types.({ 231 | timeout = election_deadline -. 
now; 232 | timeout_type = New_leader_election; 233 | }) 234 | 235 | let make_heartbeat_wait timeout = 236 | Types.({ 237 | timeout = timeout; 238 | timeout_type = Heartbeat; 239 | }) 240 | 241 | let next ~now state = 242 | match state.Types.role with 243 | | Types.Follower {Types.election_deadline; _} -> 244 | existing_election_wait election_deadline now 245 | | Types.Leader leader_state -> 246 | make_heartbeat_wait (Leader.min_heartbeat_timout ~now leader_state) 247 | | Types.Candidate {Types.election_deadline; _ } -> 248 | existing_election_wait election_deadline now 249 | 250 | end (* Timeout_event *) 251 | 252 | module Diff = struct 253 | 254 | let leader_change before after = 255 | let open Types in 256 | 257 | let { role = brole; _ } = before in 258 | let { role = arole; _ } = after in 259 | 260 | match brole, arole with 261 | | Follower _ , Leader _ 262 | | Leader _ , Candidate _ -> 263 | (* Impossible transition which would violate the rules of the 264 | * RAFT protocol *) 265 | assert(false) 266 | 267 | | Candidate _ , Leader _ -> 268 | (* Case of the server becoming a leader *) 269 | Some (New_leader after.server_id) 270 | 271 | | Follower {current_leader = Some bleader; _ }, 272 | Follower {current_leader = Some aleader; _ } when bleader = aleader -> 273 | None 274 | (* No leader change, following the same leader *) 275 | 276 | | _, Follower{current_leader = Some aleader;_} -> 277 | Some (New_leader aleader) 278 | (* There is a new leader *) 279 | 280 | | Follower {current_leader = Some _; _}, Candidate _ 281 | | Follower {current_leader = Some _; _}, Follower {current_leader = None; _} 282 | | Leader _ , Follower {current_leader = None; _} -> 283 | Some No_leader 284 | 285 | | Leader _ , Leader _ 286 | | Candidate _ , Candidate _ 287 | | Candidate _ , Follower {current_leader = None;_} 288 | | Follower {current_leader = None; _} , Follower {current_leader = None;_} 289 | | Follower {current_leader = None; _} , Candidate _ -> 290 | None 291 | 292 | let committed_logs before after = 293 | let open Types in 294 | 295 | let { commit_index = bcommit_index; _ } = before in 296 | let { commit_index = acommit_index; _ } = after in 297 | 298 | if acommit_index > bcommit_index 299 | then 300 | let recent_entries = after.log.Log.recent_entries in 301 | let _, prev_commit, sub = Log.IntMap.split bcommit_index recent_entries in 302 | begin match prev_commit with 303 | | None -> assert(bcommit_index = 0) 304 | | Some _ -> () 305 | end; 306 | let sub, last_commit, _ = Log.IntMap.split acommit_index sub in 307 | let sub = match last_commit with 308 | | None -> assert(false) 309 | | Some ({Log.index; _ } as log_entry) -> 310 | Log.IntMap.add index log_entry sub 311 | in 312 | let committed_entries = List.map snd (Log.IntMap.bindings sub) in 313 | committed_entries 314 | else 315 | (* Should we assert false if after < before, ie it's a violation 316 | of the RAFT protocol *) 317 | [] 318 | 319 | end (* Diff *) 320 | -------------------------------------------------------------------------------- /src/raft_helper.mli: -------------------------------------------------------------------------------- 1 | (** Helper functions for manipulating Raft types *) 2 | 3 | module Configuration : sig 4 | 5 | val is_majority : Raft_types.configuration -> int -> bool 6 | (** [is_majority configuration nb] returns true if [nb] is a majority *) 7 | 8 | end (* Configuration *) 9 | 10 | module Follower : sig 11 | 12 | val make : 13 | ?log:Raft_log.t -> 14 | ?commit_index:int -> 15 | ?current_term:int -> 16 | 
configuration:Raft_types.configuration -> 17 | now:float -> 18 | server_id:int -> 19 | unit -> 20 | Raft_types.state 21 | (** [create ~configuration ~now ~server_id ()] creates an initial 22 | follower state.*) 23 | 24 | val become : 25 | ?current_leader:int -> 26 | now:float -> 27 | term:int -> 28 | Raft_types.state -> 29 | Raft_types.state 30 | (** [become ~current_leader state term] return the new follower state. 31 | 32 | {ul 33 | {li [voted_for] is [None]} 34 | {li [current_leader] is taken from the function argument} 35 | {li [current_term] is taken from the function argument} 36 | } *) 37 | 38 | end (* Follower *) 39 | 40 | module Candidate : sig 41 | 42 | val become : 43 | now:float -> 44 | Raft_types.state -> 45 | Raft_types.state 46 | (** [become state now] returns the new state with Candidate role. 47 | [current_term] is incremented and [vote_count] initialized to 1. 48 | (ie we assume the candidate votes for itself. 49 | 50 | The [election_timeout] is reset to a random number between the boundaries 51 | of the configuration. *) 52 | 53 | val increment_vote_count : 54 | Raft_types.candidate_state -> 55 | Raft_types.candidate_state 56 | (** [increment_vote_count state] increments the candidate vote count 57 | by 1. 58 | 59 | This function is called upon receiving a successful response to a vote 60 | request to one of the servers. *) 61 | 62 | end (* Candidate *) 63 | 64 | module Leader : sig 65 | 66 | val become : 67 | now:float -> 68 | Raft_types.state -> 69 | Raft_types.state 70 | (** [become state] returns the new state with a Leader role. 71 | 72 | While only candidate with a majority are allowed by the protocol to 73 | become a leader, this function does not perform any checks but simply 74 | initialize the role of Leader. 75 | 76 | The calling application is responsible to ensure that it is correct 77 | to become a leader. *) 78 | 79 | 80 | val update_follower_last_log_index : 81 | follower_id:int -> 82 | index:int -> 83 | Raft_types.leader_state -> 84 | (Raft_types.leader_state * int) 85 | (** [update_receiver_last_log_index leader_state receiver_id last_log_index] 86 | updates the leader state with the [last_log_index] information received 87 | from a server. (Both [next_index] and [match_index] are updated. 88 | 89 | The function returns [(state, nb_of_replication)]. The 90 | [nb_of_replication] is useful for the application to determine how many 91 | servers have replicated the log and therefore determine if it can 92 | be considered commited. *) 93 | 94 | val record_response_received : 95 | follower_id:int -> 96 | Raft_types.leader_state -> 97 | Raft_types.leader_state 98 | (** [record_response_received ~server_id leader_state] keeps track of the 99 | fact that there are no more outstanding request for [server_id]. 100 | *) 101 | 102 | val decrement_next_index : 103 | follower_last_log_index:int -> 104 | follower_id:int -> 105 | Raft_types.state -> 106 | Raft_types.leader_state -> 107 | Raft_types.leader_state 108 | 109 | val min_heartbeat_timout : 110 | now:float -> 111 | Raft_types.leader_state -> 112 | float 113 | (** [min_heartbeat_timout ~now ~leader_state] returns when the next timeout 114 | event should occured based on the last request sent to the 115 | followers *) 116 | 117 | end (* Leader *) 118 | 119 | module Timeout_event : sig 120 | 121 | val next : now:float -> Raft_types.state -> Raft_types.timeout_event 122 | (** [next ~now state] returns the next timeout event which should happened 123 | unless another RAFT event happened first. 
*) 124 | 125 | end (* Timeout_event *) 126 | 127 | module Diff : sig 128 | val leader_change : 129 | Raft_types.state -> 130 | Raft_types.state -> 131 | Raft_types.leader_change option 132 | (** [notifications before after] computes the notification between 2 states 133 | *) 134 | 135 | val committed_logs : 136 | Raft_types.state -> 137 | Raft_types.state -> 138 | Raft_log.log_entry list 139 | (** [committed_logs before after] returns the newly committed log entries 140 | between [before] and [after] state *) 141 | 142 | end (* Diff *) 143 | -------------------------------------------------------------------------------- /src/raft_log.ml: -------------------------------------------------------------------------------- 1 | type log_entry = { 2 | index : int; 3 | term : int; 4 | data : bytes; 5 | id : string; 6 | } 7 | 8 | let pp_log_entry fmt {index; term; id; _} = 9 | Format.fprintf fmt "{index: %i; term: %i, id: %s}" 10 | index term id 11 | 12 | module IntMap = Map.Make(struct 13 | type t = int 14 | let compare (x:int) (y:int) = Pervasives.compare x y 15 | end) 16 | 17 | type size = 18 | | Number of int 19 | | Bytes of int * int 20 | 21 | let add size sum log_entry = 22 | match size with 23 | | Number _ -> (sum + 1) 24 | | Bytes (_, overhead) -> 25 | let {data; id; _} = log_entry in 26 | sum + overhead + 16 (* 2x64bits for index/term *) 27 | + (String.length id) + (Bytes.length data) 28 | 29 | let has_reach_max size sum = 30 | match size with 31 | | Number n -> sum >= n 32 | | Bytes (b,_) -> sum >= b 33 | 34 | type max_log_size = { 35 | upper_bound : int; 36 | lower_bound : int; 37 | } 38 | 39 | type t = { 40 | recent_entries : log_entry IntMap.t; 41 | max_log_size : max_log_size; 42 | } 43 | 44 | type log_diff = { 45 | added_logs : log_entry list; 46 | deleted_logs : log_entry list; 47 | } 48 | 49 | let empty_diff = { added_logs = []; deleted_logs = [] } 50 | 51 | let empty max_log_size = { 52 | recent_entries = IntMap.empty; 53 | max_log_size; 54 | } 55 | 56 | let last_log_index_and_term {recent_entries; _ } = 57 | match IntMap.max_binding recent_entries with 58 | | (_ , {index;term; _}) -> (index, term) 59 | | exception Not_found -> (0, 0) 60 | 61 | let last_log_index {recent_entries; _} = 62 | match IntMap.max_binding recent_entries with 63 | | (_ , {index; _}) -> index 64 | | exception Not_found -> 0 65 | 66 | exception Done of (log_entry list * int) 67 | 68 | let log_entries_since ~since ~max log = 69 | let {recent_entries ; _} = log in 70 | if recent_entries = IntMap.empty 71 | then ([], 0) 72 | (* TODO questionable, shoudl all cases go to the sub function *) 73 | else 74 | let _, prev, sub = IntMap.split since recent_entries in 75 | let prev_term = 76 | match prev with 77 | | None -> assert (since = 0); 0 78 | | Some {term; _} -> term 79 | in 80 | 81 | let log_entries, _ = 82 | try IntMap.fold (fun _ log_entry (log_entries, sum) -> 83 | let sum' = add max sum log_entry in 84 | if has_reach_max max sum' 85 | then raise (Done (log_entries, sum)) 86 | else (log_entry :: log_entries, sum') 87 | ) sub ([], 0) 88 | with | Done x -> x 89 | in 90 | 91 | (List.rev log_entries, prev_term) 92 | (* let sub, _, _ = IntMap.split (since + max + 1) sub in 93 | (List.map snd (IntMap.bindings sub), prev_term) *) 94 | 95 | (* Enforce that the size of the recent_entries stays within the 96 | * max log size configuration *) 97 | let truncate add_size ({recent_entries; max_log_size; _} as t) = 98 | let {upper_bound; lower_bound} = max_log_size in 99 | match IntMap.min_binding recent_entries with 100 | 
| exception Not_found -> t 101 | (* when empty, no need for size limitation *) 102 | | (lower_index, _) -> 103 | let (upper_index, _) = IntMap.max_binding recent_entries in 104 | let size = upper_index - lower_index + 1 in 105 | if size + add_size > upper_bound 106 | then 107 | let over = size - lower_bound in 108 | let lower_index = lower_index + over + add_size - 1 in 109 | let _, _, recent_entries = IntMap.split lower_index recent_entries in 110 | {t with recent_entries} 111 | else 112 | t 113 | 114 | let add_log_datas current_term datas log = 115 | let log = truncate (List.length datas) log in 116 | 117 | let rec aux term last_log_index recent_entries added_logs = function 118 | | [] -> (recent_entries, (List.rev added_logs)) 119 | | (data, id)::tl -> 120 | let last_log_index = last_log_index + 1 in 121 | 122 | let new_log_entry = { 123 | index = last_log_index; 124 | term; data; id; 125 | } in 126 | 127 | let added_logs = new_log_entry :: added_logs in 128 | 129 | let recent_entries = 130 | IntMap.add last_log_index new_log_entry recent_entries 131 | in 132 | 133 | aux term last_log_index recent_entries added_logs tl 134 | in 135 | 136 | let term = current_term in 137 | let last_log_index = last_log_index log in 138 | let recent_entries = log.recent_entries in 139 | 140 | let recent_entries, added_logs = 141 | aux term last_log_index recent_entries [] datas 142 | in 143 | 144 | let log_diff = { deleted_logs = []; added_logs; } in 145 | 146 | ({log with recent_entries}, log_diff) 147 | 148 | let add_log_entries ~log_entries log = 149 | let log = truncate (List.length log_entries) log in 150 | let rec aux recent_entries = function 151 | | [] -> 152 | {log with recent_entries} 153 | 154 | | hd::tl -> 155 | let recent_entries = IntMap.add hd.index hd recent_entries in 156 | aux recent_entries tl 157 | in 158 | 159 | let log_diff = { 160 | added_logs = log_entries; 161 | deleted_logs = []; 162 | } in 163 | 164 | (aux log.recent_entries log_entries, log_diff) 165 | 166 | let remove_log_since ~prev_log_index ~prev_log_term log = 167 | let {recent_entries; max_log_size = _ } = log in 168 | if prev_log_index > (last_log_index log) 169 | then raise Not_found 170 | else 171 | 172 | let before, e, after = IntMap.split prev_log_index recent_entries in 173 | let recent_entries, deleted_logs_map = 174 | match e with 175 | | None -> 176 | if prev_log_index = 0 177 | then (before, after) 178 | else raise Not_found 179 | | Some ({term; index; _} as log_entry) -> 180 | if term = prev_log_term 181 | then (IntMap.add index log_entry before, after) 182 | else raise Not_found 183 | in 184 | 185 | let deleted_logs = List.map snd @@ IntMap.bindings deleted_logs_map in 186 | 187 | ( 188 | {log with 189 | recent_entries;}, 190 | {deleted_logs; added_logs = []} 191 | ) 192 | 193 | let merge_diff lhs rhs = 194 | match lhs, rhs with 195 | | {added_logs = []; deleted_logs = []}, rhs -> rhs 196 | | lhs, {added_logs = []; deleted_logs = []} -> lhs 197 | 198 | | {added_logs; deleted_logs = []}, 199 | {added_logs = []; deleted_logs} 200 | | {added_logs = []; deleted_logs}, 201 | {added_logs; deleted_logs = []} -> {added_logs; deleted_logs} 202 | | _ -> assert(false) 203 | 204 | module Builder = struct 205 | 206 | type builder = t 207 | 208 | let make max_log_size = empty max_log_size 209 | 210 | let add_log_entry log log_entry = 211 | let log = truncate 1 log in 212 | (* assert(log_entry.index > (last_log_index log)); 213 | *) 214 | { log with 215 | recent_entries = IntMap.add log_entry.index log_entry 
log.recent_entries; 216 | } 217 | 218 | let to_log x = x 219 | 220 | end (* Builder *) 221 | -------------------------------------------------------------------------------- /src/raft_log.mli: -------------------------------------------------------------------------------- 1 | (** Log Data Structure and related logic*) 2 | 3 | (** {2 Types} *) 4 | 5 | (** unit of log *) 6 | type log_entry = { 7 | index : int; 8 | term : int; 9 | data : bytes; 10 | id : string; 11 | } 12 | 13 | (** Log size limitation parameters. 14 | 15 | The log data structure can grow to an inifinite size as new logs 16 | keeps being appended. However for practical reason it is important to limit 17 | the size of the log in memory. [upper_bound] defines the maximum size, 18 | while [lower_bound] is the size the log is truncated to when 19 | reaching its [upper_bound] value. *) 20 | type max_log_size = { 21 | upper_bound : int; 22 | lower_bound : int; 23 | } 24 | 25 | module IntMap : Map.S with type key = int 26 | 27 | (** log data structure *) 28 | type t = { 29 | recent_entries : log_entry IntMap.t; 30 | max_log_size : max_log_size; 31 | } 32 | 33 | (** {2 Creators} *) 34 | 35 | val empty : max_log_size -> t 36 | (** [empty] is an empty log *) 37 | 38 | (** {2 Accessors} *) 39 | 40 | val last_log_index_and_term : t -> (int * int) 41 | (** [last_log_index_and_term state] returns the [(index, term)] of the last log 42 | entry. If the log is empty [(0, 0)] is returned.*) 43 | 44 | val last_log_index: t -> int 45 | (** [last_log_index state] returns the index of the last log entry.*) 46 | 47 | (** Various representation for the size of a subset of the log *) 48 | type size = 49 | | Number of int 50 | | Bytes of int * int (* (value, overhead per entry) *) 51 | 52 | val log_entries_since : since:int -> max:size -> t -> (log_entry list * int) 53 | (** [log_entries_since ~since:index ~max log] returns 54 | [(log_entries, prev_log_term)] in chronological order. If not empty 55 | [List.hd log_entrie] is the log entry with [index = since + 1]. 56 | 57 | [max] size is enforced by the function. 58 | *) 59 | 60 | (** {2 Modifiers} *) 61 | 62 | (** A data structure detailing the modification done to the log by the 63 | modifiers functions *) 64 | type log_diff = { 65 | added_logs : log_entry list; 66 | deleted_logs : log_entry list; 67 | } 68 | 69 | val empty_diff : log_diff 70 | (** [empty_diff] represent the no difference value *) 71 | 72 | 73 | val merge_diff : log_diff -> log_diff -> log_diff 74 | (** [merge_diff lhs rhs] merges together 2 different diff. Right now it only 75 | works if lhs or rhs are not both containing either added logs or removed 76 | logs *) 77 | 78 | val add_log_datas : int -> (bytes * string) list -> t -> (t * log_diff) 79 | (** [add_log_datas current_term datas state] adds [datas] to the log of the 80 | [state]. 81 | 82 | Note that the logs are in chronological order 83 | In other word [List.hd datas] is the earliest entry 84 | and should be appended first to the server logs.*) 85 | 86 | val add_log_entries : log_entries:log_entry list -> t -> (t * log_diff) 87 | (** [add_log_entries log_entries log] appends [log_entries] to the 88 | [log]. 89 | This function assumes [log_entries] are in chronological order. *) 90 | 91 | val remove_log_since : 92 | prev_log_index:int -> 93 | prev_log_term:int -> 94 | t -> 95 | (t * log_diff) 96 | (** [remove_log_since ~prev_log_index ~prev_log_term log] removes all the 97 | entries which are after the log_entry defined by [prev_log_index] and 98 | [prev_log_term]. 
99 | 100 | If [log] does not contain any log_entry defined by [prev_log_index] and 101 | [prev_log_term] then [Not_found] is raised. *) 102 | 103 | (** {2 Utilities} *) 104 | 105 | val pp_log_entry : Format.formatter -> log_entry -> unit 106 | (** [pp_log_entry fmt log_entry] format [log_entry] *) 107 | 108 | (** Helper module to build the log type from saved log entries. 109 | 110 | This module is designed to be used by a RAFT server at start time. *) 111 | module Builder : sig 112 | 113 | type builder 114 | 115 | val make : max_log_size -> builder 116 | 117 | val add_log_entry : builder -> log_entry -> builder 118 | 119 | val to_log : builder -> t 120 | 121 | end (* Builder *) 122 | -------------------------------------------------------------------------------- /src/raft_protocol.ml: -------------------------------------------------------------------------------- 1 | module Types = Raft_types 2 | module Follower = Raft_helper.Follower 3 | module Candidate = Raft_helper.Candidate 4 | module Leader = Raft_helper.Leader 5 | module Configuration = Raft_helper.Configuration 6 | module Log = Raft_log 7 | module Timeout_event = Raft_helper.Timeout_event 8 | module Helper = Raft_helper 9 | 10 | let make_result ?(msgs_to_send = []) ?leader_change ?(deleted_logs = []) 11 | ?(committed_logs = []) ?(added_logs = []) state = { 12 | Types.state; 13 | messages_to_send = msgs_to_send; 14 | leader_change; 15 | committed_logs; 16 | added_logs; 17 | deleted_logs; 18 | } 19 | 20 | module Log_entry_util = struct 21 | 22 | let make_append_entries prev_log_index state = 23 | 24 | let to_send, prev_log_term = 25 | let since = prev_log_index in 26 | let max = state.Types.configuration.Types.max_nb_logs_per_message in 27 | Log.log_entries_since ~since ~max state.Types.log 28 | in 29 | 30 | let request = Types.({ 31 | leader_term = state.current_term; 32 | leader_id = state.server_id; 33 | prev_log_index; 34 | prev_log_term; 35 | log_entries = to_send; 36 | leader_commit = state.commit_index; 37 | }) in 38 | 39 | request 40 | 41 | let compute_append_entries ?(force = false) state followers now = 42 | 43 | let rec aux followers msgs_to_send = function 44 | | [] -> (List.rev followers, msgs_to_send) 45 | 46 | | follower::tl -> 47 | let { 48 | Types.follower_id; 49 | heartbeat_deadline; 50 | outstanding_request; 51 | next_index;_ 52 | } = follower in 53 | 54 | let shoud_send_request = 55 | if force || now >= heartbeat_deadline 56 | then true 57 | (* The heartbeat deadline is past due, the [Leader] must 58 | * sent an [Append_entries] request. *) 59 | else 60 | if outstanding_request 61 | then false 62 | (* In case of an outstanding request there is no point 63 | * in sending a new request to that server. 64 | * Even if the outstanding request was lost and it could be 65 | * beneficial to send a new request, this would happen 66 | * at the next heartbeat. We assume it's more likely that 67 | * the server is down and therefore there is no need to keep 68 | * on sending the same request over and over. *) 69 | else 70 | let prev_index = next_index - 1 in 71 | let last_log_index = Log.last_log_index state.Types.log in 72 | if prev_index = last_log_index 73 | then false 74 | (* The receipient has the most recent log entry and the 75 | * heartbeat deadline has not expired, no need to send a 76 | * new heartbeat. 
*) 77 | else true 78 | (* The recipient is missing recent log entries *) 79 | in 80 | 81 | if shoud_send_request 82 | then 83 | let request = make_append_entries (next_index - 1) state in 84 | let follower = 85 | let outstanding_request = true in 86 | let heartbeat_deadline = 87 | now +. state.Types.configuration.Types.hearbeat_timeout 88 | in 89 | {follower with 90 | Types.outstanding_request; 91 | Types.heartbeat_deadline; 92 | } 93 | in 94 | let followers = follower::followers in 95 | let msgs_to_send = 96 | let msg = (Types.Append_entries_request request, follower_id) in 97 | msg::msgs_to_send 98 | in 99 | aux followers msgs_to_send tl 100 | else 101 | aux (follower::followers) msgs_to_send tl 102 | in 103 | aux [] [] followers 104 | 105 | end (* Log_entry_util *) 106 | 107 | (** {2 Request Vote} *) 108 | 109 | let make_request_vote_request state = 110 | let index, term = Log.last_log_index_and_term state.Types.log in 111 | Types.({ 112 | candidate_term = state.current_term; 113 | candidate_id = state.server_id; 114 | candidate_last_log_index = index; 115 | candidate_last_log_term = term; 116 | }) 117 | 118 | let handle_request_vote_request state request now = 119 | let { 120 | Types.candidate_id; 121 | candidate_term; 122 | candidate_last_log_index;_} = request in 123 | 124 | let make_response state vote_granted = 125 | Types.({ 126 | voter_term = state.current_term; 127 | voter_id = state.server_id ; 128 | vote_granted; 129 | }) 130 | in 131 | 132 | if candidate_term < state.Types.current_term 133 | then 134 | (* This request is coming from a candidate lagging behind ... 135 | * no vote for him. *) 136 | (state, make_response state false) 137 | else 138 | let state = 139 | (* Enforce invariant that if this server is lagging behind 140 | * it must convert to a follower and update to that term. *) 141 | if candidate_term > state.Types.current_term 142 | then Follower.become ~term:candidate_term ~now state 143 | else state 144 | in 145 | 146 | let last_log_index = Log.last_log_index state.Types.log in 147 | 148 | if candidate_last_log_index < last_log_index 149 | then 150 | (* Enforce the safety constraint by denying vote if this server 151 | * last log is more recent than the candidate one.*) 152 | (state, make_response state false) 153 | else 154 | let role = state.Types.role in 155 | match role with 156 | | Types.Follower {Types.voted_for = None; _} -> 157 | (* This server has never voted before, candidate is getting the vote 158 | * 159 | * In accordance to the `Rules for Servers`, the follower must 160 | * reset its election deadline when granting its vote. *) 161 | 162 | let { 163 | Types.configuration = { 164 | Types.election_timeout; _ 165 | }; _} = state in 166 | 167 | let state = {state with 168 | Types.role = Types.Follower { 169 | Types.voted_for = Some candidate_id; 170 | Types.current_leader = None; 171 | Types.election_deadline = now +. election_timeout; 172 | } 173 | } in 174 | (state, make_response state true) 175 | 176 | | Types.Follower {Types.voted_for = Some id; _} when id = candidate_id -> 177 | (* This server has already voted for that candidate... 
reminding him *) 178 | (state, make_response state true) 179 | 180 | | _ -> 181 | (* Server has previously voted for another candidate or 182 | * itself *) 183 | (state, make_response state false) 184 | 185 | let handle_request_vote_response state response now = 186 | let {Types.current_term; role; configuration; _ } = state in 187 | let {Types.voter_term; vote_granted; _ } = response in 188 | 189 | if voter_term > current_term 190 | then 191 | (* Enforce invariant that if this server is lagging behind 192 | * it must convert to a follower and update to the latest term. *) 193 | let state = Follower.become ~term:voter_term ~now state in 194 | (state, []) 195 | 196 | else 197 | match role, vote_granted with 198 | | Types.Candidate ({Types.vote_count; _ } as candidate_state) , true -> 199 | 200 | if Configuration.is_majority configuration (vote_count + 1) 201 | then 202 | (* By reaching a majority of votes, the candidate is now 203 | * the new leader *) 204 | let state = Leader.become ~now state in 205 | 206 | (* As a new leader, the server must send Append entries request 207 | * to the other servers to both establish its leadership and 208 | * start synching its log with the others. *) 209 | begin match state.Types.role with 210 | | Types.Leader followers -> 211 | 212 | let followers, msgs_to_send = 213 | let force = true in 214 | Log_entry_util.compute_append_entries ~force state followers now 215 | in 216 | 217 | let state = Types.{state with role = Leader followers} in 218 | (state, msgs_to_send) 219 | | _ -> assert(false) 220 | end 221 | else 222 | (* Candidate has a new vote but not yet reached the majority *) 223 | let new_state = Types.{state with 224 | role = Candidate (Candidate.increment_vote_count candidate_state); 225 | } in 226 | (new_state, [] (* no message to send *)) 227 | 228 | | Types.Candidate _ , false 229 | (* The vote was denied, the election keeps on going until 230 | * its deadline. *) 231 | 232 | | Types.Follower _ , _ 233 | | Types.Leader _ , _ -> (state, []) 234 | (* If the server is either Follower or Leader, it means that 235 | * it has changed role in between the time it sent the 236 | * [RequestVote] request and this response. 237 | * This response can therefore safely be ignored and the server 238 | * keeps its current state.Types. *) 239 | 240 | (** {2 Append Entries} *) 241 | 242 | let update_state leader_commit receiver_last_log_index log state = 243 | if leader_commit > state.Types.commit_index 244 | then 245 | let commit_index = min leader_commit receiver_last_log_index in 246 | {state with Types.log; commit_index} 247 | else 248 | {state with Types.log} 249 | 250 | let handle_append_entries_request state request now = 251 | let {Types.leader_term; leader_id; _} = request in 252 | 253 | let make_response state result = 254 | Types.({ 255 | receiver_term = state.current_term; 256 | receiver_id = state.server_id; 257 | result; 258 | }) 259 | in 260 | 261 | if leader_term < state.Types.current_term 262 | then 263 | (* This request is coming from a leader lagging behind... *) 264 | (state, make_response state Types.Term_failure, Log.empty_diff) 265 | 266 | else 267 | (* This request is coming from a legetimate leader, 268 | * let's ensure that this server is a follower. *) 269 | let state = 270 | let current_leader = leader_id and term = leader_term in 271 | Follower.become ~current_leader ~term ~now state 272 | in 273 | 274 | (* Next step is to handle the log entries from the leader. 
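       *
       * The case analysis below can be summarized as follows:
       * - [prev_log_index < commit_index]: the request arrived out of
       *   order; reply with the current last log index and change nothing.
       * - [leader_term = receiver_last_log_term]: same-term leader; the
       *   entries are appended when [prev_log_index] matches this server's
       *   last log index, otherwise a response is sent without modifying
       *   the log.
       * - [leader_term > receiver_last_log_term]: a new leader; uncommitted
       *   entries from older terms may be removed before appending.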
*) 275 | let { 276 | Types.prev_log_index; 277 | prev_log_term; 278 | log_entries; 279 | leader_commit; _ } = request in 280 | 281 | let ( 282 | receiver_last_log_index, 283 | receiver_last_log_term 284 | ) = Log.last_log_index_and_term state.Types.log in 285 | 286 | let commit_index = state.Types.commit_index in 287 | 288 | if prev_log_index < commit_index 289 | then 290 | (* The only reason that can happen is if the messages 291 | * are delivered out of order. (Which is completely possible). 292 | * No need to update this follower. *) 293 | let success = Types.Success receiver_last_log_index in 294 | (state, make_response state success, Log.empty_diff) 295 | 296 | else 297 | if leader_term = receiver_last_log_term 298 | then 299 | match compare prev_log_index receiver_last_log_index with 300 | | 0 -> 301 | (* Leader info about the receiver last log index is matching 302 | * perfectly, we can append the logs. *) 303 | let log, log_diff = 304 | Log.add_log_entries ~log_entries state.Types.log 305 | in 306 | let receiver_last_log_index = Log.last_log_index log in 307 | let state = 308 | update_state leader_commit receiver_last_log_index log state 309 | in 310 | 311 | let success = Types.Success receiver_last_log_index in 312 | (state, make_response state success, log_diff) 313 | 314 | | x when x > 0 -> 315 | (* This should really never happen since: 316 | * - No logs belonging to the Leader term can be removed 317 | * - The leader is supposed to keep track of the latest log from 318 | * the receiver within the same term. 319 | *) 320 | let failure = Types.Log_failure receiver_last_log_index in 321 | (state, make_response state failure, Log.empty_diff) 322 | 323 | | _ (* x when x < 0 *) -> 324 | (* This case is possible when messages are received out of order by 325 | * the Follower 326 | * 327 | * Note that even if the prev_log_index is earlier, it's important 328 | * that no log entry is removed from the log if they come from the 329 | * current leader. 330 | * 331 | * The current leader might have sent a commit message back to a 332 | * client believing that the log entry is replicated on this server. 333 | * If we remove the log entry we violate the assumption. *) 334 | let success = Types.Success receiver_last_log_index in 335 | (state, make_response state success, Log.empty_diff) 336 | 337 | else (* leader_term > receiver_last_log_term *) 338 | 339 | if prev_log_index > receiver_last_log_index 340 | then 341 | (* This is likely the case after a new election, the Leader has 342 | * more log entries in its log and assumes that all followers have 343 | * the same number of log entries. *) 344 | let failure = Types.Log_failure receiver_last_log_index in 345 | (state, make_response state failure, Log.empty_diff) 346 | else 347 | (* Because it is a new Leader, this follower can safely remove all 348 | * the logs from previous terms which were not committed. *) 349 | 350 | match Log.remove_log_since ~prev_log_index 351 | ~prev_log_term state.Types.log with 352 | | exception Not_found -> 353 | let failure = Types.Log_failure commit_index in 354 | (state, make_response state failure, Log.empty_diff) 355 | (* This is the case where there is a mismatch between the [Leader] 356 | * and this server and the log entry identified with 357 | * (prev_log_index, prev_log_term) 358 | * could not be found. 359 | * 360 | * In such a case, the safest log entry to synchronize upon is the 361 | * commit_index 362 | * of the follower. 
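           *
           * On the leader side, the [Log_failure commit_index] response is
           * handled by [Leader.decrement_next_index] (see
           * [handle_append_entries_response] below), which lowers the
           * [next_index] kept for this follower so that replication can
           * resume from a point both servers agree on.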
*) 363 | 364 | | log, log_diff -> 365 | let log, log_diff' = Log.add_log_entries ~log_entries log in 366 | let log_diff = Log.merge_diff log_diff log_diff' in 367 | let receiver_last_log_index = Log.last_log_index log in 368 | let state = 369 | update_state leader_commit receiver_last_log_index log state 370 | in 371 | let success = Types.Success receiver_last_log_index in 372 | (state, make_response state success, log_diff) 373 | 374 | let handle_append_entries_response state response now = 375 | let { 376 | Types.receiver_term; 377 | receiver_id = follower_id ; 378 | result} = response in 379 | 380 | if receiver_term > state.Types.current_term 381 | then 382 | (* Enforce invariant that if the server is lagging behind 383 | * it must convert to a follower and update to that term. *) 384 | (Follower.become ~term:receiver_term ~now state , []) 385 | 386 | else 387 | match state.Types.role with 388 | | Types.Follower _ 389 | | Types.Candidate _ -> (state, []) 390 | 391 | | Types.Leader followers -> 392 | 393 | let followers = 394 | Leader.record_response_received ~follower_id followers 395 | in 396 | 397 | begin match result with 398 | | Types.Success follower_last_log_index -> 399 | (* Log entries were successfully inserted by the receiver... 400 | * 401 | * let's update our leader state about that receiver *) 402 | 403 | let configuration = state.Types.configuration in 404 | 405 | let followers , nb_of_replications = 406 | Leader.update_follower_last_log_index 407 | ~follower_id ~index:follower_last_log_index followers 408 | in 409 | 410 | let state = 411 | (* Check if the received log entry from has reached 412 | * a majority of server. 413 | * Note that we need to add `+1` simply to count this 414 | * server (ie leader) which does not update its next/match 415 | * *) 416 | if Configuration.is_majority configuration (nb_of_replications + 1) && 417 | follower_last_log_index > state.Types.commit_index 418 | then {state with Types.commit_index = follower_last_log_index;} 419 | else state 420 | in 421 | 422 | let followers, msgs_to_send = 423 | Log_entry_util.compute_append_entries state followers now 424 | in 425 | 426 | let state = Types.({state with role = Leader followers}) in 427 | 428 | (state, msgs_to_send) 429 | 430 | | Types.Log_failure follower_last_log_index -> 431 | (* The receiver log is not matching this server current belief. 432 | * If a leader this server should decrement the next 433 | * log index and retry the append. *) 434 | let leader_state = 435 | Leader.decrement_next_index 436 | ~follower_last_log_index ~follower_id state followers 437 | in 438 | let leader_state, msgs_to_send = 439 | Log_entry_util.compute_append_entries state leader_state now 440 | in 441 | let state = Types.({state with role = Leader leader_state}) in 442 | (state, msgs_to_send) 443 | 444 | | Types.Term_failure -> 445 | (state, []) 446 | (* The receiver could have detected that this server term was not the 447 | * latest one and sent the [Term_failure] response. 448 | * 449 | * This could typically happen in a network partition: 450 | * 451 | * Old-leader---------X-------New Leader 452 | * \ / 453 | * ---------Follower--------- 454 | * 455 | * In the diagram above this server is the Old leader. 
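           *
           * Once this old leader receives a message carrying the new,
           * higher term (for instance an [Append_entries] response or a
           * [Request_vote] request from the new leader's term), the
           * [receiver_term > current_term] check above makes it step down
           * and become a [Follower] again.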
456 | *) 457 | 458 | end (* match result *) 459 | 460 | let init ?log ?commit_index ?current_term ~configuration ~now ~server_id () = 461 | Follower.make ?log ?commit_index ?current_term 462 | ~configuration ~now ~server_id () 463 | 464 | let handle_message state message now = 465 | let state', msgs_to_send, log_diff = 466 | match message with 467 | | Types.Request_vote_request ({Types.candidate_id; _ } as r) -> 468 | let state, response = handle_request_vote_request state r now in 469 | let msgs = 470 | (Types.Request_vote_response response, candidate_id)::[] 471 | in 472 | (state, msgs, Log.empty_diff) 473 | 474 | | Types.Append_entries_request ({Types.leader_id; _ } as r) -> 475 | let state, response, log_diff = 476 | handle_append_entries_request state r now 477 | in 478 | let msgs = 479 | (Types.Append_entries_response response, leader_id) :: [] 480 | in 481 | (state, msgs, log_diff) 482 | 483 | | Types.Request_vote_response r -> 484 | let state, msgs = handle_request_vote_response state r now in 485 | (state, msgs, Log.empty_diff) 486 | 487 | | Types.Append_entries_response r -> 488 | let state, msgs = handle_append_entries_response state r now in 489 | (state, msgs, Log.empty_diff) 490 | 491 | in 492 | let leader_change = Helper.Diff.leader_change state state' in 493 | let committed_logs = Helper.Diff.committed_logs state state' in 494 | let {Log.added_logs; deleted_logs} = log_diff in 495 | make_result ~msgs_to_send ?leader_change ~added_logs 496 | ~deleted_logs ~committed_logs state' 497 | 498 | (* Iterates over all the other server ids. (ie the ones different 499 | * from the state id). *) 500 | let fold_over_servers f e0 state = 501 | 502 | let { 503 | Types.server_id; 504 | configuration = {Types.nb_of_server; _}; _ 505 | } = state in 506 | 507 | let rec aux acc = function 508 | | -1 -> acc 509 | | id -> 510 | let next = id - 1 in 511 | if id = server_id 512 | then aux acc next 513 | else aux (f acc id) next 514 | in 515 | aux e0 (nb_of_server - 1) 516 | let handle_new_election_timeout state now = 517 | let state' = Candidate.become ~now state in 518 | let msgs_to_send = 519 | fold_over_servers (fun acc server_id -> 520 | let request = make_request_vote_request state' in 521 | (Types.Request_vote_request request, server_id) :: acc 522 | ) [] state' 523 | in 524 | let leader_change = Helper.Diff.leader_change state state' in 525 | let committed_logs = Helper.Diff.committed_logs state state' in 526 | make_result ~msgs_to_send ?leader_change ~committed_logs state' 527 | 528 | let handle_heartbeat_timeout state now = 529 | match state.Types.role with 530 | | Types.Leader leader_state -> 531 | let leader_state, msgs_to_send = 532 | Log_entry_util.compute_append_entries state leader_state now 533 | in 534 | let state = Types.({state with role = Leader leader_state}) in 535 | make_result ~msgs_to_send state 536 | | _ -> 537 | make_result state 538 | 539 | type new_log_response = 540 | | Appended of Types.result 541 | | Forward_to_leader of int 542 | | Delay 543 | 544 | let handle_add_log_entries state datas now = 545 | match state.Types.role with 546 | | Types.Follower {Types.current_leader = None ; _ } 547 | | Types.Candidate _ -> 548 | Delay 549 | (* Server in the middle of an election with no [Leader] 550 | * agreed upon yet *) 551 | 552 | | Types.Follower {Types.current_leader = Some leader_id; _ } -> 553 | Forward_to_leader leader_id 554 | (* The [Leader] should be the one centralizing all the 555 | * new log entries. 
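     *
     * A caller would typically dispatch on [new_log_response] along these
     * lines (a sketch only; [apply_result], [send_to_server] and
     * [retry_later] stand for hypothetical application functions):
     *
     *   match handle_add_log_entries state datas now with
     *   | Appended result      -> apply_result result
     *   | Forward_to_leader id -> send_to_server id datas
     *   | Delay                -> retry_later datas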
*) 556 | 557 | | Types.Leader leader_state -> 558 | 559 | let log, log_diff = 560 | Log.add_log_datas state.Types.current_term datas state.Types.log 561 | in 562 | 563 | let state' = Types.({state with log }) in 564 | 565 | let leader_state, msgs_to_send = 566 | Log_entry_util.compute_append_entries state' leader_state now 567 | in 568 | 569 | let state' = Types.({state' with role = Leader leader_state }) in 570 | let {Log.added_logs; deleted_logs} = log_diff in 571 | Appended (make_result ~msgs_to_send ~added_logs ~deleted_logs state') 572 | 573 | let next_timeout_event state now = Timeout_event.next ~now state 574 | 575 | let committed_entries_since ~since {Types.commit_index; log; _} = 576 | let max = commit_index - since in 577 | fst @@ Log.log_entries_since ~since ~max:(Log.Number max)log 578 | -------------------------------------------------------------------------------- /src/raft_protocol.mli: -------------------------------------------------------------------------------- 1 | (** Protocol Implementation *) 2 | 3 | (** This module implements the RAFT protocol logic in a functional way and is 4 | agnostic of: 5 | {ul 6 | {- {b Transport protocol}: This module simply defines which message 7 | should be sent upon any RAFT protocol event. It is the caller 8 | responsability to handle all the communication between RAFT servers. 9 | Note that the package {b raft-pb} provides message serialization, 10 | based on Protobuf technology.} 11 | {- {b Persistent storage}: The RAFT protocol requires data to be 12 | recorded permanently. This implementation simply notifies of state 13 | change; the caller is responsible to store this information. 14 | Note that the package {b raft-rocks} provides a persistent storage 15 | solution using RocksDB.} 16 | }*) 17 | 18 | (** {2 Protocol Event Implementation} *) 19 | 20 | val init : 21 | ?log:Raft_log.t -> 22 | ?commit_index:int -> 23 | ?current_term:int -> 24 | configuration:Raft_types.configuration -> 25 | now:Raft_types.time -> 26 | server_id:Raft_types.server_id -> 27 | unit -> 28 | Raft_types.state 29 | (** [init ~configuration ~now ~server_id ()] creates an initial server 30 | state. 31 | 32 | The server is initially a [Follower] and its [election_deadline] will be 33 | set according to the [configuration] and the [now] value. *) 34 | 35 | val handle_message : 36 | Raft_types.state -> 37 | Raft_types.message -> 38 | Raft_types.time -> 39 | Raft_types.result 40 | (** [handle_message state message now] handles a new RAFT message received.*) 41 | 42 | val handle_new_election_timeout : 43 | Raft_types.state -> 44 | Raft_types.time -> 45 | Raft_types.result 46 | (** [handle_new_election_timeout state now] handles a new election timeout.*) 47 | 48 | val handle_heartbeat_timeout : 49 | Raft_types.state -> 50 | Raft_types.time -> 51 | Raft_types.result 52 | (** [handle_heartbeat_timeout state now] handles an heartbeat event.*) 53 | 54 | type new_log_response = 55 | | Appended of Raft_types.result 56 | (** The new log can correctly be handled by this server (ie it is 57 | a valid [Leader] and new [Append_entries] request message can be 58 | sent to follower servers.*) 59 | | Forward_to_leader of int 60 | (** If the current server is not a [Leader], the new log entry should 61 | not be handled by this server but rather forwarded to the current 62 | [Leader] which id is returned.*) 63 | | Delay 64 | (** The current state of the system (as this server is aware) does not 65 | seem to be in a configuration that can handle the the log. 
66 | For instance during an election it is possible that this server 67 | is a [Candidate] waiting for more votes. Another scenario would be 68 | that this server is a [Follower] which has not yet received confirmation 69 | from a valid [Leader].*) 70 | 71 | val handle_add_log_entries: 72 | Raft_types.state -> 73 | (bytes * string) list -> 74 | Raft_types.time -> 75 | new_log_response 76 | (** [handle_add_log_entries state data now] processes new log entries [data] 77 | which is a list [(data, id)]. See new_log_response for more 78 | information.*) 79 | 80 | (** {2 Utilities} *) 81 | 82 | val next_timeout_event : 83 | Raft_types.state -> 84 | Raft_types.time -> 85 | Raft_types.timeout_event 86 | (** [next_timeout_event state now] returns the timeout information 87 | that the serve should implement. 88 | 89 | The server application is responsible for managing the main event 90 | loop such as listening for messaging and waking up for timeout events.*) 91 | 92 | val committed_entries_since : 93 | since:int -> 94 | Raft_types.state -> 95 | Raft_log.log_entry list 96 | (** [committed_entries_since ~since state] returns all the committed entries 97 | since the log index [since].*) 98 | -------------------------------------------------------------------------------- /src/raft_types.ml: -------------------------------------------------------------------------------- 1 | module Log = Raft_log 2 | 3 | type time = float 4 | 5 | type duration = float 6 | 7 | type server_id = int 8 | 9 | type configuration = { 10 | nb_of_server : int; 11 | election_timeout : duration; 12 | election_timeout_range : duration; 13 | hearbeat_timeout : duration; 14 | max_nb_logs_per_message : Raft_log.size; 15 | max_log_size : Log.max_log_size; 16 | } 17 | 18 | type request_vote_request = { 19 | candidate_term : int; 20 | candidate_id : server_id; 21 | candidate_last_log_index : int; 22 | candidate_last_log_term : int; 23 | } 24 | 25 | type request_vote_response = { 26 | voter_id : server_id; 27 | voter_term : int; 28 | vote_granted : bool; 29 | } 30 | 31 | type append_entries_request = { 32 | leader_term : int; 33 | leader_id : server_id; 34 | prev_log_index : int; 35 | prev_log_term : int; 36 | log_entries : Log.log_entry list; 37 | leader_commit : int; 38 | } 39 | 40 | type append_entries_response_result = 41 | | Success of int 42 | | Log_failure of int 43 | | Term_failure 44 | 45 | type append_entries_response = { 46 | receiver_id : server_id; 47 | receiver_term : int; 48 | result : append_entries_response_result; 49 | } 50 | 51 | type message = 52 | | Request_vote_request of request_vote_request 53 | | Request_vote_response of request_vote_response 54 | | Append_entries_request of append_entries_request 55 | | Append_entries_response of append_entries_response 56 | 57 | type message_to_send = message * server_id 58 | 59 | type timeout_type = 60 | | New_leader_election 61 | | Heartbeat 62 | 63 | type timeout_event = { 64 | timeout : time; 65 | timeout_type : timeout_type; 66 | } 67 | 68 | type leader_change = 69 | | New_leader of int 70 | | No_leader 71 | 72 | type follower_info = { 73 | follower_id : int; 74 | next_index : int; 75 | match_index : int; 76 | heartbeat_deadline : float; 77 | outstanding_request : bool; 78 | } 79 | 80 | type leader_state = follower_info list 81 | 82 | type candidate_state = { 83 | vote_count : int; 84 | election_deadline : time; 85 | } 86 | 87 | type follower_state = { 88 | voted_for : server_id option; 89 | current_leader : server_id option; 90 | election_deadline : time; 91 | } 92 | 93 | 
type role = 94 | | Leader of leader_state 95 | | Candidate of candidate_state 96 | | Follower of follower_state 97 | 98 | type state = { 99 | server_id : server_id; 100 | current_term : int; 101 | log : Raft_log.t; 102 | commit_index : int; 103 | role : role; 104 | configuration : configuration; 105 | } 106 | 107 | type result = { 108 | state : state; 109 | messages_to_send : message_to_send list; 110 | leader_change : leader_change option; 111 | committed_logs : Raft_log.log_entry list; 112 | added_logs : Raft_log.log_entry list; 113 | deleted_logs : Raft_log.log_entry list; 114 | } 115 | 116 | 117 | let is_follower {role; _} = 118 | match role with 119 | | Follower _ -> true 120 | | _ -> false 121 | 122 | let is_candidate {role; _ } = 123 | match role with 124 | | Candidate _ -> true 125 | | _ -> false 126 | 127 | let is_leader {role; _ } = 128 | match role with 129 | | Leader _ -> true 130 | | _ -> false 131 | 132 | let current_leader {server_id; role; _} = 133 | match role with 134 | | Follower {current_leader; _ }-> current_leader 135 | | Candidate _ -> None 136 | | Leader _ -> Some server_id 137 | -------------------------------------------------------------------------------- /src/raft_types.mli: -------------------------------------------------------------------------------- 1 | (** Protocol Types: State, Message and Events *) 2 | 3 | (** {2 Common types } *) 4 | 5 | type time = float 6 | (** Monotonic time (Absolute value) *) 7 | 8 | type duration = float 9 | (** Duration, must be in the same unit as time*) 10 | 11 | type server_id = int 12 | (** Server id *) 13 | 14 | (** Configuration *) 15 | type configuration = { 16 | nb_of_server : int; 17 | (** Number of servers for the RAFT clusters, servers are then 18 | identified with 0 indexing *) 19 | election_timeout : duration; 20 | (** Average duration time for which a server will wait before starting 21 | a new election when the leader is not sending messages. *) 22 | election_timeout_range : duration; 23 | (** Duration range for the election timeout. The effective election 24 | timeout is randomly chosen between timeout +|- range/2. 25 | This value must be strickly less than [2 *. election_timeout] *) 26 | hearbeat_timeout : duration; 27 | (** Duration between heartbeat sent by the leader. 
[hearbeat_timeout] 28 | must be much less than [election_timeout] *) 29 | max_nb_logs_per_message : Raft_log.size; 30 | (** Limit the number of log entries per append entries message *) 31 | max_log_size : Raft_log.max_log_size; 32 | (** define boundaries for the "in-memory log" size limitation *) 33 | } 34 | 35 | (** {2 Protocol Messages} *) 36 | 37 | type request_vote_request = { 38 | candidate_term : int; 39 | candidate_id : server_id; 40 | candidate_last_log_index : int; 41 | candidate_last_log_term : int; 42 | } 43 | 44 | type request_vote_response = { 45 | voter_id : server_id; 46 | voter_term : int; 47 | vote_granted : bool; 48 | } 49 | 50 | type append_entries_request = { 51 | leader_term : int; 52 | leader_id : server_id; 53 | prev_log_index : int; 54 | prev_log_term : int; 55 | log_entries : Raft_log.log_entry list; 56 | leader_commit : int; 57 | } 58 | 59 | type append_entries_response_result = 60 | | Success of int (** receiver last log index *) 61 | | Log_failure of int (** receiver last log index *) 62 | | Term_failure 63 | 64 | type append_entries_response = { 65 | receiver_id : server_id; 66 | receiver_term : int; 67 | result : append_entries_response_result; 68 | } 69 | 70 | type message = 71 | | Request_vote_request of request_vote_request 72 | | Request_vote_response of request_vote_response 73 | | Append_entries_request of append_entries_request 74 | | Append_entries_response of append_entries_response 75 | 76 | (** RAFT Message to send to the given server *) 77 | type message_to_send = message * server_id 78 | 79 | (** {2 Protocol State} *) 80 | 81 | (** Follower information that a leader keeps track of*) 82 | type follower_info = { 83 | follower_id : server_id; 84 | (** Id of the follower *) 85 | next_index : int; 86 | (** Which [Raft_types.log_entry] should be sent next. *) 87 | match_index : int; 88 | (** The last replicated [Raft_types.log_entry] for this follower *) 89 | heartbeat_deadline : time; 90 | (** The time at which a heartbeat should be sent next *) 91 | outstanding_request : bool; 92 | (** Whether or not the follower has an outstanding request (ie if 93 | no response was received since the last time an append entries 94 | request was sent. *) 95 | } 96 | 97 | (** Leader state *) 98 | type leader_state = follower_info list 99 | 100 | (** Candidate state *) 101 | type candidate_state = { 102 | vote_count : int; 103 | (** The number of positive vote received so far *) 104 | election_deadline : time; 105 | (** The time at whic the election for which the server is a candidate 106 | is ending *) 107 | } 108 | 109 | (** Follower state *) 110 | type follower_state = { 111 | voted_for : server_id option; 112 | (** If [None] then this follower has not voted yet, other it is the id 113 | * of the candidate for which it previously voted *) 114 | current_leader : server_id option; 115 | (** Current leader *) 116 | election_deadline : time; 117 | (** The time at which the next election should take place and this 118 | * follower become a candidate if not leadership was established by 119 | * another server. *) 120 | } 121 | 122 | (** Role of a server in the RAFT protocol *) 123 | type role = 124 | | Leader of leader_state 125 | | Candidate of candidate_state 126 | | Follower of follower_state 127 | 128 | (** Raft server state *) 129 | type state = { 130 | server_id : server_id; 131 | (** Unique Identifier must be between [0] and [nb_of_server - 1]. 
*) 132 | current_term : int; 133 | (** RAFT protocol divide time into terms during which a single 134 | leader can be established. *) 135 | log : Raft_log.t; 136 | (** Set of log entries *) 137 | commit_index : int; 138 | (** The index of the last log entry to be committed. A committed log 139 | is guaranteed by the RAFT protocol to never be removed. *) 140 | role : role; 141 | (** Role of the server. *) 142 | configuration : configuration; 143 | (** Various parameter to configure the RAFT cluster *) 144 | } 145 | 146 | (** {2 API types} *) 147 | 148 | (** The RAFT protocol defines 2 type of Timeout event which should be 149 | triggered if no other protocol event has happened. *) 150 | type timeout_type = 151 | | New_leader_election 152 | (** Timeout until when the server should start a new election (ie 153 | increase term by 1 and become a candidate. *) 154 | | Heartbeat 155 | (** Timeout until when the server which is in a Leader state should 156 | send a heartbeat message to a follower. Heartbeats are used to maintain 157 | leadership and indicate the Leader is still operating as normal *) 158 | 159 | (** Timeout event which defines when (is absolute time) the next timeout 160 | event should be triggered as well as its type *) 161 | type timeout_event = { 162 | timeout : time; 163 | timeout_type : timeout_type; 164 | } 165 | 166 | (** Notification of a change of leadership in the RAFT protocol *) 167 | type leader_change = 168 | | New_leader of server_id 169 | (** A new leader has been elected *) 170 | | No_leader 171 | (** The previous leader is no longer considered a leader. The previous 172 | leader could have been any server including this server. *) 173 | 174 | (** Data returned by each of the protocol event *) 175 | type result = { 176 | state : state; 177 | (** The new state *) 178 | messages_to_send : message_to_send list; 179 | (** Raft messages to be send to the given servers.*) 180 | leader_change : leader_change option; 181 | (** Notification of a change in leaderhsip.*) 182 | committed_logs : Raft_log.log_entry list; 183 | (** Log entries which are now commited.*) 184 | added_logs : Raft_log.log_entry list; 185 | (** Log entries added to the log. Note that this could overlap 186 | with [committed_logs].*) 187 | deleted_logs : Raft_log.log_entry list; 188 | (** Log entries deleted from the log.*) 189 | } 190 | 191 | (** {2 Role functionality} *) 192 | 193 | val is_follower : state -> bool 194 | (** [is_follower state] returns [true] if [state] role is a follower, [false] 195 | otherwise. *) 196 | 197 | val is_candidate : state -> bool 198 | (** [is_candidate state] returns [true] if [state] role is a candidate, [false] 199 | otherwise. *) 200 | 201 | val is_leader : state -> bool 202 | (** [is_leader state] returns [true] if [state] role is a leader, [false] 203 | otherwise. *) 204 | 205 | val current_leader: state -> server_id option 206 | (** [current_leader state] return the current leader for the current term. 207 | If no leader is known then [None] is returned. 
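
    For example (a sketch; [redirect_client] and [ask_client_to_retry] are
    hypothetical application functions, not part of this library):
    {[
      match Raft_types.current_leader state with
      | Some leader_id -> redirect_client leader_id
      | None -> ask_client_to_retry ()
    ]}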
*) 208 | -------------------------------------------------------------------------------- /tests/test.ml: -------------------------------------------------------------------------------- 1 | [@@@ocaml.warning "-45"] 2 | 3 | open Raft_types 4 | open Raft_log 5 | 6 | module Candidate = Raft_helper.Candidate 7 | module Follower = Raft_helper.Follower 8 | module Leader = Raft_helper.Leader 9 | module Timeout_event = Raft_helper.Timeout_event 10 | 11 | module Protocol = Raft_protocol 12 | 13 | let default_configuration = { 14 | nb_of_server = 3; 15 | election_timeout = 0.1; 16 | election_timeout_range = 0.0; 17 | (* To get deterministic number for testing. 18 | *) 19 | hearbeat_timeout = 0.02; 20 | max_nb_logs_per_message = Number 10; 21 | max_log_size = { 22 | upper_bound = 7; 23 | lower_bound = 5; 24 | } 25 | } 26 | 27 | let recent_log_length {log = {recent_entries; _ }; _ } = 28 | IntMap.cardinal recent_entries 29 | 30 | let recent_log_hd {log = {recent_entries; _ }; _ } = 31 | snd @@ IntMap.max_binding recent_entries 32 | 33 | let initial_state ~now server_id = 34 | let configuration = default_configuration in 35 | Protocol.init ~configuration ~now ~server_id () 36 | 37 | let now = 0. 38 | 39 | let msg_for_server msgs id = 40 | match List.find (fun (_, server_id) -> id = server_id) msgs with 41 | | (msg, _) -> msg 42 | | exception Not_found -> assert(false) 43 | 44 | type t = { 45 | server0 : state; 46 | server1 : state; 47 | server2 : state; 48 | } 49 | 50 | let init () = { 51 | server0 = initial_state ~now 0; 52 | server1 = initial_state ~now 1; 53 | server2 = initial_state ~now 2; 54 | } 55 | 56 | (* This part of the test will simulate the successful 57 | * election of server0. Server1 will grant its vote during the election, 58 | * while server2 is kept disconnected from server0 and does not receive 59 | * any message. 60 | * 61 | * At the end of this function, server0 is the leader, server1 is 62 | * a follower of server0 and server2 is still a follower not aware 63 | * than an election occured. *) 64 | let election_1 {server0; server1; server2} now = 65 | 66 | (* All of those severs should have an election timeout randomly 67 | * generated between [election_timeout +/- election_timeout_range/2]. *) 68 | let next_event = Protocol.next_timeout_event server0 now in 69 | assert(next_event.timeout = default_configuration.election_timeout); 70 | assert(next_event.timeout_type = New_leader_election); 71 | 72 | assert(0 = server0.current_term); 73 | assert(0 = server1.current_term); 74 | assert(0 = server2.current_term); 75 | (* The current term initiale value is expected 76 | * to be 0. *) 77 | 78 | (* 79 | * Server0 reach election timeout and kicks off the election 80 | * ------------------------------------------------------------------- 81 | *) 82 | 83 | let { 84 | state = server0; 85 | messages_to_send = msgs; 86 | leader_change; _} = Protocol.handle_new_election_timeout server0 now in 87 | 88 | assert(is_candidate server0); 89 | (* When an election timeout happens the server starts a new election 90 | * and become a [Candidate]. 
*) 91 | 92 | assert(None = leader_change); 93 | (* There was no previous Leader so no leader_change *) 94 | 95 | assert(1 = server0.current_term); 96 | (* Part of the new election process is to increment the [current_term] *) 97 | 98 | assert(2 = List.length msgs); 99 | List.iter (fun (msg, _) -> 100 | match msg with 101 | | Request_vote_request r -> ( 102 | assert(r.candidate_term = 1); 103 | assert(r.candidate_id = 0); 104 | assert(r.candidate_last_log_index = 0); 105 | assert(r.candidate_last_log_term = 0); 106 | ) 107 | | _ -> assert(false) 108 | ) msgs; 109 | (* Upon becoming a candidate the sever0 is expected to send a `RequestVote` 110 | * message to all the other servers. Since we are in a cluster 111 | * of 3, 2 messages should be sent from by the new [Candidate]. *) 112 | 113 | (* 114 | * Send Request_vote_request Server0 -> Server1 115 | * ----------------------------------------------------------------------- 116 | *) 117 | 118 | let now = now +. 0.001 in 119 | 120 | let res = 121 | let msg = msg_for_server msgs 1 in 122 | Protocol.handle_message server1 msg now 123 | in 124 | 125 | let { 126 | state = server1; 127 | messages_to_send = msgs; 128 | leader_change; 129 | committed_logs; 130 | added_logs; 131 | deleted_logs; 132 | } = res in 133 | 134 | assert(1 = server1.current_term); 135 | (* Server0 [Candidate] sent a higher term to server1 which is then 136 | * expected to update its own [current_term] and be a follower 137 | * (which it was already). *) 138 | 139 | assert(None = leader_change); 140 | assert([] = committed_logs); 141 | assert([] = added_logs); 142 | assert([] = deleted_logs); 143 | 144 | begin match server1.role with 145 | | Follower {voted_for; current_leader; election_deadline; } -> begin 146 | (* Server1 should still be a follower. *) 147 | 148 | assert(voted_for = (Some 0)); 149 | (* Because server1 did not previously vote for a candidate it should 150 | * grant its vote to server0 *) 151 | assert(current_leader = None); 152 | (* Granting a vote does not guarantee that server0 will be a [Leader]. 153 | * 154 | * Note that only a valid [Append_entries] request can establish the leadership 155 | * role of the sender. (We will this later). *) 156 | assert(election_deadline = now +. default_configuration.election_timeout); 157 | (* Election deadline should be updated. *) 158 | end 159 | | _ -> assert(false) 160 | end; 161 | 162 | begin match msgs with 163 | | (msg, 0)::[] -> 164 | (* A single response to server 0 is expected from server1. *) 165 | begin match msg with 166 | | Request_vote_response r -> 167 | assert(r.voter_id = 1); 168 | assert(r.voter_term = 1); 169 | assert(r.vote_granted = true); 170 | (* The message confirms the server1 state and decision to grant its vote 171 | * to server0. *) 172 | | _ -> assert(false) 173 | end 174 | | _ -> assert(false) 175 | end; 176 | 177 | (* 178 | * Send Request_vote_response Server1 -> Server0 179 | * -------------------------------------------------------------------------- 180 | *) 181 | 182 | let now = now +. 0.001 in 183 | 184 | let res = 185 | let msg = msg_for_server msgs 0 in 186 | Protocol.handle_message server0 msg now 187 | in 188 | 189 | let { 190 | state = server0; 191 | messages_to_send = msgs; 192 | leader_change; 193 | committed_logs; 194 | added_logs; 195 | deleted_logs; 196 | } = res in 197 | 198 | assert(is_leader server0); 199 | assert(Some (New_leader 0) = leader_change); 200 | (* Because a single vote is enough to reach a majority in a 3-server 201 | * cluster, server0 becomes a [Leader]. 
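   *
   * Worked out: server0 already counts its own vote, so server1's granted
   * vote brings it to 2 votes out of the 3 servers in the cluster, which
   * is a strict majority.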
*) 202 | assert(committed_logs = []); 203 | assert(added_logs = []); 204 | assert([] = deleted_logs); 205 | 206 | assert(1 = server0.current_term); 207 | (* Becoming a [Leader] should not affect the term. (Only a new election).*) 208 | 209 | begin match server0.role with 210 | | Leader followers -> ( 211 | assert(2 = List.length followers); 212 | (* The leader maintain various state for each of the 213 | * other servers. *) 214 | 215 | List.iter (fun follower -> 216 | 217 | assert(follower.next_index = 1); 218 | assert(follower.match_index = 0); 219 | assert(follower.heartbeat_deadline = 220 | now +. default_configuration.hearbeat_timeout); 221 | assert(follower.outstanding_request = true); 222 | 223 | ) followers; 224 | ) 225 | | _ -> assert(false) 226 | end; 227 | 228 | assert(2 = List.length msgs); 229 | (* Upon becoming a Leader a server must immediately 230 | * send an [Append_entries] request to all the other servers 231 | * to establish its leadership. *) 232 | 233 | List.iter (fun (msg, _ ) -> 234 | match msg with 235 | | Append_entries_request r -> 236 | assert(1 = r.leader_term); 237 | assert(0 = r.leader_id); 238 | assert(0 = r.prev_log_index); 239 | assert(0 = r.prev_log_term); 240 | assert([] = r.log_entries); 241 | (* We have not yet added any log to the [Leader] so 242 | * no new entries are sent to the other servers. *) 243 | assert(0 = r.leader_commit); 244 | | _ -> assert(false); 245 | ) msgs; 246 | 247 | (* 248 | * Send Append_entries_request Server0 -> Server1 249 | * ---------------------------------------------------------------------- 250 | *) 251 | 252 | let now = now +. 0.001 in 253 | let res = 254 | let msg = msg_for_server msgs 1 in 255 | Protocol.handle_message server1 msg now 256 | in 257 | 258 | let { 259 | state = server1; 260 | messages_to_send = msgs; 261 | leader_change; 262 | committed_logs; 263 | added_logs; 264 | deleted_logs; 265 | } = res in 266 | 267 | assert(Some (New_leader 0)= leader_change); 268 | assert([] = committed_logs); 269 | assert(added_logs = []); 270 | assert([] = deleted_logs); 271 | 272 | begin match server1.role with 273 | | Follower f -> ( 274 | 275 | assert(f.voted_for = Some 0); 276 | (* [voted_for] is still assigned to server0 since the current term 277 | * has not changed. *) 278 | 279 | assert(f.current_leader = Some 0); 280 | (* [Append_entries] request indicates the leadership role of the sender. 281 | * 282 | * server1 then updates its state to keep track of the current [Leader] (ie 283 | * server0 in our case). *) 284 | 285 | assert(f.election_deadline = now +. default_configuration.election_timeout); 286 | (* Because it just receive a message from the [Leader], the 287 | * [election_deadline] is extended for another [election_timeout] amount 288 | * of time. *) 289 | ) 290 | | _ -> assert(false) 291 | end; 292 | 293 | assert(1 = List.length msgs); 294 | (* The server1 is expected to send a single response back to the 295 | * sender (ie server0). *) 296 | 297 | begin match List.hd msgs with 298 | | (Append_entries_response r, 0) -> ( 299 | assert(r.receiver_id = 1); 300 | assert(r.receiver_term = 1); 301 | assert(r.result = Success 0); 302 | ) 303 | | _ -> assert(false) 304 | end; 305 | 306 | ({server0; server1; server2}, now) 307 | 308 | 309 | (* In this part of the test server2 which was previously disconnected 310 | * during the election_1 is now starting its own election for the same 311 | * term 1. Both server1 and server0 denies their vote since they have both 312 | * already granted their vote in this term. 
313 | * 314 | * At the end of the election server2 is still a candidate while server0 315 | * maintains its leader role and server1 is its follower. *) 316 | let failed_election_1 {server0; server1; server2} now = 317 | 318 | (* 319 | * Since server2 has not received any message from a leader, 320 | * it will start a new election. 321 | * ---------------------------------------------------------------------- 322 | *) 323 | 324 | let { 325 | state = server2; 326 | messages_to_send = request_vote_msgs; 327 | leader_change; 328 | committed_logs; 329 | added_logs; 330 | deleted_logs; } = Protocol.handle_new_election_timeout server2 now 331 | in 332 | 333 | assert(None = leader_change); 334 | assert([] = committed_logs); 335 | assert(added_logs = []); 336 | assert(deleted_logs = []); 337 | 338 | assert(is_candidate server2); 339 | (* Server2 never got an [Append_entries] request which would have 340 | * established server0 leadership. Therefore as far as server2 is 341 | * concerned there were no leaders. *) 342 | assert(1 = server2.current_term); 343 | assert(2 = List.length request_vote_msgs); 344 | 345 | List.iter (fun (msg, _) -> 346 | match msg with 347 | | Request_vote_request r -> ( 348 | assert(r.candidate_term = 1); 349 | assert(r.candidate_id = 2); 350 | assert(r.candidate_last_log_index = 0); 351 | assert(r.candidate_last_log_term = 0); 352 | ) 353 | | _ -> assert(false) 354 | ) request_vote_msgs; 355 | 356 | (* 357 | * Send Request_vote_request Server2 -> Server1 358 | * --------------------------------------------------------------------- 359 | *) 360 | 361 | let now = now +. 0.001 in 362 | let res = 363 | let msg = msg_for_server request_vote_msgs 1 in 364 | Protocol.handle_message server1 msg now 365 | in 366 | 367 | let { 368 | state = server1; 369 | messages_to_send = msgs; 370 | leader_change; 371 | committed_logs; 372 | added_logs; 373 | deleted_logs; 374 | } = res in 375 | 376 | assert(None = leader_change); 377 | assert([] = committed_logs); 378 | assert(added_logs = []); 379 | assert([] = deleted_logs); 380 | 381 | begin match server1.role with 382 | | Follower f -> ( 383 | assert(f.voted_for = Some 0); 384 | assert(f.current_leader = Some 0); 385 | (* server0 state is unaffected by this new [Candidate] for this 386 | * term. *) 387 | ) 388 | | _ -> assert(false); 389 | end; 390 | 391 | begin match msgs with 392 | | (Request_vote_response r, 2)::[] -> ( 393 | assert(r.voter_id = 1); 394 | assert(r.voter_term = 1); 395 | assert(r.vote_granted = false); 396 | (* Server1 has already voted for server0 in this election 397 | * term, so it should deny its vote to server2. *) 398 | ) 399 | | _ -> assert(false) 400 | end; 401 | 402 | (* Send Request_vote_response Server1 -> Server2 403 | * 404 | * (vote not granted) 405 | * --------------------------------------------------------------------- 406 | *) 407 | 408 | let now = now +. 
0.001 in 409 | 410 | let res = 411 | let msg = msg_for_server msgs 2 in 412 | Protocol.handle_message server2 msg now 413 | in 414 | 415 | let { 416 | state = server2; 417 | messages_to_send = msgs; 418 | leader_change; 419 | committed_logs; 420 | added_logs; 421 | deleted_logs; 422 | } = res in 423 | 424 | assert(is_candidate server2); 425 | assert(None = leader_change); 426 | (* Despite the vote not being granted by server1, server2 427 | * should continue to be a [Candidate] until either 428 | * 429 | * - a [Request_vote] response with a granted vote is replied 430 | * by another server 431 | * 432 | * - new election timeout elapsed in this case it will start 433 | * a new election. 434 | * 435 | * - a valid [Leader] sends an [Append_entries] request, in which 436 | * case it will become a [Follower]. 437 | *) 438 | 439 | assert([] = committed_logs); 440 | assert(added_logs = []); 441 | assert([] = deleted_logs); 442 | assert(1 = server2.current_term); 443 | (* No new election should have been started. 444 | *) 445 | 446 | assert([] = msgs); 447 | (* server2 is still a candidate but for the time being it has no 448 | * message to send to any servers. *) 449 | 450 | (* 451 | * Send Request_vote Server2 -> Server0 452 | * ----------------------------------------------------------------------- 453 | *) 454 | 455 | let res = 456 | let msg = msg_for_server request_vote_msgs 0 in 457 | Protocol.handle_message server0 msg now 458 | in 459 | 460 | let { 461 | state = server0; 462 | messages_to_send = msgs; 463 | leader_change; 464 | committed_logs; 465 | added_logs; 466 | deleted_logs; 467 | } = res in 468 | 469 | assert(is_leader server0); 470 | assert(None = leader_change); 471 | (* Server0 is still a [Leader] and should not be affected 472 | * by a Candidate for the same term. 473 | * 474 | * (This would be different if the Candidate was for a later term) *) 475 | assert(1 = List.length msgs); 476 | assert([] = committed_logs); 477 | assert(added_logs = []); 478 | assert([] = deleted_logs); 479 | 480 | begin match List.hd msgs with 481 | | (Request_vote_response r, 2) -> ( 482 | 483 | assert(r.voter_id = 0); 484 | assert(r.voter_term = 1); 485 | assert(r.vote_granted = false); 486 | (* Server0 being the [Leader] for term 1, it should 487 | * not grant its vote. *) 488 | ) 489 | | _ -> assert(false) 490 | end; 491 | 492 | let now = now +. 0.002 in 493 | 494 | (* 495 | * Send Request_vote_response Server0 -> Server2 496 | * 497 | * (vote not granted) 498 | * ----------------------------------------------------------------------- 499 | *) 500 | 501 | let res = 502 | let msg = msg_for_server msgs 2 in 503 | Protocol.handle_message server2 msg now 504 | in 505 | 506 | let { 507 | state = server2; 508 | messages_to_send = msgs; 509 | leader_change; 510 | committed_logs; 511 | added_logs; 512 | deleted_logs; 513 | } = res in 514 | 515 | assert(deleted_logs = []); 516 | assert(is_candidate server2); 517 | (* Yes despite all other server denying their vote, server2 518 | * is still a [Candidate]. It should not take any further 519 | * action until its election timeout has elapsed. *) 520 | 521 | assert(None = leader_change); 522 | assert([] = committed_logs); 523 | assert(added_logs = []); 524 | 525 | assert([] = msgs); 526 | (* No new message from server2 for this elections. All [Request_vote] 527 | * requests have been sent and the unsucessful responses received. 
*) 528 | ({server0; server1; server2}, now) 529 | 530 | (* In this part of the test server0 the leader is sending heartbeat 531 | * messages (ie empty Append_entries_request) to all the followers. 532 | * 533 | * Server2 is no longer disconnected and will receive the msg; the msg 534 | * will establish server0 leadership to server2 which will update its role 535 | * to be a follower of server0. 536 | * 537 | * At the end server0 is the leader and both server1 and server2 are followers 538 | * of server0.*) 539 | let leader_heartbeat_1 {server0; server1; server2} now = 540 | 541 | (* Since the heartbeat timeout is usually much shorter 542 | * than a new election timeout, it's likely that server0 543 | * (ie the current [Leader]) will send heartbeats to the other 544 | * 2 servers. 545 | * ----------------------------------------------------------------------- 546 | *) 547 | 548 | (* First let's make sure that even because the heartbeat deadline 549 | * for any server has not been reached, a heartbeat timeout should not 550 | * trigger new messages. 551 | *) 552 | 553 | let {state = server0; messages_to_send = msgs; _ } = 554 | Protocol.handle_heartbeat_timeout server0 now 555 | in 556 | 557 | assert([] = msgs); 558 | (* Heartbeat messages are only sent to servers which have not recveived a 559 | * message for at least the [hearbeat_timeout] amount of time. 560 | * It's not the case since [hearbeat_timeout] is 0.02. *) 561 | 562 | let now = now +. default_configuration.hearbeat_timeout in 563 | 564 | let {state = server0; messages_to_send = hb_msgs; _ } = 565 | Protocol.handle_heartbeat_timeout server0 now 566 | in 567 | 568 | assert(2 = List.length hb_msgs); 569 | (* Because we added [hearbeat_timeout] to the previous time, we know for 570 | * sure that heartbeats messages are past due for all of the followers. *) 571 | 572 | List.iter (fun (msg, _) -> 573 | match msg with 574 | | Append_entries_request r -> ( 575 | assert(r.leader_term = 1); 576 | assert(r.leader_id = 0); 577 | assert(r.prev_log_index = 0); 578 | assert(r.prev_log_term = 0); 579 | assert(r.log_entries = []); 580 | assert(r.leader_commit = 0); 581 | ) 582 | | _ -> assert(false); 583 | ) hb_msgs; 584 | 585 | (* 586 | * Send Append_entries_request Server0 -> Server1 587 | * 588 | * (server2 becomes a follower) 589 | * -------------------------------------------------------------------- 590 | *) 591 | 592 | let now = now +. 0.001 in 593 | let res = 594 | let msg = msg_for_server hb_msgs 2 in 595 | Protocol.handle_message server2 msg now 596 | in 597 | 598 | let { 599 | state = server2; 600 | messages_to_send = msgs; 601 | leader_change; 602 | committed_logs; 603 | added_logs; 604 | deleted_logs; 605 | } = res in 606 | 607 | assert(is_follower server2); 608 | assert(1 = server2.current_term); 609 | assert(Some (New_leader 0)= leader_change); 610 | assert([] = committed_logs); 611 | assert(added_logs = []); 612 | assert(deleted_logs = []); 613 | (* Receiving an [Append_entries] request with a term at least equal 614 | * or supperior to one [current_term] means that the sender is a valid 615 | * [Leader] for that term and therefore the recipient must become a 616 | * [Follower]. *) 617 | 618 | begin match server2.role with 619 | | Follower fs -> ( 620 | assert(fs.voted_for = Some 2); 621 | (* For that term, since this server was also a candidate it 622 | * did already vote for itself. 
*) 623 | assert(fs.current_leader = Some 0); 624 | (* server2 is now aware that server0 is the [Leader] for 625 | * term 1 *) 626 | assert(fs.election_deadline = 627 | now +. default_configuration.election_timeout); 628 | ) 629 | | _ -> assert(false); 630 | end; 631 | 632 | assert(1 = List.length msgs); 633 | (* Response for the [Append_entries] *) 634 | 635 | begin match msgs with 636 | | ((Append_entries_response r), server_id) :: [] -> ( 637 | assert(server_id = 0); 638 | assert(r.receiver_id = 2); 639 | assert(r.receiver_term = 1); 640 | assert(r.result = Success 0); 641 | ) 642 | | _ -> assert(false) 643 | end; 644 | 645 | (* 646 | * Send Append_entries_response Server2 -> Server0 647 | *----------------------------------------------------------------------- 648 | *) 649 | 650 | let res = 651 | let msg = msg_for_server msgs 0 in 652 | Protocol.handle_message server0 msg now 653 | in 654 | 655 | let { 656 | state = server0; 657 | messages_to_send = msgs; 658 | leader_change; 659 | committed_logs; 660 | added_logs; 661 | deleted_logs; 662 | } = res in 663 | assert(None = leader_change); 664 | assert([] = committed_logs); 665 | assert([] = msgs); 666 | assert(added_logs = []); 667 | assert(deleted_logs = []); 668 | 669 | (* 670 | * Send Append_entries_request Server0 -> Server1 671 | * ----------------------------------------------------------------------- 672 | *) 673 | 674 | let res = 675 | let msg = msg_for_server hb_msgs 1 in 676 | Protocol.handle_message server1 msg now 677 | in 678 | 679 | let { 680 | state = server1; 681 | messages_to_send = msgs; 682 | leader_change; 683 | committed_logs; 684 | added_logs; 685 | deleted_logs; 686 | } = res in 687 | 688 | assert(is_follower server1); 689 | (* No change in the role, server0 is a valid [Leader], 690 | * server1 stays a [Follower]. *) 691 | 692 | assert(None = leader_change); 693 | assert([] = committed_logs); 694 | assert(added_logs = []); 695 | assert(deleted_logs = []); 696 | (* The heartbeat message from server0 did not bring new 697 | * information (ie server1 already new server0 was the 698 | * [Leader].) *) 699 | 700 | assert(1 = List.length msgs); 701 | (* Single [Append_entries_response] expected. *) 702 | 703 | begin match msgs with 704 | | (Append_entries_response r, 0)::[] -> ( 705 | assert(r.receiver_id = 1); 706 | assert(r.receiver_term = 1); 707 | assert(r.result = Success 0); 708 | ) 709 | | _ -> assert(false) 710 | end; 711 | 712 | let now = now +. 0.001 in 713 | 714 | (* 715 | * Send Append_entries_response Server1 -> Server0 716 | * 717 | * -------------------------------------------------------------------------- 718 | *) 719 | 720 | (* Note that it's important for the rest of the test that 721 | * we explicitely handle the response from server1 in server0. 722 | * Each [Leader] is keeping track of whether or not there is an 723 | * [outstanding_request] for each server. 724 | * 725 | * The [Leader] would then avoid sending new [Append_entries] requests 726 | * which already have an outstanding requests. However if a heartbeat timeout 727 | * has been reached, a new [Append_entries] request will be sent no matter 728 | * what. 
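   *
   * This behaviour is implemented by [Log_entry_util.compute_append_entries]
   * in [Raft_protocol]: an outstanding request suppresses a new
   * [Append_entries] request only while the follower's heartbeat deadline
   * has not been reached.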
*) 729 | 730 | let res = 731 | let msg = msg_for_server msgs 0 in 732 | Protocol.handle_message server0 msg now 733 | in 734 | 735 | let { 736 | state = server0; 737 | messages_to_send = msgs; 738 | leader_change; 739 | committed_logs; 740 | added_logs; 741 | deleted_logs; 742 | } = res in 743 | 744 | assert(is_leader server0); 745 | assert(None = leader_change); 746 | assert([] = committed_logs); 747 | assert(added_logs = []); 748 | assert(deleted_logs = []); 749 | assert([] = msgs); 750 | 751 | ({server0; server1; server2}, now) 752 | 753 | (* In this part of the a new log is added to server0 the leader. This log 754 | * triggers 2 Append_entries_request to server1 and server2. Only server1 755 | * successfully receives the msg, replicates the log and send back the 756 | * response to server0. server2 will not receive the msg. 757 | * 758 | * At the end server0 is the leader with a single log entry in its log; its 759 | * commit index is 1 since it was successfully replicated on server1. Server1 760 | * is still a follower with a single log entry in its log and a commit index 761 | * of 0 since it is not yet aware that this first log entry was commited. 762 | * Server2 is still a follower with an empty log since it never received 763 | * the log entry #1. *) 764 | let add_first_log {server0; server1; server2} now = 765 | 766 | (* Let's now add a log entry to the [Leader] which is expected to trigger 767 | * the corresponding [Append_entries] requests to the other servers. *) 768 | let new_log_result = 769 | let data = Bytes.of_string "Message01" in 770 | Protocol.handle_add_log_entries server0 [(data, "01")] now 771 | in 772 | 773 | let server0, data1_msgs = 774 | let open Protocol in 775 | match new_log_result with 776 | | Appended result -> 777 | let { 778 | state; 779 | messages_to_send; 780 | committed_logs; 781 | added_logs;deleted_logs;_} = result in 782 | assert([] = committed_logs); 783 | assert(deleted_logs = []); 784 | begin match added_logs with 785 | | [{id="01"; index = 1; term = 1; _}] -> () 786 | | _ -> assert(false) 787 | end; 788 | (state, messages_to_send) 789 | (* server0 is the [Leader] and is therefore expected to 790 | * handle the new log entry. *) 791 | 792 | | Delay | Forward_to_leader _ -> assert(false) 793 | in 794 | 795 | assert(1 = recent_log_length server0); 796 | (* The new log entry should have been appended to the current empty log.*) 797 | 798 | begin match recent_log_hd server0 with 799 | | {index = 1; term = 1; _ } -> () 800 | (* Log should start as 1 and be set to the current 801 | * term (ie 1.) *) 802 | 803 | | _ -> assert(false) 804 | end; 805 | 806 | assert(2 = List.length data1_msgs); 807 | (* Both [Follower]s have no outstanding request and have also less 808 | * log entries than the [Leader], therefore they 809 | * should get a new [Append_entries] request message with the new 810 | * log *) 811 | 812 | List.iter (fun (msg, _) -> 813 | match msg with 814 | | Append_entries_request r -> ( 815 | assert(r.leader_term = 1); 816 | assert(r.leader_id = 0); 817 | assert(r.prev_log_index = 0); 818 | assert(r.prev_log_term = 0); 819 | assert(1 = List.length r.log_entries); 820 | (* 821 | * Contains the log entry to be synchronized. 822 | *) 823 | begin match r.log_entries with 824 | | {index = 1; term = 1; _} :: [] -> () 825 | | _ -> assert(false) 826 | end; 827 | assert(r.leader_commit = 0); 828 | ) 829 | | _ -> assert(false) 830 | ) data1_msgs; 831 | 832 | let now = now +. 
0.001 in 833 | 834 | (* 835 | * Send Append_entries_request Server0 -> Server1 836 | * ---------------------------------------------------------------------- 837 | *) 838 | 839 | let res = 840 | let msg = msg_for_server data1_msgs 1 in 841 | Protocol.handle_message server1 msg now 842 | in 843 | 844 | let { 845 | state = server1; 846 | messages_to_send = msgs; 847 | leader_change; 848 | committed_logs; 849 | added_logs; 850 | deleted_logs; 851 | } = res in 852 | 853 | assert(is_follower server1); 854 | (* No change of role for server1, [Append_entries] only 855 | * re-inforce that server0 is the [Leader]. *) 856 | 857 | begin match added_logs with 858 | | [{id = "01"; index = 1; term = 1; _}] -> () 859 | | _ -> assert(false) 860 | end; 861 | assert(1 = recent_log_length server1); 862 | (* The [log_entry] with index 1 has been replicated 863 | * on server1. *) 864 | 865 | assert(0 = server1.commit_index); 866 | assert(None = leader_change); 867 | assert(deleted_logs = []); 868 | assert([] = committed_logs); 869 | (* While server1 has successfully replicated the log entry 870 | * with [index = 1], it cannot assume that this latter log 871 | * entry has been committed (ie that it has been replicated on 872 | * a majority of servers). 873 | * 874 | * Therefore the [commit_index] is still 0. 875 | * 876 | * It will be updated upon receiving the next [Append_entries_request] *) 877 | 878 | begin match msgs with 879 | | (Append_entries_response r, 0) :: [] -> ( 880 | assert(r.receiver_id = 1); 881 | assert(r.receiver_term = 1); 882 | assert(r.result = Success 1); 883 | (* server1 has correctly replicated the log entry which has index1. *) 884 | ) 885 | | _ -> assert(false) 886 | end; 887 | 888 | (* 889 | * Send Append_entries_response Server1 -> Server0 890 | * -------------------------------------------------------------------- 891 | *) 892 | 893 | let res = 894 | let msg = msg_for_server msgs 0 in 895 | Protocol.handle_message server0 msg now 896 | in 897 | 898 | let { 899 | state = server0; 900 | messages_to_send = _; 901 | leader_change; 902 | committed_logs; 903 | added_logs; 904 | deleted_logs; 905 | } = res in 906 | 907 | assert(is_leader server0); 908 | assert(1 = server0.commit_index); 909 | assert(None = leader_change); 910 | assert(deleted_logs = []); 911 | assert(added_logs = []); 912 | (* The log with index1 was already added so added_log is empty *) 913 | begin match committed_logs with 914 | | {id = "01"; _ }::[] -> () 915 | (* server1 has replicated the log successfully so this means 916 | * that a majority of servers have done the replication. 917 | * The [Leader] commit_index can now be updated to that latest 918 | * log index (ie 1). *) 919 | | _ -> assert(false) 920 | end; 921 | 922 | ({server0; server1; server2}, now) 923 | 924 | (* In this part of the test a second log entry is added to server0 the leader. 925 | * This log entry is successfully replicated on server1 only. Server2 is 926 | * not receiving the Append_entries_request message. Server1 commit index 927 | * is now [1] since the leader commit index of 1 was sent in the request. 928 | * 929 | * At the end server0 is the leader with 2 log entries and a commit index 930 | * of 2, server1 is a follower with 2 log entries and a commit index of 1, 931 | * server2 is a follower with an empty log. *) 932 | let add_second_log {server0; server1; server2} now = 933 | let now = now +. 
0.001 in 934 | 935 | let new_log_result = 936 | let data = Bytes.of_string "Message02" in 937 | Protocol.handle_add_log_entries server0 [(data,"02")] now 938 | in 939 | 940 | let server0, data2_msg = 941 | let open Protocol in 942 | match new_log_result with 943 | | Delay | Forward_to_leader _ -> assert(false) 944 | | Appended {state; messages_to_send; added_logs; _ } -> begin 945 | begin match added_logs with 946 | | [{id = "02"; index = 2; term = 1; _}] -> () 947 | | _ -> assert(false) 948 | end; 949 | (state, messages_to_send) 950 | end 951 | in 952 | 953 | assert(is_leader server0); 954 | 955 | assert(2 = recent_log_length server0); 956 | (* The second log entry should have been appended to the 957 | * server log. *) 958 | 959 | begin match recent_log_hd server0 with 960 | | {index = 2; term = 1; _ } -> () 961 | (* Make sure the index is incremented by 1. *) 962 | | _ -> assert(false) 963 | end; 964 | 965 | assert(1 = server0.commit_index); 966 | (* The new log entry (with index [2]) should not be committed 967 | * since no request/response interaction has yet been done. *) 968 | 969 | assert(1 = List.length data2_msg); 970 | (* Since server2 has an outstanding request, it should not 971 | * be sent an additional request. Only server1 should receive 972 | * an [Append_entries] request. *) 973 | 974 | begin match List.hd data2_msg with 975 | | (Append_entries_request r, 1) -> ( 976 | 977 | assert(r.leader_term = 1); 978 | assert(r.leader_id = 0); 979 | assert(r.prev_log_index = 1); 980 | (* Server0 [Leader] knows that the server1 has successfully 981 | * replicated the log entry with [index = 1] and therefore 982 | * will send [prev_log_index] with value 1. *) 983 | 984 | assert(r.prev_log_term = 1); 985 | assert(1 = List.length r.log_entries); 986 | (* Only the last [log_entry] should be sent to that follower, 987 | * since the first [log_entry] was already replicated. 988 | *) 989 | assert(r.leader_commit = 1); 990 | ) 991 | | _ -> assert(false) 992 | end; 993 | 994 | let now = now +. 0.001 in 995 | 996 | (* 997 | * Send Append_entries_request Server0 -> Server1 998 | * ------------------------------------------------------------------- 999 | *) 1000 | 1001 | let res = 1002 | let msg = msg_for_server data2_msg 1 in 1003 | Protocol.handle_message server1 msg now 1004 | in 1005 | 1006 | let { 1007 | state = server1; 1008 | messages_to_send = msgs; 1009 | leader_change; 1010 | committed_logs; 1011 | added_logs; 1012 | deleted_logs; 1013 | } = res in 1014 | 1015 | assert(is_follower server1); 1016 | 1017 | assert(2 = recent_log_length server1); 1018 | (* server1 should have correctly replicated the log 1019 | * with [index = 2]. *) 1020 | 1021 | assert(None = leader_change); 1022 | 1023 | assert(1 = server1.commit_index); 1024 | begin match committed_logs with 1025 | | {id = "01"; _ }::[] -> () 1026 | | _ -> assert(false) 1027 | end; 1028 | (* The [Append_entries] request contained the [commit_index] 1029 | * of the [Leader] (1 in this case) and therefore server1 has 1030 | * updated its own. *) 1031 | begin match added_logs with 1032 | | {id = "02"; index = 2; term = 1; _}::[] -> () 1033 | | _ -> assert(false) 1034 | end; 1035 | (* The new log number 2 is now added in this follower .. but not commited 1036 | * as verified previously. *) 1037 | 1038 | assert(deleted_logs = []); 1039 | assert(1 = List.length msgs); 1040 | (* Only a single response to server0 should be 1041 | * sent back. 
*)
1042 | 
1043 | begin match List.hd msgs with 
1044 | | (Append_entries_response r, 0) -> ( 
1045 | assert(r.receiver_id = 1); 
1046 | assert(r.receiver_term = 1); 
1047 | assert(r.result = Success 2); 
1048 | (* server1 notifies the [Leader] about the last log it has 
1049 | * replicated. 
1050 | *) 
1051 | ) 
1052 | | _ -> assert(false) 
1053 | end; 
1054 | 
1055 | (* 
1056 | * Send Append_entries_response Server1 -> Server0 
1057 | * ---------------------------------------------------------------------- 
1058 | *) 
1059 | 
1060 | let now = now +. 0.001 in 
1061 | 
1062 | let res = 
1063 | let msg = msg_for_server msgs 0 in 
1064 | Protocol.handle_message server0 msg now 
1065 | in 
1066 | 
1067 | let { 
1068 | state = server0; 
1069 | messages_to_send = _; 
1070 | leader_change; 
1071 | committed_logs; 
1072 | added_logs; 
1073 | deleted_logs; 
1074 | } = res in 
1075 | 
1076 | assert(is_leader server0); 
1077 | 
1078 | assert(None = leader_change); 
1079 | 
1080 | assert(2 = server0.commit_index); 
1081 | begin match committed_logs with 
1082 | | {id = "02"; _ }::[] -> () 
1083 | | _ -> assert(false) 
1084 | end; 
1085 | (* A single successful replication is enough for a majority. *) 
1086 | assert(added_logs = []); 
1087 | assert(deleted_logs = []); 
1088 | ({server0; server1; server2}, now) 
1089 | 
1090 | (* In this part of the test, the leader reaches its heartbeat timeout 
1091 | * and sends an Append_entries_request to each of its followers. Since 
1092 | * server1 has replicated all the logs, its message contains no log 
1093 | * entries but its commit_index will be set to 2. Server2's msg will 
1094 | * contain the last 2 log entries since it has not replicated them. 
1095 | * 
1096 | * This time server2 receives the messages, replicates the 2 logs and updates 
1097 | * its commit index to [2]. 
1098 | * 
1099 | * At the end of the test, server0 is the leader with 2 log entries and a 
1100 | * commit index of [2]. Server1 and server2 are followers with 2 log entries 
1101 | * and a commit index of [2]. *) 
1102 | let leader_heartbeat_2 {server0; server1; server2} now = 
1103 | 
1104 | let now = now +. default_configuration.hearbeat_timeout in 
1105 | 
1106 | let {state = server0; messages_to_send = msgs; added_logs; _ } = 
1107 | Protocol.handle_heartbeat_timeout server0 now 
1108 | in 
1109 | 
1110 | assert(added_logs = []); 
1111 | assert(2 = List.length msgs); 
1112 | 
1113 | begin match msg_for_server msgs 1 with 
1114 | | Append_entries_request r -> ( 
1115 | assert(r.leader_term = 1); 
1116 | assert(r.leader_id = 0); 
1117 | assert(r.prev_log_term = 1); 
1118 | assert(r.log_entries = []); 
1119 | (* As expected no new log entry should be sent 
1120 | * since they have all been sent previously and [server1] 
1121 | * replied successfully. *) 
1122 | 
1123 | assert(r.prev_log_index = 2); 
1124 | assert(r.leader_commit = 2); 
1125 | ) 
1126 | | _ -> assert(false) 
1127 | end; 
1128 | 
1129 | begin match msg_for_server msgs 2 with 
1130 | | Append_entries_request r -> ( 
1131 | assert(r.leader_term = 1); 
1132 | assert(r.leader_id = 0); 
1133 | assert(r.prev_log_term = 0); 
1134 | 
1135 | assert(List.length r.log_entries = 2); 
1136 | assert(r.prev_log_index = 0); 
1137 | (* This reflects the knowledge of server0 (leader) which has never 
1138 | * received an Append_entries_response from server2 indicating 
1139 | * that logs were replicated. *) 
1140 | assert(r.leader_commit = 2); 
1141 | ) 
1142 | | _ -> assert(false) 
1143 | end; 
1144 | 
1145 | let now = now +. 
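0.0 in

(* Illustrative sketch only (made-up names, not the library's API): why the
 * two heartbeat requests above differ. The leader keeps, per follower, the
 * last index it believes that follower has replicated and only sends the
 * entries strictly after that point (here a list of (index, data) pairs
 * stands in for the real log). *)
let _entries_to_send ~leader_log ~follower_last_index =
  List.filter (fun (index, _data) -> index > follower_last_index) leader_log
in
let _leader_log = [(1, "Message01"); (2, "Message02")] in
assert([] = _entries_to_send ~leader_log:_leader_log ~follower_last_index:2);
assert(2 = List.length
  (_entries_to_send ~leader_log:_leader_log ~follower_last_index:0));

let now = now +. 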
0.001 in 1146 | 1147 | (* 1148 | * Send Append_entries_request Server0 -> Server1 1149 | * --------------------------------------------------------------------- 1150 | *) 1151 | let res = 1152 | let msg = msg_for_server msgs 1 in 1153 | Protocol.handle_message server1 msg now 1154 | in 1155 | 1156 | let { 1157 | state = server1; 1158 | messages_to_send = server1_response; 1159 | leader_change; 1160 | committed_logs; 1161 | added_logs; 1162 | deleted_logs; 1163 | } = res in 1164 | 1165 | assert(is_follower server1); 1166 | assert(None = leader_change); 1167 | 1168 | begin match committed_logs with 1169 | | {id = "02"; _ }::[] -> () 1170 | | _ -> assert(false) 1171 | end; 1172 | assert(2 = server1.commit_index); 1173 | (* server1 is updating its commit index based on latest 1174 | * [leader_commit] value of 2 in the request it received. *) 1175 | 1176 | assert(added_logs = []); 1177 | (* All logs have already been replicated in server 1. *) 1178 | 1179 | assert(deleted_logs = []); 1180 | 1181 | assert(1 = List.length server1_response); 1182 | (* Only a single response is expected. *) 1183 | 1184 | (* 1185 | * Send Append_entries_request Server0 -> Server2 1186 | * --------------------------------------------------------------------- 1187 | *) 1188 | 1189 | let res = 1190 | let msg = msg_for_server msgs 2 in 1191 | Protocol.handle_message server2 msg now 1192 | in 1193 | 1194 | let { 1195 | state = server2; 1196 | messages_to_send = server2_response; 1197 | leader_change; 1198 | committed_logs; 1199 | added_logs; 1200 | deleted_logs; 1201 | } = res in 1202 | 1203 | assert(is_follower server2); 1204 | begin match committed_logs with 1205 | | {id = "01"; _ }::{id = "02"; _}::[] -> () 1206 | | _ -> assert(false) 1207 | end; 1208 | assert(added_logs = committed_logs); 1209 | (* The 2 previous logs were never replicated to the server 2 before 1210 | * this heartbeat message *) 1211 | assert(deleted_logs = []); 1212 | assert(None = leader_change); 1213 | assert(2 = server2.commit_index); 1214 | (* server2 is updating its commit index based on latest 1215 | * [leader_commit] value of 2 in the request it received. *) 1216 | 1217 | assert(2 = recent_log_length server2); 1218 | (* server2 should have caught up with server0 and replicated 1219 | * all the logs in the cache (ie 1) *) 1220 | 1221 | assert(1 = List.length server2_response); 1222 | (* Only a single response is expected. *) 1223 | 1224 | begin match server2_response with 1225 | | (Append_entries_response r, 0)::[] -> ( 1226 | assert(r.receiver_id = 2); 1227 | assert(r.receiver_term = 1); 1228 | assert(r.result = Success 2); 1229 | (* server2 has successfully replicated the 2 log entries *) 1230 | ) 1231 | | _ -> assert(false) 1232 | end; 1233 | 1234 | (* 1235 | * Send Append_entries_response Server2 -> Server0 1236 | * ---------------------------------------------------------------------- 1237 | *) 1238 | 1239 | let res = 1240 | let res = 1241 | let msg = msg_for_server server1_response 0 in 1242 | Protocol.handle_message server0 msg now 1243 | in 1244 | 1245 | let { 1246 | state = server0; 1247 | messages_to_send; 1248 | leader_change; 1249 | committed_logs; 1250 | added_logs; 1251 | deleted_logs; 1252 | } = res in 1253 | 1254 | assert([] = messages_to_send); 1255 | assert(None = leader_change); 1256 | assert([] = committed_logs); 1257 | assert(added_logs = []); 1258 | assert(deleted_logs = []); 1259 | (* Server1 has replicated the 2 logs it has nothing 1260 | * left. 
*) 1261 | 1262 | let msg = msg_for_server server2_response 0 in 1263 | Protocol.handle_message server0 msg now 1264 | in 1265 | 1266 | let { 1267 | state = server0; 1268 | messages_to_send; 1269 | leader_change; 1270 | committed_logs; 1271 | added_logs; 1272 | deleted_logs; 1273 | } = res in 1274 | 1275 | assert(is_leader server0); 1276 | assert(2 = server0.commit_index); 1277 | assert(2 = recent_log_length server0); 1278 | assert(None = leader_change); 1279 | assert([] = committed_logs); 1280 | assert(added_logs = []); 1281 | assert(deleted_logs = []); 1282 | 1283 | (* Both servers have replicated the 2 logs, no outstanding 1284 | * logs to be sent. *) 1285 | assert(0 = List.length messages_to_send); 1286 | 1287 | ({server0; server1; server2}, now) 1288 | 1289 | (* In this part of the test, a 3rd log is added to the leader which 1290 | * successfully replicates it on server1 but the Append_entries_response 1291 | * is not sent back to server0. Therefore the log is not yet committed. 1292 | * Server 2 does not replicate the 3rd log however. 1293 | * 1294 | * At the end server0 is the leader with 3 logs and commit_index set to [2], 1295 | * server1 has 3 logs with commit_index set to [2] and server2 has 2 logs 1296 | * and commit_index set to [2]. *) 1297 | let add_third_log {server0; server1; server2} now = 1298 | 1299 | (* Let's now add a 3rd [log_entry] to the [Leader]. 1300 | * 1301 | * We'll replicate this 3rd entry on [server1] only and 1302 | * then simulate a [Leader] crash. 1303 | * 1304 | * The consequence of a [Leader] crash will be that one of the 1305 | * follower will start a new election. 1306 | * However only [server1] should become a [Leader] since it has replicated 1307 | * more [log_entry]s than [server2]. 1308 | * 1309 | * We will simulate and test the above assumption. *) 1310 | 1311 | let now = now +. 0.002 in 1312 | 1313 | let new_log_result = 1314 | let data = Bytes.of_string "Message03" in 1315 | Protocol.handle_add_log_entries server0 [(data, "03") ] now 1316 | in 1317 | 1318 | let server0, msgs = 1319 | match new_log_result with 1320 | | Protocol.Appended {state; messages_to_send; added_logs; _ } -> 1321 | begin match added_logs with 1322 | | {id = "03"; index = 3; term = 1; _ } :: [] -> () 1323 | | _ -> assert(false) 1324 | end; 1325 | (state, messages_to_send) 1326 | | _ -> assert(false) 1327 | in 1328 | assert(is_leader server0); 1329 | assert(3 = recent_log_length server0); 1330 | (* Correctly added log since server0 is a [Leader]. *) 1331 | 1332 | assert(2 = List.length msgs); 1333 | (* 2 [Append_entries] requests, one for each of the other 1334 | * 2 servers. *) 1335 | 1336 | (* 1337 | * Send Append_entries_request Server0 -> Server1 1338 | * --------------------------------------------------------------------- 1339 | *) 1340 | 1341 | let res = 1342 | let msg = msg_for_server msgs 1 in 1343 | Protocol.handle_message server1 msg now 1344 | in 1345 | 1346 | let { 1347 | state = server1; 1348 | messages_to_send = _; 1349 | leader_change; 1350 | committed_logs; 1351 | added_logs; 1352 | deleted_logs; 1353 | } = res in 1354 | 1355 | assert(is_follower server1); 1356 | assert(3 = recent_log_length server1); 1357 | (* * The 3rd [log_entry] is correctly replicated. *) 1358 | 1359 | assert(None = leader_change); 1360 | assert([] = committed_logs); 1361 | assert(2 = server1.commit_index); 1362 | (* No change since the [commit_index] was still 1363 | * 2 in the [Append_entries] request. 
*) 1364 | begin match added_logs with 1365 | | {id = "03"; index = 3; term =1 ; _ } :: [] -> () 1366 | | _ -> assert(false) 1367 | end; 1368 | 1369 | assert(deleted_logs = []); 1370 | 1371 | (* The response is not send back to server0 *) 1372 | ({server0; server1; server2}, now) 1373 | 1374 | (* In this part of the test, server2 starts a new election, however since 1375 | * it has only 2 log entries and server1 has 3 log entries, server1 will 1376 | * not grant its vote. 1377 | * 1378 | * At the end of the test server2 is a candidate for term [2] with only 1379 | * its own vote granted. Server1 is still a follower with no leader and 1380 | * no vote granted in term [2]. Server0 is still a leader for term [1]. *) 1381 | let failed_election_2 {server0; server1; server2} now = 1382 | 1383 | (* 1384 | * Server2 starts a new election 1385 | * -------------------------------------------------------------------------- 1386 | *) 1387 | 1388 | let now = now +. default_configuration.election_timeout in 1389 | 1390 | let { 1391 | state = server2; 1392 | messages_to_send = msgs; 1393 | leader_change; 1394 | committed_logs; 1395 | added_logs; 1396 | deleted_logs} = Protocol.handle_new_election_timeout server2 now 1397 | in 1398 | 1399 | assert(deleted_logs = []); 1400 | assert(is_candidate server2); 1401 | (* Server2 started a new election. *) 1402 | 1403 | assert(Some No_leader = leader_change); 1404 | (* Becoming a candidate and incrementing the terms 1405 | * means that there are no longer a [Leader]. *) 1406 | assert([] = committed_logs); 1407 | assert(added_logs = []); 1408 | 1409 | assert(2 = server2.current_term); 1410 | (* Each new election increments the term *) 1411 | 1412 | assert(2 = List.length msgs); 1413 | (* 2 [Request_vote] request for each of the other 2 1414 | * servers. *) 1415 | 1416 | List.iter (fun (r, _) -> 1417 | match r with 1418 | | Request_vote_request r -> ( 1419 | assert(r.candidate_term = 2); 1420 | assert(r.candidate_id = 2); 1421 | assert(r.candidate_last_log_index = 2); 1422 | assert(r.candidate_last_log_term = 1); 1423 | ) 1424 | | _ -> assert(false) 1425 | ) msgs; 1426 | 1427 | 1428 | (* 1429 | * Send Request_vote_request Server2 -> Server1 1430 | * ---------------------------------------------------------------------- 1431 | *) 1432 | 1433 | let now = now +. 0.001 in 1434 | 1435 | let res = 1436 | let msg = msg_for_server msgs 1 in 1437 | Protocol.handle_message server1 msg now 1438 | in 1439 | 1440 | let { 1441 | state = server1; 1442 | messages_to_send = msgs; 1443 | leader_change; 1444 | committed_logs; 1445 | added_logs; 1446 | deleted_logs; 1447 | } = res in 1448 | 1449 | assert(is_follower server1); 1450 | assert(2 = server1.current_term); 1451 | (* The sender (ie server2) term was greater than the current 1452 | * term of server1, it should then 1453 | * increments its current term. *) 1454 | 1455 | assert(Some No_leader = leader_change); 1456 | (* The change of term means that there are no current [Leader] 1457 | * for it yet. *) 1458 | assert([] = committed_logs); 1459 | assert(added_logs = []); 1460 | assert(deleted_logs = []); 1461 | 1462 | assert(1 = List.length msgs); 1463 | (* [Request_vote] response to server2. *) 1464 | 1465 | begin match msgs with 1466 | | (Request_vote_response r, 2)::[] -> ( 1467 | assert(r.voter_id = 1); 1468 | assert(r.voter_term = 2); 1469 | assert(r.vote_granted = false); 1470 | (* [server1] has more [log_entry] than [server2] and therefore 1471 | * rejects [server2] candidacy. 
1472 | * 
1473 | * This is to ensure the safety property of the RAFT protocol 
1474 | * so that no committed entries are later invalidated. *) 
1475 | ) 
1476 | | _ -> assert(false) 
1477 | end; 
1478 | ({server0; server1; server2}, now) 
1479 | 
1480 | (* In this part of the test, server1 takes its turn to start a new election 
1481 | * in term [3]. This time since it has more logs than server2, server2 grants its 
1482 | * vote and server1 becomes a leader. 
1483 | * 
1484 | * Upon becoming a leader server1 sends an Append_entries_request to both 
1485 | * server0 and server2, assuming their previous log index matches its own (ie [3]). 
1486 | * Server0 is disconnected and the msg is not sent to it. Server2 receives 
1487 | * the message, however it has not replicated log [3] and therefore sends 
1488 | * back a log failure indicating its previous log index which is [2]. 
1489 | * Server1 will then update its information about server2 and send back 
1490 | * a corrected Append_entries_request, this time with the missing third 
1491 | * entry. Upon receiving the Append_entries_response server1 will mark 
1492 | * the 3rd log entry as committed. 
1493 | * 
1494 | * At the end server0 is still in a leader role but for term 1 and it has 
1495 | * 3 log entries with a commit index of 2. Both server1 and server2 are in 
1496 | * term 3 with server1 the leader and server2 a follower. They both have 3 
1497 | * log entries; server1's commit index is 3 while server2's is 2. *) 
1498 | let election_2 {server0; server1; server2} now = 
1499 | 
1500 | (* 
1501 | * Let's now have server1 start a new election. 
1502 | * ---------------------------------------------------------------------- 
1503 | *) 
1504 | 
1505 | let now = now +. default_configuration.election_timeout in 
1506 | 
1507 | let { 
1508 | state = server1; 
1509 | messages_to_send = msgs; 
1510 | leader_change; 
1511 | committed_logs; 
1512 | added_logs; 
1513 | deleted_logs} = Protocol.handle_new_election_timeout server1 now 
1514 | in 
1515 | 
1516 | assert(None = leader_change); 
1517 | assert([] = committed_logs); 
1518 | assert(added_logs = []); 
1519 | assert(deleted_logs = []); 
1520 | 
1521 | assert(is_candidate server1); 
1522 | assert(3 = server1.current_term); 
1523 | assert(2 = List.length msgs); 
1524 | 
1525 | List.iter (fun (r, _) -> 
1526 | begin match r with 
1527 | | Request_vote_request r -> ( 
1528 | assert(r.candidate_term = 3); 
1529 | assert(r.candidate_id = 1); 
1530 | assert(r.candidate_last_log_index = 3); 
1531 | assert(r.candidate_last_log_term = 1); 
1532 | ) 
1533 | | _ -> assert(false) 
1534 | end 
1535 | ) msgs; 
1536 | 
1537 | (* 
1538 | * Send Request_vote_request server1 -> server2 
1539 | * ----------------------------------------------------------------------- 
1540 | *) 
1541 | 
1542 | let res = 
1543 | let msg = msg_for_server msgs 2 in 
1544 | Protocol.handle_message server2 msg now 
1545 | in 
1546 | 
1547 | let { 
1548 | state = server2; 
1549 | messages_to_send = msgs; 
1550 | leader_change; 
1551 | committed_logs; 
1552 | added_logs; 
1553 | deleted_logs; 
1554 | } = res in 
1555 | 
1556 | assert(is_follower server2); 
1557 | (* server1 [current_term] is greater than the one 
1558 | * in server2 (3 versus 2). 
1559 | * 
1560 | * Therefore server2 becomes a [Follower] and updates 
1561 | * its current_term. 
*) 1562 | 1563 | assert(3 = server2.current_term); 1564 | 1565 | assert(None = leader_change); 1566 | assert(deleted_logs = []); 1567 | assert([] = committed_logs); 1568 | assert(added_logs = []); 1569 | (* Server2 already knew there was no [Leader] since it was a candidate 1570 | * in the previous term and never got elected. *) 1571 | 1572 | assert(1 = List.length msgs); 1573 | 1574 | begin match msgs with 1575 | | (Request_vote_response r, 1)::[] -> ( 1576 | assert(r.voter_id = 2); 1577 | assert(r.voter_term = 3); 1578 | assert(r.vote_granted = true); 1579 | (* Vote is indeed granted since server1 has a greater 1580 | * [last_log_index]. *) 1581 | ) 1582 | | _ -> assert(false) 1583 | end; 1584 | 1585 | let now = now +. 0.001 in 1586 | 1587 | (* 1588 | * Send Request_vote_response Server2 -> Server1 1589 | * --------------------------------------------------------------------- 1590 | *) 1591 | 1592 | let res = 1593 | let msg = msg_for_server msgs 1 in 1594 | Protocol.handle_message server1 msg now 1595 | in 1596 | 1597 | let { 1598 | state = server1; 1599 | messages_to_send = msgs; 1600 | leader_change; 1601 | committed_logs; 1602 | added_logs; 1603 | deleted_logs; 1604 | } = res in 1605 | 1606 | assert(Some (New_leader 1) = leader_change); 1607 | assert([] = committed_logs); 1608 | assert(added_logs = []); 1609 | assert(deleted_logs = []); 1610 | assert(is_leader server1); 1611 | (* One vote is enough to become a [Leader]. *) 1612 | 1613 | assert(3 = server1.current_term); 1614 | (* [current_term] should be the same after becoming 1615 | * a [Leader]. *) 1616 | 1617 | assert(2 = List.length msgs); 1618 | (* Imediately after becoming a [Leader], the server 1619 | * will send [Append_entries] to establish its 1620 | * leadership. *) 1621 | 1622 | List.iter (fun (r, _) -> 1623 | match r with 1624 | | Append_entries_request r -> ( 1625 | assert(r.leader_term = 3); 1626 | assert(r.leader_id = 1); 1627 | assert(r.prev_log_index = 3); 1628 | assert(r.prev_log_term = 1); 1629 | assert(r.log_entries = []); 1630 | (* Initially the [Leader] believes that all other servers 1631 | * have replicated the same [log_entry]s as itself. *) 1632 | assert(r.leader_commit = 2); 1633 | ) 1634 | | _ -> assert(false) 1635 | ) msgs; 1636 | 1637 | (* Send Append_entries_request Server1 -> Server2 1638 | * ----------------------------------------------------------------------- 1639 | *) 1640 | 1641 | let now = now +. 0.001 in 1642 | 1643 | let res = 1644 | let msg = msg_for_server msgs 2 in 1645 | Protocol.handle_message server2 msg now 1646 | in 1647 | 1648 | let { 1649 | state = server2; 1650 | messages_to_send = msgs; 1651 | leader_change; 1652 | committed_logs; 1653 | added_logs; 1654 | deleted_logs; 1655 | } = res in 1656 | 1657 | assert(deleted_logs = []); 1658 | assert(Some (New_leader 1) = leader_change); 1659 | assert([] = committed_logs); 1660 | assert(is_follower server2); 1661 | assert(3 = server2.current_term); 1662 | assert(added_logs = []); 1663 | (* See explanation below as to why the 3rd log entry was not replicated *) 1664 | 1665 | assert(1 = List.length msgs); 1666 | (* Single response to server1. *) 1667 | 1668 | begin match msgs with 1669 | | (Append_entries_response r, 1)::[] -> ( 1670 | assert(r.receiver_id = 2); 1671 | assert(r.receiver_term = 3); 1672 | assert(r.result = Log_failure 2); 1673 | (* server2 did not replicate the 3rd [log_entry] that server1 1674 | * did during [term = 1]. 1675 | * 1676 | * Therefore the previous [Append_entries] request is rejected. 
*) 1677 | ) 1678 | | _ -> assert(false) 1679 | end; 1680 | 1681 | (* 1682 | * Send Append_entries_response Server2 -> Server1 1683 | * ---------------------------------------------------------------------- 1684 | *) 1685 | 1686 | let now = now +. 0.001 in 1687 | 1688 | let res = 1689 | let msg = msg_for_server msgs 1 in 1690 | Protocol.handle_message server1 msg now 1691 | in 1692 | 1693 | let { 1694 | state = server1; 1695 | messages_to_send = msgs; 1696 | leader_change; 1697 | committed_logs; 1698 | added_logs; 1699 | deleted_logs; 1700 | } = res in 1701 | 1702 | assert(deleted_logs = []); 1703 | assert(is_leader server1); 1704 | assert(3 = server1.current_term); 1705 | 1706 | assert(None = leader_change); 1707 | assert([] = committed_logs); 1708 | assert(added_logs = []); 1709 | 1710 | assert(1 = List.length msgs); 1711 | assert(2 = server1.commit_index); 1712 | (* A new request for server2 has been computed which 1713 | * should now contain the 3rd [log_entry] *) 1714 | 1715 | begin match msgs with 1716 | | (Append_entries_request r, 2) :: [] -> ( 1717 | assert(r.leader_term = 3); 1718 | assert(r.leader_id = 1); 1719 | assert(r.prev_log_index = 2); 1720 | assert(r.prev_log_term = 1); 1721 | assert(1 = List.length r.log_entries); 1722 | (* The missing 3rd log entry is now part of the 1723 | * request for server2 to catch up 1724 | *) 1725 | assert(r.leader_commit = 2); 1726 | ) 1727 | | _ -> assert(false) 1728 | end; 1729 | 1730 | (* 1731 | * Send Append_entries_request Server1 -> Server2 1732 | * ---------------------------------------------------------------------- 1733 | *) 1734 | 1735 | let now = now +. 0.001 in 1736 | 1737 | let res = 1738 | let msg = msg_for_server msgs 2 in 1739 | Protocol.handle_message server2 msg now 1740 | in 1741 | 1742 | let { 1743 | state = server2; 1744 | messages_to_send = msgs; 1745 | leader_change; 1746 | committed_logs; 1747 | added_logs; 1748 | deleted_logs; 1749 | } = res in 1750 | 1751 | assert(is_follower server2); 1752 | assert(3 = server2.current_term); 1753 | 1754 | assert(3 = recent_log_length server2); 1755 | (* server2 has correctly replicated the 3rd [log_entry]. *) 1756 | 1757 | assert(deleted_logs = []); 1758 | assert(None = leader_change); 1759 | assert([] = committed_logs); 1760 | assert(2 = server2.commit_index); 1761 | (* The 3rd log while succesfully replicated is not yet 1762 | * committed. 1763 | *) 1764 | begin match added_logs with 1765 | | {id = "03"; index = 3; term = 1; _ }::[] -> () 1766 | | _ -> assert(false); 1767 | end; 1768 | 1769 | assert(1 = List.length msgs); 1770 | (* Single response for server1. *) 1771 | 1772 | begin match msgs with 1773 | | (Append_entries_response r, 1) :: [] -> ( 1774 | assert(r.receiver_id = 2); 1775 | assert(r.receiver_term = 3); 1776 | assert(r.result = Success 3); 1777 | (* Confirmation that the replication of the log has 1778 | * been successful. *) 1779 | ) 1780 | | _ -> assert(false) 1781 | end; 1782 | 1783 | (* Send Append_entries_response Server2 -> Server1 1784 | * ---------------------------------------------------------------------- 1785 | *) 1786 | 1787 | let now = now +. 
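0.0 in

(* Illustrative sketch only (hypothetical helper, not the library's code):
 * the consistency check behind the [Log_failure]/[Success] exchange above.
 * A follower only accepts an Append_entries request if it holds an entry
 * at [prev_log_index] with term [prev_log_term] (index 0 stands for the
 * empty-log base case). Here the log is modeled as (index, term) pairs. *)
let _log_matches ~follower_log ~prev_log_index ~prev_log_term =
  prev_log_index = 0 ||
  List.exists
    (fun (index, term) -> index = prev_log_index && term = prev_log_term)
    follower_log
in
(* server2 only has entries 1 and 2 from term 1: a request anchored at
 * index 3 is rejected, the corrected one anchored at index 2 is accepted. *)
assert(not (_log_matches ~follower_log:[(1, 1); (2, 1)]
              ~prev_log_index:3 ~prev_log_term:1));
assert(_log_matches ~follower_log:[(1, 1); (2, 1)]
         ~prev_log_index:2 ~prev_log_term:1);

let now = now +. 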
0.001 in 1788 | 1789 | let res = 1790 | let msg = msg_for_server msgs 1 in 1791 | Protocol.handle_message server1 msg now 1792 | in 1793 | 1794 | let { 1795 | state = server1; 1796 | messages_to_send = msgs; 1797 | leader_change; 1798 | committed_logs; 1799 | added_logs; 1800 | deleted_logs; 1801 | } = res in 1802 | 1803 | assert(is_leader server1); 1804 | assert(3 = server1.current_term); 1805 | 1806 | assert(deleted_logs = []); 1807 | begin match committed_logs with 1808 | | {id = "03"; _ }::[] -> () 1809 | | _ -> assert(false) 1810 | end; 1811 | assert(None = leader_change); 1812 | assert(3 = server1.commit_index); 1813 | (* 1814 | * The 3rd [log_entry] has been replicated one one other 1815 | * server than the [Leader]; this makes a majority and therefore 1816 | * indicates that [commit_index] can be set to 3. 1817 | *) 1818 | assert(added_logs = []); 1819 | assert([] = msgs); 1820 | ({server0; server1; server2}, now) 1821 | 1822 | (* In this part of the test we simply add 2 logs at the same time to server1 1823 | * the latest leader. Server1 successfully adds, replicate and commits 1824 | * both logs. 1825 | * Server2 replicates the logs but its commit index is not yet updated. 1826 | * Server0 is still disconnected and the leader of term [1]. *) 1827 | let add_4_and_5_logs {server0; server1; server2} now = 1828 | 1829 | let new_log_result = 1830 | let datas = [ 1831 | (Bytes.of_string "Message04", "04"); 1832 | (Bytes.of_string "Message05", "05"); 1833 | ] in 1834 | Protocol.handle_add_log_entries server1 datas now 1835 | in 1836 | 1837 | let server1, data45_msgs = 1838 | let open Protocol in 1839 | match new_log_result with 1840 | | Appended {state; messages_to_send; added_logs; _ } -> 1841 | begin 1842 | assert(2 = List.length added_logs); 1843 | (state, messages_to_send) 1844 | end 1845 | | Delay | Forward_to_leader _ -> assert(false) 1846 | in 1847 | 1848 | assert(5 = recent_log_length server1); 1849 | assert(5 = last_log_index server1.log); 1850 | 1851 | assert(3 = server1.commit_index); 1852 | (* The 2 logs have not been committed. *) 1853 | 1854 | assert(1 = List.length data45_msgs); 1855 | (* Only one message needs to be sent since server0 has still an outstanding 1856 | * request. (ie server1 the leader never received any `append entries 1857 | * response` from that server *) 1858 | 1859 | List.iter (fun (msg, _) -> 1860 | 1861 | match msg with 1862 | | Append_entries_request r -> ( 1863 | let { 1864 | leader_term; 1865 | leader_id; 1866 | prev_log_index; 1867 | prev_log_term; 1868 | log_entries; 1869 | leader_commit; 1870 | } = r in 1871 | assert(3 = leader_term); 1872 | assert(1 = leader_id); 1873 | assert(prev_log_index = 3); 1874 | assert(prev_log_term = 1); 1875 | assert(2 = List.length log_entries); 1876 | assert(3 = leader_commit); 1877 | ) 1878 | | _ -> assert(false); 1879 | ) data45_msgs; 1880 | 1881 | (* 1882 | * Send Append_entries_request Server1 -> Server2 1883 | * ------------------------------------------------------------------ 1884 | *) 1885 | 1886 | let now = now +. 
0.001 in 1887 | 1888 | let res = 1889 | let msg = msg_for_server data45_msgs 2 in 1890 | Protocol.handle_message server2 msg now 1891 | in 1892 | 1893 | let { 1894 | state = server2; 1895 | messages_to_send = msgs; 1896 | leader_change; 1897 | committed_logs; 1898 | added_logs; 1899 | deleted_logs; 1900 | } = res in 1901 | 1902 | assert(deleted_logs = []); 1903 | assert(is_follower server2); 1904 | assert(3 = server2.current_term); 1905 | 1906 | assert(2 = List.length added_logs); 1907 | assert(5 = recent_log_length server2); 1908 | (* The last 2 logs where succesfully replicated 1909 | *) 1910 | begin match recent_log_hd server2 with 1911 | | {data; _ } -> 1912 | assert((Bytes.of_string "Message05") = data); 1913 | (* Let's make sure the order was properly replicated. 1914 | *) 1915 | end; 1916 | 1917 | assert(3 = server2.commit_index); 1918 | (* Replicated from server1, previous one was 2 so we 1919 | * can expect a notification. *) 1920 | 1921 | assert(None = leader_change); 1922 | begin match committed_logs with 1923 | | {id = "03"; _ }::[] -> () 1924 | | _ -> assert(false) 1925 | end; 1926 | 1927 | begin match msgs with 1928 | | (Append_entries_response r, 1) :: [] -> ( 1929 | let { 1930 | receiver_id; 1931 | receiver_term; 1932 | result; 1933 | } = r in 1934 | assert(2 = receiver_id); 1935 | assert(3 = receiver_term); 1936 | assert(Success 5 = result); 1937 | ) 1938 | | _ -> assert(false); 1939 | end; 1940 | 1941 | (* 1942 | * Send Append_entries_response Server2 -> Server1 1943 | * ------------------------------------------------------------------------ 1944 | *) 1945 | 1946 | let now = now +. 0.001 in 1947 | 1948 | let res = 1949 | let msg = msg_for_server msgs 1 in 1950 | Protocol.handle_message server1 msg now 1951 | in 1952 | 1953 | let { 1954 | state = server1; 1955 | messages_to_send = msgs; 1956 | leader_change; 1957 | committed_logs; 1958 | added_logs; 1959 | deleted_logs; 1960 | } = res in 1961 | 1962 | assert(deleted_logs = []); 1963 | assert(5 = server1.commit_index); 1964 | assert([] = msgs); 1965 | assert(2 = List.length committed_logs); 1966 | assert(added_logs = []); 1967 | assert(None = leader_change); 1968 | 1969 | ({server0; server1; server2}, now) 1970 | 1971 | (* In this part of the test we add a log to server0 which is disconnected 1972 | * (ie the network is partitioned) from the other servers. Because server0 1973 | * is still a leader for term [1] it will add the log but will not receive 1974 | * any response from the other server, therefore this 4th log entry will 1975 | * not be commited. 1976 | * 1977 | * At the end, server0 is still a leader for term [1], it has 4 log entries 1978 | * but a commit_index of [2]. Server1 is the leader of term [3] and server2 is 1979 | * a follower of server1. 
*)
1980 | 
1981 | let add_log_to_outdated_leader {server0; server1; server2} now = 
1982 | let new_log_result = 
1983 | let data = Bytes.of_string "NeverCommitted" in 
1984 | Protocol.handle_add_log_entries server0 [(data, "NC")] now 
1985 | in 
1986 | 
1987 | let server0, msgs = 
1988 | let open Protocol in 
1989 | match new_log_result with 
1990 | | Appended result -> 
1991 | let { 
1992 | state; 
1993 | messages_to_send; 
1994 | committed_logs; 
1995 | added_logs;deleted_logs; _} = result in 
1996 | assert([] = committed_logs); 
1997 | assert(deleted_logs = []); 
1998 | assert(1 = List.length added_logs); 
1999 | begin match added_logs with 
2000 | | [{id="NC"; index = 4; term = 1; _}] -> () 
2001 | | _ -> assert(false) 
2002 | end; 
2003 | (state, messages_to_send) 
2004 | (* server0 is the [Leader] and is therefore expected to 
2005 | * handle the new log entry. *) 
2006 | 
2007 | | Delay | Forward_to_leader _ -> assert(false) 
2008 | in 
2009 | 
2010 | assert(2 = List.length msgs); 
2011 | assert(4 = recent_log_length server0); 
2012 | assert(2 = server0.commit_index); 
2013 | 
2014 | List.iter (fun (msg, _) -> 
2015 | match msg with 
2016 | | Append_entries_request r -> ( 
2017 | assert(r.leader_term = 1); 
2018 | assert(r.leader_id = 0); 
2019 | assert(r.prev_log_index = 2); 
2020 | assert(r.prev_log_term = 1); 
2021 | assert(2 = List.length r.log_entries); 
2022 | (* Contains the log entries to be synchronized. *) 
2023 | begin match r.log_entries with 
2024 | | {index = 3; term = 1; _} :: {index = 4; id = "NC";_} :: [] -> () 
2025 | | _ -> assert(false) 
2026 | end; 
2027 | assert(r.leader_commit = 2); 
2028 | ) 
2029 | | _ -> assert(false) 
2030 | ) msgs; 
2031 | 
2032 | ({server0; server1; server2}, now) 
2033 | 
2034 | (* In this part of the test server1, the leader in term [3], sends 
2035 | * heartbeat messages to both server0 and server2. Server0 is no longer 
2036 | * disconnected and receives the message; the synchronization sequence 
2037 | * between the latest leader (server1) and the previous one (server0) 
2038 | * deletes the non-committed log entries in server0. 
2039 | * 
2040 | * At the end server1 is the leader and has 5 log entries and a commit 
2041 | * index of 5. Both server0 and server2 have fully replicated the 
2042 | * 5 log entries and their commit index is 5 as well. *) 
2043 | let leader_heartbeat_3 {server0; server1; server2} now = 
2044 | let now = now +. default_configuration.hearbeat_timeout in 
2045 | 
2046 | let {state = server1; messages_to_send = msgs; added_logs; _ } = 
2047 | Protocol.handle_heartbeat_timeout server1 now 
2048 | in 
2049 | 
2050 | assert(added_logs = []); 
2051 | assert(2 = List.length msgs); 
2052 | 
2053 | begin match msg_for_server msgs 0 with 
2054 | | Append_entries_request r -> ( 
2055 | assert(r.leader_term = 3); 
2056 | assert(r.leader_id = 1); 
2057 | assert(r.prev_log_term = 1); 
2058 | assert(2 = List.length r.log_entries); 
2059 | (* Because no msg was received from server0, server1 still believes 
2060 | * server0's prev_log_index to be 3, the same as 
2061 | * when it was elected a leader. Therefore the message contains 
2062 | * the 2 latest entries. 
*) 2063 | assert(r.prev_log_index = 3); 2064 | assert(r.leader_commit = 5); 2065 | ) 2066 | | _ -> assert(false) 2067 | end; 2068 | 2069 | begin match msg_for_server msgs 2 with 2070 | | Append_entries_request r -> ( 2071 | assert(r.leader_term = 3); 2072 | assert(r.leader_id = 1); 2073 | assert(r.prev_log_term = 3); 2074 | assert(0 = List.length r.log_entries); 2075 | assert(r.prev_log_index = 5); 2076 | (* Server2 has replicated both log_entries 4 & 5 *) 2077 | assert(r.leader_commit = 5); 2078 | ) 2079 | | _ -> assert(false) 2080 | end; 2081 | 2082 | (* 2083 | * Send Append_entries_request Server1 -> Server0 2084 | * --------------------------------------------------------------------- 2085 | *) 2086 | 2087 | let res = 2088 | let msg = msg_for_server msgs 0 in 2089 | Protocol.handle_message server0 msg now 2090 | in 2091 | 2092 | let { 2093 | state = server0; 2094 | messages_to_send = server0_response; 2095 | leader_change; 2096 | committed_logs; 2097 | added_logs; 2098 | deleted_logs; 2099 | } = res in 2100 | 2101 | begin match deleted_logs with 2102 | | {index = 4; term = 1; id = "NC"; _} :: [] -> () 2103 | | _ -> assert(false) 2104 | end; 2105 | (* The non commited log added by server0 in term [1] is deleted since 2106 | * it was never replicated anywher and server1 is now the leader 2107 | * in a later term *) 2108 | 2109 | assert(is_follower server0); 2110 | assert(3 = server0.current_term); 2111 | assert(5 = recent_log_length server0); 2112 | begin match committed_logs with 2113 | | {index = 3; term = 1; _}::{index = 4; term = 3; _}:: 2114 | {index = 5; term = 3; _}::[] -> () 2115 | | _ -> assert(false) 2116 | end; 2117 | assert(5 = server0.commit_index); 2118 | assert(2 = List.length added_logs); 2119 | 2120 | begin match msg_for_server server0_response 1 with 2121 | | Append_entries_response r -> 2122 | assert(r.receiver_id = 0); 2123 | assert(r.receiver_term = 3); 2124 | assert(r.result = Success 5); 2125 | | _ -> assert(false); 2126 | end; 2127 | assert(Some (New_leader 1) = leader_change); 2128 | 2129 | (* 2130 | * Send Append_entries_request Server1 -> Server2 2131 | * ---------------------------------------------------------------------- 2132 | *) 2133 | 2134 | let res = 2135 | let msg = msg_for_server msgs 2 in 2136 | Protocol.handle_message server2 msg now 2137 | in 2138 | 2139 | let { 2140 | state = server2; 2141 | messages_to_send = server2_response; 2142 | leader_change; 2143 | committed_logs; 2144 | added_logs; 2145 | deleted_logs; 2146 | } = res in 2147 | 2148 | assert(deleted_logs = []); 2149 | assert(is_follower server2); 2150 | assert(3 = server2.current_term); 2151 | assert(5 = recent_log_length server2); 2152 | begin match committed_logs with 2153 | | {index = 4; term = 3; _}::{index = 5; term = 3; _}::[] -> () 2154 | | _ -> assert(false) 2155 | end; 2156 | assert(5 = server2.commit_index); 2157 | assert(0 = List.length added_logs); 2158 | 2159 | begin match msg_for_server server2_response 1 with 2160 | | Append_entries_response r -> 2161 | assert(r.receiver_id = 2); 2162 | assert(r.receiver_term = 3); 2163 | assert(r.result = Success 5); 2164 | | _ -> assert(false); 2165 | end; 2166 | assert(None= leader_change); 2167 | 2168 | let now = now +. 
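0.0 in

(* Illustrative sketch only (hypothetical helper, not the library's code):
 * why server0's "NC" entry showed up in [deleted_logs] above. When a
 * follower finds one of its existing entries conflicting with an entry
 * sent by the leader (same index, different term), that entry and every
 * entry after it are discarded before the leader's entries are appended.
 * The log is modeled as (index, term) pairs. *)
let _truncate_conflicts ~follower_log ~incoming =
  let conflicts (index, term) =
    List.exists
      (fun (index', term') -> index' = index && term' <> term)
      incoming
  in
  match List.find_opt conflicts follower_log with
  | None -> follower_log
  | Some (first_conflict_index, _) ->
    List.filter (fun (index, _) -> index < first_conflict_index) follower_log
in
(* server0 held (4, term 1) = "NC"; the new leader sends (4, term 3) and
 * (5, term 3), so the stale index-4 entry is dropped. *)
assert([(3, 1)] = _truncate_conflicts
  ~follower_log:[(3, 1); (4, 1)] ~incoming:[(4, 3); (5, 3)]);

let now = now +. 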
0.001 in 
2169 | 
2170 | (* 
2171 | * Send Append_entries_response Server0 -> Server1 
2172 | * ------------------------------------------------------------------- 
2173 | *) 
2174 | 
2175 | let res = 
2176 | let msg = msg_for_server server0_response 1 in 
2177 | Protocol.handle_message server1 msg now 
2178 | in 
2179 | let { 
2180 | state = server1; 
2181 | messages_to_send = msgs; 
2182 | leader_change; 
2183 | committed_logs; 
2184 | added_logs; 
2185 | deleted_logs; 
2186 | } = res in 
2187 | assert(deleted_logs = []); 
2188 | assert([] = msgs); 
2189 | assert([] = committed_logs); 
2190 | assert([] = added_logs); 
2191 | assert(None = leader_change); 
2192 | 
2193 | (* 
2194 | * Send Append_entries_response Server2 -> Server1 
2195 | * ------------------------------------------------------------------- 
2196 | *) 
2197 | 
2198 | let res = 
2199 | let msg = msg_for_server server2_response 1 in 
2200 | Protocol.handle_message server1 msg now 
2201 | in 
2202 | let { 
2203 | state = server1; 
2204 | messages_to_send = msgs; 
2205 | leader_change; 
2206 | committed_logs; 
2207 | added_logs; 
2208 | deleted_logs; 
2209 | } = res in 
2210 | assert(deleted_logs = []); 
2211 | assert([] = msgs); 
2212 | assert([] = committed_logs); 
2213 | assert([] = added_logs); 
2214 | assert(None = leader_change); 
2215 | 
2216 | ({server0; server1; server2}, now) 
2217 | 
2218 | (* Prior to this test, the log size was 5, but in this test we're adding 
2219 | * 3 new entries, which means the resulting size would be 8. 8 is greater 
2220 | * than the configured max log size upper bound and so the log size 
2221 | * limitation enforcement should kick in. Log entries [1] [2] [3] should 
2222 | * be removed from the Raft_log.recent_entries to make sure 
2223 | * the recent entries size is set to the lower bound (ie [5]). 
2224 | * 
2225 | * Intentionally at the end of this test we return the server state 
2226 | * unmodified; this test solely checks the log size enforcement 
2227 | * and has no side effect. *) 
2228 | let enforce_log_size ({server0; server1; _} as servers) now = 
2229 | let new_log_result = 
2230 | let datas = [ 
2231 | (Bytes.of_string "Message06", "06"); 
2232 | (Bytes.of_string "Message07", "07"); 
2233 | (Bytes.of_string "Message08", "08"); 
2234 | ] in 
2235 | Protocol.handle_add_log_entries server1 datas now 
2236 | in 
2237 | 
2238 | let min_max_index server = 
2239 | ( 
2240 | fst @@ IntMap.min_binding server.log.recent_entries, 
2241 | fst @@ IntMap.max_binding server.log.recent_entries 
2242 | ) 
2243 | in 
2244 | 
2245 | let min_index, max_index = min_max_index server1 in 
2246 | assert(1 = min_index); 
2247 | assert(5 = max_index); 
2248 | 
2249 | let server1, msgs = 
2250 | let open Protocol in 
2251 | match new_log_result with 
2252 | | Appended {state; messages_to_send; added_logs; _ } -> 
2253 | begin 
2254 | assert(3 = List.length added_logs); 
2255 | (state, messages_to_send) 
2256 | end 
2257 | | Delay | Forward_to_leader _ -> assert(false) 
2258 | in 
2259 | 
2260 | let min_index, max_index = min_max_index server1 in 
2261 | assert(4 = min_index); 
2262 | assert(8 = max_index); 
2263 | 
2264 | (* 
2265 | * Send Append_entries_request Server1 -> Server0 
2266 | * ---------------------------------------------- 
2267 | *) 
2268 | 
2269 | let res = 
2270 | let msg = msg_for_server msgs 0 in 
2271 | Protocol.handle_message server0 msg now 
2272 | in 
2273 | 
2274 | let { 
2275 | state = server0; _ 
2276 | } = res in 
2277 | 
2278 | let min_index, max_index = min_max_index server0 in 
2279 | assert(4 = min_index); 
2280 | assert(8 = max_index); 
2281 | 
2282 | (servers, now) 
2283 | 
2284 
| let () = 2285 | 2286 | let servers = init () in 2287 | let servers, now = election_1 servers now in 2288 | let servers, now = failed_election_1 servers now in 2289 | let servers, now = leader_heartbeat_1 servers now in 2290 | let servers, now = add_first_log servers now in 2291 | let servers, now = add_second_log servers now in 2292 | let servers, now = leader_heartbeat_2 servers now in 2293 | let servers, now = add_third_log servers now in 2294 | let servers, now = failed_election_2 servers now in 2295 | let servers, now = election_2 servers now in 2296 | let servers, now = add_4_and_5_logs servers now in 2297 | let servers, now = add_log_to_outdated_leader servers now in 2298 | let servers, now = leader_heartbeat_3 servers now in 2299 | let servers, now = enforce_log_size servers now in 2300 | let _ = servers and _ = now in 2301 | 2302 | () 2303 | --------------------------------------------------------------------------------