├── .gitignore ├── Cargo.toml ├── .github └── workflows │ ├── rust.yml │ └── greetings.yml ├── little_raft ├── Cargo.toml ├── src │ ├── timer.rs │ ├── lib.rs │ ├── cluster.rs │ ├── message.rs │ ├── state_machine.rs │ └── replica.rs └── tests │ ├── raft_stable.rs │ └── raft_unstable.rs ├── LICENSE ├── README.md └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "little_raft" 4 | ] -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose 23 | 24 | -------------------------------------------------------------------------------- /.github/workflows/greetings.yml: -------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | permissions: 9 | issues: write 10 | pull-requests: write 11 | steps: 12 | - uses: actions/first-interaction@v1 13 | with: 14 | repo-token: ${{ secrets.GITHUB_TOKEN }} 15 | issue-message: 'Welcome to Little Raft! Thanks for contributing.' 16 | pr-message: 'Congratulations on opening your first PR to Little Raft! Welcome.' 
17 | -------------------------------------------------------------------------------- /little_raft/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | description = "The lightest distributed consensus library. Run your own replicated state machine!" 3 | name = "little_raft" 4 | version = "0.2.0" 5 | authors = ["Ilya Andreev "] 6 | edition = "2018" 7 | license = "MIT" 8 | homepage = "https://github.com/andreev-io/little-raft" 9 | repository = "https://github.com/andreev-io/little-raft" 10 | readme = "../README.md" 11 | keywords = ["distributed-systems", "raft", "consensus"] 12 | categories = ["concurrency", "database", "database-implementations"] 13 | 14 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 15 | 16 | [dependencies] 17 | rand = "0.8.3" 18 | crossbeam-channel = "0.5.1" 19 | crossbeam = "0.8.0" 20 | timer = "0.1.3" 21 | time = "0.1.39" 22 | bytes = "0.4.7" 23 | -------------------------------------------------------------------------------- /little_raft/src/timer.rs: -------------------------------------------------------------------------------- 1 | use crossbeam::channel::{bounded, Receiver}; 2 | use std::{thread, time::Duration}; 3 | 4 | pub struct Timer { 5 | rx: Receiver<()>, 6 | timeout: Duration, 7 | } 8 | 9 | // Timer fires after the specified duration. The timer can be renewed. 
10 | impl Timer { 11 | pub fn new(timeout: Duration) -> Timer { 12 | Timer { 13 | timeout, 14 | rx: Timer::get_timeout_channel(timeout), 15 | } 16 | } 17 | 18 | pub fn renew(&mut self) { 19 | self.rx = Timer::get_timeout_channel(self.timeout); 20 | } 21 | 22 | pub fn get_rx(&self) -> &Receiver<()> { 23 | &self.rx 24 | } 25 | 26 | fn get_timeout_channel(timeout: Duration) -> Receiver<()> { 27 | let (tx, rx) = bounded(1); 28 | thread::spawn(move || { 29 | thread::sleep(timeout); 30 | let _ = tx.send(()); 31 | }); 32 | 33 | rx 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /little_raft/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This crate is a small but full-featured implementation of the Raft 2 | //! distributed consensus protocol. By using this library, you can run a 3 | //! replicated state machine in your own cluster. The cluster could be comprised 4 | //! of dozens of physical servers in different parts of the world or of two 5 | //! threads on a single CPU. 6 | //! 7 | //! The goal of this library is to provide a generic implementation of the 8 | //! algorithm that the library user can leverage in their own way. It is 9 | //! entirely up to the user how to configure the Raft cluster, how to ensure 10 | //! communication between the nodes, how to process client's messages, how to do 11 | //! service discovery, and what kind of state machine to replicate. 12 | //! 13 | //! The implementation is kept as simple as possible on purpose, with the entire 14 | //! library code base fitting in under 1,000 lines of code. 
15 | pub mod cluster; 16 | pub mod message; 17 | pub mod replica; 18 | pub mod state_machine; 19 | mod timer; 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Ilia (Ilya) Andreev 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /little_raft/src/cluster.rs: -------------------------------------------------------------------------------- 1 | use crate::{message::Message, replica::ReplicaID, state_machine::{StateMachineTransition}}; 2 | 3 | /// Cluster is used for the local Raft Replica to communicate with the rest of 4 | /// the Raft cluster. It is up to the user how to abstract that communication. 
5 | /// The Cluster trait also contains hooks which the Replica will use to inform 6 | /// the crate user of state changes. 7 | pub trait Cluster 8 | where 9 | T: StateMachineTransition, 10 | D: Clone, 11 | { 12 | /// This function is used to deliver messages to target Replicas. The 13 | /// Replica will provide the to_id of the other Replica it's trying to send 14 | /// its message to and provide the message itself. The send_message 15 | /// implementation must not block but is allowed to silently fail -- Raft 16 | /// exists to achieve consensus in spite of failures, after all. 17 | fn send_message(&mut self, to_id: usize, message: Message); 18 | 19 | /// This function is used by the Replica to receive pending messages from 20 | /// the cluster. The receive_messages implementation must not block and must 21 | /// not return the same message more than once. Note that receive_messages 22 | /// is only called when the Replica is notified via the recv_msg channel. 23 | fn receive_messages(&mut self) -> Vec>; 24 | 25 | /// By returning true from halt you can signal to the Replica that it should 26 | /// stop running. 27 | fn halt(&self) -> bool; 28 | 29 | /// This function is a hook that the Replica uses to inform the user of the 30 | /// Leader change. The leader_id is an Option because the Leader 31 | /// might be unknown for a period of time. Remember that only Leaders can 32 | /// process transitions submitted by the Raft users, so the leader_id can be 33 | /// used to redirect the requests from non-Leader nodes to the Leader node. 
34 | fn register_leader(&mut self, leader_id: Option); 35 | } 36 | -------------------------------------------------------------------------------- /little_raft/src/message.rs: -------------------------------------------------------------------------------- 1 | use crate::replica::ReplicaID; 2 | use crate::state_machine::{StateMachineTransition}; 3 | 4 | /// LogEntry is a state machine transition along with some metadata needed for 5 | /// Raft. 6 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd)] 7 | pub struct LogEntry 8 | where 9 | T: StateMachineTransition, 10 | { 11 | pub transition: T, 12 | pub index: usize, 13 | pub term: usize, 14 | } 15 | 16 | /// Message describes messages that the replicas pass between each other to 17 | /// achieve consensus on the distributed state machine. 18 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd)] 19 | pub enum Message 20 | where 21 | T: StateMachineTransition, 22 | D: Clone, 23 | { 24 | /// AppendEntryRequest is used by the Leader to send out logs for other 25 | /// replicas to append to their log. It also has information on what logs 26 | /// are ready to be applied to the state machine. AppendEntryRequest is also 27 | /// used as a heartbeat message by the Leader even when no new logs need to 28 | /// be processed. 29 | AppendEntryRequest { 30 | from_id: ReplicaID, 31 | term: usize, 32 | prev_log_index: usize, 33 | prev_log_term: usize, 34 | entries: Vec>, 35 | commit_index: usize, 36 | }, 37 | 38 | /// AppendEntryResponse is used by replicas to respond to AppendEntryRequest 39 | /// messages. 40 | AppendEntryResponse { 41 | from_id: ReplicaID, 42 | term: usize, 43 | success: bool, 44 | last_index: usize, 45 | mismatch_index: Option, 46 | }, 47 | 48 | /// VoteRequest is used by Candidates to solicit votes for themselves. 
49 | VoteRequest { 50 | from_id: ReplicaID, 51 | term: usize, 52 | last_log_index: usize, 53 | last_log_term: usize, 54 | }, 55 | 56 | /// VoteResponse is used by replicas to respond to VoteRequest messages. 57 | VoteResponse { 58 | from_id: ReplicaID, 59 | term: usize, 60 | vote_granted: bool, 61 | }, 62 | 63 | InstallSnapshotRequest { 64 | from_id: ReplicaID, 65 | term: usize, 66 | last_included_index: usize, 67 | last_included_term: usize, 68 | offset: usize, 69 | data: D, 70 | done: bool, 71 | }, 72 | 73 | InstallSnapshotResponse { 74 | from_id: ReplicaID, 75 | term: usize, 76 | last_included_index: usize, 77 | }, 78 | } 79 | -------------------------------------------------------------------------------- /little_raft/src/state_machine.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | /// TransitionState describes the state of a particular transition. 4 | #[derive(Clone, Debug, PartialEq)] 5 | pub enum TransitionState { 6 | /// Queued transitions have been received from the user but have not been 7 | /// processed yet. They are in the queue. 8 | Queued, 9 | 10 | /// Committed transitions have not yet been applied to the state machine but 11 | /// have already been replicated across the cluster such that they are 12 | /// guaranteed to be present in the log of all future cluster leaders. 13 | Committed, 14 | 15 | /// Applied transitions have been replicated across the cluster and have 16 | /// been applied to the local state machine. 17 | Applied, 18 | 19 | /// Abandoned transitions have been ignored by the replica. 20 | Abandoned(TransitionAbandonedReason), 21 | } 22 | 23 | /// TransitionAbandonedReason explains why a particular transition has been 24 | /// abandoned by the replica. 25 | #[derive(Clone, Debug, PartialEq)] 26 | pub enum TransitionAbandonedReason { 27 | /// NotLeader transitions have been abandoned because the replica is not the 28 | /// cluster leader. 
29 | NotLeader, 30 | 31 | // ConflictWithLeader uncommitted transitions are abandoned because they 32 | // don't match the consensus achieved by the majority of the cluster. 33 | ConflictWithLeader, 34 | } 35 | 36 | /// StateMachineTransition describes a user-defined transition that can be 37 | /// applied to the state machine replicated by Raft. 38 | pub trait StateMachineTransition: Clone + Debug { 39 | /// TransitionID is used to identify the transition. 40 | type TransitionID: Eq; 41 | 42 | /// get_id is used by the Replica to identify the transition to be able to 43 | /// call register_transition_state. 44 | fn get_id(&self) -> Self::TransitionID; 45 | } 46 | 47 | /// Snapshot is an object used for log compaction. The user can use snapshots to 48 | /// represent StateMachine state at a particular point. This will let the 49 | /// Replica start from a saved state or perform log compaction before the log 50 | /// sequence starts taking up too much memory. 51 | #[derive(Clone)] 52 | pub struct Snapshot where D: Clone { 53 | pub last_included_index: usize, 54 | pub last_included_term: usize, 55 | pub data: D, 56 | } 57 | 58 | /// StateMachine describes a user-defined state machine that is replicated 59 | /// across the cluster. Raft can replicate whatever distributed state machine 60 | /// can implement this trait. 61 | pub trait StateMachine 62 | where 63 | T: StateMachineTransition, 64 | D: Clone, 65 | { 66 | /// This is a hook that the local Replica will call each time the state of a 67 | /// particular transition changes. It is up to the user what to do with that 68 | /// information. 69 | fn register_transition_state(&mut self, transition_id: T::TransitionID, state: TransitionState); 70 | 71 | /// When a particular transition is ready to be applied, the Replica will 72 | /// call apply_transition to apply said transition to the local state 73 | /// machine. 
74 | fn apply_transition(&mut self, transition: T); 75 | 76 | /// This function is used to receive transitions from the user that need to 77 | /// be applied to the replicated state machine. Note that only the Leader 78 | /// Replica processes transitions and only when notified via the 79 | /// recv_transition channel. All other Replicas poll for transitions and 80 | /// discard them. get_pending_transitions must not return the same 81 | /// transition twice. 82 | fn get_pending_transitions(&mut self) -> Vec; 83 | 84 | /// Replica calls get_snapshot once upon startup. If the Replica and the 85 | /// associated StateMachine should start from a certain checkpoint 86 | /// previously saved with a call to create_snapshot or set_snapshot, this 87 | /// function should return Some(snapshot). Otherwise it can return None. If 88 | /// None is returned, the Replica can still recover its state from other 89 | /// nodes in the cluster, but it might take longer to do so than if it 90 | /// recovered from a previously snapshotted value. 91 | /// 92 | /// Little Raft will take care of loading the Snapshot into the Replica and 93 | /// achieving consensus provided snapshot.last_included_index and 94 | /// snapshot.last_included_term are truthful. However, it is up to the user 95 | /// to put the StateMachine into the right state before returning from 96 | /// load_snapshot(). 97 | fn get_snapshot(&mut self) -> Option>; 98 | 99 | /// create_snapshot is periodically called by the Replica if log compaction 100 | /// is enabled by setting snapshot_delta > 0. The implementation MUST create 101 | /// a snapshot object with truthful values of index and term. 102 | /// 103 | /// If the Replica should use this snapshot as a checkpoint upon restart, 104 | /// the implementation MUST save the created snapshot object to permanent 105 | /// storage and return it with get_snapshot after restart. 
106 | fn create_snapshot( 107 | &mut self, 108 | last_included_index: usize, 109 | last_included_term: usize, 110 | ) -> Snapshot; 111 | 112 | /// When a Replica receives a snapshot from another Replica, set_snapshot is 113 | /// called. The StateMachine MUST then load its state from the provided 114 | /// snapshot and potentially save said snapshot to persistent storage, same 115 | /// way it is done in create_snapshot. 116 | fn set_snapshot(&mut self, snapshot: Snapshot); 117 | } 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Little Raft 2 | The lightest distributed consensus library. Run your own replicated state machine! :heart: 3 | 4 | ## Installing 5 | Simply import the crate. In your `Cargo.toml`, add 6 | ``` 7 | [dependencies] 8 | little_raft = "0.2.0" 9 | ``` 10 | 11 | ## Using 12 | To start running Little Raft, you only need to do three things. 13 | 1. Implement the StateMachine that you want your cluster to maintain. Little Raft will take care of replicating this machine across the cluster and achieving consensus on its state. 14 | ```rust 15 | /// StateMachine describes a user-defined state machine that is replicated 16 | /// across the cluster. Raft can Replica whatever distributed state machine can 17 | /// implement this trait. 18 | pub trait StateMachine 19 | where 20 | T: StateMachineTransition, 21 | { 22 | /// This is a hook that the local Replica will call each time the state of a 23 | /// particular transition changes. It is up to the user what to do with that 24 | /// information. 25 | fn register_transition_state(&mut self, transition_id: T::TransitionID, state: TransitionState); 26 | 27 | /// When a particular transition is ready to be applied, the Replica will 28 | /// call apply_transition to apply said transition to the local state 29 | /// machine. 
30 | fn apply_transition(&mut self, transition: T); 31 | 32 | /// This function is used to receive transitions from the user that need to 33 | /// be applied to the replicated state machine. Note that while all Replicas 34 | /// poll get_pending_transitions periodically, only the Leader Replica 35 | /// actually processes them. All other Replicas discard pending transitions. 36 | /// get_pending_transitions must not return the same transition twice. 37 | fn get_pending_transitions(&mut self) -> Vec; 38 | } 39 | ``` 40 | 41 | 2. Implement the Cluster abstraction so that the local Replica can communicate with other nodes. 42 | ```rust 43 | /// Cluster is used for the local Raft Replica to communicate with the rest of 44 | /// the Raft cluster. It is up to the user how to abstract that communication. 45 | /// The Cluster trait also contains hooks which the Replica will use to inform 46 | /// the crate user of state changes. 47 | pub trait Cluster 48 | where 49 | T: StateMachineTransition, 50 | { 51 | /// This function is used to deliver messages to target Replicas. The 52 | /// Replica will provide the to_id of the other Replica it's trying to send 53 | /// its message to and provide the message itself. The send_message 54 | /// implementation must not block but is allowed silently fail -- Raft 55 | /// exists to achieve consensus in spite of failures, after all. 56 | fn send_message(&mut self, to_id: usize, message: Message); 57 | 58 | /// This function is used by the Replica to receive pending messages from 59 | /// the cluster. The receive_messages implementation must not block and must 60 | /// not return the same message more than once. 61 | fn receive_messages(&mut self) -> Vec>; 62 | 63 | /// By returning true from halt you can signal to the Replica that it should 64 | /// stop running. 65 | fn halt(&self) -> bool; 66 | 67 | /// This function is a hook that the Replica uses to inform the user of the 68 | /// Leader change. 
The leader_id is an Option because the Leader 69 | /// might be unknown for a period of time. Remember that only Leaders can 70 | /// process transitions submitted by the Raft users, so the leader_id can be 71 | /// used to redirect the requests from non-Leader nodes to the Leader node. 72 | fn register_leader(&mut self, leader_id: Option); 73 | } 74 | ``` 75 | 3. Start your replica! 76 | ```rust 77 | /// Create a new Replica. 78 | /// 79 | /// id is the ID of this Replica within the cluster. 80 | /// 81 | /// peer_ids is a vector of IDs of all other Replicas in the cluster. 82 | /// 83 | /// cluster represents the abstraction the Replica uses to talk with other 84 | /// Replicas. 85 | /// 86 | /// state_machine is the state machine that Raft maintains. 87 | /// 88 | /// noop_transition is a transition that can be applied to the state machine 89 | /// multiple times with no effect. 90 | /// 91 | /// heartbeat_timeout defines how often the Leader Replica sends out 92 | /// heartbeat messages. 93 | /// 94 | /// election_timeout_range defines the election timeout interval. If the 95 | /// Replica gets no messages from the Leader before the timeout, it 96 | /// initiates an election. 97 | /// 98 | /// In practice, pick election_timeout_range to be 2-3x the value of 99 | /// heartbeat_timeout, depending on your particular use-case network latency 100 | /// and responsiveness needs. An election_timeout_range / heartbeat_timeout 101 | /// ratio that's too low might cause unwarranted re-elections in the 102 | /// cluster. 103 | pub fn new( 104 | id: ReplicaID, 105 | peer_ids: Vec, 106 | cluster: Arc>, 107 | state_machine: Arc>, 108 | noop_transition: T, 109 | heartbeat_timeout: Duration, 110 | election_timeout_range: (Duration, Duration), 111 | ) -> Replica; 112 | 113 | /// This function starts the Replica and blocks forever. 114 | /// 115 | /// recv_msg is a channel on which the user must notify the Replica whenever 116 | /// new messages from the Cluster are available. 
The Replica will not poll 117 | /// for messages from the Cluster unless notified through recv_msg. 118 | /// 119 | /// recv_transition is a channel on which the user must notify the Replica 120 | /// whenever new transitions to be processed for the StateMachine are 121 | /// available. The Replica will not poll for pending transitions for the 122 | /// StateMachine unless notified through recv_transition. 123 | pub fn start(&mut self, recv_msg: Receiver<()>, recv_transition: Receiver<()>); 124 | ``` 125 | 126 | 127 | With that, you're good to go. We are working on examples, but for now you can look at the `little_raft/tests` directory and at the documentation at [https://docs.rs/little_raft/0.1.3/little_raft/](https://docs.rs/little_raft/0.1.3/little_raft/). We're working on adding more tests. 128 | 129 | 130 | ## Testing 131 | Run `cargo test`. 132 | 133 | ## Contributing 134 | Contributions are very welcome! Do remember that one of the goals of this library is to be as small and simple as possible. Let's keep the code in `little_raft/src` **under 1,000 lines**. PRs breaking this rule will be declined. 135 | ```bash 136 | > cloc little_raft/src 137 | 6 text files. 138 | 6 unique files. 139 | 0 files ignored. 140 | 141 | github.com/AlDanial/cloc v 1.90 T=0.02 s (369.2 files/s, 56185.0 lines/s) 142 | ------------------------------------------------------------------------------- 143 | Language files blank comment code 144 | ------------------------------------------------------------------------------- 145 | Rust 6 82 199 632 146 | ------------------------------------------------------------------------------- 147 | SUM: 6 82 199 632 148 | ------------------------------------------------------------------------------- 149 | ``` 150 | 151 | You are welcome to pick up and work on any of the issues open for this project. Or you can submit new issues if anything comes up from your experience using this library. 
-------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "autocfg" 7 | version = "1.0.1" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" 10 | 11 | [[package]] 12 | name = "byteorder" 13 | version = "1.4.3" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" 16 | 17 | [[package]] 18 | name = "bytes" 19 | version = "0.4.12" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c" 22 | dependencies = [ 23 | "byteorder", 24 | "iovec", 25 | ] 26 | 27 | [[package]] 28 | name = "cfg-if" 29 | version = "1.0.0" 30 | source = "registry+https://github.com/rust-lang/crates.io-index" 31 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 32 | 33 | [[package]] 34 | name = "chrono" 35 | version = "0.2.25" 36 | source = "registry+https://github.com/rust-lang/crates.io-index" 37 | checksum = "9213f7cd7c27e95c2b57c49f0e69b1ea65b27138da84a170133fd21b07659c00" 38 | dependencies = [ 39 | "num", 40 | "time", 41 | ] 42 | 43 | [[package]] 44 | name = "crossbeam" 45 | version = "0.8.0" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | checksum = "fd01a6eb3daaafa260f6fc94c3a6c36390abc2080e38e3e34ced87393fb77d80" 48 | dependencies = [ 49 | "cfg-if", 50 | "crossbeam-channel", 51 | "crossbeam-deque", 52 | "crossbeam-epoch", 53 | "crossbeam-queue", 54 | "crossbeam-utils", 55 | ] 56 | 57 | [[package]] 58 | name = "crossbeam-channel" 59 | version = "0.5.1" 60 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 61 | checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" 62 | dependencies = [ 63 | "cfg-if", 64 | "crossbeam-utils", 65 | ] 66 | 67 | [[package]] 68 | name = "crossbeam-deque" 69 | version = "0.8.0" 70 | source = "registry+https://github.com/rust-lang/crates.io-index" 71 | checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" 72 | dependencies = [ 73 | "cfg-if", 74 | "crossbeam-epoch", 75 | "crossbeam-utils", 76 | ] 77 | 78 | [[package]] 79 | name = "crossbeam-epoch" 80 | version = "0.9.3" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "2584f639eb95fea8c798496315b297cf81b9b58b6d30ab066a75455333cf4b12" 83 | dependencies = [ 84 | "cfg-if", 85 | "crossbeam-utils", 86 | "lazy_static", 87 | "memoffset", 88 | "scopeguard", 89 | ] 90 | 91 | [[package]] 92 | name = "crossbeam-queue" 93 | version = "0.3.1" 94 | source = "registry+https://github.com/rust-lang/crates.io-index" 95 | checksum = "0f6cb3c7f5b8e51bc3ebb73a2327ad4abdbd119dc13223f14f961d2f38486756" 96 | dependencies = [ 97 | "cfg-if", 98 | "crossbeam-utils", 99 | ] 100 | 101 | [[package]] 102 | name = "crossbeam-utils" 103 | version = "0.8.3" 104 | source = "registry+https://github.com/rust-lang/crates.io-index" 105 | checksum = "e7e9d99fa91428effe99c5c6d4634cdeba32b8cf784fc428a2a687f61a952c49" 106 | dependencies = [ 107 | "autocfg", 108 | "cfg-if", 109 | "lazy_static", 110 | ] 111 | 112 | [[package]] 113 | name = "getrandom" 114 | version = "0.2.2" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8" 117 | dependencies = [ 118 | "cfg-if", 119 | "libc", 120 | "wasi", 121 | ] 122 | 123 | [[package]] 124 | name = "iovec" 125 | version = "0.1.4" 126 | source = "registry+https://github.com/rust-lang/crates.io-index" 127 | checksum = 
"b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" 128 | dependencies = [ 129 | "libc", 130 | ] 131 | 132 | [[package]] 133 | name = "lazy_static" 134 | version = "1.4.0" 135 | source = "registry+https://github.com/rust-lang/crates.io-index" 136 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 137 | 138 | [[package]] 139 | name = "libc" 140 | version = "0.2.93" 141 | source = "registry+https://github.com/rust-lang/crates.io-index" 142 | checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41" 143 | 144 | [[package]] 145 | name = "little_raft" 146 | version = "0.2.0" 147 | dependencies = [ 148 | "bytes", 149 | "crossbeam", 150 | "crossbeam-channel", 151 | "rand", 152 | "time", 153 | "timer", 154 | ] 155 | 156 | [[package]] 157 | name = "memoffset" 158 | version = "0.6.3" 159 | source = "registry+https://github.com/rust-lang/crates.io-index" 160 | checksum = "f83fb6581e8ed1f85fd45c116db8405483899489e38406156c25eb743554361d" 161 | dependencies = [ 162 | "autocfg", 163 | ] 164 | 165 | [[package]] 166 | name = "num" 167 | version = "0.1.42" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = "4703ad64153382334aa8db57c637364c322d3372e097840c72000dabdcf6156e" 170 | dependencies = [ 171 | "num-integer", 172 | "num-iter", 173 | "num-traits", 174 | ] 175 | 176 | [[package]] 177 | name = "num-integer" 178 | version = "0.1.44" 179 | source = "registry+https://github.com/rust-lang/crates.io-index" 180 | checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" 181 | dependencies = [ 182 | "autocfg", 183 | "num-traits", 184 | ] 185 | 186 | [[package]] 187 | name = "num-iter" 188 | version = "0.1.42" 189 | source = "registry+https://github.com/rust-lang/crates.io-index" 190 | checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" 191 | dependencies = [ 192 | "autocfg", 193 | "num-integer", 194 | "num-traits", 195 | ] 196 | 197 | 
[[package]] 198 | name = "num-traits" 199 | version = "0.2.14" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" 202 | dependencies = [ 203 | "autocfg", 204 | ] 205 | 206 | [[package]] 207 | name = "ppv-lite86" 208 | version = "0.2.10" 209 | source = "registry+https://github.com/rust-lang/crates.io-index" 210 | checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" 211 | 212 | [[package]] 213 | name = "rand" 214 | version = "0.8.3" 215 | source = "registry+https://github.com/rust-lang/crates.io-index" 216 | checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" 217 | dependencies = [ 218 | "libc", 219 | "rand_chacha", 220 | "rand_core", 221 | "rand_hc", 222 | ] 223 | 224 | [[package]] 225 | name = "rand_chacha" 226 | version = "0.3.0" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" 229 | dependencies = [ 230 | "ppv-lite86", 231 | "rand_core", 232 | ] 233 | 234 | [[package]] 235 | name = "rand_core" 236 | version = "0.6.2" 237 | source = "registry+https://github.com/rust-lang/crates.io-index" 238 | checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" 239 | dependencies = [ 240 | "getrandom", 241 | ] 242 | 243 | [[package]] 244 | name = "rand_hc" 245 | version = "0.3.0" 246 | source = "registry+https://github.com/rust-lang/crates.io-index" 247 | checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" 248 | dependencies = [ 249 | "rand_core", 250 | ] 251 | 252 | [[package]] 253 | name = "scopeguard" 254 | version = "1.1.0" 255 | source = "registry+https://github.com/rust-lang/crates.io-index" 256 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 257 | 258 | [[package]] 259 | name = "time" 260 | version = "0.1.43" 261 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 262 | checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" 263 | dependencies = [ 264 | "libc", 265 | "winapi", 266 | ] 267 | 268 | [[package]] 269 | name = "timer" 270 | version = "0.1.6" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "a9522a9ec40055e2f9e514e38d2415a496e81dbfc1ece15d98d2fe55c44946b3" 273 | dependencies = [ 274 | "chrono", 275 | ] 276 | 277 | [[package]] 278 | name = "wasi" 279 | version = "0.10.2+wasi-snapshot-preview1" 280 | source = "registry+https://github.com/rust-lang/crates.io-index" 281 | checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" 282 | 283 | [[package]] 284 | name = "winapi" 285 | version = "0.3.9" 286 | source = "registry+https://github.com/rust-lang/crates.io-index" 287 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 288 | dependencies = [ 289 | "winapi-i686-pc-windows-gnu", 290 | "winapi-x86_64-pc-windows-gnu", 291 | ] 292 | 293 | [[package]] 294 | name = "winapi-i686-pc-windows-gnu" 295 | version = "0.4.0" 296 | source = "registry+https://github.com/rust-lang/crates.io-index" 297 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 298 | 299 | [[package]] 300 | name = "winapi-x86_64-pc-windows-gnu" 301 | version = "0.4.0" 302 | source = "registry+https://github.com/rust-lang/crates.io-index" 303 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 304 | -------------------------------------------------------------------------------- /little_raft/tests/raft_stable.rs: -------------------------------------------------------------------------------- 1 | use bytes::Bytes; 2 | use crossbeam_channel as channel; 3 | use crossbeam_channel::{unbounded, Receiver, Sender}; 4 | use little_raft::{ 5 | cluster::Cluster, 6 | message::Message, 7 | replica::Replica, 8 | state_machine::{Snapshot, StateMachine, 
StateMachineTransition, TransitionState}, 9 | }; 10 | use std::convert::TryInto; 11 | use std::sync::{Arc, Mutex}; 12 | 13 | use std::{collections::BTreeMap, thread, time::Duration}; 14 | 15 | const HEARTBEAT_TIMEOUT: Duration = Duration::from_millis(500); 16 | const MIN_ELECTION_TIMEOUT: Duration = Duration::from_millis(750); 17 | const MAX_ELECTION_TIMEOUT: Duration = Duration::from_millis(950); 18 | 19 | // Our state machine will carry out simple plus and minus operations on a 20 | // number, starting from zero. 21 | #[derive(Clone, Debug)] 22 | struct ArithmeticOperation { 23 | id: usize, 24 | delta: i32, 25 | } 26 | 27 | impl StateMachineTransition for ArithmeticOperation { 28 | type TransitionID = usize; 29 | fn get_id(&self) -> Self::TransitionID { 30 | self.id 31 | } 32 | } 33 | 34 | // The Calculator is the state machine that maintains a number that we can add 35 | // to or subtract from. ID is simply for convenience. 36 | struct Calculator { 37 | id: usize, 38 | value: i32, 39 | applied_ids_tx: Sender<(usize, usize)>, 40 | pending_transitions: Vec, 41 | } 42 | 43 | impl StateMachine for Calculator { 44 | fn apply_transition(&mut self, transition: ArithmeticOperation) { 45 | self.value += transition.delta; 46 | println!("id {} my value is now {} after applying delta {}", self.id, self.value, transition.delta); 47 | } 48 | 49 | fn register_transition_state( 50 | &mut self, 51 | transition_id: ::TransitionID, 52 | state: TransitionState, 53 | ) { 54 | // Send IDs of applied transitions down the channel so we can confirm 55 | // they were applied in the right order. 
56 | if state == TransitionState::Applied { 57 | self.applied_ids_tx 58 | .send((self.id, transition_id)) 59 | .expect("could not send applied transition id"); 60 | } 61 | } 62 | 63 | fn get_pending_transitions(&mut self) -> Vec { 64 | let cur = self.pending_transitions.clone(); 65 | self.pending_transitions = Vec::new(); 66 | cur 67 | } 68 | 69 | fn get_snapshot(&mut self) -> Option> { 70 | println!("checked for snapshot"); 71 | None 72 | } 73 | 74 | fn create_snapshot(&mut self, index: usize, term: usize) -> Snapshot { 75 | println!("created snapshot"); 76 | Snapshot { 77 | last_included_index: index, 78 | last_included_term: term, 79 | data: Bytes::from(self.value.to_be_bytes().to_vec()), 80 | } 81 | } 82 | 83 | fn set_snapshot(&mut self, snapshot: Snapshot) { 84 | let v: Vec = snapshot.data.into_iter().collect(); 85 | self.value = i32::from_be_bytes(v[..].try_into().expect("incorrect length")); 86 | println!("my value is now {} after loading", self.value); 87 | } 88 | } 89 | 90 | // Our test replicas will be running each in its own thread. 
91 | struct ThreadCluster { 92 | id: usize, 93 | is_leader: bool, 94 | transmitters: BTreeMap>>, 95 | pending_messages: Vec>, 96 | halt: bool, 97 | } 98 | 99 | impl Cluster for ThreadCluster { 100 | fn register_leader(&mut self, leader_id: Option) { 101 | if let Some(id) = leader_id { 102 | if id == self.id { 103 | self.is_leader = true; 104 | } else { 105 | self.is_leader = false; 106 | } 107 | } else { 108 | self.is_leader = false; 109 | } 110 | } 111 | 112 | fn send_message(&mut self, to_id: usize, message: Message) { 113 | if let Some(transmitter) = self.transmitters.get(&to_id) { 114 | transmitter.send(message).expect("could not send message"); 115 | } 116 | } 117 | 118 | fn halt(&self) -> bool { 119 | self.halt 120 | } 121 | 122 | fn receive_messages(&mut self) -> Vec> { 123 | let cur = self.pending_messages.clone(); 124 | self.pending_messages = Vec::new(); 125 | cur 126 | } 127 | } 128 | 129 | // Create n clusters, each with their own copy of trasmitters used for 130 | // communication between replicas (threads). 131 | fn create_clusters( 132 | n: usize, 133 | transmitters: BTreeMap>>, 134 | ) -> Vec>> { 135 | let mut clusters = Vec::new(); 136 | for i in 0..n { 137 | let cluster = Arc::new(Mutex::new(ThreadCluster { 138 | id: i, 139 | is_leader: false, 140 | transmitters: transmitters.clone(), 141 | pending_messages: Vec::new(), 142 | halt: false, 143 | })); 144 | 145 | clusters.push(cluster); 146 | } 147 | 148 | clusters 149 | } 150 | 151 | // Create channels for the threads to communicate with. 
152 | fn create_communication_between_clusters( 153 | n: usize, 154 | ) -> ( 155 | BTreeMap>>, 156 | Vec>>, 157 | ) { 158 | let (mut transmitters, mut receivers) = (BTreeMap::new(), Vec::new()); 159 | for i in 0..n { 160 | let (tx, rx) = unbounded::>(); 161 | transmitters.insert(i, tx); 162 | receivers.push(rx); 163 | } 164 | 165 | (transmitters, receivers) 166 | } 167 | 168 | fn create_peer_ids(n: usize) -> Vec> { 169 | let mut all_peer_ids = Vec::new(); 170 | for i in 0..n { 171 | let mut peer_ids = Vec::new(); 172 | for n in 0..n { 173 | if n != i { 174 | peer_ids.push(n); 175 | } 176 | } 177 | all_peer_ids.push(peer_ids); 178 | } 179 | 180 | all_peer_ids 181 | } 182 | 183 | // Create state machines, each with its own copy on which to send 184 | // (state_machine_id, transition_id) for transitions that have been applied. 185 | fn create_state_machines( 186 | n: usize, 187 | applied_transitions_tx: Sender<(usize, usize)>, 188 | ) -> Vec>> { 189 | let mut state_machines = Vec::new(); 190 | for i in 0..n { 191 | let state_machine = Arc::new(Mutex::new(Calculator { 192 | id: i, 193 | value: 0, 194 | pending_transitions: Vec::new(), 195 | applied_ids_tx: applied_transitions_tx.clone(), 196 | })); 197 | state_machines.push(state_machine); 198 | } 199 | state_machines 200 | } 201 | 202 | // Create sending ends of message notifiers, sending ends of transition 203 | // notifiers, receiving ends of message notifiers, receiving neds of transition 204 | // notifiers. 
205 | fn create_notifiers( 206 | n: usize, 207 | ) -> ( 208 | Vec>, 209 | Vec>, 210 | Vec>, 211 | Vec>, 212 | ) { 213 | let mut message_tx = Vec::new(); 214 | let mut message_rx = Vec::new(); 215 | let mut transition_tx = Vec::new(); 216 | let mut transition_rx = Vec::new(); 217 | for _ in 0..n { 218 | let (message_notifier_tx, message_notifier_rx) = channel::unbounded(); 219 | let (transition_notifier_tx, transition_notifier_rx) = channel::unbounded(); 220 | message_tx.push(message_notifier_tx); 221 | message_rx.push(message_notifier_rx); 222 | transition_tx.push(transition_notifier_tx); 223 | transition_rx.push(transition_notifier_rx); 224 | } 225 | 226 | (message_tx, transition_tx, message_rx, transition_rx) 227 | } 228 | 229 | fn run_clusters_communication( 230 | mut clusters: Vec>>, 231 | mut cluster_message_receivers: Vec>>, 232 | mut message_notifiers_tx: Vec>, 233 | ) { 234 | for _ in (0..clusters.len()).rev() { 235 | let cluster = clusters.pop().unwrap(); 236 | let cluster_message_rx = cluster_message_receivers.pop().unwrap(); 237 | let message_notifier = message_notifiers_tx.pop().unwrap(); 238 | 239 | // For each cluster, start a thread where we notify the cluster replica 240 | // of a new message as soon as we receive one for it. 241 | thread::spawn(move || loop { 242 | let msg = cluster_message_rx.recv().unwrap(); 243 | match cluster.lock() { 244 | Ok(mut unlocked_cluster) => { 245 | unlocked_cluster.pending_messages.push(msg); 246 | message_notifier 247 | .send(()) 248 | .expect("could not notify of message"); 249 | } 250 | _ => return, 251 | } 252 | }); 253 | } 254 | } 255 | 256 | fn run_arithmetic_operation_on_cluster( 257 | clusters: Vec>>, 258 | state_machines: Vec>>, 259 | transition_notifiers: Vec>, 260 | delta: i32, 261 | id: usize, 262 | ) { 263 | thread::sleep(Duration::from_secs(1)); 264 | // Find the leader and send the transition request to it. 
265 | for cluster in clusters.iter() { 266 | let cluster = cluster.lock().unwrap(); 267 | if cluster.is_leader { 268 | state_machines[cluster.id] 269 | .lock() 270 | .unwrap() 271 | .pending_transitions 272 | .push(ArithmeticOperation { delta, id }); 273 | transition_notifiers[cluster.id] 274 | .send(()) 275 | .expect("could not send transition notification"); 276 | break; 277 | } 278 | } 279 | 280 | thread::sleep(Duration::from_secs(2)); 281 | } 282 | 283 | fn halt_clusters(clusters: Vec>>) { 284 | thread::sleep(Duration::from_secs(1)); 285 | for cluster in clusters.iter() { 286 | let mut c = cluster.lock().unwrap(); 287 | c.halt = true; 288 | } 289 | thread::sleep(Duration::from_secs(3)); 290 | } 291 | 292 | #[test] 293 | fn run_replicas() { 294 | let n = 3; 295 | // We are going to test that three replicas can elect a leader and process a 296 | // few simple operations. 297 | // 298 | // Main complexity of this test set up comes from the fact that everything 299 | // is running on a single machine, so we have to keep track of every 300 | // cluster, replica, and state machine object. In the real world usage of 301 | // the library it's unlikely there will ever be more than a single instance 302 | // of each object per process or even a physical machine. 
303 | let (transmitters, receivers) = create_communication_between_clusters(3); 304 | let clusters = create_clusters(n, transmitters); 305 | let peer_ids = create_peer_ids(n); 306 | let noop = ArithmeticOperation { delta: 0, id: 0 }; 307 | let (applied_transitions_tx, applied_transitions_rx) = unbounded(); 308 | let state_machines = create_state_machines(n, applied_transitions_tx); 309 | let (message_tx, transition_tx, message_rx, transition_rx) = create_notifiers(n); 310 | 311 | for i in 0..n { 312 | let noop = noop.clone(); 313 | let local_peer_ids = peer_ids[i].clone(); 314 | let cluster = clusters[i].clone(); 315 | let state_machine = state_machines[i].clone(); 316 | let m_rx = message_rx[i].clone(); 317 | let t_rx = transition_rx[i].clone(); 318 | 319 | thread::spawn(move || { 320 | let mut replica = Replica::new( 321 | i, 322 | local_peer_ids, 323 | cluster, 324 | state_machine, 325 | 1, 326 | noop.clone(), 327 | HEARTBEAT_TIMEOUT, 328 | (MIN_ELECTION_TIMEOUT, MAX_ELECTION_TIMEOUT), 329 | ); 330 | 331 | replica.start(m_rx, t_rx); 332 | }); 333 | } 334 | 335 | run_clusters_communication(clusters.clone(), receivers, message_tx); 336 | 337 | run_arithmetic_operation_on_cluster( 338 | clusters.clone(), 339 | state_machines.clone(), 340 | transition_tx.clone(), 341 | 5, 342 | 1, 343 | ); 344 | 345 | run_arithmetic_operation_on_cluster( 346 | clusters.clone(), 347 | state_machines.clone(), 348 | transition_tx.clone(), 349 | -51, 350 | 2, 351 | ); 352 | 353 | run_arithmetic_operation_on_cluster( 354 | clusters.clone(), 355 | state_machines.clone(), 356 | transition_tx.clone(), 357 | -511, 358 | 3, 359 | ); 360 | 361 | run_arithmetic_operation_on_cluster(clusters.clone(), state_machines.clone(), transition_tx.clone(), 3, 4); 362 | 363 | halt_clusters(clusters); 364 | 365 | // Below we confirm that every replica applied the same transitions in the 366 | // same order. 
367 | let applied_transactions: Vec<(usize, usize)> = applied_transitions_rx.try_iter().collect(); 368 | let expected_vec: Vec = vec![0, 1, 2, 3, 4]; 369 | assert_eq!( 370 | expected_vec, 371 | applied_transactions.iter().fold(Vec::new(), |mut acc, x| { 372 | if x.0 == 0 { 373 | acc.push(x.1); 374 | }; 375 | acc 376 | }) 377 | ); 378 | 379 | assert_eq!( 380 | expected_vec, 381 | applied_transactions.iter().fold(Vec::new(), |mut acc, x| { 382 | if x.0 == 1 { 383 | acc.push(x.1); 384 | }; 385 | acc 386 | }) 387 | ); 388 | 389 | assert_eq!( 390 | expected_vec, 391 | applied_transactions.iter().fold(Vec::new(), |mut acc, x| { 392 | if x.0 == 2 { 393 | acc.push(x.1); 394 | }; 395 | acc 396 | }) 397 | ); 398 | } 399 | -------------------------------------------------------------------------------- /little_raft/tests/raft_unstable.rs: -------------------------------------------------------------------------------- 1 | use bytes::Bytes; 2 | use crossbeam_channel as channel; 3 | use crossbeam_channel::{unbounded, Receiver, Sender}; 4 | use rand::{thread_rng, Rng}; 5 | use rand::seq::SliceRandom; 6 | use little_raft::{ 7 | cluster::Cluster, 8 | message::Message, 9 | replica::Replica, 10 | state_machine::{Snapshot, StateMachine, StateMachineTransition, TransitionState}, 11 | }; 12 | use std::convert::TryInto; 13 | use std::sync::{Arc, Mutex}; 14 | 15 | use std::{collections::BTreeMap, thread, time::Duration}; 16 | 17 | const HEARTBEAT_TIMEOUT: Duration = Duration::from_millis(50); 18 | const MIN_ELECTION_TIMEOUT: Duration = Duration::from_millis(750); 19 | const MAX_ELECTION_TIMEOUT: Duration = Duration::from_millis(950); 20 | 21 | // Our state machine will carry out simple plus and minus operations on a 22 | // number, starting from zero. 
23 | #[derive(Clone, Debug)] 24 | struct ArithmeticOperation { 25 | id: usize, 26 | delta: i32, 27 | } 28 | 29 | impl StateMachineTransition for ArithmeticOperation { 30 | type TransitionID = usize; 31 | fn get_id(&self) -> Self::TransitionID { 32 | self.id 33 | } 34 | } 35 | 36 | // The Calculator is the state machine that maintains a number that we can add 37 | // to or subtract from. ID is simply for convenience. 38 | struct Calculator { 39 | id: usize, 40 | value: i32, 41 | applied_ids_tx: Sender<(usize, usize)>, 42 | pending_transitions: Vec, 43 | } 44 | 45 | impl StateMachine for Calculator { 46 | fn apply_transition(&mut self, transition: ArithmeticOperation) { 47 | self.value += transition.delta; 48 | println!("id {} my value is now {} after applying delta {}", self.id, self.value, transition.delta); 49 | } 50 | 51 | fn register_transition_state( 52 | &mut self, 53 | transition_id: ::TransitionID, 54 | state: TransitionState, 55 | ) { 56 | // Send IDs of applied transitions down the channel so we can confirm 57 | // they were applied in the right order. 
58 | if state == TransitionState::Applied { 59 | self.applied_ids_tx 60 | .send((self.id, transition_id)) 61 | .expect("could not send applied transition id"); 62 | } 63 | } 64 | 65 | fn get_pending_transitions(&mut self) -> Vec { 66 | let cur = self.pending_transitions.clone(); 67 | self.pending_transitions = Vec::new(); 68 | cur 69 | } 70 | 71 | fn get_snapshot(&mut self) -> Option> { 72 | println!("id {} checked for snapshot", self.id); 73 | None 74 | } 75 | 76 | fn create_snapshot(&mut self, index: usize, term: usize) -> Snapshot { 77 | println!("id {} created snapshot", self.id); 78 | Snapshot { 79 | last_included_index: index, 80 | last_included_term: term, 81 | data: Bytes::from(self.value.to_be_bytes().to_vec()), 82 | } 83 | } 84 | 85 | fn set_snapshot(&mut self, snapshot: Snapshot) { 86 | let v: Vec = snapshot.data.into_iter().collect(); 87 | self.value = i32::from_be_bytes(v[..].try_into().expect("incorrect length")); 88 | println!("id {} my value is now {} after loading", self.id, self.value); 89 | } 90 | } 91 | 92 | // Our test replicas will be running each in its own thread. 93 | struct ThreadCluster { 94 | id: usize, 95 | is_leader: bool, 96 | transmitters: BTreeMap>>, 97 | pending_messages: Vec>, 98 | halt: bool, 99 | } 100 | 101 | impl Cluster for ThreadCluster { 102 | fn register_leader(&mut self, leader_id: Option) { 103 | if let Some(id) = leader_id { 104 | if id == self.id { 105 | self.is_leader = true; 106 | } else { 107 | self.is_leader = false; 108 | } 109 | } else { 110 | self.is_leader = false; 111 | } 112 | } 113 | 114 | fn send_message(&mut self, to_id: usize, message: Message) { 115 | // Drop messages with probability 0.25. 
116 | let n: u8 = rand::thread_rng().gen(); 117 | if n % 4 == 0 { 118 | return 119 | } 120 | 121 | if let Some(transmitter) = self.transmitters.get(&to_id) { 122 | transmitter.send(message).expect("could not send message"); 123 | } 124 | } 125 | 126 | fn halt(&self) -> bool { 127 | self.halt 128 | } 129 | 130 | fn receive_messages(&mut self) -> Vec> { 131 | let mut cur = self.pending_messages.clone(); 132 | // Shuffle messages. 133 | cur.shuffle(&mut thread_rng()); 134 | self.pending_messages = Vec::new(); 135 | cur 136 | } 137 | } 138 | 139 | // Create n clusters, each with their own copy of trasmitters used for 140 | // communication between replicas (threads). 141 | fn create_clusters( 142 | n: usize, 143 | transmitters: BTreeMap>>, 144 | ) -> Vec>> { 145 | let mut clusters = Vec::new(); 146 | for i in 0..n { 147 | let cluster = Arc::new(Mutex::new(ThreadCluster { 148 | id: i, 149 | is_leader: false, 150 | transmitters: transmitters.clone(), 151 | pending_messages: Vec::new(), 152 | halt: false, 153 | })); 154 | 155 | clusters.push(cluster); 156 | } 157 | 158 | clusters 159 | } 160 | 161 | // Create channels for the threads to communicate with. 
162 | fn create_communication_between_clusters( 163 | n: usize, 164 | ) -> ( 165 | BTreeMap>>, 166 | Vec>>, 167 | ) { 168 | let (mut transmitters, mut receivers) = (BTreeMap::new(), Vec::new()); 169 | for i in 0..n { 170 | let (tx, rx) = unbounded::>(); 171 | transmitters.insert(i, tx); 172 | receivers.push(rx); 173 | } 174 | 175 | (transmitters, receivers) 176 | } 177 | 178 | fn create_peer_ids(n: usize) -> Vec> { 179 | let mut all_peer_ids = Vec::new(); 180 | for i in 0..n { 181 | let mut peer_ids = Vec::new(); 182 | for n in 0..n { 183 | if n != i { 184 | peer_ids.push(n); 185 | } 186 | } 187 | all_peer_ids.push(peer_ids); 188 | } 189 | 190 | all_peer_ids 191 | } 192 | 193 | // Create state machines, each with its own copy on which to send 194 | // (state_machine_id, transition_id) for transitions that have been applied. 195 | fn create_state_machines( 196 | n: usize, 197 | applied_transitions_tx: Sender<(usize, usize)>, 198 | ) -> Vec>> { 199 | let mut state_machines = Vec::new(); 200 | for i in 0..n { 201 | let state_machine = Arc::new(Mutex::new(Calculator { 202 | id: i, 203 | value: 0, 204 | pending_transitions: Vec::new(), 205 | applied_ids_tx: applied_transitions_tx.clone(), 206 | })); 207 | state_machines.push(state_machine); 208 | } 209 | state_machines 210 | } 211 | 212 | // Create sending ends of message notifiers, sending ends of transition 213 | // notifiers, receiving ends of message notifiers, receiving neds of transition 214 | // notifiers. 
215 | fn create_notifiers( 216 | n: usize, 217 | ) -> ( 218 | Vec>, 219 | Vec>, 220 | Vec>, 221 | Vec>, 222 | ) { 223 | let mut message_tx = Vec::new(); 224 | let mut message_rx = Vec::new(); 225 | let mut transition_tx = Vec::new(); 226 | let mut transition_rx = Vec::new(); 227 | for _ in 0..n { 228 | let (message_notifier_tx, message_notifier_rx) = channel::unbounded(); 229 | let (transition_notifier_tx, transition_notifier_rx) = channel::unbounded(); 230 | message_tx.push(message_notifier_tx); 231 | message_rx.push(message_notifier_rx); 232 | transition_tx.push(transition_notifier_tx); 233 | transition_rx.push(transition_notifier_rx); 234 | } 235 | 236 | (message_tx, transition_tx, message_rx, transition_rx) 237 | } 238 | 239 | fn run_clusters_communication( 240 | mut clusters: Vec>>, 241 | mut cluster_message_receivers: Vec>>, 242 | mut message_notifiers_tx: Vec>, 243 | ) { 244 | for _ in (0..clusters.len()).rev() { 245 | let cluster = clusters.pop().unwrap(); 246 | let cluster_message_rx = cluster_message_receivers.pop().unwrap(); 247 | let message_notifier = message_notifiers_tx.pop().unwrap(); 248 | 249 | // For each cluster, start a thread where we notify the cluster replica 250 | // of a new message as soon as we receive one for it. 251 | thread::spawn(move || loop { 252 | let msg = cluster_message_rx.recv().unwrap(); 253 | match cluster.lock() { 254 | Ok(mut unlocked_cluster) => { 255 | unlocked_cluster.pending_messages.push(msg); 256 | message_notifier 257 | .send(()) 258 | .expect("could not notify of message"); 259 | } 260 | _ => return, 261 | } 262 | }); 263 | } 264 | } 265 | 266 | fn run_arithmetic_operation_on_cluster( 267 | clusters: Vec>>, 268 | state_machines: Vec>>, 269 | transition_notifiers: Vec>, 270 | delta: i32, 271 | id: usize, 272 | ) { 273 | // Sleep longer because in this test we're dropping 25% of all messages. 274 | thread::sleep(Duration::from_secs(2)); 275 | // Find the leader and send the transition request to it. 
276 | for cluster in clusters.iter() { 277 | let cluster = cluster.lock().unwrap(); 278 | if cluster.is_leader { 279 | state_machines[cluster.id] 280 | .lock() 281 | .unwrap() 282 | .pending_transitions 283 | .push(ArithmeticOperation { delta, id }); 284 | transition_notifiers[cluster.id] 285 | .send(()) 286 | .expect("could not send transition notification"); 287 | break; 288 | } 289 | } 290 | 291 | // Sleep long. 292 | thread::sleep(Duration::from_secs(3)); 293 | } 294 | 295 | fn halt_clusters(clusters: Vec>>) { 296 | thread::sleep(Duration::from_secs(1)); 297 | for cluster in clusters.iter() { 298 | let mut c = cluster.lock().unwrap(); 299 | c.halt = true; 300 | } 301 | thread::sleep(Duration::from_secs(2)); 302 | } 303 | 304 | #[test] 305 | fn run_replicas() { 306 | let n = 3; 307 | // We are going to test that three replicas can elect a leader and process a 308 | // few simple operations. 309 | // 310 | // Main complexity of this test set up comes from the fact that everything 311 | // is running on a single machine, so we have to keep track of every 312 | // cluster, replica, and state machine object. In the real world usage of 313 | // the library it's unlikely there will ever be more than a single instance 314 | // of each object per process or even a physical machine. 
315 | let (transmitters, receivers) = create_communication_between_clusters(3); 316 | let clusters = create_clusters(n, transmitters); 317 | let peer_ids = create_peer_ids(n); 318 | let noop = ArithmeticOperation { delta: 0, id: 0 }; 319 | let (applied_transitions_tx, _applied_transitions_rx) = unbounded(); 320 | let state_machines = create_state_machines(n, applied_transitions_tx); 321 | let (message_tx, transition_tx, message_rx, transition_rx) = create_notifiers(n); 322 | for i in 0..n { 323 | let noop = noop.clone(); 324 | let local_peer_ids = peer_ids[i].clone(); 325 | let cluster = clusters[i].clone(); 326 | let state_machine = state_machines[i].clone(); 327 | let m_rx = message_rx[i].clone(); 328 | let t_rx = transition_rx[i].clone(); 329 | thread::spawn(move || { 330 | let mut replica = Replica::new( 331 | i, 332 | local_peer_ids, 333 | cluster, 334 | state_machine, 335 | 1, 336 | noop.clone(), 337 | HEARTBEAT_TIMEOUT, 338 | (MIN_ELECTION_TIMEOUT, MAX_ELECTION_TIMEOUT), 339 | ); 340 | 341 | replica.start(m_rx, t_rx); 342 | }); 343 | } 344 | 345 | run_clusters_communication(clusters.clone(), receivers, message_tx); 346 | run_arithmetic_operation_on_cluster( 347 | clusters.clone(), 348 | state_machines.clone(), 349 | transition_tx.clone(), 350 | 5, 351 | 1, 352 | ); 353 | 354 | // In this test, we confirm that the cluster converged on true value one by 355 | // one after each arithmetic operation. This is different from 356 | // raft_stable.rs, where we check the order in which transition have been 357 | // applied post-factum. We can't do the same in raft_unstable.rs, because 358 | // replicas reload from snapshots in this test, meaning not all replicas go 359 | // over all transitions. Some replicas load directly from their peer's 360 | // snapshots. 
361 | for machine in state_machines.clone() { 362 | assert_eq!(machine.lock().unwrap().value, 5); 363 | } 364 | 365 | run_arithmetic_operation_on_cluster( 366 | clusters.clone(), 367 | state_machines.clone(), 368 | transition_tx.clone(), 369 | -51, 370 | 2, 371 | ); 372 | 373 | for machine in state_machines.clone() { 374 | assert_eq!(machine.lock().unwrap().value, -46); 375 | } 376 | 377 | run_arithmetic_operation_on_cluster( 378 | clusters.clone(), 379 | state_machines.clone(), 380 | transition_tx.clone(), 381 | -511, 382 | 3, 383 | ); 384 | 385 | 386 | for machine in state_machines.clone() { 387 | assert_eq!(machine.lock().unwrap().value, -557); 388 | } 389 | 390 | run_arithmetic_operation_on_cluster(clusters.clone(), state_machines.clone(), transition_tx.clone(), 3, 4); 391 | 392 | for machine in state_machines.clone() { 393 | assert_eq!(machine.lock().unwrap().value, -554); 394 | } 395 | 396 | halt_clusters(clusters); 397 | } 398 | -------------------------------------------------------------------------------- /little_raft/src/replica.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | cluster::Cluster, 3 | message::{LogEntry, Message}, 4 | state_machine::{ 5 | Snapshot, StateMachine, StateMachineTransition, TransitionAbandonedReason, TransitionState, 6 | }, 7 | timer::Timer, 8 | }; 9 | use crossbeam_channel::{Receiver, Select}; 10 | use rand::Rng; 11 | use std::cmp::Ordering; 12 | use std::sync::{Arc, Mutex}; 13 | use std::{ 14 | cmp, 15 | collections::{BTreeMap, BTreeSet}, 16 | time::{Duration, Instant}, 17 | }; 18 | 19 | #[derive(Clone, PartialEq, Debug)] 20 | enum State { 21 | Follower, 22 | Candidate, 23 | Leader, 24 | } 25 | 26 | /// ReplicaID is a type alias used to identify Raft nodes. 
27 | pub type ReplicaID = usize; 28 | 29 | type Result = std::result::Result; 30 | 31 | #[derive(Debug, Clone)] 32 | enum ReplicaError { 33 | LogCompacted, 34 | } 35 | 36 | /// Replica describes the local instance running the Raft algorithm. Its goal is 37 | /// to maintain the consistency of the user-defined StateMachine across the 38 | /// cluster. It uses the user-defined Cluster implementation to talk to other 39 | /// Replicas, be it over the network or pigeon post. 40 | pub struct Replica 41 | where 42 | C: Cluster, 43 | M: StateMachine, 44 | T: StateMachineTransition, 45 | D: Clone, 46 | { 47 | /// ID of this Replica. 48 | id: ReplicaID, 49 | 50 | /// IDs of other Replicas in the cluster. 51 | peer_ids: Vec, 52 | 53 | /// User-defined state machine that the cluster Replicates. 54 | state_machine: Arc>, 55 | 56 | /// Interface a Replica uses to communicate with the rest of the cluster. 57 | cluster: Arc>, 58 | 59 | /// Current term. 60 | current_term: usize, 61 | 62 | /// ID of peers with votes for self. 63 | current_votes: Option>>, 64 | 65 | /// State of this Replica. 66 | state: State, 67 | 68 | /// Who the last vote was cast for. 69 | voted_for: Option, 70 | 71 | /// entries this Replica is aware of. 72 | log: Vec>, 73 | 74 | /// Index of the highest transition known to be committed. 75 | commit_index: usize, 76 | 77 | /// Index of the highest transition applied to the local state machine. 78 | last_applied: usize, 79 | 80 | /// For each server, index of the next log entry to send to that server. 81 | /// Only present on leaders. 82 | next_index: BTreeMap, 83 | 84 | /// For each server, index of highest log entry known to be replicated on 85 | /// that server. Only present on leaders. 86 | match_index: BTreeMap, 87 | 88 | /// No-op transition used to force a faster Replica update when a cluster 89 | /// Leader changes. Applied this transition multiple times must have no 90 | /// affect on the state machine. 
91 | noop_transition: T, 92 | 93 | /// Timer used for heartbeat messages. 94 | heartbeat_timer: Timer, 95 | 96 | /// Timeout range within a randomized timeout is picked for when to start a 97 | /// new Leader election if the current Leader is not sending heartbeats. 98 | election_timeout: (Duration, Duration), 99 | 100 | /// If no heartbeat message is received by the deadline, the Replica will 101 | /// start an election. 102 | next_election_deadline: Instant, 103 | 104 | /// The number of transaction logs that this instance will let accumulate 105 | /// before merging them into a single snapshot. Snapshotting is enabled <=> 106 | /// snapshot_delta > 0. 107 | snapshot_delta: usize, 108 | 109 | /// The log snapshot of this Replica. Even if snapshot_delta is 0, the 110 | /// snapshot field can be Some(_), since the Replica can be started with a 111 | /// seed snapshot. 112 | snapshot: Option>, 113 | 114 | /// The length of the log sequence that is represented by the snapshot. 115 | /// Since compacted entries aren't in the log anymore, access to the log 116 | /// should be done with log[log_index - index_offset]. 117 | /// 118 | /// The following is always true: 119 | /// 120 | /// last_log_index = log.len() - 1 + index_offset. 121 | index_offset: usize, 122 | } 123 | 124 | impl Replica 125 | where 126 | C: Cluster, 127 | M: StateMachine, 128 | T: StateMachineTransition, 129 | D: Clone, 130 | { 131 | /// Create a new Replica. 132 | /// 133 | /// id is the ID of this Replica within the cluster. 134 | /// 135 | /// peer_ids is a vector of IDs of all other Replicas in the cluster. 136 | /// 137 | /// cluster represents the abstraction the Replica uses to talk with other 138 | /// Replicas. 139 | /// 140 | /// state_machine is the state machine that Raft maintains. 141 | /// 142 | /// snapshot_delta tells the Replica how many transaction logs to accumulate 143 | /// before doing compaction and merging them into a snapshot. 
Snapshotting 144 | /// is enabled if and only if snapshot_delta > 0. 145 | /// 146 | /// noop_transition is a transition that can be applied to the state machine 147 | /// multiple times with no effect. 148 | /// 149 | /// heartbeat_timeout defines how often the Leader Replica sends out 150 | /// heartbeat messages. 151 | /// 152 | /// election_timeout_range defines the election timeout interval. If the 153 | /// Replica gets no messages from the Leader before the timeout, it 154 | /// initiates an election. In practice, pick election_timeout_range to be 155 | /// 2-3x the value of heartbeat_timeout, depending on your particular 156 | /// use-case network latency and responsiveness needs. An 157 | /// election_timeout_range / heartbeat_timeout ratio that's too low might 158 | /// cause unwarranted re-elections in the cluster. 159 | pub fn new( 160 | id: ReplicaID, 161 | peer_ids: Vec, 162 | cluster: Arc>, 163 | state_machine: Arc>, 164 | snapshot_delta: usize, 165 | noop_transition: T, 166 | heartbeat_timeout: Duration, 167 | election_timeout_range: (Duration, Duration), 168 | ) -> Replica { 169 | let snapshot = state_machine.lock().unwrap().get_snapshot(); 170 | // index_offset is the "length" of the snapshot, so calculate it as 171 | // snapshot.last_included_index + 1. 172 | let mut index_offset: usize = 0; 173 | let mut current_term: usize = 0; 174 | let mut log: Vec> = Vec::new(); 175 | if let Some(ref snapshot) = snapshot { 176 | index_offset = snapshot.last_included_index + 1; 177 | current_term = snapshot.last_included_term; 178 | } else { 179 | // If the Replica is starting anew, create a default no-op transition as 180 | // the very first entry in the log. This trick lets us make sure every 181 | // Replica has a non-empty log. If the Replica is starting from a 182 | // snapshot, initialize current log to empty. 
183 | log = vec![LogEntry { 184 | term: 0, 185 | index: 0, 186 | transition: noop_transition.clone(), 187 | }] 188 | } 189 | 190 | Replica { 191 | state_machine, 192 | cluster, 193 | peer_ids, 194 | id, 195 | current_term, 196 | current_votes: None, 197 | state: State::Follower, 198 | voted_for: None, 199 | log, 200 | noop_transition, 201 | commit_index: 0, 202 | last_applied: 0, 203 | next_index: BTreeMap::new(), 204 | match_index: BTreeMap::new(), 205 | election_timeout: election_timeout_range, 206 | heartbeat_timer: Timer::new(heartbeat_timeout), 207 | next_election_deadline: Instant::now(), 208 | snapshot, 209 | snapshot_delta, 210 | index_offset, 211 | } 212 | } 213 | 214 | /// This function starts the Replica and blocks forever. 215 | /// 216 | /// recv_msg is a channel on which the user must notify the Replica whenever 217 | /// new messages from the Cluster are available. The Replica will not poll 218 | /// for messages from the Cluster unless notified through recv_msg. 219 | /// 220 | /// recv_transition is a channel on which the user must notify the Replica 221 | /// whenever new transitions to be processed for the StateMachine are 222 | /// available. The Replica will not poll for pending transitions for the 223 | /// StateMachine unless notified through recv_transition. 
    pub fn start(&mut self, recv_msg: Receiver<()>, recv_transition: Receiver<()>) {
        loop {
            // The user-provided Cluster can request a halt at any time; this is
            // the only way out of the loop.
            if self.cluster.lock().unwrap().halt() {
                return;
            }

            // Poll for work according to the current Raft role.
            match self.state {
                State::Leader => self.poll_as_leader(&recv_msg, &recv_transition),
                State::Follower => self.poll_as_follower(&recv_msg),
                State::Candidate => self.poll_as_candidate(&recv_msg),
            }

            // Regardless of role, commit and apply whatever is ready.
            self.apply_ready_entries();
        }
    }

    // Leader event loop body: block on whichever of the three channels fires
    // first — new cluster messages, new pending transitions, or the heartbeat
    // timer — and handle that one event.
    fn poll_as_leader(&mut self, recv_msg: &Receiver<()>, recv_transition: &Receiver<()>) {
        let mut select = Select::new();
        let recv_heartbeat = self.heartbeat_timer.get_rx();
        let (msg, transition, heartbeat) = (
            select.recv(recv_msg),
            select.recv(recv_transition),
            select.recv(recv_heartbeat),
        );

        let oper = select.select();
        match oper.index() {
            // Process pending messages.
            i if i == msg => {
                // Complete the selected receive; recv_msg is only a
                // notification channel, the payload lives in the Cluster.
                oper.recv(recv_msg)
                    .expect("could not react to a new message");
                let messages = self.cluster.lock().unwrap().receive_messages();
                for message in messages {
                    self.process_message(message);
                }
            }
            // Process pending transitions.
            i if i == transition => {
                oper.recv(recv_transition)
                    .expect("could not react to a new transition");
                self.load_new_transitions();
                self.broadcast_append_entry_request();
            }
            // Broadcast heartbeat messages.
            i if i == heartbeat => {
                oper.recv(recv_heartbeat)
                    .expect("could not react to the heartbeat");
                self.broadcast_append_entry_request();
                self.heartbeat_timer.renew();
            }
            // select() only returns one of the three registered operations.
            _ => unreachable!(),
        }
    }

    // Send an AppendEntryRequest to every peer; if a peer is so far behind
    // that the entries it needs have been compacted away, send it the current
    // snapshot via InstallSnapshotRequest instead.
    fn broadcast_append_entry_request(&mut self) {
        self.broadcast_message(|peer_id: ReplicaID| {
            match self.get_term_at_index(self.next_index[&peer_id] - 1) {
                Ok(term) => Message::AppendEntryRequest {
                    from_id: self.id,
                    term: self.current_term,
                    prev_log_index: self.next_index[&peer_id] - 1,
                    prev_log_term: term,
                    entries: self.get_entries_for_peer(peer_id),
                    commit_index: self.commit_index,
                },
                Err(ReplicaError::LogCompacted) => {
                    // unwrap is safe: LogCompacted is only returned when a
                    // snapshot exists (see get_term_at_index).
                    let snapshot = self.snapshot.as_ref().unwrap();
                    Message::InstallSnapshotRequest {
                        from_id: self.id,
                        term: self.current_term,
                        last_included_index: snapshot.last_included_index,
                        last_included_term: snapshot.last_included_term,
                        // The whole snapshot is shipped in one message.
                        offset: 0,
                        data: snapshot.data.clone(),
                        done: true,
                    }
                }
            }
        });
    }

    // Look up the term of the log entry at the given global (pre-compaction)
    // index. Returns Err(LogCompacted) when the index falls strictly before
    // the snapshot's last included index and the entry no longer exists.
    fn get_term_at_index(&self, index: usize) -> Result {
        if let Some(snapshot) = &self.snapshot {
            if index == snapshot.last_included_index {
                return Ok(snapshot.last_included_term);
            } else if index > snapshot.last_included_index {
                // Translate the global index into a position in the in-memory
                // log, which starts at index_offset.
                let localized_index = index - self.index_offset;
                return Ok(self.log[localized_index].term);
            }
            Err(ReplicaError::LogCompacted)
        } else {
            // No snapshot yet: global and local indices coincide.
            Ok(self.log[index].term)
        }
    }

    // Follower event loop body: wait for a message notification until the
    // election deadline; on timeout, stand for election.
    fn poll_as_follower(&mut self, recv_msg: &Receiver<()>) {
        match recv_msg.recv_deadline(self.next_election_deadline) {
            // Process pending messages.
            Ok(_) => {
                let messages = self.cluster.lock().unwrap().receive_messages();
                // Update the election deadline if more than zero messages were
                // actually received.
                if !messages.is_empty() {
                    self.update_election_deadline();
                }

                for message in messages {
                    self.process_message(message);
                }
            }
            // Become a candidate and update the election deadline.
            _ => {
                self.become_candidate();
                self.update_election_deadline();
            }
        }

        // Load new transitions. The follower will ignore these transitions, but
        // they are still polled for periodically to ensure there are no stale
        // transitions in case the Replica's state changes.
        self.load_new_transitions();
    }

    // Dispatch a single message to the handler for the current role.
    fn process_message(&mut self, message: Message) {
        match self.state {
            State::Leader => self.process_message_as_leader(message),
            State::Candidate => self.process_message_as_candidate(message),
            State::Follower => self.process_message_as_follower(message),
        }
    }

    fn update_election_deadline(&mut self) {
        // Randomize each election deadline within the allowed range.
        self.next_election_deadline = Instant::now()
            + rand::thread_rng().gen_range(self.election_timeout.0..=self.election_timeout.1);
    }

    // Candidate event loop body: same shape as poll_as_follower — wait for
    // messages until the election deadline, and start a fresh election round
    // (incrementing the term) on timeout.
    fn poll_as_candidate(&mut self, recv_msg: &Receiver<()>) {
        match recv_msg.recv_deadline(self.next_election_deadline) {
            Ok(_) => {
                // Process pending messages.
                let messages = self.cluster.lock().unwrap().receive_messages();
                // Update the election deadline if more than zero messages were
                // actually received.
                if !messages.is_empty() {
                    self.update_election_deadline();
                }
                for message in messages {
                    self.process_message(message);
                }
            }
            // Become a candidate and update the election deadline.
            _ => {
                self.become_candidate();
                self.update_election_deadline();
            }
        }

        // Load new transitions. The candidate will ignore these transitions,
        // but they are still polled for periodically to ensure there are no
        // stale transitions in case the Replica's state changes.
        self.load_new_transitions();
    }

    // Send a per-peer message (produced by message_generator from the peer id)
    // to every peer in the cluster.
    fn broadcast_message(&self, message_generator: F)
    where
        F: Fn(usize) -> Message,
    {
        self.peer_ids.iter().for_each(|peer_id| {
            self.cluster
                .lock()
                .unwrap()
                .send_message(*peer_id, message_generator(*peer_id))
        });
    }

    // Get log entries that have not been acknowledged by the peer.
    // next_index holds a global index; subtracting index_offset localizes it
    // into the in-memory log.
    fn get_entries_for_peer(&self, peer_id: ReplicaID) -> Vec> {
        // TODO: double check
        self.log[self.next_index[&peer_id] - self.index_offset..self.log.len()].to_vec()
    }

    // Apply entries that are ready to be applied.
    // As Leader, first advance commit_index to the highest index replicated on
    // a majority (restricted to entries from the current term, per Raft §5.4.2);
    // then, in any role, apply everything between last_applied and
    // commit_index, and finally compact the log if snapshotting is enabled.
    fn apply_ready_entries(&mut self) {
        if self.log.is_empty() {
            return;
        }

        // Move the commit index to the latest log index that has been
        // replicated on the majority of the replicas.
        let mut state_machine = self.state_machine.lock().unwrap();
        let mut n = self.log.len() - 1 + self.index_offset;
        if self.state == State::Leader && self.commit_index < n {
            let old_commit_index = self.commit_index;
            // Scan down from the last log index; the first (highest) index
            // with majority replication and a current-term entry wins.
            while n > self.commit_index {
                let num_replications =
                    self.match_index.iter().fold(
                        0,
                        |acc, mtch_idx| if mtch_idx.1 >= &n { acc + 1 } else { acc },
                    );

                // num_replications counts peers only; the leader itself always
                // has the entry, so num_replications * 2 >= peers is a cluster
                // majority. Only entries from the current term may be
                // committed by counting replicas.
                if num_replications * 2 >= self.peer_ids.len()
                    && self.log[n - self.index_offset].term == self.current_term
                {
                    self.commit_index = n;
                }
                n -= 1;
            }

            // Notify the StateMachine of every newly committed transition.
            for i in old_commit_index + 1..=self.commit_index {
                state_machine.register_transition_state(
                    self.log[i - self.index_offset].transition.get_id(),
                    TransitionState::Committed,
                );
            }
        }

        // Apply entries that are behind the currently committed index.
        while self.commit_index > self.last_applied {
            self.last_applied += 1;
            let local_idx = self.last_applied - self.index_offset;
            state_machine.apply_transition(self.log[local_idx].transition.clone());
            state_machine.register_transition_state(
                self.log[local_idx].transition.get_id(),
                TransitionState::Applied,
            );
        }

        // If snapshot_delta is greater than 0, check whether it's time for log
        // compaction.
        if self.snapshot_delta > 0 {
            // Calculate number of applied logs that haven't been compacted yet.
            let curr_delta = self.last_applied + 1 - self.index_offset;
            // If the number of accumulated logs is greater than or equal to the
            // configured delta, do compaction.
            if curr_delta >= self.snapshot_delta {
                let last_applied = self.last_applied;
                self.snapshot = Some(state_machine.create_snapshot(
                    last_applied,
                    self.log[last_applied - self.index_offset].term,
                ));
                // Drop everything now covered by the snapshot and shift the
                // global-to-local index mapping accordingly.
                self.log.retain(|l| l.index > last_applied);
                self.index_offset = last_applied + 1;
            }
        }
    }

    fn load_new_transitions(&mut self) {
        // Load new transitions. Ignore the transitions if the replica is not
        // the Leader.
        let mut state_machine = self.state_machine.lock().unwrap();
        let transitions = state_machine.get_pending_transitions();
        for transition in transitions {
            if self.state == State::Leader {
                // Append to the log under the current term; the entry's global
                // index is the local length plus the compaction offset.
                self.log.push(LogEntry {
                    index: self.log.len() + self.index_offset,
                    transition: transition.clone(),
                    term: self.current_term,
                });

                state_machine
                    .register_transition_state(transition.get_id(), TransitionState::Queued);
            } else {
                // Non-leaders reject user transitions outright so the caller
                // learns to resubmit to the actual Leader.
                state_machine.register_transition_state(
                    transition.get_id(),
                    TransitionState::Abandoned(TransitionAbandonedReason::NotLeader),
                );
            }
        }
    }

    // Leader-side handling of responses: track per-peer replication progress
    // and step down if a higher term is observed.
    fn process_message_as_leader(&mut self, message: Message) {
        match message {
            Message::AppendEntryResponse {
                from_id,
                term,
                success,
                last_index,
                mismatch_index,
            } => {
                if term > self.current_term {
                    // Become follower if another node's term is higher.
                    self.cluster.lock().unwrap().register_leader(None);
                    self.become_follower(term);
                } else if success {
                    // Update information about the peer's logs.
                    self.next_index.insert(from_id, last_index + 1);
                    self.match_index.insert(from_id, last_index);
                } else {
                    // Update information about the peer's logs.
                    //
                    // If the mismatch_index is greater than or equal to the
                    // existing next_index, then we know that this rejection is a
                    // stray out-of-order or duplicate rejection, which we can
                    // ignore. The reason we know that is because mismatch_index is
                    // set by the follower to prev_log_index, which was in turn set
                    // by the leader to next_index-1. Hence mismatch_index can't be
                    // greater than or equal to next_index.
                    //
                    // If the mismatch_index isn't stray, we set next_index to the
                    // min of next_index and last_index; this is equivalent to the
                    // Raft paper's guidance on decreasing next_index by one at a
                    // time, but is more performant in cases when we can cut
                    // straight to the follower's last_index+1.
                    if let Some(mismatch_index) = mismatch_index {
                        if mismatch_index < self.next_index[&from_id] {
                            let next_index = cmp::min(mismatch_index, last_index + 1);
                            self.next_index.insert(from_id, next_index);
                        }
                    }
                }
            }
            Message::InstallSnapshotResponse {
                from_id,
                term,
                last_included_index,
            } => {
                if term > self.current_term {
                    // Become follower if another node's term is higher.
                    self.cluster.lock().unwrap().register_leader(None);
                    self.become_follower(term);
                } else {
                    // The peer now has everything up to the snapshot boundary.
                    self.next_index.insert(from_id, last_included_index + 1);
                    self.match_index.insert(from_id, last_included_index);
                }
            }
            // Leaders ignore requests/votes addressed to other roles.
            _ => {}
        }
    }

    // Decide whether to grant a vote: never vote for a stale term, adopt a
    // higher term, and grant at most one vote per term to a candidate whose
    // log is at least as complete as ours.
    fn process_vote_request_as_follower(
        &mut self,
        from_id: ReplicaID,
        term: usize,
        last_log_index: usize,
        last_log_term: usize,
    ) {
        match self.current_term.cmp(&term) {
            Ordering::Greater => {
                // Do not vote for Replicas that are behind.
                self.cluster.lock().unwrap().send_message(
                    from_id,
                    Message::VoteResponse {
                        from_id: self.id,
                        term: self.current_term,
                        vote_granted: false,
                    },
                );
            }
            Ordering::Less => {
                // Become a follower if the other replica's term is higher.
                self.cluster.lock().unwrap().register_leader(None);
                self.become_follower(term);
            }
            _ => {}
        }

        // NOTE(review): the Raft paper's up-to-date check (§5.4.1) grants the
        // vote when the candidate's last term is strictly greater, OR the
        // terms are equal and the candidate's last index is >= ours. Requiring
        // both index <= and term <= as below differs for a candidate with a
        // higher last term but shorter log — verify against the paper.
        let self_last_log_index = self.get_last_log_index();
        let self_last_log_term = self.get_last_log_term();
        if (self.voted_for == None || self.voted_for == Some(from_id))
            && self_last_log_index <= last_log_index
            && self_last_log_term <= last_log_term
        {
            // If the criteria are met, grant the vote.
            let mut cluster = self.cluster.lock().unwrap();
            cluster.register_leader(None);
            cluster.send_message(
                from_id,
                Message::VoteResponse {
                    from_id: self.id,
                    term: self.current_term,
                    vote_granted: true,
                },
            );
            self.voted_for = Some(from_id);
            return;
        }

        // If the criteria are not met or if already voted for someone else, do
        // not grant the vote.
        self.cluster.lock().unwrap().send_message(
            from_id,
            Message::VoteResponse {
                from_id: self.id,
                term: self.current_term,
                vote_granted: false,
            },
        );
    }

    // Install a snapshot pushed by the Leader: reject stale terms, otherwise
    // replace local state up to last_included_index with the snapshot and
    // acknowledge. offset/done are unused because snapshots arrive whole.
    fn process_install_snapshot_request_as_follower(
        &mut self,
        from_id: ReplicaID,
        term: usize,
        last_included_index: usize,
        last_included_term: usize,
        _offset: usize,
        data: D,
        _done: bool,
    ) {
        if self.current_term > term {
            // Stale sender: tell it our term and where our log ends.
            self.cluster.lock().unwrap().send_message(
                from_id,
                Message::InstallSnapshotResponse {
                    from_id: self.id,
                    term: self.current_term,
                    last_included_index: self.get_last_log_index(),
                },
            );
            return;
        }

        let snapshot = Snapshot {
            last_included_index,
            last_included_term,
            data,
        };

        // Retain only logs not already in the snapshot. These logs are
        // guaranteed to not be committed yet (otherwise we wouldn't be
        // receiving the snapshot in the first place), so it is correct to
        // restore StateMachine state from the snapshot.
        let mut state_machine = self.state_machine.lock().unwrap();
        self.log.retain(|l| l.index > last_included_index);
        state_machine.set_snapshot(snapshot.clone());
        self.snapshot = Some(snapshot);
        self.index_offset = last_included_index + 1;
        self.commit_index = last_included_index;
        self.last_applied = last_included_index;
        // It is likely that the snapshot contained new information, so we need
        // to update our current term.
        // NOTE(review): this sets current_term from the last log/snapshot
        // term rather than from the sender's `term` argument; confirm this
        // cannot move current_term backwards relative to `term`.
        self.current_term = self.get_last_log_term();
        self.cluster.lock().unwrap().send_message(
            from_id,
            Message::InstallSnapshotResponse {
                from_id: self.id,
                term: self.current_term,
                last_included_index: self.get_last_log_index(),
            },
        );
    }

    // Handle a Leader's AppendEntryRequest: verify term and log consistency
    // at prev_log_index/prev_log_term, append the new entries, advance the
    // local commit index, and acknowledge.
    fn process_append_entry_request_as_follower(
        &mut self,
        from_id: ReplicaID,
        term: usize,
        prev_log_index: usize,
        prev_log_term: usize,
        entries: Vec>,
        commit_index: usize,
    ) {
        // Check that the leader's term is at least as large as ours.
        if self.current_term > term {
            self.cluster.lock().unwrap().send_message(
                from_id,
                Message::AppendEntryResponse {
                    from_id: self.id,
                    term: self.current_term,
                    success: false,
                    last_index: self.get_last_log_index(),
                    mismatch_index: None,
                },
            );
            return;
        }

        // If our log doesn't contain an entry at prev_log_index with the
        // prev_log_term term, reply false.
        if prev_log_index >= self.log.len() + self.index_offset
            || self.get_term_at_index(prev_log_index).unwrap() != prev_log_term
        {
            // mismatch_index tells the Leader where to back its next_index to.
            self.cluster.lock().unwrap().send_message(
                from_id,
                Message::AppendEntryResponse {
                    from_id: self.id,
                    term: self.current_term,
                    success: false,
                    last_index: self.get_last_log_index(),
                    mismatch_index: Some(prev_log_index),
                },
            );
            return;
        }

        self.process_entries(entries);

        // Update local commit index to either the received commit index or the
        // latest local log position, whichever is smaller.
        if commit_index > self.commit_index && !self.log.is_empty() {
            self.commit_index = cmp::min(commit_index, self.log[self.log.len() - 1].index);
        }

        let mut cluster = self.cluster.lock().unwrap();
        cluster.register_leader(Some(from_id));
        cluster.send_message(
            from_id,
            Message::AppendEntryResponse {
                from_id: self.id,
                term: self.current_term,
                success: true,
                last_index: self.get_last_log_index(),
                mismatch_index: None,
            },
        );
    }

    // Reconcile received entries with the local log: truncate conflicting
    // suffixes (marking their transitions abandoned) and append entries that
    // extend the log.
    fn process_entries(&mut self, entries: Vec>) {
        let mut state_machine = self.state_machine.lock().unwrap();
        for entry in entries {
            // Drop local inconsistent logs.
            // NOTE(review): entry.index is a global index while self.log is
            // indexed locally; elsewhere this file subtracts index_offset
            // before indexing/truncating. Verify the loop bounds and
            // truncate(entry.index) below are correct once compaction has
            // occurred (index_offset > 0).
            if entry.index <= self.get_last_log_index()
                && entry.term != self.get_term_at_index(entry.index).unwrap()
            {
                for i in entry.index..self.log.len() {
                    state_machine.register_transition_state(
                        self.log[i].transition.get_id(),
                        TransitionState::Abandoned(TransitionAbandonedReason::ConflictWithLeader),
                    );
                }
                self.log.truncate(entry.index);
            }

            // Push received logs.
            if entry.index == self.log.len() + self.index_offset {
                self.log.push(entry);
            }
        }
    }

    // Follower-side dispatch: unpack each request type and forward it to the
    // matching handler; responses are ignored in this role.
    fn process_message_as_follower(&mut self, message: Message) {
        match message {
            Message::VoteRequest {
                from_id,
                term,
                last_log_index,
                last_log_term,
            } => {
                self.process_vote_request_as_follower(from_id, term, last_log_index, last_log_term)
            }
            Message::AppendEntryRequest {
                term,
                from_id,
                prev_log_index,
                prev_log_term,
                entries,
                commit_index,
            } => self.process_append_entry_request_as_follower(
                from_id,
                term,
                prev_log_index,
                prev_log_term,
                entries,
                commit_index,
            ),
            Message::InstallSnapshotRequest {
                from_id,
                term,
                last_included_index,
                last_included_term,
                offset,
                data,
                done,
            } => self.process_install_snapshot_request_as_follower(
                from_id,
                term,
                last_included_index,
                last_included_term,
                offset,
                data,
                done,
            ),
            _ => { /* ignore */ }
        }
    }

    // Candidate-side dispatch: requests may demote us to follower, vote
    // responses feed the election tally.
    fn process_message_as_candidate(&mut self, message: Message) {
        match message {
            Message::AppendEntryRequest { term, from_id, .. } => {
                self.process_append_entry_request_as_candidate(term, from_id, message)
            }
            Message::VoteRequest { term, from_id, .. } => {
                self.process_vote_request_as_candidate(term, from_id, message)
            }
            Message::VoteResponse {
                from_id,
                term,
                vote_granted,
            } => self.process_vote_response_as_candidate(from_id, term, vote_granted),
            Message::InstallSnapshotRequest { from_id, term, .. } => {
                self.process_install_snapshot_request_as_candidate(from_id, term, message)
            }
            _ => { /* ignore */ }
        }
    }

    fn process_install_snapshot_request_as_candidate(
        &mut self,
        from_id: ReplicaID,
        term: usize,
        message: Message,
    ) {
        // If the term is greater or equal to current term, then there's an
        // active Leader, so convert self to a follower. If the term is smaller
        // than the current term, inform the sender of your current term.
        if term >= self.current_term {
            self.cluster.lock().unwrap().register_leader(None);
            self.become_follower(term);
            // Re-dispatch so the follower handler installs the snapshot.
            self.process_message(message);
        } else {
            self.cluster.lock().unwrap().send_message(
                from_id,
                Message::InstallSnapshotResponse {
                    from_id: self.id,
                    last_included_index: self.get_last_log_index(),
                    term: self.current_term,
                },
            );
        }
    }

    // Tally an incoming vote; a strict cluster majority (self included)
    // promotes this replica to Leader.
    fn process_vote_response_as_candidate(
        &mut self,
        from_id: ReplicaID,
        term: usize,
        vote_granted: bool,
    ) {
        if term > self.current_term {
            self.cluster.lock().unwrap().register_leader(None);
            self.become_follower(term);
        } else if vote_granted && term == self.current_term {
            // Record that the vote has been granted.
            if let Some(cur_votes) = &mut self.current_votes {
                cur_votes.insert(from_id);
                // If more than half of the cluster has voted for the Replica
                // (the Replica itself included), it's time to become the
                // Leader.
                if cur_votes.len() * 2 > self.peer_ids.len() {
                    self.become_leader();
                }
            }
        }
    }

    // A competing candidate asked for our vote: defer to a higher term
    // (become follower and reprocess), otherwise refuse.
    fn process_vote_request_as_candidate(
        &mut self,
        term: usize,
        from_id: ReplicaID,
        message: Message,
    ) {
        if term > self.current_term {
            self.cluster.lock().unwrap().register_leader(None);
            self.become_follower(term);
            self.process_message(message);
        } else {
            self.cluster.lock().unwrap().send_message(
                from_id,
                Message::VoteResponse {
                    from_id: self.id,
                    term: self.current_term,
                    vote_granted: false,
                },
            );
        }
    }

    // A Leader with term >= ours emerged while we were campaigning: step down
    // and reprocess the request as a follower; otherwise reject it.
    fn process_append_entry_request_as_candidate(
        &mut self,
        term: usize,
        from_id: ReplicaID,
        message: Message,
    ) {
        if term >= self.current_term {
            self.cluster.lock().unwrap().register_leader(None);
            self.become_follower(term);
            self.process_message(message);
        } else {
            self.cluster.lock().unwrap().send_message(
                from_id,
                Message::AppendEntryResponse {
                    from_id: self.id,
                    term: self.current_term,
                    success: false,
                    last_index: self.get_last_log_index(),
                    mismatch_index: None,
                },
            );
        }
    }

    // Transition to Leader: reset election bookkeeping, initialize per-peer
    // replication state, and append a no-op entry for the new term.
    fn become_leader(&mut self) {
        self.cluster.lock().unwrap().register_leader(Some(self.id));
        self.state = State::Leader;
        self.current_votes = None;
        self.voted_for = None;
        self.next_index = BTreeMap::new();
        self.match_index = BTreeMap::new();
        for peer_id in &self.peer_ids {
            // Optimistically assume each peer is caught up; AppendEntry
            // rejections will walk next_index back as needed.
            self.next_index
                .insert(*peer_id, self.log.len() + self.index_offset);
            self.match_index.insert(*peer_id, 0);
        }

        // If the previous Leader had some uncommitted entries that were
        // replicated to this now-Leader server, this replica will not commit
        // them until its commit index advances to a log entry appended in this
        // Leader's term. To carry out this operation as soon as the new Leader
        // emerges, append a no-op entry. This is a neat optimization described
        // in the part 8 of the paper.
        self.log.push(LogEntry {
            index: self.log.len() + self.index_offset,
            transition: self.noop_transition.clone(),
            term: self.current_term,
        });
    }

    // Transition to Follower at the given (higher or equal) term, clearing
    // any in-flight election state.
    fn become_follower(&mut self, term: usize) {
        self.current_term = term;
        self.state = State::Follower;
        self.current_votes = None;
        self.voted_for = None;
    }

    fn become_candidate(&mut self) {
        // Increase current term.
        self.current_term += 1;
        // Claim yourself a candidate.
        self.state = State::Candidate;
        // Initialize votes. Vote for yourself.
        let mut votes = BTreeSet::new();
        votes.insert(self.id);
        self.current_votes = Some(Box::new(votes));
        self.voted_for = Some(self.id);
        // Fan out vote requests.
        self.broadcast_message(|_: usize| Message::VoteRequest {
            from_id: self.id,
            term: self.current_term,
            last_log_index: self.get_last_log_index(),
            last_log_term: self.get_last_log_term(),
        });

        // A single-node cluster wins its election immediately.
        if self.peer_ids.is_empty() {
            self.become_leader();
        }
    }

    // Global index of the last log entry; if the log is empty, the last index
    // covered by the snapshot (index_offset - 1).
    fn get_last_log_index(&self) -> usize {
        if let Some(log) = self.log.last() {
            log.index
        } else {
            self.index_offset - 1
        }
    }

    // Term of the last log entry; if the log is empty, the snapshot's last
    // included term. unwrap is safe only when an empty log implies a snapshot
    // exists — which holds because the log is seeded with a no-op entry and
    // only emptied by compaction.
    fn get_last_log_term(&self) -> usize {
        if let Some(log) = self.log.last() {
            log.term
        } else {
            self.snapshot.as_ref().unwrap().last_included_term
        }
    }
}