├── .github
    └── workflows
    │   └── rust.yml
├── .gitignore
├── .rustfmt.toml
├── .travis.yml
├── Cargo.toml
├── LICENSE
├── README.md
├── build.rs
├── design.md
└── src
    ├── async_ch.rs
    ├── async_rt.rs
    ├── conf_change
        ├── conf_change.rs
        ├── datadriven_test.rs
        ├── mod.rs
        ├── quick_test.rs
        ├── restore.rs
        └── testdata
        │   ├── joint_autoleave.txt
        │   ├── joint_idempotency.txt
        │   ├── joint_learners_next.txt
        │   ├── joint_safety.txt
        │   ├── simple_idempotency.txt
        │   ├── simple_promote_demote.txt
        │   ├── simple_safety.txt
        │   ├── update.txt
        │   └── zero.txt
    ├── lib.rs
    ├── node.rs
    ├── nom_data_test
        └── mod.rs
    ├── paper_test.rs
    ├── protocol
        ├── mod.rs
        └── raft.proto
    ├── quorum
        ├── data_driven_test.rs
        ├── joint.rs
        ├── majority.rs
        ├── majority_vote.txt
        ├── mod.rs
        ├── quick_test.rs
        ├── quorum.rs
        └── testdata
        │   ├── joint_commit.txt
        │   ├── joint_vote.txt
        │   ├── majority_commit.txt
        │   └── majority_vote.txt
    ├── raft.rs
    ├── raft_flow_control_test.rs
    ├── raft_log.rs
    ├── raft_snap_test.rs
    ├── raft_test.rs
    ├── raftpb
        ├── .gitignore
        ├── gogoproto
        │   ├── .gitignore
        │   └── gogo.proto
        ├── mod.rs
        └── raft.proto
    ├── rawnode.rs
    ├── read_only.rs
    ├── status.rs
    ├── storage.rs
    ├── tests_util.rs
    ├── tracker
        ├── inflights.rs
        ├── mod.rs
        ├── progress.rs
        └── state.rs
    ├── unstable.rs
    └── util
        └── mod.rs


/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
 1 | name: Rust
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   build:
11 | 
12 |     runs-on: ubuntu-latest
13 | 
14 |     steps:
15 |       - uses: actions/checkout@v2
16 |       - name: Install latest nightly
17 |         uses: actions-rs/toolchain@v1
18 |         with:
19 |             toolchain: nightly
20 |             override: true
21 |         
22 |       - name: Install Protoc
23 |         uses: arduino/setup-protoc@master
24 |           
25 |       - name: Build
26 |         run: cargo build --verbose
27 |       - name: Run tests
28 |         run: RUST_LOG=debug cargo test --verbose -- --test-threads=1
29 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Generated by Cargo
 2 | # will have compiled files and executables
 3 | /target/
 4 | 
 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
 7 | Cargo.lock
 8 | 
 9 | # These are backup files generated by rustfmt
10 | **/*.rs.bk
11 | 


--------------------------------------------------------------------------------
/.rustfmt.toml:
--------------------------------------------------------------------------------
1 | reorder_imports = true


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: rust
 2 | rust:
 3 |   - nightly
 4 | jobs:
 5 |   allow_failures:
 6 |     - rust: nightly
 7 |   fast_finish: true
 8 | install:
 9 |   - curl -L https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip -o /tmp/protoc.zip
10 |   - unzip /tmp/protoc.zip -d $HOME/protoc
11 | env:
12 |   - PATH=$HOME/protoc/bin:$PATH
13 | script:
14 |   - cargo build --verbose --all
15 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "eraft-rs"
 3 | version = "0.1.0"
 4 | authors = ["Rg"]
 5 | edition = "2018"
 6 | 
 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 8 | 
 9 | [dependencies]
10 | log = "0.4.14"
11 | protobuf = { version = "2.27.1", features = ["with-bytes"] }
12 | bytes = "1.1.0"
13 | anyhow = "1.0.53"
14 | thiserror = "1.0.30"
15 | getset = "0.1.2"
16 | nom = "7.1.0"
17 | chrono = "0.4.19"
18 | env_logger = "0.9.0"
19 | rand = "0.8.5"
20 | serde = { version = "1.0.136", features = ["derive"] }
21 | serde_json = "1.0.79"
22 | tokio = { version = "1.16.1", features = ["full"] }
23 | futures = { version = "0.3.21", default-features = true }
24 | lazy_static = "1.4.0"
25 | async-channel = "1.6.1"
26 | async-io = "1.6.0"
27 | async-trait = "0.1.52"
28 | [dev-dependencies]
29 | maplit = "1.0.2"
30 | env_logger = "0.9.0"
31 | [build-dependencies]
32 | protoc-rust = "2.27.1"
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # eraft-rs
2 | eraft-rs is the Raft component of etcd-rs. [![Build Status](https://travis-ci.org/laohanlinux/eraft-rs.svg?branch=master)](https://travis-ci.org/laohanlinux/eraft-rs)
3 | 


--------------------------------------------------------------------------------
/build.rs:
--------------------------------------------------------------------------------
 1 | use protoc_rust::Customize;
 2 | 
 3 | fn main() {
 4 |     protoc_rust::Codegen::new()
 5 |         .out_dir("src/raftpb")
 6 |         .inputs(&["src/raftpb/raft.proto"])
 7 |         .includes(&["src/raftpb"])
 8 |         .customize(protoc_rust::Customize {
 9 |             carllerche_bytes_for_bytes: Some(true),
10 |             carllerche_bytes_for_string: Some(true),
11 |             ..Default::default()
12 |         })
13 |         .run()
14 |         .expect("protoc");
15 | }
16 | 


--------------------------------------------------------------------------------
/design.md:
--------------------------------------------------------------------------------
 1 | ## Progress
 2 | 
 3 | Progress represents a follower’s progress in the view of the leader. Leader maintains progresses of all followers, and sends `replication message` to the follower based on its progress.
 4 | 
 5 | `replication message` is a `msgApp` with log entries.
 6 | 
 7 | A progress has two attributes: `match` and `next`. `match` is the index of the highest known matched entry. If the leader knows nothing about a follower’s replication status, `match` is set to zero. `next` is the index of the first entry that will be replicated to the follower. The leader puts entries from `next` to its latest one in the next `replication message`.
 8 | 
 9 | A progress is in one of three states: `probe`, `replicate`, `snapshot`.
10 | 
11 | ```
12 |                             +--------------------------------------------------------+
13 |                             |                  send snapshot                         |
14 |                             |                                                        |
15 |                   +---------+----------+                                  +----------v---------+
16 |               +--->       probe        |                                  |      snapshot      |
17 |               |   |  max inflight = 1  <----------------------------------+  max inflight = 0  |
18 |               |   +---------+----------+                                  +--------------------+
19 |               |             |            1. snapshot success
20 |               |             |               (next=snapshot.index + 1)
21 |               |             |            2. snapshot failure
22 |               |             |               (no change)
23 |               |             |            3. receives msgAppResp(rej=false&&index>lastsnap.index)
24 |               |             |               (match=m.index,next=match+1)
25 | receives msgAppResp(rej=true)
26 | (next=match+1)|             |
27 |               |             |
28 |               |             |
29 |               |             |   receives msgAppResp(rej=false&&index>match)
30 |               |             |   (match=m.index,next=match+1)
31 |               |             |
32 |               |             |
33 |               |             |
34 |               |   +---------v----------+
35 |               |   |     replicate      |
36 |               +---+  max inflight = n  |
37 |                   +--------------------+
38 | ```
39 | 
40 | When the progress of a follower is in `probe` state, the leader sends at most one `replication message` per heartbeat interval. The leader sends `replication message`s slowly, probing the actual progress of the follower. A `msgHeartbeatResp` or a `msgAppResp` with reject might trigger the sending of the next `replication message`.
41 | 
42 | When the progress of a follower is in `replicate` state, leader sends `replication message`, then optimistically increases `next` to the latest entry sent. This is an optimized state for fast replicating log entries to the follower.
43 | 
44 | When the progress of a follower is in `snapshot` state, leader stops sending any `replication message`.
45 | 
46 | A newly elected leader sets the progresses of all the followers to `probe` state with `match` = 0 and `next` = last index. The leader slowly (at most once per heartbeat) sends `replication message` to the follower and probes its progress.
47 | 
48 | A progress changes to `replicate` when the follower replies with a non-rejection `msgAppResp`, which implies that it has matched the index sent. At this point, the leader starts to stream log entries to the follower fast. The progress will fall back to `probe` when the follower replies with a rejection `msgAppResp` or the link layer reports that the follower is unreachable. We aggressively reset `next` to `match`+1 since if we receive any `msgAppResp` soon, both `match` and `next` will increase directly to the `index` in `msgAppResp`. (We might end up sending some duplicate entries when `next` is aggressively reset too low; see open question.)
49 | 
50 | A progress changes from `probe` to `snapshot` when the follower falls very far behind and requires a snapshot. After sending `msgSnap`, the leader waits until the success, failure or abortion of the previous snapshot sent. The progress will go back to `probe` after the sending result is applied.
51 | 
52 | ### Flow Control
53 | 
54 | 1. Limit the max size of each message sent. Max should be configurable.
55 | This lowers the cost in the probing state, since the size per message is limited, and lowers the penalty when `next` is aggressively decreased too far.
56 | 
57 | 2. Limit the number of in-flight messages to fewer than N when in `replicate` state. N should be configurable. Most implementations will have a sending buffer on top of their actual network transport layer (not blocking the raft node). We want to make sure raft does not overflow that buffer, which can cause messages to be dropped and trigger a bunch of unnecessary, repeated resending.
58 | 


--------------------------------------------------------------------------------
/src/async_ch.rs:
--------------------------------------------------------------------------------
  1 | use std::sync::Arc;
  2 | use std::time::Duration;
  3 | use futures::SinkExt;
  4 | use async_channel::{bounded, Sender, Receiver, SendError, RecvError, TryRecvError};
  5 | use env_logger::Env;
  6 | use futures::task::SpawnExt;
  7 | use tokio::select;
  8 | use crate::node::SafeResult;
  9 | use crate::raftpb::raft::Message;
 10 | 
 11 | #[derive(Clone)]
 12 | pub(crate) struct Channel<T> {
 13 |     rx: Option<Receiver<T>>,
 14 |     tx: Option<Sender<T>>,
 15 | }
 16 | 
 17 | impl<T> Channel<T> {
 18 |     pub(crate) fn new(n: usize) -> Self {
 19 |         let (tx, rx) = bounded(n);
 20 |         Channel {
 21 |             rx: Some(rx),
 22 |             tx: Some(tx),
 23 |         }
 24 |     }
 25 |     async fn try_send(&self, msg: T) -> Result<(), SendError<T>> {
 26 |         if let Some(tx) = &self.tx {
 27 |             return tx.send(msg).await;
 28 |         }
 29 |         Ok(())
 30 |     }
 31 | 
 32 |     pub(crate) async fn try_recv(&self) -> Result<T, TryRecvError> {
 33 |         if let Some(rx) = &self.rx {
 34 |             return rx.try_recv();
 35 |         }
 36 |         Err(TryRecvError::Empty)
 37 |     }
 38 | 
 39 |     pub(crate) async fn recv(&self) -> Result<T, async_channel::RecvError> {
 40 |         let rx = self.rx.as_ref().unwrap();
 41 |         rx.recv().await
 42 |     }
 43 | 
 44 |     pub(crate) async fn send(&self, msg: T) -> Result<(), SendError<T>> {
 45 |         let tx = self.tx.as_ref().unwrap();
 46 |         tx.send(msg).await
 47 |     }
 48 | 
 49 |     pub(crate) fn tx(&self) -> Sender<T> {
 50 |         self.tx.as_ref().unwrap().clone()
 51 |     }
 52 | 
 53 |     pub(crate) fn take_tx(&mut self) -> Option<Sender<T>> {
 54 |         self.tx.take()
 55 |     }
 56 | }
 57 | 
 58 | #[derive(Clone)]
 59 | pub(crate) struct MsgWithResult {
 60 |     m: Option<Message>,
 61 |     ch: Option<Sender<SafeResult<()>>>,
 62 | }
 63 | 
 64 | impl Default for MsgWithResult {
 65 |     fn default() -> Self {
 66 |         MsgWithResult {
 67 |             m: None,
 68 |             ch: None,
 69 |         }
 70 |     }
 71 | }
 72 | 
 73 | impl MsgWithResult {
 74 |     pub fn new() -> Self {
 75 |         MsgWithResult {
 76 |             m: None,
 77 |             ch: None,
 78 |         }
 79 |     }
 80 | 
 81 |     pub fn new_with_msg(msg: Message) -> Self {
 82 |         MsgWithResult {
 83 |             m: Some(msg),
 84 |             ch: None,
 85 |         }
 86 |     }
 87 | 
 88 |     pub fn new_with_channel(tx: Sender<SafeResult<()>>, msg: Message) -> Self {
 89 |         MsgWithResult {
 90 |             m: Some(msg),
 91 |             ch: Some(tx),
 92 |         }
 93 |     }
 94 | 
 95 |     pub fn get_msg(&self) -> Option<&Message> {
 96 |         self.m.as_ref()
 97 |     }
 98 | 
 99 |     pub(crate) async fn notify(&self, msg: SafeResult<()>) {
100 |         if let Some(sender) = &self.ch {
101 |             sender.send(msg).await;
102 |         }
103 |     }
104 | 
105 |     pub(crate) async fn notify_and_close(&mut self, msg: SafeResult<()>) {
106 |         if let Some(sender) = self.ch.take() {
107 |             sender.send(msg).await;
108 |             sender.close();
109 |         }
110 |     }
111 | }
112 | 


--------------------------------------------------------------------------------
/src/async_rt.rs:
--------------------------------------------------------------------------------
 1 | use std::future::Future;
 2 | use tokio::runtime::Handle;
 3 | use tokio::time::error::Elapsed;
 4 | use tokio::task;
 5 | use tokio::time::{self, Duration};
 6 | 
 7 | pub(crate) fn wait_timeout<F>(d: Duration, fut: F) -> Result<F::Output, Elapsed>
 8 |     where F: Future + Send + 'static, F::Output: Send + 'static
 9 | {
10 |     task::block_in_place(move || {
11 |         Handle::current().block_on(async move {
12 |             time::timeout(d, fut).await
13 |         })
14 |     })
15 | }
16 | 
17 | pub(crate) fn sleep(d: Duration) {
18 |     task::block_in_place(move || {
19 |         Handle::current().block_on(async move {
20 |             time::sleep(d).await
21 |         });
22 |     });
23 | }
24 | 
25 | pub(crate) fn wait<F>(fut: F) -> F::Output
26 |     where F: Future + Send + 'static, F::Output: Send + 'static {
27 |     tokio::task::block_in_place(move || {
28 |         Handle::current().block_on(async move {
29 |             fut.await
30 |         })
31 |     })
32 | }
33 | 
// Placeholder test: the body is empty, so it asserts nothing.
#[test]
fn it_works() {}


--------------------------------------------------------------------------------
/src/conf_change/datadriven_test.rs:
--------------------------------------------------------------------------------
  1 | #[cfg(test)]
  2 | mod test {
  3 |     use crate::conf_change::conf_change::Changer;
  4 |     use crate::nom_data_test::{execute_test, walk};
  5 |     use crate::raftpb::raft::{ConfChange, ConfChangeSingle, ConfChangeType};
  6 |     use crate::tracker::progress::ProgressMap;
  7 |     use crate::tracker::{Config, ProgressTracker};
  8 |     use env_logger::init;
  9 |     use protobuf::ProtobufEnum;
 10 |     use std::convert::AsMut;
 11 | 
 12 |     #[test]
 13 |     fn t_conf_data_driven() {
 14 |         // flexi_logger::Logger::with_env().start();
 15 |         walk("src/conf_change/testdata", |p| {
 16 |             let mut tr = ProgressTracker::new(10);
 17 |             let mut c = Changer {
 18 |                 tracker: tr,
 19 |                 last_index: 0, // incremented in this test with each cmd
 20 |             };
 21 |             execute_test(p, "--------------------------------", |data| -> String {
 22 |                 // The test files use the commands
 23 |                 // - simple: run a simple conf change (i.e. no joint consensus),
 24 |                 // - enter-joint: enter a joint config, and
 25 |                 // - leave-joint: leave a joint config
 26 |                 // The first two take a list of config changes, which have the following
 27 |                 // syntax:
 28 |                 // - vn: make a voter,
 29 |                 // - ln: make n a learner,
 30 |                 // - rn: remove n, and
 31 |                 // - un: update n
 32 |                 let mut ccs: Vec<ConfChangeSingle> = vec![];
 33 |                 let mut auto_leave = false;
 34 |                 for cmd_arg in data.cmd_args.iter() {
 35 |                     let mut cc = ConfChangeSingle::new();
 36 |                     match cmd_arg.key.as_str() {
 37 |                         "v" => {
 38 |                             cc.set_field_type(ConfChangeType::ConfChangeAddNode);
 39 |                         }
 40 |                         "l" => cc.set_field_type(ConfChangeType::ConfChangeAddLearnerNode),
 41 |                         "r" => cc.set_field_type(ConfChangeType::ConfChangeRemoveNode),
 42 |                         "u" => cc.set_field_type(ConfChangeType::ConfChangeUpdateNode),
 43 |                         "autoleave" => {
 44 |                             auto_leave = cmd_arg.vals[0].parse().unwrap();
 45 |                         }
 46 |                         u => panic!("unknown input: {}", u),
 47 |                     }
 48 |                     if cmd_arg.key.as_str() != "autoleave" {
 49 |                         let id = cmd_arg.vals[0].parse().unwrap();
 50 |                         cc.set_node_id(id);
 51 |                         ccs.push(cc);
 52 |                     }
 53 |                 }
 54 | 
 55 |                 let mut cfg = Config::default();
 56 |                 let mut prs = ProgressMap::default();
 57 |                 match data.cmd.as_str() {
 58 |                     "simple" => match c.simple(&mut ccs) {
 59 |                         Ok((new_cfg, new_prs)) => {
 60 |                             cfg = new_cfg;
 61 |                             prs = new_prs;
 62 |                         }
 63 |                         e => {
 64 |                             c.last_index += 1;
 65 |                             return e.unwrap_err();
 66 |                         }
 67 |                     },
 68 |                     "enter-joint" => match c.enter_joint(auto_leave, &mut ccs) {
 69 |                         Ok((new_cfg, new_prs)) => {
 70 |                             cfg = new_cfg;
 71 |                             prs = new_prs;
 72 |                         }
 73 |                         e => {
 74 |                             c.last_index += 1;
 75 |                             return e.unwrap_err();
 76 |                         }
 77 |                     },
 78 |                     "leave-joint" => {
 79 |                         info!("ccs {:?}", ccs);
 80 |                         if !ccs.is_empty() {
 81 |                             return "this command takes no input".to_owned();
 82 |                         }
 83 |                         match c.leave_joint() {
 84 |                             Ok((new_cfg, new_prs)) => {
 85 |                                 cfg = new_cfg;
 86 |                                 prs = new_prs;
 87 |                             }
 88 |                             e => {
 89 |                                 c.last_index += 1;
 90 |                                 return e.unwrap_err();
 91 |                             }
 92 |                         }
 93 |                     }
 94 |                     u => panic!("unknown command: {}", u),
 95 |                 }
 96 |                 c.tracker.config = cfg;
 97 |                 c.tracker.progress = prs;
 98 |                 c.last_index += 1;
 99 |                 format!("{}\n{}", c.tracker.config, c.tracker.progress)
100 |             })
101 |         });
102 |     }
103 | }
104 | 


--------------------------------------------------------------------------------
/src/conf_change/mod.rs:
--------------------------------------------------------------------------------
 1 | use crate::raftpb::raft::{ConfChangeSingle, ConfChangeType};
 2 | 
 3 | pub mod conf_change;
 4 | mod datadriven_test;
 5 | mod quick_test;
 6 | pub mod restore;
 7 | 
 8 | pub(crate) fn new_conf_change_single(id: u64, typ: ConfChangeType) -> ConfChangeSingle {
 9 |     let mut single = ConfChangeSingle::new(); // fresh message, filled in below
10 |     single.set_field_type(typ); // the kind of membership change
11 |     single.set_node_id(id); // the node the change applies to
12 |     single
13 | }
14 | 


--------------------------------------------------------------------------------
/src/conf_change/quick_test.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2019 The etcd Authors
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #[cfg(test)]
 16 | mod tests {
 17 |     use crate::conf_change::conf_change::Changer;
 18 |     use crate::raftpb::raft::{ConfChange, ConfChangeSingle, ConfChangeType};
 19 |     use crate::tracker::ProgressTracker;
 20 |     use protobuf::ProtobufEnum;
 21 |     use rand::Rng;
 22 |     use std::fmt::Error;
 23 | 
 24 |     // Randomized check that simple and joint config changes arrive at the
 25 |     // same result when fed the same sequence of changes.
 26 |     #[test]
 27 |     fn t_conf_change_quick() {
 28 |         // Number of independently generated change sequences to try.
 29 |         let count = 1000;
 30 |         for _ in 0..count {
 31 |             // `wrapper` hands back the Changer produced by the simple path plus
 32 |             // the sequence it used; the first entry is always "add voter 1".
 33 |             let (simple_change, mut ccs) = wrapper().unwrap();
 34 |             let mut epoch_cc = ccs.drain(..1).collect::<Vec<_>>();
 35 |             let mut c = Changer {
 36 |                 tracker: ProgressTracker::new(10),
 37 |                 last_index: 10,
 38 |             };
 39 |             // A joint config cannot be entered from a zero-voter config, so the
 40 |             // initial voter is installed with a simple change first.
 41 |             let (cfg, prs) = c
 42 |                 .simple(&mut epoch_cc)
 43 |                 .expect("adding the initial voter must succeed");
 44 |             c.tracker.config = cfg;
 45 |             c.tracker.progress = prs;
 46 |             with_joint(&mut c, &mut ccs).expect("joint path must succeed");
 47 |             // Both routes must leave behind equal Changers.
 48 |             assert_eq!(simple_change, c);
 49 |         }
 50 |     }
 51 | 
 52 |     fn gen_cc(
 53 |         num: impl Fn() -> usize,
 54 |         id: impl Fn() -> u64,
 55 |         typ: impl Fn() -> ConfChangeType,
 56 |     ) -> Vec<ConfChangeSingle> {
 57 |         // Sample the length once, then a fresh type and id for every change.
 58 |         (0..num())
 59 |             .map(|_| {
 60 |                 let mut single = ConfChangeSingle::new();
 61 |                 single.set_field_type(typ());
 62 |                 single.set_node_id(id());
 63 |                 single
 64 |             })
 65 |             .collect()
 66 |     }
 67 | 
 68 |     fn wrapper() -> Result<(Changer, Vec<ConfChangeSingle>), String> {
 69 |         // Batch size and node ids are both drawn as `gen_range(1..9) + 1`, so a
 70 |         // random change never targets node 1, the voter installed up front.
 71 |         let num = || -> usize { rand::thread_rng().gen_range(1..9) + 1 };
 72 |         let id = || -> u64 { rand::thread_rng().gen_range(1..9) + 1 };
 73 |         // The change type is drawn at random among the declared enum values.
 74 |         let typ = || -> ConfChangeType {
 75 |             let upper = ConfChangeType::values().len();
 76 |             let picked = rand::thread_rng().gen_range(0..upper);
 77 |             ConfChangeType::from_i32(picked as i32).unwrap()
 78 |         };
 79 |         let mut ccs = gen_cc(num, id, typ);
 80 | 
 81 |         // push + reverse leaves "add voter 1" at the front of the sequence.
 82 |         let mut first_voter = ConfChangeSingle::new();
 83 |         first_voter.set_node_id(1);
 84 |         first_voter.set_field_type(ConfChangeType::ConfChangeAddNode);
 85 |         ccs.push(first_voter);
 86 |         ccs.reverse();
 87 | 
 88 |         // Snapshot the sequence so the caller can replay it on the joint path.
 89 |         let replay = ccs.clone();
 90 | 
 91 |         let mut changer = Changer {
 92 |             tracker: ProgressTracker::new(10),
 93 |             last_index: 10,
 94 |         };
 95 | 
 96 |         // Apply the changes one by one; any failure is returned as-is.
 97 |         with_simple(&mut changer, &mut ccs)?;
 98 |         Ok((changer, replay))
 99 |     }
100 | 
101 |     fn with_simple(c: &mut Changer, ccs: &mut [ConfChangeSingle]) -> Result<(), String> {
102 |         // Feed every change through `simple` as its own one-element batch.
103 |         ccs.iter().try_for_each(|single| -> Result<(), String> {
104 |             let mut batch = vec![single.clone()];
105 |             let (cfg, prs) = c.simple(&mut batch)?;
106 |             c.tracker.config = cfg;
107 |             c.tracker.progress = prs;
108 |             Ok(())
109 |         })
110 |     }
111 |     // Applies `ccs` through a joint config (enter, then leave) and installs the result on `c`.
112 |     fn with_joint(c: &mut Changer, ccs: &mut [ConfChangeSingle]) -> Result<(), String> {
113 |         let (cfg, prs) = c.enter_joint(false, ccs)?;
114 |         // Also do this with auto_leave on, just to check that we'd get the same
115 |         // result once the flag itself is cleared again.
116 |         let (mut cfg2a, mut prs2a) = c.enter_joint(true, ccs)?;
117 |         cfg2a.auto_leave = false;
118 |         assert_eq!(cfg, cfg2a);
119 |         assert_eq!(prs, prs2a);
120 |         // Install the joint state on the tracker and leave it a first time.
121 |         c.tracker.config = cfg.clone();
122 |         c.tracker.progress = prs.clone();
123 |         let (mut cfg2b, mut prs2b) = c.leave_joint()?;
124 |         // Re-install the same joint state and leave it again; both exits must agree.
125 |         c.tracker.config = cfg.clone();
126 |         c.tracker.progress = prs.clone();
127 |         let (cfg, prs) = c.leave_joint()?;
128 |         assert_eq!(cfg, cfg2b);
129 |         assert_eq!(prs, prs2b);
130 |         // Keep the post-joint state on the Changer so the caller can compare it.
131 |         c.tracker.config = cfg.clone();
132 |         c.tracker.progress = prs.clone();
133 | 
134 |         Ok(())
135 |     }
136 | }
137 | 


--------------------------------------------------------------------------------
/src/conf_change/restore.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2019 The etcd Authors
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | use crate::conf_change::conf_change::Changer;
 16 | use crate::conf_change::new_conf_change_single;
 17 | use crate::raftpb::raft::ConfChangeType::{
 18 |     ConfChangeAddLearnerNode, ConfChangeAddNode, ConfChangeRemoveNode,
 19 | };
 20 | use crate::raftpb::raft::{ConfChange, ConfChangeSingle, ConfChangeType, ConfState};
 21 | use crate::tracker::progress::ProgressMap;
 22 | use crate::tracker::Config;
 23 | 
 24 | // to_conf_change_single translates a ConfState into two slices of operations:
 25 | // 1) `outgoing`, which creates the config that will become the outgoing one,
 26 | // and 2) `incoming`, which, when applied to the config resulting from 1),
 27 | // represents the ConfState.
 28 | fn to_conf_change_single(cs: &ConfState) -> (Vec<ConfChangeSingle>, Vec<ConfChangeSingle>) {
 29 |     // Example to follow along with this code:
 30 |     //   voters=(1 2 3) learners=(5) outgoing=(1 2 4 6) learners_next=(4)
 31 |     //
 32 |     // This says that before the joint config was entered, the voters were
 33 |     // (1 2 4 6), perhaps together with some learners that are gone by now.
 34 |     // The new set of voters is (1 2 3): (1 2) were kept, while (4 6) are no
 35 |     // longer voters; 4, however, is poised to become a learner once the
 36 |     // joint state is left.
 37 |     // Whether 5 was already a learner before the joint config was entered
 38 |     // cannot be told, but it doesn't matter (we pretend that it wasn't).
 39 |     //
 40 |     // For this example the code below constructs
 41 |     //   outgoing = add 1; add 2; add 4; add 6
 42 |     //   incoming = remove 1; remove 2; remove 4; remove 6
 43 |     //              add 1; add 2; add 3;
 44 |     //              add-learner 5;
 45 |     //              add-learner 4;
 46 |     // Starting from an empty config, applying 'outgoing' gives
 47 |     //
 48 |     //   quorum=(1 2 4 6)
 49 |     //
 50 |     // from which the joint state is entered via 'incoming':
 51 |     //   quorum=(1 2 3)&&(1 2 4 6) learner=(5) learners_next=(4)
 52 |     //
 53 |     // as desired.
 54 | 
 55 |     let previous_voters = cs.get_voters_outgoing();
 56 | 
 57 |     // If there are outgoing voters, first add them one by one so that the
 58 |     // (non-joint) config has them all.
 59 |     let outgoing: Vec<ConfChangeSingle> = previous_voters
 60 |         .iter()
 61 |         .map(|&id| new_conf_change_single(id, ConfChangeType::ConfChangeAddNode))
 62 |         .collect();
 63 | 
 64 |     // That completes the outgoing slice. The incoming one applies on top of
 65 |     // the config the outgoing slice creates.
 66 |     let mut incoming: Vec<ConfChangeSingle> = Vec::new();
 67 | 
 68 |     // First, every outgoing voter is removed.
 69 |     incoming.extend(
 70 |         previous_voters
 71 |             .iter()
 72 |             .map(|&id| new_conf_change_single(id, ConfChangeType::ConfChangeRemoveNode)),
 73 |     );
 74 | 
 75 |     // Then the incoming voters are added.
 76 |     incoming.extend(
 77 |         cs.get_voters()
 78 |             .iter()
 79 |             .map(|&id| new_conf_change_single(id, ConfChangeType::ConfChangeAddNode)),
 80 |     );
 81 | 
 82 |     // The learners follow as add-learner operations.
 83 |     incoming.extend(
 84 |         cs.get_learners()
 85 |             .iter()
 86 |             .map(|&id| new_conf_change_single(id, ConfChangeType::ConfChangeAddLearnerNode)),
 87 |     );
 88 | 
 89 |     // Same for learners_next; these are nodes we want to be learners but which
 90 |     // are currently voters in the outgoing config.
 91 |     incoming.extend(
 92 |         cs.get_learners_next()
 93 |             .iter()
 94 |             .map(|&id| new_conf_change_single(id, ConfChangeType::ConfChangeAddLearnerNode)),
 95 |     );
 96 |     (outgoing, incoming)
 97 | }
 98 | 
 99 | // pub fn chain(chg ConfChange, ops: impl )
100 | 
101 | /// Takes a Changer (which must represent an empty configuration) and runs a
102 | /// sequence of changes on it that enacts the configuration described in the
103 | /// ConfState.
104 | ///
105 | /// On success the resulting config and progress map are returned; both have
106 | /// also been installed on the Changer's tracker. The first error reported by
107 | /// `simple` or `enter_joint` is returned unchanged.
108 | ///
109 | /// TODO(tbg) it's silly that this takes a Changer. Unravel this by making sure
110 | /// the Changer only needs a ProgressMap (not a whole Tracker) at which point
111 | /// this can just take last_index and max_inflight directly instead and cook up
112 | /// the results from that alone.
113 | pub fn restore(chg: &mut Changer, cs: &ConfState) -> Result<(Config, ProgressMap), String> {
114 |     warn!("execute restore ");
115 |     let (outgoing, mut incoming) = to_conf_change_single(cs);
116 |     // A non-empty outgoing slice means the ConfState describes a joint config.
117 |     let is_joint = !outgoing.is_empty();
118 | 
119 |     // These changes are applied one by one as simple changes:
120 |     // - non-joint: the incoming changes, which is all there is to do;
121 |     // - joint: the outgoing changes, so that the outgoing config temporarily
122 |     //   becomes the incoming active config. For example, if the config is
123 |     //   (1 2 3)&(2 3 4), this will establish (2 3 4)&().
124 |     let singles = if is_joint { outgoing.as_slice() } else { incoming.as_slice() };
125 |     for single in singles {
126 |         let mut batch = vec![single.clone()];
127 |         let (cfg, progress) = chg.simple(&mut batch)?;
128 |         chg.tracker.config = cfg;
129 |         chg.tracker.progress = progress;
130 |     }
131 | 
132 |     if is_joint {
133 |         // Now enter the joint state, which rotates the above additions into
134 |         // the outgoing config, and adds the incoming config in. Continuing the
135 |         // example above, we'd get (1 2 3)&(2 3 4), i.e. the incoming operations
136 |         // would be removing 2,3,4 and then adding in 1,2,3 while transitioning
137 |         // into a joint state.
138 |         let (cfg, progress) = chg.enter_joint(cs.get_auto_leave(), &mut *incoming)?;
139 |         chg.tracker.config = cfg;
140 |         chg.tracker.progress = progress;
141 |     }
142 | 
143 |     Ok((chg.tracker.config.clone(), chg.tracker.progress.clone()))
144 | }
145 | 
146 | #[cfg(test)]
147 | mod tests {
148 |     use crate::conf_change::conf_change::Changer;
149 |     use crate::conf_change::restore::restore;
150 |     use crate::raftpb::raft::ConfState;
151 |     use crate::tracker::ProgressTracker;
152 |     use protobuf::reflect::ProtobufValue;
153 |     use rand::prelude::SliceRandom;
154 |     use rand::Rng;
155 | 
156 |     #[test]
157 |     fn t_restore() {
158 |         // Number of randomly generated ConfStates to round-trip.
159 |         let count = 1000;
160 | 
161 |         // Sorts every list so element order doesn't matter. Re-setting a `false`
162 |         // auto_leave presumably evens out unset vs. explicitly false -- TODO
163 |         // confirm against the generated ConfState type.
164 |         let normalize = |state: &mut ConfState| {
165 |             state.voters.sort();
166 |             state.learners.sort();
167 |             state.voters_outgoing.sort();
168 |             state.learners_next.sort();
169 |             if !state.get_auto_leave() {
170 |                 state.set_auto_leave(false);
171 |             }
172 |         };
173 | 
174 |         // Restores `cs` into a fresh Changer and reports whether the ConfState
175 |         // read back from the tracker matches the (normalized) input.
176 |         let f = |cs: &mut ConfState| -> bool {
177 |             let mut chg = Changer {
178 |                 tracker: ProgressTracker::new(10),
179 |                 last_index: 0,
180 |             };
181 |             let (cfg, prs) = match restore(&mut chg, cs) {
182 |                 Ok(restored) => restored,
183 |                 Err(e) => {
184 |                     error!("{}", e);
185 |                     return false;
186 |                 }
187 |             };
188 |             chg.tracker.config = cfg;
189 |             chg.tracker.progress = prs;
190 | 
191 |             // NB: cs.Equivalent does the same "sorting" dance internally, but let's
192 |             // test it a bit here instead of relying on it.
193 |             let mut cs2 = chg.tracker.config_state();
194 |             normalize(&mut *cs);
195 |             normalize(&mut cs2);
196 |             *cs == cs2
197 |         };
198 | 
199 |         // Builds a ConfState, leaving every list passed as `None` untouched.
200 |         let new_conf_state = |voters: Option<Vec<u64>>,
201 |                               learners: Option<Vec<u64>>,
202 |                               voters_outgoing: Option<Vec<u64>>,
203 |                               learners_next: Option<Vec<u64>>,
204 |                               auto_leave: bool|
205 |                               -> ConfState {
206 |             let mut cs = ConfState::new();
207 |             if let Some(ids) = voters {
208 |                 cs.set_voters(ids);
209 |             }
210 |             if let Some(ids) = learners {
211 |                 cs.set_learners(ids);
212 |             }
213 |             if let Some(ids) = voters_outgoing {
214 |                 cs.set_voters_outgoing(ids);
215 |             }
216 |             if let Some(ids) = learners_next {
217 |                 cs.set_learners_next(ids);
218 |             }
219 |             cs.set_auto_leave(auto_leave);
220 |             cs
221 |         };
222 | 
223 |         // Hand-picked cases, from the empty state up to a joint config.
224 |         let fixed_cases = vec![
225 |             ConfState::new(),
226 |             new_conf_state(Some(vec![1, 2, 3]), None, None, None, false),
227 |             new_conf_state(Some(vec![1, 2, 3]), Some(vec![4, 5, 6]), None, None, false),
228 |             new_conf_state(
229 |                 Some(vec![1, 2, 3]),
230 |                 Some(vec![5]),
231 |                 Some(vec![1, 2, 4, 6]),
232 |                 Some(vec![4]),
233 |                 false,
234 |             ),
235 |         ];
236 |         for mut cs in fixed_cases {
237 |             assert!(f(&mut cs));
238 |         }
239 | 
240 |         for _ in 0..count {
241 |             let mut cs = generate_rnd_conf_change();
242 |             println!("{:?}", cs);
243 |             assert!(f(&mut cs));
244 |         }
245 |     }
246 | 
247 |     // Generates a random (valid) ConfState: one to five voters, up to four
248 |     // learners, and optionally an outgoing config (with learners_next and
249 |     // auto_leave) that makes the state joint.
250 |     fn generate_rnd_conf_change() -> ConfState {
251 |         let mut r = rand::thread_rng();
252 |         // NB: never generate the empty ConfState, that one should be unit tested.
253 |         let n_voters = r.gen_range(0..5) + 1;
254 |         let n_learners = r.gen_range(0..5);
255 | 
256 |         // The number of voters that are in the outgoing config but not in the
257 |         // incoming one. (We'll additionally retain a random number of the
258 |         // incoming voters below).
259 |         let n_removed_voters = r.gen_range(0..3);
260 | 
261 |         // Voters, learners, and removed voters must not overlap. A "removed voter"
262 |         // is one that we have in the outgoing config but not the incoming one.
263 |         // Shuffling the ids 1..=2*total and handing them out front to back
264 |         // guarantees that; ids start at 1, so a zero id is never produced.
265 |         let total = n_voters + n_learners + n_removed_voters;
266 |         let mut ids = (1..=2 * total as u64).collect::<Vec<_>>();
267 |         ids.shuffle(&mut r);
268 | 
269 |         // Voters are taken first, then the learners.
270 |         // NB: lists are only ever assigned when non-empty (here and below).
271 |         let mut cs = ConfState::new();
272 |         cs.voters = ids.drain(..n_voters).collect();
273 | 
274 |         if n_learners > 0 {
275 |             cs.learners = ids.drain(..n_learners).collect::<Vec<_>>();
276 |         }
277 | 
278 |         // Roll the dice on how many of the incoming voters we decide were also
279 |         // previously voters. The outgoing config is made up of those retained
280 |         // voters plus the removed voters, which come from the ids that have
281 |         // not been handed out so far.
282 |         let n_outgoing_retained_voters = r.gen_range(0..(n_voters + 1));
283 |         if n_outgoing_retained_voters > 0 || n_removed_voters > 0 {
284 |             cs.voters_outgoing
285 |                 .extend_from_slice(&cs.voters[..n_outgoing_retained_voters]);
286 |             cs.voters_outgoing
287 |                 .extend_from_slice(&ids[..n_removed_voters]);
288 |         }
289 | 
290 |         // Only outgoing voters that are not also incoming voters can be in
291 |         // learners_next (they represent demotions), so these are taken from the
292 |         // front of the same ids that were used for the removed voters.
293 |         if n_removed_voters > 0 {
294 |             let n_learners_next = r.gen_range(0..n_removed_voters + 1);
295 |             if n_learners_next > 0 {
296 |                 cs.learners_next = ids[..n_learners_next].to_vec();
297 |             }
298 |         }
299 | 
300 |         // auto_leave is only ever set for a joint config (one with outgoing
301 |         // voters); the coin is not even flipped otherwise.
302 |         let is_joint = !cs.voters_outgoing.is_empty();
303 |         cs.set_auto_leave(is_joint && r.gen_range(0..2) == 1);
304 |         cs
305 |     }
306 | }
307 | 


--------------------------------------------------------------------------------
/src/conf_change/testdata/joint_autoleave.txt:
--------------------------------------------------------------------------------
 1 | # Test the autoleave argument to EnterJoint. It defaults to false in the
 2 | # datadriven tests. The flag has no associated semantics in this package,
 3 | # it is simply passed through.
 4 | cmd: simple
 5 | args: v=(1)
 6 | voters=(1)
 7 | 1: StateProbe match=0 next=0
 8 | 
 9 | --------------------------------
10 | # Autoleave is reflected in the config.
11 | cmd: enter-joint
12 | args: autoleave=(true) v=(2) v=(3)
13 | voters=(1 2 3)&&(1) autoleave
14 | 1: StateProbe match=0 next=0
15 | 2: StateProbe match=0 next=1
16 | 3: StateProbe match=0 next=1
17 | 
18 | --------------------------------
19 | # Can't enter-joint twice, even if autoleave changes.
20 | cmd: enter-joint
21 | args: autoleave=(false)
22 | config is already joint
23 | 
24 | --------------------------------
25 | cmd: leave-joint
26 | voters=(1 2 3)
27 | 1: StateProbe match=0 next=0
28 | 2: StateProbe match=0 next=1
29 | 3: StateProbe match=0 next=1


--------------------------------------------------------------------------------
/src/conf_change/testdata/joint_idempotency.txt:
--------------------------------------------------------------------------------
 1 | # Verify that operations upon entering the joint state are idempotent, i.e.
 2 | # removing an absent node is fine, etc.
 3 | 
 4 | cmd: simple
 5 | args: v=(1)
 6 | voters=(1)
 7 | 1: StateProbe match=0 next=0
 8 | 
 9 | --------------------------------
10 | cmd: enter-joint
11 | args: r=(1) r=(2) r=(9) v=(2) v=(3) v=(4) v=(2) v=(3) v=(4) l=(2) l=(2) r=(4) r=(4) l=(1) l=(1)
12 | voters=(3)&&(1) learners=(2) learners_next=(1)
13 | 1: StateProbe match=0 next=0
14 | 2: StateProbe match=0 next=1 learner
15 | 3: StateProbe match=0 next=1
16 | 
17 | --------------------------------
18 | cmd: leave-joint
19 | voters=(3) learners=(1 2)
20 | 1: StateProbe match=0 next=0 learner
21 | 2: StateProbe match=0 next=1 learner
22 | 3: StateProbe match=0 next=1


--------------------------------------------------------------------------------
/src/conf_change/testdata/joint_learners_next.txt:
--------------------------------------------------------------------------------
 1 | # Verify that when a voter is demoted in a joint config, it will show up in
 2 | # learners_next until the joint config is left, and only then will the progress
 3 | # turn into that of a learner, without resetting the progress. Note that this
 4 | # last fact is verified by `next`, which can tell us which "round" the progress
 5 | # was originally created in.
 6 | 
 7 | cmd: simple
 8 | args: v=(1)
 9 | voters=(1)
10 | 1: StateProbe match=0 next=0
11 | 
12 | --------------------------------
13 | cmd: enter-joint
14 | args: v=(2) l=(1)
15 | voters=(2)&&(1) learners_next=(1)
16 | 1: StateProbe match=0 next=0
17 | 2: StateProbe match=0 next=1
18 | 
19 | --------------------------------
20 | cmd: leave-joint
21 | voters=(2) learners=(1)
22 | 1: StateProbe match=0 next=0 learner
23 | 2: StateProbe match=0 next=1


--------------------------------------------------------------------------------
/src/conf_change/testdata/joint_safety.txt:
--------------------------------------------------------------------------------
 1 | cmd: leave-joint
 2 | can't leave a non-joint config
 3 | 
 4 | --------------------------------
 5 | cmd: enter-joint
 6 | can't make a zero-voter config joint
 7 | 
 8 | --------------------------------
 9 | cmd: enter-joint
10 | args: v=(1)
11 | can't make a zero-voter config joint
12 | 
13 | --------------------------------
14 | cmd: simple
15 | args: v=(1)
16 | voters=(1)
17 | 1: StateProbe match=0 next=3
18 | 
19 | --------------------------------
20 | cmd: leave-joint
21 | can't leave a non-joint config
22 | 
23 | --------------------------------
24 | # Can enter into joint config.
25 | cmd: enter-joint
26 | voters=(1)&&(1)
27 | 1: StateProbe match=0 next=3
28 | 
29 | --------------------------------
30 | cmd: enter-joint
31 | config is already joint
32 | 
33 | --------------------------------
34 | cmd: leave-joint
35 | voters=(1)
36 | 1: StateProbe match=0 next=3
37 | 
38 | --------------------------------
39 | cmd: leave-joint
40 | can't leave a non-joint config
41 | 
42 | --------------------------------
43 | # Can enter again, this time with some ops.
44 | cmd: enter-joint
45 | args: r=(1) v=(2) v=(3) l=(4)
46 | voters=(2 3)&&(1) learners=(4)
47 | 1: StateProbe match=0 next=3
48 | 2: StateProbe match=0 next=9
49 | 3: StateProbe match=0 next=9
50 | 4: StateProbe match=0 next=9 learner
51 | 
52 | --------------------------------
53 | cmd: enter-joint
54 | config is already joint
55 | 
56 | --------------------------------
57 | cmd: enter-joint
58 | args: v=(12)
59 | config is already joint
60 | 
61 | --------------------------------
62 | cmd: simple
63 | args: l=(15)
64 | can't apply simple config change in joint config
65 | 
66 | --------------------------------
67 | cmd: leave-joint
68 | voters=(2 3) learners=(4)
69 | 2: StateProbe match=0 next=9
70 | 3: StateProbe match=0 next=9
71 | 4: StateProbe match=0 next=9 learner
72 | 
73 | --------------------------------
74 | cmd: simple
75 | args: l=(9)
76 | voters=(2 3) learners=(4 9)
77 | 2: StateProbe match=0 next=9
78 | 3: StateProbe match=0 next=9
79 | 4: StateProbe match=0 next=9 learner
80 | 9: StateProbe match=0 next=14 learner


--------------------------------------------------------------------------------
/src/conf_change/testdata/simple_idempotency.txt:
--------------------------------------------------------------------------------
 1 | cmd: simple
 2 | args: v=(1)
 3 | voters=(1)
 4 | 1: StateProbe match=0 next=0
 5 | 
 6 | --------------------------------
 7 | cmd: simple
 8 | args: v=(1)
 9 | voters=(1)
10 | 1: StateProbe match=0 next=0
11 | 
12 | --------------------------------
13 | cmd: simple
14 | args: v=(2)
15 | voters=(1 2)
16 | 1: StateProbe match=0 next=0
17 | 2: StateProbe match=0 next=2
18 | 
19 | --------------------------------
20 | cmd: simple
21 | args: l=(1)
22 | voters=(2) learners=(1)
23 | 1: StateProbe match=0 next=0 learner
24 | 2: StateProbe match=0 next=2
25 | 
26 | --------------------------------
27 | cmd: simple
28 | args: l=(1)
29 | voters=(2) learners=(1)
30 | 1: StateProbe match=0 next=0 learner
31 | 2: StateProbe match=0 next=2
32 | 
33 | --------------------------------
34 | cmd: simple
35 | args: r=(1)
36 | voters=(2)
37 | 2: StateProbe match=0 next=2
38 | 
39 | --------------------------------
40 | cmd: simple
41 | args: r=(1)
42 | voters=(2)
43 | 2: StateProbe match=0 next=2
44 | 
45 | --------------------------------
46 | cmd: simple
47 | args: v=(3)
48 | voters=(2 3)
49 | 2: StateProbe match=0 next=2
50 | 3: StateProbe match=0 next=7
51 | 
52 | --------------------------------
53 | cmd: simple
54 | args: r=(3)
55 | voters=(2)
56 | 2: StateProbe match=0 next=2
57 | 
58 | --------------------------------
59 | cmd: simple
60 | args: r=(3)
61 | voters=(2)
62 | 2: StateProbe match=0 next=2
63 | 
64 | --------------------------------
65 | cmd: simple
66 | args: r=(4)
67 | voters=(2)
68 | 2: StateProbe match=0 next=2


--------------------------------------------------------------------------------
/src/conf_change/testdata/simple_promote_demote.txt:
--------------------------------------------------------------------------------
 1 | # Set up three voters for this test.
 2 | cmd: simple
 3 | args: v=(1)
 4 | voters=(1)
 5 | 1: StateProbe match=0 next=0
 6 | 
 7 | --------------------------------
 8 | cmd: simple
 9 | args: v=(2)
10 | voters=(1 2)
11 | 1: StateProbe match=0 next=0
12 | 2: StateProbe match=0 next=1
13 | 
14 | --------------------------------
15 | cmd: simple
16 | args: v=(3)
17 | voters=(1 2 3)
18 | 1: StateProbe match=0 next=0
19 | 2: StateProbe match=0 next=1
20 | 3: StateProbe match=0 next=2
21 | 
22 | --------------------------------
23 | # Can atomically demote and promote without a hitch.
24 | # This is pointless, but possible.
25 | cmd: simple
26 | args: l=(1) v=(1)
27 | voters=(1 2 3)
28 | 1: StateProbe match=0 next=0
29 | 2: StateProbe match=0 next=1
30 | 3: StateProbe match=0 next=2
31 | 
32 | --------------------------------
33 | # Can demote a voter.
34 | cmd: simple
35 | args: l=(2)
36 | voters=(1 3) learners=(2)
37 | 1: StateProbe match=0 next=0
38 | 2: StateProbe match=0 next=1 learner
39 | 3: StateProbe match=0 next=2
40 | 
41 | --------------------------------
42 | # Can atomically promote and demote the same voter.
43 | # This is pointless, but possible.
44 | cmd: simple
45 | args: v=(2) l=(2)
46 | voters=(1 3) learners=(2)
47 | 1: StateProbe match=0 next=0
48 | 2: StateProbe match=0 next=1 learner
49 | 3: StateProbe match=0 next=2
50 | 
51 | --------------------------------
52 | # Can promote a voter.
53 | cmd: simple
54 | args: v=(2)
55 | voters=(1 2 3)
56 | 1: StateProbe match=0 next=0
57 | 2: StateProbe match=0 next=1
58 | 3: StateProbe match=0 next=2
59 | 


--------------------------------------------------------------------------------
/src/conf_change/testdata/simple_safety.txt:
--------------------------------------------------------------------------------
 1 | cmd: simple
 2 | args: l=(1)
 3 | removed all voters
 4 | 
 5 | --------------------------------
 6 | cmd: simple
 7 | args: v=(1)
 8 | voters=(1)
 9 | 1: StateProbe match=0 next=1
10 | 
11 | --------------------------------
12 | cmd: simple
13 | args: v=(2) l=(3)
14 | voters=(1 2) learners=(3)
15 | 1: StateProbe match=0 next=1
16 | 2: StateProbe match=0 next=2
17 | 3: StateProbe match=0 next=2 learner
18 | 
19 | --------------------------------
20 | cmd: simple
21 | args: r=(1) v=(5)
22 | more than one voter changed without entering joint config
23 | 
24 | --------------------------------
25 | cmd: simple
26 | args: r=(1) r=(2)
27 | removed all voters
28 | 
29 | --------------------------------
30 | cmd: simple
31 | args: v=(3) v=(4)
32 | more than one voter changed without entering joint config
33 | 
34 | --------------------------------
35 | cmd: simple
36 | args: l=(1) v=(5)
37 | more than one voter changed without entering joint config
38 | 
39 | --------------------------------
40 | cmd: simple
41 | args: l=(1) l=(2)
42 | removed all voters
43 | 
44 | --------------------------------
45 | cmd: simple
46 | args: l=(2) l=(3) l=(4) l=(5)
47 | voters=(1) learners=(2 3 4 5)
48 | 1: StateProbe match=0 next=1
49 | 2: StateProbe match=0 next=2 learner
50 | 3: StateProbe match=0 next=2 learner
51 | 4: StateProbe match=0 next=8 learner
52 | 5: StateProbe match=0 next=8 learner
53 | 
54 | --------------------------------
55 | cmd: simple
56 | args: r=(1)
57 | removed all voters
58 | 
59 | --------------------------------
60 | cmd: simple
61 | args: r=(2) r=(3) r=(4) r=(5)
62 | voters=(1)
63 | 1: StateProbe match=0 next=1


--------------------------------------------------------------------------------
/src/conf_change/testdata/update.txt:
--------------------------------------------------------------------------------
 1 | # Nobody cares about ConfChangeUpdateNode, but at least use it once. It is used
 2 | # by etcd as a convenient way to pass a blob through their conf change machinery
 3 | # that updates information tracked outside of raft.
 4 | cmd: simple
 5 | args: v=(1)
 6 | voters=(1)
 7 | 1: StateProbe match=0 next=0
 8 | 
 9 | --------------------------------
10 | cmd: simple
11 | args: v=(2) u=(1)
12 | voters=(1 2)
13 | 1: StateProbe match=0 next=0
14 | 2: StateProbe match=0 next=1
15 | 
16 | --------------------------------
17 | cmd: simple
18 | args: u=(1) u=(2) u=(3) u=(1) u=(2) u=(3)
19 | voters=(1 2)
20 | 1: StateProbe match=0 next=0
21 | 2: StateProbe match=0 next=1


--------------------------------------------------------------------------------
/src/conf_change/testdata/zero.txt:
--------------------------------------------------------------------------------
1 | # NodeID zero is ignored.
2 | cmd: simple
3 | args: v=(1) r=(0) v=(0) l=(0)
4 | output:
5 | voters=(1)
6 | 1: StateProbe match=0 next=0
7 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | #![feature(is_sorted)]
 2 | #![feature(custom_test_frameworks)]
 3 | #![feature(in_band_lifetimes)]
 4 | 
 5 | #[macro_use]
 6 | extern crate log;
 7 | #[macro_use]
 8 | extern crate nom;
 9 | 
10 | pub mod conf_change;
11 | pub mod node;
12 | pub(crate) mod nom_data_test;
13 | mod paper_test;
14 | pub mod protocol;
15 | pub mod quorum;
16 | pub mod raft;
17 | mod raft_flow_control_test;
18 | pub mod raft_log;
19 | mod raft_snap_test;
20 | pub mod raftpb;
21 | pub mod rawnode;
22 | pub mod read_only;
23 | pub mod status;
24 | pub mod storage;
25 | pub mod tracker;
26 | pub mod unstable;
27 | pub(crate) mod util;
28 | pub(crate) mod raft_test;
29 | mod async_rt;
30 | mod async_ch;
31 | mod tests_util;
32 | 
33 | use async_rt::{sleep, wait, wait_timeout};
34 | 


--------------------------------------------------------------------------------
/src/nom_data_test/mod.rs:
--------------------------------------------------------------------------------
  1 | use bytes::Buf;
  2 | use bytes::Bytes;
  3 | use std::fs::{read_dir, read_to_string};
  4 | use std::io::BufRead;
  5 | use std::path::Path;
  6 | 
  7 | /// Calls `f` once for every regular file found directly inside the
  8 | /// directory `path`, passing the file's path as a string slice.
  9 | ///
 10 | /// Entries that are not regular files (e.g. sub-directories) are skipped;
 11 | /// the walk does not recurse.
 12 | ///
 13 | /// # Panics
 14 | ///
 15 | /// Panics if the directory or one of its entries cannot be read, or if a
 16 | /// file path is not valid UTF-8.
 17 | pub fn walk<F>(path: &str, mut f: F)
 18 | where
 19 |     F: FnMut(&str),
 20 | {
 21 |     let files = read_dir(path)
 22 |         .unwrap()
 23 |         .map(|entry| entry.unwrap().path())
 24 |         .filter(|candidate| candidate.is_file());
 25 |     for file in files {
 26 |         f(file.to_str().unwrap());
 27 |     }
 28 | }
 20 | 
 21 | /// Runs every test case found in the file at `path` through `f`.
 22 | ///
 23 | /// The file content is cut into chunks at every occurrence of `split` and each
 24 | /// chunk is parsed into one [`TestData`]:
 25 | ///
 26 | /// * lines starting with `#` and blank lines are ignored,
 27 | /// * `title: <text>` and `cmd: <text>` set the title and the command,
 28 | /// * `args: k=(v1,v2) ...` adds one [`CmdArg`] per space-separated token,
 29 | /// * a line starting with `output:` is skipped,
 30 | /// * every other line is appended to the expected output.
 31 | ///
 32 | /// After all chunks are parsed, `f` is called once per test case and its
 33 | /// return value is asserted to equal that case's expected output.
 34 | ///
 35 | /// # Panics
 36 | ///
 37 | /// Panics if the file cannot be read or if `f` returns something other than
 38 | /// the expected output of a test case.
 39 | pub fn execute_test<P: AsRef<Path>, F>(path: P, split: &str, mut f: F)
 40 | where
 41 |     F: FnMut(&TestData) -> String,
 42 | {
 43 |     let txt = read_to_string(path).unwrap();
 44 |     let chunks = txt.split(split).collect::<Vec<_>>();
 45 |     let mut data = Vec::with_capacity(chunks.len());
 46 |     for chunk in &chunks {
 47 |         let mut rd = Bytes::from(chunk.to_string()).reader();
 48 |         let mut buf = String::new();
 49 |         let mut cmd = TestData {
 50 |             title: String::new(),
 51 |             cmd: String::new(),
 52 |             cmd_args: vec![],
 53 |             output: String::new(),
 54 |         };
 55 |         while let Ok(n) = rd.read_line(&mut buf) {
 56 |             if n == 0 {
 57 |                 break;
 58 |             }
 59 |             // Comments are detected on the raw line (before trimming), so only a
 60 |             // `#` in the first column starts one.
 61 |             let line = if buf.starts_with('#') { "" } else { buf.trim_end() };
 62 |             if line.starts_with("title: ") {
 63 |                 cmd.title = line["title: ".len()..].to_string();
 64 |             } else if line.starts_with("cmd: ") {
 65 |                 cmd.cmd = line["cmd: ".len()..].to_string();
 66 |             } else if line.starts_with("args: ") {
 67 |                 // Empty tokens (repeated spaces) are skipped, and a token without
 68 |                 // `=` becomes a key with no values instead of an out-of-bounds panic.
 69 |                 for arg in line["args: ".len()..].split(' ').filter(|a| !a.is_empty()) {
 70 |                     let mut parts = arg.split('=');
 71 |                     let key = parts.next().unwrap_or("").to_string();
 72 |                     let vals = parts
 73 |                         .next()
 74 |                         .unwrap_or("")
 75 |                         .trim_start_matches('(')
 76 |                         .trim_end_matches(')')
 77 |                         .split(',')
 78 |                         .filter(|s| !s.trim().is_empty())
 79 |                         .map(|s| s.to_string())
 80 |                         .collect::<Vec<_>>();
 81 |                     cmd.cmd_args.push(CmdArg { key, vals });
 82 |                 }
 83 |             } else if line.starts_with("output:") {
 84 |                 // Marker line only; the expected output is on the other lines.
 85 |             } else if !line.is_empty() {
 86 |                 cmd.output.push_str(line);
 87 |                 cmd.output.push('\n');
 88 |             }
 89 |             buf.clear();
 90 |         }
 91 |         cmd.output = cmd.output.trim_end().to_string();
 92 |         data.push(cmd);
 93 |     }
 94 | 
 95 |     for (i, (datum, chunk)) in data.iter().zip(&chunks).enumerate() {
 96 |         println!("t_{}", i);
 97 |         println!("{}", chunk);
 98 |         println!("{:?}", datum);
 99 |         assert_eq!(f(datum), datum.output);
100 |     }
101 | }
 95 | 
 96 | #[derive(Debug, PartialEq)]
 97 | pub struct TestData { // one parsed test case, i.e. one delimiter-separated chunk of a test file
 98 |     pub title: String, // text after a `title: ` prefix; empty when the chunk has no such line
 99 |     pub cmd: String, // text after a `cmd: ` prefix; empty when the chunk has no such line
100 |     pub cmd_args: Vec<CmdArg>, // arguments parsed from the `args: ` line, in order of appearance
101 |     pub output: String, // expected output: the remaining lines joined by '\n', trailing whitespace trimmed
102 | }
103 | 
104 | #[derive(Debug, Default, PartialEq)]
105 | pub struct CmdArg { // one `key=(v1,v2,...)` token of an `args: ` line
106 |     pub key: String, // text before the `=`
107 |     pub vals: Vec<String>, // comma-separated values with the surrounding parentheses stripped; blank values are dropped
108 | }
109 | 


--------------------------------------------------------------------------------
/src/protocol/mod.rs:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/protocol/raft.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | package raftpb;
 3 | 
 4 | 
 5 | 
 6 | // Kind of payload carried by a log entry.
 7 | enum EntryType {
 8 |     EntryNormal       = 0;
 9 |     EntryConfChange   = 1; // corresponds to pb.ConfChange
10 |     EntryConfChangeV2 = 2; // corresponds to pb.ConfChangeV2
11 | }
12 | 
13 | // A single raft log entry.
14 | message Entry {
15 |     optional uint64     Term  = 2 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations
16 |     optional uint64     Index = 3 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations
17 |     optional EntryType  Type  = 1 [(gogoproto.nullable) = false];
18 |     optional bytes      Data  = 4;
19 | }
13 | 
14 | message SnapshotMetadata { // metadata attached to a Snapshot: a ConfState plus an index/term pair
15 |     optional ConfState conf_state = 1 [(gogoproto.nullable) = false]; // NOTE(review): ConfState is not declared in this file — confirm where it is defined
16 |     optional uint64    index      = 2 [(gogoproto.nullable) = false]; // presumably the last log index covered by the snapshot — TODO confirm
17 |     optional uint64    term       = 3 [(gogoproto.nullable) = false]; // presumably the term of the entry at `index` — TODO confirm
18 | }
19 | 
20 | message Snapshot { // a snapshot payload together with its metadata
21 |     optional bytes            data     = 1; // opaque snapshot bytes
22 |     optional SnapshotMetadata metadata = 2 [(gogoproto.nullable) = false]; // see SnapshotMetadata
23 | }
24 | 


--------------------------------------------------------------------------------
/src/quorum/data_driven_test.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2019 The etcd Authors
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #[cfg(test)]
 16 | mod tests {
 17 |     use crate::nom_data_test::{execute_test, walk, TestData};
 18 |     use crate::quorum::joint::JointConfig;
 19 |     use crate::quorum::majority::MajorityConfig;
 20 |     use crate::quorum::quick_test::alternative_majority_committed_index;
 21 |     use crate::quorum::quorum::{to_string, AckedIndexer, Index, MapAckIndexer};
 22 |     use std::collections::{HashMap, HashSet};
 23 |     use std::fmt::Write;
 24 |     use std::iter::FromIterator;
 25 | 
 26 |     // Parses and executes the test cases in ./testdata/*. An entry in such a
 27 |     // file specifies the command, which is either "committed" to check
 28 |     // committed_index or "vote" to verify a VoteResult. The underlying
 29 |     // configuration and inputs are specified via the arguments `cfg` and `cfgj`
 30 |     // (for the majority config and, optionally, the majority config joint to
 31 |     // the first one), `idx` (for "committed") and `votes` (for "vote").
 32 |     //
 33 |     // Internally, the harness runs some additional checks on each test case for
 34 |     // which it is known that the result shouldn't change. For example,
 35 |     // interchanging the majority configurations of a joint quorum must not
 36 |     // influence the result; if it does, this is noted in the test's output.
 37 |     #[test]
 38 |     fn t_data_driven() {
 39 |         walk("src/quorum/testdata", |p| {
 40 |             execute_test(p, "--------------------------------", |data| -> String {
 41 |                 // Two majority configs. The first one is always used (though it may
 42 |                 // be empty) and the second one is used iff joint is true.
 43 |                 let mut joint = false;
 44 |                 let mut ids = Vec::<u64>::new();
 45 |                 let mut idsj = Vec::<u64>::new();
 46 |                 // The committed indexes for the nodes in the config in the order in
 47 |                 // which they appear in (ids,idsj), without repetition. An underscore
 48 |                 // denotes an omission (i.e. no information for this voter); this is
 49 |                 // different from 0. For example,
 50 |                 //
 51 |                 // cfg=(1,2) cfgj=(2,3,4) idx=(_,5,_,7) initializes the idx for voter 2
 52 |                 // to 5 and that for voter 4 to 7 (and no others).
 53 |                 //
 54 |                 // cfgj=zero is specified to instruct the test harness to treat cfgj
 55 |                 // as zero instead of not specified (i.e. it will trigger a joint
 56 |                 // quorum test instead of a majority quorum test for cfg only).
 57 |                 let mut idxs = Vec::<Index>::new();
 58 |                 // Votes. These are initialized similar to idxs except the only values
 59 |                 // used are 1 (voted against) and 2 (voted for). This looks awkward,
 60 |                 // but is convenient because it allows sharing code between the two.
 61 |                 let mut votes = Vec::<Index>::new();
 62 | 
 63 |                 // Parse the args.
 64 |                 for cmd_arg in &data.cmd_args {
 65 |                     for val in &cmd_arg.vals {
 66 |                         match cmd_arg.key.as_str() {
 67 |                             "cfg" => ids.push(val.parse().unwrap()),
 68 |                             "cfgj" => {
 69 |                                 joint = true;
 70 |                                 if val.as_str() == "zero" {
 71 |                                     assert_eq!(cmd_arg.vals.len(), 1);
 72 |                                 } else {
 73 |                                     idsj.push(val.parse().unwrap());
 74 |                                 }
 75 |                             }
 76 |                             "idx" => {
 77 |                                 // Register placeholders as zeros so that the values
 78 |                                 // that follow stay aligned with their voter.
 79 |                                 if val.as_str() == "_" {
 80 |                                     idxs.push(0);
 81 |                                 } else {
 82 |                                     let n: Index = val.parse().unwrap();
 83 |                                     // This is a restriction caused by the above
 84 |                                     // special-casing for _.
 85 |                                     assert_ne!(n, 0, "cannot use 0 as idx");
 86 |                                     idxs.push(n);
 87 |                                 }
 88 |                             }
 89 |                             "votes" => match val.as_str() {
 90 |                                 "y" => votes.push(2),
 91 |                                 "n" => votes.push(1),
 92 |                                 "_" => votes.push(0),
 93 |                                 _ => panic!("unknown vote: {}", val),
 94 |                             },
 95 |                             _ => panic!("unknown arg {:?}", cmd_arg),
 96 |                         }
 97 |                     }
 98 |                 }
 99 | 
100 |                 // Build the two majority configs.
101 |                 let c = MajorityConfig {
102 |                     votes: HashSet::from_iter(ids.iter().cloned()),
103 |                 };
104 |                 let cj = MajorityConfig {
105 |                     votes: HashSet::from_iter(idsj.iter().cloned()),
106 |                 };
107 | 
108 |                 // Helper that returns an AckedIndexer which has the specified indexes
109 |                 // mapped to the right IDs.
110 |                 let make_lookuper =
111 |                     |idxs: &Vec<Index>, ids: &Vec<u64>, idsj: &Vec<u64>| -> MapAckIndexer {
112 |                         let mut l: HashMap<u64, u64> = HashMap::new();
113 |                         let mut p = 0;
114 |                         let mut _ids: Vec<Index> = Vec::new();
115 |                         _ids.extend(ids);
116 |                         _ids.extend(idsj);
117 |                         for id in &_ids {
118 |                             if l.contains_key(id) {
119 |                                 continue;
120 |                             }
121 |                             if p < idxs.len() {
122 |                                 // NB: this creates zero entries for placeholders that we remove later.
123 |                                 // The upshot of doing it that way is to avoid having to specify placeholders
124 |                                 // multiple times when omitting voters present in both halves of
125 |                                 // a joint config.
126 |                                 l.insert(*id, idxs[p]);
127 |                                 p += 1;
128 |                             }
129 |                         }
130 | 
131 |                         // Zero entries are created by _ placeholders; we don't want
132 |                         // them in the lookuper because "no entry" is different from
133 |                         // "zero entry". Note that we prevent tests from specifying
134 |                         // zero commit indexes, so that there's no confusion between
135 |                         // the two concepts.
136 |                         l.retain(|_, val| *val != 0);
137 |                         l
138 |                     };
139 | 
140 |                 if data.cmd == "vote" {
141 |                     let voters = JointConfig::new2(c.clone(), cj.clone()).ids();
142 |                     assert_eq!(
143 |                         voters.len(),
144 |                         votes.len(),
145 |                         "mismatched input (explicit or _) for votes {:?}: {:?}",
146 |                         voters,
147 |                         votes
148 |                     );
149 |                 }
150 | 
151 |                 let mut buf = String::new();
152 |                 match data.cmd.as_str() {
153 |                     "committed" => {
154 |                         let l = make_lookuper(&idxs, &ids, &idsj);
155 |                         // Branch based on whether this is a majority or joint quorum
156 |                         // test case.
157 |                         if !joint {
158 |                             let idx = c.committed_index(&l);
159 |                             buf.push_str(c.describe(&l).as_str());
160 |                             println!("MapAckIndexer {:?}, ack_id:{}", l, idx);
161 |                             // These alternative computations should return the same
162 |                             // result. If not, print to the output.
163 |                             let a_idx = alternative_majority_committed_index(c.clone(), &l);
164 |                             if a_idx != idx {
165 |                                 writeln!(buf, "{} <-- via alternative computation", a_idx)
166 |                                     .unwrap();
167 |                             }
168 |                             // Joining a majority with the empty majority should give the same result.
169 |                             let a_idx =
170 |                                 JointConfig::new2(c.clone(), MajorityConfig::new()).committed(&l);
171 |                             if a_idx != idx {
172 |                                 writeln!(buf, "{} >-- via zero-joint quorum", a_idx).unwrap();
173 |                             }
174 |                             // Joining a majority with itself should give the same result.
175 |                             let a_idx = JointConfig::new2(c.clone(), c.clone()).committed(&l);
176 |                             if a_idx != idx {
177 |                                 writeln!(buf, "{} >-- via self-joint quorum", a_idx).unwrap();
178 |                             }
179 | 
180 |                             // Copies the acked indexes of `c`'s voters, replacing the
181 |                             // one of `id` with `idx`.
182 |                             let overlay = |c: MajorityConfig,
183 |                                            l: &dyn AckedIndexer,
184 |                                            id: u64,
185 |                                            idx: Index|
186 |                              -> MapAckIndexer {
187 |                                 let mut ll = MapAckIndexer::new();
188 |                                 for iid in c.iter() {
189 |                                     if *iid == id {
190 |                                         ll.insert(*iid, idx);
191 |                                     } else if let Some(idx) = l.acked_index(iid) {
192 |                                         ll.insert(*iid, *idx);
193 |                                     }
194 |                                 }
195 |                                 ll
196 |                             };
197 |                             for id in c.iter() {
198 |                                 let iidx = l.acked_index(id).copied().unwrap_or(0);
199 |                                 if idx > iidx && iidx > 0 {
200 |                                     // If the committed index was definitely above the currently
201 |                                     // inspected idx, the result shouldn't change if we lower it
202 |                                     // further.
203 |                                     let lo = overlay(c.clone(), &l, *id, iidx - 1);
204 |                                     let a_idx = c.committed_index(&lo);
205 |                                     if a_idx != idx {
206 |                                         write!(buf, "{} <-- overlaying {}-->{}", a_idx, id, iidx)
207 |                                             .unwrap();
208 |                                     }
209 | 
210 |                                     let lo = overlay(c.clone(), &l, *id, 0);
211 |                                     let a_idx = c.committed_index(&lo);
212 |                                     if a_idx != idx {
213 |                                         write!(buf, "{} <-- overlaying {}-->0", a_idx, id).unwrap();
214 |                                     }
215 |                                 }
216 |                             }
217 |                             buf.push_str(to_string(idx).as_str());
218 |                         } else {
219 |                             let cc = JointConfig::new2(c.clone(), cj.clone());
220 |                             buf.push_str(cc.describe(&l).as_str());
221 |                             let idx = cc.committed(&l);
222 |                             // Interchanging the majorities shouldn't make a difference. If it does, print.
223 |                             let a_idx = JointConfig::new2(cj.clone(), c.clone()).committed(&l);
224 |                             if a_idx != idx {
225 |                                 writeln!(buf, "{} <-- via symmetry", a_idx).unwrap();
226 |                             }
227 |                             buf.push_str(to_string(idx).as_str());
228 |                         }
229 |                     }
230 |                     "vote" => {
231 |                         let ll = make_lookuper(&votes, &ids, &idsj);
232 |                         println!(
233 |                             "ids: {:?}, idsj: {:?}, votes: {:?}, ll: {:?}",
234 |                             ids, idsj, votes, ll
235 |                         );
236 |                         let mut l = HashMap::new();
237 |                         for (id, v) in ll.iter() {
238 |                             l.insert(*id, *v != 1); // NB: 1 == false, 2 == true
239 |                         }
240 |                         if !joint {
241 |                             // Test a majority quorum.
242 |                             write!(buf, "{:?}", c.vote_result(&l)).unwrap();
243 |                         } else {
244 |                             // Run a joint quorum test case.
245 |                             let r = JointConfig::new2(c.clone(), cj.clone()).vote_result(&l);
246 |                             // Interchanging the majorities shouldn't make a difference.
247 |                             let ar = JointConfig::new2(cj.clone(), c.clone()).vote_result(&l);
248 |                             assert_eq!(r, ar);
249 |                             write!(buf, "{:?}", r).unwrap();
250 |                         }
251 |                     }
252 |                     // Chunks without a known command produce no output.
253 |                     _ => {}
254 |                 }
255 |                 buf
256 |             });
257 |         });
258 |     }
259 | }
273 | 


--------------------------------------------------------------------------------
/src/quorum/joint.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2019 The etcd Authors
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | use crate::quorum::majority::MajorityConfig;
 16 | use crate::quorum::quorum::VoteResult::{VoteLost, VotePending};
 17 | use crate::quorum::quorum::{AckedIndexer, Index, VoteResult};
 18 | use std::collections::{HashMap, HashSet};
 19 | use std::fmt::{self, Display, Error, Formatter};
 20 | use std::process::id;
 21 | 
 22 | /// JointConfig is a configuration of two groups of (possibly overlapping)
 23 | /// majority configurations. Decisions require the support of both majorities.
 24 | /// Thanks to TiKV for the reference implementation.
 25 | #[derive(Clone, PartialEq, Debug)]
 26 | pub struct JointConfig {
 27 |     pub(crate) incoming: MajorityConfig, // first half of the joint configuration
 28 |     pub(crate) outgoing: MajorityConfig, // second half; when it is empty, `joint()` reports false
 29 | }
 30 | 
 31 | impl JointConfig {
 32 |     /// Creates a joint configuration whose two halves are both empty.
 33 |     pub fn new() -> Self {
 34 |         Self::new2(MajorityConfig::new(), MajorityConfig::new())
 35 |     }
 36 | 
 37 |     /// Creates a joint configuration from the two given halves.
 38 |     pub fn new2(incoming: MajorityConfig, outgoing: MajorityConfig) -> Self {
 39 |         Self { incoming, outgoing }
 40 |     }
 41 | }
 42 | 
 43 | impl Default for JointConfig {
 44 |     /// The default value is the empty joint configuration.
 45 |     fn default() -> Self {
 46 |         Self::new()
 47 |     }
 48 | }
 48 | 
 49 | impl Display for JointConfig {
 50 |     /// Writes `incoming&&outgoing`, or only `incoming` when the outgoing half
 51 |     /// is empty.
 52 |     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
 53 |         if self.outgoing.is_empty() {
 54 |             return write!(f, "{}", self.incoming);
 55 |         }
 56 |         write!(f, "{}&&{}", self.incoming, self.outgoing)
 57 |     }
 58 | }
 58 | 
 59 | impl JointConfig {
 60 |     /// Returns a newly allocated set holding every voter ID present in either
 61 |     /// half of the joint configuration.
 62 |     pub fn ids(&self) -> HashSet<u64> {
 63 |         self.incoming
 64 |             .iter()
 65 |             .chain(self.outgoing.iter())
 66 |             .copied()
 67 |             .collect()
 68 |     }
 69 | 
 70 |     /// Returns a (multi-line) representation of the commit indexes for the
 71 |     /// given lookuper, rendered over the union of both halves.
 72 |     pub fn describe<T: AckedIndexer>(&self, l: &T) -> String {
 73 |         let voters = MajorityConfig::from(self.ids());
 74 |         voters.describe(l)
 75 |     }
 76 | 
 77 |     /// Returns the largest committed index for the given joint quorum. An
 78 |     /// index is jointly committed if it is committed in both constituent
 79 |     /// majorities, so the smaller of the two per-half results is returned.
 80 |     pub fn committed<T: AckedIndexer>(&self, l: &T) -> Index {
 81 |         let incoming = self.incoming.committed_index(l);
 82 |         let outgoing = self.outgoing.committed_index(l);
 83 |         std::cmp::min(incoming, outgoing)
 84 |     }
 85 | 
 86 |     /// Combines the vote results of both halves into the result of the joint
 87 |     /// configuration.
 88 |     pub fn vote_result(&self, votes: &HashMap<u64, bool>) -> VoteResult {
 89 |         let incoming = self.incoming.vote_result(votes);
 90 |         let outgoing = self.outgoing.vote_result(votes);
 91 |         if incoming == outgoing {
 92 |             // Both halves agree, so that is the overall outcome.
 93 |             incoming
 94 |         } else if incoming == VoteLost || outgoing == VoteLost {
 95 |             // If either config has lost, loss is the only possible outcome.
 96 |             VoteLost
 97 |         } else {
 98 |             // One side won, the other one is pending, so the whole outcome is
 99 |             // still pending.
100 |             VotePending
101 |         }
102 |     }
103 | 
104 |     /// Removes all IDs from both halves.
105 |     pub fn clear(&mut self) {
106 |         self.outgoing.clear();
107 |         self.incoming.clear();
108 |     }
109 | 
110 |     /// Returns true if (and only if) there is only one voting member
111 |     /// (i.e. the leader) in the current configuration.
112 |     #[inline]
113 |     pub fn is_singleton(&self) -> bool {
114 |         if !self.outgoing.is_empty() {
115 |             return false;
116 |         }
117 |         self.incoming.len() == 1
118 |     }
119 | 
120 |     /// Returns true if `id` is a voter in either half.
121 |     #[inline]
122 |     pub fn contains(&self, id: u64) -> bool {
123 |         let voter = &id;
124 |         self.incoming.contains(voter) || self.outgoing.contains(voter)
125 |     }
126 | 
127 |     /// Returns true if the outgoing half is non-empty.
128 |     #[inline]
129 |     pub fn joint(&self) -> bool {
130 |         let outgoing_empty = self.outgoing.is_empty();
131 |         !outgoing_empty
132 |     }
133 | }
127 | 


--------------------------------------------------------------------------------
/src/quorum/majority.rs:
--------------------------------------------------------------------------------
  1 | use crate::quorum::quorum::{AckedIndexer, Index, VoteResult};
  2 | use std::cmp::Ordering;
  3 | use std::collections::hash_set::Iter;
  4 | use std::collections::{HashMap, HashSet};
  5 | use std::fmt::{self, Display, Formatter, Write};
  6 | use std::process::id;
  7 | 
  8 | /// MajorityConfig is a set of IDs that uses majority quorums to make decisions.
  9 | #[derive(Clone, PartialEq, Debug)]
 10 | pub struct MajorityConfig {
 11 |     pub(crate) votes: HashSet<u64>, // the IDs that make up this configuration
 12 | }
 13 | 
 14 | impl From<HashSet<u64>> for MajorityConfig {
 15 |     /// Wraps the given set of IDs without copying it.
 16 |     fn from(votes: HashSet<u64>) -> Self {
 17 |         Self { votes }
 18 |     }
 19 | }
 19 | 
 20 | impl MajorityConfig {
 21 |     pub fn new() -> Self {
 22 |         MajorityConfig {
 23 |             votes: HashSet::new(),
 24 |         }
 25 |     }
 26 | 
 27 |     /// returns a (multi-line) representation of the commit indexes for the
 28 |     /// given lookuper.
 29 |     pub fn describe<T: AckedIndexer>(&self, l: &T) -> String {
 30 |         if self.votes.is_empty() {
 31 |             return "<empty majority quorum>".to_string();
 32 |         }
 33 | 
 34 |         #[derive(Default, Clone, Copy)]
 35 |         struct Tup {
 36 |             id: u64,
 37 |             idx: Index,
 38 |             // idx found?
 39 |             ok: bool,
 40 |             // length of bar displayed for this up
 41 |             bar: usize,
 42 |         }
 43 | 
 44 |         // Below, populate .bar so that the i-th largest commit index has bar i (we
 45 |         // plot this as sort of a progress bar). The actual code is a bit more
 46 |         // complicated and also makes sure that equal index => equal bar.
 47 |         let n = self.votes.len();
 48 |         let mut info: Vec<Tup> = vec![Tup::default()].repeat(n);
 49 |         for (i, id) in self.iter().enumerate() {
 50 |             let idx = l.acked_index(id);
 51 |             info[i].id = *id;
 52 |             info[i].idx = *idx.or_else(|| Some(&0)).unwrap();
 53 |             info[i].ok = idx.is_some();
 54 |         }
 55 |         // sort by index
 56 |         info.sort_by(|a, b| {
 57 |             if a.idx == b.idx {
 58 |                 a.id.cmp(&b.id)
 59 |             } else {
 60 |                 a.idx.cmp(&b.idx)
 61 |             }
 62 |         });
 63 | 
 64 |         // Populate .bar.
 65 |         for i in 0..info.len() {
 66 |             if i > 0 && info[i - 1].idx < info[i].idx {
 67 |                 info[i].bar = i;
 68 |             }
 69 |         }
 70 | 
 71 |         // sort by id
 72 |         info.sort_by(|a, b| a.id.cmp(&b.id));
 73 | 
 74 |         let mut buf = String::new();
 75 |         // print
 76 | 
 77 |         buf.write_str((" ".repeat(n) + "    idx\n").as_str())
 78 |             .unwrap();
 79 | 
 80 |         for i in 0..info.len() {
 81 |             let bar = info[i].bar;
 82 |             if !info[i].ok {
 83 |                 buf.write_str("?").unwrap();
 84 |                 buf.write_str(" ".repeat(n).as_str()).unwrap();
 85 |             } else {
 86 |                 buf.write_str(&*("x".repeat(bar) + ">" + " ".repeat(n - bar).as_str()))
 87 |                     .unwrap();
 88 |             }
 89 |             buf.write_str(format!(" {:>5}    (id={})\n", info[i].idx, info[i].id).as_str())
 90 |                 .unwrap();
 91 |         }
 92 |         buf
 93 |     }
 94 | 
 95 |     /// commit_index computes the committed index from those supplied via the
 96 |     /// provide acked_index (for the active config).
 97 |     pub fn committed_index<T: AckedIndexer>(&self, l: &T) -> Index {
 98 |         if self.is_empty() {
 99 |             // This plays well with joint quorum which, when one of half is the zero
100 |             // MajorityConfig, should behave like the other half.
101 |             return u64::max_value();
102 |         }
103 |         // Use a on-stack slice to collect the committed indexes when n <= 7
104 |         // (otherwise we alloc). The alternative is to stash a slice on
105 |         // MajorityConfig, but this impairs usability (as is, MajorityConfig is just
106 |         // a map, and that's nice). The assumption is that running with a
107 |         // performance is a lesser concern (additionally the performance
108 |         // implication of an allocation here are far from drastic).
109 |         // TODO: optimized use stack
110 |         let n = self.len();
111 |         let mut srt: Vec<u64> = [0].repeat(n);
112 |         let mut i = 0;
113 |         for id in self.iter() {
114 |             if let Some(idx) = l.acked_index(&id) {
115 |                 srt[i as usize] = *idx;
116 |                 i += 1;
117 |             }
118 |         }
119 | 
120 |         srt.sort_by_key(|key| *key);
121 |         let pos = n - (n / 2 + 1);
122 |         srt[pos]
123 |     }
124 | 
125 |     /// VoteResult takes a mapping of voters to yes/no (true/false) votes and returns
126 |     /// a result indicating whether the vote is pending (i.e. neither a quorum of
127 |     /// yes/no has been reached), won (a quorum of yes has been reached), or lost (a
128 |     /// quorum of no has been reached).
129 |     pub fn vote_result(&self, votes: &HashMap<u64, bool>) -> VoteResult {
130 |         if self.is_empty() {
131 |             // By convention, the elections on an empty config win. This comes in
132 |             // handy with joint quorums because it'll make a half-populated joint
133 |             // quorum behave like a majority quorum
134 |             return VoteResult::VoteWon;
135 |         }
136 |         let (against, agree, missing) =
137 |             self.votes
138 |                 .iter()
139 |                 .fold((0, 0, 0), |(mut against, mut agree, mut missing), id| {
140 |                     if let Some(v) = votes.get(id) {
141 |                         if *v {
142 |                             agree += 1
143 |                         } else {
144 |                             against += 1
145 |                         }
146 |                     } else {
147 |                         missing += 1;
148 |                     }
149 |                     (against, agree, missing)
150 |                 });
151 |         // vote counts for no and yes, responsibility
152 |         let q = self.len() / 2 + 1;
153 |         debug!("agree:{}, missing:{}, q:{}", agree, missing, q);
154 |         if agree >= q {
155 |             return VoteResult::VoteWon;
156 |         }
157 |         if agree + missing >= q {
158 |             return VoteResult::VotePending;
159 |         }
160 |         VoteResult::VoteLost
161 |     }
162 | 
163 |     #[inline]
164 |     pub fn as_slice(&self) -> Vec<u64> {
165 |         let mut s1: Vec<u64> = self.iter().map(|v| *v).collect();
166 |         s1.sort_by_key(|v| *v);
167 |         s1
168 |     }
169 | 
170 |     #[inline]
171 |     pub fn len(&self) -> usize {
172 |         self.votes.len()
173 |     }
174 | 
175 |     #[inline]
176 |     pub(crate) fn get(&self, id: &u64) -> Option<&u64> {
177 |         self.votes.get(id)
178 |     }
179 | 
180 |     #[inline]
181 |     pub(crate) fn insert(&mut self, id: u64) {
182 |         self.votes.insert(id);
183 |     }
184 | 
185 |     #[inline]
186 |     pub(crate) fn remove(&mut self, id: &u64) -> bool {
187 |         self.votes.remove(id)
188 |     }
189 | 
190 |     #[inline]
191 |     pub(crate) fn contains(&self, id: &u64) -> bool {
192 |         self.votes.contains(id)
193 |     }
194 | 
195 |     #[inline]
196 |     pub fn is_empty(&self) -> bool {
197 |         self.votes.is_empty()
198 |     }
199 | 
200 |     #[inline]
201 |     pub(crate) fn clear(&mut self) {
202 |         self.votes.clear();
203 |     }
204 | 
205 |     #[inline]
206 |     pub(crate) fn extend(&mut self, other: &Self) {
207 |         self.votes.extend(other.iter())
208 |     }
209 | 
210 |     #[inline]
211 |     pub fn iter(&self) -> Iter<'_, u64> {
212 |         self.votes.iter()
213 |     }
214 | }
215 | 
216 | impl From<&Vec<u64>> for MajorityConfig {
217 |     fn from(v: &Vec<u64>) -> Self {
218 |         let mut config = MajorityConfig {
219 |             votes: HashSet::new(),
220 |         };
221 |         for item in v.iter() {
222 |             config.votes.insert(*item);
223 |         }
224 |         config
225 |     }
226 | }
227 | 
228 | impl From<Vec<u64>> for MajorityConfig {
229 |     fn from(v: Vec<u64>) -> Self {
230 |         let mut config = MajorityConfig {
231 |             votes: HashSet::new(),
232 |         };
233 |         for item in v.iter() {
234 |             config.votes.insert(*item);
235 |         }
236 |         config
237 |     }
238 | }
239 | 
240 | impl Display for MajorityConfig {
241 |     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
242 |         let mut votes: Vec<u64> = self.votes.iter().map(|v| *v).collect();
243 |         votes.sort();
244 |         let votes: Vec<String> = votes.iter().map(|v| format!("{}", v)).collect();
245 |         let s: String = votes.join(" ");
246 |         write!(f, "({})", s)
247 |     }
248 | }
249 | 
#[cfg(test)]
mod tests {
    use crate::quorum::majority::MajorityConfig;
    use crate::quorum::quorum::AckedIndexer;
    use crate::quorum::quorum::VoteResult::{VoteLost, VotePending, VoteWon};
    use crate::tracker::progress::Progress;
    use crate::tracker::MatchAckIndexer;
    use std::collections::HashMap;

    /// Covers `Display`, both `Vec` conversions and `as_slice`.
    #[test]
    fn t_majority() {
        let mut majority = MajorityConfig::new();
        majority.votes.insert(0);
        majority.votes.insert(1);
        assert_eq!("(0 1)", format!("{}", majority));
        let majority = MajorityConfig::new();
        assert_eq!("()", format!("{}", majority));

        let v: Vec<u64> = vec![0, 1, 2];
        // Conversion from a borrowed vector.
        let majority: MajorityConfig = (&v).into();
        assert_eq!("(0 1 2)", format!("{}", majority));
        // Conversion from an owned vector.
        let majority: MajorityConfig = v.into();
        assert_eq!("(0 1 2)", format!("{}", majority));

        let mut majority = MajorityConfig::new();
        majority.votes.insert(0);
        assert_eq!(vec![0], majority.as_slice());
    }

    /// Walks a five-voter config through pending, won and lost outcomes.
    #[test]
    fn t_majority_vote_result() {
        let mut majority = MajorityConfig::new();
        for id in 0..5 {
            majority.votes.insert(id);
        }
        let mut votes = HashMap::new();
        assert_eq!(majority.vote_result(&votes), VotePending);
        for id in 0..2 {
            votes.insert(id, true);
            assert_eq!(majority.vote_result(&votes), VotePending);
        }
        // Third "yes" out of five reaches the quorum of three.
        votes.insert(3, true);
        assert_eq!(majority.vote_result(&votes), VoteWon);
        // Three rejections leave at most two possible "yes" votes.
        for id in 0..3 {
            votes.insert(id, false);
        }
        assert_eq!(majority.vote_result(&votes), VoteLost);
    }

    /// Checks the committed index of a five-voter config against a table of
    /// progress values; voters missing from the indexer count as index 0.
    #[test]
    fn t_majority_committed_index() {
        let mut majority = MajorityConfig::new();
        let n = 5;
        let tests = vec![
            (vec![(3, 3), (4, 4), (5, 5)], 3),
            (vec![(4, 4), (3, 3), (5, 5)], 3),
            (vec![(5, 5), (4, 4), (3, 3)], 3),
            (vec![(3, 3), (4, 4), (5, 5), (4, 4), (3, 3)], 4),
            (vec![(3, 3), (6, 6), (5, 5), (7, 7), (3, 3)], 5),
            (vec![(3, 3), (6, 6), (6, 6), (6, 6), (6, 6)], 6),
        ];
        for id in 0..n {
            majority.votes.insert(id);
        }
        for (set, w_commit) in tests {
            let match_ack_indexer = new_match_ack_indexer(set);
            let index = majority.committed_index(&match_ack_indexer);
            assert_eq!(index, w_commit);
        }
    }

    /// Builds an indexer whose i-th entry (keyed by position, starting at 0)
    /// is `Progress::new(m, n)` for the i-th pair in `v`.
    fn new_match_ack_indexer(v: Vec<(u64, u64)>) -> MatchAckIndexer {
        let mut match_ack_indexer = MatchAckIndexer::new();
        v.iter().fold(0, |acc, (m, n)| {
            let progress = Progress::new(*m, *n);
            match_ack_indexer.insert(acc, progress);
            acc + 1
        });
        match_ack_indexer
    }
}
331 | 


--------------------------------------------------------------------------------
/src/quorum/majority_vote.txt:
--------------------------------------------------------------------------------
 1 | # The empty config always announces a won vote.
 2 | vote
 3 | ----
 4 | VoteWon
 5 | 
 6 | vote cfg=(1) votes=(_)
 7 | ----
 8 | VotePending
 9 | 
10 | vote cfg=(1) votes=(n)
11 | ----
12 | VoteLost
13 | 
14 | vote cfg=(123) votes=(y)
15 | ----
16 | VoteWon
17 | 
18 | 
19 | 
20 | 
21 | vote cfg=(4,8) votes=(_,_)
22 | ----
23 | VotePending
24 | 
25 | # With two voters, a single rejection loses the vote.
26 | vote cfg=(4,8) votes=(n,_)
27 | ----
28 | VoteLost
29 | 
30 | vote cfg=(4,8) votes=(y,_)
31 | ----
32 | VotePending
33 | 
34 | vote cfg=(4,8) votes=(n,y)
35 | ----
36 | VoteLost
37 | 
38 | vote cfg=(4,8) votes=(y,y)
39 | ----
40 | VoteWon
41 | 
42 | 
43 | 
44 | vote cfg=(2,4,7) votes=(_,_,_)
45 | ----
46 | VotePending
47 | 
48 | vote cfg=(2,4,7) votes=(n,_,_)
49 | ----
50 | VotePending
51 | 
52 | vote cfg=(2,4,7) votes=(y,_,_)
53 | ----
54 | VotePending
55 | 
56 | vote cfg=(2,4,7) votes=(n,n,_)
57 | ----
58 | VoteLost
59 | 
60 | vote cfg=(2,4,7) votes=(y,n,_)
61 | ----
62 | VotePending
63 | 
64 | vote cfg=(2,4,7) votes=(y,y,_)
65 | ----
66 | VoteWon
67 | 
68 | vote cfg=(2,4,7) votes=(y,y,n)
69 | ----
70 | VoteWon
71 | 
72 | vote cfg=(2,4,7) votes=(n,y,n)
73 | ----
74 | VoteLost
75 | 
76 | 
77 | 
78 | # Test some random example with seven nodes (why not).
79 | vote cfg=(1,2,3,4,5,6,7) votes=(y,y,n,y,_,_,_)
80 | ----
81 | VotePending
82 | 
83 | vote cfg=(1,2,3,4,5,6,7) votes=(_,y,y,_,n,y,n)
84 | ----
85 | VotePending
86 | 
87 | vote cfg=(1,2,3,4,5,6,7) votes=(y,y,n,y,_,n,y)
88 | ----
89 | VoteWon
90 | 
91 | vote cfg=(1,2,3,4,5,6,7) votes=(y,y,_,n,y,n,n)
92 | ----
93 | VotePending
94 | 
95 | vote cfg=(1,2,3,4,5,6,7) votes=(y,y,n,y,n,n,n)
96 | ----
97 | VoteLost


--------------------------------------------------------------------------------
/src/quorum/mod.rs:
--------------------------------------------------------------------------------
1 | mod data_driven_test;
2 | pub mod joint;
3 | pub mod majority;
4 | mod quick_test;
5 | pub mod quorum;
6 | 


--------------------------------------------------------------------------------
/src/quorum/quick_test.rs:
--------------------------------------------------------------------------------
  1 | use crate::quorum::majority::MajorityConfig;
  2 | use crate::quorum::quorum::{AckedIndexer, Index};
  3 | use std::collections::HashMap;
  4 | 
  5 | #[cfg(test)]
  6 | mod tests {
  7 |     use crate::quorum::majority::MajorityConfig;
  8 |     use crate::quorum::quick_test::alternative_majority_committed_index;
  9 |     use crate::quorum::quorum::{AckedIndexer, Index};
 10 |     use rand::prelude::*;
 11 |     use rand::Rng;
 12 |     use std::cmp::Ordering;
 13 |     use std::collections::{HashMap, HashSet};
 14 | 
 15 |     type IdxMap = HashMap<u64, Index>;
 16 | 
 17 |     fn new_idx_map() -> IdxMap {
 18 |         small_ran_idx_map(0)
 19 |     }
 20 | 
 21 |     type MemberMap = HashSet<u64>;
 22 | 
 23 |     fn convert_idx_map_to_member_map(idx_map: &IdxMap) -> MemberMap {
 24 |         let mut m = HashSet::new();
 25 |         idx_map.iter().for_each(|(k, v)| {
 26 |             m.insert(*k);
 27 |         });
 28 |         m
 29 |     }
 30 | 
 31 |     fn new_member_map() -> MemberMap {
 32 |         let mut m = HashSet::new();
 33 |         small_ran_idx_map(0).iter().for_each(|(k, v)| {
 34 |             m.insert(*k);
 35 |         });
 36 |         m
 37 |     }
 38 | 
 39 |     // returns a reasonably sized map of ids to commit indexes.
 40 |     fn small_ran_idx_map(size: usize) -> HashMap<u64, Index> {
 41 |         // Hard-code a reasonably small here (quick will hard-code 50, which
 42 |         // is not usefull here).
 43 |         let size = 10;
 44 |         let mut rng = rand::thread_rng();
 45 |         let n: usize = rng.gen_range(0..size);
 46 |         let mut ids: Vec<usize> = (1..size).collect();
 47 |         ids.shuffle(&mut rng);
 48 |         ids.drain(n..);
 49 |         let mut idxs = [0].repeat(ids.len());
 50 |         for idx in idxs.iter_mut() {
 51 |             *idx = rng.gen_range(0..n);
 52 |         }
 53 |         let mut m = HashMap::new();
 54 |         for (i, v) in ids.iter().enumerate() {
 55 |             m.insert(*v as u64, *idxs.get(i).unwrap() as Index);
 56 |         }
 57 |         m
 58 |     }
 59 | 
 60 |     #[test]
 61 |     fn tt_majority() {
 62 |         let count = 5000;
 63 |         for i in 0..count {
 64 |             let idx_map = new_idx_map();
 65 |             let member_map = convert_idx_map_to_member_map(&idx_map);
 66 |             let mut majority = MajorityConfig::new();
 67 |             majority.votes = member_map.clone();
 68 |             let idx = majority.committed_index(&idx_map);
 69 |             let expect_idx = alternative_majority_committed_index(majority.clone(), &idx_map);
 70 |             assert_eq!(idx, expect_idx);
 71 |         }
 72 |     }
 73 | }
 74 | 
 75 | // This is an alternative implmentation of (MajorityConfig).CommittedIndex(l).
 76 | pub(crate) fn alternative_majority_committed_index<T: AckedIndexer>(
 77 |     c: MajorityConfig,
 78 |     l: &T,
 79 | ) -> Index {
 80 |     if c.is_empty() {
 81 |         return u64::MAX;
 82 |     }
 83 |     let mut id_to_idx = HashMap::new();
 84 |     c.votes.iter().for_each(|node| {
 85 |         if let Some(idx) = l.acked_index(node) {
 86 |             id_to_idx.insert(node, idx);
 87 |         }
 88 |     });
 89 | 
 90 |     // Build a map from index to voters who have acked that or any higher index.
 91 |     let mut idx_to_votes = HashMap::new();
 92 |     id_to_idx.iter().for_each(|(id, idx)| {
 93 |         idx_to_votes.insert(idx, 0);
 94 |     });
 95 | 
 96 |     for (_, idx) in id_to_idx.iter() {
 97 |         for (idy, v) in idx_to_votes.iter_mut() {
 98 |             if ***idy > **idx {
 99 |                 continue;
100 |             }
101 |             *v += 1;
102 |         }
103 |     }
104 | 
105 |     // Find the maximum index that has achieved quorum.
106 |     let q = c.len() / 2 + 1;
107 |     let mut max_quorum_index = Index::default();
108 |     for (idx, n) in idx_to_votes.clone() {
109 |         if n >= q as u64 && *idx > &max_quorum_index {
110 |             max_quorum_index = **idx;
111 |         }
112 |     }
113 |     // println!("---->{:?}, {:?}, quorum: {}, max_quorum_index: {:?}", id_to_idx, idx_to_votes, q, max_quorum_index);
114 |     max_quorum_index
115 | }
116 | 


--------------------------------------------------------------------------------
/src/quorum/quorum.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2019 The etcd Authors
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | use std::collections::HashMap;
16 | 
/// Index is a Raft log position.
pub type Index = u64;
19 | 
20 | pub fn to_string(index: Index) -> String {
21 |     if index == u64::MAX {
22 |         "∞".to_string()
23 |     } else {
24 |         index.to_string()
25 |     }
26 | }
27 | 
/// AckedIndexer allows looking up a commit index for a given ID of a voter
/// from a corresponding MajorityConfig.
pub trait AckedIndexer {
    /// Returns the index recorded for `voter_id`, or `None` when this indexer
    /// has no entry for that voter.
    fn acked_index(&self, voter_id: &u64) -> Option<&Index>;
}
33 | 
34 | pub(crate) type MapAckIndexer = HashMap<u64, Index>;
35 | 
36 | impl AckedIndexer for MapAckIndexer {
37 |     fn acked_index(&self, voter_id: &u64) -> Option<&Index> {
38 |         self.get(voter_id)
39 |     }
40 | }
41 | 
/// VoteResult indicates the outcome of a vote.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VoteResult {
    /// VotePending indicates that the decision of the vote depends on future
    /// votes, i.e. neither "yes" nor "no" has reached quorum yet.
    VotePending,
    /// VoteLost indicates that the quorum has voted "no".
    VoteLost,
    /// VoteWon indicates that the quorum has voted "yes".
    VoteWon,
}
53 | 


--------------------------------------------------------------------------------
/src/quorum/testdata/joint_commit.txt:
--------------------------------------------------------------------------------
 1 | # No difference between a simple majority quorum and a simple majority quorum
 2 | # joint with an empty majority quorum. (This is asserted for all datadriven tests
 3 | # by the framework, so we don't dwell on it more).
 4 | #
 5 | # Note that by specifying cfgj explicitly we tell the test harness to treat the
 6 | # input as a joint quorum and not a majority quorum. If we didn't specify
 7 | # cfgj=zero the test would pass just the same, but it wouldn't be exercising the
 8 | # joint quorum path.
 9 | cmd: committed
10 | args: cfg=(1,2,3) cfgj=zero idx=(100,101,99)
11 |        idx
12 | x>     100    (id=1)
13 | xx>    101    (id=2)
14 | >       99    (id=3)
15 | 100


--------------------------------------------------------------------------------
/src/quorum/testdata/joint_vote.txt:
--------------------------------------------------------------------------------
  1 | # Empty joint config wins all votes. This isn't used in production. Note that
  2 | # by specifying cfgj explicitly we tell the test harness to treat the input as
  3 | # a joint quorum and not a majority quorum.
  4 | cmd: vote
  5 | args: cfgj=zero
  6 | VoteWon
  7 | 
  8 | --------------------------------
  9 | # More examples with close to trivial configs.
 10 | cmd: vote
 11 | args: cfg=(1) cfgj=zero votes=(_)
 12 | VotePending
 13 | 
 14 | --------------------------------
 15 | cmd: vote
 16 | args: cfg=(1) cfgj=zero votes=(y)
 17 | VoteWon
 18 | 
 19 | --------------------------------
 20 | cmd: vote
 21 | args: cfg=(1) cfgj=zero votes=(n)
 22 | VoteLost
 23 | 
 24 | --------------------------------
 25 | cmd: vote
 26 | args: cfg=(1) cfgj=(1) votes=(_)
 27 | VotePending
 28 | 
 29 | --------------------------------
 30 | cmd: vote
 31 | args: cfg=(1) cfgj=(1) votes=(y)
 32 | VoteWon
 33 | 
 34 | --------------------------------
 35 | cmd: vote
 36 | args: cfg=(1) cfgj=(1) votes=(n)
 37 | VoteLost
 38 | 
 39 | --------------------------------
 40 | cmd: vote
 41 | args: cfg=(1) cfgj=(2) votes=(_,_)
 42 | VotePending
 43 | 
 44 | --------------------------------
 45 | cmd: vote
 46 | args: cfg=(1) cfgj=(2) votes=(y,_)
 47 | VotePending
 48 | 
 49 | --------------------------------
 50 | cmd: vote
 51 | args: cfg=(1) cfgj=(2) votes=(y,y)
 52 | VoteWon
 53 | 
 54 | --------------------------------
 55 | cmd: vote
 56 | args: cfg=(1) cfgj=(2) votes=(y,n)
 57 | VoteLost
 58 | 
 59 | --------------------------------
 60 | cmd: vote
 61 | args: cfg=(1) cfgj=(2) votes=(n,_)
 62 | VoteLost
 63 | 
 64 | --------------------------------
 65 | cmd: vote
 66 | args: cfg=(1) cfgj=(2) votes=(n,n)
 67 | VoteLost
 68 | 
 69 | --------------------------------
 70 | cmd: vote
 71 | args: cfg=(1) cfgj=(2) votes=(n,y)
 72 | VoteLost
 73 | 
 74 | --------------------------------
 75 | # Two node configs.
 76 | cmd: vote
 77 | args: cfg=(1,2) cfgj=(3,4) votes=(_,_,_,_)
 78 | VotePending
 79 | 
 80 | --------------------------------
 81 | cmd: vote
 82 | args: cfg=(1,2) cfgj=(3,4) votes=(y,_,_,_)
 83 | VotePending
 84 | 
 85 | --------------------------------
 86 | cmd: vote
 87 | args: cfg=(1,2) cfgj=(3,4) votes=(y,y,_,_)
 88 | VotePending
 89 | 
 90 | --------------------------------
 91 | cmd: vote
 92 | args: cfg=(1,2) cfgj=(3,4) votes=(y,y,n,_)
 93 | VoteLost
 94 | 
 95 | --------------------------------
 96 | cmd: vote
 97 | args: cfg=(1,2) cfgj=(3,4) votes=(y,y,n,n)
 98 | VoteLost
 99 | 
100 | --------------------------------
101 | cmd: vote
102 | args: cfg=(1,2) cfgj=(3,4) votes=(y,y,y,n)
103 | VoteLost
104 | 
105 | --------------------------------
106 | cmd: vote
107 | args: cfg=(1,2) cfgj=(3,4) votes=(y,y,y,y)
108 | VoteWon
109 | 
110 | --------------------------------
111 | cmd: vote
112 | args: cfg=(1,2) cfgj=(2,3) votes=(_,_,_)
113 | VotePending
114 | 
115 | --------------------------------
116 | cmd: vote
117 | args: cfg=(1,2) cfgj=(2,3) votes=(_,n,_)
118 | VoteLost
119 | 
120 | --------------------------------
121 | cmd: vote
122 | args: cfg=(1,2) cfgj=(2,3) votes=(y,y,_)
123 | VotePending
124 | 
125 | --------------------------------
126 | cmd: vote
127 | args: cfg=(1,2) cfgj=(2,3) votes=(y,y,n)
128 | VoteLost
129 | 
130 | --------------------------------
131 | cmd: vote
132 | args: cfg=(1,2) cfgj=(2,3) votes=(y,y,y)
133 | VoteWon
134 | 
135 | --------------------------------
136 | cmd: vote
137 | args: cfg=(1,2) cfgj=(1,2) votes=(_,_)
138 | VotePending
139 | 
140 | --------------------------------
141 | cmd: vote
142 | args: cfg=(1,2) cfgj=(1,2) votes=(y,_)
143 | VotePending
144 | 
145 | --------------------------------
146 | cmd: vote
147 | args: cfg=(1,2) cfgj=(1,2) votes=(y,n)
148 | VoteLost
149 | 
150 | --------------------------------
151 | cmd: vote
152 | args: cfg=(1,2) cfgj=(1,2) votes=(n,_)
153 | VoteLost
154 | 
155 | --------------------------------
156 | cmd: vote
157 | args: cfg=(1,2) cfgj=(1,2) votes=(n,n)
158 | VoteLost
159 | 
160 | --------------------------------
161 | # Simple example for overlapping three node configs.
162 | cmd: vote
163 | args: cfg=(1,2,3) cfgj=(2,3,4) votes=(_,_,_,_)
164 | VotePending
165 | 
166 | --------------------------------
167 | cmd: vote
168 | args: cfg=(1,2,3) cfgj=(2,3,4) votes=(_,n,_,_)
169 | VotePending
170 | 
171 | --------------------------------
172 | cmd: vote
173 | args: cfg=(1,2,3) cfgj=(2,3,4) votes=(_,n,n,_)
174 | VoteLost
175 | 
176 | --------------------------------
177 | cmd: vote
178 | args: cfg=(1,2,3) cfgj=(2,3,4) votes=(_,y,y,_)
179 | VoteWon
180 | 
181 | --------------------------------
182 | cmd: vote
183 | args: cfg=(1,2,3) cfgj=(2,3,4) votes=(y,y,_,_)
184 | VotePending
185 | 
186 | --------------------------------
187 | cmd: vote
188 | args: cfg=(1,2,3) cfgj=(2,3,4) votes=(y,y,n,_)
189 | VotePending
190 | 
191 | --------------------------------
192 | cmd: vote
193 | args: cfg=(1,2,3) cfgj=(2,3,4) votes=(y,y,n,n)
194 | VoteLost
195 | 
196 | --------------------------------
197 | cmd: vote
198 | args: cfg=(1,2,3) cfgj=(2,3,4) votes=(y,y,n,y)
199 | VoteWon


--------------------------------------------------------------------------------
/src/quorum/testdata/majority_commit.txt:
--------------------------------------------------------------------------------
  1 | # The empty quorum commits "everything". This is useful for its use in joint
  2 | # quorums.
  3 | cmd: committed
  4 | output:
  5 | <empty majority quorum>∞
  6 | 
  7 | 
  8 | --------------------------------
  9 | title: A single voter quorum is not final when no index is known.
 10 | cmd: committed
 11 | args: cfg=(1) idx=(_)
 12 | output:
 13 |      idx
 14 | ?      0    (id=1)
 15 | 0
 16 | 
 17 | 
 18 | --------------------------------
 19 | title: When an index is known, that's the committed index, and that's final.
 20 | cmd: committed
 21 | args: cfg=(1) idx=(12)
 22 | output:
 23 |      idx
 24 | >     12    (id=1)
 25 | 12
 26 | 
 27 | 
 28 | --------------------------------
 29 | title: With two nodes, start out similarly.
 30 | cmd: committed
 31 | args: cfg=(1,2) idx=(_,_)
 32 | output:
 33 |       idx
 34 | ?       0    (id=1)
 35 | ?       0    (id=2)
 36 | 0
 37 | 
 38 | 
 39 | --------------------------------
 40 | # The first committed index becomes known (for n1). Nothing changes in the output
 41 | # because idx=12 is not known to be on a quorum (which is both nodes).
 42 | cmd: committed
 43 | args: cfg=(1,2) idx=(12,_)
 44 | output:
 45 |       idx
 46 | x>     12    (id=1)
 47 | ?       0    (id=2)
 48 | 0
 49 | 
 50 | --------------------------------
 51 | # The second index comes in and finalizes the decision. The result will be the
 52 | # smaller of the two indexes.
 53 | cmd: committed
 54 | args: cfg=(1,2) idx=(12,5)
 55 | output:
 56 |       idx
 57 | x>     12    (id=1)
 58 | >       5    (id=2)
 59 | 5
 60 | 
 61 | --------------------------------
 62 | # No surprises for three nodes.
 63 | cmd: committed
 64 | args: cfg=(1,2,3) idx=(_,_,_)
 65 | output:
 66 |        idx
 67 | ?        0    (id=1)
 68 | ?        0    (id=2)
 69 | ?        0    (id=3)
 70 | 0
 71 | 
 72 | --------------------------------
 73 | cmd: committed
 74 | args: cfg=(1,2,3) idx=(12,_,_)
 75 | output:
 76 |        idx
 77 | xx>     12    (id=1)
 78 | ?        0    (id=2)
 79 | ?        0    (id=3)
 80 | 0
 81 | 
 82 | --------------------------------
 83 | # We see a committed index, but a higher committed index for the last pending
 84 | # votes could change (increment) the outcome, so not final yet.
 85 | cmd: committed
 86 | args: cfg=(1,2,3) idx=(12,5,_)
 87 | output:
 88 |        idx
 89 | xx>     12    (id=1)
 90 | x>       5    (id=2)
 91 | ?        0    (id=3)
 92 | 5
 93 | 
 94 | --------------------------------
 95 | # a) the case in which it does:
 96 | cmd: committed
 97 | args: cfg=(1,2,3) idx=(12,5,6)
 98 | output:
 99 |        idx
100 | xx>     12    (id=1)
101 | >        5    (id=2)
102 | x>       6    (id=3)
103 | 6
104 | 
105 | --------------------------------
106 | # b) the case in which it does not:
107 | cmd: committed
108 | args: cfg=(1,2,3) idx=(12,5,4)
109 | output:
110 |        idx
111 | xx>     12    (id=1)
112 | x>       5    (id=2)
113 | >        4    (id=3)
114 | 5
115 | 
116 | --------------------------------
117 | # c) a different case in which the last index is pending but it has no chance of
118 | # swaying the outcome (because nobody in the current quorum agrees on anything
119 | # higher than the candidate):
120 | cmd: committed
121 | args: cfg=(1,2,3) idx=(5,5,_)
122 | output:
123 |        idx
124 | x>       5    (id=1)
125 | >        5    (id=2)
126 | ?        0    (id=3)
127 | 5
128 | 
129 | --------------------------------
130 | # With all committed idx known, the result is final.
131 | cmd: committed
132 | args: cfg=(1,2,3) idx=(100,101,103)
133 | output:
134 |        idx
135 | >      100    (id=1)
136 | x>     101    (id=2)
137 | xx>    103    (id=3)
138 | 101
139 | 
140 | 
141 | --------------------------------
142 | # Some more complicated examples. Similar to case c) above. The result is
143 | # already final because no index higher than 103 is one short of quorum.
144 | cmd: committed
145 | args: cfg=(1,2,3,4,5) idx=(101,104,103,103,_)
146 | output:
147 |          idx
148 | x>       101    (id=1)
149 | xxxx>    104    (id=2)
150 | xx>      103    (id=3)
151 | >        103    (id=4)
152 | ?          0    (id=5)
153 | 103
154 | 
155 | --------------------------------
156 | # A similar case which is not final because another vote for >= 103 would change
157 | # the outcome.
158 | cmd: committed
159 | args: cfg=(1,2,3,4,5) idx=(101,102,103,103,_)
160 | output:
161 |          idx
162 | x>       101    (id=1)
163 | xx>      102    (id=2)
164 | xxx>     103    (id=3)
165 | >        103    (id=4)
166 | ?          0    (id=5)
167 | 102


--------------------------------------------------------------------------------
/src/quorum/testdata/majority_vote.txt:
--------------------------------------------------------------------------------
  1 | # The empty config always announces a won vote.
  2 | cmd: vote
  3 | output:
  4 | VoteWon
  5 | 
  6 | --------------------------------
  7 | cmd: vote
  8 | args: cfg=(1) votes=(_)
  9 | output:
 10 | VotePending
 11 | 
 12 | --------------------------------
 13 | cmd: vote
 14 | args: cfg=(1) votes=(n)
 15 | output:
 16 | VoteLost
 17 | 
 18 | --------------------------------
 19 | cmd: vote
 20 | args: cfg=(123) votes=(y)
 21 | output:
 22 | VoteWon
 23 | 
 24 | --------------------------------
 25 | cmd: vote
 26 | args: cfg=(4,8) votes=(_,_)
 27 | output:
 28 | VotePending
 29 | 
 30 | 
 31 | --------------------------------
 32 | # With two voters, a single rejection loses the vote.
 33 | cmd: vote
 34 | args: cfg=(4,8) votes=(n,_)
 35 | output:
 36 | VoteLost
 37 | 
 38 | --------------------------------
 39 | cmd: vote
 40 | args: cfg=(4,8) votes=(y,_)
 41 | output:
 42 | VotePending
 43 | 
 44 | --------------------------------
 45 | cmd: vote
 46 | args: cfg=(4,8) votes=(n,y)
 47 | output:
 48 | VoteLost
 49 | 
 50 | --------------------------------
 51 | cmd: vote
 52 | args: cfg=(4,8) votes=(y,y)
 53 | output:
 54 | VoteWon
 55 | 
 56 | --------------------------------
 57 | cmd: vote
 58 | args: cfg=(2,4,7) votes=(_,_,_)
 59 | output:
 60 | VotePending
 61 | 
 62 | --------------------------------
 63 | cmd: vote
 64 | args: cfg=(2,4,7) votes=(n,_,_)
 65 | output:
 66 | VotePending
 67 | 
 68 | --------------------------------
 69 | cmd: vote
 70 | args: cfg=(2,4,7) votes=(y,_,_)
 71 | VotePending
 72 | 
 73 | --------------------------------
 74 | cmd: vote
 75 | args: cfg=(2,4,7) votes=(n,n,_)
 76 | output:
 77 | VoteLost
 78 | 
 79 | --------------------------------
 80 | cmd: vote
 81 | args: cfg=(2,4,7) votes=(y,n,_)
 82 | output:
 83 | VotePending
 84 | 
 85 | --------------------------------
 86 | cmd: vote
 87 | args: cfg=(2,4,7) votes=(y,y,_)
 88 | output:
 89 | VoteWon
 90 | 
 91 | --------------------------------
 92 | cmd: vote
 93 | args: cfg=(2,4,7) votes=(y,y,n)
 94 | output:
 95 | VoteWon
 96 | 
 97 | --------------------------------
 98 | cmd: vote
 99 | args: cfg=(2,4,7) votes=(n,y,n)
100 | output:
101 | VoteLost
102 | 
103 | --------------------------------
104 | # Test some random example with seven nodes (why not).
105 | cmd: vote
106 | args: cfg=(1,2,3,4,5,6,7) votes=(y,y,n,y,_,_,_)
107 | output:
108 | VotePending
109 | 
110 | --------------------------------
111 | cmd: vote
112 | args: cfg=(1,2,3,4,5,6,7) votes=(_,y,y,_,n,y,n)
113 | output:
114 | VotePending
114 | 
115 | --------------------------------
116 | cmd: vote
117 | args: cfg=(1,2,3,4,5,6,7) votes=(y,y,n,y,_,n,y)
118 | output:
119 | VoteWon
119 | 
120 | --------------------------------
121 | cmd: vote
122 | args: cfg=(1,2,3,4,5,6,7) votes=(y,y,_,n,y,n,n)
123 | output:
124 | VotePending
124 | 
125 | --------------------------------
126 | cmd: vote
127 | args: cfg=(1,2,3,4,5,6,7) votes=(y,y,n,y,n,n,n)
128 | output:
129 | VoteLost


--------------------------------------------------------------------------------
/src/raft_flow_control_test.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 The etcd Authors
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #[cfg(test)]
 16 | mod tests {
 17 |     use crate::tests_util::mock::{new_test_raw_node, read_message, MockEntry, MocksEnts};
 18 |     use crate::raft::Raft;
 19 |     use crate::raftpb::raft::MessageType::{MsgAppResp, MsgHeartbeatResp, MsgProp};
 20 |     use crate::raftpb::raft::{Entry, Message};
 21 |     use crate::storage::{SafeMemStorage, Storage};
 22 |     use bytes::Bytes;
 23 |     use protobuf::RepeatedField;
 24 |     use crate::tests_util::try_init_log;
 25 | 
 26 |     // Ensures that:
 27 |     // 1. `MsgApp` messages fill the sending window until it is full;
 28 |     // 2. once the window is full, no more `MsgApp` can be sent.
 29 |     #[test]
 30 |     fn msg_app_flow_control_full() {
 31 |         try_init_log();
 32 |         let raft = new_test_raw_node(1, vec![1, 2], 5, 1, SafeMemStorage::new());
 33 |         let mut wl_raft = raft.wl();
 34 |         wl_raft.raft.become_candidate();
 35 |         wl_raft.raft.become_leader();
 36 | 
 37 |         // Force the progress of peer 2 into the replicate state.
 38 |         wl_raft
 39 |             .raft
 40 |             .prs
 41 |             .progress
 42 |             .get_mut(&2)
 43 |             .unwrap()
 44 |             .become_replicate();
 45 | 
 46 |         // Fill the inflights window: every proposal must yield exactly one message.
 47 |         for i in 0..wl_raft.raft.prs.max_inflight {
 48 |             let mut msg = Message::new();
 49 |             msg.from = 1;
 50 |             msg.to = 1;
 51 |             msg.field_type = MsgProp;
 52 |             msg.entries = MocksEnts::from("somedata").into();
 53 |             wl_raft.step(msg);
 54 |             let msg = read_message(&mut wl_raft.raft);
 55 |             assert_eq!(msg.len(), 1, "{}: len(ms) = {}, want: 1", i, msg.len());
 56 |         }
 57 | 
 58 |         // Ensure 1: the window is now full.
 59 |         let full = wl_raft
 60 |             .raft
 61 |             .prs
 62 |             .progress
 63 |             .get_mut(&2)
 64 |             .unwrap()
 65 |             .inflights
 66 |             .full();
 67 |         assert!(full, "inflights.full = {}, want: {}", full, true);
 68 | 
 69 |         // Ensure 2: with a full window, further proposals send nothing.
 70 |         for i in 0..10 {
 71 |             let mut msg = Message::new();
 72 |             msg.from = 1;
 73 |             msg.to = 1;
 74 |             msg.field_type = MsgProp;
 75 |             msg.entries = MocksEnts::from("somedata").into();
 76 |             wl_raft.step(msg);
 77 |             let msg = read_message(&mut wl_raft.raft);
 78 |             // The expectation here is zero messages, so the failure text says so.
 79 |             assert_eq!(msg.len(), 0, "{}: len(ms) = {}, want: 0", i, msg.len());
 80 |         }
 81 |     }
 81 | 
 82 |     // Intended to ensure `MsgAppResp` moves the sending window forward correctly:
 83 |     // 1. a valid `MsgAppResp.index` moves the window past all smaller or equal indexes.
 84 |     // 2. an out-of-date `MsgAppResp` has no effect on the sliding window.
 85 |     // As written, the test only asserts that every response is accepted by `step`.
 86 |     #[test]
 87 |     fn msg_app_flow_control_move_forward() {
 88 |         try_init_log();
 89 |         let node = new_test_raw_node(1, vec![1, 2], 5, 1, SafeMemStorage::new());
 90 |         let mut wl_raft = node.wl();
 91 |         wl_raft.raft.become_candidate();
 92 |         wl_raft.raft.become_leader();
 93 | 
 94 |         // Put the progress of peer 2 straight into the replicate state.
 95 |         wl_raft
 96 |             .raft
 97 |             .prs
 98 |             .progress
 99 |             .get_mut(&2)
100 |             .unwrap()
101 |             .become_replicate();
102 | 
103 |         // Saturate the inflights window; each proposal yields one outgoing message.
104 |         let window = wl_raft.raft.prs.max_inflight;
105 |         for round in 0..window {
106 |             wl_raft.step(Message {
107 |                 from: 1,
108 |                 to: 1,
109 |                 field_type: MsgProp,
110 |                 entries: MocksEnts::from("somedata").into(),
111 |                 ..Default::default()
112 |             });
113 |             let sent = read_message(&mut wl_raft.raft);
114 |             assert_eq!(sent.len(), 1, "{}: len(ms) = {}, want: 1", round, sent.len());
115 |         }
116 | 
117 |         // Index 1 is the no-op entry and 2 is the first proposal sent above,
118 |         // so acknowledgements start at 2.
119 |         for acked in 2..window {
120 |             let resp = Message {
121 |                 from: 2,
122 |                 to: 1,
123 |                 field_type: MsgAppResp,
124 |                 index: acked,
125 |                 ..Default::default()
126 |             };
127 |             assert!(wl_raft.step(resp).is_ok());
128 |         }
129 |     }
129 | 
130 |     // Ensures a heartbeat response frees one slot when the window is full.
131 |     #[test]
132 |     fn msg_app_flow_control_recv_heartbeat() {
133 |         // A proposal sent by the leader (0x1) to itself, carrying one data entry.
134 |         fn proposal() -> Message {
135 |             Message {
136 |                 from: 0x1,
137 |                 to: 0x1,
138 |                 field_type: MsgProp,
139 |                 entries: MocksEnts::from("somedata").into(),
140 |                 ..Default::default()
141 |             }
142 |         }
143 | 
144 |         // A heartbeat response sent by follower 0x2 to the leader.
145 |         fn heartbeat_resp() -> Message {
146 |             Message {
147 |                 from: 0x2,
148 |                 to: 0x1,
149 |                 field_type: MsgHeartbeatResp,
150 |                 ..Default::default()
151 |             }
152 |         }
153 | 
154 |         try_init_log();
155 |         let raft = new_test_raw_node(0x1, vec![0x1, 0x2], 5, 1, SafeMemStorage::new());
156 |         let mut wl_raft = raft.wl();
157 |         wl_raft.raft.become_candidate();
158 |         // NOTE: the first index entry log is config change for leader 0x1
159 |         wl_raft.raft.become_leader();
160 | 
161 |         // Force the progress of peer 0x2 into the replicate state.
162 |         wl_raft
163 |             .raft
164 |             .prs
165 |             .progress
166 |             .must_get_mut(&0x2)
167 |             .become_replicate();
168 | 
169 |         // Fill the inflights window; the produced messages are drained and discarded.
170 |         for _ in 0..wl_raft.raft.prs.max_inflight {
171 |             assert!(wl_raft.step(proposal()).is_ok());
172 |             read_message(&mut wl_raft.raft);
173 |         }
174 | 
175 |         for tt in 1..5 {
176 |             let full = wl_raft.raft.prs.progress.must_get(&0x2).inflights.full();
177 |             assert!(full, "{}: inflights.full = {}, want {}", tt, full, true);
178 | 
179 |             // Receive `tt` heartbeat responses; after each one the window must
180 |             // no longer be full.
181 |             for i in 0..tt {
182 |                 assert!(wl_raft.step(heartbeat_resp()).is_ok());
183 |                 read_message(&mut wl_raft.raft);
184 |                 let full = wl_raft.raft.prs.progress.must_get(&0x2).inflights.full();
185 |                 assert_eq!(
186 |                     full, false,
187 |                     "{}.{}: inflights.full = {}, want {}",
188 |                     tt, i, full, false
189 |                 );
190 |             }
191 | 
192 |             // One more proposal takes the free slot and fills the window again.
193 |             assert!(wl_raft.step(proposal()).is_ok());
194 |             read_message(&mut wl_raft.raft);
195 |             assert!(
196 |                 wl_raft.raft.prs.progress.must_get(&0x2).inflights.full(),
197 |                 "inflights.full = {}",
198 |                 false
199 |             );
200 | 
201 |             // With the window full, further proposals must not send anything.
202 |             for i in 0..10 {
203 |                 assert!(wl_raft.step(proposal()).is_ok());
204 |                 let ms1 = read_message(&mut wl_raft.raft);
205 |                 assert_eq!(ms1.len(), 0, "{}.{}: ms.len = {}, want 0", tt, i, ms1.len());
206 |             }
207 | 
208 |             // Clear all pending messages before the next round.
209 |             assert!(wl_raft.step(heartbeat_resp()).is_ok());
210 |             read_message(&mut wl_raft.raft);
211 |         }
212 |     }
223 | }
224 | 


--------------------------------------------------------------------------------
/src/raft_snap_test.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 The etcd Authors
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #[cfg(test)]
 16 | mod tests {
 17 |     use crate::raftpb::raft::MessageType::{MsgAppResp, MsgProp, MsgSnapStatus};
 18 |     use crate::raftpb::raft::{ConfState, Message, Snapshot, SnapshotMetadata};
 19 |     use crate::storage::SafeMemStorage;
 20 |     use crate::tests_util::mock::{
 21 |         new_test_core_node, new_test_inner_node, new_test_raw_node, read_message, MocksEnts,
 22 |     };
 23 |     use crate::tests_util::try_init_log;
 24 |     use crate::tracker::state::StateType;
 25 |     use env_logger::Env;
 26 |     use protobuf::{SingularField, SingularPtrField};
 27 | 
 28 |     // Checks that after a rejecting `MsgAppResp` from follower 2, whose `next`
 29 |     // equals the log's first index, the follower's `pending_snapshot` is 11
 30 |     // (the index of the snapshot built by `new_testing_snap`).
 31 |     #[test]
 32 |     fn sending_snapshot_set_pending_snapshot() {
 33 |         try_init_log();
 34 |         let mut raft = new_test_inner_node(0x1, vec![1], 10, 1, SafeMemStorage::new());
 35 |         let snapshot = new_testing_snap();
 36 |         raft.restore(&snapshot);
 37 | 
 38 |         raft.become_candidate();
 39 |         raft.become_leader();
 40 | 
 41 |         // Force `next` of node 2 to the first log index so that node 2 needs
 42 |         // a snapshot.
 43 |         let first_index = raft.raft_log.first_index();
 44 |         raft.prs.progress.must_get_mut(&0x2).next = first_index;
 45 | 
 46 |         // Reject the entry right before `next`.
 47 |         let rejected_index = raft.prs.progress.must_get(&0x2).next - 1;
 48 |         let rejection = Message {
 49 |             from: 0x2,
 50 |             to: 0x1,
 51 |             field_type: MsgAppResp,
 52 |             index: rejected_index,
 53 |             reject: true,
 54 |             ..Default::default()
 55 |         };
 56 |         raft.step(rejection);
 57 | 
 58 |         let got = raft.prs.progress.must_get(&0x2).pending_snapshot;
 59 |         assert_eq!(got, 11, "pending_snapshot = {}, want 11", got);
 60 |     }
 59 | 
 60 |     // Checks that a proposal produces no outgoing messages while follower 2
 61 |     // has been switched to the snapshot state.
 62 |     #[test]
 63 |     fn pending_snapshot_pause_replication() {
 64 |         try_init_log();
 65 | 
 66 |         let mut raft = new_test_inner_node(0x1, vec![0x1, 0x2], 10, 1, SafeMemStorage::new());
 67 |         let snapshot = new_testing_snap();
 68 |         raft.restore(&snapshot);
 69 | 
 70 |         raft.become_candidate();
 71 |         raft.become_leader();
 72 | 
 73 |         raft.prs.progress.must_get_mut(&0x2).become_snapshot(11);
 74 | 
 75 |         let proposal = Message {
 76 |             from: 0x1,
 77 |             to: 0x1,
 78 |             field_type: MsgProp,
 79 |             entries: MocksEnts::from("somedata").into(),
 80 |             ..Default::default()
 81 |         };
 82 |         raft.step(proposal);
 83 | 
 84 |         let outgoing = read_message(&mut raft);
 85 |         assert!(outgoing.is_empty(), "len(msgs) = {}, want 0", outgoing.len());
 86 |     }
 82 | 
 83 |     // Checks the follower's progress after a rejecting `MsgSnapStatus`:
 84 |     // `pending_snapshot` is 0, `next` is still 1 and `probe_sent` is set.
 85 |     #[test]
 86 |     fn snapshot_failure() {
 87 |         try_init_log();
 88 | 
 89 |         let mut raft = new_test_inner_node(0x1, vec![0x1, 0x2], 10, 1, SafeMemStorage::new());
 90 |         let snapshot = new_testing_snap();
 91 |         raft.restore(&snapshot);
 92 | 
 93 |         raft.become_candidate();
 94 |         raft.become_leader();
 95 | 
 96 |         raft.prs.progress.must_get_mut(&0x2).next = 1;
 97 |         raft.prs.progress.must_get_mut(&0x2).become_snapshot(11);
 98 | 
 99 |         // Follower 2 reports that the snapshot failed.
100 |         let failure = Message {
101 |             from: 0x2,
102 |             to: 0x1,
103 |             field_type: MsgSnapStatus,
104 |             reject: true,
105 |             ..Default::default()
106 |         };
107 |         raft.step(failure);
108 | 
109 |         let follower = raft.prs.progress.must_get(&0x2);
110 |         assert_eq!(
111 |             follower.pending_snapshot, 0,
112 |             "pending_snapshot = {}, want 0",
113 |             follower.pending_snapshot
114 |         );
115 |         assert_eq!(follower.next, 1, "next = {}, want 1", follower.next);
116 |         assert!(
117 |             follower.probe_sent,
118 |             "probe_sent = {}, want true",
119 |             follower.probe_sent
120 |         );
121 |     }
120 | 
121 |     // Checks the follower's progress after a non-rejecting `MsgSnapStatus`:
122 |     // `pending_snapshot` is 0, `next` is 12 and `probe_sent` is set.
123 |     #[test]
124 |     fn snapshot_succeed() {
125 |         try_init_log();
126 |         let mut raft = new_test_inner_node(0x1, vec![0x1, 0x2], 10, 1, SafeMemStorage::new());
127 |         raft.restore(&new_testing_snap());
128 | 
129 |         raft.become_candidate();
130 |         raft.become_leader();
131 | 
132 |         raft.prs.progress.must_get_mut(&0x2).next = 2;
133 |         raft.prs.progress.must_get_mut(&0x2).become_snapshot(11);
134 | 
135 |         // Follower 2 reports that the snapshot was applied successfully.
136 |         raft.step(Message {
137 |             from: 0x2,
138 |             to: 0x1,
139 |             field_type: MsgSnapStatus,
140 |             reject: false,
141 |             ..Default::default()
142 |         });
143 | 
144 |         let pending_snapshot = raft.prs.progress.must_get(&0x2).pending_snapshot;
145 |         assert_eq!(
146 |             pending_snapshot, 0,
147 |             "pending_snapshot = {}, want 0",
148 |             pending_snapshot
149 |         );
150 |         // The failure messages below state the values that are actually asserted.
151 |         let next = raft.prs.progress.must_get(&0x2).next;
152 |         assert_eq!(next, 12, "next = {}, want 12", next);
153 |         let probe_sent = raft.prs.progress.must_get(&0x2).probe_sent;
154 |         assert!(probe_sent, "probe_sent={}, want true", probe_sent);
155 |     }
152 | 
153 |     // Checks that a successful `MsgAppResp` at index 11 clears the pending
154 |     // snapshot (set at 11) and leaves `next` at 13 with one inflight message.
155 |     #[test]
156 |     fn snapshot_abort() {
157 |         try_init_log();
158 |         let mut raft = new_test_inner_node(0x1, vec![0x1, 0x2], 10, 1, SafeMemStorage::new());
159 |         let snapshot = new_testing_snap();
160 |         raft.restore(&snapshot);
161 |         raft.become_candidate();
162 |         // A new leader will append a noop log entry.
163 |         raft.become_leader();
164 |         raft.prs.progress.must_get_mut(&0x2).next = 1;
165 |         raft.prs.progress.must_get_mut(&0x2).become_snapshot(11);
166 | 
167 |         // A successful MsgAppResp that has a higher/equal index than the
168 |         // pending snapshot should abort the pending snapshot.
169 |         info!("last index {}", raft.raft_log.last_index());
170 |         let ack = Message {
171 |             from: 0x2,
172 |             to: 0x1,
173 |             field_type: MsgAppResp,
174 |             index: 11,
175 |             ..Default::default()
176 |         };
177 |         raft.step(ack);
178 | 
179 |         let pending = raft.prs.progress.must_get(&0x2).pending_snapshot;
180 |         assert_eq!(pending, 0, "pending_snapshot = {}, want 0", pending);
181 | 
182 |         // The follower entered StateReplicate and the leader sent an append
183 |         // and optimistically updated the progress (so we see 13 instead of 12).
184 |         // There is something to append because the leader appended an empty entry
185 |         // to the log at index 12 when it assumed leadership.
186 |         let next_index = raft.prs.progress.must_get(&0x2).next;
187 |         assert_eq!(next_index, 13, "next = {}, want 13", next_index);
188 |         let inflight = raft.prs.progress.must_get(&0x2).inflights.count();
189 |         assert_eq!(inflight, 1, "expected an inflight message, got {}", inflight);
190 |     }
189 | 
190 |     // Builds the snapshot shared by the tests above: index 11, term 11 and a
191 |     // configuration whose voters are nodes 1 and 2.
192 |     fn new_testing_snap() -> Snapshot {
193 |         let mut voters = ConfState::new();
194 |         voters.set_voters(vec![1, 2]);
195 | 
196 |         let metadata = SnapshotMetadata {
197 |             index: 11,
198 |             term: 11,
199 |             conf_state: SingularPtrField::from(Some(voters)),
200 |             ..Default::default()
201 |         };
202 | 
203 |         let mut snapshot = Snapshot::new();
204 |         snapshot.set_metadata(metadata);
205 |         snapshot
206 |     }
202 | }
203 | 


--------------------------------------------------------------------------------
/src/raft_test.rs:
--------------------------------------------------------------------------------
 1 | use crate::raftpb::raft::{Message, Entry, MessageType};
 2 | use crate::raft::Raft;
 3 | use crate::storage::{SafeMemStorage, Storage};
 4 | use nom::lib::std::collections::HashMap;
 5 | 
 6 | // // Returns the appliable entries and updates the applied index
 7 | // fn next_ents(mut raft: Raft<SafeMemStorage>, s: &mut SafeMemStorage) -> Vec<Entry> {
 8 | //     // transfer all unstable entries to "stable" storage.
 9 | //     s.wl().append(raft.raft_log.unstable_entries().to_vec());
10 | //     raft.raft_log.stable_to(raft.raft_log.last_index(), raft.raft_log.last_term());
11 | //
12 | //     let ents = raft.raft_log.next_ents();
13 | //     raft.raft_log.applied_to(raft.raft_log.committed);
14 | //     return ents;
15 | // }
16 | //
17 | // fn must_append_entry<S>(raft: &mut Raft<S>, mut ents: Vec<Entry>) where S: Storage {
18 | //     assert!(raft.append_entry(&mut ents), "entry unexpectedly dropped");
19 | // }
20 | //
21 | // trait StateMachine {
22 | //     fn step(&mut self, m: Message) -> Result<(), String>;
23 | //     fn read_message(&mut self) -> Vec<Message>;
24 | // }
25 | //
26 | // struct NetWork<M: StateMachine> {
27 | //     peers: HashMap<u64, M>,
28 | //     storage: HashMap<u64, SafeMemStorage>,
29 | //     dropm: HashMap<ConnEm, SafeMemStorage>,
30 | //     ignorem: HashMap<MessageType, bool>,
31 | //     // `msg_hook` is called for each message sent. It may inspect the
32 | //     // message and return true to send it or false to drop it
33 | //     msg_hook: Box<Fn(Message) -> bool>,
34 | // }
35 | //
36 | // impl<M: StateMachine> NetWork<M> {
37 | //     pub fn send(&mut self, msgs: Vec<Message>) {
38 | //         unimplemented!("unimplemented")
39 | //     }
40 | //
41 | //     pub fn drop(&mut self, from: u64, to: u64, perc: f64) {
42 | //         unimplemented!("unimplemented")
43 | //     }
44 | //
45 | //     pub fn cut(&mut self, one: u64, other: u64) {
46 | //         unimplemented!("unimplemented")
47 | //     }
48 | //
49 | //     pub fn isolated(&mut self, id: u64) {
50 | //         unimplemented!("unimplemented")
51 | //     }
52 | //
53 | //     pub fn ignore(&mut self, t: MessageType) {
54 | //         unimplemented!("unimplemented")
55 | //     }
56 | //
57 | //     pub fn recover(&mut self) {
58 | //         self.dropm.clear();
59 | //         self.ignorem.clear();
60 | //     }
61 | //
62 | //     pub fn filter(&mut self, msgs: Vec<Message>) -> Vec<Message> {
63 | //         unimplemented!("unimplemented")
64 | //     }
65 | //
66 | // }
67 | //
68 | // #[derive(Debug, Clone)]
69 | // struct ConnEm {
70 | //     from: u64,
71 | //     to: u64,
72 | // }
73 | //
74 | // #[derive(Debug, Clone)]
75 | // struct BlackHole {}
76 | //
77 | // impl StateMachine for BlackHole {
78 | //     fn step(&mut self, m: Message) -> Result<(), String> {
79 | //         Ok(())
80 | //     }
81 | //
82 | //     fn read_message(&mut self) -> Vec<Message> {
83 | //         vec![]
84 | //     }
85 | // }


--------------------------------------------------------------------------------
/src/raftpb/.gitignore:
--------------------------------------------------------------------------------
1 | raft.rs


--------------------------------------------------------------------------------
/src/raftpb/gogoproto/.gitignore:
--------------------------------------------------------------------------------
 1 | # Created by .ignore support plugin (hsz.mobi)
 2 | ### Rust template
 3 | # Generated by Cargo
 4 | # will have compiled files and executables
 5 | /target/
 6 | 
 7 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
 8 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
 9 | Cargo.lock
10 | 
11 | # These are backup files generated by rustfmt
12 | **/*.rs.bk
13 | raft.rs
14 | 
15 | 


--------------------------------------------------------------------------------
/src/raftpb/gogoproto/gogo.proto:
--------------------------------------------------------------------------------
  1 | // Protocol Buffers for Go with Gadgets
  2 | //
  3 | // Copyright (c) 2013, The GoGo Authors. All rights reserved.
  4 | // http://github.com/gogo/protobuf
  5 | //
  6 | // Redistribution and use in source and binary forms, with or without
  7 | // modification, are permitted provided that the following conditions are
  8 | // met:
  9 | //
 10 | //     * Redistributions of source code must retain the above copyright
 11 | // notice, this list of conditions and the following disclaimer.
 12 | //     * Redistributions in binary form must reproduce the above
 13 | // copyright notice, this list of conditions and the following disclaimer
 14 | // in the documentation and/or other materials provided with the
 15 | // distribution.
 16 | //
 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 | 
 29 | syntax = "proto2";
 30 | package gogoproto;
 31 | 
 32 | import "google/protobuf/descriptor.proto";
 33 | 
 34 | option java_package = "com.google.protobuf";
 35 | option java_outer_classname = "GoGoProtos";
 36 | option go_package = "github.com/gogo/protobuf/gogoproto";
 37 | 
 38 | extend google.protobuf.EnumOptions { // custom options that can be set on an enum declaration (field numbers 62001-62024)
 39 | 	optional bool goproto_enum_prefix = 62001;
 40 | 	optional bool goproto_enum_stringer = 62021;
 41 | 	optional bool enum_stringer = 62022;
 42 | 	optional string enum_customname = 62023;
 43 | 	optional bool enumdecl = 62024;
 44 | }
 45 | 
 46 | extend google.protobuf.EnumValueOptions { // custom option that can be set on a single enum value
 47 | 	optional string enumvalue_customname = 66001;
 48 | }
 49 | 
 50 | extend google.protobuf.FileOptions { // file-level custom options (field numbers 63001-63035); most carry an `_all` suffix
 51 | 	optional bool goproto_getters_all = 63001;
 52 | 	optional bool goproto_enum_prefix_all = 63002;
 53 | 	optional bool goproto_stringer_all = 63003;
 54 | 	optional bool verbose_equal_all = 63004;
 55 | 	optional bool face_all = 63005;
 56 | 	optional bool gostring_all = 63006;
 57 | 	optional bool populate_all = 63007;
 58 | 	optional bool stringer_all = 63008;
 59 | 	optional bool onlyone_all = 63009;
 60 | 
 61 | 	optional bool equal_all = 63013;
 62 | 	optional bool description_all = 63014;
 63 | 	optional bool testgen_all = 63015;
 64 | 	optional bool benchgen_all = 63016;
 65 | 	optional bool marshaler_all = 63017;
 66 | 	optional bool unmarshaler_all = 63018;
 67 | 	optional bool stable_marshaler_all = 63019;
 68 | 
 69 | 	optional bool sizer_all = 63020;
 70 | 
 71 | 	optional bool goproto_enum_stringer_all = 63021;
 72 | 	optional bool enum_stringer_all = 63022;
 73 | 
 74 | 	optional bool unsafe_marshaler_all = 63023;
 75 | 	optional bool unsafe_unmarshaler_all = 63024;
 76 | 
 77 | 	optional bool goproto_extensions_map_all = 63025;
 78 | 	optional bool goproto_unrecognized_all = 63026;
 79 | 	optional bool gogoproto_import = 63027;
 80 | 	optional bool protosizer_all = 63028;
 81 | 	optional bool compare_all = 63029;
 82 |     optional bool typedecl_all = 63030;
 83 |     optional bool enumdecl_all = 63031;
 84 | 
 85 | 	optional bool goproto_registration = 63032;
 86 | 	optional bool messagename_all = 63033;
 87 | 
 88 | 	optional bool goproto_sizecache_all = 63034;
 89 | 	optional bool goproto_unkeyed_all = 63035;
 90 | }
 91 | 
 92 | extend google.protobuf.MessageOptions { // per-message custom options (field numbers 64001-64035, except `stringer`)
 93 | 	optional bool goproto_getters = 64001;
 94 | 	optional bool goproto_stringer = 64003;
 95 | 	optional bool verbose_equal = 64004;
 96 | 	optional bool face = 64005;
 97 | 	optional bool gostring = 64006;
 98 | 	optional bool populate = 64007;
 99 | 	optional bool stringer = 67008; // NOTE(review): 67008 breaks the 640xx sequence; field numbers are wire identifiers, so confirm against upstream before changing
100 | 	optional bool onlyone = 64009;
101 | 
102 | 	optional bool equal = 64013;
103 | 	optional bool description = 64014;
104 | 	optional bool testgen = 64015;
105 | 	optional bool benchgen = 64016;
106 | 	optional bool marshaler = 64017;
107 | 	optional bool unmarshaler = 64018;
108 | 	optional bool stable_marshaler = 64019;
109 | 
110 | 	optional bool sizer = 64020;
111 | 
112 | 	optional bool unsafe_marshaler = 64023;
113 | 	optional bool unsafe_unmarshaler = 64024;
114 | 
115 | 	optional bool goproto_extensions_map = 64025;
116 | 	optional bool goproto_unrecognized = 64026;
117 | 
118 | 	optional bool protosizer = 64028;
119 | 	optional bool compare = 64029;
120 | 
121 | 	optional bool typedecl = 64030;
122 | 
123 | 	optional bool messagename = 64033;
124 | 
125 | 	optional bool goproto_sizecache = 64034;
126 | 	optional bool goproto_unkeyed = 64035;
127 | }
128 | 
129 | extend google.protobuf.FieldOptions { // per-field custom options (field numbers 65001-65012)
130 | 	optional bool nullable = 65001;
131 | 	optional bool embed = 65002;
132 | 	optional string customtype = 65003;
133 | 	optional string customname = 65004;
134 | 	optional string jsontag = 65005;
135 | 	optional string moretags = 65006;
136 | 	optional string casttype = 65007;
137 | 	optional string castkey = 65008;
138 | 	optional string castvalue = 65009;
139 | 
140 | 	optional bool stdtime = 65010;
141 | 	optional bool stdduration = 65011;
142 | 	optional bool wktpointer = 65012;
143 | 
144 | }
145 | 


--------------------------------------------------------------------------------
/src/raftpb/mod.rs:
--------------------------------------------------------------------------------
  1 | use crate::raftpb::raft::ConfChangeTransition::{
  2 |     ConfChangeTransitionAuto, ConfChangeTransitionJointExplicit, ConfChangeTransitionJointImplicit,
  3 | };
  4 | use crate::raftpb::raft::ConfChangeType::{
  5 |     ConfChangeAddLearnerNode, ConfChangeAddNode, ConfChangeRemoveNode, ConfChangeUpdateNode,
  6 | };
  7 | use crate::raftpb::raft::EntryType::{EntryConfChange, EntryConfChangeV2};
  8 | use crate::raftpb::raft::{ConfChange, ConfChangeSingle, ConfChangeV2, ConfState, Entry};
  9 | use crate::util::vote_resp_msg_type;
 10 | use bytes::{Buf, Bytes};
 11 | use nom::lib::std::borrow::Cow;
 12 | use nom::lib::std::fmt::{Display, Formatter};
 13 | use protobuf::{Message, RepeatedField};
 14 | 
 15 | pub mod raft;
 16 | // pub mod gogoproto;
 17 | 
 18 | /// Checks whether two `ConfState`s describe the same configuration.
 19 | ///
 20 | /// Both inputs are compared on normalized copies whose `voters`, `learners`,
 21 | /// `voters_outgoing` and `learners_next` lists are sorted, so the order of
 22 | /// ids does not matter. The arguments themselves are never modified.
 23 | ///
 24 | /// Returns `Ok(())` when the normalized states are equal. On mismatch,
 25 | /// returns `Err` with a message showing both the normalized states and the
 26 | /// original inputs.
 27 | pub fn equivalent(cs1: &ConfState, cs2: &ConfState) -> Result<(), String> {
 28 |     // Produces the normalized copy used for the comparison.
 29 |     let normalize = |cs: &ConfState| {
 30 |         let mut sorted = cs.clone();
 31 |         sorted.voters.sort();
 32 |         sorted.learners.sort();
 33 |         sorted.voters_outgoing.sort();
 34 |         sorted.learners_next.sort();
 35 |         // Presumably makes an unset flag and an explicit `false` compare
 36 |         // equal; depends on the generated accessors (TODO confirm).
 37 |         if !sorted.get_auto_leave() {
 38 |             sorted.set_auto_leave(false);
 39 |         }
 40 |         sorted
 41 |     };
 42 | 
 43 |     let sorted1 = normalize(cs1);
 44 |     let sorted2 = normalize(cs2);
 45 | 
 46 |     if sorted1 != sorted2 {
 47 |         info!("cs1: {:?}\ncs2:{:?}", sorted1, sorted2);
 48 |         // The untouched inputs are reported directly; no extra clones are
 49 |         // needed just to build the error message.
 50 |         return Err(format!(
 51 |             "ConfStates not equivalent after sorting:{:?}\n{:?}\nInputs were:\n{:?}\n{:?}",
 52 |             sorted1, sorted2, cs1, cs2
 53 |         ));
 54 |     }
 55 | 
 56 |     Ok(())
 57 | }
 51 | 
 52 | // ConfChangeI abstracts over ConfChangeV2 and (legacy) ConfChange so that
 53 | // both can be handled in a unified manner.
 54 | pub trait ConfChangeI: Display + protobuf::Message {
 55 |     fn as_v2(&self) -> ConfChangeV2; // the change expressed as an owned ConfChangeV2
 56 |     fn as_v1(&self) -> Option<&ConfChange>; // Some(..) for the legacy ConfChange impl; the ConfChangeV2 impl returns None
 57 |     fn to_entry(&self) -> Entry; // new Entry whose data is the serialized change and whose type is set by the impl
 58 | }
 59 | 
 60 | impl ConfChangeI for ConfChange {
 61 |     /// Wraps this legacy change into a `ConfChangeV2` that holds a single
 62 |     /// change with the same type and node id, and a copy of the context.
 63 |     #[inline]
 64 |     fn as_v2(&self) -> ConfChangeV2 {
 65 |         let mut single = ConfChangeSingle::new();
 66 |         single.set_field_type(self.get_field_type());
 67 |         single.set_node_id(self.get_node_id());
 68 | 
 69 |         let mut v2 = ConfChangeV2::new();
 70 |         v2.context = self.context.clone();
 71 |         v2.set_changes(RepeatedField::from(vec![single]));
 72 |         v2
 73 |     }
 74 | 
 75 |     /// Returns a reference to this change, which already is the v1 form.
 76 |     #[inline]
 77 |     fn as_v1(&self) -> Option<&ConfChange> {
 78 |         Some(self)
 79 |     }
 80 | 
 81 |     /// Serializes this change into the data of a new `EntryConfChange` entry.
 82 |     ///
 83 |     /// Panics if serialization fails.
 84 |     #[inline]
 85 |     fn to_entry(&self) -> Entry {
 86 |         let payload = Bytes::from(self.write_to_bytes().unwrap());
 87 |         let mut entry = Entry::new();
 88 |         entry.set_Data(payload);
 89 |         entry.set_Type(EntryConfChange);
 90 |         entry
 91 |     }
 92 | }
 86 | 
 87 | impl Display for ConfChange {
 88 |     fn fmt(&self, f: &mut Formatter<'_>) -> ::std::fmt::Result {
 89 |         write!(f, "{}", self)
 90 |     }
 91 | }
 92 | 
 93 | impl ConfChangeI for ConfChangeV2 {
 94 |     #[inline]
 95 |     fn as_v2(&self) -> ConfChangeV2 {
 96 |         self.clone()
 97 |     }
 98 | 
 99 |     #[inline]
100 |     fn as_v1(&self) -> Option<&ConfChange> {
101 |         None
102 |     }
103 | 
104 |     #[inline]
105 |     fn to_entry(&self) -> Entry {
106 |         let data = self.write_to_bytes().unwrap();
107 |         let mut entry = Entry::new();
108 |         entry.set_Data(Bytes::from(data));
109 |         entry.set_Type(EntryConfChangeV2);
110 |         entry
111 |     }
112 | }
113 | 
114 | impl Display for ConfChangeV2 {
115 |     fn fmt(&self, f: &mut Formatter<'_>) -> ::std::fmt::Result {
116 |         write!(f, "{}", self)
117 |     }
118 | }
119 | 
// Helpers that classify a configuration change with respect to the joint
// configuration.
pub trait ExtendConfChange {
    // Reports whether this change is a request to leave the joint
    // configuration.
    fn leave_joint(&self) -> bool;
    // Returns (auto_leave, use_joint): whether the joint state is left
    // automatically, and whether Joint Consensus is used at all.
    fn enter_joint(&self) -> (bool, bool);
}
124 | 
125 | impl ExtendConfChange for ConfChangeV2 {
126 |     fn leave_joint(&self) -> bool {
127 |         let mut cp = self.clone();
128 |         cp.clear_context();
129 |         let empty = ConfChangeV2::default();
130 |         cp.eq(&empty)
131 |     }
132 |     // EnterJoint returns two bools. The second bool is true if and only if this
133 |     // config change will use Joint Consensus, which is the case if it contains more
134 |     // than one change or if the use of Joint Consensus was requested explicitly.
135 |     // The first bool can only be true if second one is, and indicates whether the
136 |     // Joint State will be left automatically.
137 |     fn enter_joint(&self) -> (bool, bool) {
138 |         // NB: in theory, more config changes could qualify for the "simple"
139 |         // protocol but it depends on the config on top of which the changes apply.
140 |         // For example, adding two learners is not OK if both nodes are part of the
141 |         // base config (i.e. two voters are turned into learners in the process of
142 |         // applying the conf change). In practice, these distinctions should not
143 |         // matter, so we keep it simple and use Joint Consensus liberally.
144 |         if self.get_transition() != ConfChangeTransitionAuto || self.changes.len() > 1 {
145 |             // Use Joint Consensus.
146 |             let mut auto_leave = false;
147 |             match self.get_transition() {
148 |                 ConfChangeTransitionAuto | ConfChangeTransitionJointImplicit => auto_leave = true,
149 |                 ConfChangeTransitionJointExplicit => {}
150 |             }
151 |             return (auto_leave, true);
152 |         }
153 |         (false, false)
154 |     }
155 | }
156 | 
157 | pub fn cmp_conf_state(a: &ConfState, b: &ConfState) -> bool {
158 |     let mut a = a.clone();
159 |     let mut b = b.clone();
160 |     a.voters.sort();
161 |     b.voters.sort();
162 |     a.learners.sort();
163 |     b.learners.sort();
164 |     a.voters_outgoing.sort();
165 |     b.voters_outgoing.sort();
166 |     a.learners_next.sort();
167 |     b.learners_next.sort();
168 | 
169 |     a.get_auto_leave() == b.get_auto_leave()
170 |         && a.get_voters() == b.get_voters()
171 |         && a.get_voters_outgoing() == b.get_voters_outgoing()
172 |         && a.get_learners() == b.get_learners()
173 | }
174 | 
175 | pub fn cmp_config_change_v2(a: &ConfChangeV2, b: &ConfChangeV2) -> bool {
176 |     a.get_transition() == b.get_transition()
177 |         && a.get_changes() == b.get_changes()
178 |         && a.get_context() == b.get_context()
179 | }
180 | 
181 | pub fn entry_to_conf_changei(entry: &Entry) -> Option<Box<dyn ConfChangeI>> {
182 |     if entry.get_Type() == EntryConfChange {
183 |         let mut cc = ConfChange::default();
184 |         assert!(cc.merge_from_bytes(entry.get_Data()).is_ok());
185 |         return Some(Box::new(cc));
186 |     } else if entry.get_Type() == EntryConfChangeV2 {
187 |         let mut cc = ConfChangeV2::default();
188 |         assert!(cc.merge_from_bytes(entry.get_Data()).is_ok());
189 |         return Some(Box::new(cc));
190 |     }
191 |     None
192 | }
193 | 
194 | // ConfChangesFromString parses a Space-delimited sequence of operations into a
195 | // slice of ConfChangeSingle. The supported operations are:
196 | // - vn: make n a voter,
197 | // - ln: make n a learner,
198 | // - rn: remove n, and
199 | // - un: update n.
200 | pub fn conf_changes_from_string(s: &str) -> Result<Vec<ConfChangeSingle>, String> {
201 |     let mut ccs = Vec::<ConfChangeSingle>::new();
202 |     for tok in &mut s
203 |         .split_ascii_whitespace()
204 |         .map(|s| s.chars())
205 |         .collect::<Vec<_>>()
206 |     {
207 |         if tok.count() < 2 {
208 |             return Err(format!(
209 |                 "unknown token {}",
210 |                 tok.into_iter().collect::<String>()
211 |             ));
212 |         }
213 |         let mut cc = ConfChangeSingle::new();
214 |         match tok.nth(0).unwrap() {
215 |             'v' => cc.set_field_type(ConfChangeAddNode),
216 |             'l' => cc.set_field_type(ConfChangeAddLearnerNode),
217 |             'r' => cc.set_field_type(ConfChangeRemoveNode),
218 |             'u' => cc.set_field_type(ConfChangeUpdateNode),
219 |             _ => {
220 |                 return Err(format!(
221 |                     "unknown token {}",
222 |                     tok.into_iter().collect::<String>()
223 |                 ));
224 |             }
225 |         }
226 |         let id = tok.skip(0).into_iter().collect::<String>();
227 |         cc.set_node_id(id.parse().unwrap());
228 |         ccs.push(cc);
229 |     }
230 |     Ok(ccs)
231 | }
232 | 
#[cfg(test)]
mod tests {
    use crate::raftpb::raft::ConfChangeV2;
    use bytes::Bytes;
    use protobuf::Message;

    // Round-trips a ConfChangeV2 through its wire encoding and checks that
    // the context bytes survive.
    #[test]
    fn it_works() {
        let mut original = ConfChangeV2::new();
        original.set_context(Bytes::from("manual"));
        let encoded = original.write_to_bytes().unwrap();

        let mut decoded = ConfChangeV2::default();
        decoded.merge_from_bytes(encoded.as_slice()).unwrap();
        assert_eq!(decoded.get_context(), "manual".as_bytes());
    }
}
249 | 


--------------------------------------------------------------------------------
/src/raftpb/raft.proto:
--------------------------------------------------------------------------------
  1 | syntax = "proto3";
  2 | package raftpb;
  3 | 
// EntryType tells how the Data of an Entry is to be interpreted.
enum EntryType {
  EntryNormal = 0;
  EntryConfChange = 1; // corresponds to pb.ConfChange
  EntryConfChangeV2 = 2; // corresponds to pb.ConfChangeV2
}
  9 | 
// Entry is a single log entry.
// Note that the field tags do not follow declaration order (Type is tag 1).
message Entry {
  uint64     Term = 2 ;
  uint64     Index = 3 ;
  EntryType  Type = 1 ;
  // Payload of the entry; for the two conf-change entry types this is the
  // serialized ConfChange / ConfChangeV2 message.
  bytes      Data = 4;
}
 16 | 
// SnapshotMetadata describes a Snapshot: a configuration state plus an
// index/term pair (presumably those of the last entry covered by the
// snapshot; confirm against the storage code).
message SnapshotMetadata {
  ConfState conf_state = 1;
  uint64    index = 2;
  uint64    term = 3 ;
}
 22 | 
// Snapshot pairs an opaque data blob with the metadata describing it.
message Snapshot {
  bytes            data = 1;
  SnapshotMetadata metadata = 2;
}
 27 | 
// MessageType enumerates the kinds of Message; it is the type of the
// Message.type field.
// NOTE(review): the names appear to mirror the etcd raft message types;
// consult that project for the meaning of each kind.
enum MessageType {
  MsgHup = 0;
  MsgBeat = 1;
  MsgProp = 2;
  MsgApp = 3;
  MsgAppResp = 4;
  MsgVote = 5;
  MsgVoteResp = 6;
  MsgSnap = 7;
  MsgHeartbeat = 8;
  MsgHeartbeatResp = 9;
  MsgUnreachable = 10;
  MsgSnapStatus = 11;
  MsgCheckQuorum = 12;
  MsgTransferLeader = 13;
  MsgTimeoutNow = 14;
  MsgReadIndex = 15;
  MsgReadIndexResp = 16;
  MsgPreVote = 17;
  MsgPreVoteResp = 18;
}
 49 | 
// Message is the envelope for every MessageType. Which of the optional
// parts (entries, snapshot, context, ...) are meaningful presumably depends
// on `type`; confirm against the code handling each message kind.
message Message {
  MessageType type = 1 ;
  uint64      to = 2  ;
  uint64      from = 3  ;
  uint64      term = 4  ;
  uint64      logTerm = 5 ;
  uint64      index = 6  ;
  repeated Entry       entries = 7  ;
  uint64      commit = 8 ;
  Snapshot    snapshot = 9 ;
  bool        reject = 10 ;
  uint64      rejectHint = 11 ;
  bytes       context = 12;
}
 64 | 
// HardState groups three counters: term, vote and commit.
// NOTE(review): presumably the state a peer must persist (current term,
// id voted for, commit index); confirm against the storage code.
message HardState {
  uint64 term = 1;
  uint64 vote = 2;
  uint64 commit = 3;
}
 70 | 
// ConfChangeTransition specifies the behavior of a configuration change with
// respect to joint consensus.
enum ConfChangeTransition {
  // Automatically use the simple protocol if possible, otherwise fall back
  // to ConfChangeTransitionJointImplicit. Most applications will want to use
  // this.
  ConfChangeTransitionAuto = 0;
  // Use joint consensus unconditionally, and transition out of it
  // automatically (by proposing a zero configuration change).
  //
  // This option is suitable for applications that want to minimize the time
  // spent in the joint configuration and do not store the joint configuration
  // in the state machine (outside of InitialState).
  ConfChangeTransitionJointImplicit = 1;
  // Use joint consensus and remain in the joint configuration until the
  // application proposes a no-op configuration change. This is suitable for
  // applications that want to explicitly control the transitions, for example
  // to use a custom payload (via the Context field).
  ConfChangeTransitionJointExplicit = 2;
}
 90 | 
// ConfState describes the membership of a Raft group: the incoming voters
// and learners, plus, while the configuration is joint, the outgoing voters
// and the learners that are still to be added.
message ConfState {
  // The voters in the incoming config. (If the configuration is not joint,
  // then the outgoing config is empty).
  repeated uint64 voters = 1;
  // The learners in the incoming config.
  repeated uint64 learners = 2;
  // The voters in the outgoing config.
  repeated uint64 voters_outgoing = 3;
  // The nodes that will become learners when the outgoing config is removed.
  // These nodes are necessarily currently in nodes_joint (or they would have
  // been added to the incoming config right away).
  // NOTE(review): no field named nodes_joint exists in this message;
  // presumably voters_outgoing is meant. Confirm.
  repeated uint64 learners_next = 4;
  // If set, the config is joint and Raft will automatically transition into
  // the final config (i.e. remove the outgoing config) when this is safe.
  bool   auto_leave = 5;
}
107 | 
// ConfChangeType is the kind of a single membership operation; it is used by
// both ConfChange and ConfChangeSingle.
enum ConfChangeType {
  ConfChangeAddNode = 0;
  ConfChangeRemoveNode = 1;
  ConfChangeUpdateNode = 2;
  ConfChangeAddLearnerNode = 3;
}
114 | 
// ConfChange is the legacy, single-operation configuration change. It maps
// onto a ConfChangeV2 holding one ConfChangeSingle with the same type and
// node_id and the same context.
// Note that the field tags do not follow declaration order (id is tag 1).
message ConfChange {
  ConfChangeType  type = 2;
  uint64          node_id = 3 ;
  bytes           context = 4;

  // NB: this is used only by etcd to thread through a unique identifier.
  // Ideally it should really use the Context instead. No counterpart to
  // this field exists in ConfChangeV2.
  uint64          id = 1;
}
125 | 
// ConfChangeSingle is an individual configuration change operation. Multiple
// such operations can be carried out atomically via a ConfChangeV2.
message ConfChangeSingle {
  // The kind of operation to perform.
  ConfChangeType  type = 1;
  // The id of the node the operation applies to.
  uint64          node_id = 2 ;
}
132 | 
133 | // ConfChangeV2 messages initiate configuration changes. They support both the
134 | // simple "one at a time" membership change protocol and full Joint Consensus
135 | // allowing for arbitrary changes in membership.
136 | //
137 | // The supplied context is treated as an opaque payload and can be used to
138 | // attach an action on the state machine to the application of the config change
139 | // proposal. Note that contrary to Joint Consensus as outlined in the Raft
140 | // paper[1], configuration changes become active when they are *applied* to the
141 | // state machine (not when they are appended to the log).
142 | //
143 | // The simple protocol can be used whenever only a single change is made.
144 | //
145 | // Non-simple changes require the use of Joint Consensus, for which two
146 | // configuration changes are run. The first configuration change specifies the
147 | // desired changes and transitions the Raft group into the joint configuration,
148 | // in which quorum requires a majority of both the pre-changes and post-changes
149 | // configuration. Joint Consensus avoids entering fragile intermediate
150 | // configurations that could compromise survivability. For example, without the
151 | // use of Joint Consensus and running across three availability zones with a
152 | // replication factor of three, it is not possible to replace a voter without
153 | // entering an intermediate configuration that does not survive the outage of
154 | // one availability zone.
155 | //
// The provided ConfChangeTransition specifies how (and whether) Joint Consensus
// is used, and assigns the task of leaving the joint configuration either to
// Raft or the application. Leaving the joint configuration is accomplished by
// proposing a ConfChangeV2 in which at most the Context field is populated
// (that is, no changes and the default transition).
161 | //
162 | // For details on Raft membership changes, see:
163 | //
164 | // [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
message ConfChangeV2 {
  // How (and whether) Joint Consensus is used for this change.
  ConfChangeTransition transition = 1 ;
  // The individual operations that make up this change.
  repeated ConfChangeSingle     changes = 2;
  // Opaque payload supplied by the application.
  bytes                context = 3;
}
170 | 


--------------------------------------------------------------------------------
/src/read_only.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2016 The etcd Authors
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | use crate::raft::ReadOnlyOption;
 16 | use crate::raftpb::raft::{Message, MessageType};
 17 | use std::borrow::Cow;
 18 | use std::collections::HashMap;
 19 | 
// ReadState provides state for a read-only query.
// It is the caller's responsibility to call ReadIndex first before getting
// this state from ready. It is also the caller's duty to tell whether this
// state is the one it requested, by means of request_ctx (e.g. by passing a
// unique id as request_ctx).
#[derive(Default, Debug, Clone, Eq, PartialEq)]
pub struct ReadState {
    // Index associated with the read request.
    pub index: u64,
    // Context bytes supplied with the request, used to match the state to
    // its request.
    pub request_ctx: Vec<u8>,
}
 30 | 
// Bookkeeping for one pending read-only request.
#[derive(Default, Debug, Clone)]
pub(crate) struct ReadIndexStatus {
    // The original read-only request message.
    pub req: Message,
    // The commit index of the raft state machine when the request arrived.
    pub index: u64,
    // NB: this never records 'false', but it's more convenient to use this
    // instead of a HashMap<u64,bool> due to the API of quorum.VoteResult. If
    // this becomes performance sensitive enough (doubtful), quorum.VoteResult
    // can change to an API that is closer to that of CommittedIndex.
    pub acks: HashMap<u64, bool>,
}
 41 | 
// Tracks the read-only requests that are still waiting to be answered.
#[derive(Clone)]
pub struct ReadOnly {
    // The read-only mode this tracker was created with.
    pub(crate) option: ReadOnlyOption,
    // Pending requests, keyed by request context (the data of the first
    // entry of the request message).
    pub(crate) pending_read_index: HashMap<Vec<u8>, ReadIndexStatus>,
    // Request contexts in the order in which the requests were added.
    pub(crate) read_index_queue: Vec<Vec<u8>>,
}
 48 | 
 49 | impl ReadOnly {
 50 |     pub(crate) fn new(option: ReadOnlyOption) -> Self {
 51 |         ReadOnly {
 52 |             option,
 53 |             pending_read_index: Default::default(),
 54 |             read_index_queue: vec![],
 55 |         }
 56 |     }
 57 | 
 58 |     // add_request adds a record only request into readonly struct.
 59 |     // `index` is the commit index of the raft state machine when it received
 60 |     // the read only request.
 61 |     // `m` is the original read only request message from the local or remote node.
 62 |     pub(crate) fn add_request(&mut self, index: u64, m: Message) {
 63 |         let s = m.get_entries()[0].get_Data().to_vec();
 64 |         let read_index_status = ReadIndexStatus {
 65 |             req: m,
 66 |             index,
 67 |             acks: Default::default(),
 68 |         };
 69 |         self.pending_read_index
 70 |             .entry(s.clone())
 71 |             .or_insert(read_index_status);
 72 |         self.read_index_queue.push(s);
 73 |     }
 74 | 
 75 |     // recv_ack notifies the read_only struct that the raft state machine received
 76 |     // an acknowledgment of the heartbeat that attached with the read only request
 77 |     // context.
 78 |     pub(crate) fn recv_ack(&mut self, id: u64, context: Vec<u8>) -> Option<&HashMap<u64, bool>> {
 79 |         if let Some(mut entry) = self.pending_read_index.get_mut(&context) {
 80 |             entry.acks.insert(id, true);
 81 |             return Some(&entry.acks);
 82 |         }
 83 |         None
 84 |     }
 85 | 
 86 |     // Advances the read only request queue kept by the read_only struct.
 87 |     // It dequeues the requests until it finds the read only request that has
 88 |     // the same context as the given `m`.
 89 |     pub(crate) fn advance(&mut self, m: Message) -> Vec<ReadIndexStatus> {
 90 |         let mut rss: Vec<ReadIndexStatus> = vec![];
 91 |         let mut i = 0;
 92 |         let mut found = false;
 93 |         for ok_ctx in &self.read_index_queue {
 94 |             i += 1;
 95 |             let rs = self.pending_read_index.get(ok_ctx);
 96 |             if rs.is_none() {
 97 |                 panic!("cannot find corresponding read state from pending map");
 98 |             }
 99 |             let rs = rs.unwrap();
100 |             rss.push(rs.clone());
101 |             if ok_ctx.as_slice() == m.get_context() {
102 |                 found = true;
103 |                 break;
104 |             }
105 |         }
106 |         if found {
107 |             self.read_index_queue.drain(..i);
108 |             rss.iter().for_each(|rs| {
109 |                 self.pending_read_index
110 |                     .remove(rs.req.get_entries()[0].get_Data());
111 |             });
112 |             return rss;
113 |         }
114 |         vec![]
115 |     }
116 | 
117 |     // last_pending_request returns the context of the last pending read only
118 |     // request in readonly struct
119 |     pub(crate) fn last_pending_request(&self) -> Option<Vec<u8>> {
120 |         self.read_index_queue.last().map(|v| v.clone())
121 |     }
122 | }
123 | 


--------------------------------------------------------------------------------
/src/status.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2015 The etcd Authors
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | use crate::node::SoftState;
16 | use crate::raft::{Raft, StateType};
17 | use crate::raftpb::raft::HardState;
18 | use crate::storage::Storage;
19 | use crate::tracker::progress::ProgressMap;
20 | use crate::tracker::Config;
21 | use std::fmt::{Display, Formatter};
22 | 
/// Contains information about this Raft peer and its view of the system.
/// The Progress is only populated on the leader.
#[derive(Clone, Debug)]
pub struct Status {
    /// Basic information about the peer.
    pub(crate) base_status: BaseStatus,
    /// Copy of the peer's tracker configuration.
    pub config: Config,
    /// Copy of the peer's progress map; left at its default unless the peer
    /// is the leader.
    pub progress: ProgressMap,
}
31 | 
32 | impl Display for Status {
33 |     fn fmt(&self, f: &mut Formatter<'_>) -> ::std::fmt::Result {
34 |         write!(f, "{:?}", self)
35 |     }
36 | }
37 | 
/// Contains basic information about the Raft peer. It does not allocate.
#[derive(Clone, Debug)]
pub struct BaseStatus {
    /// Id of the peer.
    id: u64,
    /// The peer's hard state at the time the status was taken.
    hard_state: HardState,
    /// The peer's soft state at the time the status was taken.
    soft_state: SoftState,
    /// The applied index of the peer's raft log.
    applied: u64,
    /// The peer's `lead_transferee` value.
    lead_transferee: u64,
}
47 | 
48 | impl<S: Storage> From<&Raft<S>> for BaseStatus {
49 |     fn from(raft: &Raft<S>) -> Self {
50 |         BaseStatus {
51 |             id: raft.id,
52 |             hard_state: raft.hard_state(),
53 |             soft_state: raft.soft_state(),
54 |             applied: raft.raft_log.applied,
55 |             lead_transferee: raft.lead_transferee,
56 |         }
57 |     }
58 | }
59 | 
60 | impl<S: Storage> From<&Raft<S>> for Status {
61 |     fn from(raft: &Raft<S>) -> Self {
62 |         let mut s = Status {
63 |             base_status: BaseStatus::from(raft),
64 |             config: Default::default(),
65 |             progress: Default::default(),
66 |         };
67 |         if s.base_status.soft_state.raft_state == StateType::Leader {
68 |             s.progress = raft.prs.progress.clone();
69 |         }
70 |         s.config = raft.prs.config.clone();
71 |         s
72 |     }
73 | }
74 | 


--------------------------------------------------------------------------------
/src/tests_util.rs:
--------------------------------------------------------------------------------
  1 | use env_logger::Env;
  2 | use std::io::Write;
  3 | 
  4 | #[cfg(any(test))]
  5 | pub(crate) fn try_init_log() {
  6 |     // env_logger::try_init_from_env(Env::new().default_filter_or("info"));
  7 |     let mut env = env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "trace");
  8 |     env_logger::Builder::from_env(env)
  9 |         .format(|buf, record| {
 10 |             writeln!(
 11 |                 buf,
 12 |                 "{} {} [{}:{}], {}",
 13 |                 chrono::Local::now().format("%Y-%m-%d %H:%M:%S"),
 14 |                 record.level(),
 15 |                 record.file().unwrap_or("<unnamed>"),
 16 |                 record.line().unwrap(),
 17 |                 &record.args()
 18 |             )
 19 |         })
 20 |         .try_init();
 21 | }
 22 | 
 23 | 
 24 | #[cfg(any(test))]
 25 | pub(crate) mod mock {
 26 |     use std::collections::HashMap;
 27 |     use bytes::Bytes;
 28 |     use protobuf::RepeatedField;
 29 |     use crate::raft::{Config, NO_LIMIT, Raft, ReadOnlyOption};
 30 |     use crate::raft_log::RaftLog;
 31 |     use crate::raftpb::raft::{Entry, Message, MessageType, Snapshot};
 32 |     use crate::rawnode::{RawCoreNode, SafeRawNode};
 33 |     use crate::storage::{SafeMemStorage, Storage};
 34 | 
 35 |     pub fn read_message<S: Storage>(raft: &mut Raft<S>) -> Vec<Message> {
 36 |         let msg = raft.msgs.clone();
 37 |         raft.msgs.clear();
 38 |         msg
 39 |     }
 40 | 
 41 |     pub struct MocksEnts(Entry);
 42 | 
 43 |     impl Into<Entry> for MocksEnts {
 44 |         fn into(self) -> Entry {
 45 |             self.0
 46 |         }
 47 |     }
 48 | 
 49 |     impl Into<RepeatedField<Entry>> for MocksEnts {
 50 |         fn into(self) -> RepeatedField<Entry> {
 51 |             RepeatedField::from_vec(vec![self.0])
 52 |         }
 53 |     }
 54 | 
 55 |     impl From<&str> for MocksEnts {
 56 |         fn from(buf: &str) -> Self {
 57 |             let v = Vec::from(buf);
 58 |             let mut entry = Entry::new();
 59 |             entry.set_Data(Bytes::from(v));
 60 |             MocksEnts(entry)
 61 |         }
 62 |     }
 63 | 
 64 |     pub struct MockEntry(Entry);
 65 | 
 66 |     impl MockEntry {
 67 |         pub fn set_data(mut self, buf: Vec<u8>) -> MockEntry {
 68 |             self.0.set_Data(Bytes::from(buf));
 69 |             self
 70 |         }
 71 | 
 72 |         pub fn set_index(mut self, index: u64) -> MockEntry {
 73 |             self.0.set_Index(index);
 74 |             self
 75 |         }
 76 |     }
 77 | 
 78 |     impl Into<Entry> for MockEntry {
 79 |         fn into(self) -> Entry {
 80 |             self.0
 81 |         }
 82 |     }
 83 | 
 84 |     impl From<Vec<u8>> for MockEntry {
 85 |         fn from(v: Vec<u8>) -> Self {
 86 |             let mut entry = Entry::new();
 87 |             entry.set_Data(Bytes::from(v));
 88 |             MockEntry(entry)
 89 |         }
 90 |     }
 91 | 
 92 |     impl From<&str> for MockEntry {
 93 |         fn from(buf: &str) -> Self {
 94 |             let data = Vec::from(buf);
 95 |             let mut entry = Entry::new();
 96 |             entry.set_Data(Bytes::from(data));
 97 |             MockEntry(entry)
 98 |         }
 99 |     }
100 | 
101 |     pub fn new_entry(index: u64, term: u64) -> Entry {
102 |         let mut entry = Entry::new();
103 |         entry.set_Index(index);
104 |         entry.set_Term(term);
105 |         entry
106 |     }
107 | 
108 |     pub fn new_entry_set(set: Vec<(u64, u64)>) -> Vec<Entry> {
109 |         set.iter()
110 |             .map(|(index, term)| new_entry(*index, *term))
111 |             .collect()
112 |     }
113 | 
114 |     pub fn new_entry_set2(set: Vec<(u64, u64, &str)>) -> Vec<Entry> {
115 |         set.iter().map(|(index, term, data)| {
116 |             let mut entry = new_entry(*index, *term);
117 |             let data = Vec::from(*data);
118 |             entry.set_Data(Bytes::from(data));
119 |             entry
120 |         }).collect()
121 |     }
122 | 
123 |     pub fn new_empty_entry_set() -> Vec<Entry> {
124 |         Vec::new()
125 |     }
126 | 
127 |     pub fn new_snapshot(index: u64, term: u64) -> Snapshot {
128 |         let mut snapshot = Snapshot::new();
129 |         snapshot.mut_metadata().set_index(index);
130 |         snapshot.mut_metadata().set_term(term);
131 |         snapshot
132 |     }
133 | 
134 |     pub fn new_memory() -> SafeMemStorage {
135 |         let storage = SafeMemStorage::new();
136 |         storage
137 |     }
138 | 
139 |     pub fn new_log() -> RaftLog<SafeMemStorage> {
140 |         RaftLog::new(new_memory())
141 |     }
142 | 
143 |     pub fn new_log_with_storage<T: Storage + Clone>(storage: T) -> RaftLog<T> {
144 |         RaftLog::new(storage)
145 |     }
146 | 
147 |     pub fn new_test_raw_node(
148 |         id: u64,
149 |         peers: Vec<u64>,
150 |         election_tick: u64,
151 |         heartbeat_tick: u64,
152 |         s: SafeMemStorage,
153 |     ) -> SafeRawNode<SafeMemStorage> {
154 |         SafeRawNode::new2(new_test_conf(id, peers, election_tick, heartbeat_tick), s)
155 |     }
156 | 
157 |     pub fn new_test_core_node(
158 |         id: u64,
159 |         peers: Vec<u64>,
160 |         election_tick: u64,
161 |         heartbeat_tick: u64,
162 |         s: SafeMemStorage,
163 |     ) -> RawCoreNode<SafeMemStorage> {
164 |         RawCoreNode::new(new_test_conf(id, peers, election_tick, heartbeat_tick), s)
165 |     }
166 | 
167 |     pub fn new_test_inner_node(
168 |         id: u64,
169 |         peers: Vec<u64>,
170 |         election_tick: u64,
171 |         heartbeat_tick: u64,
172 |         s: SafeMemStorage,
173 |     ) -> Raft<SafeMemStorage> {
174 |         Raft::new(new_test_conf(id, peers, election_tick, heartbeat_tick), s)
175 |     }
176 | 
177 |     pub fn new_test_conf(id: u64, peers: Vec<u64>, election_tick: u64, heartbeat_tick: u64) -> Config {
178 |         Config {
179 |             id,
180 |             peers,
181 |             learners: vec![],
182 |             election_tick,
183 |             heartbeat_tick,
184 |             applied: 0,
185 |             max_size_per_msg: NO_LIMIT,
186 |             max_committed_size_per_ready: 0,
187 |             max_uncommitted_entries_size: 0,
188 |             max_inflight_msgs: 1 << 3,
189 |             check_quorum: false,
190 |             pre_vote: false,
191 |             read_only_option: ReadOnlyOption::ReadOnlySafe,
192 |             disable_proposal_forwarding: false,
193 |         }
194 |     }
195 | 
196 | 
197 |     // Returns the appliable entries and updates the applied index
198 |     fn next_ents(mut raft: Raft<SafeMemStorage>, s: &mut SafeMemStorage) -> Vec<Entry> {
199 |         // transfer all unstable entries to "stable" storage.
200 |         s.wl().append(raft.raft_log.unstable_entries().to_vec());
201 |         raft.raft_log.stable_to(raft.raft_log.last_index(), raft.raft_log.last_term());
202 | 
203 |         let ents = raft.raft_log.next_ents();
204 |         raft.raft_log.applied_to(raft.raft_log.committed);
205 |         return ents;
206 |     }
207 | 
208 |     fn must_append_entry<S>(raft: &mut Raft<S>, mut ents: Vec<Entry>) where S: Storage {
209 |         assert!(raft.append_entry(&mut ents), "entry unexpectedly dropped");
210 |     }
211 | 
    // Minimal interface of a peer taking part in the simulated NetWork.
    trait StateMachine {
        // Feeds one message to the peer; an Err carries a description of
        // the failure.
        fn step(&mut self, m: Message) -> Result<(), String>;
        // Returns messages from the peer (presumably its outgoing messages;
        // confirm against the implementors).
        fn read_message(&mut self) -> Vec<Message>;
    }
216 | 
    // Simulated network connecting a set of state machines for tests.
    struct NetWork<M: StateMachine> {
        // The participating peers, keyed by id.
        peers: HashMap<u64, M>,
        // One storage per peer id.
        storage: HashMap<u64, SafeMemStorage>,
        // Per-connection drop settings.
        // NOTE(review): the value type is SafeMemStorage although
        // NetWork::drop takes a percentage; confirm the intended type.
        dropm: HashMap<ConnEm, SafeMemStorage>,
        // Message types that are being ignored.
        ignorem: HashMap<MessageType, bool>,
        // `msg_hook` is called for each message sent. It may inspect the
        // message and return true to send it or false to drop it
        msg_hook: Box<dyn Fn(Message) -> bool>,
    }
226 | 
    // Operations of the simulated network. Apart from `recover`, every
    // method is still a stub that panics via `unimplemented!`.
    impl<M: StateMachine> NetWork<M> {
        // Stub: not implemented yet, always panics.
        pub fn send(&mut self, msgs: Vec<Message>) {
            unimplemented!("unimplemented")
        }

        // Stub: not implemented yet, always panics.
        pub fn drop(&mut self, from: u64, to: u64, perc: f64) {
            unimplemented!("unimplemented")
        }

        // Stub: not implemented yet, always panics.
        pub fn cut(&mut self, one: u64, other: u64) {
            unimplemented!("unimplemented")
        }

        // Stub: not implemented yet, always panics.
        pub fn isolated(&mut self, id: u64) {
            unimplemented!("unimplemented")
        }

        // Stub: not implemented yet, always panics.
        pub fn ignore(&mut self, t: MessageType) {
            unimplemented!("unimplemented")
        }

        // Removes every drop and ignore rule.
        pub fn recover(&mut self) {
            self.dropm.clear();
            self.ignorem.clear();
        }

        // Stub: not implemented yet, always panics.
        pub fn filter(&mut self, msgs: Vec<Message>) -> Vec<Message> {
            unimplemented!("unimplemented")
        }
    }
257 | 
    // Identifies a directed connection between two peers; used as the key
    // type of NetWork::dropm.
    #[derive(Debug, Clone)]
    struct ConnEm {
        // Id of the sending peer.
        from: u64,
        // Id of the receiving peer.
        to: u64,
    }
263 | 
    // A state machine that swallows every message and never emits any.
    #[derive(Debug, Clone)]
    struct BlackHole {}

    impl StateMachine for BlackHole {
        // Accepts and discards the message.
        fn step(&mut self, m: Message) -> Result<(), String> {
            Ok(())
        }

        // Never has anything to send.
        fn read_message(&mut self) -> Vec<Message> {
            vec![]
        }
    }
276 | 
277 | 
278 |     pub fn ids_by_size(size: u64) -> Vec<u64> {
279 |         (1..=size).collect::<Vec<_>>()
280 |     }
281 | }


--------------------------------------------------------------------------------
/src/tracker/inflights.rs:
--------------------------------------------------------------------------------
  1 | use nom::lib::std::fmt::{Display, Formatter};
  2 | 
// Inflights limits the number of MsgApp (represented by the largest index
// contained within) sent to followers but not yet acknowledged by them. Callers
// use full() to check whether more messages can be sent, call add() whenever
// they are sending a new append, and release "quota" via free_le() whenever an
// ack is received.
//
// The in-flight indexes live in `buffer`, which is used as a ring: the live
// window starts at `start`, holds `count` items and wraps around at `size`.
#[derive(Default, PartialEq, Clone, Debug)]
pub struct Inflights {
    // the starting index in the buffer (position of the oldest inflight)
    start: usize,
    // number of inflights in the buffer
    count: usize,
    // the size of the buffer, i.e. the maximum number of inflights
    size: usize,
    // buffer contains the index of the last entry
    // inside one message
    buffer: Vec<u64>,
}
 20 | 
 21 | impl Display for Inflights {
 22 |     fn fmt(&self, f: &mut Formatter<'_>) -> ::std::fmt::Result {
 23 |         write!(
 24 |             f,
 25 |             "start:{}, count:{}, size:{}, is_full: {}, buffer:{:?}",
 26 |             self.start,
 27 |             self.count,
 28 |             self.size,
 29 |             self.full(),
 30 |             self.buffer
 31 |         )
 32 |     }
 33 | }
 34 | 
 35 | impl Inflights {
 36 |     pub fn new(size: u64) -> Self {
 37 |         Inflights {
 38 |             start: 0,
 39 |             count: 0,
 40 |             size: size as usize,
 41 |             buffer: vec![0].repeat(size as usize),
 42 |         }
 43 |     }
 44 | 
 45 |     // Add notifies the Inflights that a new message with the given index is being
 46 |     // dispatched. Full() must be called prior to Add() to verify that there is room
 47 |     // for one more message, and consecutive calls to add Add() must provide a
 48 |     // monotonic sequence of indexes.
 49 |     pub fn add(&mut self, inflight: u64) {
 50 |         if self.full() {
 51 |             panic!("cannot add into a Full inflights");
 52 |         }
 53 |         let mut next = self.start + self.count;
 54 |         let mut size = self.size;
 55 |         if next >= size {
 56 |             next -= size;
 57 |         }
 58 |         if next >= self.buffer.len() {
 59 |             self.grow();
 60 |         }
 61 |         self.buffer[next] = inflight;
 62 |         self.count += 1;
 63 |         if self.full() {
 64 |             info!("[has full {}]", self.count());
 65 |         }
 66 |     }
 67 | 
 68 |     /// The inflight buffer by doubling up tp `inflights.size`. We grow on demand
 69 |     /// instead of preallocating to `inflights.size` to handle system which have
 70 |     /// thousands of Raft groups per process.
 71 |     pub fn grow(&mut self) {
 72 |         let mut new_size = self.buffer.len() * 2;
 73 |         if new_size == 0 {
 74 |             new_size = 1;
 75 |         } else if new_size > self.size {
 76 |             new_size = self.size;
 77 |         }
 78 |         let mut new_buffer = Vec::with_capacity(new_size);
 79 |         new_buffer.extend_from_slice(&self.buffer);
 80 |         self.buffer = new_buffer;
 81 |     }
 82 | 
 83 |     /// Frees the inflights smaller or equal to the given `to` flight.
 84 |     pub fn free_le(&mut self, to: u64) {
 85 |         if self.count == 0 || to < self.buffer[self.start] {
 86 |             // out of the left side of the window
 87 |             return;
 88 |         }
 89 | 
 90 |         let mut idx = self.start;
 91 |         let mut i = 0;
 92 |         while i < self.count {
 93 |             if to < self.buffer[idx] {
 94 |                 // found the first large inflight
 95 |                 break;
 96 |             }
 97 |             let size = self.size;
 98 |             idx += 1;
 99 |             if idx >= size {
100 |                 idx -= size;
101 |             }
102 |             i += 1;
103 |         }
104 |         // free i inflights and set new start index
105 |         self.count -= i;
106 |         self.start = idx;
107 |         if self.count == 0 {
108 |             // inflights is empty, reset the start index so that we don't grow the
109 |             // buffer unnecessarily.
110 |             self.start = 0;
111 |         }
112 |     }
113 | 
114 |     // FreeFirstOne releases the first inflight. This is a no-op if nothing is inflight.
115 |     pub fn free_first_one(&mut self) {
116 |         self.free_le(self.buffer[self.start]);
117 |     }
118 | 
119 |     pub fn full(&self) -> bool {
120 |         self.count == self.size
121 |     }
122 | 
123 |     pub fn count(&self) -> usize {
124 |         self.count
125 |     }
126 | 
127 |     pub(crate) fn reset(&mut self) {
128 |         self.count = 0;
129 |         self.start = 0;
130 |     }
131 | }
132 | 
#[cfg(test)]
mod tests {
    use crate::tracker::inflights::Inflights;

    // Spells out a complete window state with the capacity of 10 that every
    // case below uses.
    fn state(start: usize, count: usize, buffer: Vec<u64>) -> Inflights {
        Inflights {
            start,
            count,
            size: 10,
            buffer,
        }
    }

    // Adds every index of `range` to `inf`, in order.
    fn add_all(inf: &mut Inflights, range: std::ops::Range<u64>) {
        for index in range {
            inf.add(index);
        }
    }

    #[test]
    fn it_inflights_add() {
        // window starting at slot 0
        let mut inf = state(0, 0, vec![0; 10]);
        add_all(&mut inf, 0..5);
        assert_eq!(inf, state(0, 5, vec![0, 1, 2, 3, 4, 0, 0, 0, 0, 0]));

        add_all(&mut inf, 5..10);
        assert_eq!(inf, state(0, 10, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]));

        // rotating case: window starting in the middle of the ring
        let mut in2 = state(5, 0, vec![0; 10]);
        add_all(&mut in2, 0..5);
        assert_eq!(in2, state(5, 5, vec![0, 0, 0, 0, 0, 0, 1, 2, 3, 4]));

        add_all(&mut in2, 5..10);
        assert_eq!(in2, state(5, 10, vec![5, 6, 7, 8, 9, 0, 1, 2, 3, 4]));
    }

    #[test]
    fn it_inflights_free_to() {
        let mut inf = Inflights::new(10);
        add_all(&mut inf, 0..10);

        inf.free_le(4);
        let after_four = state(5, 5, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
        assert_eq!(inf, after_four);

        // freeing up to the same index again changes nothing
        inf.free_le(4);
        assert_eq!(inf, after_four);

        inf.free_le(8);
        assert_eq!(inf, state(9, 1, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]));

        // rotating case
        add_all(&mut inf, 10..15);
        assert_eq!(inf, state(9, 6, vec![10, 11, 12, 13, 14, 5, 6, 7, 8, 9]));

        inf.free_le(12);
        assert_eq!(inf, state(3, 2, vec![10, 11, 12, 13, 14, 5, 6, 7, 8, 9]));

        inf.free_le(14);
        assert_eq!(inf, state(0, 0, vec![10, 11, 12, 13, 14, 5, 6, 7, 8, 9]));
    }

    #[test]
    fn it_inflights_free_first_one() {
        let mut inf = Inflights::new(10);
        add_all(&mut inf, 0..10);

        inf.free_first_one();
        assert_eq!(inf, state(1, 9, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]));
    }
}
257 | 


--------------------------------------------------------------------------------
/src/tracker/mod.rs:
--------------------------------------------------------------------------------
  1 | use crate::quorum::joint::JointConfig;
  2 | use crate::quorum::majority::MajorityConfig;
  3 | use crate::quorum::quorum::VoteResult::VoteWon;
  4 | use crate::quorum::quorum::{AckedIndexer, Index, VoteResult};
  5 | use crate::raftpb::raft::ConfState;
  6 | use crate::tracker::progress::{Progress, ProgressMap};
  7 | 
  8 | use std::collections::{HashMap, HashSet};
  9 | use std::fmt::{Display, Formatter};
 10 | use std::iter::Cloned;
 11 | 
 12 | pub mod inflights;
 13 | pub mod progress;
 14 | pub mod state;
 15 | 
// Config reflects the configuration tracked in a ProgressTracker.
#[derive(Default, Clone, PartialEq, Debug)]
pub struct Config {
    // The voting members, as a joint (incoming/outgoing) configuration.
    pub voters: JointConfig,
    // auto_leave is true if the configuration is joint and a transition to the
    // incoming configuration should be carried out automatically by Raft when
    // this is possible. If false, the configuration will be joint until the
    // application initiates that transition manually.
    pub auto_leave: bool,
    // learners is the set of ids corresponding to the learners active in the
    // current configuration.
    //
    // Invariant: learners and voters do not intersect, i.e. if a peer is in
    // either half of the joint config, it can't be a learner; if it is a
    // learner it can't be in either half of the joint config. This invariant
    // simplifies the implementation since it allows peers to have clarity about
    // their current role without taking into account joint consensus.
    pub learners: HashSet<u64>,
    // When we turn a voter into a learner during a joint consensus transition,
    // we cannot add the learner directly when entering the joint state. This is
    // because this would violate the invariant that the intersection of
    // voters and learners is empty. For example, assume a voter is removed and
    // immediately re-added as a learner (or in other words, it is demoted):
    //
    // Initially, the configuration will be
    //
    //  voters: {1, 2, 3}
    //  learners: {}
    //
    // and we want to demote 3. Entering the joint configuration, we naively get
    //
    //  voters: {1, 2} & {1, 2, 3}
    //  learners: {3}
    //
    // but this violates the invariant (3 is both voter and learner). Instead,
    // we get
    //
    //  voters: {1, 2} & {1, 2, 3}
    //  learners: {}
    //  learners_next: {3}
    //
    // where 3 is now still purely a voter, but we are remembering the intention
    // to make it a learner upon transitioning into the final configuration:
    //
    //  voters: {1, 2}
    //  learners: {3}
    //  learners_next: {}
    //
    // Note that learners_next is not used while adding a learner that is not
    // also a voter in the joint config. In this case, the learner is added
    // right away when entering the joint configuration, so that it is caught up
    // as soon as possible.
    pub learners_next: HashSet<u64>,
}
 70 | 
 71 | impl Display for Config {
 72 |     fn fmt(&self, f: &mut Formatter<'_>) -> ::std::fmt::Result {
 73 |         write!(f, "voters={}", self.voters).unwrap();
 74 |         if !self.learners.is_empty() {
 75 |             write!(
 76 |                 f,
 77 |                 " learners={}",
 78 |                 MajorityConfig {
 79 |                     votes: self.learners.clone()
 80 |                 }
 81 |             )
 82 |             .unwrap();
 83 |         }
 84 |         if !self.learners_next.is_empty() {
 85 |             write!(
 86 |                 f,
 87 |                 " learners_next={}",
 88 |                 MajorityConfig {
 89 |                     votes: self.learners_next.clone()
 90 |                 }
 91 |             )
 92 |             .unwrap();
 93 |         }
 94 |         if self.auto_leave {
 95 |             write!(f, " autoleave").unwrap();
 96 |         }
 97 |         Ok(())
 98 |     }
 99 | }
100 | 
/// ProgressTracker tracks the currently active configuration and the information
/// known about the nodes and learners in it. In particular, it tracks the match
/// index for each peer, which in turn allows reasoning about the committed index.
#[derive(Debug, PartialEq)]
pub struct ProgressTracker {
    /// The currently active configuration.
    pub config: Config,
    /// Replication progress of every tracked peer, keyed by peer id.
    pub progress: ProgressMap,
    /// Votes recorded so far, keyed by peer id: `true` means granted,
    /// `false` means rejected.
    pub votes: HashMap<u64, bool>,
    /// The `max_inflight` value this tracker was created with.
    pub max_inflight: u64,
}
111 | 
112 | impl Clone for ProgressTracker {
113 |     fn clone(&self) -> Self {
114 |         let mut to = ProgressTracker::new(self.max_inflight);
115 |         to.config = self.config.clone();
116 |         let mut progress_inner = HashMap::new();
117 |         progress_inner.extend(
118 |             self.progress
119 |                 .iter()
120 |                 .map(|(key, value)| (*key, value.clone())),
121 |         );
122 |         to.progress = ProgressMap::new(progress_inner);
123 |         to.votes = self.votes.clone();
124 |         to
125 |     }
126 | }
127 | 
128 | impl ProgressTracker {
129 |     pub fn new(max_inflight: u64) -> ProgressTracker {
130 |         let mut p = ProgressTracker {
131 |             config: Default::default(),
132 |             progress: ProgressMap::default(),
133 |             votes: Default::default(),
134 |             max_inflight,
135 |         };
136 |         p
137 |     }
138 | 
139 |     // ConfState returns a ConfState representing the active configuration.
140 |     pub fn config_state(&self) -> ConfState {
141 |         let mut conf_state = ConfState::new();
142 |         conf_state.set_voters(self.config.voters.incoming.as_slice());
143 |         conf_state.set_voters_outgoing(self.config.voters.outgoing.as_slice());
144 |         conf_state.set_learners(
145 |             self.config
146 |                 .learners
147 |                 .iter()
148 |                 .map(|learner| *learner)
149 |                 .collect(),
150 |         );
151 |         conf_state.set_learners_next(
152 |             self.config
153 |                 .learners_next
154 |                 .iter()
155 |                 .map(|learner| *learner)
156 |                 .collect(),
157 |         );
158 |         conf_state.set_auto_leave(self.config.auto_leave);
159 |         conf_state
160 |     }
161 | 
162 |     // is_singleton returns true if (and only if) there is only one voting number
163 |     // (i.e. the leader) in the current configuration.
164 |     pub fn is_singleton(&self) -> bool {
165 |         self.config.voters.is_singleton()
166 |     }
167 | 
168 |     // committed returns the largest log index known to be committed based on what
169 |     // the voting members of the group have acknowledged.
170 |     pub fn committed(&mut self) -> u64 {
171 |         self.config
172 |             .voters
173 |             .committed(&MatchAckIndexer::from(&self.progress))
174 |     }
175 | 
176 |     // visit invokes the supplied closure for all tracked progresses in stable order.
177 |     pub fn visit<F>(&mut self, mut f: F)
178 |     where
179 |         F: FnMut(u64, &mut Progress),
180 |     {
181 |         let n = self.progress.len();
182 |         // We need to sort the IDs and don't want to allocate since this is hot code.
183 |         // The optimized here mirrors that in `(MajorityConfig).CommittedIndex`,
184 |         // see there for details
185 |         // TODO optimized
186 |         let mut ids: Vec<u64> = Vec::new();
187 |         ids.extend(self.progress.keys().into_iter());
188 |         ids.sort_by_key(|k| *k);
189 |         for id in ids {
190 |             let progress = self.progress.get_mut(&id).unwrap();
191 |             f(id, progress);
192 |         }
193 |     }
194 | 
195 |     #[inline]
196 |     pub fn visit_nodes(&self) -> Vec<u64> {
197 |         let mut ids: Vec<u64> = self.progress.keys().map(|id| *id).collect::<Vec<_>>();
198 |         ids.sort_by_key(|k| *k);
199 |         ids
200 |     }
201 | 
202 |     // returns true if the quorum is active from the view of the local
203 |     // raft state machine. Otherwise, it returns false.
204 |     pub fn quorum_active(&mut self) -> bool {
205 |         let mut votes = HashMap::new();
206 |         self.visit(|id, progress| {
207 |             if progress.is_learner {
208 |                 return;
209 |             }
210 |             votes.insert(id, progress.recent_active);
211 |         });
212 |         self.config.voters.vote_result(&votes) == VoteWon
213 |     }
214 | 
215 |     // returns a sorted slice of voters.
216 |     pub fn voter_nodes(&self) -> Vec<u64> {
217 |         let mut nodes: Vec<u64> = self.config.voters.ids().iter().map(|id| *id).collect();
218 |         nodes.sort_by_key(|id| *id);
219 |         nodes
220 |     }
221 | 
222 |     // returns a sorted slice of voters
223 |     pub fn learner_nodes(&self) -> Vec<u64> {
224 |         let mut nodes: Vec<u64> = self.config.learners.iter().map(|id| *id).collect();
225 |         nodes.sort_by_key(|id| *id);
226 |         nodes
227 |     }
228 | 
229 |     // prepares for a new round of vote counting via record_vote.
230 |     pub fn reset_votes(&mut self) {
231 |         self.votes.clear();
232 |     }
233 | 
234 |     // records that the node with the given id voted for this Raft
235 |     // instance if v == true (and declined it otherwise)
236 |     pub fn record_vote(&mut self, id: u64, v: bool) {
237 |         self.votes.entry(id).or_insert(v);
238 |     }
239 | 
240 |     // returns the number of granted and rejected votes, and whether the election outcome is known
241 |     pub fn tally_votes(&self) -> (usize, usize, VoteResult) {
242 |         // Make sure to populate granted/rejected correctly even if the votes slice
243 |         // contains members no larger part of the configuration. This doesn't really
244 |         // matter in the way the numbers are used (they're information), but might
245 |         // as well get it right.
246 |         let mut granted = 0;
247 |         let mut rejected = 0;
248 |         for (id, progress) in self.progress.iter() {
249 |             if progress.is_learner {
250 |                 continue;
251 |             }
252 |             match self.votes.get(id) {
253 |                 Some(v) => {
254 |                     if *v {
255 |                         granted += 1;
256 |                     } else {
257 |                         rejected += 1;
258 |                     }
259 |                 }
260 |                 None => {}
261 |             }
262 |         }
263 |         let res = self.config.voters.vote_result(&self.votes);
264 |         info!("grant: {}, rejected: {}, res: {:?}", granted, rejected, res);
265 |         (granted, rejected, res)
266 |     }
267 | }
268 | 
269 | pub(crate) type MatchAckIndexer = HashMap<u64, Progress>;
270 | 
271 | // implements IndexLookuper
272 | impl AckedIndexer for MatchAckIndexer {
273 |     fn acked_index(&self, voter_id: &u64) -> Option<&u64> {
274 |         self.get(voter_id).map(|pr| &pr._match)
275 |     }
276 | }
277 | 
278 | impl From<&ProgressMap> for MatchAckIndexer {
279 |     fn from(progress: &ProgressMap) -> Self {
280 |         let mut match_ack_indexer: MatchAckIndexer = Default::default();
281 |         match_ack_indexer.clone_from(progress.to_map());
282 |         match_ack_indexer
283 |     }
284 | }
285 | 


--------------------------------------------------------------------------------
/src/tracker/state.rs:
--------------------------------------------------------------------------------
 1 | use std::fmt::{self, Display, Error, Formatter};
 2 | 
 3 | // StateType is the state of a tracked follower.
 4 | #[derive(Clone, Debug, PartialEq)]
 5 | pub enum StateType {
 6 |     // StateProbe indicates that a follower whose last index isn't known. Such a
 7 |     // follower is "probe" (i.e. an append sent periodically) to narrow down
 8 |     // its last index. In the ideal (and common) case, only one round of probing
 9 |     // is necessary as the follower will react with a hint. Followers that are
10 |     // probed over extend periods of time are often offline.
11 |     Probe,
12 |     // StateReplicate is the steady in which a follower eagerly receives
13 |     // log entries to append to its log.
14 |     Replicate,
15 |     // StateSnapshot indicates a follower that needs log entries not avaliable
16 |     // from the leader's Raft log. Such a follower needs a full snapshot to
17 |     // return a StateReplicate
18 |     Snapshot,
19 | }
20 | 
21 | impl Default for StateType {
22 |     fn default() -> Self {
23 |         StateType::Probe
24 |     }
25 | }
26 | 
27 | impl Display for StateType {
28 |     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
29 |         match self {
30 |             StateType::Probe => write!(f, "StateProbe"),
31 |             StateType::Replicate => {
32 |                 write!(f, "StateReplicate")
33 |             }
34 |             StateType::Snapshot => write!(f, "StateSnapshot"),
35 |         }
36 |     }
37 | }
38 | 
#[cfg(test)]
mod tests {
    use crate::tracker::state::StateType;

    // The probe state is displayed under its "State"-prefixed name.
    #[test]
    fn it_works() {
        let rendered = format!("{}", StateType::Probe);
        assert_eq!(rendered, "StateProbe");
    }
}
47 | 


--------------------------------------------------------------------------------
/src/unstable.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 The etcd Authors
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | use crate::raftpb::raft::{Entry, Snapshot};
 16 | 
// unstable.entries[i] has raft log position i+unstable.offset.
// Note that unstable.offset may be less than the highest log
// position in storage; this means that the next write to storage
// might need to truncate the log before persisting unstable.entries.
#[derive(Default, Clone)]
pub(crate) struct Unstable {
    // the incoming unstable snapshot, if any.
    pub(crate) snapshot: Option<Snapshot>,
    // all entries that have not yet been written to storage.
    pub(crate) entries: Vec<Entry>,
    // the raft log position of the first entry in `entries`
    pub(crate) offset: u64,
}
 30 | 
 31 | impl Unstable {
 32 |     // Returns the index of the first possible entry in entries if it has a snapshot.
 33 |     pub(crate) fn maybe_first_index(&self) -> Option<u64> {
 34 |         self.snapshot
 35 |             .as_ref()
 36 |             .map(|snapshot| snapshot.get_metadata().get_index() + 1)
 37 |     }
 38 | 
 39 |     // Returns last index if it has at least one unstable entry or snapshot
 40 |     pub(crate) fn maybe_last_index(&self) -> Option<u64> {
 41 |         if !self.entries.is_empty() {
 42 |             return Some(self.offset + self.entries.len() as u64 - 1);
 43 |         }
 44 |         self.snapshot
 45 |             .as_ref()
 46 |             .map(|snapshot| snapshot.get_metadata().get_index())
 47 |     }
 48 | 
 49 |     // Returns the term of the entry at index i, if there is any.
 50 |     pub(crate) fn maybe_term(&self, i: u64) -> Option<u64> {
 51 |         if i < self.offset {
 52 |             if let Some(snapshot) = self.snapshot.as_ref() {
 53 |                 if snapshot.get_metadata().get_index() == i {
 54 |                     return Some(snapshot.get_metadata().get_term());
 55 |                 }
 56 |             }
 57 |             return None;
 58 |         }
 59 |         match self.maybe_last_index() {
 60 |             Some(index) => {
 61 |                 if i > index {
 62 |                     None
 63 |                 } else {
 64 |                     Some(self.entries[(i - self.offset) as usize].Term)
 65 |                 }
 66 |             }
 67 |             None => None,
 68 |         }
 69 |     }
 70 | 
 71 |     // If self.entries had written to storage then clears these entries by stable_to
 72 |     pub(crate) fn stable_to(&mut self, i: u64, t: u64) {
 73 |         if let Some(gt) = self.maybe_term(i) {
 74 |             // if i < offset, term is matched with the snapshot
 75 |             // only update the unstable entries if term is matched with
 76 |             // an unstable entry
 77 |             if gt == t && i >= self.offset {
 78 |                 let start = i + 1 - self.offset;
 79 |                 // TODO: Optz entries memory 
 80 |                 self.entries.drain(..start as usize);
 81 |                 self.offset = i + 1;
 82 |             }
 83 |         }
 84 |     }
 85 | 
 86 |    
 87 |     // As same to stable_to, if self.snapshot had written to storage then reset snapshot
 88 |     pub(crate) fn stable_snap_to(&mut self, i: u64) {
 89 |         if let Some(ref snapshot) = self.snapshot {
 90 |             if snapshot.get_metadata().get_index() == i {
 91 |                 self.snapshot = None;
 92 |             }
 93 |         }
 94 |     }
 95 | 
 96 |     pub(crate) fn restore(&mut self, s: Snapshot) {
 97 |         self.offset = s.get_metadata().get_index() + 1;
 98 |         self.entries.clear();
 99 |         self.snapshot = Some(s);
100 |     }
101 | 
102 |     pub(crate) fn truncate_and_append(&mut self, ents: &[Entry]) {
103 |         match ents[0].get_Index() {
104 |             after if after == self.offset + self.entries.len() as u64 => {
105 |                 // after is the next index in the self.entries
106 |                 // directly append
107 |                 self.entries.extend_from_slice(ents);
108 |             }
109 |             after if after <= self.offset => {
110 |                 info!("replace the unstable entries from index {}", after);
111 |                 // The log is being truncated to before our current offset
112 |                 // portion, so set the offset and replace the entries
113 |                 self.offset = after;
114 |                 self.entries.clear();
115 |                 self.entries.extend_from_slice(ents);
116 |             }
117 |             after => {
118 |                 // truncate to after and copy to self.entries
119 |                 // then append
120 |                 info!("truncate the unstable entries before index {}", after);
121 |                 self.entries.truncate((after - self.offset) as usize);
122 |                 self.entries.extend_from_slice(&ents);
123 |             }
124 |         }
125 |     }
126 | 
127 |     pub(crate) fn slice(&self, lo: u64, hi: u64) -> Vec<Entry> {
128 |         self.must_check_out_of_bounds(lo, hi);
129 |         self.entries[(lo - self.offset) as usize..(hi - self.offset) as usize].to_vec()
130 |     }
131 | 
132 |     // self.offset <= lo <= hi <= self.offset + self.entries.len()
133 |     fn must_check_out_of_bounds(&self, lo: u64, hi: u64) {
134 |         if lo > hi {
135 |             panic!("invalid unstable.slice {} > {}", lo, hi);
136 |         }
137 |         let upper = self.offset + self.entries.len() as u64;
138 |         if lo < self.offset || hi > upper {
139 |             panic!(
140 |                 "unstable.slice[{}, {}] out of bound [{}, {}]",
141 |                 lo, hi, self.offset, upper
142 |             );
143 |         }
144 |     }
145 | }
146 | 
147 | #[cfg(test)]
148 | mod tests {
149 |     use crate::raftpb::raft::{Entry, Snapshot};
150 |     use crate::unstable::Unstable;
151 | 
152 |     #[test]
153 |     fn it_works() {
154 |         assert_eq!(2 + 2, 4);
155 |     }
156 | 
157 |     #[test]
158 |     fn it_unstable_maybe_first_index() {
159 |         // (entries, offset, snapshot, w_ok, w_index)
160 |         let tests = vec![
161 |             // no snapshot
162 |             (vec![new_entry(5, 1)], 0, None, false, 0),
163 |             (vec![], 0, None, false, 0),
164 |             // has snapshot
165 |             (vec![new_entry(5, 1)], 5, Some(new_snapshot(4, 1)), true, 5),
166 |             (vec![], 5, Some(new_snapshot(4, 1)), true, 5),
167 |         ];
168 |         for (i, (entries, offset, snapshot, w_ok, w_index)) in tests.iter().enumerate() {
169 |             let mut u = Unstable {
170 |                 snapshot: snapshot.clone(),
171 |                 entries: entries.clone(),
172 |                 offset: *offset,
173 |             };
174 |             match u.maybe_first_index() {
175 |                 Some(i) => {
176 |                     assert_eq!(i, *w_index);
177 |                 }
178 |                 None => assert!(!*w_ok),
179 |             }
180 |         }
181 |     }
182 | 
183 |     #[test]
184 |     fn it_maybe_last_index() {
185 |         // (entries, offset, snapshot, w_ok, w_index)
186 |         let tests = vec![
187 |             // last in entries
188 |             (vec![new_entry(5, 1)], 5, None, true, 5),
189 |             (vec![new_entry(5, 1)], 5, Some(new_snapshot(4, 1)), true, 5),
190 |             // last in snapshot
191 |             (vec![], 5, Some(new_snapshot(4, 1)), true, 4),
192 |             // empty unstable
193 |             (vec![], 0, None, false, 0),
194 |         ];
195 |         for (i, (entries, offset, snapshot, w_ok, w_index)) in tests.iter().enumerate() {
196 |             let u = Unstable {
197 |                 snapshot: snapshot.clone(),
198 |                 entries: entries.clone(),
199 |                 offset: *offset,
200 |             };
201 |             match u.maybe_last_index() {
202 |                 Some(i) => {
203 |                     assert_eq!(i, *w_index);
204 |                 }
205 |                 None => assert!(!*w_ok),
206 |             }
207 |         }
208 |     }
209 | 
210 |     #[test]
211 |     fn it_unstable_maybe_term() {
212 |         // (entries, offset, snapshot, index, w_ok, w_term)
213 |         let tests = vec![
214 |             // term from entries
215 |             (vec![new_entry(5, 1)], 5, None, 5, true, 1),
216 |             (vec![new_entry(5, 1)], 5, None, 6, false, 0),
217 |             (
218 |                 vec![new_entry(5, 1)],
219 |                 5,
220 |                 Some(new_snapshot(4, 1)),
221 |                 5,
222 |                 true,
223 |                 1,
224 |             ),
225 |             (
226 |                 vec![new_entry(5, 1)],
227 |                 5,
228 |                 Some(new_snapshot(4, 1)),
229 |                 6,
230 |                 false,
231 |                 0,
232 |             ),
233 |             // term from snapshot
234 |             (
235 |                 vec![new_entry(5, 1)],
236 |                 5,
237 |                 Some(new_snapshot(4, 1)),
238 |                 4,
239 |                 true,
240 |                 1,
241 |             ),
242 |             (
243 |                 vec![new_entry(5, 1)],
244 |                 5,
245 |                 Some(new_snapshot(4, 1)),
246 |                 3,
247 |                 false,
248 |                 0,
249 |             ),
250 |             (vec![], 5, Some(new_snapshot(4, 1)), 5, false, 0),
251 |             (vec![], 5, Some(new_snapshot(4, 1)), 4, true, 1),
252 |             (vec![], 0, None, 5, false, 0),
253 |         ];
254 |         for (i, (entries, offset, snapshot, index, w_ok, w_term)) in tests.iter().enumerate() {
255 |             let u = Unstable {
256 |                 snapshot: snapshot.clone(),
257 |                 entries: entries.clone(),
258 |                 offset: *offset,
259 |             };
260 |             match u.maybe_term(*index) {
261 |                 Some(i) => assert_eq!(i, *w_term),
262 |                 None => assert!(!*w_ok),
263 |             }
264 |         }
265 |     }
266 | 
267 |     #[test]
268 |     fn it_unstable_restore() {
269 |         let mut u = Unstable {
270 |             snapshot: Some(new_snapshot(4, 1)),
271 |             entries: vec![new_entry(5, 1)],
272 |             offset: 5,
273 |         };
274 |         let s = new_snapshot(6, 2);
275 |         u.restore(s.clone());
276 |         assert_eq!(u.offset, s.get_metadata().get_index() + 1);
277 |         assert!(u.entries.is_empty());
278 |         assert_eq!(u.snapshot.unwrap(), s);
279 |     }
280 | 
281 |     #[test]
282 |     fn it_unstable_stable_to() {
283 |         // (entries, offset, snapshot, index, term, w_offset, w_len)
284 |         let tests = vec![
285 |             (vec![], 0, None, 5, 1, 0, 0),
286 |             (vec![new_entry(5, 1)], 5, None, 5, 1, 6, 0), // stable to the first entry
287 |             (new_batch_entry(vec![(5, 1), (6, 1)]), 5, None, 5, 1, 6, 1), // stable to the first entry
288 |             (vec![new_entry(6, 2)], 6, None, 6, 1, 6, 1), // stable to the first entry and term mismatch
289 |             (vec![new_entry(5, 1)], 5, None, 4, 1, 5, 1), // stable to old entry
290 |             (vec![new_entry(5, 1)], 5, None, 4, 2, 5, 1), // stable to old entry
291 |             // with snapshot
292 |             (
293 |                 vec![new_entry(5, 1)],
294 |                 5,
295 |                 Some(new_snapshot(4, 1)),
296 |                 5,
297 |                 1,
298 |                 6,
299 |                 0,
300 |             ), // stable to the first entry
301 |             (
302 |                 new_batch_entry(vec![(5, 1), (6, 1)]),
303 |                 5,
304 |                 Some(new_snapshot(4, 1)),
305 |                 5,
306 |                 1,
307 |                 6,
308 |                 1,
309 |             ), // stable to the first entry
310 |             (
311 |                 vec![new_entry(6, 2)],
312 |                 6,
313 |                 Some(new_snapshot(5, 1)),
314 |                 6,
315 |                 1,
316 |                 6,
317 |                 1,
318 |             ), // stable to the first entry and term mismatch
319 |             (
320 |                 vec![new_entry(5, 1)],
321 |                 5,
322 |                 Some(new_snapshot(4, 1)),
323 |                 4,
324 |                 1,
325 |                 5,
326 |                 1,
327 |             ), // stable to snapshot
328 |             (
329 |                 vec![new_entry(5, 2)],
330 |                 5,
331 |                 Some(new_snapshot(4, 2)),
332 |                 4,
333 |                 1,
334 |                 5,
335 |                 1,
336 |             ), // stable to old entry
337 |         ];
338 |         for (i, (entries, offset, snapshot, index, term, w_offset, w_len)) in
339 |         tests.iter().enumerate()
340 |         {
341 |             let mut u = Unstable {
342 |                 snapshot: snapshot.clone(),
343 |                 entries: entries.clone(),
344 |                 offset: *offset,
345 |             };
346 |             u.stable_to(*index, *term);
347 |             assert_eq!(u.offset, *w_offset);
348 |             assert_eq!(u.entries.len(), *w_len);
349 |         }
350 |     }
351 | 
352 |     #[test]
353 |     fn it_unstable_stable_truncate_and_append() {
354 |         // (entries, offset, snapshot, to_append, w_offset, w_entries)
355 |         let tests: Vec<(_, _, Option<Snapshot>, _, _, _)> = vec![
356 |             // append to the end
357 |             (
358 |                 vec![new_entry(5, 1)],
359 |                 5,
360 |                 None,
361 |                 new_batch_entry(vec![(6, 1), (7, 1)]),
362 |                 5,
363 |                 new_batch_entry(vec![(5, 1), (6, 1), (7, 1)]),
364 |             ),
365 |             // replace the unstable entries
366 |             (
367 |                 vec![new_entry(5, 1)],
368 |                 5,
369 |                 None,
370 |                 new_batch_entry(vec![(5, 2), (6, 2)]),
371 |                 5,
372 |                 new_batch_entry(vec![(5, 2), (6, 2)]),
373 |             ),
374 |             (
375 |                 vec![new_entry(5, 1)],
376 |                 5,
377 |                 None,
378 |                 new_batch_entry(vec![(4, 2), (5, 2), (6, 2)]),
379 |                 4,
380 |                 new_batch_entry(vec![(4, 2), (5, 2), (6, 2)]),
381 |             ),
382 |             // truncate the existing entries and append
383 |             (
384 |                 new_batch_entry(vec![(5, 1), (6, 1), (7, 1)]),
385 |                 5,
386 |                 None,
387 |                 new_batch_entry(vec![(6, 2)]),
388 |                 5,
389 |                 new_batch_entry(vec![(5, 1), (6, 2)]),
390 |             ),
391 |             (
392 |                 new_batch_entry(vec![(5, 1), (6, 1), (7, 1)]),
393 |                 5,
394 |                 None,
395 |                 new_batch_entry(vec![(7, 2), (8, 2)]),
396 |                 5,
397 |                 new_batch_entry(vec![(5, 1), (6, 1), (7, 2), (8, 2)]),
398 |             ),
399 |         ];
400 | 
401 |         for (entries, offset, snapshot, to_append, w_offset, w_entries) in tests {
402 |             let mut u = Unstable {
403 |                 snapshot,
404 |                 entries,
405 |                 offset,
406 |             };
407 |             u.truncate_and_append(to_append.as_slice());
408 |             assert_eq!(u.offset, w_offset);
409 |             assert_eq!(u.entries, w_entries);
410 |         }
411 |     }
412 | 
413 |     fn new_entry(index: u64, term: u64) -> Entry {
414 |         let mut entry = Entry::new();
415 |         entry.set_Term(term);
416 |         entry.set_Index(index);
417 |         entry
418 |     }
419 | 
420 |     fn new_batch_entry(batch: Vec<(u64, u64)>) -> Vec<Entry> {
421 |         batch
422 |             .iter()
423 |             .map(|(index, term)| new_entry(*index, *term))
424 |             .collect()
425 |     }
426 | 
427 |     fn new_snapshot(index: u64, term: u64) -> Snapshot {
428 |         let mut snapshot = Snapshot::new();
429 |         snapshot.mut_metadata().set_index(index);
430 |         snapshot.mut_metadata().set_term(term);
431 |         snapshot
432 |     }
433 | }
434 | 


--------------------------------------------------------------------------------
/src/util/mod.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2015 The etcd Authors
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | use crate::raftpb::raft::{Entry, HardState, MessageType};
16 | use protobuf::Message;
17 | 
18 | pub fn is_local_message(msg_type: MessageType) -> bool {
19 |     msg_type == MessageType::MsgHup
20 |         || msg_type == MessageType::MsgBeat
21 |         || msg_type == MessageType::MsgUnreachable
22 |         || msg_type == MessageType::MsgSnapStatus
23 |         || msg_type == MessageType::MsgCheckQuorum
24 | }
25 | 
26 | // TODO: add more information
27 | pub fn is_response_message(msg_type: MessageType) -> bool {
28 |     msg_type == MessageType::MsgAppResp
29 |         || msg_type == MessageType::MsgVoteResp
30 |         || msg_type == MessageType::MsgHeartbeatResp
31 |         || msg_type == MessageType::MsgUnreachable
32 |         || msg_type == MessageType::MsgPreVoteResp
33 | }
34 | 
35 | // TODO:
36 | pub fn is_hard_state_equal(a: &HardState, b: &HardState) -> bool {
37 |     a.get_term() == b.get_term() && a.get_vote() == b.get_vote() || a.get_commit() == b.get_commit()
38 | }
39 | 
40 | // [0..max_size]
41 | pub fn limit_size(ents: Vec<Entry>, max_size: u64) -> Vec<Entry> {
42 |     if ents.is_empty() {
43 |         return vec![];
44 |     }
45 |     let mut size = ents[0].compute_size() as u64;
46 |     let mut limit = 1;
47 |     while limit < ents.len() {
48 |         size += ents[limit].compute_size() as u64;
49 |         if size > max_size {
50 |             break;
51 |         }
52 |         limit += 1;
53 |     }
54 |     ents[..limit].to_vec()
55 | }
56 | 
57 | pub fn vote_resp_msg_type(msgt: MessageType) -> MessageType {
58 |     match msgt {
59 |         MessageType::MsgVote => MessageType::MsgVoteResp,
60 |         MessageType::MsgPreVote => MessageType::MsgPreVoteResp,
61 |         _ => panic!("not a vote message: {:?}", msgt),
62 |     }
63 | }
64 | 


--------------------------------------------------------------------------------