├── .github
└── workflows
│ ├── gh-pages.yml
│ └── test.yml
├── .gitignore
├── Cargo.toml
├── LICENSE
├── README.md
├── examples
├── README.md
├── raftcat.rs
├── simple.rs
└── threaded.rs
├── src
├── core.rs
├── lib.rs
├── log.rs
├── log
│ ├── mem.rs
│ └── tests.rs
├── macros.rs
├── message.rs
├── node.rs
├── prelude.rs
└── raft.proto
└── tests
├── commit.rs
├── common.rs
├── leader.rs
├── term.rs
└── voting.rs
/.github/workflows/gh-pages.yml:
--------------------------------------------------------------------------------
1 | name: gh-pages
2 | on:
3 | push:
4 | branches:
5 | - master
6 |
7 | defaults:
8 | run:
9 | shell: bash
10 | jobs:
11 | gh-pages:
12 | name: gh-pages
13 | runs-on: ubuntu-latest
14 | steps:
15 | - name: checkout
16 | uses: actions/checkout@v2
17 |
18 | - name: cargo doc
19 | run: cargo doc --verbose --lib --no-deps
20 |
21 | - name: force push docs to gh-pages branch
22 | run: |
23 | git config user.name "Github CI -- gh-pages"
24 | git config user.email "<>"
25 |
26 | mv target/doc/ docs/
27 | git add docs
28 | git commit -m "render rustdocs"
29 | git push -f origin HEAD:gh-pages
30 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: test
2 | on: [push]
3 |
4 | defaults:
5 | run:
6 | shell: bash
7 | jobs:
8 | build-and-test-release:
9 | name: build-and-test-release
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: checkout
13 | uses: actions/checkout@v2
14 | with:
15 | fetch-depth: 1
16 |
17 | - name: cargo build --release
18 | run: cargo build --release --verbose
19 |
20 | - name: cargo test --release
21 | run: cargo test --release --verbose
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | Cargo.lock
3 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "simple-raft"
3 | version = "0.2.0"
4 | edition = "2018"
5 |
6 | description = "A minimal implementation of the Raft consensus algorithm"
7 | license = "AGPL-3.0-or-later"
8 | repository = "https://github.com/simple-raft-rs/raft-rs"
9 | categories = ["database-implementations", "no-std"]
10 | keywords = ["raft", "no_std"]
11 |
12 | exclude = [".github/"]
13 |
14 | [features]
15 | default = ["prost"]
16 |
17 | [dependencies]
18 | bytes = { version = "1.0", default-features = false, features = [] }
19 | log = "0.4"
20 | prost = { version = "0.7", default-features = false, features = ["prost-derive"], optional = true }
21 | rand_core = { version = "0.6", default-features = false, features = [] }
22 |
23 | [dev-dependencies]
24 | derive_more = "0.99"
25 | env_logger = { version = "0.8", default-features = false, features = [] }
26 | itertools = "0.10"
27 | rand = "0.8"
28 | rand_chacha = "0.3"
29 |
30 | [[example]]
31 | name = "simple"
32 | test = true
33 |
34 | [[example]]
35 | name = "threaded"
36 | test = true
37 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU AFFERO GENERAL PUBLIC LICENSE
2 | Version 3, 19 November 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU Affero General Public License is a free, copyleft license for
11 | software and other kinds of works, specifically designed to ensure
12 | cooperation with the community in the case of network server software.
13 |
14 | The licenses for most software and other practical works are designed
15 | to take away your freedom to share and change the works. By contrast,
16 | our General Public Licenses are intended to guarantee your freedom to
17 | share and change all versions of a program--to make sure it remains free
18 | software for all its users.
19 |
20 | When we speak of free software, we are referring to freedom, not
21 | price. Our General Public Licenses are designed to make sure that you
22 | have the freedom to distribute copies of free software (and charge for
23 | them if you wish), that you receive source code or can get it if you
24 | want it, that you can change the software or use pieces of it in new
25 | free programs, and that you know you can do these things.
26 |
27 | Developers that use our General Public Licenses protect your rights
28 | with two steps: (1) assert copyright on the software, and (2) offer
29 | you this License which gives you legal permission to copy, distribute
30 | and/or modify the software.
31 |
32 | A secondary benefit of defending all users' freedom is that
33 | improvements made in alternate versions of the program, if they
34 | receive widespread use, become available for other developers to
35 | incorporate. Many developers of free software are heartened and
36 | encouraged by the resulting cooperation. However, in the case of
37 | software used on network servers, this result may fail to come about.
38 | The GNU General Public License permits making a modified version and
39 | letting the public access it on a server without ever releasing its
40 | source code to the public.
41 |
42 | The GNU Affero General Public License is designed specifically to
43 | ensure that, in such cases, the modified source code becomes available
44 | to the community. It requires the operator of a network server to
45 | provide the source code of the modified version running there to the
46 | users of that server. Therefore, public use of a modified version, on
47 | a publicly accessible server, gives the public access to the source
48 | code of the modified version.
49 |
50 | An older license, called the Affero General Public License and
51 | published by Affero, was designed to accomplish similar goals. This is
52 | a different license, not a version of the Affero GPL, but Affero has
53 | released a new version of the Affero GPL which permits relicensing under
54 | this license.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | TERMS AND CONDITIONS
60 |
61 | 0. Definitions.
62 |
63 | "This License" refers to version 3 of the GNU Affero General Public License.
64 |
65 | "Copyright" also means copyright-like laws that apply to other kinds of
66 | works, such as semiconductor masks.
67 |
68 | "The Program" refers to any copyrightable work licensed under this
69 | License. Each licensee is addressed as "you". "Licensees" and
70 | "recipients" may be individuals or organizations.
71 |
72 | To "modify" a work means to copy from or adapt all or part of the work
73 | in a fashion requiring copyright permission, other than the making of an
74 | exact copy. The resulting work is called a "modified version" of the
75 | earlier work or a work "based on" the earlier work.
76 |
77 | A "covered work" means either the unmodified Program or a work based
78 | on the Program.
79 |
80 | To "propagate" a work means to do anything with it that, without
81 | permission, would make you directly or secondarily liable for
82 | infringement under applicable copyright law, except executing it on a
83 | computer or modifying a private copy. Propagation includes copying,
84 | distribution (with or without modification), making available to the
85 | public, and in some countries other activities as well.
86 |
87 | To "convey" a work means any kind of propagation that enables other
88 | parties to make or receive copies. Mere interaction with a user through
89 | a computer network, with no transfer of a copy, is not conveying.
90 |
91 | An interactive user interface displays "Appropriate Legal Notices"
92 | to the extent that it includes a convenient and prominently visible
93 | feature that (1) displays an appropriate copyright notice, and (2)
94 | tells the user that there is no warranty for the work (except to the
95 | extent that warranties are provided), that licensees may convey the
96 | work under this License, and how to view a copy of this License. If
97 | the interface presents a list of user commands or options, such as a
98 | menu, a prominent item in the list meets this criterion.
99 |
100 | 1. Source Code.
101 |
102 | The "source code" for a work means the preferred form of the work
103 | for making modifications to it. "Object code" means any non-source
104 | form of a work.
105 |
106 | A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 |
111 | The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form. A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 |
122 | The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities. However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work. For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 |
135 | The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 |
139 | The Corresponding Source for a work in source code form is that
140 | same work.
141 |
142 | 2. Basic Permissions.
143 |
144 | All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met. This License explicitly affirms your unlimited
147 | permission to run the unmodified Program. The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work. This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 |
152 | You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force. You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright. Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 |
163 | Conveying under any other circumstances is permitted solely under
164 | the conditions stated below. Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 |
167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 |
169 | No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 |
175 | When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 |
183 | 4. Conveying Verbatim Copies.
184 |
185 | You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 |
193 | You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 |
196 | 5. Conveying Modified Source Versions.
197 |
198 | You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 |
202 | a) The work must carry prominent notices stating that you modified
203 | it, and giving a relevant date.
204 |
205 | b) The work must carry prominent notices stating that it is
206 | released under this License and any conditions added under section
207 | 7. This requirement modifies the requirement in section 4 to
208 | "keep intact all notices".
209 |
210 | c) You must license the entire work, as a whole, under this
211 | License to anyone who comes into possession of a copy. This
212 | License will therefore apply, along with any applicable section 7
213 | additional terms, to the whole of the work, and all its parts,
214 | regardless of how they are packaged. This License gives no
215 | permission to license the work in any other way, but it does not
216 | invalidate such permission if you have separately received it.
217 |
218 | d) If the work has interactive user interfaces, each must display
219 | Appropriate Legal Notices; however, if the Program has interactive
220 | interfaces that do not display Appropriate Legal Notices, your
221 | work need not make them do so.
222 |
223 | A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit. Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 |
233 | 6. Conveying Non-Source Forms.
234 |
235 | You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 |
240 | a) Convey the object code in, or embodied in, a physical product
241 | (including a physical distribution medium), accompanied by the
242 | Corresponding Source fixed on a durable physical medium
243 | customarily used for software interchange.
244 |
245 | b) Convey the object code in, or embodied in, a physical product
246 | (including a physical distribution medium), accompanied by a
247 | written offer, valid for at least three years and valid for as
248 | long as you offer spare parts or customer support for that product
249 | model, to give anyone who possesses the object code either (1) a
250 | copy of the Corresponding Source for all the software in the
251 | product that is covered by this License, on a durable physical
252 | medium customarily used for software interchange, for a price no
253 | more than your reasonable cost of physically performing this
254 | conveying of source, or (2) access to copy the
255 | Corresponding Source from a network server at no charge.
256 |
257 | c) Convey individual copies of the object code with a copy of the
258 | written offer to provide the Corresponding Source. This
259 | alternative is allowed only occasionally and noncommercially, and
260 | only if you received the object code with such an offer, in accord
261 | with subsection 6b.
262 |
263 | d) Convey the object code by offering access from a designated
264 | place (gratis or for a charge), and offer equivalent access to the
265 | Corresponding Source in the same way through the same place at no
266 | further charge. You need not require recipients to copy the
267 | Corresponding Source along with the object code. If the place to
268 | copy the object code is a network server, the Corresponding Source
269 | may be on a different server (operated by you or a third party)
270 | that supports equivalent copying facilities, provided you maintain
271 | clear directions next to the object code saying where to find the
272 | Corresponding Source. Regardless of what server hosts the
273 | Corresponding Source, you remain obligated to ensure that it is
274 | available for as long as needed to satisfy these requirements.
275 |
276 | e) Convey the object code using peer-to-peer transmission, provided
277 | you inform other peers where the object code and Corresponding
278 | Source of the work are being offered to the general public at no
279 | charge under subsection 6d.
280 |
281 | A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 |
285 | A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling. In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage. For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product. A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 |
298 | "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source. The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 |
306 | If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information. But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 |
317 | The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed. Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 |
325 | Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 |
331 | 7. Additional Terms.
332 |
333 | "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law. If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 |
342 | When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it. (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.) You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 |
349 | Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 |
353 | a) Disclaiming warranty or limiting liability differently from the
354 | terms of sections 15 and 16 of this License; or
355 |
356 | b) Requiring preservation of specified reasonable legal notices or
357 | author attributions in that material or in the Appropriate Legal
358 | Notices displayed by works containing it; or
359 |
360 | c) Prohibiting misrepresentation of the origin of that material, or
361 | requiring that modified versions of such material be marked in
362 | reasonable ways as different from the original version; or
363 |
364 | d) Limiting the use for publicity purposes of names of licensors or
365 | authors of the material; or
366 |
367 | e) Declining to grant rights under trademark law for use of some
368 | trade names, trademarks, or service marks; or
369 |
370 | f) Requiring indemnification of licensors and authors of that
371 | material by anyone who conveys the material (or modified versions of
372 | it) with contractual assumptions of liability to the recipient, for
373 | any liability that these contractual assumptions directly impose on
374 | those licensors and authors.
375 |
376 | All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10. If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term. If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 |
386 | If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 |
391 | Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 |
395 | 8. Termination.
396 |
397 | You may not propagate or modify a covered work except as expressly
398 | provided under this License. Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 |
403 | However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 |
410 | Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 |
417 | Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License. If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 |
423 | 9. Acceptance Not Required for Having Copies.
424 |
425 | You are not required to accept this License in order to receive or
426 | run a copy of the Program. Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance. However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work. These actions infringe copyright if you do
431 | not accept this License. Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 |
434 | 10. Automatic Licensing of Downstream Recipients.
435 |
436 | Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License. You are not responsible
439 | for enforcing compliance by third parties with this License.
440 |
441 | An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations. If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 |
451 | You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License. For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 |
459 | 11. Patents.
460 |
461 | A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based. The
463 | work thus licensed is called the contributor's "contributor version".
464 |
465 | A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version. For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 |
475 | Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 |
480 | In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement). To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 |
487 | If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients. "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 |
501 | If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 |
509 | A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License. You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 |
524 | Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 |
528 | 12. No Surrender of Others' Freedom.
529 |
530 | If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License. If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all. For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 |
540 | 13. Remote Network Interaction; Use with the GNU General Public License.
541 |
542 | Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software. This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 |
553 | Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work. The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 |
561 | 14. Revised Versions of this License.
562 |
563 | The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time. Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 |
568 | Each version is given a distinguishing version number. If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation. If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 |
577 | If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 |
582 | Later license versions may give you additional or different
583 | permissions. However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 |
587 | 15. Disclaimer of Warranty.
588 |
589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 |
598 | 16. Limitation of Liability.
599 |
600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 |
610 | 17. Interpretation of Sections 15 and 16.
611 |
612 | If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 |
619 | END OF TERMS AND CONDITIONS
620 |
621 | How to Apply These Terms to Your New Programs
622 |
623 | If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 |
627 | To do so, attach the following notices to the program. It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 |
632 | <one line to give the program's name and a brief idea of what it does.>
633 | Copyright (C) <year> <name of author>
634 |
635 | This program is free software: you can redistribute it and/or modify
636 | it under the terms of the GNU Affero General Public License as published by
637 | the Free Software Foundation, either version 3 of the License, or
638 | (at your option) any later version.
639 |
640 | This program is distributed in the hope that it will be useful,
641 | but WITHOUT ANY WARRANTY; without even the implied warranty of
642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643 | GNU Affero General Public License for more details.
644 |
645 | You should have received a copy of the GNU Affero General Public License
646 | along with this program. If not, see <https://www.gnu.org/licenses/>.
647 |
648 | Also add information on how to contact you by electronic and paper mail.
649 |
650 | If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source. For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code. There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 |
658 | You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Raft
2 |
3 | A Rust implementation of the [Raft consensus algorithm](https://raft.github.io/), focused on:
4 |
5 | - Correctness and readability. The core implementation is [written](src/core.rs) alongside the [original Raft TLA+
6 | specification](https://github.com/ongardie/raft.tla) to aid auditability.
7 | - Simplicity. Some optional features described in Diego Ongaro's [Raft
8 | thesis](http://web.stanford.edu/~ouster/cgi-bin/papers/OngaroPhD.pdf) such as pre-voting, membership changes, and
9 | snapshots are currently not implemented.
10 | - Usability. A primary goal of the API is to be simple and not error-prone.
11 |
12 | Important caveats:
13 |
14 | - Unicast message delivery is assumed to be non-lossy in order for replication to make progress. In other words, once a
15 | non-broadcast message is returned from an API, it must be retained and retransmitted until it is acknowledged as
16 | delivered by its destination. Messages may be safely delivered out-of-order or more than once, however. To prevent
17 | unbounded queueing, the API is designed to only ever return a bounded amount of unacknowledged unicast message data.
18 |
19 | This crate is `no_std`, but depends on the `alloc` crate.
20 |
21 | [API Documentation](https://simple-raft-rs.github.io/raft-rs/simple_raft)
22 | [Examples](examples)
23 |
24 | ## Crate Features
25 |
26 | This crate has the following optional features:
27 |
28 | - `prost` enables optional protobuf serialization of Raft messages. A corresponding [protobuf file](src/raft.proto) is
29 | also provided.
30 |
31 | ## License
32 |
33 | Copyright (C) 2019 Open Whisper Systems
34 | Copyright (C) 2021 jessa0
35 |
36 | This program is free software: you can redistribute it and/or modify
37 | it under the terms of the GNU Affero General Public License as published by
38 | the Free Software Foundation, either version 3 of the License, or
39 | (at your option) any later version.
40 |
41 | This program is distributed in the hope that it will be useful,
42 | but WITHOUT ANY WARRANTY; without even the implied warranty of
43 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 | GNU Affero General Public License for more details.
45 |
46 | You should have received a copy of the GNU Affero General Public License
47 | along with this program. If not, see <https://www.gnu.org/licenses/>.
48 |
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples
2 |
3 | * [Simple](simple.rs) -- A simple example from the crate-level documentation.
4 | * [Threaded](threaded.rs) -- A simple example with a thread per RaftNode.
5 | * [`raftcat`](raftcat.rs) -- A complex networked example as a command-line tool.
6 |
7 | ## `raftcat`
8 |
9 | `raftcat` is a command-line tool to run a networked Raft group over TCP. Lines from stdin are appended to the Raft log
10 | as log entries. Committed log entries are written to stdout. This is a toy example, so no retry is attempted on log
11 | appends, which in a database would normally be handled by the database client. This example also does not persist
12 | state, so restarting a node may result in data loss or inconsistency.
13 |
--------------------------------------------------------------------------------
/examples/raftcat.rs:
--------------------------------------------------------------------------------
1 | //! A complex networked example as a command-line tool.
2 |
3 | use bytes::{BufMut, Bytes};
4 | use prost::Message;
5 | use rand_core::OsRng;
6 | use simple_raft::log::mem::RaftLogMemory;
7 | use simple_raft::node::{AppendError, RaftConfig, RaftNode};
8 | use simple_raft::message::{RaftMessage, RaftMessageDestination, SendableRaftMessage};
9 | use std::collections::{BTreeMap, BTreeSet};
10 | use std::error::Error;
11 | use std::io::{BufRead, BufReader, Read, Write};
12 | use std::net::{TcpListener, TcpStream};
13 | use std::sync::mpsc;
14 | use std::time::{Duration, Instant};
15 |
16 | const TICK_DURATION: Duration = Duration::from_millis(50); // wall-clock time between calls to `raft.timer_tick()` in `main`
17 | const RAFT_LOG_CAPACITY: usize = 100 * 1024 * 1024; // second argument to `RaftLogMemory::with_capacity`; presumably a byte limit (100 MiB) -- confirm
18 | const RAFT_CONFIG: RaftConfig = RaftConfig {
19 | election_timeout_ticks: 10, // in ticks: 10 * TICK_DURATION = 500 ms
20 | heartbeat_interval_ticks: 5, // in ticks: 5 * TICK_DURATION = 250 ms
21 | replication_chunk_size: 65536, // presumably a byte count per replication message -- confirm against `RaftConfig`
22 | };
23 |
24 | type NodeId = String;
25 |
26 | #[derive(Clone)]
27 | enum IncomingMessage { // events consumed by the main loop via `main_rx`
28 | Append(Bytes), // a line read from stdin, to be appended to the Raft log
29 | Message(NetworkMessage), // a message decoded from a peer connection
30 | }
31 |
32 | #[derive(Clone, Message)]
33 | pub struct NetworkMessage { // wire format exchanged between peers; framed with a length prefix by the sender and `read_peer_message`
34 | #[prost(bytes, required)]
35 | pub from: Bytes, // the sending node's id as raw bytes
36 | #[prost(message, required)]
37 | pub message: RaftMessage, // the Raft protocol message being carried
38 | }
39 |
40 | struct Network { // outgoing side of the peer network: one queue per peer, each drained by its own sender thread
41 | peers_tx: BTreeMap<NodeId, mpsc::Sender<RaftMessage>>, // peer id -> channel feeding that peer's sender thread
42 | }
43 |
44 | struct Args { // parsed command-line arguments
45 | bind_addr: Option<String>, // `[bind_addr:]port` to listen on; `None` when no arguments were given
46 | node_id: NodeId, // public host:port of this node (empty when no arguments were given)
47 | peers: BTreeSet<NodeId>, // public host:port of every peer
48 | }
49 |
50 | fn main() { // runs one raftcat node: stdin lines are appended to the Raft log, committed entries are written to stdout
51 | env_logger::builder().filter_level(log::LevelFilter::Info).parse_default_env().init();
52 |
53 | let Args { bind_addr, node_id, peers } = parse_args();
54 |
55 | let (main_tx, main_rx) = mpsc::channel::<IncomingMessage>();
56 | if let Some(bind_addr) = bind_addr {
57 | start_peer_listener(main_tx.clone(), bind_addr);
58 | }
59 | let network = start_peer_senders(node_id.clone(), peers.clone());
60 |
61 | // Send lines from stdin to the main thread
62 | std::thread::spawn(move || {
63 | let stdin = std::io::stdin();
64 | let mut stdin_lock = stdin.lock();
65 | let mut line = String::new();
66 | while stdin_lock.read_line(&mut line).expect("error reading from stdin") != 0 {
67 | let _ignore = main_tx.send(IncomingMessage::Append(line.clone().into()));
68 | line.clear();
69 | }
70 | });
71 |
72 | let mut raft = RaftNode::new(
73 | node_id.clone(),
74 | peers.clone(),
75 | RaftLogMemory::with_capacity(10240, RAFT_LOG_CAPACITY),
76 | OsRng::default(),
77 | RAFT_CONFIG,
78 | );
79 |
80 | let stdout = std::io::stdout();
81 | let mut stdout_lock = stdout.lock();
82 |
83 | let mut next_tick = Instant::now() + TICK_DURATION;
84 | loop {
85 | match main_rx.recv_timeout(next_tick.saturating_duration_since(Instant::now())) { // wait for an event, but no longer than until the next tick is due
86 | Ok(IncomingMessage::Append(data)) => {
87 | // Append log entries from stdin
88 | match raft.append(data) {
89 | Ok(new_messages) =>
90 | new_messages.for_each(|message| network.send(message)),
91 | Err(AppendError::Cancelled { data }) =>
92 | log::info!("append cancelled: {}", String::from_utf8_lossy(&data)),
93 | Err(AppendError::RaftLogErr(err)) =>
94 | log::error!("raft log error: {:?}", err),
95 | }
96 | }
97 | Ok(IncomingMessage::Message(NetworkMessage { from, message })) => {
98 | // Process incoming message
99 | let new_messages = raft.receive(message, String::from_utf8_lossy(&from).to_string());
100 | new_messages.for_each(|message| network.send(message));
101 | }
102 | Err(mpsc::RecvTimeoutError::Timeout) => {
103 | // Tick the timer
104 | let new_messages = raft.timer_tick();
105 | new_messages.for_each(|message| network.send(message));
106 | next_tick = Instant::now() + TICK_DURATION;
107 | }
108 | Err(mpsc::RecvTimeoutError::Disconnected) =>
109 | panic!("child threads died"),
110 | }
111 |
112 | // Check for committed log entries
113 | for log_entry in raft.take_committed() {
114 | if !log_entry.data.is_empty() {
115 | stdout_lock.write_all(&log_entry.data).expect("error writing to stdout"); // `write_all`, because a single `write` may stop after a partial write
116 | }
117 | }
118 | }
119 | }
120 |
121 | fn parse_args() -> Args { // parses `[[bind_addr:]port node_host:port] [peer_host:port ...]`; calls `usage` (which exits) on malformed input
122 | let mut args = std::env::args();
123 | let executable_name = args.next().unwrap_or_default();
124 |
125 | let (bind_addr, node_id) = match (args.next(), args.next()) {
126 | (Some(first_arg), _) if first_arg.starts_with('-') => usage(&executable_name), // any leading option, including `-h`, prints the usage text
127 | (Some(_), None) => usage(&executable_name), // a bind address without a node id is incomplete
128 | (Some(bind_addr), Some(node_id)) => (Some(bind_addr), node_id),
129 | (None, _) => (None, String::new()), // no arguments: no listener and an empty node id
130 | };
131 |
132 | let peers = args.collect::<BTreeSet<_>>(); // every remaining argument is a peer address
133 |
134 | Args { bind_addr, node_id, peers }
135 | }
136 |
137 | fn usage(executable_name: &str) -> ! { // prints the usage text to stderr and exits with status 1; never returns
138 | eprint!(concat!(
139 | "Usage: {} [-h] [[bind_addr:]port node_host:port] [peer_host:port ...]\n",
140 | "\n",
141 | "[bind_addr:]port - the host:port to listen on\n",
142 | "node_host:port - the public host:port of this node\n",
143 | "peer_host:port - the public host:port of any peers\n",
144 | ), executable_name);
145 | std::process::exit(1)
146 | }
147 |
148 | fn start_peer_listener(main_tx: mpsc::Sender<IncomingMessage>, bind_addr: String) { // binds the listening socket (panicking on failure) and spawns the accept loop
149 | let bind_addr = if bind_addr.contains(':') { bind_addr } else { format!("0.0.0.0:{}", bind_addr) }; // a bare port listens on all IPv4 interfaces
150 | let listener = TcpListener::bind(&bind_addr).unwrap_or_else(|error| panic!("error listening on {}: {}", bind_addr, error));
151 | std::thread::spawn(move || {
152 | for stream in listener.incoming() {
153 | start_peer_receiver(BufReader::new(stream.expect("error accepting connection")), main_tx.clone()); // one receiver thread per accepted connection
154 | }
155 | });
156 | }
157 |
158 | fn start_peer_receiver(mut reader: BufReader<TcpStream>, main_tx: mpsc::Sender<IncomingMessage>) { // spawns a thread that forwards messages from one peer connection to the main loop
159 | std::thread::spawn(move || {
160 | let addr = reader.get_mut().peer_addr().unwrap();
161 | log::info!("accepted connection from {}", addr);
162 | while let Ok(message) = read_peer_message(&mut reader).map_err(|error| log::info!("error receiving from {}: {}", addr, error)) { // the first read or decode error is logged and ends this thread
163 | let _ignore = main_tx.send(IncomingMessage::Message(message));
164 | }
165 | });
166 | }
167 |
168 | fn read_peer_message(reader: &mut BufReader) -> Result> {
169 | let mut len_data = [0; 4];
170 | reader.read_exact(&mut len_data)?;
171 | let mut message_data = vec![0; u32::from_be_bytes(len_data) as usize];
172 | reader.read_exact(&mut message_data)?;
173 | let message = NetworkMessage::decode(&message_data[..]).map_err(|error| format!("invalid message from peer: {}", error))?;
174 | log::debug!("{} -> self: {}", String::from_utf8_lossy(&message.from), &message.message);
175 | Ok(message)
176 | }
177 |
178 | fn start_peer_senders(node_id: NodeId, peers: BTreeSet<NodeId>) -> Network { // creates one outgoing queue and one sender thread per peer, returning the queue handles
179 | let (peers_tx, peers_rx): (BTreeMap<_,_>, Vec<_>) = peers.iter().map(|peer_id| {
180 | let (peer_tx, peer_rx) = mpsc::channel();
181 | ((peer_id.clone(), peer_tx), (peer_id.clone(), peer_rx)) // sending half goes into the map, receiving half to the peer's thread
182 | }).unzip();
183 |
184 | for (peer_id, peer_rx) in peers_rx {
185 | start_peer_sender(node_id.clone().into(), peer_id, peer_rx); // the peer id doubles as the address to connect to
186 | }
187 |
188 | Network { peers_tx }
189 | }
190 |
191 | fn start_peer_sender(from: Bytes, address: String, peer_rx: mpsc::Receiver<RaftMessage>) { // spawns a thread that connects to `address` and writes queued messages to it, reconnecting as needed
192 | std::thread::spawn(move || {
193 | let mut connection = None;
194 | let mut data = Vec::new(); // encode buffer, reused across messages
195 | loop {
196 | let message = match peer_rx.recv_timeout(TICK_DURATION * RAFT_CONFIG.election_timeout_ticks) {
197 | Ok(message) => Some(NetworkMessage { from: from.clone(), message }),
198 | Err(mpsc::RecvTimeoutError::Timeout) => None, // nothing to send; fall through so a missing connection is still retried
199 | Err(mpsc::RecvTimeoutError::Disconnected) => break, // the sending half was dropped: stop this thread
200 | };
201 |
202 | if connection.is_none() {
203 | match TcpStream::connect(&address) {
204 | Ok(established_connection) => {
205 | log::info!("connected to {}", &address);
206 | let _ignore = established_connection.set_nodelay(true);
207 | connection = Some(established_connection);
208 | }
209 | Err(error) =>
210 | log::info!("error connecting to {}: {}", &address, error),
211 | }
212 | }
213 | if let (Some(established_connection), Some(message)) = (&mut connection, &message) { // without a connection the message is dropped, not queued
214 | data.clear();
215 | data.put_u32(message.encoded_len() as u32); // length prefix read back by `read_peer_message`
216 | message.encode(&mut data).unwrap();
217 | if let Err(error) = established_connection.write_all(&data) {
218 | log::info!("error sending to {}: {}", &address, error);
219 | connection = None; // forget the broken connection so the next iteration reconnects
220 | }
221 | }
222 | }
223 | });
224 | }
225 |
226 | impl Network {
227 | fn send(&self, sendable: SendableRaftMessage<NodeId>) { // queues a Raft message for one peer or for all peers; send errors on the queues are ignored
228 | match sendable.dest {
229 | RaftMessageDestination::Broadcast => {
230 | log::debug!("self -> all: {}", sendable.message);
231 | self.peers_tx.values().for_each(|peer_tx| drop(peer_tx.send(sendable.message.clone())));
232 | }
233 | RaftMessageDestination::To(dst_id) => {
234 | log::debug!("self -> {}: {}", dst_id, sendable.message);
235 | let _ = self.peers_tx[&dst_id].send(sendable.message); // NOTE(review): indexing panics if `dst_id` is not a configured peer -- confirm that cannot happen
236 | }
237 | }
238 | }
239 | }
240 |
--------------------------------------------------------------------------------
/examples/simple.rs:
--------------------------------------------------------------------------------
1 | //! A simple example from the crate-level documentation
2 |
3 | use rand_chacha::ChaChaRng;
4 | use rand_core::SeedableRng;
5 | use simple_raft::log::mem::RaftLogMemory;
6 | use simple_raft::node::{RaftConfig, RaftNode};
7 | use simple_raft::message::{RaftMessageDestination, SendableRaftMessage};
8 | use std::collections::VecDeque;
9 | use std::str;
10 |
11 | fn main() { // single-threaded simulation: five peers tick in lockstep until all of them have seen one committed entry
12 | // Construct 5 Raft peers
13 | type NodeId = usize;
14 | let mut peers = (0..5).map(|id: NodeId| RaftNode::new(
15 | id,
16 | (0..5).collect(),
17 | RaftLogMemory::new_unbounded(),
18 | ChaChaRng::seed_from_u64(id as u64),
19 | RaftConfig {
20 | election_timeout_ticks: 10,
21 | heartbeat_interval_ticks: 1,
22 | replication_chunk_size: usize::max_value(),
23 | },
24 | )).collect::<Vec<_>>();
25 |
26 | // Simulate reliably sending messages instantaneously between peers
27 | let mut inboxes = vec![VecDeque::new(); peers.len()];
28 | let send_message = |src_id: NodeId, sendable: SendableRaftMessage<NodeId>, inboxes: &mut Vec<VecDeque<_>>| {
29 | match sendable.dest {
30 | RaftMessageDestination::Broadcast => {
31 | println!("peer {} -> all: {}", src_id, &sendable.message);
32 | inboxes.iter_mut().for_each(|inbox| inbox.push_back((src_id, sendable.message.clone())));
33 | }
34 | RaftMessageDestination::To(dst_id) => {
35 | println!("peer {} -> peer {}: {}", src_id, dst_id, &sendable.message);
36 | inboxes[dst_id].push_back((src_id, sendable.message));
37 | }
38 | }
39 | };
40 |
41 | // Loop until a log entry is committed on all peers
42 | let mut appended = false;
43 | let mut peers_committed = vec![false; peers.len()];
44 | while !peers_committed.iter().all(|seen| *seen) {
45 | for (peer_id, peer) in peers.iter_mut().enumerate() {
46 | // Tick the timer
47 | let new_messages = peer.timer_tick();
48 | new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes));
49 |
50 | // Append a log entry on the leader
51 | if !appended && peer.is_leader() {
52 | if let Ok(new_messages) = peer.append("Hello world!") {
53 | println!("peer {} appending to the log", peer_id);
54 | new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes));
55 | appended = true;
56 | }
57 | }
58 |
59 | // Process message inbox
60 | while let Some((src_id, message)) = inboxes[peer_id].pop_front() {
61 | let new_messages = peer.receive(message, src_id);
62 | new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes));
63 | }
64 |
65 | // Check for committed log entries
66 | for log_entry in peer.take_committed() {
67 | if !log_entry.data.is_empty() {
68 | println!("peer {} saw commit {}", peer_id, str::from_utf8(&log_entry.data).unwrap());
69 | assert!(!peers_committed[peer_id]); // only one non-empty entry is ever appended, so each peer must report it exactly once
70 | peers_committed[peer_id] = true;
71 | }
72 | }
73 | }
74 | }
75 | }
76 |
77 | #[cfg(test)]
78 | mod test { // smoke test: runs the example's `main` to completion
79 | #[test]
80 | fn main() {
81 | super::main();
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/examples/threaded.rs:
--------------------------------------------------------------------------------
1 | //! A simple example with a thread per RaftNode
2 |
3 | use rand_chacha::ChaChaRng;
4 | use rand_core::SeedableRng;
5 | use simple_raft::log::mem::RaftLogMemory;
6 | use simple_raft::node::{RaftConfig, RaftNode};
7 | use simple_raft::message::{RaftMessage, RaftMessageDestination, SendableRaftMessage};
8 | use std::str;
9 | use std::sync::{Arc, Mutex};
10 | use std::sync::mpsc;
11 | use std::thread;
12 | use std::time::{Duration, Instant};
13 |
14 | type NodeId = usize;
15 |
16 | const TICK_DURATION: Duration = Duration::from_millis(100);
17 | const RAFT_CONFIG: RaftConfig = RaftConfig {
18 | election_timeout_ticks: 10,
19 | heartbeat_interval_ticks: 1,
20 | replication_chunk_size: usize::max_value(),
21 | };
22 |
23 | #[derive(Clone)]
24 | struct IncomingMessage { // a Raft message in transit between two peer threads
25 | from: NodeId, // id of the sending peer
26 | message: RaftMessage,
27 | }
28 |
29 | #[derive(Clone)]
30 | struct Network { // in-process "network": one channel per peer
31 | peers_tx: Vec<mpsc::Sender<IncomingMessage>>, // `peers_tx[id]` delivers to the peer with that id
32 | }
33 |
34 | fn main() { // runs five Raft peers, one thread each, until every peer has reported one committed entry
35 | // Construct 5 Raft peers
36 | let (peers_tx, peers_rx): (Vec<_>, Vec<_>) = (0..5).map(|_| mpsc::channel()).unzip();
37 | let network = Network { peers_tx };
38 | let peers = peers_rx.into_iter().enumerate().map(|(peer_id, rx): (NodeId, _)| (
39 | RaftNode::new(
40 | peer_id,
41 | (0..5).collect(),
42 | RaftLogMemory::new_unbounded(),
43 | ChaChaRng::seed_from_u64(peer_id as u64),
44 | RAFT_CONFIG,
45 | ),
46 | rx,
47 | ));
48 |
49 | let appended = Arc::new(Mutex::new(false)); // shared flag so that only one thread ever appends the entry
50 | let mut peers_committed = vec![false; peers.len()];
51 | let (peer_committed_tx, peer_committed_rx) = mpsc::channel();
52 |
53 | for (peer_id, (mut peer, rx)) in peers.enumerate() {
54 | let appended = Arc::clone(&appended);
55 | let network = network.clone();
56 | let peer_committed_tx = peer_committed_tx.clone();
57 | thread::spawn(move || {
58 | // Loop until a log entry is committed
59 | let mut next_tick = Instant::now() + TICK_DURATION;
60 | loop {
61 | match rx.recv_timeout(next_tick.saturating_duration_since(Instant::now())) { // wait for a message, but no longer than until the next tick is due
62 | Ok(message) => {
63 | // Process incoming message
64 | let new_messages = peer.receive(message.message, message.from);
65 | new_messages.for_each(|message| network.send(peer_id, message));
66 | }
67 | Err(mpsc::RecvTimeoutError::Timeout) => {
68 | // Tick the timer
69 | let new_messages = peer.timer_tick();
70 | new_messages.for_each(|message| network.send(peer_id, message));
71 | next_tick = Instant::now() + TICK_DURATION;
72 | }
73 | Err(mpsc::RecvTimeoutError::Disconnected) =>
74 | panic!("peer {} disconnected", peer_id),
75 | }
76 |
77 | // Append a log entry on the leader
78 | let mut appended = appended.lock().unwrap();
79 | if !*appended && peer.is_leader() {
80 | if let Ok(new_messages) = peer.append("Hello world!") {
81 | println!("peer {} appending to the log", peer_id);
82 | new_messages.for_each(|message| network.send(peer_id, message));
83 | *appended = true;
84 | }
85 | }
86 | drop(appended); // release the lock before handling commits
87 |
88 | // Check for committed log entries
89 | for log_entry in peer.take_committed() {
90 | if !log_entry.data.is_empty() {
91 | println!("peer {} saw commit {}", peer_id, str::from_utf8(&log_entry.data).unwrap());
92 | peer_committed_tx.send(peer_id).unwrap();
93 | }
94 | }
95 | }
96 | });
97 | }
98 | drop((network, peer_committed_tx)); // the main thread keeps no senders of its own; only the peer threads hold them
99 |
100 | // Loop until a log entry is committed on all peers
101 | while !peers_committed.iter().all(|seen| *seen) {
102 | let peer_id = peer_committed_rx.recv().unwrap();
103 | assert!(!peers_committed[peer_id]); // each peer is expected to report the single entry exactly once
104 | peers_committed[peer_id] = true;
105 | }
106 | }
107 |
108 | impl Network {
109 | fn send(&self, from: NodeId, sendable: SendableRaftMessage<NodeId>) { // delivers a Raft message from peer `from` to one peer or to every peer; send errors are ignored
110 | let message = IncomingMessage { from, message: sendable.message };
111 | match sendable.dest {
112 | RaftMessageDestination::Broadcast => {
113 | println!("peer {} -> all: {}", from, message.message);
114 | self.peers_tx.iter().for_each(|peer_tx| drop(peer_tx.send(message.clone()))); // iterates every channel, so the sender's own inbox receives the broadcast too
115 | }
116 | RaftMessageDestination::To(dst_id) => {
117 | println!("peer {} -> peer {}: {}", from, dst_id, message.message);
118 | let _ = self.peers_tx[dst_id].send(message);
119 | }
120 | }
121 | }
122 | }
123 |
124 | #[cfg(test)]
125 | mod test { // smoke test: runs the example's `main` to completion
126 | #[test]
127 | fn main() {
128 | super::main();
129 | }
130 | }
131 |
--------------------------------------------------------------------------------
/src/core.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Open Whisper Systems
3 | * Copyright (C) 2021 jessa0
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 |
19 | //! Unstable, low-level API for the complete state of a Raft node.
20 |
21 | use alloc::collections::{BTreeMap, BTreeSet};
22 | use bytes::Bytes;
23 | use core::fmt;
24 | use core::iter;
25 | use crate::message::*;
26 | use crate::node::{AppendError, RaftConfig};
27 | use crate::prelude::*;
28 | use crate::log::{CommittedIter, RaftLog, RaftLogState};
29 | use log::{error, warn, info, debug};
30 | use rand_core::RngCore;
31 | use self::LeadershipState::*;
32 |
33 | /// The state of Raft log replication from a Raft node to one of its peers.
34 | pub struct ReplicationState {
35 | // \* The next entry to send to each follower.
36 | // VARIABLE nextIndex
37 | /// The index of the next log entry to be sent to this peer.
38 | pub next_idx: LogIndex,
39 |
40 | // \* The latest entry that each follower has acknowledged is the same as the
41 | // \* leader's. This is used to calculate commitIndex on the leader.
42 | // VARIABLE matchIndex
43 | /// The index of the last log entry on this peer up to which the peer's log is known to match this node's log.
44 | pub match_idx: LogIndex,
45 |
46 | /// The index of the last log entry sent to this peer but which has not yet been acknowledged by the peer.
47 | pub inflight: Option,
48 |
49 | /// Whether this node is currently probing to discover the correct [`match_idx`][Self::match_idx] for this peer.
50 | pub send_probe: bool,
51 |
52 | /// Whether a heartbeat "ping" message is due to be sent to this peer.
53 | send_heartbeat: bool,
54 | }
55 |
56 | // \* Server states.
57 | // CONSTANTS Follower, Candidate, Leader
58 | enum LeadershipState { // the role this node currently plays, carrying the state specific to that role
59 | Follower(FollowerState),
60 | Candidate(CandidateState),
61 | Leader(LeaderState),
62 | }
63 |
64 | struct FollowerState { // state held while this node is a follower
65 | leader: Option, // the leader known to this follower, if any; surfaced by `RaftState::leader`
66 |
67 | election_ticks: u32, // starts at the randomized election timeout in `RaftState::new`; presumably counts down per tick -- confirm
68 | random_election_ticks: u32, // the randomized election timeout, in ticks, drawn via `random_election_timeout`
69 | }
70 |
71 | struct CandidateState {
72 | // \* The set of servers from which the candidate has received a vote in its
73 | // \* currentTerm.
74 | // VARIABLE votesGranted
75 | votes_granted: BTreeSet,
76 |
77 | election_ticks: u32,
78 | }
79 |
80 | struct LeaderState { // state held while this node is the leader
81 | followers: BTreeMap, // per-peer bookkeeping; presumably peer id -> `ReplicationState` -- confirm
82 |
83 | heartbeat_ticks: u32, // presumably ticks remaining until the next heartbeat is due -- confirm
84 | }
85 |
86 | /// The complete state of a Raft node.
87 | pub struct RaftState {
88 | node_id: NodeId, // this node's own id
89 | peers: BTreeSet, // the other nodes in the group; `new` removes `node_id` from this set
90 | random: Random, // RNG used to draw the randomized election timeout
91 | config: RaftConfig,
92 |
93 | // \* The server's term number.
94 | // VARIABLE currentTerm
95 | current_term: TermId,
96 |
97 | // \* The candidate the server voted for in its current term, or
98 | // \* Nil if it hasn't voted for any.
99 | // VARIABLE votedFor
100 | voted_for: Option,
101 |
102 | // \* The server's state (Follower, Candidate, or Leader).
103 | // VARIABLE state
104 | leadership: LeadershipState,
105 |
106 | // \* A Sequence of log entries. The index into this sequence is the index of the
107 | // \* log entry. Unfortunately, the Sequence module defines Head(s) as the entry
108 | // \* with index 1, so be careful not to use that!
109 | // VARIABLE log
110 | // \* The index of the latest entry in the log the state machine may apply.
111 | // VARIABLE commitIndex
112 | log: RaftLogState,
113 | }
114 |
115 | #[allow(missing_docs)]
116 | impl RaftState
117 | where Log: RaftLog,
118 | Random: RngCore,
119 | NodeId: Ord + Clone + fmt::Display,
120 | {
/// Creates a node that starts as a follower with no known leader, a default term and no vote.
///
/// `node_id` is removed from `peers`, so the stored peer set only ever contains other nodes.
/// The initial election countdown is a randomized timeout derived from
/// `config.election_timeout_ticks`.
121 | pub fn new(node_id: NodeId,
122 | mut peers: BTreeSet,
123 | log: Log,
124 | mut random: Random,
125 | config: RaftConfig)
126 | -> Self {
// Callers may pass the full group membership; this node must not count as its own peer.
127 | peers.remove(&node_id);
128 | let random_election_ticks = random_election_timeout(&mut random, config.election_timeout_ticks);
129 | Self {
130 | node_id,
131 | peers,
132 | random,
133 | config,
134 | log: RaftLogState::new(log),
135 | current_term: Default::default(),
136 | voted_for: Default::default(),
137 | leadership: Follower(FollowerState {
138 | leader: None,
139 | election_ticks: random_election_ticks,
140 | random_election_ticks,
141 | }),
142 | }
143 | }
144 |
/// Returns a reference to the commit index stored in this node's log state.
145 | pub fn commit_idx(&self) -> &LogIndex {
146 | &self.log.commit_idx
147 | }
148 |
/// Returns the configuration currently in effect.
149 | pub fn config(&self) -> &RaftConfig {
150 | &self.config
151 | }
152 |
/// Returns `true` exactly when this node's leadership state is `Leader`.
153 | pub fn is_leader(&self) -> bool {
154 | if let Leader(_) = &self.leadership {
155 | true
156 | } else {
157 | false
158 | }
159 | }
160 |
/// Returns the leader as known to this node, paired with the current term.
///
/// A follower reports the leader it last recorded (possibly none), a candidate reports no leader,
/// and a leader reports its own id.
161 | pub fn leader(&self) -> (Option<&NodeId>, &TermId) {
162 | let leader = match &self.leadership {
163 | Follower(follower_state) => follower_state.leader.as_ref(),
164 | Candidate(_) => None,
165 | Leader(_) => Some(&self.node_id),
166 | };
167 | (leader, &self.current_term)
168 | }
169 |
/// Returns a shared reference to the underlying log storage.
170 | pub fn log(&self) -> &Log {
171 | self.log.log()
172 | }
173 |
/// Returns a mutable reference to the underlying log storage.
174 | pub fn log_mut(&mut self) -> &mut Log {
175 | self.log.log_mut()
176 | }
177 |
/// Returns this node's own identifier.
178 | pub fn node_id(&self) -> &NodeId {
179 | &self.node_id
180 | }
181 |
/// Returns the set of peer ids; this node's own id is never a member (see `new`).
182 | pub fn peers(&self) -> &BTreeSet {
183 | &self.peers
184 | }
185 |
/// Returns the replication progress tracked for `peer_node_id`.
///
/// Yields `None` when this node is not the leader or when the id is not a tracked follower.
186 | pub fn replication_state(&self, peer_node_id: &NodeId) -> Option<&ReplicationState> {
187 | if let LeadershipState::Leader(leader_state) = &self.leadership {
188 | leader_state.followers.get(peer_node_id)
189 | } else {
190 | None
191 | }
192 | }
193 |
/// Replaces the configuration and pulls any running timer back inside the new limits.
///
/// Randomized election timeouts are drawn from `[t, 2t)` for `t = election_timeout_ticks`, so a
/// stored value above `2t` cannot have come from the new configuration and is re-drawn.
194 | pub fn set_config(&mut self, config: RaftConfig) {
195 | self.config = config;
196 |
197 | match &mut self.leadership {
198 | Follower(FollowerState { election_ticks, random_election_ticks, .. }) => {
199 | if *random_election_ticks > self.config.election_timeout_ticks.saturating_mul(2) {
200 | *random_election_ticks = random_election_timeout(&mut self.random, self.config.election_timeout_ticks);
201 | }
// The remaining countdown may never exceed the (possibly re-drawn) full timeout.
202 | if election_ticks > random_election_ticks {
203 | *election_ticks = *random_election_ticks;
204 | }
205 | }
206 | Candidate(CandidateState { election_ticks, .. }) => {
207 | if *election_ticks > self.config.election_timeout_ticks.saturating_mul(2) {
208 | *election_ticks = random_election_timeout(&mut self.random, self.config.election_timeout_ticks);
209 | }
210 | }
211 | Leader(LeaderState { heartbeat_ticks, .. }) => {
// Shorten a pending heartbeat countdown if the interval was reduced.
212 | if *heartbeat_ticks > self.config.heartbeat_interval_ticks {
213 | *heartbeat_ticks = self.config.heartbeat_interval_ticks;
214 | }
215 | }
216 | }
217 | }
218 |
/// Delegates to the log state's `take_committed` and returns its iterator over committed entries.
219 | pub fn take_committed(&mut self) -> CommittedIter<'_, Log> {
220 | self.log.take_committed()
221 | }
222 |
/// Advances this node's timers by one tick.
///
/// Followers and candidates count down their election timer; when it reaches zero the node calls
/// `timeout` and returns whatever message that produces. A leader counts down its heartbeat
/// timer; when it reaches zero the timer is reloaded and every follower is flagged as needing a
/// heartbeat. The leader branch itself never returns a message.
223 | pub fn timer_tick(&mut self) -> Option> {
224 | match &mut self.leadership {
225 | Follower(FollowerState { election_ticks, .. }) |
226 | Candidate(CandidateState { election_ticks, .. }) => {
227 | match election_ticks.saturating_sub(1) {
228 | 0 => {
229 | info!("election timeout at {}", &self.current_term);
// `timeout` replaces the leadership state, so the expired counter is not written back.
230 | self.timeout()
231 | }
232 | new_election_ticks => {
233 | *election_ticks = new_election_ticks;
234 | None
235 | }
236 | }
237 | }
238 | Leader(leader_state) => {
239 | match leader_state.heartbeat_ticks.saturating_sub(1) {
240 | 0 => {
241 | leader_state.heartbeat_ticks = self.config.heartbeat_interval_ticks;
242 | debug!("sending heartbeat");
// Only flags the heartbeat; the message itself is built later by `append_entries`.
243 | for replication in leader_state.followers.values_mut() {
244 | replication.send_heartbeat = true;
245 | }
246 | }
247 | new_heartbeat_ticks => {
248 | leader_state.heartbeat_ticks = new_heartbeat_ticks;
249 | }
250 | }
251 | None
252 | }
253 | }
254 | }
255 |
/// Resets whatever this node tracks about `peer_node_id`.
///
/// * Follower: nothing to reset; returns `None`.
/// * Candidate: if the id is a known peer, returns a fresh vote request addressed to it.
/// * Leader: if the id is a tracked follower, rewinds its replication state so that the next
///   `append_entries` sends a probe/heartbeat starting after the leader's last log entry, and
///   clears any in-flight marker. Returns `None`.
256 | pub fn reset_peer(&mut self, peer_node_id: NodeId) -> Option> {
257 | match &mut self.leadership {
258 | Follower(_) => {
259 | None
260 | }
261 | Candidate(_) => {
262 | if self.peers.contains(&peer_node_id) {
263 | let vote_request = self.request_vote();
264 | let from = peer_node_id;
265 | vote_request.map(|message| SendableRaftMessage { message, dest: RaftMessageDestination::To(from) })
266 | } else {
267 | None
268 | }
269 | }
270 | Leader(leader_state) => {
271 | if let Some(replication) = leader_state.followers.get_mut(&peer_node_id) {
272 | info!("resetting follower state {}", &peer_node_id);
273 | replication.next_idx = self.log.last_index() + 1;
274 | replication.send_probe = true;
275 | replication.send_heartbeat = true;
276 | replication.inflight = None;
277 | }
278 | None
279 | }
280 | }
281 | }
282 |
283 | //
284 | // -- raft TLA+ parallel code --
285 | // the code below is so similar to Raft's TLA+ code that the TLA+ is provided
286 | // in the right-hand column for sections which correspond almost exactly. code
287 | // is provided in the same order as the TLA+ so that the reader can follow.
288 | //
289 |
290 | //
291 | // \* Define state transitions
292 | //
293 |
294 | // \* Server i times out and starts a new election.
/// Starts a new election unless this node is already the leader.
///
/// Increments the term, votes for itself, becomes a candidate with a fresh randomized election
/// timeout, and returns a vote request to broadcast. Unlike the TLA+ in the right-hand column,
/// the self-vote is recorded immediately rather than left as Nil / the empty set. If the self-vote
/// alone already forms a quorum, `become_leader` promotes the node right away and `request_vote`
/// then yields `None`. A leader returns `None` without changing anything.
295 | pub fn timeout(&mut self) -> Option> { // Timeout(i) ==
296 | match &self.leadership {
297 | Follower(_) | Candidate(_) => { // /\ state[i] \in {Follower, Candidate}
298 | self.current_term += 1; // /\ currentTerm' = [currentTerm EXCEPT ![i] = currentTerm[i] + 1]
299 | // \* Most implementations would probably just set the local vote
300 | // \* atomically, but messaging localhost for it is weaker.
301 | self.voted_for = Some(self.node_id.clone()); // /\ votedFor' = [votedFor EXCEPT ![i] = Nil]
302 | let votes_granted = iter::once(self.node_id.clone()).collect(); // /\ votesGranted' = [votesGranted EXCEPT ![i] = {}]
303 | self.leadership = Candidate(CandidateState { // /\ state' = [state EXCEPT ![i] = Candidate]
304 | votes_granted,
305 | election_ticks: self.random_election_timeout(),
306 | });
307 |
308 | info!("became candidate at {}", self.current_term);
309 | self.become_leader();
310 | self.advance_commit_idx();
311 | self.request_vote().map(|message| SendableRaftMessage {
312 | message,
313 | dest: RaftMessageDestination::Broadcast,
314 | })
315 | }
316 | Leader(_) => {
317 | None
318 | }
319 | }
320 | }
321 |
322 | // \* Candidate i sends j a RequestVote request.
/// Builds a vote request for the current term carrying this node's last log term and index.
///
/// Returns `None` unless this node is currently a candidate. The destination is chosen by the
/// caller.
323 | fn request_vote(&self) -> Option { // RequestVote(i,j) ==
324 | match self.leadership {
325 | Candidate { .. } => { // /\ state[i] = Candidate
326 | let vote_request_msg = RaftMessage { // /\ Send([
327 | term: self.current_term, // mterm |-> currentTerm[i],
328 | rpc: Some(Rpc::VoteRequest(VoteRequest { // mtype |-> RequestVoteRequest,
329 | last_log_term: self.log.last_term(), // mlastLogTerm |-> LastTerm(log[i]),
330 | last_log_idx: self.log.last_index(), // mlastLogIndex |-> Len(log[i]),
331 | })),
332 | };
333 | Some(vote_request_msg)
334 | }
335 | _ => None,
336 | }
337 | }
338 |
339 | // \* Leader i sends j an AppendEntries request containing up to 1 entry.
340 | // \* While implementations may want to send more than 1 at a time, this spec uses
341 | // \* just 1 because it minimizes atomic regions without loss of generality.
/// Builds the next append request (entries or heartbeat/probe) for follower `to_node_id`.
///
/// Returns `None` when this node is not the leader, the id is not a tracked follower, there is
/// nothing to send (no new entries outside probe mode and no heartbeat due), a previous request
/// to this follower is still in flight, or the term of the entry preceding `next_idx` cannot be
/// read from the log.
///
/// When entries are sent, the batch starts at the follower's `next_idx` and grows until the end
/// of the log, a log read fails, or adding another entry would exceed
/// `config.replication_chunk_size`; the first entry is always included regardless of its size.
/// On success the follower's `send_heartbeat` flag is cleared and `inflight` is set to the index
/// of the last entry covered by the request.
342 | pub fn append_entries(&mut self,
343 | to_node_id: NodeId)
344 | -> Option> { // AppendEntries(i, j) ==
345 | if let Leader(leader_state) = &mut self.leadership { // /\ state[i] = Leader
346 | let replication =
347 | match leader_state.followers.get_mut(&to_node_id) { // /\ i /= j
348 | Some(replication) => replication,
349 | None => return None,
350 | };
351 | let last_log_idx = self.log.last_index();
352 | let next_idx = replication.next_idx;
// While probing, only empty requests are sent until the follower confirms a matching prefix.
353 | let send_entries = (last_log_idx >= next_idx &&
354 | !replication.send_probe);
355 | if !send_entries && !replication.send_heartbeat {
356 | return None;
357 | }
// At most one outstanding request per follower.
358 | if replication.inflight.is_some() {
359 | return None;
360 | }
361 | let prev_log_idx = next_idx - 1; // /\ LET prevLogIndex == nextIndex[i][j] - 1
362 | let maybe_prev_log_term = if prev_log_idx != Default::default() { // prevLogTerm == IF prevLogIndex > 0 THEN
363 | self.log.get_term(prev_log_idx) // log[i][prevLogIndex].term
364 | } else { // ELSE
365 | Some(Default::default()) // 0
366 | };
367 |
368 | let prev_log_term = match maybe_prev_log_term {
369 | Some(prev_log_term) => prev_log_term,
370 | None => {
371 | error!("missing log {} to send to {}!",
372 | &prev_log_idx, &to_node_id);
373 | return None;
374 | }
375 | };
376 |
377 | let mut entries: Vec = Vec::new();
378 | let last_entry: LogIndex;
379 | if send_entries { // \* Send up to 1 entry, constrained by the end of the log.
// Running total of `entry_len` over the entries considered so far.
380 | let mut entries_size = 0usize;
381 | let max_entries_size = self.config.replication_chunk_size;
382 | let entry_log_idxs = (0..).map(|idx| next_idx + idx)
383 | .take_while(|log_idx| *log_idx <= last_log_idx);
384 | for entry_log_idx in entry_log_idxs { // entries == SubSeq(log[i], nextIndex[i][j], lastEntry)
385 | let append_log_entry = if let Some(log_entry) = self.log.get(entry_log_idx) {
// "First" is judged by accumulated size being zero, not by the number of entries pushed.
386 | let first_entry = entries_size == 0;
387 | if !first_entry && entries_size == max_entries_size {
388 | None
389 | } else {
390 | entries_size = entries_size.saturating_add(self.log.entry_len(&log_entry));
391 | if first_entry || entries_size <= max_entries_size {
392 | Some(log_entry)
393 | } else {
394 | None
395 | }
396 | }
397 | } else {
398 | error!("error fetching raft log {} to send to {}!",
399 | &entry_log_idx, &to_node_id);
400 | None
401 | };
402 | if let Some(log_entry) = append_log_entry {
403 | entries.push(log_entry);
404 | } else {
405 | break;
406 | }
407 | }
408 | last_entry = prev_log_idx + (entries.len() as u64); // lastEntry == Min({Len(log[i]), nextIndex[i][j]})
409 | } else {
410 | last_entry = prev_log_idx;
411 | }
412 | let append_request_msg = RaftMessage { // IN Send([
413 | term: self.current_term, // mterm |-> currentTerm[i],
414 | rpc: Some(Rpc::AppendRequest(AppendRequest { // mtype |-> AppendEntriesRequest,
415 | prev_log_idx, // mprevLogIndex |-> prevLogIndex,
416 | prev_log_term, // mprevLogTerm |-> prevLogTerm,
417 | entries, // mentries |-> entries,
418 | leader_commit: self.log.commit_idx.min(last_entry), // mcommitIndex |-> Min({commitIndex[i], lastEntry}),
419 | })),
420 | };
421 | replication.send_heartbeat = false;
422 | replication.inflight = Some(last_entry);
423 | Some(SendableRaftMessage {
424 | message: append_request_msg,
425 | dest: RaftMessageDestination::To(to_node_id),
426 | })
427 | } else {
428 | None
429 | }
430 | }
431 |
432 | // \* Candidate i transitions to leader.
/// Promotes this node from candidate to leader once the granted votes reach quorum size.
///
/// Does nothing in any other state or while votes are insufficient. On promotion every peer gets
/// a fresh replication state (`next_idx` just past the local log, everything else defaulted), the
/// heartbeat countdown starts at zero, and an empty entry is appended in the new term.
433 | fn become_leader(&mut self) { // BecomeLeader(i) ==
434 | if let Candidate(candidate_state) = &self.leadership { // /\ state[i] = Candidate
435 | if candidate_state.votes_granted.len() >= self.quorum_size() { // /\ votesGranted[i] \in Quorum
436 | info!("became leader at {}", &self.current_term);
437 | self.leadership = Leader(LeaderState { // /\ state' = [state EXCEPT ![i] = Leader]
438 | followers: (self.peers.iter().cloned())
439 | .map(|id| (id, ReplicationState {
440 | next_idx: self.log.last_index() + 1, // /\ nextIndex' = [nextIndex EXCEPT ![i] = [j \in Server |-> Len(log[i]) + 1]]
441 | match_idx: Default::default(), // /\ matchIndex' = [matchIndex EXCEPT ![i] = [j \in Server |-> 0]]
442 | inflight: Default::default(),
443 | send_probe: Default::default(),
444 | send_heartbeat: Default::default(),
445 | })).collect(),
446 | heartbeat_ticks: 0,
447 | });
448 | // append a noop in the new term to commit entries from past terms (Raft Section 5.4.2)
// The result is deliberately discarded: a failed noop append does not undo the promotion.
449 | let _ignore = self.client_request(Default::default());
450 | }
451 | }
452 | }
453 |
454 | // \* Leader i receives a client request to add v to the log.
/// Appends `data` to the local log as a new entry in the current term.
///
/// Only a leader may append. On success the commit index is re-evaluated immediately, which lets
/// a group whose quorum is this node alone commit without any response from a peer.
///
/// # Errors
///
/// * `AppendError::Cancelled` carrying the data back when this node is not the leader.
/// * `AppendError::RaftLogErr` wrapping the error returned by the log's `append`.
455 | pub fn client_request(
456 | &mut self,
457 | data: Bytes,
458 | ) -> Result<(), AppendError> { // ClientRequest(i, v) ==
459 | let entry = LogEntry {
460 | term: self.current_term, // /\ LET entry == [term |-> currentTerm[i],
461 | data, // value |-> v]
462 | };
463 | if let Leader(_) = &self.leadership { // /\ state[i] = Leader
464 | self.log.append(entry).map_err(AppendError::RaftLogErr)?; // newLog == Append(log[i], entry)
465 | self.advance_commit_idx();
466 | Ok(()) // IN log' = [log EXCEPT ![i] = newLog]
467 | } else {
468 | Err(AppendError::Cancelled { data: entry.data })
469 | }
470 | }
471 |
472 | // \* Leader i advances its commitIndex.
473 | // \* This is done as a separate step from handling AppendEntries responses,
474 | // \* in part to minimize atomic regions, and in part so that leaders of
475 | // \* single-server clusters are able to mark entries committed.
/// Recomputes the leader's commit index from the followers' acknowledged positions.
///
/// Collects every follower's `match_idx` plus this node's own last log index, and picks the
/// highest index that at least a quorum of those values have reached. The commit index only moves
/// forward, and only to an entry whose term equals the current term. No-op unless leader.
476 | fn advance_commit_idx(&mut self) { // AdvanceCommitIndex(i) ==
477 | if let Leader(leader_state) = &self.leadership { // /\ state[i] = Leader
478 | let mut match_idxs: Vec<_> = // /\ LET \* The set of servers that agree up through index.
479 | (leader_state.followers.values())
480 | .map(|follower| follower.match_idx)
481 | .chain(iter::once(self.log.last_index()))
482 | .collect();
483 | match_idxs.sort_unstable(); // Agree(index) == {i} \cup {k \in Server : matchIndex[i][k] >= index}
// In descending order, the value at position quorum-1 is reached by at least `quorum` nodes;
// after the skip it is the first (and therefore the maximum) remaining element.
484 | let agree_idxs = (match_idxs.into_iter()) // \* The maximum indexes for which a quorum agrees
485 | .rev().skip(self.quorum_size() - 1); // agreeIndexes == {index \in 1..Len(log[i]) : Agree(index) \in Quorum}
486 | let commit_idx = match agree_idxs.max() { // \* New value for commitIndex'[i]
487 | Some(agree_idx) => { // newCommitIndex == IF /\ agreeIndexes /= {}
488 | if self.log.get_term(agree_idx) == Some(self.current_term) {// /\ log[i][Max(agreeIndexes)].term = currentTerm[i]
489 | self.log.commit_idx.max(agree_idx) // THEN Max(agreeIndexes)
490 | } else {
491 | self.log.commit_idx // ELSE commitIndex[i]
492 | }
493 | }
494 | None => self.log.commit_idx,
495 | };
496 | if commit_idx != self.log.commit_idx {
497 | debug!("committed transactions from {} to {}",
498 | &self.log.commit_idx, &commit_idx);
499 | }
500 | self.log.commit_idx = commit_idx; // IN commitIndex' = [commitIndex EXCEPT ![i] = newCommitIndex]
501 | }
502 | }
503 |
504 | //
505 | // \* Message handlers
506 | // \* i = recipient, j = sender, m = message
507 | //
508 |
509 | // \* Server i receives a RequestVote request from server j with
510 | // \* m.mterm <= currentTerm[i].
/// Handles a vote request from `from` and always returns a vote response addressed back to it.
///
/// The vote is granted only if the request is for the current term, the sender's log is at least
/// as up to date as ours (higher last term, or same last term and at least the same last index),
/// and this node has not already voted for a different node in this term. Granting records the
/// vote and, on a follower, restarts the election countdown.
///
/// # Panics
///
/// Panics if `msg_term` exceeds the current term; `receive` runs `update_term` before dispatching
/// here.
511 | fn handle_vote_request(&mut self,
512 | msg_term: TermId,
513 | msg: VoteRequest,
514 | from: NodeId)
515 | -> Option> { // HandleRequestVoteRequest(i, j, m) ==
516 | let last_log_idx = self.log.last_index();
517 | let last_log_term = self.log.last_term();
518 | let log_ok = // LET logOk ==
519 | (msg.last_log_term > last_log_term) || // \/ m.mlastLogTerm > LastTerm(log[i])
520 | (msg.last_log_term == last_log_term && // \/ /\ m.mlastLogTerm = LastTerm(log[i])
521 | msg.last_log_idx >= last_log_idx); // /\ m.mlastLogIndex >= Len(log[i])
522 | let grant = // LET grant ==
523 | msg_term == self.current_term && // /\ m.mterm = currentTerm[i]
524 | log_ok && // /\ logOk
525 | self.voted_for.as_ref().map(|vote| &from == vote).unwrap_or(true); // /\ votedFor[i] \in {Nil, j}
526 | assert!(msg_term <= self.current_term); // IN /\ m.mterm <= currentTerm[i]
527 | if grant {
528 | self.voted_for = Some(from.clone()); // /\ \/ grant /\ votedFor' = [votedFor EXCEPT ![i] = j]
529 | } // \/ ~grant /\ UNCHANGED votedFor
530 |
// Everything from here to the reply is logging, plus the follower's election timer reset.
531 | if grant {
532 | info!("granted vote at {} with {} at {} for node {} with {} at {}",
533 | &self.current_term, &last_log_idx, &last_log_term,
534 | &from, &msg.last_log_idx, &msg.last_log_term);
535 | match &mut self.leadership {
536 | Follower(FollowerState { election_ticks, random_election_ticks, .. }) =>
537 | *election_ticks = *random_election_ticks,
538 | Candidate(_) | Leader(_) => (),
539 | }
540 | } else if msg_term != self.current_term {
541 | info!("ignored message with {} < current {}: {}",
542 | &msg_term, &self.current_term, &msg);
543 | } else if let Some(vote) = &self.voted_for {
544 | info!("rejected vote at {} for node {} as already voted for {}",
545 | &self.current_term, &from, vote);
546 | } else {
547 | info!("rejected vote at {} with {} at {} for node {} with {} at {}",
548 | &self.current_term, &last_log_idx, &last_log_term,
549 | &from, &msg.last_log_idx, &msg.last_log_term);
550 | }
551 |
552 | let message = RaftMessage { // /\ Reply([
553 | term: self.current_term, // mterm |-> currentTerm[i],
554 | rpc: Some(Rpc::VoteResponse(VoteResponse { // mtype |-> RequestVoteResponse,
555 | vote_granted: grant, // mvoteGranted |-> grant,
556 | })),
557 | };
558 | Some(SendableRaftMessage { message, dest: RaftMessageDestination::To(from) })
559 | }
560 |
561 | // \* Server i receives a RequestVote response from server j with
562 | // \* m.mterm = currentTerm[i].
/// Records a vote response from `from`; never produces a reply.
///
/// A granted vote is added to the candidate's vote set; a rejection is only logged. Responses
/// arriving while this node is not a candidate are ignored. Promotion to leader is not decided
/// here: `receive` calls `become_leader` after every handled message.
///
/// # Panics
///
/// Panics if `msg_term` differs from the current term; `receive` filters stale responses with
/// `drop_stale_response` and raises the local term with `update_term` beforehand.
563 | fn handle_vote_response(&mut self,
564 | msg_term: TermId,
565 | msg: VoteResponse,
566 | from: NodeId)
567 | -> Option> { // HandleRequestVoteResponse(i, j, m) ==
568 | assert!(msg_term == self.current_term); // /\ m.mterm = currentTerm[i]
569 | if let Candidate(candidate_state) = &mut self.leadership {
570 | if msg.vote_granted { // /\ \/ /\ m.mvoteGranted
571 | info!("received vote granted from {} at {}",
572 | &from, &self.current_term);
573 | candidate_state.votes_granted.insert(from); // /\ votesGranted' = [votesGranted EXCEPT ![i] = votesGranted[i] \cup {j}]
574 | } else { // \/ /\ ~m.mvoteGranted /\ UNCHANGED <>
575 | info!("received vote rejected from {} at {}",
576 | &from, &self.current_term);
577 | }
578 | }
579 | None
580 | }
581 |
582 | // \* Server i receives an AppendEntries request from server j with
583 | // \* m.mterm <= currentTerm[i]. This just handles m.entries of length 0 or 1, but
584 | // \* implementations could safely accept more by treating them the same as
585 | // \* multiple independent requests of 1 entry.
586 | fn handle_append_request(&mut self,
587 | msg_term: TermId,
588 | msg: AppendRequest,
589 | from: NodeId)
590 | -> Option> { // HandleAppendEntriesRequest(i, j, m) ==
591 | let prev_log_idx = msg.prev_log_idx;
592 | let msg_prev_log_term = msg.prev_log_term;
593 | let our_prev_log_term = self.log.get_term(prev_log_idx);
594 | let log_ok =
595 | prev_log_idx == Default::default() || // LET logOk == \/ m.mprevLogIndex = 0
596 | Some(msg_prev_log_term) == our_prev_log_term; // \/ /\ m.mprevLogIndex > 0 /\ m.mprevLogIndex <= Len(log[i]) /\ m.mprevLogTerm = log[i][m.mprevLogIndex].term
597 | assert!(msg_term <= self.current_term); // IN /\ m.mterm <= currentTerm[i]
598 | // /\ \/ \* return to follower state
599 | if msg_term == self.current_term { // /\ m.mterm = currentTerm[i]
600 | match &mut self.leadership {
601 | Candidate(_) => { // /\ state[i] = Candidate
602 | let random_election_ticks = self.random_election_timeout();
603 | self.leadership = Follower(FollowerState { // /\ state' = [state EXCEPT ![i] = Follower]
604 | leader: Some(from.clone()),
605 | election_ticks: random_election_ticks,
606 | random_election_ticks,
607 | });
608 | info!("became follower at {} of {}", &self.current_term, &from);
609 | }
610 | Follower(follower_state) => {
611 | if follower_state.leader.is_none() {
612 | info!("became follower at {} of {}", &self.current_term, &from);
613 | }
614 | follower_state.leader = Some(from.clone());
615 | follower_state.election_ticks = follower_state.random_election_ticks;
616 | }
617 | Leader { .. } => {
618 | panic!("received append request as leader at {} from {}",
619 | &self.current_term, &from);
620 | }
621 | }
622 | }
623 | // \/ /\ \* reject request
624 | if (msg_term < self.current_term || // \/ m.mterm < currentTerm[i]
625 | (assert_true!(msg_term == self.current_term) && // \/ /\ m.mterm = currentTerm[i]
626 | assert_match!(Follower(_) = &self.leadership) && // /\ state[i] = Follower
627 | !log_ok)) // /\ \lnot logOk
628 | {
629 | if msg_term < self.current_term {
630 | info!("ignored message with {} < current {}: {}",
631 | &msg_term, &self.current_term, &msg);
632 | } else if let Some(our_prev_log_term) = our_prev_log_term {
633 | warn!("rejected append from {} with {} at {}, we have {}",
634 | &from, &prev_log_idx, msg_prev_log_term, &our_prev_log_term);
635 | } else {
636 | info!("rejected append from {} with {}, we are behind at {}",
637 | &from, &prev_log_idx, self.log.last_index());
638 | }
639 |
640 | let message = RaftMessage { // /\ Reply([
641 | term: self.current_term, // mterm |-> currentTerm[i],
642 | rpc: Some(Rpc::AppendResponse(AppendResponse { // mtype |-> AppendEntriesResponse,
643 | success: false, // msuccess |-> FALSE,
644 | match_idx: self.log.prev_index(), // mmatchIndex |-> 0,
645 | last_log_idx: self.log.last_index(),
646 | })),
647 | };
648 | Some(SendableRaftMessage { message, dest: RaftMessageDestination::To(from) })
649 | } else { // \/ \* accept request
650 | assert!(msg_term == self.current_term); // /\ m.mterm = currentTerm[i]
651 | assert_match!(Follower(_) = &self.leadership); // /\ state[i] = Follower
652 | assert!(log_ok); // /\ logOk
653 | // ... and the TLA+ that follows doesn't correspond to procedural code well
654 | // find point of log conflict
655 | let msg_last_log_idx = prev_log_idx + (msg.entries.len() as u64);
656 | let msg_entries_iter = (1..).map(|idx| prev_log_idx + idx).zip(msg.entries);
657 | let mut last_processed_idx = prev_log_idx;
658 | for (msg_entry_log_idx, msg_entry) in msg_entries_iter {
659 | if msg_entry_log_idx == self.log.last_index() + 1 {
660 | match self.log.append(msg_entry) {
661 | Ok(()) => (),
662 | Err(_) => break,
663 | }
664 | } else if let Some(our_entry_log_term) = self.log.get_term(msg_entry_log_idx) {
665 | if our_entry_log_term != msg_entry.term {
666 | assert!(msg_entry_log_idx > self.log.commit_idx);
667 | match self.log.cancel_from(msg_entry_log_idx) {
668 | Ok(cancelled_len) =>
669 | info!("cancelled {} transactions from {}", cancelled_len, &msg_entry_log_idx),
670 | Err(_) =>
671 | break,
672 | }
673 | match self.log.append(msg_entry) {
674 | Ok(()) => (),
675 | Err(_) => break,
676 | }
677 | }
678 | } else {
679 | error!("failed to fetch log index {} to find conflicts for append!", &msg_entry_log_idx);
680 | break;
681 | }
682 | last_processed_idx = msg_entry_log_idx;
683 | }
684 |
685 | // update commit index from leader
686 | let leader_commit = msg.leader_commit.min(last_processed_idx);
687 | if leader_commit > self.log.commit_idx {
688 | debug!("committed transactions from {} to {}", &self.log.commit_idx, &leader_commit);
689 |
690 | self.log.commit_idx = leader_commit; // /\ commitIndex' = [commitIndex EXCEPT ![i] = m.mcommitIndex]
691 | }
692 |
693 | let message = RaftMessage { // /\ Reply([
694 | term: self.current_term, // mterm |-> currentTerm[i],
695 | rpc: Some(Rpc::AppendResponse(AppendResponse { // mtype |-> AppendEntriesResponse,
696 | success: true, // msuccess |-> TRUE,
697 | match_idx: msg_last_log_idx.min(self.log.last_index()), // mmatchIndex |-> m.mprevLogIndex + Len(m.mentries),
698 | last_log_idx: self.log.last_index(),
699 | })),
700 | };
701 | Some(SendableRaftMessage { message, dest: RaftMessageDestination::To(from) })
702 | }
703 | }
704 |
705 | // \* Server i receives an AppendEntries response from server j with
706 | // \* m.mterm = currentTerm[i].
/// Applies an append response from follower `from` to its replication state; never replies.
///
/// On success the in-flight marker is cleared if the follower acknowledged at least that index,
/// `next_idx` and `match_idx` move forward (never backward), and probing ends.
///
/// On rejection `next_idx` steps back by one, bounded above by the follower's reported last log
/// index plus one and below by its reported match index plus one; the follower enters probe mode
/// and the in-flight marker is cleared. `next_idx` is then walked back further, one index at a
/// time, charging entry lengths against a budget of `config.replication_chunk_size`, stopping at
/// the reported match index, when a length cannot be read, or when the budget runs out.
///
/// Ignored when this node is not the leader or `from` is not a tracked follower.
///
/// # Panics
///
/// Panics if `msg_term` differs from the current term; `receive` filters stale responses first.
707 | fn handle_append_response(&mut self,
708 | msg_term: TermId,
709 | msg: AppendResponse,
710 | from: NodeId)
711 | -> Option> { // HandleAppendEntriesResponse(i, j, m) ==
712 | assert!(msg_term == self.current_term); // /\ m.mterm = currentTerm[i]
713 | if let Leader(leader_state) = &mut self.leadership {
714 | if let Some(replication) = leader_state.followers.get_mut(&from) {
715 | if msg.success { // /\ \/ /\ m.msuccess \* successful
// `None < Some(_)`, so with nothing in flight this comparison is trivially true.
716 | if Some(msg.match_idx) >= replication.inflight {
717 | replication.inflight = None;
718 | }
719 | if msg.match_idx + 1 > replication.next_idx {
720 | replication.next_idx = msg.match_idx + 1; // /\ nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1]
721 | }
722 | if msg.match_idx > replication.match_idx {
723 | replication.match_idx = msg.match_idx; // /\ matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex]
724 | }
725 | replication.send_probe = false;
726 | } else { // \/ /\ \lnot m.msuccess \* not successful
// Only the first rejection of a probing sequence is logged at info level.
727 | if !replication.send_probe {
728 | info!("received append rejection at {} from {} having {}",
729 | &replication.next_idx, &from, &msg.last_log_idx);
730 | } else {
731 | verbose!("received append rejection at {} from {} having {}",
732 | &replication.next_idx, &from, &msg.last_log_idx);
733 | }
734 | replication.next_idx = ((replication.next_idx - 1) // /\ nextIndex' = [nextIndex EXCEPT ![i][j] = Max({nextIndex[i][j] - 1, 1})]
735 | .min(msg.last_log_idx + 1)
736 | .max(msg.match_idx + 1));
737 | replication.send_probe = true;
738 | replication.inflight = None;
739 |
// Back off by up to one chunk's worth of entries per rejection instead of a single index.
740 | let mut chunk_size_remaining = self.config.replication_chunk_size;
741 | while let Some(next_idx) = replication.next_idx.checked_sub(1) {
742 | if next_idx <= msg.match_idx {
743 | break;
744 | }
745 | let entry_len = match self.log.get_len(replication.next_idx) {
746 | Some(entry_len) => entry_len,
747 | None => break,
748 | };
749 | chunk_size_remaining = match chunk_size_remaining.checked_sub(entry_len) {
750 | Some(new_chunk_size_remaining) => new_chunk_size_remaining,
751 | None => break,
752 | };
753 | replication.next_idx = next_idx;
754 | }
755 | }
756 | }
757 | }
758 | None
759 | }
760 |
761 | // \* Any RPC with a newer term causes the recipient to advance its term first.
/// Adopts the term of `msg` if it is newer than ours and steps down to follower.
///
/// The new follower state has no known leader and no recorded vote. A node that was already a
/// follower or candidate keeps its remaining election countdown; a former leader starts a fresh
/// randomized one. Messages with an equal or older term leave the state untouched.
762 | fn update_term(&mut self,
763 | from: &NodeId,
764 | msg: &RaftMessage) { // UpdateTerm(i, j, m) ==
765 | if msg.term > self.current_term { // /\ m.mterm > currentTerm[i]
766 | info!("became follower at {} (from {}) due to message from {}: {}",
767 | &msg.term, &self.current_term, from, &msg);
768 | let random_election_ticks = self.random_election_timeout();
769 |
// Carry over the running countdown so a higher-term message does not postpone the next election.
770 | let election_ticks = match &self.leadership {
771 | Follower(FollowerState { election_ticks, .. }) |
772 | Candidate(CandidateState { election_ticks, .. }) =>
773 | *election_ticks,
774 | Leader(_) =>
775 | random_election_ticks,
776 | };
777 | self.current_term = msg.term; // /\ currentTerm' = [currentTerm EXCEPT ![i] = m.mterm]
778 | self.leadership = Follower(FollowerState { // /\ state' = [state EXCEPT ![i] = Follower]
779 | leader: None,
780 | election_ticks,
781 | random_election_ticks,
782 | });
783 | self.voted_for = Default::default(); // /\ votedFor' = [votedFor EXCEPT ![i] = Nil]
784 | }
785 | }
786 |
787 | // \* Responses with stale terms are ignored.
/// Discards a response whose term is older than the current term.
///
/// Returns `Ok(())` when the message was stale and has been dropped (after logging it), and
/// `Err(msg)` handing the message back unchanged when it still needs to be processed. Note the
/// inverted sense: `Err` is the normal "keep going" path.
788 | fn drop_stale_response(&self,
789 | msg_term: TermId,
790 | msg: T)
791 | -> Result<(), T>
792 | where T: fmt::Display
793 | { // DropStaleResponse(i, j, m) ==
794 | if msg_term < self.current_term { // /\ m.mterm < currentTerm[i]
795 | info!("ignored message with {} < current {}: {}",
796 | &msg_term, &self.current_term, &msg);
797 | drop(msg); // /\ Discard(m)
798 | Ok(())
799 | } else {
800 | Err(msg)
801 | }
802 | }
803 |
804 | // /* Receive a message.
/// Processes one message received from peer `from` and returns the reply to send, if any.
///
/// Messages from senders outside the peer set are logged and dropped. Otherwise the local term is
/// first raised if the message carries a newer one, the message is dispatched to the matching
/// handler (responses with a stale term are dropped before dispatch), and finally the node checks
/// whether it can now become leader or advance its commit index. A message without an RPC
/// payload still goes through the term update and those final checks.
805 | pub fn receive(&mut self,
806 | msg: RaftMessage,
807 | from: NodeId)
808 | -> Option> { // Receive(m) ==
809 | if !self.peers.contains(&from) {
810 | error!("received raft message from {} for wrong group", &from);
811 | return None;
812 | }
813 | // IN \* Any RPC with a newer term causes the recipient to advance
814 | // \* its term first. Responses with stale terms are ignored.
815 | self.update_term(&from, &msg); // \/ UpdateTerm(i, j, m)
816 | let reply = match msg.rpc {
817 | Some(Rpc::VoteRequest(request)) => // \/ /\ m.mtype = RequestVoteRequest
818 | self.handle_vote_request(msg.term, request, from), // /\ HandleRequestVoteRequest(i, j, m)
819 | Some(Rpc::VoteResponse(response)) => { // \/ /\ m.mtype = RequestVoteResponse
820 | match self.drop_stale_response(msg.term, response) { // /\ \/ DropStaleResponse(i, j, m)
821 | Ok(()) => None,
822 | Err(response) =>
823 | self.handle_vote_response(msg.term, response, from), // \/ HandleRequestVoteResponse(i, j, m)
824 | }
825 | }
826 | Some(Rpc::AppendRequest(request)) => // \/ /\ m.mtype = AppendEntriesRequest
827 | self.handle_append_request(msg.term, request, from), // /\ HandleAppendEntriesRequest(i, j, m)
828 | Some(Rpc::AppendResponse(response)) => { // \/ /\ m.mtype = AppendEntriesResponse
829 | match self.drop_stale_response(msg.term, response) { // /\ \/ DropStaleResponse(i, j, m)
830 | Ok(()) => None,
831 | Err(response) =>
832 | self.handle_append_response(msg.term, response, from), // \/ HandleAppendEntriesResponse(i, j, m)
833 | }
834 | }
835 | None => None,
836 | };
// Both calls are no-ops unless the handled message made them applicable.
837 | self.become_leader();
838 | self.advance_commit_idx();
839 | reply
840 | }
841 |
842 | //
843 | // helpers
844 | //
845 |
/// Quorum size for this node's group, computed from the number of peers (which excludes this node).
846 | fn quorum_size(&self) -> usize {
847 | quorum_size(self.peers.len())
848 | }
849 |
/// Draws a new randomized election timeout from this node's random source and current config.
850 | fn random_election_timeout(&mut self) -> u32 {
851 | random_election_timeout(&mut self.random, self.config.election_timeout_ticks)
852 | }
853 | }
854 |
855 | /// Computes the minimum size of a quorum of nodes in a Raft group.
856 | ///
857 | /// Returns the minimum number of nodes necessary to constitute a quorum in a Raft group made up of the local node plus
858 | /// `peer_count` other nodes. A quorum of reachable nodes is needed to elect a leader and append to the distributed log.
/// The result is a strict majority of `peer_count + 1` nodes, e.g. `quorum_size(0) == 1`, `quorum_size(2) == 2` and
/// `quorum_size(4) == 3`.
859 | pub fn quorum_size(peer_count: usize) -> usize {
860 | (peer_count.saturating_add(1)) / 2 + 1
861 | }
862 |
/// Returns `election_timeout_ticks` plus a random offset in `0..election_timeout_ticks`.
///
/// A base timeout of zero yields zero: `checked_rem` returns `None` for a zero divisor and the
/// offset falls back to 0. The sum saturates at `u32::MAX` instead of overflowing.
863 | fn random_election_timeout(random: &mut impl RngCore, election_timeout_ticks: u32) -> u32 {
864 | let random = random.next_u32().checked_rem(election_timeout_ticks).unwrap_or(0);
865 | election_timeout_ticks.saturating_add(random)
866 | }
867 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2021 jessa0
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Affero General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Affero General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU Affero General Public License
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | //! Raft consensus algorithm implementation.
19 | //!
20 | //! Raft is a consensus algorithm which replicates a strongly-consistent distributed log of entries with arbitrary data
21 | //! amongst a group of peers. It is also fault-tolerant, allowing replication to continue while a majority of peers can
22 | //! still communicate with each other. This crate provides an implementation of the Raft consensus algorithm with some
23 | //! optional features not implemented, such as pre-voting, membership changes, and snapshots.
24 | //!
25 | //! The Raft algorithm is implemented as a state machine driven in a few ways:
26 | //!
27 | //! * When attempting to append a new entry to the distributed log: [`append`](node::RaftNode::append) is called.
28 | //! * When a message is received from a peer: [`receive`](node::RaftNode::receive) is called.
29 | //! * Every time a fixed amount of time has elapsed: [`timer_tick`](node::RaftNode::timer_tick) is called.
30 | //!
31 | //! Each of these functions modifies the internal state and returns [messages](message::SendableRaftMessage) to be sent
32 | //! to peers. Once a log entry is "committed", or guaranteed to be returned at the same index on every functioning peer
33 | //! in the group, it may be retrieved using [`take_committed`](node::RaftNode::take_committed). An append to the log may
34 | //! be cancelled before reaching the committed state, however, which is discussed in more detail in ["Appending entries to the distributed log"].
35 | //!
36 | //! The backing storage for the distributed log must be provided as an implementation of the [`RaftLog`](log::RaftLog)
37 | //! trait, with careful attention to following the trait specification. A trivial in-memory implementation is provided
38 | //! by [`RaftLogMemory`](log::mem::RaftLogMemory).
39 | //!
40 | //! # Example
41 | //!
42 | //! ```
43 | //! use simple_raft::log::mem::RaftLogMemory;
44 | //! use simple_raft::node::{RaftConfig, RaftNode};
45 | //! use simple_raft::message::{RaftMessageDestination, SendableRaftMessage};
46 | //! use rand_chacha::ChaChaRng;
47 | //! use rand_core::SeedableRng;
48 | //! use std::collections::VecDeque;
49 | //! use std::str;
50 | //!
51 | //! // Construct 5 Raft peers
52 | //! type NodeId = usize;
53 | //! let mut peers = (0..5).map(|id: NodeId| RaftNode::new(
54 | //! id,
55 | //! (0..5).collect(),
56 | //! RaftLogMemory::new_unbounded(),
57 | //! ChaChaRng::seed_from_u64(id as u64),
58 | //! RaftConfig {
59 | //! election_timeout_ticks: 10,
60 | //! heartbeat_interval_ticks: 1,
61 | //! replication_chunk_size: usize::max_value(),
62 | //! },
63 | //! )).collect::<Vec<_>>();
64 | //!
65 | //! // Simulate reliably sending messages instantaneously between peers
66 | //! let mut inboxes = vec![VecDeque::new(); peers.len()];
67 | //! let send_message = |src_id: NodeId, sendable: SendableRaftMessage<NodeId>, inboxes: &mut Vec<VecDeque<_>>| {
68 | //! match sendable.dest {
69 | //! RaftMessageDestination::Broadcast => {
70 | //! println!("peer {} -> all: {}", src_id, &sendable.message);
71 | //! inboxes.iter_mut().for_each(|inbox| inbox.push_back((src_id, sendable.message.clone())))
72 | //! }
73 | //! RaftMessageDestination::To(dst_id) => {
74 | //! println!("peer {} -> peer {}: {}", src_id, dst_id, &sendable.message);
75 | //! inboxes[dst_id].push_back((src_id, sendable.message));
76 | //! }
77 | //! }
78 | //! };
79 | //!
80 | //! // Loop until a log entry is committed on all peers
81 | //! let mut appended = false;
82 | //! let mut peers_committed = vec![false; peers.len()];
83 | //! while !peers_committed.iter().all(|seen| *seen) {
84 | //! for (peer_id, peer) in peers.iter_mut().enumerate() {
85 | //! // Tick the timer
86 | //! let new_messages = peer.timer_tick();
87 | //! new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes));
88 | //!
89 | //! // Append a log entry on the leader
90 | //! if !appended && peer.is_leader() {
91 | //! if let Ok(new_messages) = peer.append("Hello world!") {
92 | //! new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes));
93 | //! appended = true;
94 | //! }
95 | //! }
96 | //!
97 | //! // Process message inbox
98 | //! while let Some((src_id, message)) = inboxes[peer_id].pop_front() {
99 | //! let new_messages = peer.receive(message, src_id);
100 | //! new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes));
101 | //! }
102 | //!
103 | //! // Check for committed log entries
104 | //! for log_entry in peer.take_committed() {
105 | //! if !log_entry.data.is_empty() {
106 | //! println!("peer {} saw commit {}", peer_id, str::from_utf8(&log_entry.data).unwrap());
107 | //! assert!(!peers_committed[peer_id]);
108 | //! peers_committed[peer_id] = true;
109 | //! }
110 | //! }
111 | //! }
112 | //! }
113 | //! ```
114 | //!
115 | //! ["Appending entries to the distributed log"]: node::RaftNode#appending-entries-to-the-distributed-log
116 |
117 | #![no_std]
118 |
119 | #![allow(unused_parens)]
120 | #![warn(missing_docs)]
121 |
122 | extern crate alloc;
123 |
124 | #[macro_use]
125 | mod macros;
126 |
127 | pub mod core;
128 | pub mod log;
129 | pub mod message;
130 | pub mod node;
131 | mod prelude;
132 |
--------------------------------------------------------------------------------
/src/log.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Open Whisper Systems
3 | * Copyright (C) 2021 jessa0
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 |
19 | //! Types related to Raft log storage.
20 | //!
21 | //! Raft requires a backing storage for entries of its distributed log as they are being replicated to and from other
22 | //! nodes. The [`RaftLog`] trait is implemented for that purpose, and the implementation is supplied to
23 | //! [`RaftNode`](crate::node::RaftNode).
24 |
25 | #[cfg(any(feature = "test", test))]
26 | #[macro_use]
27 | pub mod tests;
28 | pub mod mem;
29 |
30 | use core::iter;
31 | use crate::message::{LogEntry, LogIndex, TermId};
32 |
33 | /// An interface for storage of the Raft log of a [`RaftNode`](crate::node::RaftNode).
34 | ///
35 | /// # Initial state
36 | ///
37 | /// A Raft log is initialized as empty, with both [`prev_index`] and [`last_index`] returning
38 | /// [`LogIndex::default()`](crate::message::LogIndex::default). The index of the first appended log entry is `1` and all
39 | /// indices are contiguous.
40 | ///
41 | /// # Log truncation
42 | ///
43 | /// A Raft log of bounded size may discard old entries previously taken from the beginning of the log via [`take_next`]
44 | /// if, for example, it runs out of space. However, the term of the last discarded entry is preserved to be returned
45 | /// from [`prev_term`] if requested. The log can also be truncated explicitly from the end via [`cancel_from`].
46 | ///
47 | /// [`append`]: Self::append
48 | /// [`cancel_from`]: Self::cancel_from
49 | /// [`last_index`]: Self::last_index
50 | /// [`prev_index`]: Self::prev_index
51 | /// [`prev_term`]: Self::prev_term
52 | /// [`take_next`]: Self::take_next
53 | pub trait RaftLog {
54 |     /// The type of error returned by fallible operations.
55 |     type Error;
56 |
57 |     /// Appends an entry to the end of the log.
58 |     ///
59 |     /// # Errors
60 |     ///
61 |     /// If there was any error modifying the log, an error is returned.
62 |     fn append(&mut self, entry: LogEntry) -> Result<(), Self::Error>;
63 |
64 |     /// Cancels all entries including and after the entry at index `from_index`, removing them from the log. Returns the
65 |     /// number of entries removed.
66 |     ///
67 |     /// # Errors
68 |     ///
69 |     /// If there was any error modifying the log, or if the entries did not exist, an error is returned.
70 |     fn cancel_from(&mut self, from_index: LogIndex) -> Result<usize, Self::Error>;
71 |
72 |     /// Returns the approximate serialized length in bytes of a given log entry.
73 |     fn entry_len(&self, entry: &LogEntry) -> usize;
74 |
75 |     /// Returns the entry at a given index, or `None` if the index is greater than the length of the log or if the entry
76 |     /// has been discarded.
77 |     fn get(&mut self, index: LogIndex) -> Option<LogEntry>;
78 |
79 |     /// Returns the term of the entry at a given index, or `None` if the index is greater than the length of the log or
80 |     /// if the entry has been discarded.
81 |     fn get_term(&mut self, index: LogIndex) -> Option<TermId>;
82 |
83 |     /// Returns the approximate serialized length of the entry at a given index, or `None` if the index is greater than
84 |     /// the length of the log or if the entry has been discarded. The default implementation fetches the entry with
85 |     fn get_len(&mut self, index: LogIndex) -> Option<usize> { // [`get`](Self::get) and measures it with `entry_len`.
86 |         self.get(index).map(|entry: LogEntry| self.entry_len(&entry))
87 |     }
88 |
89 |     /// Returns the index of the last entry which has been returned by [`take_next`], or
90 |     /// [`LogIndex::default()`](crate::message::LogIndex::default) if none have been.
91 |     ///
92 |     /// [`take_next`]: Self::take_next
93 |     /// [`LogEntry`]: crate::message::LogEntry
94 |     fn last_taken_index(&self) -> LogIndex;
95 |
96 |     /// Returns the index of the last entry in the log, or [`LogIndex::default()`](crate::message::LogIndex::default) if
97 |     /// empty.
98 |     fn last_index(&self) -> LogIndex;
99 |
100 |     /// Returns the term of the last entry in the log, or [`TermId::default()`](crate::message::TermId::default) if
101 |     /// empty.
102 |     fn last_term(&self) -> TermId;
103 |
104 |     /// Returns the index immediately before the index of the first undiscarded entry in the log (see ["Log
105 |     /// Truncation"](RaftLog#log-truncation)).
106 |     fn prev_index(&self) -> LogIndex;
107 |
108 |     /// Returns the term of the entry immediately preceding the first undiscarded entry in the log (see ["Log
109 |     /// Truncation"](RaftLog#log-truncation)).
110 |     fn prev_term(&self) -> TermId;
111 |
112 |     /// Returns the next entry in the log not previously returned by this function, marking the returned entry eligible
113 |     /// for future discard (see ["Log Truncation"](RaftLog#log-truncation)). Returns `None` if there is no such entry.
114 |     fn take_next(&mut self) -> Option<LogEntry>;
115 | }
116 |
117 | pub(crate) struct RaftLogState<Log> { // backing log storage paired with the commit index tracked for it
118 |     log: Log, // the log implementation handed to `RaftLogState::new`
119 |     pub commit_idx: LogIndex, // `take_committed` yields entries only while the last taken index is below this
120 | }
121 |
122 | /// An iterator yielding committed [log entries][`LogEntry`].
123 | ///
124 | /// A given [`LogEntry`] will be yielded only once over the lifetime of a Raft node.
125 | ///
126 | /// [`LogEntry`]: crate::message::LogEntry
127 | pub struct CommittedIter<'a, Log> {
128 |     log: &'a mut RaftLogState<Log>,
129 | }
130 |
131 | //
132 | // RaftLogState
133 | //
134 |
135 | impl<Log: RaftLog> RaftLogState<Log> {
136 |     /// Wraps `log`, starting with a commit index of [`LogIndex::default()`].
137 |     pub fn new(log: Log) -> Self {
138 |         Self { log, commit_idx: LogIndex::default() }
139 |     }
140 |
141 |     /// Forwards to [`RaftLog::append`].
142 |     pub fn append(&mut self, entry: LogEntry) -> Result<(), Log::Error> {
143 |         self.log.append(entry)
144 |     }
145 |
146 |     /// Forwards to [`RaftLog::cancel_from`], returning the number of entries removed.
147 |     pub fn cancel_from(&mut self, from_index: LogIndex) -> Result<usize, Log::Error> {
148 |         self.log.cancel_from(from_index)
149 |     }
150 |
151 |     pub fn entry_len(&self, entry: &LogEntry) -> usize {
152 |         self.log.entry_len(entry)
153 |     }
154 |
155 |     /// Returns the entry at `index`. The default index short-circuits to `None` without consulting the log.
156 |     pub fn get(&mut self, index: LogIndex) -> Option<LogEntry> {
157 |         if index == LogIndex::default() { None } else { self.log.get(index) }
158 |     }
159 |
160 |     /// Returns the term at `index`. The `prev_index()` check comes first, so that index is always answered
161 |     /// with `prev_term()`; otherwise the default index yields `None` and anything else is asked of the log.
162 |     pub fn get_term(&mut self, index: LogIndex) -> Option<TermId> {
163 |         if index == self.prev_index() {
164 |             Some(self.prev_term())
165 |         } else if index == LogIndex::default() {
166 |             None
167 |         } else {
168 |             self.log.get_term(index)
169 |         }
170 |     }
171 |
172 |     pub fn get_len(&mut self, index: LogIndex) -> Option<usize> {
173 |         self.log.get_len(index)
174 |     }
175 |
176 |     pub fn last_index(&self) -> LogIndex {
177 |         self.log.last_index()
178 |     }
179 |
180 |     pub fn last_term(&self) -> TermId {
181 |         self.log.last_term()
182 |     }
183 |
184 |     pub fn log(&self) -> &Log {
185 |         &self.log
186 |     }
187 |
188 |     pub fn log_mut(&mut self) -> &mut Log {
189 |         &mut self.log
190 |     }
191 |
192 |     pub fn prev_index(&self) -> LogIndex {
193 |         self.log.prev_index()
194 |     }
195 |
196 |     pub fn prev_term(&self) -> TermId {
197 |         self.log.prev_term()
198 |     }
199 |
200 |     /// Returns an iterator over the committed entries that have not been taken from the log yet.
201 |     pub fn take_committed(&mut self) -> CommittedIter<'_, Log> {
202 |         CommittedIter { log: self }
203 |     }
204 | }
205 |
206 | //
207 | // CommittedIter impls
208 | //
209 |
210 | impl<Log: RaftLog> Iterator for CommittedIter<'_, Log> {
211 |     type Item = LogEntry;
212 |
213 |     /// Takes the next entry from the log, but only while the last taken index is behind the commit index.
214 |     fn next(&mut self) -> Option<LogEntry> {
215 |         if self.log.log.last_taken_index() < self.log.commit_idx { self.log.log.take_next() } else { None }
216 |     }
217 |
218 |     /// Reports the distance from the last taken index to the commit index. The subtraction saturates so that
219 |     /// a log whose last taken index is already past `commit_idx` reports zero instead of underflowing.
220 |     fn size_hint(&self) -> (usize, Option<usize>) {
221 |         let remaining = self.log.commit_idx.id.saturating_sub(self.log.log.last_taken_index().id) as usize;
222 |         (remaining, Some(remaining))
223 |     }
224 | }
225 |
226 | impl<Log: RaftLog> ExactSizeIterator for CommittedIter<'_, Log> {} // relies on the exact bounds from `size_hint`
227 |
228 | impl<Log: RaftLog> iter::FusedIterator for CommittedIter<'_, Log> {}
229 |
--------------------------------------------------------------------------------
/src/log/mem.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2021 jessa0
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Affero General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Affero General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU Affero General Public License
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | //! A naive in-memory implementation of [`RaftLog`](super::RaftLog), primarily for testing.
19 |
20 | use alloc::collections::VecDeque;
21 | use core::convert::{TryFrom, TryInto};
22 | use crate::message::{LogEntry, LogIndex, TermId};
23 | use super::RaftLog;
24 |
25 | /// A naive in-memory implementation of [`RaftLog`](super::RaftLog), primarily for testing.
26 | pub struct RaftLogMemory {
27 |     entries: VecDeque<LogEntry>, // retained entries; the front one has log index `prev_log_idx + 1`
28 |     prev_log_idx: LogIndex,      // index immediately before the first retained entry
29 |     prev_log_term: TermId,       // term of the most recently discarded entry (default until one is discarded)
30 |     last_taken: LogIndex,        // index of the last entry returned by `take_next`
31 |     data_len: usize,             // running total of entry data bytes, compared against `data_capacity`
32 |     data_capacity: usize,        // upper bound on `data_len` enforced by `append`
33 | }
34 |
35 | impl RaftLogMemory {
36 |     /// Constructs an empty Raft log with unbounded capacity.
37 |     pub fn new_unbounded() -> Self {
38 |         Self::with_capacity(0, usize::max_value())
39 |     }
40 |
41 |     /// Constructs an empty Raft log with bounded capacity.
42 |     ///
43 |     /// `initial_entries_capacity` specifies how many log entries the Raft log will be able to store without
44 |     /// reallocating. `data_capacity` specifies the maximum size of log entry data to store before discarding entries
45 |     /// from the beginning of the log.
46 |     pub fn with_capacity(initial_entries_capacity: usize, data_capacity: usize) -> Self {
47 |         Self {
48 |             entries: VecDeque::with_capacity(initial_entries_capacity),
49 |             prev_log_idx: LogIndex::default(),
50 |             prev_log_term: TermId::default(),
51 |             last_taken: LogIndex::default(),
52 |             data_len: 0,
53 |             data_capacity,
54 |         }
55 |     }
56 |
57 |     /// Maps a log index to its position in `entries`; `None` if it is not after `prev_log_idx` or does not fit `usize`.
58 |     fn entry_index(&self, log_idx: LogIndex) -> Option<usize> {
59 |         log_idx.id.checked_sub(self.prev_log_idx.id)?.checked_sub(1)?.try_into().ok()
60 |     }
61 |
62 |     /// Discards the oldest entry, remembering its term as `prev_log_term` and releasing its share of `data_len`.
63 |     /// Fails if the oldest entry has not been returned by `take_next` yet, or if there is no entry to discard.
64 |     fn pop_front(&mut self) -> Result<(), <Self as RaftLog>::Error> {
65 |         self.entry_index(self.last_taken).ok_or(())?;
66 |         let prev_log = self.entries.pop_front().ok_or(())?;
67 |         self.prev_log_idx = self.prev_log_idx + 1;
68 |         self.prev_log_term = prev_log.term;
69 |         // Give the discarded bytes back; otherwise the total only ever grows and `append` can never make room.
70 |         self.data_len = self.data_len.saturating_sub(prev_log.data.len());
71 |         Ok(())
72 |     }
73 | }
74 |
75 | impl RaftLog for RaftLogMemory {
76 |     type Error = ();
77 |     /// Appends `log_entry`, first calling `pop_front` for as long as its data does not fit within `data_capacity`.
78 |     fn append(&mut self, log_entry: LogEntry) -> Result<(), Self::Error> {
79 |         // An entry larger than the entire capacity can never fit, however much is discarded.
80 |         if log_entry.data.len() > self.data_capacity {
81 |             return Err(());
82 |         }
83 |         self.data_len = loop {
84 |             match self.data_len.checked_add(log_entry.data.len()) {
85 |                 Some(new_data_len) if new_data_len <= self.data_capacity => break new_data_len,
86 |                 Some(_) | None => self.pop_front()?,
87 |             }
88 |         };
89 |         self.entries.push_back(log_entry);
90 |         Ok(())
91 |     }
92 |     /// Removes every entry from `from_log_idx` to the end of the log and returns how many were removed.
93 |     fn cancel_from(&mut self, from_log_idx: LogIndex) -> Result<usize, Self::Error> {
94 |         let from_index = self.entry_index(from_log_idx).ok_or(())?;
95 |         match self.entries.len().checked_sub(from_index) {
96 |             Some(0) | None => Err(()),
97 |             Some(cancelled_len) => {
98 |                 // Release the cancelled entries' bytes so they stop counting against `data_capacity`.
99 |                 let freed: usize = self.entries.iter().skip(from_index).map(|entry| entry.data.len()).sum();
100 |                 self.data_len = self.data_len.saturating_sub(freed);
101 |                 self.entries.truncate(from_index);
102 |                 Ok(cancelled_len)
103 |             }
104 |         }
105 |     }
106 |     fn entry_len(&self, log_entry: &LogEntry) -> usize {
107 |         4 + log_entry.data.len() // NOTE(review): the constant 4 is presumably a per-entry overhead estimate — confirm
108 |     }
109 |     fn get(&mut self, log_idx: LogIndex) -> Option<LogEntry> {
110 |         let index = self.entry_index(log_idx)?;
111 |         self.entries.get(index).cloned()
112 |     }
113 |     /// Answers for `prev_log_idx` from the remembered `prev_log_term`; any other index is looked up in `entries`.
114 |     fn get_term(&mut self, log_idx: LogIndex) -> Option<TermId> {
115 |         if log_idx != self.prev_log_idx {
116 |             self.get(log_idx).map(|log_entry: LogEntry| log_entry.term)
117 |         } else {
118 |             Some(self.prev_log_term)
119 |         }
120 |     }
121 |     fn prev_index(&self) -> LogIndex {
122 |         self.prev_log_idx
123 |     }
124 |     fn last_index(&self) -> LogIndex {
125 |         let entries_len = u64::try_from(self.entries.len())
126 |             .unwrap_or_else(|_| panic!("more than 2^64 log entries"));
127 |         self.prev_log_idx + entries_len
128 |     }
129 |     fn last_taken_index(&self) -> LogIndex {
130 |         self.last_taken
131 |     }
132 |     fn last_term(&self) -> TermId {
133 |         // Only the final entry matters, so read the back directly; an empty log falls back to `prev_log_term`.
134 |         self.entries.back().map_or(self.prev_log_term, |log_entry| log_entry.term)
135 |     }
136 |     fn prev_term(&self) -> TermId {
137 |         self.prev_log_term
138 |     }
139 |     /// Returns the entry after `last_taken` and advances `last_taken` to it, or `None` if no such entry is stored.
140 |     fn take_next(&mut self) -> Option<LogEntry> {
141 |         let log_idx = self.last_taken + 1;
142 |         let log_entry = self.get(log_idx)?;
143 |         self.last_taken = log_idx;
144 |         Some(log_entry)
145 |     }
146 | }
147 |
148 | #[cfg(test)]
149 | mod test {
150 |     use super::*;
151 |
152 |     // Instantiates the shared `RaftLog` test functions against an unbounded in-memory log.
153 |     crate::raft_log_tests!(RaftLogMemory, RaftLogMemory::new_unbounded());
154 | }
155 |
--------------------------------------------------------------------------------
/src/log/tests.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2021 jessa0
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Affero General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Affero General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU Affero General Public License
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | use bytes::Bytes;
19 | use crate::message::{LogEntry, LogIndex, TermId};
20 | use super::RaftLog;
21 |
22 | /// Generates one `#[test]` per shared log check (empty, append, cancel_from) for a type implementing RaftLog.
23 | #[macro_export]
24 | macro_rules! raft_log_tests {
25 |     ($log_ty:ty, $make_log:expr) => {
26 |         $crate::raft_log_test! { $log_ty, $make_log, test_log_empty }
27 |         $crate::raft_log_test! { $log_ty, $make_log, test_log_append }
28 |         $crate::raft_log_test! { $log_ty, $make_log, test_log_cancel_from }
29 |     };
30 | }
31 |
32 | /// Generates a single `#[test]` that builds a log from the given expression and runs the named shared check on it.
33 | #[macro_export]
34 | macro_rules! raft_log_test {
35 |     ($log_ty:ty, $make_log:expr, $check:ident) => {
36 |         #[test]
37 |         fn $check() {
38 |             let mut log_under_test: $log_ty = $make_log;
39 |             $crate::log::tests::$check(&mut log_under_test);
40 |         }
41 |     };
42 | }
43 |
44 | pub fn test_log_empty<Log: RaftLog>(log: &mut Log) { // a fresh log holds no entries and sits at the default index
45 |     verify_log(log, &[], LogIndex::default(), LogIndex::default());
46 | }
47 |
48 | /// Appends the fixture entries one at a time, checking the whole log after every append.
49 | pub fn test_log_append<Log: RaftLog>(log: &mut Log) {
50 |     let entries = test_entries();
51 |     for (index, entry) in entries.iter().cloned().enumerate() {
52 |         log.append(entry).unwrap_or_else(|_| panic!());
53 |         verify_log(log, &entries, LogIndex::default(), LogIndex { id: 1 + index as u64 });
54 |     }
55 | }
56 |
57 | pub fn test_log_cancel_from<Log: RaftLog>(log: &mut Log) { // cancels 1, 2, then 1 entries from the end of the fixture
58 |     let entries = append_test_entries(log);
59 |     for &truncate_len in &[1, 2, 1] {
60 |         let last_log_idx = log.last_index();
61 |         log.cancel_from(last_log_idx + 2).unwrap_err(); // cancelling past the end must fail...
62 |         log.cancel_from(last_log_idx + 1).unwrap_err();
63 |         verify_log(log, &entries, LogIndex::default(), last_log_idx); // ...and must leave the log untouched
64 |         assert_eq!(log.cancel_from(last_log_idx + 1 - truncate_len).map_err(drop), Ok(truncate_len as usize));
65 |         verify_log(log, &entries, LogIndex::default(), last_log_idx - truncate_len);
66 |     }
67 |     log.cancel_from(log.last_index() + 2).unwrap_err();
68 |     log.cancel_from(log.last_index() + 1).unwrap_err();
69 | }
70 |
71 | //
72 | // internal
73 | //
74 |
75 | fn test_entries() -> [LogEntry; 5] {
76 |     // Fixture: five entries with non-decreasing terms and payloads from empty up to 100 bytes.
77 |     let entry = |id: u64, data: &'static [u8]| LogEntry { term: TermId { id }, data: Bytes::from_static(data) };
78 |     [
79 |         entry(1, &[]), entry(1, &[2; 1]), entry(2, &[3; 2]),
80 |         entry(9, &[4; 100]),
81 |         entry(u64::max_value(), &[5; 100]),
82 |     ]
83 | }
84 |
85 | fn append_test_entries<Log: RaftLog>(log: &mut Log) -> [LogEntry; 5] { // appends every fixture entry, panicking on failure
86 |     let entries = test_entries();
87 |     entries.iter().cloned().for_each(|entry| log.append(entry).unwrap_or_else(|_| panic!()));
88 |     entries
89 | }
90 |
91 | fn verify_log<Log: RaftLog>(log: &mut Log, entries: &[LogEntry], prev_log_idx: LogIndex, last_log_idx: LogIndex) {
92 |     // Checks that `log` spans `prev_log_idx..=last_log_idx` and agrees with the fixture `entries` at every index.
93 |     assert_eq!(log.prev_index(), prev_log_idx);
94 |     assert_eq!(log.get(LogIndex::default()), None);
95 |     assert_eq!(log.get_len(LogIndex::default()), None);
96 |     // The index just before the first retained entry has a term, but no entry and no length.
97 |     assert_eq!(log.get(prev_log_idx), None);
98 |     assert_eq!(log.get_term(prev_log_idx), Some(prev_log_idx.id.checked_sub(1).map(|index| entries[index as usize].term).unwrap_or_default()));
99 |     assert_eq!(log.get_len(prev_log_idx), None);
100 |
101 |     assert_eq!(log.last_index(), last_log_idx);
102 |     assert_eq!(log.last_term(), log.last_index().id.checked_sub(1).map(|index| entries[index as usize].term).unwrap_or_default());
103 |     // Every probed index must return exactly the expected entry, term and length (or nothing at all).
104 |     verify_entries(entries, prev_log_idx, last_log_idx, |log_idx, entry| {
105 |         assert_eq!(log.get(log_idx).as_ref(), entry);
106 |         assert_eq!(log.get_term(log_idx), entry.map(|entry| entry.term));
107 |         assert_eq!(log.get_len(log_idx), entry.map(|entry| log.entry_len(&entry)));
108 |     });
109 | }
110 |
111 | fn verify_entries<F>(entries: &[LogEntry], prev_log_idx: LogIndex, last_log_idx: LogIndex, mut fun: F)
112 | where F: FnMut(LogIndex, Option<&LogEntry>),
113 | {
114 |     for log_index in 0..prev_log_idx.id { // indices below `prev_log_idx` are expected to hold nothing
115 |         fun(LogIndex { id: log_index }, None);
116 |     }
117 |     for entry_index in prev_log_idx.id..last_log_idx.id { // the retained range maps onto the fixture
118 |         fun(LogIndex { id: 1 + entry_index }, Some(&entries[entry_index as usize]));
119 |     }
120 |     for entry_index in last_log_idx.id..=entries.len() as u64 { // past `last_log_idx`, up to one beyond the fixture
121 |         fun(LogIndex { id: 1 + entry_index }, None);
122 |     }
123 | }
124 |
--------------------------------------------------------------------------------
/src/macros.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Open Whisper Systems
3 | * Copyright (C) 2021 jessa0
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 |
19 | macro_rules! assert_true { // asserts like `assert!`, then evaluates to `true` so it can sit inside a boolean expression
20 |     ($($arg:tt)+) => ({
21 |         assert!($($arg)+); // matcher and transcriber now use the same repetition operator
22 |         true
23 |     });
24 | }
25 |
26 | macro_rules! assert_match { // evaluates to `true` if `$expr` matches `$pat`; otherwise panics naming both
27 |     ($pat:pat = $expr:expr) => ({
28 |         if let $pat = $expr {
29 |             true
30 |         } else {
31 |             panic!("assertion failed: `{} = {}`", stringify!($pat), stringify!($expr)) // literals are not substituted
32 |         }
33 |     });
34 |     ($pat:pat = $expr:expr, $($arg:tt)+) => ({
35 |         if let $pat = $expr {
36 |             true
37 |         } else {
38 |             panic!("assertion failed: `{} = {}`: {}", stringify!($pat), stringify!($expr), format_args!($($arg)+))
39 |         }
40 |     });
41 | }
42 |
43 | // Crate-internal tracing helper: passes its arguments straight through
44 | // to `log::debug!`.
45 | macro_rules! verbose {
46 |     ($($fmt_args:tt)*) => { log::debug!($($fmt_args)*) };
47 | }
48 |
--------------------------------------------------------------------------------
/src/message.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Open Whisper Systems
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Affero General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Affero General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU Affero General Public License
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | //! Raft message types for sending between nodes.
19 | //!
20 | //! This module provides data types for messages to be sent between Raft nodes. The top-level message type is
21 | //! [`RaftMessage`]. Protobuf-based serialization of all types in this module is provided through the `prost` crate if
22 | //! the corresponding feature is enabled.
23 |
24 | use bytes::Bytes;
25 | use core::cmp::Ordering;
26 | use core::fmt;
27 | use core::ops::{Add, AddAssign, Sub};
28 | use crate::prelude::*;
29 |
30 | /// A [`RaftMessage`] to be sent to a destination.
31 | pub struct SendableRaftMessage<NodeId> {
32 |     /// The message to be sent.
33 |     pub message: RaftMessage,
34 |
35 |     /// The destination for the message.
36 |     pub dest: RaftMessageDestination<NodeId>,
37 | }
38 |
39 | /// The destination for a [`SendableRaftMessage`].
40 | pub enum RaftMessageDestination<NodeId> {
41 |     /// The associated message should be sent to all known peers.
42 |     Broadcast,
43 |     /// The associated message should be sent to one particular peer.
44 |     To(NodeId),
45 | }
46 |
47 | /// A message sent between Raft nodes.
48 | #[derive(Clone, PartialEq)]
49 | #[cfg_attr(feature = "prost", derive(prost::Message))]
50 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))]
51 | pub struct RaftMessage {
52 |     /// The greatest Raft leadership term ID seen by the sender.
53 |     #[cfg_attr(feature = "prost", prost(message, required, tag="2"))]
54 |     pub term: TermId,
55 |
56 |     /// The Remote Procedure Call contained by this message.
57 |     ///
58 |     /// This field is only optional in order to support protobuf serialization.
59 |     #[cfg_attr(feature = "prost", prost(oneof="Rpc", tags="3, 4, 5, 6"))]
60 |     pub rpc: Option<Rpc>,
61 | }
62 |
63 | /// A Remote Procedure Call message to a Raft node: a vote or append request, or the response to one.
64 | #[derive(Clone, PartialEq)]
65 | #[cfg_attr(feature = "prost", derive(prost::Oneof))]
66 | #[cfg_attr(not(feature = "prost"), derive(Debug))]
67 | pub enum Rpc {
68 | /// A request to obtain leadership amongst Raft nodes.
69 | #[cfg_attr(feature = "prost", prost(message, tag="3"))]
70 | VoteRequest(VoteRequest),
71 |
72 | /// A response to a [`VoteRequest`] granting or denying leadership.
73 | #[cfg_attr(feature = "prost", prost(message, tag="4"))]
74 | VoteResponse(VoteResponse),
75 |
76 | /// A request to append entries to a Raft node's log.
77 | #[cfg_attr(feature = "prost", prost(message, tag="5"))]
78 | AppendRequest(AppendRequest),
79 |
80 | /// A response to an [`AppendRequest`] allowing or denying an append to the Raft node's log.
81 | #[cfg_attr(feature = "prost", prost(message, tag="6"))]
82 | AppendResponse(AppendResponse),
83 | }
84 |
85 | /// A request to obtain leadership amongst Raft nodes, carrying the index and term of a node's last log entry.
86 | #[derive(Clone, PartialEq)]
87 | #[cfg_attr(feature = "prost", derive(prost::Message))]
88 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))]
89 | pub struct VoteRequest {
90 | /// The Raft log index of the last log entry stored by this node.
91 | #[cfg_attr(feature = "prost", prost(message, required, tag="2"))]
92 | pub last_log_idx: LogIndex,
93 |
94 | /// The Raft leadership term of the last log entry stored by this node.
95 | #[cfg_attr(feature = "prost", prost(message, required, tag="3"))]
96 | pub last_log_term: TermId,
97 | }
98 |
99 | /// The response to a [`VoteRequest`] granting or denying leadership; its only field is the `vote_granted` flag.
100 | #[derive(Clone, PartialEq)]
101 | #[cfg_attr(feature = "prost", derive(prost::Message))]
102 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))]
103 | pub struct VoteResponse {
104 | /// Whether the [`VoteRequest`] was granted or not.
105 | #[cfg_attr(feature = "prost", prost(bool, required, tag="2"))]
106 | pub vote_granted: bool,
107 | }
108 |
109 | /// A request to append entries to a Raft node's log.
110 | #[derive(Clone, PartialEq)]
111 | #[cfg_attr(feature = "prost", derive(prost::Message))]
112 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))]
113 | pub struct AppendRequest {
114 |     /// The Raft log index immediately before the index of the first entry in [`entries`](Self::entries).
115 |     #[cfg_attr(feature = "prost", prost(message, required, tag="1"))]
116 |     pub prev_log_idx: LogIndex,
117 |
118 |     /// The Raft leadership term of the log entry immediately before the first entry in [`entries`](Self::entries).
119 |     #[cfg_attr(feature = "prost", prost(message, required, tag="2"))]
120 |     pub prev_log_term: TermId,
121 |
122 |     /// The Raft log index of the last log entry known by the requester to be committed.
123 |     #[cfg_attr(feature = "prost", prost(message, required, tag="3"))]
124 |     pub leader_commit: LogIndex,
125 |
126 |     /// A list of consecutive Raft log entries to append.
127 |     #[cfg_attr(feature = "prost", prost(message, repeated, tag="4"))]
128 |     pub entries: Vec<LogEntry>,
129 | }
130 |
131 | /// The response to an [`AppendRequest`] allowing or denying an append to the Raft node's log, plus two log positions.
132 | #[derive(Clone, PartialEq)]
133 | #[cfg_attr(feature = "prost", derive(prost::Message))]
134 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))]
135 | pub struct AppendResponse {
136 | /// Whether the [`AppendRequest`] was granted or not.
137 | #[cfg_attr(feature = "prost", prost(bool, required, tag="1"))]
138 | pub success: bool,
139 |
140 | /// The Raft log index of the last log entry up to which the responder's log is known to match the requester's log.
141 | #[cfg_attr(feature = "prost", prost(message, required, tag="2"))]
142 | pub match_idx: LogIndex,
143 |
144 | /// The Raft log index of the last log entry in the responder's log.
145 | #[cfg_attr(feature = "prost", prost(message, required, tag="3"))]
146 | pub last_log_idx: LogIndex,
147 | }
148 |
149 | /// An entry in a [Raft log][crate::log::RaftLog], made up of a leadership term and an opaque data payload.
150 | #[derive(Clone, PartialEq)]
151 | #[cfg_attr(feature = "prost", derive(prost::Message))]
152 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))]
153 | pub struct LogEntry {
154 | /// The term of leadership of the node which appended this log entry.
155 | #[cfg_attr(feature = "prost", prost(message, required, tag="1"))]
156 | pub term: TermId,
157 |
158 | /// Arbitrary data associated with the log entry.
159 | #[cfg_attr(feature = "prost", prost(bytes="vec", required, tag="2"))] // NOTE(review): field is `Bytes` but attribute says `bytes="vec"` — confirm
160 | pub data: Bytes,
161 | }
162 |
163 | /// The unique, monotonically-increasing ID for a term of Raft group leadership, stored as a `u64`.
164 | #[derive(Clone, PartialEq)]
165 | #[cfg_attr(feature = "prost", derive(prost::Message))]
166 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))]
167 | pub struct TermId {
168 | /// The non-negative integer assigned to this term.
169 | #[cfg_attr(feature = "prost", prost(uint64, required, tag="1"))]
170 | pub id: u64,
171 | }
172 |
173 | /// A 1-based index into a [Raft log][crate::log::RaftLog], stored as a `u64`.
174 | #[derive(Clone, PartialEq)]
175 | #[cfg_attr(feature = "prost", derive(prost::Message))]
176 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))]
177 | pub struct LogIndex {
178 | /// The integer representing this log index.
179 | #[cfg_attr(feature = "prost", prost(uint64, required, tag="1"))]
180 | pub id: u64,
181 | }
182 |
183 | //
184 | // RaftMessage impls
185 | //
186 |
187 | impl fmt::Display for RaftMessage {
188 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
189 | let Self { term, rpc } = self;
190 | let mut debug = fmt.debug_tuple("");
191 | debug.field(&format_args!("{}", term));
192 | if let Some(rpc) = rpc {
193 | debug.field(&format_args!("{}", rpc));
194 | } else {
195 | debug.field(&"None");
196 | }
197 | debug.finish()
198 | }
199 | }
200 |
201 | //
202 | // Rpc impls
203 | //
204 |
205 | impl fmt::Display for Rpc {
206 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
207 | match &self {
208 | Rpc::VoteRequest(msg) => fmt::Display::fmt(msg, fmt),
209 | Rpc::VoteResponse(msg) => fmt::Display::fmt(msg, fmt),
210 | Rpc::AppendRequest(msg) => fmt::Display::fmt(msg, fmt),
211 | Rpc::AppendResponse(msg) => fmt::Display::fmt(msg, fmt),
212 | }
213 | }
214 | }
215 |
216 | //
217 | // VoteRequest impls
218 | //
219 |
220 | impl fmt::Display for VoteRequest {
221 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
222 | let Self { last_log_idx, last_log_term } = self;
223 | fmt.debug_struct("VoteRequest")
224 | .field("last_log_idx", &format_args!("{}", last_log_idx))
225 | .field("last_log_term", &format_args!("{}", last_log_term))
226 | .finish()
227 | }
228 | }
229 |
230 | //
231 | // VoteResponse impls
232 | //
233 |
234 | impl fmt::Display for VoteResponse {
235 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
236 | let Self { vote_granted } = self;
237 | fmt.debug_struct("VoteResponse")
238 | .field("vote_granted", vote_granted)
239 | .finish()
240 | }
241 | }
242 |
243 | //
244 | // AppendRequest impls
245 | //
246 |
247 | impl fmt::Display for AppendRequest {
248 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
249 | let Self { prev_log_idx, prev_log_term, leader_commit, entries } = self;
250 | fmt.debug_struct("AppendRequest")
251 | .field("prev_log_idx", &format_args!("{}", prev_log_idx))
252 | .field("prev_log_term", &format_args!("{}", prev_log_term))
253 | .field("leader_commit", &format_args!("{}", leader_commit))
254 | .field("entries", &entries.len())
255 | .finish()
256 | }
257 | }
258 |
259 | //
260 | // AppendResponse impls
261 | //
262 |
263 | impl fmt::Display for AppendResponse {
264 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
265 | let Self { success, match_idx, last_log_idx } = self;
266 | fmt.debug_struct("AppendResponse")
267 | .field("success", &success)
268 | .field("match_idx", &format_args!("{}", match_idx))
269 | .field("last_log_idx", &format_args!("{}", last_log_idx))
270 | .finish()
271 | }
272 | }
273 |
274 |
275 | //
276 | // TermId impls
277 | //
278 |
279 | impl fmt::Display for TermId {
280 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
281 | let Self { id } = self;
282 | fmt.debug_tuple("TermId")
283 | .field(id)
284 | .finish()
285 | }
286 | }
287 |
288 | impl Copy for TermId {}
289 | impl Eq for TermId {}
290 | impl PartialOrd for TermId {
291 | fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) }
292 | }
293 | impl Ord for TermId {
294 | fn cmp(&self, other: &Self) -> Ordering { self.id.cmp(&other.id) }
295 | }
296 | impl AddAssign for TermId {
297 | fn add_assign(&mut self, rhs: u64) {
298 | self.id = self.id.checked_add(rhs).unwrap_or_else(|| panic!("overflow"));
299 | }
300 | }
301 |
302 | //
303 | // LogIndex impls
304 | //
305 |
306 | impl LogIndex {
307 | /// Subtraction with a non-negative integer, checking for overflow. Returns `self - dec`, or `None` if an overflow
308 | /// occurred.
309 | pub fn checked_sub(self, dec: u64) -> Option {
310 | if let Some(id) = self.id.checked_sub(dec) {
311 | Some(Self { id })
312 | } else {
313 | None
314 | }
315 | }
316 | }
317 |
318 | impl fmt::Display for LogIndex {
319 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
320 | let Self { id } = self;
321 | fmt.debug_tuple("LogIdx")
322 | .field(id)
323 | .finish()
324 | }
325 | }
326 |
327 | impl Copy for LogIndex {}
328 | impl Eq for LogIndex {}
329 | impl PartialOrd for LogIndex {
330 | fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) }
331 | }
332 | impl Ord for LogIndex {
333 | fn cmp(&self, other: &Self) -> Ordering { self.id.cmp(&other.id) }
334 | }
335 | impl Add for LogIndex {
336 | type Output = Self;
337 | fn add(self, inc: u64) -> Self {
338 | Self { id: self.id.checked_add(inc).unwrap_or_else(|| panic!("overflow")) }
339 | }
340 | }
341 | impl Sub for LogIndex {
342 | type Output = Self;
343 | fn sub(self, dec: u64) -> Self {
344 | Self { id: self.id.saturating_sub(dec) }
345 | }
346 | }
347 |
--------------------------------------------------------------------------------
/src/node.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Open Whisper Systems
3 | * Copyright (C) 2021 jessa0
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 |
19 | //! Higher-level API for a Raft node.
20 |
21 | use alloc::collections::BTreeSet;
22 | use bytes::Bytes;
23 | use core::fmt::Display;
24 | use crate::core::{RaftState, ReplicationState};
25 | use crate::message::{LogIndex, RaftMessage, SendableRaftMessage, TermId};
26 | use crate::log::{CommittedIter, RaftLog};
27 | use rand_core::RngCore;
28 |
29 | /// A Raft node, used for replicating a strongly-consistent distributed log of entries with arbitrary data amongst its
30 | /// peers.
31 | ///
32 | /// The distributed log can be used, for example, to replicate transactions in a database.
33 | ///
34 | /// # Appending entries to the distributed log
35 | ///
36 | /// Log entries passed to [`append`] are not guaranteed to ultimately be appended to the distributed log, and may be
37 | /// cancelled any time [`receive`] is called before they are "committed". The provided [`RaftLog`] should provide an API
38 | /// to find out which log entries have been cancelled. Only log entries passed to [`append`] on a particular node are
39 | /// guaranteed to appear as cancelled in its own [`RaftLog`], but entries appended on other nodes may appear as well.
40 | ///
41 | /// The distributed log may only be appended to by the node returned by [`leader`], but even that node is not guaranteed
42 | /// to be able to append to the log, since it must be able to send each new entry to a majority of its peers before
43 | /// losing leadership in order for the entry to become committed. The leader may change at any time, and therefore an
44 | /// entry may be first returned from [`take_committed`] on a node different than that to which it was submitted.
45 | /// However, [`take_committed`] is guaranteed to return the same entries in the same order on every node.
46 | ///
47 | /// # Timer ticks
48 | ///
49 | /// Timeouts in [`RaftNode`] are driven by a timer ticking at fixed interval, with the number of ticks between timeouts
50 | /// configured by the provided [`RaftConfig`]. Any consistent time interval between ticks may be chosen, but the time
51 | /// interval and [`RaftConfig`] must be the same on all peers in a group. Shorter timeouts will allow Raft to react
52 | /// quicker to network disruptions, but may result in spurious leadership changes when the network latency exceeds
53 | /// `time_interval * election_timeout_ticks`.
54 | ///
55 | /// # Message delivery
56 | ///
57 | /// Unicast message delivery is assumed to be non-lossy in order for replication to make progress. In other words, once
58 | /// a non-broadcast [`SendableRaftMessage`] is returned from an API such as [`append`], [`receive`], or [`timer_tick`],
59 | /// it must be retained and retransmitted until it is confirmed to have been processed by [`receive`] on its
60 | /// destination. Messages may be safely delivered out-of-order or more than once, however.
61 | ///
62 | /// To prevent unbounded queueing, the API is designed to only ever return a bounded amount of unacknowledged unicast
63 | /// message data. This amount can be approximately controlled by [`replication_chunk_size`].
64 | ///
65 | /// [`append`]: Self::append
66 | /// [`leader`]: Self::leader
67 | /// [`receive`]: Self::receive
68 | /// [`replication_chunk_size`]: RaftConfig::replication_chunk_size
69 | /// [`SendableRaftMessage`]: crate::message::SendableRaftMessage
70 | /// [`take_committed`]: Self::take_committed
71 | /// [`timer_tick`]: Self::timer_tick
72 | pub struct RaftNode {
73 | state: RaftState,
74 | }
75 |
76 | /// Configurable parameters of a Raft node; these must be the same on all peers in a group.
77 | #[derive(Clone, Eq, PartialEq)]
78 | pub struct RaftConfig {
79 | /// The minimum number of timer ticks between leadership elections.
80 | pub election_timeout_ticks: u32,
81 |
82 | /// The number of timer ticks between sending heartbeats to peers.
83 | pub heartbeat_interval_ticks: u32,
84 |
85 | /// The maximum number of bytes to replicate to a peer at a time; approximately bounds unacknowledged unicast message data.
86 | pub replication_chunk_size: usize,
87 | }
88 |
89 | /// An error returned while attempting to append to a Raft log.
90 | pub enum AppendError {
91 | /// The append to the Raft log was cancelled and should be resubmitted to the current Raft leader.
92 | Cancelled {
93 | /// Arbitrary data associated with the log entry.
94 | data: Bytes,
95 | },
96 | /// An error was returned by the [`RaftLog`](crate::log::RaftLog) implementation.
97 | RaftLogErr(E),
98 | }
99 |
100 | impl RaftNode
101 | where Log: RaftLog,
102 | Random: RngCore,
103 | NodeId: Ord + Clone + Display,
104 | {
105 | /// Constructs a new Raft node with specified peers and configuration.
106 | ///
107 | /// The Raft node will start with an empty initial state. The `log` provided should also be in an empty initial
108 | /// state. Each Raft node in a group must be constructed with the same set of peers and `config`. `peers` may
109 | /// contain `node_id` or omit it to the same effect. `rand` must produce different values on every node in a group.
110 | pub fn new(
111 | node_id: NodeId,
112 | peers: BTreeSet,
113 | log: Log,
114 | random: Random,
115 | config: RaftConfig,
116 | ) -> Self {
117 | Self {
118 | state: RaftState::new(
119 | node_id,
120 | peers,
121 | log,
122 | random,
123 | config,
124 | ),
125 | }
126 | }
127 |
128 |
129 | /// Request appending an entry with arbitrary `data` to the Raft log, returning messages to be sent.
130 | ///
131 | /// See ["Message delivery"] for details about delivery requirements for the returned messages.
132 | ///
133 | /// # Errors
134 | ///
135 | /// If this request would immediately be cancelled, then an error is returned.
136 | ///
137 | /// ["Message delivery"]: RaftNode#message-delivery
138 | #[must_use = "This function returns Raft messages to be sent."]
139 | pub fn append>(&mut self, data: T) -> Result> + '_, AppendError> {
140 | let () = self.state.client_request(data.into())?;
141 | Ok(self.append_entries())
142 | }
143 |
144 | /// Returns this node's configurable parameters.
145 | pub fn config(&self) -> &RaftConfig {
146 | self.state.config()
147 | }
148 |
149 | /// Returns whether this node is the leader of the latest known term.
150 | pub fn is_leader(&self) -> bool {
151 | self.state.is_leader()
152 | }
153 |
154 | /// Returns the index of the last [`LogEntry`] which has been committed and thus may be returned by
155 | /// [`take_committed`].
156 | ///
157 | /// [`take_committed`]: Self::take_committed
158 | /// [`LogEntry`]: crate::message::LogEntry
159 | pub fn last_committed_log_index(&self) -> LogIndex {
160 | *self.state.commit_idx()
161 | }
162 |
163 | /// Returns the ID of the leader, if there is one, of the latest known term, along with the term.
164 | pub fn leader(&self) -> (Option<&NodeId>, TermId) {
165 | let (leader, term) = self.state.leader();
166 | (leader, *term)
167 | }
168 |
169 | /// Returns a reference to the Raft log storage.
170 | pub fn log(&self) -> &Log {
171 | self.state.log()
172 | }
173 |
174 | /// Returns a mutable reference to the Raft log storage.
175 | pub fn log_mut(&mut self) -> &mut Log {
176 | self.state.log_mut()
177 | }
178 |
179 | /// Returns this node's ID.
180 | pub fn node_id(&self) -> &NodeId {
181 | self.state.node_id()
182 | }
183 |
184 | /// Returns the IDs of this node's peers.
185 | pub fn peers(&self) -> &BTreeSet {
186 | self.state.peers()
187 | }
188 |
189 | /// Processes receipt of a `message` from a peer with ID `from`, returning messages to be sent.
190 | ///
191 | /// See ["Message delivery"] for details about delivery requirements for the returned messages.
192 | ///
193 | /// ["Message delivery"]: RaftNode#message-delivery
194 | #[must_use = "This function returns Raft messages to be sent."]
195 | pub fn receive(
196 | &mut self,
197 | message: RaftMessage,
198 | from: NodeId,
199 | ) -> impl Iterator- > + '_ {
200 | let message = self.state.receive(message, from);
201 | message.into_iter().chain(self.append_entries())
202 | }
203 |
204 | /// Returns the replication state corresponding to the peer with ID `peer_node_id`.
205 | pub fn replication_state(&self, peer_node_id: &NodeId) -> Option<&ReplicationState> {
206 | self.state.replication_state(peer_node_id)
207 | }
208 |
209 | /// Returns a reference to the low-level state of the Raft node.
210 | pub fn state(&mut self) -> &RaftState {
211 | &self.state
212 | }
213 |
214 | /// Returns a mutable reference to the low-level state of the Raft node.
215 | pub fn state_mut(&mut self) -> &mut RaftState {
216 | &mut self.state
217 | }
218 |
219 | /// Returns an iterator yielding committed [log entries][`LogEntry`]. A given [`LogEntry`] will be yielded only once
220 | /// over the lifetime of a [`RaftNode`]. See ["Appending entries to the distributed log"] for details about log
221 | /// commital.
222 | ///
223 | /// ["Appending entries to the distributed log"]: RaftNode#appending-entries-to-the-distributed-log
224 | /// [`LogEntry`]: crate::message::LogEntry
225 | pub fn take_committed(&mut self) -> CommittedIter<'_, Log> {
226 | self.state.take_committed()
227 | }
228 |
229 | /// Ticks forward this node's internal clock by one tick, returning messages to be sent.
230 | ///
231 | /// See ["Message delivery"] for details about delivery requirements for the returned messages.
232 | ///
233 | /// ["Message delivery"]: RaftNode#message-delivery
234 | #[must_use = "This function returns Raft messages to be sent."]
235 | pub fn timer_tick(&mut self) -> impl Iterator
- > + '_ {
236 | let message = self.state.timer_tick();
237 | message.into_iter().chain(self.append_entries())
238 | }
239 |
240 | #[must_use = "This function returns Raft messages to be sent."]
241 | fn append_entries(
242 | &mut self,
243 | ) -> impl Iterator
- > + '_ {
244 | let peers = self.state.peers().clone().into_iter();
245 | peers.flat_map(move |peer| self.state.append_entries(peer))
246 | }
247 | }
248 |
--------------------------------------------------------------------------------
/src/prelude.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Open Whisper Systems
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Affero General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Affero General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU Affero General Public License
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | pub use alloc::{format, vec};
19 | pub use alloc::borrow::{ToOwned};
20 | pub use alloc::boxed::{Box};
21 | pub use alloc::string::{String, ToString};
22 | pub use alloc::vec::{Vec};
23 |
--------------------------------------------------------------------------------
/src/raft.proto:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Open Whisper Systems
3 | * Copyright (C) 2021 jessa0
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 |
19 | syntax = "proto2";
20 |
21 | package raft.protobufs;
22 |
23 | message RaftMessage { // Top-level message: a required term plus at most one RPC payload.
24 | required TermId term = 2; // Field number 1 is not used by this message.
25 | oneof rpc { // The RPC payload carried by this message.
26 | VoteRequest vote_request = 3;
27 | VoteResponse vote_response = 4;
28 | AppendRequest append_request = 5;
29 | AppendResponse append_response = 6;
30 | };
31 | }
32 |
33 | message VoteRequest { // Field numbering starts at 2; field number 1 is not used.
34 | required LogIndex last_log_idx = 2;
35 | required TermId last_log_term = 3;
36 | }
37 |
38 | message VoteResponse { // Carries a single flag; field number 1 is not used.
39 | required bool vote_granted = 2;
40 | }
41 |
42 | message AppendRequest { // Three required scalars-as-messages plus a repeated list of log entries.
43 | required LogIndex prev_log_idx = 1;
44 | required TermId prev_log_term = 2;
45 | required LogIndex leader_commit = 3;
46 | repeated LogEntry entries = 4; // May be empty.
47 | }
48 |
49 | message AppendResponse { // The response to an AppendRequest, allowing or denying the append.
50 | required bool success = 1; // Whether the AppendRequest was granted or not.
51 | required LogIndex match_idx = 2; // Last index up to which the responder's log is known to match the requester's.
52 | required LogIndex last_log_idx = 3; // Index of the last entry in the responder's log.
53 | }
54 |
55 | message LogEntry { // An entry in a Raft log.
56 | required TermId term = 1; // The term of leadership of the node which appended this entry.
57 | required bytes data = 2; // Arbitrary data associated with the entry.
58 | }
59 |
60 | message TermId { // The ID for a term of Raft group leadership.
61 | required uint64 id = 1; // The non-negative integer assigned to this term.
62 | }
63 |
64 | message LogIndex { // A 1-based index into a Raft log.
65 | required uint64 id = 1; // The integer representing this log index.
66 | }
67 |
--------------------------------------------------------------------------------
/tests/commit.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2021 jessa0
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Affero General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Affero General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU Affero General Public License
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | mod common;
19 |
20 | use common::*;
21 |
/// A single-node group elects a leader, accepts one client request and commits it.
#[test]
pub fn _1_commit() {
    let mut group = TestRaftGroup::new(1, &mut init_random(), config());
    group.run_until(|group| group.has_leader());
    assert!(group.nodes.iter_mut().any(|raft| raft.client_request("one".into()).is_ok()));
    group.run_until_commit(|commit| {
        assert_eq!(commit.data, "one");
        true
    });
}
29 |
/// A two-node group elects a leader, accepts one client request and commits it.
#[test]
pub fn _2_commit() {
    let mut group = TestRaftGroup::new(2, &mut init_random(), config());
    group.run_until(|group| group.has_leader());
    assert!(group.nodes.iter_mut().any(|raft| raft.client_request("one".into()).is_ok()));
    group.run_until_commit(|commit| {
        assert_eq!(commit.data, "one");
        true
    });
}
37 |
/// A three-node group elects a leader, accepts one client request and commits it.
#[test]
pub fn _3_commit() {
    let mut group = TestRaftGroup::new(3, &mut init_random(), config());
    group.run_until(|group| group.has_leader());
    assert!(group.nodes.iter_mut().any(|raft| raft.client_request("one".into()).is_ok()));
    group.run_until_commit(|commit| {
        assert_eq!(commit.data, "one");
        true
    });
}
45 |
/// Node 0 becomes leader and accepts "one"; after one tick with messages to node 0 dropped, no
/// non-empty entry has been committed, and once node 0 is isolated "one" is eventually committed.
#[test]
pub fn commit_leader_change() {
    let mut group = TestRaftGroup::new(3, &mut init_random(), config());
    group
        .run_on_node(0, |raft| raft.timeout())
        .run_until(|group| group.nodes[0].is_leader());

    assert!(group.nodes[0].client_request("one".into()).is_ok());
    let drop_inbound_to_leader = config().drop_to(0);
    group.config = drop_inbound_to_leader;
    group.run_for(1);

    assert!(group.take_committed().all(|commit| commit.data.is_empty()));
    let leader_isolated = config().isolate(0);
    group.config = leader_isolated;
    group.run_until_commit(|commit| {
        assert_eq!(commit.data, "one");
        true
    });
}
60 |
/// Node 0 accepts "one" and is then isolated; another node becomes leader and commits "two".
/// After the partition heals, the first non-empty entry node 0 commits must be "two".
#[test]
pub fn cancel_uncommitted() {
    let mut group = TestRaftGroup::new(3, &mut init_random(), config());
    group
        .run_on_node(0, |raft| raft.timeout())
        .run_until(|group| group.nodes[0].is_leader());

    assert!(group.nodes[0].client_request("one".into()).is_ok());
    group.config = config().isolate(0);
    group.run_until(|group| group.nodes[1..].iter().any(|raft| raft.is_leader()));

    assert!(group.nodes[1..].iter_mut().any(|raft| raft.client_request("two".into()).is_ok()));
    group.run_until_commit(|commit| {
        assert_eq!(commit.data, "two");
        true
    });

    log::info!("committed two");
    group.config = config();
    group.run_until(|group| {
        group.nodes[0].take_committed().any(|commit| {
            // Entries with empty data are skipped; the first non-empty one must be "two".
            if commit.data.is_empty() {
                return false;
            }
            assert_eq!(commit.data, "two");
            true
        })
    });
}
85 |
--------------------------------------------------------------------------------
/tests/common.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2021 jessa0
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Affero General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Affero General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU Affero General Public License
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | #![allow(dead_code)]
19 |
20 | use rand_core::{RngCore, SeedableRng};
21 | use rand_chacha::ChaChaRng;
22 | use simple_raft::core::RaftState;
23 | use simple_raft::log::mem::RaftLogMemory;
24 | use simple_raft::message::{LogEntry, RaftMessage, RaftMessageDestination, Rpc, SendableRaftMessage, TermId};
25 | use simple_raft::node::RaftConfig;
26 | use std::cell::RefCell;
27 | use std::collections::{BTreeSet, VecDeque};
28 |
29 | pub const CONFIG: RaftConfig = RaftConfig {
30 | election_timeout_ticks: 10,
31 | heartbeat_interval_ticks: 9,
32 | replication_chunk_size: 1024,
33 | };
34 | const RANDOM_SEED: u64 = 0;
35 | const MAX_TICKS: u32 = 100_000;
36 |
37 | pub type TestRaft = RaftState;
38 |
39 | pub struct TestRaftGroup {
40 | pub nodes: Vec,
41 | pub tick: u32,
42 | pub config: TestRaftGroupConfig,
43 | pub dropped_messages: Vec<(NodeId, SendableRaftMessage)>,
44 | }
45 |
46 | #[derive(Clone, Default)]
47 | pub struct TestRaftGroupConfig {
48 | pub drops: BTreeSet<(Option, Option)>,
49 | pub down: BTreeSet,
50 | }
51 |
52 | #[derive(Clone, Copy, Debug, derive_more::Display, Eq, derive_more::From, PartialEq, PartialOrd, Ord)]
53 | #[display(fmt = "{:?}", self)] // `Display` output reuses the `Debug` representation.
54 | pub struct NodeId(u64); // Test node identifier: a newtype around `u64`.
55 |
56 | pub struct TestLogger; // Unit struct installed as the `log` backend for tests.
57 |
58 | pub struct TestLoggerContext {
59 | node_id: Option,
60 | tick: Option,
61 | }
62 |
63 | pub fn rpc_types() -> [Rpc; 4] {
64 | [
65 | Rpc::VoteRequest(Default::default()),
66 | Rpc::VoteResponse(Default::default()),
67 | Rpc::AppendRequest(Default::default()),
68 | Rpc::AppendResponse(Default::default()),
69 | ]
70 | }
71 |
72 | pub fn init_random() -> ChaChaRng {
73 | ChaChaRng::seed_from_u64(RANDOM_SEED)
74 | }
75 |
76 | pub fn raft(node_id: u64, peers: Vec, log: Option, random: &mut impl RngCore) -> TestRaft {
77 | TestLogger::init();
78 | RaftState::new(
79 | NodeId(node_id),
80 | peers.into_iter().map(NodeId).collect(),
81 | log.unwrap_or_else(|| RaftLogMemory::new_unbounded()),
82 | ChaChaRng::seed_from_u64(random.next_u64()),
83 | CONFIG,
84 | )
85 | }
86 |
87 | pub fn config() -> TestRaftGroupConfig {
88 | TestRaftGroupConfig::default()
89 | }
90 |
91 | pub fn send(raft: &mut TestRaft, from: u64, term: TermId, rpc: Rpc) -> Option> {
92 | raft.receive(RaftMessage {
93 | term,
94 | rpc: Some(rpc),
95 | }, NodeId(from))
96 | }
97 |
98 | pub fn append_entries<'a>(node: &'a mut TestRaft, peers: impl IntoIterator
- + 'a) -> impl Iterator
- > + 'a {
99 | let node_id = *node.node_id();
100 | peers.into_iter().flat_map(move |append_to_node_id| {
101 | if append_to_node_id != node_id {
102 | node.append_entries(append_to_node_id)
103 | } else {
104 | None
105 | }
106 | })
107 | }
108 |
109 | pub fn run_group<'a>(
110 | nodes: impl Iterator
- + ExactSizeIterator,
111 | initial_messages: impl IntoIterator
- )>,
112 | start_tick: u32,
113 | ticks: Option,
114 | config: &mut TestRaftGroupConfig,
115 | dropped_messages: &mut Vec<(NodeId, SendableRaftMessage)>,
116 | ) {
117 | let mut nodes: Vec<_> = nodes.collect();
118 | let node_ids: Vec<_> = nodes.iter().map(|node| *node.node_id()).collect();
119 | let mut messages = VecDeque::with_capacity(nodes.len() * nodes.len());
120 | messages.extend(initial_messages.into_iter());
121 | messages.extend(dropped_messages.drain(..));
122 |
123 | for tick in 0..ticks.unwrap_or(1) {
124 | TestLogger::set_tick(Some(start_tick + tick));
125 | if ticks.is_some() {
126 | for node in &mut nodes {
127 | let node_id = *node.node_id();
128 | if !config.is_node_down(node_id) {
129 | TestLogger::set_node_id(Some(node_id));
130 | messages.extend(node.timer_tick().map(|message| (node_id, message)));
131 | messages.extend(append_entries(node, node_ids.iter().cloned()).map(|message| (node_id, message)));
132 | }
133 | }
134 | }
135 |
136 | while let Some((from, sendable)) = messages.pop_front() {
137 | let (reply_to_node_id, to_node_count) = match sendable.dest {
138 | RaftMessageDestination::Broadcast => (None, nodes.len().saturating_sub(1)),
139 | RaftMessageDestination::To(to) => (Some(to), 1),
140 | };
141 | let to_nodes = nodes.iter_mut().filter(|node| match &reply_to_node_id {
142 | Some(to_node_id) => node.node_id() == to_node_id,
143 | None => node.node_id() != &from,
144 | });
145 |
146 | for (to_node, message) in Iterator::zip(to_nodes, itertools::repeat_n(sendable.message, to_node_count)) {
147 | let to_node_id = *to_node.node_id();
148 | TestLogger::set_node_id(Some(to_node_id));
149 | if !config.should_drop(from, to_node_id) {
150 | log::info!("<- {} {}", from, message);
151 | messages.extend(to_node.receive(message, from).map(|message| (to_node_id, message)));
152 | } else {
153 | log::info!("<- {} DROPPED {}", from, message);
154 | if let Some(reply_to_node_id) = reply_to_node_id {
155 | dropped_messages.push((from, SendableRaftMessage { message, dest: RaftMessageDestination::To(reply_to_node_id) }));
156 | }
157 | }
158 | messages.extend(append_entries(to_node, node_ids.iter().cloned()).map(|message| (to_node_id, message)));
159 | }
160 | }
161 | }
162 | TestLogger::set_tick(None);
163 | TestLogger::set_node_id(None);
164 | }
165 |
166 | //
167 | // TestRaftGroup impls
168 | //
169 |
170 | impl TestRaftGroup {
171 | pub fn new(size: u64, random: &mut impl RngCore, config: TestRaftGroupConfig) -> Self {
172 | let nodes: Vec = (0..size).collect();
173 | Self {
174 | nodes: nodes.iter().map(|node_id| raft(*node_id, nodes.clone(), None, random)).collect(),
175 | tick: 0,
176 | config,
177 | dropped_messages: Default::default(),
178 | }
179 | }
180 |
181 | pub fn run_until(&mut self, mut until_fun: impl FnMut(&mut Self) -> bool) -> &mut Self {
182 | let mut ticks_remaining = MAX_TICKS;
183 | while !until_fun(self) {
184 | ticks_remaining = ticks_remaining.checked_sub(1).expect("condition failed after maximum simulation length");
185 | self.tick += 1;
186 | run_group(self.nodes.iter_mut(), None, self.tick, Some(1), &mut self.config, &mut self.dropped_messages);
187 | }
188 | self
189 | }
190 |
191 | pub fn run_until_commit(&mut self, mut until_fun: impl FnMut(&LogEntry) -> bool) -> &mut Self {
192 | self.run_until(|group| {
193 | let result = group.take_committed().any(|commit| !commit.data.is_empty() && until_fun(&commit));
194 | group.take_committed().for_each(drop);
195 | result
196 | })
197 | }
198 |
199 | pub fn run_for(&mut self, ticks: u32) -> &mut Self {
200 | self.run_for_inspect(ticks, |_| ())
201 | }
202 |
203 | pub fn run_for_inspect(&mut self, ticks: u32, mut fun: impl FnMut(&mut Self)) -> &mut Self {
204 | let mut ticks_remaining = ticks;
205 | while let Some(new_ticks_remaining) = ticks_remaining.checked_sub(1) {
206 | ticks_remaining = new_ticks_remaining;
207 | self.tick += 1;
208 | run_group(self.nodes.iter_mut(), None, self.tick, Some(1), &mut self.config, &mut self.dropped_messages);
209 | fun(self);
210 | }
211 | self
212 | }
213 |
214 | pub fn run_on_all(
215 | &mut self,
216 | mut fun: impl FnMut(&mut TestRaft) -> Option>,
217 | ) -> &mut Self {
218 | let messages = self.nodes.iter_mut().flat_map(|node| fun(node).map(|message| (*node.node_id(), message))).collect::>();
219 | run_group(self.nodes.iter_mut(), messages, self.tick, None, &mut self.config, &mut self.dropped_messages);
220 | self
221 | }
222 |
223 | pub fn run_on_node(
224 | &mut self,
225 | node_idx: usize,
226 | fun: impl FnOnce(&mut TestRaft) -> Option>,
227 | ) -> &mut Self {
228 | let node_id = *self.nodes[node_idx].node_id();
229 | let messages = fun(&mut self.nodes[node_idx]).map(|message| (node_id, message));
230 | run_group(self.nodes.iter_mut(), messages, self.tick, None, &mut self.config, &mut self.dropped_messages);
231 | self
232 | }
233 |
234 | pub fn inspect(&mut self, fun: impl FnOnce(&Self)) -> &mut Self {
235 | fun(self);
236 | self
237 | }
238 |
239 | pub fn modify(&mut self, fun: impl FnOnce(&mut Self)) -> &mut Self {
240 | fun(self);
241 | self
242 | }
243 |
244 | pub fn take_committed(&mut self) -> impl Iterator
- + '_ {
245 | self.nodes.iter_mut().flat_map(|node| node.take_committed())
246 | }
247 |
248 | pub fn has_leader(&self) -> bool {
249 | self.nodes.iter().any(|node| node.is_leader())
250 | }
251 | }
252 |
253 | //
254 | // TestRaftGroupConfig impls
255 | //
256 |
257 | impl TestRaftGroupConfig {
258 | pub fn node_down(mut self, node_id: u64) -> Self {
259 | self.down.insert(NodeId(node_id));
260 | self
261 | }
262 |
263 | pub fn isolate(mut self, node_id: u64) -> Self {
264 | self.drops.insert((Some(NodeId(node_id)), None));
265 | self.drops.insert((None, Some(NodeId(node_id))));
266 | self
267 | }
268 |
269 | pub fn drop_between(mut self, from: u64, to: u64) -> Self {
270 | self.drops.insert((Some(NodeId(from)), Some(NodeId(to))));
271 | self.drops.insert((Some(NodeId(to)), Some(NodeId(from))));
272 | self
273 | }
274 |
275 | pub fn drop_to(mut self, node_id: u64) -> Self {
276 | self.drops.insert((None, Some(NodeId(node_id))));
277 | self
278 | }
279 |
280 | pub fn is_node_down(&self, node_id: NodeId) -> bool {
281 | self.down.contains(&node_id)
282 | }
283 |
284 | pub fn should_drop(&self, from: NodeId, to: NodeId) -> bool {
285 | self.drops.contains(&(Some(from), Some(to))) ||
286 | self.drops.contains(&(Some(from), None)) ||
287 | self.drops.contains(&(None, Some(to))) ||
288 | self.down.contains(&from) ||
289 | self.down.contains(&to)
290 | }
291 | }
292 |
293 | //
294 | // TestLogger impls
295 | //
296 |
297 | thread_local! {
298 | static LOGGER_CONTEXT: RefCell = RefCell::new(TestLoggerContext::new());
299 | }
300 |
301 | impl TestLogger {
302 | pub fn init() {
303 | let _ignore = log::set_logger(&Self);
304 | log::set_max_level(log::LevelFilter::Debug);
305 | }
306 | pub fn set_node_id(node_id: Option) {
307 | LOGGER_CONTEXT.with(|context| {
308 | context.borrow_mut().node_id = node_id;
309 | });
310 | }
311 | pub fn set_tick(tick: Option) {
312 | LOGGER_CONTEXT.with(|context| {
313 | context.borrow_mut().tick = tick;
314 | });
315 | }
316 | }
317 |
318 | impl log::Log for TestLogger {
319 | fn enabled(&self, _metadata: &log::Metadata) -> bool {
320 | true
321 | }
322 |
323 | fn log(&self, record: &log::Record) {
324 | LOGGER_CONTEXT.with(|context| {
325 | let context = context.borrow();
326 | if let Some(node_id) = context.node_id {
327 | if let Some(tick) = context.tick {
328 | eprintln!("tick {:03} {} {}", tick, node_id, record.args());
329 | } else {
330 | eprintln!("tick ??? {} {}", node_id, record.args());
331 | }
332 | } else {
333 | eprintln!("{}", record.args());
334 | }
335 | })
336 | }
337 |
338 | fn flush(&self) {}
339 | }
340 |
341 | //
342 | // TestLoggerContext impls
343 | //
344 |
345 | impl TestLoggerContext {
346 | const fn new() -> Self {
347 | Self {
348 | node_id: None,
349 | tick: None,
350 | }
351 | }
352 | }
353 |
--------------------------------------------------------------------------------
/tests/leader.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2021 jessa0
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Affero General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Affero General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU Affero General Public License
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | mod common;
19 |
20 | use common::*;
21 | use simple_raft::message::{Rpc, TermId};
22 |
/// A non-leader node that receives an append request from node 2 in a newer
/// term reports node 2 as leader for that term.
#[test]
pub fn append_request_update_leader() {
    let mut rng = init_random();
    let mut node = raft(1, vec![2], None, &mut rng);
    assert!(!node.is_leader());

    // Use a term one past whatever the fresh node currently reports.
    let mut term = *node.leader().1;
    term += 1;

    send(&mut node, 2, term, Rpc::AppendRequest(Default::default()));
    assert_eq!(node.leader(), (Some(&2.into()), &term));
}
33 |
/// Every RPC type other than an append request advances the term but leaves
/// the node without a known leader.
#[test]
pub fn no_update_leader() {
    let all_rpcs = rpc_types();
    for rpc in all_rpcs.iter().cloned() {
        // Append requests are covered by `append_request_update_leader`.
        if matches!(rpc, Rpc::AppendRequest(_)) {
            continue;
        }

        let mut node = raft(1, vec![2, 3], None, &mut init_random());
        let mut term = TermId::default();
        assert_eq!(node.leader(), (None, &term));

        term += 1;
        send(&mut node, 2, term, rpc);
        assert_eq!(node.leader(), (None, &term));
    }
}
46 |
--------------------------------------------------------------------------------
/tests/term.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2021 jessa0
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Affero General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Affero General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU Affero General Public License
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | mod common;
19 |
20 | use common::*;
21 | use simple_raft::message::{RaftMessage, Rpc, TermId, VoteResponse};
22 |
/// A node that has become leader adopts a newer term when it receives any RPC
/// type carrying that term.
#[test]
pub fn leader_update_term() {
    let all_rpcs = rpc_types();
    for rpc in all_rpcs.iter().cloned() {
        let mut node = raft(1, vec![2, 3], None, &mut init_random());
        let mut term = TermId::default();
        assert_eq!(node.leader().1, &term);

        // Timing out bumps the term and emits a message carrying it.
        term += 1;
        let RaftMessage { term: announced_term, .. } = node.timeout().unwrap().message;
        assert_eq!(announced_term, term);
        assert_eq!(node.leader().1, &term);

        // One granted vote from node 2 makes this node the leader.
        let granted = Rpc::VoteResponse(VoteResponse { vote_granted: true });
        send(&mut node, 2, term, granted);
        assert_eq!(node.leader(), (Some(node.node_id()), &term));

        term += 1;
        send(&mut node, 2, term, rpc);
        assert_eq!(node.leader().1, &term);
    }
}
43 |
/// A node that has timed out (and has no leader) adopts a newer term when it
/// receives any RPC type carrying that term.
#[test]
pub fn candidate_update_term() {
    let all_rpcs = rpc_types();
    for rpc in all_rpcs.iter().cloned() {
        let mut node = raft(1, vec![2, 3], None, &mut init_random());
        let mut term = TermId::default();
        assert_eq!(node.leader().1, &term);

        // Timing out bumps the term and emits a message carrying it.
        term += 1;
        let RaftMessage { term: announced_term, .. } = node.timeout().unwrap().message;
        assert_eq!(announced_term, term);
        assert_eq!(node.leader(), (None, &term));

        term += 1;
        send(&mut node, 2, term, rpc);
        assert_eq!(node.leader().1, &term);
    }
}
61 |
/// A freshly created node adopts a newer term when it receives any RPC type
/// carrying that term.
#[test]
pub fn follower_update_term() {
    let all_rpcs = rpc_types();
    for rpc in all_rpcs.iter().cloned() {
        let mut node = raft(1, vec![2, 3], None, &mut init_random());
        let mut term = TermId::default();
        assert_eq!(node.leader(), (None, &term));

        term += 1;
        send(&mut node, 2, term, rpc);
        assert_eq!(node.leader().1, &term);
    }
}
74 |
--------------------------------------------------------------------------------
/tests/voting.rs:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2021 jessa0
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Affero General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Affero General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU Affero General Public License
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | mod common;
19 |
20 | use common::*;
21 | use simple_raft::message::{RaftMessage, Rpc, VoteResponse};
22 |
/// A node with no peers becomes leader as soon as it times out.
#[test]
pub fn empty_group_become_leader() {
    let mut rng = init_random();
    let mut node = raft(1, vec![], None, &mut rng);
    assert!(!node.is_leader());

    node.timeout();
    assert!(node.is_leader());
}
31 |
/// With a single peer, a timed-out node becomes leader only after that peer
/// grants its vote.
#[test]
pub fn _1_peer_become_leader() {
    let mut rng = init_random();
    let mut node = raft(1, vec![2], None, &mut rng);
    assert!(!node.is_leader());

    let RaftMessage { term, .. } = node.timeout().unwrap().message;
    assert!(!node.is_leader());

    let granted = Rpc::VoteResponse(VoteResponse { vote_granted: true });
    send(&mut node, 2, term, granted);
    assert!(node.is_leader());
}
43 |
/// With two peers, one rejected vote does not make the node leader, while a
/// subsequent granted vote from the other peer does.
#[test]
pub fn become_leader() {
    let mut rng = init_random();
    let mut node = raft(1, vec![2, 3], None, &mut rng);
    assert!(!node.is_leader());

    let RaftMessage { term, .. } = node.timeout().unwrap().message;
    assert!(!node.is_leader());

    let rejected = Rpc::VoteResponse(VoteResponse { vote_granted: false });
    send(&mut node, 2, term, rejected);
    assert!(!node.is_leader());

    let granted = Rpc::VoteResponse(VoteResponse { vote_granted: true });
    send(&mut node, 3, term, granted);
    assert!(node.is_leader());
}
58 |
/// A granted vote carrying the term of an earlier timeout does not make the
/// node leader after it has timed out again.
#[test]
pub fn vote_old_term() {
    let mut rng = init_random();
    let mut node = raft(1, vec![2, 3], None, &mut rng);

    // Remember the term of the first timeout, then time out a second time.
    let RaftMessage { term: stale_term, .. } = node.timeout().unwrap().message;
    node.timeout();

    let granted = Rpc::VoteResponse(VoteResponse { vote_granted: true });
    send(&mut node, 2, stale_term, granted);
    assert!(!node.is_leader());
}
68 |
/// With four peers, two granted votes from the same peer are not enough to
/// become leader; a granted vote from a second peer is.
#[test]
pub fn vote_twice() {
    let mut rng = init_random();
    let mut node = raft(1, vec![2, 3, 4, 5], None, &mut rng);
    let RaftMessage { term, .. } = node.timeout().unwrap().message;

    // Peer 2 votes twice.
    for _ in 0..2 {
        send(&mut node, 2, term, Rpc::VoteResponse(VoteResponse { vote_granted: true }));
    }
    assert!(!node.is_leader());

    send(&mut node, 3, term, Rpc::VoteResponse(VoteResponse { vote_granted: true }));
    assert!(node.is_leader());
}
81 |
/// A one-node group has a leader after node 0 times out.
#[test]
pub fn _1_timeout() {
    let mut rng = init_random();
    let mut group = TestRaftGroup::new(1, &mut rng, config());
    group.run_on_node(0, |node| node.timeout());
    assert!(group.has_leader());
}
88 |
/// A two-node group has a leader after node 0 times out.
#[test]
pub fn _2_nodes_timeout() {
    let mut rng = init_random();
    let mut group = TestRaftGroup::new(2, &mut rng, config());
    group.run_on_node(0, |node| node.timeout());
    assert!(group.has_leader());
}
95 |
/// A two-node group with node 1 down has no leader after node 0 times out.
#[test]
pub fn _2_nodes_failed_timeout() {
    let mut rng = init_random();
    let faults = config().node_down(1);
    let mut group = TestRaftGroup::new(2, &mut rng, faults);
    group.run_on_node(0, |node| node.timeout());
    assert!(!group.has_leader());
}
102 |
/// A three-node group has a leader after node 0 times out.
#[test]
pub fn _3_nodes_timeout() {
    let mut rng = init_random();
    let mut group = TestRaftGroup::new(3, &mut rng, config());
    group.run_on_node(0, |node| node.timeout());
    assert!(group.has_leader());
}
109 |
/// A three-node group with node 1 isolated still has a leader after node 0
/// times out.
#[test]
pub fn _3_nodes_degraded_timeout() {
    let mut rng = init_random();
    let faults = config().isolate(1);
    let mut group = TestRaftGroup::new(3, &mut rng, faults);
    group.run_on_node(0, |node| node.timeout());
    assert!(group.has_leader());
}
116 |
/// A three-node group that drops traffic between nodes 0 and 1 still has a
/// leader after node 0 times out.
#[test]
pub fn _3_nodes_split_timeout() {
    let mut rng = init_random();
    let faults = config().drop_between(0, 1);
    let mut group = TestRaftGroup::new(3, &mut rng, faults);
    group.run_on_node(0, |node| node.timeout());
    assert!(group.has_leader());
}
123 |
/// A three-node group with nodes 1 and 2 down has no leader after node 0
/// times out.
#[test]
pub fn _3_nodes_failed_timeout() {
    let mut rng = init_random();
    let faults = config().node_down(1).node_down(2);
    let mut group = TestRaftGroup::new(3, &mut rng, faults);
    group.run_on_node(0, |node| node.timeout());
    assert!(!group.has_leader());
}
130 |
/// A four-node group with node 1 isolated still has a leader after node 0
/// times out.
#[test]
pub fn _4_nodes_degraded_timeout() {
    let mut rng = init_random();
    let faults = config().isolate(1);
    let mut group = TestRaftGroup::new(4, &mut rng, faults);
    group.run_on_node(0, |node| node.timeout());
    assert!(group.has_leader());
}
137 |
/// A four-node group with nodes 1 and 2 isolated has no leader after node 0
/// times out.
#[test]
pub fn _4_nodes_failed_timeout() {
    let mut rng = init_random();
    let faults = config().isolate(1).isolate(2);
    let mut group = TestRaftGroup::new(4, &mut rng, faults);
    group.run_on_node(0, |node| node.timeout());
    assert!(!group.has_leader());
}
144 |
/// A five-node group with nodes 1 and 2 isolated still has a leader after
/// node 0 times out.
#[test]
pub fn _5_nodes_degraded_timeout() {
    let mut rng = init_random();
    let faults = config().isolate(1).isolate(2);
    let mut group = TestRaftGroup::new(5, &mut rng, faults);
    group.run_on_node(0, |node| node.timeout());
    assert!(group.has_leader());
}
151 |
/// A five-node group with nodes 1, 2 and 3 isolated has no leader after
/// node 0 times out.
#[test]
pub fn _5_nodes_failed_timeout() {
    let mut rng = init_random();
    let faults = config().isolate(1).isolate(2).isolate(3);
    let mut group = TestRaftGroup::new(5, &mut rng, faults);
    group.run_on_node(0, |node| node.timeout());
    assert!(!group.has_leader());
}
158 |
/// Runs a three-node group until it has a leader, then keeps running for ten
/// election timeouts' worth of ticks with `has_leader` as the inspection
/// assertion.
#[test]
pub fn election_timeout() {
    let mut rng = init_random();
    let mut group = TestRaftGroup::new(3, &mut rng, config());
    group.run_until(|g| g.has_leader());

    let ticks = 10 * CONFIG.election_timeout_ticks;
    group.run_for_inspect(ticks, |g| assert!(g.has_leader()));
}
165 |
/// Same as `election_timeout`, but with node 0 isolated from the rest of the
/// three-node group.
#[test]
pub fn degraded() {
    let mut rng = init_random();
    let faults = config().isolate(0);
    let mut group = TestRaftGroup::new(3, &mut rng, faults);
    group.run_until(|g| g.has_leader());

    let ticks = 10 * CONFIG.election_timeout_ticks;
    group.run_for_inspect(ticks, |g| assert!(g.has_leader()));
}
172 |
/// With traffic dropped between nodes 1 and 2, node 1 becomes leader after
/// timing out; the group is then run until it has no leader, and again until
/// it has one.
#[test]
pub fn split_unstable() {
    let mut rng = init_random();
    let faults = config().drop_between(1, 2);
    let mut group = TestRaftGroup::new(3, &mut rng, faults);

    group.run_on_node(1, |node| node.timeout());
    assert!(group.nodes[1].is_leader());

    group.run_until(|g| !g.has_leader());
    group.run_until(|g| g.has_leader());
}
181 |
/// With traffic dropped between nodes 1 and 2, node 0 times out and the group
/// is run for ten election timeouts' worth of ticks with "node 0 is leader"
/// as the inspection assertion.
#[test]
pub fn split_stable() {
    let mut rng = init_random();
    let faults = config().drop_between(1, 2);
    let mut group = TestRaftGroup::new(3, &mut rng, faults);

    group.run_on_node(0, |node| node.timeout());

    let ticks = 10 * CONFIG.election_timeout_ticks;
    group.run_for_inspect(ticks, |g| assert!(g.nodes[0].is_leader()));
}
188 |
/// Starts like `split_unstable`; once the group has lost its leader the fault
/// config is replaced with a fresh `config()`, and the group is run until it
/// has a leader and then for ten more election timeouts' worth of ticks.
#[test]
pub fn split_rejoin() {
    let mut rng = init_random();
    let faults = config().drop_between(1, 2);
    let mut group = TestRaftGroup::new(3, &mut rng, faults);

    group.run_on_node(1, |node| node.timeout());
    assert!(group.nodes[1].is_leader());
    group.run_until(|g| !g.has_leader());

    // Swap in a fresh config, discarding the drop rule between nodes 1 and 2.
    group.config = config();
    group.run_until(|g| g.has_leader());

    let ticks = 10 * CONFIG.election_timeout_ticks;
    group.run_for_inspect(ticks, |g| assert!(g.has_leader()));
}
199 |
--------------------------------------------------------------------------------