├── .github └── workflows │ ├── gh-pages.yml │ └── test.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── examples ├── README.md ├── raftcat.rs ├── simple.rs └── threaded.rs ├── src ├── core.rs ├── lib.rs ├── log.rs ├── log │ ├── mem.rs │ └── tests.rs ├── macros.rs ├── message.rs ├── node.rs ├── prelude.rs └── raft.proto └── tests ├── commit.rs ├── common.rs ├── leader.rs ├── term.rs └── voting.rs /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: gh-pages 2 | on: 3 | push: 4 | branches: 5 | - master 6 | 7 | defaults: 8 | run: 9 | shell: bash 10 | jobs: 11 | gh-pages: 12 | name: gh-pages 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: checkout 16 | uses: actions/checkout@v2 17 | 18 | - name: cargo doc 19 | run: cargo doc --verbose --lib --no-deps 20 | 21 | - name: force push docs to gh-pages branch 22 | run: | 23 | git config user.name "Github CI -- gh-pages" 24 | git config user.email "<>" 25 | 26 | mv target/doc/ docs/ 27 | git add docs 28 | git commit -m "render rustdocs" 29 | git push -f origin HEAD:gh-pages 30 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: [push] 3 | 4 | defaults: 5 | run: 6 | shell: bash 7 | jobs: 8 | build-and-test-release: 9 | name: build-and-test-release 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: checkout 13 | uses: actions/checkout@v2 14 | with: 15 | fetch-depth: 1 16 | 17 | - name: cargo build --release 18 | run: cargo build --release --verbose 19 | 20 | - name: cargo test --release 21 | run: cargo test --release --verbose 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "simple-raft" 3 | version = "0.2.0" 4 | edition = "2018" 5 | 6 | description = "A minimal implementation of the Raft consensus algorithm" 7 | license = "AGPL-3.0-or-later" 8 | repository = "https://github.com/simple-raft-rs/raft-rs" 9 | categories = ["database-implementations", "no-std"] 10 | keywords = ["raft", "no_std"] 11 | 12 | exclude = [".github/"] 13 | 14 | [features] 15 | default = ["prost"] 16 | 17 | [dependencies] 18 | bytes = { version = "1.0", default-features = false, features = [] } 19 | log = "0.4" 20 | prost = { version = "0.7", default-features = false, features = ["prost-derive"], optional = true } 21 | rand_core = { version = "0.6", default-features = false, features = [] } 22 | 23 | [dev-dependencies] 24 | derive_more = "0.99" 25 | env_logger = { version = "0.8", default-features = false, features = [] } 26 | itertools = "0.10" 27 | rand = "0.8" 28 | rand_chacha = "0.3" 29 | 30 | [[example]] 31 | name = "simple" 32 | test = true 33 | 34 | [[example]] 35 | name = "threaded" 36 | test = true 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 
13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. 
Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. 
Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 
121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. 
You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 
184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 
217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 
244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 
275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 
305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 
374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 
409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | <one line to give the program's name and a brief idea of what it does.> 633 | Copyright (C) <year> <name of author> 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published by 637 | the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see <https://www.gnu.org/licenses/>. 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | <https://www.gnu.org/licenses/>. 662 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Raft 2 | 3 | A Rust implementation of the [Raft consensus algorithm](https://raft.github.io/), focused on: 4 | 5 | - Correctness and readability.
The core implementation is [written](src/core.rs) alongside the [original Raft TLA+ 6 | specification](https://github.com/ongardie/raft.tla) to aid auditability. 7 | - Simplicity. Some optional features described in Diego Ongaro's [Raft 8 | thesis](http://web.stanford.edu/~ouster/cgi-bin/papers/OngaroPhD.pdf) such as pre-voting, membership changes, and 9 | snapshots are currently not implemented. 10 | - Usability. A primary goal of the API is to be simple and not error-prone. 11 | 12 | Important caveats: 13 | 14 | - Unicast message delivery is assumed to be non-lossy in order for replication to make progress. In other words, once a 15 | non-broadcast message is returned from an API, it must be retained and retransmitted until it is acknowledged as 16 | delivered by its destination. Messages may be safely delivered out-of-order or more than once, however. To prevent 17 | unbounded queueing, the API is designed to only ever return a bounded amount of unacknowledged unicast message data. 18 | 19 | This crate is `no_std`, but depends on the `alloc` crate. 20 | 21 | [API Documentation](https://simple-raft-rs.github.io/raft-rs/simple_raft) 22 | [Examples](examples) 23 | 24 | ## Crate Features 25 | 26 | This crate has the following optional features: 27 | 28 | - `prost` enables optional protobuf serialization of Raft messages. A corresponding [protobuf file](src/raft.proto) is 29 | also provided. 30 | 31 | ## License 32 | 33 | Copyright (C) 2019 Open Whisper Systems 34 | Copyright (C) 2021 jessa0 35 | 36 | This program is free software: you can redistribute it and/or modify 37 | it under the terms of the GNU Affero General Public License as published by 38 | the Free Software Foundation, either version 3 of the License, or 39 | (at your option) any later version. 40 | 41 | This program is distributed in the hope that it will be useful, 42 | but WITHOUT ANY WARRANTY; without even the implied warranty of 43 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the 44 | GNU Affero General Public License for more details. 45 | 46 | You should have received a copy of the GNU Affero General Public License 47 | along with this program. If not, see <https://www.gnu.org/licenses/>. 48 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | * [Simple](simple.rs) -- A simple example from the crate-level documentation. 4 | * [Threaded](threaded.rs) -- A simple example with a thread per RaftNode. 5 | * [`raftcat`](raftcat.rs) -- A complex networked example as a command-line tool. 6 | 7 | ## `raftcat` 8 | 9 | `raftcat` is a command-line tool to run a networked Raft group over TCP. Lines from stdin are appended to the Raft log 10 | as log entries. Committed log entries are written to stdout. This is a toy example, so no retry is attempted on log 11 | appends, which in a database would normally be handled by the database client. This example also does not persist 12 | state, so restarting a node may result in data loss or inconsistency. 13 | -------------------------------------------------------------------------------- /examples/raftcat.rs: -------------------------------------------------------------------------------- 1 | //! A complex networked example as a command-line tool.
2 | 3 | use bytes::{BufMut, Bytes}; 4 | use prost::Message; 5 | use rand_core::OsRng; 6 | use simple_raft::log::mem::RaftLogMemory; 7 | use simple_raft::node::{AppendError, RaftConfig, RaftNode}; 8 | use simple_raft::message::{RaftMessage, RaftMessageDestination, SendableRaftMessage}; 9 | use std::collections::{BTreeMap, BTreeSet}; 10 | use std::error::Error; 11 | use std::io::{BufRead, BufReader, Read, Write}; 12 | use std::net::{TcpListener, TcpStream}; 13 | use std::sync::mpsc; 14 | use std::time::{Duration, Instant}; 15 | 16 | const TICK_DURATION: Duration = Duration::from_millis(50); 17 | const RAFT_LOG_CAPACITY: usize = 100 * 1024 * 1024; 18 | const RAFT_CONFIG: RaftConfig = RaftConfig { 19 | election_timeout_ticks: 10, 20 | heartbeat_interval_ticks: 5, 21 | replication_chunk_size: 65536, 22 | }; 23 | 24 | type NodeId = String; 25 | 26 | #[derive(Clone)] 27 | enum IncomingMessage { 28 | Append(Bytes), 29 | Message(NetworkMessage), 30 | } 31 | 32 | #[derive(Clone, Message)] 33 | pub struct NetworkMessage { 34 | #[prost(bytes, required)] 35 | pub from: Bytes, 36 | #[prost(message, required)] 37 | pub message: RaftMessage, 38 | } 39 | 40 | struct Network { 41 | peers_tx: BTreeMap>, 42 | } 43 | 44 | struct Args { 45 | bind_addr: Option, 46 | node_id: NodeId, 47 | peers: BTreeSet, 48 | } 49 | 50 | fn main() { 51 | env_logger::builder().filter_level(log::LevelFilter::Info).parse_default_env().init(); 52 | 53 | let Args { bind_addr, node_id, peers } = parse_args(); 54 | 55 | let (main_tx, main_rx) = mpsc::channel::(); 56 | if let Some(bind_addr) = bind_addr { 57 | start_peer_listener(main_tx.clone(), bind_addr); 58 | } 59 | let network = start_peer_senders(node_id.clone(), peers.clone()); 60 | 61 | // Send lines from stdin to the main thread 62 | std::thread::spawn(move || { 63 | let stdin = std::io::stdin(); 64 | let mut stdin_lock = stdin.lock(); 65 | let mut line = String::new(); 66 | while stdin_lock.read_line(&mut line).expect("error reading from stdin") != 0 { 
67 | let _ignore = main_tx.send(IncomingMessage::Append(line.clone().into())); 68 | line.clear(); 69 | } 70 | }); 71 | 72 | let mut raft = RaftNode::new( 73 | node_id.clone(), 74 | peers.clone(), 75 | RaftLogMemory::with_capacity(10240, RAFT_LOG_CAPACITY), 76 | OsRng::default(), 77 | RAFT_CONFIG, 78 | ); 79 | 80 | let stdout = std::io::stdout(); 81 | let mut stdout_lock = stdout.lock(); 82 | 83 | let mut next_tick = Instant::now() + TICK_DURATION; 84 | loop { 85 | match main_rx.recv_timeout(next_tick.saturating_duration_since(Instant::now())) { 86 | Ok(IncomingMessage::Append(data)) => { 87 | // Append log entries from stdin 88 | match raft.append(data) { 89 | Ok(new_messages) => 90 | new_messages.for_each(|message| network.send(message)), 91 | Err(AppendError::Cancelled { data }) => 92 | log::info!("append cancelled: {}", String::from_utf8_lossy(&data)), 93 | Err(AppendError::RaftLogErr(err)) => 94 | log::error!("raft log error: {:?}", err), 95 | } 96 | } 97 | Ok(IncomingMessage::Message(NetworkMessage { from, message })) => { 98 | // Process incoming message 99 | let new_messages = raft.receive(message, String::from_utf8_lossy(&from).to_string()); 100 | new_messages.for_each(|message| network.send(message)); 101 | } 102 | Err(mpsc::RecvTimeoutError::Timeout) => { 103 | // Tick the timer 104 | let new_messages = raft.timer_tick(); 105 | new_messages.for_each(|message| network.send(message)); 106 | next_tick = Instant::now() + TICK_DURATION; 107 | } 108 | Err(mpsc::RecvTimeoutError::Disconnected) => 109 | panic!("child threads died"), 110 | } 111 | 112 | // Check for committed log entries 113 | for log_entry in raft.take_committed() { 114 | if !log_entry.data.is_empty() { 115 | stdout_lock.write(&log_entry.data).expect("error writing to stdout"); 116 | } 117 | } 118 | } 119 | } 120 | 121 | fn parse_args() -> Args { 122 | let mut args = std::env::args(); 123 | let executable_name = args.next().unwrap_or_default(); 124 | 125 | let (bind_addr, node_id) = match 
(args.next(), args.next()) { 126 | (Some(first_arg), _) if first_arg.starts_with('-') => usage(&executable_name), 127 | (Some(_), None) => usage(&executable_name), 128 | (Some(bind_addr), Some(node_id)) => (Some(bind_addr), node_id), 129 | (None, _) => (None, String::new()), 130 | }; 131 | 132 | let peers = args.collect::>(); 133 | 134 | Args { bind_addr, node_id, peers } 135 | } 136 | 137 | fn usage(executable_name: &str) -> ! { 138 | eprint!(concat!( 139 | "Usage: {} [-h] [[bind_addr:]port node_host:port] [peer_host:port ...]\n", 140 | "\n", 141 | "[bind_addr:]port - the host:port to listen on\n", 142 | "node_host:port - the public host:port of this node\n", 143 | "peer_host:port - the public host:port of any peers\n", 144 | ), executable_name); 145 | std::process::exit(1) 146 | } 147 | 148 | fn start_peer_listener(main_tx: mpsc::Sender, bind_addr: String) { 149 | let bind_addr = if bind_addr.contains(':') { bind_addr } else { format!("0.0.0.0:{}", bind_addr) }; 150 | let listener = TcpListener::bind(&bind_addr).unwrap_or_else(|error| panic!("error listening on {}: {}", bind_addr, error)); 151 | std::thread::spawn(move || { 152 | for stream in listener.incoming() { 153 | start_peer_receiver(BufReader::new(stream.expect("error accepting connecting")), main_tx.clone()); 154 | } 155 | }); 156 | } 157 | 158 | fn start_peer_receiver(mut reader: BufReader, main_tx: mpsc::Sender) { 159 | std::thread::spawn(move || { 160 | let addr = reader.get_mut().peer_addr().unwrap(); 161 | log::info!("accepted connection from {}", addr); 162 | while let Ok(message) = read_peer_message(&mut reader).map_err(|error| log::info!("error receiving from {}: {}", addr, error)) { 163 | let _ignore = main_tx.send(IncomingMessage::Message(message)); 164 | } 165 | }); 166 | } 167 | 168 | fn read_peer_message(reader: &mut BufReader) -> Result> { 169 | let mut len_data = [0; 4]; 170 | reader.read_exact(&mut len_data)?; 171 | let mut message_data = vec![0; u32::from_be_bytes(len_data) as usize]; 
172 | reader.read_exact(&mut message_data)?; 173 | let message = NetworkMessage::decode(&message_data[..]).map_err(|error| format!("invalid message from peer: {}", error))?; 174 | log::debug!("{} -> self: {}", String::from_utf8_lossy(&message.from), &message.message); 175 | Ok(message) 176 | } 177 | 178 | fn start_peer_senders(node_id: NodeId, peers: BTreeSet) -> Network { 179 | let (peers_tx, peers_rx): (BTreeMap<_,_>, Vec<_>) = peers.iter().map(|peer_id| { 180 | let (peer_tx, peer_rx) = mpsc::channel(); 181 | ((peer_id.clone(), peer_tx), (peer_id.clone(), peer_rx)) 182 | }).unzip(); 183 | 184 | for (peer_id, peer_rx) in peers_rx { 185 | start_peer_sender(node_id.clone().into(), peer_id, peer_rx); 186 | } 187 | 188 | Network { peers_tx } 189 | } 190 | 191 | fn start_peer_sender(from: Bytes, address: String, peer_rx: mpsc::Receiver) { 192 | std::thread::spawn(move || { 193 | let mut connection = None; 194 | let mut data = Vec::new(); 195 | loop { 196 | let message = match peer_rx.recv_timeout(TICK_DURATION * RAFT_CONFIG.election_timeout_ticks) { 197 | Ok(message) => Some(NetworkMessage { from: from.clone(), message }), 198 | Err(mpsc::RecvTimeoutError::Timeout) => None, 199 | Err(mpsc::RecvTimeoutError::Disconnected) => break, 200 | }; 201 | 202 | if connection.is_none() { 203 | match TcpStream::connect(&address) { 204 | Ok(established_connection) => { 205 | log::info!("connected to {}", &address); 206 | let _ignore = established_connection.set_nodelay(true); 207 | connection = Some(established_connection); 208 | } 209 | Err(error) => 210 | log::info!("error connecting to {}: {}", &address, error), 211 | } 212 | } 213 | if let (Some(established_connection), Some(message)) = (&mut connection, &message) { 214 | data.clear(); 215 | data.put_u32(message.encoded_len() as u32); 216 | message.encode(&mut data).unwrap(); 217 | if let Err(error) = established_connection.write_all(&data) { 218 | log::info!("error sending to {}: {}", &address, error); 219 | connection = None; 
220 | } 221 | } 222 | } 223 | }); 224 | } 225 | 226 | impl Network { 227 | fn send(&self, sendable: SendableRaftMessage) { 228 | match sendable.dest { 229 | RaftMessageDestination::Broadcast => { 230 | log::debug!("self -> all: {}", sendable.message); 231 | self.peers_tx.values().for_each(|peer_tx| drop(peer_tx.send(sendable.message.clone()))); 232 | } 233 | RaftMessageDestination::To(dst_id) => { 234 | log::debug!("self -> {}: {}", dst_id, sendable.message); 235 | let _ = self.peers_tx[&dst_id].send(sendable.message); 236 | } 237 | } 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /examples/simple.rs: -------------------------------------------------------------------------------- 1 | //! A simple example from the crate-level documentation 2 | 3 | use rand_chacha::ChaChaRng; 4 | use rand_core::SeedableRng; 5 | use simple_raft::log::mem::RaftLogMemory; 6 | use simple_raft::node::{RaftConfig, RaftNode}; 7 | use simple_raft::message::{RaftMessageDestination, SendableRaftMessage}; 8 | use std::collections::VecDeque; 9 | use std::str; 10 | 11 | fn main() { 12 | // Construct 5 Raft peers 13 | type NodeId = usize; 14 | let mut peers = (0..5).map(|id: NodeId| RaftNode::new( 15 | id, 16 | (0..5).collect(), 17 | RaftLogMemory::new_unbounded(), 18 | ChaChaRng::seed_from_u64(id as u64), 19 | RaftConfig { 20 | election_timeout_ticks: 10, 21 | heartbeat_interval_ticks: 1, 22 | replication_chunk_size: usize::max_value(), 23 | }, 24 | )).collect::>(); 25 | 26 | // Simulate reliably sending messages instantaneously between peers 27 | let mut inboxes = vec![VecDeque::new(); peers.len()]; 28 | let send_message = |src_id: NodeId, sendable: SendableRaftMessage, inboxes: &mut Vec>| { 29 | match sendable.dest { 30 | RaftMessageDestination::Broadcast => { 31 | println!("peer {} -> all: {}", src_id, &sendable.message); 32 | inboxes.iter_mut().for_each(|inbox| inbox.push_back((src_id, sendable.message.clone()))); 33 | } 34 | 
RaftMessageDestination::To(dst_id) => { 35 | println!("peer {} -> peer {}: {}", src_id, dst_id, &sendable.message); 36 | inboxes[dst_id].push_back((src_id, sendable.message)); 37 | } 38 | } 39 | }; 40 | 41 | // Loop until a log entry is committed on all peers 42 | let mut appended = false; 43 | let mut peers_committed = vec![false; peers.len()]; 44 | while !peers_committed.iter().all(|seen| *seen) { 45 | for (peer_id, peer) in peers.iter_mut().enumerate() { 46 | // Tick the timer 47 | let new_messages = peer.timer_tick(); 48 | new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes)); 49 | 50 | // Append a log entry on the leader 51 | if !appended && peer.is_leader() { 52 | if let Ok(new_messages) = peer.append("Hello world!") { 53 | println!("peer {} appending to the log", peer_id); 54 | new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes)); 55 | appended = true; 56 | } 57 | } 58 | 59 | // Process message inbox 60 | while let Some((src_id, message)) = inboxes[peer_id].pop_front() { 61 | let new_messages = peer.receive(message, src_id); 62 | new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes)); 63 | } 64 | 65 | // Check for committed log entries 66 | for log_entry in peer.take_committed() { 67 | if !log_entry.data.is_empty() { 68 | println!("peer {} saw commit {}", peer_id, str::from_utf8(&log_entry.data).unwrap()); 69 | assert!(!peers_committed[peer_id]); 70 | peers_committed[peer_id] = true; 71 | } 72 | } 73 | } 74 | } 75 | } 76 | 77 | #[cfg(test)] 78 | mod test { 79 | #[test] 80 | fn main() { 81 | super::main(); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /examples/threaded.rs: -------------------------------------------------------------------------------- 1 | //! 
A simple example with a thread per RaftNode 2 | 3 | use rand_chacha::ChaChaRng; 4 | use rand_core::SeedableRng; 5 | use simple_raft::log::mem::RaftLogMemory; 6 | use simple_raft::node::{RaftConfig, RaftNode}; 7 | use simple_raft::message::{RaftMessage, RaftMessageDestination, SendableRaftMessage}; 8 | use std::str; 9 | use std::sync::{Arc, Mutex}; 10 | use std::sync::mpsc; 11 | use std::thread; 12 | use std::time::{Duration, Instant}; 13 | 14 | type NodeId = usize; 15 | 16 | const TICK_DURATION: Duration = Duration::from_millis(100); 17 | const RAFT_CONFIG: RaftConfig = RaftConfig { 18 | election_timeout_ticks: 10, 19 | heartbeat_interval_ticks: 1, 20 | replication_chunk_size: usize::max_value(), 21 | }; 22 | 23 | #[derive(Clone)] 24 | struct IncomingMessage { 25 | from: NodeId, 26 | message: RaftMessage, 27 | } 28 | 29 | #[derive(Clone)] 30 | struct Network { 31 | peers_tx: Vec>, 32 | } 33 | 34 | fn main() { 35 | // Construct 5 Raft peers 36 | let (peers_tx, peers_rx): (Vec<_>, Vec<_>) = (0..5).map(|_| mpsc::channel()).unzip(); 37 | let network = Network { peers_tx }; 38 | let peers = peers_rx.into_iter().enumerate().map(|(peer_id, rx): (NodeId, _)| ( 39 | RaftNode::new( 40 | peer_id, 41 | (0..5).collect(), 42 | RaftLogMemory::new_unbounded(), 43 | ChaChaRng::seed_from_u64(peer_id as u64), 44 | RAFT_CONFIG, 45 | ), 46 | rx, 47 | )); 48 | 49 | let appended = Arc::new(Mutex::new(false)); 50 | let mut peers_committed = vec![false; peers.len()]; 51 | let (peer_committed_tx, peer_committed_rx) = mpsc::channel(); 52 | 53 | for (peer_id, (mut peer, rx)) in peers.enumerate() { 54 | let appended = Arc::clone(&appended); 55 | let network = network.clone(); 56 | let peer_committed_tx = peer_committed_tx.clone(); 57 | thread::spawn(move || { 58 | // Loop until a log entry is committed 59 | let mut next_tick = Instant::now() + TICK_DURATION; 60 | loop { 61 | match rx.recv_timeout(next_tick.saturating_duration_since(Instant::now())) { 62 | Ok(message) => { 63 | // Process 
incoming message 64 | let new_messages = peer.receive(message.message, message.from); 65 | new_messages.for_each(|message| network.send(peer_id, message)); 66 | } 67 | Err(mpsc::RecvTimeoutError::Timeout) => { 68 | // Tick the timer 69 | let new_messages = peer.timer_tick(); 70 | new_messages.for_each(|message| network.send(peer_id, message)); 71 | next_tick = Instant::now() + TICK_DURATION; 72 | } 73 | Err(mpsc::RecvTimeoutError::Disconnected) => 74 | panic!("peer {} disconnected", peer_id), 75 | } 76 | 77 | // Append a log entry on the leader 78 | let mut appended = appended.lock().unwrap(); 79 | if !*appended && peer.is_leader() { 80 | if let Ok(new_messages) = peer.append("Hello world!") { 81 | println!("peer {} appending to the log", peer_id); 82 | new_messages.for_each(|message| network.send(peer_id, message)); 83 | *appended = true; 84 | } 85 | } 86 | drop(appended); 87 | 88 | // Check for committed log entries 89 | for log_entry in peer.take_committed() { 90 | if !log_entry.data.is_empty() { 91 | println!("peer {} saw commit {}", peer_id, str::from_utf8(&log_entry.data).unwrap()); 92 | peer_committed_tx.send(peer_id).unwrap(); 93 | } 94 | } 95 | } 96 | }); 97 | } 98 | drop((network, peer_committed_tx)); 99 | 100 | // Loop until a log entry is committed on all peers 101 | while !peers_committed.iter().all(|seen| *seen) { 102 | let peer_id = peer_committed_rx.recv().unwrap(); 103 | assert!(!peers_committed[peer_id]); 104 | peers_committed[peer_id] = true; 105 | } 106 | } 107 | 108 | impl Network { 109 | fn send(&self, from: NodeId, sendable: SendableRaftMessage) { 110 | let message = IncomingMessage { from, message: sendable.message }; 111 | match sendable.dest { 112 | RaftMessageDestination::Broadcast => { 113 | println!("peer {} -> all: {}", from, message.message); 114 | self.peers_tx.iter().for_each(|peer_tx| drop(peer_tx.send(message.clone()))); 115 | } 116 | RaftMessageDestination::To(dst_id) => { 117 | println!("peer {} -> peer {}: {}", from, dst_id, 
message.message); 118 | let _ = self.peers_tx[dst_id].send(message); 119 | } 120 | } 121 | } 122 | } 123 | 124 | #[cfg(test)] 125 | mod test { 126 | #[test] 127 | fn main() { 128 | super::main(); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/core.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 Open Whisper Systems 3 | * Copyright (C) 2021 jessa0 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | 19 | //! Unstable, low-level API for the complete state of a Raft node. 20 | 21 | use alloc::collections::{BTreeMap, BTreeSet}; 22 | use bytes::Bytes; 23 | use core::fmt; 24 | use core::iter; 25 | use crate::message::*; 26 | use crate::node::{AppendError, RaftConfig}; 27 | use crate::prelude::*; 28 | use crate::log::{CommittedIter, RaftLog, RaftLogState}; 29 | use log::{error, warn, info, debug}; 30 | use rand_core::RngCore; 31 | use self::LeadershipState::*; 32 | 33 | /// The state of Raft log replication from a Raft node to one of its peers. 34 | pub struct ReplicationState { 35 | // \* The next entry to send to each follower. 36 | // VARIABLE nextIndex 37 | /// The index of the next log entry to be sent to this peer. 
38 | pub next_idx: LogIndex, 39 | 40 | // \* The latest entry that each follower has acknowledged is the same as the 41 | // \* leader's. This is used to calculate commitIndex on the leader. 42 | // VARIABLE matchIndex 43 | /// The index of the last log entry on this peer up to which the peer's log is known to match this node's log. 44 | pub match_idx: LogIndex, 45 | 46 | /// The index of the last log entry sent to this peer but which has not yet been acknowledged by the peer. 47 | pub inflight: Option, 48 | 49 | /// Whether this node is currently probing to discover the correct [`match_idx`][Self::match_idx] for this peer. 50 | pub send_probe: bool, 51 | 52 | /// Whether a heartbeat "ping" message is due to be sent to this peer. 53 | send_heartbeat: bool, 54 | } 55 | 56 | // \* Server states. 57 | // CONSTANTS Follower, Candidate, Leader 58 | enum LeadershipState { 59 | Follower(FollowerState), 60 | Candidate(CandidateState), 61 | Leader(LeaderState), 62 | } 63 | 64 | struct FollowerState { 65 | leader: Option, 66 | 67 | election_ticks: u32, 68 | random_election_ticks: u32, 69 | } 70 | 71 | struct CandidateState { 72 | // \* The set of servers from which the candidate has received a vote in its 73 | // \* currentTerm. 74 | // VARIABLE votesGranted 75 | votes_granted: BTreeSet, 76 | 77 | election_ticks: u32, 78 | } 79 | 80 | struct LeaderState { 81 | followers: BTreeMap, 82 | 83 | heartbeat_ticks: u32, 84 | } 85 | 86 | /// The complete state of a Raft node. 87 | pub struct RaftState { 88 | node_id: NodeId, 89 | peers: BTreeSet, 90 | random: Random, 91 | config: RaftConfig, 92 | 93 | // \* The server's term number. 94 | // VARIABLE currentTerm 95 | current_term: TermId, 96 | 97 | // \* The candidate the server voted for in its current term, or 98 | // \* Nil if it hasn't voted for any. 99 | // VARIABLE votedFor 100 | voted_for: Option, 101 | 102 | // \* The server's state (Follower, Candidate, or Leader).
103 | // VARIABLE state 104 | leadership: LeadershipState, 105 | 106 | // \* A Sequence of log entries. The index into this sequence is the index of the 107 | // \* log entry. Unfortunately, the Sequence module defines Head(s) as the entry 108 | // \* with index 1, so be careful not to use that! 109 | // VARIABLE log 110 | // \* The index of the latest entry in the log the state machine may apply. 111 | // VARIABLE commitIndex 112 | log: RaftLogState, 113 | } 114 | 115 | #[allow(missing_docs)] 116 | impl RaftState 117 | where Log: RaftLog, 118 | Random: RngCore, 119 | NodeId: Ord + Clone + fmt::Display, 120 | { 121 | pub fn new(node_id: NodeId, 122 | mut peers: BTreeSet, 123 | log: Log, 124 | mut random: Random, 125 | config: RaftConfig) 126 | -> Self { 127 | peers.remove(&node_id); 128 | let random_election_ticks = random_election_timeout(&mut random, config.election_timeout_ticks); 129 | Self { 130 | node_id, 131 | peers, 132 | random, 133 | config, 134 | log: RaftLogState::new(log), 135 | current_term: Default::default(), 136 | voted_for: Default::default(), 137 | leadership: Follower(FollowerState { 138 | leader: None, 139 | election_ticks: random_election_ticks, 140 | random_election_ticks, 141 | }), 142 | } 143 | } 144 | 145 | pub fn commit_idx(&self) -> &LogIndex { 146 | &self.log.commit_idx 147 | } 148 | 149 | pub fn config(&self) -> &RaftConfig { 150 | &self.config 151 | } 152 | 153 | pub fn is_leader(&self) -> bool { 154 | if let Leader(_) = &self.leadership { 155 | true 156 | } else { 157 | false 158 | } 159 | } 160 | 161 | pub fn leader(&self) -> (Option<&NodeId>, &TermId) { 162 | let leader = match &self.leadership { 163 | Follower(follower_state) => follower_state.leader.as_ref(), 164 | Candidate(_) => None, 165 | Leader(_) => Some(&self.node_id), 166 | }; 167 | (leader, &self.current_term) 168 | } 169 | 170 | pub fn log(&self) -> &Log { 171 | self.log.log() 172 | } 173 | 174 | pub fn log_mut(&mut self) -> &mut Log { 175 | self.log.log_mut() 176 | } 
177 | 178 | pub fn node_id(&self) -> &NodeId { 179 | &self.node_id 180 | } 181 | 182 | pub fn peers(&self) -> &BTreeSet { 183 | &self.peers 184 | } 185 | 186 | pub fn replication_state(&self, peer_node_id: &NodeId) -> Option<&ReplicationState> { 187 | if let LeadershipState::Leader(leader_state) = &self.leadership { 188 | leader_state.followers.get(peer_node_id) 189 | } else { 190 | None 191 | } 192 | } 193 | 194 | pub fn set_config(&mut self, config: RaftConfig) { 195 | self.config = config; 196 | 197 | match &mut self.leadership { 198 | Follower(FollowerState { election_ticks, random_election_ticks, .. }) => { 199 | if *random_election_ticks > self.config.election_timeout_ticks.saturating_mul(2) { 200 | *random_election_ticks = random_election_timeout(&mut self.random, self.config.election_timeout_ticks); 201 | } 202 | if election_ticks > random_election_ticks { 203 | *election_ticks = *random_election_ticks; 204 | } 205 | } 206 | Candidate(CandidateState { election_ticks, .. }) => { 207 | if *election_ticks > self.config.election_timeout_ticks.saturating_mul(2) { 208 | *election_ticks = random_election_timeout(&mut self.random, self.config.election_timeout_ticks); 209 | } 210 | } 211 | Leader(LeaderState { heartbeat_ticks, .. }) => { 212 | if *heartbeat_ticks > self.config.heartbeat_interval_ticks { 213 | *heartbeat_ticks = self.config.heartbeat_interval_ticks; 214 | } 215 | } 216 | } 217 | } 218 | 219 | pub fn take_committed(&mut self) -> CommittedIter<'_, Log> { 220 | self.log.take_committed() 221 | } 222 | 223 | pub fn timer_tick(&mut self) -> Option> { 224 | match &mut self.leadership { 225 | Follower(FollowerState { election_ticks, .. }) | 226 | Candidate(CandidateState { election_ticks, .. 
}) => { 227 | match election_ticks.saturating_sub(1) { 228 | 0 => { 229 | info!("election timeout at {}", &self.current_term); 230 | self.timeout() 231 | } 232 | new_election_ticks => { 233 | *election_ticks = new_election_ticks; 234 | None 235 | } 236 | } 237 | } 238 | Leader(leader_state) => { 239 | match leader_state.heartbeat_ticks.saturating_sub(1) { 240 | 0 => { 241 | leader_state.heartbeat_ticks = self.config.heartbeat_interval_ticks; 242 | debug!("sending heartbeat"); 243 | for replication in leader_state.followers.values_mut() { 244 | replication.send_heartbeat = true; 245 | } 246 | } 247 | new_heartbeat_ticks => { 248 | leader_state.heartbeat_ticks = new_heartbeat_ticks; 249 | } 250 | } 251 | None 252 | } 253 | } 254 | } 255 | 256 | pub fn reset_peer(&mut self, peer_node_id: NodeId) -> Option> { 257 | match &mut self.leadership { 258 | Follower(_) => { 259 | None 260 | } 261 | Candidate(_) => { 262 | if self.peers.contains(&peer_node_id) { 263 | let vote_request = self.request_vote(); 264 | let from = peer_node_id; 265 | vote_request.map(|message| SendableRaftMessage { message, dest: RaftMessageDestination::To(from) }) 266 | } else { 267 | None 268 | } 269 | } 270 | Leader(leader_state) => { 271 | if let Some(replication) = leader_state.followers.get_mut(&peer_node_id) { 272 | info!("resetting follower state {}", &peer_node_id); 273 | replication.next_idx = self.log.last_index() + 1; 274 | replication.send_probe = true; 275 | replication.send_heartbeat = true; 276 | replication.inflight = None; 277 | } 278 | None 279 | } 280 | } 281 | } 282 | 283 | // 284 | // -- raft TLA+ parallel code -- 285 | // the code below is so similar to Raft's TLA+ code that the TLA+ is provided 286 | // in the right-hand column for sections which correspond almost exactly. code 287 | // is provided in the same order as the TLA+ so that the reader can follow. 
288 | // 289 | 290 | // 291 | // \* Define state transitions 292 | // 293 | 294 | // \* Server i times out and starts a new election. 295 | pub fn timeout(&mut self) -> Option> { // Timeout(i) == 296 | match &self.leadership { 297 | Follower(_) | Candidate(_) => { // /\ state[i] \in {Follower, Candidate} 298 | self.current_term += 1; // /\ currentTerm' = [currentTerm EXCEPT ![i] = currentTerm[i] + 1] 299 | // \* Most implementations would probably just set the local vote 300 | // \* atomically, but messaging localhost for it is weaker. 301 | self.voted_for = Some(self.node_id.clone()); // /\ votedFor' = [votedFor EXCEPT ![i] = Nil] 302 | let votes_granted = iter::once(self.node_id.clone()).collect(); // /\ votesGranted' = [votesGranted EXCEPT ![i] = {}] 303 | self.leadership = Candidate(CandidateState { // /\ state' = [state EXCEPT ![i] = Candidate] 304 | votes_granted, 305 | election_ticks: self.random_election_timeout(), 306 | }); 307 | 308 | info!("became candidate at {}", self.current_term); 309 | self.become_leader(); 310 | self.advance_commit_idx(); 311 | self.request_vote().map(|message| SendableRaftMessage { 312 | message, 313 | dest: RaftMessageDestination::Broadcast, 314 | }) 315 | } 316 | Leader(_) => { 317 | None 318 | } 319 | } 320 | } 321 | 322 | // \* Candidate i sends j a RequestVote request. 323 | fn request_vote(&self) -> Option { // RequestVote(i,j) == 324 | match self.leadership { 325 | Candidate { .. 
} => { // /\ state[i] = Candidate 326 | let vote_request_msg = RaftMessage { // /\ Send([ 327 | term: self.current_term, // mterm |-> currentTerm[i], 328 | rpc: Some(Rpc::VoteRequest(VoteRequest { // mtype |-> RequestVoteRequest, 329 | last_log_term: self.log.last_term(), // mlastLogTerm |-> LastTerm(log[i]), 330 | last_log_idx: self.log.last_index(), // mlastLogIndex |-> Len(log[i]), 331 | })), 332 | }; 333 | Some(vote_request_msg) 334 | } 335 | _ => None, 336 | } 337 | } 338 | 339 | // \* Leader i sends j an AppendEntries request containing up to 1 entry. 340 | // \* While implementations may want to send more than 1 at a time, this spec uses 341 | // \* just 1 because it minimizes atomic regions without loss of generality. 342 | pub fn append_entries(&mut self, 343 | to_node_id: NodeId) 344 | -> Option> { // AppendEntries(i, j) == 345 | if let Leader(leader_state) = &mut self.leadership { // /\ state[i] = Leader 346 | let replication = 347 | match leader_state.followers.get_mut(&to_node_id) { // /\ i /= j 348 | Some(replication) => replication, 349 | None => return None, 350 | }; 351 | let last_log_idx = self.log.last_index(); 352 | let next_idx = replication.next_idx; 353 | let send_entries = (last_log_idx >= next_idx && 354 | !replication.send_probe); 355 | if !send_entries && !replication.send_heartbeat { 356 | return None; 357 | } 358 | if replication.inflight.is_some() { 359 | return None; 360 | } 361 | let prev_log_idx = next_idx - 1; // /\ LET prevLogIndex == nextIndex[i][j] - 1 362 | let maybe_prev_log_term = if prev_log_idx != Default::default() { // prevLogTerm == IF prevLogIndex > 0 THEN 363 | self.log.get_term(prev_log_idx) // log[i][prevLogIndex].term 364 | } else { // ELSE 365 | Some(Default::default()) // 0 366 | }; 367 | 368 | let prev_log_term = match maybe_prev_log_term { 369 | Some(prev_log_term) => prev_log_term, 370 | None => { 371 | error!("missing log {} to send to {}!", 372 | &prev_log_idx, &to_node_id); 373 | return None; 374 | } 375 | 
}; 376 | 377 | let mut entries: Vec = Vec::new(); 378 | let last_entry: LogIndex; 379 | if send_entries { // \* Send up to 1 entry, constrained by the end of the log. 380 | let mut entries_size = 0usize; 381 | let max_entries_size = self.config.replication_chunk_size; 382 | let entry_log_idxs = (0..).map(|idx| next_idx + idx) 383 | .take_while(|log_idx| *log_idx <= last_log_idx); 384 | for entry_log_idx in entry_log_idxs { // entries == SubSeq(log[i], nextIndex[i][j], lastEntry) 385 | let append_log_entry = if let Some(log_entry) = self.log.get(entry_log_idx) { 386 | let first_entry = entries_size == 0; 387 | if !first_entry && entries_size == max_entries_size { 388 | None 389 | } else { 390 | entries_size = entries_size.saturating_add(self.log.entry_len(&log_entry)); 391 | if first_entry || entries_size <= max_entries_size { 392 | Some(log_entry) 393 | } else { 394 | None 395 | } 396 | } 397 | } else { 398 | error!("error fetching raft log {} to send to {}!", 399 | &entry_log_idx, &to_node_id); 400 | None 401 | }; 402 | if let Some(log_entry) = append_log_entry { 403 | entries.push(log_entry); 404 | } else { 405 | break; 406 | } 407 | } 408 | last_entry = prev_log_idx + (entries.len() as u64); // lastEntry == Min({Len(log[i]), nextIndex[i][j]}) 409 | } else { 410 | last_entry = prev_log_idx; 411 | } 412 | let append_request_msg = RaftMessage { // IN Send([ 413 | term: self.current_term, // mterm |-> currentTerm[i], 414 | rpc: Some(Rpc::AppendRequest(AppendRequest { // mtype |-> AppendEntriesRequest, 415 | prev_log_idx, // mprevLogIndex |-> prevLogIndex, 416 | prev_log_term, // mprevLogTerm |-> prevLogTerm, 417 | entries, // mentries |-> entries, 418 | leader_commit: self.log.commit_idx.min(last_entry), // mcommitIndex |-> Min({commitIndex[i], lastEntry}), 419 | })), 420 | }; 421 | replication.send_heartbeat = false; 422 | replication.inflight = Some(last_entry); 423 | Some(SendableRaftMessage { 424 | message: append_request_msg, 425 | dest: 
RaftMessageDestination::To(to_node_id), 426 | }) 427 | } else { 428 | None 429 | } 430 | } 431 | 432 | // \* Candidate i transitions to leader. 433 | fn become_leader(&mut self) { // BecomeLeader(i) == 434 | if let Candidate(candidate_state) = &self.leadership { // /\ state[i] = Candidate 435 | if candidate_state.votes_granted.len() >= self.quorum_size() { // /\ votesGranted[i] \in Quorum 436 | info!("became leader at {}", &self.current_term); 437 | self.leadership = Leader(LeaderState { // /\ state' = [state EXCEPT ![i] = Leader] 438 | followers: (self.peers.iter().cloned()) 439 | .map(|id| (id, ReplicationState { 440 | next_idx: self.log.last_index() + 1, // /\ nextIndex' = [nextIndex EXCEPT ![i] = [j \in Server |-> Len(log[i]) + 1]] 441 | match_idx: Default::default(), // /\ matchIndex' = [matchIndex EXCEPT ![i] = [j \in Server |-> 0]] 442 | inflight: Default::default(), 443 | send_probe: Default::default(), 444 | send_heartbeat: Default::default(), 445 | })).collect(), 446 | heartbeat_ticks: 0, 447 | }); 448 | // append a noop in the new term to commit entries from past terms (Raft Section 5.4.2) 449 | let _ignore = self.client_request(Default::default()); 450 | } 451 | } 452 | } 453 | 454 | // \* Leader i receives a client request to add v to the log. 455 | pub fn client_request( 456 | &mut self, 457 | data: Bytes, 458 | ) -> Result<(), AppendError> { // ClientRequest(i, v) == 459 | let entry = LogEntry { 460 | term: self.current_term, // /\ LET entry == [term |-> currentTerm[i], 461 | data, // value |-> v] 462 | }; 463 | if let Leader(_) = &self.leadership { // /\ state[i] = Leader 464 | self.log.append(entry).map_err(AppendError::RaftLogErr)?; // newLog == Append(log[i], entry) 465 | self.advance_commit_idx(); 466 | Ok(()) // IN log' = [log EXCEPT ![i] = newLog] 467 | } else { 468 | Err(AppendError::Cancelled { data: entry.data }) 469 | } 470 | } 471 | 472 | // \* Leader i advances its commitIndex. 
473 | // \* This is done as a separate step from handling AppendEntries responses, 474 | // \* in part to minimize atomic regions, and in part so that leaders of 475 | // \* single-server clusters are able to mark entries committed. 476 | fn advance_commit_idx(&mut self) { // AdvanceCommitIndex(i) == 477 | if let Leader(leader_state) = &self.leadership { // /\ state[i] = Leader 478 | let mut match_idxs: Vec<_> = // /\ LET \* The set of servers that agree up through index. 479 | (leader_state.followers.values()) 480 | .map(|follower| follower.match_idx) 481 | .chain(iter::once(self.log.last_index())) 482 | .collect(); 483 | match_idxs.sort_unstable(); // Agree(index) == {i} \cup {k \in Server : matchIndex[i][k] >= index} 484 | let agree_idxs = (match_idxs.into_iter()) // \* The maximum indexes for which a quorum agrees 485 | .rev().skip(self.quorum_size() - 1); // agreeIndexes == {index \in 1..Len(log[i]) : Agree(index) \in Quorum} 486 | let commit_idx = match agree_idxs.max() { // \* New value for commitIndex'[i] 487 | Some(agree_idx) => { // newCommitIndex == IF /\ agreeIndexes /= {} 488 | if self.log.get_term(agree_idx) == Some(self.current_term) {// /\ log[i][Max(agreeIndexes)].term = currentTerm[i] 489 | self.log.commit_idx.max(agree_idx) // THEN Max(agreeIndexes) 490 | } else { 491 | self.log.commit_idx // ELSE commitIndex[i] 492 | } 493 | } 494 | None => self.log.commit_idx, 495 | }; 496 | if commit_idx != self.log.commit_idx { 497 | debug!("committed transactions from {} to {}", 498 | &self.log.commit_idx, &commit_idx); 499 | } 500 | self.log.commit_idx = commit_idx; // IN commitIndex' = [commitIndex EXCEPT ![i] = newCommitIndex] 501 | } 502 | } 503 | 504 | // 505 | // \* Message handlers 506 | // \* i = recipient, j = sender, m = message 507 | // 508 | 509 | // \* Server i receives a RequestVote request from server j with 510 | // \* m.mterm <= currentTerm[i]. 
511 | fn handle_vote_request(&mut self, 512 | msg_term: TermId, 513 | msg: VoteRequest, 514 | from: NodeId) 515 | -> Option> { // HandleRequestVoteRequest(i, j, m) == 516 | let last_log_idx = self.log.last_index(); 517 | let last_log_term = self.log.last_term(); 518 | let log_ok = // LET logOk == 519 | (msg.last_log_term > last_log_term) || // \/ m.mlastLogTerm > LastTerm(log[i]) 520 | (msg.last_log_term == last_log_term && // \/ /\ m.mlastLogTerm = LastTerm(log[i]) 521 | msg.last_log_idx >= last_log_idx); // /\ m.mlastLogIndex >= Len(log[i]) 522 | let grant = // LET grant == 523 | msg_term == self.current_term && // /\ m.mterm = currentTerm[i] 524 | log_ok && // /\ logOk 525 | self.voted_for.as_ref().map(|vote| &from == vote).unwrap_or(true); // /\ votedFor[i] \in {Nil, j} 526 | assert!(msg_term <= self.current_term); // IN /\ m.mterm <= currentTerm[i] 527 | if grant { 528 | self.voted_for = Some(from.clone()); // /\ \/ grant /\ votedFor' = [votedFor EXCEPT ![i] = j] 529 | } // \/ ~grant /\ UNCHANGED votedFor 530 | 531 | if grant { 532 | info!("granted vote at {} with {} at {} for node {} with {} at {}", 533 | &self.current_term, &last_log_idx, &last_log_term, 534 | &from, &msg.last_log_idx, &msg.last_log_term); 535 | match &mut self.leadership { 536 | Follower(FollowerState { election_ticks, random_election_ticks, .. 
}) => 537 | *election_ticks = *random_election_ticks, 538 | Candidate(_) | Leader(_) => (), 539 | } 540 | } else if msg_term != self.current_term { 541 | info!("ignored message with {} < current {}: {}", 542 | &msg_term, &self.current_term, &msg); 543 | } else if let Some(vote) = &self.voted_for { 544 | info!("rejected vote at {} for node {} as already voted for {}", 545 | &self.current_term, &from, vote); 546 | } else { 547 | info!("rejected vote at {} with {} at {} for node {} with {} at {}", 548 | &self.current_term, &last_log_idx, &last_log_term, 549 | &from, &msg.last_log_idx, &msg.last_log_term); 550 | } 551 | 552 | let message = RaftMessage { // /\ Reply([ 553 | term: self.current_term, // mterm |-> currentTerm[i], 554 | rpc: Some(Rpc::VoteResponse(VoteResponse { // mtype |-> RequestVoteResponse, 555 | vote_granted: grant, // mvoteGranted |-> grant, 556 | })), 557 | }; 558 | Some(SendableRaftMessage { message, dest: RaftMessageDestination::To(from) }) 559 | } 560 | 561 | // \* Server i receives a RequestVote response from server j with 562 | // \* m.mterm = currentTerm[i]. 563 | fn handle_vote_response(&mut self, 564 | msg_term: TermId, 565 | msg: VoteResponse, 566 | from: NodeId) 567 | -> Option> { // HandleRequestVoteResponse(i, j, m) == 568 | assert!(msg_term == self.current_term); // /\ m.mterm = currentTerm[i] 569 | if let Candidate(candidate_state) = &mut self.leadership { 570 | if msg.vote_granted { // /\ \/ /\ m.mvoteGranted 571 | info!("received vote granted from {} at {}", 572 | &from, &self.current_term); 573 | candidate_state.votes_granted.insert(from); // /\ votesGranted' = [votesGranted EXCEPT ![i] = votesGranted[i] \cup {j}] 574 | } else { // \/ /\ ~m.mvoteGranted /\ UNCHANGED <> 575 | info!("received vote rejected from {} at {}", 576 | &from, &self.current_term); 577 | } 578 | } 579 | None 580 | } 581 | 582 | // \* Server i receives an AppendEntries request from server j with 583 | // \* m.mterm <= currentTerm[i]. 
This just handles m.entries of length 0 or 1, but 584 | // \* implementations could safely accept more by treating them the same as 585 | // \* multiple independent requests of 1 entry. 586 | fn handle_append_request(&mut self, 587 | msg_term: TermId, 588 | msg: AppendRequest, 589 | from: NodeId) 590 | -> Option> { // HandleAppendEntriesRequest(i, j, m) == 591 | let prev_log_idx = msg.prev_log_idx; 592 | let msg_prev_log_term = msg.prev_log_term; 593 | let our_prev_log_term = self.log.get_term(prev_log_idx); 594 | let log_ok = 595 | prev_log_idx == Default::default() || // LET logOk == \/ m.mprevLogIndex = 0 596 | Some(msg_prev_log_term) == our_prev_log_term; // \/ /\ m.mprevLogIndex > 0 /\ m.mprevLogIndex <= Len(log[i]) /\ m.mprevLogTerm = log[i][m.mprevLogIndex].term 597 | assert!(msg_term <= self.current_term); // IN /\ m.mterm <= currentTerm[i] 598 | // /\ \/ \* return to follower state 599 | if msg_term == self.current_term { // /\ m.mterm = currentTerm[i] 600 | match &mut self.leadership { 601 | Candidate(_) => { // /\ state[i] = Candidate 602 | let random_election_ticks = self.random_election_timeout(); 603 | self.leadership = Follower(FollowerState { // /\ state' = [state EXCEPT ![i] = Follower] 604 | leader: Some(from.clone()), 605 | election_ticks: random_election_ticks, 606 | random_election_ticks, 607 | }); 608 | info!("became follower at {} of {}", &self.current_term, &from); 609 | } 610 | Follower(follower_state) => { 611 | if follower_state.leader.is_none() { 612 | info!("became follower at {} of {}", &self.current_term, &from); 613 | } 614 | follower_state.leader = Some(from.clone()); 615 | follower_state.election_ticks = follower_state.random_election_ticks; 616 | } 617 | Leader { .. 
} => { 618 | panic!("received append request as leader at {} from {}", 619 | &self.current_term, &from); 620 | } 621 | } 622 | } 623 | // \/ /\ \* reject request 624 | if (msg_term < self.current_term || // \/ m.mterm < currentTerm[i] 625 | (assert_true!(msg_term == self.current_term) && // \/ /\ m.mterm = currentTerm[i] 626 | assert_match!(Follower(_) = &self.leadership) && // /\ state[i] = Follower 627 | !log_ok)) // /\ \lnot logOk 628 | { 629 | if msg_term < self.current_term { 630 | info!("ignored message with {} < current {}: {}", 631 | &msg_term, &self.current_term, &msg); 632 | } else if let Some(our_prev_log_term) = our_prev_log_term { 633 | warn!("rejected append from {} with {} at {}, we have {}", 634 | &from, &prev_log_idx, msg_prev_log_term, &our_prev_log_term); 635 | } else { 636 | info!("rejected append from {} with {}, we are behind at {}", 637 | &from, &prev_log_idx, self.log.last_index()); 638 | } 639 | 640 | let message = RaftMessage { // /\ Reply([ 641 | term: self.current_term, // mterm |-> currentTerm[i], 642 | rpc: Some(Rpc::AppendResponse(AppendResponse { // mtype |-> AppendEntriesResponse, 643 | success: false, // msuccess |-> FALSE, 644 | match_idx: self.log.prev_index(), // mmatchIndex |-> 0, 645 | last_log_idx: self.log.last_index(), 646 | })), 647 | }; 648 | Some(SendableRaftMessage { message, dest: RaftMessageDestination::To(from) }) 649 | } else { // \/ \* accept request 650 | assert!(msg_term == self.current_term); // /\ m.mterm = currentTerm[i] 651 | assert_match!(Follower(_) = &self.leadership); // /\ state[i] = Follower 652 | assert!(log_ok); // /\ logOk 653 | // ... 
and the TLA+ that follows doesn't correspond to procedural code well 654 | // find point of log conflict 655 | let msg_last_log_idx = prev_log_idx + (msg.entries.len() as u64); 656 | let msg_entries_iter = (1..).map(|idx| prev_log_idx + idx).zip(msg.entries); 657 | let mut last_processed_idx = prev_log_idx; 658 | for (msg_entry_log_idx, msg_entry) in msg_entries_iter { 659 | if msg_entry_log_idx == self.log.last_index() + 1 { 660 | match self.log.append(msg_entry) { 661 | Ok(()) => (), 662 | Err(_) => break, 663 | } 664 | } else if let Some(our_entry_log_term) = self.log.get_term(msg_entry_log_idx) { 665 | if our_entry_log_term != msg_entry.term { 666 | assert!(msg_entry_log_idx > self.log.commit_idx); 667 | match self.log.cancel_from(msg_entry_log_idx) { 668 | Ok(cancelled_len) => 669 | info!("cancelled {} transactions from {}", cancelled_len, &msg_entry_log_idx), 670 | Err(_) => 671 | break, 672 | } 673 | match self.log.append(msg_entry) { 674 | Ok(()) => (), 675 | Err(_) => break, 676 | } 677 | } 678 | } else { 679 | error!("failed to fetch log index {} to find conflicts for append!", &msg_entry_log_idx); 680 | break; 681 | } 682 | last_processed_idx = msg_entry_log_idx; 683 | } 684 | 685 | // update commit index from leader 686 | let leader_commit = msg.leader_commit.min(last_processed_idx); 687 | if leader_commit > self.log.commit_idx { 688 | debug!("committed transactions from {} to {}", &self.log.commit_idx, &leader_commit); 689 | 690 | self.log.commit_idx = leader_commit; // /\ commitIndex' = [commitIndex EXCEPT ![i] = m.mcommitIndex] 691 | } 692 | 693 | let message = RaftMessage { // /\ Reply([ 694 | term: self.current_term, // mterm |-> currentTerm[i], 695 | rpc: Some(Rpc::AppendResponse(AppendResponse { // mtype |-> AppendEntriesResponse, 696 | success: true, // msuccess |-> TRUE, 697 | match_idx: msg_last_log_idx.min(self.log.last_index()), // mmatchIndex |-> m.mprevLogIndex + Len(m.mentries), 698 | last_log_idx: self.log.last_index(), 699 | })), 700 | 
}; 701 | Some(SendableRaftMessage { message, dest: RaftMessageDestination::To(from) }) 702 | } 703 | } 704 | 705 | // \* Server i receives an AppendEntries response from server j with 706 | // \* m.mterm = currentTerm[i]. 707 | fn handle_append_response(&mut self, 708 | msg_term: TermId, 709 | msg: AppendResponse, 710 | from: NodeId) 711 | -> Option> { // HandleAppendEntriesResponse(i, j, m) == 712 | assert!(msg_term == self.current_term); // /\ m.mterm = currentTerm[i] 713 | if let Leader(leader_state) = &mut self.leadership { 714 | if let Some(replication) = leader_state.followers.get_mut(&from) { 715 | if msg.success { // /\ \/ /\ m.msuccess \* successful 716 | if Some(msg.match_idx) >= replication.inflight { 717 | replication.inflight = None; 718 | } 719 | if msg.match_idx + 1 > replication.next_idx { 720 | replication.next_idx = msg.match_idx + 1; // /\ nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1] 721 | } 722 | if msg.match_idx > replication.match_idx { 723 | replication.match_idx = msg.match_idx; // /\ matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex] 724 | } 725 | replication.send_probe = false; 726 | } else { // \/ /\ \lnot m.msuccess \* not successful 727 | if !replication.send_probe { 728 | info!("received append rejection at {} from {} having {}", 729 | &replication.next_idx, &from, &msg.last_log_idx); 730 | } else { 731 | verbose!("received append rejection at {} from {} having {}", 732 | &replication.next_idx, &from, &msg.last_log_idx); 733 | } 734 | replication.next_idx = ((replication.next_idx - 1) // /\ nextIndex' = [nextIndex EXCEPT ![i][j] = Max({nextIndex[i][j] - 1, 1})] 735 | .min(msg.last_log_idx + 1) 736 | .max(msg.match_idx + 1)); 737 | replication.send_probe = true; 738 | replication.inflight = None; 739 | 740 | let mut chunk_size_remaining = self.config.replication_chunk_size; 741 | while let Some(next_idx) = replication.next_idx.checked_sub(1) { 742 | if next_idx <= msg.match_idx { 743 | break; 744 | } 745 | let 
entry_len = match self.log.get_len(replication.next_idx) { 746 | Some(entry_len) => entry_len, 747 | None => break, 748 | }; 749 | chunk_size_remaining = match chunk_size_remaining.checked_sub(entry_len) { 750 | Some(new_chunk_size_remaining) => new_chunk_size_remaining, 751 | None => break, 752 | }; 753 | replication.next_idx = next_idx; 754 | } 755 | } 756 | } 757 | } 758 | None 759 | } 760 | 761 | // \* Any RPC with a newer term causes the recipient to advance its term first. 762 | fn update_term(&mut self, 763 | from: &NodeId, 764 | msg: &RaftMessage) { // UpdateTerm(i, j, m) == 765 | if msg.term > self.current_term { // /\ m.mterm > currentTerm[i] 766 | info!("became follower at {} (from {}) due to message from {}: {}", 767 | &msg.term, &self.current_term, from, &msg); 768 | let random_election_ticks = self.random_election_timeout(); 769 | 770 | let election_ticks = match &self.leadership { 771 | Follower(FollowerState { election_ticks, .. }) | 772 | Candidate(CandidateState { election_ticks, .. }) => 773 | *election_ticks, 774 | Leader(_) => 775 | random_election_ticks, 776 | }; 777 | self.current_term = msg.term; // /\ currentTerm' = [currentTerm EXCEPT ![i] = m.mterm] 778 | self.leadership = Follower(FollowerState { // /\ state' = [state EXCEPT ![i] = Follower] 779 | leader: None, 780 | election_ticks, 781 | random_election_ticks, 782 | }); 783 | self.voted_for = Default::default(); // /\ votedFor' = [votedFor EXCEPT ![i] = Nil] 784 | } 785 | } 786 | 787 | // \* Responses with stale terms are ignored. 
788 | fn drop_stale_response(&self, 789 | msg_term: TermId, 790 | msg: T) 791 | -> Result<(), T> 792 | where T: fmt::Display 793 | { // DropStaleResponse(i, j, m) == 794 | if msg_term < self.current_term { // /\ m.mterm < currentTerm[i] 795 | info!("ignored message with {} < current {}: {}", 796 | &msg_term, &self.current_term, &msg); 797 | drop(msg); // /\ Discard(m) 798 | Ok(()) 799 | } else { 800 | Err(msg) 801 | } 802 | } 803 | 804 | // \* Receive a message. 805 | pub fn receive(&mut self, 806 | msg: RaftMessage, 807 | from: NodeId) 808 | -> Option> { // Receive(m) == 809 | if !self.peers.contains(&from) { 810 | error!("received raft message from {} for wrong group", &from); 811 | return None; 812 | } 813 | // IN \* Any RPC with a newer term causes the recipient to advance 814 | // \* its term first. Responses with stale terms are ignored. 815 | self.update_term(&from, &msg); // \/ UpdateTerm(i, j, m) 816 | let reply = match msg.rpc { 817 | Some(Rpc::VoteRequest(request)) => // \/ /\ m.mtype = RequestVoteRequest 818 | self.handle_vote_request(msg.term, request, from), // /\ HandleRequestVoteRequest(i, j, m) 819 | Some(Rpc::VoteResponse(response)) => { // \/ /\ m.mtype = RequestVoteResponse 820 | match self.drop_stale_response(msg.term, response) { // /\ \/ DropStaleResponse(i, j, m) 821 | Ok(()) => None, 822 | Err(response) => 823 | self.handle_vote_response(msg.term, response, from), // \/ HandleRequestVoteResponse(i, j, m) 824 | } 825 | } 826 | Some(Rpc::AppendRequest(request)) => // \/ /\ m.mtype = AppendEntriesRequest 827 | self.handle_append_request(msg.term, request, from), // /\ HandleAppendEntriesRequest(i, j, m) 828 | Some(Rpc::AppendResponse(response)) => { // \/ /\ m.mtype = AppendEntriesResponse 829 | match self.drop_stale_response(msg.term, response) { // /\ \/ DropStaleResponse(i, j, m) 830 | Ok(()) => None, 831 | Err(response) => 832 | self.handle_append_response(msg.term, response, from), // \/ HandleAppendEntriesResponse(i, j, m) 833 | } 834 | 
} 835 | None => None, 836 | }; 837 | self.become_leader(); 838 | self.advance_commit_idx(); 839 | reply 840 | } 841 | 842 | // 843 | // helpers 844 | // 845 | 846 | fn quorum_size(&self) -> usize { 847 | quorum_size(self.peers.len()) 848 | } 849 | 850 | fn random_election_timeout(&mut self) -> u32 { 851 | random_election_timeout(&mut self.random, self.config.election_timeout_ticks) 852 | } 853 | } 854 | 855 | /// Computes the minimum size of a quorum of nodes in a Raft group. 856 | /// 857 | /// Returns the minimum number of nodes necessary to constitute a quorum in a Raft group made up of the local node plus 858 | /// `peer_count` other peers. A quorum of reachable nodes is needed to elect a leader and append to the distributed log. 859 | pub fn quorum_size(peer_count: usize) -> usize { 860 | (peer_count.saturating_add(1)) / 2 + 1 861 | } 862 | 863 | fn random_election_timeout(random: &mut impl RngCore, election_timeout_ticks: u32) -> u32 { 864 | let random = random.next_u32().checked_rem(election_timeout_ticks).unwrap_or(0); 865 | election_timeout_ticks.saturating_add(random) 866 | } 867 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 jessa0 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see <https://www.gnu.org/licenses/>. 
16 | */ 17 | 18 | //! Raft consensus algorithm implementation. 19 | //! 20 | //! Raft is a consensus algorithm which replicates a strongly-consistent distributed log of entries with arbitrary data 21 | //! amongst a group of peers. It is also fault-tolerant, allowing replication to continue while a majority of peers can 22 | //! still communicate with each other. This crate provides an implementation of the Raft consensus algorithm with some 23 | //! optional features not implemented, such as pre-voting, membership changes, and snapshots. 24 | //! 25 | //! The Raft algorithm is implemented as a state machine driven in a few ways: 26 | //! 27 | //! * When attempting to append a new entry to the distributed log: [`append`](node::RaftNode::append) is called. 28 | //! * When a message is received from a peer: [`receive`](node::RaftNode::receive) is called. 29 | //! * Every time a fixed amount of time has elapsed: [`timer_tick`](node::RaftNode::timer_tick) is called. 30 | //! 31 | //! Each of these functions modifies the internal state and returns [messages](message::SendableRaftMessage) to be sent 32 | //! to peers. Once a log entry is "committed", or guaranteed to be returned at the same index on every functioning peer 33 | //! in the group, it may be retrieved using [`take_committed`](node::RaftNode::take_committed). An append to the log may 34 | //! be cancelled before reaching the committed state, however, which is discussed in more detail in ["Appending entries to the distributed log"]. 35 | //! 36 | //! The backing storage for the distributed log must be provided as an implementation of the [`RaftLog`](log::RaftLog) 37 | //! trait, with careful attention to following the trait specification. A trivial in-memory implementation is provided 38 | //! by [`RaftLogMemory`](log::mem::RaftLogMemory). 39 | //! 40 | //! # Example 41 | //! 42 | //! ``` 43 | //! use simple_raft::log::mem::RaftLogMemory; 44 | //! use simple_raft::node::{RaftConfig, RaftNode}; 45 | //! 
use simple_raft::message::{RaftMessageDestination, SendableRaftMessage}; 46 | //! use rand_chacha::ChaChaRng; 47 | //! use rand_core::SeedableRng; 48 | //! use std::collections::VecDeque; 49 | //! use std::str; 50 | //! 51 | //! // Construct 5 Raft peers 52 | //! type NodeId = usize; 53 | //! let mut peers = (0..5).map(|id: NodeId| RaftNode::new( 54 | //! id, 55 | //! (0..5).collect(), 56 | //! RaftLogMemory::new_unbounded(), 57 | //! ChaChaRng::seed_from_u64(id as u64), 58 | //! RaftConfig { 59 | //! election_timeout_ticks: 10, 60 | //! heartbeat_interval_ticks: 1, 61 | //! replication_chunk_size: usize::max_value(), 62 | //! }, 63 | //! )).collect::<Vec<_>>(); 64 | //! 65 | //! // Simulate reliably sending messages instantaneously between peers 66 | //! let mut inboxes = vec![VecDeque::new(); peers.len()]; 67 | //! let send_message = |src_id: NodeId, sendable: SendableRaftMessage<NodeId>, inboxes: &mut Vec<VecDeque<_>>| { 68 | //! match sendable.dest { 69 | //! RaftMessageDestination::Broadcast => { 70 | //! println!("peer {} -> all: {}", src_id, &sendable.message); 71 | //! inboxes.iter_mut().for_each(|inbox| inbox.push_back((src_id, sendable.message.clone()))) 72 | //! } 73 | //! RaftMessageDestination::To(dst_id) => { 74 | //! println!("peer {} -> peer {}: {}", src_id, dst_id, &sendable.message); 75 | //! inboxes[dst_id].push_back((src_id, sendable.message)); 76 | //! } 77 | //! } 78 | //! }; 79 | //! 80 | //! // Loop until a log entry is committed on all peers 81 | //! let mut appended = false; 82 | //! let mut peers_committed = vec![false; peers.len()]; 83 | //! while !peers_committed.iter().all(|seen| *seen) { 84 | //! for (peer_id, peer) in peers.iter_mut().enumerate() { 85 | //! // Tick the timer 86 | //! let new_messages = peer.timer_tick(); 87 | //! new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes)); 88 | //! 89 | //! // Append a log entry on the leader 90 | //! if !appended && peer.is_leader() { 91 | //!
if let Ok(new_messages) = peer.append("Hello world!") { 92 | //! new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes)); 93 | //! appended = true; 94 | //! } 95 | //! } 96 | //! 97 | //! // Process message inbox 98 | //! while let Some((src_id, message)) = inboxes[peer_id].pop_front() { 99 | //! let new_messages = peer.receive(message, src_id); 100 | //! new_messages.for_each(|message| send_message(peer_id, message, &mut inboxes)); 101 | //! } 102 | //! 103 | //! // Check for committed log entries 104 | //! for log_entry in peer.take_committed() { 105 | //! if !log_entry.data.is_empty() { 106 | //! println!("peer {} saw commit {}", peer_id, str::from_utf8(&log_entry.data).unwrap()); 107 | //! assert!(!peers_committed[peer_id]); 108 | //! peers_committed[peer_id] = true; 109 | //! } 110 | //! } 111 | //! } 112 | //! } 113 | //! ``` 114 | //! 115 | //! ["Appending entries to the distributed log"]: node::RaftNode#appending-entries-to-the-distributed-log 116 | 117 | #![no_std] 118 | 119 | #![allow(unused_parens)] 120 | #![warn(missing_docs)] 121 | 122 | extern crate alloc; 123 | 124 | #[macro_use] 125 | mod macros; 126 | 127 | pub mod core; 128 | pub mod log; 129 | pub mod message; 130 | pub mod node; 131 | mod prelude; 132 | -------------------------------------------------------------------------------- /src/log.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 Open Whisper Systems 3 | * Copyright (C) 2021 jessa0 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | 19 | //! Types related to Raft log storage. 20 | //! 21 | //! Raft requires a backing storage for entries of its distributed log as they are being replicated to and from other 22 | //! nodes. The [`RaftLog`] trait is implemented for that purpose, and the implementation is supplied to 23 | //! [`RaftNode`](crate::node::RaftNode). 24 | 25 | #[cfg(any(feature = "test", test))] 26 | #[macro_use] 27 | pub mod tests; 28 | pub mod mem; 29 | 30 | use core::iter; 31 | use crate::message::{LogEntry, LogIndex, TermId}; 32 | 33 | /// An interface for storage of the Raft log of a [`RaftNode`](crate::node::RaftNode). 34 | /// 35 | /// # Initial state 36 | /// 37 | /// A Raft log is initialized as empty, with both [`prev_index`] and [`last_index`] returning 38 | /// [`LogIndex::default()`](crate::message::LogIndex::default). The index of the first appended log entry is `1` and all 39 | /// indices are contiguous. 40 | /// 41 | /// # Log truncation 42 | /// 43 | /// A Raft log of bounded size may discard old entries previously taken from the beginning of the log via [`take_next`] 44 | /// if, for example, it runs out of space. However, the term of the last discarded entry is preserved to be returned 45 | /// from [`prev_term`] if requested. The log can also be truncated explicitly from the end via [`cancel_from`]. 
46 | /// 47 | /// [`append`]: Self::append 48 | /// [`cancel_from`]: Self::cancel_from 49 | /// [`last_index`]: Self::last_index 50 | /// [`prev_index`]: Self::prev_index 51 | /// [`prev_term`]: Self::prev_term 52 | /// [`take_next`]: Self::take_next 53 | pub trait RaftLog { 54 | /// The type of error returned by fallable operations. 55 | type Error; 56 | 57 | /// Appends an entry to the end of the log. 58 | /// 59 | /// # Errors 60 | /// 61 | /// If there was any error modifying the log, an error is returned. 62 | fn append(&mut self, entry: LogEntry) -> Result<(), Self::Error>; 63 | 64 | /// Cancels all entries including and after the entry at index `from_index`, removing them from the log. Returns the 65 | /// number of entries removed. 66 | /// 67 | /// # Errors 68 | /// 69 | /// If there was any error modifying the log, or if the entries did not exist, an error is returned. 70 | fn cancel_from(&mut self, from_index: LogIndex) -> Result; 71 | 72 | /// Returns the approximate serialized length in bytes of a given log entry. 73 | fn entry_len(&self, entry: &LogEntry) -> usize; 74 | 75 | /// Returns the entry at a given index, or `None` if the index is greater than the length of the log or if the entry 76 | /// has been discarded. 77 | fn get(&mut self, index: LogIndex) -> Option; 78 | 79 | /// Returns the term of the entry at a given index, or `None` if the index is greater than the length of the log or 80 | /// if the entry has been discarded. 81 | fn get_term(&mut self, index: LogIndex) -> Option; 82 | 83 | /// Returns the approximate serialized length of the entry at a given index, or `None` if the index is greater than 84 | /// the length of the log or if the entry has been discarded. 
85 | fn get_len(&mut self, index: LogIndex) -> Option { 86 | self.get(index).map(|entry: LogEntry| self.entry_len(&entry)) 87 | } 88 | 89 | /// Returns the index of the last entry which has been returned by [`take_next`], or 90 | /// [`LogIndex::default()`](crate::message::LogIndex::default) if none have been. 91 | /// 92 | /// [`take_next`]: Self::take_next 93 | /// [`LogEntry`]: crate::message::LogEntry 94 | fn last_taken_index(&self) -> LogIndex; 95 | 96 | /// Returns the index of the last entry in the log, or [`LogIndex::default()`](crate::message::LogIndex::default) if 97 | /// empty. 98 | fn last_index(&self) -> LogIndex; 99 | 100 | /// Returns the term of the last entry in the log, or [`TermId::default()`](crate::message::TermId::default) if 101 | /// empty. 102 | fn last_term(&self) -> TermId; 103 | 104 | /// Returns the index immediately before the index of the first undiscarded entry in the log (see ["Log 105 | /// Truncation"](RaftLog#log-truncation)). 106 | fn prev_index(&self) -> LogIndex; 107 | 108 | /// Returns the term of the entry immediately preceding the first undiscarded entry in the log (see ["Log 109 | /// Truncation"](RaftLog#log-truncation)). 110 | fn prev_term(&self) -> TermId; 111 | 112 | /// Returns the next entry in the log not previously returned by this function, marking the returned entry eligible 113 | /// for future discard (see ["Log Truncation"](RaftLog#log-truncation)). Returns `None` if there is no such entry. 114 | fn take_next(&mut self) -> Option; 115 | } 116 | 117 | pub(crate) struct RaftLogState { 118 | log: Log, 119 | pub commit_idx: LogIndex, 120 | } 121 | 122 | /// An iterator yielding committed [log entries][`LogEntry`]. 123 | /// 124 | /// A given [`LogEntry`] will be yielded only once over the lifetime of a Raft node. 
125 | /// 126 | /// [`LogEntry`]: crate::message::LogEntry 127 | pub struct CommittedIter<'a, Log> { 128 | log: &'a mut RaftLogState, 129 | } 130 | 131 | // 132 | // RaftLogState 133 | // 134 | 135 | impl RaftLogState { 136 | pub fn new(log: Log) -> Self { 137 | Self { 138 | log, 139 | commit_idx: LogIndex::default(), 140 | } 141 | } 142 | 143 | pub fn append(&mut self, entry: LogEntry) -> Result<(), Log::Error> { 144 | self.log.append(entry) 145 | } 146 | 147 | pub fn cancel_from(&mut self, from_index: LogIndex) -> Result { 148 | self.log.cancel_from(from_index) 149 | } 150 | 151 | pub fn entry_len(&self, entry: &LogEntry) -> usize { 152 | self.log.entry_len(entry) 153 | } 154 | 155 | pub fn get(&mut self, index: LogIndex) -> Option { 156 | if index == LogIndex::default() { 157 | None 158 | } else { 159 | self.log.get(index) 160 | } 161 | } 162 | 163 | pub fn get_term(&mut self, index: LogIndex) -> Option { 164 | if index == self.prev_index() { 165 | Some(self.prev_term()) 166 | } else if index == LogIndex::default() { 167 | None 168 | } else { 169 | self.log.get_term(index) 170 | } 171 | } 172 | 173 | pub fn get_len(&mut self, index: LogIndex) -> Option { 174 | self.log.get_len(index) 175 | } 176 | 177 | pub fn last_index(&self) -> LogIndex { 178 | self.log.last_index() 179 | } 180 | 181 | pub fn last_term(&self) -> TermId { 182 | self.log.last_term() 183 | } 184 | 185 | pub fn log(&self) -> &Log { 186 | &self.log 187 | } 188 | 189 | pub fn log_mut(&mut self) -> &mut Log { 190 | &mut self.log 191 | } 192 | 193 | pub fn prev_index(&self) -> LogIndex { 194 | self.log.prev_index() 195 | } 196 | 197 | pub fn prev_term(&self) -> TermId { 198 | self.log.prev_term() 199 | } 200 | 201 | pub fn take_committed(&mut self) -> CommittedIter<'_, Log> { 202 | CommittedIter { log: self } 203 | } 204 | } 205 | 206 | // 207 | // CommittedIter impls 208 | // 209 | 210 | impl Iterator for CommittedIter<'_, Log> { 211 | type Item = LogEntry; 212 | fn next(&mut self) -> Option { 213 | 
if self.log.log.last_taken_index() < self.log.commit_idx { 214 | self.log.log.take_next() 215 | } else { 216 | None 217 | } 218 | } 219 | 220 | fn size_hint(&self) -> (usize, Option) { 221 | let remaining = (self.log.commit_idx.id - self.log.log.last_taken_index().id) as usize; 222 | (remaining, Some(remaining)) 223 | } 224 | } 225 | 226 | impl ExactSizeIterator for CommittedIter<'_, Log> {} 227 | 228 | impl iter::FusedIterator for CommittedIter<'_, Log> {} 229 | -------------------------------------------------------------------------------- /src/log/mem.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 jessa0 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 16 | */ 17 | 18 | //! A naive in-memory implementation of [`RaftLog`](super::RaftLog), primarily for testing. 19 | 20 | use alloc::collections::VecDeque; 21 | use core::convert::{TryFrom, TryInto}; 22 | use crate::message::{LogEntry, LogIndex, TermId}; 23 | use super::RaftLog; 24 | 25 | /// A naive in-memory implementation of [`RaftLog`](super::RaftLog), primarily for testing. 
26 | pub struct RaftLogMemory { 27 | entries: VecDeque, 28 | prev_log_idx: LogIndex, 29 | prev_log_term: TermId, 30 | last_taken: LogIndex, 31 | data_len: usize, 32 | data_capacity: usize, 33 | } 34 | 35 | impl RaftLogMemory { 36 | /// Constructs an empty Raft log with unbounded capacity. 37 | pub fn new_unbounded() -> Self { 38 | Self::with_capacity(0, usize::max_value()) 39 | } 40 | 41 | /// Constructs an empty Raft log with bounded capacity. 42 | /// 43 | /// `initial_entries_capacity` specifies how many log entries the Raft log will be able to store without 44 | /// reallocating. `data_capacity` specifies the maximum size of log entry data to store before discarding entries 45 | /// from the beginning of the log. 46 | pub fn with_capacity(initial_entries_capacity: usize, data_capacity: usize) -> Self { 47 | Self { 48 | entries: VecDeque::with_capacity(initial_entries_capacity), 49 | prev_log_idx: LogIndex::default(), 50 | prev_log_term: TermId::default(), 51 | last_taken: LogIndex::default(), 52 | data_len: 0, 53 | data_capacity, 54 | } 55 | } 56 | 57 | fn entry_index(&self, log_idx: LogIndex) -> Option { 58 | log_idx.id 59 | .checked_sub(self.prev_log_idx.id)? 60 | .checked_sub(1)? 
61 | .try_into() 62 | .ok() 63 | } 64 | 65 | fn pop_front(&mut self) -> Result<(), ::Error> { 66 | self.entry_index(self.last_taken) 67 | .ok_or(())?; 68 | let prev_log = self.entries.pop_front().ok_or(())?; 69 | self.prev_log_idx = self.prev_log_idx + 1; 70 | self.prev_log_term = prev_log.term; 71 | Ok(()) 72 | } 73 | } 74 | 75 | impl RaftLog for RaftLogMemory { 76 | type Error = (); 77 | fn append(&mut self, log_entry: LogEntry) -> Result<(), Self::Error> { 78 | if log_entry.data.len() > self.data_capacity { 79 | return Err(()); 80 | } 81 | self.data_len = loop { 82 | match self.data_len.checked_add(log_entry.data.len()) { 83 | Some(new_data_len) if new_data_len <= self.data_capacity => 84 | break new_data_len, 85 | Some(_) | None => { 86 | self.pop_front()?; 87 | } 88 | } 89 | }; 90 | self.entries.push_back(log_entry); 91 | Ok(()) 92 | } 93 | fn cancel_from(&mut self, from_log_idx: LogIndex) -> Result { 94 | let from_index = self.entry_index(from_log_idx).ok_or(())?; 95 | match self.entries.len().checked_sub(from_index) { 96 | Some(0) | None => 97 | Err(()), 98 | Some(cancelled_len) => { 99 | self.entries.truncate(from_index); 100 | Ok(cancelled_len) 101 | } 102 | } 103 | } 104 | fn entry_len(&self, log_entry: &LogEntry) -> usize { 105 | 4 + log_entry.data.len() 106 | } 107 | fn get(&mut self, log_idx: LogIndex) -> Option { 108 | let index = self.entry_index(log_idx)?; 109 | self.entries.get(index).cloned() 110 | } 111 | fn get_term(&mut self, log_idx: LogIndex) -> Option { 112 | if log_idx != self.prev_log_idx { 113 | self.get(log_idx) 114 | .map(|log_entry: LogEntry| log_entry.term) 115 | } else { 116 | Some(self.prev_log_term) 117 | } 118 | } 119 | fn prev_index(&self) -> LogIndex { 120 | self.prev_log_idx 121 | } 122 | fn last_index(&self) -> LogIndex { 123 | let entries_len = u64::try_from(self.entries.len()) 124 | .unwrap_or_else(|_| panic!("more than 2^64 log entries")); 125 | self.prev_log_idx + entries_len 126 | } 127 | fn last_taken_index(&self) -> 
LogIndex { 128 | self.last_taken 129 | } 130 | fn last_term(&self) -> TermId { 131 | self.entries 132 | .iter() 133 | .map(|log_entry: &LogEntry| log_entry.term) 134 | .last() 135 | .unwrap_or(self.prev_log_term) 136 | } 137 | fn prev_term(&self) -> TermId { 138 | self.prev_log_term 139 | } 140 | fn take_next(&mut self) -> Option { 141 | let log_idx = self.last_taken + 1; 142 | let log_entry = self.get(log_idx)?; 143 | self.last_taken = log_idx; 144 | Some(log_entry) 145 | } 146 | } 147 | 148 | #[cfg(test)] 149 | mod test { 150 | use crate::raft_log_tests; 151 | use super::*; 152 | 153 | raft_log_tests!(RaftLogMemory, RaftLogMemory::new_unbounded()); 154 | } 155 | -------------------------------------------------------------------------------- /src/log/tests.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 jessa0 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 16 | */ 17 | 18 | use bytes::Bytes; 19 | use crate::message::{LogEntry, LogIndex, TermId}; 20 | use super::RaftLog; 21 | 22 | /// Defines test functions for a type implementing RaftLog. 23 | #[macro_export] 24 | macro_rules! raft_log_tests { 25 | ($ty:ty, $new:expr) => { 26 | $crate::raft_log_test! { $ty, $new, test_log_empty } 27 | $crate::raft_log_test! { $ty, $new, test_log_append } 28 | $crate::raft_log_test! 
{ $ty, $new, test_log_cancel_from } 29 | }; 30 | } 31 | 32 | /// Defines a given test function for a type implementing RaftLog. 33 | #[macro_export] 34 | macro_rules! raft_log_test { 35 | ($ty:ty, $new:expr, $test:ident) => { 36 | #[test] 37 | fn $test() { 38 | let mut log: $ty = $new; 39 | $crate::log::tests::$test(&mut log); 40 | } 41 | } 42 | } 43 | 44 | pub fn test_log_empty(log: &mut Log) { 45 | verify_log(log, &[], LogIndex::default(), LogIndex::default()); 46 | } 47 | 48 | pub fn test_log_append(log: &mut Log) { 49 | let entries = test_entries(); 50 | for (index, entry) in entries.iter().cloned().enumerate() { 51 | log.append(entry).unwrap_or_else(|_| panic!()); 52 | verify_log(log, &entries, LogIndex::default(), LogIndex { id: 1 + index as u64 }); 53 | } 54 | 55 | } 56 | 57 | pub fn test_log_cancel_from(log: &mut Log) { 58 | let entries = append_test_entries(log); 59 | for &truncate_len in &[1, 2, 1] { 60 | let last_log_idx = log.last_index(); 61 | log.cancel_from(last_log_idx + 2).unwrap_err(); 62 | log.cancel_from(last_log_idx + 1).unwrap_err(); 63 | verify_log(log, &entries, LogIndex::default(), last_log_idx); 64 | assert_eq!(log.cancel_from(last_log_idx + 1 - truncate_len).map_err(drop), Ok(truncate_len as usize)); 65 | verify_log(log, &entries, LogIndex::default(), last_log_idx - truncate_len); 66 | } 67 | log.cancel_from(log.last_index() + 2).unwrap_err(); 68 | log.cancel_from(log.last_index() + 1).unwrap_err(); 69 | } 70 | 71 | // 72 | // internal 73 | // 74 | 75 | fn test_entries() -> [LogEntry; 5] { 76 | [ 77 | LogEntry { term: TermId { id: 1 }, data: Bytes::from_static(&[]) }, 78 | LogEntry { term: TermId { id: 1 }, data: Bytes::from_static(&[2; 1]) }, 79 | LogEntry { term: TermId { id: 2 }, data: Bytes::from_static(&[3; 2]) }, 80 | LogEntry { term: TermId { id: 9 }, data: Bytes::from_static(&[4; 100]) }, 81 | LogEntry { term: TermId { id: u64::max_value() }, data: Bytes::from_static(&[5; 100]) }, 82 | ] 83 | } 84 | 85 | fn 
append_test_entries(log: &mut Log) -> [LogEntry; 5] { 86 | let entries = test_entries(); 87 | entries.iter().cloned().for_each(|entry| log.append(entry).unwrap_or_else(|_| panic!())); 88 | entries 89 | } 90 | 91 | fn verify_log(log: &mut Log, entries: &[LogEntry], prev_log_idx: LogIndex, last_log_idx: LogIndex) { 92 | assert_eq!(log.prev_index(), prev_log_idx); 93 | 94 | assert_eq!(log.get(LogIndex::default()), None); 95 | assert_eq!(log.get_len(LogIndex::default()), None); 96 | 97 | assert_eq!(log.get(prev_log_idx), None); 98 | assert_eq!(log.get_term(prev_log_idx), Some(prev_log_idx.id.checked_sub(1).map(|index| entries[index as usize].term).unwrap_or_default())); 99 | assert_eq!(log.get_len(prev_log_idx), None); 100 | 101 | assert_eq!(log.last_index(), last_log_idx); 102 | assert_eq!(log.last_term(), log.last_index().id.checked_sub(1).map(|index| entries[index as usize].term).unwrap_or_default()); 103 | 104 | verify_entries(entries, prev_log_idx, last_log_idx, |log_idx, entry| { 105 | assert_eq!(log.get(log_idx).as_ref(), entry); 106 | assert_eq!(log.get_term(log_idx), entry.map(|entry| entry.term)); 107 | assert_eq!(log.get_len(log_idx), entry.map(|entry| log.entry_len(&entry))); 108 | }); 109 | } 110 | 111 | fn verify_entries(entries: &[LogEntry], prev_log_idx: LogIndex, last_log_idx: LogIndex, mut fun: F) 112 | where F: FnMut(LogIndex, Option<&LogEntry>), 113 | { 114 | for log_index in 0..prev_log_idx.id { 115 | fun(LogIndex { id: log_index }, None); 116 | } 117 | for entry_index in prev_log_idx.id..last_log_idx.id { 118 | fun(LogIndex { id: 1 + entry_index }, Some(&entries[entry_index as usize])); 119 | } 120 | for entry_index in last_log_idx.id..=entries.len() as u64 { 121 | fun(LogIndex { id: 1 + entry_index }, None); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 Open 
// Asserts like `assert!`, then evaluates to `true` so the check can be used in expression position.
macro_rules! assert_true {
    ($($arg:tt)+) => ({
        assert!($($arg)+);
        true
    });
}

// Evaluates to `true` when `$expr` matches `$pat`; otherwise panics with a message naming the pattern and the
// expression, followed by the optional formatted context.
//
// The pattern and expression are rendered with `stringify!` because macro metavariables are not substituted inside
// string literals.
macro_rules! assert_match {
    ($pat:pat = $expr:expr) => ({
        if let $pat = $expr {
            true
        } else {
            panic!("assertion failed: `{} = {}`", stringify!($pat), stringify!($expr))
        }
    });
    ($pat:pat = $expr:expr, $($arg:tt)+) => ({
        if let $pat = $expr {
            true
        } else {
            panic!(
                "assertion failed: `{} = {}`: {}",
                stringify!($pat),
                stringify!($expr),
                format_args!($($arg)+)
            )
        }
    });
}

// Verbose diagnostics; forwards its arguments to `log::debug!`.
macro_rules! verbose {
    ($($arg:tt)*) => (
        log::debug!($($arg)*)
    );
}

// ---- /src/message.rs ----
8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 16 | */ 17 | 18 | //! Raft message types for sending between nodes. 19 | //! 20 | //! This module provides data types for messages to be sent between Raft nodes. The top-level message type is 21 | //! [`RaftMessage`]. Protobuf-based serialization of all types in this module is provided through the `prost` crate if 22 | //! the corresponding feature is enabled. 23 | 24 | use bytes::Bytes; 25 | use core::cmp::Ordering; 26 | use core::fmt; 27 | use core::ops::{Add, AddAssign, Sub}; 28 | use crate::prelude::*; 29 | 30 | /// A [`RaftMessage`] to be sent to a destination. 31 | pub struct SendableRaftMessage { 32 | /// The message to be sent. 33 | pub message: RaftMessage, 34 | 35 | /// The destination for the message. 36 | pub dest: RaftMessageDestination, 37 | } 38 | 39 | /// The destination for a [`SendableRaftMessage`]. 40 | pub enum RaftMessageDestination { 41 | /// The associated message should be sent to all known peers. 42 | Broadcast, 43 | /// The associated message should be sent to one particular peer. 44 | To(NodeId), 45 | } 46 | 47 | /// A message sent between Raft nodes. 48 | #[derive(Clone, PartialEq)] 49 | #[cfg_attr(feature = "prost", derive(prost::Message))] 50 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))] 51 | pub struct RaftMessage { 52 | /// The greatest Raft leadership term ID seen by the sender. 53 | #[cfg_attr(feature = "prost", prost(message, required, tag="2"))] 54 | pub term: TermId, 55 | 56 | /// The Remote Procedure Call contained by this message. 
57 | /// 58 | /// This field is only optional in order to support protobuf serialization. 59 | #[cfg_attr(feature = "prost", prost(oneof="Rpc", tags="3, 4, 5, 6"))] 60 | pub rpc: Option, 61 | } 62 | 63 | /// A Remote Procedure Call message to a Raft node. 64 | #[derive(Clone, PartialEq)] 65 | #[cfg_attr(feature = "prost", derive(prost::Oneof))] 66 | #[cfg_attr(not(feature = "prost"), derive(Debug))] 67 | pub enum Rpc { 68 | /// A request to obtain leadership amongst Raft nodes. 69 | #[cfg_attr(feature = "prost", prost(message, tag="3"))] 70 | VoteRequest(VoteRequest), 71 | 72 | /// A response to a [`VoteRequest`] granting or denying leadership. 73 | #[cfg_attr(feature = "prost", prost(message, tag="4"))] 74 | VoteResponse(VoteResponse), 75 | 76 | /// A request to append entries to a Raft node's log. 77 | #[cfg_attr(feature = "prost", prost(message, tag="5"))] 78 | AppendRequest(AppendRequest), 79 | 80 | /// A response to an [`AppendRequest`] allowing or denying an append to the Raft node's log. 81 | #[cfg_attr(feature = "prost", prost(message, tag="6"))] 82 | AppendResponse(AppendResponse), 83 | } 84 | 85 | /// A request to obtain leadership amongst Raft nodes. 86 | #[derive(Clone, PartialEq)] 87 | #[cfg_attr(feature = "prost", derive(prost::Message))] 88 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))] 89 | pub struct VoteRequest { 90 | /// The Raft log index of the last log entry stored by this node. 91 | #[cfg_attr(feature = "prost", prost(message, required, tag="2"))] 92 | pub last_log_idx: LogIndex, 93 | 94 | /// The Raft leadership term of the last log entry stored by this node. 95 | #[cfg_attr(feature = "prost", prost(message, required, tag="3"))] 96 | pub last_log_term: TermId, 97 | } 98 | 99 | /// The response to a [`VoteRequest`] granting or denying leadership. 
100 | #[derive(Clone, PartialEq)] 101 | #[cfg_attr(feature = "prost", derive(prost::Message))] 102 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))] 103 | pub struct VoteResponse { 104 | /// Whether the [`VoteRequest`] was granted or not. 105 | #[cfg_attr(feature = "prost", prost(bool, required, tag="2"))] 106 | pub vote_granted: bool, 107 | } 108 | 109 | /// A request to append entries to a Raft node's log. 110 | #[derive(Clone, PartialEq)] 111 | #[cfg_attr(feature = "prost", derive(prost::Message))] 112 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))] 113 | pub struct AppendRequest { 114 | /// The Raft log index immediately before the index of the first entry in [`entries`](Self::entries). 115 | #[cfg_attr(feature = "prost", prost(message, required, tag="1"))] 116 | pub prev_log_idx: LogIndex, 117 | 118 | /// The Raft leadership term of the log entry immediately before the first entry in [`entries`](Self::entries). 119 | #[cfg_attr(feature = "prost", prost(message, required, tag="2"))] 120 | pub prev_log_term: TermId, 121 | 122 | /// The Raft log index of the last log entry known by the requester to be committed. 123 | #[cfg_attr(feature = "prost", prost(message, required, tag="3"))] 124 | pub leader_commit: LogIndex, 125 | 126 | /// A list of consecutive Raft log entries to append. 127 | #[cfg_attr(feature = "prost", prost(message, repeated, tag="4"))] 128 | pub entries: Vec, 129 | } 130 | 131 | /// The response to an [`AppendRequest`] allowing or denying an append to the Raft node's log. 132 | #[derive(Clone, PartialEq)] 133 | #[cfg_attr(feature = "prost", derive(prost::Message))] 134 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))] 135 | pub struct AppendResponse { 136 | /// Whether the [`AppendRequest`] was granted or not. 
137 | #[cfg_attr(feature = "prost", prost(bool, required, tag="1"))] 138 | pub success: bool, 139 | 140 | /// The Raft log index of the last log entry up to which the responder's log is known to match the requester's log. 141 | #[cfg_attr(feature = "prost", prost(message, required, tag="2"))] 142 | pub match_idx: LogIndex, 143 | 144 | /// The Raft log index of the last log entry in the responder's log. 145 | #[cfg_attr(feature = "prost", prost(message, required, tag="3"))] 146 | pub last_log_idx: LogIndex, 147 | } 148 | 149 | /// An entry in a [Raft log][crate::log::RaftLog]. 150 | #[derive(Clone, PartialEq)] 151 | #[cfg_attr(feature = "prost", derive(prost::Message))] 152 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))] 153 | pub struct LogEntry { 154 | /// The term of leadership of the node which appended this log entry. 155 | #[cfg_attr(feature = "prost", prost(message, required, tag="1"))] 156 | pub term: TermId, 157 | 158 | /// Arbitrary data associated with the log entry. 159 | #[cfg_attr(feature = "prost", prost(bytes="vec", required, tag="2"))] 160 | pub data: Bytes, 161 | } 162 | 163 | /// The unique, monotonically-increasing ID for a term of Raft group leadership. 164 | #[derive(Clone, PartialEq)] 165 | #[cfg_attr(feature = "prost", derive(prost::Message))] 166 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))] 167 | pub struct TermId { 168 | /// The non-negative integer assigned to this term. 169 | #[cfg_attr(feature = "prost", prost(uint64, required, tag="1"))] 170 | pub id: u64, 171 | } 172 | 173 | /// A 1-based index into a [Raft log][crate::log::RaftLog]. 174 | #[derive(Clone, PartialEq)] 175 | #[cfg_attr(feature = "prost", derive(prost::Message))] 176 | #[cfg_attr(not(feature = "prost"), derive(Debug, Default))] 177 | pub struct LogIndex { 178 | /// The integer representing this log index. 
179 | #[cfg_attr(feature = "prost", prost(uint64, required, tag="1"))] 180 | pub id: u64, 181 | } 182 | 183 | // 184 | // RaftMessage impls 185 | // 186 | 187 | impl fmt::Display for RaftMessage { 188 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { 189 | let Self { term, rpc } = self; 190 | let mut debug = fmt.debug_tuple(""); 191 | debug.field(&format_args!("{}", term)); 192 | if let Some(rpc) = rpc { 193 | debug.field(&format_args!("{}", rpc)); 194 | } else { 195 | debug.field(&"None"); 196 | } 197 | debug.finish() 198 | } 199 | } 200 | 201 | // 202 | // Rpc impls 203 | // 204 | 205 | impl fmt::Display for Rpc { 206 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { 207 | match &self { 208 | Rpc::VoteRequest(msg) => fmt::Display::fmt(msg, fmt), 209 | Rpc::VoteResponse(msg) => fmt::Display::fmt(msg, fmt), 210 | Rpc::AppendRequest(msg) => fmt::Display::fmt(msg, fmt), 211 | Rpc::AppendResponse(msg) => fmt::Display::fmt(msg, fmt), 212 | } 213 | } 214 | } 215 | 216 | // 217 | // VoteRequest impls 218 | // 219 | 220 | impl fmt::Display for VoteRequest { 221 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { 222 | let Self { last_log_idx, last_log_term } = self; 223 | fmt.debug_struct("VoteRequest") 224 | .field("last_log_idx", &format_args!("{}", last_log_idx)) 225 | .field("last_log_term", &format_args!("{}", last_log_term)) 226 | .finish() 227 | } 228 | } 229 | 230 | // 231 | // VoteResponse impls 232 | // 233 | 234 | impl fmt::Display for VoteResponse { 235 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { 236 | let Self { vote_granted } = self; 237 | fmt.debug_struct("VoteResponse") 238 | .field("vote_granted", vote_granted) 239 | .finish() 240 | } 241 | } 242 | 243 | // 244 | // AppendRequest impls 245 | // 246 | 247 | impl fmt::Display for AppendRequest { 248 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { 249 | let Self { prev_log_idx, prev_log_term, leader_commit, entries } = self; 250 | 
fmt.debug_struct("AppendRequest") 251 | .field("prev_log_idx", &format_args!("{}", prev_log_idx)) 252 | .field("prev_log_term", &format_args!("{}", prev_log_term)) 253 | .field("leader_commit", &format_args!("{}", leader_commit)) 254 | .field("entries", &entries.len()) 255 | .finish() 256 | } 257 | } 258 | 259 | // 260 | // AppendResponse impls 261 | // 262 | 263 | impl fmt::Display for AppendResponse { 264 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { 265 | let Self { success, match_idx, last_log_idx } = self; 266 | fmt.debug_struct("AppendResponse") 267 | .field("success", &success) 268 | .field("match_idx", &format_args!("{}", match_idx)) 269 | .field("last_log_idx", &format_args!("{}", last_log_idx)) 270 | .finish() 271 | } 272 | } 273 | 274 | 275 | // 276 | // TermId impls 277 | // 278 | 279 | impl fmt::Display for TermId { 280 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { 281 | let Self { id } = self; 282 | fmt.debug_tuple("TermId") 283 | .field(id) 284 | .finish() 285 | } 286 | } 287 | 288 | impl Copy for TermId {} 289 | impl Eq for TermId {} 290 | impl PartialOrd for TermId { 291 | fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } 292 | } 293 | impl Ord for TermId { 294 | fn cmp(&self, other: &Self) -> Ordering { self.id.cmp(&other.id) } 295 | } 296 | impl AddAssign for TermId { 297 | fn add_assign(&mut self, rhs: u64) { 298 | self.id = self.id.checked_add(rhs).unwrap_or_else(|| panic!("overflow")); 299 | } 300 | } 301 | 302 | // 303 | // LogIndex impls 304 | // 305 | 306 | impl LogIndex { 307 | /// Subtraction with a non-negative integer, checking for overflow. Returns `self - dec`, or `None` if an overflow 308 | /// occurred. 
309 | pub fn checked_sub(self, dec: u64) -> Option { 310 | if let Some(id) = self.id.checked_sub(dec) { 311 | Some(Self { id }) 312 | } else { 313 | None 314 | } 315 | } 316 | } 317 | 318 | impl fmt::Display for LogIndex { 319 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { 320 | let Self { id } = self; 321 | fmt.debug_tuple("LogIdx") 322 | .field(id) 323 | .finish() 324 | } 325 | } 326 | 327 | impl Copy for LogIndex {} 328 | impl Eq for LogIndex {} 329 | impl PartialOrd for LogIndex { 330 | fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } 331 | } 332 | impl Ord for LogIndex { 333 | fn cmp(&self, other: &Self) -> Ordering { self.id.cmp(&other.id) } 334 | } 335 | impl Add for LogIndex { 336 | type Output = Self; 337 | fn add(self, inc: u64) -> Self { 338 | Self { id: self.id.checked_add(inc).unwrap_or_else(|| panic!("overflow")) } 339 | } 340 | } 341 | impl Sub for LogIndex { 342 | type Output = Self; 343 | fn sub(self, dec: u64) -> Self { 344 | Self { id: self.id.saturating_sub(dec) } 345 | } 346 | } 347 | -------------------------------------------------------------------------------- /src/node.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 Open Whisper Systems 3 | * Copyright (C) 2021 jessa0 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. 
If not, see . 17 | */ 18 | 19 | //! Higher-level API for a Raft node. 20 | 21 | use alloc::collections::BTreeSet; 22 | use bytes::Bytes; 23 | use core::fmt::Display; 24 | use crate::core::{RaftState, ReplicationState}; 25 | use crate::message::{LogIndex, RaftMessage, SendableRaftMessage, TermId}; 26 | use crate::log::{CommittedIter, RaftLog}; 27 | use rand_core::RngCore; 28 | 29 | /// A Raft node, used for replicating a strongly-consistent distributed log of entries with arbitrary data amongst its 30 | /// peers. 31 | /// 32 | /// The distributed log can be used, for example, to replicate transactions in a database. 33 | /// 34 | /// # Appending entries to the distributed log 35 | /// 36 | /// Log entries passed to [`append`] are not guaranteed to ultimately be appended to the distributed log, and may be 37 | /// cancelled any time [`receive`] is called before they are "committed". The provided [`RaftLog`] should provide an API 38 | /// to find out which log entries have been cancelled. Only log entries passed to [`append`] on a particular node are 39 | /// guaranteed to appear as cancelled in its own [`RaftLog`], but entries appended on other nodes may appear as well. 40 | /// 41 | /// The distributed log may only be appended to by the node returned by [`leader`], but even that node is not guaranteed 42 | /// to be able to append to the log, since it must be able to send each new entry to a majority of its peers before 43 | /// losing leadership in order for the entry to become committed. The leader may change at any time, and therefore an 44 | /// entry may be first returned from [`take_committed`] on a node different than that to which it was submitted. 45 | /// However, [`take_committed`] is guaranteed to return the same entries in the same order on every node. 
46 | /// 47 | /// # Timer ticks 48 | /// 49 | /// Timeouts in [`RaftNode`] are driven by a timer ticking at a fixed interval, with the number of ticks between timeouts 50 | /// configured by the provided [`RaftConfig`]. Any consistent time interval between ticks may be chosen, but the time 51 | /// interval and [`RaftConfig`] must be the same on all peers in a group. Shorter timeouts will allow Raft to react 52 | /// quicker to network disruptions, but may result in spurious leadership changes when the network latency exceeds 53 | /// `time_interval * election_timeout_ticks`. 54 | /// 55 | /// # Message delivery 56 | /// 57 | /// Unicast message delivery is assumed to be non-lossy in order for replication to make progress. In other words, once 58 | /// a non-broadcast [`SendableRaftMessage`] is returned from an API such as [`append`], [`receive`], or [`timer_tick`], 59 | /// it must be retained and retransmitted until it is confirmed to have been processed by [`receive`] on its 60 | /// destination. Messages may be safely delivered out-of-order or more than once, however. 61 | /// 62 | /// To prevent unbounded queueing, the API is designed to only ever return a bounded amount of unacknowledged unicast 63 | /// message data. This amount can be approximately controlled by [`replication_chunk_size`]. 64 | /// 65 | /// [`append`]: Self::append 66 | /// [`leader`]: Self::leader 67 | /// [`receive`]: Self::receive 68 | /// [`replication_chunk_size`]: RaftConfig::replication_chunk_size 69 | /// [`SendableRaftMessage`]: crate::message::SendableRaftMessage 70 | /// [`take_committed`]: Self::take_committed 71 | /// [`timer_tick`]: Self::timer_tick 72 | pub struct RaftNode { 73 | state: RaftState, 74 | } 75 | 76 | /// Configurable parameters of a Raft node. 77 | #[derive(Clone, Eq, PartialEq)] 78 | pub struct RaftConfig { 79 | /// The minimum number of timer ticks between leadership elections. 
80 | pub election_timeout_ticks: u32, 81 | 82 | /// The number of timer ticks between sending heartbeats to peers. 83 | pub heartbeat_interval_ticks: u32, 84 | 85 | /// The maximum number of bytes to replicate to a peer at a time. 86 | pub replication_chunk_size: usize, 87 | } 88 | 89 | /// An error returned while attempting to append to a Raft log. 90 | pub enum AppendError { 91 | /// The append to the Raft log was cancelled and should be resubmitted to the current Raft leader. 92 | Cancelled { 93 | /// Arbitrary data associated with the log entry. 94 | data: Bytes, 95 | }, 96 | /// An error was returned by the [`RaftLog`](crate::log::RaftLog) implementation. 97 | RaftLogErr(E), 98 | } 99 | 100 | impl RaftNode 101 | where Log: RaftLog, 102 | Random: RngCore, 103 | NodeId: Ord + Clone + Display, 104 | { 105 | /// Constructs a new Raft node with specified peers and configuration. 106 | /// 107 | /// The Raft node will start with an empty initial state. The `log` provided should also be in an empty initial 108 | /// state. Each Raft node in a group must be constructed with the same set of peers and `config`. `peers` may 109 | /// contain `node_id` or omit it to the same effect. `random` must produce different values on every node in a group. 110 | pub fn new( 111 | node_id: NodeId, 112 | peers: BTreeSet, 113 | log: Log, 114 | random: Random, 115 | config: RaftConfig, 116 | ) -> Self { 117 | Self { 118 | state: RaftState::new( 119 | node_id, 120 | peers, 121 | log, 122 | random, 123 | config, 124 | ), 125 | } 126 | } 127 | 128 | 129 | /// Request appending an entry with arbitrary `data` to the Raft log, returning messages to be sent. 130 | /// 131 | /// See ["Message delivery"] for details about delivery requirements for the returned messages. 132 | /// 133 | /// # Errors 134 | /// 135 | /// If this request would immediately be cancelled, then an error is returned. 
136 | /// 137 | /// ["Message delivery"]: RaftNode#message-delivery 138 | #[must_use = "This function returns Raft messages to be sent."] 139 | pub fn append>(&mut self, data: T) -> Result> + '_, AppendError> { 140 | let () = self.state.client_request(data.into())?; 141 | Ok(self.append_entries()) 142 | } 143 | 144 | /// Returns this node's configurable parameters. 145 | pub fn config(&self) -> &RaftConfig { 146 | self.state.config() 147 | } 148 | 149 | /// Returns whether this node is the leader of the latest known term. 150 | pub fn is_leader(&self) -> bool { 151 | self.state.is_leader() 152 | } 153 | 154 | /// Returns the index of the last [`LogEntry`] which has been committed and thus may be returned by 155 | /// [`take_committed`]. 156 | /// 157 | /// [`take_committed`]: Self::take_committed 158 | /// [`LogEntry`]: crate::message::LogEntry 159 | pub fn last_committed_log_index(&self) -> LogIndex { 160 | *self.state.commit_idx() 161 | } 162 | 163 | /// Returns the ID of the leader, if there is one, of the latest known term, along with the term. 164 | pub fn leader(&self) -> (Option<&NodeId>, TermId) { 165 | let (leader, term) = self.state.leader(); 166 | (leader, *term) 167 | } 168 | 169 | /// Returns a reference to the Raft log storage. 170 | pub fn log(&self) -> &Log { 171 | self.state.log() 172 | } 173 | 174 | /// Returns a mutable reference to the Raft log storage. 175 | pub fn log_mut(&mut self) -> &mut Log { 176 | self.state.log_mut() 177 | } 178 | 179 | /// Returns this node's ID. 180 | pub fn node_id(&self) -> &NodeId { 181 | self.state.node_id() 182 | } 183 | 184 | /// Returns the IDs of this node's peers. 185 | pub fn peers(&self) -> &BTreeSet { 186 | self.state.peers() 187 | } 188 | 189 | /// Processes receipt of a `message` from a peer with ID `from`, returning messages to be sent. 190 | /// 191 | /// See ["Message delivery"] for details about delivery requirements for the returned messages. 
192 | /// 193 | /// ["Message delivery"]: RaftNode#message-delivery 194 | #[must_use = "This function returns Raft messages to be sent."] 195 | pub fn receive( 196 | &mut self, 197 | message: RaftMessage, 198 | from: NodeId, 199 | ) -> impl Iterator> + '_ { 200 | let message = self.state.receive(message, from); 201 | message.into_iter().chain(self.append_entries()) 202 | } 203 | 204 | /// Returns the replication state corresponding to the peer with ID `peer_node_id`. 205 | pub fn replication_state(&self, peer_node_id: &NodeId) -> Option<&ReplicationState> { 206 | self.state.replication_state(peer_node_id) 207 | } 208 | 209 | /// Returns a reference to the low-level state of the Raft node. 210 | pub fn state(&mut self) -> &RaftState { 211 | &self.state 212 | } 213 | 214 | /// Returns a mutable reference to the low-level state of the Raft node. 215 | pub fn state_mut(&mut self) -> &mut RaftState { 216 | &mut self.state 217 | } 218 | 219 | /// Returns an iterator yielding committed [log entries][`LogEntry`]. A given [`LogEntry`] will be yielded only once 220 | /// over the lifetime of a [`RaftNode`]. See ["Appending entries to the distributed log"] for details about log 221 | /// committal. 222 | /// 223 | /// ["Appending entries to the distributed log"]: RaftNode#appending-entries-to-the-distributed-log 224 | /// [`LogEntry`]: crate::message::LogEntry 225 | pub fn take_committed(&mut self) -> CommittedIter<'_, Log> { 226 | self.state.take_committed() 227 | } 228 | 229 | /// Ticks forward this node's internal clock by one tick, returning messages to be sent. 230 | /// 231 | /// See ["Message delivery"] for details about delivery requirements for the returned messages. 
232 | /// 233 | /// ["Message delivery"]: RaftNode#message-delivery 234 | #[must_use = "This function returns Raft messages to be sent."] 235 | pub fn timer_tick(&mut self) -> impl Iterator> + '_ { 236 | let message = self.state.timer_tick(); 237 | message.into_iter().chain(self.append_entries()) 238 | } 239 | 240 | #[must_use = "This function returns Raft messages to be sent."] 241 | fn append_entries( 242 | &mut self, 243 | ) -> impl Iterator> + '_ { 244 | let peers = self.state.peers().clone().into_iter(); 245 | peers.flat_map(move |peer| self.state.append_entries(peer)) 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /src/prelude.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 Open Whisper Systems 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 
16 | */ 17 | 18 | pub use alloc::{format, vec}; 19 | pub use alloc::borrow::{ToOwned}; 20 | pub use alloc::boxed::{Box}; 21 | pub use alloc::string::{String, ToString}; 22 | pub use alloc::vec::{Vec}; 23 | -------------------------------------------------------------------------------- /src/raft.proto: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 Open Whisper Systems 3 | * Copyright (C) 2021 jessa0 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | 19 | syntax = "proto2"; 20 | 21 | package raft.protobufs; 22 | 23 | message RaftMessage { 24 | required TermId term = 2; 25 | oneof rpc { 26 | VoteRequest vote_request = 3; 27 | VoteResponse vote_response = 4; 28 | AppendRequest append_request = 5; 29 | AppendResponse append_response = 6; 30 | }; 31 | } 32 | 33 | message VoteRequest { 34 | required LogIndex last_log_idx = 2; 35 | required TermId last_log_term = 3; 36 | } 37 | 38 | message VoteResponse { 39 | required bool vote_granted = 2; 40 | } 41 | 42 | message AppendRequest { 43 | required LogIndex prev_log_idx = 1; 44 | required TermId prev_log_term = 2; 45 | required LogIndex leader_commit = 3; 46 | repeated LogEntry entries = 4; 47 | } 48 | 49 | message AppendResponse { 50 | required bool success = 1; 51 | required LogIndex match_idx = 2; 52 | required LogIndex last_log_idx = 3; 53 | } 54 | 55 | message LogEntry { 56 | required TermId term = 1; 57 | required bytes data = 2; 58 | } 59 | 60 | message TermId { 61 | required uint64 id = 1; 62 | } 63 | 64 | message LogIndex { 65 | required uint64 id = 1; 66 | } 67 | -------------------------------------------------------------------------------- /tests/commit.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 jessa0 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 
16 | */ 17 | 18 | mod common; 19 | 20 | use common::*; 21 | 22 | #[test] 23 | pub fn _1_commit() { 24 | TestRaftGroup::new(1, &mut init_random(), config()) 25 | .run_until(|group| group.has_leader()) 26 | .modify(|group| assert!(group.nodes.iter_mut().any(|raft| raft.client_request("one".into()).is_ok()))) 27 | .run_until_commit(|commit| { assert_eq!(commit.data, "one"); true }); 28 | } 29 | 30 | #[test] 31 | pub fn _2_commit() { 32 | TestRaftGroup::new(2, &mut init_random(), config()) 33 | .run_until(|group| group.has_leader()) 34 | .modify(|group| assert!(group.nodes.iter_mut().any(|raft| raft.client_request("one".into()).is_ok()))) 35 | .run_until_commit(|commit| { assert_eq!(commit.data, "one"); true }); 36 | } 37 | 38 | #[test] 39 | pub fn _3_commit() { 40 | TestRaftGroup::new(3, &mut init_random(), config()) 41 | .run_until(|group| group.has_leader()) 42 | .modify(|group| assert!(group.nodes.iter_mut().any(|raft| raft.client_request("one".into()).is_ok()))) 43 | .run_until_commit(|commit| { assert_eq!(commit.data, "one"); true }); 44 | } 45 | 46 | #[test] 47 | pub fn commit_leader_change() { 48 | let mut group = TestRaftGroup::new(3, &mut init_random(), config()); 49 | group.run_on_node(0, |raft| raft.timeout()); 50 | group.run_until(|group| group.nodes[0].is_leader()); 51 | 52 | assert!(group.nodes[0].client_request("one".into()).is_ok()); 53 | group.config = config().drop_to(0); 54 | group.run_for(1); 55 | 56 | assert!(group.take_committed().all(|commit| commit.data.is_empty())); 57 | group.config = config().isolate(0); 58 | group.run_until_commit(|commit| { assert_eq!(commit.data, "one"); true }); 59 | } 60 | 61 | #[test] 62 | pub fn cancel_uncommitted() { 63 | let mut group = TestRaftGroup::new(3, &mut init_random(), config()); 64 | group.run_on_node(0, |raft| raft.timeout()); 65 | group.run_until(|group| group.nodes[0].is_leader()); 66 | 67 | assert!(group.nodes[0].client_request("one".into()).is_ok()); 68 | group.config = config().isolate(0); 69 | 
group.run_until(|group| group.nodes[1..].iter().any(|raft| raft.is_leader())); 70 | 71 | assert!(group.nodes[1..].iter_mut().any(|raft| raft.client_request("two".into()).is_ok())); 72 | group.run_until_commit(|commit| { assert_eq!(commit.data, "two"); true }); 73 | 74 | log::info!("committed two"); 75 | group.config = config(); 76 | group.run_until(|group| group.nodes[0].take_committed().any(|commit| { 77 | if !commit.data.is_empty() { 78 | assert_eq!(commit.data, "two"); 79 | true 80 | } else { 81 | false 82 | } 83 | })); 84 | } 85 | -------------------------------------------------------------------------------- /tests/common.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 jessa0 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU Affero General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Affero General Public License 15 | * along with this program. If not, see . 
16 | */ 17 | 18 | #![allow(dead_code)] 19 | 20 | use rand_core::{RngCore, SeedableRng}; 21 | use rand_chacha::ChaChaRng; 22 | use simple_raft::core::RaftState; 23 | use simple_raft::log::mem::RaftLogMemory; 24 | use simple_raft::message::{LogEntry, RaftMessage, RaftMessageDestination, Rpc, SendableRaftMessage, TermId}; 25 | use simple_raft::node::RaftConfig; 26 | use std::cell::RefCell; 27 | use std::collections::{BTreeSet, VecDeque}; 28 | 29 | pub const CONFIG: RaftConfig = RaftConfig { 30 | election_timeout_ticks: 10, 31 | heartbeat_interval_ticks: 9, 32 | replication_chunk_size: 1024, 33 | }; 34 | const RANDOM_SEED: u64 = 0; 35 | const MAX_TICKS: u32 = 100_000; 36 | 37 | pub type TestRaft = RaftState; 38 | 39 | pub struct TestRaftGroup { 40 | pub nodes: Vec, 41 | pub tick: u32, 42 | pub config: TestRaftGroupConfig, 43 | pub dropped_messages: Vec<(NodeId, SendableRaftMessage)>, 44 | } 45 | 46 | #[derive(Clone, Default)] 47 | pub struct TestRaftGroupConfig { 48 | pub drops: BTreeSet<(Option, Option)>, 49 | pub down: BTreeSet, 50 | } 51 | 52 | #[derive(Clone, Copy, Debug, derive_more::Display, Eq, derive_more::From, PartialEq, PartialOrd, Ord)] 53 | #[display(fmt = "{:?}", self)] 54 | pub struct NodeId(u64); 55 | 56 | pub struct TestLogger; 57 | 58 | pub struct TestLoggerContext { 59 | node_id: Option, 60 | tick: Option, 61 | } 62 | 63 | pub fn rpc_types() -> [Rpc; 4] { 64 | [ 65 | Rpc::VoteRequest(Default::default()), 66 | Rpc::VoteResponse(Default::default()), 67 | Rpc::AppendRequest(Default::default()), 68 | Rpc::AppendResponse(Default::default()), 69 | ] 70 | } 71 | 72 | pub fn init_random() -> ChaChaRng { 73 | ChaChaRng::seed_from_u64(RANDOM_SEED) 74 | } 75 | 76 | pub fn raft(node_id: u64, peers: Vec, log: Option, random: &mut impl RngCore) -> TestRaft { 77 | TestLogger::init(); 78 | RaftState::new( 79 | NodeId(node_id), 80 | peers.into_iter().map(NodeId).collect(), 81 | log.unwrap_or_else(|| RaftLogMemory::new_unbounded()), 82 | 
ChaChaRng::seed_from_u64(random.next_u64()), 83 | CONFIG, 84 | ) 85 | } 86 | 87 | pub fn config() -> TestRaftGroupConfig { 88 | TestRaftGroupConfig::default() 89 | } 90 | 91 | pub fn send(raft: &mut TestRaft, from: u64, term: TermId, rpc: Rpc) -> Option> { 92 | raft.receive(RaftMessage { 93 | term, 94 | rpc: Some(rpc), 95 | }, NodeId(from)) 96 | } 97 | 98 | pub fn append_entries<'a>(node: &'a mut TestRaft, peers: impl IntoIterator + 'a) -> impl Iterator> + 'a { 99 | let node_id = *node.node_id(); 100 | peers.into_iter().flat_map(move |append_to_node_id| { 101 | if append_to_node_id != node_id { 102 | node.append_entries(append_to_node_id) 103 | } else { 104 | None 105 | } 106 | }) 107 | } 108 | 109 | pub fn run_group<'a>( 110 | nodes: impl Iterator + ExactSizeIterator, 111 | initial_messages: impl IntoIterator)>, 112 | start_tick: u32, 113 | ticks: Option, 114 | config: &mut TestRaftGroupConfig, 115 | dropped_messages: &mut Vec<(NodeId, SendableRaftMessage)>, 116 | ) { 117 | let mut nodes: Vec<_> = nodes.collect(); 118 | let node_ids: Vec<_> = nodes.iter().map(|node| *node.node_id()).collect(); 119 | let mut messages = VecDeque::with_capacity(nodes.len() * nodes.len()); 120 | messages.extend(initial_messages.into_iter()); 121 | messages.extend(dropped_messages.drain(..)); 122 | 123 | for tick in 0..ticks.unwrap_or(1) { 124 | TestLogger::set_tick(Some(start_tick + tick)); 125 | if ticks.is_some() { 126 | for node in &mut nodes { 127 | let node_id = *node.node_id(); 128 | if !config.is_node_down(node_id) { 129 | TestLogger::set_node_id(Some(node_id)); 130 | messages.extend(node.timer_tick().map(|message| (node_id, message))); 131 | messages.extend(append_entries(node, node_ids.iter().cloned()).map(|message| (node_id, message))); 132 | } 133 | } 134 | } 135 | 136 | while let Some((from, sendable)) = messages.pop_front() { 137 | let (reply_to_node_id, to_node_count) = match sendable.dest { 138 | RaftMessageDestination::Broadcast => (None, 
nodes.len().saturating_sub(1)), 139 | RaftMessageDestination::To(to) => (Some(to), 1), 140 | }; 141 | let to_nodes = nodes.iter_mut().filter(|node| match &reply_to_node_id { 142 | Some(to_node_id) => node.node_id() == to_node_id, 143 | None => node.node_id() != &from, 144 | }); 145 | 146 | for (to_node, message) in Iterator::zip(to_nodes, itertools::repeat_n(sendable.message, to_node_count)) { 147 | let to_node_id = *to_node.node_id(); 148 | TestLogger::set_node_id(Some(to_node_id)); 149 | if !config.should_drop(from, to_node_id) { 150 | log::info!("<- {} {}", from, message); 151 | messages.extend(to_node.receive(message, from).map(|message| (to_node_id, message))); 152 | } else { 153 | log::info!("<- {} DROPPED {}", from, message); 154 | if let Some(reply_to_node_id) = reply_to_node_id { 155 | dropped_messages.push((from, SendableRaftMessage { message, dest: RaftMessageDestination::To(reply_to_node_id) })); 156 | } 157 | } 158 | messages.extend(append_entries(to_node, node_ids.iter().cloned()).map(|message| (to_node_id, message))); 159 | } 160 | } 161 | } 162 | TestLogger::set_tick(None); 163 | TestLogger::set_node_id(None); 164 | } 165 | 166 | // 167 | // RaftGroup impls 168 | // 169 | 170 | impl TestRaftGroup { 171 | pub fn new(size: u64, random: &mut impl RngCore, config: TestRaftGroupConfig) -> Self { 172 | let nodes: Vec = (0..size).collect(); 173 | Self { 174 | nodes: nodes.iter().map(|node_id| raft(*node_id, nodes.clone(), None, random)).collect(), 175 | tick: 0, 176 | config, 177 | dropped_messages: Default::default(), 178 | } 179 | } 180 | 181 | pub fn run_until(&mut self, mut until_fun: impl FnMut(&mut Self) -> bool) -> &mut Self { 182 | let mut ticks_remaining = MAX_TICKS; 183 | while !until_fun(self) { 184 | ticks_remaining = ticks_remaining.checked_sub(1).expect("condition failed after maximum simulation length"); 185 | self.tick += 1; 186 | run_group(self.nodes.iter_mut(), None, self.tick, Some(1), &mut self.config, &mut self.dropped_messages); 187 | 
} 188 | self 189 | } 190 | 191 | pub fn run_until_commit(&mut self, mut until_fun: impl FnMut(&LogEntry) -> bool) -> &mut Self { 192 | self.run_until(|group| { 193 | let result = group.take_committed().any(|commit| !commit.data.is_empty() && until_fun(&commit)); 194 | group.take_committed().for_each(drop); 195 | result 196 | }) 197 | } 198 | 199 | pub fn run_for(&mut self, ticks: u32) -> &mut Self { 200 | self.run_for_inspect(ticks, |_| ()) 201 | } 202 | 203 | pub fn run_for_inspect(&mut self, ticks: u32, mut fun: impl FnMut(&mut Self)) -> &mut Self { 204 | let mut ticks_remaining = ticks; 205 | while let Some(new_ticks_remaining) = ticks_remaining.checked_sub(1) { 206 | ticks_remaining = new_ticks_remaining; 207 | self.tick += 1; 208 | run_group(self.nodes.iter_mut(), None, self.tick, Some(1), &mut self.config, &mut self.dropped_messages); 209 | fun(self); 210 | } 211 | self 212 | } 213 | 214 | pub fn run_on_all( 215 | &mut self, 216 | mut fun: impl FnMut(&mut TestRaft) -> Option>, 217 | ) -> &mut Self { 218 | let messages = self.nodes.iter_mut().flat_map(|node| fun(node).map(|message| (*node.node_id(), message))).collect::>(); 219 | run_group(self.nodes.iter_mut(), messages, self.tick, None, &mut self.config, &mut self.dropped_messages); 220 | self 221 | } 222 | 223 | pub fn run_on_node( 224 | &mut self, 225 | node_idx: usize, 226 | fun: impl FnOnce(&mut TestRaft) -> Option>, 227 | ) -> &mut Self { 228 | let node_id = *self.nodes[node_idx].node_id(); 229 | let messages = fun(&mut self.nodes[node_idx]).map(|message| (node_id, message)); 230 | run_group(self.nodes.iter_mut(), messages, self.tick, None, &mut self.config, &mut self.dropped_messages); 231 | self 232 | } 233 | 234 | pub fn inspect(&mut self, fun: impl FnOnce(&Self)) -> &mut Self { 235 | fun(self); 236 | self 237 | } 238 | 239 | pub fn modify(&mut self, fun: impl FnOnce(&mut Self)) -> &mut Self { 240 | fun(self); 241 | self 242 | } 243 | 244 | pub fn take_committed(&mut self) -> impl Iterator + '_ { 245 
| self.nodes.iter_mut().flat_map(|node| node.take_committed()) 246 | } 247 | 248 | pub fn has_leader(&self) -> bool { 249 | self.nodes.iter().any(|node| node.is_leader()) 250 | } 251 | } 252 | 253 | // 254 | // TestRaftGroupConfig impls 255 | // 256 | 257 | impl TestRaftGroupConfig { 258 | pub fn node_down(mut self, node_id: u64) -> Self { 259 | self.down.insert(NodeId(node_id)); 260 | self 261 | } 262 | 263 | pub fn isolate(mut self, node_id: u64) -> Self { 264 | self.drops.insert((Some(NodeId(node_id)), None)); 265 | self.drops.insert((None, Some(NodeId(node_id)))); 266 | self 267 | } 268 | 269 | pub fn drop_between(mut self, from: u64, to: u64) -> Self { 270 | self.drops.insert((Some(NodeId(from)), Some(NodeId(to)))); 271 | self.drops.insert((Some(NodeId(to)), Some(NodeId(from)))); 272 | self 273 | } 274 | 275 | pub fn drop_to(mut self, node_id: u64) -> Self { 276 | self.drops.insert((None, Some(NodeId(node_id)))); 277 | self 278 | } 279 | 280 | pub fn is_node_down(&self, node_id: NodeId) -> bool { 281 | self.down.contains(&node_id) 282 | } 283 | 284 | pub fn should_drop(&self, from: NodeId, to: NodeId) -> bool { 285 | self.drops.contains(&(Some(from), Some(to))) || 286 | self.drops.contains(&(Some(from), None)) || 287 | self.drops.contains(&(None, Some(to))) || 288 | self.down.contains(&from) || 289 | self.down.contains(&to) 290 | } 291 | } 292 | 293 | // 294 | // TestLogger impls 295 | // 296 | 297 | thread_local! 
{ 298 | static LOGGER_CONTEXT: RefCell = RefCell::new(TestLoggerContext::new()); 299 | } 300 | 301 | impl TestLogger { 302 | pub fn init() { 303 | let _ignore = log::set_logger(&Self); 304 | log::set_max_level(log::LevelFilter::Debug); 305 | } 306 | pub fn set_node_id(node_id: Option) { 307 | LOGGER_CONTEXT.with(|context| { 308 | context.borrow_mut().node_id = node_id; 309 | }); 310 | } 311 | pub fn set_tick(tick: Option) { 312 | LOGGER_CONTEXT.with(|context| { 313 | context.borrow_mut().tick = tick; 314 | }); 315 | } 316 | } 317 | 318 | impl log::Log for TestLogger { 319 | fn enabled(&self, _metadata: &log::Metadata) -> bool { 320 | true 321 | } 322 | 323 | fn log(&self, record: &log::Record) { 324 | LOGGER_CONTEXT.with(|context| { 325 | let context = context.borrow(); 326 | if let Some(node_id) = context.node_id { 327 | if let Some(tick) = context.tick { 328 | eprintln!("tick {:03} {} {}", tick, node_id, record.args()); 329 | } else { 330 | eprintln!("tick ??? {} {}", node_id, record.args()); 331 | } 332 | } else { 333 | eprintln!("{}", record.args()); 334 | } 335 | }) 336 | } 337 | 338 | fn flush(&self) {} 339 | } 340 | 341 | // 342 | // TestLoggerContext impls 343 | // 344 | 345 | impl TestLoggerContext { 346 | const fn new() -> Self { 347 | Self { 348 | node_id: None, 349 | tick: None, 350 | } 351 | } 352 | } 353 | -------------------------------------------------------------------------------- /tests/leader.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 jessa0 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU Affero General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 
*
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

mod common;

use common::*;
use simple_raft::message::{Rpc, TermId};

/// An `AppendRequest` sent by node 2 at the next term makes node 2 the recorded leader for
/// that term.
#[test]
pub fn append_request_update_leader() {
    let mut node = raft(1, vec![2], None, &mut init_random());
    assert!(!node.is_leader());

    // Copy the current term out of the borrowed `leader()` tuple, then advance it by one.
    let mut next_term = *node.leader().1;
    next_term += 1;

    send(&mut node, 2, next_term, Rpc::AppendRequest(Default::default()));
    assert_eq!(node.leader(), (Some(&2.into()), &next_term));
}

/// Every RPC kind other than `AppendRequest` advances the term but leaves the leader unset.
#[test]
pub fn no_update_leader() {
    let all_rpcs = rpc_types();
    for rpc in all_rpcs.iter().cloned().filter(|rpc| !matches!(rpc, Rpc::AppendRequest(_))) {
        let mut node = raft(1, vec![2, 3], None, &mut init_random());
        let mut expected_term = TermId::default();
        assert_eq!(node.leader(), (None, &expected_term));

        expected_term += 1;
        send(&mut node, 2, expected_term, rpc);
        assert_eq!(node.leader(), (None, &expected_term));
    }
}

--------------------------------------------------------------------------------
/tests/term.rs:
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2021 jessa0
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

mod common;

use common::*;
use simple_raft::message::{RaftMessage, Rpc, TermId, VoteResponse};

/// A node that has won an election adopts a higher term carried by any RPC kind.
#[test]
pub fn leader_update_term() {
    let all_rpcs = rpc_types();
    for rpc in all_rpcs.iter().cloned() {
        let mut node = raft(1, vec![2, 3], None, &mut init_random());
        let mut expected_term = TermId::default();
        assert_eq!(node.leader().1, &expected_term);

        // Timing out is expected to emit a message stamped with the next term.
        expected_term += 1;
        let RaftMessage { term: election_term, .. } = node.timeout().unwrap().message;
        assert_eq!(election_term, expected_term);
        assert_eq!(node.leader().1, &expected_term);

        // A single granted vote from node 2 makes this node the leader of that term.
        send(&mut node, 2, expected_term, Rpc::VoteResponse(VoteResponse { vote_granted: true }));
        assert_eq!(node.leader(), (Some(node.node_id()), &expected_term));

        // Any RPC at a higher term must move the node to that term.
        expected_term += 1;
        send(&mut node, 2, expected_term, rpc);
        assert_eq!(node.leader().1, &expected_term);
    }
}

/// A node that has timed out but not yet won adopts a higher term carried by any RPC kind.
#[test]
pub fn candidate_update_term() {
    let all_rpcs = rpc_types();
    for rpc in all_rpcs.iter().cloned() {
        let mut node = raft(1, vec![2, 3], None, &mut init_random());
        let mut expected_term = TermId::default();
        assert_eq!(node.leader().1, &expected_term);

        // After the timeout the term has advanced and no leader is known.
        expected_term += 1;
        let RaftMessage { term: election_term, .. } = node.timeout().unwrap().message;
        assert_eq!(election_term, expected_term);
        assert_eq!(node.leader(), (None, &expected_term));

        expected_term += 1;
        send(&mut node, 2, expected_term, rpc);
        assert_eq!(node.leader().1, &expected_term);
    }
}

/// A freshly created node adopts a higher term carried by any RPC kind.
#[test]
pub fn follower_update_term() {
    let all_rpcs = rpc_types();
    for rpc in all_rpcs.iter().cloned() {
        let mut node = raft(1, vec![2, 3], None, &mut init_random());
        let mut expected_term = TermId::default();
        assert_eq!(node.leader(), (None, &expected_term));

        expected_term += 1;
        send(&mut node, 2, expected_term, rpc);
        assert_eq!(node.leader().1, &expected_term);
    }
}

--------------------------------------------------------------------------------
/tests/voting.rs:
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2021 jessa0
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */ 17 | 18 | mod common; 19 | 20 | use common::*; 21 | use simple_raft::message::{RaftMessage, Rpc, VoteResponse}; 22 | 23 | #[test] 24 | pub fn empty_group_become_leader() { 25 | let mut raft = raft(1, vec![], None, &mut init_random()); 26 | assert!(!raft.is_leader()); 27 | 28 | raft.timeout(); 29 | assert!(raft.is_leader()); 30 | } 31 | 32 | #[test] 33 | pub fn _1_peer_become_leader() { 34 | let mut raft = raft(1, vec![2], None, &mut init_random()); 35 | assert!(!raft.is_leader()); 36 | 37 | let RaftMessage { term, .. } = raft.timeout().unwrap().message; 38 | assert!(!raft.is_leader()); 39 | 40 | send(&mut raft, 2, term, Rpc::VoteResponse(VoteResponse { vote_granted: true })); 41 | assert!(raft.is_leader()); 42 | } 43 | 44 | #[test] 45 | pub fn become_leader() { 46 | let mut raft = raft(1, vec![2, 3], None, &mut init_random()); 47 | assert!(!raft.is_leader()); 48 | 49 | let RaftMessage { term, .. } = raft.timeout().unwrap().message; 50 | assert!(!raft.is_leader()); 51 | 52 | send(&mut raft, 2, term, Rpc::VoteResponse(VoteResponse { vote_granted: false })); 53 | assert!(!raft.is_leader()); 54 | 55 | send(&mut raft, 3, term, Rpc::VoteResponse(VoteResponse { vote_granted: true })); 56 | assert!(raft.is_leader()); 57 | } 58 | 59 | #[test] 60 | pub fn vote_old_term() { 61 | let mut raft = raft(1, vec![2, 3], None, &mut init_random()); 62 | let RaftMessage { term, .. } = raft.timeout().unwrap().message; 63 | raft.timeout(); 64 | 65 | send(&mut raft, 2, term, Rpc::VoteResponse(VoteResponse { vote_granted: true })); 66 | assert!(!raft.is_leader()); 67 | } 68 | 69 | #[test] 70 | pub fn vote_twice() { 71 | let mut raft = raft(1, vec![2, 3, 4, 5], None, &mut init_random()); 72 | let RaftMessage { term, .. 
} = raft.timeout().unwrap().message; 73 | 74 | send(&mut raft, 2, term, Rpc::VoteResponse(VoteResponse { vote_granted: true })); 75 | send(&mut raft, 2, term, Rpc::VoteResponse(VoteResponse { vote_granted: true })); 76 | assert!(!raft.is_leader()); 77 | 78 | send(&mut raft, 3, term, Rpc::VoteResponse(VoteResponse { vote_granted: true })); 79 | assert!(raft.is_leader()); 80 | } 81 | 82 | #[test] 83 | pub fn _1_timeout() { 84 | TestRaftGroup::new(1, &mut init_random(), config()) 85 | .run_on_node(0, |raft| raft.timeout()) 86 | .inspect(|group| assert!(group.has_leader())); 87 | } 88 | 89 | #[test] 90 | pub fn _2_nodes_timeout() { 91 | TestRaftGroup::new(2, &mut init_random(), config()) 92 | .run_on_node(0, |raft| raft.timeout()) 93 | .inspect(|group| assert!(group.has_leader())); 94 | } 95 | 96 | #[test] 97 | pub fn _2_nodes_failed_timeout() { 98 | TestRaftGroup::new(2, &mut init_random(), config().node_down(1)) 99 | .run_on_node(0, |raft| raft.timeout()) 100 | .inspect(|group| assert!(!group.has_leader())); 101 | } 102 | 103 | #[test] 104 | pub fn _3_nodes_timeout() { 105 | TestRaftGroup::new(3, &mut init_random(), config()) 106 | .run_on_node(0, |raft| raft.timeout()) 107 | .inspect(|group| assert!(group.has_leader())); 108 | } 109 | 110 | #[test] 111 | pub fn _3_nodes_degraded_timeout() { 112 | TestRaftGroup::new(3, &mut init_random(), config().isolate(1)) 113 | .run_on_node(0, |raft| raft.timeout()) 114 | .inspect(|group| assert!(group.has_leader())); 115 | } 116 | 117 | #[test] 118 | pub fn _3_nodes_split_timeout() { 119 | TestRaftGroup::new(3, &mut init_random(), config().drop_between(0, 1)) 120 | .run_on_node(0, |raft| raft.timeout()) 121 | .inspect(|group| assert!(group.has_leader())); 122 | } 123 | 124 | #[test] 125 | pub fn _3_nodes_failed_timeout() { 126 | TestRaftGroup::new(3, &mut init_random(), config().node_down(1).node_down(2)) 127 | .run_on_node(0, |raft| raft.timeout()) 128 | .inspect(|group| assert!(!group.has_leader())); 129 | } 130 | 131 | 
#[test] 132 | pub fn _4_nodes_degraded_timeout() { 133 | TestRaftGroup::new(4, &mut init_random(), config().isolate(1)) 134 | .run_on_node(0, |raft| raft.timeout()) 135 | .inspect(|group| assert!(group.has_leader())); 136 | } 137 | 138 | #[test] 139 | pub fn _4_nodes_failed_timeout() { 140 | TestRaftGroup::new(4, &mut init_random(), config().isolate(1).isolate(2)) 141 | .run_on_node(0, |raft| raft.timeout()) 142 | .inspect(|group| assert!(!group.has_leader())); 143 | } 144 | 145 | #[test] 146 | pub fn _5_nodes_degraded_timeout() { 147 | TestRaftGroup::new(5, &mut init_random(), config().isolate(1).isolate(2)) 148 | .run_on_node(0, |raft| raft.timeout()) 149 | .inspect(|group| assert!(group.has_leader())); 150 | } 151 | 152 | #[test] 153 | pub fn _5_nodes_failed_timeout() { 154 | TestRaftGroup::new(5, &mut init_random(), config().isolate(1).isolate(2).isolate(3)) 155 | .run_on_node(0, |raft| raft.timeout()) 156 | .inspect(|group| assert!(!group.has_leader())); 157 | } 158 | 159 | #[test] 160 | pub fn election_timeout() { 161 | TestRaftGroup::new(3, &mut init_random(), config()) 162 | .run_until(|group| group.has_leader()) 163 | .run_for_inspect(10 * CONFIG.election_timeout_ticks, |group| assert!(group.has_leader())); 164 | } 165 | 166 | #[test] 167 | pub fn degraded() { 168 | TestRaftGroup::new(3, &mut init_random(), config().isolate(0)) 169 | .run_until(|group| group.has_leader()) 170 | .run_for_inspect(10 * CONFIG.election_timeout_ticks, |group| assert!(group.has_leader())); 171 | } 172 | 173 | #[test] 174 | pub fn split_unstable() { 175 | TestRaftGroup::new(3, &mut init_random(), config().drop_between(1, 2)) 176 | .run_on_node(1, |raft| raft.timeout()) 177 | .inspect(|group| assert!(group.nodes[1].is_leader())) 178 | .run_until(|group| !group.has_leader()) 179 | .run_until(|group| group.has_leader()); 180 | } 181 | 182 | #[test] 183 | pub fn split_stable() { 184 | TestRaftGroup::new(3, &mut init_random(), config().drop_between(1, 2)) 185 | .run_on_node(0, |raft| 
raft.timeout()) 186 | .run_for_inspect(10 * CONFIG.election_timeout_ticks, |group| assert!(group.nodes[0].is_leader())); 187 | } 188 | 189 | #[test] 190 | pub fn split_rejoin() { 191 | TestRaftGroup::new(3, &mut init_random(), config().drop_between(1, 2)) 192 | .run_on_node(1, |raft| raft.timeout()) 193 | .inspect(|group| assert!(group.nodes[1].is_leader())) 194 | .run_until(|group| !group.has_leader()) 195 | .modify(|group| group.config = config()) 196 | .run_until(|group| group.has_leader()) 197 | .run_for_inspect(10 * CONFIG.election_timeout_ticks, |group| assert!(group.has_leader())); 198 | } 199 | --------------------------------------------------------------------------------