├── .github ├── CODE_OF_CONDUCT.md ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── doc_improve.md │ ├── enhancement.md │ ├── feature_request.md │ ├── github-does-not-support-choosing-PR-template-on-web-yet │ ├── refactor_request.md │ └── testing_request.md ├── PULL_REQUEST_TEMPLATE.md ├── settings-sample.yml └── workflows │ ├── golangci-lint.yml │ └── test.yml ├── .gitignore ├── .travis.yml ├── .xpt.vim ├── LICENSE ├── Makefile ├── README.md ├── _tmpl └── _inc │ ├── README.md.j2 │ ├── badges.md │ └── install.md ├── ballot.go ├── ballot_test.go ├── build-logs.sh ├── clusterconfig.go ├── clusterconfig_test.go ├── cmd.go ├── cmd_test.go ├── commit.go ├── common.mk ├── docs ├── design │ └── mergelog.md ├── log.md ├── traft-package.md └── traft.md ├── elect.go ├── elect_test.go ├── errors.go ├── gen-proto.sh ├── go.mod ├── go.sum ├── internal_api.go ├── leaderid.go ├── leaderid_test.go ├── log.go ├── logforward.go ├── logforward_test.go ├── logging.go ├── mainloop.go ├── propose.go ├── quorum.go ├── quorum_test.go ├── record.go ├── record_test.go ├── replicastatus.go ├── replicastatus_test.go ├── rpc.go ├── scripts ├── build_md.py └── requirements.txt ├── server.go ├── str.go ├── tailbitmap.go ├── tailbitmap_test.go ├── traft.go ├── traft.pb.go ├── traft.proto ├── util.go ├── util_test.go ├── votereply.go └── votereply_test.go /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # OpenACID's Code of Conduct 2 | 3 | 4 | 5 | 6 | 7 | - [Why have a Code of Conduct?](#why-have-a-code-of-conduct) 8 | - [Our Standards](#our-standards) 9 | - [Social Rules](#social-rules) 10 | - [No feigning surprise](#no-feigning-surprise) 11 | - [No condescending well-actually’s](#no-condescending-well-actually%E2%80%99s) 12 | - [No backseat driving](#no-backseat-driving) 13 | - [No subtle -isms](#no-subtle--isms) 14 | - [Giving and Receiving Feedback](#giving-and-receiving-feedback) 15 | - [Enforcement](#enforcement) 16 | 17 | 18 | 19 
| ## Why have a Code of Conduct? 20 | 21 | This Code of Conduct is designed to help all of us build a pleasant, productive, 22 | and fearless community. 23 | 24 | We are striving to make our community a great group of people to work with. 25 | 26 | This Code of Conduct applies both within project spaces and in public spaces 27 | when an individual is representing the project or its community. 28 | 29 | 30 | ## Our Standards 31 | 32 | Examples of behavior that contributes to creating a positive environment 33 | include: 34 | 35 | * Using welcoming and inclusive language 36 | * Being respectful of differing viewpoints and experiences 37 | * Gracefully accepting constructive criticism 38 | * Focusing on what is best for the community 39 | * Showing empathy towards other community members 40 | 41 | Examples of unacceptable behavior by participants include: 42 | 43 | * The use of sexualized language or imagery and unwelcome sexual attention or 44 | advances 45 | * Trolling, insulting/derogatory comments, and personal or political attacks 46 | * Public or private harassment 47 | * Publishing others' private information, such as a physical or electronic 48 | address, without explicit permission 49 | * Other conduct which could reasonably be considered inappropriate in a 50 | professional setting 51 | 52 | 53 | ## Social Rules 54 | 55 | 56 | ### No feigning surprise 57 | 58 | **The first rule means you shouldn't act surprised when people say they don't know 59 | something**. 60 | 61 | This applies to both technical things ("What?! I can't believe you 62 | don't know what the stack is!") and non-technical things ("You don't know who 63 | RMS is?!"). 64 | 65 | Feigning surprise has absolutely no social or educational benefit: When people 66 | feign surprise, it's usually to make them feel better about themselves and 67 | others feel worse. 68 | And even when that's not the intention, it's almost always the effect. 
69 | 70 | As you've probably already guessed, this rule is tightly coupled to our belief 71 | in the importance of people **feeling comfortable saying "I don't know" and "I 72 | don't understand."** 73 | 74 | 75 | ### No condescending well-actually’s 76 | 77 | **A well-actually happens when someone says something that's almost— but not 78 | entirely— correct, and you say, "well, actually…" and then give a minor 79 | correction**. 80 | 81 | Even in complicated environments where small details and edge-cases can be 82 | forgotten, unless they are critical, they should not be interjected. 83 | 84 | If they are critical to the conversation phrasing can be the difference between 85 | a valuable clarification and condescension e.g. instead of “well actually …” a 86 | simple change to “don’t forget …” or “it’s easy to forget …” 87 | 88 | 89 | ### No backseat driving 90 | 91 | **If you overhear people working through a problem, you shouldn't intermittently 92 | lob advice across the room**. 93 | 94 | This can lead to the "too many cooks" problem, but more important, it can be 95 | rude and disruptive to half-participate in a conversation. 96 | This is particularly true in a distributed environment involving conversations 97 | in Slack. 98 | The occasional interjection to an on-going conversation, particularly based on 99 | backscroll, can be very disruptive. 100 | 101 | This isn't to say you shouldn't help, offer advice, or join conversations. 102 | On the contrary, we encourage all those things. 103 | Rather, it just means that when you want to help out or work with others, you 104 | should fully engage and not just interject sporadically. 105 | 106 | 107 | ### No subtle -isms 108 | 109 | **Subtle -isms, also called microaggressions, are small things that make others 110 | feel uncomfortable**, for example, saying "It's so easy my grandmother could do 111 | it" is a subtle -ism, as it is both subtly sexist and ageist. 
112 | 113 | The "subtle" in "subtle -isms" means that it's probably not obvious to everyone 114 | right away what was wrong with the comment, even people in the group otherwise 115 | affected by the comment. 116 | And, even though they are subtle, might seem insignificant, and are 117 | often unintentional, a steady stream of them compounds to make people in 118 | under-represented groups feel less welcome. 119 | 120 | 121 | ## Giving and Receiving Feedback 122 | 123 | **Give constructive, not critical feedback**. 124 | 125 | Feedback is negatively critical when it surfaces something wrong with someone or 126 | something they produced, especially without any mention of ways to make their 127 | behavior or their product better. 128 | 129 | - **Critical feedback** on work often looks like "you don't write enough tests" or 130 | "your code quality isn't good enough". 131 | Personal criticism can be more severe and often looks like "you should be less 132 | judgemental" or "you are a burden because you ask too many questions”. 133 | 134 | - **Constructive feedback** is more about how a person can do better rather than what 135 | they are doing wrong. 136 | If you want someone to do something better, you should tell them what better 137 | looks like. 138 | Ask a question to get a discussion rolling, to gain context, and then if you see 139 | room for improvement give declarative feedback to that effect. 140 | 141 | This creates an environment where people understand what success looks like 142 | instead of just feeling like they are unsuccessful. 143 | 144 | **Code, configurations, and their reviews are also mechanisms for communication**. 145 | 146 | Just as you shouldn't interact with people poorly in person, do not interact 147 | poorly through code or code review. 148 | 149 | **You are not your products**. 150 | Technical critiques are integral, and should be hard on the product, not on the 151 | producer. 
152 | While it is important to care about your work and producing the best thing you 153 | can, this can make review difficult. 154 | It is important to realize that it’s better to find errors in review than in 155 | production and recognize that your work fits into a larger whole. 156 | 157 | **Go about your review under the assumption that the decisions were made for a 158 | reason, not in a vacuum**. 159 | Ask about circumstances if you’re confused. 160 | 161 | Be pragmatic, ask for context, don’t filibuster, don’t block on style not 162 | explicitly covered in DO’s style guides. 163 | 164 | Code, configurations, architecture, platforms, frameworks will need to be 165 | changed. Fight for your way if you think it’s right, **but not only to be right**. 166 | 167 | 168 | ## Enforcement 169 | 170 | Enforcement of the Code of Conduct is essential. 171 | 172 | If there is no enforcement, then the Code of Conduct becomes a feel-good 173 | document without value. 174 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 12 | **To Reproduce** 13 | 1. Go to '...' 14 | 2. Click on '....' 15 | 3. Scroll down to '....' 16 | 4. See error 17 | 18 | **Expected behavior** 19 | 20 | **Actual behavior** 21 | 22 | **Screenshots** 23 | 24 | **Environment (please complete the following information):** 25 | - OS: [e.g. iOS] 26 | - Version [e.g. 22] 27 | 28 | **Language (please complete the following information):** 29 | - Language: [e.g. Go] 30 | - Version [e.g. 
22] 31 | 32 | **Additional context** 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/doc_improve.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Document 3 | about: Add document 4 | title: '' 5 | labels: 'doc' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What document to add** 11 | 12 | **Additional context** 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement 3 | about: Enhancement to a feature/API 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is the requested enhancement related to a problem? Please describe.** 11 | 12 | **Describe the solution** 13 | 14 | **Describe alternatives you've considered** 15 | 16 | **Additional context** 17 | 18 | **Affect other component or side effect** 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: 'feature' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | 12 | **Describe the solution you'd like** 13 | 14 | **Describe alternatives you've considered** 15 | 16 | **Additional context** 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/github-does-not-support-choosing-PR-template-on-web-yet: -------------------------------------------------------------------------------- 1 | Choosing PR template can be done with URL query. 2 | 3 | For now github will only read PR template from `.github/PULL_REQUEST_TEMPLATE.md` only. 
4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/refactor_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Refactor 3 | about: Refactoring without impacting on end-user 4 | title: '' 5 | labels: 'refactor' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is the requested refactoring related to a known/potential problem? Please describe.** 11 | 12 | **Describe what to do** 13 | 14 | **Describe alternatives you've considered** 15 | 16 | **Additional context** 17 | 18 | **Affect other component or side effect** 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/testing_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Testing 3 | about: Improve test suite. 4 | title: '' 5 | labels: 'testing' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is the requested test related to a known/potential problem? Please describe.** 11 | 12 | **Describe what test should be added/modified** 13 | 14 | **Describe alternatives you've considered** 15 | 16 | **Additional context** 17 | 18 | **Affect other component or side effect** 19 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | 7 | 8 | Fixes # (issue) 9 | 10 | ## Type of change 11 | 12 | 13 | 14 | - **Bug fix** 15 | - **New feature** 16 | - **Breaking change** 17 | - **Refactoring** 18 | - **Document changes** 19 | - **Test changes** 20 | 21 | 22 | ## How to reproduce it (if it is a bug-fix PR) 23 | 24 | - Env: x86-64, CentOS-7.4, kernel-3.10.0, GO-1.10.1, etc. 25 | 26 | - Step-1: 27 | - Step-2: 28 | 29 | 30 | ## The solution (to fix a bug, implement a new feature etc.) 
31 | 32 | 33 | 34 | # Checklist: 35 | 36 | - [ ] **Style**: My code follows the **style guidelines** of this project 37 | - [ ] **Self-review**: I have performed a **self-review** of my own code 38 | - [ ] **Comment**: I have **commented my code**, particularly in hard-to-understand areas 39 | - [ ] **Doc**: I have made corresponding changes to the **documentation** 40 | - [ ] **No-warnings**: My changes generate **no new warnings** 41 | - [ ] **Add-test**: I have added **tests** that prove my fix is effective or that my feature works 42 | - [ ] **Pass**: New and existing **unit tests pass** locally with my changes 43 | - [ ] **Dep**: Any **dependent** changes have been merged and published in downstream modules 44 | -------------------------------------------------------------------------------- /.github/settings-sample.yml: -------------------------------------------------------------------------------- 1 | # Usage copy this to settings.yml 2 | 3 | _extends: gh-config 4 | 5 | 6 | repository: 7 | name: name 8 | description: desc 9 | homepage: https://openacid.github.io/ 10 | topics: go, golang 11 | -------------------------------------------------------------------------------- /.github/workflows/golangci-lint.yml: -------------------------------------------------------------------------------- 1 | name: golangci-lint 2 | on: 3 | push: 4 | tags: 5 | - v* 6 | branches: 7 | - '*' 8 | pull_request: 9 | jobs: 10 | golangci: 11 | name: lint 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: golangci-lint 17 | uses: golangci/golangci-lint-action@v2 18 | with: 19 | # Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version. 
20 | version: v1.29 21 | 22 | # Optional: working directory, useful for monorepos 23 | # working-directory: somedir 24 | 25 | # disable staticcheck: 26 | # SA1019: package github.com/golang/protobuf/proto is deprecated: Use the "google.golang.org/protobuf/proto" package instead 27 | args: --issues-exit-code=0 --exclude SA1019 28 | 29 | # Optional: show only new issues if it's a pull request. The default value is `false`. 30 | # only-new-issues: true 31 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: 3 | push: 4 | pull_request: 5 | 6 | jobs: 7 | test: 8 | strategy: 9 | matrix: 10 | go-version: 11 | - 1.14.x 12 | - 1.15.x 13 | os: 14 | - ubuntu-latest 15 | - macos-latest 16 | - windows-latest 17 | 18 | runs-on: ${{ matrix.os }} 19 | 20 | steps: 21 | - name: Install Go 22 | uses: actions/setup-go@v2 23 | with: 24 | go-version: ${{ matrix.go-version }} 25 | 26 | - name: checkout 27 | uses: actions/checkout@v2 28 | 29 | - name: cache 30 | uses: actions/cache@v2 31 | with: 32 | path: ~/go/pkg/mod 33 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 34 | restore-keys: | 35 | ${{ runner.os }}-go- 36 | 37 | - name: Check go-deps.txt existence 38 | id: check_deps 39 | uses: andstor/file-existence-action@v1 40 | with: 41 | files: "go-deps.txt" 42 | 43 | - name: Read go-deps.txt 44 | id: godeps 45 | if: steps.check_deps.outputs.files_exists == 'true' 46 | uses: juliangruber/read-file-action@v1 47 | with: 48 | path: ./go-deps.txt 49 | 50 | - name: Install go-deps.txt 51 | if: steps.check_deps.outputs.files_exists == 'true' 52 | run: | 53 | go get ${{ steps.godeps.outputs.content }} 54 | 55 | - name: test 56 | run: | 57 | go test -race ./... 
58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.14.x 5 | - 1.15.x 6 | # - tip 7 | 8 | script: 9 | - make travis 10 | 11 | after_success: 12 | - make coveralls 13 | 14 | # private repo requires a crypted token: 15 | # env: 16 | # global: 17 | # # coveralls token 18 | # secure: fJOPOuwBaZ59iQA1VskxZ3h08Nt5CjGbz5PdrZVT/v5UCG7DOLuVTx3x0Tb+gR9AG9lB8Fqpsnm0jjbBAPvOnyn1KIJDuK9Xj2PvKT78vhJ/SyCnn0BAinmxu9hZqghvyWIzeM8RrA3IrvmnoSUTdE1jnTC7McJ7np6cTRGO9Xe6b4mOO1xQOHJFMyTBFvA84uSKZPbuUHCrh19YH7NKrA4MKunX49R+niEFlFEM4oNM/2FXMca+4+OdlGNJmPkG0kV5exP87ihfqI3Q++9v3Z8SR0KOblL6yRBspaRDHmfKxuGx/YEf71pu0yu7nyT7uVeIABTz5SLrqX2Fhb/cpKb7iqCBQ+ifvgpd86pkfhrPUOsIO9N6pieNxmb+aCNm5WBJ2AaT1zrrfthpfbXvEl66K209rUDL0PV1n/u1pAgY5q7DQD5YuOnyAJNPBNQYYzJnZ+X1GjSNrHKOQPjXmrgwkq7KPVlDoqiaJAh97YwUmjXaULKYOm9JBPwVaToEUeCxzK82ZZRwa4YiYl3MLpJb+SvDMl97hgc58lolfg01wHgLYAT901bbq+qsrQZY4pkW9nDGvBuJg0Mru1bu6hqk/tUA7G4amh2y/5lJxxELednfnyzQ6fBeXKb0FVOTN9xRuFBkpRL1Drmbz3y6J2flAcdpJ4KgAMUP/941J6o= 19 | -------------------------------------------------------------------------------- /.xpt.vim: -------------------------------------------------------------------------------- 1 | call XPTemplate( 'info', 'lg.Infow("`^", `^)' ) 2 | call XPTemplate( 'dd', 'lg.Infow("`mes^", "`f^", `f^`...{{^, "`f^", `f^`...^`}}^)' ) 3 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 openacid 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include common.mk 2 | 3 | log: 4 | ./build-logs.sh 5 | 6 | update: log readme 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | **Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* 4 | 5 | - [traft](#traft) 6 | - [Merge Log](#merge-log) 7 | - [Definition](#definition) 8 | - [Lemma-only-max-committer-logs](#lemma-only-max-committer-logs) 9 | - [Planned improvement to original raft](#planned-improvement-to-original-raft) 10 | - [Features:](#features) 11 | - [Progress](#progress) 12 | 13 | 14 | 15 | # traft 16 | 17 | ## Merge Log 18 | 19 | ### Definition 20 | 21 | - **Safe**: A log is safe if it has been replicated to a quorum, no matter 22 | whether or not the **committed** flag is set on any replica. 23 | 24 | --- 25 | 26 | After a replica establishes leadership, it needs to merge the latest logs from a 27 | quorum of replicas, to ensure that the leader replica has all **safe** logs. 28 | 29 | 30 | ### Lemma-only-max-committer-logs 31 | 32 | In TRaft, a leader only needs to use the logs from replicas with the max **committer**(`(term, id)`). 33 | 34 | A log that is not present in any max-committer replica a leader has seen can **not** 35 | be safe. 36 | 37 | Proof: 38 | 39 | If a log `A` becomes **safe**, it will be seen by the next leader `Li`, 40 | because a leader has to collect logs from a quorum and any two quorums 41 | intersect with each other. 42 | 43 | If a later leader `Lj`(`j>i`) has seen `Li`, it will choose `A`. 44 | Otherwise, it will see a replica that has `A`. 45 | 46 | ∴ Any newer leader will choose `A`. 47 | 48 | ∴ TRaft only needs to merge logs from replicas with the latest `Committer`. 
49 | 50 | E.g.: 51 | 52 | ``` 53 | Li: indicates a replica becomes leader 54 | A/B: a log is written 55 | 56 | R0 L1 A 57 | R1 A 58 | R2 A 59 | R3 L2 B 60 | R4 L3 61 | ------------------------> time 62 | ``` 63 | 64 | In this diagram: 65 | L2 sees `A` and then updates its `Committer` to `L2`, then writes log `B` to its 66 | local log. Then R3 crashed before forwarding any log out. 67 | 68 | - If L3 established its leadership by contacting R2 and R3, it uses logs from only 69 | R3 (R3 has the latest committer `L2`), so it will see `A`. 70 | 71 | - If L3 established its leadership by contacting R0 and R1, it will also see `A`. 72 | 73 | 74 | 都说deadline 是第一生产力. 放假没事, 试试看能不能7天写个raft出来, 当练手了. 2021-02-09 75 | 装逼有风险, 万一写不出来也别笑话我. 76 | 77 | 7天内直播: https://live.bilibili.com/22732677?visit_id=6eoy1n42a1w0 78 | 79 | # Planned improvement to original raft 80 | 81 | - Leader election with less conflict. 82 | Raft has the issue that in a term, if multiple candidates try to elect themselves, 83 | the conflicts are unacceptable. 84 | This is because in one term, a voter will only vote for at most one leader. 85 | 86 | In our impl, `term` is less strict: 87 | A voter is allowed to vote for another GREATER leader. 88 | 89 | - Out of order commit, out of order apply. 90 | 91 | We use a bitmap to describe what logs interfere with each other. 92 | 93 | 94 | - Adopt a more generalized member change algo. 95 | 96 | Get rid of single-step member change. 97 | Because any CORRECT single-step member change algo is equivalent to joint-consensus. 98 | 99 | But joint-consensus is a specialization of the TODO (I don't have a good name for it yet). 100 | 101 | 102 | 103 | # Features: 104 | 105 | - [ ] Leader election 106 | - [ ] WAL log 107 | - [ ] snapshot: impl with https://github.com/openacid/slim , a static kv-like storage engine supporting protobuf. 108 | - [ ] member change with generalized joint consensus. 109 | - [ ] Out of order commit/apply if possible. 
110 | 111 | # Progress 112 | 113 | - 2021-02-10: 114 | LOC: +1008 -1 115 | 116 | ``` 117 | buildMajorityQuorums() 118 | import TailBitmap, add Union() 119 | TailBitmap.Clone() 120 | TailBitmap.Diff() 121 | ``` 122 | 123 | - 2021-02-11: 124 | LOC: +754 -44 125 | 126 | ``` 127 | refactor buildMajorityQuorums() 128 | LeaderId.Cmp() 129 | add NewLeaderId 130 | add NewBallot() 131 | fix: LeaderId.Cmp() accept nil as operand 132 | Ballot.CmpLog() to compare only the log-related fields 133 | rename Ballot.Accepted to Ballot.AcceptedFrom 134 | TailBitmap.Len() 135 | refactor design, add ReplicaStatus, remove Ballot. 136 | impl Vote Handler, under dev!!! 137 | use gogoproto, build clean .pb.go with less code, add serveCluster() to setup a simple cluster for test 138 | add NewCmdI64() 139 | add NewRecord() 140 | add ClusterConfig.GetReplicaInfo() 141 | NewTailBitmap() accepts extra bits to set 142 | ``` 143 | 144 | - 2021-02-12: 145 | LOC: +2601 -70 146 | 147 | ``` 148 | add LeaderId:Clone() 149 | refactor TailBitmap.Clone(): use proto 150 | draft test of vote 151 | rename By to Author, AcceptedFrom to Committer. borrowed concepts from git:DDD 152 | add test: HandleVoteReq. 
add ShortStr() to Cmd, LeaderI, Record and []Record 153 | refactor VoteReply: do not use ReplicaStatus to describe log status 154 | add readme to record progress 155 | readme: impl storage with slim 156 | test that voter send back nil log 157 | update readme: collect git log 158 | TailBitmap: accept second operand to be nil 159 | ``` 160 | 161 | - 2021-02-13: 162 | LOC: +1309 -183 163 | 164 | ``` 165 | test Replicate, under dev 166 | rename Node.Log to Node.Logs 167 | add Cmd.Intefering() to check if two commands not be allowed to change execution order 168 | add Record.Interfering() 169 | update readme 170 | NewTailBitmap() support non-64-aligned offset 171 | add AddLog() for leader to propose a command 172 | use map to store cluster members instead of slice 173 | add ClusterConfig.IsQuorum() to check if a set of members is a quorum 174 | TRaft.VoteOnce() run one round voting, to establish leadership 175 | TRaft actor loop and voting loop 176 | ``` 177 | 178 | - 2021-02-14: 179 | LOC: +891 -229 180 | 181 | ``` 182 | mainloop as an actor is the only one update traft data. under dev. not passed yet 183 | test VoteLoop 184 | granted leader merges collected logs 185 | after vote, leader merge responded logs 186 | add API Propose without replication 187 | ``` 188 | 189 | - 2021-02-15: 190 | LOC: +476 -197 191 | 192 | ``` 193 | add interface toCmd() to build Cmd from string 194 | test Replicate() 195 | add TRaft.sleep(): sleep only if it is not stopped. 196 | ``` 197 | 198 | - 2021-02-16: 199 | LOC: +1297 -592 200 | 201 | ``` 202 | refactor: rename Replicate to LogForward 203 | refactor: vote_test: remove unused type, funcs 204 | gruop daily log format by date 205 | ``` 206 | 207 | 208 | 209 | 210 | -------------------------------------------------------------------------------- /_tmpl/_inc/README.md.j2: -------------------------------------------------------------------------------- 1 | # traft 2 | 3 | {% include "docs/design/mergelog.md" %} 4 | 5 | 都说deadline 是第一生产力. 
放假没事, 试试看能不能7天写个raft出来, 当练手了. 2021-02-09 6 | 装逼有风险, 万一写不出来也别笑话我. 7 | 8 | 7天内直播: https://live.bilibili.com/22732677?visit_id=6eoy1n42a1w0 9 | 10 | # Planned improvement to original raft 11 | 12 | - Leader election with less conflict. 13 | Raft has the issue that in a term, if multiple candidates try to elect themselves, 14 | the conflicts are unacceptable. 15 | This is because in one term, a voter will only vote for at most one leader. 16 | 17 | In our impl, `term` is less strict: 18 | A voter is allowed to vote for another GREATER leader. 19 | 20 | - Out of order commit, out of order apply. 21 | 22 | We use a bitmap to describe what logs interfere with each other. 23 | 24 | 25 | - Adopt a more generalized member change algo. 26 | 27 | Get rid of single-step member change. 28 | Because any CORRECT single-step member change algo is equivalent to joint-consensus. 29 | 30 | But joint-consensus is a specialization of the TODO (I don't have a good name for it yet). 31 | 32 | 33 | 34 | # Features: 35 | 36 | - [ ] Leader election 37 | - [ ] WAL log 38 | - [ ] snapshot: impl with https://github.com/openacid/slim , a static kv-like storage engine supporting protobuf. 39 | - [ ] member change with generalized joint consensus. 40 | - [ ] Out of order commit/apply if possible. 
41 | 42 | # Progress 43 | 44 | {% include "docs/log.md" %} 45 | 46 | 47 | 48 | 69 | -------------------------------------------------------------------------------- /_tmpl/_inc/badges.md: -------------------------------------------------------------------------------- 1 | [![Travis](https://travis-ci.com/openacid/{{ name }}.svg?branch=main)](https://travis-ci.com/openacid/{{ name }}) 2 | ![test](https://github.com/openacid/{{ name }}/workflows/test/badge.svg) 3 | 4 | [![Report card](https://goreportcard.com/badge/github.com/openacid/{{ name }})](https://goreportcard.com/report/github.com/openacid/{{ name }}) 5 | [![Coverage Status](https://coveralls.io/repos/github/openacid/{{ name }}/badge.svg?branch=main&service=github)](https://coveralls.io/github/openacid/{{ name }}?branch=main&service=github) 6 | 7 | [![GoDoc](https://godoc.org/github.com/openacid/{{ name }}?status.svg)](http://godoc.org/github.com/openacid/{{ name }}) 8 | [![PkgGoDev](https://pkg.go.dev/badge/github.com/openacid/{{ name }})](https://pkg.go.dev/github.com/openacid/{{ name }}) 9 | [![Sourcegraph](https://sourcegraph.com/github.com/openacid/{{ name }}/-/badge.svg)](https://sourcegraph.com/github.com/openacid/{{ name }}?badge) 10 | -------------------------------------------------------------------------------- /_tmpl/_inc/install.md: -------------------------------------------------------------------------------- 1 | # Install 2 | 3 | ```sh 4 | go get github.com/openacid/{{ name }} 5 | ``` 6 | -------------------------------------------------------------------------------- /ballot.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | // func NewBallot(cterm, cid, aterm, aid, lsn int64) *Ballot { 4 | // return &Ballot{ 5 | // Current: NewLeaderId(cterm, cid), 6 | // MaxLogSeq: lsn, 7 | // Committer: NewLeaderId(aterm, aid), 8 | // } 9 | // } 10 | 11 | // // CmpLog compares log related fields with another ballot. 12 | // // I.e. 
Committer and MaxLogSeq. 13 | // func (a *Ballot) CmpLog(b *Ballot) int { 14 | // r := a.Committer.Cmp(b.Committer) 15 | // if r != 0 { 16 | // return r 17 | // } 18 | 19 | // return cmpI64(a.MaxLogSeq, b.MaxLogSeq) 20 | // } 21 | -------------------------------------------------------------------------------- /ballot_test.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | // func TestNewBallot(t *testing.T) { 4 | 5 | // ta := require.New(t) 6 | 7 | // got := NewBallot(1, 2, 3, 4, 5) 8 | // ta.Equal(int64(1), got.Current.Term) 9 | // ta.Equal(int64(2), got.Current.Id) 10 | 11 | // ta.Equal(int64(5), got.MaxLogSeq) 12 | 13 | // ta.Equal(int64(3), got.Committer.Term) 14 | // ta.Equal(int64(4), got.Committer.Id) 15 | // } 16 | 17 | // func TestBallog_CmpLog(t *testing.T) { 18 | 19 | // ta := require.New(t) 20 | 21 | // cases := []struct { 22 | // a, b *Ballot 23 | // want int 24 | // }{ 25 | // {a: NewBallot(0, 0, 1, 1, 1), b: NewBallot(0, 0, 1, 1, 1), want: 0}, 26 | // {a: NewBallot(0, 0, 1, 1, 2), b: NewBallot(0, 0, 1, 1, 1), want: 1}, 27 | // {a: NewBallot(0, 0, 1, 2, 0), b: NewBallot(0, 0, 1, 1, 1), want: 1}, 28 | // {a: NewBallot(0, 0, 2, 0, 0), b: NewBallot(0, 0, 1, 1, 1), want: 1}, 29 | // } 30 | 31 | // for i, c := range cases { 32 | // ta.Equal(c.want, c.a.CmpLog(c.b), "%d-th: case: %+v", i+1, c) 33 | // ta.Equal(-c.want, c.b.CmpLog(c.a), "%d-th: case: %+v", i+1, c) 34 | // } 35 | // } 36 | -------------------------------------------------------------------------------- /build-logs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | { 4 | 5 | git log --format="%ai %h" --reverse \ 6 | | awk ' 7 | { 8 | # find the first diff date, print prev 9 | if ($1 != prev) {print prevhash } 10 | prev = $1 11 | prevhash = $4 12 | } 13 | ' 14 | 15 | git log -n1 --format="%h" 16 | } \ 17 | | while read h; do 18 | if [ ".$prev" == "." 
]; then 19 | prev=$h 20 | continue 21 | fi 22 | 23 | git log -n1 $h --format="%ai" \ 24 | | awk '{print "- " $1 ":"}' 25 | git diff $prev $h --shortstat -- . ':(exclude)*.pb.go' \ 26 | | awk '{ print " LOC: +" $4 " -" $6 ""} ' 27 | echo 28 | 29 | echo ' ```' 30 | git log $prev..$h --reverse --format="%s" \ 31 | | awk '{ gsub("day-.: ", "", $0); print " " $0 }' 32 | echo ' ```' 33 | echo 34 | prev=$h 35 | done > docs/log.md 36 | -------------------------------------------------------------------------------- /clusterconfig.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import proto "github.com/gogo/protobuf/proto" 4 | 5 | func (cc *Cluster) MaxPosition() int64 { 6 | maxPos := int64(0) 7 | for _, m := range cc.Members { 8 | if maxPos < m.Position { 9 | maxPos = m.Position 10 | } 11 | } 12 | 13 | return maxPos 14 | } 15 | 16 | func (cc *Cluster) Clone() *Cluster { 17 | return proto.Clone(cc).(*Cluster) 18 | } 19 | 20 | func (cc *Cluster) SortedReplicaInfos() []*ReplicaInfo { 21 | maxPos := cc.MaxPosition() 22 | 23 | members := make([]*ReplicaInfo, maxPos+1) 24 | 25 | for _, m := range cc.Members { 26 | members[m.Position] = m 27 | } 28 | 29 | return members 30 | } 31 | 32 | // check if a set of member is a quorum. 33 | // The set of member is a bitmap in which a `1` indicates a present member. 34 | // In this system, the position of `1` is ReplicaInfo.Position. 
35 | func (cc *Cluster) IsQuorum(v uint64) bool { 36 | 37 | for _, q := range cc.Quorums { 38 | if v&q == q { 39 | return true 40 | } 41 | } 42 | 43 | return false 44 | } 45 | -------------------------------------------------------------------------------- /clusterconfig_test.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestClusterConfig_SortedReplicaInfos(t *testing.T) { 10 | 11 | ta := require.New(t) 12 | 13 | cc := &Cluster{ 14 | Members: map[int64]*ReplicaInfo{ 15 | 1: {1, "111", 0}, 16 | 2: {2, "222", 2}, 17 | 3: {3, "333", 4}, 18 | }, 19 | } 20 | 21 | sorted := cc.SortedReplicaInfos() 22 | 23 | cases := []struct { 24 | input int64 25 | want *ReplicaInfo 26 | }{ 27 | {0, &ReplicaInfo{1, "111", 0}}, 28 | {1, nil}, 29 | {2, &ReplicaInfo{2, "222", 2}}, 30 | {3, nil}, 31 | {4, &ReplicaInfo{3, "333", 4}}, 32 | } 33 | 34 | for i, c := range cases { 35 | got := sorted[c.input] 36 | ta.Equal(c.want, got, "%d-th: case: %+v", i+1, c) 37 | } 38 | } 39 | 40 | func TestClusterConfig_IsQuorum(t *testing.T) { 41 | 42 | ta := require.New(t) 43 | 44 | cc := &Cluster{ 45 | Members: map[int64]*ReplicaInfo{ 46 | 1: {1, "111", 0}, 47 | 2: {2, "222", 2}, 48 | 3: {3, "333", 4}, 49 | }, 50 | } 51 | // quorums are: 52 | // 10100 53 | // 00101 54 | // 10001 55 | cc.Quorums = buildMajorityQuorums(1 | 4 | 16) 56 | 57 | cases := []struct { 58 | input uint64 59 | want bool 60 | }{ 61 | {1, false}, 62 | {4, false}, 63 | {16, false}, 64 | {1 | 4, true}, 65 | {4 | 16, true}, 66 | {1 | 16, true}, 67 | {1 | 4 | 16, true}, 68 | } 69 | 70 | for i, c := range cases { 71 | got := cc.IsQuorum(c.input) 72 | ta.Equal(c.want, got, "%d-th: case: %+v", i+1, c) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /cmd.go: -------------------------------------------------------------------------------- 1 | 
package traft 2 | 3 | import ( 4 | fmt "fmt" 5 | "strconv" 6 | "strings" 7 | ) 8 | 9 | func NewCmdI64(op, key string, v int64) *Cmd { 10 | cmd := &Cmd{ 11 | Op: op, 12 | Key: key, 13 | Value: &Cmd_VI64{v}, 14 | } 15 | return cmd 16 | } 17 | 18 | func cmdValueShortStr(v isCmd_Value) string { 19 | switch vv := v.(type) { 20 | case *Cmd_VI64: 21 | return fmt.Sprintf("%d", vv.VI64) 22 | case *Cmd_VStr: 23 | return vv.VStr 24 | // TODO Cluster 25 | default: 26 | return fmt.Sprintf("%s", vv) 27 | } 28 | } 29 | 30 | func (c *Cmd) ShortStr() string { 31 | if c == nil { 32 | return "()" 33 | } 34 | return fmt.Sprintf("%s(%s, %s)", 35 | c.Op, c.Key, cmdValueShortStr(c.Value)) 36 | } 37 | 38 | // Interfering check if a command interferes with another one, 39 | // i.e. they change the same key. 40 | func (a *Cmd) Interfering(b *Cmd) bool { 41 | if a == nil || b == nil { 42 | return false 43 | } 44 | 45 | if a.Op == "set" && b.Op == "set" { 46 | if a.Key == b.Key { 47 | return true 48 | } 49 | } 50 | 51 | return false 52 | } 53 | 54 | type toCmder interface { 55 | ToCmd() *Cmd 56 | } 57 | 58 | type cstr string 59 | 60 | func (c *cstr) ToCmd() *Cmd { 61 | if *c == "" { 62 | return nil 63 | } 64 | 65 | kv := strings.Split(string(*c), "=") 66 | k := kv[0] 67 | 68 | v, err := strconv.ParseInt(kv[1], 10, 64) 69 | if err != nil { 70 | panic(string(*c) + " convert to Cmd") 71 | } 72 | return NewCmdI64("set", k, v) 73 | } 74 | 75 | func toCmd(x interface{}) *Cmd { 76 | if x == nil { 77 | return nil 78 | } 79 | 80 | switch v := x.(type) { 81 | case string: 82 | s := cstr(v) 83 | return s.ToCmd() 84 | 85 | case *Cmd: 86 | return v 87 | } 88 | panic("invalid type to convert to cmd") 89 | } 90 | -------------------------------------------------------------------------------- /cmd_test.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func 
TestNewCmdI64(t *testing.T) { 10 | 11 | ta := require.New(t) 12 | 13 | ta.Equal(&Cmd{ 14 | Op: "foo", 15 | Key: "key", 16 | Value: &Cmd_VI64{3}, 17 | }, NewCmdI64("foo", "key", 3)) 18 | 19 | } 20 | 21 | func TestCmd_Interfering(t *testing.T) { 22 | 23 | ta := require.New(t) 24 | 25 | cases := []struct { 26 | a, b *Cmd 27 | want bool 28 | }{ 29 | {nil, nil, false}, 30 | {nil, NewCmdI64("bar", "x", 4), false}, 31 | {NewCmdI64("foo", "x", 3), NewCmdI64("bar", "x", 4), false}, 32 | {NewCmdI64("foo", "x", 3), NewCmdI64("foo", "x", 4), false}, 33 | {NewCmdI64("set", "x", 3), NewCmdI64("set", "y", 4), false}, 34 | {NewCmdI64("set", "x", 3), NewCmdI64("set", "x", 4), true}, 35 | } 36 | 37 | for i, c := range cases { 38 | ta.Equal(c.want, c.a.Interfering(c.b), "%d-th: case: %+v", i+1, c) 39 | ta.Equal(c.want, c.b.Interfering(c.a), "%d-th: case: %+v", i+1, c) 40 | } 41 | } 42 | 43 | func Test_cstr(t *testing.T) { 44 | 45 | ta := require.New(t) 46 | 47 | cases := []struct { 48 | input cstr 49 | want *Cmd 50 | }{ 51 | {"x=3", NewCmdI64("set", "x", 3)}, 52 | {"y=4", NewCmdI64("set", "y", 4)}, 53 | {"", nil}, 54 | } 55 | 56 | for i, c := range cases { 57 | got := c.input.ToCmd() 58 | ta.Equal(c.want, got, "%d-th: case: %+v", i+1, c) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /commit.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import "github.com/pkg/errors" 4 | 5 | func (tr *TRaft) leaderUpdateCommitted( 6 | committer *LeaderId, lsns []int64) error { 7 | 8 | id := tr.Id 9 | me := tr.Status[id] 10 | if committer.Equal(me.VotedFor) { 11 | // Logs are intact. 12 | // may be expired, Logs wont change unless VotedFor changes. 
13 | 14 | // NOTE: using start, end lsn to describe logs requires every 15 | // forwarding action operates on continous logs 16 | for i := lsns[0]; i < lsns[1]; i++ { 17 | r := tr.Logs[i-tr.LogOffset] 18 | me.Committed.Union(r.Overrides) 19 | } 20 | 21 | return nil 22 | } 23 | 24 | err := errors.Wrapf(ErrLeaderLost, 25 | "committer: %s, current %s", 26 | committer.ShortStr(), me.VotedFor.ShortStr(), 27 | ) 28 | lg.Infow("leaderUpdateCommitted", "err", err) 29 | return err 30 | } 31 | -------------------------------------------------------------------------------- /common.mk: -------------------------------------------------------------------------------- 1 | # Usage: echo 'include common.mk' > Makefile 2 | 3 | TOP_PKG := $(shell go list .) 4 | NAME := $(shell go list . | awk -F/ '{print $$NF}') 5 | PKGS := $(shell go list ./... | grep -v "^$(TOP_PKG)/\(vendor\|prototype\)") 6 | 7 | # PKGS := github.com/openacid/slimarray/array \ 8 | # github.com/openacid/slimarray/bit \ 9 | # github.com/openacid/slimarray/trie \ 10 | 11 | SRCDIRS := $(shell go list -f '{{.Dir}}' $(PKGS)) 12 | 13 | # gofmt check vendor dir. we need to skip vendor manually 14 | GOFILES := $(shell find $(SRCDIRS) -not -path "*/vendor/*" -name "*.go") 15 | GO := go 16 | 17 | check: test vet gofmt misspell unconvert staticcheck ineffassign unparam 18 | 19 | travis: vet gofmt misspell unconvert ineffassign unparam test 20 | 21 | test: 22 | # fail fast with severe bugs 23 | $(GO) test -short $(PKGS) 24 | $(GO) test -tags debug $(PKGS) 25 | # test release version and generate coverage data for task `coveralls`. 26 | $(GO) test -covermode=count -coverprofile=coverage.out $(PKGS) 27 | 28 | vet: 29 | $(GO) vet $(PKGS) 30 | 31 | staticcheck: 32 | $(GO) get honnef.co/go/tools/cmd/staticcheck 33 | # ST1016: methods on the same type should have the same receiver name 34 | # .pb.go have this issue. 
35 | staticcheck -checks all,-ST1016 $(PKGS) 36 | 37 | misspell: 38 | $(GO) get github.com/client9/misspell/cmd/misspell 39 | find $(SRCDIRS) -name '*.go' -or -name '*.md' | grep -v "\bvendor/" | xargs misspell \ 40 | -locale US \ 41 | -error 42 | misspell \ 43 | -locale US \ 44 | -error \ 45 | *.md *.go 46 | 47 | unconvert: 48 | $(GO) get github.com/mdempsky/unconvert 49 | unconvert -v $(PKGS) 50 | 51 | ineffassign: 52 | $(GO) get github.com/gordonklaus/ineffassign 53 | find $(SRCDIRS) -name '*.go' | grep -v "\bvendor/" | xargs ineffassign 54 | 55 | pedantic: check errcheck 56 | 57 | unparam: 58 | $(GO) get mvdan.cc/unparam 59 | unparam ./... 60 | 61 | errcheck: 62 | $(GO) get github.com/kisielk/errcheck 63 | errcheck $(PKGS) 64 | 65 | gofmt: 66 | @echo Checking code is gofmted 67 | @test -z "$(shell gofmt -s -l -d -e $(GOFILES) | tee /dev/stderr)" 68 | 69 | ben: test 70 | $(GO) test ./... -run=none -bench=. -benchmem 71 | 72 | gen: 73 | $(GO) generate ./... 74 | 75 | doc: 76 | # build a markdown version of package doc to embed to README.md 77 | # $(GO) get github.com/robertkrimen/godocdown/godocdown 78 | godocdown . > docs/$(NAME).md 79 | # "package" is the first phrase in a go doc. 80 | # "## Usage" is the start of API section. 
81 | cat docs/$(NAME).md | awk '/^Package /,/^## Usage/' | grep -v '^## Usage' > docs/$(NAME)-package.md 82 | 83 | 84 | readme: doc 85 | python ./scripts/build_md.py 86 | # brew install nodejs 87 | # npm install -g doctoc 88 | doctoc --title '' --github README.md 89 | 90 | fix: 91 | gofmt -s -w $(GOFILES) 92 | unconvert -v -apply $(PKGS) 93 | 94 | 95 | # local coverage 96 | coverage: 97 | $(GO) test -covermode=count -coverprofile=coverage.out $(PKGS) 98 | go tool cover -func=coverage.out 99 | # go tool cover -html=coverage.out 100 | 101 | # send coverage to coveralls 102 | coveralls: 103 | # this job relies on the output of task test: `-coverprofile=coverage.out` 104 | $(GO) get golang.org/x/tools/cmd/cover 105 | $(GO) get github.com/mattn/goveralls 106 | goveralls -ignore='*.pb.go' -coverprofile=coverage.out -service=travis-ci 107 | -------------------------------------------------------------------------------- /docs/design/mergelog.md: -------------------------------------------------------------------------------- 1 | ## Merge Log 2 | 3 | ### Definition 4 | 5 | - **Safe**: A log is safe if it has been replicated to a quorum, no matter 6 | whether or not the **committed** flag is set on any replica. 7 | 8 | --- 9 | 10 | After a replica establishes leadership, it needs to merge the latest logs from a 11 | quorum of replicas, to ensure that the leader replica has all **safe** logs. 12 | 13 | 14 | ### Lemma-only-max-committer-logs 15 | 16 | In TRaft, a leader only needs to use the logs from replicas with the max **committer**(`(term, id)`). 17 | 18 | A log that is not present in any max-committer replica the leader has seen can **not** 19 | be safe. 20 | 21 | Proof: 22 | 23 | If a log `A` becomes **safe**, it will be seen by the next leader `Li`. 24 | Because a leader has to collect logs from a quorum and any two quorums 25 | intersect with each other. 26 | 27 | If the next next leader `Lj`(`j>i`) has seen `Li`, it will choose `A`. 
28 | Otherwise, it will see a replica that has `A`. 29 | 30 | ∴ Any newer leader will choose `A`. 31 | 32 | ∴ TRaft only needs to merge logs from replicas with the latest `Committer`. 33 | 34 | E.g.: 35 | 36 | ``` 37 | Li: indicates a replica becomes leader 38 | A/B: a log is written 39 | 40 | R0 L1 A 41 | R1 A 42 | R2 A 43 | R3 L2 B 44 | R4 L3 45 | ------------------------> time 46 | ``` 47 | 48 | In this diagram: 49 | L2 sees `A` and then updates its `Committer` to `L2`, then writes log `B` to its 50 | local log. And R3 crashed before forwarding any log out. 51 | 52 | - If L3 established its leadership by contacting R2 and R3, it uses logs from only 53 | R3(R3 has the latest committer `L2`), it will see `A`. 54 | 55 | - If L3 established its leadership by contacting R0 and R1, it will also see `A`. 56 | 57 | -------------------------------------------------------------------------------- /docs/log.md: -------------------------------------------------------------------------------- 1 | - 2021-02-10: 2 | LOC: +1008 -1 3 | 4 | ``` 5 | buildMajorityQuorums() 6 | import TailBitmap, add Union() 7 | TailBitmap.Clone() 8 | TailBitmap.Diff() 9 | ``` 10 | 11 | - 2021-02-11: 12 | LOC: +754 -44 13 | 14 | ``` 15 | refactor buildMajorityQuorums() 16 | LeaderId.Cmp() 17 | add NewLeaderId 18 | add NewBallot() 19 | fix: LeaderId.Cmp() accept nil as operand 20 | Ballot.CmpLog() to compare only the log-related fields 21 | rename Ballot.Accepted to Ballot.AcceptedFrom 22 | TailBitmap.Len() 23 | refactor design, add ReplicaStatus, remove Ballot. 24 | impl Vote Handler, under dev!!! 
25 | use gogoproto, build clean .pb.go with less code, add serveCluster() to setup a simple cluster for test 26 | add NewCmdI64() 27 | add NewRecord() 28 | add ClusterConfig.GetReplicaInfo() 29 | NewTailBitmap() accepts extra bits to set 30 | ``` 31 | 32 | - 2021-02-12: 33 | LOC: +2601 -70 34 | 35 | ``` 36 | add LeaderId:Clone() 37 | refactor TailBitmap.Clone(): use proto 38 | draft test of vote 39 | rename By to Author, AcceptedFrom to Committer. borrowed concepts from git:DDD 40 | add test: HandleVoteReq. add ShortStr() to Cmd, LeaderI, Record and []Record 41 | refactor VoteReply: do not use ReplicaStatus to describe log status 42 | add readme to record progress 43 | readme: impl storage with slim 44 | test that voter send back nil log 45 | update readme: collect git log 46 | TailBitmap: accept second operand to be nil 47 | ``` 48 | 49 | - 2021-02-13: 50 | LOC: +1309 -183 51 | 52 | ``` 53 | test Replicate, under dev 54 | rename Node.Log to Node.Logs 55 | add Cmd.Intefering() to check if two commands not be allowed to change execution order 56 | add Record.Interfering() 57 | update readme 58 | NewTailBitmap() support non-64-aligned offset 59 | add AddLog() for leader to propose a command 60 | use map to store cluster members instead of slice 61 | add ClusterConfig.IsQuorum() to check if a set of members is a quorum 62 | TRaft.VoteOnce() run one round voting, to establish leadership 63 | TRaft actor loop and voting loop 64 | ``` 65 | 66 | - 2021-02-14: 67 | LOC: +891 -229 68 | 69 | ``` 70 | mainloop as an actor is the only one update traft data. under dev. not passed yet 71 | test VoteLoop 72 | granted leader merges collected logs 73 | after vote, leader merge responded logs 74 | add API Propose without replication 75 | ``` 76 | 77 | - 2021-02-15: 78 | LOC: +476 -197 79 | 80 | ``` 81 | add interface toCmd() to build Cmd from string 82 | test Replicate() 83 | add TRaft.sleep(): sleep only if it is not stopped. 
84 | ``` 85 | 86 | - 2021-02-16: 87 | LOC: +1297 -592 88 | 89 | ``` 90 | refactor: rename Replicate to LogForward 91 | refactor: vote_test: remove unused type, funcs 92 | gruop daily log format by date 93 | ``` 94 | 95 | -------------------------------------------------------------------------------- /docs/traft-package.md: -------------------------------------------------------------------------------- 1 | Package traft is a raft variant with out-of-order commit/apply and a more 2 | generalized member change algo. 3 | 4 | -------------------------------------------------------------------------------- /elect.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | "math/rand" 5 | "sync/atomic" 6 | "time" 7 | 8 | "github.com/openacid/low/mathext/util" 9 | "github.com/pkg/errors" 10 | ) 11 | 12 | // run forever to elect itself as leader if there is no leader in this cluster. 13 | func (tr *TRaft) ElectLoop() { 14 | 15 | id := tr.Id 16 | 17 | slp := tr.sleep 18 | 19 | maxStaleTermSleep := time.Millisecond * 200 20 | heartBeatSleep := time.Millisecond * 200 21 | followerSleep := time.Millisecond * 200 22 | 23 | for atomic.LoadInt64(&tr.running) == 1 { 24 | var currVote *LeaderId 25 | var expireAt int64 26 | var logst *LogStatus 27 | var config *Cluster 28 | 29 | now := uSecondI64() 30 | lg.Infow("vote loop round start:", 31 | "Id", tr.Id, 32 | ) 33 | 34 | err := tr.query(func() error { 35 | me := tr.Status[tr.Id] 36 | 37 | currVote = me.VotedFor.Clone() 38 | expireAt = me.VoteExpireAt 39 | logst = ExportLogStatus(tr.Status[tr.Id]) 40 | config = tr.Config.Clone() 41 | 42 | if now < expireAt { 43 | return nil 44 | } 45 | 46 | // init state for voting myself 47 | 48 | me.VotedFor.Term++ 49 | me.VotedFor.Id = tr.Id 50 | currVote = me.VotedFor.Clone() 51 | 52 | me.VoteExpireAt = uSecondI64() + leaderLease 53 | 54 | return errors.Wrapf(ErrNeedElect, "expireAt-now: %d", expireAt-now) 55 | 56 | }).err 57 | 58 
| if err == nil { 59 | // TODO refine this: wait until VoteExpireAt and watch for missing 60 | // heartbeat. 61 | lg.Infow("leader-not-expired", 62 | "Id", tr.Id, 63 | "VotedFor", currVote, 64 | "leadst.VoteExpireAt-now", expireAt-now) 65 | 66 | if currVote.Id == tr.Id { 67 | // I am a leader 68 | // TODO heartbeat other replicas to keep leadership 69 | slp(heartBeatSleep) 70 | } else { 71 | slp(followerSleep) 72 | } 73 | continue 74 | } 75 | 76 | // call for a new leader!!! 77 | lg.Infow("leader-expired", 78 | "Id", tr.Id, 79 | "VotedFor", currVote, 80 | "leadst.VoteExpireAt-now", expireAt-now) 81 | 82 | tr.sendMsg("vote-start", currVote.ShortStr(), logst) 83 | 84 | voteReplies, err, higher := ElectOnce( 85 | currVote, 86 | logst, 87 | config, 88 | ) 89 | 90 | lg.Infow("vote-loop:result", "Id", tr.Id, "voteReplies", voteReplies, "err", err, "higher", higher) 91 | 92 | if voteReplies == nil { 93 | // fail to elect me. 94 | tr.sendMsg("vote-fail", "err", err) 95 | tr.query(func() error { 96 | 97 | me := tr.Status[tr.Id] 98 | 99 | if currVote.Cmp(me.VotedFor) == 0 { 100 | // I did not vote other ones yet, and I am not leader. 101 | // reset it. 102 | me.VoteExpireAt = 0 103 | } 104 | 105 | return nil 106 | }) 107 | 108 | // wait for some time by err 109 | switch errors.Cause(err) { 110 | case ErrStaleTermId: 111 | slp(time.Millisecond*5 + time.Duration(rand.Int63n(int64(maxStaleTermSleep)))) 112 | case ErrTimeout: 113 | slp(time.Millisecond * 10) 114 | case ErrStaleLog: 115 | // I can not be the leader. 116 | // sleep a day. waiting for others to elect to be a leader. 
117 | slp(time.Second * 86400) 118 | } 119 | continue 120 | } 121 | 122 | // granted by a quorum 123 | 124 | lg.Infow("to-update-leader", "votedFor", currVote) 125 | 126 | updateErr := tr.query(func() error { 127 | 128 | me := tr.Status[tr.Id] 129 | 130 | if currVote.Cmp(me.VotedFor) != 0 { 131 | return errors.Wrapf(ErrLeaderLost, "when updating leadership and follower state") 132 | } 133 | 134 | tr.establishLeadership(currVote, voteReplies) 135 | return nil 136 | 137 | }).err 138 | 139 | if updateErr != nil { 140 | tr.sendMsg("vote-fail", "reason:fail-to-update", currVote) 141 | lg.Infow("vote-fail", "Id", id, 142 | "currVote", currVote, 143 | "err", updateErr.Error(), 144 | ) 145 | continue 146 | } 147 | 148 | tr.sendMsg("vote-win", currVote) 149 | slp(heartBeatSleep) 150 | } 151 | } 152 | 153 | // returns: 154 | // ElectReply-s: if vote granted by a quorum, returns collected replies. 155 | // Otherwise returns nil. 156 | // error: ErrStaleLog, ErrStaleTermId, ErrTimeout. 157 | // higherTerm: if seen, upgrade term and retry 158 | func ElectOnce( 159 | candidate *LeaderId, 160 | logStatus *LogStatus, 161 | cluster *Cluster, 162 | ) ([]*ElectReply, error, int64) { 163 | 164 | // TODO vote need cluster id: 165 | // a stale member may try to elect on another cluster. 
166 | 167 | id := candidate.Id 168 | 169 | replies := make([]*ElectReply, 0) 170 | 171 | req := &ElectReq{ 172 | Candidate: candidate, 173 | Committer: logStatus.GetCommitter(), 174 | Accepted: logStatus.GetAccepted(), 175 | } 176 | 177 | type voteRst struct { 178 | from *ReplicaInfo 179 | reply *ElectReply 180 | err error 181 | } 182 | 183 | higherTerm := int64(-1) 184 | var logErr error 185 | 186 | timeout := time.Second 187 | sess := rpcToAll(id, cluster, meth.Elect, req, timeout) 188 | 189 | for res := range sess.resCh { 190 | 191 | reply := res.reply.(*ElectReply) 192 | lg.Infow("elect:recv-reply", "reply", reply, "res.err", res.err) 193 | 194 | if reply.OK { 195 | replies = append(replies, reply) 196 | if sess.updateOKBitmap(res) { 197 | // do not cancel 198 | return replies, nil, -1 199 | } 200 | continue 201 | } 202 | 203 | if reply.VotedFor.Cmp(candidate) > 0 { 204 | higherTerm = util.MaxI64(higherTerm, reply.VotedFor.Term) 205 | } 206 | 207 | if CmpLogStatus(reply, logStatus) > 0 { 208 | // TODO cancel timer 209 | logErr = errors.Wrapf(ErrStaleLog, 210 | "local: committer:%s max-lsn:%d remote: committer:%s max-lsn:%d", 211 | logStatus.GetCommitter().ShortStr(), 212 | logStatus.GetAccepted().Len(), 213 | reply.Committer.ShortStr(), 214 | reply.Accepted.Len()) 215 | } 216 | } 217 | 218 | if logErr != nil { 219 | return nil, logErr, higherTerm 220 | } 221 | 222 | err := errors.Wrapf(ErrStaleTermId, "seen a higher term:%d", higherTerm) 223 | return nil, err, higherTerm 224 | } 225 | 226 | // no lock protect, must be called by TRaft.Loop() 227 | func (tr *TRaft) hdlElectReq(req *ElectReq) *ElectReply { 228 | 229 | id := tr.Id 230 | 231 | me := tr.Status[tr.Id] 232 | 233 | // A vote reply just send back a voter's status. 234 | // It is the candidate's responsibility to check if a voter granted it. 
235 | repl := &ElectReply{ 236 | OK: false, 237 | Id: id, 238 | VotedFor: me.VotedFor.Clone(), 239 | Committer: me.Committer.Clone(), 240 | Accepted: me.Accepted.Clone(), 241 | Committed: me.Committed.Clone(), 242 | } 243 | 244 | lg.Infow("handleVoteReq", 245 | "Id", id, 246 | "req.Candidate", req.Candidate, 247 | "me.Committer", me.Committer.ShortStr(), 248 | "me.Accepted", me.Accepted.ShortStr(), 249 | "me.VotedFor", me.VotedFor.ShortStr(), 250 | "req.Committer", req.Committer.ShortStr(), 251 | "req.Accepted", req.Accepted.ShortStr(), 252 | ) 253 | 254 | if CmpLogStatus(req, me) < 0 { 255 | // I have more logs than the candidate. 256 | // It cant be a leader. 257 | tr.sendMsg("hdl-vote-req:reject-by-logstat", 258 | "req.Candidate", req.Candidate, 259 | "me.Committer", me.Committer, 260 | "me.Accepted", me.Accepted, 261 | "req.Committer", req.Committer, 262 | "req.Accepted", req.Accepted, 263 | ) 264 | return repl 265 | } 266 | 267 | // candidate has the upto date logs. 268 | 269 | r := req.Candidate.Cmp(me.VotedFor) 270 | if r < 0 { 271 | // I've voted for other leader with higher privilege. 272 | // This candidate could not be a legal leader. 273 | // just send back enssential info to info it. 274 | tr.sendMsg("hdl-vote-req:reject-by-term-id", 275 | "req.Candidate", req.Candidate, 276 | "me.VotedFor", me.VotedFor, 277 | ) 278 | return repl 279 | } 280 | 281 | // grant vote 282 | 283 | lg.Infow("voted", "id", id, "VotedFor", me.VotedFor) 284 | tr.sendMsg("hdl-vote-req:grant", 285 | "req.Candidate", req.Candidate, 286 | "me.VotedFor", me.VotedFor) 287 | 288 | me.VotedFor = req.Candidate.Clone() 289 | me.VoteExpireAt = uSecondI64() + leaderLease 290 | repl.OK = true 291 | repl.VotedFor = req.Candidate.Clone() 292 | 293 | // send back the logs I have but the candidate does not. 
294 | 295 | logs := make([]*LogRecord, 0) 296 | 297 | lg.Infow("hdlElectReq", "me.Accepted", me.Accepted) 298 | lg.Infow("hdlElectReq", "req.Accepted", req.Accepted) 299 | start := me.Accepted.Offset 300 | end := me.Accepted.Len() 301 | for i := start; i < end; i++ { 302 | if me.Accepted.Get(i) != 0 && req.Accepted.Get(i) == 0 { 303 | r := tr.Logs[i-tr.LogOffset] 304 | logs = append(logs, r) 305 | lg.Infow("hdlElectReq:send-log", "r", r) 306 | } 307 | } 308 | 309 | repl.Logs = logs 310 | 311 | return repl 312 | } 313 | 314 | // establishLeadership updates leader state when a election approved by a quorum. 315 | func (tr *TRaft) establishLeadership(currVote *LeaderId, replies []*ElectReply) { 316 | 317 | me := tr.Status[tr.Id] 318 | 319 | // not to update expire time. 320 | // let the leader expire earlier than follower to reduce chance that follower reject replication from leader. 321 | 322 | tr.mergeFollowerLogs(replies) 323 | 324 | // then going on replicating these logs to others. 325 | // 326 | // TODO update local view of status of other replicas. 327 | for _, r := range replies { 328 | follower := tr.Status[r.Id] 329 | if r.Committer.Equal(me.Committer) { 330 | follower.Accepted = r.Accepted.Clone() 331 | } else { 332 | // if committers are different, the leader can no be 333 | // sure whether a follower has identical logs 334 | follower.Accepted = r.Committed.Clone() 335 | } 336 | follower.Committed = r.Committed.Clone() 337 | 338 | follower.Committer = r.Committer.Clone() 339 | } 340 | 341 | // Leader accept all the logs it sees 342 | me.Committer = currVote.Clone() 343 | 344 | } 345 | 346 | // find the max committer log to fill in local log holes. 347 | func (tr *TRaft) mergeFollowerLogs(votes []*ElectReply) { 348 | 349 | // TODO if the leader chose Logs[i] from replica `r`, e.g. R[r].Logs[i] 350 | // then the logs R[r].Logs[:i] are safe to choose. 
351 | // Because if a different R[r'].Logs[j] is committed, for a j <= i 352 | // the leader that written R[r].Log[i] must have chosen R[r'].Logs[j] . 353 | // ∴ R[r].Logs[j] == R[r'].Logs[j] 354 | // 355 | // For now 2021 Feb 14, 356 | // we just erase all non-committed logs on followers. 357 | 358 | id := tr.Id 359 | me := tr.Status[id] 360 | 361 | maxCommitter, chosen := tr.chooseMaxCommitterReplies(votes) 362 | lg.Infow("mergeFollowerLogs", "maxCommitter", maxCommitter, "chosen", chosen) 363 | lg.Infow("mergeFollowerLogs", "mylogs", RecordsShortStr(tr.Logs)) 364 | 365 | l := me.Accepted.Len() 366 | for lsn := me.Accepted.Offset; lsn < l; lsn++ { 367 | if me.Accepted.Get(lsn) != 0 { 368 | continue 369 | } 370 | 371 | rec := getLog(lsn, chosen) 372 | 373 | // TODO fill in with empty log 374 | if rec.Empty() { 375 | continue 376 | } 377 | 378 | tr.Logs[lsn-tr.LogOffset] = rec 379 | me.Accepted.Union(rec.Overrides) 380 | 381 | lg.Infow("merge-log", 382 | "lsn", lsn, 383 | "committer", maxCommitter, 384 | "record", rec) 385 | } 386 | } 387 | 388 | // getLog returns one log record if a log with the specified lsn presents in any vote replies. 389 | func getLog(lsn int64, replies []*ElectReply) *LogRecord { 390 | var rec *LogRecord 391 | for _, vr := range replies { 392 | r := vr.PopRecord(lsn) 393 | if r == nil { 394 | continue 395 | } 396 | 397 | if rec != nil && rec.Author.Cmp(r.Author) != 0 { 398 | panic("wtf") 399 | } 400 | 401 | rec = r 402 | // TODO if r is not nil: break 403 | } 404 | 405 | return rec 406 | } 407 | 408 | // chooseMaxCommitterReplies chooses the max Committer and the vote-replies with the max Committer. 409 | // logs with Committer smaller than me are discarded too. 
410 | func (tr *TRaft) chooseMaxCommitterReplies(replies []*ElectReply) (*LeaderId, []*ElectReply) { 411 | me := tr.Status[tr.Id] 412 | maxCommitter := me.Committer 413 | for _, v := range replies { 414 | if v.Committer.Cmp(maxCommitter) > 0 { 415 | maxCommitter = v.Committer 416 | } 417 | } 418 | chosen := make([]*ElectReply, 0, len(replies)) 419 | for _, v := range replies { 420 | if v.Committer.Cmp(maxCommitter) == 0 { 421 | chosen = append(chosen, v) 422 | } 423 | } 424 | return maxCommitter, chosen 425 | } 426 | -------------------------------------------------------------------------------- /elect_test.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "reflect" 7 | "strings" 8 | "testing" 9 | "time" 10 | 11 | "github.com/pkg/errors" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | type candStat struct { 16 | candidateId *LeaderId 17 | committer *LeaderId 18 | logs []int64 19 | } 20 | 21 | type voterStat struct { 22 | votedFor *LeaderId 23 | committer *LeaderId 24 | author *LeaderId 25 | logs []int64 26 | nilLogs map[int64]bool 27 | committed []int64 28 | } 29 | 30 | type wantVoteReply struct { 31 | OK bool 32 | votedFor *LeaderId 33 | committer *LeaderId 34 | allLogBitmap *TailBitmap 35 | logs string 36 | } 37 | 38 | // a helper func to setup TRaft cluster and close it. 
39 | // Because `defer tr.Stop()` does not block until the next test case 40 | func withCluster(t *testing.T, 41 | name string, 42 | ids []int64, 43 | f func(t *testing.T, ts []*TRaft)) { 44 | 45 | lid := NewLeaderId 46 | 47 | ts := newCluster(ids) 48 | for i, id := range ids { 49 | ts[i].initTraft(lid(0, 0), lid(0, 0), []int64{}, nil, nil, lid(0, id)) 50 | } 51 | startCluster(ts) 52 | 53 | t.Run(name, func(t *testing.T) { 54 | f(t, ts) 55 | }) 56 | 57 | stopAll(ts) 58 | } 59 | 60 | func TestTRaft_hdlVoteReq(t *testing.T) { 61 | 62 | ta := require.New(t) 63 | 64 | bm := NewTailBitmap 65 | 66 | ids := []int64{1, 2, 3} 67 | id := int64(1) 68 | 69 | testVote := func( 70 | cand candStat, 71 | voter voterStat, 72 | ) (*ElectReply, int64) { 73 | 74 | ts := newCluster(ids) 75 | 76 | t1 := ts[0] 77 | 78 | t1.initTraft( 79 | voter.committer, voter.author, voter.logs, voter.nilLogs, nil, 80 | voter.votedFor, 81 | ) 82 | 83 | startCluster(ts) 84 | defer stopAll(ts) 85 | 86 | req := &ElectReq{ 87 | Candidate: cand.candidateId, 88 | Committer: cand.committer, 89 | Accepted: bm(0, cand.logs...), 90 | } 91 | 92 | var reply *ElectReply 93 | addr := t1.Config.Members[id].Addr 94 | 95 | rpcTo(addr, func(cli TRaftClient, ctx context.Context) { 96 | var err error 97 | reply, err = cli.Elect(ctx, req) 98 | if err != nil { 99 | panic("wtf") 100 | } 101 | }) 102 | 103 | return reply, t1.Status[id].VoteExpireAt 104 | } 105 | 106 | lid := NewLeaderId 107 | 108 | cases := []struct { 109 | cand candStat 110 | voter voterStat 111 | want wantVoteReply 112 | }{ 113 | // vote granted 114 | { 115 | candStat{candidateId: lid(2, 2), committer: lid(1, id), logs: []int64{5}}, 116 | voterStat{votedFor: lid(0, id), committer: lid(0, id), author: lid(1, id), logs: []int64{5, 6}}, 117 | wantVoteReply{ 118 | OK: true, 119 | votedFor: lid(2, 2), 120 | committer: lid(0, id), 121 | allLogBitmap: bm(0, 5, 6), 122 | logs: "[<001#001:006{set(x, 6)}-0→0>]", 123 | }, 124 | }, 125 | 126 | // vote granted 127 | // 
send back nil logs 128 | { 129 | candStat{candidateId: lid(2, 2), committer: lid(1, id), logs: []int64{5}}, 130 | voterStat{votedFor: lid(0, id), committer: lid(0, id), author: lid(1, id), logs: []int64{5, 6, 7}, nilLogs: map[int64]bool{6: true}}, 131 | wantVoteReply{ 132 | OK: true, 133 | votedFor: lid(2, 2), 134 | committer: lid(0, id), 135 | allLogBitmap: bm(0, 5, 6, 7), 136 | logs: "[<>, <001#001:007{set(x, 7)}-0→0>]", 137 | }, 138 | }, 139 | 140 | // candidate has no upto date logs 141 | { 142 | candStat{candidateId: lid(2, 2), committer: lid(0, id), logs: []int64{5, 6}}, 143 | voterStat{votedFor: lid(1, id), committer: lid(1, id), author: lid(1, id), logs: []int64{5, 6}}, 144 | wantVoteReply{ 145 | OK: false, 146 | votedFor: lid(1, id), 147 | committer: lid(1, id), 148 | allLogBitmap: bm(0, 5, 6), 149 | logs: "[]", 150 | }, 151 | }, 152 | 153 | // candidate has not enough logs 154 | // No log is sent back to candidate because it does not need to rebuild 155 | // full log history. 156 | { 157 | candStat{candidateId: lid(2, 2), committer: lid(1, id), logs: []int64{5}}, 158 | voterStat{votedFor: lid(1, id), committer: lid(1, id), author: lid(1, id), logs: []int64{5, 6}}, 159 | wantVoteReply{ 160 | OK: false, 161 | votedFor: lid(1, id), 162 | committer: lid(1, id), 163 | allLogBitmap: bm(0, 5, 6), 164 | logs: "[]", 165 | }, 166 | }, 167 | 168 | // candidate has smaller term. 169 | // No log sent back. 170 | { 171 | candStat{candidateId: lid(2, 2), committer: lid(1, id), logs: []int64{5, 6}}, 172 | voterStat{votedFor: lid(3, id), committer: lid(1, id), author: lid(1, id), logs: []int64{5, 6}}, 173 | wantVoteReply{ 174 | OK: false, 175 | votedFor: lid(3, id), 176 | committer: lid(1, id), 177 | allLogBitmap: bm(0, 5, 6), 178 | logs: "[]", 179 | }, 180 | }, 181 | 182 | // candidate has smaller id. 183 | // No log sent back. 
184 | { 185 | candStat{candidateId: lid(3, id-1), committer: lid(1, id), logs: []int64{5, 6}}, 186 | voterStat{votedFor: lid(3, id), committer: lid(1, id), author: lid(1, id), logs: []int64{5, 6}}, 187 | wantVoteReply{ 188 | OK: false, 189 | votedFor: lid(3, id), 190 | committer: lid(1, id), 191 | allLogBitmap: bm(0, 5, 6), 192 | logs: "[]", 193 | }, 194 | }, 195 | } 196 | 197 | for i, c := range cases { 198 | reply, gotExpire := testVote(c.cand, c.voter) 199 | 200 | ta.Equal( 201 | c.want, 202 | wantVoteReply{ 203 | OK: reply.OK, 204 | votedFor: reply.VotedFor, 205 | committer: reply.Committer, 206 | allLogBitmap: reply.Accepted, 207 | logs: RecordsShortStr(reply.Logs), 208 | }, 209 | "%d-th: case: %+v", i+1, c) 210 | 211 | if reply.OK { 212 | ta.InDelta(uSecondI64()+leaderLease, gotExpire, 1000*1000*1000) 213 | } else { 214 | ta.Equal(int64(0), gotExpire) 215 | 216 | } 217 | } 218 | } 219 | 220 | func TestTRaft_VoteOnce(t *testing.T) { 221 | 222 | // cluster = {0, 1, 2} 223 | // ts[0] votes once with different Committer/VotedFor settings. 
224 | 225 | lid := NewLeaderId 226 | 227 | type wt struct { 228 | hasVoteReplies bool 229 | err error 230 | higherTerm int64 231 | } 232 | 233 | cases := []struct { 234 | name string 235 | committers []*LeaderId 236 | votedFors []*LeaderId 237 | logs [][]string 238 | candidate *LeaderId 239 | want wt 240 | }{ 241 | {name: "2emptyVoter/term-0", 242 | candidate: lid(0, 0), 243 | want: wt{ 244 | hasVoteReplies: false, 245 | err: ErrStaleTermId, 246 | higherTerm: 0, 247 | }, 248 | }, 249 | {name: "2emptyVoter/term-1", 250 | candidate: lid(1, 0), 251 | want: wt{ 252 | hasVoteReplies: true, 253 | err: nil, 254 | higherTerm: -1, 255 | }, 256 | }, 257 | {name: "reject-by-one/stalelog", 258 | committers: []*LeaderId{nil, lid(2, 0)}, 259 | votedFors: []*LeaderId{nil, lid(2, 1)}, 260 | candidate: lid(1, 0), 261 | want: wt{ 262 | hasVoteReplies: true, 263 | err: nil, 264 | higherTerm: -1, 265 | }, 266 | }, 267 | {name: "reject-by-one/higherTerm", 268 | committers: []*LeaderId{nil, nil, lid(0, 0)}, 269 | votedFors: []*LeaderId{nil, nil, lid(5, 2)}, 270 | candidate: lid(1, 0), 271 | want: wt{ 272 | hasVoteReplies: true, 273 | err: nil, 274 | higherTerm: -1, 275 | }, 276 | }, 277 | {name: "reject-by-two/stalelog", 278 | committers: []*LeaderId{nil, lid(2, 0), lid(0, 0)}, 279 | votedFors: []*LeaderId{nil, lid(2, 1), lid(2, 2)}, 280 | candidate: lid(1, 0), 281 | want: wt{ 282 | hasVoteReplies: false, 283 | err: ErrStaleLog, 284 | higherTerm: 2, 285 | }, 286 | }, 287 | {name: "reject-by-two/stalelog-higherTerm", 288 | committers: []*LeaderId{nil, lid(2, 0), lid(0, 0)}, 289 | votedFors: []*LeaderId{nil, lid(2, 1), lid(5, 2)}, 290 | logs: [][]string{nil, nil, []string{"x=0"}}, 291 | candidate: lid(1, 0), 292 | want: wt{ 293 | hasVoteReplies: false, 294 | err: ErrStaleLog, 295 | higherTerm: 5, 296 | }, 297 | }, 298 | {name: "reject-by-two/higherTerm", 299 | votedFors: []*LeaderId{nil, lid(3, 1), lid(5, 2)}, 300 | candidate: lid(1, 0), 301 | want: wt{ 302 | hasVoteReplies: false, 303 | 
err: ErrStaleTermId, 304 | higherTerm: 5, 305 | }, 306 | }, 307 | } 308 | 309 | for _, c := range cases { 310 | withCluster(t, c.name, 311 | []int64{0, 1, 2}, 312 | func(t *testing.T, ts []*TRaft) { 313 | ta := require.New(t) 314 | for i, cmt := range c.committers { 315 | if cmt != nil { 316 | ts[i].Status[int64(i)].Committer = cmt 317 | } 318 | } 319 | 320 | for i, v := range c.votedFors { 321 | if v != nil { 322 | ts[i].Status[int64(i)].VotedFor = v 323 | } 324 | } 325 | 326 | for i, ls := range c.logs { 327 | for _, l := range ls { 328 | ts[i].addLogs(l) 329 | } 330 | } 331 | 332 | voted, err, higher := ElectOnce( 333 | c.candidate, 334 | ExportLogStatus(ts[0].Status[0]), 335 | ts[0].Config.Clone(), 336 | ) 337 | 338 | if c.want.hasVoteReplies { 339 | ta.NotNil(voted) 340 | } else { 341 | ta.Nil(voted) 342 | } 343 | ta.Equal(c.want.err, errors.Cause(err)) 344 | ta.Equal(c.want.higherTerm, higher) 345 | }) 346 | } 347 | } 348 | 349 | func TestTRaft_query(t *testing.T) { 350 | 351 | ta := require.New(t) 352 | 353 | ids := []int64{1} 354 | id1 := int64(1) 355 | lid := NewLeaderId 356 | 357 | ts := newCluster(ids) 358 | 359 | t1 := ts[0] 360 | t1.initTraft(lid(1, 2), lid(3, 4), []int64{5}, nil, nil, lid(2, id1)) 361 | 362 | startCluster(ts) 363 | defer stopAll(ts) 364 | 365 | got := t1.query(func() interface{} { 366 | return ExportLogStatus(t1.Status[t1.Id]) 367 | }).v.(*LogStatus) 368 | ta.Equal("001#002", got.Committer.ShortStr()) 369 | ta.Equal("0:20", got.Accepted.ShortStr()) 370 | } 371 | 372 | func stopAll(ts []*TRaft) { 373 | for _, s := range ts { 374 | s.Stop() 375 | } 376 | } 377 | 378 | func readMsg(ts []*TRaft) string { 379 | 380 | // var msg string 381 | // select { 382 | // case msg = <-ts[0].MsgCh: 383 | // case msg = <-ts[1].MsgCh: 384 | // case msg = <-ts[2].MsgCh: 385 | // case <-time.After(time.Second): 386 | // panic("timeout") 387 | // } 388 | 389 | // n TRaft and a timeout 390 | cases := make([]reflect.SelectCase, len(ts)+1) 391 | for i, t := 
range ts { 392 | cases[i] = reflect.SelectCase{Dir: reflect.SelectRecv, Chan: reflect.ValueOf(t.MsgCh)} 393 | } 394 | cases[len(ts)] = reflect.SelectCase{Dir: reflect.SelectRecv, Chan: reflect.ValueOf(time.After(time.Second))} 395 | 396 | chosen, value, ok := reflect.Select(cases) 397 | // ok will be true if the channel has not been closed. 398 | if chosen == len(ts) { 399 | panic("timeout") 400 | } 401 | 402 | _ = ok 403 | 404 | msg := value.String() 405 | return msg 406 | } 407 | 408 | // waiting for expected message substring to present n times. 409 | func waitForMsg(ts []*TRaft, msgs map[string]int) { 410 | for { 411 | msg := readMsg(ts) 412 | for s, _ := range msgs { 413 | if strings.Contains(msg, s) { 414 | msgs[s]-- 415 | lg.Infow("got-msg", "msg", msg) 416 | } 417 | } 418 | 419 | all0 := true 420 | for _, n := range msgs { 421 | all0 = all0 && n == 0 422 | } 423 | 424 | lg.Infow("require-msg", "msgs", msgs) 425 | 426 | if all0 { 427 | return 428 | } 429 | } 430 | } 431 | 432 | func waitForAnyMsg(ts []*TRaft, msgs []string, total int) []string { 433 | 434 | rst := []string{} 435 | 436 | for { 437 | msg := readMsg(ts) 438 | for _, s := range msgs { 439 | if strings.Contains(msg, s) { 440 | total-- 441 | rst = append(rst, msg) 442 | lg.Infow("got-msg", "msg", msg) 443 | } 444 | } 445 | 446 | lg.Infow("require-msg", "msgs", msgs, "total", total) 447 | 448 | if total == 0 { 449 | return rst 450 | } 451 | } 452 | } 453 | 454 | func findLeader(ts []*TRaft) int64 { 455 | votes := make([]int, len(ts)) 456 | for i, t := range ts { 457 | id := t.Status[int64(i)].VotedFor.Id 458 | votes[id]++ 459 | if votes[id] > len(ts)/2 { 460 | // TODO joint consensus 461 | return id 462 | } 463 | } 464 | return -1 465 | } 466 | 467 | func TestTRaft_VoteLoop(t *testing.T) { 468 | 469 | lid := NewLeaderId 470 | bm := NewTailBitmap 471 | 472 | withCluster(t, "emptyVoters/candidate-1", 473 | []int64{0, 1, 2}, 474 | func(t *testing.T, ts []*TRaft) { 475 | ta := require.New(t) 476 | 477 
| ts[2].Stop() 478 | 479 | go ts[0].ElectLoop() 480 | 481 | waitForMsg(ts, map[string]int{ 482 | "vote-win 001#000": 1, 483 | }) 484 | 485 | ta.Equal(lid(1, 0), ts[0].Status[0].VotedFor) 486 | ta.InDelta(uSecondI64()+leaderLease, 487 | ts[0].Status[0].VoteExpireAt, 1000*1000*1000) 488 | 489 | ta.Equal(lid(1, 0), ts[1].Status[1].VotedFor) 490 | ta.InDelta(uSecondI64()+leaderLease, 491 | ts[1].Status[1].VoteExpireAt, 1000*1000*1000) 492 | }) 493 | 494 | withCluster(t, "emptyVoters/candidate-2", 495 | []int64{0, 1, 2}, 496 | func(t *testing.T, ts []*TRaft) { 497 | ta := require.New(t) 498 | 499 | go ts[1].ElectLoop() 500 | waitForMsg(ts, map[string]int{ 501 | "vote-win 001#001": 1, 502 | }) 503 | 504 | ta.Equal(lid(1, 1), ts[1].Status[1].VotedFor) 505 | 506 | ta.InDelta(uSecondI64()+leaderLease, 507 | ts[1].Status[1].VoteExpireAt, 1000*1000*1000) 508 | }) 509 | 510 | withCluster(t, "emptyVoters/candidate-12", 511 | []int64{0, 1, 2}, 512 | func(t *testing.T, ts []*TRaft) { 513 | 514 | go ts[0].ElectLoop() 515 | go ts[1].ElectLoop() 516 | 517 | // only one succ to elect. 518 | // In 1 second, there wont be another winning election. 519 | waitForMsg(ts, map[string]int{ 520 | "vote-win 001#001": 1, 521 | }) 522 | }) 523 | 524 | withCluster(t, "emptyVoters/candidate-123", 525 | []int64{0, 1, 2}, 526 | func(t *testing.T, ts []*TRaft) { 527 | 528 | ta := require.New(t) 529 | 530 | go ts[0].ElectLoop() 531 | go ts[1].ElectLoop() 532 | go ts[2].ElectLoop() 533 | 534 | // only one succ to elect. 535 | // In 1 second, there wont be another winning election. 
536 | got := waitForAnyMsg(ts, []string{ 537 | "vote-win", 538 | "vote-fail", 539 | }, 3) 540 | 541 | winner := findLeader(ts) 542 | 543 | ta.Contains(strings.Join(got, ";"), 544 | fmt.Sprintf("Id=%d vote-win", winner)) 545 | }) 546 | 547 | withCluster(t, "id2MaxCommitter", 548 | []int64{0, 1, 2}, 549 | func(t *testing.T, ts []*TRaft) { 550 | ts[0].initTraft0(lid(2, 1), lid(4, 0), "x=1") 551 | ts[1].initTraft0(lid(3, 2), lid(4, 1), "x=1") 552 | ts[2].initTraft0(lid(1, 3), lid(4, 2), "x=1") 553 | 554 | go ts[0].ElectLoop() 555 | go ts[1].ElectLoop() 556 | go ts[2].ElectLoop() 557 | 558 | // only one succ to elect. 559 | // In 1 second, there wont be another winning election. 560 | waitForMsg(ts, map[string]int{ 561 | "vote-win 005#001": 1, 562 | "vote-fail": 2, 563 | }) 564 | }) 565 | 566 | withCluster(t, "id1MaxLog", 567 | []int64{0, 1, 2, 3, 4}, 568 | func(t *testing.T, ts []*TRaft) { 569 | // we need 5 replica to collect different log from 2 replica 570 | ta := require.New(t) 571 | _ = ta 572 | 573 | // R0 0.2 Committer: 2-0 574 | // R1 0...4 Committer: 3-1 575 | // R2 n..3 Committer: 1-2 576 | ts[0].initTraft(lid(2, 0), lid(1, 1), []int64{0, 2}, nil, nil, lid(4, 0)) 577 | ts[1].initTraft(lid(3, 1), lid(1, 1), []int64{0, 4}, nil, nil, lid(4, 1)) 578 | ts[2].initTraft(lid(1, 2), lid(2, 1), []int64{0, 3}, nil, []int64{0}, lid(4, 2)) 579 | // ts[3].initTraft(lid(1, 2), lid(1, 1), []int64{0, 2, 3}, nil, nil, lid(0, 3)) 580 | // ts[4].initTraft(lid(1, 2), lid(1, 1), []int64{0, 2, 3}, nil, nil, lid(0, 4)) 581 | 582 | ts[3].Stop() 583 | ts[4].Stop() 584 | 585 | ts[1].Status[1].VotedFor = lid(3, 1) 586 | go ts[1].ElectLoop() 587 | 588 | // only one succ to elect. 589 | // In 1 second, there wont be another winning election. 
590 | waitForMsg(ts, map[string]int{ 591 | "vote-win 005#001": 1, 592 | }) 593 | 594 | ta.Equal( 595 | join("[<001#001:000{set(x, 0)}-0→0>", 596 | "<>", 597 | "<>", 598 | "<>", 599 | "<001#001:004{set(x, 4)}-0→0>]"), 600 | RecordsShortStr(ts[1].Logs, ""), 601 | ) 602 | 603 | ta.Equal(lid(5, 1), ts[1].Status[1].Committer) 604 | ta.Equal(bm(0, 0, 4), ts[1].Status[1].Accepted) 605 | ta.Equal(bm(0), ts[1].Status[1].Committed) 606 | 607 | ta.Equal(lid(2, 0), ts[1].Status[0].Committer) 608 | // using Equal to avoid comparison between nil and []int64{} 609 | ta.True(bm(0).Equal(ts[1].Status[0].Accepted)) 610 | ta.True(bm(0).Equal(ts[1].Status[0].Committed)) 611 | 612 | ta.Equal(lid(1, 2), ts[1].Status[2].Committer) 613 | // reduced Accepted to Committed 614 | ta.Equal(bm(0, 0), ts[1].Status[2].Accepted) 615 | ta.Equal(bm(0, 0), ts[1].Status[2].Committed) 616 | }) 617 | 618 | withCluster(t, "id1LeaderMergeOverrides", 619 | []int64{0, 1, 2}, 620 | func(t *testing.T, ts []*TRaft) { 621 | ta := require.New(t) 622 | _ = ta 623 | 624 | // R0 .1.3 Committer: 2-0; 3 overrides 1 625 | // R1 Committer: 3-1 626 | ts[0].initTraft0(lid(3, 1), lid(4, 1)) 627 | ts[1].initTraft0(lid(3, 1), lid(4, 1)) 628 | // ts[2].initTraft0(lid(1, 2), lid(4, 2)) 629 | 630 | ts[2].Stop() 631 | 632 | ts[0].addLogs(nil, nil, nil, "x=1") 633 | ts[0].Logs[3].Overrides = bm(0, 1, 3) 634 | ts[0].Status[0].Accepted = bm(0, 1, 3) 635 | 636 | ts[1].addLogs(nil, nil, nil, nil, "y=1") 637 | ts[1].Logs[4].Overrides = bm(0, 4) 638 | ts[1].Status[1].Accepted = bm(0, 4) 639 | 640 | go ts[1].ElectLoop() 641 | 642 | // only one succ to elect. 643 | // In 1 second, there wont be another winning election. 
644 | waitForMsg(ts, map[string]int{ 645 | "vote-win 005#001": 1, 646 | }) 647 | 648 | ta.Equal( 649 | join("[", 650 | "<>", 651 | "<>", 652 | "<>", 653 | "<004#001:003{set(x, 1)}-0:a→0>", 654 | "<004#001:004{set(y, 1)}-0:10→0>", 655 | "]"), 656 | RecordsShortStr(ts[1].Logs, ""), 657 | ) 658 | 659 | ta.Equal(lid(5, 1), ts[1].Status[1].Committer) 660 | ta.Equal(bm(0, 1, 3, 4), ts[1].Status[1].Accepted) 661 | }) 662 | } 663 | 664 | func TestTRaft_Propose(t *testing.T) { 665 | 666 | lid := NewLeaderId 667 | bm := NewTailBitmap 668 | 669 | sendPropose := func(addr string, xcmd interface{}) *ProposeReply { 670 | cmd := toCmd(xcmd) 671 | var reply *ProposeReply 672 | rpcTo(addr, func(cli TRaftClient, ctx context.Context) { 673 | var err error 674 | reply, err = cli.Propose(ctx, cmd) 675 | if err != nil { 676 | lg.Infow("err:", "err", err) 677 | } 678 | }) 679 | return reply 680 | } 681 | 682 | withCluster(t, "invalidLeader", 683 | []int64{0, 1, 2}, 684 | func(t *testing.T, ts []*TRaft) { 685 | ta := require.New(t) 686 | 687 | ts[0].initTraft(lid(2, 0), lid(1, 1), []int64{}, nil, nil, lid(2, 0)) 688 | ts[1].initTraft(lid(3, 1), lid(1, 1), []int64{}, nil, nil, lid(3, 1)) 689 | ts[2].initTraft(lid(1, 2), lid(2, 1), []int64{}, nil, []int64{0}, lid(1, 2)) 690 | 691 | mems := ts[1].Config.Members 692 | 693 | // no leader elected, not allow to propose 694 | reply := sendPropose(mems[1].Addr, NewCmdI64("foo", "x", 1)) 695 | ta.Equal(&ProposeReply{ 696 | OK: false, 697 | Err: "vote expired", 698 | OtherLeader: nil, 699 | }, reply) 700 | 701 | // elect ts[1] 702 | go ts[1].ElectLoop() 703 | 704 | waitForMsg(ts, map[string]int{ 705 | "vote-win 004#001": 1, 706 | }) 707 | 708 | // send to non-leader replica: 709 | reply = sendPropose(mems[0].Addr, NewCmdI64("foo", "x", 1)) 710 | ta.Equal(&ProposeReply{ 711 | OK: false, 712 | Err: "I am not leader", 713 | OtherLeader: lid(4, 1)}, reply) 714 | }) 715 | 716 | withCluster(t, "succ", 717 | []int64{0, 1, 2}, 718 | func(t *testing.T, ts 
[]*TRaft) { 719 | 720 | ta := require.New(t) 721 | 722 | ts[0].initTraft(lid(2, 0), lid(1, 1), []int64{}, nil, nil, lid(3, 0)) 723 | ts[1].initTraft(lid(3, 1), lid(1, 1), []int64{}, nil, nil, lid(3, 1)) 724 | ts[2].initTraft(lid(1, 2), lid(2, 1), []int64{}, nil, []int64{0}, lid(3, 2)) 725 | 726 | mems := ts[1].Config.Members 727 | 728 | // elect ts[1] 729 | go ts[1].ElectLoop() 730 | 731 | waitForMsg(ts, map[string]int{ 732 | "vote-win 004#001": 1, 733 | }) 734 | 735 | // TODO check state of other replicas 736 | 737 | // propose succeeds 738 | reply := sendPropose(mems[1].Addr, "y=1") 739 | ta.Equal(&ProposeReply{OK: true}, reply) 740 | 741 | ta.Equal(bm(1), ts[1].Status[1].Accepted) 742 | ta.Equal(bm(1), ts[1].Status[1].Committed) 743 | ta.Equal( 744 | join("[<004#001:000{set(y, 1)}-0:1→0>", "]"), 745 | RecordsShortStr(ts[1].Logs, ""), 746 | ) 747 | 748 | reply = sendPropose(mems[1].Addr, "y=2") 749 | ta.Equal(&ProposeReply{OK: true, OtherLeader: nil}, reply) 750 | 751 | ta.Equal(bm(2), ts[1].Status[1].Accepted) 752 | ta.Equal(bm(2), ts[1].Status[1].Committed) 753 | ta.Equal( 754 | join("[<004#001:000{set(y, 1)}-0:1→0>", 755 | "<004#001:001{set(y, 2)}-0:3→0>", 756 | "]"), 757 | RecordsShortStr(ts[1].Logs, ""), 758 | ) 759 | 760 | reply = sendPropose(mems[1].Addr, "x=3") 761 | ta.Equal(&ProposeReply{OK: true, OtherLeader: nil}, reply) 762 | 763 | ta.Equal(bm(3), ts[1].Status[1].Accepted) 764 | ta.Equal( 765 | join("[<004#001:000{set(y, 1)}-0:1→0>", 766 | "<004#001:001{set(y, 2)}-0:3→0>", 767 | "<004#001:002{set(x, 3)}-0:4→0>", 768 | "]"), 769 | RecordsShortStr(ts[1].Logs, ""), 770 | ) 771 | }) 772 | } 773 | 774 | func TestTRaft_AddLog_nil(t *testing.T) { 775 | 776 | ta := require.New(t) 777 | 778 | id := int64(1) 779 | tr := NewTRaft(id, map[int64]string{id: "123"}) 780 | 781 | tr.addLogs("x=1", "y=1", nil, "x=1") 782 | 783 | ta.Equal(join( 784 | "[<000#001:000{set(x, 1)}-0:1→0>", 785 | "<000#001:001{set(y, 1)}-0:2→0>", 786 | "<>", 787 | "<000#001:003{set(x, 
1)}-0:9→0>]"), RecordsShortStr(tr.Logs, "")) 788 | } 789 | 790 | func TestTRaft_AddLog(t *testing.T) { 791 | 792 | ta := require.New(t) 793 | 794 | id := int64(1) 795 | tr := NewTRaft(id, map[int64]string{id: "123"}) 796 | 797 | tr.AddLog(NewCmdI64("set", "x", 1)) 798 | ta.Equal("[<000#001:000{set(x, 1)}-0:1→0>]", RecordsShortStr(tr.Logs)) 799 | 800 | tr.AddLog(NewCmdI64("set", "y", 1)) 801 | ta.Equal(join( 802 | "[<000#001:000{set(x, 1)}-0:1→0>", 803 | "<000#001:001{set(y, 1)}-0:2→0>]"), RecordsShortStr(tr.Logs, "")) 804 | 805 | tr.AddLog(NewCmdI64("set", "x", 1)) 806 | ta.Equal(join( 807 | "[<000#001:000{set(x, 1)}-0:1→0>", 808 | "<000#001:001{set(y, 1)}-0:2→0>", 809 | "<000#001:002{set(x, 1)}-0:5→0>]"), RecordsShortStr(tr.Logs, "")) 810 | 811 | varnames := "wxyz" 812 | 813 | for i := 0; i < 67; i++ { 814 | vi := i % len(varnames) 815 | tr.AddLog(NewCmdI64("set", varnames[vi:vi+1], int64(i))) 816 | } 817 | l := len(tr.Logs) 818 | ta.Equal("<000#001:069{set(y, 66)}-0:2222222222222222:22→0>", tr.Logs[l-1].ShortStr()) 819 | 820 | // truncate some logs, then add another 67 821 | // To check Overrides and Depends 822 | 823 | tr.LogOffset = 65 824 | tr.Logs = tr.Logs[65:] 825 | 826 | for i := 0; i < 67; i++ { 827 | vi := i % len(varnames) 828 | tr.AddLog(NewCmdI64("set", varnames[vi:vi+1], 100+int64(i))) 829 | } 830 | l = len(tr.Logs) 831 | ta.Equal("<000#001:136{set(y, 166)}-64:1111111111111122:111→64:1>", tr.Logs[l-1].ShortStr()) 832 | 833 | } 834 | -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import "github.com/pkg/errors" 4 | 5 | var ( 6 | ErrStaleLog = errors.New("local log is stale") 7 | ErrStaleTermId = errors.New("local Term-Id is stale") 8 | ErrTimeout = errors.New("timeout") 9 | ErrLeaderLost = errors.New("leadership lost") 10 | ErrNeedElect = errors.New("no leader found, need to elect") 11 | ) 12 | 
-------------------------------------------------------------------------------- /gen-proto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # protoc --proto_path=. --go_out=plugins=grpc:. traft.proto 4 | # protoc --proto_path=. --gofast_out=plugins=grpc:. traft.proto 5 | 6 | 7 | # go get github.com/gogo/protobuf/protoc-gen-gogofast 8 | # go get github.com/gogo/protobuf/protoc-gen-gogofaster 9 | # go get github.com/gogo/protobuf/protoc-gen-gogoslick 10 | 11 | protoc -I=. \ 12 | -I=$GOPATH/src \ 13 | -I=$GOPATH/src/github.com/gogo/protobuf/protobuf \ 14 | --gogofaster_out=plugins=grpc:. \ 15 | traft.proto 16 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/openacid/traft 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/gogo/protobuf v1.3.2 7 | github.com/golang/protobuf v1.4.3 8 | github.com/kr/pretty v0.2.1 9 | github.com/openacid/low v0.1.22-0.20210209151724-95ca9483dbbb 10 | github.com/pkg/errors v0.9.1 11 | github.com/stretchr/testify v1.7.0 12 | go.uber.org/multierr v1.6.0 // indirect 13 | go.uber.org/zap v1.16.0 14 | google.golang.org/grpc v1.27.0 15 | google.golang.org/protobuf v1.25.0 16 | ) 17 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 3 | github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= 4 | github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= 5 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 6 | 
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 7 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 8 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 9 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 10 | github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= 11 | github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= 12 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 13 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 14 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 15 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 16 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 17 | github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= 18 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 19 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 20 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= 21 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 22 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= 23 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= 24 | github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= 25 | github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= 26 
| github.com/golang/protobuf v1.4.3 h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM= 27 | github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 28 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 29 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 30 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 31 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 32 | github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 33 | github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= 34 | github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= 35 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 36 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 37 | github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= 38 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 39 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 40 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 41 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 42 | github.com/openacid/errors v0.8.1/go.mod h1:GUQEJJOJE3W9skHm8E8Y4phdl2LLEN8iD7c5gcGgdx0= 43 | github.com/openacid/low v0.1.22-0.20210209151724-95ca9483dbbb h1:II/fUVgcmT9iD94uquwb/pVUMbEm83SzYdeldAnTE/c= 44 | github.com/openacid/low v0.1.22-0.20210209151724-95ca9483dbbb/go.mod h1:KbBlxORT7soCdBGWfYoUsipHkG4vRKgm54uaBf222co= 45 | github.com/openacid/must v0.1.3/go.mod h1:luPiXCuJlEo3UUFQngVQokV0MPGryeYvtCbQPs3U1+I= 46 | github.com/openacid/testkeys v0.1.7/go.mod h1:MfA7cACzBpbiwekivj8StqX0WIRmqlMsci1c37CA3Do= 47 | github.com/pkg/errors v0.8.1/go.mod 
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 48 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 49 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 50 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 51 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 52 | github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= 53 | github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= 54 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 55 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 56 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 57 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 58 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= 59 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 60 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 61 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 62 | go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk= 63 | go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= 64 | go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= 65 | go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= 66 | go.uber.org/multierr v1.5.0 h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A= 67 | go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= 68 | go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= 69 | go.uber.org/multierr v1.6.0/go.mod 
h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= 70 | go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= 71 | go.uber.org/zap v1.16.0 h1:uFRZXykJGK9lLY4HtgSw44DnIcAM+kRBP7x5m+NpAOM= 72 | go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= 73 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 74 | golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 75 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 76 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 77 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 78 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 79 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= 80 | golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 81 | golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 82 | golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= 83 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 84 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 85 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 86 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 87 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 88 | golang.org/x/net 
v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628= 89 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 90 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 91 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 92 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 93 | golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI= 94 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 95 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 96 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 97 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 98 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 99 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 100 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 101 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 102 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 103 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 104 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 105 | golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= 106 | golang.org/x/text v0.3.0/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 107 | golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= 108 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 109 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 110 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 111 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= 112 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 113 | golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 114 | golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 115 | golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 116 | golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 117 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 118 | golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 119 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 120 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 121 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 122 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 123 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 124 | google.golang.org/appengine v1.1.0/go.mod 
h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 125 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 126 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= 127 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= 128 | google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY= 129 | google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= 130 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= 131 | google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= 132 | google.golang.org/grpc v1.27.0 h1:rRYRFMVgRv6E0D70Skyfsr28tDXIuuPZyWGMPdMcnXg= 133 | google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= 134 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 135 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= 136 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= 137 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= 138 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= 139 | google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 140 | google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= 141 | google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 142 | google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 143 | 
google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c= 144 | google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= 145 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 146 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 147 | gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= 148 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 149 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 150 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 151 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 152 | honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 153 | honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= 154 | -------------------------------------------------------------------------------- /internal_api.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import "context" 4 | 5 | func (tr *TRaft) Elect(ctx context.Context, req *ElectReq) (*ElectReply, error) { 6 | var reply *ElectReply 7 | rst := tr.query( func() error { 8 | reply= tr.hdlElectReq(req) 9 | return nil 10 | }) 11 | _ = rst 12 | return reply,nil 13 | } 14 | 15 | func (tr *TRaft) LogForward(ctx context.Context, req *LogForwardReq) (*LogForwardReply, error) { 16 | 17 | // TODO: if a newer committer is seen, non-committed logs 18 | // can be sure to stale and should be cleaned. 
19 | 20 | var reply *LogForwardReply 21 | rst := tr.query( func() error { 22 | reply = tr.hdlLogForward(req) 23 | return nil 24 | }) 25 | _ = rst 26 | return reply, nil 27 | } 28 | 29 | func (tr *TRaft) Propose(ctx context.Context, cmd *Cmd) (*ProposeReply, error) { 30 | 31 | finCh := make(chan *ProposeReply, 1) 32 | 33 | rst := tr.query( func() error { 34 | tr.hdlPropose(cmd, finCh) 35 | return nil 36 | }) 37 | _ = rst 38 | 39 | lg.Infow("waitingFor:finCh") 40 | reply := <-finCh 41 | lg.Infow("got:finCh", "reply", reply) 42 | 43 | return reply, nil 44 | } 45 | -------------------------------------------------------------------------------- /leaderid.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | fmt "fmt" 5 | 6 | proto "github.com/gogo/protobuf/proto" 7 | ) 8 | 9 | func NewLeaderId(term, id int64) *LeaderId { 10 | return &LeaderId{ 11 | Term: term, 12 | Id: id, 13 | } 14 | } 15 | 16 | // Compare two leader id and returns 1, 0 or -1 for greater, equal and less 17 | func (a *LeaderId) Cmp(b *LeaderId) int { 18 | if a == nil { 19 | a = &LeaderId{} 20 | } 21 | if b == nil { 22 | b = &LeaderId{} 23 | } 24 | 25 | r := cmpI64(a.Term, b.Term) 26 | if r != 0 { 27 | return r 28 | } 29 | 30 | return cmpI64(a.Id, b.Id) 31 | } 32 | 33 | func (l *LeaderId) Clone() *LeaderId { 34 | return proto.Clone(l).(*LeaderId) 35 | } 36 | 37 | func (l *LeaderId) ShortStr() string { 38 | if l == nil { 39 | return "000#000" 40 | } 41 | return fmt.Sprintf("%03d#%03d", l.Term, l.Id) 42 | } 43 | -------------------------------------------------------------------------------- /leaderid_test.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestLeaderId_Cmp(t *testing.T) { 10 | 11 | ta := require.New(t) 12 | 13 | cases := []struct { 14 | a, b *LeaderId 15 | want int 16 | }{ 
17 | {a: NewLeaderId(1, 1), b: NewLeaderId(1, 1), want: 0}, 18 | {a: NewLeaderId(1, 2), b: NewLeaderId(1, 1), want: 1}, 19 | {a: NewLeaderId(1, 0), b: NewLeaderId(1, 1), want: -1}, 20 | {a: NewLeaderId(2, 0), b: NewLeaderId(1, 1), want: 1}, 21 | {a: NewLeaderId(0, 0), b: NewLeaderId(1, 1), want: -1}, 22 | 23 | {a: NewLeaderId(0, 0), b: nil, want: 0}, 24 | {a: nil, b: NewLeaderId(1, 1), want: -1}, 25 | {a: nil, b: nil, want: 0}, 26 | } 27 | 28 | for i, c := range cases { 29 | got := c.a.Cmp(c.b) 30 | ta.Equal(c.want, got, "%d-th: case: %+v", i+1, c) 31 | } 32 | } 33 | 34 | func TestLeaderId_Clone(t *testing.T) { 35 | 36 | ta := require.New(t) 37 | 38 | a := NewLeaderId(1, 2) 39 | b := a.Clone() 40 | a.Term = 3 41 | a.Id = 4 42 | ta.Equal(int64(1), b.Term) 43 | ta.Equal(int64(2), b.Id) 44 | } 45 | -------------------------------------------------------------------------------- /log.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | var ( 4 | emptyRecord = &LogRecord{} 5 | ) 6 | 7 | // a func for test purpose only 8 | func (tr *TRaft) addLogs(cmds ...interface{}) { 9 | me := tr.Status[tr.Id] 10 | for _, cs := range cmds { 11 | cmd := toCmd(cs) 12 | r := tr.addLogInternal(cmd) 13 | me.Accepted.Union(r.Overrides) 14 | } 15 | } 16 | 17 | // Only a established leader should use this func. 
18 | // no lock protection, must be called from Loop() 19 | func (tr *TRaft) AddLog(cmd *Cmd) *LogRecord { 20 | 21 | me := tr.Status[tr.Id] 22 | 23 | if me.VotedFor.Id != tr.Id { 24 | panic("wtf") 25 | } 26 | 27 | return tr.addLogInternal(cmd) 28 | } 29 | 30 | func (tr *TRaft) GetLog(lsn int64) *LogRecord { 31 | idx := lsn - tr.LogOffset 32 | r := tr.Logs[idx] 33 | if r.Seq != lsn { 34 | panic("wtf") 35 | } 36 | return r 37 | } 38 | 39 | func (tr *TRaft) addLogInternal(cmd *Cmd) *LogRecord { 40 | 41 | me := tr.Status[tr.Id] 42 | 43 | lsn := tr.LogOffset + int64(len(tr.Logs)) 44 | 45 | r := NewRecord(me.VotedFor.Clone(), lsn, cmd) 46 | 47 | // find the first interfering record. 48 | 49 | var i int 50 | for i = len(tr.Logs) - 1; i >= 0; i-- { 51 | prev := tr.Logs[i] 52 | if r.Interfering(prev) { 53 | r.Overrides = prev.Overrides.Clone() 54 | break 55 | } 56 | } 57 | 58 | if i == -1 { 59 | // there is not a interfering record. 60 | r.Overrides = NewTailBitmap(0) 61 | } 62 | 63 | r.Overrides.Set(lsn) 64 | 65 | // all log I do not know must be executed in order. 66 | // Because I do not know of the intefering relations. 
67 | r.Depends = NewTailBitmap(tr.LogOffset) 68 | 69 | // reduce bitmap size by removing unknown logs 70 | r.Overrides.Union(NewTailBitmap(tr.LogOffset & ^63)) 71 | 72 | tr.Logs = append(tr.Logs, r) 73 | 74 | return r 75 | } 76 | -------------------------------------------------------------------------------- /logforward.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/pkg/errors" 7 | ) 8 | 9 | // the result of forwarding logs from leader to follower 10 | type logForwardRst struct { 11 | from *ReplicaInfo 12 | reply *LogForwardReply 13 | err error 14 | } 15 | 16 | // forward log from leader to follower concurrently 17 | func (tr *TRaft) forwardLog( 18 | committer *LeaderId, 19 | config *Cluster, 20 | logs []*LogRecord, 21 | callback func(*logForwardRst), 22 | ) { 23 | 24 | lsns := []int64{logs[0].Seq, logs[len(logs)-1].Seq + 1} 25 | lg.Infow("forward", "LSNs", lsns, "cmtr", committer) 26 | 27 | req := &LogForwardReq{ 28 | Committer: committer, 29 | Logs: logs, 30 | } 31 | 32 | id := tr.Id 33 | 34 | // TODO 35 | timeout := time.Second 36 | sess := rpcToAll(id, config, meth.LogForward, req, timeout) 37 | 38 | for res := range sess.resCh { 39 | 40 | lg.Infow("logforward:recv-reply", "res", res) 41 | 42 | if sess.updateOKBitmap(res) { 43 | 44 | rst := tr.query(func() error { 45 | return tr.leaderUpdateCommitted( 46 | committer, lsns, 47 | ) 48 | }) 49 | 50 | if rst.err == nil { 51 | lg.Infow("forward:a-quorum-done") 52 | callback(&logForwardRst{}) 53 | } else { 54 | // TODO let the root cause to generate the error 55 | callback(&logForwardRst{ 56 | err: errors.Wrapf(rst.err, "forward"), 57 | }) 58 | } 59 | // LogForward does not cancel, try best to send logs to followers. 
60 | return 61 | } 62 | } 63 | 64 | lg.Infow("forward:timeout", "cmtr", committer.ShortStr()) 65 | callback(&logForwardRst{ 66 | err: errors.Wrapf(ErrTimeout, "forward"), 67 | }) 68 | } 69 | 70 | // hdlLogForward handles LogForward request on a follower 71 | // LogForward is similar to paxos-phase-2. 72 | func (tr *TRaft) hdlLogForward(req *LogForwardReq) *LogForwardReply { 73 | me := tr.Status[tr.Id] 74 | now := uSecondI64() 75 | 76 | lg.Infow("hdl-logforward", "req", req) 77 | lg.Infow("hdl-logforward", "me", me) 78 | 79 | cr := req.Committer.Cmp(me.VotedFor) 80 | 81 | // If req.Committer > me.VotedFor, it is a valid leader too. 82 | // It is safe to accept its log. 83 | // This is a common optimization of paxos: an Acceptor accepts request if rnd >= lastrnd. 84 | // See: https://blog.openacid.com/algo/paxos/#slide-42 85 | 86 | if cr < 0 && now < me.VoteExpireAt { 87 | lg.Infow("hdl-logforward: illegal committer", 88 | "req.Commiter", req.Committer, 89 | "me.VotedFor", me.VotedFor, 90 | "me.VoteExpireAt-now", me.VoteExpireAt-now) 91 | 92 | return &LogForwardReply{ 93 | OK: false, 94 | VotedFor: me.VotedFor.Clone(), 95 | } 96 | } 97 | 98 | if cr > 0 { 99 | me.VotedFor = req.Committer.Clone() 100 | me.VoteExpireAt = now + leaderLease 101 | } 102 | 103 | // TODO apply req.Committed 104 | 105 | cr = req.Committer.Cmp(me.Committer) 106 | if cr > 0 { 107 | lg.Infow("hdl-log-forward: newer committer", 108 | "req.Committer", req.Committer, 109 | "me.Committer", me.Committer, 110 | ) 111 | 112 | // if req.Committer is newer, discard all non-committed logs 113 | // Because non-committed local log may have been overridden by some new leader. 
114 | me.Accepted = me.Committed.Clone() 115 | 116 | i := len(tr.Logs) - 1 117 | for ; i >= 0; i-- { 118 | r := tr.Logs[i] 119 | if r.Empty() { 120 | continue 121 | } 122 | 123 | if me.Accepted.Get(r.Seq) == 0 { 124 | tr.Logs[i] = &LogRecord{} 125 | } 126 | } 127 | } 128 | 129 | // add new logs 130 | 131 | for _, r := range req.Logs { 132 | lsn := r.Seq 133 | idx := lsn - tr.LogOffset 134 | 135 | for int(idx) >= len(tr.Logs) { 136 | tr.Logs = append(tr.Logs, &LogRecord{}) 137 | } 138 | 139 | if me.Accepted.Get(lsn) != 0 { 140 | if !tr.Logs[idx].Empty() && !tr.Logs[idx].Equal(r) { 141 | panic("wtf") 142 | } 143 | } 144 | tr.Logs[idx] = r 145 | 146 | me.Accepted.Union(r.Overrides) 147 | 148 | lg.Infow("hdl-logforward", "accept-log", r) 149 | lg.Infow("hdl-logforward", "accepted", me.Accepted) 150 | } 151 | 152 | // TODO refine me 153 | // remove empty logs at top 154 | for len(tr.Logs) > 0 { 155 | l := len(tr.Logs) 156 | if tr.Logs[l-1].Empty() { 157 | tr.Logs = tr.Logs[:l-1] 158 | } else { 159 | break 160 | } 161 | } 162 | 163 | me.Committer = req.Committer.Clone() 164 | 165 | me.UpdatedCommitted(req.Committer, req.Committed) 166 | 167 | return &LogForwardReply{ 168 | OK: true, 169 | VotedFor: me.VotedFor.Clone(), 170 | Accepted: me.Accepted.Clone(), 171 | Committed: me.Committed.Clone(), 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /logforward_test.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | context "context" 5 | fmt "fmt" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestTRaft_LogForward(t *testing.T) { 13 | 14 | ta := require.New(t) 15 | _ = ta 16 | 17 | lid := NewLeaderId 18 | bm := NewTailBitmap 19 | 20 | sendLogForward := func(addr string, req *LogForwardReq) *LogForwardReply { 21 | var reply *LogForwardReply 22 | rpcTo(addr, func(cli TRaftClient, ctx context.Context) { 23 | var 
err error 24 | reply, err = cli.LogForward(ctx, req) 25 | if err != nil { 26 | lg.Infow("sendLogForward:err", "err", err) 27 | } 28 | }) 29 | return reply 30 | } 31 | 32 | logs := []*LogRecord{ 33 | NewRecordOverride(lid(5,1), 0, toCmd("x=0"), nil), 34 | NewRecordOverride(lid(5,1), 1, toCmd("y=1"), nil), 35 | NewRecordOverride(lid(5,1), 2, toCmd("x=2"), bm(1)), 36 | } 37 | 38 | sec1k := int64(time.Second * 1000) 39 | cases := []struct { 40 | name string 41 | to int64 42 | votedFor *LeaderId 43 | expire int64 44 | 45 | committer *LeaderId 46 | logs []*LogRecord 47 | committed *TailBitmap 48 | 49 | wantOK bool 50 | wantVotedFor *LeaderId 51 | wantAccepted *TailBitmap 52 | wantCommitted *TailBitmap 53 | wantLogs []string 54 | }{ 55 | {"unmatchedCommitter", 56 | 0, lid(3, 0), sec1k, 57 | lid(1, 2), logs[0:], nil, 58 | false, lid(3, 0), nil, nil, nil, 59 | }, 60 | {"accept/log2", 61 | 0, lid(3, 1), sec1k, 62 | lid(3, 1), logs[2:], nil, 63 | true, lid(3, 1), bm(0, 0, 2), bm(0), 64 | []string{ 65 | "<>", 66 | "<>", 67 | "<005#001:002{set(x, 2)}-0:5→0>", 68 | }, 69 | }, 70 | {"accept/log12", 71 | 0, lid(3, 1), sec1k, 72 | lid(3, 1), logs[1:], nil, 73 | true, lid(3, 1), bm(3), bm(0), 74 | []string{ 75 | "<>", 76 | "<005#001:001{set(y, 1)}-0:2→0>", 77 | "<005#001:002{set(x, 2)}-0:5→0>", 78 | }, 79 | }, 80 | {"accept/log12/overrideOld", 81 | 2, lid(3, 1), sec1k, 82 | lid(3, 1), logs[1:], nil, 83 | true, lid(3, 1), bm(3), bm(1), 84 | []string{ 85 | "<>", 86 | "<005#001:001{set(y, 1)}-0:2→0>", 87 | "<005#001:002{set(x, 2)}-0:5→0>", 88 | }, 89 | }, 90 | {"accept/log12/mergeCommitted", 91 | 2, lid(3, 1), sec1k, 92 | lid(3, 1), logs[1:], bm(0, 2, 3), 93 | true, lid(3, 1), bm(3), bm(0, 0, 2), 94 | []string{ 95 | "<>", 96 | "<005#001:001{set(y, 1)}-0:2→0>", 97 | "<005#001:002{set(x, 2)}-0:5→0>", 98 | }, 99 | }, 100 | {"accept/log12/overrideVotedFor", 101 | 2, lid(2, 1), sec1k, 102 | lid(3, 1), logs[1:], nil, 103 | true, lid(3, 1), bm(3), bm(1), 104 | []string{ 105 | "<>", 106 | 
"<005#001:001{set(y, 1)}-0:2→0>", 107 | "<005#001:002{set(x, 2)}-0:5→0>", 108 | }, 109 | }, 110 | {"accept/log12/overrideExpiredVotedFor", 111 | 2, lid(2, 1), -sec1k, 112 | lid(3, 1), logs[1:], nil, 113 | true, lid(3, 1), bm(3), bm(1), 114 | []string{ 115 | "<>", 116 | "<005#001:001{set(y, 1)}-0:2→0>", 117 | "<005#001:002{set(x, 2)}-0:5→0>", 118 | }, 119 | }, 120 | } 121 | 122 | for _, c := range cases { 123 | 124 | withCluster(t, 125 | fmt.Sprintf("%d-to-%d/%s", 1, c.to, c.name), 126 | []int64{0, 1, 2}, 127 | func(t *testing.T, ts []*TRaft) { 128 | ta := require.New(t) 129 | _ = ta 130 | 131 | ts[0].initTraft(lid(2, 0), lid(0, 1), []int64{}, nil, nil, lid(3, 0)) 132 | ts[1].initTraft(lid(3, 1), lid(0, 1), []int64{}, nil, nil, lid(5, 1)) 133 | ts[2].initTraft(lid(1, 2), lid(0, 1), []int64{}, nil, []int64{0}, lid(2, 2)) 134 | 135 | ts[0].addLogs() 136 | ts[1].addLogs("x=0", "y=1", "x=2") 137 | ts[2].addLogs("", "y=5") 138 | 139 | dst := ts[c.to].Status[c.to] 140 | dst.VotedFor = c.votedFor 141 | dst.VoteExpireAt = uSecondI64() + c.expire 142 | 143 | fmt.Println(ts[c.to].Node) 144 | ts[c.to].checkStatus() 145 | 146 | addr := ts[1].Config.Members[c.to].Addr 147 | repl := sendLogForward(addr, &LogForwardReq{ 148 | Committer: c.committer, 149 | Logs: c.logs, 150 | Committed: c.committed, 151 | }) 152 | 153 | ta.Equal(c.wantOK, repl.OK) 154 | 155 | ta.Equal(c.wantVotedFor, repl.VotedFor) 156 | if c.wantAccepted != nil { 157 | ta.Equal(c.wantAccepted, repl.Accepted.Normalize()) 158 | ta.Equal(c.wantAccepted, dst.Accepted.Normalize()) 159 | } 160 | 161 | if c.wantCommitted != nil { 162 | ta.Equal(c.wantCommitted, repl.Committed.Normalize()) 163 | ta.Equal(c.wantCommitted, dst.Committed.Normalize()) 164 | } 165 | 166 | if c.wantLogs != nil { 167 | ta.Equal("["+join(c.wantLogs...)+"]", 168 | RecordsShortStr(ts[c.to].Logs, "")) 169 | } 170 | 171 | }) 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /logging.go: 
-------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "go.uber.org/zap" 7 | ) 8 | 9 | var ( 10 | llg = zap.NewNop() 11 | lg *zap.SugaredLogger 12 | ) 13 | 14 | func initLogging() { 15 | // if os.Getenv("CLUSTER_DEBUG") != "" { 16 | // } 17 | var err error 18 | // llg, err = zap.NewProduction() 19 | llg, err = zap.NewDevelopment() 20 | if err != nil { 21 | panic(err) 22 | } 23 | lg = llg.Sugar() 24 | 25 | // initZap() 26 | 27 | } 28 | 29 | func initZap() { 30 | rawJSON := []byte(`{ 31 | "level": "debug", 32 | "encoding": "json", 33 | "outputPaths": ["stdout", "/tmp/logs"], 34 | "errorOutputPaths": ["stderr"], 35 | "initialFields": {"foo": "bar"}, 36 | "encoderConfig": { 37 | "messageKey": "message", 38 | "levelKey": "level", 39 | "levelEncoder": "lowercase" 40 | } 41 | }`) 42 | 43 | var cfg zap.Config 44 | if err := json.Unmarshal(rawJSON, &cfg); err != nil { 45 | panic(err) 46 | } 47 | 48 | var err error 49 | llg, err = cfg.Build() 50 | if err != nil { 51 | panic(err) 52 | } 53 | defer llg.Sync() 54 | 55 | llg.Info("logger construction succeeded") 56 | 57 | lg = llg.Sugar() 58 | 59 | } 60 | -------------------------------------------------------------------------------- /mainloop.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import fmt "fmt" 4 | 5 | type queryBody struct { 6 | arg interface{} 7 | rstCh chan *queryRst 8 | } 9 | 10 | type queryRst struct { 11 | v interface{} 12 | err error 13 | } 14 | 15 | // query the mainloop goroutine for something, by other goroutines, such as 16 | // update traft state or get some info. 
17 | func (tr *TRaft) query(arg interface{}) *queryRst { 18 | rstCh := make(chan *queryRst) 19 | tr.actionCh <- &queryBody{arg, rstCh} 20 | rst := <-rstCh 21 | lg.Infow("chan-query", 22 | // "arg", arg, 23 | "rst.err", rst.err, 24 | "rst.v", toStr(rst.v)) 25 | return rst 26 | } 27 | 28 | // Loop handles actions from other components. 29 | // This is the only goroutine that is allowed to update traft state. 30 | // Any info to send out of this goroutine must be cloned. 31 | func (tr *TRaft) Loop() { 32 | 33 | for { 34 | select { 35 | case <-tr.shutdown: 36 | return 37 | case a := <-tr.actionCh: 38 | 39 | tr.checkStatus() 40 | 41 | switch f := a.arg.(type) { 42 | case func() error: 43 | err := f() 44 | a.rstCh <- &queryRst{err: err} 45 | case func() interface{}: 46 | v := f() 47 | a.rstCh <- &queryRst{v: v} 48 | default: 49 | panic("unknown func signature:" + fmt.Sprintf("%v", a.arg)) 50 | } 51 | 52 | tr.checkStatus() 53 | } 54 | } 55 | } 56 | 57 | // checkStatus checks if TRaft status violate consistency requirement. 58 | // This is just a routine for debug. 
// buildMajorityQuorums returns every bare-majority subset of the members
// whose bits are set in mask.
//
// A bare majority has exactly OnesCount64(mask)/2 + 1 members. Each returned
// value is a bitmap that is a subset of mask, and the result is sorted in
// ascending numeric order. An empty mask yields an empty, non-nil slice.
//
// Only the submasks of mask are visited, so the cost depends on the number
// of members (2^popcount) and not on how high their bit positions are.
func buildMajorityQuorums(mask uint64) []uint64 {
	rst := make([]uint64, 0)
	major := bits.OnesCount64(mask)/2 + 1

	// (sub - mask) & mask is the next larger submask of mask; after mask
	// itself it wraps around to 0, which ends the walk.
	sub := uint64(0)
	for {
		if bits.OnesCount64(sub) == major {
			rst = append(rst, sub)
		}

		sub = (sub - mask) & mask
		if sub == 0 {
			break
		}
	}
	return rst
}
TODO 9 | func NewRecord(leader *LeaderId, seq int64, cmd *Cmd) *LogRecord { 10 | 11 | rec := &LogRecord{ 12 | Author: leader, 13 | Seq: seq, 14 | Cmd: cmd, 15 | } 16 | 17 | return rec 18 | } 19 | 20 | func NewRecordOverride(leader *LeaderId, seq int64, cmd *Cmd, override *TailBitmap) *LogRecord { 21 | 22 | rec := NewRecord(leader,seq,cmd) 23 | rec.Overrides = NewTailBitmap(0, seq) 24 | rec.Overrides.Union(override) 25 | 26 | return rec 27 | } 28 | 29 | // gogoproto would panic if a []*LogRecord has a nil in it. 30 | // Thus we use r.Cmd == nil to indicate an absent log record. 31 | func (r *LogRecord) Empty() bool { 32 | return r == nil || r.Cmd == nil 33 | } 34 | 35 | func (a *LogRecord) Interfering(b *LogRecord) bool { 36 | if a == nil || b == nil { 37 | return false 38 | } 39 | 40 | return a.Cmd.Interfering(b.Cmd) 41 | } 42 | 43 | func (r *LogRecord) ShortStr() string { 44 | if r.Empty() { 45 | return "<>" 46 | } 47 | 48 | return fmt.Sprintf("<%s:%03d{%s}-%s→%s>", 49 | r.Author.ShortStr(), 50 | r.Seq, 51 | r.Cmd.ShortStr(), 52 | r.Overrides.ShortStr(), 53 | r.Depends.ShortStr(), 54 | ) 55 | } 56 | 57 | func RecordsShortStr(rs []*LogRecord, sep ...string) string { 58 | s := ", " 59 | if len(sep) > 0 { 60 | s = sep[0] 61 | } 62 | rst := []string{} 63 | for _, r := range rs { 64 | rst = append(rst, r.ShortStr()) 65 | } 66 | return "[" + strings.Join(rst, s) + "]" 67 | 68 | } 69 | -------------------------------------------------------------------------------- /record_test.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestNewRecord(t *testing.T) { 10 | 11 | ta := require.New(t) 12 | 13 | ta.Equal(&LogRecord{ 14 | Author: &LeaderId{1, 2}, 15 | Seq: 3, 16 | Cmd: &Cmd{ 17 | Op: "foo", 18 | Key: "key", 19 | Value: &Cmd_VI64{4}, 20 | }, 21 | }, NewRecord(NewLeaderId(1, 2), 3, NewCmdI64("foo", "key", 4))) 22 | 23 | } 24 | 
25 | func TestRecord_Interfering(t *testing.T) { 26 | 27 | ta := require.New(t) 28 | 29 | lid := NewLeaderId 30 | cmd := NewCmdI64 31 | 32 | cases := []struct { 33 | a, b *LogRecord 34 | want bool 35 | }{ 36 | {nil, nil, false}, 37 | {nil, NewRecord(lid(0, 1), 0, cmd("bar", "x", 1)), false}, 38 | {NewRecord(lid(0, 1), 0, cmd("foo", "x", 1)), NewRecord(lid(0, 1), 0, nil), false}, 39 | {NewRecord(lid(0, 1), 0, cmd("foo", "x", 1)), NewRecord(lid(0, 1), 0, cmd("bar", "x", 1)), false}, 40 | {NewRecord(lid(0, 1), 0, cmd("set", "x", 1)), NewRecord(lid(0, 1), 0, cmd("set", "y", 1)), false}, 41 | {NewRecord(lid(0, 1), 0, cmd("set", "x", 1)), NewRecord(lid(0, 1), 0, cmd("set", "x", 1)), true}, 42 | } 43 | 44 | for i, c := range cases { 45 | ta.Equal(c.want, c.a.Interfering(c.b), "%d-th: case: %+v", i+1, c) 46 | ta.Equal(c.want, c.b.Interfering(c.a), "%d-th: case: %+v", i+1, c) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /replicastatus.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | // newStatusAcc creates a ReplicaStatus with only accepted fields inited. 4 | // Mostly for test purpose only. 5 | func newStatusAcc(aterm, aid, lsn int64) *ReplicaStatus { 6 | acc := NewTailBitmap((lsn + 1) &^ 63) 7 | if (lsn+1)&63 != 0 { 8 | acc.Words = append(acc.Words, 1<>6), 29 | } 30 | if residual != 0 { 31 | for i := int64(0); i < residual; i++ { 32 | tb.Set(tb.Offset + i) 33 | } 34 | } 35 | for _, pos := range set { 36 | tb.Set(pos) 37 | } 38 | return tb 39 | } 40 | 41 | // Compact all leading all-ones words in the bitmap. 
42 | // 43 | // Since 0.1.22 44 | func (tb *TailBitmap) Compact() { 45 | 46 | allOnes := uint64(0xffffffffffffffff) 47 | 48 | for len(tb.Words) > 0 && tb.Words[0] == allOnes { 49 | tb.Offset += 64 50 | tb.Words = tb.Words[1:] 51 | } 52 | 53 | if tb.Offset-tb.Reclamed >= reclaimThreshold { 54 | l := len(tb.Words) 55 | newWords := make([]uint64, l, l*2) 56 | 57 | copy(newWords, tb.Words) 58 | tb.Reclamed = tb.Offset 59 | } 60 | } 61 | 62 | // Set the bit at `idx` to `1`. 63 | // 64 | // Since 0.1.22 65 | func (tb *TailBitmap) Set(idx int64) { 66 | if idx < tb.Offset { 67 | return 68 | } 69 | 70 | idx = idx - tb.Offset 71 | wordIdx := idx >> 6 72 | 73 | for int(wordIdx) >= len(tb.Words) { 74 | tb.Words = append(tb.Words, 0) 75 | } 76 | 77 | tb.Words[wordIdx] |= bitmap.Bit[idx&63] 78 | 79 | if wordIdx == 0 { 80 | tb.Compact() 81 | } 82 | } 83 | 84 | // Get retrieves a bit at its 64-based offset. 85 | // 86 | // Since 0.1.22 87 | func (tb *TailBitmap) Get(idx int64) uint64 { 88 | if idx < tb.Offset { 89 | return bitmap.Bit[idx&63] 90 | } 91 | 92 | idx = idx - tb.Offset 93 | if int(idx>>6) >= len(tb.Words) { 94 | return 0 95 | } 96 | return tb.Words[idx>>6] & bitmap.Bit[idx&63] 97 | } 98 | 99 | // Get1 retrieves a bit and returns a 1-bit word, i.e., putting the bit in the 100 | // lowest bit. 
101 | // 102 | // Since 0.1.22 103 | func (tb *TailBitmap) Get1(idx int64) uint64 { 104 | if idx < tb.Offset { 105 | return 1 106 | } 107 | idx = idx - tb.Offset 108 | return (tb.Words[idx>>6] >> uint(idx&63)) & 1 109 | } 110 | 111 | func (tb *TailBitmap) Clone() *TailBitmap { 112 | return proto.Clone(tb).(*TailBitmap) 113 | } 114 | 115 | func (tb *TailBitmap) Normalize() *TailBitmap { 116 | if tb.Words == nil { 117 | tb.Words = make([]uint64, 0) 118 | } 119 | return tb 120 | } 121 | 122 | func (tb *TailBitmap) Union(tc *TailBitmap) { 123 | 124 | if tc == nil { 125 | return 126 | } 127 | 128 | lb := tb.Offset + int64(len(tb.Words)*64) 129 | lc := tc.Offset + int64(len(tc.Words)*64) 130 | 131 | if tb.Offset >= lc { 132 | return 133 | } 134 | 135 | if lb <= tc.Offset { 136 | tb.Offset = tc.Offset 137 | tb.Words = make([]uint64, len(tc.Words)) 138 | copy(tb.Words, tc.Words) 139 | 140 | // building a new Words reclames unused spaces in it. 141 | tb.Reclamed = tb.Offset 142 | return 143 | } 144 | 145 | var ws []uint64 146 | if tb.Offset >= tc.Offset { 147 | delta := tb.Offset - tc.Offset 148 | ws = tc.Words[delta>>6:] 149 | 150 | } else { 151 | // tb.Offset < tc.Offset 152 | 153 | delta := tc.Offset - tb.Offset 154 | tb.Words = tb.Words[delta>>6:] 155 | tb.Offset = tc.Offset 156 | ws = tc.Words 157 | } 158 | 159 | var i int 160 | for i = 0; i < len(tb.Words) && i < len(ws); i++ { 161 | tb.Words[i] |= ws[i] 162 | } 163 | 164 | for ; i < len(ws); i++ { 165 | tb.Words = append(tb.Words, ws[i]) 166 | } 167 | 168 | tb.Compact() 169 | } 170 | 171 | func (ta *TailBitmap) Intersection(tb *TailBitmap) { 172 | 173 | if tb == nil { 174 | ta.Offset = 0 175 | ta.Words = make([]uint64, 0) 176 | ta.Reclamed = 0 177 | return 178 | } 179 | 180 | la := ta.Offset + int64(len(ta.Words)*64) 181 | lb := tb.Offset + int64(len(tb.Words)*64) 182 | 183 | // 1111 1111 xxxx 184 | // 1111 yyyy 185 | if ta.Offset >= lb { 186 | ta.Offset = tb.Offset 187 | ta.Words = make([]uint64, len(tb.Words)) 188 
| copy(ta.Words, tb.Words) 189 | 190 | // building a new Words reclames unused spaces in it. 191 | ta.Reclamed = ta.Offset 192 | return 193 | } 194 | 195 | // 1111 xxxx 196 | // 1111 1111 yyyy 197 | if la <= tb.Offset { 198 | return 199 | } 200 | 201 | s := util.MinI64(ta.Offset, tb.Offset) 202 | e := util.MinI64(la, lb) 203 | ws := make([]uint64, (e-s)>>6) 204 | cur := int64(0) 205 | i := int64(0) 206 | j := int64(0) 207 | if ta.Offset >= tb.Offset { 208 | n := (ta.Offset - s) >> 6 209 | n = util.MinI64(n, (e-s)>>6) 210 | copy(ws, tb.Words[:n]) 211 | cur += n 212 | j = n 213 | } else { 214 | n := (tb.Offset - s) >> 6 215 | n = util.MinI64(n, (e-s)>>6) 216 | copy(ws, ta.Words[:n]) 217 | cur += n 218 | i = n 219 | } 220 | 221 | for cur < int64(len(ws)) { 222 | ws[cur] = ta.Words[i] & tb.Words[j] 223 | cur++ 224 | i++ 225 | j++ 226 | } 227 | 228 | for len(ws) > 0 && ws[len(ws)-1] == 0 { 229 | ws = ws[:len(ws)-1] 230 | } 231 | ta.Offset = s 232 | ta.Words = ws 233 | ta.Reclamed = s 234 | } 235 | 236 | // Diff AKA substraction A - B or A \ B 237 | // TODO: This impl is wrong!!! 
238 | func (tb *TailBitmap) Diff(tc *TailBitmap) { 239 | 240 | if tc == nil { 241 | return 242 | } 243 | 244 | lb := tb.Offset + int64(len(tb.Words)*64) 245 | lc := tc.Offset + int64(len(tc.Words)*64) 246 | 247 | if lb <= tc.Offset { 248 | for i := 0; i < len(tb.Words); i++ { 249 | tb.Words[i] = ^tb.Words[i] 250 | } 251 | return 252 | } 253 | 254 | if tb.Offset > lc { 255 | // 1111 1111 1111 xxxx xxxx 256 | // 1111 yyyy 257 | l := int((tb.Offset - tc.Offset) >> 6) 258 | words := make([]uint64, l+len(tb.Words)) 259 | var i int 260 | for i = 0; i < l && i < len(tc.Words); i++ { 261 | words[i] = ^tc.Words[i] 262 | } 263 | for ; i < l; i++ { 264 | words[i] = 0xffffffffffffffff 265 | } 266 | 267 | copy(words[i:], tb.Words) 268 | tb.Words = words 269 | tb.Offset = tc.Offset 270 | tb.Reclamed = tb.Offset 271 | return 272 | } 273 | 274 | if tb.Offset <= tc.Offset { 275 | // 1111 1111 xxxx xxxx 276 | // 1111 1111 1111 yyyy yyyy 277 | delta := (tc.Offset - tb.Offset) >> 6 278 | var i int64 279 | for i = 0; i < delta; i++ { 280 | tb.Words[i] = 0 281 | } 282 | for ; i < int64(len(tb.Words)) && i < (lc-tb.Offset)>>6; i++ { 283 | tb.Words[i] &= ^tc.Words[i-delta] 284 | } 285 | 286 | } else { 287 | // tb.Offset > tc.Offset 288 | // 1111 1111 xxxx xxxx 289 | // 1111 yyyy yyyy 290 | 291 | delta := int((tb.Offset - tc.Offset) >> 6) 292 | words := make([]uint64, delta+len(tb.Words)) 293 | 294 | var i int 295 | for i = 0; i < delta; i++ { 296 | words[i] = ^tc.Words[i] 297 | } 298 | for ; i < len(words) && i < len(tc.Words); i++ { 299 | words[i] = tb.Words[i-delta] &^ tc.Words[i] 300 | } 301 | copy(words[i:], tb.Words[i-delta:]) 302 | 303 | tb.Words = words 304 | tb.Offset = tc.Offset 305 | tb.Reclamed = tb.Offset 306 | } 307 | } 308 | 309 | // Last returns last set bit index + 1. 
310 | func (tb *TailBitmap) Len() int64 { 311 | 312 | r := len(tb.Words) - 1 313 | for ; r >= 0 && tb.Words[r] == 0; r-- { 314 | } 315 | 316 | if r < 0 { 317 | // all Words are 0 318 | return tb.Offset 319 | } 320 | 321 | return tb.Offset + int64(r+1)<<6 - int64(bits.LeadingZeros64(tb.Words[r])) 322 | } 323 | 324 | func (tb *TailBitmap) ShortStr() string { 325 | if tb == nil { 326 | return "0" 327 | } 328 | s := []string{fmt.Sprintf("%d", tb.Offset)} 329 | for _, w := range tb.Words { 330 | s = append(s, fmt.Sprintf(":%x", w)) 331 | } 332 | 333 | return strings.Join(s, "") 334 | } 335 | 336 | func (tb *TailBitmap) DebugStr() string { 337 | if tb == nil { 338 | return "0" 339 | } 340 | s := []string{fmt.Sprintf("%d", tb.Offset)} 341 | for _, w := range tb.Words { 342 | v := bitmap.Fmt(w) 343 | s = append(s, v) 344 | } 345 | 346 | return strings.Join(s, ",") 347 | } 348 | -------------------------------------------------------------------------------- /tailbitmap_test.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestNewTailBitmap(t *testing.T) { 10 | 11 | ta := require.New(t) 12 | 13 | cases := []struct { 14 | input int64 15 | set []int64 16 | want *TailBitmap 17 | }{ 18 | { 19 | input: 0, 20 | want: &TailBitmap{ 21 | Offset: 0, 22 | Words: make([]uint64, 0, 1024), 23 | Reclamed: 0, 24 | }, 25 | }, 26 | 27 | // non-64 aligned offset 28 | { 29 | input: 64 + 3, 30 | want: &TailBitmap{ 31 | Offset: 64, 32 | Words: []uint64{7}, 33 | Reclamed: 64, 34 | }, 35 | }, 36 | { 37 | input: 64 * 1025, 38 | want: &TailBitmap{ 39 | Offset: 64 * 1025, 40 | Words: make([]uint64, 0, 1024), 41 | Reclamed: 64 * 1025, 42 | }, 43 | }, 44 | // with extra bits to set 45 | { 46 | input: 64 * 1, 47 | set: []int64{1, 64, 65}, 48 | want: &TailBitmap{ 49 | Offset: 64 * 1, 50 | Words: []uint64{3}, 51 | Reclamed: 64 * 1, 52 | }, 53 | }, 54 | } 55 
| 56 | for i, c := range cases { 57 | got := NewTailBitmap(c.input, c.set...) 58 | ta.Equal(c.want, got, "%d-th: case: %+v", i+1, c) 59 | } 60 | } 61 | 62 | func TestTailBitmap_Compact(t *testing.T) { 63 | 64 | ta := require.New(t) 65 | 66 | allOnes1024 := make([]uint64, 1024) 67 | for i, _ := range allOnes1024 { 68 | allOnes1024[i] = 0xffffffffffffffff 69 | } 70 | 71 | cases := []struct { 72 | input *TailBitmap 73 | want *TailBitmap 74 | }{ 75 | { 76 | input: &TailBitmap{ 77 | Offset: 0, 78 | Words: []uint64{0xffffffffffffffff}, 79 | Reclamed: 0, 80 | }, 81 | want: &TailBitmap{ 82 | Offset: 64, 83 | Words: []uint64{}, 84 | Reclamed: 0, 85 | }, 86 | }, 87 | { 88 | input: &TailBitmap{ 89 | Offset: 64, 90 | Words: []uint64{0xffffffffffffffff}, 91 | Reclamed: 0, 92 | }, 93 | want: &TailBitmap{ 94 | Offset: 64 * 2, 95 | Words: []uint64{}, 96 | Reclamed: 0, 97 | }, 98 | }, 99 | { 100 | input: &TailBitmap{ 101 | Offset: 64, 102 | Words: []uint64{0xffffffffffffffff, 1}, 103 | Reclamed: 0, 104 | }, 105 | want: &TailBitmap{ 106 | Offset: 64 * 2, 107 | Words: []uint64{1}, 108 | Reclamed: 0, 109 | }, 110 | }, 111 | { 112 | input: &TailBitmap{ 113 | Offset: 64, 114 | Words: allOnes1024, 115 | Reclamed: 0, 116 | }, 117 | want: &TailBitmap{ 118 | Offset: 64 * 1025, 119 | Words: []uint64{}, 120 | Reclamed: 64 * 1025, 121 | }, 122 | }, 123 | } 124 | 125 | for i, c := range cases { 126 | c.input.Compact() 127 | ta.Equal(c.want, c.input, "%d-th: case: %+v", i+1, c) 128 | } 129 | } 130 | 131 | func TestTailBitmap_Set(t *testing.T) { 132 | 133 | ta := require.New(t) 134 | 135 | allOnes1024 := make([]uint64, 1024) 136 | for i, _ := range allOnes1024 { 137 | allOnes1024[i] = 0xffffffffffffffff 138 | } 139 | 140 | cases := []struct { 141 | input *TailBitmap 142 | set int64 143 | want *TailBitmap 144 | }{ 145 | { 146 | input: &TailBitmap{ 147 | Offset: 0, 148 | Words: []uint64{}, 149 | Reclamed: 0, 150 | }, 151 | set: 0, 152 | want: &TailBitmap{ 153 | Offset: 0, 154 | Words: []uint64{1}, 
155 | Reclamed: 0, 156 | }, 157 | }, 158 | { 159 | input: &TailBitmap{ 160 | Offset: 64, 161 | Words: []uint64{}, 162 | Reclamed: 0, 163 | }, 164 | set: 65, 165 | want: &TailBitmap{ 166 | Offset: 64, 167 | Words: []uint64{2}, 168 | Reclamed: 0, 169 | }, 170 | }, 171 | { 172 | input: &TailBitmap{ 173 | Offset: 64 * 2, 174 | Words: []uint64{1}, 175 | Reclamed: 0, 176 | }, 177 | set: 5, 178 | want: &TailBitmap{ 179 | Offset: 64 * 2, 180 | Words: []uint64{1}, 181 | Reclamed: 0, 182 | }, 183 | }, 184 | { 185 | input: &TailBitmap{ 186 | Offset: 64 * 2, 187 | Words: []uint64{1}, 188 | Reclamed: 0, 189 | }, 190 | set: 64*2 + 1, 191 | want: &TailBitmap{ 192 | Offset: 64 * 2, 193 | Words: []uint64{3}, 194 | Reclamed: 0, 195 | }, 196 | }, 197 | { 198 | input: &TailBitmap{ 199 | Offset: 64 * 2, 200 | Words: []uint64{1}, 201 | Reclamed: 0, 202 | }, 203 | set: 64*3 + 2, 204 | want: &TailBitmap{ 205 | Offset: 64 * 2, 206 | Words: []uint64{1, 4}, 207 | Reclamed: 0, 208 | }, 209 | }, 210 | { 211 | input: &TailBitmap{ 212 | Offset: 64, 213 | Words: []uint64{0xffffffffffffff7f, 1}, 214 | Reclamed: 0, 215 | }, 216 | set: 64 + 7, 217 | want: &TailBitmap{ 218 | Offset: 64 * 2, 219 | Words: []uint64{1}, 220 | Reclamed: 0, 221 | }, 222 | }, 223 | { 224 | input: &TailBitmap{ 225 | Offset: 64 * 1023, 226 | Words: []uint64{0xffffffffffffff7f, 1}, 227 | Reclamed: 0, 228 | }, 229 | set: 64*1023 + 7, 230 | want: &TailBitmap{ 231 | Offset: 64 * 1024, 232 | Words: []uint64{1}, 233 | Reclamed: 64 * 1024, 234 | }, 235 | }, 236 | } 237 | 238 | for i, c := range cases { 239 | c.input.Set(c.set) 240 | ta.Equal(c.want, c.input, "%d-th: case: %+v", i+1, c) 241 | } 242 | } 243 | 244 | func TestTailBitmap_Get(t *testing.T) { 245 | 246 | ta := require.New(t) 247 | 248 | allOnes1024 := make([]uint64, 1024) 249 | for i, _ := range allOnes1024 { 250 | allOnes1024[i] = 0xffffffffffffffff 251 | } 252 | 253 | cases := []struct { 254 | input *TailBitmap 255 | get int64 256 | want uint64 257 | }{ 258 | { 259 | 
input: &TailBitmap{ 260 | Offset: 64, 261 | Words: []uint64{}, 262 | Reclamed: 0, 263 | }, 264 | get: 0, 265 | want: 1, 266 | }, 267 | { 268 | input: &TailBitmap{ 269 | Offset: 64, 270 | Words: []uint64{}, 271 | Reclamed: 0, 272 | }, 273 | get: 1, 274 | want: 2, 275 | }, 276 | { 277 | input: &TailBitmap{ 278 | Offset: 64, 279 | Words: []uint64{}, 280 | Reclamed: 0, 281 | }, 282 | get: 63, 283 | want: 1 << 63, 284 | }, 285 | 286 | { 287 | input: &TailBitmap{ 288 | Offset: 64, 289 | Words: []uint64{0xffffffffffffff7f, 1}, 290 | Reclamed: 0, 291 | }, 292 | get: 64 + 7, 293 | want: 0, 294 | }, 295 | { 296 | input: &TailBitmap{ 297 | Offset: 64, 298 | Words: []uint64{0xffffffffffffff7f, 1}, 299 | Reclamed: 0, 300 | }, 301 | get: 64 + 6, 302 | want: 1 << 6, 303 | }, 304 | { 305 | input: &TailBitmap{ 306 | Offset: 64, 307 | Words: []uint64{0xffffffffffffff7f, 1}, 308 | Reclamed: 0, 309 | }, 310 | get: 64 + 8, 311 | want: 1 << 8, 312 | }, 313 | { 314 | input: &TailBitmap{ 315 | Offset: 64, 316 | Words: []uint64{0xffffffffffffff7f, 1}, 317 | Reclamed: 0, 318 | }, 319 | get: 64*2 + 0, 320 | want: 1, 321 | }, 322 | } 323 | 324 | for i, c := range cases { 325 | got := c.input.Get(c.get) 326 | ta.Equal(c.want, got, "%d-th: Get case: %+v", i+1, c) 327 | 328 | got1 := c.input.Get1(c.get) 329 | if c.want != 0 { 330 | ta.Equal(uint64(1), got1, "%d-th: Get1 case: %+v", i+1, c) 331 | } else { 332 | ta.Equal(uint64(0), got1, "%d-th: Get1 case: %+v", i+1, c) 333 | } 334 | } 335 | } 336 | 337 | func TestTailBitmap_Clone(t *testing.T) { 338 | 339 | ta := require.New(t) 340 | 341 | allOnes1024 := make([]uint64, 1024) 342 | for i, _ := range allOnes1024 { 343 | allOnes1024[i] = 0xffffffffffffffff 344 | } 345 | 346 | cases := []struct { 347 | input *TailBitmap 348 | }{ 349 | { 350 | input: &TailBitmap{ 351 | Offset: 64, 352 | Words: []uint64{1, 2, 3}, 353 | Reclamed: 0, 354 | }, 355 | }, 356 | } 357 | 358 | for i, c := range cases { 359 | got := c.input.Clone() 360 | ta.Equal(c.input, got, 
"%d-th: same as cloned case: %+v", i+1, c) 361 | 362 | prev := c.input.Words[0] 363 | ta.NotEqual(1000, prev, "%d-th: not 1000 case: %+v", i+1, c) 364 | c.input.Words[0] = 1000 365 | ta.Equal(prev, got.Words[0], "%d-th: cloned does not change the original case: %+v", i+1, c) 366 | } 367 | } 368 | 369 | func TestTailBitmap_Union(t *testing.T) { 370 | 371 | ta := require.New(t) 372 | 373 | ff := uint64(0xffffffffffffffff) 374 | 375 | cases := []struct { 376 | input *TailBitmap 377 | other *TailBitmap 378 | want *TailBitmap 379 | }{ 380 | // 1111 xxxx 381 | // nil 382 | { 383 | input: &TailBitmap{Offset: 64, Words: []uint64{1}, Reclamed: 0}, 384 | other: nil, 385 | want: &TailBitmap{Offset: 64, Words: []uint64{1}, Reclamed: 0}, 386 | }, 387 | 388 | // 1111 xxxx 389 | // 1111 yyyy 390 | { 391 | input: &TailBitmap{Offset: 64, Words: []uint64{1}, Reclamed: 0}, 392 | other: &TailBitmap{Offset: 64, Words: []uint64{2}, Reclamed: 0}, 393 | want: &TailBitmap{Offset: 64, Words: []uint64{3}, Reclamed: 0}, 394 | }, 395 | // 1111 1111 xxxx 396 | // 1111 yyyy 397 | { 398 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1}, Reclamed: 0}, 399 | other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2}, Reclamed: 0}, 400 | want: &TailBitmap{Offset: 64 * 2, Words: []uint64{1}, Reclamed: 0}, 401 | }, 402 | // 1111 1111 xxxx 403 | // 1111 1111 1111 yyyy 404 | { 405 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1}, Reclamed: 0}, 406 | other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2}, Reclamed: 0}, 407 | want: &TailBitmap{Offset: 64 * 3, Words: []uint64{2}, Reclamed: 64 * 3}, 408 | }, 409 | // 1111 1111 xxxx xxxx xxxx 410 | // 1111 1111 1111 yyyy 411 | { 412 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 1, 7}, Reclamed: 0}, 413 | other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2}, Reclamed: 0}, 414 | want: &TailBitmap{Offset: 64 * 3, Words: []uint64{3, 7}, Reclamed: 0}, 415 | }, 416 | // 1111 1111 xxxx 417 | // 1111 yyyy yyyy yyyy 418 | { 419 | input: 
&TailBitmap{Offset: 64 * 2, Words: []uint64{1}, Reclamed: 0}, 420 | other: &TailBitmap{Offset: 64 * 1, Words: []uint64{8, 2, 4}, Reclamed: 0}, 421 | want: &TailBitmap{Offset: 64 * 2, Words: []uint64{3, 4}, Reclamed: 0}, 422 | }, 423 | // 1111 1111 xxxx xxxx xxxx 424 | // 1111 yyyy yyyy yyyy 425 | { 426 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3, 7}, Reclamed: 0}, 427 | other: &TailBitmap{Offset: 64 * 1, Words: []uint64{8, 2, 4}, Reclamed: 0}, 428 | want: &TailBitmap{Offset: 64 * 2, Words: []uint64{3, 7, 7}, Reclamed: 0}, 429 | }, 430 | 431 | // trigger reclaim if new all-ones are found. 432 | { 433 | input: &TailBitmap{Offset: 64 * 1023, Words: []uint64{1, 3, 7}, Reclamed: 0}, 434 | other: &TailBitmap{Offset: 64 * 1023, Words: []uint64{ff - 1}, Reclamed: 0}, 435 | want: &TailBitmap{Offset: 64 * 1024, Words: []uint64{3, 7}, Reclamed: 64 * 1024}, 436 | }, 437 | } 438 | 439 | for i, c := range cases { 440 | c.input.Union(c.other) 441 | ta.Equal(c.want, c.input, "%d-th: Get case: %+v", i+1, c) 442 | 443 | } 444 | } 445 | 446 | func TestTailBitmap_Intersection(t *testing.T) { 447 | 448 | ta := require.New(t) 449 | 450 | cases := []struct { 451 | input *TailBitmap 452 | other *TailBitmap 453 | want *TailBitmap 454 | }{ 455 | // 1111 xxxx 456 | // nil 457 | { 458 | input: &TailBitmap{Offset: 64 * 1, Words: []uint64{1}, Reclamed: 0}, 459 | other: nil, 460 | want: &TailBitmap{Offset: 64 * 0, Words: []uint64{}, Reclamed: 0}, 461 | }, 462 | 463 | // 1111 xxxx 464 | // 1111 1111 1111 yyyy 465 | { 466 | input: &TailBitmap{Offset: 64 * 1, Words: []uint64{1}, Reclamed: 0}, 467 | other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2}, Reclamed: 0}, 468 | want: &TailBitmap{Offset: 64 * 1, Words: []uint64{1}, Reclamed: 0}, 469 | }, 470 | 471 | // 1111 1111 1111 xxxx xxxx 472 | // 1111 yyyy 473 | { 474 | input: &TailBitmap{Offset: 64 * 3, Words: []uint64{1, 3}, Reclamed: 0}, 475 | other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2}, Reclamed: 0}, 476 | want: 
&TailBitmap{Offset: 64 * 1, Words: []uint64{2}, Reclamed: 64 * 1}, 477 | }, 478 | 479 | // 1111 1111 xxxx xxxx 480 | // 1111 1111 1111 yyyy yyyy 481 | { 482 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0}, 483 | other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2, 4}, Reclamed: 0}, 484 | want: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 2 & 3}, Reclamed: 64 * 2}, 485 | }, 486 | 487 | // 1111 1111 xxxx xxxx xxxx xxxx 488 | // 1111 1111 1111 yyyy yyyy 489 | { 490 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3, 7, 7}, Reclamed: 0}, 491 | other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2, 4}, Reclamed: 0}, 492 | want: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 2 & 3, 7 & 4}, Reclamed: 64 * 2}, 493 | }, 494 | 495 | // 1111 1111 xxxx xxxx 496 | // 1111 yyyy yyyy 497 | { 498 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0}, 499 | other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2, 4}, Reclamed: 0}, 500 | want: &TailBitmap{Offset: 64 * 1, Words: []uint64{2}, Reclamed: 64 * 1}, 501 | }, 502 | 503 | // 1111 1111 xxxx xxxx 504 | // 1111 yyyy yyyy yyyy yyyy 505 | { 506 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0}, 507 | other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2, 2, 3, 4}, Reclamed: 0}, 508 | want: &TailBitmap{Offset: 64 * 1, Words: []uint64{2, 1 & 2, 3 & 3}, Reclamed: 64 * 1}, 509 | }, 510 | } 511 | 512 | for i, c := range cases { 513 | c.input.Intersection(c.other) 514 | ta.Equal(c.want, c.input, "%d-th: Get case: %+v", i+1, c) 515 | } 516 | } 517 | 518 | func TestTailBitmap_Diff(t *testing.T) { 519 | 520 | ta := require.New(t) 521 | 522 | ff := uint64(0xffffffffffffffff) 523 | 524 | cases := []struct { 525 | input *TailBitmap 526 | other *TailBitmap 527 | want *TailBitmap 528 | }{ 529 | // 1111 xxxx 530 | // nil 531 | { 532 | input: &TailBitmap{Offset: 64 * 1, Words: []uint64{1}, Reclamed: 0}, 533 | other: nil, 534 | want: &TailBitmap{Offset: 64 * 1, Words: 
[]uint64{1}, Reclamed: 0}, 535 | }, 536 | 537 | // 1111 xxxx 538 | // 1111 1111 1111 yyyy 539 | { 540 | input: &TailBitmap{Offset: 64 * 1, Words: []uint64{1}, Reclamed: 0}, 541 | other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2}, Reclamed: 0}, 542 | want: &TailBitmap{Offset: 64 * 1, Words: []uint64{ff - 1}, Reclamed: 0}, 543 | }, 544 | 545 | // 1111 1111 1111 xxxx xxxx 546 | // 1111 yyyy 547 | { 548 | input: &TailBitmap{Offset: 64 * 3, Words: []uint64{1, 3}, Reclamed: 0}, 549 | other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2}, Reclamed: 0}, 550 | want: &TailBitmap{Offset: 64 * 1, Words: []uint64{ff - 2, ff, 1, 3}, Reclamed: 64 * 1}, 551 | }, 552 | 553 | // 1111 1111 xxxx xxxx 554 | // 1111 1111 1111 yyyy yyyy 555 | { 556 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0}, 557 | other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2, 4}, Reclamed: 0}, 558 | want: &TailBitmap{Offset: 64 * 2, Words: []uint64{0, 1}, Reclamed: 0}, 559 | }, 560 | 561 | // 1111 1111 xxxx xxxx xxxx xxxx 562 | // 1111 1111 1111 yyyy yyyy 563 | { 564 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3, 7, 7}, Reclamed: 0}, 565 | other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2, 4}, Reclamed: 0}, 566 | want: &TailBitmap{Offset: 64 * 2, Words: []uint64{0, 1, 3, 7}, Reclamed: 0}, 567 | }, 568 | 569 | // 1111 1111 xxxx xxxx 570 | // 1111 yyyy yyyy 571 | { 572 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0}, 573 | other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2, 4}, Reclamed: 0}, 574 | want: &TailBitmap{Offset: 64 * 1, Words: []uint64{ff - 2, 1, 3}, Reclamed: 64 * 1}, 575 | }, 576 | 577 | // 1111 1111 xxxx xxxx 578 | // 1111 yyyy yyyy yyyy yyyy 579 | { 580 | input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0}, 581 | other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2, 2, 3, 4}, Reclamed: 0}, 582 | want: &TailBitmap{Offset: 64 * 1, Words: []uint64{ff - 2, 1, 0}, Reclamed: 64 * 1}, 583 | }, 584 | } 585 | 586 | 
for i, c := range cases { 587 | c.input.Diff(c.other) 588 | ta.Equal(c.want, c.input, "%d-th: Get case: %+v", i+1, c) 589 | } 590 | } 591 | 592 | func TestTailBitmap_Len(t *testing.T) { 593 | 594 | ta := require.New(t) 595 | 596 | cases := []struct { 597 | input *TailBitmap 598 | want int64 599 | }{ 600 | {input: &TailBitmap{Offset: 0, Words: []uint64{}}, want: 0}, 601 | {input: &TailBitmap{Offset: 0, Words: []uint64{1}}, want: 1}, 602 | {input: &TailBitmap{Offset: 64 * 1, Words: []uint64{}}, want: 64 * 1}, 603 | {input: &TailBitmap{Offset: 64 * 1, Words: []uint64{1}}, want: 64*1 + 1}, 604 | {input: &TailBitmap{Offset: 64 * 2, Words: []uint64{2}}, want: 64*2 + 2}, 605 | {input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 2}}, want: 64*3 + 2}, 606 | {input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 2, 0}}, want: 64*3 + 2}, 607 | } 608 | 609 | for i, c := range cases { 610 | got := c.input.Len() 611 | ta.Equal(c.want, got, "%d-th: Get case: %+v", i+1, c) 612 | } 613 | } 614 | -------------------------------------------------------------------------------- /traft.go: -------------------------------------------------------------------------------- 1 | // Package traft is a raft variant with out-of-order commit/apply 2 | // and a more generalized member change algo. 3 | package traft 4 | 5 | import ( 6 | "fmt" 7 | "net" 8 | "sort" 9 | "strings" 10 | "sync" 11 | "sync/atomic" 12 | "time" 13 | 14 | grpc "google.golang.org/grpc" 15 | "google.golang.org/grpc/reflection" 16 | ) 17 | 18 | type TRaft struct { 19 | running int64 20 | 21 | // close it to notify all goroutines to shutdown. 22 | shutdown chan struct{} 23 | 24 | // Communication channel with Loop(). 25 | // Only Loop() modifies state of TRaft. 26 | // Other goroutines send an queryBody through this channel and wait for an 27 | // operation reply. 28 | actionCh chan *queryBody 29 | 30 | // for external component to receive traft state changes. 
31 | MsgCh chan string 32 | 33 | grpcServer *grpc.Server 34 | 35 | // wait group of all worker goroutines 36 | workerWG sync.WaitGroup 37 | 38 | Node 39 | } 40 | 41 | func init() { 42 | initLogging() 43 | } 44 | 45 | func NewTRaft(id int64, idAddrs map[int64]string) *TRaft { 46 | _, ok := idAddrs[id] 47 | if !ok { 48 | panic("my id is not in cluster") 49 | } 50 | 51 | members := make(map[int64]*ReplicaInfo, 0) 52 | 53 | ids := []int64{} 54 | for id, _ := range idAddrs { 55 | ids = append(ids, id) 56 | } 57 | 58 | sort.Slice(ids, func(i, j int) bool { 59 | return ids[i] < ids[j] 60 | }) 61 | 62 | for p, id := range ids { 63 | members[id] = &ReplicaInfo{ 64 | Id: id, 65 | Addr: idAddrs[id], 66 | Position: int64(p), 67 | } 68 | } 69 | 70 | conf := &Cluster{ 71 | Members: members, 72 | } 73 | maxPos := conf.MaxPosition() 74 | conf.Quorums = buildMajorityQuorums(1< KVs = 1; 111 | * int64 WAL 112 | * } 113 | * 114 | * 115 | * message WALRecord { 116 | * 117 | * 118 | * } 119 | */ 120 | 121 | message LeaderId { 122 | int64 Term = 1; 123 | int64 Id = 2; 124 | } 125 | 126 | 127 | // The replica that has the latest log is allow to be a new leader. 128 | // I.e., log is forwarded from latest leader(max Committer), and has the highest 129 | // log seq number. 130 | // Then a leader is chosen from these candidates by their Current leader id. 131 | // The max LeaderId wins. 132 | 133 | message Node { 134 | // replica id of this replica. 135 | int64 Id = 3; 136 | 137 | Cluster Config = 1; 138 | 139 | // From which log seq number we keeps here. 140 | int64 LogOffset = 4; 141 | repeated LogRecord Logs = 2; 142 | 143 | // local view of every replica, including this node too. 144 | map Status = 6; 145 | } 146 | 147 | message LogStatus { 148 | LeaderId Committer = 4; 149 | TailBitmap Accepted = 1; 150 | } 151 | 152 | message ReplicaStatus { 153 | 154 | // last seen term+id 155 | // int64 Term = 3; 156 | // int64 Id = 10; 157 | // the last leader it voted for. 
or it is local term + local id. 158 | // E.g., voted for itself. 159 | // 160 | // TODO cleanup comment: 161 | // which replica it has voted for as a leader. 162 | // 163 | // Accepted is the same as VotedFor after receiving one log-replication 164 | // message from the leader. 165 | // 166 | // Before receiving a message, VotedFor is the leader this replica knows of, 167 | // Accepted is nil. 168 | LeaderId VotedFor = 10; 169 | 170 | // at what time the voted value expires, 171 | // in unix time in nanosecond: 10^-9 second 172 | int64 VoteExpireAt = 11; 173 | 174 | // The Leader tried to commit all of the local logs. 175 | // The Committer is the same as Author if a log entry is committed by its 176 | // Author. 177 | // 178 | // If an Author fails and the log is finally committed by some other leader, 179 | // Committer is a higher value than Author. 180 | // 181 | // It is similar to the vrnd/vballot concept in paxos. 182 | // the Ballot number a value is accepted at. 183 | LeaderId Committer = 4; 184 | 185 | // What logs has been accepted by this replica. 186 | TailBitmap Accepted = 1; 187 | TailBitmap Committed = 2; 188 | TailBitmap Applied = 3; 189 | } 190 | 191 | message ReplicaInfo { 192 | int64 Id = 1; 193 | string Addr = 2; 194 | // Position indicates the index of this member in its cluster. 195 | int64 Position = 3; 196 | } 197 | 198 | message Cluster { 199 | map Members = 11; 200 | repeated uint64 Quorums = 21; 201 | } 202 | 203 | message ElectReq { 204 | // who initiates the election 205 | LeaderId Candidate = 1; 206 | 207 | 208 | // candidate local log status: 209 | 210 | // Latest leader that forwarded log to the candidate 211 | LeaderId Committer = 2; 212 | 213 | // what logs the candidate has. 214 | TailBitmap Accepted = 3; 215 | } 216 | 217 | message ElectReply { 218 | bool OK = 10; 219 | 220 | // the replica id this reply comes from 221 | int64 Id = 1; 222 | 223 | // voted for a candidate or the previous voted other leader. 
224 | LeaderId VotedFor = 2; 225 | 226 | // latest log committer. 227 | LeaderId Committer = 4; 228 | 229 | // what logs I have. 230 | TailBitmap Accepted = 21; 231 | 232 | TailBitmap Committed = 22; 233 | 234 | // The logs that voter has but leader candidate does not have. 235 | // For the leader to rebuild all possibly committed logs from a quorum. 236 | repeated LogRecord Logs = 30; 237 | } 238 | 239 | message LogForwardReq { 240 | LeaderId Committer = 1; 241 | repeated LogRecord Logs = 2; 242 | 243 | // Committed indicates logs committed by leader. 244 | // A follower should commit every log it has that is in Committed directly. 245 | TailBitmap Committed = 3; 246 | } 247 | 248 | message LogForwardReply { 249 | bool OK = 10; 250 | // A replica responding a VotedFor with the same value with 251 | // ReplciateReq.Committer indicates the logs are accepted. 252 | // Otherwise declined. 253 | LeaderId VotedFor =1; 254 | 255 | // Also a replica should respond with what logs it already has and 256 | // has committed. 257 | 258 | TailBitmap Accepted = 2; 259 | TailBitmap Committed = 3; 260 | } 261 | 262 | message ProposeReply { 263 | bool OK = 2; 264 | string Err = 3; 265 | // I am not leader, please redirect to `OtherLeader` to write to TRaft. 
266 | LeaderId OtherLeader =1; 267 | } 268 | 269 | service TRaft { 270 | rpc Elect (ElectReq) returns (ElectReply) {} 271 | rpc LogForward (LogForwardReq) returns (LogForwardReply) {} 272 | rpc Propose (Cmd) returns (ProposeReply) {} 273 | } 274 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | package traft 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "time" 7 | ) 8 | 9 | func cmpI64(a, b int64) int { 10 | if a > b { 11 | return 1 12 | } 13 | if a < b { 14 | return -1 15 | } 16 | return 0 17 | } 18 | 19 | func uSecondI64() int64 { 20 | return int64(uSecond()) 21 | } 22 | 23 | func uSecond() time.Duration { 24 | now := time.Now() 25 | return time.Duration(now.Unix())*1000*1000*1000 + time.Duration(now.Nanosecond()) 26 | } 27 | 28 | func join(ss ...string) string { 29 | return strings.Join(ss, "") 30 | } 31 | 32 | var basePort = int64(5500) 33 | 34 | // newCluster starts a grpc server for every replica. 
func newCluster(ids []int64) []*TRaft {

	// id -> listen address; shared by every replica created below.
	cluster := make(map[int64]string)

	trafts := make([]*TRaft, 0)

	// Each replica gets the address ":<basePort+id>".
	for _, id := range ids {
		addr := fmt.Sprintf(":%d", basePort+int64(id))
		cluster[id] = addr
	}

	// Create one TRaft per id, in the order of ids.
	for _, id := range ids {
		srv := NewTRaft(id, cluster)
		trafts = append(trafts, srv)
	}

	return trafts
}

// startCluster calls StartServer and then StartMainLoop on every TRaft in ts.
func startCluster(ts []*TRaft) {

	for _, t := range ts {
		// NOTE(review): an earlier comment here said that in a test env
		// only the server is started and loops are started manually, but
		// the main loop is started here as well. Confirm which is intended.
		t.StartServer()
		t.StartMainLoop()
	}
}


--------------------------------------------------------------------------------
/util_test.go:
--------------------------------------------------------------------------------
package traft

import (
	fmt "fmt"
	"testing"

	"github.com/stretchr/testify/require"
)

// Test_serveCluster checks the initial state of every TRaft returned by
// newCluster for a three-replica cluster.
func Test_serveCluster(t *testing.T) {

	ta := require.New(t)

	ids := []int64{1, 2, 3}

	ts := newCluster(ids)
	defer stopAll(ts)

	for i, tr := range ts {
		// 3, 5, 6 in binary are 011 101 110.
		ta.Equal([]uint64{3, 5, 6}, tr.Config.Quorums)

		// Debug output only; nothing is asserted on it.
		fmt.Println("===", tr.Config.Members)
		fmt.Println("---", tr.Config.SortedReplicaInfos())

		// Positions follow the ascending order of ids.
		ta.Equal([]*ReplicaInfo{
			&ReplicaInfo{Id: 1, Addr: ":5501", Position: 0},
			&ReplicaInfo{Id: 2, Addr: ":5502", Position: 1},
			&ReplicaInfo{Id: 3, Addr: ":5503", Position: 2},
		}, tr.Config.SortedReplicaInfos())

		ta.Equal(int64(0), tr.LogOffset)
		ta.Equal([]*LogRecord{}, tr.Logs)
		ta.Equal(ids[i], tr.Id)
		for _, id := range ids {
			st := tr.Status[id]
			ta.Equal(&ReplicaStatus{
				// every replica is recorded as having voted for itself at first.
				VotedFor:  &LeaderId{Term: 0, Id: id},
				Committer: nil,
				Accepted:  NewTailBitmap(0),
				Committed: NewTailBitmap(0),
				Applied:   NewTailBitmap(0),
			}, st)
		}
	}
}

--------------------------------------------------------------------------------
/votereply.go:
--------------------------------------------------------------------------------
package traft

// PopRecord removes and returns the first record in v.Logs if its Seq
// equals lsn.
// It returns nil if v.Logs is empty or the first record's Seq is greater
// than lsn.
// It panics if the first record's Seq is less than lsn.
func (v *ElectReply) PopRecord(lsn int64) *LogRecord {
	if len(v.Logs) == 0 {
		return nil
	}

	r := v.Logs[0]
	if r.Seq < lsn {
		// The first record is older than the requested lsn.
		panic("wtf")
	}

	if r.Seq == lsn {
		v.Logs = v.Logs[1:]
		return r
	}

	return nil

}

--------------------------------------------------------------------------------
/votereply_test.go:
--------------------------------------------------------------------------------
package traft

import (
	"testing"

	"github.com/stretchr/testify/require"
)

// TestVoteReply_Pop checks PopRecord on an ElectReply holding records with
// Seq 5 and 7.
func TestVoteReply_Pop(t *testing.T) {

	ta := require.New(t)

	vr := &ElectReply{
		Logs: []*LogRecord{
			NewRecord(NewLeaderId(1, 2), 5, nil),
			NewRecord(NewLeaderId(1, 2), 7, nil),
		},
	}

	var got *LogRecord
	// lsn below the first record: nothing is popped.
	ta.Nil(vr.PopRecord(4))

	got = vr.PopRecord(5)
	ta.NotNil(got)
	ta.Equal(int64(5), got.Seq)

	// pop again
	ta.Nil(vr.PopRecord(5))

	// no record with Seq 6: nothing is popped.
	ta.Nil(vr.PopRecord(6))

	got = vr.PopRecord(7)
	ta.NotNil(got)
	ta.Equal(int64(7), got.Seq)

	// pop from empty logs:
	ta.Nil(vr.PopRecord(5))
}

--------------------------------------------------------------------------------