├── .github
    ├── CODE_OF_CONDUCT.md
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   ├── doc_improve.md
    │   ├── enhancement.md
    │   ├── feature_request.md
    │   ├── github-does-not-support-choosing-PR-template-on-web-yet
    │   ├── refactor_request.md
    │   └── testing_request.md
    ├── PULL_REQUEST_TEMPLATE.md
    ├── settings-sample.yml
    └── workflows
    │   ├── golangci-lint.yml
    │   └── test.yml
├── .gitignore
├── .travis.yml
├── .xpt.vim
├── LICENSE
├── Makefile
├── README.md
├── _tmpl
    └── _inc
    │   ├── README.md.j2
    │   ├── badges.md
    │   └── install.md
├── ballot.go
├── ballot_test.go
├── build-logs.sh
├── clusterconfig.go
├── clusterconfig_test.go
├── cmd.go
├── cmd_test.go
├── commit.go
├── common.mk
├── docs
    ├── design
    │   └── mergelog.md
    ├── log.md
    ├── traft-package.md
    └── traft.md
├── elect.go
├── elect_test.go
├── errors.go
├── gen-proto.sh
├── go.mod
├── go.sum
├── internal_api.go
├── leaderid.go
├── leaderid_test.go
├── log.go
├── logforward.go
├── logforward_test.go
├── logging.go
├── mainloop.go
├── propose.go
├── quorum.go
├── quorum_test.go
├── record.go
├── record_test.go
├── replicastatus.go
├── replicastatus_test.go
├── rpc.go
├── scripts
    ├── build_md.py
    └── requirements.txt
├── server.go
├── str.go
├── tailbitmap.go
├── tailbitmap_test.go
├── traft.go
├── traft.pb.go
├── traft.proto
├── util.go
├── util_test.go
├── votereply.go
└── votereply_test.go


/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # OpenACID's Code of Conduct
  2 | 
  3 | <!-- START doctoc generated TOC please keep comment here to allow auto update -->
  4 | <!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
  5 | 
  6 | 
  7 | - [Why have a Code of Conduct?](#why-have-a-code-of-conduct)
  8 | - [Our Standards](#our-standards)
  9 | - [Social Rules](#social-rules)
 10 |   - [No feigning surprise](#no-feigning-surprise)
 11 |   - [No condescending well-actually’s](#no-condescending-well-actually%E2%80%99s)
 12 |   - [No backseat driving](#no-backseat-driving)
 13 |   - [No subtle -isms](#no-subtle--isms)
 14 | - [Giving and Receiving Feedback](#giving-and-receiving-feedback)
 15 | - [Enforcement](#enforcement)
 16 | 
 17 | <!-- END doctoc generated TOC please keep comment here to allow auto update -->
 18 | 
 19 | ## Why have a Code of Conduct?
 20 | 
 21 | This Code of Conduct is designed to help all of us build a pleasant, productive,
 22 | and fearless community.
 23 | 
 24 | We are striving to make our community a great group of people to work with.
 25 | 
 26 | This Code of Conduct applies both within project spaces and in public spaces
 27 | when an individual is representing the project or its community.
 28 | 
 29 | 
 30 | ## Our Standards
 31 | 
 32 | Examples of behavior that contributes to creating a positive environment
 33 | include:
 34 | 
 35 | * Using welcoming and inclusive language
 36 | * Being respectful of differing viewpoints and experiences
 37 | * Gracefully accepting constructive criticism
 38 | * Focusing on what is best for the community
 39 | * Showing empathy towards other community members
 40 | 
 41 | Examples of unacceptable behavior by participants include:
 42 | 
 43 | * The use of sexualized language or imagery and unwelcome sexual attention or
 44 |   advances
 45 | * Trolling, insulting/derogatory comments, and personal or political attacks
 46 | * Public or private harassment
 47 | * Publishing others' private information, such as a physical or electronic
 48 |   address, without explicit permission
 49 | * Other conduct which could reasonably be considered inappropriate in a
 50 |   professional setting
 51 | 
 52 | 
 53 | ## Social Rules
 54 | 
 55 | 
 56 | ### No feigning surprise
 57 | 
 58 | **The first rule means you shouldn't act surprised when people say they don't know
 59 | something**.
 60 | 
 61 | This applies to both technical things ("What?! I can't believe you
 62 | don't know what the stack is!") and non-technical things ("You don't know who
 63 | RMS is?!").
 64 | 
 65 | Feigning surprise has absolutely no social or educational benefit: When people
 66 | feign surprise, it's usually to make them feel better about themselves and
 67 | others feel worse.
 68 | And even when that's not the intention, it's almost always the effect.
 69 | 
 70 | As you've probably already guessed, this rule is tightly coupled to our belief
 71 | in the importance of people **feeling comfortable saying "I don't know" and "I
 72 | don't understand."**
 73 | 
 74 | 
 75 | ### No condescending well-actually’s
 76 | 
 77 | **A well-actually happens when someone says something that's almost— but not
 78 | entirely— correct, and you say, "well, actually…" and then give a minor
 79 | correction**.
 80 | 
 81 | Even in complicated environments where small details and edge-cases can be
 82 | forgotten, unless they are critical, they should not be interjected.
 83 | 
 84 | If they are critical to the conversation, phrasing can be the difference between
 85 | a valuable clarification and condescension, e.g. instead of “well actually …”, try a
 86 | simple change to “don’t forget …” or “it’s easy to forget …”.
 87 | 
 88 | 
 89 | ### No backseat driving
 90 | 
 91 | **If you overhear people working through a problem, you shouldn't intermittently
 92 | lob advice across the room**.
 93 | 
 94 | This can lead to the "too many cooks" problem, but more important, it can be
 95 | rude and disruptive to half-participate in a conversation.
 96 | This is particularly true in a distributed environment involving conversations
 97 | in Slack.
 98 | The occasional interjection to an on-going conversation, particularly based on
 99 | backscroll, can be very disruptive.
100 | 
101 | This isn't to say you shouldn't help, offer advice, or join conversations.
102 | On the contrary, we encourage all those things.
103 | Rather, it just means that when you want to help out or work with others, you
104 | should fully engage and not just interject sporadically.
105 | 
106 | 
107 | ### No subtle -isms
108 | 
109 | **Subtle -isms, also called microaggressions, are small things that make others
110 | feel uncomfortable**, for example, saying "It's so easy my grandmother could do
111 | it" is a subtle -ism, as it is both subtly sexist and ageist.
112 | 
113 | The "subtle" in "subtle -isms" means that it's probably not obvious to everyone
114 | right away what was wrong with the comment, even people in the group otherwise
115 | affected by the comment.
116 | And, even though they are subtle, might seem insignificant, and are
117 | often unintentional, a steady stream of them compounds to make people in
118 | under-represented groups feel less welcome.
119 | 
120 | 
121 | ## Giving and Receiving Feedback
122 | 
123 | **Give constructive, not critical feedback**.
124 | 
125 | Feedback is negatively critical when it surfaces something wrong with someone or
126 | something they produced, especially without any mention of ways to make their
127 | behavior or their product better.
128 | 
129 | - **Critical feedback** on work often looks like "you don't write enough tests" or
130 |   "your code quality isn't good enough".
131 |   Personal criticism can be more severe and often looks like "you should be less
132 |   judgemental" or "you are a burden because you ask too many questions".
133 | 
134 | - **Constructive feedback** is more about how a person can do better rather than what
135 |   they are doing wrong.
136 |   If you want someone to do something better, you should tell them what better
137 |   looks like.
138 |   Ask a question to get a discussion rolling, to gain context, and then if you see
139 |   room for improvement give declarative feedback to that effect.
140 | 
141 |   This creates an environment where people understand what success looks like
142 |   instead of just feeling like they are unsuccessful.
143 | 
144 | **Code, configurations, and their reviews are also mechanisms for communication**.
145 | 
146 | Just as you shouldn't interact with people poorly in person, do not interact
147 | poorly through code or code review.
148 | 
149 | **You are not your products**.
150 | Technical critiques are integral, and should be hard on the product, not on the
151 | producer.
152 | While it is important to care about your work and producing the best thing you
153 | can, this can make review difficult.
154 | It is important to realize that it’s better to find errors in review than in
155 | production and recognize that your work fits into a larger whole.
156 | 
157 | **Go about your review under the assumption that the decisions were made for a
158 | reason, not in a vacuum**.
159 | Ask about circumstances if you’re confused.
160 | 
161 | Be pragmatic, ask for context, don’t filibuster, don’t block on style not
162 | explicitly covered in DO’s style guides.
163 | 
164 | Code, configurations, architecture, platforms, frameworks will need to be
165 | changed. Fight for your way if you think it’s right, **but not only to be right**.
166 | 
167 | 
168 | ## Enforcement
169 | 
170 | Enforcement of the Code of Conduct is essential.
171 | 
172 | If there is no enforcement, then the Code of Conduct becomes a feel-good
173 | document without value.
174 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: 'bug'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | 
12 | **To Reproduce**
13 | 1. Go to '...'
14 | 2. Click on '....'
15 | 3. Scroll down to '....'
16 | 4. See error
17 | 
18 | **Expected behavior**
19 | 
20 | **Actual behavior**
21 | 
22 | **Screenshots**
23 | 
24 | **Environment (please complete the following information):**
25 |  - OS: [e.g. iOS]
26 |  - Version [e.g. 22]
27 | 
28 | **Language (please complete the following information):**
29 |  - Language: [e.g. Go]
30 |  - Version [e.g. 22]
31 | 
32 | **Additional context**
33 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/doc_improve.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Document
 3 | about: Add document
 4 | title: ''
 5 | labels: 'doc'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **What document to add**
11 | 
12 | **Additional context**
13 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/enhancement.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Enhancement
 3 | about: Enhancement to a feature/API
 4 | title: ''
 5 | labels: 'enhancement'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is the requested enhancement related to a problem? Please describe.**
11 | 
12 | **Describe the solution**
13 | 
14 | **Describe alternatives you've considered**
15 | 
16 | **Additional context**
17 | 
18 | **Affect other component or side effect**
19 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: 'feature'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | 
12 | **Describe the solution you'd like**
13 | 
14 | **Describe alternatives you've considered**
15 | 
16 | **Additional context**
17 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/github-does-not-support-choosing-PR-template-on-web-yet:
--------------------------------------------------------------------------------
1 | Choosing a PR template can be done with a URL query.
2 | 
3 | For now, GitHub reads the PR template only from `.github/PULL_REQUEST_TEMPLATE.md`.
4 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/refactor_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Refactor
 3 | about: Refactoring without impacting on end-user
 4 | title: ''
 5 | labels: 'refactor'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is the requested refactoring related to a known/potential problem? Please describe.**
11 | 
12 | **Describe what to do**
13 | 
14 | **Describe alternatives you've considered**
15 | 
16 | **Additional context**
17 | 
18 | **Affect other component or side effect**
19 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/testing_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Testing
 3 | about: Improve test suite.
 4 | title: ''
 5 | labels: 'testing'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is the requested test related to a known/potential problem? Please describe.**
11 | 
12 | **Describe what test should be added/modified**
13 | 
14 | **Describe alternatives you've considered**
15 | 
16 | **Additional context**
17 | 
18 | **Affect other component or side effect**
19 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | # Description
 2 | 
 3 | <!-- Please include a summary of the change.
 4 |      Please also include relevant motivation and context.
 5 |      List any dependencies that are required for this change.
 6 | -->
 7 | 
 8 | Fixes # (issue)
 9 | 
10 | ## Type of change
11 | 
12 | <!-- Please delete options that are not relevant. -->
13 | 
14 | - **Bug fix**         <!-- non-breaking change which fixes an issue -->
15 | - **New feature**     <!-- non-breaking change which adds functionality -->
16 | - **Breaking change** <!-- fix or feature that would cause existing functionality to not work as expected -->
17 | - **Refactoring**
18 | - **Document changes**
19 | - **Test changes**
20 | 
21 | 
22 | ## How to reproduce it (if it is a bug-fix PR)
23 | 
24 | - Env: x86-64, CentOS-7.4, kernel-3.10.0, GO-1.10.1, etc.
25 | 
26 | - Step-1:
27 | - Step-2:
28 | 
29 | 
30 | ## The solution (to fix a bug, implement a new feature etc.)
31 | 
32 | 
33 | 
34 | # Checklist:
35 | 
36 | - [ ] **Style**:       My code follows the **style guidelines** of this project
37 | - [ ] **Self-review**: I have performed a **self-review** of my own code
38 | - [ ] **Comment**:     I have **commented my code**, particularly in hard-to-understand areas
39 | - [ ] **Doc**:         I have made corresponding changes to the **documentation**
40 | - [ ] **No-warnings**: My changes generate **no new warnings**
41 | - [ ] **Add-test**:    I have added **tests** that prove my fix is effective or that my feature works
42 | - [ ] **Pass**:        New and existing **unit tests pass** locally with my changes
43 | - [ ] **Dep**:         Any **dependent** changes have been merged and published in downstream modules
44 | 


--------------------------------------------------------------------------------
/.github/settings-sample.yml:
--------------------------------------------------------------------------------
 1 | # Usage: copy this to settings.yml
 2 | 
 3 | _extends: gh-config
 4 | 
 5 | 
 6 | repository:
 7 |   name: name
 8 |   description: desc
 9 |   homepage: https://openacid.github.io/
10 |   topics: go, golang
11 | 


--------------------------------------------------------------------------------
/.github/workflows/golangci-lint.yml:
--------------------------------------------------------------------------------
 1 | name: golangci-lint
 2 | on:
 3 |   push:
 4 |     tags:
 5 |       - v*
 6 |     branches:
 7 |       - '*'
 8 |   pull_request:
 9 | jobs:
10 |   golangci:
11 |     name: lint
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v2
15 | 
16 |       - name: golangci-lint
17 |         uses: golangci/golangci-lint-action@v2
18 |         with:
19 |           # Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version.
20 |           version: v1.29
21 | 
22 |           # Optional: working directory, useful for monorepos
23 |           # working-directory: somedir
24 | 
25 |           # disable staticcheck:
26 |           # SA1019: package github.com/golang/protobuf/proto is deprecated: Use the "google.golang.org/protobuf/proto" package instead
27 |           args: --issues-exit-code=0 --exclude SA1019
28 | 
29 |           # Optional: show only new issues if it's a pull request. The default value is `false`.
30 |           # only-new-issues: true
31 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: test
 2 | on:
 3 |   push:
 4 |   pull_request:
 5 | 
 6 | jobs:
 7 |   test:
 8 |     strategy:
 9 |       matrix:
10 |         go-version:
11 |             - 1.14.x
12 |             - 1.15.x
13 |         os:
14 |             - ubuntu-latest
15 |             - macos-latest
16 |             - windows-latest
17 | 
18 |     runs-on: ${{ matrix.os }}
19 | 
20 |     steps:
21 |         - name: Install Go
22 |           uses: actions/setup-go@v2
23 |           with:
24 |             go-version: ${{ matrix.go-version }}
25 | 
26 |         - name: checkout
27 |           uses: actions/checkout@v2
28 | 
29 |         - name: cache
30 |           uses: actions/cache@v2
31 |           with:
32 |             path: ~/go/pkg/mod
33 |             key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
34 |             restore-keys: |
35 |               ${{ runner.os }}-go-
36 | 
37 |         - name: Check go-deps.txt existence
38 |           id: check_deps
39 |           uses: andstor/file-existence-action@v1
40 |           with:
41 |             files: "go-deps.txt"
42 | 
43 |         - name: Read go-deps.txt
44 |           id: godeps
45 |           if: steps.check_deps.outputs.files_exists == 'true'
46 |           uses: juliangruber/read-file-action@v1
47 |           with:
48 |             path: ./go-deps.txt
49 | 
50 |         - name: Install go-deps.txt
51 |           if: steps.check_deps.outputs.files_exists == 'true'
52 |           run: |
53 |             go get ${{ steps.godeps.outputs.content }}
54 | 
55 |         - name: test
56 |           run: |
57 |             go test -race ./...
58 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Binaries for programs and plugins
 2 | *.exe
 3 | *.exe~
 4 | *.dll
 5 | *.so
 6 | *.dylib
 7 | 
 8 | # Test binary, built with `go test -c`
 9 | *.test
10 | 
11 | # Output of the go coverage tool, specifically when used with LiteIDE
12 | *.out
13 | 
14 | # Dependency directories (remove the comment below to include it)
15 | # vendor/
16 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: go
 2 | 
 3 | go:
 4 |     - 1.14.x
 5 |     - 1.15.x
 6 |     # - tip
 7 | 
 8 | script:
 9 |     - make travis
10 | 
11 | after_success:
12 |     - make coveralls
13 | 
14 | # private repo requires an encrypted token:
15 | # env:
16 | #     global:
17 | #         # coveralls token
18 | #         secure: fJOPOuwBaZ59iQA1VskxZ3h08Nt5CjGbz5PdrZVT/v5UCG7DOLuVTx3x0Tb+gR9AG9lB8Fqpsnm0jjbBAPvOnyn1KIJDuK9Xj2PvKT78vhJ/SyCnn0BAinmxu9hZqghvyWIzeM8RrA3IrvmnoSUTdE1jnTC7McJ7np6cTRGO9Xe6b4mOO1xQOHJFMyTBFvA84uSKZPbuUHCrh19YH7NKrA4MKunX49R+niEFlFEM4oNM/2FXMca+4+OdlGNJmPkG0kV5exP87ihfqI3Q++9v3Z8SR0KOblL6yRBspaRDHmfKxuGx/YEf71pu0yu7nyT7uVeIABTz5SLrqX2Fhb/cpKb7iqCBQ+ifvgpd86pkfhrPUOsIO9N6pieNxmb+aCNm5WBJ2AaT1zrrfthpfbXvEl66K209rUDL0PV1n/u1pAgY5q7DQD5YuOnyAJNPBNQYYzJnZ+X1GjSNrHKOQPjXmrgwkq7KPVlDoqiaJAh97YwUmjXaULKYOm9JBPwVaToEUeCxzK82ZZRwa4YiYl3MLpJb+SvDMl97hgc58lolfg01wHgLYAT901bbq+qsrQZY4pkW9nDGvBuJg0Mru1bu6hqk/tUA7G4amh2y/5lJxxELednfnyzQ6fBeXKb0FVOTN9xRuFBkpRL1Drmbz3y6J2flAcdpJ4KgAMUP/941J6o=
19 | 


--------------------------------------------------------------------------------
/.xpt.vim:
--------------------------------------------------------------------------------
1 | call XPTemplate( 'info',  'lg.Infow("`^", `^)' )
2 | call XPTemplate( 'dd',  'lg.Infow("`mes^", "`f^", `f^`...{{^, "`f^", `f^`...^`}}^)' )
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 openacid
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | include common.mk
2 | 
3 | log:
4 | 	./build-logs.sh
5 | 
6 | update: log readme
7 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <!-- START doctoc generated TOC please keep comment here to allow auto update -->
  2 | <!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
  3 | **Table of Contents**  *generated with [DocToc](https://github.com/thlorenz/doctoc)*
  4 | 
  5 | - [traft](#traft)
  6 |   - [Merge Log](#merge-log)
  7 |     - [Definition](#definition)
  8 |     - [Lemma-only-max-committer-logs](#lemma-only-max-committer-logs)
  9 | - [Planned improvement to original raft](#planned-improvement-to-original-raft)
 10 | - [Features:](#features)
 11 | - [Progress](#progress)
 12 | 
 13 | <!-- END doctoc generated TOC please keep comment here to allow auto update -->
 14 | 
 15 | # traft
 16 | 
 17 | ## Merge Log
 18 | 
 19 | ### Definition
 20 | 
 21 | - **Safe**: A log is safe if it has been replicated to a quorum, no matter
 22 |   whether or not the **committed** flag is set on any replica.
 23 | 
 24 | ---
 25 | 
 26 | After a replica established leadership, it needs to merge the latest logs from a
 27 | quorum of replicas, to ensure that the leader replica has all **safe** logs.
 28 | 
 29 | 
 30 | ### Lemma-only-max-committer-logs
 31 | 
 32 | In TRaft, a leader only needs to use the logs from replicas with the max **committer**(`(term, id)`).
 33 | 
 34 | A log that is not present in any max-committer replica a leader has seen can **not**
 35 | be safe.
 36 | 
 37 | Proof:
 38 | 
 39 | If a log `A` becomes **safe**, it will be seen by the next leader `Li`,
 40 | because a leader has to collect logs from a quorum and any two quorums
 41 | intersect with each other.
 42 | 
 43 | If the next next leader `Lj`(`j>i`) has seen `Li`, it will choose `A`.
 44 | Otherwise, it will see a replica that has `A`.
 45 | 
 46 | ∴ Any newer leader will choose `A`.
 47 | 
 48 | ∴ TRaft only needs to merge logs from replicas with the latest `Committer`.
 49 | 
 50 | E.g.:
 51 | 
 52 | ```
 53 | Li: indicates a replica becomes leader
 54 | A/B: a log is written
 55 | 
 56 | R0 L1 A
 57 | R1    A
 58 | R2    A
 59 | R3      L2 B
 60 | R4           L3
 61 | ------------------------> time
 62 | ```
 63 | 
 64 | In this diagram:
 65 | L2 sees `A` and then updates its `Committer` to `L2`, then writes log `B` to its
 66 | local log. And R3 crashed before forwarding any log out.
 67 | 
 68 | -   If L3 established its leadership by contacting R2 and R3, it uses logs from only
 69 |     R3(R3 has the latest committer `L2`), it will see `A`.
 70 | 
 71 | -   If L3 established its leadership by contacting R0 and R1, it will also see `A`.
 72 | 
 73 | 
 74 | 都说deadline 是第一生产力. 放假没事, 试试看能不能7天写个raft出来, 当练手了. 2021-02-09
 75 | 装逼有风险, 万一写不出来也别笑话我.
 76 | 
 77 | 7天内直播: https://live.bilibili.com/22732677?visit_id=6eoy1n42a1w0
 78 | 
 79 | # Planned improvement to original raft
 80 | 
 81 | -   Leader election with less conflict.
 82 |     Raft has the issue that in a term, if multiple candidates try to elect themselves,
 83 |     the conflicts are unacceptable.
 84 |     Because in one term, A voter will only vote for at most one leader.
 85 | 
 86 |     In our impl, `term` is less strict:
 87 |     A voter is allowed to vote for another GREATER leader.
 88 | 
 89 | -   Out of order commit out of order apply.
 90 | 
 91 |     We use a bitmap to describe which logs interfere with each other.
 92 | 
 93 |     
 94 | -   Adopt a more generalized member change algo.
 95 | 
 96 |     Get rid of single-step member change.
 97 |     Because any of the CORRECT single-step member change algo is equivalent to joint-consensus.
 98 | 
 99 |     But joint-consensus is a specialization of the TODO (I don't have a good name for it yet).
100 | 
101 | 
102 | 
103 | # Features:
104 | 
105 | - [ ] Leader election
106 | - [ ] WAL log
107 | - [ ] snapshot: impl with https://github.com/openacid/slim , a static kv-like storage engine supporting protobuf.
108 | - [ ] member change with generalized joint consensus.
109 | - [ ] Out of order commit/apply if possible.
110 | 
111 | # Progress
112 | 
113 | -   2021-02-10:
114 |     LOC: +1008 -1
115 | 
116 |     ```
117 |     buildMajorityQuorums()
118 |     import TailBitmap, add Union()
119 |     TailBitmap.Clone()
120 |     TailBitmap.Diff()
121 |     ```
122 | 
123 | -   2021-02-11:
124 |     LOC: +754 -44
125 | 
126 |     ```
127 |     refactor buildMajorityQuorums()
128 |     LeaderId.Cmp()
129 |     add NewLeaderId
130 |     add NewBallot()
131 |     fix: LeaderId.Cmp() accept nil as operand
132 |     Ballot.CmpLog() to compare only the log-related fields
133 |     rename Ballot.Accepted to Ballot.AcceptedFrom
134 |     TailBitmap.Len()
135 |     refactor design, add ReplicaStatus, remove Ballot.
136 |     impl Vote Handler, under dev!!!
137 |     use gogoproto, build clean .pb.go with less code, add serveCluster() to setup a simple cluster for test
138 |     add NewCmdI64()
139 |     add NewRecord()
140 |     add ClusterConfig.GetReplicaInfo()
141 |     NewTailBitmap() accepts extra bits to set
142 |     ```
143 | 
144 | -   2021-02-12:
145 |     LOC: +2601 -70
146 | 
147 |     ```
148 |     add LeaderId:Clone()
149 |     refactor TailBitmap.Clone(): use proto
150 |     draft test of vote
151 |     rename By to Author, AcceptedFrom to Committer. borrowed concepts from git:DDD
152 |     add test: HandleVoteReq. add ShortStr() to Cmd, LeaderI, Record and []Record
153 |     refactor VoteReply: do not use ReplicaStatus to describe log status
154 |     add readme to record progress
155 |     readme: impl storage with slim
156 |     test that voter send back nil log
157 |     update readme: collect git log
158 |     TailBitmap: accept second operand to be nil
159 |     ```
160 | 
161 | -   2021-02-13:
162 |     LOC: +1309 -183
163 | 
164 |     ```
165 |     test Replicate, under dev
166 |     rename Node.Log to Node.Logs
167 |     add Cmd.Intefering() to check if two commands not be allowed to change execution order
168 |     add Record.Interfering()
169 |     update readme
170 |     NewTailBitmap() support non-64-aligned offset
171 |     add AddLog() for leader to propose a command
172 |     use map to store cluster members instead of slice
173 |     add ClusterConfig.IsQuorum() to check if a set of members is a quorum
174 |     TRaft.VoteOnce() run one round voting, to establish leadership
175 |     TRaft actor loop and voting loop
176 |     ```
177 | 
178 | -   2021-02-14:
179 |     LOC: +891 -229
180 | 
181 |     ```
182 |     mainloop as an actor is the only one update traft data. under dev. not passed yet
183 |     test VoteLoop
184 |     granted leader merges collected logs
185 |     after vote, leader merge responded logs
186 |     add API Propose without replication
187 |     ```
188 | 
189 | -   2021-02-15:
190 |     LOC: +476 -197
191 | 
192 |     ```
193 |     add interface toCmd() to build Cmd from string
194 |     test Replicate()
195 |     add TRaft.sleep(): sleep only if it is not stopped.
196 |     ```
197 | 
198 | -   2021-02-16:
199 |     LOC: +1297 -592
200 | 
201 |     ```
202 |     refactor: rename Replicate to LogForward
203 |     refactor: vote_test: remove unused type, funcs
204 |     gruop daily log format by date
205 |     ```
206 | 
207 | 
208 | 
209 | 
210 | <!--
211 | # Day-0 2021-02-09
212 | 
213 | - [x] TailBitmap to support for describing log dependency etc. see: https://github.com/openacid/low/blob/ci/bitmap/tailbitmap.go
214 | 
215 | # Day-1 2021-02-10
216 | 
217 | - [x]: design t-raft protobuf
218 | 
219 | # Day-2 2021-02-11
220 | 
221 | - [x]: design t-raft protobuf
222 | - [x]: impl handle_vote
223 | 
224 | # Day-3 2021-02-12
225 | 
226 | - [x]: refactor concepts
227 | - [x]: test handle_vote
228 | - [ ]: impl log replication
229 | - [ ]: impl traft main-loop
230 | -->


--------------------------------------------------------------------------------
/_tmpl/_inc/README.md.j2:
--------------------------------------------------------------------------------
 1 | # traft
 2 | 
 3 | {% include "docs/design/mergelog.md" %}
 4 | 
 5 | 都说deadline 是第一生产力. 放假没事, 试试看能不能7天写个raft出来, 当练手了. 2021-02-09
 6 | 装逼有风险, 万一写不出来也别笑话我.
 7 | 
 8 | 7天内直播: https://live.bilibili.com/22732677?visit_id=6eoy1n42a1w0
 9 | 
10 | # Planned improvement to original raft
11 | 
12 | -   Leader election with less conflict.
13 |     Raft has the issue that in a term, if multiple candidates try to elect themselves,
14 |     the conflicts are unacceptable.
15 |     Because in one term, A voter will only vote for at most one leader.
16 | 
17 |     In our impl, `term` is less strict:
18 |     A voter is allowed to vote for another GREATER leader.
19 | 
20 | -   Out of order commit out of order apply.
21 | 
22 |     We use a bitmap to describe which logs interfere with each other.
23 | 
24 |     
25 | -   Adopt a more generalized member change algo.
26 | 
27 |     Get rid of single-step member change.
28 |     Because any of the CORRECT single-step member change algo is equivalent to joint-consensus.
29 | 
30 |     But joint-consensus is a specialization of the TODO (I don't have a good name for it yet).
31 | 
32 | 
33 | 
34 | # Features:
35 | 
36 | - [ ] Leader election
37 | - [ ] WAL log
38 | - [ ] snapshot: impl with https://github.com/openacid/slim , a static kv-like storage engine supporting protobuf.
39 | - [ ] member change with generalized joint consensus.
40 | - [ ] Out of order commit/apply if possible.
41 | 
42 | # Progress
43 | 
44 | {% include "docs/log.md" %}
45 | 
46 | 
47 | 
48 | <!--
49 | # Day-0 2021-02-09
50 | 
51 | - [x] TailBitmap to support for describing log dependency etc. see: https://github.com/openacid/low/blob/ci/bitmap/tailbitmap.go
52 | 
53 | # Day-1 2021-02-10
54 | 
55 | - [x]: design t-raft protobuf
56 | 
57 | # Day-2 2021-02-11
58 | 
59 | - [x]: design t-raft protobuf
60 | - [x]: impl handle_vote
61 | 
62 | # Day-3 2021-02-12
63 | 
64 | - [x]: refactor concepts
65 | - [x]: test handle_vote
66 | - [ ]: impl log replication
67 | - [ ]: impl traft main-loop
68 | -->
69 | 


--------------------------------------------------------------------------------
/_tmpl/_inc/badges.md:
--------------------------------------------------------------------------------
 1 | [![Travis](https://travis-ci.com/openacid/{{ name }}.svg?branch=main)](https://travis-ci.com/openacid/{{ name }})
 2 | ![test](https://github.com/openacid/{{ name }}/workflows/test/badge.svg)
 3 | 
 4 | [![Report card](https://goreportcard.com/badge/github.com/openacid/{{ name }})](https://goreportcard.com/report/github.com/openacid/{{ name }})
 5 | [![Coverage Status](https://coveralls.io/repos/github/openacid/{{ name }}/badge.svg?branch=main&service=github)](https://coveralls.io/github/openacid/{{ name }}?branch=main&service=github)
 6 | 
 7 | [![GoDoc](https://godoc.org/github.com/openacid/{{ name }}?status.svg)](http://godoc.org/github.com/openacid/{{ name }})
 8 | [![PkgGoDev](https://pkg.go.dev/badge/github.com/openacid/{{ name }})](https://pkg.go.dev/github.com/openacid/{{ name }})
 9 | [![Sourcegraph](https://sourcegraph.com/github.com/openacid/{{ name }}/-/badge.svg)](https://sourcegraph.com/github.com/openacid/{{ name }}?badge)
10 | 


--------------------------------------------------------------------------------
/_tmpl/_inc/install.md:
--------------------------------------------------------------------------------
1 | # Install
2 | 
3 | ```sh
4 | go get github.com/openacid/{{ name }}
5 | ```
6 | 


--------------------------------------------------------------------------------
/ballot.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | // func NewBallot(cterm, cid, aterm, aid, lsn int64) *Ballot {
 4 | //     return &Ballot{
 5 | //         Current:      NewLeaderId(cterm, cid),
 6 | //         MaxLogSeq:    lsn,
 7 | //         Committer: NewLeaderId(aterm, aid),
 8 | //     }
 9 | // }
10 | 
11 | // // CmpLog compares log related fields with another ballot.
12 | // // I.e. Committer and MaxLogSeq.
13 | // func (a *Ballot) CmpLog(b *Ballot) int {
14 | //     r := a.Committer.Cmp(b.Committer)
15 | //     if r != 0 {
16 | //         return r
17 | //     }
18 | 
19 | //     return cmpI64(a.MaxLogSeq, b.MaxLogSeq)
20 | // }
21 | 


--------------------------------------------------------------------------------
/ballot_test.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | // func TestNewBallot(t *testing.T) {
 4 | 
 5 | //     ta := require.New(t)
 6 | 
 7 | //     got := NewBallot(1, 2, 3, 4, 5)
 8 | //     ta.Equal(int64(1), got.Current.Term)
 9 | //     ta.Equal(int64(2), got.Current.Id)
10 | 
11 | //     ta.Equal(int64(5), got.MaxLogSeq)
12 | 
13 | //     ta.Equal(int64(3), got.Committer.Term)
14 | //     ta.Equal(int64(4), got.Committer.Id)
15 | // }
16 | 
17 | // func TestBallog_CmpLog(t *testing.T) {
18 | 
19 | //     ta := require.New(t)
20 | 
21 | //     cases := []struct {
22 | //         a, b *Ballot
23 | //         want int
24 | //     }{
25 | //         {a: NewBallot(0, 0, 1, 1, 1), b: NewBallot(0, 0, 1, 1, 1), want: 0},
26 | //         {a: NewBallot(0, 0, 1, 1, 2), b: NewBallot(0, 0, 1, 1, 1), want: 1},
27 | //         {a: NewBallot(0, 0, 1, 2, 0), b: NewBallot(0, 0, 1, 1, 1), want: 1},
28 | //         {a: NewBallot(0, 0, 2, 0, 0), b: NewBallot(0, 0, 1, 1, 1), want: 1},
29 | //     }
30 | 
31 | //     for i, c := range cases {
32 | //         ta.Equal(c.want, c.a.CmpLog(c.b), "%d-th: case: %+v", i+1, c)
33 | //         ta.Equal(-c.want, c.b.CmpLog(c.a), "%d-th: case: %+v", i+1, c)
34 | //     }
35 | // }
36 | 


--------------------------------------------------------------------------------
/build-logs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | {
 4 | 
 5 | git log --format="%ai %h" --reverse \
 6 |     | awk '
 7 |     {
 8 |         # find the first diff date, print prev
 9 |         if ($1 != prev) {print prevhash }
10 |             prev = $1
11 |             prevhash = $4
12 |     }
13 |     '
14 | 
15 |     git log -n1 --format="%h"
16 | } \
17 |     | while read h; do
18 |         if [ ".$prev" == "." ]; then
19 |             prev=$h
20 |             continue
21 |         fi
22 | 
23 |         git log -n1 $h --format="%ai" \
24 |             | awk '{print "-   " $1 ":"}'
25 |         git diff $prev $h --shortstat -- . ':(exclude)*.pb.go' \
26 |             | awk '{ print "    LOC: +" $4 " -" $6 ""} '
27 |         echo
28 | 
29 |         echo '    ```'
30 |         git log  $prev..$h --reverse --format="%s" \
31 |             | awk '{ gsub("day-.: ", "", $0); print "    " $0 }'
32 |         echo '    ```'
33 |         echo
34 |         prev=$h
35 | done > docs/log.md
36 | 


--------------------------------------------------------------------------------
/clusterconfig.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import proto "github.com/gogo/protobuf/proto"
 4 | 
 5 | func (cc *Cluster) MaxPosition() int64 {
 6 | 	maxPos := int64(0)
 7 | 	for _, m := range cc.Members {
 8 | 		if maxPos < m.Position {
 9 | 			maxPos = m.Position
10 | 		}
11 | 	}
12 | 
13 | 	return maxPos
14 | }
15 | 
16 | func (cc *Cluster) Clone() *Cluster {
17 | 	return proto.Clone(cc).(*Cluster)
18 | }
19 | 
20 | func (cc *Cluster) SortedReplicaInfos() []*ReplicaInfo {
21 | 	maxPos := cc.MaxPosition()
22 | 
23 | 	members := make([]*ReplicaInfo, maxPos+1)
24 | 
25 | 	for _, m := range cc.Members {
26 | 		members[m.Position] = m
27 | 	}
28 | 
29 | 	return members
30 | }
31 | 
32 | // check if a set of member is a quorum.
33 | // The set of member is a bitmap in which a `1` indicates a present member.
34 | // In this system, the position of `1` is ReplicaInfo.Position.
35 | func (cc *Cluster) IsQuorum(v uint64) bool {
36 | 
37 | 	for _, q := range cc.Quorums {
38 | 		if v&q == q {
39 | 			return true
40 | 		}
41 | 	}
42 | 
43 | 	return false
44 | }
45 | 


--------------------------------------------------------------------------------
/clusterconfig_test.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/require"
 7 | )
 8 | 
 9 | func TestClusterConfig_SortedReplicaInfos(t *testing.T) {
10 | 
11 | 	ta := require.New(t)
12 | 
13 | 	cc := &Cluster{
14 | 		Members: map[int64]*ReplicaInfo{
15 | 			1: {1, "111", 0},
16 | 			2: {2, "222", 2},
17 | 			3: {3, "333", 4},
18 | 		},
19 | 	}
20 | 
21 | 	sorted := cc.SortedReplicaInfos()
22 | 
23 | 	cases := []struct {
24 | 		input int64
25 | 		want  *ReplicaInfo
26 | 	}{
27 | 		{0, &ReplicaInfo{1, "111", 0}},
28 | 		{1, nil},
29 | 		{2, &ReplicaInfo{2, "222", 2}},
30 | 		{3, nil},
31 | 		{4, &ReplicaInfo{3, "333", 4}},
32 | 	}
33 | 
34 | 	for i, c := range cases {
35 | 		got := sorted[c.input]
36 | 		ta.Equal(c.want, got, "%d-th: case: %+v", i+1, c)
37 | 	}
38 | }
39 | 
40 | func TestClusterConfig_IsQuorum(t *testing.T) {
41 | 
42 | 	ta := require.New(t)
43 | 
44 | 	cc := &Cluster{
45 | 		Members: map[int64]*ReplicaInfo{
46 | 			1: {1, "111", 0},
47 | 			2: {2, "222", 2},
48 | 			3: {3, "333", 4},
49 | 		},
50 | 	}
51 | 	// quorums are:
52 | 	// 10100
53 | 	// 00101
54 | 	// 10001
55 | 	cc.Quorums = buildMajorityQuorums(1 | 4 | 16)
56 | 
57 | 	cases := []struct {
58 | 		input uint64
59 | 		want  bool
60 | 	}{
61 | 		{1, false},
62 | 		{4, false},
63 | 		{16, false},
64 | 		{1 | 4, true},
65 | 		{4 | 16, true},
66 | 		{1 | 16, true},
67 | 		{1 | 4 | 16, true},
68 | 	}
69 | 
70 | 	for i, c := range cases {
71 | 		got := cc.IsQuorum(c.input)
72 | 		ta.Equal(c.want, got, "%d-th: case: %+v", i+1, c)
73 | 	}
74 | }
75 | 


--------------------------------------------------------------------------------
/cmd.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	fmt "fmt"
 5 | 	"strconv"
 6 | 	"strings"
 7 | )
 8 | 
 9 | func NewCmdI64(op, key string, v int64) *Cmd {
10 | 	cmd := &Cmd{
11 | 		Op:    op,
12 | 		Key:   key,
13 | 		Value: &Cmd_VI64{v},
14 | 	}
15 | 	return cmd
16 | }
17 | 
18 | func cmdValueShortStr(v isCmd_Value) string {
19 | 	switch vv := v.(type) {
20 | 	case *Cmd_VI64:
21 | 		return fmt.Sprintf("%d", vv.VI64)
22 | 	case *Cmd_VStr:
23 | 		return vv.VStr
24 | 		// TODO Cluster
25 | 	default:
26 | 		return fmt.Sprintf("%s", vv)
27 | 	}
28 | }
29 | 
30 | func (c *Cmd) ShortStr() string {
31 | 	if c == nil {
32 | 		return "()"
33 | 	}
34 | 	return fmt.Sprintf("%s(%s, %s)",
35 | 		c.Op, c.Key, cmdValueShortStr(c.Value))
36 | }
37 | 
38 | // Interfering check if a command interferes with another one,
39 | // i.e. they change the same key.
40 | func (a *Cmd) Interfering(b *Cmd) bool {
41 | 	if a == nil || b == nil {
42 | 		return false
43 | 	}
44 | 
45 | 	if a.Op == "set" && b.Op == "set" {
46 | 		if a.Key == b.Key {
47 | 			return true
48 | 		}
49 | 	}
50 | 
51 | 	return false
52 | }
53 | 
// toCmder is implemented by values that can be converted into a *Cmd.
type toCmder interface {
	// ToCmd returns the command this value stands for.
	ToCmd() *Cmd
}
57 | 
58 | type cstr string
59 | 
60 | func (c *cstr) ToCmd() *Cmd {
61 | 	if *c == "" {
62 | 		return nil
63 | 	}
64 | 
65 | 	kv := strings.Split(string(*c), "=")
66 | 	k := kv[0]
67 | 
68 | 	v, err := strconv.ParseInt(kv[1], 10, 64)
69 | 	if err != nil {
70 | 		panic(string(*c) + " convert to Cmd")
71 | 	}
72 | 	return NewCmdI64("set", k, v)
73 | }
74 | 
75 | func toCmd(x interface{}) *Cmd {
76 | 	if x == nil {
77 | 		return nil
78 | 	}
79 | 
80 | 	switch v := x.(type) {
81 | 	case string:
82 | 		s := cstr(v)
83 | 		return s.ToCmd()
84 | 
85 | 	case *Cmd:
86 | 		return v
87 | 	}
88 | 	panic("invalid type to convert to cmd")
89 | }
90 | 


--------------------------------------------------------------------------------
/cmd_test.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/require"
 7 | )
 8 | 
 9 | func TestNewCmdI64(t *testing.T) {
10 | 
11 | 	ta := require.New(t)
12 | 
13 | 	ta.Equal(&Cmd{
14 | 		Op:    "foo",
15 | 		Key:   "key",
16 | 		Value: &Cmd_VI64{3},
17 | 	}, NewCmdI64("foo", "key", 3))
18 | 
19 | }
20 | 
21 | func TestCmd_Interfering(t *testing.T) {
22 | 
23 | 	ta := require.New(t)
24 | 
25 | 	cases := []struct {
26 | 		a, b *Cmd
27 | 		want bool
28 | 	}{
29 | 		{nil, nil, false},
30 | 		{nil, NewCmdI64("bar", "x", 4), false},
31 | 		{NewCmdI64("foo", "x", 3), NewCmdI64("bar", "x", 4), false},
32 | 		{NewCmdI64("foo", "x", 3), NewCmdI64("foo", "x", 4), false},
33 | 		{NewCmdI64("set", "x", 3), NewCmdI64("set", "y", 4), false},
34 | 		{NewCmdI64("set", "x", 3), NewCmdI64("set", "x", 4), true},
35 | 	}
36 | 
37 | 	for i, c := range cases {
38 | 		ta.Equal(c.want, c.a.Interfering(c.b), "%d-th: case: %+v", i+1, c)
39 | 		ta.Equal(c.want, c.b.Interfering(c.a), "%d-th: case: %+v", i+1, c)
40 | 	}
41 | }
42 | 
43 | func Test_cstr(t *testing.T) {
44 | 
45 | 	ta := require.New(t)
46 | 
47 | 	cases := []struct {
48 | 		input cstr
49 | 		want  *Cmd
50 | 	}{
51 | 		{"x=3", NewCmdI64("set", "x", 3)},
52 | 		{"y=4", NewCmdI64("set", "y", 4)},
53 | 		{"", nil},
54 | 	}
55 | 
56 | 	for i, c := range cases {
57 | 		got := c.input.ToCmd()
58 | 		ta.Equal(c.want, got, "%d-th: case: %+v", i+1, c)
59 | 	}
60 | }
61 | 


--------------------------------------------------------------------------------
/commit.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import "github.com/pkg/errors"
 4 | 
 5 | func (tr *TRaft) leaderUpdateCommitted(
 6 | 	committer *LeaderId, lsns []int64) error {
 7 | 
 8 | 	id := tr.Id
 9 | 	me := tr.Status[id]
10 | 	if committer.Equal(me.VotedFor) {
11 | 		// Logs are intact.
12 | 		// may be expired, Logs wont change unless VotedFor changes.
13 | 
14 | 		// NOTE: using start, end lsn to describe logs requires every
15 | 		// forwarding action operates on continous logs
16 | 		for i := lsns[0]; i < lsns[1]; i++ {
17 | 			r := tr.Logs[i-tr.LogOffset]
18 | 			me.Committed.Union(r.Overrides)
19 | 		}
20 | 
21 | 		return nil
22 | 	}
23 | 
24 | 	err := errors.Wrapf(ErrLeaderLost,
25 | 		"committer: %s, current %s",
26 | 		committer.ShortStr(), me.VotedFor.ShortStr(),
27 | 	)
28 | 	lg.Infow("leaderUpdateCommitted", "err", err)
29 | 	return err
30 | }
31 | 


--------------------------------------------------------------------------------
/common.mk:
--------------------------------------------------------------------------------
  1 | # Usage: echo 'include common.mk' > Makefile
  2 | 
  3 | TOP_PKG := $(shell go list .)
  4 | NAME := $(shell go list . | awk -F/ '{print $$NF}')
  5 | PKGS := $(shell go list ./... | grep -v "^$(TOP_PKG)/\(vendor\|prototype\)")
  6 | 
  7 | # PKGS := github.com/openacid/slimarray/array \
  8 | #         github.com/openacid/slimarray/bit \
  9 | #         github.com/openacid/slimarray/trie \
 10 | 
 11 | SRCDIRS := $(shell go list -f '{{.Dir}}' $(PKGS))
 12 | 
 13 | # gofmt check vendor dir. we need to skip vendor manually
 14 | GOFILES := $(shell find $(SRCDIRS) -not -path "*/vendor/*" -name "*.go")
 15 | GO := go
 16 | 
 17 | check: test vet gofmt misspell unconvert staticcheck ineffassign unparam
 18 | 
 19 | travis: vet gofmt misspell unconvert ineffassign unparam test
 20 | 
 21 | test:
 22 | 	# fail fast with severe bugs
 23 | 	$(GO) test -short      $(PKGS)
 24 | 	$(GO) test -tags debug $(PKGS)
 25 | 	# test release version and generate coverage data for task `coveralls`.
 26 | 	$(GO) test -covermode=count -coverprofile=coverage.out $(PKGS)
 27 | 
 28 | vet:
 29 | 	$(GO) vet $(PKGS)
 30 | 
 31 | staticcheck:
 32 | 	$(GO) get honnef.co/go/tools/cmd/staticcheck
 33 | 	# ST1016: methods on the same type should have the same receiver name
 34 | 	#         .pb.go have this issue.
 35 | 	staticcheck -checks all,-ST1016 $(PKGS)
 36 | 
 37 | misspell:
 38 | 	$(GO) get github.com/client9/misspell/cmd/misspell
 39 | 	find $(SRCDIRS) -name '*.go' -or -name '*.md' | grep -v "\bvendor/" | xargs misspell \
 40 | 		-locale US \
 41 | 		-error
 42 | 	misspell \
 43 | 		-locale US \
 44 | 		-error \
 45 | 		*.md *.go
 46 | 
 47 | unconvert:
 48 | 	$(GO) get github.com/mdempsky/unconvert
 49 | 	unconvert -v $(PKGS)
 50 | 
 51 | ineffassign:
 52 | 	$(GO) get github.com/gordonklaus/ineffassign
 53 | 	find $(SRCDIRS) -name '*.go' | grep -v "\bvendor/" | xargs ineffassign
 54 | 
 55 | pedantic: check errcheck
 56 | 
 57 | unparam:
 58 | 	$(GO) get mvdan.cc/unparam
 59 | 	unparam ./...
 60 | 
 61 | errcheck:
 62 | 	$(GO) get github.com/kisielk/errcheck
 63 | 	errcheck $(PKGS)
 64 | 
 65 | gofmt:
 66 | 	@echo Checking code is gofmted
 67 | 	@test -z "$(shell gofmt -s -l -d -e $(GOFILES) | tee /dev/stderr)"
 68 | 
 69 | ben: test
 70 | 	$(GO) test ./... -run=none -bench=. -benchmem
 71 | 
 72 | gen:
 73 | 	$(GO) generate ./...
 74 | 
 75 | doc:
 76 | 	# build a markdown version of package doc to embed to README.md
 77 | 	# $(GO) get github.com/robertkrimen/godocdown/godocdown
 78 | 	godocdown . > docs/$(NAME).md
 79 | 	# "package" is the first phrase in a go doc.
 80 | 	# "## Usage" is the start of API section.
 81 | 	cat docs/$(NAME).md | awk '/^Package /,/^## Usage/' | grep -v '^## Usage' > docs/$(NAME)-package.md
 82 | 
 83 | 
 84 | readme: doc
 85 | 	python ./scripts/build_md.py
 86 | 	# brew install nodejs
 87 | 	# npm install -g doctoc
 88 | 	doctoc --title '' --github README.md
 89 | 
 90 | fix:
 91 | 	gofmt -s -w $(GOFILES)
 92 | 	unconvert -v -apply $(PKGS)
 93 | 
 94 | 
 95 | # local coverage
 96 | coverage:
 97 | 	$(GO) test -covermode=count -coverprofile=coverage.out $(PKGS)
 98 | 	go tool cover -func=coverage.out
 99 | 	# go tool cover -html=coverage.out
100 | 
101 | # send coverage to coveralls
102 | coveralls:
103 | 	# this job relies on the output of task test: `-coverprofile=coverage.out`
104 | 	$(GO) get golang.org/x/tools/cmd/cover
105 | 	$(GO) get github.com/mattn/goveralls
106 | 	goveralls -ignore='*.pb.go' -coverprofile=coverage.out -service=travis-ci
107 | 


--------------------------------------------------------------------------------
/docs/design/mergelog.md:
--------------------------------------------------------------------------------
 1 | ## Merge Log
 2 | 
 3 | ### Definition
 4 | 
 5 | - **Safe**: A log is safe if it has been replicated to a quorum, no matter
 6 |   whether or not the **committed** flag is set on any replica.
 7 | 
 8 | ---
 9 | 
10 | After a replica established leadership, it needs to merge the latest logs from a
11 | quorum of replicas, to ensure that the leader replica has all **safe** logs.
12 | 
13 | 
14 | ### Lemma-only-max-committer-logs
15 | 
16 | In TRaft, a leader only needs to use the logs from replicas with the max **committer**(`(term, id)`).
17 | 
18 | A log that is not present in any of the max-committer replicas a leader has seen can **not**
19 | be safe.
20 | 
21 | Proof:
22 | 
23 | If a log `A` becomes **safe**, it will be seen by the next leader `Li`,
24 | because a leader has to collect logs from a quorum and any two quorums
25 | intersect with each other.
26 | 
27 | If the next next leader `Lj`(`j>i`) has seen `Li`, it will choose `A`.
28 | Otherwise, it will see a replica that has `A`.
29 | 
30 | ∴ Any newer leader will choose `A`.
31 | 
32 | ∴ TRaft only needs to merge logs from replicas with the latest `Committer`.
33 | 
34 | E.g.:
35 | 
36 | ```
37 | Li: indicates a replica becomes leader
38 | A/B: a log is written
39 | 
40 | R0 L1 A
41 | R1    A
42 | R2    A
43 | R3      L2 B
44 | R4           L3
45 | ------------------------> time
46 | ```
47 | 
48 | In this diagram:
49 | L2 sees `A` and then updates its `Committer` to `L2`, then writes log `B` to its
50 | local log. And R3 crashed before forwarding any log out.
51 | 
52 | -   If L3 established its leadership by contacting R2 and R3, it uses logs from only
53 |     R3(R3 has the latest committer `L2`), it will see `A`.
54 | 
55 | -   If L3 established its leadership by contacting R0 and R1, it will also see `A`.
56 | 
57 | 


--------------------------------------------------------------------------------
/docs/log.md:
--------------------------------------------------------------------------------
 1 | -   2021-02-10:
 2 |     LOC: +1008 -1
 3 | 
 4 |     ```
 5 |     buildMajorityQuorums()
 6 |     import TailBitmap, add Union()
 7 |     TailBitmap.Clone()
 8 |     TailBitmap.Diff()
 9 |     ```
10 | 
11 | -   2021-02-11:
12 |     LOC: +754 -44
13 | 
14 |     ```
15 |     refactor buildMajorityQuorums()
16 |     LeaderId.Cmp()
17 |     add NewLeaderId
18 |     add NewBallot()
19 |     fix: LeaderId.Cmp() accept nil as operand
20 |     Ballot.CmpLog() to compare only the log-related fields
21 |     rename Ballot.Accepted to Ballot.AcceptedFrom
22 |     TailBitmap.Len()
23 |     refactor design, add ReplicaStatus, remove Ballot.
24 |     impl Vote Handler, under dev!!!
25 |     use gogoproto, build clean .pb.go with less code, add serveCluster() to setup a simple cluster for test
26 |     add NewCmdI64()
27 |     add NewRecord()
28 |     add ClusterConfig.GetReplicaInfo()
29 |     NewTailBitmap() accepts extra bits to set
30 |     ```
31 | 
32 | -   2021-02-12:
33 |     LOC: +2601 -70
34 | 
35 |     ```
36 |     add LeaderId:Clone()
37 |     refactor TailBitmap.Clone(): use proto
38 |     draft test of vote
39 |     rename By to Author, AcceptedFrom to Committer. borrowed concepts from git:DDD
40 |     add test: HandleVoteReq. add ShortStr() to Cmd, LeaderI, Record and []Record
41 |     refactor VoteReply: do not use ReplicaStatus to describe log status
42 |     add readme to record progress
43 |     readme: impl storage with slim
44 |     test that voter send back nil log
45 |     update readme: collect git log
46 |     TailBitmap: accept second operand to be nil
47 |     ```
48 | 
49 | -   2021-02-13:
50 |     LOC: +1309 -183
51 | 
52 |     ```
53 |     test Replicate, under dev
54 |     rename Node.Log to Node.Logs
55 |     add Cmd.Intefering() to check if two commands not be allowed to change execution order
56 |     add Record.Interfering()
57 |     update readme
58 |     NewTailBitmap() support non-64-aligned offset
59 |     add AddLog() for leader to propose a command
60 |     use map to store cluster members instead of slice
61 |     add ClusterConfig.IsQuorum() to check if a set of members is a quorum
62 |     TRaft.VoteOnce() run one round voting, to establish leadership
63 |     TRaft actor loop and voting loop
64 |     ```
65 | 
66 | -   2021-02-14:
67 |     LOC: +891 -229
68 | 
69 |     ```
70 |     mainloop as an actor is the only one update traft data. under dev. not passed yet
71 |     test VoteLoop
72 |     granted leader merges collected logs
73 |     after vote, leader merge responded logs
74 |     add API Propose without replication
75 |     ```
76 | 
77 | -   2021-02-15:
78 |     LOC: +476 -197
79 | 
80 |     ```
81 |     add interface toCmd() to build Cmd from string
82 |     test Replicate()
83 |     add TRaft.sleep(): sleep only if it is not stopped.
84 |     ```
85 | 
86 | -   2021-02-16:
87 |     LOC: +1297 -592
88 | 
89 |     ```
90 |     refactor: rename Replicate to LogForward
91 |     refactor: vote_test: remove unused type, funcs
92 |     gruop daily log format by date
93 |     ```
94 | 
95 | 


--------------------------------------------------------------------------------
/docs/traft-package.md:
--------------------------------------------------------------------------------
1 | Package traft is a raft variant with out-of-order commit/apply and a more
2 | generalized member change algo.
3 | 
4 | 


--------------------------------------------------------------------------------
/elect.go:
--------------------------------------------------------------------------------
  1 | package traft
  2 | 
  3 | import (
  4 | 	"math/rand"
  5 | 	"sync/atomic"
  6 | 	"time"
  7 | 
  8 | 	"github.com/openacid/low/mathext/util"
  9 | 	"github.com/pkg/errors"
 10 | )
 11 | 
// ElectLoop runs until tr.running is no longer 1, trying to elect this replica
// as leader whenever the vote it currently holds has expired.
//
// Each round:
//  1. Inside tr.query it snapshots the local VotedFor, VoteExpireAt, log status
//     and cluster config. If the vote has not expired yet it sleeps and starts
//     the next round. Otherwise it bumps the local term, votes for itself and
//     renews VoteExpireAt.
//  2. It calls ElectOnce with the snapshot.
//  3. If no quorum granted the vote, it resets VoteExpireAt (only if VotedFor
//     is still the vote of this round) and sleeps for a time chosen by the
//     cause of the error.
//  4. If a quorum granted the vote, it calls establishLeadership inside
//     tr.query, again only if VotedFor is still the vote of this round.
//
// NOTE(review): tr.query presumably runs the closure serialized with all other
// access to TRaft state; confirm against its definition.
func (tr *TRaft) ElectLoop() {

	id := tr.Id

	slp := tr.sleep

	maxStaleTermSleep := time.Millisecond * 200
	heartBeatSleep := time.Millisecond * 200
	followerSleep := time.Millisecond * 200

	for atomic.LoadInt64(&tr.running) == 1 {
		// Snapshot of local state, filled in by the query below.
		var currVote *LeaderId
		var expireAt int64
		var logst *LogStatus
		var config *Cluster

		now := uSecondI64()
		lg.Infow("vote loop round start:",
			"Id", tr.Id,
		)

		// err is nil if the current vote is still valid, otherwise it wraps
		// ErrNeedElect.
		err := tr.query(func() error {
			me := tr.Status[tr.Id]

			currVote = me.VotedFor.Clone()
			expireAt = me.VoteExpireAt
			logst = ExportLogStatus(tr.Status[tr.Id])
			config = tr.Config.Clone()

			if now < expireAt {
				return nil
			}

			// init state for voting myself

			me.VotedFor.Term++
			me.VotedFor.Id = tr.Id
			currVote = me.VotedFor.Clone()

			me.VoteExpireAt = uSecondI64() + leaderLease

			return errors.Wrapf(ErrNeedElect, "expireAt-now: %d", expireAt-now)

		}).err

		if err == nil {
			// TODO refine this: wait until VoteExpireAt and watch for missing
			// heartbeat.
			lg.Infow("leader-not-expired",
				"Id", tr.Id,
				"VotedFor", currVote,
				"leadst.VoteExpireAt-now", expireAt-now)

			if currVote.Id == tr.Id {
				// I am a leader
				// TODO heartbeat other replicas to keep leadership
				slp(heartBeatSleep)
			} else {
				slp(followerSleep)
			}
			continue
		}

		// call for a new leader!!!
		lg.Infow("leader-expired",
			"Id", tr.Id,
			"VotedFor", currVote,
			"leadst.VoteExpireAt-now", expireAt-now)

		tr.sendMsg("vote-start", currVote.ShortStr(), logst)

		voteReplies, err, higher := ElectOnce(
			currVote,
			logst,
			config,
		)

		lg.Infow("vote-loop:result", "Id", tr.Id, "voteReplies", voteReplies, "err", err, "higher", higher)

		if voteReplies == nil {
			// fail to elect me.
			tr.sendMsg("vote-fail", "err", err)
			tr.query(func() error {

				me := tr.Status[tr.Id]

				if currVote.Cmp(me.VotedFor) == 0 {
					// I did not vote other ones yet, and I am not leader.
					// reset it.
					me.VoteExpireAt = 0
				}

				return nil
			})

			// wait for some time by err
			switch errors.Cause(err) {
			case ErrStaleTermId:
				// random back-off: 5ms plus up to maxStaleTermSleep
				slp(time.Millisecond*5 + time.Duration(rand.Int63n(int64(maxStaleTermSleep))))
			case ErrTimeout:
				slp(time.Millisecond * 10)
			case ErrStaleLog:
				// I can not be the leader.
				// sleep a day. waiting for others to elect to be a leader.
				slp(time.Second * 86400)
			}
			continue
		}

		// granted by a quorum

		lg.Infow("to-update-leader", "votedFor", currVote)

		updateErr := tr.query(func() error {

			me := tr.Status[tr.Id]

			// VotedFor changed since this round started: the grant is stale.
			if currVote.Cmp(me.VotedFor) != 0 {
				return errors.Wrapf(ErrLeaderLost, "when updating leadership and follower state")
			}

			tr.establishLeadership(currVote, voteReplies)
			return nil

		}).err

		if updateErr != nil {
			tr.sendMsg("vote-fail", "reason:fail-to-update", currVote)
			lg.Infow("vote-fail", "Id", id,
				"currVote", currVote,
				"err", updateErr.Error(),
			)
			continue
		}

		tr.sendMsg("vote-win", currVote)
		slp(heartBeatSleep)
	}
}
152 | 
153 | // returns:
154 | // ElectReply-s: if vote granted by a quorum, returns collected replies.
155 | //				Otherwise returns nil.
156 | // error: ErrStaleLog, ErrStaleTermId, ErrTimeout.
157 | // higherTerm: if seen, upgrade term and retry
158 | func ElectOnce(
159 | 	candidate *LeaderId,
160 | 	logStatus *LogStatus,
161 | 	cluster *Cluster,
162 | ) ([]*ElectReply, error, int64) {
163 | 
164 | 	// TODO vote need cluster id:
165 | 	// a stale member may try to elect on another cluster.
166 | 
167 | 	id := candidate.Id
168 | 
169 | 	replies := make([]*ElectReply, 0)
170 | 
171 | 	req := &ElectReq{
172 | 		Candidate: candidate,
173 | 		Committer: logStatus.GetCommitter(),
174 | 		Accepted:  logStatus.GetAccepted(),
175 | 	}
176 | 
177 | 	type voteRst struct {
178 | 		from  *ReplicaInfo
179 | 		reply *ElectReply
180 | 		err   error
181 | 	}
182 | 
183 | 	higherTerm := int64(-1)
184 | 	var logErr error
185 | 
186 | 	timeout := time.Second
187 | 	sess := rpcToAll(id, cluster, meth.Elect, req, timeout)
188 | 
189 | 	for res := range sess.resCh {
190 | 
191 | 		reply := res.reply.(*ElectReply)
192 | 		lg.Infow("elect:recv-reply", "reply", reply, "res.err", res.err)
193 | 
194 | 		if reply.OK {
195 | 			replies = append(replies, reply)
196 | 			if sess.updateOKBitmap(res) {
197 | 				// do not cancel
198 | 				return replies, nil, -1
199 | 			}
200 | 			continue
201 | 		}
202 | 
203 | 		if reply.VotedFor.Cmp(candidate) > 0 {
204 | 			higherTerm = util.MaxI64(higherTerm, reply.VotedFor.Term)
205 | 		}
206 | 
207 | 		if CmpLogStatus(reply, logStatus) > 0 {
208 | 			// TODO cancel timer
209 | 			logErr = errors.Wrapf(ErrStaleLog,
210 | 				"local: committer:%s max-lsn:%d remote: committer:%s max-lsn:%d",
211 | 				logStatus.GetCommitter().ShortStr(),
212 | 				logStatus.GetAccepted().Len(),
213 | 				reply.Committer.ShortStr(),
214 | 				reply.Accepted.Len())
215 | 		}
216 | 	}
217 | 
218 | 	if logErr != nil {
219 | 		return nil, logErr, higherTerm
220 | 	}
221 | 
222 | 	err := errors.Wrapf(ErrStaleTermId, "seen a higher term:%d", higherTerm)
223 | 	return nil, err, higherTerm
224 | }
225 | 
226 | // no lock protect, must be called by TRaft.Loop()
227 | func (tr *TRaft) hdlElectReq(req *ElectReq) *ElectReply {
228 | 
229 | 	id := tr.Id
230 | 
231 | 	me := tr.Status[tr.Id]
232 | 
233 | 	// A vote reply just send back a voter's status.
234 | 	// It is the candidate's responsibility to check if a voter granted it.
235 | 	repl := &ElectReply{
236 | 		OK:        false,
237 | 		Id:        id,
238 | 		VotedFor:  me.VotedFor.Clone(),
239 | 		Committer: me.Committer.Clone(),
240 | 		Accepted:  me.Accepted.Clone(),
241 | 		Committed: me.Committed.Clone(),
242 | 	}
243 | 
244 | 	lg.Infow("handleVoteReq",
245 | 		"Id", id,
246 | 		"req.Candidate", req.Candidate,
247 | 		"me.Committer", me.Committer.ShortStr(),
248 | 		"me.Accepted", me.Accepted.ShortStr(),
249 | 		"me.VotedFor", me.VotedFor.ShortStr(),
250 | 		"req.Committer", req.Committer.ShortStr(),
251 | 		"req.Accepted", req.Accepted.ShortStr(),
252 | 	)
253 | 
254 | 	if CmpLogStatus(req, me) < 0 {
255 | 		// I have more logs than the candidate.
256 | 		// It cant be a leader.
257 | 		tr.sendMsg("hdl-vote-req:reject-by-logstat",
258 | 			"req.Candidate", req.Candidate,
259 | 			"me.Committer", me.Committer,
260 | 			"me.Accepted", me.Accepted,
261 | 			"req.Committer", req.Committer,
262 | 			"req.Accepted", req.Accepted,
263 | 		)
264 | 		return repl
265 | 	}
266 | 
267 | 	// candidate has the upto date logs.
268 | 
269 | 	r := req.Candidate.Cmp(me.VotedFor)
270 | 	if r < 0 {
271 | 		// I've voted for other leader with higher privilege.
272 | 		// This candidate could not be a legal leader.
273 | 		// just send back enssential info to info it.
274 | 		tr.sendMsg("hdl-vote-req:reject-by-term-id",
275 | 			"req.Candidate", req.Candidate,
276 | 			"me.VotedFor", me.VotedFor,
277 | 		)
278 | 		return repl
279 | 	}
280 | 
281 | 	// grant vote
282 | 
283 | 	lg.Infow("voted", "id", id, "VotedFor", me.VotedFor)
284 | 	tr.sendMsg("hdl-vote-req:grant",
285 | 		"req.Candidate", req.Candidate,
286 | 		"me.VotedFor", me.VotedFor)
287 | 
288 | 	me.VotedFor = req.Candidate.Clone()
289 | 	me.VoteExpireAt = uSecondI64() + leaderLease
290 | 	repl.OK = true
291 | 	repl.VotedFor = req.Candidate.Clone()
292 | 
293 | 	// send back the logs I have but the candidate does not.
294 | 
295 | 	logs := make([]*LogRecord, 0)
296 | 
297 | 	lg.Infow("hdlElectReq", "me.Accepted", me.Accepted)
298 | 	lg.Infow("hdlElectReq", "req.Accepted", req.Accepted)
299 | 	start := me.Accepted.Offset
300 | 	end := me.Accepted.Len()
301 | 	for i := start; i < end; i++ {
302 | 		if me.Accepted.Get(i) != 0 && req.Accepted.Get(i) == 0 {
303 | 			r := tr.Logs[i-tr.LogOffset]
304 | 			logs = append(logs, r)
305 | 			lg.Infow("hdlElectReq:send-log", "r", r)
306 | 		}
307 | 	}
308 | 
309 | 	repl.Logs = logs
310 | 
311 | 	return repl
312 | }
313 | 
314 | // establishLeadership updates the local leader state after an election
315 | // has been approved by a quorum: it merges the logs carried by replies,
316 | // refreshes the local view of every replied replica, and records currVote
317 | // as the Committer of this replica.
318 | func (tr *TRaft) establishLeadership(currVote *LeaderId, replies []*ElectReply) {
319 | 	self := tr.Status[tr.Id]
320 | 
321 | 	// The vote expire time is intentionally left untouched: letting the
322 | 	// leader expire earlier than its followers reduces the chance that a
323 | 	// follower rejects replication from the leader.
324 | 	tr.mergeFollowerLogs(replies)
325 | 
326 | 	// The merged logs are then to be replicated to the others.
327 | 	// TODO update local view of status of other replicas.
328 | 	for _, reply := range replies {
329 | 		peer := tr.Status[reply.Id]
330 | 
331 | 		// Only when the committers are the same can the leader be sure
332 | 		// that the follower has identical logs; otherwise fall back to
333 | 		// what the follower has committed.
334 | 		accepted := reply.Committed
335 | 		if reply.Committer.Equal(self.Committer) {
336 | 			accepted = reply.Accepted
337 | 		}
338 | 		peer.Accepted = accepted.Clone()
339 | 		peer.Committed = reply.Committed.Clone()
340 | 		peer.Committer = reply.Committer.Clone()
341 | 	}
342 | 
343 | 	self.Committer = currVote.Clone()
344 | }
345 | 
346 | // mergeFollowerLogs fills the holes of the local log with the records
347 | // carried by the vote replies that have the max committer.
348 | // Positions that already have an accepted log are left untouched,
349 | // and so are the holes for which no non-empty record is found.
350 | func (tr *TRaft) mergeFollowerLogs(votes []*ElectReply) {
351 | 
352 | 	// TODO if the leader chose Logs[i] from replica `r`, e.g. R[r].Logs[i]
353 | 	// then the logs R[r].Logs[:i] are safe to choose.
354 | 	// Because if a different R[r'].Logs[j] is committed, for a j <= i
355 | 	// the leader that wrote R[r].Log[i] must have chosen R[r'].Logs[j] .
356 | 	// ∴ R[r].Logs[j] == R[r'].Logs[j]
357 | 	//
358 | 	// For now 2021 Feb 14,
359 | 	// we just erase all non-committed logs on followers.
360 | 
361 | 	self := tr.Status[tr.Id]
362 | 
363 | 	topCommitter, candidates := tr.chooseMaxCommitterReplies(votes)
364 | 	lg.Infow("mergeFollowerLogs", "maxCommitter", topCommitter, "chosen", candidates)
365 | 	lg.Infow("mergeFollowerLogs", "mylogs", RecordsShortStr(tr.Logs))
366 | 
367 | 	// The upper bound is taken once, before any log is merged.
368 | 	limit := self.Accepted.Len()
369 | 	for lsn := self.Accepted.Offset; lsn < limit; lsn++ {
370 | 		// only a position without an accepted log needs to be filled.
371 | 		if self.Accepted.Get(lsn) == 0 {
372 | 			filler := getLog(lsn, candidates)
373 | 
374 | 			// TODO fill in with empty log
375 | 			if !filler.Empty() {
376 | 				tr.Logs[lsn-tr.LogOffset] = filler
377 | 				self.Accepted.Union(filler.Overrides)
378 | 
379 | 				lg.Infow("merge-log",
380 | 					"lsn", lsn,
381 | 					"committer", topCommitter,
382 | 					"record", filler)
383 | 			}
384 | 		}
385 | 	}
386 | }
387 | 
388 | // getLog calls PopRecord(lsn) on every reply and returns the last non-nil
389 | // record, or nil if no reply yields a record for lsn.
390 | // It panics if two replies yield records at lsn with different authors.
391 | func getLog(lsn int64, replies []*ElectReply) *LogRecord {
392 | 	var found *LogRecord
393 | 	for i := range replies {
394 | 		cand := replies[i].PopRecord(lsn)
395 | 		switch {
396 | 		case cand == nil:
397 | 			continue
398 | 		case found != nil && found.Author.Cmp(cand.Author) != 0:
399 | 			panic("wtf")
400 | 		}
401 | 		found = cand
402 | 		// TODO if cand is not nil: break
403 | 	}
404 | 
405 | 	return found
406 | }
407 | 
408 | // chooseMaxCommitterReplies returns the max Committer among the local one and those of replies,
409 | // together with the replies having it; replies with a smaller Committer are discarded.
410 | func (tr *TRaft) chooseMaxCommitterReplies(replies []*ElectReply) (*LeaderId, []*ElectReply) {
411 | 	top := tr.Status[tr.Id].Committer
412 | 	for _, reply := range replies {
413 | 		if reply.Committer.Cmp(top) > 0 {
414 | 			top = reply.Committer
415 | 		}
416 | 	}
417 | 	picked := make([]*ElectReply, 0, len(replies))
418 | 	for _, reply := range replies {
419 | 		if reply.Committer.Cmp(top) != 0 {
420 | 			continue
421 | 		}
422 | 		picked = append(picked, reply)
423 | 	}
424 | 	return top, picked
425 | }
426 | 


--------------------------------------------------------------------------------
/elect_test.go:
--------------------------------------------------------------------------------
  1 | package traft
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"fmt"
  6 | 	"reflect"
  7 | 	"strings"
  8 | 	"testing"
  9 | 	"time"
 10 | 
 11 | 	"github.com/pkg/errors"
 12 | 	"github.com/stretchr/testify/require"
 13 | )
 14 | // candStat describes the candidate side of a vote request in a test case.
 15 | type candStat struct {
 16 | 	candidateId *LeaderId // sent as ElectReq.Candidate
 17 | 	committer   *LeaderId // sent as ElectReq.Committer
 18 | 	logs        []int64   // passed to NewTailBitmap(0, ...) to build ElectReq.Accepted
 19 | }
 20 | // voterStat is the initial state of the voter in a test case; its fields are the arguments of initTraft.
 21 | type voterStat struct {
 22 | 	votedFor  *LeaderId
 23 | 	committer *LeaderId
 24 | 	author    *LeaderId
 25 | 	logs      []int64
 26 | 	nilLogs   map[int64]bool
 27 | 	committed []int64 // not referenced by the tests in this file
 28 | }
 29 | // wantVoteReply is the expected outcome of a vote request; every field mirrors a checked part of ElectReply.
 30 | type wantVoteReply struct {
 31 | 	OK           bool        // ElectReply.OK
 32 | 	votedFor     *LeaderId   // ElectReply.VotedFor
 33 | 	committer    *LeaderId   // ElectReply.Committer
 34 | 	allLogBitmap *TailBitmap // ElectReply.Accepted
 35 | 	logs         string      // RecordsShortStr(ElectReply.Logs)
 36 | }
 37 | 
 38 | // withCluster sets up and starts a TRaft cluster of `ids`, runs `f` as the
 39 | // sub-test `name`, then stops every replica before it returns.
 40 | func withCluster(t *testing.T,
 41 | 	name string,
 42 | 	ids []int64,
 43 | 	f func(t *testing.T, ts []*TRaft)) {
 44 | 
 45 | 	// The cluster is stopped here rather than by a `defer tr.Stop()` in the
 46 | 	// test case, because such a defer does not block until the next test case.
 47 | 	cluster := newCluster(ids)
 48 | 	for idx := range ids {
 49 | 		cluster[idx].initTraft(
 50 | 			NewLeaderId(0, 0), NewLeaderId(0, 0),
 51 | 			[]int64{}, nil, nil,
 52 | 			NewLeaderId(0, ids[idx]))
 53 | 	}
 54 | 	startCluster(cluster)
 55 | 
 56 | 	t.Run(name, func(t *testing.T) { f(t, cluster) })
 57 | 	stopAll(cluster)
 58 | }
 59 | // TestTRaft_hdlVoteReq sends an ElectReq to a voter in various states and checks the ElectReply and the vote expire time.
 60 | func TestTRaft_hdlVoteReq(t *testing.T) {
 61 | 
 62 | 	ta := require.New(t)
 63 | 
 64 | 	bm := NewTailBitmap
 65 | 
 66 | 	ids := []int64{1, 2, 3}
 67 | 	id := int64(1)
 68 | 	// testVote inits ts[0] with the state in `voter`, sends the request described by `cand` to member `id`, and returns the reply and the voter's VoteExpireAt.
 69 | 	testVote := func(
 70 | 		cand candStat,
 71 | 		voter voterStat,
 72 | 	) (*ElectReply, int64) {
 73 | 
 74 | 		ts := newCluster(ids)
 75 | 
 76 | 		t1 := ts[0]
 77 | 
 78 | 		t1.initTraft(
 79 | 			voter.committer, voter.author, voter.logs, voter.nilLogs, nil,
 80 | 			voter.votedFor,
 81 | 		)
 82 | 
 83 | 		startCluster(ts)
 84 | 		defer stopAll(ts)
 85 | 
 86 | 		req := &ElectReq{
 87 | 			Candidate: cand.candidateId,
 88 | 			Committer: cand.committer,
 89 | 			Accepted:  bm(0, cand.logs...),
 90 | 		}
 91 | 
 92 | 		var reply *ElectReply
 93 | 		addr := t1.Config.Members[id].Addr
 94 | 
 95 | 		rpcTo(addr, func(cli TRaftClient, ctx context.Context) {
 96 | 			var err error
 97 | 			reply, err = cli.Elect(ctx, req)
 98 | 			if err != nil {
 99 | 				panic("wtf")
100 | 			}
101 | 		})
102 | 
103 | 		return reply, t1.Status[id].VoteExpireAt
104 | 	}
105 | 
106 | 	lid := NewLeaderId
107 | 
108 | 	cases := []struct {
109 | 		cand  candStat
110 | 		voter voterStat
111 | 		want  wantVoteReply
112 | 	}{
113 | 		// vote granted
114 | 		{
115 | 			candStat{candidateId: lid(2, 2), committer: lid(1, id), logs: []int64{5}},
116 | 			voterStat{votedFor: lid(0, id), committer: lid(0, id), author: lid(1, id), logs: []int64{5, 6}},
117 | 			wantVoteReply{
118 | 				OK:           true,
119 | 				votedFor:     lid(2, 2),
120 | 				committer:    lid(0, id),
121 | 				allLogBitmap: bm(0, 5, 6),
122 | 				logs:         "[<001#001:006{set(x, 6)}-0→0>]",
123 | 			},
124 | 		},
125 | 
126 | 		// vote granted
127 | 		// send back nil logs
128 | 		{
129 | 			candStat{candidateId: lid(2, 2), committer: lid(1, id), logs: []int64{5}},
130 | 			voterStat{votedFor: lid(0, id), committer: lid(0, id), author: lid(1, id), logs: []int64{5, 6, 7}, nilLogs: map[int64]bool{6: true}},
131 | 			wantVoteReply{
132 | 				OK:           true,
133 | 				votedFor:     lid(2, 2),
134 | 				committer:    lid(0, id),
135 | 				allLogBitmap: bm(0, 5, 6, 7),
136 | 				logs:         "[<>, <001#001:007{set(x, 7)}-0→0>]",
137 | 			},
138 | 		},
139 | 
140 | 		// candidate does not have up-to-date logs
141 | 		{
142 | 			candStat{candidateId: lid(2, 2), committer: lid(0, id), logs: []int64{5, 6}},
143 | 			voterStat{votedFor: lid(1, id), committer: lid(1, id), author: lid(1, id), logs: []int64{5, 6}},
144 | 			wantVoteReply{
145 | 				OK:           false,
146 | 				votedFor:     lid(1, id),
147 | 				committer:    lid(1, id),
148 | 				allLogBitmap: bm(0, 5, 6),
149 | 				logs:         "[]",
150 | 			},
151 | 		},
152 | 
153 | 		// candidate does not have enough logs.
154 | 		// No log is sent back to candidate because it does not need to rebuild
155 | 		// full log history.
156 | 		{
157 | 			candStat{candidateId: lid(2, 2), committer: lid(1, id), logs: []int64{5}},
158 | 			voterStat{votedFor: lid(1, id), committer: lid(1, id), author: lid(1, id), logs: []int64{5, 6}},
159 | 			wantVoteReply{
160 | 				OK:           false,
161 | 				votedFor:     lid(1, id),
162 | 				committer:    lid(1, id),
163 | 				allLogBitmap: bm(0, 5, 6),
164 | 				logs:         "[]",
165 | 			},
166 | 		},
167 | 
168 | 		// candidate has smaller term.
169 | 		// No log sent back.
170 | 		{
171 | 			candStat{candidateId: lid(2, 2), committer: lid(1, id), logs: []int64{5, 6}},
172 | 			voterStat{votedFor: lid(3, id), committer: lid(1, id), author: lid(1, id), logs: []int64{5, 6}},
173 | 			wantVoteReply{
174 | 				OK:           false,
175 | 				votedFor:     lid(3, id),
176 | 				committer:    lid(1, id),
177 | 				allLogBitmap: bm(0, 5, 6),
178 | 				logs:         "[]",
179 | 			},
180 | 		},
181 | 
182 | 		// candidate has smaller id.
183 | 		// No log sent back.
184 | 		{
185 | 			candStat{candidateId: lid(3, id-1), committer: lid(1, id), logs: []int64{5, 6}},
186 | 			voterStat{votedFor: lid(3, id), committer: lid(1, id), author: lid(1, id), logs: []int64{5, 6}},
187 | 			wantVoteReply{
188 | 				OK:           false,
189 | 				votedFor:     lid(3, id),
190 | 				committer:    lid(1, id),
191 | 				allLogBitmap: bm(0, 5, 6),
192 | 				logs:         "[]",
193 | 			},
194 | 		},
195 | 	}
196 | 
197 | 	for i, c := range cases {
198 | 		reply, gotExpire := testVote(c.cand, c.voter)
199 | 
200 | 		ta.Equal(
201 | 			c.want,
202 | 			wantVoteReply{
203 | 				OK:           reply.OK,
204 | 				votedFor:     reply.VotedFor,
205 | 				committer:    reply.Committer,
206 | 				allLogBitmap: reply.Accepted,
207 | 				logs:         RecordsShortStr(reply.Logs),
208 | 			},
209 | 			"%d-th: case: %+v", i+1, c)
210 | 
211 | 		if reply.OK {
212 | 			ta.InDelta(uSecondI64()+leaderLease, gotExpire, 1000*1000*1000)
213 | 		} else {
214 | 			ta.Equal(int64(0), gotExpire)
215 | 
216 | 		}
217 | 	}
218 | }
219 | // TestTRaft_VoteOnce calls ElectOnce for replica 0 against voters in various states and checks its three results.
220 | func TestTRaft_VoteOnce(t *testing.T) {
221 | 
222 | 	// cluster = {0, 1, 2}
223 | 	// ts[0] votes once with different Committer/VotedFor settings.
224 | 
225 | 	lid := NewLeaderId
226 | 	// wt is the expected result of ElectOnce: whether replies are returned, the cause of the error, and the higher term.
227 | 	type wt struct {
228 | 		hasVoteReplies bool
229 | 		err            error
230 | 		higherTerm     int64
231 | 	}
232 | 
233 | 	cases := []struct {
234 | 		name       string
235 | 		committers []*LeaderId
236 | 		votedFors  []*LeaderId
237 | 		logs       [][]string
238 | 		candidate  *LeaderId
239 | 		want       wt
240 | 	}{
241 | 		{name: "2emptyVoter/term-0",
242 | 			candidate: lid(0, 0),
243 | 			want: wt{
244 | 				hasVoteReplies: false,
245 | 				err:            ErrStaleTermId,
246 | 				higherTerm:     0,
247 | 			},
248 | 		},
249 | 		{name: "2emptyVoter/term-1",
250 | 			candidate: lid(1, 0),
251 | 			want: wt{
252 | 				hasVoteReplies: true,
253 | 				err:            nil,
254 | 				higherTerm:     -1,
255 | 			},
256 | 		},
257 | 		{name: "reject-by-one/stalelog",
258 | 			committers: []*LeaderId{nil, lid(2, 0)},
259 | 			votedFors:  []*LeaderId{nil, lid(2, 1)},
260 | 			candidate:  lid(1, 0),
261 | 			want: wt{
262 | 				hasVoteReplies: true,
263 | 				err:            nil,
264 | 				higherTerm:     -1,
265 | 			},
266 | 		},
267 | 		{name: "reject-by-one/higherTerm",
268 | 			committers: []*LeaderId{nil, nil, lid(0, 0)},
269 | 			votedFors:  []*LeaderId{nil, nil, lid(5, 2)},
270 | 			candidate:  lid(1, 0),
271 | 			want: wt{
272 | 				hasVoteReplies: true,
273 | 				err:            nil,
274 | 				higherTerm:     -1,
275 | 			},
276 | 		},
277 | 		{name: "reject-by-two/stalelog",
278 | 			committers: []*LeaderId{nil, lid(2, 0), lid(0, 0)},
279 | 			votedFors:  []*LeaderId{nil, lid(2, 1), lid(2, 2)},
280 | 			candidate:  lid(1, 0),
281 | 			want: wt{
282 | 				hasVoteReplies: false,
283 | 				err:            ErrStaleLog,
284 | 				higherTerm:     2,
285 | 			},
286 | 		},
287 | 		{name: "reject-by-two/stalelog-higherTerm",
288 | 			committers: []*LeaderId{nil, lid(2, 0), lid(0, 0)},
289 | 			votedFors:  []*LeaderId{nil, lid(2, 1), lid(5, 2)},
290 | 			logs:       [][]string{nil, nil, []string{"x=0"}},
291 | 			candidate:  lid(1, 0),
292 | 			want: wt{
293 | 				hasVoteReplies: false,
294 | 				err:            ErrStaleLog,
295 | 				higherTerm:     5,
296 | 			},
297 | 		},
298 | 		{name: "reject-by-two/higherTerm",
299 | 			votedFors: []*LeaderId{nil, lid(3, 1), lid(5, 2)},
300 | 			candidate: lid(1, 0),
301 | 			want: wt{
302 | 				hasVoteReplies: false,
303 | 				err:            ErrStaleTermId,
304 | 				higherTerm:     5,
305 | 			},
306 | 		},
307 | 	}
308 | 
309 | 	for _, c := range cases {
310 | 		withCluster(t, c.name,
311 | 			[]int64{0, 1, 2},
312 | 			func(t *testing.T, ts []*TRaft) {
313 | 				ta := require.New(t)
314 | 				for i, cmt := range c.committers {
315 | 					if cmt != nil {
316 | 						ts[i].Status[int64(i)].Committer = cmt
317 | 					}
318 | 				}
319 | 
320 | 				for i, v := range c.votedFors {
321 | 					if v != nil {
322 | 						ts[i].Status[int64(i)].VotedFor = v
323 | 					}
324 | 				}
325 | 
326 | 				for i, ls := range c.logs {
327 | 					for _, l := range ls {
328 | 						ts[i].addLogs(l)
329 | 					}
330 | 				}
331 | 
332 | 				voted, err, higher := ElectOnce(
333 | 					c.candidate,
334 | 					ExportLogStatus(ts[0].Status[0]),
335 | 					ts[0].Config.Clone(),
336 | 				)
337 | 
338 | 				if c.want.hasVoteReplies {
339 | 					ta.NotNil(voted)
340 | 				} else {
341 | 					ta.Nil(voted)
342 | 				}
343 | 				ta.Equal(c.want.err, errors.Cause(err))
344 | 				ta.Equal(c.want.higherTerm, higher)
345 | 			})
346 | 	}
347 | }
348 | 
349 | func TestTRaft_query(t *testing.T) {
350 | 
351 | 	ta := require.New(t)
352 | 
353 | 	ids := []int64{1}
354 | 	id1 := int64(1)
355 | 	lid := NewLeaderId
356 | 
357 | 	ts := newCluster(ids)
358 | 
359 | 	t1 := ts[0]
360 | 	t1.initTraft(lid(1, 2), lid(3, 4), []int64{5}, nil, nil, lid(2, id1))
361 | 
362 | 	startCluster(ts)
363 | 	defer stopAll(ts)
364 | 
365 | 	got := t1.query(func() interface{} {
366 | 		return ExportLogStatus(t1.Status[t1.Id])
367 | 	}).v.(*LogStatus)
368 | 	ta.Equal("001#002", got.Committer.ShortStr())
369 | 	ta.Equal("0:20", got.Accepted.ShortStr())
370 | }
371 | // stopAll calls Stop on every TRaft in ts, in slice order.
372 | func stopAll(ts []*TRaft) {
373 | 	for i := range ts {
374 | 		ts[i].Stop()
375 | 	}
376 | }
377 | 
378 | // readMsg receives one message from the MsgCh of any TRaft in ts and
379 | // returns its string form. It panics with "timeout" if no message
380 | // arrives in one second.
381 | // It is the dynamic form of a select over n TRaft and a timeout:
382 | //     select {
383 | //     case msg = <-ts[i].MsgCh: // for every i
384 | //     case <-time.After(time.Second):
385 | //         panic("timeout")
386 | //     }
387 | func readMsg(ts []*TRaft) string {
388 | 	timeoutIdx := len(ts)
389 | 	branches := make([]reflect.SelectCase, 0, timeoutIdx+1)
390 | 	for _, tr := range ts {
391 | 		branches = append(branches, reflect.SelectCase{
392 | 			Dir:  reflect.SelectRecv,
393 | 			Chan: reflect.ValueOf(tr.MsgCh),
394 | 		})
395 | 	}
396 | 	branches = append(branches, reflect.SelectCase{
397 | 		Dir:  reflect.SelectRecv,
398 | 		Chan: reflect.ValueOf(time.After(time.Second)),
399 | 	})
400 | 	// the third result is true if the channel has not been closed; it is not used.
401 | 	picked, received, _ := reflect.Select(branches)
402 | 	if picked == timeoutIdx {
403 | 		panic("timeout")
404 | 	}
405 | 	return received.String()
406 | }
407 | 
408 | // waitForMsg reads messages until every expected substring in msgs has
409 | // been seen the specified number of times, i.e. every counter reaches 0.
410 | // The counters in msgs are decremented in place.
411 | func waitForMsg(ts []*TRaft, msgs map[string]int) {
412 | 	for {
413 | 		msg := readMsg(ts)
414 | 		for want := range msgs {
415 | 			if !strings.Contains(msg, want) {
416 | 				continue
417 | 			}
418 | 			msgs[want]--
419 | 			lg.Infow("got-msg", "msg", msg)
420 | 		}
421 | 		pending := false
422 | 		for _, remain := range msgs {
423 | 			pending = pending || remain != 0
424 | 		}
425 | 		lg.Infow("require-msg", "msgs", msgs)
426 | 		if !pending {
427 | 			return
428 | 		}
429 | 	}
430 | }
431 | 
432 | // waitForAnyMsg reads messages until `total` of them contain at least one
433 | // of the substrings in msgs, and returns the matched messages in order. A message is counted once even if
434 | // it contains several substrings, and `total <= 0` ends the wait, so the counter can not step over zero.
435 | func waitForAnyMsg(ts []*TRaft, msgs []string, total int) []string {
436 | 	rst := []string{}
437 | 	for {
438 | 		msg := readMsg(ts)
439 | 		for _, s := range msgs {
440 | 			if strings.Contains(msg, s) {
441 | 				total--
442 | 				rst = append(rst, msg)
443 | 				lg.Infow("got-msg", "msg", msg)
444 | 				break
445 | 			}
446 | 		}
447 | 		lg.Infow("require-msg", "msgs", msgs, "total", total)
448 | 		if total <= 0 {
449 | 			return rst
450 | 		}
451 | 	}
452 | }
453 | // findLeader returns the id a majority of ts voted for, or -1 if none; votes are counted per id, not per index in ts.
454 | func findLeader(ts []*TRaft) int64 {
455 | 	votes := make(map[int64]int, len(ts))
456 | 	for _, t := range ts {
457 | 		id := t.Status[t.Id].VotedFor.Id
458 | 		votes[id]++
459 | 		if votes[id] > len(ts)/2 {
460 | 			// TODO joint consensus
461 | 			return id
462 | 		}
463 | 	}
464 | 	return -1
465 | }
466 | // TestTRaft_VoteLoop runs ElectLoop on one or more replicas and checks the election outcome through the messages they emit.
467 | func TestTRaft_VoteLoop(t *testing.T) {
468 | 
469 | 	lid := NewLeaderId
470 | 	bm := NewTailBitmap
471 | 
472 | 	withCluster(t, "emptyVoters/candidate-1",
473 | 		[]int64{0, 1, 2},
474 | 		func(t *testing.T, ts []*TRaft) {
475 | 			ta := require.New(t)
476 | 
477 | 			ts[2].Stop()
478 | 
479 | 			go ts[0].ElectLoop()
480 | 
481 | 			waitForMsg(ts, map[string]int{
482 | 				"vote-win 001#000": 1,
483 | 			})
484 | 
485 | 			ta.Equal(lid(1, 0), ts[0].Status[0].VotedFor)
486 | 			ta.InDelta(uSecondI64()+leaderLease,
487 | 				ts[0].Status[0].VoteExpireAt, 1000*1000*1000)
488 | 
489 | 			ta.Equal(lid(1, 0), ts[1].Status[1].VotedFor)
490 | 			ta.InDelta(uSecondI64()+leaderLease,
491 | 				ts[1].Status[1].VoteExpireAt, 1000*1000*1000)
492 | 		})
493 | 
494 | 	withCluster(t, "emptyVoters/candidate-2",
495 | 		[]int64{0, 1, 2},
496 | 		func(t *testing.T, ts []*TRaft) {
497 | 			ta := require.New(t)
498 | 
499 | 			go ts[1].ElectLoop()
500 | 			waitForMsg(ts, map[string]int{
501 | 				"vote-win 001#001": 1,
502 | 			})
503 | 
504 | 			ta.Equal(lid(1, 1), ts[1].Status[1].VotedFor)
505 | 
506 | 			ta.InDelta(uSecondI64()+leaderLease,
507 | 				ts[1].Status[1].VoteExpireAt, 1000*1000*1000)
508 | 		})
509 | 
510 | 	withCluster(t, "emptyVoters/candidate-12",
511 | 		[]int64{0, 1, 2},
512 | 		func(t *testing.T, ts []*TRaft) {
513 | 
514 | 			go ts[0].ElectLoop()
515 | 			go ts[1].ElectLoop()
516 | 
517 | 			// only one candidate succeeds in the election.
518 | 			// In 1 second, there won't be another winning election.
519 | 			waitForMsg(ts, map[string]int{
520 | 				"vote-win 001#001": 1,
521 | 			})
522 | 		})
523 | 
524 | 	withCluster(t, "emptyVoters/candidate-123",
525 | 		[]int64{0, 1, 2},
526 | 		func(t *testing.T, ts []*TRaft) {
527 | 
528 | 			ta := require.New(t)
529 | 
530 | 			go ts[0].ElectLoop()
531 | 			go ts[1].ElectLoop()
532 | 			go ts[2].ElectLoop()
533 | 
534 | 			// only one candidate succeeds in the election.
535 | 			// In 1 second, there won't be another winning election.
536 | 			got := waitForAnyMsg(ts, []string{
537 | 				"vote-win",
538 | 				"vote-fail",
539 | 			}, 3)
540 | 
541 | 			winner := findLeader(ts)
542 | 
543 | 			ta.Contains(strings.Join(got, ";"),
544 | 				fmt.Sprintf("Id=%d vote-win", winner))
545 | 		})
546 | 
547 | 	withCluster(t, "id2MaxCommitter",
548 | 		[]int64{0, 1, 2},
549 | 		func(t *testing.T, ts []*TRaft) {
550 | 			ts[0].initTraft0(lid(2, 1), lid(4, 0), "x=1")
551 | 			ts[1].initTraft0(lid(3, 2), lid(4, 1), "x=1")
552 | 			ts[2].initTraft0(lid(1, 3), lid(4, 2), "x=1")
553 | 
554 | 			go ts[0].ElectLoop()
555 | 			go ts[1].ElectLoop()
556 | 			go ts[2].ElectLoop()
557 | 
558 | 			// only one candidate succeeds in the election.
559 | 			// In 1 second, there won't be another winning election.
560 | 			waitForMsg(ts, map[string]int{
561 | 				"vote-win 005#001": 1,
562 | 				"vote-fail":        2,
563 | 			})
564 | 		})
565 | 
566 | 	withCluster(t, "id1MaxLog",
567 | 		[]int64{0, 1, 2, 3, 4},
568 | 		func(t *testing.T, ts []*TRaft) {
569 | 			// we need 5 replicas to collect different logs from 2 replicas
570 | 			ta := require.New(t)
571 | 			_ = ta
572 | 
573 | 			// R0 0.2      Committer: 2-0
574 | 			// R1 0...4    Committer: 3-1
575 | 			// R2 n..3     Committer: 1-2
576 | 			ts[0].initTraft(lid(2, 0), lid(1, 1), []int64{0, 2}, nil, nil, lid(4, 0))
577 | 			ts[1].initTraft(lid(3, 1), lid(1, 1), []int64{0, 4}, nil, nil, lid(4, 1))
578 | 			ts[2].initTraft(lid(1, 2), lid(2, 1), []int64{0, 3}, nil, []int64{0}, lid(4, 2))
579 | 			// ts[3].initTraft(lid(1, 2), lid(1, 1), []int64{0, 2, 3}, nil, nil, lid(0, 3))
580 | 			// ts[4].initTraft(lid(1, 2), lid(1, 1), []int64{0, 2, 3}, nil, nil, lid(0, 4))
581 | 
582 | 			ts[3].Stop()
583 | 			ts[4].Stop()
584 | 
585 | 			ts[1].Status[1].VotedFor = lid(3, 1)
586 | 			go ts[1].ElectLoop()
587 | 
588 | 			// only one candidate succeeds in the election.
589 | 			// In 1 second, there won't be another winning election.
590 | 			waitForMsg(ts, map[string]int{
591 | 				"vote-win 005#001": 1,
592 | 			})
593 | 
594 | 			ta.Equal(
595 | 				join("[<001#001:000{set(x, 0)}-0→0>",
596 | 					"<>",
597 | 					"<>",
598 | 					"<>",
599 | 					"<001#001:004{set(x, 4)}-0→0>]"),
600 | 				RecordsShortStr(ts[1].Logs, ""),
601 | 			)
602 | 
603 | 			ta.Equal(lid(5, 1), ts[1].Status[1].Committer)
604 | 			ta.Equal(bm(0, 0, 4), ts[1].Status[1].Accepted)
605 | 			ta.Equal(bm(0), ts[1].Status[1].Committed)
606 | 
607 | 			ta.Equal(lid(2, 0), ts[1].Status[0].Committer)
608 | 			// using Equal to avoid comparison between nil and []int64{}
609 | 			ta.True(bm(0).Equal(ts[1].Status[0].Accepted))
610 | 			ta.True(bm(0).Equal(ts[1].Status[0].Committed))
611 | 
612 | 			ta.Equal(lid(1, 2), ts[1].Status[2].Committer)
613 | 			// reduced Accepted to Committed
614 | 			ta.Equal(bm(0, 0), ts[1].Status[2].Accepted)
615 | 			ta.Equal(bm(0, 0), ts[1].Status[2].Committed)
616 | 		})
617 | 
618 | 	withCluster(t, "id1LeaderMergeOverrides",
619 | 		[]int64{0, 1, 2},
620 | 		func(t *testing.T, ts []*TRaft) {
621 | 			ta := require.New(t)
622 | 			_ = ta
623 | 
624 | 			// R0 .1.3     Committer: 2-0; 3 overrides 1
625 | 			// R1          Committer: 3-1
626 | 			ts[0].initTraft0(lid(3, 1), lid(4, 1))
627 | 			ts[1].initTraft0(lid(3, 1), lid(4, 1))
628 | 			// ts[2].initTraft0(lid(1, 2), lid(4, 2))
629 | 
630 | 			ts[2].Stop()
631 | 
632 | 			ts[0].addLogs(nil, nil, nil, "x=1")
633 | 			ts[0].Logs[3].Overrides = bm(0, 1, 3)
634 | 			ts[0].Status[0].Accepted = bm(0, 1, 3)
635 | 
636 | 			ts[1].addLogs(nil, nil, nil, nil, "y=1")
637 | 			ts[1].Logs[4].Overrides = bm(0, 4)
638 | 			ts[1].Status[1].Accepted = bm(0, 4)
639 | 
640 | 			go ts[1].ElectLoop()
641 | 
642 | 			// only one candidate succeeds in the election.
643 | 			// In 1 second, there won't be another winning election.
644 | 			waitForMsg(ts, map[string]int{
645 | 				"vote-win 005#001": 1,
646 | 			})
647 | 
648 | 			ta.Equal(
649 | 				join("[",
650 | 					"<>",
651 | 					"<>",
652 | 					"<>",
653 | 					"<004#001:003{set(x, 1)}-0:a→0>",
654 | 					"<004#001:004{set(y, 1)}-0:10→0>",
655 | 					"]"),
656 | 				RecordsShortStr(ts[1].Logs, ""),
657 | 			)
658 | 
659 | 			ta.Equal(lid(5, 1), ts[1].Status[1].Committer)
660 | 			ta.Equal(bm(0, 1, 3, 4), ts[1].Status[1].Accepted)
661 | 		})
662 | }
663 | // TestTRaft_Propose sends Propose requests to replicas before and after an election and checks the replies and the leader's logs.
664 | func TestTRaft_Propose(t *testing.T) {
665 | 
666 | 	lid := NewLeaderId
667 | 	bm := NewTailBitmap
668 | 	// sendPropose converts xcmd with toCmd, proposes it to the replica at addr and returns the reply; an rpc error is only logged.
669 | 	sendPropose := func(addr string, xcmd interface{}) *ProposeReply {
670 | 		cmd := toCmd(xcmd)
671 | 		var reply *ProposeReply
672 | 		rpcTo(addr, func(cli TRaftClient, ctx context.Context) {
673 | 			var err error
674 | 			reply, err = cli.Propose(ctx, cmd)
675 | 			if err != nil {
676 | 				lg.Infow("err:", "err", err)
677 | 			}
678 | 		})
679 | 		return reply
680 | 	}
681 | 
682 | 	withCluster(t, "invalidLeader",
683 | 		[]int64{0, 1, 2},
684 | 		func(t *testing.T, ts []*TRaft) {
685 | 			ta := require.New(t)
686 | 
687 | 			ts[0].initTraft(lid(2, 0), lid(1, 1), []int64{}, nil, nil, lid(2, 0))
688 | 			ts[1].initTraft(lid(3, 1), lid(1, 1), []int64{}, nil, nil, lid(3, 1))
689 | 			ts[2].initTraft(lid(1, 2), lid(2, 1), []int64{}, nil, []int64{0}, lid(1, 2))
690 | 
691 | 			mems := ts[1].Config.Members
692 | 
693 | 			// no leader elected, not allowed to propose
694 | 			reply := sendPropose(mems[1].Addr, NewCmdI64("foo", "x", 1))
695 | 			ta.Equal(&ProposeReply{
696 | 				OK:          false,
697 | 				Err:         "vote expired",
698 | 				OtherLeader: nil,
699 | 			}, reply)
700 | 
701 | 			// elect ts[1]
702 | 			go ts[1].ElectLoop()
703 | 
704 | 			waitForMsg(ts, map[string]int{
705 | 				"vote-win 004#001": 1,
706 | 			})
707 | 
708 | 			// send to non-leader replica:
709 | 			reply = sendPropose(mems[0].Addr, NewCmdI64("foo", "x", 1))
710 | 			ta.Equal(&ProposeReply{
711 | 				OK:          false,
712 | 				Err:         "I am not leader",
713 | 				OtherLeader: lid(4, 1)}, reply)
714 | 		})
715 | 
716 | 	withCluster(t, "succ",
717 | 		[]int64{0, 1, 2},
718 | 		func(t *testing.T, ts []*TRaft) {
719 | 
720 | 			ta := require.New(t)
721 | 
722 | 			ts[0].initTraft(lid(2, 0), lid(1, 1), []int64{}, nil, nil, lid(3, 0))
723 | 			ts[1].initTraft(lid(3, 1), lid(1, 1), []int64{}, nil, nil, lid(3, 1))
724 | 			ts[2].initTraft(lid(1, 2), lid(2, 1), []int64{}, nil, []int64{0}, lid(3, 2))
725 | 
726 | 			mems := ts[1].Config.Members
727 | 
728 | 			// elect ts[1]
729 | 			go ts[1].ElectLoop()
730 | 
731 | 			waitForMsg(ts, map[string]int{
732 | 				"vote-win 004#001": 1,
733 | 			})
734 | 
735 | 			// TODO check state of other replicas
736 | 
737 | 			// succeed to propose
738 | 			reply := sendPropose(mems[1].Addr, "y=1")
739 | 			ta.Equal(&ProposeReply{OK: true}, reply)
740 | 
741 | 			ta.Equal(bm(1), ts[1].Status[1].Accepted)
742 | 			ta.Equal(bm(1), ts[1].Status[1].Committed)
743 | 			ta.Equal(
744 | 				join("[<004#001:000{set(y, 1)}-0:1→0>", "]"),
745 | 				RecordsShortStr(ts[1].Logs, ""),
746 | 			)
747 | 
748 | 			reply = sendPropose(mems[1].Addr, "y=2")
749 | 			ta.Equal(&ProposeReply{OK: true, OtherLeader: nil}, reply)
750 | 
751 | 			ta.Equal(bm(2), ts[1].Status[1].Accepted)
752 | 			ta.Equal(bm(2), ts[1].Status[1].Committed)
753 | 			ta.Equal(
754 | 				join("[<004#001:000{set(y, 1)}-0:1→0>",
755 | 					"<004#001:001{set(y, 2)}-0:3→0>",
756 | 					"]"),
757 | 				RecordsShortStr(ts[1].Logs, ""),
758 | 			)
759 | 
760 | 			reply = sendPropose(mems[1].Addr, "x=3")
761 | 			ta.Equal(&ProposeReply{OK: true, OtherLeader: nil}, reply)
762 | 
763 | 			ta.Equal(bm(3), ts[1].Status[1].Accepted)
764 | 			ta.Equal(
765 | 				join("[<004#001:000{set(y, 1)}-0:1→0>",
766 | 					"<004#001:001{set(y, 2)}-0:3→0>",
767 | 					"<004#001:002{set(x, 3)}-0:4→0>",
768 | 					"]"),
769 | 				RecordsShortStr(ts[1].Logs, ""),
770 | 			)
771 | 		})
772 | }
773 | 
774 | func TestTRaft_AddLog_nil(t *testing.T) {
775 | 
776 | 	ta := require.New(t)
777 | 
778 | 	id := int64(1)
779 | 	tr := NewTRaft(id, map[int64]string{id: "123"})
780 | 
781 | 	tr.addLogs("x=1", "y=1", nil, "x=1")
782 | 
783 | 	ta.Equal(join(
784 | 		"[<000#001:000{set(x, 1)}-0:1→0>",
785 | 		"<000#001:001{set(y, 1)}-0:2→0>",
786 | 		"<>",
787 | 		"<000#001:003{set(x, 1)}-0:9→0>]"), RecordsShortStr(tr.Logs, ""))
788 | }
789 | 
790 | // TestTRaft_AddLog checks the short string of the records appended by
791 | // AddLog, before and after the head of the log is truncated.
792 | func TestTRaft_AddLog(t *testing.T) {
793 | 	ta := require.New(t)
794 | 	id := int64(1)
795 | 	tr := NewTRaft(id, map[int64]string{id: "123"})
796 | 
797 | 	// addMany appends 67 logs that set w, x, y, z in turn to base+i.
798 | 	addMany := func(base int64) {
799 | 		varnames := "wxyz"
800 | 		for i := 0; i < 67; i++ {
801 | 			vi := i % len(varnames)
802 | 			tr.AddLog(NewCmdI64("set", varnames[vi:vi+1], base+int64(i)))
803 | 		}
804 | 	}
805 | 	lastLog := func() string {
806 | 		return tr.Logs[len(tr.Logs)-1].ShortStr()
807 | 	}
808 | 
809 | 	tr.AddLog(NewCmdI64("set", "x", 1))
810 | 	ta.Equal("[<000#001:000{set(x, 1)}-0:1→0>]", RecordsShortStr(tr.Logs))
811 | 
812 | 	tr.AddLog(NewCmdI64("set", "y", 1))
813 | 	ta.Equal(join(
814 | 		"[<000#001:000{set(x, 1)}-0:1→0>",
815 | 		"<000#001:001{set(y, 1)}-0:2→0>]"), RecordsShortStr(tr.Logs, ""))
816 | 
817 | 	tr.AddLog(NewCmdI64("set", "x", 1))
818 | 	ta.Equal(join(
819 | 		"[<000#001:000{set(x, 1)}-0:1→0>",
820 | 		"<000#001:001{set(y, 1)}-0:2→0>",
821 | 		"<000#001:002{set(x, 1)}-0:5→0>]"), RecordsShortStr(tr.Logs, ""))
822 | 
823 | 	addMany(0)
824 | 	ta.Equal("<000#001:069{set(y, 66)}-0:2222222222222222:22→0>", lastLog())
825 | 
826 | 	// Drop the first 65 logs, then add another 67,
827 | 	// to check Overrides and Depends.
828 | 	tr.LogOffset = 65
829 | 	tr.Logs = tr.Logs[65:]
830 | 
831 | 	addMany(100)
832 | 	ta.Equal("<000#001:136{set(y, 166)}-64:1111111111111122:111→64:1>", lastLog())
833 | }
834 | 


--------------------------------------------------------------------------------
/errors.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import "github.com/pkg/errors"
 4 | // Sentinel errors of this package; they are created with pkg/errors, so a wrapped one is matched via errors.Cause(err).
 5 | var (
 6 | 	ErrStaleLog    = errors.New("local log is stale")             // expected from ElectOnce in TestTRaft_VoteOnce
 7 | 	ErrStaleTermId = errors.New("local Term-Id is stale")         // expected from ElectOnce in TestTRaft_VoteOnce
 8 | 	ErrTimeout     = errors.New("timeout")                        // NOTE(review): usage not visible here
 9 | 	ErrLeaderLost  = errors.New("leadership lost")                // NOTE(review): usage not visible here
10 | 	ErrNeedElect   = errors.New("no leader found, need to elect") // NOTE(review): usage not visible here
11 | )
12 | 


--------------------------------------------------------------------------------
/gen-proto.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Generate Go code, with the grpc plugin enabled, from traft.proto using protoc-gen-gogofaster.
 3 | # Other generators that were tried:
 4 | #   protoc --proto_path=. --go_out=plugins=grpc:. traft.proto
 5 | #   protoc --proto_path=. --gofast_out=plugins=grpc:. traft.proto
 6 | # $GOPATH is quoted below so that a path containing spaces is not split into several arguments.
 7 | # The gogo generators can be fetched with:
 8 | #   go get github.com/gogo/protobuf/protoc-gen-gogofast
 9 | #   go get github.com/gogo/protobuf/protoc-gen-gogofaster
10 | #   go get github.com/gogo/protobuf/protoc-gen-gogoslick
11 | protoc -I=. \
12 |     -I="$GOPATH/src" \
13 |     -I="$GOPATH/src/github.com/gogo/protobuf/protobuf" \
14 |     --gogofaster_out=plugins=grpc:. \
15 |     traft.proto
16 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/openacid/traft
 2 | 
 3 | go 1.14
 4 | 
 5 | require (
 6 | 	github.com/gogo/protobuf v1.3.2
 7 | 	github.com/golang/protobuf v1.4.3
 8 | 	github.com/kr/pretty v0.2.1
 9 | 	github.com/openacid/low v0.1.22-0.20210209151724-95ca9483dbbb
10 | 	github.com/pkg/errors v0.9.1
11 | 	github.com/stretchr/testify v1.7.0
12 | 	go.uber.org/multierr v1.6.0 // indirect
13 | 	go.uber.org/zap v1.16.0
14 | 	google.golang.org/grpc v1.27.0
15 | 	google.golang.org/protobuf v1.25.0
16 | )
17 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
  1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
  2 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
  3 | github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
  4 | github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
  5 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
  6 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
  7 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
  8 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
  9 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 10 | github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 11 | github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
 12 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
 13 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 14 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
 15 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
 16 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 17 | github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
 18 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 19 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 20 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
 21 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
 22 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
 23 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
 24 | github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
 25 | github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
 26 | github.com/golang/protobuf v1.4.3 h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM=
 27 | github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
 28 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
 29 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 30 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 31 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 32 | github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 33 | github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
 34 | github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
 35 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
 36 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
 37 | github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
 38 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
 39 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 40 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
 41 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 42 | github.com/openacid/errors v0.8.1/go.mod h1:GUQEJJOJE3W9skHm8E8Y4phdl2LLEN8iD7c5gcGgdx0=
 43 | github.com/openacid/low v0.1.22-0.20210209151724-95ca9483dbbb h1:II/fUVgcmT9iD94uquwb/pVUMbEm83SzYdeldAnTE/c=
 44 | github.com/openacid/low v0.1.22-0.20210209151724-95ca9483dbbb/go.mod h1:KbBlxORT7soCdBGWfYoUsipHkG4vRKgm54uaBf222co=
 45 | github.com/openacid/must v0.1.3/go.mod h1:luPiXCuJlEo3UUFQngVQokV0MPGryeYvtCbQPs3U1+I=
 46 | github.com/openacid/testkeys v0.1.7/go.mod h1:MfA7cACzBpbiwekivj8StqX0WIRmqlMsci1c37CA3Do=
 47 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 48 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 49 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 50 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 51 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 52 | github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 53 | github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 54 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 55 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 56 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
 57 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 58 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
 59 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 60 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 61 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 62 | go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk=
 63 | go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
 64 | go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
 65 | go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
 66 | go.uber.org/multierr v1.5.0 h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A=
 67 | go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU=
 68 | go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4=
 69 | go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
 70 | go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA=
 71 | go.uber.org/zap v1.16.0 h1:uFRZXykJGK9lLY4HtgSw44DnIcAM+kRBP7x5m+NpAOM=
 72 | go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ=
 73 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 74 | golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 75 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 76 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 77 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 78 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 79 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
 80 | golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
 81 | golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
 82 | golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
 83 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 84 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 85 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 86 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 87 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 88 | golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628=
 89 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 90 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 91 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 92 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 93 | golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI=
 94 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 95 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 96 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 97 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 98 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 99 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
100 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
101 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
102 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
103 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
104 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
105 | golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
106 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
107 | golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
108 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
109 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
110 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
111 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
112 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
113 | golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
114 | golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
115 | golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
116 | golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
117 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
118 | golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
119 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
120 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
121 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
122 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
123 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
124 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
125 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
126 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
127 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
128 | google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY=
129 | google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
130 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
131 | google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
132 | google.golang.org/grpc v1.27.0 h1:rRYRFMVgRv6E0D70Skyfsr28tDXIuuPZyWGMPdMcnXg=
133 | google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
134 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
135 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
136 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
137 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
138 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
139 | google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
140 | google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
141 | google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
142 | google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
143 | google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
144 | google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
145 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
146 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
147 | gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
148 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
149 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
150 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
151 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
152 | honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
153 | honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
154 | 


--------------------------------------------------------------------------------
/internal_api.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import "context"
 4 | 
 5 | func (tr *TRaft) Elect(ctx context.Context, req *ElectReq) (*ElectReply, error) {
 6 | 	var reply *ElectReply
 7 | 	rst := tr.query( func() error {
 8 | 		reply= tr.hdlElectReq(req)
 9 | 		return nil
10 | 	})
11 | 	_ = rst
12 | 	return reply,nil
13 | }
14 | 
15 | func (tr *TRaft) LogForward(ctx context.Context, req *LogForwardReq) (*LogForwardReply, error) {
16 | 
17 | 	// TODO: if a newer committer is seen, non-committed logs
18 | 	// can be sure to stale and should be cleaned.
19 | 
20 | 	var reply *LogForwardReply
21 | 	rst := tr.query( func() error {
22 | 		reply = tr.hdlLogForward(req)
23 | 		return nil
24 | 	})
25 | 	_ = rst
26 | 	return reply, nil
27 | }
28 | 
29 | func (tr *TRaft) Propose(ctx context.Context, cmd *Cmd) (*ProposeReply, error) {
30 | 
31 | 	finCh := make(chan *ProposeReply, 1)
32 | 
33 | 	rst := tr.query( func() error {
34 | 		tr.hdlPropose(cmd, finCh)
35 | 		return nil
36 | 	})
37 | 	_ = rst
38 | 
39 | 	lg.Infow("waitingFor:finCh")
40 | 	reply := <-finCh
41 | 	lg.Infow("got:finCh", "reply", reply)
42 | 
43 | 	return reply, nil
44 | }
45 | 


--------------------------------------------------------------------------------
/leaderid.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	fmt "fmt"
 5 | 
 6 | 	proto "github.com/gogo/protobuf/proto"
 7 | )
 8 | 
 9 | func NewLeaderId(term, id int64) *LeaderId {
10 | 	return &LeaderId{
11 | 		Term: term,
12 | 		Id:   id,
13 | 	}
14 | }
15 | 
16 | // Compare two leader id and returns 1, 0 or -1 for greater, equal and less
17 | func (a *LeaderId) Cmp(b *LeaderId) int {
18 | 	if a == nil {
19 | 		a = &LeaderId{}
20 | 	}
21 | 	if b == nil {
22 | 		b = &LeaderId{}
23 | 	}
24 | 
25 | 	r := cmpI64(a.Term, b.Term)
26 | 	if r != 0 {
27 | 		return r
28 | 	}
29 | 
30 | 	return cmpI64(a.Id, b.Id)
31 | }
32 | 
33 | func (l *LeaderId) Clone() *LeaderId {
34 | 	return proto.Clone(l).(*LeaderId)
35 | }
36 | 
37 | func (l *LeaderId) ShortStr() string {
38 | 	if l == nil {
39 | 		return "000#000"
40 | 	}
41 | 	return fmt.Sprintf("%03d#%03d", l.Term, l.Id)
42 | }
43 | 


--------------------------------------------------------------------------------
/leaderid_test.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/require"
 7 | )
 8 | 
 9 | func TestLeaderId_Cmp(t *testing.T) {
10 | 
11 | 	ta := require.New(t)
12 | 
13 | 	cases := []struct {
14 | 		a, b *LeaderId
15 | 		want int
16 | 	}{
17 | 		{a: NewLeaderId(1, 1), b: NewLeaderId(1, 1), want: 0},
18 | 		{a: NewLeaderId(1, 2), b: NewLeaderId(1, 1), want: 1},
19 | 		{a: NewLeaderId(1, 0), b: NewLeaderId(1, 1), want: -1},
20 | 		{a: NewLeaderId(2, 0), b: NewLeaderId(1, 1), want: 1},
21 | 		{a: NewLeaderId(0, 0), b: NewLeaderId(1, 1), want: -1},
22 | 
23 | 		{a: NewLeaderId(0, 0), b: nil, want: 0},
24 | 		{a: nil, b: NewLeaderId(1, 1), want: -1},
25 | 		{a: nil, b: nil, want: 0},
26 | 	}
27 | 
28 | 	for i, c := range cases {
29 | 		got := c.a.Cmp(c.b)
30 | 		ta.Equal(c.want, got, "%d-th: case: %+v", i+1, c)
31 | 	}
32 | }
33 | 
34 | func TestLeaderId_Clone(t *testing.T) {
35 | 
36 | 	ta := require.New(t)
37 | 
38 | 	a := NewLeaderId(1, 2)
39 | 	b := a.Clone()
40 | 	a.Term = 3
41 | 	a.Id = 4
42 | 	ta.Equal(int64(1), b.Term)
43 | 	ta.Equal(int64(2), b.Id)
44 | }
45 | 


--------------------------------------------------------------------------------
/log.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | var (
 4 | 	emptyRecord = &LogRecord{}
 5 | )
 6 | 
 7 | // a func for test purpose only
 8 | func (tr *TRaft) addLogs(cmds ...interface{}) {
 9 | 	me := tr.Status[tr.Id]
10 | 	for _, cs := range cmds {
11 | 		cmd := toCmd(cs)
12 | 		r := tr.addLogInternal(cmd)
13 | 		me.Accepted.Union(r.Overrides)
14 | 	}
15 | }
16 | 
17 | // Only a established leader should use this func.
18 | // no lock protection, must be called from Loop()
19 | func (tr *TRaft) AddLog(cmd *Cmd) *LogRecord {
20 | 
21 | 	me := tr.Status[tr.Id]
22 | 
23 | 	if me.VotedFor.Id != tr.Id {
24 | 		panic("wtf")
25 | 	}
26 | 
27 | 	return tr.addLogInternal(cmd)
28 | }
29 | 
30 | func (tr *TRaft) GetLog(lsn int64) *LogRecord {
31 | 	idx := lsn - tr.LogOffset
32 | 	r := tr.Logs[idx]
33 | 	if r.Seq != lsn {
34 | 		panic("wtf")
35 | 	}
36 | 	return r
37 | }
38 | 
39 | func (tr *TRaft) addLogInternal(cmd *Cmd) *LogRecord {
40 | 
41 | 	me := tr.Status[tr.Id]
42 | 
43 | 	lsn := tr.LogOffset + int64(len(tr.Logs))
44 | 
45 | 	r := NewRecord(me.VotedFor.Clone(), lsn, cmd)
46 | 
47 | 	// find the first interfering record.
48 | 
49 | 	var i int
50 | 	for i = len(tr.Logs) - 1; i >= 0; i-- {
51 | 		prev := tr.Logs[i]
52 | 		if r.Interfering(prev) {
53 | 			r.Overrides = prev.Overrides.Clone()
54 | 			break
55 | 		}
56 | 	}
57 | 
58 | 	if i == -1 {
59 | 		// there is not a interfering record.
60 | 		r.Overrides = NewTailBitmap(0)
61 | 	}
62 | 
63 | 	r.Overrides.Set(lsn)
64 | 
65 | 	// all log I do not know must be executed in order.
66 | 	// Because I do not know of the intefering relations.
67 | 	r.Depends = NewTailBitmap(tr.LogOffset)
68 | 
69 | 	// reduce bitmap size by removing unknown logs
70 | 	r.Overrides.Union(NewTailBitmap(tr.LogOffset & ^63))
71 | 
72 | 	tr.Logs = append(tr.Logs, r)
73 | 
74 | 	return r
75 | }
76 | 


--------------------------------------------------------------------------------
/logforward.go:
--------------------------------------------------------------------------------
  1 | package traft
  2 | 
  3 | import (
  4 | 	"time"
  5 | 
  6 | 	"github.com/pkg/errors"
  7 | )
  8 | 
// logForwardRst is the result of forwarding logs from leader to follower.
// It is the value handed to the callback of forwardLog.
type logForwardRst struct {
	// from is not set by forwardLog; presumably the replica that replied —
	// TODO confirm against other users.
	from  *ReplicaInfo
	// reply is not set by forwardLog; presumably the reply received from
	// that replica — TODO confirm.
	reply *LogForwardReply
	// err is nil when forwarding succeeded, otherwise the wrapped cause.
	err   error
}
 15 | 
 16 | // forward log from leader to follower concurrently
 17 | func (tr *TRaft) forwardLog(
 18 | 	committer *LeaderId,
 19 | 	config *Cluster,
 20 | 	logs []*LogRecord,
 21 | 	callback func(*logForwardRst),
 22 | ) {
 23 | 
 24 | 	lsns := []int64{logs[0].Seq, logs[len(logs)-1].Seq + 1}
 25 | 	lg.Infow("forward", "LSNs", lsns, "cmtr", committer)
 26 | 
 27 | 	req := &LogForwardReq{
 28 | 		Committer: committer,
 29 | 		Logs:      logs,
 30 | 	}
 31 | 
 32 | 	id := tr.Id
 33 | 
 34 | 	// TODO
 35 | 	timeout := time.Second
 36 | 	sess := rpcToAll(id, config, meth.LogForward, req, timeout)
 37 | 
 38 | 	for res := range sess.resCh {
 39 | 
 40 | 		lg.Infow("logforward:recv-reply", "res", res)
 41 | 
 42 | 		if sess.updateOKBitmap(res) {
 43 | 
 44 | 			rst := tr.query(func() error {
 45 | 				return tr.leaderUpdateCommitted(
 46 | 					committer, lsns,
 47 | 				)
 48 | 			})
 49 | 
 50 | 			if rst.err == nil {
 51 | 				lg.Infow("forward:a-quorum-done")
 52 | 				callback(&logForwardRst{})
 53 | 			} else {
 54 | 				// TODO let the root cause to generate the error
 55 | 				callback(&logForwardRst{
 56 | 					err: errors.Wrapf(rst.err, "forward"),
 57 | 				})
 58 | 			}
 59 | 			// LogForward does not cancel, try best to send logs to followers.
 60 | 			return
 61 | 		}
 62 | 	}
 63 | 
 64 | 	lg.Infow("forward:timeout", "cmtr", committer.ShortStr())
 65 | 	callback(&logForwardRst{
 66 | 		err: errors.Wrapf(ErrTimeout, "forward"),
 67 | 	})
 68 | }
 69 | 
// hdlLogForward handles a LogForward request on a follower.
// LogForward is similar to paxos-phase-2.
//
// It updates this replica's own status (VotedFor, VoteExpireAt, Accepted,
// Committer, Committed) and tr.Logs in place.
//
// The reply has OK=false and carries only VotedFor when the request is
// rejected; otherwise it has OK=true and carries clones of VotedFor,
// Accepted and Committed.
func (tr *TRaft) hdlLogForward(req *LogForwardReq) *LogForwardReply {
	me := tr.Status[tr.Id]
	now := uSecondI64()

	lg.Infow("hdl-logforward", "req", req)
	lg.Infow("hdl-logforward", "me", me)

	cr := req.Committer.Cmp(me.VotedFor)

	// If req.Committer > me.VotedFor, it is a valid leader too.
	// It is safe to accept its log.
	// This is a common optimization of paxos: an Acceptor accepts request if rnd >= lastrnd.
	// See: https://blog.openacid.com/algo/paxos/#slide-42

	// Reject a committer older than the leader voted for, but only while
	// that vote has not expired yet.
	if cr < 0 && now < me.VoteExpireAt {
		lg.Infow("hdl-logforward: illegal committer",
			"req.Commiter", req.Committer,
			"me.VotedFor", me.VotedFor,
			"me.VoteExpireAt-now", me.VoteExpireAt-now)

		return &LogForwardReply{
			OK:       false,
			VotedFor: me.VotedFor.Clone(),
		}
	}

	// A newer committer replaces the current vote and starts a new lease.
	if cr > 0 {
		me.VotedFor = req.Committer.Clone()
		me.VoteExpireAt = now + leaderLease
	}

	// TODO apply req.Committed

	cr = req.Committer.Cmp(me.Committer)
	if cr > 0 {
		lg.Infow("hdl-log-forward: newer committer",
			"req.Committer", req.Committer,
			"me.Committer", me.Committer,
		)

		// if req.Committer is newer, discard all non-committed logs
		// Because non-committed local log may have been overridden by some new leader.
		me.Accepted = me.Committed.Clone()

		// Replace every non-empty record that is no longer in Accepted with
		// an empty record, keeping its slot in tr.Logs.
		i := len(tr.Logs) - 1
		for ; i >= 0; i-- {
			r := tr.Logs[i]
			if r.Empty() {
				continue
			}

			if me.Accepted.Get(r.Seq) == 0 {
				tr.Logs[i] = &LogRecord{}
			}
		}
	}

	// add new logs

	for _, r := range req.Logs {
		lsn := r.Seq
		idx := lsn - tr.LogOffset

		// Pad with empty records so that idx is a valid position.
		for int(idx) >= len(tr.Logs) {
			tr.Logs = append(tr.Logs, &LogRecord{})
		}

		// An already accepted LSN must not be replaced by a different
		// record; an empty slot may still be filled.
		if me.Accepted.Get(lsn) != 0 {
			if !tr.Logs[idx].Empty() && !tr.Logs[idx].Equal(r) {
				panic("wtf")
			}
		}
		tr.Logs[idx] = r

		me.Accepted.Union(r.Overrides)

		lg.Infow("hdl-logforward", "accept-log", r)
		lg.Infow("hdl-logforward", "accepted", me.Accepted)
	}

	// TODO refine me
	// remove empty logs at top, i.e. trailing empty records at the end of tr.Logs
	for len(tr.Logs) > 0 {
		l := len(tr.Logs)
		if tr.Logs[l-1].Empty() {
			tr.Logs = tr.Logs[:l-1]
		} else {
			break
		}
	}

	me.Committer = req.Committer.Clone()

	me.UpdatedCommitted(req.Committer, req.Committed)

	return &LogForwardReply{
		OK:        true,
		VotedFor:  me.VotedFor.Clone(),
		Accepted:  me.Accepted.Clone(),
		Committed: me.Committed.Clone(),
	}
}
174 | 


--------------------------------------------------------------------------------
/logforward_test.go:
--------------------------------------------------------------------------------
  1 | package traft
  2 | 
  3 | import (
  4 | 	context "context"
  5 | 	fmt "fmt"
  6 | 	"testing"
  7 | 	"time"
  8 | 
  9 | 	"github.com/stretchr/testify/require"
 10 | )
 11 | 
// TestTRaft_LogForward sends a LogForward request to one replica of a
// three-replica cluster and checks the reply as well as the resulting state
// of the receiving replica.
//
// Each case sets VotedFor and the vote expiry of the receiving replica,
// sends the given committer, logs and committed bitmap, and then compares
// OK, VotedFor, Accepted, Committed and the stored logs. Expectations that
// are nil are not checked, except VotedFor which is always compared.
func TestTRaft_LogForward(t *testing.T) {

	ta := require.New(t)
	_ = ta

	// short aliases to keep the case table compact
	lid := NewLeaderId
	bm := NewTailBitmap

	// sendLogForward issues a LogForward rpc to addr and returns the reply;
	// an rpc error is only logged.
	sendLogForward := func(addr string, req *LogForwardReq) *LogForwardReply {
		var reply *LogForwardReply
		rpcTo(addr, func(cli TRaftClient, ctx context.Context) {
			var err error
			reply, err = cli.LogForward(ctx, req)
			if err != nil {
				lg.Infow("sendLogForward:err", "err", err)
			}
		})
		return reply
	}

	// the records that cases forward, sliced per case
	logs := []*LogRecord{
		NewRecordOverride(lid(5,1), 0, toCmd("x=0"), nil),
		NewRecordOverride(lid(5,1), 1, toCmd("y=1"), nil),
		NewRecordOverride(lid(5,1), 2, toCmd("x=2"), bm(1)),
	}

	// offset added to uSecondI64() to place the vote expiry far in the
	// future, or far in the past when negated
	sec1k := int64(time.Second * 1000)
	cases := []struct {
		name     string
		// id of the replica that receives the request
		to       int64
		// VotedFor and expiry offset installed on the receiver beforehand
		votedFor *LeaderId
		expire   int64

		// content of the request
		committer *LeaderId
		logs      []*LogRecord
		committed *TailBitmap

		// expected reply and receiver state
		wantOK        bool
		wantVotedFor  *LeaderId
		wantAccepted  *TailBitmap
		wantCommitted *TailBitmap
		wantLogs      []string
	}{
		{"unmatchedCommitter",
			0, lid(3, 0), sec1k,
			lid(1, 2), logs[0:], nil,
			false, lid(3, 0), nil, nil, nil,
		},
		{"accept/log2",
			0, lid(3, 1), sec1k,
			lid(3, 1), logs[2:], nil,
			true, lid(3, 1), bm(0, 0, 2), bm(0),
			[]string{
				"<>",
				"<>",
				"<005#001:002{set(x, 2)}-0:5→0>",
			},
		},
		{"accept/log12",
			0, lid(3, 1), sec1k,
			lid(3, 1), logs[1:], nil,
			true, lid(3, 1), bm(3), bm(0),
			[]string{
				"<>",
				"<005#001:001{set(y, 1)}-0:2→0>",
				"<005#001:002{set(x, 2)}-0:5→0>",
			},
		},
		{"accept/log12/overrideOld",
			2, lid(3, 1), sec1k,
			lid(3, 1), logs[1:], nil,
			true, lid(3, 1), bm(3), bm(1),
			[]string{
				"<>",
				"<005#001:001{set(y, 1)}-0:2→0>",
				"<005#001:002{set(x, 2)}-0:5→0>",
			},
		},
		{"accept/log12/mergeCommitted",
			2, lid(3, 1), sec1k,
			lid(3, 1), logs[1:], bm(0, 2, 3),
			true, lid(3, 1), bm(3), bm(0, 0, 2),
			[]string{
				"<>",
				"<005#001:001{set(y, 1)}-0:2→0>",
				"<005#001:002{set(x, 2)}-0:5→0>",
			},
		},
		{"accept/log12/overrideVotedFor",
			2, lid(2, 1), sec1k,
			lid(3, 1), logs[1:], nil,
			true, lid(3, 1), bm(3), bm(1),
			[]string{
				"<>",
				"<005#001:001{set(y, 1)}-0:2→0>",
				"<005#001:002{set(x, 2)}-0:5→0>",
			},
		},
		{"accept/log12/overrideExpiredVotedFor",
			2, lid(2, 1), -sec1k,
			lid(3, 1), logs[1:], nil,
			true, lid(3, 1), bm(3), bm(1),
			[]string{
				"<>",
				"<005#001:001{set(y, 1)}-0:2→0>",
				"<005#001:002{set(x, 2)}-0:5→0>",
			},
		},
	}

	for _, c := range cases {

		withCluster(t,
			fmt.Sprintf("%d-to-%d/%s", 1, c.to, c.name),
			[]int64{0, 1, 2},
			func(t *testing.T, ts []*TRaft) {
				ta := require.New(t)
				_ = ta

				// initial state of the three replicas
				ts[0].initTraft(lid(2, 0), lid(0, 1), []int64{}, nil, nil, lid(3, 0))
				ts[1].initTraft(lid(3, 1), lid(0, 1), []int64{}, nil, nil, lid(5, 1))
				ts[2].initTraft(lid(1, 2), lid(0, 1), []int64{}, nil, []int64{0}, lid(2, 2))

				ts[0].addLogs()
				ts[1].addLogs("x=0", "y=1", "x=2")
				ts[2].addLogs("", "y=5")

				// install the case-specific vote on the receiver
				dst := ts[c.to].Status[c.to]
				dst.VotedFor = c.votedFor
				dst.VoteExpireAt = uSecondI64() + c.expire

				fmt.Println(ts[c.to].Node)
				ts[c.to].checkStatus()

				addr := ts[1].Config.Members[c.to].Addr
				repl := sendLogForward(addr, &LogForwardReq{
					Committer: c.committer,
					Logs:      c.logs,
					Committed: c.committed,
				})

				ta.Equal(c.wantOK, repl.OK)

				ta.Equal(c.wantVotedFor, repl.VotedFor)
				// bitmaps are checked both in the reply and on the receiver
				if c.wantAccepted != nil {
					ta.Equal(c.wantAccepted, repl.Accepted.Normalize())
					ta.Equal(c.wantAccepted, dst.Accepted.Normalize())
				}

				if c.wantCommitted != nil {
					ta.Equal(c.wantCommitted, repl.Committed.Normalize())
					ta.Equal(c.wantCommitted, dst.Committed.Normalize())
				}

				if c.wantLogs != nil {
					ta.Equal("["+join(c.wantLogs...)+"]",
						RecordsShortStr(ts[c.to].Logs, ""))
				}

			})
	}
}
174 | 


--------------------------------------------------------------------------------
/logging.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	"encoding/json"
 5 | 
 6 | 	"go.uber.org/zap"
 7 | )
 8 | 
 9 | var (
10 | 	llg = zap.NewNop()
11 | 	lg  *zap.SugaredLogger
12 | )
13 | 
14 | func initLogging() {
15 | 	// if os.Getenv("CLUSTER_DEBUG") != "" {
16 | 	// }
17 | 	var err error
18 | 	// llg, err = zap.NewProduction()
19 | 	llg, err = zap.NewDevelopment()
20 | 	if err != nil {
21 | 		panic(err)
22 | 	}
23 | 	lg = llg.Sugar()
24 | 
25 | 	// initZap()
26 | 
27 | }
28 | 
29 | func initZap() {
30 | 	rawJSON := []byte(`{
31 | 	  "level": "debug",
32 | 	  "encoding": "json",
33 | 	  "outputPaths": ["stdout", "/tmp/logs"],
34 | 	  "errorOutputPaths": ["stderr"],
35 | 	  "initialFields": {"foo": "bar"},
36 | 	  "encoderConfig": {
37 | 	    "messageKey": "message",
38 | 	    "levelKey": "level",
39 | 	    "levelEncoder": "lowercase"
40 | 	  }
41 | 	}`)
42 | 
43 | 	var cfg zap.Config
44 | 	if err := json.Unmarshal(rawJSON, &cfg); err != nil {
45 | 		panic(err)
46 | 	}
47 | 
48 | 	var err error
49 | 	llg, err = cfg.Build()
50 | 	if err != nil {
51 | 		panic(err)
52 | 	}
53 | 	defer llg.Sync()
54 | 
55 | 	llg.Info("logger construction succeeded")
56 | 
57 | 	lg = llg.Sugar()
58 | 
59 | }
60 | 


--------------------------------------------------------------------------------
/mainloop.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import fmt "fmt"
 4 | 
// queryBody is a request sent to the main loop through tr.actionCh.
type queryBody struct {
	// arg is the work to run in the main loop. Loop accepts a func() error
	// or a func() interface{} and panics on anything else.
	arg   interface{}
	// rstCh receives the single result of running arg.
	rstCh chan *queryRst
}
 9 | 
// queryRst is the result the main loop sends back for a queryBody.
type queryRst struct {
	// v is the value returned by a func() interface{} query.
	v   interface{}
	// err is the error returned by a func() error query.
	err error
}
14 | 
15 | // query the mainloop goroutine for something, by other goroutines, such as
16 | // update traft state or get some info.
17 | func (tr *TRaft) query(arg interface{}) *queryRst {
18 | 	rstCh := make(chan *queryRst)
19 | 	tr.actionCh <- &queryBody{arg, rstCh}
20 | 	rst := <-rstCh
21 | 	lg.Infow("chan-query",
22 | 		// "arg", arg,
23 | 		"rst.err", rst.err,
24 | 		"rst.v", toStr(rst.v))
25 | 	return rst
26 | }
27 | 
28 | // Loop handles actions from other components.
29 | // This is the only goroutine that is allowed to update traft state.
30 | // Any info to send out of this goroutine must be cloned.
31 | func (tr *TRaft) Loop() {
32 | 
33 | 	for {
34 | 		select {
35 | 		case <-tr.shutdown:
36 | 			return
37 | 		case a := <-tr.actionCh:
38 | 
39 | 			tr.checkStatus()
40 | 
41 | 			switch f := a.arg.(type) {
42 | 			case func() error:
43 | 				err := f()
44 | 				a.rstCh <- &queryRst{err: err}
45 | 			case func() interface{}:
46 | 				v := f()
47 | 				a.rstCh <- &queryRst{v: v}
48 | 			default:
49 | 				panic("unknown func signature:" + fmt.Sprintf("%v", a.arg))
50 | 			}
51 | 
52 | 			tr.checkStatus()
53 | 		}
54 | 	}
55 | }
56 | 
57 | // checkStatus checks if TRaft status violate consistency requirement.
58 | // This is just a routine for debug.
59 | func (tr *TRaft) checkStatus() {
60 | 	id := tr.Id
61 | 	me := tr.Status[id]
62 | 
63 | 	// committer can never greater than voted leader
64 | 	if me.Committer.Cmp(me.VotedFor) > 0 {
65 | 		panic(
66 | 			fmt.Sprintf("Commiter > VotedFor: Id:%d %s %s",
67 | 				id,
68 | 				me.Committer.ShortStr(),
69 | 				me.VotedFor.ShortStr(),
70 | 			))
71 | 	}
72 | }
73 | 


--------------------------------------------------------------------------------
/propose.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
// proposeReq is a request sent to Loop() to propose a cmd.
type proposeReq struct {
	// cmd is the command to propose.
	cmd *Cmd
	// finCh presumably receives the final reply of the proposal, like the
	// finCh argument of hdlPropose — TODO confirm against the sender.
	finCh chan *ProposeReply
}
 8 | 
 9 | func (tr *TRaft) hdlPropose(cmd *Cmd, finCh chan<- *ProposeReply) {
10 | 	id := tr.Id
11 | 	me := tr.Status[id]
12 | 	now := uSecondI64()
13 | 
14 | 	if now > me.VoteExpireAt {
15 | 		lg.Infow("hdl-propose:VoteExpired", "me.VoteExpireAt-now", me.VoteExpireAt-now)
16 | 		// no valid leader for now
17 | 		finCh <- &ProposeReply{
18 | 			OK:  false,
19 | 			Err: "vote expired",
20 | 		}
21 | 		lg.Infow("hdl-propose: returning")
22 | 		return
23 | 	}
24 | 
25 | 	if me.VotedFor.Id != id {
26 | 		finCh <- &ProposeReply{
27 | 			OK:          false,
28 | 			Err:         "I am not leader",
29 | 			OtherLeader: me.VotedFor.Clone(),
30 | 		}
31 | 		return
32 | 	}
33 | 
34 | 	rec := tr.AddLog(cmd)
35 | 	lg.Infow("hdl-propose:added-rec", "rec", rec.ShortStr(), "rec.Overrides:", rec.Overrides.DebugStr())
36 | 
37 | 	me.Accepted.Union(rec.Overrides)
38 | 
39 | 	go tr.forwardLog(
40 | 		me.VotedFor.Clone(),
41 | 		tr.Config.Clone(),
42 | 		[]*LogRecord{rec},
43 | 		func(rst *logForwardRst) {
44 | 			if rst.err != nil {
45 | 				finCh <- &ProposeReply{
46 | 					OK:  false,
47 | 					Err: rst.err.Error(),
48 | 				}
49 | 			} else {
50 | 				finCh <- &ProposeReply{
51 | 					OK: true,
52 | 				}
53 | 			}
54 | 		})
55 | }
56 | 


--------------------------------------------------------------------------------
/quorum.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import "math/bits"
 4 | 
 5 | func buildMajorityQuorums(mask uint64) []uint64 {
 6 | 	rst := make([]uint64, 0)
 7 | 	major := bits.OnesCount64(mask)/2 + 1
 8 | 	for i := uint64(0); i <= mask; i++ {
 9 | 		if i&mask == i && bits.OnesCount64(i) == major {
10 | 			rst = append(rst, i)
11 | 		}
12 | 	}
13 | 	return rst
14 | }
15 | 


--------------------------------------------------------------------------------
/quorum_test.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"math/bits"
 6 | 	"testing"
 7 | 
 8 | 	"github.com/stretchr/testify/require"
 9 | )
10 | 
11 | func TestBuildMajorityQuorums(t *testing.T) {
12 | 
13 | 	ta := require.New(t)
14 | 
15 | 	cases := []struct {
16 | 		input uint64
17 | 		want  []string
18 | 	}{
19 | 		{
20 | 			input: 7,
21 | 			want: []string{
22 | 				"11000000",
23 | 				"10100000",
24 | 				"01100000",
25 | 			},
26 | 		},
27 | 		{
28 | 			input: 1 + 1<<2 + 1<<3,
29 | 			want: []string{
30 | 				"10100000",
31 | 				"10010000",
32 | 				"00110000",
33 | 			},
34 | 		},
35 | 		{
36 | 			input: 1 + 1<<3 + 1<<4,
37 | 			want: []string{
38 | 				"10010000",
39 | 				"10001000",
40 | 				"00011000",
41 | 			},
42 | 		},
43 | 		{
44 | 			input: 1<<2 + 1<<3 + 1<<4 + 1<<5,
45 | 			want: []string{
46 | 				"00111000",
47 | 				"00110100",
48 | 				"00101100",
49 | 				"00011100",
50 | 			},
51 | 		},
52 | 	}
53 | 
54 | 	for i, c := range cases {
55 | 		got := buildMajorityQuorums(c.input)
56 | 		gotStr := fmtBitmap(got)
57 | 		ta.Equal(c.want, gotStr, "%d-th: case: %+v", i+1, c)
58 | 	}
59 | }
60 | 
// fmtBitmap renders the lowest 8 bits of every value in vs as an 8-char binary
// string with bit 0 at the leftmost position.
// It always returns a non-nil slice.
func fmtBitmap(vs []uint64) []string {
	strs := make([]string, len(vs))
	for i := range vs {
		low := byte(vs[i])
		strs[i] = fmt.Sprintf("%08b", bits.Reverse8(low))
	}
	return strs
}
68 | 


--------------------------------------------------------------------------------
/record.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	fmt "fmt"
 5 | 	"strings"
 6 | )
 7 | 
 8 | // NewRecord: without Overrides yet!!! TODO
 9 | func NewRecord(leader *LeaderId, seq int64, cmd *Cmd) *LogRecord {
10 | 
11 | 	rec := &LogRecord{
12 | 		Author: leader,
13 | 		Seq:    seq,
14 | 		Cmd:    cmd,
15 | 	}
16 | 
17 | 	return rec
18 | }
19 | 
20 | func NewRecordOverride(leader *LeaderId, seq int64, cmd *Cmd, override *TailBitmap) *LogRecord {
21 | 
22 | 	rec := NewRecord(leader,seq,cmd)
23 | 	rec.Overrides = NewTailBitmap(0, seq)
24 | 	rec.Overrides.Union(override)
25 | 
26 | 	return rec
27 | }
28 | 
29 | // gogoproto would panic if a []*LogRecord has a nil in it.
30 | // Thus we use r.Cmd == nil  to indicate an absent log record.
31 | func (r *LogRecord) Empty() bool {
32 | 	return r == nil || r.Cmd == nil
33 | }
34 | 
35 | func (a *LogRecord) Interfering(b *LogRecord) bool {
36 | 	if a == nil || b == nil {
37 | 		return false
38 | 	}
39 | 
40 | 	return a.Cmd.Interfering(b.Cmd)
41 | }
42 | 
43 | func (r *LogRecord) ShortStr() string {
44 | 	if r.Empty() {
45 | 		return "<>"
46 | 	}
47 | 
48 | 	return fmt.Sprintf("<%s:%03d{%s}-%s→%s>",
49 | 		r.Author.ShortStr(),
50 | 		r.Seq,
51 | 		r.Cmd.ShortStr(),
52 | 		r.Overrides.ShortStr(),
53 | 		r.Depends.ShortStr(),
54 | 	)
55 | }
56 | 
57 | func RecordsShortStr(rs []*LogRecord, sep ...string) string {
58 | 	s := ", "
59 | 	if len(sep) > 0 {
60 | 		s = sep[0]
61 | 	}
62 | 	rst := []string{}
63 | 	for _, r := range rs {
64 | 		rst = append(rst, r.ShortStr())
65 | 	}
66 | 	return "[" + strings.Join(rst, s) + "]"
67 | 
68 | }
69 | 


--------------------------------------------------------------------------------
/record_test.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/require"
 7 | )
 8 | 
 9 | func TestNewRecord(t *testing.T) {
10 | 
11 | 	ta := require.New(t)
12 | 
13 | 	ta.Equal(&LogRecord{
14 | 		Author: &LeaderId{1, 2},
15 | 		Seq:    3,
16 | 		Cmd: &Cmd{
17 | 			Op:    "foo",
18 | 			Key:   "key",
19 | 			Value: &Cmd_VI64{4},
20 | 		},
21 | 	}, NewRecord(NewLeaderId(1, 2), 3, NewCmdI64("foo", "key", 4)))
22 | 
23 | }
24 | 
25 | func TestRecord_Interfering(t *testing.T) {
26 | 
27 | 	ta := require.New(t)
28 | 
29 | 	lid := NewLeaderId
30 | 	cmd := NewCmdI64
31 | 
32 | 	cases := []struct {
33 | 		a, b *LogRecord
34 | 		want bool
35 | 	}{
36 | 		{nil, nil, false},
37 | 		{nil, NewRecord(lid(0, 1), 0, cmd("bar", "x", 1)), false},
38 | 		{NewRecord(lid(0, 1), 0, cmd("foo", "x", 1)), NewRecord(lid(0, 1), 0, nil), false},
39 | 		{NewRecord(lid(0, 1), 0, cmd("foo", "x", 1)), NewRecord(lid(0, 1), 0, cmd("bar", "x", 1)), false},
40 | 		{NewRecord(lid(0, 1), 0, cmd("set", "x", 1)), NewRecord(lid(0, 1), 0, cmd("set", "y", 1)), false},
41 | 		{NewRecord(lid(0, 1), 0, cmd("set", "x", 1)), NewRecord(lid(0, 1), 0, cmd("set", "x", 1)), true},
42 | 	}
43 | 
44 | 	for i, c := range cases {
45 | 		ta.Equal(c.want, c.a.Interfering(c.b), "%d-th: case: %+v", i+1, c)
46 | 		ta.Equal(c.want, c.b.Interfering(c.a), "%d-th: case: %+v", i+1, c)
47 | 	}
48 | }
49 | 


--------------------------------------------------------------------------------
/replicastatus.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | // newStatusAcc creates a ReplicaStatus with only accepted fields inited.
 4 | // Mostly for test purpose only.
 5 | func newStatusAcc(aterm, aid, lsn int64) *ReplicaStatus {
 6 | 	acc := NewTailBitmap((lsn + 1) &^ 63)
 7 | 	if (lsn+1)&63 != 0 {
 8 | 		acc.Words = append(acc.Words, 1<<uint(lsn&63))
 9 | 	}
10 | 	return &ReplicaStatus{
11 | 		Committer: NewLeaderId(aterm, aid),
12 | 		Accepted:  acc,
13 | 	}
14 | }
15 | 
16 | 
17 | func emptyReplicaStatus(id int64) *ReplicaStatus {
18 | 	return &ReplicaStatus{
19 | 		// initially it votes for itself with term 0
20 | 		VotedFor:  NewLeaderId(0, id),
21 | 		Committer: nil,
22 | 		Accepted:  NewTailBitmap(0),
23 | 		Committed: NewTailBitmap(0),
24 | 		Applied:   NewTailBitmap(0),
25 | 	}
26 | }
27 | 
28 | 
// logStater is anything that exposes the log related status, i.e., the
// committer and the bitmap of accepted logs. It is what CmpLogStatus and
// ExportLogStatus work on.
type logStater interface {
	GetCommitter() *LeaderId
	GetAccepted() *TailBitmap
}
33 | 
34 | func CmpLogStatus(a, b logStater) int {
35 | 	r := a.GetCommitter().Cmp(b.GetCommitter())
36 | 	if r != 0 {
37 | 		return r
38 | 	}
39 | 
40 | 	return cmpI64(a.GetAccepted().Len(), b.GetAccepted().Len())
41 | }
42 | 
43 | func ExportLogStatus(ls logStater) *LogStatus {
44 | 	return &LogStatus{
45 | 		Committer: ls.GetCommitter().Clone(),
46 | 		Accepted:  ls.GetAccepted().Clone(),
47 | 	}
48 | }
49 | 
50 | // CmpAccepted compares log related fields with another ballot.
51 | // I.e. Committer and MaxLogSeq.
52 | func (a *ReplicaStatus) CmpAccepted(b *ReplicaStatus) int {
53 | 	return CmpLogStatus(a, b)
54 | }
55 | 
56 | // If I have a log from a leader and the leader has committed it.
57 | // I commit it too.
58 | func (a *ReplicaStatus) UpdatedCommitted(committer *LeaderId, committed *TailBitmap) {
59 | 	if committer.Cmp(a.Committer) != 0 {
60 | 		panic("can not accept committed from non-leader committer")
61 | 	}
62 | 
63 | 	update := a.Accepted.Clone()
64 | 	update.Intersection(committed)
65 | 	a.Committed.Union(update)
66 | }
67 | 


--------------------------------------------------------------------------------
/replicastatus_test.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/require"
 7 | )
 8 | 
 9 | func Test_newStatusAcc(t *testing.T) {
10 | 
11 | 	ta := require.New(t)
12 | 
13 | 	got := newStatusAcc(3, 4, 5)
14 | 
15 | 	ta.Equal(int64(3), got.Committer.Term)
16 | 	ta.Equal(int64(4), got.Committer.Id)
17 | 
18 | 	ta.Equal(int64(6), got.Accepted.Len())
19 | }
20 | 
21 | func TestReplicaStatus_CmpAccepted(t *testing.T) {
22 | 
23 | 	ta := require.New(t)
24 | 
25 | 	cases := []struct {
26 | 		a, b *ReplicaStatus
27 | 		want int
28 | 	}{
29 | 		{a: newStatusAcc(1, 1, 1), b: newStatusAcc(1, 1, 1), want: 0},
30 | 		{a: newStatusAcc(1, 1, 2), b: newStatusAcc(1, 1, 1), want: 1},
31 | 		{a: newStatusAcc(1, 2, 0), b: newStatusAcc(1, 1, 1), want: 1},
32 | 		{a: newStatusAcc(2, 0, 0), b: newStatusAcc(1, 1, 1), want: 1},
33 | 	}
34 | 
35 | 	for i, c := range cases {
36 | 		ta.Equal(c.want, c.a.CmpAccepted(c.b), "%d-th: case: %+v", i+1, c)
37 | 		ta.Equal(-c.want, c.b.CmpAccepted(c.a), "%d-th: case: %+v", i+1, c)
38 | 	}
39 | }
40 | 


--------------------------------------------------------------------------------
/rpc.go:
--------------------------------------------------------------------------------
  1 | package traft
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"sync/atomic"
  6 | 	"time"
  7 | 
  8 | 	"github.com/gogo/protobuf/proto"
  9 | 	"github.com/pkg/errors"
 10 | 	"google.golang.org/grpc"
 11 | )
 12 | 
var (
	// meth holds the names of the RPC methods, so that they can be referred
	// to as meth.Elect etc. instead of as string literals.
	meth = struct {
		LogForward, Elect, Propose string
	}{
		LogForward: "LogForward",
		Elect:      "Elect",
		Propose:    "Propose",
	}
)
 22 | 
// rpcResult is a container of rpc reply and other supporting info.
type rpcResult struct {
	// ri is the peer the rpc was sent to.
	ri ReplicaInfo
	// addr is the address of the peer, a copy of ri.Addr.
	addr string
	// method is the rpc method name, e.g. "Elect".
	method string

	// reply is the decoded reply; it stays nil if connecting to the peer failed.
	reply interface{}
	// err is the error of connecting to the peer or of invoking the rpc.
	err error

	// NOTE(review): quorum is not written by any code in this file — confirm
	// whether it is still in use.
	quorum int32
}
 34 | 
// rpcSession is a session of RPCs to all members in a cluster except the sender.
type rpcSession struct {
	// context shared by all rpcs of this session
	ctx context.Context

	// call it if we have collected enough responses and do not need to wait
	// for other rpc replies
	cancel context.CancelFunc

	// the cluster to send rpc to.
	// cluster must not be modified by other goroutines.
	cluster *Cluster

	// the method name, one of "Elect", "LogForward" and "Propose"
	method string

	// the request body
	req proto.Message

	// a channel to receive responded replies.
	// rpcToAll closes it after the last peer finished.
	resCh chan *rpcResult

	// bitmap of peers that responded a positive reply, i.e., a reply is
	// received and its field "OK" is true.
	// The bit position for a peer is ReplicaInfo.Position.
	// rpcToAll presets the bit of the sender itself.
	okBitmap uint64

	// count of peers that have not responded yet.
	// It is decremented atomically by the sending goroutines.
	pending int64
}
// getOKer is a reply that tells whether the peer responded positively.
// updateOKBitmap asserts every reply to it.
type getOKer interface {
	GetOK() bool
}
 68 | 
 69 | // return if quorum constituted.
 70 | func (s *rpcSession) updateOKBitmap(res *rpcResult) bool {
 71 | 	if res.reply.(getOKer).GetOK() {
 72 | 		s.okBitmap |= 1 << uint(res.ri.Position)
 73 | 	}
 74 | 	if s.cluster.IsQuorum(s.okBitmap) {
 75 | 		return true
 76 | 	}
 77 | 	return false
 78 | 
 79 | }
 80 | 
 81 | // send rpc to addr.
 82 | // TODO use a single loop to send to one replica
 83 | func rpcTo(addr string,
 84 | 	action func(TRaftClient, context.Context)) {
 85 | 
 86 | 	conn, err := grpc.Dial(addr, grpc.WithInsecure())
 87 | 	if err != nil {
 88 | 		// TODO check error
 89 | 		panic("wooooooh")
 90 | 	}
 91 | 	defer conn.Close()
 92 | 
 93 | 	cli := NewTRaftClient(conn)
 94 | 
 95 | 	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
 96 | 	defer cancel()
 97 | 
 98 | 	action(cli, ctx)
 99 | }
100 | 
101 | func rpcToAll(
102 | 	id int64,
103 | 	cluster *Cluster,
104 | 	method string,
105 | 	req proto.Message,
106 | 	timeout time.Duration,
107 | ) *rpcSession {
108 | 
109 | 	ctx, cancel := context.WithTimeout(context.Background(), timeout)
110 | 
111 | 	ms := cluster.Members
112 | 
113 | 	sess := &rpcSession{
114 | 		ctx:    ctx,
115 | 		cancel: cancel,
116 | 
117 | 		cluster: cluster,
118 | 		method:  method,
119 | 		req:     req,
120 | 
121 | 		resCh: make(chan *rpcResult, len(ms)),
122 | 
123 | 		okBitmap: 1 << uint(ms[id].Position),
124 | 		pending:  int64(len(ms) - 1),
125 | 	}
126 | 
127 | 	for _, m := range cluster.Members {
128 | 		if m.Id == id {
129 | 			continue
130 | 		}
131 | 		go func(ri ReplicaInfo) {
132 | 			res := rpcToPeer(ri, sess)
133 | 			if res.err == nil {
134 | 				// if there is a non-business error, no need to send back result
135 | 				sess.resCh <- res
136 | 			}
137 | 
138 | 			// pending will be read by other goroutine thus must be read/written
139 | 			// atomically.
140 | 			pending := atomic.AddInt64(&sess.pending, -1)
141 | 
142 | 			lg.Infow("rpcToPeer", "pending", pending)
143 | 
144 | 			if pending == 0 {
145 | 				close(sess.resCh)
146 | 			}
147 | 		}(*m)
148 | 	}
149 | 
150 | 	return sess
151 | }
152 | 
153 | // rpcToPeer sends request and wait for the reply.
154 | // It also update essential info such as:
155 | // - pending: the N.O. unfinished rpcs.
156 | // - okBitmap: a bitmap indicates which peer responded a reply with OK=true.
157 | func rpcToPeer(ri ReplicaInfo, sess *rpcSession) *rpcResult {
158 | 
159 | 	res := &rpcResult{
160 | 		ri:     ri,
161 | 		addr:   ri.Addr,
162 | 		method: sess.method,
163 | 		reply:  nil,
164 | 		err:    nil,
165 | 	}
166 | 
167 | 	conn, err := grpc.Dial(ri.Addr, grpc.WithInsecure())
168 | 	if err != nil {
169 | 		lg.Infow("rpc-to", "addr", ri.Addr, "err", err)
170 | 		res.err = errors.Wrapf(err, "to %s", ri.Addr)
171 | 		return res
172 | 	}
173 | 	defer conn.Close()
174 | 
175 | 	res.reply = newReply(sess.method)
176 | 	res.err = conn.Invoke(sess.ctx, "/TRaft/"+sess.method, sess.req, res.reply)
177 | 
178 | 	if res.err != nil {
179 | 		lg.Infow("rpc-reply", "err", err)
180 | 		return res
181 | 	}
182 | 
183 | 	return res
184 | }
185 | 
186 | // newReply creates an empty reply structure by method name.
187 | // method name is one of the RPC func defined in traft.proto.
188 | func newReply(method string) proto.Message {
189 | 	switch method {
190 | 	case "Elect":
191 | 		return &ElectReply{}
192 | 	case "LogForward":
193 | 		return &LogForwardReply{}
194 | 	case "Propose":
195 | 		return &ProposeReply{}
196 | 	default:
197 | 		panic("unknown method:" + method)
198 | 	}
199 | }
200 | 
// casOrU64 atomically sets the bits of mask in *addr with a compare-and-swap
// loop, and returns the value it stored.
func casOrU64(addr *uint64, mask uint64) uint64 {
	for {
		cur := atomic.LoadUint64(addr)
		// Retry if another goroutine changed *addr in between.
		if next := cur | mask; atomic.CompareAndSwapUint64(addr, cur, next) {
			return next
		}
	}
}
211 | 


--------------------------------------------------------------------------------
/scripts/build_md.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | 
 4 | import os
 5 | import jinja2
 6 | import subprocess
 7 | 
 8 | def render_j2(tmpl_path, tmpl_vars, output_path):
 9 | 
10 |     def include_file(name):
11 |         return jinja2.Markup(loader.get_source(env, name)[0])
12 | 
13 |     loader = jinja2.FileSystemLoader(searchpath='./')
14 |     env = jinja2.Environment(loader=loader,
15 |                                       undefined=jinja2.StrictUndefined)
16 |     env.globals['include_file'] = include_file
17 |     template = env.get_template(tmpl_path)
18 | 
19 |     txt = template.render(tmpl_vars)
20 | 
21 |     with open(output_path, 'w') as f:
22 |         f.write(txt)
23 | 
def command(cmd, *arguments, **options):
    """Run `cmd` with `arguments` and return what it wrote to stdout.

    Supported options:

    - close_fds: passed to Popen, True by default.
    - cwd: working directory, None by default.
    - shell: passed to Popen, False by default.
    - env: extra environment variables, merged over os.environ.
    - stdin: text fed to the stdin of the process.

    Raises Exception(returncode, out, err) if the exit code is not 0.
    """

    env = options.get('env', None)
    if env is not None:
        # extend the current environment instead of replacing it
        env = dict(os.environ, **env)

    argv = [cmd] + list(arguments)

    subproc = subprocess.Popen(argv,
                               close_fds=options.get('close_fds', True),
                               shell=options.get('shell', False),
                               cwd=options.get('cwd', None),
                               env=env,
                               encoding='utf-8',
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, )

    # communicate() waits for the process to terminate.
    out, err = subproc.communicate(input=options.get('stdin', None))

    code = subproc.returncode
    if code != 0:
        raise Exception(code, out, err)

    return out
52 | 
if __name__ == "__main__":
    # The last path segment of the go package is exposed to templates as
    # `name`.
    pkg = command('go', 'list', '.')
    tmpl_vars = {"name": pkg.strip().split('/')[-1]}

    # Render every `*.md.j2` under `_tmpl`.
    for d, subdir, fns in os.walk('_tmpl'):
        for fn in fns:
            if not fn.endswith('.md.j2'):
                continue

            print(d, fn)

            src = d + '/' + fn
            # Strip the `.j2` suffix and keep only the last path component,
            # i.e., the output goes to the current directory.
            dst = os.path.split(d + '/' + fn[:-3])[1]
            print(dst)

            render_j2(src, tmpl_vars, dst)
71 | 


--------------------------------------------------------------------------------
/scripts/requirements.txt:
--------------------------------------------------------------------------------
1 | semantic_version==2.6.0
2 | jinja2==2.10.1
3 | PyYAML==5.1
4 | 


--------------------------------------------------------------------------------
/server.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | // TRaftServer impl
 4 | 
 5 | import (
 6 | 	"time"
 7 | )
 8 | 
// leaderLease is one second converted to int64. time.Duration counts
// nanoseconds, thus the value is 1e9.
// NOTE(review): presumably the lease length of a leader — confirm the unit
// expected by the code comparing against it.
var leaderLease = int64(time.Second * 1)
10 | 
11 | // init a TRaft for test, all logs are `set x=lsn`
12 | func (tr *TRaft) initTraft0(committer *LeaderId, votedFor *LeaderId, cmds...interface{}) {
13 | 	id := tr.Id
14 | 
15 | 	tr.Status[id].Committer = committer.Clone()
16 | 	tr.Status[id].VotedFor = votedFor.Clone()
17 | 
18 | 	tr.addLogs(cmds...)
19 | 
20 | 	tr.checkStatus()
21 | }
22 | 
23 | // init a TRaft for test, all logs are `set x=lsn`
24 | func (tr *TRaft) initTraft(
25 | 	// proposer of the logs
26 | 	committer *LeaderId,
27 | 	// author of the logs
28 | 	author *LeaderId,
29 | 	// log seq numbers to generate.
30 | 	lsns []int64,
31 | 	nilLogs map[int64]bool,
32 | 	committed []int64,
33 | 	votedFor *LeaderId,
34 | ) {
35 | 	id := tr.Id
36 | 
37 | 	tr.LogOffset, tr.Logs = buildPseudoLogs(author, lsns, nilLogs)
38 | 
39 | 	tr.Status[id].Committer = committer.Clone()
40 | 	tr.Status[id].Accepted = NewTailBitmap(0, lsns...)
41 | 
42 | 	if committed == nil {
43 | 		tr.Status[id].Committed = NewTailBitmap(0)
44 | 	} else {
45 | 		tr.Status[id].Committed = NewTailBitmap(0, committed...)
46 | 	}
47 | 
48 | 	tr.Status[id].VotedFor = votedFor.Clone()
49 | 
50 | 	tr.checkStatus()
51 | }
52 | 
53 | func buildPseudoLogs(
54 | 	// author of the logs
55 | 	author *LeaderId,
56 | 	// log seq numbers to generate.
57 | 	lsns []int64,
58 | 	nilLogs map[int64]bool,
59 | ) (int64, []*LogRecord) {
60 | 	logs := make([]*LogRecord, 0)
61 | 	if len(lsns) == 0 {
62 | 		return 0, logs
63 | 	}
64 | 
65 | 	last := lsns[len(lsns)-1]
66 | 	start := lsns[0]
67 | 	for i := start; i <= last; i++ {
68 | 		logs = append(logs, &LogRecord{})
69 | 	}
70 | 
71 | 	for _, lsn := range lsns {
72 | 		if nilLogs != nil && nilLogs[lsn] {
73 | 		} else {
74 | 			logs[lsn-start] = NewRecord(
75 | 				author.Clone(),
76 | 				lsn,
77 | 				NewCmdI64("set", "x", lsn))
78 | 		}
79 | 	}
80 | 	return start, logs
81 | }
82 | 
83 | 


--------------------------------------------------------------------------------
/str.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import "fmt"
 4 | 
 5 | type shortStrer interface {
 6 | 	ShortStr() string
 7 | }
 8 | 
 9 | func toStr(v interface{}) string {
10 | 
11 | 	switch ss := v.(type) {
12 | 	case shortStrer:
13 | 		return ss.ShortStr()
14 | 	case fmt.Stringer:
15 | 		return ss.String()
16 | 	default:
17 | 		return fmt.Sprintf("%v", v)
18 | 
19 | 	}
20 | }
21 | 


--------------------------------------------------------------------------------
/tailbitmap.go:
--------------------------------------------------------------------------------
  1 | package traft
  2 | 
  3 | import (
  4 | 	fmt "fmt"
  5 | 	"math/bits"
  6 | 	"strings"
  7 | 
  8 | 	proto "github.com/gogo/protobuf/proto"
  9 | 	"github.com/openacid/low/bitmap"
 10 | 	"github.com/openacid/low/mathext/util"
 11 | )
 12 | 
// reclaimThreshold is the size threshold in bit for reclamation of `Words`:
// Compact() reclaims once Offset has advanced at least this many bits past
// Reclamed. It is 1024 words of 64 bits.
var reclaimThreshold = int64(1024) * 64
 15 | 
 16 | // NewTailBitmap creates an TailBitmap with a preset Offset and an empty
 17 | // tail bitmap.
 18 | //
 19 | // Optional arg `set` specifies what bit to set to 1.
 20 | // The bit positions in `set` is absolute, NOT based on offset.
 21 | //
 22 | // Since 0.1.22
 23 | func NewTailBitmap(offset int64, set ...int64) *TailBitmap {
 24 | 	residual := offset & 63
 25 | 	tb := &TailBitmap{
 26 | 		Offset:   offset & ^63,
 27 | 		Reclamed: offset & ^63,
 28 | 		Words:    make([]uint64, 0, reclaimThreshold>>6),
 29 | 	}
 30 | 	if residual != 0 {
 31 | 		for i := int64(0); i < residual; i++ {
 32 | 			tb.Set(tb.Offset + i)
 33 | 		}
 34 | 	}
 35 | 	for _, pos := range set {
 36 | 		tb.Set(pos)
 37 | 	}
 38 | 	return tb
 39 | }
 40 | 
 41 | // Compact all leading all-ones words in the bitmap.
 42 | //
 43 | // Since 0.1.22
 44 | func (tb *TailBitmap) Compact() {
 45 | 
 46 | 	allOnes := uint64(0xffffffffffffffff)
 47 | 
 48 | 	for len(tb.Words) > 0 && tb.Words[0] == allOnes {
 49 | 		tb.Offset += 64
 50 | 		tb.Words = tb.Words[1:]
 51 | 	}
 52 | 
 53 | 	if tb.Offset-tb.Reclamed >= reclaimThreshold {
 54 | 		l := len(tb.Words)
 55 | 		newWords := make([]uint64, l, l*2)
 56 | 
 57 | 		copy(newWords, tb.Words)
 58 | 		tb.Reclamed = tb.Offset
 59 | 	}
 60 | }
 61 | 
 62 | // Set the bit at `idx` to `1`.
 63 | //
 64 | // Since 0.1.22
 65 | func (tb *TailBitmap) Set(idx int64) {
 66 | 	if idx < tb.Offset {
 67 | 		return
 68 | 	}
 69 | 
 70 | 	idx = idx - tb.Offset
 71 | 	wordIdx := idx >> 6
 72 | 
 73 | 	for int(wordIdx) >= len(tb.Words) {
 74 | 		tb.Words = append(tb.Words, 0)
 75 | 	}
 76 | 
 77 | 	tb.Words[wordIdx] |= bitmap.Bit[idx&63]
 78 | 
 79 | 	if wordIdx == 0 {
 80 | 		tb.Compact()
 81 | 	}
 82 | }
 83 | 
 84 | // Get retrieves a bit at its 64-based offset.
 85 | //
 86 | // Since 0.1.22
 87 | func (tb *TailBitmap) Get(idx int64) uint64 {
 88 | 	if idx < tb.Offset {
 89 | 		return bitmap.Bit[idx&63]
 90 | 	}
 91 | 
 92 | 	idx = idx - tb.Offset
 93 | 	if int(idx>>6) >= len(tb.Words) {
 94 | 		return 0
 95 | 	}
 96 | 	return tb.Words[idx>>6] & bitmap.Bit[idx&63]
 97 | }
 98 | 
 99 | // Get1 retrieves a bit and returns a 1-bit word, i.e., putting the bit in the
100 | // lowest bit.
101 | //
102 | // Since 0.1.22
103 | func (tb *TailBitmap) Get1(idx int64) uint64 {
104 | 	if idx < tb.Offset {
105 | 		return 1
106 | 	}
107 | 	idx = idx - tb.Offset
108 | 	return (tb.Words[idx>>6] >> uint(idx&63)) & 1
109 | }
110 | 
111 | func (tb *TailBitmap) Clone() *TailBitmap {
112 | 	return proto.Clone(tb).(*TailBitmap)
113 | }
114 | 
115 | func (tb *TailBitmap) Normalize() *TailBitmap {
116 | 	if tb.Words == nil {
117 | 		tb.Words = make([]uint64, 0)
118 | 	}
119 | 	return tb
120 | }
121 | 
// Union merges tc into tb in place, i.e., tb becomes tb | tc.
// tc is not modified. A nil tc is treated as an empty set.
//
// All bits before Offset are regarded as set in both bitmaps.
func (tb *TailBitmap) Union(tc *TailBitmap) {

	if tc == nil {
		return
	}

	// lb and lc are the end positions (exclusive) of the words of tb and tc.
	lb := tb.Offset + int64(len(tb.Words)*64)
	lc := tc.Offset + int64(len(tc.Words)*64)

	// Everything tc has is before tb.Offset, i.e., is already set in tb.
	if tb.Offset >= lc {
		return
	}

	// Everything tb has is before tc.Offset, i.e., tc covers tb entirely:
	// take a copy of tc.
	if lb <= tc.Offset {
		tb.Offset = tc.Offset
		tb.Words = make([]uint64, len(tc.Words))
		copy(tb.Words, tc.Words)

		// building a new Words reclaims unused spaces in it.
		tb.Reclamed = tb.Offset
		return
	}

	// The words overlap: align both to the greater offset. ws is the part of
	// tc.Words starting at the aligned offset.
	var ws []uint64
	if tb.Offset >= tc.Offset {
		delta := tb.Offset - tc.Offset
		ws = tc.Words[delta>>6:]

	} else {
		// tb.Offset < tc.Offset

		// The words of tb before tc.Offset are dropped: those bits are all
		// set in tc.
		delta := tc.Offset - tb.Offset
		tb.Words = tb.Words[delta>>6:]
		tb.Offset = tc.Offset
		ws = tc.Words
	}

	// OR the overlapping words.
	var i int
	for i = 0; i < len(tb.Words) && i < len(ws); i++ {
		tb.Words[i] |= ws[i]
	}

	// Append what tc has beyond the end of tb.
	for ; i < len(ws); i++ {
		tb.Words = append(tb.Words, ws[i])
	}

	tb.Compact()
}
170 | 
// Intersection keeps in ta only the bits that are also set in tb, in place,
// i.e., ta becomes ta & tb. tb is not modified.
// A nil tb is treated as an empty set, thus ta is reset to an empty bitmap.
//
// All bits before Offset are regarded as set in both bitmaps.
func (ta *TailBitmap) Intersection(tb *TailBitmap) {

	if tb == nil {
		ta.Offset = 0
		ta.Words = make([]uint64, 0)
		ta.Reclamed = 0
		return
	}

	// la and lb are the end positions (exclusive) of the words of ta and tb.
	la := ta.Offset + int64(len(ta.Words)*64)
	lb := tb.Offset + int64(len(tb.Words)*64)

	// ta has all the bits covered by tb set: the result is a copy of tb.
	//
	// 1111 1111 xxxx
	// 1111 yyyy
	if ta.Offset >= lb {
		ta.Offset = tb.Offset
		ta.Words = make([]uint64, len(tb.Words))
		copy(ta.Words, tb.Words)

		// building a new Words reclaims unused spaces in it.
		ta.Reclamed = ta.Offset
		return
	}

	// tb has all the bits covered by ta set: ta is the result as it is.
	//
	// 1111 xxxx
	// 1111 1111 yyyy
	if la <= tb.Offset {
		return
	}

	// The words overlap. The result starts at the smaller offset s and ends
	// at the smaller end e; beyond e one of the two has no bit set.
	s := util.MinI64(ta.Offset, tb.Offset)
	e := util.MinI64(la, lb)
	ws := make([]uint64, (e-s)>>6)
	// cur indexes ws, i indexes ta.Words and j indexes tb.Words.
	cur := int64(0)
	i := int64(0)
	j := int64(0)
	if ta.Offset >= tb.Offset {
		// Before ta.Offset ta is all ones: the result is what tb has.
		n := (ta.Offset - s) >> 6
		n = util.MinI64(n, (e-s)>>6)
		copy(ws, tb.Words[:n])
		cur += n
		j = n
	} else {
		// Before tb.Offset tb is all ones: the result is what ta has.
		n := (tb.Offset - s) >> 6
		n = util.MinI64(n, (e-s)>>6)
		copy(ws, ta.Words[:n])
		cur += n
		i = n
	}

	// AND the words present in both.
	for cur < int64(len(ws)) {
		ws[cur] = ta.Words[i] & tb.Words[j]
		cur++
		i++
		j++
	}

	// Trim trailing all-zero words.
	for len(ws) > 0 && ws[len(ws)-1] == 0 {
		ws = ws[:len(ws)-1]
	}
	ta.Offset = s
	ta.Words = ws
	ta.Reclamed = s
}
235 | 
// Diff AKA subtraction A - B or A \ B, applied to tb in place with tc as B.
// A nil tc leaves tb unchanged.
//
// TODO: This impl is wrong!!!
// NOTE(review): the author flags this as incorrect. Among other things the
// result can have unset bits before Offset, which the other methods regard
// as always set. Do not rely on it before it is fixed.
func (tb *TailBitmap) Diff(tc *TailBitmap) {

	if tc == nil {
		return
	}

	// lb and lc are the end positions (exclusive) of the words of tb and tc.
	lb := tb.Offset + int64(len(tb.Words)*64)
	lc := tc.Offset + int64(len(tc.Words)*64)

	// tb ends before the words of tc start: every word of tb is inverted.
	if lb <= tc.Offset {
		for i := 0; i < len(tb.Words); i++ {
			tb.Words[i] = ^tb.Words[i]
		}
		return
	}

	if tb.Offset > lc {
		// tb starts after the end of tc: the result starts at tc.Offset, with
		// the words of tc inverted, then all-ones words up to tb.Offset, then
		// the words of tb.
		//
		// 1111 1111 1111 xxxx xxxx
		// 1111 yyyy
		l := int((tb.Offset - tc.Offset) >> 6)
		words := make([]uint64, l+len(tb.Words))
		var i int
		for i = 0; i < l && i < len(tc.Words); i++ {
			words[i] = ^tc.Words[i]
		}
		for ; i < l; i++ {
			words[i] = 0xffffffffffffffff
		}

		copy(words[i:], tb.Words)
		tb.Words = words
		tb.Offset = tc.Offset
		tb.Reclamed = tb.Offset
		return
	}

	if tb.Offset <= tc.Offset {
		// The words of tb before tc.Offset are cleared, the overlapping words
		// are AND-ed with the inverted words of tc.
		//
		// 1111 1111 xxxx xxxx
		// 1111 1111 1111 yyyy yyyy
		delta := (tc.Offset - tb.Offset) >> 6
		var i int64
		for i = 0; i < delta; i++ {
			tb.Words[i] = 0
		}
		for ; i < int64(len(tb.Words)) && i < (lc-tb.Offset)>>6; i++ {
			tb.Words[i] &= ^tc.Words[i-delta]
		}

	} else {
		// tb.Offset > tc.Offset
		// 1111 1111 xxxx xxxx
		// 1111 yyyy yyyy

		// The result starts at tc.Offset: the words of tc before tb.Offset
		// are inverted, the overlapping words are tb AND NOT tc.
		delta := int((tb.Offset - tc.Offset) >> 6)
		words := make([]uint64, delta+len(tb.Words))

		var i int
		for i = 0; i < delta; i++ {
			words[i] = ^tc.Words[i]
		}
		for ; i < len(words) && i < len(tc.Words); i++ {
			words[i] = tb.Words[i-delta] &^ tc.Words[i]
		}
		copy(words[i:], tb.Words[i-delta:])

		tb.Words = words
		tb.Offset = tc.Offset
		tb.Reclamed = tb.Offset
	}
}
308 | 
309 | // Last returns last set bit index + 1.
310 | func (tb *TailBitmap) Len() int64 {
311 | 
312 | 	r := len(tb.Words) - 1
313 | 	for ; r >= 0 && tb.Words[r] == 0; r-- {
314 | 	}
315 | 
316 | 	if r < 0 {
317 | 		// all Words are 0
318 | 		return tb.Offset
319 | 	}
320 | 
321 | 	return tb.Offset + int64(r+1)<<6 - int64(bits.LeadingZeros64(tb.Words[r]))
322 | }
323 | 
324 | func (tb *TailBitmap) ShortStr() string {
325 | 	if tb == nil {
326 | 		return "0"
327 | 	}
328 | 	s := []string{fmt.Sprintf("%d", tb.Offset)}
329 | 	for _, w := range tb.Words {
330 | 		s = append(s, fmt.Sprintf(":%x", w))
331 | 	}
332 | 
333 | 	return strings.Join(s, "")
334 | }
335 | 
336 | func (tb *TailBitmap) DebugStr() string {
337 | 	if tb == nil {
338 | 		return "0"
339 | 	}
340 | 	s := []string{fmt.Sprintf("%d", tb.Offset)}
341 | 	for _, w := range tb.Words {
342 | 		v := bitmap.Fmt(w)
343 | 		s = append(s, v)
344 | 	}
345 | 
346 | 	return strings.Join(s, ",")
347 | }
348 | 


--------------------------------------------------------------------------------
/tailbitmap_test.go:
--------------------------------------------------------------------------------
  1 | package traft
  2 | 
  3 | import (
  4 | 	"testing"
  5 | 
  6 | 	"github.com/stretchr/testify/require"
  7 | )
  8 | 
  9 | func TestNewTailBitmap(t *testing.T) {
 10 | 
 11 | 	ta := require.New(t)
 12 | 
 13 | 	cases := []struct {
 14 | 		input int64
 15 | 		set   []int64
 16 | 		want  *TailBitmap
 17 | 	}{
 18 | 		{
 19 | 			input: 0,
 20 | 			want: &TailBitmap{
 21 | 				Offset:   0,
 22 | 				Words:    make([]uint64, 0, 1024),
 23 | 				Reclamed: 0,
 24 | 			},
 25 | 		},
 26 | 
 27 | 		// non-64 aligned offset
 28 | 		{
 29 | 			input: 64 + 3,
 30 | 			want: &TailBitmap{
 31 | 				Offset:   64,
 32 | 				Words:    []uint64{7},
 33 | 				Reclamed: 64,
 34 | 			},
 35 | 		},
 36 | 		{
 37 | 			input: 64 * 1025,
 38 | 			want: &TailBitmap{
 39 | 				Offset:   64 * 1025,
 40 | 				Words:    make([]uint64, 0, 1024),
 41 | 				Reclamed: 64 * 1025,
 42 | 			},
 43 | 		},
 44 | 		// with extra bits to set
 45 | 		{
 46 | 			input: 64 * 1,
 47 | 			set:   []int64{1, 64, 65},
 48 | 			want: &TailBitmap{
 49 | 				Offset:   64 * 1,
 50 | 				Words:    []uint64{3},
 51 | 				Reclamed: 64 * 1,
 52 | 			},
 53 | 		},
 54 | 	}
 55 | 
 56 | 	for i, c := range cases {
 57 | 		got := NewTailBitmap(c.input, c.set...)
 58 | 		ta.Equal(c.want, got, "%d-th: case: %+v", i+1, c)
 59 | 	}
 60 | }
 61 | 
 62 | func TestTailBitmap_Compact(t *testing.T) {
 63 | 
 64 | 	ta := require.New(t)
 65 | 
 66 | 	allOnes1024 := make([]uint64, 1024)
 67 | 	for i, _ := range allOnes1024 {
 68 | 		allOnes1024[i] = 0xffffffffffffffff
 69 | 	}
 70 | 
 71 | 	cases := []struct {
 72 | 		input *TailBitmap
 73 | 		want  *TailBitmap
 74 | 	}{
 75 | 		{
 76 | 			input: &TailBitmap{
 77 | 				Offset:   0,
 78 | 				Words:    []uint64{0xffffffffffffffff},
 79 | 				Reclamed: 0,
 80 | 			},
 81 | 			want: &TailBitmap{
 82 | 				Offset:   64,
 83 | 				Words:    []uint64{},
 84 | 				Reclamed: 0,
 85 | 			},
 86 | 		},
 87 | 		{
 88 | 			input: &TailBitmap{
 89 | 				Offset:   64,
 90 | 				Words:    []uint64{0xffffffffffffffff},
 91 | 				Reclamed: 0,
 92 | 			},
 93 | 			want: &TailBitmap{
 94 | 				Offset:   64 * 2,
 95 | 				Words:    []uint64{},
 96 | 				Reclamed: 0,
 97 | 			},
 98 | 		},
 99 | 		{
100 | 			input: &TailBitmap{
101 | 				Offset:   64,
102 | 				Words:    []uint64{0xffffffffffffffff, 1},
103 | 				Reclamed: 0,
104 | 			},
105 | 			want: &TailBitmap{
106 | 				Offset:   64 * 2,
107 | 				Words:    []uint64{1},
108 | 				Reclamed: 0,
109 | 			},
110 | 		},
111 | 		{
112 | 			input: &TailBitmap{
113 | 				Offset:   64,
114 | 				Words:    allOnes1024,
115 | 				Reclamed: 0,
116 | 			},
117 | 			want: &TailBitmap{
118 | 				Offset:   64 * 1025,
119 | 				Words:    []uint64{},
120 | 				Reclamed: 64 * 1025,
121 | 			},
122 | 		},
123 | 	}
124 | 
125 | 	for i, c := range cases {
126 | 		c.input.Compact()
127 | 		ta.Equal(c.want, c.input, "%d-th: case: %+v", i+1, c)
128 | 	}
129 | }
130 | 
131 | func TestTailBitmap_Set(t *testing.T) {
132 | 
133 | 	ta := require.New(t)
134 | 
135 | 	allOnes1024 := make([]uint64, 1024)
136 | 	for i, _ := range allOnes1024 {
137 | 		allOnes1024[i] = 0xffffffffffffffff
138 | 	}
139 | 
140 | 	cases := []struct {
141 | 		input *TailBitmap
142 | 		set   int64
143 | 		want  *TailBitmap
144 | 	}{
145 | 		{
146 | 			input: &TailBitmap{
147 | 				Offset:   0,
148 | 				Words:    []uint64{},
149 | 				Reclamed: 0,
150 | 			},
151 | 			set: 0,
152 | 			want: &TailBitmap{
153 | 				Offset:   0,
154 | 				Words:    []uint64{1},
155 | 				Reclamed: 0,
156 | 			},
157 | 		},
158 | 		{
159 | 			input: &TailBitmap{
160 | 				Offset:   64,
161 | 				Words:    []uint64{},
162 | 				Reclamed: 0,
163 | 			},
164 | 			set: 65,
165 | 			want: &TailBitmap{
166 | 				Offset:   64,
167 | 				Words:    []uint64{2},
168 | 				Reclamed: 0,
169 | 			},
170 | 		},
171 | 		{
172 | 			input: &TailBitmap{
173 | 				Offset:   64 * 2,
174 | 				Words:    []uint64{1},
175 | 				Reclamed: 0,
176 | 			},
177 | 			set: 5,
178 | 			want: &TailBitmap{
179 | 				Offset:   64 * 2,
180 | 				Words:    []uint64{1},
181 | 				Reclamed: 0,
182 | 			},
183 | 		},
184 | 		{
185 | 			input: &TailBitmap{
186 | 				Offset:   64 * 2,
187 | 				Words:    []uint64{1},
188 | 				Reclamed: 0,
189 | 			},
190 | 			set: 64*2 + 1,
191 | 			want: &TailBitmap{
192 | 				Offset:   64 * 2,
193 | 				Words:    []uint64{3},
194 | 				Reclamed: 0,
195 | 			},
196 | 		},
197 | 		{
198 | 			input: &TailBitmap{
199 | 				Offset:   64 * 2,
200 | 				Words:    []uint64{1},
201 | 				Reclamed: 0,
202 | 			},
203 | 			set: 64*3 + 2,
204 | 			want: &TailBitmap{
205 | 				Offset:   64 * 2,
206 | 				Words:    []uint64{1, 4},
207 | 				Reclamed: 0,
208 | 			},
209 | 		},
210 | 		{
211 | 			input: &TailBitmap{
212 | 				Offset:   64,
213 | 				Words:    []uint64{0xffffffffffffff7f, 1},
214 | 				Reclamed: 0,
215 | 			},
216 | 			set: 64 + 7,
217 | 			want: &TailBitmap{
218 | 				Offset:   64 * 2,
219 | 				Words:    []uint64{1},
220 | 				Reclamed: 0,
221 | 			},
222 | 		},
223 | 		{
224 | 			input: &TailBitmap{
225 | 				Offset:   64 * 1023,
226 | 				Words:    []uint64{0xffffffffffffff7f, 1},
227 | 				Reclamed: 0,
228 | 			},
229 | 			set: 64*1023 + 7,
230 | 			want: &TailBitmap{
231 | 				Offset:   64 * 1024,
232 | 				Words:    []uint64{1},
233 | 				Reclamed: 64 * 1024,
234 | 			},
235 | 		},
236 | 	}
237 | 
238 | 	for i, c := range cases {
239 | 		c.input.Set(c.set)
240 | 		ta.Equal(c.want, c.input, "%d-th: case: %+v", i+1, c)
241 | 	}
242 | }
243 | 
244 | func TestTailBitmap_Get(t *testing.T) {
245 | 
246 | 	ta := require.New(t)
247 | 
248 | 	allOnes1024 := make([]uint64, 1024)
249 | 	for i, _ := range allOnes1024 {
250 | 		allOnes1024[i] = 0xffffffffffffffff
251 | 	}
252 | 
253 | 	cases := []struct {
254 | 		input *TailBitmap
255 | 		get   int64
256 | 		want  uint64
257 | 	}{
258 | 		{
259 | 			input: &TailBitmap{
260 | 				Offset:   64,
261 | 				Words:    []uint64{},
262 | 				Reclamed: 0,
263 | 			},
264 | 			get:  0,
265 | 			want: 1,
266 | 		},
267 | 		{
268 | 			input: &TailBitmap{
269 | 				Offset:   64,
270 | 				Words:    []uint64{},
271 | 				Reclamed: 0,
272 | 			},
273 | 			get:  1,
274 | 			want: 2,
275 | 		},
276 | 		{
277 | 			input: &TailBitmap{
278 | 				Offset:   64,
279 | 				Words:    []uint64{},
280 | 				Reclamed: 0,
281 | 			},
282 | 			get:  63,
283 | 			want: 1 << 63,
284 | 		},
285 | 
286 | 		{
287 | 			input: &TailBitmap{
288 | 				Offset:   64,
289 | 				Words:    []uint64{0xffffffffffffff7f, 1},
290 | 				Reclamed: 0,
291 | 			},
292 | 			get:  64 + 7,
293 | 			want: 0,
294 | 		},
295 | 		{
296 | 			input: &TailBitmap{
297 | 				Offset:   64,
298 | 				Words:    []uint64{0xffffffffffffff7f, 1},
299 | 				Reclamed: 0,
300 | 			},
301 | 			get:  64 + 6,
302 | 			want: 1 << 6,
303 | 		},
304 | 		{
305 | 			input: &TailBitmap{
306 | 				Offset:   64,
307 | 				Words:    []uint64{0xffffffffffffff7f, 1},
308 | 				Reclamed: 0,
309 | 			},
310 | 			get:  64 + 8,
311 | 			want: 1 << 8,
312 | 		},
313 | 		{
314 | 			input: &TailBitmap{
315 | 				Offset:   64,
316 | 				Words:    []uint64{0xffffffffffffff7f, 1},
317 | 				Reclamed: 0,
318 | 			},
319 | 			get:  64*2 + 0,
320 | 			want: 1,
321 | 		},
322 | 	}
323 | 
324 | 	for i, c := range cases {
325 | 		got := c.input.Get(c.get)
326 | 		ta.Equal(c.want, got, "%d-th: Get case: %+v", i+1, c)
327 | 
328 | 		got1 := c.input.Get1(c.get)
329 | 		if c.want != 0 {
330 | 			ta.Equal(uint64(1), got1, "%d-th: Get1 case: %+v", i+1, c)
331 | 		} else {
332 | 			ta.Equal(uint64(0), got1, "%d-th: Get1 case: %+v", i+1, c)
333 | 		}
334 | 	}
335 | }
336 | 
337 | func TestTailBitmap_Clone(t *testing.T) {
338 | 
339 | 	ta := require.New(t)
340 | 
341 | 	allOnes1024 := make([]uint64, 1024)
342 | 	for i, _ := range allOnes1024 {
343 | 		allOnes1024[i] = 0xffffffffffffffff
344 | 	}
345 | 
346 | 	cases := []struct {
347 | 		input *TailBitmap
348 | 	}{
349 | 		{
350 | 			input: &TailBitmap{
351 | 				Offset:   64,
352 | 				Words:    []uint64{1, 2, 3},
353 | 				Reclamed: 0,
354 | 			},
355 | 		},
356 | 	}
357 | 
358 | 	for i, c := range cases {
359 | 		got := c.input.Clone()
360 | 		ta.Equal(c.input, got, "%d-th: same as cloned case: %+v", i+1, c)
361 | 
362 | 		prev := c.input.Words[0]
363 | 		ta.NotEqual(1000, prev, "%d-th: not 1000 case: %+v", i+1, c)
364 | 		c.input.Words[0] = 1000
365 | 		ta.Equal(prev, got.Words[0], "%d-th: cloned does not change the original case: %+v", i+1, c)
366 | 	}
367 | }
368 | 
369 | func TestTailBitmap_Union(t *testing.T) {
370 | 
371 | 	ta := require.New(t)
372 | 
373 | 	ff := uint64(0xffffffffffffffff)
374 | 
375 | 	cases := []struct {
376 | 		input *TailBitmap
377 | 		other *TailBitmap
378 | 		want  *TailBitmap
379 | 	}{
380 | 		// 1111 xxxx
381 | 		// nil
382 | 		{
383 | 			input: &TailBitmap{Offset: 64, Words: []uint64{1}, Reclamed: 0},
384 | 			other: nil,
385 | 			want:  &TailBitmap{Offset: 64, Words: []uint64{1}, Reclamed: 0},
386 | 		},
387 | 
388 | 		// 1111 xxxx
389 | 		// 1111 yyyy
390 | 		{
391 | 			input: &TailBitmap{Offset: 64, Words: []uint64{1}, Reclamed: 0},
392 | 			other: &TailBitmap{Offset: 64, Words: []uint64{2}, Reclamed: 0},
393 | 			want:  &TailBitmap{Offset: 64, Words: []uint64{3}, Reclamed: 0},
394 | 		},
395 | 		// 1111 1111 xxxx
396 | 		// 1111 yyyy
397 | 		{
398 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1}, Reclamed: 0},
399 | 			other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2}, Reclamed: 0},
400 | 			want:  &TailBitmap{Offset: 64 * 2, Words: []uint64{1}, Reclamed: 0},
401 | 		},
402 | 		// 1111 1111 xxxx
403 | 		// 1111 1111 1111 yyyy
404 | 		{
405 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1}, Reclamed: 0},
406 | 			other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2}, Reclamed: 0},
407 | 			want:  &TailBitmap{Offset: 64 * 3, Words: []uint64{2}, Reclamed: 64 * 3},
408 | 		},
409 | 		// 1111 1111 xxxx xxxx xxxx
410 | 		// 1111 1111 1111 yyyy
411 | 		{
412 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 1, 7}, Reclamed: 0},
413 | 			other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2}, Reclamed: 0},
414 | 			want:  &TailBitmap{Offset: 64 * 3, Words: []uint64{3, 7}, Reclamed: 0},
415 | 		},
416 | 		// 1111 1111 xxxx
417 | 		// 1111 yyyy yyyy yyyy
418 | 		{
419 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1}, Reclamed: 0},
420 | 			other: &TailBitmap{Offset: 64 * 1, Words: []uint64{8, 2, 4}, Reclamed: 0},
421 | 			want:  &TailBitmap{Offset: 64 * 2, Words: []uint64{3, 4}, Reclamed: 0},
422 | 		},
423 | 		// 1111 1111 xxxx xxxx xxxx
424 | 		// 1111 yyyy yyyy yyyy
425 | 		{
426 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3, 7}, Reclamed: 0},
427 | 			other: &TailBitmap{Offset: 64 * 1, Words: []uint64{8, 2, 4}, Reclamed: 0},
428 | 			want:  &TailBitmap{Offset: 64 * 2, Words: []uint64{3, 7, 7}, Reclamed: 0},
429 | 		},
430 | 
431 | 		// trigger reclaim if new all-ones are found.
432 | 		{
433 | 			input: &TailBitmap{Offset: 64 * 1023, Words: []uint64{1, 3, 7}, Reclamed: 0},
434 | 			other: &TailBitmap{Offset: 64 * 1023, Words: []uint64{ff - 1}, Reclamed: 0},
435 | 			want:  &TailBitmap{Offset: 64 * 1024, Words: []uint64{3, 7}, Reclamed: 64 * 1024},
436 | 		},
437 | 	}
438 | 
439 | 	for i, c := range cases {
440 | 		c.input.Union(c.other)
441 | 		ta.Equal(c.want, c.input, "%d-th: Get case: %+v", i+1, c)
442 | 
443 | 	}
444 | }
445 | 
446 | func TestTailBitmap_Intersection(t *testing.T) {
447 | 
448 | 	ta := require.New(t)
449 | 
450 | 	cases := []struct {
451 | 		input *TailBitmap
452 | 		other *TailBitmap
453 | 		want  *TailBitmap
454 | 	}{
455 | 		// 1111 xxxx
456 | 		// nil
457 | 		{
458 | 			input: &TailBitmap{Offset: 64 * 1, Words: []uint64{1}, Reclamed: 0},
459 | 			other: nil,
460 | 			want:  &TailBitmap{Offset: 64 * 0, Words: []uint64{}, Reclamed: 0},
461 | 		},
462 | 
463 | 		// 1111 xxxx
464 | 		// 1111 1111 1111 yyyy
465 | 		{
466 | 			input: &TailBitmap{Offset: 64 * 1, Words: []uint64{1}, Reclamed: 0},
467 | 			other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2}, Reclamed: 0},
468 | 			want:  &TailBitmap{Offset: 64 * 1, Words: []uint64{1}, Reclamed: 0},
469 | 		},
470 | 
471 | 		// 1111 1111 1111 xxxx xxxx
472 | 		// 1111 yyyy
473 | 		{
474 | 			input: &TailBitmap{Offset: 64 * 3, Words: []uint64{1, 3}, Reclamed: 0},
475 | 			other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2}, Reclamed: 0},
476 | 			want:  &TailBitmap{Offset: 64 * 1, Words: []uint64{2}, Reclamed: 64 * 1},
477 | 		},
478 | 
479 | 		// 1111 1111 xxxx xxxx
480 | 		// 1111 1111 1111 yyyy yyyy
481 | 		{
482 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0},
483 | 			other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2, 4}, Reclamed: 0},
484 | 			want:  &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 2 & 3}, Reclamed: 64 * 2},
485 | 		},
486 | 
487 | 		// 1111 1111 xxxx xxxx xxxx xxxx
488 | 		// 1111 1111 1111 yyyy yyyy
489 | 		{
490 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3, 7, 7}, Reclamed: 0},
491 | 			other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2, 4}, Reclamed: 0},
492 | 			want:  &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 2 & 3, 7 & 4}, Reclamed: 64 * 2},
493 | 		},
494 | 
495 | 		// 1111 1111 xxxx xxxx
496 | 		// 1111 yyyy yyyy
497 | 		{
498 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0},
499 | 			other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2, 4}, Reclamed: 0},
500 | 			want:  &TailBitmap{Offset: 64 * 1, Words: []uint64{2}, Reclamed: 64 * 1},
501 | 		},
502 | 
503 | 		// 1111 1111 xxxx xxxx
504 | 		// 1111 yyyy yyyy yyyy yyyy
505 | 		{
506 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0},
507 | 			other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2, 2, 3, 4}, Reclamed: 0},
508 | 			want:  &TailBitmap{Offset: 64 * 1, Words: []uint64{2, 1 & 2, 3 & 3}, Reclamed: 64 * 1},
509 | 		},
510 | 	}
511 | 
512 | 	for i, c := range cases {
513 | 		c.input.Intersection(c.other)
514 | 		ta.Equal(c.want, c.input, "%d-th: Get case: %+v", i+1, c)
515 | 	}
516 | }
517 | 
518 | func TestTailBitmap_Diff(t *testing.T) {
519 | 
520 | 	ta := require.New(t)
521 | 
522 | 	ff := uint64(0xffffffffffffffff)
523 | 
524 | 	cases := []struct {
525 | 		input *TailBitmap
526 | 		other *TailBitmap
527 | 		want  *TailBitmap
528 | 	}{
529 | 		// 1111 xxxx
530 | 		// nil
531 | 		{
532 | 			input: &TailBitmap{Offset: 64 * 1, Words: []uint64{1}, Reclamed: 0},
533 | 			other: nil,
534 | 			want:  &TailBitmap{Offset: 64 * 1, Words: []uint64{1}, Reclamed: 0},
535 | 		},
536 | 
537 | 		// 1111 xxxx
538 | 		// 1111 1111 1111 yyyy
539 | 		{
540 | 			input: &TailBitmap{Offset: 64 * 1, Words: []uint64{1}, Reclamed: 0},
541 | 			other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2}, Reclamed: 0},
542 | 			want:  &TailBitmap{Offset: 64 * 1, Words: []uint64{ff - 1}, Reclamed: 0},
543 | 		},
544 | 
545 | 		// 1111 1111 1111 xxxx xxxx
546 | 		// 1111 yyyy
547 | 		{
548 | 			input: &TailBitmap{Offset: 64 * 3, Words: []uint64{1, 3}, Reclamed: 0},
549 | 			other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2}, Reclamed: 0},
550 | 			want:  &TailBitmap{Offset: 64 * 1, Words: []uint64{ff - 2, ff, 1, 3}, Reclamed: 64 * 1},
551 | 		},
552 | 
553 | 		// 1111 1111 xxxx xxxx
554 | 		// 1111 1111 1111 yyyy yyyy
555 | 		{
556 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0},
557 | 			other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2, 4}, Reclamed: 0},
558 | 			want:  &TailBitmap{Offset: 64 * 2, Words: []uint64{0, 1}, Reclamed: 0},
559 | 		},
560 | 
561 | 		// 1111 1111 xxxx xxxx xxxx xxxx
562 | 		// 1111 1111 1111 yyyy yyyy
563 | 		{
564 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3, 7, 7}, Reclamed: 0},
565 | 			other: &TailBitmap{Offset: 64 * 3, Words: []uint64{2, 4}, Reclamed: 0},
566 | 			want:  &TailBitmap{Offset: 64 * 2, Words: []uint64{0, 1, 3, 7}, Reclamed: 0},
567 | 		},
568 | 
569 | 		// 1111 1111 xxxx xxxx
570 | 		// 1111 yyyy yyyy
571 | 		{
572 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0},
573 | 			other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2, 4}, Reclamed: 0},
574 | 			want:  &TailBitmap{Offset: 64 * 1, Words: []uint64{ff - 2, 1, 3}, Reclamed: 64 * 1},
575 | 		},
576 | 
577 | 		// 1111 1111 xxxx xxxx
578 | 		// 1111 yyyy yyyy yyyy yyyy
579 | 		{
580 | 			input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 3}, Reclamed: 0},
581 | 			other: &TailBitmap{Offset: 64 * 1, Words: []uint64{2, 2, 3, 4}, Reclamed: 0},
582 | 			want:  &TailBitmap{Offset: 64 * 1, Words: []uint64{ff - 2, 1, 0}, Reclamed: 64 * 1},
583 | 		},
584 | 	}
585 | 
586 | 	for i, c := range cases {
587 | 		c.input.Diff(c.other)
588 | 		ta.Equal(c.want, c.input, "%d-th: Get case: %+v", i+1, c)
589 | 	}
590 | }
591 | 
592 | func TestTailBitmap_Len(t *testing.T) {
593 | 
594 | 	ta := require.New(t)
595 | 
596 | 	cases := []struct {
597 | 		input *TailBitmap
598 | 		want  int64
599 | 	}{
600 | 		{input: &TailBitmap{Offset: 0, Words: []uint64{}}, want: 0},
601 | 		{input: &TailBitmap{Offset: 0, Words: []uint64{1}}, want: 1},
602 | 		{input: &TailBitmap{Offset: 64 * 1, Words: []uint64{}}, want: 64 * 1},
603 | 		{input: &TailBitmap{Offset: 64 * 1, Words: []uint64{1}}, want: 64*1 + 1},
604 | 		{input: &TailBitmap{Offset: 64 * 2, Words: []uint64{2}}, want: 64*2 + 2},
605 | 		{input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 2}}, want: 64*3 + 2},
606 | 		{input: &TailBitmap{Offset: 64 * 2, Words: []uint64{1, 2, 0}}, want: 64*3 + 2},
607 | 	}
608 | 
609 | 	for i, c := range cases {
610 | 		got := c.input.Len()
611 | 		ta.Equal(c.want, got, "%d-th: Get case: %+v", i+1, c)
612 | 	}
613 | }
614 | 


--------------------------------------------------------------------------------
/traft.go:
--------------------------------------------------------------------------------
  1 | // Package traft is a raft variant with out-of-order commit/apply
  2 | // and a more generalized member change algo.
  3 | package traft
  4 | 
  5 | import (
  6 | 	"fmt"
  7 | 	"net"
  8 | 	"sort"
  9 | 	"strings"
 10 | 	"sync"
 11 | 	"sync/atomic"
 12 | 	"time"
 13 | 
 14 | 	grpc "google.golang.org/grpc"
 15 | 	"google.golang.org/grpc/reflection"
 16 | )
 17 | 
// TRaft is one replica: the embedded Node state plus the channels, gRPC
// server and worker bookkeeping used to run it.
type TRaft struct {
	// running is set to 1 by NewTRaft and to 0 by Stop(); Stop() accesses it
	// through sync/atomic.
	// NOTE(review): it is the first field, which keeps it 64-bit aligned for
	// atomic access on 32-bit platforms — confirm before reordering fields.
	running int64

	// close it to notify all goroutines to shutdown.
	shutdown chan struct{}

	// Communication channel with Loop().
	// Only Loop() modifies state of TRaft.
	// Other goroutines send a queryBody through this channel and wait for an
	// operation reply.
	actionCh chan *queryBody

	// for external component to receive traft state changes.
	// sendMsg() never blocks on it: a message is dropped (and logged) when
	// the channel is full.
	MsgCh chan string

	// grpcServer is created and registered by NewTRaft and started by
	// StartServer().
	grpcServer *grpc.Server

	// wait group of all worker goroutines
	workerWG sync.WaitGroup

	// Node is the replica state (id, cluster config, logs, replica status).
	Node
}
 40 | 
// init sets up package-level logging by calling initLogging() when the
// package is loaded.
func init() {
	initLogging()
}
 44 | 
 45 | func NewTRaft(id int64, idAddrs map[int64]string) *TRaft {
 46 | 	_, ok := idAddrs[id]
 47 | 	if !ok {
 48 | 		panic("my id is not in cluster")
 49 | 	}
 50 | 
 51 | 	members := make(map[int64]*ReplicaInfo, 0)
 52 | 
 53 | 	ids := []int64{}
 54 | 	for id, _ := range idAddrs {
 55 | 		ids = append(ids, id)
 56 | 	}
 57 | 
 58 | 	sort.Slice(ids, func(i, j int) bool {
 59 | 		return ids[i] < ids[j]
 60 | 	})
 61 | 
 62 | 	for p, id := range ids {
 63 | 		members[id] = &ReplicaInfo{
 64 | 			Id:       id,
 65 | 			Addr:     idAddrs[id],
 66 | 			Position: int64(p),
 67 | 		}
 68 | 	}
 69 | 
 70 | 	conf := &Cluster{
 71 | 		Members: members,
 72 | 	}
 73 | 	maxPos := conf.MaxPosition()
 74 | 	conf.Quorums = buildMajorityQuorums(1<<uint(maxPos+1) - 1)
 75 | 
 76 | 	progs := make(map[int64]*ReplicaStatus, 0)
 77 | 	for _, m := range members {
 78 | 		progs[m.Id] = emptyReplicaStatus(m.Id)
 79 | 	}
 80 | 
 81 | 	node := &Node{
 82 | 		Config: conf,
 83 | 		Logs:   make([]*LogRecord, 0),
 84 | 		Id:     id,
 85 | 		Status: progs,
 86 | 	}
 87 | 
 88 | 	// TODO buffer size
 89 | 	shutdown := make(chan struct{})
 90 | 	actionCh := make(chan *queryBody)
 91 | 
 92 | 	tr := &TRaft{
 93 | 		running:    1,
 94 | 		shutdown:   shutdown,
 95 | 		actionCh:   actionCh,
 96 | 		MsgCh:      make(chan string, 1024),
 97 | 		grpcServer: nil,
 98 | 		workerWG:   sync.WaitGroup{},
 99 | 		Node:       *node,
100 | 	}
101 | 
102 | 	{
103 | 		s := grpc.NewServer()
104 | 		RegisterTRaftServer(s, tr)
105 | 		reflection.Register(s)
106 | 
107 | 		tr.grpcServer = s
108 | 	}
109 | 
110 | 	return tr
111 | }
112 | 
// Start brings the replica fully up: it starts the gRPC server, then the main
// loop, then the election loop.
func (tr *TRaft) Start() {
	tr.StartServer()
	tr.StartMainLoop()
	tr.StartVoteLoop()
}
118 | 
119 | func (tr *TRaft) StartServer() {
120 | 
121 | 	id := tr.Id
122 | 	addr := tr.Config.Members[id].Addr
123 | 
124 | 	lis, err := net.Listen("tcp", addr)
125 | 	if err != nil {
126 | 		lg.Fatalw("Fail to listen:", "addr", addr, "err", err)
127 | 	}
128 | 
129 | 	go tr.grpcServer.Serve(lis)
130 | 	lg.Infow("grpc started", "addr", addr)
131 | }
132 | 
// goit runs f in a new goroutine that is tracked by tr.workerWG, so that
// Stop() can wait for f to return.
func (tr *TRaft) goit(f func()) {
	// Add before spawning so a concurrent Wait cannot miss this worker.
	tr.workerWG.Add(1)
	go func() {
		defer tr.workerWG.Done()
		f()
	}()
}
140 | 
// StartMainLoop runs tr.Loop in a tracked worker goroutine (see goit).
func (tr *TRaft) StartMainLoop() {
	tr.goit(tr.Loop)
	lg.Infow("Started Loop")
}
145 | 
// StartVoteLoop runs tr.ElectLoop in a tracked worker goroutine (see goit).
func (tr *TRaft) StartVoteLoop() {
	tr.goit(tr.ElectLoop)
	lg.Infow("Start ElectLoop")
}
150 | 
151 | func (tr *TRaft) Stop() {
152 | 	id := tr.Id
153 | 	addr := tr.Config.Members[id].Addr
154 | 	lg.Infow("Stopping grpc: ", "addr:", addr)
155 | 	// tr.grpcServer.Stop() does not wait.
156 | 	tr.grpcServer.GracefulStop()
157 | 
158 | 	if atomic.LoadInt64(&tr.running) == 0 {
159 | 		lg.Infow("TRaft already stopped")
160 | 		return
161 | 	}
162 | 
163 | 	lg.Infow("close shutdown")
164 | 	close(tr.shutdown)
165 | 	atomic.StoreInt64(&tr.running, 0)
166 | 
167 | 	tr.workerWG.Wait()
168 | 
169 | 	lg.Infow("TRaft stopped")
170 | }
171 | 
172 | // stoppable sleep, if tr.Stop() has been called, it returns at once
173 | func (tr *TRaft) sleep(t time.Duration) {
174 | 	select {
175 | 	case <-time.After(t):
176 | 	case <-tr.shutdown:
177 | 	}
178 | }
179 | 
180 | func (tr *TRaft) sendMsg(msg ...interface{}) {
181 | 
182 | 	mm := []string{fmt.Sprintf("Id=%d", tr.Id)}
183 | 	for _, m := range msg {
184 | 		mm = append(mm, toStr(m))
185 | 	}
186 | 
187 | 	vv := strings.Join(mm, " ")
188 | 	fmt.Println("===", vv)
189 | 
190 | 	select {
191 | 	case tr.MsgCh <- vv:
192 | 		// lg.Infow("succ-send-msg", "msg", vv)
193 | 	default:
194 | 		lg.Infow("fail-send-msg", "msg", vv)
195 | 	}
196 | }
197 | 


--------------------------------------------------------------------------------
/traft.proto:
--------------------------------------------------------------------------------
  1 | syntax = "proto3";
  2 | 
  3 | option go_package = ".;traft";
  4 | 
  5 | import "github.com/gogo/protobuf/gogoproto/gogo.proto";
  6 | 
  7 | // option (gogoproto.goproto_unrecognized_all) = false;
  8 | option (gogoproto.equal_all) = true;
  9 | option (gogoproto.goproto_enum_prefix_all) = false;
 10 | // option (gogoproto.goproto_stringer_all) = true;
 11 | // option (gogoproto.goproto_getters_all) = false;
 12 | 
 13 | //
 14 | // Nonexchangeable logs: r1 and r3. They both modify the same key `x`.
 15 | // r1 and r2, r2 and r3 are exchangeable.
 16 | //
 17 | //  r3: set x = 2  <-.
 18 | //  r2: set y = 1    |
 19 | //  r1: set x = 1  <-'
 20 | 
 21 | // Issue about the original raft
 22 | //
 23 | // 1. Only one leader can be established in a `term`.
 24 | // Such a design introduces more conflict if multiple replicas try to become a
 25 | // leader in a short time.
 26 | //
 27 | // Solution
 28 | //
 29 | // blabla TODO 
 30 | 
// Cmd defines the action a log record does
message Cmd {
    // Op is the operation to perform.
    // NOTE(review): presumably a verb such as "set", as in the example at the
    // top of this file — confirm the accepted values.
    string Op = 10;
    // Key is the key the operation applies to.
    string Key = 20;

    // Value is the operand of the operation; at most one variant is set.
    oneof Value {
        string VStr = 31;
        int64 VI64 = 32;

        // cluster config change: adding/removing members.
        Cluster VClusterConfig = 33;
    }
}
 44 | 
// TailBitmap is a bitmap that has all its leading bits set to `1`.
// Thus it is compressed with an Offset of all-ones position and a trailing
// bitmap.
// It is used to describe LogRecord dependency etc.
//
// The data structure is as the following described:
//
//                      reclaimed
//                      |
//                      |     Offset
//                      |     |
//                      v     v
//                ..... X ... 01010...00111  00...
//   bitIndex:    0123...     ^              ^
//                            |              |
//                            Words[0]       Words[1]
//
message TailBitmap {
    // Offset is the bit index at which Words[0] starts; every bit before it
    // is 1.
    int64 Offset = 1;
    // Words holds the trailing bitmap, 64 bits per word.
    repeated uint64 Words = 2;
    // Reclamed (sic) is the "reclaimed" position in the diagram above.
    // NOTE(review): its exact meaning is not visible in this file — confirm
    // against the Go implementation. The field name is part of the generated
    // API, so the spelling is kept.
    int64 Reclamed = 3;
}
 67 | 
// LogRecord is a log record
message LogRecord {
    // Which leader initially proposed this log.
    // Author may not be the same as Committer, if Author fails when trying to
    // commit a log record.
    //
    // TODO It seems this field is useless. Because we already have `Accepted`.
    // This is different from the original raft:
    // raft does not have an explicit concept `accepted`, which is essential in
    // paxos.
    // Instead, the `committed` in raft is defined as: leader forwards its
    // own term log to a quorum.
    LeaderId Author = 1;

    // Log sequence number.
    int64 Seq = 10;

    // Cmd describes what this log does.
    Cmd Cmd = 30;

    // The logs that must be executed before this one.
    // Normally it is the least lsn on a leader that is not purged yet.
    TailBitmap Depends = 32;

    // Overrides describes what previous logs this log record overrides.
    TailBitmap Overrides = 40;
}
 95 | 
 96 | /*
 97 |  * // SSValue is the value structure in snapshot.
 98 |  * message SSValue {
 99 |  * 
100 |  *     oneof Value {
101 |  *         string VStr = 31;
102 |  *         int64 VI64 = 32;
103 |  *     }
104 |  * 
105 |  *     // The last log seq number modifying this value.
106 |  *     int64 Lsn = 40;
107 |  * }
108 |  * 
109 |  * message Snapshot {
110 |  *     map<string, SnapShotValue> KVs = 1;
111 |  *     int64 WAL
112 |  * }
113 |  * 
114 |  * 
115 |  * message WALRecord {
116 |  *     
117 |  * 
118 |  * }
119 |  */
120 | 
// LeaderId identifies a leader by a term and a replica id.
message LeaderId {
    int64 Term = 1;
    // Id is a replica id.
    int64 Id = 2;
}
125 | 
126 | 
// The replica that has the latest log is allowed to be a new leader.
128 | // I.e., log is forwarded from latest leader(max Committer), and has the highest
129 | // log seq number.
130 | // Then a leader is chosen from these candidates by their Current leader id.
131 | // The max LeaderId wins.
132 | 
// Node is the state of one replica: its id, the cluster config, the logs it
// holds and its view of every replica's status.
message Node {
    // replica id of this replica.
    int64 Id = 3;

    // Config is the cluster this replica belongs to.
    Cluster Config = 1;

    // The log seq number from which logs are kept here.
    int64 LogOffset = 4;
    repeated LogRecord Logs = 2;

    // local view of every replica, including this node too.
    // Keyed by replica id.
    map<int64, ReplicaStatus> Status = 6;
}
146 | 
// LogStatus carries a Committer and an Accepted bitmap, the same pair of
// fields found in ReplicaStatus and ElectReq.
message LogStatus {
    LeaderId Committer = 4;
    TailBitmap Accepted = 1;
}
151 | 
// ReplicaStatus is what is known about one replica: who it voted for, the
// latest committer of its logs, and which logs it has accepted, committed and
// applied.
message ReplicaStatus {

    // last seen term+id
    // int64 Term = 3;
    // int64 Id = 10;
    // the last leader it voted for. or it is local term + local id.
    // E.g., voted for itself.
    //
    // TODO cleanup comment:
    // which replica it has voted for as a leader.
    //
    // Accepted is the same as VotedFor after receiving one log-replication
    // message from the leader.
    //
    // Before receiving a message, VotedFor is the leader this replica knows of,
    // Accepted is nil.
    LeaderId VotedFor = 10;

    // at what time the voted value expires,
    // as unix time in nanoseconds (10^-9 second).
    int64    VoteExpireAt = 11;

    // The Leader tried to commit all of the local logs.
    // The Committer is the same as Author if a log entry is committed by its
    // Author.
    //
    // If an Author fails and the log is finally committed by some other leader,
    // Committer is a higher value than Author.
    //
    // It is similar to the vrnd/vballot concept in paxos.
    // the Ballot number a value is accepted at.
    LeaderId Committer = 4;

    // What logs have been accepted by this replica.
    TailBitmap Accepted = 1;
    // What logs have been committed.
    TailBitmap Committed = 2;
    // What logs have been applied.
    TailBitmap Applied = 3;
}
190 | 
// ReplicaInfo describes one cluster member.
message ReplicaInfo {
    // Id is the replica id.
    int64 Id = 1;
    // Addr is the address the replica listens on, e.g. ":5501".
    string Addr = 2;
    // Position indicates the index of this member in its cluster.
    int64 Position = 3;
}
197 | 
// Cluster is the cluster configuration: its members and its quorums.
message Cluster {
    // Members maps replica id to member info.
    map<int64, ReplicaInfo> Members = 11;
    // Quorums lists the quorums, each encoded as a bitmap in which bit i
    // stands for the member at Position i. E.g. the majority quorums of a
    // three-member cluster are 3, 5 and 6 (binary 011, 101, 110).
    repeated uint64 Quorums = 21;
}
202 | 
// ElectReq is the request of the Elect rpc: a candidate asks for a vote and
// reports its local log status.
message ElectReq {
    // who initiates the election
    LeaderId Candidate = 1;


    // candidate local log status:

    // Latest leader that forwarded log to the candidate
    LeaderId Committer = 2;

    // what logs the candidate has.
    TailBitmap Accepted = 3;
}
216 | 
// ElectReply is the reply of the Elect rpc.
message ElectReply {
    // NOTE(review): presumably true when the vote is granted — confirm
    // against the Elect handler.
    bool OK = 10;

    // the replica id this reply comes from
    int64 Id = 1;

    // voted for a candidate or the previous voted other leader.
    LeaderId VotedFor = 2;

    // latest log committer.
    LeaderId Committer = 4;

    // what logs I have.
    TailBitmap Accepted = 21;

    // what logs I have committed.
    TailBitmap Committed = 22;

    // The logs that voter has but leader candidate does not have.
    // For the leader to rebuild all possibly committed logs from a quorum.
    repeated LogRecord Logs = 30;
}
238 | 
// LogForwardReq is the request of the LogForward rpc: a leader forwards logs
// to a replica.
message LogForwardReq {
    // Committer is the leader forwarding the logs.
    LeaderId Committer = 1;
    // Logs are the log records being forwarded.
    repeated LogRecord Logs = 2;

    // Committed indicates logs committed by leader.
    // A follower should commit every log it has that is in Committed directly.
    TailBitmap Committed = 3;
}
247 | 
// LogForwardReply is the reply of the LogForward rpc.
message LogForwardReply {
    bool OK = 10;
    // A replica responding with a VotedFor equal to
    // LogForwardReq.Committer indicates the logs are accepted.
    // Otherwise declined.
    LeaderId VotedFor =1;

    // Also a replica should respond with what logs it already has and
    // has committed.

    TailBitmap Accepted = 2;
    TailBitmap Committed = 3;
}
261 | 
// ProposeReply is the reply of the Propose rpc.
message ProposeReply {
    bool OK = 2;
    // Err is an error description.
    // NOTE(review): presumably empty when OK is true — confirm against the
    // Propose handler.
    string Err = 3;
    // I am not leader, please redirect to `OtherLeader` to write to TRaft.
    LeaderId OtherLeader =1;
}
268 | 
// TRaft is the rpc service exposed by every replica.
service TRaft {
    // Elect asks a replica to vote for a candidate.
    rpc Elect (ElectReq) returns (ElectReply) {}
    // LogForward forwards logs from a leader to a replica.
    rpc LogForward (LogForwardReq) returns (LogForwardReply) {}
    // Propose submits a Cmd to be written to TRaft; a non-leader replies with
    // the leader to redirect to (see ProposeReply.OtherLeader).
    rpc Propose (Cmd) returns (ProposeReply) {}
}
274 | 


--------------------------------------------------------------------------------
/util.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"strings"
 6 | 	"time"
 7 | )
 8 | 
 9 | func cmpI64(a, b int64) int {
10 | 	if a > b {
11 | 		return 1
12 | 	}
13 | 	if a < b {
14 | 		return -1
15 | 	}
16 | 	return 0
17 | }
18 | 
// uSecondI64 returns uSecond() converted to int64.
// NOTE(review): despite the name, uSecond() builds its value from whole
// seconds times 10^9 plus the nanosecond part, i.e. nanoseconds, not
// microseconds.
func uSecondI64() int64 {
	return int64(uSecond())
}
22 | 
// uSecond returns the current wall-clock time as a time.Duration counting the
// nanoseconds elapsed since the Unix epoch.
func uSecond() time.Duration {
	return time.Duration(time.Now().UnixNano())
}
27 | 
// join concatenates all of its arguments, in order, without a separator.
func join(ss ...string) string {
	var sb strings.Builder
	for _, s := range ss {
		sb.WriteString(s)
	}
	return sb.String()
}
31 | 
// basePort is the base of the listen ports handed out by newCluster: replica
// `id` gets the address ":<basePort+id>".
var basePort = int64(5500)
33 | 
34 | // newCluster starts a grpc server for every replica.
35 | func newCluster(ids []int64) []*TRaft {
36 | 
37 | 	cluster := make(map[int64]string)
38 | 
39 | 	trafts := make([]*TRaft, 0)
40 | 
41 | 	for _, id := range ids {
42 | 		addr := fmt.Sprintf(":%d", basePort+int64(id))
43 | 		cluster[id] = addr
44 | 	}
45 | 
46 | 	for _, id := range ids {
47 | 		srv := NewTRaft(id, cluster)
48 | 		trafts = append(trafts, srv)
49 | 	}
50 | 
51 | 	return trafts
52 | }
53 | 
54 | func startCluster(ts []*TRaft) {
55 | 
56 | 	for _, t := range ts {
57 | 		// in a test env, only start server
58 | 		// manually start loops
59 | 		t.StartServer()
60 | 		t.StartMainLoop()
61 | 	}
62 | }
63 | 
64 | 


--------------------------------------------------------------------------------
/util_test.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	fmt "fmt"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/stretchr/testify/require"
 8 | )
 9 | 
10 | func Test_serveCluster(t *testing.T) {
11 | 
12 | 	ta := require.New(t)
13 | 
14 | 	ids := []int64{1, 2, 3}
15 | 
16 | 	ts := newCluster(ids)
17 | 	defer stopAll(ts)
18 | 
19 | 	for i, tr := range ts {
20 | 		// 110 101 011
21 | 		ta.Equal([]uint64{3, 5, 6}, tr.Config.Quorums)
22 | 
23 | 		fmt.Println("===", tr.Config.Members)
24 | 		fmt.Println("---", tr.Config.SortedReplicaInfos())
25 | 
26 | 		ta.Equal([]*ReplicaInfo{
27 | 			&ReplicaInfo{Id: 1, Addr: ":5501", Position: 0},
28 | 			&ReplicaInfo{Id: 2, Addr: ":5502", Position: 1},
29 | 			&ReplicaInfo{Id: 3, Addr: ":5503", Position: 2},
30 | 		}, tr.Config.SortedReplicaInfos())
31 | 
32 | 		ta.Equal(int64(0), tr.LogOffset)
33 | 		ta.Equal([]*LogRecord{}, tr.Logs)
34 | 		ta.Equal(ids[i], tr.Id)
35 | 		for _, id := range ids {
36 | 			st := tr.Status[id]
37 | 			ta.Equal(&ReplicaStatus{
38 | 				// voted for ones self at first.
39 | 				VotedFor:  &LeaderId{Term: 0, Id: id},
40 | 				Committer: nil,
41 | 				Accepted:  NewTailBitmap(0),
42 | 				Committed: NewTailBitmap(0),
43 | 				Applied:   NewTailBitmap(0),
44 | 			}, st)
45 | 		}
46 | 	}
47 | }
48 | 


--------------------------------------------------------------------------------
/votereply.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | // if the first log in v.Logs matches lsn, pop and return it.
 4 | // Otherwise return nil.
 5 | func (v *ElectReply) PopRecord(lsn int64) *LogRecord {
 6 | 	if len(v.Logs) == 0 {
 7 | 		return nil
 8 | 	}
 9 | 
10 | 	r := v.Logs[0]
11 | 	if r.Seq < lsn {
12 | 		panic("wtf")
13 | 	}
14 | 
15 | 	if r.Seq == lsn {
16 | 		v.Logs = v.Logs[1:]
17 | 		return r
18 | 	}
19 | 
20 | 	return nil
21 | 
22 | }
23 | 


--------------------------------------------------------------------------------
/votereply_test.go:
--------------------------------------------------------------------------------
 1 | package traft
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/require"
 7 | )
 8 | 
 9 | func TestVoteReply_Pop(t *testing.T) {
10 | 
11 | 	ta := require.New(t)
12 | 
13 | 	vr := &ElectReply{
14 | 		Logs: []*LogRecord{
15 | 			NewRecord(NewLeaderId(1, 2), 5, nil),
16 | 			NewRecord(NewLeaderId(1, 2), 7, nil),
17 | 		},
18 | 	}
19 | 
20 | 	var got *LogRecord
21 | 	ta.Nil(vr.PopRecord(4))
22 | 
23 | 	got = vr.PopRecord(5)
24 | 	ta.NotNil(got)
25 | 	ta.Equal(int64(5), got.Seq)
26 | 
27 | 	// pop again
28 | 	ta.Nil(vr.PopRecord(5))
29 | 
30 | 	ta.Nil(vr.PopRecord(6))
31 | 
32 | 	got = vr.PopRecord(7)
33 | 	ta.NotNil(got)
34 | 	ta.Equal(int64(7), got.Seq)
35 | 
36 | 	// pop from empty logs:
37 | 	ta.Nil(vr.PopRecord(5))
38 | }
39 | 


--------------------------------------------------------------------------------