├── .github ├── CODEOWNERS ├── dependabot.yml ├── stale.yml └── workflows │ ├── ci.yml │ └── two-step-pr-approval.yml ├── .gitignore ├── .gitmodules ├── .golangci-lint.yml ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── api.go ├── bench └── bench.go ├── bench_test.go ├── commands.go ├── commitment.go ├── commitment_test.go ├── config.go ├── configuration.go ├── configuration_test.go ├── discard_snapshot.go ├── discard_snapshot_test.go ├── docs ├── README.md └── apply.md ├── file_snapshot.go ├── file_snapshot_test.go ├── fsm.go ├── future.go ├── future_test.go ├── fuzzy ├── apply_src.go ├── cluster.go ├── fsm.go ├── fsm_batch.go ├── go.mod ├── go.sum ├── leadershiptransfer_test.go ├── membership_test.go ├── node.go ├── partition_test.go ├── readme.md ├── resolve.go ├── simple_test.go ├── slowvoter_test.go ├── transport.go └── verifier.go ├── go.mod ├── go.sum ├── inmem_snapshot.go ├── inmem_snapshot_test.go ├── inmem_store.go ├── inmem_transport.go ├── inmem_transport_test.go ├── integ_test.go ├── log.go ├── log_cache.go ├── log_cache_test.go ├── log_test.go ├── membership.md ├── net_transport.go ├── net_transport_test.go ├── observer.go ├── peersjson.go ├── peersjson_test.go ├── progress.go ├── raft-compat ├── go.mod ├── go.sum ├── prevote_test.go ├── rolling_upgrade_test.go ├── testcluster │ └── cluster.go └── utils │ └── test_utils.go ├── raft.go ├── raft_test.go ├── replication.go ├── saturation.go ├── saturation_test.go ├── snapshot.go ├── stable.go ├── state.go ├── tag.sh ├── tcp_transport.go ├── tcp_transport_test.go ├── testing.go ├── testing_batch.go ├── transport.go ├── transport_test.go ├── util.go └── util_test.go /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Each line is a file pattern followed by one or more owners. 2 | # More on CODEOWNERS files: https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners 3 | 4 | # Default owner 5 | * @hashicorp/team-ip-compliance @hashicorp/consul-core-reviewers @hashicorp/nomad-eng @hashicorp/raft-force 6 | 7 | # Add override rules below. Each line is a file/folder pattern followed by one or more owners. 8 | # Being an owner means those groups or individuals will be added as reviewers to PRs affecting 9 | # those areas of the code. 10 | # Examples: 11 | # /docs/ @docs-team 12 | # *.js @js-team 13 | # *.go @go-team 14 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | - package-ecosystem: "gomod" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" 8 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) HashiCorp, Inc. 2 | # SPDX-License-Identifier: MPL-2.0 3 | 4 | # Number of days of inactivity before an Issue becomes stale 5 | daysUntilStale: 60 6 | 7 | # Number of days of inactivity before an Issue with the stale label is closed. 8 | # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. 9 | daysUntilClose: 30 10 | 11 | # Issues with these labels will never be considered stale. 
Set to `[]` to disable 12 | # We don't close any issue that is an enhancement or confirmed bug, but issues 13 | # waiting for reproduction cases and questions tend to get outdated. 14 | exemptLabels: 15 | - "enhancement" 16 | - "bug" 17 | - "thinking" 18 | - "docs" 19 | 20 | # Label to use when marking as stale 21 | staleLabel: "waiting-reply" 22 | 23 | # Comment to post when marking as stale. Set to `false` to disable 24 | markComment: | 25 | Hey there, 26 | We wanted to check in on this request since it has been inactive for at least 90 days. 27 | Have you reviewed the latest [godocs](https://godoc.org/github.com/hashicorp/raft)? 28 | If you think this is still an important issue in the latest version of [the Raft library](https://github.com/hashicorp/raft/compare/) or 29 | [its documentation](https://github.com/hashicorp/raft/compare/) please feel let us know and we'll keep it open for investigation. 30 | If there is still no activity on this request in 30 days, we will go ahead and close it. 31 | Thank you! 32 | 33 | # Comment to post when removing the stale label. Set to `false` to disable 34 | unmarkComment: false 35 | 36 | # Comment to post when closing a stale Issue. Set to `false` to disable 37 | closeComment: > 38 | Hey there, 39 | This issue has been automatically closed because there hasn't been any activity for a while. 40 | If you are still experiencing problems, or still have questions, feel free to [open a new one](https://github.com/hashicorp/raft/issues/new) :+1 41 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | pull_request: 5 | branches: ["main"] 6 | push: 7 | branches: ["main"] 8 | tags: ["*"] 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | go-fmt-and-vet: 15 | runs-on: ubuntu-22.04 16 | steps: 17 | - uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 18 | - uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 19 | with: 20 | go-version: '1.20' 21 | cache: true 22 | - run: | 23 | files=$(go fmt ./...) 24 | if [ -n "$files" ]; then 25 | echo "The following file(s) do not conform to go fmt:" 26 | echo "$files" 27 | exit 1 28 | fi 29 | - run: | 30 | PACKAGE_NAMES=$(go list ./... | grep -v github.com/hashicorp/raft/fuzzy) 31 | go vet $PACKAGE_NAMES 32 | 33 | go-test: 34 | needs: go-fmt-and-vet 35 | strategy: 36 | matrix: 37 | go: ['1.19', '1.20'] 38 | arch: ['x32', 'x64'] 39 | runs-on: ubuntu-22.04 40 | env: 41 | INTEG_TESTS: yes 42 | steps: 43 | - uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 44 | - uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 45 | with: 46 | go-version: ${{ matrix.go }} 47 | architecture: ${{ matrix.arch }} 48 | cache: true 49 | # x86 specific build. 50 | - if: matrix.arch == 'x32' 51 | run: | 52 | sudo apt-get update 53 | sudo apt-get install gcc-multilib 54 | go test --tags batchtest ./... 55 | # x86-64 specific build. 56 | - if: matrix.arch == 'x64' 57 | run: go test -race --tags batchtest ./... 
58 | go-test-compat: 59 | needs: go-test 60 | strategy: 61 | matrix: 62 | go: [ '1.20', '1.21', '1.22' ] 63 | arch: [ 'x32', 'x64' ] 64 | runs-on: ubuntu-22.04 65 | steps: 66 | - uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 67 | - uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 68 | with: 69 | go-version: ${{ matrix.go }} 70 | architecture: ${{ matrix.arch }} 71 | cache: true 72 | submodules: true 73 | # x86 specific build. 74 | - if: matrix.arch == 'x32' 75 | run: | 76 | sudo apt-get update 77 | sudo apt-get install gcc-multilib 78 | git submodule update --init --recursive 79 | cd raft-compat 80 | go mod tidy 81 | go test -v -coverpkg=./... ./... -coverprofile="${{ github.workspace }}/coverage.out" 82 | # x86-64 specific build. 83 | - if: matrix.arch == 'x64' 84 | run: | 85 | git submodule update --init --recursive 86 | cd raft-compat 87 | go mod tidy 88 | go test -race -v -coverpkg=./... ./... -coverprofile="${{ github.workspace }}/coverage.out" 89 | - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 90 | with: 91 | path: "${{ github.workspace }}/coverage.out" 92 | name: coverage-report-${{matrix.go}}-${{matrix.arch}} 93 | -------------------------------------------------------------------------------- /.github/workflows/two-step-pr-approval.yml: -------------------------------------------------------------------------------- 1 | name: Two-Stage PR Review Process 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened, labeled, unlabeled, ready_for_review, converted_to_draft] 6 | pull_request_review: 7 | types: [submitted] 8 | 9 | jobs: 10 | manage-pr-status: 11 | runs-on: ubuntu-latest 12 | permissions: 13 | pull-requests: write 14 | contents: write 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v4 18 | 19 | - name: Two stage PR review 20 | uses: hashicorp/two-stage-pr-approval@v0.1.0 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | 25 | # Goland IDE 26 | .idea 27 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "raft-compat/raft-latest"] 2 | path = raft-compat/raft-previous-version 3 | url = https://github.com/hashicorp/raft.git 4 | -------------------------------------------------------------------------------- /.golangci-lint.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) HashiCorp, Inc. 
2 | # SPDX-License-Identifier: MPL-2.0 3 | 4 | run: 5 | deadline: 5m 6 | 7 | linters-settings: 8 | govet: 9 | check-shadowing: true 10 | golint: 11 | min-confidence: 0 12 | depguard: 13 | rules: 14 | main: 15 | list-mode: lax 16 | allow: 17 | - "github.com/hashicorp/go-metrics/compat" 18 | deny: 19 | - pkg: "github.com/hashicorp/go-metrics" 20 | desc: not allowed, use github.com/hashicorp/go-metrics/compat instead 21 | - pkg: "github.com/armon/go-metrics" 22 | desc: not allowed, use github.com/hashicorp/go-metrics/compat instead 23 | 24 | linters: 25 | disable-all: true 26 | enable: 27 | - gofmt 28 | #- golint 29 | - govet 30 | - depguard 31 | #- varcheck 32 | #- typecheck 33 | #- gosimple 34 | 35 | issues: 36 | exclude-use-default: false 37 | exclude: 38 | # ignore the false positive erros resulting from not including a comment above every `package` keyword 39 | - should have a package comment, unless it's in another file for this package (golint) 40 | # golint: Annoying issue about not having a comment. The rare codebase has such comments 41 | # - (comment on exported (method|function|type|const)|should have( a package)? comment|comment should be of the form) 42 | # errcheck: Almost all programs ignore errors on these functions and in most cases it's ok 43 | - Error return value of .((os\.)?std(out|err)\..*|.*Close|.*Flush|os\.Remove(All)?|.*printf?|os\.(Un)?Setenv). is not checked 44 | 45 | # golint: False positive when tests are defined in package 'test' 46 | - func name will be used as test\.Test.* by other packages, and that stutters; consider calling this 47 | 48 | # staticcheck: Developers tend to write in C-style with an 49 | # explicit 'break' in a 'switch', so it's ok to ignore 50 | - ineffective break statement. Did you mean to break out of the outer loop 51 | # gosec: Too many false-positives on 'unsafe' usage 52 | - Use of unsafe calls should be audited 53 | 54 | # gosec: Too many false-positives for parametrized shell calls 55 | - Subprocess launch(ed with variable|ing should be audited) 56 | 57 | # gosec: Duplicated errcheck checks 58 | - G104 59 | 60 | # gosec: Too many issues in popular repos 61 | - (Expect directory permissions to be 0750 or less|Expect file permissions to be 0600 or less) 62 | 63 | # gosec: False positive is triggered by 'src, err := ioutil.ReadFile(filename)' 64 | - Potential file inclusion via variable 65 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) HashiCorp, Inc. 2 | # SPDX-License-Identifier: MPL-2.0 3 | 4 | language: go 5 | 6 | go: 7 | # Disabled until https://github.com/armon/go-metrics/issues/59 is fixed 8 | # - 1.6 9 | - 1.8 10 | - 1.9 11 | - 1.12 12 | - tip 13 | 14 | install: 15 | - make deps 16 | - curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(go env GOPATH)/bin latest 17 | 18 | script: 19 | - make integ 20 | 21 | notifications: 22 | flowdock: 23 | secure: fZrcf9rlh2IrQrlch1sHkn3YI7SKvjGnAl/zyV5D6NROe1Bbr6d3QRMuCXWWdhJHzjKmXk5rIzbqJhUc0PNF7YjxGNKSzqWMQ56KcvN1k8DzlqxpqkcA3Jbs6fXCWo2fssRtZ7hj/wOP1f5n6cc7kzHDt9dgaYJ6nO2fqNPJiTc= 24 | 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DEPS = $(go list -f '{{range .TestImports}}{{.}} {{end}}' ./...) 
2 | ENV = $(shell go env GOPATH) 3 | GO_VERSION = $(shell go version) 4 | GOLANG_CI_VERSION = v1.19.0 5 | 6 | # Look for versions prior to 1.10 which have a different fmt output 7 | # and don't lint with gofmt against them. 8 | ifneq (,$(findstring go version go1.8, $(GO_VERSION))) 9 | FMT= 10 | else ifneq (,$(findstring go version go1.9, $(GO_VERSION))) 11 | FMT= 12 | else 13 | FMT=--enable gofmt 14 | endif 15 | 16 | TEST_RESULTS_DIR?=/tmp/test-results 17 | 18 | test: 19 | GOTRACEBACK=all go test $(TESTARGS) -timeout=180s -race . 20 | GOTRACEBACK=all go test $(TESTARGS) -timeout=180s -tags batchtest -race . 21 | 22 | integ: test 23 | INTEG_TESTS=yes go test $(TESTARGS) -timeout=60s -run=Integ . 24 | INTEG_TESTS=yes go test $(TESTARGS) -timeout=60s -tags batchtest -run=Integ . 25 | 26 | fuzz: 27 | cd ./fuzzy && go test $(TESTARGS) -timeout=20m . 28 | cd ./fuzzy && go test $(TESTARGS) -timeout=20m -tags batchtest . 29 | 30 | deps: 31 | go get -t -d -v ./... 32 | echo $(DEPS) | xargs -n1 go get -d 33 | 34 | lint: 35 | gofmt -s -w . 36 | golangci-lint run -c .golangci-lint.yml $(FMT) . 37 | 38 | dep-linter: 39 | curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(ENV)/bin $(GOLANG_CI_VERSION) 40 | 41 | cov: 42 | INTEG_TESTS=yes gocov test github.com/hashicorp/raft | gocov-html > /tmp/coverage.html 43 | open /tmp/coverage.html 44 | 45 | .PHONY: test cov integ deps dep-linter lint 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | raft [![Build Status](https://github.com/hashicorp/raft/workflows/ci/badge.svg)](https://github.com/hashicorp/raft/actions) 2 | [![Go Reference](https://pkg.go.dev/badge/github.com/hashicorp/raft.svg)](https://pkg.go.dev/github.com/hashicorp/raft) 3 | [![Go Report Card](https://goreportcard.com/badge/github.com/hashicorp/raft)](https://goreportcard.com/report/github.com/hashicorp/raft) 4 | [![License: MPL 2.0](https://img.shields.io/badge/License-MPL%202.0-brightgreen.svg)](https://opensource.org/licenses/MPL-2.0) 5 | [![Build Status](https://github.com/hashicorp/raft/actions/workflows/ci.yml/badge.svg)](https://github.com/hashicorp/raft/actions) 6 | [![Release](https://img.shields.io/github/v/release/hashicorp/raft)](https://github.com/hashicorp/raft/releases) 7 | [![Issues](https://img.shields.io/github/issues/hashicorp/raft)](https://github.com/hashicorp/raft/issues) 8 | [![Pull Requests](https://img.shields.io/github/issues-pr/hashicorp/raft)](https://github.com/hashicorp/raft/pulls) 9 | ==== 10 | 11 | raft is a [Go](http://www.golang.org) library that manages a replicated 12 | log and can be used with an FSM to manage replicated state machines. It 13 | is a library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)). 14 | 15 | The use cases for such a library are far-reaching, such as replicated state 16 | machines which are a key component of many distributed systems. They enable 17 | building Consistent, Partition Tolerant (CP) systems, with limited 18 | fault tolerance as well. 19 | 20 | ## Building 21 | 22 | If you wish to build raft you'll need Go version 1.16+ installed. 23 | 24 | Please check your installation with: 25 | 26 | ``` 27 | go version 28 | ``` 29 | 30 | ## Documentation 31 | 32 | For complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/raft). 
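To give a feel for how the pieces fit together before you dive into the godocs, here is a minimal, single-node sketch: an application-defined FSM plus `raft.NewRaft` wired up with the in-memory stores and transport that ship with this library. The `wordTracker`/`wordSnapshot` types, the `node-1` ID, and the `"hello"` command are made up for illustration, and error handling is deliberately crude; treat it as a sketch rather than a production setup.

```
package main

import (
	"fmt"
	"io"
	"time"

	"github.com/hashicorp/raft"
)

// wordTracker is a toy FSM that only remembers the last command applied.
type wordTracker struct{ last []byte }

func (f *wordTracker) Apply(l *raft.Log) interface{} {
	f.last = append([]byte(nil), l.Data...)
	return nil
}

func (f *wordTracker) Snapshot() (raft.FSMSnapshot, error) {
	return &wordSnapshot{data: append([]byte(nil), f.last...)}, nil
}

func (f *wordTracker) Restore(rc io.ReadCloser) error {
	defer rc.Close()
	b, err := io.ReadAll(rc)
	if err != nil {
		return err
	}
	f.last = b
	return nil
}

// wordSnapshot writes the captured FSM state to a snapshot sink.
type wordSnapshot struct{ data []byte }

func (s *wordSnapshot) Persist(sink raft.SnapshotSink) error {
	if _, err := sink.Write(s.data); err != nil {
		sink.Cancel()
		return err
	}
	return sink.Close()
}

func (s *wordSnapshot) Release() {}

func main() {
	config := raft.DefaultConfig()
	config.LocalID = raft.ServerID("node-1")

	// In-memory stores and transport keep the sketch self-contained.
	logStore := raft.NewInmemStore()
	stableStore := raft.NewInmemStore()
	snapshots := raft.NewInmemSnapshotStore()
	addr, transport := raft.NewInmemTransport("")

	r, err := raft.NewRaft(config, &wordTracker{}, logStore, stableStore, snapshots, transport)
	if err != nil {
		panic(err)
	}

	// Bootstrap a single-node cluster so this node can elect itself leader.
	cfg := raft.Configuration{Servers: []raft.Server{{ID: config.LocalID, Address: addr}}}
	if err := r.BootstrapCluster(cfg).Error(); err != nil {
		panic(err)
	}

	// Wait for leadership, then commit and apply a command through the log.
	<-r.LeaderCh()
	if err := r.Apply([]byte("hello"), time.Second).Error(); err != nil {
		panic(err)
	}
	fmt.Println("last applied index:", r.AppliedIndex())
}
```

A real deployment replaces the in-memory `LogStore`, `StableStore`, and snapshot store with the durable implementations described below, and uses a network transport such as the one returned by `NewTCPTransport`.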
33 | 34 | To prevent complications with cgo, the primary backend `MDBStore` is in a separate repository, 35 | called [raft-mdb](http://github.com/hashicorp/raft-mdb). That is the recommended implementation 36 | for the `LogStore` and `StableStore`. 37 | 38 | A pure Go backend using [Bbolt](https://github.com/etcd-io/bbolt) is also available called 39 | [raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore` 40 | and `StableStore`. 41 | 42 | 43 | ## Community Contributed Examples 44 | - [Raft gRPC Example](https://github.com/Jille/raft-grpc-example) - Utilizing the Raft repository with gRPC 45 | - [Raft-based KV-store Example](https://github.com/otoolep/hraftd) - Uses Hashicorp Raft to build a distributed key-value store 46 | 47 | 48 | ## Tagged Releases 49 | 50 | As of September 2017, HashiCorp will start using tags for this library to clearly indicate 51 | major version updates. We recommend you vendor your application's dependency on this library. 52 | 53 | * v0.1.0 is the original stable version of the library that was in main and has been maintained 54 | with no breaking API changes. This was in use by Consul prior to version 0.7.0. 55 | 56 | * v1.0.0 takes the changes that were staged in the library-v2-stage-one branch. This version 57 | manages server identities using a UUID, so introduces some breaking API changes. It also versions 58 | the Raft protocol, and requires some special steps when interoperating with Raft servers running 59 | older versions of the library (see the detailed comment in config.go about version compatibility). 60 | You can reference https://github.com/hashicorp/consul/pull/2222 for an idea of what was required 61 | to port Consul to these new interfaces. 62 | 63 | This version includes some new features as well, including non voting servers, a new address 64 | provider abstraction in the transport layer, and more resilient snapshots. 65 | 66 | ## Protocol 67 | 68 | raft is based on ["Raft: In Search of an Understandable Consensus Algorithm"](https://raft.github.io/raft.pdf) 69 | 70 | A high level overview of the Raft protocol is described below, but for details please read the full 71 | [Raft paper](https://raft.github.io/raft.pdf) 72 | followed by the raft source. Any questions about the raft protocol should be sent to the 73 | [raft-dev mailing list](https://groups.google.com/forum/#!forum/raft-dev). 74 | 75 | ### Protocol Description 76 | 77 | Raft nodes are always in one of three states: follower, candidate or leader. All 78 | nodes initially start out as a follower. In this state, nodes can accept log entries 79 | from a leader and cast votes. If no entries are received for some time, nodes 80 | self-promote to the candidate state. In the candidate state nodes request votes from 81 | their peers. If a candidate receives a quorum of votes, then it is promoted to a leader. 82 | The leader must accept new log entries and replicate to all the other followers. 83 | In addition, if stale reads are not acceptable, all queries must also be performed on 84 | the leader. 85 | 86 | Once a cluster has a leader, it is able to accept new log entries. A client can 87 | request that a leader append a new log entry, which is an opaque binary blob to 88 | Raft. The leader then writes the entry to durable storage and attempts to replicate 89 | to a quorum of followers. Once the log entry is considered *committed*, it can be 90 | *applied* to a finite state machine. 
The finite state machine is application specific, 91 | and is implemented using an interface. 92 | 93 | An obvious question relates to the unbounded nature of a replicated log. Raft provides 94 | a mechanism by which the current state is snapshotted, and the log is compacted. Because 95 | of the FSM abstraction, restoring the state of the FSM must result in the same state 96 | as a replay of old logs. This allows Raft to capture the FSM state at a point in time, 97 | and then remove all the logs that were used to reach that state. This is performed automatically 98 | without user intervention, and prevents unbounded disk usage as well as minimizing 99 | time spent replaying logs. 100 | 101 | Lastly, there is the issue of updating the peer set when new servers are joining 102 | or existing servers are leaving. As long as a quorum of nodes is available, this 103 | is not an issue as Raft provides mechanisms to dynamically update the peer set. 104 | If a quorum of nodes is unavailable, then this becomes a very challenging issue. 105 | For example, suppose there are only 2 peers, A and B. The quorum size is also 106 | 2, meaning both nodes must agree to commit a log entry. If either A or B fails, 107 | it is now impossible to reach quorum. This means the cluster is unable to add, 108 | or remove a node, or commit any additional log entries. This results in *unavailability*. 109 | At this point, manual intervention would be required to remove either A or B, 110 | and to restart the remaining node in bootstrap mode. 111 | 112 | A Raft cluster of 3 nodes can tolerate a single node failure, while a cluster 113 | of 5 can tolerate 2 node failures. The recommended configuration is to either 114 | run 3 or 5 raft servers. This maximizes availability without 115 | greatly sacrificing performance. 116 | 117 | In terms of performance, Raft is comparable to Paxos. Assuming stable leadership, 118 | committing a log entry requires a single round trip to half of the cluster. 119 | Thus performance is bound by disk I/O and network latency. 120 | 121 | 122 | ## Metrics Emission and Compatibility 123 | 124 | This library can emit metrics using either `github.com/armon/go-metrics` or `github.com/hashicorp/go-metrics`. Choosing between the libraries is controlled via build tags. 125 | 126 | **Build Tags** 127 | * `armonmetrics` - Using this tag will cause metrics to be routed to `armon/go-metrics` 128 | * `hashicorpmetrics` - Using this tag will cause all metrics to be routed to `hashicorp/go-metrics` 129 | 130 | If no build tag is specified, the default behavior is to use `armon/go-metrics`. 131 | 132 | **Deprecating `armon/go-metrics`** 133 | 134 | Emitting metrics to `armon/go-metrics` is officially deprecated. Usage of `armon/go-metrics` will remain the default until mid-2025 with opt-in support continuing to the end of 2025. 135 | 136 | **Migration** 137 | To migrate an application currently using the older `armon/go-metrics` to instead use `hashicorp/go-metrics` the following should be done. 138 | 139 | 1. Upgrade libraries using `armon/go-metrics` to consume `hashicorp/go-metrics/compat` instead. This should involve only changing import statements. All repositories in the `hashicorp` namespace 140 | 2. Update an applications library dependencies to those that have the compatibility layer configured. 141 | 3. 
Update the application to use `hashicorp/go-metrics` for configuring metrics export instead of `armon/go-metrics` 142 | * Replace all application imports of `github.com/armon/go-metrics` with `github.com/hashicorp/go-metrics` 143 | * Instrument your build system to build with the `hashicorpmetrics` tag. 144 | 145 | Eventually once the default behavior changes to use `hashicorp/go-metrics` by default (mid-2025), you can drop the `hashicorpmetrics` build tag. 146 | -------------------------------------------------------------------------------- /bench/bench.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raftbench 5 | 6 | // raftbench provides common benchmarking functions which can be used by 7 | // anything which implements the raft.LogStore and raft.StableStore interfaces. 8 | // All functions accept these interfaces and perform benchmarking. This 9 | // makes comparing backend performance easier by sharing the tests. 10 | 11 | import ( 12 | "testing" 13 | 14 | "github.com/hashicorp/raft" 15 | ) 16 | 17 | func FirstIndex(b *testing.B, store raft.LogStore) { 18 | // Create some fake data 19 | var logs []*raft.Log 20 | for i := 1; i < 10; i++ { 21 | logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")}) 22 | } 23 | if err := store.StoreLogs(logs); err != nil { 24 | b.Fatalf("err: %s", err) 25 | } 26 | b.ResetTimer() 27 | 28 | // Run FirstIndex a number of times 29 | for n := 0; n < b.N; n++ { 30 | store.FirstIndex() 31 | } 32 | } 33 | 34 | func LastIndex(b *testing.B, store raft.LogStore) { 35 | // Create some fake data 36 | var logs []*raft.Log 37 | for i := 1; i < 10; i++ { 38 | logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")}) 39 | } 40 | if err := store.StoreLogs(logs); err != nil { 41 | b.Fatalf("err: %s", err) 42 | } 43 | b.ResetTimer() 44 | 45 | // Run LastIndex a number of times 46 | for n := 0; n < b.N; n++ { 47 | store.LastIndex() 48 | } 49 | } 50 | 51 | func GetLog(b *testing.B, store raft.LogStore) { 52 | // Create some fake data 53 | var logs []*raft.Log 54 | for i := 1; i < 10; i++ { 55 | logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")}) 56 | } 57 | if err := store.StoreLogs(logs); err != nil { 58 | b.Fatalf("err: %s", err) 59 | } 60 | b.ResetTimer() 61 | 62 | // Run GetLog a number of times 63 | for n := 0; n < b.N; n++ { 64 | if err := store.GetLog(5, new(raft.Log)); err != nil { 65 | b.Fatalf("err: %s", err) 66 | } 67 | } 68 | } 69 | 70 | func StoreLog(b *testing.B, store raft.LogStore) { 71 | // Run StoreLog a number of times 72 | for n := 0; n < b.N; n++ { 73 | log := &raft.Log{Index: uint64(n), Data: []byte("data")} 74 | if err := store.StoreLog(log); err != nil { 75 | b.Fatalf("err: %s", err) 76 | } 77 | } 78 | } 79 | 80 | func StoreLogs(b *testing.B, store raft.LogStore) { 81 | // Run StoreLogs a number of times. We want to set multiple logs each 82 | // run, so we create 3 logs with incrementing indexes for each iteration. 
83 | for n := 0; n < b.N; n++ { 84 | b.StopTimer() 85 | offset := 3 * (n + 1) 86 | logs := []*raft.Log{ 87 | {Index: uint64(offset - 2), Data: []byte("data")}, 88 | {Index: uint64(offset - 1), Data: []byte("data")}, 89 | {Index: uint64(offset), Data: []byte("data")}, 90 | } 91 | b.StartTimer() 92 | 93 | if err := store.StoreLogs(logs); err != nil { 94 | b.Fatalf("err: %s", err) 95 | } 96 | } 97 | } 98 | 99 | func DeleteRange(b *testing.B, store raft.LogStore) { 100 | // Create some fake data. In this case, we create 3 new log entries for each 101 | // test case, and separate them by index in multiples of 10. This allows 102 | // some room so that we can test deleting ranges with "extra" logs 103 | // to ensure we stop going to the database once our max index is hit. 104 | var logs []*raft.Log 105 | for n := 0; n < b.N; n++ { 106 | offset := 10 * n 107 | for i := offset; i < offset+3; i++ { 108 | logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")}) 109 | } 110 | } 111 | if err := store.StoreLogs(logs); err != nil { 112 | b.Fatalf("err: %s", err) 113 | } 114 | b.ResetTimer() 115 | 116 | // Delete a range of the data 117 | for n := 0; n < b.N; n++ { 118 | offset := 10 * n 119 | if err := store.DeleteRange(uint64(offset), uint64(offset+9)); err != nil { 120 | b.Fatalf("err: %s", err) 121 | } 122 | } 123 | } 124 | 125 | func Set(b *testing.B, store raft.StableStore) { 126 | // Run Set a number of times 127 | for n := 0; n < b.N; n++ { 128 | if err := store.Set([]byte{byte(n)}, []byte("val")); err != nil { 129 | b.Fatalf("err: %s", err) 130 | } 131 | } 132 | } 133 | 134 | func Get(b *testing.B, store raft.StableStore) { 135 | // Create some fake data 136 | for i := 1; i < 10; i++ { 137 | if err := store.Set([]byte{byte(i)}, []byte("val")); err != nil { 138 | b.Fatalf("err: %s", err) 139 | } 140 | } 141 | b.ResetTimer() 142 | 143 | // Run Get a number of times 144 | for n := 0; n < b.N; n++ { 145 | if _, err := store.Get([]byte{0x05}); err != nil { 146 | b.Fatalf("err: %s", err) 147 | } 148 | } 149 | } 150 | 151 | func SetUint64(b *testing.B, store raft.StableStore) { 152 | // Run SetUint64 a number of times 153 | for n := 0; n < b.N; n++ { 154 | if err := store.SetUint64([]byte{byte(n)}, uint64(n)); err != nil { 155 | b.Fatalf("err: %s", err) 156 | } 157 | } 158 | } 159 | 160 | func GetUint64(b *testing.B, store raft.StableStore) { 161 | // Create some fake data 162 | for i := 0; i < 10; i++ { 163 | if err := store.SetUint64([]byte{byte(i)}, uint64(i)); err != nil { 164 | b.Fatalf("err: %s", err) 165 | } 166 | } 167 | b.ResetTimer() 168 | 169 | // Run GetUint64 a number of times 170 | for n := 0; n < b.N; n++ { 171 | if _, err := store.GetUint64([]byte{0x05}); err != nil { 172 | b.Fatalf("err: %s", err) 173 | } 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /bench_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 
2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "testing" 8 | "time" 9 | 10 | "github.com/hashicorp/go-hclog" 11 | ) 12 | 13 | func BenchmarkStoreLogInMem(b *testing.B) { 14 | conf := DefaultConfig() 15 | conf.LocalID = "first" 16 | conf.HeartbeatTimeout = 50 * time.Millisecond 17 | conf.ElectionTimeout = 50 * time.Millisecond 18 | conf.LeaderLeaseTimeout = 50 * time.Millisecond 19 | conf.CommitTimeout = 5 * time.Millisecond 20 | conf.SnapshotThreshold = 100 21 | conf.TrailingLogs = 10 22 | conf.LogLevel = "OFF" 23 | raft := MakeRaft(b, conf, true) 24 | raft.logger.SetLevel(hclog.Off) 25 | 26 | NoErr(WaitFor(raft, Leader), b) 27 | 28 | applyAndWait := func(leader *RaftEnv, n, sz int) { 29 | // Do some commits 30 | var futures []ApplyFuture 31 | for i := 0; i < n; i++ { 32 | futures = append(futures, leader.raft.Apply(logBytes(i, sz), 0)) 33 | } 34 | for _, f := range futures { 35 | NoErr(WaitFuture(f), b) 36 | leader.logger.Debug("applied", "index", f.Index(), "size", sz) 37 | } 38 | } 39 | 40 | for i := 0; i < b.N; i++ { 41 | // Do some commits 42 | applyAndWait(raft, 100, 10) 43 | // Do a snapshot 44 | NoErr(WaitFuture(raft.raft.Snapshot()), b) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /commands.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | // RPCHeader is a common sub-structure used to pass along protocol version and 7 | // other information about the cluster. For older Raft implementations before 8 | // versioning was added this will default to a zero-valued structure when read 9 | // by newer Raft versions. 10 | type RPCHeader struct { 11 | // ProtocolVersion is the version of the protocol the sender is 12 | // speaking. 13 | ProtocolVersion ProtocolVersion 14 | // ID is the ServerID of the node sending the RPC Request or Response 15 | ID []byte 16 | // Addr is the ServerAddr of the node sending the RPC Request or Response 17 | Addr []byte 18 | } 19 | 20 | // WithRPCHeader is an interface that exposes the RPC header. 21 | type WithRPCHeader interface { 22 | GetRPCHeader() RPCHeader 23 | } 24 | 25 | // AppendEntriesRequest is the command used to append entries to the 26 | // replicated log. 27 | type AppendEntriesRequest struct { 28 | RPCHeader 29 | 30 | // Provide the current term and leader 31 | Term uint64 32 | 33 | // Deprecated: use RPCHeader.Addr instead 34 | Leader []byte 35 | 36 | // Provide the previous entries for integrity checking 37 | PrevLogEntry uint64 38 | PrevLogTerm uint64 39 | 40 | // New entries to commit 41 | Entries []*Log 42 | 43 | // Commit index on the leader 44 | LeaderCommitIndex uint64 45 | } 46 | 47 | // GetRPCHeader - See WithRPCHeader. 48 | func (r *AppendEntriesRequest) GetRPCHeader() RPCHeader { 49 | return r.RPCHeader 50 | } 51 | 52 | // AppendEntriesResponse is the response returned from an 53 | // AppendEntriesRequest. 54 | type AppendEntriesResponse struct { 55 | RPCHeader 56 | 57 | // Newer term if leader is out of date 58 | Term uint64 59 | 60 | // Last Log is a hint to help accelerate rebuilding slow nodes 61 | LastLog uint64 62 | 63 | // We may not succeed if we have a conflicting entry 64 | Success bool 65 | 66 | // There are scenarios where this request didn't succeed 67 | // but there's no need to wait/back-off the next attempt. 68 | NoRetryBackoff bool 69 | } 70 | 71 | // GetRPCHeader - See WithRPCHeader. 
72 | func (r *AppendEntriesResponse) GetRPCHeader() RPCHeader { 73 | return r.RPCHeader 74 | } 75 | 76 | // RequestVoteRequest is the command used by a candidate to ask a Raft peer 77 | // for a vote in an election. 78 | type RequestVoteRequest struct { 79 | RPCHeader 80 | 81 | // Provide the term and our id 82 | Term uint64 83 | 84 | // Deprecated: use RPCHeader.Addr instead 85 | Candidate []byte 86 | 87 | // Used to ensure safety 88 | LastLogIndex uint64 89 | LastLogTerm uint64 90 | 91 | // Used to indicate to peers if this vote was triggered by a leadership 92 | // transfer. It is required for leadership transfer to work, because servers 93 | // wouldn't vote otherwise if they are aware of an existing leader. 94 | LeadershipTransfer bool 95 | } 96 | 97 | // GetRPCHeader - See WithRPCHeader. 98 | func (r *RequestVoteRequest) GetRPCHeader() RPCHeader { 99 | return r.RPCHeader 100 | } 101 | 102 | // RequestVoteResponse is the response returned from a RequestVoteRequest. 103 | type RequestVoteResponse struct { 104 | RPCHeader 105 | 106 | // Newer term if leader is out of date. 107 | Term uint64 108 | 109 | // Peers is deprecated, but required by servers that only understand 110 | // protocol version 0. This is not populated in protocol version 2 111 | // and later. 112 | Peers []byte 113 | 114 | // Is the vote granted. 115 | Granted bool 116 | } 117 | 118 | // GetRPCHeader - See WithRPCHeader. 119 | func (r *RequestVoteResponse) GetRPCHeader() RPCHeader { 120 | return r.RPCHeader 121 | } 122 | 123 | // RequestPreVoteRequest is the command used by a candidate to ask a Raft peer 124 | // for a vote in an election. 125 | type RequestPreVoteRequest struct { 126 | RPCHeader 127 | 128 | // Provide the term and our id 129 | Term uint64 130 | 131 | // Used to ensure safety 132 | LastLogIndex uint64 133 | LastLogTerm uint64 134 | } 135 | 136 | // GetRPCHeader - See WithRPCHeader. 137 | func (r *RequestPreVoteRequest) GetRPCHeader() RPCHeader { 138 | return r.RPCHeader 139 | } 140 | 141 | // RequestPreVoteResponse is the response returned from a RequestPreVoteRequest. 142 | type RequestPreVoteResponse struct { 143 | RPCHeader 144 | 145 | // Newer term if leader is out of date. 146 | Term uint64 147 | 148 | // Is the vote granted. 149 | Granted bool 150 | } 151 | 152 | // GetRPCHeader - See WithRPCHeader. 153 | func (r *RequestPreVoteResponse) GetRPCHeader() RPCHeader { 154 | return r.RPCHeader 155 | } 156 | 157 | // InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its 158 | // log (and state machine) from a snapshot on another peer. 159 | type InstallSnapshotRequest struct { 160 | RPCHeader 161 | SnapshotVersion SnapshotVersion 162 | 163 | Term uint64 164 | Leader []byte 165 | 166 | // These are the last index/term included in the snapshot 167 | LastLogIndex uint64 168 | LastLogTerm uint64 169 | 170 | // Peer Set in the snapshot. 171 | // but remains here in case we receive an InstallSnapshot from a leader 172 | // that's running old code. 173 | // Deprecated: This is deprecated in favor of Configuration 174 | Peers []byte 175 | 176 | // Cluster membership. 177 | Configuration []byte 178 | // Log index where 'Configuration' entry was originally written. 179 | ConfigurationIndex uint64 180 | 181 | // Size of the snapshot 182 | Size int64 183 | } 184 | 185 | // GetRPCHeader - See WithRPCHeader. 
186 | func (r *InstallSnapshotRequest) GetRPCHeader() RPCHeader { 187 | return r.RPCHeader 188 | } 189 | 190 | // InstallSnapshotResponse is the response returned from an 191 | // InstallSnapshotRequest. 192 | type InstallSnapshotResponse struct { 193 | RPCHeader 194 | 195 | Term uint64 196 | Success bool 197 | } 198 | 199 | // GetRPCHeader - See WithRPCHeader. 200 | func (r *InstallSnapshotResponse) GetRPCHeader() RPCHeader { 201 | return r.RPCHeader 202 | } 203 | 204 | // TimeoutNowRequest is the command used by a leader to signal another server to 205 | // start an election. 206 | type TimeoutNowRequest struct { 207 | RPCHeader 208 | } 209 | 210 | // GetRPCHeader - See WithRPCHeader. 211 | func (r *TimeoutNowRequest) GetRPCHeader() RPCHeader { 212 | return r.RPCHeader 213 | } 214 | 215 | // TimeoutNowResponse is the response to TimeoutNowRequest. 216 | type TimeoutNowResponse struct { 217 | RPCHeader 218 | } 219 | 220 | // GetRPCHeader - See WithRPCHeader. 221 | func (r *TimeoutNowResponse) GetRPCHeader() RPCHeader { 222 | return r.RPCHeader 223 | } 224 | -------------------------------------------------------------------------------- /commitment.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "sort" 8 | "sync" 9 | ) 10 | 11 | // Commitment is used to advance the leader's commit index. The leader and 12 | // replication goroutines report in newly written entries with match(), and 13 | // this notifies on commitCh when the commit index has advanced. 14 | type commitment struct { 15 | // protects matchIndexes and commitIndex 16 | sync.Mutex 17 | // notified when commitIndex increases 18 | commitCh chan struct{} 19 | // voter ID to log index: the server stores up through this log entry 20 | matchIndexes map[ServerID]uint64 21 | // a quorum stores up through this log entry. monotonically increases. 22 | commitIndex uint64 23 | // the first index of this leader's term: this needs to be replicated to a 24 | // majority of the cluster before this leader may mark anything committed 25 | // (per Raft's commitment rule) 26 | startIndex uint64 27 | } 28 | 29 | // newCommitment returns a commitment struct that notifies the provided 30 | // channel when log entries have been committed. A new commitment struct is 31 | // created each time this server becomes leader for a particular term. 32 | // 'configuration' is the servers in the cluster. 33 | // 'startIndex' is the first index created in this term (see 34 | // its description above). 35 | func newCommitment(commitCh chan struct{}, configuration Configuration, startIndex uint64) *commitment { 36 | matchIndexes := make(map[ServerID]uint64) 37 | for _, server := range configuration.Servers { 38 | if server.Suffrage == Voter { 39 | matchIndexes[server.ID] = 0 40 | } 41 | } 42 | return &commitment{ 43 | commitCh: commitCh, 44 | matchIndexes: matchIndexes, 45 | commitIndex: 0, 46 | startIndex: startIndex, 47 | } 48 | } 49 | 50 | // Called when a new cluster membership configuration is created: it will be 51 | // used to determine commitment from now on. 'configuration' is the servers in 52 | // the cluster. 
53 | func (c *commitment) setConfiguration(configuration Configuration) { 54 | c.Lock() 55 | defer c.Unlock() 56 | oldMatchIndexes := c.matchIndexes 57 | c.matchIndexes = make(map[ServerID]uint64) 58 | for _, server := range configuration.Servers { 59 | if server.Suffrage == Voter { 60 | c.matchIndexes[server.ID] = oldMatchIndexes[server.ID] // defaults to 0 61 | } 62 | } 63 | c.recalculate() 64 | } 65 | 66 | // Called by leader after commitCh is notified 67 | func (c *commitment) getCommitIndex() uint64 { 68 | c.Lock() 69 | defer c.Unlock() 70 | return c.commitIndex 71 | } 72 | 73 | // Match is called once a server completes writing entries to disk: either the 74 | // leader has written the new entry or a follower has replied to an 75 | // AppendEntries RPC. The given server's disk agrees with this server's log up 76 | // through the given index. 77 | func (c *commitment) match(server ServerID, matchIndex uint64) { 78 | c.Lock() 79 | defer c.Unlock() 80 | if prev, hasVote := c.matchIndexes[server]; hasVote && matchIndex > prev { 81 | c.matchIndexes[server] = matchIndex 82 | c.recalculate() 83 | } 84 | } 85 | 86 | // Internal helper to calculate new commitIndex from matchIndexes. 87 | // Must be called with lock held. 88 | func (c *commitment) recalculate() { 89 | if len(c.matchIndexes) == 0 { 90 | return 91 | } 92 | 93 | matched := make([]uint64, 0, len(c.matchIndexes)) 94 | for _, idx := range c.matchIndexes { 95 | matched = append(matched, idx) 96 | } 97 | sort.Sort(uint64Slice(matched)) 98 | quorumMatchIndex := matched[(len(matched)-1)/2] 99 | 100 | if quorumMatchIndex > c.commitIndex && quorumMatchIndex >= c.startIndex { 101 | c.commitIndex = quorumMatchIndex 102 | asyncNotifyCh(c.commitCh) 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /commitment_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "testing" 8 | ) 9 | 10 | func makeConfiguration(voters []string) Configuration { 11 | var configuration Configuration 12 | for _, voter := range voters { 13 | configuration.Servers = append(configuration.Servers, Server{ 14 | Suffrage: Voter, 15 | Address: ServerAddress(voter + "addr"), 16 | ID: ServerID(voter), 17 | }) 18 | } 19 | return configuration 20 | } 21 | 22 | // Returns a slice of server names of size n. 23 | func voters(n int) Configuration { 24 | if n > 7 { 25 | panic("only up to 7 servers implemented") 26 | } 27 | return makeConfiguration([]string{"s1", "s2", "s3", "s4", "s5", "s6", "s7"}[:n]) 28 | } 29 | 30 | // Tests setVoters() keeps matchIndexes where possible. 31 | func TestCommitment_setVoters(t *testing.T) { 32 | commitCh := make(chan struct{}, 1) 33 | c := newCommitment(commitCh, makeConfiguration([]string{"a", "b", "c"}), 0) 34 | c.match("a", 10) 35 | c.match("b", 20) 36 | c.match("c", 30) 37 | // commitIndex: 20 38 | if !drainNotifyCh(commitCh) { 39 | t.Fatalf("expected commit notify") 40 | } 41 | c.setConfiguration(makeConfiguration([]string{"c", "d", "e"})) 42 | // c: 30, d: 0, e: 0 43 | c.match("e", 40) 44 | if c.getCommitIndex() != 30 { 45 | t.Fatalf("expected 30 entries committed, found %d", 46 | c.getCommitIndex()) 47 | } 48 | if !drainNotifyCh(commitCh) { 49 | t.Fatalf("expected commit notify") 50 | } 51 | } 52 | 53 | // Tests match() being called with smaller index than before. 
54 | func TestCommitment_match_max(t *testing.T) { 55 | commitCh := make(chan struct{}, 1) 56 | c := newCommitment(commitCh, voters(5), 4) 57 | 58 | c.match("s1", 8) 59 | c.match("s2", 8) 60 | c.match("s2", 1) 61 | c.match("s3", 8) 62 | 63 | if c.getCommitIndex() != 8 { 64 | t.Fatalf("calling match with an earlier index should be ignored") 65 | } 66 | } 67 | 68 | // Tests match() being called with non-voters. 69 | func TestCommitment_match_nonVoting(t *testing.T) { 70 | commitCh := make(chan struct{}, 1) 71 | c := newCommitment(commitCh, voters(5), 4) 72 | 73 | c.match("s1", 8) 74 | c.match("s2", 8) 75 | c.match("s3", 8) 76 | 77 | if !drainNotifyCh(commitCh) { 78 | t.Fatalf("expected commit notify") 79 | } 80 | 81 | c.match("s90", 10) 82 | c.match("s91", 10) 83 | c.match("s92", 10) 84 | 85 | if c.getCommitIndex() != 8 { 86 | t.Fatalf("non-voting servers shouldn't be able to commit") 87 | } 88 | if drainNotifyCh(commitCh) { 89 | t.Fatalf("unexpected commit notify") 90 | } 91 | } 92 | 93 | // Tests recalculate() algorithm. 94 | func TestCommitment_recalculate(t *testing.T) { 95 | commitCh := make(chan struct{}, 1) 96 | c := newCommitment(commitCh, voters(5), 0) 97 | 98 | c.match("s1", 30) 99 | c.match("s2", 20) 100 | 101 | if c.getCommitIndex() != 0 { 102 | t.Fatalf("shouldn't commit after two of five servers") 103 | } 104 | if drainNotifyCh(commitCh) { 105 | t.Fatalf("unexpected commit notify") 106 | } 107 | 108 | c.match("s3", 10) 109 | if c.getCommitIndex() != 10 { 110 | t.Fatalf("expected 10 entries committed, found %d", 111 | c.getCommitIndex()) 112 | } 113 | if !drainNotifyCh(commitCh) { 114 | t.Fatalf("expected commit notify") 115 | } 116 | c.match("s4", 15) 117 | if c.getCommitIndex() != 15 { 118 | t.Fatalf("expected 15 entries committed, found %d", 119 | c.getCommitIndex()) 120 | } 121 | if !drainNotifyCh(commitCh) { 122 | t.Fatalf("expected commit notify") 123 | } 124 | 125 | c.setConfiguration(voters(3)) 126 | // s1: 30, s2: 20, s3: 10 127 | if c.getCommitIndex() != 20 { 128 | t.Fatalf("expected 20 entries committed, found %d", 129 | c.getCommitIndex()) 130 | } 131 | if !drainNotifyCh(commitCh) { 132 | t.Fatalf("expected commit notify") 133 | } 134 | 135 | c.setConfiguration(voters(4)) 136 | // s1: 30, s2: 20, s3: 10, s4: 0 137 | c.match("s2", 25) 138 | if c.getCommitIndex() != 20 { 139 | t.Fatalf("expected 20 entries committed, found %d", 140 | c.getCommitIndex()) 141 | } 142 | if drainNotifyCh(commitCh) { 143 | t.Fatalf("unexpected commit notify") 144 | } 145 | c.match("s4", 23) 146 | if c.getCommitIndex() != 23 { 147 | t.Fatalf("expected 23 entries committed, found %d", 148 | c.getCommitIndex()) 149 | } 150 | if !drainNotifyCh(commitCh) { 151 | t.Fatalf("expected commit notify") 152 | } 153 | } 154 | 155 | // Tests recalculate() respecting startIndex. 
156 | func TestCommitment_recalculate_startIndex(t *testing.T) { 157 | commitCh := make(chan struct{}, 1) 158 | c := newCommitment(commitCh, voters(5), 4) 159 | 160 | c.match("s1", 3) 161 | c.match("s2", 3) 162 | c.match("s3", 3) 163 | 164 | if c.getCommitIndex() != 0 { 165 | t.Fatalf("can't commit until startIndex is replicated to a quorum") 166 | } 167 | if drainNotifyCh(commitCh) { 168 | t.Fatalf("unexpected commit notify") 169 | } 170 | 171 | c.match("s1", 4) 172 | c.match("s2", 4) 173 | c.match("s3", 4) 174 | 175 | if c.getCommitIndex() != 4 { 176 | t.Fatalf("should be able to commit startIndex once replicated to a quorum") 177 | } 178 | if !drainNotifyCh(commitCh) { 179 | t.Fatalf("expected commit notify") 180 | } 181 | } 182 | 183 | // With no voting members in the cluster, the most sane behavior is probably 184 | // to not mark anything committed. 185 | func TestCommitment_noVoterSanity(t *testing.T) { 186 | commitCh := make(chan struct{}, 1) 187 | c := newCommitment(commitCh, makeConfiguration([]string{}), 4) 188 | c.match("s1", 10) 189 | c.setConfiguration(makeConfiguration([]string{})) 190 | c.match("s1", 10) 191 | if c.getCommitIndex() != 0 { 192 | t.Fatalf("no voting servers: shouldn't be able to commit") 193 | } 194 | if drainNotifyCh(commitCh) { 195 | t.Fatalf("unexpected commit notify") 196 | } 197 | 198 | // add a voter so we can commit something and then remove it 199 | c.setConfiguration(voters(1)) 200 | c.match("s1", 10) 201 | if c.getCommitIndex() != 10 { 202 | t.Fatalf("expected 10 entries committed, found %d", 203 | c.getCommitIndex()) 204 | } 205 | if !drainNotifyCh(commitCh) { 206 | t.Fatalf("expected commit notify") 207 | } 208 | 209 | c.setConfiguration(makeConfiguration([]string{})) 210 | c.match("s1", 20) 211 | if c.getCommitIndex() != 10 { 212 | t.Fatalf("expected 10 entries committed, found %d", 213 | c.getCommitIndex()) 214 | } 215 | if drainNotifyCh(commitCh) { 216 | t.Fatalf("unexpected commit notify") 217 | } 218 | } 219 | 220 | // Single voter commits immediately. 221 | func TestCommitment_singleVoter(t *testing.T) { 222 | commitCh := make(chan struct{}, 1) 223 | c := newCommitment(commitCh, voters(1), 4) 224 | c.match("s1", 10) 225 | if c.getCommitIndex() != 10 { 226 | t.Fatalf("expected 10 entries committed, found %d", 227 | c.getCommitIndex()) 228 | } 229 | if !drainNotifyCh(commitCh) { 230 | t.Fatalf("expected commit notify") 231 | } 232 | c.setConfiguration(voters(1)) 233 | if drainNotifyCh(commitCh) { 234 | t.Fatalf("unexpected commit notify") 235 | } 236 | c.match("s1", 12) 237 | if c.getCommitIndex() != 12 { 238 | t.Fatalf("expected 12 entries committed, found %d", 239 | c.getCommitIndex()) 240 | } 241 | if !drainNotifyCh(commitCh) { 242 | t.Fatalf("expected commit notify") 243 | } 244 | } 245 | -------------------------------------------------------------------------------- /discard_snapshot.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "fmt" 8 | "io" 9 | ) 10 | 11 | // DiscardSnapshotStore is used to successfully snapshot while 12 | // always discarding the snapshot. This is useful for when the 13 | // log should be truncated but no snapshot should be retained. 14 | // This should never be used for production use, and is only 15 | // suitable for testing. 
16 | type DiscardSnapshotStore struct{}
17 |
18 | // DiscardSnapshotSink is used to fulfill the SnapshotSink interface
19 | // while always discarding the snapshot. This is useful for when the log
20 | // should be truncated but no snapshot should be retained. This
21 | // should never be used for production use, and is only suitable
22 | // for testing.
23 | type DiscardSnapshotSink struct{}
24 |
25 | // NewDiscardSnapshotStore is used to create a new DiscardSnapshotStore.
26 | func NewDiscardSnapshotStore() *DiscardSnapshotStore {
27 | return &DiscardSnapshotStore{}
28 | }
29 |
30 | // Create returns a valid type implementing the SnapshotSink which
31 | // always discards the snapshot.
32 | func (d *DiscardSnapshotStore) Create(version SnapshotVersion, index, term uint64,
33 | configuration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {
34 | return &DiscardSnapshotSink{}, nil
35 | }
36 |
37 | // List returns successfully with a nil for []*SnapshotMeta.
38 | func (d *DiscardSnapshotStore) List() ([]*SnapshotMeta, error) {
39 | return nil, nil
40 | }
41 |
42 | // Open returns an error since the DiscardSnapshotStore does not
43 | // support opening snapshots.
44 | func (d *DiscardSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
45 | return nil, nil, fmt.Errorf("open is not supported")
46 | }
47 |
48 | // Write returns successfully with the length of the input byte slice
49 | // to satisfy the WriteCloser interface
50 | func (d *DiscardSnapshotSink) Write(b []byte) (int, error) {
51 | return len(b), nil
52 | }
53 |
54 | // Close returns a nil error
55 | func (d *DiscardSnapshotSink) Close() error {
56 | return nil
57 | }
58 |
59 | // ID returns "discard" for DiscardSnapshotSink
60 | func (d *DiscardSnapshotSink) ID() string {
61 | return "discard"
62 | }
63 |
64 | // Cancel returns successfully with a nil error
65 | func (d *DiscardSnapshotSink) Cancel() error {
66 | return nil
67 | }
68 | --------------------------------------------------------------------------------
/discard_snapshot_test.go:
--------------------------------------------------------------------------------
1 | // Copyright (c) HashiCorp, Inc.
2 | // SPDX-License-Identifier: MPL-2.0
3 |
4 | package raft
5 |
6 | import "testing"
7 |
8 | func TestDiscardSnapshotStoreImpl(t *testing.T) {
9 | var impl interface{} = &DiscardSnapshotStore{}
10 | if _, ok := impl.(SnapshotStore); !ok {
11 | t.Fatalf("DiscardSnapshotStore not a SnapshotStore")
12 | }
13 | }
14 |
15 | func TestDiscardSnapshotSinkImpl(t *testing.T) {
16 | var impl interface{} = &DiscardSnapshotSink{}
17 | if _, ok := impl.(SnapshotSink); !ok {
18 | t.Fatalf("DiscardSnapshotSink not a SnapshotSink")
19 | }
20 | }
21 | --------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # Raft Developer Documentation
2 |
3 | This documentation provides a high level introduction to the `hashicorp/raft`
4 | implementation. The intended audience is anyone interested in understanding
5 | or contributing to the code.
6 |
7 | ## Contents
8 |
9 | 1. [Terminology](#terminology)
10 | 2. [Operations](#operations)
11 | 1. [Apply](./apply.md)
12 | 3. [Threads](#threads)
13 |
14 |
15 | ## Terminology
16 |
17 | This documentation uses the following terms as defined.
18 | 19 | * **Cluster** - the set of peers in the raft configuration 20 | * **Peer** - a node that participates in the consensus protocol using `hashicorp/raft`. A 21 | peer may be in one of the following states: **follower**, **candidate**, or **leader**. 22 | * **Log** - the full set of log entries. 23 | * **Log Entry** - an entry in the log. Each entry has an index that is used to order it 24 | relative to other log entries. 25 | * **Committed** - A log entry is considered committed if it is safe for that entry to be 26 | applied to state machines. A log entry is committed once the leader that created the 27 | entry has replicated it on a majority of the peers. A peer has successfully 28 | replicated the entry once it is persisted. 29 | * **Applied** - log entry applied to the state machine (FSM) 30 | * **Term** - raft divides time into terms of arbitrary length. Terms are numbered with 31 | consecutive integers. Each term begins with an election, in which one or more candidates 32 | attempt to become leader. If a candidate wins the election, then it serves as leader for 33 | the rest of the term. If the election ends with a split vote, the term will end with no 34 | leader. 35 | * **FSM** - finite state machine, stores the cluster state 36 | * **Client** - the application that uses the `hashicorp/raft` library 37 | 38 | ## Operations 39 | 40 | ### Leader Write 41 | 42 | Most write operations must be performed on the leader. 43 | 44 | * RequestConfigChange - update the raft peer list configuration 45 | * Apply - apply a log entry to the log on a majority of peers, and the FSM. See [raft apply](apply.md) for more details. 46 | * Barrier - a special Apply that does not modify the FSM, used to wait for previous logs to be applied 47 | * LeadershipTransfer - stop accepting client requests, and tell a different peer to start a leadership election 48 | * Restore (Snapshot) - overwrite the cluster state with the contents of the snapshot (excluding cluster configuration) 49 | * VerifyLeader - send a heartbeat to all voters to confirm the peer is still the leader 50 | 51 | ### Follower Write 52 | 53 | * BootstrapCluster - store the cluster configuration in the local log store 54 | 55 | 56 | ### Read 57 | 58 | Read operations can be performed on a peer in any state. 59 | 60 | * AppliedIndex - get the index of the last log entry applied to the FSM 61 | * GetConfiguration - return the latest cluster configuration 62 | * LastContact - get the last time this peer made contact with the leader 63 | * LastIndex - get the index of the latest stored log entry 64 | * Leader - get the address of the peer that is currently the leader 65 | * Snapshot - snapshot the current state of the FSM into a file 66 | * State - return the state of the peer 67 | * Stats - return some stats about the peer and the cluster 68 | 69 | ## Threads 70 | 71 | Raft uses the following threads to handle operations. The name of the thread is in bold, 72 | and a short description of the operation handled by the thread follows. The main thread is 73 | responsible for handling many operations. 
74 | 75 | * **run** (main thread) - different behaviour based on peer state 76 | * follower 77 | * processRPC (from rpcCh) 78 | * AppendEntries 79 | * RequestVote 80 | * InstallSnapshot 81 | * TimeoutNow 82 | * liveBootstrap (from bootstrapCh) 83 | * periodic heartbeatTimer (HeartbeatTimeout) 84 | * candidate - starts an election for itself when called 85 | * processRPC (from rpcCh) - same as follower 86 | * acceptVote (from askPeerForVote) 87 | * leader - first starts replication to all peers, and applies a Noop log to ensure the new leader has committed up to the commit index 88 | * processRPC (from rpcCh) - same as follower, however we don’t actually expect to receive any RPCs other than a RequestVote 89 | * leadershipTransfer (from leadershipTransferCh) - 90 | * commit (from commitCh) - 91 | * verifyLeader (from verifyCh) - 92 | * user restore snapshot (from userRestoreCh) - 93 | * changeConfig (from configurationChangeCh) - 94 | * dispatchLogs (from applyCh) - handle client Raft.Apply requests by persisting logs to disk, and notifying replication goroutines to replicate the new logs 95 | * checkLease (periodically LeaseTimeout) - 96 | * **runFSM** - has exclusive access to the FSM, all reads and writes must send a message to this thread. Commands: 97 | * apply logs to the FSM, from the fsmMutateCh, from processLogs, from leaderLoop (leader) or appendEntries RPC (follower/candidate) 98 | * restore a snapshot to the FSM, from the fsmMutateCh, from restoreUserSnapshot (leader) or installSnapshot RPC (follower/candidate) 99 | * capture snapshot, from fsmSnapshotCh, from takeSnapshot (runSnapshot thread) 100 | * **runSnapshot** - handles the slower part of taking a snapshot. From a pointer captured by the FSM.Snapshot operation, this thread persists the snapshot by calling FSMSnapshot.Persist. Also calls compactLogs to delete old logs. 101 | * periodically (SnapshotInterval) takeSnapshot for log compaction 102 | * user snapshot, from userSnapshotCh, takeSnapshot to return to the user 103 | * **askPeerForVote (candidate only)** - short lived goroutine that synchronously sends a RequestVote RPC to all voting peers, and waits for the response. One goroutine per voting peer. 104 | * **replicate (leader only)** - long running goroutine that synchronously sends log entry AppendEntry RPCs to all peers. Also starts the heartbeat thread, and possibly the pipelineDecode thread. Runs sendLatestSnapshot when AppendEntry fails. 105 | * **heartbeat (leader only)** - long running goroutine that synchronously sends heartbeat AppendEntry RPCs to all peers. 106 | * **pipelineDecode (leader only)** 107 | -------------------------------------------------------------------------------- /docs/apply.md: -------------------------------------------------------------------------------- 1 | # Raft Apply 2 | 3 | Apply is the primary operation provided by raft. A client calls `raft.Apply` to apply 4 | a command to the FSM. A command will first be committed, i.e., durably stored on a 5 | quorum of raft nodes. Then, the committed command is applied to fsm. 6 | 7 | This sequence diagram shows the steps involved in a `raft.Apply` operation. Each box 8 | across the top is a separate thread. The name in the box identifies the state of the peer 9 | (leader or follower) and the thread (`:`). When there are 10 | multiple copies of the thread, it is indicated with `(each peer)`. 
11 | 12 | ```mermaid 13 | sequenceDiagram 14 | autonumber 15 | 16 | participant client 17 | participant leadermain as leader:main 18 | participant leaderfsm as leader:fsm 19 | participant leaderreplicate as leader:replicate (each peer) 20 | participant followermain as follower:main (each peer) 21 | participant followerfsm as follower:fsm (each peer) 22 | 23 | client-)leadermain: applyCh to dispatchLogs 24 | leadermain->>leadermain: store logs to disk 25 | 26 | leadermain-)leaderreplicate: triggerCh 27 | leaderreplicate-->>followermain: Transport.AppendEntries RPC 28 | 29 | followermain->>followermain: store logs to disk 30 | 31 | opt leader commit index is ahead of peer commit index 32 | followermain-)followerfsm: fsmMutateCh
apply committed logs
33 |     followerfsm->>followerfsm: fsm.Apply
34 |   end
35 | 
36 |   followermain-->>leaderreplicate: respond success=true
37 |   leaderreplicate->>leaderreplicate: update commitment
38 | 
39 |   opt quorum commit index has increased
40 |     leaderreplicate-)leadermain: commitCh
41 |     leadermain-)leaderfsm: fsmMutateCh
42 |     leaderfsm->>leaderfsm: fsm.Apply
43 |     leaderfsm-)client: future.respond
44 |   end
45 | 
46 | ```
47 | 
48 | Following is a description of each step shown in the diagram above.
49 | 
50 | 1. The raft node handles the `raft.Apply` call by creating a new log entry and sending the entry
51 | to the `applyCh` channel.
52 | 
53 | 2. If the node is not a leader, the method will return the error `ErrNotLeader`. Otherwise,
54 | the main loop of the leader node calls `raft.dispatchLogs` to write the log entry locally.
55 | 
56 | 3. `raft.dispatchLogs` also sends a notification to the `f.triggerCh` of each follower (`map[ServerID]*followerReplication`) to start replicating log entries to the followers.
57 | 
58 | 4. For each follower, the leader has started a long-running routine (`replicate`) to
59 | replicate log entries. On receiving a notification on the `triggerCh`, the `replicate`
60 | routine makes the `Transport.AppendEntries` RPC call to do the replication. The log entries
61 | to be replicated are from the follower's nextIndex to min(nextIndex + maxAppendEntries,
62 | leader's lastIndex). Another parameter to AppendEntries is the LeaderCommitIndex. Following
63 | are some examples:
64 | 
65 | ```
66 | AppendEntries(Log: 1..5, LeaderCommitIndex: 0) // Replicating log entries 1..5,
67 |                                                // the leader hasn't committed any log entry;
68 | AppendEntries(Log: 6..8, LeaderCommitIndex: 4) // Replicating log entries 6..8,
69 |                                                // log 0..4 are committed after the leader receives
70 |                                                // a quorum of responses
71 | AppendEntries(Log: 9, LeaderCommitIndex: 8)    // Replicating log entry 9,
72 |                                                // log 5..8 are committed.
73 | AppendEntries(Log: , LeaderCommitIndex: 9)     // no new log, bumping the commit index
74 |                                                // to let the follower stay up to date with the
75 |                                                // latest committed entries
76 | ```
77 | 
78 | 5. The follower that receives the `appendEntries` RPC invokes `raft.appendEntries` to handle
79 | the request. It appends any new entries to the local log store.
80 | 
81 | 6. In the same method on the follower as step 5, if the LeaderCommitIndex > this follower's
82 | commitIndex, the follower updates its commitIndex to min(LeaderCommitIndex, index of its last
83 | log entry). In the first `AppendEntries` call of the above example, the follower won't
84 | update its commitIndex, because LeaderCommitIndex is 0. The last RPC call doesn't contain
85 | any new log, but the follower will still update its commitIndex to 9.
86 | 
87 | Further, the follower starts `processLogs` to send all the committed entries that haven't been
88 | applied to the FSM (`fsmMutateCh <- batch`). Otherwise (i.e., `commitIndex <= lastApplied`),
89 | the appendEntries RPC call returns success.
90 | 
91 | Therefore, a very small window of time can exist in which all followers have
92 | committed the log to disk and the write has been applied to the FSM of the leader, but the
93 | followers have not yet applied the log to their own FSMs.
94 | 
95 | 7. The peer applies the committed entries to the FSM.
96 | 
97 | 8. If all went well, the follower responds success (`resp.Success = true`) to the
98 | `appendEntries` RPC call.
99 | 
100 | 9. 
On receiving the successful response from `Transport.AppendEntries`, the leader needs to 101 | update the fsm based on the replicated log entries. Specifically, the leader finds the 102 | highest log entry index that has been replicated to a quorum of the servers ( 103 | `if quorumMatchIndex > c.commitIndex`), update `commitIndex` to that index, and 104 | notify through the `commitCh` channel. 105 | 106 | 10. The leader receives the notification on the `r.leaderState.commitCh` channel and starts 107 | grouping the entries that can be applied to the fsm. 108 | 109 | 11. `processLogs` applies all the committed entries that haven't been applied by batching the log entries and forwarding them through the `fsmMutateCh` channel to fsm. 110 | 111 | 12. The actual place applying the committed log entries is in the main loop of `runFSM()`. 112 | 113 | 13. After the log entries that contains the client req are applied to the fsm, the fsm 114 | module will set the responses to the client request (`req.future.respond(nil)`). From the 115 | client's point of view, the future returned by `raft.Apply` should now be unblocked and 116 | calls to `Error()` or `Response()` should return the data at this point. 117 | -------------------------------------------------------------------------------- /file_snapshot_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "bytes" 8 | "io" 9 | "os" 10 | "reflect" 11 | "runtime" 12 | "testing" 13 | ) 14 | 15 | func TestFileSnapshotStoreImpl(t *testing.T) { 16 | var impl interface{} = &FileSnapshotStore{} 17 | if _, ok := impl.(SnapshotStore); !ok { 18 | t.Fatalf("FileSnapshotStore not a SnapshotStore") 19 | } 20 | } 21 | 22 | func TestFileSnapshotSinkImpl(t *testing.T) { 23 | var impl interface{} = &FileSnapshotSink{} 24 | if _, ok := impl.(SnapshotSink); !ok { 25 | t.Fatalf("FileSnapshotSink not a SnapshotSink") 26 | } 27 | } 28 | 29 | func TestFileSS_CreateSnapshotMissingParentDir(t *testing.T) { 30 | parent, err := os.MkdirTemp("", "raft") 31 | if err != nil { 32 | t.Fatalf("err: %v ", err) 33 | } 34 | defer os.RemoveAll(parent) 35 | 36 | dir, err := os.MkdirTemp(parent, "raft") 37 | if err != nil { 38 | t.Fatalf("err: %v ", err) 39 | } 40 | 41 | snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t)) 42 | if err != nil { 43 | t.Fatalf("err: %v", err) 44 | } 45 | 46 | os.RemoveAll(parent) 47 | _, trans := NewInmemTransport(NewInmemAddr()) 48 | _, err = snap.Create(SnapshotVersionMax, 10, 3, Configuration{}, 0, trans) 49 | if err != nil { 50 | t.Fatalf("should not fail when using non existing parent") 51 | } 52 | } 53 | 54 | func TestFileSS_CreateSnapshot(t *testing.T) { 55 | // Create a test dir 56 | dir, err := os.MkdirTemp("", "raft") 57 | if err != nil { 58 | t.Fatalf("err: %v ", err) 59 | } 60 | defer os.RemoveAll(dir) 61 | 62 | snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t)) 63 | if err != nil { 64 | t.Fatalf("err: %v", err) 65 | } 66 | 67 | // Check no snapshots 68 | snaps, err := snap.List() 69 | if err != nil { 70 | t.Fatalf("err: %v", err) 71 | } 72 | if len(snaps) != 0 { 73 | t.Fatalf("did not expect any snapshots: %v", snaps) 74 | } 75 | 76 | // Create a new sink 77 | var configuration Configuration 78 | configuration.Servers = append(configuration.Servers, Server{ 79 | Suffrage: Voter, 80 | ID: ServerID("my id"), 81 | Address: ServerAddress("over here"), 82 | }) 83 | 
_, trans := NewInmemTransport(NewInmemAddr()) 84 | sink, err := snap.Create(SnapshotVersionMax, 10, 3, configuration, 2, trans) 85 | if err != nil { 86 | t.Fatalf("err: %v", err) 87 | } 88 | 89 | // The sink is not done, should not be in a list! 90 | snaps, err = snap.List() 91 | if err != nil { 92 | t.Fatalf("err: %v", err) 93 | } 94 | if len(snaps) != 0 { 95 | t.Fatalf("did not expect any snapshots: %v", snaps) 96 | } 97 | 98 | // Write to the sink 99 | _, err = sink.Write([]byte("first\n")) 100 | if err != nil { 101 | t.Fatalf("err: %v", err) 102 | } 103 | _, err = sink.Write([]byte("second\n")) 104 | if err != nil { 105 | t.Fatalf("err: %v", err) 106 | } 107 | 108 | // Done! 109 | err = sink.Close() 110 | if err != nil { 111 | t.Fatalf("err: %v", err) 112 | } 113 | 114 | // Should have a snapshot! 115 | snaps, err = snap.List() 116 | if err != nil { 117 | t.Fatalf("err: %v", err) 118 | } 119 | if len(snaps) != 1 { 120 | t.Fatalf("expect a snapshots: %v", snaps) 121 | } 122 | 123 | // Check the latest 124 | latest := snaps[0] 125 | if latest.Index != 10 { 126 | t.Fatalf("bad snapshot: %v", *latest) 127 | } 128 | if latest.Term != 3 { 129 | t.Fatalf("bad snapshot: %v", *latest) 130 | } 131 | if !reflect.DeepEqual(latest.Configuration, configuration) { 132 | t.Fatalf("bad snapshot: %v", *latest) 133 | } 134 | if latest.ConfigurationIndex != 2 { 135 | t.Fatalf("bad snapshot: %v", *latest) 136 | } 137 | if latest.Size != 13 { 138 | t.Fatalf("bad snapshot: %v", *latest) 139 | } 140 | 141 | // Read the snapshot 142 | _, r, err := snap.Open(latest.ID) 143 | if err != nil { 144 | t.Fatalf("err: %v", err) 145 | } 146 | 147 | // Read out everything 148 | var buf bytes.Buffer 149 | if _, err := io.Copy(&buf, r); err != nil { 150 | t.Fatalf("err: %v", err) 151 | } 152 | if err := r.Close(); err != nil { 153 | t.Fatalf("err: %v", err) 154 | } 155 | 156 | // Ensure a match 157 | if bytes.Compare(buf.Bytes(), []byte("first\nsecond\n")) != 0 { 158 | t.Fatalf("content mismatch") 159 | } 160 | } 161 | 162 | func TestFileSS_CancelSnapshot(t *testing.T) { 163 | // Create a test dir 164 | dir, err := os.MkdirTemp("", "raft") 165 | if err != nil { 166 | t.Fatalf("err: %v ", err) 167 | } 168 | defer os.RemoveAll(dir) 169 | 170 | snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t)) 171 | if err != nil { 172 | t.Fatalf("err: %v", err) 173 | } 174 | 175 | // Create a new sink 176 | _, trans := NewInmemTransport(NewInmemAddr()) 177 | sink, err := snap.Create(SnapshotVersionMax, 10, 3, Configuration{}, 0, trans) 178 | if err != nil { 179 | t.Fatalf("err: %v", err) 180 | } 181 | 182 | // Cancel the snapshot! Should delete 183 | err = sink.Cancel() 184 | if err != nil { 185 | t.Fatalf("err: %v", err) 186 | } 187 | 188 | // The sink is canceled, should not be in a list! 
189 | snaps, err := snap.List() 190 | if err != nil { 191 | t.Fatalf("err: %v", err) 192 | } 193 | if len(snaps) != 0 { 194 | t.Fatalf("did not expect any snapshots: %v", snaps) 195 | } 196 | } 197 | 198 | func TestFileSS_Retention(t *testing.T) { 199 | var err error 200 | // Create a test dir 201 | var dir string 202 | dir, err = os.MkdirTemp("", "raft") 203 | if err != nil { 204 | t.Fatalf("err: %v ", err) 205 | } 206 | defer os.RemoveAll(dir) 207 | 208 | var snap *FileSnapshotStore 209 | snap, err = NewFileSnapshotStoreWithLogger(dir, 2, newTestLogger(t)) 210 | if err != nil { 211 | t.Fatalf("err: %v", err) 212 | } 213 | 214 | // Create a few snapshots 215 | _, trans := NewInmemTransport(NewInmemAddr()) 216 | for i := 10; i < 15; i++ { 217 | var sink SnapshotSink 218 | sink, err = snap.Create(SnapshotVersionMax, uint64(i), 3, Configuration{}, 0, trans) 219 | if err != nil { 220 | t.Fatalf("err: %v", err) 221 | } 222 | err = sink.Close() 223 | if err != nil { 224 | t.Fatalf("err: %v", err) 225 | } 226 | } 227 | 228 | // Should only have 2 listed! 229 | var snaps []*SnapshotMeta 230 | snaps, err = snap.List() 231 | if err != nil { 232 | t.Fatalf("err: %v", err) 233 | } 234 | if len(snaps) != 2 { 235 | t.Fatalf("expect 2 snapshots: %v", snaps) 236 | } 237 | 238 | // Check they are the latest 239 | if snaps[0].Index != 14 { 240 | t.Fatalf("bad snap: %#v", *snaps[0]) 241 | } 242 | if snaps[1].Index != 13 { 243 | t.Fatalf("bad snap: %#v", *snaps[1]) 244 | } 245 | } 246 | 247 | func TestFileSS_BadPerm(t *testing.T) { 248 | var err error 249 | if runtime.GOOS == "windows" { 250 | t.Skip("skipping file permission test on windows") 251 | } 252 | 253 | // Create a temp dir 254 | var dir1 string 255 | dir1, err = os.MkdirTemp("", "raft") 256 | if err != nil { 257 | t.Fatalf("err: %s", err) 258 | } 259 | defer os.RemoveAll(dir1) 260 | 261 | // Create a sub dir and remove all permissions 262 | var dir2 string 263 | dir2, err = os.MkdirTemp(dir1, "badperm") 264 | if err != nil { 265 | t.Fatalf("err: %s", err) 266 | } 267 | if err = os.Chmod(dir2, 0o00); err != nil { 268 | t.Fatalf("err: %s", err) 269 | } 270 | defer os.Chmod(dir2, 777) // Set perms back for delete 271 | 272 | // Should fail 273 | if _, err = NewFileSnapshotStore(dir2, 3, nil); err == nil { 274 | t.Fatalf("should fail to use dir with bad perms") 275 | } 276 | } 277 | 278 | func TestFileSS_MissingParentDir(t *testing.T) { 279 | parent, err := os.MkdirTemp("", "raft") 280 | if err != nil { 281 | t.Fatalf("err: %v ", err) 282 | } 283 | defer os.RemoveAll(parent) 284 | 285 | dir, err := os.MkdirTemp(parent, "raft") 286 | if err != nil { 287 | t.Fatalf("err: %v ", err) 288 | } 289 | 290 | os.RemoveAll(parent) 291 | _, err = NewFileSnapshotStore(dir, 3, nil) 292 | if err != nil { 293 | t.Fatalf("should not fail when using non existing parent") 294 | } 295 | } 296 | 297 | func TestFileSS_Ordering(t *testing.T) { 298 | // Create a test dir 299 | dir, err := os.MkdirTemp("", "raft") 300 | if err != nil { 301 | t.Fatalf("err: %v ", err) 302 | } 303 | defer os.RemoveAll(dir) 304 | 305 | snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t)) 306 | if err != nil { 307 | t.Fatalf("err: %v", err) 308 | } 309 | 310 | // Create a new sink 311 | _, trans := NewInmemTransport(NewInmemAddr()) 312 | sink, err := snap.Create(SnapshotVersionMax, 130350, 5, Configuration{}, 0, trans) 313 | if err != nil { 314 | t.Fatalf("err: %v", err) 315 | } 316 | err = sink.Close() 317 | if err != nil { 318 | t.Fatalf("err: %v", err) 319 | } 320 | 321 | 
sink, err = snap.Create(SnapshotVersionMax, 204917, 36, Configuration{}, 0, trans) 322 | if err != nil { 323 | t.Fatalf("err: %v", err) 324 | } 325 | err = sink.Close() 326 | if err != nil { 327 | t.Fatalf("err: %v", err) 328 | } 329 | 330 | // Should only have 2 listed! 331 | snaps, err := snap.List() 332 | if err != nil { 333 | t.Fatalf("err: %v", err) 334 | } 335 | if len(snaps) != 2 { 336 | t.Fatalf("expect 2 snapshots: %v", snaps) 337 | } 338 | 339 | // Check they are ordered 340 | if snaps[0].Term != 36 { 341 | t.Fatalf("bad snap: %#v", *snaps[0]) 342 | } 343 | if snaps[1].Term != 5 { 344 | t.Fatalf("bad snap: %#v", *snaps[1]) 345 | } 346 | } 347 | -------------------------------------------------------------------------------- /future.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "fmt" 8 | "io" 9 | "sync" 10 | "time" 11 | ) 12 | 13 | // Future is used to represent an action that may occur in the future. 14 | type Future interface { 15 | // Error blocks until the future arrives and then returns the error status 16 | // of the future. This may be called any number of times - all calls will 17 | // return the same value, however is not OK to call this method twice 18 | // concurrently on the same Future instance. 19 | // Error will only return generic errors related to raft, such 20 | // as ErrLeadershipLost, or ErrRaftShutdown. Some operations, such as 21 | // ApplyLog, may also return errors from other methods. 22 | Error() error 23 | } 24 | 25 | // IndexFuture is used for future actions that can result in a raft log entry 26 | // being created. 27 | type IndexFuture interface { 28 | Future 29 | 30 | // Index holds the index of the newly applied log entry. 31 | // This must not be called until after the Error method has returned. 32 | Index() uint64 33 | } 34 | 35 | // ApplyFuture is used for Apply and can return the FSM response. 36 | type ApplyFuture interface { 37 | IndexFuture 38 | 39 | // Response returns the FSM response as returned by the FSM.Apply method. This 40 | // must not be called until after the Error method has returned. 41 | // Note that if FSM.Apply returns an error, it will be returned by Response, 42 | // and not by the Error method, so it is always important to check Response 43 | // for errors from the FSM. 44 | Response() interface{} 45 | } 46 | 47 | // ConfigurationFuture is used for GetConfiguration and can return the 48 | // latest configuration in use by Raft. 49 | type ConfigurationFuture interface { 50 | IndexFuture 51 | 52 | // Configuration contains the latest configuration. This must 53 | // not be called until after the Error method has returned. 54 | Configuration() Configuration 55 | } 56 | 57 | // SnapshotFuture is used for waiting on a user-triggered snapshot to complete. 58 | type SnapshotFuture interface { 59 | Future 60 | 61 | // Open is a function you can call to access the underlying snapshot and 62 | // its metadata. This must not be called until after the Error method 63 | // has returned. 64 | Open() (*SnapshotMeta, io.ReadCloser, error) 65 | } 66 | 67 | // LeadershipTransferFuture is used for waiting on a user-triggered leadership 68 | // transfer to complete. 69 | type LeadershipTransferFuture interface { 70 | Future 71 | } 72 | 73 | // errorFuture is used to return a static error. 
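// It implements Error, Response, and Index, so it can stand in for Future,
// IndexFuture, or ApplyFuture when an operation fails immediately.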
74 | type errorFuture struct { 75 | err error 76 | } 77 | 78 | func (e errorFuture) Error() error { 79 | return e.err 80 | } 81 | 82 | func (e errorFuture) Response() interface{} { 83 | return nil 84 | } 85 | 86 | func (e errorFuture) Index() uint64 { 87 | return 0 88 | } 89 | 90 | // deferError can be embedded to allow a future 91 | // to provide an error in the future. 92 | type deferError struct { 93 | err error 94 | errCh chan error 95 | responded bool 96 | ShutdownCh chan struct{} 97 | } 98 | 99 | func (d *deferError) init() { 100 | d.errCh = make(chan error, 1) 101 | } 102 | 103 | func (d *deferError) Error() error { 104 | if d.err != nil { 105 | // Note that when we've received a nil error, this 106 | // won't trigger, but the channel is closed after 107 | // send so we'll still return nil below. 108 | return d.err 109 | } 110 | if d.errCh == nil { 111 | panic("waiting for response on nil channel") 112 | } 113 | select { 114 | case d.err = <-d.errCh: 115 | case <-d.ShutdownCh: 116 | d.err = ErrRaftShutdown 117 | } 118 | return d.err 119 | } 120 | 121 | func (d *deferError) respond(err error) { 122 | if d.errCh == nil { 123 | return 124 | } 125 | if d.responded { 126 | return 127 | } 128 | d.errCh <- err 129 | close(d.errCh) 130 | d.responded = true 131 | } 132 | 133 | // There are several types of requests that cause a configuration entry to 134 | // be appended to the log. These are encoded here for leaderLoop() to process. 135 | // This is internal to a single server. 136 | type configurationChangeFuture struct { 137 | logFuture 138 | req configurationChangeRequest 139 | } 140 | 141 | // bootstrapFuture is used to attempt a live bootstrap of the cluster. See the 142 | // Raft object's BootstrapCluster member function for more details. 143 | type bootstrapFuture struct { 144 | deferError 145 | 146 | // configuration is the proposed bootstrap configuration to apply. 147 | configuration Configuration 148 | } 149 | 150 | // logFuture is used to apply a log entry and waits until 151 | // the log is considered committed. 152 | type logFuture struct { 153 | deferError 154 | log Log 155 | response interface{} 156 | dispatch time.Time 157 | } 158 | 159 | func (l *logFuture) Response() interface{} { 160 | return l.response 161 | } 162 | 163 | func (l *logFuture) Index() uint64 { 164 | return l.log.Index 165 | } 166 | 167 | type shutdownFuture struct { 168 | raft *Raft 169 | } 170 | 171 | func (s *shutdownFuture) Error() error { 172 | if s.raft == nil { 173 | return nil 174 | } 175 | s.raft.waitShutdown() 176 | if closeable, ok := s.raft.trans.(WithClose); ok { 177 | closeable.Close() 178 | } 179 | return nil 180 | } 181 | 182 | // userSnapshotFuture is used for waiting on a user-triggered snapshot to 183 | // complete. 184 | type userSnapshotFuture struct { 185 | deferError 186 | 187 | // opener is a function used to open the snapshot. This is filled in 188 | // once the future returns with no error. 189 | opener func() (*SnapshotMeta, io.ReadCloser, error) 190 | } 191 | 192 | // Open is a function you can call to access the underlying snapshot and its 193 | // metadata. 194 | func (u *userSnapshotFuture) Open() (*SnapshotMeta, io.ReadCloser, error) { 195 | if u.opener == nil { 196 | return nil, nil, fmt.Errorf("no snapshot available") 197 | } 198 | // Invalidate the opener so it can't get called multiple times, 199 | // which isn't generally safe. 
200 | defer func() { 201 | u.opener = nil 202 | }() 203 | return u.opener() 204 | } 205 | 206 | // userRestoreFuture is used for waiting on a user-triggered restore of an 207 | // external snapshot to complete. 208 | type userRestoreFuture struct { 209 | deferError 210 | 211 | // meta is the metadata that belongs with the snapshot. 212 | meta *SnapshotMeta 213 | 214 | // reader is the interface to read the snapshot contents from. 215 | reader io.Reader 216 | } 217 | 218 | // reqSnapshotFuture is used for requesting a snapshot start. 219 | // It is only used internally. 220 | type reqSnapshotFuture struct { 221 | deferError 222 | 223 | // snapshot details provided by the FSM runner before responding 224 | index uint64 225 | term uint64 226 | snapshot FSMSnapshot 227 | } 228 | 229 | // restoreFuture is used for requesting an FSM to perform a 230 | // snapshot restore. Used internally only. 231 | type restoreFuture struct { 232 | deferError 233 | ID string 234 | } 235 | 236 | // verifyFuture is used to verify the current node is still 237 | // the leader. This is to prevent a stale read. 238 | type verifyFuture struct { 239 | deferError 240 | notifyCh chan *verifyFuture 241 | quorumSize int 242 | votes int 243 | voteLock sync.Mutex 244 | } 245 | 246 | // leadershipTransferFuture is used to track the progress of a leadership 247 | // transfer internally. 248 | type leadershipTransferFuture struct { 249 | deferError 250 | 251 | ID *ServerID 252 | Address *ServerAddress 253 | } 254 | 255 | // configurationsFuture is used to retrieve the current configurations. This is 256 | // used to allow safe access to this information outside of the main thread. 257 | type configurationsFuture struct { 258 | deferError 259 | configurations configurations 260 | } 261 | 262 | // Configuration returns the latest configuration in use by Raft. 263 | func (c *configurationsFuture) Configuration() Configuration { 264 | return c.configurations.latest 265 | } 266 | 267 | // Index returns the index of the latest configuration in use by Raft. 268 | func (c *configurationsFuture) Index() uint64 { 269 | return c.configurations.latestIndex 270 | } 271 | 272 | // vote is used to respond to a verifyFuture. 273 | // This may block when responding on the notifyCh. 274 | func (v *verifyFuture) vote(leader bool) { 275 | v.voteLock.Lock() 276 | defer v.voteLock.Unlock() 277 | 278 | // Guard against having notified already 279 | if v.notifyCh == nil { 280 | return 281 | } 282 | 283 | if leader { 284 | v.votes++ 285 | if v.votes >= v.quorumSize { 286 | v.notifyCh <- v 287 | v.notifyCh = nil 288 | } 289 | } else { 290 | v.notifyCh <- v 291 | v.notifyCh = nil 292 | } 293 | } 294 | 295 | // appendFuture is used for waiting on a pipelined append 296 | // entries RPC. 297 | type appendFuture struct { 298 | deferError 299 | start time.Time 300 | args *AppendEntriesRequest 301 | resp *AppendEntriesResponse 302 | } 303 | 304 | func (a *appendFuture) Start() time.Time { 305 | return a.start 306 | } 307 | 308 | func (a *appendFuture) Request() *AppendEntriesRequest { 309 | return a.args 310 | } 311 | 312 | func (a *appendFuture) Response() *AppendEntriesResponse { 313 | return a.resp 314 | } 315 | -------------------------------------------------------------------------------- /future_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 
2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "errors" 8 | "testing" 9 | ) 10 | 11 | func TestDeferFutureSuccess(t *testing.T) { 12 | var f deferError 13 | f.init() 14 | f.respond(nil) 15 | if err := f.Error(); err != nil { 16 | t.Fatalf("unexpected error result; got %#v want nil", err) 17 | } 18 | if err := f.Error(); err != nil { 19 | t.Fatalf("unexpected error result; got %#v want nil", err) 20 | } 21 | } 22 | 23 | func TestDeferFutureError(t *testing.T) { 24 | want := errors.New("x") 25 | var f deferError 26 | f.init() 27 | f.respond(want) 28 | if got := f.Error(); got != want { 29 | t.Fatalf("unexpected error result; got %#v want %#v", got, want) 30 | } 31 | if got := f.Error(); got != want { 32 | t.Fatalf("unexpected error result; got %#v want %#v", got, want) 33 | } 34 | } 35 | 36 | func TestDeferFutureConcurrent(t *testing.T) { 37 | // Food for the race detector. 38 | want := errors.New("x") 39 | var f deferError 40 | f.init() 41 | go f.respond(want) 42 | if got := f.Error(); got != want { 43 | t.Errorf("unexpected error result; got %#v want %#v", got, want) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /fuzzy/apply_src.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package fuzzy 5 | 6 | import ( 7 | "hash/fnv" 8 | "math/rand" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | type applySource struct { 14 | rnd *rand.Rand 15 | seed int64 16 | } 17 | 18 | // newApplySource will create a new source, any source created with the same seed will generate the same sequence of data. 19 | func newApplySource(seed string) *applySource { 20 | h := fnv.New32() 21 | h.Write([]byte(seed)) 22 | s := &applySource{seed: int64(h.Sum32())} 23 | s.reset() 24 | return s 25 | } 26 | 27 | // reset this source back to its initial state, it'll generate the same sequence of data it initially did 28 | func (a *applySource) reset() { 29 | a.rnd = rand.New(rand.NewSource(a.seed)) 30 | } 31 | 32 | func (a *applySource) nextEntry() []byte { 33 | const sz = 33 34 | r := make([]byte, sz) 35 | for i := 0; i < len(r); i++ { 36 | r[i] = byte(a.rnd.Int31n(256)) 37 | } 38 | return r 39 | } 40 | 41 | type clusterApplier struct { 42 | stopCh chan bool 43 | applied uint64 44 | src *applySource 45 | } 46 | 47 | // runs apply in chunks of n to the cluster, use the returned Applier to Stop() it 48 | func (a *applySource) apply(t *testing.T, c *cluster, n uint) *clusterApplier { 49 | ap := &clusterApplier{stopCh: make(chan bool), src: a} 50 | go ap.apply(t, c, n) 51 | return ap 52 | } 53 | 54 | func (ca *clusterApplier) apply(t *testing.T, c *cluster, n uint) { 55 | for true { 56 | select { 57 | case <-ca.stopCh: 58 | return 59 | default: 60 | ca.applied += c.ApplyN(t, 5*time.Second, ca.src, n) 61 | } 62 | } 63 | } 64 | 65 | func (ca *clusterApplier) stop() { 66 | ca.stopCh <- true 67 | close(ca.stopCh) 68 | } 69 | -------------------------------------------------------------------------------- /fuzzy/fsm.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 
2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package fuzzy 5 | 6 | import ( 7 | "bufio" 8 | "encoding/binary" 9 | "fmt" 10 | "hash/adler32" 11 | "io" 12 | "os" 13 | 14 | "github.com/hashicorp/raft" 15 | ) 16 | 17 | type logHash struct { 18 | lastHash []byte 19 | } 20 | 21 | func (l *logHash) Add(d []byte) { 22 | hasher := adler32.New() 23 | hasher.Write(l.lastHash) 24 | hasher.Write(d) 25 | l.lastHash = hasher.Sum(nil) 26 | } 27 | 28 | type applyItem struct { 29 | index uint64 30 | term uint64 31 | data []byte 32 | } 33 | 34 | func (a *applyItem) set(l *raft.Log) { 35 | a.index = l.Index 36 | a.term = l.Term 37 | a.data = make([]byte, len(l.Data)) 38 | copy(a.data, l.Data) 39 | } 40 | 41 | type fuzzyFSM struct { 42 | logHash 43 | lastTerm uint64 44 | lastIndex uint64 45 | applied []applyItem 46 | } 47 | 48 | func (f *fuzzyFSM) Apply(l *raft.Log) interface{} { 49 | if l.Index <= f.lastIndex { 50 | panic(fmt.Errorf("fsm.Apply received log entry with invalid Index %v (lastIndex we saw was %d)", l, f.lastIndex)) 51 | } 52 | if l.Term < f.lastTerm { 53 | panic(fmt.Errorf("fsm.Apply received log entry with invalid Term %v (lastTerm we saw was %d)", l, f.lastTerm)) 54 | } 55 | f.lastIndex = l.Index 56 | f.lastTerm = l.Term 57 | f.Add(l.Data) 58 | f.applied = append(f.applied, applyItem{}) 59 | f.applied[len(f.applied)-1].set(l) 60 | return nil 61 | } 62 | 63 | func (f *fuzzyFSM) WriteTo(fn string) error { 64 | fw, err := os.Create(fn) 65 | if err != nil { 66 | return err 67 | } 68 | defer fw.Close() 69 | w := bufio.NewWriter(fw) 70 | defer w.Flush() 71 | for _, i := range f.applied { 72 | fmt.Fprintf(w, "%d.%8d: %X\n", i.term, i.index, i.data) 73 | } 74 | return nil 75 | } 76 | 77 | func (f *fuzzyFSM) Snapshot() (raft.FSMSnapshot, error) { 78 | s := *f 79 | return &s, nil 80 | } 81 | 82 | func (f *fuzzyFSM) Restore(r io.ReadCloser) error { 83 | err := binary.Read(r, binary.LittleEndian, &f.lastTerm) 84 | if err == nil { 85 | err = binary.Read(r, binary.LittleEndian, &f.lastIndex) 86 | } 87 | if err == nil { 88 | f.lastHash = make([]byte, adler32.Size) 89 | _, err = r.Read(f.lastHash) 90 | } 91 | return err 92 | } 93 | 94 | func (f *fuzzyFSM) Persist(sink raft.SnapshotSink) error { 95 | err := binary.Write(sink, binary.LittleEndian, f.lastTerm) 96 | if err == nil { 97 | err = binary.Write(sink, binary.LittleEndian, f.lastIndex) 98 | } 99 | if err == nil { 100 | _, err = sink.Write(f.lastHash) 101 | } 102 | return err 103 | } 104 | 105 | func (f *fuzzyFSM) Release() { 106 | } 107 | -------------------------------------------------------------------------------- /fuzzy/fsm_batch.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | //go:build batchtest 5 | // +build batchtest 6 | 7 | package fuzzy 8 | 9 | import "github.com/hashicorp/raft" 10 | 11 | // ApplyBatch enables fuzzyFSM to satisfy the BatchingFSM interface. This 12 | // function is gated by the batchtest build flag. 
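// Note that the individual Apply return values are discarded: the returned
// slice has the right length but every element is left nil.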
13 | func (f *fuzzyFSM) ApplyBatch(logs []*raft.Log) []interface{} { 14 | ret := make([]interface{}, len(logs)) 15 | 16 | for _, l := range logs { 17 | f.Apply(l) 18 | } 19 | 20 | return ret 21 | } 22 | -------------------------------------------------------------------------------- /fuzzy/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/hashicorp/raft/fuzzy 2 | 3 | go 1.20 4 | 5 | require ( 6 | github.com/hashicorp/go-hclog v1.6.2 7 | github.com/hashicorp/go-msgpack/v2 v2.1.1 8 | github.com/hashicorp/raft v1.2.0 9 | github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea 10 | ) 11 | 12 | require ( 13 | github.com/armon/go-metrics v0.4.1 // indirect 14 | github.com/boltdb/bolt v1.3.1 // indirect 15 | github.com/fatih/color v1.13.0 // indirect 16 | github.com/hashicorp/go-immutable-radix v1.0.0 // indirect 17 | github.com/hashicorp/go-msgpack v0.5.5 // indirect 18 | github.com/hashicorp/golang-lru v0.5.0 // indirect 19 | github.com/mattn/go-colorable v0.1.12 // indirect 20 | github.com/mattn/go-isatty v0.0.14 // indirect 21 | golang.org/x/sys v0.13.0 // indirect 22 | ) 23 | 24 | replace github.com/hashicorp/raft => ../ 25 | -------------------------------------------------------------------------------- /fuzzy/leadershiptransfer_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package fuzzy 5 | 6 | import ( 7 | "math/rand" 8 | "testing" 9 | "time" 10 | 11 | "github.com/hashicorp/raft" 12 | ) 13 | 14 | // 5 node cluster 15 | func TestRaft_FuzzyLeadershipTransfer(t *testing.T) { 16 | cluster := newRaftCluster(t, testLogWriter, "lt", 5, nil) 17 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 18 | 19 | s := newApplySource("LeadershipTransfer") 20 | data := cluster.generateNApplies(s, uint(r.Intn(10000))) 21 | futures := cluster.sendNApplies(time.Minute, data) 22 | cluster.leadershipTransfer(time.Minute) 23 | 24 | data = cluster.generateNApplies(s, uint(r.Intn(10000))) 25 | futures = append(futures, cluster.sendNApplies(time.Minute, data)...) 26 | cluster.leadershipTransfer(time.Minute) 27 | 28 | data = cluster.generateNApplies(s, uint(r.Intn(10000))) 29 | futures = append(futures, cluster.sendNApplies(time.Minute, data)...) 30 | cluster.leadershipTransfer(time.Minute) 31 | 32 | data = cluster.generateNApplies(s, uint(r.Intn(10000))) 33 | futures = append(futures, cluster.sendNApplies(time.Minute, data)...) 
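	// Note: unlike the earlier batches, no leadership transfer follows this final round of applies.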
34 | 35 | ac := cluster.checkApplyFutures(futures) 36 | 37 | cluster.Stop(t, time.Minute) 38 | cluster.VerifyLog(t, ac) 39 | cluster.VerifyFSM(t) 40 | } 41 | 42 | type LeadershipTransferMode int 43 | 44 | type LeadershipTransfer struct { 45 | verifier appendEntriesVerifier 46 | slowNodes map[string]bool 47 | delayMin time.Duration 48 | delayMax time.Duration 49 | mode LeadershipTransferMode 50 | } 51 | 52 | func (lt *LeadershipTransfer) Report(t *testing.T) { 53 | lt.verifier.Report(t) 54 | } 55 | 56 | func (lt *LeadershipTransfer) PreRPC(s, t string, r *raft.RPC) error { 57 | return nil 58 | } 59 | 60 | func (lt *LeadershipTransfer) nap() { 61 | d := lt.delayMin + time.Duration(rand.Int63n((lt.delayMax - lt.delayMin).Nanoseconds())) 62 | time.Sleep(d) 63 | } 64 | 65 | func (lt *LeadershipTransfer) PostRPC(src, target string, r *raft.RPC, res *raft.RPCResponse) error { 66 | return nil 67 | } 68 | 69 | func (lt *LeadershipTransfer) PreRequestVote(src, target string, v *raft.RequestVoteRequest) (*raft.RequestVoteResponse, error) { 70 | return nil, nil 71 | } 72 | 73 | func (lt *LeadershipTransfer) PreAppendEntries(src, target string, v *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error) { 74 | lt.verifier.PreAppendEntries(src, target, v) 75 | return nil, nil 76 | } 77 | -------------------------------------------------------------------------------- /fuzzy/membership_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package fuzzy 5 | 6 | import ( 7 | "io" 8 | "log" 9 | "os" 10 | "path/filepath" 11 | "testing" 12 | "time" 13 | ) 14 | 15 | var testLogWriter io.Writer 16 | 17 | func init() { 18 | testLogWriter = os.Stdout 19 | logDir := os.Getenv("TEST_LOG_DIR") 20 | if logDir != "" { 21 | f, err := os.Create(filepath.Join(logDir, "debug.log")) 22 | if err != nil { 23 | log.Fatalf("TEST_LOG_DIR Env set, but unable to create log file: %v\n", err) 24 | } 25 | testLogWriter = f 26 | } 27 | } 28 | 29 | // this runs a 3 node cluster then expands it to a 5 node cluster and checks all 5 nodes agree at the end 30 | func TestRaft_AddMembership(t *testing.T) { 31 | v := appendEntriesVerifier{} 32 | v.Init() 33 | cluster := newRaftCluster(t, testLogWriter, "m", 3, &v) 34 | s := newApplySource("AddMembership") 35 | initApplied := cluster.ApplyN(t, time.Minute, s, 100) 36 | a := s.apply(t, cluster, 1000) 37 | if err := cluster.CreateAndAddNode(t, testLogWriter, "m", 3); err != nil { 38 | t.Fatalf("Failed to add node m3: %v", err) 39 | } 40 | if err := cluster.CreateAndAddNode(t, testLogWriter, "m", 4); err != nil { 41 | t.Fatalf("Failed to add node m4: %v", err) 42 | } 43 | time.Sleep(time.Second * 5) 44 | a.stop() 45 | cluster.Stop(t, time.Minute) 46 | v.Report(t) 47 | cluster.VerifyLog(t, uint64(a.applied+initApplied)) 48 | cluster.VerifyFSM(t) 49 | } 50 | 51 | // starts with 3 nodes, goes to 5, then goes back to 3, but never removes the leader. 
52 | func TestRaft_AddRemoveNodesNotLeader(t *testing.T) { 53 | v := appendEntriesVerifier{} 54 | v.Init() 55 | cluster := newRaftCluster(t, testLogWriter, "ar", 3, &v) 56 | s := newApplySource("AddRemoveNodesNotLeader") 57 | initApplied := cluster.ApplyN(t, time.Minute, s, 100) 58 | a := s.apply(t, cluster, 1000) 59 | cluster.CreateAndAddNode(t, testLogWriter, "ar", 3) 60 | cluster.CreateAndAddNode(t, testLogWriter, "ar", 4) 61 | ldr := cluster.Leader(time.Minute) 62 | removed := 0 63 | for _, rn := range cluster.nodes { 64 | if rn.name != ldr.name { 65 | cluster.RemoveNode(t, rn.name) 66 | removed++ 67 | if removed >= 2 { 68 | break 69 | } 70 | } 71 | } 72 | a.stop() 73 | cluster.Stop(t, time.Minute) 74 | v.Report(t) 75 | cluster.VerifyLog(t, uint64(a.applied+initApplied)) 76 | cluster.VerifyFSM(t) 77 | } 78 | 79 | // starts with a 5 node cluster then removes the leader. 80 | func TestRaft_RemoveLeader(t *testing.T) { 81 | v := appendEntriesVerifier{} 82 | v.Init() 83 | cluster := newRaftCluster(t, testLogWriter, "rl", 5, &v) 84 | s := newApplySource("RemoveLeader") 85 | initApplied := cluster.ApplyN(t, time.Minute, s, 100) 86 | a := s.apply(t, cluster, 100) 87 | time.Sleep(time.Second) 88 | ldr := cluster.Leader(time.Minute) 89 | cluster.RemoveNode(t, ldr.name) 90 | time.Sleep(5 * time.Second) 91 | a.stop() 92 | cluster.Stop(t, time.Minute) 93 | v.Report(t) 94 | cluster.VerifyLog(t, uint64(a.applied+initApplied)) 95 | cluster.VerifyFSM(t) 96 | ldr.raft.Shutdown() 97 | } 98 | 99 | // starts with a 5 node cluster, partitions off one node, and then removes it from the cluster on the other partition 100 | func TestRaft_RemovePartitionedNode(t *testing.T) { 101 | hooks := NewPartitioner() 102 | cluster := newRaftCluster(t, testLogWriter, "rmp", 5, hooks) 103 | s := newApplySource("RemovePartitionedNode") 104 | initApplied := cluster.ApplyN(t, time.Minute, s, 101) 105 | a := s.apply(t, cluster, 100) 106 | nodes := cluster.LeaderPlus(3) 107 | victim := nodes[len(nodes)-1] 108 | hooks.PartitionOff(cluster.log, []*raftNode{victim}) 109 | time.Sleep(3 * time.Second) 110 | removed := cluster.RemoveNode(t, victim.name) 111 | time.Sleep(3 * time.Second) 112 | hooks.HealAll(cluster.log) 113 | time.Sleep(10 * time.Second) 114 | a.stop() 115 | cluster.Stop(t, time.Minute) 116 | hooks.Report(t) 117 | cluster.VerifyLog(t, uint64(a.applied+initApplied)) 118 | cluster.VerifyFSM(t) 119 | 120 | // we should verify that the partitioned node see that it was removed & shutdown 121 | // but it never gets notified of that, so we can't verify that currently. 122 | removed.raft.Shutdown() 123 | } 124 | -------------------------------------------------------------------------------- /fuzzy/node.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 
2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package fuzzy 5 | 6 | import ( 7 | "fmt" 8 | "path/filepath" 9 | "time" 10 | 11 | "github.com/hashicorp/go-hclog" 12 | "github.com/hashicorp/raft" 13 | rdb "github.com/hashicorp/raft-boltdb" 14 | ) 15 | 16 | type raftNode struct { 17 | transport *transport 18 | store *rdb.BoltStore 19 | raft *raft.Raft 20 | log hclog.Logger 21 | fsm *fuzzyFSM 22 | name string 23 | dir string 24 | } 25 | 26 | func newRaftNode(logger hclog.Logger, tc *transports, h TransportHooks, nodes []string, name string) (*raftNode, error) { 27 | var err error 28 | var datadir string 29 | datadir, err = resolveDirectory(fmt.Sprintf("data/%v", name), true) 30 | if err != nil { 31 | return nil, err 32 | } 33 | logger.Info("[INFO] Creating new raft Node with data in dir %v", datadir) 34 | var ss *raft.FileSnapshotStore 35 | ss, err = raft.NewFileSnapshotStoreWithLogger(datadir, 5, logger) 36 | 37 | if err != nil { 38 | return nil, fmt.Errorf("unable to initialize snapshots %v", err.Error()) 39 | } 40 | transport := tc.AddNode(name, h) 41 | 42 | config := raft.DefaultConfig() 43 | config.SnapshotThreshold = 1409600 44 | config.SnapshotInterval = time.Hour 45 | config.Logger = logger 46 | config.ShutdownOnRemove = false 47 | config.LocalID = raft.ServerID(name) 48 | 49 | var store *rdb.BoltStore 50 | store, err = rdb.NewBoltStore(filepath.Join(datadir, "store.bolt")) 51 | if err != nil { 52 | return nil, fmt.Errorf("unable to initialize log %v", err.Error()) 53 | } 54 | 55 | if len(nodes) > 0 { 56 | c := make([]raft.Server, 0, len(nodes)) 57 | for _, n := range nodes { 58 | c = append(c, raft.Server{Suffrage: raft.Voter, ID: raft.ServerID(n), Address: raft.ServerAddress(n)}) 59 | } 60 | configuration := raft.Configuration{Servers: c} 61 | 62 | if err = raft.BootstrapCluster(config, store, store, ss, transport, configuration); err != nil { 63 | return nil, err 64 | } 65 | } 66 | fsm := &fuzzyFSM{} 67 | var r *raft.Raft 68 | r, err = raft.NewRaft(config, fsm, store, store, ss, transport) 69 | if err != nil { 70 | return nil, err 71 | } 72 | n := raftNode{ 73 | transport: transport, 74 | store: store, 75 | raft: r, 76 | fsm: fsm, 77 | log: logger, 78 | name: name, 79 | dir: datadir, 80 | } 81 | return &n, nil 82 | } 83 | -------------------------------------------------------------------------------- /fuzzy/partition_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package fuzzy 5 | 6 | import ( 7 | "bytes" 8 | "fmt" 9 | "math/rand" 10 | "sync" 11 | "testing" 12 | "time" 13 | 14 | "github.com/hashicorp/raft" 15 | ) 16 | 17 | // 5 node cluster where the leader and another node get regularly partitioned off 18 | // eventually all partitions heal. 
19 | func TestRaft_LeaderPartitions(t *testing.T) { 20 | hooks := NewPartitioner() 21 | cluster := newRaftCluster(t, testLogWriter, "lp", 5, hooks) 22 | cluster.Leader(time.Second * 10) 23 | s := newApplySource("LeaderPartitions") 24 | applier := s.apply(t, cluster, 5) 25 | for i := 0; i < 10; i++ { 26 | pg := hooks.PartitionOff(cluster.log, cluster.LeaderPlus(rand.Intn(4))) 27 | time.Sleep(time.Second * 4) 28 | r := rand.Intn(10) 29 | if r < 1 { 30 | cluster.log.Logf("Healing no partitions!") 31 | } else if r < 4 { 32 | hooks.HealAll(cluster.log) 33 | } else { 34 | hooks.Heal(cluster.log, pg) 35 | } 36 | time.Sleep(time.Second * 5) 37 | } 38 | hooks.HealAll(cluster.log) 39 | cluster.Leader(time.Hour) 40 | applier.stop() 41 | cluster.Stop(t, time.Minute*10) 42 | hooks.Report(t) 43 | cluster.VerifyLog(t, applier.applied) 44 | cluster.VerifyFSM(t) 45 | } 46 | 47 | type Partitioner struct { 48 | verifier appendEntriesVerifier 49 | lock sync.RWMutex // protects partitioned / nextGroup 50 | // this is a map of node -> partition group, only nodes in the same partition group can communicate with each other 51 | partitioned map[string]int 52 | nextGroup int 53 | } 54 | 55 | func NewPartitioner() *Partitioner { 56 | p := &Partitioner{ 57 | partitioned: make(map[string]int), 58 | nextGroup: 1, 59 | } 60 | p.verifier.Init() 61 | return p 62 | } 63 | 64 | // PartitionOff creates a partition where the supplied nodes can only communicate with each other 65 | // returns the partition group, which can be used later with Heal to heal this specific partition 66 | func (p *Partitioner) PartitionOff(l Logger, nodes []*raftNode) int { 67 | nn := make([]string, 0, len(nodes)) 68 | p.lock.Lock() 69 | defer p.lock.Unlock() 70 | pGroup := p.nextGroup 71 | p.nextGroup++ 72 | for _, n := range nodes { 73 | p.partitioned[n.name] = pGroup 74 | nn = append(nn, n.name) 75 | } 76 | l.Logf("Created partition %d with nodes %v, partitions now are %v", pGroup, nn, p) 77 | return pGroup 78 | } 79 | 80 | func (p *Partitioner) Heal(l Logger, pGroup int) { 81 | p.lock.Lock() 82 | defer p.lock.Unlock() 83 | for k, v := range p.partitioned { 84 | if v == pGroup { 85 | p.partitioned[k] = 0 86 | } 87 | } 88 | l.Logf("Healing partition group %d, now partitions are %v", pGroup, p) 89 | } 90 | 91 | func (p *Partitioner) String() string { 92 | pl := make([][]string, 0, 10) 93 | for n, pv := range p.partitioned { 94 | if pv > 0 { 95 | for pv >= len(pl) { 96 | pl = append(pl, nil) 97 | } 98 | pl[pv] = append(pl[pv], n) 99 | } 100 | } 101 | b := bytes.Buffer{} 102 | for i, n := range pl { 103 | if len(n) > 0 { 104 | if b.Len() > 0 { 105 | b.WriteString(", ") 106 | } 107 | fmt.Fprintf(&b, "%d = %v", i, n) 108 | } 109 | } 110 | if b.Len() == 0 { 111 | return "[None]" 112 | } 113 | return b.String() 114 | } 115 | 116 | func (p *Partitioner) HealAll(l Logger) { 117 | p.lock.Lock() 118 | defer p.lock.Unlock() 119 | p.partitioned = make(map[string]int) 120 | l.Logf("Healing all partitions, partitions now %v", p) 121 | } 122 | 123 | func (p *Partitioner) Report(t *testing.T) { 124 | p.verifier.Report(t) 125 | } 126 | 127 | func (p *Partitioner) PreRPC(s, t string, r *raft.RPC) error { 128 | p.lock.RLock() 129 | sp := p.partitioned[s] 130 | st := p.partitioned[t] 131 | p.lock.RUnlock() 132 | if sp == st { 133 | return nil 134 | } 135 | return fmt.Errorf("unable to connect to %v, from %v", t, s) 136 | } 137 | 138 | func (p *Partitioner) PostRPC(s, t string, req *raft.RPC, res *raft.RPCResponse) error { 139 | return nil 140 | } 141 | 142 | func (p 
*Partitioner) PreRequestVote(src, target string, v *raft.RequestVoteRequest) (*raft.RequestVoteResponse, error) {
143 | 	return nil, nil
144 | }
145 | 
146 | func (p *Partitioner) PreAppendEntries(src, target string, v *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error) {
147 | 	return nil, nil
148 | }
149 | 
--------------------------------------------------------------------------------
/fuzzy/readme.md:
--------------------------------------------------------------------------------
1 | # Fuzzy Raft
2 | 
3 | Inspired by http://colin-scott.github.io/blog/2015/10/07/fuzzing-raft-for-fun-and-profit/, this package
4 | is a framework and set of test scenarios for testing the behavior and correctness of the raft library
5 | under various conditions.
6 | 
7 | ## Framework
8 | 
9 | The framework allows you to construct multiple node raft clusters, connected by an instrumented transport
10 | that allows a test to inject various transport level behaviors to simulate various scenarios (e.g. you
11 | can have your hook fail all transport calls to a particular node to simulate it being partitioned off
12 | the network). There are helper classes to create and Apply well-known sequences of test data, and to
13 | examine the final state of the cluster, the nodes' FSMs, and the raft log.
14 | 
15 | ## Running
16 | 
17 | The tests use the standard go test framework: run go test . [from this dir] or use make fuzz from
18 | the parent directory. As these tests are looking for timing and other edge cases, a pass from a single run
19 | isn't enough; the tests need to be run repeatedly to build up confidence.
20 | 
21 | ## Test Scenarios
22 | 
23 | The following test scenarios are currently implemented. Each test concludes with a standard set of validations:
24 | 
25 | * Each node's raft log contains the same set of entries (term/index/data).
26 | * The raft log contains data matching the client request for each call to raft.Apply() that reported success.
27 | * Each node's FSM saw the same sequence of Apply(*raft.Log) calls.
28 | * A verifier at the transport level verifies a number of transport level invariants.
29 | 
30 | Most tests run with a background workload that is constantly apply()ing new entries to the log. [when there's a leader]
31 | 
32 | ### TestRaft_LeaderPartitions
33 | 
34 | This creates a 5 node cluster and then repeatedly partitions multiple nodes off (including the current leader),
35 | then heals the partition and repeats. At the end all partitions are removed. [clearly inspired by Jepsen]
36 | 
37 | ### TestRaft_NoIssueSanity
38 | 
39 | A basic 5 node cluster test: it starts a 5 node cluster, applies some data, then does the verifications.
40 | 
41 | ### TestRaft_SlowSendVote
42 | 
43 | Tests what happens when RequestVote requests are delayed in being sent to other nodes.
44 | 
45 | ### TestRaft_SlowRecvVote
46 | 
47 | Tests what happens when RequestVote responses are delayed in being received by the sender.
48 | 
49 | ### TestRaft_AddMembership
50 | 
51 | Starts a 3 node cluster, and then adds 2 new members to the cluster.
52 | 
53 | ### TestRaft_AddRemoveNodesNotLeader
54 | 
55 | Starts a 5 node cluster, and then removes 2 follower nodes from the cluster.
56 | 
57 | ### TestRaft_RemoveLeader
58 | 
59 | Starts a 5 node cluster, and then removes the node that is the leader.
60 | 
61 | ### TestRaft_RemovePartitionedNode
62 | 
63 | Starts a 5 node cluster, partitions one of the follower nodes off the network, and then tells the leader to remove that node, then heals the partition.
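## Adding a Scenario

New scenarios usually combine a cluster, an apply workload, and a custom transport hook. As a rough sketch (not one of the scenarios above), a hook that drops every AppendEntries RPC sent to a chosen node could look like the code below; the type name and field are hypothetical, and the method set mirrors the other hooks in this package (the exact TransportHooks interface is defined in transport.go).

```go
package fuzzy

import (
	"fmt"

	"github.com/hashicorp/raft"
)

// dropAppendEntries is a hypothetical hook that fails every AppendEntries
// RPC targeted at one node, leaving all other traffic untouched.
type dropAppendEntries struct {
	victim string // name of the node whose AppendEntries should be dropped
}

func (d *dropAppendEntries) PreRPC(src, target string, r *raft.RPC) error {
	// Returning an error from PreRPC causes the transport call to fail; this
	// is the same mechanism the Partitioner uses to simulate partitions.
	if _, ok := r.Command.(*raft.AppendEntriesRequest); ok && target == d.victim {
		return fmt.Errorf("dropping AppendEntries from %v to %v", src, target)
	}
	return nil
}

func (d *dropAppendEntries) PostRPC(src, target string, r *raft.RPC, res *raft.RPCResponse) error {
	return nil
}

func (d *dropAppendEntries) PreRequestVote(src, target string, v *raft.RequestVoteRequest) (*raft.RequestVoteResponse, error) {
	return nil, nil
}

func (d *dropAppendEntries) PreAppendEntries(src, target string, v *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error) {
	return nil, nil
}
```

Such a hook would be passed to newRaftCluster in the same way the partitioner and slow-voter hooks are in the existing tests.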
64 | -------------------------------------------------------------------------------- /fuzzy/resolve.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package fuzzy 5 | 6 | import ( 7 | "os" 8 | "path/filepath" 9 | ) 10 | 11 | // resolveDirectory returns a full directory path based on the supplied dir path 12 | // if the supplied dir path is absolute (i.e. it starts with / ) then it is 13 | // returned as is, if it's a relative path, then it is assumed to be relative 14 | // to the executable, and that is computed and returned. 15 | // 16 | // if create is true, then the directory path will be created if it doesn't 17 | // already exist 18 | // 19 | // if create is false, then it's upto the caller to ensure it exists and/or 20 | // create it as needed [this won't verify that it exists] 21 | func resolveDirectory(dir string, create bool) (string, error) { 22 | var resolved string 23 | if filepath.IsAbs(dir) { 24 | resolved = dir 25 | } else { 26 | execdir, err := filepath.Abs(filepath.Dir(os.Args[0])) 27 | if err != nil { 28 | return "", err 29 | } 30 | resolved = filepath.Join(execdir, dir) 31 | } 32 | if create { 33 | if _, err := os.Stat(resolved); os.IsNotExist(err) { 34 | if err := os.MkdirAll(resolved, 0o744); err != nil { 35 | return "", err 36 | } 37 | } 38 | } 39 | return resolved, nil 40 | } 41 | -------------------------------------------------------------------------------- /fuzzy/simple_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package fuzzy 5 | 6 | import ( 7 | "testing" 8 | "time" 9 | ) 10 | 11 | // this runs a 5 node cluster with verifications turned on, but no failures or issues injected. 12 | func TestRaft_NoIssueSanity(t *testing.T) { 13 | v := appendEntriesVerifier{} 14 | v.Init() 15 | cluster := newRaftCluster(t, testLogWriter, "node", 5, &v) 16 | s := newApplySource("NoIssueSanity") 17 | applyCount := cluster.ApplyN(t, time.Minute, s, 10000) 18 | cluster.Stop(t, time.Minute) 19 | v.Report(t) 20 | cluster.VerifyLog(t, applyCount) 21 | cluster.VerifyFSM(t) 22 | } 23 | -------------------------------------------------------------------------------- /fuzzy/slowvoter_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package fuzzy 5 | 6 | import ( 7 | "math/rand" 8 | "testing" 9 | "time" 10 | 11 | "github.com/hashicorp/raft" 12 | ) 13 | 14 | // 5 node cluster where 2 nodes always see a delay in getting a request vote msg. 15 | func TestRaft_SlowSendVote(t *testing.T) { 16 | hooks := NewSlowVoter("sv_0", "sv_1") 17 | cluster := newRaftCluster(t, testLogWriter, "sv", 5, hooks) 18 | s := newApplySource("SlowSendVote") 19 | ac := cluster.ApplyN(t, time.Minute, s, 10000) 20 | cluster.Stop(t, time.Minute) 21 | hooks.Report(t) 22 | cluster.VerifyLog(t, ac) 23 | cluster.VerifyFSM(t) 24 | } 25 | 26 | // 5 node cluster where vote results from 3 nodes are slow to turn up. 
27 | // [they see the vote request normally, but their response is slow] 28 | func TestRaft_SlowRecvVote(t *testing.T) { 29 | hooks := NewSlowVoter("svr_1", "svr_4", "svr_3") 30 | hooks.mode = SlowRecv 31 | cluster := newRaftCluster(t, testLogWriter, "svr", 5, hooks) 32 | s := newApplySource("SlowRecvVote") 33 | ac := cluster.ApplyN(t, time.Minute, s, 10000) 34 | cluster.Stop(t, time.Minute) 35 | hooks.Report(t) 36 | cluster.VerifyLog(t, ac) 37 | cluster.VerifyFSM(t) 38 | } 39 | 40 | type SlowVoterMode int 41 | 42 | const ( 43 | SlowSend SlowVoterMode = iota 44 | SlowRecv 45 | ) 46 | 47 | type SlowVoter struct { 48 | verifier appendEntriesVerifier 49 | slowNodes map[string]bool 50 | delayMin time.Duration 51 | delayMax time.Duration 52 | mode SlowVoterMode 53 | } 54 | 55 | func NewSlowVoter(slowNodes ...string) *SlowVoter { 56 | sv := SlowVoter{ 57 | slowNodes: make(map[string]bool, len(slowNodes)), 58 | delayMin: time.Second, 59 | delayMax: time.Second * 2, 60 | mode: SlowSend, 61 | } 62 | for _, n := range slowNodes { 63 | sv.slowNodes[n] = true 64 | } 65 | sv.verifier.Init() 66 | return &sv 67 | } 68 | 69 | func (sv *SlowVoter) Report(t *testing.T) { 70 | sv.verifier.Report(t) 71 | } 72 | 73 | func (sv *SlowVoter) PreRPC(s, t string, r *raft.RPC) error { 74 | return nil 75 | } 76 | 77 | func (sv *SlowVoter) nap() { 78 | d := sv.delayMin + time.Duration(rand.Int63n((sv.delayMax - sv.delayMin).Nanoseconds())) 79 | time.Sleep(d) 80 | } 81 | 82 | func (sv *SlowVoter) PostRPC(src, target string, r *raft.RPC, res *raft.RPCResponse) error { 83 | if sv.mode == SlowRecv && sv.slowNodes[target] { 84 | _, ok := r.Command.(*raft.RequestVoteRequest) 85 | if ok { 86 | sv.nap() 87 | } 88 | } 89 | return nil 90 | } 91 | 92 | func (sv *SlowVoter) PreRequestVote(src, target string, v *raft.RequestVoteRequest) (*raft.RequestVoteResponse, error) { 93 | if sv.mode == SlowSend && sv.slowNodes[target] { 94 | sv.nap() 95 | } 96 | return nil, nil 97 | } 98 | 99 | func (sv *SlowVoter) PreAppendEntries(src, target string, v *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error) { 100 | sv.verifier.PreAppendEntries(src, target, v) 101 | return nil, nil 102 | } 103 | -------------------------------------------------------------------------------- /fuzzy/verifier.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package fuzzy 5 | 6 | import ( 7 | "fmt" 8 | "sync" 9 | "testing" 10 | 11 | "github.com/hashicorp/raft" 12 | ) 13 | 14 | // AppendEntriesVerifier looks at all the AppendEntry RPC request and verifies that only one node sends AE requests for any given term 15 | // it also verifies that the request only comes from the node indicated as the leader in the AE message. 
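// leaderForTerm records the first node observed sending AppendEntries for each term;
// errors accumulates any violations until Report is called from the test.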
16 | type appendEntriesVerifier struct { 17 | sync.RWMutex 18 | leaderForTerm map[uint64]string 19 | errors []string 20 | } 21 | 22 | func (v *appendEntriesVerifier) Report(t *testing.T) { 23 | v.Lock() 24 | defer v.Unlock() 25 | for _, e := range v.errors { 26 | t.Error(e) 27 | } 28 | } 29 | 30 | func (v *appendEntriesVerifier) Init() { 31 | v.Lock() 32 | defer v.Unlock() 33 | v.leaderForTerm = make(map[uint64]string) 34 | v.errors = make([]string, 0, 10) 35 | } 36 | 37 | func (v *appendEntriesVerifier) PreRPC(src, target string, r *raft.RPC) error { 38 | return nil 39 | } 40 | 41 | func (v *appendEntriesVerifier) PostRPC(src, target string, req *raft.RPC, res *raft.RPCResponse) error { 42 | return nil 43 | } 44 | 45 | func (v *appendEntriesVerifier) PreRequestVote(src, target string, rv *raft.RequestVoteRequest) (*raft.RequestVoteResponse, error) { 46 | return nil, nil 47 | } 48 | 49 | func (v *appendEntriesVerifier) PreAppendEntries(src, target string, req *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error) { 50 | term := req.Term 51 | var ldr string 52 | if len(req.RPCHeader.Addr) > 0 { 53 | ldr = string(req.RPCHeader.Addr) 54 | } else { 55 | ldr = string(req.Leader) 56 | } 57 | 58 | if ldr != src { 59 | v.Lock() 60 | defer v.Unlock() 61 | v.errors = append(v.errors, fmt.Sprintf("Node %v sent an appendEntries request for term %d that said the leader was some other node %v", src, term, ldr)) 62 | } 63 | v.RLock() 64 | tl, exists := v.leaderForTerm[term] 65 | v.RUnlock() 66 | if exists && tl != ldr { 67 | v.Lock() 68 | defer v.Unlock() 69 | v.errors = append(v.errors, fmt.Sprintf("Node %v sent an AppendEntries request for term %d, but node %v had already done some, multiple leaders for same term!", src, term, tl)) 70 | } 71 | if !exists { 72 | v.Lock() 73 | tl, exists := v.leaderForTerm[term] 74 | if exists && tl != ldr { 75 | v.errors = append(v.errors, fmt.Sprintf("Node %v sent an AppendEntries request for term %d, but node %v had already done some, multiple leaders for same term!", src, term, tl)) 76 | } 77 | if !exists { 78 | v.leaderForTerm[term] = ldr 79 | } 80 | v.Unlock() 81 | } 82 | return nil, nil 83 | } 84 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/hashicorp/raft 2 | 3 | go 1.20 4 | 5 | retract v1.1.3 // Deleted original tag; module checksum may not be accurate. 6 | 7 | require ( 8 | github.com/hashicorp/go-hclog v1.6.2 9 | github.com/hashicorp/go-metrics v0.5.4 10 | github.com/hashicorp/go-msgpack/v2 v2.1.2 11 | github.com/stretchr/testify v1.8.4 12 | ) 13 | 14 | require ( 15 | github.com/armon/go-metrics v0.4.1 // indirect 16 | github.com/davecgh/go-spew v1.1.1 // indirect 17 | github.com/fatih/color v1.13.0 // indirect 18 | github.com/hashicorp/go-immutable-radix v1.0.0 // indirect 19 | github.com/hashicorp/golang-lru v0.5.0 // indirect 20 | github.com/kr/pretty v0.2.1 // indirect 21 | github.com/mattn/go-colorable v0.1.12 // indirect 22 | github.com/mattn/go-isatty v0.0.14 // indirect 23 | github.com/pmezard/go-difflib v1.0.0 // indirect 24 | golang.org/x/sys v0.13.0 // indirect 25 | gopkg.in/yaml.v3 v3.0.1 // indirect 26 | ) 27 | -------------------------------------------------------------------------------- /inmem_snapshot.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 
2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "bytes" 8 | "fmt" 9 | "io" 10 | "sync" 11 | ) 12 | 13 | // InmemSnapshotStore implements the SnapshotStore interface and 14 | // retains only the most recent snapshot 15 | type InmemSnapshotStore struct { 16 | latest *InmemSnapshotSink 17 | hasSnapshot bool 18 | sync.RWMutex 19 | } 20 | 21 | // InmemSnapshotSink implements SnapshotSink in memory 22 | type InmemSnapshotSink struct { 23 | meta SnapshotMeta 24 | contents *bytes.Buffer 25 | } 26 | 27 | // NewInmemSnapshotStore creates a blank new InmemSnapshotStore 28 | func NewInmemSnapshotStore() *InmemSnapshotStore { 29 | return &InmemSnapshotStore{ 30 | latest: &InmemSnapshotSink{ 31 | contents: &bytes.Buffer{}, 32 | }, 33 | } 34 | } 35 | 36 | // Create replaces the stored snapshot with a new one using the given args 37 | func (m *InmemSnapshotStore) Create(version SnapshotVersion, index, term uint64, 38 | configuration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) { 39 | // We only support version 1 snapshots at this time. 40 | if version != 1 { 41 | return nil, fmt.Errorf("unsupported snapshot version %d", version) 42 | } 43 | 44 | name := snapshotName(term, index) 45 | 46 | m.Lock() 47 | defer m.Unlock() 48 | 49 | sink := &InmemSnapshotSink{ 50 | meta: SnapshotMeta{ 51 | Version: version, 52 | ID: name, 53 | Index: index, 54 | Term: term, 55 | Peers: encodePeers(configuration, trans), 56 | Configuration: configuration, 57 | ConfigurationIndex: configurationIndex, 58 | }, 59 | contents: &bytes.Buffer{}, 60 | } 61 | m.hasSnapshot = true 62 | m.latest = sink 63 | 64 | return sink, nil 65 | } 66 | 67 | // List returns the latest snapshot taken 68 | func (m *InmemSnapshotStore) List() ([]*SnapshotMeta, error) { 69 | m.RLock() 70 | defer m.RUnlock() 71 | 72 | if !m.hasSnapshot { 73 | return []*SnapshotMeta{}, nil 74 | } 75 | return []*SnapshotMeta{&m.latest.meta}, nil 76 | } 77 | 78 | // Open wraps an io.ReadCloser around the snapshot contents 79 | func (m *InmemSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) { 80 | m.RLock() 81 | defer m.RUnlock() 82 | 83 | if m.latest.meta.ID != id { 84 | return nil, nil, fmt.Errorf("[ERR] snapshot: failed to open snapshot id: %s", id) 85 | } 86 | 87 | // Make a copy of the contents, since a bytes.Buffer can only be read 88 | // once. 89 | contents := bytes.NewBuffer(m.latest.contents.Bytes()) 90 | return &m.latest.meta, io.NopCloser(contents), nil 91 | } 92 | 93 | // Write appends the given bytes to the snapshot contents 94 | func (s *InmemSnapshotSink) Write(p []byte) (n int, err error) { 95 | written, err := s.contents.Write(p) 96 | s.meta.Size += int64(written) 97 | return written, err 98 | } 99 | 100 | // Close updates the Size and is otherwise a no-op 101 | func (s *InmemSnapshotSink) Close() error { 102 | return nil 103 | } 104 | 105 | // ID returns the ID of the SnapshotMeta 106 | func (s *InmemSnapshotSink) ID() string { 107 | return s.meta.ID 108 | } 109 | 110 | // Cancel returns successfully with a nil error 111 | func (s *InmemSnapshotSink) Cancel() error { 112 | return nil 113 | } 114 | -------------------------------------------------------------------------------- /inmem_snapshot_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 
2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "bytes" 8 | "io" 9 | "reflect" 10 | "testing" 11 | ) 12 | 13 | func TestInmemSnapshotStoreImpl(t *testing.T) { 14 | var impl interface{} = &InmemSnapshotStore{} 15 | if _, ok := impl.(SnapshotStore); !ok { 16 | t.Fatalf("InmemSnapshotStore not a SnapshotStore") 17 | } 18 | } 19 | 20 | func TestInmemSnapshotSinkImpl(t *testing.T) { 21 | var impl interface{} = &InmemSnapshotSink{} 22 | if _, ok := impl.(SnapshotSink); !ok { 23 | t.Fatalf("InmemSnapshotSink not a SnapshotSink") 24 | } 25 | } 26 | 27 | func TestInmemSS_CreateSnapshot(t *testing.T) { 28 | snap := NewInmemSnapshotStore() 29 | 30 | // Check no snapshots 31 | snaps, err := snap.List() 32 | if err != nil { 33 | t.Fatalf("err: %v", err) 34 | } 35 | if len(snaps) != 0 { 36 | t.Fatalf("did not expect any snapshots: %v", snaps) 37 | } 38 | 39 | // Create a new sink 40 | var configuration Configuration 41 | configuration.Servers = append(configuration.Servers, Server{ 42 | Suffrage: Voter, 43 | ID: ServerID("my id"), 44 | Address: ServerAddress("over here"), 45 | }) 46 | _, trans := NewInmemTransport(NewInmemAddr()) 47 | sink, err := snap.Create(SnapshotVersionMax, 10, 3, configuration, 2, trans) 48 | if err != nil { 49 | t.Fatalf("err: %v", err) 50 | } 51 | 52 | // The sink is not done, should not be in a list! 53 | snaps, err = snap.List() 54 | if err != nil { 55 | t.Fatalf("err: %v", err) 56 | } 57 | if len(snaps) != 1 { 58 | t.Fatalf("should always be 1 snapshot: %v", snaps) 59 | } 60 | 61 | // Write to the sink 62 | _, err = sink.Write([]byte("first\n")) 63 | if err != nil { 64 | t.Fatalf("err: %v", err) 65 | } 66 | _, err = sink.Write([]byte("second\n")) 67 | if err != nil { 68 | t.Fatalf("err: %v", err) 69 | } 70 | 71 | // Done! 72 | err = sink.Close() 73 | if err != nil { 74 | t.Fatalf("err: %v", err) 75 | } 76 | 77 | // Should have a snapshot! 
78 | snaps, err = snap.List() 79 | if err != nil { 80 | t.Fatalf("err: %v", err) 81 | } 82 | if len(snaps) != 1 { 83 | t.Fatalf("expect a snapshots: %v", snaps) 84 | } 85 | 86 | // Check the latest 87 | latest := snaps[0] 88 | if latest.Index != 10 { 89 | t.Fatalf("bad snapshot: %v", *latest) 90 | } 91 | if latest.Term != 3 { 92 | t.Fatalf("bad snapshot: %v", *latest) 93 | } 94 | if !reflect.DeepEqual(latest.Configuration, configuration) { 95 | t.Fatalf("bad snapshot: %v", *latest) 96 | } 97 | if latest.ConfigurationIndex != 2 { 98 | t.Fatalf("bad snapshot: %v", *latest) 99 | } 100 | if latest.Size != 13 { 101 | t.Fatalf("bad snapshot: %v", *latest) 102 | } 103 | 104 | // Read the snapshot 105 | _, r, err := snap.Open(latest.ID) 106 | if err != nil { 107 | t.Fatalf("err: %v", err) 108 | } 109 | 110 | // Read out everything 111 | var buf bytes.Buffer 112 | if _, err := io.Copy(&buf, r); err != nil { 113 | t.Fatalf("err: %v", err) 114 | } 115 | if err := r.Close(); err != nil { 116 | t.Fatalf("err: %v", err) 117 | } 118 | 119 | // Ensure a match 120 | if bytes.Compare(buf.Bytes(), []byte("first\nsecond\n")) != 0 { 121 | t.Fatalf("content mismatch") 122 | } 123 | } 124 | 125 | func TestInmemSS_OpenSnapshotTwice(t *testing.T) { 126 | snap := NewInmemSnapshotStore() 127 | 128 | // Create a new sink 129 | var configuration Configuration 130 | configuration.Servers = append(configuration.Servers, Server{ 131 | Suffrage: Voter, 132 | ID: ServerID("my id"), 133 | Address: ServerAddress("over here"), 134 | }) 135 | _, trans := NewInmemTransport(NewInmemAddr()) 136 | sink, err := snap.Create(SnapshotVersionMax, 10, 3, configuration, 2, trans) 137 | if err != nil { 138 | t.Fatalf("err: %v", err) 139 | } 140 | 141 | // Write to the sink 142 | _, err = sink.Write([]byte("data\n")) 143 | if err != nil { 144 | t.Fatalf("err: %v", err) 145 | } 146 | err = sink.Close() 147 | if err != nil { 148 | t.Fatalf("err: %v", err) 149 | } 150 | 151 | // Read the snapshot a first time 152 | _, r, err := snap.Open(sink.ID()) 153 | if err != nil { 154 | t.Fatalf("err: %v", err) 155 | } 156 | 157 | // Read out everything 158 | var buf1 bytes.Buffer 159 | if _, err = io.Copy(&buf1, r); err != nil { 160 | t.Fatalf("err: %v", err) 161 | } 162 | if err = r.Close(); err != nil { 163 | t.Fatalf("err: %v", err) 164 | } 165 | 166 | // Ensure a match 167 | if bytes.Compare(buf1.Bytes(), []byte("data\n")) != 0 { 168 | t.Fatalf("content mismatch") 169 | } 170 | 171 | // Read the snapshot a second time. 172 | _, r, err = snap.Open(sink.ID()) 173 | if err != nil { 174 | t.Fatalf("err: %v", err) 175 | } 176 | 177 | // Read out everything again 178 | var buf2 bytes.Buffer 179 | if _, err := io.Copy(&buf2, r); err != nil { 180 | t.Fatalf("err: %v", err) 181 | } 182 | if err := r.Close(); err != nil { 183 | t.Fatalf("err: %v", err) 184 | } 185 | 186 | // Ensure it's still the same content 187 | if bytes.Compare(buf2.Bytes(), []byte("data\n")) != 0 { 188 | t.Fatalf("content mismatch") 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /inmem_store.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "errors" 8 | "sync" 9 | ) 10 | 11 | // InmemStore implements the LogStore and StableStore interface. 12 | // It should NOT EVER be used for production. It is used only for 13 | // unit tests. Use the MDBStore implementation instead. 
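// A minimal illustrative use in a test looks like:
//
//	store := NewInmemStore()
//	_ = store.StoreLog(&Log{Index: 1, Term: 1, Type: LogCommand, Data: []byte("op")})
//	first, _ := store.FirstIndex() // 1
//	last, _ := store.LastIndex()   // 1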
14 | type InmemStore struct { 15 | l sync.RWMutex 16 | lowIndex uint64 17 | highIndex uint64 18 | logs map[uint64]*Log 19 | kv map[string][]byte 20 | kvInt map[string]uint64 21 | } 22 | 23 | // NewInmemStore returns a new in-memory backend. Do not ever 24 | // use for production. Only for testing. 25 | func NewInmemStore() *InmemStore { 26 | i := &InmemStore{ 27 | logs: make(map[uint64]*Log), 28 | kv: make(map[string][]byte), 29 | kvInt: make(map[string]uint64), 30 | } 31 | return i 32 | } 33 | 34 | // FirstIndex implements the LogStore interface. 35 | func (i *InmemStore) FirstIndex() (uint64, error) { 36 | i.l.RLock() 37 | defer i.l.RUnlock() 38 | return i.lowIndex, nil 39 | } 40 | 41 | // LastIndex implements the LogStore interface. 42 | func (i *InmemStore) LastIndex() (uint64, error) { 43 | i.l.RLock() 44 | defer i.l.RUnlock() 45 | return i.highIndex, nil 46 | } 47 | 48 | // GetLog implements the LogStore interface. 49 | func (i *InmemStore) GetLog(index uint64, log *Log) error { 50 | i.l.RLock() 51 | defer i.l.RUnlock() 52 | l, ok := i.logs[index] 53 | if !ok { 54 | return ErrLogNotFound 55 | } 56 | *log = *l 57 | return nil 58 | } 59 | 60 | // StoreLog implements the LogStore interface. 61 | func (i *InmemStore) StoreLog(log *Log) error { 62 | return i.StoreLogs([]*Log{log}) 63 | } 64 | 65 | // StoreLogs implements the LogStore interface. 66 | func (i *InmemStore) StoreLogs(logs []*Log) error { 67 | i.l.Lock() 68 | defer i.l.Unlock() 69 | for _, l := range logs { 70 | i.logs[l.Index] = l 71 | if i.lowIndex == 0 { 72 | i.lowIndex = l.Index 73 | } 74 | if l.Index > i.highIndex { 75 | i.highIndex = l.Index 76 | } 77 | } 78 | return nil 79 | } 80 | 81 | // DeleteRange implements the LogStore interface. 82 | func (i *InmemStore) DeleteRange(min, max uint64) error { 83 | i.l.Lock() 84 | defer i.l.Unlock() 85 | for j := min; j <= max; j++ { 86 | delete(i.logs, j) 87 | } 88 | if min <= i.lowIndex { 89 | i.lowIndex = max + 1 90 | } 91 | if max >= i.highIndex { 92 | i.highIndex = min - 1 93 | } 94 | if i.lowIndex > i.highIndex { 95 | i.lowIndex = 0 96 | i.highIndex = 0 97 | } 98 | return nil 99 | } 100 | 101 | // Set implements the StableStore interface. 102 | func (i *InmemStore) Set(key []byte, val []byte) error { 103 | i.l.Lock() 104 | defer i.l.Unlock() 105 | i.kv[string(key)] = val 106 | return nil 107 | } 108 | 109 | // Get implements the StableStore interface. 110 | func (i *InmemStore) Get(key []byte) ([]byte, error) { 111 | i.l.RLock() 112 | defer i.l.RUnlock() 113 | val := i.kv[string(key)] 114 | if val == nil { 115 | return nil, errors.New("not found") 116 | } 117 | return val, nil 118 | } 119 | 120 | // SetUint64 implements the StableStore interface. 121 | func (i *InmemStore) SetUint64(key []byte, val uint64) error { 122 | i.l.Lock() 123 | defer i.l.Unlock() 124 | i.kvInt[string(key)] = val 125 | return nil 126 | } 127 | 128 | // GetUint64 implements the StableStore interface. 129 | func (i *InmemStore) GetUint64(key []byte) (uint64, error) { 130 | i.l.RLock() 131 | defer i.l.RUnlock() 132 | return i.kvInt[string(key)], nil 133 | } 134 | -------------------------------------------------------------------------------- /inmem_transport_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 
2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "testing" 8 | "time" 9 | 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestInmemTransportImpl(t *testing.T) { 14 | var inm interface{} = &InmemTransport{} 15 | if _, ok := inm.(Transport); !ok { 16 | t.Fatalf("InmemTransport is not a Transport") 17 | } 18 | if _, ok := inm.(LoopbackTransport); !ok { 19 | t.Fatalf("InmemTransport is not a Loopback Transport") 20 | } 21 | if _, ok := inm.(WithPeers); !ok { 22 | t.Fatalf("InmemTransport is not a WithPeers Transport") 23 | } 24 | } 25 | 26 | func TestInmemTransportWriteTimeout(t *testing.T) { 27 | // InmemTransport should timeout if the other end has gone away 28 | // when it tries to send a request. 29 | // Use unbuffered channels so that we can see the write failing 30 | // without having to contrive to fill up the buffer first. 31 | timeout := 10 * time.Millisecond 32 | t1 := &InmemTransport{ 33 | consumerCh: make(chan RPC), 34 | localAddr: NewInmemAddr(), 35 | peers: make(map[ServerAddress]*InmemTransport), 36 | timeout: timeout, 37 | } 38 | t2 := &InmemTransport{ 39 | consumerCh: make(chan RPC), 40 | localAddr: NewInmemAddr(), 41 | peers: make(map[ServerAddress]*InmemTransport), 42 | timeout: timeout, 43 | } 44 | a2 := t2.LocalAddr() 45 | t1.Connect(a2, t2) 46 | 47 | stop := make(chan struct{}) 48 | stopped := make(chan struct{}) 49 | go func() { 50 | defer close(stopped) 51 | var i uint64 52 | for { 53 | select { 54 | case <-stop: 55 | return 56 | case rpc := <-t2.Consumer(): 57 | i++ 58 | rpc.Respond(&AppendEntriesResponse{ 59 | Success: true, 60 | LastLog: i, 61 | }, nil) 62 | } 63 | } 64 | }() 65 | 66 | var resp AppendEntriesResponse 67 | // Sanity check that sending is working before stopping the 68 | // responder. 69 | err := t1.AppendEntries("server1", a2, &AppendEntriesRequest{}, &resp) 70 | NoErr(err, t) 71 | require.True(t, resp.LastLog == 1) 72 | 73 | close(stop) 74 | select { 75 | case <-stopped: 76 | case <-time.After(time.Second): 77 | t.Fatalf("timed out waiting for responder to stop") 78 | } 79 | 80 | err = t1.AppendEntries("server1", a2, &AppendEntriesRequest{}, &resp) 81 | if err == nil { 82 | t.Fatalf("expected AppendEntries to time out") 83 | } 84 | if err.Error() != "send timed out" { 85 | t.Fatalf("unexpected error: %v", err) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /log.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | 10 | metrics "github.com/hashicorp/go-metrics/compat" 11 | ) 12 | 13 | // LogType describes various types of log entries. 14 | type LogType uint8 15 | 16 | const ( 17 | // LogCommand is applied to a user FSM. 18 | LogCommand LogType = iota 19 | 20 | // LogNoop is used to assert leadership. 21 | LogNoop 22 | 23 | // LogAddPeerDeprecated is used to add a new peer. This should only be used with 24 | // older protocol versions designed to be compatible with unversioned 25 | // Raft servers. See comments in config.go for details. 26 | LogAddPeerDeprecated 27 | 28 | // LogRemovePeerDeprecated is used to remove an existing peer. This should only be 29 | // used with older protocol versions designed to be compatible with 30 | // unversioned Raft servers. See comments in config.go for details. 
31 | LogRemovePeerDeprecated 32 | 33 | // LogBarrier is used to ensure all preceding operations have been 34 | // applied to the FSM. It is similar to LogNoop, but instead of returning 35 | // once committed, it only returns once the FSM manager acks it. Otherwise, 36 | // it is possible there are operations committed but not yet applied to 37 | // the FSM. 38 | LogBarrier 39 | 40 | // LogConfiguration establishes a membership change configuration. It is 41 | // created when a server is added, removed, promoted, etc. Only used 42 | // when protocol version 1 or greater is in use. 43 | LogConfiguration 44 | ) 45 | 46 | // String returns LogType as a human readable string. 47 | func (lt LogType) String() string { 48 | switch lt { 49 | case LogCommand: 50 | return "LogCommand" 51 | case LogNoop: 52 | return "LogNoop" 53 | case LogAddPeerDeprecated: 54 | return "LogAddPeerDeprecated" 55 | case LogRemovePeerDeprecated: 56 | return "LogRemovePeerDeprecated" 57 | case LogBarrier: 58 | return "LogBarrier" 59 | case LogConfiguration: 60 | return "LogConfiguration" 61 | default: 62 | return fmt.Sprintf("%d", lt) 63 | } 64 | } 65 | 66 | // Log entries are replicated to all members of the Raft cluster 67 | // and form the heart of the replicated state machine. 68 | type Log struct { 69 | // Index holds the index of the log entry. 70 | Index uint64 71 | 72 | // Term holds the election term of the log entry. 73 | Term uint64 74 | 75 | // Type holds the type of the log entry. 76 | Type LogType 77 | 78 | // Data holds the log entry's type-specific data. 79 | Data []byte 80 | 81 | // Extensions holds an opaque byte slice of information for middleware. It 82 | // is up to the client of the library to properly modify this as it adds 83 | // layers and remove those layers when appropriate. This value is a part of 84 | // the log, so very large values could cause timing issues. 85 | // 86 | // N.B. It is _up to the client_ to handle upgrade paths. For instance if 87 | // using this with go-raftchunking, the client should ensure that all Raft 88 | // peers are using a version that can handle that extension before ever 89 | // actually triggering chunking behavior. It is sometimes sufficient to 90 | // ensure that non-leaders are upgraded first, then the current leader is 91 | // upgraded, but a leader changeover during this process could lead to 92 | // trouble, so gating extension behavior via some flag in the client 93 | // program is also a good idea. 94 | Extensions []byte 95 | 96 | // AppendedAt stores the time the leader first appended this log to it's 97 | // LogStore. Followers will observe the leader's time. It is not used for 98 | // coordination or as part of the replication protocol at all. It exists only 99 | // to provide operational information for example how many seconds worth of 100 | // logs are present on the leader which might impact follower's ability to 101 | // catch up after restoring a large snapshot. We should never rely on this 102 | // being in the past when appending on a follower or reading a log back since 103 | // the clock skew can mean a follower could see a log with a future timestamp. 104 | // In general too the leader is not required to persist the log before 105 | // delivering to followers although the current implementation happens to do 106 | // this. 107 | AppendedAt time.Time 108 | } 109 | 110 | // LogStore is used to provide an interface for storing 111 | // and retrieving logs in a durable fashion. 
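// Both in-package implementations can be checked against it at compile time, for example:
//
//	var _ LogStore = (*InmemStore)(nil)
//	var _ LogStore = (*LogCache)(nil)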
112 | type LogStore interface { 113 | // FirstIndex returns the first index written. 0 for no entries. 114 | FirstIndex() (uint64, error) 115 | 116 | // LastIndex returns the last index written. 0 for no entries. 117 | LastIndex() (uint64, error) 118 | 119 | // GetLog gets a log entry at a given index. 120 | GetLog(index uint64, log *Log) error 121 | 122 | // StoreLog stores a log entry. 123 | StoreLog(log *Log) error 124 | 125 | // StoreLogs stores multiple log entries. By default the logs stored may not be contiguous with previous logs (i.e. may have a gap in Index since the last log written). If an implementation can't tolerate this it may optionally implement `MonotonicLogStore` to indicate that this is not allowed. This changes Raft's behaviour after restoring a user snapshot to remove all previous logs instead of relying on a "gap" to signal the discontinuity between logs before the snapshot and logs after. 126 | StoreLogs(logs []*Log) error 127 | 128 | // DeleteRange deletes a range of log entries. The range is inclusive. 129 | DeleteRange(min, max uint64) error 130 | } 131 | 132 | // MonotonicLogStore is an optional interface for LogStore implementations that 133 | // cannot tolerate gaps in between the Index values of consecutive log entries. For example, 134 | // this may allow more efficient indexing because the Index values are densely populated. If true is 135 | // returned, Raft will avoid relying on gaps to trigger re-synching logs on followers after a 136 | // snapshot is restored. The LogStore must have an efficient implementation of 137 | // DeleteLogs for the case where all logs are removed, as this must be called after snapshot restore when gaps are not allowed. 138 | // We avoid deleting all records for LogStores that do not implement MonotonicLogStore 139 | // because although it's always correct to do so, it has a major negative performance impact on the BoltDB store that is currently 140 | // the most widely used. 141 | type MonotonicLogStore interface { 142 | IsMonotonic() bool 143 | } 144 | 145 | func oldestLog(s LogStore) (Log, error) { 146 | var l Log 147 | 148 | // We might get unlucky and have a truncate right between getting first log 149 | // index and fetching it so keep trying until we succeed or hard fail. 150 | var lastFailIdx uint64 151 | var lastErr error 152 | for { 153 | firstIdx, err := s.FirstIndex() 154 | if err != nil { 155 | return l, err 156 | } 157 | if firstIdx == 0 { 158 | return l, ErrLogNotFound 159 | } 160 | if firstIdx == lastFailIdx { 161 | // Got same index as last time around which errored, don't bother trying 162 | // to fetch it again just return the error. 
163 | return l, lastErr 164 | } 165 | err = s.GetLog(firstIdx, &l) 166 | if err == nil { 167 | // We found the oldest log, break the loop 168 | break 169 | } 170 | // We failed, keep trying to see if there is a new firstIndex 171 | lastFailIdx = firstIdx 172 | lastErr = err 173 | } 174 | return l, nil 175 | } 176 | 177 | func emitLogStoreMetrics(s LogStore, prefix []string, interval time.Duration, stopCh <-chan struct{}) { 178 | for { 179 | select { 180 | case <-time.After(interval): 181 | // In error case emit 0 as the age 182 | ageMs := float32(0.0) 183 | l, err := oldestLog(s) 184 | if err == nil && !l.AppendedAt.IsZero() { 185 | ageMs = float32(time.Since(l.AppendedAt).Milliseconds()) 186 | } 187 | metrics.SetGauge(append(prefix, "oldestLogAge"), ageMs) 188 | case <-stopCh: 189 | return 190 | } 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /log_cache.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "fmt" 8 | "sync" 9 | ) 10 | 11 | // LogCache wraps any LogStore implementation to provide an 12 | // in-memory ring buffer. This is used to cache access to 13 | // the recently written entries. For implementations that do not 14 | // cache themselves, this can provide a substantial boost by 15 | // avoiding disk I/O on recent entries. 16 | type LogCache struct { 17 | store LogStore 18 | 19 | cache []*Log 20 | l sync.RWMutex 21 | } 22 | 23 | // NewLogCache is used to create a new LogCache with the 24 | // given capacity and backend store. 25 | func NewLogCache(capacity int, store LogStore) (*LogCache, error) { 26 | if capacity <= 0 { 27 | return nil, fmt.Errorf("capacity must be positive") 28 | } 29 | c := &LogCache{ 30 | store: store, 31 | cache: make([]*Log, capacity), 32 | } 33 | return c, nil 34 | } 35 | 36 | // IsMonotonic implements the MonotonicLogStore interface. This is a shim to 37 | // expose the underlying store as monotonically indexed or not. 
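// For example, wrapping the in-memory test store (which does not implement
// MonotonicLogStore) reports false:
//
//	c, _ := NewLogCache(512, NewInmemStore())
//	_ = c.IsMonotonic() // false: the backing store is not monotonic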
38 | func (c *LogCache) IsMonotonic() bool { 39 | if store, ok := c.store.(MonotonicLogStore); ok { 40 | return store.IsMonotonic() 41 | } 42 | 43 | return false 44 | } 45 | 46 | func (c *LogCache) GetLog(idx uint64, log *Log) error { 47 | // Check the buffer for an entry 48 | c.l.RLock() 49 | cached := c.cache[idx%uint64(len(c.cache))] 50 | c.l.RUnlock() 51 | 52 | // Check if entry is valid 53 | if cached != nil && cached.Index == idx { 54 | *log = *cached 55 | return nil 56 | } 57 | 58 | // Forward request on cache miss 59 | return c.store.GetLog(idx, log) 60 | } 61 | 62 | func (c *LogCache) StoreLog(log *Log) error { 63 | return c.StoreLogs([]*Log{log}) 64 | } 65 | 66 | func (c *LogCache) StoreLogs(logs []*Log) error { 67 | err := c.store.StoreLogs(logs) 68 | // Insert the logs into the ring buffer, but only on success 69 | if err != nil { 70 | return fmt.Errorf("unable to store logs within log store, err: %q", err) 71 | } 72 | c.l.Lock() 73 | for _, l := range logs { 74 | c.cache[l.Index%uint64(len(c.cache))] = l 75 | } 76 | c.l.Unlock() 77 | return nil 78 | } 79 | 80 | func (c *LogCache) FirstIndex() (uint64, error) { 81 | return c.store.FirstIndex() 82 | } 83 | 84 | func (c *LogCache) LastIndex() (uint64, error) { 85 | return c.store.LastIndex() 86 | } 87 | 88 | func (c *LogCache) DeleteRange(min, max uint64) error { 89 | // Invalidate the cache on deletes 90 | c.l.Lock() 91 | c.cache = make([]*Log, len(c.cache)) 92 | c.l.Unlock() 93 | 94 | return c.store.DeleteRange(min, max) 95 | } 96 | -------------------------------------------------------------------------------- /log_cache_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "errors" 8 | "strings" 9 | "sync" 10 | "testing" 11 | ) 12 | 13 | func TestLogCache(t *testing.T) { 14 | store := NewInmemStore() 15 | c, _ := NewLogCache(16, store) 16 | 17 | // Insert into the in-mem store 18 | for i := 0; i < 32; i++ { 19 | log := &Log{Index: uint64(i) + 1} 20 | store.StoreLog(log) 21 | } 22 | 23 | // Check the indexes 24 | if idx, _ := c.FirstIndex(); idx != 1 { 25 | t.Fatalf("bad: %d", idx) 26 | } 27 | if idx, _ := c.LastIndex(); idx != 32 { 28 | t.Fatalf("bad: %d", idx) 29 | } 30 | 31 | // Try get log with a miss 32 | var out Log 33 | err := c.GetLog(1, &out) 34 | if err != nil { 35 | t.Fatalf("err: %v", err) 36 | } 37 | if out.Index != 1 { 38 | t.Fatalf("bad: %#v", out) 39 | } 40 | 41 | // Store logs 42 | l1 := &Log{Index: 33} 43 | l2 := &Log{Index: 34} 44 | err = c.StoreLogs([]*Log{l1, l2}) 45 | if err != nil { 46 | t.Fatalf("err: %v", err) 47 | } 48 | 49 | if idx, _ := c.LastIndex(); idx != 34 { 50 | t.Fatalf("bad: %d", idx) 51 | } 52 | 53 | // Check that it wrote-through 54 | err = store.GetLog(33, &out) 55 | if err != nil { 56 | t.Fatalf("err: %v", err) 57 | } 58 | err = store.GetLog(34, &out) 59 | if err != nil { 60 | t.Fatalf("err: %v", err) 61 | } 62 | 63 | // Delete in the backend 64 | err = store.DeleteRange(33, 34) 65 | if err != nil { 66 | t.Fatalf("err: %v", err) 67 | } 68 | 69 | // Should be in the ring buffer 70 | err = c.GetLog(33, &out) 71 | if err != nil { 72 | t.Fatalf("err: %v", err) 73 | } 74 | err = c.GetLog(34, &out) 75 | if err != nil { 76 | t.Fatalf("err: %v", err) 77 | } 78 | 79 | // Purge the ring buffer 80 | err = c.DeleteRange(33, 34) 81 | if err != nil { 82 | t.Fatalf("err: %v", err) 83 | } 84 | 85 | // Should not be in the ring buffer 86 | err = 
c.GetLog(33, &out) 87 | if err != ErrLogNotFound { 88 | t.Fatalf("err: %v", err) 89 | } 90 | err = c.GetLog(34, &out) 91 | if err != ErrLogNotFound { 92 | t.Fatalf("err: %v", err) 93 | } 94 | } 95 | 96 | type errorStore struct { 97 | LogStore 98 | mu sync.Mutex 99 | fail bool 100 | failed int 101 | failMax int 102 | } 103 | 104 | func (e *errorStore) StoreLogs(logs []*Log) error { 105 | e.mu.Lock() 106 | defer e.mu.Unlock() 107 | if e.fail { 108 | e.failed++ 109 | if e.failed <= e.failMax { 110 | return errors.New("some error") 111 | } 112 | e.fail = false 113 | } 114 | return e.LogStore.StoreLogs(logs) 115 | } 116 | 117 | func (e *errorStore) failNext(count int) { 118 | e.mu.Lock() 119 | e.fail = true 120 | e.failMax = count 121 | e.mu.Unlock() 122 | } 123 | 124 | func TestLogCacheWithBackendStoreError(t *testing.T) { 125 | var err error 126 | store := NewInmemStore() 127 | errStore := &errorStore{LogStore: store} 128 | c, _ := NewLogCache(16, errStore) 129 | 130 | for i := 0; i < 4; i++ { 131 | log := &Log{Index: uint64(i) + 1} 132 | store.StoreLog(log) 133 | } 134 | errStore.failNext(1) 135 | log := &Log{Index: 5} 136 | err = c.StoreLog(log) 137 | if !strings.Contains(err.Error(), "some error") { 138 | t.Fatalf("wanted: some error, got err=%v", err) 139 | } 140 | 141 | var out Log 142 | for i := 1; i < 5; i++ { 143 | if err := c.GetLog(uint64(i), &out); err != nil { 144 | t.Fatalf("err: %v", err) 145 | } 146 | } 147 | out = Log{} 148 | if err = c.GetLog(5, &out); err != ErrLogNotFound { 149 | t.Fatalf("Should have returned not found, got err=%v out=%+v", err, out) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /log_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 
2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "bytes" 8 | "fmt" 9 | "testing" 10 | "time" 11 | 12 | metrics "github.com/hashicorp/go-metrics/compat" 13 | ) 14 | 15 | func TestOldestLog(t *testing.T) { 16 | cases := []struct { 17 | Name string 18 | Logs []*Log 19 | WantIdx uint64 20 | WantErr bool 21 | }{ 22 | { 23 | Name: "empty logs", 24 | Logs: nil, 25 | WantIdx: 0, 26 | WantErr: true, 27 | }, 28 | { 29 | Name: "simple case", 30 | Logs: []*Log{ 31 | { 32 | Index: 1234, 33 | Term: 1, 34 | }, 35 | { 36 | Index: 1235, 37 | Term: 1, 38 | }, 39 | { 40 | Index: 1236, 41 | Term: 2, 42 | }, 43 | }, 44 | WantIdx: 1234, 45 | WantErr: false, 46 | }, 47 | } 48 | 49 | for _, tc := range cases { 50 | tc := tc 51 | t.Run(tc.Name, func(t *testing.T) { 52 | s := NewInmemStore() 53 | if err := s.StoreLogs(tc.Logs); err != nil { 54 | t.Fatalf("expected store logs not to fail: %s", err) 55 | } 56 | 57 | got, err := oldestLog(s) 58 | switch { 59 | case tc.WantErr && err == nil: 60 | t.Fatalf("wanted error got nil") 61 | case !tc.WantErr && err != nil: 62 | t.Fatalf("wanted no error got: %s", err) 63 | } 64 | 65 | if got.Index != tc.WantIdx { 66 | t.Fatalf("got index %v, want %v", got.Index, tc.WantIdx) 67 | } 68 | }) 69 | } 70 | } 71 | 72 | func TestEmitsLogStoreMetrics(t *testing.T) { 73 | sink := testSetupMetrics(t) 74 | 75 | start := time.Now() 76 | 77 | s := NewInmemStore() 78 | logs := []*Log{ 79 | { 80 | Index: 1234, 81 | Term: 1, 82 | AppendedAt: time.Now(), 83 | }, 84 | { 85 | Index: 1235, 86 | Term: 1, 87 | }, 88 | { 89 | Index: 1236, 90 | Term: 2, 91 | }, 92 | } 93 | if err := s.StoreLogs(logs); err != nil { 94 | t.Fatalf("expected store logs not to fail: %s", err) 95 | } 96 | 97 | stopCh := make(chan struct{}) 98 | defer close(stopCh) 99 | 100 | go emitLogStoreMetrics(s, []string{"foo"}, time.Millisecond, stopCh) 101 | 102 | // Wait for at least one interval 103 | time.Sleep(5 * time.Millisecond) 104 | 105 | got := getCurrentGaugeValue(t, sink, "raft.test.foo.oldestLogAge") 106 | 107 | // Assert the age is in a reasonable range. 108 | if got > float32(time.Since(start).Milliseconds()) { 109 | t.Fatalf("max age before test start: %v", got) 110 | } 111 | 112 | if got < 1 { 113 | t.Fatalf("max age less than interval: %v", got) 114 | } 115 | } 116 | 117 | func testSetupMetrics(t *testing.T) *metrics.InmemSink { 118 | // Record for ages (5 mins) so we can be confident that our assertions won't 119 | // fail on silly long test runs due to dropped data. 
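// NewInmemSink(interval, retain): aggregate into 10-second buckets, kept for 5 minutes.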
120 | s := metrics.NewInmemSink(10*time.Second, 300*time.Second) 121 | cfg := metrics.DefaultConfig("raft.test") 122 | cfg.EnableHostname = false 123 | metrics.NewGlobal(cfg, s) 124 | return s 125 | } 126 | 127 | func getCurrentGaugeValue(t *testing.T, sink *metrics.InmemSink, name string) float32 { 128 | t.Helper() 129 | 130 | data := sink.Data() 131 | 132 | // Loop backward through intervals until there is a non-empty one 133 | // Addresses flakiness around recording to one interval but accessing during the next 134 | for i := len(data) - 1; i >= 0; i-- { 135 | currentInterval := data[i] 136 | 137 | currentInterval.RLock() 138 | if gv, ok := currentInterval.Gauges[name]; ok { 139 | currentInterval.RUnlock() 140 | return gv.Value 141 | } 142 | currentInterval.RUnlock() 143 | } 144 | 145 | // Debug print all the gauges 146 | buf := bytes.NewBuffer(nil) 147 | for _, intv := range data { 148 | intv.RLock() 149 | for name, val := range intv.Gauges { 150 | fmt.Fprintf(buf, "[%v][G] '%s': %0.3f\n", intv.Interval, name, val.Value) 151 | } 152 | intv.RUnlock() 153 | } 154 | t.Log(buf.String()) 155 | 156 | t.Fatalf("didn't find gauge %q", name) 157 | return 0 158 | } 159 | -------------------------------------------------------------------------------- /membership.md: -------------------------------------------------------------------------------- 1 | Simon (@superfell) and I (@ongardie) talked through reworking this library's cluster membership changes last Friday. We don't see a way to split this into independent patches, so we're taking the next best approach: submitting the plan here for review, then working on an enormous PR. Your feedback would be appreciated. (@superfell is out this week, however, so don't expect him to respond quickly.) 2 | 3 | These are the main goals: 4 | - Bringing things in line with the description in my PhD dissertation; 5 | - Catching up new servers prior to granting them a vote, as well as allowing permanent non-voting members; and 6 | - Eliminating the `peers.json` file, to avoid issues of consistency between that and the log/snapshot. 7 | 8 | ## Data-centric view 9 | 10 | We propose to re-define a *configuration* as a set of servers, where each server includes an address (as it does today) and a mode that is either: 11 | - *Voter*: a server whose vote is counted in elections and whose match index is used in advancing the leader's commit index. 12 | - *Nonvoter*: a server that receives log entries but is not considered for elections or commitment purposes. 13 | - *Staging*: a server that acts like a nonvoter with one exception: once a staging server receives enough log entries to catch up sufficiently to the leader's log, the leader will invoke a membership change to change the staging server to a voter. 14 | 15 | All changes to the configuration will be done by writing a new configuration to the log. The new configuration will be in affect as soon as it is appended to the log (not when it is committed like a normal state machine command). Note that, per my dissertation, there can be at most one uncommitted configuration at a time (the next configuration may not be created until the prior one has been committed). It's not strictly necessary to follow these same rules for the nonvoter/staging servers, but we think its best to treat all changes uniformly. 16 | 17 | Each server will track two configurations: 18 | 1. its *committed configuration*: the latest configuration in the log/snapshot that has been committed, along with its index. 19 | 2. 
its *latest configuration*: the latest configuration in the log/snapshot (may be committed or uncommitted), along with its index. 20 | 21 | When there's no membership change happening, these two will be the same. The latest configuration is almost always the one used, except: 22 | - When followers truncate the suffix of their logs, they may need to fall back to the committed configuration. 23 | - When snapshotting, the committed configuration is written, to correspond with the committed log prefix that is being snapshotted. 24 | 25 | 26 | ## Application API 27 | 28 | We propose the following operations for clients to manipulate the cluster configuration: 29 | - AddVoter: server becomes staging unless voter, 30 | - AddNonvoter: server becomes nonvoter unless staging or voter, 31 | - DemoteVoter: server becomes nonvoter unless absent, 32 | - RemovePeer: server removed from configuration, 33 | - GetConfiguration: waits for latest config to commit, returns committed config. 34 | 35 | This diagram, of which I'm quite proud, shows the possible transitions: 36 | ``` 37 | +-----------------------------------------------------------------------------+ 38 | | | 39 | | Start -> +--------+ | 40 | | ,------<------------| | | 41 | | / | absent | | 42 | | / RemovePeer--> | | <---RemovePeer | 43 | | / | +--------+ \ | 44 | | / | | \ | 45 | | AddNonvoter | AddVoter \ | 46 | | | ,->---' `--<-. | \ | 47 | | v / \ v \ | 48 | | +----------+ +----------+ +----------+ | 49 | | | | ---AddVoter--> | | -log caught up --> | | | 50 | | | nonvoter | | staging | | voter | | 51 | | | | <-DemoteVoter- | | ,- | | | 52 | | +----------+ \ +----------+ / +----------+ | 53 | | \ / | 54 | | `--------------<---------------' | 55 | | | 56 | +-----------------------------------------------------------------------------+ 57 | ``` 58 | 59 | While these operations aren't quite symmetric, we think they're a good set to capture 60 | the possible intent of the user. For example, if I want to make sure a server doesn't have a vote, but the server isn't part of the configuration at all, it probably shouldn't be added as a nonvoting server. 61 | 62 | Each of these application-level operations will be interpreted by the leader and, if it has an effect, will cause the leader to write a new configuration entry to its log. Which particular application-level operation caused the log entry to be written need not be part of the log entry. 63 | 64 | ## Code implications 65 | 66 | This is a non-exhaustive list, but we came up with a few things: 67 | - Remove the PeerStore: the `peers.json` file introduces the possibility of getting out of sync with the log and snapshot, and it's hard to maintain this atomically as the log changes. It's not clear whether it's meant to track the committed or latest configuration, either. 68 | - Servers will have to search their snapshot and log to find the committed configuration and the latest configuration on startup. 69 | - Bootstrap will no longer use `peers.json` but should initialize the log or snapshot with an application-provided configuration entry. 70 | - Snapshots should store the index of their configuration along with the configuration itself. In my experience with LogCabin, the original log index of the configuration is very useful to include in debug log messages. 71 | - As noted in hashicorp/raft#84, configuration change requests should come in via a separate channel, and one may not proceed until the last has been committed. 
72 | - As to deciding when a log is sufficiently caught up, implementing a sophisticated algorithm *is* something that can be done in a separate PR. An easy and decent placeholder is: once the staging server has reached 95% of the leader's commit index, promote it. 73 | 74 | ## Feedback 75 | 76 | Again, we're looking for feedback here before we start working on this. Here are some questions to think about: 77 | - Does this seem like where we want things to go? 78 | - Is there anything here that should be left out? 79 | - Is there anything else we're forgetting about? 80 | - Is there a good way to break this up? 81 | - What do we need to worry about in terms of backwards compatibility? 82 | - What implication will this have on current tests? 83 | - What's the best way to test this code, in particular the small changes that will be sprinkled all over the library? 84 | -------------------------------------------------------------------------------- /observer.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "sync/atomic" 8 | "time" 9 | ) 10 | 11 | // Observation is sent along the given channel to observers when an event occurs. 12 | type Observation struct { 13 | // Raft holds the Raft instance generating the observation. 14 | Raft *Raft 15 | // Data holds observation-specific data. Possible types are 16 | // RequestVoteRequest 17 | // RaftState 18 | // PeerObservation 19 | // LeaderObservation 20 | Data interface{} 21 | } 22 | 23 | // LeaderObservation is used for the data when leadership changes. 24 | type LeaderObservation struct { 25 | // DEPRECATED The LeaderAddr field should now be used 26 | Leader ServerAddress 27 | LeaderAddr ServerAddress 28 | LeaderID ServerID 29 | } 30 | 31 | // PeerObservation is sent to observers when peers change. 32 | type PeerObservation struct { 33 | Removed bool 34 | Peer Server 35 | } 36 | 37 | // FailedHeartbeatObservation is sent when a node fails to heartbeat with the leader 38 | type FailedHeartbeatObservation struct { 39 | PeerID ServerID 40 | LastContact time.Time 41 | } 42 | 43 | // ResumedHeartbeatObservation is sent when a node resumes to heartbeat with the leader following failures 44 | type ResumedHeartbeatObservation struct { 45 | PeerID ServerID 46 | } 47 | 48 | // nextObserverId is used to provide a unique ID for each observer to aid in 49 | // deregistration. 50 | var nextObserverID uint64 51 | 52 | // FilterFn is a function that can be registered in order to filter observations. 53 | // The function reports whether the observation should be included - if 54 | // it returns false, the observation will be filtered out. 55 | type FilterFn func(o *Observation) bool 56 | 57 | // Observer describes what to do with a given observation. 58 | type Observer struct { 59 | // numObserved and numDropped are performance counters for this observer. 60 | // 64 bit types must be 64 bit aligned to use with atomic operations on 61 | // 32 bit platforms, so keep them at the top of the struct. 62 | numObserved uint64 63 | numDropped uint64 64 | 65 | // channel receives observations. 66 | channel chan Observation 67 | 68 | // blocking, if true, will cause Raft to block when sending an observation 69 | // to this observer. This should generally be set to false. 70 | blocking bool 71 | 72 | // filter will be called to determine if an observation should be sent to 73 | // the channel. 
74 | filter FilterFn 75 | 76 | // id is the ID of this observer in the Raft map. 77 | id uint64 78 | } 79 | 80 | // NewObserver creates a new observer that can be registered 81 | // to make observations on a Raft instance. Observations 82 | // will be sent on the given channel if they satisfy the 83 | // given filter. 84 | // 85 | // If blocking is true, the observer will block when it can't 86 | // send on the channel, otherwise it may discard events. 87 | func NewObserver(channel chan Observation, blocking bool, filter FilterFn) *Observer { 88 | return &Observer{ 89 | channel: channel, 90 | blocking: blocking, 91 | filter: filter, 92 | id: atomic.AddUint64(&nextObserverID, 1), 93 | } 94 | } 95 | 96 | // GetNumObserved returns the number of observations. 97 | func (or *Observer) GetNumObserved() uint64 { 98 | return atomic.LoadUint64(&or.numObserved) 99 | } 100 | 101 | // GetNumDropped returns the number of dropped observations due to blocking. 102 | func (or *Observer) GetNumDropped() uint64 { 103 | return atomic.LoadUint64(&or.numDropped) 104 | } 105 | 106 | // RegisterObserver registers a new observer. 107 | func (r *Raft) RegisterObserver(or *Observer) { 108 | r.observersLock.Lock() 109 | defer r.observersLock.Unlock() 110 | r.observers[or.id] = or 111 | } 112 | 113 | // DeregisterObserver deregisters an observer. 114 | func (r *Raft) DeregisterObserver(or *Observer) { 115 | r.observersLock.Lock() 116 | defer r.observersLock.Unlock() 117 | delete(r.observers, or.id) 118 | } 119 | 120 | // observe sends an observation to every observer. 121 | func (r *Raft) observe(o interface{}) { 122 | // In general observers should not block. But in any case this isn't 123 | // disastrous as we only hold a read lock, which merely prevents 124 | // registration / deregistration of observers. 125 | r.observersLock.RLock() 126 | defer r.observersLock.RUnlock() 127 | for _, or := range r.observers { 128 | // It's wasteful to do this in the loop, but for the common case 129 | // where there are no observers we won't create any objects. 130 | ob := Observation{Raft: r, Data: o} 131 | if or.filter != nil && !or.filter(&ob) { 132 | continue 133 | } 134 | if or.channel == nil { 135 | continue 136 | } 137 | if or.blocking { 138 | or.channel <- ob 139 | atomic.AddUint64(&or.numObserved, 1) 140 | } else { 141 | select { 142 | case or.channel <- ob: 143 | atomic.AddUint64(&or.numObserved, 1) 144 | default: 145 | atomic.AddUint64(&or.numDropped, 1) 146 | } 147 | } 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /peersjson.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "bytes" 8 | "encoding/json" 9 | "os" 10 | ) 11 | 12 | // ReadPeersJSON consumes a legacy peers.json file in the format of the old JSON 13 | // peer store and creates a new-style configuration structure. This can be used 14 | // to migrate this data or perform manual recovery when running protocol versions 15 | // that can interoperate with older, unversioned Raft servers. This should not be 16 | // used once server IDs are in use, because the old peers.json file didn't have 17 | // support for these, nor non-voter suffrage types. 18 | func ReadPeersJSON(path string) (Configuration, error) { 19 | // Read in the file. 20 | buf, err := os.ReadFile(path) 21 | if err != nil { 22 | return Configuration{}, err 23 | } 24 | 25 | // Parse it as JSON. 
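// The legacy file is a bare JSON array of "host:port" strings, for example:
//
//	["10.0.0.1:8300", "10.0.0.2:8300", "10.0.0.3:8300"]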
26 | var peers []string 27 | dec := json.NewDecoder(bytes.NewReader(buf)) 28 | if err := dec.Decode(&peers); err != nil { 29 | return Configuration{}, err 30 | } 31 | 32 | // Map it into the new-style configuration structure. We can only specify 33 | // voter roles here, and the ID has to be the same as the address. 34 | var configuration Configuration 35 | for _, peer := range peers { 36 | server := Server{ 37 | Suffrage: Voter, 38 | ID: ServerID(peer), 39 | Address: ServerAddress(peer), 40 | } 41 | configuration.Servers = append(configuration.Servers, server) 42 | } 43 | 44 | // We should only ingest valid configurations. 45 | if err := checkConfiguration(configuration); err != nil { 46 | return Configuration{}, err 47 | } 48 | return configuration, nil 49 | } 50 | 51 | // configEntry is used when decoding a new-style peers.json. 52 | type configEntry struct { 53 | // ID is the ID of the server (a UUID, usually). 54 | ID ServerID `json:"id"` 55 | 56 | // Address is the host:port of the server. 57 | Address ServerAddress `json:"address"` 58 | 59 | // NonVoter controls the suffrage. We choose this sense so people 60 | // can leave this out and get a Voter by default. 61 | NonVoter bool `json:"non_voter"` 62 | } 63 | 64 | // ReadConfigJSON reads a new-style peers.json and returns a configuration 65 | // structure. This can be used to perform manual recovery when running protocol 66 | // versions that use server IDs. 67 | func ReadConfigJSON(path string) (Configuration, error) { 68 | // Read in the file. 69 | buf, err := os.ReadFile(path) 70 | if err != nil { 71 | return Configuration{}, err 72 | } 73 | 74 | // Parse it as JSON. 75 | var peers []configEntry 76 | dec := json.NewDecoder(bytes.NewReader(buf)) 77 | if err := dec.Decode(&peers); err != nil { 78 | return Configuration{}, err 79 | } 80 | 81 | // Map it into the new-style configuration structure. 82 | var configuration Configuration 83 | for _, peer := range peers { 84 | suffrage := Voter 85 | if peer.NonVoter { 86 | suffrage = Nonvoter 87 | } 88 | server := Server{ 89 | Suffrage: suffrage, 90 | ID: peer.ID, 91 | Address: peer.Address, 92 | } 93 | configuration.Servers = append(configuration.Servers, server) 94 | } 95 | 96 | // We should only ingest valid configurations. 97 | if err := checkConfiguration(configuration); err != nil { 98 | return Configuration{}, err 99 | } 100 | return configuration, nil 101 | } 102 | -------------------------------------------------------------------------------- /peersjson_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 
2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "os" 8 | "path/filepath" 9 | "reflect" 10 | "strings" 11 | "testing" 12 | ) 13 | 14 | func TestPeersJSON_BadConfiguration(t *testing.T) { 15 | var err error 16 | var base string 17 | base, err = os.MkdirTemp("", "") 18 | if err != nil { 19 | t.Fatalf("err: %v", err) 20 | } 21 | defer os.RemoveAll(base) 22 | 23 | peers := filepath.Join(base, "peers.json") 24 | if err = os.WriteFile(peers, []byte("null"), 0o666); err != nil { 25 | t.Fatalf("err: %v", err) 26 | } 27 | 28 | _, err = ReadPeersJSON(peers) 29 | if err == nil || !strings.Contains(err.Error(), "at least one voter") { 30 | t.Fatalf("err: %v", err) 31 | } 32 | } 33 | 34 | func TestPeersJSON_ReadPeersJSON(t *testing.T) { 35 | var err error 36 | var base string 37 | base, err = os.MkdirTemp("", "") 38 | if err != nil { 39 | t.Fatalf("err: %v", err) 40 | } 41 | defer os.RemoveAll(base) 42 | 43 | content := []byte(` 44 | ["127.0.0.1:123", 45 | "127.0.0.2:123", 46 | "127.0.0.3:123"] 47 | `) 48 | peers := filepath.Join(base, "peers.json") 49 | if err = os.WriteFile(peers, content, 0o666); err != nil { 50 | t.Fatalf("err: %v", err) 51 | } 52 | var configuration Configuration 53 | configuration, err = ReadPeersJSON(peers) 54 | if err != nil { 55 | t.Fatalf("err: %v", err) 56 | } 57 | 58 | expected := Configuration{ 59 | Servers: []Server{ 60 | { 61 | Suffrage: Voter, 62 | ID: ServerID("127.0.0.1:123"), 63 | Address: ServerAddress("127.0.0.1:123"), 64 | }, 65 | { 66 | Suffrage: Voter, 67 | ID: ServerID("127.0.0.2:123"), 68 | Address: ServerAddress("127.0.0.2:123"), 69 | }, 70 | { 71 | Suffrage: Voter, 72 | ID: ServerID("127.0.0.3:123"), 73 | Address: ServerAddress("127.0.0.3:123"), 74 | }, 75 | }, 76 | } 77 | if !reflect.DeepEqual(configuration, expected) { 78 | t.Fatalf("bad configuration: %+v != %+v", configuration, expected) 79 | } 80 | } 81 | 82 | func TestPeersJSON_ReadConfigJSON(t *testing.T) { 83 | var err error 84 | var base string 85 | base, err = os.MkdirTemp("", "") 86 | if err != nil { 87 | t.Fatalf("err: %v", err) 88 | } 89 | defer os.RemoveAll(base) 90 | 91 | content := []byte(` 92 | [ 93 | { 94 | "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e", 95 | "address": "127.0.0.1:123", 96 | "non_voter": false 97 | }, 98 | { 99 | "id": "8b6dda82-3103-11e7-93ae-92361f002671", 100 | "address": "127.0.0.2:123" 101 | }, 102 | { 103 | "id": "97e17742-3103-11e7-93ae-92361f002671", 104 | "address": "127.0.0.3:123", 105 | "non_voter": true 106 | } 107 | ] 108 | `) 109 | peers := filepath.Join(base, "peers.json") 110 | if err = os.WriteFile(peers, content, 0o666); err != nil { 111 | t.Fatalf("err: %v", err) 112 | } 113 | 114 | var configuration Configuration 115 | configuration, err = ReadConfigJSON(peers) 116 | if err != nil { 117 | t.Fatalf("err: %v", err) 118 | } 119 | 120 | expected := Configuration{ 121 | Servers: []Server{ 122 | { 123 | Suffrage: Voter, 124 | ID: ServerID("adf4238a-882b-9ddc-4a9d-5b6758e4159e"), 125 | Address: ServerAddress("127.0.0.1:123"), 126 | }, 127 | { 128 | Suffrage: Voter, 129 | ID: ServerID("8b6dda82-3103-11e7-93ae-92361f002671"), 130 | Address: ServerAddress("127.0.0.2:123"), 131 | }, 132 | { 133 | Suffrage: Nonvoter, 134 | ID: ServerID("97e17742-3103-11e7-93ae-92361f002671"), 135 | Address: ServerAddress("127.0.0.3:123"), 136 | }, 137 | }, 138 | } 139 | if !reflect.DeepEqual(configuration, expected) { 140 | t.Fatalf("bad configuration: %+v != %+v", configuration, expected) 141 | } 142 | } 143 | 
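For orientation, a minimal sketch of an operator-side tool that consumes a new-style peers.json via ReadConfigJSON before a manual recovery. The package layout, file path, and printing here are illustrative assumptions; only ReadConfigJSON and the Configuration/Server fields come from this library:

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/raft"
)

func main() {
	// Hypothetical location; in practice this file sits in the server's raft data directory.
	configuration, err := raft.ReadConfigJSON("/var/lib/myapp/raft/peers.json")
	if err != nil {
		log.Fatalf("reading peers.json: %v", err)
	}
	for _, s := range configuration.Servers {
		fmt.Printf("id=%s address=%s suffrage=%v\n", s.ID, s.Address, s.Suffrage)
	}
}
```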
-------------------------------------------------------------------------------- /progress.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "context" 8 | "io" 9 | "sync" 10 | "time" 11 | 12 | hclog "github.com/hashicorp/go-hclog" 13 | ) 14 | 15 | const ( 16 | snapshotRestoreMonitorInterval = 10 * time.Second 17 | ) 18 | 19 | type snapshotRestoreMonitor struct { 20 | logger hclog.Logger 21 | cr CountingReader 22 | size int64 23 | networkTransfer bool 24 | 25 | once sync.Once 26 | cancel func() 27 | doneCh chan struct{} 28 | } 29 | 30 | func startSnapshotRestoreMonitor( 31 | logger hclog.Logger, 32 | cr CountingReader, 33 | size int64, 34 | networkTransfer bool, 35 | ) *snapshotRestoreMonitor { 36 | ctx, cancel := context.WithCancel(context.Background()) 37 | 38 | m := &snapshotRestoreMonitor{ 39 | logger: logger, 40 | cr: cr, 41 | size: size, 42 | networkTransfer: networkTransfer, 43 | cancel: cancel, 44 | doneCh: make(chan struct{}), 45 | } 46 | go m.run(ctx) 47 | return m 48 | } 49 | 50 | func (m *snapshotRestoreMonitor) run(ctx context.Context) { 51 | defer close(m.doneCh) 52 | 53 | ticker := time.NewTicker(snapshotRestoreMonitorInterval) 54 | defer ticker.Stop() 55 | 56 | ranOnce := false 57 | for { 58 | select { 59 | case <-ctx.Done(): 60 | if !ranOnce { 61 | m.runOnce() 62 | } 63 | return 64 | case <-ticker.C: 65 | m.runOnce() 66 | ranOnce = true 67 | } 68 | } 69 | } 70 | 71 | func (m *snapshotRestoreMonitor) runOnce() { 72 | readBytes := m.cr.Count() 73 | pct := float64(100*readBytes) / float64(m.size) 74 | 75 | message := "snapshot restore progress" 76 | if m.networkTransfer { 77 | message = "snapshot network transfer progress" 78 | } 79 | 80 | m.logger.Info(message, 81 | "read-bytes", readBytes, 82 | "percent-complete", hclog.Fmt("%0.2f%%", pct), 83 | ) 84 | } 85 | 86 | func (m *snapshotRestoreMonitor) StopAndWait() { 87 | m.once.Do(func() { 88 | m.cancel() 89 | <-m.doneCh 90 | }) 91 | } 92 | 93 | type CountingReader interface { 94 | io.Reader 95 | Count() int64 96 | } 97 | 98 | type countingReader struct { 99 | reader io.Reader 100 | 101 | mu sync.Mutex 102 | bytes int64 103 | } 104 | 105 | func (r *countingReader) Read(p []byte) (n int, err error) { 106 | n, err = r.reader.Read(p) 107 | r.mu.Lock() 108 | r.bytes += int64(n) 109 | r.mu.Unlock() 110 | return n, err 111 | } 112 | 113 | func (r *countingReader) Count() int64 { 114 | r.mu.Lock() 115 | defer r.mu.Unlock() 116 | return r.bytes 117 | } 118 | 119 | func newCountingReader(r io.Reader) *countingReader { 120 | return &countingReader{reader: r} 121 | } 122 | 123 | type countingReadCloser struct { 124 | *countingReader 125 | readCloser io.ReadCloser 126 | } 127 | 128 | func newCountingReadCloser(rc io.ReadCloser) *countingReadCloser { 129 | return &countingReadCloser{ 130 | countingReader: newCountingReader(rc), 131 | readCloser: rc, 132 | } 133 | } 134 | 135 | func (c countingReadCloser) Close() error { 136 | return c.readCloser.Close() 137 | } 138 | 139 | func (c countingReadCloser) WrappedReadCloser() io.ReadCloser { 140 | return c.readCloser 141 | } 142 | 143 | // ReadCloserWrapper allows access to an underlying ReadCloser from a wrapper. 
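// For example, the counting wrapper defined above can be unwrapped again once a
// monitored restore has finished (rc stands in for any io.ReadCloser):
//
//	crc := newCountingReadCloser(rc)
//	// ... stream the snapshot through crc, reporting crc.Count() periodically ...
//	inner := crc.WrappedReadCloser() // the original rc that was passed in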
144 | type ReadCloserWrapper interface { 145 | io.ReadCloser 146 | WrappedReadCloser() io.ReadCloser 147 | } 148 | 149 | var _ ReadCloserWrapper = &countingReadCloser{} 150 | -------------------------------------------------------------------------------- /raft-compat/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/hashicorp/raft/compat 2 | 3 | go 1.20 4 | 5 | require github.com/stretchr/testify v1.8.4 6 | 7 | require ( 8 | github.com/armon/go-metrics v0.4.1 // indirect 9 | github.com/fatih/color v1.13.0 // indirect 10 | github.com/hashicorp/go-hclog v1.6.2 // indirect 11 | github.com/hashicorp/go-immutable-radix v1.0.0 // indirect 12 | github.com/hashicorp/go-msgpack v0.5.5 // indirect 13 | github.com/hashicorp/go-msgpack/v2 v2.1.1 // indirect 14 | github.com/hashicorp/golang-lru v0.5.0 // indirect 15 | github.com/mattn/go-colorable v0.1.12 // indirect 16 | github.com/mattn/go-isatty v0.0.14 // indirect 17 | golang.org/x/sys v0.13.0 // indirect 18 | ) 19 | 20 | replace github.com/hashicorp/raft-previous-version => ./raft-previous-version 21 | 22 | replace github.com/hashicorp/raft => ../ 23 | 24 | require ( 25 | github.com/davecgh/go-spew v1.1.1 // indirect 26 | github.com/hashicorp/raft v1.6.1 27 | github.com/hashicorp/raft-previous-version v1.2.0 28 | github.com/pmezard/go-difflib v1.0.0 // indirect 29 | gopkg.in/yaml.v3 v3.0.1 // indirect 30 | ) 31 | -------------------------------------------------------------------------------- /raft-compat/testcluster/cluster.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package testcluster 5 | 6 | import ( 7 | "fmt" 8 | "github.com/hashicorp/raft" 9 | raftprevious "github.com/hashicorp/raft-previous-version" 10 | "github.com/stretchr/testify/require" 11 | "testing" 12 | "time" 13 | ) 14 | 15 | type RaftUIT struct { 16 | raft *raft.Raft 17 | trans *raft.NetworkTransport 18 | Config *raft.Config 19 | Store *raft.InmemStore 20 | Snap *raft.InmemSnapshotStore 21 | id raft.ServerID 22 | fsm *raft.MockFSM 23 | } 24 | 25 | func (r RaftUIT) NumLogs() int { 26 | return len(r.fsm.Logs()) 27 | } 28 | 29 | func (r RaftUIT) GetLocalAddr() string { 30 | return string(r.trans.LocalAddr()) 31 | } 32 | 33 | func (r RaftUIT) GetRaft() interface{} { 34 | return r.raft 35 | } 36 | 37 | func (r RaftUIT) GetStore() interface{} { 38 | return r.Store 39 | } 40 | 41 | func (r RaftUIT) GetLocalID() string { 42 | return string(r.id) 43 | } 44 | 45 | func (r RaftUIT) GetLeaderID() string { 46 | _, id := r.raft.LeaderWithID() 47 | return string(id) 48 | } 49 | 50 | func (r *RaftCluster) ID(i int) string { 51 | return r.rafts[i].GetLocalID() 52 | } 53 | func (r *RaftCluster) Addr(i int) string { 54 | return r.rafts[i].GetLocalAddr() 55 | } 56 | 57 | func (r *RaftCluster) Raft(id string) interface{} { 58 | i := r.GetIndex(id) 59 | return r.rafts[i].GetRaft() 60 | } 61 | 62 | func (r *RaftCluster) Store(id string) interface{} { 63 | i := r.GetIndex(id) 64 | return r.rafts[i].GetStore() 65 | } 66 | 67 | type RaftLatest struct { 68 | raft *raftprevious.Raft 69 | trans *raftprevious.NetworkTransport 70 | Config *raftprevious.Config 71 | Store *raftprevious.InmemStore 72 | Snap *raftprevious.InmemSnapshotStore 73 | id raftprevious.ServerID 74 | fsm *raftprevious.MockFSM 75 | } 76 | 77 | func (r RaftLatest) NumLogs() int { 78 | return len(r.fsm.Logs()) 79 | } 80 | 81 | func (r RaftLatest) 
GetLocalAddr() string { 82 | return string(r.trans.LocalAddr()) 83 | } 84 | 85 | func (r RaftLatest) GetRaft() interface{} { 86 | return r.raft 87 | } 88 | func (r RaftLatest) GetStore() interface{} { 89 | return r.Store 90 | } 91 | 92 | func (r RaftLatest) GetLocalID() string { 93 | return string(r.id) 94 | } 95 | 96 | func (r RaftLatest) GetLeaderID() string { 97 | _, id := r.raft.LeaderWithID() 98 | return string(id) 99 | } 100 | 101 | type RaftNode interface { 102 | GetLocalID() string 103 | GetLocalAddr() string 104 | GetLeaderID() string 105 | GetRaft() interface{} 106 | GetStore() interface{} 107 | NumLogs() int 108 | } 109 | 110 | type RaftCluster struct { 111 | rafts []RaftNode 112 | } 113 | 114 | func NewRaftCluster(t *testing.T, f func(t *testing.T, id string) RaftNode, count int, name string) RaftCluster { 115 | rc := RaftCluster{} 116 | rc.rafts = make([]RaftNode, count) 117 | for i := 0; i < count; i++ { 118 | rc.rafts[i] = f(t, fmt.Sprintf("%s-%d", name, i)) 119 | } 120 | return rc 121 | } 122 | 123 | func NewPreviousRaftCluster(t *testing.T, count int, name string) RaftCluster { 124 | return NewRaftCluster(t, InitPrevious, count, name) 125 | } 126 | 127 | func NewUITRaftCluster(t *testing.T, count int, name string) RaftCluster { 128 | return NewRaftCluster(t, InitUIT, count, name) 129 | } 130 | 131 | func (r *RaftCluster) GetLeader() RaftNode { 132 | for _, n := range r.rafts { 133 | if n.GetLocalID() == n.GetLeaderID() { 134 | return n 135 | } 136 | } 137 | return nil 138 | } 139 | 140 | func (r *RaftCluster) Len() int { 141 | return len(r.rafts) 142 | } 143 | 144 | func (r *RaftCluster) AddNode(node RaftNode) { 145 | r.rafts = append([]RaftNode{node}, r.rafts...) 146 | } 147 | 148 | func (r *RaftCluster) DeleteNode(id string) { 149 | i := r.GetIndex(id) 150 | r.rafts = append(r.rafts[:i], r.rafts[i+1:]...) 
151 | } 152 | 153 | func (r *RaftCluster) GetIndex(id string) int { 154 | i := 0 155 | for _, r := range r.rafts { 156 | if r.GetLocalID() == id { 157 | return i 158 | } 159 | i++ 160 | } 161 | return -1 162 | } 163 | 164 | func InitUIT(t *testing.T, id string) RaftNode { 165 | return InitUITWithStore(t, id, nil, func(config *raft.Config) {}) 166 | } 167 | 168 | func InitUITWithStore(t *testing.T, id string, store *raftprevious.InmemStore, cfgMod func(config *raft.Config)) RaftNode { 169 | node := RaftUIT{} 170 | node.Config = raft.DefaultConfig() 171 | cfgMod(node.Config) 172 | node.Config.HeartbeatTimeout = 50 * time.Millisecond 173 | node.Config.ElectionTimeout = 50 * time.Millisecond 174 | node.Config.LeaderLeaseTimeout = 50 * time.Millisecond 175 | node.Config.CommitTimeout = 5 * time.Millisecond 176 | node.id = raft.ServerID(id) 177 | node.Config.LocalID = node.id 178 | if store != nil { 179 | node.Store = convertInMemStoreToUIT(store) 180 | } else { 181 | node.Store = raft.NewInmemStore() 182 | } 183 | 184 | node.Snap = raft.NewInmemSnapshotStore() 185 | node.fsm = &raft.MockFSM{} 186 | var err error 187 | node.trans, err = raft.NewTCPTransport("localhost:0", nil, 2, time.Second, nil) 188 | require.NoError(t, err) 189 | node.raft, err = raft.NewRaft(node.Config, node.fsm, node.Store, 190 | node.Store, node.Snap, node.trans) 191 | require.NoError(t, err) 192 | return node 193 | } 194 | 195 | func InitPrevious(t *testing.T, id string) RaftNode { 196 | return InitPreviousWithStore(t, id, nil, func(config *raftprevious.Config) { 197 | }) 198 | } 199 | 200 | func InitPreviousWithStore(t *testing.T, id string, store *raft.InmemStore, f func(config *raftprevious.Config)) RaftNode { 201 | node := RaftLatest{} 202 | node.Config = raftprevious.DefaultConfig() 203 | node.Config.HeartbeatTimeout = 50 * time.Millisecond 204 | node.Config.ElectionTimeout = 50 * time.Millisecond 205 | node.Config.LeaderLeaseTimeout = 50 * time.Millisecond 206 | node.Config.CommitTimeout = 5 * time.Millisecond 207 | node.id = raftprevious.ServerID(id) 208 | node.Config.LocalID = node.id 209 | f(node.Config) 210 | 211 | if store != nil { 212 | node.Store = convertInMemStoreToPrevious(store) 213 | } else { 214 | node.Store = raftprevious.NewInmemStore() 215 | } 216 | node.Snap = raftprevious.NewInmemSnapshotStore() 217 | node.fsm = &raftprevious.MockFSM{} 218 | var err error 219 | node.trans, err = raftprevious.NewTCPTransport("localhost:0", nil, 2, time.Second, nil) 220 | require.NoError(t, err) 221 | node.raft, err = raftprevious.NewRaft(node.Config, node.fsm, node.Store, 222 | node.Store, node.Snap, node.trans) 223 | require.NoError(t, err) 224 | return node 225 | } 226 | 227 | func convertLogToUIT(ll *raftprevious.Log) *raft.Log { 228 | l := new(raft.Log) 229 | l.Index = ll.Index 230 | l.AppendedAt = ll.AppendedAt 231 | l.Type = raft.LogType(ll.Type) 232 | l.Term = ll.Term 233 | l.Data = ll.Data 234 | l.Extensions = ll.Extensions 235 | return l 236 | } 237 | func convertLogToPrevious(ll *raft.Log) *raftprevious.Log { 238 | l := new(raftprevious.Log) 239 | l.Index = ll.Index 240 | l.AppendedAt = ll.AppendedAt 241 | l.Type = raftprevious.LogType(ll.Type) 242 | l.Term = ll.Term 243 | l.Data = ll.Data 244 | l.Extensions = ll.Extensions 245 | return l 246 | } 247 | 248 | var ( 249 | keyCurrentTerm = []byte("CurrentTerm") 250 | keyLastVoteTerm = []byte("LastVoteTerm") 251 | keyLastVoteCand = []byte("LastVoteCand") 252 | ) 253 | 254 | func convertInMemStoreToPrevious(s *raft.InmemStore) *raftprevious.InmemStore { 255 | 
ss := raftprevious.NewInmemStore() 256 | fi, _ := s.FirstIndex() 257 | li, _ := s.LastIndex() 258 | for i := fi; i <= li; i++ { 259 | log := new(raft.Log) 260 | s.GetLog(i, log) 261 | ss.StoreLog(convertLogToPrevious(log)) 262 | } 263 | 264 | get, _ := s.Get(keyCurrentTerm) 265 | ss.Set(keyCurrentTerm, get) 266 | 267 | get, _ = s.Get(keyLastVoteTerm) 268 | ss.Set(keyLastVoteTerm, get) 269 | 270 | get, _ = s.Get(keyLastVoteCand) 271 | ss.Set(keyLastVoteCand, get) 272 | 273 | get64, _ := s.GetUint64(keyCurrentTerm) 274 | ss.SetUint64(keyCurrentTerm, get64) 275 | 276 | get64, _ = s.GetUint64(keyLastVoteTerm) 277 | ss.SetUint64(keyLastVoteTerm, get64) 278 | 279 | get64, _ = s.GetUint64(keyLastVoteCand) 280 | ss.SetUint64(keyLastVoteCand, get64) 281 | 282 | return ss 283 | } 284 | 285 | func convertInMemStoreToUIT(s *raftprevious.InmemStore) *raft.InmemStore { 286 | ss := raft.NewInmemStore() 287 | fi, _ := s.FirstIndex() 288 | li, _ := s.LastIndex() 289 | for i := fi; i <= li; i++ { 290 | log := new(raftprevious.Log) 291 | s.GetLog(i, log) 292 | ss.StoreLog(convertLogToUIT(log)) 293 | } 294 | 295 | get, _ := s.Get(keyCurrentTerm) 296 | ss.Set(keyCurrentTerm, get) 297 | 298 | get, _ = s.Get(keyLastVoteTerm) 299 | ss.Set(keyLastVoteTerm, get) 300 | 301 | get, _ = s.Get(keyLastVoteCand) 302 | ss.Set(keyLastVoteCand, get) 303 | 304 | get64, _ := s.GetUint64(keyCurrentTerm) 305 | ss.SetUint64(keyCurrentTerm, get64) 306 | 307 | get64, _ = s.GetUint64(keyLastVoteTerm) 308 | ss.SetUint64(keyLastVoteTerm, get64) 309 | 310 | get64, _ = s.GetUint64(keyLastVoteCand) 311 | ss.SetUint64(keyLastVoteCand, get64) 312 | 313 | return ss 314 | } 315 | -------------------------------------------------------------------------------- /raft-compat/utils/test_utils.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package utils 5 | 6 | import ( 7 | "fmt" 8 | "github.com/hashicorp/raft" 9 | raftprevious "github.com/hashicorp/raft-previous-version" 10 | "github.com/hashicorp/raft/compat/testcluster" 11 | "github.com/stretchr/testify/require" 12 | "testing" 13 | "time" 14 | ) 15 | 16 | func WaitForNewLeader(t *testing.T, oldLeader string, c testcluster.RaftCluster) { 17 | 18 | leader := func() string { 19 | for i := 0; i < c.Len(); i++ { 20 | switch r := c.Raft(c.ID(i)).(type) { 21 | case *raft.Raft: 22 | if r.State() == raft.Leader { 23 | return c.ID(i) 24 | } 25 | case *raftprevious.Raft: 26 | if r.State() == raftprevious.Leader { 27 | return c.ID(i) 28 | } 29 | } 30 | } 31 | return "" 32 | } 33 | after := time.After(5 * time.Second) 34 | ticker := time.NewTicker(100 * time.Millisecond) 35 | for { 36 | select { 37 | case <-after: 38 | t.Fatalf("timed out waiting for a new leader") 39 | case <-ticker.C: 40 | id := leader() 41 | if id != "" { 42 | if id != oldLeader || oldLeader == "" { 43 | return 44 | } 45 | } 46 | } 47 | } 48 | } 49 | 50 | type future interface { 51 | Error() error 52 | } 53 | 54 | func WaitFuture(t *testing.T, f future) { 55 | timer := time.AfterFunc(1000*time.Millisecond, func() { 56 | panic(fmt.Errorf("timeout waiting for future %v", f)) 57 | }) 58 | defer timer.Stop() 59 | require.NoError(t, f.Error()) 60 | } 61 | -------------------------------------------------------------------------------- /saturation.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc.
2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "math" 8 | "time" 9 | 10 | "github.com/hashicorp/go-metrics/compat" 11 | ) 12 | 13 | // saturationMetric measures the saturation (percentage of time spent working vs 14 | // waiting for work) of an event processing loop, such as runFSM. It reports the 15 | // saturation as a gauge metric (at most) once every reportInterval. 16 | // 17 | // Callers must instrument their loop with calls to sleeping and working, starting 18 | // with a call to sleeping. 19 | // 20 | // Note: the caller must be single-threaded and saturationMetric is not safe for 21 | // concurrent use by multiple goroutines. 22 | type saturationMetric struct { 23 | reportInterval time.Duration 24 | 25 | // slept contains time for which the event processing loop was sleeping rather 26 | // than working in the period since lastReport. 27 | slept time.Duration 28 | 29 | // lost contains time that is considered lost due to incorrect use of 30 | // saturationMetric (e.g. calling sleeping() or working() multiple 31 | // times in succession) in the period since lastReport. 32 | lost time.Duration 33 | 34 | lastReport, sleepBegan, workBegan time.Time 35 | 36 | // These are overwritten in tests. 37 | nowFn func() time.Time 38 | reportFn func(float32) 39 | } 40 | 41 | // newSaturationMetric creates a saturationMetric that will update the gauge 42 | // with the given name at the given reportInterval, based on the time spent 43 | // sleeping versus working since the last report. 44 | func newSaturationMetric(name []string, reportInterval time.Duration) *saturationMetric { 45 | m := &saturationMetric{ 46 | reportInterval: reportInterval, 47 | nowFn: time.Now, 48 | lastReport: time.Now(), 49 | reportFn: func(sat float32) { metrics.AddSample(name, sat) }, 50 | } 51 | return m 52 | } 53 | 54 | // sleeping records the time at which the loop began waiting for work. After the 55 | // initial call it must always be preceded by a call to working. 56 | func (s *saturationMetric) sleeping() { 57 | now := s.nowFn() 58 | 59 | if !s.sleepBegan.IsZero() { 60 | // sleeping called twice in succession. Count that time as lost rather than 61 | // measuring nonsense. 62 | s.lost += now.Sub(s.sleepBegan) 63 | } 64 | 65 | s.sleepBegan = now 66 | s.workBegan = time.Time{} 67 | s.report() 68 | } 69 | 70 | // working records the time at which the loop began working. It must always be 71 | // preceded by a call to sleeping. 72 | func (s *saturationMetric) working() { 73 | now := s.nowFn() 74 | 75 | if s.workBegan.IsZero() { 76 | if s.sleepBegan.IsZero() { 77 | // working called before the initial call to sleeping. Count that time as 78 | // lost rather than measuring nonsense. 79 | s.lost += now.Sub(s.lastReport) 80 | } else { 81 | s.slept += now.Sub(s.sleepBegan) 82 | } 83 | } else { 84 | // working called twice in succession. Count that time as lost rather than 85 | // measuring nonsense. 86 | s.lost += now.Sub(s.workBegan) 87 | } 88 | 89 | s.workBegan = now 90 | s.sleepBegan = time.Time{} 91 | s.report() 92 | } 93 | 94 | // report updates the gauge if reportInterval has passed since our last report.
95 | func (s *saturationMetric) report() { 96 | now := s.nowFn() 97 | timeSinceLastReport := now.Sub(s.lastReport) 98 | 99 | if timeSinceLastReport < s.reportInterval { 100 | return 101 | } 102 | 103 | var saturation float64 104 | total := timeSinceLastReport - s.lost 105 | if total != 0 { 106 | saturation = float64(total-s.slept) / float64(total) 107 | saturation = math.Round(saturation*100) / 100 108 | } 109 | s.reportFn(float32(saturation)) 110 | 111 | s.slept = 0 112 | s.lost = 0 113 | s.lastReport = now 114 | } 115 | -------------------------------------------------------------------------------- /saturation_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "testing" 8 | "time" 9 | 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestSaturationMetric(t *testing.T) { 14 | t.Run("without smoothing", func(t *testing.T) { 15 | sat := newSaturationMetric([]string{"metric"}, 100*time.Millisecond) 16 | 17 | now := sat.lastReport 18 | sat.nowFn = func() time.Time { return now } 19 | 20 | var reported float32 21 | sat.reportFn = func(val float32) { reported = val } 22 | 23 | sat.sleeping() 24 | 25 | // First window: 50ms sleeping + 75ms working. 26 | now = now.Add(50 * time.Millisecond) 27 | sat.working() 28 | 29 | now = now.Add(75 * time.Millisecond) 30 | sat.sleeping() 31 | 32 | // Should be 60% saturation. 33 | require.Equal(t, float32(0.6), reported) 34 | 35 | // Second window: 90ms sleeping + 10ms working. 36 | now = now.Add(90 * time.Millisecond) 37 | sat.working() 38 | 39 | now = now.Add(10 * time.Millisecond) 40 | sat.sleeping() 41 | 42 | // Should be 10% saturation. 43 | require.Equal(t, float32(0.1), reported) 44 | 45 | // Third window: 100ms sleeping + 0ms working. 46 | now = now.Add(100 * time.Millisecond) 47 | sat.working() 48 | 49 | // Should be 0% saturation. 50 | require.Equal(t, float32(0), reported) 51 | }) 52 | } 53 | 54 | func TestSaturationMetric_IncorrectUsage(t *testing.T) { 55 | t.Run("calling sleeping() consecutively", func(t *testing.T) { 56 | sat := newSaturationMetric([]string{"metric"}, 50*time.Millisecond) 57 | 58 | now := sat.lastReport 59 | sat.nowFn = func() time.Time { return now } 60 | 61 | var reported float32 62 | sat.reportFn = func(v float32) { reported = v } 63 | 64 | // Calling sleeping() consecutively should reset sleepBegan without recording 65 | // a sample, such that we "lose" time rather than recording nonsense data. 66 | // 67 | // 0 | sleeping() | 68 | // => Sleeping (10ms) 69 | // +10ms | working() | 70 | // => Working (10ms) 71 | // +20ms | sleeping() | 72 | // => [!] LOST [!] (10ms) 73 | // +30ms | sleeping() | 74 | // => Sleeping (10ms) 75 | // +40ms | working() | 76 | // => Working (10ms) 77 | // +50ms | sleeping() | 78 | // 79 | // Total reportable time: 40ms. Saturation: 50%. 
80 | sat.sleeping() 81 | now = now.Add(10 * time.Millisecond) 82 | sat.working() 83 | now = now.Add(10 * time.Millisecond) 84 | sat.sleeping() 85 | now = now.Add(10 * time.Millisecond) 86 | sat.sleeping() 87 | now = now.Add(10 * time.Millisecond) 88 | sat.working() 89 | now = now.Add(10 * time.Millisecond) 90 | sat.sleeping() 91 | 92 | require.Equal(t, float32(0.5), reported) 93 | }) 94 | 95 | t.Run("calling working() consecutively", func(t *testing.T) { 96 | sat := newSaturationMetric([]string{"metric"}, 30*time.Millisecond) 97 | 98 | now := sat.lastReport 99 | sat.nowFn = func() time.Time { return now } 100 | 101 | var reported float32 102 | sat.reportFn = func(v float32) { reported = v } 103 | 104 | // Calling working() consecutively should reset workBegan without recording 105 | // a sample, such that we "lose" time rather than recording nonsense data. 106 | // 107 | // 0 | sleeping() | 108 | // => Sleeping (10ms) 109 | // +10ms | working() | 110 | // => [!] LOST [!] (10ms) 111 | // +20ms | working() | 112 | // => Working (10ms) 113 | // +30ms | sleeping() | 114 | // 115 | // Total reportable time: 20ms. Saturation: 50%. 116 | sat.sleeping() 117 | now = now.Add(10 * time.Millisecond) 118 | sat.working() 119 | now = now.Add(10 * time.Millisecond) 120 | sat.working() 121 | now = now.Add(10 * time.Millisecond) 122 | sat.sleeping() 123 | 124 | require.Equal(t, float32(0.5), reported) 125 | }) 126 | 127 | t.Run("calling working() first", func(t *testing.T) { 128 | sat := newSaturationMetric([]string{"metric"}, 10*time.Millisecond) 129 | 130 | now := sat.lastReport 131 | sat.nowFn = func() time.Time { return now } 132 | 133 | var reported float32 134 | sat.reportFn = func(v float32) { reported = v } 135 | 136 | // Time from start until working() is treated as lost. 137 | sat.working() 138 | require.Equal(t, float32(0), reported) 139 | 140 | sat.sleeping() 141 | now = now.Add(5 * time.Millisecond) 142 | sat.working() 143 | now = now.Add(5 * time.Millisecond) 144 | sat.sleeping() 145 | require.Equal(t, float32(0.5), reported) 146 | }) 147 | } 148 | -------------------------------------------------------------------------------- /stable.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | // StableStore is used to provide stable storage 7 | // of key configurations to ensure safety. 8 | type StableStore interface { 9 | Set(key []byte, val []byte) error 10 | 11 | // Get returns the value for key, or an empty byte slice if key was not found. 12 | Get(key []byte) ([]byte, error) 13 | 14 | SetUint64(key []byte, val uint64) error 15 | 16 | // GetUint64 returns the uint64 value for key, or 0 if key was not found. 17 | GetUint64(key []byte) (uint64, error) 18 | } 19 | -------------------------------------------------------------------------------- /state.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "sync" 8 | "sync/atomic" 9 | ) 10 | 11 | // RaftState captures the state of a Raft node: Follower, Candidate, Leader, 12 | // or Shutdown. 13 | type RaftState uint32 14 | 15 | const ( 16 | // Follower is the initial state of a Raft node. 17 | Follower RaftState = iota 18 | 19 | // Candidate is one of the valid states of a Raft node. 20 | Candidate 21 | 22 | // Leader is one of the valid states of a Raft node. 
23 | Leader 24 | 25 | // Shutdown is the terminal state of a Raft node. 26 | Shutdown 27 | ) 28 | 29 | func (s RaftState) String() string { 30 | switch s { 31 | case Follower: 32 | return "Follower" 33 | case Candidate: 34 | return "Candidate" 35 | case Leader: 36 | return "Leader" 37 | case Shutdown: 38 | return "Shutdown" 39 | default: 40 | return "Unknown" 41 | } 42 | } 43 | 44 | // raftState is used to maintain various state variables 45 | // and provides an interface to set/get the variables in a 46 | // thread safe manner. 47 | type raftState struct { 48 | // currentTerm commitIndex, lastApplied, must be kept at the top of 49 | // the struct so they're 64 bit aligned which is a requirement for 50 | // atomic ops on 32 bit platforms. 51 | 52 | // The current term, cache of StableStore 53 | currentTerm uint64 54 | 55 | // Highest committed log entry 56 | commitIndex uint64 57 | 58 | // Last applied log to the FSM 59 | lastApplied uint64 60 | 61 | // protects 4 next fields 62 | lastLock sync.Mutex 63 | 64 | // Cache the latest snapshot index/term 65 | lastSnapshotIndex uint64 66 | lastSnapshotTerm uint64 67 | 68 | // Cache the latest log from LogStore 69 | lastLogIndex uint64 70 | lastLogTerm uint64 71 | 72 | // Tracks running goroutines 73 | routinesGroup sync.WaitGroup 74 | 75 | // The current state 76 | state RaftState 77 | } 78 | 79 | func (r *raftState) getState() RaftState { 80 | stateAddr := (*uint32)(&r.state) 81 | return RaftState(atomic.LoadUint32(stateAddr)) 82 | } 83 | 84 | func (r *raftState) setState(s RaftState) { 85 | stateAddr := (*uint32)(&r.state) 86 | atomic.StoreUint32(stateAddr, uint32(s)) 87 | } 88 | 89 | func (r *raftState) getCurrentTerm() uint64 { 90 | return atomic.LoadUint64(&r.currentTerm) 91 | } 92 | 93 | func (r *raftState) setCurrentTerm(term uint64) { 94 | atomic.StoreUint64(&r.currentTerm, term) 95 | } 96 | 97 | func (r *raftState) getLastLog() (index, term uint64) { 98 | r.lastLock.Lock() 99 | index = r.lastLogIndex 100 | term = r.lastLogTerm 101 | r.lastLock.Unlock() 102 | return 103 | } 104 | 105 | func (r *raftState) setLastLog(index, term uint64) { 106 | r.lastLock.Lock() 107 | r.lastLogIndex = index 108 | r.lastLogTerm = term 109 | r.lastLock.Unlock() 110 | } 111 | 112 | func (r *raftState) getLastSnapshot() (index, term uint64) { 113 | r.lastLock.Lock() 114 | index = r.lastSnapshotIndex 115 | term = r.lastSnapshotTerm 116 | r.lastLock.Unlock() 117 | return 118 | } 119 | 120 | func (r *raftState) setLastSnapshot(index, term uint64) { 121 | r.lastLock.Lock() 122 | r.lastSnapshotIndex = index 123 | r.lastSnapshotTerm = term 124 | r.lastLock.Unlock() 125 | } 126 | 127 | func (r *raftState) getCommitIndex() uint64 { 128 | return atomic.LoadUint64(&r.commitIndex) 129 | } 130 | 131 | func (r *raftState) setCommitIndex(index uint64) { 132 | atomic.StoreUint64(&r.commitIndex, index) 133 | } 134 | 135 | func (r *raftState) getLastApplied() uint64 { 136 | return atomic.LoadUint64(&r.lastApplied) 137 | } 138 | 139 | func (r *raftState) setLastApplied(index uint64) { 140 | atomic.StoreUint64(&r.lastApplied, index) 141 | } 142 | 143 | // Start a goroutine and properly handle the race between a routine 144 | // starting and incrementing, and exiting and decrementing. 
145 | func (r *raftState) goFunc(f func()) { 146 | r.routinesGroup.Add(1) 147 | go func() { 148 | defer r.routinesGroup.Done() 149 | f() 150 | }() 151 | } 152 | 153 | func (r *raftState) waitShutdown() { 154 | r.routinesGroup.Wait() 155 | } 156 | 157 | // getLastIndex returns the last index in stable storage. 158 | // Either from the last log or from the last snapshot. 159 | func (r *raftState) getLastIndex() uint64 { 160 | r.lastLock.Lock() 161 | defer r.lastLock.Unlock() 162 | return max(r.lastLogIndex, r.lastSnapshotIndex) 163 | } 164 | 165 | // getLastEntry returns the last index and term in stable storage. 166 | // Either from the last log or from the last snapshot. 167 | func (r *raftState) getLastEntry() (uint64, uint64) { 168 | r.lastLock.Lock() 169 | defer r.lastLock.Unlock() 170 | if r.lastLogIndex >= r.lastSnapshotIndex { 171 | return r.lastLogIndex, r.lastLogTerm 172 | } 173 | return r.lastSnapshotIndex, r.lastSnapshotTerm 174 | } 175 | -------------------------------------------------------------------------------- /tag.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright (c) HashiCorp, Inc. 3 | # SPDX-License-Identifier: MPL-2.0 4 | 5 | set -e 6 | 7 | # The version must be supplied from the environment. Do not include the 8 | # leading "v". 9 | if [ -z $VERSION ]; then 10 | echo "Please specify a version." 11 | exit 1 12 | fi 13 | 14 | # Generate the tag. 15 | echo "==> Tagging version $VERSION..." 16 | git commit --allow-empty -a --gpg-sign=348FFC4C -m "Release v$VERSION" 17 | git tag -a -m "Version $VERSION" -s -u 348FFC4C "v${VERSION}" main 18 | 19 | exit 0 20 | -------------------------------------------------------------------------------- /tcp_transport.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "errors" 8 | "io" 9 | "net" 10 | "time" 11 | 12 | "github.com/hashicorp/go-hclog" 13 | ) 14 | 15 | var ( 16 | errNotAdvertisable = errors.New("local bind address is not advertisable") 17 | errNotTCP = errors.New("local address is not a TCP address") 18 | ) 19 | 20 | // TCPStreamLayer implements StreamLayer interface for plain TCP. 21 | type TCPStreamLayer struct { 22 | advertise net.Addr 23 | listener *net.TCPListener 24 | } 25 | 26 | // NewTCPTransport returns a NetworkTransport that is built on top of 27 | // a TCP streaming transport layer. 
28 | func NewTCPTransport( 29 | bindAddr string, 30 | advertise net.Addr, 31 | maxPool int, 32 | timeout time.Duration, 33 | logOutput io.Writer, 34 | ) (*NetworkTransport, error) { 35 | return newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport { 36 | return NewNetworkTransport(stream, maxPool, timeout, logOutput) 37 | }) 38 | } 39 | 40 | // NewTCPTransportWithLogger returns a NetworkTransport that is built on top of 41 | // a TCP streaming transport layer, with log output going to the supplied Logger 42 | func NewTCPTransportWithLogger( 43 | bindAddr string, 44 | advertise net.Addr, 45 | maxPool int, 46 | timeout time.Duration, 47 | logger hclog.Logger, 48 | ) (*NetworkTransport, error) { 49 | return newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport { 50 | return NewNetworkTransportWithLogger(stream, maxPool, timeout, logger) 51 | }) 52 | } 53 | 54 | // NewTCPTransportWithConfig returns a NetworkTransport that is built on top of 55 | // a TCP streaming transport layer, using the given config struct. 56 | func NewTCPTransportWithConfig( 57 | bindAddr string, 58 | advertise net.Addr, 59 | config *NetworkTransportConfig, 60 | ) (*NetworkTransport, error) { 61 | return newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport { 62 | config.Stream = stream 63 | return NewNetworkTransportWithConfig(config) 64 | }) 65 | } 66 | 67 | func newTCPTransport(bindAddr string, 68 | advertise net.Addr, 69 | transportCreator func(stream StreamLayer) *NetworkTransport) (*NetworkTransport, error) { 70 | // Try to bind 71 | list, err := net.Listen("tcp", bindAddr) 72 | if err != nil { 73 | return nil, err 74 | } 75 | 76 | // Create stream 77 | stream := &TCPStreamLayer{ 78 | advertise: advertise, 79 | listener: list.(*net.TCPListener), 80 | } 81 | 82 | // Verify that we have a usable advertise address 83 | addr, ok := stream.Addr().(*net.TCPAddr) 84 | if !ok { 85 | list.Close() 86 | return nil, errNotTCP 87 | } 88 | if addr.IP == nil || addr.IP.IsUnspecified() { 89 | list.Close() 90 | return nil, errNotAdvertisable 91 | } 92 | 93 | // Create the network transport 94 | trans := transportCreator(stream) 95 | return trans, nil 96 | } 97 | 98 | // Dial implements the StreamLayer interface. 99 | func (t *TCPStreamLayer) Dial(address ServerAddress, timeout time.Duration) (net.Conn, error) { 100 | return net.DialTimeout("tcp", string(address), timeout) 101 | } 102 | 103 | // Accept implements the net.Listener interface. 104 | func (t *TCPStreamLayer) Accept() (c net.Conn, err error) { 105 | return t.listener.Accept() 106 | } 107 | 108 | // Close implements the net.Listener interface. 109 | func (t *TCPStreamLayer) Close() (err error) { 110 | return t.listener.Close() 111 | } 112 | 113 | // Addr implements the net.Listener interface. 114 | func (t *TCPStreamLayer) Addr() net.Addr { 115 | // Use an advertise addr if provided 116 | if t.advertise != nil { 117 | return t.advertise 118 | } 119 | return t.listener.Addr() 120 | } 121 | -------------------------------------------------------------------------------- /tcp_transport_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 
2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "net" 8 | "testing" 9 | ) 10 | 11 | func TestTCPTransport_BadAddr(t *testing.T) { 12 | _, err := NewTCPTransportWithLogger("0.0.0.0:0", nil, 1, 0, newTestLogger(t)) 13 | if err != errNotAdvertisable { 14 | t.Fatalf("err: %v", err) 15 | } 16 | } 17 | 18 | func TestTCPTransport_EmptyAddr(t *testing.T) { 19 | _, err := NewTCPTransportWithLogger(":0", nil, 1, 0, newTestLogger(t)) 20 | if err != errNotAdvertisable { 21 | t.Fatalf("err: %v", err) 22 | } 23 | } 24 | 25 | func TestTCPTransport_WithAdvertise(t *testing.T) { 26 | ips, err := net.LookupIP("localhost") 27 | if err != nil { 28 | t.Fatal(err) 29 | } 30 | if len(ips) == 0 { 31 | t.Fatalf("localhost did not resolve to any IPs") 32 | } 33 | addr := &net.TCPAddr{IP: ips[0], Port: 12345} 34 | trans, err := NewTCPTransportWithLogger("0.0.0.0:0", addr, 1, 0, newTestLogger(t)) 35 | if err != nil { 36 | t.Fatalf("err: %v", err) 37 | } 38 | if trans.LocalAddr() != ServerAddress(net.JoinHostPort(ips[0].String(), "12345")) { 39 | t.Fatalf("bad: %v", trans.LocalAddr()) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /testing_batch.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | //go:build batchtest 5 | // +build batchtest 6 | 7 | package raft 8 | 9 | func init() { 10 | userSnapshotErrorsOnNoData = false 11 | } 12 | 13 | // ApplyBatch enables MockFSM to satisfy the BatchingFSM interface. This 14 | // function is gated by the batchtest build flag. 15 | // 16 | // NOTE: This is exposed for middleware testing purposes and is not a stable API 17 | func (m *MockFSM) ApplyBatch(logs []*Log) []interface{} { 18 | m.Lock() 19 | defer m.Unlock() 20 | 21 | ret := make([]interface{}, len(logs)) 22 | for i, log := range logs { 23 | switch log.Type { 24 | case LogCommand: 25 | m.logs = append(m.logs, log.Data) 26 | ret[i] = len(m.logs) 27 | default: 28 | ret[i] = nil 29 | } 30 | } 31 | 32 | return ret 33 | } 34 | -------------------------------------------------------------------------------- /transport.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "io" 8 | "time" 9 | ) 10 | 11 | // RPCResponse captures both a response and a potential error. 12 | type RPCResponse struct { 13 | Response interface{} 14 | Error error 15 | } 16 | 17 | // RPC has a command, and provides a response mechanism. 18 | type RPC struct { 19 | Command interface{} 20 | Reader io.Reader // Set only for InstallSnapshot 21 | RespChan chan<- RPCResponse 22 | } 23 | 24 | // Respond is used to respond with a response, error or both 25 | func (r *RPC) Respond(resp interface{}, err error) { 26 | r.RespChan <- RPCResponse{resp, err} 27 | } 28 | 29 | // Transport provides an interface for network transports 30 | // to allow Raft to communicate with other nodes. 31 | type Transport interface { 32 | // Consumer returns a channel that can be used to 33 | // consume and respond to RPC requests. 34 | Consumer() <-chan RPC 35 | 36 | // LocalAddr is used to return our local address to distinguish from our peers. 37 | LocalAddr() ServerAddress 38 | 39 | // AppendEntriesPipeline returns an interface that can be used to pipeline 40 | // AppendEntries requests. 
41 | AppendEntriesPipeline(id ServerID, target ServerAddress) (AppendPipeline, error) 42 | 43 | // AppendEntries sends the appropriate RPC to the target node. 44 | AppendEntries(id ServerID, target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error 45 | 46 | // RequestVote sends the appropriate RPC to the target node. 47 | RequestVote(id ServerID, target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error 48 | 49 | // InstallSnapshot is used to push a snapshot down to a follower. The data is read from 50 | // the ReadCloser and streamed to the client. 51 | InstallSnapshot(id ServerID, target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error 52 | 53 | // EncodePeer is used to serialize a peer's address. 54 | EncodePeer(id ServerID, addr ServerAddress) []byte 55 | 56 | // DecodePeer is used to deserialize a peer's address. 57 | DecodePeer([]byte) ServerAddress 58 | 59 | // SetHeartbeatHandler is used to setup a heartbeat handler 60 | // as a fast-pass. This is to avoid head-of-line blocking from 61 | // disk IO. If a Transport does not support this, it can simply 62 | // ignore the call, and push the heartbeat onto the Consumer channel. 63 | SetHeartbeatHandler(cb func(rpc RPC)) 64 | 65 | // TimeoutNow is used to start a leadership transfer to the target node. 66 | TimeoutNow(id ServerID, target ServerAddress, args *TimeoutNowRequest, resp *TimeoutNowResponse) error 67 | } 68 | 69 | // WithPreVote is an interface that a transport may provide which 70 | // allows a transport to support a PreVote request. 71 | // 72 | // It is defined separately from Transport as unfortunately it wasn't in the 73 | // original interface specification. 74 | type WithPreVote interface { 75 | // RequestPreVote sends the appropriate RPC to the target node. 76 | RequestPreVote(id ServerID, target ServerAddress, args *RequestPreVoteRequest, resp *RequestPreVoteResponse) error 77 | } 78 | 79 | // WithClose is an interface that a transport may provide which 80 | // allows a transport to be shut down cleanly when a Raft instance 81 | // shuts down. 82 | // 83 | // It is defined separately from Transport as unfortunately it wasn't in the 84 | // original interface specification. 85 | type WithClose interface { 86 | // Close permanently closes a transport, stopping 87 | // any associated goroutines and freeing other resources. 88 | Close() error 89 | } 90 | 91 | // LoopbackTransport is an interface that provides a loopback transport suitable for testing 92 | // e.g. InmemTransport. It's there so we don't have to rewrite tests. 93 | type LoopbackTransport interface { 94 | Transport // Embedded transport reference 95 | WithPeers // Embedded peer management 96 | WithClose // with a close routine 97 | WithPreVote // with a prevote 98 | } 99 | 100 | // WithPeers is an interface that a transport may provide which allows for connection and 101 | // disconnection. Unless the transport is a loopback transport, the transport specified to 102 | // "Connect" is likely to be nil. 103 | type WithPeers interface { 104 | Connect(peer ServerAddress, t Transport) // Connect a peer 105 | Disconnect(peer ServerAddress) // Disconnect a given peer 106 | DisconnectAll() // Disconnect all peers, possibly to reconnect them later 107 | } 108 | 109 | // AppendPipeline is used for pipelining AppendEntries requests. It is used 110 | // to increase the replication throughput by masking latency and better 111 | // utilizing bandwidth. 
112 | type AppendPipeline interface { 113 | // AppendEntries is used to add another request to the pipeline. 114 | // The send may block which is an effective form of back-pressure. 115 | AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) 116 | 117 | // Consumer returns a channel that can be used to consume 118 | // response futures when they are ready. 119 | Consumer() <-chan AppendFuture 120 | 121 | // Close closes the pipeline and cancels all inflight RPCs 122 | Close() error 123 | } 124 | 125 | // AppendFuture is used to return information about a pipelined AppendEntries request. 126 | type AppendFuture interface { 127 | Future 128 | 129 | // Start returns the time that the append request was started. 130 | // It is always OK to call this method. 131 | Start() time.Time 132 | 133 | // Request holds the parameters of the AppendEntries call. 134 | // It is always OK to call this method. 135 | Request() *AppendEntriesRequest 136 | 137 | // Response holds the results of the AppendEntries call. 138 | // This method must only be called after the Error 139 | // method returns, and will only be valid on success. 140 | Response() *AppendEntriesResponse 141 | } 142 | -------------------------------------------------------------------------------- /transport_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "bytes" 8 | "reflect" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | const ( 14 | TTInmem = iota 15 | 16 | // NOTE: must be last 17 | numTestTransports 18 | ) 19 | 20 | func NewTestTransport(ttype int, addr ServerAddress) (ServerAddress, LoopbackTransport) { 21 | switch ttype { 22 | case TTInmem: 23 | return NewInmemTransport(addr) 24 | default: 25 | panic("Unknown transport type") 26 | } 27 | } 28 | 29 | func TestTransport_StartStop(t *testing.T) { 30 | for ttype := 0; ttype < numTestTransports; ttype++ { 31 | _, trans := NewTestTransport(ttype, "") 32 | if err := trans.Close(); err != nil { 33 | t.Fatalf("err: %v", err) 34 | } 35 | } 36 | } 37 | 38 | func TestTransport_AppendEntries(t *testing.T) { 39 | for ttype := 0; ttype < numTestTransports; ttype++ { 40 | addr1, trans1 := NewTestTransport(ttype, "") 41 | defer trans1.Close() 42 | rpcCh := trans1.Consumer() 43 | 44 | // Make the RPC request 45 | args := AppendEntriesRequest{ 46 | Term: 10, 47 | PrevLogEntry: 100, 48 | PrevLogTerm: 4, 49 | Entries: []*Log{ 50 | { 51 | Index: 101, 52 | Term: 4, 53 | Type: LogNoop, 54 | }, 55 | }, 56 | LeaderCommitIndex: 90, 57 | RPCHeader: RPCHeader{Addr: []byte("cartman")}, 58 | } 59 | 60 | resp := AppendEntriesResponse{ 61 | Term: 4, 62 | LastLog: 90, 63 | Success: true, 64 | } 65 | 66 | // Listen for a request 67 | go func() { 68 | select { 69 | case rpc := <-rpcCh: 70 | // Verify the command 71 | req := rpc.Command.(*AppendEntriesRequest) 72 | if !reflect.DeepEqual(req, &args) { 73 | t.Errorf("command mismatch: %#v %#v", *req, args) 74 | return 75 | } 76 | rpc.Respond(&resp, nil) 77 | 78 | case <-time.After(200 * time.Millisecond): 79 | t.Errorf("timeout") 80 | } 81 | }() 82 | 83 | // Transport 2 makes outbound request 84 | addr2, trans2 := NewTestTransport(ttype, "") 85 | defer trans2.Close() 86 | 87 | trans1.Connect(addr2, trans2) 88 | trans2.Connect(addr1, trans1) 89 | 90 | var out AppendEntriesResponse 91 | if err := trans2.AppendEntries("id1", trans1.LocalAddr(), &args, &out); err != nil { 92 | 
t.Fatalf("err: %v", err) 93 | } 94 | 95 | // Verify the response 96 | if !reflect.DeepEqual(resp, out) { 97 | t.Fatalf("command mismatch: %#v %#v", resp, out) 98 | } 99 | } 100 | } 101 | 102 | func TestTransport_AppendEntriesPipeline(t *testing.T) { 103 | for ttype := 0; ttype < numTestTransports; ttype++ { 104 | addr1, trans1 := NewTestTransport(ttype, "") 105 | defer trans1.Close() 106 | rpcCh := trans1.Consumer() 107 | 108 | // Make the RPC request 109 | args := AppendEntriesRequest{ 110 | Term: 10, 111 | PrevLogEntry: 100, 112 | PrevLogTerm: 4, 113 | Entries: []*Log{ 114 | { 115 | Index: 101, 116 | Term: 4, 117 | Type: LogNoop, 118 | }, 119 | }, 120 | LeaderCommitIndex: 90, 121 | RPCHeader: RPCHeader{Addr: []byte("cartman")}, 122 | } 123 | 124 | resp := AppendEntriesResponse{ 125 | Term: 4, 126 | LastLog: 90, 127 | Success: true, 128 | } 129 | 130 | // Listen for a request 131 | go func() { 132 | for i := 0; i < 10; i++ { 133 | select { 134 | case rpc := <-rpcCh: 135 | // Verify the command 136 | req := rpc.Command.(*AppendEntriesRequest) 137 | if !reflect.DeepEqual(req, &args) { 138 | t.Errorf("command mismatch: %#v %#v", *req, args) 139 | return 140 | } 141 | rpc.Respond(&resp, nil) 142 | 143 | case <-time.After(200 * time.Millisecond): 144 | t.Errorf("timeout") 145 | return 146 | } 147 | } 148 | }() 149 | 150 | // Transport 2 makes outbound request 151 | addr2, trans2 := NewTestTransport(ttype, "") 152 | defer trans2.Close() 153 | 154 | trans1.Connect(addr2, trans2) 155 | trans2.Connect(addr1, trans1) 156 | 157 | pipeline, err := trans2.AppendEntriesPipeline("id1", trans1.LocalAddr()) 158 | if err != nil { 159 | t.Fatalf("err: %v", err) 160 | } 161 | defer pipeline.Close() 162 | for i := 0; i < 10; i++ { 163 | out := new(AppendEntriesResponse) 164 | if _, err := pipeline.AppendEntries(&args, out); err != nil { 165 | t.Fatalf("err: %v", err) 166 | } 167 | } 168 | 169 | respCh := pipeline.Consumer() 170 | for i := 0; i < 10; i++ { 171 | select { 172 | case ready := <-respCh: 173 | // Verify the response 174 | if !reflect.DeepEqual(&resp, ready.Response()) { 175 | t.Fatalf("command mismatch: %#v %#v", &resp, ready.Response()) 176 | } 177 | case <-time.After(200 * time.Millisecond): 178 | t.Fatalf("timeout") 179 | } 180 | } 181 | } 182 | } 183 | 184 | func TestTransport_RequestVote(t *testing.T) { 185 | for ttype := 0; ttype < numTestTransports; ttype++ { 186 | addr1, trans1 := NewTestTransport(ttype, "") 187 | defer trans1.Close() 188 | rpcCh := trans1.Consumer() 189 | 190 | // Make the RPC request 191 | args := RequestVoteRequest{ 192 | Term: 20, 193 | LastLogIndex: 100, 194 | LastLogTerm: 19, 195 | RPCHeader: RPCHeader{Addr: []byte("butters")}, 196 | } 197 | resp := RequestVoteResponse{ 198 | Term: 100, 199 | Granted: false, 200 | } 201 | 202 | // Listen for a request 203 | go func() { 204 | select { 205 | case rpc := <-rpcCh: 206 | // Verify the command 207 | req := rpc.Command.(*RequestVoteRequest) 208 | if !reflect.DeepEqual(req, &args) { 209 | t.Errorf("command mismatch: %#v %#v", *req, args) 210 | return 211 | } 212 | 213 | rpc.Respond(&resp, nil) 214 | 215 | case <-time.After(200 * time.Millisecond): 216 | t.Errorf("timeout") 217 | } 218 | }() 219 | 220 | // Transport 2 makes outbound request 221 | addr2, trans2 := NewTestTransport(ttype, "") 222 | defer trans2.Close() 223 | 224 | trans1.Connect(addr2, trans2) 225 | trans2.Connect(addr1, trans1) 226 | 227 | var out RequestVoteResponse 228 | if err := trans2.RequestVote("id1", trans1.LocalAddr(), &args, &out); err != nil { 229 | 
t.Fatalf("err: %v", err) 230 | } 231 | 232 | // Verify the response 233 | if !reflect.DeepEqual(resp, out) { 234 | t.Fatalf("command mismatch: %#v %#v", resp, out) 235 | } 236 | } 237 | } 238 | 239 | func TestTransport_InstallSnapshot(t *testing.T) { 240 | for ttype := 0; ttype < numTestTransports; ttype++ { 241 | addr1, trans1 := NewTestTransport(ttype, "") 242 | defer trans1.Close() 243 | rpcCh := trans1.Consumer() 244 | 245 | // Make the RPC request 246 | args := InstallSnapshotRequest{ 247 | Term: 10, 248 | LastLogIndex: 100, 249 | LastLogTerm: 9, 250 | Peers: []byte("blah blah"), 251 | Size: 10, 252 | RPCHeader: RPCHeader{Addr: []byte("kyle")}, 253 | } 254 | 255 | resp := InstallSnapshotResponse{ 256 | Term: 10, 257 | Success: true, 258 | } 259 | 260 | // Listen for a request 261 | go func() { 262 | select { 263 | case rpc := <-rpcCh: 264 | // Verify the command 265 | req := rpc.Command.(*InstallSnapshotRequest) 266 | if !reflect.DeepEqual(req, &args) { 267 | t.Errorf("command mismatch: %#v %#v", *req, args) 268 | return 269 | } 270 | 271 | // Try to read the bytes 272 | buf := make([]byte, 10) 273 | rpc.Reader.Read(buf) 274 | 275 | // Compare 276 | if bytes.Compare(buf, []byte("0123456789")) != 0 { 277 | t.Errorf("bad buf %v", buf) 278 | return 279 | } 280 | 281 | rpc.Respond(&resp, nil) 282 | 283 | case <-time.After(200 * time.Millisecond): 284 | t.Errorf("timeout") 285 | } 286 | }() 287 | 288 | // Transport 2 makes outbound request 289 | addr2, trans2 := NewTestTransport(ttype, "") 290 | defer trans2.Close() 291 | 292 | trans1.Connect(addr2, trans2) 293 | trans2.Connect(addr1, trans1) 294 | 295 | // Create a buffer 296 | buf := bytes.NewBuffer([]byte("0123456789")) 297 | 298 | var out InstallSnapshotResponse 299 | if err := trans2.InstallSnapshot("id1", trans1.LocalAddr(), &args, &out, buf); err != nil { 300 | t.Fatalf("err: %v", err) 301 | } 302 | 303 | // Verify the response 304 | if !reflect.DeepEqual(resp, out) { 305 | t.Fatalf("command mismatch: %#v %#v", resp, out) 306 | } 307 | } 308 | } 309 | 310 | func TestTransport_EncodeDecode(t *testing.T) { 311 | for ttype := 0; ttype < numTestTransports; ttype++ { 312 | _, trans1 := NewTestTransport(ttype, "") 313 | defer trans1.Close() 314 | 315 | local := trans1.LocalAddr() 316 | enc := trans1.EncodePeer("aaaa", local) 317 | dec := trans1.DecodePeer(enc) 318 | 319 | if dec != local { 320 | t.Fatalf("enc/dec fail: %v %v", dec, local) 321 | } 322 | } 323 | } 324 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "bytes" 8 | crand "crypto/rand" 9 | "fmt" 10 | "math" 11 | "math/big" 12 | "math/rand" 13 | "time" 14 | 15 | "github.com/hashicorp/go-msgpack/v2/codec" 16 | ) 17 | 18 | func init() { 19 | // Ensure we use a high-entropy seed for the pseudo-random generator 20 | rand.Seed(newSeed()) 21 | } 22 | 23 | // returns an int64 from a crypto random source 24 | // can be used to seed a source for a math/rand. 25 | func newSeed() int64 { 26 | r, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64)) 27 | if err != nil { 28 | panic(fmt.Errorf("failed to read random bytes: %v", err)) 29 | } 30 | return r.Int64() 31 | } 32 | 33 | // randomTimeout returns a value that is between the minVal and 2x minVal. 
34 | func randomTimeout(minVal time.Duration) <-chan time.Time { 35 | if minVal == 0 { 36 | return nil 37 | } 38 | extra := time.Duration(rand.Int63()) % minVal 39 | return time.After(minVal + extra) 40 | } 41 | 42 | // min returns the minimum. 43 | func min(a, b uint64) uint64 { 44 | if a <= b { 45 | return a 46 | } 47 | return b 48 | } 49 | 50 | // max returns the maximum. 51 | func max(a, b uint64) uint64 { 52 | if a >= b { 53 | return a 54 | } 55 | return b 56 | } 57 | 58 | // generateUUID is used to generate a random UUID. 59 | func generateUUID() string { 60 | buf := make([]byte, 16) 61 | if _, err := crand.Read(buf); err != nil { 62 | panic(fmt.Errorf("failed to read random bytes: %v", err)) 63 | } 64 | 65 | return fmt.Sprintf("%08x-%04x-%04x-%04x-%12x", 66 | buf[0:4], 67 | buf[4:6], 68 | buf[6:8], 69 | buf[8:10], 70 | buf[10:16]) 71 | } 72 | 73 | // asyncNotifyCh is used to do an async channel send 74 | // to a single channel without blocking. 75 | func asyncNotifyCh(ch chan struct{}) { 76 | select { 77 | case ch <- struct{}{}: 78 | default: 79 | } 80 | } 81 | 82 | // drainNotifyCh empties out a single-item notification channel without 83 | // blocking, and returns whether it received anything. 84 | func drainNotifyCh(ch chan struct{}) bool { 85 | select { 86 | case <-ch: 87 | return true 88 | default: 89 | return false 90 | } 91 | } 92 | 93 | // asyncNotifyBool is used to do an async notification 94 | // on a bool channel. 95 | func asyncNotifyBool(ch chan bool, v bool) { 96 | select { 97 | case ch <- v: 98 | default: 99 | } 100 | } 101 | 102 | // overrideNotifyBool is used to notify on a bool channel 103 | // but override existing value if value is present. 104 | // ch must be 1-item buffered channel. 105 | // 106 | // This method does not support multiple concurrent calls. 107 | func overrideNotifyBool(ch chan bool, v bool) { 108 | select { 109 | case ch <- v: 110 | // value sent, all done 111 | case <-ch: 112 | // channel had an old value 113 | select { 114 | case ch <- v: 115 | default: 116 | panic("race: channel was sent concurrently") 117 | } 118 | } 119 | } 120 | 121 | // Decode reverses the encode operation on a byte slice input. 122 | func decodeMsgPack(buf []byte, out interface{}) error { 123 | r := bytes.NewBuffer(buf) 124 | hd := codec.MsgpackHandle{} 125 | dec := codec.NewDecoder(r, &hd) 126 | return dec.Decode(out) 127 | } 128 | 129 | // Encode writes an encoded object to a new bytes buffer. 130 | func encodeMsgPack(in interface{}) (*bytes.Buffer, error) { 131 | buf := bytes.NewBuffer(nil) 132 | hd := codec.MsgpackHandle{ 133 | BasicHandle: codec.BasicHandle{ 134 | TimeNotBuiltin: true, 135 | }, 136 | } 137 | enc := codec.NewEncoder(buf, &hd) 138 | err := enc.Encode(in) 139 | return buf, err 140 | } 141 | 142 | // backoff is used to compute an exponential backoff 143 | // duration. Base time is scaled by the current round, 144 | // up to some maximum scale factor. 145 | func backoff(base time.Duration, round, limit uint64) time.Duration { 146 | power := min(round, limit) 147 | for power > 2 { 148 | base *= 2 149 | power-- 150 | } 151 | return base 152 | } 153 | 154 | // cappedExponentialBackoff computes the exponential backoff with an adjustable 155 | // cap on the max timeout. 
156 | func cappedExponentialBackoff(base time.Duration, round, limit uint64, cap time.Duration) time.Duration { 157 | power := min(round, limit) 158 | for power > 2 { 159 | if base > cap { 160 | return cap 161 | } 162 | base *= 2 163 | power-- 164 | } 165 | if base > cap { 166 | return cap 167 | } 168 | return base 169 | } 170 | 171 | // Needed for sorting []uint64, used to determine commitment 172 | type uint64Slice []uint64 173 | 174 | func (p uint64Slice) Len() int { return len(p) } 175 | func (p uint64Slice) Less(i, j int) bool { return p[i] < p[j] } 176 | func (p uint64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 177 | -------------------------------------------------------------------------------- /util_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) HashiCorp, Inc. 2 | // SPDX-License-Identifier: MPL-2.0 3 | 4 | package raft 5 | 6 | import ( 7 | "bytes" 8 | "regexp" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | // TestMsgpackEncodeTime ensures that we don't break backwards compatibility when updating go-msgpack with 14 | // Raft binary formats. 15 | func TestMsgpackEncodeTimeDefaultFormat(t *testing.T) { 16 | stamp := "2006-01-02T15:04:05Z" 17 | tm, err := time.Parse(time.RFC3339, stamp) 18 | if err != nil { 19 | t.Fatal(err) 20 | } 21 | buf, err := encodeMsgPack(tm) 22 | 23 | expected := []byte{175, 1, 0, 0, 0, 14, 187, 75, 55, 229, 0, 0, 0, 0, 255, 255} 24 | 25 | if !bytes.Equal(buf.Bytes(), expected) { 26 | t.Errorf("Expected time %s to encode as %+v but got %+v", stamp, expected, buf.Bytes()) 27 | } 28 | } 29 | 30 | func TestRandomTimeout(t *testing.T) { 31 | start := time.Now() 32 | timeout := randomTimeout(time.Millisecond) 33 | 34 | select { 35 | case <-timeout: 36 | diff := time.Now().Sub(start) 37 | if diff < time.Millisecond { 38 | t.Fatalf("fired early") 39 | } 40 | case <-time.After(3 * time.Millisecond): 41 | t.Fatalf("timeout") 42 | } 43 | } 44 | 45 | func TestNewSeed(t *testing.T) { 46 | vals := make(map[int64]bool) 47 | for i := 0; i < 1000; i++ { 48 | seed := newSeed() 49 | if _, exists := vals[seed]; exists { 50 | t.Fatal("newSeed() return a value it'd previously returned") 51 | } 52 | vals[seed] = true 53 | } 54 | } 55 | 56 | func TestRandomTimeout_NoTime(t *testing.T) { 57 | timeout := randomTimeout(0) 58 | if timeout != nil { 59 | t.Fatalf("expected nil channel") 60 | } 61 | } 62 | 63 | func TestMin(t *testing.T) { 64 | if min(1, 1) != 1 { 65 | t.Fatalf("bad min") 66 | } 67 | if min(2, 1) != 1 { 68 | t.Fatalf("bad min") 69 | } 70 | if min(1, 2) != 1 { 71 | t.Fatalf("bad min") 72 | } 73 | } 74 | 75 | func TestMax(t *testing.T) { 76 | if max(1, 1) != 1 { 77 | t.Fatalf("bad max") 78 | } 79 | if max(2, 1) != 2 { 80 | t.Fatalf("bad max") 81 | } 82 | if max(1, 2) != 2 { 83 | t.Fatalf("bad max") 84 | } 85 | } 86 | 87 | func TestGenerateUUID(t *testing.T) { 88 | prev := generateUUID() 89 | for i := 0; i < 100; i++ { 90 | id := generateUUID() 91 | if prev == id { 92 | t.Fatalf("Should get a new ID!") 93 | } 94 | 95 | matched, err := regexp.MatchString( 96 | `[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}`, id) 97 | if !matched || err != nil { 98 | t.Fatalf("expected match %s %v %s", id, matched, err) 99 | } 100 | } 101 | } 102 | 103 | func TestBackoff(t *testing.T) { 104 | b := backoff(10*time.Millisecond, 1, 8) 105 | if b != 10*time.Millisecond { 106 | t.Fatalf("bad: %v", b) 107 | } 108 | 109 | b = backoff(20*time.Millisecond, 2, 8) 110 | if b != 20*time.Millisecond { 111 | t.Fatalf("bad: %v", b) 
112 | } 113 | 114 | b = backoff(10*time.Millisecond, 8, 8) 115 | if b != 640*time.Millisecond { 116 | t.Fatalf("bad: %v", b) 117 | } 118 | 119 | b = backoff(10*time.Millisecond, 9, 8) 120 | if b != 640*time.Millisecond { 121 | t.Fatalf("bad: %v", b) 122 | } 123 | } 124 | 125 | func TestOverrideNotifyBool(t *testing.T) { 126 | ch := make(chan bool, 1) 127 | 128 | // sanity check - buffered channel don't have any values 129 | select { 130 | case v := <-ch: 131 | t.Fatalf("unexpected receive: %v", v) 132 | default: 133 | } 134 | 135 | // simple case of a single push 136 | overrideNotifyBool(ch, false) 137 | select { 138 | case v := <-ch: 139 | if v != false { 140 | t.Fatalf("expected false but got %v", v) 141 | } 142 | default: 143 | t.Fatalf("expected a value but is not ready") 144 | } 145 | 146 | // assert that function never blocks and only last item is received 147 | overrideNotifyBool(ch, false) 148 | overrideNotifyBool(ch, false) 149 | overrideNotifyBool(ch, false) 150 | overrideNotifyBool(ch, false) 151 | overrideNotifyBool(ch, true) 152 | 153 | select { 154 | case v := <-ch: 155 | if v != true { 156 | t.Fatalf("expected true but got %v", v) 157 | } 158 | default: 159 | t.Fatalf("expected a value but is not ready") 160 | } 161 | 162 | // no further value is available 163 | select { 164 | case v := <-ch: 165 | t.Fatalf("unexpected receive: %v", v) 166 | default: 167 | } 168 | } 169 | --------------------------------------------------------------------------------
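
The raft-compat test cluster above (raft-compat/testcluster/cluster.go, InitUITWithStore) shows the minimal wiring a node needs: a Config, a log/stable store, a snapshot store, an FSM, and a transport. As a rough sketch of that same wiring outside the test harness — the server ID, bind address, and main function below are illustrative and not part of the library — it might look like this:

package main

import (
	"fmt"
	"log"
	"time"

	"github.com/hashicorp/raft"
)

func main() {
	// Mirrors the setup used by the compat test cluster, with the in-memory
	// store and snapshot implementations that ship with the library.
	config := raft.DefaultConfig()
	config.LocalID = raft.ServerID("node-0") // illustrative ID

	store := raft.NewInmemStore()         // serves as both LogStore and StableStore
	snaps := raft.NewInmemSnapshotStore() // SnapshotStore
	fsm := &raft.MockFSM{}                // simple FSM provided by the library for testing

	// Plain TCP transport; the loopback bind address is illustrative.
	trans, err := raft.NewTCPTransport("127.0.0.1:0", nil, 2, time.Second, nil)
	if err != nil {
		log.Fatal(err)
	}

	r, err := raft.NewRaft(config, fsm, store, store, snaps, trans)
	if err != nil {
		log.Fatal(err)
	}

	// The node remains a Follower until a cluster is bootstrapped or it joins one.
	fmt.Println("state:", r.State())
}

A real deployment would substitute durable LogStore, StableStore, and SnapshotStore implementations and its own FSM; the in-memory types are the ones the test files in this repository use.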