├── .github └── workflows │ └── main.yml ├── .gitignore ├── .golangci.yml ├── .goreleaser.yml ├── LICENSE ├── Makefile ├── README.md ├── api └── swagger.yml ├── cmd └── qumomf │ └── main.go ├── config ├── qumomf.conf.yml └── qumomf.daemon.min.conf.yml ├── example ├── docker-compose.yml ├── qumomf.yml ├── router │ ├── Dockerfile │ ├── init_router.lua │ └── router.lua └── storage │ ├── Dockerfile │ ├── init_storage.lua │ └── storage.lua ├── go.mod ├── go.sum ├── internal ├── api │ ├── api.go │ └── data.go ├── config │ ├── config.go │ ├── config_test.go │ ├── testdata │ │ ├── bad-elector.conf.yml │ │ └── qumomf-full.conf.yml │ └── validator.go ├── coordinator │ └── coordinator.go ├── metrics │ └── metrics.go ├── qumhttp │ ├── api.go │ ├── api_test.go │ ├── data.go │ ├── http.go │ └── routing.go ├── quorum │ ├── elector.go │ ├── elector_test.go │ ├── idle.go │ ├── idle_test.go │ ├── smart.go │ └── smart_test.go ├── storage │ ├── data.go │ ├── sqlite │ │ ├── sqlite.go │ │ └── sqlite_test.go │ └── storage.go ├── util │ └── util.go └── vshard │ ├── alert.go │ ├── cluster.go │ ├── cluster_test.go │ ├── instance.go │ ├── mock.go │ ├── orchestrator │ ├── analysis.go │ ├── config.go │ ├── failover.go │ ├── failover_test.go │ ├── hook.go │ ├── hook_test.go │ ├── instance_utils.go │ ├── instance_utils_test.go │ ├── monitor.go │ ├── monitor_test.go │ ├── recovery.go │ ├── recovery_test.go │ └── sampler.go │ ├── parser.go │ ├── parser_test.go │ ├── replicaset.go │ ├── replicaset_test.go │ ├── router.go │ ├── snapshot.go │ ├── tarantool.go │ └── tarantool_test.go └── scripts ├── etc └── systemd │ └── qumomf.service ├── postinstall.sh └── preremove.sh /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | pull_request: 8 | branches: 9 | - '*' 10 | 11 | jobs: 12 | 13 | build: 14 | runs-on: ubuntu-latest 15 | steps: 16 | 17 | - name: Set up Go 1.13 18 | uses: actions/setup-go@v1 19 | with: 20 | go-version: 1.13 21 | id: go 22 | 23 | - name: Check out code into the Go module directory 24 | uses: actions/checkout@v2 25 | 26 | - name: Get dependencies 27 | run: | 28 | go get -v -t -d ./... 
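# Legacy fallback: fetch dependencies with dep if the project still uses Gopkg.toml.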
29 | if [ -f Gopkg.toml ]; then 30 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 31 | dep ensure 32 | fi 33 | 34 | - name: Build 35 | run: make build 36 | 37 | - name: Test 38 | run: | 39 | make env_up 40 | make run_tests 41 | make env_down 42 | 43 | - name: install golangci-lint 44 | run: | 45 | curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh| sh -s -- -b $GITHUB_WORKSPACE v1.23.6 46 | 47 | - name: Lint 48 | run: $GITHUB_WORKSPACE/golangci-lint run 49 | 50 | - name: Run GoReleaser 51 | uses: goreleaser/goreleaser-action@v2 52 | with: 53 | args: release --snapshot --skip-publish --rm-dist -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | vendor/ 16 | 17 | # IDE 18 | .idea/ 19 | **/.DS_Store 20 | 21 | # vim 22 | *.swp 23 | *.swo 24 | 25 | bin/ 26 | dist/ 27 | /*.db 28 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | skip-dirs: 3 | - example 4 | 5 | linters-settings: 6 | govet: 7 | check-shadowing: true 8 | settings: 9 | printf: 10 | funcs: 11 | - (github.com/golangci/golangci-lint/pkg/logutils.Log).Infof 12 | - (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf 13 | - (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf 14 | - (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf 15 | golint: 16 | min-confidence: 0 17 | gocyclo: 18 | min-complexity: 15 19 | maligned: 20 | suggest-new: true 21 | dupl: 22 | threshold: 100 23 | goconst: 24 | min-len: 2 25 | min-occurrences: 3 26 | misspell: 27 | locale: US 28 | goimports: 29 | local-prefixes: github.com/golangci/golangci-lint 30 | gocritic: 31 | enabled-tags: 32 | - diagnostic 33 | - experimental 34 | - opinionated 35 | - performance 36 | - style 37 | disabled-checks: 38 | - wrapperFunc 39 | - dupImport # https://github.com/go-critic/go-critic/issues/845 40 | - ifElseChain 41 | - octalLiteral 42 | - whyNoLint 43 | - hugeParam 44 | funlen: 45 | lines: 300 46 | statements: 200 47 | 48 | issues: 49 | exclude-rules: 50 | - path: _test\.go 51 | linters: 52 | - dupl 53 | - gosec 54 | 55 | linters: 56 | disable-all: true 57 | enable: 58 | - bodyclose 59 | - deadcode 60 | - depguard 61 | - dogsled 62 | - dupl 63 | - errcheck 64 | - funlen 65 | - goconst 66 | - gocritic 67 | - gocyclo 68 | - gofmt 69 | - goimports 70 | - golint 71 | - gosec 72 | - gosimple 73 | - govet 74 | - ineffassign 75 | - interfacer 76 | - misspell 77 | - nakedret 78 | - scopelint 79 | - staticcheck 80 | - structcheck 81 | - stylecheck 82 | - typecheck 83 | - unconvert 84 | - unparam 85 | - unused 86 | - varcheck 87 | - whitespace 88 | - prealloc 89 | - maligned 90 | 91 | service: 92 | golangci-lint-version: 1.21.x # use the fixed version to not introduce new linters unexpectedly -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | before: 2 | hooks: 3 | - go mod download 4 | 5 | 
builds: 6 | - env: 7 | - CGO_ENABLED=1 8 | main: ./cmd/qumomf/main.go 9 | ldflags: 10 | - -s -w -X main.version={{.Version}} -X main.commit={{.Commit}} -X main.buildDate={{.Date}} 11 | goarch: 12 | - amd64 13 | goos: 14 | - linux 15 | 16 | archives: 17 | - files: 18 | - LICENSE 19 | - README.md 20 | - config/* 21 | 22 | checksum: 23 | name_template: 'checksums.txt' 24 | # Algorithm to be used. 25 | # Accepted options are sha256, sha512, sha1, crc32, md5, sha224 and sha384. 26 | # Default is sha256. 27 | algorithm: sha256 28 | 29 | snapshot: 30 | name_template: "{{ .Tag }}-SNAPSHOT-{{.ShortCommit}}" 31 | 32 | changelog: 33 | skip: true 34 | 35 | nfpms: 36 | - id: default 37 | package_name: qumomf 38 | 39 | vendor: citymobil 40 | maintainer: Pavel Parshin , Aleksandr Petrukhin 41 | homepage: https://github.com/shmel1k/qumomf 42 | description: Tarantool vshard HA tool supports auto discovery and recovery 43 | license: MIT 44 | 45 | formats: 46 | - deb 47 | - rpm 48 | 49 | dependencies: ~ 50 | recommends: ~ 51 | suggests: ~ 52 | conflicts: ~ 53 | 54 | bindir: /usr/local/bin 55 | 56 | epoch: 1 57 | release: 1 58 | 59 | scripts: 60 | postinstall: "scripts/postinstall.sh" 61 | preremove: "scripts/preremove.sh" 62 | 63 | files: 64 | "scripts/etc/systemd/**": "/etc/systemd/system" 65 | 66 | config_files: 67 | "config/qumomf.daemon.min.conf.yml": "/etc/qumomf/conf.yml" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BINARY=qumomf 2 | VERSION=`git describe --tags --dirty --always` 3 | COMMIT=`git rev-parse HEAD` 4 | BUILD_DATE=`date +%FT%T%z` 5 | LDFLAGS=-ldflags "-w -s -X main.version=${VERSION} -X main.commit=${COMMIT} -X main.buildDate=${BUILD_DATE}" 6 | 7 | all: build 8 | 9 | .PHONY: build 10 | build: 11 | go build ${LDFLAGS} -o bin/${BINARY} cmd/qumomf/main.go 12 | 13 | .PHONY: release 14 | release: 15 | goreleaser build --snapshot --rm-dist 16 | 17 | .PHONY: run 18 | run: build 19 | bin/qumomf -config=example/qumomf.yml 20 | 21 | .PHONY: env_up 22 | env_up: 23 | docker-compose -f example/docker-compose.yml up -d 24 | sleep 2 25 | docker-compose -f example/docker-compose.yml ps 26 | 27 | .PHONY: env_down 28 | env_down: 29 | docker-compose -f example/docker-compose.yml down -v --rmi local --remove-orphans 30 | 31 | .PHONY: fmt 32 | fmt: 33 | go fmt ./... 34 | 35 | .PHONY: lint 36 | lint: 37 | golangci-lint run -v ./... 38 | 39 | .PHONY: run_short_tests 40 | run_short_tests: 41 | go test -count=1 -v -short ./... 42 | 43 | .PHONY: run_tests 44 | run_tests: env_up 45 | go test -count=1 -v -race ./... 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/shmel1k/qumomf?sort=semver&style=for-the-badge) 2 | ![GitHub Workflow Status](https://img.shields.io/github/workflow/status/shmel1k/qumomf/CI?style=for-the-badge) 3 | 4 | # Qumomf 5 | 6 | Qumomf is a Tarantool vshard high availability tool which supports discovery and recovery. 7 | 8 | # Table of Contents 9 | 10 | * [Discovery](#discovery) 11 | * [Configuration](#configuration) 12 | * [How to add a new cluster](#how-to-add-a-new-cluster) 13 | * [Topology recovery](#topology-recovery) 14 | * [Idle](#idle) 15 | * [Smart](#smart) 16 | * [Recovery hooks](#recovery-hooks) 17 | * [Hooks arguments and environment](#hooks-arguments-and-environment) 18 | * [API](#api) 19 | * [Hacking](#hacking) 20 | 21 | ## Discovery 22 | 23 | Qumomf actively crawls through your topologies and analyzes them. 24 | It reads basic vshard info such as replication status and configuration. 25 | 26 | You should provide at least one router which will be an entrypoint to the discovery process. 27 | 28 | ## Configuration 29 | 30 | For a sample qumomf configuration and its description see [example](config/qumomf.conf.yml). 31 | 32 | ### How to add a new cluster 33 | 34 | Edit your configuration file and add a new cluster, e.g.: 35 | 36 | ```yaml 37 | clusters: 38 | my_cluster: 39 | routers: 40 | - name: 'my_cluster_router_1' 41 | addr: 'localhost:3301' 42 | ``` 43 | 44 | You might override default connection settings for each cluster. 45 | 46 | ```yaml 47 | clusters: 48 | my_cluster: 49 | connection: 50 | user: 'tnt' 51 | password: 'tnt' 52 | connect_timeout: 10s 53 | request_timeout: 10s 54 | 55 | routers: 56 | - name: 'my_cluster_router_1' 57 | addr: 'localhost:3301' 58 | ``` 59 | 60 | For a sample vshard configuration, 61 | see [qumomf example](/example) or [Tarantool documentation](https://www.tarantool.io/en/doc/1.10/reference/reference_rock/vshard/vshard_quick/#vshard-config-cluster-example). 62 | 63 | Start qumomf, and it will discover all clusters defined in the configuration. 
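To verify that discovery picked your cluster up, you can query the HTTP API (a quick check, assuming the default `:8080` port from the sample configuration):

```bash
# List the discovered clusters along with their shard/router counts and health level.
curl -s http://localhost:8080/api/v0/snapshots
```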
64 | 65 | ## Topology recovery 66 | 67 | Currently, qumomf supports only automated master recovery. 68 | It is a configurable option and can be disabled completely or per cluster via the configuration. 69 | 70 | Master election supports two modes: `idle` and `smart`. 71 | The election mode can be configured for each cluster independently. 72 | 73 | Both electors support the following options: 74 | 75 | - `reasonable_follower_lsn_lag` - on crash recovery, followers that are lagging 76 | more than the given LSN must not participate in the election. 77 | - `reasonable_follower_idle` - on crash recovery, followers that are lagging 78 | more than the given duration must not participate in the election. 79 | 80 | A value of 0 disables these features. 81 | 82 | ### Idle 83 | 84 | A naive and simple elector which picks the alive replica that most recently communicated with the failed master (received data or a heartbeat signal). 85 | Followers with a negative priority are excluded from the master election. 86 | 87 | ### Smart 88 | 89 | This elector takes as many metrics into account as it can: 90 | - vshard configuration consistency (preferring a replica which has the same configuration as the master), 91 | - the upstream status each replica had before the crash, 92 | - how far the replica lags behind the master, comparing its LSN to the master's LSN, 93 | - the last time the replica received data or a heartbeat signal from the master, 94 | - user-defined promotion rules based on the instance priorities. 95 | 96 | You can define your own promotion rules which will influence master election during a failover. 97 | Each instance has a priority set via the config. A negative priority excludes a follower from the election process. 98 | 99 | ## Recovery hooks 100 | 101 | Hooks are invoked throughout the recovery process via a shell, in particular bash. 102 | 103 | These hooks are available: 104 | 105 | - `PreFailover`: executed immediately before qumomf takes recovery action. Failure (non-zero exit code) of any of these processes aborts the recovery. Hint: this gives you the opportunity to abort recovery based on some internal state of your system. 106 | - `PostSuccessfulFailover`: executed at the end of a successful recovery. 107 | - `PostUnsuccessfulFailover`: executed at the end of an unsuccessful recovery. 108 | 109 | Any process command that starts with "&" will be executed asynchronously, and a failure of such a process is ignored. 110 | 111 | Qumomf executes lists of commands sequentially, in order of definition. 112 | 113 | A naive implementation might look like: 114 | 115 | ```yaml 116 | hooks: 117 | shell: bash 118 | pre_failover: 119 | - "echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/qumomf_recovery.log" 120 | post_successful_failover: 121 | - "echo 'Recovered from {failureType} on {failureCluster}. Set: {failureReplicaSetUUID}; Failed: {failedURI}; Successor: {successorURI}' >> /tmp/qumomf_recovery.log" 122 | post_unsuccessful_failover: 123 | - "echo 'Failed to recover from {failureType} on {failureCluster}. Set: {failureReplicaSetUUID}; Failed: {failedURI}' >> /tmp/qumomf_recovery.log" 124 | ``` 125 | 126 | ### Hooks arguments and environment 127 | 128 | Qumomf provides all hooks with failure/recovery-related information, such as the UUID/URI of the failed instance, 129 | UUID/URI of the promoted instance, type of failure, name of cluster, etc.
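As an illustration, a hook can delegate to a standalone script that reads this context from its environment (a hypothetical script; the `QUM_*` variables it uses are listed below):

```bash
#!/usr/bin/env bash
# notify_failover.sh -- hypothetical hook target; the QUM_* variables are exported by qumomf.
{
  echo "$(date) ${QUM_FAILURE_TYPE} on cluster ${QUM_FAILURE_CLUSTER}"
  echo "  failed instance: ${QUM_FAILED_URI} (${QUM_FAILED_UUID})"
  echo "  recovery successful: ${QUM_IS_SUCCESSFUL}"
} >> /tmp/qumomf_recovery.log
```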
130 | 131 | This information is passed independently in two ways, and you may choose to use one or both: 132 | 133 | **Environment variables**: 134 | 135 | - `QUM_FAILURE_TYPE` 136 | - `QUM_FAILED_UUID` 137 | - `QUM_FAILED_URI` 138 | - `QUM_FAILURE_CLUSTER` 139 | - `QUM_FAILURE_REPLICA_SET_UUID` 140 | - `QUM_COUNT_FOLLOWERS` 141 | - `QUM_COUNT_WORKING_FOLLOWERS` 142 | - `QUM_COUNT_REPLICATING_FOLLOWERS` 143 | - `QUM_COUNT_INCONSISTENT_VSHARD_CONF` 144 | - `QUM_IS_SUCCESSFUL` 145 | 146 | And, if a recovery was successful: 147 | 148 | - `QUM_SUCCESSOR_UUID` 149 | - `QUM_SUCCESSOR_URI` 150 | 151 | **Command line text replacement**. 152 | 153 | Qumomf replaces the following tokens in your hook commands: 154 | 155 | - `{failureType}` 156 | - `{failedUUID}` 157 | - `{failedURI}` 158 | - `{failureCluster}` 159 | - `{failureReplicaSetUUID}` 160 | - `{countFollowers}` 161 | - `{countWorkingFollowers}` 162 | - `{countReplicatingFollowers}` 163 | - `{countInconsistentVShardConf}` 164 | - `{isSuccessful}` 165 | 166 | And, if a recovery was successful: 167 | 168 | - `{successorUUID}` 169 | - `{successorURI}` 170 | 171 | ## API 172 | 173 | Qumomf exposes several debug endpoints: 174 | 175 | - `/debug/metrics` - runtime and app metrics in Prometheus format, 176 | - `/debug/health` - health check, 177 | - `/debug/about` - the app version and build date. 178 | 179 | See the [API documentation](api/swagger.yml) for the endpoints providing information about cluster states, recoveries, and problems. 180 | 181 | ## Hacking 182 | 183 | Feel free to open issues and pull requests with your ideas on how to improve qumomf. 184 | 185 | To run unit and integration tests: 186 | 187 | ```bash 188 | make env_up 189 | make run_tests 190 | make env_down 191 | ``` 192 | -------------------------------------------------------------------------------- /api/swagger.yml: -------------------------------------------------------------------------------- 1 | openapi: 3.0.0 2 | info: 3 | title: QUMOMF API 4 | version: 0.0.1 5 | 6 | paths: 7 | /api/v0/snapshots: 8 | get: 9 | summary: "Get list of clusters" 10 | responses: 11 | '200': 12 | description: 'Request successfully finished' 13 | content: 14 | application/json: 15 | schema: 16 | $ref: '#/components/schemas/ClusterInfo' 17 | '500': 18 | description: 'Internal error' 19 | 20 | /api/v0/snapshots/{cluster_name}: 21 | get: 22 | summary: "Get all information about a cluster" 23 | parameters: 24 | - $ref: '#/components/parameters/cluster_name' 25 | responses: 26 | '200': 27 | description: 'Request successfully finished' 28 | '400': 29 | description: 'Invalid request' 30 | '500': 31 | description: 'Internal error' 32 | 33 | /api/v0/snapshots/{cluster_name}/{shard_uuid}: 34 | get: 35 | summary: "Get all information about a shard" 36 | parameters: 37 | - $ref: '#/components/parameters/cluster_name' 38 | - $ref: '#/components/parameters/shard_uuid' 39 | responses: 40 | '200': 41 | description: 'Request successfully finished' 42 | '400': 43 | description: 'Invalid request' 44 | '500': 45 | description: 'Internal error' 46 | /api/v0/snapshots/{cluster_name}/{shard_uuid}/{instance_uuid}: 47 | get: 48 | summary: "Get all information about an instance" 49 | parameters: 50 | - $ref: '#/components/parameters/cluster_name' 51 | - $ref: '#/components/parameters/shard_uuid' 52 | - $ref: '#/components/parameters/instance_uuid' 53 | responses: 54 | '200': 55 | description: 'Request successfully finished' 56 | '400': 57 | description: 'Invalid request' 58 | '500': 59 | description: 'Internal error' 60 | 61 |
/api/v0/recoveries/{cluster_name}/{shard_uuid}: 62 | get: 63 | summary: "Get all recoveries for a shard" 64 | parameters: 65 | - $ref: '#/components/parameters/cluster_name' 66 | - $ref: '#/components/parameters/shard_uuid' 67 | responses: 68 | '200': 69 | description: 'Request successfully finished' 70 | '400': 71 | description: 'Invalid request' 72 | '500': 73 | description: 'Internal error' 74 | /api/v0/alerts: 75 | get: 76 | summary: "Get all active problems" 77 | responses: 78 | '200': 79 | description: 'Request successfully finished' 80 | content: 81 | application/json: 82 | schema: 83 | $ref: '#/components/schemas/AlertsResponse' 84 | '500': 85 | description: 'Internal error' 86 | /api/v0/alerts/{cluster_name}: 87 | get: 88 | summary: "Get all active problems for a cluster" 89 | parameters: 90 | - $ref: '#/components/parameters/cluster_name' 91 | responses: 92 | '200': 93 | description: 'Request successfully finished' 94 | content: 95 | application/json: 96 | schema: 97 | $ref: '#/components/schemas/AlertsResponse' 98 | '400': 99 | description: 'Invalid request' 100 | '500': 101 | description: 'Internal error' 102 | components: 103 | schemas: 104 | ClusterInfo: 105 | type: array 106 | items: 107 | properties: 108 | name: 109 | type: string 110 | example: qumomf_sandbox 111 | shards_count: 112 | type: integer 113 | example: 2 114 | routers_count: 115 | type: integer 116 | example: 1 117 | discovered_at: 118 | type: integer 119 | example: 1611231096 120 | health_level: 121 | type: string 122 | example: green 123 | AlertsResponse: 124 | properties: 125 | instances_alerts: 126 | $ref: '#/components/schemas/InstanceAlerts' 127 | routers_alerts: 128 | $ref: '#/components/schemas/RoutersAlerts' 129 | InstanceAlerts: 130 | properties: 131 | cluster_name: 132 | type: string 133 | shard_uuid: 134 | type: string 135 | instance_uri: 136 | type: string 137 | alerts: 138 | type: array 139 | items: 140 | $ref: '#/components/schemas/Alert' 141 | RoutersAlerts: 142 | properties: 143 | uri: 144 | type: string 145 | alerts: 146 | type: array 147 | items: 148 | $ref: '#/components/schemas/Alert' 149 | Alert: 150 | properties: 151 | Type: 152 | type: string 153 | Description: 154 | type: string 155 | parameters: 156 | cluster_name: 157 | in: path 158 | name: cluster_name 159 | schema: 160 | type: string 161 | required: true 162 | description: Cluster name 163 | shard_uuid: 164 | in: path 165 | name: shard_uuid 166 | schema: 167 | type: string 168 | required: true 169 | description: Shard UUID 170 | instance_uuid: 171 | in: path 172 | name: instance_uuid 173 | schema: 174 | type: string 175 | required: true 176 | description: Instance UUID -------------------------------------------------------------------------------- /cmd/qumomf/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "log/syslog" 9 | "net/http" 10 | "os" 11 | "os/signal" 12 | "path" 13 | "syscall" 14 | "time" 15 | 16 | "github.com/shmel1k/qumomf/internal/api" 17 | 18 | "github.com/gorilla/mux" 19 | 20 | "github.com/shmel1k/qumomf/internal/storage" 21 | "github.com/shmel1k/qumomf/internal/storage/sqlite" 22 | 23 | "github.com/rs/zerolog" 24 | "github.com/rs/zerolog/log" 25 | "golang.org/x/sys/unix" 26 | "gopkg.in/natefinch/lumberjack.v2" 27 | 28 | "github.com/shmel1k/qumomf/internal/config" 29 | "github.com/shmel1k/qumomf/internal/coordinator" 30 | "github.com/shmel1k/qumomf/internal/qumhttp" 31 | ) 32 | 33 | var ( 34 | version = 
"dev" 35 | commit = "none" 36 | buildDate = "unknown" 37 | ) 38 | 39 | var ( 40 | configPath = flag.String("config", "", "Config file path") 41 | ) 42 | 43 | func main() { 44 | flag.Parse() 45 | cfg, err := config.Setup(*configPath) 46 | if err != nil { 47 | log.Fatal().Err(err).Msgf("failed to read config") 48 | } 49 | 50 | logger := initLogger(cfg) 51 | 52 | db, err := newStorage(cfg) 53 | if err != nil { 54 | logger.Fatal().Err(err).Msg("failed to init persistent storage") 55 | } 56 | 57 | service := api.NewService(db) 58 | server := initHTTPServer(logger, service, cfg.Qumomf.Port) 59 | 60 | logger.Info().Msgf("Starting qumomf %s, commit %s, built at %s", version, commit, buildDate) 61 | 62 | go func() { 63 | logger.Info().Msgf("Listening on %s", cfg.Qumomf.Port) 64 | 65 | err = server.ListenAndServe() 66 | if err != http.ErrServerClosed { 67 | logger.Fatal().Err(err).Msg("Failed to listen HTTP server") 68 | } 69 | }() 70 | 71 | if len(cfg.Clusters) == 0 { 72 | logger.Warn().Msg("No clusters are found in the configuration") 73 | } 74 | 75 | qCoordinator := coordinator.New(logger, db) 76 | for clusterName, clusterCfg := range cfg.Clusters { 77 | err = qCoordinator.RegisterCluster(clusterName, clusterCfg, cfg) 78 | if err != nil { 79 | logger.Err(err).Msgf("Could not register cluster with name %s", clusterName) 80 | continue 81 | } 82 | logger.Info().Msgf("New cluster '%s' has been registered", clusterName) 83 | } 84 | 85 | interrupt := make(chan os.Signal, 1) 86 | signal.Notify(interrupt, syscall.SIGINT, syscall.SIGTERM) 87 | sig := <-interrupt 88 | 89 | logger.Info().Msgf("Received system signal: %s. Shutting down qumomf", sig) 90 | qCoordinator.Shutdown() 91 | 92 | err = server.Shutdown(context.Background()) 93 | if err != nil { 94 | logger.Err(err).Msg("Failed to shutting down the HTTP server gracefully") 95 | } 96 | } 97 | 98 | func newStorage(cfg *config.Config) (storage.Storage, error) { 99 | return sqlite.New(sqlite.Config{ 100 | FileName: cfg.Qumomf.Storage.Filename, 101 | ConnectTimeout: cfg.Qumomf.Storage.ConnectTimeout, 102 | QueryTimeout: cfg.Qumomf.Storage.QueryTimeout, 103 | }) 104 | } 105 | 106 | func initLogger(cfg *config.Config) zerolog.Logger { 107 | zerolog.TimeFieldFormat = zerolog.TimeFormatUnix 108 | 109 | loggingCfg := cfg.Qumomf.Logging 110 | 111 | logLevel, err := zerolog.ParseLevel(loggingCfg.Level) 112 | if err != nil { 113 | log.Warn().Msgf("Unknown Level String: '%s', defaulting to DebugLevel", loggingCfg.Level) 114 | logLevel = zerolog.DebugLevel 115 | } 116 | 117 | zerolog.SetGlobalLevel(logLevel) 118 | 119 | writers := make([]io.Writer, 0, 1) 120 | writers = append(writers, os.Stdout) 121 | 122 | if loggingCfg.SysLogEnabled { 123 | w, err := syslog.New(syslog.LOG_INFO, "qumomf") 124 | if err != nil { 125 | log.Warn().Err(err).Msg("Unable to connect to the system log daemon") 126 | } else { 127 | writers = append(writers, zerolog.SyslogLevelWriter(w)) 128 | } 129 | } 130 | 131 | if loggingCfg.FileLoggingEnabled { 132 | w, err := newRollingLogFile(&loggingCfg) 133 | if err != nil { 134 | log.Warn().Err(err).Msg("Unable to init file logger") 135 | } else { 136 | writers = append(writers, w) 137 | } 138 | } 139 | 140 | var baseLogger zerolog.Logger 141 | if len(writers) == 1 { 142 | baseLogger = zerolog.New(writers[0]) 143 | } else { 144 | return zerolog.New(zerolog.MultiLevelWriter(writers...)) 145 | } 146 | 147 | return baseLogger.Level(logLevel).With().Timestamp().Logger() 148 | } 149 | 150 | func newRollingLogFile(cfg *config.Logging) (io.Writer, error) { 
151 | dir := path.Dir(cfg.Filename) 152 | if unix.Access(dir, unix.W_OK) != nil { 153 | return nil, fmt.Errorf("no permissions to write logs to dir: %s", dir) 154 | } 155 | 156 | return &lumberjack.Logger{ 157 | Filename: cfg.Filename, 158 | MaxBackups: cfg.MaxBackups, 159 | MaxSize: cfg.MaxSize, 160 | MaxAge: cfg.MaxAge, 161 | }, nil 162 | } 163 | 164 | func initHTTPServer(logger zerolog.Logger, service api.Service, port string) *http.Server { 165 | r := mux.NewRouter() 166 | qumhttp.RegisterDebugHandlers(r, version, commit, buildDate) 167 | qumhttp.RegisterAPIHandlers(r, qumhttp.NewHandler(logger, service)) 168 | 169 | return &http.Server{ 170 | Addr: port, 171 | Handler: r, 172 | ReadTimeout: 5 * time.Second, 173 | WriteTimeout: 5 * time.Second, 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /config/qumomf.conf.yml: -------------------------------------------------------------------------------- 1 | qumomf: 2 | # TCP port to listen on. 3 | port: ':8080' 4 | logging: 5 | # Verbosity level of logging: trace, debug, info, warn, error, fatal, panic. 6 | # To disable logging, pass an empty string. 7 | level: 'debug' 8 | # Write logs to the local syslog daemon. 9 | syslog_enabled: false 10 | # Write logs to the file. 11 | file_enabled: true 12 | # Absolute path to the log output file. 13 | file_name: '/var/log/qumomf.log' 14 | # The max size in MB of the logfile before it's rolled. 15 | file_max_size: 256 16 | # The max number of rolled files to keep. 17 | file_max_backups: 3 18 | # The max age in days to keep a logfile. 19 | file_max_age: 5 20 | # Indicates whether qumomf should run in the readonly mode: 21 | # no auto failover will be executed. 22 | # Can be overwritten by cluster-specific options. 23 | readonly: true 24 | # How often should qumomf discover the cluster topology. 25 | cluster_discovery_time: '5s' 26 | # How often should qumomf analyze the cluster state. 27 | cluster_recovery_time: '1s' 28 | # Qumomf avoids flapping (cascading failures causing continuous outage and elimination of resources) 29 | # by introducing a block period, where on any given cluster, qumomf will not kick in automated recovery 30 | # on an interval smaller than said period. 31 | # It only applies to recoveries on the same cluster. 32 | # There is nothing to prevent concurrent recoveries running on different clusters. 33 | shard_recovery_block_time: '30m' 34 | # Similar to the shard_recovery_block_time option but defines the recovery block period 35 | # only for a single instance. Used during the vshard configuration recovery. 36 | instance_recovery_block_time: '10m' 37 | 38 | # How should qumomf choose a new master during the failover. 39 | # Available options: idle, smart. 40 | # See README for the description. 41 | # Can be overwritten by cluster-specific options. 42 | elector: 'smart' 43 | # On crash recovery, followers that are lagging more than the given LSN must not participate in the election. 44 | # A value of 0 disables this feature. 45 | reasonable_follower_lsn_lag: 500 46 | # On crash recovery, followers that are lagging more than the given duration must not participate in the election. 47 | # A value of 0 disables this feature. 48 | reasonable_follower_idle: '1m' 49 | 50 | # Hooks invoked throughout the recovery process. 51 | # These are arrays of commands invoked via a shell, in particular bash. 52 | hooks: 53 | # Shell used to invoke hooks, in the form "shell -c <command>". 54 | shell: bash 55 | # Deadline timeout for basic hooks.
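# (Hooks prefixed with "&" run asynchronously and are governed by timeout_async below; see README.)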
56 | timeout: 5s 57 | # Deadline timeout for async hooks. 58 | timeout_async: 10m 59 | # PreFailover hooks executed before the recovery process. 60 | pre_failover: 61 | - "echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/qumomf_recovery.log" 62 | # PostSuccessfulFailover hooks executed after the successful recovery process. 63 | post_successful_failover: 64 | - "echo 'Recovered from {failureType} on {failureCluster}. Set: {failureReplicaSetUUID}; Failed: {failedURI}; Successor: {successorURI}' >> /tmp/qumomf_recovery.log" 65 | # PostUnsuccessfulFailover hooks executed after the unsuccessful recovery process. 66 | post_unsuccessful_failover: 67 | - "echo 'Failed to recover from {failureType} on {failureCluster}. Set: {failureReplicaSetUUID}; Failed: {failedURI}' >> /tmp/qumomf_recovery.log" 68 | 69 | # Local persistent storage to save snapshots, recoveries and other useful data 70 | storage: 71 | filename: 'qumomf.db' 72 | connect_timeout: '1s' 73 | query_timeout: '1s' 74 | 75 | # Tarantool connection options. 76 | # Can be overwritten by cluster-specific options. 77 | connection: 78 | user: 'qumomf' 79 | password: 'qumomf' 80 | connect_timeout: '500ms' 81 | request_timeout: '1s' 82 | 83 | # List of all clusters. 84 | clusters: 85 | # Cluster unique name. 86 | qumomf_sandbox: 87 | readonly: false 88 | 89 | # During the autodiscovery qumomf will use the information 90 | # read from tarantool instances. 91 | # You may want to override the URI of the instances. 92 | override_uri_rules: 93 | 'qumomf_1_m.ddk:3301': '127.0.0.1:9303' 94 | 'qumomf_1_s.ddk:3301': '127.0.0.1:9304' 95 | 'qumomf_2_m.ddk:3301': '127.0.0.1:9305' 96 | 'qumomf_2_s_1.ddk:3301': '127.0.0.1:9306' 97 | 'qumomf_2_s_2.ddk:3301': '127.0.0.1:9307' 98 | 99 | # List of all routers in the cluster. 100 | # Used to discover the cluster topology. 101 | routers: 102 | - name: 'router_1' 103 | uuid: 'router_1_uuid' 104 | addr: '127.0.0.1:9301' 105 | 106 | qumomf_sandbox_2: 107 | elector: 'idle' 108 | 109 | connection: 110 | user: 'tnt' 111 | password: 'tnt' 112 | connect_timeout: 10s 113 | request_timeout: 10s 114 | 115 | # List of priorities for the cluster instances. 116 | priorities: 117 | 'a3ef657e-eb9a-4730-b420-7ea78d52797d': 0 118 | 'bd64dd00-161e-4c99-8b3c-d3c4635e18d2': 10 119 | 'cc4cfb9c-11d8-4810-84d2-66cfbebb0f6e': -1 # exclude from the election process 120 | 121 | routers: 122 | - name: 'sandbox2-router1' 123 | uuid: '38dbe90b-9bca-4766-a98c-f02e56ddf986' 124 | addr: '127.0.0.1:7301' -------------------------------------------------------------------------------- /config/qumomf.daemon.min.conf.yml: -------------------------------------------------------------------------------- 1 | qumomf: 2 | port: ':8080' 3 | logging: 4 | level: 'debug' 5 | syslog_enabled: false 6 | file_enabled: true 7 | file_name: '/var/log/qumomf.log' 8 | file_max_size: 256 9 | file_max_backups: 3 10 | file_max_age: 5 11 | readonly: true 12 | cluster_discovery_time: '5s' 13 | cluster_recovery_time: '1s' 14 | shard_recovery_block_time: '30m' 15 | instance_recovery_block_time: '10m' 16 | 17 | elector: 'smart' 18 | reasonable_follower_lsn_lag: 500 19 | reasonable_follower_idle: '1m' 20 | 21 | hooks: 22 | shell: bash 23 | timeout: 5s 24 | timeout_async: 10m 25 | pre_failover: 26 | - "echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/qumomf_recovery.log" 27 | post_successful_failover: 28 | - "echo 'Recovered from {failureType} on {failureCluster}. 
Set: {failureReplicaSetUUID}; Failed: {failedURI}; Successor: {successorURI}' >> /tmp/qumomf_recovery.log" 29 | post_unsuccessful_failover: 30 | - "echo 'Failed to recover from {failureType} on {failureCluster}. Set: {failureReplicaSetUUID}; Failed: {failedURI}' >> /tmp/qumomf_recovery.log" 31 | 32 | connection: 33 | user: 'qumomf' 34 | password: 'qumomf' 35 | connect_timeout: '500ms' 36 | request_timeout: '1s' 37 | 38 | clusters: ~ -------------------------------------------------------------------------------- /example/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | 3 | services: 4 | router_1: 5 | build: './router' 6 | container_name: qumomf_router.ddk 7 | networks: 8 | - qumomf 9 | ports: 10 | - '9301:3301' 11 | depends_on: 12 | - storage_1_m 13 | - storage_1_s 14 | - storage_2_m 15 | - storage_2_s_1 16 | - storage_2_s_2 17 | 18 | storage_1_m: 19 | build: './storage' 20 | container_name: qumomf_1_m.ddk 21 | networks: 22 | - qumomf 23 | ports: 24 | - '9303:3301' 25 | environment: 26 | - STORAGE_UUID=a94e7310-13f0-4690-b136-169599e87ba0 27 | 28 | storage_1_s: 29 | build: './storage' 30 | container_name: qumomf_1_s.ddk 31 | depends_on: 32 | - storage_1_m 33 | networks: 34 | - qumomf 35 | ports: 36 | - '9304:3301' 37 | environment: 38 | - STORAGE_UUID=bd1095d1-1e73-4ceb-8e2f-6ebdc7838cb1 39 | 40 | storage_2_m: 41 | build: './storage' 42 | container_name: qumomf_2_m.ddk 43 | networks: 44 | - qumomf 45 | ports: 46 | - '9305:3301' 47 | environment: 48 | - STORAGE_UUID=a3ef657e-eb9a-4730-b420-7ea78d52797d 49 | 50 | storage_2_s_1: 51 | build: './storage' 52 | container_name: qumomf_2_s_1.ddk 53 | networks: 54 | - qumomf 55 | ports: 56 | - '9306:3301' 57 | depends_on: 58 | - storage_2_m 59 | environment: 60 | - STORAGE_UUID=bd64dd00-161e-4c99-8b3c-d3c4635e18d2 61 | 62 | storage_2_s_2: 63 | build: './storage' 64 | container_name: qumomf_2_s_2.ddk 65 | networks: 66 | - qumomf 67 | ports: 68 | - '9307:3301' 69 | depends_on: 70 | - storage_2_m 71 | environment: 72 | # - FAKETIME=-1m 73 | - STORAGE_UUID=cc4cfb9c-11d8-4810-84d2-66cfbebb0f6e 74 | 75 | networks: 76 | qumomf: 77 | driver: bridge 78 | -------------------------------------------------------------------------------- /example/qumomf.yml: -------------------------------------------------------------------------------- 1 | qumomf: 2 | port: ':8080' 3 | logging: 4 | level: 'info' 5 | syslog_enabled: false 6 | file_enabled: true 7 | file_name: '/home/pavel/work/qumomf/src/github.com/shmel1k/qumomf/bin/qumomf.log' 8 | file_max_size: 256 9 | file_max_backups: 3 10 | file_max_age: 5 11 | readonly: true 12 | cluster_discovery_time: '5s' 13 | cluster_recovery_time: '1s' 14 | shard_recovery_block_time: '30m' 15 | instance_recovery_block_time: '10m' 16 | elector: 'smart' 17 | 18 | hooks: 19 | shell: bash 20 | timeout: 2s 21 | timeout_async: 1m 22 | pre_failover: 23 | - "echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/qumomf_recovery.log" 24 | post_successful_failover: 25 | - "echo 'Recovered from {failureType} on {failureCluster}. Set: {failureReplicaSetUUID}; Failed: {failedURI}; Successor: {successorURI}' >> /tmp/qumomf_recovery.log" 26 | post_unsuccessful_failover: 27 | - "echo 'Failed to recover from {failureType} on {failureCluster}. 
Set: {failureReplicaSetUUID}; Failed: {failedURI}' >> /tmp/qumomf_recovery.log" 28 | storage: 29 | filename: 'qumomf.db' 30 | connect_timeout: '1s' 31 | query_timeout: '1s' 32 | 33 | connection: 34 | user: 'qumomf' 35 | password: 'qumomf' 36 | connect_timeout: '500ms' 37 | request_timeout: '1s' 38 | 39 | clusters: 40 | qumomf_sandbox: 41 | readonly: false 42 | 43 | override_uri_rules: 44 | 'qumomf_1_m.ddk:3301': '127.0.0.1:9303' 45 | 'qumomf_1_s.ddk:3301': '127.0.0.1:9304' 46 | 'qumomf_2_m.ddk:3301': '127.0.0.1:9305' 47 | 'qumomf_2_s_1.ddk:3301': '127.0.0.1:9306' 48 | 'qumomf_2_s_2.ddk:3301': '127.0.0.1:9307' 49 | 50 | priorities: 51 | 'bd64dd00-161e-4c99-8b3c-d3c4635e18d2': 10 52 | 'cc4cfb9c-11d8-4810-84d2-66cfbebb0f6e': 5 53 | 54 | routers: 55 | - name: 'router_1' 56 | addr: '127.0.0.1:9301' -------------------------------------------------------------------------------- /example/router/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tarantool/tarantool:2.3.1 2 | 3 | COPY init_router.lua /etc/tarantool/instances.enabled/init_router.lua 4 | COPY router.lua /etc/tarantool/instances.enabled/qumomf/router/router.lua 5 | CMD ["tarantool", "/etc/tarantool/instances.enabled/init_router.lua"] 6 | -------------------------------------------------------------------------------- /example/router/init_router.lua: -------------------------------------------------------------------------------- 1 | vshard = require('vshard') 2 | 3 | local cfg = { 4 | memtx_memory = 100 * 1024 * 1024, 5 | bucket_count = 120, 6 | rebalancer_disbalance_threshold = 10, 7 | rebalancer_max_receiving = 1000, 8 | 9 | sharding = { 10 | ['7432f072-c00b-4498-b1a6-6d9547a8a150'] = { -- replicaset #1 11 | replicas = { 12 | ['a94e7310-13f0-4690-b136-169599e87ba0'] = { 13 | uri = 'qumomf:qumomf@qumomf_1_m.ddk:3301', 14 | name = 'qumomf_1_m', 15 | master = true 16 | }, 17 | ['bd1095d1-1e73-4ceb-8e2f-6ebdc7838cb1'] = { 18 | uri = 'qumomf:qumomf@qumomf_1_s.ddk:3301', 19 | name = 'qumomf_1_s' 20 | } 21 | }, 22 | }, -- replicaset #1 23 | ['5065fb5f-5f40-498e-af79-43887ba3d1ec'] = { -- replicaset #2 24 | replicas = { 25 | ['a3ef657e-eb9a-4730-b420-7ea78d52797d'] = { 26 | uri = 'qumomf:qumomf@qumomf_2_m.ddk:3301', 27 | name = 'qumomf_2_m', 28 | master = true 29 | }, 30 | ['bd64dd00-161e-4c99-8b3c-d3c4635e18d2'] = { 31 | uri = 'qumomf:qumomf@qumomf_2_s_1.ddk:3301', 32 | name = 'qumomf_2_s_1' 33 | }, 34 | ['cc4cfb9c-11d8-4810-84d2-66cfbebb0f6e'] = { 35 | uri = 'qumomf:qumomf@qumomf_2_s_2.ddk:3301', 36 | name = 'qumomf_2_s_2' 37 | } 38 | }, 39 | }, -- replicaset #2 40 | }, -- sharding 41 | } 42 | 43 | cfg.listen = 3301 44 | vshard.router.cfg(cfg) 45 | 46 | box.once("init", function() 47 | box.schema.user.create('qumomf', { password = 'qumomf', if_not_exists = true }) 48 | box.schema.user.grant('qumomf', 'read,write,create,execute', 'universe') 49 | end) 50 | 51 | vshard.router.bootstrap() 52 | vshard.router.discovery_wakeup() 53 | 54 | dofile('/etc/tarantool/instances.enabled/qumomf/router/router.lua') 55 | 56 | -------------------------------------------------------------------------------- /example/router/router.lua: -------------------------------------------------------------------------------- 1 | vshard = require('vshard') 2 | 3 | local DEFAULT_TIMEOUT = 1 4 | 5 | local OP_GET = 'qumomf_get' 6 | local OP_SET = 'qumomf_set' 7 | 8 | function qumomf_get(key) 9 | local bucket_id = vshard.router.bucket_id(key) 10 | local netbox, err = vshard.router.route(bucket_id) 11 | if err ~= nil then 
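-- the bucket could not be routed to any replica set; propagate the error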
12 | error(err) 13 | end 14 | 15 | local result, err = netbox:callbre(OP_GET, {key}, { 16 | timeout = DEFAULT_TIMEOUT, 17 | }) 18 | if err ~= nil then 19 | error(err) 20 | end 21 | return result 22 | end 23 | 24 | function qumomf_set(key, value) 25 | local bucket_id = vshard.router.bucket_id(key) 26 | local netbox, err = vshard.router.route(bucket_id) 27 | if err ~= nil then 28 | error(err) 29 | end 30 | 31 | local result, err = netbox:callrw(OP_SET, { key, value }, { 32 | timeout = DEFAULT_TIMEOUT, 33 | }) 34 | if err ~= nil then 35 | error(err) 36 | end 37 | 38 | return result 39 | end -------------------------------------------------------------------------------- /example/storage/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tarantool/tarantool:2.3.1 2 | 3 | COPY --from=trajano/alpine-libfaketime /faketime.so /lib/faketime.so 4 | ENV LD_PRELOAD=/lib/faketime.so 5 | 6 | COPY init_storage.lua /etc/tarantool/instances.enabled/init_storage.lua 7 | COPY storage.lua /etc/tarantool/instances.enabled/qumomf/storage/storage.lua 8 | CMD ["tarantool", "/etc/tarantool/instances.enabled/init_storage.lua"] 9 | -------------------------------------------------------------------------------- /example/storage/init_storage.lua: -------------------------------------------------------------------------------- 1 | os = require('os') 2 | vshard = require('vshard') 3 | 4 | local IDX_KEY = 1 5 | local IDX_VALUE = 2 6 | 7 | local cfg = { 8 | memtx_memory = 100 * 1024 * 1024, 9 | bucket_count = 120, 10 | rebalancer_disbalance_threshold = 10, 11 | rebalancer_max_receiving = 1000, 12 | 13 | sharding = { 14 | ['7432f072-c00b-4498-b1a6-6d9547a8a150'] = { -- replicaset #1 15 | replicas = { 16 | ['a94e7310-13f0-4690-b136-169599e87ba0'] = { 17 | uri = 'qumomf:qumomf@qumomf_1_m.ddk:3301', 18 | name = 'qumomf_1_m', 19 | master = true 20 | }, 21 | ['bd1095d1-1e73-4ceb-8e2f-6ebdc7838cb1'] = { 22 | uri = 'qumomf:qumomf@qumomf_1_s.ddk:3301', 23 | name = 'qumomf_1_s' 24 | } 25 | }, 26 | }, -- replicaset #1 27 | ['5065fb5f-5f40-498e-af79-43887ba3d1ec'] = { -- replicaset #2 28 | replicas = { 29 | ['a3ef657e-eb9a-4730-b420-7ea78d52797d'] = { 30 | uri = 'qumomf:qumomf@qumomf_2_m.ddk:3301', 31 | name = 'qumomf_2_m', 32 | master = true 33 | }, 34 | ['bd64dd00-161e-4c99-8b3c-d3c4635e18d2'] = { 35 | uri = 'qumomf:qumomf@qumomf_2_s_1.ddk:3301', 36 | name = 'qumomf_2_s_1' 37 | }, 38 | ['cc4cfb9c-11d8-4810-84d2-66cfbebb0f6e'] = { 39 | uri = 'qumomf:qumomf@qumomf_2_s_2.ddk:3301', 40 | name = 'qumomf_2_s_2' 41 | } 42 | }, 43 | }, -- replicaset #2 44 | }, -- sharding 45 | } 46 | 47 | local UUID = os.getenv("STORAGE_UUID") 48 | 49 | cfg.listen = 3301 50 | vshard.storage.cfg(cfg, UUID) 51 | 52 | box.once("init", function() 53 | if UUID == 'a94e7310-13f0-4690-b136-169599e87ba0' then 54 | vshard.storage.bucket_force_create(0, 60, {}) 55 | end 56 | 57 | if UUID == 'a3ef657e-eb9a-4730-b420-7ea78d52797d' then 58 | vshard.storage.bucket_force_create(61, 60, {}) 59 | end 60 | 61 | box.schema.user.create('qumomf', { password = 'qumomf', if_not_exists = true }) 62 | box.schema.user.grant('qumomf', 'read,write,create,execute', 'universe') 63 | 64 | local space = box.schema.create_space("qumomf", { 65 | if_not_exists = true, 66 | }) 67 | 68 | space:create_index('key', { 69 | type = 'TREE', 70 | if_not_exists = true, 71 | parts = { 72 | IDX_KEY, 73 | 'string', 74 | }, 75 | unique = true, 76 | }) 77 | end) 78 | 79 | dofile('/etc/tarantool/instances.enabled/qumomf/storage/storage.lua') 
-------------------------------------------------------------------------------- /example/storage/storage.lua: -------------------------------------------------------------------------------- 1 | require('strict').on() 2 | os = require('os') 3 | 4 | local IDX_KEY = 1 5 | local IDX_VALUE = 2 6 | 7 | function qumomf_set(key, value) 8 | box.space.qumomf:insert({ key, value, 0 }) 9 | return {} 10 | end 11 | 12 | function qumomf_get(key) 13 | local tuple = box.space.qumomf:select(key) 14 | if #tuple == 0 then 15 | return nil 16 | end 17 | tuple = tuple[1] 18 | 19 | return tuple[IDX_VALUE] 20 | end -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/shmel1k/qumomf 2 | 3 | go 1.13 4 | 5 | require ( 6 | github.com/google/uuid v1.1.2 // indirect 7 | github.com/gorilla/mux v1.8.0 8 | github.com/mattn/go-sqlite3 v1.14.5 9 | github.com/philhofer/fwd v1.0.0 // indirect 10 | github.com/prometheus/client_golang v1.5.1 11 | github.com/rs/zerolog v1.18.0 12 | github.com/satori/go.uuid v1.2.0 // indirect 13 | github.com/stretchr/testify v1.4.0 14 | github.com/tarantool/go-tarantool v0.0.0-20191229181800-f4ece3508d87 // indirect 15 | github.com/tinylib/msgp v1.1.1 // indirect 16 | github.com/viciious/go-tarantool v0.0.0-20190828171136-ede812c03707 17 | golang.org/x/sys v0.0.0-20200122134326-e047566fdf82 18 | google.golang.org/appengine v1.6.5 // indirect 19 | google.golang.org/genproto v0.0.0-20210113195801-ae06605f4595 // indirect 20 | gopkg.in/natefinch/lumberjack.v2 v2.0.0 21 | gopkg.in/vmihailenco/msgpack.v2 v2.9.1 // indirect 22 | gopkg.in/yaml.v2 v2.2.8 23 | ) 24 | -------------------------------------------------------------------------------- /internal/api/api.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | 7 | "github.com/shmel1k/qumomf/internal/storage" 8 | "github.com/shmel1k/qumomf/internal/storage/sqlite" 9 | "github.com/shmel1k/qumomf/internal/vshard" 10 | "github.com/shmel1k/qumomf/internal/vshard/orchestrator" 11 | ) 12 | 13 | var ( 14 | ErrClusterNotFound = errors.New("cluster not found") 15 | ErrReplicaSetNotFound = errors.New("replica set not found") 16 | ErrInstanceNotFound = errors.New("instance not found") 17 | ) 18 | 19 | type Service interface { 20 | ClustersList(context.Context) ([]ClusterInfo, error) 21 | ClusterSnapshot(context.Context, string) (vshard.Snapshot, error) 22 | ReplicaSet(context.Context, string, vshard.ReplicaSetUUID) (vshard.ReplicaSet, error) 23 | Instance(context.Context, string, vshard.ReplicaSetUUID, vshard.InstanceUUID) (vshard.Instance, error) 24 | Recoveries(context.Context, string, vshard.ReplicaSetUUID) ([]orchestrator.Recovery, error) 25 | Alerts(context.Context) (AlertsResponse, error) 26 | ClusterAlerts(context.Context, string) (AlertsResponse, error) 27 | } 28 | 29 | func NewService(db storage.Storage) Service { 30 | return &service{ 31 | db: db, 32 | } 33 | } 34 | 35 | type service struct { 36 | db storage.Storage 37 | } 38 | 39 | func (s *service) ClustersList(ctx context.Context) ([]ClusterInfo, error) { 40 | clustersList, err := s.db.GetClusters(ctx) 41 | if err != nil { 42 | return nil, err 43 | } 44 | 45 | resp := make([]ClusterInfo, 0, len(clustersList)) 46 | for _, cluster := range clustersList { 47 | resp = append(resp, ClusterInfo{ 48 | Name: cluster.Name, 49 | ShardsCount: 
len(cluster.Snapshot.ReplicaSets), 50 | RoutersCount: len(cluster.Snapshot.Routers), 51 | DiscoveredAt: cluster.Snapshot.Created, 52 | HealthLevel: cluster.Snapshot.ClusterHealthLevel(), 53 | }) 54 | } 55 | 56 | return resp, nil 57 | } 58 | 59 | func (s *service) ClusterSnapshot(ctx context.Context, clusterName string) (vshard.Snapshot, error) { 60 | snap, err := s.db.GetClusterSnapshot(ctx, clusterName) 61 | if err == sqlite.ErrEmptyResult { 62 | return vshard.Snapshot{}, ErrClusterNotFound 63 | } 64 | 65 | return snap, err 66 | } 67 | 68 | func (s *service) ReplicaSet(ctx context.Context, clusterName string, replicaSetUUID vshard.ReplicaSetUUID) (vshard.ReplicaSet, error) { 69 | snap, err := s.db.GetClusterSnapshot(ctx, clusterName) 70 | if err != nil { 71 | if err == sqlite.ErrEmptyResult { 72 | return vshard.ReplicaSet{}, ErrClusterNotFound 73 | } 74 | return vshard.ReplicaSet{}, err 75 | } 76 | 77 | replicaSet, err := snap.ReplicaSet(replicaSetUUID) 78 | if err != nil { 79 | if err == vshard.ErrReplicaSetNotFound { 80 | return vshard.ReplicaSet{}, ErrReplicaSetNotFound 81 | } 82 | 83 | return vshard.ReplicaSet{}, err 84 | } 85 | 86 | return replicaSet, nil 87 | } 88 | 89 | func (s *service) Instance(ctx context.Context, clusterName string, replicaSetUUID vshard.ReplicaSetUUID, instanceUUID vshard.InstanceUUID) (vshard.Instance, error) { 90 | replicaSet, err := s.ReplicaSet(ctx, clusterName, replicaSetUUID) 91 | if err != nil { 92 | return vshard.Instance{}, err 93 | } 94 | 95 | for i := range replicaSet.Instances { 96 | if replicaSet.Instances[i].UUID == instanceUUID { 97 | return replicaSet.Instances[i], nil 98 | } 99 | } 100 | 101 | return vshard.Instance{}, ErrInstanceNotFound 102 | } 103 | 104 | func (s *service) Recoveries(ctx context.Context, clusterName string, replicaSetUUID vshard.ReplicaSetUUID) ([]orchestrator.Recovery, error) { 105 | recoveries, err := s.db.GetRecoveries(ctx, clusterName) 106 | if err != nil { 107 | return nil, err 108 | } 109 | 110 | resp := make([]orchestrator.Recovery, 0, len(recoveries)) 111 | for i := range recoveries { 112 | if recoveries[i].SetUUID == replicaSetUUID { 113 | resp = append(resp, recoveries[i]) 114 | } 115 | } 116 | 117 | return resp, nil 118 | } 119 | 120 | func (s *service) Alerts(ctx context.Context) (AlertsResponse, error) { 121 | clusters, err := s.db.GetClusters(ctx) 122 | if err != nil { 123 | return AlertsResponse{}, err 124 | } 125 | 126 | instanceAlertsList := make([]InstanceAlerts, 0) 127 | routerAlertsList := make([]RoutersAlerts, 0) 128 | for i := range clusters { 129 | routerAlertsList = append(routerAlertsList, routersAlerts(clusters[i].Snapshot.Routers)...) 130 | instanceAlertsList = append(instanceAlertsList, instanceAlerts(clusters[i].Name, clusters[i].Snapshot.ReplicaSets)...) 
131 | } 132 | 133 | return AlertsResponse{ 134 | InstancesAlerts: instanceAlertsList, 135 | RoutersAlerts: routerAlertsList, 136 | }, nil 137 | } 138 | 139 | func (s *service) ClusterAlerts(ctx context.Context, clusterName string) (AlertsResponse, error) { 140 | cluster, err := s.ClusterSnapshot(ctx, clusterName) 141 | if err != nil { 142 | return AlertsResponse{}, err 143 | } 144 | 145 | return AlertsResponse{ 146 | InstancesAlerts: instanceAlerts(clusterName, cluster.ReplicaSets), 147 | RoutersAlerts: routersAlerts(cluster.Routers), 148 | }, nil 149 | } 150 | 151 | func routersAlerts(routers []vshard.Router) []RoutersAlerts { 152 | result := make([]RoutersAlerts, 0) 153 | for i := range routers { 154 | if len(routers[i].Info.Alerts) > 0 { 155 | result = append(result, RoutersAlerts{ 156 | URI: routers[i].URI, 157 | Alerts: routers[i].Info.Alerts, 158 | }) 159 | } 160 | } 161 | 162 | return result 163 | } 164 | 165 | func instanceAlerts(clusterName string, replicaSets []vshard.ReplicaSet) []InstanceAlerts { 166 | resp := make([]InstanceAlerts, 0) 167 | 168 | for i := range replicaSets { 169 | instances := replicaSets[i].Instances 170 | for j := range instances { 171 | alerts := instances[j].StorageInfo.Alerts 172 | if len(alerts) != 0 { 173 | resp = append(resp, InstanceAlerts{ 174 | ClusterName: clusterName, 175 | ShardUUID: replicaSets[i].UUID, 176 | InstanceURI: instances[j].URI, 177 | Alerts: alerts, 178 | }) 179 | } 180 | } 181 | } 182 | 183 | return resp 184 | } 185 | -------------------------------------------------------------------------------- /internal/api/data.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import "github.com/shmel1k/qumomf/internal/vshard" 4 | 5 | type ClusterInfo struct { 6 | Name string `json:"name"` 7 | ShardsCount int `json:"shards_count"` 8 | RoutersCount int `json:"routers_count"` 9 | DiscoveredAt int64 `json:"discovered_at"` 10 | HealthLevel vshard.HealthLevel `json:"health_level"` 11 | } 12 | 13 | type AlertsResponse struct { 14 | InstancesAlerts []InstanceAlerts `json:"instances_alerts"` 15 | RoutersAlerts []RoutersAlerts `json:"routers_alerts"` 16 | } 17 | 18 | type InstanceAlerts struct { 19 | ClusterName string `json:"cluster_name"` 20 | ShardUUID vshard.ReplicaSetUUID `json:"shard_uuid"` 21 | InstanceURI string `json:"instance_uri"` 22 | Alerts []vshard.Alert `json:"alerts"` 23 | } 24 | 25 | type RoutersAlerts struct { 26 | URI string `json:"uri"` 27 | Alerts []vshard.Alert `json:"alerts"` 28 | } 29 | -------------------------------------------------------------------------------- /internal/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "time" 7 | 8 | "gopkg.in/yaml.v2" 9 | ) 10 | 11 | const ( 12 | defaultLogLevel = "debug" 13 | defaultSysLogEnabled = false 14 | defaultFileLoggingEnabled = false 15 | defaultLogFilename = "/var/log/qumomf.log" 16 | defaultLogFileMaxSize = 256 17 | defaultLogFileMaxBackups = 3 18 | defaultLogFileMaxAge = 5 19 | defaultReadOnly = true 20 | defaultUser = "guest" 21 | defaultPassword = "guest" 22 | defaultConnectTimeout = 500 * time.Millisecond 23 | defaultRequestTimeout = 1 * time.Second 24 | defaultClusterDiscoveryTime = 5 * time.Second 25 | defaultClusterRecoveryTime = 1 * time.Second 26 | defaultShardRecoveryBlockTime = 30 * time.Minute 27 | defaultInstanceRecoveryBlockTime = 10 * time.Minute 28 | defaultElectorType = "smart" 29 | 
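// Failover hooks run through the shell configured below; synchronous hooks
// get a short timeout while asynchronous ones get a much longer budget
// (see the Hooks section of Config).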
defaultShellCommand = "bash"
30 |     defaultHookTimeout = 5 * time.Second
31 |     defaultAsyncHookTimeout = 10 * time.Minute
32 |     defaultMaxFollowerLSNLag = 1000
33 |     defaultMaxFollowerIdle = 5 * time.Minute
34 |     defaultStorageFileName = "qumomf.db"
35 |     defaultStorageConnectTimeout = time.Second
36 |     defaultStorageQueryTimeout = time.Second
37 | )
38 |
39 | type Config struct {
40 |     // Qumomf is a set of global options that determines qumomf's behavior.
41 |     Qumomf struct {
42 |         Port string `yaml:"port"`
43 |         Logging Logging `yaml:"logging"`
44 |         ReadOnly bool `yaml:"readonly"`
45 |         ClusterDiscoveryTime time.Duration `yaml:"cluster_discovery_time"`
46 |         ClusterRecoveryTime time.Duration `yaml:"cluster_recovery_time"`
47 |         ShardRecoveryBlockTime time.Duration `yaml:"shard_recovery_block_time"`
48 |         InstanceRecoveryBlockTime time.Duration `yaml:"instance_recovery_block_time"`
49 |         ElectionMode string `yaml:"elector"`
50 |         ReasonableFollowerLSNLag int64 `yaml:"reasonable_follower_lsn_lag"`
51 |         ReasonableFollowerIdle time.Duration `yaml:"reasonable_follower_idle"`
52 |         Hooks struct {
53 |             Shell string `yaml:"shell"`
54 |             PreFailover []string `yaml:"pre_failover"`
55 |             PostSuccessfulFailover []string `yaml:"post_successful_failover"`
56 |             PostUnsuccessfulFailover []string `yaml:"post_unsuccessful_failover"`
57 |             Timeout time.Duration `yaml:"timeout"`
58 |             TimeoutAsync time.Duration `yaml:"timeout_async"`
59 |         } `yaml:"hooks"`
60 |         Storage struct {
61 |             Filename string `yaml:"filename"`
62 |             QueryTimeout time.Duration `yaml:"query_timeout"`
63 |             ConnectTimeout time.Duration `yaml:"connect_timeout"`
64 |         } `yaml:"storage"`
65 |     } `yaml:"qumomf"`
66 |
67 |     // Connection contains the default connection options for each instance in clusters.
68 |     // These options may be overridden by cluster-level options.
69 |     Connection *ConnectConfig `yaml:"connection,omitempty"`
70 |     Clusters map[string]ClusterConfig `yaml:"clusters"`
71 | }
72 |
73 | type Logging struct {
74 |     Level string `yaml:"level"`
75 |     SysLogEnabled bool `yaml:"syslog_enabled"`
76 |     FileLoggingEnabled bool `yaml:"file_enabled"`
77 |     Filename string `yaml:"file_name"`
78 |     MaxSize int `yaml:"file_max_size"` // megabytes
79 |     MaxBackups int `yaml:"file_max_backups"` // files
80 |     MaxAge int `yaml:"file_max_age"` // days
81 | }
82 |
83 | type ConnectConfig struct {
84 |     User *string `yaml:"user"`
85 |     Password *string `yaml:"password"`
86 |     ConnectTimeout *time.Duration `yaml:"connect_timeout"`
87 |     RequestTimeout *time.Duration `yaml:"request_timeout"`
88 | }
89 |
90 | type ClusterConfig struct {
91 |     // Connection contains connection options which qumomf should
92 |     // use to connect to routers and instances in the cluster.
93 |     Connection *ConnectConfig `yaml:"connection,omitempty"`
94 |
95 |     // ReadOnly indicates whether qumomf can run a failover
96 |     // or should just observe the cluster topology.
97 |     ReadOnly *bool `yaml:"readonly,omitempty"`
98 |
99 |     // ElectionMode is the master election mode of the given cluster.
100 |     ElectionMode *string `yaml:"elector"`
101 |
102 |     // OverrideURIRules contains a list of URIs used in Tarantool replication and
103 |     // their mappings which qumomf will use in its connection pool.
104 |     //
105 |     // Use it if qumomf should not connect to the instances by the URIs
106 |     // obtained from the replication configuration during auto discovery.
107 |     OverrideURIRules map[string]string `yaml:"override_uri_rules,omitempty"`
108 |
109 |     // Priorities contains a list of instance UUIDs and their priorities.
110 |     Priorities map[string]int `yaml:"priorities,omitempty"`
111 |
112 |     // Routers contains the list of all cluster routers.
113 |     //
114 |     // All cluster nodes must share a common topology.
115 |     // An administrator must ensure that the configurations are identical.
116 |     // The administrator must provide the list of all routers so that qumomf is able
117 |     // to update their configuration when a failover is running.
118 |     // Otherwise, a failover might break the topology.
119 |     Routers []RouterConfig `yaml:"routers"`
120 | }
121 |
122 | type RouterConfig struct {
123 |     Name string `yaml:"name"`
124 |     Addr string `yaml:"addr"`
125 | }
126 |
127 | func Setup(path string) (*Config, error) {
128 |     file, err := os.Open(path)
129 |     if err != nil {
130 |         return nil, err
131 |     }
132 |     defer func() {
133 |         _ = file.Close()
134 |     }()
135 |
136 |     data, err := ioutil.ReadAll(file)
137 |     if err != nil {
138 |         return nil, err
139 |     }
140 |
141 |     var cfg Config
142 |     cfg.withDefaults()
143 |     err = yaml.Unmarshal(data, &cfg)
144 |     if err != nil {
145 |         return nil, err
146 |     }
147 |
148 |     cfg.overrideEmptyByGlobalConfigs()
149 |
150 |     err = validate(&cfg)
151 |     if err != nil {
152 |         return nil, err
153 |     }
154 |
155 |     return &cfg, nil
156 | }
157 |
158 | func (c *Config) withDefaults() {
159 |     if c == nil {
160 |         return
161 |     }
162 |
163 |     base := &c.Qumomf
164 |     base.ReadOnly = defaultReadOnly
165 |
166 |     base.Logging.Level = defaultLogLevel
167 |     base.Logging.SysLogEnabled = defaultSysLogEnabled
168 |     base.Logging.FileLoggingEnabled = defaultFileLoggingEnabled
169 |     base.Logging.Filename = defaultLogFilename
170 |     base.Logging.MaxSize = defaultLogFileMaxSize
171 |     base.Logging.MaxBackups = defaultLogFileMaxBackups
172 |     base.Logging.MaxAge = defaultLogFileMaxAge
173 |
174 |     base.ClusterDiscoveryTime = defaultClusterDiscoveryTime
175 |     base.ClusterRecoveryTime = defaultClusterRecoveryTime
176 |     base.ShardRecoveryBlockTime = defaultShardRecoveryBlockTime
177 |     base.InstanceRecoveryBlockTime = defaultInstanceRecoveryBlockTime
178 |     base.ElectionMode = defaultElectorType
179 |     base.ReasonableFollowerLSNLag = defaultMaxFollowerLSNLag
180 |     base.ReasonableFollowerIdle = defaultMaxFollowerIdle
181 |     base.Hooks.Shell = defaultShellCommand
182 |     base.Hooks.Timeout = defaultHookTimeout
183 |     base.Hooks.TimeoutAsync = defaultAsyncHookTimeout
184 |
185 |     base.Storage.Filename = defaultStorageFileName
186 |     base.Storage.ConnectTimeout = defaultStorageConnectTimeout
187 |     base.Storage.QueryTimeout = defaultStorageQueryTimeout
188 |
189 |     connection := &ConnectConfig{}
190 |     connection.User = newString(defaultUser)
191 |     connection.Password = newString(defaultPassword)
192 |     connection.ConnectTimeout = newDuration(defaultConnectTimeout)
193 |     connection.RequestTimeout = newDuration(defaultRequestTimeout)
194 |     c.Connection = connection
195 | }
196 |
197 | func (c *Config) overrideEmptyByGlobalConfigs() {
198 |     for clusterUUID, clusterCfg := range c.Clusters {
199 |         if clusterCfg.ReadOnly == nil {
200 |             clusterCfg.ReadOnly = newBool(c.Qumomf.ReadOnly)
201 |         }
202 |
203 |         if clusterCfg.ElectionMode == nil {
204 |             clusterCfg.ElectionMode = newString(c.Qumomf.ElectionMode)
205 |         }
206 |
207 |         if clusterCfg.Connection == nil {
208 |             clusterCfg.Connection = c.Connection
209 |         } else {
210 |             opts := clusterCfg.Connection
211 |             if opts.ConnectTimeout == nil {
212 |                 opts.ConnectTimeout = c.Connection.ConnectTimeout
213 |             }
214 |             if opts.RequestTimeout == nil {
215 |                 opts.RequestTimeout = c.Connection.RequestTimeout
216 |             }
217 |             if
opts.User == nil { 218 | opts.User = c.Connection.User 219 | } 220 | if opts.Password == nil { 221 | opts.Password = c.Connection.Password 222 | } 223 | } 224 | 225 | c.Clusters[clusterUUID] = clusterCfg 226 | } 227 | } 228 | 229 | func validate(c *Config) error { 230 | err := validateElector(&c.Qumomf.ElectionMode) 231 | if err != nil { 232 | return err 233 | } 234 | 235 | for _, clusterCfg := range c.Clusters { 236 | err = validateElector(clusterCfg.ElectionMode) 237 | if err != nil { 238 | return err 239 | } 240 | } 241 | 242 | return nil 243 | } 244 | 245 | func newBool(v bool) *bool { 246 | return &v 247 | } 248 | 249 | func newDuration(v time.Duration) *time.Duration { 250 | return &v 251 | } 252 | 253 | func newString(v string) *string { 254 | return &v 255 | } 256 | -------------------------------------------------------------------------------- /internal/config/config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "path/filepath" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestSetup_InvalidPath(t *testing.T) { 13 | cfg, err := Setup("invalid_path") 14 | assert.NotNil(t, err) 15 | assert.Nil(t, cfg) 16 | } 17 | 18 | func TestSetup_ValidPath(t *testing.T) { 19 | testConfigPath, err := filepath.Abs("testdata/qumomf-full.conf.yml") 20 | require.Nil(t, err) 21 | 22 | cfg, err := Setup(testConfigPath) 23 | require.Nil(t, err) 24 | require.NotNil(t, cfg) 25 | 26 | assert.Equal(t, ":8080", cfg.Qumomf.Port) 27 | 28 | loggingCfg := cfg.Qumomf.Logging 29 | assert.Equal(t, "debug", loggingCfg.Level) 30 | assert.True(t, loggingCfg.SysLogEnabled) 31 | assert.True(t, loggingCfg.FileLoggingEnabled) 32 | assert.Equal(t, "/var/log/qumomf.log", loggingCfg.Filename) 33 | assert.Equal(t, 256, loggingCfg.MaxSize) 34 | assert.Equal(t, 3, loggingCfg.MaxBackups) 35 | assert.Equal(t, 5, loggingCfg.MaxAge) 36 | 37 | assert.True(t, cfg.Qumomf.ReadOnly) 38 | assert.Equal(t, 60*time.Second, cfg.Qumomf.ClusterDiscoveryTime) 39 | assert.Equal(t, 5*time.Second, cfg.Qumomf.ClusterRecoveryTime) 40 | assert.Equal(t, 30*time.Minute, cfg.Qumomf.ShardRecoveryBlockTime) 41 | assert.Equal(t, 10*time.Minute, cfg.Qumomf.InstanceRecoveryBlockTime) 42 | assert.Equal(t, int64(500), cfg.Qumomf.ReasonableFollowerLSNLag) 43 | assert.Equal(t, 1*time.Minute, cfg.Qumomf.ReasonableFollowerIdle) 44 | 45 | hooks := cfg.Qumomf.Hooks 46 | assert.Equal(t, "bash", hooks.Shell) 47 | assert.Equal(t, 5*time.Second, hooks.Timeout) 48 | assert.Equal(t, 10*time.Minute, hooks.TimeoutAsync) 49 | assert.Equal(t, []string{"echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/qumomf_recovery.log"}, hooks.PreFailover) 50 | assert.Equal(t, []string{"echo 'Recovered from {failureType} on {failureCluster}. Set: {failureReplicaSetUUID}; Failed: {failedURI}; Successor: {successorURI}' >> /tmp/qumomf_recovery.log"}, hooks.PostSuccessfulFailover) 51 | assert.Equal(t, []string{"echo 'Failed to recover from {failureType} on {failureCluster}. 
Set: {failureReplicaSetUUID}; Failed: {failedURI}' >> /tmp/qumomf_recovery.log"}, hooks.PostUnsuccessfulFailover) 52 | 53 | storage := cfg.Qumomf.Storage 54 | assert.Equal(t, "sqlite.db", storage.Filename) 55 | assert.Equal(t, time.Second, storage.QueryTimeout) 56 | assert.Equal(t, time.Second, storage.ConnectTimeout) 57 | 58 | assert.Equal(t, 500*time.Millisecond, *cfg.Connection.ConnectTimeout) 59 | assert.Equal(t, 1*time.Second, *cfg.Connection.RequestTimeout) 60 | 61 | connOpts := cfg.Connection 62 | require.NotNil(t, connOpts) 63 | assert.Equal(t, "qumomf", *connOpts.User) 64 | assert.Equal(t, "qumomf", *connOpts.Password) 65 | assert.Equal(t, 500*time.Millisecond, *connOpts.ConnectTimeout) 66 | assert.Equal(t, 1*time.Second, *connOpts.RequestTimeout) 67 | 68 | expected := map[string]ClusterConfig{ 69 | "qumomf_sandbox_1": { 70 | Connection: &ConnectConfig{ 71 | User: newString("qumomf"), 72 | Password: newString("qumomf"), 73 | ConnectTimeout: newDuration(500 * time.Millisecond), 74 | RequestTimeout: newDuration(1 * time.Second), 75 | }, 76 | ReadOnly: newBool(false), 77 | ElectionMode: newString("smart"), 78 | OverrideURIRules: map[string]string{ 79 | "qumomf_1_m.ddk:3301": "127.0.0.1:9303", 80 | }, 81 | Routers: []RouterConfig{ 82 | { 83 | Name: "sandbox1-router1", 84 | Addr: "127.0.0.1:9301", 85 | }, 86 | { 87 | Name: "sandbox1-router2", 88 | Addr: "127.0.0.1:9302", 89 | }, 90 | }, 91 | }, 92 | "qumomf_sandbox_2": { 93 | Connection: &ConnectConfig{ 94 | User: newString("tnt"), 95 | Password: newString("tnt"), 96 | ConnectTimeout: newDuration(10 * time.Second), 97 | RequestTimeout: newDuration(10 * time.Second), 98 | }, 99 | ReadOnly: newBool(true), 100 | ElectionMode: newString("idle"), 101 | Priorities: map[string]int{ 102 | "bd64dd00-161e-4c99-8b3c-d3c4635e18d2": 10, 103 | "cc4cfb9c-11d8-4810-84d2-66cfbebb0f6e": 5, 104 | "a3ef657e-eb9a-4730-b420-7ea78d52797d": -1, 105 | }, 106 | Routers: []RouterConfig{ 107 | { 108 | Name: "sandbox2-router1", 109 | Addr: "127.0.0.1:7301", 110 | }, 111 | }, 112 | }, 113 | } 114 | 115 | assert.Equal(t, expected, cfg.Clusters) 116 | } 117 | 118 | func TestSetup_InvalidElectorOption(t *testing.T) { 119 | testConfigPath, err := filepath.Abs("testdata/bad-elector.conf.yml") 120 | require.Nil(t, err) 121 | 122 | cfg, err := Setup(testConfigPath) 123 | require.NotNil(t, err) 124 | assert.Nil(t, cfg) 125 | } 126 | -------------------------------------------------------------------------------- /internal/config/testdata/bad-elector.conf.yml: -------------------------------------------------------------------------------- 1 | qumomf: 2 | elector: 'smart' 3 | 4 | clusters: 5 | qumomf_sandbox_1: 6 | elector: 'unknown' 7 | 8 | routers: 9 | - name: 'sandbox1-router1' 10 | addr: '127.0.0.1:9301' 11 | uuid: 'a94e7310-13f0-4690-b136-169599e87ba0' -------------------------------------------------------------------------------- /internal/config/testdata/qumomf-full.conf.yml: -------------------------------------------------------------------------------- 1 | qumomf: 2 | port: ':8080' 3 | logging: 4 | level: 'debug' 5 | syslog_enabled: true 6 | file_enabled: true 7 | file_name: '/var/log/qumomf.log' 8 | file_max_size: 256 9 | file_max_backups: 3 10 | file_max_age: 5 11 | readonly: true 12 | cluster_discovery_time: '60s' 13 | cluster_recovery_time: '5s' 14 | shard_recovery_block_time: '30m' 15 | instance_recovery_block_time: '10m' 16 | 17 | elector: 'smart' 18 | reasonable_follower_lsn_lag: 500 19 | reasonable_follower_idle: '1m' 20 | 21 | hooks: 22 | shell: bash 23 
| timeout: 5s
24 |     timeout_async: 10m
25 |     pre_failover:
26 |       - "echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/qumomf_recovery.log"
27 |     post_successful_failover:
28 |       - "echo 'Recovered from {failureType} on {failureCluster}. Set: {failureReplicaSetUUID}; Failed: {failedURI}; Successor: {successorURI}' >> /tmp/qumomf_recovery.log"
29 |     post_unsuccessful_failover:
30 |       - "echo 'Failed to recover from {failureType} on {failureCluster}. Set: {failureReplicaSetUUID}; Failed: {failedURI}' >> /tmp/qumomf_recovery.log"
31 |   storage:
32 |     filename: 'sqlite.db'
33 |     connect_timeout: '1s'
34 |     query_timeout: '1s'
35 |
36 | connection:
37 |   user: 'qumomf'
38 |   password: 'qumomf'
39 |   connect_timeout: '500ms'
40 |   request_timeout: '1s'
41 |
42 | clusters:
43 |   qumomf_sandbox_1:
44 |     readonly: false
45 |
46 |     override_uri_rules:
47 |       'qumomf_1_m.ddk:3301': '127.0.0.1:9303'
48 |
49 |     routers:
50 |       - name: 'sandbox1-router1'
51 |         addr: '127.0.0.1:9301'
52 |         uuid: 'a94e7310-13f0-4690-b136-169599e87ba0'
53 |       - name: 'sandbox1-router2'
54 |         addr: '127.0.0.1:9302'
55 |         uuid: 'a3ef657e-eb9a-4730-b420-7ea78d52797d'
56 |
57 |   qumomf_sandbox_2:
58 |     elector: 'idle'
59 |
60 |     connection:
61 |       user: 'tnt'
62 |       password: 'tnt'
63 |       connect_timeout: 10s
64 |       request_timeout: 10s
65 |
66 |     priorities:
67 |       'bd64dd00-161e-4c99-8b3c-d3c4635e18d2': 10
68 |       'cc4cfb9c-11d8-4810-84d2-66cfbebb0f6e': 5
69 |       'a3ef657e-eb9a-4730-b420-7ea78d52797d': -1
70 |
71 |     routers:
72 |       - name: 'sandbox2-router1'
73 |         uuid: '38dbe90b-9bca-4766-a98c-f02e56ddf986'
74 |         addr: '127.0.0.1:7301'
--------------------------------------------------------------------------------
/internal/config/validator.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | import "fmt"
4 |
5 | func validateElector(v *string) error {
6 |     if v == nil {
7 |         return fmt.Errorf("option 'elector' must not be empty")
8 |     }
9 |
10 |     if *v != "idle" && *v != "smart" {
11 |         return fmt.Errorf("option 'elector' has a wrong value: %s", *v)
12 |     }
13 |
14 |     return nil
15 | }
16 |
--------------------------------------------------------------------------------
/internal/coordinator/coordinator.go:
--------------------------------------------------------------------------------
1 | package coordinator
2 |
3 | import (
4 |     "context"
5 |     "errors"
6 |
7 |     "github.com/shmel1k/qumomf/internal/config"
8 |     "github.com/shmel1k/qumomf/internal/quorum"
9 |     "github.com/shmel1k/qumomf/internal/storage"
10 |     "github.com/shmel1k/qumomf/internal/vshard"
11 |     "github.com/shmel1k/qumomf/internal/vshard/orchestrator"
12 |
13 |     "github.com/rs/zerolog"
14 | )
15 |
16 | var (
17 |     ErrClusterAlreadyExist = errors.New("cluster with such a name is already registered")
18 | )
19 |
20 | type shutdownTask func()
21 |
22 | type Coordinator struct {
23 |     logger zerolog.Logger
24 |
25 |     // clusters contains registered Tarantool clusters
26 |     // which Qumomf observes.
27 |     clusters map[string]*vshard.Cluster
28 |
29 |     // shutdownQueue contains all shutdown tasks to be
30 |     // executed when the coordinator is going to exit.
31 | shutdownQueue []shutdownTask 32 | 33 | db storage.Storage 34 | } 35 | 36 | func New(logger zerolog.Logger, db storage.Storage) *Coordinator { 37 | return &Coordinator{ 38 | logger: logger, 39 | clusters: make(map[string]*vshard.Cluster), 40 | db: db, 41 | } 42 | } 43 | 44 | func (c *Coordinator) RegisterCluster(name string, cfg config.ClusterConfig, globalCfg *config.Config) error { 45 | if _, exist := c.clusters[name]; exist { 46 | return ErrClusterAlreadyExist 47 | } 48 | 49 | clusterLogger := c.logger.With().Str("cluster", name).Logger() 50 | 51 | cluster := vshard.NewCluster(name, cfg) 52 | cluster.SetLogger(clusterLogger) 53 | cluster.SetOnClusterDiscovered(c.onClusterDiscovered) 54 | c.clusters[name] = cluster 55 | c.addShutdownTask(cluster.Shutdown) 56 | 57 | mon := orchestrator.NewMonitor(cluster, orchestrator.Config{ 58 | RecoveryPollTime: globalCfg.Qumomf.ClusterRecoveryTime, 59 | DiscoveryPollTime: globalCfg.Qumomf.ClusterDiscoveryTime, 60 | }, clusterLogger) 61 | c.addShutdownTask(mon.Shutdown) 62 | 63 | hooker := initHooker(globalCfg, clusterLogger) 64 | elector := quorum.New(quorum.Mode(*cfg.ElectionMode), quorum.Options{ 65 | ReasonableFollowerLSNLag: globalCfg.Qumomf.ReasonableFollowerLSNLag, 66 | ReasonableFollowerIdle: globalCfg.Qumomf.ReasonableFollowerIdle.Seconds(), 67 | }) 68 | failover := orchestrator.NewDefaultFailover(cluster, orchestrator.FailoverConfig{ 69 | Hooker: hooker, 70 | Elector: elector, 71 | ReplicaSetRecoveryBlockTime: globalCfg.Qumomf.ShardRecoveryBlockTime, 72 | InstanceRecoveryBlockTime: globalCfg.Qumomf.InstanceRecoveryBlockTime, 73 | }, clusterLogger) 74 | failover.SetOnClusterRecovered(c.onClusterRecovered) 75 | 76 | c.addShutdownTask(failover.Shutdown) 77 | 78 | analysisStream := mon.Serve() 79 | failover.Serve(analysisStream) 80 | 81 | return nil 82 | } 83 | 84 | func (c *Coordinator) onClusterDiscovered(clusterName string, snapshot vshard.Snapshot) { 85 | err := c.db.SaveSnapshot(context.Background(), clusterName, snapshot) 86 | if err != nil { 87 | c.logger.Err(err).Str("cluster_name", clusterName).Msg("failed to save cluster snapshot") 88 | } 89 | } 90 | 91 | func (c *Coordinator) onClusterRecovered(recovery orchestrator.Recovery) { 92 | err := c.db.SaveRecovery(context.Background(), recovery) 93 | if err != nil { 94 | c.logger.Err(err).Str("cluster_name", recovery.ClusterName).Msg("failed to save cluster recovery data") 95 | } 96 | } 97 | 98 | func (c *Coordinator) Shutdown() { 99 | for i := len(c.shutdownQueue) - 1; i >= 0; i-- { 100 | task := c.shutdownQueue[i] 101 | task() 102 | } 103 | } 104 | 105 | func (c *Coordinator) addShutdownTask(task shutdownTask) { 106 | c.shutdownQueue = append(c.shutdownQueue, task) 107 | } 108 | 109 | func initHooker(cfg *config.Config, logger zerolog.Logger) *orchestrator.Hooker { 110 | hooksCfg := cfg.Qumomf.Hooks 111 | hooker := orchestrator.NewHooker(hooksCfg.Shell, logger) 112 | hooker.SetTimeout(hooksCfg.Timeout) 113 | hooker.SetTimeoutAsync(hooksCfg.TimeoutAsync) 114 | 115 | hooker.AddHook(orchestrator.HookPreFailover, hooksCfg.PreFailover...) 116 | hooker.AddHook(orchestrator.HookPostSuccessfulFailover, hooksCfg.PostSuccessfulFailover...) 117 | hooker.AddHook(orchestrator.HookPostUnsuccessfulFailover, hooksCfg.PostUnsuccessfulFailover...) 
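// At this point all three failover stages from the config are registered:
// pre-failover, post-successful-failover and post-unsuccessful-failover hooks.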
118 | 119 | return hooker 120 | } 121 | -------------------------------------------------------------------------------- /internal/metrics/metrics.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | const ( 8 | discoveryInstanceDurations = "instance_durations" 9 | discoveryClusterDurations = "cluster_durations" 10 | shardCriticalLevel = "critical_level" 11 | shardState = "state" 12 | shardStateEvent = "shard_state_event" 13 | ) 14 | 15 | const ( 16 | labelClusterName = "cluster_name" 17 | labelHostName = "hostname" 18 | labelShardState = "shard_state" 19 | labelShardUUID = "shard_uuid" 20 | ) 21 | 22 | var ( 23 | discoveryInstanceDurationsBuckets = prometheus.ExponentialBuckets(.001, 2.5, 10) 24 | discoveryClusterDurationsBuckets = prometheus.ExponentialBuckets(.001, 2.5, 10) 25 | ) 26 | 27 | var ( 28 | discoveryInstanceDurationsSum = prometheus.NewHistogramVec(prometheus.HistogramOpts{ 29 | Subsystem: "discovery", 30 | Name: discoveryInstanceDurations, 31 | Help: "Instance discovery latencies in seconds", 32 | Buckets: discoveryInstanceDurationsBuckets, 33 | }, []string{labelClusterName, labelHostName}) 34 | 35 | discoveryClusterDurationsSum = prometheus.NewHistogramVec(prometheus.HistogramOpts{ 36 | Subsystem: "discovery", 37 | Name: discoveryClusterDurations, 38 | Help: "Cluster discovery latencies in seconds", 39 | Buckets: discoveryClusterDurationsBuckets, 40 | }, []string{labelClusterName}) 41 | 42 | shardCriticalLevelGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 43 | Subsystem: "shard", 44 | Name: shardCriticalLevel, 45 | Help: "Critical level of the replica set", 46 | }, []string{labelClusterName, labelShardUUID}) 47 | 48 | shardStateGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 49 | Subsystem: "shard", 50 | Name: shardState, 51 | Help: "The state of each shard in the cluster; it will have one line for each possible state of each shard. 
A value of 1 means the shard is in the state specified by the state label, a value of 0 means it is not.", 52 | }, []string{labelClusterName, labelShardUUID, labelShardState}) 53 | 54 | discoveryErrors = prometheus.NewCounter(prometheus.CounterOpts{ 55 | Subsystem: "discovery", 56 | Name: "errors", 57 | Help: "Errors that happen during discovery process", 58 | }) 59 | 60 | shardStateCounter = prometheus.NewCounterVec(prometheus.CounterOpts{ 61 | Subsystem: "orchestrator", 62 | Name: shardStateEvent, 63 | Help: "Discovered shard state event", 64 | }, []string{labelClusterName, labelShardUUID, labelShardState}) 65 | ) 66 | 67 | func init() { 68 | discoveryErrors.Add(0) 69 | prometheus.MustRegister( 70 | discoveryInstanceDurationsSum, 71 | discoveryClusterDurationsSum, 72 | shardCriticalLevelGauge, 73 | shardStateGauge, 74 | discoveryErrors, 75 | shardStateCounter, 76 | ) 77 | } 78 | 79 | type Transaction interface { 80 | Start() Transaction 81 | End() 82 | } 83 | 84 | type timeTransaction struct { 85 | labels []string 86 | summary *prometheus.HistogramVec 87 | timer *prometheus.Timer 88 | } 89 | 90 | func (txn *timeTransaction) Start() Transaction { 91 | txn.timer = prometheus.NewTimer(txn.summary.WithLabelValues(txn.labels...)) 92 | return txn 93 | } 94 | 95 | func (txn *timeTransaction) End() { 96 | txn.timer.ObserveDuration() 97 | } 98 | 99 | func StartInstanceDiscovery(clusterName, hostname string) Transaction { 100 | txn := &timeTransaction{ 101 | summary: discoveryInstanceDurationsSum, 102 | labels: []string{clusterName, hostname}, 103 | } 104 | return txn.Start() 105 | } 106 | 107 | func StartClusterDiscovery(clusterName string) Transaction { 108 | txn := &timeTransaction{ 109 | summary: discoveryClusterDurationsSum, 110 | labels: []string{clusterName}, 111 | } 112 | return txn.Start() 113 | } 114 | 115 | func SetShardCriticalLevel(clusterName, uuid string, level int) { 116 | shardCriticalLevelGauge.WithLabelValues(clusterName, uuid).Set(float64(level)) 117 | } 118 | 119 | func SetShardState(clusterName, uuid, state string, active bool) { 120 | v := float64(0) 121 | if active { 122 | v = 1 123 | } 124 | shardStateGauge.With(prometheus.Labels{ 125 | labelClusterName: clusterName, 126 | labelShardUUID: uuid, 127 | labelShardState: state, 128 | }).Set(v) 129 | } 130 | 131 | func RecordDiscoveryError() { 132 | discoveryErrors.Inc() 133 | } 134 | 135 | func RecordDiscoveredShardState(clusterName, shardUUID, state string) { 136 | shardStateCounter.With(prometheus.Labels{ 137 | labelClusterName: clusterName, 138 | labelShardUUID: shardUUID, 139 | labelShardState: state, 140 | }).Inc() 141 | } 142 | -------------------------------------------------------------------------------- /internal/qumhttp/api.go: -------------------------------------------------------------------------------- 1 | package qumhttp 2 | 3 | import ( 4 | "encoding/json" 5 | "net/http" 6 | 7 | "github.com/gorilla/mux" 8 | "github.com/rs/zerolog" 9 | 10 | "github.com/shmel1k/qumomf/internal/api" 11 | ) 12 | 13 | const ( 14 | paramClusterName = "cluster_name" 15 | paramShardUUID = "shard_uuid" 16 | paramInstanceUUID = "instance_uuid" 17 | ) 18 | 19 | const ( 20 | msgMarshallingError = "failed to marshal data" 21 | msgInvalidParams = "one or more parameters are invalid" 22 | ) 23 | 24 | type APIHandler interface { 25 | ClusterList(http.ResponseWriter, *http.Request) 26 | ClusterSnapshot(http.ResponseWriter, *http.Request) 27 | ShardSnapshot(http.ResponseWriter, *http.Request) 28 | InstanceSnapshot(http.ResponseWriter, 
*http.Request)
29 |     ShardRecoveries(http.ResponseWriter, *http.Request)
30 |     Alerts(http.ResponseWriter, *http.Request)
31 |     ClusterAlerts(http.ResponseWriter, *http.Request)
32 | }
33 |
34 | type apiHandler struct {
35 |     apiSrv api.Service
36 |     logger zerolog.Logger
37 | }
38 |
39 | func NewHandler(logger zerolog.Logger, apiSrv api.Service) APIHandler {
40 |     return &apiHandler{
41 |         logger: logger,
42 |         apiSrv: apiSrv,
43 |     }
44 | }
45 |
46 | func (a *apiHandler) ClusterList(w http.ResponseWriter, r *http.Request) {
47 |     resp, err := a.apiSrv.ClustersList(r.Context())
48 |     if err != nil {
49 |         a.writeResponse(w, newInternalErrResponse("failed to get cluster list", err))
50 |         return
51 |     }
52 |
53 |     data, err := json.Marshal(resp)
54 |     if err != nil {
55 |         a.writeResponse(w, newInternalErrResponse(msgMarshallingError, err))
56 |         return
57 |     }
58 |
59 |     a.writeResponse(w, newOKResponse(data))
60 | }
61 |
62 | // nolint: dupl
63 | func (a *apiHandler) ClusterSnapshot(w http.ResponseWriter, r *http.Request) {
64 |     reqParams := parseParams(mux.Vars(r))
65 |     if reqParams.clusterName == "" {
66 |         a.writeResponse(w, newBadRequestResponse(msgInvalidParams))
67 |         return
68 |     }
69 |
70 |     snap, err := a.apiSrv.ClusterSnapshot(r.Context(), reqParams.clusterName)
71 |     if err != nil {
72 |         if isNotFoundTypeErr(err) {
73 |             a.writeResponse(w, newBadRequestResponse(parseNotFoundTypeErr(err)))
74 |             return
75 |         }
76 |         a.writeResponse(w, newInternalErrResponse("failed to get cluster snapshot", err))
77 |         return
78 |     }
79 |
80 |     data, err := json.Marshal(snap)
81 |     if err != nil {
82 |         a.writeResponse(w, newInternalErrResponse(msgMarshallingError, err))
83 |         return
84 |     }
85 |
86 |     a.writeResponse(w, newOKResponse(data))
87 | }
88 |
89 | func (a *apiHandler) ShardSnapshot(w http.ResponseWriter, r *http.Request) {
90 |     reqParams := parseParams(mux.Vars(r))
91 |     if reqParams.clusterName == "" || reqParams.shardUUID == "" {
92 |         a.writeResponse(w, newBadRequestResponse(msgInvalidParams))
93 |         return
94 |     }
95 |
96 |     shard, err := a.apiSrv.ReplicaSet(r.Context(), reqParams.clusterName, reqParams.shardUUID)
97 |     if err != nil {
98 |         if isNotFoundTypeErr(err) {
99 |             a.writeResponse(w, newBadRequestResponse(parseNotFoundTypeErr(err)))
100 |             return
101 |         }
102 |         a.writeResponse(w, newInternalErrResponse("failed to get shard snapshot", err))
103 |         return
104 |     }
105 |
106 |     data, err := json.Marshal(shard)
107 |     if err != nil {
108 |         a.writeResponse(w, newInternalErrResponse(msgMarshallingError, err))
109 |         return
110 |     }
111 |
112 |     a.writeResponse(w, newOKResponse(data))
113 | }
114 |
115 | func (a *apiHandler) InstanceSnapshot(w http.ResponseWriter, r *http.Request) {
116 |     reqParams := parseParams(mux.Vars(r))
117 |     if reqParams.clusterName == "" || reqParams.shardUUID == "" || reqParams.instanceUUID == "" {
118 |         a.writeResponse(w, newBadRequestResponse(msgInvalidParams))
119 |         return
120 |     }
121 |
122 |     inst, err := a.apiSrv.Instance(r.Context(), reqParams.clusterName, reqParams.shardUUID, reqParams.instanceUUID)
123 |     if err != nil {
124 |         if isNotFoundTypeErr(err) {
125 |             a.writeResponse(w, newBadRequestResponse(parseNotFoundTypeErr(err)))
126 |             return
127 |         }
128 |         a.writeResponse(w, newInternalErrResponse("failed to get instance snapshot", err))
129 |
130 |         return
131 |     }
132 |
133 |     data, err := json.Marshal(inst)
134 |     if err != nil {
135 |         a.writeResponse(w, newInternalErrResponse(msgMarshallingError, err))
136 |         return
137 |     }
138 |
139 |     a.writeResponse(w, newOKResponse(data))
140 | }
141 |
142 | func (a *apiHandler) ShardRecoveries(w http.ResponseWriter, r *http.Request) {
143 |     reqParams := parseParams(mux.Vars(r))
144 |     if reqParams.clusterName == "" || reqParams.shardUUID == "" {
145 |         a.writeResponse(w, newBadRequestResponse(msgInvalidParams))
146 |         return
147 |     }
148 |
149 |     recoveries, err := a.apiSrv.Recoveries(r.Context(), reqParams.clusterName, reqParams.shardUUID)
150 |     if err != nil {
151 |         a.writeResponse(w, newInternalErrResponse("failed to get shard recoveries", err))
152 |         return
153 |     }
154 |
155 |     data, err := json.Marshal(recoveries)
156 |     if err != nil {
157 |         a.writeResponse(w, newInternalErrResponse(msgMarshallingError, err))
158 |         return
159 |     }
160 |
161 |     a.writeResponse(w, newOKResponse(data))
162 | }
163 |
164 | func (a *apiHandler) Alerts(w http.ResponseWriter, r *http.Request) {
165 |     alerts, err := a.apiSrv.Alerts(r.Context())
166 |     if err != nil {
167 |         a.writeResponse(w, newInternalErrResponse("failed to get alerts list", err))
168 |         return
169 |     }
170 |
171 |     data, err := json.Marshal(alerts)
172 |     if err != nil {
173 |         a.writeResponse(w, newInternalErrResponse(msgMarshallingError, err))
174 |         return
175 |     }
176 |
177 |     a.writeResponse(w, newOKResponse(data))
178 | }
179 |
180 | // nolint: dupl
181 | func (a *apiHandler) ClusterAlerts(w http.ResponseWriter, r *http.Request) {
182 |     reqParams := parseParams(mux.Vars(r))
183 |     if reqParams.clusterName == "" {
184 |         a.writeResponse(w, newBadRequestResponse(msgInvalidParams))
185 |         return
186 |     }
187 |
188 |     alerts, err := a.apiSrv.ClusterAlerts(r.Context(), reqParams.clusterName)
189 |     if err != nil {
190 |         if isNotFoundTypeErr(err) {
191 |             a.writeResponse(w, newBadRequestResponse(parseNotFoundTypeErr(err)))
192 |             return
193 |         }
194 |         a.writeResponse(w, newInternalErrResponse("failed to get cluster alerts", err))
195 |         return
196 |     }
197 |
198 |     data, err := json.Marshal(alerts)
199 |     if err != nil {
200 |         a.writeResponse(w, newInternalErrResponse(msgMarshallingError, err))
201 |         return
202 |     }
203 |
204 |     a.writeResponse(w, newOKResponse(data))
205 | }
206 |
207 | func isNotFoundTypeErr(err error) bool {
208 |     return err == api.ErrClusterNotFound || err == api.ErrReplicaSetNotFound || err == api.ErrInstanceNotFound
209 | }
210 |
211 | func parseNotFoundTypeErr(err error) string {
212 |     switch err {
213 |     case api.ErrClusterNotFound:
214 |         return "cluster snapshot not found"
215 |     case api.ErrReplicaSetNotFound:
216 |         return "shard snapshot not found"
217 |     case api.ErrInstanceNotFound:
218 |         return "instance snapshot not found"
219 |     }
220 |
221 |     return "cluster not found"
222 | }
223 |
224 | func (a *apiHandler) writeResponse(w http.ResponseWriter, resp response) {
225 |     if resp.err != nil {
226 |         a.logger.Err(resp.err).Msg(string(resp.data))
227 |     }
228 |
229 |     w.Header().Add("Content-Type", "application/json; charset=utf-8")
230 |     w.WriteHeader(resp.statusCode)
231 |
232 |     _, err := w.Write(resp.data)
233 |     if err != nil {
234 |         a.logger.Err(err).Msg("failed to write response")
235 |     }
236 | }
237 |
--------------------------------------------------------------------------------
/internal/qumhttp/data.go:
--------------------------------------------------------------------------------
1 | package qumhttp
2 |
3 | import (
4 |     "net/http"
5 |
6 |     "github.com/shmel1k/qumomf/internal/vshard"
7 | )
8 |
9 | type response struct {
10 |     statusCode int
11 |     data []byte
12 |     err error
13 | }
14 |
15 | func newOKResponse(data []byte) response {
16 |     return response{
17 |         statusCode: http.StatusOK,
18 |         data: data,
19 |     }
20 | }
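// The two constructors below mirror newOKResponse: they pair the payload with
// the right HTTP status, and the 500 variant also carries the original error
// so writeResponse can log it.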
21 | 22 | func newBadRequestResponse(msg string) response { 23 | return response{ 24 | statusCode: http.StatusBadRequest, 25 | data: []byte(msg), 26 | } 27 | } 28 | 29 | func newInternalErrResponse(msg string, err error) response { 30 | return response{ 31 | statusCode: http.StatusInternalServerError, 32 | data: []byte(msg), 33 | err: err, 34 | } 35 | } 36 | 37 | type params struct { 38 | clusterName string 39 | shardUUID vshard.ReplicaSetUUID 40 | instanceUUID vshard.InstanceUUID 41 | } 42 | 43 | func parseParams(vars map[string]string) params { 44 | return params{ 45 | clusterName: vars[paramClusterName], 46 | shardUUID: vshard.ReplicaSetUUID(vars[paramShardUUID]), 47 | instanceUUID: vshard.InstanceUUID(vars[paramInstanceUUID]), 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /internal/qumhttp/http.go: -------------------------------------------------------------------------------- 1 | package qumhttp 2 | 3 | import ( 4 | "encoding/json" 5 | "net/http" 6 | ) 7 | 8 | func HealthHandler() http.Handler { 9 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 10 | w.Header().Set("Content-Type", "application/json; charset=utf-8") 11 | w.WriteHeader(http.StatusOK) 12 | }) 13 | } 14 | 15 | func AboutHandler(version, commit, buildDate string) http.Handler { 16 | about := struct { 17 | Version string `json:"version"` 18 | Commit string `json:"commit"` 19 | Build string `json:"build"` 20 | }{ 21 | Version: version, 22 | Commit: commit, 23 | Build: buildDate, 24 | } 25 | 26 | aboutStr, _ := json.Marshal(about) 27 | 28 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 29 | w.Header().Set("Content-Type", "application/json; charset=utf-8") 30 | w.WriteHeader(http.StatusOK) 31 | _, _ = w.Write(aboutStr) 32 | }) 33 | } 34 | -------------------------------------------------------------------------------- /internal/qumhttp/routing.go: -------------------------------------------------------------------------------- 1 | package qumhttp 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/gorilla/mux" 7 | "github.com/prometheus/client_golang/prometheus/promhttp" 8 | ) 9 | 10 | func RegisterDebugHandlers(r *mux.Router, version, commit, buildDate string) { 11 | r.Handle("/debug/metrics", promhttp.Handler()).Methods(http.MethodGet) 12 | r.Handle("/debug/health", HealthHandler()).Methods(http.MethodGet) 13 | r.Handle("/debug/about", AboutHandler(version, commit, buildDate)).Methods(http.MethodGet) 14 | } 15 | 16 | func RegisterAPIHandlers(r *mux.Router, h APIHandler) { 17 | r.HandleFunc("/api/v0/snapshots", h.ClusterList).Methods(http.MethodGet) 18 | r.HandleFunc("/api/v0/snapshots/{cluster_name}", h.ClusterSnapshot).Methods(http.MethodGet) 19 | r.HandleFunc("/api/v0/snapshots/{cluster_name}/{shard_uuid}", h.ShardSnapshot).Methods(http.MethodGet) 20 | r.HandleFunc("/api/v0/snapshots/{cluster_name}/{shard_uuid}/{instance_uuid}", h.InstanceSnapshot).Methods(http.MethodGet) 21 | 22 | r.HandleFunc("/api/v0/recoveries/{cluster_name}/{shard_uuid}", h.ShardRecoveries).Methods(http.MethodGet) 23 | 24 | r.HandleFunc("/api/v0/alerts", h.Alerts).Methods(http.MethodGet) 25 | r.HandleFunc("/api/v0/alerts/{cluster_name}", h.ClusterAlerts).Methods(http.MethodGet) 26 | } 27 | -------------------------------------------------------------------------------- /internal/quorum/elector.go: -------------------------------------------------------------------------------- 1 | package quorum 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | 7 | 
"github.com/shmel1k/qumomf/internal/vshard" 8 | ) 9 | 10 | type Mode string 11 | 12 | const ( 13 | ModeIdle Mode = "idle" 14 | ModeSmart Mode = "smart" 15 | ) 16 | 17 | var ( 18 | ErrNoAliveFollowers = errors.New("quorum: replica set does not have any alive followers or all of them were excluded from the election") 19 | ErrNoCandidateFound = errors.New("quorum: no available candidate found") 20 | ) 21 | 22 | type Options struct { 23 | ReasonableFollowerLSNLag int64 24 | ReasonableFollowerIdle float64 25 | } 26 | 27 | type Elector interface { 28 | // ChooseMaster selects new master and returns back its uuid. 29 | ChooseMaster(set vshard.ReplicaSet) (vshard.InstanceUUID, error) 30 | // Mode returns the elector type. 31 | Mode() Mode 32 | } 33 | 34 | func New(m Mode, opts Options) Elector { 35 | switch m { 36 | case ModeIdle: 37 | return NewIdleElector(opts) 38 | case ModeSmart: 39 | return NewSmartElector(opts) 40 | } 41 | 42 | panic(fmt.Sprintf("Elector: got unknown mode %s", m)) 43 | } 44 | 45 | // filter filters out the instances which must not be promoted to the master. 46 | func filter(instances []vshard.Instance, opts Options) []vshard.Instance { 47 | filtered := make([]vshard.Instance, 0, len(instances)) 48 | 49 | for i := range instances { 50 | inst := &instances[i] 51 | 52 | // Exclude all followers with negative priority. 53 | if inst.Priority < 0 { 54 | continue 55 | } 56 | 57 | if opts.ReasonableFollowerLSNLag != 0 { 58 | // Exclude followers too far from the master. 59 | if inst.LSNBehindMaster > opts.ReasonableFollowerLSNLag { 60 | continue 61 | } 62 | } 63 | 64 | if opts.ReasonableFollowerIdle != 0 { 65 | // Exclude followers too far from the master. 66 | if inst.Idle() > opts.ReasonableFollowerIdle { 67 | continue 68 | } 69 | } 70 | 71 | filtered = append(filtered, *inst) 72 | } 73 | 74 | return filtered 75 | } 76 | -------------------------------------------------------------------------------- /internal/quorum/elector_test.go: -------------------------------------------------------------------------------- 1 | package quorum 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | 8 | "github.com/shmel1k/qumomf/internal/vshard" 9 | ) 10 | 11 | func Test_filter(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | opts Options 15 | instances []vshard.Instance 16 | want []vshard.InstanceUUID 17 | }{ 18 | { 19 | name: "ExcludeByPriority", 20 | opts: Options{}, 21 | instances: []vshard.Instance{ 22 | { 23 | UUID: "1", 24 | Priority: -1, 25 | }, 26 | { 27 | UUID: "2", 28 | Priority: 0, 29 | }, 30 | { 31 | UUID: "3", 32 | Priority: 1, 33 | }, 34 | }, 35 | want: []vshard.InstanceUUID{ 36 | "2", "3", 37 | }, 38 | }, 39 | { 40 | name: "ExcludeByLSN", 41 | opts: Options{ 42 | ReasonableFollowerLSNLag: 100, 43 | }, 44 | instances: []vshard.Instance{ 45 | { 46 | UUID: "1", 47 | LSNBehindMaster: 1000, 48 | }, 49 | { 50 | UUID: "2", 51 | LSNBehindMaster: 100, 52 | }, 53 | { 54 | UUID: "3", 55 | LSNBehindMaster: 0, 56 | }, 57 | }, 58 | want: []vshard.InstanceUUID{ 59 | "2", "3", 60 | }, 61 | }, 62 | { 63 | name: "ExcludeByIdle", 64 | opts: Options{ 65 | ReasonableFollowerIdle: 5.5, 66 | }, 67 | instances: []vshard.Instance{ 68 | { 69 | UUID: "1", 70 | Upstream: &vshard.Upstream{ 71 | Status: vshard.UpstreamFollow, 72 | Idle: 7.2, 73 | }, 74 | }, 75 | { 76 | UUID: "2", 77 | Upstream: &vshard.Upstream{ 78 | Status: vshard.UpstreamFollow, 79 | Idle: 5.1, 80 | }, 81 | }, 82 | { 83 | UUID: "3", 84 | Upstream: &vshard.Upstream{ 85 | Status: vshard.UpstreamFollow, 
86 | Idle: 0.86981821060181, 87 | }, 88 | }, 89 | }, 90 | want: []vshard.InstanceUUID{ 91 | "2", "3", 92 | }, 93 | }, 94 | { 95 | name: "ExcludeAll", 96 | opts: Options{ 97 | ReasonableFollowerLSNLag: 100, 98 | ReasonableFollowerIdle: 5.5, 99 | }, 100 | instances: []vshard.Instance{ 101 | { 102 | UUID: "1", 103 | Priority: 0, 104 | LSNBehindMaster: 10, 105 | Upstream: &vshard.Upstream{ 106 | Status: vshard.UpstreamFollow, 107 | Idle: 7.2, 108 | }, 109 | }, 110 | { 111 | UUID: "2", 112 | Priority: -1, 113 | LSNBehindMaster: 0, 114 | Upstream: &vshard.Upstream{ 115 | Status: vshard.UpstreamFollow, 116 | Idle: 0.2, 117 | }, 118 | }, 119 | { 120 | UUID: "3", 121 | Priority: 100, 122 | LSNBehindMaster: 1000, 123 | Upstream: &vshard.Upstream{ 124 | Status: vshard.UpstreamFollow, 125 | Idle: 0.1, 126 | }, 127 | }, 128 | }, 129 | want: []vshard.InstanceUUID{}, 130 | }, 131 | } 132 | 133 | for _, tt := range tests { 134 | tt := tt 135 | t.Run(tt.name, func(t *testing.T) { 136 | got := filter(tt.instances, tt.opts) 137 | uuids := make([]vshard.InstanceUUID, len(got)) 138 | for i, inst := range got { 139 | uuids[i] = inst.UUID 140 | } 141 | assert.Equal(t, tt.want, uuids) 142 | }) 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /internal/quorum/idle.go: -------------------------------------------------------------------------------- 1 | package quorum 2 | 3 | import ( 4 | "math" 5 | 6 | "github.com/shmel1k/qumomf/internal/vshard" 7 | ) 8 | 9 | const ( 10 | maxIdle = math.MaxFloat64 11 | ) 12 | 13 | type idleElector struct { 14 | opts Options 15 | } 16 | 17 | // NewIdleElector returns a new elector based on the follower's idle value. 18 | // 19 | // This elector chooses the candidate to be a master selecting 20 | // the follower with a minimum idle value. 
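//
// Candidates are pre-filtered by the shared filter() helper, so followers with
// a negative priority or an unreasonably large LSN lag/idle are never chosen.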
21 | func NewIdleElector(opts Options) Elector { 22 | return &idleElector{ 23 | opts: opts, 24 | } 25 | } 26 | 27 | func (e *idleElector) ChooseMaster(set vshard.ReplicaSet) (vshard.InstanceUUID, error) { 28 | followers := filter(set.AliveFollowers(), e.opts) 29 | if len(followers) == 0 { 30 | return "", ErrNoAliveFollowers 31 | } 32 | 33 | minIdle := maxIdle 34 | minUUID := vshard.InstanceUUID("") 35 | for i := range followers { 36 | r := &followers[i] 37 | 38 | if r.Idle() < minIdle { 39 | minIdle = r.Idle() 40 | minUUID = r.UUID 41 | } 42 | } 43 | 44 | if minUUID == "" { 45 | return "", ErrNoCandidateFound 46 | } 47 | 48 | return minUUID, nil 49 | } 50 | 51 | func (*idleElector) Mode() Mode { 52 | return ModeIdle 53 | } 54 | -------------------------------------------------------------------------------- /internal/quorum/idle_test.go: -------------------------------------------------------------------------------- 1 | package quorum 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | 8 | "github.com/shmel1k/qumomf/internal/vshard" 9 | ) 10 | 11 | func TestIdleElector(t *testing.T) { 12 | var testData = []struct { 13 | name string 14 | set vshard.ReplicaSet 15 | expectedUUID vshard.InstanceUUID 16 | expectedErr error 17 | }{ 18 | { 19 | name: "ShouldSelectExpectedReplica", 20 | set: vshard.ReplicaSet{ 21 | Instances: []vshard.Instance{ 22 | { 23 | UUID: "1", 24 | LastCheckValid: false, 25 | StorageInfo: vshard.StorageInfo{ 26 | Replication: vshard.Replication{ 27 | Status: vshard.StatusMaster, 28 | }, 29 | }, 30 | }, 31 | { 32 | UUID: "2", 33 | LastCheckValid: true, 34 | Upstream: &vshard.Upstream{ 35 | Status: vshard.UpstreamFollow, 36 | Idle: 0.05, 37 | }, 38 | Downstream: &vshard.Downstream{ 39 | Status: vshard.DownstreamFollow, 40 | }, 41 | StorageInfo: vshard.StorageInfo{ 42 | Replication: vshard.Replication{ 43 | Status: vshard.StatusFollow, 44 | }, 45 | }, 46 | }, 47 | { 48 | UUID: "3", 49 | LastCheckValid: true, 50 | Upstream: &vshard.Upstream{ 51 | Status: vshard.UpstreamFollow, 52 | Idle: 0.1, 53 | }, 54 | Downstream: &vshard.Downstream{ 55 | Status: vshard.DownstreamFollow, 56 | }, 57 | StorageInfo: vshard.StorageInfo{ 58 | Replication: vshard.Replication{ 59 | Status: vshard.StatusFollow, 60 | }, 61 | }, 62 | }, 63 | }, 64 | }, 65 | expectedUUID: "2", 66 | }, 67 | { 68 | name: "NoAliveFollowers_ShouldReturnErr", 69 | set: vshard.ReplicaSet{ 70 | Instances: []vshard.Instance{ 71 | { 72 | UUID: "1", 73 | LastCheckValid: false, 74 | StorageInfo: vshard.StorageInfo{ 75 | Replication: vshard.Replication{ 76 | Status: vshard.StatusMaster, 77 | }, 78 | }, 79 | }, 80 | { 81 | UUID: "2", 82 | LastCheckValid: true, 83 | Upstream: &vshard.Upstream{ 84 | Status: vshard.UpstreamDisconnected, 85 | }, 86 | }, 87 | { // too far from the master 88 | UUID: "3", 89 | LastCheckValid: true, 90 | LSNBehindMaster: 1000, 91 | Upstream: &vshard.Upstream{ 92 | Status: vshard.UpstreamFollow, 93 | Idle: 0.1, 94 | }, 95 | Downstream: &vshard.Downstream{ 96 | Status: vshard.DownstreamFollow, 97 | }, 98 | StorageInfo: vshard.StorageInfo{ 99 | Replication: vshard.Replication{ 100 | Status: vshard.StatusFollow, 101 | }, 102 | }, 103 | }, 104 | { // too far from the master 105 | UUID: "4", 106 | LastCheckValid: true, 107 | LSNBehindMaster: 1, 108 | Upstream: &vshard.Upstream{ 109 | Status: vshard.UpstreamFollow, 110 | Idle: 10, 111 | }, 112 | Downstream: &vshard.Downstream{ 113 | Status: vshard.DownstreamFollow, 114 | }, 115 | StorageInfo: vshard.StorageInfo{ 116 | Replication: 
vshard.Replication{ 117 | Status: vshard.StatusFollow, 118 | }, 119 | }, 120 | }, 121 | }, 122 | }, 123 | expectedErr: ErrNoAliveFollowers, 124 | }, 125 | { 126 | name: "EmptySet_ShouldReturnErr", 127 | set: vshard.ReplicaSet{ 128 | Instances: nil, 129 | }, 130 | expectedErr: ErrNoAliveFollowers, 131 | }, 132 | } 133 | 134 | e := NewIdleElector(Options{ 135 | ReasonableFollowerLSNLag: 100, 136 | ReasonableFollowerIdle: 5, 137 | }) 138 | 139 | for _, v := range testData { 140 | vt := v 141 | t.Run(v.name, func(t *testing.T) { 142 | uuid, err := e.ChooseMaster(vt.set) 143 | assert.Equal(t, vt.expectedErr, err) 144 | assert.Equal(t, vt.expectedUUID, uuid) 145 | }) 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /internal/quorum/smart.go: -------------------------------------------------------------------------------- 1 | package quorum 2 | 3 | import ( 4 | "sort" 5 | 6 | "github.com/shmel1k/qumomf/internal/vshard" 7 | ) 8 | 9 | // idleDiffDelta represents the max diff between 10 | // idle values of the followers after which 11 | // they are not treated as almost identical. 12 | const idleDiffDelta = 0.5 // in seconds 13 | 14 | type smartElector struct { 15 | opts Options 16 | } 17 | 18 | // NewSmartElector returns a new elector based on rules: 19 | // - compare vshard configuration consistency, 20 | // - compare upstream status, 21 | // - compare LSN behind the master, 22 | // - compare when replica got last heartbeat signal or data from master, 23 | // - user promotion rules based on instance priorities. 24 | func NewSmartElector(opts Options) Elector { 25 | return &smartElector{ 26 | opts: opts, 27 | } 28 | } 29 | 30 | func (e *smartElector) ChooseMaster(set vshard.ReplicaSet) (vshard.InstanceUUID, error) { 31 | followers := filter(set.AliveFollowers(), e.opts) 32 | if len(followers) == 0 { 33 | return "", ErrNoAliveFollowers 34 | } 35 | 36 | master, err := set.Master() 37 | if err != nil { 38 | return "", err 39 | } 40 | sorter := newInstanceSorter(master, followers) 41 | sort.Sort(sorter) 42 | 43 | return followers[0].UUID, nil 44 | } 45 | 46 | func (e *smartElector) Mode() Mode { 47 | return ModeSmart 48 | } 49 | 50 | // instanceSorter sorts instances by their priority to be a new master. 51 | type instanceSorter struct { 52 | master vshard.Instance 53 | instances []vshard.Instance 54 | } 55 | 56 | func newInstanceSorter(master vshard.Instance, instances []vshard.Instance) *instanceSorter { 57 | return &instanceSorter{ 58 | master: master, 59 | instances: instances, 60 | } 61 | } 62 | 63 | func (s *instanceSorter) Len() int { 64 | return len(s.instances) 65 | } 66 | 67 | func (s *instanceSorter) Swap(i, j int) { 68 | s.instances[i], s.instances[j] = s.instances[j], s.instances[i] 69 | } 70 | 71 | //nolint:gocyclo 72 | func (s *instanceSorter) Less(i, j int) bool { 73 | left, right := s.instances[i], s.instances[j] 74 | 75 | // Prefer replicas with the same vshard configuration as master. 76 | confHash := s.master.VShardFingerprint 77 | if left.VShardFingerprint == confHash && right.VShardFingerprint != confHash { 78 | return true 79 | } 80 | if left.VShardFingerprint != confHash && right.VShardFingerprint == confHash { 81 | return false 82 | } 83 | 84 | // Prefer replicas which have follow upstream status. 
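// (an instance that is actively streaming from the master is a safer
// promotion target than one whose upstream is disconnected or stopped)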
85 | if left.Upstream.Status == vshard.UpstreamFollow && right.Upstream.Status != vshard.UpstreamFollow { 86 | return true 87 | } 88 | if left.Upstream.Status != vshard.UpstreamFollow && right.Upstream.Status == vshard.UpstreamFollow { 89 | return false 90 | } 91 | 92 | // Prefer most up to date replica. 93 | if left.LSNBehindMaster != right.LSNBehindMaster { 94 | // Special case: when replication is broken and replica has been recovered from an old snapshot with 95 | // LSN in front of master LSN. 96 | if left.LSNBehindMaster > 0 && right.LSNBehindMaster < 0 { 97 | return true 98 | } 99 | if left.LSNBehindMaster < 0 && right.LSNBehindMaster > 0 { 100 | return false 101 | } 102 | 103 | return left.LSNBehindMaster < right.LSNBehindMaster 104 | } 105 | 106 | d1 := left.Idle() 107 | d2 := right.Idle() 108 | 109 | if left.Priority != right.Priority && inDelta(d1, d2, idleDiffDelta) { 110 | // If followers are almost equal, use user promotion rules. 111 | return left.Priority > right.Priority 112 | } 113 | 114 | return d1 < d2 115 | } 116 | 117 | func inDelta(d1, d2, delta float64) bool { 118 | diff := d1 - d2 119 | return diff >= -delta && diff <= delta 120 | } 121 | -------------------------------------------------------------------------------- /internal/quorum/smart_test.go: -------------------------------------------------------------------------------- 1 | package quorum 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | 8 | "github.com/shmel1k/qumomf/internal/vshard" 9 | ) 10 | 11 | func Test_smartElector_ChooseMaster(t *testing.T) { 12 | var testData = []struct { 13 | name string 14 | set vshard.ReplicaSet 15 | expectedUUID vshard.InstanceUUID 16 | expectedErr error 17 | }{ 18 | { 19 | name: "ShouldSelectExpectedReplica", 20 | set: vshard.ReplicaSet{ 21 | MasterUUID: "1", 22 | Instances: []vshard.Instance{ 23 | { 24 | UUID: "1", 25 | LastCheckValid: false, 26 | VShardFingerprint: 100, 27 | StorageInfo: vshard.StorageInfo{ 28 | Replication: vshard.Replication{ 29 | Status: vshard.StatusMaster, 30 | }, 31 | }, 32 | }, 33 | { // the best candidate 34 | UUID: "2", 35 | LastCheckValid: true, 36 | LSNBehindMaster: 0, 37 | VShardFingerprint: 100, 38 | Upstream: &vshard.Upstream{ 39 | Status: vshard.UpstreamFollow, 40 | Idle: 0.05, 41 | }, 42 | Downstream: &vshard.Downstream{ 43 | Status: vshard.DownstreamFollow, 44 | }, 45 | StorageInfo: vshard.StorageInfo{ 46 | Replication: vshard.Replication{ 47 | Status: vshard.StatusFollow, 48 | }, 49 | }, 50 | Priority: 100, 51 | }, 52 | { // good candidate but has lower priority 53 | UUID: "3", 54 | LastCheckValid: true, 55 | LSNBehindMaster: 0, 56 | VShardFingerprint: 100, 57 | Upstream: &vshard.Upstream{ 58 | Status: vshard.UpstreamFollow, 59 | Idle: 0.05, 60 | }, 61 | Downstream: &vshard.Downstream{ 62 | Status: vshard.DownstreamFollow, 63 | }, 64 | StorageInfo: vshard.StorageInfo{ 65 | Replication: vshard.Replication{ 66 | Status: vshard.StatusFollow, 67 | }, 68 | }, 69 | Priority: 10, 70 | }, 71 | { // too far from master 72 | UUID: "4", 73 | LastCheckValid: true, 74 | LSNBehindMaster: 10, 75 | VShardFingerprint: 100, 76 | Upstream: &vshard.Upstream{ 77 | Status: vshard.UpstreamFollow, 78 | Idle: 0.1, 79 | }, 80 | Downstream: &vshard.Downstream{ 81 | Status: vshard.DownstreamFollow, 82 | }, 83 | StorageInfo: vshard.StorageInfo{ 84 | Replication: vshard.Replication{ 85 | Status: vshard.StatusFollow, 86 | }, 87 | }, 88 | }, 89 | { // inconsistent vshard configuration 90 | UUID: "5", 91 | LastCheckValid: true, 92 | 
LSNBehindMaster: 0, 93 | VShardFingerprint: 10, 94 | Upstream: &vshard.Upstream{ 95 | Status: vshard.UpstreamFollow, 96 | Idle: 0.0001, 97 | }, 98 | Downstream: &vshard.Downstream{ 99 | Status: vshard.DownstreamFollow, 100 | }, 101 | StorageInfo: vshard.StorageInfo{ 102 | Replication: vshard.Replication{ 103 | Status: vshard.StatusFollow, 104 | }, 105 | }, 106 | }, 107 | }, 108 | }, 109 | expectedUUID: "2", 110 | }, 111 | { 112 | name: "NoAliveFollowers_ShouldReturnErr", 113 | set: vshard.ReplicaSet{ 114 | MasterUUID: "1", 115 | Instances: []vshard.Instance{ 116 | { 117 | UUID: "1", 118 | LastCheckValid: false, 119 | StorageInfo: vshard.StorageInfo{ 120 | Replication: vshard.Replication{ 121 | Status: vshard.StatusMaster, 122 | }, 123 | }, 124 | }, 125 | { 126 | UUID: "2", 127 | LastCheckValid: true, 128 | Upstream: &vshard.Upstream{ 129 | Status: vshard.UpstreamDisconnected, 130 | }, 131 | }, 132 | { // too far from the master 133 | UUID: "3", 134 | LastCheckValid: true, 135 | LSNBehindMaster: 1000, 136 | Upstream: &vshard.Upstream{ 137 | Status: vshard.UpstreamFollow, 138 | Idle: 0.1, 139 | }, 140 | Downstream: &vshard.Downstream{ 141 | Status: vshard.DownstreamFollow, 142 | }, 143 | StorageInfo: vshard.StorageInfo{ 144 | Replication: vshard.Replication{ 145 | Status: vshard.StatusFollow, 146 | }, 147 | }, 148 | }, 149 | { // too far from the master 150 | UUID: "4", 151 | LastCheckValid: true, 152 | LSNBehindMaster: 1, 153 | Upstream: &vshard.Upstream{ 154 | Status: vshard.UpstreamFollow, 155 | Idle: 10, 156 | }, 157 | Downstream: &vshard.Downstream{ 158 | Status: vshard.DownstreamFollow, 159 | }, 160 | StorageInfo: vshard.StorageInfo{ 161 | Replication: vshard.Replication{ 162 | Status: vshard.StatusFollow, 163 | }, 164 | }, 165 | }, 166 | }, 167 | }, 168 | expectedErr: ErrNoAliveFollowers, 169 | }, 170 | { 171 | name: "EmptySet_ShouldReturnErr", 172 | set: vshard.ReplicaSet{ 173 | Instances: nil, 174 | }, 175 | expectedErr: ErrNoAliveFollowers, 176 | }, 177 | } 178 | 179 | e := NewSmartElector(Options{ 180 | ReasonableFollowerLSNLag: 100, 181 | ReasonableFollowerIdle: 5, 182 | }) 183 | 184 | for _, v := range testData { 185 | vt := v 186 | t.Run(v.name, func(t *testing.T) { 187 | uuid, err := e.ChooseMaster(vt.set) 188 | assert.Equal(t, vt.expectedErr, err) 189 | assert.Equal(t, vt.expectedUUID, uuid) 190 | }) 191 | } 192 | } 193 | 194 | func Test_inDelta(t *testing.T) { 195 | tests := []struct { 196 | name string 197 | d1 float64 198 | d2 float64 199 | delta float64 200 | want bool 201 | }{ 202 | { 203 | name: "InDelta", 204 | d1: 0.23, 205 | d2: 0.532, 206 | delta: 1, 207 | want: true, 208 | }, 209 | { 210 | name: "NotInDelta", 211 | d1: 0.23, 212 | d2: 0.532, 213 | delta: 0.1, 214 | want: false, 215 | }, 216 | } 217 | for _, tt := range tests { 218 | tt := tt 219 | t.Run(tt.name, func(t *testing.T) { 220 | assert.Equal(t, tt.want, inDelta(tt.d1, tt.d2, tt.delta)) 221 | }) 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /internal/storage/data.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import "github.com/shmel1k/qumomf/internal/vshard" 4 | 5 | type ClusterSnapshotResp struct { 6 | Name string 7 | Snapshot vshard.Snapshot 8 | } 9 | -------------------------------------------------------------------------------- /internal/storage/sqlite/sqlite.go: -------------------------------------------------------------------------------- 1 | package sqlite 2 | 3 | import ( 4 
| "context" 5 | "database/sql" 6 | "encoding/json" 7 | "errors" 8 | "time" 9 | 10 | "github.com/shmel1k/qumomf/internal/storage" 11 | "github.com/shmel1k/qumomf/internal/vshard" 12 | "github.com/shmel1k/qumomf/internal/vshard/orchestrator" 13 | 14 | // sqlite3 driver 15 | _ "github.com/mattn/go-sqlite3" 16 | ) 17 | 18 | const ( 19 | querySaveSnapshot = `INSERT INTO snapshots(cluster_name, created_at, data) 20 | VALUES(?, ?, ?) 21 | ON CONFLICT(cluster_name) DO UPDATE SET 22 | created_at = excluded.created_at, 23 | data = excluded.data` 24 | querySaveRecoveries = `INSERT INTO recoveries(cluster_name, created_at, data) 25 | VALUES(?, ?, ?)` 26 | initDatabaseQueries = `CREATE TABLE IF NOT EXISTS snapshots ( 27 | "id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, 28 | "cluster_name" TEXT UNIQUE, 29 | "created_at" INTEGER, 30 | "data" BLOB 31 | ); 32 | CREATE TABLE IF NOT EXISTS recoveries ( 33 | "id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, 34 | "cluster_name" TEXT, 35 | "created_at" INTEGER, 36 | "data" BLOB 37 | )` 38 | queryGetLastSnapshot = `SELECT data 39 | FROM snapshots 40 | WHERE cluster_name = ? 41 | ORDER BY id DESC limit 1` 42 | queryGetRecoveries = `SELECT data 43 | FROM recoveries 44 | WHERE cluster_name = ?` 45 | queryGetClusters = `SELECT cluster_name, data 46 | FROM snapshots` 47 | ) 48 | 49 | var ( 50 | ErrEmptyResult = errors.New("empty result") 51 | ) 52 | 53 | type sqlite struct { 54 | db *sql.DB 55 | config Config 56 | } 57 | 58 | type Config struct { 59 | FileName string 60 | ConnectTimeout time.Duration 61 | QueryTimeout time.Duration 62 | } 63 | 64 | func New(cfg Config) (storage.Storage, error) { 65 | ctx, cancel := context.WithTimeout(context.Background(), cfg.QueryTimeout) 66 | defer cancel() 67 | 68 | db, err := sql.Open("sqlite3", cfg.FileName) 69 | if err != nil { 70 | return &sqlite{}, err 71 | } 72 | 73 | db.SetMaxOpenConns(1) 74 | 75 | err = createTables(ctx, db) 76 | if err != nil { 77 | return nil, err 78 | } 79 | 80 | return &sqlite{ 81 | db: db, 82 | config: cfg, 83 | }, nil 84 | } 85 | 86 | func (s *sqlite) GetClusters(ctx context.Context) ([]storage.ClusterSnapshotResp, error) { 87 | ctx, cancel := context.WithTimeout(ctx, s.config.QueryTimeout) 88 | defer cancel() 89 | 90 | rows, err := s.db.QueryContext(ctx, queryGetClusters) 91 | if err != nil { 92 | return nil, err 93 | } 94 | 95 | resp := make([]storage.ClusterSnapshotResp, 0) 96 | data := make([]byte, 0) 97 | for rows.Next() { 98 | snapResp := storage.ClusterSnapshotResp{} 99 | err = rows.Scan(&snapResp.Name, &data) 100 | if err != nil { 101 | return nil, err 102 | } 103 | 104 | err = json.Unmarshal(data, &snapResp.Snapshot) 105 | if err != nil { 106 | return nil, err 107 | } 108 | 109 | resp = append(resp, snapResp) 110 | } 111 | 112 | return resp, nil 113 | } 114 | 115 | func (s *sqlite) SaveSnapshot(ctx context.Context, clusterName string, snapshot vshard.Snapshot) error { 116 | ctx, cancel := context.WithTimeout(ctx, s.config.QueryTimeout) 117 | defer cancel() 118 | 119 | data, err := json.Marshal(snapshot) 120 | if err != nil { 121 | return err 122 | } 123 | 124 | _, err = s.db.ExecContext(ctx, querySaveSnapshot, clusterName, snapshot.Created, data) 125 | 126 | return err 127 | } 128 | 129 | func (s *sqlite) SaveRecovery(ctx context.Context, recovery orchestrator.Recovery) error { 130 | ctx, cancel := context.WithTimeout(ctx, s.config.QueryTimeout) 131 | defer cancel() 132 | 133 | data, err := json.Marshal(recovery) 134 | if err != nil { 135 | return err 136 | } 137 | 138 | _, err = 
s.db.ExecContext(ctx, querySaveRecoveries, recovery.ClusterName, recovery.EndTimestamp, data) 139 | 140 | return err 141 | } 142 | 143 | func (s *sqlite) GetClusterSnapshot(ctx context.Context, clusterName string) (vshard.Snapshot, error) { 144 | ctx, cancel := context.WithTimeout(ctx, s.config.QueryTimeout) 145 | defer cancel() 146 | 147 | data := make([]byte, 0) 148 | row := s.db.QueryRowContext(ctx, queryGetLastSnapshot, clusterName) 149 | var ns vshard.Snapshot 150 | err := row.Scan(&data) 151 | if err == sql.ErrNoRows { 152 | return ns, ErrEmptyResult 153 | } 154 | if err != nil { 155 | return ns, err 156 | } 157 | return ns, json.Unmarshal(data, &ns) 158 | } 159 | 160 | func (s *sqlite) GetRecoveries(ctx context.Context, clusterName string) ([]orchestrator.Recovery, error) { 161 | ctx, cancel := context.WithTimeout(ctx, s.config.QueryTimeout) 162 | defer cancel() 163 | 164 | data := make([]byte, 0) 165 | resp := make([]orchestrator.Recovery, 0) 166 | rows, err := s.db.QueryContext(ctx, queryGetRecoveries, clusterName) 167 | if err != nil { 168 | return nil, err 169 | } 170 | defer rows.Close() 171 | 172 | for rows.Next() { 173 | err = rows.Scan(&data) 174 | if err != nil { 175 | return nil, err 176 | } 177 | 178 | var recovery orchestrator.Recovery 179 | err = json.Unmarshal(data, &recovery) 180 | if err != nil { 181 | return nil, err 182 | } 183 | 184 | resp = append(resp, recovery) 185 | } 186 | 187 | return resp, rows.Err() 188 | } 189 | 190 | func createTables(ctx context.Context, db *sql.DB) error { 191 | _, err := db.ExecContext(ctx, initDatabaseQueries) 192 | 193 | return err 194 | } 195 | -------------------------------------------------------------------------------- /internal/storage/sqlite/sqlite_test.go: -------------------------------------------------------------------------------- 1 | package sqlite 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "os" 7 | "testing" 8 | "time" 9 | 10 | "github.com/stretchr/testify/assert" 11 | 12 | "github.com/shmel1k/qumomf/internal/storage" 13 | "github.com/shmel1k/qumomf/internal/vshard" 14 | "github.com/shmel1k/qumomf/internal/vshard/orchestrator" 15 | 16 | "github.com/stretchr/testify/require" 17 | "github.com/stretchr/testify/suite" 18 | ) 19 | 20 | var ( 21 | tFileName = "tFileName.db" 22 | tClusterName = "testCluster" 23 | tSnapshot = vshard.Snapshot{ 24 | Created: 123, 25 | Routers: []vshard.Router{}, 26 | ReplicaSets: []vshard.ReplicaSet{}, 27 | } 28 | tRecovery = orchestrator.Recovery{ 29 | Type: "test type", 30 | ClusterName: tClusterName, 31 | } 32 | ) 33 | 34 | var ( 35 | dummyContext = context.Background() 36 | ) 37 | 38 | type storageSuite struct { 39 | suite.Suite 40 | db storage.Storage 41 | sqliteDB *sql.DB 42 | } 43 | 44 | func TestStorage(t *testing.T) { 45 | suite.Run(t, &storageSuite{ 46 | Suite: suite.Suite{}, 47 | }) 48 | } 49 | 50 | func (s *storageSuite) BeforeTest(_, _ string) { 51 | t := s.T() 52 | 53 | sqliteDB, err := sql.Open("sqlite3", tFileName) 54 | require.NoError(t, err) 55 | s.sqliteDB = sqliteDB 56 | 57 | db, err := New(Config{ 58 | FileName: tFileName, 59 | ConnectTimeout: 3 * time.Second, 60 | QueryTimeout: 3 * time.Second, 61 | }) 62 | require.NoError(t, err) 63 | require.NotNil(t, db) 64 | 65 | s.db = db 66 | } 67 | 68 | func (s *storageSuite) AfterTest(_, _ string) { 69 | err := os.Remove(tFileName) 70 | require.NoError(s.T(), err) 71 | } 72 | 73 | func (s *storageSuite) TestEmptyResult() { 74 | t := s.T() 75 | _, err := s.db.GetClusterSnapshot(dummyContext, tClusterName) 76 | require.Equal(t, ErrEmptyResult, err) 77
| } 78 | 79 | func (s *storageSuite) TestSaveSnapshot() { 80 | t := s.T() 81 | err := s.db.SaveSnapshot(dummyContext, tClusterName, tSnapshot) 82 | require.NoError(t, err) 83 | 84 | snap, err := s.db.GetClusterSnapshot(dummyContext, tClusterName) 85 | require.NoError(t, err) 86 | require.Equal(t, tSnapshot, snap) 87 | } 88 | 89 | func (s *storageSuite) TestSaveRecovery() { 90 | t := s.T() 91 | err := s.db.SaveRecovery(dummyContext, tRecovery) 92 | require.NoError(t, err) 93 | 94 | results, err := s.db.GetRecoveries(dummyContext, tClusterName) 95 | require.NoError(t, err) 96 | require.Equal(t, []orchestrator.Recovery{tRecovery}, results) 97 | } 98 | 99 | func (s *storageSuite) TestSaveSnapshot_ShouldNotDuplicateSnapshots() { 100 | t := s.T() 101 | var lastCreatedAt int64 102 | for i := 0; i < 3; i++ { 103 | lastCreatedAt = time.Now().Unix() 104 | err := s.db.SaveSnapshot(dummyContext, tClusterName, vshard.Snapshot{ 105 | Created: lastCreatedAt, 106 | }) 107 | require.NoError(t, err) 108 | } 109 | 110 | snap, err := s.db.GetClusterSnapshot(dummyContext, tClusterName) 111 | require.NoError(t, err) 112 | assert.Equal(t, snap.Created, lastCreatedAt) 113 | 114 | expectedSnapshotsCount := 1 115 | var snapshotsCount int 116 | row := s.sqliteDB.QueryRow("select count(1) from snapshots where cluster_name = ?", tClusterName) 117 | err = row.Scan(&snapshotsCount) 118 | require.NoError(t, err) 119 | assert.Equal(t, expectedSnapshotsCount, snapshotsCount) 120 | } 121 | -------------------------------------------------------------------------------- /internal/storage/storage.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/shmel1k/qumomf/internal/vshard" 7 | "github.com/shmel1k/qumomf/internal/vshard/orchestrator" 8 | ) 9 | 10 | type Storage interface { 11 | GetClusters(context.Context) ([]ClusterSnapshotResp, error) 12 | SaveSnapshot(context.Context, string, vshard.Snapshot) error 13 | SaveRecovery(context.Context, orchestrator.Recovery) error 14 | GetClusterSnapshot(context.Context, string) (vshard.Snapshot, error) 15 | GetRecoveries(context.Context, string) ([]orchestrator.Recovery, error) 16 | } 17 | -------------------------------------------------------------------------------- /internal/util/util.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | func Timestamp() int64 { 8 | return time.Now().Unix() 9 | } 10 | 11 | func NewBool(v bool) *bool { 12 | return &v 13 | } 14 | 15 | func NewDuration(v time.Duration) *time.Duration { 16 | return &v 17 | } 18 | 19 | func NewString(v string) *string { 20 | return &v 21 | } 22 | -------------------------------------------------------------------------------- /internal/vshard/alert.go: -------------------------------------------------------------------------------- 1 | package vshard 2 | 3 | import "strings" 4 | 5 | type AlertType string 6 | 7 | const ( 8 | AlertUnreachableMaster = "UNREACHABLE_MASTER" 9 | AlertUnreachableReplica = "UNREACHABLE_REPLICA" 10 | ) 11 | 12 | type Alert struct { 13 | Type AlertType `json:"type"` 14 | Description string `json:"description"` 15 | } 16 | 17 | func (a Alert) String() string { 18 | var sb strings.Builder 19 | sb.WriteString(string(a.Type)) 20 | sb.WriteString(": ") 21 | sb.WriteRune('"') 22 | sb.WriteString(a.Description) 23 | sb.WriteRune('"') 24 | return sb.String() 25 | } 26 | 
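// Illustrative usage (not part of the original file): String renders an alert
// in a compact, log-friendly form, e.g.
//
//	a := Alert{Type: AlertUnreachableMaster, Description: "master is unreachable"}
//	fmt.Println(a) // prints: UNREACHABLE_MASTER: "master is unreachable"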
-------------------------------------------------------------------------------- /internal/vshard/cluster_test.go: -------------------------------------------------------------------------------- 1 | package vshard 2 | 3 | import ( 4 | "sort" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | 10 | "github.com/shmel1k/qumomf/internal/util" 11 | ) 12 | 13 | type tExpSet struct { 14 | setUUID ReplicaSetUUID 15 | masterUUID InstanceUUID 16 | instances []tExpInst 17 | } 18 | 19 | type tExpInst struct { 20 | uuid InstanceUUID 21 | uri string 22 | readonly bool 23 | hasUpstream bool 24 | upstreamStatus UpstreamStatus 25 | upstreamPeer string 26 | replicationStatus ReplicationStatus 27 | priority int 28 | } 29 | 30 | func TestCluster_Discover(t *testing.T) { 31 | if testing.Short() { 32 | t.Skip("test requires dev env - skipping it in short mode.") 33 | } 34 | 35 | c := MockCluster() 36 | c.Discover() 37 | 38 | assert.InDelta(t, util.Timestamp(), c.LastDiscovered(), 1000) 39 | 40 | routers := c.Routers() 41 | require.Len(t, routers, 1) 42 | r := routers[0] 43 | assert.Equal(t, "127.0.0.1:9301", r.URI) 44 | 45 | sets := c.ReplicaSets() 46 | sort.SliceStable(sets, func(i, j int) bool { // predictable order 47 | return sets[j].UUID < sets[i].UUID 48 | }) 49 | 50 | expected := []tExpSet{ 51 | { 52 | setUUID: "7432f072-c00b-4498-b1a6-6d9547a8a150", 53 | masterUUID: "a94e7310-13f0-4690-b136-169599e87ba0", 54 | instances: []tExpInst{ 55 | { 56 | uuid: "a94e7310-13f0-4690-b136-169599e87ba0", 57 | uri: "qumomf_1_m.ddk:3301", 58 | readonly: false, 59 | hasUpstream: false, 60 | replicationStatus: StatusMaster, 61 | priority: 0, 62 | }, 63 | { 64 | uuid: "bd1095d1-1e73-4ceb-8e2f-6ebdc7838cb1", 65 | uri: "qumomf_1_s.ddk:3301", 66 | readonly: true, 67 | hasUpstream: true, 68 | upstreamStatus: UpstreamFollow, 69 | upstreamPeer: "qumomf@qumomf_1_s.ddk:3301", 70 | replicationStatus: StatusFollow, 71 | priority: 0, 72 | }, 73 | }, 74 | }, 75 | { 76 | setUUID: "5065fb5f-5f40-498e-af79-43887ba3d1ec", 77 | masterUUID: "a3ef657e-eb9a-4730-b420-7ea78d52797d", 78 | instances: []tExpInst{ 79 | { 80 | uuid: "a3ef657e-eb9a-4730-b420-7ea78d52797d", 81 | uri: "qumomf_2_m.ddk:3301", 82 | readonly: false, 83 | hasUpstream: false, 84 | replicationStatus: StatusMaster, 85 | priority: 0, 86 | }, 87 | { 88 | uuid: "bd64dd00-161e-4c99-8b3c-d3c4635e18d2", 89 | uri: "qumomf_2_s_1.ddk:3301", 90 | readonly: true, 91 | hasUpstream: true, 92 | upstreamStatus: UpstreamFollow, 93 | upstreamPeer: "qumomf@qumomf_2_s_1.ddk:3301", 94 | replicationStatus: StatusFollow, 95 | priority: 10, 96 | }, 97 | { 98 | uuid: "cc4cfb9c-11d8-4810-84d2-66cfbebb0f6e", 99 | uri: "qumomf_2_s_2.ddk:3301", 100 | readonly: true, 101 | hasUpstream: true, 102 | upstreamStatus: UpstreamFollow, 103 | upstreamPeer: "qumomf@qumomf_2_s_2.ddk:3301", 104 | replicationStatus: StatusFollow, 105 | priority: 5, 106 | }, 107 | }, 108 | }, 109 | } 110 | 111 | require.Len(t, sets, len(expected)) 112 | 113 | for i, set := range sets { 114 | exp := expected[i] 115 | 116 | assert.Equal(t, exp.setUUID, set.UUID) 117 | assert.Equal(t, exp.masterUUID, set.MasterUUID) 118 | 119 | require.Len(t, set.Instances, len(exp.instances)) 120 | 121 | temp := set 122 | sort.SliceStable(set.Instances, func(i, j int) bool { // predictable order 123 | return temp.Instances[j].UUID > temp.Instances[i].UUID 124 | }) 125 | 126 | for j, inst := range set.Instances { 127 | expInst := exp.instances[j] 128 | 129 | assert.Equal(t, expInst.uuid, inst.UUID) 
130 | assert.Equal(t, expInst.uri, inst.URI) 131 | assert.Equal(t, expInst.readonly, inst.Readonly) 132 | assert.Equal(t, expInst.priority, inst.Priority) 133 | assert.True(t, inst.LastCheckValid) 134 | 135 | upstream := inst.Upstream 136 | if expInst.hasUpstream { 137 | require.NotNil(t, upstream) 138 | assert.Equal(t, expInst.upstreamStatus, upstream.Status) 139 | assert.Equal(t, expInst.upstreamPeer, inst.Upstream.Peer) 140 | assert.Empty(t, inst.Upstream.Message) 141 | } else { 142 | assert.Nil(t, upstream) 143 | } 144 | 145 | assert.Equal(t, expInst.replicationStatus, inst.StorageInfo.Replication.Status) 146 | } 147 | } 148 | } 149 | 150 | func TestCluster_Instance(t *testing.T) { 151 | sets := []ReplicaSet{ 152 | { 153 | UUID: "set_1", 154 | MasterUUID: "set_1_replica_1", 155 | Instances: []Instance{ 156 | { 157 | UUID: "set_1_replica_1", 158 | }, 159 | { 160 | UUID: "set_1_replica_2", 161 | }, 162 | { 163 | UUID: "set_1_replica_3", 164 | }, 165 | }, 166 | }, 167 | { 168 | UUID: "set_2", 169 | MasterUUID: "set_2_replica_2", 170 | Instances: []Instance{ 171 | { 172 | UUID: "set_2_replica_1", 173 | }, 174 | { 175 | UUID: "set_2_replica_2", 176 | }, 177 | }, 178 | }, 179 | } 180 | 181 | c := MockCluster() 182 | c.snapshot = Snapshot{ 183 | Created: util.Timestamp(), 184 | Routers: c.Routers(), 185 | ReplicaSets: sets, 186 | } 187 | 188 | tests := []struct { 189 | name string 190 | uuid InstanceUUID 191 | wantErr bool 192 | }{ 193 | { 194 | name: "KnownUUID_ShouldReturnInstance", 195 | uuid: "set_2_replica_1", 196 | wantErr: false, 197 | }, 198 | { 199 | name: "UnknownUUID_ShouldReturnErr", 200 | uuid: "set_2_replica_1000", 201 | wantErr: true, 202 | }, 203 | } 204 | 205 | for _, tv := range tests { 206 | tt := tv 207 | t.Run(tt.name, func(t *testing.T) { 208 | inst, err := c.Instance(tt.uuid) 209 | if tt.wantErr { 210 | require.NotNil(t, err) 211 | assert.Equal(t, ErrInstanceNotFound, err) 212 | } else { 213 | require.Nil(t, err) 214 | assert.Equal(t, tt.uuid, inst.UUID) 215 | } 216 | }) 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /internal/vshard/instance.go: -------------------------------------------------------------------------------- 1 | package vshard 2 | 3 | import "strings" 4 | 5 | type InstanceUUID string 6 | 7 | type ReplicationStatus string 8 | type UpstreamStatus string 9 | type DownstreamStatus string 10 | 11 | type HealthCode int 12 | type HealthLevel string 13 | 14 | const ( 15 | StatusFollow ReplicationStatus = "follow" 16 | StatusMaster ReplicationStatus = "master" 17 | StatusDisconnected ReplicationStatus = "disconnected" 18 | ) 19 | 20 | const ( 21 | UpstreamAuth UpstreamStatus = "auth" // the instance is getting authenticated to connect to a replication source. 22 | UpstreamConnecting UpstreamStatus = "connecting" // the instance is trying to connect to the replication source(s) listed in its replication parameter. 23 | UpstreamDisconnected UpstreamStatus = "disconnected" // the instance is not connected to the replica set (due to network problems, not replication errors). 24 | UpstreamFollow UpstreamStatus = "follow" // the replication is in progress. 25 | UpstreamRunning UpstreamStatus = "running" // the instance’s role is “master” (non read-only) and replication is in progress. 26 | UpstreamStopped UpstreamStatus = "stopped" // the replication was stopped due to a replication error (e.g. duplicate key).
27 | UpstreamOrphan UpstreamStatus = "orphan" // the instance has not (yet) succeeded in joining the required number of masters (see orphan status). 28 | UpstreamSync UpstreamStatus = "sync" // the master and replica are synchronizing to have the same data. 29 | ) 30 | 31 | const ( 32 | DownstreamFollow DownstreamStatus = "follow" // the downstream replication is in progress. 33 | DownstreamStopped DownstreamStatus = "stopped" // the downstream replication has stopped. 34 | ) 35 | 36 | const ( 37 | // A replica set works in a regular way. 38 | HealthCodeGreen HealthCode = 0 39 | // There are some issues, but they don’t affect the replica set’s efficiency 40 | // (worth noticing, but they don’t require immediate intervention). 41 | HealthCodeYellow HealthCode = 1 42 | // A replica set is in a degraded state. 43 | HealthCodeOrange HealthCode = 2 44 | // A replica set is disabled. 45 | HealthCodeRed HealthCode = 3 46 | // Fallback for any state not covered above. 47 | HealthCodeUnknown HealthCode = 4 48 | ) 49 | 50 | const ( 51 | HealthLevelGreen HealthLevel = "green" 52 | HealthLevelYellow HealthLevel = "yellow" 53 | HealthLevelOrange HealthLevel = "orange" 54 | HealthLevelRed HealthLevel = "red" 55 | HealthLevelUnknown HealthLevel = "unknown" // fallback for any state not covered above 56 | ) 57 | 58 | type Instance struct { 59 | // ID is a short numeric identifier of the instance within the replica set. 60 | ID uint64 `json:"id"` 61 | 62 | // UUID is a globally unique identifier of the instance. 63 | UUID InstanceUUID `json:"uuid"` 64 | 65 | // URI contains the host IP address and port number of the instance. 66 | URI string `json:"uri"` 67 | 68 | // Readonly indicates whether the instance is readonly or readwrite. 69 | Readonly bool `json:"readonly"` 70 | 71 | // LastCheckValid indicates whether the last check of the instance by qumomf was successful or not. 72 | LastCheckValid bool `json:"last_check_valid"` 73 | 74 | // LSN is the log sequence number (LSN) for the latest entry in the instance’s write ahead log (WAL). 75 | LSN int64 `json:"lsn"` 76 | 77 | // LSNBehindMaster is a measure of how far the replica is behind the master. 78 | LSNBehindMaster int64 `json:"lsn_behind_master"` 79 | 80 | // Upstream contains statistics for the replication data uploaded by the instance. 81 | Upstream *Upstream `json:"upstream"` 82 | 83 | // Downstream contains statistics for the replication data requested and downloaded from the instance. 84 | Downstream *Downstream `json:"downstream"` 85 | 86 | // StorageInfo contains the information about the storage instance. 87 | StorageInfo StorageInfo `json:"storage_info"` 88 | 89 | // VShardFingerprint is a CRC32 hash code of the shard topology configuration. 90 | VShardFingerprint uint64 `json:"vshard_fingerprint"` 91 | 92 | // Priority helps to choose the best candidate during the failover using 93 | // user promotion rules. 94 | // 95 | // If the priority is less than 0, the instance will not participate in the master election. 96 | Priority int `json:"priority"` 97 | } 98 | 99 | // InstanceIdent contains the unique UUID and URI of the instance. 100 | type InstanceIdent struct { 101 | UUID InstanceUUID 102 | URI string 103 | } 104 | 105 | func (ident InstanceIdent) String() string { 106 | var sb strings.Builder 107 | sb.Grow(len(ident.URI) + len(ident.UUID) + 1) 108 | sb.WriteString(string(ident.UUID)) 109 | sb.WriteRune('/') 110 | sb.WriteString(ident.URI) 111 | 112 | return sb.String() 113 | } 114 | 115 | // Upstream contains statistics for the replication data uploaded by the instance.
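// Note (added commentary): the electors in internal/quorum rely on the
// Status and Idle fields below when picking a new master; for example, the
// quorum tests treat a follower whose Idle exceeds the ReasonableFollowerIdle
// option as a non-viable candidate.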
116 | type Upstream struct { 117 | // Peer contains the replication user name, host IP address and port number used for the instance. 118 | Peer string `json:"peer"` 119 | 120 | // Status is the replication status of the instance. 121 | Status UpstreamStatus `json:"status"` 122 | 123 | // Idle is the time (in seconds) since the instance received the last event from a master. 124 | // This is the primary indicator of replication health. 125 | Idle float64 `json:"idle"` 126 | 127 | // Lag is the time difference between the local time at the instance, recorded when the event was received, 128 | // and the local time at another master recorded when the event was written to the write ahead log on that master. 129 | Lag float64 `json:"lag"` 130 | 131 | // Message contains an error message in case of a degraded state, empty otherwise. 132 | Message string `json:"message"` 133 | } 134 | 135 | type Downstream struct { 136 | // Status is the replication status for the downstream replication. 137 | Status DownstreamStatus `json:"status"` 138 | } 139 | 140 | func (i *Instance) Ident() InstanceIdent { 141 | return InstanceIdent{ 142 | UUID: i.UUID, 143 | URI: i.URI, 144 | } 145 | } 146 | 147 | func (i *Instance) HasAlert(t AlertType) bool { 148 | for _, a := range i.StorageInfo.Alerts { 149 | if a.Type == t { 150 | return true 151 | } 152 | } 153 | 154 | return false 155 | } 156 | 157 | func (i *Instance) CriticalCode() HealthCode { 158 | return i.StorageInfo.Status 159 | } 160 | 161 | func (i *Instance) CriticalLevel() HealthLevel { 162 | switch i.CriticalCode() { 163 | case HealthCodeGreen: 164 | return HealthLevelGreen 165 | case HealthCodeYellow: 166 | return HealthLevelYellow 167 | case HealthCodeOrange: 168 | return HealthLevelOrange 169 | case HealthCodeRed: 170 | return HealthLevelRed 171 | } 172 | 173 | return HealthLevelUnknown 174 | } 175 | 176 | func (i *Instance) Idle() float64 { 177 | if i.Upstream == nil { 178 | return 0 179 | } 180 | 181 | return i.Upstream.Idle 182 | } 183 | 184 | func (i *Instance) SameAs(another Instance) bool { 185 | return i.UUID == another.UUID && 186 | i.URI == another.URI && 187 | i.VShardFingerprint == another.VShardFingerprint && 188 | i.ID == another.ID && 189 | i.Readonly == another.Readonly && 190 | i.StorageInfo.Replication.Status == another.StorageInfo.Replication.Status && 191 | i.StorageInfo.Status == another.StorageInfo.Status 192 | } 193 | 194 | // InstanceInfo is a helper structure that contains 195 | // instance info in a custom format. 196 | type InstanceInfo struct { 197 | Readonly bool 198 | VShardFingerprint uint64 199 | StorageInfo StorageInfo 200 | } 201 | 202 | type StorageInfo struct { 203 | // Status indicates the current state of the ReplicaSet. 204 | // It ranges from 0 (green) up to 3 (red).
205 | Status HealthCode `json:"status"` 206 | Replication Replication `json:"replication"` 207 | Bucket InstanceBucket `json:"bucket"` 208 | Alerts []Alert `json:"alerts"` 209 | } 210 | 211 | type Replication struct { 212 | Status ReplicationStatus `json:"status"` 213 | } 214 | 215 | type InstanceBucket struct { 216 | Active int64 `json:"active"` 217 | Garbage int64 `json:"garbage"` 218 | Pinned int64 `json:"pinned"` 219 | Receiving int64 `json:"receiving"` 220 | Sending int64 `json:"sending"` 221 | Total int64 `json:"total"` 222 | } 223 | -------------------------------------------------------------------------------- /internal/vshard/mock.go: -------------------------------------------------------------------------------- 1 | package vshard 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/shmel1k/qumomf/internal/config" 7 | "github.com/shmel1k/qumomf/internal/util" 8 | ) 9 | 10 | func MockCluster() *Cluster { 11 | return NewCluster("sandbox", config.ClusterConfig{ 12 | Connection: &config.ConnectConfig{ 13 | User: util.NewString("qumomf"), 14 | Password: util.NewString("qumomf"), 15 | ConnectTimeout: util.NewDuration(1 * time.Second), 16 | RequestTimeout: util.NewDuration(1 * time.Second), 17 | }, 18 | ReadOnly: util.NewBool(true), 19 | OverrideURIRules: map[string]string{ 20 | "qumomf_1_m.ddk:3301": "127.0.0.1:9303", 21 | "qumomf_1_s.ddk:3301": "127.0.0.1:9304", 22 | "qumomf_2_m.ddk:3301": "127.0.0.1:9305", 23 | "qumomf_2_s_1.ddk:3301": "127.0.0.1:9306", 24 | "qumomf_2_s_2.ddk:3301": "127.0.0.1:9307", 25 | }, 26 | Priorities: map[string]int{ 27 | "bd64dd00-161e-4c99-8b3c-d3c4635e18d2": 10, 28 | "cc4cfb9c-11d8-4810-84d2-66cfbebb0f6e": 5, 29 | }, 30 | Routers: []config.RouterConfig{ 31 | { 32 | Name: "router_1", 33 | Addr: "127.0.0.1:9301", 34 | }, 35 | }, 36 | }) 37 | } 38 | -------------------------------------------------------------------------------- /internal/vshard/orchestrator/analysis.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | "fmt" 7 | "strconv" 8 | 9 | "github.com/shmel1k/qumomf/internal/vshard" 10 | ) 11 | 12 | type AnalysisWriteStream chan<- *ReplicationAnalysis 13 | type AnalysisReadStream <-chan *ReplicationAnalysis 14 | 15 | func NewAnalysisStream() chan *ReplicationAnalysis { 16 | return make(chan *ReplicationAnalysis) 17 | } 18 | 19 | type ReplicaSetState string 20 | 21 | const ( 22 | NoProblem ReplicaSetState = "NoProblem" 23 | DeadMaster ReplicaSetState = "DeadMaster" 24 | DeadMasterAndFollowers ReplicaSetState = "DeadMasterAndFollowers" 25 | DeadMasterAndSomeFollowers ReplicaSetState = "DeadMasterAndSomeFollowers" 26 | DeadMasterWithoutFollowers ReplicaSetState = "DeadMasterWithoutFollowers" 27 | DeadFollowers ReplicaSetState = "DeadFollowers" 28 | AllMasterFollowersNotReplicating ReplicaSetState = "AllMasterFollowersNotReplicating" 29 | NetworkProblems ReplicaSetState = "NetworkProblems" 30 | MasterMasterReplication ReplicaSetState = "MasterMasterReplication" 31 | InconsistentVShardConfiguration ReplicaSetState = "InconsistentVShardConfiguration" 32 | ) 33 | 34 | var ( 35 | ReplicaSetStateEnum = []ReplicaSetState{ 36 | NoProblem, 37 | DeadMaster, 38 | DeadMasterAndFollowers, 39 | DeadMasterAndSomeFollowers, 40 | DeadMasterWithoutFollowers, 41 | DeadFollowers, 42 | AllMasterFollowersNotReplicating, 43 | NetworkProblems, 44 | MasterMasterReplication, 45 | InconsistentVShardConfiguration, 46 | } 47 | ) 48 | 49 | type ReplicationAnalysis struct { 
50 | Set vshard.ReplicaSet 51 | CountReplicas int // Total number of replicas in the set 52 | CountWorkingReplicas int // Total number of successfully discovered replicas 53 | CountReplicatingReplicas int // Total number of replicas with confirmed replication 54 | CountInconsistentVShardConf int // Total number of replicas whose vshard configuration differs from the master's 55 | State ReplicaSetState 56 | // DeadFollowers lists the followers that are not currently connected to the leader. 57 | DeadFollowers []string 58 | } 59 | 60 | func (a ReplicationAnalysis) String() string { 61 | return fmt.Sprintf( 62 | "[State: %s; CountReplicas: %d; CountWorkingReplicas: %d; CountReplicatingReplicas: %d]", 63 | a.State, a.CountReplicas, a.CountWorkingReplicas, a.CountReplicatingReplicas, 64 | ) 65 | } 66 | 67 | func (a ReplicationAnalysis) GetHash() (string, error) { 68 | h := sha256.New() 69 | 70 | for _, val := range []string{ 71 | string(a.State), 72 | strconv.Itoa(a.CountReplicas), 73 | strconv.Itoa(a.CountWorkingReplicas), 74 | strconv.Itoa(a.CountReplicatingReplicas), 75 | strconv.Itoa(a.CountInconsistentVShardConf), 76 | a.Set.String(), 77 | } { 78 | _, err := h.Write([]byte(val)) 79 | if err != nil { 80 | return "", err 81 | } 82 | } 83 | 84 | return hex.EncodeToString(h.Sum(nil)), nil 85 | } 86 | -------------------------------------------------------------------------------- /internal/vshard/orchestrator/config.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/shmel1k/qumomf/internal/quorum" 7 | ) 8 | 9 | type Config struct { 10 | RecoveryPollTime time.Duration 11 | DiscoveryPollTime time.Duration 12 | } 13 | 14 | type FailoverConfig struct { 15 | Hooker *Hooker 16 | Elector quorum.Elector 17 | InstanceRecoveryBlockTime time.Duration 18 | ReplicaSetRecoveryBlockTime time.Duration 19 | } 20 | -------------------------------------------------------------------------------- /internal/vshard/orchestrator/failover_test.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/rs/zerolog" 8 | "github.com/stretchr/testify/assert" 9 | "github.com/stretchr/testify/require" 10 | "github.com/stretchr/testify/suite" 11 | 12 | "github.com/shmel1k/qumomf/internal/quorum" 13 | "github.com/shmel1k/qumomf/internal/util" 14 | "github.com/shmel1k/qumomf/internal/vshard" 15 | ) 16 | 17 | var ( 18 | tests = []struct { 19 | name string 20 | mode quorum.Mode 21 | opts quorum.Options 22 | }{ 23 | { 24 | name: "IdleElector", 25 | mode: quorum.ModeIdle, 26 | opts: quorum.Options{ 27 | ReasonableFollowerLSNLag: 10, 28 | ReasonableFollowerIdle: 1, 29 | }, 30 | }, 31 | { 32 | name: "SmartElector", 33 | mode: quorum.ModeSmart, 34 | opts: quorum.Options{ 35 | ReasonableFollowerLSNLag: 10, 36 | ReasonableFollowerIdle: 1, 37 | }, 38 | }, 39 | } 40 | ) 41 | 42 | type failoverTestSuite struct { 43 | suite.Suite 44 | 45 | cluster *vshard.Cluster 46 | failover Failover 47 | 48 | logger zerolog.Logger 49 | } 50 | 51 | func newFailoverTestSuite() *failoverTestSuite { 52 | return &failoverTestSuite{ 53 | logger: zerolog.New(zerolog.NewConsoleWriter()).With().Timestamp().Logger(), 54 | } 55 | } 56 | 57 | func (s *failoverTestSuite) SetupTest() { 58 | s.cluster = vshard.MockCluster() 59 | s.cluster.SetReadOnly(false) 60 | } 61 | 62 | func (s *failoverTestSuite) AfterTest(_, _ string) { 63 | if s.failover != nil { 64 |
s.failover.Shutdown() 65 | } 66 | if s.cluster != nil { 67 | s.cluster.Shutdown() 68 | } 69 | } 70 | 71 | func (s *failoverTestSuite) Test_failover_promoteFollowerToMaster() { 72 | t := s.T() 73 | 74 | if testing.Short() { 75 | t.Skip("test requires dev env - skipping it in short mode.") 76 | } 77 | 78 | s.cluster.Discover() 79 | require.InDelta(t, util.Timestamp(), s.cluster.LastDiscovered(), 1) 80 | 81 | for _, tt := range tests { 82 | tt := tt 83 | s.Run(tt.name, func() { 84 | hooker := NewBashHooker(s.logger) 85 | elector := quorum.New(tt.mode, tt.opts) 86 | s.failover = NewDefaultFailover(s.cluster, FailoverConfig{ 87 | Hooker: hooker, 88 | Elector: elector, 89 | ReplicaSetRecoveryBlockTime: 2 * time.Second, 90 | }, s.logger) 91 | fv := s.failover.(*failover) 92 | 93 | stream := NewAnalysisStream() 94 | fv.Serve(stream) 95 | 96 | set, err := s.cluster.ReplicaSet("7432f072-c00b-4498-b1a6-6d9547a8a150") 97 | require.Nil(t, err) 98 | 99 | analysis := &ReplicationAnalysis{ 100 | Set: set, 101 | CountReplicas: 1, 102 | CountWorkingReplicas: 0, 103 | CountReplicatingReplicas: 0, 104 | State: DeadMaster, 105 | } 106 | stream <- analysis 107 | 108 | require.Eventually(t, func() bool { 109 | return fv.hasBlockedRecovery(string(set.UUID)) 110 | }, 5*time.Second, 100*time.Millisecond) 111 | require.Len(t, fv.recoveries, 1) 112 | recv := fv.recoveries[0] 113 | 114 | require.True(t, recv.IsSuccessful) 115 | assert.InDelta(t, util.Timestamp(), recv.StartTimestamp, 5) 116 | assert.InDelta(t, util.Timestamp(), recv.EndTimestamp, 2) 117 | assert.Equal(t, string(analysis.State), recv.Type) 118 | assert.Equal(t, set.MasterUUID, recv.Failed.UUID) 119 | 120 | recvSet, err := s.cluster.ReplicaSet("7432f072-c00b-4498-b1a6-6d9547a8a150") 121 | require.Nil(t, err) 122 | 123 | assert.Equal(t, recv.Successor.UUID, recvSet.MasterUUID) 124 | 125 | master, err := recvSet.Master() 126 | require.Nil(t, err) 127 | assert.False(t, master.Readonly) 128 | 129 | alive := recvSet.AliveFollowers() 130 | assert.Len(t, alive, 1) 131 | for i := range alive { 132 | assert.True(t, alive[i].Readonly) 133 | } 134 | 135 | // Ensure that anti-flapping is working. 136 | analysis.Set = recvSet 137 | stream <- analysis 138 | 139 | require.Len(t, fv.recoveries, 1) 140 | assert.Same(t, recv, fv.recoveries[0]) 141 | 142 | // Recreate the initial cluster. 
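// cleanup(true) resets the registered recoveries, so the replica set is no
// longer blocked and becomes eligible for a new failover (verified just below).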
143 | fv.cleanup(true) 144 | require.False(t, fv.hasBlockedRecovery(string(set.UUID))) 145 | 146 | stream <- analysis 147 | 148 | require.Eventually(t, func() bool { 149 | return fv.hasBlockedRecovery(string(set.UUID)) 150 | }, 5*time.Second, 100*time.Millisecond) 151 | require.Len(t, fv.recoveries, 1) 152 | assert.True(t, recv != fv.recoveries[0]) 153 | 154 | recv = fv.recoveries[0] 155 | assert.True(t, recv.IsSuccessful) 156 | assert.Equal(t, set.MasterUUID, recv.Successor.UUID) 157 | 158 | time.Sleep(1 * time.Second) 159 | }) 160 | } 161 | } 162 | 163 | func (s *failoverTestSuite) Test_failover_applyFollowerRoleToCoMasters() { 164 | t := s.T() 165 | 166 | if testing.Short() { 167 | t.Skip("test requires dev env - skipping it in short mode.") 168 | } 169 | 170 | s.cluster.Discover() 171 | require.InDelta(t, util.Timestamp(), s.cluster.LastDiscovered(), 1) 172 | 173 | for _, tt := range tests { 174 | tt := tt 175 | s.Run(tt.name, func() { 176 | hooker := NewBashHooker(s.logger) 177 | elector := quorum.New(tt.mode, tt.opts) 178 | s.failover = NewDefaultFailover(s.cluster, FailoverConfig{ 179 | Hooker: hooker, 180 | Elector: elector, 181 | ReplicaSetRecoveryBlockTime: 2 * time.Second, 182 | InstanceRecoveryBlockTime: 2 * time.Second, 183 | }, s.logger) 184 | fv := s.failover.(*failover) 185 | 186 | stream := NewAnalysisStream() 187 | fv.Serve(stream) 188 | 189 | set, err := s.cluster.ReplicaSet("7432f072-c00b-4498-b1a6-6d9547a8a150") 190 | require.Nil(t, err) 191 | 192 | invalidUUID := "bd1095d1-1e73-4ceb-8e2f-6ebdc7838cb1" 193 | 194 | for i := range set.Instances { 195 | inst := &set.Instances[i] 196 | if inst.UUID == vshard.InstanceUUID(invalidUUID) { 197 | inst.VShardFingerprint = 100 198 | break 199 | } 200 | } 201 | 202 | analysis := &ReplicationAnalysis{ 203 | Set: set, 204 | CountReplicas: 1, 205 | CountWorkingReplicas: 1, 206 | CountReplicatingReplicas: 1, 207 | CountInconsistentVShardConf: 1, 208 | State: MasterMasterReplication, 209 | } 210 | stream <- analysis 211 | 212 | require.Eventually(t, func() bool { 213 | return fv.hasBlockedRecovery(invalidUUID) 214 | }, 5*time.Second, 100*time.Millisecond) 215 | require.Len(t, fv.recoveries, 1) 216 | recv := fv.recoveries[0] 217 | 218 | assert.True(t, recv.IsSuccessful) 219 | assert.Equal(t, string(analysis.State), recv.Type) 220 | assert.Equal(t, invalidUUID, recv.ScopeKey()) 221 | assert.False(t, recv.Expired()) 222 | 223 | time.Sleep(1 * time.Second) 224 | }) 225 | } 226 | } 227 | 228 | func TestFailover(t *testing.T) { 229 | suite.Run(t, newFailoverTestSuite()) 230 | } 231 | -------------------------------------------------------------------------------- /internal/vshard/orchestrator/hook.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "os/exec" 8 | "strconv" 9 | "strings" 10 | "time" 11 | 12 | "github.com/rs/zerolog" 13 | ) 14 | 15 | type HookType string 16 | 17 | const ( 18 | HookPreFailover HookType = "PreFailover" 19 | HookPostSuccessfulFailover HookType = "PostSuccessfulFailover" 20 | HookPostUnsuccessfulFailover HookType = "PostUnsuccessfulFailover" 21 | ) 22 | 23 | const ( 24 | ShellBash = "bash" 25 | ) 26 | 27 | type Hooker struct { 28 | processesShellCommand string 29 | processes map[HookType][]string 30 | timeout time.Duration 31 | timeoutAsync time.Duration 32 | logger zerolog.Logger 33 | } 34 | 35 | func NewHooker(shell string, logger zerolog.Logger) *Hooker { 36 | return &Hooker{ 37 | 
processesShellCommand: shell, 38 | processes: make(map[HookType][]string), 39 | timeout: 2 * time.Second, 40 | timeoutAsync: 10 * time.Minute, 41 | logger: logger, 42 | } 43 | } 44 | 45 | func NewBashHooker(logger zerolog.Logger) *Hooker { 46 | return NewHooker(ShellBash, logger) 47 | } 48 | 49 | // SetTimeout sets timeout for basic hook. 50 | func (h *Hooker) SetTimeout(t time.Duration) { 51 | h.timeout = t 52 | } 53 | 54 | // SetTimeoutAsync sets timeout for async hook. 55 | func (h *Hooker) SetTimeoutAsync(t time.Duration) { 56 | h.timeoutAsync = t 57 | } 58 | 59 | func (h *Hooker) AddHook(t HookType, commands ...string) { 60 | hooks, ok := h.processes[t] 61 | if !ok { 62 | hooks = make([]string, 0, len(commands)) 63 | } 64 | hooks = append(hooks, commands...) 65 | h.processes[t] = hooks 66 | } 67 | 68 | // ExecuteProcesses executes a list of processes. 69 | func (h *Hooker) ExecuteProcesses(t HookType, recv *Recovery, failOnError bool) (err error) { 70 | processes := h.processes[t] 71 | if len(processes) == 0 { 72 | h.logger.Info().Msgf("No %s hooks to run", t) 73 | return nil 74 | } 75 | 76 | h.logger.Info().Msgf("Running %d %s hooks", len(processes), t) 77 | for i, process := range processes { 78 | command, async := prepareCommand(process, recv) 79 | env := applyEnvironmentVariables(recv) 80 | 81 | fullDescription := fmt.Sprintf("%s hook %d of %d", t, i+1, len(processes)) 82 | if async { 83 | fullDescription = fmt.Sprintf("%s (async)", fullDescription) 84 | } 85 | if async { 86 | go func() { 87 | ctx, cancel := context.WithTimeout(context.Background(), h.timeoutAsync) 88 | // Ignore errors, it is async process. 89 | _ = h.executeProcess(ctx, command, env, fullDescription) 90 | cancel() 91 | }() 92 | } else { 93 | ctx, cancel := context.WithTimeout(context.Background(), h.timeout) 94 | cmdErr := h.executeProcess(ctx, command, env, fullDescription) 95 | cancel() 96 | 97 | if cmdErr != nil { 98 | if failOnError { 99 | h.logger.Warn().Msgf("Not running further %s hooks", t) 100 | return cmdErr 101 | } 102 | if err == nil { 103 | // Keep first error encountered. 104 | err = cmdErr 105 | } 106 | } 107 | } 108 | } 109 | h.logger.Info().Msgf("Done running %s hooks", t) 110 | 111 | return err 112 | } 113 | 114 | func (h *Hooker) executeProcess(ctx context.Context, command string, env []string, fullDescription string) error { 115 | // Log the command to be run and record how long it takes as this may be useful. 116 | h.logger.Info().Msgf("Running %s: %s", fullDescription, command) 117 | start := time.Now() 118 | 119 | cmd := exec.CommandContext(ctx, h.processesShellCommand, "-c", command) //nolint:gosec 120 | cmd.Env = env 121 | 122 | err := cmd.Run() 123 | if err == nil { 124 | h.logger.Info().Msgf("Completed %s in %v", fullDescription, time.Since(start)) 125 | } else { 126 | h.logger.Error().Msgf("Execution of %s failed in %v with error: %v", fullDescription, time.Since(start), err) 127 | } 128 | 129 | return err 130 | } 131 | 132 | // prepareCommand replaces agreed-upon placeholders with recovery data. 
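// For example (illustrative values), the configured hook command
//
//	echo "{failureType} at {failedURI}"
//
// expands to something like: echo "DeadMaster at localhost:3301".
// A leading '&', as in "&sleep 3", marks the command to be run asynchronously.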
133 | func prepareCommand(command string, recv *Recovery) (result string, async bool) { 134 | command = strings.TrimSpace(command) 135 | if strings.HasPrefix(command, "&") { 136 | command = strings.TrimLeft(command, "&") 137 | async = true 138 | } 139 | 140 | analysis := recv.AnalysisEntry 141 | 142 | command = strings.Replace(command, "{failureType}", recv.Type, -1) 143 | command = strings.Replace(command, "{failedUUID}", string(recv.Failed.UUID), -1) 144 | command = strings.Replace(command, "{failedURI}", recv.Failed.URI, -1) 145 | command = strings.Replace(command, "{failureCluster}", recv.ClusterName, -1) 146 | command = strings.Replace(command, "{failureReplicaSetUUID}", string(recv.SetUUID), -1) 147 | command = strings.Replace(command, "{countFollowers}", strconv.Itoa(analysis.CountReplicas), -1) 148 | command = strings.Replace(command, "{countWorkingFollowers}", strconv.Itoa(analysis.CountWorkingReplicas), -1) 149 | command = strings.Replace(command, "{countReplicatingFollowers}", strconv.Itoa(analysis.CountReplicatingReplicas), -1) 150 | command = strings.Replace(command, "{countInconsistentVShardConf}", strconv.Itoa(analysis.CountInconsistentVShardConf), -1) 151 | command = strings.Replace(command, "{isSuccessful}", fmt.Sprint(recv.IsSuccessful), -1) 152 | 153 | if recv.IsSuccessful { 154 | command = strings.Replace(command, "{successorUUID}", string(recv.Successor.UUID), -1) 155 | command = strings.Replace(command, "{successorURI}", recv.Successor.URI, -1) 156 | } 157 | 158 | return command, async 159 | } 160 | 161 | // applyEnvironmentVariables sets the relevant environment variables for a recovery. 162 | //nolint:gocritic 163 | func applyEnvironmentVariables(recv *Recovery) []string { 164 | env := os.Environ() 165 | 166 | env = append(env, fmt.Sprintf("QUM_FAILURE_TYPE=%s", recv.Type)) 167 | env = append(env, fmt.Sprintf("QUM_FAILED_UUID=%s", string(recv.Failed.UUID))) 168 | env = append(env, fmt.Sprintf("QUM_FAILED_URI=%s", recv.Failed.URI)) 169 | env = append(env, fmt.Sprintf("QUM_FAILURE_CLUSTER=%s", recv.ClusterName)) 170 | env = append(env, fmt.Sprintf("QUM_FAILURE_REPLICA_SET_UUID=%s", recv.SetUUID)) 171 | env = append(env, fmt.Sprintf("QUM_COUNT_FOLLOWERS=%d", recv.AnalysisEntry.CountReplicas)) 172 | env = append(env, fmt.Sprintf("QUM_COUNT_WORKING_FOLLOWERS=%d", recv.AnalysisEntry.CountWorkingReplicas)) 173 | env = append(env, fmt.Sprintf("QUM_COUNT_REPLICATING_FOLLOWERS=%d", recv.AnalysisEntry.CountReplicatingReplicas)) 174 | env = append(env, fmt.Sprintf("QUM_COUNT_INCONSISTENT_VSHARD_CONF=%d", recv.AnalysisEntry.CountInconsistentVShardConf)) 175 | env = append(env, fmt.Sprintf("QUM_IS_SUCCESSFUL=%t", recv.IsSuccessful)) 176 | 177 | if recv.IsSuccessful { 178 | env = append(env, fmt.Sprintf("QUM_SUCCESSOR_UUID=%s", recv.Successor.UUID)) 179 | env = append(env, fmt.Sprintf("QUM_SUCCESSOR_URI=%s", recv.Successor.URI)) 180 | } 181 | 182 | return env 183 | } 184 | -------------------------------------------------------------------------------- /internal/vshard/orchestrator/hook_test.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "path" 8 | "strconv" 9 | "strings" 10 | "testing" 11 | "time" 12 | 13 | "github.com/rs/zerolog" 14 | "github.com/stretchr/testify/assert" 15 | "github.com/stretchr/testify/require" 16 | "github.com/stretchr/testify/suite" 17 | 18 | "github.com/shmel1k/qumomf/internal/vshard" 19 | ) 20 | 21 | type hookerTestSuite struct { 22 | 
suite.Suite 23 | 24 | failed vshard.InstanceIdent 25 | analysis *ReplicationAnalysis 26 | recv *Recovery 27 | 28 | logger zerolog.Logger 29 | } 30 | 31 | func (s *hookerTestSuite) SetupTest() { 32 | s.analysis = mockAnalysis 33 | s.failed = vshard.InstanceIdent{ 34 | UUID: s.analysis.Set.MasterUUID, 35 | URI: "localhost:8080", 36 | } 37 | s.recv = NewRecovery(RecoveryScopeSet, s.failed, *s.analysis) 38 | s.recv.ClusterName = "sandbox" 39 | s.logger = zerolog.New(zerolog.NewConsoleWriter()).With().Timestamp().Logger() 40 | } 41 | 42 | func TestHooker(t *testing.T) { 43 | suite.Run(t, &hookerTestSuite{}) 44 | } 45 | 46 | func (s *hookerTestSuite) Test_ExecuteProcesses() { 47 | t := s.T() 48 | 49 | env := []string{ 50 | fmt.Sprintf("QUM_FAILURE_TYPE=%s", s.analysis.State), 51 | fmt.Sprintf("QUM_FAILED_UUID=%s", s.failed.UUID), 52 | fmt.Sprintf("QUM_FAILED_URI=%s", s.failed.URI), 53 | fmt.Sprintf("QUM_FAILURE_CLUSTER=%s", s.recv.ClusterName), 54 | fmt.Sprintf("QUM_FAILURE_REPLICA_SET_UUID=%s", s.analysis.Set.UUID), 55 | fmt.Sprintf("QUM_COUNT_FOLLOWERS=%d", s.analysis.CountReplicas), 56 | fmt.Sprintf("QUM_COUNT_WORKING_FOLLOWERS=%d", s.analysis.CountWorkingReplicas), 57 | fmt.Sprintf("QUM_COUNT_REPLICATING_FOLLOWERS=%d", s.analysis.CountReplicatingReplicas), 58 | fmt.Sprintf("QUM_COUNT_INCONSISTENT_VSHARD_CONF=%d", s.analysis.CountInconsistentVShardConf), 59 | fmt.Sprintf("QUM_IS_SUCCESSFUL=%t", s.recv.IsSuccessful), 60 | } 61 | 62 | hooker := NewBashHooker(s.logger) 63 | 64 | filename := genUniqueFilename(os.TempDir(), "qumomf-hook-test") 65 | require.NotEmpty(t, filename) 66 | defer func() { 67 | _ = os.Remove(filename) 68 | }() 69 | 70 | hooker.AddHook(HookPreFailover, fmt.Sprintf("touch %s", filename)) 71 | hooker.AddHook(HookPreFailover, fmt.Sprintf("echo $(printenv | grep QUM) >> %s", filename)) 72 | 73 | hooker.AddHook(HookPostSuccessfulFailover, fmt.Sprintf("rm -f %s", filename)) 74 | 75 | err := hooker.ExecuteProcesses(HookPreFailover, s.recv, true) 76 | require.Nil(t, err) 77 | 78 | f, err := os.Open(filename) 79 | require.Nil(t, err) 80 | defer func() { _ = f.Close() }() 81 | 82 | foundEnv := make([]string, 0, len(env)) 83 | scanner := bufio.NewScanner(f) 84 | for scanner.Scan() { 85 | line := scanner.Text() 86 | for _, e := range env { 87 | if strings.Contains(line, e) { 88 | foundEnv = append(foundEnv, e) 89 | } 90 | } 91 | } 92 | 93 | assert.Equal(t, env, foundEnv) 94 | 95 | err = hooker.ExecuteProcesses(HookPostSuccessfulFailover, s.recv, false) 96 | assert.Nil(t, err) 97 | } 98 | 99 | func (s *hookerTestSuite) Test_ExecuteProcesses_Async() { 100 | t := s.T() 101 | 102 | hooker := NewBashHooker(s.logger) 103 | 104 | start := time.Now() 105 | hooker.AddHook(HookPreFailover, "&sleep 3") 106 | err := hooker.ExecuteProcesses(HookPreFailover, s.recv, true) 107 | end := time.Now() 108 | assert.Nil(t, err) 109 | assert.WithinDuration(t, start, end, 1*time.Second) 110 | } 111 | 112 | func (s *hookerTestSuite) Test_ExecuteProcesses_CheckArguments() { 113 | t := s.T() 114 | 115 | s.recv.IsSuccessful = true 116 | s.recv.Successor = vshard.InstanceIdent{ 117 | UUID: "successor_uuid", 118 | URI: "successor_uri", 119 | } 120 | 121 | args := []string{ 122 | "failureType", 123 | "failedUUID", 124 | "failedURI", 125 | "failureCluster", 126 | "failureReplicaSetUUID", 127 | "countFollowers", 128 | "countWorkingFollowers", 129 | "countReplicatingFollowers", 130 | "countInconsistentVShardConf", 131 | "isSuccessful", 132 | "successorUUID", 133 | "successorURI", 134 | } 135 | expectedArgs := []string{ 136
| fmt.Sprintf("failureType=%s", s.analysis.State), 137 | fmt.Sprintf("failedUUID=%s", s.failed.UUID), 138 | fmt.Sprintf("failedURI=%s", s.failed.URI), 139 | fmt.Sprintf("failureCluster=%s", s.recv.ClusterName), 140 | fmt.Sprintf("failureReplicaSetUUID=%s", s.analysis.Set.UUID), 141 | fmt.Sprintf("countFollowers=%d", s.analysis.CountReplicas), 142 | fmt.Sprintf("countWorkingFollowers=%d", s.analysis.CountWorkingReplicas), 143 | fmt.Sprintf("countReplicatingFollowers=%d", s.analysis.CountReplicatingReplicas), 144 | fmt.Sprintf("countInconsistentVShardConf=%d", s.analysis.CountInconsistentVShardConf), 145 | fmt.Sprintf("isSuccessful=%t", s.recv.IsSuccessful), 146 | fmt.Sprintf("successorUUID=%s", s.recv.Successor.UUID), 147 | fmt.Sprintf("successorURI=%s", s.recv.Successor.URI), 148 | } 149 | 150 | hooker := NewBashHooker(s.logger) 151 | 152 | filename := genUniqueFilename(os.TempDir(), "qumomf-hook-test") 153 | require.NotEmpty(t, filename) 154 | defer func() { 155 | _ = os.Remove(filename) 156 | }() 157 | 158 | hooker.AddHook(HookPreFailover, fmt.Sprintf("touch %s", filename)) 159 | for _, arg := range args { 160 | hooker.AddHook(HookPreFailover, fmt.Sprintf("echo '%s={%s}' >> %s", arg, arg, filename)) 161 | } 162 | hooker.AddHook(HookPostSuccessfulFailover, fmt.Sprintf("rm -f %s", filename)) 163 | 164 | err := hooker.ExecuteProcesses(HookPreFailover, s.recv, true) 165 | require.Nil(t, err) 166 | 167 | f, err := os.Open(filename) 168 | require.Nil(t, err) 169 | defer func() { _ = f.Close() }() 170 | 171 | foundArgs := make([]string, 0, len(expectedArgs)) 172 | scanner := bufio.NewScanner(f) 173 | for scanner.Scan() { 174 | line := scanner.Text() 175 | for _, e := range expectedArgs { 176 | if strings.Contains(line, e) { 177 | foundArgs = append(foundArgs, e) 178 | } 179 | } 180 | } 181 | 182 | assert.Equal(t, expectedArgs, foundArgs) 183 | 184 | err = hooker.ExecuteProcesses(HookPostSuccessfulFailover, s.recv, false) 185 | assert.Nil(t, err) 186 | } 187 | 188 | func genUniqueFilename(dir, prefix string) string { 189 | name := "" 190 | rand := uint32(0) 191 | for i := 0; i < 1000; i++ { 192 | name = path.Join(dir, prefix+nextRandom(&rand)) 193 | _, err := os.Stat(name) 194 | if os.IsExist(err) { 195 | continue 196 | } 197 | break 198 | } 199 | return name 200 | } 201 | 202 | func reseed() uint32 { 203 | return uint32(time.Now().UnixNano() + int64(os.Getpid())) 204 | } 205 | 206 | func nextRandom(rand *uint32) string { 207 | r := *rand 208 | if r == 0 { 209 | r = reseed() 210 | } 211 | r = r*1664525 + 1013904223 // constants from Numerical Recipes 212 | *rand = r 213 | 214 | return strconv.Itoa(int(1e9 + r%1e9))[1:] 215 | } 216 | -------------------------------------------------------------------------------- /internal/vshard/orchestrator/instance_utils.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import "github.com/shmel1k/qumomf/internal/vshard" 4 | 5 | // InstanceFailoverSorter sorts instances by priority to update vshard configuration. 
6 | type InstanceFailoverSorter struct { 7 | instances []vshard.Instance 8 | } 9 | 10 | func NewInstanceFailoverSorter(instances []vshard.Instance) *InstanceFailoverSorter { 11 | return &InstanceFailoverSorter{ 12 | instances: instances, 13 | } 14 | } 15 | 16 | func (s *InstanceFailoverSorter) Len() int { 17 | return len(s.instances) 18 | } 19 | 20 | func (s *InstanceFailoverSorter) Swap(i, j int) { 21 | s.instances[i], s.instances[j] = s.instances[j], s.instances[i] 22 | } 23 | 24 | func (s *InstanceFailoverSorter) Less(i, j int) bool { 25 | left, right := s.instances[i], s.instances[j] 26 | 27 | // Prefer replicas that were polled successfully last time. 28 | if left.LastCheckValid && !right.LastCheckValid { 29 | return true 30 | } 31 | // Prefer instances whose master is unreachable. 32 | if left.HasAlert(vshard.AlertUnreachableMaster) && !right.HasAlert(vshard.AlertUnreachableMaster) { 33 | return true 34 | } 35 | if right.HasAlert(vshard.AlertUnreachableMaster) && !left.HasAlert(vshard.AlertUnreachableMaster) { 36 | return false 37 | } 38 | // Prefer the most up-to-date replica. 39 | return left.Idle() < right.Idle() 40 | } 41 | -------------------------------------------------------------------------------- /internal/vshard/orchestrator/instance_utils_test.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import ( 4 | "sort" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | 9 | "github.com/shmel1k/qumomf/internal/vshard" 10 | ) 11 | 12 | func TestInstanceFailoverSorter(t *testing.T) { 13 | instances := []vshard.Instance{ 14 | { 15 | UUID: "replica_1", 16 | LastCheckValid: false, 17 | Upstream: &vshard.Upstream{ 18 | Status: vshard.UpstreamFollow, 19 | Idle: 0, 20 | }, 21 | StorageInfo: vshard.StorageInfo{ 22 | Replication: vshard.Replication{ 23 | Status: "", 24 | }, 25 | Alerts: nil, 26 | }, 27 | }, 28 | { 29 | UUID: "replica_2", 30 | LastCheckValid: true, 31 | Upstream: &vshard.Upstream{ 32 | Status: vshard.UpstreamFollow, 33 | Idle: 0.032492704689502716, 34 | }, 35 | StorageInfo: vshard.StorageInfo{ 36 | Replication: vshard.Replication{ 37 | Status: vshard.StatusDisconnected, 38 | }, 39 | Alerts: []vshard.Alert{ 40 | { 41 | Type: vshard.AlertUnreachableMaster, 42 | Description: "Master of replicaset is unreachable: disconnected", 43 | }, 44 | }, 45 | }, 46 | }, 47 | { 48 | UUID: "replica_3", 49 | LastCheckValid: true, 50 | Upstream: &vshard.Upstream{ 51 | Status: vshard.UpstreamFollow, 52 | Idle: 3.479430440813303, 53 | }, 54 | StorageInfo: vshard.StorageInfo{ 55 | Replication: vshard.Replication{ 56 | Status: vshard.StatusDisconnected, 57 | }, 58 | Alerts: []vshard.Alert{ 59 | { 60 | Type: vshard.AlertUnreachableMaster, 61 | Description: "Master of replicaset is unreachable: disconnected", 62 | }, 63 | }, 64 | }, 65 | }, 66 | { 67 | UUID: "replica_4", 68 | LastCheckValid: true, 69 | Upstream: &vshard.Upstream{ 70 | Status: vshard.UpstreamFollow, 71 | Idle: 0.079430440813303, 72 | }, 73 | StorageInfo: vshard.StorageInfo{ 74 | Replication: vshard.Replication{ 75 | Status: vshard.StatusFollow, 76 | }, 77 | Alerts: nil, 78 | }, 79 | }, 80 | { 81 | UUID: "replica_5", 82 | LastCheckValid: true, 83 | Upstream: &vshard.Upstream{ 84 | Status: vshard.UpstreamFollow, 85 | Idle: 0, 86 | }, 87 | StorageInfo: vshard.StorageInfo{ 88 | Replication: vshard.Replication{ 89 | Status: vshard.StatusMaster, 90 | }, 91 | Alerts: nil, 92 | }, 93 | }, 94 | } 95 | 96 | sort.Sort(NewInstanceFailoverSorter(instances)) 97 | 98 |
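// Expected order given the fixtures above: replica_2 and replica_3 come first
// (valid last check plus an unreachable-master alert, lower idle first), then
// replica_5 and replica_4 (valid last check, ordered by idle), and replica_1
// comes last (its last check failed).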
expected := []vshard.InstanceUUID{
99 | "replica_2", "replica_3", "replica_5", "replica_4", "replica_1",
100 | }
101 |
102 | got := make([]vshard.InstanceUUID, 0, len(instances))
103 | for _, inst := range instances {
104 | got = append(got, inst.UUID)
105 | }
106 |
107 | assert.Equal(t, expected, got)
108 | }
109 |
-------------------------------------------------------------------------------- /internal/vshard/orchestrator/monitor.go: --------------------------------------------------------------------------------
1 | package orchestrator
2 |
3 | import (
4 | "time"
5 |
6 | "github.com/rs/zerolog"
7 |
8 | "github.com/shmel1k/qumomf/internal/metrics"
9 | "github.com/shmel1k/qumomf/internal/vshard"
10 | )
11 |
12 | type Monitor interface {
13 | Serve() AnalysisReadStream
14 | Shutdown()
15 | }
16 |
17 | func NewMonitor(cluster *vshard.Cluster, cfg Config, logger zerolog.Logger) Monitor {
18 | return &storageMonitor{
19 | config: cfg,
20 | cluster: cluster,
21 | stop: make(chan struct{}, 1),
22 | logger: logger,
23 | }
24 | }
25 |
26 | type storageMonitor struct {
27 | config Config
28 |
29 | cluster *vshard.Cluster
30 | analyzed int64 // identifier of the last analyzed cluster topology
31 |
32 | stop chan struct{}
33 | logger zerolog.Logger
34 | }
35 |
36 | func (m *storageMonitor) Serve() AnalysisReadStream {
37 | stream := NewAnalysisStream()
38 | go m.continuousDiscovery(stream)
39 |
40 | return stream
41 | }
42 |
43 | func (m *storageMonitor) continuousDiscovery(stream AnalysisWriteStream) {
44 | recoveryTick := time.NewTicker(m.config.RecoveryPollTime)
45 | defer recoveryTick.Stop()
46 | discoveryTick := time.NewTicker(m.config.DiscoveryPollTime)
47 | defer discoveryTick.Stop()
48 |
49 | continuousDiscoveryStartTime := time.Now()
50 | checkAndRecoverWaitPeriod := 3 * m.config.DiscoveryPollTime
51 |
52 | runCheckAndRecoverOperationsTimeRipe := func() bool {
53 | return time.Since(continuousDiscoveryStartTime) >= checkAndRecoverWaitPeriod
54 | }
55 |
56 | for {
57 | select {
58 | case <-m.stop:
59 | return
60 | case <-discoveryTick.C:
61 | go m.cluster.Discover()
62 | case <-recoveryTick.C:
63 | // NOTE: we might improve this by checking the delay only on startup.
64 | if runCheckAndRecoverOperationsTimeRipe() {
65 | m.checkCluster(stream)
66 | } else {
67 | m.logger.Info().Msgf("Waiting for %+v seconds to pass before running failure detection/recovery", checkAndRecoverWaitPeriod.Seconds())
68 | }
69 | }
70 | }
71 | }
72 |
73 | func (m *storageMonitor) checkCluster(stream AnalysisWriteStream) {
74 | discovered := m.cluster.LastDiscovered()
75 | if discovered <= m.analyzed {
76 | // Prevent repeated analyses of the same cluster topology.
77 | return
78 | }
79 |
80 | for _, set := range m.cluster.ReplicaSets() {
81 | go func(set vshard.ReplicaSet) {
82 | logger := m.logger.With().Str("replica_set", string(set.UUID)).Logger()
83 | analysis := analyze(set, logger)
84 | if analysis != nil {
85 | stream <- analysis
86 |
87 | for _, state := range ReplicaSetStateEnum {
88 | active := state == analysis.State
89 | metrics.SetShardState(m.cluster.Name, string(set.UUID), string(state), active)
90 | }
91 | }
92 | }(set)
93 | }
94 |
95 | m.analyzed = discovered
96 | }
97 |
98 | func analyze(set vshard.ReplicaSet, logger zerolog.Logger) *ReplicationAnalysis { //nolint: gocyclo
99 | master, err := set.Master()
100 | if err != nil {
101 | // Something went really wrong: we have a data inconsistency here.
102 | // Master UUID not found in ReplicaSet.
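// Skip this analysis round: no recovery decision can be made safely from such a snapshot.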
103 | logger.Error().Msgf("Fatal analysis error: master '%s' not found in the given snapshot. Likely an internal error", set.MasterUUID)
104 | return nil
105 | }
106 |
107 | countReplicas := 0
108 | countWorkingReplicas := 0
109 | countReplicatingReplicas := 0
110 | countInconsistentVShardConf := 0
111 | masterMasterReplication := false
112 | followers := set.Followers()
113 | var deadFollowers []string
114 | for i := range followers {
115 | r := &followers[i]
116 | countReplicas++
117 | if r.LastCheckValid {
118 | countWorkingReplicas++
119 |
120 | status := r.StorageInfo.Replication.Status
121 | switch status {
122 | case vshard.StatusFollow:
123 | countReplicatingReplicas++
124 | case vshard.StatusMaster:
125 | countReplicatingReplicas++
126 | masterMasterReplication = true
127 | logger.Warn().Msgf("Found M-M replication ('%s'-'%s'), ('%s'-'%s')", set.MasterUUID, r.UUID, set.MasterURI, r.URI)
128 | default:
129 | deadFollowers = append(deadFollowers, string(r.UUID))
130 | }
131 |
132 | if r.VShardFingerprint != master.VShardFingerprint {
133 | countInconsistentVShardConf++
134 | }
135 | }
136 | }
137 |
138 | isMasterDead := !master.LastCheckValid // dead from qumomf's point of view
139 |
140 | state := NoProblem
141 | if isMasterDead && countWorkingReplicas == countReplicas && countReplicatingReplicas == 0 {
142 | if countReplicas == 0 {
143 | state = DeadMasterWithoutFollowers
144 | } else {
145 | state = DeadMaster
146 | }
147 | } else if isMasterDead && countWorkingReplicas <= countReplicas && countReplicatingReplicas == 0 {
148 | if countWorkingReplicas == 0 {
149 | state = DeadMasterAndFollowers
150 | } else {
151 | state = DeadMasterAndSomeFollowers
152 | }
153 | } else if isMasterDead && countReplicatingReplicas != 0 {
154 | state = NetworkProblems
155 | } else if !isMasterDead && countReplicas > 0 && countReplicatingReplicas == 0 {
156 | state = AllMasterFollowersNotReplicating
157 | } else if countInconsistentVShardConf > 0 {
158 | if masterMasterReplication {
159 | state = MasterMasterReplication
160 | } else {
161 | state = InconsistentVShardConfiguration
162 | }
163 | } else if !isMasterDead && countReplicas > 0 && countReplicatingReplicas < countReplicas {
164 | state = DeadFollowers
165 | }
166 |
167 | return &ReplicationAnalysis{
168 | Set: set,
169 | CountReplicas: countReplicas,
170 | CountWorkingReplicas: countWorkingReplicas,
171 | CountReplicatingReplicas: countReplicatingReplicas,
172 | CountInconsistentVShardConf: countInconsistentVShardConf,
173 | State: state,
174 | DeadFollowers: deadFollowers,
175 | }
176 | }
177 |
178 | func (m *storageMonitor) Shutdown() {
179 | m.stop <- struct{}{}
180 | }
181 |
-------------------------------------------------------------------------------- /internal/vshard/orchestrator/monitor_test.go: --------------------------------------------------------------------------------
1 | package orchestrator
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 |
7 | "github.com/rs/zerolog"
8 | "github.com/stretchr/testify/assert"
9 | "github.com/stretchr/testify/require"
10 |
11 | "github.com/shmel1k/qumomf/internal/vshard"
12 | )
13 |
14 | func Test_storageMonitor_analyze(t *testing.T) {
15 | logger := zerolog.Nop()
16 |
17 | tests := []struct {
18 | name string
19 | set vshard.ReplicaSet
20 | want *ReplicationAnalysis
21 | }{
22 | {
23 | name: "NoProblem",
24 | set: vshard.ReplicaSet{
25 | UUID: "set_1",
26 | MasterUUID: "replica_1",
27 | Instances: []vshard.Instance{
28 | mockInstance(1, true, vshard.StatusMaster),
29 | mockInstance(2, true, vshard.StatusFollow),
30 |
mockInstance(3, true, vshard.StatusFollow),
31 | },
32 | },
33 | want: &ReplicationAnalysis{
34 | CountReplicas: 2,
35 | CountWorkingReplicas: 2,
36 | CountReplicatingReplicas: 2,
37 | State: NoProblem,
38 | },
39 | },
40 | {
41 | name: "NoProblem_MasterMasterReplication",
42 | set: vshard.ReplicaSet{
43 | UUID: "set_1",
44 | MasterUUID: "replica_1",
45 | Instances: []vshard.Instance{
46 | mockInstance(1, true, vshard.StatusMaster),
47 | mockInstance(2, true, vshard.StatusMaster),
48 | mockInstance(3, true, vshard.StatusFollow),
49 | },
50 | },
51 | want: &ReplicationAnalysis{
52 | CountReplicas: 2,
53 | CountWorkingReplicas: 2,
54 | CountReplicatingReplicas: 2,
55 | State: NoProblem,
56 | },
57 | },
58 | {
59 | name: "DeadMaster",
60 | set: vshard.ReplicaSet{
61 | UUID: "set_1",
62 | MasterUUID: "replica_1",
63 | Instances: []vshard.Instance{
64 | mockInstance(1, false, vshard.StatusMaster),
65 | mockInstance(2, true, vshard.StatusDisconnected),
66 | mockInstance(3, true, vshard.StatusDisconnected),
67 | },
68 | },
69 | want: &ReplicationAnalysis{
70 | CountReplicas: 2,
71 | CountWorkingReplicas: 2,
72 | CountReplicatingReplicas: 0,
73 | State: DeadMaster,
74 | },
75 | },
94 | {
95 | name: "DeadMasterAndFollowers",
96 | set: vshard.ReplicaSet{
97 | UUID: "set_1",
98 | MasterUUID: "replica_1",
99 | Instances: []vshard.Instance{
100 | mockInstance(1, false, vshard.StatusMaster),
101 | mockInstance(2, false, vshard.StatusDisconnected),
102 | mockInstance(3, false, vshard.StatusDisconnected),
103 | },
104 | },
105 | want: &ReplicationAnalysis{
106 | CountReplicas: 2,
107 | CountWorkingReplicas: 0,
108 | CountReplicatingReplicas: 0,
109 | State: DeadMasterAndFollowers,
110 | },
111 | },
112 | {
113 | name: "DeadMasterAndSomeFollowers",
114 | set: vshard.ReplicaSet{
115 | UUID: "set_1",
116 | MasterUUID: "replica_1",
117 | Instances: []vshard.Instance{
118 | mockInstance(1, false, vshard.StatusMaster),
119 | mockInstance(2, false, vshard.StatusDisconnected),
120 | mockInstance(3, true, vshard.StatusDisconnected),
121 | },
122 | },
123 | want: &ReplicationAnalysis{
124 | CountReplicas: 2,
125 | CountWorkingReplicas: 1,
126 | CountReplicatingReplicas: 0,
127 | State: DeadMasterAndSomeFollowers,
128 | },
129 | },
130 | {
131 | name: "DeadMasterWithoutFollowers",
132 | set: vshard.ReplicaSet{
133 | UUID: "set_1",
134 | MasterUUID: "replica_1",
135 | Instances: []vshard.Instance{
136 | mockInstance(1, false, vshard.StatusMaster),
137 | },
138 | },
139 | want: &ReplicationAnalysis{
140 | CountReplicas: 0,
141 | CountWorkingReplicas: 0,
142 | CountReplicatingReplicas: 0,
143 | State: DeadMasterWithoutFollowers,
144 | },
145 | },
146 | {
147 | name: "DeadFollowers",
148 | set: vshard.ReplicaSet{
149 | UUID: "set_1",
150 | MasterUUID: "replica_1",
151 | Instances: []vshard.Instance{
152 | mockInstance(1, true, vshard.StatusMaster),
153 | mockInstance(2, true, vshard.StatusFollow),
154 | mockInstance(3, false, vshard.StatusDisconnected),
155 | mockInstance(4, false, vshard.StatusDisconnected),
156 | },
157 | },
158 | want:
&ReplicationAnalysis{ 159 | CountReplicas: 3, 160 | CountWorkingReplicas: 1, 161 | CountReplicatingReplicas: 1, 162 | State: DeadFollowers, 163 | }, 164 | }, 165 | { 166 | name: "AllMasterFollowersNotReplicating", 167 | set: vshard.ReplicaSet{ 168 | UUID: "set_1", 169 | MasterUUID: "replica_1", 170 | Instances: []vshard.Instance{ 171 | mockInstance(1, true, vshard.StatusMaster), 172 | mockInstance(2, false, vshard.StatusFollow), 173 | mockInstance(3, true, vshard.StatusDisconnected), 174 | }, 175 | }, 176 | want: &ReplicationAnalysis{ 177 | CountReplicas: 2, 178 | CountWorkingReplicas: 1, 179 | CountReplicatingReplicas: 0, 180 | State: AllMasterFollowersNotReplicating, 181 | }, 182 | }, 183 | { 184 | name: "NetworkProblems", 185 | set: vshard.ReplicaSet{ 186 | UUID: "set_1", 187 | MasterUUID: "replica_1", 188 | Instances: []vshard.Instance{ 189 | mockInstance(1, false, vshard.StatusMaster), 190 | mockInstance(2, true, vshard.StatusFollow), 191 | mockInstance(3, true, vshard.StatusFollow), 192 | }, 193 | }, 194 | want: &ReplicationAnalysis{ 195 | CountReplicas: 2, 196 | CountWorkingReplicas: 2, 197 | CountReplicatingReplicas: 2, 198 | State: NetworkProblems, 199 | }, 200 | }, 201 | { 202 | name: "MasterMasterReplication", 203 | set: vshard.ReplicaSet{ 204 | UUID: "set_1", 205 | MasterUUID: "replica_1", 206 | Instances: []vshard.Instance{ 207 | mockInstance(1, true, vshard.StatusMaster), 208 | mockInvalidVShardConf(mockInstance(2, true, vshard.StatusMaster)), 209 | mockInstance(3, true, vshard.StatusFollow), 210 | }, 211 | }, 212 | want: &ReplicationAnalysis{ 213 | CountReplicas: 2, 214 | CountWorkingReplicas: 2, 215 | CountReplicatingReplicas: 2, 216 | CountInconsistentVShardConf: 1, 217 | State: MasterMasterReplication, 218 | }, 219 | }, 220 | { 221 | name: "InconsistentVShardConfiguration", 222 | set: vshard.ReplicaSet{ 223 | UUID: "set_1", 224 | MasterUUID: "replica_1", 225 | Instances: []vshard.Instance{ 226 | mockInstance(1, true, vshard.StatusMaster), 227 | mockInstance(2, true, vshard.StatusFollow), 228 | mockInvalidVShardConf(mockInstance(3, true, vshard.StatusFollow)), 229 | }, 230 | }, 231 | want: &ReplicationAnalysis{ 232 | CountReplicas: 2, 233 | CountWorkingReplicas: 2, 234 | CountReplicatingReplicas: 2, 235 | CountInconsistentVShardConf: 1, 236 | State: InconsistentVShardConfiguration, 237 | }, 238 | }, 239 | } 240 | 241 | for _, tv := range tests { 242 | tt := tv 243 | t.Run(tt.name, func(t *testing.T) { 244 | got := analyze(tt.set, logger) 245 | require.NotNil(t, got) 246 | assert.Equal(t, tt.want.CountReplicas, got.CountReplicas) 247 | assert.Equal(t, tt.want.CountWorkingReplicas, got.CountWorkingReplicas) 248 | assert.Equal(t, tt.want.CountReplicatingReplicas, got.CountReplicatingReplicas) 249 | assert.Equal(t, tt.want.State, got.State) 250 | }) 251 | } 252 | } 253 | 254 | func mockInstance(id int, valid bool, status vshard.ReplicationStatus) vshard.Instance { 255 | return vshard.Instance{ 256 | UUID: vshard.InstanceUUID(fmt.Sprintf("replica_%d", id)), 257 | URI: fmt.Sprintf("replica_%d:3306", id), 258 | LastCheckValid: valid, 259 | StorageInfo: vshard.StorageInfo{ 260 | Replication: vshard.Replication{ 261 | Status: status, 262 | }, 263 | }, 264 | } 265 | } 266 | 267 | func mockInvalidVShardConf(inst vshard.Instance) vshard.Instance { 268 | inst.VShardFingerprint = 1000 269 | return inst 270 | } 271 | -------------------------------------------------------------------------------- /internal/vshard/orchestrator/recovery.go: 
--------------------------------------------------------------------------------
1 | package orchestrator
2 |
3 | import (
4 | "context"
5 | "strconv"
6 | "strings"
7 | "time"
8 |
9 | "github.com/shmel1k/qumomf/internal/util"
10 | "github.com/shmel1k/qumomf/internal/vshard"
11 | )
12 |
13 | // recoveryTimeFormat is a datetime format used in logs.
14 | const recoveryTimeFormat = time.RFC3339
15 |
16 | // RecoveryFunc is a function executed by the orchestrator in case of a failover.
17 | // It returns the list of recoveries applied to the cluster, replica set or instances.
18 | type RecoveryFunc func(ctx context.Context, analysis *ReplicationAnalysis) []*Recovery
19 |
20 | type RecoveryScope string
21 |
22 | const (
23 | RecoveryScopeInstance RecoveryScope = "instance"
24 | RecoveryScopeSet RecoveryScope = "replica set"
25 | )
26 |
27 | // Recovery describes a recovery applied to a cluster, replica set or instance.
28 | type Recovery struct {
29 | Type string
30 | Scope RecoveryScope
31 | AnalysisEntry ReplicationAnalysis
32 | ClusterName string
33 | SetUUID vshard.ReplicaSetUUID
34 | Failed vshard.InstanceIdent
35 | Successor vshard.InstanceIdent
36 | IsSuccessful bool
37 | StartTimestamp int64
38 | EndTimestamp int64
39 | Expiration int64
40 | }
41 |
42 | func NewRecovery(scope RecoveryScope, failed vshard.InstanceIdent, analysis ReplicationAnalysis) *Recovery {
43 | return &Recovery{
44 | Type: string(analysis.State),
45 | Scope: scope,
46 | AnalysisEntry: analysis,
47 | SetUUID: analysis.Set.UUID,
48 | Failed: failed,
49 | StartTimestamp: util.Timestamp(),
50 | }
51 | }
52 |
53 | func (r *Recovery) ExpireAfter(ttl time.Duration) {
54 | exp := time.Now().Add(ttl).Unix()
55 | r.Expiration = exp
56 | }
57 |
58 | // ScopeKey returns the UUID of the replica set or instance
59 | // on which the recovery has been applied.
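// It falls back to the cluster name when the scope is neither an instance nor a replica set.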
60 | func (r *Recovery) ScopeKey() string { 61 | switch r.Scope { 62 | case RecoveryScopeInstance: 63 | return string(r.Failed.UUID) 64 | case RecoveryScopeSet: 65 | return string(r.SetUUID) 66 | } 67 | 68 | return r.ClusterName 69 | } 70 | 71 | func (r *Recovery) Expired() bool { 72 | now := util.Timestamp() 73 | return r.Expiration < now 74 | } 75 | 76 | func (r *Recovery) String() string { 77 | start := time.Unix(r.StartTimestamp, 0).Format(recoveryTimeFormat) 78 | end := time.Unix(r.EndTimestamp, 0).Format(recoveryTimeFormat) 79 | duration := r.EndTimestamp - r.StartTimestamp 80 | 81 | var sb strings.Builder 82 | sb.WriteString("set: ") 83 | sb.WriteString(string(r.SetUUID)) 84 | sb.WriteString(", type: ") 85 | sb.WriteString(r.Type) 86 | sb.WriteString(", failed: ") 87 | sb.WriteString(string(r.Failed.UUID)) 88 | if r.Successor.UUID != "" { 89 | sb.WriteString(", successor: ") 90 | sb.WriteString(string(r.Successor.UUID)) 91 | } 92 | sb.WriteString(", success: ") 93 | sb.WriteString(strconv.FormatBool(r.IsSuccessful)) 94 | sb.WriteString(", period: ") 95 | sb.WriteString(start) 96 | sb.WriteString(" - ") 97 | sb.WriteString(end) 98 | sb.WriteString(", duration: ") 99 | sb.WriteString(strconv.FormatInt(duration, 10)) 100 | sb.WriteString("s") 101 | 102 | return sb.String() 103 | } 104 | -------------------------------------------------------------------------------- /internal/vshard/orchestrator/recovery_test.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/stretchr/testify/assert" 8 | 9 | "github.com/shmel1k/qumomf/internal/util" 10 | "github.com/shmel1k/qumomf/internal/vshard" 11 | ) 12 | 13 | var mockAnalysis = &ReplicationAnalysis{ 14 | Set: vshard.ReplicaSet{ 15 | UUID: "set_uuid", 16 | MasterUUID: "master_uuid", 17 | }, 18 | CountReplicas: 3, 19 | CountWorkingReplicas: 0, 20 | CountReplicatingReplicas: 0, 21 | State: DeadMaster, 22 | } 23 | 24 | func TestNewRecovery(t *testing.T) { 25 | ttl := 100 * time.Second 26 | failed := vshard.InstanceIdent{ 27 | UUID: "master", 28 | URI: "localhost:3301", 29 | } 30 | r := NewRecovery(RecoveryScopeSet, failed, *mockAnalysis) 31 | r.ExpireAfter(ttl) 32 | 33 | assert.Equal(t, *mockAnalysis, r.AnalysisEntry) 34 | assert.Equal(t, mockAnalysis.Set.UUID, r.SetUUID) 35 | assert.Equal(t, failed.UUID, r.Failed.UUID) 36 | assert.Equal(t, failed.URI, r.Failed.URI) 37 | assert.Equal(t, string(DeadMaster), r.Type) 38 | assert.InDelta(t, util.Timestamp(), r.StartTimestamp, 5) 39 | assert.InDelta(t, time.Now().Add(ttl).UTC().Unix(), r.Expiration, 1) 40 | } 41 | 42 | func TestRecovery_Expired(t *testing.T) { 43 | ttl := 1 * time.Second 44 | failed := vshard.InstanceIdent{ 45 | UUID: "master", 46 | URI: "localhost:3301", 47 | } 48 | r := NewRecovery(RecoveryScopeInstance, failed, *mockAnalysis) 49 | r.ExpireAfter(ttl) 50 | 51 | assert.False(t, r.Expired()) 52 | time.Sleep(2 * ttl) 53 | assert.True(t, r.Expired()) 54 | } 55 | -------------------------------------------------------------------------------- /internal/vshard/orchestrator/sampler.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/rs/zerolog" 7 | ) 8 | 9 | type sampler struct { 10 | enabled bool 11 | fingerprints map[string]string 12 | mu *sync.RWMutex 13 | } 14 | 15 | func (s *sampler) sample(analysis *ReplicationAnalysis) zerolog.Level { 16 | if !s.enabled { 17 | 
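// Sampling is disabled: log every analysis at the info level.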
return zerolog.InfoLevel 18 | } 19 | 20 | got, err := analysis.GetHash() 21 | if err != nil { 22 | return zerolog.InfoLevel 23 | } 24 | s.mu.RLock() 25 | found, ok := s.fingerprints[string(analysis.Set.UUID)] 26 | s.mu.RUnlock() 27 | if ok && found == got { 28 | return zerolog.DebugLevel 29 | } 30 | 31 | s.mu.Lock() 32 | defer s.mu.Unlock() 33 | s.fingerprints[string(analysis.Set.UUID)] = got 34 | 35 | return zerolog.InfoLevel 36 | } 37 | -------------------------------------------------------------------------------- /internal/vshard/parser_test.go: -------------------------------------------------------------------------------- 1 | package vshard 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestParseRouterInfo(t *testing.T) { 13 | if testing.Short() { 14 | t.Skip("test requires dev env - skipping it in short mode.") 15 | } 16 | 17 | conn := setupConnection("127.0.0.1:9301", ConnOptions{ 18 | User: "qumomf", 19 | Password: "qumomf", 20 | ConnectTimeout: 1 * time.Second, 21 | QueryTimeout: 1 * time.Second, 22 | }) 23 | 24 | ctx := context.Background() 25 | ctx, cancel := context.WithTimeout(ctx, 5*time.Second) 26 | defer cancel() 27 | 28 | resp := conn.Exec(ctx, vshardRouterInfoQuery) 29 | if resp.Error != nil { 30 | require.Nil(t, resp.Error, resp.String()) 31 | } 32 | 33 | info, err := ParseRouterInfo(resp.Data) 34 | require.Nil(t, err) 35 | 36 | assert.Equal(t, int64(0), info.Status) 37 | assert.Empty(t, info.Alerts) 38 | 39 | b := RouterBucket{ 40 | AvailableRO: 0, 41 | AvailableRW: 120, 42 | Unknown: 0, 43 | Unreachable: 0, 44 | } 45 | assert.Equal(t, b, info.Bucket) 46 | 47 | expected := RouterReplicaSetParameters{ 48 | "7432f072-c00b-4498-b1a6-6d9547a8a150": RouterInstanceParameters{ 49 | UUID: "a94e7310-13f0-4690-b136-169599e87ba0", 50 | Status: InstanceAvailable, 51 | URI: "qumomf_1_m.ddk:3301", 52 | NetworkTimeout: 0.5, 53 | }, 54 | "5065fb5f-5f40-498e-af79-43887ba3d1ec": RouterInstanceParameters{ 55 | UUID: "a3ef657e-eb9a-4730-b420-7ea78d52797d", 56 | Status: InstanceAvailable, 57 | URI: "qumomf_2_m.ddk:3301", 58 | NetworkTimeout: 0.5, 59 | }, 60 | } 61 | 62 | require.Len(t, info.ReplicaSets, len(expected)) 63 | for uuid, set := range info.ReplicaSets { 64 | expSet, ok := expected[uuid] 65 | require.True(t, ok) 66 | 67 | assert.Equal(t, expSet.UUID, set.UUID) 68 | assert.Equal(t, expSet.Status, set.Status) 69 | assert.Equal(t, expSet.URI, set.URI) 70 | assert.InDelta(t, expSet.NetworkTimeout, set.NetworkTimeout, 1.0) 71 | } 72 | } 73 | 74 | func TestParseReplication(t *testing.T) { 75 | if testing.Short() { 76 | t.Skip("test requires dev env - skipping it in short mode.") 77 | } 78 | 79 | conn := setupConnection("127.0.0.1:9303", ConnOptions{ 80 | User: "qumomf", 81 | Password: "qumomf", 82 | ConnectTimeout: 1 * time.Second, 83 | QueryTimeout: 1 * time.Second, 84 | }) 85 | 86 | ctx := context.Background() 87 | ctx, cancel := context.WithTimeout(ctx, 5*time.Second) 88 | defer cancel() 89 | 90 | resp := conn.Exec(ctx, vshardBoxInfoQuery) 91 | if resp.Error != nil { 92 | require.Nil(t, resp.Error, resp.String()) 93 | } 94 | 95 | data, err := ParseReplication(resp.Data) 96 | require.Nil(t, err) 97 | 98 | assert.Len(t, data, 2) 99 | 100 | master := data[0] 101 | assert.Equal(t, uint64(1), master.ID) 102 | assert.Equal(t, InstanceUUID("a94e7310-13f0-4690-b136-169599e87ba0"), master.UUID) 103 | assert.Equal(t, "", master.URI) // No upstream data for master, URI must 
be set manually 104 | assert.Equal(t, int64(105), master.LSN) 105 | assert.Equal(t, int64(0), master.LSNBehindMaster) 106 | assert.Nil(t, master.Upstream) 107 | assert.Nil(t, master.Downstream) 108 | 109 | replica := data[1] 110 | assert.Equal(t, uint64(2), replica.ID) 111 | assert.Equal(t, InstanceUUID("bd1095d1-1e73-4ceb-8e2f-6ebdc7838cb1"), replica.UUID) 112 | assert.Equal(t, "qumomf_1_s.ddk:3301", replica.URI) 113 | assert.Equal(t, int64(0), replica.LSN) 114 | assert.Equal(t, int64(0), replica.LSNBehindMaster) 115 | require.NotNil(t, replica.Upstream) 116 | assert.Equal(t, UpstreamFollow, replica.Upstream.Status) 117 | require.NotNil(t, replica.Downstream) 118 | assert.Equal(t, DownstreamFollow, replica.Downstream.Status) 119 | } 120 | 121 | func TestParseInstanceInfo(t *testing.T) { 122 | if testing.Short() { 123 | t.Skip("test requires dev env - skipping it in short mode.") 124 | } 125 | 126 | conn := setupConnection("127.0.0.1:9304", ConnOptions{ 127 | User: "qumomf", 128 | Password: "qumomf", 129 | ConnectTimeout: 1 * time.Second, 130 | QueryTimeout: 1 * time.Second, 131 | }) 132 | 133 | ctx := context.Background() 134 | ctx, cancel := context.WithTimeout(ctx, 5*time.Second) 135 | defer cancel() 136 | 137 | resp := conn.Exec(ctx, vshardInstanceInfoQuery) 138 | if resp.Error != nil { 139 | require.Nil(t, resp.Error, resp.String()) 140 | } 141 | 142 | data, err := ParseInstanceInfo(resp.Data) 143 | require.Nil(t, err) 144 | 145 | assert.True(t, data.Readonly) 146 | assert.Equal(t, uint64(251215738), data.VShardFingerprint) 147 | 148 | storage := &data.StorageInfo 149 | assert.Equal(t, HealthCodeGreen, storage.Status) 150 | 151 | replication := &storage.Replication 152 | assert.Equal(t, StatusFollow, replication.Status) 153 | 154 | assert.Empty(t, storage.Alerts) 155 | 156 | b := InstanceBucket{ 157 | Active: 60, 158 | Garbage: 0, 159 | Pinned: 0, 160 | Receiving: 0, 161 | Sending: 0, 162 | Total: 60, 163 | } 164 | assert.Equal(t, b, storage.Bucket) 165 | } 166 | 167 | func TestParseReplication_TableTests(t *testing.T) { 168 | tests := []struct { 169 | name string 170 | data [][]interface{} 171 | want []Instance 172 | }{ 173 | { 174 | name: "regular_case", 175 | data: [][]interface{}{ 176 | { 177 | map[string]interface{}{ 178 | "id": int64(1), 179 | "uuid": "uuid", 180 | "lsn": int64(1), 181 | "lsn_behind_master": int64(1), 182 | "downstream": map[string]interface{}{ 183 | "idle": int64(1), 184 | "status": "follow", 185 | }, 186 | "upstream": map[string]interface{}{ 187 | "idle": int64(1), 188 | "lag": int64(1), 189 | "peer": "test@test", 190 | "status": "follow", 191 | }, 192 | }, 193 | }, 194 | }, 195 | want: []Instance{ 196 | { 197 | URI: "test", 198 | ID: 1, 199 | UUID: "uuid", 200 | LSN: 1, 201 | LSNBehindMaster: 1, 202 | Downstream: &Downstream{Status: DownstreamFollow}, 203 | Upstream: &Upstream{ 204 | Status: UpstreamFollow, 205 | Idle: 1, 206 | Peer: "test@test", 207 | Lag: 1, 208 | }, 209 | }, 210 | }, 211 | }, 212 | { 213 | name: "response_array_with_gaps_should_ignore_it", 214 | data: [][]interface{}{ 215 | { 216 | nil, 217 | map[string]interface{}{ 218 | "id": int64(1), 219 | "uuid": "uuid", 220 | "lsn": int64(1), 221 | "lsn_behind_master": int64(1), 222 | }, 223 | }, 224 | }, 225 | want: []Instance{ 226 | { 227 | ID: 1, 228 | UUID: "uuid", 229 | LSN: 1, 230 | LSNBehindMaster: 1, 231 | }, 232 | }, 233 | }, 234 | } 235 | 236 | for _, tt := range tests { 237 | tc := tt 238 | t.Run(tt.name, func(t *testing.T) { 239 | got, err := ParseReplication(tc.data) 240 | 241 | 
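// Both the complete entry and the sparse one with gaps must parse without an error.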
assert.NoError(t, err)
242 | assert.Equal(t, tc.want, got)
243 | })
244 | }
245 | }
246 |
-------------------------------------------------------------------------------- /internal/vshard/replicaset.go: --------------------------------------------------------------------------------
1 | package vshard
2 |
3 | import (
4 | "fmt"
5 | "sort"
6 | "strconv"
7 | "strings"
8 | )
9 |
10 | type ReplicaSetUUID string
11 |
12 | type ReplicaSet struct {
13 | // UUID is a unique identifier of the replica set in the cluster.
14 | UUID ReplicaSetUUID `json:"uuid"`
15 |
16 | // MasterUUID is the UUID of the current master in the replica set.
17 | MasterUUID InstanceUUID `json:"master_uuid"`
18 |
19 | // MasterURI is the URI of the current master in the replica set.
20 | MasterURI string `json:"master_uri"`
21 |
22 | // Instances contains replication statistics and storage info
23 | // for all instances in the replica set with regard to the current master.
24 | Instances []Instance `json:"instances"`
25 | }
26 |
27 | func (set ReplicaSet) Copy() ReplicaSet {
28 | r := ReplicaSet{
29 | UUID: set.UUID,
30 | MasterUUID: set.MasterUUID,
31 | MasterURI: set.MasterURI,
32 | Instances: make([]Instance, len(set.Instances)),
33 | }
34 | copy(r.Instances, set.Instances)
35 |
36 | return r
37 | }
38 |
39 | func (set ReplicaSet) SameAs(another *ReplicaSet) bool {
40 | if set.UUID != another.UUID {
41 | return false
42 | }
43 |
44 | n := len(set.Instances)
45 | if set.MasterUUID != another.MasterUUID || n != len(another.Instances) {
46 | return false
47 | }
48 |
49 | instances := set.Instances
50 | anotherInstances := another.Instances
51 | sortInstances(instances)
52 | sortInstances(anotherInstances)
53 |
54 | for i := 0; i < n; i++ {
55 | if !instances[i].SameAs(anotherInstances[i]) {
56 | return false
57 | }
58 | }
59 |
60 | return true
61 | }
62 |
63 | func sortInstances(instances []Instance) {
64 | sort.Slice(instances, func(i, j int) bool {
65 | return instances[i].UUID < instances[j].UUID
66 | })
67 | }
68 |
69 | func (set ReplicaSet) HealthStatus() (code HealthCode, level HealthLevel) {
70 | master, err := set.Master()
71 | if err != nil {
72 | return HealthCodeUnknown, HealthLevelUnknown
73 | }
74 |
75 | return master.CriticalCode(), master.CriticalLevel()
76 | }
77 |
78 | func (set ReplicaSet) Followers() []Instance {
79 | if len(set.Instances) == 0 {
80 | return []Instance{}
81 | }
82 |
83 | followers := make([]Instance, 0, len(set.Instances)-1)
84 | for _, inst := range set.Instances { //nolint:gocritic
85 | if inst.UUID != set.MasterUUID {
86 | followers = append(followers, inst)
87 | }
88 | }
89 |
90 | return followers
91 | }
92 |
93 | func (set ReplicaSet) AliveFollowers() []Instance {
94 | if len(set.Instances) == 0 {
95 | return []Instance{}
96 | }
97 |
98 | followers := make([]Instance, 0, len(set.Instances)-1)
99 | for _, inst := range set.Instances { // nolint:gocritic
100 | if inst.UUID == set.MasterUUID {
101 | continue
102 | }
103 |
104 | if !inst.LastCheckValid {
105 | continue
106 | }
107 |
108 | upstream := inst.Upstream
109 | downstream := inst.Downstream
110 |
111 | if upstream == nil && downstream == nil {
112 | continue
113 | }
114 |
115 | if upstream != nil {
116 | if upstream.Status != UpstreamDisconnected && upstream.Status != UpstreamStopped {
117 | followers = append(followers, inst)
118 | }
119 | } else if downstream != nil {
120 | if downstream.Status != DownstreamStopped {
121 | followers = append(followers, inst)
122 | }
123 | }
124 | }
125 |
126 | return followers
127 | }
128 |
129 | func (set
ReplicaSet) Master() (Instance, error) { 130 | for i := range set.Instances { 131 | inst := &set.Instances[i] 132 | if inst.UUID == set.MasterUUID { 133 | return *inst, nil 134 | } 135 | } 136 | 137 | return Instance{}, fmt.Errorf("replica set `%s` has invalid topology snapshot: master `%s` not found", set.UUID, set.MasterUUID) 138 | } 139 | 140 | func (set ReplicaSet) String() string { 141 | // Minimal style, only important info. 142 | var sb strings.Builder 143 | sb.WriteString("id: ") 144 | sb.WriteString(string(set.UUID)) 145 | sb.WriteString("; master_uuid: ") 146 | sb.WriteString(string(set.MasterUUID)) 147 | sb.WriteString("; master_uri: ") 148 | sb.WriteString(set.MasterURI) 149 | sb.WriteString("; size: ") 150 | sb.WriteString(strconv.Itoa(len(set.Instances))) 151 | sb.WriteString("; health: ") 152 | _, cl := set.HealthStatus() 153 | sb.WriteString(string(cl)) 154 | 155 | if cl == HealthLevelGreen { 156 | return sb.String() 157 | } 158 | 159 | sb.WriteString("; alerts: [") 160 | prettyList := false 161 | for i := range set.Instances { 162 | inst := &set.Instances[i] 163 | alerts := inst.StorageInfo.Alerts 164 | if len(alerts) > 0 { 165 | if prettyList { 166 | sb.WriteString(", ") 167 | } 168 | sb.WriteString(inst.URI) 169 | sb.WriteString(" -> ") 170 | for j, alert := range alerts { 171 | sb.WriteString(alert.String()) 172 | if j != len(alerts)-1 { 173 | sb.WriteString(", ") 174 | } 175 | } 176 | prettyList = true 177 | } 178 | } 179 | sb.WriteString("]") 180 | 181 | return sb.String() 182 | } 183 | -------------------------------------------------------------------------------- /internal/vshard/replicaset_test.go: -------------------------------------------------------------------------------- 1 | package vshard 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestReplicaSet_Followers(t *testing.T) { 10 | type fields struct { 11 | UUID ReplicaSetUUID 12 | MasterUUID InstanceUUID 13 | Instances []Instance 14 | } 15 | 16 | tests := []struct { 17 | name string 18 | fields fields 19 | want []InstanceUUID 20 | }{ 21 | { 22 | name: "NoFollowers", 23 | fields: fields{ 24 | UUID: "uuid_1", 25 | MasterUUID: "master_uuid_1", 26 | Instances: []Instance{}, 27 | }, 28 | want: []InstanceUUID{}, 29 | }, 30 | { 31 | name: "MultipleFollowers", 32 | fields: fields{ 33 | UUID: "uuid_1", 34 | MasterUUID: "master_uuid_1", 35 | Instances: []Instance{ 36 | { 37 | UUID: "master_uuid_1", 38 | }, 39 | { 40 | UUID: "replica_uuid_1", 41 | }, 42 | { 43 | UUID: "replica_uuid_2", 44 | }, 45 | }, 46 | }, 47 | want: []InstanceUUID{"replica_uuid_1", "replica_uuid_2"}, 48 | }, 49 | } 50 | 51 | for _, tv := range tests { 52 | tt := tv 53 | t.Run(tt.name, func(t *testing.T) { 54 | set := ReplicaSet{ 55 | UUID: tt.fields.UUID, 56 | MasterUUID: tt.fields.MasterUUID, 57 | Instances: tt.fields.Instances, 58 | } 59 | 60 | followers := set.Followers() 61 | got := make([]InstanceUUID, 0, len(followers)) 62 | for _, f := range followers { 63 | got = append(got, f.UUID) 64 | } 65 | 66 | assert.Equal(t, tt.want, got) 67 | }) 68 | } 69 | } 70 | 71 | func TestReplicaSet_AliveFollowers(t *testing.T) { 72 | type fields struct { 73 | UUID ReplicaSetUUID 74 | MasterUUID InstanceUUID 75 | Instances []Instance 76 | } 77 | 78 | tests := []struct { 79 | name string 80 | fields fields 81 | want []InstanceUUID 82 | }{ 83 | { 84 | name: "NoFollowers", 85 | fields: fields{ 86 | UUID: "uuid_1", 87 | MasterUUID: "master_uuid_1", 88 | Instances: []Instance{}, 89 | }, 90 | want: []InstanceUUID{}, 
91 | },
92 | {
93 | name: "MultipleFollowers",
94 | fields: fields{
95 | UUID: "uuid_1",
96 | MasterUUID: "master_uuid_1",
97 | Instances: []Instance{
98 | {
99 | UUID: "master_uuid_1",
100 | Upstream: &Upstream{
101 | Status: UpstreamRunning,
102 | },
103 | },
104 | {
105 | UUID: "replica_uuid_1",
106 | LastCheckValid: true,
107 | Upstream: &Upstream{
108 | Status: UpstreamFollow,
109 | },
110 | Downstream: &Downstream{
111 | Status: DownstreamFollow,
112 | },
113 | },
114 | {
115 | UUID: "replica_uuid_2",
116 | LastCheckValid: true,
117 | Upstream: &Upstream{
118 | Status: UpstreamFollow,
119 | },
120 | Downstream: &Downstream{
121 | Status: DownstreamFollow,
122 | },
123 | },
124 | {
125 | UUID: "replica_uuid_3",
126 | LastCheckValid: true,
127 | Upstream: &Upstream{
128 | Status: UpstreamStopped,
129 | },
130 | },
131 | {
132 | UUID: "replica_uuid_4",
133 | LastCheckValid: false,
134 | Upstream: &Upstream{
135 | Status: UpstreamFollow,
136 | },
137 | },
138 | },
139 | },
140 | want: []InstanceUUID{"replica_uuid_1", "replica_uuid_2"},
141 | },
142 | }
143 |
144 | for _, tv := range tests {
145 | tt := tv
146 | t.Run(tt.name, func(t *testing.T) {
147 | set := &ReplicaSet{
148 | UUID: tt.fields.UUID,
149 | MasterUUID: tt.fields.MasterUUID,
150 | Instances: tt.fields.Instances,
151 | }
152 |
153 | followers := set.AliveFollowers()
154 | got := make([]InstanceUUID, 0, len(followers))
155 | for _, f := range followers {
156 | got = append(got, f.UUID)
157 | }
158 |
159 | assert.Equal(t, tt.want, got)
160 | })
161 | }
162 | }
163 |
-------------------------------------------------------------------------------- /internal/vshard/router.go: --------------------------------------------------------------------------------
1 | package vshard
2 |
3 | type InstanceStatus string
4 |
5 | const (
6 | InstanceAvailable InstanceStatus = "available"
7 | InstanceUnreachable InstanceStatus = "unreachable"
8 | InstanceMissing InstanceStatus = "missing"
9 | )
10 |
11 | type Router struct {
12 | URI string `json:"uri"`
13 | Info RouterInfo `json:"info"`
14 | }
15 |
16 | func NewRouter(uri string) Router {
17 | return Router{
18 | URI: uri,
19 | Info: RouterInfo{
20 | Status: -1,
21 | },
22 | }
23 | }
24 |
25 | type RouterInfo struct {
26 | LastSeen int64 `json:"last_seen"`
27 | ReplicaSets RouterReplicaSetParameters `json:"replica_sets"`
28 | Bucket RouterBucket `json:"bucket"`
29 | Status int64 `json:"status"`
30 | Alerts []Alert `json:"alerts"`
31 | }
32 |
33 | type RouterReplicaSetParameters map[ReplicaSetUUID]RouterInstanceParameters
34 |
35 | type RouterInstanceParameters struct {
36 | UUID InstanceUUID `json:"uuid"`
37 | Status InstanceStatus `json:"status"`
38 | URI string `json:"uri"`
39 | NetworkTimeout float64 `json:"network_timeout"`
40 | }
41 |
42 | // RouterBucket represents bucket parameters known to the router.
43 | type RouterBucket struct {
44 | // AvailableRO is the number of buckets known to the router
45 | // and available for read requests.
46 | AvailableRO int64 `json:"available_ro"`
47 |
48 | // AvailableRW is the number of buckets known to the router
49 | // and available for read and write requests.
50 | AvailableRW int64 `json:"available_rw"`
51 |
52 | // Unknown is the number of buckets
53 | // whose replica sets are not known to the router.
54 | Unknown int64 `json:"unknown"`
55 |
56 | // Unreachable is the number of buckets known to the router
57 | // but unavailable for any requests.
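// In a healthy cluster both Unknown and Unreachable are expected to stay at zero.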
58 | Unreachable int64 `json:"unreachable"`
59 | }
60 |
-------------------------------------------------------------------------------- /internal/vshard/snapshot.go: --------------------------------------------------------------------------------
1 | package vshard
2 |
3 | // Snapshot is a copy of the cluster topology at a given point in time.
4 | type Snapshot struct {
5 | Created int64 `json:"created"`
6 | Routers []Router `json:"routers"`
7 | ReplicaSets []ReplicaSet `json:"replica_sets"`
8 | priorities map[string]int
9 | }
10 |
11 | func (s *Snapshot) ClusterHealthLevel() HealthLevel {
12 | hc := HealthCodeGreen
13 | for _, replicaSet := range s.ReplicaSets {
14 | gotHC, _ := replicaSet.HealthStatus()
15 | if gotHC > hc {
16 | hc = gotHC
17 | }
18 | }
19 |
20 | return s.healthLevel(hc)
21 | }
22 |
23 | func (s *Snapshot) healthLevel(healthCode HealthCode) HealthLevel {
24 | switch healthCode {
25 | case HealthCodeGreen:
26 | return HealthLevelGreen
27 | case HealthCodeYellow:
28 | return HealthLevelYellow
29 | case HealthCodeOrange:
30 | return HealthLevelOrange
31 | case HealthCodeRed:
32 | return HealthLevelRed
33 | }
34 |
35 | return HealthLevelUnknown
36 | }
37 |
38 | func (s *Snapshot) Copy() Snapshot {
39 | dst := Snapshot{
40 | Created: s.Created,
41 | Routers: make([]Router, len(s.Routers)),
42 | ReplicaSets: make([]ReplicaSet, 0, len(s.ReplicaSets)),
43 | priorities: make(map[string]int),
44 | }
45 |
46 | for key, value := range s.priorities {
47 | dst.priorities[key] = value
48 | }
49 |
50 | for _, set := range s.ReplicaSets {
51 | dst.ReplicaSets = append(dst.ReplicaSets, set.Copy())
52 | }
53 |
54 | copy(dst.Routers, s.Routers)
55 |
56 | return dst
57 | }
58 |
59 | func (s *Snapshot) TopologyOf(uuid ReplicaSetUUID) ([]Instance, error) {
60 | for _, set := range s.ReplicaSets {
61 | if set.UUID == uuid {
62 | return set.Instances, nil
63 | }
64 | }
65 |
66 | return []Instance{}, ErrReplicaSetNotFound
67 | }
68 |
69 | func (s *Snapshot) ReplicaSet(uuid ReplicaSetUUID) (ReplicaSet, error) {
70 | for _, set := range s.ReplicaSets {
71 | if set.UUID == uuid {
72 | return set, nil
73 | }
74 | }
75 |
76 | return ReplicaSet{}, ErrReplicaSetNotFound
77 | }
78 |
79 | func (s *Snapshot) UpdatePriorities(priorities map[string]int) {
80 | s.priorities = priorities
81 |
82 | for i := range s.ReplicaSets {
83 | set := &s.ReplicaSets[i]
84 | for j := range set.Instances {
85 | inst := &set.Instances[j]
86 | if priority, ok := s.priorities[string(inst.UUID)]; ok {
87 | inst.Priority = priority
88 | } else {
89 | inst.Priority = 0
90 | }
91 | }
92 | }
93 | }
94 |
-------------------------------------------------------------------------------- /internal/vshard/tarantool.go: --------------------------------------------------------------------------------
1 | package vshard
2 |
3 | import (
4 | "context"
5 | "strings"
6 | "sync"
7 | "time"
8 |
9 | "github.com/viciious/go-tarantool"
10 | )
11 |
12 | const maxRetries = 2 // TODO: move to config
13 |
14 | var tntRetryableErrors = []uint{
15 | tarantool.ErrNoConnection,
16 | tarantool.ErrTimeout,
17 | }
18 |
19 | type ConnPool interface {
20 | Get(uri string) *Connector
21 | Close()
22 | }
23 |
24 | type ConnOptions struct {
25 | User string
26 | Password string
27 | ConnectTimeout time.Duration
28 | QueryTimeout time.Duration
29 | }
30 |
31 | type OverrideURIRules map[string]string
32 |
33 | type pool struct {
34 | template ConnOptions
35 | rules OverrideURIRules
36 |
37 | m map[string]*Connector
38 | mutex sync.RWMutex
39 | }
40 |
41 | func
NewConnPool(template ConnOptions, rules OverrideURIRules) ConnPool {
42 | return &pool{
43 | template: template,
44 | rules: rules,
45 | m: make(map[string]*Connector),
46 | }
47 | }
48 |
49 | func (p *pool) Get(uri string) *Connector {
50 | u := removeUserInfo(uri)
51 | u = overrideURI(u, p.rules)
52 |
53 | p.mutex.RLock()
54 | conn, ok := p.m[u]
55 | p.mutex.RUnlock()
56 | if ok {
57 | return conn
58 | }
59 |
60 | p.mutex.Lock()
61 | defer p.mutex.Unlock()
62 | // Double check under the write lock: another goroutine may have created the connector.
63 | // The deferred unlock also fixes a deadlock: the early return below used to leave the mutex locked.
64 | conn, ok = p.m[u]
65 | if ok {
66 | return conn
67 | }
68 | conn = setupConnection(u, p.template)
69 | p.m[u] = conn
70 |
71 | return conn
72 | }
73 |
74 | func overrideURI(uri string, rules OverrideURIRules) string {
75 | u, ok := rules[uri]
76 | if ok {
77 | return u
78 | }
79 | return uri
80 | }
81 |
82 | func (p *pool) Close() {
83 | p.mutex.Lock()
84 | for _, conn := range p.m {
85 | conn.Close()
86 | }
87 | p.mutex.Unlock()
88 | }
89 |
90 | func removeUserInfo(uri string) string {
91 | if idx := strings.IndexByte(uri, '@'); idx >= 0 {
92 | return uri[idx+1:]
93 | }
94 | return uri
95 | }
96 |
97 | type Connector struct {
98 | conn *tarantool.Connector
99 | }
100 |
101 | func (c *Connector) Exec(ctx context.Context, q tarantool.Query, opts ...tarantool.ExecOption) *tarantool.Result {
102 | var resp *tarantool.Result
103 | for i := 0; i < maxRetries; i++ {
104 | conn, err := c.conn.Connect()
105 | if err != nil {
106 | return &tarantool.Result{
107 | Error: err,
108 | }
109 | }
110 |
111 | select {
112 | case <-ctx.Done():
113 | return &tarantool.Result{
114 | Error: tarantool.NewContextError(ctx, conn, "Exec error"),
115 | ErrorCode: tarantool.ErrTimeout,
116 | }
117 | default:
118 | }
119 |
120 | resp = conn.Exec(ctx, q, opts...)
121 | if resp.Error != nil && isRetryable(resp.ErrorCode) {
122 | conn.Close()
123 | continue
124 | }
125 | return resp
126 | }
127 |
128 | return resp
129 | }
130 |
131 | func (c *Connector) Close() {
132 | c.conn.Close()
133 | }
134 |
135 | func setupConnection(uri string, c ConnOptions) *Connector {
136 | cfg := &tarantool.Options{
137 | User: c.User,
138 | Password: c.Password,
139 | ConnectTimeout: c.ConnectTimeout,
140 | QueryTimeout: c.QueryTimeout,
141 | }
142 |
143 | conn := tarantool.New(uri, cfg)
144 | return &Connector{
145 | conn: conn,
146 | }
147 | }
148 |
149 | func isRetryable(code uint) bool {
150 | for _, rc := range tntRetryableErrors {
151 | if rc == code {
152 | return true
153 | }
154 | }
155 |
156 | return false
157 | }
158 |
-------------------------------------------------------------------------------- /internal/vshard/tarantool_test.go: --------------------------------------------------------------------------------
1 | package vshard
2 |
3 | import (
4 | "strconv"
5 | "strings"
6 | "sync"
7 | "testing"
8 |
9 | "github.com/stretchr/testify/assert"
10 | "github.com/stretchr/testify/require"
11 | )
12 |
13 | func Test_removeUserInfo(t *testing.T) {
14 | tests := []struct {
15 | name string
16 | uri string
17 | want string
18 | }{
19 | {
20 | name: "NoUserInfo_ShouldReturnTheSameUri",
21 | uri: "tarantool.repl:3301",
22 | want: "tarantool.repl:3301",
23 | },
24 | {
25 | name: "Username_ShouldReturnHostAndPort",
26 | uri: "qumomf@tarantool.repl:3301",
27 | want: "tarantool.repl:3301",
28 | },
29 | {
30 | name: "UsernameAndPass_ShouldReturnHostAndPort",
31 | uri: "qumomf:qumomf@tarantool.repl:3301",
32 | want: "tarantool.repl:3301",
33 | },
34 | }
35 | for _, tv := range tests {
36 | tt := tv
37 | t.Run(tt.name, func(t *testing.T) {
38 | got :=
removeUserInfo(tt.uri) 39 | assert.Equal(t, tt.want, got) 40 | }) 41 | } 42 | } 43 | 44 | func Test_overrideURI(t *testing.T) { 45 | type args struct { 46 | uri string 47 | rules OverrideURIRules 48 | } 49 | tests := []struct { 50 | name string 51 | args args 52 | want string 53 | }{ 54 | { 55 | name: "NoRules_ShouldReturnTheSameUri", 56 | args: args{ 57 | uri: "tarantool.repl:3301", 58 | rules: nil, 59 | }, 60 | want: "tarantool.repl:3301", 61 | }, 62 | { 63 | name: "NoSuitableRule_ShouldReturnTheSameUri", 64 | args: args{ 65 | uri: "tarantool.repl:3301", 66 | rules: OverrideURIRules{ 67 | "tarantool2.repl:3301": "tnt2.repl:3301", 68 | "tarantool.repl:8801": "tnt.repl:8801", 69 | }, 70 | }, 71 | want: "tarantool.repl:3301", 72 | }, 73 | { 74 | name: "RuleApplied_ShouldReturnOverridden", 75 | args: args{ 76 | uri: "tarantool.repl:3301", 77 | rules: OverrideURIRules{ 78 | "tarantool.repl:3301": "tnt.repl:3301", 79 | "tarantool.repl:8801": "tnt.repl:8801", 80 | }, 81 | }, 82 | want: "tnt.repl:3301", 83 | }, 84 | } 85 | for _, tv := range tests { 86 | tt := tv 87 | t.Run(tt.name, func(t *testing.T) { 88 | got := overrideURI(tt.args.uri, tt.args.rules) 89 | assert.Equal(t, tt.want, got) 90 | }) 91 | } 92 | } 93 | 94 | func TestPool_Get(t *testing.T) { 95 | connOpts := ConnOptions{ 96 | User: "qumomf", 97 | Password: "qumomf", 98 | } 99 | p := NewConnPool(connOpts, nil) 100 | uri := "tarantool.repl:3301" 101 | n := 1000 102 | 103 | ch := make(chan *Connector, n) 104 | var wg sync.WaitGroup 105 | wg.Add(n) 106 | for i := 0; i < n; i++ { 107 | go func() { 108 | ch <- p.Get(uri) 109 | wg.Done() 110 | }() 111 | } 112 | wg.Wait() 113 | close(ch) 114 | 115 | var conn *Connector 116 | for c := range ch { 117 | if conn == nil { 118 | conn = c 119 | } 120 | require.Same(t, conn, c) 121 | } 122 | 123 | p.Close() 124 | } 125 | 126 | func BenchmarkPool_Get(b *testing.B) { 127 | connOpts := ConnOptions{ 128 | User: "qumomf", 129 | Password: "qumomf", 130 | } 131 | p := NewConnPool(connOpts, nil) 132 | 133 | var ub strings.Builder 134 | var uri string 135 | var conn *Connector 136 | 137 | b.ReportAllocs() 138 | for i := 0; i < b.N; i++ { 139 | ub.Reset() 140 | ub.WriteString("tnt-") 141 | ub.WriteString(strconv.Itoa(i)) 142 | ub.WriteString(":3301") 143 | uri = ub.String() 144 | 145 | conn = p.Get(uri) 146 | conn.Close() 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /scripts/etc/systemd/qumomf.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=qumomf: Tarantool vshard HA tool supports auto discovery and recovery. 
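# qumomf runs as a foreground process; systemd supervises it and captures its stdout/stderr in the journal by default.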
3 | Documentation=https://github.com/shmel1k/qumomf 4 | After=syslog.target network.target 5 | 6 | [Service] 7 | Type=simple 8 | WorkingDirectory=/usr/local/bin 9 | ExecStart=/usr/local/bin/qumomf -config /etc/qumomf/conf.yml 10 | TimeoutSec=30 11 | 12 | [Install] 13 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /scripts/postinstall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | systemctl --system daemon-reload >/dev/null || true 4 | systemctl enable qumomf.service >/dev/null || true 5 | 6 | deb_systemctl=$(command -v deb-systemd-invoke || echo systemctl) 7 | ${deb_systemctl} restart qumomf.service >/dev/null || true 8 | -------------------------------------------------------------------------------- /scripts/preremove.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | deb_systemctl=$(command -v deb-systemd-invoke || echo systemctl) 4 | ${deb_systemctl} stop qumomf.service >/dev/null || true 5 | 6 | systemctl disable qumomf.service >/dev/null || true 7 | systemctl --system daemon-reload >/dev/null || true 8 | --------------------------------------------------------------------------------