├── .github ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── codeql.yml │ ├── docker-jepsen.yml │ ├── docker-tests-8.0.yml │ ├── docker-tests-8.4.yml │ ├── docker-tests.yml │ ├── golangci-lint.yml │ └── unit-tests.yml ├── .gitignore ├── .golangci.yml ├── AUTHORS ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── cmd └── mysync │ ├── abort.go │ ├── hosts.go │ ├── info.go │ ├── main.go │ ├── maintenance.go │ ├── optimize.go │ ├── state.go │ └── switch.go ├── go.mod ├── go.sum ├── internal ├── app │ ├── app.go │ ├── app_dcs.go │ ├── async.go │ ├── cli_host.go │ ├── cli_info.go │ ├── cli_maintenance.go │ ├── cli_optimize.go │ ├── cli_state.go │ ├── cli_switch.go │ ├── cli_util.go │ ├── data.go │ ├── data_test.go │ ├── replication.go │ ├── util.go │ └── util_test.go ├── config │ └── config.go ├── dcs │ ├── config.go │ ├── dcs.go │ ├── zk.go │ ├── zk_host_provider.go │ ├── zk_test.go │ └── zk_tls.go ├── log │ ├── log.go │ └── syslog.go ├── mysql │ ├── cluster.go │ ├── commands.go │ ├── data.go │ ├── gtids │ │ ├── utils.go │ │ ├── wrapper.go │ │ └── wrapper_test.go │ ├── node.go │ ├── queries.go │ ├── replication.go │ ├── switch_helper.go │ └── util.go └── util │ ├── consts.go │ ├── user.go │ └── util.go ├── mysync.arch.png └── tests ├── features ├── CLI.feature ├── active_nodes.feature ├── async.feature ├── async_setting.feature ├── cascade_replicas.84.feature ├── cascade_replicas.feature ├── crash_recovery.feature ├── events_reenable.84.feature ├── events_reenable.feature ├── external_replication.feature ├── failover.84.feature ├── failover.feature ├── free_space.feature ├── host_discovery.feature ├── host_management.feature ├── maintenance.84.feature ├── maintenance.feature ├── manager_switchover.feature ├── offline_mode.84.feature ├── offline_mode.feature ├── priority.feature ├── readonly_filesystem.feature ├── recovery.feature ├── repair.feature ├── repl_mon.feature ├── statefile.feature ├── switchover_from.84.feature ├── 
switchover_from.feature ├── switchover_to.feature ├── zk_failure.feature └── zk_maintenance.feature ├── images ├── base │ ├── Dockerfile │ ├── generate_certs.sh │ ├── percona.gpg │ ├── setup.sh │ ├── sshd_config │ ├── supervisor.conf │ └── supervisor_ssh.conf ├── copy_keys.sh ├── docker-compose.yaml ├── jepsen-compose.yml ├── jepsen_common │ ├── Dockerfile │ ├── ssh_config │ └── sshd_config ├── jepsen_main │ ├── Dockerfile │ ├── jepsen │ │ ├── project.clj │ │ ├── run.sh │ │ ├── src │ │ │ └── jepsen │ │ │ │ └── mysync.clj │ │ └── test │ │ │ └── jepsen │ │ │ └── mysync_test.clj │ └── save_logs.sh ├── jepsen_sshd_config ├── mysql │ ├── .my.cnf │ ├── Dockerfile │ ├── my.cnf │ ├── my.cnf.8.0 │ ├── my.cnf.8.4 │ ├── mysync.yaml │ ├── setup.sh │ ├── start_mysql.sh │ ├── start_mysql_84.sh │ ├── start_mysync.sh │ ├── supervisor_mysql.conf │ └── supervisor_mysql.conf.8.4 ├── mysql_jepsen │ ├── .my.cnf │ ├── Dockerfile │ ├── my.cnf │ ├── mysync.yaml │ ├── setup.sh │ ├── sh-scripts │ │ ├── my-resetup-wd.sh │ │ ├── my-resetup.sh │ │ └── my-wait-started.sh │ ├── start_mysql.sh │ ├── start_mysync.sh │ └── supervisor_mysql.conf ├── zookeeper │ ├── Dockerfile │ ├── setup.sh │ ├── start.sh │ ├── supervisor_zookeeper.conf │ └── zoo.cfg └── zookeeper_jepsen │ ├── Dockerfile │ ├── generate_certs_with_restart.sh │ ├── retriable_path_create.sh │ ├── start.sh │ ├── supervisor_zookeeper.conf │ └── zoo.cfg ├── mysync_test.go └── testutil ├── context.go ├── context_test.go ├── docker_composer.go ├── matchers ├── matchers.go └── matchers_test.go ├── network.go ├── retry.go └── uuid.go /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Pull request description 2 | 3 | ### Describe what this PR fix 4 | // problem is ... 5 | 6 | ### Please provide steps to reproduce (if it's a bug) 7 | // it can really help 8 | 9 | ### Please add config and mysync logs for debug purpose 10 | 11 |
If possible, please provide logs 12 |

13 | ```bash 14 | any logs here 15 | ``` 16 |

17 |
-------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: gomod 4 | directory: / 5 | schedule: 6 | interval: "monthly" 7 | groups: 8 | dev-dependencies: 9 | applies-to: version-updates 10 | patterns: 11 | - "*" 12 | - package-ecosystem: github-actions 13 | directory: / 14 | schedule: 15 | interval: "monthly" 16 | groups: 17 | actions-dependencies: 18 | applies-to: version-updates 19 | patterns: 20 | - "*" 21 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: CodeQL 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | schedule: 9 | - cron: '30 06 * * 6' 10 | 11 | env: 12 | GO_VERSION: 1.22.1 13 | 14 | jobs: 15 | analyze: 16 | runs-on: ubuntu-latest 17 | permissions: 18 | security-events: write 19 | steps: 20 | - name: Check out code into the Go module directory 21 | uses: actions/checkout@v4 22 | - name: Set up Go 1.x 23 | uses: actions/setup-go@v5 24 | with: 25 | go-version: ${{ env.GO_VERSION }} 26 | - name: Initialize CodeQL 27 | uses: github/codeql-action/init@v3 28 | with: 29 | languages: go 30 | build-mode: manual 31 | - name: Build 32 | run: go build -tags netgo,osusergo -o ./cmd/mysync/mysync ./cmd/mysync/... 
33 | - name: Perform CodeQL Analysis 34 | uses: github/codeql-action/analyze@v3 35 | with: 36 | category: "/language:go" 37 | -------------------------------------------------------------------------------- /.github/workflows/docker-jepsen.yml: -------------------------------------------------------------------------------- 1 | name: Jepsen tests 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | 7 | env: 8 | GO_VERSION: 1.22.4 9 | 10 | jobs: 11 | test: 12 | name: jepsen 13 | runs-on: ubuntu-22.04 14 | steps: 15 | - name: Set up Go 1.x 16 | uses: actions/setup-go@v5 17 | with: 18 | go-version: ${{ env.GO_VERSION }} 19 | id: go 20 | 21 | - name: Check out code into the Go module directory 22 | uses: actions/checkout@v4 23 | 24 | - name: Get dependencies 25 | run: | 26 | go get -v -t -d ./... 27 | 28 | - name: Test 29 | run: make base_img jepsen_base_img jepsen_test 30 | 31 | - uses: actions/upload-artifact@v4 32 | if: failure() 33 | with: 34 | name: logs 35 | path: tests/logs 36 | -------------------------------------------------------------------------------- /.github/workflows/docker-tests-8.0.yml: -------------------------------------------------------------------------------- 1 | name: Docker tests (8.0) 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | GO_VERSION: 1.22.4 11 | DOCKER_API_VERSION: 1.43 12 | 13 | jobs: 14 | buildimages: 15 | name: Build images 16 | runs-on: ubuntu-22.04 17 | steps: 18 | - name: Check out code into the Go module directory 19 | uses: actions/checkout@v4 20 | 21 | - name: Docker images caching 22 | id: cache-images 23 | uses: actions/cache@v4 24 | with: 25 | path: ~/mysync-base-img8.0.tgz 26 | key: mysync-base-img8.0-${{ hashFiles('tests/images/base/*') }} 27 | 28 | - name: Build images 29 | if: steps.cache-images.outputs.cache-hit != 'true' 30 | run: make base_img_8.0 31 | 32 | - name: Export image 33 | if: steps.cache-images.outputs.cache-hit != 'true' 34 | run: docker 
save mysync-test-base8.0 | gzip -c > ~/mysync-base-img8.0.tgz 35 | 36 | test: 37 | name: test 38 | runs-on: ubuntu-22.04 39 | needs: [ buildimages ] 40 | strategy: 41 | matrix: 42 | command: 43 | - 'VERSION=8.0 GODOG_FEATURE=active_nodes.feature make test' 44 | - 'VERSION=8.0 GODOG_FEATURE=async.feature make test' 45 | - 'VERSION=8.0 GODOG_FEATURE=async_setting.feature make test' 46 | - 'VERSION=8.0 GODOG_FEATURE=cascade_replicas.feature make test' 47 | - 'VERSION=8.0 GODOG_FEATURE=CLI.feature make test' 48 | - 'VERSION=8.0 GODOG_FEATURE=crash_recovery.feature make test' 49 | - 'VERSION=8.0 GODOG_FEATURE=events_reenable.feature make test' 50 | - 'VERSION=8.0 GODOG_FEATURE=external_replication.feature make test' 51 | - 'VERSION=8.0 GODOG_FEATURE=failover.feature make test' 52 | - 'VERSION=8.0 GODOG_FEATURE=free_space.feature make test' 53 | - 'VERSION=8.0 GODOG_FEATURE=host_discovery.feature make test' 54 | - 'VERSION=8.0 GODOG_FEATURE=host_management.feature make test' 55 | - 'VERSION=8.0 GODOG_FEATURE=maintenance.feature make test' 56 | - 'VERSION=8.0 GODOG_FEATURE=offline_mode.feature make test' 57 | - 'VERSION=8.0 GODOG_FEATURE=priority.feature make test' 58 | - 'VERSION=8.0 GODOG_FEATURE=readonly_filesystem.feature make test' 59 | - 'VERSION=8.0 GODOG_FEATURE=recovery.feature make test' 60 | - 'VERSION=8.0 GODOG_FEATURE=repair.feature make test' 61 | - 'VERSION=8.0 GODOG_FEATURE=repl_mon.feature make test' 62 | - 'VERSION=8.0 GODOG_FEATURE=statefile.feature make test' 63 | - 'VERSION=8.0 GODOG_FEATURE=switchover_from.feature make test' 64 | - 'VERSION=8.0 GODOG_FEATURE=switchover_to.feature make test' 65 | - 'VERSION=8.0 GODOG_FEATURE=zk_failure.feature make test' 66 | - 'VERSION=8.0 GODOG_FEATURE=zk_maintenance.feature make test' 67 | - 'VERSION=8.0 GODOG_FEATURE=manager_switchover.feature make test' 68 | fail-fast: false 69 | 70 | steps: 71 | - name: Set up Go 1.x 72 | uses: actions/setup-go@v5 73 | with: 74 | go-version: ${{ env.GO_VERSION }} 75 | id: go 76 
| 77 | - name: Check out code into the Go module directory 78 | uses: actions/checkout@v4 79 | 80 | - name: Get dependencies 81 | run: | 82 | go get -v -t -d ./... 83 | 84 | - name: Load docker images 85 | id: cache-images 86 | uses: actions/cache@v4 87 | with: 88 | path: ~/mysync-base-img8.0.tgz 89 | key: mysync-base-img8.0-${{ hashFiles('tests/images/base/*') }} 90 | 91 | - name: Fail if no cached images 92 | if: steps.cache-images.outputs.cache-hit != 'true' 93 | run: | 94 | echo "Failed to fetch cached docker images. Will now exit..." 95 | exit 1 96 | 97 | - name: Import image 98 | run: docker load -i ~/mysync-base-img8.0.tgz 99 | 100 | - name: Run test 101 | run: ${{ matrix.command }} 102 | 103 | - uses: actions/upload-artifact@v4 104 | if: failure() 105 | with: 106 | name: logs 107 | path: tests/logs 108 | -------------------------------------------------------------------------------- /.github/workflows/docker-tests-8.4.yml: -------------------------------------------------------------------------------- 1 | name: Docker tests (8.4) 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | GO_VERSION: 1.22.4 11 | DOCKER_API_VERSION: 1.43 12 | 13 | jobs: 14 | buildimages: 15 | name: Build images 16 | runs-on: ubuntu-22.04 17 | steps: 18 | - name: Check out code into the Go module directory 19 | uses: actions/checkout@v4 20 | 21 | - name: Docker images caching 22 | id: cache-images 23 | uses: actions/cache@v4 24 | with: 25 | path: ~/mysync-base-img8.4.tgz 26 | key: mysync-base-img8.4-${{ hashFiles('tests/images/base/*') }} 27 | 28 | - name: Build images 29 | if: steps.cache-images.outputs.cache-hit != 'true' 30 | run: make base_img_8.4 31 | 32 | - name: Export image 33 | if: steps.cache-images.outputs.cache-hit != 'true' 34 | run: docker save mysync-test-base8.4 | gzip -c > ~/mysync-base-img8.4.tgz 35 | 36 | test: 37 | name: test 38 | runs-on: ubuntu-22.04 39 | needs: [ buildimages ] 40 | strategy: 41 | matrix: 
42 | command: 43 | - 'VERSION=8.4 GODOG_FEATURE=active_nodes.feature make test' 44 | - 'VERSION=8.4 GODOG_FEATURE=async.feature make test' 45 | - 'VERSION=8.4 GODOG_FEATURE=async_setting.feature make test' 46 | - 'VERSION=8.4 GODOG_FEATURE=cascade_replicas.84.feature make test' 47 | - 'VERSION=8.4 GODOG_FEATURE=CLI.feature make test' 48 | - 'VERSION=8.4 GODOG_FEATURE=crash_recovery.feature make test' 49 | - 'VERSION=8.4 GODOG_FEATURE=events_reenable.84.feature make test' 50 | - 'VERSION=8.4 GODOG_FEATURE=external_replication.feature make test' 51 | - 'VERSION=8.4 GODOG_FEATURE=failover.84.feature make test' 52 | - 'VERSION=8.4 GODOG_FEATURE=free_space.feature make test' 53 | - 'VERSION=8.4 GODOG_FEATURE=host_discovery.feature make test' 54 | - 'VERSION=8.4 GODOG_FEATURE=host_management.feature make test' 55 | - 'VERSION=8.4 GODOG_FEATURE=maintenance.84.feature make test' 56 | - 'VERSION=8.4 GODOG_FEATURE=offline_mode.84.feature make test' 57 | - 'VERSION=8.4 GODOG_FEATURE=priority.feature make test' 58 | - 'VERSION=8.4 GODOG_FEATURE=readonly_filesystem.feature make test' 59 | - 'VERSION=8.4 GODOG_FEATURE=recovery.feature make test' 60 | - 'VERSION=8.4 GODOG_FEATURE=repair.feature make test' 61 | - 'VERSION=8.4 GODOG_FEATURE=repl_mon.feature make test' 62 | - 'VERSION=8.4 GODOG_FEATURE=statefile.feature make test' 63 | - 'VERSION=8.4 GODOG_FEATURE=switchover_from.84.feature make test' 64 | - 'VERSION=8.4 GODOG_FEATURE=switchover_to.feature make test' 65 | - 'VERSION=8.4 GODOG_FEATURE=zk_failure.feature make test' 66 | - 'VERSION=8.4 GODOG_FEATURE=zk_maintenance.feature make test' 67 | - 'VERSION=8.4 GODOG_FEATURE=manager_switchover.feature make test' 68 | fail-fast: false 69 | 70 | steps: 71 | - name: Set up Go 1.x 72 | uses: actions/setup-go@v5 73 | with: 74 | go-version: ${{ env.GO_VERSION }} 75 | id: go 76 | 77 | - name: Check out code into the Go module directory 78 | uses: actions/checkout@v4 79 | 80 | - name: Get dependencies 81 | run: | 82 | go get -v -t -d 
./... 83 | 84 | - name: Load docker images 85 | id: cache-images 86 | uses: actions/cache@v4 87 | with: 88 | path: ~/mysync-base-img8.4.tgz 89 | key: mysync-base-img8.4-${{ hashFiles('tests/images/base/*') }} 90 | 91 | - name: Fail if no cached images 92 | if: steps.cache-images.outputs.cache-hit != 'true' 93 | run: | 94 | echo "Failed to fetch cached docker images. Will now exit..." 95 | exit 1 96 | 97 | - name: Import image 98 | run: docker load -i ~/mysync-base-img8.4.tgz 99 | 100 | - name: Run test 101 | run: ${{ matrix.command }} 102 | 103 | - uses: actions/upload-artifact@v4 104 | if: failure() 105 | with: 106 | name: logs 107 | path: tests/logs 108 | -------------------------------------------------------------------------------- /.github/workflows/docker-tests.yml: -------------------------------------------------------------------------------- 1 | name: Docker tests (5.7) 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | GO_VERSION: 1.22.4 11 | DOCKER_API_VERSION: 1.43 12 | 13 | jobs: 14 | buildimages: 15 | name: Build images 16 | runs-on: ubuntu-22.04 17 | steps: 18 | - name: Check out code into the Go module directory 19 | uses: actions/checkout@v4 20 | 21 | - name: Docker images caching 22 | id: cache-images 23 | uses: actions/cache@v4 24 | with: 25 | path: ~/mysync-base-img.tgz 26 | key: mysync-base-img-${{ hashFiles('tests/images/base/*') }} 27 | 28 | - name: Build images 29 | if: steps.cache-images.outputs.cache-hit != 'true' 30 | run: make base_img 31 | 32 | - name: Export image 33 | if: steps.cache-images.outputs.cache-hit != 'true' 34 | run: docker save mysync-test-base | gzip -c > ~/mysync-base-img.tgz 35 | 36 | test: 37 | name: test 38 | runs-on: ubuntu-22.04 39 | needs: [ buildimages ] 40 | strategy: 41 | matrix: 42 | command: 43 | - 'GODOG_FEATURE=active_nodes.feature make test' 44 | - 'GODOG_FEATURE=async.feature make test' 45 | - 'GODOG_FEATURE=cascade_replicas.feature make test' 46 | 
- 'GODOG_FEATURE=CLI.feature make test' 47 | - 'GODOG_FEATURE=crash_recovery.feature make test' 48 | - 'GODOG_FEATURE=events_reenable.feature make test' 49 | - 'GODOG_FEATURE=failover.feature make test' 50 | - 'GODOG_FEATURE=free_space.feature make test' 51 | - 'GODOG_FEATURE=host_discovery.feature make test' 52 | - 'GODOG_FEATURE=host_management.feature make test' 53 | - 'GODOG_FEATURE=maintenance.feature make test' 54 | - 'GODOG_FEATURE=offline_mode.feature make test' 55 | - 'GODOG_FEATURE=priority.feature make test' 56 | - 'GODOG_FEATURE=readonly_filesystem.feature make test' 57 | - 'GODOG_FEATURE=recovery.feature make test' 58 | - 'GODOG_FEATURE=repair.feature make test' 59 | - 'GODOG_FEATURE=repl_mon.feature make test' 60 | - 'GODOG_FEATURE=statefile.feature make test' 61 | - 'GODOG_FEATURE=switchover_from.feature make test' 62 | - 'GODOG_FEATURE=switchover_to.feature make test' 63 | - 'GODOG_FEATURE=zk_failure.feature make test' 64 | - 'GODOG_FEATURE=zk_maintenance.feature make test' 65 | - 'GODOG_FEATURE=manager_switchover.feature make test' 66 | fail-fast: false 67 | 68 | steps: 69 | - name: Set up Go 1.x 70 | uses: actions/setup-go@v5 71 | with: 72 | go-version: ${{ env.GO_VERSION }} 73 | id: go 74 | 75 | - name: Check out code into the Go module directory 76 | uses: actions/checkout@v4 77 | 78 | - name: Get dependencies 79 | run: | 80 | go get -v -t -d ./... 81 | 82 | - name: Load docker images 83 | id: cache-images 84 | uses: actions/cache@v4 85 | with: 86 | path: ~/mysync-base-img.tgz 87 | key: mysync-base-img-${{ hashFiles('tests/images/base/*') }} 88 | 89 | - name: Fail if no cached images 90 | if: steps.cache-images.outputs.cache-hit != 'true' 91 | run: | 92 | echo "Failed to fetch cached docker images. Will now exit..." 
93 | exit 1 94 | 95 | - name: Import image 96 | run: docker load -i ~/mysync-base-img.tgz 97 | 98 | - name: Run test 99 | run: ${{ matrix.command }} 100 | 101 | - uses: actions/upload-artifact@v4 102 | if: failure() 103 | with: 104 | name: logs 105 | path: tests/logs 106 | -------------------------------------------------------------------------------- /.github/workflows/golangci-lint.yml: -------------------------------------------------------------------------------- 1 | name: Linters 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | golangci: 14 | name: lint 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/setup-go@v5 18 | with: 19 | go-version: 1.24.1 20 | - uses: actions/checkout@v4 21 | - name: golangci-lint 22 | uses: golangci/golangci-lint-action@v8.0.0 23 | with: 24 | version: v2.1 25 | -------------------------------------------------------------------------------- /.github/workflows/unit-tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [ master ] 7 | pull_request: 8 | branches: [ master ] 9 | 10 | env: 11 | GO_VERSION: 1.22.4 12 | 13 | jobs: 14 | unittest: 15 | name: all_unittests 16 | runs-on: ubuntu-22.04 17 | steps: 18 | - name: Set up Go 1.x 19 | uses: actions/setup-go@v5 20 | with: 21 | go-version: ${{ env.GO_VERSION }} 22 | id: go 23 | 24 | - name: Check out code into the Go module directory 25 | uses: actions/checkout@v4 26 | 27 | - name: Get dependencies 28 | run: | 29 | go get -v -t -d ./... 
30 | 31 | - name: Test 32 | run: make unittests 33 | env: 34 | TEST_MODIFIER: -race 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ide 2 | .DS_Store 3 | .idea/ 4 | .vscode/ 5 | *.vsix 6 | *.swp 7 | *.swo 8 | 9 | 10 | # ext code 11 | vendor/ 12 | buildtools/ 13 | 14 | # binaries 15 | cmd/mysync/mysync 16 | tests/images/mysql/mysync 17 | tests/images/mysql_jepsen/mysync 18 | 19 | # tests 20 | tests/logs/ 21 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | run: 3 | go: "1.22" 4 | modules-download-mode: mod 5 | linters: 6 | default: none 7 | enable: 8 | - bodyclose 9 | - copyloopvar 10 | - dupl 11 | - errcheck 12 | - funlen 13 | - gocritic 14 | - gocyclo 15 | - govet 16 | - ineffassign 17 | - misspell 18 | - nakedret 19 | - revive 20 | - staticcheck 21 | - unconvert 22 | - unparam 23 | - unused 24 | - whitespace 25 | settings: 26 | dupl: 27 | threshold: 400 28 | funlen: 29 | lines: 150 30 | statements: 100 31 | gocritic: 32 | enabled-tags: 33 | - performance 34 | disabled-tags: 35 | - diagnostic 36 | - experimental 37 | - opinionated 38 | - style 39 | gocyclo: 40 | min-complexity: 25 41 | lll: 42 | line-length: 140 43 | misspell: 44 | locale: US 45 | revive: 46 | rules: 47 | - name: blank-imports 48 | - name: context-as-argument 49 | - name: context-keys-type 50 | - name: dot-imports 51 | - name: error-return 52 | - name: error-naming 53 | - name: exported 54 | - name: var-naming 55 | - name: var-declaration 56 | - name: package-comments 57 | - name: range 58 | - name: receiver-naming 59 | - name: time-naming 60 | - name: unexported-return 61 | - name: errorf 62 | - name: empty-block 63 | - name: unreachable-code 64 | - name: redefines-builtin-id 65 | exclusions: 66 | generated: lax 
67 | presets: 68 | - comments 69 | - common-false-positives 70 | - legacy 71 | - std-error-handling 72 | paths: 73 | - third_party$ 74 | - builtin$ 75 | - examples$ 76 | severity: 77 | default: error 78 | formatters: 79 | enable: 80 | - gofmt 81 | - goimports 82 | exclusions: 83 | generated: lax 84 | paths: 85 | - third_party$ 86 | - builtin$ 87 | - examples$ 88 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | The following authors have created the source code of "Yandex Mysync" 2 | published and distributed by YANDEX LLC as the owner: 3 | 4 | Dmitry Smal mialinx@yandex-team.ru 5 | Kirill Reshke reshke@yandex-team.ru 6 | Aleksandr Shevchuk teem0n@yandex-team.ru 7 | Nikolay Antonov ostinru@yandex-team.ru 8 | Evgeniy Dyukov secwall@yandex-team.ru 9 | Bulat Ahmetzyanov bularond@yandex-team.ru 10 | Andrey Krasichkov buglloc@yandex-team.ru 11 | Andrey Borodin x4mmm@yandex-team.ru 12 | Ilya Sinelnikov sidh@yandex-team.ru 13 | Sergey Bevzenko sabevzenko@yandex-team.ru 14 | Svyatoslav Ermilin munakoiso@yandex-team.ru 15 | Georgiy Zujkov gzuykov@yandex-team.ru 16 | Georgiy Rylov godjan@yandex-team.ru 17 | Igor Suetin suetin@yandex-team.ru 18 | Pavel Khattu khattu@yandex-team.ru 19 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Notice to external contributors 2 | 3 | 4 | ## General info 5 | 6 | Hello! In order for us (YANDEX LLC) to accept patches and other contributions from you, you will have to adopt our Yandex Contributor License Agreement (the “**CLA**â€). The current version of the CLA can be found here: 7 | 1) https://yandex.ru/legal/cla/?lang=en (in English) and 8 | 2) https://yandex.ru/legal/cla/?lang=ru (in Russian). 
9 | 10 | By adopting the CLA, you state the following: 11 | 12 | * You obviously wish and are willingly licensing your contributions to us for our open source projects under the terms of the CLA, 13 | * You have read the terms and conditions of the CLA and agree with them in full, 14 | * You are legally able to provide and license your contributions as stated, 15 | * We may use your contributions for our open source projects and for any other our project too, 16 | * We rely on your assurances concerning the rights of third parties in relation to your contributions. 17 | 18 | If you agree with these principles, please read and adopt our CLA. By providing us your contributions, you hereby declare that you have already read and adopt our CLA, and we may freely merge your contributions with our corresponding open source project and use it in further in accordance with terms and conditions of the CLA. 19 | 20 | ## Provide contributions 21 | 22 | If you have already adopted terms and conditions of the CLA, you are able to provide your contributions. When you submit your pull request, please add the following information into it: 23 | 24 | ``` 25 | I hereby agree to the terms of the CLA available at: [link]. 26 | ``` 27 | 28 | Replace the bracketed text as follows: 29 | * [link] is the link to the current version of the CLA: https://yandex.ru/legal/cla/?lang=en (in English) or https://yandex.ru/legal/cla/?lang=ru (in Russian). 30 | 31 | It is enough to provide us such notification once. 32 | 33 | ## Other questions 34 | 35 | If you have any questions, please mail us at opensource@yandex-team.ru. 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2025 YANDEX LLC 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | go build -o ./cmd/mysync/mysync ./cmd/mysync/... 3 | 4 | format: 5 | gofmt -s -w `find . -name '*.go'` 6 | goimports -w `find . -name '*.go'` 7 | 8 | lint: 9 | docker run --rm -v ${CURDIR}:/app -w /app golangci/golangci-lint:v2.1-alpine golangci-lint run -v 10 | 11 | unittests: 12 | go test ./cmd/... ./internal/... 13 | go test ./cmd/... ./tests/testutil/matchers/ 14 | 15 | base_img: 16 | docker build --tag=mysync-test-base tests/images/base --build-arg MYSQL_VERSION=5.7 17 | 18 | base_img_8.0: 19 | docker build --tag=mysync-test-base8.0 tests/images/base --build-arg MYSQL_VERSION=8.0 20 | 21 | base_img_8.4: 22 | docker build --tag=mysync-test-base8.4 tests/images/base --build-arg MYSQL_VERSION=8.4 23 | 24 | jepsen_base_img: 25 | docker build --tag=mysync-jepsen-test-base tests/images/jepsen_common 26 | 27 | test: 28 | GOOS=linux go build -tags netgo,osusergo -o ./cmd/mysync/mysync ./cmd/mysync/... 29 | go build ./tests/... 30 | rm -fr ./tests/images/mysql/mysync && cp ./cmd/mysync/mysync ./tests/images/mysql/mysync 31 | rm -rf ./tests/logs 32 | mkdir ./tests/logs 33 | (cd tests; go test -timeout 150m) 34 | 35 | jepsen_test: 36 | GOOS=linux go build -tags netgo,osusergo -o ./cmd/mysync/mysync ./cmd/mysync/... 37 | go build ./tests/... 
38 | rm -fr ./tests/images/mysql_jepsen/mysync && cp ./cmd/mysync/mysync ./tests/images/mysql_jepsen/mysync 39 | docker compose -p mysync -f ./tests/images/jepsen-compose.yml up -d --force-recreate --build 40 | timeout 600 docker exec mysync_zoo1_1 /usr/local/bin/generate_certs_with_restart.sh mysync_zookeeper1_1.mysync_mysql_net 41 | timeout 600 docker exec mysync_zoo2_1 /usr/local/bin/generate_certs_with_restart.sh mysync_zookeeper2_1.mysync_mysql_net 42 | timeout 600 docker exec mysync_zoo3_1 /usr/local/bin/generate_certs_with_restart.sh mysync_zookeeper3_1.mysync_mysql_net 43 | timeout 600 docker exec mysync_zoo1_1 retriable_path_create.sh /test 44 | timeout 600 docker exec mysync_zoo1_1 retriable_path_create.sh /test/ha_nodes 45 | timeout 600 docker exec mysync_zoo1_1 retriable_path_create.sh /test/ha_nodes/mysync_mysql1_1 46 | timeout 600 docker exec mysync_zoo1_1 retriable_path_create.sh /test/ha_nodes/mysync_mysql2_1 47 | timeout 600 docker exec mysync_zoo1_1 retriable_path_create.sh /test/ha_nodes/mysync_mysql3_1 48 | timeout 600 docker exec mysync_mysql1_1 sh -c "/var/lib/dist/base/generate_certs.sh mysync_mysql1_1.mysync_mysql_net && supervisorctl restart mysync && supervisorctl start mysqld" 49 | timeout 600 docker exec mysync_mysql2_1 sh -c "/var/lib/dist/base/generate_certs.sh mysync_mysql2_1.mysync_mysql_net && supervisorctl restart mysync && supervisorctl start mysqld" 50 | timeout 600 docker exec mysync_mysql3_1 sh -c "/var/lib/dist/base/generate_certs.sh mysync_mysql3_1.mysync_mysql_net && supervisorctl restart mysync && supervisorctl start mysqld" 51 | timeout 600 docker exec mysync_mysql1_1 setup.sh 52 | timeout 600 bash ./tests/images/copy_keys.sh 53 | mkdir -p ./tests/logs 54 | (docker exec mysync_jepsen_1 /root/jepsen/run.sh > ./tests/logs/jepsen.log 2>&1 && tail -n 50 ./tests/logs/jepsen.log) || ./tests/images/jepsen_main/save_logs.sh 55 | docker compose -p mysync -f ./tests/images/jepsen-compose.yml down --rmi all 56 | 57 | clean: 58 | 
docker ps | grep mysync | awk '{print $$1}' | xargs docker rm -f || true 59 | docker network ls | grep mysync | awk '{print $$1}' | xargs docker network rm || true 60 | docker image ls | grep mysync | awk '{print $$3}' | xargs docker image rm --force || true 61 | rm -rf ./tests/logs 62 | 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Unit-tests-status](https://github.com/yandex/mysync/workflows/Unit%20tests/badge.svg) 2 | ![Linters-status](https://github.com/yandex/mysync/workflows/Linters/badge.svg) 3 | ![Docker-tests-57-status](https://github.com/yandex/mysync/workflows/Docker%20tests%20(5.7)/badge.svg) 4 | ![Docker-tests-80-status](https://github.com/yandex/mysync/workflows/Docker%20tests%20(8.0)/badge.svg) 5 | ![Docker-tests-84-status](https://github.com/yandex/mysync/workflows/Docker%20tests%20(8.4)/badge.svg) 6 | 7 | ## MySync 8 | 9 | MySync is mysql high-availability and cluster configuration tool. 10 | MySync is designed to switch master in homogeneous MySQL cluster in manual and automatic mode without data loss. 11 | 12 | #### Project status 13 | 14 | MySync is production-ready, it is being used in large production setups. 15 | We appreciate any kind of feedback and contribution to the project. 
16 | 17 | ### Architecture 18 | 19 | 20 | 21 | ### Limitations and requirements 22 | 23 | * MySQL 5.7+ with GTID's enabled 24 | * MySQL cluster is homogeneous - all nodes have exactly the same data 25 | * ZooKeeper database to keep state and locks 26 | 27 | ### MySQL configuration 28 | 29 | ``` 30 | # required 31 | gtid_mode = ON 32 | enforce_gtid_consistency = ON 33 | log_slave_updates = ON 34 | binlog_format = ROW 35 | 36 | # required, managed by mysync 37 | read_only = ON 38 | super_read_only = ON 39 | offline_mode = ON 40 | 41 | # recommended 42 | sync_binlog = 1 43 | innodb_flush_log_at_trx_commit = 1 44 | 45 | ``` 46 | 47 | ### MySync configuration example 48 | 49 | ``` 50 | log: /var/log/mysync/mysync.log 51 | loglevel: Debug 52 | 53 | lockfile: /var/run/mysync/mysync.lock 54 | emergefile: /var/run/mysync/mysync.emerge 55 | resetupfile: /var/run/mysync/mysync.resetup 56 | 57 | resetup_crashed_hosts: False 58 | db_timeout: 2s 59 | db_lost_check_timeout: 5s 60 | tick_interval: 5s 61 | healthcheck_interval: 5s 62 | info_file_handler_interval: 5s 63 | dcs_wait_timeout: 30s 64 | critical_disk_usage: 95.00 65 | not_critical_disk_usage: 94.76 66 | disable_semi_sync_replication_on_maintenance: true 67 | keep_super_writable_on_critical_disk_usage: true 68 | db_set_ro_timeout: 30s 69 | db_set_ro_force_timeout: 60s 70 | priority_choice_max_lag: 60s 71 | offline_mode_enable_interval: 900s 72 | offline_mode_enable_lag: 86400s 73 | offline_mode_disable_lag: 300s 74 | disable_set_readonly_on_lost: False 75 | exclude_users: 76 | - 'repl' 77 | - 'admin' 78 | - 'monitor' 79 | - 'event_scheduler' 80 | 81 | semi_sync: true 82 | rpl_semi_sync_master_wait_for_slave_count: 1 83 | semi_sync_enable_lag: 33554432 84 | 85 | failover: true 86 | failover_cooldown: 3600s 87 | failover_delay: 60s 88 | inactivation_delay: 120s 89 | 90 | zookeeper: 91 | session_timeout: 10s 92 | namespace: /mysql/cluster_id_1 93 | hosts: 94 | - zk01.db.company.net:2181 95 | - zk02.db.company.net:2181 96 | - 
zk03.db.company.net:2181 97 | 98 | mysql: 99 | user: admin 100 | password: ********** 101 | ssl_ca: /etc/mysql/ssl/allCAs.pem 102 | replication_connect_retry: 10 103 | replication_retry_count: 0 104 | replication_heartbeat_period: 2 105 | replication_port: 3306 106 | replication_user: repl 107 | replication_password: ******** 108 | replication_ssl_ca: /etc/mysql/ssl/allCAs.pem 109 | external_replication_ssl_ca: /etc/mysql/ssl/external_CA.pem 110 | port: 3306 111 | 112 | stream_from_reasonable_lag: 5m 113 | stream_from_max_catchup_wait: 5m 114 | 115 | replication_repair_aggressive_mode: False 116 | replication_repair_cooldown: 60s 117 | replication_repair_max_attempts: 3 118 | 119 | external_replication_type: off 120 | show_only_gtid_diff: False 121 | force_switchover: False 122 | ``` 123 | 124 | ### Usage 125 | 126 | ``` 127 | mysync hosts add fqdn1.db.company.net 128 | mysync hosts add fqdn2.db.company.net 129 | mysync hosts add fqdn3.db.company.net 130 | 131 | mysync info -s 132 | mysync switch --to fqdn2 133 | mysync switch --from fqdn2 134 | mysync maint on 135 | mysync maint off 136 | ``` 137 | 138 | 139 | -------------------------------------------------------------------------------- /cmd/mysync/abort.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | 9 | "github.com/yandex/mysync/internal/app" 10 | ) 11 | 12 | var abortCmd = &cobra.Command{ 13 | Use: "abort", 14 | Short: "Clear switchover command from DCS", 15 | Long: "It does NOT rollback performed actions. 
You should manually repair cluster after it.", 16 | Run: func(cmd *cobra.Command, args []string) { 17 | app, err := app.NewApp(configFile, logLevel, true) 18 | if err != nil { 19 | fmt.Println(err) 20 | os.Exit(1) 21 | } 22 | os.Exit(app.CliAbort()) 23 | }, 24 | } 25 | 26 | func init() { 27 | rootCmd.AddCommand(abortCmd) 28 | } 29 | -------------------------------------------------------------------------------- /cmd/mysync/hosts.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | "github.com/spf13/pflag" 9 | 10 | "github.com/yandex/mysync/internal/app" 11 | ) 12 | 13 | var streamFrom string 14 | var priority int64 15 | var dryRun bool 16 | var skipMySQLCheck bool 17 | 18 | var hostCmd = &cobra.Command{ 19 | Use: "host", 20 | Aliases: []string{"hosts"}, 21 | Short: "manage hosts in cluster", 22 | Run: func(cmd *cobra.Command, args []string) { 23 | app, err := app.NewApp(configFile, logLevel, true) 24 | if err != nil { 25 | fmt.Println(err) 26 | os.Exit(1) 27 | } 28 | os.Exit(app.CliHostList()) 29 | }, 30 | } 31 | 32 | var hostAddCmd = &cobra.Command{ 33 | Use: "add", 34 | Short: "add host to cluster", 35 | Args: cobra.ExactArgs(1), 36 | Run: func(cmd *cobra.Command, args []string) { 37 | app, err := app.NewApp(configFile, logLevel, true) 38 | if err != nil { 39 | fmt.Println(err) 40 | os.Exit(1) 41 | } 42 | 43 | var priorityVal *int64 44 | var streamFromVar *string 45 | cmd.Flags().Visit(func(f *pflag.Flag) { 46 | switch f.Name { 47 | case "priority": 48 | priorityVal = &priority 49 | case "stream-from": 50 | streamFromVar = &streamFrom 51 | } 52 | }) 53 | 54 | os.Exit(app.CliHostAdd(args[0], streamFromVar, priorityVal, dryRun, skipMySQLCheck)) 55 | }, 56 | } 57 | 58 | var hostRemoveCmd = &cobra.Command{ 59 | Use: "remove", 60 | Short: "remove host from cluster", 61 | Args: cobra.ExactArgs(1), 62 | Run: func(cmd *cobra.Command, args []string) { 
63 | app, err := app.NewApp(configFile, logLevel, true) 64 | if err != nil { 65 | fmt.Println(err) 66 | os.Exit(1) 67 | } 68 | os.Exit(app.CliHostRemove(args[0])) 69 | }, 70 | } 71 | 72 | func init() { 73 | hostAddCmd.Flags().StringVar(&streamFrom, "stream-from", "", "host to stream from") 74 | hostAddCmd.Flags().Int64Var(&priority, "priority", 0, "host priority") 75 | hostAddCmd.Flags().BoolVar(&dryRun, "dry-run", false, "tests suggested changes."+ 76 | " Exits codes:"+ 77 | " 0 - when no changes detected,"+ 78 | " 1 - when some error happened or changes prohibited,"+ 79 | " 2 - when changes detected and some changes will be performed during usual run") 80 | hostAddCmd.Flags().BoolVar(&skipMySQLCheck, "skip-mysql-check", false, "skip mysql availability check") 81 | hostCmd.AddCommand(hostAddCmd) 82 | hostCmd.AddCommand(hostRemoveCmd) 83 | rootCmd.AddCommand(hostCmd) 84 | } 85 | -------------------------------------------------------------------------------- /cmd/mysync/info.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | 9 | "github.com/yandex/mysync/internal/app" 10 | ) 11 | 12 | var infoCmd = &cobra.Command{ 13 | Use: "info", 14 | Short: "Print information from DCS", 15 | Run: func(cmd *cobra.Command, args []string) { 16 | app, err := app.NewApp(configFile, logLevel, true) 17 | if err != nil { 18 | fmt.Println(err) 19 | os.Exit(1) 20 | } 21 | os.Exit(app.CliInfo(short)) 22 | }, 23 | } 24 | 25 | func init() { 26 | rootCmd.AddCommand(infoCmd) 27 | } 28 | -------------------------------------------------------------------------------- /cmd/mysync/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | 9 | "github.com/yandex/mysync/internal/app" 10 | ) 11 | 12 | var configFile string 13 | var logLevel string 14 | var 
short bool 15 | 16 | var rootCmd = &cobra.Command{ 17 | Use: "mysync", 18 | Short: "Mysync is MySQL HA cluster coordination tool", 19 | Long: `Running without additional arguments will start mysync agent for current node.`, 20 | Run: func(cmd *cobra.Command, args []string) { 21 | app, err := app.NewApp(configFile, logLevel, false) 22 | if err != nil { 23 | fmt.Println(err) 24 | os.Exit(1) 25 | } 26 | os.Exit(app.Run()) 27 | }, 28 | } 29 | 30 | func init() { 31 | rootCmd.PersistentFlags().StringVarP(&configFile, "config", "c", "/etc/mysync.yaml", "config file") 32 | rootCmd.PersistentFlags().StringVarP(&logLevel, "loglevel", "l", "Warn", "logging level (Trace|Debug|Info|Warn|Error|Fatal)") 33 | rootCmd.PersistentFlags().BoolVarP(&short, "short", "s", false, "short output") 34 | } 35 | 36 | func main() { 37 | if err := rootCmd.Execute(); err != nil { 38 | fmt.Println(err) 39 | os.Exit(1) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /cmd/mysync/maintenance.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | 8 | "github.com/spf13/cobra" 9 | 10 | "github.com/yandex/mysync/internal/app" 11 | ) 12 | 13 | var maintWait time.Duration 14 | var maintReason string 15 | 16 | var maintCmd = &cobra.Command{ 17 | Use: "maintenance", 18 | Aliases: []string{"maint", "mnt"}, 19 | Short: "Enables or disables maintenance mode", 20 | Long: ("When maintenance is enabled, MySync manager will not perform any actions.\n" + 21 | "When maintenance is disabled, MySync will analyze cluster state and remember it as correct."), 22 | } 23 | 24 | var maintOnCmd = &cobra.Command{ 25 | Use: "on", 26 | Aliases: []string{"enable"}, 27 | Run: func(cmd *cobra.Command, args []string) { 28 | app, err := app.NewApp(configFile, logLevel, true) 29 | if err != nil { 30 | fmt.Println(err) 31 | os.Exit(1) 32 | } 33 | 
os.Exit(app.CliEnableMaintenance(maintWait, maintReason)) 34 | }, 35 | } 36 | 37 | var maintOffCmd = &cobra.Command{ 38 | Use: "off", 39 | Aliases: []string{"disable"}, 40 | Run: func(cmd *cobra.Command, args []string) { 41 | app, err := app.NewApp(configFile, logLevel, true) 42 | if err != nil { 43 | fmt.Println(err) 44 | os.Exit(1) 45 | } 46 | os.Exit(app.CliDisableMaintenance(maintWait)) 47 | }, 48 | } 49 | 50 | var maintGetCmd = &cobra.Command{ 51 | Use: "get", 52 | Run: func(cmd *cobra.Command, args []string) { 53 | app, err := app.NewApp(configFile, logLevel, true) 54 | if err != nil { 55 | fmt.Println(err) 56 | os.Exit(1) 57 | } 58 | os.Exit(app.CliGetMaintenance()) 59 | }, 60 | } 61 | 62 | func init() { 63 | rootCmd.AddCommand(maintCmd) 64 | maintCmd.AddCommand(maintOnCmd) 65 | maintCmd.AddCommand(maintOffCmd) 66 | maintCmd.AddCommand(maintGetCmd) 67 | maintCmd.PersistentFlags().DurationVarP(&maintWait, "wait", "w", 30*time.Second, "how long to wait for maintenance activation, 0s to return immediately") 68 | 69 | maintOnCmd.Flags().StringVarP(&maintReason, "reason", "r", "", "reason for maintenance (e.g. 
ticket number)") 70 | } 71 | -------------------------------------------------------------------------------- /cmd/mysync/optimize.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | 9 | "github.com/yandex/mysync/internal/app" 10 | ) 11 | 12 | var optimizeCmd = &cobra.Command{ 13 | Use: "optimize", 14 | Aliases: []string{"turbo"}, 15 | Short: "Enables or disables optimization mode", 16 | Long: ("When optimization mode is enabled, MySync turns on potentially dangerous options to reduce disk usage.\n" + 17 | "When optimization mode is disabled, MySync restores safe defaults, and the host operates in normal mode.\n" + 18 | "Optimization works only on replica hosts and cannot be enabled on the master host."), 19 | } 20 | 21 | var optimizeOnCmd = &cobra.Command{ 22 | Use: "on", 23 | Aliases: []string{"enable"}, 24 | Run: func(cmd *cobra.Command, args []string) { 25 | app, err := app.NewApp(configFile, logLevel, true) 26 | if err != nil { 27 | fmt.Println(err) 28 | os.Exit(1) 29 | } 30 | os.Exit(app.CliEnableOptimization()) 31 | }, 32 | } 33 | 34 | var optimizeOffCmd = &cobra.Command{ 35 | Use: "off", 36 | Aliases: []string{"disable"}, 37 | Run: func(cmd *cobra.Command, args []string) { 38 | app, err := app.NewApp(configFile, logLevel, true) 39 | if err != nil { 40 | fmt.Println(err) 41 | os.Exit(1) 42 | } 43 | os.Exit(app.CliDisableOptimization()) 44 | }, 45 | } 46 | 47 | var optimizeGetCmd = &cobra.Command{ 48 | Use: "get", 49 | Run: func(cmd *cobra.Command, args []string) { 50 | app, err := app.NewApp(configFile, logLevel, true) 51 | if err != nil { 52 | fmt.Println(err) 53 | os.Exit(1) 54 | } 55 | os.Exit(app.CliGetOptimization()) 56 | }, 57 | } 58 | 59 | func init() { 60 | rootCmd.AddCommand(optimizeCmd) 61 | optimizeCmd.AddCommand(optimizeOnCmd) 62 | optimizeCmd.AddCommand(optimizeOffCmd) 63 | optimizeCmd.AddCommand(optimizeGetCmd) 64 | } 65 | 
-------------------------------------------------------------------------------- /cmd/mysync/state.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | 9 | "github.com/yandex/mysync/internal/app" 10 | ) 11 | 12 | var stateCmd = &cobra.Command{ 13 | Use: "state", 14 | Short: "Print cluster nodes state by querying databases", 15 | Run: func(cmd *cobra.Command, args []string) { 16 | app, err := app.NewApp(configFile, logLevel, true) 17 | if err != nil { 18 | fmt.Println(err) 19 | os.Exit(1) 20 | } 21 | os.Exit(app.CliState(short)) 22 | }, 23 | } 24 | 25 | func init() { 26 | rootCmd.AddCommand(stateCmd) 27 | } 28 | -------------------------------------------------------------------------------- /cmd/mysync/switch.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | 8 | "github.com/spf13/cobra" 9 | 10 | "github.com/yandex/mysync/internal/app" 11 | ) 12 | 13 | var switchTo string 14 | var switchFrom string 15 | var switchWait time.Duration 16 | var failover bool 17 | 18 | var switchCmd = &cobra.Command{ 19 | Use: "switch", 20 | Short: "Move the master to (from) specified host", 21 | Long: "If master is already on (not on) specified host it will be ignored", 22 | Run: func(cmd *cobra.Command, args []string) { 23 | app, err := app.NewApp(configFile, logLevel, true) 24 | if err != nil { 25 | fmt.Println(err) 26 | os.Exit(1) 27 | } 28 | os.Exit(app.CliSwitch(switchFrom, switchTo, switchWait, failover)) 29 | }, 30 | } 31 | 32 | func init() { 33 | rootCmd.AddCommand(switchCmd) 34 | switchCmd.Flags().StringVar(&switchFrom, "from", "", "switch master from specific (or current master if empty) host") 35 | switchCmd.Flags().StringVar(&switchTo, "to", "", "switch master to specific (or most up-to-date if empty) host") 36 | 
switchCmd.Flags().DurationVarP(&switchWait, "wait", "w", 5*time.Minute, "how long wait for switchover to complete, 0s to return immediately") 37 | switchCmd.Flags().BoolVar(&failover, "failover", false, "ignore the master's liveness probe during switchover") 38 | } 39 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/yandex/mysync 2 | 3 | go 1.23.0 4 | 5 | toolchain go1.24.1 6 | 7 | require ( 8 | github.com/cenkalti/backoff/v4 v4.3.0 9 | github.com/cucumber/godog v0.15.0 10 | github.com/docker/docker v28.2.2+incompatible 11 | github.com/go-mysql-org/go-mysql v1.12.0 12 | github.com/go-sql-driver/mysql v1.9.2 13 | github.com/go-zookeeper/zk v1.0.4 14 | github.com/gofrs/flock v0.12.1 15 | github.com/gofrs/uuid v4.4.0+incompatible 16 | github.com/golang/mock v1.6.0 17 | github.com/google/uuid v1.6.0 18 | github.com/heetch/confita v0.10.0 19 | github.com/jmoiron/sqlx v1.4.0 20 | github.com/shirou/gopsutil/v3 v3.24.5 21 | github.com/spf13/cobra v1.9.1 22 | github.com/spf13/pflag v1.0.6 23 | github.com/stretchr/testify v1.10.0 24 | gopkg.in/yaml.v2 v2.4.0 25 | ) 26 | 27 | require ( 28 | filippo.io/edwards25519 v1.1.0 // indirect 29 | github.com/BurntSushi/toml v1.4.0 // indirect 30 | github.com/Masterminds/semver v1.5.0 // indirect 31 | github.com/Microsoft/go-winio v0.6.2 // indirect 32 | github.com/containerd/errdefs v1.0.0 // indirect 33 | github.com/containerd/errdefs/pkg v0.3.0 // indirect 34 | github.com/containerd/log v0.1.0 // indirect 35 | github.com/cucumber/gherkin/go/v26 v26.2.0 // indirect 36 | github.com/cucumber/messages/go/v21 v21.0.1 // indirect 37 | github.com/davecgh/go-spew v1.1.1 // indirect 38 | github.com/distribution/reference v0.6.0 // indirect 39 | github.com/docker/go-connections v0.5.0 // indirect 40 | github.com/docker/go-units v0.5.0 // indirect 41 | github.com/felixge/httpsnoop v1.0.4 // indirect 
42 | github.com/go-logr/logr v1.4.2 // indirect 43 | github.com/go-logr/stdr v1.2.2 // indirect 44 | github.com/go-ole/go-ole v1.3.0 // indirect 45 | github.com/gogo/protobuf v1.3.2 // indirect 46 | github.com/hashicorp/go-immutable-radix v1.3.1 // indirect 47 | github.com/hashicorp/go-memdb v1.3.4 // indirect 48 | github.com/hashicorp/golang-lru v0.5.4 // indirect 49 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 50 | github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7 // indirect 51 | github.com/moby/docker-image-spec v1.3.1 // indirect 52 | github.com/moby/sys/atomicwriter v0.1.0 // indirect 53 | github.com/moby/term v0.0.0-20220808134915-39b0c02b01ae // indirect 54 | github.com/morikuni/aec v1.0.0 // indirect 55 | github.com/opencontainers/go-digest v1.0.0 // indirect 56 | github.com/opencontainers/image-spec v1.1.0 // indirect 57 | github.com/pingcap/errors v0.11.5-0.20240311024730-e056997136bb // indirect 58 | github.com/pkg/errors v0.9.1 // indirect 59 | github.com/pmezard/go-difflib v1.0.0 // indirect 60 | github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect 61 | github.com/shoenig/go-m1cpu v0.1.6 // indirect 62 | github.com/tklauser/go-sysconf v0.3.14 // indirect 63 | github.com/tklauser/numcpus v0.8.0 // indirect 64 | github.com/yusufpapurcu/wmi v1.2.4 // indirect 65 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect 66 | go.opentelemetry.io/otel v1.29.0 // indirect 67 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0 // indirect 68 | go.opentelemetry.io/otel/metric v1.29.0 // indirect 69 | go.opentelemetry.io/otel/sdk v1.22.0 // indirect 70 | go.opentelemetry.io/otel/trace v1.29.0 // indirect 71 | go.uber.org/atomic v1.11.0 // indirect 72 | golang.org/x/net v0.38.0 // indirect 73 | golang.org/x/sys v0.31.0 // indirect 74 | google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect 75 | 
package app

import (
	"fmt"
	"time"

	"github.com/yandex/mysync/internal/mysql"
)

// CheckAsyncSwitchAllowed reports whether an automatic switchover may promote
// `node` without waiting for it to fully catch up with the old master.
// It returns true only when ALL of the following hold:
//   - async mode is enabled in config (app.config.ASync),
//   - the switchover was triggered automatically (switchover.Cause == CauseAuto),
//   - a positive AsyncAllowedLag is configured,
//   - the node's repl_mon-based delay is strictly below AsyncAllowedLag.
// Any error while reading the repl_mon timestamp or computing the delay is
// logged and treated as "not allowed" (returns false) — fail-safe behavior.
func (app *App) CheckAsyncSwitchAllowed(node *mysql.Node, switchover *Switchover) bool {
	if app.config.ASync && switchover.Cause == CauseAuto && app.config.AsyncAllowedLag > 0 {
		app.logger.Infof("async mode is active and this is auto switch so we checking new master delay")
		// Timestamp previously saved in DCS from the master's repl_mon table
		// (see updateReplMonTS below).
		ts, err := app.GetReplMonTS()
		if err != nil {
			app.logger.Errorf("failed to get mdb repl mon ts: %v", err)
			return false
		}
		// delay is expressed in whole seconds relative to the stored timestamp.
		delay, err := node.CalcReplMonTSDelay(app.config.ReplMonSchemeName, app.config.ReplMonTableName, ts)
		if err != nil {
			app.logger.Errorf("failed to calc mdb repl mon ts: %v", err)
			return false
		}
		// Convert the integer second count to a Duration for comparison with
		// the configured allowed lag.
		if time.Duration(delay)*time.Second < app.config.AsyncAllowedLag {
			app.logger.Infof("async allowed lag is %f seconds and current lag on host %s is %d, so we don't wait for catch up any more",
				app.config.AsyncAllowedLag.Seconds(), node.Host(), delay)
			return true
		}
	}
	return false
}

// updateReplMonTS reads the current repl_mon timestamp from the given master
// host and stores it in DCS, so replicas' lag can later be measured against it.
func (app *App) updateReplMonTS(master string) error {
	masterNode := app.cluster.Get(master)
	ts, err := masterNode.GetReplMonTS(app.config.ReplMonSchemeName, app.config.ReplMonTableName)
	if err != nil {
		return fmt.Errorf("failed to get master repl_mon timestamp: %v", err)
	}
	return app.SetReplMonTS(ts)
}
-------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | 7 | "gopkg.in/yaml.v2" 8 | 9 | "github.com/yandex/mysync/internal/dcs" 10 | ) 11 | 12 | // CliInfo is CLI command printing information from DCS to the stdout 13 | func (app *App) CliInfo(short bool) int { 14 | cancel, err := app.cliInitApp() 15 | if err != nil { 16 | app.logger.Error(err.Error()) 17 | return 1 18 | } 19 | defer cancel() 20 | 21 | var tree any 22 | if short { 23 | data := make(map[string]any) 24 | 25 | haNodes, err := app.cluster.GetClusterHAHostsFromDcs() 26 | if err != nil { 27 | app.logger.Errorf("failed to get ha nodes: %v", err) 28 | return 1 29 | } 30 | data[pathHANodes] = haNodes 31 | 32 | cascadeNodes, err := app.cluster.GetClusterCascadeHostsFromDcs() 33 | if err != nil { 34 | app.logger.Errorf("failed to get cascade nodes: %v", err) 35 | return 1 36 | } 37 | data[pathCascadeNodesPrefix] = cascadeNodes 38 | 39 | activeNodes, err := app.GetActiveNodes() 40 | if err != nil { 41 | app.logger.Error(err.Error()) 42 | return 1 43 | } 44 | sort.Strings(activeNodes) 45 | data[pathActiveNodes] = activeNodes 46 | 47 | nodesOnRecovery, err := app.GetHostsOnRecovery() 48 | if err != nil { 49 | app.logger.Errorf("failed to get nodes on recovery: %v", err) 50 | return 1 51 | } 52 | if len(nodesOnRecovery) > 0 { 53 | sort.Strings(nodesOnRecovery) 54 | data[pathRecovery] = nodesOnRecovery 55 | } 56 | 57 | clusterState, err := app.getClusterStateFromDcs() 58 | if err != nil { 59 | app.logger.Errorf("failed to get cluster state: %v", err) 60 | return 1 61 | } 62 | health := make(map[string]any) 63 | for host, state := range clusterState { 64 | health[host] = state.String() 65 | } 66 | data[pathHealthPrefix] = health 67 | 68 | for _, path := range []string{pathLastSwitch, pathCurrentSwitch, pathLastRejectedSwitch} { 69 | var switchover Switchover 70 | err = app.dcs.Get(path, &switchover) 71 | if err == nil { 72 | 
data[path] = switchover.String() 73 | } else if err != dcs.ErrNotFound { 74 | app.logger.Errorf("failed to get %s: %v", path, err) 75 | return 1 76 | } 77 | } 78 | 79 | var maintenance Maintenance 80 | err = app.dcs.Get(pathMaintenance, &maintenance) 81 | if err == nil { 82 | data[pathMaintenance] = maintenance.String() 83 | } else if err != dcs.ErrNotFound { 84 | app.logger.Errorf("failed to get %s: %v", pathMaintenance, err) 85 | return 1 86 | } 87 | 88 | var manager dcs.LockOwner 89 | err = app.dcs.Get(pathManagerLock, &manager) 90 | if err != nil && err != dcs.ErrNotFound { 91 | app.logger.Errorf("failed to get %s: %v", pathManagerLock, err) 92 | return 1 93 | } 94 | data[pathManagerLock] = manager.Hostname 95 | 96 | var master string 97 | err = app.dcs.Get(pathMasterNode, &master) 98 | if err != nil && err != dcs.ErrNotFound { 99 | app.logger.Errorf("failed to get %s: %v", pathMasterNode, err) 100 | return 1 101 | } 102 | data[pathMasterNode] = master 103 | tree = data 104 | } else { 105 | tree, err = app.dcs.GetTree("") 106 | if err != nil { 107 | app.logger.Error(err.Error()) 108 | return 1 109 | } 110 | } 111 | data, err := yaml.Marshal(tree) 112 | if err != nil { 113 | app.logger.Errorf("failed to marshal yaml: %v", err) 114 | return 1 115 | } 116 | fmt.Print(string(data)) 117 | return 0 118 | } 119 | -------------------------------------------------------------------------------- /internal/app/cli_maintenance.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/yandex/mysync/internal/dcs" 9 | "github.com/yandex/mysync/internal/util" 10 | ) 11 | 12 | // CliEnableMaintenance enables maintenance mode 13 | func (app *App) CliEnableMaintenance(waitTimeout time.Duration, reason string) int { 14 | ctx := app.baseContext() 15 | err := app.connectDCS() 16 | if err != nil { 17 | app.logger.Error(err.Error()) 18 | return 1 19 | } 20 | defer 
app.dcs.Close() 21 | app.dcs.Initialize() 22 | 23 | maintenance := &Maintenance{ 24 | InitiatedBy: util.GuessWhoRunning() + "@" + app.config.Hostname, 25 | InitiatedAt: time.Now(), 26 | Reason: reason, 27 | } 28 | err = app.dcs.Create(pathMaintenance, maintenance) 29 | if err != nil && err != dcs.ErrExists { 30 | app.logger.Error(err.Error()) 31 | return 1 32 | } 33 | // wait for mysync to pause 34 | if waitTimeout > 0 { 35 | waitCtx, cancel := context.WithTimeout(ctx, waitTimeout) 36 | defer cancel() 37 | ticker := time.NewTicker(time.Second) 38 | Out: 39 | for { 40 | select { 41 | case <-ticker.C: 42 | err = app.dcs.Get(pathMaintenance, maintenance) 43 | if err != nil { 44 | app.logger.Error(err.Error()) 45 | } 46 | if maintenance.MySyncPaused { 47 | break Out 48 | } 49 | case <-waitCtx.Done(): 50 | break Out 51 | } 52 | } 53 | if !maintenance.MySyncPaused { 54 | app.logger.Error("could not wait for mysync to enter maintenance") 55 | return 1 56 | } 57 | fmt.Println("maintenance enabled") 58 | } else { 59 | fmt.Println("maintenance scheduled") 60 | } 61 | return 0 62 | } 63 | 64 | // CliDisableMaintenance disables maintenance mode 65 | func (app *App) CliDisableMaintenance(waitTimeout time.Duration) int { 66 | ctx := app.baseContext() 67 | err := app.connectDCS() 68 | if err != nil { 69 | app.logger.Error(err.Error()) 70 | return 1 71 | } 72 | defer app.dcs.Close() 73 | app.dcs.Initialize() 74 | 75 | maintenance := &Maintenance{} 76 | err = app.dcs.Get(pathMaintenance, maintenance) 77 | if err == dcs.ErrNotFound { 78 | fmt.Println("maintenance disabled") 79 | return 0 80 | } else if err != nil { 81 | app.logger.Error(err.Error()) 82 | return 1 83 | } 84 | maintenance.ShouldLeave = true 85 | err = app.dcs.Set(pathMaintenance, maintenance) 86 | if err != nil { 87 | app.logger.Error(err.Error()) 88 | return 1 89 | } 90 | if waitTimeout > 0 { 91 | waitCtx, cancel := context.WithTimeout(ctx, waitTimeout) 92 | defer cancel() 93 | ticker := time.NewTicker(time.Second) 
94 | Out: 95 | for { 96 | select { 97 | case <-ticker.C: 98 | err = app.dcs.Get(pathMaintenance, maintenance) 99 | if err == dcs.ErrNotFound { 100 | maintenance = nil 101 | break Out 102 | } 103 | if err != nil { 104 | app.logger.Error(err.Error()) 105 | } 106 | case <-waitCtx.Done(): 107 | break Out 108 | } 109 | } 110 | if maintenance != nil { 111 | app.logger.Error("could not wait for mysync to leave maintenance") 112 | return 1 113 | } 114 | fmt.Println("maintenance disabled") 115 | } else { 116 | fmt.Println("maintenance disable scheduled") 117 | } 118 | return 0 119 | } 120 | 121 | // CliGetMaintenance prints on/off depending on current maintenance status 122 | func (app *App) CliGetMaintenance() int { 123 | err := app.connectDCS() 124 | if err != nil { 125 | app.logger.Error(err.Error()) 126 | return 1 127 | } 128 | defer app.dcs.Close() 129 | app.dcs.Initialize() 130 | 131 | err = app.dcs.Get(pathMaintenance, new(Maintenance)) 132 | switch err { 133 | case nil: 134 | fmt.Println("on") 135 | return 0 136 | case dcs.ErrNotFound: 137 | fmt.Println("off") 138 | return 0 139 | default: 140 | app.logger.Error(err.Error()) 141 | return 1 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /internal/app/cli_optimize.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/yandex/mysync/internal/mysql" 7 | ) 8 | 9 | // CliEnableOptimization enables optimization mode 10 | func (app *App) CliEnableOptimization() int { 11 | cancel, err := app.cliInitApp() 12 | if err != nil { 13 | fmt.Printf("%s\n", err) 14 | return 1 15 | } 16 | defer cancel() 17 | 18 | node := app.cluster.Local() 19 | status, err := app.cliGetHostOptimizationStatus(node) 20 | if err != nil { 21 | fmt.Printf("%s\n", err) 22 | return 1 23 | } 24 | 25 | if status == Optimizable { 26 | err = node.OptimizeReplication() 27 | if err != nil { 28 | fmt.Printf("%s\n", 
err) 29 | return 1 30 | } 31 | fmt.Println("The host optimization has been started.") 32 | return 0 33 | } 34 | 35 | fmt.Printf("Can't optimize host in status '%s'\n", status) 36 | return 1 37 | } 38 | 39 | // CliDisableOptimization disables optimization mode 40 | func (app *App) CliDisableOptimization() int { 41 | cancel, err := app.cliInitApp() 42 | if err != nil { 43 | fmt.Printf("%s\n", err) 44 | return 1 45 | } 46 | defer cancel() 47 | 48 | node := app.cluster.Local() 49 | err = app.SetDefaultReplicationSettingsForNode(node) 50 | if err != nil { 51 | fmt.Printf("%s\n", err) 52 | return 1 53 | } 54 | return 0 55 | } 56 | 57 | // CliGetOptimization gets optimization mode 58 | func (app *App) CliGetOptimization() int { 59 | cancel, err := app.cliInitApp() 60 | if err != nil { 61 | fmt.Printf("%s\n", err) 62 | return 1 63 | } 64 | defer cancel() 65 | 66 | node := app.cluster.Local() 67 | status, err := app.cliGetHostOptimizationStatus(node) 68 | if err != nil { 69 | fmt.Printf("%s\n", err) 70 | return 1 71 | } 72 | 73 | fmt.Printf("The host is in status '%s'\n", status) 74 | 75 | return 0 76 | } 77 | 78 | func (app *App) cliGetHostOptimizationStatus(localNode *mysql.Node) (HostOptimizationStatus, error) { 79 | status, err := localNode.GetReplicaStatus() 80 | if err != nil { 81 | return Unknown, err 82 | } 83 | if status == nil { 84 | return HostRoleMaster, nil 85 | } 86 | 87 | replicationSettings, err := localNode.GetReplicationSettings() 88 | if err != nil { 89 | return Unknown, nil 90 | } 91 | if replicationSettings.CanBeOptimized() { 92 | return Optimizable, nil 93 | } 94 | 95 | masterFqdn, err := app.GetMasterHostFromDcs() 96 | if err != nil { 97 | return Unknown, nil 98 | } 99 | 100 | master := app.cluster.Get(masterFqdn) 101 | masterReplicationSettings, err := master.GetReplicationSettings() 102 | if err != nil { 103 | return Unknown, nil 104 | } 105 | 106 | if masterReplicationSettings.Equal(&replicationSettings) { 107 | return UnoptimizableConfiguration, 
nil 108 | } 109 | return OptimizationRunning, nil 110 | } 111 | 112 | type HostOptimizationStatus string 113 | 114 | const ( 115 | OptimizationRunning HostOptimizationStatus = "optimization is running" 116 | Optimizable HostOptimizationStatus = "can be optimized" 117 | UnoptimizableConfiguration HostOptimizationStatus = "configuration of the cluster is already optimized" 118 | HostRoleMaster HostOptimizationStatus = "host is master" 119 | Unknown HostOptimizationStatus = "unknown" 120 | ) 121 | -------------------------------------------------------------------------------- /internal/app/cli_state.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | 6 | "gopkg.in/yaml.v2" 7 | ) 8 | 9 | // CliState print state of the cluster to the stdout 10 | func (app *App) CliState(short bool) int { 11 | cancel, err := app.cliInitApp() 12 | if err != nil { 13 | app.logger.Error(err.Error()) 14 | return 1 15 | } 16 | defer cancel() 17 | 18 | clusterState := app.getClusterStateFromDB() 19 | var tree any 20 | if short { 21 | clusterStateStrings := make(map[string]string) 22 | for host, state := range clusterState { 23 | clusterStateStrings[host] = state.String() 24 | } 25 | tree = clusterStateStrings 26 | } else { 27 | tree = clusterState 28 | } 29 | data, err := yaml.Marshal(tree) 30 | if err != nil { 31 | app.logger.Errorf("failed to marshal yaml: %v", err) 32 | return 1 33 | } 34 | fmt.Print(string(data)) 35 | return 0 36 | } 37 | -------------------------------------------------------------------------------- /internal/app/cli_util.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | // cliInitApp consolidates initialization logic for CLI commands to reduce boilerplate code. 4 | // The returned cleanup function closes the dcs and cluster connections to prevent leaks. 
5 | func (app *App) cliInitApp() (func(), error) { 6 | err := app.connectDCS() 7 | if err != nil { 8 | return nil, err 9 | } 10 | 11 | app.dcs.Initialize() 12 | err = app.newDBCluster() 13 | if err != nil { 14 | app.dcs.Close() 15 | return nil, err 16 | } 17 | 18 | err = app.cluster.UpdateHostsInfo() 19 | if err != nil { 20 | app.dcs.Close() 21 | app.cluster.Close() 22 | return nil, err 23 | } 24 | 25 | return func() { 26 | app.dcs.Close() 27 | app.cluster.Close() 28 | }, nil 29 | } 30 | -------------------------------------------------------------------------------- /internal/app/data_test.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestStringerWorksOnNodeState(t *testing.T) { 11 | ns := &NodeState{} 12 | nsStr := fmt.Sprintf("%v", ns) 13 | if nsStr != "" { 14 | t.Errorf("%s", ns) 15 | } 16 | 17 | ns.IsMaster = false 18 | ns.MasterState = new(MasterState) 19 | ns.MasterState.ExecutedGtidSet = "6DBC0B04-4B09-43DC-86CC-9AF852DED919:1-101" 20 | ns.SlaveState = new(SlaveState) 21 | ns.SlaveState.ExecutedGtidSet = "6DBC0B04-4B09-43DC-86CC-9AF852DED919:1-40" 22 | 23 | nsStr = fmt.Sprintf("%v", ns) 24 | 25 | require.Equal( 26 | t, 27 | "", 28 | nsStr, 29 | ) 30 | 31 | ns.ShowOnlyGTIDDiff = true 32 | nsStr = fmt.Sprintf("%v", ns) 33 | 34 | require.Equal( 35 | t, 36 | "", 37 | nsStr, 38 | ) 39 | } 40 | 41 | func TestStringerWorksOnNodeStateMap(t *testing.T) { 42 | m := make(map[string]*NodeState) 43 | m["a"] = &NodeState{} 44 | m["b"] = &NodeState{} 45 | m["c"] = &NodeState{} 46 | 47 | mStr := fmt.Sprintf("%v", m) 48 | 49 | require.Equal( 50 | t, 51 | "map[a: b: c:]", 52 | mStr, 53 | ) 54 | } 55 | 56 | func newMockNodeState() *NodeState { 57 | return &NodeState{ 58 | SlaveState: &SlaveState{ 59 | MasterLogFile: "test_master_log_file", 60 | MasterLogPos: 2, 61 | }, 62 | } 63 | } 64 | 65 | func 
TestUpdateBinlogWithChanges(t *testing.T) { 66 | oldLogFile := "test_master_log_file" 67 | maxLogPos := int64(1) 68 | 69 | ns := newMockNodeState() 70 | 71 | oldLogFile, maxLogPos = ns.UpdateBinlogStatus(oldLogFile, maxLogPos) 72 | 73 | require.Equal(t, "test_master_log_file", oldLogFile) 74 | require.Equal(t, int64(2), maxLogPos) 75 | require.Equal(t, true, ns.IsLoadingBinlog) 76 | } 77 | 78 | func TestUpdateBinlogWithoutChanges(t *testing.T) { 79 | oldLogFile := "test_master_log_file" 80 | maxLogPos := int64(2) 81 | 82 | ns := newMockNodeState() 83 | 84 | oldLogFile, maxLogPos = ns.UpdateBinlogStatus(oldLogFile, maxLogPos) 85 | 86 | require.Equal(t, "test_master_log_file", oldLogFile) 87 | require.Equal(t, int64(2), maxLogPos) 88 | require.Equal(t, false, ns.IsLoadingBinlog) 89 | } 90 | 91 | func TestUpdateBinlogAfterReloading(t *testing.T) { 92 | oldLogFile := "test_master_log_file" 93 | maxLogPos := int64(5) 94 | 95 | ns := newMockNodeState() 96 | 97 | oldLogFile, maxLogPos = ns.UpdateBinlogStatus(oldLogFile, maxLogPos) 98 | 99 | require.Equal(t, "test_master_log_file", oldLogFile) 100 | require.Equal(t, int64(5), maxLogPos) 101 | require.Equal(t, false, ns.IsLoadingBinlog) 102 | } 103 | 104 | func TestUpdateBinlogAfterMasterSwitch(t *testing.T) { 105 | oldLogFile := "old_log_file" 106 | maxLogPos := int64(5) 107 | 108 | ns := newMockNodeState() 109 | 110 | oldLogFile, maxLogPos = ns.UpdateBinlogStatus(oldLogFile, maxLogPos) 111 | 112 | require.Equal(t, "test_master_log_file", oldLogFile) 113 | require.Equal(t, int64(2), maxLogPos) 114 | require.Equal(t, false, ns.IsLoadingBinlog) 115 | } 116 | -------------------------------------------------------------------------------- /internal/dcs/config.go: -------------------------------------------------------------------------------- 1 | package dcs 2 | 3 | import ( 4 | "os" 5 | "time" 6 | 7 | "github.com/cenkalti/backoff/v4" 8 | ) 9 | 10 | // ZookeeperConfig contains Zookeeper connection info 11 | type 
// ZookeeperConfig contains Zookeeper connection info.
// Fix: Namespace and Hosts previously lacked explicit yaml tags while all
// sibling fields had them; tags added for consistency (values match the
// lowercase names yaml.v2 would infer, so behavior is unchanged).
type ZookeeperConfig struct {
	Hostname              string                   `config:"hostname" yaml:"hostname"`
	SessionTimeout        time.Duration            `config:"session_timeout" yaml:"session_timeout"`
	Namespace             string                   `config:"namespace,required" yaml:"namespace"`
	Hosts                 []string                 `config:"hosts,required" yaml:"hosts"`
	BackoffInterval       time.Duration            `config:"backoff_interval" yaml:"backoff_interval"`
	BackoffRandFactor     float64                  `config:"backoff_rand_factor" yaml:"backoff_rand_factor"`
	BackoffMultiplier     float64                  `config:"backoff_multiplier" yaml:"backoff_multiplier"`
	BackoffMaxInterval    time.Duration            `config:"backoff_max_interval" yaml:"backoff_max_interval"`
	BackoffMaxElapsedTime time.Duration            `config:"backoff_max_elapsed_time" yaml:"backoff_max_elapsed_time"`
	BackoffMaxRetries     uint64                   `config:"backoff_max_retries" yaml:"backoff_max_retries"`
	RandomHostProvider    RandomHostProviderConfig `config:"random_host_provider" yaml:"random_host_provider"`
	Auth                  bool                     `config:"auth" yaml:"auth"`
	Username              string                   `config:"username" yaml:"username"`
	Password              string                   `config:"password" yaml:"password"`
	UseSSL                bool                     `config:"use_ssl" yaml:"use_ssl"`
	KeyFile               string                   `config:"keyfile" yaml:"keyfile"`
	CertFile              string                   `config:"certfile" yaml:"certfile"`
	CACert                string                   `config:"ca_cert" yaml:"ca_cert"`
	VerifyCerts           bool                     `config:"verify_certs" yaml:"verify_certs"`
}

// RandomHostProviderConfig tunes background DNS resolution of ZK hosts.
type RandomHostProviderConfig struct {
	LookupTimeout      time.Duration `config:"lookup_timeout" yaml:"lookup_timeout"`
	LookupTTL          time.Duration `config:"lookup_ttl" yaml:"lookup_ttl"`
	LookupTickInterval time.Duration `config:"lookup_tick_interval" yaml:"lookup_tick_interval"`
}

// DefaultRandomHostProviderConfig returns the default host-provider settings:
// 3s per-lookup timeout, 5min cached-resolution TTL, 1min refresh tick.
func DefaultRandomHostProviderConfig() RandomHostProviderConfig {
	return RandomHostProviderConfig{
		LookupTimeout:      3 * time.Second,
		LookupTTL:          300 * time.Second,
		LookupTickInterval: 60 * time.Second,
	}
}
configuration 48 | func DefaultZookeeperConfig() (ZookeeperConfig, error) { 49 | hostname, err := os.Hostname() 50 | if err != nil { 51 | return ZookeeperConfig{}, err 52 | } 53 | config := ZookeeperConfig{ 54 | Hostname: hostname, 55 | SessionTimeout: 2 * time.Second, 56 | BackoffInterval: backoff.DefaultInitialInterval, 57 | BackoffRandFactor: backoff.DefaultRandomizationFactor, 58 | BackoffMultiplier: backoff.DefaultMultiplier, 59 | BackoffMaxInterval: backoff.DefaultMaxInterval, 60 | BackoffMaxElapsedTime: backoff.DefaultMaxElapsedTime, 61 | BackoffMaxRetries: 10, 62 | RandomHostProvider: DefaultRandomHostProviderConfig(), 63 | } 64 | return config, nil 65 | } 66 | -------------------------------------------------------------------------------- /internal/dcs/dcs.go: -------------------------------------------------------------------------------- 1 | package dcs 2 | 3 | import ( 4 | "errors" 5 | "strings" 6 | "time" 7 | ) 8 | 9 | /* 10 | DCS is the main interface representing data store 11 | DCS implementation should maintain connection to a server, 12 | track connection status changes (connected/disconnected) 13 | and perform basic operations 14 | */ 15 | type DCS interface { 16 | IsConnected() bool 17 | WaitConnected(timeout time.Duration) bool 18 | Initialize() // Create initial data structure if not exists 19 | SetDisconnectCallback(callback func() error) 20 | AcquireLock(path string) bool 21 | ReleaseLock(path string) 22 | Create(path string, value any) error 23 | CreateEphemeral(path string, value any) error 24 | Set(path string, value any) error 25 | SetEphemeral(path string, value any) error 26 | Get(path string, dest any) error 27 | Delete(path string) error 28 | GetTree(path string) (any, error) 29 | GetChildren(path string) ([]string, error) 30 | Close() 31 | } 32 | 33 | var ( 34 | // ErrExists means that node being created already exists 35 | ErrExists = errors.New("key already exists") 36 | // ErrNotFound means that requested not does not exist 37 | 
var (
	// ErrNotFound means that the requested node does not exist
	ErrNotFound = errors.New("key was not found in DCS")
	// ErrMalformed means that we failed to unmarshall received data
	ErrMalformed = errors.New("failed to parse DCS value, possibly data format changed")
)

// sep is the path separator for the most common DCS implementations;
// Zookeeper, etcd and consul all use a slash.
const sep = "/"

// LockOwner contains info about the process holding the lock
type LockOwner struct {
	Hostname string `json:"hostname"`
	Pid      int    `json:"pid"`
}

// JoinPath builds a node path from the given chunks, separated by sep.
func JoinPath(parts ...string) string {
	var b strings.Builder
	for i, part := range parts {
		if i > 0 {
			b.WriteString(sep)
		}
		b.WriteString(part)
	}
	return b.String()
}
0 48 | 49 | for _, host := range servers { 50 | resolved, err := rhp.resolveHost(host) 51 | if err != nil { 52 | rhp.logger.Errorf("host definition %s is invalid %v", host, err) 53 | continue 54 | } 55 | numResolved += len(resolved) 56 | rhp.hosts.Store(host, zkhost{ 57 | resolved: resolved, 58 | lastLookup: time.Now(), 59 | }) 60 | rhp.hostsKeys = append(rhp.hostsKeys, host) 61 | } 62 | 63 | if numResolved == 0 { 64 | return fmt.Errorf("unable to resolve any host from %v", servers) 65 | } 66 | 67 | go rhp.resolveHosts() 68 | 69 | return nil 70 | } 71 | 72 | func (rhp *RandomHostProvider) resolveHosts() { 73 | ticker := time.NewTicker(rhp.lookupTickInterval) 74 | for { 75 | select { 76 | case <-ticker.C: 77 | for _, pair := range rhp.hostsKeys { 78 | host, _ := rhp.hosts.Load(pair) 79 | zhost := host.(zkhost) 80 | 81 | if len(zhost.resolved) == 0 || time.Since(zhost.lastLookup) > rhp.lookupTTL { 82 | resolved, err := rhp.resolveHost(pair) 83 | if err != nil || len(resolved) == 0 { 84 | rhp.logger.Errorf("background resolve for %s failed: %v", pair, err) 85 | continue 86 | } 87 | rhp.hosts.Store(pair, zkhost{ 88 | resolved: resolved, 89 | lastLookup: time.Now(), 90 | }) 91 | } 92 | } 93 | case <-rhp.ctx.Done(): 94 | return 95 | } 96 | } 97 | } 98 | 99 | func (rhp *RandomHostProvider) resolveHost(pair string) ([]string, error) { 100 | var res []string 101 | host, port, err := net.SplitHostPort(pair) 102 | if err != nil { 103 | return res, err 104 | } 105 | ctx, cancel := context.WithTimeout(rhp.ctx, rhp.lookupTimeout) 106 | defer cancel() 107 | addrs, err := rhp.resolver.LookupHost(ctx, host) 108 | if err != nil { 109 | rhp.logger.Errorf("unable to resolve %s: %v", host, err) 110 | } 111 | for _, addr := range addrs { 112 | res = append(res, net.JoinHostPort(addr, port)) 113 | } 114 | 115 | return res, nil 116 | } 117 | 118 | func (rhp *RandomHostProvider) Len() int { 119 | return len(rhp.hostsKeys) 120 | } 121 | 122 | func (rhp *RandomHostProvider) Next() (server 
string, retryStart bool) { 123 | needRetry := false 124 | 125 | var ret string 126 | 127 | for len(ret) == 0 { 128 | notTried := []string{} 129 | 130 | for _, host := range rhp.hostsKeys { 131 | if _, ok := rhp.tried[host]; !ok { 132 | notTried = append(notTried, host) 133 | } 134 | } 135 | 136 | var selected string 137 | if len(notTried) == 0 { 138 | needRetry = true 139 | for k := range rhp.tried { 140 | delete(rhp.tried, k) 141 | } 142 | selected = rhp.hostsKeys[rand.Intn(len(rhp.hostsKeys))] 143 | } else { 144 | selected = notTried[rand.Intn(len(notTried))] 145 | } 146 | rhp.tried[selected] = struct{}{} 147 | 148 | host, _ := rhp.hosts.Load(selected) 149 | zhost := host.(zkhost) 150 | 151 | if len(zhost.resolved) > 0 { 152 | if rhp.useAddrs { 153 | ret = zhost.resolved[rand.Intn(len(zhost.resolved))] 154 | } else { 155 | ret = selected 156 | } 157 | } 158 | } 159 | 160 | return ret, needRetry 161 | } 162 | 163 | func (rhp *RandomHostProvider) Connected() { 164 | for k := range rhp.tried { 165 | delete(rhp.tried, k) 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /internal/dcs/zk_test.go: -------------------------------------------------------------------------------- 1 | package dcs 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestBuildFullPath(t *testing.T) { 10 | z := &zkDCS{config: &ZookeeperConfig{Namespace: "//abc//def"}} 11 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("/xyz/")) 12 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("xyz")) 13 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("////xyz////")) 14 | require.Equal(t, "/abc/def", z.buildFullPath("")) 15 | z = &zkDCS{config: &ZookeeperConfig{Namespace: "//abc//def/"}} 16 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("/xyz/")) 17 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("xyz")) 18 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("////xyz////")) 19 | require.Equal(t, 
// constError is a minimal allocation-free error type, declared locally so
// the fix below does not require adding a new import to this file.
type constError string

func (e constError) Error() string { return string(e) }

// errNoRootCACerts is returned when the root CA file contains no
// parseable PEM certificates.
const errNoRootCACerts = constError("no root CA certificates could be parsed from PEM file")

// CreateTLSConfig builds a client TLS configuration from a root CA
// certificate file and a client certificate/key pair.
// It returns an error if any of the files cannot be read or parsed.
//
// BUG FIX: on AppendCertsFromPEM failure the previous version returned
// "nil, err" where err was always nil at that point, silently producing a
// (nil, nil) result for a malformed/empty CA file; a real error is now returned.
func CreateTLSConfig(rootCAFile, certFile, keyFile string) (*tls.Config, error) {
	rootCABytes, err := os.ReadFile(rootCAFile)
	if err != nil {
		return nil, err
	}

	rootCA := x509.NewCertPool()
	if ok := rootCA.AppendCertsFromPEM(rootCABytes); !ok {
		return nil, errNoRootCACerts
	}

	cert, err := tls.LoadX509KeyPair(certFile, keyFile)
	if err != nil {
		return nil, err
	}

	return &tls.Config{
		Certificates: []tls.Certificate{cert},
		RootCAs:      rootCA,
	}, nil
}
"error": 39 | return ERROR, nil 40 | case "fatal": 41 | return FATAL, nil 42 | default: 43 | return 0, fmt.Errorf("unknown log level %q", level) 44 | } 45 | } 46 | 47 | func (lvl Level) String() string { 48 | switch lvl { 49 | case DEBUG: 50 | return "DEBUG" 51 | case INFO: 52 | return "INFO" 53 | case WARN: 54 | return "WARN" 55 | case ERROR: 56 | return "ERROR" 57 | case FATAL: 58 | return "FATAL" 59 | default: 60 | return "" 61 | } 62 | } 63 | 64 | type Logger struct { 65 | path string 66 | fh *os.File 67 | m sync.Mutex 68 | lvl Level 69 | } 70 | 71 | func Open(path, level string) (*Logger, error) { 72 | l := &Logger{path: path} 73 | lvl, err := parseLevel(level) 74 | if err != nil { 75 | return nil, err 76 | } 77 | l.lvl = lvl 78 | err = l.ReOpen() 79 | if err != nil { 80 | return nil, err 81 | } 82 | return l, nil 83 | } 84 | 85 | func (l *Logger) ReOpen() error { 86 | l.m.Lock() 87 | defer l.m.Unlock() 88 | if l.path == "" || l.path == "/dev/stderr" { 89 | l.fh = os.Stderr 90 | return nil 91 | } 92 | fh, err := os.OpenFile(l.path, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) 93 | if err != nil { 94 | return fmt.Errorf("failed to open log %s: %w", l.path, err) 95 | } 96 | if l.fh != nil { 97 | _ = l.fh.Close() 98 | } 99 | l.fh = fh 100 | return nil 101 | } 102 | 103 | func (l *Logger) ReOpenOnSignal(sig syscall.Signal, syslog *syslog.Writer) { 104 | sigs := make(chan os.Signal, 1) 105 | signal.Notify(sigs, sig) 106 | go func() { 107 | for { 108 | <-sigs 109 | err := l.ReOpen() 110 | if err != nil { 111 | WriteSysLogError(syslog, fmt.Sprintf("failed to reopen log file: %v", err)) 112 | } 113 | } 114 | }() 115 | } 116 | 117 | func (l *Logger) printf(lvl Level, msg string, args ...any) { 118 | if lvl < l.lvl { 119 | return 120 | } 121 | data := fmt.Sprintf("%s %s: ", time.Now().Format(timeFormat), lvl) + fmt.Sprintf(msg, args...) 
+ "\n" 122 | l.m.Lock() 123 | _, _ = l.fh.Write([]byte(data)) 124 | l.m.Unlock() 125 | } 126 | 127 | func (l *Logger) Debug(msg string) { 128 | l.Debugf("%s", msg) 129 | } 130 | 131 | func (l *Logger) Info(msg string) { 132 | l.Infof("%s", msg) 133 | } 134 | 135 | func (l *Logger) Warn(msg string) { 136 | l.Warnf("%s", msg) 137 | } 138 | 139 | func (l *Logger) Error(msg string) { 140 | l.Errorf("%s", msg) 141 | } 142 | 143 | func (l *Logger) Fatal(msg string) { 144 | l.Fatalf("%s", msg) 145 | } 146 | 147 | func (l *Logger) Debugf(msg string, args ...any) { 148 | l.printf(DEBUG, msg, args...) 149 | } 150 | 151 | func (l *Logger) Infof(msg string, args ...any) { 152 | l.printf(INFO, msg, args...) 153 | } 154 | 155 | func (l *Logger) Warnf(msg string, args ...any) { 156 | l.printf(WARN, msg, args...) 157 | } 158 | 159 | func (l *Logger) Errorf(msg string, args ...any) { 160 | l.printf(ERROR, msg, args...) 161 | } 162 | 163 | func (l *Logger) Fatalf(msg string, args ...any) { 164 | l.printf(FATAL, msg, args...) 
// WriteSysLogInfo writes msg to the syslog writer at INFO priority,
// falling back to stderr when no syslog writer is available.
func WriteSysLogInfo(sysLog *syslog.Writer, msg string) {
	if sysLog == nil {
		os.Stderr.WriteString(msg)
		return
	}
	// nolint: errcheck
	sysLog.Info(msg)
}

// WriteSysLogError writes msg to the syslog writer at ERR priority,
// falling back to stderr when no syslog writer is available.
func WriteSysLogError(sysLog *syslog.Writer, msg string) {
	if sysLog == nil {
		os.Stderr.WriteString(msg)
		return
	}
	// nolint: errcheck
	sysLog.Err(msg)
}
mysqlMasterGtidSet.Sets[slaveSet.SID.String()] 21 | if !ok { 22 | return true 23 | } 24 | 25 | if masterSet.Contain(slaveSet) { 26 | continue 27 | } 28 | 29 | if masterSet.SID == masterUUID { 30 | continue 31 | } 32 | 33 | return true 34 | } 35 | 36 | return false 37 | } 38 | -------------------------------------------------------------------------------- /internal/mysql/gtids/wrapper.go: -------------------------------------------------------------------------------- 1 | package gtids 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/go-mysql-org/go-mysql/mysql" 7 | ) 8 | 9 | type GTIDSet = mysql.GTIDSet 10 | 11 | func ParseGtidSet(gtidset string) GTIDSet { 12 | parsed, err := mysql.ParseGTIDSet(mysql.MySQLFlavor, gtidset) 13 | if err != nil { 14 | panic(err) 15 | } 16 | return parsed 17 | } 18 | 19 | func GTIDDiff(replicaGTIDSet, sourceGTIDSet mysql.GTIDSet) (string, error) { 20 | mysqlReplicaGTIDSet := replicaGTIDSet.(*mysql.MysqlGTIDSet) 21 | mysqlSourceGTIDSet := sourceGTIDSet.(*mysql.MysqlGTIDSet) 22 | // check standard case 23 | diffWithSource := mysqlSourceGTIDSet.Clone().(*mysql.MysqlGTIDSet) 24 | err := diffWithSource.Minus(*mysqlReplicaGTIDSet) 25 | if err != nil { 26 | return "", err 27 | } 28 | 29 | // check reverse case 30 | diffWithReplica := mysqlReplicaGTIDSet.Clone().(*mysql.MysqlGTIDSet) 31 | err = diffWithReplica.Minus(*mysqlSourceGTIDSet) 32 | if err != nil { 33 | return "", err 34 | } 35 | 36 | if diffWithSource.String() == "" && diffWithReplica.String() == "" { 37 | return "replica gtid equal source", nil 38 | } 39 | 40 | if diffWithSource.String() != "" && diffWithReplica.String() == "" { 41 | return fmt.Sprintf("source ahead on: %s", diffWithSource.String()), nil 42 | } 43 | 44 | if diffWithSource.String() != "" && diffWithReplica.String() != "" { 45 | return fmt.Sprintf("split brain! 
source ahead on: %s; replica ahead on: %s", diffWithSource.String(), diffWithReplica.String()), nil 46 | } 47 | 48 | if diffWithSource.String() == "" && diffWithReplica.String() != "" { 49 | return fmt.Sprintf("replica ahead on: %s", diffWithReplica.String()), nil 50 | } 51 | 52 | return "", fmt.Errorf("an indefinite case was obtained") 53 | } 54 | -------------------------------------------------------------------------------- /internal/mysql/gtids/wrapper_test.go: -------------------------------------------------------------------------------- 1 | package gtids 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestGTIDDiff(t *testing.T) { 10 | sourceGTID := ParseGtidSet("00000000-0000-0000-0000-000000000000:1-100,11111111-1111-1111-1111-111111111111:1-100") 11 | 12 | // equal 13 | replicaGTID := ParseGtidSet("00000000-0000-0000-0000-000000000000:1-100,11111111-1111-1111-1111-111111111111:1-100") 14 | diff, err := GTIDDiff(replicaGTID, sourceGTID) 15 | require.NoError(t, err) 16 | require.Equal(t, "replica gtid equal source", diff) 17 | 18 | replicaGTID = ParseGtidSet("11111111-1111-1111-1111-111111111111:1-100,00000000-0000-0000-0000-000000000000:1-100") 19 | diff, err = GTIDDiff(replicaGTID, sourceGTID) 20 | require.NoError(t, err) 21 | require.Equal(t, "replica gtid equal source", diff) 22 | 23 | // source ahead 24 | replicaGTID = ParseGtidSet("00000000-0000-0000-0000-000000000000:1-90,11111111-1111-1111-1111-111111111111:1-100") 25 | diff, err = GTIDDiff(replicaGTID, sourceGTID) 26 | require.NoError(t, err) 27 | require.Equal(t, "source ahead on: 00000000-0000-0000-0000-000000000000:91-100", diff) 28 | 29 | replicaGTID = ParseGtidSet("00000000-0000-0000-0000-000000000000:1-90,11111111-1111-1111-1111-111111111111:1-90") 30 | diff, err = GTIDDiff(replicaGTID, sourceGTID) 31 | require.NoError(t, err) 32 | require.Equal(t, "source ahead on: 
00000000-0000-0000-0000-000000000000:91-100,11111111-1111-1111-1111-111111111111:91-100", diff) 33 | 34 | // replica ahead 35 | replicaGTID = ParseGtidSet("00000000-0000-0000-0000-000000000000:1-110,11111111-1111-1111-1111-111111111111:1-100") 36 | diff, err = GTIDDiff(replicaGTID, sourceGTID) 37 | require.NoError(t, err) 38 | require.Equal(t, "replica ahead on: 00000000-0000-0000-0000-000000000000:101-110", diff) 39 | 40 | // split brain 41 | replicaGTID = ParseGtidSet("00000000-0000-0000-0000-000000000000:1-90,11111111-1111-1111-1111-111111111111:1-110") 42 | diff, err = GTIDDiff(replicaGTID, sourceGTID) 43 | require.NoError(t, err) 44 | require.Equal(t, "split brain! source ahead on: 00000000-0000-0000-0000-000000000000:91-100; replica ahead on: 11111111-1111-1111-1111-111111111111:101-110", diff) 45 | 46 | replicaGTID = ParseGtidSet("00000000-0000-0000-0000-000000000000:1-100,22222222-2222-2222-2222-222222222222:1-110") 47 | diff, err = GTIDDiff(replicaGTID, sourceGTID) 48 | require.NoError(t, err) 49 | require.Equal(t, "split brain! 
source ahead on: 11111111-1111-1111-1111-111111111111:1-100; replica ahead on: 22222222-2222-2222-2222-222222222222:1-110", diff) 50 | } 51 | -------------------------------------------------------------------------------- /internal/mysql/switch_helper.go: -------------------------------------------------------------------------------- 1 | package mysql 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/yandex/mysync/internal/config" 8 | ) 9 | 10 | type ISwitchHelper interface { 11 | GetPriorityChoiceMaxLag() time.Duration 12 | GetRequiredWaitSlaveCount([]string) int 13 | GetFailoverQuorum([]string) int 14 | CheckFailoverQuorum([]string, int) error 15 | IsOptimizationPhaseAllowed() bool 16 | } 17 | 18 | type SwitchHelper struct { 19 | priorityChoiceMaxLag time.Duration 20 | rplSemiSyncMasterWaitForSlaveCount int 21 | SemiSync bool 22 | } 23 | 24 | func NewSwitchHelper(config *config.Config) ISwitchHelper { 25 | priorityChoiceMaxLag := config.PriorityChoiceMaxLag 26 | if config.ASync { 27 | if config.AsyncAllowedLag > config.PriorityChoiceMaxLag { 28 | priorityChoiceMaxLag = config.AsyncAllowedLag 29 | } 30 | } 31 | return &SwitchHelper{ 32 | priorityChoiceMaxLag: priorityChoiceMaxLag, 33 | rplSemiSyncMasterWaitForSlaveCount: config.RplSemiSyncMasterWaitForSlaveCount, 34 | SemiSync: config.SemiSync, 35 | } 36 | } 37 | 38 | func (sh *SwitchHelper) GetPriorityChoiceMaxLag() time.Duration { 39 | return sh.priorityChoiceMaxLag 40 | } 41 | 42 | // GetRequiredWaitSlaveCount Dynamically calculated version of RplSemiSyncMasterWaitForSlaveCount. 43 | // This variable can be lower than hard-configured RplSemiSyncMasterWaitForSlaveCount 44 | // when some semi-sync replicas are dead. 
45 | func (sh *SwitchHelper) GetRequiredWaitSlaveCount(activeNodes []string) int { 46 | wsc := min(len(activeNodes)/2, sh.rplSemiSyncMasterWaitForSlaveCount) 47 | return wsc 48 | } 49 | 50 | // GetFailoverQuorum Number of HA nodes to be alive to failover/switchover 51 | func (sh *SwitchHelper) GetFailoverQuorum(activeNodes []string) int { 52 | fq := max(len(activeNodes)-sh.GetRequiredWaitSlaveCount(activeNodes), 1) 53 | return fq 54 | } 55 | 56 | func (sh *SwitchHelper) CheckFailoverQuorum(activeNodes []string, permissibleSlaves int) error { 57 | if sh.SemiSync { 58 | failoverQuorum := sh.GetFailoverQuorum(activeNodes) 59 | if permissibleSlaves < failoverQuorum { 60 | return fmt.Errorf("no quorum, have %d replicas while %d is required", permissibleSlaves, failoverQuorum) 61 | } 62 | } else { 63 | if permissibleSlaves == 0 { 64 | return fmt.Errorf("no alive active replica found") 65 | } 66 | } 67 | return nil 68 | } 69 | 70 | func (sh *SwitchHelper) IsOptimizationPhaseAllowed() bool { 71 | return sh.SemiSync 72 | } 73 | -------------------------------------------------------------------------------- /internal/mysql/util.go: -------------------------------------------------------------------------------- 1 | package mysql 2 | 3 | import ( 4 | "slices" 5 | 6 | "github.com/go-sql-driver/mysql" 7 | ) 8 | 9 | var dubiousErrorNumbers = []uint16{ 10 | 1040, // Symbol: ER_CON_COUNT_ERROR; SQLSTATE: 08004 11 | 1129, // Symbol: ER_HOST_IS_BLOCKED; SQLSTATE: HY000 12 | 1130, // Symbol: ER_HOST_NOT_PRIVILEGED; SQLSTATE: HY000 13 | 1203, // Symbol: ER_TOO_MANY_USER_CONNECTIONS; SQLSTATE: 42000 14 | 3159, // Symbol: ER_SECURE_TRANSPORT_REQUIRED; SQLSTATE: HY000 15 | 1045, // Symbol: ER_ACCESS_DENIED_ERROR; SQLSTATE: 28000 16 | 1044, // Symbol: ER_DBACCESS_DENIED_ERROR; SQLSTATE: 42000 17 | 1698, // Symbol: ER_ACCESS_DENIED_NO_PASSWORD_ERROR; SQLSTATE: 28000 18 | } 19 | 20 | const ( 21 | channelDoesNotExists = 3074 // Symbol: ER_REPLICA_CHANNEL_DOES_NOT_EXIST; SQLSTATE: HY000 22 | 
tableDoesNotExists = 1146 // Symbol: ER_NO_SUCH_TABLE; SQLSTATE: 42S02 23 | ) 24 | 25 | // IsErrorDubious check that error may be caused by misconfiguration, mysync/scripts bugs 26 | // and not related to MySQL/network failure 27 | func IsErrorDubious(err error) bool { 28 | if err == nil { 29 | return false 30 | } 31 | mysqlErr, ok := err.(*mysql.MySQLError) 32 | if !ok { 33 | return false 34 | } 35 | return slices.Contains(dubiousErrorNumbers, mysqlErr.Number) 36 | } 37 | 38 | func IsErrorChannelDoesNotExists(err error) bool { 39 | if err == nil { 40 | return false 41 | } 42 | mysqlErr, ok := err.(*mysql.MySQLError) 43 | if !ok { 44 | return false 45 | } 46 | if mysqlErr.Number == channelDoesNotExists { 47 | return true 48 | } 49 | return false 50 | } 51 | 52 | func IsErrorTableDoesNotExists(err error) bool { 53 | if err == nil { 54 | return false 55 | } 56 | mysqlErr, ok := err.(*mysql.MySQLError) 57 | if !ok { 58 | return false 59 | } 60 | if mysqlErr.Number == tableDoesNotExists { 61 | return true 62 | } 63 | return false 64 | } 65 | -------------------------------------------------------------------------------- /internal/util/consts.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | type ExternalReplicationType string 4 | 5 | const ( 6 | Disabled ExternalReplicationType = "off" 7 | MyExternalReplication ExternalReplicationType = "external" 8 | ) 9 | -------------------------------------------------------------------------------- /internal/util/user.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "os" 5 | "slices" 6 | 7 | "github.com/shirou/gopsutil/v3/process" 8 | ) 9 | 10 | var notInformativeUsernames = []string{"root", "mysql"} 11 | 12 | func GuessWhoRunning() string { 13 | pid := os.Getppid() 14 | 15 | p, err := process.NewProcess(int32(pid)) 16 | if err != nil { 17 | return "" 18 | } 19 | 20 | for range 50 { 21 | if p == 
// JoinHostPort combines addr and a numeric port into a network address of
// the form "host:port", bracketing IPv6 literals as "[host]:port".
func JoinHostPort(addr string, port int) string {
	return net.JoinHostPort(addr, strconv.Itoa(port))
}

// GetEnvVariable returns the environment variable by name,
// or def when the variable is not set.
func GetEnvVariable(name, def string) string {
	if val, ok := os.LookupEnv(name); ok {
		return val
	}
	return def
}

// SelectNodes returns the hosts (from the given list) starting with the
// specified match string. A leading ^ in match is discarded (backward
// compatibility). (Doc fix: the comment previously named "SelectNode".)
func SelectNodes(hosts []string, match string) []string {
	match = strings.TrimPrefix(match, "^")
	res := make([]string, 0)
	for _, host := range hosts {
		if strings.HasPrefix(host, match) {
			res = append(res, host)
		}
	}
	return res
}

// TouchFile creates an empty file at fname if it does not exist; an
// existing file is left untouched.
// Fix: unexpected Stat errors (e.g. permission denied) are now returned
// instead of being silently swallowed as success.
func TouchFile(fname string) error {
	_, err := os.Stat(fname)
	switch {
	case err == nil:
		return nil
	case os.IsNotExist(err):
		return os.WriteFile(fname, []byte(""), 0644)
	default:
		return err
	}
}
// CombineErrors flattens a per-host error map into a single error whose
// message is the concatenation of every non-nil error message, each
// terminated by ';'. It returns nil when no errors are present.
func CombineErrors(allErrors map[string]error) error {
	var sb strings.Builder
	for _, err := range allErrors {
		if err != nil {
			sb.WriteString(err.Error())
			sb.WriteString(";")
		}
	}
	if sb.Len() == 0 {
		return nil
	}
	return errors.New(sb.String())
}

// FilterStrings returns the elements of heap for which cond is true,
// preserving order; it returns nil when nothing matches.
func FilterStrings(heap []string, cond func(s string) bool) []string {
	var kept []string
	for _, s := range heap {
		if cond(s) {
			kept = append(kept, s)
		}
	}
	return kept
}
json_exactly within "30" seconds 22 | """ 23 | ["mysql1","mysql2"] 24 | """ 25 | And zookeeper node "/test/master" should match regexp 26 | """ 27 | .*mysql1.* 28 | """ 29 | When host "mysql1" is stopped 30 | Then mysql host "mysql1" should become unavailable within "10" seconds 31 | When I wait for "10" seconds 32 | Then zookeeper node "/test/active_nodes" should match json_exactly 33 | """ 34 | ["mysql1","mysql2"] 35 | """ 36 | Then zookeeper node "/test/master" should match regexp within "30" seconds 37 | """ 38 | .*mysql2.* 39 | """ 40 | Then mysql host "mysql2" should be master 41 | And mysql host "mysql2" should be writable 42 | And zookeeper node "/test/active_nodes" should match json_exactly within "10" seconds 43 | """ 44 | ["mysql2"] 45 | """ 46 | When host "mysql3" is added 47 | Then mysql host "mysql3" should become available within "10" seconds 48 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 49 | """ 50 | ["mysql2","mysql3"] 51 | """ 52 | When host "mysql1" is started 53 | Then mysql host "mysql1" should become available within "10" seconds 54 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 55 | """ 56 | ["mysql1","mysql2","mysql3"] 57 | """ 58 | 59 | Scenario: active nodes works with splitbrain 60 | Given cluster environment is 61 | """ 62 | MYSYNC_FAILOVER=true 63 | MYSYNC_FAILOVER_DELAY=0s 64 | MYSYNC_FAILOVER_COOLDOWN=0s 65 | """ 66 | Given cluster is up and running 67 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 68 | """ 69 | ["mysql1","mysql2","mysql3"] 70 | """ 71 | And zookeeper node "/test/master" should match regexp 72 | """ 73 | .*mysql1.* 74 | """ 75 | When I run command on host "mysql1" 76 | """ 77 | mysync maint on 78 | """ 79 | Then command return code should be "0" 80 | And I wait for "5" seconds 81 | When I run SQL on mysql host "mysql2" 82 | """ 83 | SET GLOBAL READ_ONLY=0; 84 | """ 85 | When I run SQL on mysql 
host "mysql2" 86 | """ 87 | CREATE TABLE mysql.test_table1 ( 88 | value VARCHAR(30) 89 | ) ENGINE=INNODB; 90 | """ 91 | When I run command on host "mysql1" 92 | """ 93 | mysync maint off 94 | """ 95 | Then command return code should be "0" 96 | When I wait for "10" seconds 97 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 98 | """ 99 | ["mysql1","mysql3"] 100 | """ 101 | 102 | Scenario: active nodes works with broken replication 103 | Given cluster environment is 104 | """ 105 | MYSYNC_FAILOVER=true 106 | MYSYNC_FAILOVER_DELAY=0s 107 | MYSYNC_FAILOVER_COOLDOWN=0s 108 | """ 109 | Given cluster is up and running 110 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 111 | """ 112 | ["mysql1","mysql2","mysql3"] 113 | """ 114 | And zookeeper node "/test/master" should match regexp 115 | """ 116 | .*mysql1.* 117 | """ 118 | When I break replication on host "mysql2" 119 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 120 | """ 121 | ["mysql1","mysql3"] 122 | """ 123 | 124 | Scenario: active nodes honors inactivation timeout 125 | Given cluster environment is 126 | """ 127 | MYSYNC_INACTIVATION_DELAY=40s 128 | """ 129 | Given cluster is up and running 130 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 131 | """ 132 | ["mysql1","mysql2","mysql3"] 133 | """ 134 | And zookeeper node "/test/master" should match regexp 135 | """ 136 | .*mysql1.* 137 | """ 138 | When host "mysql2" is detached from the network 139 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 140 | """ 141 | ["mysql1","mysql2","mysql3"] 142 | """ 143 | When I wait for "30" seconds 144 | Then zookeeper node "/test/active_nodes" should match json_exactly 145 | """ 146 | ["mysql1","mysql2","mysql3"] 147 | """ 148 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 149 | """ 150 
| ["mysql1","mysql3"] 151 | """ 152 | When host "mysql2" is attached to the network 153 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 154 | """ 155 | ["mysql1","mysql2","mysql3"] 156 | """ 157 | -------------------------------------------------------------------------------- /tests/features/crash_recovery.feature: -------------------------------------------------------------------------------- 1 | Feature: resetup after crash recovery 2 | 3 | Scenario: replica resetup after crash recovery 4 | Given cluster environment is 5 | """ 6 | MYSYNC_RESETUP_CRASHED_HOSTS=true 7 | """ 8 | Given cluster is up and running 9 | Then mysql host "mysql1" should be master 10 | When mysql on host "mysql2" is killed 11 | Then mysql host "mysql2" should become unavailable within "60" seconds 12 | When mysql on host "mysql2" is started 13 | Then mysql host "mysql2" should become available within "60" seconds 14 | Then host "mysql2" should have file "/tmp/mysync.resetup" within "60" seconds 15 | 16 | Scenario: force failover for crashed master 17 | Given cluster environment is 18 | """ 19 | MYSYNC_RESETUP_CRASHED_HOSTS=true 20 | MYSYNC_FAILOVER=true 21 | MYSYNC_FAILOVER_DELAY=120s 22 | """ 23 | Given cluster is up and running 24 | Then mysql host "mysql1" should be master 25 | And zookeeper node "/test/active_nodes" should match json_exactly within "20" seconds 26 | """ 27 | ["mysql1","mysql2","mysql3"] 28 | """ 29 | When mysql on host "mysql1" is killed 30 | Then mysql host "mysql1" should become unavailable within "60" seconds 31 | When mysql on host "mysql1" is started 32 | Then mysql host "mysql1" should become available within "60" seconds 33 | Then zookeeper node "/test/last_switch" should match json within "30" seconds 34 | """ 35 | { 36 | "cause": "auto", 37 | "from": "mysql1", 38 | "result": { 39 | "ok": true 40 | } 41 | } 42 | """ 43 | And zookeeper node "/test/master" should match regexp 44 | """ 45 | .*mysql[23].* 46 | """ 47 | Then 
host "mysql1" should have file "/tmp/mysync.resetup" within "60" seconds 48 | 49 | Scenario: cascade failures within cooldown should not leave master offline 50 | Given cluster environment is 51 | """ 52 | MYSYNC_RESETUP_CRASHED_HOSTS=true 53 | MYSYNC_FAILOVER=true 54 | MYSYNC_FAILOVER_DELAY=180s 55 | MYSYNC_FAILOVER_COOLDOWN=600s 56 | """ 57 | Given cluster is up and running 58 | Then mysql host "mysql1" should be master 59 | And zookeeper node "/test/active_nodes" should match json_exactly within "20" seconds 60 | """ 61 | ["mysql1","mysql2","mysql3"] 62 | """ 63 | When mysql on host "mysql1" is killed 64 | Then mysql host "mysql1" should become unavailable within "60" seconds 65 | When mysql on host "mysql1" is started 66 | Then mysql host "mysql1" should become available within "60" seconds 67 | And zookeeper node "/test/last_switch" should match json within "30" seconds 68 | """ 69 | { 70 | "cause": "auto", 71 | "from": "mysql1", 72 | "result": { 73 | "ok": true 74 | } 75 | } 76 | """ 77 | And host "mysql1" should have file "/tmp/mysync.resetup" within "60" seconds 78 | 79 | When I get zookeeper node "/test/master" 80 | And I save zookeeper query result as "new_master" 81 | Then mysql host "{{.new_master}}" should be master 82 | When mysql on host "{{.new_master}}" is killed 83 | Then mysql host "{{.new_master}}" should become unavailable within "60" seconds 84 | When mysql on host "{{.new_master}}" is started 85 | Then mysql host "{{.new_master}}" should become available within "60" seconds 86 | # As cooldown not elapsed 87 | Then mysql host "{{.new_master}}" should be master 88 | And mysql host "{{.new_master}}" should become writable within "15" seconds 89 | And mysql host "{{.new_master}}" should be online within "15" seconds 90 | -------------------------------------------------------------------------------- /tests/features/events_reenable.84.feature: -------------------------------------------------------------------------------- 1 | Feature: mysync 
reenables slaveside disabled events 2 | 3 | Scenario: reenable events after switchover 4 | Given cluster is up and running 5 | Then mysql host "mysql1" should be master 6 | And mysql host "mysql2" should be replica of "mysql1" 7 | And mysql replication on host "mysql2" should run fine within "5" seconds 8 | And mysql host "mysql3" should be replica of "mysql1" 9 | And mysql replication on host "mysql3" should run fine within "5" seconds 10 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 11 | """ 12 | ["mysql1","mysql2","mysql3"] 13 | """ 14 | When I run SQL on mysql host "mysql1" 15 | """ 16 | CREATE TABLE mysql.mdb_repl_mon( 17 | ts TIMESTAMP(3) 18 | ) ENGINE=INNODB; 19 | """ 20 | And I run SQL on mysql host "mysql1" 21 | """ 22 | INSERT INTO mysql.mdb_repl_mon VALUES(CURRENT_TIMESTAMP(3)); 23 | """ 24 | And I run SQL on mysql host "mysql1" 25 | """ 26 | CREATE EVENT mysql.mdb_repl_mon_event 27 | ON SCHEDULE EVERY 1 SECOND 28 | DO UPDATE mysql.mdb_repl_mon SET ts = CURRENT_TIMESTAMP(3); 29 | """ 30 | And I run SQL on mysql host "mysql1" 31 | """ 32 | CREATE DEFINER = "user123" EVENT mysql.event_test_definer 33 | ON SCHEDULE EVERY 1 SECOND 34 | DO UPDATE mysql.mdb_repl_mon SET ts = CURRENT_TIMESTAMP(3); 35 | """ 36 | And I run SQL on mysql host "mysql1" 37 | """ 38 | CREATE DEFINER = "user456@host789" EVENT mysql.event_test_definer_with_host 39 | ON SCHEDULE EVERY 1 SECOND 40 | DO UPDATE mysql.mdb_repl_mon SET ts = CURRENT_TIMESTAMP(3); 41 | """ 42 | Then mysql host "mysql1" should have event "mysql.mdb_repl_mon_event" in status "ENABLED" 43 | And mysql host "mysql1" should have event "mysql.event_test_definer" in status "ENABLED" 44 | And mysql host "mysql1" should have event "mysql.event_test_definer_with_host" in status "ENABLED" 45 | And mysql host "mysql2" should have event "mysql.mdb_repl_mon_event" in status "REPLICA_SIDE_DISABLED" within "10" seconds 46 | And mysql host "mysql2" should have event 
"mysql.event_test_definer" in status "REPLICA_SIDE_DISABLED" within "10" seconds 47 | And mysql host "mysql2" should have event "mysql.event_test_definer_with_host" in status "REPLICA_SIDE_DISABLED" within "10" seconds 48 | When I run command on host "mysql1" 49 | """ 50 | mysync switch --to mysql2 --wait=0s 51 | """ 52 | Then command return code should be "0" 53 | Then zookeeper node "/test/last_switch" should match json within "30" seconds 54 | """ 55 | { 56 | "from": "", 57 | "to": "mysql2", 58 | "result": { 59 | "ok": true 60 | } 61 | } 62 | 63 | """ 64 | Then mysql host "mysql2" should be master 65 | And mysql host "mysql1" should be replica of "mysql2" 66 | 67 | Then mysql host "mysql2" should have event "mysql.mdb_repl_mon_event" in status "ENABLED" 68 | And mysql host "mysql2" should have event "mysql.event_test_definer" in status "ENABLED" 69 | And mysql host "mysql2" should have event "mysql.event_test_definer_with_host" in status "ENABLED" 70 | And mysql host "mysql1" should have event "mysql.mdb_repl_mon_event" in status "REPLICA_SIDE_DISABLED" within "10" seconds 71 | And mysql host "mysql1" should have event "mysql.event_test_definer" in status "REPLICA_SIDE_DISABLED" within "10" seconds 72 | And mysql host "mysql1" should have event "mysql.event_test_definer_with_host" in status "REPLICA_SIDE_DISABLED" within "10" seconds 73 | And mysql host "mysql2" should have event "mysql.event_test_definer" of definer "user123@%" 74 | And mysql host "mysql2" should have event "mysql.event_test_definer_with_host" of definer "user456@host789" 75 | -------------------------------------------------------------------------------- /tests/features/events_reenable.feature: -------------------------------------------------------------------------------- 1 | Feature: mysync reenables slaveside disabled events 2 | 3 | Scenario: reenable events after switchover 4 | Given cluster is up and running 5 | Then mysql host "mysql1" should be master 6 | And mysql host "mysql2" 
should be replica of "mysql1" 7 | And mysql replication on host "mysql2" should run fine within "5" seconds 8 | And mysql host "mysql3" should be replica of "mysql1" 9 | And mysql replication on host "mysql3" should run fine within "5" seconds 10 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 11 | """ 12 | ["mysql1","mysql2","mysql3"] 13 | """ 14 | When I run SQL on mysql host "mysql1" 15 | """ 16 | CREATE TABLE mysql.mdb_repl_mon( 17 | ts TIMESTAMP(3) 18 | ) ENGINE=INNODB; 19 | """ 20 | And I run SQL on mysql host "mysql1" 21 | """ 22 | INSERT INTO mysql.mdb_repl_mon VALUES(CURRENT_TIMESTAMP(3)); 23 | """ 24 | And I run SQL on mysql host "mysql1" 25 | """ 26 | CREATE EVENT mysql.mdb_repl_mon_event 27 | ON SCHEDULE EVERY 1 SECOND 28 | DO UPDATE mysql.mdb_repl_mon SET ts = CURRENT_TIMESTAMP(3); 29 | """ 30 | And I run SQL on mysql host "mysql1" 31 | """ 32 | CREATE DEFINER = "user123" EVENT mysql.event_test_definer 33 | ON SCHEDULE EVERY 1 SECOND 34 | DO UPDATE mysql.mdb_repl_mon SET ts = CURRENT_TIMESTAMP(3); 35 | """ 36 | And I run SQL on mysql host "mysql1" 37 | """ 38 | CREATE DEFINER = "user456@host789" EVENT mysql.event_test_definer_with_host 39 | ON SCHEDULE EVERY 1 SECOND 40 | DO UPDATE mysql.mdb_repl_mon SET ts = CURRENT_TIMESTAMP(3); 41 | """ 42 | Then mysql host "mysql1" should have event "mysql.mdb_repl_mon_event" in status "ENABLED" 43 | And mysql host "mysql1" should have event "mysql.event_test_definer" in status "ENABLED" 44 | And mysql host "mysql1" should have event "mysql.event_test_definer_with_host" in status "ENABLED" 45 | And mysql host "mysql2" should have event "mysql.mdb_repl_mon_event" in status "SLAVESIDE_DISABLED" within "10" seconds 46 | And mysql host "mysql2" should have event "mysql.event_test_definer" in status "SLAVESIDE_DISABLED" within "10" seconds 47 | And mysql host "mysql2" should have event "mysql.event_test_definer_with_host" in status "SLAVESIDE_DISABLED" within "10" seconds 48 | 
When I run command on host "mysql1" 49 | """ 50 | mysync switch --to mysql2 --wait=0s 51 | """ 52 | Then command return code should be "0" 53 | Then zookeeper node "/test/last_switch" should match json within "30" seconds 54 | """ 55 | { 56 | "from": "", 57 | "to": "mysql2", 58 | "result": { 59 | "ok": true 60 | } 61 | } 62 | 63 | """ 64 | Then mysql host "mysql2" should be master 65 | And mysql host "mysql1" should be replica of "mysql2" 66 | 67 | Then mysql host "mysql2" should have event "mysql.mdb_repl_mon_event" in status "ENABLED" 68 | And mysql host "mysql2" should have event "mysql.event_test_definer" in status "ENABLED" 69 | And mysql host "mysql2" should have event "mysql.event_test_definer_with_host" in status "ENABLED" 70 | And mysql host "mysql1" should have event "mysql.mdb_repl_mon_event" in status "SLAVESIDE_DISABLED" within "10" seconds 71 | And mysql host "mysql1" should have event "mysql.event_test_definer" in status "SLAVESIDE_DISABLED" within "10" seconds 72 | And mysql host "mysql1" should have event "mysql.event_test_definer_with_host" in status "SLAVESIDE_DISABLED" within "10" seconds 73 | And mysql host "mysql2" should have event "mysql.event_test_definer" of definer "user123@%" 74 | And mysql host "mysql2" should have event "mysql.event_test_definer_with_host" of definer "user456@host789" 75 | -------------------------------------------------------------------------------- /tests/features/free_space.feature: -------------------------------------------------------------------------------- 1 | Feature: free space 2 | Background: 3 | Given cluster environment is 4 | """ 5 | MYSYNC_CRITICAL_DISK_USAGE=95 6 | MYSYNC_KEEP_SUPER_WRITABLE_ON_CRITICAL_DISK_USAGE=false 7 | """ 8 | 9 | Scenario: master become read only on low free space 10 | Given cluster is up and running 11 | Then zookeeper node "/test/active_nodes" should match json_exactly within "20" seconds 12 | """ 13 | ["mysql1","mysql2","mysql3"] 14 | """ 15 | And mysql host "mysql1" should 
be master 16 | And mysql host "mysql1" should be writable 17 | When I set used space on host "mysql1" to 99% 18 | Then mysql host "mysql1" should become read only within "30" seconds 19 | When I set used space on host "mysql1" to 80% 20 | Then mysql host "mysql1" should become writable within "30" seconds 21 | 22 | Scenario: master become read only no super on low free space 23 | Given cluster environment is 24 | """ 25 | MYSYNC_CRITICAL_DISK_USAGE=95 26 | MYSYNC_KEEP_SUPER_WRITABLE_ON_CRITICAL_DISK_USAGE=true 27 | MYSYNC_FAILOVER=false 28 | """ 29 | Given cluster is up and running 30 | Then mysql host "mysql1" should be master 31 | And mysql host "mysql1" should be writable 32 | When I set used space on host "mysql1" to 99% 33 | Then mysql host "mysql1" should become read only no super within "30" seconds 34 | When mysql on host "mysql1" is restarted 35 | Then mysql host "mysql1" should become available within "60" seconds 36 | And mysql host "mysql1" should become read only no super within "30" seconds 37 | When I set used space on host "mysql1" to 80% 38 | Then mysql host "mysql1" should become writable within "30" seconds 39 | 40 | Scenario: single replica overflow should not make master read only 41 | Given cluster is up and running 42 | Then mysql host "mysql1" should be master 43 | And mysql host "mysql1" should be writable 44 | When I set used space on host "mysql2" to 99% 45 | And I wait for "15" seconds 46 | Then mysql host "mysql1" should be writable 47 | 48 | Scenario: all replicas overflow should make master read only 49 | Given cluster is up and running 50 | Then mysql host "mysql1" should be master 51 | And mysql host "mysql1" should be writable 52 | When I set used space on host "mysql2" to 99% 53 | When I set used space on host "mysql3" to 99% 54 | Then mysql host "mysql1" should become read only within "30" seconds 55 | When I set used space on host "mysql2" to 80% 56 | Then mysql host "mysql1" should become writable within "30" seconds 57 | 
-------------------------------------------------------------------------------- /tests/features/host_discovery.feature: -------------------------------------------------------------------------------- 1 | Feature: mysync use keys in zk properly 2 | 3 | Scenario: dynamic host resolve over zookeeper works 4 | Given cluster is up and running 5 | When I run command on host "mysql1" 6 | """ 7 | mysync info 8 | """ 9 | Then command return code should be "0" 10 | When zookeeper node "/test/ha_nodes/mysql1" should match regexp within "10" seconds 11 | """ 12 | """ 13 | When zookeeper node "/test/ha_nodes/mysql2" should match regexp within "10" seconds 14 | """ 15 | """ 16 | When zookeeper node "/test/ha_nodes/mysql3" should match regexp within "10" seconds 17 | """ 18 | """ 19 | When host "mysql2" is deleted 20 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 21 | """ 22 | ["mysql1","mysql3"] 23 | """ 24 | When I run command on host "mysql1" 25 | """ 26 | mysync switch --to mysql2 27 | """ 28 | Then command return code should be "1" 29 | And command output should match regexp 30 | """ 31 | .*no HA-nodes matching 'mysql2'.* 32 | """ 33 | 34 | Scenario: mysync does not perform changes on not HA nodes 35 | Given cluster environment is 36 | """ 37 | MYSYNC_FAILOVER=true 38 | """ 39 | Given cluster is up and running 40 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 41 | """ 42 | ["mysql1","mysql2","mysql3"] 43 | """ 44 | When I change replication source on host "mysql3" to "mysql1" 45 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 46 | """ 47 | ["mysql1","mysql2"] 48 | """ 49 | When host "mysql1" is stopped 50 | Then mysql host "mysql1" should become unavailable within "10" seconds 51 | Then zookeeper node "/test/manager" should match regexp within "10" seconds 52 | """ 53 | .*mysql[23].* 54 | """ 55 | Then zookeeper node "/test/last_switch" should match 
json within "30" seconds 56 | """ 57 | { 58 | "cause": "auto", 59 | "from": "mysql1", 60 | "result": { 61 | "ok": true 62 | } 63 | } 64 | """ 65 | And zookeeper node "/test/master" should match regexp 66 | """ 67 | .*mysql2.* 68 | """ 69 | And mysql host "mysql3" should be replica of "mysql1" 70 | When host "mysql2" is stopped 71 | Then mysql host "mysql2" should become unavailable within "10" seconds 72 | And mysql host "mysql3" should be replica of "mysql1" 73 | -------------------------------------------------------------------------------- /tests/features/host_management.feature: -------------------------------------------------------------------------------- 1 | Feature: update host topology using CLI 2 | 3 | Scenario: removing and adding HA host via CLI works 4 | Given cluster is up and running 5 | Then zookeeper node "/test/active_nodes" should match json_exactly within "20" seconds 6 | """ 7 | ["mysql1","mysql2","mysql3"] 8 | """ 9 | When I run command on host "mysql1" 10 | """ 11 | mysync host remove mysql3 12 | """ 13 | Then command return code should be "1" 14 | When host "mysql3" is stopped 15 | When I run command on host "mysql1" 16 | """ 17 | mysync host remove mysql3 18 | """ 19 | Then command return code should be "0" 20 | Then zookeeper node "/test/ha_nodes/mysql3" should not exist 21 | When I run command on host "mysql1" 22 | """ 23 | mysync host add mysql3 24 | """ 25 | Then command return code should be "1" 26 | When host "mysql3" is started 27 | Then mysql host "mysql3" should become available within "20" seconds 28 | When I run command on host "mysql1" 29 | """ 30 | mysync host add mysql3 31 | """ 32 | Then command return code should be "0" 33 | Then zookeeper node "/test/ha_nodes/mysql3" should exist 34 | 35 | Scenario: removing and adding HA host via CLI works 36 | Given cluster is up and running 37 | Then zookeeper node "/test/active_nodes" should match json_exactly within "20" seconds 38 | """ 39 | ["mysql1","mysql2","mysql3"] 40 | """ 41 
| When I run command on host "mysql3" 42 | """ 43 | mysync host add mysql2 --stream-from mysql1 44 | """ 45 | Then command return code should be "0" 46 | Then zookeeper node "/test/ha_nodes/mysql2" should not exist 47 | Then zookeeper node "/test/cascade_nodes/mysql2" should exist 48 | 49 | # loops are forbidden: 50 | When I run command on host "mysql1" 51 | """ 52 | mysync host add mysql2 --stream-from mysql2 53 | """ 54 | Then command return code should be "1" 55 | # loops are forbidden: 56 | When I run command on host "mysql1" 57 | """ 58 | mysync host add mysql1 --stream-from mysql2 59 | """ 60 | Then command return code should be "1" 61 | # master cannot be cascade: 62 | When I run command on host "mysql1" 63 | """ 64 | mysync host add mysql1 --stream-from mysql3 65 | """ 66 | Then command return code should be "1" 67 | -------------------------------------------------------------------------------- /tests/features/manager_switchover.feature: -------------------------------------------------------------------------------- 1 | Feature: manager switchover 2 | Scenario: manager loss quorum than release lock 3 | Given cluster environment is 4 | """ 5 | MANAGER_ELECTION_DELAY_AFTER_QUORUM_LOSS=180s 6 | MANAGER_LOCK_ACQUIRE_DELAY_AFTER_QUORUM_LOSS=180s 7 | """ 8 | And cluster is up and running 9 | 10 | When I get zookeeper node "/test/manager" 11 | And I save zookeeper query result as "manager" 12 | 13 | When I run command on host "{{.manager.hostname}}" 14 | """ 15 | mysync switch --from "{{.manager.hostname}}" 16 | """ 17 | Then command return code should be "0" 18 | 19 | When I get zookeeper node "/test/master" 20 | And I save zookeeper query result as "master" 21 | 22 | Then mysql host "{{.manager.hostname}}" should be replica of "{{.master}}" 23 | 24 | When host "{{.manager.hostname}}" is detached from the user network 25 | Then mysql host "{{.manager.hostname}}" should become unavailable within "10" seconds 26 | 27 | Then zookeeper node "/test/manager" should 
match regexp within "30" seconds 28 | """ 29 | .*{{.manager.hostname}}.* 30 | """ 31 | Then zookeeper node "/test/manager" should not match regexp within "300" seconds 32 | """ 33 | .*{{.manager.hostname}}.* 34 | """ 35 | 36 | Scenario: manager loss quorum near master that`s why manager dont release lock 37 | Given cluster is up and running 38 | 39 | When I get zookeeper node "/test/manager" 40 | And I save zookeeper query result as "manager" 41 | 42 | When I run command on host "{{.manager.hostname}}" 43 | """ 44 | mysync switch --to "{{.manager.hostname}}" 45 | """ 46 | Then command return code should be "0" 47 | 48 | When host "{{.manager.hostname}}" is detached from the user network 49 | Then mysql host "{{.manager.hostname}}" should become unavailable within "10" seconds 50 | 51 | Then I wait for "300" seconds 52 | 53 | Then zookeeper node "/test/manager" should match regexp 54 | """ 55 | .*{{.manager.hostname}}.* 56 | """ 57 | 58 | Scenario: manager switch is off 59 | Given cluster environment is 60 | """ 61 | MANAGER_SWITCHOVER=false 62 | """ 63 | And cluster is up and running 64 | 65 | When I get zookeeper node "/test/manager" 66 | And I save zookeeper query result as "manager" 67 | 68 | When I run command on host "{{.manager.hostname}}" 69 | """ 70 | mysync switch --from "{{.manager.hostname}}" 71 | """ 72 | Then command return code should be "0" 73 | 74 | When I get zookeeper node "/test/master" 75 | And I save zookeeper query result as "master" 76 | 77 | Then mysql host "{{.manager.hostname}}" should be replica of "{{.master}}" 78 | 79 | When host "{{.manager.hostname}}" is detached from the user network 80 | Then mysql host "{{.manager.hostname}}" should become unavailable within "10" seconds 81 | 82 | Then I wait for "300" seconds 83 | 84 | Then zookeeper node "/test/manager" should match regexp 85 | """ 86 | .*{{.manager.hostname}}.* 87 | """ 88 | -------------------------------------------------------------------------------- 
/tests/features/offline_mode.84.feature: -------------------------------------------------------------------------------- 1 | Feature: offline mode for lagging replicas 2 | 3 | Scenario: mysync switches replicas to offline mode and back when slave lags 4 | Given cluster is up and running 5 | Then mysql host "mysql1" should be master 6 | And mysql host "mysql2" should be replica of "mysql1" 7 | And mysql host "mysql3" should be replica of "mysql1" 8 | 9 | Then mysql host "mysql1" should be online within "10" seconds 10 | And mysql host "mysql2" should be online within "10" seconds 11 | And mysql host "mysql3" should be online within "10" seconds 12 | 13 | #When I break replication on host "mysql3" 14 | And I run SQL on mysql host "mysql3" 15 | """ 16 | STOP REPLICA; CHANGE REPLICATION SOURCE TO SOURCE_DELAY = 300 FOR CHANNEL ''; START REPLICA 17 | """ 18 | And I run SQL on mysql host "mysql1" 19 | """ 20 | CREATE TABLE IF NOT EXISTS mysql.test_table1 ( 21 | value VARCHAR(30) 22 | ) 23 | """ 24 | And I run SQL on mysql host "mysql1" 25 | """ 26 | INSERT INTO mysql.test_table1 VALUES ("A"), ("B"), ("C") 27 | """ 28 | Then mysql host "mysql3" should be offline within "15" seconds 29 | When I run SQL on mysql host "mysql3" 30 | """ 31 | STOP REPLICA; CHANGE REPLICATION SOURCE TO SOURCE_DELAY = 0 FOR CHANNEL ''; START REPLICA 32 | """ 33 | Then mysql host "mysql3" should be online within "10" seconds 34 | -------------------------------------------------------------------------------- /tests/features/offline_mode.feature: -------------------------------------------------------------------------------- 1 | Feature: offline mode for lagging replicas 2 | 3 | Scenario: mysync switches replicas to offline mode and back when slave lags 4 | Given cluster is up and running 5 | Then mysql host "mysql1" should be master 6 | And mysql host "mysql2" should be replica of "mysql1" 7 | And mysql host "mysql3" should be replica of "mysql1" 8 | 9 | Then mysql host "mysql1" should be 
online within "10" seconds 10 | And mysql host "mysql2" should be online within "10" seconds 11 | And mysql host "mysql3" should be online within "10" seconds 12 | 13 | #When I break replication on host "mysql3" 14 | And I run SQL on mysql host "mysql3" 15 | """ 16 | STOP SLAVE; CHANGE MASTER TO MASTER_DELAY = 300 FOR CHANNEL ''; START SLAVE 17 | """ 18 | And I run SQL on mysql host "mysql1" 19 | """ 20 | CREATE TABLE IF NOT EXISTS mysql.test_table1 ( 21 | value VARCHAR(30) 22 | ) 23 | """ 24 | And I run SQL on mysql host "mysql1" 25 | """ 26 | INSERT INTO mysql.test_table1 VALUES ("A"), ("B"), ("C") 27 | """ 28 | Then mysql host "mysql3" should be offline within "15" seconds 29 | When I run SQL on mysql host "mysql3" 30 | """ 31 | STOP SLAVE; CHANGE MASTER TO MASTER_DELAY = 0 FOR CHANNEL ''; START SLAVE 32 | """ 33 | Then mysql host "mysql3" should be online within "10" seconds 34 | -------------------------------------------------------------------------------- /tests/features/priority.feature: -------------------------------------------------------------------------------- 1 | Feature: host priority 2 | 3 | Scenario: CLI works 4 | Given cluster is up and running 5 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 6 | """ 7 | ["mysql1","mysql2","mysql3"] 8 | """ 9 | 10 | When I run command on host "mysql3" 11 | """ 12 | mysync host add mysql2 --priority 5 13 | """ 14 | Then command return code should be "0" 15 | Then zookeeper node "/test/ha_nodes/mysql2" should match json within "5" seconds 16 | """ 17 | { "priority": 5 } 18 | """ 19 | When I run command on host "mysql3" 20 | """ 21 | mysync host add mysql3 --priority 10 22 | """ 23 | Then command return code should be "0" 24 | Then zookeeper node "/test/ha_nodes/mysql3" should match json within "5" seconds 25 | """ 26 | { "priority": 10 } 27 | """ 28 | When I run command on host "mysql3" 29 | """ 30 | mysync host add mysql3 --priority -10 31 | """ 32 | Then command return 
code should be "1" 33 | And command output should match regexp 34 | """ 35 | .*priority must be >= 0.* 36 | """ 37 | 38 | Scenario Outline: Switchover chooses replica with greater priority 39 | And cluster is up and running 40 | Then mysql host "mysql1" should be master 41 | 42 | And mysql host "mysql2" should be replica of "mysql1" 43 | And mysql replication on host "mysql2" should run fine within "5" seconds 44 | And mysql host "mysql3" should be replica of "mysql1" 45 | And mysql replication on host "mysql3" should run fine within "5" seconds 46 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 47 | """ 48 | ["mysql1","mysql2","mysql3"] 49 | """ 50 | 51 | When I run command on host "mysql3" 52 | """ 53 | mysync host add --priority 54 | """ 55 | Then command return code should be "0" 56 | Then zookeeper node "/test/ha_nodes/" should match json within "5" seconds 57 | """ 58 | { "priority": } 59 | """ 60 | When I run command on host "mysql2" 61 | """ 62 | mysync switch --from mysql1 --wait=0s 63 | """ 64 | Then command return code should be "0" 65 | Then zookeeper node "/test/last_switch" should match json within "30" seconds 66 | """ 67 | { 68 | "from": "mysql1", 69 | "result": { 70 | "ok": true 71 | } 72 | } 73 | """ 74 | Then mysql host "" should be master 75 | And mysql host "" should be writable 76 | Examples: 77 | | priority | host | new_master | 78 | | 10 | mysql2 | mysql2 | 79 | | 10 | mysql3 | mysql3 | 80 | 81 | Scenario: Switchover ignores cascade replica 82 | Given cluster environment is 83 | """ 84 | MYSYNC_FAILOVER=true 85 | MYSYNC_FAILOVER_DELAY=1s 86 | MYSYNC_STREAM_FROM_REASONABLE_LAG=1s 87 | """ 88 | And cluster is up and running 89 | Then mysql host "mysql1" should be master 90 | 91 | And mysql host "mysql2" should be replica of "mysql1" 92 | And mysql replication on host "mysql2" should run fine within "5" seconds 93 | And mysql host "mysql3" should be replica of "mysql1" 94 | And mysql replication on host 
"mysql3" should run fine within "5" seconds 95 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 96 | """ 97 | ["mysql1","mysql2","mysql3"] 98 | """ 99 | 100 | When I run command on host "mysql3" 101 | """ 102 | mysync host add mysql2 --priority 5 103 | """ 104 | Then command return code should be "0" 105 | Then zookeeper node "/test/ha_nodes/mysql2" should match json within "5" seconds 106 | """ 107 | { "priority": 5 } 108 | """ 109 | When I run command on host "mysql3" 110 | """ 111 | mysync host add mysql3 --priority 10 112 | """ 113 | Then command return code should be "0" 114 | Then zookeeper node "/test/ha_nodes/mysql3" should match json within "5" seconds 115 | """ 116 | { "priority": 10 } 117 | """ 118 | When I run command on host "mysql3" 119 | """ 120 | mysync host add mysql3 --stream-from mysql1 121 | """ 122 | Then command return code should be "0" 123 | Then zookeeper node "/test/cascade_nodes/mysql3" should match json within "5" seconds 124 | """ 125 | { "stream_from": "mysql1" } 126 | """ 127 | And zookeeper node "/test/active_nodes" should match json_exactly within "10" seconds 128 | """ 129 | ["mysql1","mysql2"] 130 | """ 131 | When I run command on host "mysql2" 132 | """ 133 | mysync switch --from mysql1 --wait=0s 134 | """ 135 | Then command return code should be "0" 136 | Then zookeeper node "/test/last_switch" should match json within "30" seconds 137 | """ 138 | { 139 | "from": "mysql1", 140 | "result": { 141 | "ok": true 142 | } 143 | } 144 | """ 145 | Then mysql host "mysql2" should be master 146 | And mysql host "mysql2" should be writable 147 | -------------------------------------------------------------------------------- /tests/features/readonly_filesystem.feature: -------------------------------------------------------------------------------- 1 | Feature: readonly filesystem 2 | Scenario: check master failure when disk on master become readonly 3 | Given cluster environment is 4 | """ 5 | 
MYSYNC_FAILOVER=true 6 | MYSYNC_FAILOVER_COOLDOWN=0s 7 | """ 8 | Given cluster is up and running 9 | Then zookeeper node "/test/active_nodes" should match json_exactly within "20" seconds 10 | """ 11 | ["mysql1","mysql2","mysql3"] 12 | """ 13 | 14 | When I set readonly file system on host "mysql1" to "true" 15 | Then zookeeper node "/test/last_switch" should match json within "30" seconds 16 | """ 17 | { 18 | "cause": "auto", 19 | "from": "mysql1", 20 | "result": { 21 | "ok": true 22 | } 23 | } 24 | """ 25 | 26 | When I get zookeeper node "/test/master" 27 | And I save zookeeper query result as "new_master" 28 | Then mysql host "{{.new_master}}" should be master 29 | -------------------------------------------------------------------------------- /tests/features/recovery.feature: -------------------------------------------------------------------------------- 1 | Feature: hosts recovery 2 | 3 | Scenario: failover honors recovery hosts 4 | Given cluster environment is 5 | """ 6 | MYSYNC_FAILOVER=true 7 | MYSYNC_FAILOVER_DELAY=0s 8 | MYSYNC_FAILOVER_COOLDOWN=0s 9 | """ 10 | Given cluster is up and running 11 | When host "mysql3" is deleted 12 | Then mysql host "mysql3" should become unavailable within "10" seconds 13 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 14 | """ 15 | ["mysql1","mysql2"] 16 | """ 17 | When host "mysql1" is detached from the network 18 | Then mysql host "mysql1" should become unavailable within "10" seconds 19 | Then zookeeper node "/test/manager" should match regexp within "10" seconds 20 | """ 21 | .*mysql2.* 22 | """ 23 | Then zookeeper node "/test/last_switch" should match json within "120" seconds 24 | """ 25 | { 26 | "cause": "auto", 27 | "from": "mysql1", 28 | "result": { 29 | "ok": true 30 | } 31 | } 32 | """ 33 | Then zookeeper node "/test/master" should match regexp within "30" seconds 34 | """ 35 | .*mysql2.* 36 | """ 37 | And zookeeper node "/test/recovery/mysql1" should exist 38 | Then 
mysql host "mysql2" should be master 39 | And mysql host "mysql2" should be writable 40 | And mysql host "mysql2" should have variable "rpl_semi_sync_master_enabled" set to "0" 41 | # Commit normal transaction 42 | Then I run SQL on mysql host "mysql2" 43 | """ 44 | CREATE TABLE splitbrain(id int); 45 | """ 46 | 47 | # Emulate lost transactions on old master 48 | # mysync may set super_read_only while we run query 49 | When I run command on host "mysql1" until return code is "0" with timeout "5" seconds 50 | """ 51 | mysql -e ' 52 | SET GLOBAL rpl_semi_sync_master_enabled = 0; 53 | SET GLOBAL super_read_only = 0; 54 | CREATE TABLE mysql.splitbrain(id int); 55 | SET GLOBAL read_only = 1; 56 | ' 57 | """ 58 | 59 | When host "mysql1" is attached to the network 60 | Then mysql host "mysql1" should become available within "10" seconds 61 | And mysql host "mysql1" should become replica of "mysql2" within "10" seconds 62 | And mysql replication on host "mysql1" should not run fine within "3" seconds 63 | And zookeeper node "/test/recovery/mysql1" should exist 64 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 65 | """ 66 | ["mysql2"] 67 | """ 68 | And host "mysql1" should have file "/tmp/mysync.resetup" within "20" seconds 69 | -------------------------------------------------------------------------------- /tests/features/repair.feature: -------------------------------------------------------------------------------- 1 | Feature: repair hosts in cluster 2 | 3 | Scenario: master became writable after manual changes 4 | Given cluster is up and running 5 | Then mysql host "mysql1" should be master 6 | And mysql host "mysql1" should be writable 7 | When I run SQL on mysql host "mysql1" 8 | """ 9 | SET GLOBAL READ_ONLY=1; 10 | """ 11 | Then mysql host "mysql1" should become writable within "20" seconds 12 | When I run SQL on mysql host "mysql1" 13 | """ 14 | SET GLOBAL SUPER_READ_ONLY=1; 15 | """ 16 | Then mysql host "mysql1" should 
become writable within "20" seconds 17 | 18 | Scenario: async replication fallback 19 | Given cluster is up and running 20 | Then mysql host "mysql1" should be master 21 | And mysql host "mysql1" should have variable "rpl_semi_sync_master_enabled" set to "1" within "20" seconds 22 | And mysql host "mysql1" should have variable "rpl_semi_sync_slave_enabled" set to "0" 23 | And mysql host "mysql2" should be replica of "mysql1" 24 | And mysql host "mysql2" should have variable "rpl_semi_sync_slave_enabled" set to "1" within "20" seconds 25 | And mysql host "mysql2" should have variable "rpl_semi_sync_master_enabled" set to "0" 26 | And mysql host "mysql3" should be replica of "mysql1" 27 | And mysql host "mysql3" should have variable "rpl_semi_sync_slave_enabled" set to "1" within "20" seconds 28 | And mysql host "mysql3" should have variable "rpl_semi_sync_master_enabled" set to "0" 29 | When host "mysql2" is detached from the network 30 | And host "mysql3" is detached from the network 31 | Then mysql host "mysql1" should have variable "rpl_semi_sync_master_enabled" set to "0" within "30" seconds 32 | And mysql host "mysql1" should be writable 33 | And mysql host "mysql1" should be master 34 | 35 | Scenario: semisync replication setup 36 | Given cluster is up and running 37 | Then mysql host "mysql1" should be master 38 | And mysql host "mysql1" should have variable "rpl_semi_sync_master_enabled" set to "1" within "20" seconds 39 | And mysql host "mysql1" should have variable "rpl_semi_sync_slave_enabled" set to "0" 40 | And mysql host "mysql2" should be replica of "mysql1" 41 | And mysql host "mysql2" should have variable "rpl_semi_sync_slave_enabled" set to "1" within "20" seconds 42 | And mysql host "mysql2" should have variable "rpl_semi_sync_master_enabled" set to "0" 43 | And mysql host "mysql3" should be replica of "mysql1" 44 | And mysql host "mysql3" should have variable "rpl_semi_sync_slave_enabled" set to "1" within "20" seconds 45 | And mysql host 
"mysql3" should have variable "rpl_semi_sync_master_enabled" set to "0" 46 | 47 | Scenario: master became writable again after network loss 48 | Given cluster environment is 49 | """ 50 | MYSYNC_FAILOVER=false 51 | """ 52 | Given cluster is up and running 53 | Then mysql host "mysql1" should be master 54 | And mysql host "mysql2" should be replica of "mysql1" 55 | And mysql host "mysql3" should be replica of "mysql1" 56 | When host "mysql1" is detached from the network 57 | Then mysql host "mysql1" should become unavailable within "10" seconds 58 | When host "mysql1" is attached to the network 59 | Then mysql host "mysql1" should become available within "20" seconds 60 | And mysql host "mysql1" should be master 61 | And mysql host "mysql1" should become writable within "20" seconds 62 | 63 | Scenario: master became writable if zookeeper was unavailable during first run 64 | Given cluster is up and running 65 | And host "mysql3" is stopped 66 | And host "mysql2" is stopped 67 | And host "mysql1" is stopped 68 | And I delete zookeeper node "/test/master" 69 | And host "zoo1" is detached from the network 70 | And host "zoo2" is detached from the network 71 | And host "zoo3" is detached from the network 72 | When host "mysql1" is started 73 | And host "mysql2" is started 74 | And host "mysql3" is started 75 | Then mysql host "mysql1" should become available within "20" seconds 76 | And mysql host "mysql1" should be master 77 | And mysql host "mysql1" should be read only 78 | When host "zoo1" is attached to the network 79 | And host "zoo2" is attached to the network 80 | And host "zoo3" is attached to the network 81 | Then mysql host "mysql1" should become writable within "20" seconds 82 | 83 | Scenario: mysync repairs replication on replication error 84 | Given cluster is up and running 85 | Then mysql host "mysql1" should be master 86 | And mysql host "mysql1" should be writable 87 | And mysql host "mysql2" should be replica of "mysql1" 88 | And mysql replication on 
host "mysql2" should run fine within "5" seconds 89 | # just to have stable tests - turn on maintenance mode 90 | And I run command on host "mysql1" 91 | """ 92 | mysync maint on 93 | """ 94 | And I wait for "5" seconds 95 | When I break replication on host "mysql2" in repairable way 96 | Then mysql replication on host "mysql2" should not run fine 97 | And I run command on host "mysql1" 98 | """ 99 | mysync maint off 100 | """ 101 | And mysql replication on host "mysql2" should run fine within "60" seconds 102 | 103 | Scenario: mysync repairs unrepairable replication in aggressive mode 104 | Given cluster environment is 105 | """ 106 | MYSYNC_REPLICATION_REPAIR_AGGRESSIVE_MODE=true 107 | """ 108 | Given cluster is up and running 109 | Then mysql host "mysql1" should be master 110 | And mysql host "mysql1" should be writable 111 | # just to have stable tests - turn on maintenance mode 112 | And I run command on host "mysql1" 113 | """ 114 | mysync maint on 115 | """ 116 | And I wait for "5" seconds 117 | When I break replication on host "mysql2" 118 | Then mysql replication on host "mysql2" should not run fine 119 | And I run command on host "mysql1" 120 | """ 121 | mysync maint off 122 | """ 123 | And mysql replication on host "mysql2" should run fine within "60" seconds 124 | -------------------------------------------------------------------------------- /tests/features/repl_mon.feature: -------------------------------------------------------------------------------- 1 | Feature: repl_mon tests 2 | 3 | Scenario: repl_mon enabled 4 | Given cluster environment is 5 | """ 6 | REPL_MON=true 7 | """ 8 | Given cluster is up and running 9 | When I wait for "10" seconds 10 | Then zookeeper node "/test/active_nodes" should match json_exactly within "20" seconds 11 | """ 12 | ["mysql1","mysql2","mysql3"] 13 | """ 14 | And I wait for "5" seconds 15 | And I run SQL on mysql host "mysql1" expecting error on number "1050" 16 | """ 17 | CREATE TABLE mysql.mysync_repl_mon( 18 | 
ts TIMESTAMP(3) 19 | ) ENGINE=INNODB; 20 | """ 21 | And I run SQL on mysql host "mysql1" 22 | """ 23 | SELECT (CURRENT_TIMESTAMP(3) - ts) < 2 as res FROM mysql.mysync_repl_mon 24 | """ 25 | Then SQL result should match json 26 | """ 27 | [{"res":1}] 28 | """ 29 | And mysql host "mysql2" should be replica of "mysql1" 30 | Then zookeeper node "/test/health/mysql2" should match json within "20" seconds 31 | """ 32 | { 33 | "is_loading_binlog": true 34 | } 35 | """ 36 | And mysql host "mysql3" should be replica of "mysql1" 37 | Then zookeeper node "/test/health/mysql3" should match json within "20" seconds 38 | """ 39 | { 40 | "is_loading_binlog": true 41 | } 42 | """ 43 | 44 | Scenario: repl_mon disabled 45 | Given cluster environment is 46 | """ 47 | REPL_MON=false 48 | """ 49 | Given cluster is up and running 50 | When I wait for "10" seconds 51 | Then zookeeper node "/test/active_nodes" should match json_exactly within "20" seconds 52 | """ 53 | ["mysql1","mysql2","mysql3"] 54 | """ 55 | And I wait for "5" seconds 56 | And I run SQL on mysql host "mysql1" expecting error on number "1146" 57 | """ 58 | SELECT ts FROM mysql.mysync_repl_mon 59 | """ 60 | And I run SQL on mysql host "mysql2" expecting error on number "1146" 61 | """ 62 | SELECT ts FROM mysql.mysync_repl_mon 63 | """ 64 | And I run SQL on mysql host "mysql3" expecting error on number "1146" 65 | """ 66 | SELECT ts FROM mysql.mysync_repl_mon 67 | """ 68 | And mysql host "mysql2" should be replica of "mysql1" 69 | Then zookeeper node "/test/health/mysql2" should match json within "20" seconds 70 | """ 71 | { 72 | "is_loading_binlog": false 73 | } 74 | """ 75 | And mysql host "mysql3" should be replica of "mysql1" 76 | Then zookeeper node "/test/health/mysql3" should match json within "20" seconds 77 | """ 78 | { 79 | "is_loading_binlog": false 80 | } 81 | """ 82 | -------------------------------------------------------------------------------- /tests/features/statefile.feature: 
-------------------------------------------------------------------------------- 1 | Feature: mysync touches info file when diskusage is too high 2 | 3 | Scenario: mysync touch info file on high disk usage 4 | Given cluster environment is 5 | # force mysync to enable RO in any case 6 | """ 7 | MYSYNC_CRITICAL_DISK_USAGE=0 8 | """ 9 | Given cluster is up and running 10 | Then mysql host "mysql1" should be master 11 | And mysql host "mysql2" should be replica of "mysql1" 12 | And mysql replication on host "mysql2" should run fine within "5" seconds 13 | And mysql host "mysql3" should be replica of "mysql1" 14 | And mysql replication on host "mysql3" should run fine within "5" seconds 15 | And zookeeper node "/test/health/mysql1" should match json within "30" seconds 16 | """ 17 | { 18 | "is_master": true, 19 | "is_readonly": true 20 | } 21 | """ 22 | When I wait for "60" seconds 23 | And info file "/var/run/mysync/mysync.info" on "mysql1" match json 24 | """ 25 | { 26 | "low_space": true 27 | } 28 | """ 29 | And info file "/var/run/mysync/mysync.info" on "mysql2" match json 30 | """ 31 | { 32 | "low_space": true 33 | } 34 | """ 35 | And info file "/var/run/mysync/mysync.info" on "mysql3" match json 36 | """ 37 | { 38 | "low_space": true 39 | } 40 | """ 41 | 42 | Scenario: mysync does not touch info file on ok disk consumption 43 | Given cluster environment is 44 | # force mysync to enable RO in any case 45 | """ 46 | MYSYNC_CRITICAL_DISK_USAGE=95 47 | """ 48 | Given cluster is up and running 49 | Then mysql host "mysql1" should be master 50 | And mysql host "mysql2" should be replica of "mysql1" 51 | And mysql replication on host "mysql2" should run fine within "5" seconds 52 | And mysql host "mysql3" should be replica of "mysql1" 53 | And mysql replication on host "mysql3" should run fine within "5" seconds 54 | And zookeeper node "/test/health/mysql1" should match json within "30" seconds 55 | """ 56 | { 57 | "is_master": true, 58 | "is_readonly": false 59 | } 60 | 
""" 61 | And info file "/var/run/mysync/mysync.info" on "mysql1" match json 62 | """ 63 | { 64 | "low_space": false 65 | } 66 | """ 67 | And info file "/var/run/mysync/mysync.info" on "mysql2" match json 68 | """ 69 | { 70 | "low_space": false 71 | } 72 | """ 73 | And info file "/var/run/mysync/mysync.info" on "mysql3" match json 74 | """ 75 | { 76 | "low_space": false 77 | } 78 | """ 79 | 80 | -------------------------------------------------------------------------------- /tests/features/zk_failure.feature: -------------------------------------------------------------------------------- 1 | Feature: mysync handles zookeeper lost 2 | 3 | Scenario: mysync handles single rack network issue that could lead to split brain 4 | Given cluster is up and running 5 | Then mysql host "mysql1" should be master 6 | And mysql host "mysql1" should be writable 7 | And mysql host "mysql2" should be replica of "mysql1" 8 | And mysql host "mysql3" should be replica of "mysql1" 9 | And zookeeper node "/test/active_nodes" should match json_exactly within "20" seconds 10 | """ 11 | ["mysql1","mysql2","mysql3"] 12 | """ 13 | 14 | And I run heavy user requests on host "mysql1" for "300" seconds 15 | When host "mysql1" is detached from the network 16 | When I wait for "60" seconds 17 | Then I have no SQL execution error at mysql host "mysql2" within "3" seconds 18 | And I have no SQL execution error at mysql host "mysql3" within "3" seconds 19 | When I run command on host "mysql1" 20 | """ 21 | mysql -s --skip-column-names -e "SELECT (CASE WHEN count(*) = 0 THEN 'OK' ELSE 'HAS_USER_QUERIES' END) 22 | FROM information_schema.PROCESSLIST p 23 | WHERE USER NOT IN ('admin', 'monitor', 'event_scheduler', 'repl')" 24 | """ 25 | Then command output should match regexp 26 | """ 27 | .*OK.* 28 | """ 29 | 30 | When I run command on host "mysql1" 31 | """ 32 | mysql -s --skip-column-names -e "SELECT 33 | (CASE WHEN @@read_only = 1 THEN 'RO' ELSE 'RW' END) as RO, 34 | (CASE WHEN @@super_read_only = 1 
THEN 'RO' ELSE 'RW' END) as SRO" 35 | """ 36 | Then command output should match regexp 37 | """ 38 | .*RO[[:space:]]+RO.* 39 | """ 40 | 41 | 42 | Scenario: mysync handles ZK failure - do nothing 43 | Given cluster is up and running 44 | Then mysql host "mysql1" should be master 45 | And mysql host "mysql1" should be writable 46 | And mysql host "mysql2" should be replica of "mysql1" 47 | And mysql host "mysql3" should be replica of "mysql1" 48 | And zookeeper node "/test/active_nodes" should match json_exactly within "20" seconds 49 | """ 50 | ["mysql1","mysql2","mysql3"] 51 | """ 52 | 53 | When I run heavy user requests on host "mysql1" for "300" seconds 54 | And I run long read user requests on host "mysql2" for "300" seconds 55 | And I run long read user requests on host "mysql3" for "300" seconds 56 | 57 | And host "zoo3" is detached from the network 58 | And host "zoo2" is detached from the network 59 | And host "zoo1" is detached from the network 60 | Then I have no SQL execution error at mysql host "mysql1" within "60" seconds 61 | And I have no SQL execution error at mysql host "mysql2" within "0" seconds 62 | And I have no SQL execution error at mysql host "mysql3" within "0" seconds 63 | And mysql host "mysql1" should be writable 64 | And mysql host "mysql2" should be read only 65 | And mysql host "mysql3" should be read only 66 | 67 | 68 | Scenario: failover works when old master hangs waiting semisync ack 69 | Given cluster environment is 70 | """ 71 | MYSYNC_FAILOVER=false 72 | MYSYNC_SEMISYNC=true 73 | MYSYNC_DB_LOST_CHECK_TIMEOUT=3s 74 | """ 75 | Given cluster is up and running 76 | 77 | Then mysql host "mysql1" should be master 78 | And mysql host "mysql1" should become writable within "5" seconds 79 | And mysql host "mysql1" should have variable "rpl_semi_sync_master_enabled" set to "1" within "20" seconds 80 | And mysql host "mysql2" should be replica of "mysql1" 81 | And mysql host "mysql3" should be replica of "mysql1" 82 | 83 | When I run SQL 
on mysql host "mysql1" 84 | """ 85 | CREATE TABLE IF NOT EXISTS mysql.test_table1 ( 86 | value VARCHAR(30) 87 | ) 88 | """ 89 | And I run SQL on mysql host "mysql1" 90 | """ 91 | INSERT INTO mysql.test_table1 VALUES ("A"), ("B"), ("C") 92 | """ 93 | 94 | When host "mysql1" is detached from the network 95 | Then mysql host "mysql1" should become unavailable within "10" seconds 96 | # following request should stuck in 'Waiting for semi-sync ACK from slave' state 97 | When I run async command on host "mysql1" 98 | """ 99 | mysql -s --skip-column-names -e "INSERT INTO mysql.test_table1 VALUES ('D'), ('E'), ('F')" 100 | """ 101 | And I wait for "5" seconds 102 | When I run command on host "mysql1" 103 | """ 104 | mysql -s --skip-column-names -e "SELECT state FROM information_schema.PROCESSLIST" 105 | """ 106 | Then command output should match regexp 107 | """ 108 | .*Waiting for semi-sync ACK from.* 109 | """ 110 | # start manual deterministic switchover - we will use this in last check 111 | When I run command on host "mysql2" 112 | """ 113 | mysync switch --from mysql1 --wait=0s --failover 114 | """ 115 | Then zookeeper node "/test/last_switch" should match json within "90" seconds 116 | """ 117 | { 118 | "from": "mysql1", 119 | "master_transition": "failover", 120 | "result": { 121 | "ok": true 122 | } 123 | } 124 | """ 125 | And I run command on host "mysql1" until result match regexp ".*OK.*" with timeout "90" seconds 126 | """ 127 | mysql -s --skip-column-names -e "SELECT (CASE WHEN count(*) = 0 THEN 'OK' ELSE 'STILL_WAITING' END) 128 | FROM information_schema.PROCESSLIST 129 | WHERE state like 'Waiting for semi-sync ACK from%'" 130 | """ 131 | When host "mysql1" is attached to the network 132 | Then mysql host "mysql1" should become available within "20" seconds 133 | When I get zookeeper node "/test/master" 134 | And I save zookeeper query result as "new_master" 135 | Then mysql host "mysql1" should become replica of "{{.new_master}}" within "30" seconds 136 | 
-------------------------------------------------------------------------------- /tests/features/zk_maintenance.feature: -------------------------------------------------------------------------------- 1 | Feature: maintenance during dead zookeeper 2 | 3 | Scenario: mysync keeps maintenance while zookeeper is down 4 | Given cluster is up and running 5 | Then mysql host "mysql1" should be master 6 | And mysql host "mysql2" should be replica of "mysql1" 7 | And mysql host "mysql3" should be replica of "mysql1" 8 | And mysql host "mysql1" should be writable 9 | When I run command on host "mysql1" 10 | """ 11 | mysync maint on 12 | """ 13 | Then command return code should be "0" 14 | And command output should match regexp 15 | """ 16 | maintenance enabled 17 | """ 18 | And I wait for "5" seconds 19 | 20 | When host "zoo3" is detached from the network 21 | And host "zoo2" is detached from the network 22 | And host "zoo1" is detached from the network 23 | When I run command on host "mysql1" 24 | """ 25 | mysync info 26 | """ 27 | Then command return code should be "1" 28 | 29 | When I wait for "10" seconds 30 | Then mysql host "mysql1" should be master 31 | And mysql host "mysql2" should be replica of "mysql1" 32 | And mysql host "mysql3" should be replica of "mysql1" 33 | And mysql host "mysql1" should be writable 34 | 35 | 36 | When I run command on host "mysql1" with timeout "20" seconds 37 | """ 38 | supervisorctl restart mysync 39 | """ 40 | Then command return code should be "0" 41 | When I run command on host "mysql2" with timeout "20" seconds 42 | """ 43 | supervisorctl restart mysync 44 | """ 45 | Then command return code should be "0" 46 | When I run command on host "mysql3" with timeout "20" seconds 47 | """ 48 | supervisorctl restart mysync 49 | """ 50 | Then command return code should be "0" 51 | 52 | When I wait for "30" seconds 53 | Then mysql host "mysql1" should be master 54 | And mysql host "mysql2" should be replica of "mysql1" 55 | And mysql host 
"mysql3" should be replica of "mysql1" 56 | And mysql host "mysql1" should be writable 57 | 58 | When host "zoo3" is attached to the network 59 | And host "zoo2" is attached to the network 60 | And host "zoo1" is attached to the network 61 | 62 | Then zookeeper node "/test/maintenance" should match json within "90" seconds 63 | """ 64 | { 65 | "initiated_by": "REGEXP:.*@mysql1" 66 | } 67 | """ 68 | When I run command on host "mysql1" with timeout "30" seconds 69 | """ 70 | mysync maint off 71 | """ 72 | Then command return code should be "0" 73 | 74 | Then mysql host "mysql1" should be master 75 | And mysql host "mysql2" should be replica of "mysql1" 76 | And mysql host "mysql3" should be replica of "mysql1" 77 | And mysql host "mysql1" should be writable 78 | And zookeeper node "/test/health/mysql1" should match json within "30" seconds 79 | """ 80 | { 81 | "ping_ok": true, 82 | "is_readonly": false 83 | } 84 | """ 85 | And zookeeper node "/test/health/mysql2" should match json within "30" seconds 86 | """ 87 | { 88 | "ping_ok": true, 89 | "is_readonly": true 90 | } 91 | """ 92 | And zookeeper node "/test/health/mysql3" should match json within "30" seconds 93 | """ 94 | { 95 | "ping_ok": true, 96 | "is_readonly": true 97 | } 98 | """ 99 | -------------------------------------------------------------------------------- /tests/images/base/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:jammy 2 | ENV container=docker 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | ENV ZK_VERSION=3.7.1 5 | ARG MYSQL_VERSION="" 6 | ENV MYSQL_VERSION="${MYSQL_VERSION}" 7 | COPY . 
/var/lib/dist/base 8 | RUN bash /var/lib/dist/base/setup.sh 9 | RUN chmod 755 /var/lib/dist/base/generate_certs.sh 10 | CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] 11 | -------------------------------------------------------------------------------- /tests/images/base/percona.gpg: -------------------------------------------------------------------------------- 1 | -----BEGIN PGP PUBLIC KEY BLOCK----- 2 | Version: GnuPG v1 3 | 4 | mQINBFd0veABEADyFa8jPHXhhX1XS9W7Og4p+jLxB0aowElk4Kt6lb/mYjwKmQ77 5 | 9ZKUAvb1xRYFU1/NEaykEl/jxE7RA/fqlqheZzBblB3WLIPM0sMfh/D4fyFCaKKF 6 | k2CSwXtYfhk9DOsBP2K+ZEg0PoLqMbLIBUxPl61ZIy2tnF3G+gCfGu6pMHK7WTtI 7 | nnruMKk51s9Itc9vUeUvRGDcFIiEEq0xJhEX/7J/WAReD5Am/kD4CvkkunSqbhhu 8 | B6DV9tAeEFtDppEHdFDzfHfTOwlHLgTvgVETDgLgTRXzztgBVKl7Gdvc3ulbtowB 9 | uBtbuRr49+QIlcBdFZmM6gA4V5P9/qrkUaarvuIkXWQYs9/8oCd3SRluhdxXs3xX 10 | 1/gQQXYHUhcdAWrqS56txncXf0cnO2v5kO5rlOX1ovpNQsc69R52LJKOLA1Kmjca 11 | JNtC+4e+SF2upK14gtXK384z7owXYUA4NRZOEu+UAw7wAoiIWPUfzMEHYi8I3Rsz 12 | EtpVyOQC5YyYgwzIdt4YxlVJ0CUoinvtIygies8LkA5GQvaGJHYG1aQ3i9WDddCX 13 | wtoV1uA4EZlEWjTXlSRc92jhSKut/EWbmYHEUhmvcfFErrxUPqirpVZHSaXY5Rdh 14 | KVFyx9JcRuIQ0SJxeHQPlaEkyhKpTDN5Cw7USLwoXfIu2w0w0W06LdXZ7wARAQAB 15 | tEZQZXJjb25hIE15U1FMIERldmVsb3BtZW50IFRlYW0gKFBhY2thZ2luZyBrZXkp 16 | IDxteXNxbC1kZXZAcGVyY29uYS5jb20+iQI3BBMBCgAhBQJXdL3gAhsDBQsJCAcD 17 | BRUKCQgLBRYCAwEAAh4BAheAAAoJEJM0ol+FB++l4koQAKkrRP+K/p/TGlnqlbNy 18 | S5gdSIB1hxT3iFwIdF9EPZq0U+msh8OY7omV/82rJp4T5cIJFvivtWQpEwpUjJtq 19 | BzVrQlF+12D1RFPSoXkmk6t4opAmCsAmAtRHaXIzU9WGJETaHl57Trv5IPMv15X3 20 | TmLnk1mDMSImJoxWJMyUHzA37BlPjvqQZv5meuweLCbL4qJS015s7Uz+1f/FsiDL 21 | srlE0iYCAScfBeRSKF4MSnk5huIGgncaltKJPnNYppXUb2wt+4X2dpY3/V0BoiG8 22 | YBxV6N7sA7lC/OoYF6+H3DMlSxGBQEb1i9b6ypwZIbG6CnM2abLqO67D3XGx559/ 23 | FtAgxrDBX1f63MQKlu+tQ9mOrCvSbt+bMGT6frFopgH6XiSOhOiMmjUazVRBsXRK 24 | /HM5qIk5MK0tGPSgpc5tr9NbMDmp58OQZYQscslKhx0EDDYHQyHfYFS2qoduRwQG 25 | 4BgpZm2xjGM/auCvdZ+pxjqy7dnEXvMVf0i1BylkyW4p+oK5nEwY3KHljsRxuJ0+ 26 
| gjfyj64ihNMSqDX5k38T2GPSXm5XAN+/iazlIuiqPQKLZWUjTOwr2/AA6AztU/fm 27 | sXV2swz8WekqT2fphvWKUOISr3tEGG+HF1iIY43BoAMHYYOcdSI1ZODZq3Wic+zl 28 | N1WzPshDB+d3acxeV5JhstvPiQQcBBABCAAGBQJYCWSTAAoJEHpjgJ3lEnYiM40g 29 | ALkOg65HOAOGkBV6WG9BTpQgnhsmrvC/2ozZ6dV5577/zYCf6ZB5hMO3mSwcrjTG 30 | X5+yD1CyVQEayWuUxoV2By+N9an98660hWAIYTSNiRwSFITDbLVqXOp7t/B7Bddh 31 | j3ZrzA3Eo5bV/QyS/zyKGF1tMkA64IJkQ3292g1L7RYfNG5h1IBB/xY2xCVcKNT2 32 | XcFbAPOct30bqMyT4mdT39WdYg0l4U3zOutemFYs4uyObzrVNOKln0thZpfNJdRq 33 | +OfkE6XwW2UwhTK0/GM5l1Y3NJW64DGPyM7KKcE4FTgq1MRaWepw5sAZr6pTqasW 34 | uWUf20la1M9fIdyxJsAbWn1bhpPIOl3NZ88dRK6XI8Ly36fRa2as/lPeG7ql2yma 35 | OVFDBHqfB+gAWMzkwF7TS+02er4kg9vnpErPc/aA0lMKmyXHkMANLAnWBA7tx+7s 36 | EKck8XcY4e1OiwpUXRxC+UlSaJYQtE/kmoC2NPQB0FhhvC/VQ0sBOYOAbJ5GukEJ 37 | VDB7QqqGKjzaKE0LUADCXJFcLY4yMA9bP9U+Ex/G62YcYn0g1amriKAAkEBRvBOp 38 | /qUFSj6b+EqEC5w2my3cLBnATrzskGm32XNOFdpwR469rOqxomtVedH72vW3sS1e 39 | tcGw/SHBSplDYTzcnAJQbHvD6LEeOQeWPbA77PD9ASlx7jGZj3GCq0tc7dndjTLy 40 | iL+A4EsRxEUDrH30d8TLaYd1WSD6v5i/xa0r3rXQUmPviBBzRpJxl0CFB/db2L6a 41 | /A2EHkOWjpcL2XSJgcgIVlYZCgM1OEuDGURbLUM9qNiFogdBNCkGTkqjIFES0iq4 42 | lBA4vphcXR8C34OP+7DeT1RthyPjmvi/ErXIQLTpR2Yuwl9/nI2gx6ddZFqkoHFc 43 | PSyE152uJRsYdtL9iIeEIPH//WZ0Fz+h6hhfLiPh6AN1LH3wxKqLW4hAAZ8ytUqA 44 | NNZT+7o6EVQHI6VyoigS5TJ34h36jKjRvfUaP4FfkGaPRpfR/cKUiNaCIJRaIFlv 45 | lUdbN+biQO3WRxwdyUdgDSETZnLiym6pKuCpLsic/3+fOyBuWuIxxvGGm3XUt3Lm 46 | tvlkey/sSCwInioxn0drYosq+FZP/ocBQ9aeyxZ5Fqyxqg0BInrusfthXA35WUEx 47 | VsjwidFPeftz2VbV9gD1Og3JN2Rhd7FzxH0lrLghxh129R1QVPZiDOiaJQO4QObs 48 | C5YXmzF0A/25qJ9Y8UJrsnWrPvjpH41p70Sl6iDWKigdxi6LD9NrwOnw9qBkIlmj 49 | bJL6WKrvjxgVoCo4iP8jtHUx0jwn2qsMkGqO3NM2xWb6MBVzU7nZsyGpH5OzlrHY 50 | oYziw8v6zCLZj8eg3EgFxe+5Ag0EV3S94AEQAJ+4dVt7Lmobk/qtGEBfal139/uL 51 | Ad1xbX56/EJ8JHl8fOw7UtHCUcz0ZGqXO0rODHMAh+BRep0xdSzq9bxqB+S7nneH 52 | yAGquF2r00frn9h6fNX9K/1z8QbOwFC6tq7VELiB8niOAB527gVApm9Wv//Q1Na4 53 | mbd6XeithjPisurv1q9KAPtD+4rz+PvXOAImLGwXOMLx6FGU60x1609NjfrNzYuN 54 | 
BIxNKkTtK8RuuTrIMqlC9lpuXd2aQSQG+gWlq3vH6Ldm0ELNEVPHasf/0NYoI75K 55 | 4ZUFezy+Eu0C8oqNtYYZT0uuYRJlxqEjp+WIfnDbw2+k64mWvxGf/qNCYkMM8o7n 56 | RcozyGlPoMGogT31ipgtTNcAp/hjzwXIe+U7qSJVtdo5jPU5OoJZWqNoxgVuI9bo 57 | 2ANfSHIT24bSV80D0/l52rI9IRpM36SkP05WobpHS48EIVjy7bk2s1GEyogVB28j 58 | nh4S03SS0U/QWuUUWSDpL6X7dCyv2wwMoJRVMn8GQrCqR2FO/ldjgqIgQlCO8wqv 59 | S8fmViI8MZf/cqwkv6vEmMD77haHjRYEtgNINZIB8I9KiSDWVGM5owOGcflidR4S 60 | ToyHLrUNBGwf7ESl4v8XUvTq7RaH7SJeopckDiO9ThfAZKTODfJppuWRie6fmbKE 61 | hBizAh0LIQfhaXdJABEBAAGJAh8EGAEKAAkFAld0veACGwwACgkQkzSiX4UH76XG 62 | qRAAgLuPPUJa361sqC60tEVzF7E1BmhMAA9OTc6Oqp4ItY7VyYe2aM1JdNzmulfv 63 | y88RhCPNCkABFnECmkB14kcHOb1Ct+LKjtNbw/QZ/1z2nWY9S2XaDQE29FTvNjOA 64 | IXVojAq1L5c7ZR1NPnobLm9rF3UGJODwn3K2QgZKS5JdI4BJ4YLlGY3dJoPrKiZV 65 | rjzeT2RWGFI5TMrBgr1/ZaAaEjXHGlUXktttGEKgTPiJr9OomhZ0f9qC6XfgAZY6 66 | A9GEy74USlv+eiezvddPBC1xeJkB73PhmW1WxJyKiWBHM/CRfEyZZUyZ71jKZUI9 67 | OvPE+LqdzqelJnMTbvmbTa7zpXaG3APYxtK4aZxN2YA899eBDlcznsQsSUNs0DV4 68 | 3WNkCHNgEu/rdf6c07LrKy5pzlDujPIE4ik2SwuV4DT4XOydiY+UarNi2cPqcWCU 69 | Ofz3yOT8taTCK0vjvZ+HxFFsNh9+xd5qWLLpbZNgqtCXnZqMtXsPk9RRL3FKUA9x 70 | 09K5cDOHsaE4oOiaZbAt8+jS5g3deNr4CRbXfly3Ph68Km9mOQFN+iDTsUaW6Z25 71 | Qrl8e8liJLJXU/lIqvjvbYLyNYKjZhxL4ixmBUUW5jVsboe2Iiak/vkgzQbeDW7J 72 | 3Y6EX2cYNLGOniQpadSgZ1XQ/VtRdoBu9dHOUhzHt04Pu1k= 73 | =5SzL 74 | -----END PGP PUBLIC KEY BLOCK----- 75 | -------------------------------------------------------------------------------- /tests/images/base/setup.sh: -------------------------------------------------------------------------------- 1 | set -xe 2 | 3 | cat </etc/apt/apt.conf.d/01buildconfig 4 | APT::Install-Recommends "0"; 5 | APT::Get::Assume-Yes "true"; 6 | APT::Install-Suggests "0"; 7 | EOF 8 | 9 | apt-get update 10 | 11 | apt-get install \ 12 | wget \ 13 | ca-certificates \ 14 | lsb-release \ 15 | gpg-agent \ 16 | apt-utils \ 17 | software-properties-common 18 | 19 | apt-key add - >/root/.ssh/authorized_keys" 10 | done 11 | done 12 | 13 | rm id_rsa.pub 14 | 
-------------------------------------------------------------------------------- /tests/images/jepsen_common/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mysync-test-base:latest 2 | RUN (yes | ssh-keygen -m PEM -t rsa -b 2048 -N '' -f /root/test_ssh_key -C root@mysync_jepsen_1 || true) && \ 3 | eval `ssh-agent -s` && cp /root/test_ssh_key.pub /root/.ssh/authorized_keys && \ 4 | cp /root/test_ssh_key.pub /root/.ssh/id_rsa.pub && \ 5 | cp /root/test_ssh_key /root/.ssh/id_rsa && ssh-add -k /root/.ssh/id_rsa 6 | COPY ./ssh_config /etc/ssh/ssh_config 7 | COPY ./sshd_config /etc/ssh/sshd_config 8 | -------------------------------------------------------------------------------- /tests/images/jepsen_common/ssh_config: -------------------------------------------------------------------------------- 1 | 2 | # This is the ssh client system-wide configuration file. See 3 | # ssh_config(5) for more information. This file provides defaults for 4 | # users, and the values can be changed in per-user configuration files 5 | # or on the command line. 6 | 7 | # Configuration data is parsed as follows: 8 | # 1. command line options 9 | # 2. user-specific file 10 | # 3. system-wide file 11 | # Any configuration value is only changed the first time it is set. 12 | # Thus, host-specific definitions should be at the beginning of the 13 | # configuration file, and defaults at the end. 14 | 15 | # Site-wide defaults for some commonly used options. For a comprehensive 16 | # list of available options, their meanings and defaults, please see the 17 | # ssh_config(5) man page. 
18 | 19 | Host * 20 | # ForwardAgent no 21 | # ForwardX11 no 22 | # ForwardX11Trusted yes 23 | # PasswordAuthentication yes 24 | # HostbasedAuthentication no 25 | # GSSAPIAuthentication no 26 | # GSSAPIDelegateCredentials no 27 | # GSSAPIKeyExchange no 28 | # GSSAPITrustDNS no 29 | # BatchMode no 30 | # CheckHostIP yes 31 | # AddressFamily any 32 | # ConnectTimeout 0 33 | StrictHostKeyChecking no 34 | # IdentityFile ~/.ssh/id_rsa 35 | # IdentityFile ~/.ssh/id_dsa 36 | # IdentityFile ~/.ssh/id_ecdsa 37 | # IdentityFile ~/.ssh/id_ed25519 38 | # Port 22 39 | # Protocol 2 40 | # Ciphers aes128-ctr,aes192-ctr,aes256-ctr,aes128-cbc,3des-cbc 41 | # MACs hmac-md5,hmac-sha1,umac-64@openssh.com 42 | # EscapeChar ~ 43 | # Tunnel no 44 | # TunnelDevice any:any 45 | # PermitLocalCommand no 46 | # VisualHostKey no 47 | # ProxyCommand ssh -q -W %h:%p gateway.example.com 48 | # RekeyLimit 1G 1h 49 | SendEnv LANG LC_* 50 | HashKnownHosts yes 51 | GSSAPIAuthentication yes 52 | -------------------------------------------------------------------------------- /tests/images/jepsen_common/sshd_config: -------------------------------------------------------------------------------- 1 | PubkeyAuthentication yes 2 | ChallengeResponseAuthentication no 3 | UsePAM yes 4 | X11Forwarding yes 5 | PrintMotd no 6 | AcceptEnv LANG LC_* 7 | Subsystem sftp /usr/lib/openssh/sftp-server 8 | PubkeyAcceptedAlgorithms +ssh-rsa 9 | PermitRootLogin yes 10 | MaxAuthTries 1000000 11 | -------------------------------------------------------------------------------- /tests/images/jepsen_main/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mysync-jepsen-test-base:latest 2 | ENV LEIN_ROOT 1 3 | COPY . 
/var/lib/dist/jepsen_main 4 | RUN apt-get -qq update && apt-get install libjna-java \ 5 | gnuplot \ 6 | wget && \ 7 | chmod 600 /root/.ssh/id_rsa && \ 8 | wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein -O /usr/bin/lein && \ 9 | chmod +x /usr/bin/lein && \ 10 | cp -r /var/lib/dist/jepsen_main/jepsen /root/ && \ 11 | cd /root/jepsen && \ 12 | lein install && lein deps 13 | -------------------------------------------------------------------------------- /tests/images/jepsen_main/jepsen/project.clj: -------------------------------------------------------------------------------- 1 | (defproject jepsen.mysync "0.1.0-SNAPSHOT" 2 | :description "mysync tests" 3 | :url "https://yandex.com" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.10.3"] 7 | [org.clojure/tools.nrepl "0.2.13"] 8 | [clojure-complete "0.2.5"] 9 | [jepsen "0.2.6"] 10 | [zookeeper-clj "0.9.4"] 11 | [org.clojure/java.jdbc "0.7.12"] 12 | [mysql/mysql-connector-java "8.0.28"]]) 13 | -------------------------------------------------------------------------------- /tests/images/jepsen_main/jepsen/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | cd "$(dirname "$0")" 7 | export LEIN_ROOT=1 8 | for i in zookeeper1 zookeeper2 zookeeper3 mysql1 mysql2 mysql3 9 | do 10 | ssh-keyscan -t rsa mysync_${i}_1.mysync_mysql_net >> /root/.ssh/known_hosts 11 | done 12 | lein test 13 | -------------------------------------------------------------------------------- /tests/images/jepsen_main/jepsen/test/jepsen/mysync_test.clj: -------------------------------------------------------------------------------- 1 | (ns jepsen.mysync-test 2 | (:require [clojure.test :refer :all] 3 | [jepsen.core :as jepsen] 4 | [jepsen.mysync :as mysync])) 5 | 6 | (def mysql_nodes ["mysync_mysql1_1.mysync_mysql_net" 7 | 
"mysync_mysql2_1.mysync_mysql_net" 8 | "mysync_mysql3_1.mysync_mysql_net"]) 9 | 10 | (def zk_nodes ["mysync_zookeeper1_1.mysync_mysql_net" 11 | "mysync_zookeeper2_1.mysync_mysql_net" 12 | "mysync_zookeeper3_1.mysync_mysql_net"]) 13 | 14 | (deftest mysync-test 15 | (is (:valid? (:results (jepsen/run! (mysync/mysync-test mysql_nodes zk_nodes)))))) 16 | -------------------------------------------------------------------------------- /tests/images/jepsen_main/save_logs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in 1 2 3; do 4 | mkdir -p tests/logs/mysql${i} 5 | mkdir -p tests/logs/zookeeper${i} 6 | 7 | queries=$(docker exec mysync_mysql${i}_1 bash -c 'ls /var/log/mysql/ -d /var/log/mysql/* | sed 1d') 8 | 9 | for logfile in /var/log/mysync.log /var/log/mysql/error.log $queries /var/log/resetup.log /var/log/supervisor.log; do 10 | logname=$(echo "${logfile}" | rev | cut -d/ -f1 | rev) 11 | docker exec mysync_mysql${i}_1 cat "${logfile}" >"tests/logs/mysql${i}/${logname}" 12 | done 13 | 14 | docker exec mysync_zoo${i}_1 cat /var/log/zookeeper/zookeeper--server-mysync_zookeeper${i}_1.log >tests/logs/zookeeper${i}/zk.log 2>&1 15 | done 16 | 17 | tail -n 18 tests/logs/jepsen.log 18 | # Explicitly fail here 19 | exit 1 20 | -------------------------------------------------------------------------------- /tests/images/jepsen_sshd_config: -------------------------------------------------------------------------------- 1 | PubkeyAuthentication yes 2 | ChallengeResponseAuthentication no 3 | UsePAM yes 4 | X11Forwarding yes 5 | PrintMotd no 6 | AcceptEnv LANG LC_* 7 | Subsystem sftp /usr/lib/openssh/sftp-server 8 | PubkeyAcceptedAlgorithms ssh-rsa 9 | PermitRootLogin yes 10 | MaxAuthTries 1000000 11 | -------------------------------------------------------------------------------- /tests/images/mysql/.my.cnf: -------------------------------------------------------------------------------- 1 | [client] 2 | 
user=admin 3 | password=admin_pwd 4 | -------------------------------------------------------------------------------- /tests/images/mysql/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG VERSION="" 2 | FROM mysync-test-base${VERSION}:latest 3 | ARG VERSION="" 4 | ENV VERSION="${VERSION}" 5 | COPY . /var/lib/dist/mysql 6 | COPY ./mysync /usr/bin/mysync 7 | RUN bash /var/lib/dist/mysql/setup.sh 8 | -------------------------------------------------------------------------------- /tests/images/mysql/my.cnf: -------------------------------------------------------------------------------- 1 | [client] 2 | port = 3306 3 | socket = /tmp/mysqld.sock 4 | 5 | [mysqld_safe] 6 | nice = 0 7 | socket = /tmp/mysqld.sock 8 | log_error = /var/log/mysql/error.log 9 | timezone = Europe/Moscow 10 | log_timestamps = SYSTEM 11 | open_files_limit = 65535 12 | 13 | [mysqldump] 14 | quick 15 | quote-names 16 | max_allowed_packet = 16M 17 | 18 | [mysqld] 19 | plugin_load_add = 'rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so' 20 | user = mysql 21 | port = 3306 22 | pid_file = /tmp/mysqld.pid 23 | socket = /tmp/mysqld.sock 24 | log_error = /var/log/mysql/error.log 25 | log_error_verbosity = 3 26 | general_log = ON 27 | general_log_file = /var/log/mysql/query.log 28 | basedir = /usr 29 | datadir = /var/lib/mysql 30 | tmpdir = /tmp 31 | lc_messages_dir = /usr/share/mysql 32 | max_connect_errors = 1000000 33 | local_infile = OFF 34 | slave_net_timeout = 30 35 | 36 | event_scheduler = ON 37 | skip_external_locking = ON 38 | default_storage_engine = InnoDB 39 | disabled_storage_engines = "MyISAM,MRG_MyISAM,BLACKHOLE,FEDERATED,ARCHIVE,CSV" 40 | explicit_defaults_for_timestamp = ON 41 | log_timestamps = SYSTEM 42 | max_allowed_packet = 16M 43 | thread_stack = 192K 44 | # query cache is deprecated and will be removed in 8.0 45 | query_cache_size = 0 46 | 47 | max_connections = 200 48 | thread_handling = 
one-thread-per-connection 49 | thread_cache_size = 5 50 | 51 | # Slow log 52 | max_slowlog_size = 1G 53 | 54 | # InnoDB settings 55 | innodb_buffer_pool_size = 32M 56 | innodb_file_per_table = ON 57 | innodb_log_file_size = 8M 58 | innodb_autoinc_lock_mode = 2 59 | innodb_use_native_aio = OFF 60 | #innodb_flush_method = O_DIRECT 61 | innodb_flush_log_at_trx_commit = 1 62 | innodb_use_global_flush_log_at_trx_commit = OFF 63 | 64 | # Replication 65 | # server_id = 2 # should set from env 66 | binlog_format = ROW 67 | gtid_mode = ON 68 | enforce_gtid_consistency = ON 69 | log_slave_updates = ON 70 | max_binlog_size = 100M 71 | expire_logs_days = 3 72 | master_info_repository = TABLE 73 | relay_log_info_repository = TABLE 74 | relay_log_recovery = ON 75 | log_bin = mysql-bin-log 76 | relay_log = mysql-relay-log 77 | sync_binlog = 1 78 | rpl_semi_sync_master_timeout = 31536000000 79 | rpl_semi_sync_master_wait_for_slave_count = 1 80 | rpl_semi_sync_master_wait_no_slave = ON 81 | rpl_semi_sync_master_wait_point = AFTER_SYNC 82 | 83 | # we should start in 84 | # * read-only mode to avoid split brain after restart 85 | # * offline_mode to avoid stale reads after restart 86 | # mysync will bring us in writable mode 87 | read_only = ON 88 | super_read_only = ON 89 | offline_mode = ON 90 | -------------------------------------------------------------------------------- /tests/images/mysql/my.cnf.8.0: -------------------------------------------------------------------------------- 1 | [client] 2 | port = 3306 3 | socket = /tmp/mysqld.sock 4 | 5 | [mysqld_safe] 6 | nice = 0 7 | socket = /tmp/mysqld.sock 8 | log_error = /var/log/mysql/error.log 9 | timezone = Europe/Moscow 10 | log_timestamps = SYSTEM 11 | open_files_limit = 65535 12 | 13 | [mysqldump] 14 | quick 15 | quote-names 16 | max_allowed_packet = 16M 17 | 18 | [mysqld] 19 | plugin_load_add = 'rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so' 20 | 
default_authentication_plugin=mysql_native_password # no ssl in docker 21 | user = mysql 22 | port = 3306 23 | pid_file = /tmp/mysqld.pid 24 | socket = /tmp/mysqld.sock 25 | log_error = /var/log/mysql/error.log 26 | log_error_verbosity = 3 27 | general_log = ON 28 | general_log_file = /var/log/mysql/query.log 29 | basedir = /usr 30 | datadir = /var/lib/mysql 31 | tmpdir = /tmp 32 | lc_messages_dir = /usr/share/mysql 33 | max_connect_errors = 1000000 34 | local_infile = OFF 35 | slave_net_timeout = 30 36 | 37 | event_scheduler = ON 38 | skip_external_locking = ON 39 | default_storage_engine = InnoDB 40 | disabled_storage_engines = "MyISAM,MRG_MyISAM,BLACKHOLE,FEDERATED,ARCHIVE,CSV" 41 | explicit_defaults_for_timestamp = ON 42 | log_timestamps = SYSTEM 43 | max_allowed_packet = 16M 44 | thread_stack = 192K 45 | 46 | max_connections = 200 47 | thread_handling = one-thread-per-connection 48 | thread_cache_size = 5 49 | 50 | # Slow log 51 | max_slowlog_size = 1G 52 | 53 | # InnoDB settings 54 | innodb_buffer_pool_size = 32M 55 | innodb_file_per_table = ON 56 | innodb_log_file_size = 8M 57 | innodb_autoinc_lock_mode = 2 58 | innodb_use_native_aio = OFF 59 | #innodb_flush_method = O_DIRECT 60 | innodb_flush_log_at_trx_commit = 1 61 | 62 | 63 | # Replication 64 | # server_id = 2 # should set from env 65 | binlog_format = ROW 66 | gtid_mode = ON 67 | enforce_gtid_consistency = ON 68 | log_slave_updates = ON 69 | max_binlog_size = 100M 70 | expire_logs_days = 3 71 | master_info_repository = TABLE 72 | relay_log_info_repository = TABLE 73 | relay_log_recovery = ON 74 | log_bin = mysql-bin-log 75 | relay_log = mysql-relay-log 76 | sync_binlog = 1 77 | 78 | # we should start in 79 | # * read-only mode to avoid split brain after restart 80 | # * offline_mode to avoid stale reads after restart 81 | # mysync will bring us in writable mode 82 | read_only = ON 83 | super_read_only = ON 84 | offline_mode = ON 85 | 
-------------------------------------------------------------------------------- /tests/images/mysql/my.cnf.8.4: -------------------------------------------------------------------------------- 1 | [client] 2 | port = 3306 3 | socket = /tmp/mysqld.sock 4 | 5 | [mysqld_safe] 6 | nice = 0 7 | socket = /tmp/mysqld.sock 8 | log_error = /var/log/mysql/error.log 9 | timezone = Europe/Moscow 10 | log_timestamps = SYSTEM 11 | open_files_limit = 65535 12 | 13 | [mysqldump] 14 | quick 15 | quote-names 16 | max_allowed_packet = 16M 17 | 18 | [mysqld] 19 | mysql_native_password = ON 20 | plugin_load_add = 'rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so' 21 | user = mysql 22 | port = 3306 23 | pid_file = /tmp/mysqld.pid 24 | socket = /tmp/mysqld.sock 25 | log_error = /var/log/mysql/error.log 26 | log_error_verbosity = 3 27 | general_log = ON 28 | general_log_file = /var/log/mysql/query.log 29 | basedir = /usr 30 | datadir = /var/lib/mysql 31 | tmpdir = /tmp 32 | lc_messages_dir = /usr/share/mysql 33 | max_connect_errors = 1000000 34 | local_infile = OFF 35 | replica_net_timeout = 30 36 | 37 | event_scheduler = ON 38 | skip_external_locking = ON 39 | default_storage_engine = InnoDB 40 | disabled_storage_engines = "MyISAM,MRG_MyISAM,BLACKHOLE,FEDERATED,ARCHIVE,CSV" 41 | explicit_defaults_for_timestamp = ON 42 | log_timestamps = SYSTEM 43 | max_allowed_packet = 16M 44 | thread_stack = 192K 45 | 46 | max_connections = 200 47 | thread_handling = one-thread-per-connection 48 | thread_cache_size = 5 49 | 50 | # Slow log 51 | max_slowlog_size = 1G 52 | 53 | # InnoDB settings 54 | innodb_buffer_pool_size = 32M 55 | innodb_file_per_table = ON 56 | innodb_log_file_size = 8M 57 | innodb_autoinc_lock_mode = 2 58 | innodb_use_native_aio = OFF 59 | #innodb_flush_method = O_DIRECT 60 | innodb_flush_log_at_trx_commit = 1 61 | 62 | 63 | # Replication 64 | # server_id = 2 # should set from env 65 | binlog_format = ROW 66 | gtid_mode = ON 67 | 
enforce_gtid_consistency = ON 68 | log_replica_updates = ON 69 | max_binlog_size = 100M 70 | binlog_expire_logs_seconds = 1209600 71 | relay_log_recovery = ON 72 | log_bin = mysql-bin-log 73 | relay_log = mysql-relay-log 74 | sync_binlog = 1 75 | 76 | # we should start in 77 | # * read-only mode to avoid split brain after restart 78 | # * offline_mode to avoid stale reads after restart 79 | # mysync will bring us in writable mode 80 | read_only = ON 81 | super_read_only = ON 82 | offline_mode = ON 83 | -------------------------------------------------------------------------------- /tests/images/mysql/mysync.yaml: -------------------------------------------------------------------------------- 1 | log: /dev/stderr 2 | loglevel: Debug 3 | db_timeout: 5s 4 | db_lost_check_timeout: 1s 5 | tick_interval: 2s 6 | healthcheck_interval: 5s 7 | dcs_wait_timeout: 10s 8 | failover: ${MYSYNC_FAILOVER:-false} 9 | failover_cooldown: ${MYSYNC_FAILOVER_COOLDOWN:-60m} 10 | failover_delay: ${MYSYNC_FAILOVER_DELAY:-0s} 11 | inactivation_delay: ${MYSYNC_INACTIVATION_DELAY:-5s} 12 | semi_sync: ${MYSYNC_SEMISYNC:-true} 13 | async: ${MYSYNC_ASYNC:-false} 14 | async_allowed_lag: ${ASYNC_ALLOWED_LAG:-0s} 15 | resetupfile: /tmp/mysync.resetup 16 | resetup_crashed_hosts: ${MYSYNC_RESETUP_CRASHED_HOSTS:-false} 17 | zookeeper: 18 | session_timeout: 3s 19 | namespace: /test 20 | hosts: [ $ZK_SERVERS ] 21 | auth: true 22 | username: testuser 23 | password: testpassword123 24 | use_ssl: true 25 | keyfile: /etc/zk-ssl/server.key 26 | certfile: /etc/zk-ssl/server.crt 27 | ca_cert: /etc/zk-ssl/ca.cert.pem 28 | verify_certs: true 29 | mysql: 30 | user: $MYSQL_ADMIN_USER 31 | password: $MYSQL_ADMIN_PASSWORD 32 | replication_user: repl 33 | replication_password: repl_pwd 34 | port: $MYSQL_PORT 35 | external_replication_ssl_ca: /etc/mysql/ssl/external_CA.pem 36 | pid_file: /tmp/mysqld.pid 37 | error_log: /var/log/mysql/error.log 38 | queries: 39 | replication_lag: $MYSYNC_REPLICATION_LAG_QUERY 40 | 
disable_semi_sync_replication_on_maintenance: ${MYSYNC_DISABLE_REPLICATION_ON_MAINT:-false} 41 | rpl_semi_sync_master_wait_for_slave_count: ${MYSYNC_WAIT_FOR_SLAVE_COUNT:-1} 42 | critical_disk_usage: ${MYSYNC_CRITICAL_DISK_USAGE:-100} 43 | keep_super_writable_on_critical_disk_usage: ${MYSYNC_KEEP_SUPER_WRITABLE_ON_CRITICAL_DISK_USAGE:-false} 44 | test_disk_usage_file: /tmp/usedspace 45 | db_set_ro_force_timeout: 40s 46 | db_set_ro_timeout: ${MYSYNC_SET_RO_TIMEOUT:-30s} 47 | offline_mode_enable_lag: ${OFFLINE_MODE_ENABLE_LAG:-10s} 48 | offline_mode_disable_lag: 5s 49 | priority_choice_max_lag: ${MYSYNC_PRIORITY_CHOICE_LAG:-60s} 50 | disable_set_readonly_on_lost: false 51 | stream_from_reasonable_lag: ${MYSYNC_STREAM_FROM_REASONABLE_LAG:-5m} 52 | exclude_users: 53 | - 'repl' 54 | - 'something' 55 | - 'admin' 56 | - 'monitor' 57 | - 'event_scheduler' 58 | replication_repair_cooldown: 10s 59 | replication_repair_aggressive_mode: ${MYSYNC_REPLICATION_REPAIR_AGGRESSIVE_MODE:-false} 60 | test_filesystem_readonly_file: /tmp/readonly 61 | replication_channel: '' 62 | external_replication_type: 'external' 63 | show_only_gtid_diff: false 64 | repl_mon: ${REPL_MON:-false} 65 | force_switchover: ${FORCE_SWITCHOVER:-false} 66 | manager_switchover: ${MANAGER_SWITCHOVER:-true} 67 | manager_election_delay_after_quorum_loss: ${MANAGER_ELECTION_DELAY_AFTER_QUORUM_LOSS:-15s} 68 | manager_lock_acquire_delay_after_quorum_loss: ${MANAGER_LOCK_ACQUIRE_DELAY_AFTER_QUORUM_LOSS:-30s} 69 | optimize_replication_lag_threshold: 60s 70 | optimize_replication_convergence_timeout: 300s 71 | -------------------------------------------------------------------------------- /tests/images/mysql/setup.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | chown mysql:root /etc/mysql 4 | touch /etc/mysync.yaml 5 | chown mysql:mysql /etc/mysync.yaml 6 | if [[ "$VERSION" == "8.0" ]]; then 7 | mkdir /etc/mysql/ssl 8 | chown mysql:mysql /etc/mysql/ssl 9 | cp 
/var/lib/dist/mysql/my.cnf.8.0 /etc/mysql/my.cnf 10 | cp /var/lib/dist/mysql/my.cnf.8.0 /etc/mysql/init.cnf 11 | cat <> /etc/mysql/my.cnf 12 | rpl_semi_sync_master_timeout = 31536000000 13 | rpl_semi_sync_master_wait_for_slave_count = 1 14 | rpl_semi_sync_master_wait_no_slave = ON 15 | rpl_semi_sync_master_wait_point = AFTER_SYNC 16 | EOF 17 | elif [[ "$VERSION" == "8.4" ]]; then 18 | mkdir /etc/mysql/ssl 19 | chown mysql:mysql /etc/mysql/ssl 20 | cp /var/lib/dist/mysql/my.cnf.8.4 /etc/mysql/my.cnf 21 | cp /var/lib/dist/mysql/my.cnf.8.4 /etc/mysql/init.cnf 22 | cat <> /etc/mysql/my.cnf 23 | rpl_semi_sync_master_timeout = 31536000000 24 | rpl_semi_sync_master_wait_for_slave_count = 1 25 | rpl_semi_sync_master_wait_no_slave = ON 26 | rpl_semi_sync_master_wait_point = AFTER_SYNC 27 | EOF 28 | else 29 | cp /var/lib/dist/mysql/my.cnf /etc/mysql/my.cnf 30 | cp /var/lib/dist/mysql/my.cnf /etc/mysql/init.cnf 31 | fi 32 | 33 | cp /var/lib/dist/mysql/.my.cnf /root/.my.cnf 34 | if [[ "$VERSION" == "8.4" ]]; then 35 | cp /var/lib/dist/mysql/supervisor_mysql.conf.8.4 /etc/supervisor/conf.d/supervisor_mysql.conf 36 | else 37 | cp /var/lib/dist/mysql/supervisor_mysql.conf /etc/supervisor/conf.d/supervisor_mysql.conf 38 | fi -------------------------------------------------------------------------------- /tests/images/mysql/start_mysql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | cat < /etc/mysql/init.sql 7 | SET GLOBAL super_read_only = 0; 8 | CREATE USER $MYSQL_ADMIN_USER@'%' IDENTIFIED BY '$MYSQL_ADMIN_PASSWORD'; 9 | GRANT ALL ON *.* TO $MYSQL_ADMIN_USER@'%' WITH GRANT OPTION; 10 | CREATE USER repl@'%' IDENTIFIED BY 'repl_pwd'; 11 | CREATE USER user@'%' IDENTIFIED BY 'user_pwd'; 12 | GRANT ALL ON *.* TO user@'%'; 13 | GRANT REPLICATION SLAVE ON *.* TO repl@'%'; 14 | CREATE DATABASE test1; 15 | RESET MASTER; 16 | SET GLOBAL super_read_only = 1; 17 | EOF 18 | 19 | if [ ! 
-f /etc/mysql/slave.sql ]; then 20 | if [ ! -z "$MYSQL_MASTER" ]; then 21 | cat < /etc/mysql/slave.sql 22 | SET GLOBAL server_id = $MYSQL_SERVER_ID; 23 | RESET SLAVE FOR CHANNEL ''; 24 | CHANGE MASTER TO MASTER_HOST = '$MYSQL_MASTER', MASTER_USER = 'repl', MASTER_PASSWORD = 'repl_pwd', MASTER_AUTO_POSITION = 1, MASTER_CONNECT_RETRY = 1, MASTER_RETRY_COUNT = 100500 FOR CHANNEL ''; 25 | START SLAVE; 26 | EOF 27 | else 28 | touch /etc/mysql/slave.sql 29 | fi 30 | else 31 | echo "" > /etc/mysql/slave.sql 32 | fi 33 | 34 | if [ ! -f /var/lib/mysql/auto.cnf ]; then 35 | /usr/sbin/mysqld --defaults-file=/etc/mysql/init.cnf \ 36 | --initialize --datadir=/var/lib/mysql --init-file=/etc/mysql/init.sql --server-id=$MYSQL_SERVER_ID || true 37 | echo "==INITIALIZED==" 38 | fi 39 | 40 | # workaround for docker on mac 41 | chown -R mysql:mysql /var/lib/mysql 42 | find /var/lib/mysql -type f -exec touch {} + 43 | 44 | echo "==STARTING==" 45 | exec /usr/sbin/mysqld --defaults-file=/etc/mysql/my.cnf --datadir=/var/lib/mysql --init-file=/etc/mysql/slave.sql --server-id=$MYSQL_SERVER_ID --report-host=`hostname` 46 | -------------------------------------------------------------------------------- /tests/images/mysql/start_mysql_84.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | cat < /etc/mysql/init.sql 7 | SET GLOBAL super_read_only = 0; 8 | CREATE USER $MYSQL_ADMIN_USER@'%' IDENTIFIED WITH mysql_native_password BY '$MYSQL_ADMIN_PASSWORD'; 9 | GRANT ALL ON *.* TO $MYSQL_ADMIN_USER@'%' WITH GRANT OPTION; 10 | CREATE USER repl@'%' IDENTIFIED WITH mysql_native_password BY 'repl_pwd'; 11 | CREATE USER user@'%' IDENTIFIED WITH mysql_native_password BY 'user_pwd'; 12 | GRANT ALL ON *.* TO user@'%'; 13 | GRANT REPLICATION SLAVE ON *.* TO repl@'%'; 14 | CREATE DATABASE test1; 15 | RESET BINARY LOGS AND GTIDS; 16 | SET GLOBAL super_read_only = 1; 17 | EOF 18 | 19 | if [ ! 
-f /etc/mysql/slave.sql ]; then 20 | if [ ! -z "$MYSQL_MASTER" ]; then 21 | cat < /etc/mysql/slave.sql 22 | SET GLOBAL server_id = $MYSQL_SERVER_ID; 23 | RESET REPLICA FOR CHANNEL ''; 24 | CHANGE REPLICATION SOURCE TO SOURCE_HOST = '$MYSQL_MASTER', SOURCE_USER = 'repl', SOURCE_PASSWORD = 'repl_pwd', SOURCE_AUTO_POSITION = 1, SOURCE_CONNECT_RETRY = 1, SOURCE_RETRY_COUNT = 100500 FOR CHANNEL ''; 25 | START REPLICA; 26 | EOF 27 | else 28 | touch /etc/mysql/slave.sql 29 | fi 30 | else 31 | echo "" > /etc/mysql/slave.sql 32 | fi 33 | 34 | if [ ! -f /var/lib/mysql/auto.cnf ]; then 35 | /usr/sbin/mysqld --defaults-file=/etc/mysql/init.cnf \ 36 | --initialize --datadir=/var/lib/mysql --init-file=/etc/mysql/init.sql --server-id=$MYSQL_SERVER_ID || true 37 | echo "==INITIALIZED==" 38 | fi 39 | 40 | # workaround for docker on mac 41 | chown -R mysql:mysql /var/lib/mysql 42 | find /var/lib/mysql -type f -exec touch {} + 43 | 44 | echo "==STARTING==" 45 | exec /usr/sbin/mysqld --defaults-file=/etc/mysql/my.cnf --datadir=/var/lib/mysql --init-file=/etc/mysql/slave.sql --server-id=$MYSQL_SERVER_ID --report-host=`hostname` 46 | -------------------------------------------------------------------------------- /tests/images/mysql/start_mysync.sh: -------------------------------------------------------------------------------- 1 | mkdir -p /var/run/mysync 2 | 3 | eval "cat </dev/null >/etc/mysync.yaml 7 | 8 | if [ ! -f /tmp/usedspace ]; then 9 | echo 10 > /tmp/usedspace 10 | fi 11 | if [ ! 
-f /tmp/readonly ]; then 12 | echo "false" > /tmp/readonly 13 | fi 14 | exec /usr/bin/mysync --loglevel=Debug 15 | -------------------------------------------------------------------------------- /tests/images/mysql/supervisor_mysql.conf: -------------------------------------------------------------------------------- 1 | [program:mysync] 2 | command=bash /var/lib/dist/mysql/start_mysync.sh 3 | process_name=%(program_name)s 4 | autostart=true 5 | startretries=100000 6 | autorestart=true 7 | stopsignal=TERM 8 | user=mysql 9 | priority=15 10 | stdout_logfile=/var/log/mysync.log 11 | stdout_logfile_maxbytes=0 12 | redirect_stderr=true 13 | 14 | [program:mysqld] 15 | command=bash /var/lib/dist/mysql/start_mysql.sh 16 | process_name=%(program_name)s 17 | autostart=true 18 | autorestart=false 19 | stopsignal=TERM 20 | user=mysql 21 | group=mysql 22 | priority=5 23 | stdout_logfile=/var/log/mysql/error.log 24 | stdout_logfile_maxbytes=0 25 | redirect_stderr=true 26 | -------------------------------------------------------------------------------- /tests/images/mysql/supervisor_mysql.conf.8.4: -------------------------------------------------------------------------------- 1 | [program:mysync] 2 | command=bash /var/lib/dist/mysql/start_mysync.sh 3 | process_name=%(program_name)s 4 | autostart=true 5 | startretries=100000 6 | autorestart=true 7 | stopsignal=TERM 8 | user=mysql 9 | priority=15 10 | stdout_logfile=/var/log/mysync.log 11 | stdout_logfile_maxbytes=0 12 | redirect_stderr=true 13 | 14 | [program:mysqld] 15 | command=bash /var/lib/dist/mysql/start_mysql_84.sh 16 | process_name=%(program_name)s 17 | autostart=true 18 | autorestart=false 19 | stopsignal=TERM 20 | user=mysql 21 | group=mysql 22 | priority=5 23 | stdout_logfile=/var/log/mysql/error.log 24 | stdout_logfile_maxbytes=0 25 | redirect_stderr=true 26 | -------------------------------------------------------------------------------- /tests/images/mysql_jepsen/.my.cnf: 
-------------------------------------------------------------------------------- 1 | [client] 2 | user=admin 3 | password=admin_pwd 4 | -------------------------------------------------------------------------------- /tests/images/mysql_jepsen/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mysync-jepsen-test-base:latest 2 | COPY . /var/lib/dist/mysql 3 | COPY ./sh-scripts/my-wait-started.sh /usr/bin/my-wait-started 4 | COPY ./mysync /usr/bin/mysync 5 | COPY ./sh-scripts/my-resetup.sh /usr/bin/my-resetup 6 | COPY ./sh-scripts/my-resetup-wd.sh /usr/bin/my-resetup-wd 7 | COPY ./setup.sh /usr/local/bin/setup.sh 8 | COPY ./my.cnf /etc/mysql/my.cnf 9 | COPY ./.my.cnf /root/.my.cnf 10 | COPY ./supervisor_mysql.conf /etc/supervisor/conf.d 11 | RUN chown mysql:root /etc/mysql 12 | RUN touch /etc/mysync.yaml 13 | RUN chown mysql:mysql /etc/mysync.yaml 14 | 15 | RUN chmod +x /usr/bin/my-resetup 16 | 17 | RUN echo 'APT::Install-Recommends "0"; \n\ 18 | APT::Get::Assume-Yes "true"; \n\ 19 | APT::Get::force-yes "true"; \n\ 20 | APT::Install-Suggests "0";' > /etc/apt/apt.conf.d/01buildconfig && \ 21 | apt-get update && apt-get install wget gnupg ca-certificates && \ 22 | apt-get update && \ 23 | apt-get install 24 | 25 | ENV MYSQL_ADMIN_USER admin 26 | ENV MYSQL_ADMIN_PASSWORD admin_pwd 27 | -------------------------------------------------------------------------------- /tests/images/mysql_jepsen/my.cnf: -------------------------------------------------------------------------------- 1 | [client] 2 | port = 3306 3 | socket = /tmp/mysqld.sock 4 | 5 | [mysqld_safe] 6 | nice = 0 7 | socket = /tmp/mysqld.sock 8 | log_error = /var/log/mysql/error.log 9 | timezone = Europe/Moscow 10 | log_timestamps = SYSTEM 11 | open_files_limit = 65535 12 | 13 | [mysqld] 14 | plugin_load_add = 'rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so' 15 | user = mysql 16 | port = 3306 17 | pid_file = /tmp/mysqld.pid 18 | 
socket = /tmp/mysqld.sock 19 | log_error = /var/log/mysql/error.log 20 | log_error_verbosity = 3 21 | general_log = ON 22 | general_log_file = /var/log/mysql/query.log 23 | basedir = /usr 24 | datadir = /var/lib/mysql 25 | tmpdir = /tmp 26 | lc_messages_dir = /usr/share/mysql 27 | max-connect-errors = 1000000 28 | local_infile = OFF 29 | 30 | 31 | event_scheduler = ON 32 | skip_external_locking = ON 33 | default_storage_engine = InnoDB 34 | disabled_storage_engines = "MyISAM,MRG_MyISAM,BLACKHOLE,FEDERATED,ARCHIVE,CSV" 35 | explicit_defaults_for_timestamp = ON 36 | log_timestamps = SYSTEM 37 | max_allowed_packet = 16M 38 | thread_stack = 192K 39 | # query cache is deprecated and will be removed in 8.0 40 | query_cache_size = 0 41 | 42 | max_connections = 200 43 | thread_handling = one-thread-per-connection 44 | thread_cache_size = 5 45 | 46 | # Slow log 47 | max_slowlog_size = 1G 48 | 49 | # InnoDB settings 50 | innodb_buffer_pool_size = 32M 51 | innodb_file_per_table = ON 52 | innodb_log_file_size = 8M 53 | innodb_autoinc_lock_mode = 2 54 | innodb_use_native_aio = OFF 55 | #innodb_flush_method = O_DIRECT 56 | innodb_flush_log_at_trx_commit = 1 57 | innodb_use_global_flush_log_at_trx_commit = OFF 58 | 59 | 60 | # Replication 61 | # server_id = 2 # should set from env 62 | binlog_format = ROW 63 | gtid_mode = ON 64 | enforce_gtid_consistency = ON 65 | log_slave_updates = ON 66 | max_binlog_size = 100M 67 | expire_logs_days = 3 68 | master_info_repository = TABLE 69 | relay_log_info_repository = TABLE 70 | relay_log_recovery = ON 71 | log_bin = mysql-bin-log 72 | relay_log = mysql-relay-log 73 | sync_binlog = 1 74 | rpl_semi_sync_master_timeout = 31536000000 75 | rpl_semi_sync_master_wait_for_slave_count = 1 76 | rpl_semi_sync_master_wait_no_slave = ON 77 | rpl_semi_sync_master_wait_point = AFTER_SYNC 78 | 79 | # we should start in read-only mode to avoid split brain after restart 80 | # mysync will bring us in writable mode 81 | read_only = ON 82 | super_read_only = 
ON 83 | offline_mode = ON 84 | 85 | [mysqldump] 86 | quick 87 | quote-names 88 | max_allowed_packet = 16M 89 | -------------------------------------------------------------------------------- /tests/images/mysql_jepsen/mysync.yaml: -------------------------------------------------------------------------------- 1 | log: /dev/stderr 2 | loglevel: Debug 3 | db_timeout: 5s 4 | db_lost_check_timeout: 1s 5 | tick_interval: 2s 6 | healthcheck_interval: 5s 7 | dcs_wait_timeout: 10s 8 | failover: ${MYSYNC_FAILOVER:-false} 9 | failover_cooldown: ${MYSYNC_FAILOVER_COOLDOWN:-60m} 10 | failover_delay: ${MYSYNC_FAILOVER_DELAY:-0s} 11 | semi_sync: ${MYSYNC_SEMISYNC:-true} 12 | resetupfile: /tmp/mysync.resetup 13 | zookeeper: 14 | session_timeout: 3s 15 | namespace: /test 16 | hosts: [ $ZK_SERVERS ] 17 | auth: true 18 | username: testuser 19 | password: testpassword123 20 | use_ssl: true 21 | keyfile: /etc/zk-ssl/server.key 22 | certfile: /etc/zk-ssl/server.crt 23 | ca_cert: /etc/zk-ssl/ca.cert.pem 24 | verify_certs: true 25 | mysql: 26 | user: $MYSQL_ADMIN_USER 27 | password: $MYSQL_ADMIN_PASSWORD 28 | replication_user: repl 29 | replication_password: repl_pwd 30 | port: $MYSQL_PORT 31 | pid_file: /tmp/mysqld.pid 32 | disable_semi_sync_replication_on_maintenance: ${MYSYNC_DISABLE_REPLICATION_ON_MAINT:-false} 33 | replication_channel: '' 34 | test_filesystem_readonly_file: /tmp/readonly 35 | test_disk_usage_file: /tmp/usedspace 36 | show_only_gtid_diff: true 37 | force_switchover: true 38 | -------------------------------------------------------------------------------- /tests/images/mysql_jepsen/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | retry_mysql_query() { 6 | tries=0 7 | ret=1 8 | while [ ${tries} -le 60 ] 9 | do 10 | if (echo "${1}" | mysql test1) 11 | then 12 | ret=0 13 | break 14 | else 15 | tries=$(( tries + 1 )) 16 | sleep 1 17 | fi 18 | done 19 | return ${ret} 20 | } 21 | 22 | if ! 
retry_mysql_query "CREATE TABLE IF NOT EXISTS test1.test_set(value int) ENGINE=INNODB;" 23 | then 24 | exit 1 25 | fi 26 | -------------------------------------------------------------------------------- /tests/images/mysql_jepsen/sh-scripts/my-resetup-wd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | while true 6 | do 7 | echo "==============" 8 | date 9 | flock -n /tmp/resetup.lock /usr/bin/my-resetup || true 10 | sleep 10 11 | done 12 | -------------------------------------------------------------------------------- /tests/images/mysql_jepsen/sh-scripts/my-resetup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | RESETUP_FILE=/tmp/mysync.resetup 6 | MYSQL_DATA_DIR=/var/lib/mysql 7 | PATH_MASTER=/test/master 8 | 9 | function zk_get() { 10 | echo "addauth digest testuser:testpassword123 11 | get /test/master" > /tmp/zk_commands 12 | 13 | cat /tmp/zk_commands | /opt/zookeeper/bin/zkCli.sh -server "mysync_zookeeper1_1.mysync_mysql_net:2181,mysync_zookeeper2_1.mysync_mysql_net:2181,mysync_zookeeper3_1.mysync_mysql_net:2181" | grep -o '"mysync_mysql[1-3]_1"' | grep -o '[a-z0-9_]*' 14 | } 15 | 16 | 17 | function mysql_set_gtid_purged() { 18 | gtids=$(tr -d '\n' < /var/lib/mysql/xtrabackup_binlog_info | awk '{print $3}') 19 | mysql -e "RESET MASTER; SET @@GLOBAL.GTID_PURGED='$gtids';" 20 | } 21 | 22 | 23 | function do_resetup() { 24 | # shutdown mysql 25 | if ! supervisorctl stop mysqld; then 26 | echo `date` failed to stop mysql 27 | return 1 28 | fi 29 | # cleanup datadir 30 | if ! 
find /var/lib/mysql/ -name "*" | egrep -v -e "auto\.cnf" -e "^/var/lib/mysql/$" | xargs rm -fr; then 31 | echo `date` failed to cleanup mysql dir 32 | fi 33 | 34 | echo `date` cleared up mysql dir 35 | 36 | master=$(zk_get $PATH_MASTER) 37 | echo `date` current master from zk: $master 38 | # fetch backup 39 | 40 | echo `date` fetching backup 41 | ssh root@"$master" "xtrabackup --backup --stream=xbstream" | sudo -u mysql xbstream --extract --directory="$MYSQL_DATA_DIR" 42 | chown -R mysql:mysql "$MYSQL_DATA_DIR" 43 | echo `date` preparing backup 44 | sudo -u mysql xtrabackup --prepare --target-dir="$MYSQL_DATA_DIR" 45 | 46 | echo `date` starting mysql 47 | supervisorctl start mysqld 48 | my-wait-started 49 | 50 | echo `date` setting gtid_purged 51 | mysql_set_gtid_purged 52 | 53 | echo `date` done 54 | } 55 | 56 | 57 | if [ -f $RESETUP_FILE ]; then 58 | do_resetup 59 | rm -fr $RESETUP_FILE 60 | else 61 | echo `date` no resetup file found, delaying 62 | fi 63 | 64 | -------------------------------------------------------------------------------- /tests/images/mysql_jepsen/sh-scripts/my-wait-started.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | WAIT_TIME=120 4 | 5 | for i in `seq 1 $WAIT_TIME` 6 | do 7 | if mysql -e 'select 1'; then 8 | exit 0 9 | fi 10 | sleep 1 11 | done 12 | 13 | exit 1 14 | -------------------------------------------------------------------------------- /tests/images/mysql_jepsen/start_mysql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | date 6 | 7 | cat < /etc/mysql/init.sql 8 | SET GLOBAL super_read_only = 0; 9 | CREATE USER $MYSQL_ADMIN_USER@'%' IDENTIFIED BY '$MYSQL_ADMIN_PASSWORD'; 10 | GRANT ALL ON *.* TO $MYSQL_ADMIN_USER@'%' WITH GRANT OPTION; 11 | CREATE USER repl@'%' IDENTIFIED BY 'repl_pwd'; 12 | GRANT REPLICATION SLAVE ON *.* TO repl@'%'; 13 | CREATE DATABASE test1; 14 | CREATE USER 
'client'@'%' IDENTIFIED BY 'client_pwd'; 15 | GRANT ALL ON test1.* TO 'client'@'%'; 16 | GRANT REPLICATION CLIENT ON *.* TO 'client'@'%'; 17 | RESET MASTER; 18 | SET GLOBAL super_read_only = 1; 19 | EOF 20 | 21 | if [ ! -z "$MYSQL_MASTER" ]; then 22 | cat < /etc/mysql/slave.sql 23 | SET GLOBAL server_id = $MYSQL_SERVER_ID; 24 | RESET SLAVE FOR CHANNEL ''; 25 | CHANGE MASTER TO MASTER_HOST = '$MYSQL_MASTER', MASTER_USER = 'repl', MASTER_PASSWORD = 'repl_pwd', MASTER_AUTO_POSITION = 1, MASTER_CONNECT_RETRY = 1, MASTER_RETRY_COUNT = 100500 FOR CHANNEL ''; 26 | START SLAVE; 27 | EOF 28 | else 29 | touch /etc/mysql/slave.sql 30 | fi 31 | 32 | if [ ! -f /var/lib/mysql/auto.cnf ]; then 33 | /usr/sbin/mysqld --initialize --datadir=/var/lib/mysql --init-file=/etc/mysql/init.sql --server-id=$MYSQL_SERVER_ID 34 | echo "==INITIALIZED==" 35 | else 36 | # clean slave script for restarts 37 | echo "" > /etc/mysql/slave.sql 38 | fi 39 | 40 | # workaround for docker on mac 41 | chown -R mysql:mysql /var/lib/mysql 42 | find /var/lib/mysql -type f -exec touch {} + 43 | 44 | echo "==EMULATING SLOW START==" 45 | sleep 10 46 | 47 | echo "==STARTING==" 48 | exec /usr/sbin/mysqld --defaults-file=/etc/mysql/my.cnf --datadir=/var/lib/mysql --init-file=/etc/mysql/slave.sql --server-id=$MYSQL_SERVER_ID --report-host=`hostname` 49 | -------------------------------------------------------------------------------- /tests/images/mysql_jepsen/start_mysync.sh: -------------------------------------------------------------------------------- 1 | mkdir -p /var/run/mysync 2 | 3 | eval "cat </dev/null >/etc/mysync.yaml 7 | 8 | if [ ! -f /tmp/usedspace ]; then 9 | echo 10 > /tmp/usedspace 10 | fi 11 | if [ ! 
-f /tmp/readonly ]; then 12 | echo "false" > /tmp/readonly 13 | fi 14 | exec /usr/bin/mysync --loglevel=Debug 15 | -------------------------------------------------------------------------------- /tests/images/mysql_jepsen/supervisor_mysql.conf: -------------------------------------------------------------------------------- 1 | [program:mysync] 2 | command=bash /var/lib/dist/mysql/start_mysync.sh 3 | process_name=%(program_name)s 4 | autostart=true 5 | startretries=100000 6 | autorestart=true 7 | stopsignal=TERM 8 | user=mysql 9 | priority=15 10 | stdout_logfile=/var/log/mysync.log 11 | stdout_logfile_maxbytes=0 12 | redirect_stderr=true 13 | 14 | [program:mysqld] 15 | command=bash /var/lib/dist/mysql/start_mysql.sh 16 | process_name=%(program_name)s 17 | autostart=false 18 | autorestart=true 19 | stopsignal=TERM 20 | user=mysql 21 | group=mysql 22 | priority=5 23 | stdout_logfile=/var/log/mysql/error.log 24 | stdout_logfile_maxbytes=0 25 | redirect_stderr=true 26 | 27 | [program:resetup-wd] 28 | command=/usr/bin/my-resetup-wd 29 | stdout_logfile=/var/log/resetup.log 30 | stderr_logfile=/var/log/resetup.log 31 | autostart=true 32 | autorestart=true 33 | -------------------------------------------------------------------------------- /tests/images/zookeeper/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG VERSION="" 2 | FROM mysync-test-base${VERSION}:latest 3 | COPY . 
/var/lib/dist/zookeeper 4 | RUN bash /var/lib/dist/zookeeper/setup.sh 5 | -------------------------------------------------------------------------------- /tests/images/zookeeper/setup.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | cp /var/lib/dist/zookeeper/supervisor_zookeeper.conf /etc/supervisor/conf.d 4 | -------------------------------------------------------------------------------- /tests/images/zookeeper/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p /var/log/zookeeper 4 | mkdir -p /tmp/zookeeper 5 | 6 | cp /var/lib/dist/zookeeper/zoo.cfg /opt/zookeeper/conf/zoo.cfg 7 | 8 | echo $ZK_SERVERS | sed -E "s/, */\n/g" >> /opt/zookeeper/conf/zoo.cfg 9 | echo $ZK_MYID > /tmp/zookeeper/myid 10 | 11 | /var/lib/dist/base/generate_certs.sh 12 | /opt/zookeeper/bin/zkServer.sh start-foreground 13 | -------------------------------------------------------------------------------- /tests/images/zookeeper/supervisor_zookeeper.conf: -------------------------------------------------------------------------------- 1 | [program:zookeeper] 2 | command=bash /var/lib/dist/zookeeper/start.sh 3 | environment=ZOO_LOG_DIR=/var/log/zookeeper,ZOO_LOG4J_PROP='INFO,ROLLINGFILE' 4 | process_name=%(program_name)s 5 | autostart=true 6 | autorestart=true 7 | stopsignal=TERM 8 | priority=5 9 | -------------------------------------------------------------------------------- /tests/images/zookeeper/zoo.cfg: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=1000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=20 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=10 9 | # the directory where the snapshot is stored. 
10 | # do not use /tmp for storage, /tmp here is just 11 | # example sakes. 12 | dataDir=/tmp/zookeeper 13 | # the port at which the clients will connect 14 | clientPort=2181 15 | # the maximum session timeout in milliseconds that the server 16 | # will allow the client to negotiate. 17 | maxSessionTimeout=60000 18 | snapCount=1000000 19 | forceSync=no 20 | # the timeout value for opening connections for leader election notifications. 21 | cnxTimeout=3000 22 | # the maximum number of client connections. 23 | # increase this if you need to handle more clients 24 | #maxClientCnxns=60 25 | # 26 | # Be sure to read the maintenance section of the 27 | # administrator guide before turning on autopurge. 28 | # 29 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance 30 | # 31 | # The number of snapshots to retain in dataDir 32 | autopurge.snapRetainCount=3 33 | # Purge task interval in hours 34 | # Set to "0" to disable auto purge feature 35 | autopurge.purgeInterval=0 36 | leaderServes=yes 37 | quorumListenOnAllIPs=true 38 | jute.maxbuffer=16777216 39 | secureClientPort=2281 40 | serverCnxnFactory=org.apache.zookeeper.server.NettyServerCnxnFactory 41 | portUnification=false 42 | skipACL=no 43 | ssl.trustStore.password=testpassword123 44 | ssl.trustStore.location=/etc/zk-ssl/truststore.jks 45 | ssl.keyStore.password=testpassword321 46 | ssl.keyStore.location=/etc/zk-ssl/server.jks 47 | 48 | # servers section will be generated from env variablses 49 | 50 | 51 | -------------------------------------------------------------------------------- /tests/images/zookeeper_jepsen/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mysync-jepsen-test-base:latest 2 | COPY . 
/var/lib/dist/zookeeper 3 | COPY ./supervisor_zookeeper.conf /etc/supervisor/conf.d 4 | COPY ./retriable_path_create.sh /usr/local/bin/retriable_path_create.sh 5 | COPY ./generate_certs_with_restart.sh /usr/local/bin/generate_certs_with_restart.sh 6 | RUN chmod 755 /usr/local/bin/generate_certs_with_restart.sh 7 | -------------------------------------------------------------------------------- /tests/images/zookeeper_jepsen/generate_certs_with_restart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | supervisorctl stop zookeeper 5 | ps -aux | grep [z]oo.cfg | awk '{print $2}' | xargs kill || true 6 | /var/lib/dist/base/generate_certs.sh $1 7 | supervisorctl start zookeeper 8 | -------------------------------------------------------------------------------- /tests/images/zookeeper_jepsen/retriable_path_create.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$1" == "" ] 4 | then 5 | echo "Usage $(basename "${0}") " 6 | exit 1 7 | fi 8 | 9 | retry_create() { 10 | echo "addauth digest testuser:testpassword123 11 | create ${1} 12 | setAcl ${1} auth:testuser:testpassword123:crwad" > /tmp/zk_commands 13 | 14 | tries=0 15 | ret=1 16 | while [ ${tries} -le 60 ] 17 | do 18 | if cat /tmp/zk_commands | /opt/zookeeper/bin/zkCli.sh 19 | then 20 | ret=0 21 | break 22 | else 23 | tries=$(( tries + 1 )) 24 | sleep 1 25 | fi 26 | done 27 | return ${ret} 28 | } 29 | 30 | retry_create "${1}" 31 | -------------------------------------------------------------------------------- /tests/images/zookeeper_jepsen/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p /var/log/zookeeper 4 | mkdir -p /tmp/zookeeper 5 | 6 | cp /var/lib/dist/zookeeper/zoo.cfg /opt/zookeeper/conf/zoo.cfg 7 | 8 | echo $ZK_SERVERS | sed -E "s/, */\n/g" >> /opt/zookeeper/conf/zoo.cfg 9 | echo $ZK_MYID > 
/tmp/zookeeper/myid 10 | 11 | /opt/zookeeper/bin/zkServer.sh start-foreground 12 | -------------------------------------------------------------------------------- /tests/images/zookeeper_jepsen/supervisor_zookeeper.conf: -------------------------------------------------------------------------------- 1 | [program:zookeeper] 2 | command=bash /var/lib/dist/zookeeper/start.sh 3 | environment=ZOO_LOG_DIR=/var/log/zookeeper,ZOO_LOG4J_PROP='INFO,ROLLINGFILE' 4 | process_name=%(program_name)s 5 | autostart=true 6 | autorestart=true 7 | stopsignal=TERM 8 | priority=5 9 | -------------------------------------------------------------------------------- /tests/images/zookeeper_jepsen/zoo.cfg: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=1000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=20 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=10 9 | # the directory where the snapshot is stored. 10 | # do not use /tmp for storage, /tmp here is just 11 | # example sakes. 12 | dataDir=/tmp/zookeeper 13 | # the port at which the clients will connect 14 | clientPort=2181 15 | # the maximum session timeout in milliseconds that the server 16 | # will allow the client to negotiate. 17 | maxSessionTimeout=60000 18 | snapCount=1000000 19 | forceSync=no 20 | # the timeout value for opening connections for leader election notifications. 21 | cnxTimeout=3000 22 | # the maximum number of client connections. 23 | # increase this if you need to handle more clients 24 | #maxClientCnxns=60 25 | # 26 | # Be sure to read the maintenance section of the 27 | # administrator guide before turning on autopurge. 
28 | # 29 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance 30 | # 31 | # The number of snapshots to retain in dataDir 32 | autopurge.snapRetainCount=3 33 | # Purge task interval in hours 34 | # Set to "0" to disable auto purge feature 35 | autopurge.purgeInterval=0 36 | leaderServes=yes 37 | quorumListenOnAllIPs=true 38 | jute.maxbuffer=16777216 39 | 40 | secureClientPort=2281 41 | serverCnxnFactory=org.apache.zookeeper.server.NettyServerCnxnFactory 42 | portUnification=false 43 | skipACL=no 44 | ssl.trustStore.password=testpassword123 45 | ssl.trustStore.location=/etc/zk-ssl/truststore.jks 46 | ssl.keyStore.password=testpassword321 47 | ssl.keyStore.location=/etc/zk-ssl/server.jks 48 | 49 | 50 | # servers section will be generated from env variablses 51 | 52 | 53 | -------------------------------------------------------------------------------- /tests/testutil/context.go: -------------------------------------------------------------------------------- 1 | package testutil 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/golang/mock/gomock" 8 | ) 9 | 10 | type ctxID string 11 | 12 | const ctxIDKeyName ctxID = "context UUID" 13 | 14 | type ctxMatcher struct { 15 | ctx context.Context 16 | } 17 | 18 | func (m *ctxMatcher) Matches(x any) bool { 19 | newCtx, ok := x.(context.Context) 20 | if !ok { 21 | return false 22 | } 23 | oldVal := m.ctx.Value(ctxIDKeyName) 24 | newVal := newCtx.Value(ctxIDKeyName) 25 | return oldVal == newVal 26 | } 27 | 28 | func (m *ctxMatcher) String() string { 29 | return "context matcher" 30 | } 31 | 32 | //nolint:revive 33 | func MatchContext(t *testing.T, ctx context.Context) (context.Context, gomock.Matcher) { 34 | ctx = context.WithValue(ctx, ctxIDKeyName, NewUUIDStr(t)) 35 | return ctx, &ctxMatcher{ 36 | ctx: ctx, 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /tests/testutil/context_test.go: 
-------------------------------------------------------------------------------- 1 | package testutil_test 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/require" 8 | 9 | "github.com/yandex/mysync/tests/testutil" 10 | ) 11 | 12 | type someKey string 13 | 14 | const someKeyName someKey = "key" 15 | 16 | func TestMatchContext(t *testing.T) { 17 | tcs := []struct { 18 | newCtx func(context.Context) context.Context 19 | exp bool 20 | }{ 21 | { 22 | newCtx: func(ctx context.Context) context.Context { 23 | return context.WithValue(ctx, someKeyName, "asd") 24 | }, 25 | exp: true, 26 | }, 27 | { 28 | newCtx: func(ctx context.Context) context.Context { 29 | return context.Background() 30 | }, 31 | exp: false, 32 | }, 33 | { 34 | newCtx: func(ctx context.Context) context.Context { 35 | ctx, cancel := context.WithCancel(ctx) 36 | cancel() 37 | return ctx 38 | }, 39 | exp: true, 40 | }, 41 | } 42 | 43 | for _, tc := range tcs { 44 | t.Run("", func(t *testing.T) { 45 | ctx, matcher := testutil.MatchContext(t, context.Background()) 46 | require.Equal(t, tc.exp, matcher.Matches(tc.newCtx(ctx))) 47 | }) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /tests/testutil/matchers/matchers_test.go: -------------------------------------------------------------------------------- 1 | package matchers 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestRegexpMatcher(t *testing.T) { 10 | assert.NoError(t, RegexpMatcher("qwe asd er", "a.d"), "regexp matcher should find match at any position") 11 | assert.NoError(t, RegexpMatcher("qwe", "qwe"), "regexp matcher should also match full string") 12 | assert.NoError(t, RegexpMatcher("qwe asd er", "^.*qwe.*$"), "regexp matcher should also match full string with patterns") 13 | assert.Error(t, RegexpMatcher("qwe asd er", "boo"), "regexp matcher should match not match anything") 14 | assert.NoError(t, RegexpMatcher("1 
1", "^1[[:space:]]+1$"), "regexp matcher should match [[:space:]]") 15 | } 16 | 17 | func TestJsonExactlyMatcher(t *testing.T) { 18 | var a, e string 19 | a = `{"a":1, "b":0.2, "c": [1,2], "z": null, "e": {"x":"y"}}` 20 | e = `{"a":1, "c": [1,2], "b":0.2, "z": null, "e": {"x": "y"}}` 21 | assert.NoError(t, JSONExactlyMatcher(a, e), "exact json matcher should match jsons despite key orders and spaces") 22 | a = `{"a":2, "b":0.2, "c": [1,2], "d": null, "e": {"x":"y"}}` 23 | assert.Error(t, JSONExactlyMatcher(a, e), "exact json matcher should not match if value changes") 24 | } 25 | 26 | func TestJSONMatcher(t *testing.T) { 27 | var a, e string 28 | a = `{"a":1, "b":0.2, "c": [1,2], "d": null, "e": {"x":"y"}}` 29 | e = `{"a":1, "c": [1,2], "b":0.2, "d": null, "e": {"x": "y"}}` 30 | assert.NoError(t, JSONMatcher(a, e), "json matcher should match jsons despite key orders and spaces") 31 | e = `{"a":1, "d": null}` 32 | assert.NoError(t, JSONMatcher(a, e), "json matcher should ignore extra keys") 33 | a = ` 34 | { 35 | "a":1, 36 | "e": { 37 | "a": 1, 38 | "e": { 39 | "a": 1, 40 | "ok": true 41 | } 42 | } 43 | } 44 | ` 45 | e = `{"e":{"e":{"ok":true}}}` 46 | assert.NoError(t, JSONMatcher(a, e), "json matcher should match deep nested jsons") 47 | e = `{"e":{"e":{"res":"ok"}}}` 48 | assert.Error(t, JSONMatcher(a, e), "json matcher should not match if field is missing") 49 | e = `{"e":{"e":{"ok":1}}}` 50 | assert.Error(t, JSONMatcher(a, e), "json matcher should not match if field type is different") 51 | e = `{"e":{"e":{"ok":false}}}` 52 | assert.Error(t, JSONMatcher(a, e), "json matcher should not match if field value is different") 53 | a = ` 54 | { 55 | "e": [ 56 | {"a": 1}, 57 | {"b": 2}, 58 | {"c": 3}, 59 | {"d": 4}, 60 | {"e": 5} 61 | ] 62 | } 63 | ` 64 | e = ` 65 | { 66 | "e": [ 67 | {"b": 2}, 68 | {"d": 4} 69 | ] 70 | } 71 | ` 72 | assert.NoError(t, JSONMatcher(a, e), "json matcher should match parts of arrays, preserving order") 73 | e = ` 74 | { 75 | "e": [ 76 | 
{"d": 4}, 77 | {"b": 2} 78 | ] 79 | } 80 | ` 81 | assert.Error(t, JSONMatcher(a, e), "json matcher should not match parts of arrays, if order differs") 82 | } 83 | -------------------------------------------------------------------------------- /tests/testutil/network.go: -------------------------------------------------------------------------------- 1 | package testutil 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | ) 7 | 8 | // GetFreePort returns a random tcp port available for binding (at the moment of call) 9 | func GetFreePort() (string, error) { 10 | const tries = 3 11 | var errs []error 12 | 13 | for range tries { 14 | listener, err := net.Listen("tcp", ":0") 15 | if err != nil { 16 | errs = append(errs, fmt.Errorf("unable to bind on free port at localhost: %w", err)) 17 | continue 18 | } 19 | 20 | _, port, err := net.SplitHostPort(listener.Addr().String()) 21 | if err != nil { 22 | errs = append(errs, fmt.Errorf("unable to parse host/port string (%s): %w", listener.Addr().String(), err)) 23 | continue 24 | } 25 | 26 | if err = listener.Close(); err != nil { 27 | errs = append(errs, fmt.Errorf("unable to close listener: %w", err)) 28 | continue 29 | } 30 | return port, nil 31 | } 32 | 33 | // TODO: use multierr when available 34 | return "", fmt.Errorf("unable to find free port with %d tries: %+v", tries, errs) 35 | } 36 | -------------------------------------------------------------------------------- /tests/testutil/retry.go: -------------------------------------------------------------------------------- 1 | package testutil 2 | 3 | import "time" 4 | 5 | // nolint: unparam 6 | func Retry(code func() bool, timeout, sleep time.Duration) { 7 | if code() { 8 | return 9 | } 10 | timer := time.NewTimer(timeout) 11 | ticker := time.NewTicker(sleep) 12 | for { 13 | select { 14 | case <-ticker.C: 15 | if code() { 16 | return 17 | } 18 | case <-timer.C: 19 | return 20 | } 21 | } 22 | } 23 | 
-------------------------------------------------------------------------------- /tests/testutil/uuid.go: -------------------------------------------------------------------------------- 1 | package testutil 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/gofrs/uuid" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func NewUUID(t *testing.T) uuid.UUID { 11 | v, err := uuid.NewV4() 12 | require.NoError(t, err) 13 | return v 14 | } 15 | 16 | func NewUUIDStr(t *testing.T) string { 17 | return NewUUID(t).String() 18 | } 19 | --------------------------------------------------------------------------------