├── .flake8 ├── .github ├── CODEOWNERS ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── 01_question.md │ ├── 02_bug.md │ ├── 03_feature.md │ └── config.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── codeql-analysis.yml │ └── tests.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── codecov.yml ├── fix_newlines.py ├── myhoard.json ├── myhoard.spec ├── myhoard.unit ├── myhoard ├── __init__.py ├── append_only_state_manager.py ├── backup_stream.py ├── basebackup_operation.py ├── basebackup_restore_operation.py ├── binary_io_slice.py ├── binlog_downloader.py ├── binlog_scanner.py ├── controller.py ├── errors.py ├── myhoard.py ├── py.typed ├── restore_coordinator.py ├── state_manager.py ├── statsd.py ├── table.py ├── update_mysql_environment.py ├── util.py └── web_server.py ├── mypy.ini ├── pyproject.toml ├── pytest.ini ├── scripts ├── build-setup-specific-test-image ├── build-setup-specific-test-image-full ├── create-user ├── install-mysql-packages ├── install-percona-package ├── install-python-deps ├── install-python-version ├── pytest-inside ├── remove-default-mysql ├── setup-percona-repo └── test-inside └── test ├── __init__.py ├── binlog ├── conftest.py ├── helpers ├── __init__.py ├── databases.py ├── filesystem.py ├── fixtures.py ├── flow_testers.py ├── loggers.py └── version.py ├── local ├── __init__.py └── test_controller.py ├── test_append_only_state_manager.py ├── test_backup_stream.py ├── test_basebackup_operation.py ├── test_basebackup_restore_operation.py ├── test_binlog_scanner.py ├── test_controller.py ├── test_myhoard.py ├── test_restore_coordinator.py ├── test_statsd.py ├── test_table.py ├── test_util.py └── test_web_server.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 125 3 | 4 | # E123: Closing brackets indent 5 | # E126: Hanging indents 6 | # E129: Visual indent 7 | # E203: Whitespace before ':' (in accordance with Black) 8 | # E231: Missing whitespace after ',' (in accordance with Black) 9 | # E501: Max line length 10 | # E722: do not use bare 'except' 11 | extend-ignore = 12 | E123, 13 | E126, 14 | E129, 15 | E203, 16 | E231, 17 | E501, 18 | E722 19 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @aiven/team-brute-force 2 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome! 2 | 3 | Contributions are welcome to this project. Please follow the guidelines: 4 | 5 | - It's recommended to open an issue to discuss a feature before putting in a lot of effort. 6 | 7 | - We use [GitHub Flow](https://guides.github.com/introduction/flow/), check that your main branch is up to date, and create a new branch for changes. 8 | 9 | - Commit messages should describe the changes, not the filenames. Win our admiration by following the [excellent advice from Chris Beams](https://chris.beams.io/posts/git-commit/) when composing commit messages. 10 | 11 | - Choose a meaningful title for your pull request. 12 | 13 | - The pull request description should focus on what changed and why. 14 | 15 | - Check that the tests pass (and add test coverage for your changes if appropriate). 16 | 17 | - Stay in touch with us if we have follow up questions or requests for further changes. 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/01_question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ❓ Ask a question 3 | about: Got stuck or missing something from the docs? Ask away! 4 | --- 5 | 6 | # What can we help you with? 7 | 8 | 9 | 10 | # Where would you expect to find this information? 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/02_bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐜 Report a bug 3 | about: Spotted a problem? Let us know 4 | --- 5 | 6 | # What happened? 7 | 8 | 9 | 10 | # What did you expect to happen? 11 | 12 | 13 | 14 | # What else do we need to know? 15 | 16 | Include your platform, version, and any other information that seems relevant. 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/03_feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 💡 Feature suggestion 3 | about: What would make this even better? 4 | --- 5 | 6 | # What is currently missing? 7 | 8 | 9 | 10 | # How could this be improved? 11 | 12 | 13 | 14 | # Is this a feature you would work on yourself? 15 | 16 | [ ] I plan to open a pull request for this feature 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # About this change: What it does, why it matters 2 | 3 | (all contributors please complete this section, including maintainers) 4 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | on: 3 | push: 4 | branches: [ master ] 5 | pull_request: 6 | branches: [ master ] 7 | schedule: 8 | - cron: '42 20 * * 6' 9 | jobs: 10 | analyze: 11 | name: Analyze 12 | runs-on: ubuntu-latest 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | language: [ 'python' ] 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v4 20 | - name: Initialize CodeQL 21 | uses: github/codeql-action/init@v3 22 | with: 23 | languages: ${{ matrix.language }} 24 | - name: Autobuild 25 | uses: github/codeql-action/autobuild@v3 26 | - name: Perform CodeQL Analysis 27 | uses: github/codeql-action/analyze@v3 28 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Unit tests 2 | 3 | permissions: read-all 4 | 5 | on: 6 | push: 7 | branches: 8 | - master 9 | tags: 10 | - "**" 11 | schedule: 12 | - cron: "0 0 * * SUN" 13 | pull_request: 14 | 15 | jobs: 16 | lint: 17 | runs-on: ubuntu-22.04 18 | strategy: 19 | matrix: 20 | python-version: ["3.12"] 21 | env: 22 | MYSQL_VERSION: mysql-8.0 23 | 24 | steps: 25 | - id: checkout-code 26 | uses: actions/checkout@v4 27 | with: 28 | persist-credentials: false 29 | - id: prepare-python 30 | uses: actions/setup-python@v5 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - id: python-deps 34 | run: | 35 | sudo apt-get update && sudo apt install -y libsnappy-dev 36 | pip install -e . 37 | pip install -e '.[dev]' 38 | - id: pre-commit 39 | run: pre-commit run --all 40 | - id: copyright 41 | run: make copyright 42 | 43 | unittest: 44 | runs-on: ubuntu-${{ matrix.ubuntu-version }} 45 | strategy: 46 | fail-fast: false 47 | # this isn't a standard matrix because some version testing doesn't make sense 48 | # e.g. you cannot use mysql .28 with percona .27, and there's no clean way of 49 | # skipping matrix items that aren't meaningful 50 | matrix: 51 | include: 52 | - mysql-version: "8.0.32" 53 | percona-version: "8.0.32-26-1.jammy" 54 | python-version: "3.10" 55 | ubuntu-version: "22.04" 56 | - mysql-version: "8.0.32" 57 | percona-version: "8.0.32-26-1.jammy" 58 | python-version: "3.11" 59 | ubuntu-version: "22.04" 60 | - mysql-version: "8.0.35" 61 | percona-version: "8.0.35-30-1.jammy" 62 | python-version: "3.11" 63 | ubuntu-version: "22.04" 64 | - mysql-version: "8.0.35" 65 | percona-version: "8.0.35-30-1.jammy" 66 | python-version: "3.12" 67 | ubuntu-version: "22.04" 68 | 69 | steps: 70 | - id: checkout-code 71 | uses: actions/checkout@v4 72 | - name: Cache APT Packages 73 | uses: awalsh128/cache-apt-pkgs-action@v1.1.2 74 | with: 75 | version: 1.0 76 | packages: sudo lsb-release wget tzdata libsnappy-dev libpq5 libpq-dev software-properties-common build-essential rsync curl git libaio1 libmecab2 psmisc 77 | - id: prepare-python 78 | uses: actions/setup-python@v5 79 | with: 80 | python-version: ${{ matrix.python-version }} 81 | cache: pip 82 | cache-dependency-path: | 83 | **/requirements.txt 84 | **/requirements.dev.txt 85 | 86 | - run: make clean 87 | - run: sudo scripts/remove-default-mysql 88 | - run: sudo scripts/install-mysql-packages ${{matrix.mysql-version}} 89 | - run: sudo scripts/setup-percona-repo 90 | - run: sudo scripts/install-percona-package ${{matrix.percona-version}} 91 | - run: scripts/install-python-deps 92 | - run: python -m pip install -e . 93 | 94 | - id: unittest 95 | run: | 96 | make coverage 97 | 98 | - id: upload-codecov 99 | uses: codecov/codecov-action@v3 100 | with: 101 | verbose: true 102 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | *.log 4 | *~ 5 | /.cache 6 | /.coverage 7 | coverage.xml 8 | /.project 9 | /.pydevproject 10 | /.vagrant 11 | /.idea 12 | __pycache__/ 13 | /build/ 14 | /dist/ 15 | /*.egg-info/ 16 | /*-rpm-src.tar 17 | /rpm/ 18 | .vagrant 19 | /venv/ 20 | /.venv/ 21 | *.orig 22 | Dockerfile.myhoard-test-temp 23 | .vscode/ 24 | myhoard/version.py 25 | .hypothesis/ 26 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/PyCQA/flake8 9 | rev: 7.1.1 10 | hooks: 11 | - id: flake8 12 | - repo: https://github.com/psf/black 13 | rev: 25.1.0 14 | hooks: 15 | - id: black 16 | - repo: https://github.com/pylint-dev/pylint 17 | rev: v3.3.4 18 | hooks: 19 | - id: pylint 20 | args: ["--disable=R,E0401,W0719"] 21 | - repo: https://github.com/pre-commit/mirrors-mypy 22 | rev: v1.15.0 23 | hooks: 24 | - id: mypy 25 | args: [--ignore-missing-imports] 26 | additional_dependencies: [types-PyMySQL==1.0.19, types-requests==2.27.30] 27 | - repo: https://github.com/pycqa/isort 28 | rev: 6.0.0 29 | hooks: 30 | - id: isort 31 | name: isort (python) 32 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | disable= 3 | bad-option-value, 4 | duplicate-code, 5 | fixme, 6 | import-outside-toplevel, 7 | invalid-name, 8 | len-as-condition, 9 | locally-disabled, 10 | missing-docstring, 11 | no-else-raise, 12 | no-else-return, 13 | no-self-use, 14 | raise-missing-from, 15 | too-few-public-methods, 16 | too-many-ancestors, 17 | too-many-arguments, 18 | too-many-boolean-expressions, 19 | too-many-branches, 20 | too-many-function-args, 21 | too-many-instance-attributes, 22 | too-many-lines, 23 | too-many-locals, 24 | too-many-nested-blocks, 25 | too-many-public-methods, 26 | too-many-statements, 27 | ungrouped-imports, 28 | unspecified-encoding, 29 | wrong-import-order, 30 | wrong-import-position 31 | 32 | [FORMAT] 33 | max-line-length=125 34 | 35 | [REPORTS] 36 | output-format=text 37 | reports=no 38 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTHON_VERSION=3.11 2 | 3 | ############################################### 4 | ### Common base for building Percona tools #### 5 | ############################################### 6 | FROM debian:bullseye as builder-percona 7 | 8 | # We provide default values, but they can be overridden at build time. 9 | ARG MYSQL_VERSION=8.0.30 10 | ARG PERCONA_SERVER_VERSION=${MYSQL_VERSION}-22 11 | ARG PERCONA_XTRA_VERSION=${MYSQL_VERSION}-23 12 | ENV MYSQL_VERSION=${MYSQL_VERSION} 13 | ENV PERCONA_SERVER_VERSION=${PERCONA_SERVER_VERSION} 14 | ENV PERCONA_XTRA_VERSION=${PERCONA_XTRA_VERSION} 15 | 16 | RUN apt update && \ 17 | apt upgrade && \ 18 | # From https://percona.community/blog/2022/04/05/percona-server-raspberry-pi/ 19 | # - apt-get, + wget 20 | apt install -y build-essential pkg-config cmake devscripts debconf debhelper automake bison ca-certificates \ 21 | libcurl4-gnutls-dev libaio-dev libncurses-dev libssl-dev libtool libgcrypt20-dev zlib1g-dev lsb-release \ 22 | python3-docutils build-essential rsync libdbd-mysql-perl libnuma1 socat librtmp-dev libtinfo5 liblz4-tool \ 23 | liblz4-1 liblz4-dev libldap2-dev libsasl2-dev libsasl2-modules-gssapi-mit libkrb5-dev wget \ 24 | libreadline-dev libudev-dev libev-dev libev4 libprocps-dev vim-common 25 | # Download boost and percona-xtrabackup 26 | RUN wget https://archives.boost.io/release/1.77.0/source/boost_1_77_0.tar.gz && \ 27 | tar -zxvf boost_1_77_0.tar.gz 28 | 29 | 30 | ############################################### 31 | ### Build Percona XtraBackup ################## 32 | ############################################### 33 | FROM builder-percona AS builder-percona-xtrabackup 34 | 35 | RUN wget https://downloads.percona.com/downloads/Percona-XtraBackup-LATEST/Percona-XtraBackup-${PERCONA_XTRA_VERSION}/source/tarball/percona-xtrabackup-${PERCONA_XTRA_VERSION}.tar.gz && \ 36 | tar -zxvf percona-xtrabackup-${PERCONA_XTRA_VERSION}.tar.gz 37 | # Build percona-xtrabackup 38 | RUN cd percona-xtrabackup-${PERCONA_XTRA_VERSION} && \ 39 | mkdir "arm64-build" && \ 40 | cd "arm64-build" && \ 41 | cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_BOOST=/boost_1_77_0 -DCMAKE_INSTALL_PREFIX=/usr/local/xtrabackup && \ 42 | make -j$(nproc) && \ 43 | make install 44 | 45 | 46 | ############################################### 47 | ### Build Percona Server ###################### 48 | ############################################### 49 | FROM builder-percona AS builder-percona-server 50 | 51 | RUN wget https://downloads.percona.com/downloads/Percona-Server-LATEST/Percona-Server-${PERCONA_SERVER_VERSION}/source/tarball/percona-server-${PERCONA_SERVER_VERSION}.tar.gz && \ 52 | tar -zxvf percona-server-${PERCONA_SERVER_VERSION}.tar.gz 53 | # Build percona-xtrabackup 54 | RUN cd percona-server-${PERCONA_SERVER_VERSION} && \ 55 | mkdir "arm64-build" && \ 56 | cd "arm64-build" && \ 57 | cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_BOOST=/boost_1_77_0 -DCMAKE_INSTALL_PREFIX=/usr/local/mysql -DWITH_ZLIB=bundled && \ 58 | make -j$(nproc) && \ 59 | make install 60 | 61 | 62 | ############################################### 63 | ### Build Myhoard ############################# 64 | ############################################### 65 | FROM python:${PYTHON_VERSION}-bullseye 66 | 67 | ARG MYSQL_VERSION=8.0.30 68 | ENV MYSQL_VERSION=${MYSQL_VERSION} 69 | 70 | RUN apt-get update && apt-get install -y \ 71 | sudo lsb-release wget tzdata libsnappy-dev libpq5 libpq-dev software-properties-common build-essential rsync curl git libaio1 libmecab2 psmisc \ 72 | && rm -rf /var/lib/apt/lists/* 73 | ADD scripts /src/scripts 74 | ADD Makefile /src/ 75 | WORKDIR /src 76 | RUN make clean 77 | RUN sudo scripts/remove-default-mysql 78 | RUN sudo scripts/install-mysql-packages ${MYSQL_VERSION} 79 | 80 | COPY --from=builder-percona-xtrabackup /usr/local/xtrabackup/bin /usr/bin 81 | COPY --from=builder-percona-xtrabackup /usr/local/xtrabackup/lib /usr/lib 82 | COPY --from=builder-percona-server /usr/local/mysql/bin /usr/bin 83 | COPY --from=builder-percona-server /usr/local/mysql/lib /usr/lib 84 | 85 | ADD requirement* /src/ 86 | RUN sudo scripts/create-user 87 | 88 | ADD . /src/ 89 | RUN scripts/install-python-deps 90 | RUN git config --global --add safe.directory /src 91 | RUN python -m pip install -e . 92 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | short_ver = $(shell git describe --abbrev=0) 2 | long_ver = $(shell git describe --long 2>/dev/null || echo $(short_ver)-0-unknown-g`git describe --always`) 3 | generated = myhoard/version.py 4 | 5 | all: $(generated) 6 | 7 | PYTHON ?= python3 8 | PYTHON_SOURCE_DIRS = myhoard/ test/ 9 | 10 | PYTEST_TMP ?= /var/tmp/pytest-of-$(USER) 11 | PYTEST_ARG ?= -vvv --log-level=INFO --basetemp "$(PYTEST_TMP)" 12 | 13 | MYSQL_SERVER_PACKAGE ?= mysql-server >= 8.0 14 | 15 | .PHONY: unittest 16 | unittest: version 17 | $(PYTHON) -m pytest -vv test/ 18 | 19 | .PHONY: copyright 20 | copyright: 21 | grep -EL "Copyright \(c\) 20.* Aiven" $(shell git ls-files "*.py" | grep -v __init__.py) 22 | 23 | .PHONY: coverage 24 | coverage: 25 | [ -d "$(PYTEST_TMP)" ] || mkdir -p "$(PYTEST_TMP)" 26 | $(PYTHON) -m pytest $(PYTEST_ARG) --cov-report term-missing --cov-branch --cov-report xml:coverage.xml --cov myhoard test/ 27 | 28 | 29 | .PHONY: clean 30 | clean: 31 | $(RM) -r *.egg-info/ build/ dist/ 32 | $(RM) ../myhoard_* test-*.xml $(generated) 33 | 34 | .PHONY: rpm 35 | rpm: $(generated) 36 | git archive --output=myhoard-rpm-src.tar --prefix=myhoard/ HEAD 37 | # add generated files to the tar, they're not in git repository 38 | tar -r -f myhoard-rpm-src.tar --transform=s,myhoard/,myhoard/myhoard/, $(generated) 39 | rpmbuild -bb myhoard.spec \ 40 | --define '_topdir $(PWD)/rpm' \ 41 | --define '_sourcedir $(CURDIR)' \ 42 | --define 'major_version $(short_ver)' \ 43 | --define 'minor_version $(subst -,.,$(subst $(short_ver)-,,$(long_ver)))' 44 | $(RM) myhoard-rpm-src.tar 45 | 46 | .PHONY: build-dep-fedora 47 | build-dep-fedora: 48 | sudo dnf install -y 'dnf-command(builddep)' 49 | sudo dnf -y builddep myhoard.spec 50 | sudo dnf -y install --best --allowerasing --setopt=install_weak_deps=False \ 51 | --exclude=mariadb-server "$(MYSQL_SERVER_PACKAGE)" 52 | 53 | 54 | .PHONY: build-dep-ubuntu 55 | build-dep-ubuntu: 56 | sudo apt install -y lsb-release wget tzdata libsnappy-dev libpq5 libpq-dev software-properties-common build-essential rsync curl git libaio1 libmecab2 psmisc python-is-python3 57 | 58 | # local development, don't use in CI 59 | # prerequisite 60 | .PHONY: build-setup-specific-image 61 | build-setup-specific-image: 62 | ifeq ($(shell uname -m),x86_64) 63 | @echo "Building image for default architecture" 64 | PYTHON_VERSION=$(PYTHON_VERSION) MYSQL_VERSION=$(MYSQL_VERSION) PERCONA_VERSION=$(PERCONA_VERSION) \ 65 | scripts/build-setup-specific-test-image 66 | else 67 | # For other architectures, we must build the dependencies ourselves, as they are not available in the official repos. 68 | @echo "Building image for $(shell uname -m)" 69 | PYTHON_VERSION=$(PYTHON_VERSION) MYSQL_VERSION=$(MYSQL_VERSION) PERCONA_VERSION=$(PERCONA_VERSION) \ 70 | scripts/build-setup-specific-test-image-full 71 | endif 72 | 73 | .PHONY: dockertest 74 | dockertest: 75 | docker run --cap-add SYS_ADMIN -it --rm myhoard-test-temp /src/scripts/test-inside 76 | 77 | # when the image didn't change this can be used. local dev only, don't use in CI 78 | # in this target we override the /src that gets used to rsync source inside the container 79 | .PHONY: dockertest-resync 80 | dockertest-resync: 81 | docker run --cap-add SYS_ADMIN -it --rm -v "$(shell pwd):/src:ro" myhoard-test-temp /src/scripts/test-inside 82 | 83 | .PHONY: dockertest-pytest 84 | dockertest-pytest: 85 | docker run --cap-add SYS_ADMIN -it --rm -v "$(shell pwd):/src:ro" myhoard-test-temp /src/scripts/pytest-inside $(PYTEST_ARGS) 86 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | We release patches for security vulnerabilities. Which versions are eligible 6 | receiving such patches depend on the CVSS v3.0 Rating: 7 | 8 | | CVSS v3.0 | Supported Versions | 9 | | --------- | ----------------------------------------- | 10 | | 4.0-10.0 | Most recent release | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | Please report (suspected) security vulnerabilities to our **[bug bounty 15 | program](https://hackerone.com/aiven_ltd)**. You will receive a response from 16 | us within 2 working days. If the issue is confirmed, we will release a patch as 17 | soon as possible depending on impact and complexity. 18 | 19 | ## Qualifying Vulnerabilities 20 | 21 | Any reproducible vulnerability that has a severe effect on the security or 22 | privacy of our users is likely to be in scope for the program. 23 | 24 | We generally **aren't** interested in the following issues: 25 | * Social engineering (e.g. phishing, vishing, smishing) attacks 26 | * Brute force, DoS, text injection 27 | * Missing best practices such as HTTP security headers (CSP, X-XSS, etc.), 28 | email (SPF/DKIM/DMARC records), SSL/TLS configuration. 29 | * Software version disclosure / Banner identification issues / Descriptive 30 | error messages or headers (e.g. stack traces, application or server errors). 31 | * Clickjacking on pages with no sensitive actions 32 | * Theoretical vulnerabilities where you can't demonstrate a significant 33 | security impact with a proof of concept. 34 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | # basic 6 | target: auto 7 | threshold: 0% 8 | # advanced settings 9 | if_ci_failed: error # success, failure, error, ignore 10 | only_pulls: false 11 | if_not_found: failure 12 | -------------------------------------------------------------------------------- /fix_newlines.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from pathlib import Path 3 | from typing import Collection 4 | 5 | import argparse 6 | import io 7 | import subprocess 8 | import sys 9 | 10 | 11 | def main(exclude_patterns: Collection[str]) -> int: 12 | has_fixed_a_file = False 13 | for filename in map(Path, subprocess.check_output(["git", "ls-files"]).decode().splitlines()): 14 | should_skip_file = False 15 | for exclude_pattern in exclude_patterns: 16 | if filename.match(exclude_pattern): 17 | should_skip_file = True 18 | if should_skip_file: 19 | continue 20 | with filename.open("r+b") as file: 21 | file.seek(0, io.SEEK_END) 22 | if file.tell() > 0: 23 | file.seek(-1, io.SEEK_END) 24 | if file.read() != b"\n": 25 | print(f"Fixed missing newline at end of {filename!s}") 26 | file.write(b"\n") 27 | has_fixed_a_file = True 28 | return 1 if has_fixed_a_file else 0 29 | 30 | 31 | if __name__ == "__main__": 32 | parser = argparse.ArgumentParser(description="Enforce trailing newlines") 33 | parser.add_argument("--exclude", dest="exclude", help="file pattern to exclude", action="append") 34 | args = parser.parse_args() 35 | sys.exit(main(exclude_patterns=args.exclude)) 36 | -------------------------------------------------------------------------------- /myhoard.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_settings": { 3 | "backup_age_days_max": 14, 4 | "backup_count_max": 30, 5 | "backup_count_min": 14, 6 | "backup_hour": 3, 7 | "backup_interval_minutes": 1440, 8 | "backup_minute": 0, 9 | "forced_binlog_rotation_interval": 300, 10 | "upload_site": "default", 11 | "incremental": { 12 | "enabled": false, 13 | "full_backup_week_schedule": "sun,wed" 14 | } 15 | }, 16 | "backup_sites": { 17 | "default": { 18 | "compression": { 19 | "algorithm": "snappy" 20 | }, 21 | "encryption_keys": { 22 | "private": "-----BEGIN RSA PRIVATE KEY-----\n...\n-----END RSA PRIVATE KEY-----", 23 | "public": "-----BEGIN PUBLIC KEY-----\n...-----END PUBLIC KEY-----\n" 24 | }, 25 | "object_storage": { 26 | "directory": "/tmp/sample_backup_dir", 27 | "storage_type": "local" 28 | }, 29 | "recovery_only": false 30 | } 31 | }, 32 | "binlog_purge_settings": { 33 | "enabled": true, 34 | "min_binlog_age_before_purge": 600, 35 | "purge_interval": 60, 36 | "purge_when_observe_no_streams": true 37 | }, 38 | "http_address": "127.0.0.1", 39 | "http_port": 16001, 40 | "mysql": { 41 | "binlog_prefix": "/var/lib/mysql/binlog", 42 | "client_params": { 43 | "host": "127.0.0.1", 44 | "password": "f@keP@ssw0rd", 45 | "port": 3306, 46 | "require_ssl": false, 47 | "user": "root" 48 | }, 49 | "config_file_name": "/etc/my.cnf", 50 | "data_directory": "/var/lib/mysql", 51 | "relay_log_index_file": "/var/lib/mysql/relay.index", 52 | "relay_log_prefix": "/var/lib/mysql/relay" 53 | }, 54 | "restore_auto_mark_backups_broken": false, 55 | "restore_free_memory_percentage": null, 56 | "restore_max_binlog_bytes": 4294967296, 57 | "sentry_dsn": null, 58 | "server_id": 1, 59 | "start_command": ["/usr/sbin/mysqld", "--defaults-file=/etc/my.cnf", "--basedir=/opt/mysql"], 60 | "state_directory": "/tmp/sample_state_dir", 61 | "statsd": { 62 | "host": null, 63 | "port": null, 64 | "tags": { 65 | "app": "myhoard" 66 | } 67 | }, 68 | "systemctl_command": ["sudo", "/usr/bin/systemctl"], 69 | "systemd_env_update_command": [ 70 | "sudo", "/usr/bin/myhoard_mysql_env_update", "-f", "/etc/systemd/system/mysqld.environment" 71 | ], 72 | "systemd_service": "mysql-server", 73 | "temporary_directory": "/var/tmp/sample_temp_dir", 74 | "xtrabackup": { 75 | "copy_threads": 1, 76 | "compress_threads": 1, 77 | "encrypt_threads": 1, 78 | "register_redo_log_consumer": false 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /myhoard.spec: -------------------------------------------------------------------------------- 1 | Name: myhoard 2 | Version: %{major_version} 3 | Release: %{minor_version}%{?dist} 4 | Url: https://github.com/aiven/myhoard 5 | Summary: MySQL streaming backup service 6 | BuildArch: noarch 7 | License: ASL 2.0 8 | Source0: myhoard-rpm-src.tar 9 | BuildRequires: percona-xtrabackup-80 >= 8.0 10 | BuildRequires: python3-aiohttp 11 | BuildRequires: python3-devel 12 | BuildRequires: python3-flake8 13 | BuildRequires: python3-httplib2 14 | BuildRequires: python3-isort 15 | BuildRequires: python3-pylint 16 | BuildRequires: python3-PyMySQL >= 0.9.2 17 | BuildRequires: python3-pytest 18 | BuildRequires: python3-pytest-cov 19 | BuildRequires: python3-requests 20 | BuildRequires: python3-rohmu 21 | BuildRequires: python3-socks 22 | BuildRequires: python3-yapf 23 | BuildRequires: rpm-build 24 | BuildRequires: systemd-rpm-macros 25 | Requires: percona-xtrabackup-80 >= 8.0 26 | Requires: python3-aiohttp 27 | Requires: python3-cryptography >= 0.8 28 | Requires: python3-PyMySQL >= 0.9.2 29 | Requires: python3-rohmu >= 1.1.2 30 | Requires: systemd 31 | 32 | %undefine _missing_build_ids_terminate_build 33 | 34 | %description 35 | MyHoard is a MySQL streaming backup service. Backups are stored in 36 | encrypted and compressed format in a cloud object storage. MyHoard 37 | currently supports Amazon Web Services S3, Google Cloud Storage and 38 | Microsoft Azure. 39 | 40 | 41 | %global debug_package %{nil} 42 | 43 | 44 | %prep 45 | %setup -q -n myhoard 46 | 47 | 48 | %build 49 | 50 | 51 | %install 52 | python3 setup.py install --prefix=%{_prefix} --root=%{buildroot} 53 | sed -e "s@#!/bin/python@#!%{_bindir}/python@" -i %{buildroot}%{_bindir}/* 54 | %{__install} -Dm0644 myhoard.unit %{buildroot}%{_unitdir}/myhoard.service 55 | 56 | 57 | %check 58 | 59 | 60 | %files 61 | %defattr(-,root,root,-) 62 | %doc LICENSE README.md myhoard.json 63 | %{_bindir}/myhoard* 64 | %{_unitdir}/myhoard.service 65 | %{python3_sitelib}/* 66 | 67 | 68 | %changelog 69 | * Mon May 27 2019 Rauli Ikonen - 1.0.0 70 | - Initial version 71 | -------------------------------------------------------------------------------- /myhoard.unit: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=MySQL streaming backup service 3 | 4 | [Service] 5 | User=mysql 6 | Group=mysql 7 | Type=notify 8 | Restart=always 9 | ExecStart=/usr/bin/myhoard --config /var/lib/myhoard/myhoard.json 10 | ExecReload=/bin/kill -HUP $MAINPID 11 | WorkingDirectory=/var/lib/myhoard 12 | # Percona XtraBackup needs to keep all database tables open simultaneously and it inherits our 13 | # nofile limit. Set to very large number to allow it to operate in large environments 14 | LimitNOFILE=2000000 15 | 16 | [Install] 17 | WantedBy=multi-user.target 18 | -------------------------------------------------------------------------------- /myhoard/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | -------------------------------------------------------------------------------- /myhoard/append_only_state_manager.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from .util import atomic_create_file 3 | from typing import Any, Generic, Iterable, List, TypeVar 4 | 5 | import io 6 | import json 7 | import logging 8 | import os 9 | import threading 10 | 11 | # TODO: Any should really be JsonType. See https://github.com/python/typing/issues/182 12 | T = TypeVar("T", bound=Any) 13 | 14 | 15 | class AppendOnlyStateManager(Generic[T]): 16 | """Disk backed state manager that stores a list of dict/JSON objects. Entries can only be added to the 17 | end of the list and removed from the beginning. The entire file is rewritten when sufficient number 18 | of entries have been removed from the beginning. Otherwise deletions are handled by writing invalidation 19 | entry for current head entry to the end of the file. 20 | 21 | Uses textual header instead of binary as well as linefeed after each row to make the file human readable.""" 22 | 23 | HEADER_BYTES = 9 24 | ENTRY_TYPE_TOMBSTONE = "T" 25 | ENTRY_TYPE_JSON = "J" 26 | 27 | MAX_DEAD_ENTRY_COUNT = 1000 28 | 29 | entries: List[T] 30 | 31 | def __init__(self, *, entries: List[T], lock=None, state_file): 32 | self.dead_entry_count = 0 33 | self.entries = entries 34 | self.lock = lock or threading.RLock() 35 | self.log = logging.getLogger(self.__class__.__name__) 36 | self.max_dead_entry_count = self.MAX_DEAD_ENTRY_COUNT 37 | self.state_file = state_file 38 | self.read_state() 39 | 40 | def append(self, entry: T) -> None: 41 | self.append_many([entry]) 42 | 43 | def append_many(self, entries: Iterable[T]): 44 | if not entries: 45 | return 46 | 47 | with self.lock: 48 | full_data = self._encode_entries(entries) 49 | with open(self.state_file, "ab") as f: 50 | f.write(full_data) 51 | self.entries.extend(entries) 52 | 53 | def read_state(self) -> None: 54 | entries = [] 55 | pos = 0 56 | if os.path.exists(self.state_file): 57 | with open(self.state_file, "rb") as f: 58 | if not isinstance(f, io.BufferedReader): 59 | f = io.BufferedReader(f) 60 | while True: 61 | header = f.read(self.HEADER_BYTES).decode("utf-8") 62 | if not header: 63 | break 64 | if len(header) < self.HEADER_BYTES: 65 | raise EOFError( 66 | f"Unexpected end of file at {pos}. Expected {self.HEADER_BYTES} bytes, got only {len(header)}" 67 | ) 68 | 69 | entry_type = header[0] 70 | length = int(header[1:], 16) 71 | data = f.read(length) 72 | if len(data) < length: 73 | raise EOFError(f"Expected {length} bytes of data for entry at {pos}, only got {len(data)} bytes") 74 | 75 | if entry_type == self.ENTRY_TYPE_TOMBSTONE: 76 | self.dead_entry_count += 1 77 | elif entry_type == self.ENTRY_TYPE_JSON: 78 | entries.append(data) 79 | else: 80 | raise ValueError(f"Unsupported entry type {entry_type} at position {pos}") 81 | pos += len(header) + len(data) 82 | else: 83 | self._rewrite_file(entries=entries) 84 | 85 | self.log.info( 86 | "Loaded %s entries from %r, %s of them are marked as dead", len(entries), self.state_file, self.dead_entry_count 87 | ) 88 | self.entries.extend(json.loads(entry.decode("utf-8")) for entry in entries[self.dead_entry_count :]) 89 | 90 | def remove_head(self) -> None: 91 | self.remove_many_from_head(count=1) 92 | 93 | def remove_many_from_head(self, count: int) -> None: 94 | if count <= 0: 95 | return 96 | elif count > len(self.entries): 97 | raise ValueError(f"Requested removal of {count} entries when only {len(self.entries)} exist") 98 | 99 | with self.lock: 100 | new_entries = self.entries[count:] 101 | if self.dead_entry_count + count > self.max_dead_entry_count: 102 | self.log.info( 103 | "Dead entry count %s exceeds %s for %r, rewriting file", 104 | self.dead_entry_count + count, 105 | self.max_dead_entry_count, 106 | self.state_file, 107 | ) 108 | self._rewrite_file(entries=new_entries) 109 | self.dead_entry_count = 0 110 | else: 111 | # All entries look identical: just linefeed as data (to keep the file human readable) and header 112 | # says its a tombstone entry with one byte of data 113 | line = self._make_header(entry_type=self.ENTRY_TYPE_TOMBSTONE, data=b"\n") + b"\n" 114 | # Construct full data with join for better performance in case count is large 115 | data = b"".join(line for _ in range(count)) 116 | with open(self.state_file, "ab") as f: 117 | f.write(data) 118 | self.dead_entry_count += count 119 | self.entries[:] = new_entries 120 | 121 | @classmethod 122 | def _encode_entries(cls, entries): 123 | encoded = [json.dumps(entry, ensure_ascii=True).encode("utf-8").rstrip(b"\n") + b"\n" for entry in entries] 124 | full_data = [] 125 | for entry_data in encoded: 126 | full_data.append(cls._make_header(entry_type=cls.ENTRY_TYPE_JSON, data=entry_data)) 127 | full_data.append(entry_data) 128 | return b"".join(full_data) 129 | 130 | @staticmethod 131 | def _make_header(*, entry_type, data): 132 | return f"{entry_type}{len(data):08x}".encode("utf-8") 133 | 134 | def _rewrite_file(self, *, entries): 135 | full_data = self._encode_entries(entries) 136 | with atomic_create_file(self.state_file, binary=True) as f: 137 | f.write(full_data) 138 | 139 | def delete_state(self) -> None: 140 | if os.path.exists(self.state_file): 141 | os.remove(self.state_file) 142 | -------------------------------------------------------------------------------- /myhoard/binary_io_slice.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from types import TracebackType 3 | from typing import BinaryIO, Iterable, Iterator, Type 4 | 5 | 6 | class MethodNotSupportedError(Exception): 7 | pass 8 | 9 | 10 | class BinaryIOSlice(BinaryIO): 11 | def __init__(self, max_file_size: int, stream: BinaryIO): 12 | super().__init__() 13 | self._max_file_size = max_file_size 14 | self._size_remaining = max_file_size 15 | self.stream = stream 16 | 17 | def read(self, __n: int = -1) -> bytes: 18 | if __n < 0: 19 | to_read = self._size_remaining 20 | else: 21 | to_read = min(__n, self._size_remaining) 22 | result = self.stream.read(to_read) 23 | 24 | if result: 25 | self._size_remaining -= len(result) 26 | 27 | return result 28 | 29 | @property 30 | def mode(self) -> str: 31 | return self.stream.mode 32 | 33 | @property 34 | def name(self) -> str: 35 | return self.stream.name 36 | 37 | def close(self) -> None: 38 | self.stream.close() 39 | 40 | @property 41 | def closed(self) -> bool: 42 | return self.stream.closed 43 | 44 | def fileno(self) -> int: 45 | return self.stream.fileno() 46 | 47 | def flush(self) -> None: 48 | return self.stream.flush() 49 | 50 | def isatty(self) -> bool: 51 | return self.stream.isatty() 52 | 53 | def readable(self) -> bool: 54 | return self.stream.readable() 55 | 56 | def readline(self, __limit: int = -1) -> bytes: 57 | raise MethodNotSupportedError() 58 | 59 | def readlines(self, __hint: int = -1) -> list[bytes]: 60 | raise MethodNotSupportedError() 61 | 62 | def seek(self, __offset: int, __whence: int = 0) -> int: 63 | return self.stream.seek(__offset, __whence) 64 | 65 | def seekable(self) -> bool: 66 | return False 67 | 68 | def tell(self) -> int: 69 | return self._max_file_size - self._size_remaining 70 | 71 | def truncate(self, __size: int | None = None) -> int: 72 | raise MethodNotSupportedError() 73 | 74 | def writable(self) -> bool: 75 | return False 76 | 77 | def write(self, __s: bytes) -> int: # type: ignore[override] 78 | raise MethodNotSupportedError() 79 | 80 | def writelines(self, __lines: Iterable[bytes]) -> None: # type: ignore[override] 81 | raise MethodNotSupportedError() 82 | 83 | def __next__(self) -> bytes: 84 | return self.stream.__next__() 85 | 86 | def __iter__(self) -> Iterator[bytes]: 87 | return self.stream.__iter__() 88 | 89 | def __enter__(self) -> BinaryIO: 90 | return self.stream.__enter__() 91 | 92 | def __exit__( 93 | self, __t: Type[BaseException] | None, __value: BaseException | None, __traceback: TracebackType | None 94 | ) -> None: 95 | return self.stream.__exit__(__t, __value, __traceback) 96 | -------------------------------------------------------------------------------- /myhoard/binlog_downloader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from rohmu import get_transfer 3 | from rohmu.compressor import DecompressSink 4 | from rohmu.encryptor import DecryptSink 5 | 6 | import contextlib 7 | import logging 8 | import os 9 | import time 10 | 11 | 12 | def download_binlog(config, queue_in, queue_out): 13 | logging.basicConfig(level=logging.INFO, format="%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s") 14 | downloader = BinlogDownloader(config, queue_in, queue_out) 15 | downloader.loop() 16 | 17 | 18 | class BinlogDownloader: 19 | def __init__(self, config, queue_in, queue_out): 20 | self.config = config 21 | self.log = logging.getLogger(self.__class__.__name__) 22 | self.queue_in = queue_in 23 | self.queue_out = queue_out 24 | self.rsa_private_key_pem = config["rsa_private_key_pem"].encode("ascii") 25 | self.transfer = None 26 | 27 | def loop(self): 28 | while True: 29 | action = self.queue_in.get() 30 | if not action: 31 | return 32 | start_time = time.monotonic() 33 | exception = None 34 | try: 35 | self.log.info("Starting to download %r", action["remote_key"]) 36 | if self.transfer is None: 37 | self.transfer = get_transfer(self.config["object_storage"]) 38 | # TODO: Monitor progress 39 | with contextlib.suppress(OSError): 40 | os.remove(action["local_file_name"]) 41 | with open(action["local_file_name"], "wb") as output_file: 42 | output_obj = DecompressSink(output_file, action["compression_algorithm"]) 43 | output_obj = DecryptSink(output_obj, action["remote_file_size"], self.rsa_private_key_pem) 44 | self.transfer.get_contents_to_fileobj(action["remote_key"], output_obj) 45 | self.log.info( 46 | "%r successfully saved as %r in %.2f seconds", 47 | action["remote_key"], 48 | action["local_file_name"], 49 | time.monotonic() - start_time, 50 | ) 51 | except Exception as ex: # pylint: disable=broad-except 52 | exception = ex 53 | self.log.exception("An error occurred while handling action") 54 | 55 | # Convert exception to string as it might not be picklable 56 | result = { 57 | **action, 58 | "duration": time.monotonic() - start_time, 59 | "message": str(exception) if exception else None, 60 | "result": "failure" if exception else "success", 61 | } 62 | self.queue_out.put(result) 63 | -------------------------------------------------------------------------------- /myhoard/binlog_scanner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from .append_only_state_manager import AppendOnlyStateManager 3 | from .state_manager import StateManager 4 | from .util import build_gtid_ranges, GtidRangeDict, read_gtids_from_log 5 | from typing import List, Optional, TypedDict 6 | 7 | import logging 8 | import os 9 | import threading 10 | import time 11 | 12 | 13 | class BinlogInfo(TypedDict): 14 | file_name: str 15 | file_size: int 16 | full_name: str 17 | gtid_ranges: List[GtidRangeDict] 18 | local_index: int 19 | processed_at: float 20 | processing_time: float 21 | server_id: int 22 | 23 | 24 | class BinlogScanner: 25 | """Looks for new (complete) and removed binlog files. Scans any new completed files 26 | for GTID details and maintains disk backed state of all GTID ranges in all complete 27 | binlogs that still exist on disk.""" 28 | 29 | class State(TypedDict): 30 | last_add: float 31 | last_remove: Optional[float] 32 | next_index: int 33 | total_binlog_count: int 34 | total_binlog_size: int 35 | 36 | def __init__(self, *, binlog_prefix: str, server_id: int, state_file, stats): 37 | super().__init__() 38 | binlogs: List[BinlogInfo] = [] 39 | lock = threading.RLock() 40 | self.binlog_prefix = binlog_prefix 41 | binlog_state_name = state_file.replace(".json", "") + ".binlogs" 42 | self.binlog_state = AppendOnlyStateManager[BinlogInfo](entries=binlogs, lock=lock, state_file=binlog_state_name) 43 | self.binlogs = binlogs 44 | # Keep track of binlogs we have in the file listing local binlogs; if persisting the binlogs 45 | # succeeds but persisting other metadata fails we'd end up in bad state without this logic 46 | self.known_local_indexes = {binlog["local_index"] for binlog in self.binlogs} 47 | self.lock = lock 48 | self.log = logging.getLogger(self.__class__.__name__) 49 | self.state: BinlogScanner.State = { 50 | "last_add": time.time(), 51 | "last_remove": None, 52 | "next_index": 1, 53 | "total_binlog_count": 0, 54 | "total_binlog_size": 0, 55 | } 56 | self.state_manager = StateManager[BinlogScanner.State](state=self.state, state_file=state_file) 57 | self.stats = stats 58 | self.server_id = server_id 59 | 60 | @property 61 | def latest_complete_binlog_index(self) -> int: 62 | return self.state["next_index"] - 1 63 | 64 | def scan_new(self, added_callback) -> List[BinlogInfo]: 65 | """Scan for any added binlogs. Passes any found binlogs to the given callback function 66 | before updating internal state.""" 67 | added: List[BinlogInfo] = [] 68 | last_processed_at = time.time() 69 | 70 | next_index: int = self.state["next_index"] 71 | while True: 72 | # We only scan completed files so expect the index following our next 73 | # index to be present as well 74 | if not os.path.exists(self.build_full_name(next_index + 1)): 75 | self.log.debug("Binlog with index %s not yet present", next_index + 1) 76 | break 77 | 78 | full_name = self.build_full_name(next_index) 79 | start_time = time.monotonic() 80 | gtid_ranges = list(build_gtid_ranges(read_gtids_from_log(full_name))) 81 | file_size = os.path.getsize(full_name) 82 | duration = time.monotonic() - start_time 83 | last_processed_at = time.time() 84 | binlog_info: BinlogInfo = { 85 | "file_size": file_size, 86 | "full_name": full_name, 87 | "gtid_ranges": gtid_ranges, 88 | "local_index": next_index, 89 | "file_name": os.path.basename(full_name), 90 | "processed_at": last_processed_at, 91 | "processing_time": duration, 92 | "server_id": self.server_id, 93 | } 94 | added.append(binlog_info) 95 | next_index += 1 96 | self.log.info( 97 | "New binlog %r (%s bytes) in %.2f seconds, found %d GTID ranges: %r", 98 | full_name, 99 | file_size, 100 | duration, 101 | len(gtid_ranges), 102 | gtid_ranges, 103 | ) 104 | 105 | new_size = 0 106 | if added: 107 | if added_callback: 108 | added_callback(added) 109 | new_size = sum(binlog["file_size"] for binlog in added) 110 | with self.lock: 111 | actual_added = [entry for entry in added if entry["local_index"] not in self.known_local_indexes] 112 | self.binlog_state.append_many(actual_added) 113 | self.known_local_indexes.update(entry["local_index"] for entry in actual_added) 114 | self.state_manager.update_state( 115 | last_add=last_processed_at, 116 | next_index=added[-1]["local_index"] + 1, 117 | total_binlog_count=self.state["total_binlog_count"] + len(added), 118 | total_binlog_size=self.state["total_binlog_size"] + new_size, 119 | ) 120 | # Send data points regardless of whether we got any new binlogs 121 | self.stats.gauge_int("myhoard.binlog.count", self.state["total_binlog_count"]) 122 | self.stats.gauge_int("myhoard.binlog.size", self.state["total_binlog_size"]) 123 | # Track new binlog count and sizes separately to make it possible to get rate of how 124 | # fast binlogs are being created; if they're being purged quickly then binlog.count 125 | # and binlog.size may remain relatively unchanged regardless of creation rate 126 | self.stats.increase("myhoard.binlog.count_new", len(added)) 127 | self.stats.increase("myhoard.binlog.size_new", new_size) 128 | return added 129 | 130 | def scan_removed(self, removed_callback): 131 | """Scan for any removed binlogs. Passes any removed binlogs to the given callback function 132 | before updating internal state.""" 133 | removed_size = 0 134 | removed: List[BinlogInfo] = [] 135 | 136 | try: 137 | with self.lock: 138 | for binlog in self.binlogs: 139 | if os.path.exists(binlog["full_name"]): 140 | return removed 141 | 142 | self.log.info("Binlog %r has been removed", binlog["full_name"]) 143 | removed.append(binlog) 144 | removed_size += binlog["file_size"] 145 | return removed 146 | finally: 147 | if removed: 148 | if removed_callback: 149 | removed_callback(removed) 150 | with self.lock: 151 | actual_removed = [ 152 | binlog 153 | for binlog in self.binlogs[: len(removed)] 154 | if binlog["local_index"] in self.known_local_indexes 155 | ] 156 | self.binlog_state.remove_many_from_head(len(actual_removed)) 157 | for binlog in actual_removed: 158 | self.known_local_indexes.discard(binlog["local_index"]) 159 | self.state_manager.update_state( 160 | last_remove=time.time(), 161 | total_binlog_count=self.state["total_binlog_count"] - len(removed), 162 | total_binlog_size=self.state["total_binlog_size"] - removed_size, 163 | ) 164 | self.stats.increase(metric="myhoard.binlog.removed", inc_value=len(removed)) 165 | self.stats.gauge_int("myhoard.binlog.count", self.state["total_binlog_count"]) 166 | self.stats.gauge_int("myhoard.binlog.size", self.state["total_binlog_size"]) 167 | 168 | def build_full_name(self, index): 169 | # Extensions are zero padded to be always at least 6 characters. I.e. file names 170 | # are prefix.000001, prefix.001000, prefix.100000, prefix.1000000, etc 171 | return f"{self.binlog_prefix}.{index:06}" 172 | -------------------------------------------------------------------------------- /myhoard/errors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | 3 | 4 | class BadRequest(Exception): 5 | pass 6 | 7 | 8 | class XtraBackupError(Exception): 9 | """Raised when the backup operation fails.""" 10 | 11 | 12 | class BlockMismatchError(XtraBackupError): 13 | """Raised when XtraBackup fails due to log block mismatch""" 14 | 15 | 16 | class UnknownBackupSite(Exception): 17 | """Referenced backup site not in configuration.""" 18 | 19 | def __init__(self, backup_site_name, known_backup_sites): 20 | super().__init__() 21 | self.backup_site_name = backup_site_name 22 | self.known_backup_sites = known_backup_sites 23 | 24 | def __str__(self): 25 | return f"backup site {self.backup_site_name} unknown (sites: {self.known_backup_sites!r})" 26 | 27 | 28 | class DiskFullError(Exception): 29 | """Raised when disk is full.""" 30 | -------------------------------------------------------------------------------- /myhoard/myhoard.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from myhoard import version 3 | from myhoard.controller import Controller 4 | from myhoard.statsd import StatsClient 5 | from myhoard.util import ( 6 | DEFAULT_XTRABACKUP_SETTINGS, 7 | detect_running_process_id, 8 | find_extra_xtrabackup_executables, 9 | parse_dow_schedule, 10 | wait_for_port, 11 | ) 12 | from myhoard.web_server import WebServer 13 | 14 | import argparse 15 | import asyncio 16 | import json 17 | import logging 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | try: 24 | from systemd import daemon # pylint: disable=no-name-in-module 25 | except ImportError: 26 | daemon = None 27 | 28 | 29 | class MyHoard: 30 | def __init__(self, config_file): 31 | self.config = {} 32 | self.config_file = config_file 33 | self.config_reload_pending = True 34 | self.controller = None 35 | self.is_running = True 36 | self.log = logging.getLogger(self.__class__.__name__) 37 | self.loop = asyncio.get_event_loop() 38 | self.mysqld_pid = None 39 | self.reload_retry_interval = 10 40 | self.reloading = False 41 | self.systemd_notified = False 42 | self.web_server = None 43 | 44 | def request_reload(self, _signal=None, _frame=None): 45 | self.log.info("Got SIGHUP signal, marking config reload pending") 46 | asyncio.ensure_future(self._reload_and_initialize_if_possible()) 47 | 48 | def request_shutdown(self, is_signal=True, _frame=None): 49 | if is_signal: 50 | self.log.info("Got SIGINT or SIGTERM signal, shutting down") 51 | self.loop.stop() 52 | 53 | async def _reload_and_initialize_if_possible(self): 54 | if self.controller and not self.controller.is_safe_to_reload(): 55 | self.log.info("Reload requested but controller state does not allow safe reload, postponing") 56 | await asyncio.sleep(self.reload_retry_interval) 57 | asyncio.ensure_future(self._reload_and_initialize_if_possible()) 58 | return 59 | 60 | await self._reload_and_initialize() 61 | 62 | async def _reload_and_initialize(self): 63 | if self.reloading: 64 | self.log.info("Reload called while already reloading configuration") 65 | await asyncio.sleep(0.1) 66 | asyncio.ensure_future(self._reload_and_initialize_if_possible()) 67 | return 68 | 69 | self.reloading = True 70 | try: 71 | await self._stop() 72 | self._load_configuration() 73 | await self._start() 74 | finally: 75 | self.reloading = False 76 | 77 | def run(self): 78 | self.loop.add_signal_handler(signal.SIGHUP, self.request_reload) 79 | self.loop.add_signal_handler(signal.SIGINT, self.request_shutdown) 80 | self.loop.add_signal_handler(signal.SIGTERM, self.request_shutdown) 81 | self.loop.run_until_complete(self._reload_and_initialize()) 82 | 83 | self.loop.run_forever() 84 | self.loop.run_until_complete(self._stop()) 85 | self.log.info("Exiting") 86 | 87 | return 0 88 | 89 | def _load_configuration(self): 90 | with open(self.config_file, "r") as f: 91 | self.config = json.load(f) 92 | 93 | start_command = self.config.get("start_command") 94 | systemd_service = self.config.get("systemd_service") 95 | if start_command and systemd_service: 96 | raise Exception("Only one of 'start_command' and 'systemd_service' must be specified") 97 | if not start_command and not systemd_service: 98 | raise Exception("Either 'start_command' or 'systemd_service' must be specified") 99 | if start_command and not isinstance(start_command, list): 100 | raise Exception("'start_command' must be a list") 101 | 102 | backup_settings = self.config["backup_settings"] 103 | ival = backup_settings["backup_interval_minutes"] 104 | if (ival > 1440 and ival // 1440 * 1440 != ival) or (ival < 1440 and 1440 // ival * ival != 1440): 105 | raise Exception("Backup interval must be 1440, multiple of 1440, or integer divisor of 1440") 106 | 107 | incremental = backup_settings.get("incremental", {}) 108 | if incremental and incremental.get("enabled", False): 109 | dow_schedule = incremental.get("full_backup_week_schedule") 110 | if not dow_schedule: 111 | raise ValueError("Incremental backups require `full_backup_week_schedule`") 112 | parse_dow_schedule(dow_schedule) 113 | 114 | if self.config["http_address"] not in {"127.0.0.1", "::1", "localhost"}: 115 | self.log.warning("Binding to non-localhost address %r is highly discouraged", self.config["http_address"]) 116 | 117 | extra_pxb_bins = find_extra_xtrabackup_executables() 118 | if extra_pxb_bins: 119 | self.log.info("Found extra xtrabackup binaries: %r", extra_pxb_bins) 120 | 121 | self.log.info("Configuration loaded") 122 | 123 | def _notify_systemd(self): 124 | if self.systemd_notified: 125 | return 126 | 127 | if daemon: 128 | daemon.notify("READY=1") 129 | 130 | self.systemd_notified = True 131 | 132 | def _restart_mysqld(self, *, with_binlog, with_gtids): 133 | systemd_service = self.config.get("systemd_service") 134 | if systemd_service: 135 | self._restart_systemd(with_binlog=with_binlog, with_gtids=with_gtids, service=systemd_service) 136 | else: 137 | mysqld_options = [] 138 | if not with_binlog: 139 | mysqld_options.append("--disable-log-bin") 140 | # If config says slave-preserve-commit-order=ON MySQL would refuse to start if binlog is 141 | # disabled. To prevent that from happening ensure preserve commit order is disabled 142 | mysqld_options.append("--skip-slave-preserve-commit-order") 143 | if not with_gtids: 144 | mysqld_options.append("--gtid-mode=OFF") 145 | self._restart_process(mysqld_options=mysqld_options) 146 | 147 | # Ensure the server is accepting connections 148 | params = self.config["mysql"]["client_params"] 149 | wait_for_port(host=params["host"], port=params["port"], timeout=15) 150 | 151 | def _restart_process(self, *, mysqld_options): 152 | # When not using systemd and we haven't started mysqld (during current invocation of the daemon) 153 | # start by determining current pid (if any) of the process so that we can kill it before starting. 154 | if self.mysqld_pid is None: 155 | self.mysqld_pid, dummy_output_bytes = detect_running_process_id(" ".join(self.config["start_command"])) or -1 156 | if self.mysqld_pid and self.mysqld_pid > 0: 157 | self.log.info("Terminating running mysqld process %s", self.mysqld_pid) 158 | os.kill(self.mysqld_pid, signal.SIGTERM) 159 | os.waitpid(self.mysqld_pid, 0) 160 | self.log.info("Process %s exited", self.mysqld_pid) 161 | self.mysqld_pid = -1 162 | 163 | full_command = self.config["start_command"] + mysqld_options 164 | self.log.info("Starting process %r", full_command) 165 | proc = subprocess.Popen( # pylint: disable=consider-using-with 166 | full_command, env={"MYSQLD_OPTS": " ".join(mysqld_options)} 167 | ) 168 | self.mysqld_pid = proc.pid 169 | self.log.info("Process %r started, pid %s", full_command, proc.pid) 170 | 171 | def _restart_systemd(self, with_binlog, with_gtids, service): 172 | self.log.info("Restarting service %r", service) 173 | 174 | command = self.config["systemd_env_update_command"].copy() 175 | command.extend(["-b", "true"] if with_binlog else ["-b", "false"]) 176 | command.extend(["-g", "true"] if with_gtids else ["-g", "false"]) 177 | try: 178 | subprocess.run(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE, check=True) 179 | except subprocess.CalledProcessError as e: 180 | self.log.error( 181 | "Failed to update MySQL config, %r exited with code %s. Output: %r / %r", 182 | command, 183 | e.returncode, 184 | e.output, 185 | e.stderr, 186 | ) 187 | raise Exception(f"Reconfiguring {service!r} failed. Code {e.returncode}") from e 188 | 189 | systemctl = self.config["systemctl_command"] 190 | try: 191 | subprocess.run(systemctl + ["restart", service], stderr=subprocess.PIPE, stdout=subprocess.PIPE, check=True) 192 | except subprocess.CalledProcessError as e: 193 | self.log.error( 194 | "Failed to restart %r, systemctl exited with code %s. Output: %r / %r", 195 | service, 196 | e.returncode, 197 | e.output, 198 | e.stderr, 199 | ) 200 | raise Exception(f"Restarting {service!r} failed. Code {e.returncode}") from e 201 | self.log.info("Restarting %r completed successfully", service) 202 | 203 | async def _start(self): 204 | statsd_config = self.config["statsd"] 205 | statsd = StatsClient( 206 | host=statsd_config["host"], 207 | port=statsd_config["port"], 208 | sentry_dsn=self.config["sentry_dsn"], 209 | tags=statsd_config["tags"], 210 | ) 211 | mysql = self.config["mysql"] 212 | self.controller = Controller( 213 | backup_settings=self.config["backup_settings"], 214 | backup_sites=self.config["backup_sites"], 215 | binlog_purge_settings=self.config["binlog_purge_settings"], 216 | mysql_binlog_prefix=mysql["binlog_prefix"], 217 | mysql_client_params=mysql["client_params"], 218 | mysql_config_file_name=mysql["config_file_name"], 219 | mysql_data_directory=mysql["data_directory"], 220 | mysql_relay_log_index_file=mysql["relay_log_index_file"], 221 | mysql_relay_log_prefix=mysql["relay_log_prefix"], 222 | optimize_tables_before_backup=self.config.get("optimize_tables_before_backup", False), 223 | restart_mysqld_callback=self._restart_mysqld, 224 | restore_max_binlog_bytes=self.config["restore_max_binlog_bytes"], 225 | restore_free_memory_percentage=self.config.get("restore_free_memory_percentage"), 226 | server_id=self.config["server_id"], 227 | state_dir=self.config["state_directory"], 228 | stats=statsd, 229 | temp_dir=self.config["temporary_directory"], 230 | xtrabackup_settings=self.config.get("xtrabackup", DEFAULT_XTRABACKUP_SETTINGS), 231 | auto_mark_backups_broken=self.config.get("restore_auto_mark_backups_broken", False), 232 | ) 233 | self.controller.start() 234 | self.web_server = WebServer( 235 | controller=self.controller, 236 | http_address=self.config["http_address"], 237 | http_port=self.config["http_port"], 238 | stats=statsd, 239 | ) 240 | await self.web_server.start() 241 | self._notify_systemd() 242 | 243 | async def _stop(self): 244 | web_server = self.web_server 245 | self.web_server = None 246 | if web_server: 247 | await web_server.stop() 248 | controller = self.controller 249 | self.controller = None 250 | if controller: 251 | controller.stop() 252 | 253 | 254 | def main(args=None): 255 | if args is None: 256 | args = sys.argv[1:] 257 | 258 | parser = argparse.ArgumentParser(prog="myhoard", description="MySQL backup and restore daemon") 259 | parser.add_argument("--version", action="version", help="show program version", version=version.__version__) 260 | parser.add_argument("--log-level", help="Log level", default="INFO", choices=("ERROR", "WARNING", "INFO", "DEBUG")) 261 | parser.add_argument("--config", help="Configuration file path", default=os.environ.get("MYHOARD_CONFIG")) 262 | arg = parser.parse_args(args) 263 | 264 | if not arg.config: 265 | print("config file path must be given with --config or via env MYHOARD_CONFIG", file=sys.stderr) 266 | return 1 267 | 268 | logging.basicConfig(level=arg.log_level, format="%(asctime)s\t%(threadName)s\t%(name)s\t%(levelname)s\t%(message)s") 269 | 270 | hoard = MyHoard(arg.config) 271 | return hoard.run() 272 | 273 | 274 | if __name__ == "__main__": 275 | sys.exit(main()) 276 | -------------------------------------------------------------------------------- /myhoard/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/myhoard/95d70a8c2ce1cb2b65311dabb212d435d70c7638/myhoard/py.typed -------------------------------------------------------------------------------- /myhoard/state_manager.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from .util import atomic_create_file 3 | from typing import Any, Generic, Mapping, TypeVar 4 | 5 | import errno 6 | import json 7 | import os 8 | import threading 9 | 10 | # TODO: Any should really be JsonType 11 | # https://github.com/python/typing/issues/182 12 | T = TypeVar("T", bound=Mapping[str, Any]) 13 | 14 | 15 | class StateManager(Generic[T]): 16 | """Simple disk backed dict/JSON state manager""" 17 | 18 | state: T 19 | 20 | def __init__(self, *, allow_unknown_keys: bool = False, lock=None, state: T, state_file): 21 | self.allow_unknown_keys = allow_unknown_keys 22 | self.lock = lock or threading.RLock() 23 | self.state = state 24 | 25 | # Check that the state_file directory actually exists before initializing. If it doesn't 26 | # then we'll just crash out later when we try to write to it. We'd prefer to crash here, 27 | # where we're more likely to find the problem higher up the stack. 28 | state_file_dirname = os.path.dirname(state_file) 29 | if not os.path.isdir(state_file_dirname): 30 | raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), state_file_dirname) 31 | 32 | self.state_file = state_file 33 | self.read_state() 34 | 35 | def delete_state(self) -> None: 36 | if os.path.exists(self.state_file): 37 | os.remove(self.state_file) 38 | 39 | def increment_counter(self, *, name: str, increment: int = 1) -> None: 40 | with self.lock: 41 | assert name in self.state 42 | self.state[name] = self.state[name] + increment # type: ignore 43 | self.write_state() 44 | 45 | def read_state(self) -> None: 46 | if os.path.exists(self.state_file): 47 | with open(self.state_file, "r") as f: 48 | self.state.clear() # type: ignore 49 | self.state.update(**json.load(f)) # type: ignore 50 | else: 51 | self.write_state() 52 | 53 | def update_state(self, **kwargs) -> None: 54 | with self.lock: 55 | changes = False 56 | for name, value in kwargs.items(): 57 | if not self.allow_unknown_keys: 58 | assert name in self.state 59 | if self.state.get(name) != value: 60 | self.state[name] = value # type: ignore 61 | changes = True 62 | if changes: 63 | self.write_state() 64 | 65 | def write_state(self) -> None: 66 | with atomic_create_file(self.state_file) as f: 67 | json.dump(self.state, f) 68 | -------------------------------------------------------------------------------- /myhoard/statsd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | """ 3 | myhoard - statsd 4 | 5 | Supports Telegraf's statsd protocol extension for 'key=value' tags: 6 | 7 | https://github.com/influxdata/telegraf/tree/master/plugins/inputs/statsd 8 | """ 9 | from contextlib import contextmanager 10 | from copy import copy 11 | from types import ModuleType 12 | from typing import Any, Dict, Optional, Union 13 | 14 | import datetime 15 | import enum 16 | import logging 17 | import os 18 | import socket 19 | import time 20 | 21 | 22 | class StatsClient: 23 | def __init__( 24 | self, 25 | *, 26 | host: Optional[str], 27 | port: int = 8125, 28 | sentry_dsn: Optional[str] = None, 29 | tags: Optional[Dict[str, str]] = None, 30 | ): 31 | self.log = logging.getLogger("StatsClient") 32 | 33 | tags = tags or {} 34 | sentry_tags = copy(tags) 35 | self.sentry_config: Dict[str, Any] = { 36 | "dsn": sentry_dsn or None, 37 | "hostname": os.environ.get("HOSTNAME") or None, 38 | "tags": sentry_tags, 39 | "ignore_exceptions": [], 40 | } 41 | 42 | self.sentry: Optional[ModuleType] = None 43 | self._initialize_sentry() 44 | 45 | self._dest_addr = (host, port) 46 | self._socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 47 | self.tags = tags 48 | 49 | @contextmanager 50 | def timing_manager(self, metric, tags=None): 51 | start_time = time.monotonic() 52 | tags = (tags or {}).copy() 53 | try: 54 | yield 55 | except: # noqa pylint: disable=broad-except,bare-except 56 | tags["success"] = "0" 57 | self.timing(metric, time.monotonic() - start_time, tags) 58 | raise 59 | else: 60 | tags["success"] = "1" 61 | self.timing(metric, time.monotonic() - start_time, tags) 62 | 63 | def update_sentry_config(self, config): 64 | new_config = self.sentry_config.copy() 65 | new_config.update(config) 66 | if new_config == self.sentry_config: 67 | return 68 | 69 | self.sentry_config = new_config 70 | self._initialize_sentry() 71 | 72 | def gauge_timedelta(self, metric: str, value: datetime.timedelta, *, tags=None) -> None: 73 | self._send(metric, b"g", value.total_seconds(), tags) 74 | 75 | def gauge_float(self, metric: str, value: Union[float, int], *, tags=None) -> None: 76 | self._send(metric, b"g", float(value), tags) 77 | 78 | def gauge_int(self, metric: str, value: int, *, tags=None) -> None: 79 | if not isinstance(value, int): 80 | raise ValueError(f"Invalid int value for {metric}: {value!r}") 81 | self._send(metric, b"g", int(value), tags) 82 | 83 | def increase(self, metric: str, inc_value: int = 1, tags=None) -> None: 84 | self._send(metric, b"c", inc_value, tags) 85 | 86 | def timing(self, metric: str, value: Union[float, int, datetime.timedelta], tags=None) -> None: 87 | if isinstance(value, datetime.timedelta): 88 | value = value.total_seconds() 89 | value = float(value) 90 | self._send(metric, b"ms", value, tags) 91 | 92 | def unexpected_exception(self, *, ex, where, tags=None, elapsed=None): 93 | all_tags = { 94 | "exception": ex.__class__.__name__, 95 | "where": where, 96 | } 97 | all_tags.update(tags or {}) 98 | self.increase("exception", tags=all_tags) 99 | 100 | if not self.sentry: 101 | return 102 | 103 | sentry_tags = {**(tags or {}), "where": where} 104 | 105 | with self.sentry.push_scope() as scope: 106 | for key, value in sentry_tags.items(): 107 | scope.set_tag(key, value) 108 | 109 | if elapsed: 110 | scope.set_extra("time_spent", elapsed) 111 | 112 | if getattr(ex, "sentry_fingerprint", None): 113 | # "{{ default }}" is a special tag sentry replaces with default fingerprint. 114 | # Only set sentry_fingerprint if you are sure automatic grouping in Sentry is failing. Don't add items 115 | # like service_id, unless there is a very good reason to have separate Sentry issues for each service. 116 | scope.fingerprint = ["{{ default }}", ex.sentry_fingerprint] 117 | 118 | self.sentry.capture_exception(ex) 119 | 120 | def _initialize_sentry(self) -> None: 121 | if not self.sentry_config.get("dsn"): 122 | self.sentry = None 123 | return 124 | 125 | try: 126 | from sentry_sdk.integrations.logging import LoggingIntegration 127 | 128 | import sentry_sdk 129 | 130 | sentry_logging = LoggingIntegration( 131 | level=logging.INFO, 132 | event_level=logging.CRITICAL, 133 | ) 134 | 135 | sentry_sdk.init(dsn=self.sentry_config["dsn"], integrations=[sentry_logging]) 136 | with sentry_sdk.configure_scope() as scope: 137 | scope.set_extra("hostname", self.sentry_config.get("hostname")) 138 | for key, value in self.sentry_config.get("tags", {}).items(): 139 | scope.set_tag(key, value) 140 | 141 | self.sentry = sentry_sdk 142 | except ImportError: 143 | self.sentry = None 144 | self.log.warning("Cannot enable Sentry.io sending: importing 'sentry_sdk' failed") 145 | 146 | def _send(self, metric: str, metric_type, value, tags): 147 | try: 148 | # format: "user.logins,service=payroll,region=us-west:1|c" 149 | parts = [metric.encode("utf-8"), b":", str(value).encode("utf-8"), b"|", metric_type] 150 | send_tags = self.tags.copy() 151 | send_tags.update(tags or {}) 152 | for tag, tag_value in sorted(send_tags.items()): 153 | if isinstance(tag_value, enum.Enum): 154 | tag_value = tag_value.value 155 | if tag_value is None: 156 | tag_value = "" 157 | elif isinstance(tag_value, datetime.datetime): 158 | if tag_value.tzinfo: 159 | tag_value = tag_value.astimezone(datetime.timezone.utc).replace(tzinfo=None) 160 | tag_value = tag_value.isoformat()[:19].replace("-", "").replace(":", "") + "Z" 161 | elif isinstance(tag_value, datetime.timedelta): 162 | tag_value = f"{int(tag_value.total_seconds())}s" 163 | elif not isinstance(tag_value, str): 164 | tag_value = str(tag_value) 165 | if " " in tag_value or ":" in tag_value or "|" in tag_value or "=" in tag_value: 166 | tag_value = "INVALID" 167 | parts.insert(1, f",{tag}={tag_value}".encode("utf-8")) 168 | 169 | if None not in self._dest_addr: 170 | self._socket.sendto(b"".join(parts), self._dest_addr) 171 | except Exception: # pylint: disable=broad-except,bare-except 172 | self.log.exception( 173 | "Unexpected exception in statsd send: metric=%r, metric_type=%r, value=%r, tags=%r, _dest_addr=%r", 174 | metric, 175 | metric_type, 176 | value, 177 | tags, 178 | self._dest_addr, 179 | ) 180 | 181 | def close(self): 182 | self._socket.close() 183 | -------------------------------------------------------------------------------- /myhoard/table.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 23 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from typing import Any, Mapping 3 | 4 | import dataclasses 5 | 6 | 7 | @dataclasses.dataclass(frozen=True) 8 | class Table: 9 | table_schema: str 10 | table_name: str 11 | table_rows: int 12 | avg_row_length: int 13 | 14 | @classmethod 15 | def from_row(cls, row: Mapping[str, Any]) -> "Table": 16 | return Table( 17 | table_schema=row["TABLE_SCHEMA"], 18 | table_name=row["TABLE_NAME"], 19 | table_rows=row["TABLE_ROWS"], 20 | avg_row_length=row["AVG_ROW_LENGTH"], 21 | ) 22 | 23 | def estimated_size_bytes(self) -> int: 24 | return self.table_rows * self.avg_row_length 25 | 26 | def escaped_designator(self) -> str: 27 | escaped_table_schema = escape_identifier(self.table_schema) 28 | escaped_table_name = escape_identifier(self.table_name) 29 | return f"{escaped_table_schema}.{escaped_table_name}" 30 | 31 | 32 | def escape_identifier(name: str) -> str: 33 | return "`" + name.replace("`", "``") + "`" 34 | -------------------------------------------------------------------------------- /myhoard/update_mysql_environment.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from myhoard.util import atomic_create_file 3 | 4 | import argparse 5 | import logging 6 | import sys 7 | 8 | 9 | class EnvironmentUpdater: 10 | """Updates given environment variable in given systemd environment variable file. 11 | Implemented as separate command to allow executing as root via sudo""" 12 | 13 | def __init__(self, args): 14 | self.args = args 15 | self.log = logging.getLogger(self.__class__.__name__) 16 | 17 | def update(self): 18 | # make sure we only update the parameter we need to update i.e., MYSQLD_OPTS 19 | key = "MYSQLD_OPTS" # we only update this environment variable 20 | options = [] 21 | if self.args.with_bin_log != "true": 22 | options.append("--disable-log-bin") 23 | # If config says slave-preserve-commit-order=ON MySQL would refuse to start if binlog is 24 | # disabled. To prevent that from happening ensure preserve commit order is disabled 25 | options.append("--skip-slave-preserve-commit-order") 26 | if self.args.gtid_mode != "true": 27 | options.append("--gtid-mode=OFF") 28 | try: 29 | with open(self.args.env_file, "r") as f: 30 | contents = [line.rstrip("\n") for line in f.readlines() if line.strip() and not line.startswith(key)] 31 | except FileNotFoundError: 32 | contents = [] 33 | value = " ".join(options) 34 | if value: 35 | contents.append(f"{key}={value}") 36 | with atomic_create_file(self.args.env_file) as f: 37 | f.write("\n".join(contents) + "\n") 38 | 39 | 40 | def main(): 41 | logging.basicConfig(level=logging.INFO) 42 | parser = argparse.ArgumentParser(description="Aiven MySQL environment updater") 43 | parser.add_argument("-f", dest="env_file", metavar="FILE", help="The Environment file to be updated") 44 | parser.add_argument("-b", dest="with_bin_log", choices=["true", "false"], help="Flag to enable bin log or not") 45 | parser.add_argument("-g", dest="gtid_mode", choices=["true", "false"], help="Flag to turn GTID mode on or off") 46 | args = parser.parse_args() 47 | EnvironmentUpdater(args).update() 48 | return 0 49 | 50 | 51 | if __name__ == "__main__": 52 | sys.exit(main()) 53 | -------------------------------------------------------------------------------- /myhoard/web_server.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from aiohttp import web 3 | from aiohttp.web_response import json_response 4 | from datetime import datetime, timezone 5 | from myhoard.backup_stream import BackupStream 6 | from myhoard.controller import Controller, IncrementalBackupInfo 7 | from myhoard.errors import BadRequest 8 | 9 | import asyncio 10 | import contextlib 11 | import enum 12 | import json 13 | import logging 14 | import time 15 | import uuid 16 | 17 | 18 | class WebServer: 19 | """Provide an API to list available backups, request state changes, observe current state and obtain metrics""" 20 | 21 | @enum.unique 22 | class BackupType(str, enum.Enum): 23 | basebackup = "basebackup" 24 | binlog = "binlog" 25 | 26 | def __init__(self, *, controller, http_address="127.0.0.1", http_port, stats): 27 | super().__init__() 28 | self.app = web.Application() 29 | logging.getLogger("aiohttp.access").setLevel(logging.WARNING) 30 | self.controller = controller 31 | self.http_address = http_address 32 | self.http_port = http_port 33 | self.log = logging.getLogger(self.__class__.__name__) 34 | self.runner = None 35 | self.site = None 36 | self.stats = stats 37 | self._add_routes() 38 | 39 | async def backup_create(self, request): 40 | """Creates new basebackup or ensures latest binlog is backed up depending on parameters""" 41 | with self._handle_request(name="backup_create"): 42 | body = await self._get_request_json(request) 43 | log_index = None 44 | backup_type = body.get("backup_type") 45 | incremental = body.get("incremental", False) 46 | wait_for_upload = body.get("wait_for_upload") 47 | with self.controller.lock: 48 | if backup_type == self.BackupType.basebackup: 49 | if wait_for_upload: 50 | raise BadRequest("wait_for_upload currently not supported for basebackup") 51 | incremental_backup_info: IncrementalBackupInfo | None = None 52 | if incremental: 53 | incremental_backup_info = self.controller.get_incremental_backup_info() 54 | if not incremental_backup_info: 55 | self.log.warning("Can't schedule incremental backup, proceeding with full backup") 56 | self.controller.mark_backup_requested( 57 | backup_reason=BackupStream.BackupReason.requested, incremental_backup_info=incremental_backup_info 58 | ) 59 | elif backup_type == self.BackupType.binlog: 60 | log_index = self.controller.rotate_and_back_up_binlog() 61 | else: 62 | raise BadRequest("`backup_type` must be set to `basebackup` or `binlog` in request body") 63 | 64 | if log_index is not None and wait_for_upload: 65 | self.log.info("Waiting up to %.1f seconds for upload of %r to complete", wait_for_upload, log_index) 66 | start = time.monotonic() 67 | while True: 68 | if self.controller.is_log_backed_up(log_index=log_index): 69 | self.log.info("Log %r was backed up in %.1f seconds", log_index, time.monotonic() - start) 70 | break 71 | elapsed = time.monotonic() - start 72 | if elapsed > wait_for_upload: 73 | self.log.info("Log %r was not backed up in %.1f seconds", log_index, elapsed) 74 | break 75 | wait_time = min(wait_for_upload - elapsed, 0.1) 76 | await asyncio.sleep(wait_time) 77 | 78 | return json_response({"success": True}) 79 | 80 | async def backup_list(self, _request): 81 | with self._handle_request(name="backup_list"): 82 | order = _request.rel_url.query.get("order") 83 | response = { 84 | "backups": None, 85 | } 86 | with self.controller.lock: 87 | if self.controller.state["backups_fetched_at"]: 88 | if order is None: 89 | response["backups"] = self.controller.state["backups"] 90 | else: 91 | response["backups"] = sorted( 92 | self.controller.state["backups"], key=lambda b: b["stream_id"], reverse=order.lower() != "asc" 93 | ) 94 | 95 | return json_response(response) 96 | 97 | async def backup_preserve(self, request): 98 | with self._handle_request(name="backup_preserve"): 99 | stream_id = request.match_info["stream_id"] 100 | body = await self._get_request_json(request) 101 | preserve_until = body.get("preserve_until") 102 | 103 | if preserve_until is not None: 104 | try: 105 | preserve_until = datetime.fromisoformat(preserve_until) 106 | if preserve_until.tzinfo != timezone.utc: 107 | raise BadRequest("`preserve_until` must be in UTC timezone.") 108 | 109 | now = datetime.now(timezone.utc) 110 | if preserve_until < now: 111 | raise BadRequest("`preserve_until` must be a date in the future.") 112 | except ValueError: 113 | raise BadRequest("`preserve_until` must be a valid isoformat datetime string.") 114 | 115 | self.controller.mark_backup_preservation(stream_id=stream_id, preserve_until=preserve_until) 116 | wait_for_applied_preservation = body.get("wait_for_applied_preservation") 117 | if wait_for_applied_preservation: 118 | self.log.info( 119 | "Waiting up to %.1f seconds for preservation of backup %s to be applied.", 120 | wait_for_applied_preservation, 121 | stream_id, 122 | ) 123 | start = time.monotonic() 124 | while True: 125 | backup = self.controller.get_backup_by_stream_id(stream_id) 126 | # the backup was or will be removed before preservation could be applied 127 | if not backup or stream_id == self.controller.state["stream_to_be_purged"]: 128 | if preserve_until: 129 | return json_response({"success": False}) 130 | # preservation was removed on time 131 | return json_response({"success": True}) 132 | 133 | if (backup["preserve_until"] is None and preserve_until is None) or ( 134 | backup["preserve_until"] == preserve_until.isoformat() 135 | ): 136 | self.log.info("Preservation for backup %s was applied.", stream_id) 137 | break 138 | 139 | elapsed = time.monotonic() - start 140 | if elapsed > wait_for_applied_preservation: 141 | self.log.info( 142 | "Preservation for backup %s was not applied up in %.1f seconds", 143 | stream_id, 144 | elapsed, 145 | ) 146 | # waiting time was exceeded 147 | return json_response({"success": False, "preservation_is_still_pending": True}) 148 | 149 | wait_time = min(wait_for_applied_preservation - elapsed, 0.1) 150 | await asyncio.sleep(wait_time) 151 | 152 | return json_response({"success": True}) 153 | 154 | async def replication_state_set(self, request): 155 | with self._handle_request(name="replication_state_set"): 156 | state = await self._get_request_json(request) 157 | self.validate_replication_state(state) 158 | self.controller.state_manager.update_state(replication_state=state) 159 | return json_response(state) 160 | 161 | async def restore_status_show(self, _request): 162 | with self._handle_request(name="restore_status_show"): 163 | if self.controller.mode != Controller.Mode.restore: 164 | raise BadRequest(f"Mode is {self.controller.mode}, restore status is not available") 165 | 166 | # If restore was just requested or our state was reloaded there might not have 167 | # been time to create the restore coordinator so wait a bit for that to become 168 | # available 169 | start_time = time.monotonic() 170 | coordinator = self.controller.restore_coordinator 171 | while time.monotonic() - start_time < 2 and not coordinator: 172 | await asyncio.sleep(0.05) 173 | coordinator = self.controller.restore_coordinator 174 | 175 | if not coordinator: 176 | if self.controller.mode != Controller.Mode.restore: 177 | raise BadRequest(f"Mode is {self.controller.mode}, restore status is not available") 178 | raise Exception("Restore coordinator is not available even though state is 'restore'") 179 | 180 | with coordinator.state_manager.lock: 181 | response = { 182 | "basebackup_compressed_bytes_downloaded": coordinator.basebackup_bytes_downloaded, 183 | "basebackup_compressed_bytes_total": coordinator.basebackup_bytes_total, 184 | "binlogs_being_restored": coordinator.binlogs_being_restored, 185 | "binlogs_pending": coordinator.binlogs_pending, 186 | "binlogs_restored": coordinator.binlogs_restored, 187 | "phase": coordinator.phase, 188 | } 189 | return json_response(response) 190 | 191 | async def status_show(self, _request): 192 | with self._handle_request(name="status_show"): 193 | return json_response({"mode": self.controller.mode}) 194 | 195 | async def status_update(self, request): 196 | with self._handle_request(name="status_update"): 197 | body = await self._get_request_json(request) 198 | body_mode = body.get("mode") 199 | if body_mode == Controller.Mode.active: 200 | force = body.get("force") 201 | if not isinstance(force, bool): 202 | force = False 203 | if force: 204 | self.log.info("Switch to active mode with force flag requested") 205 | self.controller.switch_to_active_mode(force=force) 206 | elif body_mode == Controller.Mode.observe: 207 | self.controller.switch_to_observe_mode() 208 | elif body_mode == Controller.Mode.restore: 209 | if not isinstance(body.get("rebuild_tables"), (bool, type(None))): 210 | raise BadRequest("Field 'rebuild_tables' must be a boolean when present") 211 | for key in ["site", "stream_id"]: 212 | if not isinstance(body.get(key), str): 213 | raise BadRequest(f"Field {key!r} must be given and a string") 214 | if not isinstance(body.get("target_time"), (int, type(None))): 215 | raise BadRequest("Field 'target_time' must be an integer when present") 216 | if not isinstance(body.get("target_time_approximate_ok"), (bool, type(None))): 217 | raise BadRequest("Field 'target_time_approximate_ok' must be a boolean when present") 218 | self.controller.restore_backup( 219 | rebuild_tables=False if body.get("rebuild_tables") is None else body.get("rebuild_tables"), 220 | site=body["site"], 221 | stream_id=body["stream_id"], 222 | target_time=body.get("target_time"), 223 | target_time_approximate_ok=body.get("target_time_approximate_ok"), 224 | ) 225 | else: 226 | raise BadRequest(f"Unexpected value {body_mode!r} for field 'mode'") 227 | 228 | return json_response({"mode": self.controller.mode}) 229 | 230 | @contextlib.contextmanager 231 | def _handle_request(self, *, name): 232 | with self._convert_exception_to_bad_request(method_name=name): 233 | with self.stats.timing_manager(f"myhoard.http.{name}"): 234 | yield 235 | 236 | @contextlib.contextmanager 237 | def _convert_exception_to_bad_request(self, *, method_name): 238 | try: 239 | yield 240 | except (BadRequest, ValueError) as ex: 241 | raise web.HTTPBadRequest(content_type="application/json", text=json.dumps({"message": str(ex)})) 242 | except Exception as ex: # pylint: disable=broad-except 243 | self.log.exception("Exception while handling request %r", method_name) 244 | self.stats.unexpected_exception(ex=ex, where=method_name) 245 | raise web.HTTPInternalServerError(content_type="application/json", text=json.dumps({"message": str(ex)})) 246 | 247 | async def _get_request_json(self, request): 248 | try: 249 | body = json.loads(await request.text()) 250 | except Exception as ex: # pylint= disable=broad-except 251 | raise BadRequest(f"Failed to deserialize request body as JSON: {str(ex)}") 252 | if not isinstance(body, dict): 253 | raise BadRequest("Request body must be JSON object") 254 | return body 255 | 256 | async def start(self): 257 | self.runner = web.AppRunner(self.app) 258 | await self.runner.setup() 259 | self.site = web.TCPSite(self.runner, self.http_address, self.http_port) 260 | await self.site.start() 261 | self.log.info("Web server running") 262 | 263 | async def stop(self): 264 | if not self.site: 265 | return 266 | assert self.runner is not None 267 | self.log.info("Stopping web server") 268 | await self.runner.cleanup() 269 | self.log.info("Web server stopped") 270 | self.site = None 271 | 272 | def _add_routes(self): 273 | self.app.add_routes( 274 | [ 275 | web.get("/backup", self.backup_list), 276 | web.post("/backup", self.backup_create), 277 | web.put("/backup/{stream_id}/preserve", self.backup_preserve), 278 | web.put("/replication_state", self.replication_state_set), 279 | web.get("/status", self.status_show), 280 | web.put("/status", self.status_update), 281 | web.get("/status/restore", self.restore_status_show), 282 | ] 283 | ) 284 | 285 | @classmethod 286 | def validate_replication_state(cls, state): 287 | """Validates that given state value matches the format returned by parse_gtid_range_string""" 288 | if not isinstance(state, dict): 289 | raise BadRequest("Replication state must be name => object mapping") 290 | for gtids in state.values(): 291 | if not isinstance(gtids, dict): 292 | raise BadRequest("Replication state objects must be uuid => object mappings") 293 | for maybe_uuid, ranges in gtids.items(): 294 | try: 295 | uuid.UUID(maybe_uuid) 296 | except Exception: # pylint: disable=broad-except 297 | raise BadRequest("Replication state objects must be uuid => object mappings") 298 | if not isinstance(ranges, list): 299 | raise BadRequest("Individual values must be uuid => [[start1, end1], ...] mappings") 300 | for rng in ranges: 301 | if not isinstance(rng, list) or len(rng) != 2: 302 | raise BadRequest("List entries must be 2 element ([start, end]) lists") 303 | for start_end in rng: 304 | if not isinstance(start_end, int): 305 | raise BadRequest("Range start/end values must be integers") 306 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = False 3 | 4 | [mypy-httplib2] 5 | ignore_missing_imports = True 6 | 7 | [mypy-rohmu.*] 8 | ignore_missing_imports = True 9 | 10 | [mypy-socks] 11 | ignore_missing_imports = True 12 | 13 | [mypy-systemd] 14 | ignore_missing_imports = True 15 | check_untyped_defs = True 16 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-vcs"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "myhoard" 7 | authors = [ 8 | { name="Aiven", email="opensource@aiven.io" }, 9 | { name="Rauli Ikonen", email="rauli@aiven.io"}, 10 | ] 11 | description = "MyHoard is a daemon for creating, managing and restoring MySQL backups." 12 | readme = "README.md" 13 | requires-python = ">=3.10" 14 | classifiers=[ 15 | "Development Status :: 5 - Production/Stable", 16 | "Intended Audience :: Developers", 17 | "Intended Audience :: Information Technology", 18 | "Intended Audience :: System Administrators", 19 | "License :: OSI Approved :: Apache Software License", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | "Topic :: Database :: Database Engines/Servers", 24 | "Topic :: Software Development :: Libraries", 25 | ] 26 | license = { text = "Apache License 2.0" } 27 | dynamic = ["version"] 28 | dependencies = [ 29 | "aiohttp", 30 | "cryptography >= 0.8", 31 | "httplib2", 32 | "PyMySQL >= 0.9.2", 33 | "PySocks", 34 | # rohmu is incompatible with latest version snappy 0.7.1 35 | "python-snappy == 0.6.1", 36 | "rohmu >= 1.1.2", 37 | "sentry-sdk >= 1.14.0", 38 | "packaging", 39 | ] 40 | 41 | [project.optional-dependencies] 42 | dev = [ 43 | "black==22.3.0", 44 | "coverage", 45 | "coveralls", 46 | "flake8", 47 | "isort==5.12.0", 48 | "mock", 49 | "mypy", 50 | "pre-commit", 51 | "pylint-quotes", 52 | "pylint>=2.4.3", 53 | "pytest", 54 | "pytest-cov", 55 | "pytest-mock", 56 | "pytest-timeout", 57 | "pytest-xdist", 58 | "responses", 59 | "time-machine", 60 | "types-PyMySQL", 61 | "types-requests", 62 | ] 63 | 64 | [project.urls] 65 | "Homepage" = "https://github.com/Aiven-Open/myhoard/" 66 | "Bug Tracker" = "https://github.com/Aiven-Open/myhoard/issues" 67 | 68 | [project.scripts] 69 | myhoard = "myhoard.myhoard:main" 70 | myhoard_mysql_env_update = "myhoard.update_mysql_environment:main" 71 | 72 | [tool.hatch.version] 73 | source = "vcs" 74 | 75 | [tool.hatch.build.hooks.vcs] 76 | version-file = "myhoard/version.py" 77 | 78 | [tool.black] 79 | line-length = 125 80 | 81 | [tool.isort] 82 | no_sections = true 83 | force_alphabetical_sort = true 84 | combine_as_imports = true 85 | profile = "black" 86 | skip_gitignore = true 87 | line_length = 125 88 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | all 4 | unittest 5 | filterwarnings = 6 | ignore:\((1007|1050|1753|1759|1760|3084|3129):pymysql.Warning 7 | -------------------------------------------------------------------------------- /scripts/build-setup-specific-test-image: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | [ -n "${PYTHON_VERSION}" ] || { echo "outside env must define PYTHON_VERSION"; exit 1; } 3 | [ -n "${MYSQL_VERSION}" ] || { echo "outside env must define MYSQL_VERSION"; exit 1; } 4 | [ -n "${PERCONA_VERSION}" ] || { echo "outside env must define PERCONA_VERSION"; exit 1; } 5 | 6 | trap "rm -f Dockerfile.myhoard-test-temp" EXIT 7 | cat > Dockerfile.myhoard-test-temp < /etc/sudoers.d/sudonopasswd 6 | chmod 440 /etc/sudoers.d/sudonopasswd 7 | chown -R testme:testme . 8 | -------------------------------------------------------------------------------- /scripts/install-mysql-packages: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | [ -n "$1" ] || { echo "must pass mysql version"; exit 1; } 3 | MYSQL_PACKAGE_VERSION="$1" 4 | export DEBIAN_FRONTEND="noninteractive" 5 | 6 | sudo debconf-set-selections <<< 'mysql-community-server mysql-community-server/re-root-pass password ROOTPASSWORD' 7 | sudo debconf-set-selections <<< 'mysql-community-server mysql-community-server/root-pass password ROOTPASSWORD' 8 | sudo debconf-set-selections <<< 'mysql-community-server mysql-server/default-auth-override select Use Strong Password Encryption (RECOMMENDED)' 9 | 10 | rm -f mysql-*.deb 11 | 12 | VENDOR="$(dpkg-vendor --query Vendor)" 13 | if [[ "${VENDOR}" = Ubuntu ]]; then 14 | DIST="ubuntu" 15 | VERSION_ID=$"$(lsb_release -r -s)" 16 | elif [[ "${VENDOR}" = "Debian" ]]; then 17 | DIST="debian" 18 | VERSION_ID=$"$(lsb_release -r -s)" 19 | if [[ "${VERSION_ID}" = "n/a" ]]; then 20 | # Fall back to latest stable version on sid/testing 21 | VERSION_ID="$(distro-info -r --stable)" 22 | fi 23 | else 24 | echo "Unknown vendor ${VENDOR}" 25 | exit 1 26 | fi 27 | 28 | export mysql_debs=( 29 | mysql-community-client-plugins_${MYSQL_PACKAGE_VERSION}-1${DIST}${VERSION_ID}_amd64.deb 30 | mysql-common_${MYSQL_PACKAGE_VERSION}-1${DIST}${VERSION_ID}_amd64.deb 31 | mysql-community-client-core_${MYSQL_PACKAGE_VERSION}-1${DIST}${VERSION_ID}_amd64.deb 32 | mysql-community-client_${MYSQL_PACKAGE_VERSION}-1${DIST}${VERSION_ID}_amd64.deb 33 | mysql-client_${MYSQL_PACKAGE_VERSION}-1${DIST}${VERSION_ID}_amd64.deb 34 | mysql-community-server-core_${MYSQL_PACKAGE_VERSION}-1${DIST}${VERSION_ID}_amd64.deb 35 | mysql-community-server_${MYSQL_PACKAGE_VERSION}-1${DIST}${VERSION_ID}_amd64.deb 36 | mysql-server_${MYSQL_PACKAGE_VERSION}-1${DIST}${VERSION_ID}_amd64.deb 37 | ) 38 | 39 | for package in "${mysql_debs[@]}"; do wget "https://repo.mysql.com/apt/${DIST}/pool/mysql-8.0/m/mysql-community/${package}"; done 40 | set +e 41 | for package in "${mysql_debs[@]}"; do sudo dpkg -i "$package" ; done 42 | set -e 43 | sudo apt-get -f -y install 44 | -------------------------------------------------------------------------------- /scripts/install-percona-package: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | [ -n "$1" ] || { echo "must pass percona package version"; exit 1; } 3 | PERCONA_PACKAGE_VERSION="$1" 4 | export DEBIAN_FRONTEND="noninteractive" 5 | sudo percona-release setup pxb-80 6 | sudo apt-get install -q -y --allow-unauthenticated -o Dpkg::Options::=--force-confnew percona-xtrabackup-80="${PERCONA_PACKAGE_VERSION}" 7 | mysqld --version 8 | -------------------------------------------------------------------------------- /scripts/install-python-deps: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | python -m pip install . 3 | python -m pip install '.[dev]' 4 | -------------------------------------------------------------------------------- /scripts/install-python-version: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | [ -n "$1" ] || { echo "must pass python version"; exit 1; } 3 | 4 | PYTHON_VERSION="$1" 5 | export DEBIAN_FRONTEND="noninteractive" 6 | apt-get update 7 | apt-get -y install python${PYTHON_VERSION}-full python${PYTHON_VERSION}-dev 8 | apt-get remove --purge -y python3-pip 9 | python${PYTHON_VERSION} < <(curl -s https://bootstrap.pypa.io/get-pip.py) 10 | if [[ "$2" == "--set-python-version" ]] 11 | then 12 | echo "PYTHON_VERSION=${PYTHON_VERSION}" >> /etc/PYTHON_VERSION 13 | fi 14 | -------------------------------------------------------------------------------- /scripts/pytest-inside: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | # this is designed to be launched INSIDE the container to run tests 3 | source /etc/PYTHON_VERSION 4 | mkdir /testdir 5 | cd /testdir 6 | rsync -a /src . 7 | chown -R testme:testme src 8 | cd src 9 | sudo -u testme python${PYTHON_VERSION} -m pytest "$@" 10 | -------------------------------------------------------------------------------- /scripts/remove-default-mysql: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | export DEBIAN_FRONTEND="noninteractive" 3 | sudo debconf-set-selections <<< 'mysql-community-server mysql-community-server/remove-data-dir boolean true' 4 | sudo apt purge -y mysql-server mysql-client mysql-common mysql-server-core-* mysql-client-core-* || echo "MySQL was not installed; proceeding." 5 | sudo rm -rf /var/lib/mysql 6 | -------------------------------------------------------------------------------- /scripts/setup-percona-repo: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | export VERSION_NAME=${VERSION_NAME:-"$(lsb_release -sc)"} 3 | export DEBIAN_FRONTEND="noninteractive" 4 | wget https://repo.percona.com/apt/percona-release_latest."${VERSION_NAME}"_all.deb 5 | set +e 6 | sudo dpkg -i percona-release_latest."${VERSION_NAME}"_all.deb 7 | set -e 8 | sudo apt-get -f -y install 9 | sudo /usr/bin/percona-release enable-only tools 10 | sudo apt update 11 | -------------------------------------------------------------------------------- /scripts/test-inside: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | # this is designed to be launched INSIDE the container to run tests 3 | mkdir /testdir 4 | cd /testdir 5 | rsync -a /src . 6 | chown -R testme:testme src 7 | cd src 8 | sudo -u testme make coverage 9 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from cryptography.hazmat.backends import default_backend 3 | from cryptography.hazmat.primitives import serialization 4 | from cryptography.hazmat.primitives.asymmetric import rsa 5 | from myhoard.backup_stream import BackupStream 6 | from myhoard.controller import BackupSiteInfo, Controller 7 | from myhoard.statsd import StatsClient 8 | from typing import List, Optional, Type, TypeVar 9 | 10 | import asyncio 11 | import contextlib 12 | import multiprocessing 13 | import myhoard.util as myhoard_util 14 | import os 15 | import random 16 | import signal 17 | import socket 18 | import string 19 | import subprocess 20 | import threading 21 | import time 22 | 23 | 24 | class MySQLConfig: 25 | def __init__( 26 | self, 27 | *, 28 | base_dir: Optional[str] = None, 29 | config=None, 30 | config_name=None, 31 | config_options=None, 32 | connect_options=None, 33 | password: Optional[str] = None, 34 | port: Optional[int] = None, 35 | proc=None, 36 | server_id: Optional[int] = None, 37 | startup_command: Optional[List[str]] = None, 38 | user: Optional[str] = None, 39 | ): 40 | self.base_dir = base_dir 41 | self.config = config 42 | self.config_name = config_name 43 | self.config_options = config_options 44 | self.connect_options = connect_options 45 | self.password = password 46 | self.port = port 47 | self.proc = proc 48 | self.server_id = server_id 49 | self.startup_command = startup_command 50 | self.user = user 51 | 52 | 53 | T = TypeVar("T", bound="Controller") 54 | 55 | 56 | def build_controller( 57 | cls: Type[T], 58 | *, 59 | default_backup_site: BackupSiteInfo, 60 | mysql_config: MySQLConfig, 61 | session_tmpdir, 62 | state_dir: Optional[str] = None, 63 | temp_dir: Optional[str] = None, 64 | ) -> T: 65 | Controller.ITERATION_SLEEP = 0.1 66 | Controller.BACKUP_REFRESH_INTERVAL_BASE = 0.1 67 | Controller.BACKUP_REFRESH_ACTIVE_MULTIPLIER = 1 68 | BackupStream.ITERATION_SLEEP = 0.1 69 | BackupStream.REMOTE_POLL_INTERVAL = 0.1 70 | 71 | state_dir = state_dir or os.path.abspath(os.path.join(session_tmpdir().strpath, "myhoard_state")) 72 | os.makedirs(state_dir, exist_ok=True) 73 | temp_dir = temp_dir or os.path.abspath(os.path.join(session_tmpdir().strpath, "temp")) 74 | os.makedirs(temp_dir, exist_ok=True) 75 | 76 | controller = cls( 77 | backup_settings={ 78 | "backup_age_days_max": 14, 79 | "backup_count_max": 100, 80 | "backup_count_min": 14, 81 | "backup_hour": 3, 82 | "backup_interval_minutes": 1440, 83 | "backup_minute": 0, 84 | "forced_binlog_rotation_interval": 300, 85 | }, 86 | backup_sites={"default": default_backup_site}, 87 | binlog_purge_settings={ 88 | "enabled": True, 89 | "min_binlog_age_before_purge": 30, 90 | "purge_interval": 1, 91 | "purge_when_observe_no_streams": True, 92 | }, 93 | mysql_binlog_prefix=mysql_config.config_options.binlog_file_prefix, 94 | mysql_client_params=mysql_config.connect_options, 95 | mysql_config_file_name=mysql_config.config_name, 96 | mysql_data_directory=mysql_config.config_options.datadir, 97 | mysql_relay_log_index_file=mysql_config.config_options.relay_log_index_file, 98 | mysql_relay_log_prefix=mysql_config.config_options.relay_log_file_prefix, 99 | restart_mysqld_callback=lambda **kwargs: restart_mysql(mysql_config, **kwargs), 100 | restore_max_binlog_bytes=2 * 1024 * 1024, 101 | server_id=mysql_config.server_id, 102 | state_dir=state_dir, 103 | stats=build_statsd_client(), 104 | temp_dir=temp_dir, 105 | xtrabackup_settings=myhoard_util.DEFAULT_XTRABACKUP_SETTINGS, 106 | auto_mark_backups_broken=True, 107 | ) 108 | return controller 109 | 110 | 111 | def build_statsd_client(): 112 | return StatsClient(host=None, port=None, tags=None) 113 | 114 | 115 | class MySQLConfigOptions: 116 | def __init__( 117 | self, 118 | binlog_file_prefix, 119 | binlog_index_file, 120 | datadir, 121 | parallel_workers, 122 | pid_file, 123 | port, 124 | read_only, 125 | relay_log_file_prefix, 126 | relay_log_index_file, 127 | server_id, 128 | ): 129 | self.binlog_file_prefix = binlog_file_prefix 130 | self.binlog_index_file = binlog_index_file 131 | self.datadir = datadir 132 | self.parallel_workers = parallel_workers 133 | self.pid_file = pid_file 134 | self.port = port 135 | self.read_only = read_only 136 | self.relay_log_file_prefix = relay_log_file_prefix 137 | self.relay_log_index_file = relay_log_index_file 138 | self.server_id = server_id 139 | 140 | 141 | def get_mysql_config_options(*, config_path, name, server_id, test_base_dir) -> MySQLConfigOptions: 142 | os.makedirs(config_path) 143 | data_dir = os.path.join(test_base_dir, "data") 144 | os.makedirs(data_dir) 145 | binlog_dir = os.path.join(test_base_dir, "binlogs") 146 | os.makedirs(binlog_dir) 147 | relay_log_dir = os.path.join(test_base_dir, "relay_logs") 148 | os.makedirs(relay_log_dir) 149 | 150 | port = get_random_port() 151 | return MySQLConfigOptions( 152 | binlog_file_prefix=os.path.join(binlog_dir, "bin"), 153 | binlog_index_file=os.path.join(test_base_dir, "binlog.index"), 154 | datadir=data_dir, 155 | parallel_workers=multiprocessing.cpu_count(), 156 | pid_file=os.path.join(config_path, "mysql.pid"), 157 | port=port, 158 | read_only=name != "master", 159 | relay_log_file_prefix=os.path.join(relay_log_dir, "relay"), 160 | relay_log_index_file=os.path.join(test_base_dir, "relay_log.index"), 161 | server_id=server_id, 162 | ) 163 | 164 | 165 | def restart_mysql(mysql_config, *, with_binlog=True, with_gtids=True): 166 | if mysql_config.proc: 167 | proc = mysql_config.proc 168 | mysql_config.proc = None 169 | os.kill(proc.pid, signal.SIGKILL) 170 | proc.wait(timeout=20.0) 171 | print("Stopped mysqld with pid", proc.pid) 172 | command = mysql_config.startup_command 173 | if not with_binlog: 174 | command = command + ["--disable-log-bin", "--skip-slave-preserve-commit-order"] 175 | if not with_gtids: 176 | command = command + ["--gtid-mode=OFF"] 177 | mysql_config.proc = subprocess.Popen(command) # pylint: disable=consider-using-with 178 | print("Started mysqld with pid", mysql_config.proc.pid) 179 | wait_for_port(mysql_config.port, wait_time=60) 180 | 181 | 182 | def port_is_listening(hostname, port, ipv6): 183 | if ipv6: 184 | s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM, 0) 185 | else: 186 | s = socket.socket() 187 | s.settimeout(0.5) 188 | try: 189 | s.connect((hostname, port)) 190 | return True 191 | except socket.error: 192 | return False 193 | 194 | 195 | def wait_for_port(port, *, hostname="127.0.0.1", wait_time=20.0, ipv6=False): 196 | start_time = time.monotonic() 197 | while True: 198 | if port_is_listening(hostname, port, ipv6): 199 | break 200 | elapsed = time.monotonic() - start_time 201 | if elapsed >= wait_time: 202 | raise Exception(f"Port {port} not listening after {wait_time} seconds") 203 | time.sleep(0.1) 204 | 205 | 206 | def get_random_port(*, start=3000, end=30000): 207 | while True: 208 | port = random.randint(start, end) 209 | if not port_is_listening("127.0.0.1", port, True) and not port_is_listening("127.0.0.1", port, False): 210 | return port 211 | 212 | 213 | def random_basic_string(length=16, *, prefix=None, digit_spacing=None): 214 | if prefix is None: 215 | prefix = random.choice(string.ascii_lowercase) 216 | random_length = length - len(prefix) 217 | if digit_spacing is None: 218 | chars = "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(random_length)) 219 | else: 220 | chars = "".join( 221 | random.choice(string.ascii_lowercase if (n % (digit_spacing + 1)) > 0 else string.digits) 222 | for n in range(random_length) 223 | ) 224 | return f"{prefix}{chars}" 225 | 226 | 227 | def generate_rsa_key_pair(*, bits=3072, public_exponent=65537): 228 | private = rsa.generate_private_key(public_exponent=public_exponent, key_size=bits, backend=default_backend()) 229 | public = private.public_key() 230 | 231 | private_pem = private.private_bytes( 232 | encoding=serialization.Encoding.PEM, 233 | format=serialization.PrivateFormat.PKCS8, 234 | encryption_algorithm=serialization.NoEncryption(), 235 | ) 236 | public_pem = public.public_bytes( 237 | encoding=serialization.Encoding.PEM, format=serialization.PublicFormat.SubjectPublicKeyInfo 238 | ) 239 | 240 | return private_pem, public_pem 241 | 242 | 243 | def wait_for_condition(condition, *, timeout=5.0, interval=0.1, description=""): 244 | start_time = time.monotonic() 245 | while True: 246 | if time.monotonic() - start_time >= timeout: 247 | raise Exception(f"Timeout of {timeout}s exceeded before condition was met: {description}") 248 | if condition(): 249 | break 250 | time.sleep(interval) 251 | 252 | 253 | def while_asserts(condition, *, timeout=5.0, interval=0.1): 254 | last_exception = AssertionError("for static checker") 255 | start_time = time.monotonic() 256 | while True: 257 | if time.monotonic() - start_time >= timeout: 258 | raise last_exception 259 | try: 260 | condition() 261 | break 262 | except AssertionError as ex: 263 | last_exception = ex 264 | time.sleep(interval) 265 | 266 | 267 | async def awhile_asserts(condition, *, timeout=5.0, interval=0.1): 268 | last_exception = AssertionError("for static checker") 269 | start_time = time.monotonic() 270 | while True: 271 | if time.monotonic() - start_time >= timeout: 272 | raise last_exception 273 | try: 274 | await condition() 275 | break 276 | except AssertionError as ex: 277 | last_exception = ex 278 | await asyncio.sleep(interval) 279 | 280 | 281 | class DataGenerator(threading.Thread): 282 | """Generates data into MySQL in busy loop. Used to validate that all data is correctly backed up""" 283 | 284 | def __init__(self, *, connect_info, index_offset=0, make_temp_tables=True): 285 | super().__init__() 286 | self.basic_wait = 0.1 287 | self.committed_row_count = 0 288 | self.connect_info = connect_info 289 | self.estimated_bytes = 0 290 | self.generate_data_event = threading.Event() 291 | self.generate_data_event.set() 292 | self.index_offset = index_offset 293 | self.is_running = True 294 | self.make_temp_tables = make_temp_tables 295 | self.paused = False 296 | self.pending_row_count = 0 297 | self.row_count = 0 298 | self.row_infos = [] 299 | self.temp_table_index = 0 300 | self.temp_tables = [] 301 | 302 | def run(self): 303 | with myhoard_util.mysql_cursor(**self.connect_info) as cursor1: 304 | with myhoard_util.mysql_cursor(**self.connect_info) as cursor2: 305 | cursor1.execute("CREATE DATABASE IF NOT EXISTS db1") 306 | cursor1.execute("CREATE TABLE IF NOT EXISTS db1.t1 (id INTEGER PRIMARY KEY, data TEXT)") 307 | while self.is_running: 308 | if not self.generate_data_event.wait(timeout=0.1): 309 | self.commit_pending(cursor1) 310 | self.paused = True 311 | continue 312 | 313 | self.paused = False 314 | self.direct_data_generate(cursor1) 315 | if self.make_temp_tables: 316 | self.indirect_data_generate(cursor2) 317 | time.sleep(self.basic_wait) 318 | 319 | self.commit_pending(cursor1) 320 | 321 | for table_name in self.temp_tables: 322 | print("Inserting rows from temp table", table_name) 323 | cursor2.execute(f"INSERT INTO db1.t1 (id, data) SELECT id, data FROM {table_name}") 324 | cursor2.execute(f"DROP TEMPORARY TABLE {table_name}") 325 | cursor2.execute("COMMIT") 326 | cursor1.execute("FLUSH BINARY LOGS") 327 | 328 | def stop(self): 329 | self.is_running = False 330 | with contextlib.suppress(Exception): 331 | self.join() 332 | 333 | def commit_pending(self, cursor): 334 | if not self.pending_row_count: 335 | return 336 | 337 | self.committed_row_count += self.pending_row_count 338 | self.pending_row_count = 0 339 | cursor.execute("COMMIT") 340 | 341 | def direct_data_generate(self, cursor): 342 | do_commit = random.random() < self.basic_wait * 3 343 | do_flush = random.random() < self.basic_wait * 2 344 | self.pending_row_count += self.generate_rows(cursor, "db1.t1") 345 | if do_commit: 346 | self.commit_pending(cursor=cursor) 347 | if do_flush: 348 | self.committed_row_count += self.pending_row_count 349 | self.pending_row_count = 0 350 | cursor.execute("FLUSH BINARY LOGS") 351 | 352 | def indirect_data_generate(self, cursor): 353 | table_name = f"db1.temp_t{self.temp_table_index}" 354 | print("Creating temp table", table_name, "start identifier", self.row_count + self.index_offset + 1) 355 | self.temp_table_index += 1 356 | cursor.execute(f"CREATE TEMPORARY TABLE {table_name} (id INTEGER, data TEXT)") 357 | self.temp_tables.append(table_name) 358 | self.generate_rows(cursor, table_name) 359 | 360 | drop_table = random.random() < 0.9 361 | if drop_table and len(self.temp_tables) > 20: 362 | index = random.randrange(0, len(self.temp_tables)) 363 | table_name = self.temp_tables[index] 364 | self.temp_tables.pop(index) 365 | print("Inserting rows from temp table", table_name) 366 | cursor.execute(f"INSERT INTO db1.t1 (id, data) SELECT id, data FROM {table_name}") 367 | cursor.execute(f"DROP TEMPORARY TABLE {table_name}") 368 | cursor.execute("COMMIT") 369 | cursor.execute("SELECT @@GLOBAL.gtid_executed AS gtid_executed") 370 | gtid_executed = cursor.fetchone()["gtid_executed"] 371 | print("GTID executed after", table_name, "insert:", gtid_executed) 372 | 373 | def generate_rows(self, cursor, table): 374 | row_count = random.randrange(50, 200) 375 | for _ in range(row_count): 376 | character = random.choice("abcdefghijklmnopqrstuvwxyz") 377 | character_count = random.randrange(10, 10000) 378 | self.row_infos.append((character, character_count)) 379 | self.row_count += 1 380 | self.estimated_bytes += character_count + 10 381 | data = character * character_count 382 | cursor.execute(f"INSERT INTO {table} (id, data) VALUES (%s, %s)", (self.row_count + self.index_offset, data)) 383 | return row_count 384 | -------------------------------------------------------------------------------- /test/binlog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/myhoard/95d70a8c2ce1cb2b65311dabb212d435d70c7638/test/binlog -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from . import ( 3 | build_controller, 4 | build_statsd_client, 5 | generate_rsa_key_pair, 6 | get_mysql_config_options, 7 | get_random_port, 8 | MySQLConfig, 9 | random_basic_string, 10 | ) 11 | from myhoard.controller import BackupSiteInfo, Controller 12 | from myhoard.util import atomic_create_file, change_master_to, DEFAULT_XTRABACKUP_SETTINGS, mysql_cursor, wait_for_port 13 | from myhoard.web_server import WebServer 14 | from py.path import local as LocalPath 15 | from typing import Callable, Iterator, Optional 16 | 17 | import contextlib 18 | import logging 19 | import os 20 | import pytest 21 | import shutil 22 | import signal 23 | import subprocess 24 | import sys 25 | 26 | pytest_plugins = "aiohttp.pytest_plugin" 27 | 28 | # Force logging to be configured verbose, so we can debug tests easily. 29 | _log_level_str = os.getenv("MYHOARD_TEST_LOG_LEVEL", "WARNING") 30 | _test_log_level = logging._nameToLevel[_log_level_str] # pylint: disable=protected-access 31 | _test_mysqld_log_level = int(os.getenv("MYHOARD_TEST_MYSQLD_LOG_LEVEL", "0")) 32 | 33 | root = logging.getLogger() 34 | root.setLevel(_test_log_level) 35 | 36 | handler = logging.StreamHandler(sys.stderr) 37 | handler.setLevel(_test_log_level) 38 | formatter = logging.Formatter("%(asctime)s:%(name)s:%(levelname)s:%(pathname)s:%(lineno)d:%(message)s") 39 | handler.setFormatter(formatter) 40 | root.addHandler(handler) 41 | 42 | 43 | @pytest.fixture(scope="session", name="session_tmpdir") 44 | def fixture_session_tmpdir(tmpdir_factory: pytest.TempdirFactory) -> Iterator[Callable[[], LocalPath]]: 45 | """Create a temporary directory object that's usable in the session scope. The returned value is a 46 | function which creates a new temporary directory which will be automatically cleaned up upon exit.""" 47 | tmpdir_obj = tmpdir_factory.mktemp("myhoard.session.tmpdr.") 48 | 49 | def subdir() -> LocalPath: 50 | return tmpdir_obj.mkdtemp(rootdir=tmpdir_obj) 51 | 52 | try: 53 | yield subdir 54 | finally: 55 | with contextlib.suppress(Exception): 56 | tmpdir_obj.remove(rec=1) 57 | 58 | 59 | @pytest.fixture(scope="function", name="mysql_master") 60 | def fixture_mysql_master(session_tmpdir): 61 | with mysql_setup_teardown(session_tmpdir, name="master", server_id=1) as config: 62 | yield config 63 | 64 | 65 | @pytest.fixture(scope="function", name="mysql_standby1") 66 | def fixture_mysql_standby1(session_tmpdir, mysql_master): 67 | with mysql_setup_teardown(session_tmpdir, master=mysql_master, name="standby1", server_id=2) as config: 68 | yield config 69 | 70 | 71 | @pytest.fixture(scope="function", name="mysql_standby2") 72 | def fixture_mysql_standby2(session_tmpdir, mysql_master): 73 | with mysql_setup_teardown(session_tmpdir, master=mysql_master, name="standby2", server_id=3) as config: 74 | yield config 75 | 76 | 77 | @pytest.fixture(scope="function", name="mysql_empty") 78 | def fixture_mysql_empty(session_tmpdir): 79 | with mysql_setup_teardown(session_tmpdir, name="empty", server_id=4, empty=True) as config: 80 | yield config 81 | 82 | 83 | @contextlib.contextmanager 84 | def mysql_setup_teardown(session_tmpdir, *, empty=False, master: Optional[MySQLConfig] = None, name, server_id): 85 | config = mysql_initialize_and_start(session_tmpdir, empty=empty, master=master, name=name, server_id=server_id) 86 | try: 87 | yield config 88 | finally: 89 | if config.proc: 90 | os.kill(config.proc.pid, signal.SIGKILL) 91 | config.proc.wait(timeout=10.0) 92 | 93 | 94 | def mysql_initialize_and_start( 95 | session_tmpdir, *, empty=False, master: Optional[MySQLConfig] = None, name, server_id 96 | ) -> MySQLConfig: 97 | mysql_basedir = os.environ.get("MYHOARD_MYSQL_BASEDIR") 98 | if mysql_basedir is None and os.path.exists("/opt/mysql"): 99 | mysql_basedir = "/opt/mysql" 100 | 101 | mysqld_bin = shutil.which("mysqld") 102 | assert mysqld_bin, f"mysqld binary not found in PATH: {os.environ['PATH']}" 103 | xtrabackup_bin = shutil.which("xtrabackup") 104 | assert xtrabackup_bin, f"xtrabackup binary not found in PATH: {os.environ['PATH']}" 105 | 106 | test_base_dir = os.path.abspath(os.path.join(session_tmpdir().strpath, name)) 107 | config_path = os.path.join(test_base_dir, "etc") 108 | config_options = get_mysql_config_options( 109 | config_path=config_path, name=name, server_id=server_id, test_base_dir=test_base_dir 110 | ) 111 | 112 | config = f""" 113 | [mysqld] 114 | binlog-transaction-dependency-tracking=WRITESET 115 | binlog-format=ROW 116 | datadir={config_options.datadir} 117 | enforce-gtid-consistency=ON 118 | gtid-mode=ON 119 | log-bin={config_options.binlog_file_prefix} 120 | log-bin-index={config_options.binlog_index_file} 121 | log_error_verbosity = {_test_mysqld_log_level} 122 | mysqlx=OFF 123 | pid-file={config_options.pid_file} 124 | port={config_options.port} 125 | read-only={config_options.read_only} 126 | relay-log={config_options.relay_log_file_prefix} 127 | relay-log-index={config_options.relay_log_index_file} 128 | server-id={server_id} 129 | skip-name-resolve=ON 130 | skip-slave-start=ON 131 | slave-parallel-type=LOGICAL_CLOCK 132 | slave-parallel-workers={config_options.parallel_workers} 133 | slave-preserve-commit-order=ON 134 | socket={config_options.datadir}/mysql.sock 135 | sql-mode=ANSI,STRICT_ALL_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION 136 | transaction-write-set-extraction=XXHASH64 137 | 138 | [validate_password] 139 | policy=LOW 140 | """ 141 | 142 | config_file = os.path.join(config_path, "my.cnf") 143 | with atomic_create_file(config_file, perm=0o644) as f: 144 | f.write(config) 145 | 146 | password = random_basic_string() 147 | init_file = os.path.join(config_path, "init_file.sql") 148 | 149 | init_config = f""" 150 | DROP USER IF EXISTS 'root'@'localhost'; 151 | CREATE USER IF NOT EXISTS 'root'@'127.0.0.1' IDENTIFIED BY '{password}'; 152 | GRANT ALL PRIVILEGES ON *.* TO 'root'@'127.0.0.1' WITH GRANT OPTION; 153 | FLUSH PRIVILEGES; 154 | """ 155 | 156 | with atomic_create_file(init_file, perm=0o644) as f: 157 | f.write(init_config) 158 | 159 | if not empty: 160 | # Since data directory is empty to begin with we first need to start mysqld with --initialize switch, 161 | # which creates necessary files and exists once done. We don't want any binlog or GTIDs to be generated 162 | # for these operations so run without binlog and GTID 163 | cmd = [ 164 | mysqld_bin, 165 | f"--defaults-file={config_file}", 166 | ] 167 | if mysql_basedir: 168 | cmd.append(f"--basedir={mysql_basedir}") 169 | cmd.extend( 170 | [ 171 | "--initialize", 172 | "--disable-log-bin", 173 | "--gtid-mode=OFF", 174 | "--skip-slave-preserve-commit-order", 175 | "--init-file", 176 | init_file, 177 | ] 178 | ) 179 | subprocess.run(cmd, check=True, timeout=30) 180 | 181 | connect_options = { 182 | "host": "127.0.0.1", 183 | "password": password, 184 | "port": config_options.port, 185 | "require_ssl": False, 186 | "timeout": 10, 187 | "user": "root", 188 | } 189 | 190 | cmd = [mysqld_bin, f"--defaults-file={config_file}"] 191 | if mysql_basedir: 192 | cmd.append(f"--basedir={mysql_basedir}") 193 | if empty: 194 | # Empty server is used for restoring data. Wipe data directory and don't start the server 195 | shutil.rmtree(config_options.datadir) 196 | proc = None 197 | else: 198 | proc = subprocess.Popen(cmd) # pylint: disable=consider-using-with 199 | wait_for_port(host="127.0.0.1", port=config_options.port, timeout=30.0) 200 | # Ensure connecting to the newly started server works and if this is standby also start replication 201 | with mysql_cursor(**connect_options) as cursor: 202 | if master: 203 | change_master_to( 204 | cursor=cursor, 205 | options={ 206 | "MASTER_AUTO_POSITION": 1, 207 | "MASTER_CONNECT_RETRY": 0.1, 208 | "MASTER_HOST": "127.0.0.1", 209 | "MASTER_PORT": master.port, 210 | "MASTER_PASSWORD": master.password, 211 | "MASTER_SSL": 0, 212 | "MASTER_USER": master.user, 213 | }, 214 | ) 215 | cursor.execute("START SLAVE IO_THREAD, SQL_THREAD") 216 | else: 217 | cursor.execute("SELECT 1") 218 | 219 | return MySQLConfig( 220 | base_dir=test_base_dir, 221 | config=config, 222 | config_name=config_file, 223 | config_options=config_options, 224 | connect_options=connect_options, 225 | password=password, 226 | port=config_options.port, 227 | proc=proc, 228 | server_id=server_id, 229 | startup_command=cmd, 230 | user="root", 231 | ) 232 | 233 | 234 | @pytest.fixture(scope="function", name="encryption_keys") 235 | def fixture_encryption_keys(): 236 | private_key, public_key = generate_rsa_key_pair() 237 | yield { 238 | "private": private_key.decode("ascii"), 239 | "public": public_key.decode("ascii"), 240 | } 241 | 242 | 243 | @pytest.fixture(scope="function", name="default_backup_site") 244 | def fixture_default_backup_site(session_tmpdir, encryption_keys): 245 | backup_dir = os.path.abspath(os.path.join(session_tmpdir().strpath, "backups")) 246 | os.makedirs(backup_dir) 247 | backup_site = { 248 | "compression": { 249 | "algorithm": "snappy", 250 | }, 251 | "encryption_keys": encryption_keys, 252 | "object_storage": { 253 | "directory": backup_dir, 254 | "storage_type": "local", 255 | }, 256 | "recovery_only": False, 257 | } 258 | yield backup_site 259 | 260 | 261 | @pytest.fixture(scope="function", name="master_controller") 262 | def fixture_master_controller( 263 | session_tmpdir, mysql_master: MySQLConfig, default_backup_site: BackupSiteInfo 264 | ) -> Iterator[tuple[Controller, MySQLConfig]]: 265 | controller = build_controller( 266 | Controller, 267 | default_backup_site=default_backup_site, 268 | mysql_config=mysql_master, 269 | session_tmpdir=session_tmpdir, 270 | ) 271 | try: 272 | yield controller, mysql_master 273 | finally: 274 | controller.stop() 275 | 276 | 277 | @pytest.fixture(scope="function", name="standby1_controller") 278 | def fixture_standby1_controller(session_tmpdir, mysql_standby1, default_backup_site): 279 | controller = build_controller( 280 | Controller, 281 | default_backup_site=default_backup_site, 282 | mysql_config=mysql_standby1, 283 | session_tmpdir=session_tmpdir, 284 | ) 285 | try: 286 | yield controller, mysql_standby1 287 | finally: 288 | controller.stop() 289 | 290 | 291 | @pytest.fixture(scope="function", name="standby2_controller") 292 | def fixture_standby2_controller(session_tmpdir, mysql_standby2, default_backup_site): 293 | controller = build_controller( 294 | Controller, 295 | default_backup_site=default_backup_site, 296 | mysql_config=mysql_standby2, 297 | session_tmpdir=session_tmpdir, 298 | ) 299 | try: 300 | yield controller, mysql_standby2 301 | finally: 302 | controller.stop() 303 | 304 | 305 | @pytest.fixture(scope="function", name="empty_controller") 306 | def fixture_empty_controller( 307 | session_tmpdir, mysql_empty: MySQLConfig, default_backup_site: BackupSiteInfo 308 | ) -> Iterator[tuple[Controller, MySQLConfig]]: 309 | controller = build_controller( 310 | Controller, 311 | default_backup_site=default_backup_site, 312 | mysql_config=mysql_empty, 313 | session_tmpdir=session_tmpdir, 314 | ) 315 | try: 316 | yield controller, mysql_empty 317 | finally: 318 | controller.stop() 319 | 320 | 321 | @pytest.fixture(scope="function", name="myhoard_config") 322 | def fixture_myhoard_config(default_backup_site, mysql_master, session_tmpdir): 323 | state_dir = os.path.abspath(os.path.join(session_tmpdir().strpath, "myhoard_state")) 324 | os.makedirs(state_dir) 325 | temp_dir = os.path.abspath(os.path.join(session_tmpdir().strpath, "temp")) 326 | os.makedirs(temp_dir) 327 | return { 328 | "backup_settings": { 329 | "backup_age_days_max": 14, 330 | "backup_count_max": 100, 331 | "backup_count_min": 14, 332 | "backup_hour": 3, 333 | "backup_interval_minutes": 1440, 334 | "backup_minute": 0, 335 | "forced_binlog_rotation_interval": 300, 336 | "upload_site": "default", 337 | "incremental": {"enabled": False, "full_backup_week_schedule": "sun,wed"}, 338 | }, 339 | "backup_sites": { 340 | "default": default_backup_site, 341 | }, 342 | "binlog_purge_settings": { 343 | "enabled": True, 344 | "min_binlog_age_before_purge": 600, 345 | "purge_interval": 60, 346 | "purge_when_observe_no_streams": True, 347 | }, 348 | "http_address": "127.0.0.1", 349 | "http_port": get_random_port(start=3000, end=30000), 350 | "mysql": { 351 | "binlog_prefix": mysql_master.config_options.binlog_file_prefix, 352 | "client_params": { 353 | "host": "127.0.0.1", 354 | "password": "f@keP@ssw0rd", 355 | "port": 3306, 356 | "require_ssl": False, 357 | "user": "root", 358 | }, 359 | "config_file_name": mysql_master.config_name, 360 | "data_directory": mysql_master.config_options.datadir, 361 | "relay_log_index_file": mysql_master.config_options.relay_log_index_file, 362 | "relay_log_prefix": mysql_master.config_options.relay_log_file_prefix, 363 | }, 364 | "restore_auto_mark_backups_broken": True, 365 | "restore_free_memory_percentage": 50, 366 | "restore_max_binlog_bytes": 4294967296, 367 | "sentry_dsn": None, 368 | "server_id": mysql_master.server_id, 369 | "start_command": mysql_master.startup_command, 370 | "state_directory": state_dir, 371 | "statsd": { 372 | "host": None, 373 | "port": None, 374 | "tags": { 375 | "app": "myhoard", 376 | }, 377 | }, 378 | "systemctl_command": ["sudo", "/usr/bin/systemctl"], 379 | "systemd_env_update_command": [ 380 | "sudo", 381 | "/usr/bin/myhoard_mysql_env_update", 382 | "-f", 383 | "/etc/systemd/system/mysqld.environment", 384 | ], 385 | "systemd_service": None, 386 | "temporary_directory": temp_dir, 387 | "xtrabackup": DEFAULT_XTRABACKUP_SETTINGS, 388 | } 389 | 390 | 391 | @pytest.fixture(scope="function", name="web_client") 392 | async def fixture_web_client(master_controller, aiohttp_client): 393 | server = WebServer( 394 | controller=master_controller[0], 395 | http_address="::1", 396 | http_port=-1, 397 | stats=build_statsd_client(), 398 | ) 399 | client = await aiohttp_client(server.app) 400 | yield client 401 | -------------------------------------------------------------------------------- /test/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Aiven, Helsinki, Finland. https://aiven.io/ 2 | -------------------------------------------------------------------------------- /test/helpers/databases.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from myhoard.util import mysql_cursor 3 | from test import MySQLConfig 4 | from test.helpers.loggers import get_logger_name 5 | from typing import cast, Final, TypedDict 6 | 7 | import logging 8 | import time 9 | 10 | 11 | class SizeDict(TypedDict): 12 | size: int 13 | 14 | 15 | def populate_table(mysql_config: MySQLConfig, table_name: str, batches: int = 1) -> None: 16 | """Populate database with a lot of data, using a single transaction. 17 | 18 | Args: 19 | mysql_config: Configuration for connecting to MySQL. 20 | table_name: Name of the table to populate (will be created if it does not exist). 21 | batches: Number of batches to use. Each batch is 64 MB. 22 | """ 23 | logger = logging.getLogger(get_logger_name()) 24 | 25 | ONE_MB: Final[int] = 2**20 26 | MB_PER_BATCH: Final[int] = 64 27 | 28 | # Use a higher timeout, +1 minute per 4 batches 29 | options = mysql_config.connect_options 30 | if batches > 3: 31 | orig_timeout = mysql_config.connect_options["timeout"] 32 | options = mysql_config.connect_options | {"timeout": orig_timeout + batches // 4 * 60} 33 | 34 | # Use a higher timeout 35 | with mysql_cursor(**options) as cursor: 36 | t0 = time.monotonic_ns() 37 | 38 | cursor.execute(f"CREATE TABLE IF NOT EXISTS {table_name} (id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b LONGBLOB);") 39 | cursor.execute("DROP PROCEDURE IF EXISTS generate_data;") 40 | cursor.execute( 41 | f""" 42 | CREATE PROCEDURE generate_data() 43 | BEGIN 44 | DECLARE i INT DEFAULT 0; 45 | WHILE i < {batches} DO 46 | INSERT INTO {table_name} (b) VALUES (REPEAT('x', {MB_PER_BATCH * ONE_MB})); 47 | SET i = i + 1; 48 | END WHILE; 49 | END 50 | """ 51 | ) 52 | cursor.execute("CALL generate_data();") 53 | cursor.execute("COMMIT") 54 | cursor.execute("FLUSH BINARY LOGS") 55 | 56 | t1 = time.monotonic_ns() 57 | 58 | logger.info( 59 | "Populating table %s with %i MB took %f sec.", table_name, (batches * MB_PER_BATCH), (t1 - t0) / 1_000_000_000 60 | ) 61 | 62 | 63 | def get_table_size(mysql_config: MySQLConfig, table_name: str) -> int: 64 | """Get size of table (data + index) in bytes.""" 65 | with mysql_cursor(**mysql_config.connect_options) as cursor: 66 | cursor.execute( 67 | f""" 68 | SELECT TABLE_NAME AS `Table`, 69 | (DATA_LENGTH + INDEX_LENGTH) AS `size` 70 | FROM information_schema.TABLES 71 | WHERE TABLE_NAME = '{table_name}'; 72 | """ 73 | ) 74 | 75 | return cast(SizeDict, cursor.fetchone())["size"] 76 | -------------------------------------------------------------------------------- /test/helpers/filesystem.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from contextlib import contextmanager 3 | from pathlib import Path 4 | from test import random_basic_string 5 | from typing import Iterator 6 | 7 | import subprocess 8 | 9 | 10 | @contextmanager 11 | def mount_tmpfs(path: Path, *, megabytes: int) -> Iterator[Path]: 12 | """Mount a tmpfs filesystem at the given path and unmount it when done. 13 | 14 | Args: 15 | path: The path to mount the tmpfs filesystem at (will create a subdirectory there). 16 | megabytes: The size of the tmpfs filesystem in megabytes. 17 | 18 | Yields: 19 | The path the tmpfs filesystem was mounted at. 20 | """ 21 | sub_dir = path / random_basic_string(20, prefix="small_disk_") 22 | try: 23 | sub_dir.mkdir(parents=True, exist_ok=True) 24 | subprocess.check_call(["sudo", "mount", "-t", "tmpfs", "-o", f"size={megabytes}m", "tmpfs", str(sub_dir)]) 25 | 26 | yield sub_dir 27 | finally: 28 | subprocess.check_call(["sudo", "umount", str(sub_dir)]) 29 | -------------------------------------------------------------------------------- /test/helpers/fixtures.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from contextlib import contextmanager 3 | from myhoard.controller import BackupSiteInfo, Controller 4 | from pathlib import Path 5 | from py.path import local as LocalPath 6 | from test import build_controller, MySQLConfig 7 | from test.helpers.filesystem import mount_tmpfs 8 | from typing import Callable, Iterator 9 | 10 | 11 | @contextmanager 12 | def create_controller_in_small_disk( 13 | *, session_tmpdir: Callable[[], LocalPath], mysql_config: MySQLConfig, default_backup_site: BackupSiteInfo 14 | ) -> Iterator[tuple[Controller, MySQLConfig]]: 15 | sub_dir = Path(session_tmpdir().strpath) / "small_disk" 16 | with mount_tmpfs(path=sub_dir, megabytes=8) as tmpfs_dir: 17 | controller = build_controller( 18 | Controller, 19 | default_backup_site=default_backup_site, 20 | mysql_config=mysql_config, 21 | session_tmpdir=lambda: LocalPath(tmpfs_dir), 22 | ) 23 | 24 | try: 25 | yield controller, mysql_config 26 | 27 | finally: 28 | controller.stop() 29 | -------------------------------------------------------------------------------- /test/helpers/flow_testers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from __future__ import annotations 3 | 4 | from functools import partial 5 | from myhoard.backup_stream import BackupStream 6 | from myhoard.controller import Controller 7 | from myhoard.restore_coordinator import RestoreCoordinator 8 | from test import wait_for_condition, while_asserts 9 | from test.helpers.loggers import get_logger_name, log_duration 10 | 11 | import logging 12 | 13 | 14 | class ControllerFlowTester: 15 | """Helper class to test the flow of the Controller for a backup or restore.""" 16 | 17 | def __init__(self, controller: Controller, global_timeout: int = 10) -> None: 18 | self.controller = controller 19 | self.timeout = global_timeout 20 | self.logger = logging.getLogger(get_logger_name()) 21 | 22 | @log_duration 23 | def wait_for_streaming_binlogs(self, *, timeout: int | None = None) -> None: 24 | timeout = self.timeout if timeout is None else timeout 25 | while_asserts(self._streaming_binlogs, timeout=timeout) 26 | 27 | @log_duration 28 | def wait_for_multiple_streams(self, *, timeout: int | None = None) -> None: 29 | timeout = self.timeout if timeout is None else timeout 30 | while_asserts(self._has_multiple_streams, timeout=timeout) 31 | 32 | @log_duration 33 | def wait_for_single_stream(self, *, timeout: int | None = None) -> None: 34 | timeout = self.timeout if timeout is None else timeout 35 | while_asserts(self._has_single_stream, timeout=timeout) 36 | 37 | @log_duration 38 | def wait_for_restore_phase(self, phase: RestoreCoordinator.Phase, *, timeout: int | None = None) -> None: 39 | timeout = self.timeout if timeout is None else timeout 40 | wait_for_condition(partial(self._restore_phase, phase=phase), timeout=timeout, description=f"restore {phase}") 41 | 42 | @log_duration 43 | def wait_for_fetched_backup(self, *, timeout: int | None = None) -> None: 44 | timeout = self.timeout if timeout is None else timeout 45 | wait_for_condition(self._has_fetched_backup, timeout=timeout, description="fetched backup") 46 | 47 | def _streaming_binlogs(self) -> None: 48 | assert self.controller.backup_streams 49 | assert all(bs.active_phase == BackupStream.ActivePhase.binlog for bs in self.controller.backup_streams), [ 50 | (s.name, s.active_phase) for s in self.controller.backup_streams 51 | ] 52 | 53 | def _has_multiple_streams(self) -> None: 54 | assert len(self.controller.backup_streams) > 1 55 | 56 | def _has_single_stream(self) -> None: 57 | assert len(self.controller.backup_streams) == 1 58 | 59 | def _restore_phase(self, phase: RestoreCoordinator.Phase) -> bool: 60 | return self.controller.restore_coordinator is not None and self.controller.restore_coordinator.phase is phase 61 | 62 | def _has_fetched_backup(self) -> bool: 63 | return self.controller.state["backups_fetched_at"] != 0 64 | -------------------------------------------------------------------------------- /test/helpers/loggers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from functools import wraps 3 | from pathlib import Path 4 | from typing import Any, Callable 5 | 6 | import logging 7 | import os 8 | import time 9 | 10 | 11 | def get_logger_name() -> str: 12 | """Get the name of the logger for the current test. 13 | 14 | Environment variable PYTEST_CURRENT_TEST is set by pytest and contains something like 15 | ``"test/test_myfile.py::test_something (call)"``. 16 | 17 | With the example above, this function will return ``"test.test_myfile.test_something"``. 18 | 19 | If the environment variable is not set, it will return something like ``"test.test_myfile"``. 20 | """ 21 | current_test = os.environ.get("PYTEST_CURRENT_TEST") 22 | if current_test is None: 23 | return f"test.{Path(__file__).stem}" 24 | 25 | path, _, name = current_test.split(":") 26 | name = name.split()[0] 27 | path = path.removesuffix(".py").replace("/", ".").replace("\\", ".") 28 | return f"{path}.{name}" 29 | 30 | 31 | def log_duration(function: Callable) -> Callable: 32 | """Decorator to log the duration of a function call.""" 33 | description = function.__name__.replace("_", " ").capitalize() 34 | 35 | @wraps(function) 36 | def wrapper(*args, **kwargs) -> Any: 37 | logger = logging.getLogger(get_logger_name()) 38 | 39 | t0 = time.monotonic_ns() 40 | result = function(*args, **kwargs) 41 | t1 = time.monotonic_ns() 42 | 43 | logger.info("%s took %.5f sec.", description, (t1 - t0) / 1_000_000_000) 44 | 45 | return result 46 | 47 | return wrapper 48 | -------------------------------------------------------------------------------- /test/helpers/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from myhoard.util import BinVersion 3 | 4 | 5 | def xtrabackup_version_to_string(version: BinVersion) -> str: 6 | v = list(version) 7 | assert 3 <= len(v) <= 4, f"Unexpected format of tool version: {v}" 8 | version_str = ".".join(str(v) for v in v[:3]) 9 | if len(v) > 3: 10 | version_str += f"-{v[3]}" 11 | print(version_str) 12 | return version_str 13 | -------------------------------------------------------------------------------- /test/local/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Aiven, Helsinki, Finland. https://aiven.io/ 2 | -------------------------------------------------------------------------------- /test/local/test_controller.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from __future__ import annotations 3 | 4 | from _pytest.logging import LogCaptureFixture 5 | from myhoard.backup_stream import BackupStream 6 | from myhoard.controller import BackupSiteInfo, Controller, sort_completed_backups 7 | from myhoard.restore_coordinator import RestoreCoordinator 8 | from py.path import local as LocalPath 9 | from test import MySQLConfig 10 | from test.helpers.databases import get_table_size, populate_table 11 | from test.helpers.fixtures import create_controller_in_small_disk 12 | from test.helpers.flow_testers import ControllerFlowTester 13 | from typing import Callable, Iterator 14 | 15 | import pytest 16 | 17 | 18 | @pytest.fixture(scope="function", name="empty_controller_in_small_disk") 19 | def fixture_empty_controller_in_small_disk( 20 | session_tmpdir: Callable[[], LocalPath], mysql_empty: MySQLConfig, default_backup_site: BackupSiteInfo 21 | ) -> Iterator[tuple[Controller, MySQLConfig]]: 22 | with create_controller_in_small_disk( 23 | session_tmpdir=session_tmpdir, mysql_config=mysql_empty, default_backup_site=default_backup_site 24 | ) as controller_and_mysql_config: 25 | yield controller_and_mysql_config 26 | 27 | 28 | def test_backup_and_restore( 29 | master_controller: tuple[Controller, MySQLConfig], 30 | empty_controller: tuple[Controller, MySQLConfig], 31 | ) -> None: 32 | """Test a successful backup and restore.""" 33 | empty_controller[1].connect_options["password"] = master_controller[1].connect_options["password"] 34 | populate_table(master_controller[1], "test") 35 | 36 | backup_streams = do_backup(controller=master_controller[0]) 37 | do_restore(target_controller=empty_controller[0], backup_streams=backup_streams) 38 | 39 | orig_size = get_table_size(master_controller[1], "test") 40 | restored_size = get_table_size(empty_controller[1], "test") 41 | 42 | assert orig_size == restored_size 43 | 44 | 45 | def test_backup_and_restore_fail_on_disk_full( 46 | master_controller: tuple[Controller, MySQLConfig], 47 | empty_controller_in_small_disk: tuple[Controller, MySQLConfig], 48 | caplog: LogCaptureFixture, 49 | ) -> None: 50 | """Test a backup and restore that fails restoring because the disk is full.""" 51 | empty_controller_in_small_disk[1].connect_options["password"] = master_controller[1].connect_options["password"] 52 | populate_table(master_controller[1], "test") 53 | 54 | backup_streams = do_backup(controller=master_controller[0]) 55 | do_restore( 56 | target_controller=empty_controller_in_small_disk[0], 57 | backup_streams=backup_streams, 58 | caplog=caplog, 59 | fail_because_disk_full=True, 60 | ) 61 | 62 | 63 | def do_backup(controller: Controller) -> list[BackupStream]: 64 | """Trigger a backup and wait for it to finish.""" 65 | flow_tester = ControllerFlowTester(controller) 66 | 67 | controller.switch_to_active_mode() 68 | controller.start() 69 | 70 | flow_tester.wait_for_streaming_binlogs() 71 | 72 | # Stream backup. 73 | controller.mark_backup_requested(backup_reason=BackupStream.BackupReason.requested) 74 | 75 | flow_tester.wait_for_multiple_streams() 76 | flow_tester.wait_for_streaming_binlogs() 77 | flow_tester.wait_for_single_stream() 78 | 79 | return controller.backup_streams 80 | 81 | 82 | def do_restore( 83 | target_controller: Controller, 84 | backup_streams: list[BackupStream], 85 | caplog: LogCaptureFixture | None = None, 86 | fail_because_disk_full: bool = False, 87 | ) -> None: 88 | """Trigger a restore and wait for it to finish.""" 89 | bs = backup_streams[0] 90 | 91 | # Restore backup into an empty database. 92 | flow_tester = ControllerFlowTester(target_controller) 93 | target_controller.start() 94 | 95 | try: 96 | flow_tester.wait_for_fetched_backup(timeout=2) 97 | 98 | target_controller.restore_backup(site=bs.site, stream_id=bs.stream_id) 99 | 100 | if fail_because_disk_full: 101 | flow_tester.wait_for_restore_phase(RestoreCoordinator.Phase.failed) 102 | 103 | # check if it failed due to full disk 104 | assert caplog is not None, "caplog is required for checking full disk message." 105 | assert any( 106 | "DiskFullError('No space left on device. Cannot complete xbstream-extract!')" in record.message 107 | for record in caplog.records 108 | ) 109 | 110 | # Check that we have backups, but none of them are broken. 111 | current_backups = sort_completed_backups(target_controller.state["backups"]) 112 | assert current_backups 113 | assert all(b["broken_at"] is None for b in current_backups) 114 | 115 | else: 116 | flow_tester.wait_for_restore_phase(RestoreCoordinator.Phase.completed) 117 | finally: 118 | target_controller.stop() 119 | -------------------------------------------------------------------------------- /test/test_append_only_state_manager.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from myhoard.append_only_state_manager import AppendOnlyStateManager 3 | from typing import Dict, List 4 | 5 | import os 6 | import pytest 7 | 8 | pytestmark = [pytest.mark.unittest, pytest.mark.all] 9 | 10 | 11 | def test_basic_operations(session_tmpdir): 12 | state_file_name = os.path.join(session_tmpdir().strpath, "aosm.txt") 13 | entries: List[Dict] = [] 14 | aosm = AppendOnlyStateManager(entries=entries, state_file=state_file_name) 15 | assert aosm.entries == [] 16 | file_size = os.stat(state_file_name).st_size 17 | assert file_size == 0 18 | 19 | aosm.append({"foo": "bar0"}) 20 | assert entries == [{"foo": "bar0"}] 21 | for index in range(1010): 22 | new_file_size = os.stat(state_file_name).st_size 23 | assert new_file_size > file_size 24 | file_size = new_file_size 25 | aosm.append({"foo": f"bar{index + 1}"}) 26 | assert len(entries) == index + 2 27 | 28 | for index in range(1000): 29 | new_file_size = os.stat(state_file_name).st_size 30 | # File size keeps on growing even though data is being deleted because only deletion markers are being written 31 | assert new_file_size > file_size 32 | file_size = new_file_size 33 | aosm.remove_head() 34 | assert len(entries) == 1010 - index 35 | 36 | new_file_size = os.stat(state_file_name).st_size 37 | assert new_file_size > file_size 38 | file_size = new_file_size 39 | 40 | entries2: List[Dict] = [] 41 | AppendOnlyStateManager(entries=entries2, state_file=state_file_name) 42 | 43 | assert set(entry["foo"] for entry in entries) == {f"bar{index}" for index in range(1000, 1011)} 44 | assert set(entry["foo"] for entry in entries2) == {f"bar{index}" for index in range(1000, 1011)} 45 | 46 | # This deletion takes us over the maximum number of deleted entries to keep around and file is rewritten 47 | aosm.remove_head() 48 | assert set(entry["foo"] for entry in entries) == {f"bar{index}" for index in range(1001, 1011)} 49 | new_file_size = os.stat(state_file_name).st_size 50 | assert new_file_size < file_size 51 | file_size = new_file_size 52 | 53 | entries2 = [] 54 | AppendOnlyStateManager(entries=entries2, state_file=state_file_name) 55 | assert set(entry["foo"] for entry in entries2) == {f"bar{index}" for index in range(1001, 1011)} 56 | 57 | aosm.remove_head() 58 | assert set(entry["foo"] for entry in entries) == {f"bar{index}" for index in range(1002, 1011)} 59 | new_file_size = os.stat(state_file_name).st_size 60 | assert new_file_size > file_size 61 | file_size = new_file_size 62 | 63 | aosm.append_many([{"foo": "bar1011"}, {"foo": "bar1012"}]) 64 | assert set(entry["foo"] for entry in entries) == {f"bar{index}" for index in range(1002, 1013)} 65 | new_file_size = os.stat(state_file_name).st_size 66 | assert new_file_size > file_size 67 | file_size = new_file_size 68 | 69 | entries2 = [] 70 | AppendOnlyStateManager(entries=entries2, state_file=state_file_name) 71 | assert set(entry["foo"] for entry in entries2) == {f"bar{index}" for index in range(1002, 1013)} 72 | 73 | aosm.remove_many_from_head(3) 74 | assert set(entry["foo"] for entry in entries) == {f"bar{index}" for index in range(1005, 1013)} 75 | new_file_size = os.stat(state_file_name).st_size 76 | assert new_file_size > file_size 77 | 78 | entries2 = [] 79 | AppendOnlyStateManager(entries=entries2, state_file=state_file_name) 80 | assert set(entry["foo"] for entry in entries2) == {f"bar{index}" for index in range(1005, 1013)} 81 | 82 | AppendOnlyStateManager(entries=[], state_file=state_file_name).delete_state() 83 | assert not os.path.exists(state_file_name) 84 | -------------------------------------------------------------------------------- /test/test_backup_stream.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from . import build_statsd_client, generate_rsa_key_pair, MySQLConfig, wait_for_condition 3 | from myhoard.backup_stream import BackupStream 4 | from myhoard.binlog_scanner import BinlogScanner 5 | from myhoard.controller import BackupSiteInfo, Controller 6 | from rohmu.object_storage.local import LocalTransfer 7 | from typing import cast, Dict 8 | 9 | import json 10 | import math 11 | import myhoard.util as myhoard_util 12 | import os 13 | import pytest 14 | import rohmu 15 | 16 | pytestmark = [pytest.mark.unittest, pytest.mark.all] 17 | 18 | 19 | def test_backup_stream(session_tmpdir, mysql_master): 20 | _run_backup_stream_test(session_tmpdir, mysql_master, BackupStream) 21 | 22 | 23 | def test_backup_stream_with_s3_emulation(session_tmpdir, mysql_master): 24 | class PatchedBackupStream(BackupStream): 25 | def _should_list_with_metadata(self, *, next_index): # pylint: disable=unused-argument 26 | return False 27 | 28 | _run_backup_stream_test(session_tmpdir, mysql_master, PatchedBackupStream) 29 | 30 | 31 | def test_backup_stream_with_split_basebackup_file(session_tmpdir, mysql_master): 32 | _run_backup_stream_test(session_tmpdir, mysql_master, BackupStream, split_size=10_000) 33 | 34 | 35 | def _run_backup_stream_test(session_tmpdir, mysql_master: MySQLConfig, backup_stream_class, split_size: int = 0): 36 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 37 | cursor.execute("CREATE DATABASE db1") 38 | cursor.execute("USE db1") 39 | cursor.execute("CREATE TABLE t1 (id INTEGER PRIMARY KEY, data TEXT)") 40 | cursor.execute("COMMIT") 41 | 42 | BackupStream.ITERATION_SLEEP = 0.1 43 | BackupStream.REMOTE_POLL_INTERVAL = 0.1 44 | 45 | backup_target_location = session_tmpdir().strpath 46 | state_dir = session_tmpdir().strpath 47 | state_file = os.path.join(state_dir, "backup_stream.json") 48 | remote_binlogs_state_file = os.path.join(state_dir, "backup_stream.remote_binlogs") 49 | private_key_pem, public_key_pem = generate_rsa_key_pair() # pylint: disable=unused-variable 50 | bs = backup_stream_class( 51 | backup_reason=BackupStream.BackupReason.requested, 52 | compression={ 53 | "algorithm": "lzma", 54 | "level": 1, 55 | }, 56 | file_storage_setup_fn=lambda: LocalTransfer(backup_target_location), 57 | mode=BackupStream.Mode.active, 58 | mysql_client_params=mysql_master.connect_options, 59 | mysql_config_file_name=mysql_master.config_name, 60 | mysql_data_directory=mysql_master.config_options.datadir, 61 | normalized_backup_time="2019-02-25T08:20", 62 | rsa_public_key_pem=public_key_pem, 63 | remote_binlogs_state_file=remote_binlogs_state_file, 64 | server_id=mysql_master.server_id, 65 | site="default", 66 | state_file=state_file, 67 | stats=build_statsd_client(), 68 | temp_dir=mysql_master.base_dir, 69 | split_size=split_size, 70 | ) 71 | 72 | assert mysql_master.server_id is not None 73 | 74 | scanner = BinlogScanner( 75 | binlog_prefix=mysql_master.config_options.binlog_file_prefix, 76 | server_id=mysql_master.server_id, 77 | state_file=os.path.join(session_tmpdir().strpath, "scanner_state.json"), 78 | stats=build_statsd_client(), 79 | ) 80 | bs.add_binlogs(scanner.scan_new(None)) 81 | 82 | observer_dir = session_tmpdir().strpath 83 | observer_state_file = os.path.join(observer_dir, "backup_stream_observer.json") 84 | observer_remote_binlogs_state_file = os.path.join(observer_dir, "backup_stream_observer.remote_binlogs") 85 | _private_key_pem, public_key_pem = generate_rsa_key_pair() 86 | bs_observer = backup_stream_class( 87 | backup_reason=None, 88 | file_storage_setup_fn=lambda: LocalTransfer(backup_target_location), 89 | mode=BackupStream.Mode.observe, 90 | mysql_client_params=mysql_master.connect_options, 91 | mysql_config_file_name=mysql_master.config_name, 92 | mysql_data_directory=mysql_master.config_options.datadir, 93 | normalized_backup_time="2019-02-25T08:20", 94 | rsa_public_key_pem=public_key_pem, 95 | remote_binlogs_state_file=observer_remote_binlogs_state_file, 96 | server_id=mysql_master.server_id, 97 | site="default", 98 | state_file=observer_state_file, 99 | stats=build_statsd_client(), 100 | stream_id=bs.stream_id, 101 | temp_dir=mysql_master.base_dir, 102 | split_size=split_size, 103 | ) 104 | 105 | with bs_observer.running(): 106 | with bs.running(): 107 | wait_for_condition(bs.is_streaming_binlogs, timeout=15) 108 | wait_for_condition(lambda: bs_observer.state["last_remote_state_check"], timeout=10) 109 | wait_for_condition(lambda: bs_observer.state.get("remote_gtid_executed", []), timeout=10) 110 | 111 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 112 | cursor.execute("SELECT @@GLOBAL.gtid_executed AS gtid_executed") 113 | gtid_executed = myhoard_util.parse_gtid_range_string(cast(dict, cursor.fetchone())["gtid_executed"]) 114 | assert bs_observer.state["remote_gtid_executed"] == gtid_executed 115 | cursor.execute("INSERT INTO db1.t1 (id, data) VALUES (1, 'abcdefg')") 116 | cursor.execute("COMMIT") 117 | cursor.execute("FLUSH BINARY LOGS") 118 | 119 | new_binlogs = scanner.scan_new(None) 120 | assert new_binlogs 121 | bs.add_binlogs(new_binlogs) 122 | wait_for_condition(lambda: not bs.state["pending_binlogs"]) 123 | bs.mark_as_completed() 124 | wait_for_condition(lambda: bs.state["active_details"]["phase"] == BackupStream.ActivePhase.binlog) 125 | 126 | assert bs.is_binlog_safe_to_delete(new_binlogs[0]) 127 | assert bs.is_log_backed_up(log_index=new_binlogs[0]["local_index"]) 128 | 129 | # remote_gtid_executed will be updated once the stream notices the new binlog that was uploaded above 130 | wait_for_condition(lambda: bs_observer.state["remote_gtid_executed"] != gtid_executed) 131 | # Is safe to delete because all GTIDs in this binlog have been backed up (but not by this stream) 132 | assert bs_observer.is_binlog_safe_to_delete(new_binlogs[0]) 133 | # This stream isn't in active mode so is_log_backed_up will return false for any input 134 | assert not bs_observer.is_log_backed_up(log_index=new_binlogs[0]["local_index"]) 135 | # Check the compression algorithm for binlogs is set as expected 136 | assert bs_observer.remote_binlogs[0]["compression_algorithm"] == "lzma" 137 | 138 | assert bs.state["basebackup_errors"] == 0 139 | assert bs.state["remote_read_errors"] == 0 140 | assert bs.state["remote_write_errors"] == 0 141 | with open(state_file) as f: 142 | assert bs.state == json.load(f) 143 | 144 | backup_sites: Dict[str, BackupSiteInfo] = { 145 | "default": { 146 | "recovery_only": False, 147 | "encryption_keys": {}, 148 | "object_storage": { 149 | "directory": backup_target_location, 150 | "storage_type": "local", 151 | }, 152 | "split_size": None, 153 | } 154 | } 155 | backups = Controller.get_backup_list(backup_sites) 156 | assert len(backups) == 1 157 | backup = backups[0] 158 | assert not backup["closed_at"] 159 | assert backup["completed_at"] 160 | assert backup["stream_id"] 161 | assert backup["resumable"] 162 | assert backup["site"] == "default" 163 | assert backup["basebackup_info"]["split_size"] == split_size 164 | if split_size: 165 | expected_number_of_splits = math.ceil(backup["basebackup_info"]["compressed_size"] / float(split_size)) 166 | assert backup["basebackup_info"]["number_of_splits"] == expected_number_of_splits 167 | 168 | # verify that all chunks have been uploaded 169 | number_of_splits_found = 0 170 | file_storage = rohmu.get_transfer(backup_sites["default"]["object_storage"]) 171 | streams = list(file_storage.list_prefixes("default")) 172 | assert len(streams) == 1 173 | for site_and_stream_id in streams: 174 | for info in file_storage.list_iter(site_and_stream_id): 175 | file_name = info["name"].rsplit("/", 1)[-1] 176 | if file_name.startswith("basebackup.xbstream"): 177 | number_of_splits_found += 1 178 | 179 | assert number_of_splits_found == expected_number_of_splits 180 | 181 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 182 | cursor.execute("SELECT @@GLOBAL.gtid_executed AS gtid_executed") 183 | gtid_executed = myhoard_util.parse_gtid_range_string(cast(dict, cursor.fetchone())["gtid_executed"]) 184 | assert bs_observer.state["remote_gtid_executed"] == gtid_executed 185 | cursor.execute("INSERT INTO db1.t1 (id, data) VALUES (2, 'hijkl')") 186 | cursor.execute("COMMIT") 187 | cursor.execute("FLUSH BINARY LOGS") 188 | 189 | new_binlogs = scanner.scan_new(None) 190 | assert new_binlogs 191 | bs.add_binlogs(new_binlogs) 192 | assert bs.state["pending_binlogs"] 193 | assert not bs.is_binlog_safe_to_delete(new_binlogs[0]) 194 | assert not bs.is_log_backed_up(log_index=new_binlogs[0]["local_index"]) 195 | assert not bs_observer.is_binlog_safe_to_delete(new_binlogs[0]) 196 | assert not bs_observer.is_log_backed_up(log_index=new_binlogs[0]["local_index"]) 197 | 198 | bs.state_manager.update_state(initial_latest_complete_binlog_index=new_binlogs[0]["local_index"]) 199 | assert bs.is_binlog_safe_to_delete(new_binlogs[0]) 200 | 201 | bs.delete_state() 202 | assert not os.path.exists(bs.state_manager.state_file) 203 | assert not os.path.exists(bs.remote_binlog_manager.state_file) 204 | -------------------------------------------------------------------------------- /test/test_basebackup_operation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from . import build_statsd_client, MySQLConfig, restart_mysql 3 | from myhoard.basebackup_operation import BasebackupOperation 4 | from packaging.version import Version 5 | from typing import IO 6 | from unittest import SkipTest 7 | from unittest.mock import mock_open, patch 8 | 9 | import myhoard.util as myhoard_util 10 | import os 11 | import pytest 12 | 13 | pytestmark = [pytest.mark.unittest, pytest.mark.all] 14 | 15 | 16 | @pytest.mark.parametrize("extra_uuid", [None, "daf0a972-acd8-44b4-941e-42cbbb43a593"]) 17 | def test_basic_backup(mysql_master, extra_uuid): 18 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 19 | for db_index in range(15): 20 | cursor.execute(f"CREATE DATABASE test{db_index}") 21 | cursor.execute(f"CREATE TABLE test{db_index}.foo{db_index} (id integer primary key)") 22 | for value in range(15): 23 | cursor.execute(f"INSERT INTO test{db_index}.foo{db_index} (id) VALUES ({value})") 24 | cursor.execute("COMMIT") 25 | # Insert second source_uuid into gtid_executed to test that this is parsed correctly 26 | if extra_uuid: 27 | cursor.execute( 28 | "INSERT INTO mysql.gtid_executed (source_uuid, interval_start, interval_end) " 29 | f"VALUES ('{extra_uuid}', 1, 1)" 30 | ) 31 | cursor.execute("COMMIT") 32 | 33 | if extra_uuid: 34 | restart_mysql(mysql_master) 35 | 36 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 37 | cursor.execute("SHOW MASTER STATUS") 38 | master_status = cursor.fetchone() 39 | 40 | # Executed_Gtid_Set has linefeeds in it but XtraBackup (8.0) strips those away, do the same here 41 | master_status["Executed_Gtid_Set"] = master_status["Executed_Gtid_Set"].replace("\n", "") 42 | 43 | backed_up_files = set() 44 | 45 | # pylint: disable=unused-argument 46 | def progress_callback( 47 | *, 48 | estimated_progress, 49 | estimated_total_bytes, 50 | last_file_name, 51 | last_file_size, 52 | processed_original_bytes, 53 | ): 54 | if last_file_name: 55 | backed_up_files.add(last_file_name) 56 | 57 | bytes_read = [0] 58 | 59 | def stream_handler(stream): 60 | while True: 61 | data = stream.read(10 * 1024) 62 | if not data: 63 | break 64 | bytes_read[0] += len(data) 65 | 66 | encryption_key = os.urandom(24) 67 | op = BasebackupOperation( 68 | encryption_algorithm="AES256", 69 | encryption_key=encryption_key, 70 | mysql_client_params=mysql_master.connect_options, 71 | mysql_config_file_name=mysql_master.config_name, 72 | mysql_data_directory=mysql_master.config_options.datadir, 73 | progress_callback=progress_callback, 74 | stats=build_statsd_client(), 75 | stream_handler=stream_handler, 76 | temp_dir=mysql_master.base_dir, 77 | ) 78 | op.create_backup() 79 | 80 | for db_index in range(15): 81 | assert f"./test{db_index}/foo{db_index}.ibd" in backed_up_files, f"Couldn't find index {db_index}" 82 | 83 | assert op.binlog_info 84 | assert op.binlog_info["gtid"] == master_status["Executed_Gtid_Set"] 85 | 86 | # Taking basebackup might flush binary logs 87 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 88 | cursor.execute("SHOW MASTER STATUS") 89 | master_status = cursor.fetchone() 90 | 91 | assert op.binlog_info 92 | assert op.binlog_info["file_name"] == master_status["File"] 93 | assert op.binlog_info["file_position"] == master_status["Position"] 94 | 95 | # Even almost empty backup is at least 1.5 megs due to standard files that are always included 96 | assert bytes_read[0] > 1.5 * 1024 * 1024 97 | 98 | # Now add incremental backup 99 | assert op.checkpoints_file_content is not None 100 | inc_op = BasebackupOperation( 101 | encryption_algorithm="AES256", 102 | encryption_key=encryption_key, 103 | mysql_client_params=mysql_master.connect_options, 104 | mysql_config_file_name=mysql_master.config_name, 105 | mysql_data_directory=mysql_master.config_options.datadir, 106 | progress_callback=progress_callback, 107 | stats=build_statsd_client(), 108 | stream_handler=stream_handler, 109 | temp_dir=mysql_master.base_dir, 110 | incremental_since_checkpoint=op.checkpoints_file_content, 111 | ) 112 | inc_op.create_backup() 113 | assert inc_op.checkpoints_file_content is not None 114 | 115 | full_backup_checkpoint = myhoard_util.parse_xtrabackup_info(op.checkpoints_file_content) 116 | inc_backup_checkpoint = myhoard_util.parse_xtrabackup_info(inc_op.checkpoints_file_content) 117 | assert full_backup_checkpoint["backup_type"] == "full-backuped" 118 | assert inc_backup_checkpoint["backup_type"] == "incremental" 119 | assert full_backup_checkpoint["from_lsn"] == "0" 120 | assert full_backup_checkpoint["to_lsn"] == inc_backup_checkpoint["from_lsn"] 121 | 122 | 123 | def test_stream_handler_error_is_propagated(mysql_master): 124 | def stream_handler(_stream): 125 | raise ValueError("This is test error") 126 | 127 | encryption_key = os.urandom(24) 128 | op = BasebackupOperation( 129 | encryption_algorithm="AES256", 130 | encryption_key=encryption_key, 131 | mysql_client_params=mysql_master.connect_options, 132 | mysql_config_file_name=mysql_master.config_name, 133 | mysql_data_directory=mysql_master.config_options.datadir, 134 | stats=build_statsd_client(), 135 | stream_handler=stream_handler, 136 | temp_dir=mysql_master.base_dir, 137 | ) 138 | with pytest.raises(ValueError, match="^This is test error$"): 139 | op.create_backup() 140 | 141 | 142 | def test_fails_on_invalid_params(mysql_master): 143 | def stream_handler(_stream): 144 | pass 145 | 146 | op = BasebackupOperation( 147 | encryption_algorithm="nosuchalgo", 148 | encryption_key=os.urandom(24), 149 | mysql_client_params={ 150 | "host": "127.0.0.1", 151 | }, 152 | mysql_config_file_name=mysql_master.config_name, 153 | mysql_data_directory=mysql_master.config_options.datadir, 154 | stats=build_statsd_client(), 155 | stream_handler=stream_handler, 156 | temp_dir=mysql_master.base_dir, 157 | ) 158 | # we're opening a connection to check the version on mysql >= 8.0.29 below we're failing with the first message 159 | with pytest.raises(Exception, match=r"(^xtrabackup failed with code 13$|^mysql_cursor\(\) missing 3 required keyword)"): 160 | op.create_backup() 161 | 162 | 163 | def test_backup_with_non_optimized_tables(mysql_master: MySQLConfig) -> None: 164 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 165 | version = myhoard_util.get_mysql_version(cursor) 166 | if Version(version) < Version("8.0.29"): 167 | raise SkipTest("DB version doesn't need OPTIMIZE TABLE") 168 | 169 | def create_test_db(*, db_name: str, table_name: str, add_pk: bool) -> None: 170 | cursor.execute(f"CREATE DATABASE {db_name}") 171 | 172 | if add_pk: 173 | id_column_type = "integer primary key" 174 | else: 175 | id_column_type = "integer" 176 | 177 | cursor.execute(f"CREATE TABLE {db_name}.{table_name} (id {id_column_type})") 178 | for value in range(15): 179 | cursor.execute(f"INSERT INTO {db_name}.{table_name} (id) VALUES ({value})") 180 | cursor.execute("COMMIT") 181 | cursor.execute(f"ALTER TABLE {db_name}.{table_name} ADD COLUMN foobar VARCHAR(15)") 182 | cursor.execute("COMMIT") 183 | 184 | for db_index in range(15): 185 | create_test_db(db_name=f"test{db_index}", table_name=f"foo{db_index}", add_pk=db_index % 2 == 0) 186 | 187 | create_test_db(db_name="`söme/thing'; weird`", table_name="`table with space`", add_pk=True) 188 | 189 | def stream_handler(stream: IO) -> None: 190 | while True: 191 | if not stream.read(10 * 1024): 192 | break 193 | 194 | encryption_key = os.urandom(24) 195 | op = BasebackupOperation( 196 | encryption_algorithm="AES256", 197 | encryption_key=encryption_key, 198 | mysql_client_params=mysql_master.connect_options, 199 | mysql_config_file_name=mysql_master.config_name, 200 | mysql_data_directory=mysql_master.config_options.datadir, 201 | optimize_tables_before_backup=True, 202 | progress_callback=None, 203 | stats=build_statsd_client(), 204 | stream_handler=stream_handler, 205 | temp_dir=mysql_master.base_dir, 206 | ) 207 | op.create_backup() 208 | 209 | 210 | BACKUP_INFO_FILE_TEMPLATE = """ 211 | uuid = 4fe5defe-900a-11ed-9921-0261ecef4636 212 | name = blah 213 | tool_name = xtrabackup 214 | tool_command = --defaults-file=/tmp/mysql.conf --backup --compress --no-version-check --stream xbstream --target-dir /tmp/ --extra-lsndir=/lsn/ 215 | tool_version = 8.0.30-23.1.aiven 216 | ibbackup_version = 8.0.30-23.1.aiven 217 | server_version = 8.0.30 218 | start_time = 2023-01-09 10:42:26 219 | end_time = 2023-01-09 10:42:28 220 | lock_time = 1 221 | {binlog_line} 222 | innodb_from_lsn = 0 223 | innodb_to_lsn = 31122960 224 | partial = N 225 | incremental = N 226 | format = xbstream 227 | compressed = compressed 228 | encrypted = N 229 | """ 230 | 231 | 232 | def test_process_binlog_info(mysql_master: MySQLConfig) -> None: 233 | op = BasebackupOperation( 234 | encryption_algorithm="AES256", 235 | encryption_key=os.urandom(24), 236 | mysql_client_params=mysql_master.connect_options, 237 | mysql_config_file_name=mysql_master.config_name, 238 | mysql_data_directory=mysql_master.config_options.datadir, 239 | optimize_tables_before_backup=True, 240 | progress_callback=None, 241 | stats=build_statsd_client(), 242 | stream_handler=None, 243 | temp_dir=mysql_master.base_dir, 244 | ) 245 | # lsn_dir is not specified 246 | with pytest.raises(AssertionError): 247 | op._process_xtrabackup_info() # pylint: disable=protected-access 248 | assert op.binlog_info is None 249 | 250 | op.lsn_dir = "/tmp/lsn-dir" 251 | with patch("builtins.open", side_effect=FileNotFoundError): 252 | with pytest.raises(FileNotFoundError): 253 | op._process_xtrabackup_info() # pylint: disable=protected-access 254 | assert op.binlog_info is None 255 | 256 | line = "abrakadabra" 257 | with patch("builtins.open", mock_open(read_data=BACKUP_INFO_FILE_TEMPLATE.format(binlog_line=line))): 258 | op._process_xtrabackup_info() # pylint: disable=protected-access 259 | assert op.binlog_info is None 260 | 261 | line = ( 262 | "binlog_pos = filename 'binlog.000220', position '236', GTID of the last change " 263 | "'00006582-5ce4-11ea-9748-22f28f5a4c51:1-55419,00ae3f16-75ce-11ea-9b71-7266f0f43b98:1-4104'" 264 | ) 265 | with patch("builtins.open", mock_open(read_data=BACKUP_INFO_FILE_TEMPLATE.format(binlog_line=line))): 266 | op._process_xtrabackup_info() # pylint: disable=protected-access 267 | assert op.binlog_info == { 268 | "file_name": "binlog.000220", 269 | "file_position": 236, 270 | "gtid": "00006582-5ce4-11ea-9748-22f28f5a4c51:1-55419,00ae3f16-75ce-11ea-9b71-7266f0f43b98:1-4104", 271 | } 272 | 273 | line = "binlog_pos = filename 'binlog.000221', position '238'" 274 | with patch("builtins.open", mock_open(read_data=BACKUP_INFO_FILE_TEMPLATE.format(binlog_line=line))): 275 | op._process_xtrabackup_info() # pylint: disable=protected-access 276 | assert op.binlog_info == { 277 | "file_name": "binlog.000221", 278 | "file_position": 238, 279 | "gtid": None, 280 | } 281 | -------------------------------------------------------------------------------- /test/test_basebackup_restore_operation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from . import build_statsd_client, wait_for_port 3 | from .helpers.version import xtrabackup_version_to_string 4 | from myhoard.basebackup_operation import BasebackupOperation 5 | from myhoard.basebackup_restore_operation import BasebackupRestoreOperation 6 | from unittest.mock import patch 7 | 8 | import myhoard.util as myhoard_util 9 | import os 10 | import pytest 11 | import shutil 12 | import subprocess 13 | import tempfile 14 | 15 | pytestmark = [pytest.mark.unittest, pytest.mark.all] 16 | 17 | 18 | def test_get_xtrabackup_cmd(): 19 | op_kwargs = { 20 | "encryption_algorithm": "AES256", 21 | "encryption_key": "123", 22 | "free_memory_percentage": 80, 23 | "mysql_config_file_name": "/etc/mysql/mysql.conf", 24 | "mysql_data_directory": "/usr/lib/mysql/", 25 | "stats": build_statsd_client(), 26 | "stream_handler": None, 27 | "target_dir": "", 28 | "temp_dir": "", 29 | } 30 | op = BasebackupRestoreOperation(**op_kwargs) 31 | cmd = op.get_xtrabackup_cmd() 32 | assert cmd == "xtrabackup" 33 | xtrabackup_path = shutil.which("xtrabackup") 34 | xtrabackup_dir = os.path.dirname(xtrabackup_path) 35 | xtrabackup_version = myhoard_util.get_xtrabackup_version() 36 | with patch.dict(os.environ, {"PXB_EXTRA_BIN_PATHS": xtrabackup_dir}): 37 | cmd = BasebackupRestoreOperation( 38 | **op_kwargs, backup_tool_version=xtrabackup_version_to_string(xtrabackup_version) 39 | ).get_xtrabackup_cmd() 40 | assert cmd == xtrabackup_path 41 | cmd = BasebackupRestoreOperation(**op_kwargs, backup_tool_version="8.0.0").get_xtrabackup_cmd() 42 | assert cmd == "xtrabackup" 43 | 44 | 45 | def test_basic_restore(mysql_master, mysql_empty): 46 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 47 | for db_index in range(15): 48 | cursor.execute(f"CREATE DATABASE test{db_index}") 49 | cursor.execute(f"CREATE TABLE test{db_index}.foo{db_index} (id integer primary key)") 50 | for value in range(15): 51 | cursor.execute(f"INSERT INTO test{db_index}.foo{db_index} (id) VALUES ({value})") 52 | cursor.execute("FLUSH LOGS") 53 | cursor.execute("SHOW MASTER STATUS") 54 | old_master_status = cursor.fetchone() 55 | 56 | encryption_key = os.urandom(24) 57 | 58 | with tempfile.NamedTemporaryFile() as backup_file: 59 | 60 | def output_stream_handler(stream): 61 | shutil.copyfileobj(stream, backup_file) 62 | 63 | backup_op = BasebackupOperation( 64 | encryption_algorithm="AES256", 65 | encryption_key=encryption_key, 66 | mysql_client_params=mysql_master.connect_options, 67 | mysql_config_file_name=mysql_master.config_name, 68 | mysql_data_directory=mysql_master.config_options.datadir, 69 | stats=build_statsd_client(), 70 | stream_handler=output_stream_handler, 71 | temp_dir=mysql_master.base_dir, 72 | ) 73 | backup_op.create_backup() 74 | 75 | backup_file.seek(0) 76 | 77 | def input_stream_handler(stream): 78 | shutil.copyfileobj(backup_file, stream) 79 | stream.close() 80 | 81 | with tempfile.TemporaryDirectory(dir=mysql_empty.base_dir, prefix="myhoard_target_") as temp_target_dir: 82 | restore_op = BasebackupRestoreOperation( 83 | encryption_algorithm="AES256", 84 | encryption_key=encryption_key, 85 | free_memory_percentage=80, 86 | mysql_config_file_name=mysql_empty.config_name, 87 | mysql_data_directory=mysql_empty.config_options.datadir, 88 | stats=build_statsd_client(), 89 | stream_handler=input_stream_handler, 90 | target_dir=temp_target_dir, 91 | temp_dir=mysql_empty.base_dir, 92 | backup_tool_version=xtrabackup_version_to_string(myhoard_util.get_xtrabackup_version()), 93 | ) 94 | restore_op.prepare_backup() 95 | restore_op.restore_backup() 96 | 97 | assert restore_op.number_of_files >= backup_op.number_of_files 98 | 99 | mysql_empty.proc = subprocess.Popen(mysql_empty.startup_command) # pylint: disable=consider-using-with 100 | wait_for_port(mysql_empty.port) 101 | 102 | with myhoard_util.mysql_cursor( 103 | password=mysql_master.password, 104 | port=mysql_empty.port, 105 | user=mysql_master.user, 106 | ) as cursor: 107 | for db_index in range(15): 108 | cursor.execute(f"SELECT id FROM test{db_index}.foo{db_index}") 109 | results = cursor.fetchall() 110 | assert sorted(result["id"] for result in results) == sorted(range(15)) 111 | cursor.execute("SHOW MASTER STATUS") 112 | new_master_status = cursor.fetchone() 113 | assert old_master_status["Executed_Gtid_Set"] == new_master_status["Executed_Gtid_Set"] 114 | 115 | 116 | def test_incremental_backup_restore(mysql_master, mysql_empty) -> None: 117 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 118 | for db_index in range(5): 119 | cursor.execute(f"CREATE DATABASE test{db_index}") 120 | cursor.execute(f"CREATE TABLE test{db_index}.foo{db_index} (id integer primary key)") 121 | for value in range(10): 122 | cursor.execute(f"INSERT INTO test{db_index}.foo{db_index} (id) VALUES ({value})") 123 | cursor.execute("FLUSH LOGS") 124 | 125 | encryption_key = os.urandom(24) 126 | 127 | with tempfile.NamedTemporaryFile() as backup_file1, tempfile.NamedTemporaryFile() as backup_file2: 128 | 129 | def build_stream_handler(backup_file): 130 | def output_stream_handler(stream): 131 | shutil.copyfileobj(stream, backup_file) 132 | 133 | return output_stream_handler 134 | 135 | backup_op = BasebackupOperation( 136 | encryption_algorithm="AES256", 137 | encryption_key=encryption_key, 138 | mysql_client_params=mysql_master.connect_options, 139 | mysql_config_file_name=mysql_master.config_name, 140 | mysql_data_directory=mysql_master.config_options.datadir, 141 | stats=build_statsd_client(), 142 | stream_handler=build_stream_handler(backup_file1), 143 | temp_dir=mysql_empty.base_dir, 144 | ) 145 | backup_op.create_backup() 146 | 147 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 148 | for db_index in range(5, 10): 149 | cursor.execute(f"CREATE DATABASE test{db_index}") 150 | cursor.execute(f"CREATE TABLE test{db_index}.foo{db_index} (id integer primary key)") 151 | for value in range(10): 152 | cursor.execute(f"INSERT INTO test{db_index}.foo{db_index} (id) VALUES ({value})") 153 | cursor.execute("FLUSH LOGS") 154 | cursor.execute("SHOW MASTER STATUS") 155 | old_master_status = cursor.fetchone() 156 | assert old_master_status 157 | 158 | backup_op_inc = BasebackupOperation( 159 | encryption_algorithm="AES256", 160 | encryption_key=encryption_key, 161 | mysql_client_params=mysql_master.connect_options, 162 | mysql_config_file_name=mysql_master.config_name, 163 | mysql_data_directory=mysql_master.config_options.datadir, 164 | stats=build_statsd_client(), 165 | stream_handler=build_stream_handler(backup_file2), 166 | temp_dir=mysql_empty.base_dir, 167 | incremental_since_checkpoint=backup_op.checkpoints_file_content, 168 | ) 169 | backup_op_inc.create_backup() 170 | 171 | def build_input_stream_handler(backup_file): 172 | backup_file.seek(0) 173 | 174 | def input_stream_handler(stream): 175 | shutil.copyfileobj(backup_file, stream) 176 | stream.close() 177 | 178 | return input_stream_handler 179 | 180 | with tempfile.TemporaryDirectory(dir=mysql_empty.base_dir, prefix="myhoard_target_") as temp_target_dir: 181 | restore_op = BasebackupRestoreOperation( 182 | encryption_algorithm="AES256", 183 | encryption_key=encryption_key, 184 | free_memory_percentage=80, 185 | mysql_config_file_name=mysql_empty.config_name, 186 | mysql_data_directory=mysql_empty.config_options.datadir, 187 | stats=build_statsd_client(), 188 | stream_handler=build_input_stream_handler(backup_file1), 189 | target_dir=temp_target_dir, 190 | temp_dir=mysql_empty.base_dir, 191 | ) 192 | restore_op.prepare_backup( 193 | incremental=False, apply_log_only=True, checkpoints_file_content=backup_op.checkpoints_file_content 194 | ) 195 | restore_op_inc = BasebackupRestoreOperation( 196 | encryption_algorithm="AES256", 197 | encryption_key=encryption_key, 198 | free_memory_percentage=80, 199 | mysql_config_file_name=mysql_empty.config_name, 200 | mysql_data_directory=mysql_empty.config_options.datadir, 201 | stats=build_statsd_client(), 202 | stream_handler=build_input_stream_handler(backup_file2), 203 | target_dir=temp_target_dir, 204 | temp_dir=mysql_empty.base_dir, 205 | ) 206 | restore_op_inc.prepare_backup( 207 | incremental=True, apply_log_only=False, checkpoints_file_content=backup_op_inc.checkpoints_file_content 208 | ) 209 | restore_op_inc.restore_backup() 210 | 211 | assert restore_op_inc.number_of_files >= backup_op.number_of_files 212 | 213 | mysql_empty.proc = subprocess.Popen(mysql_empty.startup_command) # pylint: disable=consider-using-with 214 | wait_for_port(mysql_empty.port) 215 | 216 | with myhoard_util.mysql_cursor( 217 | password=mysql_master.password, 218 | port=mysql_empty.port, 219 | user=mysql_master.user, 220 | ) as cursor: 221 | for db_index in range(10): 222 | cursor.execute(f"SELECT id FROM test{db_index}.foo{db_index}") 223 | results = cursor.fetchall() 224 | assert sorted(result["id"] for result in results) == sorted(range(10)) 225 | cursor.execute("SHOW MASTER STATUS") 226 | new_master_status = cursor.fetchone() 227 | assert new_master_status 228 | assert old_master_status["Executed_Gtid_Set"] == new_master_status["Executed_Gtid_Set"] 229 | -------------------------------------------------------------------------------- /test/test_binlog_scanner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from . import build_statsd_client 3 | from myhoard.binlog_scanner import BinlogScanner 4 | 5 | import json 6 | import myhoard.util as myhoard_util 7 | import os 8 | import pytest 9 | 10 | pytestmark = [pytest.mark.unittest, pytest.mark.all] 11 | 12 | 13 | def test_read_gtids_from_log(session_tmpdir, mysql_master): 14 | state_file_name = os.path.join(session_tmpdir().strpath, "scanner_state.json") 15 | scanner = BinlogScanner( 16 | binlog_prefix=mysql_master.config_options.binlog_file_prefix, 17 | server_id=mysql_master.server_id, 18 | state_file=state_file_name, 19 | stats=build_statsd_client(), 20 | ) 21 | with open(state_file_name, "r") as f: 22 | assert json.load(f) == scanner.state 23 | 24 | scanner.scan_new(None) 25 | scanner.scan_removed(None) 26 | # We don't use binlog when initializing so there are no binlogs for fresh server 27 | assert len(scanner.binlogs) == 0 28 | with open(state_file_name, "r") as f: 29 | assert json.load(f) == scanner.state 30 | 31 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 32 | cursor.execute("CREATE TABLE foo(id INTEGER PRIMARY KEY)") 33 | cursor.execute("COMMIT") 34 | cursor.execute("INSERT INTO foo (id) VALUES (1)") 35 | cursor.execute("COMMIT") 36 | cursor.execute("FLUSH BINARY LOGS") 37 | cursor.execute("SHOW MASTER STATUS") 38 | master_info = cursor.fetchone() 39 | 40 | scanner.scan_new(None) 41 | scanner.scan_removed(None) 42 | with open(state_file_name, "r") as f: 43 | assert json.load(f) == scanner.state 44 | assert len(scanner.binlogs) == 1 45 | print(master_info) 46 | binlog1 = scanner.binlogs[0] 47 | assert binlog1["file_name"] == "bin.000001" 48 | assert len(binlog1["gtid_ranges"]) == 1 49 | server_uuid, ranges = master_info["Executed_Gtid_Set"].split(":") 50 | range_start, range_end = ranges.split("-") 51 | range1 = binlog1["gtid_ranges"][0] 52 | assert range1["server_uuid"] == server_uuid 53 | assert range1["start"] == int(range_start) 54 | assert range1["end"] == int(range_end) 55 | assert range1["server_id"] == mysql_master.config_options.server_id 56 | 57 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 58 | cursor.execute("CREATE TABLE foo2(id INTEGER PRIMARY KEY)") 59 | cursor.execute("COMMIT") 60 | cursor.execute("FLUSH BINARY LOGS") 61 | cursor.execute("PURGE BINARY LOGS TO 'bin.000002'") 62 | cursor.execute("SHOW MASTER STATUS") 63 | master_info = cursor.fetchone() 64 | 65 | scanner.scan_new(None) 66 | scanner.scan_removed(None) 67 | with open(state_file_name, "r") as f: 68 | scanner_state = json.load(f) 69 | assert scanner_state == scanner.state 70 | assert len(scanner.binlogs) == 1 71 | binlog2 = scanner.binlogs[0] 72 | assert binlog2["file_name"] == "bin.000002" 73 | assert len(binlog2["gtid_ranges"]) == 1 74 | assert binlog2["gtid_ranges"][0]["start"] == range1["end"] + 1 75 | expected_end = int(master_info["Executed_Gtid_Set"].split("-")[-1]) 76 | assert binlog2["gtid_ranges"][0]["end"] == expected_end 77 | 78 | scanner.scan_new(None) 79 | scanner.scan_removed(None) 80 | assert scanner.state == scanner_state 81 | 82 | with myhoard_util.mysql_cursor(**mysql_master.connect_options) as cursor: 83 | cursor.execute("FLUSH BINARY LOGS") 84 | 85 | scanner.scan_new(None) 86 | scanner.scan_removed(None) 87 | assert len(scanner.binlogs) == 2 88 | assert scanner.binlogs[0] == binlog2 89 | binlog4 = scanner.binlogs[1] 90 | assert binlog4["file_name"] == "bin.000003" 91 | assert len(binlog4["gtid_ranges"]) == 0 92 | 93 | scanner = BinlogScanner( 94 | binlog_prefix=mysql_master.config_options.binlog_file_prefix, 95 | server_id=mysql_master.server_id, 96 | state_file=state_file_name, 97 | stats=build_statsd_client(), 98 | ) 99 | assert len(scanner.binlogs) == 2 100 | assert scanner.binlogs[0] == binlog2 101 | binlog4 = scanner.binlogs[1] 102 | assert binlog4["file_name"] == "bin.000003" 103 | -------------------------------------------------------------------------------- /test/test_myhoard.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from . import get_random_port, wait_for_port, while_asserts 3 | 4 | import contextlib 5 | import json 6 | import os 7 | import pytest 8 | import requests 9 | import signal 10 | import subprocess 11 | import sys 12 | 13 | pytestmark = [pytest.mark.unittest, pytest.mark.all] 14 | ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) 15 | 16 | 17 | def test_sample_config_keys_match_fixture_config_keys(myhoard_config): 18 | def validate_recursive(actual_object, expected_object, root=""): 19 | for key, value in actual_object.items(): 20 | assert key in expected_object, f"Unexpected key {key!r} found under {root!r}" 21 | if isinstance(value, dict): 22 | validate_recursive(value, expected_object[key], f"{root}.{key}".lstrip(".")) 23 | for key, value in expected_object.items(): 24 | assert key in actual_object, f"Expected key {key!r} under {root!r} not found" 25 | 26 | with open(os.path.join(ROOT_DIR, "myhoard.json")) as f: 27 | actual_config = json.load(f) 28 | 29 | validate_recursive(actual_config, myhoard_config) 30 | 31 | 32 | def test_basic_daemon_execution(myhoard_config): 33 | config_name = os.path.join(myhoard_config["state_directory"], "myhoard.json") 34 | with open(config_name, "w") as f: 35 | json.dump(myhoard_config, f) 36 | 37 | python3 = sys.executable or os.environ.get("PYTHON", "python3") 38 | cmd = [python3, "-c", "from myhoard.myhoard import main; main()", "--config", config_name, "--log-level", "DEBUG"] 39 | print("Running command", cmd) 40 | with subprocess.Popen(cmd, env={"PYTHONPATH": ROOT_DIR}) as proc: 41 | try: 42 | http_address = myhoard_config["http_address"] 43 | http_port = myhoard_config["http_port"] 44 | wait_for_port(http_port, hostname=http_address, wait_time=5) 45 | 46 | def backups_not_none(): 47 | response = requests.get(f"http://{http_address}:{http_port}/backup", timeout=1) 48 | assert response.status_code == 200 49 | assert response.json()["backups"] is not None 50 | 51 | while_asserts(backups_not_none) 52 | 53 | # Update config and see the new config gets applied 54 | new_http_port = get_random_port(start=3000, end=30000) 55 | assert new_http_port != http_port 56 | myhoard_config["http_port"] = new_http_port 57 | with open(config_name, "w") as f: 58 | json.dump(myhoard_config, f) 59 | 60 | os.kill(proc.pid, signal.SIGHUP) 61 | wait_for_port(new_http_port, hostname=http_address, wait_time=2) 62 | response = requests.get(f"http://{http_address}:{new_http_port}/backup", timeout=1) 63 | response.raise_for_status() 64 | assert response.json()["backups"] is not None 65 | 66 | os.kill(proc.pid, signal.SIGINT) 67 | proc.communicate(input=None, timeout=2) 68 | assert proc.returncode == 0 69 | finally: 70 | with contextlib.suppress(Exception): 71 | os.kill(proc.pid, signal.SIGKILL) 72 | -------------------------------------------------------------------------------- /test/test_statsd.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from myhoard.statsd import StatsClient 3 | from socket import socket 4 | from types import ModuleType 5 | from typing import Callable, Generator 6 | from unittest.mock import ANY, MagicMock, patch 7 | 8 | import datetime 9 | import pytest 10 | import sentry_sdk 11 | import time 12 | 13 | FAKE_SENTRY_DSN = "https://random.ingest.sentry.io/project_id" 14 | 15 | 16 | @pytest.fixture(name="sentry_init") 17 | def fixture_sentry_init() -> Generator[Callable[[], ModuleType], None, None]: 18 | def inner(*a, **kw): 19 | hub = sentry_sdk.Hub.current 20 | client = sentry_sdk.Client(*a, **kw) 21 | hub.bind_client(client) 22 | return sentry_sdk 23 | 24 | with sentry_sdk.Hub(None): 25 | yield inner 26 | 27 | 28 | @pytest.fixture(name="stats_client", scope="function") 29 | def fixture_stats_client(monkeypatch, sentry_init: Callable[[], ModuleType]) -> StatsClient: 30 | stats_client = StatsClient(host=None, sentry_dsn=None) 31 | 32 | def fake_initialize() -> None: 33 | if not stats_client.sentry_config.get("dsn"): 34 | stats_client.sentry = None 35 | else: 36 | stats_client.sentry = sentry_init() 37 | 38 | monkeypatch.setattr(stats_client, "_initialize_sentry", fake_initialize) 39 | return stats_client 40 | 41 | 42 | def test_update_sentry_config(stats_client: StatsClient) -> None: 43 | assert stats_client.sentry_config["dsn"] is None 44 | assert stats_client.sentry is None 45 | 46 | stats_client.update_sentry_config({"dsn": FAKE_SENTRY_DSN}) 47 | assert stats_client.sentry_config["dsn"] == FAKE_SENTRY_DSN 48 | assert stats_client.sentry is not None 49 | 50 | 51 | @patch.object(sentry_sdk.Scope, "set_tag") 52 | @patch.object(sentry_sdk, "init") 53 | def test_initialize_sentry( 54 | mocked_sentry_init: MagicMock, # pylint: disable=unused-argument 55 | mocked_scope_set_tag: MagicMock, 56 | sentry_init: Callable[[], ModuleType], 57 | ) -> None: 58 | # pylint: disable=protected-access 59 | mocked_sentry_init.return_value = sentry_init # noqa 60 | stats_client = StatsClient(host=None, sentry_dsn=None) 61 | assert stats_client.sentry is None 62 | 63 | stats_client.sentry_config["dsn"] = FAKE_SENTRY_DSN 64 | stats_client._initialize_sentry() 65 | assert stats_client.sentry is not None 66 | mocked_scope_set_tag.assert_not_called() 67 | 68 | stats_client.sentry_config["tags"] = {"abc": "123", "def": "456"} 69 | stats_client._initialize_sentry() 70 | assert mocked_scope_set_tag.call_count == 2 71 | 72 | 73 | @patch.object(socket, "sendto") 74 | def test_send(mocked_sendto: MagicMock) -> None: 75 | # pylint: disable=protected-access 76 | stats_client = StatsClient(host="fakehost", sentry_dsn=None) 77 | 78 | FakeEnum = Enum("FakeEnum", ["ONE", "TWO", "THREE"]) 79 | 80 | tags = { 81 | "enum": FakeEnum.ONE, 82 | "datetime": datetime.datetime(2023, 2, 15, 12, 0, 0), 83 | "empty": None, 84 | "timedelta": datetime.timedelta(minutes=2), 85 | "string": "1234", 86 | "num": 12.34, 87 | "invalid_tag": "a=b", 88 | } 89 | stats_client._send(metric="random", metric_type=b"g", value=123, tags=tags) 90 | mocked_sendto.assert_called_once_with( 91 | b"random,timedelta=120s,string=1234,num=12.34,invalid_tag=INVALID,enum=1,empty=,datetime=20230215T120000Z:123|g", 92 | ("fakehost", 8125), 93 | ) 94 | 95 | 96 | @patch.object(sentry_sdk.Scope, "set_tag") 97 | @patch.object(sentry_sdk, "capture_exception") 98 | @patch.object(StatsClient, "increase") 99 | def test_unexpected_exception( 100 | mocked_increase: MagicMock, 101 | mocked_sentry_capture_exception: MagicMock, 102 | mocked_scope_set_tag: MagicMock, 103 | stats_client: StatsClient, 104 | ) -> None: 105 | stats_client.update_sentry_config({"dsn": FAKE_SENTRY_DSN}) 106 | 107 | ex = ValueError("backupstream cache error") 108 | stats_client.unexpected_exception( 109 | ex=ex, 110 | where="BackupStream._cache_basebackup_info", 111 | elapsed=1234, 112 | ) 113 | mocked_increase.assert_called_once_with( 114 | "exception", 115 | tags={"exception": "ValueError", "where": "BackupStream._cache_basebackup_info"}, 116 | ) 117 | mocked_sentry_capture_exception.assert_called_once_with(ex) 118 | 119 | assert mocked_scope_set_tag.call_count == 1 120 | 121 | 122 | @patch.object(StatsClient, "timing") 123 | def test_timing_manager(mocked_timing: MagicMock, stats_client: StatsClient) -> None: 124 | with stats_client.timing_manager(metric="test"): 125 | time.sleep(0.1) 126 | 127 | mocked_timing.assert_called_once_with("test", ANY, {"success": "1"}) 128 | 129 | with pytest.raises(ValueError, match="random error"): 130 | with stats_client.timing_manager(metric="test2"): 131 | raise ValueError("random error") 132 | 133 | mocked_timing.assert_called_with("test2", ANY, {"success": "0"}) 134 | -------------------------------------------------------------------------------- /test/test_table.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 23 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from myhoard.table import escape_identifier, Table 3 | 4 | 5 | def test_create_table_from_row() -> None: 6 | table = Table.from_row( 7 | { 8 | "TABLE_SCHEMA": "schema", 9 | "TABLE_NAME": "name", 10 | "TABLE_ROWS": 123456, 11 | "AVG_ROW_LENGTH": 100, 12 | } 13 | ) 14 | assert table.table_schema == "schema" 15 | assert table.table_name == "name" 16 | assert table.table_rows == 123456 17 | assert table.avg_row_length == 100 18 | 19 | 20 | def test_table_estimated_size_bytes() -> None: 21 | table = Table(table_schema="schema", table_name="name", table_rows=10, avg_row_length=20) 22 | assert table.estimated_size_bytes() == 200 23 | 24 | 25 | def test_table_escaped_designator() -> None: 26 | table = Table(table_schema="bad`sch``ema", table_name="bad`name", table_rows=10, avg_row_length=20) 27 | assert table.escaped_designator() == "`bad``sch````ema`.`bad``name`" 28 | 29 | 30 | def test_escape_identifier() -> None: 31 | assert escape_identifier("name") == "`name`" 32 | assert escape_identifier("na`me") == "`na``me`" 33 | assert escape_identifier("na``me") == "`na````me`" 34 | -------------------------------------------------------------------------------- /test/test_web_server.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from . import awhile_asserts, while_asserts 3 | from myhoard.backup_stream import BackupStream 4 | from myhoard.controller import Controller 5 | from myhoard.errors import BadRequest 6 | from myhoard.restore_coordinator import RestoreCoordinator 7 | from myhoard.web_server import WebServer 8 | 9 | import datetime 10 | import pytest 11 | import uuid 12 | 13 | pytestmark = [pytest.mark.unittest, pytest.mark.all] 14 | 15 | 16 | async def test_backup_create(master_controller, web_client): 17 | controller = master_controller[0] 18 | 19 | controller.switch_to_active_mode() 20 | controller.start() 21 | 22 | def is_streaming_binlogs(): 23 | assert controller.backup_streams 24 | assert controller.backup_streams[0].active_phase == BackupStream.ActivePhase.binlog 25 | 26 | while_asserts(is_streaming_binlogs, timeout=15) 27 | 28 | log_count_before = len(controller.backup_streams[0].remote_binlogs) 29 | await post_and_verify_json_body( 30 | web_client, "/backup", {"backup_type": WebServer.BackupType.binlog, "wait_for_upload": 1} 31 | ) 32 | log_count_after = len(controller.backup_streams[0].remote_binlogs) 33 | assert log_count_after > log_count_before 34 | 35 | await post_and_verify_json_body(web_client, "/backup", {"backup_type": WebServer.BackupType.binlog}) 36 | 37 | await post_and_verify_json_body(web_client, "/backup", {}, expected_status=400) 38 | 39 | await post_and_verify_json_body(web_client, "/backup", {"backup_type": WebServer.BackupType.basebackup}) 40 | 41 | async def has_two_backups(): 42 | response = await get_and_verify_json_body(web_client, "/backup") 43 | assert response["backups"] 44 | assert len(response["backups"]) == 2 45 | 46 | await awhile_asserts(has_two_backups, timeout=15) 47 | 48 | 49 | async def test_backup_list(master_controller, web_client): 50 | controller = master_controller[0] 51 | response = await get_and_verify_json_body(web_client, "/backup") 52 | # backups is None when backend hasn't listed backups from file storage yet 53 | assert response == {"backups": None} 54 | 55 | async def backup_list_not_none(): 56 | assert (await get_and_verify_json_body(web_client, "/backup"))["backups"] is not None 57 | 58 | controller.start() 59 | # Backups is empty list when backups have been listed but there are none 60 | await awhile_asserts(backup_list_not_none) 61 | 62 | def is_streaming_binlogs(): 63 | assert controller.backup_streams 64 | assert controller.backup_streams[0].is_streaming_binlogs() 65 | 66 | # Switching to active mode causes new backup to be created, which should be returned in listing soon 67 | controller.switch_to_active_mode() 68 | while_asserts(is_streaming_binlogs, timeout=15) 69 | 70 | async def has_backup(): 71 | response = await get_and_verify_json_body(web_client, "/backup") 72 | assert response["backups"] 73 | assert len(response["backups"]) == 1 74 | backup = response["backups"][0] 75 | expected = { 76 | "basebackup_info", 77 | "broken_at", 78 | "closed_at", 79 | "completed_at", 80 | "preserve_until", 81 | "recovery_site", 82 | "resumable", 83 | "site", 84 | "stream_id", 85 | } 86 | assert set(backup) == expected 87 | 88 | await awhile_asserts(has_backup) 89 | 90 | 91 | async def test_replication_state_set(master_controller, web_client): 92 | controller = master_controller[0] 93 | state = { 94 | "server-1": { 95 | "eff55bc8-dec8-45f6-bf9f-149228c08671": [[1, 4], [7, 89]], 96 | } 97 | } 98 | response = await put_and_verify_json_body(web_client, "/replication_state", state) 99 | assert response == state 100 | assert controller.state["replication_state"] == state 101 | await put_and_verify_json_body(web_client, "/replication_state", {"foo": "bar"}, expected_status=400) 102 | 103 | 104 | async def test_status_show(master_controller, web_client): 105 | controller = master_controller[0] 106 | response = await get_and_verify_json_body(web_client, "/status") 107 | assert response["mode"] == Controller.Mode.idle 108 | controller.switch_to_active_mode() 109 | response = await get_and_verify_json_body(web_client, "/status") 110 | assert response["mode"] == Controller.Mode.promote 111 | 112 | 113 | async def test_status_update_to_active(master_controller, web_client): 114 | controller = master_controller[0] 115 | response = await put_and_verify_json_body(web_client, "/status", {"mode": "active"}) 116 | assert response["mode"] == Controller.Mode.promote 117 | assert controller.mode == Controller.Mode.promote 118 | 119 | response = await put_and_verify_json_body(web_client, "/status", {"force": True, "mode": "active"}, expected_status=400) 120 | assert response["message"] == "Can only force promotion while waiting for binlogs to be applied" 121 | 122 | response = await put_and_verify_json_body(web_client, "/status", {"mode": Controller.Mode.observe}, expected_status=400) 123 | assert response["message"] == "Switch from promote to observe mode is not allowed" 124 | 125 | 126 | async def test_status_update_to_observe(master_controller, web_client): 127 | controller = master_controller[0] 128 | response = await put_and_verify_json_body(web_client, "/status", {"mode": "observe"}) 129 | assert response["mode"] == Controller.Mode.observe 130 | assert controller.mode == Controller.Mode.observe 131 | 132 | 133 | async def test_status_update_to_restore(master_controller, web_client): 134 | response = await put_and_verify_json_body( 135 | web_client, "/status", {"mode": "restore", "site": "default", "stream_id": "abc"}, expected_status=400 136 | ) 137 | assert response["message"] == "Requested backup 'abc' for site 'default' not found" 138 | 139 | response = await put_and_verify_json_body( 140 | web_client, "/status", {"mode": "restore", "site": "default"}, expected_status=400 141 | ) 142 | assert response["message"] == "Field 'stream_id' must be given and a string" 143 | 144 | response = await put_and_verify_json_body( 145 | web_client, 146 | "/status", 147 | {"mode": "restore", "site": "default", "stream_id": "abc", "target_time": "foo"}, 148 | expected_status=400, 149 | ) 150 | assert response["message"] == "Field 'target_time' must be an integer when present" 151 | 152 | response = await put_and_verify_json_body( 153 | web_client, 154 | "/status", 155 | {"mode": "restore", "rebuild_tables": "foo", "site": "default", "stream_id": "abc"}, 156 | expected_status=400, 157 | ) 158 | assert response["message"] == "Field 'rebuild_tables' must be a boolean when present" 159 | 160 | async def restore_status_returned(): 161 | response = await get_and_verify_json_body(web_client, "/status/restore") 162 | assert isinstance(response["basebackup_compressed_bytes_downloaded"], int) 163 | assert isinstance(response["basebackup_compressed_bytes_total"], int) 164 | assert isinstance(response["binlogs_being_restored"], int) 165 | assert isinstance(response["binlogs_pending"], int) 166 | assert isinstance(response["binlogs_restored"], int) 167 | # Operation will fail because we faked the backup info 168 | assert response["phase"] != RestoreCoordinator.Phase.failed 169 | 170 | master_controller[0].state["backups"].append( 171 | {"stream_id": "abc", "site": "default", "basebackup_info": {"end_ts": 1234567}} 172 | ) 173 | await put_and_verify_json_body(web_client, "/status", {"mode": "restore", "site": "default", "stream_id": "abc"}) 174 | master_controller[0].start() 175 | await awhile_asserts(restore_status_returned, timeout=2) 176 | 177 | 178 | async def test_backup_preserve_wrong_stream_id(web_client) -> None: 179 | preserve_until = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=1) 180 | response = await put_and_verify_json_body( 181 | web_client, 182 | "/backup/1234/preserve", 183 | {"preserve_until": preserve_until.isoformat()}, 184 | expected_status=500, 185 | ) 186 | assert response["message"] == "Stream 1234 was not found in completed backups." 187 | 188 | 189 | async def test_backup_preserve_wrong_preserve_until(web_client) -> None: 190 | response = await put_and_verify_json_body( 191 | web_client, 192 | "/backup/1234/preserve", 193 | {"preserve_until": "invalid_value"}, 194 | expected_status=400, 195 | ) 196 | assert response["message"] == "`preserve_until` must be a valid isoformat datetime string." 197 | 198 | response = await put_and_verify_json_body( 199 | web_client, 200 | "/backup/1234/preserve", 201 | {"preserve_until": "2023-09-01T00:00:00"}, 202 | expected_status=400, 203 | ) 204 | assert response["message"] == "`preserve_until` must be in UTC timezone." 205 | 206 | response = await put_and_verify_json_body( 207 | web_client, 208 | "/backup/1234/preserve", 209 | {"preserve_until": "2023-09-01T00:00:00+00:00"}, 210 | expected_status=400, 211 | ) 212 | assert response["message"] == "`preserve_until` must be a date in the future." 213 | 214 | 215 | async def get_and_verify_json_body(client, path, *, expected_status=200): 216 | response = await client.get(path) 217 | response_json = await response.json() 218 | assert response.status == expected_status, f"{response.status} != {expected_status}: {response_json}" 219 | return response_json 220 | 221 | 222 | async def post_and_verify_json_body(client, path, body, *, expected_status=200): 223 | response = await client.post(path, json=body) 224 | response_json = await response.json() 225 | assert response.status == expected_status, f"{response.status} != {expected_status}: {response_json}" 226 | return response_json 227 | 228 | 229 | async def put_and_verify_json_body(client, path, body, *, expected_status=200): 230 | response = await client.put(path, json=body) 231 | response_json = await response.json() 232 | assert response.status == expected_status, f"{response.status} != {expected_status}: {response_json}" 233 | return response_json 234 | 235 | 236 | def test_validate_replication_state(): 237 | uuid1 = str(uuid.uuid4()) 238 | uuid2 = str(uuid.uuid1()) 239 | WebServer.validate_replication_state({}) # No values is valid value 240 | with pytest.raises(BadRequest): 241 | WebServer.validate_replication_state("foo") 242 | WebServer.validate_replication_state({"foo": {}}) # Server with empty GTID set is valid 243 | with pytest.raises(BadRequest): 244 | WebServer.validate_replication_state({"foo": "bar"}) 245 | with pytest.raises(BadRequest): 246 | WebServer.validate_replication_state({"foo": {"bar": "zob"}}) 247 | with pytest.raises(BadRequest): 248 | WebServer.validate_replication_state({"foo": {"bar": []}}) 249 | WebServer.validate_replication_state({"foo": {uuid1: []}}) 250 | with pytest.raises(BadRequest): 251 | WebServer.validate_replication_state({"foo": {uuid1: ["abc"]}}) 252 | with pytest.raises(BadRequest): 253 | WebServer.validate_replication_state({"foo": {uuid1: [["abc"]]}}) 254 | with pytest.raises(BadRequest): 255 | WebServer.validate_replication_state({"foo": {uuid1: [[1]]}}) 256 | WebServer.validate_replication_state({"foo": {uuid1: [[1, 2]]}}) 257 | WebServer.validate_replication_state({"foo": {uuid1: [[1, 2], [3, 4]]}}) 258 | WebServer.validate_replication_state({"foo": {uuid1: [[1, 2], [3, 4]]}, "zob": {}}) 259 | WebServer.validate_replication_state({"foo": {uuid1: [[1, 2], [3, 4]]}, "zob": {uuid2: []}}) 260 | --------------------------------------------------------------------------------