├── .flake8 ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── codeql-analysis.yml │ ├── lint.yml │ └── tests.yml ├── .gitignore ├── .isort.cfg ├── .pylintrc ├── .style.yapf ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── aiven_db_migrate ├── __init__.py └── migrate │ ├── __init__.py │ ├── __main__.py │ ├── errors.py │ ├── pgmigrate.py │ ├── pgutils.py │ └── version.py ├── ci └── install.sh ├── mypy.ini ├── requirements-dev.txt ├── setup.py └── test ├── __init__.py ├── conftest.py ├── test_db_size_check.py ├── test_force_method.py ├── test_main.py ├── test_migrate_checks.py ├── test_pg_cluster.py ├── test_pg_dump.py ├── test_pg_extensions.py ├── test_pg_migrate.py ├── test_pg_replication.py ├── test_pg_roles.py ├── test_table_filtering.py ├── test_utils.py └── utils.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | 4 | max-line-length = 125 5 | ignore = 6 | E123, # Closing brackets indent 7 | E126, # Hanging indents 8 | E129, # Visual indent 9 | E722, # bare-except 10 | W503, # Breaks & binary operators 11 | W504, # line break after binary operator (conflicts with our yapf style) 12 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @aiven/team-brute-force 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Description of the issue 2 | Describe the issue you are experiencing. 3 | 4 | ### Issue-Type (put an `x` in the square brackets) 5 | - [ ] bug report 6 | - [ ] feature request 7 | - [ ] Documentation improvement 8 | - [ ] Other 9 | 10 | ### Checklist 11 | - [ ] Running the latest version of the code. 12 | - [ ] This issue has not been reported earlier. 13 | 14 | ### Your environment 15 | * OS 16 | * Python Version 17 | * Release tag/commit of the code 18 | 19 | ### Expected behaviour 20 | What should happen? 21 | 22 | ### Actual behaviour 23 | What is actually happening? 24 | 25 | ### Steps to reproduce 26 | 27 | ### Any extra info (e.g. code snippet to reproduce, logs, screenshots, etc.) 28 | 29 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Proposed changes in this pull request 2 | 3 | ### Type (put an `x` wherever applicable) 4 | - [ ] Bug fix: Link to the issue 5 | - [ ] Feature (Non-breaking change) 6 | - [ ] Feature (Breaking change) 7 | - [ ] Documentation Improvement 8 | - [ ] Other 9 | 10 | ### Checklist 11 | Please put an `x` against the checkboxes. Write a small comment explaining if it's `N/A` (not applicable). 12 | 13 | - [ ] All the tests are passing after the introduction of new changes. 14 | - [ ] Added tests for the parts of the code I have written. 15 | - [ ] Added proper documentation wherever applicable (in code and README.md). 
16 | 17 | ### Optional extra information 18 | 19 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '42 20 * * 6' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | language: [ 'python' ] 32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 33 | # Learn more: 34 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 35 | 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v3 39 | 40 | # Initializes the CodeQL tools for scanning. 41 | - name: Initialize CodeQL 42 | uses: github/codeql-action/init@v2 43 | with: 44 | languages: ${{ matrix.language }} 45 | # If you wish to specify custom queries, you can do so here or in a config file. 46 | # By default, queries listed here will override any specified in a config file. 47 | # Prefix the list here with "+" to use these queries and those in the config file. 48 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 49 | 50 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 51 | # If this step fails, then you should remove it and run the build manually (see below) 52 | - name: Autobuild 53 | uses: github/codeql-action/autobuild@v2 54 | 55 | # ℹ️ Command-line programs to run using the OS shell. 
56 | # 📚 https://git.io/JvXDl 57 | 58 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 59 | # and modify them (or add more) to build your code if your project 60 | # uses a compiled language 61 | 62 | #- run: | 63 | # make bootstrap 64 | # make release 65 | 66 | - name: Perform CodeQL Analysis 67 | uses: github/codeql-action/analyze@v2 68 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [ "3.10", "3.11", "3.12" ] 11 | steps: 12 | - uses: actions/checkout@v3 13 | 14 | - name: Set up Python ${{ matrix.python-version }} 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | 19 | - name: Install dependencies 20 | run: pip install -r requirements-dev.txt 21 | 22 | - name: Run checks 23 | run: make validate-style 24 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Test Suite 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | unit-test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [ "3.10", "3.11", "3.12" ] 11 | steps: 12 | - uses: actions/checkout@v3 13 | 14 | - name: Set up Python ${{ matrix.python-version }} 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | 19 | - name: Setup container 20 | run: sudo ./ci/install.sh 21 | 22 | - name: Install dependencies 23 | run: pip install -r requirements-dev.txt 24 | 25 | - name: Execute unit-tests 26 | run: make test 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *.rpm 4 | .cache/ 5 | .mypy_cache/ 6 | .pytest_cache/ 7 | __pycache__ 8 | /aiven_db_migrate.egg-info 9 | /aiven_db_migrate/migrate/version.py 10 | build 11 | dist 12 | .venv* 13 | venv 14 | .vscode/* 15 | 16 | .idea 17 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | line_length = 125 3 | force_alphabetical_sort = true 4 | combine_as_imports = true 5 | no_sections = true 6 | skip_glob = 7 | multi_line_output = 5 8 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | load-plugins=pylint_quotes 3 | 4 | string-quote=double-avoid-escape 5 | triple-quote=double 6 | docstring-quote=double 7 | 8 | [MESSAGES CONTROL] 9 | disable= 10 | abstract-method, 11 | bad-continuation, 12 | chained-comparison, # R1716: Simplify chained comparison between the operands 13 | duplicate-code, 14 | fixme, 15 | invalid-name, 16 | len-as-condition, 17 | missing-docstring, 18 | no-else-return, 19 | no-else-raise, 20 | no-self-use, 21 | superfluous-parens, 22 | too-few-public-methods, 23 | too-many-ancestors, 24 | too-many-arguments, 25 | too-many-boolean-expressions, 26 | too-many-branches, 27 | too-many-instance-attributes, 28 | too-many-lines, 29 | too-many-locals, 30 | too-many-nested-blocks, 31 | 
too-many-public-methods, 32 | too-many-statements, 33 | ungrouped-imports, 34 | unused-argument, 35 | wrong-import-order, 36 | line-too-long, 37 | no-else-continue, 38 | no-else-break, 39 | import-outside-toplevel 40 | 41 | [FORMAT] 42 | max-line-length=125 43 | 44 | [REPORTS] 45 | output-format=text 46 | reports=no 47 | score=no 48 | 49 | [TYPECHECK] 50 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | # For docs, see https://github.com/google/yapf/blob/master/README.rst 3 | 4 | based_on_style = pep8 5 | # Disallow splitting between dict key and dict value in multiline {"key": "value"} lines 6 | ALLOW_SPLIT_BEFORE_DICT_VALUE = false 7 | 8 | # Avoid adding unnecessary blank lines when nesting 9 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = false 10 | 11 | # Always add two blank lines for top-level classes and methods 12 | BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION = 2 13 | 14 | # These two combine consecutive ({ and }) to same line to reduce clutter 15 | COALESCE_BRACKETS = true 16 | DEDENT_CLOSING_BRACKETS = true 17 | 18 | # Line length 19 | COLUMN_LIMIT = 125 20 | 21 | # Try to avoid having overly long lines by having excessively large penalty for that. 22 | SPLIT_PENALTY_EXCESS_CHARACTER = 1000000000 23 | 24 | # Always split dict entries to one entry per line 25 | # EACH_DICT_ENTRY_ON_SEPARATE_LINE = true 26 | 27 | # Never split this comment to a separate line. Workaround for certain flake8 & email template lines 28 | I18N_COMMENT = # noqa 29 | 30 | # Allow automatically joining lines, for example, multiline if that would fit to a single line 31 | JOIN_MULTIPLE_LINES = true 32 | 33 | # "3 * 5", instead of "3*5" 34 | SPACES_AROUND_POWER_OPERATOR = true 35 | 36 | # Follow normal comment style by adding two spaces between code and comment 37 | SPACES_BEFORE_COMMENT = 2 38 | 39 | # If list of items is comma terminated, always split to one per line. 40 | SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED = true 41 | 42 | # Related to previous one, if list of items (args or dict/list/...) needs to be split, split to one per line. 43 | # SPLIT_ALL_COMMA_SEPARATED_VALUES = true 44 | 45 | # Split dict generators for clarity (add line breaks between { and key: val etc.) 46 | SPLIT_BEFORE_DICT_SET_GENERATOR = true 47 | 48 | # Split method(k1=v1, k2=v2...) to separate lines 49 | SPLIT_BEFORE_NAMED_ASSIGNS = true 50 | 51 | # For complex (for some definition of complex) comprehensions, put output, for and if to separate lines 52 | SPLIT_COMPLEX_COMPREHENSION = true 53 | 54 | # When splitting something to multiple lines ('method(\n val...'), indent by 4 55 | CONTINUATION_INDENT_WIDTH = 4 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | PYTHON ?= python3 3 | PYTHON_SOURCE_DIRS = aiven_db_migrate/ test/ 4 | PG_VERSIONS = 13 14 15 16 17 5 | 6 | generated = aiven_db_migrate/migrate/version.py 7 | 8 | 9 | all: $(generated) 10 | 11 | aiven_db_migrate/migrate/version.py: 12 | echo "__version__ = \"$(shell git describe)\"" > $@ 13 | 14 | build-dep-fedora: 15 | sudo dnf -y install --best --allowerasing \ 16 | $(foreach ver,$(PG_VERSIONS),postgresql$(ver)-server) \ 17 | python3-flake8 \ 18 | python3-isort \ 19 | python3-mypy \ 20 | python3-psycopg2 \ 21 | python3-pylint \ 22 | python3-pytest \ 23 | python3-yapf \ 24 | rpm-build 25 | 26 | flake8: $(generated) 27 | $(PYTHON) -m flake8 $(PYTHON_SOURCE_DIRS) 28 | 29 | pylint: $(generated) 30 | $(PYTHON) -m pylint --rcfile .pylintrc $(PYTHON_SOURCE_DIRS) 31 | 32 | mypy: $(generated) 33 | $(PYTHON) -m mypy $(PYTHON_SOURCE_DIRS) 34 | 35 | isort: $(generated) 36 | $(PYTHON) -m isort $(PYTHON_SOURCE_DIRS) 37 | 38 | yapf: $(generated) 39 | $(PYTHON) -m yapf --parallel --recursive --in-place $(PYTHON_SOURCE_DIRS) 40 | 41 | static-checks: flake8 pylint mypy 42 | 43 | validate-style: 44 | $(eval CHANGES_BEFORE := $(shell mktemp)) 45 | git diff > $(CHANGES_BEFORE) 46 | $(MAKE) isort yapf 47 | $(eval CHANGES_AFTER := $(shell mktemp)) 48 | git diff > $(CHANGES_AFTER) 49 | diff $(CHANGES_BEFORE) $(CHANGES_AFTER) 50 | -rm $(CHANGES_BEFORE) $(CHANGES_AFTER) 51 | 52 | .PHONY: test 53 | test: $(generated) 54 | $(PYTHON) -m pytest -vv -r test 55 | 56 | clean: 57 | $(RM) aiven_db_migrate/migrate/version.py 58 | 59 | rpm: 60 | sudo $(PYTHON) setup.py bdist_rpm && rm -rf build/ 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # aiven-db-migrate 2 | 3 | Aiven database migration tool. This tool is meant for easy migration of databases from a database service 4 | provider, such as AWS RDS, or an on-premises data center, to [Aiven Database as a Service](https://aiven.io/). 5 | However, it's not limited to Aiven services and may be useful as a generic database migration tool. 6 | 7 | Database service providers, such as Aiven and AWS RDS, usually don't allow superuser/root access. 8 | Instead, the service's master/admin user is granted permissions for the most common DBA tasks; see e.g. 9 | https://help.aiven.io/en/articles/489557-postgresql-superuser-access. 10 | In addition, the service provider's web console/API can be used to perform some DBA tasks requiring more privileges than 11 | those granted to the master/admin user. However, the missing superuser access makes some existing database migration tools, 12 | such as `pg_dumpall`, unusable when migrating databases to/from a service provider. 13 | 14 | Currently this tool supports only PostgreSQL, but we aim to add support for other databases, such as MySQL. 15 | 16 | Requires Python 3.10 or newer. 
17 | 18 | ## Usage 19 | 20 | Running the library module: 21 | ``` 22 | $ python3 -m aiven_db_migrate.migrate -h 23 | Available commands: pg 24 | ``` 25 | 26 | Installing in a virtualenv: 27 | ``` 28 | $ python3 -m venv venv 29 | $ . venv/bin/activate 30 | $ ## Run make to set the proper version 31 | $ make 32 | $ pip install . 33 | ``` 34 | 35 | This installs console scripts which have the same interface as the library module: 36 | * `pg_migrate`: PostgreSQL migration 37 | 38 | ## PostgreSQL 39 | 40 | Requirements: 41 | * `pg_dump`: from any PostgreSQL version between the source and target versions 42 | * `psql`: any modern version should work 43 | 44 | Run the library module: 45 | ``` 46 | $ python3 -m aiven_db_migrate.migrate pg -h 47 | ``` 48 | or, if installed: 49 | ``` 50 | $ pg_migrate -h 51 | ``` 52 | 53 | Migration to the same or a newer PostgreSQL version is supported, starting from PostgreSQL 10 (versions 13 through 17 are currently exercised in CI). 54 | Migrating to an older version is not supported. 55 | 56 | By default `pg_dump` is searched for under `/usr/`; when PostgreSQL is installed in a different directory, such as on macOS, use the `--pgbin` parameter to define the PostgreSQL home directory, e.g. 57 | ``` 58 | --pgbin /Applications/Postgres.app/Contents/Versions/14/bin 59 | ``` 60 | 61 | Supports a regular data dump (`pg_dump`) and [logical replication](https://www.postgresql.org/docs/current/logical-replication.html) (PostgreSQL 10 or newer). 62 | If logical replication is not available, or privileges/requirements are missing, migration falls back to a 63 | data dump. 64 | 65 | ### CLI example 66 | 67 | Migrating from AWS RDS to Aiven for PostgreSQL. Logical replication is enabled in the source AWS RDS PostgreSQL 68 | server and the `aiven-extras` extension is installed in the target database. 69 | 70 | ``` 71 | $ pg_migrate -s "postgres://postgres:@jappja-pg1.chfhzaircbpb.eu-west-1.rds.amazonaws.com:5432/defaultdb" -t "postgres://avnadmin:@pg1-test-jappja-test.avns.net:26192/defaultdb?sslmode=require" 72 | 73 | # Or: 74 | $ SOURCE_SERVICE_URI="postgres://postgres:@jappja-pg1.chfhzaircbpb.eu-west-1.rds.amazonaws.com:5432/defaultdb" TARGET_SERVICE_URI="postgres://avnadmin:@pg1-test-jappja-test.avns.net:26192/defaultdb?sslmode=require" pg_migrate 75 | ... 
76 | 77 | Roles: 78 | rolname: 'rdsadmin', rolpassword: None, status: 'failed', message: 'must be superuser to create superusers' 79 | rolname: 'rds_password', rolpassword: None, status: 'created', message: 'role created' 80 | rolname: 'rds_superuser', rolpassword: None, status: 'created', message: 'role created' 81 | rolname: 'test_user1', rolpassword: 'placeholder_kfbqrvmdhgrpgpvy', status: 'created', message: 'role created' 82 | rolname: 'rds_ad', rolpassword: None, status: 'created', message: 'role created' 83 | rolname: 'rds_iam', rolpassword: None, status: 'created', message: 'role created' 84 | rolname: 'rds_replication', rolpassword: None, status: 'created', message: 'role created' 85 | rolname: 'rdsrepladmin', rolpassword: None, status: 'failed', message: 'must be superuser to create replication users' 86 | rolname: 'postgres', rolpassword: None, status: 'exists', message: 'role already exists' 87 | rolname: 'test_user2', rolpassword: None, status: 'created', message: 'role created' 88 | 89 | Databases: 90 | dbaname: 'rdsadmin', method: None, status: 'failed', message: 'FATAL: pg_hba.conf rejects connection for host "80.220.195.174", user "postgres", database "rdsadmin", SSL on\nFATAL: pg_hba.conf rejects connection for host "80.220.195.174", user "postgres", database "rdsadmin", SSL off\n' 91 | dbaname: 'defaultdb', method: 'replication', status: 'running', message: 'migrated to existing database' 92 | ``` 93 | 94 | By default logical replication is left running, and the created pub/sub objects need to be cleaned up once workloads have been 95 | moved to the new server. Objects created by this tool are named like `aiven_db_migrate_<dbname>_<sub|pub|slot>`. 96 | 97 | Starting from the target (using the `aiven-extras` extension), first get the subscription name: 98 | ``` 99 | defaultdb=> SELECT * FROM aiven_extras.pg_list_all_subscriptions(); 100 | ``` 101 | and then drop it: 102 | ``` 103 | defaultdb=> SELECT * FROM aiven_extras.pg_drop_subscription('aiven_db_migrate_defaultdb_sub'); 104 | ``` 105 | 106 | Note that with `aiven-extras`, dropping the subscription in the target also drops the replication slot in the source (via `dblink`). 107 | 108 | In the source, first get the publication name: 109 | ``` 110 | defaultdb=> SELECT * FROM pg_publication; 111 | ``` 112 | and then drop it: 113 | ``` 114 | defaultdb=> DROP PUBLICATION aiven_db_migrate_defaultdb_pub; 115 | ``` 116 | 117 | If `aiven-extras` is not used, clean up the replication slot too: 118 | ``` 119 | defaultdb=> SELECT * FROM pg_replication_slots; 120 | defaultdb=> SELECT * FROM pg_drop_replication_slot('aiven_db_migrate_defaultdb_slot'); 121 | ``` 122 | 123 | Using `--max-replication-lag` waits until the replication lag in bytes is less than or equal to the given value. This 124 | can be used together with `--stop-replication` to clean up all created pub/sub objects when replication is done (see the combined example below). 125 | 126 | With `--validate`, only a best-effort validation is run. This checks, e.g., that procedural languages and extensions installed 127 | in the source are also installed/available in the target. 128 | 129 | Use `--no-replicate-extension-tables` to skip extension tables. By default the tool attempts to replicate all extension tables during logical replication. 130 | 131 | With `--force-method` you can force the use of either the replication or the dump method. Otherwise the most suitable method is chosen automatically. 132 | 133 | Using `--dbs-max-total-size` together with `--validate`, you can check that the total size of the source databases is below some threshold. 
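
To tie these options together, here is a sketch of two invocations (the flags are the ones documented above; the service URIs are placeholders, and `--dbs-max-total-size` is assumed to take a size in bytes):
```
# Migrate, wait until the replication lag drops to 0 bytes, then stop replication
# and clean up the aiven_db_migrate_* pub/sub objects automatically.
$ pg_migrate -s "$SOURCE_SERVICE_URI" -t "$TARGET_SERVICE_URI" --max-replication-lag 0 --stop-replication

# Validation-only run that additionally checks the total size of the source
# databases stays below ~50 GiB (50 * 1024^3 bytes).
$ pg_migrate -s "$SOURCE_SERVICE_URI" -t "$TARGET_SERVICE_URI" --validate --dbs-max-total-size 53687091200
```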
134 | 135 | ### API example 136 | 137 | Migrating from AWS RDS to Aiven for PostgreSQL. Logical replication is enabled in the source AWS RDS PostgreSQL 138 | server, but the `aiven-extras` extension is not installed in the target database, so migration falls back to a data dump. 139 | 140 | ``` 141 | >>> from aiven_db_migrate.migrate import PGMigrate, PGMigrateResult 142 | >>> pg_mig = PGMigrate(source_conn_info="postgres://postgres:@jappja-pg1.chfhzaircbpb.eu-west-1.rds.amazonaws.com:5432/defaultdb", target_conn_info="postgres://avnadmin:@pg2-test-jappja-test.avns.net:26192/defaultdb?sslmode=require") 143 | >>> result: PGMigrateResult = pg_mig.migrate() 144 | ... 145 | Logical replication failed with error: 'must be superuser to create subscriptions', fallback to dump 146 | >>> result 147 | PGMigrateResult(pg_databases={'rdsadmin': {'dbname': 'rdsadmin', 'message': 'FATAL: pg_hba.conf rejects connection for host "80.220.195.174", user "postgres", database "rdsadmin", SSL on\nFATAL: pg_hba.conf rejects connection for host "80.220.195.174", user "postgres", database "rdsadmin", SSL off\n', 'method': None, 'status': 'failed'}, 'defaultdb': {'dbname': 'defaultdb', 'message': 'migrated to existing database', 'method': 'dump', 'status': 'done'}}, pg_roles={'rdsadmin': {'message': 'must be superuser to create superusers', 'rolname': 'rdsadmin', 'rolpassword': None, 'status': 'failed'}, 'rds_password': {'message': 'role created', 'rolname': 'rds_password', 'rolpassword': None, 'status': 'created'}, 'rds_superuser': {'message': 'role created', 'rolname': 'rds_superuser', 'rolpassword': None, 'status': 'created'}, 'test_user1': {'message': 'role created', 'rolname': 'test_user1', 'rolpassword': 'placeholder_qkdryldfsrdaocio', 'status': 'created'}, 'rds_ad': {'message': 'role created', 'rolname': 'rds_ad', 'rolpassword': None, 'status': 'created'}, 'rds_iam': {'message': 'role created', 'rolname': 'rds_iam', 'rolpassword': None, 'status': 'created'}, 'rds_replication': {'message': 'role created', 'rolname': 'rds_replication', 'rolpassword': None, 'status': 'created'}, 'rdsrepladmin': {'message': 'must be superuser to create replication users', 'rolname': 'rdsrepladmin', 'rolpassword': None, 'status': 'failed'}, 'postgres': {'message': 'role already exists', 'rolname': 'postgres', 'rolpassword': None, 'status': 'exists'}, 'test_user2': {'message': 'role created', 'rolname': 'test_user2', 'rolpassword': None, 'status': 'created'}}) 148 | ``` 149 | 150 | ### Logical replication 151 | * requires PostgreSQL 10 or newer 152 | * `wal_level` needs to be `logical` (see the check below) 153 | * currently supports only FOR ALL TABLES publication in source 154 | * [aiven-extras](https://github.com/aiven/aiven-extras) extension installed in both source and target database, or 155 | * superuser or superuser-like privileges, such as `rds_replication` role in AWS RDS, in both source and target 156 | * [AWS RDS additional settings/info](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_PostgreSQL.html#PostgreSQL.Concepts.General.FeatureSupport.LogicalReplication) 157 | 158 | #### Warning 159 | 160 | ⚠️ Large objects [cannot be replicated using logical replication](https://www.postgresql.org/docs/15/logical-replication-restrictions.html), up to and including PostgreSQL 15. 
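
To verify the `wal_level` requirement listed above, you can check the setting on the source before starting. A minimal sketch (on managed services the setting is changed through the provider's console/API; on self-hosted servers changing it requires superuser access and a server restart):
```
defaultdb=> SHOW wal_level;  -- must return 'logical'
defaultdb=> ALTER SYSTEM SET wal_level = logical;  -- self-hosted only; takes effect after a restart
```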
161 | 162 | ### Schemas 163 | * schemas are migrated without object ownership; the user used for migration is given all object ownership 164 | * NOTE: schema changes break logical replication 165 | 166 | ### Roles 167 | * roles with the `LOGIN` attribute are migrated using placeholder passwords: `placeholder_<16 char random string>` 168 | * migrating superuser or replication roles requires superuser privileges 169 | 170 | ### Extensions 171 | * requires whitelisting the extension in the target with [pgextwlist](https://github.com/dimitri/pgextwlist), or 172 | * superuser or superuser-like privileges 173 | * [Aiven for PostgreSQL supported extensions](https://help.aiven.io/en/articles/489561-supported-postgresql-extensions) 174 | 175 | ## Development 176 | 177 | Install build dependencies (Fedora): 178 | ``` 179 | $ make build-dep-fedora 180 | ``` 181 | 182 | Style checks: 183 | ``` 184 | $ make validate-style 185 | ``` 186 | 187 | Fix style errors with: 188 | ``` 189 | $ make isort 190 | $ make yapf 191 | ``` 192 | 193 | Static checks (`flake8`, `pylint` and `mypy`): 194 | ``` 195 | $ make static-checks 196 | ``` 197 | 198 | Tests (`pytest`): 199 | ``` 200 | $ make test 201 | ``` 202 | 203 | Running the whole test set takes time since all supported migration paths are tested. During development it's usually enough 204 | to run tests only for a certain PostgreSQL version, e.g.: 205 | ``` 206 | $ PG_VERSION="14" make test 207 | ``` 208 | 209 | It's also possible to test migration from one PostgreSQL version to another, e.g.: 210 | ``` 211 | $ PG_SOURCE_VERSION="13" PG_TARGET_VERSION="14" make test 212 | ``` 213 | 214 | The test set can be targeted even further by invoking `pytest` directly, e.g.: 215 | ``` 216 | $ PG_SOURCE_VERSION="13" PG_TARGET_VERSION="14" python3 -m pytest -s test/test_pg_migrate.py::Test_PGMigrate::test_migrate 217 | ``` 218 | 219 | # TODO 220 | 221 | * JSON output with CLI (for automation) 222 | * Hard to make pg_dump silent for outputting JSON to stdout 223 | * Output JSON to a file instead? 224 | * More options 225 | * --dump-only, --repl-only 226 | * --include-databases, --exclude-databases 227 | * --include-tables, --exclude-tables 228 | * --role-passwords (role/passwords file for creating roles with real passwords instead of placeholders) 229 | * More tests 230 | * Notably error/corner cases 231 | * Schema changes break logical replication 232 | * While logical replication is running, dump the schema periodically and check if it has changed, 233 | e.g. by calculating a hash of the schema dump 234 | * How to continue if the schema has changed? Stop replication, dump the schema and restart replication? 235 | * Proper README + API doc 236 | * RPM build recipe for aiven-core/prune integration 237 | * Test automation: Jenkins/GitHub Actions 238 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | We release patches for security vulnerabilities. Which versions are eligible for 6 | receiving such patches depends on the CVSS v3.0 Rating: 7 | 8 | | CVSS v3.0 | Supported Versions | 9 | | --------- | ----------------------------------------- | 10 | | 4.0-10.0 | Most recent release | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | Please report (suspected) security vulnerabilities to our **[bug bounty 15 | program](https://bugcrowd.com/aiven-mbb-og)**. You will receive a response from 16 | us within 2 working days. 
If the issue is confirmed, we will release a patch as 17 | soon as possible depending on impact and complexity. 18 | 19 | ## Qualifying Vulnerabilities 20 | 21 | Any reproducible vulnerability that has a severe effect on the security or 22 | privacy of our users is likely to be in scope for the program. 23 | 24 | We generally **aren't** interested in the following issues: 25 | * Social engineering (e.g. phishing, vishing, smishing) attacks 26 | * Brute force, DoS, text injection 27 | * Missing best practices such as HTTP security headers (CSP, X-XSS, etc.), 28 | email (SPF/DKIM/DMARC records), SSL/TLS configuration. 29 | * Software version disclosure / Banner identification issues / Descriptive 30 | error messages or headers (e.g. stack traces, application or server errors). 31 | * Clickjacking on pages with no sensitive actions 32 | * Theoretical vulnerabilities where you can't demonstrate a significant 33 | security impact with a proof of concept. 34 | -------------------------------------------------------------------------------- /aiven_db_migrate/__init__.py: -------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | __path__ = extend_path(__path__, __name__) # type: ignore 4 | -------------------------------------------------------------------------------- /aiven_db_migrate/migrate/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. https://aiven.io/ 2 | 3 | from .pgmigrate import PGMigrate, PGMigrateResult # noqa 4 | -------------------------------------------------------------------------------- /aiven_db_migrate/migrate/__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. https://aiven.io/ 2 | 3 | from .pgmigrate import main as pg_main 4 | 5 | 6 | def pg(args_): 7 | pg_main(args_, prog="pg") 8 | 9 | 10 | if __name__ == "__main__": 11 | from typing import Optional 12 | 13 | import sys 14 | 15 | commands = ("pg", ) 16 | args = sys.argv[1:] 17 | c: Optional[str] 18 | if args: 19 | c = args.pop(0) 20 | else: 21 | c = None 22 | 23 | if not c or c not in commands: 24 | print("Available commands: {}".format(", ".join(commands))) 25 | sys.exit(1) 26 | 27 | sys.exit(locals()[c](args)) 28 | -------------------------------------------------------------------------------- /aiven_db_migrate/migrate/errors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. https://aiven.io/ 2 | 3 | 4 | class PGDataNotFoundError(Exception): 5 | pass 6 | 7 | 8 | class PGTooMuchDataError(Exception): 9 | pass 10 | 11 | 12 | class PGSchemaDumpFailedError(Exception): 13 | pass 14 | 15 | 16 | class PGDataDumpFailedError(Exception): 17 | pass 18 | 19 | 20 | class PGMigrateValidationFailedError(Exception): 21 | pass 22 | -------------------------------------------------------------------------------- /aiven_db_migrate/migrate/pgutils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. 
https://aiven.io/ 2 | from packaging.version import Version 3 | from pathlib import Path 4 | from typing import Any, Dict, Optional 5 | from urllib.parse import parse_qs, urlparse 6 | 7 | import psycopg2 8 | import re 9 | import select 10 | import time 11 | 12 | 13 | def find_pgbin_dir(pgversion: str, *, max_pgversion: Optional[str] = None, usr_dir: Path = Path("/usr")) -> Path: 14 | """ 15 | Returns an existing pgbin directory with a version equal to `pgversion`. 16 | 17 | If `max_pgversion` is specified, returns the oldest existing pgbin directory with a version between 18 | `pgversion` and `max_pgversion`, inclusive. 19 | 20 | Versions 10 and above are matched on the major version number only: 10, 11, 12, 13... 21 | """ 22 | min_version = list(Version(pgversion).release) 23 | max_version = min_version if max_pgversion is None else list(Version(max_pgversion).release) 24 | max_version = max(max_version, min_version) 25 | max_parts = 1 26 | candidates = [] 27 | search_scopes = [(usr_dir, r"pgsql-([0-9]+(\.[0-9]+)*)"), (usr_dir / "lib/postgresql", r"([0-9]+(\.[0-9]+)*)")] 28 | for base_dir, pattern in search_scopes: 29 | if base_dir.is_dir(): 30 | for path in base_dir.iterdir(): 31 | match = re.search(pattern, path.name) 32 | bin_path = path / "bin" 33 | if match and bin_path.is_dir(): 34 | candidate_version = list(Version(match.group(1)).release) 35 | if min_version[:max_parts] <= candidate_version[:max_parts] <= max_version[:max_parts]: 36 | candidates.append((candidate_version, bin_path)) 37 | candidates.sort() 38 | if candidates: 39 | return candidates[0][1] 40 | search_scope_description = [str(search_scope[0] / search_scope[1] / "bin") for search_scope in search_scopes] 41 | if max_pgversion is not None: 42 | raise ValueError( 43 | "Couldn't find bin dir for any pg version between {!r} and {!r}, tried {!r}".format( 44 | pgversion, max_pgversion, search_scope_description 45 | ) 46 | ) 47 | else: 48 | raise ValueError("Couldn't find bin dir for pg version {!r}, tried {!r}".format(pgversion, search_scope_description)) 49 | 50 | 51 | def validate_pg_identifier_length(ident: str): 52 | length = len(ident) 53 | if length > 63: 54 | raise ValueError(f"PostgreSQL max identifier length is 63, len({ident!r}) = {length}") 55 | 56 | 57 | def create_connection_string(conn_info: Dict[str, Any]) -> str: 58 | return " ".join("{}='{}'".format(k, str(v).replace("'", "\\'")) for k, v in sorted(conn_info.items()) if v) 59 | 60 | 61 | def get_connection_info(info) -> Dict[str, Any]: 62 | """ 63 | Turn a connection info into a dict or return it if it was a dict already. 64 | Supports both the traditional libpq format and postgres:// uri format. 
65 | """ 66 | if isinstance(info, dict): 67 | return info.copy() 68 | elif info.startswith("postgres://") or info.startswith("postgresql://"): 69 | return parse_connection_string_url(info) 70 | else: 71 | return parse_connection_string_libpq(info) 72 | 73 | 74 | def parse_connection_string_libpq(connection_string: str) -> Dict[str, Any]: 75 | """ 76 | Parse a postgresql connection string as defined in 77 | http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING 78 | """ 79 | fields = {} 80 | while True: 81 | connection_string = connection_string.strip() 82 | if not connection_string: 83 | break 84 | if "=" not in connection_string: 85 | raise ValueError("expecting key=value format in connection_string fragment {!r}".format(connection_string)) 86 | key, rem = connection_string.split("=", 1) 87 | if rem.startswith("'"): 88 | asis, value = False, "" 89 | for i in range(1, len(rem)): 90 | if asis: 91 | value += rem[i] 92 | asis = False 93 | elif rem[i] == "'": 94 | break # end of entry 95 | elif rem[i] == "\\": 96 | asis = True 97 | else: 98 | value += rem[i] 99 | else: 100 | raise ValueError("invalid connection_string fragment {!r}".format(rem)) 101 | connection_string = rem[i + 1:] # pylint: disable=undefined-loop-variable 102 | else: 103 | res = rem.split(None, 1) 104 | if len(res) > 1: 105 | value, connection_string = res 106 | else: 107 | value, connection_string = rem, "" 108 | fields[key] = value 109 | return fields 110 | 111 | 112 | def parse_connection_string_url(url: str) -> Dict[str, str]: 113 | if "://" not in url: 114 | url = f"http://{url}" 115 | p = urlparse(url) 116 | fields = {} 117 | if p.hostname: 118 | fields["host"] = p.hostname 119 | if p.port: 120 | fields["port"] = str(p.port) 121 | if p.username: 122 | fields["user"] = p.username 123 | if p.password is not None: 124 | fields["password"] = p.password 125 | if p.path and p.path != "/": 126 | fields["dbname"] = p.path[1:] 127 | for k, v in parse_qs(p.query).items(): 128 | fields[k] = v[-1] 129 | return fields 130 | 131 | 132 | # This enables interruptible queries with an approach similar to 133 | # https://www.psycopg.org/docs/faq.html#faq-interrupt-query 134 | # However, to handle timeouts we can't use psycopg2.extensions.set_wait_callback : 135 | # https://github.com/psycopg/psycopg2/issues/944 136 | # Instead we rely on manually calling wait_select after connection and queries. 137 | # Since it's not a wait callback, we do not capture and transform KeyboardInterupt here. 138 | def wait_select(conn, timeout=None): 139 | start_time = time.monotonic() 140 | poll = select.poll() 141 | while True: 142 | if timeout is not None and timeout > 0: 143 | time_left = start_time + timeout - time.monotonic() 144 | if time_left <= 0: 145 | raise TimeoutError("wait_select: timeout after {} seconds".format(timeout)) 146 | else: 147 | time_left = 1 148 | state = conn.poll() 149 | if state == psycopg2.extensions.POLL_OK: 150 | return 151 | elif state == psycopg2.extensions.POLL_READ: 152 | poll.register(conn.fileno(), select.POLLIN) 153 | elif state == psycopg2.extensions.POLL_WRITE: 154 | poll.register(conn.fileno(), select.POLLOUT) 155 | else: 156 | raise conn.OperationalError("wait_select: invalid poll state") 157 | try: 158 | # When the remote address does not exist at all, poll.poll() waits its full timeout without any event. 159 | # However, in the same conditions, conn.poll() raises a psycopg2 exception almost immediately. 
160 | # It is better to fail quickly instead of waiting the full timeout, so we keep our poll.poll() below 1sec. 161 | poll.poll(min(1.0, time_left) * 1000) 162 | finally: 163 | poll.unregister(conn.fileno()) 164 | -------------------------------------------------------------------------------- /aiven_db_migrate/migrate/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5-2-ga13c553" 2 | -------------------------------------------------------------------------------- /ci/install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | AIVEN_EXTRAS_TARGET="/aiven-extras" 5 | AIVEN_PG_SECURITY_TARGET="/aiven-pg-security" 6 | 7 | export DEBIAN_FRONTEND=noninteractive 8 | apt-get -y update 9 | ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime 10 | apt install -y lsb-release wget gnupg tzdata git make rpm python3-pip libpq-dev jq 11 | dpkg-reconfigure --frontend noninteractive tzdata 12 | echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list 13 | wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - 14 | apt-get -y update 15 | apt-get install -y \ 16 | postgresql-{13,14,15,16,17} \ 17 | postgresql-server-dev-{13,14,15,16,17} \ 18 | postgresql-{13,14,15,16,17}-postgis-3 \ 19 | postgresql-{13,14,15,16,17}-pgextwlist 20 | 21 | # Install aiven-extras, using the latest tag. 22 | git clone https://github.com/aiven/aiven-extras "${AIVEN_EXTRAS_TARGET}" 23 | git -C "${AIVEN_EXTRAS_TARGET}" checkout "$(git -C "${AIVEN_EXTRAS_TARGET}" describe --tags --abbrev=0)" 24 | short_version=$(grep -oP 'short_ver = \K\d+\.\d+\.\d+' "${AIVEN_EXTRAS_TARGET}/Makefile") 25 | last_version=$(grep -oP 'last_ver = \K\d+\.\d+\.\d+' "${AIVEN_EXTRAS_TARGET}/Makefile") 26 | make -C "${AIVEN_EXTRAS_TARGET}" clean \ 27 | aiven_extras.control \ 28 | "sql/aiven_extras--${short_version}.sql" \ 29 | "sql/aiven_extras--${last_version}--${short_version}.sql" 30 | 31 | # The latest released version of aiven-pg-security (excludes pre-releases). 32 | AIVEN_PG_SECURITY_TAG=$(wget --quiet -O - "https://api.github.com/repos/aiven/aiven-pg-security/releases/latest" | jq -r .tag_name) 33 | # Clone aiven-pg-security (aiven_gatekeeper). 
34 | git clone https://github.com/aiven/aiven-pg-security/ "${AIVEN_PG_SECURITY_TARGET}" 35 | git -C "${AIVEN_PG_SECURITY_TARGET}" checkout "${AIVEN_PG_SECURITY_TAG}" 36 | 37 | # Maybe add a deb target to aiven-extras in the future; for now, while hacky, this is (probably) terser and less intrusive. 38 | 39 | for dest in "13" "14" "15" "16" "17" 40 | do 41 | gcc -fPIC -I/usr/include/postgresql/${dest}/server \ 42 | -D_GNU_SOURCE -I/usr/include/libxml2 -I/usr/include -c -o aiven_extras.o $AIVEN_EXTRAS_TARGET/src/aiven_extras.c 43 | gcc -fPIC -shared -o aiven_extras.so aiven_extras.o -L/usr/lib/postgresql/${dest} \ 44 | -L/usr/lib64 -Wl,--as-needed -Wl,-rpath,/usr/lib/postgresql/${dest},--enable-new-dtags 45 | 46 | mkdir -p /usr/lib/postgresql/${dest}/lib/ 47 | cp aiven_extras.so /usr/lib/postgresql/${dest}/lib/ 48 | cp $AIVEN_EXTRAS_TARGET/aiven_extras.control /usr/share/postgresql/${dest}/extension/ 49 | cp $AIVEN_EXTRAS_TARGET/sql/*.sql /usr/share/postgresql/${dest}/extension/ 50 | 51 | make -C "$AIVEN_PG_SECURITY_TARGET" PG_CONFIG="/usr/lib/postgresql/${dest}/bin/pg_config" clean install 52 | 53 | # Check that we have 2 entries: aiven_gatekeeper.so and aiven_extras.so. 54 | $(ls /usr/lib/postgresql/${dest}/lib/aiven_{gatekeeper,extras}.so | wc -l | grep -q 2) || exit 1 55 | done 56 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | # Global configuration 2 | 3 | [mypy] 4 | python_version = 3.10 5 | warn_redundant_casts = True 6 | 7 | # Module based overrides to disable errors on legacy code 8 | 9 | [mypy-pytest] 10 | ignore_missing_imports = True 11 | 12 | [mypy-psycopg2.*] 13 | ignore_missing_imports = True 14 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | flake8==6.0.0 2 | isort==5.12.0 3 | mypy==1.3.0 4 | psycopg2==2.9.6 5 | pylint==2.17.4 6 | pytest==7.3.1 7 | yapf==0.33.0 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. 
https://aiven.io/ 2 | 3 | from importlib.machinery import SourceFileLoader 4 | from setuptools import find_packages, setup 5 | 6 | import sys 7 | 8 | 9 | def get_version(): 10 | return SourceFileLoader("version", "aiven_db_migrate/migrate/version.py").load_module().__version__ 11 | 12 | 13 | setup( 14 | author="Aiven", 15 | author_email="support@aiven.io", 16 | entry_points={ 17 | "console_scripts": [ 18 | "pg_migrate = aiven_db_migrate.migrate.pgmigrate:main", 19 | ], 20 | }, 21 | install_requires=[ 22 | "psycopg2", 23 | "packaging", 24 | ], 25 | python_requires=">=3.10", 26 | license="Apache 2.0", 27 | name="aiven-db-migrate", 28 | packages=find_packages(exclude=["test"]), 29 | platforms=["POSIX", "MacOS", "Windows"], 30 | description="Aiven database migration tool", 31 | long_description=open("README.md").read(), 32 | url="https://aiven.io/", 33 | version=get_version(), 34 | classifiers=[ 35 | "Development Status :: 2 - Pre-Alpha", 36 | "Intended Audience :: Developers", 37 | "Topic :: Software Development :: Libraries", 38 | "License :: OSI Approved :: Apache Software License", 39 | "Programming Language :: Python :: 3.10", 40 | "Programming Language :: Python :: 3.11", 41 | "Programming Language :: Python :: 3.12", 42 | ], 43 | ) 44 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. 
https://aiven.io/ 2 | 3 | from __future__ import annotations 4 | 5 | from _pytest.fixtures import FixtureRequest 6 | from _pytest.tmpdir import TempPathFactory 7 | from aiven_db_migrate.migrate.pgmigrate import PGTarget, ReplicationObjectType 8 | from contextlib import contextmanager 9 | from copy import copy 10 | from functools import partial, wraps 11 | from packaging.version import Version 12 | from pathlib import Path 13 | from psycopg2.extras import LogicalReplicationConnection, ReplicationCursor 14 | from test.utils import PGRunner, SUPPORTED_PG_VERSIONS 15 | from typing import Callable, cast, Iterator, List, Tuple, TypeVar 16 | from unittest.mock import patch 17 | 18 | import logging 19 | import os 20 | import psycopg2 21 | import pytest 22 | 23 | R = TypeVar("R") 24 | 25 | logging.basicConfig(level=logging.DEBUG, format="%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s") 26 | 27 | 28 | @pytest.fixture(scope="module", name="pg_system_roles") 29 | def fixture_pg_system_roles() -> list[str]: 30 | return ["companyuser", "postgres", "some_superuser"] 31 | 32 | 33 | @contextmanager 34 | def setup_pg(tmp: Path, pgversion: str, *, with_gatekeeper: bool = False, system_roles: list[str]) -> Iterator[PGRunner]: 35 | pgdata = tmp / "pgdata" 36 | pgdata.mkdir() 37 | pg = PGRunner(pgversion=pgversion, pgdata=pgdata) 38 | 39 | extra_conf = {} 40 | if with_gatekeeper: 41 | system_roles_str = ",".join(system_roles) # users that can be assigned superuser 42 | extra_conf["shared_preload_libraries"] = "aiven_gatekeeper" 43 | extra_conf["aiven.pg_security_agent_reserved_roles"] = f"'{system_roles_str}'" 44 | 45 | pg.init().make_conf(wal_level="logical", **extra_conf).start() 46 | 47 | # create test users 48 | pg.create_superuser() 49 | pg.create_user(username=pg.testuser) 50 | 51 | try: 52 | yield pg 53 | finally: 54 | pg.stop() 55 | 56 | 57 | # Dynamically generating fixtures taken from https://github.com/pytest-dev/pytest/issues/2424 58 | 59 | 60 | def generate_pg_fixture(*, name: str, pgversion: str, scope="module", with_gatekeeper: bool = False): 61 | @pytest.fixture(scope=scope) 62 | def pg_fixture(tmp_path_factory: TempPathFactory, pg_system_roles: list[str]) -> Iterator[PGRunner]: 63 | with setup_pg( 64 | tmp_path_factory.mktemp(name), pgversion, with_gatekeeper=with_gatekeeper, system_roles=pg_system_roles 65 | ) as pg: 66 | yield pg 67 | 68 | return pg_fixture 69 | 70 | 71 | def inject_pg_fixture(*, name: str, pgversion: str, scope="module", with_gatekeeper: bool = False): 72 | globals()[name] = generate_pg_fixture(name=name, pgversion=pgversion, scope=scope, with_gatekeeper=with_gatekeeper) 73 | 74 | 75 | pg_cluster_for_tests: List[str] = [] 76 | pg_source_and_target_for_tests: List[Tuple[str, str]] = [] 77 | pg_unsafe_source_and_target_for_tests: List[Tuple[str, str]] = [] 78 | 79 | 80 | def generate_fixtures(): 81 | pg_source_versions: List[str] = [] 82 | pg_target_versions: List[str] = [] 83 | 84 | version = os.getenv("PG_SOURCE_VERSION", os.getenv("PG_VERSION")) 85 | if version: 86 | assert version in SUPPORTED_PG_VERSIONS, f"Supported pg versions are: {SUPPORTED_PG_VERSIONS}" 87 | pg_source_versions.append(version) 88 | else: 89 | pg_source_versions = SUPPORTED_PG_VERSIONS 90 | 91 | version = os.getenv("PG_TARGET_VERSION") 92 | if version: 93 | assert version in SUPPORTED_PG_VERSIONS, f"Supported pg versions are: {SUPPORTED_PG_VERSIONS}" 94 | pg_target_versions.append(version) 95 | else: 96 | pg_target_versions = copy(SUPPORTED_PG_VERSIONS) 97 | 98 | for source in pg_source_versions: 99 
| name_prefix = "pg{}".format(source.replace(".", "")) 100 | source_name = f"{name_prefix}_source" 101 | inject_pg_fixture(name=source_name, pgversion=source, with_gatekeeper=False) 102 | for target in pg_target_versions: 103 | if Version(source) > Version(target): 104 | continue 105 | name_prefix = "pg{}".format(target.replace(".", "")) 106 | target_name = f"{name_prefix}_target" 107 | unsafe_target_name = f"{target_name}_unsafe" 108 | inject_pg_fixture(name=target_name, pgversion=target, with_gatekeeper=True) 109 | pg_source_and_target_for_tests.append((source_name, target_name)) 110 | inject_pg_fixture(name=unsafe_target_name, pgversion=target, with_gatekeeper=False) 111 | pg_unsafe_source_and_target_for_tests.append((source_name, unsafe_target_name)) 112 | for version in set(pg_source_versions).union(pg_target_versions): 113 | fixture_name = "pg{}".format(version.replace(".", "")) 114 | inject_pg_fixture(name=fixture_name, pgversion=version, with_gatekeeper=True) 115 | pg_cluster_for_tests.append(fixture_name) 116 | 117 | 118 | generate_fixtures() 119 | 120 | 121 | def test_pg_source_and_target_for_tests(): 122 | print(pg_source_and_target_for_tests) 123 | 124 | 125 | @pytest.fixture(name="pg_cluster", params=pg_cluster_for_tests, scope="function") 126 | def fixture_pg_cluster(request): 127 | """Returns a fixture parametrized on the union of all source and target pg versions.""" 128 | cluster_runner = request.getfixturevalue(request.param) 129 | yield cluster_runner 130 | for cleanup in cluster_runner.cleanups: 131 | cleanup() 132 | cluster_runner.cleanups.clear() 133 | cluster_runner.drop_dbs() 134 | 135 | 136 | def clean_replication_slots_for_runner(pg_runner: PGRunner) -> Callable[[Callable[..., R]], Callable[..., R]]: 137 | """Parametrized decorator to clean replication slots for a given PGRunner instance.""" 138 | def clean_replication_slots(function: Callable[..., R]) -> Callable[..., R]: 139 | """Decorator that schedules a drop of all replication slots created by the decorated function.""" 140 | def _drop_replication_slot(pg_runner_: PGRunner, slot_name: str) -> None: 141 | """Drop a replication slot, will try to find it in all databases.""" 142 | for db in pg_runner_.get_all_db_names(): 143 | try: 144 | with pg_runner_.connection( 145 | username=pg_runner_.superuser, dbname=db, connection_factory=LogicalReplicationConnection 146 | ) as log_conn: 147 | log_cursor: ReplicationCursor 148 | with log_conn.cursor() as log_cursor: 149 | log_cursor.drop_replication_slot(slot_name) 150 | logging.info("Dropped replication slot %s on %s", slot_name, db) 151 | except psycopg2.errors.UndefinedObject: 152 | pass 153 | else: 154 | break # Found it, no need to try other databases. 155 | 156 | @wraps(function) 157 | def wrapper(self: PGTarget, *args, dbname: str, **kwargs) -> R: 158 | subname = function(self, *args, dbname=dbname, **kwargs) 159 | slotname = self.get_replication_object_name( 160 | dbname=dbname, 161 | replication_obj_type=ReplicationObjectType.REPLICATION_SLOT, 162 | ) 163 | 164 | pg_runner.cleanups.append(partial(_drop_replication_slot, pg_runner_=pg_runner, slot_name=slotname)) 165 | 166 | return subname 167 | 168 | return wrapper 169 | 170 | return clean_replication_slots 171 | 172 | 173 | @contextmanager 174 | def make_pg_source_and_target(request: FixtureRequest) -> Iterator[Tuple[PGRunner, PGRunner]]: 175 | """Returns a fixture parametrized on the union of all source and target pg versions. 
176 | 177 | This is expected to be used in a fixture that is parametrized with a list of tuples of 178 | source and target fixture names. 179 | 180 | If the fixture is used in a class, the attributes ``source`` and ``target`` are also set 181 | on the class. 182 | """ 183 | source_fixture_name, target_fixture_name = request.param 184 | # run the fixture function 185 | source: PGRunner = request.getfixturevalue(source_fixture_name) 186 | target: PGRunner = request.getfixturevalue(target_fixture_name) 187 | 188 | # Patch PGTarget.create_subscription to add the cleanup of created logical slots to the cleanup list. 189 | # We do this because in some rare cases, the teardown already drops the table while the replication 190 | # slot is still active, which causes the drop to fail. The cleanup runs after all 191 | # connections are closed, so it should always succeed. 192 | patched_create_subscription = clean_replication_slots_for_runner(target)(PGTarget.create_subscription) 193 | with patch("aiven_db_migrate.migrate.pgmigrate.PGTarget.create_subscription", patched_create_subscription): 194 | if request.cls: 195 | request.cls.source = source 196 | request.cls.target = target 197 | 198 | try: 199 | yield source, target 200 | finally: 201 | # cleanup functions 202 | for cleanup in source.cleanups + target.cleanups: 203 | cleanup() 204 | source.cleanups.clear() 205 | target.cleanups.clear() 206 | # cleanup created db's 207 | source.drop_dbs() 208 | target.drop_dbs() 209 | 210 | 211 | @pytest.fixture( 212 | name="pg_source_and_target", 213 | params=pg_source_and_target_for_tests, 214 | scope="function", 215 | ids=["{}-{}".format(*entry) for entry in pg_source_and_target_for_tests] 216 | ) 217 | def fixture_pg_source_and_target(request): 218 | """Generate source and target ``PGRunner`` instances for all the requested versions. 219 | 220 | Note: 221 | The source databases are vanilla PG, whereas the target databases are hardened, 222 | using ``shared_preload_libraries = aiven_gatekeeper``. 223 | """ 224 | with make_pg_source_and_target(request) as (source, target): 225 | yield source, target 226 | 227 | 228 | @pytest.fixture( 229 | name="pg_source_and_target_unsafe", 230 | params=pg_unsafe_source_and_target_for_tests, 231 | scope="function", 232 | ids=["{}-{}".format(*entry) for entry in pg_unsafe_source_and_target_for_tests] 233 | ) 234 | def fixture_pg_source_and_target_unsafe(request): 235 | """Generate source and unsafe target ``PGRunner`` instances for all the requested versions. 236 | 237 | Note: 238 | Both the source and target databases are vanilla PG (no ``shared_preload_libraries``).
239 | """ 240 | with make_pg_source_and_target(request) as (source, target): 241 | yield source, target 242 | -------------------------------------------------------------------------------- /test/test_db_size_check.py: -------------------------------------------------------------------------------- 1 | from aiven_db_migrate.migrate.pgmigrate import PGMigrate 2 | from test.utils import PGRunner, random_string 3 | from typing import Tuple 4 | 5 | import psycopg2 6 | import pytest 7 | 8 | 9 | def test_db_size(pg_source_and_target: Tuple[PGRunner, PGRunner]): 10 | source, target = pg_source_and_target 11 | 12 | db_name = random_string(6) 13 | other_db_name = random_string(6) 14 | 15 | source.create_db(dbname=db_name) 16 | source.create_db(dbname=other_db_name) 17 | 18 | pg_mig = PGMigrate( 19 | source_conn_info=source.super_conn_info(), 20 | target_conn_info=target.super_conn_info(), 21 | verbose=True, 22 | ) 23 | 24 | # Create a few tables and insert some data 25 | tables = [f'table_{i}' for i in range(4)] 26 | for dbname in {db_name, other_db_name}: 27 | with source.cursor(dbname=dbname) as c: 28 | for t in tables: 29 | c.execute(f"DROP TABLE IF EXISTS {t}") 30 | c.execute(f"CREATE TABLE {t} (foo INT)") 31 | c.execute(f"INSERT INTO {t} (foo) VALUES (1), (2), (3)") 32 | 33 | size = pg_mig.source.get_size(dbname=db_name, only_tables=[]) 34 | assert size == 0 35 | 36 | size = pg_mig.source.get_size(dbname=db_name) 37 | assert size >= 0 # returns slightly different values per pg version 38 | 39 | size = pg_mig.source.get_size(dbname=db_name, only_tables=tables) 40 | assert size == 32768 41 | 42 | size = pg_mig.source.get_size(dbname=db_name, only_tables=tables[:1]) 43 | assert size == 8192 44 | 45 | with pytest.raises(psycopg2.OperationalError): 46 | size = pg_mig.source.get_size(dbname="notfound") 47 | -------------------------------------------------------------------------------- /test/test_force_method.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 Aiven, Helsinki, Finland.
https://aiven.io/ 2 | 3 | from aiven_db_migrate.migrate import errors 4 | from aiven_db_migrate.migrate.pgmigrate import main, PGCluster, PGDatabase, PGMigrate, PGMigrateMethod, PGMigrateTask 5 | from contextlib import nullcontext as does_not_raise 6 | from unittest import mock 7 | 8 | import psycopg2 9 | import pytest 10 | 11 | 12 | @mock.patch.object(PGCluster, "params", new_callable=mock.PropertyMock, return_value={"server_version": "11.13"}) 13 | @mock.patch.object(PGMigrate, "_dump_schema") 14 | @mock.patch.object(PGMigrate, "_dump_data") 15 | @mock.patch.object(PGMigrate, "_db_replication") 16 | @mock.patch.object(PGCluster, "refresh_db") 17 | @pytest.mark.parametrize("method", [PGMigrateMethod.dump, PGMigrateMethod.replication, None]) 18 | def test_force_method(mock_refresh_db, mock_db_replication, mock_dump_data, mock_dump_schema, mock_params, method): 19 | pg_mig = PGMigrate(source_conn_info="postgresql://source", target_conn_info="postgresql://target") 20 | pg_task = PGMigrateTask( 21 | source_db=PGDatabase(dbname="test_source", tables=[]), 22 | target_db=PGDatabase(dbname="test_target", tables=[]), 23 | method=method 24 | ) 25 | 26 | pg_mig._db_migrate(pgtask=pg_task) 27 | 28 | mock_dump_schema.assert_called() 29 | mock_refresh_db.assert_called() 30 | mock_params.assert_called() 31 | 32 | if method == PGMigrateMethod.dump: 33 | mock_dump_data.assert_called() 34 | mock_db_replication.assert_not_called() 35 | elif method == PGMigrateMethod.replication: 36 | mock_dump_data.assert_not_called() 37 | mock_db_replication.assert_called() 38 | elif method is None: 39 | mock_db_replication.assert_called() 40 | mock_dump_data.assert_not_called() 41 | 42 | 43 | @mock.patch.object(PGCluster, "params", new_callable=mock.PropertyMock, return_value={"server_version": "11.13"}) 44 | @mock.patch.object(PGMigrate, "_dump_schema") 45 | @mock.patch.object(PGMigrate, "_dump_data") 46 | @mock.patch.object(PGMigrate, "_db_replication", side_effect=psycopg2.ProgrammingError) 47 | @mock.patch.object(PGCluster, "refresh_db") 48 | @pytest.mark.parametrize("method", [PGMigrateMethod.replication, None]) 49 | def test_force_method_failure(mock_refresh_db, mock_db_replication, mock_dump_data, mock_dump_schema, mock_params, method): 50 | pg_mig = PGMigrate(source_conn_info="postgresql://source", target_conn_info="postgresql://target") 51 | pg_task = PGMigrateTask( 52 | source_db=PGDatabase(dbname="test_source", tables=[]), 53 | target_db=PGDatabase(dbname="test_target", tables=[]), 54 | method=method 55 | ) 56 | 57 | if method == PGMigrateMethod.replication: 58 | # if we are forcing the use of replication and it fails, psycopg2.ProgrammingError gets raised 59 | context = pytest.raises(psycopg2.ProgrammingError) 60 | else: 61 | # otherwise we should fall back to using dump and not raise an exception 62 | context = does_not_raise() 63 | 64 | with mock.patch.object( 65 | psycopg2.ProgrammingError, 66 | "pgcode", 67 | new_callable=mock.PropertyMock, 68 | return_value=psycopg2.errorcodes.INSUFFICIENT_PRIVILEGE 69 | ): 70 | with context: 71 | pg_mig._db_migrate(pgtask=pg_task) 72 | 73 | mock_dump_schema.assert_called() 74 | mock_refresh_db.assert_called() 75 | mock_params.assert_called() 76 | 77 | if method == PGMigrateMethod.replication: 78 | mock_dump_data.assert_not_called() 79 | mock_db_replication.assert_called() 80 | elif method is None: 81 | mock_db_replication.assert_called() 82 | mock_dump_data.assert_called() 83 | --------------------------------------------------------------------------------
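For reference, the method-selection contract these two tests pin down is: a forced method never falls back, while the default order tries replication first and falls back to dump only on a privilege error. A minimal sketch of that contract follows; the helper callables `replicate` and `dump_data` and the plain-string `method` values are assumptions for illustration, not the repository's implementation.

import psycopg2
import psycopg2.errorcodes


def migrate_one_db(method, replicate, dump_data):
    # method is "dump", "replication", or None (assumed stand-ins for PGMigrateMethod)
    if method == "dump":
        dump_data()  # forced dump: replication is never attempted
        return
    try:
        replicate()  # forced replication, or the default first choice
    except psycopg2.ProgrammingError as error:
        if method == "replication":
            raise  # a forced method must surface its failure
        if error.pgcode == psycopg2.errorcodes.INSUFFICIENT_PRIVILEGE:
            dump_data()  # default mode: fall back to dump on missing privileges
        else:
            raise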
/test/test_main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from aiven_db_migrate.migrate.pgmigrate import main, PGCluster, PGMigrate 3 | from unittest import mock 4 | 5 | import pytest 6 | import re 7 | 8 | 9 | @mock.patch.object(PGMigrate, "_check_aiven_pg_security_agent") 10 | @mock.patch.object(PGCluster, "params", new_callable=mock.PropertyMock, return_value={"server_version": "16.4"}) 11 | @mock.patch.object(PGCluster, "databases", new_callable=mock.PropertyMock, return_value={}) 12 | @mock.patch.object(PGCluster, "pg_lang", new_callable=mock.PropertyMock, return_value={}) 13 | @mock.patch.object(PGMigrate, "migrate") 14 | @pytest.mark.parametrize("validate", [True, False]) 15 | def test_main(mock_migrate, mock_lang, mock_databases, mock_params, mock_check_security, validate): 16 | args = [ 17 | "pg_migrate", 18 | "-s", 19 | "postgresql://source", 20 | "-t", 21 | "postgresql://target", 22 | ] 23 | if validate: 24 | args.append("--validate") 25 | 26 | with mock.patch("sys.argv", args): 27 | main() 28 | if validate: 29 | mock_check_security.assert_called() 30 | mock_params.assert_called() 31 | mock_databases.assert_called() 32 | mock_lang.assert_called() 33 | mock_migrate.assert_not_called() 34 | else: 35 | mock_check_security.assert_not_called() 36 | mock_params.assert_not_called() 37 | mock_databases.assert_not_called() 38 | mock_lang.assert_not_called() 39 | mock_migrate.assert_called_with(force_method=None) 40 | 41 | 42 | @mock.patch.object(PGCluster, "params", new_callable=mock.PropertyMock, return_value={"server_version": "11.13"}) 43 | @mock.patch.object(PGCluster, "databases", new_callable=mock.PropertyMock, return_value={}) 44 | @mock.patch.object(PGCluster, "pg_lang", new_callable=mock.PropertyMock, return_value={}) 45 | @mock.patch.object(PGMigrate, "migrate") 46 | @pytest.mark.parametrize( 47 | "method,validate", [("dump", True), ("dump", False), ("replication", True), ("replication", False), (None, True), 48 | (None, False)] 49 | ) 50 | def test_main_force_method(mock_migrate, mock_lang, mock_databases, mock_params, method, validate): 51 | args = [ 52 | "pg_migrate", 53 | "--source", 54 | "postgres://source/defaultdb", 55 | "--target", 56 | "postgres://target/defaultdb", 57 | "--force-method", 58 | method, 59 | ] 60 | 61 | with mock.patch("sys.argv", args): 62 | main() 63 | mock_migrate.assert_called_with(force_method=method) 64 | 65 | mock_params.reset_mock() 66 | mock_databases.reset_mock() 67 | mock_lang.reset_mock() 68 | mock_migrate.reset_mock() 69 | 70 | with mock.patch( 71 | "sys.argv", [ 72 | "pg_migrate", 73 | "-s", 74 | "postgresql://source", 75 | "-t", 76 | "postgresql://target", 77 | "--force-method", 78 | "noop", 79 | ] 80 | ): 81 | with pytest.raises(ValueError, match=re.escape("Unsupported migration method 'noop'")): 82 | main() 83 | 84 | mock_params.assert_not_called() 85 | mock_databases.assert_not_called() 86 | mock_lang.assert_not_called() 87 | mock_migrate.assert_not_called() 88 | -------------------------------------------------------------------------------- /test/test_migrate_checks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 Aiven, Helsinki, Finland. 
https://aiven.io/ 2 | 3 | from aiven_db_migrate.migrate.errors import PGMigrateValidationFailedError 4 | from aiven_db_migrate.migrate.pgmigrate import PGMigrate 5 | from test.utils import PGRunner, random_string 6 | from typing import Tuple 7 | from unittest.mock import MagicMock, patch 8 | 9 | import pytest 10 | 11 | 12 | def test_dbs_max_total_size_check(pg_source_and_target: Tuple[PGRunner, PGRunner]): 13 | source, target = pg_source_and_target 14 | dbnames = {random_string() for _ in range(3)} 15 | 16 | for dbname in dbnames: 17 | source.create_db(dbname=dbname) 18 | target.create_db(dbname=dbname) 19 | 20 | # From PG15 onwards there are restrictions on non-superusers in the postgres db, 21 | # so let's just ignore it 22 | 23 | default_filtered_dbs = ["postgres"] 24 | # Create a few tables and insert some data 25 | tables = [f'table_{i}' for i in range(4)] 26 | for dbname in dbnames: 27 | with source.cursor(dbname=dbname) as c: 28 | for t in tables: 29 | c.execute(f"DROP TABLE IF EXISTS {t}") 30 | c.execute(f"CREATE TABLE {t} (foo INT)") 31 | c.execute(f"INSERT INTO {t} (foo) VALUES (1), (2), (3)") 32 | 33 | pg_mig = PGMigrate( 34 | source_conn_info=source.conn_info(), 35 | target_conn_info=target.conn_info(), 36 | createdb=False, 37 | verbose=True, 38 | filtered_db=",".join(default_filtered_dbs), 39 | ) 40 | 41 | with patch( 42 | "aiven_db_migrate.migrate.pgmigrate.PGMigrate._check_database_size", side_effect=pg_mig._check_database_size 43 | ) as mock_db_size_check: 44 | # DB size check is not run 45 | pg_mig.validate() 46 | mock_db_size_check.assert_not_called() 47 | 48 | mock_db_size_check.reset_mock() 49 | 50 | # DB size check with max size of zero 51 | with pytest.raises(PGMigrateValidationFailedError) as e: 52 | pg_mig.validate(dbs_max_total_size=0) 53 | assert "Databases do not fit to the required maximum size" in str(e) 54 | mock_db_size_check.assert_called_once_with(max_size=0) 55 | 56 | mock_db_size_check.reset_mock() 57 | 58 | # DB size check with enough size 59 | pg_mig.validate(dbs_max_total_size=1073741824) 60 | mock_db_size_check.assert_called_once_with(max_size=1073741824) 61 | 62 | # Test with DB name filtering 63 | pg_mig = PGMigrate( 64 | source_conn_info=source.conn_info(), 65 | target_conn_info=target.conn_info(), 66 | createdb=False, 67 | verbose=True, 68 | filtered_db=",".join(default_filtered_dbs + list(dbnames)), 69 | ) 70 | 71 | with patch( 72 | "aiven_db_migrate.migrate.pgmigrate.PGMigrate._check_database_size", side_effect=pg_mig._check_database_size 73 | ) as mock_db_size_check: 74 | # Should pass as all DBs are filtered out from size calculations 75 | pg_mig.validate(dbs_max_total_size=0) 76 | mock_db_size_check.assert_called_once_with(max_size=0) 77 | 78 | # Test with table filtering 79 | 80 | # Include all tables in "skip_tables" 81 | pg_mig = PGMigrate( 82 | source_conn_info=source.conn_info(), 83 | target_conn_info=target.conn_info(), 84 | createdb=False, 85 | verbose=True, 86 | skip_tables=tables, # skip all tables 87 | ) 88 | 89 | with patch( 90 | "aiven_db_migrate.migrate.pgmigrate.PGMigrate._check_database_size", side_effect=pg_mig._check_database_size 91 | ) as mock_db_size_check: 92 | # Should pass as all tables are filtered out from size calculations 93 | pg_mig.validate(dbs_max_total_size=0) 94 | mock_db_size_check.assert_called_once_with(max_size=0) 95 | 96 | # Only the first table is included 97 | pg_mig = PGMigrate( 98 | source_conn_info=source.conn_info(), 99 | target_conn_info=target.conn_info(), 100 | createdb=False, 101 | verbose=True, 102 |
with_tables=tables[:1], # include only one table 103 | ) 104 | with patch( 105 | "aiven_db_migrate.migrate.pgmigrate.PGMigrate._check_database_size", side_effect=pg_mig._check_database_size 106 | ) as mock_db_size_check: 107 | # This fails as one table is included in the check and it contains data 108 | with pytest.raises(PGMigrateValidationFailedError) as e: 109 | pg_mig.validate(dbs_max_total_size=0) 110 | assert "Databases do not fit to the required maximum size" in str(e) 111 | mock_db_size_check.assert_called_once_with(max_size=0) 112 | 113 | # Should easily fit 114 | pg_mig.validate(dbs_max_total_size=1073741824) 115 | 116 | 117 | def test_large_object_warnings(pg_source_and_target: Tuple[PGRunner, PGRunner]): 118 | source, target = pg_source_and_target 119 | dbnames = {random_string() for _ in range(3)} 120 | for dbname in dbnames: 121 | source.create_db(dbname=dbname) 122 | target.create_db(dbname=dbname) 123 | dbnames.add(source.defaultdb) 124 | dbnames.add(target.defaultdb) 125 | 126 | pg_mig = PGMigrate( 127 | source_conn_info=source.conn_info(), 128 | target_conn_info=target.conn_info(), 129 | createdb=False, 130 | verbose=True, 131 | ) 132 | 133 | pg_mig.log = MagicMock() 134 | 135 | # Create a large object in the source 136 | with source.cursor(dbname=source.defaultdb) as cur: 137 | cur.execute("SELECT lo_create(0)") 138 | 139 | with patch( 140 | "aiven_db_migrate.migrate.pgmigrate.PGMigrate._warn_if_pg_lobs", side_effect=pg_mig._warn_if_pg_lobs 141 | ) as mock_lobs_check: 142 | pg_mig.validate() 143 | mock_lobs_check.assert_called_once() 144 | pg_mig.log.warning.assert_called_with( 145 | "Large objects detected: large objects are not compatible with logical replication: https://www.postgresql.org/docs/14/logical-replication-restrictions.html" 146 | ) 147 | -------------------------------------------------------------------------------- /test/test_pg_cluster.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 Aiven, Helsinki, Finland.
https://aiven.io/ 2 | from aiven_db_migrate.migrate.pgmigrate import PGCluster 3 | from multiprocessing import Process 4 | from packaging.version import Version 5 | from test.utils import PGRunner 6 | 7 | import os 8 | import pytest 9 | import signal 10 | import time 11 | 12 | 13 | def test_interruptible_queries(pg_cluster: PGRunner): 14 | def wait_and_interrupt(): 15 | time.sleep(1) 16 | os.kill(os.getppid(), signal.SIGINT) 17 | 18 | cluster = PGCluster(conn_info=pg_cluster.conn_info()) 19 | interrupter = Process(target=wait_and_interrupt) 20 | interrupter.start() 21 | start_time = time.monotonic() 22 | with pytest.raises(KeyboardInterrupt): 23 | cluster.c("select pg_sleep(100)") 24 | assert time.monotonic() - start_time < 2 25 | interrupter.join() 26 | 27 | 28 | def test_trusted_extensions(pg_cluster: PGRunner): 29 | # A small sample of built-in contrib extensions, no need to be exhaustive 30 | known_trusted = {"btree_gin", "btree_gist", "hstore", "intarray", "pgcrypto", "plpgsql", "unaccent"} 31 | known_untrusted = {"pg_buffercache", "pg_freespacemap", "pg_prewarm", "pg_stat_statements"} 32 | cluster = PGCluster(conn_info=pg_cluster.conn_info()) 33 | if cluster.version >= Version("13"): 34 | for extension in cluster.pg_ext: 35 | assert isinstance(extension.trusted, bool) 36 | if extension.name in known_trusted: 37 | assert extension.trusted 38 | if extension.name in known_untrusted: 39 | assert not extension.trusted 40 | else: 41 | for extension in cluster.pg_ext: 42 | assert extension.trusted is None 43 | -------------------------------------------------------------------------------- /test/test_pg_dump.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. https://aiven.io/ 2 | 3 | from aiven_db_migrate.migrate.pgmigrate import PGDatabase, PGMigrate 4 | from test.utils import PGRunner, random_string 5 | from typing import Tuple 6 | 7 | import pytest 8 | 9 | 10 | @pytest.mark.parametrize("createdb", [True, False]) 11 | def test_dump(pg_source_and_target: Tuple[PGRunner, PGRunner], createdb: bool): 12 | source, target = pg_source_and_target 13 | dbname = random_string() 14 | tblname = random_string() 15 | # create db and table with some data in source 16 | source.create_db(dbname=dbname) 17 | with source.cursor(dbname=dbname) as cur: 18 | cur.execute(f"CREATE TABLE {tblname} (something INT)") 19 | cur.execute(f"INSERT INTO {tblname} VALUES (1), (2), (3), (4), (5)") 20 | 21 | if not createdb: 22 | # create existing db to target 23 | target.create_db(dbname=dbname) 24 | 25 | pg_mig = PGMigrate( 26 | source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=createdb, verbose=True 27 | ) 28 | 29 | # evaluates pgbin dir (pg_dump needs to be from the same version as the source) 30 | pg_mig.validate() 31 | 32 | # dump both schema and data 33 | db = PGDatabase(dbname=dbname, tables=set()) 34 | pg_mig._dump_schema(db=db) # pylint: disable=protected-access 35 | pg_mig._dump_data(db=db) # pylint: disable=protected-access 36 | 37 | # verify that db/table migrated to target 38 | exists = pg_mig.target.c( 39 | "SELECT 1 FROM information_schema.tables WHERE table_schema = %s AND table_name= %s", 40 | args=( 41 | "public", 42 | tblname, 43 | ), 44 | dbname=dbname 45 | ) 46 | assert exists 47 | count = pg_mig.target.c(f"SELECT count(*) FROM {tblname}", dbname=dbname, return_rows=1)[0] 48 | assert int(count["count"]) == 5 | --------------------------------------------------------------------------------
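The same verification that test_dump performs through pg_mig.target.c() can also be done with plain psycopg2. A minimal sketch follows, assuming a libpq connection string for the target; this is illustrative code, not part of the repository.

import psycopg2


def assert_table_migrated(target_conn_info: str, dbname: str, tblname: str, expected_rows: int) -> None:
    # connect to the migrated database on the target
    with psycopg2.connect(target_conn_info, dbname=dbname) as conn:
        with conn.cursor() as cur:
            # the table must exist in the public schema
            cur.execute(
                "SELECT 1 FROM information_schema.tables WHERE table_schema = %s AND table_name = %s",
                ("public", tblname),
            )
            assert cur.fetchone() is not None
            # identifiers cannot be bound as query parameters; tblname is assumed trusted here
            cur.execute(f"SELECT count(*) FROM {tblname}")
            assert cur.fetchone()[0] == expected_rows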
/test/test_pg_extensions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. https://aiven.io/ 2 | 3 | from aiven_db_migrate.migrate.errors import PGMigrateValidationFailedError 4 | from aiven_db_migrate.migrate.pgmigrate import PGDatabase, PGExtension, PGMigrate 5 | from packaging.version import Version 6 | from test.utils import PGRunner, random_string 7 | from typing import Tuple 8 | 9 | import pytest 10 | 11 | 12 | @pytest.mark.parametrize("createdb", [True, False]) 13 | def test_defaults(pg_source_and_target: Tuple[PGRunner, PGRunner], createdb: bool): 14 | source, target = pg_source_and_target 15 | dbnames = {random_string() for _ in range(3)} 16 | 17 | for dbname in dbnames: 18 | source.create_db(dbname=dbname) 19 | if not createdb: 20 | # create existing db to target 21 | target.create_db(dbname=dbname) 22 | 23 | pg_mig = PGMigrate( 24 | source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=createdb, verbose=True 25 | ) 26 | 27 | pg_mig.validate() 28 | for dbname in dbnames: 29 | pg_mig._dump_schema(db=PGDatabase(dbname=dbname, tables=set())) # pylint: disable=protected-access 30 | 31 | # reset databases so that they and installed extensions get queried from server again 32 | setattr(pg_mig.target, "_databases", {}) 33 | 34 | for dbname in dbnames: 35 | for ext1 in pg_mig.source.databases[dbname].pg_ext: 36 | ext2 = next(e for e in pg_mig.target.databases[dbname].pg_ext if e.name == ext1.name) 37 | assert Version(ext2.version) >= Version(ext1.version) 38 | 39 | 40 | @pytest.mark.parametrize("createdb", [True, False]) 41 | def test_extension_requires_superuser(pg_source_and_target: Tuple[PGRunner, PGRunner], createdb: bool): 42 | source, target = pg_source_and_target 43 | dbname = random_string() 44 | extname = "pg_stat_statements" 45 | 46 | source.create_db(dbname=dbname) 47 | source.create_extension(extname=extname, dbname=dbname) 48 | if not createdb: 49 | # create existing db to target 50 | target.create_db(dbname=dbname) 51 | 52 | pg_mig = PGMigrate( 53 | source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=createdb, verbose=True 54 | ) 55 | 56 | with pytest.raises(PGMigrateValidationFailedError) as err: 57 | pg_mig.validate() 58 | assert str(err.value) == f"Installing extension '{extname}' in target requires superuser" 59 | 60 | 61 | def test_migration_succeeds_when_extensions_that_require_superuser_are_excluded( 62 | pg_source_and_target: Tuple[PGRunner, PGRunner] 63 | ) -> None: 64 | source, target = pg_source_and_target 65 | dbname = random_string() 66 | extensions = {"pg_freespacemap", "pg_visibility"} 67 | 68 | source.create_db(dbname=dbname) 69 | for extname in extensions: 70 | source.create_extension(extname=extname, dbname=dbname) 71 | 72 | pg_mig = PGMigrate( 73 | source_conn_info=source.conn_info(), 74 | target_conn_info=target.conn_info(), 75 | verbose=True, 76 | excluded_extensions=",".join(extensions), 77 | ) 78 | assert set(pg_mig.target.excluded_extensions) == extensions 79 | 80 | pg_mig.validate() 81 | 82 | 83 | @pytest.mark.parametrize("createdb", [True, False]) 84 | def test_extension_superuser(pg_source_and_target: Tuple[PGRunner, PGRunner], createdb: bool): 85 | source, target = pg_source_and_target 86 | dbname = random_string() 87 | extname = "aiven_extras" 88 | 89 | source.create_db(dbname=dbname) 90 | source.create_extension(extname=extname, dbname=dbname) 91 | if not createdb: 92 | # create existing db to target 
93 | target.create_db(dbname=dbname) 94 | 95 | pg_mig = PGMigrate( 96 | source_conn_info=source.super_conn_info(), 97 | target_conn_info=target.super_conn_info(), 98 | createdb=createdb, 99 | verbose=True 100 | ) 101 | 102 | pg_mig.validate() 103 | pg_mig._dump_schema(db=PGDatabase(dbname=dbname, tables=set())) # pylint: disable=protected-access 104 | 105 | # reset databases so that they and installed extensions get queried from server again 106 | setattr(pg_mig.target, "_databases", {}) 107 | 108 | for ext1 in pg_mig.source.databases[dbname].pg_ext: 109 | ext2 = next(e for e in pg_mig.target.databases[dbname].pg_ext if e.name == ext1.name) 110 | assert Version(ext2.version) >= Version(ext1.version) 111 | 112 | 113 | @pytest.mark.parametrize("createdb", [True, False]) 114 | def test_extension_whitelist(pg_source_and_target: Tuple[PGRunner, PGRunner], createdb: bool): 115 | source, target = pg_source_and_target 116 | dbnames = {random_string() for _ in range(3)} 117 | extnames = {"btree_gist", "pgcrypto", "postgis"} 118 | 119 | for dbname in dbnames: 120 | source.create_db(dbname=dbname) 121 | for extname in extnames: 122 | source.create_extension(extname=extname, dbname=dbname) 123 | if not createdb: 124 | # create existing db to target 125 | target.create_db(dbname=dbname) 126 | 127 | # whitelist extensions in target 128 | target.make_conf(**{"extwlist.extensions": "'{}'".format(",".join(extnames))}).reload() 129 | 130 | pg_mig = PGMigrate( 131 | source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=createdb, verbose=True 132 | ) 133 | 134 | pg_mig.validate() 135 | for dbname in dbnames: 136 | pg_mig._dump_schema(db=PGDatabase(dbname=dbname, tables=set())) # pylint: disable=protected-access 137 | 138 | # reset databases so that they and installed extensions get queried from server again 139 | setattr(pg_mig.target, "_databases", {}) 140 | 141 | for dbname in dbnames: 142 | for ext1 in pg_mig.source.databases[dbname].pg_ext: 143 | ext2 = next(e for e in pg_mig.target.databases[dbname].pg_ext if e.name == ext1.name) 144 | assert Version(ext2.version) >= Version(ext1.version) 145 | 146 | 147 | @pytest.mark.parametrize("createdb", [True, False]) 148 | def test_extension_not_available(pg_source_and_target: Tuple[PGRunner, PGRunner], createdb: bool): 149 | source, target = pg_source_and_target 150 | dbname = random_string() 151 | extname = "this_extension_is_not_available_in_target" 152 | 153 | if not createdb: 154 | # create existing db to target 155 | target.create_db(dbname=dbname) 156 | 157 | pg_mig = PGMigrate( 158 | source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=createdb, verbose=True 159 | ) 160 | 161 | # mock source databases 162 | setattr( 163 | pg_mig.source, "_databases", 164 | {dbname: PGDatabase(dbname=dbname, tables=set(), pg_ext=[PGExtension(name=extname, version="1.2.3")])} 165 | ) 166 | 167 | with pytest.raises(PGMigrateValidationFailedError) as err: 168 | pg_mig.validate() 169 | assert str(err.value) == f"Extension '{extname}' is not available for installation in target" 170 | 171 | 172 | @pytest.mark.parametrize("createdb", [True, False]) 173 | def test_extension_available_older_version(pg_source_and_target: Tuple[PGRunner, PGRunner], createdb: bool): 174 | source, target = pg_source_and_target 175 | dbname = random_string() 176 | extname = "pgcrypto" 177 | 178 | if not createdb: 179 | # create existing db to target 180 | target.create_db(dbname=dbname) 181 | 182 | pg_mig = PGMigrate( 183 | 
source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=createdb, verbose=True 184 | ) 185 | 186 | # mock source databases 187 | setattr( 188 | pg_mig.source, "_databases", 189 | {dbname: PGDatabase(dbname=dbname, tables=set(), pg_ext=[PGExtension(name=extname, version="999999")])} 190 | ) 191 | 192 | with pytest.raises(PGMigrateValidationFailedError) as err: 193 | pg_mig.validate() 194 | assert f"Extension '{extname}' version available for installation in target is too old" in str(err.value) 195 | 196 | 197 | def test_extension_installed_older_version(pg_source_and_target: Tuple[PGRunner, PGRunner]): 198 | source, target = pg_source_and_target 199 | dbname = random_string() 200 | extname = "some_cool_extension_name" 201 | source_ver = "999999" 202 | target_ver = "999998" 203 | 204 | pg_mig = PGMigrate( 205 | source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=False, verbose=True 206 | ) 207 | 208 | # mock source and target databases 209 | setattr( 210 | pg_mig.source, "_databases", 211 | {dbname: PGDatabase(dbname=dbname, tables=set(), pg_ext=[PGExtension(name=extname, version=source_ver)])} 212 | ) 213 | setattr( 214 | pg_mig.target, "_databases", 215 | {dbname: PGDatabase(dbname=dbname, tables=set(), pg_ext=[PGExtension(name=extname, version=target_ver)])} 216 | ) 217 | 218 | with pytest.raises(PGMigrateValidationFailedError) as err: 219 | pg_mig.validate() 220 | assert str(err.value) == ( 221 | f"Installed extension '{extname}' in target database '{dbname}' is older than in source, " 222 | f"target version: {target_ver}, source version: {source_ver}" 223 | ) 224 | -------------------------------------------------------------------------------- /test/test_pg_migrate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. 
https://aiven.io/ 2 | 3 | from aiven_db_migrate.migrate.errors import PGMigrateValidationFailedError 4 | from aiven_db_migrate.migrate.pgmigrate import PGMigrate, PGMigrateResult 5 | from test.utils import PGRunner, random_string, Timer 6 | from typing import Any, Dict, Optional 7 | 8 | import os 9 | import psycopg2 10 | import pytest 11 | import time 12 | 13 | 14 | class PGMigrateTest: 15 | """Mixin for pg migration tests""" 16 | 17 | source: PGRunner 18 | target: PGRunner 19 | 20 | @staticmethod 21 | def assert_result( 22 | *, 23 | result: Dict[str, Any], 24 | dbname: str, 25 | method: Optional[str], 26 | message: Optional[str] = None, 27 | error: Optional[str] = None, 28 | status: str = "done" 29 | ): 30 | assert message or error 31 | assert result["dbname"] == dbname 32 | if message: 33 | assert result["message"] == message 34 | else: 35 | assert error in result["message"] 36 | assert result["method"] == method 37 | assert result["status"] == status 38 | for field in result: 39 | assert field in {"dbname", "message", "method", "status"} 40 | 41 | @staticmethod 42 | def wait_until_data_migrated(*, pg_mig: PGMigrate, dbname: str, tblname: str, count: int, timeout: float = 10): 43 | exists = pg_mig.target.c( 44 | "SELECT 1 FROM information_schema.tables WHERE table_schema = %s AND table_name= %s", 45 | args=( 46 | "public", 47 | tblname, 48 | ), 49 | dbname=dbname 50 | ) 51 | assert exists 52 | timer = Timer(timeout=timeout, what=f"all data replicated to {dbname} table {tblname}") 53 | while timer.loop(): 54 | result = pg_mig.target.c(f"SELECT count(*) FROM {tblname}", dbname=dbname, return_rows=1)[0] 55 | if int(result["count"]) == count: 56 | break 57 | 58 | def _test_migrate(self, *, createdb: bool, expected_method: str, superuser: bool = False, number_of_dbs: int = 3): 59 | dbnames = [f"test_migrate_db_{i + 1}" for i in range(number_of_dbs)] 60 | for dbname in dbnames: 61 | # create db and table with some data in source 62 | self.source.create_db(dbname=dbname) 63 | with self.source.cursor(dbname=dbname) as cur: 64 | cur.execute(f"CREATE TABLE {dbname}_tbl (something INT)") 65 | cur.execute(f"INSERT INTO {dbname}_tbl VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10)") 66 | 67 | if not createdb: 68 | # create existing db to target 69 | self.target.create_db(dbname=dbname) 70 | 71 | if expected_method == "replication" and not superuser: 72 | # we need an existing db for installing aiven-extras 73 | assert not createdb 74 | # have aiven-extras in both source and target 75 | if not self.source.have_aiven_extras(dbname=dbname) or not self.target.have_aiven_extras(dbname=dbname): 76 | pytest.skip("aiven-extras not available") 77 | 78 | if superuser: 79 | source_conn_info = self.source.super_conn_info() 80 | target_conn_info = self.target.super_conn_info() 81 | else: 82 | source_conn_info = self.source.conn_info() 83 | target_conn_info = self.target.conn_info() 84 | 85 | pg_mig = PGMigrate( 86 | source_conn_info=source_conn_info, 87 | target_conn_info=target_conn_info, 88 | createdb=createdb, 89 | max_replication_lag=0, 90 | stop_replication=True, 91 | verbose=True 92 | ) 93 | 94 | result: PGMigrateResult = pg_mig.migrate() 95 | 96 | for dbname, r in result.pg_databases.items(): 97 | if dbname == "postgres": 98 | # default db 99 | self.assert_result(result=r, dbname=dbname, method=expected_method, message="migrated to existing database") 100 | else: 101 | dbnames.remove(dbname) 102 | self.assert_result( 103 | result=r, 104 | dbname=dbname, 105 | method=expected_method, 106 | message="created and migrated
database" if createdb else "migrated to existing database" 107 | ) 108 | self.wait_until_data_migrated(pg_mig=pg_mig, dbname=dbname, tblname=f"{dbname}_tbl", count=10) 109 | 110 | assert not dbnames 111 | 112 | 113 | @pytest.mark.usefixtures("pg_source_and_target") 114 | class Test_PGMigrate(PGMigrateTest): 115 | 116 | # pylint: disable=no-member 117 | 118 | @pytest.mark.parametrize("createdb", [True, False]) 119 | def test_migrate(self, createdb: bool): 120 | return self._test_migrate(createdb=createdb, expected_method="dump") 121 | 122 | def test_migrate_no_db(self): 123 | result: PGMigrateResult = PGMigrate( 124 | source_conn_info=self.source.conn_info(), target_conn_info=self.target.conn_info() 125 | ).migrate() 126 | assert len(result.pg_databases) == 1 127 | # default db 128 | assert "postgres" in result.pg_databases 129 | 130 | def test_migrate_db_does_not_exist(self): 131 | dbname = "this_db_does_not_exist" 132 | for source_conn_info, target_conn_info in ( 133 | (self.source.conn_info(dbname=dbname), self.target.conn_info()), 134 | (self.source.conn_info(), self.target.conn_info(dbname=dbname)), 135 | ): 136 | with pytest.raises(PGMigrateValidationFailedError) as err: 137 | PGMigrate(source_conn_info=source_conn_info, target_conn_info=target_conn_info).migrate() 138 | assert f'database "{dbname}" does not exist' in str(err.value) 139 | 140 | def test_migrate_invalid_conn_str(self): 141 | for source_conn_info, target_conn_info in ( 142 | (None, self.target.conn_info()), 143 | (self.source.conn_info(), None), 144 | ): 145 | assert not (source_conn_info and target_conn_info) 146 | for conn_info in ( 147 | "postgres://", 148 | "foo=bar", 149 | ): 150 | if source_conn_info is None: 151 | source_conn_info = conn_info 152 | else: 153 | target_conn_info = conn_info 154 | with pytest.raises(PGMigrateValidationFailedError) as err: 155 | PGMigrate(source_conn_info=source_conn_info, target_conn_info=target_conn_info).migrate() 156 | assert str(err.value) == "Invalid source or target connection string" 157 | 158 | def test_migrate_connect_timeout_parameter(self): 159 | for source_conn_info in ("host=example.org connect_timeout=1", "postgresql://example.org?connect_timeout=1"): 160 | start_time = time.monotonic() 161 | with pytest.raises(TimeoutError): 162 | PGMigrate(source_conn_info=source_conn_info, target_conn_info=self.target.conn_info()).migrate() 163 | end_time = time.monotonic() 164 | assert end_time - start_time < 2 165 | 166 | def test_migrate_connect_timeout_environment(self): 167 | start_time = time.monotonic() 168 | original_timeout = os.environ.get("PGCONNECT_TIMEOUT") 169 | try: 170 | os.environ["PGCONNECT_TIMEOUT"] = "1" 171 | with pytest.raises(TimeoutError): 172 | PGMigrate(source_conn_info="host=example.org", target_conn_info=self.target.conn_info()).migrate() 173 | end_time = time.monotonic() 174 | assert end_time - start_time < 2 175 | finally: 176 | if original_timeout is not None: 177 | os.environ["PGCONNECT_TIMEOUT"] = original_timeout 178 | 179 | def test_migrate_same_server(self): 180 | source_conn_info = target_conn_info = self.target.conn_info() 181 | with pytest.raises(PGMigrateValidationFailedError) as err: 182 | PGMigrate(source_conn_info=source_conn_info, target_conn_info=target_conn_info).migrate() 183 | assert str(err.value) == "Migrating to the same server is not supported" 184 | 185 | def test_migrate_missing_languages(self): 186 | pg_mig = PGMigrate( 187 | source_conn_info=self.source.conn_info(), target_conn_info=self.target.conn_info(), createdb=True, 
verbose=True 188 | ) 189 | # mock source/target languages 190 | setattr(pg_mig.source, "_pg_lang", [ 191 | {"lanname": "c"}, 192 | {"lanname": "plpgsql"}, 193 | {"lanname": "sql"}, 194 | {"lanname": "foo"}, 195 | {"lanname": "bar"}, 196 | ]) # yapf: disable 197 | setattr(pg_mig.target, "_pg_lang", [ 198 | {"lanname": "c"}, 199 | {"lanname": "plpgsql"}, 200 | {"lanname": "sql"}, 201 | ]) # yapf: disable 202 | with pytest.raises(PGMigrateValidationFailedError) as err: 203 | pg_mig.migrate() 204 | assert str(err.value) == "Languages not installed in target: bar, foo" 205 | 206 | def test_migrate_source_connection_rejected(self): 207 | dbname0 = "postgres" 208 | dbname1 = random_string() 209 | dbname2 = random_string() 210 | # create db's in source 211 | self.source.create_db(dbname=dbname1) 212 | self.source.create_db(dbname=dbname2) 213 | 214 | user = self.source.testuser 215 | self.source.make_hba_conf(dbname=dbname1, user=user, auth="reject").reload() 216 | # verify that user is rejected for dbname1 217 | error = f'pg_hba.conf rejects connection for host "[local]", user "{user}", database "{dbname1}"' 218 | with pytest.raises(psycopg2.OperationalError) as err: 219 | with self.source.cursor(dbname=dbname1): 220 | pass 221 | assert error in str(err.value) 222 | 223 | pg_mig = PGMigrate( 224 | source_conn_info=self.source.conn_info(), target_conn_info=self.target.conn_info(), createdb=True, verbose=True 225 | ) 226 | 227 | result: PGMigrateResult = pg_mig.migrate() 228 | assert len(result.pg_databases) == 3 229 | self.assert_result( 230 | result=result.pg_databases[dbname0], dbname=dbname0, method="dump", message="migrated to existing database" 231 | ) 232 | self.assert_result(result=result.pg_databases[dbname1], dbname=dbname1, method=None, error=error, status="failed") 233 | self.assert_result( 234 | result=result.pg_databases[dbname2], dbname=dbname2, method="dump", message="created and migrated database" 235 | ) 236 | 237 | def test_migrate_target_connection_rejected(self): 238 | dbname0 = "postgres" 239 | dbname1 = random_string() 240 | dbname2 = random_string() 241 | # create db's in source 242 | self.source.create_db(dbname=dbname1) 243 | self.source.create_db(dbname=dbname2) 244 | # create db in target 245 | self.target.create_db(dbname=dbname1) 246 | 247 | user = self.target.testuser 248 | self.target.make_hba_conf(dbname=dbname1, user=user, auth="reject").reload() 249 | # verify that user is rejected for dbname1 250 | error = f'pg_hba.conf rejects connection for host "[local]", user "{user}", database "{dbname1}"' 251 | with pytest.raises(psycopg2.OperationalError) as err: 252 | with self.target.cursor(dbname=dbname1): 253 | pass 254 | assert error in str(err.value) 255 | 256 | pg_mig = PGMigrate( 257 | source_conn_info=self.source.conn_info(), target_conn_info=self.target.conn_info(), createdb=True, verbose=True 258 | ) 259 | 260 | result: PGMigrateResult = pg_mig.migrate() 261 | assert len(result.pg_databases) == 3 262 | self.assert_result( 263 | result=result.pg_databases[dbname0], dbname=dbname0, method="dump", message="migrated to existing database" 264 | ) 265 | self.assert_result(result=result.pg_databases[dbname1], dbname=dbname1, method=None, error=error, status="failed") 266 | self.assert_result( 267 | result=result.pg_databases[dbname2], dbname=dbname2, method="dump", message="created and migrated database" 268 | ) 269 | 270 | def test_migrate_filtered_db_sql_injection(self): 271 | dbname1 = random_string() 272 | dbname2 = random_string() 273 | 274 | 
self.source.create_db(dbname=dbname1) 275 | self.source.create_db(dbname=dbname2) 276 | 277 | pg_mig = PGMigrate( 278 | source_conn_info=self.source.conn_info(), 279 | target_conn_info=self.target.conn_info(), 280 | createdb=True, 281 | verbose=True, 282 | filtered_db=f"{dbname1},') OR ('a' = 'a", 283 | ) 284 | result: PGMigrateResult = pg_mig.migrate() 285 | assert len(result.pg_databases) == 2 286 | 287 | 288 | @pytest.mark.usefixtures("pg_source_and_target") 289 | class Test_PGMigrate_Replication(PGMigrateTest): 290 | 291 | # pylint: disable=no-member 292 | 293 | def test_migrate_with_aiven_extras(self): 294 | # default db 295 | dbname = "postgres" 296 | if not self.source.have_aiven_extras(dbname=dbname) or not self.target.have_aiven_extras(dbname=dbname): 297 | pytest.skip("aiven-extras not available") 298 | self.source.add_cleanup(lambda: self.source.drop_aiven_extras(dbname=dbname)) 299 | self.target.add_cleanup(lambda: self.target.drop_aiven_extras(dbname=dbname)) 300 | # we need to have an existing db for installing aiven-extras 301 | return self._test_migrate(createdb=False, expected_method="replication") 302 | 303 | @pytest.mark.parametrize("createdb", [True, False]) 304 | def test_migrate_with_superuser(self, createdb: bool): 305 | return self._test_migrate(createdb=createdb, expected_method="replication", superuser=True) 306 | 307 | @pytest.mark.parametrize("createdb", [True]) 308 | def test_migrate_source_aiven_extras(self, createdb: bool): 309 | dbname = random_string() 310 | tblname = f"{dbname}_tbl" 311 | # create db in source 312 | self.source.create_db(dbname=dbname) 313 | # have aiven-extras in source 314 | if not self.source.have_aiven_extras(dbname=dbname): 315 | pytest.skip("aiven-extras not available in source") 316 | # create some data in db 317 | with self.source.cursor(dbname=dbname) as cur: 318 | cur.execute(f"CREATE TABLE {tblname} (something INT)") 319 | cur.execute(f"INSERT INTO {tblname} VALUES (1), (2), (3)") 320 | 321 | # whitelist aiven-extras in target 322 | extnames = {"aiven_extras", "dblink"} 323 | self.target.make_conf(**{"extwlist.extensions": "'{}'".format(",".join(extnames))}).reload() 324 | 325 | if not createdb: 326 | # create existing db to target; since the tool also does this automatically (failing silently), 327 | # the expected method will always be replication 328 | self.target.create_db(dbname=dbname) 329 | 330 | pg_mig = PGMigrate( 331 | source_conn_info=self.source.conn_info(), 332 | target_conn_info=self.target.conn_info(), 333 | createdb=createdb, 334 | max_replication_lag=0, 335 | stop_replication=True, 336 | verbose=True 337 | ) 338 | 339 | result: PGMigrateResult = pg_mig.migrate() 340 | 341 | self.assert_result( 342 | result=result.pg_databases[dbname], 343 | dbname=dbname, 344 | method="replication", 345 | message="created and migrated database" if createdb else "migrated to existing database" 346 | ) 347 | self.wait_until_data_migrated(pg_mig=pg_mig, dbname=dbname, tblname=tblname, count=3) 348 | # default db 349 | self.assert_result( 350 | result=result.pg_databases["postgres"], 351 | dbname="postgres", 352 | method="dump", 353 | message="migrated to existing database" 354 | ) 355 | 356 | # verify that there's no leftovers 357 | assert not self.source.list_pubs(dbname=dbname) 358 | assert not self.source.list_slots() 359 | assert not self.target.list_subs() 360 | 361 | def test_migrate_target_aiven_extras(self): 362 | dbname = random_string() 363 | tblname = f"{dbname}_tbl" 364 | # create existing db to target 365 |
self.target.create_db(dbname=dbname) 366 | # have aiven-extras in target 367 | if not self.target.have_aiven_extras(dbname=dbname): 368 | pytest.skip("aiven-extras not available in target") 369 | # create db in source 370 | self.source.create_db(dbname=dbname) 371 | # create some data in db 372 | with self.source.cursor(dbname=dbname) as cur: 373 | cur.execute(f"CREATE TABLE {tblname} (something INT)") 374 | cur.execute(f"INSERT INTO {tblname} VALUES (1), (2), (3)") 375 | 376 | pg_mig = PGMigrate( 377 | source_conn_info=self.source.conn_info(), 378 | target_conn_info=self.target.conn_info(), 379 | createdb=False, 380 | max_replication_lag=0, 381 | stop_replication=True, 382 | verbose=True 383 | ) 384 | 385 | result: PGMigrateResult = pg_mig.migrate() 386 | 387 | assert len(result.pg_databases) == 2 388 | self.assert_result( 389 | result=result.pg_databases[dbname], dbname=dbname, method="dump", message="migrated to existing database" 390 | ) 391 | self.wait_until_data_migrated(pg_mig=pg_mig, dbname=dbname, tblname=tblname, count=3) 392 | # default db 393 | self.assert_result( 394 | result=result.pg_databases["postgres"], 395 | dbname="postgres", 396 | method="dump", 397 | message="migrated to existing database" 398 | ) 399 | 400 | # verify that there's no leftovers 401 | assert not self.source.list_pubs(dbname=dbname) 402 | assert not self.source.list_slots() 403 | assert not self.target.list_subs() 404 | -------------------------------------------------------------------------------- /test/test_pg_replication.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. https://aiven.io/ 2 | 3 | from aiven_db_migrate.migrate.pgmigrate import PGSource, PGTarget 4 | from packaging.version import Version 5 | from test.utils import PGRunner, random_string, Timer 6 | from typing import Tuple 7 | 8 | import psycopg2 9 | import psycopg2.errorcodes 10 | import pytest 11 | 12 | 13 | @pytest.mark.parametrize("aiven_extras", [True, False]) 14 | def test_replication(pg_source_and_target: Tuple[PGRunner, PGRunner], aiven_extras: bool): 15 | source, target = pg_source_and_target 16 | dbname = random_string() 17 | tblname = random_string() 18 | 19 | # have the db in both source and target 20 | source.create_db(dbname=dbname) 21 | target.create_db(dbname=dbname) 22 | 23 | # create table with some data in source db 24 | with source.cursor(dbname=dbname) as cur: 25 | cur.execute(f"CREATE TABLE {tblname} (something INT)") 26 | cur.execute(f"INSERT INTO {tblname} VALUES (1), (2)") 27 | 28 | # create table in target 29 | with target.cursor(dbname=dbname) as cur: 30 | cur.execute(f"CREATE TABLE {tblname} (something INT)") 31 | 32 | if aiven_extras: 33 | # have aiven-extras in both source and target 34 | if not source.have_aiven_extras(dbname=dbname) or not target.have_aiven_extras(dbname=dbname): 35 | pytest.skip("aiven-extras not available") 36 | pg_source = PGSource(source.conn_info()) 37 | pg_target = PGTarget(target.conn_info()) 38 | else: 39 | pg_source = PGSource(source.super_conn_info()) 40 | pg_target = PGTarget(target.super_conn_info()) 41 | 42 | assert pg_source.has_aiven_extras(dbname=dbname) if aiven_extras else pg_source.is_superuser 43 | assert pg_target.has_aiven_extras(dbname=dbname) if aiven_extras else pg_target.is_superuser 44 | 45 | pubname = pg_source.create_publication(dbname=dbname) 46 | slotname = pg_source.create_replication_slot(dbname=dbname) 47 | # verify that pub and replication slot exist 48 | pub =
pg_source.get_publication(dbname=dbname) 49 | assert pub 50 | assert pub["pubname"] == pubname 51 | slot = pg_source.get_replication_slot(dbname=dbname) 52 | assert slot 53 | assert slot["slot_name"] == slotname 54 | assert slot["slot_type"] == "logical" 55 | 56 | conn_str = pg_source.conn_str(dbname=dbname) 57 | subname = pg_target.create_subscription(conn_str=conn_str, dbname=dbname) 58 | # verify that sub exists 59 | sub = pg_target.get_subscription(dbname=dbname) 60 | assert sub 61 | assert sub["subname"] == subname 62 | assert sub["subenabled"] 63 | assert pubname in sub["subpublications"] 64 | 65 | # have some more data in source 66 | pg_source.c(f"INSERT INTO {tblname} VALUES (3), (4), (5)", dbname=dbname, return_rows=0) 67 | 68 | # wait until replication is in sync 69 | timer = Timer(timeout=10, what="replication in sync") 70 | while timer.loop(): 71 | in_sync, write_lsn = pg_source.replication_in_sync(dbname=dbname, max_replication_lag=0) 72 | if in_sync and pg_target.replication_in_sync(dbname=dbname, write_lsn=write_lsn, max_replication_lag=0): 73 | break 74 | 75 | # verify that all data has been replicated 76 | timer = Timer(timeout=10, what="all data replicated") 77 | while timer.loop(): 78 | count = pg_target.c(f"SELECT count(*) FROM {tblname}", dbname=dbname, return_rows=1)[0] 79 | if int(count["count"]) == 5: 80 | break 81 | 82 | pg_target.cleanup(dbname=dbname) 83 | pg_source.cleanup(dbname=dbname) 84 | 85 | # verify that pub, replication slot and sub are dropped 86 | assert not source.list_pubs(dbname=dbname) 87 | assert not source.list_slots() 88 | assert not target.list_subs() 89 | 90 | 91 | def test_replication_no_aiven_extras_no_superuser(pg_source_and_target: Tuple[PGRunner, PGRunner]): 92 | source, target = pg_source_and_target 93 | dbname = random_string() 94 | source.create_db(dbname=dbname) 95 | target.create_db(dbname=dbname) 96 | 97 | pg_source = PGSource(source.conn_info()) 98 | assert not pg_source.has_aiven_extras(dbname=dbname) 99 | assert not pg_source.is_superuser 100 | 101 | pg_target = PGTarget(target.conn_info()) 102 | assert not pg_target.has_aiven_extras(dbname=dbname) 103 | assert not pg_target.is_superuser 104 | 105 | # creating publication for all tables in db should fail with insufficient privilege 106 | with pytest.raises(psycopg2.ProgrammingError) as err: 107 | pg_source.create_publication(dbname=dbname) 108 | assert err.value.pgcode == psycopg2.errorcodes.INSUFFICIENT_PRIVILEGE 109 | assert err.value.diag.message_primary == "must be superuser to create FOR ALL TABLES publication" 110 | 111 | # creating subscription should fail with insufficient privilege 112 | with pytest.raises(psycopg2.ProgrammingError) as err: 113 | pg_target.create_subscription(conn_str=pg_source.conn_str(), dbname=dbname) 114 | assert err.value.pgcode == psycopg2.errorcodes.INSUFFICIENT_PRIVILEGE 115 | 116 | privilege_error_message = "must be superuser to create subscriptions" 117 | # error message was changed 118 | if pg_target.version >= Version("16"): 119 | privilege_error_message = "permission denied to create subscription" 120 | 121 | assert err.value.diag.message_primary == privilege_error_message 122 | 123 | # verify that there's no leftovers 124 | assert not source.list_pubs(dbname=dbname) 125 | assert not source.list_slots() 126 | assert not target.list_subs() 127 | -------------------------------------------------------------------------------- /test/test_pg_roles.py: -------------------------------------------------------------------------------- 1 | # 
Copyright (c) 2020 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from aiven_db_migrate.migrate.errors import PGMigrateValidationFailedError 3 | from aiven_db_migrate.migrate.pgmigrate import PGMigrate, PGMigrateResult, PGTarget 4 | from datetime import datetime 5 | from packaging.version import Version 6 | from test.utils import modify_pg_security_agent_reserved_roles, PGRunner, random_string 7 | from typing import Tuple 8 | 9 | import pytest 10 | import time 11 | 12 | 13 | def test_pg_roles_with_no_password(pg_source_and_target: Tuple[PGRunner, PGRunner]): 14 | source, target = pg_source_and_target 15 | new_roles = {random_string() for _ in range(10)} 16 | 17 | def _cleanup(): 18 | for username in new_roles: 19 | source.drop_user(username=username) 20 | 21 | source.add_cleanup(_cleanup) 22 | for username in new_roles: 23 | source.create_role(username=username, login=False) 24 | 25 | pg_mig = PGMigrate(source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=True, verbose=True) 26 | existing_roles = set(pg_mig.target.pg_roles.keys()) 27 | all_roles = new_roles | existing_roles 28 | result: PGMigrateResult = pg_mig.migrate() 29 | 30 | for role in result.pg_roles.values(): 31 | assert role["rolname"] in all_roles 32 | if role["rolname"] in existing_roles: 33 | assert role["status"] == "exists" 34 | assert role["message"] == "role already exists" 35 | else: 36 | assert role["rolname"] in new_roles 37 | assert role["status"] == "created" 38 | assert role["message"] == "role created" 39 | assert not role["rolpassword"] 40 | 41 | 42 | def test_pg_roles_with_placeholder_password(pg_source_and_target: Tuple[PGRunner, PGRunner]): 43 | source, target = pg_source_and_target 44 | new_roles = {random_string() for _ in range(10)} 45 | 46 | def _cleanup(): 47 | for username in new_roles: 48 | source.drop_user(username=username) 49 | 50 | source.add_cleanup(_cleanup) 51 | for username in new_roles: 52 | source.create_role(username=username, password=random_string()) 53 | 54 | pg_mig = PGMigrate(source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=True, verbose=True) 55 | existing_roles = set(pg_mig.target.pg_roles.keys()) 56 | all_roles = new_roles | existing_roles 57 | result: PGMigrateResult = pg_mig.migrate() 58 | 59 | for role in result.pg_roles.values(): 60 | assert role["rolname"] in all_roles 61 | if role["rolname"] in existing_roles: 62 | assert role["status"] == "exists" 63 | assert role["message"] == "role already exists" 64 | else: 65 | assert role["rolname"] in new_roles 66 | assert role["status"] == "created" 67 | assert role["message"] == "role created" 68 | assert role["rolpassword"].startswith("placeholder_") 69 | 70 | 71 | def test_pg_roles_rolconfig(pg_source_and_target: Tuple[PGRunner, PGRunner]): 72 | source, target = pg_source_and_target 73 | username = random_string() 74 | source.add_cleanup(lambda: source.drop_user(username=username)) 75 | source.create_role(username=username, password=random_string(), statement_timeout=12345, search_path="foobar") 76 | pg_mig = PGMigrate(source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=True, verbose=True) 77 | existing_roles = set(pg_mig.target.pg_roles.keys()) 78 | all_roles = {username} | existing_roles 79 | result: PGMigrateResult = pg_mig.migrate() 80 | 81 | for role in result.pg_roles.values(): 82 | assert role["rolname"] in all_roles 83 | if role["rolname"] in existing_roles: 84 | assert role["status"] == "exists" 85 | assert role["message"] == "role 
already exists" 86 | else: 87 | assert role["rolname"] == username 88 | assert role["status"] == "created" 89 | assert role["message"] == "role created" 90 | assert role["rolpassword"].startswith("placeholder_") 91 | 92 | 93 | def test_pg_roles_superusers(pg_source_and_target_unsafe: Tuple[PGRunner, PGRunner]): 94 | """Test that superusers are not created on target **without** a superuser connection. 95 | 96 | Note: 97 | This tests the behaviour of an unsafe target, which would not have ``shared_preload_libraries = aiven_gatekeeper``. 98 | """ 99 | source, target = pg_source_and_target_unsafe 100 | username = random_string() 101 | source.add_cleanup(lambda: source.drop_user(username=username)) 102 | source.create_role(username=username, password=random_string(), superuser=True) 103 | pg_mig = PGMigrate(source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=True, verbose=True) 104 | existing_roles = set(pg_mig.target.pg_roles.keys()) 105 | all_roles = {username} | existing_roles 106 | result: PGMigrateResult = pg_mig.migrate() 107 | 108 | for role in result.pg_roles.values(): 109 | assert role["rolname"] in all_roles 110 | if role["rolname"] in existing_roles: 111 | assert role["status"] == "exists" 112 | assert role["message"] == "role already exists" 113 | else: 114 | assert role["rolname"] == username 115 | assert role["status"] == "failed" 116 | 117 | if Version(target.pgversion) >= Version("16"): 118 | privilege_err_message = 'permission denied to create role' 119 | else: 120 | privilege_err_message = 'must be superuser to create superusers' 121 | assert role["message"] == privilege_err_message 122 | assert not role["rolpassword"] 123 | 124 | roles = set(r["rolname"] for r in target.list_roles()) 125 | assert username not in roles 126 | 127 | 128 | def test_pg_roles_replication_users(pg_source_and_target: Tuple[PGRunner, PGRunner]): 129 | source, target = pg_source_and_target 130 | username = random_string() 131 | source.add_cleanup(lambda: source.drop_user(username=username)) 132 | source.create_role(username=username, password=random_string(), replication=True) 133 | pg_mig = PGMigrate(source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=True, verbose=True) 134 | existing_roles = set(pg_mig.target.pg_roles.keys()) 135 | all_roles = {username} | existing_roles 136 | result: PGMigrateResult = pg_mig.migrate() 137 | 138 | for role in result.pg_roles.values(): 139 | assert role["rolname"] in all_roles 140 | if role["rolname"] in existing_roles: 141 | assert role["status"] == "exists" 142 | assert role["message"] == "role already exists" 143 | # >= PG16, users with CREATEROLE privilege can create replication users 144 | # Prior PG16, only superusers were allowed to 145 | elif Version(target.pgversion) >= Version("16"): 146 | assert role["rolname"] == username 147 | assert role["status"] == "created" 148 | assert role["message"] == "role created" 149 | else: 150 | assert role["rolname"] == username 151 | assert role["status"] == "failed" 152 | assert role["message"] == "must be superuser to create replication users" 153 | assert not role["rolpassword"] 154 | 155 | roles = set(r["rolname"] for r in target.list_roles()) 156 | if Version(target.pgversion) >= Version("16"): 157 | assert username in roles 158 | else: 159 | assert username not in roles 160 | 161 | 162 | def test_pg_roles_as_superuser(pg_source_and_target_unsafe: Tuple[PGRunner, PGRunner]): 163 | """Test that superusers are successfully created on target **with** a 
superuser connection. 164 | 165 | Note: 166 | This tests the behaviour of an unsafe target, which would not have ``shared_preload_libraries = aiven_gatekeeper``. 167 | """ 168 | source, target = pg_source_and_target_unsafe 169 | superuser = random_string() 170 | repuser = random_string() 171 | source.add_cleanup(lambda: source.drop_user(username=superuser)) 172 | source.add_cleanup(lambda: source.drop_user(username=repuser)) 173 | source.create_role(username=superuser, password=random_string(), superuser=True) 174 | source.create_role(username=repuser, password=random_string(), replication=True) 175 | pg_mig = PGMigrate( 176 | source_conn_info=source.conn_info(), target_conn_info=target.super_conn_info(), createdb=True, verbose=True 177 | ) 178 | existing_roles = set(pg_mig.target.pg_roles.keys()) 179 | new_roles = {superuser, repuser} 180 | all_roles = new_roles | existing_roles 181 | result: PGMigrateResult = pg_mig.migrate() 182 | 183 | for role in result.pg_roles.values(): 184 | assert role["rolname"] in all_roles 185 | if role["rolname"] in existing_roles: 186 | assert role["status"] == "exists" 187 | assert role["message"] == "role already exists" 188 | else: 189 | assert role["rolname"] in new_roles 190 | assert role["status"] == "created" 191 | assert role["message"] == "role created" 192 | assert role["rolpassword"].startswith("placeholder_") 193 | 194 | 195 | def test_pg_roles_valid_until(pg_source_and_target: Tuple[PGRunner, PGRunner]): 196 | source, target = pg_source_and_target 197 | username = random_string() 198 | validuntil: datetime = datetime.fromtimestamp(time.time() + 60) 199 | source.add_cleanup(lambda: source.drop_user(username=username)) 200 | source.create_role(username=username, password=random_string(), validuntil=validuntil) 201 | pg_mig = PGMigrate(source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=True, verbose=True) 202 | existing_roles = set(pg_mig.target.pg_roles.keys()) 203 | all_roles = {username} | existing_roles 204 | result: PGMigrateResult = pg_mig.migrate() 205 | 206 | for role in result.pg_roles.values(): 207 | assert role["rolname"] in all_roles 208 | if role["rolname"] in existing_roles: 209 | assert role["status"] == "exists" 210 | assert role["message"] == "role already exists" 211 | else: 212 | assert role["rolname"] == username 213 | assert role["status"] == "created" 214 | assert role["message"] == "role created" 215 | assert role["rolpassword"].startswith("placeholder_") 216 | 217 | role = next(r for r in target.list_roles() if r["rolname"] == username) 218 | assert role["rolvaliduntil"].date() == validuntil.date() 219 | assert role["rolvaliduntil"].time() == validuntil.time() 220 | 221 | 222 | def test_pg_roles_connection_limit(pg_source_and_target: Tuple[PGRunner, PGRunner]): 223 | source, target = pg_source_and_target 224 | username = random_string() 225 | connlimit = 42 226 | source.add_cleanup(lambda: source.drop_user(username=username)) 227 | source.create_role(username=username, password=random_string(), connlimit=connlimit) 228 | pg_mig = PGMigrate(source_conn_info=source.conn_info(), target_conn_info=target.conn_info(), createdb=True, verbose=True) 229 | existing_roles = set(pg_mig.target.pg_roles.keys()) 230 | all_roles = {username} | existing_roles 231 | result: PGMigrateResult = pg_mig.migrate() 232 | 233 | for role in result.pg_roles.values(): 234 | assert role["rolname"] in all_roles 235 | if role["rolname"] in existing_roles: 236 | assert role["status"] == "exists" 237 | assert role["message"] == 
"role already exists" 238 | else: 239 | assert role["rolname"] == username 240 | assert role["status"] == "created" 241 | assert role["message"] == "role created" 242 | assert role["rolpassword"].startswith("placeholder_") 243 | 244 | role = next(r for r in target.list_roles() if r["rolname"] == username) 245 | assert role["rolconnlimit"] == connlimit 246 | 247 | 248 | def test_migration_fails_with_additional_superuser_roles(pg_source_and_target: Tuple[PGRunner, PGRunner]): 249 | """Test that it fails when we try to migrate superuser roles that are not in the reserved roles list of the target.""" 250 | source, target = pg_source_and_target 251 | superuser1 = random_string() 252 | superuser2 = random_string() 253 | source.add_cleanup(lambda: source.drop_user(username=superuser1)) 254 | source.add_cleanup(lambda: source.drop_user(username=superuser2)) 255 | source.create_role(username=superuser1, password=random_string(), superuser=True) 256 | source.create_role(username=superuser2, password=random_string(), superuser=True) 257 | pg_mig = PGMigrate( 258 | source_conn_info=source.conn_info(), target_conn_info=target.super_conn_info(), createdb=True, verbose=True 259 | ) 260 | 261 | assert pg_mig.target.is_pg_security_agent_enabled 262 | 263 | with pytest.raises( 264 | PGMigrateValidationFailedError, 265 | match=r"Some superuser roles from source database .* are not allowed in target database.*", 266 | ): 267 | pg_mig.migrate() 268 | 269 | 270 | def test_migration_succeeds_when_additional_superuser_roles_are_excluded( 271 | pg_source_and_target: Tuple[PGRunner, PGRunner], 272 | ) -> None: 273 | """Test that migration succeeds when non allowed superuser roles are excluded.""" 274 | source, target = pg_source_and_target 275 | regularuser = random_string() 276 | superuser1 = random_string() 277 | superuser2 = random_string() 278 | source.add_cleanup(lambda: source.drop_user(username=regularuser)) 279 | source.add_cleanup(lambda: source.drop_user(username=superuser1)) 280 | source.add_cleanup(lambda: source.drop_user(username=superuser2)) 281 | source.create_role(username=regularuser, password=random_string()) 282 | source.create_role(username=superuser1, password=random_string(), superuser=True) 283 | source.create_role(username=superuser2, password=random_string(), superuser=True) 284 | 285 | pg_mig = PGMigrate( 286 | source_conn_info=source.conn_info(), 287 | target_conn_info=target.super_conn_info(), 288 | createdb=True, 289 | verbose=True, 290 | excluded_roles=f"{superuser1},{superuser2}", 291 | ) 292 | assert pg_mig.target.is_pg_security_agent_enabled 293 | result = pg_mig.migrate() 294 | 295 | assert result.pg_roles.keys() == {regularuser, source.testuser} 296 | 297 | 298 | def test_migration_succeeds_with_authorized_superuser_role(pg_source_and_target: Tuple[PGRunner, PGRunner]) -> None: 299 | """Test that it succeeds when we try to migrate a superuser role that is in the reserved roles list of the target.""" 300 | source, target = pg_source_and_target 301 | 302 | with modify_pg_security_agent_reserved_roles(target) as superuser: 303 | source.add_cleanup(lambda: source.drop_user(username=superuser)) 304 | source.create_role(username=superuser, password=random_string(), superuser=True) 305 | 306 | pg_mig = PGMigrate( 307 | source_conn_info=source.conn_info(), target_conn_info=target.super_conn_info(), createdb=True, verbose=True 308 | ) 309 | 310 | reserved_roles = pg_mig.target.get_security_agent_reserved_roles() 311 | 312 | assert pg_mig.target.is_pg_security_agent_enabled 313 | assert 
superuser in pg_mig.target.get_security_agent_reserved_roles(), str(reserved_roles) 314 | 315 | result: PGMigrateResult = pg_mig.migrate() 316 | 317 | assert superuser in result.pg_roles 318 | assert result.pg_roles[superuser]["status"] == "created" 319 | assert result.pg_roles[superuser]["message"] == "role created" 320 | 321 | # check the attributes of this role on the target 322 | perms = pg_mig.target.c("SELECT * FROM pg_roles WHERE rolname = %s", args=(superuser, ), return_rows=1)[0] 323 | assert perms["rolsuper"] is True 324 | 325 | 326 | def test_user_cannot_see_reserved_roles(pg_source_and_target: Tuple[PGRunner, PGRunner]) -> None: 327 | """Test that a user cannot see the reserved roles.""" 328 | source, target = pg_source_and_target 329 | 330 | authorized_roles = PGTarget(conn_info=target.conn_info()).get_security_agent_reserved_roles() 331 | 332 | assert not authorized_roles 333 | 334 | 335 | def test_superuser_can_see_reserved_roles( 336 | pg_source_and_target: Tuple[PGRunner, PGRunner], pg_system_roles: list[str] 337 | ) -> None: 338 | """Test that a superuser can see the reserved roles.""" 339 | source, target = pg_source_and_target 340 | 341 | authorized_roles = PGTarget(conn_info=target.super_conn_info()).get_security_agent_reserved_roles() 342 | 343 | assert set(authorized_roles) == set(pg_system_roles) 344 | 
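345 | 346 | # Illustrative sketch, not part of the original suite: the reserved-role list read via 347 | # get_security_agent_reserved_roles() above is plain server configuration, so it can also 348 | # be inspected directly; the GUC name below is the one test/utils.py sets in 349 | # modify_pg_security_agent_reserved_roles(). 350 | def example_show_reserved_roles_guc(target: PGRunner) -> list[str]: 351 | with target.cursor(username=target.superuser) as cur: 352 | cur.execute("SHOW aiven.pg_security_agent_reserved_roles") 353 | value = next(iter(cur.fetchone().values())) 354 | return [role for role in value.split(",") if role] 355 | 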
\'ble{i}"' for i in range(4)] 22 | to_filter = tables[:2] 23 | pg_mig = PGMigrate( 24 | source_conn_info=source.super_conn_info(), 25 | target_conn_info=target.super_conn_info(), 26 | verbose=True, 27 | with_tables=None if skip else to_filter, 28 | skip_tables=to_filter if skip else None, 29 | replicate_extensions=with_extension, 30 | ) 31 | for t in tables: 32 | pg_mig.source.c(f"CREATE TABLE {t} (foo int)", return_rows=0, dbname=db_name) 33 | pg_mig.source.c("CREATE EXTENSION postgis CASCADE", return_rows=0, dbname=db_name) 34 | # pylint: disable=protected-access 35 | pg_mig.source._set_db(dbname=db_name) 36 | # pylint: enable=protected-access 37 | db = pg_mig.source.databases[db_name] 38 | # sanity 39 | assert db.tables is not None 40 | filtered_names = pg_mig.filter_tables(db) 41 | for name in to_filter: 42 | if skip: 43 | assert f"public.{name}" not in filtered_names, filtered_names 44 | else: 45 | assert f"public.{name}" in filtered_names, filtered_names 46 | if with_extension and skip: 47 | assert "public.spatial_ref_sys" in filtered_names, filtered_names 48 | else: 49 | assert "public.spatial_ref_sys" not in filtered_names, filtered_names 50 | 51 | pg_mig = PGMigrate( 52 | source_conn_info=source.super_conn_info(), 53 | target_conn_info=target.super_conn_info(), 54 | verbose=True, 55 | with_tables=None, 56 | skip_tables=None, 57 | replicate_extensions=with_extension, 58 | ) 59 | for t in tables: 60 | pg_mig.source.c(f"CREATE TABLE {t} (foo int)", return_rows=0, dbname=other_db_name) 61 | pg_mig.source.c("CREATE EXTENSION postgis CASCADE", return_rows=0, dbname=other_db_name) 62 | # pylint: disable=protected-access 63 | pg_mig.source._set_db(dbname=other_db_name) 64 | # pylint: enable=protected-access 65 | db = pg_mig.source.databases[other_db_name] 66 | filtered_names = pg_mig.filter_tables(db) 67 | if with_extension: 68 | assert not filtered_names, filtered_names 69 | else: 70 | assert set(filtered_names) == {f"public.{t}" for t in tables}, filtered_names 71 | 72 | 73 | @pytest.mark.parametrize(["skip", "with_db", "with_schema"], [ 74 | [True, False, False], 75 | [False, False, False], 76 | [True, False, True], 77 | [False, False, True], 78 | [True, True, True], 79 | [False, True, True], 80 | ]) 81 | def test_table_filtering( 82 | pg_source_and_target: Tuple[PGRunner, PGRunner], skip: bool, with_db: bool, with_schema: bool 83 | ) -> None: 84 | source, target = pg_source_and_target 85 | db_name = random_string(6) 86 | other_db_name = random_string(6) 87 | schema_name = "schema" if with_schema else "public" 88 | source.create_db(dbname=db_name) 89 | if with_db: 90 | source.create_db(dbname=other_db_name) 91 | tables_names = [f'"ta .\'ble{i}"' for i in range(4)] 92 | tables = [f"{schema_name}.{t}" for t in tables_names] 93 | if with_schema and with_db: 94 | tables = [f"{db_name}.{schema_name}.{t}" for t in tables_names] 95 | pg_mig = PGMigrate( 96 | source_conn_info=source.conn_info(), 97 | target_conn_info=target.conn_info(), 98 | verbose=True, 99 | with_tables=None if skip else tables, 100 | skip_tables=tables if skip else None, 101 | replicate_extensions=True, 102 | ) 103 | if with_schema: 104 | pg_mig.source.c(f"CREATE SCHEMA {schema_name}", return_rows=0, dbname=db_name) 105 | if with_db: 106 | pg_mig.source.c(f"CREATE SCHEMA {schema_name}", return_rows=0, dbname=other_db_name) 107 | # create all tables 108 | for t in tables_names: 109 | if with_db: 110 | pg_mig.source.c(f"CREATE TABLE {schema_name}.{t} (foo int)", return_rows=0, dbname=other_db_name) 111 | if with_schema: 112 | 
pg_mig.source.c(f"CREATE TABLE {schema_name}.{t} (foo int)", return_rows=0, dbname=db_name) 113 | else: 114 | pg_mig.source.c(f"CREATE TABLE {t} (foo int)", return_rows=0, dbname=db_name) 115 | # pylint: disable=protected-access 116 | pg_mig.source._set_db(dbname=db_name) 117 | other_db = None 118 | if with_db: 119 | pg_mig.source._set_db(dbname=other_db_name) 120 | other_db = pg_mig.source.databases[other_db_name] 121 | # pylint: enable=protected-access 122 | db = pg_mig.source.databases[db_name] 123 | # sanity 124 | assert db.tables is not None 125 | assert len(db.tables) == len(tables) 126 | 127 | filtered = pg_mig.filter_tables(db) 128 | if skip: 129 | assert not filtered 130 | else: 131 | assert len(filtered) == len(tables) 132 | if not with_db: 133 | assert set(filtered) == set(tables) 134 | # with a db there the strings do not match 135 | if other_db and not skip: 136 | # skip means comparison is reversed 137 | no_match_filter = pg_mig.filter_tables(other_db) 138 | assert not no_match_filter 139 | 140 | 141 | @pytest.mark.parametrize("superuser", [True, False]) 142 | @pytest.mark.parametrize("extras", [True, False]) 143 | def test_replicate_filter_with(pg_source_and_target: Tuple[PGRunner, PGRunner], superuser: bool, extras: bool) -> None: 144 | source, target = pg_source_and_target 145 | db_name = random_string(6) 146 | other_db_name = random_string(6) 147 | for db in [db_name, other_db_name]: 148 | for runner in [source, target]: 149 | runner.create_db(dbname=db) 150 | if extras: 151 | runner.create_extension( 152 | extname="aiven_extras", 153 | dbname=db, 154 | grantee=runner.superuser if superuser else runner.testuser, 155 | ) 156 | runner.have_aiven_extras(dbname=db, grantee=runner.superuser if superuser else runner.testuser) 157 | table_names = [f'"ta .\'ble{i}"' for i in range(3)] 158 | for db in [db_name, other_db_name]: 159 | with source.cursor(dbname=db) as c: 160 | for t in table_names: 161 | c.execute(f"CREATE TABLE {t} (foo INT)") 162 | c.execute(f"INSERT INTO {t} (foo) VALUES (1), (2), (3)") 163 | 164 | only_tables = [ 165 | f'{db_name}.public."ta .\'ble0"', 166 | f'{db_name}.public."ta .\'ble1"', 167 | f'{other_db_name}.public."ta .\'ble2"', 168 | ] 169 | pg_mig = PGMigrate( 170 | source_conn_info=source.super_conn_info() if superuser else source.conn_info(), 171 | target_conn_info=target.super_conn_info() if superuser else target.conn_info(), 172 | verbose=False, 173 | with_tables=only_tables, 174 | createdb=True, 175 | replicate_extensions=True, 176 | ) 177 | try: 178 | result = pg_mig.migrate() 179 | for db in {db_name, other_db_name}: 180 | assert db in set(pg_mig.target.databases.keys()) 181 | assert db in result.pg_databases 182 | if extras or superuser: 183 | assert result.pg_databases[db]["method"] == "replication", result.pg_databases[db] 184 | else: 185 | assert result.pg_databases[db]["method"] == "dump", result.pg_databases[db] 186 | matched_tables: Dict[str, Set] = {db_name: set(), other_db_name: set()} 187 | desired = {db_name: {'"ta .\'ble0"', '"ta .\'ble1"'}, other_db_name: {'"ta .\'ble2"'}} 188 | timer = Timer(timeout=30, sleep=1, what="Waiting for data to replicate") 189 | while timer.loop(): 190 | if desired == matched_tables: 191 | break 192 | for db, tables in desired.items(): 193 | if matched_tables[db] == tables: 194 | continue 195 | for t in tables: 196 | if t in matched_tables[db]: 197 | continue 198 | count = pg_mig.target.c(f"SELECT COUNT(1) FROM {t}", dbname=db)[0] 199 | if count["count"] == 3: 200 | matched_tables[db].add(t) 201 | desired 
= {other_db_name: {'"ta .\'ble0"', '"ta .\'ble1"'}, db_name: {'"ta .\'ble2"'}} 202 | for db, tables in desired.items(): 203 | for t in tables: 204 | count = pg_mig.target.c(f"SELECT COUNT(1) FROM {t}", dbname=db)[0] 205 | assert count["count"] == 0, count 206 | 207 | finally: 208 | for db in [db_name, other_db_name, "postgres"]: 209 | try: 210 | with target.cursor(username=target.superuser, dbname=db, autocommit=True) as cur: 211 | cur.execute(f"ALTER SUBSCRIPTION aiven_db_migrate_{db}_sub DISABLE") 212 | cur.execute(f"DROP SUBSCRIPTION IF EXISTS aiven_db_migrate_{db}_sub CASCADE") 213 | except psycopg2.Error: 214 | pass 215 | try: 216 | pg_mig.source.cleanup(dbname=db) 217 | except: # pylint: disable=bare-except 218 | pass 219 | -------------------------------------------------------------------------------- /test/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. https://aiven.io/ 2 | 3 | from aiven_db_migrate.migrate.pgutils import find_pgbin_dir, validate_pg_identifier_length 4 | from pathlib import Path 5 | from test.utils import random_string 6 | 7 | import pytest 8 | import tempfile 9 | 10 | 11 | def test_pgbin_dir_exists_for_supported_versions(): 12 | for pgversion in ("13", "14", "15", "16", "17"): 13 | find_pgbin_dir(pgversion) 14 | for pgversion in ("12345", "12345.6"): 15 | with pytest.raises(ValueError, match=f"Couldn't find bin dir for pg version '{pgversion}'.*"): 16 | find_pgbin_dir(pgversion) 17 | 18 | 19 | @pytest.mark.parametrize("pg_dir", ["pgsql-12", "pgsql-12.4", "lib/postgresql/12", "lib/postgresql/12.4"]) 20 | def test_find_pgbin_dir(pg_dir): 21 | with tempfile.TemporaryDirectory() as temp_dir: 22 | usr_dir = Path(temp_dir) 23 | pgbin_12 = usr_dir / pg_dir / "bin" 24 | pgbin_12.mkdir(parents=True) 25 | with pytest.raises(ValueError): 26 | find_pgbin_dir("10", usr_dir=usr_dir) 27 | with pytest.raises(ValueError): 28 | find_pgbin_dir("10", max_pgversion="11", usr_dir=usr_dir) 29 | assert find_pgbin_dir("10", max_pgversion="12", usr_dir=usr_dir) == pgbin_12 30 | assert find_pgbin_dir("10", max_pgversion="13", usr_dir=usr_dir) == pgbin_12 31 | assert find_pgbin_dir("12", usr_dir=usr_dir) == pgbin_12 32 | assert find_pgbin_dir("12", max_pgversion="13", usr_dir=usr_dir) == pgbin_12 33 | assert find_pgbin_dir("12.2", usr_dir=usr_dir) == pgbin_12 34 | assert find_pgbin_dir("12.2", max_pgversion="13", usr_dir=usr_dir) == pgbin_12 35 | with pytest.raises(ValueError): 36 | assert find_pgbin_dir("13", usr_dir=usr_dir) 37 | with pytest.raises(ValueError): 38 | assert find_pgbin_dir("13", max_pgversion="14", usr_dir=usr_dir) 39 | 40 | 41 | def test_find_pgbin_dir_prefers_oldest(): 42 | with tempfile.TemporaryDirectory() as temp_dir: 43 | usr_dir = Path(temp_dir) 44 | pgbin_12 = usr_dir / "pgsql-12/bin" 45 | pgbin_12.mkdir(parents=True) 46 | pgbin_13 = usr_dir / "pgsql-13/bin" 47 | pgbin_13.mkdir(parents=True) 48 | assert find_pgbin_dir("10", max_pgversion="13", usr_dir=usr_dir) == pgbin_12 49 | assert find_pgbin_dir("11", max_pgversion="13", usr_dir=usr_dir) == pgbin_12 50 | assert find_pgbin_dir("12", max_pgversion="13", usr_dir=usr_dir) == pgbin_12 51 | 52 | 53 | def test_validate_pg_identifier_length(): 54 | validate_pg_identifier_length(random_string(length=63)) 55 | ident = random_string(length=64) 56 | with pytest.raises(ValueError) as err: 57 | validate_pg_identifier_length(ident) 58 | assert str(err.value) == f"PostgreSQL max identifier length is 63, len('{ident}') = 64" 59 | 
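60 | 61 | # Illustrative sketch, not part of the original suite: validate_pg_identifier_length() 62 | # counts characters (len()), while the server's NAMEDATALEN limit of 63 is measured in 63 | # bytes, so a multibyte identifier can pass this check yet still be truncated by PostgreSQL: 64 | def example_multibyte_identifier_length(): 65 | ident = "ä" * 40 # 40 characters, 80 UTF-8 bytes 66 | validate_pg_identifier_length(ident) # passes: len(ident) == 40 <= 63 67 | assert len(ident.encode("utf-8")) > 63 # but the server would truncate this name 68 | 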
-------------------------------------------------------------------------------- /test/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Aiven, Helsinki, Finland. https://aiven.io/ 2 | from __future__ import annotations 3 | 4 | from aiven_db_migrate.migrate.pgmigrate import PGTarget 5 | from aiven_db_migrate.migrate.pgutils import find_pgbin_dir 6 | from contextlib import contextmanager 7 | from datetime import datetime, timedelta, timezone 8 | from packaging.version import Version 9 | from pathlib import Path 10 | from psycopg2._psycopg import connection 11 | from psycopg2.extras import RealDictCursor 12 | from typing import Any, Callable, Dict, Iterator, List, Tuple 13 | 14 | import logging 15 | import psycopg2 16 | import random 17 | import re 18 | import string 19 | import subprocess 20 | import threading 21 | import time 22 | 23 | 24 | SUPPORTED_PG_VERSIONS = ["13", "14", "15", "16", "17"] 25 | 26 | 27 | def random_string(length=20): 28 | return "".join(random.choices(string.ascii_lowercase, k=length)) 29 | 30 | 31 | class Timeout(Exception): 32 | """Timeout""" 33 | 34 | 35 | class TimerBase: 36 | def __init__(self): 37 | self._start = self.now() 38 | 39 | @staticmethod 40 | def now(): 41 | return time.monotonic() 42 | 43 | def start_time(self): 44 | return self._start 45 | 46 | def reset(self): 47 | self._start = self.now() 48 | 49 | def elapsed(self): 50 | """Return seconds since starting timer""" 51 | return self.now() - self._start 52 | 53 | def elapsed_absolute(self): 54 | """Return the wall-clock timestamp at which the timer was started""" 55 | return datetime.now(tz=timezone.utc) - timedelta(seconds=self.elapsed()) 56 | 57 | 58 | class Timer(TimerBase): 59 | """Multipurpose timer""" 60 | def __init__(self, *, timeout=None, sleep=1.0, what=None): 61 | super().__init__() 62 | self._what = what or "operation to complete" 63 | self._timeout = timeout 64 | self._sleep = sleep 65 | self._next_sleep_value = self._calculate_next_sleep_value() 66 | self._iters = 0 67 | self._last_sleep_start = self._start 68 | self._event = threading.Event() 69 | 70 | def get_timeout_value(self): 71 | return self._timeout 72 | 73 | def reset(self): 74 | super().reset() 75 | self._iters = 0 76 | 77 | def loop(self, *, raise_timeout=True, log=None): 78 | """Helper function to implement waiting loops like: 79 | timer = Timer(sleep=5, timeout=60) 80 | while timer.loop(): 81 | if x: 82 | break 83 | which sleeps on every iteration after the first and raises an error on timeout 84 | """ 85 | self._iters += 1 86 | if self._iters == 1: 87 | return True 88 | 89 | if self.timeout(raise_timeout=raise_timeout): 90 | return False # timed out 91 | 92 | # Wait a bit and keep going 93 | if log: 94 | log.info("Waiting for %s, %.2fs elapsed", self._what, self.elapsed()) 95 | self.sleep() 96 | return True 97 | 98 | def timeout(self, raise_timeout=False): 99 | """Return True if we are past the timeout moment""" 100 | if self._timeout is None: 101 | return False # never timeout 102 | 103 | timeout_occurred = self.elapsed() >= self._timeout 104 | if raise_timeout and timeout_occurred: 105 | msg = "Timeout waiting for {} ({:.2f} seconds)".format(self._what, self._timeout) 106 | if isinstance(raise_timeout, Exception): 107 | raise Timeout(msg) from raise_timeout 108 | raise Timeout(msg) 109 | 110 | return timeout_occurred 111 | 112 | def time_to_timeout(self): 113 | """Return the time until the timer will time out. 114 | 115 | <0 when the timeout has already passed. 
116 | Use .timeout() instead if you want to check whether the timer has expired. 117 | """ 118 | if self._timeout is None: 119 | # This is a timer counting upwards; calling this method does not make much sense 120 | return None 121 | return self._timeout - self.elapsed() 122 | 123 | def next_sleep_length(self): 124 | """Return the length of the next sleep in seconds""" 125 | sleep_time = self._next_sleep_value - max(self.now() - self._last_sleep_start, 0) 126 | if self._timeout is not None: 127 | # never sleep past the timeout deadline 128 | sleep_time = min(sleep_time, (self.start_time() + self._timeout) - self.now()) 129 | 130 | return max(sleep_time, 0.0) 131 | 132 | def interrupt(self): 133 | """Make a possible sleep() return immediately""" 134 | self._event.set() 135 | 136 | def is_interrupted(self): 137 | """Returns True if the timer has been interrupted and the next call to sleep() will return immediately""" 138 | return self._event.is_set() 139 | 140 | def set_expired(self): 141 | """Force the timer into the timed-out state""" 142 | if self._timeout is not None: 143 | self._start = self.now() - self._timeout 144 | 145 | def sleep(self): 146 | """ 147 | Sleep until the next attempt should be performed or we are interrupted 148 | 149 | Attempt to synchronize exiting this method every 'sleep' interval, 150 | i.e. time spent outside this method is taken into account. 151 | """ 152 | 153 | sleep_time = self.next_sleep_length() 154 | self._next_sleep_value = self._calculate_next_sleep_value() 155 | if sleep_time > 0.0: 156 | # only sleep if the time spent outside this function between iterations was not already long enough 157 | self._last_sleep_start = self.now() 158 | if self._event.wait(timeout=sleep_time): 159 | self._event.clear() 160 | 161 | def _calculate_next_sleep_value(self): 162 | if not isinstance(self._sleep, tuple): 163 | return self._sleep 164 | return random.randrange(*self._sleep) 165 | 166 | 167 | class PGRunner: 168 | pgbin: Path 169 | pgdata: Path 170 | pgport: int 171 | pgversion: str 172 | aiven_extras_available: bool = False 173 | defaultdb: str = "postgres" 174 | superuser: str = "postgres" 175 | testuser: str = "testuser" 176 | cleanups: List[Callable] 177 | 178 | def __init__(self, *, pgversion: str, pgdata: Path, pgport: int = 5432): 179 | self.log = logging.getLogger(self.__class__.__name__) 180 | self.pgbin = find_pgbin_dir(pgversion) 181 | self.pgversion = pgversion 182 | self.pgport = pgport 183 | self.pgdata = pgdata 184 | self.cleanups = list() 185 | 186 | def init(self) -> PGRunner: 187 | self.log.info("Initializing pg %s in %r", self.pgversion, self.pgdata) 188 | cmd = ( 189 | self.pgbin / "pg_ctl", 190 | "init", 191 | "-D", 192 | self.pgdata, 193 | "-o", 194 | "--encoding utf-8", 195 | ) 196 | subprocess.run(cmd, check=True, env={"TZ": "UTC"}) 197 | return self 198 | 199 | def make_conf(self, **kwargs) -> PGRunner: 200 | with open(self.pgdata / "postgresql.conf", "r+") as fp: 201 | lines = fp.read().splitlines() 202 | fp.seek(0) 203 | fp.truncate() 204 | config = {} 205 | for line in lines: 206 | line = line.strip() 207 | if not line or line.startswith("#"): 208 | continue 209 | key, val = re.split(r"\s*=\s*", line, 1) 210 | config[key] = re.sub(r"\s*(#.*)?$", "", val) 211 | config.update( 212 | # disable fsync and synchronous_commit to speed up the tests a bit 213 | fsync="off", 214 | synchronous_commit="off", 215 | # synchronous_commit="local", 216 | # don't need to wait for autovacuum workers when shutting down 217 | autovacuum="off", 218 | # extensions whitelisting, 
https://github.com/dimitri/pgextwlist 219 | session_preload_libraries="'pgextwlist'", 220 | ) 221 | config.update(kwargs) 222 | lines = ["{} = {}\n".format(key, val) for key, val in sorted(config.items())] 223 | fp.write("".join(lines)) 224 | return self 225 | 226 | def make_hba_conf(self, *, dbname: str, user: str, auth: str) -> PGRunner: 227 | with open(self.pgdata / "pg_hba.conf", "r+") as fp: 228 | lines = fp.read().splitlines() 229 | fp.seek(0) 230 | fp.truncate() 231 | lines = ["{}\n".format(line.strip()) for line in lines if line and not line.startswith("#")] 232 | lines.insert(0, f"local\t{dbname}\t{user}\t{auth}\n") 233 | fp.write("".join(lines)) 234 | return self 235 | 236 | def start(self, timeout: int = 10): 237 | self.log.info("Starting pg %s in %r", self.pgversion, self.pgdata) 238 | cmd: Tuple 239 | cmd = ( 240 | self.pgbin / "pg_ctl", 241 | "start", 242 | "-D", 243 | self.pgdata, 244 | "-o", 245 | f"-k {self.pgdata} -p {self.pgport} -c listen_addresses=", 246 | ) 247 | subprocess.run(cmd, check=True) 248 | # wait until ready to accept connections 249 | cmd = ( 250 | self.pgbin / "pg_isready", 251 | "-h", 252 | self.pgdata, 253 | "-p", 254 | str(self.pgport), 255 | ) 256 | timer = Timer(timeout=timeout, what=f"pg {self.pgversion} is ready") 257 | while timer.loop(log=self.log): 258 | p = subprocess.run(cmd, check=False) 259 | if p.returncode == 0: 260 | break 261 | 262 | def reload(self): 263 | """Re-read configuration files (postgresql.conf, pg_hba.conf, etc.)""" 264 | cmd = ( 265 | self.pgbin / "pg_ctl", 266 | "reload", 267 | "-D", 268 | self.pgdata, 269 | ) 270 | subprocess.run(cmd, check=True) 271 | 272 | def stop(self, timeout: int = 10): 273 | self.log.info("Stopping pg %s in %r", self.pgversion, self.pgdata) 274 | cmd = ( 275 | self.pgbin / "pg_ctl", 276 | "stop", 277 | "-D", 278 | self.pgdata, 279 | "-m", 280 | "smart", 281 | "-t", 282 | str(timeout), 283 | ) 284 | subprocess.run(cmd, check=True) 285 | 286 | def create_superuser(self, username: str = None): 287 | if username is None: 288 | username = self.superuser 289 | cmd = ( 290 | self.pgbin / "createuser", 291 | "-h", 292 | self.pgdata, 293 | "-p", 294 | str(self.pgport), 295 | "--superuser", 296 | username, 297 | ) 298 | subprocess.run(cmd, check=True) 299 | 300 | def create_user(self, *, username: str, createdb: bool = True, createrole: bool = True, replication: bool = True): 301 | cmd = ( 302 | self.pgbin / "createuser", 303 | "-h", 304 | self.pgdata, 305 | "-p", 306 | str(self.pgport), 307 | "--createdb" if createdb else "--no-createdb", 308 | "--createrole" if createrole else "--no-createrole", 309 | "--replication" if replication else "--no-replication", 310 | username, 311 | ) 312 | subprocess.run(cmd, check=True) 313 | 314 | def create_role( 315 | self, 316 | *, 317 | username: str, 318 | password: str = None, 319 | superuser: bool = False, 320 | createdb: bool = True, 321 | createrole: bool = True, 322 | inherit: bool = True, 323 | login: bool = True, 324 | replication: bool = False, 325 | bypassrls: bool = False, 326 | connlimit: int = -1, 327 | validuntil: datetime = None, 328 | **kwargs, 329 | ): 330 | if login: 331 | assert password, "Password must be set for roles with login" 332 | sql = "CREATE ROLE {} WITH {} {} {} {} {} {} {}".format( 333 | username, 334 | "SUPERUSER" if superuser else "NOSUPERUSER", 335 | "CREATEDB" if createdb else "NOCREATEDB", 336 | "CREATEROLE" if createrole else "NOCREATEROLE", 337 | "INHERIT" if inherit else "NOINHERIT", 338 | "LOGIN" if login else "NOLOGIN", 339 | 
"REPLICATION" if replication else "NOREPLICATION", 340 | "BYPASSRLS" if bypassrls else "NOBYPASSRLS", 341 | ) 342 | params: List[Any] = [] 343 | if connlimit != -1: 344 | sql += " CONNECTION LIMIT %s" 345 | params.append(connlimit) 346 | if password: 347 | sql += " PASSWORD %s" 348 | params.append(password) 349 | if validuntil: 350 | sql += f" VALID UNTIL '{validuntil}'" 351 | config: Dict[str, Any] = {} 352 | config.update(kwargs) 353 | with self.cursor(username=self.superuser) as cur: 354 | cur.execute(sql, params) 355 | for key, value in config.items(): 356 | cur.execute(f"ALTER ROLE {username} SET {key} = %s", (value, )) 357 | self.log.info("Created role %r in %r", username, self.pgdata) 358 | 359 | def create_db(self, *, dbname: str, owner: str = None): 360 | if owner is None: 361 | owner = self.testuser 362 | cmd = ( 363 | self.pgbin / "createdb", 364 | "-h", 365 | self.pgdata, 366 | "-p", 367 | str(self.pgport), 368 | "--owner", 369 | owner, 370 | dbname, 371 | ) 372 | subprocess.run(cmd, check=True) 373 | 374 | def conn_info(self, *, username: str = None, dbname: str = None) -> Dict[str, Any]: 375 | if username is None: 376 | username = self.testuser 377 | if dbname is None: 378 | dbname = self.defaultdb 379 | return { 380 | "dbname": dbname, 381 | "host": self.pgdata, 382 | "port": self.pgport, 383 | "user": username, 384 | } 385 | 386 | def super_conn_info(self, *, dbname: str = None) -> Dict[str, Any]: 387 | return self.conn_info(username=self.superuser, dbname=dbname) 388 | 389 | @contextmanager 390 | def cursor(self, *, username: str = None, dbname: str = None, autocommit: bool = True) -> RealDictCursor: 391 | with self.connection(username=username, dbname=dbname) as conn: 392 | conn.autocommit = autocommit 393 | yield conn.cursor(cursor_factory=RealDictCursor) 394 | 395 | @contextmanager 396 | def connection( 397 | self, 398 | *, 399 | username: str | None = None, 400 | dbname: str | None = None, 401 | connection_factory: type[connection] | None = None 402 | ) -> psycopg2.extensions.connection: 403 | conn = None 404 | try: 405 | conn = psycopg2.connect( 406 | **self.conn_info(username=username, dbname=dbname), connection_factory=connection_factory 407 | ) 408 | yield conn 409 | finally: 410 | if conn is not None: 411 | conn.close() 412 | 413 | def drop_db(self, *, dbname: str): 414 | self.log.info("Dropping database %r from %r", dbname, self.pgdata) 415 | cmd = ( 416 | self.pgbin / "dropdb", 417 | "-h", 418 | self.pgdata, 419 | "-p", 420 | str(self.pgport), 421 | dbname, 422 | ) 423 | subprocess.run(cmd, check=True) 424 | 425 | def get_all_db_names(self) -> list[str]: 426 | with self.cursor(username=self.superuser) as cur: 427 | cur.execute( 428 | "SELECT datname from pg_catalog.pg_database WHERE NOT datistemplate AND datname <> %s", (self.defaultdb, ) 429 | ) 430 | dbs = cur.fetchall() 431 | 432 | return [db["datname"] for db in dbs] 433 | 434 | def drop_dbs(self): 435 | for db_name in self.get_all_db_names(): 436 | self.drop_db(dbname=db_name) 437 | 438 | def drop_user(self, *, username): 439 | self.log.info("Dropping user %r from %r", username, self.pgdata) 440 | cmd = ( 441 | self.pgbin / "dropuser", 442 | "-h", 443 | self.pgdata, 444 | "-p", 445 | str(self.pgport), 446 | username, 447 | ) 448 | subprocess.run(cmd, check=True) 449 | 450 | def create_extension(self, *, extname: str, extversion: str = None, dbname: str, grantee: str = None): 451 | if grantee is None: 452 | grantee = self.testuser 453 | sql = f"CREATE EXTENSION IF NOT EXISTS {extname}" 454 | if extversion: 
455 | sql += f" WITH VERSION '{extversion}'" 456 | if Version(self.pgversion) > Version("9.5"): 457 | sql += " CASCADE" 458 | try: 459 | with self.cursor(username=self.superuser, dbname=dbname) as cur: 460 | cur.execute(sql) 461 | # wait until extension is installed 462 | timer = Timer(timeout=10, what=f"{extname} is installed") 463 | while timer.loop(log=self.log): 464 | cur.execute("SELECT * FROM pg_catalog.pg_extension WHERE extname = %s", (extname, )) 465 | ext = cur.fetchone() 466 | if ext: 467 | self.log.info( 468 | "Installed extension %r to db %r in %r, version: %s", ext["extname"], dbname, self.pgdata, 469 | ext["extversion"] 470 | ) 471 | break 472 | except psycopg2.OperationalError as err: 473 | assert "No such file or directory" in str(err) 474 | # extension not available 475 | return False 476 | else: 477 | return True 478 | 479 | def drop_extension(self, *, extname: str, dbname: str): 480 | sql = f"DROP EXTENSION IF EXISTS {extname}" 481 | if Version(self.pgversion) > Version("9.5"): 482 | sql += " CASCADE" 483 | with self.cursor(username=self.superuser, dbname=dbname) as cur: 484 | cur.execute(sql) 485 | 486 | def have_aiven_extras(self, *, dbname: str, grantee: str = None): 487 | if grantee is None: 488 | grantee = self.testuser 489 | extname = "aiven_extras" 490 | if self.create_extension(extname=extname, dbname=dbname, grantee=grantee): 491 | # grant schema usage and select on view 492 | with self.cursor(username=self.superuser, dbname=dbname) as cur: 493 | cur.execute(f"GRANT USAGE ON SCHEMA {extname} TO {grantee}") 494 | cur.execute(f"GRANT SELECT ON aiven_extras.pg_stat_replication TO {grantee}") 495 | return True 496 | return False 497 | 498 | def drop_aiven_extras(self, *, dbname: str): 499 | extname = "aiven_extras" 500 | self.drop_extension(extname=extname, dbname=dbname) 501 | 502 | def list_pubs(self, *, dbname: str): 503 | """Get all publications created in the database""" 504 | with self.cursor(dbname=dbname) as cur: 505 | cur.execute("SELECT * FROM pg_catalog.pg_publication") 506 | return cur.fetchall() 507 | 508 | def list_slots(self): 509 | """Get all replication slots that currently exist on the database cluster""" 510 | with self.cursor() as cur: 511 | cur.execute("SELECT * FROM pg_catalog.pg_replication_slots") 512 | return cur.fetchall() 513 | 514 | def list_subs(self): 515 | """Get all existing logical replication subscriptions across all databases of a cluster""" 516 | # requires superuser 517 | with self.cursor(username=self.superuser) as cur: 518 | cur.execute("SELECT * FROM pg_catalog.pg_subscription") 519 | return cur.fetchall() 520 | 521 | def list_roles(self): 522 | with self.cursor() as cur: 523 | cur.execute("SELECT * FROM pg_catalog.pg_roles") 524 | return cur.fetchall() 525 | 526 | def add_cleanup(self, cleanup: Callable): 527 | self.cleanups.append(cleanup) 528 | 529 | 530 | @contextmanager 531 | def modify_pg_security_agent_reserved_roles(target: PGRunner) -> Iterator[str]: 532 | """Modify the list of reserved roles of the target database. 533 | 534 | Returns: 535 | The name of the superuser role that was added to the list of reserved roles. 
536 | """ 537 | authorized_roles = PGTarget(conn_info=target.super_conn_info()).get_security_agent_reserved_roles() 538 | superuser = random_string() 539 | authorized_roles_str = ",".join(authorized_roles) 540 | modified_authorized_roles_str = ",".join(authorized_roles + [superuser]) 541 | 542 | target.make_conf(**{"aiven.pg_security_agent_reserved_roles": f"'{modified_authorized_roles_str}'"}).stop() 543 | target.start() 544 | 545 | try: 546 | yield superuser 547 | finally: 548 | target.make_conf(**{"aiven.pg_security_agent_reserved_roles": f"'{authorized_roles_str}'"}).stop() 549 | target.start() 550 | --------------------------------------------------------------------------------