├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── ci.yml │ ├── cla.yml │ ├── constraints.txt │ ├── release-please.yml │ └── release.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .prettierignore ├── CHANGELOG.md ├── CODEOWNERS ├── CONTRIBUTING.md ├── DISCLAIMER.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── code-of-conduct.md ├── configs ├── README.md └── datacenter-name │ └── database-name │ └── config.json ├── docker-compose.yml ├── docs ├── extended_knowledge.md ├── playbook.md ├── quickstart.md └── usage.md ├── local_dev_scripts ├── README.md └── generate_large_test_data.py ├── pgbelt.png ├── pgbelt ├── __init__.py ├── cmd │ ├── __init__.py │ ├── convenience.py │ ├── helpers.py │ ├── login.py │ ├── preflight.py │ ├── schema.py │ ├── setup.py │ ├── status.py │ ├── sync.py │ └── teardown.py ├── config │ ├── __init__.py │ ├── config.py │ ├── models.py │ └── remote.py ├── main.py └── util │ ├── __init__.py │ ├── asyncfuncs.py │ ├── dump.py │ ├── logs.py │ ├── pglogical.py │ └── postgres.py ├── poetry.lock ├── pyproject.toml └── tests ├── __init__.py ├── integration ├── __init__.py ├── conftest.py ├── files │ ├── postgres13-pglogical-docker │ │ └── Dockerfile │ └── test_schema_data.sql └── test_integration.py └── pgbelt ├── __init__.py ├── cmd ├── __init__.py ├── conftest.py └── test_convenience.py └── config ├── __init__.py ├── conftest.py └── test_pass.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = docs/* tests/* *__init__.py 3 | # TODO: Eventually want to address D and DAR. 4 | # S608 NEED to address, but releasing for now. 5 | # B008 is false flagging for asyncpg stuff that's valid. 6 | # RST201 complaining about something not even true. 7 | ignore = E501, W503, D, DAR, B008, RST201, S608 8 | per-file-ignores = 9 | pgbelt/cmd/preflight.py: RST203, RST301, RST401 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. Linux, OSX, Windows] 28 | - pgbelt Version [e.g. 1.0.0] 29 | - python Version [e.g. 3.9.11] 30 | - python version(s) 31 | - pglogical version 32 | 33 | **Additional context** 34 | Add any other context about the problem here. 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. 
I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # this updates github actions versions 4 | - package-ecosystem: github-actions 5 | directory: "/" 6 | schedule: 7 | interval: daily 8 | # keep .githug/workflows/constraints.yml up to date 9 | - package-ecosystem: pip 10 | directory: "/.github/workflows" 11 | schedule: 12 | interval: daily 13 | # pip means poetry in this case, this keeps poetry.lock up to date 14 | - package-ecosystem: pip 15 | directory: "/" 16 | schedule: 17 | interval: daily 18 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: 'CI' 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | jobs: 11 | pgbelt_ci: 12 | name: CI 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout Code 17 | uses: actions/checkout@v4.2.2 18 | - name: Set up Docker Buildx 19 | uses: docker/setup-buildx-action@v3 20 | - name: Cache Docker layers 21 | uses: actions/cache@v4 22 | with: 23 | path: /tmp/.buildx-cache 24 | key: ${{ runner.os }}-buildx-${{ github.sha }} 25 | restore-keys: | 26 | ${{ runner.os }}-buildx- 27 | - name: Build pgbelt Image 28 | uses: docker/build-push-action@v6 29 | with: 30 | context: . 31 | push: false 32 | load: true 33 | tags: autodesk/pgbelt:latest 34 | cache-from: type=local,src=/tmp/.buildx-cache 35 | cache-to: type=local,dest=/tmp/.buildx-cache-new 36 | - name: Build Postgres Image with pglogical 37 | uses: docker/build-push-action@v6 38 | with: 39 | context: tests/integration/files/postgres13-pglogical-docker 40 | push: false 41 | load: true 42 | tags: autodesk/postgres-pglogical-docker:13 43 | cache-from: type=local,src=/tmp/.buildx-cache 44 | cache-to: type=local,dest=/tmp/.buildx-cache-new 45 | - name: Run flake8 46 | run: docker compose run flake8 47 | - name: Python Black 48 | run: docker compose run black 49 | - name: Pytest 50 | run: docker compose run tests 51 | # This ugly bit is necessary if you don't want your cache to grow forever 52 | # until it hits GitHub's limit of 5GB. 
53 | # Temp fix 54 | # https://github.com/docker/build-push-action/issues/252 55 | # https://github.com/moby/buildkit/issues/1896 56 | - name: Move cache 57 | run: | 58 | rm -rf /tmp/.buildx-cache 59 | mv /tmp/.buildx-cache-new /tmp/.buildx-cache 60 | -------------------------------------------------------------------------------- /.github/workflows/cla.yml: -------------------------------------------------------------------------------- 1 | name: "CLA Assistant" 2 | on: 3 | issue_comment: 4 | types: [created] 5 | pull_request_target: 6 | types: [opened,closed,synchronize] 7 | 8 | jobs: 9 | CLA-Assistant: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: "CLA Assistant" 13 | if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I hereby sign the CLA') || github.event_name == 'pull_request_target' 14 | # Beta Release 15 | uses: contributor-assistant/github-action@v2.6.1 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | # the below token should have repo scope and must be manually added by you in the repository's secret 19 | PERSONAL_ACCESS_TOKEN : ${{ secrets.CLA_BOT_SECRET }} 20 | with: 21 | remote-repository-name: 'CLA-Signatures' 22 | remote-organization-name: 'Autodesk' 23 | path-to-signatures: 'signatures/Autodesk/Individual/pgbelt.json' 24 | path-to-document: 'https://github.com/Autodesk/CLA-Assistant-Test-Signatures/blob/master/CLA.md' # e.g. a CLA or a DCO document 25 | # branch should not be protected 26 | branch: 'master' 27 | allowlist: user1,bot* 28 | 29 | #below are the optional inputs - If the optional inputs are not given, then default values will be taken 30 | #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository) 31 | #remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository) 32 | #create-file-commit-message: 'For example: Creating file for storing CLA Signatures' 33 | #signed-commit-message: 'For example: $contributorName has signed the CLA in #$pullRequestNo' 34 | custom-notsigned-prcomment: | 35 | Thank you for your submission, we really appreciate it. We ask that you sign our Contributor License Agreement before we can accept your contribution. 36 | 37 | If you are contributing on behalf of your employer you must fill out our **Corporate Contributor License Agreement** which can be found [here](https://github.com/Autodesk/autodesk.github.io/releases/download/1.0/ADSK.Form.Corp.Contrib.Agmt.for.Open.Source.docx). 38 | If you are contributing on behalf of yourself you must agree to our **Individual Contributor License Agreement** by reviewing [this document](https://github.com/Autodesk/autodesk.github.io/releases/download/1.0/ADSK.Form.Ind.Contrib.Agmt.for.Open.Source.docx) and signing it or by replying below a with a comment containing the following text: 39 | #custom-pr-sign-comment: 'The signature to be committed in order to sign the CLA' 40 | #custom-allsigned-prcomment: 'pull request comment when all contributors has signed, defaults to **CLA Assistant Lite bot** All Contributors have signed the CLA.' 
41 | #lock-pullrequest-aftermerge: false - if you don't want this bot to automatically lock the pull request after merging (default - true) 42 | #use-dco-flag: true - If you are using DCO instead of CLA 43 | -------------------------------------------------------------------------------- /.github/workflows/constraints.txt: -------------------------------------------------------------------------------- 1 | pip==25.1.1 2 | poetry==2.1.3 3 | virtualenv==20.31.2 4 | poetry-dynamic-versioning==1.8.2 5 | -------------------------------------------------------------------------------- /.github/workflows/release-please.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | name: release-please 6 | jobs: 7 | release-please: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: googleapis/release-please-action@v4 11 | with: 12 | token: ${{ secrets.PGBELT_GHA_VJEEVA_PAT }} 13 | release-type: python 14 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release to pypi 2 | 3 | on: 4 | release: 5 | types: [released] 6 | 7 | jobs: 8 | release: 9 | name: Release 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Check out the repository 13 | uses: actions/checkout@v4.2.2 14 | with: 15 | fetch-depth: 2 16 | - name: Set up Python 17 | uses: actions/setup-python@v5.6.0 18 | with: 19 | python-version: "3.11" 20 | 21 | - name: Upgrade pip 22 | run: | 23 | pip install --constraint=.github/workflows/constraints.txt pip 24 | pip --version 25 | - name: Install Poetry 26 | run: | 27 | pip install --constraint=.github/workflows/constraints.txt poetry poetry-dynamic-versioning 28 | poetry --version 29 | - name: Build package 30 | run: | 31 | poetry build --ansi 32 | - name: Publish package on PyPI 33 | uses: pypa/gh-action-pypi-publish@v1.12.4 34 | with: 35 | user: __token__ 36 | password: ${{ secrets.PGBELT_PYPI_VJEEVA_TOKEN }} 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | *.swo 4 | *.xml 5 | *.egg-info 6 | .venv 7 | .idea/ 8 | venv 9 | .DS_store 10 | .coverage 11 | .vscode 12 | build/* 13 | dist/* 14 | logs/* 15 | configs/testdc/* 16 | schemas/* 17 | tables/* 18 | .python-version 19 | .mypy_cache 20 | __pycache__/ 21 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: black 5 | name: black 6 | entry: black 7 | language: system 8 | types: [python] 9 | require_serial: true 10 | - id: check-added-large-files 11 | name: Check for added large files 12 | entry: check-added-large-files 13 | language: system 14 | - id: check-toml 15 | name: Check Toml 16 | entry: check-toml 17 | language: system 18 | types: [toml] 19 | - id: check-yaml 20 | name: Check Yaml 21 | entry: check-yaml 22 | language: system 23 | types: [yaml] 24 | - id: end-of-file-fixer 25 | name: Fix End of Files 26 | entry: end-of-file-fixer 27 | language: system 28 | types: [text] 29 | stages: [commit, push, manual] 30 | - id: flake8 31 | name: flake8 32 | entry: flake8 33 | language: system 34 | types: [python3] 35 | exclude: "^(test/*|examples/*|noxfile.py)" 36 | require_serial: true 37 | 
args: ["--config=.flake8"] 38 | - id: pyupgrade 39 | name: pyupgrade 40 | description: Automatically upgrade syntax for newer versions. 41 | entry: pyupgrade 42 | language: system 43 | types: [python3] 44 | args: [--py39-plus, --keep-runtime-typing] 45 | - id: reorder-python-imports 46 | name: Reorder python imports 47 | entry: reorder-python-imports 48 | language: system 49 | types: [python3] 50 | args: [--application-directories=src] 51 | - id: trailing-whitespace 52 | name: Trim Trailing Whitespace 53 | entry: trailing-whitespace-fixer 54 | language: system 55 | types: [text] 56 | stages: [commit, push, manual] 57 | - repo: https://github.com/pre-commit/mirrors-prettier 58 | rev: v2.5.1 59 | hooks: 60 | - id: prettier 61 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | .github/* 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [0.8.3](https://github.com/Autodesk/pgbelt/compare/v0.8.2...v0.8.3) (2025-02-03) 4 | 5 | 6 | ### Bug Fixes 7 | 8 | * loggers were overwriting themselves on disk. Separated now [#673](https://github.com/Autodesk/pgbelt/issues/673) ([#674](https://github.com/Autodesk/pgbelt/issues/674)) ([f39c73d](https://github.com/Autodesk/pgbelt/commit/f39c73d5932604300d6859cdcc94562b86a4fc66)) 9 | 10 | 11 | ### Documentation 12 | 13 | * various updates ([#676](https://github.com/Autodesk/pgbelt/issues/676)) ([64f62dd](https://github.com/Autodesk/pgbelt/commit/64f62dd0d33cdb248e1d57d3e4f66cb307962fa2)) 14 | 15 | ## [0.8.2](https://github.com/Autodesk/pgbelt/compare/v0.8.1...v0.8.2) (2025-01-06) 16 | 17 | 18 | ### Bug Fixes 19 | 20 | * support for indexes with quotes in the name [#652](https://github.com/Autodesk/pgbelt/issues/652) ([#653](https://github.com/Autodesk/pgbelt/issues/653)) ([1b7d90c](https://github.com/Autodesk/pgbelt/commit/1b7d90cd13ca2e03d46a8f3c5b95599b35a8cac9)) 21 | 22 | ## [0.8.1](https://github.com/Autodesk/pgbelt/compare/v0.8.0...v0.8.1) (2024-11-26) 23 | 24 | 25 | ### Bug Fixes 26 | 27 | * remove dump-dst-indexes, useless and add 0 statement timeout to creating indexes ([#629](https://github.com/Autodesk/pgbelt/issues/629)) ([06285e9](https://github.com/Autodesk/pgbelt/commit/06285e9da801ac184ab54dee4ae3c49d61e49801)) 28 | * remove useless command (dump_constraints for dst dump...) 
([#625](https://github.com/Autodesk/pgbelt/issues/625)) ([8a1f372](https://github.com/Autodesk/pgbelt/commit/8a1f3720d80f28742e9b628172551e5f297b24fb)) 29 | * remove-constraints errors if no NOT VALIDS involved ([#626](https://github.com/Autodesk/pgbelt/issues/626)) ([e3fc113](https://github.com/Autodesk/pgbelt/commit/e3fc11367fac8d4c0c15310a970cf35fe67e2cde)) 30 | * remove-indexes syntax error fix ([#630](https://github.com/Autodesk/pgbelt/issues/630)) ([4f779f9](https://github.com/Autodesk/pgbelt/commit/4f779f93d6fe4c39ce81bfb6f82e6767a174c963)) 31 | 32 | ## [0.8.0](https://github.com/Autodesk/pgbelt/compare/v0.7.11...v0.8.0) (2024-11-19) 33 | 34 | 35 | ### Features 36 | 37 | * run ANALYZE as part of create-indexes ([#612](https://github.com/Autodesk/pgbelt/issues/612)) ([29294ed](https://github.com/Autodesk/pgbelt/commit/29294ed9c4325ea0b5c699eda2398d4b46735e06)) 38 | * run ANALYZE as part of create-indexes [#611](https://github.com/Autodesk/pgbelt/issues/611) ([29294ed](https://github.com/Autodesk/pgbelt/commit/29294ed9c4325ea0b5c699eda2398d4b46735e06)) 39 | 40 | 41 | ### Bug Fixes 42 | 43 | * hide progress when reverse replication is running [#608](https://github.com/Autodesk/pgbelt/issues/608) ([#610](https://github.com/Autodesk/pgbelt/issues/610)) ([8f143a8](https://github.com/Autodesk/pgbelt/commit/8f143a868126ee613b0607659747f14f95b5fd17)) 44 | * run analyze without statement timeout ([#606](https://github.com/Autodesk/pgbelt/issues/606)) ([3bac10c](https://github.com/Autodesk/pgbelt/commit/3bac10c92f662b5c66145cde8245a1ac967130d7)) 45 | * sync command breakdown update and remove useless commands ([#622](https://github.com/Autodesk/pgbelt/issues/622)) ([558a28d](https://github.com/Autodesk/pgbelt/commit/558a28d0af3d577e0c6ecf5b06d431883b38fc2f)) 46 | 47 | ## [0.7.11](https://github.com/Autodesk/pgbelt/compare/v0.7.10...v0.7.11) (2024-09-27) 48 | 49 | 50 | ### Bug Fixes 51 | 52 | * accommodate Decimal NaN != Decimal NaN ([#573](https://github.com/Autodesk/pgbelt/issues/573)) ([3602266](https://github.com/Autodesk/pgbelt/commit/3602266ed1421dfe4efb5dabe8c9303e653f05d1)) 53 | 54 | ## [0.7.10](https://github.com/Autodesk/pgbelt/compare/v0.7.9...v0.7.10) (2024-09-24) 55 | 56 | 57 | ### Bug Fixes 58 | 59 | * add timestamps to logs DBAAS-92 ([#568](https://github.com/Autodesk/pgbelt/issues/568)) ([9c19378](https://github.com/Autodesk/pgbelt/commit/9c1937882343272e0e64887d2f57e9e5afa5eb6c)) 60 | 61 | ## [0.7.9](https://github.com/Autodesk/pgbelt/compare/v0.7.8...v0.7.9) (2024-09-09) 62 | 63 | 64 | ### Bug Fixes 65 | 66 | * support for capitals in column names ([#552](https://github.com/Autodesk/pgbelt/issues/552)) ([d0bea7b](https://github.com/Autodesk/pgbelt/commit/d0bea7b7c9e79664885cd2463d26e599b97472f9)) 67 | 68 | ## [0.7.8](https://github.com/Autodesk/pgbelt/compare/v0.7.7...v0.7.8) (2024-09-09) 69 | 70 | 71 | ### Bug Fixes 72 | 73 | * support for capital sequences and better sequence testing ([#550](https://github.com/Autodesk/pgbelt/issues/550)) ([48915e8](https://github.com/Autodesk/pgbelt/commit/48915e8efc55e9f48bb16fb25bdd1e7a2803cbd0)) 74 | 75 | ## [0.7.7](https://github.com/Autodesk/pgbelt/compare/v0.7.6...v0.7.7) (2024-09-06) 76 | 77 | 78 | ### Bug Fixes 79 | 80 | * support for capital letters in sequence names ([#548](https://github.com/Autodesk/pgbelt/issues/548)) ([885e234](https://github.com/Autodesk/pgbelt/commit/885e234e01becfac3eba2297d4618935760044b4)) 81 | 82 | ## [0.7.6](https://github.com/Autodesk/pgbelt/compare/v0.7.5...v0.7.6) (2024-07-12) 83 | 84 | 85 | ### Bug 
Fixes 86 | 87 | * support for dbs with no seqs, fixes [#515](https://github.com/Autodesk/pgbelt/issues/515) ([#516](https://github.com/Autodesk/pgbelt/issues/516)) ([5e9fe4c](https://github.com/Autodesk/pgbelt/commit/5e9fe4c8456c1a71f5440cba6e0d04c673d01f2d)) 88 | 89 | ## [0.7.5](https://github.com/Autodesk/pgbelt/compare/v0.7.4...v0.7.5) (2024-07-10) 90 | 91 | 92 | ### Bug Fixes 93 | 94 | * revoke-logins skips owner if owner is same as root [#509](https://github.com/Autodesk/pgbelt/issues/509) ([#510](https://github.com/Autodesk/pgbelt/issues/510)) ([ed7e758](https://github.com/Autodesk/pgbelt/commit/ed7e7588a710be696d857c86e46ec84e850dbff4)) 95 | 96 | 97 | ### Documentation 98 | 99 | * update quickstart and code comments to explain how to teardown properly ([#511](https://github.com/Autodesk/pgbelt/issues/511)) ([9f6a4a5](https://github.com/Autodesk/pgbelt/commit/9f6a4a593c4a20561b0efd5b0f112b7523288a53)) 100 | 101 | ## [0.7.4](https://github.com/Autodesk/pgbelt/compare/v0.7.3...v0.7.4) (2024-04-08) 102 | 103 | 104 | ### Bug Fixes 105 | 106 | * support for password special characters, fixes [#440](https://github.com/Autodesk/pgbelt/issues/440) ([#441](https://github.com/Autodesk/pgbelt/issues/441)) ([71b0f9d](https://github.com/Autodesk/pgbelt/commit/71b0f9d41add54967420c31ad86ac38b10ccdf2a)) 107 | 108 | ## [0.7.3](https://github.com/Autodesk/pgbelt/compare/v0.7.2...v0.7.3) (2024-04-01) 109 | 110 | 111 | ### Bug Fixes 112 | 113 | * actual table capitalization support - integration tests didn't actually have capitals inside the DBs ([99c78b7](https://github.com/Autodesk/pgbelt/commit/99c78b7dfb916ebf04d662c496ba62dc3dba839e)) 114 | * actual table capitalization support ([#436](https://github.com/Autodesk/pgbelt/issues/436)) ([99c78b7](https://github.com/Autodesk/pgbelt/commit/99c78b7dfb916ebf04d662c496ba62dc3dba839e)) 115 | 116 | ## [0.7.2](https://github.com/Autodesk/pgbelt/compare/v0.7.1...v0.7.2) (2024-03-19) 117 | 118 | 119 | ### Bug Fixes 120 | 121 | * support for table names with capitals ([#422](https://github.com/Autodesk/pgbelt/issues/422)) ([4512f2f](https://github.com/Autodesk/pgbelt/commit/4512f2f083cde6208ad344b61bd6e41e96745bf9)) 122 | 123 | ## [0.7.1](https://github.com/Autodesk/pgbelt/compare/v0.7.0...v0.7.1) (2024-03-01) 124 | 125 | 126 | ### Bug Fixes 127 | 128 | * various small fixes found from new integration testing ([#409](https://github.com/Autodesk/pgbelt/issues/409)) ([0309c56](https://github.com/Autodesk/pgbelt/commit/0309c5646c2d58e8d9c41299e4762a02c211f512)) 129 | 130 | ## [0.7.0](https://github.com/Autodesk/pgbelt/compare/v0.6.2...v0.7.0) (2024-02-27) 131 | 132 | 133 | ### Features 134 | 135 | * add percent progress to initializing status view ([#387](https://github.com/Autodesk/pgbelt/issues/387)) ([6139cfc](https://github.com/Autodesk/pgbelt/commit/6139cfc25d8ce4a272adf0587085455f136cdb66)) 136 | * handle exodus-style migrations ([#377](https://github.com/Autodesk/pgbelt/issues/377)) ([09c6887](https://github.com/Autodesk/pgbelt/commit/09c68878a4332698393956d5ee166134b84da8af)) 137 | * pgbelt now supports non-public schemas! 
([#398](https://github.com/Autodesk/pgbelt/issues/398)) ([1f1fa87](https://github.com/Autodesk/pgbelt/commit/1f1fa87e6f884fd6ae16de7d032d4d79d24929ba)) 138 | * precheck now compares extensions of DBs ([#406](https://github.com/Autodesk/pgbelt/issues/406)) ([b74ce43](https://github.com/Autodesk/pgbelt/commit/b74ce438855ebf611672ccdfa7c0768fd4f6cd58)) 139 | 140 | 141 | ### Bug Fixes 142 | 143 | * sizes in status to should target config dataset instead of whole DB ([#395](https://github.com/Autodesk/pgbelt/issues/395)) ([80bf6ba](https://github.com/Autodesk/pgbelt/commit/80bf6bac8532ea57cd867597203392bf23d78f63)) 144 | 145 | ## [0.6.2](https://github.com/Autodesk/pgbelt/compare/v0.6.1...v0.6.2) (2024-02-02) 146 | 147 | ### Bug Fixes 148 | 149 | - stop using default replication set ([#371](https://github.com/Autodesk/pgbelt/issues/371)) ([cfa6276](https://github.com/Autodesk/pgbelt/commit/cfa62766c43c4f632582730339637be2f5021390)) 150 | 151 | ## [0.6.1](https://github.com/Autodesk/pgbelt/compare/v0.6.0...v0.6.1) (2024-01-22) 152 | 153 | ### Bug Fixes 154 | 155 | - pglogical teardown will now always revoke all from all tables from pglogical role ([#362](https://github.com/Autodesk/pgbelt/issues/362)) ([89e892c](https://github.com/Autodesk/pgbelt/commit/89e892c323ffda4bfbedd4d4a04ec612202cbe81)) 156 | 157 | ## [0.6.0](https://github.com/Autodesk/pgbelt/compare/v0.5.4...v0.6.0) (2023-11-20) 158 | 159 | ### Features 160 | 161 | - refactor Teardown Command due to DROP EXTENSION woes ([#326](https://github.com/Autodesk/pgbelt/issues/326)) ([5de6300](https://github.com/Autodesk/pgbelt/commit/5de6300d597f3e3f45dd03b83c286fcc07b1ced0)) 162 | 163 | ## [0.5.4](https://github.com/Autodesk/pgbelt/compare/v0.5.3...v0.5.4) (2023-11-09) 164 | 165 | ### Bug Fixes 166 | 167 | - bump pydantic from 1.10.13 to 2.4.2 ([#299](https://github.com/Autodesk/pgbelt/issues/299)) ([8597fcc](https://github.com/Autodesk/pgbelt/commit/8597fccca106606dd1513839bc23c170383f3f2b)) 168 | 169 | ## [0.5.3](https://github.com/Autodesk/pgbelt/compare/v0.5.2...v0.5.3) (2023-11-09) 170 | 171 | ### Bug Fixes 172 | 173 | - grant pglogical schema usage [#278](https://github.com/Autodesk/pgbelt/issues/278) ([#318](https://github.com/Autodesk/pgbelt/issues/318)) ([0de9290](https://github.com/Autodesk/pgbelt/commit/0de929074db26e5ca3d47ab9adfbf9bc3d055f0f)) 174 | 175 | ## [0.5.2](https://github.com/Autodesk/pgbelt/compare/v0.5.1...v0.5.2) (2023-10-26) 176 | 177 | ### Bug Fixes 178 | 179 | - remote.py error handling needs to shore up plugin error ([#311](https://github.com/Autodesk/pgbelt/issues/311)) ([b47b07f](https://github.com/Autodesk/pgbelt/commit/b47b07f6705b1c8548b67d17b69e5a29e2de5178)) 180 | 181 | ## [0.5.1](https://github.com/Autodesk/pgbelt/compare/v0.5.0...v0.5.1) (2023-10-06) 182 | 183 | ### Bug Fixes 184 | 185 | - index commands need to surface as CLI commands too ([#275](https://github.com/Autodesk/pgbelt/issues/275)) ([a8d6fae](https://github.com/Autodesk/pgbelt/commit/a8d6faef753d434a6883aac5797c2b0cb952e86a)) 186 | 187 | ## [0.5.0](https://github.com/Autodesk/pgbelt/compare/v0.4.0...v0.5.0) (2023-10-06) 188 | 189 | ### Features 190 | 191 | - stripping indexes and adding them later, and testing ([#273](https://github.com/Autodesk/pgbelt/issues/273)) ([c730e78](https://github.com/Autodesk/pgbelt/commit/c730e7852fdf40001633fd074b5aac4128a48af2)) 192 | 193 | ### Bug Fixes 194 | 195 | - precheck parameter value guidelines updated for [#267](https://github.com/Autodesk/pgbelt/issues/267) 
([#268](https://github.com/Autodesk/pgbelt/issues/268)) ([e423929](https://github.com/Autodesk/pgbelt/commit/e42392993da1500ef564c3f6cc287f89531651e0)) 196 | 197 | ## [0.4.0](https://github.com/Autodesk/pgbelt/compare/v0.3.0...v0.4.0) (2023-07-10) 198 | 199 | ### Features 200 | 201 | - ability to not write logs to disk if needed ([#255](https://github.com/Autodesk/pgbelt/issues/255)) ([69469c0](https://github.com/Autodesk/pgbelt/commit/69469c0c16f1891992e7d15f4725369de3162cd7)) 202 | 203 | ## [0.3.0](https://github.com/Autodesk/pgbelt/compare/v0.2.4...v0.3.0) (2023-06-29) 204 | 205 | ### Features 206 | 207 | - check-connectivity command ([#253](https://github.com/Autodesk/pgbelt/issues/253)) ([6ebfff7](https://github.com/Autodesk/pgbelt/commit/6ebfff7a80573a1a33f1c54096fe300786e205c3)) 208 | 209 | ## [0.2.4](https://github.com/Autodesk/pgbelt/compare/v0.2.3...v0.2.4) (2023-06-15) 210 | 211 | ### Bug Fixes 212 | 213 | - allowing backwards compatibility down to python 3.9 ([#247](https://github.com/Autodesk/pgbelt/issues/247)) ([78b2fd8](https://github.com/Autodesk/pgbelt/commit/78b2fd8d05368f23ecd6c1ece223893bb424731c)) 214 | 215 | ## [0.2.3](https://github.com/Autodesk/pgbelt/compare/v0.2.2...v0.2.3) (2023-05-04) 216 | 217 | ### Bug Fixes 218 | 219 | - double quotes on table names ([#228](https://github.com/Autodesk/pgbelt/issues/228)) ([893773a](https://github.com/Autodesk/pgbelt/commit/893773aa113b719f32e4e5486e585c584499cd9e)) 220 | 221 | ## [0.2.2](https://github.com/Autodesk/pgbelt/compare/v0.2.1...v0.2.2) (2023-05-02) 222 | 223 | ### Bug Fixes 224 | 225 | - undo bump of virtualenv, back to 20.21.0 ([#223](https://github.com/Autodesk/pgbelt/issues/223)) ([01bfe6f](https://github.com/Autodesk/pgbelt/commit/01bfe6fc167df009d451ac83666f1075a4a0ae05)) 226 | 227 | ## [0.2.1](https://github.com/Autodesk/pgbelt/compare/v0.2.0...v0.2.1) (2023-05-02) 228 | 229 | ### Bug Fixes 230 | 231 | - typer doesn't support union types from python 3.11 ([#220](https://github.com/Autodesk/pgbelt/issues/220)) ([0315390](https://github.com/Autodesk/pgbelt/commit/03153909bcdf4e621b3e7c66aa5a99e55c9ceea4)) 232 | 233 | ## [0.2.0](https://github.com/Autodesk/pgbelt/compare/v0.1.2...v0.2.0) (2023-04-28) 234 | 235 | ### Features 236 | 237 | - update to python 3.11 ([#191](https://github.com/Autodesk/pgbelt/issues/191)) ([3c44681](https://github.com/Autodesk/pgbelt/commit/3c44681bfda3679bff10022eb9bfcb56fdd9e50a)) 238 | 239 | ### Bug Fixes 240 | 241 | - incorrect precheck command in quickstart.md ([#172](https://github.com/Autodesk/pgbelt/issues/172)) ([36ce96f](https://github.com/Autodesk/pgbelt/commit/36ce96feb6f5c98bcb3c7a0648a25e4f3b9ce655)) 242 | - no need to find and replace owners in schema ([#215](https://github.com/Autodesk/pgbelt/issues/215)) ([dfe1324](https://github.com/Autodesk/pgbelt/commit/dfe1324a56f57e75fa44dfe7196e104c34988695)) 243 | 244 | ## [0.1.2](https://github.com/Autodesk/pgbelt/compare/v0.1.1...v0.1.2) (2022-10-03) 245 | 246 | ### Bug Fixes 247 | 248 | - the analyze function should be run as root of a database for safest and least restriction [#87](https://github.com/Autodesk/pgbelt/issues/87) ([#90](https://github.com/Autodesk/pgbelt/issues/90)) ([81c5fde](https://github.com/Autodesk/pgbelt/commit/81c5fde8d1db4be797389a0ff361c00ee3329ab2)) 249 | - validator supposed to check 100 rows, not 1 ([#93](https://github.com/Autodesk/pgbelt/issues/93)) ([48122dc](https://github.com/Autodesk/pgbelt/commit/48122dca57b51b3f8114380840e510d70c75eaae)) 250 | 251 | ## 
[0.1.1](https://github.com/Autodesk/pgbelt/compare/v0.1.0...v0.1.1) (2022-09-30) 252 | 253 | ### Documentation 254 | 255 | - update README and quickstart now that pgbelt is on pypi ([#79](https://github.com/Autodesk/pgbelt/issues/79)) ([2a2d4c9](https://github.com/Autodesk/pgbelt/commit/2a2d4c91aaa8e961c2958df2f6150982b8b66c77)) 256 | 257 | ## 0.1.0 (2022-09-02) 258 | 259 | ### Features 260 | 261 | - convert to poetry ([07f9a10](https://github.com/Autodesk/pgbelt/commit/07f9a102aed7392e91dc7bffddb41e2bdf69eb52)) 262 | - fresh upload of pgbelt with GHA CI ([4df9896](https://github.com/Autodesk/pgbelt/commit/4df98960d662a14c99bb2c60ec80ba3c0317b22b)) 263 | 264 | ### Bug Fixes 265 | 266 | - ci move to poetry too ([979c988](https://github.com/Autodesk/pgbelt/commit/979c988666dea0705cecc19fdd7cf941f17f2d6b)) 267 | - dev setup in Makefile also installs precommit ([c27cb09](https://github.com/Autodesk/pgbelt/commit/c27cb09615b7a448f31ca07fb6cdab6fe8a10f0f)) 268 | - flake8 baited, placing ignore, it broke pydantic ([#17](https://github.com/Autodesk/pgbelt/issues/17)) ([560207d](https://github.com/Autodesk/pgbelt/commit/560207df79276ceece0d4df00221e748f2718648)) 269 | - need code of conduct for open-sourcing ([#58](https://github.com/Autodesk/pgbelt/issues/58)) ([4aede91](https://github.com/Autodesk/pgbelt/commit/4aede91a9c4344d3fcaa1972bc6b00cdf70f5ce6)) 270 | - pre-commit fixed and run ([d3e188b](https://github.com/Autodesk/pgbelt/commit/d3e188bc2b9641614e8e63a5011adecf4db511e2)) 271 | - pre-commit fixes and flake8 config ([#15](https://github.com/Autodesk/pgbelt/issues/15)) ([958afd0](https://github.com/Autodesk/pgbelt/commit/958afd04b5f542704b15809274d039d0accc3cbe)) 272 | - remove rando .git file ([#5](https://github.com/Autodesk/pgbelt/issues/5)) ([16d38fd](https://github.com/Autodesk/pgbelt/commit/16d38fd2a7c607552191444d69d0f02212aecfc9)) 273 | - remove random .git files ([35213a5](https://github.com/Autodesk/pgbelt/commit/35213a518ce4f3450f462aceb6d139b6379f4cc1)) 274 | - update black and fix ci poetry ([5a7a88b](https://github.com/Autodesk/pgbelt/commit/5a7a88beb4cac34a7c1e65f686f8cd0fd1f6ac50)) 275 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @vjeeva 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to pgbelt 2 | 3 | Thank you for your interest in improving this project. This project is open-source under the `Apache 2.0 license` and welcomes contributions in the form of bug reports, feature requests, and pull requests. We are excited and eager to accept external contributions! 4 | 5 | ## Filing Bugs 6 | 7 | Report bugs on the `Issue Tracker`. 8 | 9 | When filing an issue, make sure to answer these questions: 10 | 11 | - Which operating system and Python version are you using? 12 | - Which version of this project are you using? 13 | - What did you do? 14 | - What did you expect to see? 15 | - What did you see instead? 16 | 17 | The best way to get your bug fixed is to provide a test case, and/or steps to reproduce the issue. There is an issue template made for this repository so you can provide the needed information. 18 | 19 | ## Feature Requests 20 | 21 | We accept feature requests! Please file requests in the `Issue Tracker`. 
22 | 23 | ## Code Contributions 24 | 25 | We accept external code contributions! 26 | 27 | ### Contributor License Agreement 28 | 29 | Before contributing any code to this project, we kindly ask you to sign a Contributor License Agreement (CLA). We can not accept any pull request if a CLA has not been signed. 30 | 31 | - If you are contributing on behalf of yourself, the CLA signature is included as a part of the pull request process. 32 | 33 | - If you are contributing on behalf of your employer, please sign our [Corporate Contributor License](https://github.com/Autodesk/autodesk.github.io/releases/download/1.0/ADSK.Form.Corp.Contrib.Agmt.for.Open.Source.docx) Agreement. The document includes instructions on where to send the completed forms to. Once a signed form has been received, we can happily review and accept your pull requests. 34 | 35 | ### How to set up your development environment 36 | 37 | It is recommended to install pgbelt inside a virtual environment if installing by clone: 38 | 39 | - [pyenv](https://github.com/pyenv/pyenv) 40 | - [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv) 41 | 42 | You must also have: 43 | 44 | - [poetry](https://github.com/python-poetry/poetry) 45 | - Postgres Client Tools (pg_dump, pg_restore). Mac: `brew install libpq`. Ubuntu: `sudo apt-get install postgresql-client` 46 | - [docker](https://www.docker.com/) 47 | 48 | Install the egg locally: 49 | 50 | # create a python virtualenv with python 3.9.11 and activate it (any 3.9.x is ok) 51 | pyenv install 3.9.11 52 | pyenv virtualenv 3.9.11 pgbelt 53 | pyenv activate pgbelt 54 | 55 | # Install poetry inside your virtualenv 56 | pip3 install poetry 57 | 58 | # clone the repo 59 | git clone git@github.com:Autodesk/pgbelt.git 60 | cd pgbelt 61 | 62 | # install pgbelt and dev tools with make **setup** 63 | make setup 64 | 65 | ### Better understanding of `pgbelt` and how it works 66 | 67 | To gain a better understanding of how the tool works, which helps with development, please read the [extended knowledge document](docs/extended_knowledge.md)! 68 | 69 | ### How to spin up a local replication task for development 70 | 71 | This feature is very useful when you are making code changes to `pgbelt` and want to test against live databases to ensure correct behavior. 72 | 73 | To do this, this local development feature uses `docker` and `docker-compose` to spin up the following: 74 | 75 | 1. 4 Pairs of Postgres Containers with networking configured between each other. These 4 sets are used for the following integration test cases: public schema & whole DB, public schema & exodus-style migration (only a subset is moved), non-public schema & whole DB, non-public schema & exodus-style migration (only a subset is moved) 76 | 2. One container loaded with your local copy of `pgbelt`, built and installed for CLI usage. 77 | 78 | Simply run the following to spin the above up and drop yourself into your container with your local `pgbelt`: 79 | 80 | make local-dev 81 | 82 | _Note_: the DC and DB for the above environment are [here](https://github.com/Autodesk/pgbelt/blob/main/tests/integration/conftest.py#L20-L21). 83 | 84 | Note: your local code will be _mounted_ to your container instead of copied. This allows you to edit your code on your laptop, then go into the container and run `pip3 install -e .` to update your container's `belt` binary for quick testing iterations. 85 | 86 | Once you are done, you can exit out of the above container. 
Then, for cleanliness, please run the following to clean up `docker` and `docker-compose`: 87 | 88 | make clean-docker 89 | 90 | ### How to test the project 91 | 92 | You will want to run the full test suite (including integration tests) to ensure your contribution causes no issues. 93 | 94 | To do this, this repository uses `docker` and `docker-compose` to run tests and set up a local migration scenario with multiple databases, to do a full migration run-through. 95 | 96 | Simply run: 97 | 98 | make test 99 | 100 | Tests are made with `pytest` and are in the `tests/` folder. The integration test is found in `tests/integration/`, along with accompanying files, such as the Dockerfile for Postgres with pglogical configured. 101 | 102 | ### How to submit changes 103 | 104 | Open a `pull request` to submit changes to this project. 105 | 106 | Your pull request needs to meet the following guidelines for acceptance: 107 | 108 | - The Github Actions CI job must pass without errors and warnings. 109 | - If your changes add functionality, update the documentation accordingly. 110 | 111 | It is recommended to open an issue before starting work on anything. This will allow a chance to talk it over with the owners and validate your approach. 112 | -------------------------------------------------------------------------------- /DISCLAIMER.md: -------------------------------------------------------------------------------- 1 | # Disclaimers 2 | 3 | ## Disclaimer of Warranty 4 | 5 | This Work is provided "as is". Any express or implied warranties, including but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall Autodesk be liable for any direct, indirect, incidental, special, exemplary or consequential damages (including, but not limited to, procurement of substitute goods or services, loss of use, data or profits, or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this Work, even if advised of the possibility of such damage. 6 | 7 | The User of this Work agrees to hold harmless and indemnify Autodesk and its employees from every claim or liability (whether in tort or in contract), including attorneys' fees, court costs, and expenses, arising in direct consequence of Recipient's use of the item, including but not limited to, claims or liabilities made for injury to or death of personnel of User or third parties, damage to or destruction of property of User or third parties, infringement or other violations of intellectual property or technical data rights. 8 | 9 | Nothing in this Work is intended to constitute an endorsement, explicit or implied, by Autodesk of any particular manufacturer's product or service. 10 | 11 | ## Disclaimer of Endorsement 12 | 13 | Reference herein to any specific commercial product, process, or service by trade name, trademark, manufacturer, or otherwise, in this Work does not constitute an endorsement, recommendation, or favoring by Autodesk and shall not be used for advertising or product endorsement purposes. 
14 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim 2 | ENV VIRTUAL_ENV=/opt/venv 3 | ENV PATH="$VIRTUAL_ENV/bin:$PATH" 4 | COPY ./ /opt/pgbelt 5 | WORKDIR /opt/pgbelt 6 | 7 | RUN set -e \ 8 | && apt-get -y update \ 9 | && apt-get -y install postgresql-client \ 10 | && apt-get -y install gcc 11 | 12 | RUN set -e \ 13 | && python -m venv $VIRTUAL_ENV \ 14 | && python -m pip install --upgrade pip \ 15 | && pip install poetry poetry-dynamic-versioning \ 16 | && poetry install 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | Copyright 2017 Autodesk Inc. 179 | 180 | Licensed under the Apache License, Version 2.0 (the "License"); 181 | you may not use this file except in compliance with the License. 182 | You may obtain a copy of the License at 183 | 184 | http://www.apache.org/licenses/LICENSE-2.0 185 | 186 | Unless required by applicable law or agreed to in writing, software 187 | distributed under the License is distributed on an "AS IS" BASIS, 188 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 | See the License for the specific language governing permissions and 190 | limitations under the License. 191 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON_CODE_PATH="./pgbelt" 2 | 3 | .DEFAULT_GOAL := help 4 | 5 | # This help function will automatically generate help/usage text for any make target that is commented with "##". 6 | # Targets with a singe "#" description do not show up in the help text 7 | help: 8 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-40s\033[0m %s\n", $$1, $$2}' 9 | 10 | install: ## Install whatever you have locally 11 | pip3 install -e . 12 | 13 | setup: ## Install development requirements. You should be in a virtualenv 14 | poetry install && pre-commit install 15 | 16 | test: ## Run tests 17 | docker build . -t autodesk/pgbelt:latest && docker build tests/integration/files/postgres13-pglogical-docker/ -t autodesk/postgres-pglogical-docker:13 && docker compose run tests 18 | 19 | tests: test 20 | 21 | local-dev: ## Sets up docker containers for Postgres DBs and gets you into a docker container with pgbelt installed. 
DC: testdc, DB: testdb 22 | docker build . -t autodesk/pgbelt:latest && docker build tests/integration/files/postgres13-pglogical-docker/ -t autodesk/postgres-pglogical-docker:13 && docker compose run localtest 23 | 24 | clean-docker: ## Stop and remove all docker containers and images made from local testing 25 | docker stop $$(docker ps -aq --filter name=^/pgbelt) && docker rm $$(docker ps -aq --filter name=^/pgbelt) && docker compose down --rmi all 26 | 27 | # Note: typer-cli has dependency conflict issues that don't affect it generating docs, see https://github.com/tiangolo/typer-cli/pull/120. 28 | # We need to install the package with pip instead. Then, we run pre-commit to fix the formatting of the generated file. 29 | # Finally, we OR true because the pre-commit errors when finding stuff to fix, but that's exactly what we want it to do. 30 | generate-usage-docs: ## Generate usage docs 31 | pip3 install typer-cli && typer pgbelt/main.py utils docs --name belt > docs/usage.md && pre-commit run --files docs/usage.md || true 32 | 33 | clean-local-files: # Clean out local files that were generated by local-dev command or local runs of pytest 34 | rm -rf schemas/ configs/testdc/ logs/ tables/ 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pgbelt 2 | 3 |
[README badge images not recovered; visible alt text: "Latest Commit"]
16 | 17 | PgBelt is a CLI tool used to manage Postgres data migrations from beginning to end, 18 | for a single database or a fleet, leveraging pglogical replication. 19 | 20 | It was built to assist in migrating data between postgres databases with as 21 | little application downtime as possible. It works in databases running different versions 22 | of postgres and makes it easy to run many migrations in parallel during a single downtime. 23 | 24 | | :exclamation: This is very important | 25 | | :---------------------------------------------------------------------------------------------------------------------------------------------- | 26 | | As with all Data Migration tasks, **there is a risk of data loss**. Please ensure you have backed up your data before attempting any migrations | 27 | 28 | ## Installation 29 | 30 | ### Install From PyPi 31 | 32 | It is recommended to install pgbelt inside a virtual environment: 33 | 34 | - [pyenv](https://github.com/pyenv/pyenv) 35 | - [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv) 36 | 37 | You must also have: 38 | 39 | - Postgres Client Tools (pg_dump, pg_restore). Mac: `brew install libpq`. Ubuntu: `sudo apt-get install postgresql-client` 40 | 41 | Install pgbelt locally: 42 | 43 | pip3 install pgbelt 44 | 45 | ## Quickstart with Pgbelt 46 | 47 | See [this doc](docs/quickstart.md)! 48 | 49 | ## Playbook 50 | 51 | This playbook gets updated actively. If you have any issues, solutions could be found in [this playbook](docs/playbook.md). 52 | 53 | ## Contributing 54 | 55 | We welcome contributions! See [this doc](CONTRIBUTING.md) on how to do so, including setting up your local development environment. 56 | -------------------------------------------------------------------------------- /code-of-conduct.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our community a safe, inclusive and harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive and healthy community. 
6 | 7 | ## Our Standards 8 | 9 | Our Open Source Community works to: 10 | 11 | - Be kind towards other people which enables us to be empathic to each other 12 | - Be respectful of differing opinions, viewpoints, and experiences 13 | - Give and gracefully accept constructive feedback 14 | - Accept responsibility and apologize to those affected by our mistakes, and learning from the experience 15 | - Focus on what is best not just for us as individuals, but for the overall community 16 | 17 | We will not tolerate the following behaviors: 18 | 19 | - Violent threats or language 20 | - The use of sexualized language or imagery, and sexual attention or advances of any kind 21 | - Trolling, insulting or derogatory comments, and personal or political attacks 22 | - Public or private harassment 23 | - Publishing others’ private information, such as a physical or email address, without their explicit permission 24 | - Other conduct which could reasonably be considered inappropriate in a professional setting 25 | -------------------------------------------------------------------------------- /configs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Autodesk/pgbelt/d4d6839a005f34344a024eac838f14d6fe59f991/configs/README.md -------------------------------------------------------------------------------- /configs/datacenter-name/database-name/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "db": "", 3 | "dc": "", 4 | "src": { 5 | "host": "", 6 | "ip": "", 7 | "db": "", 8 | "port": "5432", 9 | "root_user": { 10 | "name": "postgres", 11 | "pw": "" 12 | }, 13 | "owner_user": { 14 | "name": "owner", 15 | "pw": "" 16 | }, 17 | "pglogical_user": { 18 | "name": "pglogical", 19 | "pw": "" 20 | }, 21 | "other_users": null 22 | }, 23 | "dst": { 24 | "host": "", 25 | "ip": "", 26 | "db": "", 27 | "port": "5432", 28 | "root_user": { 29 | "name": "postgres", 30 | "pw": "" 31 | }, 32 | "owner_user": { 33 | "name": "owner", 34 | "pw": "" 35 | }, 36 | "pglogical_user": { 37 | "name": "pglogical", 38 | "pw": "" 39 | }, 40 | "other_users": null 41 | }, 42 | "tables": null, 43 | "sequences": null 44 | } 45 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | db-src-set-public-schema-full: &db 5 | image: autodesk/postgres-pglogical-docker:13 6 | environment: 7 | POSTGRES_PASSWORD: postgres 8 | PGDATA: /tmp/pgdata 9 | restart: on-failure 10 | networks: 11 | datacenter: 12 | ipv4_address: 10.5.0.5 13 | healthcheck: 14 | test: ["CMD-SHELL", "pg_isready"] 15 | interval: 10s 16 | timeout: 5s 17 | retries: 5 18 | 19 | db-dst-set-public-schema-full: 20 | <<: *db 21 | networks: 22 | datacenter: 23 | ipv4_address: 10.5.0.6 24 | 25 | db-src-set-non-public-schema-full: 26 | <<: *db 27 | networks: 28 | datacenter: 29 | ipv4_address: 10.5.0.7 30 | 31 | db-dst-set-non-public-schema-full: 32 | <<: *db 33 | networks: 34 | datacenter: 35 | ipv4_address: 10.5.0.8 36 | 37 | db-src-set-public-schema-exodus: 38 | <<: *db 39 | networks: 40 | datacenter: 41 | ipv4_address: 10.5.0.9 42 | 43 | db-dst-set-public-schema-exodus: 44 | <<: *db 45 | networks: 46 | datacenter: 47 | ipv4_address: 10.5.0.10 48 | 49 | db-src-set-non-public-schema-exodus: 50 | <<: *db 51 | networks: 52 | datacenter: 53 | ipv4_address: 10.5.0.11 54 | 55 | 
db-dst-set-non-public-schema-exodus: 56 | <<: *db 57 | networks: 58 | datacenter: 59 | ipv4_address: 10.5.0.12 60 | 61 | flake8: 62 | image: autodesk/pgbelt:latest 63 | command: poetry run flake8 64 | networks: 65 | - datacenter 66 | 67 | black: 68 | image: autodesk/pgbelt:latest 69 | command: poetry run black . 70 | networks: 71 | - datacenter 72 | 73 | tests: 74 | image: autodesk/pgbelt:latest 75 | environment: 76 | PUBLIC_FULL_SRC_IP: 10.5.0.5 77 | PUBLIC_FULL_DST_IP: 10.5.0.6 78 | NONPUBLIC_FULL_SRC_IP: 10.5.0.7 79 | NONPUBLIC_FULL_DST_IP: 10.5.0.8 80 | PUBLIC_EXODUS_SRC_IP: 10.5.0.9 81 | PUBLIC_EXODUS_DST_IP: 10.5.0.10 82 | NONPUBLIC_EXODUS_SRC_IP: 10.5.0.11 83 | NONPUBLIC_EXODUS_DST_IP: 10.5.0.12 84 | command: poetry run pytest --cov=pgbelt tests/ 85 | depends_on: 86 | db-src-set-public-schema-full: 87 | condition: service_healthy 88 | db-dst-set-public-schema-full: 89 | condition: service_healthy 90 | db-src-set-non-public-schema-full: 91 | condition: service_healthy 92 | db-dst-set-non-public-schema-full: 93 | condition: service_healthy 94 | db-src-set-public-schema-exodus: 95 | condition: service_healthy 96 | db-dst-set-public-schema-exodus: 97 | condition: service_healthy 98 | db-src-set-non-public-schema-exodus: 99 | condition: service_healthy 100 | db-dst-set-non-public-schema-exodus: 101 | condition: service_healthy 102 | networks: 103 | - datacenter 104 | 105 | localtest: 106 | image: autodesk/pgbelt:latest 107 | environment: 108 | PUBLIC_FULL_SRC_IP: 10.5.0.5 109 | PUBLIC_FULL_DST_IP: 10.5.0.6 110 | NONPUBLIC_FULL_SRC_IP: 10.5.0.7 111 | NONPUBLIC_FULL_DST_IP: 10.5.0.8 112 | PUBLIC_EXODUS_SRC_IP: 10.5.0.9 113 | PUBLIC_EXODUS_DST_IP: 10.5.0.10 114 | NONPUBLIC_EXODUS_SRC_IP: 10.5.0.11 115 | NONPUBLIC_EXODUS_DST_IP: 10.5.0.12 116 | command: bash -c "cd /pgbelt-volume/ && poetry run python3 tests/integration/conftest.py --non-public-schema && pip3 install -e . && bash" 117 | depends_on: 118 | db-src-set-public-schema-full: 119 | condition: service_healthy 120 | db-dst-set-public-schema-full: 121 | condition: service_healthy 122 | db-src-set-non-public-schema-full: 123 | condition: service_healthy 124 | db-dst-set-non-public-schema-full: 125 | condition: service_healthy 126 | db-src-set-public-schema-exodus: 127 | condition: service_healthy 128 | db-dst-set-public-schema-exodus: 129 | condition: service_healthy 130 | db-src-set-non-public-schema-exodus: 131 | condition: service_healthy 132 | db-dst-set-non-public-schema-exodus: 133 | condition: service_healthy 134 | networks: 135 | - datacenter 136 | volumes: 137 | - ./:/pgbelt-volume/:rw 138 | 139 | networks: 140 | datacenter: 141 | driver: bridge 142 | ipam: 143 | config: 144 | - subnet: 10.5.0.0/16 145 | gateway: 10.5.0.1 146 | -------------------------------------------------------------------------------- /docs/extended_knowledge.md: -------------------------------------------------------------------------------- 1 | # Extended Knowledge with `pgbelt` 2 | 3 | ## How `pglogical` replication works 4 | 5 | ### How a replication task works logically 6 | 7 | - We have a replication task that runs in **two phases**: 8 | 1. Full Load / Bulk Sync. Moving the majority of data takes a lot of time, so it is all dumped and loaded **at a specific timestamp**. While this occurs, any ongoing changes to the dataset from that timestamp onwards are stored in a **replication slot**. 9 | 2. Once the above step is finished, ongoing changes are consumed from the source database's replication slot and replayed on the destination database. 
This is an ongoing process. 10 | 11 | ### Pglogical Components for a Replication task 12 | 13 | - Node - A way of telling pglogical the existence of an external database, along with the credentials to connect with. 14 | - Subscription - A replication task initiated from the side of the subcribing node, or destination database. 15 | - Replication Set - A set of tables to replicate, along with settings of what action/statement types to replicate. 16 | - We replicate **all** actions, but the list of tables to replicate may vary. We replicate all tables in a database major version upgrade, but also only do subsets for "exodus-style" migrations. 17 | 18 | ### What `pgbelt` does with the above components: 19 | 20 | - Configure the pglogical nodes for the external database in both the source and destination databases. 21 | - For forward replication (source to destination) 22 | - Create a new replication set in the source DB, and add all required tables to it. 23 | - Start a new subscription from the destination DB, referencing the above replication set. 24 | - For reverse replication (destination to source) 25 | - Create a new replication set in the destination DB, and add all required tables to it. 26 | - Start a new subscription from the source DB, referencing the above replication set, **and with synchronize_structure off**. 27 | - The last flag ensures no full load sync occurs from the destination DB (incomplete/empty) to the source database. It will only replicate transactions other than the incoming forward replication statements. 28 | -------------------------------------------------------------------------------- /docs/playbook.md: -------------------------------------------------------------------------------- 1 | # Playbook 2 | 3 | ## I see an incorrect credential error with the `pglogical` user when setting up replication. What do I do? 4 | 5 | It is very possible you have multiple people using the `pgbelt` tool to set up replication. The config's `pglogical` password may be differnt in each person's config, and that is used during the `setup` stage. The password from the config is used to create the `pglogical` role in your databases. 6 | 7 | Therefore, the first person to run `setup` has set the `pglogical` user's password in the databases. The error likely comes from `pglogical` mentioning a `node` configuration, where the password is set. 8 | 9 | For information on `nodes` in the `pglogical` plugin, please see the `Extended Knowledge` document in this repository. 10 | 11 | To remedy this issue, you can perform the following: 12 | 13 | 1. If you see the error with the entire DSN (including password and IP address or hostname), identify if the host is the **source** or **destination** database. 14 | 2. Once identified, run the following to PSQL into that host: `psql "$(belt -dsn )"` 15 | 3. In that PSQL terminal, run the following to set the password according to the `node` configuration: `ALTER ROLE pglogical PASSWORD '';` 16 | 17 | ## How can I roll back? 18 | 19 | **NOTE: The rollback process is not fully implemented in pgbelt. You should make every effort to solve 20 | issues that surface only after writes have succeeded in the target database at the application level first!** 21 | 22 | If you discover an application issue that requires a rollback to the old database, you can do so without data loss even after 23 | writes have succeeded in the target database. 
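Before you begin, it is worth confirming that reverse replication is still healthy; `belt status` should show `src <- dst` as `replicating`. As a quick sketch, you can also ask pglogical directly on the **source** database, where the back-replication subscription is normally named `pg2_pg1` (adjust the name if your setup differs):

```sql
-- Sketch: run on the SOURCE database to confirm the reverse (dst -> src)
-- subscription is still replicating before starting rollback downtime.
SELECT subscription_name, status
FROM pglogical.show_subscription_status('pg2_pg1');
```

A status of `replicating` means changes written to the target are still flowing back to the source.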
24 | 25 | To perform a rollback you will need to begin another period of application downtime where neither 26 | database receives any writes. Once you are sure downtime has begun, run the following: 27 | 28 | $ belt teardown-back-replication testdatacenter1 database1 29 | $ belt restore-logins testdatacenter1 database1 30 | 31 | If you've lost the pgbelt config file where these users' names were stored when you ran the revoke logins 32 | command, some users might be missed here. 33 | 34 | Things that will need manual resolution: 35 | 36 | - Sequence values on the source database. You will need to copy these over from the target database, no `belt` commands cover this yet. 37 | - Tables without Primary Keys will need to be updated. You will need to copy these over from the target database to the source, no `belt` commands cover this yet. 38 | 39 | After you are sure that sequences and tables without primary keys have been synchronized from the target 40 | into the old source, point your application to the old source and your rollback is complete. 41 | 42 | ## I started a pgbelt replication job and need to restart it from scratch. How can I restart a pgbelt migration? 43 | 44 | The following is a general guide to restarting a pgbelt migration. This is useful if you have a failed migration, or if you need to restart a migration after a rollback. 45 | 46 | Run the following commands: 47 | 48 | $ belt teardown-back-replication testdatacenter1 database1 49 | $ belt teardown-forward-replication testdatacenter1 database1 50 | $ belt teardown testdatacenter1 database1 51 | $ belt teardown testdatacenter1 database1 --full 52 | $ belt remove-constraints testdatacenter1 database1 53 | $ belt remove-indexes testdatacenter1 database1 54 | 55 | Note that the first four commands will remove all replication job setup from the databases. `remove-constraints` removes NOT VALID constraints from the target schema so when you restart replication, they don't cause failed inserts (these must not exist during the initial setup). `remove-indexes` removes all indexes from the target schema to help speed up the initial bulk load. `remove-indexes` is not necessary to run, you may skip this if needed. 56 | 57 | After running these commands, you can `TRUNCATE` the tables in the destination database and start the migration from the beginning. **Please take as much precaution as possible when running TRUNCATE, as it will delete all data in the tables. Especially please ensure you are running this on the correct database!** 58 | 59 | ## My `sync` command has failed or is hanging. What can I do? 60 | 61 | The `sync` command from Step 7 of the Quickstart guide does the following: 62 | 63 | - Sync sequence values 64 | - Dump and load tables without Primary Keys 65 | - Add NOT VALID constraints to the target schema (they were removed in Step 1 in the target database) 66 | - Create Indexes (as long as this was run in Step 2, this will be glossed over. If step 2 was missed, indexes will build now amd this will take longer than expected). 67 | - Validate data (take 100 random rows and 100 last rows of each table, and compare data) 68 | - Run ANALYZE to ensure optimal performance 69 | 70 | If the `sync` command fails, you can try to run the individual commands that make up the `sync` command to see where the failure is. The individual commands are: 71 | 72 | ### 1. Syncing Sequences: 73 | 74 | - `sync-sequences` - reads and sets sequences values from SRC to DST at the time of command execution 75 | 76 | ### 2. 
Syncing Tables without Primary Keys: 77 | 78 | - `dump-tables` - dumps only tables without Primary Keys (to ensure only tables without Primary Keys are dumped, DO NOT specify the `--tables` flag for this command) 79 | - `load-tables` - load into DST DB the tables from the `dump-tables` command (found on disk) 80 | 81 | ### 3. Syncing NOT VALID Constraints: 82 | 83 | - `dump-schema` - dumps schema from your SRC DB schema onto disk (the files may already be on disk, but run this command just to ensure they exist anyways) 84 | - `load-constraints` - load NOT VALID constraints from disk (obtained by the `dump-schema` command) to your DST DB schema 85 | 86 | ### 4. Creating Indexes & Running ANALYZE: 87 | 88 | - `create-indexes` - Create indexes on the target database, and then runs ANALYZE as well. 89 | 90 | ### 5. Validating Data: 91 | 92 | - `validate-data` - Check random 100 rows and last 100 rows of every table involved in the replication job, and ensure all match exactly. 93 | 94 | ## belt hangs when running `teardown --full`. What can I do? 95 | 96 | If `belt` hangs when running `teardown --full`, it is likely having trouble dropping the `pglogical` extension. This normally happens due to any _idle in transaction_ connections to the database. To resolve this, you can run the following when it hangs: 97 | 98 | - CTRL+C to stop the `teardown --full` command 99 | - Identify which database is getting traffic (SRC or DST) 100 | - List out the active connections and find which are _idle in transaction_: 101 | - `SELECT * FROM pg_stat_activity;` 102 | - For each _idle in transaction_ connection, run the following: 103 | - `SELECT pg_terminate_backend();` 104 | - Once all _idle in transaction_ connections are terminated, you can run the `teardown --full` command again. 105 | 106 | ## I need to start the replication process again from the beginning. How can I do this? 107 | 108 | - Run `belt teardown` to remove the replication jobs from the databases. 109 | - Run `belt status` to ensure the replication jobs are `unconfigured` for both directions. 110 | - TRUNCATE the data in your destination database. **Please take as much precaution as possible when running TRUNCATE, as it will delete all data in the tables. Especially please ensure you are running this on the correct database!** 111 | - Now you can start the replication process again from the beginning (eg run `belt setup`). 112 | 113 | The following is a transaction that will TRUNCATE all tables in a database: 114 | 115 | ```sql 116 | SET lock_timeout = '2s'; 117 | DO 118 | $$ 119 | DECLARE 120 | _rec RECORD; 121 | BEGIN 122 | FOR _rec IN 123 | SELECT 124 | pg_namespace.nspname, 125 | pg_class.relname 126 | FROM 127 | pg_catalog.pg_class 128 | JOIN pg_catalog.pg_namespace ON ( 129 | pg_namespace.oid = pg_class.relnamespace AND 130 | pg_namespace.nspname = 'public' 131 | ) 132 | WHERE 133 | pg_class.relkind = 'r' 134 | LOOP 135 | -- RAISE WARNING 'TRUNCATE TABLE %.%;'; 136 | 137 | EXECUTE FORMAT( 138 | 'TRUNCATE TABLE %I.%I CASCADE', 139 | _rec.nspname, 140 | _rec.relname 141 | ); 142 | END LOOP; 143 | END; 144 | $$; 145 | ``` 146 | 147 | ## I accidentally ran `revoke-logins` on my database when the schema owner was the same as my root user. How can I undo this? 148 | 149 | When this happens you accidently revoke LOGIN permissions from your root user. You will need to re-grant this with another superuser. 
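For example, connect as any other superuser and re-add the LOGIN attribute. This sketch assumes the root role is named `postgres`; substitute your actual root role name:

```sql
-- Sketch: run as a different superuser to restore login for the root role.
ALTER ROLE postgres WITH LOGIN;
```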
150 | 151 | If you are using AWS RDS, you can reset the root password via the AWS Console or API, and that will restore all revoked privileges to the root user (as well as reset the password). 152 | 153 | ## I revoked logins on my database but I want to restore them. How can I do this? (NOT when the schema owner is the same as the root user) 154 | 155 | If you revoked logins on your database and want to restore them, you can run the following command: 156 | 157 | $ belt restore-logins testdatacenter1 database1 158 | 159 | ## The status of my replication job is `down`. What can I do? 160 | 161 | There are a few reasons why a replication job can be `down`. The most common reasons are: 162 | 163 | ### 1. If you were in the `initializing` phase (eg. last state was `initializing`, and the status is now `down`): 164 | 165 | A. Your DST database may not have been empty when starting your replication job. 166 | 167 | - Check your DST database's log files. This database should be getting no traffic other that `pglogical`. 168 | - If you see logs like `ERROR: duplicate key value violates unique constraint`, your DST database was not empty when you started the replication job. You will need to start your replication job again from the beginning. 169 | - See the `I need to start the replication process again from the beginning. How can I do this?` question in this document. 170 | 171 | B. Your network may have been interrupted between the SRC and DST databases. 172 | 173 | - Check your DST database's log files. You should see logs like `background worker "pglogical apply XXXXX:YYYYYYYYYY" (PID ZZZZZ) exited with exit code 1`. 174 | - Connect to your DST database and run the following: 175 | - `SELECT * FROM pg_replication_origin;` 176 | - If you see 0 rows, **your replication job was disrupted, and can be restored**. You can restore by doing the following: 177 | - Connect to your DST database and run the following: `SELECT pglogical.alter_subscription_disable('',true);` 178 | - If this is forward replicaton, the subscription name will be `pg1_pg2` and if this is back replication, the subscription name will be `pg2_pg1`. 179 | - Get the publisher node identifier from the DST database by running the following: `SELECT * FROM pg_replication_origin;` 180 | - Use the `roname` from the previous query to run the following: `SELECT pg_replication_origin_create('');` 181 | - Run the following to re-enable the subscription: `SELECT pglogical.alter_subscription_enable('',true);` 182 | - Check on the status of replication now by running `belt status`. 183 | - If you see 1 row, your replication job was not disrupted, and you will need to diagnose further as to why the `pglogical` plugin failed to apply changes. 184 | - As of now, there is no recovery process for this. You will need to start your replication job again from the beginning. 185 | 186 | Source: https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Appendix.PostgreSQL.CommonDBATasks.pglogical.recover-replication-after-upgrade.html 187 | -------------------------------------------------------------------------------- /docs/quickstart.md: -------------------------------------------------------------------------------- 1 | # Quickstart with PgBelt 2 | 3 | The base case is using the tool to migrate all data from one Postgres database to another, while being used by an application. 4 | 5 | Let's say we have `database1` inside datacenter `testdatacenter1`, and you are moving this database's data 6 | from one host to another. 
The source host has all the data, the destination is **empty, not even a schema** (aside -- belt 7 | can work with preloaded schemas in the destination, just not done in this example). 8 | 9 | This example will set up **bidirectional replication** between your source and destination hosts. This is useful to have configured 10 | so after cutover, your application can keep writing updates to the source in case a rollback to the source host is required. The 11 | destination will only write/replicate back to the source once writes occur on the destination host. 12 | 13 | # Local Setup 14 | 15 | ## Step 1: Install PgBelt on your machine 16 | 17 | Pgbelt is now available on PyPi! Install it locally: 18 | 19 | pip3 install pgbelt 20 | 21 | ## Step 2: Create Required Folder and File Structure 22 | 23 | Create a migration project folder as follows: 24 | 25 | mymigrationproject/ 26 | configs/ 27 | testdatacenter1/ # Datacenter names are at this level 28 | database1/ # Database names are at this level 29 | config.json 30 | 31 | ## Step 3: Set up configuration 32 | 33 | Fill in config.json with the required info (marked in `<>`), referring to this example: 34 | 35 | ```json 36 | { 37 | "db": "database1", 38 | "dc": "testdatacenter1", 39 | "src": { 40 | // Anything in here must match what is in the host 41 | "host": "", 42 | "ip": "<###.###.###.###>", 43 | "db": "", 44 | "port": "<####>", 45 | "root_user": { 46 | "name": "", 47 | "pw": "" 48 | }, 49 | "owner_user": { 50 | "name": "", 51 | "pw": "" 52 | }, 53 | "pglogical_user": { 54 | // PgBelt will use this info to create this user in your database. 55 | "name": "pglogical", // No need to change this 56 | "pw": "" // You can use the following: python3 -c "from string import ascii_letters; from string import digits; from random import choices; print(\"\".join(choices(ascii_letters + digits, k=16)))"; 57 | }, 58 | "other_users": null 59 | }, 60 | "dst": { 61 | // Anything in here must match what is in the host 62 | "host": "", 63 | "ip": "<###.###.###.###>", 64 | "db": "", 65 | "port": "<####>", 66 | "root_user": { 67 | "name": "", 68 | "pw": "" 69 | }, 70 | "owner_user": { 71 | "name": "", 72 | "pw": "" 73 | }, 74 | "pglogical_user": { 75 | // PgBelt will use this info to create this user in your database. 76 | "name": "pglogical", // No need to change this 77 | "pw": "" // You can use the following: python3 -c "from string import ascii_letters; from string import digits; from random import choices; print(\"\".join(choices(ascii_letters + digits, k=16)))"; 78 | }, 79 | "other_users": null 80 | }, 81 | "tables": [], 82 | "sequences": [] 83 | // Optional key: "schema_name": "". If the key isn't specified, the default will be "public". Schema name must be the same in source and destination DBs. 84 | } 85 | ``` 86 | 87 | ## Step 4: Confirm PgBelt can be used with your hosts 88 | 89 | Run the `belt precheck` command to check if `belt` can work for your migration. 90 | If any requirements fail, they show in red and need to be reconfigured on your 91 | database. 92 | 93 | NOTE: You must run `belt` from the root of the `mymigrationproject/` folder, 94 | as `belt` will check for configs based on relative pathing from where it is run. 
95 | 96 | $ belt precheck testdatacenter1 97 | 98 | **Also note: this command does not check the target database configuration or check 99 | for network connectivity between the two databases.** 100 | 101 | ### Database Requirements 102 | 103 | Both your source and target database must satisfy the following requirements: 104 | 105 | - Be running postgreSQL version 9.6 or greater. 106 | - Each database must be accessible from the other on the network. 107 | - All data to be migrated must be owned by a single login user, and that user must have CREATE permissions to create objects. 108 | - All targeted data must live in the same schema in both the source and destination DBs. 109 | - There must be a postgres superuser with a login in the database. 110 | - Have the following parameters: 111 | - `max_replication_slots` >= 2 (at least 2 for use by this tool, add more if other tools are using slots as well) 112 | - `max_worker_processes` >= 2 (should be as high as your CPU count) 113 | - `max_wal_senders` >= 10 (Postgres default is 10, should not be lower than this) 114 | - `shared_preload_libraries` must include both `pg_stat_statements` and `pglogical`. _NOTE:_ You must ensure your destination database has all required extensions for your schema. 115 | - If your db is on AWS RDS you must also set `rds.logical_replication = 1` 116 | 117 | # Migration Steps 118 | 119 | ## Step 1: Setup and Start Replication 120 | 121 | This command will set up the target database's schema, pglogical and start replication from the 122 | source to the destination. 123 | 124 | $ belt setup testdatacenter1 database1 125 | 126 | You can check the status of the migration, database hosts, replication delay, etc using the following command: 127 | 128 | $ belt status testdatacenter1 129 | 130 | ## Step 2: Create Indexes on the target database before your application cutover 131 | 132 | To ensure the bulk COPY phase of the migration runs faster, indexes are not made in the destination database during setup. 133 | They need to be built and this process should be done before the cutover to not prolong your cutover window. You should run 134 | this command during a period of low traffic. 135 | 136 | Note that this command will create all the indexes in the target database, **and will run ANALYZE after** to ensure optimal performance. 137 | 138 | $ belt create-indexes testdatacenter1 database1 139 | 140 | ## Step 3: (Optional) Run ANALYZE on the target database before your application cutover 141 | 142 | This is typically run some time before your application cutover, so the target database performs better with the dataset 143 | once the application cuts over to the target database. 144 | 145 | $ belt analyze testdatacenter1 database1 146 | 147 | ## Step 4: Set up Reverse Replication 148 | 149 | We will set up reverse replication (destination to source), in case rollback is needed later. 150 | 151 | This can be done at any time before the next step, but for clarity, we will do it now. It also is a very quick operation. Doing this later also eliminates any possibility of bad writes on the destination database being replicated back to the source. 152 | 153 | $ belt setup-back-replication testdatacenter1 database1 154 | 155 | ## Step 5: Stop write traffic to your source database 156 | 157 | This would be the beginning of your application downtime. We revoke all login permissions on the source host using `belt` to ensure writes can no longer occur. 
You may want to do this, then restart Postgres connections on your application to ensure connections can no longer write. 158 | 159 | **NOTE: Do not run this command if the schema owner of your database is the same as your root user.** 160 | 161 | $ belt revoke-logins testdatacenter1 database1 162 | 163 | ## Step 6: Stop forward replication 164 | 165 | Once write traffic has stopped on the source database, we need to stop replication in the forward direction. 166 | 167 | $ belt teardown-forward-replication testdatacenter1 database1 168 | 169 | ## Step 7: Sync all the missing bits from source to destination (that could not be done by replication) 170 | 171 | PgLogical (used for the actual replication) can't handle the following: 172 | 173 | - Replicating Sequences (see https://github.com/2ndQuadrant/pglogical/issues/163) 174 | - Replicating tables without Primary Keys 175 | - Replicate data with NOT VALID constraints into the target schema (since by nature, they are only enforced in a dataset once applied, not for all previous records) 176 | 177 | Tables without primary keys were already ignored as part of Step 1, and NOT VALID constraints were removed when the schema was set up in the target database in Step 1. 178 | 179 | Therefore the next command will do the following: 180 | 181 | - Sync sequence values 182 | - Dump and load tables without Primary Keys 183 | - Add NOT VALID constraints to the target schema (they were removed in Step 1 in the target database) 184 | - Create Indexes (as long as this was run in Step 2, this will be glossed over. If step 2 was missed, indexes will build now amd this will take longer than expected). 185 | - Validate data (take 100 random rows and 100 last rows of each table, and compare data) 186 | - Run ANALYZE to ensure optimal performance 187 | 188 | ``` 189 | $ belt sync testdatacenter1 database1 190 | ``` 191 | 192 | If the above command fails, please see the `playbook.md` document in this repository for more information on how to resolve the issue. 193 | 194 | ## Step 8: Enable write traffic to the destination host 195 | 196 | Enabling write traffic to the destination host is done outside of PgBelt, with your application. 197 | 198 | ## Step 9: Teardown pgbelt replication and leftover objects! 199 | 200 | Up until this step, reverse replication will be ongoing. It is meant to do this until you feel a rollback is unnecessary. To stop reverse replication, and consider your pgbelt migration **complete**, simply run the following: 201 | 202 | $ belt teardown testdatacenter1 database1 203 | $ belt teardown testdatacenter1 database1 --full 204 | 205 | The first command will tear down all replication jobs if still running. At this point, you should only have your reverse replication running. It will also tear down all of the pgbelt replication job objects in the database, including the `pglogical` role used by the jobs. 206 | 207 | The second command will run through the first command, and finally drop the `pglogical` extension from the database. This is separated out because the extension drop tends to hang if the previous steps are done right beforehand. When run separately, the DROP command likely will run without hanging or run in significantly less time. 208 | 209 | # Final Notes 210 | 211 | Please note that instructions for rolling back and restarting a migration are now in the playbook in this directory. Please refer to those for more information. 
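As an optional sanity check after Step 9 (not a required part of the migration), you can confirm that `teardown --full` really removed pglogical by querying either database; the query should return zero rows once the extension is dropped:

```sql
-- Sketch: returns no rows once the pglogical extension has been dropped.
SELECT extname FROM pg_extension WHERE extname = 'pglogical';
```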
212 | -------------------------------------------------------------------------------- /local_dev_scripts/README.md: -------------------------------------------------------------------------------- 1 | # Local Development Scripts 2 | 3 | This section of the repository will contain scripts to aid with development in `pgbelt`. 4 | 5 | ## generate_large_test_data.py 6 | 7 | This script simply generates chunks of INSERTs to the integration test `users` table. It will return a large string. 8 | 9 | For easy use, simply redirect output to a file, then load it into your database yourself via PSQL. 10 | 11 | ``` 12 | python3 generate_large_test_data.py > extra_data.sql 13 | ``` 14 | 15 | NOTE: The existing parameters in the script generate a 5GB SQL file and 10000MB of on-disk data to use. This could overwhelm your laptop's Docker engine (you might need to bump your Docker engine allocated memory). 16 | -------------------------------------------------------------------------------- /local_dev_scripts/generate_large_test_data.py: -------------------------------------------------------------------------------- 1 | # Script to echo Postgres lines for garbage test data 2 | # Useful for local development where you want to test with a large dataset 3 | # Need to chunk inserts otherwise the query goes too large for the docker container to handle. 4 | 5 | # NOTE: The existing parameters in the script generate a 5GB SQL file and 10000MB of on-disk data to use. 6 | # This could overwhelm your laptop's Docker engine (you might need to bump your Docker engine allocated memory). 7 | 8 | set_size = 100000 9 | num_sets = 1000 10 | set_num = 0 11 | while set_num < num_sets: 12 | num = 0 13 | print( 14 | """ 15 | INSERT INTO public.users (id, hash_firstname, hash_lastname, gender) 16 | VALUES 17 | """ 18 | ) 19 | while num < set_size - 1: 20 | print( 21 | f" ({set_num * set_size + num}, 'dsdssdgarbagefirst', 'dgsaggggdjj', 'male')," 22 | ) 23 | num = num + 1 24 | print( 25 | f" ({set_num * set_size + num}, 'dsdssdgarbagefirst', 'dgsaggggdjj', 'male');" 26 | ) 27 | set_num = set_num + 1 28 | -------------------------------------------------------------------------------- /pgbelt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Autodesk/pgbelt/d4d6839a005f34344a024eac838f14d6fe59f991/pgbelt.png -------------------------------------------------------------------------------- /pgbelt/__init__.py: -------------------------------------------------------------------------------- 1 | from pgbelt.config.models import DbConfig 2 | from pgbelt.config.models import DbupgradeConfig 3 | from pgbelt.config.remote import BaseResolver 4 | -------------------------------------------------------------------------------- /pgbelt/cmd/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | from pkgutil import walk_packages 3 | 4 | from pgbelt import cmd 5 | from pgbelt.cmd.helpers import add_command as _add_command 6 | 7 | COMMANDS = [] 8 | 9 | # discover all commands in any module in this directory 10 | for _, modname, _ in walk_packages(cmd.__path__): 11 | mod = import_module(f"{cmd.__name__}.{modname}") 12 | COMMANDS += getattr(mod, "COMMANDS", []) 13 | 14 | 15 | def add_commands(app): 16 | for command in COMMANDS: 17 | _add_command(app, command) 18 | -------------------------------------------------------------------------------- /pgbelt/cmd/convenience.py: 
-------------------------------------------------------------------------------- 1 | import socket 2 | from asyncio import open_connection 3 | from asyncio import run 4 | from asyncio import TimeoutError 5 | from asyncio import wait_for 6 | from collections.abc import Awaitable 7 | from logging import Logger 8 | 9 | from asyncpg import create_pool 10 | from pgbelt.cmd.helpers import run_with_configs 11 | from pgbelt.config.config import get_config 12 | from pgbelt.config.models import DbupgradeConfig 13 | from pgbelt.util.logs import get_logger 14 | from pgbelt.util.postgres import analyze_table_pkeys 15 | from tabulate import tabulate 16 | from typer import echo 17 | from typer import Option 18 | from typer import style 19 | 20 | 21 | def src_dsn( 22 | db: str, 23 | dc: str, 24 | owner: bool = Option(False, help="Use the owner credentials"), 25 | pglogical: bool = Option(False, help="Use the pglogical credentials."), 26 | ) -> None: 27 | """ 28 | Print a dsn to stdout that you can use to connect to the source db: 29 | psql "$(dbup src-dsn scribble prod-use1-pg-1)" 30 | 31 | Pass --owner to log in as the owner or --pglogical to log in as pglogical. 32 | """ 33 | conf = get_config(db, dc, skip_dst=True) 34 | echo( 35 | conf.src.owner_dsn 36 | if owner 37 | else conf.src.pglogical_dsn if pglogical else conf.src.root_dsn 38 | ) 39 | 40 | 41 | def dst_dsn( 42 | db: str, 43 | dc: str, 44 | owner: bool = Option(False, help="Use the owner credentials"), 45 | pglogical: bool = Option(False, help="Use the pglogical credentials."), 46 | ) -> None: 47 | """ 48 | Print a dsn to stdout that you can use to connect to the destination db: 49 | psql "$(dbup dst-dsn scribble prod-use1-pg-1)" 50 | 51 | Pass --owner to log in as the owner or --pglogical to log in as pglogical. 52 | """ 53 | conf = get_config(db, dc, skip_src=True) 54 | echo( 55 | conf.dst.owner_dsn 56 | if owner 57 | else conf.dst.pglogical_dsn if pglogical else conf.dst.root_dsn 58 | ) 59 | 60 | 61 | async def _check_pkeys( 62 | conf: DbupgradeConfig, logger: Logger 63 | ) -> tuple[list[str], list[str]]: 64 | async with create_pool(conf.src.root_uri, min_size=1) as pool: 65 | pkey_tables, no_pkey_tables, _ = await analyze_table_pkeys( 66 | pool, conf.schema_name, logger 67 | ) 68 | return pkey_tables, no_pkey_tables 69 | 70 | 71 | def check_pkeys(db: str, dc: str) -> None: 72 | """ 73 | Print out lists of tables with and without primary keys 74 | """ 75 | conf = get_config(db, dc, skip_src=True) 76 | logger = get_logger(db, dc, "convenience.src") 77 | pkeys, no_pkeys = run(_check_pkeys(conf, logger)) 78 | echo( 79 | f"""Analyzed table pkeys for {db} in {dc}: 80 | has pkey: {pkeys} 81 | no pkey: {no_pkeys} 82 | """ 83 | ) 84 | 85 | 86 | async def _print_connectivity_results(results: list[dict]): 87 | """ 88 | For a list of databases in a datacenter, show a table of established connections. 89 | 90 | Also exit(1) if ANY connections failed. 91 | """ 92 | 93 | table = [ 94 | [ 95 | style("database", "yellow"), 96 | style("src connect ok", "yellow"), 97 | style("dst connect ok", "yellow"), 98 | ] 99 | ] 100 | 101 | results.sort(key=lambda d: d["db"]) 102 | 103 | failed_connection_exists = False 104 | for r in results: 105 | table.append( 106 | [ 107 | style(r["db"], "green"), 108 | style(r["src"], "green" if r["src"] else "red"), 109 | style(r["dst"], "green" if r["dst"] else "red"), 110 | ] 111 | ) 112 | # If any of the connections have failed in this DB, and the flag hasn't been set, set it. 
113 | if not failed_connection_exists and (r["src"] is False or r["dst"] is False): 114 | failed_connection_exists = True 115 | 116 | echo(tabulate(table, headers="firstrow")) 117 | 118 | if failed_connection_exists: 119 | exit(1) 120 | 121 | 122 | @run_with_configs(results_callback=_print_connectivity_results) 123 | async def check_connectivity(config_future: Awaitable[DbupgradeConfig]) -> None: 124 | """ 125 | Returns exit code 0 if pgbelt can connect to all databases in a datacenter 126 | (if db is not specified), or to both src and dst of a database. 127 | 128 | This is done by checking network access to the database ports ONLY. 129 | 130 | If any connection times out, the command will exit 1. It will test ALL connections 131 | before returning exit code 1 or 0, and output which connections passed/failed. 132 | """ 133 | 134 | conf = await config_future 135 | 136 | src_future = open_connection(conf.src.ip, conf.src.port) 137 | src_logger = get_logger(conf.db, conf.dc, "connect.src") 138 | dst_future = open_connection(conf.dst.ip, conf.dst.port) 139 | dst_logger = get_logger(conf.db, conf.dc, "connect.dst") 140 | src_connect_ok = False 141 | dst_connect_ok = False 142 | 143 | # Source Connection Checks 144 | try: 145 | src_logger.info("Checking network access to port...") 146 | 147 | # Wait for 3 seconds, then raise TimeoutError 148 | _, writer = await wait_for(src_future, timeout=3) 149 | src_logger.debug("Can access network port.") 150 | writer.close() 151 | await writer.wait_closed() 152 | src_connect_ok = True 153 | except TimeoutError: 154 | src_logger.error("Cannot access network port. timed out.") 155 | except socket.gaierror as e: 156 | src_logger.error(f"Socket.gaierror {e}") 157 | except ConnectionRefusedError as e: 158 | src_logger.error(f"ConnectionRefusedError {e}") 159 | 160 | # Destination Connection Checks 161 | try: 162 | dst_logger.info("Checking network access to port...") 163 | 164 | # Wait for 3 seconds, then raise TimeoutError 165 | _, writer = await wait_for(dst_future, timeout=3) 166 | dst_logger.debug("Can access network port.") 167 | writer.close() 168 | await writer.wait_closed() 169 | dst_connect_ok = True 170 | except TimeoutError: 171 | dst_logger.error("Cannot access network port. 
timed out.") 172 | except socket.gaierror as e: 173 | dst_logger.error(f"Socket.gaierror {e}") 174 | except ConnectionRefusedError as e: 175 | dst_logger.error(f"ConnectionRefusedError {e}") 176 | 177 | # TODO: Exit code AFTER all have run 178 | 179 | return {"db": conf.db, "src": src_connect_ok, "dst": dst_connect_ok} 180 | 181 | 182 | COMMANDS = [src_dsn, dst_dsn, check_pkeys, check_connectivity] 183 | -------------------------------------------------------------------------------- /pgbelt/cmd/helpers.py: -------------------------------------------------------------------------------- 1 | from asyncio import gather 2 | from asyncio import run 3 | from collections.abc import Awaitable 4 | from collections.abc import Callable 5 | from functools import wraps 6 | from inspect import iscoroutinefunction 7 | from inspect import Parameter 8 | from inspect import signature 9 | from typing import Any 10 | from typing import Optional # noqa: F401 # Needed until tiangolo/typer#522 is fixed) 11 | from typing import TypeVar 12 | 13 | from pgbelt.config import get_all_configs_async 14 | from pgbelt.config import get_config_async 15 | from typer import Argument 16 | from typer import Typer 17 | 18 | 19 | T = TypeVar("T") 20 | 21 | 22 | def run_with_configs( 23 | decorated_func: Callable[..., Awaitable[Optional[T]]] = None, 24 | skip_src: bool = False, 25 | skip_dst: bool = False, 26 | results_callback: Optional[Callable[[list[T]], Awaitable[Optional[Any]]]] = None, 27 | ) -> Callable: 28 | """ 29 | Decorator for async commands. Implementations should take one Awaitable[DbupgradeConfig] arg 30 | and do some operation on the databases in it. This wrapper handles looking up the 31 | config and executing the command. The decorated result can be run either on one db only 32 | or on the entire datacenter concurrently. 33 | 34 | You may also provide a callback that will be called on the results of the command. Useful 35 | for displaying the output of interrogative commands. 36 | """ 37 | 38 | def decorator(func): 39 | if skip_src and skip_dst: 40 | func.__doc__ += ( 41 | "\n\n Can be run with both src and dst set null in the config file." 42 | ) 43 | elif skip_src: 44 | func.__doc__ += "\n\n Can be run with a null src in the config file." 45 | elif skip_dst: 46 | func.__doc__ += "\n\n Can be run with a null dst in the config file." 47 | else: 48 | func.__doc__ += ( 49 | "\n\n Requires both src and dst to be not null in the config file." 50 | ) 51 | 52 | # The name, docstring, and signature of the implementation is preserved. Important for add_command 53 | @wraps(func) 54 | async def wrapper(dc: str, db: Optional[str], **kwargs): 55 | # If db is specified we only want to run on one of them 56 | if db is not None: 57 | results = [ 58 | await func( 59 | get_config_async(db, dc, skip_src=skip_src, skip_dst=skip_dst), 60 | **kwargs 61 | ) 62 | ] 63 | else: 64 | # if the db is not provided run on all the dbs in the dc 65 | results = await gather( 66 | *[ 67 | func(fut, **kwargs) 68 | async for fut in get_all_configs_async( 69 | dc, skip_src=skip_src, skip_dst=skip_dst 70 | ) 71 | ] 72 | ) 73 | 74 | # Call the callback if provided. 75 | if results_callback is None: 76 | return results 77 | return await results_callback(results) 78 | 79 | return wrapper 80 | 81 | # makes either @decorator or @decorator(...) 
work to decorate a function 82 | if decorated_func is None: 83 | return decorator 84 | return decorator(decorated_func) 85 | 86 | 87 | def add_command(app: Typer, command: Callable): 88 | """ 89 | Helper which attaches a function to the given typer app. Merges 90 | the signature of the underlying implementation with standard arguments. 91 | This allows command options to be defined on the implementation and all 92 | commands in belt to share common arguments defined in one place. 93 | 94 | If a command is async, then it may be run on all dbs in a dc concurrently. 95 | Otherwise we assume it only makes sense to run it on one. 96 | """ 97 | # Give typer the name of the actual implementing function 98 | name = command.__name__.replace("_", "-") 99 | 100 | # If async assume command can be run on a whole datacenter and make db optional 101 | if iscoroutinefunction(command): 102 | 103 | @app.command(name=name) 104 | def cmdwrapper(dc: str, db: Optional[str] = Argument(None), **kwargs): 105 | run(command(dc, db, **kwargs)) 106 | 107 | # Synchronous commands can only be run on one db at a time 108 | else: 109 | 110 | @app.command(name=name) 111 | def cmdwrapper(dc: str, db: str, **kwargs): 112 | command(db, dc, **kwargs) 113 | 114 | # remove the **kwargs since typer doesn't do anything with it 115 | wrap_signature = signature(cmdwrapper) 116 | wrap_params = wrap_signature.parameters.copy() 117 | wrap_params.popitem() 118 | 119 | # Remove any args without defaults from the implementation's signature 120 | # so we are left with only what typer interprets as options 121 | cmd_params = signature(command).parameters.copy() 122 | cmd_params_copy = cmd_params.copy() 123 | while ( 124 | cmd_params_copy 125 | and cmd_params_copy.popitem(last=False)[1].default is Parameter.empty 126 | ): 127 | cmd_params.popitem(last=False) 128 | 129 | # merge the arguments from the wrapper with the options from the implementation 130 | wrap_params.update(cmd_params) 131 | 132 | # set the signature for typer to read 133 | cmdwrapper.__signature__ = wrap_signature.replace(parameters=wrap_params.values()) 134 | 135 | # the docstring on the implementation will be used as typer help 136 | cmdwrapper.__doc__ = command.__doc__ 137 | if iscoroutinefunction(command): 138 | cmdwrapper.__doc__ += ( 139 | "\n\n If the db name is not given run on all dbs in the dc." 
140 | ) 141 | -------------------------------------------------------------------------------- /pgbelt/cmd/login.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections.abc import Awaitable 3 | from logging import Logger 4 | 5 | from asyncpg import create_pool 6 | from asyncpg import Pool 7 | from pgbelt.cmd.helpers import run_with_configs 8 | from pgbelt.config.models import DbConfig 9 | from pgbelt.config.models import DbupgradeConfig 10 | from pgbelt.config.models import User 11 | from pgbelt.util import get_logger 12 | from pgbelt.util.postgres import disable_login_users 13 | from pgbelt.util.postgres import enable_login_users 14 | from pgbelt.util.postgres import get_login_users 15 | 16 | 17 | # TODO this should be configurable, add a PgbeltConfig or something 18 | NO_DISABLE = [ 19 | "pglogical", 20 | "postgres", 21 | "rdsadmin", 22 | "vividcortexsu", 23 | "vividcortex", 24 | "fivetran", 25 | "datadog", 26 | "rdsrepladmin", 27 | "monitoring", 28 | ] 29 | 30 | 31 | async def _populate_logins(dbconf: DbConfig, pool: Pool, logger: Logger) -> None: 32 | all_logins = await get_login_users(pool, logger) 33 | exclude = [ 34 | dbconf.root_user.name, 35 | dbconf.owner_user.name, 36 | dbconf.pglogical_user.name, 37 | ] 38 | dbconf.other_users = [User(name=n) for n in all_logins if n not in exclude] 39 | 40 | 41 | @run_with_configs(skip_dst=True) 42 | async def revoke_logins(config_future: Awaitable[DbupgradeConfig]) -> None: 43 | """ 44 | Discovers all users in the db who can log in, saves them in the config file, 45 | then revokes their permission to log in. Use this command to ensure that all 46 | writes to the source database have been stopped before syncing sequence values 47 | and tables without primary keys. 48 | """ 49 | conf = await config_future 50 | logger = get_logger(conf.db, conf.dc, "login.src") 51 | 52 | async with create_pool(conf.src.root_uri, min_size=1) as pool: 53 | save_task = None 54 | if conf.src.other_users is None: 55 | await _populate_logins(conf.src, pool, logger) 56 | save_task = asyncio.create_task(conf.save()) 57 | 58 | to_disable = [] 59 | # Sometimes the owner user is the same as the root user. 60 | # When that happens, we don't want to disable the owner user. 61 | # If the owner user is different, we want to disable the owner user. 62 | if conf.src.owner_user.name != conf.src.root_user.name: 63 | to_disable.append(conf.src.owner_user.name) 64 | 65 | if conf.src.other_users is not None: 66 | to_disable += [ 67 | u.name for u in conf.src.other_users if u.name not in NO_DISABLE 68 | ] 69 | 70 | try: 71 | await disable_login_users(pool, to_disable, logger) 72 | finally: 73 | if save_task is not None: 74 | await save_task 75 | 76 | 77 | @run_with_configs(skip_dst=True) 78 | async def restore_logins(config_future: Awaitable[DbupgradeConfig]) -> None: 79 | """ 80 | Grant permission to log in for any user present in the config file. The user 81 | must already have a password. This will not generate or modify existing 82 | passwords for users. 83 | 84 | Intended to be used after revoke-logins in case a rollback is required. 
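    Example: belt restore-logins testdatacenter1 database1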
85 | """ 86 | conf = await config_future 87 | logger = get_logger(conf.db, conf.dc, "login.src") 88 | to_enable = [conf.src.owner_user.name] 89 | 90 | if conf.src.other_users is not None: 91 | to_enable += [u.name for u in conf.src.other_users if u.name not in NO_DISABLE] 92 | 93 | async with create_pool(conf.src.root_uri, min_size=1) as pool: 94 | await enable_login_users(pool, to_enable, logger) 95 | 96 | 97 | COMMANDS = [ 98 | revoke_logins, 99 | restore_logins, 100 | ] 101 | -------------------------------------------------------------------------------- /pgbelt/cmd/schema.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Awaitable 2 | from asyncpg import create_pool 3 | 4 | from pgbelt.cmd.helpers import run_with_configs 5 | from pgbelt.config.models import DbupgradeConfig 6 | from pgbelt.util.dump import apply_target_constraints 7 | from pgbelt.util.dump import apply_target_schema 8 | from pgbelt.util.dump import create_target_indexes 9 | from pgbelt.util.dump import dump_source_schema 10 | from pgbelt.util.dump import remove_dst_not_valid_constraints 11 | from pgbelt.util.dump import remove_dst_indexes 12 | from pgbelt.util.logs import get_logger 13 | from pgbelt.util.postgres import run_analyze 14 | 15 | 16 | @run_with_configs 17 | async def dump_schema(config_future: Awaitable[DbupgradeConfig]) -> None: 18 | """ 19 | Dumps and sanitizes the schema from the source database, then saves it to 20 | a file. Four files will be generated: 21 | 1. The entire sanitized schema 22 | 2. The schema with all NOT VALID constraints and CREATE INDEX statements removed, 23 | 3. A file that contains only the CREATE INDEX statements 24 | 4. A file that contains only the NOT VALID constraints 25 | These files will be saved in the schemas directory. 26 | """ 27 | conf = await config_future 28 | logger = get_logger(conf.db, conf.dc, "schema.src") 29 | await dump_source_schema(conf, logger) 30 | 31 | 32 | @run_with_configs(skip_src=True) 33 | async def load_schema(config_future: Awaitable[DbupgradeConfig]) -> None: 34 | """ 35 | Loads the sanitized schema from the file schemas/dc/db/no_invalid_constraints.sql 36 | into the destination as the owner user. 37 | 38 | Invalid constraints are omitted because the source database may contain data 39 | that was created before the constraint was added. Loading the constraints into 40 | the destination before the data will cause replication to fail. 41 | """ 42 | conf = await config_future 43 | logger = get_logger(conf.db, conf.dc, "schema.dst") 44 | await apply_target_schema(conf, logger) 45 | 46 | 47 | @run_with_configs(skip_src=True) 48 | async def load_constraints(config_future: Awaitable[DbupgradeConfig]) -> None: 49 | """ 50 | Loads the NOT VALID constraints from the file schemas/dc/db/invalid_constraints.sql 51 | into the destination as the owner user. This must only be done after all data is 52 | synchronized from the source to the destination database. 53 | """ 54 | conf = await config_future 55 | logger = get_logger(conf.db, conf.dc, "schema.dst") 56 | await apply_target_constraints(conf, logger) 57 | 58 | 59 | @run_with_configs(skip_src=True) 60 | async def remove_constraints(config_future: Awaitable[DbupgradeConfig]) -> None: 61 | """ 62 | Removes NOT VALID constraints from the target database. This must be done 63 | before setting up replication, and should only be used if the schema in the 64 | target database was loaded outside of pgbelt. 
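    Example: belt remove-constraints testdatacenter1 database1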
65 | """ 66 | conf = await config_future 67 | logger = get_logger(conf.db, conf.dc, "schema.dst") 68 | await remove_dst_not_valid_constraints(conf, logger) 69 | 70 | 71 | @run_with_configs(skip_src=True) 72 | async def remove_indexes(config_future: Awaitable[DbupgradeConfig]) -> None: 73 | """ 74 | Removes indexes from the target database. This must be done 75 | before setting up replication, and should only be used if the schema in the 76 | target database was loaded outside of pgbelt. 77 | """ 78 | conf = await config_future 79 | logger = get_logger(conf.db, conf.dc, "schema.dst") 80 | await remove_dst_indexes(conf, logger) 81 | 82 | 83 | @run_with_configs(skip_src=True) 84 | async def create_indexes(config_future: Awaitable[DbupgradeConfig]) -> None: 85 | """ 86 | Creates indexes from the file schemas/dc/db/indexes.sql into the destination 87 | as the owner user. This must only be done after most data is synchronized 88 | (at minimum after the initializing phase) from the source to the destination 89 | database. 90 | 91 | After creating indexes, the destination database should be analyzed to ensure 92 | the query planner has the most up-to-date statistics for the indexes. 93 | """ 94 | conf = await config_future 95 | logger = get_logger(conf.db, conf.dc, "schema.dst") 96 | await create_target_indexes(conf, logger, during_sync=False) 97 | 98 | # Run ANALYZE after creating indexes (without statement timeout) 99 | async with create_pool( 100 | conf.dst.root_uri, 101 | min_size=1, 102 | server_settings={ 103 | "statement_timeout": "0", 104 | }, 105 | ) as dst_pool: 106 | await run_analyze(dst_pool, logger) 107 | 108 | 109 | COMMANDS = [ 110 | dump_schema, 111 | load_schema, 112 | load_constraints, 113 | remove_constraints, 114 | remove_indexes, 115 | create_indexes, 116 | ] 117 | -------------------------------------------------------------------------------- /pgbelt/cmd/setup.py: -------------------------------------------------------------------------------- 1 | from asyncio import create_task 2 | from asyncio import gather 3 | from collections.abc import Awaitable 4 | from logging import Logger 5 | 6 | from asyncpg import create_pool 7 | from asyncpg import Pool 8 | from pgbelt.cmd.helpers import run_with_configs 9 | from pgbelt.config.models import DbupgradeConfig 10 | from pgbelt.util.dump import apply_target_schema 11 | from pgbelt.util.dump import dump_source_schema 12 | from pgbelt.util.logs import get_logger 13 | from pgbelt.util.pglogical import configure_node 14 | from pgbelt.util.pglogical import configure_pgl 15 | from pgbelt.util.pglogical import configure_replication_set 16 | from pgbelt.util.pglogical import configure_subscription 17 | from pgbelt.util.pglogical import grant_pgl 18 | from pgbelt.util.postgres import analyze_table_pkeys 19 | from typer import Option 20 | 21 | 22 | async def _dump_and_load_schema( 23 | conf: DbupgradeConfig, src_logger: Logger, dst_logger: Logger 24 | ) -> None: 25 | await dump_source_schema(conf, src_logger) 26 | await apply_target_schema(conf, dst_logger) 27 | 28 | 29 | async def _setup_src_node( 30 | conf: DbupgradeConfig, src_root_pool: Pool, src_logger: Logger 31 | ) -> None: 32 | """ 33 | Configure the pglogical node and replication set on the Source database. 
34 | """ 35 | 36 | await configure_node(src_root_pool, "pg1", conf.src.pglogical_dsn, src_logger) 37 | async with create_pool(conf.src.pglogical_uri, min_size=1) as src_pglogical_pool: 38 | pkey_tables, _, _ = await analyze_table_pkeys( 39 | src_pglogical_pool, conf.schema_name, src_logger 40 | ) 41 | 42 | pglogical_tables = pkey_tables 43 | if conf.tables: 44 | pglogical_tables = [t for t in pkey_tables if t in conf.tables] 45 | 46 | # Intentionally throw an error if no tables are found, so that the user can correct their config. 47 | # When reported by a certain user, errors showed when running the status command, but it was ignored, 48 | # then the user ran setup and since that DIDN'T throw an error, they assumed everything was fine. 49 | 50 | if not pglogical_tables: 51 | raise ValueError( 52 | f"No tables were targeted to replicate. Please check your config's schema and tables. DB: {conf.db} DC: {conf.dc}, SCHEMA: {conf.schema_name} TABLES: {conf.tables}.\nIf TABLES is [], all tables in the schema should be replicated, but pgbelt still found no tables.\nCheck the schema name or reach out to the pgbelt team for help." 53 | ) 54 | 55 | await configure_replication_set( 56 | src_root_pool, pglogical_tables, conf.schema_name, src_logger 57 | ) 58 | 59 | 60 | @run_with_configs 61 | async def setup( 62 | config_future: Awaitable[DbupgradeConfig], 63 | schema: bool = Option(True, help="Copy the schema?"), 64 | ) -> None: 65 | """ 66 | Configures pglogical to replicate all compatible tables from the source 67 | to the destination db. This includes copying the database schema from the 68 | source into the destination. 69 | 70 | If you want to set up the schema in the destination db manually you can use 71 | the --no-schema option to stop this from happening. 72 | """ 73 | conf = await config_future 74 | pools = await gather( 75 | create_pool(conf.src.root_uri, min_size=1), 76 | create_pool(conf.src.owner_uri, min_size=1), 77 | create_pool(conf.dst.root_uri, min_size=1), 78 | create_pool(conf.dst.owner_uri, min_size=1), 79 | ) 80 | 81 | src_root_pool, src_owner_pool, dst_root_pool, dst_owner_pool = pools 82 | try: 83 | src_logger = get_logger(conf.db, conf.dc, "setup.src") 84 | dst_logger = get_logger(conf.db, conf.dc, "setup.dst") 85 | 86 | # Configure Source for pglogical (before we can configure the plugin) 87 | await configure_pgl( 88 | src_root_pool, 89 | conf.src.pglogical_user.pw, 90 | src_logger, 91 | conf.src.owner_user.name, 92 | ) 93 | await grant_pgl(src_owner_pool, conf.tables, conf.schema_name, src_logger) 94 | 95 | # Load schema into destination 96 | schema_load_task = None 97 | if schema: 98 | schema_load_task = create_task( 99 | _dump_and_load_schema(conf, src_logger, dst_logger) 100 | ) 101 | 102 | # Configure Pglogical plugin on Source 103 | src_node_task = create_task(_setup_src_node(conf, src_root_pool, src_logger)) 104 | 105 | # We need to wait for the schema to exist in the target before setting up pglogical there 106 | if schema_load_task is not None: 107 | await schema_load_task 108 | 109 | # Configure Destination for pglogical (before we can configure the plugin) 110 | await configure_pgl( 111 | dst_root_pool, 112 | conf.dst.pglogical_user.pw, 113 | dst_logger, 114 | conf.dst.owner_user.name, 115 | ) 116 | await grant_pgl(dst_owner_pool, conf.tables, conf.schema_name, dst_logger) 117 | 118 | # Also configure the node on the destination... of itself. #TODO: This is a bit weird, confirm if this is necessary. 
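        # Likely needed because pglogical.create_subscription() runs on the subscriber,
        # and the subscriber must have its own local node defined first (assumption based
        # on pglogical's documented behavior; confirm before removing this call).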
119 | await configure_node(dst_root_pool, "pg2", conf.dst.pglogical_dsn, dst_logger) 120 | 121 | # The source node must be set up before we create a subscription 122 | await src_node_task 123 | await configure_subscription( 124 | dst_root_pool, "pg1_pg2", conf.src.pglogical_dsn, dst_logger 125 | ) 126 | finally: 127 | await gather(*[p.close() for p in pools]) 128 | 129 | 130 | @run_with_configs 131 | async def setup_back_replication(config_future: Awaitable[DbupgradeConfig]) -> None: 132 | """ 133 | Configures pglogical to replicate all compatible tables from the destination 134 | to the source db. Can only complete successfully after the initial load phase 135 | is completed for replication from the source to target. 136 | 137 | Back replication ensures that dataloss does not occur if a rollback is required 138 | after applications are allowed to begin writing data into the destination db. 139 | """ 140 | conf = await config_future 141 | pools = await gather( 142 | create_pool(conf.src.root_uri, min_size=1), 143 | create_pool(conf.src.pglogical_uri, min_size=1), 144 | create_pool(conf.dst.root_uri, min_size=1), 145 | ) 146 | src_root_pool, src_pglogical_pool, dst_root_pool = pools 147 | 148 | try: 149 | src_logger = get_logger(conf.db, conf.dc, "setup.src") 150 | pkeys, _, _ = await analyze_table_pkeys( 151 | src_pglogical_pool, conf.schema_name, src_logger 152 | ) 153 | dst_logger = get_logger(conf.db, conf.dc, "setup.src") 154 | 155 | pglogical_tables = pkeys 156 | if conf.tables: 157 | pglogical_tables = [t for t in pkeys if t in conf.tables] 158 | 159 | await configure_replication_set( 160 | dst_root_pool, pglogical_tables, conf.schema_name, dst_logger 161 | ) 162 | await configure_subscription( 163 | src_root_pool, "pg2_pg1", conf.dst.pglogical_dsn, src_logger 164 | ) 165 | finally: 166 | await gather(*[p.close() for p in pools]) 167 | 168 | 169 | COMMANDS = [ 170 | setup, 171 | setup_back_replication, 172 | ] 173 | -------------------------------------------------------------------------------- /pgbelt/cmd/status.py: -------------------------------------------------------------------------------- 1 | from asyncio import gather 2 | from collections.abc import Awaitable 3 | 4 | from asyncpg import create_pool 5 | from pgbelt.cmd.helpers import run_with_configs 6 | from pgbelt.config.models import DbupgradeConfig 7 | from pgbelt.util import get_logger 8 | from pgbelt.util.pglogical import dst_status 9 | from pgbelt.util.pglogical import src_status 10 | from pgbelt.util.postgres import initialization_progress 11 | from pgbelt.util.postgres import analyze_table_pkeys 12 | from tabulate import tabulate 13 | from typer import echo 14 | from typer import style 15 | 16 | 17 | async def _print_status_table(results: list[dict[str, str]]) -> list[list[str]]: 18 | table = [ 19 | [ 20 | style("database", "yellow"), 21 | style("src -> dst", "yellow"), 22 | style("src <- dst", "yellow"), 23 | style("sent_lag", "yellow"), 24 | style("flush_lag", "yellow"), 25 | style("write_lag", "yellow"), 26 | style("replay_lag", "yellow"), 27 | style("src_dataset_size", "yellow"), 28 | style("dst_dataset_size", "yellow"), 29 | style("progress", "yellow"), 30 | ] 31 | ] 32 | 33 | results.sort(key=lambda d: d["db"]) 34 | 35 | for r in results: 36 | table.append( 37 | [ 38 | style(r["db"], "green"), 39 | style( 40 | r["pg1_pg2"], "green" if r["pg1_pg2"] == "replicating" else "red" 41 | ), 42 | style( 43 | r["pg2_pg1"], "green" if r["pg2_pg1"] == "replicating" else "red" 44 | ), 45 | style(r["sent_lag"], "green" if 
r["sent_lag"] == "0" else "red"), 46 | style(r["flush_lag"], "green" if r["flush_lag"] == "0" else "red"), 47 | style(r["write_lag"], "green" if r["write_lag"] == "0" else "red"), 48 | style(r["replay_lag"], "green" if r["replay_lag"] == "0" else "red"), 49 | style(r["src_dataset_size"], "green"), 50 | style(r["dst_dataset_size"], "green"), 51 | style(r["progress"], "green"), 52 | ] 53 | ) 54 | 55 | echo(tabulate(table, headers="firstrow")) 56 | 57 | return table 58 | 59 | 60 | @run_with_configs(results_callback=_print_status_table) 61 | async def status(conf_future: Awaitable[DbupgradeConfig]) -> dict[str, str]: 62 | """ 63 | Print out a table of status information for one or all of the dbs in a datacenter. 64 | Contains the pglogical replication status for both directions of replication and 65 | replication lag data for forward replication. Possible replication statuses are as 66 | follows: 67 | 68 | unconfigured: No replication has been set up in this direction yet. 69 | 70 | initializing: Pglogical is performing an initial data dump to bring the follower up to speed. 71 | You can not begin replication in the opposite direction during this stage. 72 | 73 | replicating: Pglogical is replicating only net new writes in this direction. 74 | 75 | down: Pglogical has encountered an error and has stopped replicating entirely. 76 | Check the postgres logs on both dbs to determine the cause. 77 | """ 78 | conf = await conf_future 79 | src_logger = get_logger(conf.db, conf.dc, "status.src") 80 | dst_logger = get_logger(conf.db, conf.dc, "status.dst") 81 | 82 | pools = await gather( 83 | create_pool(dsn=conf.src.root_uri, min_size=1), 84 | create_pool(dsn=conf.dst.root_uri, min_size=1), 85 | ) 86 | src_pool, dst_pool = pools 87 | 88 | # Get the list of targeted tables by first getting all tables, then filtering whatever is in the config. 89 | pkey_tables, non_pkey_tables, _ = await analyze_table_pkeys( 90 | src_pool, conf.schema_name, src_logger 91 | ) 92 | all_tables = pkey_tables + non_pkey_tables 93 | target_tables = all_tables 94 | if conf.tables: 95 | target_tables = [t for t in all_tables if t in conf.tables] 96 | 97 | if not target_tables: 98 | raise ValueError( 99 | f"Targeted tables not found in the source database. Please check your config's schema and tables. DB: {conf.db} DC: {conf.dc}, SCHEMA: {conf.schema_name} TABLES: {conf.tables}." 100 | ) 101 | 102 | try: 103 | result = await gather( 104 | src_status(src_pool, src_logger), 105 | dst_status(dst_pool, dst_logger), 106 | initialization_progress( 107 | target_tables, 108 | conf.schema_name, 109 | conf.schema_name, 110 | src_pool, 111 | dst_pool, 112 | src_logger, 113 | dst_logger, 114 | ), 115 | ) 116 | 117 | result[0].update(result[1]) 118 | result[0]["db"] = conf.db 119 | 120 | # We should hide the progress in the following cases: 121 | # 1. When src -> dst is replicating and dst -> src is any state (replicating, unconfigured, down) 122 | # a. We do this because the size when done still will be a tad smaller than SRC, showing <100% 123 | # 2. When src -> dst is unconfigured and dst -> src is replicating (not down or unconfigured) 124 | # a. We do this because reverse-only occurs at the start of cutover and onwards, and seeing the progress at that stage is not useful. 
125 | if (result[0]["pg1_pg2"] == "replicating") or ( # 1 126 | result[0]["pg1_pg2"] == "unconfigured" 127 | and result[0]["pg2_pg1"] == "replicating" 128 | ): # 2 129 | result[2]["src_dataset_size"] = "n/a" 130 | result[2]["dst_dataset_size"] = "n/a" 131 | result[2]["progress"] = "n/a" 132 | 133 | result[0].update(result[2]) 134 | return result[0] 135 | finally: 136 | await gather(*[p.close() for p in pools]) 137 | 138 | 139 | COMMANDS = [status] 140 | -------------------------------------------------------------------------------- /pgbelt/cmd/sync.py: -------------------------------------------------------------------------------- 1 | from asyncio import gather 2 | from collections.abc import Awaitable 3 | from logging import Logger 4 | 5 | from asyncpg import create_pool 6 | from asyncpg import Pool 7 | from pgbelt.cmd.helpers import run_with_configs 8 | from pgbelt.config.models import DbupgradeConfig 9 | from pgbelt.util.dump import apply_target_constraints 10 | from pgbelt.util.dump import create_target_indexes 11 | from pgbelt.util.dump import dump_source_tables 12 | from pgbelt.util.dump import load_dumped_tables 13 | from pgbelt.util.logs import get_logger 14 | from pgbelt.util.postgres import analyze_table_pkeys 15 | from pgbelt.util.postgres import compare_100_rows 16 | from pgbelt.util.postgres import compare_latest_100_rows 17 | from pgbelt.util.postgres import dump_sequences 18 | from pgbelt.util.postgres import load_sequences 19 | from pgbelt.util.postgres import run_analyze 20 | from typer import Option 21 | 22 | 23 | async def _sync_sequences( 24 | targeted_sequences: list[str], 25 | schema: str, 26 | src_pool: Pool, 27 | dst_pool: Pool, 28 | src_logger: Logger, 29 | dst_logger: Logger, 30 | ) -> None: 31 | 32 | seq_vals = await dump_sequences(src_pool, targeted_sequences, schema, src_logger) 33 | await load_sequences(dst_pool, seq_vals, schema, dst_logger) 34 | 35 | 36 | @run_with_configs 37 | async def sync_sequences(config_future: Awaitable[DbupgradeConfig]) -> None: 38 | """ 39 | Retrieve the current value of all sequences in the source database and update 40 | the sequences in the target to match. 41 | """ 42 | conf = await config_future 43 | pools = await gather( 44 | create_pool(conf.src.pglogical_uri, min_size=1), 45 | create_pool(conf.dst.root_uri, min_size=1), 46 | ) 47 | src_pool, dst_pool = pools 48 | try: 49 | src_logger = get_logger(conf.db, conf.dc, "sync.src") 50 | dst_logger = get_logger(conf.db, conf.dc, "sync.dst") 51 | await _sync_sequences( 52 | conf.sequences, conf.schema_name, src_pool, dst_pool, src_logger, dst_logger 53 | ) 54 | finally: 55 | await gather(*[p.close() for p in pools]) 56 | 57 | 58 | @run_with_configs(skip_dst=True) 59 | async def dump_tables( 60 | config_future: Awaitable[DbupgradeConfig], 61 | tables: list[str] = Option([], help="Specific tables to dump"), 62 | ) -> None: 63 | """ 64 | Dump all tables without primary keys from the source database and save 65 | them to files locally. 66 | 67 | You may also provide a list of tables to dump with the 68 | --tables option and only these tables will be dumped. 
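
    For example (illustrative value only): passing --tables table_a,table_b targets just
    those two tables; the handler below splits the supplied value on "," before dumping.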
69 | """ 70 | conf = await config_future 71 | logger = get_logger(conf.db, conf.dc, "sync.src") 72 | 73 | if tables: 74 | tables = tables.split(",") 75 | else: 76 | async with create_pool(conf.src.pglogical_uri, min_size=1) as src_pool: 77 | _, tables, _ = await analyze_table_pkeys(src_pool, conf.schema_name, logger) 78 | 79 | if conf.tables: 80 | tables = [t for t in tables if t in conf.tables] 81 | 82 | await dump_source_tables(conf, tables, logger) 83 | 84 | 85 | @run_with_configs(skip_src=True) 86 | async def load_tables( 87 | config_future: Awaitable[DbupgradeConfig], 88 | tables: list[str] = Option([], help="Specific tables to load"), 89 | ): 90 | """ 91 | Load all locally saved table data files into the destination db. A table will 92 | only be loaded into the destination if it currently contains no rows. 93 | 94 | You may also provide a list of tables to load with the 95 | --tables option and only these files will be loaded. 96 | """ 97 | conf = await config_future 98 | logger = get_logger(conf.db, conf.dc, "sync.dst") 99 | 100 | if tables: 101 | tables = tables.split(",") 102 | else: 103 | if conf.tables: 104 | tables = [t for t in tables if t in conf.tables] 105 | else: 106 | tables = [] 107 | 108 | await load_dumped_tables(conf, tables, logger) 109 | 110 | 111 | @run_with_configs(skip_src=True) 112 | async def analyze(config_future: Awaitable[DbupgradeConfig]) -> None: 113 | """ 114 | Run ANALYZE in the destination database. This should be run after data is 115 | completely replicated and before applications are allowed to use the new db. 116 | """ 117 | conf = await config_future 118 | logger = get_logger(conf.db, conf.dc, "sync.dst") 119 | async with create_pool( 120 | conf.dst.root_uri, 121 | min_size=1, 122 | server_settings={ 123 | "statement_timeout": "0", 124 | }, 125 | ) as dst_pool: 126 | await run_analyze(dst_pool, logger) 127 | 128 | 129 | @run_with_configs 130 | async def validate_data(config_future: Awaitable[DbupgradeConfig]) -> None: 131 | """ 132 | Compares data in the source and target databases. Both a random sample and a 133 | sample of the latest rows will be compared for each table. Does not validate 134 | the entire data set. 135 | """ 136 | conf = await config_future 137 | pools = await gather( 138 | create_pool(conf.src.pglogical_uri, min_size=1), 139 | create_pool(conf.dst.owner_uri, min_size=1), 140 | ) 141 | src_pool, dst_pool = pools 142 | 143 | try: 144 | logger = get_logger(conf.db, conf.dc, "sync") 145 | await gather( 146 | compare_100_rows(src_pool, dst_pool, conf.tables, conf.schema_name, logger), 147 | compare_latest_100_rows( 148 | src_pool, dst_pool, conf.tables, conf.schema_name, logger 149 | ), 150 | ) 151 | finally: 152 | await gather(*[p.close() for p in pools]) 153 | 154 | 155 | async def _dump_and_load_all_tables( 156 | conf: DbupgradeConfig, src_pool: Pool, src_logger: Logger, dst_logger: Logger 157 | ) -> None: 158 | _, tables, _ = await analyze_table_pkeys(src_pool, conf.schema_name, src_logger) 159 | if conf.tables: 160 | tables = [t for t in tables if t in conf.tables] 161 | await dump_source_tables(conf, tables, src_logger) 162 | await load_dumped_tables(conf, tables, dst_logger) 163 | 164 | 165 | @run_with_configs 166 | async def sync( 167 | config_future: Awaitable[DbupgradeConfig], no_schema: bool = False 168 | ) -> None: 169 | """ 170 | Sync and validate all data that is not replicated with pglogical. This includes all 171 | tables without primary keys and all sequences. 
Also loads any previously omitted 172 | NOT VALID constraints into the destination db and runs ANALYZE in the destination. 173 | 174 | This command is equivalent to running the following commands in order: 175 | sync-sequences, sync-tables, validate-data, load-constraints, analyze. 176 | Though here they may run concurrently when possible. 177 | """ 178 | conf = await config_future 179 | pools = await gather( 180 | create_pool(conf.src.pglogical_uri, min_size=1), 181 | create_pool(conf.dst.root_uri, min_size=1), 182 | create_pool(conf.dst.owner_uri, min_size=1), 183 | create_pool( 184 | conf.dst.root_uri, 185 | min_size=1, 186 | server_settings={ 187 | "statement_timeout": "0", 188 | }, 189 | ), 190 | ) 191 | src_pool, dst_root_pool, dst_owner_pool, dst_root_no_timeout_pool = pools 192 | 193 | try: 194 | src_logger = get_logger(conf.db, conf.dc, "sync.src") 195 | dst_logger = get_logger(conf.db, conf.dc, "sync.dst") 196 | validation_logger = get_logger(conf.db, conf.dc, "sync") 197 | 198 | await gather( 199 | _sync_sequences( 200 | conf.sequences, 201 | conf.schema_name, 202 | src_pool, 203 | dst_root_pool, 204 | src_logger, 205 | dst_logger, 206 | ), 207 | _dump_and_load_all_tables(conf, src_pool, src_logger, dst_logger), 208 | ) 209 | 210 | # Creating indexes should run before validations and ANALYZE, but after all the data exists 211 | # in the destination database. 212 | 213 | # Do not load NOT VALID constraints or create INDEXes for exodus-style migrations 214 | if not no_schema: 215 | await gather( 216 | apply_target_constraints(conf, dst_logger), 217 | create_target_indexes(conf, dst_logger, during_sync=True), 218 | ) 219 | 220 | await gather( 221 | compare_100_rows( 222 | src_pool, 223 | dst_owner_pool, 224 | conf.tables, 225 | conf.schema_name, 226 | validation_logger, 227 | ), 228 | compare_latest_100_rows( 229 | src_pool, 230 | dst_owner_pool, 231 | conf.tables, 232 | conf.schema_name, 233 | validation_logger, 234 | ), 235 | run_analyze(dst_root_no_timeout_pool, dst_logger), 236 | ) 237 | finally: 238 | await gather(*[p.close() for p in pools]) 239 | 240 | 241 | COMMANDS = [ 242 | sync_sequences, 243 | dump_tables, 244 | load_tables, 245 | analyze, 246 | validate_data, 247 | sync, 248 | ] 249 | -------------------------------------------------------------------------------- /pgbelt/cmd/teardown.py: -------------------------------------------------------------------------------- 1 | from asyncio import gather 2 | from asyncio import sleep 3 | from collections.abc import Awaitable 4 | 5 | from asyncpg import create_pool 6 | from pgbelt.cmd.helpers import run_with_configs 7 | from pgbelt.config.models import DbupgradeConfig 8 | from pgbelt.util.logs import get_logger 9 | from pgbelt.util.pglogical import revoke_pgl 10 | from pgbelt.util.pglogical import teardown_node 11 | from pgbelt.util.pglogical import teardown_pgl 12 | from pgbelt.util.pglogical import teardown_replication_set 13 | from pgbelt.util.pglogical import teardown_subscription 14 | from typer import Option 15 | 16 | 17 | @run_with_configs(skip_dst=True) 18 | async def teardown_back_replication(config_future: Awaitable[DbupgradeConfig]): 19 | """ 20 | Stops pglogical replication from the destination database to the source. 21 | You should only do this once you are certain a rollback will not be required. 
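
    Concretely this runs the equivalent of (matching the teardown_subscription call below):

        SELECT pglogical.drop_subscription('pg2_pg1', true);

    on the source database; forward (source to destination) replication is left untouched.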
22 | """ 23 | conf = await config_future 24 | async with create_pool(conf.src.root_uri, min_size=1) as src_pool: 25 | logger = get_logger(conf.db, conf.dc, "teardown.src") 26 | await teardown_subscription(src_pool, "pg2_pg1", logger) 27 | 28 | 29 | @run_with_configs(skip_src=True) 30 | async def teardown_forward_replication(config_future: Awaitable[DbupgradeConfig]): 31 | """ 32 | Stops pglogical replication from the source database to the destination. 33 | This should be done during your migration downtime before writes are allowed 34 | to the destination. 35 | """ 36 | conf = await config_future 37 | async with create_pool(conf.dst.root_uri, min_size=1) as dst_pool: 38 | logger = get_logger(conf.db, conf.dc, "teardown.dst") 39 | await teardown_subscription(dst_pool, "pg1_pg2", logger) 40 | 41 | 42 | @run_with_configs 43 | async def teardown( 44 | config_future: Awaitable[DbupgradeConfig], 45 | full: bool = Option(False, help="Remove pglogical extension"), 46 | ): 47 | """ 48 | Removes all pglogical configuration from both databases. If any replication is 49 | configured this will stop it. It will also drop the pglogical user. 50 | 51 | If run with --full the pglogical extension will be dropped. 52 | 53 | WARNING: running with --full may cause the database to lock up. You should be 54 | prepared to reboot the database if you do this. 55 | """ 56 | conf = await config_future 57 | pools = await gather( 58 | create_pool(conf.src.root_uri, min_size=1), 59 | create_pool(conf.dst.root_uri, min_size=1), 60 | ) 61 | src_root_pool, dst_root_pool = pools 62 | 63 | try: 64 | src_logger = get_logger(conf.db, conf.dc, "teardown.src") 65 | dst_logger = get_logger(conf.db, conf.dc, "teardown.dst") 66 | 67 | await gather( 68 | teardown_subscription(src_root_pool, "pg2_pg1", src_logger), 69 | teardown_subscription(dst_root_pool, "pg1_pg2", dst_logger), 70 | ) 71 | 72 | await gather( 73 | teardown_replication_set(src_root_pool, src_logger), 74 | teardown_replication_set(dst_root_pool, dst_logger), 75 | ) 76 | await sleep(15) 77 | 78 | await gather( 79 | teardown_node(src_root_pool, "pg1", src_logger), 80 | teardown_node(dst_root_pool, "pg2", dst_logger), 81 | ) 82 | await gather( 83 | revoke_pgl(src_root_pool, conf.tables, conf.schema_name, src_logger), 84 | revoke_pgl(dst_root_pool, conf.tables, conf.schema_name, dst_logger), 85 | ) 86 | 87 | if full: 88 | await sleep(15) 89 | 90 | await gather( 91 | teardown_pgl(src_root_pool, src_logger), 92 | teardown_pgl(dst_root_pool, dst_logger), 93 | ) 94 | finally: 95 | await gather(*[p.close() for p in pools]) 96 | 97 | 98 | COMMANDS = [teardown_back_replication, teardown_forward_replication, teardown] 99 | -------------------------------------------------------------------------------- /pgbelt/config/__init__.py: -------------------------------------------------------------------------------- 1 | from pgbelt.config.config import * 2 | -------------------------------------------------------------------------------- /pgbelt/config/config.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections.abc import AsyncGenerator 3 | from collections.abc import Awaitable 4 | from os.path import join 5 | from typing import Optional # noqa: F401 # Needed until tiangolo/typer#522 is fixed) 6 | 7 | from pgbelt.config.models import DbupgradeConfig 8 | from pgbelt.config.remote import resolve_remote_config 9 | from pgbelt.util import get_logger 10 | from pgbelt.util.asyncfuncs import isdir 11 | from 
pgbelt.util.asyncfuncs import isfile 12 | from pgbelt.util.asyncfuncs import listdir 13 | 14 | 15 | def get_config( 16 | db: str, dc: str, skip_src: bool = False, skip_dst: bool = False 17 | ) -> Optional[DbupgradeConfig]: 18 | """ 19 | Get a configuration for one database pair synchronously. 20 | """ 21 | config = asyncio.run(get_config_async(db, dc, skip_src, skip_dst)) 22 | 23 | if config is None: 24 | exit(1) 25 | 26 | return config 27 | 28 | 29 | async def get_config_async( 30 | db: str, dc: str, skip_src: bool = False, skip_dst: bool = False 31 | ) -> Optional[DbupgradeConfig]: 32 | """ 33 | Get configuration for one database pair asynchronously. Always prefers 34 | locally cached configuration but attempts to resolve any uncached configuration 35 | if it is required. Locally cached config never expires, so it may become stale. 36 | """ 37 | logger = get_logger(db, dc, "config") 38 | logger.info("Getting configuration...") 39 | config = await DbupgradeConfig.load(db, dc) 40 | 41 | if config is None or (config.src is None and config.dst is None): 42 | logger.info("Resolving remote configuration...") 43 | config = await resolve_remote_config(db, dc, skip_src, skip_dst) 44 | elif config.src is None and not skip_src: 45 | logger.info("Cached config did not include source info, resolving...") 46 | src_config = await resolve_remote_config(db, dc, skip_dst=True) 47 | if src_config is None or src_config.src is None: 48 | logger.critical("Could not resolve missing source info!") 49 | return None 50 | config.src = src_config.src 51 | elif config.dst is None and not skip_dst: 52 | logger.info("Cached config did not include target info, resolving...") 53 | dst_config = await resolve_remote_config(db, dc, skip_src=True) 54 | if dst_config is None or dst_config.dst is None: 55 | logger.critical("Could not resolve missing target info!") 56 | return None 57 | config.dst = dst_config.dst 58 | else: 59 | return config 60 | 61 | if config is None or (config.src is None and config.dst is None): 62 | logger.critical("No configuration could be retrieved") 63 | else: 64 | await config.save() 65 | 66 | return config 67 | 68 | 69 | async def find_available_configs(confdir: str, dc: str) -> set[str]: 70 | """ 71 | Search for all the database configs in a datacenter directory within a config directory. 72 | """ 73 | result = set() 74 | dc_dir = join(confdir, dc) 75 | if not await isdir(dc_dir): 76 | return set() 77 | for db in await listdir(dc_dir): 78 | if await isdir(join(dc_dir, db)) and await isfile( 79 | join(dc_dir, db, "config.json") 80 | ): 81 | result.add(db) 82 | return result 83 | 84 | 85 | async def get_all_configs_async( 86 | dc: str, skip_src: bool = False, skip_dst: bool = False 87 | ) -> AsyncGenerator[Awaitable[Optional[DbupgradeConfig]], None]: 88 | """ 89 | A generator that produces Awaitables that resolve to DbupgradeConfigs or None 90 | 91 | Will produce all possible configs in the given dc whether they are remote 92 | or already resolved and cached. 
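
    Example (illustrative, "my-datacenter" is a placeholder) of consuming the generator:

        async for fut in get_all_configs_async("my-datacenter"):
            config = await fut
            if config is not None:
                ...  # use the resolved DbupgradeConfig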
93 | """ 94 | logger = get_logger("all", dc, "config") 95 | logger.info("Getting all available configurations...") 96 | remote, local = await asyncio.gather( 97 | find_available_configs("remote-configs", dc), 98 | find_available_configs("configs", dc), 99 | ) 100 | 101 | for conf in asyncio.as_completed( 102 | [ 103 | get_config_async(db, dc, skip_src=skip_src, skip_dst=skip_dst) 104 | for db in remote | local 105 | ] 106 | ): 107 | yield conf 108 | -------------------------------------------------------------------------------- /pgbelt/config/models.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from os.path import join 4 | from typing import Optional # noqa: F401 # Needed until tiangolo/typer#522 is fixed) 5 | 6 | from aiofiles import open as aopen 7 | from aiofiles.os import remove 8 | from pgbelt.util import get_logger 9 | from pgbelt.util.asyncfuncs import makedirs 10 | from pydantic import BaseModel 11 | from pydantic import ValidationError 12 | from pydantic import field_validator 13 | from urllib.parse import quote 14 | 15 | 16 | def config_dir(db: str, dc: str) -> str: 17 | return f"configs/{dc}/{db}" 18 | 19 | 20 | def config_file(db: str, dc: str) -> str: 21 | return join(config_dir(db, dc), "config.json") 22 | 23 | 24 | def not_empty(v) -> Optional[str]: 25 | if v == "": 26 | raise ValueError 27 | return v 28 | 29 | 30 | class User(BaseModel): 31 | """ 32 | Represents a user in a postgres db. 33 | 34 | name: str The user name. 35 | pw: str The user's password. Only required for users pgbelt needs to log in as. 36 | """ 37 | 38 | name: str 39 | pw: Optional[str] = None 40 | 41 | _not_empty = field_validator("name", "pw")(not_empty) 42 | 43 | 44 | class DbConfig(BaseModel): 45 | """ 46 | Represents a postgres db instance. 47 | 48 | host: str The hostname of this instance. 49 | ip: str The ip of this instance. Instance IPs must be reachable from one another. 50 | db: str The dbname to operate on. If you want to migrate multiple dbs in a single instance set up a separate config. 51 | port: str The port to connect to. 52 | root_user: User A superuser. Usually the postgres user. 53 | owner_user: User A user who owns all the data in the your specified schema or who has equivalent permissions. # noqa: RST301 54 | This user will end up owning all the data if this is describing the target instance. 55 | pglogical_user: User A user for use with pglogical. Will be created if it does not exist. 56 | other_users: list[User] A list of other users whose passwords we might not know. 
57 | """ 58 | 59 | host: str 60 | ip: str 61 | db: str 62 | port: str 63 | root_user: User 64 | owner_user: User 65 | pglogical_user: User 66 | other_users: Optional[list[User]] = None 67 | 68 | _not_empty = field_validator("host", "ip", "db", "port")(not_empty) 69 | 70 | @field_validator("root_user", "owner_user", "pglogical_user") 71 | def has_password(cls, v) -> User: # noqa: N805 72 | if not v.pw: 73 | raise ValueError 74 | return v 75 | 76 | @property 77 | def root_dsn(self) -> str: 78 | return f"hostaddr={self.ip} port={self.port} dbname={self.db} user={self.root_user.name} password={self.root_user.pw}" 79 | 80 | @property 81 | def owner_dsn(self) -> str: 82 | return f"hostaddr={self.ip} port={self.port} dbname={self.db} user={self.owner_user.name} password={self.owner_user.pw}" 83 | 84 | @property 85 | def pglogical_dsn(self) -> str: 86 | return f"hostaddr={self.ip} port={self.port} dbname={self.db} user={self.pglogical_user.name} password={self.pglogical_user.pw}" 87 | 88 | @property 89 | def root_uri(self) -> str: 90 | password = quote( 91 | self.root_user.pw 92 | ) # https://github.com/encode/databases/issues/145#issuecomment-1303792343 need this to handle special characters 93 | return f"postgresql://{self.root_user.name}:{password}@{self.ip}:{self.port}/{self.db}" 94 | 95 | @property 96 | def owner_uri(self) -> str: 97 | password = quote( 98 | self.owner_user.pw 99 | ) # https://github.com/encode/databases/issues/145#issuecomment-1303792343 need this to handle special characters 100 | return f"postgresql://{self.owner_user.name}:{password}@{self.ip}:{self.port}/{self.db}" 101 | 102 | @property 103 | def pglogical_uri(self) -> str: 104 | password = quote( 105 | self.pglogical_user.pw 106 | ) # https://github.com/encode/databases/issues/145#issuecomment-1303792343 need this to handle special characters 107 | return f"postgresql://{self.pglogical_user.name}:{password}@{self.ip}:{self.port}/{self.db}" 108 | 109 | 110 | class DbupgradeConfig(BaseModel): 111 | """ 112 | Represents a migration to be performed. 113 | 114 | db: str A name used to identify this specific database pair. Used in cli commands. 115 | dc: str A name used to identify the environment this database pair is in. Used in cli commands. 116 | src: DbConfig The database we are moving data out of. 117 | dst: DbConfig The database we are moving data into. 118 | tables: Optional[list[str]] A list of tables to replicate. If not provided all tables in the named schema will be replicated. 119 | sequences: Optional[list[str]] A list of sequences to replicate. If not provided all sequences in the named schema will be replicated. 120 | schema_name: Optional[str] The schema to operate on. Defaults to "public". 
121 | """ 122 | 123 | db: str 124 | dc: str 125 | src: Optional[DbConfig] = None 126 | dst: Optional[DbConfig] = None 127 | tables: Optional[list[str]] = None 128 | sequences: Optional[list[str]] = None 129 | schema_name: Optional[str] = "public" 130 | 131 | _not_empty = field_validator("db", "dc")(not_empty) 132 | 133 | @property 134 | def file(self) -> str: 135 | return config_file(self.db, self.dc) 136 | 137 | @property 138 | def dir(self) -> str: 139 | return config_dir(self.db, self.dc) 140 | 141 | async def save(self): 142 | """ 143 | Write the configuration out to disk 144 | """ 145 | logger = get_logger(self.db, self.dc, "config") 146 | logger.debug("Caching config to disk...") 147 | 148 | try: 149 | await makedirs(self.dir) 150 | except FileExistsError: 151 | pass 152 | 153 | try: 154 | await remove(self.file) 155 | except FileNotFoundError: 156 | pass 157 | 158 | async with aopen(self.file, "w") as f: 159 | await f.write(self.model_dump_json(indent=4)) 160 | 161 | logger.info("Cached config to disk.") 162 | 163 | @classmethod 164 | async def load(cls, db: str, dc: str) -> Optional[DbupgradeConfig]: 165 | """ 166 | Load the specified configuration from disk if present. 167 | If the existing config is invalid or does not exist return None. 168 | """ 169 | logger = get_logger(db, dc, "config") 170 | logger.debug("Trying to load cached config...") 171 | 172 | try: 173 | async with aopen(config_file(db, dc), "r") as f: 174 | raw = await f.read() 175 | except FileNotFoundError: 176 | logger.info("No cached config available") 177 | return None 178 | 179 | try: 180 | out = cls.model_validate_json(raw) 181 | except ValidationError: 182 | logger.info("Cached config was not a valid DbupgradeConfig") 183 | return None 184 | 185 | logger.info("Found cached config.") 186 | 187 | return out 188 | -------------------------------------------------------------------------------- /pgbelt/config/remote.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | from json import JSONDecodeError 3 | from logging import Logger 4 | from typing import Optional # noqa: F401 # Needed until tiangolo/typer#522 is fixed) 5 | 6 | from aiofiles import open as aopen 7 | from pgbelt.config.models import DbupgradeConfig 8 | from pgbelt.util.logs import get_logger 9 | from pydantic import BaseModel 10 | from pydantic import ValidationError 11 | 12 | 13 | def remote_conf_path(db: str, dc: str) -> str: 14 | return f"remote-configs/{dc}/{db}/config.json" 15 | 16 | 17 | class RemoteConfigError(Exception): 18 | """ 19 | raised by the resolve method of a resolver when it could not retrieve 20 | configuration due to an error. 21 | """ 22 | 23 | pass 24 | 25 | 26 | class RemoteConfigDefinition(BaseModel): 27 | """ 28 | Pydantic model representing the contents of a remote-config json 29 | before we have the actual resolver to put it in. So the only required 30 | key is `resolver_path`. The rest are passed into the resolver model. 31 | """ 32 | 33 | resolver_path: str 34 | 35 | class Config: 36 | extra = "allow" 37 | 38 | 39 | class BaseResolver(BaseModel): 40 | """ 41 | Remote configuration resolvers must subclass this. 42 | 43 | db: str the database name 44 | dc: str the datacenter name 45 | skip_src: bool don't retrieve the source db configuration 46 | skip_dst: bool don't retrieve the destination db configuration 47 | logger: Logger your resolver should log through this logger 48 | 49 | Your resolver subclass will be a pydantic model. 
Any attributes you define 50 | other than the ones already in here will come from your remote config files. 51 | """ 52 | 53 | db: str 54 | dc: str 55 | skip_src: bool 56 | skip_dst: bool 57 | logger: Logger 58 | 59 | class Config: 60 | arbitrary_types_allowed = True 61 | extra = "ignore" 62 | 63 | async def resolve(self) -> Optional[DbupgradeConfig]: 64 | """ 65 | Called to retrieve the configuration from wherever your resolver gets it. 66 | Return configuration as a DbupgradeConfig. 67 | 68 | If you should have been able to get configuration but encountered an error 69 | then throw a RemoteConfigError. If there was no config and no error return None 70 | """ 71 | raise NotImplementedError 72 | 73 | 74 | async def load_remote_conf_def( 75 | config_file: str, logger: Logger 76 | ) -> Optional[RemoteConfigDefinition]: 77 | try: 78 | logger.debug(f"Reading remote config definition from file {config_file}") 79 | async with aopen(config_file, mode="r") as f: 80 | raw_json = await f.read() 81 | 82 | return RemoteConfigDefinition.model_validate_json(raw_json) 83 | except FileNotFoundError: 84 | logger.error(f"No remote config definition exists at {config_file}") 85 | except JSONDecodeError: 86 | logger.error(f"Remote config definition in {config_file} was malformed JSON.") 87 | except ValidationError: 88 | logger.error(f"Remote config definition in {config_file} was not valid.") 89 | 90 | 91 | async def resolve_remote_config( 92 | db: str, dc: str, skip_src: bool = False, skip_dst: bool = False 93 | ) -> Optional[DbupgradeConfig]: 94 | """ 95 | Loads the referenced remote configuration json file, tries to import the 96 | specified resolver class, executes its resolve method, and returns the 97 | resulting DbupgradeConfig or None if there was an error 98 | """ 99 | 100 | # set up the logger 101 | logger = get_logger(db, dc, "remote-config") 102 | 103 | # load the remote config from the json file 104 | definition = await load_remote_conf_def(remote_conf_path(db, dc), logger) 105 | 106 | if definition is None: 107 | return None 108 | 109 | module, classname = definition.resolver_path.rsplit(".", 1) 110 | 111 | try: 112 | resolver_module = import_module(module) 113 | except ModuleNotFoundError: 114 | logger.error(f"Could not find module {module}") 115 | return None 116 | 117 | try: 118 | resolver_class = getattr(resolver_module, classname) 119 | except AttributeError: 120 | logger.error(f"Config resolver class {classname} does not exist in {module}") 121 | return None 122 | 123 | if not issubclass(resolver_class, BaseResolver): 124 | logger.error( 125 | f"Config resolver class {classname} from {module} is not a config resolver" 126 | ) 127 | return None 128 | 129 | try: 130 | resolver_dict = definition.dict() 131 | 132 | resolver_dict.update( 133 | { 134 | "db": db, 135 | "dc": dc, 136 | "skip_src": skip_src, 137 | "skip_dst": skip_dst, 138 | "logger": logger.getChild(classname), 139 | } 140 | ) 141 | 142 | resolver = resolver_class(**resolver_dict) 143 | except ValidationError: 144 | logger.error( 145 | f"Remote config definition for {db} {dc} was not valid for {resolver_class.__name__}" 146 | ) 147 | return None 148 | 149 | try: 150 | config = await resolver.resolve() 151 | except NotImplementedError: 152 | logger.error( 153 | f"Config resolver class {classname} from {module} does not implement resolve" 154 | ) 155 | return None 156 | except RemoteConfigError as e: 157 | logger.error( 158 | f"Failed to resolve remote configuration for {db} {dc}. 
RemoteConfigError {e}" 159 | ) 160 | return None 161 | except ValidationError: 162 | logger.error( 163 | f"Configuration for {db} {dc} resolved by {resolver_class.__name__} was not a valid DbupgradeConfig" 164 | ) 165 | return None 166 | 167 | logger.info( 168 | f"Successfully resolved remote configuration for {db} {dc} using {resolver_class.__name__}" 169 | ) 170 | 171 | return config 172 | -------------------------------------------------------------------------------- /pgbelt/main.py: -------------------------------------------------------------------------------- 1 | from pgbelt.cmd import add_commands 2 | from typer import Typer 3 | 4 | app = Typer(help="A tool to help manage postgres data migrations.") 5 | add_commands(app) 6 | 7 | 8 | if __name__ == "__main__": 9 | app() 10 | -------------------------------------------------------------------------------- /pgbelt/util/__init__.py: -------------------------------------------------------------------------------- 1 | from pgbelt.util.logs import get_logger 2 | -------------------------------------------------------------------------------- /pgbelt/util/asyncfuncs.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from functools import partial 3 | from functools import wraps 4 | from os import listdir as _listdir 5 | from os import makedirs as _makedirs 6 | from os.path import isdir as _isdir 7 | from os.path import isfile as _isfile 8 | 9 | 10 | def make_async(sync_func): 11 | @wraps(sync_func) 12 | async def do_async(*args, **kwargs): 13 | return await asyncio.get_running_loop().run_in_executor( 14 | None, partial(sync_func, *args, **kwargs) 15 | ) 16 | 17 | return do_async 18 | 19 | 20 | listdir = make_async(_listdir) 21 | makedirs = make_async(_makedirs) 22 | isdir = make_async(_isdir) 23 | isfile = make_async(_isfile) 24 | -------------------------------------------------------------------------------- /pgbelt/util/dump.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from logging import Logger 3 | from os.path import join 4 | from pgbelt.config.models import DbupgradeConfig 5 | from pgbelt.util.asyncfuncs import isfile 6 | from pgbelt.util.asyncfuncs import listdir 7 | from pgbelt.util.asyncfuncs import makedirs 8 | from pgbelt.util.postgres import table_empty 9 | from re import search 10 | 11 | from aiofiles import open as aopen 12 | from asyncpg import create_pool 13 | 14 | RAW = "schema" 15 | NO_INVALID_NO_INDEX = "no_invalid_constraints_no_indexes" 16 | ONLY_INVALID = "invalid_constraints" 17 | ONLY_INDEXES = "indexes" 18 | 19 | 20 | def schema_dir(db: str, dc: str) -> str: 21 | return f"schemas/{dc}/{db}" 22 | 23 | 24 | def schema_file(db: str, dc: str, name: str) -> str: 25 | return join(schema_dir(db, dc), f"{name}.sql") 26 | 27 | 28 | def table_dir(db: str, dc: str) -> str: 29 | return f"tables/{dc}/{db}" 30 | 31 | 32 | def table_file(db: str, dc: str, name: str) -> str: 33 | return join(table_dir(db, dc), f"{name}.sql") 34 | 35 | 36 | def _parse_dump_commands(out: str) -> list[str]: 37 | """ 38 | Given a string containing output from pg_dump, return a list of strings where 39 | each is a complete postgres command. Commands may be multi-line. 
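
    Example (illustrative):

        "-- a comment\nCREATE TABLE t (\n    id integer\n);\n"

    parses to ["CREATE TABLE t (\n    id integer\n);\n"]: comments and blank lines are
    dropped, and a new command starts once the previous one ends with ";".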
40 | """ 41 | lines = out.split("\n") 42 | commands = [] 43 | 44 | for line in lines: 45 | stripped = line.strip() 46 | # if the line is whitespace only or a comment then ignore it 47 | if not stripped or stripped.startswith("--"): 48 | continue 49 | 50 | # if the last command is terminated or we don't have any yet start a new one 51 | if not commands or commands[-1].endswith(";\n"): 52 | commands.append(line + "\n") 53 | # otherwise we append to the last command because it must be multi-line 54 | else: 55 | commands[-1] += line + "\n" 56 | 57 | return commands 58 | 59 | 60 | async def _execute_subprocess( 61 | command: list[str], finished_log: str, logger: Logger 62 | ) -> bytes: 63 | p = await asyncio.create_subprocess_exec( 64 | command[0], 65 | *command[1:], 66 | stdout=asyncio.subprocess.PIPE, 67 | stderr=asyncio.subprocess.PIPE, 68 | ) 69 | out, err = await p.communicate() 70 | 71 | if p.returncode != 0: 72 | raise Exception( 73 | f"Couldn't do {command}, got code {p.returncode}.\n out: {out.decode('utf-8')}\n err: {err.decode('utf-8')}" 74 | ) 75 | logger.debug(finished_log) 76 | return out 77 | 78 | 79 | async def dump_source_tables( 80 | config: DbupgradeConfig, tables: list[str], logger: Logger 81 | ) -> None: 82 | try: 83 | await makedirs(table_dir(config.db, config.dc)) 84 | except FileExistsError: 85 | pass 86 | 87 | logger.info(f"Dumping tables {tables}") 88 | 89 | dumps = [] 90 | for table in tables: 91 | dumps.append( 92 | _execute_subprocess( 93 | [ 94 | "pg_dump", 95 | "--data-only", 96 | f'--table={config.schema_name}."{table}"', 97 | "-Fc", 98 | "-f", 99 | table_file(config.db, config.dc, table), 100 | config.src.pglogical_dsn, 101 | ], 102 | f"dumped {table}", 103 | logger, 104 | ) 105 | ) 106 | 107 | await asyncio.gather(*dumps) 108 | 109 | 110 | async def load_dumped_tables( 111 | config: DbupgradeConfig, tables: list[str], logger: Logger 112 | ) -> None: 113 | # unless we get an explicit list of tables to load just load all the dump files 114 | if not tables: 115 | tables_dir = table_dir(config.db, config.dc) 116 | tables = [ 117 | f.split(".")[0] 118 | for f in await listdir(tables_dir) 119 | if await isfile(join(tables_dir, f)) 120 | ] 121 | 122 | logger.info(f"Loading dumped tables {tables}") 123 | 124 | # only load a dump file if the target table is completely empty 125 | async with create_pool(config.dst.root_uri, min_size=1) as pool: 126 | to_load = [] 127 | for t in tables: 128 | if await table_empty(pool, t, config.schema_name, logger): 129 | to_load.append(table_file(config.db, config.dc, t)) 130 | else: 131 | logger.warning( 132 | f"Not loading {t}, table not empty. If this is unexpected please investigate." 133 | ) 134 | 135 | loads = [] 136 | for file in to_load: 137 | loads.append( 138 | _execute_subprocess( 139 | [ 140 | "pg_restore", 141 | "-d", 142 | config.dst.owner_dsn, 143 | file, 144 | ], 145 | f"loaded {file}", 146 | logger, 147 | ) 148 | ) 149 | 150 | await asyncio.gather(*loads) 151 | 152 | 153 | async def dump_source_schema(config: DbupgradeConfig, logger: Logger) -> None: 154 | """ 155 | Dump the schema from the source db and write a file with the complete schema, 156 | one with only the CREATE INDEX statements from the schema, 157 | one with only the NOT VALID constraints from the schema, 158 | and one with everything but the NOT VALID constraints and the CREATE INDEX statements. 
159 | """ 160 | logger.info("Dumping schema...") 161 | 162 | command = [ 163 | "pg_dump", 164 | "--schema-only", 165 | "--no-owner", 166 | "-n", 167 | config.schema_name, 168 | config.src.pglogical_dsn, 169 | ] 170 | 171 | # TODO: We should exclude the creation of a schema in the schema dump and load, and made that the responsibility of the user. 172 | # Confirm if the CREATE SCHEMA statement is included in the schema dump, and if yes, exclude it. 173 | # This will reveal itself in the integration test. 174 | 175 | out = await _execute_subprocess(command, "Retrieved source schema", logger) 176 | 177 | commands_raw = _parse_dump_commands(out.decode("utf-8")) 178 | 179 | commands = [] 180 | for c in commands_raw: 181 | if "EXTENSION " not in c and "GRANT " not in c and "REVOKE " not in c: 182 | commands.append(c) 183 | 184 | try: 185 | await makedirs(schema_dir(config.db, config.dc)) 186 | except FileExistsError: 187 | pass 188 | 189 | async with aopen(schema_file(config.db, config.dc, RAW), "w") as out: 190 | for command in commands: 191 | await out.write(command) 192 | 193 | async with aopen(schema_file(config.db, config.dc, ONLY_INVALID), "w") as out: 194 | for command in commands: 195 | if "NOT VALID" in command: 196 | await out.write(command) 197 | 198 | async with aopen(schema_file(config.db, config.dc, ONLY_INDEXES), "w") as out: 199 | for command in commands: 200 | if "CREATE" in command and "INDEX" in command: 201 | await out.write(command) 202 | 203 | async with aopen( 204 | schema_file(config.db, config.dc, NO_INVALID_NO_INDEX), "w" 205 | ) as out: 206 | for command in commands: 207 | if not ("NOT VALID" in command) and not ( 208 | "CREATE" in command and "INDEX" in command 209 | ): 210 | await out.write(command) 211 | 212 | logger.debug("Finished dumping schema.") 213 | 214 | 215 | async def apply_target_schema(config: DbupgradeConfig, logger: Logger) -> None: 216 | """ 217 | Load the schema dumped from the source into the target excluding NOT VALID constraints and CREATE INDEX statements. 218 | """ 219 | logger.info("Loading schema without constraints...") 220 | 221 | command = [ 222 | "psql", 223 | config.dst.owner_dsn, 224 | "-f", 225 | schema_file(config.db, config.dc, NO_INVALID_NO_INDEX), 226 | ] 227 | 228 | await _execute_subprocess(command, "Finished loading schema.", logger) 229 | 230 | 231 | async def dump_dst_not_valid_constraints( 232 | config: DbupgradeConfig, logger: Logger 233 | ) -> None: 234 | """ 235 | Dump NOT VALID Constraints from the target database. 236 | Used when schema is loaded in outside of pgbelt. 237 | """ 238 | 239 | logger.info("Dumping target NOT VALID constraints...") 240 | 241 | command = [ 242 | "pg_dump", 243 | "--schema-only", 244 | "--no-owner", 245 | "-n", 246 | config.schema_name, 247 | config.dst.pglogical_dsn, 248 | ] 249 | 250 | out = await _execute_subprocess(command, "Retrieved target schema", logger) 251 | 252 | # No username replacement needs to be done, so replace dst user with the same. 
253 | commands_raw = _parse_dump_commands( 254 | out.decode("utf-8"), config.dst.owner_user.name, config.dst.owner_user.name 255 | ) 256 | 257 | commands = [] 258 | for c in commands_raw: 259 | if "NOT VALID" in command: 260 | if config.tables: 261 | regex_matches = search( 262 | r"ALTER TABLE [ONLY ]*(?P[a-zA-Z0-9._]+)+\s+ADD CONSTRAINT (?P[a-zA-Z0-9._]+)+.*", 263 | c, 264 | ) 265 | if not regex_matches: 266 | continue 267 | table = regex_matches.groupdict()["table"] 268 | if config.tables and table in config.tables: 269 | commands.append(c) 270 | else: 271 | commands.append(c) 272 | 273 | try: 274 | await makedirs(schema_dir(config.db, config.dc)) 275 | except FileExistsError: 276 | pass 277 | 278 | async with aopen(schema_file(config.db, config.dc, ONLY_INVALID), "w") as out: 279 | for command in commands: 280 | await out.write(command) 281 | 282 | logger.debug("Finished dumping NOT VALID constraints from the target.") 283 | 284 | 285 | async def remove_dst_not_valid_constraints( 286 | config: DbupgradeConfig, logger: Logger 287 | ) -> None: 288 | """ 289 | Remove the NOT VALID constraints from the schema of the target database. 290 | Only use if target schema was loaded in without pgbelt. 291 | """ 292 | logger.info("Looking for previously dumped NOT VALID constraints...") 293 | 294 | async with aopen(schema_file(config.db, config.dc, ONLY_INVALID), "r") as f: 295 | not_valid_constraints = await f.read() 296 | 297 | logger.info("Removing NOT VALID constraints from the target...") 298 | 299 | queries = "" 300 | for c in not_valid_constraints.split(";"): 301 | regex_matches = search( 302 | r"ALTER TABLE [ONLY ]*(?P
[a-zA-Z0-9._]+)+\s+ADD CONSTRAINT (?P[a-zA-Z0-9._]+)+.*", 303 | c, 304 | ) 305 | if not regex_matches: 306 | continue 307 | table = regex_matches.groupdict()["table"] 308 | constraint = table = regex_matches.groupdict()["constraint"] 309 | 310 | if (config.tables and table in config.tables) or not config.tables: 311 | queries = queries + f"ALTER TABLE {table} DROP CONSTRAINT {constraint};" 312 | 313 | if queries != "": 314 | command = ["psql", config.dst.owner_dsn, "-c", f"'{queries}'"] 315 | 316 | await _execute_subprocess( 317 | command, "Finished removing NOT VALID constraints from the target.", logger 318 | ) 319 | else: 320 | logger.info("No NOT VALID detected for removal.") 321 | 322 | 323 | async def apply_target_constraints(config: DbupgradeConfig, logger: Logger) -> None: 324 | """ 325 | Load the NOT VALID constraints that were excluded from the schema. Should be called after replication during 326 | downtime before allowing writes into the target. 327 | """ 328 | logger.info("Loading NOT VALID constraints...") 329 | 330 | command = [ 331 | "psql", 332 | config.dst.owner_dsn, 333 | "-f", 334 | schema_file(config.db, config.dc, ONLY_INVALID), 335 | ] 336 | 337 | await _execute_subprocess( 338 | command, "Finished loading NOT VALID constraints.", logger 339 | ) 340 | 341 | 342 | async def remove_dst_indexes(config: DbupgradeConfig, logger: Logger) -> None: 343 | """ 344 | Remove the INDEXes from the schema of the target database. 345 | Only use if target schema was loaded in without pgbelt. 346 | """ 347 | logger.info("Looking for previously dumped CREATE INDEX statements...") 348 | 349 | async with aopen(schema_file(config.db, config.dc, ONLY_INDEXES), "r") as f: 350 | create_index_statements = await f.read() 351 | 352 | logger.info("Removing Indexes from the target...") 353 | 354 | for c in create_index_statements.split(";"): 355 | regex_matches = search( 356 | r"CREATE [UNIQUE ]*INDEX (?P[a-zA-Z0-9._]+)+.*", 357 | c, 358 | ) 359 | if not regex_matches: 360 | continue 361 | index = regex_matches.groupdict()["index"] 362 | if config.schema_name: 363 | index = f"{config.schema_name}.{index}" 364 | 365 | # DROP the index 366 | # Note that the host DSN must have a statement timeout of 0. 367 | # Example DSN: `host=server-hostname user=user dbname=db_name options='-c statement_timeout=3600000'` 368 | host_dsn = config.dst.owner_dsn + " options='-c statement_timeout=0'" 369 | 370 | # DROP INDEX IF EXISTS so no need to catch exceptions 371 | command = ["psql", host_dsn, "-c", f"DROP INDEX IF EXISTS {index};"] 372 | logger.info(f"Dropping index {index} on the target...") 373 | await _execute_subprocess( 374 | command, f"Finished dropping index {index} on the target.", logger 375 | ) 376 | 377 | 378 | async def create_target_indexes( 379 | config: DbupgradeConfig, logger: Logger, during_sync=False 380 | ) -> None: 381 | """ 382 | Create indexes on the target that were excluded from the schema during setup. 383 | Should be called once bulk syncing is complete, and before cutover. 384 | 385 | Runs in serial for now with this async code. 386 | TODO: make this run in parallel (beware risk of building too many indexes at once, resource heavy) 387 | """ 388 | 389 | if during_sync: 390 | logger.warning( 391 | "Attempting to create indexes on the target. If indexes were not created before the cutover window, this can take a long time." 
392 | ) 393 | 394 | logger.info("Looking for previously dumped CREATE INDEX statements...") 395 | 396 | async with aopen(schema_file(config.db, config.dc, ONLY_INDEXES), "r") as f: 397 | create_index_statements = await f.read() 398 | 399 | logger.info("Creating indexes on the target...") 400 | 401 | for c in create_index_statements.split(";"): 402 | # Get the Index Name 403 | regex_matches = search( 404 | r"CREATE [UNIQUE ]*INDEX (?P[a-zA-Z0-9._\"]+)+.*", 405 | c, 406 | ) 407 | if not regex_matches: 408 | continue 409 | index = regex_matches.groupdict()["index"] 410 | 411 | # Sometimes the index name is quoted, so remove the quotes 412 | index = index.replace('"', "") 413 | 414 | # Create the index 415 | # Note that the host DSN must have a statement timeout of 0. 416 | # Example DSN: `host=server-hostname user=user dbname=db_name options='-c statement_timeout=3600000'` 417 | host_dsn = config.dst.owner_dsn + " options='-c statement_timeout=0'" 418 | command = ["psql", host_dsn, "-c", f"{c};"] 419 | logger.info(f"Creating index {index} on the target...") 420 | try: 421 | await _execute_subprocess( 422 | command, f"Finished creating index {index} on the target.", logger 423 | ) 424 | except Exception as e: 425 | if f'relation "{index}" already exists' in str(e): 426 | logger.info(f"Index {index} already exist on the target.") 427 | else: 428 | raise Exception(e) 429 | -------------------------------------------------------------------------------- /pgbelt/util/logs.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | from os import getenv 4 | from os import makedirs 5 | 6 | 7 | FORMATTER = "{asctime} {name}:{levelname} {message}" 8 | 9 | # if this module is ever imported we set up the root logger to log to stderr 10 | root_level = int(getenv("LOG_LEVEL", logging.DEBUG)) 11 | root_handler = logging.StreamHandler() 12 | formatter = logging.Formatter(fmt=FORMATTER, datefmt="%Y-%m-%d %H:%M:%S", style="{") 13 | root_handler.setFormatter(formatter) 14 | root_handler.setLevel(root_level) 15 | root_logger = logging.getLogger("dbup") 16 | root_logger.setLevel(root_level) 17 | root_logger.addHandler(root_handler) 18 | 19 | 20 | def log_file_dir(db: str, dc: str) -> str: 21 | return f"logs/{db}/{dc}" 22 | 23 | 24 | def log_file_path(db: str, dc: str, kind: str) -> str: 25 | timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") 26 | if kind: 27 | return f"logs/{db}/{dc}/{timestamp}-{kind}.txt" 28 | else: 29 | return f"logs/{db}/{dc}/{timestamp}.txt" 30 | 31 | 32 | def get_logger(db: str, dc: str, kind: str = "") -> logging.Logger: 33 | # When we set up a logger for that db that emits to a file 34 | logger = ( 35 | logging.getLogger(f"dbup.{db}.{dc}.{kind}") 36 | if kind 37 | else logging.getLogger(f"dbup.{db}.{dc}") 38 | ) 39 | if not logger.handlers: 40 | skip_file_handler = False 41 | 42 | try: 43 | makedirs(log_file_dir(db, dc)) 44 | except FileExistsError: 45 | pass 46 | # We will allow OSError (not being able to write to disk) 47 | # Just don't add the File Handler 48 | except OSError: 49 | skip_file_handler = True 50 | pass 51 | 52 | if not skip_file_handler: 53 | handler = logging.FileHandler(log_file_path(db, dc, kind), mode="w") 54 | handler.setFormatter( 55 | logging.Formatter(FORMATTER, datefmt="%Y-%m-%d %H:%M:%S", style="{") 56 | ) 57 | # always log everything to the file 58 | logger.setLevel(logging.DEBUG) 59 | logger.addHandler(handler) 60 | 61 | # if you pass kind then you can get a logger for a 
specific thing, 62 | # and your logs will end up annotated with the kind 63 | return logging.getLogger(f"dbup.{db}.{dc}.{kind}") if kind else logger 64 | -------------------------------------------------------------------------------- /pgbelt/util/pglogical.py: -------------------------------------------------------------------------------- 1 | from logging import Logger 2 | 3 | from asyncpg import Pool 4 | from asyncpg.exceptions import DuplicateObjectError 5 | from asyncpg.exceptions import InternalServerError 6 | from asyncpg.exceptions import InvalidParameterValueError 7 | from asyncpg.exceptions import InvalidSchemaNameError 8 | from asyncpg.exceptions import ObjectNotInPrerequisiteStateError 9 | from asyncpg.exceptions import UndefinedFunctionError 10 | from asyncpg.exceptions import UndefinedObjectError 11 | from asyncpg.exceptions import UniqueViolationError 12 | 13 | 14 | async def configure_pgl( 15 | pool: Pool, pgl_pw: str, logger: Logger, owner_user: str 16 | ) -> None: 17 | """ 18 | Set up the pglogical role, grant it superuser and replication, create 19 | the extension and grant USAGE to its schema to the owner user. 20 | """ 21 | logger.info("Creating pglogical user and extension...") 22 | async with pool.acquire() as conn: 23 | async with conn.transaction(): 24 | try: 25 | await conn.execute( 26 | f"CREATE ROLE pglogical LOGIN ENCRYPTED PASSWORD '{pgl_pw}';" 27 | ) 28 | logger.debug("pglogical user created") 29 | except DuplicateObjectError: 30 | logger.debug("pglogical user already created") 31 | 32 | # Check if the database is RDS 33 | async with pool.acquire() as conn: 34 | async with conn.transaction(): 35 | pg_roles = await conn.fetch("SELECT rolname FROM pg_roles;") 36 | 37 | is_rds = "rdsadmin" in [i[0] for i in pg_roles] 38 | 39 | # If this is an RDS Database, grant rds_superuser and rds_replication 40 | if is_rds: 41 | async with pool.acquire() as conn: 42 | async with conn.transaction(): 43 | await conn.execute("GRANT rds_superuser TO pglogical;") 44 | await conn.execute("GRANT rds_replication TO pglogical;") 45 | # If this is not an RDS database, just ensure the user is a superuser 46 | else: 47 | async with pool.acquire() as conn: 48 | async with conn.transaction(): 49 | await conn.execute("ALTER USER pglogical WITH SUPERUSER;") 50 | 51 | async with pool.acquire() as conn: 52 | async with conn.transaction(): 53 | try: 54 | await conn.execute("CREATE EXTENSION pglogical;") 55 | logger.debug("pglogical extension created") 56 | except DuplicateObjectError: 57 | logger.debug("pglogical extension already created") 58 | 59 | # TODO: Somehow test for this working in our integration test. 60 | # We need to make the DBs have a separate schema owner role to test this. 61 | async with pool.acquire() as conn: 62 | async with conn.transaction(): 63 | await conn.execute(f"GRANT USAGE ON SCHEMA pglogical TO {owner_user};") 64 | logger.debug( 65 | f"GRANTed USAGE ON pglogical schema to Schema Owner {owner_user}" 66 | ) 67 | 68 | 69 | async def grant_pgl(pool: Pool, tables: list[str], schema: str, logger: Logger) -> None: 70 | """ 71 | Grant pglogical access to the data 72 | 73 | TODO: This should instead find all tables and sequences owned by the currently connected user 74 | and grant to each individually. Then we can call this for every known dsn and we won't miss 75 | any grants to pglogical because of weird ownership stuff. 
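
    With no explicit table list this currently issues (matching the statements below):

        GRANT ALL ON ALL TABLES IN SCHEMA <schema> TO pglogical;
        GRANT ALL ON ALL SEQUENCES IN SCHEMA <schema> TO pglogical;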
76 | """ 77 | logger.info("Granting data permissions to pglogical...") 78 | async with pool.acquire() as conn: 79 | async with conn.transaction(): 80 | if tables: 81 | tables_with_schema = [f'{schema}."{table}"' for table in tables] 82 | await conn.execute( 83 | f"GRANT ALL ON TABLE {','.join(tables_with_schema)} TO pglogical;" 84 | ) 85 | else: 86 | await conn.execute( 87 | f"GRANT ALL ON ALL TABLES IN SCHEMA {schema} TO pglogical;" 88 | ) 89 | await conn.execute( 90 | f"GRANT ALL ON ALL SEQUENCES IN SCHEMA {schema} TO pglogical;" 91 | ) 92 | logger.debug("pglogical data grants complete") 93 | 94 | 95 | async def configure_replication_set( 96 | pool: Pool, tables: list[str], schema: str, logger: Logger 97 | ) -> None: 98 | """ 99 | Add each table in the given list to the default replication set 100 | """ 101 | logger.info("Creating new replication set 'pgbelt'") 102 | async with pool.acquire() as conn: 103 | try: 104 | await conn.execute("SELECT pglogical.create_replication_set('pgbelt');") 105 | logger.debug("Created the 'pgbelt' replication set") 106 | except Exception as e: 107 | logger.debug(f"Could not create replication set 'pgbelt': {e}") 108 | 109 | logger.info( 110 | f"Configuring 'pgbelt' replication set with tables from schema {schema}: {tables}" 111 | ) 112 | for table in tables: 113 | async with pool.acquire() as conn: 114 | async with conn.transaction(): 115 | try: 116 | await conn.execute( 117 | f"SELECT pglogical.replication_set_add_table('pgbelt', '\"{schema}\".\"{table}\"');" 118 | ) 119 | logger.debug( 120 | f"Table '{table}' added to 'pgbelt' replication set from schema {schema}" 121 | ) 122 | except UniqueViolationError: 123 | logger.debug( 124 | f"Table '{table}' already in 'pgbelt' replication set from schema {schema}" 125 | ) 126 | 127 | 128 | async def configure_node(pool: Pool, name: str, dsn: str, logger: Logger) -> None: 129 | """ 130 | Set up a pglogical node 131 | """ 132 | logger.info(f"Configuring node {name}...") 133 | async with pool.acquire() as conn: 134 | async with conn.transaction(): 135 | try: 136 | await conn.execute( 137 | f"""SELECT pglogical.create_node( 138 | node_name:='{name}', 139 | dsn:='{dsn}' 140 | );""" 141 | ) 142 | logger.debug(f"Node {name} created") 143 | except InternalServerError as e: 144 | if f"node {name} already exists" in str(e): 145 | logger.debug(f"Node {name} already exists") 146 | else: 147 | raise e 148 | 149 | 150 | async def configure_subscription( 151 | pool: Pool, name: str, provider_dsn: str, logger: Logger 152 | ) -> None: 153 | """ 154 | Set up a subscription 155 | """ 156 | logger.info(f"Configuring subscription {name}...") 157 | async with pool.acquire() as conn: 158 | async with conn.transaction(): 159 | try: 160 | await conn.execute( 161 | f"""SELECT pglogical.create_subscription( 162 | subscription_name:='{name}', 163 | replication_sets:='{{pgbelt}}', 164 | provider_dsn:='{provider_dsn}', 165 | synchronize_structure:=false, 166 | synchronize_data:={'true' if name.startswith('pg1') else 'false'}, 167 | forward_origins:='{{}}' 168 | );""" 169 | ) 170 | logger.debug(f"Subscription {name} created") 171 | except InvalidParameterValueError as e: 172 | if f'existing subscription "{name}"' in str(e): 173 | logger.debug(f"Subscription {name} already exists") 174 | else: 175 | raise e 176 | 177 | 178 | async def teardown_subscription(pool: Pool, name: str, logger: Logger) -> None: 179 | """ 180 | Tear down a subscription 181 | """ 182 | logger.info(f"Dropping subscription {name}...") 183 | async with pool.acquire() as 
conn: 184 | async with conn.transaction(): 185 | try: 186 | await conn.execute( 187 | f"SELECT pglogical.drop_subscription('{name}', true);" 188 | ) 189 | logger.debug(f"Subscription {name} dropped") 190 | except (InvalidSchemaNameError, UndefinedFunctionError): 191 | logger.debug(f"Subscription {name} does not exist") 192 | 193 | 194 | async def teardown_node(pool: Pool, name: str, logger: Logger) -> None: 195 | """ 196 | Tear down a node 197 | """ 198 | logger.info(f"Dropping node {name}...") 199 | async with pool.acquire() as conn: 200 | async with conn.transaction(): 201 | try: 202 | await conn.execute(f"SELECT pglogical.drop_node('{name}', true);") 203 | logger.debug(f"Node {name} dropped") 204 | except (InvalidSchemaNameError, UndefinedFunctionError): 205 | logger.debug(f"Node {name} does not exist") 206 | 207 | 208 | async def teardown_replication_set(pool: Pool, logger: Logger) -> None: 209 | """ 210 | Tear down the replication_set 211 | """ 212 | logger.info("Dropping replication set 'pgbelt'...") 213 | async with pool.acquire() as conn: 214 | async with conn.transaction(): 215 | try: 216 | await conn.execute("SELECT pglogical.drop_replication_set('pgbelt');") 217 | logger.debug("Replication set 'pgbelt' dropped") 218 | except ( 219 | InvalidSchemaNameError, 220 | UndefinedFunctionError, 221 | InternalServerError, 222 | ): 223 | logger.debug("Replication set 'pgbelt' does not exist") 224 | except ObjectNotInPrerequisiteStateError: 225 | logger.debug( 226 | "pglogical node was already dropped, so we can't drop the replication set. This is okay, keep going." 227 | ) 228 | 229 | 230 | async def revoke_pgl( 231 | pool: Pool, tables: list[str], schema: str, logger: Logger 232 | ) -> None: 233 | """ 234 | Revoke data access permissions from pglogical, and drop the pglogical role 235 | """ 236 | logger.info("Revoking data access permissions from pglogical...") 237 | async with pool.acquire() as conn: 238 | async with conn.transaction(): 239 | try: 240 | await conn.execute( 241 | f"REVOKE ALL ON ALL TABLES IN SCHEMA {schema} FROM pglogical;" 242 | ) 243 | await conn.execute( 244 | f"REVOKE ALL ON ALL SEQUENCES IN SCHEMA {schema} FROM pglogical;" 245 | ) 246 | logger.debug("Data access permissions revoked") 247 | except UndefinedObjectError as e: 248 | if 'role "pglogical" does not exist' in str(e): 249 | logger.debug("pglogical does not exist") 250 | else: 251 | raise e 252 | 253 | logger.info("Dropping pglogical role...") 254 | async with pool.acquire() as conn: 255 | async with conn.transaction(): 256 | await conn.execute("DROP ROLE IF EXISTS pglogical;") 257 | logger.debug("Pglogical role dropped") 258 | 259 | 260 | async def teardown_pgl(pool: Pool, logger: Logger) -> None: 261 | """ 262 | If they exist, drop the pglogical extension 263 | """ 264 | logger.info("Dropping pglogical extension...") 265 | async with pool.acquire() as conn: 266 | async with conn.transaction(): 267 | await conn.execute("DROP EXTENSION IF EXISTS pglogical;") 268 | logger.debug("Pglogical extension dropped") 269 | 270 | 271 | async def subscription_status(pool: Pool, logger: Logger) -> str: 272 | """ 273 | Get the status of a subscription. Assumes one subscription in a db. 274 | Status can be initializing, replicating, down, or unconfigured. 
275 | """ 276 | logger.debug("checking subscription status") 277 | try: 278 | subscription_data = await pool.fetchval( 279 | "SELECT pglogical.show_subscription_status();" 280 | ) 281 | if not subscription_data: 282 | return "unconfigured" 283 | return subscription_data[1] 284 | except ( 285 | InvalidSchemaNameError, 286 | UndefinedFunctionError, 287 | ObjectNotInPrerequisiteStateError, 288 | ): 289 | return "unconfigured" 290 | 291 | 292 | async def src_status(pool: Pool, logger: Logger) -> dict[str, str]: 293 | """ 294 | Get the status of the back replication subscription and the forward replication lag 295 | """ 296 | logger.info("checking source status...") 297 | status = {"pg2_pg1": await subscription_status(pool, logger)} 298 | 299 | server_version = await pool.fetchval("SHOW server_version;") 300 | 301 | logger.debug("checking source to target lag") 302 | if "9.6" in server_version: 303 | lag_data = await pool.fetchrow( 304 | """ 305 | SELECT current_timestamp, application_name, 306 | pg_xlog_location_diff(pg_current_xlog_location(), pg_stat_replication.sent_location) AS sent_location_lag, 307 | pg_xlog_location_diff(pg_current_xlog_location(), pg_stat_replication.write_location) AS write_location_lag, 308 | pg_xlog_location_diff(pg_current_xlog_location(), pg_stat_replication.flush_location) AS flush_location_lag, 309 | pg_xlog_location_diff(pg_current_xlog_location(), pg_stat_replication.replay_location) AS replay_location_lag 310 | FROM pg_stat_replication WHERE application_name = 'pg1_pg2';""" 311 | ) 312 | else: 313 | lag_data = await pool.fetchrow( 314 | """ 315 | SELECT current_timestamp, application_name, 316 | pg_wal_lsn_diff(pg_current_wal_lsn(), pg_stat_replication.sent_lsn) AS sent_location_lag, 317 | pg_wal_lsn_diff(pg_current_wal_lsn(), pg_stat_replication.write_lsn) AS write_location_lag, 318 | pg_wal_lsn_diff(pg_current_wal_lsn(), pg_stat_replication.flush_lsn) AS flush_location_lag, 319 | pg_wal_lsn_diff(pg_current_wal_lsn(), pg_stat_replication.replay_lsn) AS replay_location_lag 320 | FROM pg_stat_replication WHERE application_name = 'pg1_pg2';""" 321 | ) 322 | 323 | status["sent_lag"] = str(lag_data[2]) if lag_data else "unknown" 324 | status["write_lag"] = str(lag_data[3]) if lag_data else "unknown" 325 | status["flush_lag"] = str(lag_data[4]) if lag_data else "unknown" 326 | status["replay_lag"] = str(lag_data[5]) if lag_data else "unknown" 327 | 328 | return status 329 | 330 | 331 | async def dst_status(pool: Pool, logger: Logger) -> dict[str, str]: 332 | """ 333 | Get the status of the forward replication subscription 334 | """ 335 | logger.info("checking target status...") 336 | return {"pg1_pg2": await subscription_status(pool, logger)} 337 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pgbelt" 3 | version = "0.8.3" 4 | description = "A CLI tool used to manage Postgres data migrations from beginning to end, for a single database or a fleet, leveraging pglogical replication." 
5 | authors = ["Varjitt Jeeva "] 6 | readme = "README.md" 7 | 8 | packages = [ 9 | { include = "pgbelt", from = "./" }, 10 | ] 11 | 12 | [tool.poetry.dependencies] 13 | python = ">=3.9,<4.0" 14 | aiofiles = ">=0.8,<24.2" 15 | asyncpg = ">=0.27,<0.31" 16 | pydantic = ">=2.0,<3.0" 17 | tabulate = "^0.9.0" 18 | typer = ">=0.9,<0.17" 19 | 20 | [tool.poetry.dev-dependencies] 21 | black = "~25.1.0" 22 | pre-commit = "~4.2.0" 23 | flake8 = "^7.2.0" 24 | pytest-cov = "~6.1.1" 25 | pytest = "^8.4.0" 26 | coverage = {extras = ["toml"], version = "^7.8"} 27 | safety = "^3.3.0" 28 | mypy = "^1.16" 29 | xdoctest = {extras = ["colors"], version = "^1.2.0"} 30 | flake8-bandit = "~4.1.1" 31 | flake8-bugbear = ">=21.9.2" 32 | flake8-docstrings = "^1.6.0" 33 | flake8-rst-docstrings = "^0.3.1" 34 | pep8-naming = "^0.15.1" 35 | darglint = "^1.8.1" 36 | reorder-python-imports = "^3.15.0" 37 | pre-commit-hooks = "^5.0.0" 38 | Pygments = "^2.19.1" 39 | pyupgrade = "^3.20.0" 40 | pylint = "^3.3.7" 41 | pytest-asyncio = "~0.26.0" 42 | 43 | [build-system] 44 | requires = ["poetry-core>=1.0.0", "setuptools"] 45 | build-backend = "poetry.core.masonry.api" 46 | 47 | [tool.poetry.scripts] 48 | belt = "pgbelt.main:app" 49 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Autodesk/pgbelt/d4d6839a005f34344a024eac838f14d6fe59f991/tests/__init__.py -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Autodesk/pgbelt/d4d6839a005f34344a024eac838f14d6fe59f991/tests/integration/__init__.py -------------------------------------------------------------------------------- /tests/integration/conftest.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | from shutil import rmtree 3 | 4 | import pytest_asyncio 5 | import asyncio 6 | from asyncpg import create_pool 7 | from pgbelt.config.models import DbConfig 8 | from pgbelt.config.models import DbupgradeConfig 9 | from pgbelt.config.models import User 10 | 11 | 12 | async def _create_dbupgradeconfigs() -> dict[str, DbupgradeConfig]: 13 | """ 14 | Function for creating DbupgradeConfig objects for testing. 15 | We also save it to disk since the pgbelt commands will look for it there. 16 | 17 | This will create 4 sets of DBs: public vs non-public schema, and exodus-style vs full migration. 18 | """ 19 | 20 | # Set the common kwargs at the DBUpgradeConfig level 21 | db_upgrade_config_kwargs = { 22 | "dc": "testdc", 23 | } 24 | 25 | # Set the common config kwargs for the individual DBs 26 | # We set many of the args here in the actual DB containers, so we don't need to pull these vars out to docker-compose. 27 | common_db_config_kwargs = { 28 | "host": "localhost", 29 | "port": "5432", 30 | # This is the default credential for the admin user in the Postgres containers used for testing. 31 | "root_user": User( 32 | name="postgres", 33 | pw="postgres", 34 | ), 35 | # We will create the owner_user in the DBs via the integration test setup. 36 | # Due to issue #440, we're adding a special character to the password to ensure this still works. 
37 | "owner_user": User(name="owner", pw="owner#password"), 38 | "pglogical_user": User(name="pglogical", pw="pglogicalpassword"), 39 | "db": "testdb", 40 | } 41 | 42 | # We're treating DB pairs as sets here. 43 | sets = [ 44 | "public-full", 45 | "nonpublic-full", 46 | "public-exodus", 47 | "nonpublic-exodus", 48 | ] 49 | 50 | configs = {} 51 | for s in sets: 52 | db_upgrade_config_kwargs["db"] = f"testdb-{s}" 53 | db_upgrade_config_kwargs["schema_name"] = ( 54 | "non_public_schema" if "nonpublic" in s else "public" 55 | ) 56 | db_upgrade_config_kwargs["tables"] = ( 57 | ["UsersCapital", "existingSomethingIds"] if "exodus" in s else None 58 | ) 59 | db_upgrade_config_kwargs["sequences"] = ( 60 | ["userS_id_seq"] if "exodus" in s else None 61 | ) 62 | config = DbupgradeConfig(**db_upgrade_config_kwargs) 63 | 64 | # The IP addresses are set in the docker-compose file, so we can pull them out of the environment. They follow the following pattern: 65 | # (NON)PUBLIC___IP 66 | config.src = DbConfig( 67 | ip=environ[f"{s.split('-')[0].upper()}_{s.split('-')[1].upper()}_SRC_IP"], 68 | **common_db_config_kwargs, 69 | ) 70 | config.dst = DbConfig( 71 | ip=environ[f"{s.split('-')[0].upper()}_{s.split('-')[1].upper()}_DST_IP"], 72 | **common_db_config_kwargs, 73 | ) 74 | 75 | # Save the config to disk 76 | await config.save() 77 | configs[s] = config 78 | 79 | return configs 80 | 81 | 82 | async def _prepare_databases(configs: dict[str, DbupgradeConfig]) -> None: 83 | """ 84 | Given a dict of various configs for database pairs, prepare the following: 85 | 1. Create the owner user on both databases 86 | 2. Create the Postgres DB on both databases 87 | 3. If the schema is non-public, create the schema on both databases 88 | 4. Load the test data into the source database 89 | """ 90 | 91 | # Load test data and schema SQL 92 | with open("tests/integration/files/test_schema_data.sql") as f: 93 | base_test_schema_data = f.read() 94 | 95 | for config in configs.values(): 96 | 97 | # Get the root connections to the root DBs 98 | src_root_uri_with_root_db, dst_root_uri_with_root_db = _root_uris(config) 99 | src_root_user_root_db_pool, dst_root_user_root_db_pool = await asyncio.gather( 100 | create_pool(src_root_uri_with_root_db, min_size=1), 101 | create_pool(dst_root_uri_with_root_db, min_size=1), 102 | ) 103 | 104 | # Create the owner user 105 | await asyncio.gather( 106 | src_root_user_root_db_pool.execute( 107 | f"CREATE ROLE {config.src.owner_user.name} LOGIN PASSWORD '{config.src.owner_user.pw}'", 108 | ), 109 | dst_root_user_root_db_pool.execute( 110 | f"CREATE ROLE {config.dst.owner_user.name} LOGIN PASSWORD '{config.dst.owner_user.pw}'", 111 | ), 112 | ) 113 | 114 | # Create the databases 115 | await asyncio.gather( 116 | src_root_user_root_db_pool.execute( 117 | f"CREATE DATABASE {config.src.db} WITH OWNER = {config.src.owner_user.name}" 118 | ), 119 | dst_root_user_root_db_pool.execute( 120 | f"CREATE DATABASE {config.dst.db} WITH OWNER = {config.dst.owner_user.name}" 121 | ), 122 | ) 123 | 124 | src_owner_user_logical_db_pool, dst_owner_user_logical_db_pool = ( 125 | await asyncio.gather( 126 | create_pool(config.src.owner_uri, min_size=1), 127 | create_pool(config.dst.owner_uri, min_size=1), 128 | ) 129 | ) 130 | 131 | # Create the non-public schema if the schema_name is not "public" 132 | if config.schema_name != "public": 133 | await asyncio.gather( 134 | src_owner_user_logical_db_pool.execute( 135 | f"CREATE SCHEMA {config.schema_name}" 136 | ), 137 | dst_owner_user_logical_db_pool.execute( 
138 | f"CREATE SCHEMA {config.schema_name}" 139 | ), 140 | ) 141 | await asyncio.gather( 142 | src_owner_user_logical_db_pool.execute( 143 | f"GRANT CREATE ON SCHEMA {config.schema_name} TO {config.src.owner_user.name}" 144 | ), 145 | dst_owner_user_logical_db_pool.execute( 146 | f"GRANT CREATE ON SCHEMA {config.schema_name} TO {config.dst.owner_user.name}" 147 | ), 148 | ) 149 | 150 | # With the db made, load data into src 151 | test_schema_data = base_test_schema_data 152 | 153 | # If we're testing with a non-public schema, we need to replace the schema name in our schema template. 154 | if config.schema_name != "public": 155 | test_schema_data = test_schema_data.replace( 156 | "public.", f"{config.schema_name}." 157 | ) 158 | 159 | await asyncio.gather( 160 | src_owner_user_logical_db_pool.execute(test_schema_data), 161 | ) 162 | 163 | 164 | def _root_uris(config: DbupgradeConfig) -> tuple[str, str]: 165 | """ 166 | Given a DbupgradeConfig object, return the root URIs for the source and destination databases. 167 | """ 168 | 169 | # Make src root URI with root dbname not the one to be made 170 | src_root_uri_with_root_db = config.src.root_uri.replace( 171 | f"{config.src.port}/{config.src.db}", 172 | f"{config.src.port}/postgres", 173 | ) 174 | 175 | # Make dst root URI with root dbname not the one to be made 176 | dst_root_uri_with_root_db = config.dst.root_uri.replace( 177 | f"{config.dst.port}/{config.dst.db}", 178 | f"{config.dst.port}/postgres", 179 | ) 180 | 181 | return src_root_uri_with_root_db, dst_root_uri_with_root_db 182 | 183 | 184 | async def _empty_out_databases(configs: dict[str, DbupgradeConfig]) -> None: 185 | """ 186 | This code will DROP the databases specified in the config, 187 | DROP the owner role specified in the config and any permissions with it. 188 | """ 189 | 190 | for config in configs.values(): 191 | 192 | # Get the root URIs 193 | src_root_uri_with_root_db, dst_root_uri_with_root_db = _root_uris(config) 194 | 195 | async with create_pool(src_root_uri_with_root_db, min_size=1) as pool: 196 | async with pool.acquire() as conn: 197 | await conn.execute( 198 | f"DROP DATABASE {config.src.db} WITH (FORCE);", 199 | ) 200 | await conn.execute( 201 | f"DROP OWNED BY {config.src.owner_user.name};", 202 | ) 203 | await conn.execute( 204 | f"DROP ROLE {config.src.owner_user.name};", 205 | ) 206 | 207 | async with create_pool(dst_root_uri_with_root_db, min_size=1) as pool: 208 | async with pool.acquire() as conn: 209 | await conn.execute( 210 | f"DROP DATABASE {config.dst.db} WITH (FORCE);", 211 | ) 212 | await conn.execute( 213 | f"DROP OWNED BY {config.dst.owner_user.name};", 214 | ) 215 | await conn.execute( 216 | f"DROP ROLE {config.dst.owner_user.name};", 217 | ) 218 | 219 | 220 | @pytest_asyncio.fixture 221 | async def setup_db_upgrade_configs(): 222 | """ 223 | Fixture for preparing the test databases and creating a DbupgradeConfig object. 224 | This fixture will also clean up after the test (removing local files and tearing down against the DBs). 
225 | """ 226 | 227 | # Create the config 228 | test_configs = await _create_dbupgradeconfigs() 229 | 230 | # Prepare the databases 231 | await _prepare_databases(test_configs) 232 | 233 | yield test_configs 234 | 235 | # Clear out all data and stuff in the database containers :shrug: 236 | await _empty_out_databases(test_configs) 237 | 238 | # Delete the config that was saved to disk by the setup 239 | rmtree("configs/testdc") 240 | rmtree("schemas/") 241 | 242 | 243 | # This is a hacky way of doing it, but I don't want to duplicate code. 244 | # If this code is called directly, we can use it to set up the databases and create the config for 245 | # local interactive testing. 246 | 247 | # This will create the datasets. 248 | if __name__ == "__main__": 249 | 250 | configs = asyncio.run(_create_dbupgradeconfigs()) 251 | asyncio.run(_prepare_databases(configs)) 252 | 253 | print("Local databases are ready for local testing!") 254 | -------------------------------------------------------------------------------- /tests/integration/files/postgres13-pglogical-docker/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | FROM postgres:13 3 | 4 | # Install pglogical onto the image 5 | RUN apt-get update && apt-get install -y wget gnupg 6 | RUN echo "deb http://apt.postgresql.org/pub/repos/apt/ bullseye-pgdg main" > /etc/apt/sources.list.d/pgdg.list \ 7 | && wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | \ 8 | apt-key add - \ 9 | && apt-get update \ 10 | && apt-get install -y postgresql-13-pglogical 11 | 12 | # Configure with settings pgbelt requires (max_wal_senders, max_replication_slots, max_worker_processes, shared_preloaded_libraries) 13 | RUN echo "host replication postgres 172.18.0.0/16 trust" >> /usr/share/postgresql/13/pg_hba.conf.sample 14 | RUN echo "host replication postgres ::1/128 trust" >> /usr/share/postgresql/13/pg_hba.conf.sample 15 | RUN echo "shared_preload_libraries = 'pg_stat_statements,pglogical'" >> /usr/share/postgresql/postgresql.conf.sample 16 | RUN echo "wal_level = 'logical'" >> /usr/share/postgresql/postgresql.conf.sample 17 | RUN echo "max_wal_senders = 20" >> /usr/share/postgresql/postgresql.conf.sample 18 | RUN echo "max_replication_slots = 20" >> /usr/share/postgresql/postgresql.conf.sample 19 | RUN echo "max_worker_processes = 20" >> /usr/share/postgresql/postgresql.conf.sample 20 | -------------------------------------------------------------------------------- /tests/integration/files/test_schema_data.sql: -------------------------------------------------------------------------------- 1 | -- 2 | -- Name: fruits; Type: TABLE; Schema: public; Owner: owner 3 | -- 4 | 5 | CREATE TABLE public.fruits ( 6 | id integer, 7 | name character varying(20) 8 | ); 9 | 10 | 11 | ALTER TABLE public.fruits OWNER TO owner; 12 | 13 | -- 14 | -- Name: UsersCapital; Type: TABLE; Schema: public; Owner: owner 15 | -- 16 | 17 | CREATE TABLE public."UsersCapital" ( 18 | id bigint NOT NULL, 19 | hash_firstname text NOT NULL, 20 | hash_lastname text NOT NULL, 21 | gender character varying(6) NOT NULL, 22 | numericnan numeric(19,4), -- Testing for #571 Numeric NaN 23 | CONSTRAINT users_gender_check CHECK (((gender)::text = ANY (ARRAY[('male'::character varying)::text, ('female'::character varying)::text]))) 24 | ); 25 | 26 | 27 | ALTER TABLE public."UsersCapital" OWNER TO owner; 28 | 29 | -- 30 | -- Name: UsersCapital2; Type: TABLE; Schema: public; Owner: owner 31 | -- 32 | 33 | CREATE TABLE public."UsersCapital2" ( 34 
| id bigint NOT NULL, 35 | "hash_firstName" text NOT NULL, 36 | hash_lastname text NOT NULL, 37 | gender character varying(6) NOT NULL, 38 | CONSTRAINT users_gender_check CHECK (((gender)::text = ANY (ARRAY[('male'::character varying)::text, ('female'::character varying)::text]))) 39 | ); 40 | 41 | 42 | ALTER TABLE public."UsersCapital2" OWNER TO owner; 43 | 44 | CREATE TABLE public.another_test_table ( 45 | "someThingIDontKnow" uuid NOT NULL, 46 | "anotherThing" uuid NOT NULL 47 | ); 48 | 49 | ALTER TABLE public.another_test_table OWNER TO owner; 50 | 51 | -- 52 | -- Name: users_idx; Type: INDEX; Schema: public; Owner: owner 53 | -- 54 | 55 | CREATE INDEX users_idx ON public."UsersCapital" ( 56 | hash_firstname, 57 | hash_lastname 58 | ); 59 | 60 | -- 61 | -- Name: users2_idx; Type: INDEX; Schema: public; Owner: owner 62 | -- 63 | 64 | CREATE INDEX users2_idx ON public."UsersCapital" ( 65 | hash_firstname, 66 | hash_lastname 67 | ); 68 | 69 | -- Addressing the following index statement style: CREATE INDEX "existingEmailIds_email_id_idx" ON public."existingEmailIds" USING btree ("projectId", "emailId"); 70 | -- Issue #652 71 | -- Did not add a primary key, helped iron out related quoting issues in the dump and load code. 72 | 73 | CREATE TABLE public."existingSomethingIds" ( 74 | "thingId" integer NOT NULL, 75 | "somethingId" character varying(255) NOT NULL 76 | ); 77 | 78 | CREATE INDEX "existingSomethingIds_something_id_idx" ON public."existingSomethingIds" USING btree ("thingId", "somethingId"); 79 | 80 | -- 81 | -- Name: userS_id_seq; Type: SEQUENCE; Schema: public; Owner: owner 82 | -- 83 | 84 | CREATE SEQUENCE public."userS_id_seq" 85 | START WITH 1 86 | INCREMENT BY 1 87 | NO MINVALUE 88 | NO MAXVALUE 89 | CACHE 1; 90 | 91 | 92 | ALTER TABLE public."userS_id_seq" OWNER TO owner; 93 | 94 | -- 95 | -- Name: users2_id_seq; Type: SEQUENCE; Schema: public; Owner: owner 96 | -- 97 | 98 | CREATE SEQUENCE public.users2_id_seq 99 | START WITH 1 100 | INCREMENT BY 1 101 | NO MINVALUE 102 | NO MAXVALUE 103 | CACHE 1; 104 | 105 | 106 | ALTER TABLE public.users2_id_seq OWNER TO owner; 107 | 108 | -- 109 | -- Data for Name: fruits; Type: TABLE DATA; Schema: public; Owner: owner 110 | -- 111 | INSERT INTO public.fruits (id, name) 112 | VALUES (1, 'watermelon'), 113 | (2, 'pear'), 114 | (3, 'strawberry'), 115 | (4, 'grape'); 116 | 117 | -- 118 | -- Data for Name: UsersCapital; Type: TABLE DATA; Schema: public; Owner: owner 119 | -- 120 | 121 | INSERT INTO public."UsersCapital" (id, hash_firstname, hash_lastname, gender, numericnan) 122 | VALUES (1, 'garbagefirst', 'garbagelast', 'male', 1), 123 | (2, 'garbagefirst1', 'garbagelast1', 'female', 0), 124 | (3, 'sdgarbagefirst', 'dgsadsrbagelast', 'male', 'NaN'), 125 | (4, 'dsdssdgarbagefirst', 'dgsaggggdjjjsrbagelast', 'female', 1), 126 | (5, 'dsdssdgarbagefirt', 'dgsagggdjjjsrbagelast', 'female', 0); 127 | 128 | 129 | -- 130 | -- Data for Name: Users2; Type: TABLE DATA; Schema: public; Owner: owner 131 | -- 132 | 133 | INSERT INTO public."UsersCapital2" (id, "hash_firstName", hash_lastname, gender) 134 | VALUES (1, 'garbagefirst', 'garbagelast', 'male'), 135 | (2, 'garbagefirst1', 'garbagelast1', 'female'), 136 | (3, 'sdgarbagefirst', 'dgsadsrbagelast', 'male'), 137 | (4, 'dsdssdgarbagefirst', 'dgsaggggdjjjsrbagelast', 'female'), 138 | (5, 'dsdssdgarbagefirt', 'dgsagggdjjjsrbagelast', 'female'); 139 | 140 | 141 | INSERT INTO public.another_test_table ("someThingIDontKnow", "anotherThing") 142 | VALUES ('0e095b60-ab7d-4892-9a92-6175497fe0f9', 
'0e095b60-ab7d-4892-9a92-6175497fe0f9'); 143 | 144 | -- 145 | -- Data for Name: existingSomethingIds; Type: TABLE DATA; Schema: public; Owner: owner 146 | -- 147 | 148 | INSERT INTO public."existingSomethingIds" ("thingId", "somethingId") 149 | VALUES (1, 'something1'), 150 | (2, 'something2'), 151 | (3, 'something3'), 152 | (4, 'something4'); 153 | 154 | 155 | -- 156 | -- Name: userS_id_seq; Type: SEQUENCE SET; Schema: public; Owner: owner 157 | -- 158 | 159 | SELECT pg_catalog.setval('public."userS_id_seq"', 16, false); 160 | 161 | 162 | -- 163 | -- Name: users2_id_seq; Type: SEQUENCE SET; Schema: public; Owner: owner 164 | -- 165 | 166 | SELECT pg_catalog.setval('public.users2_id_seq', 15, false); 167 | 168 | 169 | -- 170 | -- Name: UsersCapital users_pkey; Type: CONSTRAINT; Schema: public; Owner: owner 171 | -- 172 | 173 | ALTER TABLE ONLY public."UsersCapital" 174 | ADD CONSTRAINT users_pkey PRIMARY KEY (id); 175 | 176 | 177 | -- 178 | -- Name: UsersCapital users_pkey; Type: CONSTRAINT; Schema: public; Owner: owner 179 | -- 180 | 181 | ALTER TABLE ONLY public."UsersCapital2" 182 | ADD CONSTRAINT users2_pkey PRIMARY KEY (id); 183 | 184 | ALTER TABLE ONLY public.another_test_table 185 | ADD CONSTRAINT another_test_table_pkey PRIMARY KEY ("someThingIDontKnow", "anotherThing"); 186 | -------------------------------------------------------------------------------- /tests/integration/test_integration.py: -------------------------------------------------------------------------------- 1 | import re 2 | import subprocess 3 | from time import sleep 4 | from unittest.mock import AsyncMock 5 | from unittest.mock import Mock 6 | from pgbelt.util.dump import _parse_dump_commands 7 | from pgbelt.config.models import DbupgradeConfig 8 | 9 | import asyncio 10 | from asyncpg import create_pool 11 | 12 | import pgbelt 13 | import pytest 14 | 15 | 16 | async def _check_status( 17 | configs: dict[str, DbupgradeConfig], src_dst_status: str, dst_src_status: str 18 | ): 19 | # Check status and make sure all are in the correct state 20 | # ALL sets must match the src_dst_status and dst_src_status 21 | 22 | dc = list(configs.values())[0].dc 23 | num_configs = len(configs.keys()) 24 | 25 | # Sleep 1, repeat until target status is seen. 26 | pgbelt.cmd.status.echo = Mock() 27 | status_reached = False 28 | i = 4 29 | while not status_reached and i > 0: 30 | sleep(1) 31 | await pgbelt.cmd.status.status(db=None, dc=dc) 32 | 33 | status_echo_call_arg = pgbelt.cmd.status.echo.call_args[0][0] 34 | 35 | # Regex for the two columns to be in the correct state 36 | matches = re.findall( 37 | rf"^\S+\s+\S+{src_dst_status}\S+\s+\S+{dst_src_status}.*", 38 | status_echo_call_arg.split("\n")[2], 39 | ) 40 | if len(matches) == num_configs: 41 | status_reached = True 42 | elif i > 0: 43 | i = i - 1 44 | else: 45 | raise AssertionError( 46 | f"Timed out waiting for src->dst: {src_dst_status}, dst->src: {dst_src_status} state across {num_configs} configs. 
Ended with: {status_echo_call_arg}" 47 | ) 48 | 49 | 50 | async def _test_check_connectivity(configs: dict[str, DbupgradeConfig]): 51 | # Run check_connectivity and make sure all green, no rec 52 | pgbelt.cmd.convenience.echo = Mock() 53 | await pgbelt.cmd.convenience.check_connectivity( 54 | db=None, dc=configs[list(configs.keys())[0]].dc 55 | ) 56 | check_connectivity_echo_call_arg = pgbelt.cmd.convenience.echo.call_args[0][0] 57 | assert "\x1b[31m" not in check_connectivity_echo_call_arg 58 | 59 | await _check_status(configs, "unconfigured", "unconfigured") 60 | 61 | 62 | async def _test_precheck(configs: dict[str, DbupgradeConfig]): 63 | # Run precheck and make sure all green, no red 64 | pgbelt.cmd.preflight.echo = Mock() 65 | await pgbelt.cmd.preflight.precheck(db=None, dc=configs[list(configs.keys())[0]].dc) 66 | preflight_echo_call_arg = pgbelt.cmd.preflight.echo.call_args[0][0] 67 | assert "\x1b[31m" not in preflight_echo_call_arg 68 | 69 | await _check_status(configs, "unconfigured", "unconfigured") 70 | 71 | 72 | async def _test_setup(configs: dict[str, DbupgradeConfig]): 73 | # Run Setup on the dc of the first config to run against all DBs in that dc 74 | await pgbelt.cmd.setup.setup(db=None, dc=configs[list(configs.keys())[0]].dc) 75 | 76 | # Ensure Schema in the destination doesn't have NOT VALID, no Indexes across all DB pairs 77 | dst_dumps = await _get_dumps(configs) 78 | 79 | # Format of dumps: {setname: stdout} 80 | for setname, stdout in dst_dumps.items(): 81 | commands_raw = _parse_dump_commands(stdout.decode("utf-8")) 82 | print( 83 | f"Test Setup: checking {setname} for NOT VALID and INDEXES in destination schema..." 84 | ) 85 | for c in commands_raw: 86 | assert "NOT VALID" not in c 87 | assert "INDEX" not in c 88 | 89 | await _check_status(configs, "replicating", "unconfigured") 90 | 91 | 92 | async def _test_setup_back_replication(configs: dict[str, DbupgradeConfig]): 93 | # Set up back replication 94 | await pgbelt.cmd.setup.setup_back_replication( 95 | db=None, dc=configs[list(configs.keys())[0]].dc 96 | ) 97 | 98 | await _check_status(configs, "replicating", "replicating") 99 | 100 | 101 | async def _test_create_indexes(configs: dict[str, DbupgradeConfig]): 102 | # Load in Indexes 103 | await pgbelt.cmd.schema.create_indexes( 104 | db=None, dc=configs[list(configs.keys())[0]].dc 105 | ) 106 | 107 | # Ensure Schema in the destination has Indexes 108 | 109 | dst_dumps = await _get_dumps(configs) 110 | 111 | # Format of dumps: {setname: stdout} 112 | for setname, stdout in dst_dumps.items(): 113 | 114 | print( 115 | f"Test Create-Indexes: checking {setname} for INDEXES in destination schema..." 
116 | ) 117 | 118 | commands_raw = _parse_dump_commands(stdout.decode("utf-8")) 119 | index_exists = False 120 | for c in commands_raw: 121 | if "INDEX" in c: 122 | index_exists = True 123 | break 124 | assert index_exists 125 | 126 | await _check_status(configs, "replicating", "replicating") 127 | 128 | 129 | async def _test_analyze(configs: dict[str, DbupgradeConfig]): 130 | await pgbelt.cmd.sync.analyze(db=None, dc=configs[list(configs.keys())[0]].dc) 131 | 132 | # TODO: test that ANALYZE was run on the destination 133 | 134 | await _check_status(configs, "replicating", "replicating") 135 | 136 | 137 | async def _test_revoke_logins(configs: dict[str, DbupgradeConfig]): 138 | await pgbelt.cmd.login.revoke_logins( 139 | db=None, dc=configs[list(configs.keys())[0]].dc 140 | ) 141 | 142 | # TODO: test that appropriate login roles were revoked 143 | 144 | await _check_status(configs, "replicating", "replicating") 145 | 146 | 147 | async def _test_teardown_forward_replication(configs: dict[str, DbupgradeConfig]): 148 | await pgbelt.cmd.teardown.teardown_forward_replication( 149 | db=None, dc=configs[list(configs.keys())[0]].dc 150 | ) 151 | 152 | await _check_status(configs, "unconfigured", "replicating") 153 | 154 | 155 | async def _test_sync(configs: dict[str, DbupgradeConfig]): 156 | await pgbelt.cmd.sync.sync(db=None, dc=configs[list(configs.keys())[0]].dc) 157 | 158 | # TODO: test that the appropriate sync steps were run 159 | 160 | await _check_status(configs, "unconfigured", "replicating") 161 | 162 | 163 | async def _get_dumps( 164 | configs: dict[str, DbupgradeConfig], src: bool = False 165 | ) -> dict[str, str]: 166 | """ 167 | Get the full dumps for the source or destination databases using pg_dump. 168 | Default is destination. 169 | """ 170 | 171 | std_kwargs = { 172 | "stdin": subprocess.PIPE, 173 | "stdout": subprocess.PIPE, 174 | "stderr": subprocess.PIPE, 175 | } 176 | 177 | # For each set of DBs, run pg_dump -s against the destination 178 | if src: 179 | dump_processes = await asyncio.gather( 180 | *[ 181 | asyncio.create_subprocess_exec( 182 | "pg_dump", 183 | configs[setname].src.root_dsn, 184 | **std_kwargs, 185 | ) 186 | for setname in configs.keys() 187 | ] 188 | ) 189 | else: # Default is destination 190 | dump_processes = await asyncio.gather( 191 | *[ 192 | asyncio.create_subprocess_exec( 193 | "pg_dump", 194 | configs[setname].dst.root_dsn, 195 | **std_kwargs, 196 | ) 197 | for setname in configs.keys() 198 | ] 199 | ) 200 | 201 | await asyncio.gather(*[d.wait() for d in dump_processes]) 202 | 203 | # get STDOUT for each dump 204 | # Format of dumps: {setname: stdout} 205 | return { 206 | setname: (await d.communicate())[0] 207 | for setname, d in zip(configs.keys(), dump_processes) 208 | } 209 | 210 | 211 | async def _filter_dump(dump: str, keywords_to_exclude: list[str]): 212 | commands_raw = _parse_dump_commands(dump) 213 | commands = [] 214 | for c in commands_raw: 215 | add_command = True 216 | for k in keywords_to_exclude: 217 | if k in c: 218 | add_command = False 219 | break 220 | if add_command: 221 | commands.append(c) 222 | return "\n".join(commands) 223 | 224 | 225 | async def _compare_sequences( 226 | sequences: str, src_root_dsn: str, dst_root_dsn: str, schema_name: str 227 | ): 228 | """ 229 | Compare the sequences in the source and destination databases by asynchronously running 230 | PSQL "SELECT last_value FROM sequence_name;" for each sequence in the set. 
231 | """ 232 | 233 | std_kwargs = { 234 | "stdin": subprocess.PIPE, 235 | "stdout": subprocess.PIPE, 236 | "stderr": subprocess.PIPE, 237 | } 238 | src_seq_fetch_processes = await asyncio.gather( 239 | *[ 240 | asyncio.create_subprocess_exec( 241 | "psql", 242 | src_root_dsn, 243 | "-c", 244 | f'SELECT last_value FROM {schema_name}."{sequence}";', 245 | "-t", 246 | **std_kwargs, 247 | ) 248 | for sequence in sequences 249 | ] 250 | ) 251 | dst_seq_fetch_processes = await asyncio.gather( 252 | *[ 253 | asyncio.create_subprocess_exec( 254 | "psql", 255 | dst_root_dsn, 256 | "-c", 257 | f'SELECT last_value FROM {schema_name}."{sequence}";', 258 | "-t", 259 | **std_kwargs, 260 | ) 261 | for sequence in sequences 262 | ] 263 | ) 264 | 265 | await asyncio.gather(*[p.wait() for p in src_seq_fetch_processes]) 266 | await asyncio.gather(*[p.wait() for p in dst_seq_fetch_processes]) 267 | 268 | for i in range(len(sequences)): 269 | src_val = (await src_seq_fetch_processes[i].communicate())[0].strip() 270 | dst_val = (await dst_seq_fetch_processes[i].communicate())[0].strip() 271 | 272 | print(f"Sequence {sequences[i]} in source: {src_val}, destination: {dst_val}") 273 | assert src_val == dst_val 274 | 275 | 276 | async def _ensure_same_data(configs: dict[str, DbupgradeConfig]): 277 | # Dump the databases and ensure they're the same. 278 | # Sequence lines are excluded because, for some reason, the source dump reports is_called as true while the destination dump reports false. 279 | # Verified in the code that we set it with is_called=True, so it's unclear what's going on there. 280 | # ------------------------------------------------------------------ 281 | 282 | # Get all the SRC and DST Dumps 283 | # Format of dumps: {setname: stdout} 284 | src_dumps = await _get_dumps(configs, src=True) 285 | dst_dumps = await _get_dumps(configs) 286 | 287 | keywords_to_exclude = [ 288 | "EXTENSION ", 289 | "GRANT ", 290 | "REVOKE ", 291 | "setval", 292 | "SET ", 293 | "SELECT pg_catalog.set_config('search_path'", 294 | "ALTER SCHEMA", 295 | "CREATE SCHEMA", 296 | ] 297 | 298 | # First, asynchronously filter out the keywords from the source dumps 299 | 300 | # Run the filter_dump function on each dump asynchronously 301 | src_dumps_filtered = await asyncio.gather( 302 | *[ 303 | _filter_dump(dump.decode("utf-8"), keywords_to_exclude) 304 | for dump in src_dumps.values() 305 | ] 306 | ) 307 | 308 | # Then, asynchronously filter out the keywords from the destination dumps 309 | dst_dumps_filtered = await asyncio.gather( 310 | *[ 311 | _filter_dump(dump.decode("utf-8"), keywords_to_exclude) 312 | for dump in dst_dumps.values() 313 | ] 314 | ) 315 | 316 | # Note: the asyncio gathers will return a list of the filtered dumps in the same order as the input dumps 317 | # So we can safely say that the ith element of each list corresponds to the same set of DBs 318 | 319 | # Ensure the filtered dumps are the same 320 | for i in range(len(src_dumps_filtered)): 321 | setname = list(configs.keys())[i] 322 | 323 | # Only the targeted tables should match in exodus-style migrations 324 | if "exodus" in setname: 325 | 326 | # In a real exodus migration, only the schema related to the targeted tables will probably exist. 327 | # But in our integration testing, we copy the entire schema yet only copy the targeted data. 328 | 329 | # Given this, the only thing to really check is that the targeted data is the same. Even the schema and structure is not the responsibility of pgbelt. 
330 | 331 | src_dump = src_dumps_filtered[i] 332 | dst_dump = dst_dumps_filtered[i] 333 | 334 | # Only get the COPY lines for the targeted tables in the dumps. 335 | # COPY format: 336 | # COPY non_public_schema.users (id, hash_firstname, hash_lastname, gender) FROM stdin; 337 | 338 | # 1 garbagefirst garbagelast male 339 | # 2 garbagefirst1 garbagelast1 female 340 | # 3 sdgarbagefirst dgsadsrbagelast male 341 | # 4 dsdssdgarbagefirst dgsaggggdjjjsrbagelast female 342 | # 5 dsdssdgarbagefirt dgsagggdjjjsrbagelast female 343 | # \. 344 | 345 | src_table_data = {} 346 | for table in configs[setname].tables: 347 | src_table_data[table] = "" 348 | for line in src_dump.split("\n"): 349 | if f"COPY {configs[setname].schema_name}.{table}" in line: 350 | src_table_data[table] = src_table_data[table] + line + "\n" 351 | elif len(src_table_data[table]) > 0: 352 | src_table_data[table] = src_table_data[table] + line + "\n" 353 | if line == "\\.": 354 | break 355 | dst_table_data = {} 356 | for table in configs[setname].tables: 357 | dst_table_data[table] = "" 358 | for line in dst_dump.split("\n"): 359 | if f"COPY {configs[setname].schema_name}.{table}" in line: 360 | dst_table_data[table] = dst_table_data[table] + line + "\n" 361 | elif len(dst_table_data[table]) > 0: 362 | dst_table_data[table] = dst_table_data[table] + line + "\n" 363 | if line == "\\.": 364 | break 365 | 366 | # Ensure the targeted data is the same 367 | for table in configs[setname].tables: 368 | print( 369 | f"Ensuring {setname} source and destination data for table {table} are the same..." 370 | ) 371 | 372 | assert src_table_data[table] == dst_table_data[table] 373 | 374 | # Check that the sequences are the same by literally running PSQL "SELECT last_value FROM sequence_name;" 375 | 376 | print( 377 | f"Ensuring {setname} source and destination sequences are the same..." 378 | ) 379 | 380 | await _compare_sequences( 381 | configs[ 382 | setname 383 | ].sequences, # In exodus-style migrations, we have our sequences defined in the config 384 | configs[setname].src.root_dsn, 385 | configs[setname].dst.root_dsn, 386 | configs[setname].schema_name, 387 | ) 388 | 389 | else: 390 | print(f"Ensuring {setname} source and destination dumps are the same...") 391 | assert src_dumps_filtered[i] == dst_dumps_filtered[i] 392 | 393 | print( 394 | f"Ensuring {setname} source and destination sequences are the same..." 395 | ) 396 | 397 | # First, get a list of all sequences in the source database in the specified schema 398 | # Synchronous because we need to run it once before the next commands anyways. 
399 | sequences = ( 400 | subprocess.run( 401 | [ 402 | "psql", 403 | configs[setname].src.root_dsn, 404 | "-c", 405 | f"SELECT sequence_name FROM information_schema.sequences WHERE sequence_schema = '{configs[setname].schema_name}';", 406 | "-t", 407 | ], 408 | capture_output=True, 409 | ) 410 | .stdout.decode("utf-8") 411 | .strip() 412 | .split("\n") 413 | ) 414 | 415 | await _compare_sequences( 416 | sequences, # In full migrations, we need to get the sequences from the source database 417 | configs[setname].src.root_dsn, 418 | configs[setname].dst.root_dsn, 419 | configs[setname].schema_name, 420 | ) 421 | 422 | 423 | async def _test_teardown_not_full(configs: dict[str, DbupgradeConfig]): 424 | await pgbelt.cmd.teardown.teardown(db=None, dc=configs[list(configs.keys())[0]].dc) 425 | 426 | # TODO: test that the appropriate teardown steps were run for a non-full teardown 427 | 428 | await _check_status(configs, "unconfigured", "unconfigured") 429 | 430 | 431 | async def _test_teardown_full(configs: dict[str, DbupgradeConfig]): 432 | await pgbelt.cmd.teardown.teardown( 433 | db=None, dc=configs[list(configs.keys())[0]].dc, full=True 434 | ) 435 | 436 | # TODO: test that the appropriate teardown steps were run for a full teardown 437 | 438 | await _check_status(configs, "unconfigured", "unconfigured") 439 | 440 | 441 | async def _test_main_workflow(configs: dict[str, DbupgradeConfig]): 442 | """ 443 | Run the following commands in order: 444 | 445 | belt check-connectivity testdc && \ 446 | belt precheck testdc && \ 447 | belt setup testdc && \ 448 | belt setup-back-replication testdc && \ 449 | belt create-indexes testdc && \ 450 | belt analyze testdc && \ 451 | belt revoke-logins testdc && \ 452 | belt sync testdc && \ 453 | belt teardown testdc && \ 454 | belt teardown testdc --full 455 | """ 456 | 457 | await _test_check_connectivity(configs) 458 | await _test_precheck(configs) 459 | await _test_setup(configs) 460 | await _test_setup_back_replication(configs) 461 | await _test_create_indexes(configs) 462 | await _test_analyze(configs) 463 | await _test_revoke_logins(configs) 464 | await _test_teardown_forward_replication(configs) 465 | await _test_sync(configs) 466 | 467 | # Check if the data is the same before testing teardown 468 | await _ensure_same_data(configs) 469 | 470 | await _test_teardown_not_full(configs) 471 | await _test_teardown_full(configs) 472 | 473 | 474 | # Run the main integration test. 475 | # 4 sets of DBs are created: public vs non-public schema, and exodus-style vs full migration. 476 | # Use pgbelt's native async parallelization to run the main workflow on the total set of DBs. 
477 | @pytest.mark.asyncio 478 | async def test_main_workflow(setup_db_upgrade_configs): 479 | 480 | await _test_main_workflow(setup_db_upgrade_configs) 481 | -------------------------------------------------------------------------------- /tests/pgbelt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Autodesk/pgbelt/d4d6839a005f34344a024eac838f14d6fe59f991/tests/pgbelt/__init__.py -------------------------------------------------------------------------------- /tests/pgbelt/cmd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Autodesk/pgbelt/d4d6839a005f34344a024eac838f14d6fe59f991/tests/pgbelt/cmd/__init__.py -------------------------------------------------------------------------------- /tests/pgbelt/cmd/conftest.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | import pytest 4 | from pgbelt.config.models import DbConfig 5 | from pgbelt.config.models import DbupgradeConfig 6 | from pgbelt.config.models import User 7 | 8 | 9 | def pytest_collectstart(): 10 | environ["CI"] = "true" 11 | 12 | 13 | @pytest.fixture() 14 | def config(): 15 | testconf = DbupgradeConfig( 16 | db="test-db", 17 | dc="test-dc", 18 | src=DbConfig( 19 | host="pgbelt-unit-test-src.pgbelt.fake", 20 | ip="192.168.0.11", 21 | db="testdbsrc", 22 | port="5432", 23 | root_user=User(name="fake_src_root_username", pw="fake_src_root_password"), 24 | owner_user=User( 25 | name="fake_src_owner_username", pw="fake_src_owner_password" 26 | ), 27 | pglogical_user=User( 28 | name="fake_src_pgl_username", pw="fake_src_pgl_password" 29 | ), 30 | ), 31 | dst=DbConfig( 32 | host="pgbelt-unit-test-dst.pgbelt.fake", 33 | ip="192.168.0.12", 34 | db="testdbdst", 35 | port="5432", 36 | root_user=User(name="fake_dst_root_username", pw="fake_dst_root_password"), 37 | owner_user=User( 38 | name="fake_dst_owner_username", pw="fake_dst_owner_password" 39 | ), 40 | pglogical_user=User( 41 | name="fake_dst_pgl_username", pw="fake_dst_pgl_password" 42 | ), 43 | ), 44 | ) 45 | yield testconf 46 | -------------------------------------------------------------------------------- /tests/pgbelt/cmd/test_convenience.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from unittest.mock import Mock 3 | from unittest.mock import patch 4 | 5 | from pgbelt.cmd import convenience 6 | 7 | 8 | # Test for the Owner DSN to come from src-dsn when the config 9 | # has an Owner in it 10 | def test_src_dsn_owner(config): 11 | with patch("pgbelt.cmd.convenience.get_config", return_value=config): 12 | convenience.echo = Mock() 13 | convenience.src_dsn("test-db", "test-dc") 14 | convenience.echo.assert_called_with(config.src.owner_dsn) 15 | -------------------------------------------------------------------------------- /tests/pgbelt/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Autodesk/pgbelt/d4d6839a005f34344a024eac838f14d6fe59f991/tests/pgbelt/config/__init__.py -------------------------------------------------------------------------------- /tests/pgbelt/config/conftest.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | 4 | def pytest_collectstart(): 5 | environ["CI"] = "true" 6 | 
-------------------------------------------------------------------------------- /tests/pgbelt/config/test_pass.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | 4 | class FakeTest(TestCase): 5 | def test_fake(self): 6 | pass 7 | --------------------------------------------------------------------------------
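The following is a minimal, illustrative sketch (not a file in this repository) of how the status helpers from pgbelt/util/pglogical.py shown above could be driven directly with asyncpg pools. The DSN values and the "dbup.example" logger name are placeholder assumptions for the example; in pgbelt itself both come from the DbupgradeConfig models and the logs helper.

import asyncio
import logging

from asyncpg import create_pool

from pgbelt.util.pglogical import dst_status
from pgbelt.util.pglogical import src_status


async def main() -> None:
    # Placeholder DSNs for illustration only; real runs derive these from DbupgradeConfig.
    src_dsn = "postgresql://pglogical:pglogicalpassword@localhost:5432/testdb"
    dst_dsn = "postgresql://pglogical:pglogicalpassword@localhost:5433/testdb"

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("dbup.example")  # assumed name, mirroring the logs helper's "dbup." prefix

    async with create_pool(src_dsn, min_size=1) as src_pool:
        async with create_pool(dst_dsn, min_size=1) as dst_pool:
            # src_status reports the reverse (pg2_pg1) subscription state plus forward replication lag;
            # dst_status reports only the forward (pg1_pg2) subscription state.
            print(await src_status(src_pool, logger))
            print(await dst_status(dst_pool, logger))


if __name__ == "__main__":
    asyncio.run(main())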