├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── 01_question.md │ ├── 02_bug.md │ ├── 03_feature.md │ └── config.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── build.yml │ ├── codeql-analysis.yml │ ├── sphinx.yml │ └── sync-main-and-master.yml ├── .gitignore ├── .isort.cfg ├── .pylintrc ├── .style.yapf ├── CODE_OF_CONDUCT.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── NEWS ├── README.rst ├── SECURITY.md ├── TODO ├── Vagrantfile ├── codecov.yml ├── debian ├── changelog.in ├── compat ├── control ├── copyright ├── docs ├── pghoard.postinst └── rules ├── docs ├── .gitignore ├── Makefile ├── about.rst ├── architecture.rst ├── commands.rst ├── conf.py ├── configuration.rst ├── development.rst ├── index.rst ├── install.rst ├── make.bat ├── monitoring.rst ├── quickstart.rst └── requirements.txt ├── golang └── pghoard_postgres_command_go.go ├── mypy.ini ├── pghoard-local-minimal.json ├── pghoard.json ├── pghoard.spec ├── pghoard.unit ├── pghoard ├── .gitignore ├── __init__.py ├── __main__.py ├── archive_cleanup.py ├── archive_sync.py ├── basebackup │ ├── __init__.py │ ├── base.py │ ├── chunks.py │ └── delta.py ├── common.py ├── compressor.py ├── config.py ├── create_keys.py ├── fetcher.py ├── gnutaremu.py ├── logutil.py ├── mapping.py ├── metrics.py ├── monitoring │ ├── __init__.py │ ├── prometheus.py │ ├── pushgateway.py │ └── statsd.py ├── object_store.py ├── pghoard.py ├── pgutil.py ├── postgres_command.py ├── preservation_request.py ├── receivexlog.py ├── restore.py ├── transfer.py ├── wal.py ├── walreceiver.py └── webserver.py ├── pyproject.toml ├── test ├── .gitignore ├── __init__.py ├── base.py ├── basebackup │ ├── __init__.py │ ├── test_basebackup.py │ ├── test_chunks.py │ └── test_delta.py ├── conftest.py ├── data │ ├── basebackup │ │ ├── chunks │ │ │ ├── 00000001.pghoard │ │ │ ├── 00000001.pghoard.metadata │ │ │ ├── 00000002.pghoard │ │ │ ├── 00000002.pghoard.metadata │ │ │ ├── 00000003.pghoard │ │ │ ├── 00000003.pghoard.metadata │ │ │ ├── 00000004.pghoard │ │ │ └── 00000004.pghoard.metadata │ │ └── config.json │ ├── basebackup_delta │ │ ├── chunks │ │ │ ├── 0af668268d0fe14c6e269760b08d80a634c421b8381df25f31fbed5e8a8c8d8b │ │ │ ├── 0af668268d0fe14c6e269760b08d80a634c421b8381df25f31fbed5e8a8c8d8b.metadata │ │ │ ├── 4b65df4d0857bbbcb22aa086e02bd8414a9f3a484869f2b96ed7c62f3c4eb088 │ │ │ ├── 4b65df4d0857bbbcb22aa086e02bd8414a9f3a484869f2b96ed7c62f3c4eb088.metadata │ │ │ ├── fc61c91430dcb345001306ad513f103380c16896093a17868fc909aeda393559 │ │ │ └── fc61c91430dcb345001306ad513f103380c16896093a17868fc909aeda393559.metadata │ │ └── config.json │ ├── basebackup_one_chunk │ │ ├── chunks │ │ │ ├── 00000002.pghoard │ │ │ └── 00000002.pghoard.metadata │ │ └── config.json │ └── basebackup_with_ts │ │ ├── chunks │ │ ├── chunk_2018-04-23_2__2018-04-23_2.00000570.pghoard │ │ ├── chunk_2018-04-23_2__2018-04-23_2.00000570.pghoard.metadata │ │ ├── chunk_2018-04-23_2__2018-04-23_2.00000572.pghoard │ │ ├── chunk_2018-04-23_2__2018-04-23_2.00000572.pghoard.metadata │ │ ├── chunk_2018-04-23_2__2018-04-23_2.00000573.pghoard │ │ └── chunk_2018-04-23_2__2018-04-23_2.00000573.pghoard.metadata │ │ └── config.json ├── monitoring │ ├── __init__.py │ ├── conftest.py │ ├── test_prometheus.py │ ├── test_pushgateway.py │ └── test_statsd.py ├── tar_failer ├── test.tar ├── test_archive_cleanup.py ├── test_archivesync.py ├── test_common.py ├── test_compressor.py ├── test_config.py ├── test_create_keys.py ├── test_encryptor.py ├── test_gnutaremu.py ├── test_inotify.py ├── test_object_store.py ├── test_pghoard.py ├── 
test_pgutil.py ├── test_postgres_command.py ├── test_preservation_request.py ├── test_restore.py ├── test_storage.py ├── test_transferagent.py ├── test_wal.py ├── test_wal_file_deleter.py ├── test_walreceiver.py ├── test_webserver.py ├── test_webserver_ipv6.py └── util.py └── update-constraints /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @aiven/team-brute-force @aiven/aiven-open-source 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/01_question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ❓ Ask a question 3 | about: Got stuck or missing something from the docs? Ask away! 4 | --- 5 | 6 | # What can we help you with? 7 | 8 | 9 | 10 | # Where would you expect to find this information? 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/02_bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐜 Report a bug 3 | about: Spotted a problem? Let us know 4 | --- 5 | 6 | # What happened? 7 | 8 | 9 | 10 | # What did you expect to happen? 11 | 12 | 13 | 14 | # What else do we need to know? 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/03_feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 💡 Feature suggestion 3 | about: What would make this even better? 4 | --- 5 | 6 | # What is currently missing? 7 | 8 | 9 | 10 | # How could this be improved? 11 | 12 | 13 | 14 | # Is this a feature you would work on yourself? 15 | 16 | * [ ] I plan to open a pull request for this feature 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Aiven Security Bug Bounty 4 | url: https://hackerone.com/aiven_ltd 5 | about: Our bug bounty program. 6 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | # About this change - What it does 3 | 4 | 5 | 6 | 7 | Resolves: #xxxxx 8 | 9 | # Why this way 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build pghoard 2 | 3 | # Default to read-only access to all APIs. 4 | permissions: read-all 5 | 6 | on: 7 | push: 8 | branches: 9 | - main 10 | tags: 11 | - '**' 12 | pull_request: 13 | 14 | jobs: 15 | 16 | lint: 17 | runs-on: ubuntu-22.04 18 | strategy: 19 | matrix: 20 | # only use one version for the lint step 21 | python-version: ["3.10"] 22 | 23 | steps: 24 | 25 | - id: checkout-code 26 | uses: actions/checkout@v3 27 | with: 28 | # Do not persist the token during execution of this job. 29 | persist-credentials: false 30 | 31 | - id: prepare-python 32 | uses: actions/setup-python@v3 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | 36 | - id: dependencies 37 | run: | 38 | pip install . 
39 | pip install ".[dev]" 40 | 41 | - id: pylint 42 | run: make lint 43 | 44 | - id: mypy 45 | run: make mypy 46 | 47 | - id: validate-style 48 | run: | 49 | make fmt 50 | if [ $(git diff --name-only --diff-filter=ACMR | wc -l ) != 0 ]; then 51 | echo "Reformatting failed! Please run make fmt on your commits and resubmit!" 1>&2; 52 | git diff; 53 | exit 1; 54 | fi 55 | 56 | test: 57 | runs-on: ubuntu-22.04 58 | needs: lint 59 | strategy: 60 | max-parallel: 5 61 | matrix: 62 | python-version: ["3.10", "3.11", "3.12"] 63 | 64 | steps: 65 | - id: checkout-code 66 | uses: actions/checkout@v3 67 | 68 | - id: prepare-python 69 | uses: actions/setup-python@v3 70 | with: 71 | python-version: ${{ matrix.python-version }} 72 | 73 | - id: dependencies 74 | run: | 75 | # Setup the Postgres repositories 76 | sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main 14" > /etc/apt/sources.list.d/pgdg.list' 77 | wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - 78 | sudo apt-get update 79 | # Setup build deps 80 | sudo apt-get install -y libsnappy-dev postgresql-12 postgresql-13 postgresql-14 postgresql-15 postgresql-16 postgresql-17 81 | # Setup common python dependencies 82 | python -m pip install --upgrade pip 83 | pip install . 84 | pip install ".[constraints]" 85 | pip install -e . 86 | 87 | - id: unittest 88 | run: make coverage 89 | 90 | - id: upload-codecov 91 | # Third-party action pinned to v3.1.1 92 | uses: codecov/codecov-action@d9f34f8cd5cb3b3eb79b3e4b5dae3a16df499a70 93 | with: 94 | verbose: true 95 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [main] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [main] 20 | schedule: 21 | - cron: "42 20 * * 6" 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | language: ["python"] 32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 33 | # Learn more: 34 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 35 | 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v2 39 | 40 | # Initializes the CodeQL tools for scanning. 41 | - name: Initialize CodeQL 42 | uses: github/codeql-action/init@v2 43 | with: 44 | languages: ${{ matrix.language }} 45 | # If you wish to specify custom queries, you can do so here or in a config file. 46 | # By default, queries listed here will override any specified in a config file. 47 | # Prefix the list here with "+" to use these queries and those in the config file. 
48 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 49 | 50 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 51 | # If this step fails, then you should remove it and run the build manually (see below) 52 | - name: Autobuild 53 | uses: github/codeql-action/autobuild@v2 54 | 55 | # ℹ️ Command-line programs to run using the OS shell. 56 | # 📚 https://git.io/JvXDl 57 | 58 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 59 | # and modify them (or add more) to build your code if your project 60 | # uses a compiled language 61 | 62 | #- run: | 63 | # make bootstrap 64 | # make release 65 | 66 | - name: Perform CodeQL Analysis 67 | uses: github/codeql-action/analyze@v2 68 | -------------------------------------------------------------------------------- /.github/workflows/sphinx.yml: -------------------------------------------------------------------------------- 1 | name: Sphinx Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | name: Sphinx Pages 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: "Checkout Main Branch" 14 | uses: actions/checkout@v2 15 | with: 16 | persist-credentials: false 17 | path: main 18 | ref: main 19 | 20 | - name: "Checkout GitHub Pages Branch" 21 | uses: actions/checkout@v2 22 | with: 23 | persist-credentials: false 24 | path: gh-pages 25 | ref: gh-pages 26 | 27 | - name: "Setup Python & Install Spinx" 28 | uses: actions/setup-python@v3 29 | 30 | - name: "Install Sphinx & Theme" 31 | run: pip install sphinx==4.5.0 sphinx-rtd-theme==1.0.0 32 | 33 | - name: "Run Sphinx" 34 | run: sphinx-build -b html main/docs gh-pages -E -d $GITHUB_WORKSPACE/.doctree 35 | 36 | - name: "Commit & Push Changes (If Any)" 37 | run: | 38 | cd gh-pages 39 | git add -A 40 | git config --global user.email "$(git show --format=%ae -s)" 41 | git config --global user.name "$(git show --format=%an -s)" 42 | git commit -m "From $GITHUB_REF $(echo ${GITHUB_SHA} | cut -c 1-8)" 43 | git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} 44 | git push 45 | -------------------------------------------------------------------------------- /.github/workflows/sync-main-and-master.yml: -------------------------------------------------------------------------------- 1 | name: "Sync master from main" 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | sync_master_from_main: 9 | runs-on: ubuntu-latest 10 | name: Sync master from main 11 | 12 | steps: 13 | - name: Checkout main 14 | uses: actions/checkout@v3 15 | with: 16 | ref: main 17 | fetch-depth: 0 18 | 19 | - name: Push to master 20 | run: git push origin main:master 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | *.log 4 | *~ 5 | /.cache 6 | /.coverage 7 | /.project 8 | /.pydevproject 9 | /.vagrant 10 | /.idea 11 | /.vscode 12 | __pycache__/ 13 | /build/ 14 | /dist/ 15 | /pghoard.egg-info/ 16 | /pghoard-rpm-src.tar 17 | /rpm/ 18 | .vagrant 19 | /venv/ 20 | /.venv/ 21 | *.orig 22 | /pghoard-rpm-src/ 23 | coverage.xml 24 | .hypothesis 25 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | known_first_party = pghoard 3 | known_third_party = rohmu 4 | 
-------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | disable= 3 | bad-option-value, 4 | duplicate-code, 5 | fixme, 6 | import-outside-toplevel, 7 | invalid-name, 8 | len-as-condition, 9 | locally-disabled, 10 | missing-docstring, 11 | no-else-raise, 12 | no-else-return, 13 | no-self-use, 14 | raise-missing-from, 15 | too-few-public-methods, 16 | too-many-ancestors, 17 | too-many-arguments, 18 | too-many-boolean-expressions, 19 | too-many-branches, 20 | too-many-function-args, 21 | too-many-instance-attributes, 22 | too-many-locals, 23 | too-many-public-methods, 24 | too-many-statements, 25 | ungrouped-imports, 26 | wrong-import-order, 27 | wrong-import-position 28 | 29 | [FORMAT] 30 | max-line-length=125 31 | max-module-lines=1100 32 | 33 | [REPORTS] 34 | output-format=text 35 | reports=no 36 | 37 | [TYPECHECK] 38 | extension-pkg-whitelist=pydantic 39 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | # For docs, see https://github.com/google/yapf/blob/main/README.rst 3 | 4 | based_on_style = pep8 5 | # Disallow splitting between dict key and dict value in multiline {"key": "value"} lines 6 | ALLOW_SPLIT_BEFORE_DICT_VALUE = false 7 | 8 | # Avoid adding unnecessary blank lines when nesting 9 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = false 10 | 11 | # Always add two blank lines for top-level classes and methods 12 | BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION = 2 13 | 14 | # These two combine consecutive ({ and }) to same line to reduce clutter 15 | COALESCE_BRACKETS = true 16 | DEDENT_CLOSING_BRACKETS = true 17 | 18 | # Line length 19 | COLUMN_LIMIT = 125 20 | 21 | # Try to avoid having overly long lines by having excessively large penalty for that. 22 | SPLIT_PENALTY_EXCESS_CHARACTER = 1000000000 23 | 24 | # Always split dict entries to one entry per line 25 | # EACH_DICT_ENTRY_ON_SEPARATE_LINE = true 26 | 27 | # Never split this comment to a separate line. Workaround for certain flake8 & email template lines 28 | I18N_COMMENT = # noqa 29 | 30 | # Allow automatically joining lines, for example, multiline if that would fit to a single line 31 | JOIN_MULTIPLE_LINES = true 32 | 33 | # "3 * 5", instead of "3*5" 34 | SPACES_AROUND_POWER_OPERATOR = true 35 | 36 | # Follow normal comment style by adding two spaces between code and comment 37 | SPACES_BEFORE_COMMENT = 2 38 | 39 | # If list of items is comma terminated, always split to one per line. 40 | SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED = true 41 | 42 | # Related to previous one, if list of items (args or dict/list/...) needs to be split, split to one per line. 43 | # SPLIT_ALL_COMMA_SEPARATED_VALUES = true 44 | 45 | # Split dict generators for clarity (add line breaks between { and key: val etc. 46 | SPLIT_BEFORE_DICT_SET_GENERATOR = true 47 | 48 | # Split method(k1=v1, k2=v2...) 
to separate lines 49 | SPLIT_BEFORE_NAMED_ASSIGNS = true 50 | 51 | # For complex (for some definition of complex) comprehensions, put output, for and if to separate lines 52 | SPLIT_COMPLEX_COMPREHENSION = true 53 | 54 | # When splitting something to multiple lines ('method(\n val...'), intend by 4 55 | CONTINUATION_INDENT_WIDTH = 4 56 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | opensource@aiven.io. 64 | All complaints will be reviewed and investigated promptly and fairly. 
65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 
129 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pghoard/*.py 2 | include pghoard.json 3 | include pghoard.unit 4 | include test/*.py 5 | include scripts/* 6 | include README.rst 7 | include version.py 8 | include setup.py 9 | include setup.cfg 10 | include LICENSE 11 | include MANIFEST.in 12 | 13 | recursive-exclude examples *~ *.pyc \.* 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | short_ver = $(shell git describe --abbrev=0) 2 | long_ver = $(shell git describe --long 2>/dev/null || echo $(short_ver)-0-unknown-g`git describe --always`) 3 | 4 | PYTHON ?= python3 5 | PYTHON_SOURCE_DIRS = pghoard/ test/ 6 | PYTEST_ARG ?= -v 7 | 8 | .PHONY: pghoard/version.py 9 | python-build: 10 | $(PYTHON) -m build 11 | 12 | .PHONY: dev-deps 13 | dev-deps: 14 | pip install . 15 | pip install ".[dev]" 16 | 17 | .PHONY: unittest 18 | unittest: dev-deps 19 | $(PYTHON) -m pytest -vv test/ 20 | 21 | .PHONY: lint 22 | lint: dev-deps 23 | $(PYTHON) -m pylint --rcfile .pylintrc $(PYTHON_SOURCE_DIRS) 24 | 25 | .PHONY: mypy 26 | mypy: dev-deps 27 | $(PYTHON) -m mypy $(PYTHON_SOURCE_DIRS) 28 | 29 | .PHONY: fmt 30 | fmt: dev-deps 31 | unify --quote '"' --recursive --in-place $(PYTHON_SOURCE_DIRS) 32 | isort $(PYTHON_SOURCE_DIRS) 33 | yapf --parallel --recursive --in-place $(PYTHON_SOURCE_DIRS) 34 | 35 | .PHONY: coverage 36 | coverage: dev-deps 37 | $(PYTHON) -m pytest $(PYTEST_ARG) --cov-report term-missing --cov-report xml:coverage.xml \ 38 | --cov pghoard test/ 39 | 40 | .PHONY: clean 41 | clean: 42 | $(RM) -r *.egg-info/ build/ dist/ rpm/ 43 | $(RM) ../pghoard_* test-*.xml coverage.xml pghoard/version.py 44 | 45 | 46 | .PHONY: deb 47 | deb: 48 | cp debian/changelog.in debian/changelog 49 | dch -v $(long_ver) --distribution unstable "Automatically built .deb" 50 | dpkg-buildpackage -A -uc -us 51 | 52 | .PHONY: rpm 53 | rpm: python-build 54 | git archive --output=pghoard-rpm-src.tar --prefix=pghoard/ HEAD 55 | # add generated files to the tar, they're not in git repository 56 | tar -r -f pghoard-rpm-src.tar --transform=s,pghoard/,pghoard/pghoard/, pghoard/version.py 57 | rpmbuild -bb pghoard.spec \ 58 | --define '_topdir $(PWD)/rpm' \ 59 | --define '_sourcedir $(CURDIR)' \ 60 | --define 'major_version $(short_ver)' \ 61 | --define 'minor_version $(subst -,.,$(subst $(short_ver)-,,$(long_ver)))' 62 | $(RM) pghoard-rpm-src.tar 63 | 64 | .PHONY: build-dep-fed 65 | build-dep-fed: 66 | sudo dnf -y install 'dnf-command(builddep)' tar rpm-build 67 | sudo dnf -y builddep pghoard.spec 68 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | We release patches for security vulnerabilities. Which versions are eligible 6 | receiving such patches depend on the CVSS v3.0 Rating: 7 | 8 | | CVSS v3.0 | Supported Versions | 9 | | --------- | ----------------------------------------- | 10 | | 4.0-10.0 | Most recent release | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | Please report (suspected) security vulnerabilities to our **[bug bounty 15 | program](https://bugcrowd.com/aiven-mbb-og)**. You will receive a response from 16 | us within 2 working days. 
If the issue is confirmed, we will release a patch as 17 | soon as possible depending on impact and complexity. 18 | 19 | ## Qualifying Vulnerabilities 20 | 21 | Any reproducible vulnerability that has a severe effect on the security or 22 | privacy of our users is likely to be in scope for the program. 23 | 24 | We generally **aren't** interested in the following issues: 25 | * Social engineering (e.g. phishing, vishing, smishing) attacks 26 | * Brute force, DoS, text injection 27 | * Missing best practices such as HTTP security headers (CSP, X-XSS, etc.), 28 | email (SPF/DKIM/DMARC records), SSL/TLS configuration. 29 | * Software version disclosure / Banner identification issues / Descriptive 30 | error messages or headers (e.g. stack traces, application or server errors). 31 | * Clickjacking on pages with no sensitive actions 32 | * Theoretical vulnerabilities where you can't demonstrate a significant 33 | security impact with a proof of concept. 34 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | TODO 2 | ==== 3 | 4 | * SSL support for Webserver 5 | * Authentication/authorization for Webserver for use in non private networks 6 | * check that we have the diskspace needed for a basebackup from PG 7 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | if ENV['VAGRANT_DEFAULT_PROVIDER'] == "libvirt" and (ARGV[0] == "up" or ARGV[0] == "destroy") 5 | unless system("sudo -n true 2> /dev/null") 6 | puts('Sudo is required, do a sudo true') 7 | exit 8 | end 9 | end 10 | 11 | # to be able to modify tests outside of vagrant, we use nfs mount point, for more 12 | # information refer https://www.vagrantup.com/docs/synced-folders/nfs 13 | Vagrant.configure("2") do |config| 14 | config.vm.box = "generic/ubuntu2204" 15 | config.vm.synced_folder ".", "/vagrant", type: "nfs", nfs_udp: false 16 | 17 | $script = <<-SCRIPT 18 | ssh-keyscan localhost >> ~/.ssh/known_hosts 19 | ssh-keygen -N '' -f ~/.ssh/id_rsa 20 | SCRIPT 21 | config.vm.provision "shell", inline: $script, privileged: false 22 | 23 | $script = <<-SCRIPT 24 | export DEBIAN_FRONTEND="noninteractive" 25 | 26 | # do not disable ipv6, the base vagrant image has disabled this 27 | sysctl net.ipv6.conf.all.disable_ipv6=0 28 | sed -i '/net.ipv6.conf.all.disable_ipv6/d' /etc/sysctl.conf 29 | 30 | echo "deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list 31 | wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - 32 | add-apt-repository -y ppa:deadsnakes/ppa 33 | 34 | apt-get update 35 | apt-get install -y build-essential libsnappy-dev postgresql-common 36 | 37 | # no point creating the default cluster as its not used for tests 38 | sed -i "s/^#start_conf.*/start_conf='manual'/g" /etc/postgresql-common/createcluster.conf 39 | sed -i "s/^#create_main_cluster.*/create_main_cluster=false/g" /etc/postgresql-common/createcluster.conf 40 | 41 | apt-get install -y python{3.10,3.11,3.12} python{3.10,3.11,3.12}-dev python{3.10,3.11,3.12}-venv 42 | apt-get install -y postgresql-{12,13,14,15,16,17} postgresql-server-dev-{12,13,14,15,16,17} 43 | 44 | username="$(< /dev/urandom tr -dc a-z | head -c${1:-32};echo;)" 45 | password=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head 
-c${1:-32};echo;) 46 | useradd -m -U $username 47 | echo "$username:$password" > /home/vagrant/pghoard-test-sftp-user 48 | echo "$username:$password" | chpasswd 49 | 50 | mkdir -p /home/$username/.ssh 51 | cat /home/vagrant/.ssh/id_rsa.pub >> /home/$username/.ssh/authorized_keys 52 | chown -R $username: /home/$username/.ssh 53 | chmod -R go-rwx /home/$username/.ssh 54 | 55 | sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/g' /etc/ssh/sshd_config 56 | 57 | # later versions have the Port 22 config disabled (cos its the default), so need to 58 | # explicitly enable it to avoid ssh only using port 23. 59 | echo "Port 22" >> /etc/ssh/sshd_config 60 | 61 | # this is for sftp testing 62 | echo "Port 23" >> /etc/ssh/sshd_config 63 | echo "Match LocalPort 22" >> /etc/ssh/sshd_config 64 | echo " DenyUsers $username" >> /etc/ssh/sshd_config 65 | systemctl reload ssh 66 | SCRIPT 67 | 68 | config.vm.provision "shell", inline: $script, privileged: true 69 | 70 | $script = <<-SCRIPT 71 | versions=(3.10 3.11 3.12) 72 | for version in "${versions[@]}"; do 73 | python${version} -m venv venv${version} 74 | source ~/venv${version}/bin/activate 75 | pip install --upgrade pip 76 | pip install "/vagrant/." 77 | pip install --upgrade "/vagrant/.[dev]" 78 | done 79 | SCRIPT 80 | config.vm.provision "shell", inline: $script, privileged: false 81 | end 82 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: 2 | layout: "reach, diff, flags, files" 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | # basic 8 | target: auto 9 | threshold: 0% 10 | # advanced settings 11 | if_ci_failed: error #success, failure, error, ignore 12 | only_pulls: false 13 | if_not_found: failure 14 | 15 | -------------------------------------------------------------------------------- /debian/changelog.in: -------------------------------------------------------------------------------- 1 | pghoard (1.4.0) unstable; urgency=low 2 | 3 | * Conflict with pgespresso < 1.2: older versions crash PostgreSQL 4 | when tablespaces are used 5 | 6 | -- Oskari Saarenmaa Tue, 26 Jul 2016 18:48:03 +0300 7 | 8 | pghoard (1.0.0) unstable; urgency=low 9 | 10 | * Release version 1.0.0 11 | 12 | -- Oskari Saarenmaa Wed, 16 Mar 2016 08:14:25 +0200 13 | 14 | pghoard (0.9.0) unstable; urgency=low 15 | 16 | * First version 17 | 18 | -- Hannu Valtonen Thu, 19 Feb 2015 12:00:00 +0200 19 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 7 2 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: pghoard 2 | Maintainer: Hannu Valtonen 3 | Section: python 4 | Priority: optional 5 | Build-Depends: debhelper (>= 8), dh-python, 6 | postgresql-9.3 | postgresql-9.4 | postgresql-9.5 | postgresql-9.6 | postgresql-10 | postgresql-11 | postgresql-12 | postgresql-13, 7 | python3-all (>= 3.3), python3-cryptography (>= 0.8), python3-dateutil, 8 | python3-flake8, python3-psycopg2, python3-pytest, python3-requests, 9 | python3-paramiko, python3-setuptools, python3-snappy 10 | X-Python3-Version: >= 3.3 11 | Standards-Version: 3.9.4 12 | Homepage: https://github.com/aiven/pghoard 13 | 14 | Package: pghoard 15 | Architecture: all 16 | Depends: ${misc:Depends}, 
${python3:Depends}, 17 | python3-cryptography (>= 0.8), python3-dateutil, python3-psycopg2, 18 | python3-requests, python3-snappy, postgresql-common, python3-paramiko 19 | Breaks: postgresql-9.3-pgespresso (< 1.2), 20 | postgresql-9.4-pgespresso (< 1.2), postgresql-9.5-pgespresso (< 1.2) 21 | Description: PostgreSQL streaming backup service 22 | PGHoard is a PostgreSQL streaming backup service. Backups are stored in 23 | encrypted and compressed format in a cloud object storage. PGHoard 24 | currently supports Amazon Web Services S3, Google Cloud Storage, OpenStack 25 | Swift and Ceph (using S3 or Swift interfaces with RadosGW.) 26 | Support for Microsoft Azure is experimental. 27 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: http://dep.debian.net/deps/dep5 2 | Upstream-Name: pghoard 3 | Source: https://github.com/aiven/pghoard/ 4 | 5 | Files: * 6 | Copyright: 2015 Ohmu Ltd 7 | License: Apache-2.0 8 | 9 | License: Apache-2.0 10 | On Debian GNU/Linux system you can find the complete text of the 11 | Apache 2.0 license in '/usr/share/common-licenses/Apache-2.0'. 12 | -------------------------------------------------------------------------------- /debian/docs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/debian/docs -------------------------------------------------------------------------------- /debian/pghoard.postinst: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | #DEBHELPER# 6 | 7 | if [ "$1" = configure ]; then 8 | chown postgres:postgres /var/lib/pghoard 9 | exit 0 10 | fi 11 | 12 | exit 0 13 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | export PYBUILD_NAME=pghoard 4 | 5 | %: 6 | dh $@ --with python3 --buildsystem=pybuild 7 | 8 | override_dh_install: 9 | dh_install 10 | install -d debian/pghoard/lib/systemd/system/ 11 | install -m 644 pghoard.unit debian/pghoard/lib/systemd/system/pghoard.service 12 | mkdir -p debian/pghoard/var/lib/pghoard 13 | chown -R postgres:postgres debian/pghoard/var/lib/pghoard 14 | 15 | override_dh_auto_test: 16 | $(MAKE) lint unittest 17 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/about.rst: -------------------------------------------------------------------------------- 1 | About PGHoard 2 | ============= 3 | 4 | Features 5 | -------- 6 | 7 | * Automatic periodic basebackups 8 | * Automatic transaction log (WAL/xlog) backups (using either ``pg_receivewal`` 9 | (formerly ``pg_receivexlog``), ``archive_command`` or experimental PG native 10 | replication protocol support with ``walreceiver``) 11 | * Optional Standalone Hot Backup support 12 | * Cloud object storage support (AWS S3, Google Cloud, OpenStack Swift, Azure, Ceph) 13 | * Backup restoration directly from object storage, compressed and encrypted 14 | * Point-in-time-recovery (PITR) 15 | * Initialize a new standby from object storage backups, automatically configured as 16 | a replicating hot-standby 17 | 18 | Fault-resilience and monitoring 19 | ------------------------------- 20 | 21 | * Persists over temporary object storage connectivity issues by retrying transfers 22 | * Verifies WAL file headers before upload (backup) and after download (restore), 23 | so that e.g. files recycled by PostgreSQL are ignored 24 | * Automatic history cleanup (backups and related WAL files older than N days) 25 | * "Archive sync" tool for detecting holes in WAL backup streams and fixing them 26 | * "Archive cleanup" tool for deleting obsolete WAL files from the archive 27 | * Keeps statistics updated in a file on disk (for monitoring tools) 28 | * Creates alert files on disk on problems (for monitoring tools) 29 | 30 | 31 | Performance 32 | ----------- 33 | 34 | * Parallel compression and encryption 35 | * WAL pre-fetching on restore 36 | -------------------------------------------------------------------------------- /docs/architecture.rst: -------------------------------------------------------------------------------- 1 | Architecture 2 | ============ 3 | 4 | PostgreSQL Point In Time Replication (PITR) consists of having a database 5 | basebackup and changes after that point go into WAL log files that can be 6 | replayed to get to the desired replication point. 7 | 8 | PGHoard runs as a daemon which will be responsible for performing the main 9 | tasks of a backup tool for PostgreSQL: 10 | 11 | * Taking periodical basebackups 12 | * Archiving the WAL 13 | * Managing backup retention according to a policy. 14 | 15 | Basebackup 16 | ---------- 17 | 18 | The basebackups are taken by the pghoard daemon directly, with no need for an 19 | external scheduler / crond. 20 | 21 | When pghoard is first launched, it will take a basebackup. After that, the 22 | frequency of basebackups is determined by configuration files. 23 | 24 | Those basebackups can be taken in one of two ways: 25 | 26 | * Either by copying the files directly from ``PGDATA``, using the 27 | ``local-tar`` or ``delta`` modes 28 | * By calling ``pg_basebackup``, using the ``basic`` or ``pipe`` modes. 29 | 30 | See :ref:`configuration_basebackup` for how to configure it. 31 | 32 | Archiving 33 | --------- 34 | 35 | PGHoard supports multiple operating models. If you don't want to modify the 36 | backuped server archiving configuration, or install anything particular on that 37 | server, ``pghoard`` can fetch the WAL using ``pg_receivewal`` (formerly ``pg_receivexlog`` on PostgreSQL < 10). 
38 | It also provides its own replication client replacing ``pg_receivewal``, using 39 | the ``walreceiver`` mode. This mode is currently experimental. 40 | 41 | PGHoard also supports a traditional ``archive_command`` in the form of the 42 | ``pghoard_postgres_command`` utility. 43 | 44 | 45 | See :ref:`configuration_archiving` for how to configure it. 46 | 47 | Retention 48 | --------- 49 | 50 | ``pghoard`` expires the backups according to the configured retention policy. 51 | Whenever there is more than the specified number of backups, older backups will 52 | be removed as well as their associated WAL files. 53 | 54 | Compression and encryption 55 | -------------------------- 56 | 57 | The PostgreSQL write-ahead log (WAL) and basebackups are compressed with 58 | Snappy (default) in order to ensure good compression speed and relatively small backup size. Zstandard or LZMA compression is also available. See :ref:`configuration_compression` for more information. 59 | 60 | Encryption is not enabled by default, but PGHoard can encrypt backed-up data at 61 | rest. Each individual file is encrypted and authenticated with file specific 62 | keys. The file specific keys are included in the backup in turn encrypted with 63 | a master RSA private/public key pair. 64 | 65 | You should follow the encryption section in the quickstart guide :ref:`quickstart_encryption`. For a full reference see :ref:`configuration_encryption`. 66 | 67 | 68 | Deployment examples 69 | ------------------- 70 | 71 | FIXME: add schemas showing a deployment of pghoard on the same host with 72 | -------------------------------------------------------------------------------- /docs/commands.rst: -------------------------------------------------------------------------------- 1 | Commands 2 | ======== 3 | 4 | 5 | pghoard 6 | ------- 7 | 8 | ``pghoard`` is the main daemon process that should be run under a service 9 | manager, such as ``systemd`` or ``supervisord``. It handles the backup of 10 | the configured sites. 11 | 12 | .. code-block:: 13 | 14 | usage: pghoard [-h] [-D] [--version] [-s] [--config CONFIG] [config_file] 15 | 16 | postgresql automatic backup daemon 17 | 18 | positional arguments: 19 | config_file configuration file path (for backward compatibility) 20 | 21 | optional arguments: 22 | -h, --help show this help message and exit 23 | -D, --debug Enable debug logging 24 | --version show program version 25 | -s, --short-log use non-verbose logging format 26 | --config CONFIG configuration file path 27 | 28 | 29 | .. _commands_restore: 30 | 31 | pghoard_restore 32 | --------------- 33 | 34 | ``pghoard_restore`` is a command line tool that can be used to restore a 35 | previous database backup from either ``pghoard`` itself or from one of the 36 | supported object stores. ``pghoard_restore`` can also configure 37 | ``recovery.conf`` to use ``pghoard_postgres_command`` as the WAL 38 | ``restore_command`` in ``recovery.conf``. 39 | 40 | 41 | .. code-block:: 42 | 43 | usage: pghoard_restore [-h] [-D] [--status-output-file STATUS_OUTPUT_FILE] [--version] 44 | {list-basebackups-http,list-basebackups,get-basebackup} ...
45 | 46 | positional arguments: 47 | list-basebackups-http 48 | List available basebackups from a HTTP source 49 | list-basebackups 50 | List basebackups from an object store 51 | get-basebackup 52 | Download a basebackup from an object store 53 | 54 | 55 | -h, --help show this help message and exit 56 | -D, --debug Enable debug logging 57 | --status-output-file STATUS_OUTPUT_FILE 58 | Filename for status output JSON 59 | --version show program version 60 | 61 | pghoard_archive_cleanup 62 | ----------------------- 63 | 64 | ``pghoard_archive_cleanup`` can be used to clean up any orphan WAL files 65 | from the object store. After the configured number of basebackups has been 66 | exceeded (configuration key ``basebackup_count``), ``pghoard`` deletes the 67 | oldest basebackup and all WAL associated with it. Transient object storage 68 | failures and other interruptions can cause the WAL deletion process to leave 69 | orphan WAL files behind; they can be deleted with this tool. 70 | 71 | .. code-block:: 72 | 73 | usage: pghoard_archive_cleanup [-h] [--version] [--site SITE] [--config CONFIG] [--dry-run] 74 | 75 | 76 | -h, --help show this help message and exit 77 | --version show program version 78 | --site SITE pghoard site 79 | --config CONFIG pghoard config file 80 | --dry-run only list redundant segments and calculate total file size but do not delete 81 | 82 | 83 | pghoard_archive_sync 84 | -------------------- 85 | 86 | ``pghoard_archive_sync`` can be used to see if any local files should 87 | be archived but haven't been or if any of the archived files have unexpected 88 | content and need to be archived again. The other use case it has is to determine 89 | if there are any gaps in the required files in the WAL archive 90 | from the current WAL file on to the latest basebackup's first WAL file. 91 | 92 | .. code-block:: 93 | 94 | usage: pghoard_archive_sync [-h] [-D] [--version] [--site SITE] [--config CONFIG] 95 | [--max-hash-checks MAX_HASH_CHECKS] [--no-verify] [--create-new-backup-on-failure] 96 | 97 | 98 | -h, --help show this help message and exit 99 | -D, --debug Enable debug logging 100 | --version show program version 101 | --site SITE pghoard site 102 | --config CONFIG pghoard config file 103 | --max-hash-checks MAX_HASH_CHECKS 104 | Maximum number of files for which to validate hash in addition to basic existence check 105 | --no-verify do not verify archive integrity 106 | --create-new-backup-on-failure 107 | request a new basebackup if verification fails 108 | 109 | pghoard_create_keys 110 | ------------------- 111 | 112 | ``pghoard_create_keys`` can be used to generate and output encryption keys 113 | in the ``pghoard`` configuration format. 114 | 115 | ``pghoard_postgres_command`` is a command line tool that can be used as 116 | PostgreSQL's ``archive_command`` or ``restore_command``. It communicates with 117 | ``pghoard`` 's locally running webserver to let it know there's a new file that 118 | needs to be compressed, encrypted and stored in an object store (in archive 119 | mode) or its inverse (in restore mode.) 120 | 121 | ..
code-block:: 122 | 123 | 124 | usage: pghoard_create_keys [-h] [-D] [--version] [--site SITE] --key-id KEY_ID [--bits BITS] [--config CONFIG] 125 | 126 | -h, --help show this help message and exit 127 | -D, --debug Enable debug logging 128 | --version show program version 129 | --site SITE backup site 130 | --key-id KEY_ID key alias as used with encryption_key_id configuration directive 131 | --bits BITS length of the generated key in bits, default 3072 132 | --config CONFIG configuration file to store the keys in 133 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('..')) 16 | from version import get_project_version 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'PGHoard' 22 | copyright = '2021, Aiven' 23 | author = 'Aiven' 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = get_project_version('pghoard/version.py') 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | "sphinx_rtd_theme" 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # List of patterns, relative to source directory, that match files and 41 | # directories to ignore when looking for source files. 42 | # This pattern also affects html_static_path and html_extra_path. 43 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 44 | 45 | 46 | # -- Options for HTML output ------------------------------------------------- 47 | 48 | # The theme to use for HTML and HTML Help pages. See the documentation for 49 | # a list of builtin themes. 50 | # 51 | html_theme = 'sphinx_rtd_theme' 52 | 53 | # Add any paths that contain custom static files (such as style sheets) here, 54 | # relative to this directory. They are copied after the builtin static files, 55 | # so a file named "default.css" will overwrite the builtin "default.css". 56 | html_static_path = ['_static'] 57 | -------------------------------------------------------------------------------- /docs/development.rst: -------------------------------------------------------------------------------- 1 | Development 2 | =========== 3 | 4 | Requirements 5 | ------------ 6 | 7 | PGHoard can backup and restore PostgreSQL versions 9.3 and above, but is 8 | only tested and actively developed with version 10 and above. 9 | 10 | The daemon is implemented in Python and is tested and developed with version 11 | 3.6 and above. 
The following Python modules are required: 12 | 13 | The following Python modules are required: 14 | 15 | * psycopg2_ to look up transaction log metadata 16 | * requests_ for the internal client-server architecture 17 | 18 | .. _`psycopg2`: http://initd.org/psycopg/ 19 | .. _`requests`: http://www.python-requests.org/en/latest/ 20 | 21 | Optional requirements include: 22 | 23 | * azure_ for Microsoft Azure object storage (patched version required, see link) 24 | * botocore_ for AWS S3 (or Ceph-S3) object storage 25 | * google-api-client_ for Google Cloud object storage 26 | * cryptography_ for backup encryption and decryption (version 0.8 or newer required) 27 | * snappy_ for Snappy compression and decompression 28 | * zstandard_ for Zstandard (zstd) compression and decompression 29 | * systemd_ for systemd integration 30 | * swiftclient_ for OpenStack Swift object storage 31 | * paramiko_ for sftp object storage 32 | 33 | .. _`azure`: https://github.com/aiven/azure-sdk-for-python/tree/aiven/rpm_fixes 34 | .. _`botocore`: https://github.com/boto/botocore 35 | .. _`google-api-client`: https://github.com/google/google-api-python-client 36 | .. _`cryptography`: https://cryptography.io/ 37 | .. _`snappy`: https://github.com/andrix/python-snappy 38 | .. _`zstandard`: https://github.com/indygreg/python-zstandard 39 | .. _`systemd`: https://github.com/systemd/python-systemd 40 | .. _`swiftclient`: https://github.com/openstack/python-swiftclient 41 | .. _`paramiko`: https://github.com/paramiko/paramiko 42 | 43 | Developing and testing PGHoard also requires the following utilities: 44 | flake8_, pylint_ and pytest_. 45 | 46 | .. _`flake8`: https://flake8.readthedocs.io/ 47 | .. _`pylint`: https://www.pylint.org/ 48 | .. _`pytest`: http://pytest.org/ 49 | 50 | PGHoard has been developed and tested on modern Linux x86-64 systems, but 51 | should work on other platforms that provide the required modules. 52 | 53 | Vagrant 54 | ======= 55 | 56 | The Vagrantfile can be used to setup a vagrant development environment. The vagrant environment has 57 | python 3.10, 3.11 and 3.12 virtual environments and installations of postgresql 12, 13, 14, 15, 16 and 17. 58 | 59 | By default vagrant up will start a Virtualbox environment. The Vagrantfile will also work for libvirt, just prefix 60 | ``VAGRANT_DEFAULT_PROVIDER=libvirt`` to the ``vagrant up`` command. 61 | 62 | Any combination of Python (3.10, 3.11 and 3.12) and Postgresql (12, 13, 14, 15, 16 and 17) 63 | 64 | Bring up vagrant instance and connect via ssh:: 65 | 66 | vagrant up 67 | vagrant ssh 68 | vagrant@ubuntu2004:~$ cd /vagrant 69 | 70 | Test with Python 3.11 and Postgresql 12:: 71 | 72 | vagrant@ubuntu2004:~$ source ~/venv3.11/bin/activate 73 | vagrant@ubuntu2004:~$ PG_VERSION=12 make unittest 74 | vagrant@ubuntu2004:~$ deactivate 75 | 76 | Test with Python 3.12 and Postgresql 13:: 77 | 78 | vagrant@ubuntu2004:~$ source ~/venv3.12/bin/activate 79 | vagrant@ubuntu2004:~$ PG_VERSION=13 make unittest 80 | vagrant@ubuntu2004:~$ deactivate 81 | 82 | And so on 83 | 84 | .. _building_from_source: 85 | 86 | Building 87 | -------- 88 | 89 | To build an installation package for your distribution, go to the root 90 | directory of a PGHoard Git checkout and run: 91 | 92 | Debian:: 93 | 94 | make deb 95 | 96 | This will produce a ``.deb`` package into the parent directory of the Git 97 | checkout. 98 | 99 | Fedora:: 100 | 101 | make rpm 102 | 103 | This will produce a ``.rpm`` package usually into ``rpm/RPMS/noarch/``. 
104 | 105 | Python/Other:: 106 | 107 | python setup.py bdist_egg 108 | 109 | This will produce an egg file into a dist directory within the same folder. 110 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. PGHoard documentation index file, created by 2 | sphinx-quickstart on Tue Jul 27 13:52:50 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | PGHoard 7 | ======= 8 | 9 | .. |BuildStatus| image:: https://github.com/aiven/pghoard/actions/workflows/build.yml/badge.svg?branch=main 10 | .. _BuildStatus: https://github.com/aiven/pghoard/actions 11 | 12 | 13 | ``pghoard`` is a PostgreSQL backup daemon and restore tooling that stores backup data in cloud object stores. 14 | 15 | .. toctree:: 16 | :maxdepth: 2 17 | :caption: Contents: 18 | 19 | about 20 | quickstart 21 | architecture 22 | install 23 | commands 24 | monitoring 25 | configuration 26 | development 27 | 28 | License 29 | ======= 30 | 31 | PGHoard is licensed under the Apache License, Version 2.0. Full license text 32 | is available in the ``LICENSE`` file and at 33 | http://www.apache.org/licenses/LICENSE-2.0.txt 34 | 35 | 36 | Credits 37 | ======= 38 | 39 | PGHoard was created by Hannu Valtonen for 40 | `Aiven`_ and is now maintained by Aiven developers . 41 | 42 | .. _`Aiven`: https://aiven.io/ 43 | 44 | Recent contributors are listed on the GitHub project page, 45 | https://github.com/aiven/pghoard/graphs/contributors 46 | 47 | 48 | Contact 49 | ======= 50 | 51 | Bug reports and patches are very welcome, please post them as GitHub issues 52 | and pull requests at https://github.com/aiven/pghoard . Any possible 53 | vulnerabilities or other serious issues should be reported directly to the 54 | maintainers . 55 | 56 | 57 | Copyright 58 | ========= 59 | 60 | Copyright (C) 2015 Aiven Ltd 61 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | To run ``PGHoard`` you need to install it, and configure PostgreSQL according 5 | to the modes of backup and archiving you chose. 6 | 7 | This section only describes how to install it using a package manager. 8 | See :ref:`building_from_source` for other installation methods. 9 | 10 | 11 | .. _installation_package: 12 | 13 | Installation from your distribution package manager 14 | --------------------------------------------------- 15 | 16 | RHEL 17 | ++++ 18 | 19 | FIXME: the RPM package seems to be available on yum.postgresql.org. Write a 20 | proper documentation for that. 21 | 22 | Debian 23 | ++++++ 24 | 25 | FIXME: can the package be included in apt.postgresql.org ? doesn't seem to be 26 | the case for now. 27 | 28 | 29 | 30 | Installation from pip 31 | --------------------- 32 | 33 | You can also install it using pip: 34 | 35 | ``pip install pghoard`` 36 | 37 | FIXME: version of pghoard on pypi isn't up to date. 38 | 39 | 40 | .. _installation_postgresql_configuration: 41 | 42 | PostgreSQL Configuration 43 | ======================== 44 | 45 | PostgreSQL should be configured to allow replication connections, and have a 46 | high enough ``wal_level``. 47 | 48 | wal_level 49 | --------- 50 | 51 | ``wal_level`` should be set to at least ``replica`` (or ``archive`` for 52 | PostgreSQL versions prior to 9.6). 
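For example, a minimal ``postgresql.conf`` fragment could look like the sketch below; the value shown is only an illustration and should be adapted to your PostgreSQL version and replication needs::

    wal_level = replica    # use 'archive' instead on PostgreSQL versions prior to 9.6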
53 | 54 | .. note:: Changing ``wal_level`` requires restarting PostgreSQL. 55 | 56 | 57 | Replication connections 58 | ----------------------- 59 | 60 | If you use one of the non-local basebackup strategies (``basic`` or 61 | ``pipe``), you will need to allow ``pg_basebackup`` to connect using a 62 | replication connection. 63 | 64 | Additionally, if you use a WAL-streaming archiving mode (``pg_receivexlog`` or 65 | ``walreceiver``) you will need an additional replication connection for it. 66 | 67 | The parameter ``max_wal_senders`` must then be set high enough to allow for 68 | at least that number of connections. You should also take into account any 69 | other replication connections that you may need, for example for one or several replicas. 70 | 71 | Example:: 72 | 73 | max_wal_senders = 4 74 | 75 | .. note:: Changing ``max_wal_senders`` requires restarting PostgreSQL. 76 | 77 | You also need a PostgreSQL user account with the ``REPLICATION`` attribute, 78 | which you can create using psql:: 79 | 80 | -- create the user 81 | CREATE USER pghoard REPLICATION; 82 | -- set a password for the pghoard user 83 | \password pghoard 84 | 85 | This user must also be allowed to connect. For this you will need to edit 86 | the ``pg_hba.conf`` file of your PostgreSQL cluster. 87 | 88 | For example:: 89 | 90 | # TYPE DATABASE USER ADDRESS METHOD 91 | host replication pghoard 127.0.0.1/32 md5 92 | 93 | .. note:: See the PostgreSQL documentation on ``pg_hba.conf`` for 94 | more information. 95 | 96 | After editing, please reload the configuration with either:: 97 | 98 | SELECT pg_reload_conf(); 99 | 100 | or by using your distribution's service manager (for example ``systemctl reload 101 | postgresql``). 102 | 103 | Now you can move on to :ref:`configuration` to learn how to set up PGHoard. 104 |
-------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 |
-------------------------------------------------------------------------------- /docs/monitoring.rst: -------------------------------------------------------------------------------- 1 | Monitoring 2 | ========== 3 | 4 | Any backup tool must be properly monitored to ensure backups are correctly 5 | performed. 6 | 7 | ``pghoard`` provides several ways to monitor it. 8 | 9 | 10 | .. note:: 11 | In addition to monitoring, the restore process should be tested regularly. 12 | 13 | Alert files 14 | ----------- 15 | 16 | Alert files are created whenever an error condition occurs that requires human 17 | intervention to resolve. You should add checks for the existence 18 | of these files to your alerting system; a minimal check is sketched after the list below. 19 | 20 | :authentication_error: 21 | There has been a problem in the authentication of at least one of the 22 | PostgreSQL connections. This usually denotes a wrong username and/or 23 | password. 24 | :configuration_error: 25 | There has been a problem with the configuration of at least one of the 26 | PostgreSQL connections. This usually denotes a missing ``pg_hba.conf`` entry or 27 | incompatible settings in postgresql.conf. 28 | :upload_retries_warning: 29 | Upload of a file has failed more times than 30 | ``upload_retries_warning_limit``. Needs human intervention to figure 31 | out why and to delete the alert once the situation has been fixed. 32 | :version_mismatch_error: 33 | Your local PostgreSQL client versions of ``pg_basebackup`` or 34 | ``pg_receivewal`` (formerly ``pg_receivexlog``) do not match the server's PostgreSQL version. You 35 | need to update them to be on the same version level. 36 | 37 | :version_unsupported_error: 38 | The server's PostgreSQL version is not supported. 39 |
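For illustration, a minimal check could look like the following sketch. The directory shown is an assumption -- point ``ALERT_DIR`` at the directory where your own pghoard instance writes its alert files -- and the script simply exits non-zero when any alert file is present, which most alerting systems can pick up::

    #!/usr/bin/env python3
    # Minimal monitoring sketch: exit with a non-zero status if any pghoard
    # alert file exists. ALERT_DIR is an assumption -- use the directory your
    # own pghoard configuration writes alert files to.
    import os
    import sys

    ALERT_DIR = "/var/lib/pghoard"
    ALERT_FILES = [
        "authentication_error",
        "configuration_error",
        "upload_retries_warning",
        "version_mismatch_error",
        "version_unsupported_error",
    ]

    found = [name for name in ALERT_FILES if os.path.exists(os.path.join(ALERT_DIR, name))]
    if found:
        print("CRITICAL: pghoard alert files present: " + ", ".join(found))
        sys.exit(2)
    print("OK: no pghoard alert files")
    sys.exit(0)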
40 | Metrics 41 | ------- 42 | 43 | You can configure ``pghoard`` to send metrics to an external system. Supported 44 | systems are described in :ref:`configuration_logging`. 45 | 46 | FIXME: describe the different metrics and what kind of alert to trigger based on 47 | them. 48 |
-------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx_rtd_theme 2 |
-------------------------------------------------------------------------------- /golang/pghoard_postgres_command_go.go: -------------------------------------------------------------------------------- 1 | /* 2 | pghoard - archive_command and restore_command for postgresql 3 | 4 | Copyright (c) 2017 Aiven 5 | See LICENSE for details 6 | */ 7 | 8 | package main 9 | 10 | import ( 11 | "errors" 12 | "flag" 13 | "fmt" 14 | "io/ioutil" 15 | "log" 16 | "net/http" 17 | "os" 18 | "path" 19 | "regexp" 20 | "time" 21 | ) 22 | 23 | const PGHOARD_HOST = "127.0.0.1" 24 | const PGHOARD_PORT = 16000 25 | 26 | // When running restore_command PostgreSQL interprets exit codes 1..125 as "file not found errors" signalling 27 | // that there's no such WAL file from which PostgreSQL assumes that we've completed recovery. We never want to 28 | // return such an error code unless we actually got confirmation that the requested file isn't in the backend so 29 | // we try to exit with EXIT_ERROR (255) status whenever we see unexpected errors. Such an error code causes 30 | // PostgreSQL to abort recovery and wait for admin interaction. 31 | // 32 | // The above considerations apply to handling archive_command, but in its case there's no reason for us to ask 33 | // PostgreSQL to abort, we want it to just retry indefinitely so we'll always return a code between 1..125. 34 | // 35 | // Note that EXIT_NOT_FOUND and EXIT_ARCHIVE_FAIL and their error codes are not defined or required by 36 | // PostgreSQL, they're just used for convenience here and to test for differences between various failure 37 | // scenarios (Python exits with status 1 on uncaught exceptions.)
38 | const EXIT_OK = 0 39 | const EXIT_RESTORE_FAIL = 2 40 | const EXIT_ARCHIVE_FAIL = 3 41 | const EXIT_NOT_FOUND = 4 42 | const EXIT_ABORT = 255 43 | 44 | func main() { 45 | rc, err := run() 46 | if err != nil { 47 | fmt.Fprintf(os.Stderr, "error: %v\n", err) 48 | } 49 | os.Exit(rc) 50 | } 51 | 52 | func run() (int, error) { 53 | verPtr := flag.Bool("version", false, "show program version") 54 | hostPtr := flag.String("host", PGHOARD_HOST, "pghoard service host") 55 | portPtr := flag.Int("port", PGHOARD_PORT, "pghoard service port") 56 | usernamePtr := flag.String("username", "", "pghoard service username") 57 | passwordPtr := flag.String("password", "", "pghoard service password") 58 | sitePtr := flag.String("site", "", "pghoard backup site") 59 | xlogPtr := flag.String("xlog", "", "xlog file name") 60 | outputPtr := flag.String("output", "", "output file") 61 | modePtr := flag.String("mode", "", "operation mode") 62 | riPtr := flag.Float64("retry-interval", 5.0, "retry interval (seconds)") 63 | 64 | flag.Parse() 65 | 66 | if *verPtr { 67 | fmt.Println("pghoard_postgres_command_go 1.0.0") 68 | return EXIT_OK, nil 69 | } 70 | 71 | if *sitePtr == "" { 72 | return EXIT_ABORT, errors.New("--site flag is required") 73 | } 74 | if *xlogPtr == "" { 75 | return EXIT_ABORT, errors.New("--xlog flag is required") 76 | } 77 | 78 | url := fmt.Sprint("http://", *hostPtr, ":", *portPtr, "/", *sitePtr, "/archive/", *xlogPtr) 79 | 80 | if *modePtr == "archive" { 81 | return archive_command(url) 82 | } else if *modePtr == "restore" { 83 | attempt := 0 84 | retry_seconds := *riPtr 85 | for { 86 | attempt += 1 87 | rc, err := restore_command(url, *outputPtr, *xlogPtr, *usernamePtr, *passwordPtr) 88 | if rc != EXIT_RESTORE_FAIL { 89 | return rc, err 90 | } 91 | if attempt >= 3 { 92 | return EXIT_ABORT, err // see the comment at the top of this file 93 | } 94 | log.Printf("Restoring %s failed: %s; retrying in %g seconds", *xlogPtr, err, retry_seconds) 95 | time.Sleep(time.Duration(retry_seconds) * time.Second) 96 | } 97 | } else { 98 | return EXIT_ABORT, errors.New("--mode must be set to 'archive' or 'restore'") 99 | } 100 | } 101 | 102 | func archive_command(url string) (int, error) { 103 | return EXIT_ABORT, errors.New("archive_command not yet implemented") 104 | } 105 | 106 | func restore_command(url string, output string, xlog string, username string, password string) (int, error) { 107 | var output_path string 108 | var req *http.Request 109 | var err error 110 | 111 | if output == "" { 112 | req, err = http.NewRequest("HEAD", url, nil) 113 | if username != "" && password != "" { 114 | req.SetBasicAuth(username, password) 115 | } 116 | } else { 117 | /* Construct absolute path for output - postgres calls this command with a relative path to its xlog 118 | directory. Note that os.path.join strips preceding components if a new components starts with a 119 | slash so it's still possible to use this with absolute paths. 
*/ 120 | if output[0] == '/' { 121 | output_path = output 122 | } else { 123 | cwd, err := os.Getwd() 124 | if err != nil { 125 | return EXIT_ABORT, err 126 | } 127 | output_path = path.Join(cwd, output) 128 | } 129 | xlogNameRe := regexp.MustCompile(`^([A-F0-9]{24}|[A-F0-9]{8}\.history)$`) 130 | if xlogNameRe.MatchString(xlog) { 131 | // if file ".pghoard.prefetch" exists, just move it to destination 132 | xlogPrefetchPath := path.Join(path.Dir(output_path), xlog+".pghoard.prefetch") 133 | _, err = os.Stat(xlogPrefetchPath) 134 | if err == nil { 135 | err := os.Rename(xlogPrefetchPath, output_path) 136 | if err != nil { 137 | return EXIT_ABORT, err 138 | } 139 | return EXIT_OK, nil 140 | } 141 | } 142 | req, err = http.NewRequest("GET", url, nil) 143 | req.Header.Set("x-pghoard-target-path", output_path) 144 | if username != "" && password != "" { 145 | req.SetBasicAuth(username, password) 146 | } 147 | } 148 | 149 | client := &http.Client{} 150 | resp, err := client.Do(req) 151 | if err != nil { 152 | return EXIT_RESTORE_FAIL, err 153 | } 154 | defer resp.Body.Close() 155 | body, err := ioutil.ReadAll(resp.Body) 156 | if err != nil { 157 | return EXIT_RESTORE_FAIL, err 158 | } 159 | 160 | // no output requested, expecting 200 OK for a HEAD request 161 | if output == "" && len(body) == 0 && resp.StatusCode == 200 { 162 | return EXIT_OK, nil 163 | } 164 | 165 | // output requested, expecting 201 Created response 166 | if output != "" && len(body) == 0 && resp.StatusCode == 201 { 167 | return EXIT_OK, nil 168 | } 169 | 170 | /* NOTE: PostgreSQL interprets exit codes 1..125 as "file not found errors" signalling that there's no 171 | such wal file from which PostgreSQL assumes that we've completed recovery so we never want to return 172 | such an error code unless we actually got confirmation that the file isn't in the backend. 
*/ 173 | if resp.StatusCode == 404 { 174 | log.Printf("%s not found from archive", url) 175 | return EXIT_NOT_FOUND, nil 176 | } 177 | return EXIT_ABORT, fmt.Errorf("Restore failed with HTTP status %d: %s", resp.StatusCode, string(body)) 178 | } 179 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.9 3 | warn_redundant_casts = True 4 | 5 | 6 | [mypy-azure.*] 7 | ignore_missing_imports = True 8 | 9 | [mypy-azure.common.*] 10 | ignore_missing_imports = True 11 | 12 | [mypy-dataclasses.*] 13 | ignore_missing_imports = True 14 | 15 | [mypy-googleapiclient.*] 16 | ignore_missing_imports = True 17 | 18 | [mypy-oauth2client.*] 19 | ignore_missing_imports = True 20 | 21 | [mypy-py] 22 | ignore_missing_imports = True 23 | 24 | [mypy-rohmu.*] 25 | ignore_missing_imports = True 26 | 27 | [mypy-snappy.*] 28 | ignore_missing_imports = True 29 | 30 | [mypy-swiftclient.*] 31 | ignore_missing_imports = True 32 | 33 | [mypy-systemd.*] 34 | ignore_missing_imports = True 35 | 36 | -------------------------------------------------------------------------------- /pghoard-local-minimal.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_location": "./metadata", 3 | "backup_sites": { 4 | "example-site": { 5 | "nodes": [ 6 | { 7 | "host": "127.0.0.1", 8 | "password": "secret", 9 | "port": 5432, 10 | "user": "backup" 11 | } 12 | ], 13 | "object_storage": { 14 | "storage_type": "local", 15 | "directory": "./backups" 16 | }, 17 | "pg_data_directory": "/path/where/my/pgdata/resides" 18 | } 19 | } 20 | } 21 | 22 | -------------------------------------------------------------------------------- /pghoard.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_location": "/var/lib/pghoard", 3 | "backup_sites": { 4 | "default": { 5 | "active_backup_mode": "pg_receivexlog", 6 | "basebackup_count": 2, 7 | "basebackup_interval_hours": 24, 8 | "nodes": [ 9 | { 10 | "host": "127.0.0.1", 11 | "password": "example", 12 | "port": 5432, 13 | "user": "replication" 14 | } 15 | ], 16 | "object_storage": { 17 | "aws_access_key_id": "xxx", 18 | "aws_secret_access_key": "ASDF", 19 | "bucket_name": "pgbackups", 20 | "region": "eu-central-1", 21 | "storage_type": "s3" 22 | }, 23 | "pg_data_directory": "/path/where/my/pgdata/resides", 24 | "pg_bin_directory": "/usr/bin" 25 | } 26 | }, 27 | "tar_executable": "tar", 28 | "http_address": "127.0.0.1", 29 | "http_port": 16000, 30 | "log_level": "INFO", 31 | "syslog": false, 32 | "syslog_address": "/dev/log", 33 | "syslog_facility": "local2" 34 | } 35 | -------------------------------------------------------------------------------- /pghoard.spec: -------------------------------------------------------------------------------- 1 | Name: pghoard 2 | Version: %{major_version} 3 | Release: %{minor_version}%{?dist} 4 | Url: http://github.com/aiven/pghoard 5 | Summary: PostgreSQL streaming backup service 6 | License: ASL 2.0 7 | Source0: pghoard-rpm-src.tar 8 | Requires: python3-botocore 9 | Requires: python3-cryptography >= 0.8 10 | Requires: python3-dateutil 11 | Requires: python3-psycopg2 12 | Requires: python3-pydantic 13 | Requires: python3-requests 14 | Requires: python3-snappy 15 | Requires: python3-zstandard 16 | Requires: systemd 17 | Conflicts: pgespresso93 < 1.2, pgespresso94 < 1.2, pgespresso95 < 1.2 18 | BuildRequires: golang 19 | 
BuildRequires: python3-devel 20 | BuildRequires: python3-flake8 21 | BuildRequires: python3-pylint 22 | BuildRequires: python3-pytest 23 | BuildRequires: systemd 24 | 25 | %undefine _missing_build_ids_terminate_build 26 | %define debug_package %{nil} 27 | 28 | %description 29 | PGHoard is a PostgreSQL streaming backup service. Backups are stored in 30 | encrypted and compressed format in a cloud object storage. PGHoard 31 | currently supports Amazon Web Services S3, Google Cloud Storage, OpenStack 32 | Swift and Ceph (using S3 or Swift interfaces with RadosGW.) 33 | Support for Microsoft Azure is experimental. 34 | 35 | 36 | %{?python_disable_dependency_generator} 37 | 38 | 39 | %prep 40 | %setup -q -n pghoard 41 | 42 | 43 | %build 44 | go build golang/pghoard_postgres_command_go.go 45 | 46 | 47 | %install 48 | sed -e s,pghoard_postgres_command,pghoard_postgres_command_go,g -i pghoard/restore.py 49 | python3 setup.py install --prefix=%{_prefix} --root=%{buildroot} 50 | sed -e "s@#!/bin/python@#!%{_bindir}/python@" -i %{buildroot}%{_bindir}/* 51 | %{__install} -Dm0644 pghoard.unit %{buildroot}%{_unitdir}/pghoard.service 52 | cp -a pghoard_postgres_command_go %{buildroot}%{_bindir} 53 | 54 | 55 | %check 56 | make test 57 | 58 | %files 59 | %defattr(-,root,root,-) 60 | %doc LICENSE README.rst pghoard.json 61 | %{_bindir}/pghoard* 62 | %{_unitdir}/pghoard.service 63 | %{python3_sitelib}/* 64 | 65 | 66 | %changelog 67 | * Tue Feb 11 2020 Tapio Oikarinen - 2.1.1 68 | - Security fix for gnutaremu 69 | 70 | * Tue Sep 5 2017 Oskari Saarenmaa - 1.4.0 71 | - Add pghoard_postgres_command_go 72 | 73 | * Tue Jul 26 2016 Oskari Saarenmaa - 1.4.0 74 | - Conflict with pgespresso < 1.2: older versions crash PostgreSQL 75 | when tablespaces are used 76 | 77 | * Mon Dec 14 2015 Oskari Saarenmaa - 0.9.0 78 | - We're Python 3 only now 79 | 80 | * Wed Mar 25 2015 Oskari Saarenmaa - 0.9.0 81 | - Build a single package using Python 3 if possible, Python 2 otherwise 82 | 83 | * Thu Feb 26 2015 Oskari Saarenmaa - 0.9.0 84 | - Refactored 85 | 86 | * Thu Feb 19 2015 Hannu Valtonen - 0.9.0 87 | - Initial RPM package spec 88 | -------------------------------------------------------------------------------- /pghoard.unit: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=PostgreSQL streaming backup service 3 | 4 | [Service] 5 | User=postgres 6 | Group=postgres 7 | Type=notify 8 | Restart=always 9 | ExecStart=/usr/bin/pghoard /var/lib/pghoard/pghoard.json 10 | ExecReload=/bin/kill -HUP $MAINPID 11 | WorkingDirectory=/var/lib/pghoard 12 | 13 | [Install] 14 | WantedBy=multi-user.target 15 | -------------------------------------------------------------------------------- /pghoard/.gitignore: -------------------------------------------------------------------------------- 1 | /version.py 2 | -------------------------------------------------------------------------------- /pghoard/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | pghoard 3 | 4 | Copyright (c) 2016 Ohmu Ltd 5 | See LICENSE for details 6 | """ 7 | from . import mapping, monitoring 8 | -------------------------------------------------------------------------------- /pghoard/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from . 
import pghoard 4 | 5 | sys.exit(pghoard.main()) 6 | -------------------------------------------------------------------------------- /pghoard/archive_cleanup.py: -------------------------------------------------------------------------------- 1 | """ 2 | pghoard: clean up unused WAL from archive 3 | 4 | Copyright (c) 2017 Ohmu Ltd 5 | See LICENSE for details 6 | """ 7 | import argparse 8 | import logging 9 | import os 10 | import sys 11 | 12 | from rohmu import get_transfer 13 | from rohmu.errors import (FileNotFoundFromStorageError, InvalidConfigurationError) 14 | 15 | from pghoard import common, config, logutil, version 16 | 17 | 18 | class ArchiveCleanup: 19 | def __init__(self): 20 | self.log = logging.getLogger(self.__class__.__name__) 21 | self.config = None 22 | self.site = None 23 | self.backup_site = None 24 | self.storage = None 25 | 26 | def set_config(self, config_file, site): 27 | self.config = config.read_json_config_file(config_file, check_commands=False, check_pgdata=False) 28 | self.site = config.get_site_from_config(self.config, site) 29 | self.backup_site = self.config["backup_sites"][self.site] 30 | storage_config = common.get_object_storage_config(self.config, self.site) 31 | self.storage = get_transfer(storage_config) 32 | 33 | def archive_cleanup(self, dry_run): 34 | basebackup_path = os.path.join(self.backup_site["prefix"], "basebackup") 35 | xlog_path = os.path.join(self.backup_site["prefix"], "xlog") 36 | basebackups = self.storage.list_path(basebackup_path) 37 | first_required_wal = min(bb["metadata"]["start-wal-segment"] for bb in basebackups) 38 | self.log.info("First required WAL segment is %r", first_required_wal) 39 | total_bytes = 0 40 | for object_info in self.storage.list_iter(xlog_path, with_metadata=False): 41 | object_name = object_info["name"] 42 | segment = os.path.basename(object_name) 43 | if segment < first_required_wal: 44 | self.log.info("Orphan WAL segment %r needs to be deleted", segment) 45 | if "size" in object_info: 46 | total_bytes += int(object_info["size"]) 47 | if not dry_run: 48 | try: 49 | self.storage.delete_key(object_name) 50 | except FileNotFoundFromStorageError: 51 | self.log.error("Storage report segment %r is not available", segment) 52 | self.log.info("Total orphan WAL segments size is %s bytes", total_bytes) 53 | 54 | def run(self, args=None): 55 | parser = argparse.ArgumentParser() 56 | parser.add_argument("--version", action="version", help="show program version", version=version.__version__) 57 | parser.add_argument("--site", help="pghoard site", required=False) 58 | parser.add_argument("--config", help="pghoard config file", default=os.environ.get("PGHOARD_CONFIG")) 59 | parser.add_argument( 60 | "--dry-run", 61 | help="only list redundant segments and calculate total file size but do not delete", 62 | required=False, 63 | default=False, 64 | action="store_true" 65 | ) 66 | args = parser.parse_args(args) 67 | 68 | if not args.config: 69 | print("pghoard: config file path must be given with --config or via env PGHOARD_CONFIG") 70 | return 1 71 | 72 | self.set_config(args.config, args.site) 73 | return self.archive_cleanup(args.dry_run) 74 | 75 | 76 | def main(): 77 | logutil.configure_logging(level=logging.INFO) 78 | tool = ArchiveCleanup() 79 | try: 80 | return tool.run() 81 | except KeyboardInterrupt: 82 | print("*** interrupted by keyboard ***") 83 | return 1 84 | except InvalidConfigurationError as ex: 85 | tool.log.error("FATAL: %s: %s", ex.__class__.__name__, ex) 86 | return 1 87 | 88 | 89 | if __name__ == 
"__main__": 90 | sys.exit(main() or 0) 91 | -------------------------------------------------------------------------------- /pghoard/basebackup/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Aiven, Helsinki, Finland. https://aiven.io/ 2 | -------------------------------------------------------------------------------- /pghoard/create_keys.py: -------------------------------------------------------------------------------- 1 | """ 2 | pghoard - encryption key generation tool 3 | 4 | Copyright (c) 2016 Ohmu Ltd 5 | See LICENSE for details 6 | """ 7 | import argparse 8 | import json 9 | import logging 10 | import os 11 | import sys 12 | 13 | from cryptography.hazmat.backends import default_backend 14 | from cryptography.hazmat.primitives import serialization 15 | from cryptography.hazmat.primitives.asymmetric import rsa 16 | from rohmu.errors import InvalidConfigurationError 17 | 18 | from . import config, logutil, version 19 | from .common import write_json_file 20 | 21 | 22 | class CommandError(Exception): 23 | pass 24 | 25 | 26 | def create_keys(bits): 27 | rsa_private_key = rsa.generate_private_key(public_exponent=65537, key_size=bits, backend=default_backend()) 28 | rsa_private_key_pem_bin = rsa_private_key.private_bytes( 29 | encoding=serialization.Encoding.PEM, 30 | format=serialization.PrivateFormat.PKCS8, 31 | encryption_algorithm=serialization.NoEncryption() 32 | ) 33 | 34 | rsa_public_key = rsa_private_key.public_key() 35 | rsa_public_key_pem_bin = rsa_public_key.public_bytes( 36 | encoding=serialization.Encoding.PEM, format=serialization.PublicFormat.SubjectPublicKeyInfo 37 | ) 38 | 39 | return rsa_private_key_pem_bin.decode("ascii"), rsa_public_key_pem_bin.decode("ascii") 40 | 41 | 42 | def create_config(site, key_id, rsa_private_key, rsa_public_key): 43 | return { 44 | "backup_sites": { 45 | site: { 46 | "encryption_key_id": key_id, 47 | "encryption_keys": { 48 | key_id: { 49 | "private": rsa_private_key, 50 | "public": rsa_public_key, 51 | } 52 | } 53 | } 54 | } 55 | } 56 | 57 | 58 | def show_key_config(site, key_id, rsa_private_key, rsa_public_key): 59 | if not site: 60 | raise CommandError("Site must be defined if configuration file is not provided") 61 | key_config = create_config(site, key_id, rsa_private_key, rsa_public_key) 62 | print(json.dumps(key_config, indent=4, sort_keys=True)) 63 | 64 | 65 | def save_keys(config_file, site, key_id, rsa_private_key, rsa_public_key): 66 | config_obj = config.read_json_config_file(config_file, check_commands=False, add_defaults=False) 67 | site = config.get_site_from_config(config_obj, site) 68 | site_config = config_obj["backup_sites"][site] 69 | 70 | if key_id in site_config.setdefault("encryption_keys", {}): 71 | raise CommandError("key_id {!r} already defined for site {!r} in {!r}".format(key_id, site, config_file)) 72 | site_config["encryption_keys"][key_id] = { 73 | "private": rsa_private_key, 74 | "public": rsa_public_key, 75 | } 76 | site_config["encryption_key_id"] = key_id 77 | write_json_file(config_file, config_obj) 78 | print("Saved new key_id {!r} for site {!r} in {!r}".format(key_id, site, config_file)) 79 | print( 80 | "NOTE: The pghoard daemon does not require the 'private' key in its configuration file, " 81 | "it can be stored elsewhere to improve security" 82 | ) 83 | 84 | 85 | def main(): 86 | parser = argparse.ArgumentParser() 87 | parser.add_argument("-D", "--debug", help="Enable debug logging", action="store_true") 88 | 
parser.add_argument("--version", action="version", help="show program version", version=version.__version__) 89 | parser.add_argument("--site", help="backup site", required=False) 90 | parser.add_argument("--key-id", help="key alias as used with encryption_key_id configuration directive", required=True) 91 | parser.add_argument("--bits", help="length of the generated key in bits, default %(default)d", default=3072, type=int) 92 | parser.add_argument("--config", help="configuration file to store the keys in", default=os.environ.get("PGHOARD_CONFIG")) 93 | 94 | args = parser.parse_args() 95 | logutil.configure_logging(level=logging.DEBUG if args.debug else logging.INFO) 96 | 97 | rsa_private_key, rsa_public_key = create_keys(args.bits) 98 | try: 99 | if args.config: 100 | return save_keys(args.config, args.site, args.key_id, rsa_private_key, rsa_public_key) 101 | else: 102 | return show_key_config(args.site, args.key_id, rsa_private_key, rsa_public_key) 103 | except (CommandError, InvalidConfigurationError) as ex: 104 | print("FATAL: {}".format(ex)) 105 | return 1 106 | 107 | 108 | if __name__ == "__main__": 109 | sys.exit(main() or 0) 110 | -------------------------------------------------------------------------------- /pghoard/fetcher.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | import queue 4 | import signal 5 | import threading 6 | import time 7 | 8 | from rohmu import get_transfer 9 | from rohmu.rohmufile import create_sink_pipeline 10 | 11 | from pghoard.common import get_object_storage_config 12 | from pghoard.config import key_lookup_for_site 13 | 14 | 15 | class FileFetchManager: 16 | """Manages (potentially) multiprocessing related assets for fetching file contents from 17 | object storage. 
If a multiprocess.Manager instance is provided, the fetch is performed 18 | in a subprocess to avoid GIL related performance constraints, otherwise file is fetched 19 | in current process.""" 20 | def __init__(self, app_config, mp_manager, transfer_provider): 21 | self.config = app_config 22 | self.last_activity = time.monotonic() 23 | self.lock = threading.RLock() 24 | self.max_idle_age = 10 * 60 25 | self.mp_manager = mp_manager 26 | self.process = None 27 | self.result_queue = None 28 | self.task_queue = None 29 | self.transfer_provider = transfer_provider 30 | 31 | def check_state(self): 32 | if self.process and time.monotonic() - self.last_activity > self.max_idle_age: 33 | self.stop() 34 | 35 | def fetch_file(self, site, key, target_path): 36 | self.last_activity = time.monotonic() 37 | self._start_process() 38 | if self.mp_manager: 39 | self.task_queue.put((self.config, site, key, target_path)) 40 | result = self.result_queue.get() 41 | if result is None: 42 | # Should only happen if the process is terminated while we're waiting for 43 | # a result, which is pretty much the same as timeout 44 | raise queue.Empty 45 | elif isinstance(result[1], Exception): 46 | raise result[1] 47 | return result[1], result[2] 48 | else: 49 | transfer = self.transfer_provider(site) 50 | return FileFetcher(self.config, transfer).fetch(site, key, target_path) 51 | 52 | def stop(self): 53 | with self.lock: 54 | if not self.process: 55 | return 56 | self.task_queue.put(None) 57 | self.result_queue.put(None) 58 | process = self.process 59 | self.process = None 60 | self.task_queue = None 61 | self.result_queue = None 62 | process.join(timeout=0.1) 63 | if process.exitcode is None: 64 | os.kill(process.pid, signal.SIGKILL) 65 | process.join() 66 | 67 | def _start_process(self): 68 | with self.lock: 69 | if not self.mp_manager or self.process: 70 | return 71 | self.result_queue = self.mp_manager.Queue() 72 | self.task_queue = self.mp_manager.Queue() 73 | self.process = multiprocessing.Process(target=_remote_file_fetch_loop, args=(self.task_queue, self.result_queue)) 74 | self.process.start() 75 | 76 | 77 | class FileFetcher: 78 | """Fetches a file from object storage and strips possible encryption and/or compression away.""" 79 | def __init__(self, app_config, transfer): 80 | self.config = app_config 81 | self.transfer = transfer 82 | 83 | def fetch(self, site, key, target_path): 84 | try: 85 | lookup = key_lookup_for_site(self.config, site) 86 | data, metadata = self.transfer.get_contents_to_string(key) 87 | if isinstance(data, str): 88 | data = data.encode("latin1") 89 | file_size = len(data) 90 | with open(target_path, "wb") as target_file: 91 | output = create_sink_pipeline( 92 | output=target_file, file_size=file_size, metadata=metadata, key_lookup=lookup, throttle_time=0 93 | ) 94 | output.write(data) 95 | return file_size, metadata 96 | except Exception: 97 | if os.path.isfile(target_path): 98 | os.unlink(target_path) 99 | raise 100 | 101 | 102 | def _remote_file_fetch_loop(task_queue, result_queue): 103 | transfers = {} 104 | obj_storage_configs = {} 105 | while True: 106 | task = task_queue.get() 107 | if not task: 108 | return 109 | try: 110 | app_config, site, key, target_path = task 111 | obj_storage_config = get_object_storage_config(app_config, site) 112 | transfer = transfers.get(site) 113 | 114 | # even if we got a transfer for the site 115 | # we should check if there was a change on the site's storage config, in such case 116 | # we must get the correct transfer 117 | if not transfer or 
obj_storage_configs.get(site, {}) != obj_storage_config: 118 | transfer = get_transfer(obj_storage_config) 119 | transfers[site] = transfer 120 | obj_storage_configs[site] = obj_storage_config 121 | 122 | file_size, metadata = FileFetcher(app_config, transfer).fetch(site, key, target_path) 123 | result_queue.put((task, file_size, metadata)) 124 | except Exception as e: # pylint: disable=broad-except 125 | result_queue.put((task, e)) 126 | -------------------------------------------------------------------------------- /pghoard/logutil.py: -------------------------------------------------------------------------------- 1 | # Copied from https://github.com/ohmu/ohmu_common_py ohmu_common_py/logutil.py version 0.0.1-0-unknown-fa54b44 2 | """ 3 | pghoard - logging formats and utility functions 4 | 5 | Copyright (c) 2015 Ohmu Ltd 6 | See LICENSE for details 7 | """ 8 | 9 | import logging 10 | import logging.handlers 11 | import os 12 | 13 | try: 14 | from systemd import daemon # pylint: disable=no-name-in-module 15 | except ImportError: 16 | daemon = None 17 | 18 | LOG_FORMAT = "%(asctime)s\t%(name)s\t%(threadName)s\t%(levelname)s\t%(message)s" 19 | LOG_FORMAT_SHORT = "%(levelname)s\t%(message)s" 20 | LOG_FORMAT_SYSLOG = "%(name)s %(threadName)s %(levelname)s: %(message)s" 21 | 22 | 23 | def set_syslog_handler(address, facility, logger): 24 | syslog_handler = logging.handlers.SysLogHandler(address=address, facility=facility) 25 | logger.addHandler(syslog_handler) 26 | formatter = logging.Formatter(LOG_FORMAT_SYSLOG) 27 | syslog_handler.setFormatter(formatter) 28 | return syslog_handler 29 | 30 | 31 | def configure_logging(level=logging.DEBUG, short_log=False): 32 | # Are we running under systemd? 33 | if os.getenv("NOTIFY_SOCKET"): 34 | logging.basicConfig(level=level, format=LOG_FORMAT_SYSLOG) 35 | if not daemon: 36 | print("WARNING: Running under systemd but python-systemd not available, " "systemd won't see our notifications") 37 | else: 38 | logging.basicConfig(level=level, format=LOG_FORMAT_SHORT if short_log else LOG_FORMAT) 39 | 40 | 41 | def notify_systemd(status): 42 | if daemon: 43 | daemon.notify(status) 44 | -------------------------------------------------------------------------------- /pghoard/mapping.py: -------------------------------------------------------------------------------- 1 | clients = { 2 | "statsd": ("pghoard.monitoring.statsd", "StatsClient"), 3 | "pushgateway": ("pghoard.monitoring.pushgateway", "PushgatewayClient"), 4 | "prometheus": ("pghoard.monitoring.prometheus", "PrometheusClient"), 5 | } 6 | -------------------------------------------------------------------------------- /pghoard/metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interface for monitoring clients 3 | 4 | """ 5 | import pghoard 6 | 7 | 8 | class Metrics: 9 | def __init__(self, **configs): 10 | self.clients = self._init_clients(configs) 11 | 12 | def _init_clients(self, configs): 13 | clients = {} 14 | 15 | if not isinstance(configs, dict): 16 | return clients 17 | 18 | map_client = pghoard.mapping.clients 19 | for k, config in configs.items(): 20 | if isinstance(config, dict) and k in map_client: 21 | path, classname = map_client[k] 22 | mod = __import__(path, fromlist=[classname]) 23 | klass = getattr(mod, classname) 24 | clients[k] = klass(config) 25 | 26 | return clients 27 | 28 | def gauge(self, metric, value, tags=None): 29 | for client in self.clients.values(): 30 | client.gauge(metric, value, tags) 31 | 32 | def increase(self, 
metric, inc_value=1, tags=None): 33 | for client in self.clients.values(): 34 | client.increase(metric, inc_value, tags) 35 | 36 | def unexpected_exception(self, ex, where, tags=None): 37 | for client in self.clients.values(): 38 | client.unexpected_exception(ex, where, tags) 39 | -------------------------------------------------------------------------------- /pghoard/monitoring/__init__.py: -------------------------------------------------------------------------------- 1 | import pkgutil 2 | 3 | __path__ = pkgutil.extend_path(__path__, __name__) # type: ignore 4 | for importer, modname, ispkg in pkgutil.walk_packages(path=__path__, prefix=__name__ + "."): 5 | __import__(modname) 6 | -------------------------------------------------------------------------------- /pghoard/monitoring/prometheus.py: -------------------------------------------------------------------------------- 1 | """ 2 | Prometheus client (used to create a Prometheus endpoint) 3 | 4 | """ 5 | 6 | import time 7 | 8 | 9 | class PrometheusClient: 10 | def __init__(self, config): 11 | self._tags = config.get("tags", {}) 12 | self.metrics = {} 13 | 14 | def gauge(self, metric, value, tags=None): 15 | self._update(metric, value, tags) 16 | 17 | def increase(self, metric, inc_value=1, tags=None): 18 | self._update(metric, inc_value, tags) 19 | 20 | def unexpected_exception(self, ex, where, tags=None): 21 | all_tags = { 22 | "exception": ex.__class__.__name__, 23 | "where": where, 24 | } 25 | all_tags.update(tags or {}) 26 | self.increase("pghoard.exception", tags=all_tags) 27 | 28 | def get_metrics(self): 29 | data = [] 30 | for metric, value in self.metrics.items(): 31 | line = "{} {} {}".format(metric, value.get("value"), value.get("ts")) 32 | data.append(line) 33 | return data 34 | 35 | def _update(self, metric, value, tags): 36 | ts = str(int(time.time())) + "000" 37 | 38 | metric = metric.replace(".", "_").replace("-", "_") 39 | tags = {**self._tags, **tags} if tags else {**self._tags} 40 | tag_list = [] 41 | for k in sorted(tags.keys()): 42 | tag_list.append("{}=\"{}\"".format(k, tags[k])) 43 | encoded_tags = "{{{}}}".format(",".join(tag_list)) 44 | formatted_metric = "{}{}".format(metric, encoded_tags) 45 | self.metrics[formatted_metric] = {"value": value, "ts": ts} 46 | -------------------------------------------------------------------------------- /pghoard/monitoring/pushgateway.py: -------------------------------------------------------------------------------- 1 | """ 2 | Prometheus Pushgateway client 3 | 4 | """ 5 | import socket 6 | 7 | import requests 8 | 9 | 10 | class PushgatewayClient: 11 | def __init__(self, config): 12 | self._endpoint = config.get("endpoint", "") 13 | self._job = config.get("job", "pghoard") 14 | self._instance = config.get("instance", "") 15 | self._tags = config.get("tags", {}) 16 | 17 | def gauge(self, metric, value, tags=None): 18 | self._send(metric, "gauge", value, tags) 19 | 20 | def increase(self, metric, inc_value=1, tags=None): 21 | self._send(metric, "counter", inc_value, tags) 22 | 23 | def unexpected_exception(self, ex, where, tags=None): 24 | all_tags = { 25 | "exception": ex.__class__.__name__, 26 | "where": where, 27 | } 28 | all_tags.update(tags or {}) 29 | self.increase("pghoard.exception", tags=all_tags) 30 | 31 | def _send(self, metric, metric_type, value, tags): 32 | tags = {**self._tags, **tags} if tags else {**self._tags} 33 | 34 | if len(self._endpoint) == 0: 35 | return 36 | 37 | instance = self._instance if self._instance else tags.get("site", 
socket.gethostname()) 38 | 39 | data = self._build_data(metric, metric_type, value, tags) 40 | requests.post("{}/metrics/job/{}/instance/{}".format(self._endpoint, self._job, instance), data=data) 41 | 42 | def _build_data(self, metric, metric_type, value, tags): 43 | metric = metric.replace(".", "_").replace("-", "_") 44 | tag_list = [] 45 | for k, v in sorted(tags.items()): 46 | tag_list.append("{}=\"{}\"".format(k, v)) 47 | 48 | encoded_tags = "{{{}}}".format(",".join(tag_list)) 49 | return """# TYPE {0} {1} 50 | {0}{2} {3} 51 | """.format(metric, metric_type, encoded_tags, value) 52 | -------------------------------------------------------------------------------- /pghoard/monitoring/statsd.py: -------------------------------------------------------------------------------- 1 | """ 2 | StatsD client 3 | 4 | Supports telegraf's statsd protocol extension for 'key=value' tags: 5 | 6 | https://github.com/influxdata/telegraf/tree/master/plugins/inputs/statsd 7 | 8 | """ 9 | import socket 10 | 11 | 12 | class StatsClient: 13 | def __init__(self, config): 14 | self._dest_addr = (config.get("host", "127.0.0.1"), config.get("port", 8125)) 15 | self._socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 16 | self._tags = config.get("tags", {}) 17 | self._message_format = config.get("format", "telegraf") 18 | 19 | def gauge(self, metric, value, tags=None): 20 | self._send(metric, b"g", value, tags) 21 | 22 | def increase(self, metric, inc_value=1, tags=None): 23 | self._send(metric, b"c", inc_value, tags) 24 | 25 | def unexpected_exception(self, ex, where, tags=None): 26 | all_tags = { 27 | "exception": ex.__class__.__name__, 28 | "where": where, 29 | } 30 | all_tags.update(tags or {}) 31 | self.increase("pghoard.exception", tags=all_tags) 32 | 33 | def _send(self, metric, metric_type, value, tags): 34 | if None in self._dest_addr: 35 | # stats sending is disabled 36 | return 37 | 38 | # telegraf format: "user.logins,service=payroll,region=us-west:1|c" 39 | # datadog format: metric.name:value|type|@sample_rate|#tag1:value,tag2 40 | # http://docs.datadoghq.com/guides/dogstatsd/#datagram-format 41 | 42 | parts = [metric.encode("utf-8"), b":", str(value).encode("utf-8"), b"|", metric_type] 43 | send_tags = self._tags.copy() 44 | send_tags.update(tags or {}) 45 | if self._message_format == "datadog": 46 | for index, (tag, val) in enumerate(sorted(send_tags.items())): 47 | if index == 0: 48 | separator = "|#" 49 | else: 50 | separator = "," 51 | if val is None: 52 | pattern = "{}{}" 53 | else: 54 | pattern = "{}{}:{}" 55 | parts.append(pattern.format(separator, tag, val).encode("utf-8")) 56 | else: 57 | for tag, val in reversed(sorted(send_tags.items())): 58 | parts.insert(1, ",{}={}".format(tag, val).encode("utf-8")) 59 | 60 | self._socket.sendto(b"".join(parts), self._dest_addr) 61 | -------------------------------------------------------------------------------- /pghoard/object_store.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Aiven Ltd 3 | See LICENSE for details 4 | """ 5 | import datetime 6 | import logging 7 | import os 8 | from pathlib import Path 9 | from typing import Optional 10 | 11 | from requests import Session 12 | from requests.auth import HTTPBasicAuth 13 | from rohmu import dates 14 | 15 | 16 | class ObjectStore: 17 | def __init__(self, storage, prefix, site, pgdata): 18 | self.storage = storage 19 | self.prefix = prefix 20 | self.site = site 21 | self.pgdata = pgdata 22 | self.log = 
logging.getLogger(self.__class__.__name__) 23 | 24 | def list_basebackups(self): 25 | return self.storage.list_path(os.path.join(self.prefix, "basebackup")) 26 | 27 | def try_request_backup_preservation(self, basebackup: str, preserve_until: datetime.datetime) -> Optional[str]: 28 | try: 29 | return self.request_backup_preservation(basebackup, preserve_until) 30 | except Exception: # pylint: disable=broad-except 31 | # rohmu does not wrap storage implementation errors in high-level errors: 32 | # we can't catch something more specific like "permission denied". 33 | self.log.exception("Could not request backup preservation") 34 | return None 35 | 36 | def try_cancel_backup_preservation(self, request_name: str) -> None: 37 | try: 38 | self.cancel_backup_preservation(request_name) 39 | except Exception: # pylint: disable=broad-except 40 | # rohmu does not wrap storage implementation errors in high-level errors: 41 | # we can't catch something more specific like "permission denied". 42 | self.log.exception("Could not cancel backup preservation") 43 | 44 | def request_backup_preservation(self, basebackup: str, preserve_until: datetime.datetime) -> str: 45 | backup_name = Path(basebackup).name 46 | request_name = f"{backup_name}_{preserve_until}" 47 | request_path = os.path.join(self.prefix, "preservation_request", request_name) 48 | self.storage.store_file_from_memory( 49 | request_path, b"", { 50 | "preserve-backup": backup_name, 51 | "preserve-until": str(preserve_until) 52 | } 53 | ) 54 | return request_name 55 | 56 | def cancel_backup_preservation(self, request_name: str) -> None: 57 | request_path = os.path.join(self.prefix, "preservation_request", request_name) 58 | self.storage.delete_key(request_path) 59 | 60 | def show_basebackup_list(self, verbose=True): 61 | result = self.list_basebackups() 62 | caption = "Available %r basebackups:" % self.site 63 | print_basebackup_list(result, caption=caption, verbose=verbose) 64 | 65 | def get_basebackup_metadata(self, basebackup): 66 | return self.storage.get_metadata_for_key(basebackup) 67 | 68 | def get_basebackup_file_to_fileobj(self, basebackup, fileobj, *, progress_callback=None): 69 | return self.storage.get_contents_to_fileobj(basebackup, fileobj, progress_callback=progress_callback) 70 | 71 | def get_file_bytes(self, name): 72 | return self.storage.get_contents_to_string(name)[0] 73 | 74 | 75 | class HTTPRestore(ObjectStore): 76 | def __init__(self, host, port, site, pgdata=None, *, username=None, password=None): 77 | super().__init__(storage=None, prefix=None, site=site, pgdata=pgdata) 78 | self.host = host 79 | self.port = port 80 | self.session = Session() 81 | if username and password: 82 | self.session.auth = HTTPBasicAuth(username, password) 83 | 84 | def _url(self, path): 85 | return f"http://{self.host}:{self.port}/{self.site}/{path}" 86 | 87 | def list_basebackups(self): 88 | response = self.session.get(self._url("basebackup")) 89 | return response.json()["basebackups"] 90 | 91 | 92 | def print_basebackup_list(basebackups, *, caption="Available basebackups", verbose=True): 93 | print(caption, "\n") 94 | fmt = "{name:40} {size:>11} {orig_size:>11} {time:20}".format 95 | print(fmt(name="Basebackup", size="Backup size", time="Start time", orig_size="Orig size")) 96 | print(fmt(name="-" * 40, size="-" * 11, time="-" * 20, orig_size="-" * 11)) 97 | for b in sorted(basebackups, key=lambda b: b["name"]): 98 | meta = b["metadata"].copy() 99 | lm = meta.pop("start-time") 100 | if isinstance(lm, str): 101 | lm = dates.parse_timestamp(lm) 
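        # "start-time" may arrive as a string or a datetime; the block below
        # normalizes it to a naive UTC value so it can be rendered uniformly
        # as an ISO-8601 timestamp with a trailing "Z".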
102 | if lm.tzinfo: 103 | lm = lm.astimezone(datetime.timezone.utc).replace(tzinfo=None) 104 | lm_str = lm.isoformat()[:19] + "Z" # # pylint: disable=no-member 105 | size_str = "{} MB".format(int(meta.get("total-size-enc", b["size"])) // (1024 ** 2)) 106 | orig_size = int(meta.get("total-size-plain", meta.get("original-file-size")) or 0) 107 | if orig_size: 108 | orig_size_str = "{} MB".format(orig_size // (1024 ** 2)) 109 | else: 110 | orig_size_str = "n/a" 111 | print(fmt(name=b["name"], size=size_str, time=lm_str, orig_size=orig_size_str)) 112 | if verbose: 113 | print(" metadata:", meta) 114 | -------------------------------------------------------------------------------- /pghoard/pgutil.py: -------------------------------------------------------------------------------- 1 | # Copied from https://github.com/ohmu/ohmu_common_py ohmu_common_py/pgutil.py version 0.0.1-0-unknown-fa54b44 2 | """ 3 | pghoard - postgresql utility functions 4 | 5 | Copyright (c) 2015 Ohmu Ltd 6 | See LICENSE for details 7 | """ 8 | from urllib.parse import parse_qs, urlparse 9 | 10 | from psycopg2.extensions import (TRANSACTION_STATUS_ACTIVE, TRANSACTION_STATUS_IDLE, TRANSACTION_STATUS_INTRANS) 11 | 12 | 13 | def create_connection_string(connection_info): 14 | return " ".join("{}='{}'".format(k, str(v).replace("'", "\\'")) for k, v in sorted(connection_info.items())) 15 | 16 | 17 | def mask_connection_info(info): 18 | masked_info = get_connection_info(info) 19 | password = masked_info.pop("password", None) 20 | return "{0}; {1} password".format(create_connection_string(masked_info), "no" if password is None else "hidden") 21 | 22 | 23 | def get_connection_info_from_config_line(line): 24 | _, value = line.split("=", 1) 25 | value = value.strip()[1:-1].replace("''", "'") 26 | return get_connection_info(value) 27 | 28 | 29 | def get_connection_info(info): 30 | """turn a connection info object into a dict or return it if it was a 31 | dict already. 
supports both the traditional libpq format and the new 32 | url format""" 33 | if isinstance(info, dict): 34 | return info.copy() 35 | elif info.startswith("postgres://") or info.startswith("postgresql://"): 36 | return parse_connection_string_url(info) 37 | else: 38 | return parse_connection_string_libpq(info) 39 | 40 | 41 | def parse_connection_string_url(url): 42 | # drop scheme from the url as some versions of urlparse don't handle 43 | # query and path properly for urls with a non-http scheme 44 | schemeless_url = url.split(":", 1)[1] 45 | p = urlparse(schemeless_url) 46 | fields = {} 47 | if p.hostname: 48 | fields["host"] = p.hostname 49 | if p.port: 50 | fields["port"] = str(p.port) 51 | if p.username: 52 | fields["user"] = p.username 53 | if p.password is not None: 54 | fields["password"] = p.password 55 | if p.path and p.path != "/": 56 | fields["dbname"] = p.path[1:] 57 | for k, v in parse_qs(p.query).items(): 58 | fields[k] = v[-1] 59 | return fields 60 | 61 | 62 | def parse_connection_string_libpq(connection_string): 63 | """parse a postgresql connection string as defined in 64 | http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING""" 65 | fields = {} 66 | while True: 67 | connection_string = connection_string.strip() 68 | if not connection_string: 69 | break 70 | if "=" not in connection_string: 71 | raise ValueError("expecting key=value format in connection_string fragment {!r}".format(connection_string)) 72 | key, rem = connection_string.split("=", 1) 73 | if rem.startswith("'"): 74 | asis, value = False, "" 75 | for i in range(1, len(rem)): 76 | if asis: 77 | value += rem[i] 78 | asis = False 79 | elif rem[i] == "'": 80 | break # end of entry 81 | elif rem[i] == "\\": 82 | asis = True 83 | else: 84 | value += rem[i] 85 | else: 86 | raise ValueError("invalid connection_string fragment {!r}".format(rem)) 87 | connection_string = rem[i + 1:] # pylint: disable=undefined-loop-variable 88 | else: 89 | res = rem.split(None, 1) 90 | if len(res) > 1: 91 | value, connection_string = res 92 | else: 93 | value, connection_string = rem, "" 94 | fields[key] = value 95 | return fields 96 | 97 | 98 | def check_if_pg_connection_is_alive(db_conn) -> bool: 99 | if db_conn.closed: 100 | return False 101 | 102 | status = db_conn.get_transaction_status() 103 | if status not in [TRANSACTION_STATUS_ACTIVE, TRANSACTION_STATUS_IDLE, TRANSACTION_STATUS_INTRANS]: 104 | return False 105 | 106 | return True 107 | -------------------------------------------------------------------------------- /pghoard/postgres_command.py: -------------------------------------------------------------------------------- 1 | """ 2 | pghoard - archive_command and restore_command for postgresql 3 | 4 | Copyright (c) 2016 Ohmu Ltd 5 | See LICENSE for details 6 | """ 7 | 8 | import argparse 9 | import base64 10 | import os 11 | import socket 12 | import sys 13 | import time 14 | from http.client import BadStatusLine, HTTPConnection, IncompleteRead 15 | 16 | from pghoard.wal import TIMELINE_RE, WAL_RE 17 | 18 | from . import version 19 | 20 | PGHOARD_HOST = "127.0.0.1" 21 | PGHOARD_PORT = 16000 22 | 23 | # When running restore_command PostgreSQL interprets exit codes 1..125 as "file not found errors" signalling 24 | # that there's no such WAL file from which PostgreSQL assumes that we've completed recovery. 
We never want to 25 | # return such an error code unless we actually got confirmation that the requested file isn't in the backend so 26 | # we try to exit with EXIT_ERROR (255) status whenever we see unexpected errors. Such an error code causes 27 | # PostgreSQL to abort recovery and wait for admin interaction. 28 | # 29 | # The above considerations apply to handling archive_command, but in its case there's no reason for us to ask 30 | # PostgreSQL to abort, we want it to just retry indefinitely so we'll always return a code between 1..125. 31 | # 32 | # Note that EXIT_NOT_FOUND and EXIT_ARCHIVE_FAIL and their error codes are not defined or required by 33 | # PostgreSQL, they're just used for convenience here and to test for differences between various failure 34 | # scenarios (Python exits with status 1 on uncaught exceptions.) 35 | EXIT_OK = 0 36 | EXIT_FAIL = 1 37 | EXIT_UNEXPECTED = 2 38 | EXIT_ARCHIVE_FAIL = 3 39 | EXIT_NOT_FOUND = 4 40 | EXIT_ABORT = 255 41 | 42 | 43 | class PGCError(Exception): 44 | def __init__(self, message, exit_code=EXIT_FAIL): 45 | super().__init__(message) 46 | self.exit_code = exit_code 47 | 48 | 49 | def http_request(host, port, method, path, headers=None, *, username=None, password=None): 50 | conn = HTTPConnection(host=host, port=port) 51 | if headers is not None: 52 | headers = headers.copy() 53 | else: 54 | headers = {} 55 | if username and password: 56 | auth_str = base64.b64encode(f"{username}:{password}".encode("utf-8")).decode() 57 | headers["Authorization"] = f"Basic {auth_str}" 58 | try: 59 | conn.request(method, path, headers=headers) 60 | resp = conn.getresponse() 61 | finally: 62 | conn.close() 63 | return resp.status 64 | 65 | 66 | def archive_command(site, xlog, host=PGHOARD_HOST, port=PGHOARD_PORT): 67 | if xlog.endswith(".backup"): 68 | print("Ignoring request to archive backup label {!r}: PGHoard does not use them".format(xlog)) 69 | return 70 | status = http_request(host, port, "PUT", "/{}/archive/{}".format(site, xlog)) 71 | if status == 201: 72 | return 73 | raise PGCError("Archival failed with HTTP status {}".format(status), exit_code=EXIT_ARCHIVE_FAIL) 74 | 75 | 76 | def restore_command(site, xlog, output, host=PGHOARD_HOST, port=PGHOARD_PORT, retry_interval=5, retry_count=3): 77 | if not output: 78 | headers = {} 79 | method = "HEAD" 80 | else: 81 | # Construct absolute path for output - postgres calls this command with a relative path to its xlog 82 | # directory. Note that os.path.join strips preceding components if a new components starts with a 83 | # slash so it's still possible to use this with absolute paths. 
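        # For example (hypothetical values): with cwd "/var/lib/pgsql/data" and output
        # "pg_wal/RECOVERYXLOG" the target becomes "/var/lib/pgsql/data/pg_wal/RECOVERYXLOG",
        # while an absolute output such as "/tmp/RECOVERYXLOG" is used as-is.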
84 | output_path = os.path.join(os.getcwd(), output) 85 | if WAL_RE.match(xlog) or TIMELINE_RE.match(xlog): 86 | # if file ".pghoard.prefetch" exists, just move it to destination 87 | prefetch_path = os.path.join(os.path.dirname(output_path), xlog + ".pghoard.prefetch") 88 | if os.path.exists(prefetch_path): 89 | os.rename(prefetch_path, output_path) 90 | return 91 | headers = {"x-pghoard-target-path": output_path} 92 | method = "GET" 93 | path = "/{}/archive/{}".format(site, xlog) 94 | 95 | for retries in range(retry_count - 1, -1, -1): 96 | try: 97 | status = http_request(host, port, method, path, headers) 98 | break 99 | except (socket.error, BadStatusLine, IncompleteRead) as ex: 100 | err = "HTTP connection to {0}:{1} failed: {2.__class__.__name__}: {2}".format(host, port, ex) 101 | if not retries: 102 | raise PGCError(err, exit_code=EXIT_ABORT) 103 | print("{}; {} retries left, sleeping {} seconds and retrying".format(err, retries, retry_interval)) 104 | time.sleep(retry_interval) 105 | 106 | if status == 201 and method == "GET": 107 | return 108 | if status == 200 and method == "HEAD": 109 | return 110 | # NOTE: PostgreSQL interprets exit codes 1..125 as "file not found errors" signalling that there's no 111 | # such wal file from which PostgreSQL assumes that we've completed recovery so we never want to return 112 | # such an error code unless we actually got confirmation that the file isn't in the backend. 113 | if status == 404: 114 | raise PGCError("{!r} not found from archive".format(xlog), exit_code=EXIT_NOT_FOUND) 115 | raise PGCError("Restore failed with HTTP status {}".format(status), exit_code=EXIT_ABORT) 116 | 117 | 118 | def main(args=None): 119 | parser = argparse.ArgumentParser() 120 | parser.add_argument("--version", action="version", help="show program version", version=version.__version__) 121 | parser.add_argument("--host", type=str, default=PGHOARD_HOST, help="pghoard service host") 122 | parser.add_argument("--port", type=int, default=PGHOARD_PORT, help="pghoard service port") 123 | parser.add_argument("--username", type=str, help="pghoard service username") 124 | parser.add_argument("--password", type=str, help="pghoard service password") 125 | parser.add_argument("--site", type=str, required=True, help="pghoard backup site") 126 | parser.add_argument("--xlog", type=str, required=True, help="xlog file name") 127 | parser.add_argument("--output", type=str, help="output file") 128 | parser.add_argument("--mode", type=str, required=True, choices=["archive", "restore"], help="operation mode") 129 | 130 | # Note that we try to catch as many exception as possible and to exit with return code 255 unless we get a 131 | # custom exception stating otherwise. This is to avoid signalling "end of recovery" to PostgreSQL. 
132 | fail_exit_code = EXIT_ABORT 133 | try: 134 | pa = parser.parse_args(args) 135 | if pa.mode == "archive": 136 | fail_exit_code = EXIT_UNEXPECTED # pg can just try again 137 | archive_command(pa.site, pa.xlog, pa.host, pa.port) 138 | elif pa.mode == "restore": 139 | restore_command(pa.site, pa.xlog, pa.output, pa.host, pa.port) 140 | else: 141 | raise PGCError("Unexpected command {!r}".format(pa.mode)) 142 | return EXIT_OK 143 | except PGCError as ex: 144 | print("{}: ERROR: {}".format(sys.argv[0], ex)) 145 | return ex.exit_code 146 | except SystemExit: 147 | return fail_exit_code 148 | except: # pylint: disable=bare-except 149 | import traceback 150 | traceback.print_exc() 151 | return fail_exit_code 152 | 153 | 154 | if __name__ == "__main__": 155 | sys.exit(main()) 156 | -------------------------------------------------------------------------------- /pghoard/preservation_request.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Aiven Ltd 3 | See LICENSE for details 4 | """ 5 | import datetime 6 | from typing import Any, Mapping, Sequence 7 | 8 | from rohmu import dates 9 | 10 | 11 | def patch_basebackup_metadata_with_preservation( 12 | basebackup_entry: Mapping[str, Any], 13 | backups_to_preserve: Mapping[str, datetime.datetime], 14 | ) -> None: 15 | basebackup_entry["metadata"]["preserve-until"] = backups_to_preserve.get(basebackup_entry["name"]) 16 | 17 | 18 | def is_basebackup_preserved(basebackup_entry: Mapping[str, Any], now: datetime.datetime) -> bool: 19 | preserve_until = basebackup_entry["metadata"].get("preserve-until") 20 | return preserve_until is not None and preserve_until > now 21 | 22 | 23 | def parse_preservation_requests(preservation_requests: Sequence[Mapping[str, Any]], ) -> Mapping[str, datetime.datetime]: 24 | backups_to_preserve: dict[str, datetime.datetime] = {} 25 | for preservation_request in preservation_requests: 26 | backup_name = preservation_request["metadata"]["preserve-backup"] 27 | preserve_until = dates.parse_timestamp(preservation_request["metadata"]["preserve-until"]) 28 | if backup_name in backups_to_preserve: 29 | backups_to_preserve[backup_name] = max(backups_to_preserve[backup_name], preserve_until) 30 | else: 31 | backups_to_preserve[backup_name] = preserve_until 32 | return backups_to_preserve 33 | -------------------------------------------------------------------------------- /pghoard/receivexlog.py: -------------------------------------------------------------------------------- 1 | """ 2 | pghoard - pg_receivexlog handler 3 | 4 | Copyright (c) 2016 Ohmu Ltd 5 | See LICENSE for details 6 | """ 7 | 8 | import datetime 9 | import logging 10 | import os 11 | import select 12 | import signal 13 | import subprocess 14 | import time 15 | 16 | from .common import (PGHoardThread, set_subprocess_stdout_and_stderr_nonblocking, terminate_subprocess) 17 | 18 | 19 | class PGReceiveXLog(PGHoardThread): 20 | def __init__(self, config, connection_string, wal_location, site, slot, pg_version_server): 21 | super().__init__() 22 | pg_receivexlog_config = config["backup_sites"][site]["pg_receivexlog"] 23 | self.log = logging.getLogger("PGReceiveXLog") 24 | self.config = config 25 | self.connection_string = connection_string 26 | self.disk_space_check_interval = pg_receivexlog_config["disk_space_check_interval"] 27 | self.last_disk_space_check = time.monotonic() 28 | self.min_disk_space = pg_receivexlog_config.get("min_disk_free_bytes") 29 | self.resume_multiplier = 
pg_receivexlog_config["resume_multiplier"] 30 | self.wal_location = wal_location 31 | self.site = site 32 | self.slot = slot 33 | self.pg_version_server = pg_version_server 34 | self.pid = None 35 | self.receiver_paused = False 36 | self.running = False 37 | self.latest_activity = datetime.datetime.utcnow() 38 | self.log.debug("Initialized PGReceiveXLog") 39 | 40 | def run_safe(self): 41 | self.running = True 42 | 43 | command = [ 44 | self.config["backup_sites"][self.site]["pg_receivexlog_path"], 45 | "--status-interval", 46 | "1", 47 | "--verbose", 48 | "--directory", 49 | self.wal_location, 50 | ] 51 | command.extend(["--dbname", self.connection_string]) 52 | 53 | if self.pg_version_server >= 90400 and self.slot: 54 | command.extend(["--slot", self.slot]) 55 | 56 | self.log.debug("Starting to run: %r", command) 57 | start_time = time.time() 58 | proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 59 | set_subprocess_stdout_and_stderr_nonblocking(proc) 60 | self.pid = proc.pid 61 | self.log.info("Started: %r, running as PID: %r", command, self.pid) 62 | while self.running: 63 | rlist, _, _ = select.select([proc.stdout, proc.stderr], [], [], min(1.0, self.disk_space_check_interval)) 64 | for fd in rlist: 65 | content = fd.read() 66 | if content: 67 | self.log.info(content) 68 | self.latest_activity = datetime.datetime.utcnow() 69 | if proc.poll() is not None: 70 | break 71 | self.stop_or_continue_based_on_free_disk() 72 | self.continue_pg_receivewal() 73 | rc = terminate_subprocess(proc, log=self.log) 74 | self.log.debug("Ran: %r, took: %.3fs to run, returncode: %r", command, time.time() - start_time, rc) 75 | self.running = False 76 | 77 | def stop_or_continue_based_on_free_disk(self): 78 | if not self.min_disk_space: 79 | return 80 | 81 | now = time.monotonic() 82 | if now - self.last_disk_space_check < self.disk_space_check_interval: 83 | return 84 | 85 | bytes_free = self.get_disk_bytes_free() 86 | if not self.receiver_paused: 87 | if bytes_free < self.min_disk_space: 88 | self.log.warning( 89 | "Free disk space %.1f MiB is below configured minimum %.1f MiB, pausing pg_receive(wal|xlog)", 90 | bytes_free / 1024.0 / 1024.0, self.min_disk_space / 1024.0 / 1024.0 91 | ) 92 | self.pause_pg_receivewal() 93 | else: 94 | min_free_bytes = int(self.min_disk_space * self.resume_multiplier) 95 | if bytes_free >= min_free_bytes: 96 | self.log.info( 97 | "Free disk space %.1f MiB is above configured resume threshold %.1f MiB, resuming pg_receive(wal|xlog)", 98 | bytes_free / 1024.0 / 1024.0, min_free_bytes / 1024.0 / 1024.0 99 | ) 100 | self.continue_pg_receivewal() 101 | 102 | def get_disk_bytes_free(self): 103 | st = os.statvfs(self.wal_location) 104 | return st.f_bfree * st.f_bsize 105 | 106 | def continue_pg_receivewal(self): 107 | if not self.receiver_paused or not self.pid: 108 | return 109 | 110 | os.kill(self.pid, signal.SIGCONT) 111 | self.receiver_paused = False 112 | 113 | def pause_pg_receivewal(self): 114 | if self.receiver_paused or not self.pid: 115 | return 116 | 117 | os.kill(self.pid, signal.SIGSTOP) 118 | self.receiver_paused = True 119 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-vcs"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "pghoard" 7 | authors = [ 8 | { name="Aiven", email="opensource@aiven.io" }, 9 | { name="Hannu Valtonen", 
email="hannu.valtonen@ohmu.fi" } 10 | ] 11 | description = "PostgreSQL automatic backup/restore service daemon." 12 | readme = "README.rst" 13 | requires-python = ">=3.8" 14 | classifiers=[ 15 | "Development Status :: 5 - Production/Stable", 16 | "Intended Audience :: Developers", 17 | "Intended Audience :: Information Technology", 18 | "Intended Audience :: System Administrators", 19 | "License :: OSI Approved :: Apache Software License", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | "Topic :: Database :: Database Engines/Servers", 24 | "Topic :: Software Development :: Libraries", 25 | ] 26 | license = { text = "Apache License 2.0" } 27 | dynamic = ["version"] 28 | dependencies = [ 29 | "cryptography", 30 | "psycopg2-binary >= 2.8.0", 31 | "pydantic", 32 | "python-dateutil", 33 | "python-snappy >= 0.5", 34 | "python-systemd", 35 | "requests >= 1.2.0", 36 | "rohmu >= 1.0.7", 37 | "zstandard >= 0.11.1", 38 | ] 39 | 40 | [project.optional-dependencies] 41 | dev = [ 42 | "boto3", 43 | "mock", 44 | "mypy", 45 | "pylint>=2.4.3,<=2.7.2", 46 | "pylint-quotes", 47 | "pytest", 48 | "pytest-cov", 49 | "pytest-mock", 50 | "pytest-timeout", 51 | "pytest-xdist", 52 | "yapf==0.30.0", 53 | "isort==5.7.0", 54 | "coverage", 55 | "coveralls", 56 | "freezegun>=1.2", 57 | "responses", 58 | "unify", 59 | "types-botocore", 60 | "types-httplib2", 61 | "types-mock", 62 | "types-paramiko", 63 | "types-psycopg2", 64 | "types-python-dateutil", 65 | "types-requests", 66 | "types-six", 67 | ] 68 | constraints = [ 69 | "astroid==2.5.8", 70 | "attrs==22.2.0", 71 | "azure-core==1.26.3", 72 | "azure-storage-blob==12.15.0", 73 | "bcrypt==4.0.1", 74 | "boto3==1.26.96", 75 | "botocore==1.29.96", 76 | "botocore-stubs==1.29.96", 77 | "cachetools==5.3.0", 78 | "certifi==2022.12.7", 79 | "cffi==1.16.0", 80 | "charset-normalizer==3.1.0", 81 | "coverage==6.5.0", 82 | "coveralls==3.3.1", 83 | "cryptography==39.0.2", 84 | "docopt==0.6.2", 85 | "exceptiongroup==1.1.1", 86 | "execnet==1.9.0", 87 | "freezegun==1.2.2", 88 | "google-api-core==2.11.0", 89 | "google-api-python-client==2.82.0", 90 | "googleapis-common-protos==1.59.0", 91 | "google-auth==2.16.2", 92 | "google-auth-httplib2==0.1.0", 93 | "httplib2==0.21.0", 94 | "idna==3.4", 95 | "iniconfig==2.0.0", 96 | "isodate==0.6.1", 97 | "isort==5.7.0", 98 | "jmespath==1.0.1", 99 | "lazy-object-proxy==1.9.0", 100 | "mccabe==0.6.1", 101 | "mock==5.0.1", 102 | "mypy==1.1.1", 103 | "mypy-extensions==1.0.0", 104 | "oauth2client==4.1.3", 105 | "packaging==23.0", 106 | "paramiko==3.1.0", 107 | "pluggy==1.0.0", 108 | "protobuf==4.22.1", 109 | "pyasn1==0.4.8", 110 | "pyasn1-modules==0.2.8", 111 | "pycparser==2.21", 112 | "pydantic==1.10.14", 113 | "pylint==2.7.2", 114 | "pylint-quotes==0.2.1", 115 | "PyNaCl==1.5.0", 116 | "pyparsing==3.0.9", 117 | "pytest==7.2.2", 118 | "pytest-cov==4.0.0", 119 | "pytest-mock==3.10.0", 120 | "pytest-timeout==2.1.0", 121 | "pytest-xdist==3.2.1", 122 | "python-dateutil==2.8.2", 123 | "python-snappy==0.7.1", 124 | "python-systemd==0.0.9", 125 | "requests==2.28.2", 126 | "responses==0.23.1", 127 | "rohmu==2.3.0", 128 | "rsa==4.9", 129 | "s3transfer==0.6.0", 130 | "six==1.16.0", 131 | "toml==0.10.2", 132 | "tomli==2.0.1", 133 | "types-awscrt==0.16.13", 134 | "types-botocore==1.0.2", 135 | "types-httplib2==0.21.0.5", 136 | "types-mock==5.0.0.5", 137 | "types-paramiko==3.0.0.4", 138 | "types-psycopg2==2.9.21.8", 139 | "types-python-dateutil==2.8.19.10", 140 | 
"types-PyYAML==6.0.12.8", 141 | "types-requests==2.28.11.15", 142 | "types-six==1.16.21.7", 143 | "types-urllib3==1.26.25.8", 144 | "typing_extensions==4.7.1", 145 | "unify==0.5", 146 | "untokenize==0.1.1", 147 | "uritemplate==4.1.1", 148 | "urllib3==1.26.15", 149 | "wrapt==1.12.1", 150 | "yapf==0.30.0", 151 | "zstandard==0.22.0", 152 | ] 153 | 154 | [project.urls] 155 | "Homepage" = "https://github.com/Aiven-Open/pghoard/" 156 | "Bug Tracker" = "https://github.com/Aiven-Open/pghoard/issues" 157 | 158 | [project.scripts] 159 | pghoard = "pghoard.pghoard:main" 160 | pghoard_archive_cleanup = "pghoard.archive_cleanup:main" 161 | pghoard_archive_sync = "pghoard.archive_sync:main" 162 | pghoard_create_keys = "pghoard.create_keys:main" 163 | pghoard_gnutaremu = "pghoard.gnutaremu:main" 164 | pghoard_postgres_command = "pghoard.postgres_command:main" 165 | pghoard_restore = "pghoard.restore:main" 166 | 167 | [tool.hatch.version] 168 | source = "vcs" 169 | 170 | [tool.hatch.build.hooks.vcs] 171 | version-file = "pghoard/version.py" 172 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | /test_storage_configs.py 2 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/__init__.py -------------------------------------------------------------------------------- /test/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | pghoard - unit test setup 3 | 4 | Copyright (c) 2015 Ohmu Ltd 5 | See LICENSE for details 6 | """ 7 | import logging 8 | import os 9 | # pylint: disable=attribute-defined-outside-init 10 | from shutil import rmtree 11 | from tempfile import mkdtemp 12 | 13 | import psycopg2.extras 14 | from packaging.version import Version 15 | 16 | from pghoard.config import find_pg_binary, set_and_check_config_defaults 17 | 18 | CONSTANT_TEST_RSA_PUBLIC_KEY = """\ 19 | -----BEGIN PUBLIC KEY----- 20 | MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDQ9yu7rNmu0GFMYeQq9Jo2B3d9 21 | hv5t4a+54TbbxpJlks8T27ipgsaIjqiQP7+uXNfU6UCzGFEHs9R5OELtO3Hq0Dn+ 22 | JGdxJlJ1prxVkvjCICCpiOkhc2ytmn3PWRuVf2VyeAddslEWHuXhZPptvIr593kF 23 | lWN+9KPe+5bXS8of+wIDAQAB 24 | -----END PUBLIC KEY-----""" 25 | 26 | CONSTANT_TEST_RSA_PRIVATE_KEY = """\ 27 | -----BEGIN PRIVATE KEY----- 28 | MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBAND3K7us2a7QYUxh 29 | 5Cr0mjYHd32G/m3hr7nhNtvGkmWSzxPbuKmCxoiOqJA/v65c19TpQLMYUQez1Hk4 30 | Qu07cerQOf4kZ3EmUnWmvFWS+MIgIKmI6SFzbK2afc9ZG5V/ZXJ4B12yURYe5eFk 31 | +m28ivn3eQWVY370o977ltdLyh/7AgMBAAECgYEAkuAobRFhL+5ndTiZF1g1zCQT 32 | aLepvbITwaL63B8GZz55LowRj5PL18/tyvYD1JqNWalZQIim67MKdOmGoRhXSF22 33 | gUc6/SeqD27/9rsj8I+j0TrzLdTZwn88oX/gtndNutZuryCC/7KbJ8j18Jjn5qf9 34 | ZboRKbEc7udxOb+RcYECQQD/ZLkxIvMSj0TxPUJcW4MTEsdeJHCSnQAhreIf2omi 35 | hf4YwmuU3qnFA3ROje9jJe3LNtc0TK1kvAqfZwdpqyAdAkEA0XY4P1CPqycYvTxa 36 | dxxWJnYA8K3g8Gs/Eo8wYKIciP+K70Q0GRP9Qlluk4vrA/wJJnTKCUl7YuAX6jDf 37 | WdV09wJALGHXoQde0IHfTEEGEEDC9YSU6vJQMdpg1HmAS2LR+lFox+q5gWR0gk1I 38 | YAJgcI191ovQOEF+/HuFKRBhhGZ9rQJAXOt13liNs15/sgshEq/mY997YUmxfNYG 39 | v+P3kRa5U+kRKD14YxukARgNXrT2R+k54e5zZhVMADvrP//4RTDVVwJBAN5TV9p1 40 | UPZXbydO8vZgPuo001KoEd9N3inq/yNcsHoF/h23Sdt/rcdfLMpCWuIYs/JAqE5K 41 | nkMAHqg9PS372Cs= 42 | -----END PRIVATE KEY-----""" 43 | 44 
| 45 | class PGHoardTestCase: 46 | @classmethod 47 | def setup_class(cls): 48 | cls.log = logging.getLogger(cls.__name__) 49 | 50 | def config_template(self, override=None): 51 | # NOTE: we set pg_receivexlog_path and pg_basebackup_path per site and globally mostly to verify that 52 | # it works, the config keys are deprecated and will be removed in a future release at which point we'll 53 | # switch to using pg_bin_directory config. 54 | bindir = os.environ.get("PG_BINDIR") 55 | if not bindir: 56 | pgexe, _ = find_pg_binary("postgres") 57 | bindir = os.path.dirname(pgexe) 58 | ver = self._check_all_needed_commands_found(bindir) 59 | 60 | if hasattr(psycopg2.extras, "PhysicalReplicationConnection"): 61 | active_backup_mode = "walreceiver" 62 | else: 63 | active_backup_mode = "pg_receivexlog" 64 | 65 | # Instantiate a fake PG data directory 66 | pg_data_directory = os.path.join(str(self.temp_dir), "PG_DATA_DIRECTORY") 67 | os.makedirs(pg_data_directory) 68 | open(os.path.join(pg_data_directory, "PG_VERSION"), "w").write(ver) 69 | 70 | config = { 71 | "alert_file_dir": os.path.join(str(self.temp_dir), "alerts"), 72 | "backup_location": os.path.join(str(self.temp_dir), "backupspool"), 73 | "backup_sites": { 74 | self.test_site: { 75 | "active_backup_mode": active_backup_mode, 76 | "object_storage": { 77 | "storage_type": "local", 78 | "directory": os.path.join(self.temp_dir, "backups"), 79 | }, 80 | "pg_data_directory": pg_data_directory, 81 | "pg_receivexlog_path": os.path.join(bindir, "pg_receivexlog"), 82 | }, 83 | }, 84 | "json_state_file_path": os.path.join(self.temp_dir, "state.json"), 85 | "pg_basebackup_path": os.path.join(bindir, "pg_basebackup"), 86 | } 87 | if Version(ver).major >= 10: 88 | config["backup_sites"][self.test_site]["pg_receivexlog_path"] = os.path.join(bindir, "pg_receivewal") 89 | if override: 90 | all_site_overrides = override.pop("backup_sites", None) 91 | for site_name, site_override in (all_site_overrides or {}).items(): 92 | if site_name in config["backup_sites"]: 93 | config["backup_sites"][site_name].update(site_override) 94 | else: 95 | config["backup_sites"][site_name] = site_override 96 | config.update(override) 97 | 98 | os.makedirs(config["alert_file_dir"], exist_ok=True) 99 | return set_and_check_config_defaults(config) 100 | 101 | def setup_method(self, method): 102 | self.temp_dir = mkdtemp(prefix=self.__class__.__name__) 103 | self.test_site = "site_{}".format(method.__name__) 104 | 105 | def teardown_method(self, method): # pylint: disable=unused-argument 106 | rmtree(self.temp_dir) 107 | 108 | def _check_all_needed_commands_found(self, bindir): 109 | version = None 110 | for command in ["postgres", "pg_receivexlog", "pg_basebackup"]: 111 | command_path, ver = find_pg_binary(command, pg_bin_directory=bindir) 112 | assert ver is not None 113 | if version is None: 114 | version = ver 115 | # Major version should match, some packages are shipped with different minor versions of binaries 116 | assert version.split(".")[0] == ver.split(".")[0] 117 | assert os.path.dirname(command_path) == bindir 118 | return version 119 | -------------------------------------------------------------------------------- /test/basebackup/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Aiven, Helsinki, Finland. 
https://aiven.io/ 2 | -------------------------------------------------------------------------------- /test/basebackup/test_chunks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Aiven, Helsinki, Finland. https://aiven.io/ 2 | import os 3 | import time 4 | from pathlib import Path 5 | from tarfile import TarInfo 6 | from tempfile import NamedTemporaryFile 7 | 8 | import pytest 9 | 10 | from pghoard import metrics 11 | from pghoard.basebackup.chunks import ChunkUploader, HashFile 12 | from pghoard.common import (BackupFailure, CallbackQueue, CompressionData, EncryptionData, FileType) 13 | from pghoard.transfer import TransferQueue 14 | 15 | 16 | class FakeTar: 17 | def __init__(self): 18 | self.items = [] 19 | 20 | def add(self, local_path, *, arcname, recursive): 21 | assert recursive is False 22 | self.items.append((local_path, arcname, os.stat(local_path))) 23 | 24 | def addfile(self, tarinfo, fileobj=None): # pylint: disable=unused-argument 25 | self.items.append(tarinfo) 26 | 27 | 28 | @pytest.fixture(name="chunk_uploader") 29 | def fixture_chunk_uploader(): 30 | return ChunkUploader( 31 | metrics=metrics.Metrics(statsd={}), 32 | chunks_on_disk=0, 33 | encryption_data=EncryptionData("foo_id", "foo_key"), 34 | compression_data=CompressionData("snappy", 0), 35 | site_config={}, 36 | site="foosite", 37 | is_running=lambda: True, 38 | transfer_queue=TransferQueue() 39 | ) 40 | 41 | 42 | def test_chunk_path_to_middle_path_name(): 43 | assert ChunkUploader.chunk_path_to_middle_path_name( 44 | Path("/a/b/2022-04-19_09-27_0.00000000.pghoard"), FileType.Basebackup 45 | ) == (Path("basebackup"), "2022-04-19_09-27_0.00000000.pghoard") 46 | 47 | assert ChunkUploader.chunk_path_to_middle_path_name( 48 | Path("/a/b/2022-04-19_09-27_0/2022-04-19_09-27_0.00000001.pghoard"), FileType.Basebackup_chunk 49 | ) == (Path("basebackup_chunk"), "2022-04-19_09-27_0/2022-04-19_09-27_0.00000001.pghoard") 50 | 51 | assert ChunkUploader.chunk_path_to_middle_path_name( 52 | Path("/a/b/0fdc9365aea5447f9a16da8104dc9fcc.delta"), FileType.Basebackup_delta 53 | ) == (Path("basebackup_delta"), "0fdc9365aea5447f9a16da8104dc9fcc.delta") 54 | 55 | assert ChunkUploader.chunk_path_to_middle_path_name( 56 | Path("/a/b/2022-04-19_09-27_0/2022-04-19_09-27_0.00000001.pghoard"), FileType.Basebackup_delta_chunk 57 | ) == (Path("basebackup_delta_chunk"), "2022-04-19_09-27_0/2022-04-19_09-27_0.00000001.pghoard") 58 | 59 | for file_type in {FileType.Wal, FileType.Metadata, FileType.Timeline}: 60 | with pytest.raises(NotImplementedError): 61 | ChunkUploader.chunk_path_to_middle_path_name(Path("/a/b/000000010000000000000002"), file_type) 62 | 63 | 64 | def test_write_files_to_tar_stops_when_not_running(): 65 | cu = ChunkUploader( 66 | metrics=metrics.Metrics(statsd={}), 67 | chunks_on_disk=0, 68 | encryption_data=EncryptionData("foo_id", "foo_key"), 69 | compression_data=CompressionData("snappy", 0), 70 | site_config={}, 71 | site="foosite", 72 | is_running=lambda: False, 73 | transfer_queue=TransferQueue() 74 | ) 75 | with pytest.raises(BackupFailure): 76 | cu.write_files_to_tar(files=[("foo", "foo", False)], tar=None) 77 | 78 | 79 | def test_write_files_to_tar_missing_raises_exception(chunk_uploader): 80 | with pytest.raises(FileNotFoundError): 81 | chunk_uploader.write_files_to_tar(files=[("foo", "foo", False)], tar=FakeTar()) 82 | 83 | 84 | def test_write_files_to_tar_adds_tar_info_file(chunk_uploader): 85 | faketar = FakeTar() 86 | ti = TarInfo(name="test tar info file") 
87 | chunk_uploader.write_files_to_tar(files=[(ti, "foo", False)], tar=faketar) 88 | assert faketar.items == [ti] 89 | 90 | 91 | def test_wait_for_chunk_transfer_to_complete_upload_in_progress(chunk_uploader): 92 | assert not chunk_uploader.wait_for_chunk_transfer_to_complete( 93 | chunk_count=1, 94 | upload_results=[], 95 | chunk_callback_queue=CallbackQueue(), 96 | start_time=time.monotonic(), 97 | queue_timeout=0.1 98 | ) 99 | 100 | 101 | def test_hash_file(): 102 | test_data = b"123" * 100 103 | with NamedTemporaryFile("w+b", delete=False) as tmp_file: 104 | tmp_file.write(test_data) 105 | 106 | with HashFile(path=tmp_file.name) as hash_file: 107 | assert hash_file.read(150) == test_data[:len(test_data) // 2] 108 | assert hash_file.hash.hexdigest() == "44438d46e19c3116a5a782cedac0e7cac379e90cfc85e8603bddc1540099215e" 109 | assert hash_file.read(150) == test_data[len(test_data) // 2:] 110 | assert hash_file.hash.hexdigest() == "0728577aecf53fd989cd4c0fc4e2fc73aaa60905fb224d4ca93d8f3eac62feeb" 111 | 112 | assert hash_file.closed 113 | 114 | with HashFile(path=tmp_file.name) as hash_file: 115 | hash_file.seek(0) 116 | assert hash_file.read() == test_data 117 | assert hash_file.hash.hexdigest() == "0728577aecf53fd989cd4c0fc4e2fc73aaa60905fb224d4ca93d8f3eac62feeb" 118 | -------------------------------------------------------------------------------- /test/data/basebackup/chunks/00000001.pghoard: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/data/basebackup/chunks/00000001.pghoard -------------------------------------------------------------------------------- /test/data/basebackup/chunks/00000001.pghoard.metadata: -------------------------------------------------------------------------------- 1 | {"_hash": "abc", "compression-algorithm": "snappy", "encryption-key-id": "5ba999de817c49a682ffed124abf9a2e", "format": "pghoard-bb-v2", "original-file-size": "20480"} 2 | -------------------------------------------------------------------------------- /test/data/basebackup/chunks/00000002.pghoard: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/data/basebackup/chunks/00000002.pghoard -------------------------------------------------------------------------------- /test/data/basebackup/chunks/00000002.pghoard.metadata: -------------------------------------------------------------------------------- 1 | {"_hash": "abc", "compression-algorithm": "snappy", "encryption-key-id": "5ba999de817c49a682ffed124abf9a2e", "format": "pghoard-bb-v2", "original-file-size": "20480"} 2 | -------------------------------------------------------------------------------- /test/data/basebackup/chunks/00000003.pghoard: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/data/basebackup/chunks/00000003.pghoard -------------------------------------------------------------------------------- /test/data/basebackup/chunks/00000003.pghoard.metadata: -------------------------------------------------------------------------------- 1 | {"_hash": "abc", "compression-algorithm": "snappy", "encryption-key-id": "5ba999de817c49a682ffed124abf9a2e", "format": "pghoard-bb-v2", "original-file-size": "10240"} 2 | 
-------------------------------------------------------------------------------- /test/data/basebackup/chunks/00000004.pghoard: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/data/basebackup/chunks/00000004.pghoard -------------------------------------------------------------------------------- /test/data/basebackup/chunks/00000004.pghoard.metadata: -------------------------------------------------------------------------------- 1 | {"_hash": "abc", "compression-algorithm": "snappy", "encryption-key-id": "5ba999de817c49a682ffed124abf9a2e", "format": "pghoard-bb-v2", "original-file-size": "20480"} 2 | -------------------------------------------------------------------------------- /test/data/basebackup/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_sites": { 3 | "f73f56ee-6b9f-4ce0-b7aa-a170d58da833": { 4 | "encryption_key_id": "5ba999de817c49a682ffed124abf9a2e", 5 | "encryption_keys": { 6 | "5ba999de817c49a682ffed124abf9a2e": { 7 | "private": "-----BEGIN PRIVATE KEY-----\nMIIG/AIBADANBgkqhkiG9w0BAQEFAASCBuYwggbiAgEAAoIBgQC/e0jNVBCB8pxK\nwPmJUlus6q+mKQ9QD0esP/TzBZ6TwKiMlMukwh0FCah88UTf/9VNDEvgzFhrcbEc\n0O0ZKywSHNFOEq6onm3QWRqnMvXZLlTyhIBZRuLB8Vt3WH3Atv6BwbLRgFuT6Rfx\nopOGptmNQsOMT4z9lB2n2JiTBJsg7+iAfw6ZltuBSIjC8/5flcmYkkTQFEHUJ3RY\nOTjlqTY7y8J464qvXgQIUE/kCx7np4pdvWc3Zf9l1hgv5Ol/escpr8Mo2Cg5Qhjg\ntLzB4emx8dfnoV1oc77F4XEFj5SzBZqgKy/pV5yy8UtJ6NDBEwG4cK5kXXFyu31q\nc+XYzgD2SdmG3Fqvy5Ikwj8Sx82zrwBWEM8etEy7CniyZi7+Sr9G4NfFvBG8OOQ2\nXGMfnl8doEFYRtVdjU0o3VSn37ASCW8XsHgd/Zobu6k9sMKdU1iNrEjkiSGQtlbD\nsq8GMuq8saSbPyRUNQO7mFQC2F6K49KGgxpn4d65G0wQMMnbq4sCAwEAAQKCAYAK\nYYOr5g/TC7UfdGDS6g0gTcTiDD3RSFLJato7xqU3O22n2XVE5GUwXbqts2LZhgQp\nXi5K7KkqggppFoaUI7wK61cJlYe0iopHjl0cjW24rYNbdoWC0Y3/l7cuvDRtGz6n\nCDpKk1vjo/JxXjADT85hkyoI1FM/eCU3cU2sQsaqPXdsZ/cBqqUR2D3Z2+KBihxY\n0i063q5G8zCii8+i286d5UkQxyxIn582WCxMn7G4O2QL+vW6kiQLgFTlW9Kw35YO\nfbM04zUmpvZpjCJuqLDw/2x7/sJn17vzQ+LsdBz/JXaLEidkI4Tr4caWqSjJZVBK\nb5LzeOrqCBcsppXUhN+4yvqCAtK0iUCLbFwPuqqmv3Ly2GpWQBPEgozdaXZOOGzc\ne7M5xCDH5bQRR/3e6Md9fHl/ATKD5eSun+7dyjNq8FJHeUAEKS2okd5oFmTD4mkZ\nxV1GTHTLzUy0qeyiUoGCxnCnS8q9rx2Os4j4/Y+aQI6xzUWYK+Zb57o9w7kIirEC\ngcEA8BHiDzTM4HgmKJ5kPkB38JYkoI0hFzofesvvKg8Nx4hEg6KBL3t7kJPqdJgm\ntZHwhg3EwzKIRCY+wHCpYy0ouX/MAfKjWBI9uAYhTB5nbJEqxFNo18uK40PoWRMt\nNSralCrRcIb5z/Kl5WTsXN6DHJFTDwsGHF41LWrnCXRNM/XfzJvE7X0VlcjbL/A9\n0cxfEuTb7k2xhJNBbizCvdw92mmDemFxq9PO63966nnOIjwaX3m4G1yg3nsUGeTY\nVmpZAoHBAMwwBaj9x29Cxjn1Dy4rF5GiNErO5JY9mZnroa4I43zbYuXww5mWTpHs\nORntM1XtKt2E5KeW9fBUdJPWh4epnh2cyFmodSxaEk35wfnEGnMPlwHTgZvUE7kz\n6VXqDEGK4mYmzof6edbpg+tejx8SsK6Pwrt1Moj9evGs+I38oK2IRJchIS7Ur+qj\njLS13Z1mxLmAquDsmzAVgEL13rgUsBsiV+ghEEpvPS8gb9yZmpTiKD07JKml18VK\n06FsF9VAgwKBwBT/O2phD9pCJ/Q9hj77nIHqX+G69j3103MGCzD+iBH/lR5+RBZH\nEpOenE7+T8Rps0PGSINaFBkBz1M9h1MpS/qNduZktmypi8RgpODnd9xDBh6NvQnC\nv68I7XV8++M+kEeNRyw0Yf0SF/hsT1AAFi+VdlJGgI5SnwwN4Y8uIOJ+ish2h07O\nNekX3DPhK0cCPP6GDcZV/US/LGXafF3muXI08E7v3uVMbTijubhwVtsfrp7TIosi\nGt/am/N31IQaYQKBwAmUmz9hoPDsfiaMBAlThkiUBsYXzQvrmgBp2O00h4/9LzfA\nwzy6m7cnEUrRIV5/wUohiST/5UxAejPRlgxcfgm/qHrkd5L8Ku2zsVFJzT/m1FwG\nk4c/PSmscN9SGv8cSCEo4vnoW70kucbaafa4Rsf6ANYQ2q0oz5L1XbgzyUo7IZTB\nvi/XVOW6hMiZ2+sdvk9B5UKmd2WbLKh3ptqWRekQBHXkz0He1E0YxYbhQiqILgEp\nfD/lgylDqIhjbP7ZhwKBwFYIBmEYEs/+ixHkHGbzE4tP+VhgzQ7we1+xyqN7Z69e\nf7StEuWWIZ5Os9JpdhZmN/9qb4kzL+Pb0vELuNHLwv08MnJ792wA9dPeCuYLKKiK\nUBuBwnslBy69tCRSXQ8ltH3kX67lxCu28hDRg+oN83bEczPJ2I/x7BKDrWNY
Zvrq\nJw7Ijp5l5gLqtdb7eOI8mSs5F7yaUKR6yVhl3ifc6bQUmmGmmElobpCpcGqAmXSj\noAeJCZug3MWS2fxhluthxQ==\n-----END PRIVATE KEY-----\n", 8 | "public": "-----BEGIN PUBLIC KEY-----\nMIIBojANBgkqhkiG9w0BAQEFAAOCAY8AMIIBigKCAYEAv3tIzVQQgfKcSsD5iVJb\nrOqvpikPUA9HrD/08wWek8CojJTLpMIdBQmofPFE3//VTQxL4MxYa3GxHNDtGSss\nEhzRThKuqJ5t0FkapzL12S5U8oSAWUbiwfFbd1h9wLb+gcGy0YBbk+kX8aKThqbZ\njULDjE+M/ZQdp9iYkwSbIO/ogH8OmZbbgUiIwvP+X5XJmJJE0BRB1Cd0WDk45ak2\nO8vCeOuKr14ECFBP5Ase56eKXb1nN2X/ZdYYL+Tpf3rHKa/DKNgoOUIY4LS8weHp\nsfHX56FdaHO+xeFxBY+UswWaoCsv6VecsvFLSejQwRMBuHCuZF1xcrt9anPl2M4A\n9knZhtxar8uSJMI/EsfNs68AVhDPHrRMuwp4smYu/kq/RuDXxbwRvDjkNlxjH55f\nHaBBWEbVXY1NKN1Up9+wEglvF7B4Hf2aG7upPbDCnVNYjaxI5IkhkLZWw7KvBjLq\nvLGkmz8kVDUDu5hUAtheiuPShoMaZ+HeuRtMEDDJ26uLAgMBAAE=\n-----END PUBLIC KEY-----\n" 9 | } 10 | }, 11 | "object_storage": { 12 | "directory": "test/data/basebackup/chunks", 13 | "storage_type": "local" 14 | }, 15 | "prefix": "1052a492-1a01-459d-a126-9db8518724c0/f73f56ee-6b9f-4ce0-b7aa-a170d58da833" 16 | } 17 | }, 18 | "compression": { 19 | "algorithm": "snappy" 20 | }, 21 | "log_level": "INFO", 22 | "restore_process_count": 2 23 | } 24 | -------------------------------------------------------------------------------- /test/data/basebackup_delta/chunks/0af668268d0fe14c6e269760b08d80a634c421b8381df25f31fbed5e8a8c8d8b: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/data/basebackup_delta/chunks/0af668268d0fe14c6e269760b08d80a634c421b8381df25f31fbed5e8a8c8d8b -------------------------------------------------------------------------------- /test/data/basebackup_delta/chunks/0af668268d0fe14c6e269760b08d80a634c421b8381df25f31fbed5e8a8c8d8b.metadata: -------------------------------------------------------------------------------- 1 | {"_hash": "abc", "compression-algorithm": "snappy", "format": "pghoard-delta-v1", "original-file-size": "16384"} 2 | -------------------------------------------------------------------------------- /test/data/basebackup_delta/chunks/4b65df4d0857bbbcb22aa086e02bd8414a9f3a484869f2b96ed7c62f3c4eb088: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/data/basebackup_delta/chunks/4b65df4d0857bbbcb22aa086e02bd8414a9f3a484869f2b96ed7c62f3c4eb088 -------------------------------------------------------------------------------- /test/data/basebackup_delta/chunks/4b65df4d0857bbbcb22aa086e02bd8414a9f3a484869f2b96ed7c62f3c4eb088.metadata: -------------------------------------------------------------------------------- 1 | {"_hash": "abc", "compression-algorithm": "snappy", "format": "pghoard-delta-v1", "original-file-size": "8192"} 2 | -------------------------------------------------------------------------------- /test/data/basebackup_delta/chunks/fc61c91430dcb345001306ad513f103380c16896093a17868fc909aeda393559: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/data/basebackup_delta/chunks/fc61c91430dcb345001306ad513f103380c16896093a17868fc909aeda393559 -------------------------------------------------------------------------------- /test/data/basebackup_delta/chunks/fc61c91430dcb345001306ad513f103380c16896093a17868fc909aeda393559.metadata: 
-------------------------------------------------------------------------------- 1 | {"_hash": "abc", "compression-algorithm": "snappy", "format": "pghoard-delta-v1", "original-file-size": "24576"} 2 | -------------------------------------------------------------------------------- /test/data/basebackup_delta/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_sites": { 3 | "f73f56ee-6b9f-4ce0-b7aa-a170d58da833": { 4 | "encryption_key_id": "5ba999de817c49a682ffed124abf9a2e", 5 | "encryption_keys": { 6 | "5ba999de817c49a682ffed124abf9a2e": { 7 | "private": "-----BEGIN PRIVATE KEY-----\nMIIG/AIBADANBgkqhkiG9w0BAQEFAASCBuYwggbiAgEAAoIBgQC/e0jNVBCB8pxK\nwPmJUlus6q+mKQ9QD0esP/TzBZ6TwKiMlMukwh0FCah88UTf/9VNDEvgzFhrcbEc\n0O0ZKywSHNFOEq6onm3QWRqnMvXZLlTyhIBZRuLB8Vt3WH3Atv6BwbLRgFuT6Rfx\nopOGptmNQsOMT4z9lB2n2JiTBJsg7+iAfw6ZltuBSIjC8/5flcmYkkTQFEHUJ3RY\nOTjlqTY7y8J464qvXgQIUE/kCx7np4pdvWc3Zf9l1hgv5Ol/escpr8Mo2Cg5Qhjg\ntLzB4emx8dfnoV1oc77F4XEFj5SzBZqgKy/pV5yy8UtJ6NDBEwG4cK5kXXFyu31q\nc+XYzgD2SdmG3Fqvy5Ikwj8Sx82zrwBWEM8etEy7CniyZi7+Sr9G4NfFvBG8OOQ2\nXGMfnl8doEFYRtVdjU0o3VSn37ASCW8XsHgd/Zobu6k9sMKdU1iNrEjkiSGQtlbD\nsq8GMuq8saSbPyRUNQO7mFQC2F6K49KGgxpn4d65G0wQMMnbq4sCAwEAAQKCAYAK\nYYOr5g/TC7UfdGDS6g0gTcTiDD3RSFLJato7xqU3O22n2XVE5GUwXbqts2LZhgQp\nXi5K7KkqggppFoaUI7wK61cJlYe0iopHjl0cjW24rYNbdoWC0Y3/l7cuvDRtGz6n\nCDpKk1vjo/JxXjADT85hkyoI1FM/eCU3cU2sQsaqPXdsZ/cBqqUR2D3Z2+KBihxY\n0i063q5G8zCii8+i286d5UkQxyxIn582WCxMn7G4O2QL+vW6kiQLgFTlW9Kw35YO\nfbM04zUmpvZpjCJuqLDw/2x7/sJn17vzQ+LsdBz/JXaLEidkI4Tr4caWqSjJZVBK\nb5LzeOrqCBcsppXUhN+4yvqCAtK0iUCLbFwPuqqmv3Ly2GpWQBPEgozdaXZOOGzc\ne7M5xCDH5bQRR/3e6Md9fHl/ATKD5eSun+7dyjNq8FJHeUAEKS2okd5oFmTD4mkZ\nxV1GTHTLzUy0qeyiUoGCxnCnS8q9rx2Os4j4/Y+aQI6xzUWYK+Zb57o9w7kIirEC\ngcEA8BHiDzTM4HgmKJ5kPkB38JYkoI0hFzofesvvKg8Nx4hEg6KBL3t7kJPqdJgm\ntZHwhg3EwzKIRCY+wHCpYy0ouX/MAfKjWBI9uAYhTB5nbJEqxFNo18uK40PoWRMt\nNSralCrRcIb5z/Kl5WTsXN6DHJFTDwsGHF41LWrnCXRNM/XfzJvE7X0VlcjbL/A9\n0cxfEuTb7k2xhJNBbizCvdw92mmDemFxq9PO63966nnOIjwaX3m4G1yg3nsUGeTY\nVmpZAoHBAMwwBaj9x29Cxjn1Dy4rF5GiNErO5JY9mZnroa4I43zbYuXww5mWTpHs\nORntM1XtKt2E5KeW9fBUdJPWh4epnh2cyFmodSxaEk35wfnEGnMPlwHTgZvUE7kz\n6VXqDEGK4mYmzof6edbpg+tejx8SsK6Pwrt1Moj9evGs+I38oK2IRJchIS7Ur+qj\njLS13Z1mxLmAquDsmzAVgEL13rgUsBsiV+ghEEpvPS8gb9yZmpTiKD07JKml18VK\n06FsF9VAgwKBwBT/O2phD9pCJ/Q9hj77nIHqX+G69j3103MGCzD+iBH/lR5+RBZH\nEpOenE7+T8Rps0PGSINaFBkBz1M9h1MpS/qNduZktmypi8RgpODnd9xDBh6NvQnC\nv68I7XV8++M+kEeNRyw0Yf0SF/hsT1AAFi+VdlJGgI5SnwwN4Y8uIOJ+ish2h07O\nNekX3DPhK0cCPP6GDcZV/US/LGXafF3muXI08E7v3uVMbTijubhwVtsfrp7TIosi\nGt/am/N31IQaYQKBwAmUmz9hoPDsfiaMBAlThkiUBsYXzQvrmgBp2O00h4/9LzfA\nwzy6m7cnEUrRIV5/wUohiST/5UxAejPRlgxcfgm/qHrkd5L8Ku2zsVFJzT/m1FwG\nk4c/PSmscN9SGv8cSCEo4vnoW70kucbaafa4Rsf6ANYQ2q0oz5L1XbgzyUo7IZTB\nvi/XVOW6hMiZ2+sdvk9B5UKmd2WbLKh3ptqWRekQBHXkz0He1E0YxYbhQiqILgEp\nfD/lgylDqIhjbP7ZhwKBwFYIBmEYEs/+ixHkHGbzE4tP+VhgzQ7we1+xyqN7Z69e\nf7StEuWWIZ5Os9JpdhZmN/9qb4kzL+Pb0vELuNHLwv08MnJ792wA9dPeCuYLKKiK\nUBuBwnslBy69tCRSXQ8ltH3kX67lxCu28hDRg+oN83bEczPJ2I/x7BKDrWNYZvrq\nJw7Ijp5l5gLqtdb7eOI8mSs5F7yaUKR6yVhl3ifc6bQUmmGmmElobpCpcGqAmXSj\noAeJCZug3MWS2fxhluthxQ==\n-----END PRIVATE KEY-----\n", 8 | "public": "-----BEGIN PUBLIC 
KEY-----\nMIIBojANBgkqhkiG9w0BAQEFAAOCAY8AMIIBigKCAYEAv3tIzVQQgfKcSsD5iVJb\nrOqvpikPUA9HrD/08wWek8CojJTLpMIdBQmofPFE3//VTQxL4MxYa3GxHNDtGSss\nEhzRThKuqJ5t0FkapzL12S5U8oSAWUbiwfFbd1h9wLb+gcGy0YBbk+kX8aKThqbZ\njULDjE+M/ZQdp9iYkwSbIO/ogH8OmZbbgUiIwvP+X5XJmJJE0BRB1Cd0WDk45ak2\nO8vCeOuKr14ECFBP5Ase56eKXb1nN2X/ZdYYL+Tpf3rHKa/DKNgoOUIY4LS8weHp\nsfHX56FdaHO+xeFxBY+UswWaoCsv6VecsvFLSejQwRMBuHCuZF1xcrt9anPl2M4A\n9knZhtxar8uSJMI/EsfNs68AVhDPHrRMuwp4smYu/kq/RuDXxbwRvDjkNlxjH55f\nHaBBWEbVXY1NKN1Up9+wEglvF7B4Hf2aG7upPbDCnVNYjaxI5IkhkLZWw7KvBjLq\nvLGkmz8kVDUDu5hUAtheiuPShoMaZ+HeuRtMEDDJ26uLAgMBAAE=\n-----END PUBLIC KEY-----\n" 9 | } 10 | }, 11 | "object_storage": { 12 | "directory": "test/data/basebackup_delta/chunks", 13 | "storage_type": "local" 14 | }, 15 | "prefix": "1052a492-1a01-459d-a126-9db8518724c0/f73f56ee-6b9f-4ce0-b7aa-a170d58da833" 16 | } 17 | }, 18 | "compression": { 19 | "algorithm": "snappy" 20 | }, 21 | "log_level": "INFO", 22 | "restore_process_count": 2 23 | } 24 | -------------------------------------------------------------------------------- /test/data/basebackup_one_chunk/chunks/00000002.pghoard: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/data/basebackup_one_chunk/chunks/00000002.pghoard -------------------------------------------------------------------------------- /test/data/basebackup_one_chunk/chunks/00000002.pghoard.metadata: -------------------------------------------------------------------------------- 1 | {"_hash": "abc", "compression-algorithm": "snappy", "encryption-key-id": "5ba999de817c49a682ffed124abf9a2e", "format": "pghoard-bb-v2", "original-file-size": "20480"} 2 | -------------------------------------------------------------------------------- /test/data/basebackup_one_chunk/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_sites": { 3 | "f73f56ee-6b9f-4ce0-b7aa-a170d58da833": { 4 | "encryption_key_id": "5ba999de817c49a682ffed124abf9a2e", 5 | "encryption_keys": { 6 | "5ba999de817c49a682ffed124abf9a2e": { 7 | "private": "-----BEGIN PRIVATE 
KEY-----\nMIIG/AIBADANBgkqhkiG9w0BAQEFAASCBuYwggbiAgEAAoIBgQC/e0jNVBCB8pxK\nwPmJUlus6q+mKQ9QD0esP/TzBZ6TwKiMlMukwh0FCah88UTf/9VNDEvgzFhrcbEc\n0O0ZKywSHNFOEq6onm3QWRqnMvXZLlTyhIBZRuLB8Vt3WH3Atv6BwbLRgFuT6Rfx\nopOGptmNQsOMT4z9lB2n2JiTBJsg7+iAfw6ZltuBSIjC8/5flcmYkkTQFEHUJ3RY\nOTjlqTY7y8J464qvXgQIUE/kCx7np4pdvWc3Zf9l1hgv5Ol/escpr8Mo2Cg5Qhjg\ntLzB4emx8dfnoV1oc77F4XEFj5SzBZqgKy/pV5yy8UtJ6NDBEwG4cK5kXXFyu31q\nc+XYzgD2SdmG3Fqvy5Ikwj8Sx82zrwBWEM8etEy7CniyZi7+Sr9G4NfFvBG8OOQ2\nXGMfnl8doEFYRtVdjU0o3VSn37ASCW8XsHgd/Zobu6k9sMKdU1iNrEjkiSGQtlbD\nsq8GMuq8saSbPyRUNQO7mFQC2F6K49KGgxpn4d65G0wQMMnbq4sCAwEAAQKCAYAK\nYYOr5g/TC7UfdGDS6g0gTcTiDD3RSFLJato7xqU3O22n2XVE5GUwXbqts2LZhgQp\nXi5K7KkqggppFoaUI7wK61cJlYe0iopHjl0cjW24rYNbdoWC0Y3/l7cuvDRtGz6n\nCDpKk1vjo/JxXjADT85hkyoI1FM/eCU3cU2sQsaqPXdsZ/cBqqUR2D3Z2+KBihxY\n0i063q5G8zCii8+i286d5UkQxyxIn582WCxMn7G4O2QL+vW6kiQLgFTlW9Kw35YO\nfbM04zUmpvZpjCJuqLDw/2x7/sJn17vzQ+LsdBz/JXaLEidkI4Tr4caWqSjJZVBK\nb5LzeOrqCBcsppXUhN+4yvqCAtK0iUCLbFwPuqqmv3Ly2GpWQBPEgozdaXZOOGzc\ne7M5xCDH5bQRR/3e6Md9fHl/ATKD5eSun+7dyjNq8FJHeUAEKS2okd5oFmTD4mkZ\nxV1GTHTLzUy0qeyiUoGCxnCnS8q9rx2Os4j4/Y+aQI6xzUWYK+Zb57o9w7kIirEC\ngcEA8BHiDzTM4HgmKJ5kPkB38JYkoI0hFzofesvvKg8Nx4hEg6KBL3t7kJPqdJgm\ntZHwhg3EwzKIRCY+wHCpYy0ouX/MAfKjWBI9uAYhTB5nbJEqxFNo18uK40PoWRMt\nNSralCrRcIb5z/Kl5WTsXN6DHJFTDwsGHF41LWrnCXRNM/XfzJvE7X0VlcjbL/A9\n0cxfEuTb7k2xhJNBbizCvdw92mmDemFxq9PO63966nnOIjwaX3m4G1yg3nsUGeTY\nVmpZAoHBAMwwBaj9x29Cxjn1Dy4rF5GiNErO5JY9mZnroa4I43zbYuXww5mWTpHs\nORntM1XtKt2E5KeW9fBUdJPWh4epnh2cyFmodSxaEk35wfnEGnMPlwHTgZvUE7kz\n6VXqDEGK4mYmzof6edbpg+tejx8SsK6Pwrt1Moj9evGs+I38oK2IRJchIS7Ur+qj\njLS13Z1mxLmAquDsmzAVgEL13rgUsBsiV+ghEEpvPS8gb9yZmpTiKD07JKml18VK\n06FsF9VAgwKBwBT/O2phD9pCJ/Q9hj77nIHqX+G69j3103MGCzD+iBH/lR5+RBZH\nEpOenE7+T8Rps0PGSINaFBkBz1M9h1MpS/qNduZktmypi8RgpODnd9xDBh6NvQnC\nv68I7XV8++M+kEeNRyw0Yf0SF/hsT1AAFi+VdlJGgI5SnwwN4Y8uIOJ+ish2h07O\nNekX3DPhK0cCPP6GDcZV/US/LGXafF3muXI08E7v3uVMbTijubhwVtsfrp7TIosi\nGt/am/N31IQaYQKBwAmUmz9hoPDsfiaMBAlThkiUBsYXzQvrmgBp2O00h4/9LzfA\nwzy6m7cnEUrRIV5/wUohiST/5UxAejPRlgxcfgm/qHrkd5L8Ku2zsVFJzT/m1FwG\nk4c/PSmscN9SGv8cSCEo4vnoW70kucbaafa4Rsf6ANYQ2q0oz5L1XbgzyUo7IZTB\nvi/XVOW6hMiZ2+sdvk9B5UKmd2WbLKh3ptqWRekQBHXkz0He1E0YxYbhQiqILgEp\nfD/lgylDqIhjbP7ZhwKBwFYIBmEYEs/+ixHkHGbzE4tP+VhgzQ7we1+xyqN7Z69e\nf7StEuWWIZ5Os9JpdhZmN/9qb4kzL+Pb0vELuNHLwv08MnJ792wA9dPeCuYLKKiK\nUBuBwnslBy69tCRSXQ8ltH3kX67lxCu28hDRg+oN83bEczPJ2I/x7BKDrWNYZvrq\nJw7Ijp5l5gLqtdb7eOI8mSs5F7yaUKR6yVhl3ifc6bQUmmGmmElobpCpcGqAmXSj\noAeJCZug3MWS2fxhluthxQ==\n-----END PRIVATE KEY-----\n", 8 | "public": "-----BEGIN PUBLIC KEY-----\nMIIBojANBgkqhkiG9w0BAQEFAAOCAY8AMIIBigKCAYEAv3tIzVQQgfKcSsD5iVJb\nrOqvpikPUA9HrD/08wWek8CojJTLpMIdBQmofPFE3//VTQxL4MxYa3GxHNDtGSss\nEhzRThKuqJ5t0FkapzL12S5U8oSAWUbiwfFbd1h9wLb+gcGy0YBbk+kX8aKThqbZ\njULDjE+M/ZQdp9iYkwSbIO/ogH8OmZbbgUiIwvP+X5XJmJJE0BRB1Cd0WDk45ak2\nO8vCeOuKr14ECFBP5Ase56eKXb1nN2X/ZdYYL+Tpf3rHKa/DKNgoOUIY4LS8weHp\nsfHX56FdaHO+xeFxBY+UswWaoCsv6VecsvFLSejQwRMBuHCuZF1xcrt9anPl2M4A\n9knZhtxar8uSJMI/EsfNs68AVhDPHrRMuwp4smYu/kq/RuDXxbwRvDjkNlxjH55f\nHaBBWEbVXY1NKN1Up9+wEglvF7B4Hf2aG7upPbDCnVNYjaxI5IkhkLZWw7KvBjLq\nvLGkmz8kVDUDu5hUAtheiuPShoMaZ+HeuRtMEDDJ26uLAgMBAAE=\n-----END PUBLIC KEY-----\n" 9 | } 10 | }, 11 | "object_storage": { 12 | "directory": "test/data/basebackup/chunks", 13 | "storage_type": "local" 14 | }, 15 | "prefix": "1052a492-1a01-459d-a126-9db8518724c0/f73f56ee-6b9f-4ce0-b7aa-a170d58da833" 16 | } 17 | }, 18 | "compression": { 19 | "algorithm": "snappy" 20 | }, 21 | "log_level": "INFO", 22 | "restore_process_count": 2 23 | } 24 | 
-------------------------------------------------------------------------------- /test/data/basebackup_with_ts/chunks/chunk_2018-04-23_2__2018-04-23_2.00000570.pghoard: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/data/basebackup_with_ts/chunks/chunk_2018-04-23_2__2018-04-23_2.00000570.pghoard -------------------------------------------------------------------------------- /test/data/basebackup_with_ts/chunks/chunk_2018-04-23_2__2018-04-23_2.00000570.pghoard.metadata: -------------------------------------------------------------------------------- 1 | {"_hash": "abc", "compression-algorithm": "snappy", "encryption-key-id": "517ba091076547cfaba7d4655b7254c8", "format": "pghoard-bb-v2", "original-file-size": "20480"} -------------------------------------------------------------------------------- /test/data/basebackup_with_ts/chunks/chunk_2018-04-23_2__2018-04-23_2.00000572.pghoard: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/data/basebackup_with_ts/chunks/chunk_2018-04-23_2__2018-04-23_2.00000572.pghoard -------------------------------------------------------------------------------- /test/data/basebackup_with_ts/chunks/chunk_2018-04-23_2__2018-04-23_2.00000572.pghoard.metadata: -------------------------------------------------------------------------------- 1 | {"_hash": "abc", "compression-algorithm": "snappy", "encryption-key-id": "517ba091076547cfaba7d4655b7254c8", "format": "pghoard-bb-v2", "original-file-size": "30720"} -------------------------------------------------------------------------------- /test/data/basebackup_with_ts/chunks/chunk_2018-04-23_2__2018-04-23_2.00000573.pghoard: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/data/basebackup_with_ts/chunks/chunk_2018-04-23_2__2018-04-23_2.00000573.pghoard -------------------------------------------------------------------------------- /test/data/basebackup_with_ts/chunks/chunk_2018-04-23_2__2018-04-23_2.00000573.pghoard.metadata: -------------------------------------------------------------------------------- 1 | {"_hash": "abc", "compression-algorithm": "snappy", "encryption-key-id": "517ba091076547cfaba7d4655b7254c8", "format": "pghoard-bb-v2", "original-file-size": "10240"} -------------------------------------------------------------------------------- /test/data/basebackup_with_ts/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_sites": { 3 | "96be37ae-75d8-48ca-92e2-53f838d22f1d": { 4 | "encryption_key_id": "517ba091076547cfaba7d4655b7254c8", 5 | "encryption_keys": { 6 | "517ba091076547cfaba7d4655b7254c8": { 7 | "private": "-----BEGIN PRIVATE 
KEY-----\nMIIG/QIBADANBgkqhkiG9w0BAQEFAASCBucwggbjAgEAAoIBgQC3wmIt8xOYwMqA\ni4vgBsEKq4AZ4NtJhNXxniQw1ccWatCwErjz4WkQnUBz2aQnl72uHVTYSagqUFDK\n76nbVg/iIPXI6cPOJTR6T1of1Tuy2nXEUR3pbJ44W/qcNoYURDLN6rR5/bTrbum/\nivrn2IzFAgBaNlEScHDC7GHyqBAr9f0NoEfsCoEntigDhwyB0deOn0c+LY1ckt+p\njoiefgWQYXjPa4e9y8h6e5sWCL2juAcFy90nsvbzyAGemG4ra/YyUpVFMjMF2DEv\nmORhZjA+DMEbN4qdK6Ee/kV7rsHj0S8JHAZpQQbQDUtlATFqFNi1Llazg8it56Vx\nu/CowaDumk0ec1osy68jLaVN75vFCjCL2nUBjGdgx3kAaTe/wCHJSmep7hwwg1uI\nFkXDW9y04lbGytaDK6LTDVsE++Lvfwri8aMH/6KMEPnFN0WTk7M7M5OzFJ2BWB20\n9agKjIde38D1lpHJsW4cdASdmpPl21SXxZoPqK81xkbivtrQwMECAwEAAQKCAYB2\no4FPuUhFrvMO2wmLsdCgWulxy8LITIL0Y+1puhu3VW82vl+61SVIHL6oDTP2e/ly\nmG1W1vdDmmcPHnzDp2TJ0g3dkU9hoJOpA+Jj+v94EaNHBDgTpJTiPkAm6uzV2NIU\nMBwlRq+v8QPpLomfdigsaYZguADPrtiegI8cY3VNbJvcWEcXFhEc2nYrEN36Usig\nxCTy9+X0hr7wcZ2y9hoRhVzCTeJtn0voCDWwnCQTt5SRIXW/U8Qs0Ex7YF6AmkpY\n2WukfpJjDSanmxPIUcHVYlEOLIURCQFkaBYXeT8ZGy9PtAqQZCx4o1lL0bK0GXgR\n1iIKg/XjCLDAmO55Ri2Vmv+L3wkyZKsSVUr0fKh3tD8ZQBlGv1BFc9UXSYa3nM8j\nQaCA60fEfIep872pXaXcrnA6SBFO3YEGbbHatRPH3yEWLMW1Qy0ksWq9W/vOVamf\ngh9nOUWA63DdplMqM2cVO8MKe14h8atrBvatVlv3we4l4oUgY3c/tM9gzyjR/6EC\ngcEA5d5UuZGkSv7CIT/6iFg8kPfqsXHZZKopYgakkPqgELK8Ru+dO/mlS8mRDULj\nSJaLxIT1s79Sbk2SGQ418VNBU+S638KGH4OXcSBicLZGLR+FtqXyp4ND50XM1nC9\n8nCGzpvmsSWWiDoZbjKWjR9Ut52ircPajYERwq9+3ZuRGlHUM2iShugs10ciW2q+\nou7RsZyrUcbN9d2DDlRLAUkiIXQrkN9dPLcy7tcWPr1sQLVqumP4BLDfFwCMUJ0h\ngv9LAoHBAMymLRmstpqE9h5C3rHdDW5sdIpo+2+yMbcyzz2SjK7NqYhP+QzbXans\nNGsidWYBfnvtEgCmmGDQgVttaIQYtU1PdaVcVegfHHHqnxu2twA+XLoCAp/5OFsJ\nYJbJlgOAEE5dW9TxUjUDoWoesHB+K2GtXjpHJN223Iw8aYjdcuj0oB9PMz6to0Ds\nBFaK4vBYQqZRXDLhy+bCkU16ZQT20LB0lbHIJHLwLe+YXvoPSrMmKxBYUk58Vz2I\nHj7jd/QcowKBwD/8krMRQp2Jqvpcd93qSfnEuTTpzy0BXQGnfbnJujLLvjM00hz2\n0jd1hYYef2weVAvBQy/k6wo/V5LX7NYfacjzm71bxcAR/l8EN8LOSlyXML52FUTh\n+k3vlSSAP6+z+SKcLSxUnUGQgrG1LUldVlTCcZ1TLoxDSHFcLsF7Y3njG2GwE1Gw\nYYMPjxXJcgWi2wZI6TubTovJPIkJl9NkyzZqwxHRy39yM6BeiRev3OlqVqpJ16le\nnsslYOmzw7dWXwKBwF7c7O0I1YBc/+VZI/eYu0AjgAYk0eCd3lPIdq5Y/2OMK/jm\n8VBXY1yABHBhAcBuEt6JLx4kG2BNZpkl5xvMhEY4/hTP73Zw7XOUyXpEQSIYxkzm\n36aeFNSGTUK4YRRwSZ65wfa8glnWu/0ACtOqeJuHjLVBA0LPH/7VhioOVNTNiGfb\nA6sbPCC9BunC6Z11nDeSYUWoyybFrCsZADjD3ErfUACgAN1SnkiA/g22Bu9yUuX5\nuvVsx9fbAAIw2QrMNwKBwQCWjA99l8gZu3cZhuGgIrtZp+Y4M9O4pvvG4AlmLTL0\nysFfQ262hftQdJNoNaCgy+2+zw9uS9sSo9/bkfJXpkrvT1pq+sS3mqw9jnB0ytrB\nTruAF0aVy3ph96aghjAQkBGJqs0RDM5cB6MPgpo5vxk9bCWr8lwH4UzTGyp54MSE\nE96MxKlL5P3t7ISdPjD7IaZ0GYmRiLpTfNVz6zbzrTBXv4XNBC7/sVMckW9aTIty\nJA2XB0H3xyLWJcy0v3MSyRY=\n-----END PRIVATE KEY-----\n", 8 | "public": "-----BEGIN PUBLIC KEY-----\nMIIBojANBgkqhkiG9w0BAQEFAAOCAY8AMIIBigKCAYEAt8JiLfMTmMDKgIuL4AbB\nCquAGeDbSYTV8Z4kMNXHFmrQsBK48+FpEJ1Ac9mkJ5e9rh1U2EmoKlBQyu+p21YP\n4iD1yOnDziU0ek9aH9U7stp1xFEd6WyeOFv6nDaGFEQyzeq0ef20627pv4r659iM\nxQIAWjZREnBwwuxh8qgQK/X9DaBH7AqBJ7YoA4cMgdHXjp9HPi2NXJLfqY6Inn4F\nkGF4z2uHvcvIenubFgi9o7gHBcvdJ7L288gBnphuK2v2MlKVRTIzBdgxL5jkYWYw\nPgzBGzeKnSuhHv5Fe67B49EvCRwGaUEG0A1LZQExahTYtS5Ws4PIreelcbvwqMGg\n7ppNHnNaLMuvIy2lTe+bxQowi9p1AYxnYMd5AGk3v8AhyUpnqe4cMINbiBZFw1vc\ntOJWxsrWgyui0w1bBPvi738K4vGjB/+ijBD5xTdFk5OzOzOTsxSdgVgdtPWoCoyH\nXt/A9ZaRybFuHHQEnZqT5dtUl8WaD6ivNcZG4r7a0MDBAgMBAAE=\n-----END PUBLIC KEY-----\n" 9 | } 10 | }, 11 | "object_storage": { 12 | "directory": "test/data/basebackup_with_ts/chunks", 13 | "storage_type": "local" 14 | }, 15 | "prefix": "2332f272-198b-46e7-bd34-ef603d04d460/96be37ae-75d8-48ca-92e2-53f838d22f1d" 16 | } 17 | }, 18 | "compression": { 19 | "algorithm": "snappy" 20 | }, 21 | "log_level": "INFO", 22 | "restore_process_count": 2 23 | } 24 | 
-------------------------------------------------------------------------------- /test/monitoring/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/pghoard/2e66fbd28e799d90dc850597a0dde117d363325b/test/monitoring/__init__.py -------------------------------------------------------------------------------- /test/monitoring/conftest.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | import http 3 | import http.server 4 | import selectors 5 | import socket 6 | from concurrent.futures import ThreadPoolExecutor 7 | from types import TracebackType 8 | from typing import Callable, Iterator, Type 9 | 10 | import pytest 11 | 12 | 13 | @dataclasses.dataclass(frozen=True) 14 | class LoggedRequest: 15 | path: str 16 | body: bytes 17 | 18 | 19 | class LoggingServer(http.server.HTTPServer): 20 | def __init__(self, *args, **kwargs) -> None: 21 | super().__init__(*args, **kwargs) 22 | self.requests: list[LoggedRequest] = [] 23 | 24 | 25 | class LoggingRequestHandler(http.server.BaseHTTPRequestHandler): 26 | server: LoggingServer 27 | 28 | def do_POST(self) -> None: 29 | request_body_size = int(self.headers["Content-Length"]) 30 | request_body = self.rfile.read(request_body_size) 31 | self.server.requests.append(LoggedRequest(path=self.path, body=request_body)) 32 | self.send_response(http.HTTPStatus.FOUND) 33 | self.end_headers() 34 | 35 | 36 | @pytest.fixture(scope="module", name="shared_logging_server") 37 | def fixture_shared_logging_server(get_available_port: Callable[[], int]) -> Iterator[LoggingServer]: 38 | server_address = ("localhost", get_available_port()) 39 | server = LoggingServer(server_address, RequestHandlerClass=LoggingRequestHandler, bind_and_activate=True) 40 | with ThreadPoolExecutor(max_workers=1) as executor: 41 | executor.submit(server.serve_forever) 42 | try: 43 | yield server 44 | finally: 45 | server.shutdown() 46 | 47 | 48 | @pytest.fixture(name="logging_server") 49 | def fixture_logging_server(shared_logging_server: LoggingServer) -> Iterator[LoggingServer]: 50 | try: 51 | yield shared_logging_server 52 | finally: 53 | shared_logging_server.requests.clear() 54 | 55 | 56 | class UdpServer: 57 | def __init__(self, port: int) -> None: 58 | self.port = port 59 | self.socket = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM) 60 | 61 | def __enter__(self) -> "UdpServer": 62 | self.socket.bind(("localhost", self.port)) 63 | return self 64 | 65 | def __exit__(self, exc_type: Type, exc_val: BaseException, exc_tb: TracebackType) -> None: 66 | self.socket.close() 67 | 68 | def has_message(self) -> bool: 69 | selector = selectors.DefaultSelector() 70 | selector.register(self.socket, selectors.EVENT_READ) 71 | try: 72 | return len(selector.select(timeout=-1)) > 0 73 | finally: 74 | selector.unregister(self.socket) 75 | 76 | def get_message(self) -> str: 77 | return self.socket.recv(2048).decode() 78 | -------------------------------------------------------------------------------- /test/monitoring/test_prometheus.py: -------------------------------------------------------------------------------- 1 | import freezegun 2 | 3 | from pghoard.monitoring.prometheus import PrometheusClient 4 | 5 | 6 | @freezegun.freeze_time("2022-01-02 03:04:05") 7 | def test_gauge() -> None: 8 | client = PrometheusClient(config={}) 9 | client.gauge("something", 123.456) 10 | assert client.get_metrics() == ["something{} 123.456 1641092645000"] 11 | 12 | 13 
| @freezegun.freeze_time("2022-01-02 03:04:05") 14 | def test_increase() -> None: 15 | client = PrometheusClient(config={}) 16 | client.increase("something") 17 | assert client.get_metrics() == ["something{} 1 1641092645000"] 18 | 19 | 20 | @freezegun.freeze_time("2022-01-02 03:04:05") 21 | def test_custom_increase_value() -> None: 22 | client = PrometheusClient(config={}) 23 | client.increase("something", inc_value=10) 24 | assert client.get_metrics() == ["something{} 10 1641092645000"] 25 | 26 | 27 | @freezegun.freeze_time("2022-01-02 03:04:05") 28 | def test_unexpected_exception() -> None: 29 | client = PrometheusClient(config={}) 30 | client.unexpected_exception(ValueError("hello !"), where="tests") 31 | assert client.get_metrics() == ["""pghoard_exception{exception="ValueError",where="tests"} 1 1641092645000"""] 32 | 33 | 34 | @freezegun.freeze_time("2022-01-02 03:04:05.678") 35 | def test_microseconds_are_ignored_and_truncated() -> None: 36 | client = PrometheusClient(config={}) 37 | client.gauge("something", 123.456) 38 | assert client.get_metrics() == ["something{} 123.456 1641092645000"] 39 | 40 | 41 | def test_identical_metric_overrides_previous_value() -> None: 42 | client = PrometheusClient(config={}) 43 | with freezegun.freeze_time("2022-01-02 03:04:05"): 44 | client.gauge("something", 123.456) 45 | with freezegun.freeze_time("2022-01-02 03:04:35"): 46 | client.gauge("something", 789.123) 47 | assert client.get_metrics() == ["something{} 789.123 1641092675000"] 48 | 49 | 50 | @freezegun.freeze_time("2022-01-02 03:04:05") 51 | def test_metric_with_different_names_are_separated() -> None: 52 | client = PrometheusClient(config={}) 53 | client.gauge("something", 123.456) 54 | client.gauge("else", 789.123) 55 | assert client.get_metrics() == ["something{} 123.456 1641092645000", "else{} 789.123 1641092645000"] 56 | 57 | 58 | @freezegun.freeze_time("2022-01-02 03:04:05") 59 | def test_metric_with_different_tags_are_separated() -> None: 60 | client = PrometheusClient(config={}) 61 | client.gauge("something", 123.456, tags={"mark": "one"}) 62 | client.gauge("something", 789.123, tags={"mark": "two"}) 63 | assert client.get_metrics() == [ 64 | """something{mark="one"} 123.456 1641092645000""", 65 | """something{mark="two"} 789.123 1641092645000""", 66 | ] 67 | 68 | 69 | @freezegun.freeze_time("2022-01-02 03:04:05") 70 | def test_metric_names_replaces_dots_and_dashes() -> None: 71 | client = PrometheusClient(config={}) 72 | client.gauge("a-metric.value", 123) 73 | assert client.get_metrics() == ["a_metric_value{} 123 1641092645000"] 74 | 75 | 76 | @freezegun.freeze_time("2022-01-02 03:04:05") 77 | def test_metric_can_have_tags() -> None: 78 | client = PrometheusClient(config={}) 79 | client.gauge("something", 123, tags={"foo": "bar", "baz": "tog"}) 80 | # tags are sorted 81 | assert client.get_metrics() == ["""something{baz="tog",foo="bar"} 123 1641092645000"""] 82 | 83 | 84 | @freezegun.freeze_time("2022-01-02 03:04:05") 85 | def test_metric_can_have_default_tags() -> None: 86 | client = PrometheusClient(config={"tags": {"foo": "bar"}}) 87 | client.gauge("something", 123) 88 | assert client.get_metrics() == ["""something{foo="bar"} 123 1641092645000"""] 89 | 90 | 91 | @freezegun.freeze_time("2022-01-02 03:04:05") 92 | def test_metric_custom_tags_override_defaults() -> None: 93 | client = PrometheusClient(config={"tags": {"foo": "bar", "baz": "tog"}}) 94 | client.gauge("something", 123, tags={"foo": "notbar"}) 95 | assert client.get_metrics() == ["""something{baz="tog",foo="notbar"} 123 
1641092645000"""] 96 | -------------------------------------------------------------------------------- /test/monitoring/test_pushgateway.py: -------------------------------------------------------------------------------- 1 | from test.monitoring.conftest import LoggedRequest, LoggingServer 2 | from typing import List, Sequence 3 | 4 | import pytest 5 | 6 | from pghoard.monitoring.pushgateway import PushgatewayClient 7 | 8 | 9 | def get_lines(logged_requests: Sequence[LoggedRequest]) -> List[str]: 10 | return [line for logged_request in logged_requests for line in logged_request.body.decode().split("\n") if line != ""] 11 | 12 | 13 | @pytest.fixture(name="gateway_client") 14 | def fixture_gateway_client(logging_server: LoggingServer) -> PushgatewayClient: 15 | return PushgatewayClient({ 16 | "endpoint": f"http://{logging_server.server_name}:{logging_server.server_port}", 17 | "job": "test_job", 18 | "instance": "test_server", 19 | }) 20 | 21 | 22 | def test_gauge(gateway_client: PushgatewayClient, logging_server: LoggingServer) -> None: 23 | gateway_client.gauge("something", 123456) 24 | assert get_lines(logging_server.requests) == [ 25 | "# TYPE something gauge", 26 | "something{} 123456", 27 | ] 28 | 29 | 30 | def test_increase(gateway_client: PushgatewayClient, logging_server: LoggingServer) -> None: 31 | gateway_client.increase("something") 32 | assert get_lines(logging_server.requests) == [ 33 | "# TYPE something counter", 34 | "something{} 1", 35 | ] 36 | 37 | 38 | def test_custom_increase_value(gateway_client: PushgatewayClient, logging_server: LoggingServer) -> None: 39 | gateway_client.increase("something", 10) 40 | assert get_lines(logging_server.requests) == [ 41 | "# TYPE something counter", 42 | "something{} 10", 43 | ] 44 | 45 | 46 | def test_unexpected_exception(gateway_client: PushgatewayClient, logging_server: LoggingServer) -> None: 47 | gateway_client.unexpected_exception(ValueError("hello !"), where="tests") 48 | assert get_lines(logging_server.requests) == [ 49 | "# TYPE pghoard_exception counter", 50 | """pghoard_exception{exception="ValueError",where="tests"} 1""", 51 | ] 52 | 53 | 54 | def test_identical_metric_follows_previous_value(gateway_client: PushgatewayClient, logging_server: LoggingServer) -> None: 55 | gateway_client.gauge("something", 123.456) 56 | gateway_client.gauge("something", 789.123) 57 | assert get_lines(logging_server.requests) == [ 58 | "# TYPE something gauge", 59 | "something{} 123.456", 60 | "# TYPE something gauge", 61 | "something{} 789.123", 62 | ] 63 | 64 | 65 | def test_metric_with_different_names_are_separated(gateway_client: PushgatewayClient, logging_server: LoggingServer) -> None: 66 | gateway_client.gauge("something", 123.456) 67 | gateway_client.gauge("else", 789.123) 68 | assert get_lines(logging_server.requests) == [ 69 | "# TYPE something gauge", 70 | "something{} 123.456", 71 | "# TYPE else gauge", 72 | "else{} 789.123", 73 | ] 74 | 75 | 76 | def test_metric_with_different_tags_are_separated(gateway_client: PushgatewayClient, logging_server: LoggingServer) -> None: 77 | gateway_client.gauge("something", 123.456, tags={"mark": "one"}) 78 | gateway_client.gauge("something", 789.123, tags={"mark": "two"}) 79 | assert get_lines(logging_server.requests) == [ 80 | "# TYPE something gauge", 81 | """something{mark="one"} 123.456""", 82 | "# TYPE something gauge", 83 | """something{mark="two"} 789.123""", 84 | ] 85 | 86 | 87 | def test_metric_names_replaces_dots_and_dashes(gateway_client: PushgatewayClient, logging_server: 
LoggingServer) -> None: 88 | gateway_client.gauge("a-metric.value", 123) 89 | assert get_lines(logging_server.requests) == ["# TYPE a_metric_value gauge", "a_metric_value{} 123"] 90 | 91 | 92 | def test_metric_can_have_tags(gateway_client: PushgatewayClient, logging_server: LoggingServer) -> None: 93 | gateway_client.gauge("something", 123, tags={"foo": "bar", "baz": "tog"}) 94 | # tags are sorted 95 | assert get_lines(logging_server.requests) == [ 96 | "# TYPE something gauge", 97 | """something{baz="tog",foo="bar"} 123""", 98 | ] 99 | 100 | 101 | def test_metric_can_have_default_tags(logging_server: LoggingServer) -> None: 102 | client = PushgatewayClient( 103 | config={ 104 | "endpoint": f"http://{logging_server.server_name}:{logging_server.server_port}", 105 | "job": "test_job", 106 | "instance": "test_server", 107 | "tags": { 108 | "foo": "bar" 109 | } 110 | } 111 | ) 112 | client.gauge("something", 123) 113 | assert get_lines(logging_server.requests) == ["# TYPE something gauge", """something{foo="bar"} 123"""] 114 | 115 | 116 | def test_metric_custom_tags_override_defaults(logging_server: LoggingServer) -> None: 117 | client = PushgatewayClient( 118 | config={ 119 | "endpoint": f"http://{logging_server.server_name}:{logging_server.server_port}", 120 | "job": "test_job", 121 | "instance": "test_server", 122 | "tags": { 123 | "foo": "bar", 124 | "baz": "tog" 125 | } 126 | } 127 | ) 128 | client.gauge("something", 123, tags={"foo": "notbar"}) 129 | assert get_lines(logging_server.requests) == [ 130 | "# TYPE something gauge", 131 | """something{baz="tog",foo="notbar"} 123""", 132 | ] 133 | 134 | 135 | def test_empty_endpoints_disables_monitoring(logging_server: LoggingServer) -> None: 136 | client = PushgatewayClient(config={"endpoint": ""}) 137 | client.gauge("something", 123, tags={"foo": "notbar"}) 138 | assert get_lines(logging_server.requests) == [] 139 | -------------------------------------------------------------------------------- /test/monitoring/test_statsd.py: -------------------------------------------------------------------------------- 1 | from test.monitoring.conftest import UdpServer 2 | from typing import Callable, Iterator 3 | 4 | import pytest 5 | 6 | from pghoard.monitoring.statsd import StatsClient 7 | 8 | 9 | @pytest.fixture(name="udp_server") 10 | def fixture_udp_server(get_available_port: Callable[[], int]) -> Iterator[UdpServer]: 11 | with UdpServer(port=get_available_port()) as udp_server: 12 | yield udp_server 13 | 14 | 15 | @pytest.mark.parametrize( 16 | "stats_format,expected", [ 17 | ("telegraf", "something:123.456|g"), 18 | ("datadog", "something:123.456|g"), 19 | ] 20 | ) 21 | def test_gauge(udp_server: UdpServer, stats_format: str, expected: str) -> None: 22 | client = StatsClient({"port": udp_server.port, "format": stats_format}) 23 | client.gauge("something", 123.456) 24 | assert udp_server.get_message() == expected 25 | 26 | 27 | @pytest.mark.parametrize("stats_format,expected", [ 28 | ("telegraf", "something:1|c"), 29 | ("datadog", "something:1|c"), 30 | ]) 31 | def test_increase(udp_server: UdpServer, stats_format: str, expected: str) -> None: 32 | client = StatsClient({"port": udp_server.port, "format": stats_format}) 33 | client.increase("something") 34 | assert udp_server.get_message() == expected 35 | 36 | 37 | @pytest.mark.parametrize("stats_format,expected", [ 38 | ("telegraf", "something:10|c"), 39 | ("datadog", "something:10|c"), 40 | ]) 41 | def test_custom_increase_value(udp_server: UdpServer, stats_format: str, expected: str) -> None: 
42 | client = StatsClient({"port": udp_server.port, "format": stats_format}) 43 | client.increase("something", inc_value=10) 44 | assert udp_server.get_message() == expected 45 | 46 | 47 | @pytest.mark.parametrize( 48 | "stats_format,expected", [ 49 | ("telegraf", "pghoard.exception,exception=ValueError,where=tests:1|c"), 50 | ("datadog", "pghoard.exception:1|c|#exception:ValueError,where:tests"), 51 | ] 52 | ) 53 | def test_unexpected_exception(udp_server: UdpServer, stats_format: str, expected: str) -> None: 54 | client = StatsClient({"port": udp_server.port, "format": stats_format}) 55 | client.unexpected_exception(ValueError("hello !"), where="tests") 56 | assert udp_server.get_message() == expected 57 | 58 | 59 | @pytest.mark.parametrize( 60 | "stats_format,expected", [ 61 | ("telegraf", "something,baz=tog,foo=bar:123|g"), 62 | ("datadog", "something:123|g|#baz:tog,foo:bar"), 63 | ] 64 | ) 65 | def test_metric_can_have_tags(udp_server: UdpServer, stats_format: str, expected: str) -> None: 66 | client = StatsClient({"port": udp_server.port, "format": stats_format}) 67 | client.gauge("something", 123, tags={"foo": "bar", "baz": "tog"}) 68 | # tags are sorted 69 | assert udp_server.get_message() == expected 70 | 71 | 72 | def test_datadog_tag_values_can_be_none(udp_server: UdpServer) -> None: 73 | client = StatsClient({"port": udp_server.port, "format": "datadog"}) 74 | client.gauge("something", 123, tags={"foo": None, "bar": None}) 75 | assert udp_server.get_message() == "something:123|g|#bar,foo" 76 | 77 | 78 | @pytest.mark.parametrize( 79 | "stats_format,expected", [ 80 | ("telegraf", "something,baz=tog,foo=bar:123|g"), 81 | ("datadog", "something:123|g|#baz:tog,foo:bar"), 82 | ] 83 | ) 84 | def test_metric_can_have_default_tags(udp_server: UdpServer, stats_format: str, expected: str) -> None: 85 | client = StatsClient({"port": udp_server.port, "format": stats_format, "tags": {"foo": "bar", "baz": "tog"}}) 86 | client.gauge("something", 123) 87 | assert udp_server.get_message() == expected 88 | 89 | 90 | @pytest.mark.parametrize( 91 | "stats_format,expected", [ 92 | ("telegraf", "something,baz=tog,foo=notbar:123|g"), 93 | ("datadog", "something:123|g|#baz:tog,foo:notbar"), 94 | ] 95 | ) 96 | def test_metric_custom_tags_override_defaults(udp_server: UdpServer, stats_format: str, expected: str) -> None: 97 | client = StatsClient({"port": udp_server.port, "format": stats_format, "tags": {"foo": "bar", "baz": "tog"}}) 98 | client.gauge("something", 123, tags={"foo": "notbar"}) 99 | assert udp_server.get_message() == expected 100 | 101 | 102 | def test_none_host_disables_monitoring(udp_server: UdpServer) -> None: 103 | client = StatsClient({"port": udp_server.port, "host": None}) 104 | client.gauge("something", 123, tags={"foo": "notbar"}) 105 | assert not udp_server.has_message() 106 | 107 | 108 | def test_none_port_disables_monitoring(udp_server: UdpServer) -> None: 109 | client = StatsClient({"port": None, "host": "localhost"}) 110 | client.gauge("something", 123, tags={"foo": "notbar"}) 111 | assert not udp_server.has_message() 112 | -------------------------------------------------------------------------------- /test/tar_failer: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This is a mock program to simulate tar race conditions failure 3 | cat - > /dev/null 4 | echo "Cannot open: No such file or directory" >&2 5 | exit 2 6 | -------------------------------------------------------------------------------- 
/test/test_archive_cleanup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Aiven, Helsinki, Finland. https://aiven.io/ 2 | 3 | import datetime 4 | import sys 5 | from unittest import mock 6 | 7 | import pytest 8 | 9 | from pghoard import archive_cleanup 10 | 11 | 12 | def test_missing_config_arg(): 13 | args = ["archive_cleanup", "--dry-run"] 14 | tool = archive_cleanup.ArchiveCleanup() 15 | with mock.patch.object(sys, "argv", args): 16 | assert tool.run() == 1 17 | 18 | 19 | def test_main_invalid_config(): 20 | args = ["archive_cleanup", "--config", "/etc/os-release", "--dry-run"] 21 | with mock.patch.object(sys, "argv", args): 22 | assert archive_cleanup.main() == 1 23 | 24 | 25 | def test_invalid_config(): 26 | args = ["archive_cleanup", "--config", "/etc/os-release", "--dry-run"] 27 | tool = archive_cleanup.ArchiveCleanup() 28 | with mock.patch.object(sys, "argv", args): 29 | with pytest.raises(archive_cleanup.InvalidConfigurationError): 30 | tool.run() 31 | 32 | 33 | def test_set_config(): 34 | tool = archive_cleanup.ArchiveCleanup() 35 | tool.set_config("pghoard.json", "default") 36 | assert tool.site == "default" 37 | assert "basebackup_count" in tool.backup_site 38 | assert "backup_location" in tool.config 39 | assert tool.storage is not None 40 | 41 | 42 | @pytest.mark.parametrize("dry_run", [True, False]) 43 | def test_archive_cleanup_orphaned_xlog(archive_cleaner, dry_run): 44 | archive_cleaner.archive_cleanup.archive_cleanup(dry_run=dry_run) 45 | assert (archive_cleaner.xlog_path / "000000010000000000000001").exists() is dry_run 46 | 47 | 48 | def test_archive_cleanup_missing_file(archive_cleaner): 49 | # make sure we don't err out if a file could not be found 50 | with mock.patch.object(archive_cleaner.archive_cleanup.storage, "list_iter") as list_iter: 51 | list_iter.return_value = [{ 52 | "name": "example-site/xlog/000000010000000000000001", 53 | "size": 0, 54 | "last_modified": datetime.datetime(2022, 3, 23, 18, 32, 38, 810545, tzinfo=datetime.timezone.utc), 55 | "metadata": { 56 | "start-wal-segment": "example-site/xlog/000000010000000000000002" 57 | } 58 | }] 59 | archive_cleaner.archive_cleanup.archive_cleanup(dry_run=False) 60 | 61 | 62 | def test_archive_cleanup_from_main(archive_cleaner): 63 | # one additional run, but started through main() 64 | args = ["archive_cleanup", "--config", archive_cleaner.config_path.as_posix()] 65 | with mock.patch.object(sys, "argv", args): 66 | assert archive_cleanup.main() is None 67 | 68 | assert (archive_cleaner.xlog_path / "000000010000000000000001").exists() is False 69 | -------------------------------------------------------------------------------- /test/test_config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | from rohmu.errors import InvalidConfigurationError 6 | 7 | import pghoard.config 8 | 9 | from .base import PGHoardTestCase 10 | 11 | 12 | def make_mock_find_pg_binary(out_command, out_version): 13 | def mock_find_pg_binary(wanted_program, versions=None, pg_bin_directory=None, check_commands=True): # pylint: disable=unused-argument 14 | return out_command, out_version 15 | 16 | return mock_find_pg_binary 17 | 18 | 19 | def make_mock_get_command_version(wanted_version_string): 20 | def mock_get_command_version(command, can_fail=True): # pylint: disable=unused-argument 21 | return wanted_version_string 22 | 23 | return 
mock_get_command_version 24 | 25 | 26 | class TestConfig(PGHoardTestCase): 27 | 28 | # Do not use config_template as we want only the minimum to call 29 | # fill_config_command_paths 30 | def minimal_config_template( 31 | self, pg_bin_directory=None, pg_data_directory_version=None, basebackup_path=None, receivexlog_path=None 32 | ): 33 | site_config = { 34 | "active": True, 35 | } 36 | if pg_bin_directory: 37 | site_config["pg_bin_directory"] = pg_bin_directory 38 | if pg_data_directory_version: 39 | site_config["pg_data_directory_version"] = pg_data_directory_version 40 | if basebackup_path: 41 | site_config["pg_basebackup_path"] = basebackup_path 42 | if receivexlog_path: 43 | site_config["pg_receivexlog_path"] = receivexlog_path 44 | return {"backup_sites": {self.test_site: site_config}} 45 | 46 | def test_valid_bin_directory(self, tmpdir): 47 | """ 48 | Test a valid bin directory, containing the required programs. 49 | """ 50 | for utility in ["postgres", "pg_basebackup", "pg_receivewal"]: 51 | dest_path = tmpdir / utility 52 | # Convert it to a proper Path 53 | Path(dest_path).touch() 54 | 55 | with patch("pghoard.config.get_command_version", make_mock_get_command_version("13.2")): 56 | assert self._check_all_needed_commands_found(str(tmpdir)) == "13.2" 57 | config = self.minimal_config_template(str(tmpdir)) 58 | site_config = config["backup_sites"][self.test_site] 59 | pghoard.config.fill_config_command_paths(config, self.test_site, True) 60 | assert site_config["pg_receivexlog_path"] == tmpdir / "pg_receivewal" 61 | assert site_config["pg_receivexlog_version"] == 130002 62 | assert site_config["pg_basebackup_path"] == tmpdir / "pg_basebackup" 63 | assert site_config["pg_basebackup_version"] == 130002 64 | 65 | def test_specific_pg_version(self, tmpdir): 66 | for utility in ["postgres", "pg_basebackup", "pg_receivewal"]: 67 | dest_path = tmpdir / utility 68 | # Convert it to a proper Path 69 | Path(dest_path).touch() 70 | 71 | with patch("pghoard.config.get_command_version", make_mock_get_command_version("13.2")): 72 | assert self._check_all_needed_commands_found(str(tmpdir)) == "13.2" 73 | with pytest.raises(InvalidConfigurationError): 74 | config = self.minimal_config_template(str(tmpdir), pg_data_directory_version="10") 75 | pghoard.config.fill_config_command_paths(config, self.test_site, True) 76 | config = self.minimal_config_template(str(tmpdir), pg_data_directory_version="13") 77 | pghoard.config.fill_config_command_paths(config, self.test_site, True) 78 | 79 | def test_fallback_to_path(self, tmpdir, monkeypatch): 80 | for utility in ["postgres", "pg_basebackup", "pg_receivewal"]: 81 | dest_path = tmpdir / utility 82 | # Convert it to a proper Path 83 | Path(dest_path).touch() 84 | monkeypatch.setenv("PATH", str(tmpdir)) 85 | # Add a dummy bin directory so that we don't fallback on versions 86 | # found in "well known locations" 87 | config = self.minimal_config_template("/dummy/bin/directory/") 88 | site_config = config["backup_sites"][self.test_site] 89 | with patch("pghoard.config.get_command_version", make_mock_get_command_version("13.2")): 90 | pghoard.config.fill_config_command_paths(config, self.test_site, True) 91 | assert site_config["pg_receivexlog_path"] == tmpdir / "pg_receivewal" 92 | assert site_config["pg_receivexlog_version"] == 130002 93 | assert site_config["pg_basebackup_path"] == tmpdir / "pg_basebackup" 94 | assert site_config["pg_basebackup_version"] == 130002 95 | 96 | def test_unsupported_pg_version(self, tmpdir): 97 | for utility in ["postgres", 
"pg_basebackup", "pg_receivewal"]: 98 | dest_path = tmpdir / utility 99 | # Convert it to a proper Path 100 | Path(dest_path).touch() 101 | 102 | with patch("pghoard.config.get_command_version", make_mock_get_command_version("8.2")): 103 | config = self.minimal_config_template(str(tmpdir)) 104 | with pytest.raises(InvalidConfigurationError): 105 | pghoard.config.fill_config_command_paths(config, self.test_site, True) 106 | -------------------------------------------------------------------------------- /test/test_create_keys.py: -------------------------------------------------------------------------------- 1 | """ 2 | pghoard - test key generation tools 3 | 4 | Copyright (c) 2016 Ohmu Ltd 5 | See LICENSE for details 6 | """ 7 | import json 8 | import os 9 | import shutil 10 | import sys 11 | from unittest import mock 12 | 13 | import pytest 14 | from rohmu.errors import InvalidConfigurationError 15 | 16 | from pghoard import create_keys 17 | 18 | 19 | def test_create_config_with_keys(): 20 | site = "foosite" 21 | key_id = "fookeyid" 22 | private, public = create_keys.create_keys(bits=1024) 23 | config = create_keys.create_config(site=site, key_id=key_id, rsa_private_key=private, rsa_public_key=public) 24 | assert config["backup_sites"][site]["encryption_key_id"] == key_id 25 | # Basically with this we just want to know we created something (912 or 916 in length) 26 | assert len(config["backup_sites"][site]["encryption_keys"][key_id]["private"]) >= 912 27 | 28 | 29 | def test_write_keys_in_old_config(tmpdir): 30 | config_template = os.path.join(os.path.dirname(__file__), "..", "pghoard.json") 31 | config_file = tmpdir.join("pghoard.json").strpath 32 | shutil.copyfile(config_template, config_file) 33 | with open(config_file, "r") as fp: 34 | config = json.load(fp) 35 | assert "default" in config["backup_sites"] 36 | assert "encryption_keys" not in config["backup_sites"]["default"] 37 | private, public = create_keys.create_keys(bits=1024) 38 | create_keys.save_keys(config_file, "default", "testkey", private, public) 39 | with pytest.raises(create_keys.CommandError) as excinfo: 40 | create_keys.save_keys(config_file, "default", "testkey", private, public) 41 | assert "already defined" in str(excinfo.value) 42 | with pytest.raises(InvalidConfigurationError) as excinfo: 43 | create_keys.save_keys(config_file, "nosite", "testkey", private, public) 44 | assert "not defined" in str(excinfo.value) 45 | 46 | 47 | def test_show_key_config_no_site(): 48 | with pytest.raises(create_keys.CommandError, match="Site must be defined if configuration file is not provided"): 49 | create_keys.show_key_config(None, "foo", "bar", "baz") 50 | 51 | 52 | def test_create_keys_main(tmp_path): 53 | config = {"backup_sites": {"default": {}}} 54 | config_file = (tmp_path / "test.json") 55 | config_file.write_text(json.dumps(config, indent=4)) 56 | 57 | args = ["create_keys", "--key-id", "foo", "--config", (tmp_path / "test.json").as_posix()] 58 | with mock.patch.object(sys, "argv", args): 59 | create_keys.main() 60 | 61 | with open(config_file.as_posix(), "r") as f: 62 | result = json.load(f) 63 | 64 | assert result["backup_sites"]["default"]["encryption_key_id"] == "foo" 65 | assert result["backup_sites"]["default"]["encryption_keys"]["foo"].keys() == {"private", "public"} 66 | -------------------------------------------------------------------------------- /test/test_gnutaremu.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Aiven, Helsinki, Finland. 
https://aiven.io/ 2 | 3 | import sys 4 | from pathlib import Path 5 | from unittest import mock 6 | 7 | from pghoard import gnutaremu 8 | 9 | 10 | def test_extract(tmp_path): 11 | args = ["gnutaremu", "--directory", tmp_path.as_posix(), "--extract", "-f", (Path("test") / "test.tar").as_posix()] 12 | with mock.patch.object(sys, "argv", args): 13 | gnutaremu.main() 14 | assert (tmp_path / "foo" / "bar").is_file() 15 | assert (tmp_path / "foo" / "baz").is_file() 16 | assert (tmp_path / "foo" / "bing").is_symlink() 17 | 18 | 19 | def test_exclude(tmp_path): 20 | args = [ 21 | "gnutaremu", "--directory", 22 | tmp_path.as_posix(), "--extract", "--exclude", "bar", "-f", (Path("test") / "test.tar").as_posix() 23 | ] 24 | with mock.patch.object(sys, "argv", args): 25 | gnutaremu.main() 26 | assert {x.name for x in (tmp_path / "foo").iterdir()} == {"baz", "bing"} 27 | 28 | 29 | def test_transform(tmp_path): 30 | args = [ 31 | "gnutaremu", "--directory", 32 | tmp_path.as_posix(), "--extract", "--transform", "s%f%\\%\\\\F%", "-f", (Path("test") / "test.tar").as_posix() 33 | ] 34 | with mock.patch.object(sys, "argv", args): 35 | gnutaremu.main() 36 | transformed_path = tmp_path / "%\\Foo" 37 | assert (transformed_path / "bar").is_file() 38 | assert (transformed_path / "baz").is_file() 39 | assert (transformed_path / "bing").is_symlink() 40 | -------------------------------------------------------------------------------- /test/test_inotify.py: -------------------------------------------------------------------------------- 1 | """ 2 | pghoard 3 | 4 | Copyright (c) 2015 Ohmu Ltd 5 | See LICENSE for details 6 | """ 7 | import os 8 | import platform 9 | from queue import Queue 10 | from unittest import SkipTest 11 | 12 | import pytest 13 | from rohmu.inotify import InotifyWatcher 14 | 15 | # pylint: disable=attribute-defined-outside-init 16 | from .base import PGHoardTestCase 17 | 18 | 19 | class TestInotify(PGHoardTestCase): 20 | def setup_method(self, method): 21 | if platform.system() == "Darwin": 22 | raise SkipTest() 23 | 24 | super().setup_method(method) 25 | 26 | self.queue = Queue() 27 | self.foo_path = os.path.join(self.temp_dir, "foo") 28 | with open(self.foo_path, "w") as out: 29 | out.write("foo") 30 | self.inotify = InotifyWatcher(self.queue) 31 | self.inotify.add_watch(self.temp_dir) 32 | self.inotify.start() 33 | 34 | def teardown_method(self, method): 35 | self.inotify.running = False 36 | # NOTE: teardown_method() removes the watched dir which terminates inotify immediately 37 | super().teardown_method(method) 38 | self.inotify.join() 39 | 40 | def test_create_file(self): 41 | with open(os.path.join(self.temp_dir, "bar"), "wb") as fp: 42 | fp.write(b"jee") 43 | assert self.queue.get(timeout=1.0)["type"] == "CLOSE_WRITE" 44 | 45 | def test_delete(self): 46 | os.unlink(self.foo_path) 47 | assert self.queue.get(timeout=1.0)["type"] == "DELETE" 48 | 49 | def test_move(self): 50 | os.rename(self.foo_path, os.path.join(self.temp_dir, "foo2")) 51 | event = self.queue.get(timeout=1.0) 52 | assert event["type"] == "MOVE" 53 | assert event["src_path"] == self.foo_path 54 | assert event["full_path"] == os.path.join(self.temp_dir, "foo2") 55 | 56 | def test_invalid(self): 57 | with pytest.raises(FileNotFoundError): 58 | self.inotify.add_watch(self.temp_dir + "NA") 59 | -------------------------------------------------------------------------------- /test/test_object_store.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Aiven, Helsinki, 
Finland. https://aiven.io/ 2 | import datetime 3 | from pathlib import Path 4 | 5 | from rohmu.object_storage.local import LocalTransfer 6 | 7 | from pghoard.object_store import ObjectStore 8 | 9 | 10 | def test_object_store_request_backup_preservation(tmp_path: Path) -> None: 11 | storage_dir = tmp_path / "storage" 12 | storage_dir.mkdir() 13 | storage = LocalTransfer(directory=str(storage_dir)) 14 | store = ObjectStore(storage, prefix="site_name", site=None, pgdata=str(tmp_path / "pgdata")) 15 | preserve_until = datetime.datetime(2022, 12, 18, 10, 20, 30, 123456, tzinfo=datetime.timezone.utc) 16 | request_name = store.request_backup_preservation("2022_12_10", preserve_until=preserve_until) 17 | requests = storage.list_path("site_name/preservation_request") 18 | assert len(requests) == 1 19 | assert requests[0]["name"] == f"site_name/preservation_request/{request_name}" 20 | assert requests[0]["metadata"]["preserve-backup"] == "2022_12_10" 21 | assert requests[0]["metadata"]["preserve-until"] == "2022-12-18 10:20:30.123456+00:00" 22 | 23 | 24 | def test_object_store_cancel_backup_preservation(tmp_path: Path) -> None: 25 | storage_dir = tmp_path / "storage" 26 | storage_dir.mkdir() 27 | storage = LocalTransfer(directory=str(storage_dir)) 28 | store = ObjectStore(storage, prefix="site_name", site=None, pgdata=str(tmp_path / "pgdata")) 29 | preserve_until = datetime.datetime(2022, 12, 18, 10, 20, 30, 123456, tzinfo=datetime.timezone.utc) 30 | request_name = store.request_backup_preservation("2022_12_10", preserve_until=preserve_until) 31 | store.cancel_backup_preservation(request_name) 32 | requests = storage.list_path("site_name/preservation_request") 33 | assert len(requests) == 0 34 | 35 | 36 | def test_object_store_try_request_backup_preservation_returns_none_on_failure(tmp_path: Path) -> None: 37 | storage_dir = tmp_path / "storage" 38 | storage_dir.mkdir() 39 | storage_dir.chmod(0o000) 40 | try: 41 | storage = LocalTransfer(directory=str(storage_dir)) 42 | store = ObjectStore(storage, prefix="site_name", site=None, pgdata=str(tmp_path / "pgdata")) 43 | preserve_until = datetime.datetime(2022, 12, 18, 10, 20, 30, 123456, tzinfo=datetime.timezone.utc) 44 | request_name = store.try_request_backup_preservation("2022_12_10", preserve_until=preserve_until) 45 | assert request_name is None 46 | finally: 47 | storage_dir.chmod(0o700) 48 | 49 | 50 | def test_object_store_try_cancel_backup_preservation_silently_fails(tmp_path: Path) -> None: 51 | storage_dir = tmp_path / "storage" 52 | storage_dir.mkdir() 53 | storage = LocalTransfer(directory=str(storage_dir)) 54 | store = ObjectStore(storage, prefix="site_name", site=None, pgdata=str(tmp_path / "pgdata")) 55 | preserve_until = datetime.datetime(2022, 12, 18, 10, 20, 30, 123456, tzinfo=datetime.timezone.utc) 56 | request_name = store.request_backup_preservation("2022_12_10", preserve_until=preserve_until) 57 | storage_dir.chmod(0o000) 58 | try: 59 | store.try_cancel_backup_preservation(request_name) 60 | finally: 61 | storage_dir.chmod(0o700) 62 | -------------------------------------------------------------------------------- /test/test_pgutil.py: -------------------------------------------------------------------------------- 1 | # Copied from https://github.com/ohmu/ohmu_common_py test/test_pgutil.py version 0.0.1-0-unknown-fa54b44 2 | """ 3 | pghoard - postgresql utility function tests 4 | 5 | Copyright (c) 2015 Ohmu Ltd 6 | See LICENSE for details 7 | """ 8 | 9 | from pytest import raises 10 | 11 | from pghoard.pgutil import ( 12 | 
create_connection_string, get_connection_info, get_connection_info_from_config_line, mask_connection_info 13 | ) 14 | 15 | 16 | def test_connection_info(): 17 | # Test connection string - do not report through bug bounty programs 18 | url = "postgres://hannu:secret@dbhost.local:5555/abc?replication=true&sslmode=foobar&sslmode=require" 19 | cs = "host=dbhost.local user='hannu' dbname='abc'\n" \ 20 | "replication=true password=secret sslmode=require port=5555" 21 | ci = { 22 | "host": "dbhost.local", 23 | "port": "5555", 24 | "user": "hannu", 25 | "password": "secret", 26 | "dbname": "abc", 27 | "replication": "true", 28 | "sslmode": "require", 29 | } 30 | assert get_connection_info(ci) == get_connection_info(cs) 31 | assert get_connection_info(ci) == get_connection_info(url) 32 | 33 | basic_cstr = "host=localhost user=os" 34 | assert create_connection_string(get_connection_info(basic_cstr)) == "host='localhost' user='os'" 35 | 36 | assert get_connection_info("foo=bar bar='\\'x'") == {"foo": "bar", "bar": "'x"} 37 | 38 | with raises(ValueError): 39 | get_connection_info("foo=bar x") 40 | with raises(ValueError): 41 | get_connection_info("foo=bar bar='x") 42 | 43 | 44 | def test_mask_connection_info(): 45 | # Test connection string - do not report through bug bounty programs 46 | url = "postgres://michael:secret@dbhost.local:5555/abc?replication=true&sslmode=foobar&sslmode=require" 47 | cs = "host=dbhost.local user='michael' dbname='abc'\n" \ 48 | "replication=true password=secret sslmode=require port=5555" 49 | ci = get_connection_info(cs) 50 | masked_url = mask_connection_info(url) 51 | masked_cs = mask_connection_info(url) 52 | masked_ci = mask_connection_info(url) 53 | assert masked_url == masked_cs 54 | assert masked_url == masked_ci 55 | assert "password" in ci # make sure we didn't modify the original dict 56 | 57 | # the return format is a connection string without password, followed by 58 | # a semicolon and comment about password presence 59 | masked_str, password_info = masked_url.split("; ", 1) 60 | assert "password" not in masked_str 61 | assert password_info == "hidden password" 62 | 63 | # remasking the masked string should yield a no password comment 64 | masked_masked = mask_connection_info(masked_str) 65 | _, masked_password_info = masked_masked.split("; ", 1) 66 | assert masked_password_info == "no password" 67 | 68 | 69 | def test_connection_info_from_config_line(): 70 | conn_info = get_connection_info_from_config_line("db1='localhost port=5432 dbname=mydb connect_timeout=10'") 71 | assert conn_info == {"localhost port": "5432", "dbname": "mydb", "connect_timeout": "10"} 72 | -------------------------------------------------------------------------------- /test/test_postgres_command.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Aiven, Helsinki, Finland. 
https://aiven.io/ 2 | 3 | import sys 4 | from unittest import mock 5 | 6 | import pytest 7 | 8 | from pghoard import postgres_command 9 | 10 | 11 | def test_restore_command_error(): 12 | with mock.patch("pghoard.postgres_command.http_request", return_value=500): 13 | with pytest.raises(postgres_command.PGCError, match="Restore failed with HTTP status 500"): 14 | postgres_command.restore_command("foo", "123", "/tmp/xxx") 15 | 16 | 17 | def test_postgres_command_archive_error(): 18 | args = ["postgres_command", "--site", "foo", "--xlog", "bar", "--mode", "archive"] 19 | with mock.patch.object(sys, "argv", args): 20 | with mock.patch("pghoard.postgres_command.archive_command", side_effect=SystemExit): 21 | assert postgres_command.main() == postgres_command.EXIT_UNEXPECTED 22 | 23 | 24 | def test_postgres_command_restore_error(): 25 | args = ["postgres_command", "--site", "foo", "--xlog", "bar", "--mode", "restore"] 26 | with mock.patch.object(sys, "argv", args): 27 | with mock.patch("pghoard.postgres_command.restore_command", side_effect=SystemExit): 28 | assert postgres_command.main() == postgres_command.EXIT_ABORT 29 | 30 | 31 | def test_postgres_command_archive_pgcerror(): 32 | args = ["postgres_command", "--site", "foo", "--xlog", "bar", "--mode", "archive"] 33 | with mock.patch.object(sys, "argv", args): 34 | with mock.patch( 35 | "pghoard.postgres_command.archive_command", side_effect=postgres_command.PGCError(message="howdy", exit_code=42) 36 | ): 37 | assert postgres_command.main() == 42 38 | -------------------------------------------------------------------------------- /test/test_preservation_request.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Aiven Ltd 3 | See LICENSE for details 4 | """ 5 | import datetime 6 | from typing import Any 7 | 8 | from pghoard.preservation_request import ( 9 | is_basebackup_preserved, parse_preservation_requests, patch_basebackup_metadata_with_preservation 10 | ) 11 | 12 | 13 | def test_patch_basebackup_metadata_with_preservation() -> None: 14 | preserve_until = datetime.datetime(2022, 12, 26, 10, 20, tzinfo=datetime.timezone.utc) 15 | basebackup_entry: dict[str, Any] = {"name": "2022_12_20", "metadata": {}} 16 | backups_to_preserve = {"2022_12_20": preserve_until} 17 | patch_basebackup_metadata_with_preservation(basebackup_entry, backups_to_preserve) 18 | assert basebackup_entry["metadata"]["preserve-until"] == preserve_until 19 | 20 | 21 | def test_patch_basebackup_metadata_with_preservation_with_no_match() -> None: 22 | basebackup_entry: dict[str, Any] = {"name": "2022_12_20", "metadata": {}} 23 | backups_to_preserve = {"2022_12_14": datetime.datetime(2022, 12, 20, 10, 20, 30, 123456)} 24 | patch_basebackup_metadata_with_preservation(basebackup_entry, backups_to_preserve) 25 | assert basebackup_entry["metadata"]["preserve-until"] is None 26 | 27 | 28 | def test_is_backup_preserved_no_metadata() -> None: 29 | now = datetime.datetime(2022, 12, 26, 10, 20, tzinfo=datetime.timezone.utc) 30 | basebackup_entry = {"name": "2022_12_20", "metadata": {}} 31 | assert is_basebackup_preserved(basebackup_entry, now) is False 32 | 33 | 34 | def test_is_backup_preserved_none_metadata() -> None: 35 | now = datetime.datetime(2022, 12, 26, 10, 20, tzinfo=datetime.timezone.utc) 36 | basebackup_entry = {"name": "2022_12_20", "metadata": {"preserve-until": None}} 37 | assert is_basebackup_preserved(basebackup_entry, now) is False 38 | 39 | 40 | def test_is_backup_preserved_metadata_in_past() -> None: 
41 | now = datetime.datetime(2022, 12, 26, 10, 20, tzinfo=datetime.timezone.utc) 42 | preserve_until = datetime.datetime(2022, 12, 26, tzinfo=datetime.timezone.utc) 43 | basebackup_entry = {"name": "2022_12_20", "metadata": {"preserve-until": preserve_until}} 44 | assert is_basebackup_preserved(basebackup_entry, now) is False 45 | 46 | 47 | def test_is_backup_preserved_metadata_in_future() -> None: 48 | now = datetime.datetime(2022, 12, 24, tzinfo=datetime.timezone.utc) 49 | preserve_until = datetime.datetime(2022, 12, 26, tzinfo=datetime.timezone.utc) 50 | basebackup_entry = {"name": "2022_12_20", "metadata": {"preserve-until": preserve_until}} 51 | assert is_basebackup_preserved(basebackup_entry, now) is True 52 | 53 | 54 | def test_parse_preservation_requests() -> None: 55 | preservation_requests = [{ 56 | "metadata": { 57 | "preserve-backup": "2022_12_10", 58 | "preserve-until": "2022-12-18 10:20:30.123456" 59 | } 60 | }, { 61 | "metadata": { 62 | "preserve-backup": "2022_12_10", 63 | "preserve-until": "2022-12-16 10:20:30.123456" 64 | } 65 | }, { 66 | "metadata": { 67 | "preserve-backup": "2022_12_20", 68 | "preserve-until": "2022-12-26 10:20:30.123456+00:00" 69 | } 70 | }] 71 | backups_to_preserve = parse_preservation_requests(preservation_requests) 72 | assert backups_to_preserve == { 73 | "2022_12_10": datetime.datetime(2022, 12, 18, 10, 20, 30, 123456, tzinfo=datetime.timezone.utc), 74 | "2022_12_20": datetime.datetime(2022, 12, 26, 10, 20, 30, 123456, tzinfo=datetime.timezone.utc), 75 | } 76 | -------------------------------------------------------------------------------- /test/test_wal_file_deleter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 Aiven, Helsinki, Finland. https://aiven.io/ 2 | 3 | import time 4 | from pathlib import Path 5 | from queue import Queue 6 | 7 | import mock 8 | import pytest 9 | 10 | from pghoard import metrics 11 | from pghoard.common import QuitEvent 12 | from pghoard.compressor import WALFileDeleterThread, WalFileDeletionEvent 13 | 14 | 15 | # too fool the 16 | class WALFileDeleterThreadPatched(WALFileDeleterThread): 17 | os_unlink_mock: mock.MagicMock 18 | 19 | 20 | @pytest.fixture(name="wal_file_deleter") 21 | def fixture_wal_file_deleter(mocker): 22 | deleter_queue = Queue() 23 | # speed up the tests 24 | config = {"deleter_event_get_timeout": 0.001} 25 | deleter = WALFileDeleterThread( 26 | config=config, 27 | wal_file_deletion_queue=deleter_queue, 28 | metrics=metrics.Metrics(statsd={}), 29 | ) 30 | os_unlink_mock = mock.MagicMock() 31 | mocker.patch("os.unlink", side_effect=os_unlink_mock) 32 | deleter.os_unlink_mock = os_unlink_mock 33 | deleter.start() 34 | yield deleter 35 | deleter.running = False 36 | deleter_queue.put(QuitEvent) 37 | deleter.join() 38 | 39 | 40 | def make_event(path: str, site: str = "a"): 41 | return WalFileDeletionEvent(backup_site_name=site, file_path=Path(path)) 42 | 43 | 44 | TEST_WAIT_TIME = 0.1 45 | 46 | 47 | def test_wal_file_deleter_happy_path(wal_file_deleter: WALFileDeleterThreadPatched): 48 | 49 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000001")) 50 | time.sleep(TEST_WAIT_TIME) 51 | assert len(wal_file_deleter.to_be_deleted_files["a"]) == 1 52 | assert wal_file_deleter.to_be_deleted_files["a"] == {Path("AA000001")} 53 | wal_file_deleter.os_unlink_mock.assert_not_called() 54 | 55 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000002")) 56 | time.sleep(TEST_WAIT_TIME) 57 | assert 
len(wal_file_deleter.to_be_deleted_files["a"]) == 1 58 | assert wal_file_deleter.to_be_deleted_files["a"] == {Path("AA000002")} 59 | wal_file_deleter.os_unlink_mock.assert_called_once_with(Path("AA000001")) 60 | 61 | wal_file_deleter.os_unlink_mock.reset_mock() 62 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000001")) 63 | time.sleep(TEST_WAIT_TIME) 64 | assert len(wal_file_deleter.to_be_deleted_files["a"]) == 1 65 | assert wal_file_deleter.to_be_deleted_files["a"] == {Path("AA000002")} 66 | wal_file_deleter.os_unlink_mock.assert_called_once_with(Path("AA000001")) 67 | 68 | # Even if there are multiple files in the list, we delete all but the latest 69 | wal_file_deleter.os_unlink_mock.reset_mock() 70 | wal_file_deleter.to_be_deleted_files["a"].add(Path("AA000004")) 71 | wal_file_deleter.to_be_deleted_files["a"].add(Path("AA000003")) 72 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000001")) 73 | time.sleep(TEST_WAIT_TIME) 74 | assert len(wal_file_deleter.to_be_deleted_files["a"]) == 1 75 | assert wal_file_deleter.to_be_deleted_files["a"] == {Path("AA000004")} 76 | assert wal_file_deleter.os_unlink_mock.call_count == 3 77 | 78 | 79 | def test_survive_problems(wal_file_deleter: WALFileDeleterThreadPatched): 80 | # Adding the same path twice will still result in that file not deleted 81 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000001")) 82 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000001")) 83 | time.sleep(TEST_WAIT_TIME) 84 | assert wal_file_deleter.is_alive() 85 | wal_file_deleter.os_unlink_mock.assert_not_called() 86 | assert len(wal_file_deleter.to_be_deleted_files["a"]) == 1 87 | assert wal_file_deleter.to_be_deleted_files["a"] == {Path("AA000001")} 88 | 89 | # we survive not finding the file during deletion and the to be deleted ("older") file is still removed from the queue 90 | wal_file_deleter.os_unlink_mock.side_effect = FileNotFoundError("foo") 91 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000002")) 92 | time.sleep(TEST_WAIT_TIME) 93 | assert wal_file_deleter.is_alive() 94 | assert len(wal_file_deleter.to_be_deleted_files["a"]) == 1 95 | assert wal_file_deleter.to_be_deleted_files["a"] == {Path("AA000002")} 96 | 97 | 98 | def test_multiple_sites(wal_file_deleter: WALFileDeleterThreadPatched): 99 | 100 | # Adding the same path twice will still result in that file not deleted 101 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000001", site="a")) 102 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000001", site="b")) 103 | time.sleep(TEST_WAIT_TIME) 104 | assert wal_file_deleter.running 105 | wal_file_deleter.os_unlink_mock.assert_not_called() 106 | assert len(wal_file_deleter.to_be_deleted_files) == 2 107 | assert wal_file_deleter.to_be_deleted_files["a"] == {Path("AA000001")} 108 | assert wal_file_deleter.to_be_deleted_files["b"] == {Path("AA000001")} 109 | 110 | # advance one site 111 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000002", site="a")) 112 | time.sleep(TEST_WAIT_TIME) 113 | assert wal_file_deleter.running 114 | assert wal_file_deleter.os_unlink_mock.call_count == 1 115 | assert len(wal_file_deleter.to_be_deleted_files) == 2 116 | assert wal_file_deleter.to_be_deleted_files["a"] == {Path("AA000002")} 117 | assert wal_file_deleter.to_be_deleted_files["b"] == {Path("AA000001")} 118 | 119 | # Should do nothing 120 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000001", site="b")) 121 | time.sleep(TEST_WAIT_TIME) 122 | assert 
wal_file_deleter.running 123 | assert wal_file_deleter.os_unlink_mock.call_count == 1 124 | assert len(wal_file_deleter.to_be_deleted_files) == 2 125 | assert wal_file_deleter.to_be_deleted_files["a"] == {Path("AA000002")} 126 | assert wal_file_deleter.to_be_deleted_files["b"] == {Path("AA000001")} 127 | 128 | # now advance it on site b 129 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000003", site="b")) 130 | time.sleep(TEST_WAIT_TIME) 131 | assert wal_file_deleter.running 132 | #assert wal_file_deleter.os_unlink_mock.call_count == 2 133 | assert len(wal_file_deleter.to_be_deleted_files) == 2 134 | assert wal_file_deleter.to_be_deleted_files["a"] == {Path("AA000002")} 135 | assert wal_file_deleter.to_be_deleted_files["b"] == {Path("AA000003")} 136 | 137 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000001", site="c")) 138 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000002", site="c")) 139 | wal_file_deleter.wal_file_deletion_queue.put(make_event("AA000003", site="c")) 140 | time.sleep(TEST_WAIT_TIME) 141 | assert wal_file_deleter.to_be_deleted_files["a"] == {Path("AA000002")} 142 | assert wal_file_deleter.to_be_deleted_files["b"] == {Path("AA000003")} 143 | assert wal_file_deleter.to_be_deleted_files["c"] == {Path("AA000003")} 144 | -------------------------------------------------------------------------------- /test/test_walreceiver.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os.path 3 | import time 4 | from unittest import mock 5 | 6 | import psycopg2 7 | import pytest 8 | 9 | from pghoard.wal import get_current_lsn 10 | 11 | from .conftest import PGHoardForTest 12 | from .util import switch_wal, wait_for_xlog 13 | 14 | 15 | def get_transfer_agent_upload_xlog_state(pghoard: PGHoardForTest): 16 | transfer_agent_state = pghoard.transfer_agent_state.get(pghoard.test_site) 17 | if transfer_agent_state is None: 18 | return {} 19 | return transfer_agent_state["upload"]["xlog"] 20 | 21 | 22 | def stop_walreceiver(pghoard: PGHoardForTest): 23 | walreceiver = pghoard.walreceivers.pop(pghoard.test_site) 24 | walreceiver.running = False 25 | walreceiver.join() 26 | return walreceiver.last_flushed_lsn 27 | 28 | 29 | class TestWalReceiver: 30 | @pytest.mark.parametrize("replication_slot", [None, "foobar"]) 31 | def test_walreceiver(self, db, pghoard_walreceiver, replication_slot): 32 | """ 33 | Test the happy-path of the wal receiver. 34 | """ 35 | log = logging.getLogger(__class__.__name__) 36 | conn = db.connect() 37 | conn.autocommit = True 38 | 39 | pghoard = pghoard_walreceiver 40 | node = pghoard.config["backup_sites"][pghoard.test_site]["nodes"][0] 41 | if "slot" in node: 42 | log.warning("using slot %s from config", node["slot"]) 43 | else: 44 | node["slot"] = replication_slot 45 | 46 | # The transfer agent state will be used to check what was uploaded 47 | # Start streaming 48 | pghoard.start_walreceiver(pghoard.test_site, node, None) 49 | # Get the initial wal name of the server 50 | pghoard.walreceivers[pghoard.test_site].initial_lsn_available.wait() 51 | wal_name = pghoard.walreceivers[pghoard.test_site].initial_lsn.walfile_name 52 | # Force a wal rotation 53 | switch_wal(conn) 54 | # Check that we uploaded one file, and it is the right one. 
55 | wait_for_xlog(pghoard, 1) 56 | last_flushed_lsn = stop_walreceiver(pghoard) 57 | # Record the last flushed lsn 58 | state = get_transfer_agent_upload_xlog_state(pghoard) 59 | assert state.get("xlogs_since_basebackup") == 1 60 | assert state.get("latest_filename") == wal_name 61 | 62 | # Generate some more wal while the walreceiver is not running, 63 | # and check that we can fetch it once done using the recorded state 64 | for _ in range(3): 65 | switch_wal(conn) 66 | conn.close() 67 | # The last wal file is the previous one, as the current one is not 68 | # complete. 69 | lsn = get_current_lsn(node) 70 | previous_wal_name = lsn.previous_walfile_start_lsn.walfile_name 71 | pghoard.start_walreceiver(pghoard.test_site, node, last_flushed_lsn) 72 | wait_for_xlog(pghoard, 4) 73 | stop_walreceiver(pghoard) 74 | state = get_transfer_agent_upload_xlog_state(pghoard) 75 | assert state.get("xlogs_since_basebackup") == 4 76 | assert state.get("latest_filename") == previous_wal_name 77 | 78 | @pytest.mark.timeout(60) 79 | def test_walreceiver_database_error(self, db, pghoard_walreceiver): 80 | """Verify that we can recover from a DatabaseError exception 81 | """ 82 | 83 | # Used for monkeypatching a psycopg2 Cursor object 84 | class FakeCursor: 85 | _raised = False 86 | 87 | @classmethod 88 | @property 89 | def raised(cls): 90 | return cls._raised 91 | 92 | @classmethod 93 | def read_message(cls): 94 | cls._raised = True 95 | raise psycopg2.DatabaseError 96 | 97 | conn = db.connect() 98 | conn.autocommit = True 99 | pghoard = pghoard_walreceiver 100 | node = pghoard.config["backup_sites"][pghoard.test_site]["nodes"][0] 101 | pghoard.start_walreceiver(pghoard.test_site, node, None) 102 | # Wait for a Cursor object to be created/assigned 103 | while pghoard.walreceivers[pghoard.test_site].c is None: 104 | time.sleep(0.5) 105 | 106 | # Monkeypatch method in order to raise an exception 107 | with mock.patch.object(pghoard.walreceivers[pghoard.test_site].c, "read_message", FakeCursor.read_message): 108 | while FakeCursor.raised is False: 109 | time.sleep(0.5) 110 | 111 | switch_wal(conn) 112 | wait_for_xlog(pghoard, 1) 113 | conn.close() 114 | 115 | def test_walreceiver_multiple_timelines(self, recovery_db, pghoard_walreceiver_recovery): 116 | """As we want to fetch all timeline history files when starting up, promote a PG instance 117 | to bump the timeline and create a history file. 
118 | """ 119 | recovery_db.run_cmd("pg_ctl", "-D", recovery_db.pgdata, "promote") 120 | pghoard = pghoard_walreceiver_recovery 121 | node = pghoard.config["backup_sites"][pghoard.test_site]["nodes"][0] 122 | pghoard.start_walreceiver(pghoard.test_site, node, None) 123 | with recovery_db.connect() as conn: 124 | switch_wal(conn) 125 | wait_for_xlog(pghoard, 1) 126 | storage = pghoard.get_or_create_site_storage(site=pghoard.test_site) 127 | files = storage.list_path(os.path.join("test_walreceiver_multiple_timelines", "timeline")) 128 | assert len(files) == 1 129 | assert files[0]["name"] == "test_walreceiver_multiple_timelines/timeline/00000002.history" 130 | -------------------------------------------------------------------------------- /test/test_webserver_ipv6.py: -------------------------------------------------------------------------------- 1 | """ 2 | pghoard 3 | 4 | Copyright (c) 2015 Ohmu Ltd 5 | See LICENSE for details 6 | """ 7 | import json 8 | from http.client import HTTPConnection 9 | 10 | from pytest import raises 11 | 12 | 13 | class TestIPV6WebServer: 14 | # verify that existing behaviour of listening on all IPV4 addresses if an empty http_address is 15 | # specified 16 | def test_ipv4_endpoint_with_empty_listen_address(self, pghoard_empty_listen_address): 17 | pghoard_empty_listen_address.write_backup_state_to_json_file() 18 | conn = HTTPConnection(host="127.0.0.1", port=pghoard_empty_listen_address.config["http_port"]) 19 | conn.request("GET", "/status") 20 | response = conn.getresponse() 21 | response_parsed = json.loads(response.read().decode("utf-8")) 22 | assert response.status == 200 23 | assert response_parsed["startup_time"] is not None 24 | 25 | # an empty http_address does not make pghoard listen on IPV6 26 | def test_ipv6_endpoint_fails_with_empty_listen_address(self, pghoard_empty_listen_address): 27 | pghoard_empty_listen_address.write_backup_state_to_json_file() 28 | conn = HTTPConnection(host="::1", port=pghoard_empty_listen_address.config["http_port"]) 29 | with raises(ConnectionRefusedError): 30 | conn.request("GET", "/status") 31 | 32 | def test_ipv4_endpoint_with_ipv4_hostname_listen_address(self, pghoard_ipv4_hostname): 33 | pghoard_ipv4_hostname.write_backup_state_to_json_file() 34 | conn = HTTPConnection(host="127.0.0.1", port=pghoard_ipv4_hostname.config["http_port"]) 35 | conn.request("GET", "/status") 36 | response = conn.getresponse() 37 | response_parsed = json.loads(response.read().decode("utf-8")) 38 | assert response.status == 200 39 | assert response_parsed["startup_time"] is not None 40 | 41 | # an empty http_address does not make pghoard listen on IPV6 42 | def test_ipv6_endpoint_fails_with_ipv4_hostname_listen_address(self, pghoard_ipv4_hostname): 43 | pghoard_ipv4_hostname.write_backup_state_to_json_file() 44 | conn = HTTPConnection(host="::1", port=pghoard_ipv4_hostname.config["http_port"]) 45 | with raises(ConnectionRefusedError): 46 | conn.request("GET", "/status") 47 | 48 | # a IPV6 wildcard (::) supports connecting with the IPV4 lookback address (and in fact any IPV4 interface) 49 | def test_ipv4_endpoint_with_wildcard_ipv6_listen_address(self, pghoard_ipv6_wildcard): 50 | pghoard_ipv6_wildcard.write_backup_state_to_json_file() 51 | conn = HTTPConnection(host="127.0.0.1", port=pghoard_ipv6_wildcard.config["http_port"]) 52 | conn.request("GET", "/status") 53 | response = conn.getresponse() 54 | response_parsed = json.loads(response.read().decode("utf-8")) 55 | assert response.status == 200 56 | assert response_parsed["startup_time"] 
is not None 57 | 58 | def test_ipv6_endpoint_with_wildcard_ipv6_listen_address(self, pghoard_ipv6_wildcard): 59 | pghoard_ipv6_wildcard.write_backup_state_to_json_file() 60 | conn = HTTPConnection(host="::1", port=pghoard_ipv6_wildcard.config["http_port"]) 61 | conn.request("GET", "/status") 62 | response = conn.getresponse() 63 | response_parsed = json.loads(response.read().decode("utf-8")) 64 | assert response.status == 200 65 | assert response_parsed["startup_time"] is not None 66 | 67 | def test_ipv6_endpoint_with_loopback_ipv6_listen_address(self, pghoard_ipv6_loopback): 68 | pghoard_ipv6_loopback.write_backup_state_to_json_file() 69 | conn = HTTPConnection(host="::1", port=pghoard_ipv6_loopback.config["http_port"]) 70 | conn.request("GET", "/status") 71 | response = conn.getresponse() 72 | response_parsed = json.loads(response.read().decode("utf-8")) 73 | assert response.status == 200 74 | assert response_parsed["startup_time"] is not None 75 | 76 | # You cannot connect to pghoard on the IPV4 loopback if it was started with the IPV6 loopback address 77 | def test_ipv4_endpoint_fails_with_loopback_ipv6_listen_address(self, pghoard_ipv6_loopback): 78 | pghoard_ipv6_loopback.write_backup_state_to_json_file() 79 | conn = HTTPConnection(host="127.0.0.1", port=pghoard_ipv6_loopback.config["http_port"]) 80 | with raises(ConnectionRefusedError): 81 | conn.request("GET", "/status") 82 | -------------------------------------------------------------------------------- /test/util.py: -------------------------------------------------------------------------------- 1 | import io 2 | import tarfile 3 | import time 4 | from pathlib import Path 5 | from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Optional, Union, cast 6 | 7 | from rohmu import rohmufile 8 | from rohmu.typing import FileLike 9 | 10 | from pghoard.common import json_encode 11 | 12 | from .conftest import PGHoardForTest 13 | 14 | if TYPE_CHECKING: 15 | from tarfile import _Fileobj # pylint: disable=no-name-in-module 16 | else: 17 | _Fileobj = Any 18 | 19 | 20 | def wait_for_xlog(pghoard: PGHoardForTest, count: int): 21 | start = time.monotonic() 22 | while True: 23 | xlogs = None 24 | # At the start, this is not yet defined 25 | transfer_agent_state_for_site = pghoard.transfer_agent_state.get(pghoard.test_site) 26 | if transfer_agent_state_for_site: 27 | xlogs = transfer_agent_state_for_site["upload"]["xlog"]["xlogs_since_basebackup"] 28 | if xlogs >= count: 29 | break 30 | 31 | if time.monotonic() - start > 15: 32 | assert False, "Expected at least {} xlog uploads, got {}".format(count, xlogs) 33 | 34 | time.sleep(0.1) 35 | 36 | 37 | def switch_wal(connection): 38 | cur = connection.cursor() 39 | # Force allocating a XID, otherwise if there was no activity we will 40 | # stay on the same WAL 41 | # Note: do not combine two function call in one select, PG executes it differently and 42 | # sometimes looks like it generates less WAL files than we wanted 43 | cur.execute("SELECT txid_current()") 44 | cur.execute("CHECKPOINT") 45 | if connection.server_version >= 100000: 46 | cur.execute("SELECT pg_switch_wal()") 47 | else: 48 | cur.execute("SELECT pg_switch_xlog()") 49 | # This should fix flaky tests, which expect a specific number of WAL files which never arrive. 
50 | # Quite often the last WAL would not be finalized by walreceiver unless there is some extra activity after 51 | # switching, the bug should be fixed in PG 15 52 | # https://github.com/postgres/postgres/commit/596ba75cb11173a528c6b6ec0142a282e42b69ec 53 | cur.execute("SELECT txid_current()") 54 | cur.execute("CHECKPOINT") 55 | cur.close() 56 | 57 | 58 | def dict_to_file_obj(fileobj: BinaryIO, data: Dict[str, Any], tar_name: str) -> int: 59 | """Dumps data into a compressed tar file and writes to fileobj, returns the size of the resulting file""" 60 | blob = io.BytesIO(json_encode(data, binary=True)) 61 | ti = tarfile.TarInfo(name=tar_name) 62 | ti.size = len(blob.getbuffer()) 63 | ti.mtime = int(time.time()) 64 | 65 | with rohmufile.file_writer( 66 | compression_algorithm="snappy", compression_level=0, fileobj=cast(FileLike, fileobj) 67 | ) as output_obj: 68 | with tarfile.TarFile(fileobj=cast(Optional[_Fileobj], output_obj), mode="w") as output_tar: 69 | output_tar.addfile(ti, blob) 70 | 71 | return output_obj.tell() 72 | 73 | 74 | def dict_to_tar_file(data: Dict[str, Any], file_path: Union[str, Path], tar_name: str) -> int: 75 | with open(file_path, "wb") as raw_output_obj: 76 | return dict_to_file_obj(raw_output_obj, data=data, tar_name=tar_name) 77 | 78 | 79 | def dict_to_tar_data(data: Dict[str, Any], tar_name: str) -> bytes: 80 | with io.BytesIO() as raw_output_obj: 81 | dict_to_file_obj(raw_output_obj, data=data, tar_name=tar_name) 82 | raw_output_obj.seek(0) 83 | return raw_output_obj.read() 84 | -------------------------------------------------------------------------------- /update-constraints: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | TEMPDIR="$(mktemp -d)" 3 | python -m venv "$TEMPDIR" 4 | "$TEMPDIR"/bin/pip install . 5 | "$TEMPDIR"/bin/pip install ".[dev]" 6 | "$TEMPDIR"/bin/pip freeze | grep -v "pkg-resources" | sort > constraints.txt 7 | rm -rf "$TEMPDIR" 8 | --------------------------------------------------------------------------------
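The monitoring test modules earlier in this dump (test/monitoring/test_statsd.py and test/monitoring/test_pushgateway.py) exercise pghoard's two metrics backends: a UDP statsd client with telegraf/datadog line formats and a Prometheus pushgateway client. The following is a minimal sketch, not a file from this repository, showing how those clients are constructed with the same configuration keys the tests pass; the host, port, endpoint and tag values are illustrative assumptions, and the PushgatewayClient import path is inferred from the pghoard/monitoring package layout shown in the tree.

    # Sketch only: mirrors the constructor arguments and methods asserted in the monitoring tests.
    from pghoard.monitoring.pushgateway import PushgatewayClient
    from pghoard.monitoring.statsd import StatsClient

    # UDP statsd metrics; "format" may be "telegraf" or "datadog", and a None
    # host or port disables sending (see test_none_host/port_disables_monitoring).
    statsd = StatsClient({"host": "127.0.0.1", "port": 8125, "format": "telegraf", "tags": {"site": "default"}})
    statsd.gauge("something", 123, tags={"foo": "bar"})        # e.g. something,foo=bar,site=default:123|g
    statsd.increase("something", inc_value=10)                 # counter increment
    statsd.unexpected_exception(ValueError("x"), where="app")  # emits a pghoard.exception counter

    # Prometheus pushgateway metrics; an empty "endpoint" disables pushing
    # (see test_empty_endpoints_disables_monitoring). Metric names are normalised,
    # e.g. "a-metric.value" is pushed as "a_metric_value".
    gateway = PushgatewayClient(
        config={
            "endpoint": "http://127.0.0.1:9091",  # illustrative address
            "job": "pghoard",
            "instance": "node1",
            "tags": {"site": "default"},
        }
    )
    gateway.gauge("a-metric.value", 123)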