├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ └── release.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE.txt ├── README.md ├── nfpm.yaml ├── partitionmanager ├── __init__.py ├── cli.py ├── cli_test.py ├── database_helpers.py ├── database_helpers_test.py ├── dropper.py ├── dropper_test.py ├── migrate.py ├── migrate_test.py ├── sql.py ├── sql_test.py ├── stats.py ├── stats_test.py ├── table_append_partition.py ├── table_append_partition_test.py ├── tools.py ├── tools_test.py ├── types.py └── types_test.py ├── pyproject.toml ├── pytest.ini └── test_tools └── fake_mariadb.sh /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Keep GitHub Actions up to date with GitHub's Dependabot... 2 | # https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot 3 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem 4 | version: 2 5 | updates: 6 | - package-ecosystem: github-actions 7 | directory: / 8 | groups: 9 | github-actions: 10 | patterns: 11 | - "*" # Group all Actions updates into a single larger pull request 12 | schedule: 13 | interval: weekly 14 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Partition Manager CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | 7 | lint: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v4 12 | - name: Set up Python 3.9 13 | uses: actions/setup-python@v5 14 | with: 15 | python-version: 3.9 16 | 17 | - name: Install Linting Tools 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install --user pylint==3.1.0 pytest ruff validate-pyproject[all] 21 | 22 | - name: Install Partition Manager 23 | run: | 24 | pip install --editable ".[pymysql]" 25 | 26 | - name: Analysing the code with pylint 27 | run: | 28 | python -m pylint --errors-only partitionmanager 29 | 30 | - name: Lint Python code with Ruff 31 | run: | 32 | python -m ruff check --output-format=github 33 | 34 | - name: Checking format with Ruff 35 | run: | 36 | python -m ruff format --check . 
37 | 38 | - name: Checking pyproject 39 | run: | 40 | validate-pyproject pyproject.toml 41 | 42 | test: 43 | runs-on: ubuntu-latest 44 | 45 | steps: 46 | - uses: actions/checkout@v4 47 | - name: Set up Python 3.9 48 | uses: actions/setup-python@v5 49 | with: 50 | python-version: 3.9 51 | - name: Install Partition Manager 52 | run: | 53 | pip install --editable ".[pymysql]" 54 | - name: Install PyTest 55 | run: | 56 | pip install pytest 57 | - name: Run PyTest 58 | run: | 59 | pytest --junitxml=test-results/junit.xml 60 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | permissions: 4 | contents: write 5 | packages: write 6 | statuses: write 7 | pull-requests: read 8 | 9 | on: 10 | push: 11 | tags: 12 | - "v*" 13 | workflow_dispatch: 14 | 15 | jobs: 16 | release: 17 | name: release 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - name: Setup python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: '3.x' 25 | architecture: 'x64' 26 | 27 | - name: Install packages 28 | run: | 29 | sudo apt-get update 30 | sudo apt-get install -y build-essential python3-pip 31 | pip3 install build 32 | 33 | - name: Checkout 34 | uses: actions/checkout@v4 35 | with: 36 | fetch-depth: 0 37 | 38 | - name: Get version from git tag 39 | id: get_version 40 | uses: battila7/get-version-action@v2 41 | 42 | - name: Build partition-manager 43 | run: | 44 | python3 -m build 45 | sha256sum dist/*.whl dist/*.tar.gz >dist/sha256sums 46 | 47 | - name: "Publish release" 48 | uses: "marvinpinto/action-automatic-releases@919008cf3f741b179569b7a6fb4d8860689ab7f0" 49 | with: 50 | repo_token: "${{ secrets.GITHUB_TOKEN }}" 51 | automatic_release_tag: "${{ steps.get_version.outputs.version }}" 52 | title: "partition-manager ${{ steps.get_version.outputs.version }}" 53 | files: | 54 | dist/sha256sums 55 | dist/*.whl 56 | dist/*.tar.gz 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | .venv 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: check-ast 6 | - id: check-merge-conflict 7 | - id: detect-private-key 8 | - id: end-of-file-fixer 9 | - id: requirements-txt-fixer 10 | - id: trailing-whitespace 11 | 12 | - repo: https://github.com/codespell-project/codespell 13 | rev: v2.2.6 14 | hooks: 15 | - id: codespell 16 | additional_dependencies: 17 | - tomli 18 | 19 | - repo: https://github.com/astral-sh/ruff-pre-commit 20 | rev: v0.3.0 21 | hooks: 22 | - id: ruff 23 | - id: ruff-format 24 | 25 | - repo: https://github.com/PyCQA/pylint 26 | rev: v3.1.0 27 | hooks: 28 | - id: pylint 29 | args: 30 | - --errors-only 31 | additional_dependencies: 32 | - PyMySQL 33 | - pyyaml 34 | - pytest 35 | - setuptools 36 | - repo: https://github.com/abravalheri/validate-pyproject 37 | rev: v0.16 38 | hooks: 39 | - id: validate-pyproject 40 | - repo: local 41 | hooks: 42 | - id: pytest 43 | name: Python Tests 44 | language: system 45 | entry: python3 -m pytest 46 | pass_filenames: false 47 | files: '.py$' 48 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2016 ISRG. All rights reserved. 2 | 3 | Mozilla Public License Version 2.0 4 | ================================== 5 | 6 | 1. Definitions 7 | -------------- 8 | 9 | 1.1. 
"Contributor" 10 | means each individual or legal entity that creates, contributes to 11 | the creation of, or owns Covered Software. 12 | 13 | 1.2. "Contributor Version" 14 | means the combination of the Contributions of others (if any) used 15 | by a Contributor and that particular Contributor's Contribution. 16 | 17 | 1.3. "Contribution" 18 | means Covered Software of a particular Contributor. 19 | 20 | 1.4. "Covered Software" 21 | means Source Code Form to which the initial Contributor has attached 22 | the notice in Exhibit A, the Executable Form of such Source Code 23 | Form, and Modifications of such Source Code Form, in each case 24 | including portions thereof. 25 | 26 | 1.5. "Incompatible With Secondary Licenses" 27 | means 28 | 29 | (a) that the initial Contributor has attached the notice described 30 | in Exhibit B to the Covered Software; or 31 | 32 | (b) that the Covered Software was made available under the terms of 33 | version 1.1 or earlier of the License, but not also under the 34 | terms of a Secondary License. 35 | 36 | 1.6. "Executable Form" 37 | means any form of the work other than Source Code Form. 38 | 39 | 1.7. "Larger Work" 40 | means a work that combines Covered Software with other material, in 41 | a separate file or files, that is not Covered Software. 42 | 43 | 1.8. "License" 44 | means this document. 45 | 46 | 1.9. "Licensable" 47 | means having the right to grant, to the maximum extent possible, 48 | whether at the time of the initial grant or subsequently, any and 49 | all of the rights conveyed by this License. 50 | 51 | 1.10. "Modifications" 52 | means any of the following: 53 | 54 | (a) any file in Source Code Form that results from an addition to, 55 | deletion from, or modification of the contents of Covered 56 | Software; or 57 | 58 | (b) any new file in Source Code Form that contains any Covered 59 | Software. 60 | 61 | 1.11. "Patent Claims" of a Contributor 62 | means any patent claim(s), including without limitation, method, 63 | process, and apparatus claims, in any patent Licensable by such 64 | Contributor that would be infringed, but for the grant of the 65 | License, by the making, using, selling, offering for sale, having 66 | made, import, or transfer of either its Contributions or its 67 | Contributor Version. 68 | 69 | 1.12. "Secondary License" 70 | means either the GNU General Public License, Version 2.0, the GNU 71 | Lesser General Public License, Version 2.1, the GNU Affero General 72 | Public License, Version 3.0, or any later versions of those 73 | licenses. 74 | 75 | 1.13. "Source Code Form" 76 | means the form of the work preferred for making modifications. 77 | 78 | 1.14. "You" (or "Your") 79 | means an individual or a legal entity exercising rights under this 80 | License. For legal entities, "You" includes any entity that 81 | controls, is controlled by, or is under common control with You. For 82 | purposes of this definition, "control" means (a) the power, direct 83 | or indirect, to cause the direction or management of such entity, 84 | whether by contract or otherwise, or (b) ownership of more than 85 | fifty percent (50%) of the outstanding shares or beneficial 86 | ownership of such entity. 87 | 88 | 2. License Grants and Conditions 89 | -------------------------------- 90 | 91 | 2.1. 
Grants 92 | 93 | Each Contributor hereby grants You a world-wide, royalty-free, 94 | non-exclusive license: 95 | 96 | (a) under intellectual property rights (other than patent or trademark) 97 | Licensable by such Contributor to use, reproduce, make available, 98 | modify, display, perform, distribute, and otherwise exploit its 99 | Contributions, either on an unmodified basis, with Modifications, or 100 | as part of a Larger Work; and 101 | 102 | (b) under Patent Claims of such Contributor to make, use, sell, offer 103 | for sale, have made, import, and otherwise transfer either its 104 | Contributions or its Contributor Version. 105 | 106 | 2.2. Effective Date 107 | 108 | The licenses granted in Section 2.1 with respect to any Contribution 109 | become effective for each Contribution on the date the Contributor first 110 | distributes such Contribution. 111 | 112 | 2.3. Limitations on Grant Scope 113 | 114 | The licenses granted in this Section 2 are the only rights granted under 115 | this License. No additional rights or licenses will be implied from the 116 | distribution or licensing of Covered Software under this License. 117 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 118 | Contributor: 119 | 120 | (a) for any code that a Contributor has removed from Covered Software; 121 | or 122 | 123 | (b) for infringements caused by: (i) Your and any other third party's 124 | modifications of Covered Software, or (ii) the combination of its 125 | Contributions with other software (except as part of its Contributor 126 | Version); or 127 | 128 | (c) under Patent Claims infringed by Covered Software in the absence of 129 | its Contributions. 130 | 131 | This License does not grant any rights in the trademarks, service marks, 132 | or logos of any Contributor (except as may be necessary to comply with 133 | the notice requirements in Section 3.4). 134 | 135 | 2.4. Subsequent Licenses 136 | 137 | No Contributor makes additional grants as a result of Your choice to 138 | distribute the Covered Software under a subsequent version of this 139 | License (see Section 10.2) or under the terms of a Secondary License (if 140 | permitted under the terms of Section 3.3). 141 | 142 | 2.5. Representation 143 | 144 | Each Contributor represents that the Contributor believes its 145 | Contributions are its original creation(s) or it has sufficient rights 146 | to grant the rights to its Contributions conveyed by this License. 147 | 148 | 2.6. Fair Use 149 | 150 | This License is not intended to limit any rights You have under 151 | applicable copyright doctrines of fair use, fair dealing, or other 152 | equivalents. 153 | 154 | 2.7. Conditions 155 | 156 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 157 | in Section 2.1. 158 | 159 | 3. Responsibilities 160 | ------------------- 161 | 162 | 3.1. Distribution of Source Form 163 | 164 | All distribution of Covered Software in Source Code Form, including any 165 | Modifications that You create or to which You contribute, must be under 166 | the terms of this License. You must inform recipients that the Source 167 | Code Form of the Covered Software is governed by the terms of this 168 | License, and how they can obtain a copy of this License. You may not 169 | attempt to alter or restrict the recipients' rights in the Source Code 170 | Form. 171 | 172 | 3.2. 
Distribution of Executable Form 173 | 174 | If You distribute Covered Software in Executable Form then: 175 | 176 | (a) such Covered Software must also be made available in Source Code 177 | Form, as described in Section 3.1, and You must inform recipients of 178 | the Executable Form how they can obtain a copy of such Source Code 179 | Form by reasonable means in a timely manner, at a charge no more 180 | than the cost of distribution to the recipient; and 181 | 182 | (b) You may distribute such Executable Form under the terms of this 183 | License, or sublicense it under different terms, provided that the 184 | license for the Executable Form does not attempt to limit or alter 185 | the recipients' rights in the Source Code Form under this License. 186 | 187 | 3.3. Distribution of a Larger Work 188 | 189 | You may create and distribute a Larger Work under terms of Your choice, 190 | provided that You also comply with the requirements of this License for 191 | the Covered Software. If the Larger Work is a combination of Covered 192 | Software with a work governed by one or more Secondary Licenses, and the 193 | Covered Software is not Incompatible With Secondary Licenses, this 194 | License permits You to additionally distribute such Covered Software 195 | under the terms of such Secondary License(s), so that the recipient of 196 | the Larger Work may, at their option, further distribute the Covered 197 | Software under the terms of either this License or such Secondary 198 | License(s). 199 | 200 | 3.4. Notices 201 | 202 | You may not remove or alter the substance of any license notices 203 | (including copyright notices, patent notices, disclaimers of warranty, 204 | or limitations of liability) contained within the Source Code Form of 205 | the Covered Software, except that You may alter any license notices to 206 | the extent required to remedy known factual inaccuracies. 207 | 208 | 3.5. Application of Additional Terms 209 | 210 | You may choose to offer, and to charge a fee for, warranty, support, 211 | indemnity or liability obligations to one or more recipients of Covered 212 | Software. However, You may do so only on Your own behalf, and not on 213 | behalf of any Contributor. You must make it absolutely clear that any 214 | such warranty, support, indemnity, or liability obligation is offered by 215 | You alone, and You hereby agree to indemnify every Contributor for any 216 | liability incurred by such Contributor as a result of warranty, support, 217 | indemnity or liability terms You offer. You may include additional 218 | disclaimers of warranty and limitations of liability specific to any 219 | jurisdiction. 220 | 221 | 4. Inability to Comply Due to Statute or Regulation 222 | --------------------------------------------------- 223 | 224 | If it is impossible for You to comply with any of the terms of this 225 | License with respect to some or all of the Covered Software due to 226 | statute, judicial order, or regulation then You must: (a) comply with 227 | the terms of this License to the maximum extent possible; and (b) 228 | describe the limitations and the code they affect. Such description must 229 | be placed in a text file included with all distributions of the Covered 230 | Software under this License. Except to the extent prohibited by statute 231 | or regulation, such description must be sufficiently detailed for a 232 | recipient of ordinary skill to be able to understand it. 233 | 234 | 5. Termination 235 | -------------- 236 | 237 | 5.1. 
The rights granted under this License will terminate automatically 238 | if You fail to comply with any of its terms. However, if You become 239 | compliant, then the rights granted under this License from a particular 240 | Contributor are reinstated (a) provisionally, unless and until such 241 | Contributor explicitly and finally terminates Your grants, and (b) on an 242 | ongoing basis, if such Contributor fails to notify You of the 243 | non-compliance by some reasonable means prior to 60 days after You have 244 | come back into compliance. Moreover, Your grants from a particular 245 | Contributor are reinstated on an ongoing basis if such Contributor 246 | notifies You of the non-compliance by some reasonable means, this is the 247 | first time You have received notice of non-compliance with this License 248 | from such Contributor, and You become compliant prior to 30 days after 249 | Your receipt of the notice. 250 | 251 | 5.2. If You initiate litigation against any entity by asserting a patent 252 | infringement claim (excluding declaratory judgment actions, 253 | counter-claims, and cross-claims) alleging that a Contributor Version 254 | directly or indirectly infringes any patent, then the rights granted to 255 | You by any and all Contributors for the Covered Software under Section 256 | 2.1 of this License shall terminate. 257 | 258 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 259 | end user license agreements (excluding distributors and resellers) which 260 | have been validly granted by You or Your distributors under this License 261 | prior to termination shall survive termination. 262 | 263 | ************************************************************************ 264 | * * 265 | * 6. Disclaimer of Warranty * 266 | * ------------------------- * 267 | * * 268 | * Covered Software is provided under this License on an "as is" * 269 | * basis, without warranty of any kind, either expressed, implied, or * 270 | * statutory, including, without limitation, warranties that the * 271 | * Covered Software is free of defects, merchantable, fit for a * 272 | * particular purpose or non-infringing. The entire risk as to the * 273 | * quality and performance of the Covered Software is with You. * 274 | * Should any Covered Software prove defective in any respect, You * 275 | * (not any Contributor) assume the cost of any necessary servicing, * 276 | * repair, or correction. This disclaimer of warranty constitutes an * 277 | * essential part of this License. No use of any Covered Software is * 278 | * authorized under this License except under this disclaimer. * 279 | * * 280 | ************************************************************************ 281 | 282 | ************************************************************************ 283 | * * 284 | * 7. Limitation of Liability * 285 | * -------------------------- * 286 | * * 287 | * Under no circumstances and under no legal theory, whether tort * 288 | * (including negligence), contract, or otherwise, shall any * 289 | * Contributor, or anyone who distributes Covered Software as * 290 | * permitted above, be liable to You for any direct, indirect, * 291 | * special, incidental, or consequential damages of any character * 292 | * including, without limitation, damages for lost profits, loss of * 293 | * goodwill, work stoppage, computer failure or malfunction, or any * 294 | * and all other commercial damages or losses, even if such party * 295 | * shall have been informed of the possibility of such damages. 
This * 296 | * limitation of liability shall not apply to liability for death or * 297 | * personal injury resulting from such party's negligence to the * 298 | * extent applicable law prohibits such limitation. Some * 299 | * jurisdictions do not allow the exclusion or limitation of * 300 | * incidental or consequential damages, so this exclusion and * 301 | * limitation may not apply to You. * 302 | * * 303 | ************************************************************************ 304 | 305 | 8. Litigation 306 | ------------- 307 | 308 | Any litigation relating to this License may be brought only in the 309 | courts of a jurisdiction where the defendant maintains its principal 310 | place of business and such litigation shall be governed by laws of that 311 | jurisdiction, without reference to its conflict-of-law provisions. 312 | Nothing in this Section shall prevent a party's ability to bring 313 | cross-claims or counter-claims. 314 | 315 | 9. Miscellaneous 316 | ---------------- 317 | 318 | This License represents the complete agreement concerning the subject 319 | matter hereof. If any provision of this License is held to be 320 | unenforceable, such provision shall be reformed only to the extent 321 | necessary to make it enforceable. Any law or regulation which provides 322 | that the language of a contract shall be construed against the drafter 323 | shall not be used to construe this License against a Contributor. 324 | 325 | 10. Versions of the License 326 | --------------------------- 327 | 328 | 10.1. New Versions 329 | 330 | Mozilla Foundation is the license steward. Except as provided in Section 331 | 10.3, no one other than the license steward has the right to modify or 332 | publish new versions of this License. Each version will be given a 333 | distinguishing version number. 334 | 335 | 10.2. Effect of New Versions 336 | 337 | You may distribute the Covered Software under the terms of the version 338 | of the License under which You originally received the Covered Software, 339 | or under the terms of any subsequent version published by the license 340 | steward. 341 | 342 | 10.3. Modified Versions 343 | 344 | If you create software not governed by this License, and you want to 345 | create a new license for such software, you may create and use a 346 | modified version of this License if you rename the license and remove 347 | any references to the name of the license steward (except to note that 348 | such modified license differs from this License). 349 | 350 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 351 | Licenses 352 | 353 | If You choose to distribute Source Code Form that is Incompatible With 354 | Secondary Licenses under the terms of this version of the License, the 355 | notice described in Exhibit B of this License must be attached. 356 | 357 | Exhibit A - Source Code Form License Notice 358 | ------------------------------------------- 359 | 360 | This Source Code Form is subject to the terms of the Mozilla Public 361 | License, v. 2.0. If a copy of the MPL was not distributed with this 362 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 363 | 364 | If it is not possible or desirable to put the notice in a particular 365 | file, then You may include the notice in a location (such as a LICENSE 366 | file in a relevant directory) where a recipient would be likely to look 367 | for such a notice. 368 | 369 | You may add additional accurate notices of copyright ownership. 
370 |
371 | Exhibit B - "Incompatible With Secondary Licenses" Notice
372 | ---------------------------------------------------------
373 |
374 |   This Source Code Form is "Incompatible With Secondary Licenses", as
375 |   defined by the Mozilla Public License, v. 2.0.
376 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Build Status](https://circleci.com/gh/letsencrypt/mariadb-sequential-partition-manager-py.svg?style=shield)](https://circleci.com/gh/letsencrypt/mariadb-sequential-partition-manager-py)
2 | ![Maturity Level: Beta](https://img.shields.io/badge/maturity-beta-blue.svg)
3 |
4 | # Partman
5 |
6 | This tool partitions and manages MariaDB tables by sequential IDs.
7 |
8 | This is primarily a mechanism for dropping large numbers of rows of data without using `DELETE` statements.
9 |
10 | Adding partitions to an unpartitioned InnoDB table requires a full table copy. Once a table is partitioned, the `REORGANIZE PARTITION` command is fast only if it operates on partitions that are empty, i.e., that contain no rows.
11 |
12 | Similar tools:
13 | * https://github.com/davidburger/gomypartition, intended for tables with date-based partitions
14 | * https://github.com/yahoo/mysql_partition_manager, which is archived and in pure SQL
15 |
16 | ## Usage
17 |
18 | ```sh
19 | → git clone https://github.com/letsencrypt/mariadb-sequential-partition-manager-py.git
20 | → cd mariadb-sequential-partition-manager-py
21 | → python3 -m venv .venv
22 | → . .venv/bin/activate
23 | → python3 -m pip install ".[pymysql]"
24 | → tee /tmp/partman.conf.yml <<EOF
25 | partitionmanager:
26 |   dburl: sql://user:password@localhost/db-name
27 |   num_empty: 2
28 |   partition_period:
29 |     days: 7
30 |   tables:
31 |     cats:
32 |       earliest_utc_timestamp_query: >
33 |         SELECT UNIX_TIMESTAMP(`created`) FROM `cats` WHERE `id` > '?' ORDER BY `id` ASC LIMIT 1;
34 |     dogs:
35 |       partition_period:
36 |         days: 30
37 |       earliest_utc_timestamp_query: >
38 |         SELECT UNIX_TIMESTAMP(`c`.`created`) FROM `dogs` AS `d`
39 |         JOIN `cats` AS `c` ON `c`.`house_id` = `d`.`house_id`
40 |         WHERE `d`.`id` > '?'
41 |         ORDER BY `d`.`id` ASC LIMIT 1;
42 |   prometheus_stats: "/tmp/prometheus-textcollect-partition-manager.prom"
43 | EOF
44 | → partition-manager --config /tmp/partman.conf.yml maintain --noop
45 | INFO:root:No-op mode
46 | INFO:partition:Evaluating Table dogs (duration=30 days, 0:00:00) (pos={'id': 150})
47 | INFO:partition:Table dogs planned SQL: ALTER TABLE `dogs` REORGANIZE PARTITION `p_20201204` INTO (PARTITION `p_20210422` VALUES LESS THAN (221), PARTITION `p_20210522` VALUES LESS THAN MAXVALUE);
48 |
49 | dogs:
50 |   sql: ALTER TABLE `dogs` REORGANIZE PARTITION `p_20201204` INTO (PARTITION `p_20210422` VALUES LESS THAN (221), PARTITION `p_20210522` VALUES LESS THAN MAXVALUE);
51 |   noop: True
52 | ```
53 |
54 | ### Running `partman` in your development environment
55 |
56 | ```sh
57 | → git clone https://github.com/letsencrypt/mariadb-sequential-partition-manager-py.git
58 | → cd mariadb-sequential-partition-manager-py
59 | → python3 -m venv .venv
60 | → . .venv/bin/activate
61 | → python3 -m pip install --editable ".[pymysql]"
62 | → partition-manager --log-level=debug \
63 |     --mariadb test_tools/fake_mariadb.sh \
64 |     maintain --noop --table tablename
65 | DEBUG:root:Auto_Increment column identified as id
66 | DEBUG:root:Partition range column identified as id
67 | DEBUG:root:Found partition before = (100)
68 | DEBUG:root:Found tail partition named p_20201204
69 | INFO:root:No-op mode
70 |
71 | ALTER TABLE `dbname`.`tablename` REORGANIZE PARTITION `p_20201204` INTO (PARTITION `p_20201204` VALUES LESS THAN (3101009), PARTITION `p_20210122` VALUES LESS THAN MAXVALUE);
72 | ```
73 |
74 | ## Configuration
75 | You can use a YAML configuration file with the `--config` parameter, of the form:
76 |
77 | ```yaml
78 | partitionmanager:
79 |   dburl: sql://user:password@localhost/db-name
80 |   # or
81 |   # mariadb: /usr/local/bin/mariadb
82 |   partition_period:
83 |     days: 7
84 |   num_empty: 2
85 |
86 |   tables:
87 |     table1:
88 |       retention:
89 |         days: 60
90 |       earliest_utc_timestamp_query: >
91 |         SELECT UNIX_TIMESTAMP(created) FROM table1 WHERE id > ? ORDER BY id ASC LIMIT 1;
92 |     table2:
93 |       partition_period:
94 |         days: 30
95 |       earliest_utc_timestamp_query: >
96 |         SELECT UNIX_TIMESTAMP(created) FROM table2 WHERE id > ? ORDER BY id ASC LIMIT 1;
97 |     table3:
98 |       retention:
99 |         days: 14
100 |       earliest_utc_timestamp_query: >
101 |         SELECT UNIX_TIMESTAMP(created) FROM table3 WHERE id > ? ORDER BY id ASC LIMIT 1;
102 |     table4: {}
103 | ```
104 |
105 | The `earliest_utc_timestamp_query` entries are optional SQL queries that are run during partition map analysis to determine the exact timestamp of the earliest entry in each partition. If you configure such a query for a table, it must return a single row and column: the UTC epoch timestamp of the earliest entry in the partition. The query is expected to contain a single `?` placeholder, which will be replaced with the partition value of that partition.
106 |
107 | For tables which are either partitioned but not yet using this tool's schema, or which have no empty partitions, the `migrate` command can be useful for proposing alterations to run manually. Note that `migrate` proposes commands that are likely to require partial copies of each table, so they will likely require a maintenance period.
108 |
109 | ```sh
110 | partition-manager --mariadb ~/bin/rootsql-dev-primary migrate --out /tmp/migrate.yml --table orders
111 | INFO:write_state_info:Writing current state information
112 | INFO:write_state_info:(Table("orders"): {'id': 9236}),
113 |
114 | # wait some time
115 | partition-manager --mariadb ~/bin/rootsql-dev-primary migrate --in /tmp/migrate.yml --table orders
116 | INFO:calculate_sql_alters:Reading prior state information
117 | INFO:calculate_sql_alters:Table orders, 24.0 hours, [9236] - [29236], [20000] pos_change, [832.706363653845]/hour
118 | orders:
119 |  - ALTER TABLE `orders` REORGANIZE PARTITION `p_20210405` INTO (PARTITION `p_20210416` VALUES LESS THAN (30901), PARTITION `p_20210516` VALUES LESS THAN (630449), PARTITION `p_20210615` VALUES LESS THAN MAXVALUE);
120 | ```
121 |
122 | ## Getting started
123 |
124 | ### Configuring `partman`
125 |
126 | - At startup, if a configuration file is specified as a CLI argument, read that configuration file to set all other values.
127 | - Then, process all remaining command-line arguments, overriding values loaded from the configuration file in case of conflicts (see the example below).
128 | - From those command-line arguments, determine whether to collect statistics (`stats`), determine an initial partition layout (`migrate`), or operate in the normal `maintain` mode.
129 | - Use the configuration information as inputs to the required algorithm.
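
For example, a `--table` argument on the command line replaces the entire `tables` list from the configuration file, so a run can be narrowed to a single table. The configuration file name and table name below are placeholders:

```sh
# partman.conf.yml may configure many tables; this run maintains only `orders`.
partition-manager --config partman.conf.yml maintain --noop --table orders
```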
130 |
131 | ### How does `partman` determine when an additional partition is needed?
132 |
133 | The core algorithm is implemented in the method `get_pending_sql_reorganize_partition_commands` in `table_append_partition.py`. That algorithm is:
134 |
135 | For a given table and that table's intended partition period, the desired end-state is to have:
136 | - All the existing partitions containing data,
137 | - A configurable number of trailing partitions which contain no data, and
138 | - An "active" partition currently being filled with data.
139 |
140 | To make the partitions easier to manage, we give each filled partition a name indicating the approximate date on which it began being filled with data. This date is approximate because once a partition contains data, renaming it is no longer an instant `ALTER` operation; instead, every contained row gets copied. So this tool predicts the date at which each new partition will become the "active" one.
141 |
142 | Inputs:
143 | - The table name
144 | - The intended partition period
145 | - The number of trailing partitions to keep
146 | - The table's current partition list
147 | - The current value(s) of the table's partition ID
148 |
149 | Outputs:
150 | - An intended partition list, changing only the empty partitions, or
151 | - If no partitions can be reorganized, an error.
152 |
153 | Procedure (illustrated by the sketch that follows):
154 | - Using the current values, split the partition list into two sub-lists: empty partitions, and non-empty partitions.
155 | - If there are no empty partitions:
156 |   - Raise an error and halt the algorithm.
157 |
158 | - Perform a statistical regression using each non-empty partition to determine each partition's fill rate.
159 | - Using each partition's fill rate and age, predict the future partition fill rate.
160 | - Create a new list of intended empty partitions.
161 | - For each empty partition:
162 |   - Predict the start-of-fill date using the partition's position relative to the current active partition, the current active partition's date, the partition period, and the future partition fill rate.
163 |   - If the start-of-fill date differs from the partition's name, rename the partition.
164 |   - Append the changed partition to the intended empty partition list.
165 | - While the number of empty partitions is less than the intended number of trailing partitions to keep:
166 |   - Predict the start-of-fill date for a new partition using the previous partition's date and the partition period.
167 |   - Append the new partition to the intended empty partition list.
168 | - Return the lists of non-empty partitions, the current empty partitions, and the post-algorithm intended empty partitions.
169 |
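As a rough sketch of that procedure, consider the following simplified model. The names (`Partition`, `plan_empty_partitions`) are hypothetical, and the flat per-partition average is a stand-in for the statistical regression that the real code in `table_append_partition.py` performs:

```python
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone


@dataclass
class Partition:
    name: str      # e.g. "p_20210422": the approximate start-of-fill date
    position: int  # upper bound of the ID range this partition holds
    rows: int      # row count; 0 for trailing, not-yet-filled partitions


def plan_empty_partitions(partitions, current_position, now, period, num_empty):
    """Propose renamed/appended empty partitions from a predicted fill rate."""
    filled = [p for p in partitions if p.rows > 0]
    empty = [p for p in partitions if p.rows == 0]
    if not empty:
        raise RuntimeError("no empty partitions available to reorganize")

    # Stand-in for the real fill-rate regression: assume each future period
    # fills roughly as many rows as an average filled partition did.
    rows_per_period = sum(p.rows for p in filled) // max(len(filled), 1)

    planned = []
    position, start = current_position, now
    for _ in range(max(len(empty), num_empty)):
        start += period  # the predicted start-of-fill date names the partition
        position += rows_per_period
        planned.append(Partition(f"p_{start:%Y%m%d}", position, 0))
    return filled, empty, planned


parts = [
    Partition("p_20210322", 100_000, 98_000),
    Partition("p_20210422", 200_000, 52_000),
    Partition("p_20210522", 300_000, 0),
]
print(plan_empty_partitions(parts, 152_000,
                            datetime(2021, 5, 1, tzinfo=timezone.utc),
                            timedelta(days=30), 2))
```

The third returned list corresponds to the "post-algorithm intended empty partitions" above; diffing it against the current empty partitions yields the kind of `REORGANIZE PARTITION` statement shown earlier.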
170 | #### How do I run `partman` in `noop` mode?
171 |
172 | The results of the algorithm are converted into `ALTER` statements; if the user configured `--noop`, they're emitted to the console and the logs for each table. If not set to `--noop`, the application will execute the `ALTER`s at the database server and emit the results, including execution time as Prometheus statistics, if so configured.
173 |
174 | #### "Migrate" algorithm
175 |
176 | The migrate mode is a limited form of the "Maintain" algorithm, using a temporary state file to determine rates-of-change. The migrate mode also does not limit itself to affecting only empty partitions: it can and will request changes that prompt row copies, in order to prepare a table for future use of the "Maintain" algorithm.
177 |
178 | ## TODOs
179 |
180 | Lots:
181 | - [x] Support for tables with partitions across multiple columns.
182 | - [ ] A drop mechanism, for one. Initially it should take a retention period and log proposed `DROP` statements, not perform them.
183 | - [ ] Yet more tests, particularly live integration tests with a test DB.
184 |
--------------------------------------------------------------------------------
/nfpm.yaml:
--------------------------------------------------------------------------------
1 | name: "python3-mariadb-sequential-partition-manager"
2 | arch: amd64
3 | version: ${SEMVER}
4 | version_metadata: git
5 |
6 | maintainer: Let's Encrypt
7 | license: MPL
8 | depends:
9 |   - python3-yaml
10 | disable_globbing: false
11 |
12 | contents:
13 |   - src: ./install/usr/**/*
14 |     dst: /usr/
15 |
16 | deb:
17 |
--------------------------------------------------------------------------------
/partitionmanager/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Exported parameters
3 | """
4 |
--------------------------------------------------------------------------------
/partitionmanager/cli.py:
--------------------------------------------------------------------------------
1 | """
2 | Interface for running the partition manager from a CLI.
3 | """
4 |
5 | from datetime import datetime, timedelta, timezone
6 | from pathlib import Path
7 | import argparse
8 | import logging
9 | import time
10 | import traceback
11 | import yaml
12 |
13 | import partitionmanager.database_helpers
14 | import partitionmanager.dropper
15 | import partitionmanager.migrate
16 | import partitionmanager.sql
17 | import partitionmanager.stats
18 | import partitionmanager.table_append_partition as pm_tap
19 | import partitionmanager.types
20 |
21 | PARSER = argparse.ArgumentParser(
22 |     description="""
23 | In already-partitioned tables with an auto_increment key as the partition,
24 | add a new partition at the current auto_increment value.
25 | """
26 | )
27 |
28 | PARSER.add_argument(
29 |     "--log-level",
30 |     default=logging.INFO,
31 |     type=lambda x: getattr(logging, x.upper()),
32 |     help="Configure the logging level.",
33 | )
34 | PARSER.add_argument(
35 |     "--prometheus-stats", type=Path, help="Path to produce a prometheus statistics file"
36 | )
37 | PARSER.add_argument(
38 |     "--config", "-c", type=argparse.FileType("r"), help="Configuration YAML"
39 | )
40 |
41 | GROUP = PARSER.add_mutually_exclusive_group()
42 | GROUP.add_argument("--mariadb", help="Path to mariadb command")
43 | GROUP.add_argument(
44 |     "--dburl",
45 |     type=partitionmanager.types.to_sql_url,
46 |     help="DB connection url, such as sql://user:pass@10.0.0.1:3306/database",
47 | )
48 |
49 |
50 | class Config:
51 |     """Configuration data that the rest of the tooling uses.
52 |
53 |     Can be created from an argparse object of command-line arguments, from
54 |     a YAML file, or both, and may be modified via unit tests.
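55 |
56 |     Note: config_from_args() applies from_argparse() before from_yaml_file();
57 |     the YAML loader skips `tables` and the database command when the CLI has
58 |     already set them, but other YAML keys overwrite CLI-provided values.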
55 | """ 56 | 57 | def __init__(self): 58 | self.tables = set() 59 | self.dbcmd = None 60 | self.noop = True 61 | self.num_empty = 2 62 | self.curtime = datetime.now(tz=timezone.utc) 63 | self.partition_period = timedelta(days=30) 64 | self.prometheus_stats_path = None 65 | self.assume_partitioned_on = None 66 | 67 | def from_argparse(self, args): 68 | """Populate this config from an argparse result. 69 | 70 | Overwrites only what is set by argparse. 71 | """ 72 | if "table" in args and args.table: 73 | for n in args.table: 74 | self.tables.add(partitionmanager.types.Table(n)) 75 | if args.dburl: 76 | self.dbcmd = partitionmanager.sql.IntegratedDatabaseCommand(args.dburl) 77 | elif args.mariadb: 78 | self.dbcmd = partitionmanager.sql.SubprocessDatabaseCommand(args.mariadb) 79 | if "days" in args and args.days: 80 | self.partition_period = timedelta(days=args.days) 81 | if self.partition_period <= timedelta(): 82 | raise ValueError("Negative lifespan is not allowed") 83 | if "noop" in args: 84 | self.noop = args.noop 85 | if "prometheus_stats" in args: 86 | self.prometheus_stats_path = args.prometheus_stats 87 | if "assume_partitioned_on" in args: 88 | self.assume_partitioned_on = args.assume_partitioned_on 89 | 90 | def from_yaml_file(self, file): 91 | """Populate this config from the yaml in the file-like object supplied. 92 | 93 | Overwrites only what is set by the yaml. 94 | """ 95 | data = yaml.safe_load(file) 96 | if "partitionmanager" not in data: 97 | raise TypeError( 98 | "Unexpected YAML format: missing top-level partitionmanager" 99 | ) 100 | data = data["partitionmanager"] 101 | if "tables" not in data or not isinstance(data["tables"], dict): 102 | raise TypeError("Unexpected YAML format: no tables defined") 103 | if "noop" in data: 104 | self.noop = data["noop"] 105 | if "partition_period" in data: 106 | self.partition_period = partitionmanager.types.timedelta_from_dict( 107 | data["partition_period"] 108 | ) 109 | if self.partition_period <= timedelta(): 110 | raise ValueError("Negative lifespan is not allowed") 111 | if "num_empty" in data: 112 | self.num_empty = int(data["num_empty"]) 113 | if not self.dbcmd: 114 | if "dburl" in data: 115 | self.dbcmd = partitionmanager.sql.IntegratedDatabaseCommand( 116 | partitionmanager.types.to_sql_url(data["dburl"]) 117 | ) 118 | elif "mariadb" in data: 119 | self.dbcmd = partitionmanager.sql.SubprocessDatabaseCommand( 120 | data["mariadb"] 121 | ) 122 | if not self.tables: # Only load tables from YAML if not supplied via args 123 | for key in data["tables"]: 124 | tab = partitionmanager.types.Table(key) 125 | tabledata = data["tables"][key] 126 | if isinstance(tabledata, dict) and "retention_period" in tabledata: 127 | tab.set_retention_period( 128 | partitionmanager.types.timedelta_from_dict( 129 | tabledata["retention_period"] 130 | ) 131 | ) 132 | if isinstance(tabledata, dict) and "partition_period" in tabledata: 133 | tab.set_partition_period( 134 | partitionmanager.types.timedelta_from_dict( 135 | tabledata["partition_period"] 136 | ) 137 | ) 138 | if ( 139 | isinstance(tabledata, dict) 140 | and "earliest_utc_timestamp_query" in tabledata 141 | ): 142 | tab.set_earliest_utc_timestamp_query( 143 | partitionmanager.types.SqlQuery( 144 | tabledata["earliest_utc_timestamp_query"] 145 | ) 146 | ) 147 | 148 | self.tables.add(tab) 149 | if "prometheus_stats" in data: 150 | self.prometheus_stats_path = Path(data["prometheus_stats"]) 151 | 152 | 153 | def config_from_args(args): 154 | """Helper that produces a Config from the arguments. 
155 | 156 | Loads referenced YAML after the argparse completes. 157 | """ 158 | conf = Config() 159 | conf.from_argparse(args) 160 | if args.config: 161 | conf.from_yaml_file(args.config) 162 | if not conf.dbcmd: 163 | raise ValueError("Either dburl or mariadb must be set in the configuration") 164 | return conf 165 | 166 | 167 | def is_read_only(conf): 168 | """Pre-flight test whether the database is read-only; returns True/False.""" 169 | rows = conf.dbcmd.run("SELECT @@READ_ONLY;") 170 | if len(rows) != 1: 171 | raise ValueError("Couldn't determine READ_ONLY status") 172 | return rows.pop()["@@READ_ONLY"] == 1 173 | 174 | 175 | def _extract_single_column(row): 176 | """Assert that there's only one column in this row, and get it.""" 177 | columns = list(row.keys()) 178 | assert len(columns) == 1, "Expecting a single column" 179 | return row[columns[0]] 180 | 181 | 182 | def partition_cmd(args): 183 | """Runs do_partition on the config that results from the CLI arguments. 184 | 185 | Helper for argparse. 186 | """ 187 | conf = config_from_args(args) 188 | return do_partition(conf) 189 | 190 | 191 | SUBPARSERS = PARSER.add_subparsers(dest="subparser_name") 192 | PARTITION_PARSER = SUBPARSERS.add_parser("maintain", help="maintain partitions") 193 | PARTITION_PARSER.add_argument( 194 | "--noop", 195 | "-n", 196 | action="store_true", 197 | help="Don't attempt to commit changes, just print", 198 | ) 199 | PARTITION_PARSER.add_argument( 200 | "--days", "-d", type=int, help="Lifetime of each partition in days" 201 | ) 202 | PARTITION_PARSER.add_argument( 203 | "--table", 204 | "-t", 205 | type=partitionmanager.types.SqlInput, 206 | nargs="+", 207 | help="table names, overwriting config", 208 | ) 209 | PARTITION_PARSER.set_defaults(func=partition_cmd) 210 | 211 | 212 | def stats_cmd(args): 213 | """Runs do_stats on the config that results from the CLI arguments. 214 | 215 | Helper for argparse. 216 | """ 217 | conf = config_from_args(args) 218 | return do_stats(conf) 219 | 220 | 221 | STATS_PARSER = SUBPARSERS.add_parser("stats", help="get stats for partitions") 222 | STATS_PARSER.set_defaults(func=stats_cmd) 223 | 224 | 225 | def migrate_cmd(args): 226 | """Runs migration actions on the config that results from the CLI arguments. 227 | 228 | Helper for argparse. 
229 | """ 230 | conf = config_from_args(args) 231 | 232 | if args.outfile: 233 | partitionmanager.migrate.write_state_info(conf, args.outfile) 234 | 235 | if args.infile: 236 | return partitionmanager.migrate.calculate_sql_alters_from_state_info( 237 | conf, args.infile 238 | ) 239 | return {} 240 | 241 | 242 | MIGRATE_PARSER = SUBPARSERS.add_parser( 243 | "migrate", help="migrate partitions that haven't been used with this tool before" 244 | ) 245 | MIGRATE_GROUP = MIGRATE_PARSER.add_mutually_exclusive_group() 246 | MIGRATE_GROUP.add_argument( 247 | "--in", "-i", dest="infile", type=argparse.FileType("r"), help="input YAML" 248 | ) 249 | MIGRATE_GROUP.add_argument( 250 | "--out", "-o", dest="outfile", type=argparse.FileType("w"), help="output YAML" 251 | ) 252 | MIGRATE_PARSER.add_argument( 253 | "--table", 254 | "-t", 255 | type=partitionmanager.types.SqlInput, 256 | nargs="+", 257 | help="table names, overwriting config", 258 | ) 259 | MIGRATE_PARSER.add_argument( 260 | "--assume-partitioned-on", 261 | type=partitionmanager.types.SqlInput, 262 | action="append", 263 | help="Assume tables are partitioned by this column name, can be specified " 264 | "multiple times for multi-column partitions", 265 | ) 266 | MIGRATE_PARSER.set_defaults(func=migrate_cmd) 267 | 268 | 269 | def _partition_table(conf, log, table, metrics): 270 | if table_problems := pm_tap.get_table_compatibility_problems(conf.dbcmd, table): 271 | log.error(f"Cannot proceed: {table} {table_problems}") 272 | return None 273 | 274 | map_data = pm_tap.get_partition_map(conf.dbcmd, table) 275 | 276 | duration = table.partition_period or conf.partition_period 277 | 278 | log.info(f"Evaluating {table} (duration={duration})") 279 | cur_pos = partitionmanager.database_helpers.get_position_of_table( 280 | conf.dbcmd, table, map_data 281 | ) 282 | 283 | sql_cmds = pm_tap.get_pending_sql_reorganize_partition_commands( 284 | database=conf.dbcmd, 285 | table=table, 286 | partition_list=map_data["partitions"], 287 | current_position=cur_pos, 288 | allowed_lifespan=duration, 289 | num_empty_partitions=conf.num_empty, 290 | evaluation_time=conf.curtime, 291 | ) 292 | 293 | if not sql_cmds: 294 | log.debug(f"{table} has no pending SQL updates.") 295 | return None 296 | 297 | composite_sql_command = "\n".join(sql_cmds) 298 | 299 | if conf.noop: 300 | log.info(f"{table} planned SQL: {composite_sql_command}") 301 | return {"sql": composite_sql_command, "noop": True} 302 | 303 | log.info(f"{table} running SQL: {composite_sql_command}") 304 | 305 | time_start = datetime.now(tz=timezone.utc) 306 | output = conf.dbcmd.run(composite_sql_command) 307 | time_end = datetime.now(tz=timezone.utc) 308 | metrics.add( 309 | "alter_time_seconds", 310 | table.name, 311 | (time_end - time_start).total_seconds(), 312 | ) 313 | 314 | log.info(f"{table} results: {output}") 315 | return {"sql": composite_sql_command, "output": output} 316 | 317 | 318 | def do_partition(conf): 319 | """Produces SQL statements to manage partitions per the supplied configuration. 320 | 321 | If the configuration does not set the noop flag, this runs those statements 322 | as well. 
323 | """ 324 | log = logging.getLogger("partition") 325 | 326 | # Preflight 327 | if is_read_only(conf): 328 | log.info("Database is read-only, only emitting statistics") 329 | if conf.prometheus_stats_path: 330 | do_stats(conf) 331 | return {} 332 | 333 | if conf.noop: 334 | log.info("Running in noop mode, no changes will be made") 335 | 336 | metrics = partitionmanager.stats.PrometheusMetrics() 337 | metrics.describe( 338 | "alter_time_seconds", 339 | help_text="Time in seconds to complete the ALTER command", 340 | type_name="gauge", 341 | ) 342 | metrics.describe( 343 | "alter_errors", 344 | help_text="Number of errors observed during ALTER commands", 345 | type_name="counter", 346 | ) 347 | 348 | all_results = {} 349 | for table in conf.tables: 350 | try: 351 | if results := _partition_table(conf, log, table, metrics): 352 | all_results[table.name] = results 353 | 354 | except partitionmanager.types.NoEmptyPartitionsAvailableException: 355 | log.warning( 356 | "Unable to automatically handle %s: No empty " 357 | "partition is available.", 358 | table, 359 | ) 360 | except partitionmanager.types.DatabaseCommandException as e: 361 | log.warning("Failed to automatically handle %s: %s", table, e) 362 | metrics.add("alter_errors", table.name, 1) 363 | except ( 364 | partitionmanager.types.TableEmptyException, 365 | partitionmanager.types.NoValidRatesOfChangeException, 366 | ) as e: 367 | log.warning("Table %s appears to be empty (%s). Skipping.", table, e) 368 | except (ValueError, Exception) as e: 369 | log.warning("Failed to handle %s: %s", table, e) 370 | metrics.add("alter_errors", table.name, 1) 371 | 372 | if conf.prometheus_stats_path: 373 | do_stats(conf, metrics=metrics) 374 | return all_results 375 | 376 | 377 | def do_stats(conf, metrics=None): 378 | """Populates a metrics object from the tables in the configuration.""" 379 | 380 | log = logging.getLogger("do_stats") 381 | 382 | if not metrics: 383 | metrics = partitionmanager.stats.PrometheusMetrics() 384 | 385 | all_results = {} 386 | for table in conf.tables: 387 | table_problems = pm_tap.get_table_compatibility_problems(conf.dbcmd, table) 388 | if table_problems: 389 | log.debug(f"Cannot gather statistics for {table}: {table_problems}") 390 | continue 391 | 392 | map_data = pm_tap.get_partition_map(conf.dbcmd, table) 393 | statistics = partitionmanager.stats.get_statistics( 394 | map_data["partitions"], conf.curtime, table 395 | ) 396 | all_results[table.name] = statistics 397 | 398 | if conf.prometheus_stats_path: 399 | metrics.describe( 400 | "total", help_text="Total number of partitions", type_name="counter" 401 | ) 402 | metrics.describe( 403 | "time_remaining_until_partition_overrun", 404 | help_text="The time in seconds until a table's partitions can no longer be " 405 | "maintained. 
Negative times indicate faulted tables.", 406 | type_name="gauge", 407 | ) 408 | metrics.describe( 409 | "age_of_retained_partitions", 410 | help_text="The age in seconds of the first partition for the table, " 411 | "indicating the retention of data in the table.", 412 | type_name="gauge", 413 | ) 414 | metrics.describe( 415 | "mean_delta_seconds", 416 | help_text="Mean seconds between partitions", 417 | type_name="gauge", 418 | ) 419 | metrics.describe( 420 | "max_delta_seconds", 421 | help_text="Maximum seconds between partitions", 422 | type_name="gauge", 423 | ) 424 | metrics.describe( 425 | "last_run_timestamp", 426 | help_text="The timestamp of the last run", 427 | type_name="gauge", 428 | ) 429 | 430 | for table, results in all_results.items(): 431 | if "partitions" in results: 432 | metrics.add("total", table, results["partitions"]) 433 | if "time_since_newest_partition" in results: 434 | metrics.add( 435 | "time_remaining_until_partition_overrun", 436 | table, 437 | -1 * results["time_since_newest_partition"].total_seconds(), 438 | ) 439 | if "time_since_oldest_partition" in results: 440 | metrics.add( 441 | "age_of_retained_partitions", 442 | table, 443 | results["time_since_oldest_partition"].total_seconds(), 444 | ) 445 | if "mean_partition_delta" in results: 446 | metrics.add( 447 | "mean_delta_seconds", 448 | table, 449 | results["mean_partition_delta"].total_seconds(), 450 | ) 451 | if "max_partition_delta" in results: 452 | metrics.add( 453 | "max_delta_seconds", 454 | table, 455 | results["max_partition_delta"].total_seconds(), 456 | ) 457 | 458 | metrics.add("last_run_timestamp", None, time.time()) 459 | with conf.prometheus_stats_path.open(mode="w", encoding="utf-8") as fp: 460 | metrics.render(fp) 461 | return all_results 462 | 463 | 464 | def drop_cmd(args): 465 | """Calculates drop. 466 | Helper for argparse. 
467 | """ 468 | conf = config_from_args(args) 469 | return do_find_drops_for_tables(conf) 470 | 471 | 472 | DROP_PARSER = SUBPARSERS.add_parser("drop", help="drop old partitions") 473 | DROP_PARSER.set_defaults(func=drop_cmd) 474 | 475 | 476 | def do_find_drops_for_tables(conf): 477 | all_results = {} 478 | for table in conf.tables: 479 | log = logging.getLogger(f"do_find_drops_for_tables:{table.name}") 480 | 481 | if not table.has_date_query: 482 | log.warning(f"Cannot process {table}: no date query specified") 483 | continue 484 | 485 | if not table.retention_period: 486 | log.warning(f"Cannot process {table}: no retention specified") 487 | continue 488 | 489 | try: 490 | table_problems = pm_tap.get_table_compatibility_problems(conf.dbcmd, table) 491 | if table_problems: 492 | log.debug(f"Cannot process {table}: {table_problems}") 493 | continue 494 | 495 | map_data = pm_tap.get_partition_map(conf.dbcmd, table) 496 | current_position = partitionmanager.database_helpers.get_position_of_table( 497 | conf.dbcmd, table, map_data 498 | ) 499 | 500 | droppable = partitionmanager.dropper.get_droppable_partitions( 501 | conf.dbcmd, 502 | map_data["partitions"], 503 | current_position, 504 | conf.curtime, 505 | table, 506 | ) 507 | 508 | all_results[table.name] = droppable 509 | except Exception as e: 510 | log.warning(f"Error processing table {table.name}") 511 | raise e 512 | return all_results 513 | 514 | 515 | def main(): 516 | """Start here.""" 517 | args = PARSER.parse_args() 518 | log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 519 | logging.basicConfig(level=args.log_level, format=log_format) 520 | if "func" not in args: 521 | PARSER.print_help() 522 | return 523 | 524 | try: 525 | output = args.func(args) 526 | for key in output: 527 | print(f"{key}:") 528 | if isinstance(output[key], dict): 529 | for k, v in output[key].items(): 530 | print(f" {k}: {v}") 531 | elif isinstance(output[key], list): 532 | for v in output[key]: 533 | print(f"# {v}") 534 | else: 535 | print(f" {output[key]}") 536 | except Exception as e: 537 | logging.warning(f"Couldn't complete command: {args.subparser_name}") 538 | logging.warning(traceback.format_exc()) 539 | raise e 540 | 541 | 542 | if __name__ == "__main__": 543 | main() 544 | -------------------------------------------------------------------------------- /partitionmanager/cli_test.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import unittest 3 | import pymysql 4 | import yaml 5 | from datetime import datetime, timezone 6 | from pathlib import Path 7 | from .cli import ( 8 | migrate_cmd, 9 | config_from_args, 10 | do_partition, 11 | drop_cmd, 12 | PARSER, 13 | partition_cmd, 14 | stats_cmd, 15 | ) 16 | from .migrate import calculate_sql_alters_from_state_info 17 | 18 | 19 | fake_exec = Path(__file__).absolute().parent.parent / "test_tools/fake_mariadb.sh" 20 | nonexistant_exec = fake_exec.parent / "not_real" 21 | 22 | 23 | def insert_into_file(fp, data): 24 | fp.write(data.encode("utf-8")) 25 | fp.seek(0) 26 | 27 | 28 | def get_config_from_args_and_yaml(args, yaml, time): 29 | with tempfile.NamedTemporaryFile() as tmpfile: 30 | insert_into_file(tmpfile, yaml) 31 | args.config = tmpfile 32 | conf = config_from_args(args) 33 | conf.curtime = time 34 | return conf 35 | 36 | 37 | def run_partition_cmd_yaml(yaml): 38 | with tempfile.NamedTemporaryFile() as tmpfile: 39 | insert_into_file(tmpfile, yaml) 40 | args = PARSER.parse_args(["--config", tmpfile.name, "maintain"]) 41 | 
return partition_cmd(args) 42 | 43 | 44 | def partition_cmd_at_time(args, time): 45 | conf = config_from_args(args) 46 | conf.curtime = time 47 | return do_partition(conf) 48 | 49 | 50 | class TestPartitionCmd(unittest.TestCase): 51 | maxDiff = None 52 | 53 | def test_partition_cmd_no_exec(self): 54 | args = PARSER.parse_args( 55 | [ 56 | "--mariadb", 57 | str(nonexistant_exec), 58 | "maintain", 59 | "--noop", 60 | "--table", 61 | "testtable", 62 | ] 63 | ) 64 | with self.assertRaises(FileNotFoundError): 65 | partition_cmd(args) 66 | 67 | def test_partition_cmd_noop(self): 68 | args = PARSER.parse_args( 69 | [ 70 | "--mariadb", 71 | str(fake_exec), 72 | "maintain", 73 | "--noop", 74 | "--table", 75 | "testtable_noop", 76 | ] 77 | ) 78 | output = partition_cmd_at_time(args, datetime(2020, 11, 8, tzinfo=timezone.utc)) 79 | 80 | self.assertEqual( 81 | { 82 | "testtable_noop": { 83 | "sql": ( 84 | "ALTER TABLE `testtable_noop` WAIT 6 REORGANIZE PARTITION " 85 | "`p_20201204` INTO " 86 | "(PARTITION `p_20201112` VALUES LESS THAN (548), " 87 | "PARTITION `p_20201212` VALUES LESS THAN MAXVALUE);" 88 | ), 89 | "noop": True, 90 | } 91 | }, 92 | output, 93 | ) 94 | 95 | def test_partition_cmd_final(self): 96 | args = PARSER.parse_args( 97 | ["--mariadb", str(fake_exec), "maintain", "--table", "testtable_commit"] 98 | ) 99 | output = partition_cmd_at_time(args, datetime(2020, 11, 8, tzinfo=timezone.utc)) 100 | 101 | self.assertEqual( 102 | { 103 | "testtable_commit": { 104 | "output": [], 105 | "sql": ( 106 | "ALTER TABLE `testtable_commit` WAIT 6 REORGANIZE PARTITION " 107 | "`p_20201204` INTO " 108 | "(PARTITION `p_20201112` VALUES LESS THAN (548), " 109 | "PARTITION `p_20201212` VALUES LESS THAN MAXVALUE);" 110 | ), 111 | } 112 | }, 113 | output, 114 | ) 115 | 116 | def test_partition_cmd_several_tables(self): 117 | args = PARSER.parse_args( 118 | [ 119 | "--mariadb", 120 | str(fake_exec), 121 | "maintain", 122 | "--table", 123 | "testtable", 124 | "another_table", 125 | ] 126 | ) 127 | output = partition_cmd(args) 128 | 129 | self.assertEqual(len(output), 2) 130 | self.assertSetEqual(set(output), {"testtable", "another_table"}) 131 | 132 | def test_partition_unpartitioned_table(self): 133 | o = run_partition_cmd_yaml( 134 | f""" 135 | partitionmanager: 136 | tables: 137 | test: 138 | unpartitioned: 139 | mariadb: {str(fake_exec)} 140 | """ 141 | ) 142 | self.assertSequenceEqual(list(o), ["test"]) 143 | 144 | def test_partition_cmd_invalid_yaml(self): 145 | with self.assertRaises(TypeError): 146 | run_partition_cmd_yaml( 147 | """ 148 | data: 149 | tables: 150 | what 151 | """ 152 | ) 153 | 154 | def test_partition_cmd_no_tables(self): 155 | with self.assertRaises(TypeError): 156 | run_partition_cmd_yaml( 157 | f""" 158 | partitionmanager: 159 | mariadb: {str(fake_exec)} 160 | tables: 161 | """ 162 | ) 163 | 164 | def test_partition_cmd_one_table(self): 165 | o = run_partition_cmd_yaml( 166 | f""" 167 | partitionmanager: 168 | mariadb: {str(fake_exec)} 169 | tables: 170 | test_with_retention: 171 | retention: 172 | days: 10 173 | """ 174 | ) 175 | self.assertSequenceEqual(list(o), ["test_with_retention"]) 176 | 177 | def test_partition_cmd_two_tables(self): 178 | o = run_partition_cmd_yaml( 179 | f""" 180 | partitionmanager: 181 | tables: 182 | test: 183 | test_with_retention: 184 | retention: 185 | days: 10 186 | mariadb: {str(fake_exec)} 187 | """ 188 | ) 189 | self.assertSetEqual(set(o), {"test", "test_with_retention"}) 190 | 191 | def test_partition_period_daily(self): 192 | o = 
run_partition_cmd_yaml( 193 | f""" 194 | partitionmanager: 195 | partition_period: 196 | days: 1 197 | tables: 198 | partitioned_last_week: 199 | partitioned_yesterday: 200 | mariadb: {str(fake_exec)} 201 | """ 202 | ) 203 | self.assertSequenceEqual( 204 | set(o), {"partitioned_last_week", "partitioned_yesterday"} 205 | ) 206 | 207 | def test_partition_period_seven_days(self): 208 | with self.assertLogs("partition", level="DEBUG") as logctx: 209 | o = run_partition_cmd_yaml( 210 | f""" 211 | partitionmanager: 212 | num_empty: 1 213 | partition_period: 214 | days: 7 215 | tables: 216 | partitioned_yesterday: 217 | partitioned_last_week: 218 | mariadb: {str(fake_exec)} 219 | """ 220 | ) 221 | 222 | self.assertEqual( 223 | set(logctx.output), 224 | { 225 | "INFO:partition:Evaluating Table partitioned_last_week " 226 | "(duration=7 days, 0:00:00)", 227 | "DEBUG:partition:Table partitioned_last_week has no pending SQL updates.", # noqa: E501 228 | "INFO:partition:Evaluating Table partitioned_yesterday " 229 | "(duration=7 days, 0:00:00)", 230 | "DEBUG:partition:Table partitioned_yesterday has no pending SQL updates.", # noqa: E501 231 | }, 232 | ) 233 | self.assertSequenceEqual(list(o), []) 234 | 235 | def test_partition_period_different_per_table(self): 236 | o = run_partition_cmd_yaml( 237 | f""" 238 | partitionmanager: 239 | partition_period: 240 | days: 7 241 | tables: 242 | partitioned_yesterday: 243 | partition_period: 244 | days: 1 245 | partitioned_last_week: 246 | mariadb: {str(fake_exec)} 247 | """ 248 | ) 249 | self.assertSequenceEqual( 250 | set(o), {"partitioned_yesterday", "partitioned_last_week"} 251 | ) 252 | 253 | def test_partition_with_db_url(self): 254 | with self.assertRaises(pymysql.err.OperationalError): 255 | run_partition_cmd_yaml( 256 | """ 257 | partitionmanager: 258 | tables: 259 | test: 260 | unpartitioned: 261 | dburl: sql://user@localhost:9999/fake_database 262 | """ 263 | ) 264 | 265 | 266 | class TestStatsCmd(unittest.TestCase): 267 | def assert_stats_results(self, results): 268 | self.assertEqual(results["partitioned_yesterday"]["partitions"], 3) 269 | self.assertLess( 270 | results["partitioned_yesterday"]["time_since_newest_partition"].days, 2 271 | ) 272 | self.assertLess( 273 | results["partitioned_yesterday"]["time_since_oldest_partition"].days, 43 274 | ) 275 | self.assertGreater( 276 | results["partitioned_yesterday"]["mean_partition_delta"].days, 2 277 | ) 278 | self.assertGreater( 279 | results["partitioned_yesterday"]["max_partition_delta"].days, 2 280 | ) 281 | 282 | def parse_prometheus_outfile(self, prom_file): 283 | lines = prom_file.split("\n") 284 | metrics = {} 285 | for line in lines: 286 | if not line.startswith("#") and len(line) > 0: 287 | key, value = line.split(" ") 288 | metrics[key] = value 289 | return metrics 290 | 291 | def assert_stats_prometheus_outfile(self, prom_file): 292 | metrics = self.parse_prometheus_outfile(prom_file) 293 | 294 | for table in ["partitioned_last_week", "partitioned_yesterday", "other"]: 295 | self.assertIn(f'partition_total{{table="{table}"}}', metrics) 296 | self.assertIn( 297 | f'partition_time_remaining_until_partition_overrun{{table="{table}"}}', 298 | metrics, 299 | ) 300 | self.assertIn( 301 | f'partition_age_of_retained_partitions{{table="{table}"}}', metrics 302 | ) 303 | self.assertIn(f'partition_mean_delta_seconds{{table="{table}"}}', metrics) 304 | self.assertIn(f'partition_max_delta_seconds{{table="{table}"}}', metrics) 305 | self.assertIn("partition_last_run_timestamp{}", metrics) 306 | 307 | 
def test_stats_cli_flag(self): 308 | args = PARSER.parse_args(["--mariadb", str(fake_exec), "stats"]) 309 | results = stats_cmd(args) 310 | assert results == {} 311 | 312 | def test_stats_yaml(self): 313 | with tempfile.NamedTemporaryFile( 314 | mode="w+", encoding="UTF-8" 315 | ) as stats_outfile, tempfile.NamedTemporaryFile() as tmpfile: 316 | yaml = f""" 317 | partitionmanager: 318 | mariadb: {str(fake_exec)} 319 | prometheus_stats: {stats_outfile.name} 320 | tables: 321 | other: 322 | partitioned_last_week: 323 | partitioned_yesterday: 324 | """ 325 | insert_into_file(tmpfile, yaml) 326 | args = PARSER.parse_args(["--config", tmpfile.name, "stats"]) 327 | 328 | results = stats_cmd(args) 329 | 330 | self.assert_stats_results(results) 331 | self.assert_stats_prometheus_outfile(stats_outfile.read()) 332 | 333 | def test_stats_yaml_ignore_unconfigured_tables(self): 334 | with tempfile.NamedTemporaryFile( 335 | mode="w+", encoding="UTF-8" 336 | ) as stats_outfile, tempfile.NamedTemporaryFile() as tmpfile: 337 | yaml = f""" 338 | partitionmanager: 339 | mariadb: {str(fake_exec)} 340 | prometheus_stats: {stats_outfile.name} 341 | tables: 342 | other: 343 | """ 344 | insert_into_file(tmpfile, yaml) 345 | args = PARSER.parse_args(["--config", tmpfile.name, "stats"]) 346 | 347 | results = stats_cmd(args) 348 | 349 | assert list(results.keys()) == ["other"] 350 | 351 | out_data = stats_outfile.read() 352 | 353 | metrics = self.parse_prometheus_outfile(out_data) 354 | assert list(metrics.keys()) == [ 355 | 'partition_total{table="other"}', 356 | 'partition_time_remaining_until_partition_overrun{table="other"}', 357 | 'partition_age_of_retained_partitions{table="other"}', 358 | 'partition_mean_delta_seconds{table="other"}', 359 | 'partition_max_delta_seconds{table="other"}', 360 | "partition_last_run_timestamp{}", 361 | ] 362 | 363 | 364 | class TestConfig(unittest.TestCase): 365 | def test_cli_tables_override_yaml(self): 366 | args = PARSER.parse_args( 367 | [ 368 | "--mariadb", 369 | str(fake_exec), 370 | "maintain", 371 | "--table", 372 | "table_one", 373 | "table_two", 374 | ] 375 | ) 376 | conf = get_config_from_args_and_yaml( 377 | args, 378 | """ 379 | partitionmanager: 380 | tables: 381 | table_a: 382 | table_b: 383 | table_c: 384 | """, 385 | datetime.now(tz=timezone.utc), 386 | ) 387 | self.assertEqual({str(x.name) for x in conf.tables}, {"table_one", "table_two"}) 388 | 389 | def test_cli_mariadb_override_yaml(self): 390 | args = PARSER.parse_args(["--mariadb", "/usr/bin/true", "stats"]) 391 | conf = get_config_from_args_and_yaml( 392 | args, 393 | """ 394 | partitionmanager: 395 | mariadb: /dev/null 396 | tables: 397 | one: 398 | """, 399 | datetime.now(tz=timezone.utc), 400 | ) 401 | self.assertEqual(conf.dbcmd.exe, "/usr/bin/true") 402 | 403 | def test_cli_sqlurl_override_yaml(self): 404 | args = PARSER.parse_args( 405 | ["--dburl", "sql://user:pass@127.0.0.1:3306/database", "stats"] 406 | ) 407 | with self.assertRaises(pymysql.err.OperationalError): 408 | get_config_from_args_and_yaml( 409 | args, 410 | """ 411 | partitionmanager: 412 | mariadb: /dev/null 413 | tables: 414 | one: 415 | """, 416 | datetime.now(tz=timezone.utc), 417 | ) 418 | 419 | def test_migrate_cmd_out(self): 420 | with tempfile.NamedTemporaryFile() as outfile: 421 | args = PARSER.parse_args( 422 | [ 423 | "--mariadb", 424 | str(fake_exec), 425 | "migrate", 426 | "--out", 427 | outfile.name, 428 | "--table", 429 | "partitioned_yesterday", 430 | "two", 431 | ] 432 | ) 433 | 434 | output = migrate_cmd(args) 435 | 
self.assertEqual({}, output) 436 | 437 | out_yaml = yaml.safe_load(Path(outfile.name).read_text()) 438 | self.assertTrue("time" in out_yaml) 439 | self.assertTrue(isinstance(out_yaml["time"], datetime)) 440 | del out_yaml["time"] 441 | 442 | self.assertEqual( 443 | out_yaml, 444 | {"tables": {"partitioned_yesterday": {"id": 150}, "two": {"id": 150}}}, 445 | ) 446 | 447 | def test_migrate_cmd_out_unpartitioned(self): 448 | with tempfile.NamedTemporaryFile() as outfile: 449 | args = PARSER.parse_args( 450 | [ 451 | "--mariadb", 452 | str(fake_exec), 453 | "migrate", 454 | "--out", 455 | outfile.name, 456 | "--table", 457 | "unpartitioned", 458 | "two", 459 | ] 460 | ) 461 | 462 | with self.assertRaisesRegex( 463 | Exception, "Table unpartitioned is not partitioned" 464 | ): 465 | migrate_cmd(args) 466 | 467 | def test_migrate_cmd_out_unpartitioned_with_override(self): 468 | with tempfile.NamedTemporaryFile() as outfile: 469 | args = PARSER.parse_args( 470 | [ 471 | "--mariadb", 472 | str(fake_exec), 473 | "migrate", 474 | "--assume-partitioned-on", 475 | "id", 476 | "--out", 477 | outfile.name, 478 | "--table", 479 | "unpartitioned", 480 | ] 481 | ) 482 | output = migrate_cmd(args) 483 | self.assertEqual({}, output) 484 | 485 | out_yaml = yaml.safe_load(Path(outfile.name).read_text()) 486 | self.assertTrue("time" in out_yaml) 487 | self.assertTrue(isinstance(out_yaml["time"], datetime)) 488 | del out_yaml["time"] 489 | 490 | self.assertEqual(out_yaml, {"tables": {"unpartitioned": {"id": 150}}}) 491 | 492 | def test_migrate_cmd_in(self): 493 | with tempfile.NamedTemporaryFile(mode="w+") as infile: 494 | yaml.dump( 495 | { 496 | "tables": {"partitioned_yesterday": {"id": 50}, "two": {"id": 0}}, 497 | "time": datetime(2021, 4, 1, tzinfo=timezone.utc), 498 | }, 499 | infile, 500 | ) 501 | 502 | args = PARSER.parse_args( 503 | [ 504 | "--mariadb", 505 | str(fake_exec), 506 | "migrate", 507 | "--in", 508 | infile.name, 509 | "--table", 510 | "partitioned_yesterday", 511 | "two", 512 | ] 513 | ) 514 | 515 | conf = config_from_args(args) 516 | conf.assume_partitioned_on = ["id"] 517 | conf.curtime = datetime(2021, 4, 21, tzinfo=timezone.utc) 518 | self.maxDiff = None 519 | 520 | output = calculate_sql_alters_from_state_info( 521 | conf, Path(infile.name).open("r") 522 | ) 523 | self.assertEqual( 524 | output, 525 | { 526 | "partitioned_yesterday": [ 527 | "DROP TABLE IF EXISTS partitioned_yesterday_new_20210421;", 528 | "CREATE TABLE partitioned_yesterday_new_20210421 " 529 | "LIKE partitioned_yesterday;", 530 | "ALTER TABLE partitioned_yesterday_new_20210421 " 531 | "REMOVE PARTITIONING;", 532 | "ALTER TABLE partitioned_yesterday_new_20210421 " 533 | "PARTITION BY RANGE (id) (", 534 | "\tPARTITION p_assumed VALUES LESS THAN MAXVALUE", 535 | ");", 536 | "ALTER TABLE `partitioned_yesterday_new_20210421` WAIT 6 " 537 | "REORGANIZE PARTITION `p_assumed` INTO (PARTITION " 538 | "`p_20210421` VALUES LESS THAN (150), PARTITION " 539 | "`p_20210521` VALUES LESS THAN (300), PARTITION " 540 | "`p_20210620` VALUES LESS THAN MAXVALUE);", 541 | "CREATE OR REPLACE TRIGGER copy_inserts_from_" 542 | "partitioned_yesterday_to_partitioned_yesterday", 543 | "\tAFTER INSERT ON partitioned_yesterday FOR EACH ROW", 544 | "\t\tINSERT INTO partitioned_yesterday_new_20210421 SET", 545 | "\t\t\t`id` = NEW.`id`,", 546 | "\t\t\t`serial` = NEW.`serial`;", 547 | "CREATE OR REPLACE TRIGGER copy_updates_from_" 548 | "partitioned_yesterday_to_partitioned_yesterday", 549 | "\tAFTER UPDATE ON partitioned_yesterday FOR EACH ROW", 550 
| "\t\tUPDATE partitioned_yesterday_new_20210421 SET", 551 | "\t\t\t`serial` = NEW.`serial`", 552 | "\t\tWHERE `id` = NEW.`id`;", 553 | ], 554 | "two": [ 555 | "DROP TABLE IF EXISTS two_new_20210421;", 556 | "CREATE TABLE two_new_20210421 LIKE two;", 557 | "ALTER TABLE two_new_20210421 REMOVE PARTITIONING;", 558 | "ALTER TABLE two_new_20210421 PARTITION BY RANGE (id) (", 559 | "\tPARTITION p_assumed VALUES LESS THAN MAXVALUE", 560 | ");", 561 | "ALTER TABLE `two_new_20210421` WAIT 6 REORGANIZE PARTITION " 562 | "`p_assumed` INTO (PARTITION `p_20210421` VALUES " 563 | "LESS THAN (150), PARTITION `p_20210521` VALUES LESS " 564 | "THAN (375), PARTITION `p_20210620` VALUES LESS THAN " 565 | "MAXVALUE);", 566 | "CREATE OR REPLACE TRIGGER copy_inserts_from_two_to_two_new_20210421", # noqa: E501 567 | "\tAFTER INSERT ON two FOR EACH ROW", 568 | "\t\tINSERT INTO two_new_20210421 SET", 569 | "\t\t\t`id` = NEW.`id`,", 570 | "\t\t\t`serial` = NEW.`serial`;", 571 | "CREATE OR REPLACE TRIGGER copy_updates_from_two_to_two_new_20210421", # noqa: E501 572 | "\tAFTER UPDATE ON two FOR EACH ROW", 573 | "\t\tUPDATE two_new_20210421 SET", 574 | "\t\t\t`serial` = NEW.`serial`", 575 | "\t\tWHERE `id` = NEW.`id`;", 576 | ], 577 | }, 578 | ) 579 | 580 | def test_migrate_cmd_in_unpartitioned_with_override(self): 581 | with tempfile.NamedTemporaryFile(mode="w+") as infile: 582 | yaml.dump( 583 | { 584 | "tables": {"unpartitioned": {"id": 50}}, 585 | "time": datetime(2021, 4, 1, tzinfo=timezone.utc), 586 | }, 587 | infile, 588 | ) 589 | 590 | args = PARSER.parse_args( 591 | [ 592 | "--mariadb", 593 | str(fake_exec), 594 | "migrate", 595 | "--assume-partitioned-on", 596 | "id", 597 | "--in", 598 | infile.name, 599 | "--table", 600 | "unpartitioned", 601 | ] 602 | ) 603 | conf = config_from_args(args) 604 | conf.curtime = datetime(2021, 4, 21, tzinfo=timezone.utc) 605 | self.maxDiff = None 606 | 607 | output = calculate_sql_alters_from_state_info( 608 | conf, Path(infile.name).open("r") 609 | ) 610 | 611 | self.assertEqual( 612 | output, 613 | { 614 | "unpartitioned": [ 615 | "DROP TABLE IF EXISTS unpartitioned_new_20210421;", 616 | "CREATE TABLE unpartitioned_new_20210421 LIKE unpartitioned;", 617 | "ALTER TABLE unpartitioned_new_20210421 REMOVE PARTITIONING;", 618 | "ALTER TABLE unpartitioned_new_20210421 PARTITION BY RANGE (id) (", # noqa: E501 619 | "\tPARTITION p_assumed VALUES LESS THAN MAXVALUE", 620 | ");", 621 | "ALTER TABLE `unpartitioned_new_20210421` WAIT 6 REORGANIZE " 622 | "PARTITION `p_assumed` INTO (PARTITION `p_20210421` " 623 | "VALUES LESS THAN (150), PARTITION `p_20210521` VALUES " 624 | "LESS THAN (300), PARTITION `p_20210620` VALUES LESS " 625 | "THAN MAXVALUE);", 626 | "CREATE OR REPLACE TRIGGER copy_inserts_from_" 627 | "unpartitioned_to_unpartitioned_new_20210421", 628 | "\tAFTER INSERT ON unpartitioned FOR EACH ROW", 629 | "\t\tINSERT INTO unpartitioned_new_20210421 SET", 630 | "\t\t\t`id` = NEW.`id`,", 631 | "\t\t\t`serial` = NEW.`serial`;", 632 | "CREATE OR REPLACE TRIGGER copy_updates_from_" 633 | "unpartitioned_to_unpartitioned_new_20210421", 634 | "\tAFTER UPDATE ON unpartitioned FOR EACH ROW", 635 | "\t\tUPDATE unpartitioned_new_20210421 SET", 636 | "\t\t\t`serial` = NEW.`serial`", 637 | "\t\tWHERE `id` = NEW.`id`;", 638 | ] 639 | }, 640 | ) 641 | 642 | def test_migrate_cmd_in_out(self): 643 | with tempfile.NamedTemporaryFile() as outfile, tempfile.NamedTemporaryFile( 644 | mode="w+" 645 | ) as infile: 646 | with self.assertRaises(SystemExit): 647 | PARSER.parse_args( 648 | [ 649 | 
"--mariadb", 650 | str(fake_exec), 651 | "migrate", 652 | "--out", 653 | outfile.name, 654 | "--in", 655 | infile.name, 656 | "--table", 657 | "flip", 658 | ] 659 | ) 660 | 661 | 662 | class TestDropCmd(unittest.TestCase): 663 | def _run_drop_cmd_yaml(self, yaml): 664 | with tempfile.NamedTemporaryFile() as tmpfile: 665 | insert_into_file(tmpfile, yaml) 666 | args = PARSER.parse_args(["--config", tmpfile.name, "drop"]) 667 | return drop_cmd(args) 668 | 669 | def test_drop_invalid_config(self): 670 | with self.assertLogs( 671 | "do_find_drops_for_tables:unused", level="WARNING" 672 | ) as logctx: 673 | self._run_drop_cmd_yaml( 674 | f""" 675 | partitionmanager: 676 | mariadb: {str(fake_exec)} 677 | tables: 678 | unused: 679 | earliest_utc_timestamp_query: > 680 | SELECT UNIX_TIMESTAMP(`issued`) FROM `unused` 681 | WHERE `id` > '?' ORDER BY `id` ASC LIMIT 1; 682 | """ 683 | ) 684 | self.assertEqual( 685 | set(logctx.output), 686 | { 687 | "WARNING:do_find_drops_for_tables:unused:" 688 | "Cannot process Table unused: no retention specified" 689 | }, 690 | ) 691 | 692 | def test_drop_no_sql(self): 693 | with self.assertLogs( 694 | "do_find_drops_for_tables:unused", level="WARNING" 695 | ) as logctx: 696 | self._run_drop_cmd_yaml( 697 | f""" 698 | partitionmanager: 699 | mariadb: {str(fake_exec)} 700 | tables: 701 | unused: 702 | retention_period: 703 | days: 180 704 | """ 705 | ) 706 | self.assertEqual( 707 | set(logctx.output), 708 | { 709 | "WARNING:do_find_drops_for_tables:unused:" 710 | "Cannot process Table unused: no date query specified" 711 | }, 712 | ) 713 | -------------------------------------------------------------------------------- /partitionmanager/database_helpers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for database operations 3 | """ 4 | 5 | from datetime import datetime, timezone 6 | import logging 7 | 8 | import partitionmanager.table_append_partition as pm_tap 9 | import partitionmanager.types 10 | 11 | 12 | def get_position_of_table(database, table, map_data): 13 | """Returns a Position of the table at the current moment.""" 14 | 15 | pos_list = pm_tap.get_current_positions(database, table, map_data["range_cols"]) 16 | 17 | cur_pos = partitionmanager.types.Position() 18 | cur_pos.set_position([pos_list[col] for col in map_data["range_cols"]]) 19 | 20 | return cur_pos 21 | 22 | 23 | def calculate_exact_timestamp_via_query(database, table, position_partition): 24 | """Calculates the exact timestamp of a PositionPartition. 
25 | 26 | raises ValueError if the position is incalculable 27 | """ 28 | 29 | log = logging.getLogger(f"calculate_exact_timestamp_via_query:{table.name}") 30 | 31 | if not table.has_date_query: 32 | raise ValueError("Table has no defined date query") 33 | 34 | if not isinstance(position_partition, partitionmanager.types.PositionPartition): 35 | raise ValueError("Only PositionPartitions are supported") 36 | 37 | if len(position_partition.position) != 1: 38 | raise ValueError( 39 | "This method is only valid for single-column partitions right now" 40 | ) 41 | arg = position_partition.position.as_sql_input()[0] 42 | 43 | sql_select_cmd = table.earliest_utc_timestamp_query.get_statement_with_argument(arg) 44 | log.debug( 45 | "Executing %s to derive partition %s at position %s", 46 | sql_select_cmd, 47 | position_partition.name, 48 | position_partition.position, 49 | ) 50 | 51 | start = datetime.now(tz=timezone.utc) 52 | exact_time_result = database.run(sql_select_cmd) 53 | end = datetime.now(tz=timezone.utc) 54 | 55 | if not len(exact_time_result) == 1: 56 | raise partitionmanager.types.NoExactTimeException("No exact timestamp result") 57 | if not len(exact_time_result[0]) == 1: 58 | raise partitionmanager.types.NoExactTimeException( 59 | "Unexpected column count for the timestamp result" 60 | ) 61 | for value in exact_time_result[0].values(): 62 | exact_time = datetime.fromtimestamp(value, tz=timezone.utc) 63 | break 64 | 65 | log.debug( 66 | "Exact time of %s returned for %s at position %s, query took %s", 67 | exact_time, 68 | position_partition.name, 69 | position_partition.position, 70 | (end - start), 71 | ) 72 | return exact_time 73 | -------------------------------------------------------------------------------- /partitionmanager/database_helpers_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from .database_helpers import get_position_of_table, calculate_exact_timestamp_via_query 4 | 5 | from .types import ( 6 | DatabaseCommand, 7 | NoExactTimeException, 8 | PositionPartition, 9 | SqlInput, 10 | SqlQuery, 11 | Table, 12 | TableEmptyException, 13 | ) 14 | 15 | 16 | class MockDatabase(DatabaseCommand): 17 | def __init__(self): 18 | self._responses = [] 19 | self.num_queries = 0 20 | 21 | def add_response(self, expected, response): 22 | self._responses.insert(0, {"expected": expected, "response": response}) 23 | 24 | def run(self, cmd): 25 | self.num_queries += 1 26 | if not self._responses: 27 | raise Exception(f"No mock responses available for cmd [{cmd}]") 28 | 29 | r = self._responses.pop() 30 | if r["expected"] in cmd: 31 | return r["response"] 32 | 33 | raise Exception(f"Received command [{cmd}] and expected [{r['expected']}]") 34 | 35 | def db_name(self): 36 | return SqlInput("the-database") 37 | 38 | 39 | class TestDatabaseHelpers(unittest.TestCase): 40 | def test_position_of_table(self): 41 | db = MockDatabase() 42 | db.add_response("SELECT id FROM `burgers` ORDER BY", [{"id": 90210}]) 43 | 44 | table = Table("burgers") 45 | data = {"range_cols": ["id"]} 46 | 47 | pos = get_position_of_table(db, table, data) 48 | self.assertEqual(pos.as_list(), [90210]) 49 | 50 | def test_empty_table(self): 51 | db = MockDatabase() 52 | db.add_response("SELECT id FROM `burgers` ORDER BY", []) 53 | 54 | table = Table("burgers") 55 | data = {"range_cols": ["id"]} 56 | 57 | with self.assertRaises(TableEmptyException): 58 | get_position_of_table(db, table, data) 59 | 60 | def test_exact_timestamp_no_query(self): 61 | db = 
MockDatabase() 62 | db.add_response("SELECT id FROM `burgers` ORDER BY", [{"id": 42}]) 63 | 64 | table = Table("burgers") 65 | self.assertFalse(table.has_date_query) 66 | 67 | pos = PositionPartition("p_start") 68 | pos.set_position([42]) 69 | 70 | with self.assertRaises(ValueError): 71 | calculate_exact_timestamp_via_query(db, table, pos) 72 | 73 | def test_exact_timestamp(self): 74 | db = MockDatabase() 75 | db.add_response( 76 | "SELECT UNIX_TIMESTAMP(`cooked`)", [{"UNIX_TIMESTAMP": 17541339060}] 77 | ) 78 | 79 | table = Table("burgers") 80 | table.set_earliest_utc_timestamp_query( 81 | SqlQuery( 82 | "SELECT UNIX_TIMESTAMP(`cooked`) FROM `orders` " 83 | "WHERE `type` = \"burger\" AND `id` > '?' ORDER BY `id` ASC LIMIT 1;" 84 | ) 85 | ) 86 | 87 | pos = PositionPartition("p_start") 88 | pos.set_position([150]) 89 | 90 | ts = calculate_exact_timestamp_via_query(db, table, pos) 91 | assert f"{ts}" == "2525-11-11 18:11:00+00:00" 92 | 93 | def test_no_exact_timestamp(self): 94 | db = MockDatabase() 95 | db.add_response( 96 | "SELECT UNIX_TIMESTAMP(`cooked`)", 97 | [{"UNIX_TIMESTAMP": 17541339060}, {"UNIX_TIMESTAMP": 17541339070}], 98 | ) 99 | 100 | table = Table("burgers") 101 | table.set_earliest_utc_timestamp_query( 102 | SqlQuery( 103 | "SELECT UNIX_TIMESTAMP(`cooked`) FROM `orders` " 104 | "WHERE `type` = \"burger\" AND `id` > '?' ORDER BY `id` ASC LIMIT 1;" 105 | ) 106 | ) 107 | 108 | pos = PositionPartition("p_start") 109 | pos.set_position([150]) 110 | 111 | with self.assertRaises(NoExactTimeException): 112 | calculate_exact_timestamp_via_query(db, table, pos) 113 | 114 | db.add_response( 115 | "SELECT UNIX_TIMESTAMP(`cooked`)", 116 | [{"UNIX_TIMESTAMP": 17541339060, "column2": True}], 117 | ) 118 | 119 | with self.assertRaises(NoExactTimeException): 120 | calculate_exact_timestamp_via_query(db, table, pos) 121 | -------------------------------------------------------------------------------- /partitionmanager/dropper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Determine which partitions can be dropped. 
3 | """ 4 | 5 | import logging 6 | 7 | import partitionmanager.types 8 | import partitionmanager.tools 9 | 10 | 11 | def _drop_statement(table, partition_list): 12 | """Generate an ALTER TABLE statement to drop these partitions.""" 13 | 14 | log = logging.getLogger("get_droppable_partitions") 15 | 16 | if not partition_list: 17 | raise ValueError("Partition list may not be empty") 18 | 19 | partitions = ",".join(f"`{x.name}`" for x in partition_list) 20 | 21 | alter_cmd = f"ALTER TABLE `{table.name}` DROP PARTITION IF EXISTS {partitions};" 22 | 23 | log.debug("Yielding %s", alter_cmd) 24 | 25 | return alter_cmd 26 | 27 | 28 | def get_droppable_partitions( 29 | database, partitions, current_position, current_timestamp, table 30 | ): 31 | """Return a dictionary of partitions which can be dropped and why.""" 32 | log = logging.getLogger("get_droppable_partitions") 33 | results = {} 34 | droppable = [] 35 | 36 | if not table.retention_period: 37 | raise ValueError(f"{table.name} does not have a retention period set") 38 | 39 | if not partitions: 40 | return results 41 | 42 | if sorted(partitions) != partitions: 43 | raise ValueError(f"Supplied partitions are not correctly sorted: {partitions}") 44 | 45 | for partition, next_partition in partitionmanager.tools.pairwise(partitions): 46 | if next_partition >= current_position: 47 | log.debug( 48 | "Stopping at %s because current position %s indicates " 49 | "subsequent partition is empty", 50 | partition, 51 | current_position, 52 | ) 53 | break 54 | 55 | if isinstance(next_partition, partitionmanager.types.MaxValuePartition): 56 | log.debug("Stopping at %s because we can't handle MaxValuePartitions.") 57 | break 58 | 59 | assert isinstance(next_partition, partitionmanager.types.PositionPartition) 60 | 61 | approx_size = 0 62 | for a, b in zip( 63 | next_partition.position.as_list(), partition.position.as_list() 64 | ): 65 | approx_size += a - b 66 | 67 | try: 68 | start_time = ( 69 | partitionmanager.database_helpers.calculate_exact_timestamp_via_query( 70 | database, table, partition 71 | ) 72 | ) 73 | end_time = ( 74 | partitionmanager.database_helpers.calculate_exact_timestamp_via_query( 75 | database, table, next_partition 76 | ) 77 | ) 78 | 79 | oldest_age = current_timestamp - start_time 80 | youngest_age = current_timestamp - end_time 81 | 82 | if youngest_age > table.retention_period: 83 | results[partition.name] = { 84 | "oldest_time": f"{start_time}", 85 | "youngest_time": f"{end_time}", 86 | "oldest_position": partition.position, 87 | "youngest_position": next_partition.position, 88 | "oldest_age": f"{oldest_age}", 89 | "youngest_age": f"{youngest_age}", 90 | "approx_size": approx_size, 91 | } 92 | droppable.append(partition) 93 | except partitionmanager.types.NoExactTimeException: 94 | log.warning( 95 | "Couldn't determine exact times for %s.%s, it is probably droppable " 96 | "too.", 97 | table, 98 | partition, 99 | ) 100 | 101 | results[partition.name] = { 102 | "oldest_time": "unable to determine", 103 | "youngest_time": "unable to determine", 104 | "oldest_position": partition.position, 105 | "youngest_position": next_partition.position, 106 | "oldest_age": "unable to determine", 107 | "youngest_age": "unable to determine", 108 | "approx_size": approx_size, 109 | } 110 | droppable.append(partition) 111 | 112 | if droppable: 113 | results["drop_query"] = _drop_statement(table, droppable) 114 | 115 | return results 116 | -------------------------------------------------------------------------------- 
/partitionmanager/dropper_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from datetime import datetime, timedelta, timezone 3 | 4 | from .dropper import _drop_statement, get_droppable_partitions 5 | from .types import ( 6 | DatabaseCommand, 7 | PositionPartition, 8 | SqlInput, 9 | SqlQuery, 10 | Table, 11 | ) 12 | from .types_test import mkPPart, mkTailPart, mkPos 13 | 14 | 15 | def _timestamp_rsp(year, mo, day): 16 | return [ 17 | {"UNIX_TIMESTAMP": datetime(year, mo, day, tzinfo=timezone.utc).timestamp()} 18 | ] 19 | 20 | 21 | class MockDatabase(DatabaseCommand): 22 | def __init__(self): 23 | self._responses = [] 24 | self.num_queries = 0 25 | 26 | def add_response(self, expected, response): 27 | self._responses.insert(0, {"expected": expected, "response": response}) 28 | 29 | def run(self, cmd): 30 | self.num_queries += 1 31 | if not self._responses: 32 | raise Exception(f"No mock responses available for cmd [{cmd}]") 33 | 34 | r = self._responses.pop() 35 | if r["expected"] in cmd: 36 | return r["response"] 37 | 38 | raise Exception(f"Received command [{cmd}] and expected [{r['expected']}]") 39 | 40 | def db_name(self): 41 | return SqlInput("the-database") 42 | 43 | 44 | class TestDropper(unittest.TestCase): 45 | def test_drop_statement_empty(self): 46 | table = Table("burgers") 47 | parts = [] 48 | with self.assertRaises(ValueError): 49 | _drop_statement(table, parts) 50 | 51 | def test_drop_statement(self): 52 | table = Table("burgers") 53 | parts = [PositionPartition("p_start")] 54 | self.assertEqual( 55 | _drop_statement(table, parts), 56 | "ALTER TABLE `burgers` DROP PARTITION IF EXISTS `p_start`;", 57 | ) 58 | 59 | def test_get_droppable_partitions_invalid_config(self): 60 | database = MockDatabase() 61 | table = Table("burgers") 62 | partitions = [PositionPartition("p_start")] 63 | current_timestamp = datetime(2021, 1, 1, tzinfo=timezone.utc) 64 | current_position = PositionPartition("p_20210102").set_position([10]) 65 | 66 | with self.assertRaises(ValueError): 67 | get_droppable_partitions( 68 | database, partitions, current_position, current_timestamp, table 69 | ) 70 | 71 | def test_no_droppable_partitions(self): 72 | database = MockDatabase() 73 | table = Table("burgers") 74 | table.set_earliest_utc_timestamp_query( 75 | SqlQuery( 76 | "SELECT UNIX_TIMESTAMP(`cooked`) FROM `orders` " 77 | "WHERE `id` > '?' 
ORDER BY `id` ASC LIMIT 1;" 78 | ) 79 | ) 80 | table.set_retention_period(timedelta(days=2)) 81 | current_timestamp = datetime(2021, 1, 1, tzinfo=timezone.utc) 82 | current_position = PositionPartition("p_20210102").set_position([10]) 83 | assert {} == get_droppable_partitions( 84 | database, [], current_position, current_timestamp, table 85 | ) 86 | 87 | def test_get_droppable_partitions(self): 88 | database = MockDatabase() 89 | database.add_response("WHERE `id` > '100'", _timestamp_rsp(2021, 5, 20)) 90 | database.add_response("WHERE `id` > '200'", _timestamp_rsp(2021, 5, 27)) 91 | database.add_response("WHERE `id` > '200'", _timestamp_rsp(2021, 5, 27)) 92 | database.add_response("WHERE `id` > '300'", _timestamp_rsp(2021, 6, 3)) 93 | database.add_response("WHERE `id` > '300'", _timestamp_rsp(2021, 6, 3)) 94 | database.add_response("WHERE `id` > '400'", _timestamp_rsp(2021, 6, 10)) 95 | database.add_response("WHERE `id` > '400'", _timestamp_rsp(2021, 6, 10)) 96 | database.add_response("WHERE `id` > '500'", _timestamp_rsp(2021, 6, 17)) 97 | 98 | table = Table("burgers") 99 | table.set_earliest_utc_timestamp_query( 100 | SqlQuery( 101 | "SELECT UNIX_TIMESTAMP(`cooked`) FROM `orders` " 102 | "WHERE `id` > '?' ORDER BY `id` ASC LIMIT 1;" 103 | ) 104 | ) 105 | current_timestamp = datetime(2021, 7, 1, tzinfo=timezone.utc) 106 | 107 | partitions = [ 108 | mkPPart("1", 100), 109 | mkPPart("2", 200), 110 | mkPPart("3", 300), 111 | mkPPart("4", 400), 112 | mkPPart("5", 500), 113 | mkPPart("6", 600), 114 | mkTailPart("z"), 115 | ] 116 | current_position = mkPos(340) 117 | 118 | table.set_retention_period(timedelta(days=2)) 119 | results = get_droppable_partitions( 120 | database, partitions, current_position, current_timestamp, table 121 | ) 122 | self.assertEqual( 123 | results["drop_query"], 124 | "ALTER TABLE `burgers` DROP PARTITION IF EXISTS `1`,`2`;", 125 | ) 126 | self.assertEqual(results["1"]["oldest_time"], "2021-05-20 00:00:00+00:00") 127 | self.assertEqual(results["1"]["youngest_time"], "2021-05-27 00:00:00+00:00") 128 | self.assertEqual(results["1"]["oldest_position"].as_list(), [100]) 129 | self.assertEqual(results["1"]["youngest_position"].as_list(), [200]) 130 | self.assertEqual(results["1"]["oldest_age"], "42 days, 0:00:00") 131 | self.assertEqual(results["1"]["youngest_age"], "35 days, 0:00:00") 132 | self.assertEqual(results["1"]["approx_size"], 100) 133 | 134 | self.assertEqual(results["2"]["oldest_time"], "2021-05-27 00:00:00+00:00") 135 | self.assertEqual(results["2"]["youngest_time"], "2021-06-03 00:00:00+00:00") 136 | self.assertEqual(results["2"]["oldest_position"].as_list(), [200]) 137 | self.assertEqual(results["2"]["youngest_position"].as_list(), [300]) 138 | self.assertEqual(results["2"]["oldest_age"], "35 days, 0:00:00") 139 | self.assertEqual(results["2"]["youngest_age"], "28 days, 0:00:00") 140 | self.assertEqual(results["2"]["approx_size"], 100) 141 | 142 | def test_get_droppable_partitions_out_of_order(self): 143 | database = MockDatabase() 144 | 145 | table = Table("burgers") 146 | table.set_earliest_utc_timestamp_query( 147 | SqlQuery( 148 | "SELECT UNIX_TIMESTAMP(`cooked`) FROM `orders` " 149 | "WHERE `id` > '?' 
ORDER BY `id` ASC LIMIT 1;" 150 | ) 151 | ) 152 | current_timestamp = datetime(2021, 7, 1, tzinfo=timezone.utc) 153 | 154 | partitions = [ 155 | mkPPart("2", 200), 156 | mkPPart("1", 100), 157 | mkPPart("3", 300), 158 | mkTailPart("z"), 159 | ] 160 | current_position = mkPos(140) 161 | table.set_retention_period(timedelta(days=2)) 162 | 163 | with self.assertRaises(ValueError): 164 | get_droppable_partitions( 165 | database, partitions, current_position, current_timestamp, table 166 | ) 167 | 168 | def test_drop_nothing_to_do(self): 169 | database = MockDatabase() 170 | database.add_response("WHERE `id` > '100'", _timestamp_rsp(2021, 5, 1)) 171 | database.add_response("WHERE `id` > '200'", _timestamp_rsp(2021, 5, 8)) 172 | database.add_response("WHERE `id` > '200'", _timestamp_rsp(2021, 5, 8)) 173 | database.add_response("WHERE `id` > '300'", _timestamp_rsp(2021, 5, 19)) 174 | database.add_response("WHERE `id` > '300'", _timestamp_rsp(2021, 5, 19)) 175 | database.add_response("WHERE `id` > '400'", _timestamp_rsp(2021, 5, 24)) 176 | 177 | table = Table("burgers") 178 | table.set_earliest_utc_timestamp_query( 179 | SqlQuery( 180 | "SELECT UNIX_TIMESTAMP(`cooked`) FROM `orders` " 181 | "WHERE `id` > '?' ORDER BY `id` ASC LIMIT 1;" 182 | ) 183 | ) 184 | current_timestamp = datetime(2021, 6, 1, tzinfo=timezone.utc) 185 | 186 | partitions = [ 187 | mkPPart("1", 100), 188 | mkPPart("2", 200), 189 | mkPPart("3", 300), 190 | mkPPart("4", 400), 191 | mkPPart("5", 500), 192 | mkPPart("6", 600), 193 | mkTailPart("z"), 194 | ] 195 | current_position = mkPos(340) 196 | 197 | table.set_retention_period(timedelta(days=30)) 198 | results = get_droppable_partitions( 199 | database, partitions, current_position, current_timestamp, table 200 | ) 201 | self.assertNotIn("drop_query", results) 202 | 203 | 204 | def test_get_droppable_partitions_no_exact_times(caplog): 205 | database = MockDatabase() 206 | resp = _timestamp_rsp(2021, 5, 20) 207 | resp.extend(_timestamp_rsp(2021, 5, 21)) 208 | database.add_response("WHERE `id` > '100'", resp) 209 | database.add_response("WHERE `id` > '200'", _timestamp_rsp(2021, 5, 27)) 210 | 211 | table = Table("burgers") 212 | table.set_earliest_utc_timestamp_query( 213 | SqlQuery( 214 | "SELECT UNIX_TIMESTAMP(`cooked`) FROM `orders` " 215 | "WHERE `id` > '?' ORDER BY `id` ASC LIMIT 1;" 216 | ) 217 | ) 218 | current_timestamp = datetime(2021, 7, 1, tzinfo=timezone.utc) 219 | 220 | partitions = [ 221 | mkPPart("1", 100), 222 | mkPPart("2", 200), 223 | mkTailPart("z"), 224 | ] 225 | current_position = mkPos(340) 226 | 227 | table.set_retention_period(timedelta(days=2)) 228 | 229 | get_droppable_partitions( 230 | database, partitions, current_position, current_timestamp, table 231 | ) 232 | assert ( 233 | "Couldn't determine exact times for Table burgers.1: (100), it is probably " 234 | "droppable too." in caplog.messages 235 | ) 236 | -------------------------------------------------------------------------------- /partitionmanager/migrate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Bootstrap a table that does not have sufficient partitions to determine rates 3 | of change. 
4 | """ 5 | 6 | from datetime import timedelta 7 | import logging 8 | import operator 9 | import yaml 10 | 11 | import partitionmanager.table_append_partition as pm_tap 12 | import partitionmanager.tools 13 | import partitionmanager.types 14 | 15 | RATE_UNIT = timedelta(hours=1) 16 | MINIMUM_FUTURE_DELTA = timedelta(hours=2) 17 | 18 | 19 | def _override_config_to_map_data(conf): 20 | """Return an analog to get_partition_map from override data in conf""" 21 | return { 22 | "range_cols": [str(x) for x in conf.assume_partitioned_on], 23 | "partitions": [ 24 | partitionmanager.types.MaxValuePartition( 25 | "p_assumed", count=len(conf.assume_partitioned_on) 26 | ) 27 | ], 28 | } 29 | 30 | 31 | def _get_map_data_from_config(conf, table): 32 | """Helper to return a partition map for the table, either directly or 33 | from a configuration override.""" 34 | if not conf.assume_partitioned_on: 35 | problems = pm_tap.get_table_compatibility_problems(conf.dbcmd, table) 36 | if problems: 37 | raise Exception("; ".join(problems)) 38 | return pm_tap.get_partition_map(conf.dbcmd, table) 39 | 40 | return _override_config_to_map_data(conf) 41 | 42 | 43 | def write_state_info(conf, out_fp): 44 | """ 45 | Write the state info for tables defined in conf to the provided file-like 46 | object. 47 | """ 48 | log = logging.getLogger("write_state_info") 49 | 50 | log.info("Writing current state information") 51 | state_info = {"time": conf.curtime, "tables": {}} 52 | for table in conf.tables: 53 | map_data = _get_map_data_from_config(conf, table) 54 | 55 | positions = pm_tap.get_current_positions( 56 | conf.dbcmd, table, map_data["range_cols"] 57 | ) 58 | 59 | log.info(f'(Table("{table.name}"): {positions}),') 60 | state_info["tables"][str(table.name)] = positions 61 | 62 | yaml.dump(state_info, out_fp) 63 | 64 | 65 | def _get_time_offsets(num_entries, first_delta, subseq_delta): 66 | """ 67 | Construct a list of timedeltas of size num_entries of the form 68 | [ first_delta, subseq_delta, [subseq_delta...] ] 69 | """ 70 | if num_entries < 1: 71 | raise ValueError("Must request at least one entry") 72 | 73 | time_units = [first_delta] 74 | while len(time_units) < num_entries: 75 | prev = time_units[-1] 76 | time_units.append(prev + subseq_delta) 77 | return time_units 78 | 79 | 80 | def _plan_partitions_for_time_offsets( 81 | now_time, time_offsets, rate_of_change, ordered_current_pos, max_val_part 82 | ): 83 | """ 84 | Return a list of PlannedPartitions whose positions are predicted to 85 | lie upon the supplied time_offsets, given the initial conditions supplied 86 | in the other parameters. 87 | 88 | types: 89 | time_offsets: an ordered list of timedeltas to plan to reach 90 | 91 | rate_of_change: an ordered list of positions per RATE_UNIT. 
92 | """ 93 | changes = [] 94 | for (i, offset), is_final in partitionmanager.tools.iter_show_end( 95 | enumerate(time_offsets) 96 | ): 97 | increase = [x * (offset / RATE_UNIT) for x in rate_of_change] 98 | predicted_positions = [ 99 | int(p + i) for p, i in zip(ordered_current_pos, increase) 100 | ] 101 | predicted_time = now_time + offset 102 | 103 | part = None 104 | if i == 0: 105 | part = ( 106 | partitionmanager.types.ChangePlannedPartition(max_val_part) 107 | .set_position(predicted_positions) 108 | .set_timestamp(predicted_time) 109 | ) 110 | 111 | else: 112 | part = partitionmanager.types.NewPlannedPartition().set_timestamp( 113 | predicted_time 114 | ) 115 | 116 | if is_final: 117 | part.set_columns(len(predicted_positions)) 118 | else: 119 | part.set_position(predicted_positions) 120 | 121 | changes.append(part) 122 | return changes 123 | 124 | 125 | def _suffix(lines, *, indent="", mid_suffix="", final_suffix=""): 126 | """Helper that suffixes each line with either mid- or final- suffix""" 127 | for line, is_final in partitionmanager.tools.iter_show_end(lines): 128 | if is_final: 129 | yield indent + line + final_suffix 130 | else: 131 | yield indent + line + mid_suffix 132 | 133 | 134 | def _trigger_column_copies(cols): 135 | """Helper that returns lines copying each column for a trigger.""" 136 | for c in cols: 137 | yield f"`{c}` = NEW.`{c}`" 138 | 139 | 140 | def _make_trigger_name(name): 141 | """Helper that enforces the trigger must be <= 64 chars""" 142 | return name[:64] 143 | 144 | 145 | def _generate_sql_copy_commands( 146 | existing_table, map_data, columns, new_table, alter_commands_iter 147 | ): 148 | """Generate a series of SQL commands to start a copy of the existing_table 149 | to a new_table, applying the supplied alterations before starting the 150 | triggers.""" 151 | log = logging.getLogger( 152 | f"_generate_sql_copy_commands:{existing_table.name} to {new_table.name}" 153 | ) 154 | 155 | max_val_part = map_data["partitions"][-1] 156 | if not isinstance(max_val_part, partitionmanager.types.MaxValuePartition): 157 | msg = f"Expected a MaxValue partition, got {max_val_part}" 158 | log.error(msg) 159 | raise Exception(msg) 160 | 161 | range_id_string = ", ".join(map_data["range_cols"]) 162 | 163 | if len(map_data["range_cols"]) == 1: 164 | range_cols_string = "RANGE" 165 | max_val_string = "MAXVALUE" 166 | else: 167 | num_cols = len(map_data["range_cols"]) 168 | range_cols_string = "RANGE COLUMNS" 169 | max_val_string = "(" + ", ".join(["MAXVALUE"] * num_cols) + ")" 170 | 171 | yield f"DROP TABLE IF EXISTS {new_table.name};" 172 | yield f"CREATE TABLE {new_table.name} LIKE {existing_table.name};" 173 | yield f"ALTER TABLE {new_table.name} REMOVE PARTITIONING;" 174 | yield ( 175 | f"ALTER TABLE {new_table.name} PARTITION BY {range_cols_string} " 176 | f"({range_id_string}) (" 177 | ) 178 | yield f"\tPARTITION {max_val_part.name} VALUES LESS THAN {max_val_string}" 179 | yield ");" 180 | 181 | yield from alter_commands_iter 182 | 183 | cols = set(columns) 184 | 185 | inserts_trigger_name = _make_trigger_name( 186 | f"copy_inserts_from_{existing_table.name}_to_{new_table.name}" 187 | ) 188 | 189 | yield f"CREATE OR REPLACE TRIGGER {inserts_trigger_name}" 190 | yield f"\tAFTER INSERT ON {existing_table.name} FOR EACH ROW" 191 | yield f"\t\tINSERT INTO {new_table.name} SET" 192 | 193 | for line in _suffix( 194 | _trigger_column_copies(sorted(cols)), 195 | indent="\t\t\t", 196 | mid_suffix=",", 197 | final_suffix=";", 198 | ): 199 | yield line 200 | 201 | 
update_columns = cols.difference(set(map_data["range_cols"])) 202 | if not update_columns: 203 | log.info("No columns to copy, so no UPDATE trigger being constructed.") 204 | return 205 | 206 | updates_trigger_name = _make_trigger_name( 207 | f"copy_updates_from_{existing_table.name}_to_{new_table.name}" 208 | ) 209 | 210 | yield f"CREATE OR REPLACE TRIGGER {updates_trigger_name}" 211 | yield f"\tAFTER UPDATE ON {existing_table.name} FOR EACH ROW" 212 | yield f"\t\tUPDATE {new_table.name} SET" 213 | 214 | for line in _suffix( 215 | _trigger_column_copies(sorted(update_columns)), indent="\t\t\t", mid_suffix="," 216 | ): 217 | yield line 218 | 219 | yield ( 220 | "\t\tWHERE " 221 | + " AND ".join(_trigger_column_copies(map_data["range_cols"])) 222 | + ";" 223 | ) 224 | 225 | return 226 | 227 | 228 | def calculate_sql_alters_from_state_info(conf, in_fp): 229 | """ 230 | Using the config and the input yaml file-like object, return the SQL 231 | statements to bootstrap the tables in config that also have data in 232 | the input yaml as a dictionary of { Table -> list(SQL ALTER statements) } 233 | """ 234 | log = logging.getLogger("calculate_sql_alters") 235 | 236 | log.info("Reading prior state information") 237 | prior_data = yaml.safe_load(in_fp) 238 | 239 | time_delta = (conf.curtime - prior_data["time"]) / RATE_UNIT 240 | if time_delta <= 0: 241 | raise ValueError( 242 | f"Time delta is too small: {conf.curtime} - " 243 | f"{prior_data['time']} = {time_delta}" 244 | ) 245 | 246 | commands = {} 247 | 248 | for table_name, prior_pos in prior_data["tables"].items(): 249 | table = None 250 | for t in conf.tables: 251 | if t.name == table_name: 252 | table = t 253 | if not table: 254 | log.info(f"Skipping {table_name} as it is not in the current config") 255 | continue 256 | 257 | map_data = _get_map_data_from_config(conf, table) 258 | 259 | current_positions = pm_tap.get_current_positions( 260 | conf.dbcmd, table, map_data["range_cols"] 261 | ) 262 | 263 | columns = [r["Field"] for r in pm_tap.get_columns(conf.dbcmd, table)] 264 | 265 | ordered_current_pos = [ 266 | current_positions[name] for name in map_data["range_cols"] 267 | ] 268 | ordered_prior_pos = [prior_pos[name] for name in map_data["range_cols"]] 269 | 270 | delta_positions = list( 271 | map(operator.sub, ordered_current_pos, ordered_prior_pos) 272 | ) 273 | rate_of_change = [pos / time_delta for pos in delta_positions] 274 | 275 | max_val_part = map_data["partitions"][-1] 276 | if not isinstance(max_val_part, partitionmanager.types.MaxValuePartition): 277 | log.error(f"Expected a MaxValue partition, got {max_val_part}") 278 | raise Exception("Unexpected part?") 279 | 280 | log.info( 281 | f"{table}, {time_delta:0.1f} hours, {ordered_prior_pos} - " 282 | f"{ordered_current_pos}, {delta_positions} pos_change, " 283 | f"{rate_of_change}/hour" 284 | ) 285 | 286 | part_duration = conf.partition_period 287 | if table.partition_period: 288 | part_duration = table.partition_period 289 | 290 | # Choose the times for each partition that we are configured to 291 | # construct, beginning in the near future (see MINIMUM_FUTURE_DELTA), 292 | # to provide a quick changeover into the new partition schema. 
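        # Illustrative example of the arithmetic (assumed values, not from
        # this repository): with num_empty == 2 and a 30-day partition
        # period, _get_time_offsets(3, MINIMUM_FUTURE_DELTA, part_duration)
        # yields offsets of 2 hours, 30 days + 2 hours, and 60 days +
        # 2 hours, so the first new boundary lands two hours from now and
        # each subsequent boundary follows one full period later.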
293 | time_offsets = _get_time_offsets( 294 | 1 + conf.num_empty, MINIMUM_FUTURE_DELTA, part_duration 295 | ) 296 | 297 | changes = _plan_partitions_for_time_offsets( 298 | conf.curtime, 299 | time_offsets, 300 | rate_of_change, 301 | ordered_current_pos, 302 | max_val_part, 303 | ) 304 | 305 | table_new = partitionmanager.types.Table( 306 | f"{table.name}_new_{conf.curtime:%Y%m%d}" 307 | ) 308 | 309 | alter_commands_iter = pm_tap.generate_sql_reorganize_partition_commands( 310 | table_new, changes 311 | ) 312 | 313 | commands[table.name] = list( 314 | _generate_sql_copy_commands( 315 | table, map_data, columns, table_new, alter_commands_iter 316 | ) 317 | ) 318 | return commands 319 | -------------------------------------------------------------------------------- /partitionmanager/migrate_test.py: -------------------------------------------------------------------------------- 1 | import io 2 | import unittest 3 | import yaml 4 | from datetime import datetime, timedelta, timezone 5 | 6 | from .migrate import ( 7 | _generate_sql_copy_commands, 8 | _get_time_offsets, 9 | _suffix, 10 | _trigger_column_copies, 11 | _override_config_to_map_data, 12 | _plan_partitions_for_time_offsets, 13 | calculate_sql_alters_from_state_info, 14 | write_state_info, 15 | ) 16 | from .cli import Config 17 | from .types import ( 18 | DatabaseCommand, 19 | Table, 20 | SqlInput, 21 | MaxValuePartition, 22 | ChangePlannedPartition, 23 | NewPlannedPartition, 24 | ) 25 | 26 | 27 | class MockDatabase(DatabaseCommand): 28 | def __init__(self): 29 | self._response = [] 30 | self._select_response = [[{"id": 150}]] 31 | self.num_queries = 0 32 | 33 | def run(self, cmd): 34 | self.num_queries += 1 35 | 36 | if "CREATE_OPTIONS" in cmd: 37 | return [{"CREATE_OPTIONS": "partitioned"}] 38 | 39 | if "SHOW CREATE TABLE" in cmd: 40 | return [ 41 | { 42 | "Create Table": """CREATE TABLE `burgers` ( 43 | `id` bigint(20) NOT NULL AUTO_INCREMENT, 44 | PRIMARY KEY (`id`), 45 | ) ENGINE=InnoDB AUTO_INCREMENT=150 DEFAULT CHARSET=utf8 46 | PARTITION BY RANGE (`id`) 47 | (PARTITION `p_start` VALUES LESS THAN MAXVALUE ENGINE = InnoDB)""" 48 | } 49 | ] 50 | 51 | if "SELECT" in cmd: 52 | return self._select_response.pop() 53 | 54 | return self._response.pop() 55 | 56 | def db_name(self): 57 | return SqlInput("the-database") 58 | 59 | 60 | class TestBootstrapTool(unittest.TestCase): 61 | def test_writing_state_info(self): 62 | conf = Config() 63 | conf.curtime = datetime(2021, 3, 1, tzinfo=timezone.utc) 64 | conf.dbcmd = MockDatabase() 65 | conf.tables = [Table("test")] 66 | 67 | out = io.StringIO() 68 | 69 | write_state_info(conf, out) 70 | 71 | written_yaml = yaml.safe_load(out.getvalue()) 72 | 73 | self.assertEqual( 74 | written_yaml, {"tables": {"test": {"id": 150}}, "time": conf.curtime} 75 | ) 76 | 77 | def test_get_time_offsets(self): 78 | self.assertEqual( 79 | _get_time_offsets(1, timedelta(hours=4), timedelta(days=30)), 80 | [timedelta(hours=4)], 81 | ) 82 | 83 | self.assertEqual( 84 | _get_time_offsets(2, timedelta(hours=4), timedelta(days=30)), 85 | [timedelta(hours=4), timedelta(days=30, hours=4)], 86 | ) 87 | 88 | self.assertEqual( 89 | _get_time_offsets(3, timedelta(hours=4), timedelta(days=30)), 90 | [ 91 | timedelta(hours=4), 92 | timedelta(days=30, hours=4), 93 | timedelta(days=60, hours=4), 94 | ], 95 | ) 96 | 97 | def test_read_state_info(self): 98 | self.maxDiff = None 99 | conf_past = Config() 100 | conf_past.curtime = datetime(2021, 3, 1, tzinfo=timezone.utc) 101 | conf_past.dbcmd = MockDatabase() 102 | 
conf_past.tables = [Table("test").set_partition_period(timedelta(days=30))] 103 | 104 | state_fs = io.StringIO() 105 | yaml.dump({"tables": {"test": {"id": 0}}, "time": conf_past.curtime}, state_fs) 106 | state_fs.seek(0) 107 | 108 | with self.assertRaises(ValueError): 109 | calculate_sql_alters_from_state_info(conf_past, state_fs) 110 | 111 | conf_now = Config() 112 | conf_now.curtime = datetime(2021, 3, 3, tzinfo=timezone.utc) 113 | conf_now.dbcmd = MockDatabase() 114 | conf_now.dbcmd._response = [ 115 | [ 116 | {"Field": "id", "Type": "bigint UNSIGNED"}, 117 | {"Field": "serial", "Type": "varchar"}, 118 | ] 119 | ] 120 | conf_now.tables = [Table("test").set_partition_period(timedelta(days=30))] 121 | 122 | state_fs.seek(0) 123 | x = calculate_sql_alters_from_state_info(conf_now, state_fs) 124 | self.assertEqual( 125 | x, 126 | { 127 | "test": [ 128 | "DROP TABLE IF EXISTS test_new_20210303;", 129 | "CREATE TABLE test_new_20210303 LIKE test;", 130 | "ALTER TABLE test_new_20210303 REMOVE PARTITIONING;", 131 | "ALTER TABLE test_new_20210303 PARTITION BY RANGE (id) (", 132 | "\tPARTITION p_start VALUES LESS THAN MAXVALUE", 133 | ");", 134 | "ALTER TABLE `test_new_20210303` WAIT 6 REORGANIZE PARTITION `p_start` " # noqa: E501 135 | "INTO (PARTITION `p_20210303` VALUES LESS THAN (156), " 136 | "PARTITION `p_20210402` VALUES LESS THAN (2406), PARTITION " 137 | "`p_20210502` VALUES LESS THAN MAXVALUE);", 138 | "CREATE OR REPLACE TRIGGER copy_inserts_from_test_to_test_new_20210303", # noqa: E501 139 | "\tAFTER INSERT ON test FOR EACH ROW", 140 | "\t\tINSERT INTO test_new_20210303 SET", 141 | "\t\t\t`id` = NEW.`id`,", 142 | "\t\t\t`serial` = NEW.`serial`;", 143 | "CREATE OR REPLACE TRIGGER copy_updates_from_test_to_test_new_20210303", # noqa: E501 144 | "\tAFTER UPDATE ON test FOR EACH ROW", 145 | "\t\tUPDATE test_new_20210303 SET", 146 | "\t\t\t`serial` = NEW.`serial`", 147 | "\t\tWHERE `id` = NEW.`id`;", 148 | ] 149 | }, 150 | ) 151 | 152 | def test_read_state_info_map_table(self): 153 | self.maxDiff = None 154 | conf = Config() 155 | conf.assume_partitioned_on = ["orderID", "authzID"] 156 | conf.curtime = datetime(2021, 3, 3, tzinfo=timezone.utc) 157 | conf.dbcmd = MockDatabase() 158 | conf.dbcmd._select_response = [[{"authzID": 22}], [{"orderID": 11}]] 159 | conf.dbcmd._response = [ 160 | [ 161 | {"Field": "orderID", "Type": "bigint UNSIGNED"}, 162 | {"Field": "authzID", "Type": "bigint UNSIGNED"}, 163 | ] 164 | ] 165 | conf.tables = [Table("map_table").set_partition_period(timedelta(days=30))] 166 | 167 | state_fs = io.StringIO() 168 | yaml.dump( 169 | { 170 | "tables": {"map_table": {"orderID": 10, "authzID": 20}}, 171 | "time": (conf.curtime - timedelta(days=1)), 172 | }, 173 | state_fs, 174 | ) 175 | state_fs.seek(0) 176 | 177 | x = calculate_sql_alters_from_state_info(conf, state_fs) 178 | print(x) 179 | self.assertEqual( 180 | x, 181 | { 182 | "map_table": [ 183 | "DROP TABLE IF EXISTS map_table_new_20210303;", 184 | "CREATE TABLE map_table_new_20210303 LIKE map_table;", 185 | "ALTER TABLE map_table_new_20210303 REMOVE PARTITIONING;", 186 | "ALTER TABLE map_table_new_20210303 PARTITION BY RANGE " 187 | "COLUMNS (orderID, authzID) (", 188 | "\tPARTITION p_assumed VALUES LESS THAN (MAXVALUE, MAXVALUE)", 189 | ");", 190 | "ALTER TABLE `map_table_new_20210303` WAIT 6 REORGANIZE PARTITION " 191 | "`p_assumed` INTO (PARTITION `p_20210303` VALUES LESS THAN " 192 | "(11, 22), PARTITION `p_20210402` VALUES LESS THAN " 193 | "(41, 82), PARTITION `p_20210502` VALUES LESS THAN " 194 | "(MAXVALUE, 
MAXVALUE));", 195 | "CREATE OR REPLACE TRIGGER copy_inserts_from_map_table_" 196 | "to_map_table_new_20210303", 197 | "\tAFTER INSERT ON map_table FOR EACH ROW", 198 | "\t\tINSERT INTO map_table_new_20210303 SET", 199 | "\t\t\t`authzID` = NEW.`authzID`,", 200 | "\t\t\t`orderID` = NEW.`orderID`;", 201 | ] 202 | }, 203 | ) 204 | 205 | def test_trigger_column_copies(self): 206 | self.assertEqual(list(_trigger_column_copies([])), []) 207 | self.assertEqual(list(_trigger_column_copies(["a"])), ["`a` = NEW.`a`"]) 208 | self.assertEqual( 209 | list(_trigger_column_copies(["b", "a", "c"])), 210 | ["`b` = NEW.`b`", "`a` = NEW.`a`", "`c` = NEW.`c`"], 211 | ) 212 | 213 | def test_suffix(self): 214 | self.assertEqual(list(_suffix(["a"])), ["a"]) 215 | self.assertEqual(list(_suffix(["a", "b"])), ["a", "b"]) 216 | self.assertEqual(list(_suffix(["a", "b"], indent=" ")), [" a", " b"]) 217 | self.assertEqual(list(_suffix(["a", "b"], mid_suffix=",")), ["a,", "b"]) 218 | self.assertEqual(list(_suffix(["a", "b"], final_suffix=";")), ["a", "b;"]) 219 | self.assertEqual( 220 | list(_suffix(["a", "b"], mid_suffix=",", final_suffix=";")), ["a,", "b;"] 221 | ) 222 | 223 | def test_generate_sql_copy_commands(self): 224 | conf = Config() 225 | conf.assume_partitioned_on = ["id"] 226 | conf.curtime = datetime(2021, 3, 3, tzinfo=timezone.utc) 227 | conf.dbcmd = MockDatabase() 228 | map_data = _override_config_to_map_data(conf) 229 | cmds = list( 230 | _generate_sql_copy_commands( 231 | Table("old"), 232 | map_data, 233 | ["id", "field"], 234 | Table("new"), 235 | ["STRAIGHT_UP_INSERTED", "STUFF GOES HERE"], 236 | ) 237 | ) 238 | 239 | print(cmds) 240 | self.assertEqual( 241 | cmds, 242 | [ 243 | "DROP TABLE IF EXISTS new;", 244 | "CREATE TABLE new LIKE old;", 245 | "ALTER TABLE new REMOVE PARTITIONING;", 246 | "ALTER TABLE new PARTITION BY RANGE (id) (", 247 | "\tPARTITION p_assumed VALUES LESS THAN MAXVALUE", 248 | ");", 249 | "STRAIGHT_UP_INSERTED", 250 | "STUFF GOES HERE", 251 | "CREATE OR REPLACE TRIGGER copy_inserts_from_old_to_new", 252 | "\tAFTER INSERT ON old FOR EACH ROW", 253 | "\t\tINSERT INTO new SET", 254 | "\t\t\t`field` = NEW.`field`,", 255 | "\t\t\t`id` = NEW.`id`;", 256 | "CREATE OR REPLACE TRIGGER copy_updates_from_old_to_new", 257 | "\tAFTER UPDATE ON old FOR EACH ROW", 258 | "\t\tUPDATE new SET", 259 | "\t\t\t`field` = NEW.`field`", 260 | "\t\tWHERE `id` = NEW.`id`;", 261 | ], 262 | ) 263 | 264 | def test_plan_partitions_for_time_offsets(self): 265 | parts = _plan_partitions_for_time_offsets( 266 | datetime(2021, 3, 3, tzinfo=timezone.utc), 267 | [timedelta(days=60), timedelta(days=360)], 268 | [11943234], 269 | [16753227640], 270 | MaxValuePartition("p_assumed", count=1), 271 | ) 272 | self.assertIsInstance(parts[0], ChangePlannedPartition) 273 | self.assertIsInstance(parts[1], NewPlannedPartition) 274 | -------------------------------------------------------------------------------- /partitionmanager/sql.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interact with SQL databases. 3 | """ 4 | 5 | from collections import defaultdict 6 | import logging 7 | import subprocess 8 | import xml.parsers.expat 9 | 10 | import partitionmanager.types 11 | 12 | 13 | def _destring(text): 14 | """Try and get a python type from a string. 
Used for SQL results."""
15 |     try:
16 |         return int(text)
17 |     except ValueError:
18 |         pass
19 |     try:
20 |         return float(text)
21 |     except ValueError:
22 |         pass
23 |     return text
24 | 
25 | 
26 | class XmlResult:
27 |     """Parses XML results from the mariadb CLI client.
28 | 
29 |     The general schema is:
30 |     <resultset statement="sql query">
31 |       <row>
32 |         <field name="name">data if any</field>
33 |       </row>
34 |     </resultset>
35 | 
36 |     The major hangups are that field can be nil, and field can also be
37 |     of arbitrary size.
38 |     """
39 | 
40 |     def __init__(self):
41 |         self.logger = logging.getLogger("xml")
42 | 
43 |         # The XML debugging is a little much, normally. If we're debugging
44 |         # the parser, comment this out or set it to DEBUG.
45 |         self.logger.setLevel("INFO")
46 | 
47 |         self.xmlparser = xml.parsers.expat.ParserCreate()
48 | 
49 |         self.xmlparser.StartElementHandler = self._start_element
50 |         self.xmlparser.EndElementHandler = self._end_element
51 |         self.xmlparser.CharacterDataHandler = self._char_data
52 | 
53 |         self.rows = None
54 |         self.current_row = None
55 |         self.current_field = None
56 |         self.current_elements = []
57 |         self.statement = None
58 | 
59 |     def parse(self, data):
60 |         """Return rows from an XML Result object."""
61 |         if self.rows is not None:
62 |             raise ValueError("XmlResult objects can only be used once")
63 | 
64 |         self.rows = []
65 |         self.xmlparser.Parse(data)
66 | 
67 |         if self.current_elements:
68 |             raise partitionmanager.types.TruncatedDatabaseResultException(
69 |                 f"These XML tags are unclosed: {self.current_elements}"
70 |             )
71 |         return self.rows
72 | 
73 |     def _start_element(self, name, attrs):
74 |         self.logger.debug(
75 |             f"Element start: {name} {attrs} (Current elements: {self.current_elements})"
76 |         )
77 |         self.current_elements.append(name)
78 | 
79 |         if name == "resultset":
80 |             self.statement = attrs["statement"]
81 |         elif name == "row":
82 |             assert self.current_row is None
83 |             self.current_row = defaultdict(str)
84 |         elif name == "field":
85 |             assert self.current_field is None
86 |             self.current_field = attrs["name"]
87 |             if "xsi:nil" in attrs and attrs["xsi:nil"] == "true":
88 |                 self.current_row[attrs["name"]] = None
89 | 
90 |     def _end_element(self, name):
91 |         self.logger.debug(
92 |             f"Element end: {name} (Current elements: {self.current_elements})"
93 |         )
94 |         assert name == self.current_elements.pop()
95 | 
96 |         if name == "row":
97 |             self.rows.append(self.current_row)
98 |             self.current_row = None
99 |         elif name == "field":
100 |             assert self.current_field is not None
101 |             value = self.current_row[self.current_field]
102 |             if value:
103 |                 self.current_row[self.current_field] = _destring(value)
104 |             self.current_field = None
105 | 
106 |     def _char_data(self, data):
107 |         if self.current_elements[-1] == "field":
108 |             assert self.current_field is not None
109 |             assert self.current_row is not None
110 | 
111 |             self.current_row[self.current_field] += data
112 | 
113 | 
114 | class SubprocessDatabaseCommand(partitionmanager.types.DatabaseCommand):
115 |     """Run a database command via the CLI tool, getting the results in XML form.
116 | 
117 |     This can be very convenient without explicit port-forwarding, but is a
118 |     little slow.
119 |     """
120 | 
121 |     def __init__(self, exe):
122 |         self.exe = exe
123 | 
124 |     def run(self, sql_cmd):
125 |         logging.debug(f"SubprocessDatabaseCommand executing {sql_cmd}")
126 |         try:
127 |             result = subprocess.run(
128 |                 [self.exe, "-X"],
129 |                 input=sql_cmd,
130 |                 stdout=subprocess.PIPE,
131 |                 stderr=subprocess.DEVNULL,
132 |                 encoding="UTF-8",
133 |                 check=True,
134 |             )
135 |             return XmlResult().parse(result.stdout)
136 |         except subprocess.CalledProcessError as cpe:
137 |             logging.error(
138 |                 "SubprocessDatabaseCommand failed, error code %d", cpe.returncode
139 |             )
140 |             logging.error("stdout: %s", cpe.stdout)
141 |             logging.error("stderr: %s", cpe.stderr)
142 |             raise partitionmanager.types.DatabaseCommandException(sql_cmd) from cpe
143 | 
144 |     def db_name(self):
145 |         rows = self.run("SELECT DATABASE();")
146 |         if len(rows) != 1:
147 |             raise partitionmanager.types.TableInformationException(
148 |                 "Expected one result"
149 |             )
150 |         return partitionmanager.types.SqlInput(rows[0]["DATABASE()"])
151 | 
152 | 
153 | class IntegratedDatabaseCommand(partitionmanager.types.DatabaseCommand):
154 |     """Run a database command via a direct socket connection and pymysql.
155 | 
156 |     Pymysql is a pure Python PEP 249-compliant database connector.
157 |     """
158 | 
159 |     def __init__(self, url):
160 |         try:
161 |             import pymysql
162 |             import pymysql.cursors
163 |         except ModuleNotFoundError as mnfe:
164 |             logging.fatal("You cannot use --dburl without the pymysql package.")
165 |             raise mnfe
166 | 
167 |         self.db = None
168 |         if url.path and url.path != "/":
169 |             self.db = url.path.lstrip("/")
170 |         if not self.db:
171 |             raise Exception("You must supply a database name")
172 | 
173 |         self.connection = pymysql.connect(
174 |             host=url.hostname,
175 |             port=url.port,
176 |             user=url.username,
177 |             password=url.password,
178 |             database=self.db,
179 |             cursorclass=pymysql.cursors.DictCursor,
180 |         )
181 | 
182 |     def db_name(self):
183 |         return partitionmanager.types.SqlInput(self.db)
184 | 
185 |     def run(self, sql_cmd):
186 |         logging.debug(f"IntegratedDatabaseCommand executing {sql_cmd}")
187 |         with self.connection.cursor() as cursor:
188 |             cursor.execute(sql_cmd)
189 |             return list(cursor)
--------------------------------------------------------------------------------
/partitionmanager/sql_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from .sql import _destring, XmlResult
3 | from .types import TruncatedDatabaseResultException
4 | 
5 | 
6 | class TestSubprocessParsing(unittest.TestCase):
7 |     def test_destring(self):
8 |         self.assertEqual(_destring("not a number"), "not a number")
9 |         self.assertEqual(_destring("99999"), 99999)
10 |         self.assertEqual(_destring("999.99"), 999.99)
11 |         self.assertEqual(_destring("9.9999"), 9.9999)
12 |         self.assertEqual(_destring("1/2"), "1/2")
13 |         self.assertEqual(_destring("NULL"), "NULL")
14 | 
15 |     def test_single_row(self):
16 |         o = XmlResult().parse(
17 |             """<?xml version="1.0"?>
18 | <resultset statement="SELECT * FROM authz LIMIT 1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
19 |   <row>
20 |     <field name="id">1</field>
21 |     <field name="identifierType">1</field>
22 |     <field name="identifierValue">2</field>
23 |     <field name="registrationID">3</field>
24 |     <field name="status">4</field>
25 |     <field name="expires">2021-02-03 17:48:59</field>
26 |     <field name="challenges">0</field>
27 |     <field name="attempted" xsi:nil="true" />
28 |     <field name="attemptedAt" xsi:nil="true" />
29 |     <field name="token">bogus </field>
30 |     <field name="validationError" xsi:nil="true" />
31 |     <field name="validationRecord" xsi:nil="true" />
32 |   </row>
33 | </resultset>
34 | """
35 |         )
36 |         self.assertEqual(len(o), 1)
37 |         d = o[0]
38 |         self.assertEqual(d["id"], 1)
39 |         self.assertEqual(d["identifierType"], 1)
40 |         self.assertEqual(d["identifierValue"], 2)
41 |         self.assertEqual(d["registrationID"], 3)
42 |         self.assertEqual(d["status"], 4)
43 |         self.assertEqual(d["expires"], "2021-02-03 17:48:59")
44 |         self.assertEqual(d["challenges"], 0)
45 |         self.assertEqual(d["attempted"], None)
46 |         self.assertEqual(d["attemptedAt"], None)
47 |         self.assertEqual(d["token"], "bogus ")
48 |         self.assertEqual(d["validationError"], None)
49 |         self.assertEqual(d["validationRecord"], None)
50 | 
51 |     def test_four_rows(self):
52 |         o = XmlResult().parse(
53 |             """<?xml version="1.0"?>
54 | <resultset statement="SELECT * FROM orderFqdnSets LIMIT 4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
55 |   <row>
56 |     <field name="id">1</field>
57 |     <field name="orderID">1</field>
58 |     <field name="reversedName">wtf.bogus.3c18ed9212e0</field>
59 |   </row>
60 |   <row>
61 |     <field name="id">2</field>
62 |     <field name="orderID">1</field>
63 |     <field name="reversedName">wtf.bogus.8915c54c38d8</field>
64 |   </row>
65 |   <row>
66 |     <field name="id">3</field>
67 |     <field name="orderID">1</field>
68 |     <field name="reversedName">wtf.bogus.86c81cfd8489</field>
69 |   </row>
70 |   <row>
71 |     <field name="id">4</field>
72 |     <field name="orderID">1</field>
73 |     <field name="reversedName">wtf.bogus.74ce949b17da</field>
74 |   </row>
75 | </resultset>
76 | """
77 |         )
78 |         self.assertEqual(len(o), 4)
79 |         for n, x in enumerate(o, start=1):
80 |             self.assertEqual(x["id"], n)
81 |             self.assertEqual(x["orderID"], 1)
82 |             self.assertTrue("wtf.bogus" in x["reversedName"])
83 | 
84 |     def test_create_table(self):
85 |         o = XmlResult().parse(
86 |             """<?xml version="1.0"?>
87 | <resultset statement="SHOW CREATE TABLE treat" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
88 |   <row>
89 |     <field name="Table">treat</field>
90 |     <field name="Create Table">CREATE TABLE `treat` (
91 | `id` bigint(20) NOT NULL AUTO_INCREMENT,
92 | PRIMARY KEY (`id`),
93 | ) ENGINE=InnoDB AUTO_INCREMENT=10101 DEFAULT CHARSET=utf8
94 | PARTITION BY RANGE (`id`)
95 | (PARTITION `p_start` VALUES LESS THAN MAXVALUE ENGINE = InnoDB)</field>
96 |   </row>
97 | </resultset>
98 | """
99 |         )
100 | 
101 |         self.assertEqual(len(o), 1)
102 |         for x in o:
103 |             self.assertEqual(x["Table"], "treat")
104 |             self.assertEqual(
105 |                 x["Create Table"],
106 |                 """CREATE TABLE `treat` (
107 | `id` bigint(20) NOT NULL AUTO_INCREMENT,
108 | PRIMARY KEY (`id`),
109 | ) ENGINE=InnoDB AUTO_INCREMENT=10101 DEFAULT CHARSET=utf8
110 | PARTITION BY RANGE (`id`)
111 | (PARTITION `p_start` VALUES LESS THAN MAXVALUE ENGINE = InnoDB)""",
112 |             )
113 | 
114 |     def test_truncated_resultset(self):
115 |         with self.assertRaises(TruncatedDatabaseResultException):
116 |             XmlResult().parse(
117 |                 """<?xml version="1.0"?>
118 | <resultset statement="SELECT id FROM authz" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
119 |   <row>
120 |     <field name="id">1</field>
121 |   </row>
122 | """
123 |             )
124 | 
125 |         with self.assertRaises(TruncatedDatabaseResultException):
126 |             XmlResult().parse(
127 |                 """<?xml version="1.0"?>
128 | <resultset statement="SELECT id FROM authz" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
129 |   <row>
130 |     <field name="id">1"""
131 |             )
--------------------------------------------------------------------------------
/partitionmanager/stats.py:
--------------------------------------------------------------------------------
1 | """
2 | Statistics-gathering tooling.
3 | """
4 | 
5 | import logging
6 | 
7 | from datetime import timedelta
8 | import partitionmanager.tools
9 | import partitionmanager.types
10 | 
11 | 
12 | class PrometheusMetric:
13 |     """Represents a single named metric for Prometheus"""
14 | 
15 |     def __init__(self, name, table, data):
16 |         self.name = name
17 |         self.table = table
18 |         self.data = data
19 | 
20 | 
21 | class PrometheusMetrics:
22 |     """A set of metrics that can be rendered for Prometheus."""
23 | 
24 |     def __init__(self):
25 |         self.metrics = {}
26 |         self.help = {}
27 |         self.types = {}
28 | 
29 |     def add(self, name, table, data):
30 |         """Record metric data representing the name and table."""
31 |         if name not in self.metrics:
32 |             self.metrics[name] = []
33 |         self.metrics[name].append(PrometheusMetric(name, table, data))
34 | 
35 |     def describe(self, name, help_text=None, type_name=None):
36 |         """Add optional descriptive and type data for a given metric name."""
37 |         self.help[name] = help_text
38 |         self.types[name] = type_name
39 | 
40 |     def render(self, fp):
41 |         """Write the collected metrics to the supplied file-like object.
42 | 43 | Follows the format specification: 44 | https://prometheus.io/docs/instrumenting/exposition_formats/ 45 | """ 46 | for n, metrics in self.metrics.items(): 47 | name = f"partition_{n}" 48 | if n in self.help: 49 | print(f"# HELP {name} {self.help[n]}", file=fp) 50 | if n in self.types: 51 | print(f"# TYPE {name} {self.types[n]}", file=fp) 52 | for m in metrics: 53 | labels = [] 54 | if m.table: 55 | labels = [f'table="{m.table}"'] 56 | print(f"{name}{{{','.join(labels)}}} {m.data}", file=fp) 57 | 58 | 59 | def get_statistics(partitions, current_timestamp, table): 60 | """Return a dictionary of statistics about the supplied table's partitions.""" 61 | log = logging.getLogger("get_statistics") 62 | results = {"partitions": len(partitions)} 63 | 64 | if not partitions: 65 | return results 66 | 67 | for p in partitions: 68 | if not partitionmanager.types.is_partition_type(p): 69 | log.warning( 70 | f"{table} get_statistics called with a partition list " 71 | f"that included a non-Partition entry: {p}" 72 | ) 73 | raise partitionmanager.types.UnexpectedPartitionException(p) 74 | 75 | head_part = None 76 | tail_part = partitions[-1] 77 | 78 | if not isinstance(tail_part, partitionmanager.types.MaxValuePartition): 79 | log.warning( 80 | f"{table} get_statistics called with a partition list tail " 81 | f"that wasn't a MaxValuePartition: {tail_part}" 82 | ) 83 | raise partitionmanager.types.UnexpectedPartitionException(tail_part) 84 | 85 | if tail_part.has_real_time and tail_part.timestamp(): 86 | results["time_since_newest_partition"] = ( 87 | current_timestamp - tail_part.timestamp() 88 | ) 89 | 90 | # Find the earliest partition that is timestamped 91 | for p in partitions: 92 | if p.timestamp(): 93 | head_part = p 94 | break 95 | 96 | if not head_part or head_part == tail_part: 97 | # For simple tables, we're done now. 
98 | return results 99 | 100 | if head_part.timestamp(): 101 | results["time_since_oldest_partition"] = ( 102 | current_timestamp - head_part.timestamp() 103 | ) 104 | 105 | if head_part.timestamp() and tail_part.timestamp(): 106 | results["mean_partition_delta"] = ( 107 | tail_part.timestamp() - head_part.timestamp() 108 | ) / (len(partitions) - 1) 109 | 110 | max_d = timedelta() 111 | for a, b in partitionmanager.tools.pairwise(partitions): 112 | if not a.timestamp() or not b.timestamp(): 113 | log.debug(f"{table} had partitions that aren't comparable: {a} and {b}") 114 | continue 115 | d = b.timestamp() - a.timestamp() 116 | if d > max_d: 117 | max_d = d 118 | 119 | if max_d > timedelta(): 120 | results["max_partition_delta"] = max_d 121 | return results 122 | -------------------------------------------------------------------------------- /partitionmanager/stats_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from datetime import datetime, timedelta, timezone 3 | from io import StringIO 4 | from .stats import get_statistics, PrometheusMetrics 5 | from .types import Table, MaxValuePartition 6 | from .types_test import mkPPart 7 | 8 | 9 | ts = datetime(1949, 1, 12, tzinfo=timezone.utc) 10 | 11 | 12 | class TestStatistics(unittest.TestCase): 13 | def test_statistics_no_partitions(self): 14 | s = get_statistics([], ts, Table("no_parts")) 15 | self.assertEqual(s, {"partitions": 0}) 16 | 17 | def test_statistics_single_unnamed_partition(self): 18 | s = get_statistics([MaxValuePartition("p_start", 1)], ts, Table("single_part")) 19 | self.assertEqual(s, {"partitions": 1}) 20 | 21 | def test_statistics_single_partition(self): 22 | s = get_statistics( 23 | [MaxValuePartition("p_19480113", 1)], ts, Table("single_part") 24 | ) 25 | self.assertEqual( 26 | s, {"partitions": 1, "time_since_newest_partition": timedelta(days=365)} 27 | ) 28 | 29 | def test_statistics_two_partitions(self): 30 | s = get_statistics( 31 | [mkPPart("p_19480101", 42), MaxValuePartition("p_19490101", 1)], 32 | ts, 33 | Table("two_parts"), 34 | ) 35 | self.assertEqual( 36 | s, 37 | { 38 | "partitions": 2, 39 | "time_since_newest_partition": timedelta(days=11), 40 | "time_since_oldest_partition": timedelta(days=377), 41 | "mean_partition_delta": timedelta(days=366), 42 | "max_partition_delta": timedelta(days=366), 43 | }, 44 | ) 45 | 46 | def test_statistics_weekly_partitions_year(self): 47 | parts = [] 48 | base = datetime(2020, 5, 20, tzinfo=timezone.utc) 49 | for w in range(52): 50 | partName = f"p_{base + timedelta(weeks=w):%Y%m%d}" 51 | parts.append(mkPPart(partName, w * 1024)) 52 | parts.append(MaxValuePartition(f"p_{base + timedelta(weeks=52):%Y%m%d}", 1)) 53 | 54 | s = get_statistics( 55 | parts, base + timedelta(weeks=54), Table("weekly_partitions_year_retention") 56 | ) 57 | self.assertEqual( 58 | s, 59 | { 60 | "partitions": 53, 61 | "time_since_newest_partition": timedelta(days=14), 62 | "time_since_oldest_partition": timedelta(days=378), 63 | "mean_partition_delta": timedelta(days=7), 64 | "max_partition_delta": timedelta(days=7), 65 | }, 66 | ) 67 | 68 | 69 | class TestPrometheusMetric(unittest.TestCase): 70 | def test_rendering(self): 71 | exp = PrometheusMetrics() 72 | exp.add("name", "table_name", 42) 73 | 74 | f = StringIO() 75 | exp.render(f) 76 | self.assertEqual('partition_name{table="table_name"} 42\n', f.getvalue()) 77 | 78 | def test_rendering_grouping(self): 79 | exp = PrometheusMetrics() 80 | exp.add("name", "table_name", 42) 81 | 
exp.add("second_metric", "table_name", 42)
82 |         exp.add("name", "other_table", 42)
83 | 
84 |         f = StringIO()
85 |         exp.render(f)
86 |         self.assertEqual(
87 |             """partition_name{table="table_name"} 42
88 | partition_name{table="other_table"} 42
89 | partition_second_metric{table="table_name"} 42
90 | """,
91 |             f.getvalue(),
92 |         )
93 | 
94 |     def test_descriptions(self):
95 |         exp = PrometheusMetrics()
96 |         exp.add("name", "table_name", 42)
97 |         exp.add("second_metric", "table_name", 42)
98 |         exp.add("name", "other_table", 42)
99 | 
100 |         exp.describe(
101 |             "second_metric", help_text="help for second_metric", type_name="type"
102 |         )
103 |         exp.describe("name", help_text="help for name", type_name="type")
104 | 
105 |         f = StringIO()
106 |         exp.render(f)
107 |         self.assertEqual(
108 |             """# HELP partition_name help for name
109 | # TYPE partition_name type
110 | partition_name{table="table_name"} 42
111 | partition_name{table="other_table"} 42
112 | # HELP partition_second_metric help for second_metric
113 | # TYPE partition_second_metric type
114 | partition_second_metric{table="table_name"} 42
115 | """,
116 |             f.getvalue(),
117 |         )
--------------------------------------------------------------------------------
/partitionmanager/table_append_partition.py:
--------------------------------------------------------------------------------
1 | """
2 | Design and perform partition management.
3 | """
4 | 
5 | from datetime import timedelta
6 | import logging
7 | import operator
8 | import re
9 | import partitionmanager.database_helpers
10 | import partitionmanager.types
11 | import partitionmanager.tools
12 | 
13 | 
14 | def get_table_compatibility_problems(database, table):
15 |     """Return a list of strings of problems altering this table, or empty."""
16 |     db_name = database.db_name()
17 | 
18 |     if (
19 |         not isinstance(db_name, partitionmanager.types.SqlInput)
20 |         or not isinstance(table, partitionmanager.types.Table)
21 |         or not isinstance(table.name, partitionmanager.types.SqlInput)
22 |     ):
23 |         return [f"Unexpected table type: {table}"]
24 | 
25 |     sql_cmd = (
26 |         "SELECT CREATE_OPTIONS FROM INFORMATION_SCHEMA.TABLES "
27 |         f"WHERE TABLE_SCHEMA='{db_name}' and TABLE_NAME='{table.name}';"
28 |     ).strip()
29 |     return _get_table_information_schema_problems(database.run(sql_cmd), table.name)
30 | 
31 | 
32 | def _get_table_information_schema_problems(rows, table_name):
33 |     """Return a list of strings of problems partitioning this table, or empty."""
34 |     if len(rows) != 1:
35 |         return [f"Unable to read information for {table_name}"]
36 | 
37 |     options = rows[0]
38 |     if "partitioned" not in options["CREATE_OPTIONS"]:
39 |         return [f"Table {table_name} is not partitioned"]
40 |     return []
41 | 
42 | 
43 | def get_current_positions(database, table, columns):
44 |     """Get positions of the columns in the table.
45 | 
46 |     Return as a dictionary of {column_name: position}
47 |     """
48 |     if not isinstance(columns, list) or not isinstance(
49 |         table, partitionmanager.types.Table
50 |     ):
51 |         raise ValueError("columns must be a list and table must be a Table")
52 | 
53 |     positions = {}
54 |     for column in columns:
55 |         if not isinstance(column, str):
56 |             raise ValueError("columns must be a list of strings")
57 |         sql = f"SELECT {column} FROM `{table.name}` ORDER BY {column} DESC LIMIT 1;"
58 |         rows = database.run(sql)
59 |         if len(rows) > 1:
60 |             raise partitionmanager.types.TableInformationException(
61 |                 f"Expected one result from {table.name}"
62 |             )
63 |         if not rows:
64 |             raise partitionmanager.types.TableEmptyException(
65 |                 f"Table {table.name} appears to be empty. (No results)"
66 |             )
67 |         positions[column] = rows[0][column]
68 |     return positions
69 | 
70 | 
71 | def get_partition_map(database, table):
72 |     """Gather the partition map via the database command tool."""
73 |     if not isinstance(table, partitionmanager.types.Table) or not isinstance(
74 |         table.name, partitionmanager.types.SqlInput
75 |     ):
76 |         raise ValueError("Unexpected type")
77 |     sql_cmd = f"SHOW CREATE TABLE `{table.name}`;"
78 |     return _parse_partition_map(database.run(sql_cmd))
79 | 
80 | 
81 | def _parse_partition_map(rows):
82 |     """Return a dictionary of range_cols and partition objects.
83 | 
84 |     The "range_cols" is the ordered list of what columns are used as the
85 |     range identifiers for the partitions.
86 | 
87 |     The "partitions" is a list of the Partition objects representing each defined
88 |     partition. There will be at least one partitionmanager.types.MaxValuePartition.
89 |     """
90 |     log = logging.getLogger("parse_partition_map")
91 | 
92 |     partition_range = re.compile(
93 |         r"[ ]*PARTITION BY RANGE\s+(COLUMNS)?\((?P<cols>[\w,` ]+)\)"
94 |     )
95 |     partition_member = re.compile(
96 |         r"[ (]*PARTITION\s+`(?P<name>\w+)` VALUES LESS THAN \((?P<cols>[\d, ]+)\)"
97 |     )
98 |     partition_tail = re.compile(
99 |         r"[ (]*PARTITION\s+`(?P<name>\w+)` VALUES LESS THAN \(?(MAXVALUE[, ]*)+\)?"
100 |     )
101 | 
102 |     range_cols = None
103 |     partitions = []
104 | 
105 |     if len(rows) != 1:
106 |         raise partitionmanager.types.TableInformationException("Expected one result")
107 | 
108 |     options = rows[0]
109 | 
110 |     for line in options["Create Table"].split("\n"):
111 |         range_match = partition_range.match(line)
112 |         if range_match:
113 |             range_cols = [x.strip("` ") for x in range_match.group("cols").split(",")]
114 |             log.debug(f"Partition range columns: {range_cols}")
115 | 
116 |         member_match = partition_member.match(line)
117 |         if member_match:
118 |             part_name = member_match.group("name")
119 |             part_vals_str = member_match.group("cols")
120 |             log.debug(f"Found partition {part_name} = {part_vals_str}")
121 | 
122 |             part_vals = [int(x.strip("` ")) for x in part_vals_str.split(",")]
123 | 
124 |             if range_cols is None:
125 |                 raise partitionmanager.types.TableInformationException(
126 |                     "Processing partitions, but the partition definition wasn't found."
127 | ) 128 | 129 | if len(part_vals) != len(range_cols): 130 | log.error( 131 | f"Partition columns {part_vals} don't match the partition range " 132 | f"{range_cols}" 133 | ) 134 | raise partitionmanager.types.MismatchedIdException( 135 | "Partition columns mismatch" 136 | ) 137 | 138 | pos_part = partitionmanager.types.PositionPartition(part_name).set_position( 139 | part_vals 140 | ) 141 | partitions.append(pos_part) 142 | 143 | member_tail = partition_tail.match(line) 144 | if member_tail: 145 | if range_cols is None: 146 | raise partitionmanager.types.TableInformationException( 147 | "Processing tail, but the partition definition wasn't found." 148 | ) 149 | part_name = member_tail.group("name") 150 | log.debug(f"Found tail partition named {part_name}") 151 | partitions.append( 152 | partitionmanager.types.MaxValuePartition(part_name, len(range_cols)) 153 | ) 154 | 155 | if not partitions or not isinstance( 156 | partitions[-1], partitionmanager.types.MaxValuePartition 157 | ): 158 | raise partitionmanager.types.UnexpectedPartitionException( 159 | "There was no tail partition" 160 | ) 161 | return {"range_cols": range_cols, "partitions": partitions} 162 | 163 | 164 | def get_columns(database, table): 165 | """Gather the columns list via the database command tool.""" 166 | if not isinstance(table, partitionmanager.types.Table) or not isinstance( 167 | table.name, partitionmanager.types.SqlInput 168 | ): 169 | raise ValueError("Unexpected type") 170 | sql_cmd = f"DESCRIBE `{table.name}`;" 171 | return _parse_columns(table, database.run(sql_cmd)) 172 | 173 | 174 | def _parse_columns(table, rows): 175 | """Read the columns description and return a list of the columns, where 176 | each entry is a dict containing Field and Type.""" 177 | log = logging.getLogger("parse_columns") 178 | if not rows: 179 | raise partitionmanager.types.TableInformationException("No column information") 180 | 181 | for r in rows: 182 | if "Field" not in r or "Type" not in r: 183 | raise partitionmanager.types.TableInformationException( 184 | "Described table does not include sufficient column details" 185 | ) 186 | log.debug(f"{table.name} column {r['Field']} has type {r['Type']}") 187 | return rows 188 | 189 | 190 | def _split_partitions_around_position(partition_list, current_position): 191 | """Divide up a partition list to three parts: filled, current, and empty. 192 | 193 | The first part is the filled partition list: those partitions for which 194 | _all_ values are less than current_position. 195 | 196 | The second is the a single partition whose values contain current_position. 197 | 198 | The third part is a list of all the other, empty partitions yet-to-be-filled. 
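    An illustrative sketch, using the mkPPart/mkPos helpers from the test
    suite to build partitions and positions:

        parts = [mkPPart("p_20210101", 100), mkPPart("p_20210201", 200),
                 MaxValuePartition("p_20210301", count=1)]
        filled, active, empty = _split_partitions_around_position(parts, mkPos(150))
        # filled == [p_20210101], active is p_20210201, empty == [p_20210301]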
199 |     """
200 |     for p in partition_list:
201 |         if not partitionmanager.types.is_partition_type(p):
202 |             raise partitionmanager.types.UnexpectedPartitionException(p)
203 |     if not isinstance(current_position, partitionmanager.types.Position):
204 |         raise ValueError
205 | 
206 |     less_than_partitions = []
207 |     greater_or_equal_partitions = []
208 | 
209 |     for p in partition_list:
210 |         if p < current_position:
211 |             less_than_partitions.append(p)
212 |         else:
213 |             greater_or_equal_partitions.append(p)
214 | 
215 |     # The active partition is always the first in the list of greater_or_equal
216 |     active_partition = greater_or_equal_partitions.pop(0)
217 |     return less_than_partitions, active_partition, greater_or_equal_partitions
218 | 
219 | 
220 | def _get_position_increase_per_day(p1, p2):
221 |     """Return the rate of change between two position-lists, in positions/day.
222 | 
223 |     Returns a list containing the change in positions between p1 and p2
224 |     divided by the number of days between them, as "position increase per day".
225 |     Raises ValueError if p1 or p2 is not a PositionPartition or if their column
226 |     counts differ; returns an empty list if either lacks a timestamp or they are
227 |     out of order. For partitions with a single position, the list is of size 1.
228 |     """
229 |     log = logging.getLogger("get_position_increase_per_day")
230 | 
231 |     if not isinstance(p1, partitionmanager.types.PositionPartition) or not isinstance(
232 |         p2, partitionmanager.types.PositionPartition
233 |     ):
234 |         raise ValueError(
235 |             "Both partitions must be partitionmanager.types.PositionPartition type"
236 |         )
237 |     if p1.num_columns != p2.num_columns:
238 |         raise ValueError(f"p1 {p1} and p2 {p2} must have the same number of columns")
239 | 
240 |     if None in (p1.timestamp(), p2.timestamp()):
241 |         # An empty list skips this pair in get_weighted_position_increase
242 |         return []
243 |     if p1.timestamp() >= p2.timestamp():
244 |         log.warning(
245 |             f"Skipping rate of change between p1 {p1} and p2 {p2} as they are "
246 |             "out-of-order"
247 |         )
248 |         return []
249 | 
250 |     delta_time = p2.timestamp() - p1.timestamp()
251 |     delta_days = delta_time / timedelta(days=1)
252 |     delta_positions = list(
253 |         map(operator.sub, p2.position.as_list(), p1.position.as_list())
254 |     )
255 |     return [pos / delta_days for pos in delta_positions]
256 | 
257 | 
258 | def _generate_weights(count):
259 |     """Static list of weights that increase harmonically toward recent entries.
260 | 
261 |     Starts from 10,000 to give a high ceiling. It could be dynamic, but eh.
262 |     """
263 |     return [10_000 / x for x in range(count, 0, -1)]
264 | 
265 | 
266 | def _get_weighted_position_increase_per_day_for_partitions(partitions):
267 |     """Get weighted partition-position-increase-per-day as a position-list.
268 | 
269 |     For the provided list of partitions, uses the _get_position_increase_per_day
270 |     method to generate a list of position increment rates in positions/day, then
271 |     weights those rates so that more recent rates influence the outcome more,
272 |     and returns a final list of weighted partition-position-increase-per-
273 |     day, with one entry per column.
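    As a sketch of the arithmetic: with two pairwise rates, r1 (older) and
    r2 (newer), the weights are [10000/2, 10000/1], so each column's result
    is (r1 * 5000 + r2 * 10000) / 15000, biased toward the newer rate.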
274 | """ 275 | log = logging.getLogger("get_weighted_position_increase_per_day_for_partitions") 276 | 277 | if not partitions: 278 | raise ValueError("Partition list must not be empty") 279 | 280 | pos_rates = [ 281 | _get_position_increase_per_day(p1, p2) 282 | for p1, p2 in partitionmanager.tools.pairwise(partitions) 283 | ] 284 | weights = _generate_weights(len(pos_rates)) 285 | 286 | if not pos_rates or not weights: 287 | log.error( 288 | "No rates of change were valid for the partition list: %s", partitions 289 | ) 290 | raise partitionmanager.types.NoValidRatesOfChangeException 291 | 292 | # Initialize a list with a zero for each position 293 | weighted_sums = [0] * partitions[0].num_columns 294 | 295 | for p_r, weight in zip(pos_rates, weights): 296 | for idx, val in enumerate(p_r): 297 | weighted_sums[idx] += val * weight 298 | return [x / sum(weights) for x in weighted_sums] 299 | 300 | 301 | def _predict_forward_position(current_positions, rate_of_change, duration): 302 | """Return a predicted future position as a position-list. 303 | 304 | This moves current_positions forward a given duration at the provided rates 305 | of change. The rate and the duration must be compatible units, and both the 306 | positions and the rate must be lists of the same size. 307 | """ 308 | if len(current_positions) != len(rate_of_change): 309 | raise ValueError("Expected identical list sizes") 310 | 311 | for neg_rate in filter(lambda r: r < 0, rate_of_change): 312 | raise ValueError( 313 | f"Can't predict forward with a negative rate of change: {neg_rate}" 314 | ) 315 | 316 | increase = [x * (duration / timedelta(days=1)) for x in rate_of_change] 317 | predicted_positions = [int(p + i) for p, i in zip(current_positions, increase)] 318 | for old, new in zip(current_positions, predicted_positions): 319 | assert new >= old, f"Always predict forward, {new} < {old}" 320 | return predicted_positions 321 | 322 | 323 | def _predict_forward_time(current_position, end_position, rates, evaluation_time): 324 | """Return a predicted datetime of when we'll exceed the end position-list. 325 | 326 | Given the current_position position-list and the rates, this calculates 327 | a timestamp of when the positions will be beyond ALL of the end_positions 328 | position-list, as that is MariaDB's definition of when to start filling a 329 | partition. 330 | """ 331 | if not isinstance( 332 | current_position, partitionmanager.types.Position 333 | ) or not isinstance(end_position, partitionmanager.types.Position): 334 | raise ValueError("Expected to be given Position types") 335 | 336 | if not len(current_position) == len(end_position) == len(rates): 337 | raise ValueError("Expected identical list sizes") 338 | 339 | for neg_rate in filter(lambda r: r <= 0, rates): 340 | raise ValueError( 341 | f"Can't predict forward with a non-positive rate of change: " 342 | f"{neg_rate} / {rates}" 343 | ) 344 | 345 | days_remaining = [ 346 | (end - now) / rate 347 | for now, end, rate in zip( 348 | current_position.as_list(), end_position.as_list(), rates 349 | ) 350 | ] 351 | 352 | if max(days_remaining) < 0: 353 | raise ValueError(f"All values are negative: {days_remaining}") 354 | calculated = evaluation_time + (max(days_remaining) * timedelta(days=1)) 355 | return calculated.replace(minute=0, second=0, microsecond=0) 356 | 357 | 358 | def _calculate_start_time(last_changed_time, evaluation_time, allowed_lifespan): 359 | """Return a start time to be used in the partition planning. 
360 | 361 | This is a helper method that doesn't always return strictly 362 | last_changed_time + allowed_lifespan, it prohibits times in the past, 363 | returning evaluation_time instead, to ensure that we don't try to set 364 | newly constructed partitions in the past. 365 | """ 366 | partition_start_time = last_changed_time + allowed_lifespan 367 | if partition_start_time < evaluation_time: 368 | # Partition start times should never be in the past. 369 | return evaluation_time 370 | return partition_start_time.replace(minute=0, second=0, microsecond=0) 371 | 372 | 373 | def _get_rate_partitions_with_implicit_timestamps( 374 | table, filled_partitions, current_position, evaluation_time, active_partition 375 | ): 376 | """Return a list of PositionPartitions for use in rate calculations. 377 | 378 | The partitions are set with implicit timestamps. 379 | """ 380 | log = logging.getLogger( 381 | f"_get_rate_partitions_with_implicit_timestamps:{table.name}" 382 | ) 383 | 384 | rate_relevant_partitions = None 385 | 386 | if active_partition.timestamp() < evaluation_time: 387 | # This bit of weirdness is a fencepost issue: The partition list is strictly 388 | # increasing until we get to "now" and the active partition. "Now" actually 389 | # takes place _after_ active partition's start date (naturally), but 390 | # contains a position that is before the top of active, by definition. For 391 | # the rate processing to work, we need to swap the "now" and the active 392 | # partition's dates and positions. 393 | rate_relevant_partitions = filled_partitions + [ 394 | partitionmanager.types.InstantPartition( 395 | "p_current_pos", active_partition.timestamp(), current_position 396 | ), 397 | partitionmanager.types.InstantPartition( 398 | "p_prev_pos", evaluation_time, active_partition.position 399 | ), 400 | ] 401 | else: 402 | # If the active partition's start date is later than today, then we 403 | # previously mispredicted the rate of change. There's nothing we can 404 | # do about that at this point, except limit our rate-of-change calculation 405 | # to exclude the future-dated, irrelevant partition. 406 | log.debug( 407 | f"Misprediction: Evaluation time ({evaluation_time}) is " 408 | f"before the active partition {active_partition}. Excluding " 409 | "mispredicted partitions from the rate calculations." 410 | ) 411 | filled_partitions = filter( 412 | lambda f: f.timestamp() < evaluation_time, filled_partitions 413 | ) 414 | rate_relevant_partitions = list(filled_partitions) + [ 415 | partitionmanager.types.InstantPartition( 416 | "p_current_pos", evaluation_time, current_position 417 | ) 418 | ] 419 | 420 | return rate_relevant_partitions 421 | 422 | 423 | def _get_rate_partitions_with_queried_timestamps( 424 | database, table, partition_list, current_position, evaluation_time, active_partition 425 | ): 426 | """Return a list of PositionPartitions for use in rate calculations. 427 | 428 | The partitions' timestamps are explicitly queried. 
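    For this to apply, the table must have an earliest-UTC-timestamp query
    configured; an illustrative setup (hypothetical table and column names):

        table.set_earliest_utc_timestamp_query(SqlQuery(
            "SELECT UNIX_TIMESTAMP(`created`) FROM `orders` "
            "WHERE `id` > ? ORDER BY `id` ASC LIMIT 1;"))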
429 | """ 430 | 431 | if not table.has_date_query: 432 | raise ValueError("Table has no defined date query") 433 | 434 | instant_partitions = [] 435 | 436 | for partition in partition_list: 437 | exact_time = ( 438 | partitionmanager.database_helpers.calculate_exact_timestamp_via_query( 439 | database, table, partition 440 | ) 441 | ) 442 | 443 | instant_partitions.append( 444 | partitionmanager.types.InstantPartition( 445 | partition.name, exact_time, partition.position 446 | ) 447 | ) 448 | 449 | instant_partitions.append( 450 | partitionmanager.types.InstantPartition( 451 | active_partition.name, evaluation_time, current_position 452 | ) 453 | ) 454 | 455 | return instant_partitions 456 | 457 | 458 | def _plan_partition_changes( 459 | database, 460 | table, 461 | partition_list, 462 | current_position, 463 | evaluation_time, 464 | allowed_lifespan, 465 | num_empty_partitions, 466 | ): 467 | """Return a list of partitions to modify or create. 468 | 469 | This method makes recommendations in order to meet the supplied table 470 | requirements, using an estimate as to the rate of fill from the supplied 471 | partition_list, current_position, and evaluation_time. 472 | """ 473 | log = logging.getLogger(f"plan_partition_changes:{table.name}") 474 | 475 | ( 476 | filled_partitions, 477 | active_partition, 478 | empty_partitions, 479 | ) = _split_partitions_around_position(partition_list, current_position) 480 | if not empty_partitions: 481 | log.error( 482 | f"Partition {active_partition.name} requires manual ALTER " 483 | "as without an empty partition to manipulate, you'll need to " 484 | "perform an expensive copy operation. See the bootstrap mode." 485 | ) 486 | raise partitionmanager.types.NoEmptyPartitionsAvailableException 487 | if not active_partition: 488 | raise Exception("Active Partition can't be None") 489 | 490 | if table.has_date_query: 491 | rate_relevant_partitions = _get_rate_partitions_with_queried_timestamps( 492 | database, 493 | table, 494 | filled_partitions, 495 | current_position, 496 | evaluation_time, 497 | active_partition, 498 | ) 499 | else: 500 | rate_relevant_partitions = _get_rate_partitions_with_implicit_timestamps( 501 | table, 502 | filled_partitions, 503 | current_position, 504 | evaluation_time, 505 | active_partition, 506 | ) 507 | 508 | rates = _get_weighted_position_increase_per_day_for_partitions( 509 | rate_relevant_partitions 510 | ) 511 | 512 | log.info( 513 | f"Rates of change calculated as {rates} per day from " 514 | f"{len(rate_relevant_partitions)} partitions" 515 | ) 516 | 517 | # We need to include active_partition in the list for the subsequent 518 | # calculations even though we're not actually changing it. 519 | results = [partitionmanager.types.ChangePlannedPartition(active_partition)] 520 | 521 | # Adjust each of the empty partitions 522 | for partition in empty_partitions: 523 | last_changed = results[-1] 524 | 525 | changed_partition = partitionmanager.types.ChangePlannedPartition(partition) 526 | 527 | start_of_fill_time = _predict_forward_time( 528 | current_position, last_changed.position, rates, evaluation_time 529 | ) 530 | 531 | if isinstance(partition, partitionmanager.types.PositionPartition): 532 | # We can't change the position on this partition, but we can adjust 533 | # the name to be more exact as to what date we expect it to begin 534 | # filling. If we calculate the start-of-fill date and it doesn't 535 | # match the partition's name, let's rename it and mark it as an 536 | # important change. 
537 | if start_of_fill_time.date() != partition.timestamp().date(): 538 | log.info( 539 | f"Start-of-fill predicted at {start_of_fill_time.date()} " 540 | f"which is not {partition.timestamp().date()}. This change " 541 | f"will be marked as important to ensure that {partition} is " 542 | f"moved to {start_of_fill_time:%Y-%m-%d}" 543 | ) 544 | changed_partition.set_timestamp(start_of_fill_time).set_important() 545 | 546 | if isinstance(partition, partitionmanager.types.MaxValuePartition): 547 | # Only the tail MaxValuePartitions can get new positions. For those, 548 | # we calculate forward what position we expect and use it in the 549 | # future. 550 | 551 | nominal_partition_start_time = _calculate_start_time( 552 | last_changed.timestamp(), evaluation_time, allowed_lifespan 553 | ) 554 | 555 | # We use the nearest timestamp, which should generally be the 556 | # calculated time, but could be the fill time based on predicting 557 | # forward if we have gotten far off in our predictions in the past. 558 | changed_partition.set_timestamp( 559 | min(nominal_partition_start_time, start_of_fill_time) 560 | ) 561 | 562 | changed_part_pos = _predict_forward_position( 563 | last_changed.position.as_list(), rates, allowed_lifespan 564 | ) 565 | changed_partition.set_position(changed_part_pos) 566 | 567 | results.append(changed_partition) 568 | 569 | # Ensure we have the required number of empty partitions 570 | while len(results) < num_empty_partitions + 1: 571 | last_changed = results[-1] 572 | partition_start_time = _calculate_start_time( 573 | last_changed.timestamp(), evaluation_time, allowed_lifespan 574 | ) 575 | 576 | new_part_pos = _predict_forward_position( 577 | last_changed.position.as_list(), rates, allowed_lifespan 578 | ) 579 | results.append( 580 | partitionmanager.types.NewPlannedPartition() 581 | .set_position(new_part_pos) 582 | .set_timestamp(partition_start_time) 583 | ) 584 | 585 | # Confirm we won't make timestamp conflicts 586 | conflict_found = True 587 | while conflict_found: 588 | conflict_found = False 589 | 590 | existing_timestamps = {p.timestamp() for p in partition_list} 591 | 592 | for partition in results: 593 | if partition.timestamp() in existing_timestamps: 594 | if ( 595 | isinstance(partition, partitionmanager.types.ChangePlannedPartition) 596 | and partition.timestamp() == partition.old.timestamp() 597 | ): 598 | # That's not a conflict 599 | continue 600 | 601 | log.debug( 602 | f"{partition} has a conflict for its timestamp, increasing by 1 day" 603 | ) 604 | partition.set_timestamp(partition.timestamp() + timedelta(days=1)) 605 | conflict_found = True 606 | break 607 | 608 | existing_timestamps.add(partition.timestamp()) 609 | 610 | # Final result is always MAXVALUE 611 | results[-1].set_as_max_value() 612 | 613 | log.debug(f"Planned {results}") 614 | return results 615 | 616 | 617 | def _should_run_changes(table, altered_partitions): 618 | """Returns True if the changeset should run, otherwise returns False. 619 | 620 | Evaluate the list from plan_partition_changes and determine if the set of 621 | changes should be performed - if all the changes are minor, they shouldn't 622 | be run. 
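    A sketch of the decision (existing_part stands for any current partition):

        plan = [ChangePlannedPartition(existing_part),
                NewPlannedPartition().set_columns(1)]
        _should_run_changes(table, plan)  # True: a new partition is planned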
623 | """ 624 | log = logging.getLogger(f"should_run_changes:{table.name}") 625 | 626 | for p in altered_partitions: 627 | if isinstance(p, partitionmanager.types.NewPlannedPartition): 628 | log.debug(f"{p} is new") 629 | return True 630 | 631 | if ( 632 | isinstance(p, partitionmanager.types.ChangePlannedPartition) 633 | and p.important() 634 | ): 635 | log.debug(f"{p} is marked important") 636 | return True 637 | return False 638 | 639 | 640 | def generate_sql_reorganize_partition_commands(table, changes): 641 | """Generates SQL commands to reorganize table to apply the changes. 642 | 643 | Args: 644 | 645 | table: a types.Table object 646 | 647 | changes: a list of objects implementing types.PlannedPartition 648 | """ 649 | log = logging.getLogger(f"generate_sql_reorganize_partition_commands:{table.name}") 650 | 651 | modified_partitions = [] 652 | new_partitions = [] 653 | 654 | for p in changes: 655 | if isinstance(p, partitionmanager.types.ChangePlannedPartition): 656 | assert not new_partitions, "Modified partitions must precede new partitions" 657 | modified_partitions.append(p) 658 | elif isinstance(p, partitionmanager.types.NewPlannedPartition): 659 | new_partitions.append(p) 660 | else: 661 | raise partitionmanager.types.UnexpectedPartitionException(p) 662 | 663 | # If there's not at least one modification, bail out 664 | if not new_partitions and not list( 665 | filter(lambda x: x.has_modifications, modified_partitions) 666 | ): 667 | log.debug("No partitions have modifications and no new partitions") 668 | return 669 | 670 | partition_names_set = set() 671 | 672 | for modified_partition, is_final in reversed( 673 | list(partitionmanager.tools.iter_show_end(modified_partitions)) 674 | ): 675 | # We reverse the iterator so that we always alter the furthest-out partitions 676 | # first, so that we are always increasing the number of empty partitions 677 | # before (potentially) moving the end position near the active one 678 | new_part_list = [modified_partition.as_partition()] 679 | if is_final: 680 | new_part_list.extend([p.as_partition() for p in new_partitions]) 681 | 682 | # If there's not at least one modification, skip 683 | if not is_final and not modified_partition.has_modifications: 684 | log.debug(f"{modified_partition} does not have modifications, skip") 685 | continue 686 | 687 | partition_strings = [] 688 | for part in new_part_list: 689 | if part.name in partition_names_set: 690 | raise partitionmanager.types.DuplicatePartitionException( 691 | f"Duplicate {part}" 692 | ) 693 | partition_names_set.add(part.name) 694 | 695 | partition_strings.append( 696 | f"PARTITION `{part.name}` VALUES LESS THAN {part.values()}" 697 | ) 698 | partition_update = ", ".join(partition_strings) 699 | 700 | alter_cmd = ( 701 | f"ALTER TABLE `{table.name}` WAIT 6 REORGANIZE " 702 | f"PARTITION `{modified_partition.old.name}` INTO ({partition_update});" 703 | ) 704 | 705 | log.debug(f"Yielding {alter_cmd}") 706 | 707 | yield alter_cmd 708 | 709 | 710 | def get_pending_sql_reorganize_partition_commands( 711 | *, 712 | database, 713 | table, 714 | partition_list, 715 | current_position, 716 | allowed_lifespan, 717 | num_empty_partitions, 718 | evaluation_time, 719 | ): 720 | """Return a list of SQL commands to produce an optimally-partitioned table. 721 | 722 | This algorithm is described in the README.md file as the Maintain Algorithm. 
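    A call sketch (argument values are illustrative; see Args below):

        cmds = get_pending_sql_reorganize_partition_commands(
            database=dbcmd,
            table=Table("orders"),
            partition_list=get_partition_map(dbcmd, Table("orders"))["partitions"],
            current_position=pos,
            allowed_lifespan=timedelta(days=7),
            num_empty_partitions=2,
            evaluation_time=datetime.now(tz=timezone.utc),
        )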
723 | 724 | Args: 725 | 726 | table: The table name and properties 727 | 728 | partition_list: the currently-existing partition objects, each with 729 | a name and either a starting position or are the tail MAXVALUE. 730 | 731 | current_position: a Position representing the position IDs for 732 | this table at the evaluation_time. 733 | 734 | allowed_lifespan: a timedelta that represents how long a span of time 735 | a partition should seek to cover. 736 | 737 | num_empty_partitions: the number of empty partitions to seek to keep at the 738 | tail, each aiming to span allowed_lifespan. 739 | 740 | evaluation_time: a datetime instance that represents the time the 741 | algorithm is running. 742 | """ 743 | 744 | log = logging.getLogger( 745 | f"get_pending_sql_reorganize_partition_commands:{table.name}" 746 | ) 747 | 748 | partition_changes = _plan_partition_changes( 749 | database, 750 | table, 751 | partition_list, 752 | current_position, 753 | evaluation_time, 754 | allowed_lifespan, 755 | num_empty_partitions, 756 | ) 757 | 758 | if not _should_run_changes(table, partition_changes): 759 | log.info(f"{table} does not need to be modified currently.") 760 | return [] 761 | 762 | log.debug(f"{table} has changes waiting.") 763 | return generate_sql_reorganize_partition_commands(table, partition_changes) 764 | -------------------------------------------------------------------------------- /partitionmanager/tools.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tools for working with iterators. Helpers. 3 | """ 4 | 5 | from itertools import tee 6 | 7 | 8 | def pairwise(iterable): 9 | """iterable -> (s0,s1), (s1,s2), (s2, s3), ... (s_n-1, s_n).""" 10 | a, b = tee(iterable) 11 | next(b, None) 12 | return zip(a, b) 13 | 14 | 15 | def iter_show_end(iterable): 16 | """iterable -> (s0, false), (s1, false), ... (s_n, true).""" 17 | it = iter(iterable) 18 | prev = next(it) 19 | for val in it: 20 | yield prev, False 21 | prev = val 22 | yield prev, True 23 | -------------------------------------------------------------------------------- /partitionmanager/tools_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from .tools import pairwise, iter_show_end 4 | 5 | 6 | class TestTools(unittest.TestCase): 7 | def test_pairwise(self): 8 | self.assertEqual(list(pairwise(["a", "b"])), [("a", "b")]) 9 | self.assertEqual(list(pairwise(["a", "b", "c"])), [("a", "b"), ("b", "c")]) 10 | self.assertEqual(list(pairwise(["a"])), []) 11 | 12 | def test_iter_show_end(self): 13 | self.assertEqual(list(iter_show_end(["a"])), [("a", True)]) 14 | self.assertEqual(list(iter_show_end(["a", "b"])), [("a", False), ("b", True)]) 15 | -------------------------------------------------------------------------------- /partitionmanager/types.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes and types used across the Partition Manager 3 | """ 4 | 5 | import abc 6 | import argparse 7 | import re 8 | from datetime import datetime, timedelta, timezone 9 | from urllib.parse import urlparse 10 | 11 | 12 | def timedelta_from_dict(r): 13 | """ 14 | Process a dictionary, typically from YAML, which describes a table's 15 | retention or partition period. Returns a timedelta or None, and raises an argparse 16 | error if the arguments are not understood. 
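    For example:

        timedelta_from_dict({"days": 30})   # -> timedelta(days=30)
        timedelta_from_dict({})             # -> None
        timedelta_from_dict({"weeks": 2})   # raises ArgumentTypeError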
17 | """ 18 | for k, v in r.items(): 19 | if k == "days": 20 | return timedelta(days=v) 21 | raise argparse.ArgumentTypeError( 22 | f"Unknown retention period definition: {k}={v}" 23 | ) 24 | return None 25 | 26 | 27 | class Table: 28 | """ 29 | Represents enough information about a table to make partitioning decisions. 30 | """ 31 | 32 | def __init__(self, name): 33 | self.name = SqlInput(name) 34 | self.retention_period = None 35 | self.partition_period = None 36 | self.earliest_utc_timestamp_query = None 37 | 38 | def set_retention_period(self, ret): 39 | """ 40 | Sets the retention period as a timedelta for this table 41 | """ 42 | if not isinstance(ret, timedelta): 43 | raise ValueError("Must be a timedelta") 44 | self.retention_period = ret 45 | return self 46 | 47 | def set_partition_period(self, dur): 48 | """ 49 | Sets the partition period as a timedelta for this table 50 | """ 51 | if not isinstance(dur, timedelta): 52 | raise ValueError("Must be a timedelta") 53 | self.partition_period = dur 54 | return self 55 | 56 | def set_earliest_utc_timestamp_query(self, query): 57 | if not isinstance(query, SqlQuery): 58 | raise ValueError("Must be a SqlQuery") 59 | self.earliest_utc_timestamp_query = query 60 | 61 | @property 62 | def has_date_query(self): 63 | return self.earliest_utc_timestamp_query is not None 64 | 65 | def __str__(self): 66 | return f"Table {self.name}" 67 | 68 | 69 | class SqlInput(str): 70 | """ 71 | Class which wraps a string or number only if it is safe to use within a 72 | single SQL statement. 73 | """ 74 | 75 | valid_form = re.compile(r"^[A-Z0-9_-]+$", re.IGNORECASE) 76 | 77 | def __new__(cls, *args): 78 | if len(args) != 1: 79 | raise argparse.ArgumentTypeError(f"{args} is not a single argument") 80 | if not isinstance(args[0], int) and not SqlInput.valid_form.match(args[0]): 81 | raise argparse.ArgumentTypeError(f"{args[0]} is not a valid SQL identifier") 82 | return super().__new__(cls, args[0]) 83 | 84 | def __repr__(self): 85 | return str(self) 86 | 87 | 88 | class SqlQuery(str): 89 | """ 90 | Class which loosely enforces that there's a single SQL SELECT statement to run. 91 | """ 92 | 93 | forbidden_terms = ["UPDATE ", "INSERT ", "DELETE "] 94 | 95 | def __new__(cls, *args): 96 | if len(args) != 1: 97 | raise argparse.ArgumentTypeError(f"{args} is not a single argument") 98 | query_string = args[0].strip() 99 | if not query_string.endswith(";"): 100 | raise argparse.ArgumentTypeError( 101 | f"[{query_string}] does not end with a ';'" 102 | ) 103 | if query_string.count(";") > 1: 104 | raise argparse.ArgumentTypeError( 105 | f"[{query_string}] has more than one statement" 106 | ) 107 | 108 | if "?" 
not in query_string: 109 | raise argparse.ArgumentTypeError( 110 | f"[{query_string}] has no substitution variable '?'" 111 | ) 112 | if query_string.count("?") > 1: 113 | raise argparse.ArgumentTypeError( 114 | f"[{query_string}] has more than one substitution variable '?'" 115 | ) 116 | 117 | if not query_string.upper().startswith("SELECT "): 118 | raise argparse.ArgumentTypeError( 119 | f"[{query_string}] is not a SELECT statement" 120 | ) 121 | for term in SqlQuery.forbidden_terms: 122 | if term in query_string.upper(): 123 | raise argparse.ArgumentTypeError( 124 | f"[{query_string}] has a forbidden term [{term}]" 125 | ) 126 | 127 | return super().__new__(cls, query_string) 128 | 129 | def __repr__(self): 130 | return str(self) 131 | 132 | def get_statement_with_argument(self, arg): 133 | if not isinstance(arg, SqlInput): 134 | raise argparse.ArgumentTypeError("Must be a SqlInput") 135 | return str(self).replace("?", str(arg)) 136 | 137 | 138 | def to_sql_url(urlstring): 139 | """ 140 | Parse a sql://user:pass@host:port/schema URL and return the tuple. 141 | """ 142 | try: 143 | urltuple = urlparse(urlstring) 144 | if urltuple.scheme.lower() != "sql": 145 | raise argparse.ArgumentTypeError(f"{urlstring} is not a valid sql://") 146 | if urltuple.path in {"/", ""}: 147 | raise argparse.ArgumentTypeError(f"{urlstring} should include a db path") 148 | return urltuple 149 | except ValueError as ve: 150 | raise argparse.ArgumentTypeError(f"{urlstring} not valid: {ve}") 151 | 152 | 153 | class DatabaseCommand(abc.ABC): 154 | """ 155 | Abstract class which can run SQL commands and return the results in a 156 | minimal form. 157 | """ 158 | 159 | @abc.abstractmethod 160 | def run(self, sql_cmd): 161 | """ 162 | Run the sql, returning the results as a list of python-ized types, or 163 | raising an Exception 164 | """ 165 | 166 | @abc.abstractmethod 167 | def db_name(self): 168 | """ 169 | Return the current database name 170 | """ 171 | 172 | 173 | def is_partition_type(obj): 174 | """True if the object inherits from a _Partition.""" 175 | return isinstance(obj, _Partition) 176 | 177 | 178 | class _Partition(abc.ABC): 179 | """Abstract class which represents a existing table partition.""" 180 | 181 | @abc.abstractmethod 182 | def values(self): 183 | """Return a SQL partition value string.""" 184 | 185 | @property 186 | @abc.abstractmethod 187 | def name(self): 188 | """Name representing when the partition began to fill. 189 | 190 | Generally this will be of the form p_yyyymmdd, but sometimes partitions 191 | have names like p_initial, p_start, or any other valid SQL identifier. 192 | """ 193 | 194 | @property 195 | @abc.abstractmethod 196 | def num_columns(self): 197 | """Return the number of columns included in this partition's range.""" 198 | 199 | @property 200 | def has_real_time(self): 201 | """True if the partition has a non-synthetic timestamp. 202 | 203 | This should be used to determine whether timestamp() should be used for 204 | statistical purposes, as timestamp() generates a synthetic timestamp 205 | for rate-of-change calculations in corner-cases. 206 | """ 207 | if "p_start" in self.name or not self.name.startswith("p_"): 208 | return False 209 | return self.timestamp() is not None 210 | 211 | def timestamp(self): 212 | """Returns datetime of this partition's date, or None. 213 | 214 | This returns the date from the partition's name if the partition is of 215 | the form "p_YYYYMMDD". 
If the name is "p_start", return a synthetic 216 | timestamp (be sure to use self.has_real_time before using for 217 | statistical purposes). Otherwise, returns None. 218 | """ 219 | 220 | if not self.name.startswith("p_"): 221 | return None 222 | 223 | if "p_start" in self.name: 224 | # Gotta start somewhere, for partitions named things like 225 | # "p_start". This has the downside of causing abnormally-low 226 | # rate of change calculations, but they fall off quickly 227 | # for subsequent partitions 228 | return datetime(2021, 1, 1, tzinfo=timezone.utc) 229 | 230 | try: 231 | return datetime.strptime(self.name, "p_%Y%m%d").replace(tzinfo=timezone.utc) 232 | except ValueError: 233 | pass 234 | try: 235 | return datetime.strptime(self.name, "p_%Y%m").replace(tzinfo=timezone.utc) 236 | except ValueError: 237 | pass 238 | try: 239 | return datetime.strptime(self.name, "p_%Y").replace(tzinfo=timezone.utc) 240 | except ValueError: 241 | pass 242 | return None 243 | 244 | def __repr__(self): 245 | return f"{type(self).__name__}<{str(self)}>" 246 | 247 | def __str__(self): 248 | return f"{self.name}: {self.values()}" 249 | 250 | 251 | class Position: 252 | """An internal class that represents a position as an ordered list of 253 | identifiers, matching the table's partition-by statement. 254 | """ 255 | 256 | def __init__(self): 257 | self._position = [] 258 | 259 | def set_position(self, position_in): 260 | """Set the list of identifiers for this position.""" 261 | if isinstance(position_in, Position): 262 | self._position = position_in.as_list() 263 | elif isinstance(position_in, (list, tuple)): 264 | self._position = [int(p) for p in position_in] 265 | else: 266 | raise ValueError(f"Unexpected position input: {position_in}") 267 | return self 268 | 269 | def as_list(self): 270 | """Return a copy of the list of identifiers representing this position""" 271 | return self._position.copy() 272 | 273 | def as_sql_input(self): 274 | """Return the position as an array of SqlInput objects""" 275 | return [SqlInput(p) for p in self._position] 276 | 277 | def __len__(self): 278 | return len(self._position) 279 | 280 | def __eq__(self, other): 281 | if isinstance(other, Position): 282 | return self._position == other.as_list() 283 | return False 284 | 285 | def __str__(self): 286 | return str(self._position) 287 | 288 | def __repr__(self): 289 | return repr(self._position) 290 | 291 | 292 | class PositionPartition(_Partition): 293 | """A partition that has a position associated with it. 294 | 295 | Partitions are independent table segments, and each has a name and a current 296 | position. The positions-list is an ordered list of identifiers, matching 297 | the order of the table's partition-by statement when the table was created. 
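    A small sketch:

        p = PositionPartition("p_20210102").set_position([42])
        p.timestamp()   # datetime(2021, 1, 2, tzinfo=timezone.utc)
        p.values()      # "(42)"
        p < MaxValuePartition("p_tail", count=1)   # True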
298 | """ 299 | 300 | def __init__(self, name): 301 | self._name = name 302 | self._position = Position() 303 | 304 | @property 305 | def name(self): 306 | return self._name 307 | 308 | def set_position(self, position_in): 309 | """Set the position for this partition.""" 310 | self._position.set_position(position_in) 311 | return self 312 | 313 | @property 314 | def position(self): 315 | """Return the Position this partition represents""" 316 | return self._position 317 | 318 | @property 319 | def num_columns(self): 320 | return len(self._position) 321 | 322 | def values(self): 323 | return "(" + ", ".join([str(x) for x in self._position.as_list()]) + ")" 324 | 325 | def __lt__(self, other): 326 | if isinstance(other, MaxValuePartition): 327 | if len(self._position) != other.num_columns: 328 | raise UnexpectedPartitionException( 329 | f"Expected {len(self._position)} columns but " 330 | f"partition has {other.num_columns}." 331 | ) 332 | return True 333 | 334 | other_position_list = None 335 | if isinstance(other, list): 336 | other_position_list = other 337 | elif isinstance(other, Position): 338 | other_position_list = other.as_list() 339 | elif isinstance(other, PositionPartition): 340 | other_position_list = other.position.as_list() 341 | 342 | if not other_position_list or len(self._position) != len(other_position_list): 343 | raise UnexpectedPartitionException( 344 | f"Expected {len(self._position)} columns but partition has " 345 | f"{other_position_list}." 346 | ) 347 | 348 | # If ALL of v_mine >= v_other, then self is greater than other 349 | # If ANY of v_mine < v_other, then self is less than other 350 | return any( 351 | v_mine < v_other 352 | for v_mine, v_other in zip(self._position.as_list(), other_position_list) 353 | ) 354 | 355 | def __ge__(self, other): 356 | return not self < other 357 | 358 | def __eq__(self, other): 359 | if isinstance(other, PositionPartition): 360 | return self.name == other.name and self._position == other.position 361 | elif isinstance(other, MaxValuePartition): 362 | return False 363 | 364 | raise ValueError(f"Unexpected equality with {other}") 365 | 366 | 367 | class MaxValuePartition(_Partition): 368 | """A partition that includes all remaining values. 369 | 370 | This kind of partition always resides at the tail of the partition list, 371 | and is defined as containing values up to the reserved keyword MAXVALUE. 372 | """ 373 | 374 | def __init__(self, name, count): 375 | self._name = name 376 | self._count = count 377 | 378 | @property 379 | def name(self): 380 | return self._name 381 | 382 | @property 383 | def num_columns(self): 384 | return self._count 385 | 386 | def values(self): 387 | if self._count == 1: 388 | return "MAXVALUE" 389 | return "(" + ", ".join(["MAXVALUE"] * self._count) + ")" 390 | 391 | def __lt__(self, other): 392 | """MaxValuePartitions are always greater than every other partition.""" 393 | if isinstance(other, (Position, list)): 394 | if self._count != len(other): 395 | raise UnexpectedPartitionException( 396 | f"Expected {self._count} columns but list has {len(other)}." 397 | ) 398 | return False 399 | if is_partition_type(other): 400 | if self._count != other.num_columns: 401 | raise UnexpectedPartitionException( 402 | f"Expected {self._count} columns but list has {other.num_columns}." 
403 |             )
404 |             return False
405 |         raise ValueError(f"Unexpected comparison with {other}")
406 | 
407 |     def __ge__(self, other):
408 |         return not self < other
409 | 
410 |     def __eq__(self, other):
411 |         if isinstance(other, MaxValuePartition):
412 |             return self.name == other.name and self._count == other.num_columns
413 |         elif isinstance(other, PositionPartition):
414 |             return False
415 |         raise ValueError(f"Unexpected equality with {other}")
416 | 
417 | 
418 | class InstantPartition(PositionPartition):
419 |     """Represent a partition at the current moment.
420 | 
421 |     Used for rate calculations as a stand-in that only exists for the purposes
422 |     of the rate calculation itself.
423 |     """
424 | 
425 |     def __init__(self, name, now, position_in):
426 |         super().__init__(name)
427 |         self._instant = now
428 |         self._position.set_position(position_in)
429 | 
430 |     def timestamp(self):
431 |         return self._instant
432 | 
433 | 
434 | class _PlannedPartition(abc.ABC):
435 |     """Represents a partition this tool plans to emit.
436 | 
437 |     The method as_partition will make this a concrete type for later evaluation.
438 |     """
439 | 
440 |     def __init__(self):
441 |         self._num_columns = None
442 |         self._position = None
443 |         self._timestamp = None
444 |         self._important = False
445 | 
446 |     def set_timestamp(self, timestamp):
447 |         """Set the timestamp to be used for the modified partition.
448 | 
449 |         This effectively changes the partition's name.
450 |         """
451 |         self._timestamp = timestamp.replace(hour=0, minute=0)
452 |         return self
453 | 
454 |     def set_position(self, position_in):
455 |         """Set the position of this modified partition.
456 | 
457 |         If this partition changes an existing partition, the positions of both
458 |         must have identical length.
459 |         """
460 |         pos = Position()
461 |         pos.set_position(position_in)
462 | 
463 |         if self.num_columns is not None and len(pos) != self.num_columns:
464 |             raise UnexpectedPartitionException(
465 |                 f"Expected {self.num_columns} columns but input has {len(pos)}."
466 |             )
467 | 
468 |         self._position = pos
469 |         return self
470 | 
471 |     def set_important(self):
472 |         """Indicate this is an important partition.
434 | class _PlannedPartition(abc.ABC):
435 |     """Represents a partition this tool plans to emit.
436 | 
437 |     The method as_partition will make this a concrete type for later evaluation.
438 |     """
439 | 
440 |     def __init__(self):
441 |         self._num_columns = None
442 |         self._position = None
443 |         self._timestamp = None
444 |         self._important = False
445 | 
446 |     def set_timestamp(self, timestamp):
447 |         """Set the timestamp to be used for the modified partition.
448 | 
449 |         This effectively changes the partition's name.
450 |         """
451 |         self._timestamp = timestamp.replace(hour=0, minute=0, second=0, microsecond=0)
452 |         return self
453 | 
454 |     def set_position(self, position_in):
455 |         """Set the position of this modified partition.
456 | 
457 |         If this partition changes an existing partition, the positions of both
458 |         must have identical length.
459 |         """
460 |         pos = Position()
461 |         pos.set_position(position_in)
462 | 
463 |         if self.num_columns is not None and len(pos) != self.num_columns:
464 |             raise UnexpectedPartitionException(
465 |                 f"Expected {self.num_columns} columns but input has {len(pos)}."
466 |             )
467 | 
468 |         self._position = pos
469 |         return self
470 | 
471 |     def set_important(self):
472 |         """Indicate this is an important partition. Used by
473 |         _plan_partition_changes as a marker that there's a significant
474 |         change in this partition that should be committed even if the
475 |         overall map isn't changing much."""
476 |         self._important = True
477 |         return self
478 | 
479 |     @property
480 |     def position(self):
481 |         """Get the position for this modified partition."""
482 |         return self._position
483 | 
484 |     def timestamp(self):
485 |         """The timestamp of this partition."""
486 |         return self._timestamp
487 | 
488 |     def important(self):
489 |         """True if this Partition is important enough to ensure commitment."""
490 |         return self._important
491 | 
492 |     @property
493 |     @abc.abstractmethod
494 |     def has_modifications(self):
495 |         """True if this partition modifies another partition."""
496 | 
497 |     @property
498 |     def num_columns(self):
499 |         """Return the number of columns this partition represents."""
500 |         return self._num_columns
501 | 
502 |     def set_as_max_value(self):
503 |         """Represent this partition by MaxValuePartition from as_partition()."""
504 |         self._num_columns = len(self._position)
505 |         self._position = None
506 |         return self
507 | 
508 |     def as_partition(self):
509 |         """Return a concrete Partition that can be rendered into a SQL ALTER."""
510 |         if not self._timestamp:
511 |             raise ValueError("A timestamp is required to generate a partition name")
512 |         if self._position:
513 |             return PositionPartition(f"p_{self._timestamp:%Y%m%d}").set_position(
514 |                 self._position
515 |             )
516 |         return MaxValuePartition(f"p_{self._timestamp:%Y%m%d}", count=self._num_columns)
517 | 
518 |     def __repr__(self):
519 |         return f"{type(self).__name__}<{str(self)}>"
520 | 
521 |     def __eq__(self, other):
522 |         if isinstance(other, _PlannedPartition):
523 |             return (
524 |                 isinstance(self, type(other))
525 |                 and self.position == other.position
526 |                 and self.timestamp() == other.timestamp()
527 |                 and self.important() == other.important()
528 |             )
529 |         return False
530 | 
531 | 
532 | class ChangePlannedPartition(_PlannedPartition):
533 |     """Represents modifications to a Partition supplied during construction.
534 | 
535 |     Use the parent class' methods to alter this change.
536 |     """
537 | 
538 |     def __init__(self, old_part):
539 |         if not is_partition_type(old_part):
540 |             raise ValueError(f"Expected a concrete Partition, got {old_part}")
541 |         super().__init__()
542 |         self._old = old_part
543 |         self._num_columns = self._old.num_columns
544 |         self._timestamp = self._old.timestamp()
545 |         self._old_position = (
546 |             self._old.position if isinstance(old_part, PositionPartition) else None
547 |         )
548 |         self._position = self._old_position
549 | 
550 |     @property
551 |     def has_modifications(self):
552 |         return (
553 |             self._position != self._old_position
554 |             or (self._old.timestamp() is None
555 |                 and self._timestamp is not None)
556 |             or self._timestamp.date() != self._old.timestamp().date()
557 |         )
558 | 
559 |     @property
560 |     def old(self):
561 |         """Get the partition to be modified."""
562 |         return self._old
563 | 
564 |     def __str__(self):
565 |         imp = "[!!]" if self.important() else ""
566 |         return f"{self._old} => {self.position} {imp} {self._timestamp}"
567 | 
568 | 
569 | class NewPlannedPartition(_PlannedPartition):
570 |     """Represents a wholly new Partition to be constructed.
571 | 
572 |     After construction, you must set the number of columns using set_columns
573 |     before attempting to use this in a plan.
574 |     """
575 | 
576 |     def __init__(self):
577 |         super().__init__()
578 |         self.set_important()
579 | 
580 |     def set_columns(self, count):
581 |         """Set the number of columns needed to represent a position for this
582 |         partition."""
583 |         self._num_columns = count
584 |         return self
585 | 
586 |     @property
587 |     def has_modifications(self):
588 |         return False
589 | 
590 |     def __str__(self):
591 |         return f"Add: {self.position} {self._timestamp}"
592 | 
593 | 
594 | class MismatchedIdException(Exception):
595 |     """Raised if the partition map doesn't use the primary key as its range id."""
596 | 
597 | 
598 | class TruncatedDatabaseResultException(Exception):
599 |     """Raised if the database's XML result was truncated during a subprocess interaction."""
600 | 
601 | 
602 | class DuplicatePartitionException(Exception):
603 |     """Raised if a partition being created already exists."""
604 | 
605 | 
606 | class UnexpectedPartitionException(Exception):
607 |     """Raised when the partition map is unexpected."""
608 | 
609 | 
610 | class TableInformationException(Exception):
611 |     """Raised when the table's status doesn't include the information we need."""
612 | 
613 | 
614 | class TableEmptyException(Exception):
615 |     """Raised when the table is empty."""
616 | 
617 | 
618 | class NoEmptyPartitionsAvailableException(Exception):
619 |     """Raised if no empty partitions are available to safely modify."""
620 | 
621 | 
622 | class DatabaseCommandException(Exception):
623 |     """Raised if the database command failed."""
624 | 
625 | 
626 | class NoExactTimeException(Exception):
627 |     """Raised if there's no exact time available for this partition."""
628 | 
629 | 
630 | class NoValidRatesOfChangeException(Exception):
631 |     """Raised if the table's rate of change cannot be calculated."""
632 | 
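633 | # Illustrative sketch (comments only, not executed by the library): how the
634 | # types above compose. Names and boundary values here are hypothetical, and
635 | # the datetime/timezone names assume the same imports the tests use.
636 | #
637 | #   current = Position()
638 | #   current.set_position([150])
639 | #
640 | #   filled = PositionPartition("p_20210101").set_position([100])
641 | #   tail = MaxValuePartition("p_20210201", count=1)
642 | #   assert filled < current.as_list() and filled < tail
643 | #
644 | #   # Cap the old tail at the current position, then plan a fresh tail:
645 | #   cap = ChangePlannedPartition(tail).set_position([150])
646 | #   new_tail = NewPlannedPartition().set_columns(1).set_timestamp(
647 | #       datetime(2021, 3, 1, tzinfo=timezone.utc)
648 | #   )
649 | #   assert cap.as_partition() == PositionPartition("p_20210201").set_position([150])
650 | #   assert new_tail.as_partition() == MaxValuePartition("p_20210301", count=1)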
574 | """ 575 | 576 | def __init__(self): 577 | super().__init__() 578 | self.set_important() 579 | 580 | def set_columns(self, count): 581 | """Set the number of columns needed to represent a position for this 582 | partition.""" 583 | self._num_columns = count 584 | return self 585 | 586 | @property 587 | def has_modifications(self): 588 | return False 589 | 590 | def __str__(self): 591 | return f"Add: {self.position} {self._timestamp}" 592 | 593 | 594 | class MismatchedIdException(Exception): 595 | """Raised if the partition map doesn't use the primary key as its range id.""" 596 | 597 | 598 | class TruncatedDatabaseResultException(Exception): 599 | """Raised if the XML schema truncated over a subprocess interaction""" 600 | 601 | 602 | class DuplicatePartitionException(Exception): 603 | """Raise if a partition being created already exists.""" 604 | 605 | 606 | class UnexpectedPartitionException(Exception): 607 | """Raised when the partition map is unexpected.""" 608 | 609 | 610 | class TableInformationException(Exception): 611 | """Raised when the table's status doesn't include the information we need.""" 612 | 613 | 614 | class TableEmptyException(Exception): 615 | """Raised when the table is empty.""" 616 | 617 | 618 | class NoEmptyPartitionsAvailableException(Exception): 619 | """Raised if no empty partitions are available to safely modify.""" 620 | 621 | 622 | class DatabaseCommandException(Exception): 623 | """Raised if the database command failed.""" 624 | 625 | 626 | class NoExactTimeException(Exception): 627 | """Raised if there's no exact time available for this partition.""" 628 | 629 | 630 | class NoValidRatesOfChangeException(Exception): 631 | """Raised if the table's rate of change cannot be calculated.""" 632 | -------------------------------------------------------------------------------- /partitionmanager/types_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import unittest 3 | import pytest 4 | from datetime import datetime, timedelta, timezone 5 | from .types import ( 6 | ChangePlannedPartition, 7 | InstantPartition, 8 | is_partition_type, 9 | MaxValuePartition, 10 | NewPlannedPartition, 11 | Position, 12 | PositionPartition, 13 | timedelta_from_dict, 14 | SqlInput, 15 | SqlQuery, 16 | Table, 17 | to_sql_url, 18 | UnexpectedPartitionException, 19 | ) 20 | 21 | 22 | def mkPos(*pos): 23 | p = Position() 24 | p.set_position(pos) 25 | return p 26 | 27 | 28 | def mkPPart(name, *pos): 29 | return PositionPartition(name).set_position(mkPos(*pos)) 30 | 31 | 32 | def mkTailPart(name, count=1): 33 | return MaxValuePartition(name, count) 34 | 35 | 36 | class TestSqlQuery(unittest.TestCase): 37 | def test_multiple_statements(self): 38 | with self.assertRaises(argparse.ArgumentTypeError): 39 | SqlQuery("SELECT 'id' FROM 'place' WHERE 'id'=?; SELECT 1=1;") 40 | 41 | def test_multiple_arguments(self): 42 | with self.assertRaises(argparse.ArgumentTypeError): 43 | SqlQuery("SELECT 'id' FROM 'place' WHERE 'id'=? 
OR 'what'=?;") 44 | 45 | def test_forbidden_terms(self): 46 | with self.assertRaises(argparse.ArgumentTypeError): 47 | SqlQuery("DELETE FROM 'place';") 48 | with self.assertRaises(argparse.ArgumentTypeError): 49 | SqlQuery("UPDATE 'place';") 50 | with self.assertRaises(argparse.ArgumentTypeError): 51 | SqlQuery("INSERT INTO 'place';") 52 | with self.assertRaises(argparse.ArgumentTypeError): 53 | SqlQuery("ANALYZE 'place';") 54 | with self.assertRaises(argparse.ArgumentTypeError): 55 | SqlQuery("SET 'place';") 56 | with self.assertRaises(argparse.ArgumentTypeError): 57 | SqlQuery(";") 58 | 59 | def test_get_statement_errors(self): 60 | q = SqlQuery("SELECT 'id' FROM 'place' WHERE 'id'=?;") 61 | with self.assertRaises(argparse.ArgumentTypeError): 62 | q.get_statement_with_argument("must be a SqlInput type") 63 | with self.assertRaises(argparse.ArgumentTypeError): 64 | q.get_statement_with_argument(5) 65 | with self.assertRaises(argparse.ArgumentTypeError): 66 | q.get_statement_with_argument(None) 67 | 68 | def test_get_statement_string(self): 69 | q = SqlQuery("SELECT 'id' FROM 'place' WHERE 'status'=?;") 70 | 71 | with self.assertRaises(argparse.ArgumentTypeError): 72 | q.get_statement_with_argument(SqlInput("strings aren't allowed")) 73 | 74 | def test_get_statement_number(self): 75 | q = SqlQuery("SELECT 'id' FROM 'place' WHERE 'id'=?;") 76 | 77 | self.assertEqual( 78 | q.get_statement_with_argument(SqlInput(5)), 79 | "SELECT 'id' FROM 'place' WHERE 'id'=5;", 80 | ) 81 | self.assertEqual( 82 | q.get_statement_with_argument(SqlInput(5555)), 83 | "SELECT 'id' FROM 'place' WHERE 'id'=5555;", 84 | ) 85 | 86 | def test_get_statement_number_with_newlines(self): 87 | q = SqlQuery( 88 | """ 89 | SELECT 'multilines' FROM 'where it might be' WHERE 'id'=?; 90 | """ 91 | ) 92 | self.assertEqual( 93 | q.get_statement_with_argument(SqlInput(0xFF)), 94 | "SELECT 'multilines' FROM 'where it might be' WHERE 'id'=255;", 95 | ) 96 | 97 | 98 | class TestTypes(unittest.TestCase): 99 | def test_dburl_invalid(self): 100 | with self.assertRaises(argparse.ArgumentTypeError): 101 | to_sql_url("http://localhost/dbname") 102 | 103 | def test_dburl_without_db_path(self): 104 | with self.assertRaises(argparse.ArgumentTypeError): 105 | to_sql_url("sql://localhost") 106 | with self.assertRaises(argparse.ArgumentTypeError): 107 | to_sql_url("sql://localhost/") 108 | 109 | def test_dburl_with_two_passwords(self): 110 | u = to_sql_url("sql://username:password:else@localhost:3306/database") 111 | self.assertEqual(u.username, "username") 112 | self.assertEqual(u.password, "password:else") 113 | self.assertEqual(u.port, 3306) 114 | 115 | def test_dburl_with_port(self): 116 | u = to_sql_url("sql://localhost:3306/database") 117 | self.assertEqual(u.hostname, "localhost") 118 | self.assertEqual(u.username, None) 119 | self.assertEqual(u.password, None) 120 | self.assertEqual(u.port, 3306) 121 | 122 | def test_dburl_with_no_port(self): 123 | u = to_sql_url("sql://localhost/database") 124 | self.assertEqual(u.hostname, "localhost") 125 | self.assertEqual(u.username, None) 126 | self.assertEqual(u.password, None) 127 | self.assertEqual(u.port, None) 128 | 129 | def test_dburl_with_user_pass_and_no_port(self): 130 | u = to_sql_url("sql://username:password@localhost/database") 131 | self.assertEqual(u.hostname, "localhost") 132 | self.assertEqual(u.username, "username") 133 | self.assertEqual(u.password, "password") 134 | self.assertEqual(u.port, None) 135 | 136 | def test_dburl_with_user_pass_and_port(self): 137 | u = 
to_sql_url("sql://username:password@localhost:911/database") 138 | self.assertEqual(u.hostname, "localhost") 139 | self.assertEqual(u.username, "username") 140 | self.assertEqual(u.password, "password") 141 | self.assertEqual(u.port, 911) 142 | 143 | def test_table(self): 144 | with self.assertRaises(argparse.ArgumentTypeError): 145 | Table("invalid'name") 146 | 147 | self.assertEqual(type(Table("name").name), SqlInput) 148 | 149 | t = Table("t") 150 | self.assertEqual(None, t.retention_period) 151 | 152 | self.assertEqual( 153 | Table("a").set_partition_period(timedelta(days=9)).partition_period, 154 | timedelta(days=9), 155 | ) 156 | 157 | self.assertEqual( 158 | Table("a").set_retention_period(timedelta(days=9)).retention_period, 159 | timedelta(days=9), 160 | ) 161 | 162 | with self.assertRaises(argparse.ArgumentTypeError): 163 | timedelta_from_dict({"something": 1}) 164 | 165 | with self.assertRaises(argparse.ArgumentTypeError): 166 | timedelta_from_dict({"another thing": 1, "days": 30}) 167 | 168 | r = timedelta_from_dict({}) 169 | self.assertEqual(None, r) 170 | 171 | with self.assertRaises(TypeError): 172 | timedelta_from_dict({"days": "thirty"}) 173 | 174 | r = timedelta_from_dict({"days": 30}) 175 | self.assertEqual(timedelta(days=30), r) 176 | 177 | with self.assertRaises(ValueError): 178 | t.set_earliest_utc_timestamp_query("col") 179 | with self.assertRaises(ValueError): 180 | t.set_earliest_utc_timestamp_query(None) 181 | self.assertFalse(t.has_date_query) 182 | 183 | t.set_earliest_utc_timestamp_query( 184 | SqlQuery("SELECT not_before FROM table WHERE id = ?;") 185 | ) 186 | self.assertTrue(t.has_date_query) 187 | 188 | def test_invalid_timedelta_string(self): 189 | with pytest.raises(AttributeError): 190 | assert timedelta_from_dict("30s") 191 | 192 | def test_changed_partition(self): 193 | with self.assertRaises(ValueError): 194 | ChangePlannedPartition("bob") 195 | 196 | with self.assertRaises(ValueError): 197 | ChangePlannedPartition(PositionPartition("p_20201231")).set_position(2) 198 | 199 | with self.assertRaises(UnexpectedPartitionException): 200 | ChangePlannedPartition(PositionPartition("p_20210101")).set_position( 201 | [1, 2, 3, 4] 202 | ) 203 | 204 | c = ChangePlannedPartition( 205 | PositionPartition("p_20210101").set_position([1, 2, 3, 4]) 206 | ) 207 | self.assertFalse(c.has_modifications) 208 | c.set_timestamp(datetime(2021, 1, 2, tzinfo=timezone.utc)) 209 | y = c.set_position([10, 10, 10, 10]) 210 | self.assertEqual(c, y) 211 | self.assertTrue(c.has_modifications) 212 | 213 | self.assertEqual(c.timestamp(), datetime(2021, 1, 2, tzinfo=timezone.utc)) 214 | self.assertEqual(c.position.as_list(), [10, 10, 10, 10]) 215 | 216 | self.assertEqual( 217 | c.as_partition(), 218 | PositionPartition("p_20210102").set_position([10, 10, 10, 10]), 219 | ) 220 | 221 | c_max = ChangePlannedPartition( 222 | MaxValuePartition("p_20210101", count=1) 223 | ).set_position([1949]) 224 | self.assertEqual(c_max.timestamp(), datetime(2021, 1, 1, tzinfo=timezone.utc)) 225 | self.assertEqual(c_max.position.as_list(), [1949]) 226 | 227 | self.assertEqual( 228 | ChangePlannedPartition( 229 | PositionPartition("p_20210101").set_position([1, 2, 3, 4]) 230 | ), 231 | ChangePlannedPartition( 232 | PositionPartition("p_20210101").set_position([1, 2, 3, 4]) 233 | ), 234 | ) 235 | 236 | self.assertEqual( 237 | ChangePlannedPartition( 238 | PositionPartition("p_20210101").set_position([1, 2, 3, 4]) 239 | ).set_important(), 240 | ChangePlannedPartition( 241 | 
PositionPartition("p_20210101").set_position([1, 2, 3, 4]) 242 | ).set_important(), 243 | ) 244 | 245 | self.assertNotEqual( 246 | ChangePlannedPartition( 247 | PositionPartition("p_20210101").set_position([1, 2, 4, 4]) 248 | ), 249 | ChangePlannedPartition( 250 | PositionPartition("p_20210101").set_position([1, 2, 3, 4]) 251 | ), 252 | ) 253 | 254 | self.assertNotEqual( 255 | ChangePlannedPartition( 256 | PositionPartition("p_20210101").set_position([1, 2, 3, 4]) 257 | ).set_important(), 258 | ChangePlannedPartition( 259 | PositionPartition("p_20210101").set_position([1, 2, 3, 4]) 260 | ), 261 | ) 262 | 263 | self.assertNotEqual( 264 | ChangePlannedPartition( 265 | PositionPartition("p_20210101").set_position([1, 2, 3, 4]) 266 | ), 267 | ChangePlannedPartition( 268 | PositionPartition("p_20210102").set_position([1, 2, 3, 4]) 269 | ), 270 | ) 271 | self.assertEqual( 272 | ChangePlannedPartition( 273 | PositionPartition("p_20210101").set_position([1, 2, 3, 4]) 274 | ) 275 | .set_as_max_value() 276 | .as_partition(), 277 | NewPlannedPartition() 278 | .set_columns(4) 279 | .set_timestamp(datetime(2021, 1, 1, tzinfo=timezone.utc)) 280 | .as_partition(), 281 | ) 282 | 283 | def test_new_partition(self): 284 | with self.assertRaises(ValueError): 285 | NewPlannedPartition().as_partition() 286 | 287 | self.assertEqual( 288 | NewPlannedPartition() 289 | .set_columns(5) 290 | .set_timestamp( 291 | datetime(2021, 12, 31, hour=23, minute=15, tzinfo=timezone.utc) 292 | ) 293 | .as_partition(), 294 | MaxValuePartition("p_20211231", count=5), 295 | ) 296 | 297 | self.assertFalse(NewPlannedPartition().has_modifications) 298 | 299 | self.assertEqual( 300 | NewPlannedPartition() 301 | .set_position([3]) 302 | .set_timestamp(datetime(2021, 12, 31, tzinfo=timezone.utc)) 303 | .as_partition(), 304 | PositionPartition("p_20211231").set_position(mkPos(3)), 305 | ) 306 | 307 | self.assertEqual( 308 | NewPlannedPartition() 309 | .set_position([1, 1, 1]) 310 | .set_timestamp(datetime(1994, 1, 1, tzinfo=timezone.utc)) 311 | .as_partition(), 312 | PositionPartition("p_19940101").set_position([1, 1, 1]), 313 | ) 314 | 315 | self.assertEqual( 316 | NewPlannedPartition() 317 | .set_position([3]) 318 | .set_timestamp(datetime(2021, 12, 31, tzinfo=timezone.utc)), 319 | NewPlannedPartition() 320 | .set_position([3]) 321 | .set_timestamp(datetime(2021, 12, 31, tzinfo=timezone.utc)), 322 | ) 323 | 324 | self.assertEqual( 325 | NewPlannedPartition() 326 | .set_position([99, 999]) 327 | .set_timestamp( 328 | datetime(2021, 12, 31, hour=19, minute=2, tzinfo=timezone.utc) 329 | ) 330 | .set_as_max_value(), 331 | NewPlannedPartition() 332 | .set_columns(2) 333 | .set_timestamp(datetime(2021, 12, 31, tzinfo=timezone.utc)), 334 | ) 335 | 336 | 337 | class TestPartition(unittest.TestCase): 338 | def test_partition_timestamps(self): 339 | self.assertFalse(PositionPartition("p_start").has_real_time) 340 | self.assertEqual( 341 | PositionPartition("p_start").timestamp(), 342 | datetime(2021, 1, 1, tzinfo=timezone.utc), 343 | ) 344 | self.assertFalse(PositionPartition("not_a_date").has_real_time) 345 | self.assertIsNone(PositionPartition("not_a_date").timestamp()) 346 | self.assertFalse(PositionPartition("p_202012310130").has_real_time) 347 | self.assertIsNone(PositionPartition("p_202012310130").timestamp()) 348 | 349 | self.assertTrue(PositionPartition("p_20011231").has_real_time) 350 | self.assertEqual( 351 | PositionPartition("p_20011231").timestamp(), 352 | datetime(2001, 12, 31, tzinfo=timezone.utc), 353 | ) 354 | 355 | 
self.assertLess(mkPPart("a", 9), mkPPart("b", 11)) 356 | self.assertLess(mkPPart("a", 10), mkPPart("b", 11)) 357 | self.assertFalse(mkPPart("a", 11) < mkPPart("b", 11)) 358 | self.assertFalse(mkPPart("a", 12) < mkPPart("b", 11)) 359 | 360 | self.assertLess(mkPPart("a", 10, 10), mkTailPart("b", count=2)) 361 | with self.assertRaises(UnexpectedPartitionException): 362 | mkPPart("a", 10, 10) < mkTailPart("b", count=1) 363 | 364 | self.assertTrue(mkPPart("a", 10, 10) < mkPPart("b", 11, 10)) 365 | self.assertTrue(mkPPart("a", 10, 10) < mkPPart("b", 10, 11)) 366 | self.assertLess(mkPPart("a", 10, 10), mkPPart("b", 11, 11)) 367 | self.assertTrue(mkPPart("a", 10, 10) < [10, 11]) 368 | self.assertTrue(mkPPart("a", 10, 10) < [11, 10]) 369 | self.assertLess(mkPPart("a", 10, 10), [11, 11]) 370 | 371 | with self.assertRaises(UnexpectedPartitionException): 372 | mkPPart("a", 10, 10) < mkPPart("b", 11, 11, 11) 373 | with self.assertRaises(UnexpectedPartitionException): 374 | mkPPart("a", 10, 10, 10) < mkPPart("b", 11, 11) 375 | 376 | def test_partition_tuple_ordering(self): 377 | cur_pos = mkPPart("current_pos", 8236476764, 6096376984) 378 | p_20220525 = mkPPart("p_20220525", 2805308158, 2682458996) 379 | p_20220611 = mkPPart("p_20220611", 7882495694, 7856340600) 380 | p_20230519 = mkPPart("p_20230519", 10790547177, 11048018089) 381 | p_20230724 = mkPPart("p_20230724", 95233456870, 97348306298) 382 | 383 | self.assertGreater(cur_pos, p_20220525) 384 | self.assertGreater(cur_pos, p_20220611) 385 | self.assertLess(cur_pos, p_20230519) 386 | self.assertLess(cur_pos, p_20230724) 387 | 388 | def test_instant_partition(self): 389 | now = datetime.now(tz=timezone.utc) 390 | 391 | ip = InstantPartition("p_20380101", now, [1, 2]) 392 | self.assertEqual(ip.position.as_list(), [1, 2]) 393 | self.assertEqual(ip.name, "p_20380101") 394 | self.assertEqual(ip.timestamp(), now) 395 | 396 | def test_is_partition_type(self): 397 | self.assertTrue(is_partition_type(mkPPart("b", 1, 2))) 398 | self.assertTrue( 399 | is_partition_type( 400 | InstantPartition("p_19490520", datetime.now(tz=timezone.utc), [1, 2]) 401 | ) 402 | ) 403 | self.assertFalse(is_partition_type(None)) 404 | self.assertFalse(is_partition_type(1)) 405 | self.assertFalse(is_partition_type(NewPlannedPartition())) 406 | 407 | 408 | class TestPosition(unittest.TestCase): 409 | def test_position_as_sql_input(self): 410 | self.assertEqual([SqlInput(88)], mkPos(88).as_sql_input()) 411 | self.assertEqual([SqlInput(88), SqlInput(99)], mkPos(88, 99).as_sql_input()) 412 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "mariadb-sequential-partition-manager" 7 | maintainers = [ 8 | {name = "J.C. 
Jones", email = "jc@letsencrypt.org"}, 9 | ] 10 | version = "0.4.1" 11 | description = "Manage DB partitions based on sequential IDs" 12 | license = {file = "LICENSE.txt"} 13 | classifiers = [ 14 | "Development Status :: 4 - Beta", 15 | "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", 16 | "Programming Language :: Python :: 3", 17 | ] 18 | keywords = ["database", "mariadb"] 19 | dependencies = [ 20 | "pyyaml" 21 | ] 22 | requires-python = ">=3.8" 23 | readme = "README.md" 24 | 25 | [tool.hatch.build.targets.wheel] 26 | packages = ["partitionmanager"] 27 | 28 | [project.optional-dependencies] 29 | pymysql = ["PyMySQL >= 1.0.2"] 30 | 31 | [project.urls] 32 | Repository = "http://github.com/letsencrypt/mariadb-sequential-partition-manager" 33 | 34 | [project.scripts] 35 | partition-manager = "partitionmanager.cli:main" 36 | 37 | [tool.ruff] 38 | line-length = 88 # default is 88 39 | target-version = "py38" 40 | 41 | [tool.ruff.lint] 42 | select = [ 43 | "A", # flake8-builtins 44 | "AIR", # Airflow 45 | "ARG", # flake8-unused-arguments 46 | "ASYNC", # flake8-async 47 | "B", # flake8-bugbear 48 | "BLE", # flake8-blind-except 49 | "C4", # flake8-comprehensions 50 | "C90", # McCabe cyclomatic complexity 51 | "DJ", # flake8-django 52 | "DTZ", # flake8-datetimez 53 | "E", # pycodestyle 54 | "EXE", # flake8-executable 55 | "F", # Pyflakes 56 | "FA", # flake8-future-annotations 57 | "FBT", # flake8-boolean-trap 58 | "FIX", # flake8-fixme 59 | "FLY", # flynt 60 | "ICN", # flake8-import-conventions 61 | "INP", # flake8-no-pep420 62 | "INT", # flake8-gettext 63 | "ISC", # flake8-implicit-str-concat 64 | "LOG", # flake8-logging 65 | "NPY", # NumPy-specific rules 66 | "PD", # pandas-vet 67 | "PERF", # Perflint 68 | "PGH", # pygrep-hooks 69 | "PIE", # flake8-pie 70 | "PL", # Pylint 71 | "PTH", # flake8-use-pathlib 72 | "PYI", # flake8-pyi 73 | "RET", # flake8-return 74 | "RSE", # flake8-raise 75 | "S", # flake8-bandit 76 | "SIM", # flake8-simplify 77 | "SLOT", # flake8-slots 78 | "T10", # flake8-debugger 79 | "TCH", # flake8-type-checking 80 | "TD", # flake8-todos 81 | "TID", # flake8-tidy-imports 82 | "TRIO", # flake8-trio 83 | "UP", # pyupgrade 84 | "W", # pycodestyle 85 | "YTT", # flake8-2020 86 | # "ANN", # flake8-annotations 87 | # "COM", # flake8-commas 88 | # "CPY", # flake8-copyright 89 | # "D", # pydocstyle 90 | # "EM", # flake8-errmsg 91 | # "ERA", # eradicate 92 | # "FURB", # refurb 93 | # "G", # flake8-logging-format 94 | # "I", # isort 95 | # "N", # pep8-naming 96 | # "PT", # flake8-pytest-style 97 | # "Q", # flake8-quotes 98 | # "RUF", # Ruff-specific rules 99 | # "SLF", # flake8-self 100 | # "T20", # flake8-print 101 | # "TRY", # tryceratops 102 | ] 103 | ignore = [ 104 | "ISC001", # Implicit string concatenation can conflict with ruff format 105 | "S101", # Allow assert statements 106 | ] 107 | 108 | [tool.ruff.lint.mccabe] 109 | max-complexity = 16 # default is 10 110 | 111 | [tool.ruff.lint.per-file-ignores] 112 | "partitionmanager/cli.py" = ["B008", "PERF203"] # TODO: Fix B008, upgrade to Py3.11 for PERF203 113 | "partitionmanager/cli_test.py" = ["S608", "SIM115", "SIM117"] # TODO: Fix SIMs 114 | "partitionmanager/sql.py" = ["B904", "S603"] # TODO: Fix S603 115 | "partitionmanager/table_append_partition.py" = ["S608", "SIM102"] # TODO: Fix S608 116 | "partitionmanager/types.py" = ["B904", "RET505", "SLOT000"] # TODO: Fix B904 and SLOT000 117 | "partitionmanager/types_test.py" = ["B015"] # TODO: Fix me 118 | 119 | [tool.ruff.lint.pylint] 120 | max-args = 7 # default is 5 
121 | max-branches = 15  # default is 12
122 | 
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | junit_family=xunit2
3 | 
--------------------------------------------------------------------------------
/test_tools/fake_mariadb.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | stdin=$(cat)
3 | # A fake mariadb client for tests: answers known queries with canned resultsets in the mariadb --xml output format.
4 | if echo "$*" | grep "v" >/dev/null; then
5 |     echo "mariadb command was: $@" >&2
6 |     echo "stdin was: $stdin" >&2
7 | fi
8 | 
9 | if echo $stdin | grep "@@READ_ONLY" >/dev/null; then
10 |     cat <<EOF
11 | <?xml version="1.0"?>
12 | 
13 | <resultset statement="SELECT @@READ_ONLY;" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
14 |   <row>
15 |     <field name="@@READ_ONLY">0</field>
16 |   </row>
17 | </resultset>
18 | EOF
19 |     exit
20 | fi
21 | 
22 | if echo $stdin | grep "SHOW TABLES" >/dev/null; then
23 |     cat <<EOF
24 | <?xml version="1.0"?>
25 | 
26 | <resultset statement="SHOW TABLES;" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
27 |   <row>
28 |     <field name="Tables_in_tasty-treats">partitioned_last_week</field>
29 |   </row>
30 |   <row>
31 |     <field name="Tables_in_tasty-treats">partitioned_yesterday</field>
32 |   </row>
33 |   <row>
34 |     <field name="Tables_in_tasty-treats">other</field>
35 |   </row>
36 | </resultset>
37 | EOF
38 |     exit
39 | fi
40 | 
41 | if echo $stdin | grep "INFORMATION_SCHEMA" >/dev/null; then
42 |     if echo $stdin | grep "unpartitioned" >/dev/null; then
43 |         cat <<EOF
44 | <?xml version="1.0"?>
45 | 
46 | <resultset statement="SELECT CREATE_OPTIONS, AUTO_INCREMENT FROM INFORMATION_SCHEMA.TABLES WHERE table_name='unpartitioned';"
47 |   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
48 |   <row>
49 |     <field name="AUTO_INCREMENT">150</field>
50 |     <field name="CREATE_OPTIONS">max_rows=10380835156842741 transactional=0</field>
51 |   </row>
52 | </resultset>
53 | EOF
54 |         exit
55 |     else
56 |         cat <<EOF
57 | <?xml version="1.0"?>
58 | 
59 | <resultset statement="SELECT CREATE_OPTIONS, AUTO_INCREMENT FROM INFORMATION_SCHEMA.TABLES;"
60 |   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
61 |   <row>
62 |     <field name="AUTO_INCREMENT">150</field>
63 |     <field name="CREATE_OPTIONS">max_rows=10380835156842741 transactional=0 partitioned</field>
64 |   </row>
65 | </resultset>
66 | EOF
67 |         exit
68 |     fi
69 | fi
70 | 
71 | if echo $stdin | grep "ORDER BY" >/dev/null; then
72 |     cat <<EOF
73 | <?xml version="1.0"?>
74 | 
75 | <resultset statement="SELECT id FROM burgers ORDER BY id DESC LIMIT 1;" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
76 |   <row>
77 |     <field name="id">150</field>
78 |   </row>
79 | </resultset>
80 | EOF
81 |     exit
82 | fi
83 | 
84 | if echo $stdin | grep "SHOW CREATE" >/dev/null; then
85 |     if echo $stdin | grep "partitioned_last_week" >/dev/null; then
86 |         earlyPartName=$(date --utc --date='7 days ago' +p_%Y%m%d)
87 |         midPartName=$(date --utc --date='today' +p_%Y%m%d)
88 |         tailPartName=$(date --utc --date='7 days' +p_%Y%m%d)
89 |     elif echo $stdin | grep "partitioned_yesterday" >/dev/null; then
90 |         earlyPartName=$(date --utc --date='8 days ago' +p_%Y%m%d)
91 |         midPartName=$(date --utc --date='yesterday' +p_%Y%m%d)
92 |         tailPartName=$(date --utc --date='6 days' +p_%Y%m%d)
93 |     else
94 |         earlyPartName="p_20201004"
95 |         midPartName="p_20201105"
96 |         tailPartName="p_20201204"
97 |     fi
98 | # NOTE: partition names come from the date fixtures above; boundary values are arbitrary fixture data.
99 |     cat <<EOF
100 | <?xml version="1.0"?>
101 | 
102 | <resultset statement="SHOW CREATE TABLE burgers;" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
103 |   <row>
104 |     <field name="Table">burgers</field>
105 |     <field name="Create Table">CREATE TABLE \`burgers\` (
106 |   \`id\` bigint(20) NOT NULL AUTO_INCREMENT,
107 |   PRIMARY KEY (\`id\`),
108 | ) ENGINE=InnoDB AUTO_INCREMENT=150 DEFAULT CHARSET=utf8
109 | EOF
110 |     if echo $stdin | grep "unpartitioned" >/dev/null; then
111 |         cat <<EOF
112 | </field>
113 |   </row>
114 | </resultset>
115 | EOF
116 |     else
117 |         cat <<EOF
118 |  PARTITION BY RANGE (\`id\`)
119 | (PARTITION \`$earlyPartName\` VALUES LESS THAN (100) ENGINE = InnoDB,
120 |  PARTITION \`$midPartName\` VALUES LESS THAN (150) ENGINE = InnoDB,
121 |  PARTITION \`$tailPartName\` VALUES LESS THAN MAXVALUE ENGINE = InnoDB)</field>
122 |   </row>
123 | </resultset>
124 | EOF
125 |     fi
126 |     exit
127 | fi
128 | 
129 | if echo $stdin | grep "REORGANIZE PARTITION" >/dev/null; then
130 |     cat <<EOF
131 | <?xml version="1.0"?>
132 | 
133 | <resultset statement="ALTER TABLE REORGANIZE PARTITION;" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
134 | </resultset>
135 | EOF
136 |     exit
137 | fi
138 | 
139 | if echo $stdin | grep "SELECT DATABASE" >/dev/null; then
140 |     cat <<EOF
141 | <?xml version="1.0"?>
142 | 
143 | <resultset statement="SELECT DATABASE();" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
144 |   <row>
145 |     <field name="DATABASE()">tasty-treats</field>
146 |   </row>
147 | </resultset>
148 | EOF
149 |     exit
150 | fi
151 | 
152 | if echo $stdin | grep "DESCRIBE" >/dev/null; then
153 |     cat <<EOF
154 | <?xml version="1.0"?>
155 | 
156 | <resultset statement="DESCRIBE burgers;" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
157 |   <row>
158 |     <field name="Field">id</field>
159 |     <field name="Type">bigint(20)</field>
160 |   </row>
161 |   <row>
162 |     <field name="Field">serial</field>
163 |     <field name="Type">varchar(20)</field>
164 |   </row>
165 | </resultset>
166 | EOF
167 |     exit
168 | fi
169 | 
170 | exit 1
171 | 
--------------------------------------------------------------------------------