├── .github └── workflows │ └── main.yaml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── docs └── sqlite_backup │ ├── core.md │ ├── index.md │ └── log.md ├── requirements.txt ├── setup.cfg ├── setup.py ├── sqlite_backup ├── __init__.py ├── __main__.py ├── core.py ├── log.py └── py.typed └── tests ├── __init__.py ├── test_sqlite_backup.py └── test_threads.py /.github/workflows/main.yaml: -------------------------------------------------------------------------------- 1 | name: Run Tests 2 | 3 | on: 4 | push: 5 | branches: ["*"] 6 | pull_request: 7 | branches: ["*"] 8 | 9 | jobs: 10 | build: 11 | strategy: 12 | matrix: 13 | platform: [ubuntu-latest, macos-latest, windows-latest] 14 | python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] 15 | exclude: [ 16 | # exclude some windows/macos runners, they run pretty slow 17 | { platform: windows-latest, python-version: "3.9" }, 18 | { platform: windows-latest, python-version: "3.10" }, 19 | { platform: windows-latest, python-version: "3.11" }, 20 | { platform: macos-latest, python-version: "3.9" }, 21 | { platform: macos-latest, python-version: "3.10" }, 22 | { platform: macos-latest, python-version: "3.11" }, 23 | ] 24 | 25 | runs-on: ${{ matrix.platform }} 26 | 27 | steps: 28 | - uses: actions/checkout@v4 29 | - name: Set up Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v4 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install packages 34 | run: | 35 | python -m pip install --upgrade pip wheel 36 | pip install '.[testing]' 37 | - name: Run mypy 38 | run: | 39 | mypy --install-types --non-interactive ./sqlite_backup ./tests setup.py 40 | - name: Run pytest 41 | run: | 42 | pytest 43 | - name: Run flake8 44 | run: | 45 | flake8 ./sqlite_backup 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tags* 2 | *.pdf 3 | *.sqlite 4 | 5 | # 
Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 
95 | #Pipfile.lock 96 | 97 | # celery beat schedule file 98 | celerybeat-schedule 99 | 100 | # SageMath parsed files 101 | *.sage.py 102 | 103 | # Environments 104 | .env 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | .dmypy.json 125 | dmypy.json 126 | 127 | # Pyre type checker 128 | .pyre/ 129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 purarue 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := install 2 | 3 | install: 4 | python3 -m pip install . 5 | 6 | docs: install 7 | python3 -m pip list --format=freeze | cut -d"=" -f1 | grep -x 'pdoc3' -q || python3 -m pip install pdoc3 8 | rm -rf ./docs 9 | pdoc3 -o ./docs sqlite_backup 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sqlite_backup 2 | 3 | This exposes the python stdlib [`sqlite.backup`](https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup) function as a library, with a couple extra steps. 4 | 5 | The main purpose for writing this is to copy sqlite databases that you may not own -- perhaps it belongs to an application (e.g., your browser) and is locked since that's currently open, or the OS keeps it open while the computer is active (e.g. Mac with iMessage) 6 | 7 | ### Features 8 | 9 | - Has the option (true by default) to first [safely copy](https://github.com/purarue/sqlite_backup/blob/cbe57a88bc987ca990edfb65b66f04b6d8765a5e/sqlite_backup/core.py#L48-L56) the database from disk to a temporary directory, which is: 10 | - useful in case the source is in read-only mode (e.g. 
in some sort of docker container) 11 | - safer if you're especially worried about corrupting or losing data 12 | - Uses [`Cpython`s Connection.backup](https://github.com/python/cpython/blob/8fb36494501aad5b0c1d34311c9743c60bb9926c/Modules/_sqlite/connection.c#L1716), which directly uses the [underlying Sqlite C code](https://www.sqlite.org/c3ref/backup_finish.html) 13 | - Performs a [`wal_checkpoint`](https://www.sqlite.org/pragma.html#pragma_wal_checkpoint) and sets `journal_mode=DELETE` after copying to the destination, to remove the WAL (write-ahead log; temporary database file). Typically the WAL is removed when the database is closed, but [particular builds of sqlite](https://sqlite.org/forum/forumpost/1fdfc1a0e7), the [default sqlite installed on mac](https://github.com/purarue/sqlite_backup/issues/9), or sqlite compiled with [`SQLITE_DBCONFIG_NO_CKPT_ON_CLOSE` enabled](https://www.sqlite.org/c3ref/c_dbconfig_enable_fkey.html) or [`SQLITE_FCNTL_PERSIST_WAL`](https://www.sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntlpersistwal) may prevent that -- so the checkpoint exists to ensure there are no temporary files leftover 14 | 15 | In short, this **prioritizes safety of the data** over performance (temporarily copying data files to `/tmp`) - because we often don't know what the application may be doing while we're copying underlying sqlite databases 16 | 17 | The initial backup function and some tests were extracted out of the [`karlicoss/HPI` `core/sqlite`](https://github.com/karlicoss/HPI/blob/a1f03f9c028df9d1898de2cc14f1df4fa6d8c471/my/core/sqlite.py#L33-L51) module 18 | 19 | If other tools exist to do this, please [let me know!](https://github.com/purarue/sqlite_backup/issues/new) 20 | 21 | ## Installation 22 | 23 | Requires `python3.8+` 24 | 25 | To install with pip, run: 26 | 27 | pip install sqlite_backup 28 | 29 | ## Usage 30 | 31 | ``` 32 | Usage: sqlite_backup [OPTIONS] SOURCE_DATABASE DESTINATION 33 | 34 | SOURCE_DATABASE is the database 
to copy 35 | 36 | DESTINATION is where to write the database. If a directory, uses 37 | the SOURCE_DATABASE name. If a file, the directory must exist, 38 | and the destination file must not already exist (to prevent 39 | possibly overwriting old data) 40 | 41 | Options: 42 | --debug Increase log verbosity [default: False] 43 | --wal-checkpoint / --no-wal-checkpoint 44 | After writing to the destination, run a 45 | checkpoint to truncate the WAL to zero bytes 46 | [default: wal-checkpoint] 47 | --copy-use-tempdir / --no-copy-use-tempdir 48 | Copy the source database files to a 49 | temporary directory, then connect to the 50 | copied files [default: copy-use-tempdir] 51 | --copy-retry INTEGER If the files change while copying to the 52 | temporary directory, retry times 53 | [default: 100] 54 | --copy-retry-strict / --no-copy-retry-strict 55 | Throws an error if this fails to safely copy 56 | the database files --copy-retry times 57 | [default: copy-retry-strict] 58 | --help Show this message and exit. [default: 59 | False] 60 | ``` 61 | 62 | For usage in python, use the `sqlite_backup` function, see the [docs](./docs/sqlite_backup/index.md) 63 | 64 | If you plan on reading from these backed up databases (and you're not planning on modifying these at all), I would recommend using the [`mode=ro`](https://www.sqlite.org/uri.html#urimode) (readonly) or [`immutable`](https://www.sqlite.org/uri.html#uriimmutable) flags when connecting to the database. 
In python, like: 65 | 66 | ```python 67 | import sqlite3 68 | from contextlib import contextmanager 69 | from typing import Iterator 70 | 71 | @contextmanager 72 | def sqlite_connect(database: str) -> Iterator[sqlite3.Connection]: 73 | try: 74 | # or for immutable, f"file:{database}?immutable=1" 75 | with sqlite3.connect(f"file:{database}?mode=ro", uri=True) as conn: 76 | yield conn 77 | finally: 78 | conn.close() 79 | 80 | with sqlite_connect("/path/to/database") as conn: 81 | conn.execute("...") 82 | ``` 83 | 84 | ### Example 85 | 86 | ``` 87 | sqlite_backup --debug ~/.mozilla/firefox/ew9cqpqe.dev-edition-default/places.sqlite ./firefox.sqlite 88 | [D 220202 13:00:32 core:110] Source database files: '['/home/username/.mozilla/firefox/ew9cqpqe.dev-edition-default/places.sqlite', '/home/username/.mozilla/firefox/ew9cqpqe.dev-edition-default/places.sqlite-wal']' 89 | [D 220202 13:00:32 core:111] Temporary Destination database files: '['/tmp/tmpm2nhl1p3/places.sqlite', '/tmp/tmpm2nhl1p3/places.sqlite-wal']' 90 | [D 220202 13:00:32 core:64] Copied from '/home/username/.mozilla/firefox/ew9cqpqe.dev-edition-default/places.sqlite' to '/tmp/tmpm2nhl1p3/places.sqlite' successfully; copied without file changing: True 91 | [D 220202 13:00:32 core:64] Copied from '/home/username/.mozilla/firefox/ew9cqpqe.dev-edition-default/places.sqlite-wal' to '/tmp/tmpm2nhl1p3/places.sqlite-wal' successfully; copied without file changing: True 92 | [D 220202 13:00:32 core:240] Running backup, from '/tmp/tmpm2nhl1p3/places.sqlite' to '/home/username/Repos/sqlite_backup/firefox.sqlite' 93 | Backed up /home/username/.mozilla/firefox/ew9cqpqe.dev-edition-default/places.sqlite to /home/username/Repos/sqlite_backup/firefox.sqlite 94 | ``` 95 | 96 | ### Tests 97 | 98 | ```bash 99 | git clone 'https://github.com/purarue/sqlite_backup' 100 | cd ./sqlite_backup 101 | pip install '.[testing]' 102 | mypy ./sqlite_backup 103 | pytest 104 | ``` 105 | -------------------------------------------------------------------------------- /docs/sqlite_backup/core.md: 
-------------------------------------------------------------------------------- 1 | Module sqlite_backup.core 2 | ========================= 3 | 4 | Functions 5 | --------- 6 | 7 | 8 | `atomic_copy(src: str, dest: str) ‑> bool` 9 | : Copy from src to dest. If src changes while copying to dest, retry till it is the same 10 | There are very few ways to truly guarantee a file is copied atomically, so this is the closest approximation 11 | 12 | This retries till the file doesn't change while we were copying it 13 | 14 | If the file did change (before the final copy, which succeeded) while we were copying it, this returns False 15 | 16 | 17 | `copy_all_files(source_files: List[pathlib.Path], temporary_dest: pathlib.Path, copy_function: Callable[[str, str], bool], retry: int) ‑> bool` 18 | : Copy all files from source to directory 19 | This retries (up to 'retry' count) if any of the files change while any of the copies were copying 20 | 21 | Returns: 22 | True if it successfully copied and none of the files changed while it was copying 23 | False if it retried 'retry' times but files still changed as it was copying 24 | 25 | It still *has* copied the files, it just doesn't guarantee that the copies were atomic according to 26 | atomic_copy's definition of failure 27 | 28 | 29 | `glob_database_files(source_database: pathlib.Path) ‑> List[pathlib.Path]` 30 | : List any of the temporary database files (and the database itself) 31 | 32 | 33 | `sqlite_backup(source: Union[str, pathlib.Path], destination: Union[str, pathlib.Path, ForwardRef(None)] = None, *, wal_checkpoint: bool = True, copy_use_tempdir: bool = True, copy_retry: int = 100, copy_retry_strict: bool = True, sqlite_connect_kwargs: Optional[Dict[str, Any]] = None, sqlite_backup_kwargs: Optional[Dict[str, Any]] = None, copy_function: Optional[Callable[[str, str], bool]] = None) ‑> Optional[sqlite3.Connection]` 34 | : 'Snapshots' the source database and opens by making a deep copy of it, including journal/WAL files 
35 | 36 | If you don't specify a 'destination', this copies the database 37 | into memory and returns an active connection to that. 38 | 39 | If you specify a 'destination', this copies the 'source' to the 'destination' file, 40 | instead of into memory 41 | 42 | If 'copy_use_tempdir' is True, this copies the relevant database files to a temporary directory, 43 | and then copies it into destination using sqlite3.Connection.backup. So, by default, the steps are: 44 | 45 | - Copy the source database files to a temporary directory 46 | - create a connection to the temporary database files 47 | - create a temporary 'destination' connection in memory 48 | - backup from the temporary directory database connection to the destination 49 | - cleanup; close temporary connection and remove temporary files 50 | - returns the 'destination' connection, which has the data stored in memory 51 | 52 | If you instead specify a path as the 'destination', this creates the 53 | database file there, and returns nothing (If you want access to the 54 | destination database, open a connection afterwards with sqlite3.connect) 55 | 56 | 'wal_checkpoint' runs a 'PRAGMA wal_checkpoint(TRUNCATE)' after it writes to 57 | the destination database, which truncates the write ahead log to 0 bytes. 
58 | Typically the WAL is removed when the database is closed, but particular builds of sqlite 59 | or sqlite compiled with SQLITE_DBCONFIG_NO_CKPT_ON_CLOSE may prevent that -- 60 | so the checkpoint exists to ensure there are no temporary files leftover 61 | 62 | See: 63 | https://sqlite.org/forum/forumpost/1fdfc1a0e7 64 | https://www.sqlite.org/c3ref/c_dbconfig_enable_fkey.html 65 | 66 | if 'copy_use_tempdir' is False, that skips the copy, which increases the chance that this fails 67 | (if there's a lock (SQLITE_BUSY, SQLITE_LOCKED)) on the source database, 68 | which is what we're trying to avoid in the first place 69 | 70 | 'copy_retry' (default 100) specifies how many times we should attempt to copy the database files, if they 71 | happen to change while we're copying. If 'copy_retry_strict' is True, this throws an error if it failed 72 | to safely copy the database files 'copy_retry' times 73 | 74 | 'sqlite_connect_kwargs' and 'sqlite_backup_kwargs' let you pass additional kwargs 75 | to the connect (when copying from the source database) and the backup (when copying 76 | from the source (or database in the tempdir) to the destination 77 | 78 | 79 | `sqlite_connect_immutable(db: Union[str, pathlib.Path]) ‑> Iterator[sqlite3.Connection]` 80 | : 81 | 82 | Classes 83 | ------- 84 | 85 | `SqliteBackupError(*args, **kwargs)` 86 | : Generic error for the sqlite_backup module 87 | 88 | ### Ancestors (in MRO) 89 | 90 | * builtins.RuntimeError 91 | * builtins.Exception 92 | * builtins.BaseException -------------------------------------------------------------------------------- /docs/sqlite_backup/index.md: -------------------------------------------------------------------------------- 1 | Module sqlite_backup 2 | ==================== 3 | 4 | Sub-modules 5 | ----------- 6 | * sqlite_backup.core 7 | * sqlite_backup.log 8 | 9 | Functions 10 | --------- 11 | 12 | 13 | `sqlite_backup(source: Union[str, pathlib.Path], destination: Union[str, pathlib.Path, 
ForwardRef(None)] = None, *, wal_checkpoint: bool = True, copy_use_tempdir: bool = True, copy_retry: int = 100, copy_retry_strict: bool = True, sqlite_connect_kwargs: Optional[Dict[str, Any]] = None, sqlite_backup_kwargs: Optional[Dict[str, Any]] = None, copy_function: Optional[Callable[[str, str], bool]] = None) ‑> Optional[sqlite3.Connection]` 14 | : 'Snapshots' the source database and opens by making a deep copy of it, including journal/WAL files 15 | 16 | If you don't specify a 'destination', this copies the database 17 | into memory and returns an active connection to that. 18 | 19 | If you specify a 'destination', this copies the 'source' to the 'destination' file, 20 | instead of into memory 21 | 22 | If 'copy_use_tempdir' is True, this copies the relevant database files to a temporary directory, 23 | and then copies it into destination using sqlite3.Connection.backup. So, by default, the steps are: 24 | 25 | - Copy the source database files to a temporary directory 26 | - create a connection to the temporary database files 27 | - create a temporary 'destination' connection in memory 28 | - backup from the temporary directory database connection to the destination 29 | - cleanup; close temporary connection and remove temporary files 30 | - returns the 'destination' connection, which has the data stored in memory 31 | 32 | If you instead specify a path as the 'destination', this creates the 33 | database file there, and returns nothing (If you want access to the 34 | destination database, open a connection afterwards with sqlite3.connect) 35 | 36 | 'wal_checkpoint' runs a 'PRAGMA wal_checkpoint(TRUNCATE)' after it writes to 37 | the destination database, which truncates the write ahead log to 0 bytes. 
38 | Typically the WAL is removed when the database is closed, but particular builds of sqlite 39 | or sqlite compiled with SQLITE_DBCONFIG_NO_CKPT_ON_CLOSE may prevent that -- 40 | so the checkpoint exists to ensure there are no temporary files leftover 41 | 42 | See: 43 | https://sqlite.org/forum/forumpost/1fdfc1a0e7 44 | https://www.sqlite.org/c3ref/c_dbconfig_enable_fkey.html 45 | 46 | if 'copy_use_tempdir' is False, that skips the copy, which increases the chance that this fails 47 | (if there's a lock (SQLITE_BUSY, SQLITE_LOCKED)) on the source database, 48 | which is what we're trying to avoid in the first place 49 | 50 | 'copy_retry' (default 100) specifies how many times we should attempt to copy the database files, if they 51 | happen to change while we're copying. If 'copy_retry_strict' is True, this throws an error if it failed 52 | to safely copy the database files 'copy_retry' times 53 | 54 | 'sqlite_connect_kwargs' and 'sqlite_backup_kwargs' let you pass additional kwargs 55 | to the connect (when copying from the source database) and the backup (when copying 56 | from the source (or database in the tempdir) to the destination -------------------------------------------------------------------------------- /docs/sqlite_backup/log.md: -------------------------------------------------------------------------------- 1 | Module sqlite_backup.log 2 | ======================== 3 | Setup logging for this module 4 | 5 | Functions 6 | --------- 7 | 8 | 9 | `setup(level: Optional[int] = None) ‑> logging.Logger` 10 | : -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click>=8.0 2 | logzero 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = sqlite_backup 3 | version = 0.1.7 4 | 
description = A tool to copy sqlite databases you don't own 5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | license = MIT 8 | license_file = LICENSE 9 | author = purarue 10 | url = https://github.com/purarue/sqlite_backup 11 | keywords = database sqlite 12 | classifiers = 13 | License :: OSI Approved :: MIT License 14 | Programming Language :: Python 15 | Programming Language :: Python :: 3 16 | Programming Language :: Python :: 3.8 17 | Programming Language :: Python :: 3.9 18 | Programming Language :: Python :: 3.10 19 | Programming Language :: Python :: 3.11 20 | 21 | [options] 22 | packages = find: 23 | include_package_data = True 24 | install_requires = 25 | click>=8.0 26 | logzero 27 | python_requires = >=3.8 28 | 29 | [options.packages.find] 30 | include = 31 | sqlite_backup 32 | exclude = 33 | tests* 34 | 35 | [options.package_data] 36 | sqlite_backup = py.typed 37 | 38 | [options.entry_points] 39 | console_scripts = 40 | sqlite_backup = sqlite_backup.__main__:main 41 | 42 | 43 | [options.extras_require] 44 | testing = 45 | pytest 46 | mypy 47 | flake8 48 | pytest-reraise 49 | 50 | 51 | [mypy] 52 | pretty = True 53 | show_error_context = True 54 | show_error_codes = True 55 | check_untyped_defs = True 56 | namespace_packages = True 57 | disallow_any_generics = True 58 | disallow_subclassing_any = True 59 | disallow_untyped_calls = True 60 | disallow_untyped_defs = True 61 | disallow_incomplete_defs = True 62 | no_implicit_optional = True 63 | warn_redundant_casts = True 64 | warn_return_any = True 65 | warn_unreachable = True 66 | 67 | [flake8] 68 | ignore=E501 69 | 70 | [tool:pytest] 71 | addopts = 72 | -rap 73 | --doctest-modules sqlite_backup 74 | ./tests/ 75 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | if __name__ == "__main__": 4 | setup() 
5 | -------------------------------------------------------------------------------- /sqlite_backup/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import sqlite_backup # noqa 2 | 3 | __all__ = ["sqlite_backup"] 4 | -------------------------------------------------------------------------------- /sqlite_backup/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | from pathlib import Path 4 | 5 | import click 6 | 7 | from .core import sqlite_backup, COPY_RETRY_DEFAULT 8 | from .log import setup 9 | 10 | CONTEXT_SETTINGS = { 11 | "max_content_width": 120, 12 | "show_default": True, 13 | } 14 | 15 | 16 | @click.command(context_settings=CONTEXT_SETTINGS) 17 | @click.option("--debug", is_flag=True, default=False, help="Increase log verbosity") 18 | @click.option( 19 | "--wal-checkpoint/--no-wal-checkpoint", 20 | default=True, 21 | is_flag=True, 22 | help="After writing to the destination, run a checkpoint to truncate the WAL to zero bytes", 23 | ) 24 | @click.option( 25 | "--copy-use-tempdir/--no-copy-use-tempdir", 26 | default=True, 27 | is_flag=True, 28 | help="Copy the source database files to a temporary directory, then connect to the copied files", 29 | ) 30 | @click.option( 31 | "--copy-retry", 32 | default=COPY_RETRY_DEFAULT, 33 | type=int, 34 | show_default=False, 35 | help="If the files change while copying to the temporary directory, retry times", 36 | ) 37 | @click.option( 38 | "--copy-retry-strict/--no-copy-retry-strict", 39 | default=True, 40 | is_flag=True, 41 | help="Throws an error if this fails to safely copy the database files --copy-retry times", 42 | ) 43 | @click.argument( 44 | "SOURCE_DATABASE", 45 | required=True, 46 | type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path), 47 | ) 48 | @click.argument("DESTINATION", required=True, type=click.Path(path_type=Path)) 49 | def main( 50 | debug: 
bool, 51 | wal_checkpoint: bool, 52 | copy_use_tempdir: bool, 53 | copy_retry: int, 54 | copy_retry_strict: bool, 55 | source_database: Path, 56 | destination: Path, 57 | ) -> None: 58 | """ 59 | SOURCE_DATABASE is the database to copy 60 | 61 | \b 62 | DESTINATION is where to write the database. If a directory, uses 63 | the SOURCE_DATABASE name. If a file, the directory must exist, 64 | and the destination file must not already exist (to prevent 65 | possibly overwriting old data) 66 | """ 67 | if debug: 68 | setup(logging.DEBUG) 69 | 70 | source_database = source_database.absolute() 71 | dest: Path 72 | if destination.exists(): 73 | if destination.is_dir(): 74 | dest = (destination / source_database.name).absolute() 75 | if dest.exists(): 76 | click.echo( 77 | f"Computed DESTINATION '{dest}' using SOURCE_DATABASE name already exists", 78 | err=True, 79 | ) 80 | sys.exit(1) 81 | elif destination.is_file(): 82 | click.echo(f"Target DESTINATION already exists: '{destination}'", err=True) 83 | sys.exit(1) 84 | else: 85 | click.echo( 86 | f"Target DESTINATION '{destination}' is not a directory or a file", 87 | err=True, 88 | ) 89 | sys.exit(1) 90 | else: 91 | # doesn't exist, check if parent dir exists 92 | if not destination.parent.exists(): 93 | click.echo( 94 | f"Parent directory '{destination.parent}' does not exist", err=True 95 | ) 96 | sys.exit(1) 97 | dest = destination.absolute() 98 | 99 | if not copy_use_tempdir: 100 | click.echo( 101 | "Warning: Copying a database in use by another application without copying to a temporary directory could result in corrupt data or incorrect results. 
Only use this if you know the underlying database is not being modified", 102 | err=True, 103 | ) 104 | sqlite_backup( 105 | source_database, 106 | dest, 107 | wal_checkpoint=wal_checkpoint, 108 | copy_use_tempdir=copy_use_tempdir, 109 | copy_retry=copy_retry, 110 | copy_retry_strict=copy_retry_strict, 111 | ) 112 | click.echo(f"Backed up {source_database} to {dest}", err=True) 113 | 114 | 115 | if __name__ == "__main__": 116 | main(prog_name="sqlite_backup") 117 | -------------------------------------------------------------------------------- /sqlite_backup/core.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import sqlite3 4 | import filecmp 5 | import shutil 6 | 7 | from typing import ( 8 | Union, 9 | Optional, 10 | List, 11 | Tuple, 12 | Dict, 13 | Any, 14 | Iterator, 15 | Callable, 16 | ) 17 | from pathlib import Path 18 | from contextlib import contextmanager 19 | from tempfile import TemporaryDirectory 20 | 21 | from .log import logger 22 | 23 | 24 | PathIsh = Union[str, Path] 25 | 26 | CopyFunction = Callable[[str, str], bool] 27 | 28 | COPY_RETRY_DEFAULT: int = 100 29 | 30 | 31 | class SqliteBackupError(RuntimeError): 32 | """Generic error for the sqlite_backup module""" 33 | 34 | 35 | @contextmanager 36 | def sqlite_connect_immutable(db: PathIsh) -> Iterator[sqlite3.Connection]: 37 | # https://www.sqlite.org/draft/uri.html#uriimmutable 38 | conn: Union[sqlite3.Connection, None] = None 39 | try: 40 | with sqlite3.connect(f"file:{db}?immutable=1", uri=True) as conn: 41 | yield conn 42 | finally: 43 | if conn: 44 | conn.close() 45 | 46 | 47 | # https://github.com/karlicoss/promnesia/blob/1b26ccdf9be7c0ac8f8e6e9e4193647450548878/scripts/browser_history.py#L48 48 | def atomic_copy(src: str, dest: str) -> bool: 49 | """ 50 | Copy from src to dest. 
If src changes while copying to dest, retry till it is the same 51 | These are very few ways to truly guarantee a file is copied atomically, so this is the closest approximation 52 | 53 | This retries till the file doesn't change while we were copying it 54 | 55 | If the file did change (before the final copy, which succeeded) while we were copying it, this returns False 56 | """ 57 | # function-level succeeded value -- i.e., if while trying to copy 58 | # this this failed, it marks this as False. It still retries, but 59 | # this is to signify to copy_all_files that something changed while 60 | # we were copying, so we likely want to retry 61 | succeeded = True 62 | while True: 63 | shutil.copy(src, dest) 64 | if filecmp.cmp(src, dest, shallow=True): 65 | logger.debug( 66 | f"Copied from '{src}' to '{dest}' successfully; copied without file changing: {succeeded}" 67 | ) 68 | # succeeded, return whether or not this failed on any loop iteration 69 | return succeeded 70 | else: 71 | logger.debug(f"Failed to copy from '{src}' to '{dest}', retrying...") 72 | succeeded = False 73 | 74 | 75 | def glob_database_files(source_database: Path) -> List[Path]: 76 | """ 77 | List any of the temporary database files (and the database itself) 78 | """ 79 | files: List[Path] = [source_database] 80 | for temp_db_file in source_database.parent.glob(source_database.name + "-*"): 81 | # shm should be recreated from scratch -- safer not to copy perhaps 82 | # https://www.sqlite.org/tempfiles.html#shared_memory_files 83 | if temp_db_file.name.endswith("-shm"): 84 | continue 85 | files.append(temp_db_file) 86 | return files 87 | 88 | 89 | def copy_all_files( 90 | source_files: List[Path], 91 | temporary_dest: Path, 92 | copy_function: CopyFunction, 93 | retry: int, 94 | ) -> bool: 95 | """ 96 | Copy all files from source to directory 97 | This retries (up to 'retry' count) if any of the files change while any of the copies were copying 98 | 99 | Returns: 100 | True if it successfully 
def sqlite_backup(
    source: PathIsh,
    destination: Optional[PathIsh] = None,
    *,
    wal_checkpoint: bool = True,
    copy_use_tempdir: bool = True,
    copy_retry: int = COPY_RETRY_DEFAULT,
    copy_retry_strict: bool = True,
    sqlite_connect_kwargs: Optional[Dict[str, Any]] = None,
    sqlite_backup_kwargs: Optional[Dict[str, Any]] = None,
    copy_function: Optional[CopyFunction] = None,
) -> Optional[sqlite3.Connection]:
    """
    'Snapshots' the source database by making a deep copy of it, including journal/WAL files

    If you don't specify a 'destination', this copies the database
    into memory and returns an active connection to that.

    If you specify a 'destination', this copies the 'source' to the 'destination' file,
    instead of into memory

    If 'copy_use_tempdir' is True, this copies the relevant database files to a temporary directory,
    and then copies it into destination using sqlite3.Connection.backup. So, by default, the steps are:

    - Copy the source database files to a temporary directory
    - create a connection to the temporary database files
    - create a temporary 'destination' connection in memory
    - backup from the temporary directory database connection to the destination
    - cleanup; close temporary connection and remove temporary files
    - returns the 'destination' connection, which has the data stored in memory

    If you instead specify a path as the 'destination', this creates the
    database file there, and returns nothing (If you want access to the
    destination database, open a connection afterwards with sqlite3.connect)

    'wal_checkpoint' runs a 'PRAGMA wal_checkpoint(TRUNCATE)' after it writes to
    the destination database, which truncates the write ahead log to 0 bytes.
    Typically the WAL is removed when the database is closed, but particular builds of sqlite
    or sqlite compiled with SQLITE_DBCONFIG_NO_CKPT_ON_CLOSE may prevent that --
    so the checkpoint exists to ensure there are no temporary files leftover

    See:
    https://sqlite.org/forum/forumpost/1fdfc1a0e7
    https://www.sqlite.org/c3ref/c_dbconfig_enable_fkey.html

    if 'copy_use_tempdir' is False, that skips the copy, which increases the chance that this fails
    (if there's a lock (SQLITE_BUSY, SQLITE_LOCKED)) on the source database,
    which is what we're trying to avoid in the first place

    'copy_retry' (default 100) specifies how many times we should attempt to copy the database files, if they
    happen to change while we're copying. If 'copy_retry_strict' is True, this throws an error if it failed
    to safely copy the database files 'copy_retry' times

    'sqlite_connect_kwargs' and 'sqlite_backup_kwargs' let you pass additional kwargs
    to the connect (when copying from the source database) and the backup (when copying
    from the source (or database in the tempdir) to the destination)

    'copy_function' replaces the function used to copy each file into the temporary
    directory (atomic_copy if not given)

    Raises:
        FileNotFoundError: 'source' does not exist
        TypeError: 'destination' is given but is not a str or Path
        ValueError: 'source' and 'destination' refer to the same file
        SqliteBackupError: in strict mode, the files kept changing while being copied

    If connecting, the backup or the checkpoint raises, both the source connection
    and the destination connection are closed before the error propagates
    """
    source_path = Path(source)
    copy_from: Path

    if not source_path.exists():
        raise FileNotFoundError(
            errno.ENOENT, os.strerror(errno.ENOENT), str(source_path)
        )

    if destination is not None:
        # validate the type up front, before any copying work is done
        if not isinstance(destination, (str, Path)):
            raise TypeError(
                f"Unexpected 'destination' type, expected path like object, got {type(destination)}"
            )
        # compare resolved paths, so that relative/absolute spellings (or symlinks)
        # of the same file are caught as well -- backing up a database onto itself
        # is never what the caller wants
        if source_path.resolve() == Path(destination).resolve():
            raise ValueError(
                f"'source' and 'destination' '{source_path}' can't be the same"
            )

    if sqlite_connect_kwargs is None:
        sqlite_connect_kwargs = {}

    if sqlite_backup_kwargs is None:
        sqlite_backup_kwargs = {}

    if copy_function is None:
        copy_function = atomic_copy

    with TemporaryDirectory() as td:
        # if we should copy files to the temporary dir
        # could use a nullcontext but is harder to read
        if copy_use_tempdir:
            tdir = Path(td)
            succeeded = copy_all_files(
                glob_database_files(source_path),
                temporary_dest=tdir,
                copy_function=copy_function,
                retry=copy_retry,
            )
            if not succeeded and copy_retry_strict:
                raise SqliteBackupError(
                    f"While in strict mode, this failed to copy all files without any of them changing {copy_retry} times. Increase 'copy_retry' or disable 'copy_retry_strict'"
                )
            copy_from = tdir / source_path.name
            assert (
                copy_from.exists()
            ), f"Expected copied database to exist at {copy_from} in temporary directory"
        else:
            copy_from = source_path

        target_connection: sqlite3.Connection
        if destination is None:
            logger.debug("No destination provided, copying data to ':memory:'")
            target_connection = sqlite3.connect(":memory:")
        else:
            target_connection = sqlite3.connect(destination)

        try:
            logger.debug(
                f"Running backup, from '{copy_from}' to '{destination or 'memory'}'"
            )
            conn = sqlite3.connect(copy_from, **sqlite_connect_kwargs)
            try:
                # 'with conn' only commits/rolls back, it does not close the
                # connection -- closing is handled by the 'finally' below
                with conn:
                    if copy_use_tempdir:
                        # workaround for leftover wal/shm files on some macos systems
                        # see https://github.com/purarue/sqlite_backup/issues/9
                        conn.execute("PRAGMA journal_mode=DELETE")
                    conn.backup(target_connection, **sqlite_backup_kwargs)

                if destination is not None and wal_checkpoint:
                    with target_connection:
                        target_connection.execute("PRAGMA wal_checkpoint(TRUNCATE);")
            finally:
                # always release the source connection, so the temporary
                # directory can be removed even if the backup failed
                conn.close()
        except BaseException:
            # don't leak the destination connection if anything went wrong
            target_connection.close()
            raise

    # if there was no target, then we copied into memory
    # don't close so that the user has access to the memory
    # otherwise, the data is just copied and lost
    if destination is None:
        return target_connection
    else:
        target_connection.close()
        return None
def setup(level: Optional[int] = None) -> logging.Logger:
    """
    Create (or reconfigure) the logger for this package using logzero's setup_logger

    level: numeric logging level to use. If it is not given (None or 0), the level
    is read from the SQLITE_BACKUP_LOGS environment variable, which may be a
    number ('10') or a standard level name ('DEBUG', case-insensitive).
    If the variable is unset, empty or can't be understood, DEFAULT_LEVEL is used

    Returns the configured logger
    """
    # a falsy level (None/0) means 'not specified', fall back to the environment
    chosen_level = level
    if not chosen_level:
        chosen_level = DEFAULT_LEVEL
        env_level = os.environ.get("SQLITE_BACKUP_LOGS", "").strip()
        if env_level:
            try:
                chosen_level = int(env_level)
            except ValueError:
                # this function runs at import time, so a bad value must not
                # raise -- otherwise a typo in the environment breaks the import.
                # getLevelName returns an int when given a registered level name
                named_level = logging.getLevelName(env_level.upper())
                if isinstance(named_level, int):
                    chosen_level = named_level
    lgr: logging.Logger = setup_logger(name=__package__, level=chosen_level)
    return lgr
wrapped func 23 | assert ( 24 | func.__code__.co_name == "wrapped" 25 | ), "Didn't match wrapped function name -- try to wrap this in @reraise.wrap or pass allow_unwrapped if trying to catch an error" 26 | 27 | # run in a separate thread 28 | t = Thread(target=func) 29 | t.start() 30 | t.join() 31 | -------------------------------------------------------------------------------- /tests/test_sqlite_backup.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import sqlite3 3 | from pathlib import Path 4 | from typing import Iterator, Any, Set 5 | 6 | import pytest 7 | 8 | # make sure errors in threads get raised by pytest properly 9 | # https://github.com/bjoluc/pytest-reraise 10 | from pytest_reraise import Reraise # type: ignore[import] 11 | 12 | from sqlite_backup.core import ( 13 | sqlite_connect_immutable, 14 | sqlite_backup, 15 | atomic_copy, 16 | SqliteBackupError, 17 | ) 18 | 19 | from . import run_in_thread 20 | 21 | 22 | # tmp_path is test-scoped, not function-scoped 23 | 24 | 25 | @pytest.fixture(scope="function") 26 | def tmp_path_f( 27 | request: Any, tmp_path_factory: pytest.TempPathFactory 28 | ) -> Iterator[Path]: 29 | """ 30 | Create a new tempdir every time this runs 31 | """ 32 | # request is a _pytest.fixture.SubRequest, function that called this 33 | assert isinstance(request.function.__name__, str), str(request) 34 | assert request.function.__name__.strip(), str(request) 35 | yield tmp_path_factory.mktemp(request.function.__name__, numbered=True) 36 | 37 | 38 | def _list_files(p: Path) -> Set[Path]: 39 | return {f for f in p.iterdir() if f.is_file()} 40 | 41 | 42 | @pytest.fixture() 43 | def sqlite_with_wal( 44 | tmp_path_f: Path, 45 | ) -> Iterator[Path]: 46 | """ 47 | In a temporary directory, create a database with a basic table 48 | insert 5 items into it and let sqlite3.connection close the connection 49 | then, open in PRAGMA journal_mode=wal;, which writes to the temporary 50 | 
def test_do_copy(sqlite_with_wal: Path, tmp_path_f: Path, reraise: Reraise) -> None:
    """
    copying only the main database file (leaving the WAL behind) means
    the copy can only see what was committed before WAL mode was used
    """

    @reraise.wrap
    def _copy_then_count() -> None:
        copied_db = Path(tmp_path_f) / "dbcopy.sqlite"
        shutil.copy(sqlite_with_wal, copied_db)
        with sqlite3.connect(copied_db) as conn_copy:
            rows = conn_copy.execute("SELECT * FROM testtable").fetchall()
            # only the first 5 rows made it into the main database file
            assert len(rows) == 5

    run_in_thread(_copy_then_count)
def test_do_immutable(sqlite_with_wal: Path, reraise: Reraise) -> None:
    """
    read the database through a connection opened in immutable mode

    *IF* the application changed the database while this query was executing,
    this has the opportunity to corrupt or fetch incorrect results -- this
    only works because we control sqlite_with_wal and know it's not going to change

    this also doesn't read anything from the WAL -- only the database
    """

    @reraise.wrap
    def _count_immutable() -> None:
        with sqlite_connect_immutable(sqlite_with_wal) as conn_imm:
            rows = list(conn_imm.execute("SELECT * FROM testtable"))
            # rows living only in the WAL are not visible here
            assert len(rows) == 5

    run_in_thread(_count_immutable)
def test_do_copy_and_open(sqlite_with_wal: Path, reraise: Reraise) -> None:
    """
    the primary use of sqlite_backup: every database file is copied
    into a temporary directory, and the result is loaded into memory
    with python's Connection.backup
    """

    @reraise.wrap
    def _backup_to_memory() -> None:
        # no destination given, so the data ends up in memory
        memory_conn = sqlite_backup(sqlite_with_wal)
        assert memory_conn is not None
        rows = memory_conn.execute("SELECT * FROM testtable").fetchall()
        assert len(rows) == 10
        memory_conn.close()

    run_in_thread(_backup_to_memory)
def test_backup_with_checkpoint(
    sqlite_with_wal: Path, reraise: Reraise, tmp_path_f: Path
) -> None:
    """
    Back up sqlite_with_wal into another database file, with the
    wal_checkpoint enabled, to make sure the checkpoint works

    Verified by reading the destination in immutable mode
    """

    @reraise.wrap
    def _backup_and_verify() -> None:
        destination_database = tmp_path_f / "db.sqlite"
        expected_files = {destination_database}

        result = sqlite_backup(
            sqlite_with_wal, destination_database, wal_checkpoint=True
        )
        # nothing is returned when writing to a file, the connection is closed
        assert result is None

        # the backup must not leave any -wal/-shm files behind
        assert _list_files(tmp_path_f) == expected_files

        # every row should be readable in immutable mode
        with sqlite_connect_immutable(destination_database) as dest_conn:
            rows = list(dest_conn.execute("SELECT * FROM testtable"))
            assert len(rows) == 10
        dest_conn.close()

        # opening in immutable mode behaves as if the data was on a read-only
        # volume, so still no -wal/-shm files should exist
        assert _list_files(tmp_path_f) == expected_files

    run_in_thread(_backup_and_verify)
def test_copy_retry_strict(sqlite_with_wal: Path, reraise: Reraise) -> None:
    """
    Check copy_retry_strict: when the copy function keeps reporting that the
    file changed (as if it was constantly being written to), sqlite_backup
    should give up and raise an error
    """

    def _always_reports_change(src: str, dest: str) -> bool:
        # do the real copy, but always claim the file changed meanwhile
        atomic_copy(src, dest)
        return False

    def _backup_in_strict_mode() -> None:
        with reraise:
            sqlite_backup(
                sqlite_with_wal,
                copy_retry_strict=True,
                copy_function=_always_reports_change,
            )

    run_in_thread(_backup_in_strict_mode, allow_unwrapped=True)

    # grab the exception captured in the thread
    err = reraise.reset()  # type: ignore
    assert isinstance(err, SqliteBackupError)
    message = str(err)
    assert (
        "this failed to copy all files without any of them changing 100 times"
        in message
    )
def test_thread_raises(reraise: Reraise) -> None:
    """
    An assertion failing inside a thread is captured by reraise,
    and can be inspected and cleared from the main thread
    """

    def _fail_in_thread() -> None:
        with reraise:
            assert False, "Raised error here"

    run_in_thread(_fail_in_thread, allow_unwrapped=True)

    # the failure from the thread was captured
    captured = reraise.exception
    assert type(captured) is AssertionError

    # assigning has no effect, an exception has already been captured
    reraise.exception = Exception()

    # reset() hands back the captured exception and clears it
    err = reraise.reset()  # type: ignore
    assert isinstance(err, AssertionError)
    message = str(err)
    assert "Raised error here" in message

    # nothing is left to fail the test case with
    assert reraise.exception is None