├── tests ├── __init__.py ├── index │ ├── __init__.py │ ├── test_index.py │ ├── test_database.py │ ├── test_index__state.py │ ├── test_index__search.py │ ├── test_index__scan.py │ ├── test_index__restore.py │ └── test_models.py ├── storage │ ├── __init__.py │ ├── test_base.py │ ├── test_local.py │ └── test_s3.py ├── test_documentation.py ├── test_mocks.py ├── test_crypto.py ├── test_config.py ├── mocks.py └── test_commands.py ├── serac ├── __init__.py ├── storage │ ├── __init__.py │ ├── local.py │ ├── base.py │ └── s3.py ├── __main__.py ├── index │ ├── __init__.py │ ├── database.py │ ├── models.py │ └── index.py ├── crypto.py ├── exceptions.py ├── reporter.py ├── config.py └── commands.py ├── .gitignore ├── MANIFEST.in ├── requirements.in ├── setup.cfg ├── tox.ini ├── .travis.yml ├── LICENSE ├── setup.py ├── requirements.txt └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/index/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/storage/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /serac/__init__.py: -------------------------------------------------------------------------------- 1 | VERSION = "0.0.2" 2 | -------------------------------------------------------------------------------- /serac/storage/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Storage, storage_registry # noqa 2 | from .local import Local # noqa 3 | from .s3 import S3 # noqa 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .pytest_cache/* 3 | .mypy_cache/* 4 | .coverage 5 | htmlcov/* 6 | *.log 7 | .eggs 8 | build 9 | dist 10 | *.egg-info 11 | .tox -------------------------------------------------------------------------------- /serac/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Invoke the command line when called directly 3 | """ 4 | from .commands import cli 5 | 6 | 7 | if __name__ == "__main__": 8 | cli(obj={}, prog_name="serac") 9 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE 3 | include tox.ini 4 | include setup.cfg 5 | recursive-include serac * 6 | recursive-include tests * 7 | global-exclude __pycache__ 8 | global-exclude *.pyc 9 | -------------------------------------------------------------------------------- /tests/test_documentation.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from doc8 import doc8 4 | 5 | 6 | def test_doc8(): 7 | result = doc8(paths=[os.getcwd()], extension=[".rst"]) 8 | 9 | assert result.total_errors == 0, result.report() 10 | -------------------------------------------------------------------------------- /tests/storage/test_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test serac/storage/base.py 3 | """ 4 | from serac.storage import storage_registry 5 | 6 | 7 | def test_registry__storage_classes_registered(): 8 | assert list(storage_registry.keys()) == ["local", "s3"] 9 | -------------------------------------------------------------------------------- /serac/index/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Database 3 | 4 | Load in order to ensure models are always ready for generate_mapping 5 | """ 6 | from . import database # noqa 7 | from .index import Changeset, Pattern, State, restore, scan, search # noqa 8 | from .models import Action, File # noqa 9 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | # Dev 2 | pip-tools 3 | ipdb 4 | pyfakefs 5 | pytest 6 | pytest-black 7 | pytest-cov 8 | pytest-flake8 9 | pytest-isort 10 | pytest-mock 11 | pytest-mypy 12 | typing_extensions 13 | -e git+https://github.com/radiac/pytest-freezegun.git@bugfix/class-based-tests-with-duration-regression#egg=pytest-freezegun 14 | coveralls 15 | 16 | # Docs 17 | -e git+https://github.com/radiac/doc8.git@feature/python-api#egg=doc8 18 | #sphinx 19 | #sphinx_rtd_theme 20 | 21 | # Deployment 22 | click 23 | peewee 24 | pyAesCrypt 25 | smart-open 26 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | 4 | [tool:pytest] 5 | addopts = --black --isort --flake8 --mypy --cov=serac --cov-report=term --cov-report=html 6 | 7 | [isort] 8 | multi_line_output = 3 9 | line_length = 88 10 | known_django = django 11 | sections = FUTURE,STDLIB,DJANGO,THIRDPARTY,FIRSTPARTY,LOCALFOLDER 12 | include_trailing_comma = True 13 | lines_after_imports = 2 14 | 15 | [flake8] 16 | max-line-length = 88 17 | ignore = E501,W503,E203 18 | 19 | [mypy] 20 | ignore_missing_imports = True 21 | 22 | [doc8] 23 | max-line-length = 88 24 | ignore-path = *.txt,.tox 25 | -------------------------------------------------------------------------------- /serac/crypto.py: -------------------------------------------------------------------------------- 1 | """ 2 | Light wrapper around the encryption library 3 | """ 4 | from typing import IO 5 | 6 | from pyAesCrypt import decryptStream, encryptStream 7 | 8 | 9 | # Encryption/decryption buffer size - 64K 10 | BUFFER_SIZE = 64 * 1024 11 | 12 | 13 | def encrypt(source: IO[bytes], destination: IO[bytes], password: str) -> None: 14 | encryptStream(source, destination, password, BUFFER_SIZE) 15 | 16 | 17 | def decrypt( 18 | source: IO[bytes], destination: IO[bytes], password: str, source_size: int 19 | ) -> None: 20 | decryptStream(source, destination, password, BUFFER_SIZE, source_size) 21 | -------------------------------------------------------------------------------- /serac/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Serac exceptions 3 | """ 4 | 5 | 6 | class SeracException(Exception): 7 | msg = "Serac exception" 8 | short = "error" 9 | 10 | def __init__(self, msg=None, short=None): 11 | if msg is not None: 12 | self.msg = msg 13 | if short is not None: 14 | self.short = short 15 | 16 | def __str__(self): 17 | return self.msg 18 | 19 | 20 | class ArchiveUnavailable(SeracException): 21 | """ 22 | Used when retrieving objects from storage when the archived object is unavailable, 23 | ie is frozen in S3 Glacier 24 | """ 25 | 26 | msg = "Archived object is not currently available" 27 | short = "object unavailable" 28 | 29 | 30 | class FileExists(SeracException): 31 | """ 32 | Used when trying to write to a path 33 | """ 34 | 35 | msg = "File already exists" 36 | short = "file exists" 37 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = 3 | clean 4 | py37 5 | report 6 | 7 | [testenv] 8 | skipsdist=True 9 | usedevelop=True 10 | passenv = TRAVIS TRAVIS_JOB_ID TRAVIS_BRANCH 11 | setenv = 12 | PYTHONWARNINGS=default 13 | COVERAGE_FILE=.coverage.tox.{envname} 14 | TOXENV={envname} 15 | 16 | basepython = 17 | py37: python3.7 18 | 19 | deps = 20 | -rrequirements.txt 21 | 22 | commands = 23 | python setup.py test {posargs:} 24 | -coveralls 25 | 26 | 27 | [testenv:clean] 28 | basepython = python3.7 29 | deps = coverage 30 | setenv = 31 | COVERAGE_FILE=.coverage.tox 32 | commands = 33 | -python {envbindir}/coverage combine 34 | -python {envbindir}/coverage erase 35 | 36 | [testenv:report] 37 | basepython = python3.7 38 | deps = coverage 39 | setenv = 40 | COVERAGE_FILE=.coverage.tox 41 | commands = 42 | -python {envbindir}/coverage combine 43 | -python {envbindir}/coverage html 44 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: '3.7' 3 | install: 4 | - pip install tox 5 | script: 6 | - tox 7 | env: 8 | - TOXENV=py37 DEPLOY=true 9 | deploy: 10 | provider: pypi 11 | user: radiac 12 | password: 13 | secure: tlRO97E6XqPJdgzA5UiP+QlQMh5eWRcAEN0hGrOkxCk9UT2VE8nwlWXJw2q2Pm8dMmopOGFYSIM10Ljtj1PxLTzkO8+QY1a1Y6t1bSJqSkTszD0lNXrBTz9DvqfCqZMJMTl/wexQ51dYVTM5W0XB/8ACnl5Pzthl35CUOuuxXggGXaCwaAT0i5swNFSsMgHXQI/WbhEwpcJp4gyJaNMHRxjLOiEtQdyEezm5KlhChTi1b5Bb+HCvTMS+1pXwy/XXvnYlWkdGhBu4J0T95+bt1m0MSwO0wN0Qq1CBEjJ+kcl+fcPajs6HYqEuIQFU+ZWgyAOKutAk4Xm/XZMF0ogQi7SZJ8Qnih/4Sr9CbrJ6obJne+94GRSjtLzyklwSCBqa/KJFnYWBNy66EAh+LxoWmW0sQ4WWgkvwUKQwYW3QTM0RLI4fEjtw9+wFL2GfwRY5oEf1tpKW5cge681hUoaYhoQ/1HTGx+oWcPFr6HdlM9Je+dQHMCakl925c541hzqjALtf2NRYteSk3RP0O+Dyfsa9kaxqSwuJBo5sInO8NkjF0lg2ZBVxMC3aNa613eAjEhzNAhEBkMC9k/hOyUcY5FaP1Gnr6qDcK8LcKz175okTrLFMAfRmyQbxsuBhAj093neQJ5+A4Abaa67uXh6rjHfroA+hmQpzhq0oL2uMaiE= 14 | on: 15 | tags: true 16 | distributions: sdist bdist_wheel 17 | repo: radiac/serac 18 | condition: "$DEPLOY = true" 19 | -------------------------------------------------------------------------------- /tests/test_mocks.py: -------------------------------------------------------------------------------- 1 | """ 2 | Confirm that non-trivial test mocks function as expected 3 | """ 4 | from pathlib import Path 5 | 6 | from .mocks import FilesystemTest 7 | 8 | 9 | class TestFakeFs(FilesystemTest): 10 | """ 11 | Confirm expectations about pyfakefs 12 | """ 13 | 14 | def test_confirm_pyfakefs_base_class__fake_fs_works(self, fs): 15 | self.mock_fs(fs) 16 | assert Path("/src").is_dir() 17 | assert Path("/src/one.txt").is_file() 18 | assert Path("/src/two.txt").is_file() 19 | assert Path("/src/dir").is_dir() 20 | assert Path("/src/dir/three.txt").is_file() 21 | assert Path("/src/dir/four.txt").is_file() 22 | assert Path("/src/dir/subdir").is_dir() 23 | assert Path("/src/dir/subdir/five.txt").is_file() 24 | assert Path("/alt").is_dir() 25 | assert Path("/alt/six.txt").is_file() 26 | assert Path("/alt/seven.txt").is_file() 27 | 28 | with open("/src/one.txt") as f: 29 | contents = f.read() 30 | assert contents == "one" 31 | -------------------------------------------------------------------------------- /serac/storage/local.py: -------------------------------------------------------------------------------- 1 | """ 2 | Local storage 3 | """ 4 | from __future__ import annotations 5 | 6 | from pathlib import Path 7 | from typing import IO, TYPE_CHECKING, Any, Dict 8 | 9 | from .base import Storage 10 | 11 | 12 | if TYPE_CHECKING: 13 | from configparser import ConfigParser # pragma: no cover 14 | 15 | 16 | class Local(Storage): 17 | """ 18 | Local storage 19 | """ 20 | 21 | path: Path 22 | 23 | @classmethod 24 | def parse_config(cls, config: ConfigParser) -> Dict[str, Any]: 25 | path = config.get("path", "") 26 | if not path: 27 | raise ValueError("Local storage requires a path") 28 | return {"path": Path(path)} 29 | 30 | def __init__(self, path: Path) -> None: 31 | self.path = path 32 | 33 | def get_size(self, archive_id: str) -> int: 34 | file: Path = self.path / archive_id 35 | return file.stat().st_size 36 | 37 | def read(self, archive_id: str) -> IO[bytes]: 38 | handle: IO[bytes] = open(self.path / archive_id, "rb") 39 | return handle 40 | 41 | def write(self, archive_id: str) -> IO[bytes]: 42 | handle: IO[bytes] = open(self.path / archive_id, "wb") 43 | return handle 44 | -------------------------------------------------------------------------------- /serac/reporter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Report classes for realtime feedback on long-running tasks 3 | """ 4 | import sys 5 | from typing import IO 6 | 7 | 8 | class Reporter: 9 | """ 10 | Base reporter class 11 | """ 12 | 13 | file: str 14 | status: str 15 | 16 | def __init__(self, file: str, status: str): 17 | self.file = file 18 | self.status = status 19 | 20 | def update(self, status: str): 21 | self.status = status 22 | 23 | def complete(self, status: str): 24 | self.update(status) 25 | 26 | 27 | class NullReporter(Reporter): 28 | def __init__(self, file: str, status: str): 29 | pass 30 | 31 | def update(self, status: str): 32 | pass 33 | 34 | def complete(self, status: str): 35 | pass 36 | 37 | 38 | class StreamReporter(Reporter): 39 | """ 40 | Report to a stream 41 | """ 42 | 43 | stream: IO[str] 44 | 45 | def __init__(self, file: str, status: str): 46 | super().__init__(file, status) 47 | self.stream.write(f"{file}... {status}") 48 | 49 | def update(self, status: str): 50 | super().update(status) 51 | self.stream.write(f"\r{self.file}... {status} ") 52 | 53 | def complete(self, status: str): 54 | self.update(status) 55 | self.stream.write("\n") 56 | 57 | 58 | class StdoutReporter(StreamReporter): 59 | stream: IO[str] = sys.stdout 60 | -------------------------------------------------------------------------------- /tests/test_crypto.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test serac/crypto.py 3 | """ 4 | from pathlib import Path 5 | 6 | from serac.crypto import decrypt, encrypt 7 | 8 | from .mocks import FilesystemTest 9 | 10 | 11 | class TestCrypto(FilesystemTest): 12 | """ 13 | Test Crypto operations 14 | """ 15 | 16 | def test_encrypt(self, fs): 17 | fs.create_file("/test/raw.txt", contents="value") 18 | with Path("/test/raw.txt").open("rb") as src: 19 | with Path("/test/encrypted.txt").open("wb") as dest: 20 | encrypt(src, dest, "secret") 21 | 22 | with Path("/test/raw.txt").open("r") as f: 23 | contents = f.read() 24 | assert contents == "value" 25 | 26 | with Path("/test/encrypted.txt").open("rb") as f: 27 | contents = f.read() 28 | assert contents != "" 29 | assert contents != "value" 30 | 31 | def test_decrypt(self, fs): 32 | # Encrypt so we can decrypt 33 | fs.create_file("/test/raw.txt", contents="value") 34 | with Path("/test/raw.txt").open("rb") as src: 35 | with Path("/test/encrypted.txt").open("wb") as dest: 36 | encrypt(src, dest, "secret") 37 | 38 | encrypted = Path("/test/encrypted.txt") 39 | with encrypted.open("rb") as src: 40 | with Path("/test/decrypted.txt").open("wb") as dest: 41 | decrypt(src, dest, "secret", encrypted.stat().st_size) 42 | 43 | with Path("/test/decrypted.txt").open("r") as f: 44 | contents = f.read() 45 | assert contents == "value" 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Serac is licensed under the BSD License 2 | ======================================= 3 | 4 | Copyright (c) 2019, Richard Terry, http://radiac.net/ 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 8 | 9 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 10 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 11 | Neither the name of the software nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | -------------------------------------------------------------------------------- /tests/index/test_index.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test serac/index/index.py 3 | 4 | Feature tests are broken into additional files 5 | """ 6 | from datetime import datetime 7 | from pathlib import Path 8 | from time import time 9 | 10 | from pyfakefs.fake_filesystem import FakeFile 11 | 12 | from serac.index.index import Pattern, scan 13 | 14 | from ..mocks import DatabaseTest, FilesystemTest 15 | 16 | 17 | class TestIndexPattern: 18 | def test_pattern_eq__is_equal(self): 19 | assert Pattern("/foo") == Pattern("/foo") 20 | 21 | def test_pattern_eq__is_not_equal(self): 22 | assert Pattern("/foo") != Pattern("/bar") 23 | 24 | 25 | class IndexTestBase(DatabaseTest, FilesystemTest): 26 | """ 27 | Base class for use in other index tests 28 | """ 29 | 30 | def mock_initial(self, fs): 31 | self.mock_fs(fs) 32 | fs.create_dir("/dest") 33 | fs.create_dir("/retrieved") 34 | changeset = scan(includes=["/src/"]) 35 | changeset.commit(archive_config=self.get_archive_config()) 36 | 37 | def mock_update(self, fs): 38 | Path("/src/dir/three.txt").write_text("updated") 39 | FakeFile("/src/dir/three.txt", filesystem=fs).st_mtime = int(time()) 40 | changeset = scan(includes=["/src/"]) 41 | changeset.commit(archive_config=self.get_archive_config()) 42 | 43 | def mock_two_states(self, fs, freezer): 44 | initial_time = datetime(2001, 1, 1, 1, 1, 1) 45 | freezer.move_to(initial_time) 46 | self.mock_initial(fs) 47 | update_time = datetime(2001, 1, 1, 1, 1, 2) 48 | freezer.move_to(update_time) 49 | self.mock_update(fs) 50 | return initial_time, update_time 51 | -------------------------------------------------------------------------------- /tests/index/test_database.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test serac/index/database.py 3 | """ 4 | from pathlib import Path 5 | 6 | import pytest 7 | from peewee import CharField, SqliteDatabase 8 | 9 | from serac.index.database import ( 10 | Model, 11 | connect, 12 | create_db, 13 | disconnect, 14 | get_current_db, 15 | set_current_db, 16 | ) 17 | 18 | from ..mocks import MockDatabase, TmpFs 19 | 20 | 21 | def test_create(): 22 | # Database uses C libraries so doesn't work with pyfakefs 23 | test_db = SqliteDatabase(None) 24 | main_db = get_current_db() 25 | set_current_db(test_db) 26 | 27 | with TmpFs("index.sqlite") as filename: 28 | create_db(path=Path(filename)) 29 | 30 | # Restore to main db 31 | set_current_db(main_db) 32 | 33 | 34 | def test_connect(): 35 | with MockDatabase() as test_db: # noqa # assign to var to have it in scope 36 | 37 | class FakeModel(Model): 38 | name = CharField() 39 | 40 | FakeModel.create(name="test") 41 | 42 | 43 | def test_connect__does_not_exist__raises_exception(fs): 44 | # Stash main db and prep test db 45 | test_db = SqliteDatabase(None) 46 | main_db = get_current_db() 47 | set_current_db(test_db) 48 | 49 | with pytest.raises(ValueError) as e: 50 | connect(path=Path("/does/not/exist.sqlite")) 51 | assert str(e.value) == "Database does not exist" 52 | 53 | # Restore to main db 54 | set_current_db(main_db) 55 | 56 | 57 | def test_disconnect__closes(mocker): 58 | # Stash main db and create test db 59 | main_db = get_current_db() 60 | 61 | class MockDb: 62 | close = mocker.stub() 63 | 64 | mock_db = MockDb() 65 | set_current_db(mock_db) 66 | 67 | disconnect() 68 | mock_db.close.assert_called_once() 69 | 70 | set_current_db(main_db) 71 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import find_packages, setup 4 | 5 | 6 | VERSION = "0.0.2" 7 | 8 | 9 | def read(fname): 10 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 11 | 12 | 13 | setup( 14 | name="serac", 15 | version=VERSION, 16 | author="Richard Terry", 17 | author_email="code@radiac.net", 18 | description=("Incremental permanent data archiver with encryption"), 19 | license="BSD", 20 | keywords="backup archive glacier", 21 | url="http://radiac.net/projects/serac/", 22 | long_description=read("README.rst"), 23 | classifiers=[ 24 | "Development Status :: 4 - Beta", 25 | "Intended Audience :: System Administrators", 26 | "License :: OSI Approved :: BSD License", 27 | "Operating System :: OS Independent", 28 | "Topic :: System :: Archiving :: Backup", 29 | "Programming Language :: Python :: 3", 30 | "Programming Language :: Python :: 3.7", 31 | ], 32 | install_requires=["click", "peewee", "pyAesCrypt", "smart-open"], 33 | setup_requires=["pytest-runner"], 34 | tests_require=[ 35 | "pytest", 36 | "pytest-black", 37 | "pytest-cov", 38 | "pytest-flake8", 39 | "pytest-isort", 40 | "pytest-mypy", 41 | "pytest-mock", 42 | "pyfakefs", 43 | "typing_extensions", 44 | "doc8", 45 | ], 46 | dependency_links=[ 47 | # Bugfix awaiting response to PR: 48 | # https://github.com/ktosiek/pytest-freezegun/pull/17 49 | "git+https://github.com/radiac/pytest-freezegun.git@bugfix/class-based-tests-with-duration-regression#egg=pytest-freezegun", 50 | # Bugfix merged but awaiting deployment to PyPI: 51 | # https://github.com/PyCQA/doc8/pull/17 52 | "git+https://github.com/radiac/doc8.git@feature/python-api#egg=doc8", 53 | ], 54 | zip_safe=True, 55 | packages=find_packages(exclude=("docs", "tests*")), 56 | include_package_data=True, 57 | entry_points={"console_scripts": ["serac=serac.commands:cli"]}, 58 | ) 59 | -------------------------------------------------------------------------------- /tests/storage/test_local.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test serac/storage/s3.py 3 | """ 4 | from configparser import ConfigParser 5 | from io import BytesIO 6 | from pathlib import Path 7 | 8 | import pytest 9 | 10 | from serac import crypto 11 | from serac.storage import Local 12 | 13 | from ..mocks import FilesystemTest 14 | 15 | 16 | class TestLocal(FilesystemTest): 17 | def test_init__path_required(self): 18 | parser = ConfigParser() 19 | parser.read_string( 20 | """ 21 | [archive] 22 | storage = local 23 | """ 24 | ) 25 | 26 | with pytest.raises(ValueError) as e: 27 | Local.parse_config(parser["archive"]) 28 | assert str(e.value) == "Local storage requires a path" 29 | 30 | def test_store(self, fs): 31 | # This will be tested in a separate test, but we'll focus on the store aspect 32 | fs.create_file("/src/foo", contents="unencrypted") 33 | fs.create_dir("/store") 34 | storage = Local(path=Path("/store/")) 35 | 36 | # Encrypt and push to storage 37 | storage.store( 38 | local_path=Path("/src/foo"), archive_id=str("1"), password="secret" 39 | ) 40 | 41 | # Check file exists in /store/ 42 | dest_path = Path("/store/1") 43 | assert dest_path.is_file() 44 | 45 | # Check it has been encrypted and we can decrypt it 46 | decrypted = BytesIO() 47 | with dest_path.open("rb") as handle: 48 | crypto.decrypt(handle, decrypted, "secret", dest_path.stat().st_size) 49 | assert str(decrypted.getvalue(), "utf-8") == "unencrypted" 50 | 51 | def test_retrieve(self, fs): 52 | # Encrypt and deliver. This is tested in a separate test 53 | fs.create_file("/src/foo", contents="unencrypted") 54 | fs.create_dir("/store") 55 | fs.create_dir("/dest") 56 | storage = Local(path=Path("/store/")) 57 | storage.store(local_path=Path("/src/foo"), archive_id=str(1), password="secret") 58 | 59 | # Pull and decrypt from storage 60 | storage.retrieve( 61 | local_path=Path("/dest/bar"), archive_id=str(1), password="secret" 62 | ) 63 | 64 | # Check file exists in /dest/ 65 | dest_path = Path("/dest/bar") 66 | assert dest_path.is_file() 67 | 68 | # Check it has been decrypted 69 | with dest_path.open("r") as handle: 70 | content = handle.read() 71 | assert content == "unencrypted" 72 | -------------------------------------------------------------------------------- /serac/storage/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Storage base class 3 | """ 4 | from __future__ import annotations 5 | 6 | from configparser import ConfigParser 7 | from pathlib import Path 8 | from typing import IO, Any, Dict 9 | 10 | from ..crypto import decrypt, encrypt 11 | from ..exceptions import FileExists 12 | 13 | 14 | storage_registry = {} 15 | 16 | 17 | class StorageType(type): 18 | def __init__(cls, name, bases, attrs): 19 | super().__init__(name, bases, attrs) 20 | if attrs.get("abstract", False): 21 | return 22 | storage_registry[name.lower()] = cls 23 | 24 | 25 | class Storage(metaclass=StorageType): 26 | 27 | abstract = True 28 | 29 | def __init__(self, **kwargs: Dict[str, Any]): 30 | pass 31 | 32 | @classmethod 33 | def from_config(cls, config: ConfigParser) -> Storage: 34 | kwargs: Dict[str, Any] = cls.parse_config(config) 35 | return cls(**kwargs) 36 | 37 | @classmethod 38 | def parse_config(cls, config: ConfigParser) -> Dict[str, Any]: 39 | raise NotImplementedError( 40 | "Storage.parse_config must be implemented by subclasses" 41 | ) # pragma: no cover 42 | 43 | def store(self, local_path: Path, archive_id: str, password: str) -> None: 44 | source: IO[bytes] 45 | with local_path.open("rb") as source: 46 | destination = self.write(archive_id) 47 | encrypt(source=source, destination=destination, password=password) 48 | destination.close() 49 | 50 | def retrieve(self, local_path: Path, archive_id: str, password: str) -> None: 51 | # Don't want to retrieve it if it already exists 52 | if local_path.exists(): 53 | raise FileExists(local_path) 54 | 55 | source_size = self.get_size(archive_id) 56 | source = self.read(archive_id) 57 | destination: IO[bytes] 58 | with local_path.open("wb") as destination: 59 | decrypt(source, destination, password, source_size) 60 | source.close() 61 | 62 | def get_size(self, archive_id: str) -> int: 63 | """ 64 | Return the size of the file 65 | """ 66 | raise NotImplementedError( 67 | "Storage.get_size must be implemented by subclasses" 68 | ) # pragma: no cover 69 | 70 | def read(self, archive_id: str) -> IO[bytes]: 71 | """ 72 | Return an IO object to read from 73 | """ 74 | raise NotImplementedError( 75 | "Storage.read must be implemented by subclasses" 76 | ) # pragma: no cover 77 | 78 | def write(self, archive_id: str) -> IO[bytes]: 79 | """ 80 | Return an IO object to write to 81 | """ 82 | raise NotImplementedError( 83 | "Storage.write must be implemented by subclasses" 84 | ) # pragma: no cover 85 | -------------------------------------------------------------------------------- /serac/index/database.py: -------------------------------------------------------------------------------- 1 | """ 2 | Database object 3 | """ 4 | from __future__ import annotations 5 | 6 | from collections import defaultdict 7 | from enum import IntEnum 8 | from pathlib import Path 9 | from typing import Any, DefaultDict, Dict, List, Type 10 | 11 | from peewee import Database, IntegerField 12 | from peewee import Model as BaseModel 13 | from peewee import SqliteDatabase, TextField 14 | 15 | 16 | _db = SqliteDatabase(None) 17 | models: DefaultDict[Database, List[Model]] = defaultdict(list) 18 | 19 | 20 | def set_current_db(database: Database): 21 | global _db 22 | _db = database 23 | 24 | 25 | def get_current_db(): 26 | return _db 27 | 28 | 29 | def connect(path: Path, create: bool = False, database: Database = None): 30 | if database is None: 31 | database = get_current_db() 32 | 33 | if not create and not path.is_file(): 34 | raise ValueError("Database does not exist") 35 | 36 | database.init(str(path)) 37 | database.connect() 38 | if create: 39 | database.create_tables(models[database]) 40 | 41 | 42 | def create_db(path: Path, database: Database = None) -> None: 43 | connect(path, create=True, database=database) 44 | 45 | 46 | def disconnect(database: Database = None) -> None: 47 | if database is None: 48 | database = get_current_db() 49 | 50 | database.close() 51 | 52 | 53 | class ModelMeta(type(BaseModel)): # type: ignore # see mypy #4284 54 | """ 55 | Metaclass wrapper for standard peewee model metaclass to automatically 56 | register a new model with the model registry 57 | """ 58 | 59 | def __new__(cls, name, bases, attrs): 60 | # Ensure we've got a Meta class definition 61 | if "Meta" not in attrs: 62 | attrs["Meta"] = type("Meta", (), {}) 63 | 64 | # Set the database to the current db 65 | if getattr(attrs["Meta"], "database", None) is None: 66 | setattr(attrs["Meta"], "database", get_current_db()) 67 | 68 | # Initialise metaclass as normal 69 | cls = super().__new__(cls, name, bases, attrs) 70 | 71 | # Log model so we can automatically create it 72 | models[cls._meta.database].append(cls) 73 | 74 | return cls 75 | 76 | 77 | class Model(BaseModel, metaclass=ModelMeta): 78 | pass 79 | 80 | 81 | class EnumField(IntegerField): 82 | """ 83 | Field for integer enums 84 | """ 85 | 86 | enum: Type[IntEnum] 87 | 88 | def __init__( 89 | self, enum: Type[IntEnum], *args: List[Any], **kwargs: Dict[str, Any] 90 | ) -> None: 91 | super().__init__(*args, **kwargs) 92 | self.enum = enum 93 | 94 | def db_value(self, value: IntEnum) -> int: 95 | return value.value 96 | 97 | def python_value(self, value: int) -> IntEnum: 98 | return self.enum(value) 99 | 100 | 101 | class PathField(TextField): 102 | """ 103 | Field for Path objects 104 | """ 105 | 106 | def db_value(self, value: Path) -> str: 107 | return str(value) 108 | 109 | def python_value(self, value: str) -> Path: 110 | return Path(value) 111 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile 6 | # 7 | -e git+https://github.com/radiac/doc8.git@feature/python-api#egg=doc8 8 | -e git+https://github.com/radiac/pytest-freezegun.git@bugfix/class-based-tests-with-duration-regression#egg=pytest-freezegun 9 | appdirs==1.4.3 # via black 10 | asn1crypto==0.24.0 # via cryptography 11 | atomicwrites==1.3.0 # via pytest 12 | attrs==19.1.0 # via black, packaging, pytest 13 | backcall==0.1.0 # via ipython 14 | black==19.3b0 # via pytest-black 15 | boto3==1.9.205 # via smart-open 16 | boto==2.49.0 # via smart-open 17 | botocore==1.12.205 # via boto3, s3transfer 18 | certifi==2019.6.16 # via requests 19 | cffi==1.12.3 # via cryptography 20 | chardet==3.0.4 # via requests 21 | click==7.0 22 | coverage==4.5.4 # via coveralls, pytest-cov 23 | coveralls==1.8.2 24 | cryptography==2.7 # via pyaescrypt 25 | decorator==4.4.0 # via ipython, traitlets 26 | docopt==0.6.2 # via coveralls 27 | docutils==0.14 # via botocore, restructuredtext-lint 28 | entrypoints==0.3 # via flake8 29 | flake8==3.7.8 # via pytest-flake8 30 | freezegun==0.3.12 31 | idna==2.8 # via requests 32 | importlib-metadata==0.19 # via pluggy, pytest 33 | ipdb==0.12.2 34 | ipython-genutils==0.2.0 # via traitlets 35 | ipython==7.7.0 # via ipdb 36 | isort==4.3.21 # via pytest-isort 37 | jedi==0.14.1 # via ipython 38 | jmespath==0.9.4 # via boto3, botocore 39 | mccabe==0.6.1 # via flake8 40 | more-itertools==7.2.0 # via pytest 41 | mypy-extensions==0.4.1 # via mypy 42 | mypy==0.720 # via pytest-mypy 43 | packaging==19.1 # via pytest 44 | parso==0.5.1 # via jedi 45 | pbr==5.4.2 # via stevedore 46 | peewee==3.10.0 47 | pexpect==4.7.0 # via ipython 48 | pickleshare==0.7.5 # via ipython 49 | pip-tools==4.0.0 50 | pluggy==0.12.0 # via pytest 51 | prompt-toolkit==2.0.9 # via ipython 52 | ptyprocess==0.6.0 # via pexpect 53 | py==1.8.0 # via pytest 54 | pyaescrypt==0.4.3 55 | pycodestyle==2.5.0 # via flake8 56 | pycparser==2.19 # via cffi 57 | pyfakefs==3.6 58 | pyflakes==2.1.1 # via flake8 59 | pygments==2.4.2 # via ipython 60 | pyparsing==2.4.2 # via packaging 61 | pytest-black==0.3.7 62 | pytest-cov==2.7.1 63 | pytest-flake8==1.0.4 64 | pytest-isort==0.3.1 65 | pytest-mock==1.10.4 66 | pytest-mypy==0.3.3 67 | pytest==5.0.1 68 | python-dateutil==2.8.0 # via botocore, freezegun 69 | requests==2.22.0 # via coveralls, smart-open 70 | restructuredtext-lint==1.3.0 71 | s3transfer==0.2.1 # via boto3 72 | six==1.12.0 # via cryptography, freezegun, packaging, pip-tools, prompt-toolkit, python-dateutil, stevedore, traitlets 73 | smart-open==1.8.4 74 | stevedore==1.30.1 75 | toml==0.10.0 # via black, pytest-black 76 | traitlets==4.3.2 # via ipython 77 | typed-ast==1.4.0 # via mypy 78 | typing-extensions==3.7.4 79 | urllib3==1.25.3 # via botocore, requests 80 | wcwidth==0.1.7 # via prompt-toolkit, pytest 81 | zipp==0.5.2 # via importlib-metadata 82 | 83 | # The following packages are considered to be unsafe in a requirements file: 84 | # setuptools==41.2.0 # via ipdb, ipython 85 | -------------------------------------------------------------------------------- /tests/index/test_index__state.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test State class in serac/index/index.py 3 | """ 4 | from datetime import datetime, timedelta 5 | from pathlib import Path 6 | 7 | import pytest 8 | 9 | from serac.index.index import State 10 | from serac.index.models import Action 11 | 12 | from ..mocks import DatabaseTest, gen_file 13 | 14 | 15 | class TestIndexState(DatabaseTest): 16 | def test_single_entry__get_latest(self): 17 | now = datetime.now() 18 | earlier = now - timedelta(days=7) 19 | 20 | file1 = gen_file(path="foo", action=Action.ADD, last_modified=earlier) 21 | file2 = gen_file(path="foo", action=Action.CONTENT, last_modified=now) 22 | state = State.at(timestamp=int(now.timestamp())) 23 | 24 | assert file1 != file2 25 | assert state == {Path("foo"): file2} 26 | assert state[Path("foo")].action == Action.CONTENT 27 | 28 | def test_multiple_entries__get_latest(self): 29 | now = datetime.now() 30 | earlier = now - timedelta(days=7) 31 | 32 | file1 = gen_file(path="one", action=Action.ADD, last_modified=earlier) 33 | file2 = gen_file(path="one", action=Action.CONTENT, last_modified=now) 34 | file3 = gen_file(path="two", action=Action.ADD, last_modified=earlier) 35 | file4 = gen_file(path="two", action=Action.CONTENT, last_modified=now) 36 | state = State.at(timestamp=int(now.timestamp())) 37 | 38 | assert file1 != file2 39 | assert file3 != file4 40 | assert state == {Path("one"): file2, Path("two"): file4} 41 | assert state[Path("one")].action == Action.CONTENT 42 | assert state[Path("two")].action == Action.CONTENT 43 | 44 | def test_multiple_entries__get_earlier(self): 45 | now = datetime.now() 46 | earlier = now - timedelta(days=7) 47 | 48 | file1 = gen_file(path="one", action=Action.ADD, last_modified=earlier) 49 | file2 = gen_file(path="one", action=Action.CONTENT, last_modified=now) 50 | file3 = gen_file(path="two", action=Action.ADD, last_modified=earlier) 51 | file4 = gen_file(path="two", action=Action.CONTENT, last_modified=now) 52 | state = State.at(timestamp=int(earlier.timestamp())) 53 | 54 | assert file1 != file2 55 | assert file3 != file4 56 | assert state == {Path("one"): file1, Path("two"): file3} 57 | assert state[Path("one")].action == Action.ADD 58 | assert state[Path("two")].action == Action.ADD 59 | 60 | def test_deleted_entry__not_included(self): 61 | now = datetime.now() 62 | earlier = now - timedelta(days=7) 63 | 64 | file1 = gen_file(path="one", action=Action.ADD, last_modified=earlier) 65 | file2 = gen_file(path="one", action=Action.CONTENT, last_modified=now) 66 | file3 = gen_file(path="two", action=Action.ADD, last_modified=earlier) 67 | file4 = gen_file(path="two", action=Action.DELETE, last_modified=now) 68 | state = State.at(timestamp=int(now.timestamp())) 69 | 70 | assert file1 != file2 71 | assert file3 != file4 72 | assert state == {Path("one"): file2} 73 | assert state[Path("one")].action == Action.CONTENT 74 | 75 | def test_state_at_datetime__raise_exception(self): 76 | now = datetime.now() 77 | 78 | with pytest.raises(ValueError) as e: 79 | State.at(timestamp=now) 80 | assert str(e.value) == "Can only get state using a timestamp" 81 | 82 | def test_state_by_path__returns_in_order(self): 83 | now = datetime.now() 84 | 85 | file1 = gen_file(path="b", action=Action.ADD, last_modified=now) 86 | file2 = gen_file(path="a", action=Action.CONTENT, last_modified=now) 87 | state = State.at(timestamp=int(now.timestamp())) 88 | 89 | assert state.by_path() == [file2, file1] 90 | -------------------------------------------------------------------------------- /tests/index/test_index__search.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test search() in serac/index/index.py 3 | """ 4 | from pathlib import Path 5 | from time import time 6 | 7 | from serac.index.index import Pattern, search 8 | 9 | from .test_index import IndexTestBase 10 | 11 | 12 | class TestIndexSearch(IndexTestBase): 13 | def test_search_file__from_head__finds_single_file(self, fs, freezer): 14 | initial_time, update_time = self.mock_two_states(fs, freezer) 15 | results = search(timestamp=int(time()), pattern=Pattern("/src/dir/three.txt")) 16 | 17 | assert len(results) == 1 18 | assert Path("/src/dir/three.txt") in results 19 | assert results[Path("/src/dir/three.txt")].last_modified == int( 20 | update_time.timestamp() 21 | ) 22 | 23 | def test_search_file__from_past__finds_single_file(self, fs, freezer): 24 | initial_time, update_time = self.mock_two_states(fs, freezer) 25 | results = search( 26 | timestamp=int(initial_time.timestamp()), 27 | pattern=Pattern("/src/dir/three.txt"), 28 | ) 29 | 30 | assert len(results) == 1 31 | assert Path("/src/dir/three.txt") in results 32 | assert results[Path("/src/dir/three.txt")].last_modified == int( 33 | initial_time.timestamp() 34 | ) 35 | 36 | def test_search_dir__from_head__finds_some_files(self, fs, freezer): 37 | initial_time, update_time = self.mock_two_states(fs, freezer) 38 | results = search(timestamp=int(time()), pattern=Pattern("/src/dir")) 39 | 40 | assert len(results) == 3 41 | assert Path("/src/dir/three.txt") in results 42 | assert ( 43 | results[Path("/src/dir/three.txt")].last_modified == update_time.timestamp() 44 | ) 45 | assert Path("/src/dir/four.txt") in results 46 | assert Path("/src/dir/subdir/five.txt") in results 47 | 48 | def test_search_dir__from_past__finds_some_files(self, fs, freezer): 49 | initial_time, update_time = self.mock_two_states(fs, freezer) 50 | results = search( 51 | timestamp=int(initial_time.timestamp()), pattern=Pattern("/src/dir") 52 | ) 53 | 54 | assert len(results) == 3 55 | assert Path("/src/dir/three.txt") in results 56 | assert ( 57 | results[Path("/src/dir/three.txt")].last_modified 58 | == initial_time.timestamp() 59 | ) 60 | assert Path("/src/dir/four.txt") in results 61 | assert Path("/src/dir/subdir/five.txt") in results 62 | 63 | def test_search_all__from_head__finds_all_files(self, fs, freezer): 64 | initial_time, update_time = self.mock_two_states(fs, freezer) 65 | results = search(timestamp=int(time())) 66 | 67 | assert len(results) == 5 68 | assert Path("/src/one.txt") in results 69 | assert Path("/src/two.txt") in results 70 | assert Path("/src/dir/three.txt") in results 71 | assert ( 72 | results[Path("/src/dir/three.txt")].last_modified == update_time.timestamp() 73 | ) 74 | assert Path("/src/dir/four.txt") in results 75 | assert Path("/src/dir/subdir/five.txt") in results 76 | 77 | def test_search_all__from_past__finds_all_files(self, fs, freezer): 78 | initial_time, update_time = self.mock_two_states(fs, freezer) 79 | results = search(timestamp=int(initial_time.timestamp())) 80 | 81 | assert len(results) == 5 82 | assert Path("/src/one.txt") in results 83 | assert Path("/src/two.txt") in results 84 | assert Path("/src/dir/three.txt") in results 85 | assert ( 86 | results[Path("/src/dir/three.txt")].last_modified 87 | == initial_time.timestamp() 88 | ) 89 | assert Path("/src/dir/four.txt") in results 90 | assert Path("/src/dir/subdir/five.txt") in results 91 | 92 | def test_search_missing__returns_zero(self, fs, freezer): 93 | initial_time, update_time = self.mock_two_states(fs, freezer) 94 | results = search(timestamp=int(time()), pattern=Pattern("/does/not.exist")) 95 | assert len(results) == 0 96 | -------------------------------------------------------------------------------- /serac/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config parsing 3 | """ 4 | from __future__ import annotations 5 | 6 | from configparser import ConfigParser, SectionProxy 7 | from dataclasses import dataclass 8 | from pathlib import Path 9 | from typing import Any, Dict, List, Type, TypeVar 10 | 11 | from .storage import Storage, storage_registry 12 | 13 | 14 | T = TypeVar("T", bound="SectionConfig") 15 | 16 | 17 | @dataclass 18 | class SectionConfig: 19 | """ 20 | Base for section config objects 21 | """ 22 | 23 | @classmethod 24 | def from_config(cls: Type[T], config: SectionProxy) -> T: 25 | kwargs: Dict[str, Any] = cls.parse_config(config) 26 | # mypy has a problem with dataclasses, so ignore the typing error 27 | return cls(**kwargs) # type: ignore 28 | 29 | @classmethod 30 | def parse_config(self, section: SectionProxy) -> Dict[str, Any]: 31 | raise NotImplementedError() # pragma: no cover 32 | 33 | 34 | @dataclass 35 | class SourceConfig(SectionConfig): 36 | """ 37 | Source config container 38 | """ 39 | 40 | includes: List[str] 41 | excludes: List[str] 42 | 43 | @classmethod 44 | def parse_config(self, section: SectionProxy) -> Dict[str, Any]: 45 | includes = section.get("include", "").split() 46 | excludes = section.get("exclude", "").split() 47 | 48 | if not includes: 49 | raise ValueError("The source section must declare at least one include") 50 | 51 | return {"includes": includes, "excludes": excludes} 52 | 53 | 54 | @dataclass 55 | class ArchiveConfig(SectionConfig): 56 | """ 57 | Archive config container 58 | """ 59 | 60 | storage: Storage 61 | password: str 62 | 63 | @classmethod 64 | def parse_config(self, section: SectionProxy) -> Dict[str, Any]: 65 | storage_type = section.get("storage", "") 66 | password = section.get("password", "") 67 | 68 | if not storage_type: 69 | raise ValueError("The archive section must declare a storage type") 70 | 71 | # Look up storage type in registry and get it to parse config 72 | storage_cls = storage_registry.get(storage_type) 73 | if not storage_cls: 74 | raise ValueError(f"The archive storage '{storage_type}' is not recognised") 75 | storage = storage_cls.from_config(section) 76 | 77 | return {"storage": storage, "password": password} 78 | 79 | 80 | @dataclass 81 | class IndexConfig(SectionConfig): 82 | """ 83 | Index config container 84 | """ 85 | 86 | path: Path 87 | 88 | @classmethod 89 | def parse_config(self, section: SectionProxy) -> Dict[str, Any]: 90 | path_raw: str = section.get("path", "") 91 | 92 | if not path_raw: 93 | raise ValueError("The index section must declare a path") 94 | path = Path(path_raw) 95 | if not path.parent.exists(): 96 | raise ValueError("The path for the index does not exist") 97 | 98 | return {"path": path} 99 | 100 | 101 | class Config: 102 | """ 103 | Configuration file loader 104 | """ 105 | 106 | sections = ["source", "archive", "index"] 107 | source: SourceConfig 108 | archive: ArchiveConfig 109 | index: IndexConfig 110 | 111 | def __init__(self, filename: str = None) -> None: 112 | if filename: 113 | self.load(filename) 114 | 115 | def load(self, filename: str) -> None: 116 | parser = ConfigParser() 117 | 118 | # Let parsing errors go through unchanged 119 | parser.read(filename) 120 | 121 | if sorted(parser.sections()) != sorted(self.sections): 122 | raise ValueError( 123 | "Invalid config file; must contain source, archive and " 124 | f"index sections; instead found {', '.join(parser.sections())}" 125 | ) 126 | 127 | self.source = SourceConfig.from_config(parser["source"]) 128 | self.archive = ArchiveConfig.from_config(parser["archive"]) 129 | self.index = IndexConfig.from_config(parser["index"]) 130 | -------------------------------------------------------------------------------- /tests/storage/test_s3.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test serac/storage/s3.py 3 | 4 | These tests are disabled by default. Set environment variables to test: 5 | SERAC_TEST_S3=1 S3_KEY="key" S3_SECRET="secret" \ 6 | S3_BUCKET="bucket_name" S3_PATH="test" pytest 7 | """ 8 | import os 9 | from io import BytesIO 10 | from pathlib import Path 11 | 12 | import boto3 13 | import pytest 14 | 15 | from serac import crypto 16 | from serac.storage import S3 17 | 18 | from ..mocks import FilesystemTest 19 | 20 | 21 | class TestS3Config: 22 | """ 23 | Generic class tests 24 | """ 25 | 26 | def test_init__missing_key__raises_exception(self): 27 | with pytest.raises(ValueError) as e: 28 | S3(key="", secret="secret", bucket="bucket", path="path") 29 | assert str(e.value) == "S3 storage requires a key" 30 | 31 | def test_init__missing_secret__raises_exception(self): 32 | with pytest.raises(ValueError) as e: 33 | S3(key="key", secret="", bucket="bucket", path="path") 34 | assert str(e.value) == "S3 storage requires a secret" 35 | 36 | def test_init__missing_bucket__raises_exception(self): 37 | with pytest.raises(ValueError) as e: 38 | S3(key="key", secret="secret", bucket="", path="path") 39 | assert str(e.value) == "S3 storage requires a bucket" 40 | 41 | def test_init__missing_path__no_exception_raised(self): 42 | S3(key="key", secret="secret", bucket="bucket", path="") 43 | 44 | 45 | @pytest.mark.skipif( 46 | not os.getenv("SERAC_TEST_S3", ""), reason="Not running S3 integration tests" 47 | ) 48 | class TestS3Integration(FilesystemTest): 49 | """ 50 | S3 integration tests 51 | """ 52 | 53 | @property 54 | def storage_S3(self): 55 | return S3( 56 | key=os.environ["S3_KEY"], 57 | secret=os.environ["S3_SECRET"], 58 | bucket=os.environ["S3_BUCKET"], 59 | path=os.environ["S3_PATH"], 60 | ) 61 | 62 | @property 63 | def boto_session(self): 64 | return boto3.Session( 65 | aws_access_key_id=os.environ["S3_KEY"], 66 | aws_secret_access_key=os.environ["S3_SECRET"], 67 | ) 68 | 69 | def get_s3_object(self, filename): 70 | s3 = self.boto_session.resource("s3") 71 | return s3.Object( 72 | bucket_name=os.environ["S3_BUCKET"], 73 | key=f"{os.environ['S3_PATH']}/{filename}", 74 | ) 75 | 76 | def teardown_method(self): 77 | obj = self.get_s3_object("1") 78 | obj.delete() 79 | super().teardown_method() 80 | 81 | def test_store(self, fs): 82 | self.fix_boto(fs) 83 | 84 | # This will be tested in a separate test, but we'll focus on the store aspect 85 | fs.create_file("/src/foo", contents="unencrypted") 86 | storage = self.storage_S3 87 | 88 | # Encrypt and push to storage 89 | storage.store(local_path=Path("/src/foo"), archive_id="1", password="secret") 90 | 91 | # Check file exists in S3 92 | assert storage.get_size("1") > 0 93 | obj = self.get_s3_object("1") 94 | data = obj.get()["Body"].read() 95 | 96 | # Check it has been encrypted and we can decrypt it 97 | encrypted = BytesIO() 98 | encrypted.write(data) 99 | encrypted.seek(0) 100 | decrypted = BytesIO() 101 | crypto.decrypt(encrypted, decrypted, "secret", len(data)) 102 | assert str(decrypted.getvalue(), "utf-8") == "unencrypted" 103 | 104 | def test_retrieve(self, fs): 105 | self.fix_boto(fs) 106 | 107 | # Encrypt and deliver. This is tested in a separate test 108 | fs.create_file("/src/foo", contents="unencrypted") 109 | fs.create_dir("/store") 110 | fs.create_dir("/dest") 111 | storage = self.storage_S3 112 | storage.store(local_path=Path("/src/foo"), archive_id="1", password="secret") 113 | 114 | # Pull and decrypt from storage 115 | storage.retrieve( 116 | local_path=Path("/dest/bar"), archive_id="1", password="secret" 117 | ) 118 | 119 | # Check file exists in /dest/ 120 | dest_path = Path(f"/dest/bar") 121 | assert dest_path.is_file() 122 | 123 | # Check it has been decrypted 124 | with dest_path.open("r") as handle: 125 | content = handle.read() 126 | assert content == "unencrypted" 127 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Serac 3 | ===== 4 | 5 | .. image:: https://travis-ci.org/radiac/serac.svg?branch=master 6 | :target: https://travis-ci.org/radiac/serac 7 | 8 | .. image:: https://coveralls.io/repos/radiac/serac/badge.svg?branch=master&service=github 9 | :target: https://coveralls.io/github/radiac/serac?branch=master 10 | 11 | 12 | Incremental permanent data archiver with encryption. 13 | 14 | Designed for permanently backing up data which does not change frequently, 15 | suitable for write-only storage such as Amazon Glacier. 16 | 17 | 18 | Installation 19 | ============ 20 | 21 | This requires Python 3.7. 22 | 23 | Install serac with:: 24 | 25 | pip install serac 26 | 27 | If you don't have Python 3.7 installed, you can install it with 28 | `pyenv `_:: 29 | 30 | curl https://pyenv.run | bash 31 | # restart your shell 32 | pyenv update 33 | pyenv install 3.7.3 34 | pip install virtualenv 35 | virtualenv python=python3.7 venv 36 | . venv/bin/activate 37 | pip install serac 38 | 39 | 40 | Usage 41 | ===== 42 | 43 | Serac must always be run with the config file path as the first argument, then 44 | the command to perform as the second argument. 45 | 46 | To run serac:: 47 | 48 | /path/to/venv/bin/serac CONFIG COMMAND [OPTIONS] 49 | 50 | It is safe to run Serac from a cron job; it will not allow multiple processes to work 51 | with the same config file at the same time. 52 | 53 | 54 | Commands 55 | -------- 56 | 57 | After that it accepts one of the following commands: 58 | 59 | ``test`` 60 | Test the configuration file 61 | 62 | ``init`` 63 | Initialise an index for a new config by creating the database 64 | 65 | ``archive`` 66 | Archive any changes since the last archive was performed 67 | 68 | ``ls [--at=DATE] [--pattern=PATTERN]`` 69 | Show the state of the archive. 70 | 71 | This follows the roughly the same layout as ``ls -l``, with the following 72 | columns: 73 | 74 | * File permissions 75 | * Owner (as it will be restored to on this system) 76 | * Group (as it will be restored to on this system) 77 | * Size (in kibi/mebib/gibibytes, or in bytes if not specified) 78 | * Last modified date (this year if not specified) 79 | * Last modified timestamp (for ease of use in calls to ``ls`` and 80 | ``restore``) 81 | * Path (as it was on the originating system) 82 | 83 | ``restore DESTINATION [--at=DATE] [--pattern=PATTERN]`` 84 | Restore some or all of an archive 85 | 86 | If an archived object is in glacier it will be marked for retrieval. 87 | 88 | 89 | Arguments 90 | ~~~~~~~~~ 91 | 92 | ``DATE`` 93 | This should be a date in one of the following formats: 94 | 95 | * epoch timestamp, eg ``1582165202`` 96 | * ``YYYY-MM-DD``, eg ``2020-02-20`` 97 | * ``YYYY-MM-DD HH:MM:SS``, eg ``2020-03-20 02:20:02`` 98 | * ``YYYY-MM-DDTHH:MM:SS``, eg ``2020-03-20T02:20:02`` 99 | 100 | ``PATTERN`` 101 | This can either be an exact path to a file, or a partial path to a 102 | directory. 103 | 104 | Globs are not yet supported. 105 | 106 | 107 | Configuration 108 | ============= 109 | 110 | Configure serac using a config file:: 111 | 112 | [source] 113 | # Define the source for the backups 114 | 115 | # List of paths to include and exclude (glob patterns) 116 | include = 117 | /path/to/source 118 | /path/somewhere/else 119 | exclude = 120 | /path/to/source/unprocessed 121 | /path/somewhere/else/*.jpg 122 | 123 | [archive] 124 | # Define where the backups are saved 125 | 126 | # Backup to a local path 127 | #storage = local 128 | #path = /path/to/backup 129 | 130 | # Backup to S3 131 | storage = s3 132 | key = 4p1_k3y 133 | secret = 53cr3t 134 | bucket = arn:aws:s3:::my_bucket_name 135 | path = path/within/bucket 136 | 137 | # Encrypt backups with this password 138 | password = l0ng_s3cr3t 139 | 140 | [index] 141 | # Define how indexed files are treated 142 | 143 | # Location for index database 144 | # This should then be backed up by another service, eg duplicity 145 | path = /path/to/index.sqlite 146 | 147 | 148 | Contributing 149 | ============ 150 | 151 | To work on serac, install it in a virtual environment:: 152 | 153 | mkdir serac 154 | cd serac 155 | git clone repo 156 | virtualenv --python=python3.7 venv 157 | . venv/bin/activate 158 | pip install pip-tools 159 | cd repo 160 | pip-sync 161 | 162 | To run during development:: 163 | 164 | python -m serac CONFIG COMMAND [OPTIONS] 165 | 166 | To run tests:: 167 | 168 | cd serac/repo 169 | . ../venv/bin/activate 170 | pytest 171 | 172 | 173 | Changelog 174 | ========= 175 | 176 | 0.0.2, 2019-09-25 177 | ----------------- 178 | 179 | Feature: 180 | 181 | * Add process locking 182 | 183 | 184 | 0.0.1, 2019-09-23 185 | ----------------- 186 | 187 | Feature: 188 | 189 | * Initial release 190 | -------------------------------------------------------------------------------- /serac/storage/s3.py: -------------------------------------------------------------------------------- 1 | """ 2 | Storage on AWS S3 3 | """ 4 | from configparser import ConfigParser 5 | from enum import Enum 6 | from functools import lru_cache 7 | from pathlib import Path 8 | from typing import IO, Any, Dict 9 | 10 | import boto3 11 | from smart_open import open 12 | 13 | from ..exceptions import ArchiveUnavailable, FileExists 14 | from .base import Storage 15 | 16 | 17 | # Number of days to restore a file from Glacier for 18 | RESTORE_DAYS = 1 19 | 20 | 21 | class StorageClass(Enum): 22 | STANDARD = "STANDARD" 23 | GLACIER = "GLACIER" 24 | DEEP_ARCHIVE = "DEEP_ARCHIVE" 25 | 26 | 27 | class ObjectFrozen(ArchiveUnavailable): 28 | msg = "Object is frozen" 29 | short = "frozen" 30 | 31 | 32 | class ObjectRetrieving(ArchiveUnavailable): 33 | msg = "Object is not yet available" 34 | short = "retrieve in progress" 35 | 36 | 37 | class S3(Storage): 38 | key: str 39 | secret: str 40 | bucket: str 41 | path: str 42 | 43 | @classmethod 44 | def parse_config(cls, config: ConfigParser) -> Dict[str, Any]: 45 | kwargs = { 46 | key: config.get(key, "") for key in ["key", "secret", "bucket", "path"] 47 | } 48 | 49 | return kwargs 50 | 51 | def __init__(self, key: str, secret: str, bucket: str, path: str) -> None: 52 | self.key = key 53 | self.secret = secret 54 | self.bucket = bucket 55 | self.path = path 56 | 57 | # Check required string values 58 | for attr in ["key", "secret", "bucket"]: 59 | if not getattr(self, attr): 60 | raise ValueError(f"S3 storage requires a {attr}") 61 | 62 | @property 63 | def s3_resource(self) -> boto3.resources.base.ServiceResource: 64 | session = boto3.Session( 65 | aws_access_key_id=self.key, aws_secret_access_key=self.secret 66 | ) 67 | s3 = session.resource("s3") 68 | return s3 69 | 70 | def get_s3_path(self, archive_id: str): 71 | return f"s3://{self.key}:{self.secret}@{self.bucket}/{self.path}/{archive_id}" 72 | 73 | def get_s3_object(self, archive_id: str) -> boto3.resources.base.ServiceResource: 74 | obj = self.s3_resource.Object( 75 | bucket_name=self.bucket, key=f"{self.path}/{archive_id}" 76 | ) 77 | return obj 78 | 79 | @lru_cache() 80 | def check_is_available(self, archive_id: str) -> bool: 81 | obj = self.get_s3_object(archive_id) 82 | 83 | # If it's in standard S3, nothing preventing us 84 | if obj.storage_class not in [StorageClass.GLACIER, StorageClass.DEEP_ARCHIVE]: 85 | return True 86 | 87 | # It's in glacier - check its restore state 88 | if obj.restore is None: 89 | # Restoration not started 90 | raise ObjectFrozen() 91 | 92 | elif 'ongoing-request="true"' in obj.restore: 93 | # Restoration in progress 94 | raise ObjectRetrieving() 95 | 96 | elif 'ongoing-request="false"' in obj.restore: 97 | # Restoration complete 98 | return True 99 | 100 | raise ValueError(f"Unknown restore state: {obj.restore}") 101 | 102 | def retrieve(self, local_path: Path, archive_id: str, password: str) -> None: 103 | """ 104 | Check if the file is available on S3 to restore to the destination, and if not 105 | start the S3 restore so it will be available soon 106 | """ 107 | # This check will be done again in super().retrieve, but we don't want to 108 | # request thawing the glacier object unnecessarily. 109 | if local_path.exists(): 110 | raise FileExists(local_path) 111 | 112 | try: 113 | self.check_is_available() 114 | except ObjectFrozen: 115 | # Object is frozen, start the thaw 116 | self.start_s3_restore(archive_id) 117 | raise 118 | # Unhandled exceptions will include ``ObjectRetrieving`` 119 | 120 | # File is available, start the restore 121 | super().retrieve( 122 | local_path=local_path, archive_id=archive_id, password=password 123 | ) 124 | 125 | def get_size(self, archive_id: str) -> int: 126 | obj = self.get_s3_object(archive_id) 127 | return obj.content_length 128 | 129 | def start_s3_restore(self, archive_id: str) -> None: 130 | restore_request = { 131 | "OutputLocation": { 132 | "S3": { 133 | "BucketName": "destination-bucket", 134 | "Prefix": "destination-prefix", 135 | } 136 | }, 137 | "Days": RESTORE_DAYS, 138 | } 139 | self.s3_resource.restore_object( 140 | Bucket="bucket-name", 141 | Key=f"{self.path}/{archive_id}", 142 | RestoreRequest=restore_request, 143 | ) 144 | 145 | def read(self, archive_id: str) -> IO[bytes]: 146 | return open(self.get_s3_path(archive_id), "rb") 147 | 148 | def write(self, archive_id: str) -> IO[bytes]: 149 | return open(self.get_s3_path(archive_id), "wb") 150 | -------------------------------------------------------------------------------- /tests/index/test_index__scan.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test scan() in serac/index/index.py 3 | """ 4 | import os 5 | from pathlib import Path 6 | 7 | from serac.index.index import scan 8 | from serac.index.models import File 9 | 10 | from ..mocks import DatabaseTest, FilesystemTest, mock_file_archive 11 | 12 | 13 | class TestIndexScan(DatabaseTest, FilesystemTest): 14 | def test_single_dir__all_files_add(self, fs): 15 | self.mock_fs(fs) 16 | changeset = scan(includes=["/src/"]) 17 | 18 | assert len(changeset.added.keys()) == 5 19 | assert Path("/src/one.txt") in changeset.added 20 | assert Path("/src/two.txt") in changeset.added 21 | assert Path("/src/dir/three.txt") in changeset.added 22 | assert Path("/src/dir/four.txt") in changeset.added 23 | assert Path("/src/dir/subdir/five.txt") in changeset.added 24 | 25 | assert len(changeset.content.keys()) == 0 26 | assert len(changeset.metadata.keys()) == 0 27 | assert len(changeset.deleted.keys()) == 0 28 | 29 | def test_glob_exclude__exclusions_not_listed(self, fs): 30 | self.mock_fs(fs) 31 | changeset = scan(includes=["/src"], excludes=["/src/dir/*.txt"]) 32 | 33 | assert len(changeset.added.keys()) == 2 34 | assert Path("/src/one.txt") in changeset.added 35 | assert Path("/src/two.txt") in changeset.added 36 | 37 | def test_glob_and_exact_exclude__exclusions_not_listed(self, fs): 38 | self.mock_fs(fs) 39 | changeset = scan(includes=["/src"], excludes=["/src/one.txt", "/src/dir/*.txt"]) 40 | 41 | assert len(changeset.added.keys()) == 1 42 | assert Path("/src/two.txt") in changeset.added 43 | 44 | def test_path_exclude__exclusions_not_listed(self, fs): 45 | self.mock_fs(fs) 46 | changeset = scan(includes=["/src"], excludes=["*/subdir"]) 47 | 48 | assert len(changeset.added.keys()) == 4 49 | assert Path("/src/one.txt") in changeset.added 50 | assert Path("/src/two.txt") in changeset.added 51 | assert Path("/src/dir/three.txt") in changeset.added 52 | assert Path("/src/dir/four.txt") in changeset.added 53 | 54 | def test_multiple_dir__all_collected(self, fs): 55 | self.mock_fs(fs) 56 | changeset = scan(includes=["/src", "/alt"]) 57 | 58 | assert len(changeset.added.keys()) == 7 59 | assert Path("/src/one.txt") in changeset.added 60 | assert Path("/src/two.txt") in changeset.added 61 | assert Path("/src/dir/three.txt") in changeset.added 62 | assert Path("/src/dir/four.txt") in changeset.added 63 | assert Path("/src/dir/subdir/five.txt") in changeset.added 64 | assert Path("/alt/six.txt") in changeset.added 65 | assert Path("/alt/seven.txt") in changeset.added 66 | 67 | def test_change_content(self, monkeypatch, fs): 68 | self.mock_fs(fs) 69 | monkeypatch.setattr(File, "archive", mock_file_archive) 70 | 71 | changeset = scan(includes=["/src"]) 72 | changeset.commit(archive_config=self.get_archive_config()) 73 | 74 | Path("/src/one.txt").write_text("one updated") 75 | Path("/src/dir/three.txt").write_text("three updated") 76 | changeset = scan(includes=["/src"]) 77 | changeset.commit(archive_config=self.get_archive_config()) 78 | 79 | assert len(changeset.added.keys()) == 0 80 | assert len(changeset.content.keys()) == 2 81 | assert Path("/src/one.txt") in changeset.content 82 | assert Path("/src/dir/three.txt") in changeset.content 83 | assert len(changeset.metadata.keys()) == 0 84 | assert len(changeset.deleted.keys()) == 0 85 | 86 | def test_change_metadata(self, monkeypatch, fs): 87 | self.mock_fs(fs) 88 | monkeypatch.setattr(File, "archive", mock_file_archive) 89 | 90 | changeset = scan(includes=["/src"]) 91 | changeset.commit(archive_config=self.get_archive_config()) 92 | 93 | Path("/src/one.txt").chmod(0o444) 94 | os.chown("/src/dir/three.txt", 1, 1) 95 | changeset = scan(includes=["/src"]) 96 | changeset.commit(archive_config=self.get_archive_config()) 97 | 98 | assert len(changeset.added.keys()) == 0 99 | assert len(changeset.content.keys()) == 0 100 | assert len(changeset.metadata.keys()) == 2 101 | assert Path("/src/one.txt") in changeset.metadata 102 | assert Path("/src/dir/three.txt") in changeset.metadata 103 | assert len(changeset.deleted.keys()) == 0 104 | 105 | def test_delete(self, monkeypatch, fs): 106 | self.mock_fs(fs) 107 | monkeypatch.setattr(File, "archive", mock_file_archive) 108 | 109 | changeset = scan(includes=["/src"]) 110 | changeset.commit(archive_config=self.get_archive_config()) 111 | 112 | Path("/src/one.txt").unlink() 113 | Path("/src/dir/three.txt").unlink() 114 | changeset = scan(includes=["/src"]) 115 | changeset.commit(archive_config=self.get_archive_config()) 116 | 117 | assert len(changeset.added.keys()) == 0 118 | assert len(changeset.content.keys()) == 0 119 | assert len(changeset.metadata.keys()) == 0 120 | assert len(changeset.deleted.keys()) == 2 121 | assert Path("/src/one.txt") in changeset.deleted 122 | assert Path("/src/dir/three.txt") in changeset.deleted 123 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test serac/config.py 3 | """ 4 | from configparser import ConfigParser 5 | from pathlib import Path 6 | 7 | import pytest 8 | 9 | from serac.config import ArchiveConfig, Config, IndexConfig, SourceConfig 10 | from serac.storage import S3, Local 11 | 12 | from .mocks import SAMPLE_CONFIG, SAMPLE_STORAGE_LOCAL, SAMPLE_STORAGE_S3 13 | 14 | 15 | def test_parser_source__valid(fs): 16 | fs.create_file( 17 | "/sample.conf", contents=SAMPLE_CONFIG.format(storage=SAMPLE_STORAGE_LOCAL) 18 | ) 19 | fs.create_dir("/path/to") 20 | fs.create_dir("/path/to/backup") 21 | config = Config(filename="/sample.conf") 22 | 23 | assert isinstance(config.source, SourceConfig) 24 | assert config.source.includes == ["/path/to/source", "/path/somewhere/else"] 25 | assert config.source.excludes == [ 26 | "/path/to/source/unprocessed", 27 | "/path/somewhere/else/*.jpg", 28 | ] 29 | 30 | 31 | def test_parser_source__missing_includes__raises_exception(): 32 | parser = ConfigParser() 33 | parser.read_string( 34 | """ 35 | [source] 36 | excludes = one 37 | """ 38 | ) 39 | 40 | with pytest.raises(ValueError) as e: 41 | SourceConfig.from_config(parser["source"]) 42 | assert str(e.value) == "The source section must declare at least one include" 43 | 44 | 45 | def test_parser_archive__local(fs): 46 | fs.create_file( 47 | "/sample.conf", contents=SAMPLE_CONFIG.format(storage=SAMPLE_STORAGE_LOCAL) 48 | ) 49 | fs.create_dir("/path/to") 50 | fs.create_dir("/path/to/backup") 51 | config = Config(filename="/sample.conf") 52 | 53 | assert isinstance(config.archive, ArchiveConfig) 54 | assert isinstance(config.archive.storage, Local) 55 | assert config.archive.storage.path == Path("/path/to/backup") 56 | assert config.archive.password == "l0ng_s3cr3t" 57 | 58 | 59 | def test_parser_archive__s3(fs): 60 | fs.create_file( 61 | "/sample.conf", contents=SAMPLE_CONFIG.format(storage=SAMPLE_STORAGE_S3) 62 | ) 63 | fs.create_dir("/path/to") 64 | config = Config(filename="/sample.conf") 65 | 66 | assert isinstance(config.archive, ArchiveConfig) 67 | assert isinstance(config.archive.storage, S3) 68 | assert config.archive.storage.key == "4p1_k3y" 69 | assert config.archive.storage.secret == "53cr3t" 70 | assert config.archive.storage.bucket == "arn:aws:s3:::my_bucket_name" 71 | assert config.archive.storage.path == "path/within/bucket" 72 | assert config.archive.password == "l0ng_s3cr3t" 73 | 74 | 75 | def test_parser_archive__missing_storage_type__raises_exception(): 76 | parser = ConfigParser() 77 | parser.read_string( 78 | """ 79 | [archive] 80 | password=set 81 | """ 82 | ) 83 | 84 | with pytest.raises(ValueError) as e: 85 | ArchiveConfig.from_config(parser["archive"]) 86 | assert str(e.value) == "The archive section must declare a storage type" 87 | 88 | 89 | def test_parser_archive__unknown_storage_type__raises_exception(): 90 | parser = ConfigParser() 91 | parser.read_string( 92 | """ 93 | [archive] 94 | storage=invalid 95 | password=set 96 | """ 97 | ) 98 | 99 | with pytest.raises(ValueError) as e: 100 | ArchiveConfig.from_config(parser["archive"]) 101 | assert str(e.value) == "The archive storage 'invalid' is not recognised" 102 | 103 | 104 | def test_parser_index(fs): 105 | fs.create_file( 106 | "/sample.conf", contents=SAMPLE_CONFIG.format(storage=SAMPLE_STORAGE_LOCAL) 107 | ) 108 | fs.create_dir("/path/to") 109 | fs.create_dir("/path/to/backup") 110 | config = Config(filename="/sample.conf") 111 | 112 | assert isinstance(config.index, IndexConfig) 113 | assert config.index.path == Path("/path/to/index.sqlite") 114 | 115 | 116 | def test_parser_index__path_missing__raises_exception(): 117 | parser = ConfigParser() 118 | parser.read_string( 119 | """ 120 | [index] 121 | missing=path 122 | """ 123 | ) 124 | 125 | with pytest.raises(ValueError) as e: 126 | IndexConfig.from_config(parser["index"]) 127 | assert str(e.value) == "The index section must declare a path" 128 | 129 | 130 | def test_parser_index__path_does_not_exit__raises_exception(): 131 | parser = ConfigParser() 132 | parser.read_string( 133 | """ 134 | [index] 135 | path=/does/not/exist 136 | """ 137 | ) 138 | 139 | with pytest.raises(ValueError) as e: 140 | IndexConfig.from_config(parser["index"]) 141 | assert str(e.value) == "The path for the index does not exist" 142 | 143 | 144 | def test_parser_config__sections_missing__raises_exception(fs): 145 | fs.create_file( 146 | "/sample.conf", 147 | contents=( 148 | """ 149 | [invalid] 150 | config=file 151 | """ 152 | ), 153 | ) 154 | 155 | with pytest.raises(ValueError) as e: 156 | Config(filename="/sample.conf") 157 | assert str(e.value) == ( 158 | "Invalid config file; must contain source, archive and " 159 | f"index sections; instead found invalid" 160 | ) 161 | 162 | 163 | def test_parser_config__archive_section_missing__raises_exception(fs): 164 | fs.create_file( 165 | "/sample.conf", 166 | contents=""" 167 | [source] 168 | includes=value 169 | [index] 170 | path=somewhere 171 | """, 172 | ) 173 | 174 | with pytest.raises(ValueError) as e: 175 | Config(filename="/sample.conf") 176 | assert str(e.value) == ( 177 | "Invalid config file; must contain source, archive and " 178 | f"index sections; instead found source, index" 179 | ) 180 | -------------------------------------------------------------------------------- /serac/commands.py: -------------------------------------------------------------------------------- 1 | """ 2 | Commands 3 | """ 4 | import fcntl 5 | import sys 6 | from datetime import datetime 7 | from pathlib import Path 8 | from time import time 9 | from typing import Dict, Optional, Type, Union 10 | 11 | import click 12 | 13 | from .config import Config 14 | from .exceptions import SeracException 15 | from .index import Changeset, Pattern, State, database, restore, scan, search 16 | from .reporter import NullReporter, Reporter, StdoutReporter 17 | 18 | 19 | class Timestamp(click.DateTime): # type: ignore # due to typeshed issue 20 | """ 21 | Store a datetime or timestamp 22 | """ 23 | 24 | def get_metavar(self, param): 25 | return "[timestamp|{}]".format("|".join(self.formats)) 26 | 27 | def convert(self, value, param, ctx) -> int: 28 | if value.isdigit(): 29 | return int(value) 30 | try: 31 | dt = super().convert(value, param, ctx) 32 | except click.BadParameter: 33 | self.fail( 34 | "invalid datetime format: {}. (choose from timestamp, {})".format( 35 | value, ", ".join(self.formats) 36 | ) 37 | ) 38 | return int(dt.timestamp()) 39 | 40 | def __repr__(self): # pragma: no cover 41 | return "Timestamp" 42 | 43 | 44 | @click.group() 45 | @click.argument( 46 | "config", 47 | type=click.Path(exists=True, file_okay=True, dir_okay=False, resolve_path=True), 48 | ) 49 | @click.pass_context 50 | def cli(ctx, config: str): 51 | try: 52 | ctx.obj["config"] = Config(config) 53 | except Exception as e: 54 | raise click.ClickException(f"Invalid config: {e}") 55 | 56 | # Lock - only one process on a config at a time 57 | ctx.obj["lock"] = open(config, "r") 58 | try: 59 | fcntl.flock(ctx.obj["lock"], fcntl.LOCK_EX | fcntl.LOCK_NB) 60 | except IOError: 61 | raise click.ClickException( 62 | f"Config {config} is already in use by another process" 63 | ) 64 | 65 | 66 | @cli.command() 67 | @click.pass_context 68 | def test(ctx): 69 | """ 70 | Test the config file is valid 71 | """ 72 | # If it reaches this, the config file has been parsed 73 | sys.stdout.write("Config file syntax is correct\n") 74 | 75 | 76 | @cli.command() 77 | @click.pass_context 78 | def init(ctx): 79 | """ 80 | Create a new index database 81 | """ 82 | config: Config = ctx.obj["config"] 83 | if config.index.path.exists(): 84 | raise click.ClickException(f"Index database {config.index.path} already exists") 85 | database.create_db(config.index.path) 86 | database.disconnect() 87 | sys.stdout.write("Index database created\n") 88 | 89 | 90 | @cli.command() 91 | @click.option("--verbose", "-v", is_flag=True, default=False) 92 | @click.pass_context 93 | def archive(ctx, verbose: bool = False): 94 | """ 95 | Scan and archive any changes 96 | """ 97 | report_class: Type[Reporter] = NullReporter 98 | if verbose: 99 | report_class = StdoutReporter 100 | 101 | config: Config = ctx.obj["config"] 102 | database.connect(config.index.path) 103 | 104 | if verbose: 105 | sys.stdout.write("Scanning...\n") 106 | changeset: Changeset = scan( 107 | includes=config.source.includes, excludes=config.source.excludes 108 | ) 109 | changeset.commit(archive_config=config.archive, report_class=report_class) 110 | database.disconnect() 111 | 112 | 113 | @cli.command() 114 | @click.option( 115 | "--at", 116 | "timestamp", 117 | help="Date and time (or timestamp) to go back to", 118 | type=Timestamp(), 119 | ) 120 | @click.option( 121 | "--pattern", "pattern_str", help="Path to file", type=click.Path(exists=False) 122 | ) 123 | @click.pass_context 124 | def ls(ctx, pattern_str: Optional[str] = None, timestamp: Optional[int] = None): 125 | """ 126 | Show the status of the archive 127 | """ 128 | config: Config = ctx.obj["config"] 129 | 130 | if not timestamp: 131 | timestamp = int(time()) 132 | 133 | database.connect(config.index.path) 134 | 135 | files: State = search(timestamp=timestamp, pattern=Pattern(pattern_str)) 136 | 137 | if not files: 138 | if pattern_str: 139 | raise click.ClickException(f"No files found at {pattern_str}") 140 | else: 141 | raise click.ClickException("No files found") 142 | # If no files found, code will not proceed past this condition 143 | 144 | this_year = str(datetime.now().astimezone().year) 145 | for file in files.by_path(): 146 | size_num, size_unit = file.archived.get_human_size() 147 | m_month, m_day, m_year, m_time = file.get_human_last_modified() 148 | sys.stdout.write( 149 | f"{file.permissions_display} " 150 | f"{file.owner_display:<8.8} " 151 | f"{file.group_display:<8.8} " 152 | f"{int(size_num):>4}{size_unit:<1} " 153 | f"{m_month:<3} {m_day.lstrip('0'):>2} " 154 | f"{m_time if m_year == this_year else m_year:>5} " 155 | f"{file.last_modified} " 156 | f"{file.path}" 157 | "\n" 158 | ) 159 | 160 | database.disconnect() 161 | 162 | 163 | @cli.command("restore") 164 | @click.argument("destination", type=click.Path(exists=False)) 165 | @click.option( 166 | "--at", 167 | "timestamp", 168 | help="Date and time (or timestamp) to go back to", 169 | type=Timestamp(), 170 | ) 171 | @click.option( 172 | "--pattern", 173 | "pattern_str", 174 | help="Path to file in archive", 175 | type=click.Path(exists=False), 176 | ) 177 | @click.option( 178 | "--verbose", "-v", default=False, is_flag=True, help="Provide a progress report" 179 | ) 180 | @click.pass_context 181 | def cmd_restore( 182 | ctx, 183 | destination: str, 184 | timestamp: Optional[int] = None, 185 | pattern_str: Optional[str] = None, 186 | verbose: bool = False, 187 | ): 188 | """ 189 | Restore from the archive 190 | """ 191 | config: Config = ctx.obj["config"] 192 | database.connect(config.index.path) 193 | 194 | if not timestamp: 195 | timestamp = int(time()) 196 | 197 | report_class: Type[Reporter] = NullReporter 198 | if verbose: 199 | report_class = StdoutReporter 200 | 201 | restored: Dict[str, Union[bool, SeracException]] = restore( 202 | archive_config=config.archive, 203 | timestamp=timestamp, 204 | destination_path=Path(destination), 205 | pattern=Pattern(pattern_str), 206 | missing_ok=True, 207 | report_class=report_class, 208 | ) 209 | 210 | if restored: 211 | if verbose: 212 | sys.stdout.write( 213 | f"Restored {len(restored)} file{'' if len(restored) == 1 else 's'}\n" 214 | ) 215 | else: 216 | raise click.ClickException(f"Path not found") 217 | 218 | database.disconnect() 219 | -------------------------------------------------------------------------------- /serac/index/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Database models 3 | """ 4 | from __future__ import annotations 5 | 6 | import grp 7 | import pwd 8 | from datetime import datetime 9 | from enum import IntEnum 10 | from hashlib import sha256 11 | from pathlib import Path 12 | from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union 13 | 14 | from peewee import CharField, ForeignKeyField, IntegerField, TextField 15 | 16 | from .database import EnumField, Model, PathField 17 | 18 | 19 | if TYPE_CHECKING: 20 | from ..config import ArchiveConfig # pragma: no cover 21 | 22 | 23 | _uid_cache: Dict[int, str] = {} 24 | _gid_cache: Dict[int, str] = {} 25 | 26 | 27 | def uid_to_name(uid: int) -> str: 28 | """ 29 | Given a system user, try to resolve it on the current system 30 | """ 31 | if uid not in _uid_cache: 32 | try: 33 | _uid_cache[uid] = pwd.getpwuid(uid).pw_name 34 | except (AttributeError, KeyError): 35 | _uid_cache[uid] = str(uid) 36 | return _uid_cache[uid] 37 | 38 | 39 | def gid_to_name(gid: int) -> str: 40 | """ 41 | Given a system group, try to resolve it on the current system 42 | """ 43 | if gid not in _gid_cache: 44 | try: 45 | _gid_cache[gid] = grp.getgrgid(gid).gr_name 46 | except (AttributeError, KeyError): 47 | _gid_cache[gid] = str(gid) 48 | return _gid_cache[gid] 49 | 50 | 51 | class Action(IntEnum): 52 | ADD = 1 53 | CONTENT = 2 54 | METADATA = 3 55 | DELETE = 4 56 | 57 | 58 | class Archived(Model): 59 | """ 60 | Represents an object stored in the archive 61 | 62 | Identified by a sha256 hash 63 | """ 64 | 65 | hash: Union[TextField, str] = CharField(max_length=64) 66 | size: Union[IntegerField, int] = IntegerField() 67 | 68 | def get_human_size(self): 69 | size = self.size 70 | for unit in ["", "K", "M", "G", "T"]: 71 | if size < 1024: 72 | break 73 | if unit != "T": 74 | size /= 1024.0 75 | return size, unit 76 | 77 | 78 | class File(Model): 79 | """ 80 | A file at a path 81 | """ 82 | 83 | path: Union[PathField, Path] = PathField() 84 | archived: Union[ForeignKeyField, Archived] = ForeignKeyField( 85 | Archived, backref="files" 86 | ) 87 | action: Union[EnumField, Action] = EnumField(Action) 88 | last_modified: Union[IntegerField, int] = IntegerField() 89 | owner: Union[IntegerField, int] = IntegerField() 90 | group: Union[IntegerField, int] = IntegerField() 91 | permissions: Union[IntegerField, int] = IntegerField() 92 | 93 | _meta_fields = [ 94 | # attributes 95 | "last_modified", 96 | "owner", 97 | "group", 98 | "permissions", 99 | # 100 | # cached property, calculated or read from self.archived 101 | "size", 102 | ] 103 | _cached_hash: Optional[str] = None 104 | _size: Optional[int] = None 105 | 106 | def __str__(self): 107 | return str(self.path) 108 | 109 | def __eq__(self, other) -> bool: 110 | """ 111 | Check if path and metadata match 112 | """ 113 | return self.path == other.path and all( 114 | [getattr(self, attr) == getattr(other, attr) for attr in self._meta_fields] 115 | ) 116 | 117 | def clone(self, **overrides) -> File: 118 | # Copy all field values 119 | attrs: Dict[str, Any] = { 120 | field_name: getattr(self, field_name) 121 | for field_name in File._meta.fields.keys() 122 | if field_name not in ["id", "archived"] 123 | } 124 | try: 125 | attrs["archived"] = self.archived 126 | except Archived.DoesNotExist: 127 | pass 128 | attrs.update(overrides) 129 | return File(**attrs) 130 | 131 | def refresh_metadata_from_disk(self) -> None: 132 | """ 133 | Update metadata by checking the path on disk 134 | """ 135 | if not self.path.exists(): 136 | raise ValueError(f"File {self.path} has disappeared") 137 | if not self.path.is_file(): 138 | raise ValueError(f"File {self.path} is not a file") 139 | stat = self.path.stat() 140 | self.last_modified = int(stat.st_mtime) 141 | self._size = stat.st_size 142 | self.owner = stat.st_uid 143 | self.group = stat.st_gid 144 | self.permissions = stat.st_mode 145 | 146 | @property 147 | def size(self) -> int: 148 | if self._size is None: 149 | try: 150 | if self.archived: 151 | return self.archived.size 152 | except Archived.DoesNotExist: 153 | raise ValueError("Cannot access size without metadata") 154 | return self._size # type: ignore # mypy doesn't understand 155 | 156 | @property 157 | def owner_display(self) -> str: 158 | """ 159 | Return the owner username according to this system 160 | """ 161 | return uid_to_name(self.owner) 162 | 163 | @property 164 | def group_display(self) -> str: 165 | """ 166 | Return the owner username according to this system 167 | """ 168 | return gid_to_name(self.group) 169 | 170 | @property 171 | def permissions_display(self) -> str: 172 | """ 173 | Return permissions as a human-readable 10 character string, eg: 174 | 175 | -rwxr-xr-x 176 | """ 177 | if not self.permissions: 178 | return "-" * 10 179 | parts = ["-"] 180 | bits = [(4, "r"), (2, "w"), (1, "x")] 181 | for perm_char in oct(self.permissions)[-3:]: 182 | perm = int(perm_char) 183 | for bit, label in bits: 184 | if perm >= bit: 185 | parts.append(label) 186 | perm -= bit 187 | else: 188 | parts.append("-") 189 | return "".join(parts) 190 | 191 | def get_human_last_modified(self) -> List[str]: 192 | """ 193 | Return last modified date as tuple ready to be rendered as a 194 | human-readable string:: 195 | 196 | (month_abbr, day_num, year, HH:MM) 197 | """ 198 | if not self.last_modified: 199 | return ["", "", "", ""] 200 | 201 | dt = datetime.utcfromtimestamp(self.last_modified) 202 | dt_local = dt.astimezone() 203 | return dt_local.strftime("%b %d %Y %H:%M").split(" ") 204 | 205 | def calculate_hash(self, force=False) -> str: 206 | """ 207 | Calculate file hash 208 | """ 209 | # Based on: 210 | # https://gist.github.com/aunyks/042c2798383f016939c40aa1be4f4aaf 211 | if not self._cached_hash: 212 | # Specify how many bytes of the file you want to open at a time 213 | block_size = 65536 214 | sha = sha256() 215 | with self.path.open("rb") as file: 216 | file_buffer = file.read(block_size) 217 | while len(file_buffer) > 0: 218 | sha.update(file_buffer) 219 | file_buffer = file.read(block_size) 220 | 221 | self._cached_hash = sha.hexdigest() 222 | 223 | return self._cached_hash 224 | 225 | def archive(self, archive_config: ArchiveConfig) -> None: 226 | """ 227 | Push to the archive 228 | 229 | Creates Archived object and sets it on this object, saving this File object 230 | """ 231 | # Ensure this object is not yet in the database 232 | # If it is, it will already have been archived (File.archived is required) 233 | if self.id: 234 | raise ValueError("Cannot archive a file twice") 235 | 236 | # Create Archived object with hash to get ID 237 | # This should be created regardless of whether the archive succeeds 238 | archived = Archived.create(hash=self.calculate_hash(), size=self.size) 239 | 240 | try: 241 | # Store the file 242 | archive_config.storage.store( 243 | local_path=self.path, 244 | archive_id=str(archived.id), 245 | password=archive_config.password, 246 | ) 247 | 248 | except Exception as e: 249 | # Null the Archived hash rather than delete it, to prevent it being reused 250 | archived.hash = "" 251 | archived.save() 252 | raise ValueError(f"Unable to archive {self.path}: {e}") 253 | 254 | else: 255 | # Link archived object to this file 256 | self.archived = archived 257 | self.save() 258 | 259 | def restore(self, archive_config: ArchiveConfig, to: Path) -> None: 260 | """ 261 | Restore from the archive 262 | """ 263 | archive_config.storage.retrieve( 264 | local_path=to, 265 | archive_id=str(self.archived.id), 266 | password=archive_config.password, 267 | ) 268 | -------------------------------------------------------------------------------- /tests/index/test_index__restore.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test restore() in serac/index/index.py 3 | """ 4 | from datetime import datetime 5 | from pathlib import Path 6 | from time import time 7 | 8 | import pytest 9 | 10 | from serac.exceptions import FileExists, SeracException 11 | from serac.index.index import Pattern, restore 12 | 13 | from .test_index import IndexTestBase 14 | 15 | 16 | class TestIndexRestore(IndexTestBase): 17 | def test_restore_file__from_head__restores_single_file(self, fs, freezer): 18 | initial_time, update_time = self.mock_two_states(fs, freezer) 19 | restored = restore( 20 | archive_config=self.get_archive_config(), 21 | timestamp=int(time()), 22 | destination_path=Path("/retrieved"), 23 | pattern=Pattern("/src/dir/three.txt"), 24 | ) 25 | 26 | assert restored == {"/src/dir/three.txt": True} 27 | assert Path("/retrieved/three.txt").is_file() 28 | assert Path("/retrieved/three.txt").read_text() == "updated" 29 | 30 | def test_restore_file__from_past__restores_single_file(self, fs, freezer): 31 | initial_time, update_time = self.mock_two_states(fs, freezer) 32 | restored = restore( 33 | archive_config=self.get_archive_config(), 34 | timestamp=int(initial_time.timestamp()), 35 | destination_path=Path("/retrieved"), 36 | pattern=Pattern("/src/dir/three.txt"), 37 | ) 38 | 39 | assert restored == {"/src/dir/three.txt": True} 40 | assert Path("/retrieved").is_dir() 41 | assert Path("/retrieved/three.txt").is_file() 42 | assert Path("/retrieved/three.txt").read_text() == "three" 43 | 44 | def test_restore_dir__from_head__restores_some_files(self, fs, freezer): 45 | initial_time, update_time = self.mock_two_states(fs, freezer) 46 | restored = restore( 47 | archive_config=self.get_archive_config(), 48 | timestamp=int(time()), 49 | destination_path=Path("/retrieved"), 50 | pattern=Pattern("/src/dir"), 51 | ) 52 | 53 | assert restored == { 54 | "/src/dir/three.txt": True, 55 | "/src/dir/four.txt": True, 56 | "/src/dir/subdir/five.txt": True, 57 | } 58 | assert Path("/retrieved/three.txt").is_file() 59 | assert Path("/retrieved/three.txt").read_text() == "updated" 60 | assert Path("/retrieved/four.txt").is_file() 61 | assert Path("/retrieved/four.txt").read_text() == "four" 62 | assert Path("/retrieved/subdir/five.txt").is_file() 63 | assert Path("/retrieved/subdir/five.txt").read_text() == "five" 64 | 65 | def test_restore_dir__from_past__restores_some_files(self, fs, freezer): 66 | initial_time, update_time = self.mock_two_states(fs, freezer) 67 | restored = restore( 68 | archive_config=self.get_archive_config(), 69 | timestamp=int(initial_time.timestamp()), 70 | destination_path=Path("/retrieved"), 71 | pattern=Pattern("/src/dir"), 72 | ) 73 | 74 | assert restored == { 75 | "/src/dir/three.txt": True, 76 | "/src/dir/four.txt": True, 77 | "/src/dir/subdir/five.txt": True, 78 | } 79 | assert Path("/retrieved/three.txt").is_file() 80 | assert Path("/retrieved/three.txt").read_text() == "three" 81 | assert Path("/retrieved/four.txt").is_file() 82 | assert Path("/retrieved/four.txt").read_text() == "four" 83 | assert Path("/retrieved/subdir/five.txt").is_file() 84 | assert Path("/retrieved/subdir/five.txt").read_text() == "five" 85 | 86 | def test_restore_all__from_head__restores_all_files(self, fs, freezer): 87 | initial_time, update_time = self.mock_two_states(fs, freezer) 88 | restored = restore( 89 | archive_config=self.get_archive_config(), 90 | timestamp=int(time()), 91 | destination_path=Path("/retrieved"), 92 | ) 93 | 94 | assert restored == { 95 | "/src/one.txt": True, 96 | "/src/two.txt": True, 97 | "/src/dir/three.txt": True, 98 | "/src/dir/four.txt": True, 99 | "/src/dir/subdir/five.txt": True, 100 | } 101 | assert Path("/retrieved/src/one.txt").is_file() 102 | assert Path("/retrieved/src/one.txt").read_text() == "one" 103 | assert Path("/retrieved/src/two.txt").is_file() 104 | assert Path("/retrieved/src/two.txt").read_text() == "two" 105 | assert Path("/retrieved/src/dir/three.txt").is_file() 106 | assert Path("/retrieved/src/dir/three.txt").read_text() == "updated" 107 | assert Path("/retrieved/src/dir/four.txt").is_file() 108 | assert Path("/retrieved/src/dir/four.txt").read_text() == "four" 109 | assert Path("/retrieved/src/dir/subdir/five.txt").is_file() 110 | assert Path("/retrieved/src/dir/subdir/five.txt").read_text() == "five" 111 | 112 | def test_restore_all__from_past__restores_all_files(self, fs, freezer): 113 | initial_time, update_time = self.mock_two_states(fs, freezer) 114 | restored = restore( 115 | archive_config=self.get_archive_config(), 116 | timestamp=int(initial_time.timestamp()), 117 | destination_path=Path("/retrieved"), 118 | ) 119 | 120 | assert restored == { 121 | "/src/one.txt": True, 122 | "/src/two.txt": True, 123 | "/src/dir/three.txt": True, 124 | "/src/dir/four.txt": True, 125 | "/src/dir/subdir/five.txt": True, 126 | } 127 | assert Path("/retrieved/src/one.txt").is_file() 128 | assert Path("/retrieved/src/one.txt").read_text() == "one" 129 | assert Path("/retrieved/src/two.txt").is_file() 130 | assert Path("/retrieved/src/two.txt").read_text() == "two" 131 | assert Path("/retrieved/src/dir/three.txt").is_file() 132 | assert Path("/retrieved/src/dir/three.txt").read_text() == "three" 133 | assert Path("/retrieved/src/dir/four.txt").is_file() 134 | assert Path("/retrieved/src/dir/four.txt").read_text() == "four" 135 | assert Path("/retrieved/src/dir/subdir/five.txt").is_file() 136 | assert Path("/retrieved/src/dir/subdir/five.txt").read_text() == "five" 137 | 138 | def test_restore_missing__missing_ok__returns_zero(self, fs, freezer): 139 | initial_time, update_time = self.mock_two_states(fs, freezer) 140 | restored = restore( 141 | archive_config=self.get_archive_config(), 142 | timestamp=int(time()), 143 | destination_path=Path("/retrieved"), 144 | pattern=Pattern("/does/not.exist"), 145 | missing_ok=True, 146 | ) 147 | assert restored == {} 148 | 149 | def test_restore_missing__missing_not_ok__raises_exception(self, fs, freezer): 150 | initial_time, update_time = self.mock_two_states(fs, freezer) 151 | 152 | with pytest.raises(SeracException) as e: 153 | restore( 154 | archive_config=self.get_archive_config(), 155 | timestamp=int(time()), 156 | destination_path=Path("/retrieved"), 157 | pattern=Pattern("/does/not.exist"), 158 | missing_ok=False, 159 | ) 160 | assert str(e.value) == "Requested path not found in archive" 161 | 162 | def test_restore_missing_empty__missing_not_ok__raises_exception(self, fs, freezer): 163 | with pytest.raises(SeracException) as e: 164 | restore( 165 | archive_config=self.get_archive_config(), 166 | timestamp=int(time()), 167 | destination_path=Path("/retrieved"), 168 | missing_ok=False, 169 | ) 170 | assert str(e.value) == "Archive is empty" 171 | 172 | def test_state_at_datetime__raise_exception(self, fs): 173 | now = datetime.now() 174 | 175 | with pytest.raises(ValueError) as e: 176 | restore( 177 | archive_config=self.get_archive_config(), 178 | timestamp=now, 179 | destination_path=Path("/retrieved"), 180 | missing_ok=False, 181 | ) 182 | assert str(e.value) == "Can only restore using a timestamp" 183 | 184 | def test_restore_dir__file_exists__restores_other_files(self, fs, freezer): 185 | initial_time, update_time = self.mock_two_states(fs, freezer) 186 | Path("/retrieved/three.txt").write_text("original") 187 | restored = restore( 188 | archive_config=self.get_archive_config(), 189 | timestamp=int(time()), 190 | destination_path=Path("/retrieved"), 191 | pattern=Pattern("/src/dir"), 192 | ) 193 | 194 | assert len(restored) == 3 195 | assert isinstance(restored["/src/dir/three.txt"], FileExists) 196 | assert restored["/src/dir/four.txt"] is True 197 | assert restored["/src/dir/subdir/five.txt"] is True 198 | 199 | assert Path("/retrieved/three.txt").is_file() 200 | assert Path("/retrieved/three.txt").read_text() == "original" 201 | assert Path("/retrieved/four.txt").is_file() 202 | assert Path("/retrieved/four.txt").read_text() == "four" 203 | assert Path("/retrieved/subdir/five.txt").is_file() 204 | assert Path("/retrieved/subdir/five.txt").read_text() == "five" 205 | -------------------------------------------------------------------------------- /tests/mocks.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mock objects 3 | """ 4 | import shutil 5 | import socket 6 | import threading 7 | from datetime import datetime 8 | from pathlib import Path 9 | from time import time 10 | from typing import IO, Type 11 | 12 | import boto3 13 | from peewee import Database, SqliteDatabase 14 | 15 | from serac import storage 16 | from serac.config import ArchiveConfig 17 | from serac.index import database, models 18 | from serac.storage import Storage 19 | 20 | 21 | class BaseTest: 22 | """ 23 | Abstract base for base test classes 24 | 25 | Simplifies using multiple base test classes on a single test class 26 | """ 27 | 28 | def setup_method(self): 29 | pass 30 | 31 | def teardown_method(self): 32 | pass 33 | 34 | 35 | class DatabaseTest(BaseTest): 36 | """ 37 | Base for test classes which use the database 38 | """ 39 | 40 | def setup_method(self): 41 | database.create_db(path=Path(":memory:")) 42 | super().setup_method() 43 | 44 | def teardown_method(self): 45 | database.get_current_db().close() 46 | super().teardown_method() 47 | 48 | 49 | class FilesystemTest(BaseTest): 50 | """ 51 | Base for test classes which use the file system 52 | """ 53 | 54 | def fix_boto(self, fs): 55 | """ 56 | pyfakefs is incompatible with boto - this will pass through the boto 57 | package so it will function correctly 58 | """ 59 | boto_dir = Path(boto3.__file__).parent.parent.absolute() 60 | fs.add_real_directory(str(boto_dir), lazy_read=False) 61 | 62 | def mock_fs(self, fs): 63 | """ 64 | Create mock filesystem ready for testing against 65 | """ 66 | fs.create_file("/src/one.txt", contents="one") 67 | fs.create_file("/src/two.txt", contents="two") 68 | fs.create_file("/src/dir/three.txt", contents="three") 69 | fs.create_file("/src/dir/four.txt", contents="four") 70 | fs.create_file("/src/dir/subdir/five.txt", contents="five") 71 | fs.create_file("/alt/six.txt", contents="six") 72 | fs.create_file("/alt/seven.txt", contents="seven") 73 | 74 | def get_archive_config(self): 75 | return ArchiveConfig( 76 | storage=storage.Local(path=Path("/dest/")), password="secret" 77 | ) 78 | 79 | 80 | class LiveFilesystemTest(BaseTest): 81 | """ 82 | Base for test classes which use the real file system 83 | """ 84 | 85 | TEST_ROOT = Path.cwd() 86 | TEST_PATH = TEST_ROOT / "serac-fs-test" 87 | 88 | def setup_method(self): 89 | super().setup_method() 90 | 91 | assert self.TEST_ROOT.is_dir() 92 | assert not self.TEST_PATH.is_dir() 93 | self.TEST_PATH.mkdir(exist_ok=False) 94 | 95 | def mock_fs(self): 96 | self.create_file("src/one.txt", contents="one") 97 | self.create_file("src/two.txt", contents="two") 98 | self.create_file("src/dir/three.txt", contents="three") 99 | self.create_file("src/dir/four.txt", contents="four") 100 | self.create_file("src/dir/subdir/five.txt", contents="five") 101 | self.create_file("alt/six.txt", contents="six") 102 | self.create_file("alt/seven.txt", contents="seven") 103 | 104 | def create_file(self, filename: str, contents: str) -> None: 105 | path = self.TEST_PATH / filename 106 | path.parent.mkdir(exist_ok=True) 107 | with path.open(mode="w") as file: 108 | file.write(contents) 109 | 110 | def get_path(self, filename: str) -> Path: 111 | return self.TEST_PATH / filename 112 | 113 | def teardown_method(self): 114 | super().teardown_method() 115 | shutil.rmtree(self.TEST_PATH) 116 | 117 | 118 | class MockDatabase: 119 | """ 120 | Context manager to create an in-memory sqlite database and create any 121 | Models defined within the context. The database will be closed at the 122 | when execution leaves the context variable's scope, at the end of the test 123 | 124 | Example: 125 | 126 | def test_create(): 127 | with MockDatabase() as tdb: 128 | class FakeModel(Model): 129 | name = CharField() 130 | FakeModel.create(name='test') 131 | """ 132 | 133 | db_cls: Type[Database] 134 | path: Path 135 | test_db: Database 136 | main_db: Database 137 | 138 | def __init__( 139 | self, db_cls: Database = SqliteDatabase, path: Path = Path(":memory:") 140 | ): 141 | self.db_cls = db_cls 142 | self.path = path 143 | 144 | def __enter__(self) -> Database: 145 | """ 146 | Start context by: 147 | * switching current db to test db 148 | * returning self so destructor called at end of test scope, not context 149 | """ 150 | self.test_db = self.db_cls(None) 151 | self.main_db = database.get_current_db() 152 | database.set_current_db(self.test_db) 153 | return self 154 | 155 | def __exit__(self, *args): 156 | """ 157 | End context by: 158 | * creating test db 159 | * switching back to main db 160 | """ 161 | database.create_db(path=self.path, database=self.test_db) 162 | database.set_current_db(self.main_db) 163 | 164 | def __del__(self): 165 | """ 166 | Once this context leaves scope, clean up all records 167 | """ 168 | # Close db - this is not done automatically 169 | self.test_db.close() 170 | 171 | # Remove from models registry 172 | del database.models[self.test_db] 173 | 174 | 175 | class FlawedStorage(Storage): 176 | """ 177 | A storage class which is intentionally flawed and will fail during upload 178 | """ 179 | 180 | # Mark it as abstract so it doesn't get registered and throw the tests 181 | # It will still function for testing purposes 182 | abstract = True 183 | 184 | def write(self, archive_id: str) -> IO[bytes]: 185 | lock = threading.Lock() 186 | 187 | def listen(): 188 | # Start server on port 8000 189 | server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 190 | server.bind(("127.0.0.1", 8000)) 191 | server.listen(1) 192 | lock.release() # can release now the socket is ready 193 | client, addr = server.accept() # blocks until connection is made 194 | client.recv(10) # blocks until data is ready 195 | client.close() 196 | server.close() 197 | 198 | # Lock until listener's socket is ready 199 | lock.acquire() 200 | listener = threading.Thread(target=listen, daemon=True) 201 | listener.start() 202 | 203 | # Listener's socket is ready, connect 204 | lock.acquire() 205 | client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 206 | client.connect(("127.0.0.1", 8000)) 207 | lock.release() 208 | return client.makefile(mode="wb") 209 | 210 | 211 | class TmpFs: 212 | def __init__(self, filename): 213 | self.filename = filename 214 | self.path = f"/tmp/_serac_test_{self.filename}" 215 | 216 | def __enter__(self): 217 | return self.path 218 | 219 | def __exit__(self, *args): 220 | path = Path(self.path) 221 | if path.exists(): 222 | path.unlink() 223 | 224 | 225 | def gen_archived(**kwargs): 226 | attrs = dict(hash="abc", size=123) 227 | attrs.update(kwargs) 228 | return models.Archived.create(**attrs) 229 | 230 | 231 | def gen_file(**kwargs): 232 | archived = kwargs.get("archived") 233 | if not isinstance(archived, models.Archived): 234 | archived_attrs = {} 235 | if archived is not None: 236 | archived_attrs["hash"] = archived 237 | archived = gen_archived(**archived_attrs) 238 | 239 | attrs = dict( 240 | path="/tmp/foo", 241 | archived=archived, 242 | action=models.Action.ADD, 243 | last_modified=int(time()), 244 | owner=1000, 245 | group=1000, 246 | permissions=644, 247 | ) 248 | attrs.update(kwargs) 249 | 250 | if isinstance(attrs["last_modified"], datetime): 251 | attrs["last_modified"] = int(attrs["last_modified"].timestamp()) 252 | 253 | attrs["path"] = Path(attrs["path"]) 254 | return models.File.create(**attrs) 255 | 256 | 257 | def mock_file_archive(self: models.File, hash: str = "hash") -> None: 258 | self.archived = models.Archived.create(hash=self.calculate_hash(), size=self.size) 259 | self.save() 260 | 261 | 262 | SAMPLE_CONFIG = """# Sample config file 263 | 264 | [source] 265 | # Define the source for the backups 266 | 267 | # List of paths to include and exclude (glob patterns) 268 | include = 269 | /path/to/source 270 | /path/somewhere/else 271 | exclude = 272 | /path/to/source/unprocessed 273 | /path/somewhere/else/*.jpg 274 | 275 | [archive] 276 | # Define where the backups are saved 277 | 278 | {storage} 279 | 280 | # Encrypt backups with this password 281 | password = l0ng_s3cr3t 282 | 283 | [index] 284 | # Define how indexed files are treated 285 | 286 | # Location for index database 287 | # This should then be backed up by another service, eg duplicity 288 | path = /path/to/index.sqlite 289 | """ 290 | 291 | SAMPLE_STORAGE_LOCAL = """# Backup to a local path 292 | storage = local 293 | path = /path/to/backup 294 | """ 295 | 296 | SAMPLE_STORAGE_S3 = """# Backup to S3 297 | storage = s3 298 | key = 4p1_k3y 299 | secret = 53cr3t 300 | bucket = arn:aws:s3:::my_bucket_name 301 | path = path/within/bucket 302 | """ 303 | -------------------------------------------------------------------------------- /serac/index/index.py: -------------------------------------------------------------------------------- 1 | """ 2 | Index management 3 | """ 4 | from __future__ import annotations 5 | 6 | from collections import defaultdict 7 | from collections.abc import Mapping 8 | from fnmatch import fnmatchcase 9 | from glob import iglob 10 | from itertools import chain 11 | from pathlib import Path 12 | from time import time 13 | from typing import Dict, Iterator, List, Optional, Type, Union 14 | 15 | from peewee import fn 16 | 17 | from ..exceptions import SeracException 18 | from ..reporter import NullReporter, Reporter 19 | from .models import TYPE_CHECKING, Action, File 20 | 21 | 22 | if TYPE_CHECKING: 23 | from ..config import ArchiveConfig # pragma: no cover 24 | 25 | 26 | class Changeset: 27 | """ 28 | Set of changes from an index scan 29 | """ 30 | 31 | added: Dict[Path, File] 32 | content: Dict[Path, File] 33 | metadata: Dict[Path, File] 34 | deleted: Dict[Path, File] 35 | 36 | def __init__(self): 37 | self.added = defaultdict(File) 38 | self.content = defaultdict(File) 39 | self.metadata = defaultdict(File) 40 | self.deleted = defaultdict(File) 41 | 42 | def commit( 43 | self, archive_config: ArchiveConfig, report_class: Type[Reporter] = NullReporter 44 | ) -> None: 45 | for file in chain(self.metadata.values(), self.deleted.values()): 46 | report = report_class(str(file.path), "updating") 47 | file.save() 48 | report.complete("updated") 49 | 50 | for file in chain(self.added.values(), self.content.values()): 51 | report = report_class(str(file.path), "archiving") 52 | file.archive(archive_config) 53 | report.complete("archived") 54 | 55 | 56 | class Pattern: 57 | """ 58 | Represent a filter and process matches against a Path 59 | """ 60 | 61 | def __init__(self, pattern: Optional[str]): 62 | self.str = pattern or "" 63 | self.path = Path(self.str) 64 | 65 | def match(self, path: Path) -> bool: 66 | if not self.str or self.path == path or self.path in path.parents: 67 | return True 68 | return False 69 | 70 | def __eq__(self, other): 71 | return self.str == other.str 72 | 73 | def __bool__(self): 74 | return bool(self.str) 75 | 76 | 77 | class State(Mapping): 78 | """ 79 | Represent the state of the index at a specific time 80 | """ 81 | 82 | def __init__(self, files: List[File]): 83 | self._store: Mapping[Path, File] = {file.path: file for file in files} 84 | super().__init__() 85 | 86 | def __getitem__(self, key): 87 | return self._store[key] 88 | 89 | def __iter__(self): 90 | return iter(self._store) 91 | 92 | def __len__(self): 93 | return len(self._store) 94 | 95 | def pop(self, key, default): 96 | return self._store.pop(key, default) 97 | 98 | @classmethod 99 | def at(cls, timestamp: int) -> State: 100 | """ 101 | Get the state of the index at a given timestamp 102 | """ 103 | if not isinstance(timestamp, int): 104 | # This is going to be a common error, and we don't want to convert it 105 | # ourselves - we won't have the timezone info and we'll make a mistake 106 | raise ValueError("Can only get state using a timestamp") 107 | 108 | file_fields = File._meta.sorted_fields + [ 109 | fn.MAX(File.last_modified).alias("latest_modified") 110 | ] 111 | files = ( 112 | File.select(*file_fields) 113 | .where(File.last_modified <= timestamp) 114 | .group_by(File.path) 115 | .having(File.action != Action.DELETE) 116 | ) 117 | return cls(files) 118 | 119 | def by_path(self): 120 | """ 121 | Return a list of files, sorted by path 122 | """ 123 | return sorted(self.values(), key=lambda file: file.path) 124 | 125 | 126 | def is_excluded(path: Path, excludes: List[str]) -> bool: 127 | for pattern in excludes: 128 | if fnmatchcase(str(path), pattern): 129 | return True 130 | return False 131 | 132 | 133 | def scan(includes: List[str], excludes: Optional[List[str]] = None) -> Changeset: 134 | """ 135 | Scan specified path and return a Changeset 136 | """ 137 | path: Path 138 | path_str: str 139 | file: File 140 | 141 | include_paths: Iterator[Path] = chain.from_iterable( 142 | ((Path(globbed) for globbed in iglob(path_str)) for path_str in includes) 143 | ) 144 | 145 | changeset = Changeset() 146 | last_state: State = State.at(timestamp=int(time())) 147 | 148 | while True: 149 | # Get next path 150 | try: 151 | path = next(include_paths) 152 | except StopIteration: 153 | break 154 | 155 | # Run exclusions 156 | if excludes and is_excluded(path, excludes): 157 | continue 158 | 159 | # Examine path 160 | if path.is_dir(): 161 | # Valid path, but we don't index dirs themselves - search it 162 | include_paths = chain(include_paths, path.iterdir()) 163 | continue 164 | 165 | # Create File and collect metadata 166 | file = File(path=path) 167 | file.refresh_metadata_from_disk() 168 | 169 | # Diff path against last_state (removing so we know we've seen it) 170 | last_file = last_state.pop(path, None) 171 | if last_file is None: 172 | # Added 173 | file.action = Action.ADD 174 | changeset.added[path] = file 175 | 176 | elif file != last_file: 177 | # Something changed 178 | 179 | # If last_modified changed, check the hash 180 | file_hash = file.calculate_hash() 181 | if file_hash != last_file.archived.hash: 182 | # Content has changed 183 | file.action = Action.CONTENT 184 | changeset.content[path] = file 185 | else: 186 | # Just metadata 187 | file.action = Action.METADATA 188 | file.archived = last_file.archived 189 | changeset.metadata[path] = file 190 | 191 | # All remaining files in the state were deleted 192 | changeset.deleted = { 193 | path: file.clone(action=Action.DELETE) for path, file in last_state.items() 194 | } 195 | return changeset 196 | 197 | 198 | def search(timestamp: int, pattern: Optional[Pattern] = None) -> State: 199 | """ 200 | Search the index at the specified timestamp matching the specified filter string. 201 | 202 | Returns a dict of {Path: File} 203 | """ 204 | state: State = State.at(timestamp) 205 | path: Path 206 | 207 | if not pattern: 208 | return state 209 | 210 | files: State = State([file for path, file in state.items() if pattern.match(path)]) 211 | return files 212 | 213 | 214 | def restore( 215 | archive_config: ArchiveConfig, 216 | timestamp: int, 217 | destination_path: Path, 218 | pattern: Pattern = None, 219 | missing_ok: bool = False, 220 | report_class: Type[Reporter] = NullReporter, 221 | ) -> Dict[str, Union[bool, SeracException]]: 222 | """ 223 | Restore one or more files as they were at the specified timestamp, to the 224 | specified destination path. 225 | 226 | If no pattern is specified, restores all files with their full paths 227 | under the specified destination path. 228 | 229 | If a pattern is specified, restores that file or all files under that 230 | path into the specified destination path. 231 | 232 | Returns a dict of ``path: True`` or ``path: Exception`` 233 | """ 234 | if not isinstance(timestamp, int): 235 | # This is going to be a common error, and we don't want to convert it 236 | # ourselves - we won't have the timezone info and we'll make a mistake 237 | raise ValueError("Can only restore using a timestamp") 238 | 239 | state = search(timestamp=timestamp, pattern=pattern) 240 | 241 | # Standardise destination path 242 | archive_path: Optional[Path] 243 | if pattern: 244 | archive_path = pattern.path 245 | else: 246 | archive_path = None 247 | if archive_path and archive_path in state: 248 | if destination_path.is_dir(): 249 | destination_path /= archive_path.name 250 | 251 | path: Path 252 | file: File 253 | restored: Dict[str, Union[bool, SeracException]] = {} 254 | for path, file in state.items(): 255 | if not archive_path or archive_path == path or archive_path in path.parents: 256 | report = report_class(str(path), "") 257 | if archive_path: 258 | target_path = destination_path / file.path.relative_to(archive_path) 259 | else: 260 | target_path = destination_path / file.path.relative_to("/") 261 | target_path.parent.mkdir(parents=True, exist_ok=True) 262 | 263 | try: 264 | report.update("restoring") 265 | file.restore(archive_config=archive_config, to=target_path) 266 | report.complete("restored") 267 | restored[str(path)] = True 268 | except SeracException as e: 269 | report.complete(e.short) 270 | restored[str(path)] = e 271 | 272 | if not missing_ok and not restored: 273 | if archive_path: 274 | raise SeracException( 275 | msg="Requested path not found in archive", short="not found" 276 | ) 277 | else: 278 | raise SeracException(msg="Archive is empty", short="archive empty") 279 | 280 | return restored 281 | -------------------------------------------------------------------------------- /tests/index/test_models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test serac/index/models.py 3 | """ 4 | import grp 5 | import pwd 6 | from datetime import datetime, timedelta 7 | from io import BytesIO 8 | from pathlib import Path 9 | 10 | import pytest 11 | from pyfakefs import fake_filesystem 12 | 13 | from serac import crypto 14 | from serac.config import ArchiveConfig 15 | from serac.index.models import ( 16 | Action, 17 | Archived, 18 | File, 19 | _gid_cache, 20 | _uid_cache, 21 | gid_to_name, 22 | uid_to_name, 23 | ) 24 | from serac.storage import Local 25 | 26 | from ..mocks import DatabaseTest, FilesystemTest, FlawedStorage, gen_file 27 | 28 | 29 | class TestDatabaseTest(DatabaseTest): 30 | """ 31 | Test the DatabaseTest base class operates as expected 32 | """ 33 | 34 | def test_create_first__object_is_only_item(self): 35 | file = gen_file(path="/tmp/foo") 36 | files = File.select() 37 | assert len(files) == 1 38 | assert files[0].path == file.path 39 | 40 | def test_create_second__object_is_only_item(self): 41 | """ 42 | Ensure db is wiped between tests 43 | """ 44 | file = gen_file(path="/tmp/bar") 45 | files = File.select() 46 | assert len(files) == 1 47 | assert files[0].path == file.path 48 | 49 | def test_create_multiple__all_returned(self): 50 | now = datetime.now() 51 | earlier = now - timedelta(days=7) 52 | 53 | file1 = gen_file(path="/tmp/foo", last_modified=earlier) 54 | file2 = gen_file(path="/tmp/foo", last_modified=now) 55 | files = list(File.select()) 56 | 57 | assert len(files) == 2 58 | assert file1 != file2 59 | assert file1 in files 60 | assert file2 in files 61 | 62 | 63 | class TestFile(DatabaseTest, FilesystemTest): 64 | """ 65 | Test the File model 66 | """ 67 | 68 | def test_to_string(self): 69 | file = File(path=Path("/tmp/foo")) 70 | assert str(file) == "/tmp/foo" 71 | 72 | def test_metadata__file_missing__raises_exception(self, fs): 73 | file = File(path=Path("/tmp/foo")) 74 | with pytest.raises(ValueError) as e: 75 | file.refresh_metadata_from_disk() 76 | assert str(e.value) == "File /tmp/foo has disappeared" 77 | 78 | def test_metadata__file_not_a_file__raises_exception(self, fs): 79 | fs.create_dir("/tmp/foo") 80 | file = File(path=Path("/tmp/foo")) 81 | with pytest.raises(ValueError) as e: 82 | file.refresh_metadata_from_disk() 83 | assert str(e.value) == "File /tmp/foo is not a file" 84 | 85 | def test_metadata__collected(self, freezer, fs): 86 | frozen_time = datetime(2001, 1, 1, 1, 1, 1) 87 | freezer.move_to(frozen_time) 88 | 89 | uid = 100 90 | gid = 200 91 | fake_filesystem.set_uid(uid) 92 | fake_filesystem.set_gid(gid) 93 | fs.create_file("/tmp/foo", contents="unencrypted") 94 | file = File(path=Path("/tmp/foo")) 95 | 96 | file.refresh_metadata_from_disk() 97 | assert file.size == len("unencrypted") 98 | assert file.last_modified == frozen_time.timestamp() 99 | assert file.owner == uid 100 | assert file.group == gid 101 | 102 | def test_metadata__collected__last_modified_change_detected(self, fs, freezer): 103 | # Create file 104 | frozen_time = datetime(2001, 1, 1, 1, 1, 1) 105 | freezer.move_to(frozen_time) 106 | fake_file = fs.create_file("/tmp/foo", contents="unencrypted") 107 | file = File(path=Path("/tmp/foo")) 108 | file.refresh_metadata_from_disk() 109 | 110 | # Modify file 111 | frozen_time_modified = datetime(2001, 1, 1, 1, 1, 2) 112 | freezer.move_to(frozen_time_modified) 113 | with file.path.open("w") as handle: 114 | handle.write("modified") 115 | fake_file.st_mtime = datetime.timestamp(frozen_time_modified) 116 | file_modified = file.clone() 117 | file_modified.refresh_metadata_from_disk() 118 | 119 | # Last modified dates are different, objects are not the same 120 | assert file.last_modified == frozen_time.timestamp() 121 | assert file_modified.last_modified == frozen_time_modified.timestamp() 122 | assert file != file_modified 123 | 124 | def test_size__before_metadata__raises_exception(self, fs): 125 | file = File(path=Path("/tmp/foo")) 126 | with pytest.raises(ValueError) as e: 127 | file.size 128 | assert str(e.value) == "Cannot access size without metadata" 129 | 130 | def test_archive(self, fs): 131 | fs.create_file("/src/foo", contents="unencrypted") 132 | fs.create_dir("/dest") 133 | file = File(path=Path("/src/foo"), action=Action.ADD) 134 | archive_config = ArchiveConfig( 135 | storage=Local(path=Path("/dest/")), password="secret" 136 | ) 137 | file.refresh_metadata_from_disk() 138 | file.archive(archive_config) 139 | 140 | # Check Archived db object exists 141 | assert file.archived.id > 0 142 | archives = Archived.select() 143 | assert len(archives) == 1 144 | assert archives[0].id == file.archived.id 145 | 146 | # Check file exists in/dest/ 147 | dest_path = Path(f"/dest/{file.archived.id}") 148 | assert dest_path.is_file() 149 | 150 | # Check it has been encrypted and we can decrypt it 151 | decrypted = BytesIO() 152 | with dest_path.open("rb") as handle: 153 | crypto.decrypt(handle, decrypted, "secret", dest_path.stat().st_size) 154 | assert str(decrypted.getvalue(), "utf-8") == "unencrypted" 155 | 156 | def test_archive_file__storage_broken__error_raised(self, fs): 157 | # Create a file with enough data to overwhelm the kernel buffer 158 | fs.create_file("/src/foo", contents="unencrypted" * 1024 * 1024) 159 | file = File(path=Path("/src/foo"), action=Action.ADD) 160 | archive_config = ArchiveConfig(storage=FlawedStorage(), password="secret") 161 | file.refresh_metadata_from_disk() 162 | 163 | with pytest.raises(ValueError) as e: 164 | file.archive(archive_config) 165 | assert str(e.value).startswith("Unable to archive /src/foo: ") 166 | 167 | # Check File object does not exist in db 168 | assert file.id is None 169 | files = File.select() 170 | assert len(files) == 0 171 | 172 | # Check the archived object does exist 173 | archives = Archived.select() 174 | assert len(archives) == 1 175 | assert archives[0].hash == "" 176 | 177 | def test_archive_twice__action_forbidden_error_raised(self, fs): 178 | # This is such an edge case, but test for it to be safe 179 | fs.create_file("/src/foo", contents="unencrypted") 180 | fs.create_dir("/dest") 181 | file = File(path=Path("/src/foo"), action=Action.ADD) 182 | archive_config = ArchiveConfig( 183 | storage=Local(path=Path("/dest/")), password="secret" 184 | ) 185 | file.refresh_metadata_from_disk() 186 | file.archive(archive_config) 187 | 188 | # Now archive it again 189 | with pytest.raises(ValueError) as e: 190 | file.archive(archive_config) 191 | assert str(e.value) == "Cannot archive a file twice" 192 | 193 | def test_restore(self, fs): 194 | archive_config = ArchiveConfig( 195 | storage=Local(path=Path("/dest/")), password="secret" 196 | ) 197 | 198 | # Create an archived file 199 | fs.create_file("/src/foo", contents="unencrypted") 200 | fs.create_dir("/dest") 201 | file = File(path=Path("/src/foo"), action=Action.ADD) 202 | file.refresh_metadata_from_disk() 203 | file.archive(archive_config) 204 | 205 | # Now check we can restore it 206 | fs.create_dir("/restore") 207 | file.restore(archive_config, to=Path("/restore/file")) 208 | decrypted = Path("/restore/file") 209 | with decrypted.open("r") as handle: 210 | assert handle.read() == "unencrypted" 211 | 212 | 213 | class TestUserGroup: 214 | """ 215 | Test uid_to_name, gid_to_name, File.user_display and File.group_display 216 | 217 | We're not disabling the caches, so each test needs to use a separate ID 218 | """ 219 | 220 | def teardown_method(self): 221 | _uid_cache.clear() 222 | _gid_cache.clear() 223 | 224 | def test_uid_known__returns_name(self, monkeypatch, mocker): 225 | def return_name(uid): 226 | assert uid == 100 227 | obj = mocker.MagicMock(name="pw_name") 228 | obj.pw_name = "foo" 229 | return obj 230 | 231 | monkeypatch.setattr(pwd, "getpwuid", return_name) 232 | 233 | assert uid_to_name(100) == "foo" 234 | 235 | def test_uid_unknown__returns_uid(self, monkeypatch): 236 | def return_name(uid): 237 | raise KeyError("Unknown") 238 | 239 | monkeypatch.setattr(pwd, "getpwuid", return_name) 240 | 241 | assert uid_to_name(100) == "100" 242 | 243 | def test_file_user_display(self, monkeypatch): 244 | # Monkeypatch something to raise an AttributeError and return the ID as a str 245 | monkeypatch.setattr(pwd, "getpwuid", lambda x: None) 246 | file = File() 247 | file.owner = 100 248 | assert file.owner_display == "100" 249 | 250 | def test_gid_known__returns_name(self, monkeypatch, mocker): 251 | def return_name(gid): 252 | assert gid == 100 253 | obj = mocker.MagicMock(name="gr_name") 254 | obj.gr_name = "foo" 255 | return obj 256 | 257 | monkeypatch.setattr(grp, "getgrgid", return_name) 258 | 259 | assert gid_to_name(100) == "foo" 260 | 261 | def test_gid_unknown__returns_gid(self, monkeypatch): 262 | def return_name(gid): 263 | raise KeyError("Unknown") 264 | 265 | monkeypatch.setattr(grp, "getgrgid", return_name) 266 | 267 | assert gid_to_name(100) == "100" 268 | 269 | def test_file_group_display(self, monkeypatch): 270 | monkeypatch.setattr(grp, "getgrgid", lambda x: None) 271 | file = File() 272 | file.group = 100 273 | assert file.group_display == "100" 274 | 275 | 276 | class TestFilePermissions: 277 | """ 278 | Test of File.permissions_display 279 | """ 280 | 281 | def assert_permission(self, mask, human): 282 | file = File() 283 | file.permissions = int(str(mask), 8) 284 | assert file.permissions_display == human 285 | 286 | def test_no_bits(self): 287 | self.assert_permission(0, "-" * 10) 288 | 289 | def test_read_all(self): 290 | self.assert_permission(444, "-r--r--r--") 291 | 292 | def test_read_write_owner__read_others(self): 293 | self.assert_permission(644, "-rw-r--r--") 294 | 295 | def test_read_write_execute_owner__read_execute_group__read_public(self): 296 | self.assert_permission(754, "-rwxr-xr--") 297 | 298 | def test_execute_owner__write_group__write_execute_public(self): 299 | self.assert_permission(123, "---x-w--wx") 300 | 301 | 302 | class TestFileHumanLastModified: 303 | """ 304 | Test of File.get_human_last_modified 305 | """ 306 | 307 | def test_not_set(self): 308 | file = File() 309 | assert file.get_human_last_modified() == ["", "", "", ""] 310 | 311 | def test_timestamp_converted(self): 312 | now = datetime(2001, 1, 2, 3, 4, 5) 313 | file = File() 314 | file.last_modified = now.timestamp() 315 | assert file.get_human_last_modified() == ["Jan", "02", "2001", "03:04"] 316 | 317 | 318 | class TestArchivedHumanSize: 319 | """ 320 | Test of Archived.get_human_size 321 | """ 322 | 323 | def assert_size(self, bytes, size, unit): 324 | archived = Archived() 325 | archived.size = bytes 326 | actual = archived.get_human_size() 327 | assert actual == (size, unit) 328 | 329 | def test_bytes(self): 330 | self.assert_size(1000, 1000, "") 331 | 332 | def test_kibibytes(self): 333 | self.assert_size(1000 * 1024, 1000, "K") 334 | 335 | def test_mebibyte(self): 336 | self.assert_size(1000 * 1024 * 1024, 1000, "M") 337 | 338 | def test_gibibyte(self): 339 | self.assert_size(1000 * 1024 * 1024 * 1024, 1000, "G") 340 | 341 | def test_tebibyte(self): 342 | self.assert_size(1000 * 1024 * 1024 * 1024 * 1024, 1000, "T") 343 | 344 | def test_pebibyte(self): 345 | self.assert_size(1000 * 1024 * 1024 * 1024 * 1024 * 1024, 1000 * 1024, "T") 346 | -------------------------------------------------------------------------------- /tests/test_commands.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test serac/commands.py 3 | """ 4 | from datetime import datetime 5 | from pathlib import Path 6 | from time import time 7 | 8 | from click.testing import CliRunner 9 | from peewee import SqliteDatabase 10 | 11 | from serac.commands import Timestamp, cli 12 | from serac.config import ArchiveConfig 13 | from serac.index.database import get_current_db, set_current_db 14 | from serac.index.index import Pattern, State 15 | from serac.index.models import Archived, File 16 | from serac.reporter import NullReporter 17 | 18 | from .mocks import ( 19 | SAMPLE_CONFIG, 20 | SAMPLE_STORAGE_LOCAL, 21 | DatabaseTest, 22 | FilesystemTest, 23 | TmpFs, 24 | ) 25 | 26 | 27 | # Timestamp to use in tests - 2001-01-30 00:00:00 28 | JAN_30 = 980812800 29 | 30 | 31 | class CliTestMixin: 32 | def run(self, *args): 33 | """ 34 | Run command, config as first argument 35 | """ 36 | runner = CliRunner() 37 | return runner.invoke(cli, args, obj={}, catch_exceptions=False) 38 | 39 | def cmd(self, fs, *args, index=None): 40 | """ 41 | Create valid config and run specified command 42 | """ 43 | self.gen_config(fs, "config.conf", index=index) 44 | return self.run("config.conf", *args) 45 | 46 | def gen_config(self, fs, filename, storage=SAMPLE_STORAGE_LOCAL, index=None): 47 | """ 48 | Generate config file 49 | """ 50 | # Create required paths 51 | if not Path("/path/to").exists(): 52 | fs.create_dir("/path/to") 53 | if not Path("/path/to/backup").exists(): 54 | fs.create_dir("/path/to/backup") 55 | contents = contents = SAMPLE_CONFIG.format(storage=storage) 56 | if index is not None: 57 | contents = contents.replace("/path/to/index.sqlite", index) 58 | 59 | fs.create_file(filename, contents=contents) 60 | 61 | 62 | class TestCommandTest(CliTestMixin, DatabaseTest, FilesystemTest): 63 | def test_config_does_not_exist__raises_error(self): 64 | result = self.run("does_not_exist.conf", "test") 65 | assert result.exit_code == 2 66 | assert 'File "does_not_exist.conf" does not exist.' in result.output 67 | 68 | def test_config_does_exist_but_invalid__raises_error(self, fs): 69 | self.gen_config(fs, "invalid.conf", storage="INVALID") 70 | result = self.run("invalid.conf", "test") 71 | assert result.exit_code == 1 72 | assert "Invalid config: Source contains parsing errors" in result.output 73 | assert "[line 17]: 'INVALID\\n'" in result.output 74 | 75 | def test_config_does_exist_and_is_valid__passes(self, fs): 76 | self.gen_config(fs, "valid.conf") 77 | result = self.run("valid.conf", "test") 78 | assert result.exit_code == 0 79 | assert "Config file syntax is correct" in result.output 80 | 81 | 82 | class TestCommandInit(CliTestMixin, FilesystemTest): 83 | def test_database_does_not_exist__database_created(self, fs): 84 | # Database uses C libraries so doesn't work with pyfakefs 85 | with TmpFs("index.sqlite") as filename: 86 | test_db = SqliteDatabase(None) 87 | main_db = get_current_db() 88 | set_current_db(test_db) 89 | 90 | result = self.cmd(fs, "init", index=filename) 91 | fs.add_real_file(filename) 92 | 93 | set_current_db(main_db) 94 | 95 | assert result.exit_code == 0 96 | assert Path(filename).is_file() 97 | assert "Index database created" in result.output 98 | 99 | def test_database_exists__exception_raised(self, fs): 100 | # Check is done in python 101 | filename = "/tmp/index.sqlite" 102 | path = Path(filename) 103 | path.touch() 104 | result = self.cmd(fs, "init", index=filename) 105 | assert result.exit_code == 1 106 | assert f"Index database {filename} already exists" in result.output 107 | 108 | 109 | class TestCommandArchive(CliTestMixin, FilesystemTest): 110 | def test_archive(self, fs, mocker): 111 | mocked_changeset = mocker.MagicMock() 112 | mocked_scan = mocker.patch("serac.commands.scan", return_value=mocked_changeset) 113 | mocked_db_connect = mocker.patch("serac.index.database.connect") 114 | mocked_db_disconnect = mocker.patch("serac.index.database.disconnect") 115 | 116 | result = self.cmd(fs, "archive") 117 | mocked_scan.assert_called_once_with( 118 | includes=["/path/to/source", "/path/somewhere/else"], 119 | excludes=["/path/to/source/unprocessed", "/path/somewhere/else/*.jpg"], 120 | ) 121 | mocked_changeset.commit.assert_called_once() 122 | mocked_db_connect.assert_called_once_with(Path("/path/to/index.sqlite")) 123 | mocked_db_disconnect.assert_called_once() 124 | 125 | assert result.exit_code == 0 126 | assert result.output == "" 127 | 128 | 129 | class TestCommandLsBase(CliTestMixin, FilesystemTest, DatabaseTest): 130 | def fake_file(self, i): 131 | paths = [ 132 | "/src/one.txt", 133 | "/src/two.txt", 134 | "/src/dir/three.txt", 135 | "/src/dir/four.txt", 136 | "/src/dir/subdir/five.txt", 137 | "/alt/six.txt", 138 | "/alt/seven.txt", 139 | ] 140 | file = File( 141 | path=Path(paths[i]), 142 | archived=Archived(size=102400), 143 | last_modified=JAN_30 - (i * 60 * 60 * 24), 144 | owner=100, 145 | group=100, 146 | permissions=int(str(644), 8), 147 | ) 148 | return file 149 | 150 | def ls( 151 | self, 152 | fs, 153 | mocker, 154 | timestamp, 155 | pattern, 156 | files=None, 157 | at=None, 158 | expect_error=False, 159 | expect_failure=False, 160 | ): 161 | """ 162 | expect_error: we're expecting an error in the command 163 | expect_failure: we're expecting an error before the command 164 | """ 165 | # Patch out everything 166 | if files is None: 167 | files = [self.fake_file(i) for i in range(7)] 168 | mocked_state = State(files=files) 169 | mocked_search = mocker.patch("serac.commands.search", return_value=mocked_state) 170 | mocked_db_connect = mocker.patch("serac.index.database.connect") 171 | mocked_db_disconnect = mocker.patch("serac.index.database.disconnect") 172 | mocker.patch("serac.index.models.uid_to_name", return_value="user") 173 | mocker.patch("serac.index.models.gid_to_name", return_value="group") 174 | 175 | args = [] 176 | if timestamp: 177 | if at is None: 178 | at = str(timestamp) 179 | args.extend(["--at", at]) 180 | else: 181 | timestamp = int(time()) 182 | if pattern: 183 | args.extend(["--pattern", pattern]) 184 | result = self.cmd(fs, "ls", *args) 185 | 186 | # Check the cmd ran correctly 187 | if not expect_failure: 188 | mocked_search.assert_called_once_with( 189 | timestamp=timestamp, pattern=Pattern(pattern) 190 | ) 191 | mocked_db_connect.assert_called_once_with(Path("/path/to/index.sqlite")) 192 | 193 | if not expect_error: 194 | # Won't disconnect if we exit with an error 195 | mocked_db_disconnect.assert_called_once() 196 | 197 | return result 198 | 199 | def assert_success(self, result): 200 | """ 201 | Check the response was correct 202 | """ 203 | # Because we've mocked out the search fn, we'll always get the same results 204 | assert result.exit_code == 0 205 | assert result.output.splitlines() == [ 206 | "-rw-r--r-- user group 100K {} {} {}".format(*opts) 207 | for opts in [ 208 | ("Jan 24 2001", 980294400, "/alt/seven.txt"), 209 | ("Jan 25 2001", 980380800, "/alt/six.txt"), 210 | ("Jan 27 2001", 980553600, "/src/dir/four.txt"), 211 | ("Jan 26 2001", 980467200, "/src/dir/subdir/five.txt"), 212 | ("Jan 28 2001", 980640000, "/src/dir/three.txt"), 213 | ("Jan 30 2001", 980812800, "/src/one.txt"), 214 | ("Jan 29 2001", 980726400, "/src/two.txt"), 215 | ] 216 | ] 217 | 218 | 219 | class TestCommandLs(TestCommandLsBase): 220 | """ 221 | Test the LS command 222 | """ 223 | 224 | def test_ls__no_args__search_with_no_args(self, fs, mocker, freezer): 225 | # Freeze time 1 year later so we get the year in the output, not hours 226 | freezer.move_to(datetime.utcfromtimestamp(JAN_30 + (60 * 60 * 24 * 365))) 227 | result = self.ls(fs, mocker, timestamp="", pattern="") 228 | self.assert_success(result) 229 | 230 | def test_ls__same_year__shown_with_hours(self, fs, mocker, freezer): 231 | # Freeze time 1 day later so we get the hours in the output, not years 232 | freezer.move_to(datetime.utcfromtimestamp(JAN_30 + (60 * 60 * 24))) 233 | result = self.ls(fs, mocker, timestamp="", pattern="") 234 | assert result.exit_code == 0 235 | assert result.output.splitlines() == [ 236 | "-rw-r--r-- user group 100K {} {} {}".format(*opts) 237 | for opts in [ 238 | ("Jan 24 00:00", 980294400, "/alt/seven.txt"), 239 | ("Jan 25 00:00", 980380800, "/alt/six.txt"), 240 | ("Jan 27 00:00", 980553600, "/src/dir/four.txt"), 241 | ("Jan 26 00:00", 980467200, "/src/dir/subdir/five.txt"), 242 | ("Jan 28 00:00", 980640000, "/src/dir/three.txt"), 243 | ("Jan 30 00:00", 980812800, "/src/one.txt"), 244 | ("Jan 29 00:00", 980726400, "/src/two.txt"), 245 | ] 246 | ] 247 | 248 | def test_ls__timestamp_and_pattern__search_with_all_args(self, fs, mocker): 249 | result = self.ls(fs, mocker, timestamp=JAN_30, pattern="/src/dir") 250 | self.assert_success(result) 251 | 252 | def test_ls__no_results_no_pattern__raises_error(self, fs, mocker): 253 | result = self.ls( 254 | fs, mocker, timestamp="", pattern="", files=[], expect_error=True 255 | ) 256 | assert result.exit_code == 1 257 | assert "No files found" in result.output 258 | 259 | def test_ls__no_results_with_pattern__raises_pattern_error(self, fs, mocker): 260 | result = self.ls( 261 | fs, mocker, timestamp="", pattern="/src", files=[], expect_error=True 262 | ) 263 | assert result.exit_code == 1 264 | assert "No files found at /src" in result.output 265 | 266 | 267 | class TestTimestamp(TestCommandLsBase): 268 | """ 269 | Test the timestamp class (using ls where necessary) 270 | """ 271 | 272 | def test_timestamp_is_string__parsed_ok(self, fs, mocker): 273 | result = self.ls( 274 | fs, mocker, at="2001-01-30", timestamp=JAN_30, pattern="/src/dir" 275 | ) 276 | self.assert_success(result) 277 | 278 | def test_invalid_timestamp__raises_error(self, fs, mocker): 279 | result = self.ls( 280 | fs, mocker, timestamp="wrong", pattern="", files=[], expect_failure=True 281 | ) 282 | assert result.exit_code == 2 283 | assert ( 284 | "invalid datetime format: wrong. (choose from timestamp," in result.output 285 | ) 286 | 287 | def test_metavar__renders_ok(self): 288 | assert ( 289 | Timestamp().get_metavar("x") 290 | == "[timestamp|%Y-%m-%d|%Y-%m-%dT%H:%M:%S|%Y-%m-%d %H:%M:%S]" 291 | ) 292 | 293 | 294 | class TestCommandRestore(CliTestMixin, FilesystemTest, DatabaseTest): 295 | def fake_restored(self, n): 296 | return {f"/path/{i}": True for i in range(n)} 297 | 298 | def test_restore__no_args__restore_with_time(self, fs, mocker, freezer): 299 | # Patch out everything 300 | freezer.move_to(datetime(2001, 1, 30)) 301 | mocked_restore = mocker.patch( 302 | "serac.commands.restore", return_value=self.fake_restored(1) 303 | ) 304 | mocked_db_connect = mocker.patch("serac.index.database.connect") 305 | mocked_db_disconnect = mocker.patch("serac.index.database.disconnect") 306 | 307 | result = self.cmd(fs, "restore", "/dest") 308 | 309 | assert result.exit_code == 0 310 | assert result.output == "" 311 | 312 | archive_config = mocker.MagicMock(spec=ArchiveConfig) 313 | archive_config.__eq__.return_value = True 314 | mocked_restore.assert_called_once_with( 315 | archive_config=archive_config, 316 | timestamp=JAN_30, 317 | destination_path=Path("/dest"), 318 | pattern=Pattern(""), 319 | missing_ok=True, 320 | report_class=NullReporter, 321 | ) 322 | mocked_db_connect.assert_called_once_with(Path("/path/to/index.sqlite")) 323 | mocked_db_disconnect.assert_called_once() 324 | 325 | def test_restore__multiple_files__message_empty(self, fs, mocker): 326 | # Patch out everything 327 | mocker.patch("serac.commands.restore", return_value=self.fake_restored(2)) 328 | mocker.patch("serac.index.database.connect") 329 | mocker.patch("serac.index.database.disconnect") 330 | 331 | result = self.cmd(fs, "restore", "/dest") 332 | 333 | assert result.exit_code == 0 334 | assert result.output == "" 335 | 336 | def test_restore__multiple_files_verbose__message_ok(self, capsys, fs, mocker): 337 | # Patch out everything 338 | mocker.patch("serac.commands.restore", return_value=self.fake_restored(2)) 339 | mocker.patch("serac.index.database.connect") 340 | mocker.patch("serac.index.database.disconnect") 341 | 342 | result = self.cmd(fs, "restore", "/dest", "--verbose") 343 | 344 | assert result.exit_code == 0 345 | assert "Restored 2 files" in result.output 346 | 347 | def test_restore__no_files__raises_exception(self, fs, mocker, freezer): 348 | # Patch out everything 349 | mocker.patch("serac.commands.restore", return_value={}) 350 | mocker.patch("serac.index.database.connect") 351 | mocker.patch("serac.index.database.disconnect") 352 | 353 | result = self.cmd(fs, "restore", "/dest") 354 | 355 | assert result.exit_code == 1 356 | assert "Path not found" in result.output 357 | --------------------------------------------------------------------------------