├── modules ├── __init__.py ├── helper.py ├── providers │ ├── s3.py │ ├── glacier.py │ ├── copy.py │ ├── __init__.py │ ├── scp.py │ └── sftp.py ├── glacier.py ├── fileutils.py ├── aws.py └── configuration.py ├── requirements.txt ├── extras ├── testsuite │ ├── .gitignore │ ├── tests │ │ ├── 001 Initial backup │ │ ├── 003 Delete one file │ │ ├── 015 Max size │ │ ├── 007 Delete the new file again │ │ ├── 009 Move file │ │ ├── 002 Change on file │ │ ├── 006 Create new file with same name as deleted │ │ ├── 005 Delete one file and change another │ │ ├── 008 Test prefix config │ │ ├── 004 Run without changes │ │ ├── 012 Copy file and use encrypted manifest │ │ ├── 014 Handle legacy hashing │ │ ├── 010 Move file and copy the moved file │ │ ├── 011 Remove two files and generate backup with filelist and verify checksums │ │ └── 013 Test new hash method │ ├── test_key.private │ ├── test_key.public │ ├── README.md │ ├── test_restore.sh │ └── test_backup.sh ├── README.md ├── iceshelf.service ├── iceshelf-cronjob └── analog-key.sh ├── exclusions ├── README.md └── dovecot.excl ├── providers ├── s3.md ├── glacier.md ├── scp.md ├── sftp.md └── cp.md ├── .gitignore ├── .github └── workflows │ └── python-app.yml ├── database.schema.json ├── TODO.md ├── README.iceshelf-retrieve.md ├── DATABASE.md ├── README.iceshelf-restore.md ├── iceshelf-inspect ├── iceshelf.sample.conf ├── iceshelf-retrieve ├── iceshelf-restore ├── LICENSE ├── README.md └── iceshelf /modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python-gnupg 2 | awscli 3 | boto3 4 | -------------------------------------------------------------------------------- /extras/testsuite/.gitignore: -------------------------------------------------------------------------------- 1 | done/ 2 | content/ 3 | data/ 4 | tmp/ 5 | /config_* 6 | -------------------------------------------------------------------------------- /extras/testsuite/tests/001 Initial backup: -------------------------------------------------------------------------------- 1 | # Test 1 2 | # 3 | runTest "Initial backup" "" "" regular 4 | -------------------------------------------------------------------------------- /extras/testsuite/test_key.private: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrworf/iceshelf/HEAD/extras/testsuite/test_key.private -------------------------------------------------------------------------------- /extras/testsuite/test_key.public: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrworf/iceshelf/HEAD/extras/testsuite/test_key.public -------------------------------------------------------------------------------- /extras/testsuite/tests/003 Delete one file: -------------------------------------------------------------------------------- 1 | rm content/b 2 | runTest "Delete one file" "" "" regular "Only in compare/content: b" 3 | 4 | -------------------------------------------------------------------------------- /extras/testsuite/tests/015 Max size: -------------------------------------------------------------------------------- 1 | # Change a file 2 | 3 | OPT_SUCCESSRET=3 4 | runTest "Show files not fitting max size" "nofit" "" maxsize 5 | 
-------------------------------------------------------------------------------- /extras/testsuite/tests/007 Delete the new file again: -------------------------------------------------------------------------------- 1 | rm content/b 2 | runTest "Delete the new file again" "" "" regular "Only in compare/content: b" 3 | 4 | -------------------------------------------------------------------------------- /extras/testsuite/tests/009 Move file: -------------------------------------------------------------------------------- 1 | mv content/d content/dd 2 | runTest "Moved file" "" "" regular "Only in compare/content: d 3 | Only in content: dd" 4 | 5 | -------------------------------------------------------------------------------- /extras/testsuite/tests/002 Change on file: -------------------------------------------------------------------------------- 1 | # Change a file 2 | 3 | dd if=/dev/urandom of=content/a bs=1024 count=123 2>/dev/null 4 | runTest "Change one file" "" "" regular 5 | -------------------------------------------------------------------------------- /exclusions/README.md: -------------------------------------------------------------------------------- 1 | This folder holds ready-made exclusion files which can be easily 2 | added to your configuration file to avoid spending time doing it 3 | yourself. 4 | 5 | Feel free to expand upon this. 6 | -------------------------------------------------------------------------------- /extras/testsuite/tests/006 Create new file with same name as deleted: -------------------------------------------------------------------------------- 1 | 2 | dd if=/dev/urandom of=content/b bs=1024 count=243 2>/dev/null 3 | runTest "Create new file with same name as deleted file" "" "" regular 4 | -------------------------------------------------------------------------------- /extras/testsuite/tests/005 Delete one file and change another: -------------------------------------------------------------------------------- 1 | rm content/c 2 | dd if=/dev/urandom of=content/a bs=1024 count=123 2>/dev/null 3 | runTest "Delete one file and change another" "" "" regular "Only in compare/content: c" 4 | 5 | -------------------------------------------------------------------------------- /exclusions/dovecot.excl: -------------------------------------------------------------------------------- 1 | # This exclusion file will ignore all unnecessary files in dovecot when 2 | # performing a backup of your mail. 3 | # 4 | ?dovecot.index 5 | ?dovecot.list.index 6 | *dovecot-uidlist 7 | *dovecot-keywords 8 | *.dovecot.lda-dupes 9 | -------------------------------------------------------------------------------- /extras/testsuite/tests/008 Test prefix config: -------------------------------------------------------------------------------- 1 | runTest "Test prefix config" \ 2 | "skip" \ 3 | ' 4 | function posttest() { 5 | ls -laR done/ | grep prefix > /dev/null 6 | if [ $? -ne 0 ]; then 7 | echo "Prefix not working" 8 | return 1 9 | fi 10 | } 11 | ' \ 12 | prefix "" --full 13 | 14 | -------------------------------------------------------------------------------- /extras/testsuite/tests/004 Run without changes: -------------------------------------------------------------------------------- 1 | runTest "Run without any changes" \ 2 | "skip" \ 3 | ' 4 | function pretest() { 5 | if ! 
${ICESHELF} --changes config_regular; then 6 | echo "ERROR: Changes detected when there should not be any" 7 | return 255 8 | fi 9 | } 10 | ' \ 11 | regular "" 12 | 13 | -------------------------------------------------------------------------------- /extras/testsuite/tests/012 Copy file and use encrypted manifest: -------------------------------------------------------------------------------- 1 | if [[ "$VARIANT" == *"encrypted"* ]]; then 2 | cp content/q content/qq 3 | runTest "Copy file and use encrypted manifest" "" ' 4 | function posttest() { 5 | if ! ls -1 $(lastFolder) | grep json.gpg ; then 6 | echo "No encrypted json was found" 7 | return 1 8 | fi 9 | } 10 | ' encryptmani "" 11 | fi 12 | -------------------------------------------------------------------------------- /extras/testsuite/tests/014 Handle legacy hashing: -------------------------------------------------------------------------------- 1 | # This strips all hash identifiers from the database and then runs 2 | # a changes test. It should not detect any changes. 3 | 4 | # Strip sha indicator from database 5 | cat data/checksum.json | sed -r 's/:sha[0-9]+//g' > data/checksum.json.tmp 6 | mv data/checksum.json.tmp data/checksum.json 7 | 8 | runTest "Handle legacy file" "nochange" '' regular 9 | -------------------------------------------------------------------------------- /providers/s3.md: -------------------------------------------------------------------------------- 1 | # Amazon S3 Provider 2 | 3 | Uses `aws s3 cp` to upload files to an S3 bucket. 4 | 5 | ## Arguments 6 | - `bucket` – name of the target S3 bucket. 7 | - `prefix` – optional prefix inside the bucket. 8 | 9 | ## Pros 10 | - Objects can be stored in immutable storage classes (e.g. Glacier or Glacier Deep Archive) which protects against ransomware. 11 | - Highly durable and available. 12 | 13 | ## Cons 14 | - Requires the AWS CLI and credentials. 15 | - Transfer costs may apply. 16 | -------------------------------------------------------------------------------- /providers/glacier.md: -------------------------------------------------------------------------------- 1 | # Glacier Provider 2 | 3 | Stores backups in Amazon Glacier using the `aws` CLI. 4 | 5 | ## Arguments 6 | - `vault` – name of the Glacier vault. 7 | - `threads` – optional number of upload threads. 8 | 9 | ## Pros 10 | - Data is stored immutably which offers strong protection against ransomware. 11 | - Very low storage cost for large archives. 12 | 13 | ## Cons 14 | - Retrieval can take many hours and incurs additional cost. 15 | - Requires AWS CLI and configured credentials. 16 | -------------------------------------------------------------------------------- /providers/scp.md: -------------------------------------------------------------------------------- 1 | # SCP Provider 2 | 3 | Transfers files using the `scp` command. 4 | 5 | ## Arguments 6 | - `user` – user to connect as. 7 | - `host` – remote host. 8 | - `dest` – remote directory for the uploaded files. 9 | - `key` – optional SSH private key for authentication. 10 | - `password` – optional password or passphrase (requires `sshpass`). 11 | 12 | ## Pros 13 | - Easy to use and available on most systems. 14 | 15 | ## Cons 16 | - Requires SSH credentials. 17 | - Does not resume interrupted uploads. 
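
As a rough sketch of what happens under the hood (host, user, paths and the archive name below are placeholders), each file is uploaded with a single `scp` invocation, wrapped in `sshpass` when a password is configured:

```
# Key-based authentication: one scp call per archive file
scp -i /home/backup/.ssh/backup_key archive.tar.gz backup@backup.example.com:/srv/iceshelf/archive.tar.gz

# Password-based authentication: the same call wrapped in sshpass
sshpass -p 'secret' scp archive.tar.gz backup@backup.example.com:/srv/iceshelf/archive.tar.gz
```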
18 | -------------------------------------------------------------------------------- /providers/sftp.md: -------------------------------------------------------------------------------- 1 | # SFTP Provider 2 | 3 | Uploads backup files using the `sftp` command. 4 | 5 | ## Arguments 6 | - `user` – user to connect as. 7 | - `host` – remote host. 8 | - `dest` – remote directory where files are uploaded. 9 | - `key` – optional SSH private key. 10 | - `password` – optional password or passphrase (requires `sshpass`). 11 | 12 | ## Pros 13 | - Works over SSH and is widely supported. 14 | 15 | ## Cons 16 | - Requires SSH access and credentials. 17 | - Transfer speed may be limited by network latency. 18 | -------------------------------------------------------------------------------- /providers/cp.md: -------------------------------------------------------------------------------- 1 | # Copy Provider 2 | 3 | Copies backup files to a local destination using the `cp` command. Useful when 4 | keeping archives on the same system or on a mounted network share. 5 | 6 | ## Arguments 7 | - `dest` – path to the target directory where files will be placed. 8 | - `create` – set to `yes` to create `dest` if it does not exist. 9 | 10 | ## Pros 11 | - Simple and uses basic tools available on any system. 12 | - No network transfer required. 13 | 14 | ## Cons 15 | - Provides no remote storage or redundancy. 16 | -------------------------------------------------------------------------------- /extras/testsuite/tests/010 Move file and copy the moved file: -------------------------------------------------------------------------------- 1 | ### This has has a latent issue, iceshelf doesn't do deduplication which means 2 | ### that sometimes it catches the eee as a rename instead of ee. 3 | ### To solve this, we use regex to allow for both cases 4 | 5 | mv content/e content/ee || echo "ERROR: moving content/e to content/ee" 6 | cp content/ee content/eee || echo "ERROR: copying content/ee to content/eee" 7 | runTest "Move file and copy the same as well" "" "" regular '^Only in compare/content: e 8 | Only in content: eee?$' 9 | 10 | -------------------------------------------------------------------------------- /extras/testsuite/tests/011 Remove two files and generate backup with filelist and verify checksums: -------------------------------------------------------------------------------- 1 | rm content/ee content/eee 2 | runTest "Remove two files and generate backup with filelist and verify checksums" "" ' 3 | function posttest() { 4 | pushd $(lastFolder) 5 | # Make sure we do not get tripped by signed version of file list 6 | gpg -o filelist.lst -d *.lst.asc 2>/dev/null && rm *.lst.asc 7 | if ! shasum -c *.lst ; then 8 | echo "file list checksum failed" 9 | return 1 10 | fi 11 | popd 12 | } 13 | ' filelist "Only in compare/content: ee 14 | Only in compare/content: eee" 15 | -------------------------------------------------------------------------------- /extras/README.md: -------------------------------------------------------------------------------- 1 | # Other 2 | 3 | This folder holds some goodies which might be useful for you. 4 | 5 | ## iceshelf.service 6 | 7 | A systemd service file for running iceshelf 8 | 9 | ## analog-key.sh 10 | 11 | A shell script which can transfer a GPG key into a printable form (as multiple QR codes) suitable for longterm backup. It can also take a scanned copy and restore the digital key. 
Finally it also has a validate mode where it simple exports, imports and confirms that the reconstituted key is identical to the one in GPGs keychain. 12 | 13 | It's HIGHLY recommended that you make copies of the key used for iceshelf backups, since without it, any and all backed up content is lost. 14 | -------------------------------------------------------------------------------- /modules/helper.py: -------------------------------------------------------------------------------- 1 | 2 | def formatTime(seconds): 3 | if seconds < 60: 4 | # ss 5 | return "%ds" % seconds 6 | elif seconds < 3600: 7 | # mm:ss 8 | return "%dm %02ds" % (seconds / 60, seconds % 60) 9 | elif seconds < 86400: 10 | # hh:mm:ss 11 | return "%dh %02dm %02ds" % (seconds / 3600, (seconds % 3600) / 60, seconds % 60) 12 | else: 13 | # dd:hh:mm:ss 14 | return "%dd %02dh %02dm %02ds" % (seconds / 86400, (seconds % 86400) / 3600, (seconds % 3600) / 60, seconds % 60) 15 | 16 | def formatSize(size): 17 | return formatNumber(size, [" bytes", "K", "M", "G", "T"]) 18 | 19 | def formatSpeed(bps): 20 | return formatNumber(bps, [" bytes/s", "K/s", "M/s", "G/s", "T/s"]) 21 | 22 | def formatNumber(number, units): 23 | i = 0 24 | while number >= 1024 and i < len(units): 25 | number /= 1024 26 | i += 1 27 | return "%.1d%s" % (number, units[i]) 28 | -------------------------------------------------------------------------------- /extras/testsuite/tests/013 Test new hash method: -------------------------------------------------------------------------------- 1 | # Add a new file, change an old file 2 | # Run the backup using sha256 instead of sha1 and make sure these 3 | # files now have a sha256 entry. 4 | 5 | # Generate a 10k file that doesn't exist 6 | dd 2>/dev/null if=/dev/urandom of=content/qqq bs=1024 count=10 7 | # Get the hash of that 8 | NEW="$(sha256sum content/qqq | cut -d " " -f 1):sha256" 9 | 10 | # Generate a 10k file that does exist 11 | dd 2>/dev/null if=/dev/urandom of=content/q bs=1024 count=10 12 | OLD="$(sha256sum content/q | cut -d " " -f 1):sha256" 13 | 14 | runTest "Test change of hash config" "" \ 15 | ' 16 | function posttest() { 17 | grep "$NEW" data/checksum.json 18 | if [ $? -ne 0 ]; then 19 | echo "Hash did not change for content/qqq" 20 | return 1 21 | fi 22 | grep "$OLD" data/checksum.json 23 | if [ $? -ne 0 ]; then 24 | echo "Hash did not change for content/q" 25 | return 1 26 | fi 27 | } 28 | ' \ 29 | changehash 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | backup/ 60 | 61 | -------------------------------------------------------------------------------- /extras/iceshelf.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=incremental backups using Amazon Glacier 3 | Documentation=https://github.com/mrworf/iceshelf 4 | After=network.target 5 | 6 | [Service] 7 | # Configure your user here 8 | User=iceshelf 9 | Group=iceshelf 10 | 11 | # Configure paths to iceshelf and config here 12 | Environment="ICESHELF=/home/iceshelf/iceshelf/iceshelf" 13 | Environment="CONFIG=/home/iceshelf/backup.conf" 14 | 15 | Type=simple 16 | ExecStart=/usr/bin/python3 ${ICESHELF} ${CONFIG} 17 | 18 | # Restart if not finished 19 | RestartForceExitStatus=10 20 | SuccessExitStatus=10 21 | 22 | PrivateTmp=true 23 | NoNewPrivileges=true 24 | PrivateDevices=true 25 | # mounts read-only: /usr, /boot and /etc 26 | ProtectSystem=full 27 | 28 | # Everything is read-only by default 29 | ReadOnlyDirectories=/ 30 | # Allow writing to these directories: (GnuPG needs to lock its keyrings, add tmp dir, done dir and data dir) 31 | ReadWriteDirectories=/home/iceshelf/.gnupg /home/iceshelf/backup/inprogress /home/iceshelf/backup/metadata /home/iceshelf/backup/done 32 | # Don't allow access to these directories: (GnuPG needs /dev) 33 | InaccessibleDirectories=-/root -/opt -/run -/sbin 34 | 35 | # -20 = highest, 19 lowest 36 | Nice=13 37 | # none, realtime, best-effort, idle 38 | IOSchedulingClass=idle 39 | # 0 = highest, 7 = lowest 40 | IOSchedulingPriority=6 41 | 42 | 43 | [Install] 44 | WantedBy=multi-user.target 45 | -------------------------------------------------------------------------------- /modules/providers/s3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import logging 4 | from . 
import BackupProvider, _which 5 | 6 | class S3Provider(BackupProvider): 7 | name = 's3' 8 | def verify(self): 9 | self.bucket = self.options.get('bucket') 10 | self.prefix = self.options.get('prefix', '') 11 | if not self.bucket: 12 | logging.error('s3 provider requires "bucket"') 13 | return False 14 | if _which('aws') is None: 15 | logging.error('aws command not found') 16 | return False 17 | return True 18 | 19 | def storage_id(self): 20 | prefix = f'/{self.prefix}' if self.prefix else '' 21 | return f's3:{self.bucket}{prefix}' 22 | 23 | def upload_files(self, files): 24 | for f in files: 25 | key = os.path.join(self.prefix, os.path.basename(f)) 26 | cmd = ['aws', 's3', 'cp', f, f's3://{self.bucket}/{key}'] 27 | try: 28 | p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 29 | out, err = p.communicate() 30 | if p.returncode != 0: 31 | logging.error('aws s3 cp failed: %s', err) 32 | return False 33 | except Exception: 34 | logging.exception('aws s3 cp failed for %s', f) 35 | return False 36 | return True 37 | -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Validate iceshelf 5 | 6 | on: 7 | push: 8 | branches: [ "master" ] 9 | pull_request: 10 | branches: [ "master" ] 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | build: 17 | 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python 3 23 | uses: actions/setup-python@v3 24 | with: 25 | python-version: "3.10" 26 | - name: Install dependencies 27 | run: | 28 | sudo apt-get install par2 29 | sudo apt-get install gnupg 30 | python -m pip install --upgrade pip 31 | pip install pytest pylint 32 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 33 | - name: Lint with pylint 34 | run: | 35 | pylint modules iceshelf iceshelf-inspect iceshelf-restore iceshelf-retrieve --errors-only 36 | - name: Run backup tests 37 | run: | 38 | bash extras/testsuite/test_backup.sh insecure 39 | - name: Run restore tests 40 | run: | 41 | bash extras/testsuite/test_restore.sh 42 | #- name: Test with pytest 43 | # run: | 44 | # pytest 45 | -------------------------------------------------------------------------------- /modules/providers/glacier.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from . 
import BackupProvider, _which 4 | from modules import aws 5 | 6 | class GlacierProvider(BackupProvider): 7 | """Upload archives to AWS Glacier using the aws CLI.""" 8 | name = 'glacier' 9 | 10 | def verify(self): 11 | self.vault = self.options.get('vault') 12 | self.threads = int(self.options.get('threads', 4)) 13 | if not self.vault: 14 | logging.error('glacier provider requires "vault"') 15 | return False 16 | if _which('aws') is None: 17 | logging.error('aws command not found') 18 | return False 19 | if not aws.isConfigured(): 20 | return False 21 | return True 22 | 23 | def storage_id(self): 24 | return f'glacier:{self.vault}' 25 | 26 | def get_vault(self): 27 | return self.vault 28 | 29 | def upload_files(self, files): 30 | cfg = { 31 | 'glacier-vault': self.vault, 32 | 'glacier-threads': self.threads, 33 | 'prepdir': os.path.dirname(files[0]) if files else '' 34 | } 35 | total = sum(os.path.getsize(f) for f in files) 36 | names = [os.path.basename(f) for f in files] 37 | # Ensure vault exists (createVault will no-op if it already exists) 38 | if not aws.createVault(cfg): 39 | return False 40 | return aws.uploadFiles(cfg, names, total) 41 | -------------------------------------------------------------------------------- /database.schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "IceShelf Database", 4 | "type": "object", 5 | "required": ["dataset", "backups", "vault", "version", "timestamp"], 6 | "properties": { 7 | "dataset": { 8 | "type": "object", 9 | "additionalProperties": { 10 | "type": "object", 11 | "required": ["checksum", "memberof", "deleted"], 12 | "properties": { 13 | "checksum": {"type": "string"}, 14 | "memberof": {"type": "array", "items": {"type": "string"}}, 15 | "deleted": {"type": "array", "items": {"type": "string"}} 16 | } 17 | } 18 | }, 19 | "backups": { 20 | "type": "object", 21 | "additionalProperties": { 22 | "type": "array", 23 | "items": {"type": "string"} 24 | } 25 | }, 26 | "vault": {"type": "string"}, 27 | "version": { 28 | "type": "array", 29 | "items": {"type": "integer"}, 30 | "minItems": 3, 31 | "maxItems": 3 32 | }, 33 | "moved": { 34 | "type": "object", 35 | "additionalProperties": { 36 | "type": "object", 37 | "required": ["reference", "original"], 38 | "properties": { 39 | "reference": {"type": "string"}, 40 | "original": {"type": "string"} 41 | } 42 | } 43 | }, 44 | "lastbackup": {"type": "string"}, 45 | "timestamp": {"type": "number"} 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /modules/providers/copy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import logging 4 | from . 
import BackupProvider, _which 5 | 6 | class CopyProvider(BackupProvider): 7 | name = 'cp' 8 | """Simple provider that copies files locally using cp.""" 9 | def verify(self): 10 | dest = self.options.get('dest') 11 | if not dest: 12 | logging.error('copy provider requires "dest"') 13 | return False 14 | if not os.path.isdir(dest): 15 | if self.options.get('create', '').lower() in ['yes', 'true']: 16 | try: 17 | os.makedirs(dest, exist_ok=True) 18 | except Exception: 19 | logging.exception('Failed to create %s', dest) 20 | return False 21 | else: 22 | logging.error('Destination %s does not exist', dest) 23 | return False 24 | if _which('cp') is None: 25 | logging.error('cp command not found') 26 | return False 27 | self.dest = dest 28 | return True 29 | 30 | def storage_id(self): 31 | return f'cp:{self.dest}' 32 | 33 | def upload_files(self, files): 34 | for f in files: 35 | try: 36 | shutil.copy(f, os.path.join(self.dest, os.path.basename(f))) 37 | except Exception: 38 | logging.exception('Failed to copy %s', f) 39 | return False 40 | return True 41 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # TODO 2 | 3 | This file details what my goals are for this project, both in the short term and in the long term. It also details items which I intend to do but doesn't have a clear time plan. 4 | 5 | The list is intentionally kept vague to avoid over-promising and under-delivering :) 6 | 7 | ## Short term 8 | - Extend iceshelf to allow usage of alternate long-term storage solutions other than glacier 9 | - add hint if a backup is full instead of incremential 10 | 11 | ## Long term 12 | - Add testsuite coverage of iceshelf-restore 13 | - Detect duplication when using sha method (impossible with meta due to lack of details) 14 | - Move validation of exclusion rules to configuration parsing instead of during backup 15 | 16 | ## Anytime 17 | - Cleanup parameters 18 | - Add info about http://www.jabberwocky.com/software/paperkey/ to README.md 19 | - improve --modified output (min, max, etc) 20 | - add warning if one and the same file changes a lot 21 | - Add piece about "why encrypt" to README.md (ie, why I am so adamant about it). See second section in this file for current links about security until I get around to putting it in the README.md 22 | - Redo the "?bla" rule into a "*bla*" which makes more sense... But do we also need to support *bl*a* then? Probably 23 | - Detect missing key or wrong passphrase 24 | 25 | # Why use iceshelf with encryption? 
26 | 27 | - http://arstechnica.com/tech-policy/2015/10/microsoft-wants-us-government-to-obey-eu-privacy-laws/ 28 | - http://arstechnica.com/tech-policy/2015/10/apple-ceo-tim-cook-blasts-encryption-backdoors/ 29 | - http://arstechnica.com/tech-policy/2015/10/judge-does-us-law-allow-feds-to-compel-apple-to-unlock-an-iphone/ 30 | 31 | -------------------------------------------------------------------------------- /modules/providers/__init__.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import logging 3 | 4 | class BackupProvider: 5 | """Base class for backup providers.""" 6 | 7 | name = 'provider' 8 | 9 | def __init__(self, **options): 10 | self.options = options 11 | 12 | def verify(self): 13 | """Return True if the provider configuration is valid.""" 14 | raise NotImplementedError 15 | 16 | def __str__(self): 17 | return self.name 18 | 19 | def storage_id(self): 20 | """Return a string describing where files are stored.""" 21 | raise NotImplementedError 22 | 23 | def get_vault(self): 24 | """Return Glacier vault name if applicable, else None.""" 25 | return None 26 | 27 | def upload_files(self, files): 28 | """Upload a list of files.""" 29 | raise NotImplementedError 30 | 31 | 32 | def _which(program): 33 | return shutil.which(program) 34 | 35 | from . import sftp, s3, scp, copy, glacier 36 | 37 | PROVIDERS = { 38 | 'sftp': sftp.SFTPProvider, 39 | 's3': s3.S3Provider, 40 | 'scp': scp.SCPProvider, 41 | 'cp': copy.CopyProvider, 42 | 'glacier': glacier.GlacierProvider, 43 | } 44 | 45 | def get_provider(cfg): 46 | if not cfg or 'type' not in cfg: 47 | raise ValueError('Provider configuration missing type') 48 | t = cfg['type'].lower() 49 | cls = PROVIDERS.get(t) 50 | if not cls: 51 | raise ValueError('Unknown provider: %s' % t) 52 | opts = dict(cfg) 53 | opts.pop('type', None) 54 | provider = cls(**opts) 55 | if not provider.verify(): 56 | logging.error('Provider verification failed for %s', t) 57 | return None 58 | return provider 59 | -------------------------------------------------------------------------------- /extras/iceshelf-cronjob: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # 4 | SHOWSUCCESS=true 5 | USER=iceshelf 6 | CONFS=("/home/${USER}/backup.conf") 7 | EXEC=/home/${USER}/iceshelf/iceshelf 8 | 9 | # Allow you to override the defaults above without having to 10 | # edit this file. 11 | # 12 | if [ -f "/etc/default/iceshelf" ]; then 13 | source "/etc/default/iceshelf" 14 | fi 15 | 16 | ############# DO NOT CHANGE ANYTHING BELOW THIS POINT ##################### 17 | # 18 | FINALRET=0 19 | for CONF in "${CONFS[@]}"; do 20 | TMPLOG=$(sudo -Hu ${USER} mktemp /tmp/iceshelf.log.XXXXX) 21 | RET=0 22 | 23 | if [ -z "$TMPLOG" ]; then 24 | echo "ERROR: User ${USER} does not exist" >&2 25 | exit 255 26 | fi 27 | if [ ! -f "$CONF" ]; then 28 | echo "ERROR: Configuration $CONF was not found" >&2 29 | exit 255 30 | fi 31 | 32 | # Avoid emails about stuff unless it did something 33 | sudo -Hu ${USER} ${EXEC} --changes --logfile $TMPLOG $CONF 34 | RET=$? 35 | if [ $RET -eq 1 ]; then 36 | # Changes detected, clear old log and do a real run 37 | echo -n >$TMPLOG "" 38 | sudo -Hu ${USER} ${EXEC} --logfile $TMPLOG $CONF 39 | RET=$? 
40 | if $SHOWSUCCESS && [ $RET -eq 0 ]; then 41 | echo "SHOWSUCCESS is TRUE, showing result of successfull run" >&2 42 | echo "======================================================" >&2 43 | cat $TMPLOG >&2; 44 | fi 45 | fi 46 | if [ $RET -ne 0 ]; then 47 | echo "Backup failed with error code $RET, this is what happened:" >&2 48 | echo "==========================================================" >&2 49 | cat $TMPLOG >&2 50 | FINALRET=1 51 | fi 52 | 53 | # Always keep a log of all activities 54 | cat $TMPLOG >> /var/log/iceshelf.log 55 | rm $TMPLOG 56 | done 57 | 58 | exit $FINALRET 59 | -------------------------------------------------------------------------------- /modules/providers/scp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import logging 4 | from . import BackupProvider, _which 5 | 6 | class SCPProvider(BackupProvider): 7 | name = 'scp' 8 | def verify(self): 9 | self.user = self.options.get('user') 10 | self.host = self.options.get('host') 11 | self.dest = self.options.get('dest', '.') 12 | self.key = self.options.get('key') 13 | self.password = self.options.get('password') 14 | if not self.user or not self.host: 15 | logging.error('scp provider requires "user" and "host"') 16 | return False 17 | if self.key and not os.path.exists(self.key): 18 | logging.error('SSH key %s not found', self.key) 19 | return False 20 | if self.password and _which('sshpass') is None: 21 | logging.error('sshpass command not found') 22 | return False 23 | if _which('scp') is None: 24 | logging.error('scp command not found') 25 | return False 26 | return True 27 | 28 | def storage_id(self): 29 | return f'scp:{self.user}@{self.host}:{self.dest}' 30 | 31 | def upload_files(self, files): 32 | base = [] 33 | if self.password: 34 | base += ['sshpass', '-p', self.password] 35 | scp_cmd = ['scp'] 36 | if self.key: 37 | scp_cmd += ['-i', self.key] 38 | for f in files: 39 | dest = f'{self.user}@{self.host}:{self.dest}/{os.path.basename(f)}' 40 | cmd = base + scp_cmd + [f, dest] 41 | try: 42 | p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 43 | out, err = p.communicate() 44 | if p.returncode != 0: 45 | logging.error('scp failed: %s', err) 46 | return False 47 | except Exception: 48 | logging.exception('scp failed for %s', f) 49 | return False 50 | return True 51 | -------------------------------------------------------------------------------- /modules/providers/sftp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import logging 4 | from . 
import BackupProvider, _which 5 | 6 | class SFTPProvider(BackupProvider): 7 | name = 'sftp' 8 | def verify(self): 9 | self.user = self.options.get('user') 10 | self.host = self.options.get('host') 11 | self.dest = self.options.get('dest', '.') 12 | self.key = self.options.get('key') 13 | self.password = self.options.get('password') 14 | if not self.user or not self.host: 15 | logging.error('sftp provider requires "user" and "host"') 16 | return False 17 | if self.key and not os.path.exists(self.key): 18 | logging.error('SSH key %s not found', self.key) 19 | return False 20 | if self.password and _which('sshpass') is None: 21 | logging.error('sshpass command not found') 22 | return False 23 | if _which('sftp') is None: 24 | logging.error('sftp command not found') 25 | return False 26 | return True 27 | 28 | def storage_id(self): 29 | return f'sftp:{self.user}@{self.host}:{self.dest}' 30 | 31 | def upload_files(self, files): 32 | base = [] 33 | if self.password: 34 | base += ['sshpass', '-p', self.password] 35 | sftp_cmd = ['sftp'] 36 | if self.key: 37 | sftp_cmd += ['-i', self.key] 38 | for f in files: 39 | cmd = base + sftp_cmd + [f'{self.user}@{self.host}'] 40 | batch = f'put {f} {self.dest}/{os.path.basename(f)}\n' 41 | try: 42 | p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 43 | out, err = p.communicate(batch.encode()) 44 | if p.returncode != 0: 45 | logging.error('sftp failed: %s', err) 46 | return False 47 | except Exception: 48 | logging.exception('sftp failed for %s', f) 49 | return False 50 | return True 51 | -------------------------------------------------------------------------------- /extras/testsuite/README.md: -------------------------------------------------------------------------------- 1 | # Backup and Restore tests 2 | 3 | A beginning to a suite of tests to confirm that the tool is doing the right thing. 4 | 5 | All backup test cases are stored inside the `tests/` directory. They manipulate 6 | the `content` folder and then execute `runTest` from `test_backup.sh`. 7 | 8 | `test_restore.sh` provides functional tests for `iceshelf-restore`. It exercises 9 | all permutations of encryption, signatures and parity while also verifying 10 | behavior when manifests are missing or archives are corrupt. 11 | 12 | runTest take the following arguments: 13 | 14 | - param1 = title of test 15 | - param2 = leave empty to run `iceshelf` with `--changes`. If no changes are detected, the test fails 16 | - param3 = provide `pretest()` and `posttest()` functions which are called before or after test, return non-zero to fail test 17 | - param4 = Which config file to use (unrelated to variant) 18 | -- regular = Simple backup 19 | -- prefix = Sets the prefix option 20 | -- filelist = produces a *.lst file 21 | -- encryptmani = Also encrypts manifest 22 | -- *NOTE* These configurations are also adapted based on variant, so variant encrypted will make all configs produce encrypted output 23 | - param5 = Text to compare output from `diff` of the source material and the resulting unpacked backup. If prefixed with `^` it will assume the text is a regular expression, otherwise it's just plain text comparison. 24 | - param6+ passed verbaitum to iceshelf 25 | 26 | _Of all these options, only 5 & 6 can be left empty._ 27 | 28 | By setting the `ERROR` environment variable to `true`, you will trigger an error. This 29 | is done automatically inside `runTest` so unless you have specific needs, you shouldn't 30 | have to do anything. 
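
As a concrete sketch, a new test case following the conventions above could look like the snippet below. The file name (`tests/016 Example`) and the file being modified are made up for illustration; compare with the existing `002 Change on file`:

```
# extras/testsuite/tests/016 Example (hypothetical)
#
# Change one file, then run a regular backup and verify that the
# unpacked result still matches the source tree.
dd if=/dev/urandom of=content/a bs=1024 count=64 2>/dev/null
runTest "Change one file (example)" "" "" regular
```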
31 | 32 | There are a number of other variables acccessible: 33 | 34 | - VARIANT indicates the current variant the test is used in, some cases (like #012) depend on this 35 | 36 | All tests are run in numerical order and therefore can depend on the output from the 37 | previous testcase. 38 | 39 | All tests are run multiple times in various configurations (encryption, signature, etc). 40 | -------------------------------------------------------------------------------- /README.iceshelf-retrieve.md: -------------------------------------------------------------------------------- 1 | # iceshelf-retrieve 2 | 3 | `iceshelf-retrieve` downloads archives stored in AWS Glacier by 4 | [iceshelf](README.md). Retrieval from Glacier is asynchronous which means files 5 | cannot be fetched immediately. This helper keeps track of pending retrieval jobs 6 | and can be re-run until everything is downloaded and verified. 7 | 8 | ## Features 9 | 10 | - Handles Glacier inventory requests automatically. 11 | - Initiates archive retrieval jobs and resumes interrupted downloads. 12 | - Multi-threaded downloads with configurable thread count. 13 | - Verifies files using the Glacier SHA256 tree hash. 14 | - Provides progress information and clear error reporting. 15 | 16 | ## Usage 17 | 18 | ``` 19 | iceshelf-retrieve VAULT BACKUP [BACKUP ...] [--database FILE] [--dest DIR] [--threads N] 20 | iceshelf-retrieve VAULT --all [--database FILE] [--dest DIR] [--threads N] 21 | ``` 22 | 23 | - `VAULT` – name of the Glacier vault where archives are stored. 24 | - `--database` – path to the `checksum.json` database. This file is optional when using `--all`. 25 | - `BACKUP` – name of a backup set to retrieve (for example 26 | `20230101-123456-00000`). Multiple backups can be listed. 27 | - `--dest` – directory where files are stored (defaults to `retrieved/`). All 28 | downloads are placed directly in this directory without creating backup 29 | subfolders. 30 | - `--threads` – number of concurrent downloads. 31 | - `--all` – download every backup in the vault using only the Glacier inventory. 32 | 33 | Running the tool the first time will start an inventory retrieval job if no 34 | recent inventory exists. Once the inventory is available it will request 35 | retrieval for each file in the selected backup. With `--all`, the inventory 36 | is scanned to locate every backup in the vault and each one is downloaded in 37 | turn. Re-run the tool periodically until all files report `Finished`. 38 | 39 | ## Example 40 | 41 | ``` 42 | ./iceshelf-retrieve myvault 20230101-123456-00000 --dest restore --threads 4 43 | ``` 44 | 45 | Errors are printed with hints whenever possible. Ensure that your AWS 46 | credentials are configured for the account that owns the Glacier vault. 47 | -------------------------------------------------------------------------------- /DATABASE.md: -------------------------------------------------------------------------------- 1 | Structure of the JSON database: 2 | 3 | { 4 | "dataset" : {...}, 5 | "backups" : {...}, 6 | "vault" : "", 7 | "storage" : ["", ...], // destinations used for this backup 8 | "version" : [major, minor, revision], 9 | "timestamp" : , 10 | "moved" : {...} (optional), 11 | "lastbackup" : "" 12 | } 13 | 14 | "backups" contains: 15 | 16 | "" : [ 17 | "file1", 18 | "file2", 19 | ... 
20 | ] 21 | 22 | "dataset" contains: 23 | 24 | "" : { 25 | "deleted" : ["", ...], // Lists in which backups this file was deleted 26 | "checksum" : "", // Currently known version (blank if currently deleted) 27 | "memberof" : ["", ...] // Which backups this file exists in 28 | } 29 | 30 | "moved" contains: 31 | 32 | "" : { 33 | "reference" : "", 34 | "original" : ">" 35 | } 36 | 37 | --------- 38 | 39 | Manifest: 40 | 41 | { 42 | "deleted" : [...], 43 | "moved" : {...}, 44 | "modified" : {...}, 45 | "previousbackup", "" 46 | } 47 | 48 | "deleted" contains: 49 | 50 | All files which were deleted since last run 51 | 52 | "moved" contains: 53 | 54 | "" : { 55 | "reference" : "", 56 | "original" : ">" 57 | } 58 | 59 | "modified" contains: 60 | 61 | "" : { 62 | "deleted" : ["", ...], // Lists in which backups this file was deleted 63 | "checksum" : "", // Currently known version (blank if currently deleted) 64 | "memberof" : ["", ...] // Which backups this file exists in 65 | } 66 | 67 | ## Version history 68 | 69 | | Version | Changes | 70 | |---------|---------| 71 | | 1.0.0 | Added `version` and `timestamp` fields along with `dataset`, `backups` and `vault`. | 72 | | 1.0.1 | Internal move detection, manifest gained `moved` entries. Database format unchanged. | 73 | | 1.1.0 | Database now records `moved` entries and `lastbackup` of the previous run. Version key changed to an integer array. | 74 | -------------------------------------------------------------------------------- /modules/glacier.py: -------------------------------------------------------------------------------- 1 | from . import helper 2 | from subprocess import Popen, PIPE 3 | import logging 4 | import os 5 | import time 6 | 7 | def createVault(config): 8 | logging.info("Creating vault \"%s\"", config["glacier-vault"]) 9 | result = glacierCommand(config, ["mkvault", config["glacier-vault"]]) 10 | if result is None or result["code"] != 0: 11 | logging.error("Failed to create vault: %s", repr(result)) 12 | return False 13 | 14 | logging.info("Vault created") 15 | return True 16 | 17 | def uploadFiles(config, files, bytes): 18 | logging.info("Uploading %d files (%s) to glacier, this may take a while", len(files), helper.formatSize(bytes)) 19 | cmd = ["upload", config["glacier-vault"]] 20 | for f in files: 21 | cmd.append(f) 22 | 23 | upload_start = round(time.time()) 24 | result = glacierCommand(config, cmd) 25 | upload_time = max(round(time.time()) - upload_start, 1) 26 | 27 | if result is None or "output" not in result or "Uploaded file" not in result["output"]: 28 | logging.error("Failed to upload files: %s", repr(result)) 29 | return False 30 | 31 | logging.info("Files uploaded @ %s", helper.formatSpeed(bytes / upload_time)) 32 | return True 33 | 34 | # TODO: This one should actually show output as it goes... 
35 | def glacierCommand(config, args): 36 | if config["glacier-config"] is None: 37 | logging.error("glacierCommand() called without proper settings") 38 | return None 39 | 40 | cmd = ["glacier-cmd", "-c", config["glacier-config"], "--output", "json"] 41 | cmd += args 42 | 43 | logging.debug("Glacier command: " + repr(cmd)) 44 | 45 | p = Popen(cmd, stdout=PIPE, stderr=PIPE, cwd=config["prepdir"]) 46 | out, err = p.communicate() 47 | logging.debug("Output: " + repr(out)) 48 | logging.debug("Error : " + repr(err)) 49 | return {"code" : p.returncode, "output" : out, "error" : err } 50 | # return {"code": 0} 51 | """ 52 | upload: 53 | {'output': '{"Created archive with ID": "", "Archive SHA256 tree hash": "", "Uploaded file": ""}\n', 'code': 0, 'error': ''} 54 | 55 | mkvault: 56 | {"RequestId": "", "Location": "/5555555555/vaults/test"} 57 | 58 | lsvault: 59 | [{"SizeInBytes": 0, "LastInventoryDate": null, "VaultARN": "arn:aws:glacier:", "VaultName": "test", "NumberOfArchives": 0, "CreationDate": "2015-10-01T06:13:47.811Z"}] 60 | """ 61 | -------------------------------------------------------------------------------- /README.iceshelf-restore.md: -------------------------------------------------------------------------------- 1 | # iceshelf-restore 2 | 3 | A helper tool for iceshelf, allowing a somewhat easier way of restoring backups created by it. 4 | 5 | # Features 6 | 7 | - Quick validation of backup 8 | - Able to check for parent backup to avoid extacting in the wrong order (`--lastbackup`) 9 | - Can show contents of backup (`--list`) 10 | - Validate or restore a backup without needing the original config file 11 | - Allows for restore even when some files are missing (`--force`) 12 | - Initial validation of files using `filelist.txt` if available (will still confirm signatures) 13 | - Can attempt parity repair using `--repair` 14 | 15 | # Known issues 16 | 17 | Backup must be alone in a directory, you cannot store multiple backups in a folder since it simply picks the first manifest. This is on the todo list to fix 18 | 19 | # Usage 20 | 21 | The tool accepts either a single file from the backup or just the prefix of the backup files. Configuration is optional, but can be provided using `--config` if you have it available. Without it you may supply the GPG user using `--user` and the passphrase using `--passphrase`. 22 | Running the command with no extra arguments will validate the backup and return `0` on success. 23 | 24 | Note! If the archive is corrupt, it will only tell you if there is the possibility to repair it. It is *NO GUARANTEE* that you actually can. 25 | 26 | ## Listing the contents 27 | 28 | Adding `--list` will print the contents of the backup as specified by the manifest, including the parent backup (if available). 29 | 30 | ## Validating the backup 31 | 32 | `--validate` performs a full validation of the backup without extracting any files. Combine with `--repair` to fix corrupted archives if parity files are available. 33 | 34 | ## Restoring the backup 35 | 36 | Add `--restore` with a folder where you want the backup restored. The tool will automatically locate the necessary files based on the provided prefix or file path. Extra verification is performed to ensure the archive matches the manifest. If a file is present in the archive but not in the manifest, it will error out. This is by design to avoid causing unexpected issues after restoring. 37 | 38 | Note! 
Once the restore process has started, a failure to remove or rename/move an existing file will only cause a warning, restore will still continue. 39 | 40 | ## Corrupt backup 41 | 42 | If one or more files are missing (such as the manifest), you can still make `iceshelf-restore` try to process it by specifying `--force`. 43 | 44 | If the archive is corrupt but parity files are available you can try fixing it using `--repair`. 45 | 46 | Note! It will *NOT* extract any file, it will simply verify as many files as possible as well as repair and decrypt if possible. 47 | 48 | ## What does `--debug` do? 49 | 50 | It will give you some extra information while running, which normally isn't needed but can be helpful in understanding what's going wrong if `iceshelf-restore` isn't behaving as expected. 51 | -------------------------------------------------------------------------------- /iceshelf-inspect: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import argparse 4 | import os 5 | from datetime import datetime 6 | 7 | 8 | def load_database(filename): 9 | with open(filename, 'r', encoding='utf-8') as f: 10 | return json.load(f) 11 | 12 | 13 | def list_directory(data, path, recurse=False): 14 | path = path.rstrip('/') + '/' 15 | for fname in sorted(data['dataset'].keys()): 16 | if fname.startswith(path): 17 | rel = fname[len(path):] 18 | if recurse or '/' not in rel: 19 | print(fname) 20 | 21 | 22 | def find_files(data, query): 23 | query = query.lower() 24 | for fname in sorted(data['dataset'].keys()): 25 | if query in fname.lower(): 26 | print(fname) 27 | print(' backups:', ', '.join(sorted(data['dataset'][fname]['memberof']))) 28 | 29 | 30 | def file_info(data, filenames): 31 | for filename in filenames: 32 | item = data['dataset'].get(filename) 33 | if not item: 34 | print(f'{filename}: No such file in database') 35 | continue 36 | print('File:', filename) 37 | print(' checksum:', item['checksum']) 38 | print(' backups:', ', '.join(sorted(item['memberof']))) 39 | if item.get('deleted'): 40 | print(' deleted in:', ', '.join(sorted(item['deleted']))) 41 | moved_to = [n for n, v in data.get('moved', {}).items() if v['original'] == filename] 42 | if moved_to: 43 | for n in moved_to: 44 | print(' moved to:', n, 'in', data['moved'][n]['reference']) 45 | if filename in data.get('moved', {}): 46 | info = data['moved'][filename] 47 | print(' moved from:', info['original'], 'in', info['reference']) 48 | 49 | 50 | def stats(data): 51 | print('Backups:', len(data.get('backups', {}))) 52 | print('Files :', len(data.get('dataset', {}))) 53 | if 'timestamp' in data: 54 | ts = datetime.fromtimestamp(data['timestamp']) 55 | print('Timestamp:', ts.isoformat()) 56 | if 'lastbackup' in data: 57 | print('Last backup:', data['lastbackup']) 58 | print('Moved entries:', len(data.get('moved', {}))) 59 | 60 | 61 | def main(): 62 | p = argparse.ArgumentParser(description='Inspect iceshelf database') 63 | p.add_argument('database', help='checksum.json to inspect') 64 | sub = p.add_subparsers(dest='cmd') 65 | 66 | f_find = sub.add_parser('find', help='Search for files') 67 | f_find.add_argument('query') 68 | 69 | f_list = sub.add_parser('list', help='List directory contents') 70 | f_list.add_argument('path') 71 | f_list.add_argument('-r', '--recurse', action='store_true') 72 | 73 | f_file = sub.add_parser('file', help='Show file details') 74 | f_file.add_argument('paths', nargs='+') 75 | 76 | sub.add_parser('stats', help='Show statistics') 77 
| 78 | args = p.parse_args() 79 | data = load_database(args.database) 80 | 81 | if args.cmd == 'find': 82 | find_files(data, args.query) 83 | elif args.cmd == 'list': 84 | list_directory(data, args.path, args.recurse) 85 | elif args.cmd == 'file': 86 | file_info(data, args.paths) 87 | elif args.cmd == 'stats': 88 | stats(data) 89 | else: 90 | p.print_help() 91 | 92 | 93 | if __name__ == '__main__': 94 | main() 95 | -------------------------------------------------------------------------------- /modules/fileutils.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import os 3 | import hashlib 4 | import shutil 5 | import logging 6 | from subprocess import Popen, PIPE 7 | 8 | def copy(src, dst): 9 | try: 10 | shutil.copy(src, dst) 11 | except OSError as e: 12 | if e.errno == 1: 13 | logging.debug("Unable to change permissons on copied file: %s" % dst) 14 | else: 15 | logging.exception("Error copying file: %s" % src) 16 | raise 17 | 18 | def deleteTree(tree, include_self=False): 19 | for root, dirs, files in os.walk(tree, topdown=False): 20 | for name in files: 21 | os.remove(os.path.join(root, name)) 22 | for name in dirs: 23 | os.rmdir(os.path.join(root, name)) 24 | if include_self: 25 | os.rmdir(tree) 26 | 27 | def generateParity(filename, level): 28 | if level == 0: 29 | return False 30 | cmd = ["par2", "create", "-r"+str(level), filename] 31 | p = Popen(cmd, stdout=PIPE, stderr=PIPE) 32 | out, err = p.communicate() 33 | if p.returncode != 0: 34 | logging.error("Command: %s", repr(cmd)) 35 | logging.error("Output: %s", out) 36 | logging.error("Error : %s", err) 37 | logging.error("Code : %s", str(p.returncode)) 38 | return p.returncode == 0 39 | 40 | def repairParity(filename): 41 | cmd = ["par2", "r", filename] 42 | p = Popen(cmd, stdout=PIPE, stderr=PIPE) 43 | out, err = p.communicate() 44 | if p.returncode != 0: 45 | print("Command: " + repr(cmd)) 46 | print("Output: " + out) 47 | print("Error : " + err) 48 | print("Code : " + str(p.returncode)) 49 | else: 50 | # Remove the corrupt file 51 | if filename[-5:] == '.par2': 52 | os.unlink(filename[0:-5] + '.1') 53 | else: 54 | os.unlink(filename + '.1') 55 | return p.returncode == 0 56 | 57 | def hashFile(file, shatype, includeType=False): 58 | sha = hashlib.new(shatype) 59 | with open(file, 'rb') as fp: 60 | for chunk in iter(lambda: fp.read(32768), b''): 61 | sha.update(chunk) 62 | if includeType: 63 | return sha.hexdigest() + ":" + shatype 64 | return sha.hexdigest() 65 | 66 | def hashChanged(filename, oldChecksum, newChecksum): 67 | (hashNew, typeNew) = newChecksum.split(':', 2) 68 | 69 | # See if it's using the new method of hashes 70 | if ':' in oldChecksum: 71 | (hashOld, typeOld) = oldChecksum.split(':', 2) 72 | if typeOld != typeNew: 73 | hashNew = hashFile(filename, typeOld) 74 | return hashOld != hashNew 75 | 76 | # It's the old kind, see if this matches 77 | if len(oldChecksum) != len(hashNew): 78 | l = len(oldChecksum) 79 | hashNew = None # Forces a differences if we can't resolve 80 | if l == 32: 81 | hashNew = hashFile(filename, "md5") 82 | elif l == 40: 83 | hashNew = hashFile(filename, "sha1") 84 | elif l == 56: 85 | hashNew = hashFile(filename, "sha224") 86 | elif l == 64: 87 | hashNew = hashFile(filename, "sha256") 88 | elif l == 96: 89 | hashNew = hashFile(filename, "sha384") 90 | elif l == 128: 91 | hashNew = hashFile(filename, "sha512") 92 | else: 93 | logging.warn("Unable to determine hashing method used, returning changed (old hash: " + oldChecksum + ")") 94 | 
95 | return oldChecksum != hashNew 96 | 97 | def sumSize(path, files): 98 | result = 0 99 | for f in files: 100 | result += os.path.getsize(os.path.join(path, f)) 101 | return result 102 | 103 | def generateFilelist(path, output): 104 | files = os.listdir(path) 105 | with open(output, 'w', encoding="utf-8") as lst: 106 | for f in files: 107 | lst.write('{} {}\n'.format(hashFile(os.path.join(path, f), 'sha1'), f)) 108 | -------------------------------------------------------------------------------- /extras/analog-key.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Converts your private key to/from QR codes 4 | # 5 | # Requires: 6 | # - gpg 7 | # - qrencode 8 | # - zbarimg 9 | # - split 10 | # - base64 11 | # 12 | EXPORT=false 13 | IMPORT=false 14 | VALIDATE=false 15 | 16 | # Make sure user has all necessary commands 17 | for CMD in gpg qrencode base64 split replace zbarimg diff ; do 18 | if ! hash $CMD ; then 19 | echo "ERROR: Command $CMD is not available, see README.md for more details" 20 | exit 255 21 | fi 22 | done 23 | 24 | if [ "$1" == "export" ]; then 25 | EXPORT=true 26 | elif [ "$1" == "import" ]; then 27 | IMPORT=true 28 | elif [ "$1" == "validate" ]; then 29 | VALIDATE=true 30 | else 31 | echo 'analog-key.sh - A simple utility to transfer digital keys to/from analog format' 32 | echo '' 33 | echo 'To export GPG key: analog-key.sh export ""' 34 | echo 'To import GPG key: analog-key.sh import ""' 35 | echo ' analog-key.sh import "*.jpg"' 36 | echo '' 37 | echo 'When importing the key using images, make sure you name them so they are in the' 38 | echo 'correct order (a-z or 0-9 or similar). Then just give the right pattern as 2nd' 39 | echo 'parameter. For example, if your files are named "page-1.tiff", "page-2.tiff", etc.' 40 | echo 'just execute:' 41 | echo '' 42 | echo ' analog-key.sh import "page-*.tiff"' 43 | echo '' 44 | echo 'and you should be able to decode it properly.' 45 | echo "" 46 | exit 255 47 | fi 48 | 49 | if ! $EXPORT && ! $IMPORT && ! $VALIDATE ; then 50 | echo "You must either export or import a key" 51 | exit 255 52 | fi 53 | 54 | if [ "$2" = "" ]; then 55 | if $IMPORT; then 56 | echo "You must provide a base filename" 57 | else 58 | echo "You must provide a GPG key identifier" 59 | fi 60 | exit 255 61 | fi 62 | ITEM="$2" 63 | 64 | if $VALIDATE; then 65 | echo 'Validate will export and then immediately reimport the key and compare it to GPG' 66 | echo 'to confirm that the functionality in analog-key.sh works as expected.' 67 | echo '' 68 | echo 'No changes are done to GPG and all files are erased after testing.' 69 | echo '' 70 | echo 'Exporting...' 71 | if ! $0 export "${ITEM}" ; then 72 | echo 'ERROR: Unable to export key' 73 | exit 255 74 | fi 75 | echo 'Importing...' 76 | if ! $0 import "key-*.png" ; then 77 | echo 'ERROR: Unable to import key' 78 | exit 255 79 | fi 80 | echo 'Comparing...' 81 | if ! gpg --export-secret-key "${ITEM}" | diff - secret-key.gpg ; then 82 | echo "ERROR: Export->Import produced a key which differs from the one in gpg's keychain" 83 | exit 255 84 | fi 85 | rm "secret-key.gpg" key-*.png key.html 86 | echo "Everything checks out!" 
87 | exit 0 88 | elif $EXPORT; then 89 | rm key-* >/dev/null 2>/dev/null 90 | gpg --export-secret-key "${ITEM}" | base64 | split -d -b 2048 - key- 91 | PARTS=0 92 | for F in key-0[0-9]; do 93 | PARTS=$(($PARTS + 1)) 94 | qrencode < $F -o $F.png 95 | rm $F 96 | done 97 | DATE="$(date)" 98 | cat >key.html < 100 | 101 | 144 | Private/Secret GPG Key for "$ITEM" 145 | 146 | 147 |

Private/Secret GPG Key for "$ITEM"
148 | Generated $DATE
149 |
150 | Please print out this and keep it for your records. If you used a passphrase with this key,
151 | do NOT write it down on the same paper as it would make it useless.
152 |
153 |
154 | If you ever lose the script which generated these pages, here's how you restore it manually:
155 |
156 | zbarimg filename.pdf --raw -q | replace "QR-Code:" "" | base64 -d >secret-key.gpg
157 |
158 |
159 |
160 | Or if you have each QR code as a separate image, do this in sequence:
161 |
162 | EOF
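  # The loop below appends one decode command per QR-code part to the printed
  # page: the first part writes secret-key.b64.txt with ">", every later part
  # appends with ">>", so running the printed lines in order rebuilds the
  # base64-encoded key.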
163 |   I=0
164 |   for F in key-0[0-9].png; do
165 |     I=$(($I + 1))
166 |     if [ $I -eq 1 ]; then
167 |       E=" "
168 |     else
169 |       E=">"
170 |     fi
171 |     echo >>key.html "zbarimg file $I of $PARTS --raw -q | replace \"QR-Code:\" \"\" $E> secret-key.b64.txt"
172 |   done
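# What follows is also appended to key.html: a closing block with a blank
# "Notes" area (presumably for hand-written remarks) and, further down, one
# entry per QR-code part labelled "Part $I of $PARTS" so the whole key can be
# printed and later re-scanned.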
173 | cat >>key.html <<EOF
176 |
177 |
178 |
179 | Notes:
180 |
181 |
182 |
183 |
184 |
185 |
186 | EOF 187 | I=0 188 | for F in key-0[0-9].png; do 189 | I=$(($I + 1)) 190 | echo >>key.html "

Part $I of $PARTS - $DATE


" 191 | done 192 | echo >>key.html "" 193 | 194 | echo "Please open and print key.html" 195 | elif $IMPORT; then 196 | if [ "${ITEM: -4}" == ".pdf" ]; then 197 | if ! zbarimg "${ITEM}" --raw -q | replace "QR-Code:" "" | base64 -d >secret-key.gpg ; then 198 | echo "ERROR: Was unable to interpret the QR codes." 199 | echo " Some reasons this could fail:" 200 | echo " - Is zbarimg compiled with PDF support?" 201 | echo " - Did you scan the QR codes out-of-order?" 202 | exit 255 203 | fi 204 | else 205 | rm 2>/dev/null 1>/dev/null secret-key.b64.txt 206 | for F in ${ITEM}; do 207 | if ! zbarimg "${F}" --raw -q | replace "QR-Code:" "" >> "secret-key.b64.txt" ; then 208 | echo "ERROR: Unable to determine a QR code in the provided file." 209 | exit 255 210 | fi 211 | done 212 | if ! base64 -d < "secret-key.b64.txt" >secret-key.gpg ; then 213 | echo "ERROR: Not a valid key. Did you forget to enclose the 2nd parameter in quotes?" 214 | exit 255 215 | fi 216 | rm 2>/dev/null 1>/dev/null secret-key.b64.txt 217 | fi 218 | echo 'Restored key can be found as "secret-key.gpg"' 219 | fi -------------------------------------------------------------------------------- /extras/testsuite/test_restore.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Functional validation suite for iceshelf-restore. 3 | # 4 | # The script runs a number of restore scenarios covering 5 | # - unencrypted backups 6 | # - encrypted and/or signed backups (if gpg is available) 7 | # - archives with parity data (if par2 is available) 8 | # It validates the output from --list and --validate and then 9 | # restores the backup both using the manifest and using the 10 | # prefix path. Various failure conditions such as missing 11 | # manifests and corrupt archives are also tested. 12 | 13 | set -e 14 | 15 | # Move to the directory where the script resides so that all 16 | # relative paths work regardless of where the test is invoked 17 | cd "$(dirname "$0")" 18 | 19 | function hasGPGconfig() { 20 | gpg --list-secret-keys 2>/dev/null | grep test@test.test >/dev/null 2>/dev/null 21 | return $? 22 | } 23 | 24 | # The test is executed for a number of backup variants. By default we 25 | # only run the plain "normal" setup. If additional tooling is 26 | # available the set expands to cover parity data as well as gpg based 27 | # encryption/signatures. 28 | VARIATIONS=("normal") 29 | if hash par2 >/dev/null 2>&1; then 30 | VARIATIONS+=("parity") 31 | fi 32 | 33 | if hash gpg >/dev/null 2>&1; then 34 | # If gpg is available, attempt to import the test key so that 35 | # encrypted and signed backups can be produced. When the key is 36 | # present we extend the test matrix to cover those variants. 37 | HASKEY=false 38 | if ! 
hasGPGconfig; then 39 | echo "Importing test-key for test usage" 40 | gpg --no-tty --batch --pinentry-mode loopback --passphrase test --fast-import test_key.* >/dev/null 2>&1 41 | echo "010034E91082BF022DBAF1FEA00E5EDACC9D1828:6:" | gpg --import-ownertrust >/dev/null 2>&1 42 | if hasGPGconfig ; then 43 | HASKEY=true 44 | else 45 | echo "=== ERROR: Unable to import GPG key for testing, encryption will not be tested" 46 | exit 255 47 | fi 48 | else 49 | HASKEY=true 50 | fi 51 | 52 | if $HASKEY ; then 53 | ADD=() 54 | for I in "${VARIATIONS[@]}"; do 55 | ADD+=("$I,encrypted" "$I,signed" "$I,encrypted,signed") 56 | done 57 | VARIATIONS+=("${ADD[@]}") 58 | fi 59 | fi 60 | 61 | for VARIANT in "${VARIATIONS[@]}"; do 62 | echo "--- Restore variant ${VARIANT} ---" 63 | # Prepare a fresh backup environment 64 | rm -rf content tmp data done restore restore2 65 | mkdir content tmp data done restore restore2 66 | echo "hello restore" > content/file.txt 67 | 68 | # Compose additional configuration depending on current variant 69 | EXTRAS="[security]" 70 | if [[ "$VARIANT" == *"encrypted"* ]]; then 71 | EXTRAS="$EXTRAS\nencrypt: test@test.test\nencrypt phrase: test" 72 | fi 73 | if [[ "$VARIANT" == *"signed"* ]]; then 74 | EXTRAS="$EXTRAS\nsign: test@test.test\nsign phrase: test" 75 | fi 76 | if [[ "$VARIANT" == *"parity"* ]]; then 77 | EXTRAS="$EXTRAS\nadd parity: 5" 78 | fi 79 | 80 | # Minimal configuration to generate a single backup using the 81 | # currently selected options 82 | cat > config_restore <diff.out; then 140 | echo "Mismatch after manifest restore" 141 | cat diff.out 142 | exit 1 143 | fi 144 | rm diff.out 145 | 146 | # Restore using the prefix notation (no manifest) and verify again 147 | ../../iceshelf-restore --restore restore2 "$PREFIX" 148 | DEST2="restore2$(pwd)/content" 149 | if ! diff -r content "$DEST2" >diff.out; then 150 | echo "Mismatch after prefix restore" 151 | cat diff.out 152 | exit 1 153 | fi 154 | rm diff.out 155 | 156 | # Negative tests: remove the manifest and ensure restore/validate fail 157 | mv "$MANIFEST" "$MANIFEST.bak" 158 | if ../../iceshelf-restore "$PREFIX" 2>/dev/null; then 159 | echo "restore succeeded unexpectedly when manifest missing" 160 | exit 1 161 | fi 162 | if ../../iceshelf-restore --force --validate "$PREFIX" 2>/dev/null; then 163 | echo "forced validation unexpectedly succeeded" 164 | exit 1 165 | fi 166 | mv "$MANIFEST.bak" "$MANIFEST" 167 | 168 | # Corrupt the archive and verify that validation fails. When parity 169 | # data is present we also exercise the repair functionality. 170 | cp "$ARCHIVE" "$ARCHIVE.bak" 171 | dd if=/dev/urandom of="$ARCHIVE" bs=1 count=10 seek=10 conv=notrunc >/dev/null 2>&1 172 | if ../../iceshelf-restore --validate "$MANIFEST" 2>/dev/null; then 173 | echo "corrupt archive validated unexpectedly" 174 | exit 1 175 | fi 176 | if [[ "$VARIANT" == *"parity"* ]]; then 177 | ../../iceshelf-restore --repair --validate "$MANIFEST" || true 178 | fi 179 | mv "$ARCHIVE.bak" "$ARCHIVE" 180 | 181 | done 182 | 183 | # All variants completed successfully 184 | echo "iceshelf-restore test suite completed successfully" 185 | -------------------------------------------------------------------------------- /iceshelf.sample.conf: -------------------------------------------------------------------------------- 1 | # The sources section points out both files and folders that 2 | # needs to be backed up. Wildcard is not allowed, tool will 3 | # always recurse into directories. 
4 | # 5 | [sources] 6 | 7 | # Some extra paths needed by the tool. 8 | # "prep dir" is used for temporary storage (creating archive, signing etc) 9 | # "data dir" is used for storing information needed to track changes 10 | # "done dir" is used for storing successfully backed up archives. Each backup is 11 | # stored in its own folder. Note! It copies and then deletes, 12 | # so needs extra space. Leave blank to disable. 13 | # 14 | [paths] 15 | prep dir: backup/inprogress/ 16 | data dir: backup/metadata/ 17 | done dir: backup/done/ 18 | 19 | # Allows tweaking the tool 20 | # "max size" sets an upper limit of the archive's uncompressed size. Note! If 21 | # parity is enabled, max size is automaticallt restricted to 32GB or 22 | # less due to limitations in PAR2 23 | # 24 | # NOTE! Due to the stage-by-stage nature of this tool, you should be 25 | # aware that it will at times consume twice the space for temporary 26 | # files. So while max size defines the max size of the content 27 | # grabbed, it does not limit the end-result (which can vary, see 28 | # security section) nor does it take temp files into account. 29 | # 30 | # "change method" determines how changes are detected. "data" uses sha1 of 31 | # the data. You can also specify sha1, sha256 or sha512 explicityly 32 | # depending on your needs. For most users, data (sha1) is enough 33 | # and will also have the benefit of being fairly quick. 34 | # 35 | # "meta" is deprecated and will error out. 36 | # 37 | # "delta manifest" yes/no 38 | # 39 | # Allows you to store a copy of the files contained within the backup. This helps 40 | # you locate that elusive file when in a pinch. The default for this option is "yes", 41 | # if you prefer not to keep a manifest, then "no" will disable it. 42 | # 43 | # "compress" yes/no/force 44 | # 45 | # Normally yes, no disables and force ignores internal rules. 46 | # Uses bzip2 compression. 47 | # 48 | # "persuasive" Normally no, but if yes, will try and fit as many files into the 49 | # maxsize restriction, leaving some for another day. This results 50 | # in a more uniform sized uploads, no data will ever be lost, it 51 | # just will come at a later session. TODO 52 | # 53 | # "ignore overlimit" will cause iceshelf to return 0 even if files were skipped. 54 | # However, if there are more files which WOULD fit, it will work 55 | # as expected (ie, tell you to run it again). 56 | # 57 | # "incompressible" allows you to add additional extensions for files which won't 58 | # compress very well. To add more than one, separate them using space. 59 | # "max keep" allows you to automatically keep a max of X backups in the done folder. 60 | # If done folder is undefined, this option has no effect. If the folder exists 61 | # but this option is blank or zero, there is no limit (unlimited) 62 | # 63 | # "prefix" is optional, but when available defines a prefix to be added to all 64 | # generated backup files (so you can store more than one backup in the same vault) 65 | # 66 | # "detect move" if true, a moved file will only result in the actual operation being 67 | # backed up. This saves on data. 
***EXPERIMENTAL SEE README.MD*** 68 | # 69 | # "skip empty" if true, skips the backup if no changes are detected 70 | # 71 | # Example: 72 | # "File_A" was renamed to "File_B" will cause the manifest to just log the move 73 | # 74 | # If the option is disabled, "File_B" will be backed up and "File_A" will be marked as 75 | # deleted 76 | # 77 | [options] 78 | max size: 79 | change method: data 80 | delta manifest: yes 81 | compress: yes 82 | persuasive: yes 83 | ignore overlimit: no 84 | incompressible: 85 | max keep: 0 86 | prefix: 87 | detect move: no 88 | skip empty: no 89 | 90 | # Exclusion rules 91 | # All rules are processed in the order they are defined, as soon as a rule 92 | # matches, it will stop processing the rest. 93 | # 94 | # Rules are defined by = where name can be whatever you want. 95 | # rule without any special prefix is simply a textual match of the complete 96 | # filename including the path. However, this can be extended by the following 97 | # prefixes: 98 | # 99 | # * Matches from the end of the filename instead of start 100 | # ? Matches any part of the filename 101 | # > Tests the filesize, if bigger than , then it's excluded 102 | # < Tests the filesize, if less than , the it's excluded 103 | # 104 | # | Load a list of exclusions from ... The format of the file 105 | # is such that it omits the "=" part of the syntax. Just the 106 | # rules straight up with potential modifiers. 107 | # With ONE exception, a loaded file cannot use the | prefix in the 108 | # rules. This is to avoid unexpected relationships. 109 | # Also consider that the order of the rules STILL apply, even when 110 | # loading external files. 111 | # 112 | # There is also a special modifier which you can prefix the entire rule with. 113 | # By adding an exclamationmark, you invert the rule. Now if it matches the 114 | # content WILL be included. That allows for some snazzy rules such as: 115 | # 116 | # [exclude] 117 | # alldocs=!*.doc 118 | # no odd dirs=/some/odd/dir/ 119 | # 120 | # In a structure like this: 121 | # /some/ 122 | # /some/data.txt 123 | # /some/todo.doc 124 | # /some/odd/dir/ 125 | # /some/odd/dir/moredata.txt 126 | # /some/odd/dir/readme.doc 127 | # 128 | # It will backup the following: 129 | # /some/data.txt 130 | # /some/todo.doc 131 | # /some/odd/dir/readme.doc 132 | # 133 | # Notice how it snagged a file from inside an excluded folder? Pretty 134 | # convenient. However, in order for this to work, you must consider the 135 | # order of the rules. If you change the order to: 136 | # 137 | # [exclude] 138 | # no odd dirs=/some/odd/dir/ 139 | # alldocs=!*.doc 140 | # 141 | # The "no odd dirs" would trigger first and the second rule would never get a 142 | # chance to be evaluated. If you're having issues with the rules, consider 143 | # running iceshelf with --changes and --debug to see what it's doing. 144 | # 145 | [exclude] 146 | 147 | # Provider settings control where files are stored. Multiple provider sections can 148 | # be specified and files will be uploaded to each destination. Type can be cp, 149 | # sftp, scp, s3 or glacier. 
Each provider has its own required arguments documented 150 | # in providers/*.md 151 | [provider-local] 152 | type: cp 153 | dest: backup/done/ 154 | 155 | [provider-cloud] 156 | type: s3 157 | bucket: mybucket 158 | 159 | # Run custom command before and/or after backup 160 | # 161 | # "pre command" is run before anything is done 162 | # "post command" is run AFTER the archive is created but BEFORE providers upload it (if at all) 163 | # 164 | # The post command will be provided with the complete path and filename of the created files 165 | # which may be one or more. 166 | # 167 | # If any of these commands return non-zero, it will cause the backup to abort. 168 | # 169 | # Using post command, you can easily adapt iceshelf to upload the result to another storage 170 | # service (such as dropbox). 171 | # 172 | # Note! You cannot provide any arguments to these commands 173 | # 174 | [custom] 175 | pre command: 176 | post command: 177 | 178 | # Security settings 179 | # "encrypt" and "sign" points out a GPG identity (typical email address) to use 180 | # for encryption and signatures. If they need a passphrase, use companion settings. 181 | # "add parity" creates a parity file which can replace anywhere from 1 to 100%, 182 | # 0 is off 183 | # 184 | # Encryption adds ~1% to the size of the archive, signature is has negligible 185 | # impact on size. Parity roughly adds the percentage you define 186 | # (on top of the encryption penalty) 187 | # 188 | [security] 189 | encrypt: 190 | encrypt phrase: 191 | sign: 192 | sign phrase: 193 | add parity: 0 194 | -------------------------------------------------------------------------------- /iceshelf-retrieve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Iceshelf Retrieve - fetch backups from AWS Glacier. 3 | 4 | This tool downloads files created by `iceshelf` that were stored in an AWS 5 | Glacier vault. Because Glacier retrievals are asynchronous, the tool keeps 6 | track of in-progress jobs and can be re-run to continue where it left off. 7 | """ 8 | 9 | import argparse 10 | import logging 11 | import json 12 | import os 13 | import re 14 | import sys 15 | import time 16 | from concurrent.futures import ThreadPoolExecutor 17 | 18 | import boto3 19 | 20 | from modules import aws 21 | 22 | 23 | STATE_VERSION = 1 24 | 25 | 26 | def ensure_inventory(client, vault, datadir): 27 | """Ensure a recent inventory is available. 28 | 29 | Returns a dictionary mapping archive description to information or None if 30 | the inventory is still being prepared. 31 | """ 32 | inv_file = os.path.join(datadir, "glacier_inventory.json") 33 | job_file = os.path.join(datadir, "glacier_inventory.job") 34 | 35 | if os.path.exists(inv_file): 36 | with open(inv_file, "r", encoding="utf-8") as f: 37 | data = json.load(f) 38 | mapping = {} 39 | for a in data.get("ArchiveList", []): 40 | mapping[a.get("ArchiveDescription")] = { 41 | "archiveId": a.get("ArchiveId"), 42 | "size": a.get("Size"), 43 | "checksum": a.get("SHA256TreeHash"), 44 | } 45 | return mapping 46 | 47 | if os.path.exists(job_file): 48 | with open(job_file, "r", encoding="utf-8") as f: 49 | job_id = f.read().strip() 50 | logging.info("Checking status of inventory job %s", job_id) 51 | status = client.describe_job(vaultName=vault, jobId=job_id) 52 | if not status.get("Completed"): 53 | logging.info("Inventory job not ready yet. 
Please rerun later.") 54 | return None 55 | logging.info("Downloading inventory ...") 56 | out = client.get_job_output(vaultName=vault, jobId=job_id) 57 | body = out["body"].read() 58 | with open(inv_file, "wb") as f: 59 | f.write(body) 60 | os.remove(job_file) 61 | data = json.loads(body.decode("utf-8")) 62 | mapping = {} 63 | for a in data.get("ArchiveList", []): 64 | mapping[a.get("ArchiveDescription")] = { 65 | "archiveId": a.get("ArchiveId"), 66 | "size": a.get("Size"), 67 | "checksum": a.get("SHA256TreeHash"), 68 | } 69 | return mapping 70 | 71 | # No job running, start one 72 | logging.info("Starting inventory retrieval job ...") 73 | resp = client.initiate_job( 74 | vaultName=vault, 75 | jobParameters={"Type": "inventory-retrieval"}, 76 | ) 77 | with open(job_file, "w", encoding="utf-8") as f: 78 | f.write(resp["jobId"]) 79 | logging.info( 80 | "Inventory retrieval job %s started. Rerun the tool once it completes.", 81 | resp["jobId"], 82 | ) 83 | return None 84 | 85 | 86 | def load_state(path): 87 | if not os.path.exists(path): 88 | return {"version": STATE_VERSION, "files": {}} 89 | with open(path, "r", encoding="utf-8") as f: 90 | return json.load(f) 91 | 92 | 93 | def save_state(path, state): 94 | tmp = path + ".tmp" 95 | with open(tmp, "w", encoding="utf-8") as f: 96 | json.dump(state, f, indent=2) 97 | os.replace(tmp, path) 98 | 99 | 100 | def request_job(client, vault, entry): 101 | resp = client.initiate_job( 102 | vaultName=vault, 103 | jobParameters={ 104 | "Type": "archive-retrieval", 105 | "ArchiveId": entry["archiveId"], 106 | "Description": entry["name"], 107 | }, 108 | ) 109 | entry["jobId"] = resp["jobId"] 110 | entry["status"] = "requested" 111 | logging.info("Requested retrieval of %s (job %s)", entry["name"], entry["jobId"]) 112 | 113 | 114 | def check_job(client, vault, entry): 115 | info = client.describe_job(vaultName=vault, jobId=entry["jobId"]) 116 | if not info.get("Completed"): 117 | logging.info("%s not ready yet", entry["name"]) 118 | return False 119 | entry["status"] = "ready" 120 | return True 121 | 122 | 123 | def download_job(client, vault, entry, destdir): 124 | dest = os.path.join(destdir, entry["name"]) 125 | logging.info("Downloading %s", entry["name"]) 126 | start = time.time() 127 | out = client.get_job_output(vaultName=vault, jobId=entry["jobId"]) 128 | with open(dest, "wb") as f: 129 | while True: 130 | chunk = out["body"].read(1024 * 1024) 131 | if not chunk: 132 | break 133 | f.write(chunk) 134 | if sys.stdout.isatty(): 135 | done = f.tell() 136 | total = entry.get("size", 0) 137 | speed = done / max(time.time() - start, 1) 138 | sys.stdout.write( 139 | "%s: %s/%s @ %s\r" 140 | % ( 141 | entry["name"], 142 | aws.helper.formatSize(done), 143 | aws.helper.formatSize(total), 144 | aws.helper.formatSpeed(speed), 145 | ) 146 | ) 147 | sys.stdout.flush() 148 | if sys.stdout.isatty(): 149 | sys.stdout.write("\n") 150 | 151 | checksum = aws.hashFile(dest, 1024 ** 2)["final"].hexdigest() 152 | expected = entry.get("checksum") or out.get("checksum") 153 | if expected and checksum != expected: 154 | logging.error("Checksum mismatch for %s", entry["name"]) 155 | entry["status"] = "pending" 156 | entry["jobId"] = None 157 | try: 158 | os.remove(dest) 159 | except OSError: 160 | pass 161 | return 162 | 163 | entry["status"] = "done" 164 | entry["jobId"] = None 165 | logging.info("Finished %s", entry["name"]) 166 | 167 | 168 | def main(): 169 | parser = argparse.ArgumentParser( 170 | description="Iceshelf Retrieve - Download backups from Glacier", 171 | 
formatter_class=argparse.ArgumentDefaultsHelpFormatter, 172 | ) 173 | parser.add_argument("vault", help="Name of the Glacier vault") 174 | parser.add_argument( 175 | "backup", 176 | nargs="*", 177 | metavar="BACKUP", 178 | help="One or more backup identifiers to retrieve", 179 | ) 180 | parser.add_argument( 181 | "--database", 182 | help="Path to checksum.json database", 183 | ) 184 | parser.add_argument("--all", action="store_true", default=False, help="Retrieve all backups") 185 | parser.add_argument( 186 | "--dest", 187 | default="retrieved", 188 | help="Destination directory for downloaded files", 189 | ) 190 | parser.add_argument( 191 | "--threads", type=int, default=2, help="Number of parallel downloads" 192 | ) 193 | parser.add_argument("--logfile", metavar="FILE", help="Log to file instead of stdout") 194 | parser.add_argument( 195 | "--debug", action="store_true", default=False, help="Enable debug logging" 196 | ) 197 | args = parser.parse_args() 198 | 199 | if not args.all and not args.backup: 200 | parser.error("Specify BACKUP or --all") 201 | 202 | loglevel = logging.DEBUG if args.debug else logging.INFO 203 | logformat = "%(asctime)s - %(levelname)s - %(message)s" if args.logfile else "%(message)s" 204 | if args.logfile: 205 | logging.basicConfig(filename=args.logfile, level=loglevel, format=logformat) 206 | else: 207 | logging.basicConfig(stream=sys.stdout, level=loglevel, format=logformat) 208 | 209 | db_path = args.database 210 | datadir = os.path.dirname(db_path) if db_path else args.dest 211 | os.makedirs(datadir, exist_ok=True) 212 | 213 | client = boto3.client("glacier") 214 | vault = args.vault 215 | 216 | inventory = ensure_inventory(client, vault, datadir) 217 | if inventory is None: 218 | return 0 219 | 220 | backups_db = {} 221 | if args.all: 222 | # Build backup sets from inventory when database is unavailable 223 | for name, info in inventory.items(): 224 | m = re.search(r"\d{8}-\d{6}-[0-9a-fA-F]{5}", name) 225 | if not m: 226 | logging.warning("Unable to determine backup id for %s", name) 227 | continue 228 | backups_db.setdefault(m.group(0), []).append(name) 229 | targets = sorted(backups_db.keys()) 230 | if not targets: 231 | logging.error("No backups found in inventory") 232 | return 1 233 | else: 234 | if not os.path.exists(db_path): 235 | logging.error("Database %s not found", db_path) 236 | return 1 237 | with open(db_path, "r", encoding="utf-8") as f: 238 | db = json.load(f) 239 | if db.get("vault") and db.get("vault") != vault: 240 | logging.warning( 241 | "Database was created for vault %s but using %s", db.get("vault"), vault 242 | ) 243 | backups_db = db.get("backups", {}) 244 | missing = [b for b in args.backup if b not in backups_db] 245 | if missing: 246 | logging.error("Backup(s) not found: %s", ", ".join(missing)) 247 | return 1 248 | targets = args.backup 249 | 250 | os.makedirs(args.dest, exist_ok=True) 251 | 252 | for backup in targets: 253 | files = backups_db[backup] 254 | 255 | destdir = args.dest 256 | 257 | state_file = os.path.join(datadir, f"retrieve-{backup}.json") 258 | state = load_state(state_file) 259 | 260 | # Update state with any new files 261 | for name in files: 262 | if name not in inventory: 263 | logging.error("File %s not found in vault inventory", name) 264 | continue 265 | if name not in state["files"]: 266 | info = inventory[name] 267 | state["files"][name] = { 268 | "name": name, 269 | "archiveId": info["archiveId"], 270 | "size": info.get("size"), 271 | "checksum": info.get("checksum"), 272 | "jobId": None, 273 | 
"status": "pending", 274 | } 275 | save_state(state_file, state) 276 | 277 | pending = [f for f in state["files"].values() if f["status"] != "done"] 278 | if not pending: 279 | logging.info("All files for %s already retrieved", backup) 280 | continue 281 | 282 | # Request jobs if needed 283 | for entry in pending: 284 | if entry["jobId"] is None: 285 | request_job(client, vault, entry) 286 | save_state(state_file, state) 287 | 288 | # Check which jobs are ready 289 | ready = [e for e in pending if e["jobId"]] 290 | to_download = [] 291 | for entry in ready: 292 | if check_job(client, vault, entry): 293 | to_download.append(entry) 294 | if not to_download: 295 | logging.info("No files for %s are ready for download yet", backup) 296 | save_state(state_file, state) 297 | continue 298 | 299 | # Download ready files in parallel 300 | with ThreadPoolExecutor(max_workers=args.threads) as exe: 301 | futures = [exe.submit(download_job, client, vault, e, destdir) for e in to_download] 302 | for fut in futures: 303 | fut.result() 304 | 305 | save_state(state_file, state) 306 | 307 | logging.info("Done") 308 | return 0 309 | 310 | 311 | if __name__ == "__main__": 312 | sys.exit(main()) 313 | -------------------------------------------------------------------------------- /modules/aws.py: -------------------------------------------------------------------------------- 1 | from . import helper 2 | from subprocess import Popen, PIPE 3 | import logging 4 | import os 5 | import time 6 | import json 7 | import io 8 | import hashlib 9 | import tempfile 10 | import sys 11 | import math 12 | 13 | import random 14 | 15 | import threading 16 | from queue import Queue, Empty 17 | 18 | def isConfigured(): 19 | if not os.path.exists(os.path.expanduser('~/.aws/config')) or not os.path.exists(os.path.expanduser('~/.aws/credentials')): 20 | logging.error('AWS is not configured, please run aws tool with configure for current user') 21 | return False 22 | 23 | # Now that we know these files exists, check the contents 24 | hasRegion = False 25 | hasJSON = False 26 | hasCred1 = False 27 | hasCred2 = False 28 | with io.open(os.path.expanduser('~/.aws/config')) as f: 29 | while True: 30 | line = f.readline().lower() 31 | if 'region' in line: 32 | hasRegion = True 33 | elif 'output' in line and 'json' in line: 34 | hasJSON = True 35 | elif line == '': 36 | break 37 | 38 | with io.open(os.path.expanduser('~/.aws/credentials')) as f: 39 | while True: 40 | line = f.readline().lower() 41 | if 'aws_access_key_id' in line: 42 | hasCred1 = True 43 | elif 'aws_secret_access_key' in line: 44 | hasCred2 = True 45 | elif line == '': 46 | break 47 | 48 | if not hasRegion: 49 | logging.error('AWS configuration is missing region setting') 50 | if not hasJSON: 51 | logging.error('AWS configuration is missing output setting or it\'s not set to JSON') 52 | if not hasCred1: 53 | logging.error('AWS configuration is missing aws_access_key_id') 54 | if not hasCred2: 55 | logging.error('AWS configuration is missing aws_secret_access_key') 56 | if not (hasRegion and hasJSON and hasCred1 and hasCred2): 57 | logging.error('Please resolve issues by running aws tool with configure for current user') 58 | return False 59 | return True 60 | 61 | def createVault(config): 62 | result = awsCommand(config, ['create-vault', '--vault-name', config["glacier-vault"]]) 63 | if result is None or result["code"] != 0: 64 | logging.error("Failed to create vault: %s", repr(result)) 65 | return False 66 | logging.info("Vault created") 67 | return True 68 | 69 | class 
uploadCoordinator: 70 | def __init__(self, threads=4): 71 | self.threads = threads 72 | self.sent = 0 73 | self.began = round(time.time()) 74 | self.exit = False 75 | self.queue = Queue() 76 | 77 | def process(self): 78 | self.began = round(time.time()) 79 | for w in range(self.threads): 80 | t = threading.Thread(target=self.worker) 81 | t.daemon = True 82 | t.start() 83 | 84 | def worker(self): 85 | run = True 86 | while run and not self.exit: 87 | try: 88 | entry = self.queue.get(False) 89 | except Empty: 90 | break 91 | except Exception: 92 | logging.exception('Failed to read from queue') 93 | break 94 | sent = entry.work() 95 | if sent == -1: 96 | logging.error("WE FAILED!") 97 | run = False 98 | self.exit = True 99 | else: 100 | self.sent += sent 101 | entry.cleanup() 102 | self.queue.task_done() 103 | self.threads -= 1 104 | 105 | def add(self, process): 106 | if self.exit: 107 | return False 108 | self.queue.put(process) 109 | return True 110 | 111 | def getTime(self): 112 | t = round(time.time()) - self.began 113 | if t < 1: 114 | return 1 115 | return t 116 | 117 | def getSent(self): 118 | return self.sent 119 | 120 | def isDone(self): 121 | return self.threads == 0 | self.exit 122 | 123 | def finish(self): 124 | self.queue.join() 125 | return not self.exit 126 | 127 | class uploadJob: 128 | def __init__(self, config, file, name, offset, size, checksum, uploadId): 129 | self.config = config 130 | self.file = file 131 | self.name = name 132 | self.offset = offset 133 | self.size = size 134 | self.checksum = checksum 135 | self.uploadId = uploadId 136 | 137 | self.retries = 10 138 | tf = tempfile.NamedTemporaryFile(dir='/tmp', delete=False) 139 | if tf is None: 140 | logging.error('Unable to generate temporary file') 141 | raise RuntimeError('Unable to generate temporary file') 142 | self.tmpfile = tf.name 143 | tf.close() 144 | 145 | def extractChunk(self, offset, size): 146 | with io.open(self.file, 'rb') as i: 147 | i.seek(offset) 148 | with io.open(self.tmpfile, 'wb') as o: 149 | buf = i.read(size) 150 | o.write(buf) 151 | return True 152 | 153 | def cleanup(self): 154 | if os.path.exists(self.tmpfile): 155 | os.unlink(self.tmpfile) 156 | 157 | def work(self): 158 | # Exract chunk into temp file for upload purpose 159 | if not self.extractChunk(self.offset, self.size): 160 | logging.error('Unable to extract chunk for upload') 161 | return False 162 | 163 | dataRange = 'bytes %d-%d/*' % (self.offset, self.offset + self.size - 1) 164 | self.retry = self.retries 165 | while self.retry > 0: 166 | result = awsCommand(self.config, ['upload-multipart-part', '--vault-name', self.config['glacier-vault'], '--cli-input-json', '{"uploadId": "' + self.uploadId + '"}', '--body', self.tmpfile, '--range', dataRange]) 167 | if result is not None and result['json'] is not None and 'checksum' in result['json']: 168 | if self.checksum != result['json']['checksum']: 169 | logging.error('Hash does not match, expected %s got %s.', self.checksum, result['json']['checksum']) 170 | else: 171 | break 172 | else: 173 | if 'RequestTimeoutException' in result['error']: 174 | logging.warn('Timeout') 175 | else: 176 | logging.debug('Result was: ' + repr(result)) 177 | 178 | self.retry = self.retry - 1 179 | logging.warning('%s @ %d failed to upload, retrying in %d seconds. 
%d tries left', helper.formatSize(self.size), self.offset, (10-self.retry)*30, self.retry) 180 | time.sleep((10-self.retry) * 30) 181 | 182 | if self.retry == 0: 183 | logging.error('Unable to upload %s at offset %d', helper.formatSize(self.size), self.offset) 184 | return -1 185 | return self.size 186 | 187 | def hashFile(file, chunkSize): 188 | if not os.path.exists(file): 189 | return None 190 | 191 | h = hashlib.sha256 192 | blocks = [] 193 | final = [] 194 | # Do it in 1MB chunks, regardless of chunkSize 195 | with io.open(file, 'rb') as f: 196 | while True: 197 | data = f.read(1024**2) 198 | if len(data) == 0: 199 | break 200 | v = h(data) 201 | blocks.append(v) 202 | 203 | # Produce final hash 204 | def recurse(hashlist, size): 205 | # We've reached the chunksize we need, so store a copy before we continue 206 | if size == chunkSize: 207 | for o in hashlist: 208 | final.append(o) 209 | 210 | output = [h(h1.digest() + h2.digest()) 211 | for h1, h2 in zip(hashlist[::2], hashlist[1::2])] 212 | if len(hashlist) % 2: 213 | output.append(hashlist[-1]) 214 | 215 | if len(output) > 1: 216 | return recurse(output, size*2) 217 | else: 218 | return output[0] 219 | 220 | result = {'blocks' : final, 'final' : recurse(blocks or [h(b"")], 1024**2)} 221 | return result 222 | 223 | def uploadFile(config, prefix, file, bytesDone=0, bytesTotal=0, withPath=False): 224 | if not os.path.exists(file): 225 | logging.error('File %s does not exist', file) 226 | return False 227 | 228 | name = file 229 | if not withPath: 230 | name = os.path.basename(name) 231 | size = remain = os.path.getsize(file) 232 | 233 | # Due to limit of 10000 parts in an upload, we need to make it all fit 234 | chunkSize = size / 10000 235 | if chunkSize <= 1024**2: 236 | chunkSize = 1024**2 237 | else: 238 | # Make sure it's a power of two 239 | factor = math.ceil(float(chunkSize) / float(1024**2)) 240 | chunkSize = int((1024**2) * factor) 241 | chunkSize -= 1 242 | chunkSize |= chunkSize >> 1 243 | chunkSize |= chunkSize >> 2 244 | chunkSize |= chunkSize >> 4 245 | chunkSize |= chunkSize >> 8 246 | chunkSize |= chunkSize >> 16 247 | chunkSize += 1 248 | logging.debug('Using chunksize of %s based on size (%s) of the file we\'re uploading', helper.formatSize(chunkSize), helper.formatSize(size)) 249 | 250 | hashes = hashFile(file, chunkSize) 251 | if hashes is None: 252 | logging.error('Unable to hash file %s', file) 253 | return False 254 | 255 | # Initiate the upload 256 | result = awsCommand(config, ['initiate-multipart-upload', '--vault-name', config['glacier-vault'], '--archive-description', name, '--part-size', str(chunkSize)]) 257 | if result is None or result['code'] != 0 or 'uploadId' not in result['json']: 258 | logging.error('Unable to initiate upload: %s', repr(result)) 259 | return False 260 | uploadId = result['json']['uploadId'] 261 | 262 | # Start sending the file, one megabyte at a time until we have none left 263 | offset = 0 264 | block = 0 265 | work = uploadCoordinator(config['glacier-threads']) 266 | 267 | # Queue up all the work 268 | while remain > 0: 269 | chunk = remain 270 | if chunk > chunkSize: 271 | chunk = chunkSize 272 | 273 | job = uploadJob(config, file, name, offset, chunk, hashes['blocks'][block].hexdigest(), uploadId) 274 | work.add(job) 275 | 276 | block += 1 277 | remain -= chunk 278 | offset += chunk 279 | 280 | # Wait for it... 
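  # process() starts the worker threads; each worker pulls uploadJob chunks off
  # the queue and pushes them to Glacier through the aws CLI. While the workers
  # run we poll isDone() once per second and, when attached to a terminal,
  # print the current upload speed, the per-file and overall completion
  # percentages, and an estimate of the time remaining.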
281 | work.process() 282 | while not work.isDone(): 283 | time.sleep(1) 284 | if sys.stdout.isatty(): 285 | # Extra spaces at the end to clear remnants when numbers change 286 | if work.getSent() > 0 and work.getTime() > 0: 287 | timerem = ", " + helper.formatTime((float(bytesTotal) - float(bytesDone + work.getSent())) / (work.getSent() / work.getTime())) + " remaining" 288 | else: 289 | timerem = "" 290 | sys.stdout.write('%s%s @ %s, %.2f%% done (%.2f%% total%s) \r' % ( 291 | prefix, 292 | name, 293 | helper.formatSpeed(work.getSent() / work.getTime()), 294 | float(work.getSent())/float(size) * 100.0, 295 | float(bytesDone + work.getSent())/float(bytesTotal) * 100.0, 296 | timerem 297 | ) 298 | ) 299 | sys.stdout.flush() 300 | if sys.stdout.isatty(): 301 | sys.stdout.write('\n') 302 | sys.stdout.flush() 303 | 304 | if not work.finish(): 305 | logging.error('Failed to upload the file, aborting') 306 | # Note! Should use JSON since plain arguments seems to not work 307 | awsCommand(config, ['abort-multipart-upload', '--vault-name', config['glacier-vault'], '--cli-input-json', '{"uploadId": "' + uploadId + '"}']) 308 | return False 309 | 310 | # Time to finalize this deal 311 | result = awsCommand(config, ['complete-multipart-upload', '--vault-name', config['glacier-vault'], '--cli-input-json', '{"uploadId": "' + uploadId + '"}', '--checksum', hashes['final'].hexdigest(), '--archive-size', str(size)]) 312 | if result is None or result['code'] != 0: 313 | logging.error('Unable to complete upload of %s: %s', file, repr(result)) 314 | return False 315 | return True 316 | 317 | def uploadFiles(config, files, bytes): 318 | logging.info("Uploading %d files (%s) to glacier, this may take a while", len(files), helper.formatSize(bytes)) 319 | 320 | i = 0 321 | d = 0 322 | for file in files: 323 | i += 1 324 | file = os.path.join(config["prepdir"], file) 325 | if not uploadFile(config, "(%d of %d) " % (i, len(files)), file, d, bytes): 326 | return False 327 | d += os.path.getsize(file) 328 | return True 329 | 330 | def awsCommand(config, args, dry=False): 331 | if config["glacier-vault"] is None: 332 | logging.error("awsCommand() called without proper settings") 333 | return None 334 | 335 | # Fake it until you make it 336 | if dry: 337 | time.sleep(random.randint(1, 50) / 10) 338 | return {"code" : 0, "raw" : '', 'json' : {'checksum' : 'something', 'uploadId' : 'someid' }, "error" : '' } 339 | 340 | cmd = ['aws', '--output', 'json', 'glacier'] 341 | cmd += args 342 | cmd += ['--account-id', '-'] 343 | 344 | #logging.debug("AWS command: " + repr(cmd)) 345 | 346 | p = Popen(cmd, stdout=PIPE, stderr=PIPE, cwd=config["prepdir"]) 347 | out, err = p.communicate() 348 | 349 | jout = None 350 | try: 351 | jout = json.loads(out) 352 | except ValueError as e: 353 | logging.debug('Failed to parse AWS output as JSON: %s', e) 354 | 355 | if out is None or out == "": 356 | logging.debug("Error : " + repr(err)) 357 | logging.debug('Cmd: ' + repr(cmd)) 358 | 359 | return {"code" : p.returncode, "raw" : out, 'json' : jout, "error" : err } 360 | -------------------------------------------------------------------------------- /extras/testsuite/test_backup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # First, make sure we're standing inside the testsuite area 4 | cd "$(dirname "$0")" 5 | 6 | ICESHELF=../../iceshelf 7 | COUNT=0 8 | 9 | # Removes old data and creates fresh 10 | function cleanup() { 11 | rm -rf compare data tmp content done >/dev/null 
2>/dev/null 12 | rm config_* >/dev/null 2>/dev/null 13 | } 14 | 15 | function initialize() { 16 | # Clean and prep 17 | cleanup 18 | mkdir data tmp content done compare 19 | 20 | # Generate content 21 | # First, bunch of files 22 | for FILE in a b c d e f g h i j k l m n o p q r s t u v w x y z åäö éùü ø Hörbücher " "; do 23 | dd if=/dev/zero of=content/${FILE} bs=1024 count=1 2>/dev/null 24 | FOLDER="folder-${FILE}" 25 | mkdir "content/${FOLDER}" 26 | for FILE2 in a b c åäö éùü " " ø; do 27 | dd if=/dev/zero of="content/${FOLDER}/${FILE2}" bs=1024 count=1 2>/dev/null 28 | done 29 | done 30 | # Next folders with files 31 | } 32 | 33 | # Takes an INI file and ensures sections are only defined once 34 | # 35 | # Param 1: Filename of the INI file 36 | # 37 | merge_sections() { 38 | local filename=$1 39 | awk -F'=' ' 40 | /^\[.*\]$/ { 41 | if (section != $0 && section) { 42 | print section 43 | for (key in keys) { 44 | if (keys[key] != "") { 45 | print key "=" keys[key] 46 | } else { 47 | print key 48 | } 49 | } 50 | delete keys 51 | } 52 | section=$0 53 | next 54 | } 55 | /^$/ { next } # Skip blank lines 56 | /^#/ { next } # Skip comments 57 | { 58 | if ($1 in keys) { 59 | next 60 | } else { 61 | keys[$1]=$2 62 | } 63 | } 64 | END { 65 | print section 66 | for (key in keys) { 67 | if (keys[key] != "") { 68 | print key "=" keys[key] 69 | } else { 70 | print key 71 | } 72 | } 73 | } 74 | ' "$filename" > "${filename}.tmp" && mv "${filename}.tmp" "$filename" 75 | } 76 | # Creates a configuration file 77 | # 78 | # Param 1: Name of the config file, always prefixed with "config_" 79 | # Param 2: sources, additional config parameters (supports escaping) 80 | # Param 3: paths, additional config parameters (supports escaping) 81 | # Param 4: options, additional config parameters (supports escaping) 82 | # Param 5: sections, additional config parameters (supports escaping) 83 | # 84 | # NOTE! Don't forget section when using parameter 5! 85 | # 86 | function generateConfig() { 87 | cat >> "config_$1" << EOF 88 | [sources] 89 | test=content/ 90 | $(echo -e "$2") 91 | [paths] 92 | prep dir: tmp/ 93 | data dir: data/ 94 | done dir: done/ 95 | $(echo -e "$3") 96 | [options] 97 | delta manifest: yes 98 | compress: no 99 | persuasive: yes 100 | ignore overlimit: no 101 | incompressible: 102 | max keep: 0 103 | detect move: yes 104 | $(echo -e "$4") 105 | $(echo -e "$5") 106 | EOF 107 | 108 | # Ensure sections are unique 109 | merge_sections "config_$1" 110 | 111 | } 112 | 113 | function lastFolder() { 114 | F=$(ls -1t done/ | head -n1) 115 | echo "done/$F/" 116 | } 117 | 118 | function lastArchive() { 119 | T=$(ls -1rt done/ | tail -1) 120 | TT=$(ls -1rt done/$T | grep tar | grep -v par) 121 | echo "done/$T/$TT" 122 | } 123 | 124 | # Runs an iceshelf session, first checking if there is any changes. 125 | # If no changes are found, it fails 126 | # 127 | # Param 1: Name of the test 128 | # Param 2: Run --changes ? skip = no, nochange = expect no changes, change = expect changes 129 | # Param 3: Optional script (pretest() and posttest()) 130 | # Param 4: Configfile to use 131 | # Param 5: List of file remaining in compare 132 | # Param 6+ sent verbaitum to iceshelf 133 | # 134 | # Special variables that test can override: 135 | # OPT_SUCCESSRET Default zero, this is the return code expected from iceshelf. 136 | # OPT_IGNORECOMP Default false, if true skips directory comparison. 
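# Example (hypothetical case): after removing content/x, expect iceshelf to
# detect changes, run with config_regular, and require that the only
# remaining difference is the now-deleted file:
#
#   runTest "Delete file x" "change" "" regular "Only in compare/content: x"
#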
137 | # 138 | # All changes to these variables are reset at end of runTest() call 139 | # 140 | function runTest() { 141 | ERROR=true # Catch all 142 | let "COUNT+=1" 143 | printf "Test #%03d: %s\n" ${COUNT} "$1" 144 | 145 | # Create functions 146 | eval "$3" 147 | 148 | if [ "$(type -t pretest)" == "function" ]; then 149 | RESULT="$(pretest)" 150 | if [ $? -ne 0 ]; then 151 | echo "=== Pretest failed: $RESULT" 152 | return 255 153 | fi 154 | unset -f pretest 155 | fi 156 | 157 | if [ ! -f config_$4 ]; then 158 | echo "=== Config \"config_$4\" does not exist" 159 | return 255 160 | fi 161 | 162 | if [ "$2" != "skip" ]; then 163 | RESULT1="$(${ICESHELF} 2>&1 config_$4 --debug --changes)" 164 | RET=$? 165 | if [ $RET -ne 1 -a "$2" == "change" ]; then 166 | echo "=== Iceshelf didn't detect changes (was expected to detect changed)" 167 | echo "$RESULT1" 168 | return 255 169 | fi 170 | if [ $RET -ne 0 -a "$2" == "nochange" ]; then 171 | echo "=== Iceshelf detected changes (was expected to not have changes)" 172 | echo "$RESULT1" 173 | return 255 174 | fi 175 | fi 176 | 177 | RESULT2="$(${ICESHELF} 2>&1 config_$4 --debug ${@:6})" 178 | if [ $? -ne $OPT_SUCCESSRET ]; then 179 | echo "=== Iceshelf failed:" 180 | echo "$RESULT2" 181 | return 255 182 | fi 183 | 184 | # The magic part, we unpack into compare so we can diff things... 185 | ARCHIVE="$(lastArchive)" 186 | ORIGINAL="${ARCHIVE}" 187 | if [ -f "${ARCHIVE}" ]; then 188 | 189 | # See if there is parity and then check that it's ok 190 | if [ -f "${ARCHIVE}.par2" ]; then 191 | dd if=/dev/urandom of="${ARCHIVE}" seek=5 bs=1 count=5 conv=notrunc >/dev/null 2>/dev/null 192 | par2repair "${ARCHIVE}" >/dev/null 193 | if [ $? -ne 0 ]; then 194 | echo "ERROR: Parity is corrupt or insufficient, unable to repair file ${ORIGINAL}" 195 | return 255 196 | fi 197 | fi 198 | 199 | GPGERR=0 200 | rm tmp/file.tar >/dev/null 2>/dev/null 201 | rm tmp/file.tar.gpg >/dev/null 2>/dev/null 202 | if echo "$ARCHIVE" | grep -q "gpg.sig" ; then 203 | GPGOUTPUT="$(gpg -q --no-tty --batch --pinentry-mode loopback --passphrase test --output tmp/file.tar.gpg --decrypt "${ARCHIVE}" 2>&1)" 204 | GPGERR=$? 205 | ARCHIVE=tmp/file.tar.gpg 206 | fi 207 | if [ $GPGERR -ne 0 ]; then 208 | echo "ERROR: GPG was unable to process ${ORIGINAL}" 209 | echo "$GPGOUTPUT" 210 | return 255 211 | fi 212 | 213 | if echo "$ARCHIVE" | grep -q gpg ; then 214 | GPGOUTPUT="$(gpg -q --no-tty --batch --pinentry-mode loopback --passphrase test --output tmp/file.tar --decrypt "${ARCHIVE}" 2>&1)" 215 | GPGERR=$? 216 | ARCHIVE=tmp/file.tar 217 | elif echo "$ARCHIVE" | grep -q sig ; then 218 | GPGOUTPUT="$(gpg -q --no-tty --batch --pinentry-mode loopback --passphrase test --output tmp/file.tar --decrypt "${ARCHIVE}" 2>&1)" 219 | GPGERR=$? 220 | ARCHIVE=tmp/file.tar 221 | fi 222 | 223 | if [ $GPGERR -ne 0 ]; then 224 | echo "ERROR: GPG was unable to process ${ORIGINAL}" 225 | echo "$GPGOUTPUT" 226 | return 255 227 | fi 228 | 229 | if echo "$ARCHIVE" | grep -q bz2 ; then 230 | tar xfj "${ARCHIVE}" -C compare/ --overwrite 231 | else 232 | tar xf "${ARCHIVE}" -C compare/ --overwrite 233 | fi 234 | fi 235 | if [ $? -ne 0 ]; then 236 | echo "Failed decompressing ${ARCHIVE} (${ORIGINAL})" 237 | return 255 238 | fi 239 | 240 | FAILED=false 241 | if ! $OPT_IGNORECOMP; then 242 | DIFF=$(diff -r content compare/content) 243 | if [ $? -eq 0 ]; then 244 | DIFF="" 245 | fi 246 | if [ "$5" != "" ]; then 247 | if [ "${5:0:1}" == "^" ]; then 248 | if ! 
[[ "${DIFF}" =~ $5 ]]; then 249 | FAILED=true 250 | fi 251 | elif [ "${DIFF}" != "$5" ]; then 252 | FAILED=true 253 | fi 254 | elif [ "${DIFF}" != "" ]; then 255 | FAILED=true 256 | fi 257 | fi 258 | 259 | if $FAILED ; then 260 | echo "=== FAILED! Diff is not matching expectations for ${ORIGINAL}:" 261 | echo "'$DIFF'" 262 | echo "=== Expected:" 263 | echo "'$5'" 264 | echo "=== Iceshelf output:" 265 | echo "$RESULT2" 266 | echo "=== Contents of folder: content/" 267 | ls -laR content/ 268 | echo "=== Contents of folder: compare/content/" 269 | ls -laR compare/content/ 270 | return 255 271 | fi 272 | 273 | if [ "$(type -t posttest)" == "function" ]; then 274 | RESULT="$(posttest)" 275 | if [ $? -ne 0 ]; then 276 | echo "=== FAILED! Posttest failed:" 277 | echo "$RESULT" 278 | echo "=== Iceshelf output:" 279 | echo "$RESULT2" 280 | echo "=== Contents of folder: content/" 281 | ls -laR content/ 282 | echo "=== Contents of folder: compare/content/" 283 | ls -laR compare/content/ 284 | return 255 285 | fi 286 | unset -f posttest 287 | fi 288 | 289 | # Final step, sync content with compare 290 | rsync -avr --delete content/ compare/content/ 2>/dev/null >/dev/null 291 | ERROR=false 292 | OPT_SUCCESSRET=0 293 | OPT_IGNORECOMP=false 294 | return 0 295 | } 296 | 297 | function hasGPGconfig() { 298 | gpg --list-secret-keys 2>/dev/null | grep test@test.test >/dev/null 2>/dev/null 299 | return $? 300 | } 301 | 302 | if hash par2 ; then 303 | VARIATIONS=("normal" "parity") 304 | else 305 | echo 'Note! PAR2 configuration not detected' 306 | echo 'To enable PAR2 testing (parity support), please install par2 tools.' 307 | VARIATIONS=("normal") 308 | fi 309 | 310 | # See if user has installed the testkey 311 | if hash gpg ; then 312 | HASKEY=false 313 | if ! hasGPGconfig; then 314 | echo "Importing test-key for test usage" 315 | RESULT="$(gpg 2>&1 --no-tty --batch --pinentry-mode loopback --passphrase test --fast-import test_key.*)" 316 | RESULT2="$(echo "010034E91082BF022DBAF1FEA00E5EDACC9D1828:6:" | gpg 2>&1 --import-ownertrust)" 317 | if hasGPGconfig ; then 318 | HASKEY=true 319 | else 320 | echo "=== ERROR: Unable to import GPG key for testing, encryption will not be tested" 321 | echo -e "Result 1:\n$RESULT" 322 | echo -e "Result 2:\n$RESULT2" 323 | exit 255 324 | fi 325 | else 326 | HASKEY=true 327 | fi 328 | 329 | if $HASKEY ; then 330 | ADD=() 331 | for I in "${VARIATIONS[@]}"; do 332 | ADD+=("$I,encrypted" "$I,signed" "$I,encrypted,signed") 333 | done 334 | for I in "${ADD[@]}"; do 335 | VARIATIONS+=($I) 336 | done 337 | fi 338 | fi 339 | 340 | if [ "$1" == "short" ]; then 341 | echo "Running normal use-case only! NOT A COMPLETE TEST RUN!" 342 | VARIATIONS=("normal") 343 | fi 344 | 345 | # Runs through ALL the versions... 346 | ERROR=false 347 | OPT_SUCCESSRET=0 348 | OPT_IGNORECOMP=false 349 | for VARIANT in "${VARIATIONS[@]}"; do 350 | EXTRAS="[security]" 351 | if [[ "$VARIANT" == *"encrypted"* ]]; then 352 | EXTRAS="$EXTRAS\nencrypt: test@test.test\nencrypt phrase: test\n" 353 | fi 354 | if [[ "$VARIANT" == *"signed"* ]]; then 355 | EXTRAS="$EXTRAS\nsign: test@test.test\nsign phrase: test\n" 356 | fi 357 | if [[ "$VARIANT" == *"parity"* ]]; then 358 | EXTRAS="$EXTRAS\nadd parity: 5\n" 359 | fi 360 | 361 | echo "...Running suite using variation ${VARIANT}..." 
362 | 363 | initialize 364 | 365 | # Param 1: Name of the config file, always prefixed with "config_" 366 | # Param 2: sources, additional config parameters (supports escaping) 367 | # Param 3: paths, additional config parameters (supports escaping) 368 | # Param 4: options, additional config parameters (supports escaping) 369 | # Param 5: sections, additional config parameters (supports escaping) 370 | # 371 | # NOTE! Don't forget section when using parameter 5! 372 | 373 | 374 | generateConfig regular '' '' '' "$EXTRAS" 375 | generateConfig prefix '' '' "prefix: prefixed-\n" "$EXTRAS" 376 | generateConfig filelist '' '' "create filelist: yes\n" "$EXTRAS" 377 | generateConfig encryptmani '' '' '' "[security]\nencrypt manifest: yes\n$EXTRAS" 378 | generateConfig changehash '' '' "change method: sha256\n" "$EXTRAS" 379 | generateConfig maxsize '' '' "max size: 1\n" "$EXTRAS" 380 | 381 | # First, make sure NO test uses the same case-number, that's an AUTO FAIL! 382 | ALL_CASES="$(ls -1 tests/ | wc --lines)" 383 | UNI_CASES="$(ls -1 tests/ | cut -c 1-3 | wc --lines)" 384 | if [ "${ALL_CASES}" != "${UNI_CASES}" ]; then 385 | echo "=== ERROR: Cannot have two cases with the same sequential number!" 386 | ls -la tests/ 387 | exit 255 388 | fi 389 | 390 | while read TESTCASE; do 391 | source "tests/$TESTCASE" 392 | if $ERROR ; then 393 | break 394 | fi 395 | done < <(ls -1 tests/) 396 | if $ERROR ; then 397 | break 398 | fi 399 | done 400 | 401 | if $ERROR ; then 402 | echo -e "\nTest failed, output directories preserved for analysis" 403 | exit 255 404 | else 405 | echo -e "\nAll tests ended successfully" 406 | cleanup 407 | exit 0 408 | fi 409 | -------------------------------------------------------------------------------- /modules/configuration.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import sys 3 | import os.path 4 | import logging 5 | import os 6 | 7 | setting = { 8 | "encrypt": None, 9 | "encrypt-pw": None, 10 | "sign": None, 11 | "sign-pw": None, 12 | "parity": 0, 13 | "manifest": True, 14 | "use-sha": True, 15 | "sha-type": "sha1", 16 | "maxsize": 0, 17 | "prepdir": "backup/inprogress/", 18 | "datadir": "backup/metadata/", 19 | "sources": {}, 20 | "exclude": [], 21 | "persuasive": True, 22 | "compress": True, 23 | "compress-force": False, 24 | "ignore-overlimit": False, 25 | "extra-ext" : None, 26 | "donedir": "backup/done/", 27 | "maxkeep": 0, 28 | "prefix" : "", 29 | "detect-move": False, 30 | "create-paths": False, 31 | "skip-empty": False, 32 | "encrypt-manifest" : True, 33 | "create-filelist" : True, 34 | "checkupdate" : False, 35 | "custom-pre" : None, 36 | "custom-post" : None 37 | } 38 | 39 | def getVersion(): 40 | return [1,1,0] 41 | 42 | def isCompatible(version): 43 | """ 44 | Checks if the version (x.y.z) is compatible with ours 45 | The general rule is that as long as only Z changes, 46 | it remains compatible. 
47 | """ 48 | try: 49 | if len(version) != 3: 50 | return False 51 | c = getVersion() 52 | return c[0] == version[0] and c[1] == version[1] and c[2] >= version[2] 53 | except: 54 | return False 55 | 56 | def parse(filename, onlysecurity=False): 57 | config = configparser.ConfigParser() 58 | 59 | # Some sane defaults 60 | sections = { 61 | "sources": {}, 62 | "paths": { 63 | "prep dir": "backup/inprogress/", 64 | "data dir": "backup/metadata/", 65 | "done dir": "backup/done/", 66 | "prefix": "", 67 | "create paths": "no" 68 | }, 69 | "options": { 70 | "max size": "0", 71 | "delta manifest": "yes", 72 | "compress": "yes", 73 | "incompressible": "", 74 | "persuasive": "no", 75 | "detect move": "no", 76 | "skip empty": "no", 77 | "ignore overlimit": "no", 78 | "change method": "sha1", 79 | "max keep": "0", 80 | "create filelist": "yes", 81 | "check update": "no" 82 | }, 83 | "custom": { 84 | "pre command": "", 85 | "post command": "" 86 | }, 87 | "security": { 88 | "encrypt": "", 89 | "sign": "", 90 | "encrypt phrase": "", 91 | "sign phrase": "", 92 | "add parity": "0", 93 | "encrypt manifest": "yes" 94 | } 95 | } 96 | 97 | # Read user settings 98 | logging.debug('Loading configuration from %s', filename) 99 | config.read(filename) 100 | 101 | # Load the defaults 102 | for section, options in sections.items(): 103 | if not config.has_section(section): 104 | config.add_section(section) 105 | for option, value in options.items(): 106 | if not config.has_option(section, option): 107 | config.set(section, option, value) 108 | 109 | # Detect deprecated glacier config 110 | if config.has_section('glacier'): 111 | logging.error('The [glacier] section is no longer supported.') 112 | logging.error('Please migrate to provider sections. See providers/glacier.md and the wiki for details.') 113 | return None 114 | 115 | # Validate the config 116 | if len(config.options("sources")) == 0 and not onlysecurity: 117 | logging.error("You don't have any sources defined") 118 | return None 119 | 120 | if config.get("security", "encrypt") != "": 121 | setting["encrypt"] = config.get("security", "encrypt") 122 | if config.get("security", "encrypt phrase") != "": 123 | setting["encrypt-pw"] = config.get("security", "encrypt phrase") 124 | if config.get("security", "sign") != "": 125 | setting["sign"] = config.get("security", "sign") 126 | if config.get("security", "sign phrase") != "": 127 | setting["sign-pw"] = config.get("security", "sign phrase") 128 | if config.get("security", "encrypt manifest").lower() not in ["yes", "no"]: 129 | logging.error("encrypt manifest has to be yes/no") 130 | return None 131 | elif config.get("security", "encrypt manifest").lower() == "no": 132 | setting["encrypt-manifest"] = False 133 | 134 | # Exit early if we don't need more than security 135 | if onlysecurity: 136 | return setting 137 | 138 | if config.get("options", "delta manifest").lower() not in ["yes", "no"]: 139 | logging.error("Delta Manifest has to be yes/no") 140 | return None 141 | elif config.get("options", "delta manifest").lower() == "no": 142 | setting["manifest"] = False 143 | 144 | if config.get("options", "create filelist").lower() not in ["yes", "no"]: 145 | logging.error("create filelist has to be yes/no") 146 | return None 147 | elif config.get("options", "create filelist").lower() == "no": 148 | setting["create-filelist"] = False 149 | 150 | if config.get("options", "persuasive").lower() not in ["yes", "no"]: 151 | logging.error("persuasive has to be yes/no") 152 | return None 153 | elif config.get("options", 
"persuasive").lower() == "no": 154 | setting["persuasive"] = False 155 | 156 | if config.get("options", "check update").lower() not in ["yes", "no"]: 157 | logging.error("check update has to be yes/no") 158 | return None 159 | elif config.get("options", "check update").lower() == "yes": 160 | setting["checkupdate"] = True 161 | 162 | if config.get("options", "ignore overlimit").lower() not in ["yes", "no"]: 163 | logging.error("ignore overlimit has to be yes/no") 164 | return None 165 | elif config.get("options", "ignore overlimit").lower() == "yes": 166 | setting["ignore-overlimit"] = True 167 | 168 | if config.get("options", "compress").lower() not in ["force", "yes", "no"]: 169 | logging.error("compress has to be yes/no") 170 | return None 171 | elif config.get("options", "compress").lower() == "no": 172 | setting["compress"] = False 173 | elif config.get("options", "compress").lower() == "force": 174 | setting["compress-force"] = True 175 | 176 | if config.get("options", "skip empty").lower() not in ["yes", "no"]: 177 | logging.error("skip empty has to be yes/no") 178 | return None 179 | elif config.get("options", "skip empty").lower() == "yes": 180 | setting["skip-empty"] = True 181 | 182 | if config.get("options", "change method").lower() not in [ "data", "sha1", "sha256", "sha512"]: 183 | logging.error("Change method has to be data, sha1, sha256 or sha512 (meta is deprecated)") 184 | return None 185 | else: 186 | setting["use-sha"] = True 187 | setting["sha-type"] = config.get("options", "change method").lower() 188 | if setting["sha-type"] == "data": 189 | logging.debug("Sha type was data, default to sha1") 190 | setting["sha-type"] = "sha1" 191 | logging.debug("Using sha-type: " + setting["sha-type"]) 192 | 193 | if config.get("options", "incompressible"): 194 | setting["extra-ext"] = config.get("options", "incompressible").split() 195 | 196 | if config.get("options", "max keep").isdigit(): 197 | setting["maxkeep"] = config.getint("options", "max keep") 198 | elif config.get("options", "max keep") != "": 199 | logging.error("Max keep should be a number or empty") 200 | return None 201 | 202 | if config.has_option("options", "prefix") and config.get("options", "prefix") != "": 203 | setting["prefix"] = config.get("options", "prefix") 204 | 205 | if config.get("options", "detect move").lower() not in ["yes", "no"]: 206 | logging.error("detect move has to be yes or no") 207 | elif config.get("options", "detect move").lower() == "yes": 208 | if not setting["use-sha"]: 209 | logging.error("You cannot use \"detect move\" with \"change method\" set to \"meta\"") 210 | return None 211 | setting["detect-move"] = True 212 | 213 | if config.get("options", "max size").isdigit() and config.getint("options", "max size") > 0: 214 | setting["maxsize"] = config.getint("options", "max size") 215 | elif not config.get("options", "max size").isdigit() and config.get("options", "max size") != "": 216 | unit = config.get("options", "max size").lower()[-1:] 217 | value = config.get("options", "max size")[:-1] 218 | if not value.isdigit(): 219 | logging.error("Max size has to be a number and may contain a unit suffix") 220 | return None 221 | value = int(value, 10) 222 | 223 | if unit == 'k': 224 | value *= 1024 225 | elif unit == 'm': 226 | value *= 1048576 227 | elif unit == 'g': 228 | value *= 1073741824 229 | elif unit == 't': 230 | value *= 1099511627776 231 | else: 232 | logging.error("Max size has to be a number and may contain a unit suffix") 233 | sys.exit(1) 234 | setting["maxsize"] = value 235 
| 236 | if not config.get("security", "add parity").isdigit() or config.getint("security", "add parity") > 100 or config.getint("security", "add parity") < 0: 237 | logging.error("Parity ranges from 0 to 100, " + config.get("security", "add parity") + " is invalid") 238 | return None 239 | elif config.getint("security", "add parity") > 0: 240 | setting["parity"] = config.getint("security", "add parity") 241 | if setting["maxsize"] > 34359738367 or setting["maxsize"] == 0: 242 | logging.debug("max size is limited to 32GB when using parity, changing \"max size\" setting") 243 | setting["maxsize"] = 34359738367 # (actually 32GB - 1 byte) 244 | 245 | if config.get("paths", "create paths").lower() not in ["yes", "no"]: 246 | logging.error("create paths has to be yes or no") 247 | elif config.get("paths", "create paths").lower() == "yes": 248 | setting["create-paths"] = True 249 | 250 | if config.get("paths", "prep dir") == "": 251 | logging.error("Preparation dir cannot be empty") 252 | elif not os.path.isdir(config.get("paths", "prep dir")) and setting["create-paths"] == False: 253 | logging.error("Preparation dir doesn't exist") 254 | return None 255 | else: 256 | setting["prepdir"] = os.path.join(config.get("paths", "prep dir"), "iceshelf") 257 | if setting["create-paths"]: 258 | try: 259 | os.makedirs(setting["prepdir"]) 260 | except OSError as e: 261 | if e.errno != 17: 262 | logging.exception("Cannot create preparation dir") 263 | return None 264 | 265 | if config.get("paths", "data dir") == "": 266 | logging.error("Data dir cannot be empty") 267 | elif not os.path.isdir(config.get("paths", "data dir")) and setting["create-paths"] == False: 268 | logging.error("Data dir doesn't exist") 269 | return None 270 | else: 271 | setting["datadir"] = config.get("paths", "data dir") 272 | if setting["create-paths"]: 273 | try: 274 | os.makedirs(setting["datadir"]) 275 | except OSError as e: 276 | if e.errno != 17: 277 | logging.exception("Cannot create data dir") 278 | return None 279 | 280 | if config.get("paths", "done dir") == "": 281 | setting["donedir"] = None 282 | elif not os.path.isdir(config.get("paths", "done dir")) and setting["create-paths"] == False: 283 | logging.error("Done dir doesn't exist") 284 | return None 285 | elif config.get("paths", "done dir") != "": 286 | setting["donedir"] = config.get("paths", "done dir") 287 | if setting["create-paths"]: 288 | try: 289 | os.makedirs(setting["donedir"]) 290 | except OSError as e: 291 | if e.errno != 17: 292 | logging.exception("Cannot create done dir") 293 | return None 294 | 295 | # Check that all sources are either directories or files 296 | for x in config.options("sources"): 297 | if config.get("sources", x) == "": 298 | logging.error("Source " + x + " is empty") 299 | return None 300 | if not os.path.exists(config.get("sources", x)): 301 | logging.error("Source \"%s\" points to a non-existing entry \"%s\"", x, config.get("sources", x)) 302 | return None 303 | setting["sources"][x] = config.get("sources", x) 304 | 305 | # Provider options (multiple sections allowed) 306 | setting["providers"] = [] 307 | for section in config.sections(): 308 | if section.lower().startswith("provider-"): 309 | provider_cfg = {k: v for k, v in config.items(section)} 310 | if 'type' not in provider_cfg: 311 | logging.error('Provider section %s must contain a type option', section) 312 | return None 313 | setting["providers"].append(provider_cfg) 314 | 315 | 316 | # Custom options (not in-use yet 317 | if config.has_section("custom"): 318 | if 
config.get("custom", "pre command") != "" and not os.path.exists(config.get("custom", "pre command")): 319 | logging.error("Can't find pre-command \"%s\"" % config.get("custom", "pre command")) 320 | return None 321 | elif config.get("custom", "pre command") != "": 322 | setting["custom-pre"] = config.get("custom", "pre command") 323 | if config.get("custom", "post command") != "" and not os.path.exists(config.get("custom", "post command")): 324 | logging.error("Can't find post-command \"%s\"" % config.get("custom", "post command")) 325 | return None 326 | elif config.get("custom", "post command") != "": 327 | setting["custom-pre"] = config.get("custom", "post command") 328 | 329 | # Load exlude rules (if any) 330 | if config.has_section("exclude"): 331 | for x in config.options("exclude"): 332 | v = config.get("exclude", x).strip() 333 | if v == "" : 334 | logging.error("Exclude filter %s is empty", x) 335 | return None 336 | if v[0] == '|': 337 | logging.debug("Loading external exclusion rules from %s", v[1:]) 338 | try: 339 | with open(v[1:], "r") as f: 340 | for line in f: 341 | line = line.strip() 342 | if len(line) == 0: 343 | continue 344 | if line[0] == '|': 345 | logging.error("Cannot reference external exclusion files from an external exclusion file (%s): %s", v[1:], line) 346 | return None 347 | elif line[0] == '#': 348 | continue 349 | setting["exclude"].append(line) 350 | except: 351 | logging.exception("Error loading external exclusion file \"%s\"", v[1:]) 352 | raise 353 | else: 354 | setting["exclude"].append(v) 355 | if len(setting["exclude"]) == 0: 356 | setting["exclude"] = None 357 | 358 | # Lastly, check that required software is installed and available on the path 359 | if setting["parity"] > 0 and which("par2") is None: 360 | logging.error("To use parity, you must have par2 installed") 361 | return None 362 | if (setting["sign"] is not None or setting["encrypt"] is not None) and which("gpg") is None: 363 | logging.error("To use encryption/signature, you must have gpg installed") 364 | return None 365 | 366 | return setting 367 | 368 | # From http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python 369 | # 370 | def which(program): 371 | def is_exe(fpath): 372 | return os.path.isfile(fpath) and os.access(fpath, os.X_OK) 373 | 374 | fpath, fname = os.path.split(program) 375 | if fpath: 376 | if is_exe(program): 377 | return program 378 | else: 379 | for path in os.environ["PATH"].split(os.pathsep): 380 | path = path.strip('"') 381 | exe_file = os.path.join(path, program) 382 | if is_exe(exe_file): 383 | return exe_file 384 | 385 | return None 386 | 387 | def isExcluded(f): 388 | if setting["exclude"] is None: 389 | return False 390 | 391 | fl = f.lower() 392 | for v in setting["exclude"]: 393 | ov=v 394 | invert = False 395 | fromend = False 396 | contain = False 397 | match = False 398 | lessthan = None 399 | morethan = None 400 | if v[0] != "\\": 401 | if v[0] == "!": 402 | invert = True 403 | v = v[1:] 404 | if v[0] != "\\": 405 | if v[0] == "*": 406 | fromend = True 407 | v = v[1:] 408 | elif v[0] == "?": 409 | contain = True 410 | v = v[1:] 411 | elif v[0] == "<": 412 | v = v[1:] 413 | if v.isdigit(): 414 | lessthan = int(v) 415 | else: 416 | logging.error("\"Less than\" exclude rule can only have digits") 417 | sys.exit(2) 418 | elif v[0] == ">": 419 | v = v[1:] 420 | if v.isdigit(): 421 | morethan = int(v) 422 | else: 423 | logging.error("\"More than\" exclude rule can only have digits") 424 | sys.exit(2) 425 | else: # No special filter at 
the start (after invert) 426 | v = v[1:] 427 | else: # No special filter at the start 428 | v = v[1:] 429 | 430 | if morethan or lessthan: 431 | # Expensive, we need to stat 432 | i = os.stat(f) 433 | if morethan is not None and i.st_size > morethan: 434 | match = True 435 | elif lessthan is not None and i.st_size < lessthan: 436 | match = True 437 | else: 438 | match = (fromend and fl.endswith(v)) or (contain and v in fl) or (fl.startswith(v)) 439 | 440 | if match: 441 | if invert: # Special case, it matches, so stop processing, but DON'T EXCLUDE IT 442 | logging.debug("Rule \"%s\" matched \"%s\", not excluded", ov, f) 443 | return False 444 | else: # Normal case, matched, so should be excluded 445 | logging.debug("Rule \"%s\" matched \"%s\", excluded", ov, f) 446 | return True 447 | return False 448 | 449 | -------------------------------------------------------------------------------- /iceshelf-restore: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # This tool will take a iceshelf backup and restore it to a 4 | # designated folder, following any directives stored in the 5 | # manifest file. 6 | # 7 | # NOTE! Will not do anything if a manifest is missing. 8 | # 9 | ############################################################################## 10 | # pylint: disable=invalid-name 11 | 12 | """Restore and validate iceshelf backups.""" 13 | 14 | import argparse 15 | import json 16 | import logging 17 | import os.path 18 | import re 19 | import sys 20 | import tarfile 21 | import gnupg 22 | from modules import configuration 23 | from modules import fileutils 24 | 25 | 26 | def valid_archive(base_dir, list_filename, corrupt_list, found_files): 27 | """ 28 | Start with validating all files and confirm existance of files, using the filelist.txt 29 | """ 30 | p = re.compile('([a-f0-9]+)\\s+([^\\s]+)') 31 | criticalerror = False 32 | archivecorrupt = False 33 | paritycount = 0 34 | del found_files[:] 35 | with open(os.path.join(base_dir, list_filename), "r", encoding='utf-8') as list_fp: 36 | for line in list_fp: 37 | res = p.match(line) 38 | if res: 39 | if os.path.exists(os.path.join(base_dir, res.group(2))): 40 | found_files.append(res.group(2)) 41 | sha = fileutils.hashFile( 42 | os.path.join(base_dir, res.group(2)), 'sha1') 43 | if sha != res.group(1): 44 | corrupt_list.append(res.group(2)) 45 | if ".json" in line: 46 | logging.error( 47 | 'Manifest is corrupt, please restore manually') 48 | criticalerror = True 49 | elif ".tar" in line: 50 | archivecorrupt = True 51 | elif ".par2" in line: 52 | logging.warning( 53 | 'Parity file "%s" is corrupt and will not be used', 54 | res.group(2)) 55 | elif ".par2" in line: 56 | paritycount += 1 57 | else: 58 | logging.error( 59 | 'File "%s" is missing from backup', 60 | res.group(2)) 61 | return False 62 | else: 63 | logging.error("filelist.txt is corrupt") 64 | return False 65 | if archivecorrupt and paritycount == 0: 66 | logging.error('Archive is corrupt and no available parity files') 67 | criticalerror = True 68 | elif archivecorrupt: 69 | logging.warning( 70 | 'Archive is corrupt, but parity is available making repair a possibility') 71 | return not criticalerror 72 | 73 | 74 | def validate_file(filepath): 75 | """Verify GPG signatures when present.""" 76 | gpg = gnupg.GPG() 77 | 78 | logging.debug('Validating "%s"', filepath) 79 | 80 | if filepath.endswith('.sig') or filepath.endswith('.asc'): 81 | try: 82 | with open(filepath, 'rb') as sig_file: 83 | verification = 
gpg.verify_file(sig_file) 84 | except OSError as exc: 85 | logging.error('gpg verification failed: %s', exc) 86 | return False 87 | if not verification or not verification.valid: 88 | logging.error( 89 | 'Signature verification failed (%s): %s', 90 | getattr(verification, 'status', 'unknown'), 91 | filepath) 92 | return False 93 | return True 94 | 95 | 96 | def strip_file(filename): 97 | """Strip signatures and decrypt files as needed.""" 98 | gpg = gnupg.GPG() 99 | destfile = filename 100 | 101 | logging.debug('Processing "%s"', filename) 102 | 103 | while destfile.endswith('.sig') or destfile.endswith( 104 | '.asc') or destfile.endswith('.gpg'): 105 | ext = destfile[-4:] 106 | destfile = destfile[0:-4] 107 | if destfile[-4:] == '.gpg' and ext == '.asc': 108 | destfile = destfile[0:-4] + ext 109 | result = None 110 | if os.path.exists(destfile): 111 | os.remove(destfile) 112 | with open(filename, 'rb') as src_fh: 113 | result = gpg.decrypt_file( 114 | src_fh, 115 | always_trust=True, 116 | passphrase=config.get('encrypt-pw'), 117 | output=destfile) 118 | if result is None: 119 | logging.error('Unable to decrypt (unknown reason): %s', filename) 120 | return None 121 | if result is None or not os.path.exists(destfile): 122 | logging.error( 123 | 'Unable to decrypt (%s): %s', 124 | result.status, 125 | filename) 126 | return None 127 | filename = destfile 128 | 129 | if filename != destfile: 130 | fileutils.copy(filename, destfile) 131 | 132 | return destfile 133 | 134 | 135 | def getBackupFiles(itemFromBackup): 136 | """Locate all files belonging to a backup based on a path or prefix""" 137 | backup_dir = os.path.dirname(itemFromBackup) 138 | basename = os.path.basename(itemFromBackup) 139 | 140 | if os.path.isdir(itemFromBackup): 141 | backup_dir = itemFromBackup 142 | basename = None 143 | 144 | if backup_dir == '': 145 | backup_dir = './' 146 | 147 | unfiltered = os.listdir(backup_dir) 148 | 149 | if basename and '.' 
in basename: 150 | basename = basename.split('.', 1)[0] 151 | elif basename: 152 | # guess first file with this prefix 153 | for entry in unfiltered: 154 | if entry.startswith(basename): 155 | basename = entry.split('.', 1)[0] 156 | break 157 | else: 158 | # No basename supplied, pick first suitable file 159 | for entry in unfiltered: 160 | if '.json' in entry or '.tar' in entry: 161 | basename = entry.split('.', 1)[0] 162 | break 163 | 164 | matched_files = [] 165 | for entry in unfiltered: 166 | if basename is None or os.path.basename( 167 | entry).startswith(basename) or entry == "filelist.txt": 168 | logging.debug('Found backup file "%s"', entry) 169 | matched_files.append(entry) 170 | 171 | return (backup_dir, matched_files) 172 | 173 | 174 | # Parse command line 175 | parser = argparse.ArgumentParser( 176 | description="Iceshelf Restore - Restores the contents of an iceshelf backup", 177 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 178 | parser.add_argument( 179 | '--logfile', 180 | metavar="FILE", 181 | help="Log to file instead of stdout") 182 | parser.add_argument( 183 | '--debug', 184 | action='store_true', 185 | default=False, 186 | help='Adds more details to the log output') 187 | parser.add_argument( 188 | '--restore', 189 | metavar="DEST", 190 | default=None, 191 | help='Extracts the backup') 192 | parser.add_argument( 193 | '--validate', 194 | action='store_true', 195 | default=False, 196 | help='Validate the backup without restoring') 197 | parser.add_argument( 198 | '--repair', 199 | action='store_true', 200 | default=False, 201 | help='Attempt to repair damaged archive using parity') 202 | parser.add_argument( 203 | '--list', 204 | action='store_true', 205 | default=False, 206 | help='List contents of backup (will not extract)') 207 | parser.add_argument( 208 | '--lastbackup', 209 | metavar='LAST', 210 | help='If set, requires the backup to be the successor of LAST') 211 | parser.add_argument( 212 | '--force', 213 | action='store_true', 214 | default=False, 215 | help='Even if manifest is missing, it will at least try to verify and repair archive') 216 | parser.add_argument( 217 | '--config', 218 | metavar="CONFIG", 219 | default=None, 220 | help="Configuration file to load (optional)") 221 | parser.add_argument( 222 | '--user', 223 | metavar='USER', 224 | default=None, 225 | help='GPG key user or ID used for decryption') 226 | parser.add_argument( 227 | '--passphrase', 228 | metavar='PW', 229 | default=None, 230 | help='Decrypt using supplied passphrase') 231 | parser.add_argument( 232 | 'backup', 233 | metavar="BACKUP", 234 | help="Path to backup prefix or file") 235 | cmdline = parser.parse_args() 236 | 237 | restore_base = cmdline.restore or "" 238 | 239 | 240 | # Delay logging mode information until logging is configured 241 | mode = [] 242 | if cmdline.validate: 243 | mode.append('validate') 244 | if cmdline.list: 245 | mode.append('list') 246 | if cmdline.repair: 247 | mode.append('repair') 248 | if cmdline.restore: 249 | mode.append('restore') 250 | 251 | # Setup logging 252 | logging.getLogger('').handlers = [] 253 | LOG_LEVEL = logging.INFO 254 | if cmdline.logfile: 255 | LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s' 256 | else: 257 | LOG_FORMAT = '%(message)s' 258 | if cmdline.debug: 259 | LOG_LEVEL = logging.DEBUG 260 | LOG_FORMAT = '%(asctime)s - %(filename)s@%(lineno)d - %(levelname)s - %(message)s' 261 | 262 | if cmdline.logfile: 263 | logging.basicConfig( 264 | filename=cmdline.logfile, 265 | level=LOG_LEVEL, 266 | 
format=LOG_FORMAT) 267 | else: 268 | logging.basicConfig( 269 | stream=sys.stdout, 270 | level=LOG_LEVEL, 271 | format=LOG_FORMAT) 272 | logging.getLogger("gnupg").setLevel(logging.WARNING) 273 | logging.getLogger("shutil").setLevel(logging.WARNING) 274 | 275 | logging.info('Starting iceshelf-restore in %s mode', ', '.join(mode) or 'validate') 276 | 277 | # Make sure we have the correct gnupg module 278 | if "encrypt_file" not in dir(gnupg.GPG()): 279 | logging.error( 280 | "Current GnuPG python module does not support file encryption, " 281 | "please check FAQ section in documentation") 282 | sys.exit(255) 283 | 284 | ####################### 285 | 286 | config = {} 287 | if cmdline.config: 288 | config = configuration.parse(cmdline.config, True) 289 | if config is None: 290 | logging.error( 291 | 'Configuration is broken, please check %s', 292 | cmdline.config) 293 | sys.exit(1) 294 | else: 295 | config = {} 296 | if cmdline.passphrase: 297 | config['encrypt-pw'] = cmdline.passphrase 298 | if cmdline.user: 299 | config['encrypt'] = cmdline.user 300 | 301 | 302 | basepath, files = getBackupFiles(cmdline.backup) 303 | logging.info('Located backup in "%s"', basepath) 304 | logging.info('Detected backup files: %s', ', '.join(files)) 305 | 306 | file_manifest = None 307 | file_archive = None 308 | 309 | file_parity = None 310 | filelist = None 311 | old_filelist = False 312 | corrupt_files = [] 313 | processed_files = [] 314 | 315 | for f in files: 316 | if ".json" in f: 317 | file_manifest = f 318 | elif ".par2" in f: 319 | file_parity = f 320 | elif ".tar" in f: 321 | file_archive = f 322 | elif f.endswith(".lst"): 323 | filelist = f 324 | elif f == "filelist.txt": 325 | old_filelist = True 326 | 327 | if file_manifest is None: 328 | if cmdline.force: 329 | logging.error( 330 | "No manifest found, unable to restore. 
Will try to verify and repair if needed") 331 | else: 332 | logging.error( 333 | "No manifest found, unable to restore (use --force to do as much as possible)") 334 | sys.exit(1) 335 | if file_archive is None: 336 | logging.error("No archive found, unable to continue") 337 | sys.exit(1) 338 | 339 | if file_manifest is not None: 340 | logging.debug('Using manifest "%s"', file_manifest) 341 | if file_parity is not None: 342 | logging.debug("Parity is available") 343 | 344 | # If we have a filelist, use it to confirm files 345 | if filelist and not valid_archive( 346 | basepath, 347 | filelist, 348 | corrupt_files, 349 | files) and not cmdline.force: 350 | sys.exit(1) 351 | elif old_filelist: 352 | logging.warning( 353 | 'Using older "filelist.txt" instead of new format using file ending in ".lst"') 354 | if not valid_archive( 355 | basepath, 356 | "filelist.txt", 357 | corrupt_files, 358 | files) and not cmdline.force: 359 | sys.exit(1) 360 | 361 | 362 | # Strip all files except the archive (verify signatures and decrypt) 363 | # since archive might need repairs and for that we need PAR2 364 | logging.info('Validating metadata files') 365 | do_manifest = cmdline.list or cmdline.restore or cmdline.validate 366 | 367 | for f in files: 368 | if f in corrupt_files: 369 | continue 370 | if f == file_archive: 371 | continue 372 | if not validate_file(os.path.join(basepath, f)): 373 | logging.warning('Skipping "%s" due to failed validation', f) 374 | continue 375 | 376 | # Do not extract files we don't need (ie, when not extracting) 377 | if not do_manifest: 378 | continue 379 | 380 | n = strip_file(os.path.join(basepath, f)) 381 | if n is None: 382 | logging.warning('Unable to process "%s"', f) 383 | continue 384 | processed_files.append(n) 385 | if n.endswith('.json'): 386 | file_manifest = n 387 | 388 | if not file_manifest or not os.path.exists(file_manifest): 389 | logging.error('No valid manifest found, aborting') 390 | sys.exit(1) 391 | 392 | if not do_manifest: 393 | sys.exit(0) 394 | 395 | if (cmdline.restore or cmdline.repair) and file_parity is not None and len( 396 | corrupt_files) > 0: 397 | logging.info('Attempting repair of "%s"', file_archive) 398 | for f in processed_files: 399 | if f.endswith(file_archive + '.par2'): 400 | if not fileutils.repairParity(f): 401 | logging.error( 402 | "Failed to repair file, not enough parity material") 403 | sys.exit(1) 404 | else: 405 | logging.info('File was repaired successfully') 406 | break 407 | 408 | # Strip the archive 409 | if cmdline.restore: 410 | logging.info('Preparing to restore files to "%s"', cmdline.restore) 411 | logging.info('Validating archive file "%s"', file_archive) 412 | if not validate_file(os.path.join(basepath, file_archive)): 413 | logging.error('File "%s" signature does not match', file_archive) 414 | if not cmdline.force: 415 | sys.exit(1) 416 | archive = strip_file(os.path.join(basepath, file_archive)) 417 | if archive is None: 418 | logging.error('Unable to process "%s"', file_archive) 419 | sys.exit(1) 420 | 421 | 422 | if file_manifest is None: 423 | logging.info( 424 | 'This is as much as can be done. You can now manually extract the files') 425 | sys.exit(0) 426 | 427 | # And now... 
restore 428 | manifest = None 429 | with open(file_manifest, encoding='utf-8') as fp: 430 | manifest = json.load(fp) 431 | 432 | # If last backup is defined, check it 433 | if cmdline.lastbackup is not None: 434 | if 'lastbackup' not in manifest: 435 | logging.debug( 436 | 'This backup does not specify a previous backup ' 437 | '(made with an older version of iceshelf)') 438 | if 'lastbackup' not in manifest or manifest['lastbackup'] != cmdline.lastbackup: 439 | logging.error( 440 | 'Backup "%s" is not the successor of "%s"', 441 | os.path.basename(file_manifest)[0:-5], 442 | cmdline.lastbackup) 443 | sys.exit(1) 444 | 445 | # If available, show which backup that preceeded it 446 | if cmdline.list: 447 | if 'lastbackup' in manifest: 448 | logging.info( 449 | 'Manifest: Parent backup is "%s"', 450 | manifest['lastbackup']) 451 | else: 452 | logging.debug('Manifest: Does not contain parent reference') 453 | 454 | # Now, print the files we're changing or creating 455 | filecount = 0 456 | fileerror = 0 457 | for k in manifest['modified']: 458 | v = manifest['modified'][k] 459 | src = os.path.normpath(restore_base + k) 460 | if cmdline.list: 461 | logging.info( 462 | 'Manifest: Modified or new file "%s" in "%s"', 463 | os.path.basename(k), 464 | os.path.dirname(k)) 465 | filecount += 1 466 | 467 | # Iterate the archive and make sure we know what's in it 468 | if cmdline.restore: 469 | with tarfile.open(archive, "r:*") as tar: 470 | item = tar.next() 471 | while item is not None: 472 | if '/' + item.name not in manifest['modified']: 473 | logging.error( 474 | 'Archive contains "%s", not listed in the manifest', 475 | item.name) 476 | fileerror += 1 477 | else: 478 | manifest['modified']['/' + item.name]['found'] = True 479 | filecount -= 1 480 | item = tar.next() 481 | 482 | # Check that all files we were looking for was in the archive 483 | for k in manifest['modified']: 484 | if 'found' not in manifest['modified'][k]: 485 | logging.error('Archive is missing "%s"', k) 486 | fileerror += 1 487 | 488 | if fileerror != 0 or filecount != 0: 489 | logging.error("Archive contains errors, aborting") 490 | sys.exit(1) 491 | 492 | # Step 1: Remove any files that were deleted 493 | for f in manifest['deleted']: 494 | src = os.path.normpath(restore_base + f) 495 | if cmdline.list: 496 | logging.info('Manifest: Deleting "%s"', src) 497 | if cmdline.restore: 498 | try: 499 | os.unlink(src) 500 | except OSError as e: 501 | logging.warning('Unable to remove "%s": %s', src, e) 502 | 503 | for k in manifest['moved']: 504 | v = manifest['moved'][k] 505 | src = os.path.normpath(restore_base + v['original']) 506 | dst = os.path.normpath(restore_base + k) 507 | if cmdline.list: 508 | logging.info('Manifest: Moving "%s" to "%s"', src, dst) 509 | if cmdline.restore: 510 | try: 511 | os.rename(src, dst) 512 | except OSError as e: 513 | logging.warning('Unable to move "%s" to "%s": %s', src, dst, e) 514 | 515 | # Finally, if not a dryrun 516 | if not cmdline.restore: 517 | sys.exit(0) 518 | 519 | # Time to extract the files 520 | with tarfile.open(archive, "r:*") as tar: 521 | item = tar.next() 522 | while item is not None: 523 | target_path = os.path.normpath(restore_base + '/' + item.name) 524 | logging.info( 525 | 'Extracting "%s" to "%s"', 526 | os.path.basename(target_path), 527 | os.path.dirname(target_path)) 528 | tar.extract(item, cmdline.restore) 529 | item = tar.next() 530 | logging.info("Backup has been restored") 531 | -------------------------------------------------------------------------------- 
/LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 
176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 
287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iceshelf [![Build Status](https://github.com/mrworf/iceshelf/actions/workflows/python-app.yml/badge.svg)](https://github.com/mrworf/iceshelf/actions/workflows/python-app.yml) 2 | 3 | This tool allow you to backup data, it's intended to be used with services that store such data in an immutable state. This means, for backing up data which is changing on a daily basis, this isn't the tool since it would generate a lot of data. 4 | 5 | The design goal for this backup was to leverage known tools and standards, allowing recovery of data, even if you don't have access to iceshelf, a viable option. 

To that end, this tool uses
- par2
- tar
- bzip2
- gpg
- json

All of these allow you to restore backups even if you no longer have this tool.

If used with immutable storage, it also provides protection against ransomware.

# Features

- Encrypts all backups using GPG private/public key
- Signs all files it uploads (tamper detection)
- Can upload a separate PAR2 file for parity correction (allows for a certain amount of bitrot)
- Supports segmentation of upload (but not of files, yet)
- Pluggable provider system supporting Glacier, S3, SFTP, SCP and local copy, with the ability to upload to multiple destinations in one run
- Tracks backups locally to help locate the file needed to restore
- Keeps the exact directory structure of the backed up files
- Provides paper-based GPG key backup/restore solution
- Most features can be turned on/off and customized

## Backup providers

Define one or more `provider` sections in the configuration file. Each section specifies a `type` and any provider-specific arguments. All configured providers will receive the generated archive for storage. The legacy `[glacier]` section has been removed; using it will now cause the tool to abort so that you review the new provider documentation.

Provider sections must be named using the pattern `[provider-<name>]` where the portion after `provider-` is arbitrary. The name only helps you identify the section. A minimal configuration might look like:

```
[provider-local]
type: cp
dest: backup/done/

[provider-cloud]
type: s3
bucket: mybucket
```

Refer to `providers/*.md` for the options available to each provider type.

#### Migrating from older versions

Older configurations used a dedicated `[glacier]` section. This section has been removed. Replace it with a provider block:

```
[provider-glacier]
type: glacier
vault: myvault
threads: 4
```

Remove the old `[glacier]` section to avoid startup errors.

Because iceshelf is designed to work well with immutable storage (for example, AWS Glacier), any change to a file causes the whole file to be uploaded again with the new content. For this reason, the tool is not recommended for data sources which change frequently, as it would produce a tremendous amount of data over time.

This is an archiving solution for long-term storage, which is what Glacier excels at. It is also the reason it's called iceshelf. To quote from Wikipedia:

> An ice shelf is a thick floating platform of ice that forms where a glacier or ice sheet flows down to a coastline and onto the ocean surface

*and yes, this would probably mean that time runs in reverse, but bear with me, finding cool names (phun intended) for projects is not always easy*

# How does it all work?

1. Loads backup database if available
2. Empties prep directory of any files
3. Creates a tar file (recreating directory structure) until no more files are found or the limit is hit. If this wasn't the first run, only new or changed files are added
4. Depending on options, the tar file is compressed with bzip2
5. The archive is encrypted with a public key of your choice
6. The archive is signed with a public key of your choice (not necessarily the same key as in #5)
7. A manifest of all files in the archive, with checksums, is stored as a JSON file
8. The manifest is signed (using ASCII instead of binary to keep it readable)
9. Parity file(s) are created to allow the archive to be restored should bitrot happen
10. A filelist with checksums is created
11. All extra files (filelist, parity, etc.) are signed
12. Resulting files are uploaded using the configured providers (remote uploads may take a while)
13. Backup is copied to safe keeping (if a done directory is specified)
14. Prep directory is emptied
15. New backup is added to the local database
16. Local database is saved as JSON

A lot of things here can be customized, but in a nutshell, this is what the tool does with all the bells and whistles enabled.

All filenames generated by the tool are based on date and time (YYYYMMDD-HHMMSS-xxxxx, time is in UTC), which helps you figure out where data might hide if you need to find it and have lost the original local database. It also allows you to restore files in the *correct* order (since the tool may have more than one copy of the same file, see `--modified`).

If you have the local database, you'll find that each file also points out which archive it belongs to. When a file is modified, a new memberof entry is added. By sorting the backups field you can easily find the latest backup. The same applies to an individual file: by sorting its memberof field you can find the latest version (or an older one).
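
If you just need to peek inside a single backup, the bundled `iceshelf-restore` tool can list or validate it without extracting anything. A rough example, where the backup prefix and config filename are purely illustrative:

```
# Show what the manifest says the backup contains (nothing is extracted)
./iceshelf-restore --list backup/done/20240101-120000-abcde

# Verify signatures and the file list without restoring
./iceshelf-restore --config my-backup.conf --validate backup/done/20240101-120000-abcde
```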

# Disclaimer

I use this backup tool myself to safely keep a backup of my family's private email (I run the server so it seemed prudent). It's also used for all our photos and home videos, not to mention all scanned documents (see LagerDox, another pet project on GitHub).

**BUT!**
If you lose any content as a result of using this tool (directly or indirectly) you cannot hold me responsible for your loss or damage.

There, I said it. Enough with disclaimers now :-)

## Requirements

In order to be able to run this, you need a few other parts installed.

- OpenPGP / GNU Privacy Guard (typically referred to as `gpg`)
- python-gnupg - Encryption & Signature (NOT `gnupg`, it's `python-gnupg`)
  Ubuntu comes with a version, but unfortunately it's too old. You should install this using the `pip3` tool to make sure you get a current version.
- par2 - Parity tool
- aws - In order to upload archives to AWS services such as S3 or Glacier

### Installing on Ubuntu

This is the simple version which points out what commands to run. Please consider reading through before running, since it will install things (such as pip) in a manner you might not agree with. It is based on what and how I installed the requirements on an Ubuntu LTS 14 release.

1. GPG
   Easy enough, Ubuntu comes with it pre-installed
2. GnuPG (requires pip3)
   ```
   sudo apt-get install python3-dev
   sudo apt-get install python3-pip
   sudo -H pip3 install python-gnupg
   ```

3. PAR2 for parity
   ```
   sudo apt-get install par2
   ```

4. AWS CLI
   Install the `aws` command by following the official instructions:

For more details, see the [step-by-step guide](https://github.com/mrworf/iceshelf/wiki) in the wiki.

## Configuration file

Iceshelf requires a config file to work. You may name it whatever you want and it may exist wherever you want. The important part is that you point it out to the tool.

Here's what it all does...

### Section [sources]

Contains all the directories you wish to back up. Can also be individual files. You define each source by name=path/file, for example:

```
my home movies=/mnt/storage/homemovies
my little file=/mnt/documents/birthcertificate.pdf
```

*default is... no defined source*

### Section [paths]

Iceshelf needs some space for both temporary files and the local database.

#### prep dir

The folder that holds the temporary files, such as the in-transit tar files and related files. Since uploads to remote storage may take quite a while, RAM-backed storage (such as tmpfs) is a **VERY BAD IDEA**.

*default is `backup/inprogress/`*

#### data dir

Where to store local data needed by iceshelf to function. Today that's a checksum database, tomorrow, who knows? Might be good to back up (yes, you can do that).

*default is `backup/metadata/`*

#### done dir

Where to store the backup once it's been completed. If this is blank, no backup is stored. Also see `max keep` under `[options]` for additional configuration. By setting this option and not defining any provider sections, you can use iceshelf purely for local backups.

Please note that it copies the data to the new location and only on success will it delete the original archive files.

*default is `backup/done/`*

#### create paths

By default, iceshelf does not create the done, data or preparation directories; it leaves this responsibility to the user. However, by setting this option to yes, it will create the needed structure as described in the configuration file.

*default is `no`*
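
Putting the path options together, a small `[paths]` section could look like this (the locations are made up for the example):

```
[paths]
prep dir: /var/tmp/iceshelf
data dir: /var/lib/iceshelf
done dir: /mnt/backup/done
create paths: yes
```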

### Section [options]

There are quite a few options for you to play with. Unless otherwise specified, the options are toggled using `yes` or `no`.

#### check update

Will try to detect if there is a new version of iceshelf available and, if so, print out the changes. It's done as the first operation before starting the backup. It requires that you run iceshelf from its git repository and that `git` is available. If there is no new version or it's not run from the git repository, it fails silently.

*default is no, don't check for updates*

#### max size

Defines the maximum size of the *uncompressed* data. It will never go above this, but depending on various other options, the resulting backup files may exceed it.

This option is defined in bytes, but can also be suffixed with K, M, G or T to indicate the unit. We're using true powers of 2 here, so 1K = 1024.

A value of zero or simply blank (or left out) will make it unlimited (unless `add parity` is in effect).

**If the backup didn't include all files because max size was exceeded, iceshelf will exit with code 10. By rerunning iceshelf with the same parameters it will continue where it left off. If you do this until it exits with zero, you'll have a full backup (see the sketch below).**

**This behavior is to allow you to segment your uploads into a specific size.**

*default is blank, no limit*
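
As a sketch of how such segmented runs could be scripted, the loop below simply re-runs iceshelf until it stops reporting exit code 10. The config filename and the exact way iceshelf is invoked are assumptions for the example, not something shipped with the tool:

```
#!/bin/sh
# Re-run iceshelf until there are no leftover files (exit code 10 means "more to do").
# "my-backup.conf" and the invocation style are placeholders.
RET=10
while [ "$RET" -eq 10 ]; do
    ./iceshelf my-backup.conf
    RET=$?
done
if [ "$RET" -ne 0 ]; then
    echo "Backup failed with exit code $RET" >&2
fi
```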

#### change method

How to detect changes. There are a few different modes: the most common is `data`, but `sha1` (actually the same as data), `sha256` and `sha512` also work. Iceshelf hashes the file data and compares the hashes to detect changes. While sha1 is usually good enough, you can specify `sha256` or `sha512` if you feel it is warranted.

Note that switching between methods will not upgrade all checksums on the next run; only files which have changed will get the new checksum, to avoid unnecessary changes.

*default is `data`*

#### delta manifest

Save a delta manifest with the archive as a separate file. This is essentially a JSON file with the filenames and their checksums. Handy if you ever lose the entire local database, since you can download all your manifests in order to locate a missing file.

Please keep in mind that this is a *delta* manifest: it contains nothing but the files in this backup, with no references to files from previous backups.

*default is `yes`*

#### compress

Controls compression; this option can be `yes`, `no` or `force`. While `no` is obvious (never compress), `yes` is somewhat more clever: it calculates how many of the files included in the backup are considered compressible (see `incompressible` as well) and engages compression if 20% or more are considered compressible.

Now, `force` is probably more obvious, but we cover it anyway for completeness. It essentially overrides the logic of `yes` and compresses regardless of the content.

*default is `yes`*

#### persuasive

While a fun name for an option, it essentially says that even if the next file won't fit within the max size limit, iceshelf should continue and see if any other file fits. This is to try and make sure that all archives are of roughly equal size. If set to `no`, it will abort the moment it gets to a file which won't fit the envelope.

*default is `yes`*

#### ignore overlimit

If `yes`, this will make iceshelf return a success code once all files are backed up, even if it has skipped files that are larger than the max size. So if you have 10 files and one is larger than max size, then 9 files will be backed up and it will still return OK (exit code 0). Without this option, it would have failed with a non-zero exit code.

*default is `no`*

#### incompressible

Using this option, you can add additional file extensions which will be considered incompressible by the built-in logic.

*default is blank, relying only on the built-in list*

#### max keep

Defines how many backups to keep in the `done dir` folder. If it's zero or blank, there's no limit. Anything else defines the number of backups to keep. It's FIFO-based; the oldest backup gets deleted first. This option is pointless without defining a `done dir`.

*default is zero, unlimited storage*

#### prefix

Optional setting which adds the selected prefix to all files produced by the tool. If not set, no prefix is added.

*default is no prefix*

#### detect move

This is an *experimental* feature which tries to detect when you've just moved or renamed a file. It will only log the change to the JSON manifest and will not upload the file, since it's the same file.

It's a very new feature and should be used with caution. It tracks which backup the original file was in and what its name was, so it should be able to provide the details needed to restore moved files, but it's not 100% tested.

*default is `no`*

#### create filelist

Adds an additional file called `filelist.txt`, a shasum-compatible file which lists each file in the backup (the produced backup files, not the backed-up source files) together with its sha1 checksum, so the backup can be checked with shasum, like so: `shasum -c filelist.txt`. This tells you which files belong to the backup and is used by iceshelf-restore. The file will also be signed if signing is enabled (see security).

*default is `yes`*

### Section [exclude]

This is an optional section; by default iceshelf will back up every file it finds in the sources, but sometimes that's not what you want. This section allows you to define exclusion rules.

You define rules the same way you do sources, by name=rule, for example:

```
no zip files=*.zip
no cache=/home/user/cache
...
```

In its simplest form, a rule simply defines what the filename (including path) starts with. If this matches, the file is excluded. All rules are CaSe-InSeNsItIvE.

#### Prefixes

You can however make rules more powerful by using prefixes. By prefixing the rule with a star (*) the rule will match from the end of the filename. By prefixing with a question mark (?) the rule will match any file containing the text. Finally, you can use less-than or more-than (< or >) followed by a size in bytes to exclude by size alone.
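
For example, the following rules (patterns picked purely for illustration) exclude anything ending in `.log`, anything containing `thumbs.db`, anything smaller than 1 KB and anything larger than 1 GB:

```
[exclude]
no logs=*.log
no thumbnails=?thumbs.db
skip tiny files=<1024
skip huge files=>1073741824
```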
242 | #### persuasive
243 |
244 | While a fun name for an option, it essentially says that even if the next file won't fit within the max size limit, it should continue and see if any other file fits. This is to try and make sure that all archives are of roughly equal size. If set to `no`, it will abort the moment it gets to a file which won't fit the envelope.
245 |
246 | *default is `yes`*
247 |
248 | #### ignore overlimit
249 |
250 | If `yes`, this will make iceshelf return a success code once all files are backed up, even if it has skipped files that are larger than the max size. So if you have 10 files and one is larger than max size, then 9 files will be backed up and it will still return OK (exit code 0); without this option, it would have failed with a non-zero exit code.
251 |
252 | *default is `no`*
253 |
254 | #### incompressible
255 |
256 | Using this option, you can add additional file extensions which will be considered incompressible by the built-in logic.
257 |
258 | *default is blank, relying only on the built-in list*
259 |
260 | #### max keep
261 |
262 | Defines how many backups to keep in the `done dir` folder. If it's zero or blank, there's no limit. Anything else defines the number of backups to keep. It's FIFO-based; the oldest backup gets deleted first. This option is pointless without defining a `done dir`.
263 |
264 | *default is zero, unlimited storage*
265 |
266 | #### prefix
267 |
268 | Optional setting which allows you to add the selected prefix to all files produced by the tool. If not set, no prefix is added.
269 |
270 | *default is no prefix*
271 |
272 | #### detect move
273 |
274 | This is an *experimental* feature which tries to detect when you've just moved or renamed a file. It will only log the change to the JSON manifest and will not upload the file, since it's the same file.
275 |
276 | It's a very new feature and should be used with caution. It will track which backup the original file was in and what the name was, so it should be able to provide details for restoring moved files, but it's not 100% tested.
277 |
278 | *default is `no`*
279 |
280 | #### create filelist
281 |
282 | Adds an additional file, called `filelist.txt`, which is a shasum-compatible file detailing each file in the backup (the produced backup files, not the backed-up files) along with its SHA-1 hash. It can be checked with shasum, like so: `shasum -c filelist.txt`. This is to tell you which files belong to the backup, and it's used by iceshelf-restore. The file will also be signed if signing is enabled (see security).
283 |
284 | *default is `yes`*
285 |
286 | ### Section [exclude]
287 |
288 | This is an optional section; by default iceshelf will back up every file it finds in the sources, but sometimes that's not what you want. This section allows you to define exclusion rules.
289 |
290 | You define rules the same way you do sources, by name=rule, for example:
291 |
292 | ```
293 | no zip files=*.zip
294 | no cache=/home/user/cache
295 | ...
296 | ```
297 |
298 | In its simplest form, a rule defines what the filename (including path) starts with. If this matches, the file is excluded. All rules are CaSe-InSeNsItIvE.
299 |
300 | #### Prefixes
301 |
302 | You can, however, make the rules more complex by using prefixes. By prefixing the rule with a star (*) the rule will match from the end instead. By prefixing with a question mark (?) the rule will match any file containing the rule. Finally, you can also use less-than or greater-than (< or >) followed by a size to exclude by size only.
303 |
304 | But wait, there's more. On top of these prefixes you can add an additional prefix (a pre-prefix) in the shape of an exclamation mark. This will *invert* the rule and make it inclusive instead.
305 |
306 | Why would you want to do this?
307 |
308 | Consider the following:
309 | ```
310 | [exclude]
311 | alldocs=!*.doc
312 | no odd dirs=/some/odd/dir/
313 | ```
314 |
315 | In a structure like this:
316 |
317 | ```
318 | /some/
319 | /some/data.txt
320 | /some/todo.doc
321 | /some/odd/dir/
322 | /some/odd/dir/moredata.txt
323 | /some/odd/dir/readme.doc
324 | ```
325 |
326 | It will back up the following:
327 |
328 | ```
329 | /some/data.txt
330 | /some/todo.doc
331 | /some/odd/dir/readme.doc
332 | ```
333 |
334 | Notice how it snagged a file from inside an excluded folder? Pretty convenient. However, in order for this to work, you must consider the order of the rules. If you change the order to:
335 |
336 | ```
337 | [exclude]
338 | no odd dirs=/some/odd/dir/
339 | alldocs=!*.doc
340 | ```
341 |
342 | The `no odd dirs` rule would trigger first and the second rule would never get a chance to be evaluated. If you're having issues with the rules, consider running iceshelf with `--changes` and `--debug` to see what it's doing.
343 |
344 | Finally, you can also reference external files containing exclusion rules. This makes it easy to use ready-made rules for various items you'd like to back up. Including an external rule file is done by prefixing the filename with a pipe (`|`) character. For example, to include "my-rules.excl", you'd write the following:
345 |
346 | ```
347 | [exclude]
348 | my rules=|/some/path/my-rules.excl
349 | ```
350 |
351 | What essentially happens is that the "my rules" line is replaced with all the rules defined inside my-rules.excl. The only restriction is that an external rule file cannot reference other external rule files (yes, no recursion for you).
352 |
353 | ### Section [provider-*]
354 |
355 | Providers control where your backups are stored. Create one or more sections with
356 | names beginning with `provider-`. Each section must define a `type` matching one
357 | of the built-in providers (cp, sftp, scp, s3 or glacier) and any additional
358 | options documented in `providers/*.md`.
359 |
360 | Example:
361 |
362 | ```
363 | [provider-local]
364 | type: cp
365 | dest: /mnt/backup/
366 | create: yes
367 | ```
368 |
369 | All provider sections are processed in order and the backup files will be
370 | uploaded to each destination.
371 |
372 | ### Section [security]
373 |
374 | From here you can control everything which relates to the security of the content, as well as the parity settings. Make sure you have GPG installed or this will not function properly.
375 |
376 | #### encrypt
377 |
378 | Specifies the GPG key to use for encryption. Usually an email address. This option can be used independently of sign and can also use a different key.
379 |
380 | Only the archive file is encrypted.
381 |
382 | *default is blank*
383 |
384 | #### encrypt phrase
385 |
386 | If your encryption key needs a passphrase, this is the place you put it.
387 |
388 | *default is blank*
389 |
390 | #### sign
391 |
392 | Specifies the GPG key to use for signing files. Usually an email address. This option can be used independently of encrypt and can also use a different key.
393 |
394 | Enabling signing will sign *every* file associated with the archive, including the archive itself. It gives you the benefit of being able to validate the data, as well as detecting whether the archive has been damaged or tampered with.
395 |
396 | See `add parity` for dealing with damaged archive files.
397 |
398 | *default is blank*
399 |
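As an illustration, a `[security]` section where the same key is used for both encryption and signing could look like this sketch (the key and passphrase values are placeholders; the remaining options are described below):

```
[security]
encrypt: backup@example.com
encrypt phrase: my-secret-passphrase
sign: backup@example.com
sign phrase: my-secret-passphrase
```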
400 | #### sign phrase
401 |
402 | If your signature key needs a passphrase, this is the place you put it.
403 |
404 | *default is blank*
405 |
406 | #### encrypt manifest
407 |
408 | If you're worried about what the manifest file reveals (it describes the changes contained in the backup, see `delta manifest` under `options`), specifying this option will encrypt the manifest as well (using the same key as `encrypt` above). If you haven't enabled `delta manifest`, this option has no effect.
409 |
410 | *default is `yes`*
411 |
412 | #### add parity
413 |
414 | Adds a PAR2 parity file, allowing you to recover from errors in the archive, should any occur. These files are never encrypted, and are only signed if you've enabled signing. The value for this option is the percentage of errors in the archive that you wish to be able to deal with.
415 |
416 | The value ranges from 0 (off) to 100 (the whole file).
417 |
418 | Remember, if you ask for 50%, the resulting archive files *will* be roughly 50% larger.
419 |
420 | For the security-minded: this option acts upon the already encrypted and signed version of the archive, so even at 100%, there won't be any data which can be used to get around the encryption.
421 |
422 | There is unfortunately also a caveat with using parity. Due to a limitation of the PAR2 specification, `max size` will automatically be set to 32GB, regardless of whether you have set it to unlimited or >32GB.
423 |
424 | *default is zero, no parity, to avoid the 32GB limit*
425 |
426 | ## Commandline
427 |
428 | You can also provide a few options via the command line; these are not available in the configuration file.
429 |
430 | `--changes` will show you what *would* be backed up, if you were to do it
431 |
432 | `--logfile` redirects the log output to a separate file, otherwise warnings and errors are shown on the console. Enabling file logging will also enable full debugging.
433 |
434 | `--find <string>` will show any file and backup which contains `<string>` in the name
435 |
436 | `--modified` shows files which have changed and the number of times, helpful when you want to find what you need to exclude from your backup (such as index files, cache, etc.)
437 |
438 | `--show <archive>` lists all file components which make up a particular backup. This refers to the archive file, manifest, etc., not the contents of the actual backup. Helpful when you need to retrieve a backup and you want to know all the files.
439 |
440 | `--full` forces a complete backup, forgoing the incremental logic.
441 |
442 | `--list files` shows the current state of your backup, as iceshelf knows it
443 |
444 | `--list members` shows the files that are a part of your backup and where to find the latest copy of each file
445 |
446 | `--list sets` shows the backups you need to retrieve to restore a complete backup (please unpack in old->new order)
447 |
448 | No matter what options you add, you *must* point out the configuration file, or you will not get any results.
449 |
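For example, assuming iceshelf is run from its checkout and that your configuration lives at `/etc/iceshelf.conf` (both paths are just placeholders), typical invocations could look like this:

```
# Preview what would be backed up without doing anything
./iceshelf --changes /etc/iceshelf.conf

# Run a backup; exit code 10 (see Return codes below) means more files
# remain, so keep rerunning until it returns 0
./iceshelf /etc/iceshelf.conf
while [ $? -eq 10 ]; do
  ./iceshelf /etc/iceshelf.conf
done
```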
450 | ## Return codes
451 |
452 | Depending on what happened during the run, iceshelf will return the following exit codes:
453 |
454 | 0 = All good, operation finished successfully
455 |
456 | 1 = Configuration issue
457 |
458 | 2 = Unable to gather all data, meaning that while creating the archive to upload, some kind of I/O related error happened. The log should give you an idea of what went wrong. Can happen when files disappear during archive creation
459 |
460 | 3 = The amount of files to back up exceeds the `max size` parameter and `persuasive` wasn't enabled
461 |
462 | 10 = Backup was successful, but there are more files to back up. Happens if `persuasive` and `max size` are enabled and the amount of data exceeds `max size`. Running the tool again will gather any files which weren't backed up. Ideally you continue to run the tool until it returns 0
463 |
464 | 255 = Generic error, see log output
465 |
466 | # Retrieving backups
467 |
468 | To download archives stored in Glacier, use the [iceshelf-retrieve](README.iceshelf-retrieve.md) helper. It manages Glacier jobs and verifies files automatically. You can fetch one or more backups, or use `--all` to restore everything directly from the vault inventory.
469 |
470 | # Thoughts
471 |
472 | - Better options than par2 which are open-source?
473 | - JSON is probably not going to cut it in the future for local metadata storage
474 |
475 | # FAQ
476 |
477 | ## I keep getting "Signature not yet current" errors when uploading
478 |
479 | This is caused by your system clock being off by more than 5 minutes. It's highly recommended that you run a time synchronization daemon such as NTPd on the machine which is responsible for uploading the backup to AWS.
480 |
481 | ## When I run the tool, it says "Current GnuPG python module does not support file encryption, please check FAQ section in documentation"
482 |
483 | Unfortunately, there is both a gnupg and a python-gnupg implementation. This tool relies on the latter. If you get this error, then you've installed the `gnupg` version instead of `python-gnupg`.
484 | To fix this, please uninstall the wrong one using either the package manager or `sudo pip3 uninstall gnupg`, and then install the correct one with `sudo -H pip3 install python-gnupg`
485 |
486 | ## I get "Filename '<some file>' is corrupt, please rename it. Will be skipped for now" warnings
487 |
488 | This happens in particular on Unix filesystems where you might, at one point, have stored filenames encoded in a non-UTF-8 format (such as Latin-1 or similar). When you later switched to UTF-8, these files remained. Usually, doing an `ls -la` will show the file, but with a question mark where the offending character should be, because it's not valid UTF-8.
489 |
490 | To fix it, simply rename the file and it will work as expected.
491 |
492 | ## What about the local database?
493 |
494 | Yes, it's vulnerable to tampering, bitrot and loss. But instead of constructing something to solve that locally, I would recommend you simply add an entry to the [sources] section of the config:
495 |
496 | ```
497 | iceshelf-db=/where/i/store/the/checksum.json
498 | ```
499 |
500 | And presto, each copy of the archive will have the previous database included. Which is fine, because the `delta manifest` option is normally enabled, which means you have it all covered.
501 |
502 | If this turns out to be a major concern/issue, I'll revisit this question.
503 |
504 | ## How am I supposed to restore a full backup?
505 |
506 | Using the `--list sets` option, iceshelf will list the backups you need to restore and the order in which to do it. If a file was moved, the tool will display what the original name was and what the new name is supposed to be.
507 |
508 | There is also an experimental tool called [iceshelf-restore](README.iceshelf-restore.md) which you can use to more easily extract a backup. Configuration is optional; the tool can validate or restore a backup directly from the files and will attempt repairs if parity data is available. When the config is missing you may provide the GPG user with `--user` and the passphrase with `--passphrase`.
509 |
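For example, to figure out what you need for a full restore (the backup name below is made up; use the names that `--list sets` actually prints, and the config path is the same placeholder as before):

```
# Which backups are needed for a complete restore, oldest first
./iceshelf --list sets /etc/iceshelf.conf

# Which files make up one specific backup
./iceshelf --show 20240101-120000-0abcd /etc/iceshelf.conf
```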
510 | ## After doing some development on the code, how will I know something didn't break?
511 |
512 | Please use the testsuite and run a complete iteration with GPG and PAR2. Also
513 | extend the suite if needed to cover any specific test case which was previously
514 | missed.
515 |
516 | The tests rely on the `par2` and `gpg` tools being available in the PATH, so
517 | make sure they are installed before running `bash extras/testsuite/test_backup.sh`
518 | and `bash extras/testsuite/test_restore.sh`.
519 |
--------------------------------------------------------------------------------
/iceshelf:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Ice Shelf is an incremental backup tool designed for immutable storage.
4 | # It can encrypt data and generate parity information
5 | # to make sure that the data is secure and has some measure of protection
6 | # against data corruption.
7 | #
8 | # Each backup can therefore be restored individually at the expense of
9 | # extra storage in Glacier.
10 | #
11 | # Copyright (C) 2015 Henric Andersson (henric@sensenet.nu)
12 | #
13 | # This program is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU General Public License
15 | # as published by the Free Software Foundation; either version 2
16 | # of the License, or (at your option) any later version.
17 | #
18 | # This program is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 | # GNU General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU General Public License
24 | # along with this program; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
26 | # 27 | ################################################################################ 28 | 29 | def create_archive(base): 30 | mode = "w" 31 | archive = base + ".tar" 32 | if config["compress"] and currentOp["filesize"] > 0: 33 | if config["compress-force"] or shouldCompress(): 34 | mode = "w|bz2" 35 | archive += ".bz2" 36 | else: 37 | logging.info( 38 | "Content is not likely to compress (%d%% chance), skipping compression.", 39 | shouldCompress()) 40 | logging.info( 41 | "Preparing content for archiving, may take quite a while depending on size") 42 | with tarfile.open(archive, mode) as tar: 43 | tar.dereference = True 44 | for k in newFiles: 45 | if k not in movedFiles: 46 | try: 47 | tar.add(k, recursive=False) 48 | logging.debug('File "%s" added' % k) 49 | except IOError as e: 50 | if e.errno == 2: 51 | logging.warning("File \"%s\" was removed after initial scan", k) 52 | else: 53 | logging.exception("Error copying file \"%s\"", k) 54 | raise 55 | return archive 56 | 57 | 58 | def create_manifest(path): 59 | tmp1 = {} 60 | tmp2 = [] 61 | for k, v in newFiles.items(): 62 | if k not in movedFiles: 63 | tmp1[k] = v 64 | for k in deletedFiles: 65 | if k not in movedFiles.values(): 66 | tmp2.append(k) 67 | manifest = { 68 | "modified": tmp1, 69 | "deleted": tmp2, 70 | "moved": movedFiles, 71 | "previousbackup": lastBackup, 72 | } 73 | with open(path, "w", encoding="utf-8") as fp: 74 | fp.write(json.dumps(manifest, ensure_ascii=False)) 75 | return path 76 | 77 | 78 | def encrypt_file(path, gpg, armor=False): 79 | with open(path, "rb") as fp: 80 | gpg.encrypt_file( 81 | fp, 82 | config["encrypt"], 83 | passphrase=config["encrypt-pw"], 84 | armor=armor, 85 | output=path + ".gpg", 86 | ) 87 | if not os.path.exists(path + ".gpg"): 88 | logging.error( 89 | "GnuPG didn't produce an encrypted file. Please make sure GnuPG is installed and running properly") 90 | return None 91 | os.remove(path) 92 | return path + ".gpg" 93 | 94 | 95 | def sign_file(path, gpg, binary=False): 96 | with open(path, "rb") as fp: 97 | gpg.sign_file( 98 | fp, 99 | keyid=config["sign"], 100 | passphrase=config["sign-pw"], 101 | binary=binary, 102 | clearsign=False, 103 | output=path + (".sig" if binary else ".asc"), 104 | ) 105 | outfile = path + (".sig" if binary else ".asc") 106 | if not os.path.exists(outfile): 107 | logging.error( 108 | "GnuPG didn't produce a signed file. 
Please make sure GnuPG is installed and running properly") 109 | return None 110 | os.remove(path) 111 | return outfile 112 | 113 | 114 | def add_parity(path): 115 | logging.info("Generating %d%% parity information", config['parity']) 116 | return fileutils.generateParity(path, config["parity"]) 117 | 118 | 119 | def gatherData(): 120 | base = os.path.join(config["prepdir"], config["prefix"] + config["unique"]) 121 | file_archive = None 122 | file_manifest = base + ".json" 123 | gpg = gnupg.GPG(options=['-z', '0']) 124 | 125 | havearchive = False 126 | if len(newFiles) - len(movedFiles): 127 | file_archive = create_archive(base) 128 | havearchive = True 129 | else: 130 | if len(movedFiles): 131 | logging.info("No files to save, only metadata changes, skipping archive") 132 | else: 133 | logging.info("No files to save, skipping archive") 134 | 135 | if config["manifest"]: 136 | file_manifest = create_manifest(file_manifest) 137 | 138 | if config["encrypt"] and havearchive: 139 | file_archive = encrypt_file(file_archive, gpg, armor=False) 140 | if file_archive is None: 141 | return None 142 | if config["sign"] and havearchive: 143 | file_archive = sign_file(file_archive, gpg, binary=True) 144 | if file_archive is None: 145 | return None 146 | 147 | if havearchive and config["parity"] > 0: 148 | if not add_parity(file_archive): 149 | logging.error("Unable to create PAR2 file for this archive") 150 | return None 151 | 152 | if config["encrypt"] and config["encrypt-manifest"] and config["manifest"]: 153 | file_manifest = encrypt_file(file_manifest, gpg, armor=True) 154 | if file_manifest is None: 155 | return None 156 | 157 | if config["sign"]: 158 | if config["manifest"]: 159 | file_manifest = sign_file(file_manifest, gpg) 160 | if file_manifest is None: 161 | return None 162 | if config["parity"] > 0: 163 | logging.info("Signing parity") 164 | for f in os.listdir(config["prepdir"]): 165 | if f.endswith('.par2'): 166 | f = os.path.join(config["prepdir"], f) 167 | signed = sign_file(f, gpg, binary=True) 168 | if signed is None: 169 | return None 170 | 171 | if config["create-filelist"]: 172 | file_list = base + ".lst" 173 | fileutils.generateFilelist(config["prepdir"], file_list) 174 | if config["sign"]: 175 | signed = sign_file(file_list, gpg) 176 | if signed is None: 177 | return None 178 | 179 | return os.listdir(config["prepdir"]) 180 | import logging 181 | import argparse 182 | import sys 183 | import os.path 184 | import json 185 | from datetime import datetime 186 | import time 187 | import shutil 188 | import tarfile 189 | import gnupg 190 | from subprocess import Popen, PIPE 191 | 192 | import modules.configuration as configuration 193 | import modules.fileutils as fileutils 194 | import modules.helper as helper 195 | import modules.providers as providers 196 | 197 | lastBackup = None 198 | oldMoves = {} 199 | oldFiles = {} 200 | newFiles = {} 201 | shaFiles = {} 202 | movedFiles = {} 203 | deletedFiles = {} 204 | backupSets = {} 205 | currentOp = {"filecount": 0, "filesize": 0, "compressable" : 0} 206 | oldVault = None 207 | 208 | incompressable = [ 209 | "jpg", "gif", "mkv", "avi", "mov", "mp4", 210 | "mp3", "flac", "zip", "bz2", "gz", "tgz", 211 | "7z", "aac", "rar", "vob", "m2ts", "ts", 212 | "jpeg", "psd", "png", "m4v", "m4a", "3gp", 213 | "tif", "tiff", "mts" 214 | ] 215 | 216 | """ Parse command line """ 217 | parser = argparse.ArgumentParser(description="IceShelf - An Amazon Galcier Incremental backup tool", formatter_class=argparse.ArgumentDefaultsHelpFormatter) 218 | 
parser.add_argument('--logfile', metavar="FILE", help="Log to file instead of stdout") 219 | parser.add_argument('--debug', action='store_true', default=False, help='Adds more details to the log output') 220 | parser.add_argument('--changes', action='store_true', default=False, help="Show changes to backup set but don't do anything") 221 | parser.add_argument('--find', metavar='STRING', help='Searches the backup archive for files which contain string in name (case-insensitive)') 222 | parser.add_argument('--show', metavar='ARCHIVE', help='Shows members of a certain archive') 223 | parser.add_argument('--modified', action='store_true', default=False, help='Show all files which exists multiple times due to modifications') 224 | parser.add_argument('--full', action='store_true', default=False, help='Full backup, regardless of changes to files') 225 | parser.add_argument('--list', type=str.lower, choices=['files', 'members', 'sets'], help='List currently backed up structure') 226 | parser.add_argument('config', metavar="CONFIG", help="Which config file to load") 227 | cmdline = parser.parse_args() 228 | 229 | """ Setup logging first """ 230 | logging.getLogger('').handlers = [] 231 | loglevel=logging.INFO 232 | if cmdline.logfile: 233 | logformat=u'%(asctime)s - %(levelname)s - %(message)s' 234 | else: 235 | logformat=u'%(message)s' 236 | if cmdline.debug: 237 | loglevel=logging.DEBUG 238 | logformat=u'%(asctime)s - %(filename)s@%(lineno)d - %(levelname)s - %(message)s' 239 | 240 | # Create a logger 241 | logger = logging.getLogger() 242 | logger.setLevel(loglevel) 243 | 244 | # Create a handler for stdout 245 | stream_handler = logging.StreamHandler(sys.stdout) 246 | stream_handler.setLevel(loglevel) 247 | stream_handler.setFormatter(logging.Formatter(logformat)) 248 | logger.addHandler(stream_handler) 249 | 250 | # Create a handler for the file 251 | if cmdline.logfile: 252 | file_handler = logging.FileHandler(cmdline.logfile) 253 | file_handler.setLevel(loglevel) 254 | file_handler.setFormatter(logging.Formatter(logformat)) 255 | logger.addHandler(file_handler) 256 | 257 | logging.getLogger("gnupg").setLevel(logging.WARNING) 258 | logging.getLogger("shutil").setLevel(logging.WARNING) 259 | 260 | # Make sure we have the correct gnupg module 261 | if not "encrypt_file" in dir(gnupg.GPG()): 262 | logging.error("Current GnuPG python module does not support file encryption, please check FAQ section in documentation") 263 | sys.exit(255) 264 | 265 | ####################### 266 | 267 | def executeCommand(config, cmd): 268 | logging.debug("Executing command: " + repr(cmd)) 269 | 270 | p = Popen(cmd, stdout=PIPE, stderr=PIPE, cwd=config["prepdir"]) 271 | out, err = p.communicate() 272 | logging.debug("Output: " + repr(out)) 273 | logging.debug("Error : " + repr(err)) 274 | return p.returncode 275 | 276 | def checkNewVersion(): 277 | # First, see if there's a .git folder where we stand 278 | if not os.path.exists('.git'): 279 | return 280 | try: 281 | p = Popen(['git', 'fetch', 'origin' ], stdout=PIPE, stderr=PIPE) 282 | out, err = p.communicate() 283 | if p.returncode == 0: 284 | p = Popen(['git', 'log', '--oneline', 'master..origin/master' ], stdout=PIPE, stderr=PIPE) 285 | out, err = p.communicate() 286 | if len(out) > 0: 287 | logging.info("New version available:") 288 | for line in out.split('\n'): 289 | line = line.strip() 290 | if line != "": 291 | logging.info("+++ " + line) 292 | except Exception: 293 | logging.exception('Failed to check for new version') 294 | 295 | def shouldCompress(): 
296 | if currentOp["filesize"] == 0: 297 | return 0 298 | chance = int((currentOp["compressable"] * 100) / currentOp["filesize"]) 299 | return chance >= 20 300 | 301 | def willCompress(filename): 302 | (ignore, ext) = os.path.splitext(filename) 303 | return ext[1:].lower() not in incompressable 304 | 305 | def collectFile(filename): 306 | chksum = "" 307 | info = os.stat(filename) 308 | maxsize = config["maxsize"] 309 | 310 | if maxsize > 0 and info.st_size > maxsize: 311 | logging.warn("File \"%s\" is too big (%s) to ever fit inside defined max size of %s", filename, helper.formatSize(info.st_size), helper.formatSize(config["maxsize"])) 312 | return False 313 | 314 | if maxsize > 0 and (currentOp["filesize"] + info.st_size) > maxsize and not cmdline.changes: 315 | return False 316 | 317 | chksum = fileutils.hashFile(filename, config["sha-type"], True) 318 | 319 | # Remove files from the deleted index (so we catch files which are deleted, they are the ones left behind) 320 | deletedFiles.pop(filename, None) 321 | # Store SHA for quick lookup 322 | item = shaFiles.get(chksum) 323 | if item is None: 324 | shaFiles[chksum] = [filename] 325 | else: 326 | item.append(filename) 327 | 328 | item = oldFiles.get(filename) 329 | if item is None or item["checksum"] == '' or fileutils.hashChanged(filename, item["checksum"], chksum) or cmdline.full: 330 | currentOp["filecount"] += 1 331 | currentOp["filesize"] += info.st_size 332 | if willCompress(filename): 333 | currentOp["compressable"] += info.st_size 334 | newFiles[filename] = {"checksum" : chksum, "memberof" : [config["unique"]], "deleted": []} 335 | return True 336 | 337 | def collectSources(sources): 338 | # Time to start building a list of files 339 | result = {'files':[], 'size':0} 340 | for name,path in sources.items(): 341 | logging.info("Processing \"%s\" (%s)", name, path) 342 | if os.path.isfile(path): 343 | if not configuration.isExcluded(path): 344 | if not collectFile(path): 345 | result['files'].append(path) 346 | result['size'] += os.path.getsize(path) 347 | if not config["persuasive"] and not cmdline.changes: 348 | return result 349 | else: 350 | for root, dirs, files in os.walk(path): 351 | for f in files: 352 | if not configuration.isExcluded(os.path.join(root, f)): 353 | filename = os.path.join(root, f) 354 | 355 | if filename is not None: 356 | if not collectFile(filename): 357 | result['files'].append(filename) 358 | result['size'] += os.path.getsize(path) 359 | if not config["persuasive"] and not cmdline.changes: 360 | logging.debug("Not persuasive") 361 | return result 362 | 363 | # Make this easier to test by the caller if we have zero files we skipped 364 | if len(result['files']) == 0: 365 | result = None 366 | return result 367 | 368 | 369 | 370 | ##################### 371 | 372 | config = configuration.parse(cmdline.config) 373 | if config is None: 374 | logging.error("Configuration is broken, please check %s" % cmdline.config) 375 | sys.exit(1) 376 | 377 | if cmdline.debug: 378 | logging.debug('Active config:') 379 | for k in config: 380 | logging.debug('"%s" = "%s"', k, config[k]) 381 | 382 | # Check version 383 | if config["checkupdate"]: 384 | checkNewVersion() 385 | 386 | # Also make sure any GnuPG key is available and valid 387 | if config["encrypt"] or config["sign"]: 388 | gpg = gnupg.GPG(options=['-z', '0']) # Do not use GPG compression since we use bzip2 389 | if config["encrypt"]: 390 | test = gpg.encrypt("test", config["encrypt"], passphrase=config["encrypt-pw"], armor=True) 391 | if len(str(test)) == 0: 392 
| logging.error("Can't find encryption key \"%s\"" % config["encrypt"]) 393 | sys.exit(1) 394 | if config["sign"]: 395 | test = gpg.sign("test", keyid=config["sign"], passphrase=config["sign-pw"], binary=False) 396 | if len(str(test)) == 0: 397 | logging.error("Can't find sign key \"%s\"" % config["sign"]) 398 | sys.exit(1) 399 | 400 | # Add more extensions (if provided) 401 | if config["extra-ext"] is not None: 402 | incompressable += config["extra-ext"] 403 | 404 | # Prep some needed config items which we generate 405 | config["file-checksum"] = os.path.join(config["datadir"], "checksum.json") 406 | tm = datetime.utcnow() 407 | config["unique"] = "%d%02d%02d-%02d%02d%02d-%05x" % (tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second, tm.microsecond) 408 | config["archivedir"] = os.path.join(config["prepdir"], config["unique"]) 409 | 410 | # Instantiate backup providers 411 | providers_cfg = config.get("providers", []) 412 | provider_objects = [] 413 | for p_cfg in providers_cfg: 414 | try: 415 | p = providers.get_provider(p_cfg) 416 | except ValueError as e: 417 | logging.error("Provider error: %s", e) 418 | sys.exit(1) 419 | if p is None: 420 | logging.error("Invalid provider configuration for %s", p_cfg.get('type')) 421 | sys.exit(1) 422 | provider_objects.append(p) 423 | 424 | """ 425 | Load the old data, containing checksums and backup sets 426 | """ 427 | if os.path.exists(config["file-checksum"]): 428 | with open(config["file-checksum"], "rb") as fp: 429 | oldSave = json.load(fp) 430 | if configuration.isCompatible(oldSave["version"]): 431 | oldFiles = oldSave["dataset"] 432 | #deletedFiles = oldFiles.copy() 433 | for k in oldFiles: 434 | if oldFiles[k]["checksum"] != '': 435 | deletedFiles[k] = oldFiles[k] 436 | 437 | backupSets = oldSave["backups"] 438 | oldVault = oldSave["vault"] 439 | if 'moved' in oldSave: 440 | oldMoves = oldSave["moved"] 441 | if 'lastbackup' in oldSave: 442 | lastBackup = oldSave["lastbackup"] 443 | logging.info( 444 | "State loaded, last run was %s using version %s", 445 | datetime.fromtimestamp(oldSave["timestamp"]).strftime("%c"), 446 | oldSave["version"] 447 | ) 448 | else: 449 | logging.info("First run, no previous checksums") 450 | 451 | if cmdline.list: 452 | if cmdline.list == "files": 453 | logging.info("Files in current backup:") 454 | elif cmdline.list == "members": 455 | logging.info("Backups containing files in current backup:") 456 | elif cmdline.list == "sets": 457 | logging.info("Needed backup sets to restore complete backup (in this order):") 458 | 459 | # Find out which was the latest backup 460 | # Build a tree so we can sort it 461 | backuptree = [] 462 | for k,v in backupSets.items(): 463 | backuptree.append(k) 464 | backuptree.sort() 465 | 466 | filetree = [] 467 | for k,v in oldFiles.items(): 468 | if v["checksum"] != "": 469 | if cmdline.list == "members": 470 | last = sorted(v["memberof"]) 471 | if k in oldMoves: 472 | filetree.append(oldMoves[k]['reference'] + ' "' + oldMoves[k]['original'] + '" moved to "' + k + '"') 473 | else: 474 | filetree.append(last[len(last)-1] + ' "' + k + '"') 475 | elif cmdline.list == "sets": 476 | if k in oldMoves: 477 | item = oldMoves[k]['reference'] + ' "' + oldMoves[k]['original'] + '" moved to "' + k + '"' 478 | else: 479 | last = sorted(v["memberof"]) 480 | item = last[len(last)-1] 481 | if not item in filetree: 482 | filetree.append(item) 483 | else: 484 | filetree.append('"' + k + '"') 485 | 486 | filetree.sort() 487 | for b in filetree: 488 | logging.info(b) 489 | sys.exit(0) 490 | 491 | 
if cmdline.modified: 492 | found = 0 493 | logging.info("Searching for modified files:") 494 | for k,v in oldFiles.items(): 495 | if len(v["memberof"]) > 1: 496 | found += 1 497 | logging.info("\"%s\" modified %d times", k, len(v["memberof"])) 498 | logging.info("Found %d files (of %d) which have been modified", found, len(oldFiles)) 499 | if found: 500 | sys.exit(0) 501 | else: 502 | sys.exit(1) 503 | 504 | if cmdline.show: 505 | archive = cmdline.show.lower() 506 | if archive in backupSets: 507 | logging.info("Members of \"%s\":", archive) 508 | for f in backupSets[archive]: 509 | logging.info(" %s", f) 510 | else: 511 | logging.error("No such backup, \"%s\"", cmdline.show) 512 | sys.exit(0) 513 | 514 | if cmdline.find: 515 | logging.info("Searching for \"%s\"", cmdline.find) 516 | found = 0 517 | query = cmdline.find.lower() 518 | for k, v in oldFiles.items(): 519 | if query in k.lower(): 520 | logging.info(" \"%s\", exists in:", k) 521 | found += 1 522 | v["memberof"].sort() 523 | for x in v["memberof"]: 524 | logging.info(" %s", x) 525 | logging.info("Found %d instances", found) 526 | if found: 527 | sys.exit(0) 528 | else: 529 | sys.exit(1) 530 | 531 | 532 | logging.info("Setting up the prep directory") 533 | try: 534 | os.makedirs(config["prepdir"]) 535 | except OSError as e: 536 | if e.errno != 17: 537 | logging.exception("Error creating prep directory") 538 | raise 539 | 540 | fileutils.deleteTree(config["prepdir"]) 541 | 542 | logging.info("Checking sources for changes") 543 | missedFiles = collectSources(config['sources']) 544 | 545 | logging.debug("Processing file structure changes") 546 | 547 | """ 548 | Figure out if any file was renamed, this is easily detected since a deleted file 549 | will have a new file with the same checksum. 550 | """ 551 | if config["detect-move"]: 552 | tmpRemove = [] 553 | for k,v in deletedFiles.items(): 554 | item = shaFiles.get(v["checksum"]) 555 | if item: 556 | for f in item: 557 | if f in newFiles: 558 | # Moved! From k to f 559 | movedFiles[f] = k 560 | tmpRemove.append(k) 561 | logging.debug('''File "%s" moved to "%s"''' % (k, f)) 562 | break 563 | for k in tmpRemove: 564 | deletedFiles.pop(k, None) 565 | 566 | # When looking for changes, only provide a list of changes + summary 567 | if cmdline.changes: 568 | logging.info("Detected changes:") 569 | for k in deletedFiles: 570 | logging.info(u"\"%s\" was deleted", k) 571 | 572 | for k in newFiles: 573 | if k in movedFiles: 574 | logging.info(u"\"%s\" was renamed/moved from \"%s\"", k, movedFiles[k]) 575 | elif k not in oldFiles: 576 | logging.info(u"\"%s\" is new", k) 577 | else: 578 | logging.info(u"\"%s\" changed", k) 579 | 580 | if currentOp["filecount"] > 0 or len(deletedFiles): 581 | logging.info("===============") 582 | if len(oldFiles) == 0: 583 | logging.info("%d files (%s) to be backed up", currentOp["filecount"], helper.formatSize(currentOp["filesize"])) 584 | else: 585 | logging.info("%d files (%s) has changed or been added since last backup, %d has been deleted", currentOp["filecount"], helper.formatSize(currentOp["filesize"]), len(deletedFiles)) 586 | sys.exit(1) 587 | else: 588 | logging.info("No file(s) changed or added since last backup") 589 | sys.exit(0) 590 | 591 | if len(newFiles) == 0 and missedFiles is not None: 592 | logging.info("Done. 
There were files which didn't fit the maxsize limit (%d files, %s)", len(missedFiles['files']), helper.formatSize(missedFiles['size'])) 593 | if cmdline.debug: 594 | for f in missedFiles['files']: 595 | logging.debug('Ignored: "%s"', f) 596 | if config["ignore-overlimit"]: 597 | logging.info('Since ignore overlimit was set, this is still considered a success') 598 | sys.exit(0) 599 | else: 600 | logging.error("Cannot continue since there are files bigger than maxsize") 601 | sys.exit(3) 602 | 603 | # Time to compress 604 | files = gatherData() 605 | if files is None: 606 | logging.error("Failed to gather all data and compress it.") 607 | sys.exit(2) 608 | 609 | if currentOp["filecount"] == 0 and len(deletedFiles) == 0 and config["skip-empty"]: 610 | logging.info("No changes detected, skipping backup") 611 | sys.exit(0) 612 | 613 | msg = "%d files (%s) gathered" % (currentOp["filecount"], helper.formatSize(currentOp["filesize"])) 614 | if config["compress"] and (shouldCompress() or config["compress-force"]): 615 | msg += ", compressed" 616 | if config["encrypt"]: 617 | msg += ", encrypted" 618 | if config["sign"]: 619 | msg += ", signed" 620 | totalbytes = fileutils.sumSize(config["prepdir"], files) 621 | msg += " and ready to upload as %d files, total %s" % (len(files), helper.formatSize(totalbytes)) 622 | logging.info(msg) 623 | 624 | ############################################################################## 625 | # 626 | 627 | # We want to avoid wasting requests, so only try to 628 | # create vaults if we need to. 629 | file_paths = [os.path.join(config["prepdir"], f) for f in files] 630 | for p in provider_objects: 631 | backup = p.upload_files(file_paths) 632 | if not backup: 633 | logging.error("Backup provider %s failed to store files", p) 634 | sys.exit(1) 635 | 636 | # 637 | ############################################################################## 638 | 639 | # merge new files, checksums and memberships 640 | for k,v in newFiles.items(): 641 | if k in oldFiles: # Don't forget any old memberships 642 | newFiles[k]["memberof"] += oldFiles[k]["memberof"] 643 | if "deleted" in oldFiles[k]: 644 | newFiles[k]["deleted"] += oldFiles[k]["deleted"] 645 | oldFiles[k] = newFiles[k] 646 | 647 | # Get rid of this file from the moves database since we now have a fresh copy 648 | if k in oldMoves: 649 | logging.info("Removing " + k + " since it's marked as new") 650 | oldMoves.pop(k) 651 | 652 | """ 653 | Deal with deleted files. We must store all deletes as an array since user can 654 | restore the file. We also must wipe the checksum so a restored file gets backed 655 | up again. 656 | """ 657 | for f in deletedFiles: 658 | logging.debug('''File "%s" deleted''' % f) 659 | if "deleted" in oldFiles[f]: 660 | oldFiles[f]["deleted"].append(config["unique"]) 661 | else: 662 | oldFiles[f]["deleted"] = [config["unique"]] 663 | oldFiles[f]["checksum"] = "" # Wipe checksum to make sure new copy is backed up 664 | 665 | # We also need to handle the moved files properly to avoid marking a moved file as deleted 666 | # on the next run. 
667 | 668 | for _new,_old in movedFiles.items(): 669 | # Note where we got this copy from 670 | if _old in oldMoves: 671 | # Just readjust this entry 672 | movedFiles[_new] = oldMoves[_old] 673 | oldMoves.pop(_old) 674 | else: 675 | # Create a new moved entry 676 | lst = sorted(oldFiles[_old]['memberof']) 677 | movedFiles[_new] = {'reference' : lst[len(lst)-1], 'original' : _old} 678 | 679 | # Clear other fields 680 | if "deleted" in oldFiles[_old]: 681 | oldFiles[_old]["deleted"].append(config["unique"]) 682 | else: 683 | oldFiles[_old]["deleted"] = [config["unique"]] 684 | oldFiles[_old]["checksum"] = "" 685 | 686 | # Finally, append any previously moved files 687 | movedFiles.update(oldMoves) 688 | 689 | # Add the backup to our sets... 690 | backupSets[config["unique"]] = files 691 | 692 | logging.info("Saving the new checksum") 693 | vault = None 694 | storage_ids = [] 695 | for p in provider_objects: 696 | try: 697 | if vault is None: 698 | pv = p.get_vault() 699 | if pv: 700 | vault = pv 701 | except Exception: 702 | logging.exception("Provider %s failed to report vault", p) 703 | try: 704 | storage_ids.append(p.storage_id()) 705 | except Exception: 706 | logging.exception("Provider %s failed to report storage location", p) 707 | 708 | saveData = { 709 | "version" : configuration.getVersion(), 710 | "timestamp" : time.time(), 711 | "dataset" : oldFiles, 712 | "backups" : backupSets, 713 | "vault" : vault, 714 | "storage" : storage_ids, 715 | "moved" : movedFiles, 716 | "lastbackup" : config["prefix"] + config["unique"] 717 | } 718 | with open(config["file-checksum"] + "_tmp", "wb") as fp: 719 | fp.write(json.dumps(saveData, ensure_ascii=False).encode("utf-8")) 720 | 721 | # Copy the new file into place and then delete the temp file 722 | try: 723 | shutil.copy(config["file-checksum"] + "_tmp", config["file-checksum"]) 724 | except OSError as e: 725 | if e.errno == 1: 726 | logging.debug("Unable to change permissons on copied file: %s" % config["file-checksum"]) 727 | else: 728 | logging.exception("Error copying file") 729 | raise 730 | 731 | try: 732 | os.remove(config["file-checksum"] + "_tmp") 733 | except OSError as e: 734 | logging.exception("Error removing temporary database") 735 | raise 736 | 737 | if config["donedir"] is not None: 738 | logging.info("Moving backed up archive into done directory") 739 | dest = os.path.join(config["donedir"], config["unique"]) 740 | os.mkdir(dest) 741 | for f in files: 742 | try: 743 | shutil.copy( 744 | os.path.join(config["prepdir"], f), 745 | os.path.join(dest, f) 746 | ) 747 | except OSError as e: 748 | if e.errno == 1: 749 | logging.debug("Unable to change permissons on copied file: %s" % dest) 750 | else: 751 | logging.exception("Error copying file") 752 | raise 753 | os.remove(os.path.join(config["prepdir"], f)) 754 | os.rmdir(config["prepdir"]) 755 | 756 | # Finally, we count the number of stored archives and delete the 757 | # older ones exceeding the defined limit. 
758 | if config["maxkeep"] > 0: 759 | archives = os.listdir(config["donedir"]) 760 | archives.sort() 761 | logging.info("Told to keep %d archive(s), we have %d", config["maxkeep"], len(archives)) 762 | while len(archives) > config["maxkeep"]: 763 | folder = archives.pop(0) 764 | logging.info("Deleting \"%s\"", folder) 765 | shutil.rmtree(os.path.join(config["donedir"], folder)) 766 | 767 | if missedFiles is not None: 768 | logging.warn("Reached size limit, recommend running again after this session (skipped %d files, %s)", len(missedFiles['files']), helper.formatSize(missedFiles['size'])) 769 | if cmdline.debug: 770 | for f in missedFiles['files']: 771 | logging.debug('Skipped: "%s"', f) 772 | sys.exit(10) 773 | sys.exit(0) 774 | --------------------------------------------------------------------------------