├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE.md ├── README.md ├── deb ├── .gitignore ├── Makefile ├── libsodium23_1.0.16-2+ubuntu16.04.1+deb.sury.org+3_amd64.deb ├── libsquash │ ├── Makefile │ └── debian │ │ ├── changelog │ │ ├── compat │ │ ├── control │ │ ├── copyright │ │ └── rules └── zvault │ ├── Makefile │ └── debian │ ├── changelog │ ├── compat │ ├── control │ ├── copyright │ ├── manpages │ └── rules ├── docs ├── Algotest.txt ├── Comparison.txt ├── comparison.md ├── design.md ├── directory_struture.md ├── excludes.default ├── logo.png ├── man │ ├── zvault-addkey.1.md │ ├── zvault-algotest.1.md │ ├── zvault-analyze.1.md │ ├── zvault-backup.1.md │ ├── zvault-bundleinfo.1.md │ ├── zvault-bundlelist.1.md │ ├── zvault-check.1.md │ ├── zvault-config.1.md │ ├── zvault-copy.1.md │ ├── zvault-diff.1.md │ ├── zvault-genkey.1.md │ ├── zvault-import.1.md │ ├── zvault-info.1.md │ ├── zvault-init.1.md │ ├── zvault-list.1.md │ ├── zvault-mount.1.md │ ├── zvault-prune.1.md │ ├── zvault-remove.1.md │ ├── zvault-restore.1.md │ ├── zvault-vacuum.1.md │ ├── zvault-versions.1.md │ └── zvault.1.md └── repository_readme.md ├── gpl-3.txt ├── lang ├── Makefile ├── de.po ├── default.pot └── excluded.po ├── rustfmt.toml ├── src ├── bundledb │ ├── cache.rs │ ├── db.rs │ ├── mod.rs │ ├── reader.rs │ ├── uploader.rs │ └── writer.rs ├── chunker.rs ├── chunking │ ├── ae.rs │ ├── benches.rs │ ├── fastcdc.rs │ ├── fixed.rs │ ├── mod.rs │ ├── rabin.rs │ └── test.rs ├── cli │ ├── algotest.rs │ ├── args.rs │ ├── logger.rs │ └── mod.rs ├── index.rs ├── main.rs ├── mount.rs ├── prelude.rs ├── repository │ ├── backup.rs │ ├── backup_file.rs │ ├── basic_io.rs │ ├── bundle_map.rs │ ├── config.rs │ ├── error.rs │ ├── info.rs │ ├── integrity.rs │ ├── layout.rs │ ├── metadata.rs │ ├── mod.rs │ ├── tarfile.rs │ └── vacuum.rs ├── translation.rs └── util │ ├── bitmap.rs │ ├── chunk.rs │ ├── cli.rs │ ├── compression.rs │ ├── encryption.rs │ ├── fs.rs │ ├── hash.rs │ ├── hex.rs │ ├── hostname.rs │ ├── lock.rs │ ├── lru_cache.rs │ ├── mod.rs │ ├── msgpack.rs │ └── statistics.rs └── test.sh /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | squash 3 | test.tar 4 | test_* 5 | restored 6 | excludes 7 | ._* 8 | .~* 9 | docs/logo 10 | lang/*.mo 11 | lang/default.pot 12 | .idea 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | dist: trusty 3 | addons: 4 | apt: 5 | packages: 6 | - libssl-dev 7 | - libfuse-dev 8 | install: 9 | - wget https://github.com/jedisct1/libsodium/releases/download/1.0.8/libsodium-1.0.8.tar.gz 10 | - tar xvfz libsodium-1.0.8.tar.gz 11 | - cd libsodium-1.0.8 && ./configure --prefix=$HOME/installed_libs && make && make install && cd .. 12 | - git clone https://github.com/quixdb/squash libsquash && cd libsquash && git checkout 5ea579cae2324f9e814cb3d88aa589dff312e9e2 && ./autogen.sh --prefix=$HOME/installed_libs --disable-external && make && make install && cd .. 
13 | - export PKG_CONFIG_PATH=$HOME/installed_libs/lib/pkgconfig:$PKG_CONFIG_PATH 14 | - export LD_LIBRARY_PATH=$HOME/installed_libs/lib:$LD_LIBRARY_PATH 15 | cache: 16 | - cargo 17 | - ccache 18 | rust: 19 | - stable 20 | - beta 21 | - nightly 22 | matrix: 23 | allow_failures: 24 | - rust: 25 | - beta 26 | - stable 27 | script: 28 | - cargo clean 29 | - cargo build 30 | - cargo test 31 | 32 | after_success: | 33 | if [[ "$TRAVIS_RUST_VERSION" == nightly ]]; then 34 | cargo install cargo-tarpaulin 35 | cargo tarpaulin --ciserver travis-ci --coveralls $TRAVIS_JOB_ID 36 | fi 37 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | This project follows [semantic versioning](http://semver.org). 4 | 5 | 6 | ### UNRELEASED 7 | * [added] Translation infrastructure (**requires nightly rust**) 8 | * [added] Checking hashes of chunks in check --bundle-data 9 | * [added] Debian package for libsodium23 10 | * [modified] Updated dependencies 11 | * [modified] Updated copyright date 12 | * [modified] Moved all code into one crate for easier translation 13 | * [modified] Compression ratio is now displayed in a clearer format 14 | * [fixed] Also including the first min_size bytes in hash 15 | * [fixed] Fixed some texts in manpages 16 | * [fixed] Calling strip on final binaries 17 | * [fixed] Fixed bug that caused repairs to miss some errors 18 | 19 | 20 | ### v0.4.0 (2017-07-21) 21 | * [added] Added `copy` subcommand 22 | * [added] Added support for xattrs in fuse mount 23 | * [added] Added support for block/char devices 24 | * [added] Added support for fifo files 25 | * [modified] Reformatted sources using rustfmt 26 | * [modified] Also documenting common flags in subcommands 27 | * [modified] Using repository aliases (**conversion needed**) 28 | * [modified] Remote path must be absolute 29 | * [modified] Not removing prefixes from bundle names anymore 30 | * [fixed] Fixed tarfile import 31 | 32 | 33 | ### v0.3.2 (2017-05-11) 34 | * [modified] Changed order of arguments in `addkey` to match src-dst scheme 35 | * [modified] Skip root folder on restore 36 | * [fixed] Fixed `addkey` subcommand 37 | * [fixed] Fixed reading tar files from stdin 38 | * [fixed] Fixed exporting files with long names as tar files 39 | 40 | 41 | ### v0.3.1 (2017-05-09) 42 | * [added] Derive key pairs from passwords 43 | * [modified] Added root repository to exclude list 44 | * [modified] Initializing data in index before use 45 | * [modified] Updated dependencies 46 | 47 | 48 | ### v0.3.0 (2017-04-27) 49 | * [added] Ability to read/write tar file from/to stdin/stdout 50 | * [added] Added date to bundles 51 | * [added] Option to combine small bundles 52 | * [added] Fixed chunker 53 | * [modified] Logging to stderr 54 | * [modified] Enforce deterministic bundle ordering 55 | * [modified] More info in analyze subcommand 56 | * [modified] Estimating final bundle size in order to reach it 57 | * [fixed] Only print "repairing bundles" if actually repairing bundles 58 | * [fixed] Only put mode bits of st_mode into metadata 59 | * [fixed] Only repairing backups with --repair 60 | * [fixed] Fixed vacuum 61 | * [fixed] First removing bundles, then adding new ones 62 | * [fixed] No longer clobbering broken files 63 | 64 | 65 | ### v0.2.0 (2017-04-14) 66 | * [added] Added CHANGELOG 67 | * [added] Locking local repository to avoid index corruption 68 | * [added] Storing user/group names in backups
69 | * [added] Ability to repair bundles, backups, index, bundle map and bundle cache 70 | * [added] Manpages for all subcommands 71 | * [added] Folders of backups can be listed, removed and mounted 72 | * [added] Supporting extended attributes in tar files 73 | * [modified] No longer trying to upload by rename 74 | * [modified] No longer failing restore if setting file attributes fails 75 | * [modified] Backup files must end with `.backup` (**conversion needed**) 76 | * [modified] Bundle files must end with `.bundle` 77 | * [modified] Ignoring corrupt bundles instead of failing 78 | * [fixed] Creating empty bundle cache on init to avoid warnings 79 | * [fixed] Calling sodiumoxide::init for faster algorithms and thread safety (not needed) 80 | * [fixed] Fixed a deadlock in the bundle upload code 81 | * [fixed] Also setting repository dirty on crash 82 | * [fixed] Ignoring missing backups folder 83 | * [fixed] Fixed problems with uploads from relative repository paths 84 | * [fixed] Fixed finished messages 85 | * [fixed] Fixed inode retrieval for single-file backups 86 | * [fixed] Fixed tar import 87 | 88 | 89 | ### v0.1.0 (2017-04-11) 90 | First working alpha release 91 | 92 | This release features the main functionality: 93 | * Initializing repository 94 | - Generating a key on the fly 95 | - Import existing repository 96 | * Creating backups 97 | - Partial backups 98 | - Deduplication 99 | - Compression 100 | - Encryption 101 | - From tar files 102 | - Support for file permissions, file date and extended attributes 103 | * Restoring backups 104 | - Full or subtrees 105 | - To tar files 106 | * Mounting backups or the whole repository 107 | * Removing backups 108 | - Full or only specific subtrees 109 | - By date (`prune` subcommand) 110 | * Check integrity 111 | - Repository 112 | - Bundles 113 | - Index 114 | - Backups 115 | - Inode trees 116 | * Vacuum 117 | - By ratio 118 | * Listing & Info methods 119 | - Repository info 120 | - Backup info/list 121 | - Directory list, Inode info 122 | - Bundle list and info 123 | * Utility commands 124 | - `analyze`: analyze chunk usage 125 | - Key management commands (`addkey`, `genkey`) 126 | - `algotest`: algorithm testing 127 | - `versions`: find versions of a file 128 | - `diff`: Find differences between backups 129 | - `config`: Getting and setting config options 130 | * Command line client 131 | - Powerful argument parsing 132 | - Nice colorful error messages 133 | - Progress bars 134 | - Man pages for main commands 135 | * Special functionality 136 | - Shared repositories 137 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zvault" 3 | version = "0.5.0" 4 | authors = ["Dennis Schwerdel "] 5 | description = "Deduplicating backup tool" 6 | 7 | [profile.release] 8 | lto = true 9 | 10 | [dependencies] 11 | serde = "1.0" 12 | rmp-serde = "0.13" 13 | serde_yaml = "0.7" 14 | serde_utils = "0.6" 15 | serde_bytes = "0.10" 16 | squash-sys = "0.9" 17 | quick-error = "1.1" 18 | blake2-rfc = "0.2" 19 | murmurhash3 = "0.0.5" 20 | chrono = "0.4" 21 | clap = "^2.24" 22 | log = "0.4" 23 | byteorder = "1.0" 24 | ansi_term = "0.11" 25 | sodiumoxide = "0.0.16" 26 | libsodium-sys = "0.0.16" 27 | filetime = "0.1" 28 | regex = "0.2" 29 | fuse = "0.3" 30 | lazy_static = "1.0" 31 | rand = "0.4" 32 | tar = "0.4" 33 | xattr = "0.2" 34 | crossbeam = "0.3" 35 | pbr = "1.0" 36 | users = "0.6" 37 | time = "*" 38 | libc = "0.2" 39 | runtime-fmt = "0.3" 40 | locale_config = "^0.2.2" 41 | mmap = "0.1" 42 | 43 | [features] 44 | default = [] 45 | bench = [] 46 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # License: GPL-3 2 | 3 | zVault - Deduplicating backup solution 4 | Copyright (C) 2017-2018 Dennis Schwerdel 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. 15 | 16 | [Full license text](gpl-3.txt) 17 | 18 | 19 | ## Included works 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # zVault Backup Solution 2 | 3 | [![Build Status](https://travis-ci.org/dswd/zvault.svg?branch=master)](https://travis-ci.org/dswd/zvault) 4 | [![Coverage Status](https://coveralls.io/repos/dswd/zvault/badge.svg?branch=master&service=github)](https://coveralls.io/github/dswd/zvault?branch=master) 5 | 6 | zVault is a highly efficient deduplicating backup solution that supports 7 | client-side encryption, compression and remote storage of backup data. 8 | 9 | ## Main Features 10 | 11 | ### Space efficient storage 12 | Each file is split into a number of chunks. Content-defined chunking and chunk 13 | fingerprints make sure that each chunk is only stored once. The chunking 14 | algorithm is designed so that small changes to a file only change a few chunks 15 | and leave most chunks unchanged. Multiple backups of the same data set will only 16 | take up the space of one copy. 17 | 18 | The deduplication in zVault is able to reuse existing data no matter whether a 19 | file is modified, stored again under a different name, renamed or moved to a 20 | different folder. 21 | 22 | That makes it possible to store daily backups without much overhead, as backups 23 | with only small changes do not take up much space. 24 | 25 | Also, multiple machines can share the same remote backup location and reuse 26 | each other's data for deduplication. 27 | 28 | ### Performance 29 | High backup speed is a major design goal of zVault. Therefore it uses different 30 | techniques to reach extremely fast backup speeds. 31 | 32 | All used algorithms are hand-selected and optimized for speed. 33 | 34 | Unmodified files are detected by comparing them to the last backup, which makes 35 | it possible to skip most of the files in regular usage. 36 | 37 | A blazingly fast memory-mapped hash table tracks the fingerprints of all known 38 | chunks so that chunks that are already in the repository can be skipped quickly. 39 | 40 | In a general use case with a Linux system and a home folder of 50 GiB, backup 41 | runs usually take between 1 and 2 minutes. 42 | 43 | ### Independent backups 44 | All backups share common data in the form of chunks but are independent on a higher 45 | level. Backups can be deleted and chunks that are not used by any backup can be 46 | removed.
47 | 48 | Other backup solutions use differential backups organized in chains. This makes 49 | those backups dependent on previous backups in the chain, so that those backups 50 | can not be deleted. Also, restoring chained backups is much less efficient. 51 | 52 | ### Data encryption 53 | The backup data can be protected by modern and fast encryption methods on the 54 | client before storing it remotely. 55 | 56 | ### Compression 57 | The backup data can be compressed to save even more space than by deduplication 58 | alone. Users can choose between zlib (medium speed and compression), 59 | lz4 (very fast, lower compression), brotli (medium speed, good compression), and 60 | lzma (quite slow but amazing compression). 61 | 62 | ### Remote backup storage 63 | zVault supports off-site backups via mounted filesystems. Backups can be stored 64 | on any remote storage that can be mounted as a filesystem: 65 | - NFS 66 | - SMB / Windows shares 67 | - SSH (via sshfs) 68 | - FTP (via curlftpfs) 69 | - Google Drive (via rclone) 70 | - Amazon S3 (via rclone) 71 | - Openstack Swift / Rackspace cloud files / Memset Memstore (via rclone) 72 | - Dropbox (via rclone) 73 | - Google Cloud Storage (via rclone) 74 | - Amazon Drive (via rclone) 75 | - Microsoft OneDrive (via rclone) 76 | - Hubic (via rclone) 77 | - Backblaze B2 (via rclone) 78 | - Yandex Disk (via rclone) 79 | - ... (potentially many more) 80 | 81 | ### Backup verification 82 | For long-term storage of backups it is important to check backups regularly. 83 | zVault offers a simple way to verify the integrity of backups. 84 | 85 | ### Mount backups as filesystems 86 | Backups can be mounted as a user-space filesystem to investigate and restore 87 | their contents. Once mounted, graphical programs like file managers can be used 88 | to work on the backup data and find the needed files. 89 | 90 | 91 | ## Example scenario 92 | 93 | I am using zVault on several of my computers. Here are some numbers from my 94 | desktop PC. On this computer I am running daily backups of both the system `/` 95 | (excluding some folders like `/home`) with 12.9 GiB and the home folder `/home` 96 | with 53.6 GiB. 97 | 98 | $> zvault config :: 99 | Bundle size: 25.0 MiB 100 | Chunker: fastcdc/16 101 | Compression: brotli/3 102 | Encryption: 8678d... 103 | Hash method: blake2 104 | 105 | The backup repository uses the default configuration with encryption enabled. 106 | The repository currently contains 12 backup versions of each folder. Both 107 | folders combined currently contain over 66.5 GiB not counting changes between 108 | the different versions. 109 | 110 | $> zvault info :: 111 | Bundles: 1675 112 | Total size: 37.9 GiB 113 | Uncompressed size: 58.1 GiB 114 | Compression ratio: 65.3% 115 | Chunk count: 5580237 116 | Average chunk size: 10.9 KiB 117 | Index: 192.0 MiB, 67% full 118 | 119 | The repository info reveals that the data stored in the repository is only 120 | 58.1 GiB, so 8.4 GiB / 12.5% has been saved by deduplication. Another 20.2 GiB / 121 | 34.7% have been saved by compression. In total, 28.6 out of 66.5 GiB / 43% have 122 | been saved. 123 | 124 | The data is stored in over 5 million chunks of an average size of 10.9 KiB. The 125 | average chunk is smaller than configured because of files smaller than the chunk 126 | size. The chunks are stored in an index file which takes up 192 MiB on disk and 127 | in memory during backup runs. Additionally, 337 MiB of bundle data is stored 128 | locally to allow fast access to metadata. 
In total, that is less than 1% of the 129 | original data. 130 | 131 | $> zvault info ::home/2017-06-19 132 | Date: Mon, 19 Jun 2017 00:00:48 +0200 133 | Source: desktop:/home 134 | Duration: 0:01:57.2 135 | Entries: 193624 files, 40651 dirs 136 | Total backup size: 53.6 GiB 137 | Modified data size: 2.4 GiB 138 | Deduplicated size: 50.8 MiB, 97.9% saved 139 | Compressed size: 8.9 MiB in 2 bundles, 82.4% saved 140 | Chunk count: 2443, avg size: 21.3 KiB 141 | 142 | This is the information on the last backup run for `/home`. The total data in 143 | that backup is 53.6 GiB of which 2.4 GiB have been detected to have changed by 144 | comparing file dates and sizes to the last backup. Of those changed files, 145 | deduplication reduced the data to 50.8 MiB and compression reduced this to 146 | 8.9 MiB. The whole backup run took less than 2 minutes. 147 | 148 | $> zvault info ::system/2017-06-19 149 | Date: Mon, 19 Jun 2017 00:00:01 +0200 150 | Source: desktop:/ 151 | Duration: 0:00:46.5 152 | Entries: 435905 files, 56257 dirs 153 | Total backup size: 12.9 GiB 154 | Modified data size: 43.1 MiB 155 | Deduplicated size: 6.8 MiB, 84.2% saved 156 | Compressed size: 1.9 MiB in 2 bundles, 72.3% saved 157 | Chunk count: 497, avg size: 14.0 KiB 158 | 159 | The information on the last backup run for `/` looks similar. Out of 12.9 GiB, 160 | deduplication and compression reduced the new data to 1.9 MiB and the backup 161 | took less than one minute. 162 | 163 | This data seems representative, as other backup runs and other systems yield 164 | similar results. 165 | 166 | 167 | ### Semantic Versioning 168 | zVault sticks to the semantic versioning scheme. In its current pre-1.0 stage 169 | this has the following implications: 170 | - Even now the repository format is considered pretty stable. All future 171 | versions will be able to read the current repository format. Conversions 172 | might be necessary, but the backups should always be forward-compatible. 173 | - The CLI might see breaking changes but at least it is guaranteed that calls 174 | that are currently non-destructive will not become destructive in the future. 175 | Running today's commands on a future version will not cause any harm. 176 | -------------------------------------------------------------------------------- /deb/.gitignore: -------------------------------------------------------------------------------- 1 | zvault/debian/zvault 2 | zvault/zvault* 3 | zvault/man/* 4 | libsquash/src 5 | libsquash/debian/libsquash 6 | */debian/debhelper* 7 | */debian/files 8 | *.deb 9 | *.build 10 | *.changes 11 | *.debhelper* 12 | *.substvars 13 | -------------------------------------------------------------------------------- /deb/Makefile: -------------------------------------------------------------------------------- 1 | PACKAGE=zvault 2 | DEPENDENCIES=debhelper devscripts 3 | 4 | .PHONY: default 5 | default: clean build 6 | 7 | .PHONY: build 8 | build: libsquash_*.deb $(PACKAGE)_*.deb 9 | 10 | libsquash_*.deb: 11 | (cd libsquash; make clean; debuild -b -us -uc; cd ..) 12 | 13 | $(PACKAGE)_*.deb: $(PACKAGE)/man/* $(PACKAGE)/zvault 14 | (cd $(PACKAGE); make clean; debuild -b -us -uc; cd ..) 15 | 16 | .PHONY: clean 17 | clean: 18 | (cd $(PACKAGE); debuild clean; cd ..) 19 | rm -rf $(PACKAGE)_* 20 | (cd libsquash; debuild clean; cd ..)
21 | rm -rf libsquash_* 22 | 23 | $(PACKAGE)/man/*: ../docs/man/* 24 | cp ../docs/man/* $(PACKAGE)/man 25 | 26 | $(PACKAGE)/zvault: ../target/release/zvault 27 | cp ../target/release/zvault $(PACKAGE)/zvault 28 | strip -s $(PACKAGE)/zvault 29 | 30 | ../target/release/zvault: ../src/*.rs ../Cargo.toml 31 | (cd ..; cargo build --release) 32 | -------------------------------------------------------------------------------- /deb/libsodium23_1.0.16-2+ubuntu16.04.1+deb.sury.org+3_amd64.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dswd/zvault/98a09fea2ee86351df66e448d9e6f05927cf706e/deb/libsodium23_1.0.16-2+ubuntu16.04.1+deb.sury.org+3_amd64.deb -------------------------------------------------------------------------------- /deb/libsquash/Makefile: -------------------------------------------------------------------------------- 1 | undefine LDFLAGS 2 | 3 | build: 4 | git clone https://github.com/quixdb/squash src 5 | (cd src; git checkout 5ea579cae2324f9e814cb3d88aa589dff312e9e2) 6 | (cd src; ./autogen.sh --prefix=/usr --disable-external) 7 | make -C src 8 | 9 | clean: 10 | rm -rf src 11 | 12 | install: 13 | make -C src install DESTDIR=$(DESTDIR) 14 | -------------------------------------------------------------------------------- /deb/libsquash/debian/changelog: -------------------------------------------------------------------------------- 1 | libsquash (0.8.0) stable; urgency=medium 2 | 3 | * Initial release 4 | 5 | -- Dennis Schwerdel Tue, 04 Apr 2017 14:23:12 +0100 6 | -------------------------------------------------------------------------------- /deb/libsquash/debian/compat: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /deb/libsquash/debian/control: -------------------------------------------------------------------------------- 1 | Source: libsquash 2 | Section: misc 3 | Priority: extra 4 | Maintainer: Dennis Schwerdel 5 | Build-Depends: debhelper (>= 9) 6 | Standards-Version: 3.8.3 7 | 8 | Package: libsquash 9 | Architecture: amd64 10 | Depends: ${shlibs:Depends}, ${misc:Depends} 11 | Description: Compression library 12 | -------------------------------------------------------------------------------- /deb/libsquash/debian/copyright: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dswd/zvault/98a09fea2ee86351df66e448d9e6f05927cf706e/deb/libsquash/debian/copyright -------------------------------------------------------------------------------- /deb/libsquash/debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | %: 3 | dh $@ 4 | -------------------------------------------------------------------------------- /deb/zvault/Makefile: -------------------------------------------------------------------------------- 1 | build: man/zvault.1 man/zvault-backup.1 man/zvault-check.1 man/zvault-config.1 \ 2 | man/zvault-import.1 man/zvault-info.1 man/zvault-init.1 man/zvault-list.1 \ 3 | man/zvault-mount.1 man/zvault-prune.1 man/zvault-remove.1 \ 4 | man/zvault-restore.1 man/zvault-vacuum.1 man/zvault-addkey.1 \ 5 | man/zvault-algotest.1 man/zvault-analyze.1 man/zvault-bundleinfo.1 \ 6 | man/zvault-bundlelist.1 man/zvault-diff.1 man/zvault-genkey.1 \ 7 | man/zvault-versions.1 8 | 9 | 10 | %.1: %.1.md 11 | ronn -r $< 12 | 13 | install: 14 | install -d $(DESTDIR)/usr/bin
15 | install -m 755 zvault $(DESTDIR)/usr/bin/zvault 16 | -------------------------------------------------------------------------------- /deb/zvault/debian/changelog: -------------------------------------------------------------------------------- 1 | zvault (0.4.0) stable; urgency=medium 2 | 3 | * [added] Added `copy` subcommand 4 | * [added] Added support for xattrs in fuse mount 5 | * [added] Added support for block/char devices 6 | * [added] Added support for fifo files 7 | * [modified] Reformatted sources using rustfmt 8 | * [modified] Also documenting common flags in subcommands 9 | * [modified] Using repository aliases (**conversion needed**) 10 | * [modified] Remote path must be absolute 11 | * [modified] Not removing prefixes from bundle names anymore 12 | * [fixed] Fixed tarfile import 13 | 14 | -- Dennis Schwerdel Fri, 21 Jul 2017 11:25:32 +0200 15 | 16 | zvault (0.3.2) stable; urgency=medium 17 | 18 | * [modified] Changed order of arguments in `addkey` to match src-dst scheme 19 | * [modified] Skip root folder on restore 20 | * [fixed] Fixed `addkey` subcommand 21 | * [fixed] Fixed reading tar files from stdin 22 | * [fixed] Fixed exporting files with long names as tar files 23 | 24 | -- Dennis Schwerdel Thu, 11 May 2017 10:52:51 +0200 25 | 26 | zvault (0.3.1) stable; urgency=medium 27 | 28 | * [added] Derive key pairs from passwords 29 | * [modified] Added root repository to exclude list 30 | * [modified] Initializing data in index before use 31 | * [modified] Updated dependencies 32 | 33 | -- Dennis Schwerdel Tue, 09 May 2017 09:34:22 +0200 34 | 35 | zvault (0.3.0) stable; urgency=medium 36 | 37 | * [added] Ability to read/write tar file from/to stdin/stdout 38 | * [added] Added date to bundles 39 | * [added] Option to combine small bundles 40 | * [added] Fixed chunker 41 | * [modified] Logging to stderr 42 | * [modified] Enforce deterministic bundle ordering 43 | * [modified] More info in analyze subcommand 44 | * [modified] Estimating final bundle size in order to reach it 45 | * [fixed] Only print "repairing bundles" if actually repairing bundles 46 | * [fixed] Only put mode bits of st_mode into metadata 47 | * [fixed] Only repairing backups with --repair 48 | * [fixed] Fixed vacuum 49 | * [fixed] First removing bundles, then adding new ones 50 | * [fixed] No longer clobbering broken files 51 | 52 | -- Dennis Schwerdel Thu, 27 Apr 2017 13:34:34 +0200 53 | 54 | zvault (0.2.0) stable; urgency=medium 55 | 56 | * [added] Added CHANGELOG 57 | * [added] Locking local repository to avoid index corruption 58 | * [added] Storing user/group names in backups 59 | * [added] Ability to repair bundles, backups, index, bundle map and bundle cache 60 | * [added] Manpages for all subcommands 61 | * [added] Folders of backups can be listed, removed and mounted 62 | * [added] Supporting extended attributes in tar files 63 | * [modified] No longer trying to upload by rename 64 | * [modified] No longer failing restore if setting file attributes fails 65 | * [modified] Backup files must end with `.backup` (**conversion needed**) 66 | * [modified] Bundle files must end with `.bundle` 67 | * [modified] Ignoring corrupt bundles instead of failing 68 | * [fixed] Creating empty bundle cache on init to avoid warnings 69 | * [fixed] Calling sodiumoxide::init for faster algorithms and thread safety (not needed) 70 | * [fixed] Fixed a deadlock in the bundle upload code 71 | * [fixed] Also setting repository dirty on crash 72 | * [fixed] Ignoring missing backups folder 73 | * [fixed] Fixed problems
with uploads from relative repository paths 74 | * [fixed] Fixed finished messages 75 | * [fixed] Fixed inode retrieval for single-file backups 76 | * [fixed] Fixed tar import 77 | 78 | -- Dennis Schwerdel Fri, 14 Apr 2017 09:45:20 +0200 79 | 80 | zvault (0.1.0) stable; urgency=medium 81 | 82 | * Initial release 83 | 84 | -- Dennis Schwerdel Tue, 04 Apr 2017 14:23:12 +0100 85 | -------------------------------------------------------------------------------- /deb/zvault/debian/compat: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /deb/zvault/debian/control: -------------------------------------------------------------------------------- 1 | Source: zvault 2 | Section: misc 3 | Priority: extra 4 | Maintainer: Dennis Schwerdel 5 | Build-Depends: debhelper (>= 9), ruby-ronn 6 | Standards-Version: 3.8.3 7 | 8 | Package: zvault 9 | Architecture: amd64 10 | Depends: libsodium18, libsquash, ${shlibs:Depends}, ${misc:Depends} 11 | Description: Deduplicating backup solution 12 | -------------------------------------------------------------------------------- /deb/zvault/debian/copyright: -------------------------------------------------------------------------------- 1 | Upstream Author: 2 | 3 | Dennis Schwerdel 4 | 5 | Copyright: 6 | 7 | Copyright (C) 2017 Dennis Schwerdel 8 | 9 | License: 10 | 11 | This program is free software: you can redistribute it and/or modify 12 | it under the terms of the GNU General Public License as published by 13 | the Free Software Foundation, either version 3 of the License, or 14 | (at your option) any later version. 15 | 16 | This package is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | GNU General Public License for more details. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with this program. If not, see . 23 | 24 | On Debian systems, the complete text of the GNU General 25 | Public License version 3 can be found in "/usr/share/common-licenses/GPL-3". 26 | 27 | The Debian packaging is: 28 | 29 | Copyright (C) 2017 Dennis Schwerdel 30 | 31 | and is licensed under the GPL version 3, see above. 
32 | -------------------------------------------------------------------------------- /deb/zvault/debian/manpages: -------------------------------------------------------------------------------- 1 | man/zvault.1 2 | man/zvault-init.1 3 | man/zvault-info.1 4 | man/zvault-list.1 5 | man/zvault-prune.1 6 | man/zvault-backup.1 7 | man/zvault-check.1 8 | man/zvault-config.1 9 | man/zvault-import.1 10 | man/zvault-mount.1 11 | man/zvault-remove.1 12 | man/zvault-restore.1 13 | man/zvault-vacuum.1 14 | man/zvault-addkey.1 15 | man/zvault-algotest.1 16 | man/zvault-analyze.1 17 | man/zvault-bundleinfo.1 18 | man/zvault-bundlelist.1 19 | man/zvault-diff.1 20 | man/zvault-genkey.1 21 | man/zvault-versions.1 22 | -------------------------------------------------------------------------------- /deb/zvault/debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | %: 3 | dh $@ 4 | -------------------------------------------------------------------------------- /docs/comparison.md: -------------------------------------------------------------------------------- 1 | 2 | ## Silesia corpus 3 | 4 | | Tool | 1st run | 2nd run | Repo Size | 5 | | -------------- | -------:| -------:| ---------:| 6 | | zvault/brotli3 | 4.0s | 0.0s | 65 MiB | 7 | | zvault/brotli6 | 16.1s | 0.0s | 58 MiB | 8 | | zvault/lzma2 | 45.1s | 0.0s | 55 MiB | 9 | | attic | 12.7s | 0.3s | 68 MiB | 10 | | borg | 4.2s | 0.5s | 203 MiB | 11 | | borg/zlib | 13.2s | 0.4s | 66 MiB | 12 | | zbackup | 52.3s | 2.0s | 52 MiB | 13 | 14 | 15 | ## Ubuntu 16.04 docker image 16 | 17 | | Tool | 1st run | 2nd run | Repo Size | 18 | | -------------- | -------:| -------:| ---------:| 19 | | zvault/brotli3 | 2.0s | 0.1s | 30 MiB | 20 | | zvault/brotli6 | 7.6s | 0.1s | 25 MiB | 21 | | zvault/lzma2 | 17.6s | 0.1s | 22 MiB | 22 | | attic | 6.9s | 0.6s | 35 MiB | 23 | | borg | 3.0s | 0.9s | 83 MiB | 24 | | borg/zlib | 7.9s | 1.0s | 36 MiB | 25 | | zbackup | 17.2s | 1.0s | 24 MiB | 26 | -------------------------------------------------------------------------------- /docs/design.md: -------------------------------------------------------------------------------- 1 | % Design Document 2 | # Design Document 3 | 4 | ## Project Goals 5 | The main goal of zVault is to provide a backup solution that is both reliable 6 | and efficient. 7 | 8 | Backups should be stored in a way that ensures they can be restored **reliably**. A 9 | backup that cannot be restored is worthless. Backups should be stored in a 10 | **robust** fashion that tolerates minor changes to remote backup files or losses in 11 | the local cache. There should be a way to **verify the integrity** of backups. 12 | 13 | Backups should support **remote storage**. Remote backup files should be stored 14 | on a mounted remote storage (e.g. via `rclone mount`). To support this use case, 15 | remote backup files should be handled with only common file operations so that 16 | dumb remote filesystems can be supported. 17 | 18 | The backup process should be **fast**, especially in the common case where only 19 | small changes happened since the last backup. This means that zVault should be 20 | able to find an existing backup for reference and use it to detect differences. 21 | 22 | The backups should be stored in a **space-efficient and deduplicating** way, to 23 | save storage space, especially in the common case where only small changes 24 | happened since the last backup.
The individual backups should be independent of 25 | each other to allow the removal of single backups based on age in a phase-out 26 | scheme. 27 | 28 | 29 | ## Backup process 30 | The main idea of zVault is to split the data into **chunks** which are stored 31 | remotely. The chunks are combined in **bundles** and compressed and encrypted as 32 | a whole to increase the compression ratio and performance. 33 | 34 | An **index** stores **hashes** of all chunks together with their bundle id and 35 | position in that bundle, so that chunks are only stored once and can be reused 36 | by later backups. The index is implemented as a memory-mapped file to maximize 37 | the backup performance. 38 | 39 | To split the data into chunks, a so-called **chunker** is used. The main goal of 40 | the chunker is to produce as many identical chunks as possible when only a few changes 41 | have happened in a file. This is especially tricky when bytes are inserted or deleted 42 | so that the rest of the data is shifted. The chunker uses content-dependent 43 | methods to split the data in order to handle those cases.
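To make this concrete, here is a minimal illustrative sketch of a content-defined chunker in Rust. It is not zVault's actual implementation (the real chunkers live in `src/chunking/` and include rabin, ae, fastcdc and fixed variants); the window size, mask and chunk size bounds below are assumed values chosen for illustration.

    // Illustrative content-defined chunker (not zVault's actual code).
    // A rolling hash over a small window decides where to cut, so an
    // insertion or deletion only moves nearby chunk boundaries.
    const WINDOW: usize = 48; // rolling hash window (assumed value)
    const MIN_SIZE: usize = 4 * 1024; // never cut before this size
    const MAX_SIZE: usize = 64 * 1024; // always cut at this size
    const MASK: u32 = (1 << 13) - 1; // ~8 KiB expected distance between cuts

    // Cheap deterministic per-byte table entry for the rolling hash.
    fn tab(b: u8) -> u32 {
        let x = (b as u32).wrapping_mul(0x9E37_79B9);
        (x ^ (x >> 15)).wrapping_mul(0x85EB_CA6B)
    }

    fn split_chunks(data: &[u8]) -> Vec<&[u8]> {
        let (mut chunks, mut start, mut hash) = (Vec::new(), 0usize, 0u32);
        for (i, &b) in data.iter().enumerate() {
            // Cyclic-polynomial rolling hash: rotate, mix in the new byte
            // and remove the byte that just left the window.
            hash = hash.rotate_left(1) ^ tab(b);
            let len = i - start + 1;
            if len > WINDOW {
                hash ^= tab(data[i - WINDOW]).rotate_left((WINDOW % 32) as u32);
            }
            // Cut on a content-defined fingerprint match, bounded by a
            // minimum and maximum chunk size.
            if (len >= MIN_SIZE && (hash & MASK) == MASK) || len >= MAX_SIZE {
                chunks.push(&data[start..=i]);
                start = i + 1;
                hash = 0;
            }
        }
        if start < data.len() {
            chunks.push(&data[start..]); // final partial chunk
        }
        chunks
    }

    fn main() {
        let data = vec![7u8; 1_000_000];
        println!("split into {} chunks", split_chunks(&data).len());
    }

Because the cut decision depends only on the bytes inside the small rolling window, an insertion or deletion moves nearby boundaries but the cut points re-synchronize shortly after the edit, which is exactly the property described above.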
44 | 45 | By splitting data into chunks and storing those chunks remotely as well as in 46 | the index, any stream of data (e.g. file contents) can be represented by a list 47 | of chunk identifiers. This method is used to represent the contents of a file 48 | and store it in the file metadata. This metadata is then encoded as a data 49 | stream and again represented as a chunk list. Directories contain their children 50 | (e.g. files and other directories) by referring to their metadata as a chunk 51 | list. So finally, the whole directory tree of a backup can be represented as the 52 | chunk list of the root directory which is then stored in a separate backup file. 53 | 54 | 55 | ## Saving space 56 | The design of zVault contains multiple ways in which storage space can be saved. 57 | 58 | The most important is deduplication, which makes sure that chunks are only stored 59 | once. If only a few changes happened since the last backup, almost all chunks are 60 | already present in the index and do not have to be written to remote storage. 61 | Depending on how little data has changed since the last backup, this can save up 62 | to 100% of the storage space. 63 | 64 | But deduplication also works within the same backup. Depending on the data, 65 | deduplication can save about 10%-20% even on new data due to repetitions in the 66 | data. 67 | 68 | If multiple systems use the same remote storage, they can benefit from backups 69 | of other machines and use their chunks for deduplication. This is especially 70 | helpful in the case of whole system backups where all systems use the same 71 | operating system. 72 | 73 | Finally, zVault stores the bundles using powerful compression that achieves about 1/3 space 74 | reduction in common cases. 75 | 76 | In total, a whole series of backups is often significantly smaller than the data 77 | contained in any of the individual backups. 78 | 79 | 80 | ## Vacuum process 81 | As backups are removed, some chunks become unused and could be removed to free 82 | storage space. However, as chunks are combined in bundles, they cannot be 83 | removed individually and all other backups must also be checked in order to make 84 | sure the chunks are truly unused. 85 | 86 | zVault provides an analysis method that scans all backups and identifies unused 87 | chunks in bundles. The vacuum process can then be used to reclaim the space used 88 | by those chunks by rewriting the affected bundles. Since all used chunks in the 89 | bundle need to be written into new bundles and the reclaimed space depends on 90 | the amount of unused chunks, only bundles with a high ratio of unused chunks 91 | should be rewritten.
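The selection of bundles worth rewriting can be sketched as follows. This is a simplified illustration with stand-in types, not the actual logic in `src/repository/vacuum.rs`; the threshold corresponds to the ratio that the analysis reports on.

    use std::collections::HashSet;

    // Simplified stand-ins for zVault's bundle data (illustration only).
    struct Bundle {
        id: u32,
        chunks: Vec<(u64, usize)>, // (chunk fingerprint, size in bytes)
    }

    // Select bundles whose share of unused data is at least `min_ratio`,
    // given the set of chunk fingerprints still referenced by any backup.
    fn select_bundles_to_rewrite<'a>(
        bundles: &'a [Bundle],
        used: &HashSet<u64>,
        min_ratio: f32,
    ) -> Vec<&'a Bundle> {
        bundles
            .iter()
            .filter(|bundle| {
                let total: usize = bundle.chunks.iter().map(|&(_, s)| s).sum();
                let unused: usize = bundle
                    .chunks
                    .iter()
                    .filter(|&&(hash, _)| !used.contains(&hash))
                    .map(|&(_, s)| s)
                    .sum();
                // Rewriting copies all still-used chunks into new bundles,
                // so it only pays off when enough space can be reclaimed.
                total > 0 && unused as f32 / total as f32 >= min_ratio
            })
            .collect()
    }

    fn main() {
        let bundles = vec![Bundle { id: 1, chunks: vec![(1, 1024), (2, 4096)] }];
        let used: HashSet<u64> = [1u64].into_iter().collect();
        for bundle in select_bundles_to_rewrite(&bundles, &used, 0.5) {
            // 4096 of 5120 bytes are unused, so bundle 1 is selected.
            println!("rewrite bundle {}", bundle.id);
        }
    }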
92 | -------------------------------------------------------------------------------- /docs/directory_struture.md: -------------------------------------------------------------------------------- 1 | % Repository directory structure 2 | # Repository directory structure 3 | 4 | ### `/bundles` 5 | 6 | #### `/bundles/cache.dat` 7 | 8 | #### `/bundles/cache` 9 | 10 | ### `/bundles.map` 11 | 12 | ### `/index` 13 | 14 | ### `/keys` 15 | 16 | ### `/config.yaml` 17 | 18 | ### `/remote` 19 | 20 | #### `/remote/bundles` 21 | 22 | #### `/remote/backups` 23 | -------------------------------------------------------------------------------- /docs/excludes.default: -------------------------------------------------------------------------------- 1 | # Mounted locations and pseudo filesystems 2 | /cdrom 3 | lost+found 4 | /mnt 5 | /dev 6 | /sys 7 | /proc 8 | /run 9 | /snap 10 | /media 11 | 12 | # Cache data that does not need to be backed up 13 | /root/.cache 14 | /root/.zvault 15 | /home/*/.cache 16 | /home/*/.zvault 17 | /var/cache 18 | /tmp 19 | /home/**/Trash 20 | 21 | # Avoid backing up zvault remote backups 22 | remote/bundles 23 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dswd/zvault/98a09fea2ee86351df66e448d9e6f05927cf706e/docs/logo.png -------------------------------------------------------------------------------- /docs/man/zvault-addkey.1.md: -------------------------------------------------------------------------------- 1 | zvault-addkey(1) -- Add a key pair to the repository 2 | ==================================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault addkey [OPTIONS] [FILE] <REPO>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand adds a new key pair to the repository `REPO`. 12 | 13 | If `FILE` is given, the key pair is read from the file and added to the 14 | repository. 15 | 16 | If `--generate` is set, a new key pair is generated, printed to the console and 17 | added to the repository. If `--password` is also set, the key pair will be 18 | derived from the given password instead of creating a random one. 19 | 20 | If `--default` is set, encryption will be enabled (if not already) and the new 21 | key will be set as the default encryption key. 22 | 23 | 24 | ## OPTIONS 25 | 26 | * `-g`, `--generate`: 27 | 28 | Generate a new key pair 29 | 30 | 31 | * `-d`, `--default`: 32 | 33 | Set the key pair as default 34 | 35 | 36 | * `-p`, `--password <PASSWORD>`: 37 | 38 | Derive the key pair from the given password instead of randomly creating it. 39 | This setting requires that `--generate` is set too. 40 | 41 | 42 | * `-q`, `--quiet`: 43 | 44 | Print less information 45 | 46 | 47 | * `-v`, `--verbose`: 48 | 49 | Print more information 50 | 51 | 52 | * `-h`, `--help`: 53 | 54 | Prints help information 55 | 56 | 57 | * `-V`, `--version`: 58 | 59 | Prints version information 60 | 61 | 62 | 63 | ## COPYRIGHT 64 | 65 | Copyright (C) 2017-2018 Dennis Schwerdel 66 | This software is licensed under GPL-3 or newer (see LICENSE.md) 67 | -------------------------------------------------------------------------------- /docs/man/zvault-algotest.1.md: -------------------------------------------------------------------------------- 1 | zvault-algotest(1) -- Test a specific algorithm combination 2 | =========================================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault algotest [OPTIONS] <FILE>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand tests a specific combination of algorithms on a given input 12 | `FILE`. 13 | 14 | The subcommand exists to help users compare and select algorithms and 15 | configuration options when creating a repository with _zvault-init(1)_ or 16 | changing its configuration via _zvault-config(1)_. 17 | 18 | The given algorithms will be used to simulate a backup run and determine the 19 | efficiency and performance of each used algorithm as well as their combination. 20 | 21 | The input file `FILE` is used as sample data during the test and should be 22 | selected to be representative of the envisioned use case. Good examples of such 23 | files are tar files of system images or parts of a home folder. 24 | Please note that the input file is read into memory completely in order to 25 | factor out the hard drive speed of the analysis. 26 | 27 | The options are exactly the same as for _zvault-init(1)_. 28 | 29 | 30 | ## OPTIONS 31 | 32 | * `--bundle-size <SIZE>`: 33 | 34 | Set the target bundle size in MiB (default: 25). 35 | Please see _zvault(1)_ for more information on *bundle size*. 36 | 37 | 38 | * `--chunker <CHUNKER>`: 39 | 40 | Set the chunker algorithm and target chunk size (default: fastcdc/16). 41 | Please see _zvault(1)_ for more information on *chunkers* and possible 42 | values. 43 | 44 | 45 | * `-c`, `--compression <COMPRESSION>`: 46 | 47 | Set the compression method and level (default: brotli/3). 48 | Please see _zvault(1)_ for more information on *compression* and possible 49 | values. 50 | 51 | 52 | * `-e`, `--encrypt`: 53 | 54 | Generate a keypair and enable encryption. 55 | Please see _zvault(1)_ for more information on *encryption*. 56 | 57 | 58 | * `--hash <HASH>`: 59 | 60 | Set the hash method (default: blake2). 61 | Please see _zvault(1)_ for more information on *hash methods* and possible 62 | values.
63 | 64 | 65 | * `-q`, `--quiet`: 66 | 67 | Print less information 68 | 69 | 70 | * `-v`, `--verbose`: 71 | 72 | Print more information 73 | 74 | 75 | * `-h`, `--help`: 76 | 77 | Prints help information 78 | 79 | 80 | * `-V`, `--version`: 81 | 82 | Prints version information 83 | 84 | 85 | ## COPYRIGHT 86 | 87 | Copyright (C) 2017-2018 Dennis Schwerdel 88 | This software is licensed under GPL-3 or newer (see LICENSE.md) 89 | -------------------------------------------------------------------------------- /docs/man/zvault-analyze.1.md: -------------------------------------------------------------------------------- 1 | zvault-analyze(1) -- Analyze the used and reclaimable space of bundles 2 | ====================================================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault analyze [OPTIONS] <REPO>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand analyzes the used and reclaimable storage space of bundles in 12 | the repository `REPO`. 13 | 14 | The analysis will scan through all backups and identify used chunks, order them 15 | by bundle and finally determine and print the space that could be reclaimed by 16 | running _zvault-vacuum(1)_ with different ratios. 17 | 18 | 19 | ## OPTIONS 20 | 21 | * `-q`, `--quiet`: 22 | 23 | Print less information 24 | 25 | 26 | * `-v`, `--verbose`: 27 | 28 | Print more information 29 | 30 | 31 | * `-h`, `--help`: 32 | 33 | Prints help information 34 | 35 | 36 | * `-V`, `--version`: 37 | 38 | Prints version information 39 | 40 | 41 | ## COPYRIGHT 42 | 43 | Copyright (C) 2017-2018 Dennis Schwerdel 44 | This software is licensed under GPL-3 or newer (see LICENSE.md) 45 | -------------------------------------------------------------------------------- /docs/man/zvault-backup.1.md: -------------------------------------------------------------------------------- 1 | zvault-backup(1) -- Create a new backup 2 | ======================================= 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault backup [OPTIONS] <SRC> <BACKUP>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand creates a new backup `BACKUP` from the data located at `SRC`. 12 | 13 | The backup given by `BACKUP` must be in the format `[repository]::backup_name` 14 | as described in _zvault(1)_. If `repository` is omitted, the default repository 15 | location is used instead. 16 | 17 | The source data given by `SRC` can either be a filesystem path or the path of a 18 | tar archive (with `--tar`). 19 | 20 | If `SRC` is a filesystem path, a reference backup is used (unless `--full` is 21 | set) to compare the data against, so that only modified data is stored and 22 | unmodified data is taken from the reference backup. Unless a specific reference backup 23 | is chosen via `--ref`, the latest matching backup from the same machine with the 24 | same source path is used as reference. 25 | 26 | When `SRC` is a filesystem path, a set of exclude patterns can be configured. 27 | The patterns can be given directly via `--exclude` or be read from a file via 28 | `--excludes-from`. Unless `--no-default-excludes` is set, a set of default 29 | exclude patterns is read from the file `excludes` in the repository folder. 30 | All exclude patterns given via any of these ways will be combined. 31 | 32 | If `--tar` is specified and `SRC` is `-`, the input is read from stdin. 33 | 34 | Unless `--xdev` is set, zVault will not traverse into subfolders that are on a 35 | different filesystem, i.e. mount points will not be included.
36 | 37 | When zVault fails to read a source file, either because of file permissions, 38 | filesystem errors or because the file has an unsupported type, it will print a 39 | warning message and continue with the backup process. 40 | 41 | zVault will store all file attributes including extended attributes except for 42 | creation time and access time, as creation time cannot be reliably set on 43 | restore and access times change by reading files. 44 | 45 | 46 | ## OPTIONS 47 | 48 | * `-e`, `--exclude <PATH>...`: 49 | 50 | Exclude this path or file pattern. This option can be given multiple times. 51 | Please see *EXCLUDE PATTERNS* for details on patterns. 52 | 53 | This option conflicts with `--tar`. 54 | 55 | 56 | * `--excludes-from <FILE>`: 57 | 58 | Read the list of excludes from this file. 59 | Please see *EXCLUDE PATTERNS* for details on patterns. 60 | 61 | This option conflicts with `--tar`. 62 | 63 | 64 | * `--full`: 65 | 66 | Create a full backup without using another backup as a reference. This makes 67 | sure that all files in the source path (except excluded files) are fully 68 | read. The file contents will still be deduplicated by using existing backups 69 | but all files are read fully. 70 | 71 | This option conflicts with `--ref`. 72 | 73 | 74 | * `--no-default-excludes`: 75 | 76 | Do not load the default `excludes` file from the repository folder. 77 | Those excludes are pre-filled with generic patterns that exclude things like pseudo 78 | filesystems or cache folders. 79 | 80 | 81 | * `--ref <REF>`: 82 | 83 | Base the new backup on this reference backup instead of automatically 84 | selecting a matching one. The backup given as `REF` must be a valid backup 85 | name as listed by _zvault-list(1)_. 86 | 87 | This option conflicts with `--full`. 88 | 89 | 90 | * `--tar`: 91 | 92 | Read the source data from a tar archive instead of the filesystem. When this 93 | flag is set, the `SRC` path must specify a valid tar file. 94 | The contents of the archive are then read instead of the filesystem. Note 95 | that the tar file contents are read as files and directories and not just 96 | as a single file (this would happen when `SRC` is a tar file and `--tar` is 97 | not set). 98 | 99 | This option can be used to import a backup that has been exported using 100 | _zvault-restore(1)_ with the `--tar` flag. 101 | 102 | This flag conflicts with `--exclude` and `--excludes-from`. 103 | 104 | 105 | * `-x`, `--xdev`: 106 | 107 | Allow crossing filesystem boundaries. By default, paths on different 108 | filesystems than the start path will be ignored. If this flag is set, 109 | the scan will also traverse into mounted filesystems. 110 | **Note:** Please use this option with care. Some pseudo filesystems 111 | contain arbitrarily deep nested directories that will send zVault into 112 | an infinite loop. Also, avoid including the remote storage 113 | in the backup. 114 | 115 | 116 | * `-q`, `--quiet`: 117 | 118 | Print less information 119 | 120 | 121 | * `-v`, `--verbose`: 122 | 123 | Print more information 124 | 125 | 126 | * `-h`, `--help`: 127 | 128 | Prints help information 129 | 130 | 131 | * `-V`, `--version`: 132 | 133 | Prints version information 134 | 135 | 136 | ## EXCLUDE PATTERNS 137 | 138 | Exclude patterns can either be absolute patterns or relative patterns. Absolute 139 | patterns start with `/` and must match from the beginning of the absolute file path. 140 | Relative patterns start with anything but `/` and can also match any portion of 141 | the absolute path.
For example, the pattern `/bin` only matches the system 142 | directory `/bin` but not `/usr/bin` or `/usr/local/bin` while the pattern `bin` 143 | matches them too. 144 | 145 | Exclude patterns must match full path components, i.e. the pattern `bin` will 146 | match any path that contains `bin` as a component (e.g. `/bin` and `/usr/bin`) 147 | but not paths that contain `bin` only as a substring like `/sbin`. 148 | 149 | Wildcards can be used to also match substrings of path components: 150 | 151 | - `?` matches any single character. 152 | - `*` matches any string not containing `/`, i.e. `*` only matches within a path 153 | component but does not span components. For example `/usr/*bin` matches 154 | `/usr/bin` and `/usr/sbin` but not `/usr/local/bin`. 155 | - `**` matches any string, even spanning across path components. So `/usr/**bin` 156 | will match `/usr/bin`, `/usr/sbin` and also `/usr/local/bin`. 157 | 158 | If a pattern matches on a filesystem entry, that entry and any child entry (in 159 | the case of directories) will be left out of the backup.
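These rules can be pictured as a translation into a regular expression. The following Rust sketch is illustrative only; zVault does depend on the `regex` crate, but the exact translation used internally may differ in its details.

    use regex::Regex;

    // Hedged sketch: translate an exclude pattern into a regex following
    // the rules above (not necessarily zVault's exact implementation).
    fn exclude_pattern_to_regex(pattern: &str) -> Regex {
        let mut re = String::new();
        // Absolute patterns must match from the beginning of the path,
        // relative patterns may start at any path component boundary.
        if pattern.starts_with('/') {
            re.push('^');
        } else {
            re.push_str("(^|/)");
        }
        let mut chars = pattern.chars().peekable();
        while let Some(c) = chars.next() {
            match c {
                '*' if chars.peek() == Some(&'*') => {
                    chars.next();
                    re.push_str(".*"); // `**` spans path components
                }
                '*' => re.push_str("[^/]*"), // `*` stays within one component
                '?' => re.push_str("[^/]"),  // any single character
                c => re.push_str(&regex::escape(&c.to_string())),
            }
        }
        // Only whole path components match: the pattern must end at the
        // end of the path or at a `/`.
        re.push_str("($|/)");
        Regex::new(&re).unwrap()
    }

    fn main() {
        let re = exclude_pattern_to_regex("bin");
        assert!(re.is_match("/bin") && re.is_match("/usr/bin"));
        assert!(!re.is_match("/sbin")); // `bin` is only a substring here
        let re = exclude_pattern_to_regex("/usr/*bin");
        assert!(re.is_match("/usr/bin") && re.is_match("/usr/sbin"));
        assert!(!re.is_match("/usr/local/bin"));
    }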
160 | 161 | 162 | ## COPYRIGHT 163 | 164 | Copyright (C) 2017-2018 Dennis Schwerdel 165 | This software is licensed under GPL-3 or newer (see LICENSE.md) 166 | -------------------------------------------------------------------------------- /docs/man/zvault-bundleinfo.1.md: -------------------------------------------------------------------------------- 1 | zvault-bundleinfo(1) -- Display information on a bundle 2 | ======================================================= 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault bundleinfo [OPTIONS] <REPO> <BUNDLE>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand displays information on bundle `BUNDLE` in the repository 12 | `REPO`. 13 | 14 | The argument `BUNDLE` must give the id of an existing bundle as listed by 15 | _zvault-bundlelist(1)_. Please note that bundles are stored with random file 16 | names on the remote storage that do not relate to the bundle id. 17 | 18 | 19 | 20 | ## OPTIONS 21 | 22 | * `-q`, `--quiet`: 23 | 24 | Print less information 25 | 26 | 27 | * `-v`, `--verbose`: 28 | 29 | Print more information 30 | 31 | 32 | * `-h`, `--help`: 33 | 34 | Prints help information 35 | 36 | 37 | * `-V`, `--version`: 38 | 39 | Prints version information 40 | 41 | 42 | ## COPYRIGHT 43 | 44 | Copyright (C) 2017-2018 Dennis Schwerdel 45 | This software is licensed under GPL-3 or newer (see LICENSE.md) 46 | -------------------------------------------------------------------------------- /docs/man/zvault-bundlelist.1.md: -------------------------------------------------------------------------------- 1 | zvault-bundlelist(1) -- List bundles in a repository 2 | ==================================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault bundlelist [OPTIONS] <REPO>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand lists all bundles in the repository `REPO`. 12 | 13 | _zvault-bundleinfo(1)_ can be used to display information on a specific bundle 14 | given its bundle id. 15 | 16 | 17 | ## OPTIONS 18 | 19 | * `-q`, `--quiet`: 20 | 21 | Print less information 22 | 23 | 24 | * `-v`, `--verbose`: 25 | 26 | Print more information 27 | 28 | 29 | * `-h`, `--help`: 30 | 31 | Prints help information 32 | 33 | 34 | * `-V`, `--version`: 35 | 36 | Prints version information 37 | 38 | 39 | ## COPYRIGHT 40 | 41 | Copyright (C) 2017-2018 Dennis Schwerdel 42 | This software is licensed under GPL-3 or newer (see LICENSE.md) 43 | -------------------------------------------------------------------------------- /docs/man/zvault-check.1.md: -------------------------------------------------------------------------------- 1 | zvault-check(1) -- Check the repository, a backup or a backup subtree 2 | ===================================================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault check [OPTIONS] <PATH>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand checks the repository, a backup or a backup subtree given by 12 | `PATH`. 13 | 14 | The repository, backup, or subtree given by `PATH` must be in the format 15 | `[repository][::backup_name[::subtree]]` as described in _zvault(1)_. 16 | 17 | The command will perform the following checks in order: 18 | - Bundle integrity (optional) 19 | - Full bundle contents (optional) 20 | - Index integrity (optional) 21 | - Backup integrity 22 | - Filesystem integrity 23 | 24 | If a backup is specified in `PATH`, only this backup will be checked in the backup 25 | integrity check and only the filesystem integrity of this backup will be checked 26 | in the filesystem integrity check. 27 | 28 | If a subtree is specified in `PATH`, no backups will be checked and only the 29 | given subtree will be checked in the filesystem integrity check. 30 | 31 | If `--bundles` is set, the integrity of the bundles will be checked before 32 | checking any backups. 33 | If `--bundle-data` is also set, the full bundles are fetched and their contents 34 | are compared to what their header claims. This check takes a long time since all 35 | bundles need to be fetched, decrypted and decompressed fully to read their 36 | contents. If this flag is not set, the bundles will only be checked without 37 | actually fetching them fully. This means that their contents can only be read 38 | from their header and this information is not verified. 39 | 40 | If `--index` is set, the integrity of the index and its contents will be checked 41 | before checking any backups. 42 | 43 | If `--repair` is set, zVault will try to repair and rebuild things instead of 44 | failing when problems are detected. The repair process will rebuild all local 45 | repository components (index, bundle cache, bundle map) when problems are 46 | identified with them. 47 | If any bundle is broken, a new bundle is created with as much of the readable 48 | data of that bundle as possible. The old bundle is not removed but gets the file 49 | extension `.bundle.broken`. 50 | If any backup is broken, a new backup is created with as much of the readable 51 | files and directories of that backup as possible. The old backup is not removed 52 | but gets the file extension `.backup.broken`. 53 | Please note the destructive nature of the repair process as it removes all 54 | references to data that has been corrupted in order to make the rest of the data 55 | accessible again. To make this process reversible, the old data is kept and just 56 | renamed. 57 | Manual computer forensics might be able to restore some of the corrupt or 58 | inaccessible data that repair cannot restore automatically.
59 | Please note that any run of _zvault-vacuum(1)_ will remove any intact data that 60 | has become inaccessible. 61 | 62 | 63 | ## OPTIONS 64 | 65 | * `-b`, `--bundles`: 66 | 67 | Check the integrity of the bundles too. 68 | 69 | 70 | * `--bundle-data`: 71 | 72 | Also check the contents of the bundles by fetching and decompressing them. 73 | Note: This flag causes the check to be much slower. 74 | 75 | 76 | * `-i`, `--index`: 77 | 78 | Also check the integrity of the index and its contents. 79 | 80 | 81 | * `-r`, `--repair`: 82 | 83 | Try to repair broken bundles, backups and rebuild local data when necessary. 84 | 85 | 86 | * `-q`, `--quiet`: 87 | 88 | Print less information 89 | 90 | 91 | * `-v`, `--verbose`: 92 | 93 | Print more information 94 | 95 | 96 | * `-h`, `--help`: 97 | 98 | Prints help information 99 | 100 | 101 | * `-V`, `--version`: 102 | 103 | Prints version information 104 | 105 | 106 | ## COPYRIGHT 107 | 108 | Copyright (C) 2017-2018 Dennis Schwerdel 109 | This software is licensed under GPL-3 or newer (see LICENSE.md) 110 | -------------------------------------------------------------------------------- /docs/man/zvault-config.1.md: -------------------------------------------------------------------------------- 1 | zvault-config(1) -- Display or change the configuration 2 | ======================================================= 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault config [OPTIONS] <REPO>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand displays or changes the configuration of the repository `REPO`. 12 | The configuration can be changed using the options described below. If no 13 | options are set, the current configuration is displayed. Otherwise, the 14 | configuration is changed as specified and then displayed. 15 | 16 | Beware that the *chunker algorithm*, *chunk size* and *hash method* should not 17 | be changed on existing repositories already containing many backups. If those 18 | values are changed, new backups will not be able to use existing data for 19 | deduplication. This can waste lots of storage space and most likely outweighs 20 | the expected benefits. 21 | 22 | The values for *bundle size*, *compression* and *encryption* only affect new 23 | data and can be changed at any time without any drawback. 24 | 25 | 26 | ## OPTIONS 27 | 28 | * `--bundle-size <SIZE>`: 29 | 30 | Set the target bundle size in MiB (default: 25). 31 | Please see _zvault(1)_ for more information on *bundle size*. 32 | 33 | 34 | * `--chunker <CHUNKER>`: 35 | 36 | Set the chunker algorithm and target chunk size (default: fastcdc/16). 37 | Please see _zvault(1)_ for more information on *chunkers* and possible 38 | values. 39 | 40 | 41 | * `-c`, `--compression <COMPRESSION>`: 42 | 43 | Set the compression method and level (default: brotli/3). 44 | Please see _zvault(1)_ for more information on *compression* and possible 45 | values. 46 | 47 | 48 | * `-e`, `--encryption <PUBLIC_KEY>`: 49 | 50 | Use the given public key for encryption. The key must be a valid public key 51 | encoded as hexadecimal. Please use _zvault-genkey(1)_ to generate keys and 52 | _zvault-addkey(1)_ to add keys to the repository. 53 | 54 | If `none` is given as public key, encryption is deactivated. 55 | 56 | **Warning:** zVault does not verify that the matching secret key which is 57 | needed for decryption is known. 58 | 59 | Please see _zvault(1)_ for more information on *encryption*. 60 | 61 | 62 | * `--hash <HASH>`: 63 | 64 | Set the hash method (default: blake2). 65 | Please see _zvault(1)_ for more information on *hash methods* and possible 66 | values.
67 | 68 | 69 | * `-q`, `--quiet`: 70 | 71 | Print less information 72 | 73 | 74 | * `-v`, `--verbose`: 75 | 76 | Print more information 77 | 78 | 79 | * `-h`, `--help`: 80 | 81 | Prints help information 82 | 83 | 84 | * `-V`, `--version`: 85 | 86 | Prints version information 87 | 88 | 89 | ## COPYRIGHT 90 | 91 | Copyright (C) 2017-2018 Dennis Schwerdel 92 | This software is licensed under GPL-3 or newer (see LICENSE.md) 93 | -------------------------------------------------------------------------------- /docs/man/zvault-copy.1.md: -------------------------------------------------------------------------------- 1 | zvault-copy(1) -- Create a copy of a backup 2 | =========================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault copy [OPTIONS] <SRC> <DST>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand copies the backup `SRC` to a new name `DST`. 12 | 13 | The backups given by `SRC` and `DST` must be in the format 14 | `[repository]::backup_name[::subtree]` as described in _zvault(1)_. 15 | If `repository` is omitted, the default repository location is used instead. 16 | 17 | 18 | ## OPTIONS 19 | 20 | * `-q`, `--quiet`: 21 | 22 | Print less information 23 | 24 | 25 | * `-v`, `--verbose`: 26 | 27 | Print more information 28 | 29 | 30 | * `-h`, `--help`: 31 | 32 | Prints help information 33 | 34 | 35 | * `-V`, `--version`: 36 | 37 | Prints version information 38 | 39 | 40 | ## COPYRIGHT 41 | 42 | Copyright (C) 2017-2018 Dennis Schwerdel 43 | This software is licensed under GPL-3 or newer (see LICENSE.md) 44 | -------------------------------------------------------------------------------- /docs/man/zvault-diff.1.md: -------------------------------------------------------------------------------- 1 | zvault-diff(1) -- Display differences between two backup versions 2 | ================================================================= 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault diff [OPTIONS] <OLD> <NEW>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand lists all differences between the two backups or backup 12 | subtrees `OLD` and `NEW`. 13 | 14 | The backups or backup subtrees given by `OLD` and `NEW` must be in the format 15 | `[repository]::backup_name[::subtree]` as described in _zvault(1)_. 16 | If `repository` is omitted, the default repository location is used instead. 17 | 18 | The differences will be reported in the format of added, modified, and deleted 19 | files and directories. If some file or directory is present in `NEW` but not in 20 | `OLD`, it will be reported as added (_add_) and if it is present in `OLD` but 21 | not in `NEW`, it will be reported as deleted (_del_). If a file is present in 22 | both versions but was modified between them, it will be reported as 23 | modified (_mod_).
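For example (backup names purely illustrative):

    zvault diff ::monday ::tuesday

This lists every file or directory that was added, modified or deleted between the backups `monday` and `tuesday` of the default repository.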
24 | 25 | 26 | ## OPTIONS 27 | 28 | * `-q`, `--quiet`: 29 | 30 | Print less information 31 | 32 | 33 | * `-v`, `--verbose`: 34 | 35 | Print more information 36 | 37 | 38 | * `-h`, `--help`: 39 | 40 | Prints help information 41 | 42 | 43 | * `-V`, `--version`: 44 | 45 | Prints version information 46 | 47 | 48 | ## COPYRIGHT 49 | 50 | Copyright (C) 2017-2018 Dennis Schwerdel 51 | This software is licensed under GPL-3 or newer (see LICENSE.md) 52 | -------------------------------------------------------------------------------- /docs/man/zvault-genkey.1.md: -------------------------------------------------------------------------------- 1 | zvault-genkey(1) -- Generate a new key pair 2 | =========================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault genkey [OPTIONS] [FILE]` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand generates a new key pair, prints it to the console and optionally 12 | writes it to the given file `FILE`. 13 | 14 | 15 | ## OPTIONS 16 | 17 | * `-p`, `--password <PASSWORD>`: 18 | 19 | Derive the key pair from the given password instead of randomly creating it. 20 | 21 | 22 | * `-q`, `--quiet`: 23 | 24 | Print less information 25 | 26 | 27 | * `-v`, `--verbose`: 28 | 29 | Print more information 30 | 31 | 32 | * `-h`, `--help`: 33 | 34 | Prints help information 35 | 36 | 37 | * `-V`, `--version`: 38 | 39 | Prints version information 40 | 41 | 42 | ## COPYRIGHT 43 | 44 | Copyright (C) 2017-2018 Dennis Schwerdel 45 | This software is licensed under GPL-3 or newer (see LICENSE.md) 46 | -------------------------------------------------------------------------------- /docs/man/zvault-import.1.md: -------------------------------------------------------------------------------- 1 | zvault-import(1) -- Reconstruct a repository from the remote storage 2 | ==================================================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault import [OPTIONS] <REMOTE> <REPO>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand imports a repository from remote storage. First, an empty 12 | repository will be created and then the remote bundles will be imported and 13 | added to the local index. 14 | 15 | The repository will be created at the location `REPO`. It is important that the 16 | path given as `REPO` does not yet exist, so that it can be created. 17 | 18 | The remote storage path `REMOTE` must be an existing remote storage folder 19 | initialized by _zvault-init(1)_. 20 | 21 | Note that this command is not intended to import single backups exported as tar 22 | files via _zvault-restore(1)_ with the `--tar` flag. Those archives can be 23 | imported via _zvault-backup(1)_ also with the `--tar` flag. 24 | 25 | 26 | ## OPTIONS 27 | 28 | * `-k`, `--key <FILE>...`: 29 | 30 | Add the key pair in the given file to the repository before importing the 31 | remote bundles. This option can be used to add keys that are needed to read 32 | the bundles. If multiple keys are needed, this option can be given multiple 33 | times.
34 | 35 | 36 | * `-q`, `--quiet`: 37 | 38 | Print less information 39 | 40 | 41 | * `-v`, `--verbose`: 42 | 43 | Print more information 44 | 45 | 46 | * `-h`, `--help`: 47 | 48 | Prints help information 49 | 50 | 51 | * `-V`, `--version`: 52 | 53 | Prints version information 54 | 55 | 56 | ## COPYRIGHT 57 | 58 | Copyright (C) 2017-2018 Dennis Schwerdel 59 | This software is licensed under GPL-3 or newer (see LICENSE.md) 60 | -------------------------------------------------------------------------------- /docs/man/zvault-info.1.md: -------------------------------------------------------------------------------- 1 | zvault-info(1) -- Display information on a repository, a backup or a subtree 2 | ============================================================================ 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault info <PATH>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand displays information on the repository, backup or backup subtree 12 | specified by `PATH`. 13 | 14 | The repository, backup or backup subtree given by `PATH` must be in the format 15 | `[repository][::backup_name[::subtree]]` as described in _zvault(1)_. 16 | 17 | 18 | ## OPTIONS 19 | 20 | * `-q`, `--quiet`: 21 | 22 | Print less information 23 | 24 | 25 | * `-v`, `--verbose`: 26 | 27 | Print more information 28 | 29 | 30 | * `-h`, `--help`: 31 | 32 | Prints help information 33 | 34 | 35 | * `-V`, `--version`: 36 | 37 | Prints version information 38 | 39 | 40 | ## COPYRIGHT 41 | 42 | Copyright (C) 2017-2018 Dennis Schwerdel 43 | This software is licensed under GPL-3 or newer (see LICENSE.md) 44 | -------------------------------------------------------------------------------- /docs/man/zvault-init.1.md: -------------------------------------------------------------------------------- 1 | zvault-init(1) -- Initialize a new repository 2 | ============================================= 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault init [OPTIONS] --remote <REMOTE> <REPO>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand initializes a new repository at the location `REPO`. It is 12 | important that the path given as `REPO` does not yet exist, so that it can be 13 | created. 14 | 15 | The remote storage path `REMOTE` must be an existing empty folder. ZVault 16 | supports mounted remote filesystems, so it is a good idea to use such a folder 17 | to keep the backups on a remote location. 18 | 19 | This subcommand should **NOT** be used to import existing remote backup 20 | locations. Please use _zvault-import(1)_ for this purpose. 21 | 22 | The rest of the options set configuration options for the new repository. The 23 | configuration can be changed by _zvault-config(1)_ later. 24 |
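For example, the following command (paths purely illustrative) creates a new encrypted repository at `~/.zvault` whose bundles are stored on a mounted remote folder:

    zvault init --encrypt --remote /mnt/backups/zvault ~/.zvault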
25 | 26 | ## OPTIONS 27 | 28 | * `--bundle-size <SIZE>`: 29 | 30 | Set the target bundle size in MiB (default: 25). 31 | Please see _zvault(1)_ for more information on *bundle size*. 32 | 33 | 34 | * `--chunker <CHUNKER>`: 35 | 36 | Set the chunker algorithm and target chunk size (default: fastcdc/16). 37 | Please see _zvault(1)_ for more information on *chunkers* and possible 38 | values. 39 | 40 | 41 | * `-c`, `--compression <COMPRESSION>`: 42 | 43 | Set the compression method and level (default: brotli/3). 44 | Please see _zvault(1)_ for more information on *compression* and possible 45 | values. 46 | 47 | 48 | * `-e`, `--encrypt`: 49 | 50 | Generate a keypair and enable encryption. 51 | Please see _zvault(1)_ for more information on *encryption*. 52 | 53 | 54 | * `--hash <HASH>`: 55 | 56 | Set the hash method (default: blake2). 57 | Please see _zvault(1)_ for more information on *hash methods* and possible 58 | values. 59 | 60 | 61 | * `-r`, `--remote <REMOTE>`: 62 | 63 | Set the path to the mounted remote storage. There should be an empty folder 64 | at this location. 65 | 66 | 67 | * `-q`, `--quiet`: 68 | 69 | Print less information 70 | 71 | 72 | * `-v`, `--verbose`: 73 | 74 | Print more information 75 | 76 | 77 | * `-h`, `--help`: 78 | 79 | Prints help information 80 | 81 | 82 | * `-V`, `--version`: 83 | 84 | Prints version information 85 | 86 | 87 | 88 | ## COPYRIGHT 89 | 90 | Copyright (C) 2017-2018 Dennis Schwerdel 91 | This software is licensed under GPL-3 or newer (see LICENSE.md) 92 | -------------------------------------------------------------------------------- /docs/man/zvault-list.1.md: -------------------------------------------------------------------------------- 1 | zvault-list(1) -- List backups or backup contents 2 | ================================================= 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault list <PATH>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand lists all backups or backup contents of the repository or backup 12 | specified by `PATH`. 13 | 14 | The repository, backup or backup subtree given by `PATH` must be in the format 15 | `[repository][::backup_name[::subtree]]` as described in _zvault(1)_. 16 | 17 | If `PATH` specifies a repository, all backups of this repository are listed. 18 | 19 | If `PATH` specifies a backup or a backup subtree, all contents of this folder 20 | are displayed. In the case of a backup, the contents of its root folder are 21 | displayed. 22 | 23 | _zvault-info(1)_ can be used to display more information on single entities. 24 | 25 | Note that _zvault-mount(1)_ can be used to make backups accessible as a 26 | filesystem which is faster than _zvault-list(1)_ for multiple listings. 27 | 28 | 29 | ## OPTIONS 30 | 31 | * `-q`, `--quiet`: 32 | 33 | Print less information 34 | 35 | 36 | * `-v`, `--verbose`: 37 | 38 | Print more information 39 | 40 | 41 | * `-h`, `--help`: 42 | 43 | Prints help information 44 | 45 | 46 | * `-V`, `--version`: 47 | 48 | Prints version information 49 | 50 | 51 | ## COPYRIGHT 52 | 53 | Copyright (C) 2017-2018 Dennis Schwerdel 54 | This software is licensed under GPL-3 or newer (see LICENSE.md) 55 | -------------------------------------------------------------------------------- /docs/man/zvault-mount.1.md: -------------------------------------------------------------------------------- 1 | zvault-mount(1) -- Mount the repository, a backup or a subtree 2 | ============================================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault mount <PATH> <MOUNTPOINT>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand mounts a repository, backup or backup subtree specified by 12 | `PATH` on the location given by `MOUNTPOINT` making it accessible as a 13 | filesystem. 14 | 15 | The repository, backup or backup subtree given by `PATH` must be in the format 16 | `[repository][::backup_name[::subtree]]` as described in _zvault(1)_. 17 | 18 | If `PATH` specifies a backup or backup subtree, the root of that backup or the 19 | respective subtree is mounted onto the given location. 20 | If `PATH` specifies a whole repository, all backups of that repository will be 21 | accessible in separate folders below the given mount point. 22 | 23 | The provided file system is mounted read-only, i.e.
it can only be used to 24 | inspect and restore backups but not to create new backups or modify existing 25 | ones. 26 | 27 | Please note that since the filesystem is mounted via fuse, restoring large 28 | amounts of data this way is slower than using _zvault-restore(1)_. 29 | 30 | 31 | ## OPTIONS 32 | 33 | * `-q`, `--quiet`: 34 | 35 | Print less information 36 | 37 | 38 | * `-v`, `--verbose`: 39 | 40 | Print more information 41 | 42 | 43 | * `-h`, `--help`: 44 | 45 | Prints help information 46 | 47 | 48 | * `-V`, `--version`: 49 | 50 | Prints version information 51 | 52 | 53 | ## COPYRIGHT 54 | 55 | Copyright (C) 2017-2018 Dennis Schwerdel 56 | This software is licensed under GPL-3 or newer (see LICENSE.md) 57 | -------------------------------------------------------------------------------- /docs/man/zvault-prune.1.md: -------------------------------------------------------------------------------- 1 | zvault-prune(1) -- Remove backups based on age 2 | ============================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault prune [OPTIONS] <REPO>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand removes backups in the repository `REPO` based on their age. 12 | 13 | If a prefix is specified via `--prefix`, only backups which start with this 14 | string are considered for removal. 15 | 16 | The prune logic will preserve a certain number of backups for different time 17 | periods and discard the rest. The available periods are `daily`, `weekly`, 18 | `monthly` and `yearly`. For each of those periods, a number `N` can be specified 19 | so that for each of the last `N` of these periods, a single backup 20 | (the newest one in that period) will be kept. 21 | 22 | For example, `--daily 3` will keep backups of the last 3 days, i.e. one backup 23 | for today, yesterday and the day before yesterday (if a backup has been saved 24 | today). If several backups have been saved on a single day, only the newest is 25 | kept. 26 | 27 | The different periods can also be combined to preserve backups using multiple 28 | different time periods. Backups are only removed if they are not preserved by 29 | any of the time periods. 30 | 31 | For example, `--daily 3 --weekly 4 --monthly 3` will keep one backup for each of 32 | the last 3 days, for each of the last 4 weeks and for each of the last 3 months. 33 | As time progresses, the daily backups will be removed as new ones are created so 34 | that only 3 of them are kept, but each week one of them will be preserved as a 35 | weekly backup and an old weekly backup will be removed unless that backup 36 | happens to be the last backup of last month, and so on. 37 | 38 | If one period is not set, no backups for that time period will be preserved. 39 | This command will refuse to remove all backups if called without options. 40 | 41 | Unless the option `--force` is set, this command only displays the backups that 42 | would be removed but does not remove them. 43 | 44 | This command renders certain chunks unused, but reclaiming their space is a 45 | complicated task as chunks are combined into bundles together with other chunks 46 | which are potentially still used. Please use _zvault-vacuum(1)_ to reclaim 47 | unused space. 48 | 49 | **Important note: Although this command does not actually remove any data, the 50 | data of the deleted backups becomes inaccessible and can not be restored.** 51 | 52 | 53 | ## OPTIONS 54 | 55 | * `-p`, `--prefix <PREFIX>`: 56 | 57 | Only consider backups starting with this prefix.
58 | 59 | 60 | * `-d`, `--daily <NUM>`: 61 | 62 | Keep the newest backup for each of the last `NUM` days. 63 | 64 | 65 | * `-w`, `--weekly <NUM>`: 66 | 67 | Keep the newest backup for each of the last `NUM` weeks. 68 | 69 | 70 | * `-m`, `--monthly <NUM>`: 71 | 72 | Keep the newest backup for each of the last `NUM` months. 73 | 74 | 75 | * `-y`, `--yearly <NUM>`: 76 | 77 | Keep the newest backup for each of the last `NUM` years. 78 | 79 | 80 | * `-f`, `--force`: 81 | 82 | Actually remove backups instead of displaying what would be removed. 83 | 84 | 85 | * `-q`, `--quiet`: 86 | 87 | Print less information 88 | 89 | 90 | * `-v`, `--verbose`: 91 | 92 | Print more information 93 | 94 | 95 | * `-h`, `--help`: 96 | 97 | Prints help information 98 | 99 | 100 | * `-V`, `--version`: 101 | 102 | Prints version information 103 | 104 | 105 | ## COPYRIGHT 106 | 107 | Copyright (C) 2017-2018 Dennis Schwerdel 108 | This software is licensed under GPL-3 or newer (see LICENSE.md) 109 | -------------------------------------------------------------------------------- /docs/man/zvault-remove.1.md: -------------------------------------------------------------------------------- 1 | zvault-remove(1) -- Remove a backup or a subtree 2 | ================================================ 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault remove [OPTIONS] <BACKUP>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand removes a backup or a backup subtree `BACKUP`. 12 | 13 | The backup or backup subtree given by `BACKUP` must be in the format 14 | `[repository]::backup_name[::subtree]` as described in _zvault(1)_. 15 | If `repository` is omitted, the default repository location is used instead. 16 | 17 | If a backup is referenced, this backup will be deleted. If a subtree is given, 18 | the backup is instead rewritten to not include that subtree anymore. 19 | 20 | If a folder of backups is referenced by `BACKUP`, the flag `--force` must be set 21 | in order to remove all backups in that folder (also recursively). 22 | 23 | Note: When removing backup subtrees, the meta information of that backup is left 24 | unchanged and still contains the data (e.g. duration and size) of the original 25 | backup run. 26 | 27 | This command renders certain chunks unused, but reclaiming their space is a 28 | complicated task as chunks are combined into bundles together with other chunks 29 | which are potentially still used. Please use _zvault-vacuum(1)_ to reclaim 30 | unused space.
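For example (backup names purely illustrative), `zvault remove ::test1` deletes the backup `test1` from the default repository, while `zvault remove ::test1::/var/tmp` rewrites that backup without the `/var/tmp` subtree.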
31 | 32 | **Important note: Although this command does not actually remove any data, the 33 | data of the deleted backups becomes inaccessible and can not be restored.** 34 | 35 | 36 | ## OPTIONS 37 | 38 | * `-f`, `--force`: 39 | 40 | Remove multiple backups in a backup folder 41 | 42 | 43 | * `-q`, `--quiet`: 44 | 45 | Print less information 46 | 47 | 48 | * `-v`, `--verbose`: 49 | 50 | Print more information 51 | 52 | 53 | * `-h`, `--help`: 54 | 55 | Prints help information 56 | 57 | 58 | * `-V`, `--version`: 59 | 60 | Prints version information 61 | 62 | 63 | ## COPYRIGHT 64 | 65 | Copyright (C) 2017-2018 Dennis Schwerdel 66 | This software is licensed under GPL-3 or newer (see LICENSE.md) 67 | -------------------------------------------------------------------------------- /docs/man/zvault-restore.1.md: -------------------------------------------------------------------------------- 1 | zvault-restore(1) -- Restore a backup or subtree 2 | ================================================ 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault restore [OPTIONS] <BACKUP> <DST>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand restores a backup or a backup subtree `BACKUP` into the folder 12 | `DST`. 13 | 14 | The backup or backup subtree given by `BACKUP` must be in the format 15 | `[repository]::backup_name[::subtree]` as described in _zvault(1)_. 16 | If `repository` is omitted, the default repository location is used instead. 17 | 18 | If `--tar` is set, the data is written to a tar file named `DST`. In this case 19 | `DST` must not exist. If `DST` is `-`, the data will be written to stdout. 20 | 21 | If `--tar` is not set, the data will be written into the existing folder `DST`. 22 | 23 | 24 | ## OPTIONS 25 | 26 | * `--tar`: 27 | 28 | Write the backup to a tar archive named `DST` instead of creating files and 29 | folders at this location. 30 | 31 | This option can be used to export a backup that can be imported again using 32 | _zvault-backup(1)_ with the `--tar` flag. 33 | 34 | 35 | * `-q`, `--quiet`: 36 | 37 | Print less information 38 | 39 | 40 | * `-v`, `--verbose`: 41 | 42 | Print more information 43 | 44 | 45 | * `-h`, `--help`: 46 | 47 | Prints help information 48 | 49 | 50 | * `-V`, `--version`: 51 | 52 | Prints version information 53 | 54 | 55 | ## COPYRIGHT 56 | 57 | Copyright (C) 2017-2018 Dennis Schwerdel 58 | This software is licensed under GPL-3 or newer (see LICENSE.md) 59 | -------------------------------------------------------------------------------- /docs/man/zvault-vacuum.1.md: -------------------------------------------------------------------------------- 1 | zvault-vacuum(1) -- Reclaim space by rewriting bundles 2 | ====================================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault vacuum [OPTIONS] <REPO>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand reclaims space by rewriting bundles in the repository `REPO`. 12 | 13 | This command rewrites bundles to remove unused chunks of backups that have been 14 | removed by _zvault-remove(1)_ or _zvault-prune(1)_. 15 | To accomplish this, it will scan all backups and track all used chunks to 16 | identify chunks that are not used by any backup. Those chunks are then grouped 17 | by bundle and bundles with many unused chunks will be rewritten with those 18 | chunks left out. 19 | 20 | The option `--ratio` configures the maximal ratio of used chunks that a bundle 21 | may still contain in order to be rewritten. Since all chunks that are still 22 | used must be read from the bundle and written to a new one, and only the 23 | storage space of the unused chunks can be reclaimed, rewriting a bundle is more 24 | economical the lower the ratio. At a ratio of 0%, only bundles with no used 25 | chunks at all will be rewritten (in this case the bundle is just removed). At a 26 | ratio of 100%, all bundles will be rewritten regardless of unused chunks.
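To illustrate (numbers purely illustrative): with `--ratio 50`, a 25 MiB bundle of which only 10 MiB of chunks are still referenced (40% used) would be rewritten, reclaiming 15 MiB at the cost of copying 10 MiB, while a bundle that is 80% used would be left alone.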
27 | 28 | Please note that the bundles will be rewritten with the current settings for 29 | encryption and compression, disregarding the original settings during bundle 30 | creation. 31 | 32 | Unless `--force` is set, this command will only simulate the process but not 33 | actually rewrite any bundle. 34 | 35 | As this is a critical operation, zVault takes many precautions to avoid 36 | damaging the integrity of the repository or other backups. The whole process 37 | is performed with an exclusive lock on the repository which prevents any backup 38 | runs. Also, the chunk index is double-checked before removing bundles to make 39 | sure that they are unused. Nevertheless, this is a critical operation which 40 | should be avoided when the storage space permits it. 41 | 42 | 43 | 44 | ## OPTIONS 45 | 46 | * `--combine`: 47 | 48 | Also combine small bundles into larger ones. 49 | 50 | 51 | * `-r`, `--ratio <NUM>`: 52 | 53 | Do not rewrite bundles with more than `NUM`% of used chunks. 54 | The ratio must be given in whole percentage, e.g. 50 means 50%. 55 | 56 | 57 | * `-f`, `--force`: 58 | 59 | Actually run the vacuum instead of simulating it. 60 | 61 | 62 | * `-q`, `--quiet`: 63 | 64 | Print less information 65 | 66 | 67 | * `-v`, `--verbose`: 68 | 69 | Print more information 70 | 71 | 72 | * `-h`, `--help`: 73 | 74 | Prints help information 75 | 76 | 77 | * `-V`, `--version`: 78 | 79 | Prints version information 80 | 81 | 82 | ## COPYRIGHT 83 | 84 | Copyright (C) 2017-2018 Dennis Schwerdel 85 | This software is licensed under GPL-3 or newer (see LICENSE.md) 86 | -------------------------------------------------------------------------------- /docs/man/zvault-versions.1.md: -------------------------------------------------------------------------------- 1 | zvault-versions(1) -- Find different versions of a file in all backups 2 | ====================================================================== 3 | 4 | ## SYNOPSIS 5 | 6 | `zvault versions [OPTIONS] <REPO> <PATH>` 7 | 8 | 9 | ## DESCRIPTION 10 | 11 | This subcommand finds and lists all versions of the file given by `PATH` in any 12 | backup in the repository `REPO`. 13 | 14 | The path given by `PATH` must be relative to the repository root. 15 | 16 | All different versions of the file in all backups will be listed by this 17 | subcommand. Only unique versions are listed, each together with the 18 | earliest backup in which that version appeared.
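For example (paths purely illustrative), `zvault versions /repos/myrepo etc/passwd` lists each distinct version of that file found across all backups in the repository.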
19 | 20 | 21 | ## OPTIONS 22 | 23 | * `-q`, `--quiet`: 24 | 25 | Print less information 26 | 27 | 28 | * `-v`, `--verbose`: 29 | 30 | Print more information 31 | 32 | 33 | * `-h`, `--help`: 34 | 35 | Prints help information 36 | 37 | 38 | * `-V`, `--version`: 39 | 40 | Prints version information 41 | 42 | 43 | ## COPYRIGHT 44 | 45 | Copyright (C) 2017-2018 Dennis Schwerdel 46 | This software is licensed under GPL-3 or newer (see LICENSE.md) 47 | -------------------------------------------------------------------------------- /lang/Makefile: -------------------------------------------------------------------------------- 1 | MO_FILES = de.mo 2 | 3 | default: default.pot ${MO_FILES} 4 | 5 | default.pot: excluded.po ../src 6 | find ../src -name '*.rs' | xargs xgettext --debug -L python -n -F -a -E --from-code UTF-8 -x ../lang/excluded.po -o default.pot 7 | 8 | %.mo : %.po 9 | msgfmt $< -o $@ 10 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | trailing_semicolon = false 2 | trailing_comma = "Never" 3 | -------------------------------------------------------------------------------- /src/bundledb/cache.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | 3 | use std::path::{Path, PathBuf}; 4 | use std::fs::{self, File}; 5 | use std::io::{self, BufReader, BufWriter, Write, Read}; 6 | 7 | 8 | pub static CACHE_FILE_STRING: [u8; 7] = *b"zvault\x04"; 9 | pub static CACHE_FILE_VERSION: u8 = 1; 10 | 11 | 12 | quick_error!{ 13 | #[derive(Debug)] 14 | pub enum BundleCacheError { 15 | Read(err: io::Error) { 16 | cause(err) 17 | description(tr!("Failed to read bundle cache")) 18 | display("{}", tr_format!("Bundle cache error: failed to read bundle cache\n\tcaused by: {}", err)) 19 | } 20 | Write(err: io::Error) { 21 | cause(err) 22 | description(tr!("Failed to write bundle cache")) 23 | display("{}", tr_format!("Bundle cache error: failed to write bundle cache\n\tcaused by: {}", err)) 24 | } 25 | WrongHeader { 26 | description(tr!("Wrong header")) 27 | display("{}", tr_format!("Bundle cache error: wrong header on bundle cache")) 28 | } 29 | UnsupportedVersion(version: u8) { 30 | description(tr!("Wrong version")) 31 | display("{}", tr_format!("Bundle cache error: unsupported version: {}", version)) 32 | } 33 | Decode(err: msgpack::DecodeError) { 34 | from() 35 | cause(err) 36 | description(tr!("Failed to decode bundle cache")) 37 | display("{}", tr_format!("Bundle cache error: failed to decode bundle cache\n\tcaused by: {}", err)) 38 | } 39 | Encode(err: msgpack::EncodeError) { 40 | from() 41 | cause(err) 42 | description(tr!("Failed to encode bundle cache")) 43 | display("{}", tr_format!("Bundle cache error: failed to encode bundle cache\n\tcaused by: {}", err)) 44 | } 45 | } 46 | } 47 | 48 | 49 | #[derive(Clone, Default)] 50 | pub struct StoredBundle { 51 | pub info: BundleInfo, 52 | pub path: PathBuf 53 | } 54 | serde_impl!(StoredBundle(u64) { 55 | info: BundleInfo => 0, 56 | path: PathBuf => 1 57 | }); 58 | 59 | impl StoredBundle { 60 | #[inline] 61 | pub fn id(&self) -> BundleId { 62 | self.info.id.clone() 63 | } 64 | 65 | pub fn copy_to<P: AsRef<Path>>( 66 | &self, 67 | base_path: &Path, 68 | path: P, 69 | ) -> Result<Self, BundleDbError> { 70 | let src_path = base_path.join(&self.path); 71 | let dst_path = path.as_ref(); 72 | try!(fs::copy(&src_path, dst_path).context(dst_path)); 73 | let mut bundle = self.clone(); 74 | bundle.path =
dst_path.strip_prefix(base_path).unwrap().to_path_buf(); 75 | Ok(bundle) 76 | } 77 | 78 | pub fn move_to<P: AsRef<Path>>( 79 | &mut self, 80 | base_path: &Path, 81 | path: P, 82 | ) -> Result<(), BundleDbError> { 83 | let src_path = base_path.join(&self.path); 84 | let dst_path = path.as_ref(); 85 | if fs::rename(&src_path, dst_path).is_err() { 86 | try!(fs::copy(&src_path, dst_path).context(dst_path)); 87 | try!(fs::remove_file(&src_path).context(&src_path as &Path)); 88 | } 89 | self.path = dst_path.strip_prefix(base_path).unwrap().to_path_buf(); 90 | Ok(()) 91 | } 92 | 93 | pub fn read_list_from<P: AsRef<Path>>(path: P) -> Result<Vec<Self>, BundleCacheError> { 94 | let path = path.as_ref(); 95 | let mut file = BufReader::new(try!(File::open(path).map_err(BundleCacheError::Read))); 96 | let mut header = [0u8; 8]; 97 | try!(file.read_exact(&mut header).map_err(BundleCacheError::Read)); 98 | if header[..CACHE_FILE_STRING.len()] != CACHE_FILE_STRING { 99 | return Err(BundleCacheError::WrongHeader); 100 | } 101 | let version = header[CACHE_FILE_STRING.len()]; 102 | if version != CACHE_FILE_VERSION { 103 | return Err(BundleCacheError::UnsupportedVersion(version)); 104 | } 105 | Ok(try!(msgpack::decode_from_stream(&mut file))) 106 | } 107 | 108 | pub fn save_list_to<P: AsRef<Path>>(list: &[Self], path: P) -> Result<(), BundleCacheError> { 109 | let path = path.as_ref(); 110 | let mut file = BufWriter::new(try!(File::create(path).map_err(BundleCacheError::Write))); 111 | try!(file.write_all(&CACHE_FILE_STRING).map_err( 112 | BundleCacheError::Write 113 | )); 114 | try!(file.write_all(&[CACHE_FILE_VERSION]).map_err( 115 | BundleCacheError::Write 116 | )); 117 | try!(msgpack::encode_to_stream(&list, &mut file)); 118 | Ok(()) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/bundledb/mod.rs: -------------------------------------------------------------------------------- 1 | mod writer; 2 | mod reader; 3 | mod db; 4 | mod cache; 5 | mod uploader; 6 | 7 | pub use self::cache::{StoredBundle, BundleCacheError}; 8 | pub use self::writer::{BundleWriter, BundleWriterError}; 9 | pub use self::reader::{BundleReader, BundleReaderError}; 10 | pub use self::db::*; 11 | pub use self::uploader::BundleUploader; 12 | 13 | use prelude::*; 14 | 15 | use std::fmt; 16 | use std::collections::HashMap; 17 | use serde; 18 | use rand; 19 | 20 | 21 | pub static HEADER_STRING: [u8; 7] = *b"zvault\x01"; 22 | pub static HEADER_VERSION: u8 = 1; 23 | 24 | 25 | #[derive(Hash, PartialEq, Eq, Clone, Default, Ord, PartialOrd)] 26 | pub struct BundleId(pub Hash); 27 | 28 | impl Serialize for BundleId { 29 | #[inline] 30 | fn serialize<S: serde::Serializer>(&self, ser: S) -> Result<S::Ok, S::Error> { 31 | self.0.serialize(ser) 32 | } 33 | } 34 | 35 | impl<'a> Deserialize<'a> for BundleId { 36 | #[inline] 37 | fn deserialize<D: serde::Deserializer<'a>>(de: D) -> Result<Self, D::Error> { 38 | let hash = try!(Hash::deserialize(de)); 39 | Ok(BundleId(hash)) 40 | } 41 | } 42 | 43 | impl BundleId { 44 | #[inline] 45 | pub fn to_string(&self) -> String { 46 | self.0.to_string() 47 | } 48 | 49 | #[inline] 50 | pub fn random() -> Self { 51 | BundleId(Hash { 52 | high: rand::random(), 53 | low: rand::random() 54 | }) 55 | } 56 | } 57 | 58 | impl fmt::Display for BundleId { 59 | #[inline] 60 | fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { 61 | write!(fmt, "{}", self.to_string()) 62 | } 63 | } 64 | 65 | impl fmt::Debug for BundleId { 66 | #[inline] 67 | fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { 68 | write!(fmt, "{}", self.to_string()) 69 | } 70 | } 71 | 72 | 73 |
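// On-disk bundle layout, as written by BundleWriter::finish (see writer.rs):
//   1. HEADER_STRING (b"zvault\x01") followed by the HEADER_VERSION byte
//   2. BundleHeader (msgpack): encryption method and size of the info block
//   3. BundleInfo (msgpack, encrypted if encryption is enabled)
//   4. chunk list (hash and size per chunk, encrypted if encryption is enabled)
//   5. the chunk data itself (compressed and/or encrypted)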
#[derive(Eq, Debug, PartialEq, Clone, Copy)] 74 | pub enum BundleMode { 75 | Data, 76 | Meta 77 | } 78 | serde_impl!(BundleMode(u8) { 79 | Data => 0, 80 | Meta => 1 81 | }); 82 | 83 | 84 | #[derive(Default, Debug, Clone)] 85 | pub struct BundleHeader { 86 | pub encryption: Option<Encryption>, 87 | pub info_size: usize 88 | } 89 | serde_impl!(BundleHeader(u8) { 90 | encryption: Option<Encryption> => 0, 91 | info_size: usize => 1 92 | }); 93 | 94 | 95 | #[derive(Clone)] 96 | pub struct BundleInfo { 97 | pub id: BundleId, 98 | pub mode: BundleMode, 99 | pub compression: Option<Compression>, 100 | pub encryption: Option<Encryption>, 101 | pub hash_method: HashMethod, 102 | pub raw_size: usize, 103 | pub encoded_size: usize, 104 | pub chunk_count: usize, 105 | pub chunk_list_size: usize, 106 | pub timestamp: i64 107 | } 108 | serde_impl!(BundleInfo(u64?) { 109 | id: BundleId => 0, 110 | mode: BundleMode => 1, 111 | compression: Option<Compression> => 2, 112 | encryption: Option<Encryption> => 3, 113 | hash_method: HashMethod => 4, 114 | raw_size: usize => 6, 115 | encoded_size: usize => 7, 116 | chunk_count: usize => 8, 117 | chunk_list_size: usize => 9, 118 | timestamp: i64 => 10 119 | }); 120 | 121 | impl Default for BundleInfo { 122 | fn default() -> Self { 123 | BundleInfo { 124 | id: BundleId(Hash::empty()), 125 | compression: None, 126 | encryption: None, 127 | hash_method: HashMethod::Blake2, 128 | raw_size: 0, 129 | encoded_size: 0, 130 | chunk_count: 0, 131 | mode: BundleMode::Data, 132 | chunk_list_size: 0, 133 | timestamp: 0 134 | } 135 | } 136 | } 137 | 138 | 139 | #[derive(Debug)] 140 | pub struct BundleStatistics { 141 | pub raw_size: ValueStats, 142 | pub encoded_size: ValueStats, 143 | pub chunk_count: ValueStats, 144 | pub raw_size_meta: ValueStats, 145 | pub encoded_size_meta: ValueStats, 146 | pub chunk_count_meta: ValueStats, 147 | pub raw_size_data: ValueStats, 148 | pub encoded_size_data: ValueStats, 149 | pub chunk_count_data: ValueStats, 150 | pub hash_methods: HashMap<HashMethod, usize>, 151 | pub compressions: HashMap<Option<Compression>, usize>, 152 | pub encryptions: HashMap<Option<Encryption>, usize> 153 | } -------------------------------------------------------------------------------- /src/bundledb/uploader.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | 3 | use std::sync::atomic::{Ordering, AtomicBool, AtomicUsize}; 4 | use std::sync::{Mutex, Condvar, Arc}; 5 | use std::{mem, fs, thread}; 6 | use std::path::{Path, PathBuf}; 7 | 8 | use crossbeam::sync::MsQueue; 9 | 10 | 11 | pub struct BundleUploader { 12 | capacity: usize, 13 | error_present: AtomicBool, 14 | error: Mutex<Option<BundleDbError>>, 15 | waiting: AtomicUsize, 16 | queue: MsQueue<Option<(PathBuf, PathBuf)>>, 17 | wait: (Condvar, Mutex<()>) 18 | } 19 | 20 | impl BundleUploader { 21 | pub fn new(capacity: usize) -> Arc<Self> { 22 | let self_ = Arc::new(BundleUploader { 23 | capacity, 24 | error_present: AtomicBool::new(false), 25 | error: Mutex::new(None), 26 | waiting: AtomicUsize::new(0), 27 | queue: MsQueue::new(), 28 | wait: (Condvar::new(), Mutex::new(())) 29 | }); 30 | let self2 = self_.clone(); 31 | thread::Builder::new() 32 | .name("uploader".to_string()) 33 | .spawn(move || self2.worker_thread()) 34 | .unwrap(); 35 | self_ 36 | } 37 | 38 | fn get_status(&self) -> Result<(), BundleDbError> { 39 | if self.error_present.load(Ordering::SeqCst) { 40 | let mut error = None; 41 | mem::swap(&mut error, &mut self.error.lock().unwrap()); 42 | if let Some(err) = error { 43 | Err(err) 44 | } else { 45 | Err(BundleDbError::UploadFailed) 46 | } 47 | } else { 48 | Ok(()) 49 | } 50 | } 51 | 52 | pub fn queue(&self, local_path:
PathBuf, remote_path: PathBuf) -> Result<(), BundleDbError> { 53 | while self.waiting.load(Ordering::SeqCst) >= self.capacity { 54 | tr_debug!("Upload queue is full, waiting for slots"); 55 | let _ = self.wait.0.wait(self.wait.1.lock().unwrap()).unwrap(); 56 | } 57 | tr_trace!("Adding to upload queue: {:?}", local_path); 58 | if !self.error_present.load(Ordering::SeqCst) { 59 | self.waiting.fetch_add(1, Ordering::SeqCst); 60 | self.queue.push(Some((local_path, remote_path))); 61 | } 62 | self.get_status() 63 | } 64 | 65 | pub fn finish(&self) -> Result<(), BundleDbError> { 66 | if !self.error_present.load(Ordering::SeqCst) { 67 | self.waiting.fetch_add(1, Ordering::SeqCst); 68 | self.queue.push(None); 69 | } 70 | while self.waiting.load(Ordering::SeqCst) > 0 { 71 | let _ = self.wait.0.wait(self.wait.1.lock().unwrap()); 72 | } 73 | self.get_status() 74 | } 75 | 76 | fn worker_thread_inner(&self) -> Result<(), BundleDbError> { 77 | while let Some((src_path, dst_path)) = self.queue.pop() { 78 | tr_trace!("Uploading {:?} to {:?}", src_path, dst_path); 79 | self.waiting.fetch_sub(1, Ordering::SeqCst); 80 | self.wait.0.notify_all(); 81 | let folder = dst_path.parent().unwrap(); 82 | try!(fs::create_dir_all(&folder).context(folder as &Path)); 83 | try!(fs::copy(&src_path, &dst_path).context(&dst_path as &Path)); 84 | try!(fs::remove_file(&src_path).context(&src_path as &Path)); 85 | tr_debug!("Uploaded {:?} to {:?}", src_path, dst_path); 86 | } 87 | Ok(()) 88 | } 89 | 90 | fn worker_thread(&self) { 91 | if let Err(err) = self.worker_thread_inner() { 92 | tr_debug!("Upload thread failed with error: {}", err); 93 | *self.error.lock().unwrap() = Some(err); 94 | self.error_present.store(true, Ordering::SeqCst); 95 | } 96 | self.waiting.store(0, Ordering::SeqCst); 97 | self.wait.0.notify_all(); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/bundledb/writer.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | use super::*; 3 | 4 | use std::path::{Path, PathBuf}; 5 | use std::fs::File; 6 | use std::io::{self, Write, BufWriter}; 7 | use std::sync::{Arc, Mutex}; 8 | 9 | use chrono::prelude::*; 10 | 11 | 12 | quick_error!{ 13 | #[derive(Debug)] 14 | pub enum BundleWriterError { 15 | CompressionSetup(err: CompressionError) { 16 | cause(err) 17 | description(tr!("Failed to setup compression")) 18 | display("{}", tr_format!("Bundle writer error: failed to setup compression\n\tcaused by: {}", err)) 19 | } 20 | Compression(err: CompressionError) { 21 | cause(err) 22 | description(tr!("Failed to compress data")) 23 | display("{}", tr_format!("Bundle writer error: failed to compress data\n\tcaused by: {}", err)) 24 | } 25 | Encryption(err: EncryptionError) { 26 | from() 27 | cause(err) 28 | description(tr!("Encryption failed")) 29 | display("{}", tr_format!("Bundle writer error: failed to encrypt data\n\tcaused by: {}", err)) 30 | } 31 | Encode(err: msgpack::EncodeError, path: PathBuf) { 32 | cause(err) 33 | context(path: &'a Path, err: msgpack::EncodeError) -> (err, path.to_path_buf()) 34 | description(tr!("Failed to encode bundle header to file")) 35 | display("{}", tr_format!("Bundle writer error: failed to encode bundle header to file {:?}\n\tcaused by: {}", path, err)) 36 | } 37 | Write(err: io::Error, path: PathBuf) { 38 | cause(err) 39 | context(path: &'a Path, err: io::Error) -> (err, path.to_path_buf()) 40 | description(tr!("Failed to write data to file")) 41 | display("{}", 
tr_format!("Bundle writer error: failed to write data to file {:?}\n\tcaused by: {}", path, err)) 42 | } 43 | } 44 | } 45 | 46 | 47 | pub struct BundleWriter { 48 | mode: BundleMode, 49 | hash_method: HashMethod, 50 | data: Vec, 51 | compression: Option, 52 | compression_stream: Option, 53 | encryption: Option, 54 | crypto: Arc>, 55 | raw_size: usize, 56 | chunk_count: usize, 57 | chunks: ChunkList 58 | } 59 | 60 | impl BundleWriter { 61 | pub fn new( 62 | mode: BundleMode, 63 | hash_method: HashMethod, 64 | compression: Option, 65 | encryption: Option, 66 | crypto: Arc>, 67 | ) -> Result { 68 | let compression_stream = match compression { 69 | Some(ref compression) => Some(try!(compression.compress_stream().map_err( 70 | BundleWriterError::CompressionSetup 71 | ))), 72 | None => None, 73 | }; 74 | Ok(BundleWriter { 75 | mode, 76 | hash_method, 77 | data: vec![], 78 | compression, 79 | compression_stream, 80 | encryption, 81 | crypto, 82 | raw_size: 0, 83 | chunk_count: 0, 84 | chunks: ChunkList::new() 85 | }) 86 | } 87 | 88 | pub fn add(&mut self, chunk: &[u8], hash: Hash) -> Result { 89 | if let Some(ref mut stream) = self.compression_stream { 90 | try!(stream.process(chunk, &mut self.data).map_err( 91 | BundleWriterError::Compression 92 | )) 93 | } else { 94 | self.data.extend_from_slice(chunk) 95 | } 96 | self.raw_size += chunk.len(); 97 | self.chunk_count += 1; 98 | self.chunks.push((hash, chunk.len() as u32)); 99 | Ok(self.chunk_count - 1) 100 | } 101 | 102 | pub fn finish(mut self, db: &BundleDb) -> Result { 103 | if let Some(stream) = self.compression_stream { 104 | try!(stream.finish(&mut self.data).map_err( 105 | BundleWriterError::Compression 106 | )) 107 | } 108 | if let Some(ref encryption) = self.encryption { 109 | self.data = try!(self.crypto.lock().unwrap().encrypt(encryption, &self.data)); 110 | } 111 | let encoded_size = self.data.len(); 112 | let mut chunk_data = Vec::with_capacity(self.chunks.encoded_size()); 113 | self.chunks.write_to(&mut chunk_data).unwrap(); 114 | let id = BundleId(self.hash_method.hash(&chunk_data)); 115 | if let Some(ref encryption) = self.encryption { 116 | chunk_data = try!(self.crypto.lock().unwrap().encrypt(encryption, &chunk_data)); 117 | } 118 | let mut path = db.layout.temp_bundle_path(); 119 | let mut file = BufWriter::new(try!(File::create(&path).context(&path as &Path))); 120 | try!(file.write_all(&HEADER_STRING).context(&path as &Path)); 121 | try!(file.write_all(&[HEADER_VERSION]).context(&path as &Path)); 122 | let info = BundleInfo { 123 | mode: self.mode, 124 | hash_method: self.hash_method, 125 | compression: self.compression, 126 | encryption: self.encryption.clone(), 127 | chunk_count: self.chunk_count, 128 | id: id.clone(), 129 | raw_size: self.raw_size, 130 | encoded_size, 131 | chunk_list_size: chunk_data.len(), 132 | timestamp: Local::now().timestamp() 133 | }; 134 | let mut info_data = try!(msgpack::encode(&info).context(&path as &Path)); 135 | if let Some(ref encryption) = self.encryption { 136 | info_data = try!(self.crypto.lock().unwrap().encrypt(encryption, &info_data)); 137 | } 138 | let header = BundleHeader { 139 | encryption: self.encryption, 140 | info_size: info_data.len() 141 | }; 142 | try!(msgpack::encode_to_stream(&header, &mut file).context( 143 | &path as &Path 144 | )); 145 | try!(file.write_all(&info_data).context(&path as &Path)); 146 | try!(file.write_all(&chunk_data).context(&path as &Path)); 147 | try!(file.write_all(&self.data).context(&path as &Path)); 148 | path = 
path.strip_prefix(db.layout.base_path()) 149 | .unwrap() 150 | .to_path_buf(); 151 | Ok(StoredBundle { 152 | path, 153 | info 154 | }) 155 | } 156 | 157 | #[inline] 158 | pub fn raw_size(&self) -> usize { 159 | self.raw_size 160 | } 161 | 162 | #[inline] 163 | pub fn estimate_final_size(&self) -> usize { 164 | self.data.len() + self.chunk_count * 20 + 500 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/chunker.rs: -------------------------------------------------------------------------------- 1 | pub use chunking::*; 2 | 3 | use std::str::FromStr; 4 | 5 | 6 | #[derive(Debug, Clone, Copy, Eq, PartialEq)] 7 | pub enum ChunkerType { 8 | Ae(usize), 9 | Rabin((usize, u32)), 10 | FastCdc((usize, u64)), 11 | Fixed(usize) 12 | } 13 | serde_impl!(ChunkerType(u64) { 14 | Ae(usize) => 1, 15 | Rabin((usize, u32)) => 2, 16 | FastCdc((usize, u64)) => 3, 17 | Fixed(usize) => 4 18 | }); 19 | 20 | 21 | impl ChunkerType { 22 | pub fn from(name: &str, avg_size: usize, seed: u64) -> Result<Self, &'static str> { 23 | match name { 24 | "ae" => Ok(ChunkerType::Ae(avg_size)), 25 | "rabin" => Ok(ChunkerType::Rabin((avg_size, seed as u32))), 26 | "fastcdc" => Ok(ChunkerType::FastCdc((avg_size, seed))), 27 | "fixed" => Ok(ChunkerType::Fixed(avg_size)), 28 | _ => Err(tr!("Unsupported chunker type")), 29 | } 30 | } 31 | 32 | pub fn from_string(name: &str) -> Result<Self, &'static str> { 33 | let (name, size) = if let Some(pos) = name.find('/') { 34 | let size = try!(usize::from_str(&name[pos + 1..]).map_err( 35 | |_| tr!("Chunk size must be a number") 36 | )); 37 | let name = &name[..pos]; 38 | (name, size) 39 | } else { 40 | (name, 8) 41 | }; 42 | Self::from(name, size * 1024, 0) 43 | } 44 | 45 | 46 | #[inline] 47 | pub fn create(&self) -> Box<Chunker> { 48 | match *self { 49 | ChunkerType::Ae(size) => Box::new(AeChunker::new(size)), 50 | ChunkerType::Rabin((size, seed)) => Box::new(RabinChunker::new(size, seed)), 51 | ChunkerType::FastCdc((size, seed)) => Box::new(FastCdcChunker::new(size, seed)), 52 | ChunkerType::Fixed(size) => Box::new(FixedChunker::new(size)), 53 | } 54 | } 55 | 56 | pub fn name(&self) -> &'static str { 57 | match *self { 58 | ChunkerType::Ae(_size) => "ae", 59 | ChunkerType::Rabin((_size, _seed)) => "rabin", 60 | ChunkerType::FastCdc((_size, _seed)) => "fastcdc", 61 | ChunkerType::Fixed(_size) => "fixed", 62 | } 63 | } 64 | 65 | pub fn avg_size(&self) -> usize { 66 | match *self { 67 | ChunkerType::Ae(size) | 68 | ChunkerType::Fixed(size) => size, 69 | ChunkerType::Rabin((size, _seed)) => size, 70 | ChunkerType::FastCdc((size, _seed)) => size, 71 | } 72 | } 73 | 74 | pub fn to_string(&self) -> String { 75 | format!("{}/{}", self.name(), self.avg_size() / 1024) 76 | } 77 | 78 | pub fn seed(&self) -> u64 { 79 | match *self { 80 | ChunkerType::Ae(_size) | 81 | ChunkerType::Fixed(_size) => 0, 82 | ChunkerType::Rabin((_size, seed)) => u64::from(seed), 83 | ChunkerType::FastCdc((_size, seed)) => seed, 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/chunking/ae.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | use std::ptr; 4 | 5 | // AE Chunker 6 | // Paper: "AE: An Asymmetric Extremum Content Defined Chunking Algorithm for Fast and Bandwidth-Efficient Data Deduplication" 7 | 8 | 9 | pub struct AeChunker { 10 | buffer: [u8; 0x1000], 11 | buffered: usize, 12 | window_size: usize 13 | } 14 | 15 | impl AeChunker { 16 | pub fn new(avg_size: usize) -> AeChunker { 17 | //
Experiments show that this claim from the paper is wrong and results in smaller chunks 18 | //let window_size = (avg_size as f64 / (consts::E - 1.0)) as usize; 19 | let window_size = avg_size - 256; 20 | AeChunker{ 21 | buffer: [0; 0x1000], 22 | buffered: 0, 23 | window_size, 24 | } 25 | } 26 | } 27 | 28 | impl Chunker for AeChunker { 29 | #[allow(unknown_lints,explicit_counter_loop)] 30 | fn chunk(&mut self, r: &mut Read, w: &mut Write) -> Result<ChunkerStatus, ChunkerError> { 31 | let mut max; 32 | let mut pos = 0; 33 | let mut max_pos = 0; 34 | let mut max_val = 0; 35 | loop { 36 | // Fill the buffer, there might be some bytes still in there from last chunk 37 | max = try!(r.read(&mut self.buffer[self.buffered..]).map_err(ChunkerError::Read)) + self.buffered; 38 | // If nothing to do, finish 39 | if max == 0 { 40 | return Ok(ChunkerStatus::Finished) 41 | } 42 | for i in 0..max { 43 | let val = self.buffer[i]; 44 | if val <= max_val { 45 | if pos == max_pos + self.window_size { 46 | // Write all bytes from this chunk out to sink and store rest for next chunk 47 | try!(w.write_all(&self.buffer[..i+1]).map_err(ChunkerError::Write)); 48 | unsafe { ptr::copy(self.buffer[i+1..].as_ptr(), self.buffer.as_mut_ptr(), max-i-1) }; 49 | self.buffered = max-i-1; 50 | return Ok(ChunkerStatus::Continue); 51 | } 52 | } else { 53 | max_val = val; 54 | max_pos = pos; 55 | } 56 | pos += 1; 57 | } 58 | try!(w.write_all(&self.buffer[..max]).map_err(ChunkerError::Write)); 59 | self.buffered = 0; 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/chunking/benches.rs: -------------------------------------------------------------------------------- 1 | use chunking::*; 2 | 3 | use std::io::{self, Write, Cursor}; 4 | use test::Bencher; 5 | 6 | 7 | fn random_data(seed: u64, size: usize) -> Vec<u8> { 8 | assert_eq!(size % 4, 0); 9 | let mut data = vec![0; size]; 10 | let a = 6364136223846793005; 11 | let c = 1442695040888963407; 12 | let mut v = seed; 13 | for i in 0..size/4 { 14 | v = v.wrapping_mul(a).wrapping_add(c); 15 | data[4*i] = ((v >> 24) & 0xff) as u8; 16 | data[4*i+1] = ((v >> 16) & 0xff) as u8; 17 | data[4*i+2] = ((v >> 8) & 0xff) as u8; 18 | data[4*i+3] = (v & 0xff) as u8; 19 | } 20 | data 21 | } 22 | 23 | 24 | struct CutPositions(Vec<u64>, u64); 25 | 26 | impl CutPositions { 27 | pub fn new() -> Self { 28 | CutPositions(vec![], 0) 29 | } 30 | 31 | pub fn positions(&self) -> &[u64] { 32 | &self.0 33 | } 34 | } 35 | 36 | impl Write for CutPositions { 37 | fn write(&mut self, data: &[u8]) -> Result<usize, io::Error> { 38 | self.1 += data.len() as u64; 39 | self.0.push(self.1); 40 | Ok(data.len()) 41 | } 42 | 43 | fn flush(&mut self) -> Result<(), io::Error> { 44 | Ok(()) 45 | } 46 | } 47 | 48 | 49 | #[bench] 50 | fn test_fixed_init(b: &mut Bencher) { 51 | b.iter(|| { 52 | FixedChunker::new(8*1024); 53 | }) 54 | } 55 | 56 | #[bench] 57 | fn test_fixed_8192(b: &mut Bencher) { 58 | let data = random_data(0, 1024*1024); 59 | b.bytes = data.len() as u64; 60 | b.iter(|| { 61 | let mut chunker = FixedChunker::new(8*1024); 62 | let mut cursor = Cursor::new(&data); 63 | let mut sink = CutPositions::new(); 64 | while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {}; 65 | test::black_box(sink.positions().len()) 66 | }) 67 | } 68 | 69 | 70 | #[bench] 71 | fn test_ae_init(b: &mut Bencher) { 72 | b.iter(|| { 73 | AeChunker::new(8*1024); 74 | }) 75 | } 76 | 77 | #[bench] 78 | fn test_ae_8192(b: &mut Bencher) { 79 | let data = random_data(0, 1024*1024); 80 | b.bytes = data.len() as u64;
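// Throughput is derived from b.bytes; black_box below keeps the chunk count from being optimized away.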
81 | b.iter(|| { 82 | let mut chunker = AeChunker::new(8*1024); 83 | let mut cursor = Cursor::new(&data); 84 | let mut sink = CutPositions::new(); 85 | while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {}; 86 | test::black_box(sink.positions().len()) 87 | }) 88 | } 89 | 90 | 91 | #[bench] 92 | fn test_rabin_init(b: &mut Bencher) { 93 | b.iter(|| { 94 | RabinChunker::new(8*1024, 0); 95 | }) 96 | } 97 | 98 | #[bench] 99 | fn test_rabin_8192(b: &mut Bencher) { 100 | let data = random_data(0, 1024*1024); 101 | b.bytes = data.len() as u64; 102 | b.iter(|| { 103 | let mut chunker = RabinChunker::new(8*1024, 0); 104 | let mut cursor = Cursor::new(&data); 105 | let mut sink = CutPositions::new(); 106 | while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {}; 107 | test::black_box(sink.positions().len()) 108 | }) 109 | } 110 | 111 | 112 | #[bench] 113 | fn test_fastcdc_init(b: &mut Bencher) { 114 | b.iter(|| { 115 | FastCdcChunker::new(8*1024, 0); 116 | }) 117 | } 118 | 119 | #[bench] 120 | fn test_fastcdc_8192(b: &mut Bencher) { 121 | let data = random_data(0, 1024*1024); 122 | b.bytes = data.len() as u64; 123 | b.iter(|| { 124 | let mut chunker = FastCdcChunker::new(8*1024, 0); 125 | let mut cursor = Cursor::new(&data); 126 | let mut sink = CutPositions::new(); 127 | while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {}; 128 | test::black_box(sink.positions().len()) 129 | }) 130 | } 131 | -------------------------------------------------------------------------------- /src/chunking/fastcdc.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | use std::ptr; 4 | use std::cmp; 5 | 6 | // FastCDC 7 | // Paper: "FastCDC: a Fast and Efficient Content-Defined Chunking Approach for Data Deduplication" 8 | // Paper-URL: https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf 9 | // Presentation: https://www.usenix.org/sites/default/files/conference/protected-files/atc16_slides_xia.pdf 10 | 11 | 12 | 13 | // Creating 256 pseudo-random values (based on Knuth's MMIX) 14 | fn create_gear(seed: u64) -> [u64; 256] { 15 | let mut table = [0u64; 256]; 16 | let a = 6_364_136_223_846_793_005; 17 | let c = 1_442_695_040_888_963_407; 18 | let mut v = seed; 19 | for t in &mut table.iter_mut() { 20 | v = v.wrapping_mul(a).wrapping_add(c); 21 | *t = v; 22 | } 23 | table 24 | } 25 | 26 | fn get_masks(avg_size: usize, nc_level: usize, seed: u64) -> (u64, u64) { 27 | let bits = (avg_size.next_power_of_two() - 1).count_ones(); 28 | if bits == 13 { 29 | // From the paper 30 | return (0x0003_5907_0353_0000, 0x0000_d900_0353_0000); 31 | } 32 | let mut mask = 0u64; 33 | let mut v = seed; 34 | let a = 6_364_136_223_846_793_005; 35 | let c = 1_442_695_040_888_963_407; 36 | while mask.count_ones() < bits - nc_level as u32 { 37 | v = v.wrapping_mul(a).wrapping_add(c); 38 | mask = (mask | 1).rotate_left(v as u32 & 0x3f); 39 | } 40 | let mask_long = mask; 41 | while mask.count_ones() < bits + nc_level as u32 { 42 | v = v.wrapping_mul(a).wrapping_add(c); 43 | mask = (mask | 1).rotate_left(v as u32 & 0x3f); 44 | } 45 | let mask_short = mask; 46 | (mask_short, mask_long) 47 | } 48 | 49 | pub struct FastCdcChunker { 50 | buffer: [u8; 0x1000], 51 | buffered: usize, 52 | gear: [u64; 256], 53 | min_size: usize, 54 | max_size: usize, 55 | avg_size: usize, 56 | mask_long: u64, 57 | mask_short: u64, 58 | } 59 | 60 | impl FastCdcChunker { 61 | pub fn new(avg_size: usize, seed: u64) -> 
Self { 62 | let (mask_short, mask_long) = get_masks(avg_size, 2, seed); 63 | FastCdcChunker { 64 | buffer: [0; 0x1000], 65 | buffered: 0, 66 | gear: create_gear(seed), 67 | min_size: avg_size/4, 68 | max_size: avg_size*8, 69 | avg_size, 70 | mask_long, 71 | mask_short, 72 | } 73 | } 74 | } 75 | 76 | 77 | impl FastCdcChunker { 78 | fn write_output(&mut self, w: &mut Write, pos: usize, max: usize) -> Result<ChunkerStatus, ChunkerError> { 79 | debug_assert!(max <= self.buffer.len()); 80 | debug_assert!(pos <= self.buffer.len()); 81 | try!(w.write_all(&self.buffer[..pos]).map_err(ChunkerError::Write)); 82 | unsafe { ptr::copy(self.buffer[pos..].as_ptr(), self.buffer.as_mut_ptr(), max-pos) }; 83 | self.buffered = max-pos; 84 | Ok(ChunkerStatus::Continue) 85 | } 86 | } 87 | 88 | 89 | impl Chunker for FastCdcChunker { 90 | #[allow(unknown_lints,explicit_counter_loop,needless_range_loop)] 91 | fn chunk(&mut self, r: &mut Read, w: &mut Write) -> Result<ChunkerStatus, ChunkerError> { 92 | let mut max; 93 | let mut hash = 0u64; 94 | let mut pos = 0; 95 | loop { 96 | // Fill the buffer, there might be some bytes still in there from last chunk 97 | max = try!(r.read(&mut self.buffer[self.buffered..]).map_err(ChunkerError::Read)) + self.buffered; 98 | // If nothing to do, finish 99 | if max == 0 { 100 | return Ok(ChunkerStatus::Finished) 101 | } 102 | let min_size_p = cmp::min(max, cmp::max(self.min_size as isize - pos as isize, 0) as usize); 103 | let avg_size_p = cmp::min(max, cmp::max(self.avg_size as isize - pos as isize, 0) as usize); 104 | let max_size_p = cmp::min(max, cmp::max(self.max_size as isize - pos as isize, 0) as usize); 105 | // Skipping first min_size bytes. This is ok as same data still results in same hash. 106 | if self.avg_size > pos { 107 | for i in min_size_p..avg_size_p { 108 | hash = (hash << 1).wrapping_add(self.gear[self.buffer[i] as usize]); 109 | if hash & self.mask_short == 0 { 110 | return self.write_output(w, i + 1, max); 111 | } 112 | } 113 | } 114 | if self.max_size > pos { 115 | for i in avg_size_p..max_size_p { 116 | hash = (hash << 1).wrapping_add(self.gear[self.buffer[i] as usize]); 117 | if hash & self.mask_long == 0 { 118 | return self.write_output(w, i+1, max); 119 | } 120 | } 121 | } 122 | if max + pos >= self.max_size { 123 | return self.write_output(w, max_size_p, max); 124 | } 125 | pos += max; 126 | try!(w.write_all(&self.buffer[..max]).map_err(ChunkerError::Write)); 127 | self.buffered = 0; 128 | } 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/chunking/fixed.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | use std::cmp::min; 4 | 5 | 6 | pub struct FixedChunker { 7 | buffer: [u8; 0x1000], 8 | size: usize 9 | } 10 | 11 | impl FixedChunker { 12 | pub fn new(avg_size: usize) -> FixedChunker { 13 | FixedChunker{ 14 | buffer: [0; 0x1000], 15 | size: avg_size, 16 | } 17 | } 18 | } 19 | 20 | impl Chunker for FixedChunker { 21 | #[allow(unknown_lints,explicit_counter_loop)] 22 | fn chunk(&mut self, r: &mut Read, w: &mut Write) -> Result<ChunkerStatus, ChunkerError> { 23 | let mut todo = self.size; 24 | loop { 25 | // Read the next block, at most the remaining bytes of this chunk 26 | let max_read = min(todo, self.buffer.len()); 27 | let read = try!(r.read(&mut self.buffer[..max_read]).map_err(ChunkerError::Read)); 28 | // If nothing to do, finish 29 | if read == 0 { 30 | return Ok(ChunkerStatus::Finished) 31 | } 32 | // Write the bytes just read out to the sink 33 |
try!(w.write_all(&self.buffer[..read]).map_err(ChunkerError::Write)); 34 | todo -= read; 35 | if todo == 0 { 36 | return Ok(ChunkerStatus::Continue) 37 | } 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/chunking/mod.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, Write, Read}; 2 | 3 | mod fixed; 4 | mod ae; 5 | mod rabin; 6 | mod fastcdc; 7 | #[cfg(test)] mod test; 8 | #[cfg(feature = "bench")] mod benches; 9 | 10 | pub use self::fixed::FixedChunker; 11 | pub use self::ae::AeChunker; 12 | pub use self::rabin::RabinChunker; 13 | pub use self::fastcdc::FastCdcChunker; 14 | 15 | // https://moinakg.wordpress.com/2013/06/22/high-performance-content-defined-chunking/ 16 | 17 | // Paper: "A Comprehensive Study of the Past, Present, and Future of Data Deduplication" 18 | // Paper-URL: http://wxia.hustbackup.cn/IEEE-Survey-final.pdf 19 | 20 | // https://borgbackup.readthedocs.io/en/stable/internals.html#chunks 21 | // https://github.com/bup/bup/blob/master/lib/bup/bupsplit.c 22 | 23 | quick_error!{ 24 | #[derive(Debug)] 25 | pub enum ChunkerError { 26 | Read(err: io::Error) { 27 | cause(err) 28 | description(tr!("Failed to read input")) 29 | display("{}", tr_format!("Chunker error: failed to read input\n\tcaused by: {}", err)) 30 | } 31 | Write(err: io::Error) { 32 | cause(err) 33 | description(tr!("Failed to write to output")) 34 | display("{}", tr_format!("Chunker error: failed to write to output\n\tcaused by: {}", err)) 35 | } 36 | Custom(reason: &'static str) { 37 | from() 38 | description(tr!("Custom error")) 39 | display("{}", tr_format!("Chunker error: {}", reason)) 40 | } 41 | } 42 | } 43 | 44 | 45 | #[derive(Debug, Eq, PartialEq)] 46 | pub enum ChunkerStatus { 47 | Continue, 48 | Finished 49 | } 50 | 51 | pub trait Chunker { 52 | fn chunk(&mut self, r: &mut Read, w: &mut Write) -> Result<ChunkerStatus, ChunkerError>; 53 | } 54 | -------------------------------------------------------------------------------- /src/chunking/rabin.rs: -------------------------------------------------------------------------------- 1 | use std::collections::VecDeque; 2 | use std::ptr; 3 | 4 | use super::*; 5 | 6 | // Rabin Chunker 7 | // Paper: "Fingerprinting by Random Polynomials" 8 | // Paper-URL: http://www.xmailserver.org/rabin.pdf 9 | // Paper: "Redundancy Elimination Within Large Collections of Files" 10 | // Paper-URL: https://www.usenix.org/legacy/event/usenix04/tech/general/full_papers/kulkarni/kulkarni_html/paper.html 11 | // Wikipedia: https://en.wikipedia.org/wiki/Rabin_fingerprint 12 | 13 | 14 | fn wrapping_pow(mut base: u32, mut exp: u32) -> u32 { 15 | let mut acc: u32 = 1; 16 | while exp > 0 { 17 | if exp % 2 == 1 { 18 | acc = acc.wrapping_mul(base) 19 | } 20 | base = base.wrapping_mul(base); 21 | exp /= 2; 22 | } 23 | acc 24 | } 25 | 26 | fn create_table(alpha: u32, window_size: usize) -> [u32; 256] { 27 | let mut table = [0u32; 256]; 28 | let a = wrapping_pow(alpha, window_size as u32); 29 | for i in 0..table.len() as u32 { 30 | table[i as usize] = i.wrapping_mul(a); 31 | } 32 | table 33 | } 34 | 35 | 36 | pub struct RabinChunker { 37 | buffer: [u8; 0x1000], 38 | buffered: usize, 39 | seed: u32, 40 | alpha: u32, 41 | table: [u32; 256], 42 | min_size: usize, 43 | max_size: usize, 44 | window_size: usize, 45 | chunk_mask: u32, 46 | } 47 | 48 | 49 | impl RabinChunker { 50 | pub fn new(avg_size: usize, seed: u32) -> Self { 51 | let chunk_mask = (avg_size as u32).next_power_of_two() - 1; 52 | let
window_size = avg_size/4-1; 53 | let alpha = 1_664_525;//153191; 54 | RabinChunker { 55 | buffer: [0; 0x1000], 56 | buffered: 0, 57 | table: create_table(alpha, window_size), 58 | alpha, 59 | seed, 60 | min_size: avg_size/4, 61 | max_size: avg_size*4, 62 | window_size, 63 | chunk_mask, 64 | } 65 | } 66 | } 67 | 68 | impl Chunker for RabinChunker { 69 | #[allow(unknown_lints,explicit_counter_loop)] 70 | fn chunk(&mut self, r: &mut Read, w: &mut Write) -> Result { 71 | let mut max; 72 | let mut hash = 0u32; 73 | let mut pos = 0; 74 | let mut window = VecDeque::with_capacity(self.window_size); 75 | loop { 76 | // Fill the buffer, there might be some bytes still in there from last chunk 77 | max = try!(r.read(&mut self.buffer[self.buffered..]).map_err(ChunkerError::Read)) + self.buffered; 78 | // If nothing to do, finish 79 | if max == 0 { 80 | return Ok(ChunkerStatus::Finished) 81 | } 82 | for i in 0..max { 83 | let val = self.buffer[i]; 84 | if pos >= self.max_size { 85 | try!(w.write_all(&self.buffer[..i+1]).map_err(ChunkerError::Write)); 86 | unsafe { ptr::copy(self.buffer[i+1..].as_ptr(), self.buffer.as_mut_ptr(), max-i-1) }; 87 | self.buffered = max-i-1; 88 | return Ok(ChunkerStatus::Continue); 89 | } 90 | // Hash update 91 | hash = hash.wrapping_mul(self.alpha).wrapping_add(u32::from(val)); 92 | if pos >= self.window_size { 93 | let take = window.pop_front().unwrap(); 94 | hash = hash.wrapping_sub(self.table[take as usize]); 95 | if pos >= self.min_size && ((hash ^ self.seed) & self.chunk_mask) == 0 { 96 | try!(w.write_all(&self.buffer[..i+1]).map_err(ChunkerError::Write)); 97 | unsafe { ptr::copy(self.buffer[i+1..].as_ptr(), self.buffer.as_mut_ptr(), max-i-1) }; 98 | self.buffered = max-i-1; 99 | return Ok(ChunkerStatus::Continue); 100 | } 101 | } 102 | pos += 1; 103 | window.push_back(val); 104 | } 105 | try!(w.write_all(&self.buffer[..max]).map_err(ChunkerError::Write)); 106 | self.buffered = 0; 107 | } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/chunking/test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | use std::io::Cursor; 4 | 5 | 6 | fn random_data(seed: u64, size: usize) -> Vec { 7 | assert_eq!(size % 4, 0); 8 | let mut data = vec![0; size]; 9 | let a = 6364136223846793005; 10 | let c = 1442695040888963407; 11 | let mut v = seed; 12 | for i in 0..size/4 { 13 | v = v.wrapping_mul(a).wrapping_add(c); 14 | data[4*i] = ((v >> 24) & 0xff) as u8; 15 | data[4*i+1] = ((v >> 16) & 0xff) as u8; 16 | data[4*i+2] = ((v >> 8) & 0xff) as u8; 17 | data[4*i+3] = (v & 0xff) as u8; 18 | } 19 | data 20 | } 21 | 22 | fn test_chunking(chunker: &mut Chunker, data: &[u8], chunk_lens: Option<&[usize]>) -> usize { 23 | let mut cursor = Cursor::new(&data); 24 | let mut chunks = vec![]; 25 | let mut chunk = vec![]; 26 | while chunker.chunk(&mut cursor, &mut chunk).unwrap() == ChunkerStatus::Continue { 27 | chunks.push(chunk); 28 | chunk = vec![]; 29 | } 30 | chunks.push(chunk); 31 | let mut pos = 0; 32 | for chunk in &chunks { 33 | assert!(pos+chunk.len() <= data.len()); 34 | assert_eq!(&data[pos..pos+chunk.len()], chunk as &[u8]); 35 | pos += chunk.len(); 36 | } 37 | if let Some(chunk_lens) = chunk_lens { 38 | assert_eq!(chunk_lens.len(), chunks.len()); 39 | for (i, chunk) in chunks.iter().enumerate() { 40 | assert_eq!(chunk.len(), chunk_lens[i]); 41 | } 42 | } 43 | assert_eq!(pos, data.len()); 44 | chunks.len() 45 | } 46 | 47 | 48 | #[test] 49 | fn test_fixed() { 50 | 
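// Annotation (not in the original source): `random_data` above is a 64-bit
// linear congruential generator using Knuth's MMIX constants
// (a = 6364136223846793005, c = 1442695040888963407), so every run yields the
// same byte stream and the expected chunk-length vectors in these tests stay
// stable. `test_chunking` checks that the chunks concatenate back to the
// original input and returns the chunk count; each loop below then asserts
// that the count stays within a factor of four of the ideal
// data.len() / avg_size.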
test_chunking(&mut FixedChunker::new(8192), &random_data(0, 128*1024), 51 | Some(&[8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 52 | 8192, 8192, 8192, 8192, 8192, 8192, 0])); 53 | let data = random_data(0, 10*1024*1024); 54 | for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { 55 | let mut chunker = FixedChunker::new(1024*n); 56 | let len = test_chunking(&mut chunker, &data, None); 57 | assert!(len >= data.len()/n/1024/4); 58 | assert!(len <= data.len()/n/1024*4); 59 | } 60 | } 61 | 62 | #[test] 63 | fn test_ae() { 64 | test_chunking(&mut AeChunker::new(8192), &random_data(0, 128*1024), 65 | Some(&[7979, 8046, 7979, 8192, 8192, 8192, 7965, 8158, 8404, 8241, 66 | 8011, 8302, 8120, 8335, 8192, 8192, 572])); 67 | let data = random_data(0, 10*1024*1024); 68 | for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { 69 | let mut chunker = AeChunker::new(1024*n); 70 | let len = test_chunking(&mut chunker, &data, None); 71 | assert!(len >= data.len()/n/1024/4); 72 | assert!(len <= data.len()/n/1024*4); 73 | } 74 | } 75 | 76 | #[test] 77 | fn test_rabin() { 78 | test_chunking(&mut RabinChunker::new(8192, 0), &random_data(0, 128*1024), 79 | Some(&[8604, 4190, 32769, 3680, 26732, 3152, 9947, 6487, 25439, 3944, 80 | 6128])); 81 | let data = random_data(0, 10*1024*1024); 82 | for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { 83 | let mut chunker = RabinChunker::new(1024*n, 0); 84 | let len = test_chunking(&mut chunker, &data, None); 85 | assert!(len >= data.len()/n/1024/4); 86 | assert!(len <= data.len()/n/1024*4); 87 | } 88 | } 89 | 90 | #[test] 91 | fn test_fastcdc() { 92 | test_chunking(&mut FastCdcChunker::new(8192, 0), &random_data(0, 128*1024), 93 | Some(&[8712, 8018, 2847, 9157, 8997, 8581, 8867, 5422, 5412, 9478, 94 | 11553, 9206, 4606, 8529, 3821, 11342, 6524])); 95 | let data = random_data(0, 10*1024*1024); 96 | for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { 97 | let mut chunker = FastCdcChunker::new(1024*n, 0); 98 | let len = test_chunking(&mut chunker, &data, None); 99 | assert!(len >= data.len()/n/1024/4); 100 | assert!(len <= data.len()/n/1024*4); 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/cli/logger.rs: -------------------------------------------------------------------------------- 1 | use log; 2 | pub use log::SetLoggerError; 3 | 4 | use ansi_term::{Color, Style}; 5 | 6 | 7 | struct Logger(log::Level); 8 | 9 | impl log::Log for Logger { 10 | fn enabled(&self, metadata: &log::Metadata) -> bool { 11 | metadata.level() <= self.0 12 | } 13 | 14 | fn flush(&self) {} 15 | 16 | fn log(&self, record: &log::Record) { 17 | if self.enabled(record.metadata()) { 18 | match record.level() { 19 | log::Level::Error => { 20 | eprintln!("{}: {}", Color::Red.bold().paint("error"), record.args()) 21 | } 22 | log::Level::Warn => { 23 | eprintln!( 24 | "{}: {}", 25 | Color::Yellow.bold().paint("warning"), 26 | record.args() 27 | ) 28 | } 29 | log::Level::Info => { 30 | eprintln!("{}: {}", Color::Green.bold().paint("info"), record.args()) 31 | } 32 | log::Level::Debug => { 33 | eprintln!("{}: {}", Style::new().bold().paint("debug"), record.args()) 34 | } 35 | log::Level::Trace => eprintln!("{}: {}", "trace", record.args()), 36 | } 37 | } 38 | } 39 | } 40 | 41 | pub fn init(level: log::Level) -> Result<(), SetLoggerError> { 42 | let logger = Logger(level); 43 | log::set_max_level(level.to_level_filter()); 44 | log::set_boxed_logger(Box::new(logger)) 45 | } 46 | 
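// Illustrative sketch (not part of the original file): how this logger is
// wired up at program start. `init` registers a boxed `Logger` with the
// `log` crate and caps the global level; afterwards the standard macros
// route through `Logger::log` and use the colors shown above.
//
//     fn setup_logging(verbose: bool) {
//         let level = if verbose { log::Level::Debug } else { log::Level::Info };
//         init(level).expect("a logger may only be installed once");
//         info!("logging ready");          // green "info:" prefix on stderr
//         debug!("only shown if verbose"); // bold "debug:" prefix
//     }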
-------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![recursion_limit="128"] 2 | #![allow(unknown_lints, float_cmp)] 3 | #![cfg_attr(feature = "bench", feature(test))] 4 | #[cfg(feature = "bench")] 5 | extern crate test; 6 | extern crate serde; 7 | extern crate serde_bytes; 8 | extern crate rmp_serde; 9 | #[macro_use] 10 | extern crate serde_utils; 11 | extern crate squash_sys as squash; 12 | extern crate blake2_rfc as blake2; 13 | extern crate murmurhash3; 14 | extern crate serde_yaml; 15 | #[macro_use] 16 | extern crate quick_error; 17 | extern crate chrono; 18 | #[macro_use] 19 | extern crate clap; 20 | #[macro_use] 21 | extern crate log; 22 | extern crate byteorder; 23 | extern crate sodiumoxide; 24 | extern crate libsodium_sys; 25 | extern crate ansi_term; 26 | extern crate filetime; 27 | extern crate regex; 28 | #[macro_use] 29 | extern crate lazy_static; 30 | extern crate fuse; 31 | extern crate rand; 32 | extern crate time; 33 | extern crate xattr; 34 | extern crate crossbeam; 35 | extern crate pbr; 36 | extern crate users; 37 | extern crate libc; 38 | extern crate tar; 39 | #[macro_use] 40 | extern crate runtime_fmt; 41 | extern crate locale_config; 42 | extern crate mmap; 43 | 44 | #[macro_use] mod translation; 45 | pub mod util; 46 | mod bundledb; 47 | mod repository; 48 | mod cli; 49 | mod prelude; 50 | mod mount; 51 | mod chunker; 52 | mod chunking; 53 | mod index; 54 | 55 | use std::process::exit; 56 | 57 | fn main() { 58 | match cli::run() { 59 | Ok(()) => exit(0), 60 | Err(code) => exit(code.code()), 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/prelude.rs: -------------------------------------------------------------------------------- 1 | pub use util::*; 2 | pub use bundledb::{BundleReader, BundleMode, BundleWriter, BundleInfo, BundleId, BundleDbError, 3 | BundleDb, BundleWriterError, StoredBundle, BundleStatistics}; 4 | pub use chunker::{ChunkerType, Chunker, ChunkerStatus, ChunkerError}; 5 | pub use repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType, 6 | IntegrityError, BackupFileError, BackupError, BackupOptions, BundleAnalysis, 7 | FileData, DiffType, InodeError, RepositoryLayout, Location, 8 | RepositoryStatistics}; 9 | pub use index::{Index, IndexError, IndexStatistics}; 10 | pub use mount::FuseFilesystem; 11 | pub use translation::CowStr; 12 | 13 | pub use serde::{Serialize, Deserialize}; 14 | 15 | pub use quick_error::ResultExt; 16 | -------------------------------------------------------------------------------- /src/repository/backup_file.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | 3 | use std::io::{self, BufReader, BufWriter, Read, Write}; 4 | use std::fs::{self, File}; 5 | use std::path::{Path, PathBuf}; 6 | use std::collections::HashMap; 7 | 8 | 9 | static HEADER_STRING: [u8; 7] = *b"zvault\x03"; 10 | static HEADER_VERSION: u8 = 1; 11 | 12 | 13 | quick_error!{ 14 | #[derive(Debug)] 15 | pub enum BackupFileError { 16 | Read(err: io::Error, path: PathBuf) { 17 | cause(err) 18 | description(tr!("Failed to read backup")) 19 | display("{}", tr_format!("Backup file error: failed to read backup file {:?}\n\tcaused by: {}", path, err)) 20 | } 21 | Write(err: io::Error, path: PathBuf) { 22 | cause(err) 23 | description(tr!("Failed to write backup")) 24 | display("{}", 
tr_format!("Backup file error: failed to write backup file {:?}\n\tcaused by: {}", path, err)) 25 | } 26 | Decode(err: msgpack::DecodeError, path: PathBuf) { 27 | cause(err) 28 | context(path: &'a Path, err: msgpack::DecodeError) -> (err, path.to_path_buf()) 29 | description(tr!("Failed to decode backup")) 30 | display("{}", tr_format!("Backup file error: failed to decode backup of {:?}\n\tcaused by: {}", path, err)) 31 | } 32 | Encode(err: msgpack::EncodeError, path: PathBuf) { 33 | cause(err) 34 | context(path: &'a Path, err: msgpack::EncodeError) -> (err, path.to_path_buf()) 35 | description(tr!("Failed to encode backup")) 36 | display("{}", tr_format!("Backup file error: failed to encode backup of {:?}\n\tcaused by: {}", path, err)) 37 | } 38 | WrongHeader(path: PathBuf) { 39 | description(tr!("Wrong header")) 40 | display("{}", tr_format!("Backup file error: wrong header on backup {:?}", path)) 41 | } 42 | UnsupportedVersion(path: PathBuf, version: u8) { 43 | description(tr!("Wrong version")) 44 | display("{}", tr_format!("Backup file error: unsupported version on backup {:?}: {}", path, version)) 45 | } 46 | Decryption(err: EncryptionError, path: PathBuf) { 47 | cause(err) 48 | context(path: &'a Path, err: EncryptionError) -> (err, path.to_path_buf()) 49 | description(tr!("Decryption failed")) 50 | display("{}", tr_format!("Backup file error: decryption failed on backup {:?}\n\tcaused by: {}", path, err)) 51 | } 52 | Encryption(err: EncryptionError) { 53 | from() 54 | cause(err) 55 | description(tr!("Encryption failed")) 56 | display("{}", tr_format!("Backup file error: encryption failed\n\tcaused by: {}", err)) 57 | } 58 | PartialBackupsList(partial: HashMap, failed: Vec) { 59 | description(tr!("Some backups could not be loaded")) 60 | display("{}", tr_format!("Backup file error: some backups could not be loaded: {:?}", failed)) 61 | } 62 | } 63 | } 64 | 65 | #[derive(Default, Debug, Clone)] 66 | struct BackupHeader { 67 | pub encryption: Option 68 | } 69 | serde_impl!(BackupHeader(u8) { 70 | encryption: Option => 0 71 | }); 72 | 73 | 74 | #[derive(Default, Debug, Clone)] 75 | pub struct Backup { 76 | pub root: ChunkList, 77 | pub total_data_size: u64, // Sum of all raw sizes of all entities 78 | pub changed_data_size: u64, // Sum of all raw sizes of all entities actively stored 79 | pub deduplicated_data_size: u64, // Sum of all raw sizes of all new bundles 80 | pub encoded_data_size: u64, // Sum al all encoded sizes of all new bundles 81 | pub bundle_count: usize, 82 | pub chunk_count: usize, 83 | pub avg_chunk_size: f32, 84 | pub timestamp: i64, 85 | pub duration: f32, 86 | pub file_count: usize, 87 | pub dir_count: usize, 88 | pub host: String, 89 | pub path: String, 90 | pub config: Config, 91 | pub modified: bool, 92 | pub user_names: HashMap, 93 | pub group_names: HashMap 94 | } 95 | serde_impl!(Backup(u8?) 
{ 96 | root: ChunkList => 0, 97 | total_data_size: u64 => 1, 98 | changed_data_size: u64 => 2, 99 | deduplicated_data_size: u64 => 3, 100 | encoded_data_size: u64 => 4, 101 | bundle_count: usize => 5, 102 | chunk_count: usize => 6, 103 | avg_chunk_size: f32 => 7, 104 | timestamp: i64 => 8, 105 | duration: f32 => 9, 106 | file_count: usize => 10, 107 | dir_count: usize => 11, 108 | host: String => 12, 109 | path: String => 13, 110 | config: Config => 14, 111 | modified: bool => 15, 112 | user_names: HashMap => 16, 113 | group_names: HashMap => 17 114 | }); 115 | 116 | impl Backup { 117 | pub fn read_from>(crypto: &Crypto, path: P) -> Result { 118 | let path = path.as_ref(); 119 | let mut file = BufReader::new(try!(File::open(path).map_err(|err| { 120 | BackupFileError::Read(err, path.to_path_buf()) 121 | }))); 122 | let mut header = [0u8; 8]; 123 | try!(file.read_exact(&mut header).map_err(|err| { 124 | BackupFileError::Read(err, path.to_path_buf()) 125 | })); 126 | if header[..HEADER_STRING.len()] != HEADER_STRING { 127 | return Err(BackupFileError::WrongHeader(path.to_path_buf())); 128 | } 129 | let version = header[HEADER_STRING.len()]; 130 | if version != HEADER_VERSION { 131 | return Err(BackupFileError::UnsupportedVersion( 132 | path.to_path_buf(), 133 | version 134 | )); 135 | } 136 | let header: BackupHeader = try!(msgpack::decode_from_stream(&mut file).context(path)); 137 | let mut data = Vec::new(); 138 | try!(file.read_to_end(&mut data).map_err(|err| { 139 | BackupFileError::Read(err, path.to_path_buf()) 140 | })); 141 | if let Some(ref encryption) = header.encryption { 142 | data = try!(crypto.decrypt(encryption, &data)); 143 | } 144 | Ok(try!(msgpack::decode(&data).context(path))) 145 | } 146 | 147 | pub fn save_to>( 148 | &self, 149 | crypto: &Crypto, 150 | encryption: Option, 151 | path: P, 152 | ) -> Result<(), BackupFileError> { 153 | let path = path.as_ref(); 154 | let mut data = try!(msgpack::encode(self).context(path)); 155 | if let Some(ref encryption) = encryption { 156 | data = try!(crypto.encrypt(encryption, &data)); 157 | } 158 | let mut file = BufWriter::new(try!(File::create(path).map_err(|err| { 159 | BackupFileError::Write(err, path.to_path_buf()) 160 | }))); 161 | try!(file.write_all(&HEADER_STRING).map_err(|err| { 162 | BackupFileError::Write(err, path.to_path_buf()) 163 | })); 164 | try!(file.write_all(&[HEADER_VERSION]).map_err(|err| { 165 | BackupFileError::Write(err, path.to_path_buf()) 166 | })); 167 | let header = BackupHeader { encryption }; 168 | try!(msgpack::encode_to_stream(&header, &mut file).context(path)); 169 | try!(file.write_all(&data).map_err(|err| { 170 | BackupFileError::Write(err, path.to_path_buf()) 171 | })); 172 | Ok(()) 173 | } 174 | 175 | pub fn get_all_from>( 176 | crypto: &Crypto, 177 | path: P, 178 | ) -> Result, BackupFileError> { 179 | let mut backups = HashMap::new(); 180 | let base_path = path.as_ref(); 181 | let path = path.as_ref(); 182 | if !path.exists() { 183 | tr_debug!("Backup root folder does not exist"); 184 | return Ok(backups); 185 | } 186 | let mut paths = vec![path.to_path_buf()]; 187 | let mut failed_paths = vec![]; 188 | while let Some(path) = paths.pop() { 189 | for entry in try!(fs::read_dir(&path).map_err(|e| { 190 | BackupFileError::Read(e, path.clone()) 191 | })) 192 | { 193 | let entry = try!(entry.map_err(|e| BackupFileError::Read(e, path.clone()))); 194 | let path = entry.path(); 195 | if path.is_dir() { 196 | paths.push(path); 197 | } else { 198 | let relpath = path.strip_prefix(&base_path).unwrap(); 199 
| if relpath.extension() != Some("backup".as_ref()) { 200 | continue; 201 | } 202 | let name = relpath 203 | .with_file_name(relpath.file_stem().unwrap()) 204 | .to_string_lossy() 205 | .to_string(); 206 | if let Ok(backup) = Backup::read_from(crypto, &path) { 207 | backups.insert(name, backup); 208 | } else { 209 | failed_paths.push(path.clone()); 210 | } 211 | } 212 | } 213 | } 214 | if failed_paths.is_empty() { 215 | Ok(backups) 216 | } else { 217 | Err(BackupFileError::PartialBackupsList(backups, failed_paths)) 218 | } 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /src/repository/basic_io.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | 3 | use std::mem; 4 | use std::cmp::min; 5 | use std::collections::VecDeque; 6 | use std::io::{self, Read, Write, Cursor}; 7 | 8 | 9 | pub struct ChunkReader<'a> { 10 | chunks: VecDeque, 11 | data: Vec, 12 | pos: usize, 13 | repo: &'a mut Repository 14 | } 15 | 16 | impl<'a> ChunkReader<'a> { 17 | pub fn new(repo: &'a mut Repository, chunks: ChunkList) -> Self { 18 | ChunkReader { 19 | repo, 20 | chunks: chunks.into_inner().into(), 21 | data: vec![], 22 | pos: 0 23 | } 24 | } 25 | } 26 | 27 | impl<'a> Read for ChunkReader<'a> { 28 | fn read(&mut self, buf: &mut [u8]) -> Result { 29 | let mut bpos = 0; 30 | loop { 31 | if buf.len() == bpos { 32 | break; 33 | } 34 | if self.data.len() == self.pos { 35 | if let Some(chunk) = self.chunks.pop_front() { 36 | self.data = match self.repo.get_chunk(chunk.0) { 37 | Ok(Some(data)) => data, 38 | Ok(None) => { 39 | return Err(io::Error::new( 40 | io::ErrorKind::Other, 41 | IntegrityError::MissingChunk(chunk.0) 42 | )) 43 | } 44 | Err(err) => return Err(io::Error::new(io::ErrorKind::Other, err)), 45 | }; 46 | self.pos = 0; 47 | } else { 48 | break; 49 | } 50 | } 51 | let l = min(self.data.len() - self.pos, buf.len() - bpos); 52 | buf[bpos..bpos + l].copy_from_slice(&self.data[self.pos..self.pos + l]); 53 | bpos += l; 54 | self.pos += l; 55 | } 56 | Ok(bpos) 57 | } 58 | } 59 | 60 | 61 | impl Repository { 62 | #[inline] 63 | pub fn get_bundle_id(&self, id: u32) -> Result { 64 | self.bundle_map.get(id).ok_or_else(|| { 65 | IntegrityError::MissingBundleId(id).into() 66 | }) 67 | } 68 | 69 | pub fn get_chunk(&mut self, hash: Hash) -> Result>, RepositoryError> { 70 | // Find bundle and chunk id in index 71 | let found = if let Some(found) = self.index.get(&hash) { 72 | found 73 | } else { 74 | return Ok(None); 75 | }; 76 | // Lookup bundle id from map 77 | let bundle_id = try!(self.get_bundle_id(found.bundle)); 78 | // Get chunk from bundle 79 | Ok(Some(try!( 80 | self.bundles.get_chunk(&bundle_id, found.chunk as usize) 81 | ))) 82 | } 83 | 84 | #[inline] 85 | pub fn put_chunk( 86 | &mut self, 87 | mode: BundleMode, 88 | hash: Hash, 89 | data: &[u8], 90 | ) -> Result<(), RepositoryError> { 91 | // If this chunk is in the index, ignore it 92 | if self.index.contains(&hash) { 93 | return Ok(()); 94 | } 95 | self.put_chunk_override(mode, hash, data) 96 | } 97 | 98 | fn write_chunk_to_bundle_and_index( 99 | &mut self, 100 | mode: BundleMode, 101 | hash: Hash, 102 | data: &[u8], 103 | ) -> Result<(), RepositoryError> { 104 | let writer = match mode { 105 | BundleMode::Data => &mut self.data_bundle, 106 | BundleMode::Meta => &mut self.meta_bundle, 107 | }; 108 | // ...alocate one if needed 109 | if writer.is_none() { 110 | *writer = Some(try!(self.bundles.create_bundle( 111 | mode, 112 | self.config.hash, 113 | 
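// Annotation (not in the original source): a freshly created bundle writer
// inherits the repository-wide settings passed here, so every new bundle is
// hashed, compressed and encrypted exactly as configured for the repository.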
self.config.compression.clone(), 114 | self.config.encryption.clone() 115 | ))); 116 | } 117 | debug_assert!(writer.is_some()); 118 | // Add chunk to bundle writer and determine the size of the bundle 119 | let writer_obj = writer.as_mut().unwrap(); 120 | let chunk_id = try!(writer_obj.add(data, hash)); 121 | let bundle_id = match mode { 122 | BundleMode::Data => self.next_data_bundle, 123 | BundleMode::Meta => self.next_meta_bundle, 124 | }; 125 | // Add location to the index 126 | try!(self.index.set( 127 | &hash, 128 | &Location::new(bundle_id, chunk_id as u32) 129 | )); 130 | Ok(()) 131 | } 132 | 133 | fn finish_bundle(&mut self, mode: BundleMode) -> Result<(), RepositoryError> { 134 | // Calculate the next free bundle id now (late lifetime prevents this) 135 | let next_free_bundle_id = self.next_free_bundle_id(); 136 | let writer = match mode { 137 | BundleMode::Data => &mut self.data_bundle, 138 | BundleMode::Meta => &mut self.meta_bundle, 139 | }; 140 | if writer.is_none() { 141 | return Ok(()); 142 | } 143 | let bundle_id = match mode { 144 | BundleMode::Data => self.next_data_bundle, 145 | BundleMode::Meta => self.next_meta_bundle, 146 | }; 147 | let mut finished = None; 148 | mem::swap(writer, &mut finished); 149 | let bundle = try!(self.bundles.add_bundle(finished.unwrap())); 150 | self.bundle_map.set(bundle_id, bundle.id.clone()); 151 | if self.next_meta_bundle == bundle_id { 152 | self.next_meta_bundle = next_free_bundle_id 153 | } 154 | if self.next_data_bundle == bundle_id { 155 | self.next_data_bundle = next_free_bundle_id 156 | } 157 | Ok(()) 158 | } 159 | 160 | fn finish_bundle_if_needed(&mut self, mode: BundleMode) -> Result<(), RepositoryError> { 161 | let (size, raw_size) = { 162 | let writer = match mode { 163 | BundleMode::Data => &mut self.data_bundle, 164 | BundleMode::Meta => &mut self.meta_bundle, 165 | }; 166 | if let Some(ref writer) = *writer { 167 | (writer.estimate_final_size(), writer.raw_size()) 168 | } else { 169 | return Ok(()); 170 | } 171 | }; 172 | if size >= self.config.bundle_size || raw_size >= 4 * self.config.bundle_size { 173 | if mode == BundleMode::Meta { 174 | //First store the current data bundle as meta referrs to those chunks 175 | try!(self.finish_bundle(BundleMode::Data)) 176 | } 177 | try!(self.finish_bundle(mode)) 178 | } 179 | Ok(()) 180 | } 181 | 182 | #[inline] 183 | pub fn put_chunk_override( 184 | &mut self, 185 | mode: BundleMode, 186 | hash: Hash, 187 | data: &[u8], 188 | ) -> Result<(), RepositoryError> { 189 | try!(self.write_chunk_to_bundle_and_index(mode, hash, data)); 190 | self.finish_bundle_if_needed(mode) 191 | } 192 | 193 | #[inline] 194 | pub fn put_data( 195 | &mut self, 196 | mode: BundleMode, 197 | data: &[u8], 198 | ) -> Result { 199 | let mut input = Cursor::new(data); 200 | self.put_stream(mode, &mut input) 201 | } 202 | 203 | pub fn put_stream( 204 | &mut self, 205 | mode: BundleMode, 206 | data: &mut R, 207 | ) -> Result { 208 | let avg_size = self.config.chunker.avg_size(); 209 | let mut chunks = Vec::new(); 210 | let mut chunk = Vec::with_capacity(avg_size * 2); 211 | loop { 212 | chunk.clear(); 213 | let mut output = Cursor::new(chunk); 214 | let res = try!(self.chunker.chunk(data, &mut output)); 215 | chunk = output.into_inner(); 216 | let hash = self.config.hash.hash(&chunk); 217 | try!(self.put_chunk(mode, hash, &chunk)); 218 | chunks.push((hash, chunk.len() as u32)); 219 | if res == ChunkerStatus::Finished { 220 | break; 221 | } 222 | } 223 | Ok(chunks.into()) 224 | } 225 | 226 | pub fn get_data(&mut 
self, chunks: &[Chunk]) -> Result, RepositoryError> { 227 | let mut data = 228 | Vec::with_capacity(chunks.iter().map(|&(_, size)| size).sum::() as usize); 229 | try!(self.get_stream(chunks, &mut data)); 230 | Ok(data) 231 | } 232 | 233 | #[inline] 234 | pub fn get_reader(&mut self, chunks: ChunkList) -> ChunkReader { 235 | ChunkReader::new(self, chunks) 236 | } 237 | 238 | pub fn get_stream( 239 | &mut self, 240 | chunks: &[Chunk], 241 | w: &mut W, 242 | ) -> Result<(), RepositoryError> { 243 | for &(ref hash, len) in chunks { 244 | let data = try!(try!(self.get_chunk(*hash)).ok_or_else(|| { 245 | IntegrityError::MissingChunk(*hash) 246 | })); 247 | debug_assert_eq!(data.len() as u32, len); 248 | try!(w.write_all(&data)); 249 | } 250 | Ok(()) 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /src/repository/bundle_map.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | 3 | use std::collections::HashMap; 4 | use std::path::Path; 5 | use std::io::{self, BufReader, Read, Write, BufWriter}; 6 | use std::fs::File; 7 | 8 | 9 | static HEADER_STRING: [u8; 7] = *b"zbunmap"; 10 | static HEADER_VERSION: u8 = 1; 11 | 12 | 13 | quick_error!{ 14 | #[derive(Debug)] 15 | pub enum BundleMapError { 16 | Io(err: io::Error) { 17 | from() 18 | cause(err) 19 | description(tr!("Failed to read/write bundle map")) 20 | } 21 | Decode(err: msgpack::DecodeError) { 22 | from() 23 | cause(err) 24 | description(tr!("Failed to decode bundle map")) 25 | } 26 | Encode(err: msgpack::EncodeError) { 27 | from() 28 | cause(err) 29 | description(tr!("Failed to encode bundle map")) 30 | } 31 | WrongHeader { 32 | description(tr!("Wrong header")) 33 | } 34 | WrongVersion(version: u8) { 35 | description(tr!("Wrong version")) 36 | display("{}", tr_format!("Wrong version: {}", version)) 37 | } 38 | } 39 | } 40 | 41 | 42 | pub struct BundleMap(HashMap); 43 | 44 | impl BundleMap { 45 | pub fn create() -> Self { 46 | BundleMap(Default::default()) 47 | } 48 | 49 | pub fn load>(path: P) -> Result { 50 | let mut file = BufReader::new(try!(File::open(path.as_ref()))); 51 | let mut header = [0u8; 8]; 52 | try!(file.read_exact(&mut header)); 53 | if header[..HEADER_STRING.len()] != HEADER_STRING { 54 | return Err(BundleMapError::WrongHeader); 55 | } 56 | let version = header[HEADER_STRING.len()]; 57 | if version != HEADER_VERSION { 58 | return Err(BundleMapError::WrongVersion(version)); 59 | } 60 | Ok(BundleMap(try!(msgpack::decode_from_stream(&mut file)))) 61 | } 62 | 63 | pub fn save>(&self, path: P) -> Result<(), BundleMapError> { 64 | let mut file = BufWriter::new(try!(File::create(path))); 65 | try!(file.write_all(&HEADER_STRING)); 66 | try!(file.write_all(&[HEADER_VERSION])); 67 | msgpack::encode_to_stream(&self.0, &mut file).map_err(BundleMapError::Encode) 68 | } 69 | 70 | #[inline] 71 | pub fn get(&self, id: u32) -> Option { 72 | self.0.get(&id).cloned() 73 | } 74 | 75 | #[inline] 76 | pub fn remove(&mut self, id: u32) -> Option { 77 | self.0.remove(&id) 78 | } 79 | 80 | pub fn find(&self, bundle: &BundleId) -> Option { 81 | for (id, bundle_id) in &self.0 { 82 | if bundle == bundle_id { 83 | return Some(*id); 84 | } 85 | } 86 | None 87 | } 88 | 89 | #[inline] 90 | pub fn set(&mut self, id: u32, bundle: BundleId) { 91 | self.0.insert(id, bundle); 92 | } 93 | 94 | pub fn bundles(&self) -> Vec<(u32, BundleId)> { 95 | self.0 96 | .iter() 97 | .map(|(id, bundle)| (*id, bundle.clone())) 98 | .collect() 99 | } 100 | 101 | #[inline] 
102 | pub fn len(&self) -> usize { 103 | self.0.len() 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/repository/config.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | 3 | use serde_yaml; 4 | 5 | use std::fs::File; 6 | use std::path::Path; 7 | use std::io; 8 | 9 | 10 | quick_error!{ 11 | #[derive(Debug)] 12 | pub enum ConfigError { 13 | Io(err: io::Error) { 14 | from() 15 | cause(err) 16 | } 17 | Parse(reason: &'static str) { 18 | from() 19 | description(tr!("Failed to parse config")) 20 | display("{}", tr_format!("Failed to parse config: {}", reason)) 21 | } 22 | Yaml(err: serde_yaml::Error) { 23 | from() 24 | cause(err) 25 | description(tr!("Yaml format error")) 26 | display("{}", tr_format!("Yaml format error: {}", err)) 27 | } 28 | } 29 | } 30 | 31 | 32 | impl HashMethod { 33 | fn from_yaml(yaml: &str) -> Result { 34 | HashMethod::from(yaml).map_err(ConfigError::Parse) 35 | } 36 | 37 | fn to_yaml(&self) -> String { 38 | self.name().to_string() 39 | } 40 | } 41 | 42 | 43 | struct ChunkerYaml { 44 | method: String, 45 | avg_size: usize, 46 | seed: u64 47 | } 48 | impl Default for ChunkerYaml { 49 | fn default() -> Self { 50 | ChunkerYaml { 51 | method: "fastcdc".to_string(), 52 | avg_size: 16 * 1024, 53 | seed: 0 54 | } 55 | } 56 | } 57 | serde_impl!(ChunkerYaml(String) { 58 | method: String => "method", 59 | avg_size: usize => "avg_size", 60 | seed: u64 => "seed" 61 | }); 62 | 63 | impl ChunkerType { 64 | fn from_yaml(yaml: &ChunkerYaml) -> Result { 65 | ChunkerType::from(&yaml.method, yaml.avg_size, yaml.seed).map_err(ConfigError::Parse) 66 | } 67 | 68 | fn to_yaml(&self) -> ChunkerYaml { 69 | ChunkerYaml { 70 | method: self.name().to_string(), 71 | avg_size: self.avg_size(), 72 | seed: self.seed() 73 | } 74 | } 75 | } 76 | 77 | 78 | 79 | impl Compression { 80 | #[inline] 81 | fn from_yaml(yaml: &str) -> Result { 82 | Compression::from_string(yaml).map_err(|_| ConfigError::Parse(tr!("Invalid codec"))) 83 | } 84 | 85 | #[inline] 86 | fn to_yaml(&self) -> String { 87 | self.to_string() 88 | } 89 | } 90 | 91 | 92 | impl EncryptionMethod { 93 | #[inline] 94 | fn from_yaml(yaml: &str) -> Result { 95 | EncryptionMethod::from_string(yaml).map_err(|_| ConfigError::Parse(tr!("Invalid codec"))) 96 | } 97 | 98 | #[inline] 99 | fn to_yaml(&self) -> String { 100 | self.to_string() 101 | } 102 | } 103 | 104 | 105 | struct EncryptionYaml { 106 | method: String, 107 | key: String 108 | } 109 | impl Default for EncryptionYaml { 110 | fn default() -> Self { 111 | EncryptionYaml { 112 | method: "sodium".to_string(), 113 | key: "".to_string() 114 | } 115 | } 116 | } 117 | serde_impl!(EncryptionYaml(String) { 118 | method: String => "method", 119 | key: String => "key" 120 | }); 121 | 122 | 123 | 124 | struct ConfigYaml { 125 | compression: Option, 126 | encryption: Option, 127 | bundle_size: usize, 128 | chunker: ChunkerYaml, 129 | hash: String 130 | } 131 | impl Default for ConfigYaml { 132 | fn default() -> Self { 133 | ConfigYaml { 134 | compression: Some("brotli/5".to_string()), 135 | encryption: None, 136 | bundle_size: 25 * 1024 * 1024, 137 | chunker: ChunkerYaml::default(), 138 | hash: "blake2".to_string() 139 | } 140 | } 141 | } 142 | serde_impl!(ConfigYaml(String) { 143 | compression: Option => "compression", 144 | encryption: Option => "encryption", 145 | bundle_size: usize => "bundle_size", 146 | chunker: ChunkerYaml => "chunker", 147 | hash: String => "hash" 148 | }); 149 | 
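// Illustrative sketch (an assumed example, not shipped with the repository):
// a config.yaml matching the ConfigYaml mapping above, spelled out with the
// default values; the field names come from the serde_impl! keys.
//
//     compression: brotli/5
//     encryption: null
//     bundle_size: 26214400   # 25 * 1024 * 1024
//     chunker:
//       method: fastcdc
//       avg_size: 16384
//       seed: 0
//     hash: blake2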
150 | 151 | 152 | #[derive(Debug, Clone, Eq, PartialEq)] 153 | pub struct Config { 154 | pub compression: Option, 155 | pub encryption: Option, 156 | pub bundle_size: usize, 157 | pub chunker: ChunkerType, 158 | pub hash: HashMethod 159 | } 160 | impl Default for Config { 161 | fn default() -> Self { 162 | Config { 163 | compression: Some(Compression::from_string("brotli/3").unwrap()), 164 | encryption: None, 165 | bundle_size: 25 * 1024 * 1024, 166 | chunker: ChunkerType::from_string("fastcdc/16").unwrap(), 167 | hash: HashMethod::Blake2 168 | } 169 | } 170 | } 171 | serde_impl!(Config(u64) { 172 | compression: Option => 0, 173 | encryption: Option => 1, 174 | bundle_size: usize => 2, 175 | chunker: ChunkerType => 3, 176 | hash: HashMethod => 4 177 | }); 178 | 179 | impl Config { 180 | fn from_yaml(yaml: ConfigYaml) -> Result { 181 | let compression = if let Some(c) = yaml.compression { 182 | Some(try!(Compression::from_yaml(&c))) 183 | } else { 184 | None 185 | }; 186 | let encryption = if let Some(e) = yaml.encryption { 187 | let method = try!(EncryptionMethod::from_yaml(&e.method)); 188 | let key = try!(parse_hex(&e.key).map_err(|_| { 189 | ConfigError::Parse(tr!("Invalid public key")) 190 | })); 191 | Some((method, key.into())) 192 | } else { 193 | None 194 | }; 195 | Ok(Config { 196 | compression, 197 | encryption, 198 | bundle_size: yaml.bundle_size, 199 | chunker: try!(ChunkerType::from_yaml(&yaml.chunker)), 200 | hash: try!(HashMethod::from_yaml(&yaml.hash)) 201 | }) 202 | } 203 | 204 | fn to_yaml(&self) -> ConfigYaml { 205 | ConfigYaml { 206 | compression: self.compression.as_ref().map(|c| c.to_yaml()), 207 | encryption: self.encryption.as_ref().map(|e| { 208 | EncryptionYaml { 209 | method: e.0.to_yaml(), 210 | key: to_hex(&e.1[..]) 211 | } 212 | }), 213 | bundle_size: self.bundle_size, 214 | chunker: self.chunker.to_yaml(), 215 | hash: self.hash.to_yaml() 216 | } 217 | } 218 | 219 | pub fn load>(path: P) -> Result { 220 | let f = try!(File::open(path)); 221 | let config = try!(serde_yaml::from_reader(f)); 222 | Config::from_yaml(config) 223 | } 224 | 225 | pub fn save>(&self, path: P) -> Result<(), ConfigError> { 226 | let mut f = try!(File::create(path)); 227 | try!(serde_yaml::to_writer(&mut f, &self.to_yaml())); 228 | Ok(()) 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /src/repository/error.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | 3 | use std::io; 4 | use std::path::PathBuf; 5 | 6 | use super::backup_file::BackupFileError; 7 | use super::backup::BackupError; 8 | use super::bundle_map::BundleMapError; 9 | use super::config::ConfigError; 10 | use super::metadata::InodeError; 11 | 12 | 13 | quick_error!{ 14 | #[derive(Debug)] 15 | #[allow(unknown_lints,large_enum_variant)] 16 | pub enum RepositoryError { 17 | NoRemote { 18 | description(tr!("Remote storage not found")) 19 | display("{}", tr_format!("Repository error: The remote storage has not been found, may be it needs to be mounted?")) 20 | } 21 | Index(err: IndexError) { 22 | from() 23 | cause(err) 24 | description(tr!("Index error")) 25 | display("{}", tr_format!("Repository error: index error\n\tcaused by: {}", err)) 26 | } 27 | BundleDb(err: BundleDbError) { 28 | from() 29 | cause(err) 30 | description(tr!("Bundle error")) 31 | display("{}", tr_format!("Repository error: bundle db error\n\tcaused by: {}", err)) 32 | } 33 | BundleWriter(err: BundleWriterError) { 34 | from() 35 | cause(err) 36 | 
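// Annotation (not in the original source): the wrapped-error variants in
// this enum all follow the same quick_error pattern - `from()` enables
// automatic conversion via try!, `cause` links the error chain, and the
// translated display string appends the inner error after "caused by",
// producing the indented error traces zvault prints.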
description(tr!("Bundle write error")) 37 | display("{}", tr_format!("Repository error: failed to write to new bundle\n\tcaused by: {}", err)) 38 | } 39 | BackupFile(err: BackupFileError) { 40 | from() 41 | cause(err) 42 | description(tr!("Backup file error")) 43 | display("{}", tr_format!("Repository error: backup file error\n\tcaused by: {}", err)) 44 | } 45 | Chunker(err: ChunkerError) { 46 | from() 47 | cause(err) 48 | description(tr!("Chunker error")) 49 | display("{}", tr_format!("Repository error: failed to chunk data\n\tcaused by: {}", err)) 50 | } 51 | Config(err: ConfigError) { 52 | from() 53 | cause(err) 54 | description(tr!("Configuration error")) 55 | display("{}", tr_format!("Repository error: configuration error\n\tcaused by: {}", err)) 56 | } 57 | Inode(err: InodeError) { 58 | from() 59 | cause(err) 60 | description(tr!("Inode error")) 61 | display("{}", tr_format!("Repository error: inode error\n\tcaused by: {}", err)) 62 | } 63 | LoadKeys(err: EncryptionError) { 64 | from() 65 | cause(err) 66 | description(tr!("Failed to load keys")) 67 | display("{}", tr_format!("Repository error: failed to load keys\n\tcaused by: {}", err)) 68 | } 69 | BundleMap(err: BundleMapError) { 70 | from() 71 | cause(err) 72 | description(tr!("Bundle map error")) 73 | display("{}", tr_format!("Repository error: bundle map error\n\tcaused by: {}", err)) 74 | } 75 | Integrity(err: IntegrityError) { 76 | from() 77 | cause(err) 78 | description(tr!("Integrity error")) 79 | display("{}", tr_format!("Repository error: integrity error\n\tcaused by: {}", err)) 80 | } 81 | Dirty { 82 | description(tr!("Dirty repository")) 83 | display("{}", tr_format!("The repository is dirty, please run a check")) 84 | } 85 | Backup(err: BackupError) { 86 | from() 87 | cause(err) 88 | description(tr!("Failed to create a backup")) 89 | display("{}", tr_format!("Repository error: failed to create backup\n\tcaused by: {}", err)) 90 | } 91 | Lock(err: LockError) { 92 | from() 93 | cause(err) 94 | description(tr!("Failed to obtain lock")) 95 | display("{}", tr_format!("Repository error: failed to obtain lock\n\tcaused by: {}", err)) 96 | } 97 | 98 | Io(err: io::Error) { 99 | from() 100 | cause(err) 101 | description(tr!("IO error")) 102 | display("{}", tr_format!("IO error: {}", err)) 103 | } 104 | NoSuchFileInBackup(backup: Backup, path: PathBuf) { 105 | description(tr!("No such file in backup")) 106 | display("{}", tr_format!("The backup does not contain the file {:?}", path)) 107 | } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/repository/info.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | 3 | use std::collections::{HashMap, VecDeque}; 4 | 5 | 6 | pub struct BundleAnalysis { 7 | pub info: BundleInfo, 8 | pub chunk_usage: Bitmap, 9 | pub used_raw_size: usize 10 | } 11 | 12 | impl BundleAnalysis { 13 | #[inline] 14 | pub fn get_usage_ratio(&self) -> f32 { 15 | self.used_raw_size as f32 / self.info.raw_size as f32 16 | } 17 | 18 | #[inline] 19 | pub fn get_used_size(&self) -> usize { 20 | (self.get_usage_ratio() * self.info.encoded_size as f32) as usize 21 | } 22 | 23 | #[inline] 24 | pub fn get_unused_size(&self) -> usize { 25 | ((1.0 - self.get_usage_ratio()) * self.info.encoded_size as f32) as usize 26 | } 27 | } 28 | 29 | pub struct RepositoryInfo { 30 | pub bundle_count: usize, 31 | pub encoded_data_size: u64, 32 | pub raw_data_size: u64, 33 | pub compression_ratio: f32, 34 | pub chunk_count: 
usize, 35 | pub avg_chunk_size: f32, 36 | pub index_size: usize, 37 | pub index_capacity: usize, 38 | pub index_entries: usize 39 | } 40 | 41 | 42 | #[derive(Debug)] 43 | pub struct RepositoryStatistics { 44 | pub index: IndexStatistics, 45 | pub bundles: BundleStatistics 46 | } 47 | 48 | 49 | impl Repository { 50 | fn mark_used( 51 | &self, 52 | bundles: &mut HashMap, 53 | chunks: &[Chunk], 54 | ) -> Result { 55 | let mut new = false; 56 | for &(hash, len) in chunks { 57 | if let Some(pos) = self.index.get(&hash) { 58 | let bundle = pos.bundle; 59 | if let Some(bundle) = bundles.get_mut(&bundle) { 60 | if !bundle.chunk_usage.get(pos.chunk as usize) { 61 | new = true; 62 | bundle.chunk_usage.set(pos.chunk as usize); 63 | bundle.used_raw_size += len as usize; 64 | } 65 | } else { 66 | return Err(IntegrityError::MissingBundleId(pos.bundle).into()); 67 | } 68 | } else { 69 | return Err(IntegrityError::MissingChunk(hash).into()); 70 | } 71 | } 72 | Ok(new) 73 | } 74 | 75 | pub fn analyze_usage(&mut self) -> Result, RepositoryError> { 76 | if self.dirty { 77 | return Err(RepositoryError::Dirty); 78 | } 79 | try!(self.set_dirty()); 80 | let mut usage = HashMap::new(); 81 | for (id, bundle) in self.bundle_map.bundles() { 82 | let bundle = try!(self.bundles.get_bundle_info(&bundle).ok_or_else(|| { 83 | IntegrityError::MissingBundle(bundle) 84 | })); 85 | usage.insert( 86 | id, 87 | BundleAnalysis { 88 | chunk_usage: Bitmap::new(bundle.info.chunk_count), 89 | info: bundle.info.clone(), 90 | used_raw_size: 0 91 | } 92 | ); 93 | } 94 | let backups = try!(self.get_all_backups()); 95 | let mut todo = VecDeque::new(); 96 | for (_name, backup) in backups { 97 | todo.push_back(backup.root); 98 | } 99 | while let Some(chunks) = todo.pop_back() { 100 | if !try!(self.mark_used(&mut usage, &chunks)) { 101 | continue; 102 | } 103 | let inode = try!(self.get_inode(&chunks)); 104 | // Mark the content chunks as used 105 | match inode.data { 106 | None | 107 | Some(FileData::Inline(_)) => (), 108 | Some(FileData::ChunkedDirect(chunks)) => { 109 | try!(self.mark_used(&mut usage, &chunks)); 110 | } 111 | Some(FileData::ChunkedIndirect(chunks)) => { 112 | if try!(self.mark_used(&mut usage, &chunks)) { 113 | let chunk_data = try!(self.get_data(&chunks)); 114 | let chunks = ChunkList::read_from(&chunk_data); 115 | try!(self.mark_used(&mut usage, &chunks)); 116 | } 117 | } 118 | } 119 | // Put children in todo 120 | if let Some(children) = inode.children { 121 | for (_name, chunks) in children { 122 | todo.push_back(chunks); 123 | } 124 | } 125 | } 126 | self.dirty = false; 127 | Ok(usage) 128 | } 129 | 130 | #[inline] 131 | pub fn list_bundles(&self) -> Vec<&BundleInfo> { 132 | self.bundles.list_bundles() 133 | } 134 | 135 | #[inline] 136 | pub fn get_bundle(&self, bundle: &BundleId) -> Option<&StoredBundle> { 137 | self.bundles.get_bundle_info(bundle) 138 | } 139 | 140 | pub fn info(&self) -> RepositoryInfo { 141 | let bundles = self.list_bundles(); 142 | let encoded_data_size = bundles.iter().map(|b| b.encoded_size as u64).sum(); 143 | let raw_data_size = bundles.iter().map(|b| b.raw_size as u64).sum(); 144 | let chunk_count = bundles.iter().map(|b| b.chunk_count).sum(); 145 | RepositoryInfo { 146 | bundle_count: bundles.len(), 147 | chunk_count, 148 | encoded_data_size, 149 | raw_data_size, 150 | compression_ratio: encoded_data_size as f32 / raw_data_size as f32, 151 | avg_chunk_size: raw_data_size as f32 / chunk_count as f32, 152 | index_size: self.index.size(), 153 | index_capacity: self.index.capacity(), 154 | 
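// Annotation (not in the original source): compression_ratio above is
// encoded bytes divided by raw bytes, so values below 1.0 mean the codecs
// are saving space, and avg_chunk_size is derived from the raw data size,
// not from the encoded bundle sizes.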
index_entries: self.index.len() 155 | } 156 | } 157 | 158 | #[allow(dead_code)] 159 | pub fn statistics(&self) -> RepositoryStatistics { 160 | RepositoryStatistics { 161 | index: self.index.statistics(), 162 | bundles: self.bundles.statistics() 163 | } 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/repository/layout.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | 3 | use std::path::{Path, PathBuf}; 4 | 5 | #[derive(Clone)] 6 | pub struct RepositoryLayout(PathBuf); 7 | 8 | impl RepositoryLayout { 9 | pub fn new>(path: P) -> Self { 10 | RepositoryLayout(path.as_ref().to_path_buf()) 11 | } 12 | 13 | #[inline] 14 | pub fn base_path(&self) -> &Path { 15 | &self.0 16 | } 17 | 18 | #[inline] 19 | pub fn config_path(&self) -> PathBuf { 20 | self.0.join("config.yaml") 21 | } 22 | 23 | #[inline] 24 | pub fn excludes_path(&self) -> PathBuf { 25 | self.0.join("excludes") 26 | } 27 | 28 | #[inline] 29 | pub fn index_path(&self) -> PathBuf { 30 | self.0.join("index") 31 | } 32 | 33 | #[inline] 34 | pub fn keys_path(&self) -> PathBuf { 35 | self.0.join("keys") 36 | } 37 | 38 | #[inline] 39 | pub fn bundle_map_path(&self) -> PathBuf { 40 | self.0.join("bundles.map") 41 | } 42 | 43 | #[inline] 44 | pub fn local_locks_path(&self) -> PathBuf { 45 | self.0.join("locks") 46 | } 47 | 48 | #[inline] 49 | pub fn backups_path(&self) -> PathBuf { 50 | self.0.join("remote/backups") 51 | } 52 | 53 | #[inline] 54 | pub fn backup_path(&self, name: &str) -> PathBuf { 55 | self.backups_path().join(format!("{}.backup", name)) 56 | } 57 | 58 | #[inline] 59 | pub fn remote_path(&self) -> PathBuf { 60 | self.0.join("remote") 61 | } 62 | 63 | #[inline] 64 | pub fn remote_exists(&self) -> bool { 65 | self.remote_bundles_path().exists() && self.backups_path().exists() && 66 | self.remote_locks_path().exists() 67 | } 68 | 69 | #[inline] 70 | pub fn remote_readme_path(&self) -> PathBuf { 71 | self.0.join("remote/README.md") 72 | } 73 | 74 | #[inline] 75 | pub fn remote_locks_path(&self) -> PathBuf { 76 | self.0.join("remote/locks") 77 | } 78 | 79 | #[inline] 80 | pub fn remote_bundles_path(&self) -> PathBuf { 81 | self.0.join("remote/bundles") 82 | } 83 | 84 | #[inline] 85 | pub fn local_bundles_path(&self) -> PathBuf { 86 | self.0.join("bundles/cached") 87 | } 88 | 89 | fn bundle_path( 90 | &self, 91 | bundle: &BundleId, 92 | mut folder: PathBuf, 93 | mut count: usize, 94 | ) -> (PathBuf, PathBuf) { 95 | let file = bundle.to_string().to_owned() + ".bundle"; 96 | { 97 | let mut rest = &file as &str; 98 | while count >= 100 { 99 | if rest.len() < 10 { 100 | break; 101 | } 102 | folder = folder.join(&rest[0..2]); 103 | rest = &rest[2..]; 104 | count /= 250; 105 | } 106 | } 107 | (folder, file.into()) 108 | } 109 | 110 | #[inline] 111 | pub fn remote_bundle_path(&self, count: usize) -> (PathBuf, PathBuf) { 112 | self.bundle_path(&BundleId::random(), self.remote_bundles_path(), count) 113 | } 114 | 115 | #[inline] 116 | pub fn local_bundle_path(&self, bundle: &BundleId, count: usize) -> (PathBuf, PathBuf) { 117 | self.bundle_path(bundle, self.local_bundles_path(), count) 118 | } 119 | 120 | #[inline] 121 | pub fn temp_bundles_path(&self) -> PathBuf { 122 | self.0.join("bundles/temp") 123 | } 124 | 125 | #[inline] 126 | pub fn temp_bundle_path(&self) -> PathBuf { 127 | self.temp_bundles_path().join( 128 | BundleId::random().to_string().to_owned() + 129 | ".bundle" 130 | ) 131 | } 132 | 133 | #[inline] 134 | pub fn 
local_bundle_cache_path(&self) -> PathBuf { 135 | self.0.join("bundles/local.cache") 136 | } 137 | 138 | #[inline] 139 | pub fn remote_bundle_cache_path(&self) -> PathBuf { 140 | self.0.join("bundles/remote.cache") 141 | } 142 | 143 | #[inline] 144 | pub fn dirtyfile_path(&self) -> PathBuf { 145 | self.0.join("dirty") 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/repository/vacuum.rs: -------------------------------------------------------------------------------- 1 | use prelude::*; 2 | 3 | use std::collections::HashSet; 4 | 5 | 6 | impl Repository { 7 | fn delete_bundle(&mut self, id: u32) -> Result<(), RepositoryError> { 8 | if let Some(bundle) = self.bundle_map.remove(id) { 9 | try!(self.bundles.delete_bundle(&bundle)); 10 | Ok(()) 11 | } else { 12 | Err(IntegrityError::MissingBundleId(id).into()) 13 | } 14 | } 15 | 16 | pub fn vacuum( 17 | &mut self, 18 | ratio: f32, 19 | combine: bool, 20 | force: bool, 21 | ) -> Result<(), RepositoryError> { 22 | try!(self.flush()); 23 | tr_info!("Locking repository"); 24 | try!(self.write_mode()); 25 | let _lock = try!(self.lock(true)); 26 | // analyze_usage will set the dirty flag 27 | tr_info!("Analyzing chunk usage"); 28 | let usage = try!(self.analyze_usage()); 29 | let mut data_total = 0; 30 | let mut data_used = 0; 31 | for bundle in usage.values() { 32 | data_total += bundle.info.encoded_size; 33 | data_used += bundle.get_used_size(); 34 | } 35 | tr_info!( 36 | "Usage: {} of {}, {:.1}%", 37 | to_file_size(data_used as u64), 38 | to_file_size(data_total as u64), 39 | data_used as f32 / data_total as f32 * 100.0 40 | ); 41 | let mut rewrite_bundles = HashSet::new(); 42 | let mut reclaim_space = 0; 43 | let mut rewrite_data = 0; 44 | for (id, bundle) in &usage { 45 | if bundle.get_usage_ratio() <= ratio { 46 | rewrite_bundles.insert(*id); 47 | reclaim_space += bundle.get_unused_size(); 48 | rewrite_data += bundle.get_used_size(); 49 | } 50 | } 51 | if combine { 52 | let mut small_meta = vec![]; 53 | let mut small_data = vec![]; 54 | for (id, bundle) in &usage { 55 | if bundle.info.encoded_size * 4 < self.config.bundle_size { 56 | match bundle.info.mode { 57 | BundleMode::Meta => small_meta.push(*id), 58 | BundleMode::Data => small_data.push(*id), 59 | } 60 | } 61 | } 62 | if small_meta.len() >= 2 { 63 | for bundle in small_meta { 64 | rewrite_bundles.insert(bundle); 65 | } 66 | } 67 | if small_data.len() >= 2 { 68 | for bundle in small_data { 69 | rewrite_bundles.insert(bundle); 70 | } 71 | } 72 | } 73 | tr_info!( 74 | "Reclaiming about {} by rewriting {} bundles ({})", 75 | to_file_size(reclaim_space as u64), 76 | rewrite_bundles.len(), 77 | to_file_size(rewrite_data as u64) 78 | ); 79 | if !force { 80 | self.dirty = false; 81 | return Ok(()); 82 | } 83 | for id in ProgressIter::new( 84 | tr!("rewriting bundles"), 85 | rewrite_bundles.len(), 86 | rewrite_bundles.iter() 87 | ) 88 | { 89 | let bundle = &usage[id]; 90 | let bundle_id = self.bundle_map.get(*id).unwrap(); 91 | let chunks = try!(self.bundles.get_chunk_list(&bundle_id)); 92 | let mode = usage[id].info.mode; 93 | for (chunk, &(hash, _len)) in chunks.into_iter().enumerate() { 94 | if !bundle.chunk_usage.get(chunk) { 95 | try!(self.index.delete(&hash)); 96 | continue; 97 | } 98 | let data = try!(self.bundles.get_chunk(&bundle_id, chunk)); 99 | try!(self.put_chunk_override(mode, hash, &data)); 100 | } 101 | } 102 | try!(self.flush()); 103 | tr_info!("Checking index"); 104 | for (hash, location) in self.index.iter() { 105 | let 
loc_bundle = location.bundle; 106 | let loc_chunk = location.chunk; 107 | if rewrite_bundles.contains(&loc_bundle) { 108 | tr_panic!( 109 | "Removed bundle is still referenced in index: hash:{}, bundle:{}, chunk:{}", 110 | hash, 111 | loc_bundle, 112 | loc_chunk 113 | ); 114 | } 115 | } 116 | tr_info!("Deleting {} bundles", rewrite_bundles.len()); 117 | for id in rewrite_bundles { 118 | try!(self.delete_bundle(id)); 119 | } 120 | try!(self.save_bundle_map()); 121 | self.dirty = false; 122 | Ok(()) 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/translation.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | use std::collections::HashMap; 3 | 4 | use std::cmp::max; 5 | use std::str; 6 | 7 | use std::path::{Path, PathBuf}; 8 | use std::io::Read; 9 | use std::fs::File; 10 | 11 | use locale_config::Locale; 12 | 13 | 14 | pub type CowStr = Cow<'static, str>; 15 | 16 | fn read_u32(b: &[u8], reorder: bool) -> u32 { 17 | if reorder { 18 | (u32::from(b[0]) << 24) + (u32::from(b[1]) << 16) + (u32::from(b[2]) << 8) + u32::from(b[3]) 19 | } else { 20 | (u32::from(b[3]) << 24) + (u32::from(b[2]) << 16) + (u32::from(b[1]) << 8) + u32::from(b[0]) 21 | } 22 | } 23 | 24 | struct MoFile<'a> { 25 | data: &'a [u8], 26 | count: usize, 27 | orig_pos: usize, 28 | trans_pos: usize, 29 | reorder: bool, 30 | i : usize 31 | } 32 | 33 | impl<'a> MoFile<'a> { 34 | fn new(data: &'a [u8]) -> Result { 35 | if data.len() < 20 { 36 | return Err(()); 37 | } 38 | // Magic header 39 | let magic = read_u32(&data[0..4], false); 40 | let reorder = if magic == 0x9504_12de { 41 | false 42 | } else if magic == 0xde12_0495 { 43 | true 44 | } else { 45 | return Err(()); 46 | }; 47 | // Version 48 | if read_u32(&data[4..8], reorder) != 0x0000_0000 { 49 | return Err(()); 50 | } 51 | // Translation count 52 | let count = read_u32(&data[8..12], reorder) as usize; 53 | // Original string offset 54 | let orig_pos = read_u32(&data[12..16], reorder) as usize; 55 | // Original string offset 56 | let trans_pos = read_u32(&data[16..20], reorder) as usize; 57 | if data.len() < max(orig_pos, trans_pos) + count * 8 { 58 | return Err(()); 59 | } 60 | Ok(MoFile{ 61 | data, 62 | count, 63 | orig_pos, 64 | trans_pos, 65 | reorder, 66 | i: 0 67 | }) 68 | } 69 | } 70 | 71 | impl<'a> Iterator for MoFile<'a> { 72 | type Item = (&'a str, &'a str); 73 | 74 | fn next(&mut self) -> Option { 75 | if self.i >= self.count { 76 | return None; 77 | } 78 | let length = read_u32(&self.data[self.orig_pos+self.i*8..], self.reorder) as usize; 79 | let offset = read_u32(&self.data[self.orig_pos+self.i*8+4..], self.reorder) as usize; 80 | let orig = match str::from_utf8(&self.data[offset..offset+length]) { 81 | Ok(s) => s, 82 | Err(_) => return None 83 | }; 84 | let length = read_u32(&self.data[self.trans_pos+self.i*8..], self.reorder) as usize; 85 | let offset = read_u32(&self.data[self.trans_pos+self.i*8+4..], self.reorder) as usize; 86 | let trans = match str::from_utf8(&self.data[offset..offset+length]) { 87 | Ok(s) => s, 88 | Err(_) => return None 89 | }; 90 | self.i += 1; 91 | Some((orig, trans)) 92 | } 93 | } 94 | 95 | 96 | pub struct Translation(HashMap); 97 | 98 | impl Translation { 99 | pub fn new() -> Self { 100 | Translation(Default::default()) 101 | } 102 | 103 | pub fn from_mo_data(data: &'static[u8]) -> Self { 104 | let mut translation = Translation::new(); 105 | match MoFile::new(data) { 106 | Ok(mo_file) => for (orig, trans) in 
mo_file { 107 | translation.set(orig, trans); 108 | } 109 | Err(_) => error!("Invalid translation data") 110 | } 111 | translation 112 | } 113 | 114 | pub fn from_mo_file(path: &Path) -> Self { 115 | let mut translation = Translation::new(); 116 | if let Ok(mut file) = File::open(&path) { 117 | let mut data = vec![]; 118 | if file.read_to_end(&mut data).is_ok() { 119 | match MoFile::new(&data) { 120 | Ok(mo_file) => for (orig, trans) in mo_file { 121 | translation.set(orig.to_string(), trans.to_string()); 122 | } 123 | Err(_) => error!("Invalid translation data") 124 | } 125 | } 126 | } 127 | translation 128 | } 129 | 130 | pub fn set, T: Into>(&mut self, orig: O, trans: T) { 131 | let trans = trans.into(); 132 | if !trans.is_empty() { 133 | self.0.insert(orig.into(), trans); 134 | } 135 | } 136 | 137 | pub fn get<'a, 'b: 'a>(&'b self, orig: &'a str) -> &'a str { 138 | self.0.get(orig).map(|s| s as &'a str).unwrap_or(orig) 139 | } 140 | } 141 | 142 | fn get_translation(locale: &str) -> Translation { 143 | if let Some(trans) = find_translation(locale) { 144 | return trans; 145 | } 146 | let country = locale.split('_').next().unwrap(); 147 | if let Some(trans) = find_translation(country) { 148 | return trans; 149 | } 150 | Translation::new() 151 | } 152 | 153 | fn find_translation(name: &str) -> Option { 154 | if EMBEDDED_TRANS.contains_key(name) { 155 | return Some(Translation::from_mo_data(EMBEDDED_TRANS[name])); 156 | } 157 | let path = PathBuf::from(format!("/usr/share/locale/{}/LC_MESSAGES/zvault.mo", name)); 158 | if path.exists() { 159 | return Some(Translation::from_mo_file(&path)); 160 | } 161 | let path = PathBuf::from(format!("lang/{}.mo", name)); 162 | if path.exists() { 163 | return Some(Translation::from_mo_file(&path)); 164 | } 165 | None 166 | } 167 | 168 | lazy_static! { 169 | pub static ref EMBEDDED_TRANS: HashMap<&'static str, &'static[u8]> = { 170 | HashMap::new() 171 | //map.insert("de", include_bytes!("../lang/de.mo") as &'static [u8]); 172 | }; 173 | pub static ref TRANS: Translation = { 174 | let locale = Locale::current(); 175 | let locale_str = locale.tags_for("").next().unwrap().as_ref().to_string(); 176 | get_translation(&locale_str) 177 | }; 178 | } 179 | 180 | #[macro_export] macro_rules! tr { 181 | ($fmt:tt) => (::translation::TRANS.get($fmt)); 182 | } 183 | 184 | #[macro_export] macro_rules! tr_format { 185 | ($fmt:tt) => (tr!($fmt)); 186 | ($fmt:tt, $($arg:tt)*) => (rt_format!(tr!($fmt), $($arg)*).expect("invalid format")); 187 | } 188 | 189 | #[macro_export] macro_rules! tr_println { 190 | ($fmt:tt) => (println!("{}", tr!($fmt))); 191 | ($fmt:tt, $($arg:tt)*) => (rt_println!(tr!($fmt), $($arg)*).expect("invalid format")); 192 | } 193 | 194 | #[macro_export] macro_rules! tr_trace { 195 | ($($arg:tt)*) => (debug!("{}", tr_format!($($arg)*))); 196 | } 197 | 198 | #[macro_export] macro_rules! tr_debug { 199 | ($($arg:tt)*) => (debug!("{}", tr_format!($($arg)*))); 200 | } 201 | 202 | #[macro_export] macro_rules! tr_info { 203 | ($($arg:tt)*) => (info!("{}", tr_format!($($arg)*))); 204 | } 205 | 206 | #[macro_export] macro_rules! tr_warn { 207 | ($($arg:tt)*) => (warn!("{}", tr_format!($($arg)*))); 208 | } 209 | 210 | #[macro_export] macro_rules! tr_error { 211 | ($($arg:tt)*) => (error!("{}", tr_format!($($arg)*))); 212 | } 213 | 214 | #[macro_export] macro_rules! 
tr_panic { 215 | ($($arg:tt)*) => (panic!("{}", tr_format!($($arg)*))); 216 | } 217 | -------------------------------------------------------------------------------- /src/util/bitmap.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Deref; 2 | 3 | #[derive(Clone)] 4 | pub struct Bitmap { 5 | bytes: Vec 6 | } 7 | 8 | impl Bitmap { 9 | /// Creates a new bitmap 10 | pub fn new(len: usize) -> Self { 11 | let len = (len + 7) / 8; 12 | let mut bytes = Vec::with_capacity(len); 13 | bytes.resize(len, 0); 14 | Self { bytes } 15 | } 16 | 17 | /// Returns the number of bits in the bitmap 18 | #[inline] 19 | pub fn len(&self) -> usize { 20 | self.bytes.len() * 8 21 | } 22 | 23 | /// Returns whether the bitmap is empty, i.e. contains no bits 24 | #[inline] 25 | pub fn is_empty(&self) -> bool { 26 | self.len() == 0 27 | } 28 | 29 | #[inline] 30 | fn convert_index(&self, index: usize) -> (usize, u8) { 31 | (index / 8, 1u8 << (index % 8)) 32 | } 33 | 34 | #[inline] 35 | pub fn set(&mut self, index: usize) { 36 | let (byte, mask) = self.convert_index(index); 37 | self.bytes[byte] |= mask 38 | } 39 | 40 | #[inline] 41 | pub fn unset(&mut self, index: usize) { 42 | let (byte, mask) = self.convert_index(index); 43 | self.bytes[byte] &= !mask 44 | } 45 | 46 | #[inline] 47 | pub fn flip(&mut self, index: usize) { 48 | let (byte, mask) = self.convert_index(index); 49 | self.bytes[byte] ^= mask 50 | } 51 | 52 | #[inline] 53 | pub fn get(&self, index: usize) -> bool { 54 | let (byte, mask) = self.convert_index(index); 55 | self.bytes[byte] & mask != 0 56 | } 57 | 58 | #[inline] 59 | pub fn into_bytes(self) -> Vec { 60 | self.bytes 61 | } 62 | 63 | #[inline] 64 | pub fn as_bytes(&self) -> &[u8] { 65 | &self.bytes 66 | } 67 | 68 | #[inline] 69 | pub fn from_bytes(bytes: Vec) -> Self { 70 | Self { bytes } 71 | } 72 | } 73 | 74 | impl Deref for Bitmap { 75 | type Target = [u8]; 76 | 77 | #[inline] 78 | fn deref(&self) -> &[u8] { 79 | &self.bytes 80 | } 81 | } 82 | 83 | 84 | mod tests { 85 | #[allow(unused_imports)] 86 | use super::Bitmap; 87 | 88 | #[test] 89 | fn test_new() { 90 | Bitmap::new(1024); 91 | } 92 | 93 | #[test] 94 | fn test_len() { 95 | assert_eq!(Bitmap::new(1024).len(), 1024); 96 | } 97 | 98 | #[test] 99 | fn test_is_empty() { 100 | assert!(!Bitmap::new(1024).is_empty()); 101 | assert!(Bitmap::new(0).is_empty()); 102 | } 103 | 104 | #[test] 105 | fn test_set() { 106 | let mut bitmap = Bitmap::new(1024); 107 | assert!(!bitmap.get(5)); 108 | assert!(!bitmap.get(154)); 109 | bitmap.set(5); 110 | assert!(bitmap.get(5)); 111 | assert!(!bitmap.get(154)); 112 | bitmap.set(154); 113 | assert!(bitmap.get(5)); 114 | assert!(bitmap.get(154)); 115 | } 116 | 117 | #[test] 118 | fn test_unset() { 119 | let mut bitmap = Bitmap::new(1024); 120 | assert!(!bitmap.get(5)); 121 | bitmap.set(5); 122 | assert!(bitmap.get(5)); 123 | bitmap.unset(5); 124 | assert!(!bitmap.get(5)); 125 | assert!(!bitmap.get(154)); 126 | bitmap.unset(154); 127 | assert!(!bitmap.get(154)); 128 | } 129 | 130 | #[test] 131 | fn test_flip() { 132 | let mut bitmap = Bitmap::new(1024); 133 | assert!(!bitmap.get(5)); 134 | bitmap.flip(5); 135 | assert!(bitmap.get(5)); 136 | bitmap.set(154); 137 | assert!(bitmap.get(154)); 138 | bitmap.flip(154); 139 | assert!(!bitmap.get(154)); 140 | } 141 | 142 | #[test] 143 | fn test_as_bytes() { 144 | let mut bitmap = Bitmap::new(16); 145 | assert_eq!(bitmap.as_bytes(), &[0, 0]); 146 | bitmap.set(0); 147 | assert_eq!(bitmap.as_bytes(), &[1, 0]); 148 | 
        bitmap.set(8);
        bitmap.set(9);
        assert_eq!(bitmap.as_bytes(), &[1, 3]);
    }

    #[test]
    fn test_into_bytes() {
        let mut bitmap = Bitmap::new(16);
        bitmap.set(0);
        bitmap.set(8);
        bitmap.set(9);
        assert_eq!(bitmap.as_bytes(), &bitmap.clone().into_bytes() as &[u8]);
    }

    #[test]
    fn test_from_bytes() {
        assert_eq!(&[1, 3], Bitmap::from_bytes(vec![1, 3]).as_bytes());
    }

}
--------------------------------------------------------------------------------
/src/util/chunk.rs:
--------------------------------------------------------------------------------
use std::io::{self, Write, Read, Cursor};
use std::ops::{Deref, DerefMut};

use serde::{self, Serialize, Deserialize};
use serde_bytes::{Bytes, ByteBuf};
use serde::de::Error;

use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};

use super::Hash;

pub type Chunk = (Hash, u32);

#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct ChunkList(Vec<Chunk>);

impl ChunkList {
    #[inline]
    pub fn new() -> Self {
        ChunkList(Vec::new())
    }

    #[inline]
    pub fn with_capacity(num: usize) -> Self {
        ChunkList(Vec::with_capacity(num))
    }

    #[inline]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    #[inline]
    pub fn push(&mut self, chunk: Chunk) {
        self.0.push(chunk)
    }

    pub fn write_to(&self, dst: &mut Write) -> Result<(), io::Error> {
        for chunk in &self.0 {
            try!(chunk.0.write_to(dst));
            try!(dst.write_u32::<LittleEndian>(chunk.1));
        }
        Ok(())
    }

    pub fn read_n_from(n: usize, src: &mut Read) -> Result<Self, io::Error> {
        let mut chunks = Vec::with_capacity(n);
        for _ in 0..n {
            let hash = try!(Hash::read_from(src));
            let len = try!(src.read_u32::<LittleEndian>());
            chunks.push((hash, len));
        }
        Ok(ChunkList(chunks))
    }

    #[inline]
    pub fn read_from(src: &[u8]) -> Self {
        if src.len() % 20 != 0 {
            tr_warn!("Reading truncated chunk list");
        }
        ChunkList::read_n_from(src.len() / 20, &mut Cursor::new(src)).unwrap()
    }

    #[inline]
    pub fn encoded_size(&self) -> usize {
        self.0.len() * 20
    }

    #[inline]
    pub fn into_inner(self) -> Vec<Chunk> {
        self.0
    }
}

impl Default for ChunkList {
    #[inline]
    fn default() -> Self {
        ChunkList(Vec::new())
    }
}

impl From<Vec<Chunk>> for ChunkList {
    fn from(val: Vec<Chunk>) -> Self {
        ChunkList(val)
    }
}

impl Into<Vec<Chunk>> for ChunkList {
    fn into(self) -> Vec<Chunk> {
        self.0
    }
}

impl Deref for ChunkList {
    type Target = [Chunk];
    fn deref(&self) -> &[Chunk] {
        &self.0
    }
}

impl DerefMut for ChunkList {
    fn deref_mut(&mut self) -> &mut [Chunk] {
        &mut self.0
    }
}

impl Serialize for ChunkList {
    #[inline]
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let mut buf = Vec::with_capacity(self.encoded_size());
        self.write_to(&mut buf).unwrap();
        Bytes::from(&buf as &[u8]).serialize(serializer)
    }
}
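
Each chunk entry is encoded as 20 bytes: the 16-byte hash followed by the chunk length as a little-endian u32. A usage sketch (assuming Hash and ChunkList from this file; the values are illustrative):

    let mut list = ChunkList::new();
    list.push((Hash { high: 1, low: 2 }, 4096));
    let mut buf = Vec::new();
    list.write_to(&mut buf).unwrap();
    assert_eq!(buf.len(), list.encoded_size()); // 20 bytes per chunk
    let decoded = ChunkList::read_from(&buf);   // Deref makes entries indexable
    assert_eq!(decoded[0].1, 4096);
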
impl<'a> Deserialize<'a> for ChunkList {
    #[inline]
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'a>,
    {
        let data: Vec<u8> = try!(ByteBuf::deserialize(deserializer)).into();
        if data.len() % 20 != 0 {
            return Err(D::Error::custom(tr!("Invalid chunk list length")));
        }
        Ok(
            ChunkList::read_n_from(data.len() / 20, &mut Cursor::new(data)).unwrap()
        )
    }
}



mod tests {

    #[allow(unused_imports)]
    use super::ChunkList;

    #[allow(unused_imports)]
    use super::super::Hash;

    #[allow(unused_imports)]
    use super::super::msgpack;

    #[test]
    fn test_new() {
        ChunkList::new();
    }

    #[test]
    fn test_with_capacity() {
        ChunkList::with_capacity(0);
        ChunkList::with_capacity(1024);
    }

    #[test]
    fn test_push() {
        let mut list = ChunkList::new();
        assert!(list.is_empty());
        assert_eq!(list.len(), 0);
        list.push((Hash::default(), 0));
        assert!(!list.is_empty());
        assert_eq!(list.len(), 1);
        list.push((Hash::default(), 1));
        assert!(!list.is_empty());
        assert_eq!(list.len(), 2);
    }

    #[test]
    fn test_into_inner() {
        let mut list = ChunkList::new();
        list.push((Hash::default(), 0));
        list.push((Hash::default(), 1));
        assert_eq!(
            list.into_inner(),
            vec![(Hash::default(), 0), (Hash::default(), 1)]
        );
    }

    #[test]
    fn test_write_to() {
        let mut list = ChunkList::new();
        list.push((Hash::default(), 0));
        list.push((Hash::default(), 1));
        let mut buf = Vec::new();
        assert!(list.write_to(&mut buf).is_ok());
        assert_eq!(buf.len(), 40);
        assert_eq!(&buf[16..20], &[0, 0, 0, 0]);
        assert_eq!(&buf[36..40], &[1, 0, 0, 0]);
    }

    #[test]
    fn test_encoded_size() {
        let mut list = ChunkList::new();
        list.push((Hash::default(), 0));
        list.push((Hash::default(), 1));
        assert_eq!(list.encoded_size(), 40);
    }

    #[test]
    fn test_read_from() {
        let data = vec![
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        ];
        let list = ChunkList::read_from(&data);
        assert_eq!(list.len(), 2);
        assert_eq!(list[0], (Hash::default(), 0));
        assert_eq!(list[1], (Hash::default(), 1));
    }

    #[test]
    fn test_serialize() {
        let mut list = ChunkList::new();
        list.push((Hash::default(), 0));
        list.push((Hash::default(), 1));
        let mut buf = Vec::new();
        assert!(list.write_to(&mut buf).is_ok());
        let encoded = msgpack::encode(&list).unwrap();
        assert_eq!(buf, &encoded[2..]);
        assert_eq!(&[196, 40], &encoded[..2]);
    }

    #[test]
    fn test_deserialize() {
        let mut list = ChunkList::new();
        list.push((Hash::default(), 0));
        list.push((Hash::default(), 1));
        let mut buf = vec![196, 40];
        assert!(list.write_to(&mut buf).is_ok());
        assert!(msgpack::decode::<ChunkList>(&buf).is_ok());
        assert_eq!(msgpack::decode::<ChunkList>(&buf).unwrap(), list);
    }

}
--------------------------------------------------------------------------------
/src/util/cli.rs:
--------------------------------------------------------------------------------
use pbr;
use std::io::Stdout;
use std::time::Duration;

pub fn to_file_size(size: u64) -> String {
    let mut size = size as f32;
    if size >= 512.0 {
        size /= 1024.0;
    } else {
        return format!("{:.0} Byte", size);
    }
    if size >= 512.0 {
        size /= 1024.0;
    } else {
        return format!("{:.1} KiB", size);
    }
    if size >= 512.0 {
        size /= 1024.0;
    } else {
        return format!("{:.1} MiB", size);
    }
    if size >= 512.0 {
        size /= 1024.0;
    } else {
        return format!("{:.1} GiB", size);
    }
    format!("{:.1} TiB", size)
}

pub fn to_speed(size: u64, dur: f32) -> String {
    let speed = (size as f32 / dur) as u64;
    to_file_size(speed) + "/s"
}

pub fn to_duration(dur: f32) -> String {
    let secs = dur.floor() as u64;
    let subsecs = dur - dur.floor();
    let hours = secs / 3600;
    let mins = (secs / 60) % 60;
    let secs = (secs % 60) as f32 + subsecs;
    format!("{}:{:02}:{:04.1}", hours, mins, secs)
}


pub struct ProgressIter<T: Iterator> {
    inner: T,
    msg: String,
    bar: pbr::ProgressBar<Stdout>
}

impl<T: Iterator> ProgressIter<T> {
    #[allow(blacklisted_name)]
    pub fn new(msg: &str, max: usize, inner: T) -> Self {
        let mut bar = pbr::ProgressBar::new(max as u64);
        let msg = format!("{}: ", msg);
        bar.message(&msg);
        bar.set_max_refresh_rate(Some(Duration::from_millis(100)));
        ProgressIter {
            inner,
            bar,
            msg
        }
    }
}

impl<T: Iterator> Iterator for ProgressIter<T> {
    type Item = T::Item;

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }

    fn next(&mut self) -> Option<Self::Item> {
        match self.inner.next() {
            None => {
                let msg = self.msg.clone() + tr!("done.");
                self.bar.finish_print(&msg);
                None
            }
            Some(item) => {
                self.bar.inc();
                Some(item)
            }
        }
    }
}


mod tests {

    #[allow(unused_imports)]
    use super::*;

    #[test]
    fn test_to_file_size() {
        assert_eq!("0 Byte", to_file_size(0));
        assert_eq!("1 Byte", to_file_size(1));
        assert_eq!("15 Byte", to_file_size(15));
        assert_eq!("456 Byte", to_file_size(456));
        assert_eq!("0.7 KiB", to_file_size(670));
        assert_eq!("237.0 KiB", to_file_size(242670));
        assert_eq!("442.5 KiB", to_file_size(453170));
        assert_eq!("0.7 MiB", to_file_size(753170));
        assert_eq!("12.2 MiB", to_file_size(12753170));
        assert_eq!("222.0 MiB", to_file_size(232753170));
        assert_eq!("5.1 GiB", to_file_size(5435353170));
        assert_eq!("291.1 GiB", to_file_size(312534553170));
        assert_eq!("3.9 TiB", to_file_size(4312534553170));
    }

    #[test]
    fn test_to_speed() {
        assert_eq!("0 Byte/s", to_speed(0, 1.0));
        assert_eq!("100 Byte/s", to_speed(100, 1.0));
        assert_eq!("1.0 KiB/s", to_speed(100, 0.1));
        assert_eq!("10 Byte/s", to_speed(100, 10.0));
        assert_eq!("237.0 KiB/s", to_speed(242670, 1.0));
        assert_eq!("0.7 MiB/s", to_speed(753170, 1.0));
        assert_eq!("222.0 MiB/s", to_speed(232753170, 1.0));
        assert_eq!("291.1 GiB/s", to_speed(312534553170, 1.0));
        assert_eq!("3.9 TiB/s", to_speed(4312534553170, 1.0));
    }

    #[test]
    fn test_to_duration() {
        assert_eq!("0:00:00.0", to_duration(0.0));
        assert_eq!("0:00:00.1", to_duration(0.1));
        assert_eq!("0:00:01.0", to_duration(1.0));
        assert_eq!("0:01:00.0", to_duration(60.0));
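
The formatting helpers use binary (KiB/MiB) units and switch units at 512 so the mantissa stays small. A brief sketch, including wrapping an iterator in a progress bar (assuming the items above are in scope):

    assert_eq!(to_file_size(242670), "237.0 KiB");
    assert_eq!(to_speed(242670, 1.0), "237.0 KiB/s");
    assert_eq!(to_duration(7322.2), "2:02:02.2");

    // Draws a progress bar on stdout while the wrapped iterator is consumed:
    let sum: i32 = ProgressIter::new("counting", 1000, 0..1000).sum();
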
        assert_eq!("1:00:00.0", to_duration(3600.0));
        assert_eq!("2:02:02.2", to_duration(7322.2));
    }

}
--------------------------------------------------------------------------------
/src/util/fs.rs:
--------------------------------------------------------------------------------
mod linux {
    use libc;

    use std::path::Path;
    use std::io;
    use std::ffi::CString;
    use std::os::unix::ffi::OsStringExt;

    #[inline]
    pub fn chown<P: AsRef<Path>>(
        path: P,
        uid: libc::uid_t,
        gid: libc::gid_t,
    ) -> Result<(), io::Error> {
        let path = CString::new(path.as_ref().to_path_buf().into_os_string().into_vec()).unwrap();
        let result = unsafe { libc::lchown((&path).as_ptr(), uid, gid) };
        match result {
            0 => Ok(()),
            -1 => Err(io::Error::last_os_error()),
            _ => unreachable!(),
        }
    }
}

pub use self::linux::*;

// Not testing since this requires root
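
A sketch of calling chown without root, by re-assigning a file the current user owns to its existing owner (a no-op ownership change succeeds without privileges; the path is illustrative):

    let uid = unsafe { libc::getuid() };
    let gid = unsafe { libc::getgid() };
    // Assumes /tmp/zvault_test exists and is owned by the current user.
    chown("/tmp/zvault_test", uid, gid).unwrap();
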
assert_eq!("1:00:00.0", to_duration(3600.0)); 131 | assert_eq!("2:02:02.2", to_duration(7322.2)); 132 | } 133 | 134 | 135 | } 136 | -------------------------------------------------------------------------------- /src/util/fs.rs: -------------------------------------------------------------------------------- 1 | mod linux { 2 | use libc; 3 | 4 | use std::path::Path; 5 | use std::io; 6 | use std::ffi::CString; 7 | use std::os::unix::ffi::OsStringExt; 8 | 9 | #[inline] 10 | pub fn chown>( 11 | path: P, 12 | uid: libc::uid_t, 13 | gid: libc::gid_t, 14 | ) -> Result<(), io::Error> { 15 | let path = CString::new(path.as_ref().to_path_buf().into_os_string().into_vec()).unwrap(); 16 | let result = unsafe { libc::lchown((&path).as_ptr(), uid, gid) }; 17 | match result { 18 | 0 => Ok(()), 19 | -1 => Err(io::Error::last_os_error()), 20 | _ => unreachable!(), 21 | } 22 | } 23 | } 24 | 25 | pub use self::linux::*; 26 | 27 | // Not testing since this requires root 28 | -------------------------------------------------------------------------------- /src/util/hash.rs: -------------------------------------------------------------------------------- 1 | use serde::{self, Serialize, Deserialize}; 2 | use serde::de::Error; 3 | use serde_bytes::{ByteBuf, Bytes}; 4 | 5 | use murmurhash3::murmurhash3_x64_128; 6 | use blake2::blake2b::blake2b; 7 | use byteorder::{LittleEndian, ByteOrder, WriteBytesExt, ReadBytesExt}; 8 | 9 | use std::mem; 10 | use std::fmt; 11 | use std::u64; 12 | use std::io::{self, Read, Write}; 13 | 14 | 15 | #[derive(Clone, Copy, PartialEq, Hash, Eq, Default, Ord, PartialOrd)] 16 | pub struct Hash { 17 | pub high: u64, 18 | pub low: u64 19 | } 20 | 21 | impl Hash { 22 | #[inline] 23 | pub fn hash(&self) -> u64 { 24 | self.low 25 | } 26 | 27 | #[inline] 28 | pub fn empty() -> Self { 29 | Hash { high: 0, low: 0 } 30 | } 31 | 32 | #[inline] 33 | pub fn to_string(&self) -> String { 34 | format!("{:016x}{:016x}", self.high, self.low) 35 | } 36 | 37 | #[inline] 38 | pub fn write_to(&self, dst: &mut Write) -> Result<(), io::Error> { 39 | try!(dst.write_u64::(self.high)); 40 | dst.write_u64::(self.low) 41 | } 42 | 43 | #[inline] 44 | pub fn read_from(src: &mut Read) -> Result { 45 | let high = try!(src.read_u64::()); 46 | let low = try!(src.read_u64::()); 47 | Ok(Hash { 48 | high, 49 | low 50 | }) 51 | } 52 | 53 | #[inline] 54 | pub fn from_string(val: &str) -> Result { 55 | let high = try!(u64::from_str_radix(&val[..16], 16).map_err(|_| ())); 56 | let low = try!(u64::from_str_radix(&val[16..], 16).map_err(|_| ())); 57 | Ok(Self { 58 | high, 59 | low 60 | }) 61 | } 62 | } 63 | 64 | impl fmt::Display for Hash { 65 | #[inline] 66 | fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { 67 | write!(fmt, "{:016x}{:016x}", self.high, self.low) 68 | } 69 | } 70 | 71 | impl fmt::Debug for Hash { 72 | #[inline] 73 | fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { 74 | write!(fmt, "{:016x}{:016x}", self.high, self.low) 75 | } 76 | } 77 | 78 | 79 | impl Serialize for Hash { 80 | fn serialize(&self, serializer: S) -> Result 81 | where 82 | S: serde::Serializer, 83 | { 84 | let mut dat = [0u8; 16]; 85 | LittleEndian::write_u64(&mut dat[..8], self.high); 86 | LittleEndian::write_u64(&mut dat[8..], self.low); 87 | Bytes::from(&dat as &[u8]).serialize(serializer) 88 | } 89 | } 90 | 91 | impl<'a> Deserialize<'a> for Hash { 92 | fn deserialize(deserializer: D) -> Result 93 | where 94 | D: serde::Deserializer<'a>, 95 | { 96 | let dat: Vec = 
--------------------------------------------------------------------------------
/src/util/hex.rs:
--------------------------------------------------------------------------------
pub fn to_hex(data: &[u8]) -> String {
    data.iter()
        .map(|b| format!("{:02x}", b))
        .collect::<Vec<String>>()
        .join("")
}

pub fn parse_hex(hex: &str) -> Result<Vec<u8>, ()> {
    let mut b = Vec::with_capacity(hex.len() / 2);
    let mut modulus = 0;
    let mut buf = 0;
    for byte in hex.bytes() {
        buf <<= 4;
        match byte {
            b'A'...b'F' => buf |= byte - b'A' + 10,
            b'a'...b'f' => buf |= byte - b'a' + 10,
            b'0'...b'9' => buf |= byte - b'0',
            b' ' | b'\r' | b'\n' | b'\t' => {
                buf >>= 4;
                continue;
            }
            _ => return Err(()),
        }
        modulus += 1;
        if modulus == 2 {
            modulus = 0;
            b.push(buf);
        }
    }
    match modulus {
        0 => Ok(b),
        _ => Err(()),
    }
}



mod tests {

    #[allow(unused_imports)]
    use super::*;


    #[test]
    fn test_to_hex() {
        assert_eq!(to_hex(&[0]), "00");
        assert_eq!(to_hex(&[1]), "01");
        assert_eq!(to_hex(&[15]), "0f");
        assert_eq!(to_hex(&[16]), "10");
        assert_eq!(to_hex(&[255]), "ff");
        assert_eq!(to_hex(&[5, 255]), "05ff");
    }

    #[test]
    fn test_parse_hex() {
        assert_eq!(parse_hex("00"), Ok(vec![0]));
        assert_eq!(parse_hex("01"), Ok(vec![1]));
        assert_eq!(parse_hex("0f"), Ok(vec![15]));
        assert_eq!(parse_hex("0fff"), Ok(vec![15, 255]));
        assert_eq!(parse_hex("0F"), Ok(vec![15]));
        assert_eq!(parse_hex("01 02\n03\t04"), Ok(vec![1, 2, 3, 4]));
    }

}
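
parse_hex accepts both upper- and lower-case digits and skips whitespace, but rejects an odd number of digits. A sketch:

    assert_eq!(to_hex(&[0xde, 0xad]), "dead");
    assert_eq!(parse_hex("DE ad"), Ok(vec![0xde, 0xad]));
    assert!(parse_hex("abc").is_err()); // odd number of hex digits
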
--------------------------------------------------------------------------------
/src/util/hostname.rs:
--------------------------------------------------------------------------------
use libc;
use std::ffi;

extern "C" {
    fn gethostname(name: *mut libc::c_char, size: libc::size_t) -> libc::c_int;
}

pub fn get_hostname() -> Result<String, ()> {
    let mut buf = Vec::with_capacity(255);
    buf.resize(255, 0u8);
    if unsafe {
        gethostname(
            buf.as_mut_ptr() as *mut libc::c_char,
            buf.len() as libc::size_t
        )
    } == 0
    {
        buf[254] = 0; // enforce null-termination
        let name = unsafe { ffi::CStr::from_ptr(buf.as_ptr() as *const libc::c_char) };
        name.to_str().map(|s| s.to_string()).map_err(|_| ())
    } else {
        Err(())
    }
}



mod tests {

    #[allow(unused_imports)]
    use super::*;


    #[test]
    fn test_gethostname() {
        let res = get_hostname();
        assert!(res.is_ok());
        let name = res.unwrap();
        assert!(name.len() >= 1);
    }

}
--------------------------------------------------------------------------------
/src/util/lock.rs:
--------------------------------------------------------------------------------
use prelude::*;

use serde_yaml;
use chrono::prelude::*;
use libc;

use std::path::{Path, PathBuf};
use std::io;
use std::fs::{self, File};


quick_error!{
    #[derive(Debug)]
    pub enum LockError {
        Io(err: io::Error) {
            from()
            cause(err)
            description(tr!("IO error"))
            display("{}", tr_format!("Lock error: IO error\n\tcaused by: {}", err))
        }
        Yaml(err: serde_yaml::Error) {
            from()
            cause(err)
            description(tr!("Yaml format error"))
            display("{}", tr_format!("Lock error: yaml format error\n\tcaused by: {}", err))
        }
        InvalidLockState(reason: &'static str) {
            description(tr!("Invalid lock state"))
            display("{}", tr_format!("Lock error: invalid lock state: {}", reason))
        }
        Locked {
            description(tr!("Locked"))
            display("{}", tr_format!("Lock error: locked"))
        }
    }
}


#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct LockFile {
    pub hostname: String,
    pub processid: usize,
    pub date: i64,
    pub exclusive: bool
}
serde_impl!(LockFile(String) {
    hostname: String => "hostname",
    processid: usize => "processid",
    date: i64 => "date",
    exclusive: bool => "exclusive"
});

impl LockFile {
    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, LockError> {
        let f = try!(File::open(path));
        Ok(try!(serde_yaml::from_reader(f)))
    }

    pub fn save<P: AsRef<Path>>(&self, path: P) -> Result<(), LockError> {
        let mut f = try!(File::create(path));
        try!(serde_yaml::to_writer(&mut f, &self));
        Ok(())
    }
}

#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
pub enum LockLevel {
    Free,
    Shared,
    Exclusive
}


pub struct LockHandle {
    lock: LockFile,
    path: PathBuf
}

impl LockHandle {
    pub fn release(&self) -> Result<(), LockError> {
        if self.path.exists() {
            try!(fs::remove_file(&self.path))
        }
        Ok(())
    }

    pub fn refresh(&self) -> Result<(), LockError> {
        let mut file = try!(LockFile::load(&self.path));
        file.date = Utc::now().timestamp();
        file.save(&self.path)
    }
}

impl Drop for LockHandle {
    fn drop(&mut self) {
        self.release().unwrap()
    }
}



pub struct LockFolder {
    path: PathBuf
}

impl LockFolder {
    pub fn new<P: AsRef<Path>>(path: P) -> Self {
        LockFolder { path: path.as_ref().to_path_buf() }
    }

    fn get_locks(&self) -> Result<Vec<LockFile>, LockError> {
        let mut locks = vec![];
        for entry in try!(fs::read_dir(&self.path)) {
            let entry = try!(entry);
            locks.push(try!(LockFile::load(entry.path())));
        }
        Ok(locks)
    }

    pub fn get_lock_level(&self) -> Result<LockLevel, LockError> {
        let mut level = LockLevel::Free;
        for lock in try!(self.get_locks()) {
            if lock.exclusive {
                if level == LockLevel::Exclusive {
                    return Err(LockError::InvalidLockState(tr!("multiple exclusive locks")));
                } else {
                    level = LockLevel::Exclusive
                }
            } else if level == LockLevel::Exclusive {
                return Err(LockError::InvalidLockState(
                    tr!("exclusive lock and shared locks")
                ));
            } else {
                level = LockLevel::Shared
            }
        }
        Ok(level)
    }

    pub fn lock(&self, exclusive: bool) -> Result<LockHandle, LockError> {
        let level = try!(self.get_lock_level());
        if level == LockLevel::Exclusive || level == LockLevel::Shared && exclusive {
            return Err(LockError::Locked);
        }
        let lockfile = LockFile {
            hostname: get_hostname().unwrap(),
            processid: unsafe { libc::getpid() } as usize,
            date: Utc::now().timestamp(),
            exclusive
        };
        let path = self.path.join(format!(
            "{}-{}.lock",
            &lockfile.hostname,
            lockfile.processid
        ));
        try!(lockfile.save(&path));
        let handle = LockHandle {
            lock: lockfile,
            path
        };
        if self.get_lock_level().is_err() {
            try!(handle.release());
            return Err(LockError::Locked);
        }
        Ok(handle)
    }

    pub fn upgrade(&self, lock: &mut LockHandle) -> Result<(), LockError> {
        let lockfile = &mut lock.lock;
        if lockfile.exclusive {
            return Ok(());
        }
        let level = try!(self.get_lock_level());
        if level == LockLevel::Exclusive {
            return Err(LockError::Locked);
        }
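
Lock files are plain YAML documents named <hostname>-<pid>.lock inside the lock folder; any number of shared locks may coexist, while an exclusive lock requires the folder to be otherwise free. A sketch (the path is illustrative and assumed to exist):

    let folder = LockFolder::new("/tmp/zvault-locks");
    let shared = folder.lock(false).unwrap(); // take a shared lock
    assert_eq!(folder.get_lock_level().unwrap(), LockLevel::Shared);
    assert!(folder.lock(true).is_err());      // exclusive is refused while shared locks exist
    drop(shared);                             // dropping the handle removes the lock file
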
        lockfile.exclusive = true;
        let path = self.path.join(format!(
            "{}-{}.lock",
            &lockfile.hostname,
            lockfile.processid
        ));
        try!(lockfile.save(&path));
        if self.get_lock_level().is_err() {
            lockfile.exclusive = false;
            try!(lockfile.save(&path));
            return Err(LockError::Locked);
        }
        Ok(())
    }

    pub fn downgrade(&self, lock: &mut LockHandle) -> Result<(), LockError> {
        let lockfile = &mut lock.lock;
        if !lockfile.exclusive {
            return Ok(());
        }
        lockfile.exclusive = false;
        let path = self.path.join(format!(
            "{}-{}.lock",
            &lockfile.hostname,
            lockfile.processid
        ));
        lockfile.save(&path)
    }
}
--------------------------------------------------------------------------------
/src/util/lru_cache.rs:
--------------------------------------------------------------------------------
use std::hash::Hash;
use std::collections::HashMap;

pub struct LruCache<K: Eq + Hash, V> {
    items: HashMap<K, (V, u64)>,
    min_size: usize,
    max_size: usize,
    next: u64
}


impl<K: Eq + Hash, V> LruCache<K, V> {
    #[inline]
    pub fn new(min_size: usize, max_size: usize) -> Self {
        LruCache {
            items: HashMap::default(),
            min_size,
            max_size,
            next: 0
        }
    }

    #[inline]
    pub fn put(&mut self, key: K, value: V) {
        self.items.insert(key, (value, self.next));
        self.next += 1;
        if self.items.len() > self.max_size {
            self.shrink()
        }
    }

    #[inline]
    pub fn get(&mut self, key: &K) -> Option<&V> {
        if let Some(&mut (ref item, ref mut n)) = self.items.get_mut(key) {
            *n = self.next;
            self.next += 1;
            Some(item)
        } else {
            None
        }
    }

    #[inline]
    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
        if let Some(&mut (ref mut item, ref mut n)) = self.items.get_mut(key) {
            *n = self.next;
            self.next += 1;
            Some(item)
        } else {
            None
        }
    }

    fn shrink(&mut self) {
        let mut tags: Vec<u64> = self.items.values().map(|&(_, n)| n).collect();
        tags.sort();
        let min = tags[tags.len() - self.min_size];
        let mut new = HashMap::with_capacity(self.min_size);
        new.extend(self.items.drain().filter(|&(_, (_, n))| n >= min));
        self.items = new;
    }
}
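
The cache evicts in batches: once max_size is exceeded it shrinks back to the min_size most recently used entries in a single pass, which keeps put() cheap on average. A sketch:

    let mut cache = LruCache::new(2, 3);
    cache.put(1, "a");
    cache.put(2, "b");
    cache.put(3, "c");
    cache.get(&1);     // touch 1 so it survives the next shrink
    cache.put(4, "d"); // exceeds max_size; shrinks to the 2 freshest entries
    assert!(cache.get(&1).is_some() && cache.get(&4).is_some());
    assert!(cache.get(&2).is_none());
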
--------------------------------------------------------------------------------
/src/util/mod.rs:
--------------------------------------------------------------------------------
mod compression;
mod encryption;
mod hash;
mod lru_cache;
mod chunk;
mod bitmap;
mod hex;
mod cli;
mod hostname;
mod fs;
mod lock;
mod statistics;
pub mod msgpack;

pub use self::fs::*;
pub use self::chunk::*;
pub use self::compression::*;
pub use self::encryption::*;
pub use self::hash::*;
pub use self::lru_cache::*;
pub use self::bitmap::*;
pub use self::hex::*;
pub use self::cli::*;
pub use self::hostname::*;
pub use self::lock::*;
pub use self::statistics::*;
--------------------------------------------------------------------------------
/src/util/msgpack.rs:
--------------------------------------------------------------------------------
use rmp_serde;
use serde::{Serialize, Deserialize};

use std::io::{Write, Read, Cursor};

pub use serde_bytes::ByteBuf as Bytes;
pub use rmp_serde::decode::Error as DecodeError;
pub use rmp_serde::encode::Error as EncodeError;


#[inline]
pub fn encode<T: Serialize>(t: &T) -> Result<Vec<u8>, EncodeError> {
    let mut data = Vec::new();
    {
        let mut writer = rmp_serde::Serializer::new(&mut data);
        try!(t.serialize(&mut writer));
    }
    Ok(data)
}

#[inline]
pub fn encode_to_stream<T: Serialize>(t: &T, w: &mut Write) -> Result<(), EncodeError> {
    let mut writer = rmp_serde::Serializer::new(w);
    t.serialize(&mut writer)
}

#[inline]
pub fn decode<'a, T: Deserialize<'a>>(data: &[u8]) -> Result<T, DecodeError> {
    let data = Cursor::new(data);
    let mut reader = rmp_serde::Deserializer::new(data);
    T::deserialize(&mut reader)
}

#[inline]
pub fn decode_from_stream<'a, T: Deserialize<'a>>(r: &mut Read) -> Result<T, DecodeError> {
    let mut reader = rmp_serde::Deserializer::new(r);
    T::deserialize(&mut reader)
}
--------------------------------------------------------------------------------
/src/util/statistics.rs:
--------------------------------------------------------------------------------
#[derive(Debug, Default)]
pub struct ValueStats {
    pub min: f32,
    pub max: f32,
    pub avg: f32,
    pub stddev: f32,
    pub count: usize,
    pub count_xs: usize,
    pub count_s: usize,
    pub count_m: usize,
    pub count_l: usize,
    pub count_xl: usize,
}

impl ValueStats {
    pub fn from_iter<T: Iterator<Item = f32>, F: Fn() -> T>(iter: F) -> ValueStats {
        let mut stats = ValueStats::default();
        stats.min = ::std::f32::INFINITY;
        let mut sum = 0.0f64;
        for val in iter() {
            if stats.min > val {
                stats.min = val;
            }
            if stats.max < val {
                stats.max = val;
            }
            sum += f64::from(val);
            stats.count += 1;
        }
        stats.avg = (sum as f32) / (stats.count as f32);
        if stats.count < 2 {
            stats.count_m = stats.count;
            return stats;
        }
        sum = 0.0;
        for val in iter() {
            sum += f64::from(val - stats.avg) * f64::from(val - stats.avg);
        }
        stats.stddev = ((sum as f32) / (stats.count as f32 - 1.0)).sqrt();
        for val in iter() {
            if val < stats.avg - 2.0 * stats.stddev {
                stats.count_xs += 1;
            } else if val < stats.avg - stats.stddev {
                stats.count_s += 1;
            } else if val < stats.avg + stats.stddev {
                stats.count_m += 1;
            } else if val < stats.avg + 2.0 * stats.stddev {
                stats.count_l += 1;
            } else {
                stats.count_xl += 1;
            }
        }
        stats
    }
}
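
ValueStats makes several passes over the data (min/max/avg, then stddev, then the size buckets), so it takes a closure producing a fresh iterator rather than an iterator itself. A sketch:

    let data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
    let stats = ValueStats::from_iter(|| data.iter().cloned());
    assert_eq!(stats.min, 1.0);
    assert_eq!(stats.max, 5.0);
    assert_eq!(stats.avg, 3.0);
    assert_eq!(stats.count, 5);
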
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
set -ex

rm -rf repos
mkdir repos
mkdir -p repos/remotes/zvault_brotli3 repos/remotes/zvault_brotli6 repos/remotes/zvault_lzma2
target/release/zvault init --compression brotli/3 --remote $(pwd)/repos/remotes/zvault_brotli3 $(pwd)/repos/zvault_brotli3
target/release/zvault init --compression brotli/6 --remote $(pwd)/repos/remotes/zvault_brotli6 $(pwd)/repos/zvault_brotli6
target/release/zvault init --compression lzma2/2 --remote $(pwd)/repos/remotes/zvault_lzma2 $(pwd)/repos/zvault_lzma2
attic init repos/attic
borg init -e none repos/borg
borg init -e none repos/borg-zlib
zbackup init --non-encrypted repos/zbackup

find test_data/silesia -type f | xargs cat > /dev/null
time target/release/zvault backup test_data/silesia $(pwd)/repos/zvault_brotli3::silesia1
time target/release/zvault backup test_data/silesia $(pwd)/repos/zvault_brotli3::silesia2
time target/release/zvault backup test_data/silesia $(pwd)/repos/zvault_brotli6::silesia1
time target/release/zvault backup test_data/silesia $(pwd)/repos/zvault_brotli6::silesia2
time target/release/zvault backup test_data/silesia $(pwd)/repos/zvault_lzma2::silesia1
time target/release/zvault backup test_data/silesia $(pwd)/repos/zvault_lzma2::silesia2
time attic create repos/attic::silesia1 test_data/silesia
time attic create repos/attic::silesia2 test_data/silesia
time borg create -C none repos/borg::silesia1 test_data/silesia
time borg create -C none repos/borg::silesia2 test_data/silesia
time borg create -C zlib repos/borg-zlib::silesia1 test_data/silesia
time borg create -C zlib repos/borg-zlib::silesia2 test_data/silesia
time tar -c test_data/silesia | zbackup backup --non-encrypted repos/zbackup/backups/silesia1
time tar -c test_data/silesia | zbackup backup --non-encrypted repos/zbackup/backups/silesia2

du -h test_data/silesia.tar
du -sh repos/remotes/zvault* repos/attic repos/borg repos/borg-zlib repos/zbackup

rm -rf repos
mkdir repos
mkdir -p repos/remotes/zvault_brotli3 repos/remotes/zvault_brotli6 repos/remotes/zvault_lzma2
target/release/zvault init --compression brotli/3 --remote $(pwd)/repos/remotes/zvault_brotli3 $(pwd)/repos/zvault_brotli3
target/release/zvault init --compression brotli/6 --remote $(pwd)/repos/remotes/zvault_brotli6 $(pwd)/repos/zvault_brotli6
target/release/zvault init --compression lzma2/2 --remote $(pwd)/repos/remotes/zvault_lzma2 $(pwd)/repos/zvault_lzma2
attic init repos/attic
borg init -e none repos/borg
borg init -e none repos/borg-zlib
zbackup init --non-encrypted repos/zbackup

find test_data/ubuntu -type f | xargs cat > /dev/null
time target/release/zvault backup test_data/ubuntu $(pwd)/repos/zvault_brotli3::ubuntu1
time target/release/zvault backup test_data/ubuntu $(pwd)/repos/zvault_brotli3::ubuntu2
time target/release/zvault backup test_data/ubuntu $(pwd)/repos/zvault_brotli6::ubuntu1
time target/release/zvault backup test_data/ubuntu $(pwd)/repos/zvault_brotli6::ubuntu2
time target/release/zvault backup test_data/ubuntu $(pwd)/repos/zvault_lzma2::ubuntu1
time target/release/zvault backup test_data/ubuntu $(pwd)/repos/zvault_lzma2::ubuntu2
time attic create repos/attic::ubuntu1 test_data/ubuntu
time attic create repos/attic::ubuntu2 test_data/ubuntu
time borg create -C none repos/borg::ubuntu1 test_data/ubuntu
time borg create -C none repos/borg::ubuntu2 test_data/ubuntu
time borg create -C zlib repos/borg-zlib::ubuntu1 test_data/ubuntu
time borg create -C zlib repos/borg-zlib::ubuntu2 test_data/ubuntu
time tar -c test_data/ubuntu | zbackup backup --non-encrypted repos/zbackup/backups/ubuntu1
time tar -c test_data/ubuntu | zbackup backup --non-encrypted repos/zbackup/backups/ubuntu2

du -h test_data/ubuntu.tar
du -sh repos/remotes/zvault* repos/attic repos/borg repos/borg-zlib repos/zbackup
--------------------------------------------------------------------------------