├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── cli-tests ├── cli-tests.bats ├── diod-thrash.sh ├── mk-random-dir.py ├── parallel-thrash.sh └── s3-parallel-thrash.sh ├── csrc ├── cksumvfs │ ├── cksumvfs.c │ ├── cksumvfs_sqlite_version_number.c │ ├── sqlite3.h │ └── upgrade.sh └── sodium-bindings.h ├── doc ├── cli │ ├── diff.txt │ ├── exec-with-locks.txt │ ├── gc.txt │ ├── get.txt │ ├── help.txt │ ├── init.txt │ ├── list-contents.txt │ ├── list.txt │ ├── new-key.txt │ ├── new-sub-key.txt │ ├── put.txt │ ├── recover-removed.txt │ ├── restore.txt │ ├── rm.txt │ ├── serve.txt │ ├── sync.txt │ └── version.txt ├── guides │ ├── Filesystem Backups.md │ ├── Getting Started.md │ ├── Network Filesystems.md │ ├── Password Protected Keys.md │ ├── Remote Access Controls.md │ └── Secure Offline Keys.md ├── man │ ├── bupstash-authors.7.md │ ├── bupstash-diff.1.md │ ├── bupstash-exec-with-locks.1.md │ ├── bupstash-gc.1.md │ ├── bupstash-get.1.md │ ├── bupstash-init.1.md │ ├── bupstash-keyfiles.7.md │ ├── bupstash-list-contents.1.md │ ├── bupstash-list.1.md │ ├── bupstash-new-key.1.md │ ├── bupstash-new-sub-key.1.md │ ├── bupstash-put.1.md │ ├── bupstash-query-language.7.md │ ├── bupstash-recover-removed.1.md │ ├── bupstash-repository.7.md │ ├── bupstash-restore.1.md │ ├── bupstash-rm.1.md │ ├── bupstash-serve.1.md │ ├── bupstash-sync.1.md │ └── bupstash.1.md ├── technical_overview.md └── upcoming_changelog.md ├── src ├── abloom.rs ├── acache.rs ├── address.rs ├── base64.rs ├── chunk_storage.rs ├── chunker.rs ├── cksumvfs.rs ├── client.rs ├── compression.rs ├── crypto.rs ├── dir_chunk_storage.rs ├── external_chunk_storage.rs ├── fmtutil.rs ├── fprefetch.rs ├── fstx1.rs ├── fstx2.rs ├── fsutil.rs ├── hex.rs ├── htree.rs ├── index.rs ├── indexer.rs ├── ioutil.rs ├── keys.rs ├── main.rs ├── migrate.rs ├── oplog.rs ├── pem.rs ├── protocol.rs ├── put.rs ├── query.rs ├── querycache.rs ├── repository.rs ├── 
rollsum.rs ├── sendlog.rs ├── server.rs ├── sodium.rs ├── sodium_bindings_gen.rs ├── vfs.rs ├── xglobset.rs ├── xid.rs └── xtar.rs └── support ├── bindgen.sh ├── builds.sr.ht ├── debian.yml ├── freebsd.yml └── openbsd.yml ├── pgo-build.sh ├── plot-chunk-sizes.gnuplot ├── pre-commit.sh ├── print-doc-checklist.sh ├── ronn ├── Gemfile ├── Gemfile.lock ├── default.nix ├── gemset.nix └── test-reproducible-html.nix ├── shell.nix └── src-release.sh /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | name: CI 4 | 5 | jobs: 6 | check: 7 | name: Check 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: "Install libsodium" 11 | run: sudo apt-get install -y libsodium-dev 12 | - uses: actions/checkout@v2 13 | - uses: actions/cache@v2 14 | with: 15 | path: | 16 | ~/.cargo/registry 17 | ~/.cargo/git 18 | target 19 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 20 | - uses: actions-rs/toolchain@v1 21 | with: 22 | profile: minimal 23 | toolchain: stable 24 | override: true 25 | - uses: actions-rs/cargo@v1 26 | with: 27 | command: check 28 | 29 | test: 30 | needs: check 31 | strategy: 32 | fail-fast: false 33 | matrix: 34 | IMAGE: [ubuntu-latest, macos-latest] 35 | name: Cargo Test Suite (${{ matrix.IMAGE }}) 36 | runs-on: ${{ matrix.IMAGE }} 37 | steps: 38 | - name: "Install dependencies (Ubuntu)" 39 | run: sudo apt-get install -y libsodium-dev 40 | if: ${{ matrix.IMAGE == 'ubuntu-latest' }} 41 | - name: "Install dependencies (macOS)" 42 | run: brew install libsodium 43 | if: ${{ matrix.IMAGE == 'macos-latest' }} 44 | - uses: actions/checkout@v2 45 | # macOS's BSD tar implementations corrupts the cargo cache when used. There 46 | # is a workaround that installs gnu-tar, but since bupstash recommends using 47 | # the system tar implementation we just skip caching on macOS. 
48 | # See: https://github.com/actions/cache/issues/403 49 | - uses: actions/cache@v2 50 | with: 51 | path: | 52 | ~/.cargo/registry 53 | ~/.cargo/git 54 | target 55 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 56 | if: ${{ matrix.IMAGE != 'macos-latest' }} 57 | - uses: actions-rs/toolchain@v1 58 | with: 59 | profile: minimal 60 | toolchain: stable 61 | override: true 62 | - uses: actions-rs/cargo@v1 63 | with: 64 | command: test 65 | 66 | test-cli: 67 | needs: check 68 | strategy: 69 | fail-fast: false 70 | matrix: 71 | IMAGE: [ubuntu-latest, macos-latest] 72 | name: CLI Test Suite (${{ matrix.IMAGE }}) 73 | runs-on: ${{ matrix.IMAGE }} 74 | steps: 75 | - name: "Install dependencies (Ubuntu)" 76 | run: sudo apt-get install -y libsodium-dev bats bubblewrap 77 | if: ${{ matrix.IMAGE == 'ubuntu-latest' }} 78 | - name: "Install dependencies (macOS)" 79 | run: | 80 | brew uninstall --force bats 81 | brew install libsodium bats-core 82 | if: ${{ matrix.IMAGE == 'macos-latest' }} 83 | - uses: actions/checkout@v2 84 | # macOS's BSD tar implementations corrupts the cargo cache when used. There 85 | # is a workaround that installs gnu-tar, but since bupstash recommends using 86 | # the system tar implementation we just skip caching on macOS. 87 | # See: https://github.com/actions/cache/issues/403 88 | - uses: actions/cache@v2 89 | with: 90 | path: | 91 | ~/.cargo/registry 92 | ~/.cargo/git 93 | target 94 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 95 | if: ${{ matrix.IMAGE != 'macos-latest' }} 96 | - uses: actions-rs/toolchain@v1 97 | with: 98 | profile: minimal 99 | toolchain: stable 100 | override: true 101 | - uses: actions-rs/cargo@v1 102 | with: 103 | command: build 104 | args: --release 105 | - name: "Run tests" 106 | run: PATH="$(pwd)/target/release:$PATH" bats ./cli-tests 107 | # The tests here should be reasonably quick to finish. 
We override the 108 | # default 6 hour timeout in case they aren't 109 | timeout-minutes: 5 110 | 111 | fmt: 112 | needs: check 113 | name: Rustfmt 114 | runs-on: ubuntu-latest 115 | steps: 116 | - name: "Install libsodium" 117 | run: sudo apt-get install -y libsodium-dev 118 | - uses: actions/checkout@v2 119 | - uses: actions/cache@v2 120 | with: 121 | path: | 122 | ~/.cargo/registry 123 | ~/.cargo/git 124 | target 125 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 126 | - uses: actions-rs/toolchain@v1 127 | with: 128 | profile: minimal 129 | toolchain: stable 130 | override: true 131 | - run: rustup component add rustfmt 132 | - uses: actions-rs/cargo@v1 133 | with: 134 | command: fmt 135 | args: --all -- --check 136 | 137 | clippy: 138 | needs: check 139 | name: Clippy 140 | runs-on: ubuntu-latest 141 | steps: 142 | - name: "Install libsodium" 143 | run: sudo apt-get install -y libsodium-dev 144 | - uses: actions/checkout@v2 145 | - uses: actions/cache@v2 146 | with: 147 | path: | 148 | ~/.cargo/registry 149 | ~/.cargo/git 150 | target 151 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 152 | - uses: actions-rs/toolchain@v1 153 | with: 154 | profile: minimal 155 | toolchain: stable 156 | override: true 157 | - run: rustup component add clippy 158 | - uses: actions-rs/cargo@v1 159 | with: 160 | command: clippy 161 | args: -- -D warnings 162 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bupstash" 3 | version = "0.12.1" 4 | authors = ["Andrew Chambers "] 5 | edition = "2018" 6 | license = "MIT" 7 | repository = "https://github.com/andrewchambers/bupstash" 8 | readme = 
"README.md" 9 | description = "Easy and efficient encrypted backups." 10 | 11 | [profile.release] 12 | lto = true 13 | panic = 'abort' 14 | codegen-units = 1 15 | incremental = false 16 | 17 | [features] 18 | simd-rollsum = [] 19 | 20 | [dependencies] 21 | 22 | # More trusted dependencies 23 | crossbeam-utils = "0.8" 24 | crossbeam-channel = "0.5" 25 | blake3 = "1" 26 | itertools = "0.10" 27 | rusqlite = { version = "0.25", features = ["bundled"] } 28 | lz4 = "1.2" 29 | zstd-safe = { version = "6.0", features = ["std", "experimental"] } 30 | anyhow = "1" 31 | thiserror = "1.0" 32 | libc = "0.2" 33 | getopts = "0.2" 34 | codemap = "0.1" 35 | codemap-diagnostic = "0.1" 36 | serde = { version = "1.0", features = ["derive"] } 37 | serde_json = "1.0" 38 | serde_bare = "0.4" 39 | path-clean = "0.1.0" 40 | humantime = "2.0.1" 41 | atty = "0.2" 42 | once_cell = "1.4" 43 | tar = "0.4" 44 | regex = { version = "1", default-features = false, features = ["std"] } 45 | globset = "0.4.8" 46 | chrono = { version = "0.4", features = ["serde"]} 47 | cfg-if = "0.1" 48 | shlex = "0.1" 49 | nix = "0.23" 50 | indicatif = "0.16.2" 51 | rangemap = "0.1.11" 52 | xattr = "0.2" 53 | walkdir = "2" 54 | bitflags = "1" 55 | uriparse = "0.6" 56 | plmap = "0.3.0" 57 | num_cpus = "1" 58 | 59 | [dev-dependencies] 60 | 61 | rand = "0.8" 62 | tempfile = "3" 63 | 64 | [build-dependencies] 65 | 66 | cc = "1" 67 | pkg-config = "0.3" 68 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 andrewchambers 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the 
Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bupstash 2 | 3 | [![Gitter](https://badges.gitter.im/bupstash/community.svg)](https://gitter.im/bupstash/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) 4 | 5 | Bupstash is a tool for encrypted backups - if you need secure backups, Bupstash is the tool for you. 6 | 7 | 8 | Bupstash was designed to have: 9 | 10 | - Efficient deduplication - Bupstash can store thousands of encrypted directory snapshots using a fraction of the space encrypted tarballs would require. 11 | 12 | - Strong privacy - Data is encrypted client side and the repository never needs has access to the decryption keys. 13 | 14 | - Offline decryption keys - Backups do not require the decryption key be anywhere near an at-risk server or computer. 15 | 16 | - Key/value tagging with search - all while keeping the tags fully encrypted. 17 | 18 | - Great performance on slow networks - Bupstash really strives to work well on high latency networks like cellular and connections to far-off lands. 
19 | 20 | - Secure remote access controls - Ransomware, angry spouses, and disgruntled business partners will be powerless to delete your remote backups. 21 | 22 | - Efficient incremental backups - Bupstash knows what it backed up last time and skips that work. 23 | 24 | - Fantastic performance with low ram usage - Bupstash won't bog down your production servers. 25 | 26 | - Safety against malicious attacks - Bupstash is written in a memory safe language to dramatically reduce the attack surface over the network. 27 | 28 | ## Stability and Backwards Compatibility 29 | 30 | Bupstash is beta software, while all efforts are made to keep bupstash bug free, we currently recommend 31 | using bupstash for making *REDUNDANT* backups where failure can be tolerated. 32 | 33 | The repository format is approaching stability, and will not be changed 34 | in a backwards incompatible way unless there is *very* strong justification. Future changes will most likely be backwards compatible, or come with a migration path if it is needed at all. 35 | 36 | # Guides, documentation and support 37 | 38 | - Visit the [project website](https://bupstash.io). 39 | - Visit the [quickstart guide](https://bupstash.io/doc/guides/Getting%20Started.html) for an introductory tutorial. 40 | - Visit the [filesystem backups guide](https://bupstash.io/doc/guides/Filesystem%20Backups.html) for examples of making backups. 41 | - Visit the [man pages](https://bupstash.io/doc/man/bupstash.html) for more comprehensive documentation. 42 | - Visit the [community chat](https://gitter.im/bupstash/community?utm_source=share-link&utm_medium=link&utm_campaign=share-link) or the [community forum](https://github.com/andrewchambers/bupstash/discussions) to ask questions. 43 | - Read the introductory [blog post](https://acha.ninja/blog/introducing_bupstash/). 44 | - Read the [technical overview](./doc/technical_overview.md) to understand how it works. 
45 | 46 | # Typical usage 47 | 48 | Initialize a new Bupstash repository via ssh. 49 | ``` 50 | $ export BUPSTASH_REPOSITORY=ssh://$SERVER/home/me/backups 51 | $ # Ensure bupstash is on the $PATH of both machines. 52 | $ bupstash init 53 | ``` 54 | 55 | Create a new encryption key, and tell bupstash to use it. 56 | ``` 57 | $ bupstash new-key -o backups.key 58 | $ export BUPSTASH_KEY="$(pwd)/backups.key" 59 | ``` 60 | 61 | Save a directory as a tarball snapshot. 62 | ``` 63 | $ bupstash put hostname="$(hostname)" ./some-data 64 | ebb66f3baa5d432e9f9a28934888a23d 65 | ``` 66 | Save the output of a command, checking for errors. 67 | ``` 68 | $ bupstash put --exec name=database.sql pgdump mydatabase 69 | 14ebd2073b258b1f55c5bbc889c49db4 70 | ``` 71 | 72 | List items matching a query. 73 | ``` 74 | $ bupstash list name="backup.tar" and hostname="server-1" 75 | id="bcb8684e6bf5cb453e77486decf61685" name="some-file.txt" hostname="server-1" timestamp="2020/07/27 11:26:16" 76 | ``` 77 | 78 | List files in a backup. 79 | ``` 80 | $ bupstash list-contents id=bcb86* 81 | drwxr-xr-x 0B 2020/10/30 13:32:04 . 82 | -rw-r--r-- 7B 2020/10/30 13:32:04 hello.txt 83 | ``` 84 | 85 | Get an item matching a query. 86 | ``` 87 | $ bupstash get id=bcb8684e6bf5cb453e77486decf61685 88 | some data... 89 | 90 | $ bupstash get id="ebb66*" | tar -C ./restore -xf - 91 | ``` 92 | 93 | Fetch a single file from a backup. 94 | ``` 95 | $ bupstash get --pick hello.txt id="bcb86*" 96 | hello! 97 | ``` 98 | 99 | Diff backups, with local directories or other backups. 100 | ``` 101 | $ bupstash diff /home/ac :: id="a4b8f*" 102 | ... 103 | - -rw------- 14.50KiB 2021/08/01 02:36:19 .bash_history 104 | + -rw------- 13.66KiB 2021/08/01 11:51:23 .bash_history 105 | ``` 106 | 107 | Restore backups to a local directory. 108 | 109 | ``` 110 | $ mkdir restore-dir 111 | $ bupstash restore --into ./restore-dir id="a4b8f*" 112 | ``` 113 | 114 | Remove items matching a query. 
115 | ``` 116 | $ bupstash rm name=some-data.txt and older-than 30d 117 | ``` 118 | 119 | Run the garbage collector to reclaim disk space. 120 | ``` 121 | $ bupstash gc 122 | ``` 123 | 124 | # Installation 125 | 126 | ## From source 127 | 128 | First ensure you have a recent rust+cargo, pkg-config and libsodium-dev (>= 1.0.14) package installed. 129 | 130 | Next clone the repository and run cargo build. 131 | ``` 132 | $ git clone https://github.com/andrewchambers/bupstash 133 | $ cd bupstash 134 | $ cargo build --release 135 | $ cp ./target/release/bupstash $INSTALL_DIR 136 | ``` 137 | 138 | ### Pkgconf 139 | 140 | You can use pkgconf instead of pkg-config (this is required on freebsd) by setting 141 | the PKG_CONFIG environment variable. 142 | 143 | ``` 144 | $ export PKG_CONFIG=pkgconf 145 | ``` 146 | 147 | 148 | ## Building man pages 149 | 150 | The man pages are currently build using a markdown to man page renderer called [ronn](https://github.com/rtomayko/ronn). 151 | 152 | ``` 153 | $ cd doc/man 154 | $ ronn -r *.md 155 | ``` 156 | 157 | ## Generating release tarballs 158 | 159 | ``` 160 | $ sh support/src-release.sh $tag 161 | $ echo bupstash-*.tar.gz 162 | bupstash-$version-man.tar.gz 163 | bupstash-$version-src+deps.tar.gz 164 | ``` 165 | 166 | ## Test suites 167 | 168 | Install bash automated test framework and run the following to run both the unit tests, and cli integration test suite. 169 | 170 | ``` 171 | $ cargo test 172 | $ cargo build --release 173 | $ export PATH=${CARGO_TARGET_DIR:-$PWD/target}/release:$PATH 174 | $ bats ./cli-tests 175 | ``` 176 | 177 | ## Precompiled releases 178 | 179 | Head to the [releases page](https://github.com/andrewchambers/bupstash/releases) and download for 180 | a build for your platform. Simply extract the archive and add the single bupstash binary to your PATH. 181 | 182 | Currently we only precompile for linux (help wanted for more platforms). 
183 | 184 | 185 | 186 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | pkg_config::probe_library("libsodium").unwrap(); 3 | 4 | println!("cargo:rerun-if-changed=csrc/cksumvfs/sqlite3.h"); 5 | cc::Build::new() 6 | .warnings(false) // Not our code/warnings to fix. 7 | .flag("-DSQLITE_CKSUMVFS_STATIC") 8 | .flag("-Icsrc/cksumvfs") 9 | .file("csrc/cksumvfs/cksumvfs.c") 10 | .file("csrc/cksumvfs/cksumvfs_sqlite_version_number.c") 11 | .compile("cksumvfs"); 12 | } 13 | -------------------------------------------------------------------------------- /cli-tests/diod-thrash.sh: -------------------------------------------------------------------------------- 1 | set -xu 2 | 3 | export SCRATCH=/tmp/diod-thrash 4 | export N_WORKERS=8 5 | export DIOD_PORT=1888 6 | 7 | cleanup () { 8 | for m in $(ls $SCRATCH/mnt/) 9 | do 10 | if mountpoint -q "$SCRATCH/mnt/$m" 11 | then 12 | sudo umount "$SCRATCH/mnt/$m" 13 | fi 14 | done 15 | killall bupstash 16 | killall diod 17 | trap - SIGTERM 18 | rm -rf "$SCRATCH" 19 | } 20 | 21 | cleanup 22 | trap "cleanup" SIGINT SIGTERM EXIT 23 | 24 | rm -rf "$SCRATCH" 25 | mkdir -p "$SCRATCH/root" 26 | export BUPSTASH_KEY="$SCRATCH/t.key" 27 | export BUPSTASH_REPOSITORY="$SCRATCH/root/repo" 28 | 29 | bupstash new-key -o "$BUPSTASH_KEY" 30 | bupstash init -r "$SCRATCH/root/repo" 31 | 32 | diod -l "127.0.0.1:$DIOD_PORT" -f -n -S -U $(whoami) -e "$SCRATCH/root" & 33 | sleep 1 34 | 35 | for i in $(seq $((N_WORKERS-1))) 36 | do 37 | mountpoint="$SCRATCH/mnt/$i" 38 | mkdir -p "$SCRATCH/mnt/$i" 39 | if ! 
sudo diodmount \ 40 | -n \ 41 | -v \ 42 | -o "port=$DIOD_PORT,uname=$(whoami)" \ 43 | "127.0.0.1:$SCRATCH/root" \ 44 | "$mountpoint" 45 | then 46 | exit 1 47 | fi 48 | done 49 | 50 | bupstash init -r "$SCRATCH/sync-source-repo" 51 | 52 | rm -f "$SCRATCH/thrash.summary" 53 | sqlite3 "$SCRATCH/thrash.summary" "create table thrash_results(name, count, unique(name));" 54 | 55 | inc_result () { 56 | sqlite3 "$SCRATCH/thrash.summary" \ 57 | "PRAGMA busy_timeout = 10000; 58 | begin immediate; 59 | insert into thrash_results(name, count) values('$1', 0) 60 | on conflict(name) do update set count=count+1 where name = '$1'; commit;" > /dev/null 61 | } 62 | 63 | thrash_worker () { 64 | 65 | export BUPSTASH_REPOSITORY="$1" 66 | 67 | for i in $(seq 15) 68 | do 69 | expected=$(uuidgen) 70 | id=$(bupstash put -q -e --no-send-log thrash_test=yes :: echo $expected) 71 | 72 | if test "$?" = 0 73 | then 74 | inc_result "put-ok" 75 | 76 | actual="$(bupstash get -q id=$id)" 77 | if test "$?" = 0 78 | then 79 | inc_result "get-ok" 80 | if test "$expected" != "$actual" 81 | then 82 | inc_result "get-corrupt" 83 | fi 84 | else 85 | inc_result "get-fail" 86 | fi 87 | 88 | bupstash rm -q id="$id" >&2 89 | if test "$?" = 0 90 | then 91 | inc_result "rm-ok" 92 | else 93 | inc_result "rm-fail" 94 | fi 95 | else 96 | inc_result "put-fail" 97 | fi 98 | 99 | expected=$(uuidgen) 100 | id=$(bupstash put -r "$SCRATCH/sync-source-repo" -q -e --no-send-log thrash_test=yes :: echo $expected) 101 | bupstash sync -r "$SCRATCH/sync-source-repo" --to "$BUPSTASH_REPOSITORY" -q id="$id" >&2 102 | if test "$?" = 0 103 | then 104 | inc_result "sync-ok" 105 | 106 | actual="$(bupstash get -q id=$id)" 107 | if test "$?" = 0 108 | then 109 | inc_result "sync-get-ok" 110 | if test "$expected" != "$actual" 111 | then 112 | inc_result "sync-get-corrupt" 113 | fi 114 | else 115 | inc_result "sync-get-fail" 116 | fi 117 | 118 | bupstash rm -q id="$id" >&2 119 | if test "$?" 
= 0 120 | then 121 | inc_result "rm-ok" 122 | else 123 | inc_result "rm-fail" 124 | fi 125 | else 126 | inc_result "sync-fail" 127 | fi 128 | bupstash rm -q -r "$SCRATCH/sync-source-repo" id="$id" >&2 129 | 130 | bupstash recover-removed -q >&2 131 | if test "$?" = 0 132 | then 133 | inc_result "recover-removed-ok" 134 | else 135 | inc_result "recover-removed-fail" 136 | fi 137 | 138 | bupstash gc -q >&2 139 | if test "$?" = 0 140 | then 141 | inc_result "gc-ok" 142 | else 143 | inc_result "gc-fail" 144 | fi 145 | done 146 | 147 | rm -f "$SCRATCH/want_chaos" 148 | } 149 | 150 | bupstash_serve_chaos_worker () { 151 | while test -f "$SCRATCH/want_chaos" 152 | do 153 | kill -9 $(ps -aux | grep 'bupstash serve' | grep -v "grep" | awk '{print $2}' | shuf | head -n $(($RANDOM/$N_WORKERS))) 154 | sleep 1 155 | done 156 | } 157 | 158 | # This loop is to control the max size of the repository. 159 | for i in $(seq 10) 160 | do 161 | 162 | bupstash rm --allow-many thrash_test=yes >&2 163 | bupstash gc >&2 164 | 165 | background_workers=() 166 | # At least enough workers so the scheduler hopefully 167 | # interleaves them in interesting ways. 168 | for j in $(seq $(($N_WORKERS-1))) 169 | do 170 | thrash_worker "$SCRATCH/mnt/$j/repo" & 171 | background_workers+=($!) 172 | done 173 | # One worker not via diod. 174 | thrash_worker "$SCRATCH/root/repo" & 175 | background_workers+=($!) 176 | 177 | touch "$SCRATCH/want_chaos" 178 | bupstash_serve_chaos_worker & 179 | background_workers+=($!) 180 | 181 | wait ${background_workers[@]} 182 | 183 | for id in $(bupstash list -q --format=jsonl1 | jq -r .id) 184 | do 185 | bupstash get -q id=$id > /dev/null 186 | if test "$?" 
!= 0 187 | then 188 | inc_result "get-corrupt" 189 | fi 190 | done 191 | 192 | if sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results;' | grep -q 'get\-corrupt' 193 | then 194 | echo "invariant check failed, 'get' should never return a corrupt result" 195 | exit 1 196 | fi 197 | 198 | if sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results;' | grep -q 'sync\-get\-corrupt' 199 | then 200 | echo "invariant check failed, 'sync' should never return a corrupt result" 201 | exit 1 202 | fi 203 | 204 | done 205 | 206 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='put-ok';")" = "" 207 | then 208 | echo "at least one 'put' operation must succeed for the test to pass." 209 | exit 1 210 | fi 211 | 212 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='sync-ok';")" = "" 213 | then 214 | echo "at least one 'sync' operation must succeed for the test to pass." 215 | exit 1 216 | fi 217 | 218 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='gc-ok';")" = "" 219 | then 220 | echo "at least one 'gc' operation must succeed for the test to pass." 221 | exit 1 222 | fi 223 | 224 | trap - EXIT 225 | 226 | set +x 227 | echo "test results..." 228 | sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results order by name;' 229 | echo "test passed" 230 | 231 | -------------------------------------------------------------------------------- /cli-tests/mk-random-dir.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | 3 | import string 4 | import random 5 | import os 6 | import os.path as path 7 | import sys 8 | 9 | MIN_NAME_LEN = 1 10 | MAX_NAME_LEN = 8 11 | MIN_CHILD_DIRS = 0 12 | MAX_CHILD_DIRS = 3 13 | MIN_CHILD_FILES = 0 14 | MAX_CHILD_FILES = 5 15 | MIN_DEPTH = 0 16 | MAX_DEPTH = 3 17 | FILE_SIZES = [0, 1, 2, 3, 1024, 512*1024, 2*1024*1024, 8*1024*1024] 18 | 19 | def random_file_name(): 20 | name_len = random.randint(MIN_NAME_LEN, MAX_NAME_LEN) 21 | return ''.join(random.choices(string.ascii_lowercase, k=name_len)) 22 | 23 | def fresh_dir_ent(dir_path): 24 | while True: 25 | p = path.join(dir_path, random_file_name()) 26 | if not path.exists(p): 27 | return p 28 | 29 | def random_dir(dir_path="./random_dir", depth=None): 30 | 31 | if depth is None: 32 | depth = random.randint(MIN_DEPTH, MAX_DEPTH) 33 | 34 | os.mkdir(dir_path) 35 | 36 | num_files = random.randint(MIN_CHILD_FILES, MAX_CHILD_FILES) 37 | num_dirs = random.randint(MIN_CHILD_DIRS, MAX_CHILD_DIRS) 38 | 39 | if depth != 0: 40 | for i in range(num_dirs): 41 | random_dir(dir_path=fresh_dir_ent(dir_path), depth=depth-1) 42 | 43 | for i in range(num_files): 44 | with open(fresh_dir_ent(dir_path), "wb") as f: 45 | fsize = random.choice(FILE_SIZES) 46 | f.write(os.urandom(fsize)) 47 | 48 | if __name__ == '__main__': 49 | random_dir(sys.argv[1]) -------------------------------------------------------------------------------- /cli-tests/parallel-thrash.sh: -------------------------------------------------------------------------------- 1 | set -xu 2 | 3 | if test "${SCRATCH:-}" = "" 4 | then 5 | export SCRATCH=/tmp 6 | else 7 | export SCRATCH 8 | fi 9 | 10 | export N_WORKERS=$(nproc) 11 | 12 | trap "trap - SIGTERM ; kill -9 -- -$$" SIGINT SIGTERM EXIT 13 | 14 | if test -n "${BUPSTASH_REPOSITORY_COMMAND:-}" 15 | then 16 | export BUPSTASH_TO_REPOSITORY_COMMAND="${BUPSTASH_REPOSITORY_COMMAND}" 17 | fi 18 | if test -n "${BUPSTASH_REPOSITORY:-}" 19 | then 20 | export 
BUPSTASH_TO_REPOSITORY="${BUPSTASH_REPOSITORY}" 21 | fi 22 | bupstash init -r "$SCRATCH/sync-source-repo" 23 | 24 | rm -f "$SCRATCH/thrash.summary" 25 | sqlite3 "$SCRATCH/thrash.summary" "create table thrash_results(name, count, unique(name));" 26 | 27 | inc_result () { 28 | sqlite3 "$SCRATCH/thrash.summary" \ 29 | "PRAGMA busy_timeout = 10000; 30 | begin immediate; 31 | insert into thrash_results(name, count) values('$1', 0) 32 | on conflict(name) do update set count=count+1 where name = '$1'; commit;" > /dev/null 33 | } 34 | 35 | thrash_worker () { 36 | for i in $(seq 15) 37 | do 38 | expected=$(uuidgen) 39 | id=$(bupstash put -q -e --no-send-log thrash_test=yes :: echo $expected) 40 | 41 | if test "$?" = 0 42 | then 43 | inc_result "put-ok" 44 | 45 | actual="$(bupstash get -q id=$id)" 46 | if test "$?" = 0 47 | then 48 | inc_result "get-ok" 49 | if test "$expected" != "$actual" 50 | then 51 | inc_result "get-corrupt" 52 | fi 53 | else 54 | inc_result "get-fail" 55 | fi 56 | 57 | bupstash rm -q id="$id" >&2 58 | if test "$?" = 0 59 | then 60 | inc_result "rm-ok" 61 | else 62 | inc_result "rm-fail" 63 | fi 64 | else 65 | inc_result "put-fail" 66 | fi 67 | 68 | expected=$(uuidgen) 69 | id=$(bupstash put -r "$SCRATCH/sync-source-repo" -q -e --no-send-log thrash_test=yes :: echo $expected) 70 | bupstash sync -r "$SCRATCH/sync-source-repo" -q id="$id" >&2 71 | if test "$?" = 0 72 | then 73 | inc_result "sync-ok" 74 | 75 | actual="$(bupstash get -q id=$id)" 76 | if test "$?" = 0 77 | then 78 | inc_result "sync-get-ok" 79 | if test "$expected" != "$actual" 80 | then 81 | inc_result "sync-get-corrupt" 82 | fi 83 | else 84 | inc_result "sync-get-fail" 85 | fi 86 | 87 | bupstash rm -q id="$id" >&2 88 | if test "$?" = 0 89 | then 90 | inc_result "rm-ok" 91 | else 92 | inc_result "rm-fail" 93 | fi 94 | else 95 | inc_result "sync-fail" 96 | fi 97 | bupstash rm -q -r "$SCRATCH/sync-source-repo" id="$id" >&2 98 | 99 | bupstash recover-removed -q >&2 100 | if test "$?" 
= 0 101 | then 102 | inc_result "recover-removed-ok" 103 | else 104 | inc_result "recover-removed-fail" 105 | fi 106 | 107 | bupstash gc -q >&2 108 | if test "$?" = 0 109 | then 110 | inc_result "gc-ok" 111 | else 112 | inc_result "gc-fail" 113 | fi 114 | done 115 | 116 | rm -f "$SCRATCH/want_chaos" 117 | } 118 | 119 | bupstash_serve_chaos_worker () { 120 | while test -f "$SCRATCH/want_chaos" 121 | do 122 | kill -9 $(ps -aux | grep 'bupstash serve' | grep -v "grep" | awk '{print $2}' | shuf | head -n $(($RANDOM/$N_WORKERS))) 123 | sleep 1 124 | done 125 | } 126 | 127 | # This loop is to control the max size of the repository. 128 | for i in $(seq 10) 129 | do 130 | 131 | bupstash rm --allow-many thrash_test=yes >&2 132 | bupstash gc >&2 133 | 134 | background_workers=() 135 | # At least enough workers so the scheduler hopefully 136 | # interleaves them in interesting ways. 137 | for j in $(seq $N_WORKERS) 138 | do 139 | thrash_worker & 140 | background_workers+=($!) 141 | done 142 | 143 | touch "$SCRATCH/want_chaos" 144 | bupstash_serve_chaos_worker & 145 | background_workers+=($!) 146 | 147 | wait ${background_workers[@]} 148 | 149 | for id in $(bupstash list -q --format=jsonl1 | jq -r .id) 150 | do 151 | bupstash get -q id=$id > /dev/null 152 | if test "$?" 
!= 0 153 | then 154 | inc_result "get-corrupt" 155 | fi 156 | done 157 | 158 | if sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results;' | grep -q 'get\-corrupt' 159 | then 160 | echo "invariant check failed, 'get' should never return a corrupt result" 161 | exit 1 162 | fi 163 | 164 | if sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results;' | grep -q 'sync\-get\-corrupt' 165 | then 166 | echo "invariant check failed, 'sync' should never return a corrupt result" 167 | exit 1 168 | fi 169 | 170 | done 171 | 172 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='put-ok';")" = "" 173 | then 174 | echo "at least one 'put' operation must succeed for the test to pass." 175 | exit 1 176 | fi 177 | 178 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='sync-ok';")" = "" 179 | then 180 | echo "at least one 'sync' operation must succeed for the test to pass." 181 | exit 1 182 | fi 183 | 184 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='gc-ok';")" = "" 185 | then 186 | echo "at least one 'gc' operation must succeed for the test to pass." 187 | exit 1 188 | fi 189 | 190 | trap - EXIT 191 | 192 | set +x 193 | echo "test results..." 
194 | sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results order by name;' 195 | echo "test passed" 196 | 197 | -------------------------------------------------------------------------------- /cli-tests/s3-parallel-thrash.sh: -------------------------------------------------------------------------------- 1 | set -xu 2 | 3 | if test "${SCRATCH:-}" = "" 4 | then 5 | export SCRATCH=/tmp 6 | else 7 | export SCRATCH 8 | fi 9 | 10 | export BUPSTASH_REPOSITORY="$SCRATCH/thrash_repo" 11 | export BUPSTASH_KEY="$SCRATCH/thrash.key" 12 | export BUPSTASH_QUERY_CACHE="$SCRATCH/thrash.qcache" 13 | export MINIO_ACCESS_KEY="thrash_access" 14 | export MINIO_SECRET_KEY="thrash_secret" 15 | export N_WORKERS=$(nproc) 16 | 17 | trap "trap - SIGTERM ; kill -9 -- -$$" SIGINT SIGTERM EXIT 18 | 19 | rm -rf "$BUPSTASH_REPOSITORY" 20 | rm -f "$BUPSTASH_KEY" 21 | 22 | bupstash new-key -o "$BUPSTASH_KEY" 23 | bupstash init --storage \ 24 | "{\"ExternalStore\":{\"path\":\"s3://thrash_access:thrash_secret@thrashbucket?secure=false&endpoint=localhost%3A9000\",\"socket_path\":\"$SCRATCH/bupstash-s3-storage.sock\"}}" 25 | rm -f "$SCRATCH/thrash.summary" 26 | sqlite3 "$SCRATCH/thrash.summary" "create table thrash_results(name, count, unique(name));" 27 | 28 | inc_result () { 29 | sqlite3 "$SCRATCH/thrash.summary" \ 30 | "PRAGMA busy_timeout = 10000; 31 | begin immediate; 32 | insert into thrash_results(name, count) values('$1', 0) 33 | on conflict(name) do update set count=count+1 where name = '$1'; commit;" > /dev/null 34 | } 35 | 36 | thrash_worker () { 37 | for i in $(seq 50) 38 | do 39 | expected=$(uuidgen) 40 | 41 | id=$(bupstash put -q -e --no-send-log thrash_test=yes :: echo $expected) 42 | 43 | if test "$?" = 0 44 | then 45 | inc_result "put-ok" 46 | 47 | actual="$(bupstash get -q id=$id)" 48 | if test "$?" 
= 0 49 | then 50 | inc_result "get-ok" 51 | if test "$expected" != "$actual" 52 | then 53 | inc_result "get-corrupt" 54 | fi 55 | else 56 | inc_result "get-fail" 57 | fi 58 | 59 | bupstash rm -q id="$id" >&2 60 | if test "$?" = 0 61 | then 62 | inc_result "rm-ok" 63 | else 64 | inc_result "rm-fail" 65 | fi 66 | else 67 | inc_result "put-fail" 68 | fi 69 | 70 | bupstash recover-removed -q >&2 71 | if test "$?" = 0 72 | then 73 | inc_result "recover-removed-ok" 74 | else 75 | inc_result "recover-removed-fail" 76 | fi 77 | 78 | bupstash gc -q >&2 79 | if test "$?" = 0 80 | then 81 | inc_result "gc-ok" 82 | else 83 | inc_result "gc-fail" 84 | fi 85 | done 86 | 87 | rm -f "$SCRATCH/want_chaos" 88 | } 89 | 90 | bupstash_serve_chaos_worker () { 91 | while test -f "$SCRATCH/want_chaos" 92 | do 93 | kill -9 $(ps -aux | grep 'bupstash serve' | grep -v "grep" | awk '{print $2}' | shuf | head -n $(($RANDOM/$N_WORKERS))) 94 | sleep 1 95 | done 96 | } 97 | 98 | bupstash_s3_plugin_chaos_worker () { 99 | while test -f "$SCRATCH/want_chaos" 100 | do 101 | killall -s SIGKILL 'bupstash-s3-storage' 102 | sleep 1 103 | done 104 | } 105 | 106 | s3_plugin_supervisor () { 107 | cd "$SCRATCH" 108 | while true 109 | do 110 | rm -f "./bupstash-s3-storage.sock" 111 | bupstash-s3-storage -quiescent-period 10ms >&2 112 | done 113 | } 114 | 115 | minio server "$SCRATCH/miniodata" >&2 & 116 | minio_pid="$!" 117 | s3_plugin_supervisor & 118 | s3_plugin_supervisor_pid="$!" 119 | # give both some time to start. 120 | sleep 1 121 | 122 | # Configure the test minio instance. 123 | rm -rf "$SCRATCH/mc" 124 | mc config host add thrashminio http://127.0.0.1:9000 thrash_access thrash_secret >&2 125 | 126 | # Outer loop is to control the size of the gc set. 127 | for i in $(seq 50) 128 | do 129 | 130 | bupstash rm --allow-many thrash_test=yes >&2 131 | bupstash gc >&2 132 | 133 | background_workers=() 134 | # At least enough workers so the scheduler hopefully 135 | # interleaves them in interesting ways. 
136 | for j in $(seq $N_WORKERS) 137 | do 138 | thrash_worker & 139 | background_workers+=($!) 140 | done 141 | 142 | touch "$SCRATCH/want_chaos" 143 | bupstash_serve_chaos_worker & 144 | background_workers+=($!) 145 | bupstash_s3_plugin_chaos_worker & 146 | background_workers+=($!) 147 | 148 | wait ${background_workers[@]} 149 | 150 | for id in $(bupstash list -q --format=jsonl | jq -r .id) 151 | do 152 | bupstash get -q id=$id > /dev/null 153 | if test "$?" != 0 154 | then 155 | inc_result "get-corrupt" 156 | fi 157 | done 158 | 159 | if sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results;' | grep -q 'get\-corrupt' 160 | then 161 | echo "invariant check failed, 'get' should never return a corrupt result" 162 | exit 1 163 | fi 164 | 165 | done 166 | 167 | if test $(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='put-ok';") = "" 168 | then 169 | echo "at least one 'put' operation must succeed for the test to pass." 170 | exit 1 171 | fi 172 | 173 | if test $(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='gc-ok';") = "" 174 | then 175 | echo "at least one 'gc' operation must succeed for the test to pass." 176 | exit 1 177 | fi 178 | 179 | # Cleanup any remains 180 | kill $s3_plugin_supervisor_pid 181 | kill $minio_pid 182 | 183 | # XXX hacky, but cleanup any bupstash-s3-storage instances that might have been restarted by the supervisor. 184 | sleep 0.5 185 | killall bupstash-s3-storage 186 | wait 187 | 188 | trap - EXIT 189 | 190 | set +x 191 | 192 | 193 | echo "test results..." 
194 | sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results order by name;' 195 | echo "test passed" 196 | 197 | -------------------------------------------------------------------------------- /csrc/cksumvfs/cksumvfs_sqlite_version_number.c: -------------------------------------------------------------------------------- 1 | #include "./sqlite3.h" 2 | 3 | int cksumvfs_sqlite_version_number(void) { 4 | return SQLITE_VERSION_NUMBER; 5 | } -------------------------------------------------------------------------------- /csrc/cksumvfs/upgrade.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ex 3 | 4 | SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) 5 | CUR_DIR=$(pwd -P) 6 | echo "$SCRIPT_DIR" 7 | cd "$SCRIPT_DIR" || { echo "fatal error" >&2; exit 1; } 8 | 9 | SQLITE_VERSION=3350400 10 | YEAR=2021 11 | 12 | # Download and extract amalgamation 13 | SQLITE=sqlite-amalgamation-$SQLITE_VERSION 14 | curl -O https://sqlite.org/$YEAR/$SQLITE.zip 15 | unzip -p "$SQLITE.zip" "$SQLITE/sqlite3.h" > "$SCRIPT_DIR/sqlite3.h" 16 | rm -f "$SQLITE.zip" 17 | 18 | # Download and extract extra extensions 19 | SQLITE=sqlite-src-$SQLITE_VERSION 20 | curl -O https://sqlite.org/$YEAR/$SQLITE.zip 21 | unzip -p "$SQLITE.zip" "$SQLITE/ext/misc/cksumvfs.c" > "$SCRIPT_DIR/cksumvfs.c" 22 | rm -f "$SQLITE.zip" 23 | -------------------------------------------------------------------------------- /csrc/sodium-bindings.h: -------------------------------------------------------------------------------- 1 | #include -------------------------------------------------------------------------------- /doc/cli/diff.txt: -------------------------------------------------------------------------------- 1 | bupstash diff [OPTIONS] QUERY1 :: QUERY2 2 | 3 | Diff two snapshots. 
4 | 5 | See the bupstash manual for a detailed description of diff semantics 6 | 7 | Examples: 8 | $ bupstash diff id="8f7*" :: id="def*" 9 | $ bupstash diff --relaxed id="57de*" :: ./files -------------------------------------------------------------------------------- /doc/cli/exec-with-locks.txt: -------------------------------------------------------------------------------- 1 | bupstash exec-with-locks -r REPO COMMAND... 2 | 3 | Execute COMMAND with exclusive locks held on the bupstash repository, 4 | preventing concurrent modification to the repository for the duration of the command. 5 | 6 | Examples: 7 | $ bupstash exec-with-locks -r ./repo rsync ... -------------------------------------------------------------------------------- /doc/cli/gc.txt: -------------------------------------------------------------------------------- 1 | bupstash gc [OPTIONS] 2 | 3 | Run the garbage collector against a repository, removing 4 | unreferenced data and freeing disk space. 5 | 6 | Concurrent operations may be delayed while garbage collection 7 | is in progress. 8 | 9 | Examples: 10 | $ bupstash gc 11 | $ bupstash gc -r ssh://$server/repository -------------------------------------------------------------------------------- /doc/cli/get.txt: -------------------------------------------------------------------------------- 1 | bupstash get [OPTIONS] QUERY 2 | 3 | Get data from a bupstash repository matching a given query. 4 | 5 | See the bupstash user manual for a description of the query language. 
6 | 7 | Examples: 8 | $ bupstash get id=8f701cc8c03e1fe23598e95e7b87cb1c > out.tar 9 | $ bupstash get id=1b89* > out.data 10 | $ bupstash get name=foo.tar | tar -xvf - 11 | $ bupstash get --pick dir/my-file.txt id=$id 12 | $ bupstash get --pick sub-dir id=$id | tar -xvf - -------------------------------------------------------------------------------- /doc/cli/help.txt: -------------------------------------------------------------------------------- 1 | bupstash 2 | 3 | Make efficient encrypted backups easily by running 4 | one of the subcommands below. 5 | 6 | Subcommands: 7 | 8 | init Initialize a bupstash repository. 9 | new-key Create a new key capable of all operations. 10 | new-sub-key Derive a sub key for a subset of operations. 11 | put Put a new item into a repository. 12 | list List items in a repository. 13 | list-contents List contents of a directory snapshot. 14 | get Get data from a repository. 15 | restore Restore a snapshot to a local directory. 16 | rm/remove Remove items from a repository. 17 | recover-removed Recover items pending garbage collection. 18 | gc Delete unreferenced data and free space. 19 | sync Sync items between repositories. 20 | exec-with-locks Exec a command with a locked repository. 21 | version Print the version and exit. 22 | help Print this message. 23 | 24 | 25 | For subcommand specific help, run 'bupstash CMD --help'. 26 | 27 | For comprehensive documentation check the man pages. -------------------------------------------------------------------------------- /doc/cli/init.txt: -------------------------------------------------------------------------------- 1 | bupstash init [OPTIONS] 2 | 3 | Initialize a bupstash repository. 
4 | 5 | Examples: 6 | $ export BUPSTASH_REPOSITORY=./my-repository 7 | $ bupstash init -------------------------------------------------------------------------------- /doc/cli/list-contents.txt: -------------------------------------------------------------------------------- 1 | bupstash list-contents [OPTIONS] QUERY 2 | 3 | List contents of a bupstash directory snapshot. 4 | 5 | See the bupstash user manual for a description of the query language. 6 | 7 | See the bupstash manual for a description of the listing format. 8 | 9 | Examples: 10 | $ bupstash list-contents id=8f701cc8c03e1fe23598e95e7b87cb1c -------------------------------------------------------------------------------- /doc/cli/list.txt: -------------------------------------------------------------------------------- 1 | bupstash list [OPTIONS] [QUERY] 2 | 3 | List bupstash items that match a given query. 4 | 5 | See the bupstash user manual for a description of the query 6 | language and output formats. 7 | 8 | Examples: 9 | $ bupstash list 10 | $ bupstash list id="1b89*" 11 | $ bupstash list --format=jsonl1 name="*.tar" or name="*.sql" -------------------------------------------------------------------------------- /doc/cli/new-key.txt: -------------------------------------------------------------------------------- 1 | bupstash new-key [OPTIONS] 2 | 3 | Create a new key, readable by only the creating user. 4 | 5 | Keep this key private and secure as it is needed to read any 6 | data put in a repository using this key. 7 | 8 | Examples: 9 | $ bupstash new-key -o ./backups.key -------------------------------------------------------------------------------- /doc/cli/new-sub-key.txt: -------------------------------------------------------------------------------- 1 | bupstash new-sub-key [OPTIONS] 2 | 3 | Generate a bupstash sub key with lesser encryption and decryption capabilities. 
4 | 5 | *NOTE*: decryption differs from access - An attacker may still 6 | delete data by simply deleting the items or files they have access 7 | to. Use `bupstash serve` access controls to restrict what operations 8 | a user can perform and prevent unauthorized deletion of data. 9 | 10 | Examples: 11 | $ bupstash new-key -o ./backups.key 12 | $ bupstash new-sub-key --put -k ./backups.key -o ./put.key 13 | $ bupstash new-sub-key --list -k ./backups.key -o ./list.key 14 | $ bupstash new-sub-key --list-contents -k ./backups.key -o ./list.key -------------------------------------------------------------------------------- /doc/cli/put.txt: -------------------------------------------------------------------------------- 1 | bupstash put [OPTIONS] TAGS... [::] PATHS... 2 | bupstash put -e [OPTIONS] TAGS... [::] CMD... 3 | 4 | `bupstash put` encrypts a file, directory, or command output and stores it 5 | in a bupstash repository such that only the primary backup key can decrypt it. 6 | 7 | For single files the contents are saved directly, for multiple files the data 8 | is saved such that is can be retrieved as a tar archive, and for commands the 9 | command is executed and stdout is sent to the repository. 10 | 11 | To do incremental puts you must give each job you intend to run 12 | repeatedly it's own send log. This can dramatically reduce the amount of 13 | disk reading and network usage required to make a snapshot. 14 | 15 | Examples: 16 | $ export BUPSTASH_REPOSITORY=$HOME/bupstash-repo 17 | $ export BUPSTASH_KEY=./my-secret-bupstash.key 18 | 19 | # To avoid resending data needlessly during backups, create job specific send log. 20 | $ bupstash put --send-log /root/backup-sendlog ./to-backup 21 | 22 | # Specify arbitrary metadata as KEY=VALUE before. 23 | $ bupstash put host=$(hostname) ./file.txt 24 | 25 | # Multiple files and directories can be saved at once. 
26 | $ bupstash put ./file1.txt ./file2.txt ./some-dir 27 | 28 | # Use --exec to save the output of commands. 29 | $ bupstash put --exec name=files.tar tar -C ./files -cvf - . 30 | 31 | # Put from stdin (does not check error codes). 32 | $ echo data | bupstash put - 33 | 34 | # You can use '::' to terminate the tag list. 35 | $ bupstash put :: foo=bar.txt 36 | -------------------------------------------------------------------------------- /doc/cli/recover-removed.txt: -------------------------------------------------------------------------------- 1 | bupstash recover-removed [OPTIONS] 2 | 3 | Recover repository items that were removed, but that have not 4 | yet been deleted via garbage collection. 5 | 6 | Examples: 7 | $ bupstash recover-removed -r ./backups -------------------------------------------------------------------------------- /doc/cli/restore.txt: -------------------------------------------------------------------------------- 1 | bupstash restore [OPTIONS] --into DIR QUERY 2 | 3 | Efficiently restore the contents of a snapshot into a local directory, 4 | deleting any extra files that already existed in that directory. 5 | 6 | Examples: 7 | $ bupstash restore --into ./dir id="8f7*" 8 | $ bupstash restore --pick sub-dir --into ./dir id="8f7*" 9 | $ bupstash restore --ownership --into ./dir id="8f7*" -------------------------------------------------------------------------------- /doc/cli/rm.txt: -------------------------------------------------------------------------------- 1 | bupstash get [OPTIONS] QUERY 2 | 3 | Remove items from a bupstash repository matching a given query. 4 | 5 | See the bupstash user manual for a description of the query language. 
6 | 7 | Examples: 8 | $ bupstash rm id="8f8*" 9 | $ bupstash rm name=backup.tar and older-than 30d -------------------------------------------------------------------------------- /doc/cli/serve.txt: -------------------------------------------------------------------------------- 1 | bupstash serve [OPTIONS] REPOSITORY 2 | 3 | Run a bupstash server that speaks the bupstash 4 | protocol over stdin/stdout. Has access controls 5 | that can be configured via flags, and mandated via mechanisms 6 | such as ssh force commands. See the user manual for access 7 | control documentation and examples. 8 | 9 | Examples: 10 | $ export BUPSTASH_REPOSITORY_COMMAND="ssh $SERVER bupstash serve /data/repository" 11 | $ bupstash list -------------------------------------------------------------------------------- /doc/cli/sync.txt: -------------------------------------------------------------------------------- 1 | bupstash sync --to $REPO [OPTIONS] [QUERY...] 2 | 3 | Synchronize items and data to another bupstash repository. 4 | 5 | If a query is not specified, syncs all repository items. 6 | 7 | Examples: 8 | # Copy all items from one repository to another. 9 | bupstash sync --to ssh://$SERVER id=$id 10 | 11 | # Perform a backup then sync it to a different server. 12 | $ id="$(bupstash put ./files) 13 | $ bupstash sync --to ssh://$SERVER id=$id -------------------------------------------------------------------------------- /doc/cli/version.txt: -------------------------------------------------------------------------------- 1 | bupstash version 2 | 3 | Print the bupstash version to stdout and exit. -------------------------------------------------------------------------------- /doc/guides/Filesystem Backups.md: -------------------------------------------------------------------------------- 1 | # Filesystem Backups 2 | 3 | This guide will cover how to use bupstash for system backups, it is divided into 4 | sections which cover different use cases. 
5 | 6 | For all of the guides the shown commands can be put into a cron job or other tool for running background tasks 7 | for automated backups. 8 | 9 | The guides below can also be combined with remote repositories with access controls to allow 'upload only' for secure deployments. 10 | 11 | ## Simple directory snapshots 12 | 13 | The simplest use of bupstash is to simply snapshot your home directory to a repository on an external drive. 14 | 15 | Create the file backup.sh: 16 | 17 | ``` 18 | set -eu 19 | export BUPSTASH_KEY=/root/backup-put.key 20 | export BUPSTASH_REPOSITORY=/mnt/external-drive/bupstash-backups 21 | 22 | bupstash put \ 23 | --send-log /root/backup.sendlog \ 24 | --exclude "/home/*/.cache" \ 25 | hostname=$(hostname) \ 26 | name=home-backup.tar \ 27 | /home/ 28 | ``` 29 | 30 | Then running a backup is as simple as: 31 | 32 | ``` 33 | $ sudo sh ./backup.sh 34 | ``` 35 | 36 | Now to restore files or sub directories we can use `bupstash get`: 37 | 38 | ``` 39 | $ bupstash list name=home-backup.tar 40 | ... 41 | id="aa87fdbc72241f363568bbb888c0834e" name="backup.tar" timestamp="2020-07-24 15:25:00" 42 | ... 43 | $ bupstash get id="aa8*" | tar -C restore ... 44 | $ bupstash get --pick some/sub-dir id="aa8*" | tar -C restore ... 45 | $ bupstash get --pick some/file.txt id="aa8*" > file.txt 46 | ``` 47 | 48 | Some points to consider about this snapshot method: 49 | 50 | - The use of --exclude to omit the user cache directories, we can save a lot of space in backups by ignoring things 51 | like out web browser cache, at the expense of less complete backups. You can specify --exclude more than once to 52 | skip more than one directory or file. See the man page for more details. 53 | 54 | - Bupstash incremental backups work best when the send log file used was last used for a snapshot of the same or similar input data. 
55 | Manually specifying a send log path with --send-log ensures subsequent similar snapshots use the same send log, often dramatically increasing efficiency. 56 | 57 | - This method of backup is simple, but does not account for files being modified during upload. The simplest way to to think about this problem, is files will be changing while 58 | the backup is uploading, so you might capture different directories at different points in time. 59 | 60 | - In this command we are also using a 'put' key (see the offline keys guide) so that backups cannot be decrypted even if someone was to steal your external drive. 61 | 62 | 63 | ## Btrfs directory snapshots 64 | 65 | If you are running linux with btrfs, (or any other operating system + filesystem that supports snapshots), you can 66 | use this to get stable snapshots that won't be modified during upload. 67 | 68 | 69 | Create the file backup.sh: 70 | 71 | ``` 72 | set -eu 73 | export BUPSTASH_KEY=/root/backup-put.key 74 | export BUPSTASH_REPOSITORY=/mnt/external-drive/bupstash-backups 75 | 76 | 77 | if test -e /rootsnap 78 | then 79 | echo "removing snapshot, it already existed." 80 | btrfs subvolume delete /rootsnap 81 | fi 82 | btrfs subvolume snapshot -r / /rootsnap > /dev/null 83 | 84 | bupstash put \ 85 | --send-log /root/backup.sendlog \ 86 | --exclude "/home/*/.cache" \ 87 | hostname=$(hostname) \ 88 | name=backup.tar \ 89 | /rootsnap 90 | 91 | btrfs subvolume delete /rootsnap > /dev/null 92 | ``` 93 | 94 | Then running a backup is as simple as: 95 | 96 | ``` 97 | $ sudo sh ./backup.sh 98 | ``` 99 | 100 | Filesystem enabled snapshots do not suffer from 'time smear'. All points about '--send-log', '--exclude' and backup restore from simple directory snapshots also apply to this snapshot method. 
101 | 102 | 103 | ## Btrfs send snapshots 104 | 105 | 106 | If you are running linux with btrfs, (or any other operating system + filesystem that supports exporting directories as a stream), you can 107 | directly save the output of such a command into a bupstash repository. 108 | 109 | 110 | Create the file backup.sh: 111 | 112 | ``` 113 | set -eu 114 | export BUPSTASH_KEY=/root/backup-put.key 115 | export BUPSTASH_REPOSITORY=/mnt/external-drive/bupstash-backups 116 | 117 | 118 | if test -e /rootsnap 119 | then 120 | echo "removing snapshot, it already existed." 121 | btrfs subvolume delete /rootsnap 122 | fi 123 | 124 | btrfs subvolume snapshot -r / /rootsnap > /dev/null 125 | 126 | bupstash put \ 127 | --exec 128 | --send-log /root/backup.sendlog \ 129 | hostname=$(hostname) \ 130 | name=backup.btrfs \ 131 | btrfs send /rootsnap 132 | 133 | btrfs subvolume delete /rootsnap > /dev/null 134 | ``` 135 | Then running a backup is as simple as: 136 | 137 | ``` 138 | $ sudo sh ./backup.sh 139 | ``` 140 | 141 | Restoration of the backup is done via the `btrfs receive` command: 142 | 143 | ``` 144 | $ bupstash get name=backup.btrfs | sudo btrfs receive ./restore 145 | ``` 146 | -------------------------------------------------------------------------------- /doc/guides/Getting Started.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | bupstash is an easy to use tool for making encrypted space efficient backups. 4 | It is special because it is open source, and stores all data and metadata in an encrypted 5 | and deduplicated format. 6 | 7 | Typical users of bupstash are people familiar with the command line, such as software developers, 8 | system administrators and other technical users. 9 | 10 | This guide covers installation and basic usage of bupstash. 
11 | 12 | ## Install bupstash 13 | 14 | ### Precompiled version 15 | 16 | Head to the [releases page](https://github.com/andrewchambers/bupstash/releases) and download a 17 | build for for your platform. Simply extract the archive and add the single bupstash binary to 18 | your PATH. 19 | 20 | ### Via rust and cargo 21 | 22 | If you have a rust compiler installed, you can install the latest release 23 | using cargo (the rust programming language package manager). 24 | 25 | Install `libsodium-dev` and `pkg-config` for your platform, and run: 26 | 27 | 28 | ``` 29 | $ git clone https://github.com/andrewchambers/bupstash 30 | $ cd bupstash 31 | $ cargo build --release 32 | $ cp ./target/release/bupstash "$INSTALL_DIR" 33 | ``` 34 | 35 | or simply: 36 | 37 | ``` 38 | $ cargo install bupstash 39 | $ cp "$HOME/.cargo/bin/bupstash" "$INSTALL_DIR" 40 | ``` 41 | 42 | ## Initializing your repository 43 | 44 | First we must initialize a repository to save data into. We do this with the `bupstash init` command. 45 | 46 | To initialize a local repository run: 47 | ``` 48 | export BUPSTASH_REPOSITORY="$(pwd)/bupstash-repo" 49 | $ bupstash init 50 | ``` 51 | 52 | For remote repositories, install bupstash on both the local and the remote machine and run the following: 53 | 54 | ``` 55 | export BUPSTASH_REPOSITORY=ssh://$SERVER/home/me/bupstash-repo 56 | $ bupstash init 57 | ``` 58 | 59 | Note that you can avoid some retyping by setting certain environment variables (e.g. 60 | BUPSTASH_REPOSITORY) in your .bashrc or other equivalent file. 61 | 62 | ## Generating an encryption key 63 | 64 | All data stored in a bupstash repository is encrypted, so first we need to generate an encryption key. 65 | 66 | ``` 67 | $ bupstash new-key -o backups.key 68 | ``` 69 | 70 | This key can be used to make, view and edit encrypted snapshots. 71 | KEEP THIS KEY SAFE, if you lose it, you will have lost all your backups made with this key. 
72 | 73 | Later sections will explain how to create and use secure offline keys. 74 | 75 | ## Making snapshots 76 | 77 | First we must tell bupstash which encryption key to use. 78 | ``` 79 | export BUPSTASH_KEY=$(pwd)/backups.key 80 | ``` 81 | 82 | Now we can start making snapshots, here we save a file: 83 | 84 | ``` 85 | $ bupstash put ./my-data.txt 86 | 811a0f5c61656b5f494a014ce46d3549 87 | ``` 88 | 89 | The printed text is the id of this put, which can be used 90 | to retrieve the data again with a query: 91 | 92 | ``` 93 | $ bupstash get id="811*" 94 | your data! 95 | ``` 96 | 97 | We can also save a directory: 98 | 99 | ``` 100 | $ bupstash put ./my-dir 101 | ... 102 | ``` 103 | 104 | Directories are automatically converted to tarballs, which can be extracted with the tar command: 105 | 106 | ``` 107 | $ mkdir restored 108 | $ bupstash get name=my-dir.tar | tar -C ./restored -xvf - 109 | ``` 110 | 111 | We can also save the output of commands: 112 | 113 | ``` 114 | $ echo hello | bupstash put - 115 | 116 | # This form is able to detect command failures. 117 | $ bupstash put --exec echo hello 118 | ... 119 | ``` 120 | 121 | Note that bupstash automatically applies compression and deduplicates your data so you 122 | do not need to do this manually. 123 | 124 | ## Listing snapshots 125 | 126 | ``` 127 | $ bupstash list 128 | id="dbca49b072c0f94b9e72bf81e7716ff9" name="backup.tar" size="10.23MB" timestamp="2020/08/03 15:47:32" 129 | ... 130 | ``` 131 | 132 | We can do more sophisticated queries when we list: 133 | 134 | ``` 135 | $ bupstash list timestamp="2020/*" 136 | ... 137 | $ bupstash list name=backup.tar and older-than 7d 138 | $ bupstash list newer-than 1h 139 | ... 140 | ``` 141 | 142 | For a full description of the query language see the query language manual page. 
143 | 144 | ## Snapshot tags 145 | 146 | When we make snapshots, we can add our own arbitrary tags in addition to the default tags: 147 | 148 | ``` 149 | $ bupstash put mykey=value ./my-important-files 150 | $ bupstash list mykey=value 151 | ``` 152 | 153 | ## Listing and fetching snapshots 154 | 155 | Once we have directory snapshots, we can list the contents using bupstash `list-contents`: 156 | 157 | ``` 158 | $ bupstash list-contents id=$id 159 | drwxr-xr-x 0B 2020/10/30 13:32:04 . 160 | -rw-r--r-- 9B 2020/10/30 13:32:04 data.txt 161 | ... 162 | ``` 163 | 164 | We can efficiently restore a snapshot to a local directory only downloading the files that are missing: 165 | ``` 166 | $ mkdir restore-dir 167 | $ bupstash restore --into ./restore-dir id=$id 168 | ``` 169 | 170 | We can also export individual files or directories as a tarballs: 171 | 172 | ``` 173 | $ bupstash get --pick data.txt id=$id 174 | my data! 175 | $ bupstash get --pick subdir id=$id | tar -C ./subdir-restore -xvf - 176 | $ bupstash get id=$id | tar -C ./restore -xvf - 177 | ``` 178 | 179 | ## Removing snapshots 180 | 181 | We can remove snapshots via the same query language and the `bupstash rm` command: 182 | 183 | ``` 184 | $ bupstash rm older-than 90d and name=backup.tar and host=my-server 185 | ``` 186 | 187 | Removing a snapshot does not immediately reclaim disk space. To do that, you must run the 188 | garbage collector. 189 | 190 | ``` 191 | $ bupstash gc 192 | ``` 193 | 194 | # Learning more 195 | 196 | Feel free to browse the manual pages for each command to get a feel for how to interact and administer with your bupstash backups. -------------------------------------------------------------------------------- /doc/guides/Network Filesystems.md: -------------------------------------------------------------------------------- 1 | # Network Filesystems 2 | 3 | Bupstash relies on fcntl style POSIX file locking across multiple files to work in a concurrent context. 
Do not use bupstash with any network filesystem that does not support fcntl style locking unless you understand the potential consequences of such a decision. 4 | 5 | When using bupstash with a remote repository it is always recommended to use bupstash over ssh by setting BUPSTASH_REPOSITORY to an `ssh://` 6 | style URL. This mode is safe for concurrent use, faster and better in the majority of use cases. 7 | 8 | For information on specific network filesystem configurations see the sections below. 9 | 10 | ## NFSv3/NFSv4 11 | 12 | We do no recommend using bupstash over NFSv3 in any configuration. 13 | 14 | If you are stubborn, ensure locking is enabled or only access the repository from one bupstash process as a time. 15 | 16 | NFSv4 has a more sound network locking protocol, so given the choice between NFSv3 and NFSv4 always 17 | choose NFSv4 with locking enabled. 18 | 19 | ## CephFS 20 | 21 | Using bupstash over Cephfs is untested so is currently not recommended. 22 | 23 | ## SSHFS 24 | 25 | Currently we do no recommend using bupstash over sshfs in any configuration due to the lack 26 | of file lock support across multiple machines. 27 | 28 | If you have sshfs access, you almost certainly have the ability to set BUPSTASH_REPOSITORY 29 | to an `ssh://` style url which enables safe concurrent repository access in all situations. 30 | 31 | ## 9P2000.L 32 | 33 | Uncached 9P2000.L mounts of repositories exported via the diod 9P2000.L server will likely 34 | work without issue, though use at your own risk. 35 | 36 | 37 | -------------------------------------------------------------------------------- /doc/guides/Password Protected Keys.md: -------------------------------------------------------------------------------- 1 | # Password Protected Keys 2 | 3 | Bupstash allows users to fetch the key to use via arbitrary commands by setting the BUPSTASH_KEY_COMMAND environment variable. 
In this guide we will configure bupstash to decrypt a password protected key file with gpg. 4 | 5 | First create a key: 6 | 7 | ``` 8 | $ bupstash new-key -o demo.key 9 | ``` 10 | 11 | Next we password protect the key using gpg: 12 | 13 | ``` 14 | $ gpg --symmetric demo.key 15 | ``` 16 | 17 | gpg will ask you for a password using your configured pin entry program and then create demo.key.gpg. 18 | 19 | Verify you can decrypt the key: 20 | 21 | ``` 22 | $ gpg --decrypt demo.key.gpg 23 | ... 24 | -----BEGIN BUPSTASH KEY----- 25 | ... 26 | -----END BUPSTASH KEY----- 27 | ``` 28 | 29 | Now we can remove the unencrypted key: 30 | 31 | ``` 32 | $ shred demo.key 33 | ``` 34 | 35 | Finally, we can tell bupstash to use this encrypted key, to do this we setup the environment variable BUPSTASH_KEY_COMMAND: 36 | 37 | ``` 38 | $ export BUPSTASH_KEY_COMMAND="gpg -q --decrypt $(pwd)/demo.key.gpg" 39 | ``` 40 | 41 | Now whenever bupstash requires a key, it will ask gpg for it, and gpg will ask for the password. 42 | 43 | 44 | ``` 45 | $ bupstash list 46 | 47 | ┌──────────────────────────────────────────────────────┐ 48 | │ Enter passphrase │ 49 | │ │ 50 | │ │ 51 | │ Passphrase: ________________________________________ │ 52 | │ │ 53 | │ │ 54 | └──────────────────────────────────────────────────────┘ 55 | 56 | ``` 57 | 58 | If you have gpg-agent configured, the password does not need to be re-entered until gpg-agent expires the password 59 | entry. 60 | 61 | Remember that BUPSTASH_KEY_COMMAND can be set to run any command of your choosing, giving great flexibility when it comes to protecting sensitive bupstash keys. 62 | 63 | Finally, don't forget to check out our other guides and manuals to learn about sub-keys that do not have the ability to decrypt data after it is sent. Sub-keys allow us to avoid putting our sensitive decryption keys 64 | on devices making backups. 
-------------------------------------------------------------------------------- /doc/guides/Remote Access Controls.md: -------------------------------------------------------------------------------- 1 | # Remote access controls 2 | 3 | When designing a backup plan, we must remember that if a malicious agent compromises your computer, 4 | it may be able to delete your backups too. To solve this issue bupstash supports access controls on remote repositories 5 | that can be configured on a per ssh key basis. To do this, we can utilize ssh force commands to restrict a backup client to 6 | only run an instance of `bupstash serve` that has limited permissions. 7 | 8 | The following assumes you have a backup server with a user called `backups` that has openssh sshd running, 9 | and a client computer with an ssh client installed. 10 | 11 | In an your sshd config file in your server add the line: 12 | 13 | ``` 14 | Match User backups 15 | ForceCommand "/bin/bupstash-put-force-command.sh" 16 | ``` 17 | 18 | Create /bin/bupstash-put-force-command.sh on your server: 19 | 20 | ``` 21 | $ echo 'exec bupstash serve --allow-put /home/backups/bupstash-backups' > bupstash-put-force-command.sh 22 | $ sudo cp bupstash-put-force-command.sh /bin/bupstash-put-force-command.sh 23 | $ sudo chown root:root /bin/bupstash-put-force-command.sh 24 | $ sudo chmod +x /bin/bupstash-put-force-command.sh 25 | ``` 26 | 27 | Next add an ssh key you intend to use for backups to `$SERVER/home/backups/.ssh/authorized_keys`, 28 | such that the user sending backups can connect to the remote server using ssh key based login. 29 | 30 | Now when the backups user attempts to run a backup via ssh they are only able to 31 | run the bupstash serve command with a hard coded set of permissions and 32 | repository path. 
33 | 34 | Now the client is only authorized to create new backups, but not list or remove them: 35 | 36 | ``` 37 | export BUPSTASH_REPOSITORY="ssh://backups@$SERVER/backups" 38 | $ bupstash put ./files 39 | ... 40 | $ bupstash list 41 | server has disabled query and search for this client 42 | ``` 43 | 44 | The `bupstash serve` command also supports allowing fetching data, entry removal and garbage collection. With these 45 | options we can create a backup plan where clients can create new backups, and an administrator is able to cycle old backups 46 | from the secure machine. 47 | -------------------------------------------------------------------------------- /doc/guides/Secure Offline Keys.md: -------------------------------------------------------------------------------- 1 | # Secure offline keys 2 | 3 | In a secure computer system we do not want our decryption keys stored online where they could 4 | inadvertently be leaked. To support this use case, bupstash allows creating keys which do not support 5 | decrypting backups. Bupstash allows users to create 'put keys' that can only create new backups, or 'list keys' that can list backups, but not decrypt data. 6 | 7 | Using a 'put key' lets you create backups without exposing your decryption key, while using a 'list key' 8 | lets you rotate old backups based on queries, but without exposing the sensitive decryption key. This 9 | guide will show how to create and use these key types. 10 | 11 | 12 | ## Generating put and list keys 13 | 14 | Generating and using these keys is simple: we use bupstash to create a new 'put key' or 'list key' 15 | that is derived from a regular bupstash key using the `new-sub-key` command.
16 | 17 | ``` 18 | $ bupstash new-sub-key -k ./backups.key -o put-backups.key --put 19 | $ bupstash new-sub-key -k ./backups.key -o list-backups.key --list 20 | ``` 21 | 22 | ## Using put and list keys 23 | 24 | Using these keys is the same as a regular key: 25 | 26 | ``` 27 | $ bupstash put --key ./put-backups.key ./data.txt 28 | $ bupstash list --key ./list-backups.key 29 | ``` 30 | 31 | With the important difference that these keys cannot decrypt the contents of the snapshots. 32 | Only the original key is able to decrypt these snapshots. 33 | 34 | ``` 35 | $ bupstash get --key ./put-backups.key id=$id 36 | bupstash get: provided key is not a decryption key 37 | 38 | $ bupstash get --key ./list-backups.key id=$id 39 | bupstash get: provided key is not a decryption key 40 | 41 | $ bupstash get --key ./backups.key id=$id 42 | data... 43 | ``` 44 | 45 | We can now put the main key into secure offline storage for use in case of emergency, 46 | but continue to make and administer our backups using the put key and list key. 47 | 48 | Neither the storage server, nor the devices uploading new snapshots 49 | have access to your existing snapshots. 50 | 51 | Note that we recommend creating a new put key for every backup client if you have a shared bupstash 52 | repository. -------------------------------------------------------------------------------- /doc/man/bupstash-authors.7.md: -------------------------------------------------------------------------------- 1 | bupstash-authors(7) 2 | =================== 3 | 4 | ## SYNOPSIS 5 | 6 | The bupstash authors, how to contact them, and their bupstash specific PGP public keys. 7 | 8 | ## Andrew Chambers 9 | 10 | ### About 11 | 12 | Andrew is an experienced programmer with a master's degree in computer systems engineering 13 | from the University of Auckland in New Zealand. He is the creator and primary author of bupstash 14 | and bupstash.io.
15 | 16 | ### Contact 17 | 18 | Email: ac@bupstash.io 19 | 20 | ### PGP Key 21 | 22 | ``` 23 | -----BEGIN PGP PUBLIC KEY BLOCK----- 24 | 25 | mDMEYvRmMRYJKwYBBAHaRw8BAQdAzXZnbofNVgtBglzMSqu0cVOgaoLEyAy6v6DX 26 | mYSxwA+IywQfFgoAfQWCYvRmMQMLCQcJEDVGG/kcrtZ+RxQAAAAAAB4AIHNhbHRA 27 | bm90YXRpb25zLnNlcXVvaWEtcGdwLm9yZ4F/S6gog/wBVOoLG3K8SaE5HOFe7EbF 28 | Wgrxh1NKhCsCAxUKCAKbAQIeARYhBJzEZHwEQV2W7y9PzTVGG/kcrtZ+AAD9+AEA 29 | l4BParFXikhZH4VXr1hRyfyWtV8hnwPcl9eU+igX/SgBAKYptO879hggMwu+9zJ+ 30 | x25eJW/EWW2i9S3mu5gMpysEtBA8YWNAYnVwc3Rhc2guaW8+iM4EExYKAIAFgmL0 31 | ZjEDCwkHCRA1Rhv5HK7WfkcUAAAAAAAeACBzYWx0QG5vdGF0aW9ucy5zZXF1b2lh 32 | LXBncC5vcmdbuNzNhBGXeid8J/vWVx3oQYgRa15JKnCMuu/ReEOHVwMVCggCmQEC 33 | mwECHgEWIQScxGR8BEFdlu8vT801Rhv5HK7WfgAATK8BAJ7po0Ni9YNmvSDT4EsX 34 | 35MdvVYgtq22LJoDmLt7r+oRAQCKKWPsw13i6GPm4t5ozPqltdAR3xM7uEfGCzWJ 35 | 0VRfBLgzBGL0ZjEWCSsGAQQB2kcPAQEHQBF6vjySpi+RPabf1L9f3zP61m4OS2PP 36 | HUE0QKUnIkDSiQF/BBgWCgExBYJi9GYxCRA1Rhv5HK7WfkcUAAAAAAAeACBzYWx0 37 | QG5vdGF0aW9ucy5zZXF1b2lhLXBncC5vcme3EZvVUbn+0BUFL3iLpG6BVSixbxNQ 38 | XfCT6kj5GDimRQKbAr6gBBkWCgBvBYJi9GYxCRAdZXgjz6ofGEcUAAAAAAAeACBz 39 | YWx0QG5vdGF0aW9ucy5zZXF1b2lhLXBncC5vcmfk/iZ/MouT7uFwBvSNoD+CzhGR 40 | ZLrUFY6Sh2nGnTPQyBYhBB9DxIpE4XsF0ukONx1leCPPqh8YAADeKwEAovED6zTC 41 | 29byTkr/VzFAha/Gtt/MnPJIC6gpCBSTpTABAMC/EGFMGiivTDdZXP1rqCpdFd8s 42 | qF2mkYVgZyCXw/QMFiEEnMRkfARBXZbvL0/NNUYb+Ryu1n4AALNtAQDDWHOD0SEb 43 | QgXyLk0Ho6yRvgCtSUiSHP3LH+iJpSVx5gEAk+SIGKThNW8BSaK1mzdTOu+Dsntn 44 | Oxrp8vd+GE0RCwe4OARi9GYxEgorBgEEAZdVAQUBAQdA80nC4YzCtnH4hN41nQVE 45 | QO0yNZHqycHgFKqxfodbElcDAQgJiMAEGBYKAHIFgmL0ZjEJEDVGG/kcrtZ+RxQA 46 | AAAAAB4AIHNhbHRAbm90YXRpb25zLnNlcXVvaWEtcGdwLm9yZxJBdw1WDQVXRs8c 47 | 8DntJe+SkWMMOk4cM2VrTv7Tk6ssApsMFiEEnMRkfARBXZbvL0/NNUYb+Ryu1n4A 48 | AK6YAP4sk5/dknfAxmnacYR5w6QqUhTRGvPZU6aRGtVk8eFY3AD+LjNttTyy5u9g 49 | JC4NSoiQRLjFSOj8ypYwbSHMrzrRuA8= 50 | =HZqW 51 | -----END PGP PUBLIC KEY BLOCK----- 52 | ``` 53 | 54 | You can cross reference this PGP key at https://keys.openpgp.org/. 
55 | 56 | ## SEE ALSO 57 | 58 | bupstash(1) 59 | -------------------------------------------------------------------------------- /doc/man/bupstash-diff.1.md: -------------------------------------------------------------------------------- 1 | bupstash-diff(1) 2 | ================ 3 | 4 | ## SYNOPSIS 5 | 6 | Diff two snapshots printing the summary to stdout. 7 | 8 | `bupstash diff [OPTIONS] QUERY1... :: QUERY2... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash diff` fetches two snapshot listings from the remote server and compares them, printing 13 | the diff line output to stdout. As a special case, if either query starts with './' or '/' a temporary 14 | listing is created for that local directory for comparison. 15 | 16 | `bupstash diff` is preferred over running traditional `diff` against the output of `bupstash list-contents` 17 | because it takes the full precision of timestamps and also the stored file hash into account when performing 18 | the diff operation. 19 | 20 | Bupstash supports ignoring items in the diff comparison to aid in analysis. Useful examples are the `--ignore` values 21 | `times` to ignore file modification timestamps and `content` to ignore file size and hash changes. 22 | 23 | ## OUTPUT FORMAT 24 | 25 | Output is consistent with that of `bupstash list-contents`, except each line is 26 | prefixed with either `+` or `-` representing added or removed items respectively. 27 | 28 | Specifying `--format` alters the underlying output format as described by bupstash-list-contents(1). Lines are still prefixed with either `+` or `-` regardless of the output format. 29 | 30 | ## QUERY LANGUAGE 31 | 32 | For full documentation on the query language, see bupstash-query-language(7). 33 | 34 | ## QUERY CACHING 35 | 36 | The diff command uses the same query caching mechanisms as bupstash-list(1), check that page for 37 | more information on the query cache.
38 | 39 | ## OPTIONS 40 | 41 | * -r, --repository REPO: 42 | The repository to connect to, may be of the form `ssh://$SERVER/$PATH` for 43 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 44 | 45 | * -k, --key KEY: 46 | Key used to decrypt data and metadata. If not set, defaults 47 | to `BUPSTASH_KEY`. 48 | 49 | * --query-cache PATH: 50 | Path to the query-cache file, defaults to one of the following, in order, provided 51 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 52 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 53 | 54 | * -i, --ignore: 55 | Comma separated list of file attributes to ignore in comparisons. 56 | Valid items are 'content,dev,devnos,inode,type,perms,nlink,uid,gid,times,xattrs' 57 | 58 | * --relaxed: 59 | Shortcut for --ignore 'dev,inode,nlink,uid,gid,times,xattrs'. 60 | This option is useful for comparing content without being so concerned with machine specific metadata. 61 | 62 | * --{left,right}-pick PATH: 63 | Perform diff on a sub-directory of the left/right query. 64 | 65 | * --indexer-threads N: 66 | Number of processor threads to use for pipelined parallel file hashing and metadata reads. 67 | Defaults to the number of processors. 68 | 69 | * --xattrs: 70 | Fetch xattrs when indexing local directories. 71 | 72 | * --format FORMAT: 73 | Set output format to one of the following 'human', 'jsonl'. 74 | 75 | * --utc-timestamps: 76 | Display and search against timestamps in utc time instead of local time. 77 | 78 | * --no-progress: 79 | Suppress progress indicators (Progress indicators are also suppressed when stderr 80 | is not an interactive terminal). 81 | 82 | * -q, --quiet: 83 | Be quiet, implies --no-progress. 84 | 85 | ## ENVIRONMENT 86 | 87 | * BUPSTASH_REPOSITORY: 88 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 89 | remote repositories if ssh access is configured.
90 | 91 | * BUPSTASH_REPOSITORY_COMMAND: 92 | A command to run to connect to an instance of bupstash-serve(1). This 93 | allows more complex connections to the repository for less common use cases. 94 | 95 | * BUPSTASH_KEY: 96 | Path to a primary key that will be used for decrypting data and metadata. 97 | 98 | * BUPSTASH_KEY_COMMAND: 99 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 100 | to fetch the key from arbitrary locations such as the network or other secret storage. 101 | 102 | * BUPSTASH_QUERY_CACHE: 103 | Path to the query cache file to use. 104 | 105 | ## EXAMPLES 106 | 107 | ### Compare two snapshots by query 108 | 109 | ``` 110 | $ bupstash diff id="14eb*" :: id="57de*" 111 | - -rw-r--r-- 1.1kB hello.txt 112 | + -rw-r--r-- 1.3kB goodbye.txt 113 | ``` 114 | 115 | ### Compare a snapshot and a local directory 116 | 117 | ``` 118 | $ bupstash diff --left-pick files --relaxed id="57de*" :: ./files 119 | ``` 120 | 121 | ## SEE ALSO 122 | 123 | bupstash(1), bupstash-list(1), bupstash-keyfiles(7), bupstash-query-language(7) 124 | -------------------------------------------------------------------------------- /doc/man/bupstash-exec-with-locks.1.md: -------------------------------------------------------------------------------- 1 | bupstash-exec-with-locks(1) 2 | =========================== 3 | 4 | ## SYNOPSIS 5 | 6 | Execute a command with exclusive locks on the repository. 7 | 8 | `bupstash exec-with-locks -r REPO COMMAND...` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash exec-with-locks` executes a command with exclusive locks held on 13 | the bupstash repository, preventing concurrent modification to the repository 14 | for the duration of the command. 15 | 16 | ## OPTIONS 17 | 18 | * -r, --repository REPO: 19 | Repository to lock. Defaults to BUPSTASH_REPOSITORY if not set. 20 | Unlike other commands, does not support remote repository access. 21 | 22 | ## ENVIRONMENT 23 | 24 | * BUPSTASH_REPOSITORY: 25 | Repository to lock.
26 | 27 | ## EXAMPLES 28 | 29 | ``` 30 | $ bupstash exec-with-locks -r ./repo -- cp -r ./repo ./repo-backup 31 | ``` 32 | 33 | ## SEE ALSO 34 | 35 | bupstash(1), bupstash-repository(7) 36 | -------------------------------------------------------------------------------- /doc/man/bupstash-gc.1.md: -------------------------------------------------------------------------------- 1 | bupstash-gc(1) 2 | ============== 3 | 4 | ## SYNOPSIS 5 | 6 | Run the garbage collector against a repository, removing 7 | unreferenced data and freeing disk space. 8 | 9 | `bupstash gc [OPTIONS]` 10 | 11 | ## DESCRIPTION 12 | 13 | `bupstash gc` walks the repository contents attempting to find 14 | unreachable data chunks and removing them, potentially reclaiming disk space. 15 | 16 | It is safe to run `bupstash gc` at any time, but some operations (such as bupstash-put(1)) 17 | may temporarily be delayed. 18 | 19 | The garbage collector only relies on unencrypted metadata, so does not need 20 | access to decryption keys to operate. 21 | 22 | ## OPTIONS 23 | 24 | * -r, --repository REPO: 25 | The repository to connect to and operate on. 26 | May be of the form `ssh://$SERVER/$PATH` for 27 | remote repositories if ssh access is configured. 28 | If not specified, is set to `BUPSTASH_REPOSITORY`. 29 | 30 | * --no-progress: 31 | Suppress progress indicators (Progress indicators are also suppressed when stderr 32 | is not an interactive terminal). 33 | 34 | * -q, --quiet: 35 | Be quiet, implies --no-progress. 36 | 37 | ## ENVIRONMENT 38 | 39 | * BUPSTASH_REPOSITORY: 40 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 41 | remote repositories if ssh access is configured. 42 | 43 | * BUPSTASH_REPOSITORY_COMMAND: 44 | A command to run to connect to an instance of bupstash-serve(1). This 45 | allows more complex connections to the repository for less common use cases. 
46 | 47 | ## SEE ALSO 48 | 49 | bupstash(1), bupstash-repository(7) 50 | -------------------------------------------------------------------------------- /doc/man/bupstash-get.1.md: -------------------------------------------------------------------------------- 1 | bupstash-get(1) 2 | =============== 3 | 4 | ## SYNOPSIS 5 | 6 | Get data from a bupstash repository. 7 | 8 | `bupstash get [OPTIONS] QUERY... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash get` fetches and decrypts data stored in a bupstash repository, sending 13 | it to stdout. If the requested item was saved as a directory, the command converts 14 | it into a tar stream. 15 | 16 | The item that is fetched is chosen based on a simple query against the 17 | tags specified when saving data with `bupstash put`. 18 | 19 | ## QUERY LANGUAGE 20 | 21 | For full documentation on the query language, see bupstash-query-language(7). 22 | 23 | ## QUERY CACHING 24 | 25 | The get command uses the same query caching mechanisms as bupstash-list(1), check that page for 26 | more information on the query cache. 27 | 28 | ## SPARSE FILES 29 | 30 | When getting a directory as a tarball sparse files are not treated specially, to restore sparse 31 | files while preserving the file holes use `bupstash restore` instead. 32 | 33 | ## OPTIONS 34 | 35 | * -r, --repository REPO: 36 | The repository to connect to, may be of the form `ssh://$SERVER/$PATH` for 37 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 38 | 39 | * -k, --key KEY: 40 | Key that will be used to decrypt data and metadata. If not set, defaults 41 | to `BUPSTASH_KEY`. 42 | 43 | * --pick PATH: 44 | Fetch an individual file or sub-directory from a snapshot.
45 | 46 | * --query-cache PATH: 47 | Path to the query-cache file, defaults to one of the following, in order, provided 48 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 49 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 50 | 51 | * --utc-timestamps: 52 | Display and search against timestamps in utc time instead of local time. 53 | 54 | * --no-progress: 55 | Suppress progress indicators (Progress indicators are also suppressed when stderr 56 | is not an interactive terminal). 57 | 58 | * -q, --quiet: 59 | Be quiet, implies --no-progress. 60 | 61 | ## ENVIRONMENT 62 | 63 | * BUPSTASH_REPOSITORY: 64 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 65 | remote repositories if ssh access is configured. 66 | 67 | * BUPSTASH_REPOSITORY_COMMAND: 68 | A command to run to connect to an instance of bupstash-serve(1). This 69 | allows more complex connections to the repository for less common use cases. 70 | 71 | * BUPSTASH_KEY: 72 | Path to the key that will be used for decrypting data and metadata. 73 | 74 | * BUPSTASH_KEY_COMMAND: 75 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 76 | to fetch the key from arbitrary locations such as the network or other secret storage. 77 | 78 | * BUPSTASH_QUERY_CACHE: 79 | Path to the query cache file to use. 80 | 81 | 82 | ## EXAMPLES 83 | 84 | ### Get an item with a specific id 85 | 86 | ``` 87 | $ bupstash get id=14ebd2073b258b1f55c5bbc889c49db4 > ./data.file 88 | ``` 89 | 90 | ### Get an item by name and timestamp 91 | 92 | ``` 93 | $ bupstash get name=backup.tar and timestamp=2020/19/* > ./restore.tar 94 | ``` 95 | 96 | ### Get a file or sub-tar from a directory snapshot 97 | 98 | ``` 99 | $ bupstash get --pick=path/to/file.txt id=$id 100 | $ bupstash get --pick=path/to/dir id=$id | tar ... 
101 | ``` 102 | 103 | ### Get a tarball 104 | 105 | The builtin directory put creates a tarball from a directory, so to extract 106 | it we use tar. 107 | 108 | ``` 109 | # Snapshot a directory. 110 | $ id=$(bupstash put ./data) 111 | 112 | # Fetch the contents of a snapshot and extract the contents with tar 113 | $ mkdir restore 114 | $ bupstash get id=$id | tar -C ./restore -xvf - 115 | ``` 116 | 117 | ## SEE ALSO 118 | 119 | bupstash(1), bupstash-put(1), bupstash-list(1), bupstash-restore(1), bupstash-rm(1), bupstash-keyfiles(7), 120 | bupstash-query-language(7) 121 | -------------------------------------------------------------------------------- /doc/man/bupstash-init.1.md: -------------------------------------------------------------------------------- 1 | bupstash-init(1) 2 | ================ 3 | 4 | ## SYNOPSIS 5 | 6 | Initialize a bupstash repository. 7 | 8 | `bupstash init [OPTIONS]` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash init` initializes a repository. If `REPOSITORY` already exists, the command fails. 13 | 14 | Currently it is not recommended to use bupstash on a network filesystem. 15 | 16 | For details about the contents of the package store after initialization, see bupstash-repository(7). 17 | 18 | ## OPTIONS 19 | 20 | * -r, --repository REPO: 21 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 22 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 23 | 24 | * --storage SPEC: 25 | Accepts 'dir' or a json storage specification. 26 | The default storage is 'dir' and stores encrypted data blocks in a 27 | repository local data directory. 28 | 29 | See the storage specs section for supported json specifications and examples. 30 | 31 | ## ENVIRONMENT 32 | 33 | * BUPSTASH_REPOSITORY: 34 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 35 | remote repositories if ssh access is configured.
36 | 37 | * BUPSTASH_REPOSITORY_COMMAND: 38 | A command to run to connect to an instance of bupstash-serve(1). This 39 | allows more complex connections to the repository for less common use cases. 40 | 41 | ## EXAMPLES 42 | 43 | ``` 44 | $ export BUPSTASH_REPOSITORY=./my-repository 45 | $ bupstash init 46 | 47 | $ export BUPSTASH_REPOSITORY=ssh://$SERVER/home/backups/bupstash-backups 48 | $ bupstash init 49 | ``` 50 | 51 | ## JSON STORAGE SPECS 52 | 53 | Each storage specification consists of a type designator and a set 54 | of type specific parameters. 55 | 56 | ### Dir storage 57 | 58 | Dir storage is an alias for `--storage dir` and is generally not needed. 59 | 60 | Example: 61 | 62 | ``` 63 | $ bupstash init --storage '{"Dir" : {}}' 64 | ``` 65 | 66 | ### External storage 67 | 68 | The external storage engine stores data via an external socket, documentation is pending interface stabilization. 69 | 70 | Example: 71 | 72 | ``` 73 | $ bupstash init --storage '{"External" : {"socket_path" : "/plugin/socket.sock", "path" : "plugin-specific-path"}}' 74 | ``` 75 | 76 | ## SEE ALSO 77 | 78 | bupstash(1), bupstash-repository(7) 79 | -------------------------------------------------------------------------------- /doc/man/bupstash-keyfiles.7.md: -------------------------------------------------------------------------------- 1 | bupstash-keyfiles(7) 2 | ==================== 3 | 4 | ## SYNOPSIS 5 | 6 | Overview of the bupstash key format. 7 | 8 | ## DESCRIPTION 9 | 10 | Bupstash key files are PEM encoded with one of the following tags: 11 | 12 | - BUPSTASH KEY 13 | - BUPSTASH SUB KEY 14 | 15 | The binary data after decoding the PEM data consists of [bare](https://baremessages.org/) key structures, described below.
16 | 17 | 18 | Binary encoding of keys: 19 | 20 | ``` 21 | 22 | type PrimaryKey { 23 | id: Data<16>, 24 | rollsum_key: Data<32>, 25 | data_hash_key_part_1: Data<32>, 26 | data_hash_key_part_2: Data<32>, 27 | data_pk: Data<32>, 28 | data_sk: Data<32>, 29 | data_psk: Data<32>, 30 | idx_hash_key_part_1: Data<32>, 31 | idx_hash_key_part_2: Data<32>, 32 | idx_pk: Data<32>, 33 | idx_sk: Data<32>, 34 | idx_psk: Data<32>, 35 | metadata_pk: Data<32>, 36 | metadata_sk: Data<32>, 37 | metadata_psk: Data<32>, 38 | } 39 | 40 | type SubKey { 41 | id: Data<16>, 42 | primary_key_id: Data<16>, 43 | rollsum_key: Option<Data<32>>, 44 | data_hash_key_part_1: Option<Data<32>>, 45 | data_hash_key_part_2: Option<Data<32>>, 46 | data_pk: Option<Data<32>>, 47 | data_sk: Option<Data<32>>, 48 | data_psk: Option<Data<32>>, 49 | idx_hash_key_part_1: Option<Data<32>>, 50 | idx_hash_key_part_2: Option<Data<32>>, 51 | idx_pk: Option<Data<32>>, 52 | idx_sk: Option<Data<32>>, 53 | idx_psk: Option<Data<32>>, 54 | metadata_pk: Option<Data<32>>, 55 | metadata_sk: Option<Data<32>>, 56 | metadata_psk: Option<Data<32>>, 57 | } 58 | 59 | 60 | 61 | type Key (PrimaryKey | SubKey) 62 | ``` 63 | 64 | ## EXAMPLE 65 | 66 | ``` 67 | $ bupstash new-key -o bupstash.key 68 | $ cat bupstash.key 69 | # This file contains a cryptographic key used by 'bupstash' to encrypt and decrypt data.
70 | # 71 | # key-id=55f32e9db43a1fa3cf65bb3705230898 72 | 73 | -----BEGIN BUPSTASH KEY----- 74 | AFXzLp20Oh+jz2W7NwUjCJgS7VhqV37771UhSRo7LZUIxJCbEZkm27AcYylSL5T2 75 | bxAE4g0rukxRhloPqWT+s1Yr2cPNEHymMzJzm+V4QiDMzE4K4k548bsrMoQMGXc8 76 | LRpNiqVzwRRvibkdf9RdnyYPQ5IlvQN395YJVCfiD6nEOY90plDH20UgiGiNLRYK 77 | xH+MfIoFA1X59UFdto0B/CJW9R98OgQeJNP91NQloFA17mbzhqUvwnHDjatzkxht 78 | CJWScQm6PTwEFEYRSzLTWgpFXjnpF09quzZenw/jEn6nPAyjb11u+Ohe7pkfxacv 79 | QZ5qhBMqJ7+H3VpvOLW7mTmXL3T6gB5W7u2Lg6Y/AwkE 80 | -----END BUPSTASH KEY----- 81 | 82 | ``` 83 | 84 | ## SEE ALSO 85 | 86 | bupstash(1) 87 | -------------------------------------------------------------------------------- /doc/man/bupstash-list-contents.1.md: -------------------------------------------------------------------------------- 1 | bupstash-list-contents(1) 2 | ========================= 3 | 4 | ## SYNOPSIS 5 | 6 | List snapshot contents. 7 | 8 | `bupstash list-contents [OPTIONS] QUERY... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash list-contents` lists the contents of the item matching the given query. 13 | 14 | Items created by using `bupstash put` on a directory will have an associated index, other items 15 | are not listable. 16 | 17 | ## OUTPUT FORMATS 18 | 19 | ### Human 20 | 21 | When `--format` is set to `human`, `bupstash list-contents` outputs aligned rows consisting of: 22 | 23 | ``` 24 | PERMS SIZE YYYY/MM/DD HH:MM:SS PATH... 25 | ``` 26 | 27 | The included date is the time of the last change to a given file as reported by the 28 | operating system at the time of the snapshot. 29 | 30 | Prefer using one of the versioned machine readable formats when writing scripts. 31 | 32 | ### JSONl1 33 | 34 | When `--format` is set to `jsonl1`, `bupstash list-contents` outputs one json object per line. 35 | 36 | Each line has the following json schema: 37 | 38 | ``` 39 | { 40 | "path": string | [ bytes... 
], 41 | "mode": number, 42 | "size": number, 43 | "uid": number, 44 | "gid": number, 45 | "mtime": number, 46 | "mtime_nsec": number, 47 | "ctime": number, 48 | "ctime_nsec": number, 49 | "norm_dev": number, 50 | "ino": number, 51 | "nlink": number, 52 | "link_target": string | null, 53 | "dev_major": number | null, 54 | "dev_minor": number | null, 55 | "xattrs": {string : string | [bytes...] ...} | null, 56 | "sparse": boolean, 57 | "data_hash": "$KIND[:$HEXBYTE]" | null 58 | } 59 | ``` 60 | 61 | ## QUERY LANGUAGE 62 | 63 | For full documentation on the query language, see bupstash-query-language(7). 64 | 65 | ## QUERY CACHING 66 | 67 | The list-contents command uses the same query caching mechanisms as bupstash-list(1), check that page for 68 | more information on the query cache. 69 | 70 | ## OPTIONS 71 | 72 | * -r, --repository REPO: 73 | The repository to connect to, may be of the form `ssh://$SERVER/$PATH` for 74 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 75 | 76 | * -k, --key KEY: 77 | Key used to decrypt data and metadata. If not set, defaults 78 | to `BUPSTASH_KEY`. 79 | 80 | * --format FORMAT: 81 | Set output format to one of the following 'human', 'jsonl'. 82 | 83 | * --query-cache PATH: 84 | Path to the query-cache file, defaults to one of the following, in order, provided 85 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 86 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 87 | 88 | * --pick PATH: 89 | List a sub-directory of the query. 90 | 91 | * --utc-timestamps: 92 | Display and search against timestamps in utc time instead of local time. 93 | 94 | * --no-progress: 95 | Suppress progress indicators (Progress indicators are also suppressed when stderr 96 | is not an interactive terminal). 97 | 98 | * -q, --quiet: 99 | Be quiet, implies --no-progress.
100 | 101 | ## ENVIRONMENT 102 | 103 | * BUPSTASH_REPOSITORY: 104 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 105 | remote repositories if ssh access is configured. 106 | 107 | * BUPSTASH_REPOSITORY_COMMAND: 108 | A command to run to connect to an instance of bupstash-serve(1). This 109 | allows more complex connections to the repository for less common use cases. 110 | 111 | * BUPSTASH_KEY: 112 | Path to a primary key that will be used for decrypting data and metadata. 113 | 114 | * BUPSTASH_KEY_COMMAND: 115 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 116 | to fetch the key from arbitrary locations such as the network or other secret storage. 117 | 118 | * BUPSTASH_QUERY_CACHE: 119 | Path to the query cache file to use. 120 | 121 | 122 | ## EXAMPLES 123 | 124 | ### Get an item with a specific id from the repository 125 | 126 | ``` 127 | $ bupstash list-contents id="14eb*" 128 | drwxr-xr-x 0 2020/10/30 13:32:04 . 129 | -rw-r--r-- 1967 2020/10/30 13:32:04 data.txt 130 | ``` 131 | 132 | ## SEE ALSO 133 | 134 | bupstash(1), bupstash-put(1), bupstash-diff(1), bupstash-keyfiles(7), bupstash-query-language(7) 135 | -------------------------------------------------------------------------------- /doc/man/bupstash-list.1.md: -------------------------------------------------------------------------------- 1 | bupstash-list(1) 2 | =============== 3 | 4 | ## SYNOPSIS 5 | 6 | List items in a bupstash repository. 7 | 8 | `bupstash list [OPTIONS] QUERY... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash list` fetches, decrypts and prints metadata of items stored 13 | in the bupstash repository. It can be used for searching the database 14 | with the bupstash query language. 15 | 16 | Only the metadata needs to be decrypted to list items, so a metadata key is sufficient 17 | for item queries, even without access to the data decryption key. 
18 | 19 | ## QUERY LANGUAGE 20 | 21 | For full documentation on the query language, see bupstash-query-language(7). 22 | 23 | ### List query examples: 24 | 25 | ``` 26 | $ bupstash list name='*.tar' 27 | ... 28 | $ bupstash list timestamp='2020*' 29 | ... 30 | ``` 31 | 32 | ## SPECIAL TAGS 33 | 34 | Bupstash automatically inserts special tags that can be viewed and queried against, they are outlined below. 35 | 36 | ### decryption-key-id 37 | 38 | This special tag is inserted when the `--query-encrypted` option is used, it allows searching against the 39 | key id that would be used for decrypting the given item. This tag is mostly useful for pruning 40 | backups for which you do not have the decryption key. 41 | 42 | ### size 43 | 44 | This tag is the size of the data stream and any index metadata associated with the snapshot. This 45 | means the size may not exactly match the size of the data stream retrieved by bupstash-get(1) for the case 46 | of snapshots. 47 | 48 | ### timestamp 49 | 50 | The time the item was created formatted as `YYYY/MM/DD HH:MM:SS`. 51 | 52 | ## QUERY CACHING 53 | 54 | Because all data is stored encrypted on the server, item metadata must first be synchronized to the local machine, 55 | and then decrypted on the client side to run a query. The file containing the synced and encrypted metadata 56 | is called the query cache. 57 | 58 | The path to the query-cache file, defaults to one of the following, in order, provided 59 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 60 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 61 | 62 | As a special case, a query that consists only of a fully specified id (e.g. `id=$FULL_ID`) will not require use 63 | of the query cache, instead the query can be passed directly to the server. This means 64 | it is always more efficient to fully specify an id when running any command that expects a query.
65 | 66 | 67 | ## OUTPUT FORMATS 68 | 69 | ### Human 70 | 71 | When `--format` is set to `human`, `bupstash list` outputs rows consisting of: 72 | 73 | ``` 74 | KEY=VALUE KEY=VALUE KEY=VALUE .... 75 | ``` 76 | 77 | Where each key and value corresponds to a tag that may be searched against. 78 | 79 | ### JSONl1 80 | 81 | When `--format` is set to `jsonl1`, `bupstash list` outputs one json object per line. 82 | 83 | Each line has the following json schema: 84 | 85 | ``` 86 | { 87 | "id": hexstring, 88 | "decryption_key_id": hexstring, 89 | "data_tree": { 90 | "address": hexstring, 91 | "height": number, 92 | "data_chunk_count": number 93 | }, 94 | "index_tree": { 95 | "address": hexstring, 96 | "height": number, 97 | "data_chunk_count": number 98 | }, 99 | "data_size": number, // If decryption key present. 100 | "index_size": number, // If decryption key present. 101 | "put_key_id": hexstring, // If decryption key present. 102 | "data_hash_key_part": hexstring, // If decryption key present. 103 | "index_hash_key_part": hexstring, // If decryption key present. 104 | "unix_timestamp_millis": number, 105 | "tags": { 106 | string : string, 107 | ... 108 | } 109 | ``` 110 | 111 | If --query-encrypted is specified, encrypted tags and metadata are omitted. 112 | 113 | ## OPTIONS 114 | 115 | * -r, --repository REPO: 116 | The repository to connect to may be of the form `ssh://$SERVER/$PATH` for 117 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 118 | 119 | * -k, --key KEY: 120 | Primary key used to decrypt data and metadata. If not set, defaults 121 | to `BUPSTASH_KEY`. 122 | 123 | * --query-encrypted: 124 | The query will not decrypt any metadata, allowing you to 125 | list items you do not have a decryption key for. 126 | This option inserts the pseudo query tag 'decryption-key-id'. 
127 | 128 | * --query-cache PATH: 129 | Path to the query-cache file, defaults to one of the following, in order, provided 130 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 131 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 132 | 133 | * --format FORMAT: 134 | Set output format to one of the following 'human', 'jsonl'. 135 | 136 | * --utc-timestamps: 137 | Display and search against timestamps in utc time instead of local time. 138 | 139 | * --no-progress: 140 | Suppress progress indicators (Progress indicators are also suppressed when stderr 141 | is not an interactive terminal). 142 | 143 | * -q, --quiet: 144 | Be quiet, implies --no-progress. 145 | 146 | ## ENVIRONMENT 147 | 148 | * BUPSTASH_REPOSITORY: 149 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 150 | remote repositories if ssh access is configured. 151 | 152 | * BUPSTASH_REPOSITORY_COMMAND: 153 | A command to run to connect to an instance of bupstash-serve(1). This 154 | allows more complex connections to the repository for less common use cases. 155 | 156 | * BUPSTASH_KEY: 157 | Path to a primary key that will be used for decrypting data and metadata. 158 | 159 | * BUPSTASH_KEY_COMMAND: 160 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 161 | to fetch the key from arbitrary locations such as the network or other secret storage. 162 | 163 | * BUPSTASH_QUERY_CACHE: 164 | Path to the query cache file to use. 165 | 166 | 167 | ## EXAMPLES 168 | 169 | ### List items matching a query 170 | 171 | ``` 172 | $ bupstash list name=backup.tar and timestamp=2020/07/* 173 | id="aa87fdbc72241f363568bbb888c0834e" name="backup.tar" size="106.34MB" timestamp="2020-07-24 15:25:00" 174 | id="d271ec0b989cfc20e10d01380115747e" name="backup.tar" size="146.38MB" timestamp="2020-07-29 15:25:24" 175 | ... 
176 | ``` 177 | 178 | ## SEE ALSO 179 | 180 | bupstash(1), bupstash-list-contents(1), bupstash-keyfiles(7), bupstash-query-language(7) 181 | -------------------------------------------------------------------------------- /doc/man/bupstash-new-key.1.md: -------------------------------------------------------------------------------- 1 | bupstash-new-key(1) 2 | =================== 3 | 4 | ## SYNOPSIS 5 | 6 | Generate a new bupstash key. 7 | 8 | `bupstash new-key -o KEY` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash new-key` creates a new bupstash key capable of both 13 | encrypting and decrypting repository entries. 14 | 15 | The generated key will have permissions that make it readable by 16 | only the creating user. 17 | 18 | Remember to keep your keys safe, as losing a key is the same as losing all 19 | data stored using that key. 20 | 21 | ## OPTIONS 22 | 23 | * -o, --output PATH: 24 | Path to where the new key will be written. 25 | 26 | ## EXAMPLES 27 | 28 | ### Create a new key 29 | ``` 30 | $ bupstash new-key -o ./backups.key 31 | ``` 32 | 33 | ## SEE ALSO 34 | 35 | bupstash(1), bupstash-keyfiles(7) 36 | -------------------------------------------------------------------------------- /doc/man/bupstash-new-sub-key.1.md: -------------------------------------------------------------------------------- 1 | bupstash-new-sub-key(1) 2 | ======================= 3 | 4 | ## SYNOPSIS 5 | 6 | Generate a bupstash sub key with lesser encryption and decryption capabilities. 7 | 8 | `bupstash new-sub-key -k KEY -o SUB_KEY` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash new-sub-key` creates a new bupstash key capable of 13 | a subset of the encryption and decryption operations of the main key. 14 | 15 | Capabilities are any of 'put', 'list' and 'list-contents'. 'put' keys can 16 | create new backups but not decrypt data, 'list' keys can decrypt tags and other metadata, 17 | while 'list-contents' keys can decrypt the contents of items created by 'bupstash put'.
18 | 19 | A typical use of a list only key would be to allow a cron job to rotate old backups by 20 | their search tags, without exposing the data decryption key. 21 | 22 | The generated key will be marked readable only for the creating user. 23 | 24 | If a sub-key is lost, the original key will still be able to decrypt any data in the repository 25 | encrypted by that sub-key. 26 | 27 | *NOTE*: decryption differs from access - An attacker may still delete data by simply deleting the 28 | items or files they have access to. Use bupstash-serve(1) access controls to restrict which 29 | operations a user can perform and prevent unauthorized deletion of data. This can be done via an 30 | ssh authorized_keys file, or through mechanisms such as `sudo` or `doas` configuration. 31 | 32 | ## OPTIONS 33 | 34 | * -k, --key PATH: 35 | Key to derive the new sub-key from. 36 | * -o, --output PATH: 37 | Path to where the sub-key will be written. 38 | * --put: 39 | The key is able to encrypt data for 'put' operations. 40 | * --list: 41 | The key will be able to decrypt metadata and perform queries. 42 | * --list-contents: 43 | The key will be able to list item contents with 'list-contents' (implies --list). 
44 | 45 | ## EXAMPLES 46 | 47 | ### Create a new put only key 48 | 49 | ``` 50 | $ bupstash new-sub-key --put -k backups.key -o ./put.key 51 | $ bupstash put -k ./put.key ./data 52 | ``` 53 | 54 | ### Create a new listing key 55 | 56 | ``` 57 | $ bupstash new-sub-key -k ./backups.key -o ./list.key --list 58 | $ bupstash list -k ./list.key 59 | ``` 60 | 61 | ### Create a new content listing key 62 | 63 | ``` 64 | $ bupstash new-sub-key -k ./backups.key -o ./list-contents.key --list-contents 65 | $ bupstash list-contents -k ./list-contents.key name=some-backup.tar 66 | ``` 67 | 68 | ## SEE ALSO 69 | 70 | bupstash(1), bupstash-keyfiles(7) 71 | -------------------------------------------------------------------------------- /doc/man/bupstash-query-language.7.md: -------------------------------------------------------------------------------- 1 | bupstash-query-language(7) 2 | ========================== 3 | 4 | ## SYNOPSIS 5 | 6 | The bupstash query language used by bupstash commands. 7 | 8 | ## DESCRIPTION 9 | 10 | The bupstash query language is used to filter and select items from a bupstash repository. Check 11 | the examples section for practical uses, or read the language section for a more precise description. 12 | 13 | ## EXAMPLES 14 | 15 | Glob matching: 16 | ``` 17 | $ bupstash list name="*.tar" 18 | ... name=foo.tar 19 | ... name=bar.tar 20 | ``` 21 | 22 | Literal matching: 23 | ``` 24 | $ bupstash list name=="*.tar" 25 | ... 26 | ``` 27 | 28 | Age based matching: 29 | 30 | ``` 31 | $ bupstash list newer-than "1 month" 32 | $ bupstash list older-than 2d 33 | $ bupstash list older-than 1y 34 | ... 35 | ``` 36 | 37 | And condition matching: 38 | ``` 39 | $ bupstash list type=backup and hostname=server1 hostname=server2 40 | ... 41 | ``` 42 | 43 | Or condition matching: 44 | ``` 45 | $ bupstash list hostname=server1 or hostname=server2 46 | ...
47 | ``` 48 | 49 | Precedence grouping: 50 | ``` 51 | $ bupstash list [hostname=server1 or hostname=server2] and date=2020-* 52 | ... 53 | ``` 54 | 55 | Quote using your shell's builtin quoting: 56 | 57 | ``` 58 | $ bupstash rm name="my files.tar" 59 | ``` 60 | 61 | ## LANGUAGE 62 | 63 | ### Delimiters 64 | 65 | As queries may span multiple command line arguments, the gap between arguments is treated as a special 66 | delimiting character for the sake of query parsing. 67 | 68 | ### Tags and values 69 | 70 | A tag name is a string containing a set of characters matching the regular 71 | expression ```[A-Za-z0-9-_]+```. 72 | 73 | A value is a set of any characters except a delimiter. 74 | 75 | ### Durations 76 | 77 | A duration is a concatenation of time spans, where each time span is an integer number and a suffix. 78 | 79 | Supported suffixes: 80 | 81 | - seconds, second, sec, s 82 | - minutes, minute, min, m 83 | - hours, hour, hr, h 84 | - days, day, d 85 | - weeks, week, w 86 | - months, month, M -- defined as 30.44 days 87 | - years, year, y -- defined as 365.25 days 88 | 89 | ### Globbing 90 | 91 | Some operators accept a glob to match against, the following describes the valid globbing meta characters. 92 | 93 | ``` 94 | ? matches any single character. (If the literal_separator option is enabled, then ? can never match a path separator.) 95 | * matches zero or more characters. (If the literal_separator option is enabled, then * can never match a path separator.) 96 | ** recursively matches directories but are only legal in three situations. First, if the glob starts with **/, then it matches all directories. For example, **/foo matches foo and bar/foo but not foo/bar. Secondly, if the glob ends with /**, then it matches all sub-entries. For example, foo/** matches foo/a and foo/a/b, but not foo. Thirdly, if the glob contains /**/ anywhere within the pattern, then it matches zero or more directories. Using ** anywhere else is illegal (N.B.
the glob ** is allowed and means “match everything”). 97 | {a,b} matches a or b where a and b are arbitrary glob patterns. (N.B. Nesting {...} is not currently allowed.) 98 | [ab] matches a or b where a and b are characters. Use [!ab] to match any character except for a and b. 99 | Metacharacters such as * and ? can be escaped with character class notation. e.g., [*] matches *. 100 | When backslash escapes are enabled, a backslash (\) will escape all meta characters in a glob. If it precedes a non-meta character, then the slash is ignored. A \\ will match a literal \\. Note that this mode is only enabled on Unix platforms by default, but can be enabled on any platform via the backslash_escape setting on Glob. 101 | ``` 102 | 103 | (Documentation taken from the underlying [glob software library](https://docs.rs/globset/0.4.8/globset/index.html#)). 104 | 105 | ### Binary operators 106 | 107 | Check a tag matches a glob: 108 | 109 | ``` 110 | TAGNAME = GLOB 111 | ``` 112 | 113 | Check a tag matches a literal value. 114 | 115 | ``` 116 | TAGNAME == VALUE 117 | ``` 118 | 119 | Match if either expression matches. 120 | 121 | ``` 122 | EXPR or EXPR 123 | ``` 124 | 125 | Match if both expressions match. 126 | 127 | ``` 128 | EXPR and EXPR 129 | ``` 130 | 131 | ### Age matching 132 | 133 | ``` 134 | older-than DURATION 135 | newer-than DURATION 136 | ``` 137 | 138 | Take care that system clocks are configured correctly on both the querying machine, and devices sending backups, as incorrect 139 | system clocks could cause accidental removal of items. 140 | 141 | ### Unary operators 142 | 143 | Invert an expression. 144 | 145 | ``` 146 | ~ EXPR 147 | ``` 148 | 149 | 150 | ### grouping 151 | 152 | Use brackets to alter the default precedence. 153 | 154 | ``` 155 | [ EXPR ] 156 | ``` 157 | 158 | Note, This differs from the typical tradition of using `()` for grouping so queries are 159 | easier to write in shell scripts where `()` already has a designated meaning. 
160 | 161 | ## SEE ALSO 162 | 163 | bupstash(1), bupstash-put(1), bupstash-list(1), bupstash-rm(1) 164 | -------------------------------------------------------------------------------- /doc/man/bupstash-recover-removed.1.md: -------------------------------------------------------------------------------- 1 | bupstash-recover-removed(1) 2 | ============== 3 | 4 | ## SYNOPSIS 5 | 6 | Recover repository items that were removed, but that have not 7 | yet been deleted via garbage collection. 8 | 9 | `bupstash recover-removed [OPTIONS]` 10 | 11 | ## DESCRIPTION 12 | 13 | `bupstash recover-removed` allows a user to undo all 'rm' operations that 14 | have taken place since the last invocation of bupstash-gc(1). 15 | In other words, this command provides a way to correct errors and accidental 16 | invocations of bupstash-rm(1). 17 | 18 | `bupstash recover-removed` requires 'put' and 'get' permissions for the repository being operated on. 19 | 20 | ## OPTIONS 21 | 22 | * -r, --repository REPO: 23 | The repository to connect to and operate on. 24 | May be of the form `ssh://$SERVER/$PATH` for 25 | remote repositories if ssh access is configured. 26 | If not specified, is set to `BUPSTASH_REPOSITORY`. 27 | 28 | * --no-progress: 29 | Suppress progress indicators (Progress indicators are also suppressed when stderr 30 | is not an interactive terminal). 31 | 32 | * -q, --quiet: 33 | Be quiet, implies --no-progress. 34 | 35 | ## ENVIRONMENT 36 | 37 | * BUPSTASH_REPOSITORY: 38 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 39 | remote repositories if ssh access is configured. 40 | 41 | * BUPSTASH_REPOSITORY_COMMAND: 42 | A command to run to connect to an instance of bupstash-serve(1). This 43 | allows more complex connections to the repository for less common use cases. 
44 | 45 | ## SEE ALSO 46 | 47 | bupstash(1), bupstash-rm(1), bupstash-gc(1) 48 | -------------------------------------------------------------------------------- /doc/man/bupstash-repository.7.md: -------------------------------------------------------------------------------- 1 | bupstash-repository(7) 2 | ====================== 3 | 4 | ## SYNOPSIS 5 | 6 | Overview of the bupstash repository format. 7 | 8 | ## DESCRIPTION 9 | 10 | The most important part of bupstash is the repository. It is where all data is stored in a mostly 11 | encrypted form. The bupstash client interacts via the repository over stdin/stdout of the bupstash-serve(1) 12 | process. This may be locally, or via a protocol such as ssh. 13 | 14 | Because most data is encrypted, the repository structure is quite simple. 15 | 16 | Files: 17 | 18 | ``` 19 | repo 20 | ├── data 21 | │ ├── ... 22 | │ └── 5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03 23 | ├── items 24 | │ ├── 031d91b342fc76b8a4b32e2a8d12e4d0 25 | │ └── ffaa0127fd9938aa0a3eaf6070aa947d 26 | ├── meta 27 | │ ├── gc_generation 28 | │ ├── gc_dirty 29 | │ ├── schema_version 30 | │ └── storage_engine 31 | ├── wal 32 | │ ├── ... 
33 | │   └── 00000000N.wal 34 | ├── repo.oplog 35 | ├── repo.lock 36 | ├── tx.lock 37 | ├── tx.seq 38 | └── tx.wal 39 | 40 | ``` 41 | 42 | ### repo.oplog 43 | 44 | This file is an append only ledger where each entry is a [bare](https://baremessages.org/) encoded log op of the following format: 45 | 46 | 47 | ``` 48 | 49 | type Xid data<16>; 50 | type Address data<32>; 51 | 52 | type LogOp (AddItem | RemoveItems | RecoverRemoved); 53 | 54 | type AddItem { 55 | id: Xid 56 | metadata: VersionedItemMetadata 57 | } 58 | 59 | type RemoveItems { 60 | items: []Xid 61 | } 62 | 63 | type RecoverRemoved {} 64 | 65 | type VersionedItemMetadata = (V1VersionedItemMetadata | V2VersionedItemMetadata | V3VersionedItemMetadata) 66 | 67 | type V1VersionedItemMetadata { 68 | // deprecated 69 | } 70 | 71 | type V2VersionedItemMetadata { 72 | // deprecated 73 | } 74 | 75 | type V3VersionedItemMetadata { 76 | primary_key_id: Xid, 77 | unix_timestamp_millis: u64, 78 | tree_height: usize, 79 | address: Address, 80 | encrypted_metadata: data 81 | } 82 | 83 | struct V3SecretItemMetadata { 84 | plain_text_hash: data<32>, 85 | send_key_id: Xid, 86 | hash_key_part_2: data<32>, 87 | tags: Map[String]String, 88 | } 89 | 90 | ``` 91 | 92 | It is important to note, all metadata like search tags are stored encrypted and are not 93 | readable without a master key or metadata key. 94 | 95 | ### repo.lock 96 | 97 | This lock is held exclusively during garbage collection and in a shared fashion 98 | during operations that modify the repository. 99 | 100 | ### tx.lock 101 | 102 | Bupstash uses `tx.lock` and `tx.wal` to coordinate crash safe edits across multiple files.
103 | 104 | ### tx.wal 105 | 106 | This file is a [bare](https://baremessages.org/) encoded WAL (write ahead log) with the following schema: 107 | 108 | ``` 109 | 110 | type WalOp = Begin | End | CreateFile | WriteFileAt | Remove | Rename | Mkdir; 111 | 112 | type Begin { 113 | sequence_number: u64, 114 | }; 115 | 116 | type End {}; 117 | 118 | type CreateFile { 119 | path: String, 120 | data_size: Uint, 121 | }; 122 | 123 | type WriteFileAt { 124 | path: String, 125 | offset: Uint, 126 | data_size: Uint, 127 | }; 128 | 129 | type Remove { 130 | path: String, 131 | }; 132 | 133 | type Rename { 134 | path: String, 135 | to: String, 136 | }; 137 | 138 | type Mkdir { 139 | path: String, 140 | }; 141 | 142 | ``` 143 | 144 | The final 32 bytes of the write ahead log are the blake3 hash of the previous file contents. 145 | 146 | When bupstash needs to modify repository metadata, it first writes a wal file and flushes it to disk, then performs the given operations in sequence. On crash the operations 147 | will be replayed again preventing data loss. 148 | 149 | Do not delete a write ahead log if you see one, it is critical for data integrity. 150 | 151 | ### tx.seq 152 | 153 | A file containing a sequence number used for numbering WAL files. 154 | 155 | ### data/ 156 | 157 | This directory contains a set of encrypted and deduplicated data chunks. 158 | The name of the file corresponds to an HMAC hash of the unencrypted contents, as such 159 | if two chunks are added to the repository with the same hmac, they only need to be stored once. 160 | 161 | This directory is not used when the repository is configured for storage engines other than "Dir" storage. 162 | 163 | ### items/ 164 | 165 | This directory contains one file for each item, where the contents of the file is an encoded 166 | `VersionedItemMetadata` as described in the repo.oplog section. When an item is removed and is 167 | pending garbage collection it is given the .removed suffix.
168 | 169 | ### meta/storage_engine 170 | 171 | Contains the JSON storage engine specification, which allows storage of data chunks 172 | in external or alternative storage formats. This file is human editable to assist 173 | manual data migrations between supported formats. 174 | 175 | ### meta/schema_version 176 | 177 | This file contains the schema version of a repository. 178 | 179 | ### meta/gc_generation 180 | 181 | Each time a garbage collection happens, this file is changed and is used to invalidate 182 | client side caches. 183 | 184 | ### meta/gc_dirty 185 | 186 | This file marks if a garbage collection was interrupted prematurely and is used for crash 187 | recovery. This file is not always present. 188 | 189 | ### wal/ 190 | 191 | When the BUPSTASH_KEEP_WAL=1 env var is set for the `bupstash serve` process, this 192 | directory contains the historic WAL files that can be used for point in time recovery. 193 | 194 | ## The hash tree structure 195 | 196 | Bupstash stores arbitrary streams of data in the repository by splitting the stream into chunks, 197 | hmac addressing the chunks, then compressing and encrypting the chunks with the public key portion of a bupstash key. 198 | Each chunk is then stored in the data directory in a file named after the hmac hash of the contents. 199 | As we generate a sequence of chunks with corresponding hmac addresses, 200 | we can build a tree structure out of these addresses. Leaf nodes of the tree are simply the encrypted data. 201 | Other nodes in the tree are simply unencrypted lists of hmac hashes, which may point to encrypted leaf nodes, 202 | or other subtrees. The key idea behind the hash tree, is we can convert an arbitrary stream of data 203 | into a single HMAC address with approximately equal sized chunks. 204 | When multiple hash trees are added to the repository, they share structure and enable deduplication.
205 | 206 | This addressing and encryption scheme has some important properties: 207 | 208 | - The repository owner *cannot* guess chunk contents as the HMAC key is unknown to him. 209 | - The repository owner *cannot* decrypt leaves of the hash tree, as they are encrypted. 210 | - The repository owner *can* iterate the hash tree for garbage collection purposes. 211 | - The repository owner *can* run garbage collection without retrieving the leaf nodes from cold storage. 212 | - The repository owner *can* push a stream of hash tree nodes to a client with no network round trips. 213 | - A client *can* send data streams to a repository without sharing the encryption key. 214 | - A client *can* retrieve and verify a datastream by checking hmacs. 215 | 216 | These properties are desirable for enabling high performance garbage collection and data streaming 217 | with prefetch on the repository side. 218 | 219 | ## Chunking and deduplication 220 | 221 | Data is deduplicated by splitting a data stream into small chunks, and never storing the same chunk twice. 222 | The performance of this deduplication is thus determined by how chunk split points are defined. For curious 223 | readers - bupstash uses something known as 'content defined chunking' to find efficient chunk splits. 224 | 225 | ## Chunk formats 226 | 227 | Chunks in the database are one of the following types, in general we know the type of a chunk 228 | based on the item metadata and the hash tree height. 229 | 230 | ### Encrypted data chunk 231 | 232 | These chunks form the leaves of our hash trees, they contain encrypted data. They contain 233 | a key exchange packet, with enough information for the master key to derive the session key. 234 | 235 | ``` 236 | KEY_EXCHANGE_PACKET1_BYTES[PACKET1_SZ] || ENCRYPTED_BYTES[...] 237 | ``` 238 | 239 | After decryption, the chunk is optionally compressed, so is either compressed data, or data with a null footer byte. 240 | 241 | ``` 242 | COMPRESSED_DATA[...]
|| DECOMPRESSED_SIZE[4] || COMPRESSION_TYPE[1] 243 | ``` 244 | 245 | or 246 | 247 | ``` 248 | DATA[...] || 0x00 249 | ``` 250 | 251 | Valid compression types are: 252 | 253 | - 1 == lz4 compression. 254 | - 2 == zstd compression. 255 | 256 | ### Hash tree node chunk 257 | 258 | These chunks form non leaf nodes in our hash tree, they consist of an array of addresses prefixed 259 | with the total number of data chunks that are beneath them in the tree. 260 | 261 | ``` 262 | NUM_DATA_CHUNKS[8] ADDRESS[ADDRESS_SZ] 263 | NUM_DATA_CHUNKS[8] ADDRESS[ADDRESS_SZ] 264 | NUM_DATA_CHUNKS[8] ADDRESS[ADDRESS_SZ] 265 | NUM_DATA_CHUNKS[8] ADDRESS[ADDRESS_SZ] 266 | ... 267 | ``` 268 | 269 | These addresses must be recursively followed to read our data chunks, these addresses correspond 270 | to data chunks when the tree height is 0. The chunk counts can be used to efficiently seek to address offsets 271 | in the tree. 272 | 273 | ### Format of key exchange bytes 274 | 275 | Coming soon... 276 | 277 | ## SEE ALSO 278 | 279 | bupstash(1) 280 | -------------------------------------------------------------------------------- /doc/man/bupstash-restore.1.md: -------------------------------------------------------------------------------- 1 | bupstash-restore(1) 2 | ================ 3 | 4 | ## SYNOPSIS 5 | 6 | Efficiently restore the contents of a snapshot into a local directory. 7 | 8 | `bupstash restore [OPTIONS] --into $PATH QUERY... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash restore` performs an efficient set of incremental changes to 13 | a directory such that it becomes identical to the requested snapshot. 14 | The incremental nature of `bupstash restore` makes it well suited for 15 | cycling between multiple similar snapshots. Note that this operation is dangerous 16 | as it deletes extra files already present in the destination directory. 
17 | 18 | In order to aid file browsing as unprivileged users, `bupstash restore` does 19 | not attempt to restore users, groups and xattrs by default. To set 20 | these you must specify the flags --ownership and --xattrs respectively. 21 | 22 | The item that is checked out is chosen based on a simple query against the 23 | tags specified when saving data with `bupstash put`. 24 | 25 | ## QUERY LANGUAGE 26 | 27 | For full documentation on the query language, see bupstash-query-language(7). 28 | 29 | ## QUERY CACHING 30 | 31 | The restore command uses the same query caching mechanisms as bupstash-list(1), check that page for 32 | more information on the query cache. 33 | 34 | ## SPARSE FILES 35 | 36 | If a file was detected as sparse during 'put', the restore command will restore it as 37 | sparse, but it should be noted the restore command will not delete an existing file 38 | with the correct checksum in order to recreate it as sparse. 39 | 40 | ## OPTIONS 41 | 42 | * --into PATH: 43 | Directory to restore files into, defaults to `BUPSTASH_RESTORE_DIR`. 44 | 45 | * -r, --repository REPO: 46 | The repository to connect to, may be of the form `ssh://$SERVER/$PATH` for 47 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 48 | 49 | * -k, --key KEY: 50 | Key that will be used to decrypt data and metadata. If not set, defaults 51 | to `BUPSTASH_KEY`. 52 | 53 | * --pick PATH: 54 | Pick a sub-directory of the snapshot to restore. 55 | 56 | * --ownership: 57 | Set uid's and gid's. 58 | 59 | * --xattrs: 60 | Set xattrs. 61 | 62 | * --indexer-threads N: 63 | Number of processor threads to use for pipelined parallel file hashing and metadata reads. 64 | Defaults to the number of processors.
65 | 66 | * --query-cache PATH: 67 | Path to the query-cache file, defaults to one of the following, in order, provided 68 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 69 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 70 | 71 | * --utc-timestamps: 72 | Display and search against timestamps in utc time instead of local time. 73 | 74 | * --no-progress: 75 | Suppress progress indicators (Progress indicators are also suppressed when stderr 76 | is not an interactive terminal). 77 | 78 | * -q, --quiet: 79 | Be quiet, implies --no-progress. 80 | 81 | ## ENVIRONMENT 82 | 83 | * BUPSTASH_REPOSITORY: 84 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 85 | remote repositories if ssh access is configured. 86 | 87 | * BUPSTASH_REPOSITORY_COMMAND: 88 | A command to run to connect to an instance of bupstash-serve(1). This 89 | allows more complex connections to the repository for less common use cases. 90 | 91 | * BUPSTASH_KEY: 92 | Path to the key that will be used for decrypting data and metadata. 93 | 94 | * BUPSTASH_KEY_COMMAND: 95 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 96 | to fetch the key from arbitrary locations such as the network or other secret storage. 97 | 98 | * BUPSTASH_QUERY_CACHE: 99 | Path to the query cache file to use. 100 | 101 | * BUPSTASH_RESTORE_DIR: 102 | Path to restore into, can be used instead of the --into argument. 
103 | 104 | ## EXAMPLES 105 | 106 | ### Restore a snapshot into a local directory 107 | 108 | ``` 109 | $ bupstash restore --into ./dir id=ad8* 110 | ``` 111 | 112 | ### Restore including users and groups 113 | 114 | ``` 115 | $ bupstash restore --ownership --into ./dir id=ad8* 116 | ``` 117 | 118 | ### Restore a sub directory of the snapshot 119 | 120 | ``` 121 | $ bupstash restore --into ./dir --pick sub/dir id=ad8* 122 | ``` 123 | 124 | ## SEE ALSO 125 | 126 | bupstash(1), bupstash-get(1), bupstash-list(1), bupstash-keyfiles(7), bupstash-query-language(7) 127 | -------------------------------------------------------------------------------- /doc/man/bupstash-rm.1.md: -------------------------------------------------------------------------------- 1 | bupstash-rm(1) 2 | ============== 3 | 4 | ## SYNOPSIS 5 | 6 | Remove items from a bupstash repository. 7 | 8 | `bupstash rm [OPTIONS] QUERY... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash rm` removes items from a bupstash repository. 13 | 14 | Items that are removed are not immediately deleted, instead the deletion and 15 | space reclamation is scheduled for the next time the garbage collector bupstash-gc(1) 16 | is run. 17 | 18 | Only the metadata needs to be decrypted to remove items, so a metadata key is sufficient 19 | for item deletion, even without access to the data decryption key. 20 | 21 | ## QUERY LANGUAGE 22 | 23 | For full documentation on the query language, see bupstash-query-language(7). 24 | 25 | ### Remove query examples 26 | ``` 27 | $ id=$(bupstash put ./some-data) 28 | 29 | $ bupstash rm id=$id 30 | 31 | $ bupstash rm name=backups.tar 32 | 33 | $ bupstash rm --allow-many name='*.tar' and older-than 30d 34 | 35 | $ bupstash rm --allow-many id="*" 36 | ``` 37 | 38 | ## QUERY CACHING 39 | 40 | The rm command uses the same query caching mechanisms as bupstash-list(1), check that page for 41 | more information on the query cache. 
42 | 43 | ## OPTIONS 44 | 45 | * -r, --repository REPO: 46 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 47 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 48 | 49 | * -k, --key KEY: 50 | Key used to decrypt metadata when executing a query. If not set, defaults 51 | to `BUPSTASH_KEY`. 52 | 53 | * --query-cache PATH: 54 | Path to the query-cache file, defaults to one of the following, in order, provided 55 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 56 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 57 | 58 | * --query-encrypted: 59 | The query will not decrypt any metadata, allowing you to 60 | list items you do not have a decryption key for. 61 | This option inserts the pseudo query tag 'decryption-key-id'. 62 | 63 | * --ids-from-stdin: 64 | Remove items with IDs read from stdin, one per line, instead of executing a query. 65 | 66 | * --allow-many: 67 | By default bupstash refuses to remove multiple items from a single query, this flag 68 | disables that safety feature. 69 | 70 | * --utc-timestamps: 71 | Display and search against timestamps in utc time instead of local time. 72 | 73 | * --no-progress: 74 | Suppress progress indicators (Progress indicators are also suppressed when stderr 75 | is not an interactive terminal). 76 | 77 | * -q, --quiet: 78 | Be quiet, implies --no-progress. 79 | 80 | ## ENVIRONMENT 81 | 82 | * BUPSTASH_REPOSITORY: 83 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 84 | remote repositories if ssh access is configured. 85 | 86 | * BUPSTASH_REPOSITORY_COMMAND: 87 | A command to run to connect to an instance of bupstash-serve(1). This 88 | allows more complex connections to the repository for less common use cases. 89 | 90 | * BUPSTASH_KEY: 91 | Path to a primary key that will be used for decrypting data and metadata. 
92 | 93 | * BUPSTASH_KEY_COMMAND: 94 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 95 | to fetch the key from arbitrary locations such as the network or other secret storage. 96 | 97 | * BUPSTASH_QUERY_CACHE: 98 | Path to the query cache file to use. 99 | 100 | ## EXAMPLES 101 | 102 | ### remove an item with a specific id from the repository 103 | 104 | ``` 105 | $ bupstash rm id=14ebd2073b258b1f55c5bbc889c49db4 106 | ``` 107 | 108 | ### remove all items from the repository 109 | 110 | ``` 111 | $ bupstash rm --allow-many id="*" 112 | ``` 113 | 114 | ### remove items with name backup.tar that are older than 30 days 115 | 116 | ``` 117 | $ bupstash rm --allow-many name=backup.tar and older-than 30d 118 | ``` 119 | 120 | ### remove items with a custom script 121 | 122 | ``` 123 | $ bupstash list --format=jsonl1 \ 124 | | custom-json-filter \ 125 | | bupstash rm --ids-from-stdin 126 | ``` 127 | 128 | ## SEE ALSO 129 | 130 | bupstash(1), bupstash-list(1), bupstash-gc(1), bupstash-query-language(7) 131 | -------------------------------------------------------------------------------- /doc/man/bupstash-serve.1.md: -------------------------------------------------------------------------------- 1 | bupstash-serve(1) 2 | ================ 3 | 4 | ## SYNOPSIS 5 | 6 | Serve the bupstash protocol over stdin/stdout. 7 | 8 | `bupstash serve [OPTIONS] REPOSITORY` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash serve` serves the bupstash protocol over stdin/stdout allowing 13 | interaction with a repository. Most bupstash commands operate via an instance of bupstash serve. 14 | 15 | The serve command has flags that can be set to restrict access permissions, by default 16 | all access is permitted until the first --allow-* option is provided. These flags can be enforced via an 17 | ssh authorized_keys file, or through mechanisms such as `sudo` or `doas` configuration.
18 | 19 | Clients with permission to read data may still not be able to decrypt it unless they 20 | have the correct client side decryption key. 21 | 22 | Note that many errors are printed out of band via stderr, so alternative transports should consider 23 | how to also forward stderr data. 24 | 25 | ## OPTIONS 26 | 27 | * --allow-init: 28 | Allow the client to initialize new repositories. 29 | * --allow-put: 30 | Allow client to put more items into the repository. 31 | * --allow-list: 32 | Allow client to retrieve metadata and snapshot indexes for search and listing. 33 | * --allow-get: 34 | Allow client to retrieve data from the repository, implies --allow-list. 35 | * --allow-remove: 36 | Allow client to remove repository items, implies --allow-list. 37 | * --allow-gc: 38 | Allow client to run the repository garbage collector. 39 | * --allow-sync: 40 | Allow client to sync items into the repository, i.e. be the destination of a repository sync. 41 | 42 | ## EXAMPLES 43 | 44 | 45 | ### Custom ssh flags using BUPSTASH_REPOSITORY_COMMAND 46 | 47 | ``` 48 | $ export BUPSTASH_REPOSITORY_COMMAND="ssh -p 2020 $SERVER bupstash serve /data/repository" 49 | $ bupstash list 50 | ``` 51 | 52 | ### Setup SSH access controls 53 | 54 | Create a 'backups' user on your server. 
55 | 56 | In your sshd config file on your server add the line: 57 | 58 | ``` 59 | Match User backups 60 | ForceCommand "/bin/bupstash-put-force-command.sh" 61 | ``` 62 | 63 | Create /bin/bupstash-put-force-command.sh on your server: 64 | 65 | ``` 66 | $ echo 'exec bupstash serve --allow-put /home/backups/bupstash-backups' > bupstash-put-force-command.sh 67 | $ sudo cp bupstash-put-force-command.sh /bin/bupstash-put-force-command.sh 68 | $ sudo chown root:root /bin/bupstash-put-force-command.sh 69 | $ sudo chmod +x /bin/bupstash-put-force-command.sh 70 | ``` 71 | 72 | Now any client with ssh access to the 'backups' user will only be able to add new backups to one repository: 73 | 74 | ``` 75 | $ export BUPSTASH_REPOSITORY="ssh://backups@$SERVER" 76 | $ bupstash put ./data 77 | d1659c3f56f744c7767fc57da003ee5d 78 | $ bupstash list 79 | server has disabled query and search for this client 80 | ``` 81 | 82 | Logging into the server via other means will have full access to the backups repository. Different 83 | permissions can be configured using similar concepts alongside different ssh configurations and keys. 84 | 85 | ## SEE ALSO 86 | 87 | bupstash(1), bupstash-repository(7) 88 | -------------------------------------------------------------------------------- /doc/man/bupstash-sync.1.md: -------------------------------------------------------------------------------- 1 | bupstash-sync(1) 2 | ================ 3 | 4 | ## SYNOPSIS 5 | 6 | Sync items and data from one bupstash repository to another. 7 | 8 | `bupstash sync [OPTIONS] --to $REMOTE [QUERY...]` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash sync` copies items and data from one repository to another while 13 | attempting to minimize unnecessary bandwidth usage. 14 | 15 | A typical use of this command is to back up files to a local repository (e.g. an external drive) while also efficiently 16 | uploading them to an offsite location for safe storage.
17 | 18 | Note that when no query is specified all items are synced, even those that do not match the current bupstash key. 19 | 20 | 21 | ## QUERY LANGUAGE 22 | 23 | For full documentation on the query language, see bupstash-query-language(7). 24 | 25 | ## QUERY CACHING 26 | 27 | The sync command uses the same query caching mechanisms as bupstash-list(1), check that page for 28 | more information on the query cache. 29 | 30 | ## OPTIONS 31 | 32 | * -r, --repository REPO: 33 | The repository to sync from. May be of the form `ssh://$SERVER/$PATH` for 34 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 35 | 36 | * --to REPO: 37 | The destination repository to sync items to. May be of the form `ssh://$SERVER/$PATH` for 38 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_TO_REPOSITORY`. 39 | 40 | * -k, --key KEY: 41 | Key used to decrypt metadata when executing a query. If not set, defaults 42 | to `BUPSTASH_KEY`. 43 | 44 | * --query-cache PATH: 45 | Path to the query-cache file, defaults to one of the following, in order, provided 46 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 47 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 48 | 49 | * --query-encrypted: 50 | The query will not decrypt any metadata, allowing you to 51 | list items you do not have a decryption key for. 52 | This option inserts the pseudo query tag 'decryption-key-id'. 53 | 54 | * --ids-from-stdin: 55 | Sync items with IDs read from stdin, one per line, instead of executing a query. 56 | 57 | * --utc-timestamps: 58 | Display and search against timestamps in utc time instead of local time. 59 | 60 | * --no-progress: 61 | Suppress progress indicators (Progress indicators are also suppressed when stderr 62 | is not an interactive terminal). 63 | 64 | * -q, --quiet: 65 | Be quiet, implies --no-progress. 
66 | 67 | ## ENVIRONMENT 68 | 69 | * BUPSTASH_REPOSITORY: 70 | The repository to pull items from. May be of the form `ssh://$SERVER/$PATH` for 71 | remote repositories if ssh access is configured. 72 | 73 | * BUPSTASH_REPOSITORY_COMMAND: 74 | A command to run to connect to an instance of bupstash-serve(1). This 75 | allows more complex connections to the repository for less common use cases. 76 | 77 | * BUPSTASH_TO_REPOSITORY: 78 | The repository to sync items to. May be of the form `ssh://$SERVER/$PATH` for 79 | remote repositories if ssh access is configured. 80 | 81 | * BUPSTASH_TO_REPOSITORY_COMMAND: 82 | A command to run to connect to an instance of bupstash-serve(1). This 83 | allows more complex connections to the repository for less common use cases. 84 | 85 | * BUPSTASH_KEY: 86 | Path to a primary key that will be used for decrypting data and metadata. 87 | 88 | * BUPSTASH_KEY_COMMAND: 89 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 90 | to fetch the key from arbitrary locations such as the network or other secret storage. 91 | 92 | * BUPSTASH_QUERY_CACHE: 93 | Path to the query cache file to use. 94 | 95 | ## EXAMPLES 96 | 97 | ### Push all items from a local repository to a remote repository 98 | 99 | ``` 100 | $ bupstash sync --repository ./local-repository --to ssh://$REMOTE 101 | ``` 102 | 103 | ### Perform a backup locally then sync a copy to a remote repository 104 | 105 | ``` 106 | $ export BUPSTASH_REPOSITORY=./local-repository 107 | $ id="$(bupstash put ./some-files)" 108 | $ bupstash sync --to ssh://$REMOTE id="$id" 109 | ``` 110 | 111 | ## SEE ALSO 112 | 113 | bupstash(1), bupstash-query-language(7) 114 | -------------------------------------------------------------------------------- /doc/man/bupstash.1.md: -------------------------------------------------------------------------------- 1 | bupstash(1) 2 | =========== 3 | 4 | ## SYNOPSIS 5 | 6 | Bupstash encrypted and deduplicated backups. 
7 | 8 | Run one of the following `bupstash` subcommands. 9 | 10 | `bupstash init ...`
11 | `bupstash new-key ...`
12 | `bupstash new-sub-key ...`
13 | `bupstash put ...`
14 | `bupstash list ...`
15 | `bupstash list-contents ...`
16 | `bupstash diff ...`
17 | `bupstash get ...`
18 | `bupstash restore ...`
19 | `bupstash rm ...`
20 | `bupstash recover-removed ...`
21 | `bupstash gc ...`
22 | `bupstash sync ...`
23 | `bupstash exec-with-locks ...`
24 | `bupstash serve ...`
25 | `bupstash help ...`
26 | `bupstash version ...`
27 | 28 | ## DESCRIPTION 29 | 30 | ```bupstash``` is a tool for storing (and retrieving) 31 | files and data in an encrypted bupstash-repository(7). 32 | 33 | Some notable features of ```bupstash``` include: 34 | 35 | * Automatic deduplication of stored data. 36 | * Client side encryption of data. 37 | * Incremental file uploads. 38 | * A tag based query language. 39 | * Optional role based encryption and decryption key separation. 40 | * Remote repositories over ssh ssh. 41 | * Optional, per ssh key access repository controls. 42 | * A multi layered approach to security. 43 | 44 | The ```bupstash``` tool itself is divided into subcommands 45 | that each have their own documentation. 46 | 47 | ## SUBCOMMANDS 48 | 49 | * bupstash-init(1): 50 | Initialize a bupstash repository. 51 | * bupstash-new-key(1): 52 | Create a new primary key for creating/reading repository items. 53 | * bupstash-new-sub-key(1): 54 | Derive a sub key for a subset of operations. 55 | * bupstash-put(1): 56 | Add data to a bupstash repository. 57 | * bupstash-get(1): 58 | Fetch data from the bupstash repository matching a query. 59 | * bupstash-restore(1): 60 | Restore a snapshot into a local directory. 61 | * bupstash-list(1): 62 | List repository items matching a given query. 63 | * bupstash-list-contents(1): 64 | List directory snapshot contents. 65 | * bupstash-diff(1): 66 | Diff snapshot contents. 67 | * bupstash-rm(1): 68 | Remove repository items matching a given query. 69 | * bupstash-recover-removed(1): 70 | Recover removed items that are pending garbage collection. 71 | * bupstash-gc(1): 72 | Reclaim diskspace in a repository. 73 | * bupstash-sync(1): 74 | Sync items between repositories. 75 | * bupstash-exec-with-locks(1): 76 | Exec a command with exclusive locks held on the repository. 77 | * bupstash-serve(1): 78 | Serve a repository over stdin/stdout using the bupstash-protocol(7). 
79 | 80 | ## EXAMPLES 81 | 82 | 83 | ### Initialize a repository and create keys 84 | ``` 85 | $ bupstash init -r ssh://$SERVER/home/me/backups 86 | $ bupstash new-key -o backups.key 87 | ``` 88 | 89 | ### Tell bupstash to use our repository and key by default 90 | 91 | ``` 92 | $ export BUPSTASH_REPOSITORY=ssh://$SERVER/home/me/backups 93 | $ export BUPSTASH_KEY=backups.key 94 | ``` 95 | 96 | ### Directory snapshots 97 | 98 | ``` 99 | $ bupstash put ./some-data 100 | ebb66f3baa5d432e9f9a28934888a23d 101 | 102 | $ bupstash list-contents id=ebb66f3baa5d432e9f9a28934888a23d 103 | drwxr-xr-x 0 2020/11/05 10:42:48 . 104 | -rw-r--r-- 177B 2020/07/12 17:13:42 data.txt 105 | ``` 106 | 107 | ### List items matching a query 108 | 109 | ``` 110 | $ bupstash list hostname=$(hostname) 111 | id="bcb8684e6bf5cb453e77486decf61685" name="some-file.txt" hostname="my-server" timestamp="2020-07-27 11:26:16" 112 | ... 113 | ``` 114 | 115 | ### Incremental uploads 116 | 117 | ``` 118 | $ bupstash put --send-log /var/backup.sendlog ./some-data 119 | ebb66f3baa5d432e9f9a28934888a23d 120 | 121 | # Second backup is much faster when it reads the send log. 122 | $ bupstash put --send-log /var/backup.sendlog ./some-data 123 | ebb66f3baa5d432e9f9a28934888a23d 124 | ``` 125 | 126 | ### Capture and save command output 127 | 128 | ``` 129 | # Checks for errors before saving new item. 130 | $ bupstash put --exec name=database.sql pgdump mydatabase 131 | 14ebd2073b258b1f55c5bbc889c49db4 132 | ``` 133 | 134 | ### Get an item matching a query 135 | ``` 136 | $ bupstash get id=bcb8684e6bf5cb453e77486decf61685 137 | some data. 138 | ``` 139 | 140 | ### Restore a directory to a previous snapshot 141 | 142 | ``` 143 | $ bupstash restore --to ./dir name=dir.tar 144 | ``` 145 | 146 | ### Remove items matching a query. 
147 | ``` 148 | $ bupstash rm name=some-data.txt 149 | ``` 150 | 151 | ### Wipe a repository 152 | 153 | ``` 154 | $ bupstash rm --allow-many id=* 155 | ``` 156 | 157 | ### Reclaim disk space 158 | ``` 159 | $ bupstash gc 160 | ``` 161 | 162 | ### Offline decryption keys 163 | ``` 164 | # Create a key, a put only key, and a metadata (list/rm only) key. 165 | $ bupstash new-key -o backups.key 166 | $ bupstash new-sub-key --put -k backups.key -o backups-put.key 167 | $ bupstash new-sub-key --list -k backups.key -o backups-metadata.key 168 | 169 | ... Copy backups.key to secure offline storage ... 170 | 171 | # Remove primary key 172 | $ shred backups.key 173 | 174 | $ bupstash put -k backups-put.key ./data 175 | 14ebd2073b258b1f55c5bbc889c49db4 176 | 177 | ... When you need to list or remove backups, you may use the metadata key ... 178 | 179 | $ bupstash list -k backups-metadata.key 180 | ... 181 | $ bupstash rm -k backups-metadata.key 182 | 183 | ... After emergency, get decryption key from offline storage ... 184 | 185 | # Restore by getting an item and decrypting it using the decryption key. 186 | $ bupstash get -k backups.key id=14ebd2073b258b1f55c5bbc889c49db4 | tar -C ./restore -xf - 187 | ``` 188 | 189 | ## SEE ALSO 190 | 191 | bupstash-repository(7), bupstash-keyfiles(7) 192 | -------------------------------------------------------------------------------- /doc/technical_overview.md: -------------------------------------------------------------------------------- 1 | # High level implementation overview 2 | 3 | ## What does bupstash do? 4 | 5 | Bupstash ingests arbitrary data streams, deduplicates, encrypts and saves them in a local or remote repository. 6 | Bupstash also can convert filesystems on disk into a data stream transparently for the user. 7 | 8 | The bupstash repository contains very little unencrypted data, it stores only encrypted data chunks, and encrypted 9 | metadata. 
10 | 11 | Bupstash does not transmit decryption keys to the server at any point. 12 | 13 | ## Deduplication 14 | 15 | - Bupstash splits an input stream into data chunks less than 8 MiB. 16 | - Each chunk has a keyed blake3 hash computed, this is the address of the chunk. 17 | - Previous sends are tracked in the client side 'send log', an sqlite database, backing up the same data 18 | twice in a row only transmits new data chunks. 19 | - If the server sees repeat hash address, it does not persist the repeat data either. 20 | 21 | Quality of deduplication depends on how we split the data stream into chunks. 22 | We want our data chunks to be resilient to byte insertions or removals, so we use 23 | a rolling hash function to identify common split points between upload sessions. 24 | 25 | We currently use a rolling hash function called 'gear hash'. It hashes a 32 byte rolling window on 26 | the data stream and we form a new chunk if the gear hash matches an 'interestingness' property (see rollsum.rs for details). 27 | 28 | ## Encryption 29 | 30 | - We use libsodium cryptobox to encrypt each data chunk. 31 | - Each upload session encrypts chunks with an ephemeral public/private key pair. 32 | - The encryption is addressed at the private portion of the decryption key, think of this like 33 | sending an encrypted email to someone when you know their public key. 34 | - Each chunk has the ephemeral public key attached such that the session key can be derived 35 | by the master key. 36 | - A bupstash key is actually multiple libsodium key pairs and some preshared secrets, allowing us to divide decryption 37 | capabilities amongst sub keys. 38 | - We also encrypt metadata before sending it to the repository in an append only log. 39 | - Client side query works by syncing the metadata log then decrypting it client side. 40 | 41 | ## Hash tree 42 | 43 | When uploading data streams larger than a single chunk, we must group them. 
To do this we 44 | form a merkle tree, only the leaf data nodes are encrypted. 45 | 46 | - Each non leaf chunk in the hash tree is simply a list of addresses and a chunk count breadcrumb for random access. 47 | - Because the hash tree is mostly unencrypted, server can push stream the tree. 48 | - Because the hash tree is mostly unencrypted, server can perform garbage collection. 49 | - Data is still encrypted so server only knows approximate data stream size. 50 | 51 | ## Content index 52 | 53 | Pure data streams are not enough to allow an efficient file 'browsing' user interface, to 54 | solve this, each data stream has an optional auxiliary index data stream. 55 | The index is a hash tree containing an efficient index of the data. 56 | 57 | - A client can fetch and decrypt the index quickly. 58 | - The index allows partial data requests of the files within a data stream. 59 | - When the user requests a data stream, we first check if there is an index, 60 | if there is, we synthesize a tarball stream client side out of the index and data stream. 61 | 62 | ## Stat cache 63 | 64 | When converting a filesystem to a data stream and index, we can cache the hashes of a given 65 | file/directory based on stat information, allowing us to skip the expensive compression and encryption step. 66 | This cache information is stored in the send log. 67 | 68 | ## Repository Garbage collection 69 | 70 | - Garbage collection is a partially concurrent mark and sweep collector. 71 | - Garbage collections invalidate client side caches, except for when the client checks if a previous backup item still exists. 72 | -------------------------------------------------------------------------------- /doc/upcoming_changelog.md: -------------------------------------------------------------------------------- 1 | # Bupstash v0.12.0 2 | 3 | We are proud to bring you the next iteration of bupstash, this update contains a large amount of 4 | performance work as well as important bug fixes. 
5 | 6 | As an example of the potential performance improvements, here is a synthetic benchmark snapshotting the linux 7 | kernel source code on a development machine: 8 | 9 | ``` 10 | Benchmark 1: bupstash-0.11.1 put --no-send-log /tmp/linux 11 | Time (mean ± σ): 5.885 s ± 0.091 s [User: 6.099 s, System: 1.766 s] 12 | Range (min … max): 5.701 s … 6.016 s 10 runs 13 | 14 | Benchmark 2: bupstash-0.12.0 put --no-send-log /tmp/linux 15 | Time (mean ± σ): 1.884 s ± 0.014 s [User: 7.334 s, System: 1.340 s] 16 | Range (min … max): 1.862 s … 1.908 s 10 runs 17 | 18 | Summary 19 | 'bupstash-0.12.0 put --no-send-log /tmp/linux' ran 20 | 3.12 ± 0.05 times faster than 'bupstash-0.11.1 put --no-send-log /tmp/linux' 21 | ``` 22 | 23 | You read that right, a 3x speed improvement! It is also important to keep in mind bupstash is often limited 24 | by disk, network and send-log speeds rather than the cpu and ram performance that this benchmark measures. 25 | 26 | ## New features 27 | 28 | - The deduplication rolling hash algorithm has been improved and is now 30 to 50 percent faster. 29 | - Those using a nightly rust compiler can enable SIMD (even faster) rolling hash implementations. 30 | - A multithreaded `bupstash put` implementation has been added that can read, hash, compress, encrypt files in parallel. 31 | - Bupstash diff and bupstash restore can now use multiple threads when computing changes on the local disk. 32 | - New flags have been added to `bupstash put` and other commands to tune how bupstash uses cpu threads. 33 | 34 | ## Notable Bug fixes 35 | 36 | - A bug that caused `bupstash serve` to not detect io errors in certain situations has been fixed. 37 | Only the server side needs to be updated, so users of bupstash.io managed repositories do not need to take 38 | further action. 
39 | 40 | ## Incompatibilities 41 | 42 | - It is likely your repositories will grow in size if they contain data chunks from previous 43 | versions of bupstash, this can be fixed by cycling older data out over time, or recreating your backups. 44 | - Because the chunks generated by bupstash will change, expect increased bandwidth usage until 45 | the new chunks are uploaded. 46 | - bupstash is now biased to produce 2 MiB data chunks on average instead of 1 MiB chunks (in the future this may become configurable). 47 | 48 | ## Supporting bupstash 49 | 50 | Bupstash.io managed repositories are in open beta and anyone can create an account. 51 | If you enjoy bupstash then please consider creating a managed repository at https://bupstash.io/managed.html 52 | to support the project. 53 | 54 | Another great way to help the project is to just tell your friends to give bupstash a try. -------------------------------------------------------------------------------- /src/abloom.rs: -------------------------------------------------------------------------------- 1 | use super::address; 2 | use std::convert::TryInto; 3 | 4 | // ABloom is a bloom filter specialized for addresses by taking advantage of the 5 | // fact that addresses are already randomly distributed. 6 | // 7 | // See https://en.wikipedia.org/wiki/Bloom_filter 8 | 9 | #[derive(Debug, PartialEq, Eq)] 10 | pub struct ABloom { 11 | nbits: u64, 12 | bytes: Vec, 13 | } 14 | 15 | // k is the number of hash functions in the bloom filter. 16 | const K: usize = 4; 17 | 18 | fn count_set_bits(bytes: &[u8]) -> u64 { 19 | let mut n: u64 = 0; 20 | for b in bytes.iter() { 21 | n += b.count_ones() as u64; 22 | } 23 | n 24 | } 25 | 26 | pub fn approximate_mem_size_upper_bound(false_postive_rate: f64, num_addrs: u64) -> usize { 27 | // see wiki: Optimal number of hash functions... 28 | // > Goel and Gupta,[9] however, give a rigorous upper bound 29 | // > that makes no approximations and requires no assumptions. 
30 | // false_positives = (1 - e ^ (-k*n/m))^k 31 | // If we rearrange we get: 32 | // m = -k*n/ln(1 - root(k, false_positives)) 33 | let k = K as f64; 34 | let n = num_addrs as f64; 35 | let e = false_postive_rate; 36 | let m = (-k * n) / ((1.0 - e.powf(1.0 / k)).ln()); 37 | (m / 8.0) as usize // bits to bytes. 38 | } 39 | 40 | impl ABloom { 41 | pub fn new(mut mem_size: usize) -> ABloom { 42 | if mem_size == 0 { 43 | mem_size = 1; 44 | } 45 | 46 | ABloom { 47 | nbits: (mem_size as u64) * 8, 48 | bytes: vec![0; mem_size], 49 | } 50 | } 51 | 52 | pub fn from_bytes(bytes: Vec) -> ABloom { 53 | ABloom { 54 | nbits: (bytes.len() as u64) * 8, 55 | bytes, 56 | } 57 | } 58 | 59 | pub fn mem_size(&self) -> usize { 60 | self.bytes.len() 61 | } 62 | 63 | pub fn borrow_bytes(&self) -> &[u8] { 64 | &self.bytes 65 | } 66 | 67 | pub fn num_bits(&self) -> u64 { 68 | self.nbits 69 | } 70 | 71 | pub fn count_set_bits(&self) -> u64 { 72 | count_set_bits(&self.bytes) 73 | } 74 | 75 | pub fn utilization(&self) -> f64 { 76 | let n = count_set_bits(&self.bytes); 77 | (n as f64) / (self.nbits as f64) 78 | } 79 | 80 | // Like utilization but uses a small sample 81 | // to become a constant time operation. 
82 | pub fn estimate_utilization(&self) -> f64 { 83 | const SAMPLE_ESTIMATE_BYTES: usize = 1024 * 1024; 84 | let sample_size = std::cmp::min(SAMPLE_ESTIMATE_BYTES, self.bytes.len()); 85 | let n = count_set_bits(&self.bytes[0..sample_size]); 86 | (n as f64) / ((sample_size * 8) as f64) 87 | } 88 | 89 | pub fn estimate_false_positive_rate(&self) -> f64 { 90 | const N: u64 = 10000; 91 | let mut false_positives = 0; 92 | for _i in 0..N { 93 | if self.probably_has(&address::Address::random()) { 94 | false_positives += 1; 95 | } 96 | } 97 | (false_positives as f64) / (N as f64) 98 | } 99 | 100 | pub fn estimate_add_count(&self) -> f64 { 101 | let m = self.nbits as f64; 102 | let x = self.count_set_bits() as f64; 103 | let k = K as f64; 104 | //Refer to bloom filter wiki: Approximating the number of items in a Bloom filter. 105 | (-m / k) * (1.0 - (x / m)).ln() 106 | } 107 | 108 | pub fn add(&mut self, addr: &address::Address) { 109 | for i in 0..K { 110 | let offset_buf = addr.bytes[i * 8..(i * 8 + 8)].try_into().unwrap(); 111 | let bit_offset: u64 = u64::from_le_bytes(offset_buf) % self.nbits; 112 | let shift = bit_offset & 7; 113 | let byte_offset: usize = ((bit_offset & !7) >> 3).try_into().unwrap(); 114 | self.bytes[byte_offset] |= 1 << shift; 115 | } 116 | } 117 | 118 | pub fn probably_has(&self, addr: &address::Address) -> bool { 119 | for i in 0..K { 120 | let offset_buf = addr.bytes[i * 8..(i * 8 + 8)].try_into().unwrap(); 121 | let bit_offset: u64 = u64::from_le_bytes(offset_buf) % self.nbits; 122 | let shift = bit_offset & 7; 123 | let byte_offset: usize = ((bit_offset & !7) >> 3).try_into().unwrap(); 124 | if (self.bytes[byte_offset] & (1 << shift)) == 0 { 125 | return false; 126 | } 127 | } 128 | true 129 | } 130 | } 131 | 132 | #[cfg(test)] 133 | mod tests { 134 | use super::super::address; 135 | use super::super::crypto; 136 | use super::*; 137 | 138 | #[test] 139 | fn test_abloom() { 140 | crypto::init(); 141 | 142 | let mut abloom = ABloom::new(8 * 1024 * 
1024); 143 | 144 | for _i in 0..10000 { 145 | let addr = address::Address::random(); 146 | abloom.add(&addr); 147 | assert!(abloom.probably_has(&addr)); 148 | } 149 | } 150 | 151 | #[test] 152 | fn test_approximate_mem_size() { 153 | crypto::init(); 154 | for n in [20000, 100000].iter() { 155 | for p in [0.01, 0.05, 0.1, 0.5].iter() { 156 | let mut abloom = ABloom::new(approximate_mem_size_upper_bound(*p, *n)); 157 | 158 | for _i in 0..*n { 159 | let addr = address::Address::random(); 160 | abloom.add(&addr); 161 | assert!(abloom.probably_has(&addr)); 162 | } 163 | 164 | let estimated_false_positives = abloom.estimate_false_positive_rate(); 165 | let prediction_delta = *p - estimated_false_positives; 166 | 167 | eprintln!("n={}", n); 168 | eprintln!("p={}", p); 169 | eprintln!("mem_size={}", abloom.mem_size()); 170 | eprintln!( 171 | "estimated_false_positive_rate={}", 172 | estimated_false_positives, 173 | ); 174 | eprintln!("estimated_add_count={}", abloom.estimate_add_count()); 175 | eprintln!("utilization={}", abloom.utilization()); 176 | eprintln!("estimated_utilization={}", abloom.estimate_utilization()); 177 | eprintln!("prediction_delta={}", prediction_delta); 178 | // This test relies on probabilities to pass, if it is flaky, we can tune it. 179 | assert!(prediction_delta < 0.020); 180 | } 181 | } 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/acache.rs: -------------------------------------------------------------------------------- 1 | use super::address::*; 2 | use std::convert::TryInto; 3 | 4 | // ACache is the 'address cache' designed to let bupstash 5 | // efficiently skip operations on addresses while also 6 | // keeping a bound on memory use. 7 | // 8 | // The current implementation is a direct mapped cache. On hash collision 9 | // a value simply evicts the existing value. 
We could use something fancier like an lru, 10 | // but we need benchmarks to show it improves anything over such a simple implementaion. 11 | 12 | pub struct ACache { 13 | dm_cache_ents: Vec
, 14 | pub add_count: u64, 15 | pub hit_count: u64, 16 | } 17 | 18 | impl ACache { 19 | pub fn new(cache_ents: usize) -> ACache { 20 | ACache { 21 | dm_cache_ents: vec![Address::from_bytes(&[0; ADDRESS_SZ]); cache_ents], 22 | add_count: 0, 23 | hit_count: 0, 24 | } 25 | } 26 | 27 | pub fn add(&mut self, addr: &Address) -> bool { 28 | self.add_count += 1; 29 | let offset_buf = addr.bytes[0..8].try_into().unwrap(); 30 | let offset: u64 = u64::from_le_bytes(offset_buf) % (self.dm_cache_ents.len() as u64); 31 | let mut tmp = *addr; 32 | std::mem::swap( 33 | &mut tmp, 34 | self.dm_cache_ents.get_mut(offset as usize).unwrap(), 35 | ); 36 | let new_val = tmp != *addr; 37 | if !new_val { 38 | self.hit_count += 1; 39 | } 40 | new_val 41 | } 42 | 43 | pub fn utilization(&self) -> f64 { 44 | let mut utilized = 0; 45 | for a in self.dm_cache_ents.iter() { 46 | if a.bytes != [0; ADDRESS_SZ] { 47 | utilized += 1 48 | } 49 | } 50 | (utilized as f64) / (self.dm_cache_ents.len() as f64) 51 | } 52 | } 53 | 54 | #[cfg(test)] 55 | mod tests { 56 | use super::super::crypto; 57 | use super::*; 58 | 59 | #[test] 60 | fn test_awcache() { 61 | crypto::init(); 62 | 63 | let mut cache = ACache::new(4 * 1024); 64 | 65 | let addresses: Vec
= (0..1000).map(|_| Address::random()).collect(); 66 | 67 | for a in addresses.iter() { 68 | cache.add(&a); 69 | assert!(!cache.add(&a)); 70 | } 71 | 72 | cache.hit_count = 0; 73 | cache.add_count = 0; 74 | 75 | for a in addresses.iter() { 76 | cache.add(&a); 77 | } 78 | 79 | assert!(cache.hit_count != 0); 80 | eprintln!("cache hit_count: {}/{}", cache.hit_count, cache.add_count); 81 | eprintln!("cache utilization: {}", cache.utilization()); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/address.rs: -------------------------------------------------------------------------------- 1 | use super::crypto; 2 | use super::hex; 3 | use serde::{Deserialize, Serialize}; 4 | use std::convert::TryInto; 5 | use std::fmt; 6 | 7 | pub const ADDRESS_SZ: usize = 32; 8 | 9 | #[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Copy, Debug)] 10 | pub struct Address { 11 | pub bytes: [u8; ADDRESS_SZ], 12 | } 13 | 14 | impl fmt::Display for Address { 15 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 16 | write!(f, "{}", self.as_hex_addr()) 17 | } 18 | } 19 | 20 | impl Address { 21 | pub fn random() -> Address { 22 | let mut bytes = [0; ADDRESS_SZ]; 23 | crypto::randombytes(&mut bytes); 24 | Address { bytes } 25 | } 26 | 27 | pub fn from_bytes(bytes: &[u8; 32]) -> Address { 28 | Address { bytes: *bytes } 29 | } 30 | 31 | pub fn from_slice(s: &[u8]) -> Result { 32 | Ok(Address { 33 | bytes: s.try_into()?, 34 | }) 35 | } 36 | 37 | pub fn from_hex_str(s: &str) -> Result { 38 | if s.len() != ADDRESS_SZ * 2 { 39 | anyhow::bail!("invalid address '{}', length is not {} ", s, ADDRESS_SZ * 2); 40 | } 41 | let mut a = Address::default(); 42 | hex::decode_string(s, &mut a.bytes)?; 43 | Ok(a) 44 | } 45 | 46 | pub fn as_hex_addr(&self) -> HexAddress { 47 | let mut result = HexAddress::default(); 48 | hex::encode(&self.bytes, &mut result.bytes); 49 | result 50 | } 51 | } 52 | 53 | impl fmt::LowerHex for Address { 54 | fn 
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 55 | let mut buf = [0; 64]; 56 | hex::encode(&self.bytes[..], &mut buf[..]); 57 | write!(f, "{}", std::str::from_utf8(&buf[..]).unwrap()) 58 | } 59 | } 60 | 61 | impl Default for Address { 62 | fn default() -> Address { 63 | Address::from_bytes(&[0; ADDRESS_SZ]) 64 | } 65 | } 66 | 67 | pub struct HexAddress { 68 | bytes: [u8; ADDRESS_SZ * 2], 69 | } 70 | 71 | impl<'a> HexAddress { 72 | pub fn as_str(&'a self) -> &'a str { 73 | std::str::from_utf8(&self.bytes).unwrap() 74 | } 75 | } 76 | 77 | impl fmt::Display for HexAddress { 78 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 79 | write!(f, "{}", std::str::from_utf8(&self.bytes).unwrap()) 80 | } 81 | } 82 | 83 | impl Default for HexAddress { 84 | fn default() -> HexAddress { 85 | HexAddress { 86 | bytes: [b'0'; ADDRESS_SZ * 2], 87 | } 88 | } 89 | } 90 | 91 | // Convert a slice of addresses to a slice of bytes without any copying. 92 | pub fn addresses_to_bytes(addresses: &[Address]) -> &[u8] { 93 | assert!(std::mem::size_of::
() == ADDRESS_SZ); 94 | let n_bytes = addresses.len() * ADDRESS_SZ; 95 | unsafe { std::slice::from_raw_parts(addresses.as_ptr() as *const u8, n_bytes) } 96 | } 97 | 98 | // Convert a slice of addresses to a slice of bytes without any copying. 99 | // panics if alignment is wrong. 100 | pub fn bytes_to_addresses(bytes: &[u8]) -> &[Address] { 101 | // We rely on alignment, flag any places our assumption is not true. 102 | assert!(((bytes.as_ptr() as usize) & (std::mem::align_of::
() - 1)) == 0); 103 | assert!(std::mem::size_of::
() == ADDRESS_SZ); 104 | let n_addresses = bytes.len() / ADDRESS_SZ; 105 | unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const Address, n_addresses) } 106 | } 107 | 108 | #[cfg(test)] 109 | mod tests { 110 | use super::*; 111 | 112 | #[test] 113 | fn test_addr_to_hex_addr() { 114 | assert!(Address::default().as_hex_addr().bytes[..] == HexAddress::default().bytes[..]); 115 | } 116 | 117 | #[test] 118 | fn test_addresses_to_bytes() { 119 | let v = vec![Address::default()]; 120 | let s = addresses_to_bytes(&v); 121 | assert_eq!(Address::from_slice(s).unwrap(), v[0]) 122 | } 123 | 124 | #[test] 125 | fn test_bytes_to_addresses() { 126 | // Try to create an poorly unaligned allocation if it is 127 | // possible on the current platform. 128 | for _i in 0..100 { 129 | let bytes = [0; 64]; 130 | let mut b = Vec::new(); 131 | b.extend_from_slice(&bytes[..]); 132 | let s = bytes_to_addresses(&b); 133 | assert_eq!(Address::default(), s[0]) 134 | } 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /src/base64.rs: -------------------------------------------------------------------------------- 1 | use super::sodium; 2 | 3 | // We use libsodium base64 as it removes some dependencies 4 | // as we already have a hard dependency on libsodium. 
/// Base64 encode `buf` using libsodium's "original" (padded) variant.
pub fn encode(buf: &[u8]) -> String {
    // Upper bound on the encoded length, including the NUL terminator
    // that libsodium writes.
    let max_out_len = unsafe {
        sodium::sodium_base64_encoded_len(buf.len(), sodium::sodium_base64_VARIANT_ORIGINAL as i32)
    };

    let mut out_buf = vec![0; max_out_len];

    unsafe {
        assert!(!sodium::sodium_bin2base64(
            out_buf.as_mut_ptr() as *mut std::os::raw::c_char,
            out_buf.len(),
            buf.as_ptr(),
            buf.len(),
            sodium::sodium_base64_VARIANT_ORIGINAL as i32,
        )
        .is_null())
    };

    // Trim the buffer at the NUL terminator; it must exist because
    // the buffer was sized with room for it above.
    match out_buf.iter().position(|&v| v == 0) {
        Some(idx) => {
            out_buf.truncate(idx);
        }
        None => {
            panic!();
        }
    }

    String::from_utf8(out_buf).unwrap()
}

/// Decode a base64 string ("original" variant), returning None on invalid input.
pub fn decode(data: &str) -> Option<Vec<u8>> {
    let mut out_len = 0;
    // Decoded output is always shorter than its base64 encoding,
    // so data.len() bytes is a safe upper bound.
    let mut out_buf = vec![0; data.len()];

    let rc = unsafe {
        sodium::sodium_base642bin(
            out_buf.as_mut_ptr(),
            out_buf.len(),
            data.as_ptr() as *const std::os::raw::c_char,
            data.len(),
            std::ptr::null(),
            &mut out_len as *mut usize,
            std::ptr::null_mut::<*const std::os::raw::c_char>(),
            sodium::sodium_base64_VARIANT_ORIGINAL as i32,
        )
    };

    if rc == 0 {
        assert!(out_len <= out_buf.len());
        out_buf.truncate(out_len);
        Some(out_buf)
    } else {
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_encode() {
        assert_eq!(encode(b""), "");
        assert_eq!(encode(b"a"), "YQ==");
        assert_eq!(encode(b"ab"), "YWI=");
        assert_eq!(encode(b"abc"), "YWJj");
        assert_eq!(encode(b"abcd"), "YWJjZA==");
        assert_eq!(encode(b"abcde"), "YWJjZGU=");
    }

    #[test]
    fn test_decode() {
        assert_eq!(decode("").unwrap(), b"");
        assert_eq!(decode("YQ==").unwrap(), b"a");
        assert_eq!(decode("YWI=").unwrap(), b"ab");
        assert_eq!(decode("YWJj").unwrap(), b"abc");
        assert_eq!(decode("YWJjZA==").unwrap(), b"abcd");
        assert_eq!(decode("YWJjZGU=").unwrap(), b"abcde");
    }
}
--------------------------------------------------------------------------------
/src/chunk_storage.rs:
--------------------------------------------------------------------------------
use super::abloom;
use super::address::*;
use super::protocol;
use super::repository;
use super::xid;

// Engine is the interface a chunk storage backend must implement
// (e.g. the directory and external storage engines).
pub trait Engine {
    // Get many chunks in an efficient pipeline.
    #[allow(clippy::type_complexity)]
    fn pipelined_get_chunks(
        &mut self,
        addresses: &[Address],
        on_chunk: &mut dyn FnMut(&Address, &[u8]) -> Result<(), anyhow::Error>,
    ) -> Result<(), anyhow::Error>;

    // Get a chunk from the storage engine.
    fn get_chunk(&mut self, addr: &Address) -> Result<Vec<u8>, anyhow::Error>;

    // Set the gc_id for the following call to sweep. This is a form
    // of two phase commit where we ensure the backend saves this
    // id so we can later check if it has completed.
    fn prepare_for_sweep(&mut self, gc_id: xid::Xid) -> Result<(), anyhow::Error>;

    // Remove all chunks not in the reachable set.
    // NOTE(review): return type reconstructed as repository::GcStats — the
    // generic parameters were lost in extraction; confirm against the repo.
    fn sweep(
        &mut self,
        update_progress_msg: &mut dyn FnMut(String) -> Result<(), anyhow::Error>,
        reachable: abloom::ABloom,
    ) -> Result<repository::GcStats, anyhow::Error>;

    // Check that a previous invocation of sweep has finished.
    fn sweep_completed(&mut self, gc_id: xid::Xid) -> Result<bool, anyhow::Error>;

    // Add a chunk, potentially asynchronously. Does not overwrite existing
    // chunks with the same name to protect historic items from corruption.
    // The write is not guaranteed to be completed until
    // after a call to Engine::sync completes without error.
    fn add_chunk(&mut self, addr: &Address, buf: Vec<u8>) -> Result<(), anyhow::Error>;

    // Filter a list of chunk addresses removing any that already exist in the repository.
    // This function is often called in very large batches so requires the backend to periodically
    // report progress, the argument to on_progress is how many addresses have been processed since the last
    // progress report.
    fn filter_existing_chunks(
        &mut self,
        on_progress: &mut dyn FnMut(u64) -> Result<(), anyhow::Error>,
        addr: Vec<Address>
, 48 | ) -> Result, anyhow::Error>; 49 | 50 | // A write barrier, any previously added chunks are only guaranteed to be 51 | // in stable storage after a call to flush has returned. A backend 52 | // can use this to implement concurrent background writes. 53 | fn flush(&mut self) -> Result; 54 | 55 | // Estimate how many chunks we have stored, the implementation is free to 56 | // make a rough guess to increase performance. One trick is sampling 57 | // a single address prefix. 58 | fn estimate_chunk_count(&mut self) -> Result; 59 | } 60 | -------------------------------------------------------------------------------- /src/chunker.rs: -------------------------------------------------------------------------------- 1 | use super::rollsum::{FastGearHasher, GearTab, RollsumSplitter}; 2 | 3 | #[derive(Clone)] 4 | pub struct RollsumChunker { 5 | rs: FastGearHasher, 6 | min_sz: usize, 7 | max_sz: usize, 8 | default_chunk_capacity: usize, 9 | cur_vec: Vec, 10 | } 11 | 12 | impl RollsumChunker { 13 | pub fn new(gear_tab: GearTab, mut min_sz: usize, mut max_sz: usize) -> RollsumChunker { 14 | if min_sz == 0 { 15 | min_sz = 1 16 | } 17 | if max_sz < min_sz { 18 | max_sz = min_sz 19 | } 20 | let default_chunk_capacity = max_sz / 2; 21 | RollsumChunker { 22 | rs: FastGearHasher::new(gear_tab), 23 | min_sz, 24 | max_sz, 25 | default_chunk_capacity, 26 | cur_vec: Vec::with_capacity(default_chunk_capacity), 27 | } 28 | } 29 | 30 | fn spare_capacity(&self) -> usize { 31 | self.cur_vec.capacity() - self.cur_vec.len() 32 | } 33 | 34 | fn swap_vec(&mut self) -> Vec { 35 | let mut v = Vec::with_capacity(self.default_chunk_capacity); 36 | std::mem::swap(&mut v, &mut self.cur_vec); 37 | v 38 | } 39 | 40 | pub fn add_bytes(&mut self, buf: &[u8]) -> (usize, Option>) { 41 | let mut n_bytes = buf.len(); 42 | 43 | if (n_bytes + self.cur_vec.len()) > self.max_sz { 44 | let overshoot = (n_bytes + self.cur_vec.len()) - self.max_sz; 45 | n_bytes -= overshoot; 46 | } 47 | 48 | if 
self.spare_capacity() < n_bytes { 49 | let mut growth = (self.max_sz / 3).max(1); 50 | if self.cur_vec.capacity() + growth > self.max_sz { 51 | growth = self.max_sz - self.cur_vec.capacity(); 52 | } 53 | self.cur_vec.reserve(growth); 54 | n_bytes = std::cmp::min(self.spare_capacity(), n_bytes); 55 | } 56 | 57 | // None of the bytes we are adding will count towards the 58 | // next chunk, simply add them all, the bytes don't matter 59 | // as we will cycle WINDOW_SIZE too. 60 | if let Some(window_size) = self.rs.window_size() { 61 | if self.min_sz >= window_size 62 | && (self.cur_vec.len() + n_bytes < (self.min_sz - window_size)) 63 | { 64 | self.cur_vec.extend_from_slice(&buf[0..n_bytes]); 65 | return (n_bytes, None); 66 | } 67 | } 68 | 69 | match self.rs.roll_bytes(&buf[0..n_bytes]) { 70 | Some(split) => { 71 | self.cur_vec.extend_from_slice(&buf[0..split]); 72 | if self.cur_vec.len() < self.min_sz { 73 | (split, None) 74 | } else { 75 | (split, Some(self.swap_vec())) 76 | } 77 | } 78 | None => { 79 | self.cur_vec.extend_from_slice(&buf[0..n_bytes]); 80 | if self.cur_vec.len() == self.max_sz { 81 | (n_bytes, Some(self.swap_vec())) 82 | } else { 83 | (n_bytes, None) 84 | } 85 | } 86 | } 87 | } 88 | 89 | pub fn buffered_count(&mut self) -> usize { 90 | self.cur_vec.len() 91 | } 92 | 93 | pub fn force_split(&mut self) -> Option> { 94 | self.rs.reset(); 95 | let v = self.swap_vec(); 96 | if v.is_empty() { 97 | None 98 | } else { 99 | Some(v) 100 | } 101 | } 102 | 103 | pub fn take_buffered(&mut self) -> Vec { 104 | self.rs.reset(); 105 | let mut v = Vec::new(); 106 | std::mem::swap(&mut self.cur_vec, &mut v); 107 | v 108 | } 109 | 110 | pub fn finish(self) -> Vec { 111 | self.cur_vec 112 | } 113 | } 114 | 115 | #[cfg(test)] 116 | mod tests { 117 | use super::super::rollsum::{GearTab, TEST_GEAR_TAB_DATA}; 118 | use super::*; 119 | 120 | #[test] 121 | fn test_add_bytes() { 122 | let mut ch = RollsumChunker::new(GearTab::from_array(TEST_GEAR_TAB_DATA), 1, 2); 123 | 124 | 
match ch.add_bytes(b"a") { 125 | (1, None) => (), 126 | v => panic!("{:?}", v), 127 | } 128 | 129 | match ch.add_bytes(b"bc") { 130 | (1, Some(v)) => assert_eq!(v, b"ab"), 131 | v => panic!("{:?}", v), 132 | } 133 | 134 | match ch.add_bytes(b"c") { 135 | (1, None) => (), 136 | v => panic!("{:?}", v), 137 | } 138 | 139 | assert_eq!(ch.finish(), b"c"); 140 | } 141 | 142 | #[test] 143 | fn test_force_split_bytes() { 144 | let mut ch = RollsumChunker::new(GearTab::from_array(TEST_GEAR_TAB_DATA), 10, 100); 145 | assert_eq!(ch.force_split(), None); 146 | ch.add_bytes(b"abc"); 147 | 148 | match ch.force_split() { 149 | Some(v) => assert_eq!(v, b"abc"), 150 | None => panic!("fail!"), 151 | } 152 | assert_eq!(ch.force_split(), None); 153 | ch.add_bytes(b"def"); 154 | assert_eq!(ch.finish(), b"def"); 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/cksumvfs.rs: -------------------------------------------------------------------------------- 1 | // Bindings and helpers for csrc/cksumvfs. 2 | // these functions let us add add checksums to our sqlite3 files. 3 | // For more info see: https://www.sqlite.org/cksumvfs.html 4 | 5 | extern "C" { 6 | fn cksumvfs_sqlite_version_number() -> ::std::os::raw::c_int; 7 | fn sqlite3_register_cksumvfs(unused: *const u8) -> ::std::os::raw::c_int; 8 | } 9 | 10 | pub fn register_cksumvfs() { 11 | // Because have our own copy of the sqlite3 header file, this 12 | // test ensures we are using the same header rusqlite used. 
13 | assert_eq!( 14 | unsafe { cksumvfs_sqlite_version_number() }, 15 | rusqlite::version_number() 16 | ); 17 | assert_eq!( 18 | unsafe { sqlite3_register_cksumvfs(std::ptr::null()) }, 19 | rusqlite::ffi::SQLITE_OK 20 | ) 21 | } 22 | 23 | pub fn enable_sqlite_checksums(db: &rusqlite::Connection) -> Result<(), anyhow::Error> { 24 | let mut n = 8; 25 | if unsafe { 26 | rusqlite::ffi::sqlite3_file_control( 27 | db.handle(), 28 | std::ptr::null(), 29 | rusqlite::ffi::SQLITE_FCNTL_RESERVE_BYTES, 30 | (&mut n) as *mut i32 as *mut core::ffi::c_void, 31 | ) 32 | } != rusqlite::ffi::SQLITE_OK 33 | { 34 | anyhow::bail!("unable to reserve bytes for sqlite3 page checksums"); 35 | } 36 | if n != 0 && n != 8 { 37 | anyhow::bail!("database has incorrect reserve bytes for checksums"); 38 | } 39 | if n == 0 { 40 | db.execute("vacuum;", [])?; 41 | } 42 | Ok(()) 43 | } 44 | 45 | #[cfg(test)] 46 | mod tests { 47 | 48 | use super::*; 49 | 50 | #[test] 51 | fn test_cksumvfs_can_be_enabled() { 52 | let temp_dir = tempfile::tempdir().unwrap(); 53 | let path = temp_dir.path().join("test.db3"); 54 | { 55 | register_cksumvfs(); 56 | let db = rusqlite::Connection::open(&path).unwrap(); 57 | enable_sqlite_checksums(&db).unwrap(); 58 | let enabled: String = db 59 | .query_row("PRAGMA checksum_verification;", [], |r| { 60 | Ok(r.get(0).unwrap()) 61 | }) 62 | .unwrap(); 63 | assert_eq!(enabled, "1"); 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/compression.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryInto; 2 | 3 | pub const COMPRESS_FOOTER_NONE: u8 = 0; 4 | pub const COMPRESS_FOOTER_LZ4: u8 = 1; 5 | pub const COMPRESS_FOOTER_ZSTD: u8 = 2; 6 | 7 | pub const COMPRESS_MAX_SIZE: usize = 67108864; 8 | 9 | #[derive(Clone, Copy)] 10 | pub enum Scheme { 11 | None, 12 | Lz4, 13 | Zstd { level: i32 }, 14 | } 15 | 16 | pub fn parse_scheme(s: &str) -> Result { 17 | if s == 
"none" { 18 | return Ok(Scheme::None); 19 | } 20 | if s == "lz4" { 21 | return Ok(Scheme::Lz4); 22 | } 23 | if s == "zstd" { 24 | return Ok(Scheme::Zstd { level: 3 }); 25 | } 26 | 27 | if s.starts_with("zstd:") { 28 | let spec_parts: Vec<&str> = s.split(':').collect(); 29 | if spec_parts.len() != 2 { 30 | anyhow::bail!("invalid zstd compression level, expected a number"); 31 | } 32 | match spec_parts[1].parse() { 33 | Ok(level) => { 34 | if !(1..=19).contains(&level) { 35 | anyhow::bail!("zstd compression level must be in the range 1-19"); 36 | } 37 | return Ok(Scheme::Zstd { level }); 38 | } 39 | Err(_) => anyhow::bail!("zstd compression level must be a number"), 40 | } 41 | } 42 | anyhow::bail!("invalid compression scheme, expected one of none, lz4, zstd[:$level]") 43 | } 44 | 45 | pub fn compress(scheme: Scheme, mut data: Vec) -> Vec { 46 | assert!(data.len() <= COMPRESS_MAX_SIZE); 47 | 48 | let compressed_data = match scheme { 49 | Scheme::None => { 50 | data.push(COMPRESS_FOOTER_NONE); 51 | return data; 52 | } 53 | Scheme::Lz4 => { 54 | let mut compressed_data = lz4::block::compress(&data, None, false).unwrap(); 55 | compressed_data.reserve(5); 56 | let sz = data.len() as u32; 57 | compressed_data.extend_from_slice(&u32::to_le_bytes(sz)[..]); 58 | compressed_data.push(COMPRESS_FOOTER_LZ4); 59 | compressed_data 60 | } 61 | Scheme::Zstd { level } => { 62 | let mut compressed_data: Vec = 63 | Vec::with_capacity(zstd_safe::compress_bound(data.len()) + 1); 64 | zstd_safe::compress(&mut compressed_data, &data, level).unwrap(); 65 | compressed_data.push(COMPRESS_FOOTER_ZSTD); 66 | compressed_data 67 | } 68 | }; 69 | 70 | if (compressed_data.len()) > data.len() { 71 | data.push(COMPRESS_FOOTER_NONE); 72 | return data; 73 | } 74 | 75 | compressed_data 76 | } 77 | 78 | pub fn decompress(mut data: Vec) -> Result, anyhow::Error> { 79 | match data.pop() { 80 | Some(COMPRESS_FOOTER_NONE) => Ok(data), 81 | Some(COMPRESS_FOOTER_LZ4) => { 82 | if data.len() < 4 { 83 | 
anyhow::bail!("data corrupt - compression footer missing decompressed size"); 84 | } 85 | let data_len = data.len(); 86 | let decompressed_sz = 87 | u32::from_le_bytes(data[data_len - 4..data_len].try_into().unwrap()) as usize; 88 | // This limit helps prevent bad actors from causing ooms, bupstash 89 | // naturally limits chunks and metadata to a max size that is well below this. 90 | if decompressed_sz > COMPRESS_MAX_SIZE { 91 | anyhow::bail!("data corrupt - decompressed size is larger than application limits"); 92 | } 93 | data.truncate(data.len() - 4); 94 | Ok(lz4::block::decompress(&data, Some(decompressed_sz as i32))?) 95 | } 96 | Some(COMPRESS_FOOTER_ZSTD) => { 97 | // Zstd should read it's internal frame header to get an accurate size. 98 | let max_decompressed_sz = zstd_safe::decompress_bound(&data) 99 | .unwrap() 100 | .try_into() 101 | .unwrap(); 102 | if max_decompressed_sz > COMPRESS_MAX_SIZE { 103 | anyhow::bail!("data corrupt - decompressed size is larger than application limits"); 104 | } 105 | let mut decompressed: Vec = Vec::with_capacity(max_decompressed_sz); 106 | match zstd_safe::decompress(&mut decompressed, &data) { 107 | Ok(_) => Ok(decompressed), 108 | Err(_) => anyhow::bail!("error during zstd decompression"), 109 | } 110 | } 111 | Some(_) => anyhow::bail!("unknown decompression footer, don't know how to decompress data (possibly compressed by a newer version of bupstash)"), 112 | None => anyhow::bail!("data missing compression footer"), 113 | } 114 | } 115 | 116 | pub fn unauthenticated_decompress(data: Vec) -> Result, anyhow::Error> { 117 | match data.last() { 118 | None => anyhow::bail!("data buffer too small, missing compression footer"), 119 | Some(f) if *f == COMPRESS_FOOTER_NONE => decompress(data), 120 | // Once we are confident in the security/memory safety of our decompression function, 121 | // we can shift to enabling compression of the unauthenticated data. 
122 | Some(f) => anyhow::bail!( 123 | "decompression of unauthenticated data is currently disabled (encryption footer is {})", 124 | *f 125 | ), 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/external_chunk_storage.rs: -------------------------------------------------------------------------------- 1 | use super::abloom; 2 | use super::address::Address; 3 | use super::chunk_storage::Engine; 4 | use super::protocol::*; 5 | use super::repository; 6 | use super::xid; 7 | use std::os::unix::net::UnixStream; 8 | 9 | pub struct ExternalStorage { 10 | sock: UnixStream, 11 | } 12 | 13 | impl ExternalStorage { 14 | pub fn new(socket_path: &std::path::Path, path: &str) -> Result { 15 | let mut sock = UnixStream::connect(socket_path)?; 16 | write_packet( 17 | &mut sock, 18 | &Packet::StorageConnect(StorageConnect { 19 | protocol: "s-6".to_string(), 20 | path: path.to_string(), 21 | }), 22 | )?; 23 | 24 | Ok(ExternalStorage { sock }) 25 | } 26 | } 27 | 28 | impl Engine for ExternalStorage { 29 | fn pipelined_get_chunks( 30 | &mut self, 31 | addresses: &[Address], 32 | on_chunk: &mut dyn FnMut(&Address, &[u8]) -> Result<(), anyhow::Error>, 33 | ) -> Result<(), anyhow::Error> { 34 | // In the future it would probably be good (though more complicated) to perform the writing of the addresses, 35 | // and reading of the results concurrently, though it complicates both the plugin and bupstash. 36 | write_storage_pipelined_get_chunks(&mut self.sock, addresses)?; 37 | 38 | for address in addresses { 39 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? { 40 | Packet::RStorageRequestChunkData(data) => on_chunk(address, &data)?, 41 | _ => anyhow::bail!("unexpected packet reponse, expected chunk"), 42 | } 43 | } 44 | 45 | Ok(()) 46 | } 47 | 48 | fn filter_existing_chunks( 49 | &mut self, 50 | on_progress: &mut dyn FnMut(u64) -> Result<(), anyhow::Error>, 51 | addresses: Vec
, 52 | ) -> Result, anyhow::Error> { 53 | write_storage_filter_existing(&mut self.sock, &addresses)?; 54 | std::mem::drop(addresses); 55 | loop { 56 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? { 57 | Packet::StorageFilterExistingProgress(n) => on_progress(n.0)?, 58 | Packet::StorageAddresses(missing) => return Ok(missing), 59 | _ => anyhow::bail!( 60 | "expected StorageAddresses or StorageFilterAddresses progress packet" 61 | ), 62 | }; 63 | } 64 | } 65 | 66 | fn get_chunk(&mut self, address: &Address) -> Result, anyhow::Error> { 67 | write_packet(&mut self.sock, &Packet::TStorageRequestChunkData(*address))?; 68 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? { 69 | Packet::RStorageRequestChunkData(data) => Ok(data), 70 | _ => anyhow::bail!("unexpected packet reponse, expected RRequestChunkData"), 71 | } 72 | } 73 | 74 | fn add_chunk(&mut self, address: &Address, data: Vec) -> Result<(), anyhow::Error> { 75 | write_chunk(&mut self.sock, address, &data)?; 76 | Ok(()) 77 | } 78 | 79 | fn flush(&mut self) -> Result { 80 | write_packet(&mut self.sock, &Packet::TStorageFlush)?; 81 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? 
{ 82 | Packet::RStorageFlush(stats) => Ok(stats), 83 | _ => anyhow::bail!("unexpected packet reponse, expected RStorageFlush"), 84 | } 85 | } 86 | 87 | fn prepare_for_sweep(&mut self, gc_id: xid::Xid) -> Result<(), anyhow::Error> { 88 | write_packet(&mut self.sock, &Packet::TStoragePrepareForSweep(gc_id))?; 89 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE) { 90 | Ok(Packet::RStoragePrepareForSweep) => (), 91 | Ok(_) => anyhow::bail!("unexpected packet response, expected RStoragePrepareForSweep"), 92 | Err(err) => return Err(err), 93 | } 94 | Ok(()) 95 | } 96 | 97 | fn estimate_chunk_count(&mut self) -> Result { 98 | write_packet(&mut self.sock, &Packet::TStorageEstimateCount)?; 99 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE) { 100 | Ok(Packet::RStorageEstimateCount(v)) => Ok(v.count.0), 101 | Ok(_) => anyhow::bail!("unexpected packet response, expected RStorageEstimateCount"), 102 | Err(err) => Err(err), 103 | } 104 | } 105 | 106 | fn sweep( 107 | &mut self, 108 | update_progress_msg: &mut dyn FnMut(String) -> Result<(), anyhow::Error>, 109 | reachable: abloom::ABloom, 110 | ) -> Result { 111 | write_begin_sweep(&mut self.sock, &reachable)?; 112 | std::mem::drop(reachable); 113 | loop { 114 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? { 115 | Packet::StorageSweepProgress(msg) => { 116 | update_progress_msg(msg)?; 117 | } 118 | Packet::StorageSweepComplete(stats) => { 119 | let _ = write_packet(&mut self.sock, &Packet::EndOfTransmission); 120 | return Ok(stats); 121 | } 122 | _ => anyhow::bail!("unexpected packet response, expected StorageSweepProgress or StorageSweepComplete"), 123 | } 124 | } 125 | } 126 | 127 | fn sweep_completed(&mut self, gc_id: xid::Xid) -> Result { 128 | write_packet(&mut self.sock, &Packet::TStorageQuerySweepCompleted(gc_id))?; 129 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? 
{ 130 | Packet::RStorageQuerySweepCompleted(completed) => Ok(completed), 131 | _ => anyhow::bail!("unexpected packet response, expected RStorageSweepCompleted"), 132 | } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/fmtutil.rs: -------------------------------------------------------------------------------- 1 | use super::hex; 2 | use super::index; 3 | use std::os::unix::ffi::OsStrExt; 4 | 5 | pub fn format_timestamp(ts: &chrono::DateTime, utc_timestamps: bool) -> String { 6 | let tsfmt = "%Y/%m/%d %T"; 7 | if utc_timestamps { 8 | ts.format(tsfmt).to_string() 9 | } else { 10 | chrono::DateTime::::from(*ts) 11 | .format(tsfmt) 12 | .to_string() 13 | } 14 | } 15 | 16 | pub fn format_size(n: u64) -> String { 17 | // Binary units, not SI units. 18 | const K: u64 = 1024; 19 | const M: u64 = 1024 * K; 20 | const G: u64 = 1024 * M; 21 | const T: u64 = 1024 * G; 22 | const P: u64 = 1024 * T; 23 | 24 | if n > P { 25 | format!("{}.{:0>2}PiB", n / P, (n % P) / (P / 100)) 26 | } else if n > T { 27 | format!("{}.{:0>2}TiB", n / T, (n % T) / (T / 100)) 28 | } else if n > G { 29 | format!("{}.{:0>2}GiB", n / G, (n % G) / (G / 100)) 30 | } else if n > M { 31 | format!("{}.{:0>2}MiB", n / M, (n % M) / (M / 100)) 32 | } else if n > K { 33 | format!("{}.{:0>2}KiB", n / K, (n % K) / (K / 100)) 34 | } else { 35 | format!("{}B", n) 36 | } 37 | } 38 | 39 | pub struct IndexHumanDisplayWidths { 40 | pub human_size_digits: usize, 41 | } 42 | 43 | pub fn estimate_index_human_display_widths( 44 | index: &index::CompressedIndex, 45 | ) -> Result { 46 | // If the index is large, just assume we have the full range of values. 47 | // The cost of formatting a huge index perfectly is too large. 
48 | if index.compressed_size() > 512 * 1024 { 49 | Ok(IndexHumanDisplayWidths { 50 | human_size_digits: 11, // 'nnnn.nn UUU' 51 | }) 52 | } else { 53 | let mut human_size_digits = 0; 54 | for ent in index.iter() { 55 | let ent = ent?; 56 | human_size_digits = human_size_digits.max(format_size(ent.size.0).len()) 57 | } 58 | Ok(IndexHumanDisplayWidths { human_size_digits }) 59 | } 60 | } 61 | 62 | pub fn format_human_content_listing( 63 | ent: &index::IndexEntry, 64 | utc_timestamps: bool, 65 | widths: &IndexHumanDisplayWidths, 66 | ) -> String { 67 | let mut result = String::new(); 68 | std::fmt::write(&mut result, format_args!("{}", ent.display_mode())).unwrap(); 69 | let size = if ent.is_file() { 70 | format_size(ent.size.0) 71 | } else { 72 | "-".to_string() 73 | }; 74 | let size_padding: String = " ".repeat(widths.human_size_digits - size.len()); 75 | std::fmt::write(&mut result, format_args!(" {}{}", size, size_padding)).unwrap(); 76 | let ts = chrono::NaiveDateTime::from_timestamp_opt(ent.ctime.0 as i64, ent.ctime_nsec.0 as u32) 77 | .unwrap(); 78 | let ts = chrono::DateTime::::from_utc(ts, chrono::Utc); 79 | let ts = format_timestamp(&ts, utc_timestamps); 80 | std::fmt::write(&mut result, format_args!(" {}", ts)).unwrap(); 81 | std::fmt::write(&mut result, format_args!(" {}", ent.path.to_string_lossy())).unwrap(); 82 | result 83 | } 84 | 85 | pub fn format_jsonl1_content_listing(ent: &index::IndexEntry) -> Result { 86 | let mut result = String::with_capacity(512); 87 | std::fmt::write(&mut result, format_args!("{{"))?; 88 | std::fmt::write( 89 | &mut result, 90 | format_args!("\"mode\":{}", serde_json::to_string(&ent.mode.0)?), 91 | )?; 92 | std::fmt::write(&mut result, format_args!(",\"size\":{}", ent.size.0))?; 93 | 94 | match ent.path.to_str() { 95 | Some(path) => std::fmt::write( 96 | &mut result, 97 | format_args!(",\"path\":{}", serde_json::to_string(path)?), 98 | )?, 99 | None => { 100 | let path = ent.path.as_os_str().as_bytes(); 101 | 
std::fmt::write( 102 | &mut result, 103 | format_args!(",\"path\":{}", serde_json::to_string(path)?), 104 | )? 105 | } 106 | } 107 | std::fmt::write( 108 | &mut result, 109 | format_args!(",\"mtime\":{}", serde_json::to_string(&ent.mtime.0)?), 110 | )?; 111 | std::fmt::write( 112 | &mut result, 113 | format_args!( 114 | ",\"mtime_nsec\":{}", 115 | serde_json::to_string(&ent.mtime_nsec.0)? 116 | ), 117 | )?; 118 | std::fmt::write( 119 | &mut result, 120 | format_args!(",\"ctime\":{}", serde_json::to_string(&ent.ctime.0)?), 121 | )?; 122 | std::fmt::write( 123 | &mut result, 124 | format_args!( 125 | ",\"ctime_nsec\":{}", 126 | serde_json::to_string(&ent.ctime_nsec.0)? 127 | ), 128 | )?; 129 | std::fmt::write( 130 | &mut result, 131 | format_args!(",\"uid\":{}", serde_json::to_string(&ent.uid.0)?), 132 | )?; 133 | std::fmt::write( 134 | &mut result, 135 | format_args!(",\"gid\":{}", serde_json::to_string(&ent.gid.0)?), 136 | )?; 137 | std::fmt::write( 138 | &mut result, 139 | format_args!(",\"norm_dev\":{}", serde_json::to_string(&ent.norm_dev.0)?), 140 | )?; 141 | std::fmt::write( 142 | &mut result, 143 | format_args!(",\"nlink\":{}", serde_json::to_string(&ent.nlink.0)?), 144 | )?; 145 | std::fmt::write( 146 | &mut result, 147 | format_args!(",\"ino\":{}", serde_json::to_string(&ent.ino.0)?), 148 | )?; 149 | 150 | if ent.is_dev_node() { 151 | std::fmt::write( 152 | &mut result, 153 | format_args!( 154 | ",\"dev_major\":{}", 155 | serde_json::to_string(&ent.dev_major.0)?, 156 | ), 157 | )?; 158 | std::fmt::write( 159 | &mut result, 160 | format_args!( 161 | ",\"dev_minor\":{}", 162 | serde_json::to_string(&ent.dev_minor.0)?, 163 | ), 164 | )?; 165 | } else { 166 | result.push_str(",\"dev_major\":null,\"dev_minor\":null"); 167 | } 168 | 169 | if let Some(ref xattrs) = ent.xattrs { 170 | result.push_str(",\"xattrs\":{"); 171 | let mut first = true; 172 | for (k, v) in xattrs.iter() { 173 | let k = if let Ok(k) = std::str::from_utf8(k) { 174 | serde_json::to_string(k)? 
175 | } else { 176 | serde_json::to_string(k)? 177 | }; 178 | 179 | let v = if let Ok(v) = std::str::from_utf8(v.as_slice()) { 180 | serde_json::to_string(v)? 181 | } else { 182 | serde_json::to_string(v)? 183 | }; 184 | 185 | if first { 186 | first = false; 187 | } else { 188 | result.push(','); 189 | } 190 | 191 | result.push_str(&k); 192 | result.push(':'); 193 | result.push_str(&v); 194 | } 195 | result.push('}'); 196 | } else { 197 | result.push_str(",\"xattrs\":null"); 198 | } 199 | 200 | std::fmt::write(&mut result, format_args!(",\"sparse\": {}", ent.sparse))?; 201 | 202 | if let Some(ref link_target) = ent.link_target { 203 | match link_target.to_str() { 204 | Some(path) => std::fmt::write( 205 | &mut result, 206 | format_args!(",\"link_target\":{}", serde_json::to_string(path)?), 207 | )?, 208 | None => { 209 | let link_target = link_target.as_os_str().as_bytes(); 210 | std::fmt::write( 211 | &mut result, 212 | format_args!(",\"link_target\":{}", serde_json::to_string(link_target)?), 213 | )? 214 | } 215 | } 216 | } else { 217 | result.push_str(",\"link_target\":null"); 218 | } 219 | 220 | match ent.data_hash { 221 | index::ContentCryptoHash::None => result.push_str(",\"data_hash\":null"), 222 | index::ContentCryptoHash::Blake3(h) => std::fmt::write( 223 | &mut result, 224 | format_args!( 225 | ",\"data_hash\":{}", 226 | serde_json::to_string(&format!("blake3:{}", hex::easy_encode_to_string(&h)))? 227 | ), 228 | )?, 229 | }; 230 | std::fmt::write(&mut result, format_args!("}}"))?; 231 | Ok(result) 232 | } 233 | -------------------------------------------------------------------------------- /src/fprefetch.rs: -------------------------------------------------------------------------------- 1 | // fprefetcher is a file opening queue used by the put command. 2 | // The idea is there is a queue of files you are interested 3 | // in reading in the near future and it lets the OS know the 4 | // intention via whatever readahead mechanism your OS provides. 
5 | 6 | use std::collections::VecDeque; 7 | use std::fs::File; 8 | use std::path::PathBuf; 9 | 10 | cfg_if::cfg_if! { 11 | if #[cfg(target_os = "linux")] { 12 | use std::os::unix::fs::OpenOptionsExt; 13 | } 14 | } 15 | 16 | cfg_if::cfg_if! { 17 | if #[cfg(target_os = "macos")] { 18 | // Nothing is needed. 19 | } else if #[cfg(target_os = "openbsd")] { 20 | // Nothing is needed. 21 | } else { 22 | use std::os::unix::io::AsRawFd; 23 | const NUM_PREFETCHED_BYTES: libc::off_t = 128 * 1024 * 1024; 24 | } 25 | } 26 | 27 | const NUM_PREOPENED_FILES: usize = 1; 28 | 29 | #[derive(Default)] 30 | pub struct ReadaheadFileOpener { 31 | unopened: VecDeque, 32 | opened: VecDeque<(PathBuf, std::io::Result)>, 33 | } 34 | 35 | fn open_file_for_streaming(fpath: &std::path::Path) -> std::io::Result { 36 | cfg_if::cfg_if! { 37 | if #[cfg(target_os = "linux")] { 38 | // Try with O_NOATIME first; if it fails, e.g. because the user we 39 | // run as is not the file owner, retry without.. 40 | let f = std::fs::OpenOptions::new() 41 | .read(true) 42 | .custom_flags(libc::O_NOATIME) 43 | .open(fpath) 44 | .or_else(|error| { 45 | match error.kind() { 46 | std::io::ErrorKind::PermissionDenied => { 47 | std::fs::OpenOptions::new() 48 | .read(true) 49 | .open(fpath) 50 | } 51 | _ => Err(error) 52 | } 53 | })?; 54 | } else { 55 | let f = std::fs::OpenOptions::new() 56 | .read(true) 57 | .open(fpath)?; 58 | } 59 | } 60 | 61 | cfg_if::cfg_if! { 62 | if #[cfg(target_os = "macos")] { 63 | // XXX can we do anything here? 64 | // Perhaps F_RDADVISE ? 65 | } else if #[cfg(target_os = "openbsd")] { 66 | // XXX can we do anything here? 67 | } else { 68 | // We would like to use something like POSIX_FADV_NOREUSE to preserve 69 | // the user page cache... this is actually a NOOP on linux. 70 | // Instead we can at least boost performance by hinting our access pattern. 
71 | match nix::fcntl::posix_fadvise( 72 | f.as_raw_fd(), 73 | 0, 74 | 0, 75 | nix::fcntl::PosixFadviseAdvice::POSIX_FADV_SEQUENTIAL, 76 | ) { 77 | Ok(_) => (), 78 | Err(err) => { 79 | return Err(std::io::Error::new( 80 | std::io::ErrorKind::Other, 81 | format!("fadvise POSIX_FADV_SEQUENTIAL failed: {}", err), 82 | )) 83 | } 84 | }; 85 | 86 | match nix::fcntl::posix_fadvise( 87 | f.as_raw_fd(), 88 | 0, 89 | NUM_PREFETCHED_BYTES, 90 | nix::fcntl::PosixFadviseAdvice::POSIX_FADV_WILLNEED, 91 | ) { 92 | Ok(_) => (), 93 | Err(err) => { 94 | return Err(std::io::Error::new( 95 | std::io::ErrorKind::Other, 96 | format!("fadvise POSIX_FADV_WILLNEED failed: {}", err), 97 | )) 98 | } 99 | }; 100 | } 101 | } 102 | 103 | Ok(f) 104 | } 105 | 106 | impl ReadaheadFileOpener { 107 | pub fn new() -> ReadaheadFileOpener { 108 | ReadaheadFileOpener { 109 | unopened: VecDeque::new(), 110 | opened: VecDeque::new(), 111 | } 112 | } 113 | 114 | pub fn add_to_queue(&mut self, p: PathBuf) { 115 | self.unopened.push_back(p); 116 | } 117 | 118 | pub fn next_file(&mut self) -> Option<(PathBuf, std::io::Result)> { 119 | while !self.unopened.is_empty() && self.opened.len() < NUM_PREOPENED_FILES + 1 { 120 | let p = self.unopened.pop_front().unwrap(); 121 | let r = open_file_for_streaming(&p); 122 | self.opened.push_back((p, r)) 123 | } 124 | self.opened.pop_front() 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/hex.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Eq, PartialEq, thiserror::Error)] 2 | pub enum HexError { 3 | #[error("invalid character in hex input")] 4 | InvalidCharacter, 5 | #[error("hex padding required")] 6 | PaddingRequired, 7 | } 8 | 9 | #[inline] 10 | fn from_hex_byte(b: u8) -> Result { 11 | match b { 12 | 48..=57 => Ok(b - 48), 13 | 65..=70 => Ok(b - 65 + 10), 14 | 97..=102 => Ok(b - 97 + 10), 15 | _ => Err(HexError::InvalidCharacter), 16 | } 17 | } 18 | 19 | 
pub fn decode(from: &[u8], to: &mut [u8]) -> Result<(), HexError> { 20 | if from.len() % 2 != 0 { 21 | return Err(HexError::PaddingRequired); 22 | } 23 | 24 | assert_eq!(from.len(), to.len() * 2); 25 | 26 | for i in 0..to.len() { 27 | let hi = from[2 * i]; 28 | let lo = from[2 * i + 1]; 29 | to[i] = from_hex_byte(hi)? << 4 | from_hex_byte(lo)?; 30 | } 31 | Ok(()) 32 | } 33 | 34 | pub fn decode_string(from: &str, to: &mut [u8]) -> Result<(), HexError> { 35 | decode(from.as_bytes(), to) 36 | } 37 | 38 | pub fn easy_decode_string(from: &str) -> Result, HexError> { 39 | let n = from.len() / 2; 40 | let mut v = vec![0; n]; 41 | match decode_string(from, &mut v) { 42 | Ok(()) => Ok(v), 43 | Err(e) => Err(e), 44 | } 45 | } 46 | 47 | #[inline] 48 | fn to_hex_bytes(b: u8) -> (u8, u8) { 49 | let tab = b"0123456789abcdef"; 50 | let hi = (b & 0xf0) >> 4; 51 | let lo = b & 0x0f; 52 | (tab[hi as usize], tab[lo as usize]) 53 | } 54 | 55 | #[inline] 56 | fn to_hex_chars(b: u8) -> (char, char) { 57 | let (hi, lo) = to_hex_bytes(b); 58 | (hi as char, lo as char) 59 | } 60 | 61 | // from.len() MUST be exactly half to.len() 62 | pub fn encode(from: &[u8], to: &mut [u8]) { 63 | assert!(to.len() == 2 * from.len()); 64 | 65 | for i in 0..from.len() { 66 | let (hi, lo) = to_hex_bytes(from[i]); 67 | to[2 * i] = hi; 68 | to[2 * i + 1] = lo; 69 | } 70 | } 71 | 72 | pub fn easy_encode_to_string(from: &[u8]) -> String { 73 | let mut s = String::with_capacity(2 * from.len()); 74 | for b in from { 75 | let (hi, lo) = to_hex_chars(*b); 76 | s.push(hi); 77 | s.push(lo); 78 | } 79 | s 80 | } 81 | 82 | #[cfg(test)] 83 | mod tests { 84 | use super::*; 85 | 86 | #[test] 87 | fn test_encode() { 88 | let buf: [u8; 8] = [18, 52, 86, 120, 154, 188, 222, 240]; 89 | let mut encoded: [u8; 16] = [0; 16]; 90 | encode(&buf, &mut encoded); 91 | assert_eq!(std::str::from_utf8(&encoded).unwrap(), "123456789abcdef0"); 92 | } 93 | 94 | #[test] 95 | fn test_easy_encode_to_string() { 96 | let buf: [u8; 8] = [18, 52, 
86, 120, 154, 188, 222, 240]; 97 | assert_eq!(easy_encode_to_string(&buf), "123456789abcdef0"); 98 | } 99 | 100 | #[test] 101 | fn test_easy_decode_string() { 102 | let buf: [u8; 8] = [18, 52, 86, 120, 154, 188, 222, 240]; 103 | assert_eq!( 104 | easy_decode_string("123456789abcdef0").unwrap().as_slice(), 105 | &buf[..] 106 | ); 107 | assert_eq!( 108 | easy_decode_string("123456789ABCDEF0").unwrap().as_slice(), 109 | &buf[..] 110 | ); 111 | assert_eq!( 112 | easy_decode_string("1234!6789ABCDEF0").unwrap_err(), 113 | HexError::InvalidCharacter 114 | ); 115 | assert_eq!( 116 | easy_decode_string("23456789ABCDEF0").unwrap_err(), 117 | HexError::PaddingRequired 118 | ); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/ioutil.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, BufRead, Read, Write}; 2 | 3 | pub struct PipeReader { 4 | receiver: crossbeam_channel::Receiver>, 5 | buffer: Vec, 6 | position: usize, 7 | } 8 | 9 | pub struct PipeWriter { 10 | sender: crossbeam_channel::Sender>, 11 | buffer: Vec, 12 | size: usize, 13 | } 14 | 15 | pub fn buffered_pipe(write_buf_sz: usize) -> (PipeReader, PipeWriter) { 16 | let (tx, rx) = crossbeam_channel::bounded(0); 17 | let write_buf_sz = write_buf_sz.max(1); 18 | ( 19 | PipeReader { 20 | receiver: rx, 21 | buffer: Vec::new(), 22 | position: 0, 23 | }, 24 | PipeWriter { 25 | sender: tx, 26 | buffer: Vec::with_capacity(write_buf_sz), 27 | size: write_buf_sz, 28 | }, 29 | ) 30 | } 31 | 32 | fn epipe() -> io::Error { 33 | io::Error::new(io::ErrorKind::BrokenPipe, "pipe closed") 34 | } 35 | 36 | impl BufRead for PipeReader { 37 | fn fill_buf(&mut self) -> io::Result<&[u8]> { 38 | if self.position >= self.buffer.len() { 39 | if let Ok(data) = self.receiver.recv() { 40 | debug_assert!(!data.is_empty()); 41 | self.buffer = data; 42 | self.position = 0; 43 | } 44 | } 45 | Ok(&self.buffer[self.position..]) 46 | } 47 | 
48 | fn consume(&mut self, amt: usize) { 49 | debug_assert!(self.buffer.len() - self.position >= amt); 50 | self.position += amt 51 | } 52 | } 53 | 54 | impl Read for PipeReader { 55 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 56 | let internal = self.fill_buf()?; 57 | let len = std::cmp::min(buf.len(), internal.len()); 58 | if len > 0 { 59 | buf[..len].copy_from_slice(&internal[..len]); 60 | self.consume(len); 61 | } 62 | Ok(len) 63 | } 64 | } 65 | 66 | impl Write for PipeWriter { 67 | fn write(&mut self, buf: &[u8]) -> io::Result { 68 | let bytes_written = if (buf.len() + self.buffer.len()) > self.buffer.capacity() { 69 | self.buffer.capacity() - self.buffer.len() 70 | } else { 71 | buf.len() 72 | }; 73 | self.buffer.extend_from_slice(&buf[..bytes_written]); 74 | if self.buffer.len() == self.buffer.capacity() { 75 | self.flush()?; 76 | } 77 | Ok(bytes_written) 78 | } 79 | 80 | fn flush(&mut self) -> io::Result<()> { 81 | if self.buffer.is_empty() { 82 | Ok(()) 83 | } else { 84 | let data = std::mem::replace(&mut self.buffer, Vec::with_capacity(self.size)); 85 | match self.sender.send(data) { 86 | Ok(_) => Ok(()), 87 | Err(_) => Err(epipe()), 88 | } 89 | } 90 | } 91 | } 92 | 93 | pub struct TeeReader { 94 | read: R, 95 | output: W, 96 | } 97 | 98 | impl TeeReader { 99 | pub fn new(read: R, output: W) -> Self { 100 | Self { read, output } 101 | } 102 | 103 | pub fn into_inner(self) -> (R, W) { 104 | (self.read, self.output) 105 | } 106 | } 107 | 108 | impl Read for TeeReader { 109 | fn read(&mut self, buf: &mut [u8]) -> std::io::Result { 110 | let n = self.read.read(buf)?; 111 | self.output.write_all(&buf[..n])?; 112 | Ok(n) 113 | } 114 | } 115 | 116 | pub fn all_zeros(buf: &[u8]) -> bool { 117 | // This processes a lot of data so we iterate 118 | // by 8 where we can and check the remainder byte wise. 119 | let (prefix, big, suffix) = unsafe { buf.align_to::() }; 120 | // Check the fastest part first so we can early exit. 
121 | for v in big { 122 | if *v != 0 { 123 | return false; 124 | } 125 | } 126 | for v in prefix { 127 | if *v != 0 { 128 | return false; 129 | } 130 | } 131 | for v in suffix { 132 | if *v != 0 { 133 | return false; 134 | } 135 | } 136 | true 137 | } 138 | -------------------------------------------------------------------------------- /src/migrate.rs: -------------------------------------------------------------------------------- 1 | // This file contains code to perform repository migrations. 2 | // The code here is often deliberately duplicated and avoids dependencies 3 | // on other modules so that the upgrade migration code can avoid churn with 4 | // other changes. 5 | 6 | use super::fstx1; 7 | use super::fstx2; 8 | use super::oplog; 9 | use super::vfs; 10 | use super::xid; 11 | use std::collections::HashSet; 12 | use std::io::BufRead; 13 | 14 | pub fn repo_upgrade_to_5_to_6(repo_fs: &vfs::VFs) -> Result<(), anyhow::Error> { 15 | // This upgrade mainly just prevents clients from seeing index entries they 16 | // cannot decode... repositories of version 5 and 6 are compatible except 17 | // for an additional index entry type. 18 | // This upgrade simply increments the schema version. 
19 | eprintln!("upgrading repository schema from version 5 to version 6..."); 20 | 21 | let mut lock_file = repo_fs.open("repo.lock", vfs::OpenFlags::RDWR)?; 22 | eprintln!("getting exclusive repository lock for upgrade..."); 23 | lock_file.lock(vfs::LockType::Exclusive)?; 24 | 25 | let mut fstx1 = fstx1::WriteTxn::begin_at(repo_fs)?; 26 | let schema_version = fstx1.read_string("meta/schema_version")?; 27 | if schema_version != "5" { 28 | anyhow::bail!( 29 | "unable to upgrade, expected schema version 5, got {}", 30 | schema_version 31 | ) 32 | } 33 | fstx1.add_write("meta/schema_version", "6".to_string().into_bytes())?; 34 | fstx1.commit()?; 35 | eprintln!("repository upgrade successful..."); 36 | drop(lock_file); 37 | Ok(()) 38 | } 39 | 40 | pub fn repo_upgrade_to_6_to_7(repo_fs: &vfs::VFs) -> Result<(), anyhow::Error> { 41 | // This upgrade adds sparse files and zstd compression. 42 | // This upgrade also adds the '.removed' suffix for removed items. 43 | eprintln!("upgrading repository schema from version 6 to version 7..."); 44 | 45 | let mut lock_file = repo_fs.open("repo.lock", vfs::OpenFlags::RDWR)?; 46 | eprintln!("getting exclusive repository lock for upgrade..."); 47 | lock_file.lock(vfs::LockType::Exclusive)?; 48 | 49 | let mut txn = fstx1::WriteTxn::begin_at(repo_fs)?; 50 | 51 | let mut active_items: HashSet = HashSet::new(); 52 | for item in txn.read_dir("items")? 
{ 53 | let id = item.file_name; 54 | match xid::Xid::parse(&id) { 55 | Ok(id) => { 56 | active_items.insert(id); 57 | } 58 | Err(_) => anyhow::bail!("unable to parse item id at path items/{}", id), 59 | } 60 | } 61 | 62 | let log_file = txn.open("repo.oplog")?; 63 | 64 | let mut log_file = std::io::BufReader::new(log_file); 65 | 66 | while !log_file.fill_buf()?.is_empty() { 67 | let op = serde_bare::from_reader(&mut log_file)?; 68 | if let oplog::LogOp::AddItem((id, md)) = op { 69 | if !active_items.contains(&id) { 70 | let serialized_md = serde_bare::to_vec(&md)?; 71 | txn.add_write(&format!("items/{:x}.removed", id), serialized_md)?; 72 | } 73 | } 74 | } 75 | 76 | let schema_version = txn.read_string("meta/schema_version")?; 77 | if schema_version != "6" { 78 | anyhow::bail!( 79 | "unable to upgrade, expected schema version 6, got {}", 80 | schema_version 81 | ) 82 | } 83 | 84 | txn.add_write("meta/schema_version", "7".to_string().into_bytes())?; 85 | txn.commit()?; 86 | 87 | eprintln!("repository upgrade successful..."); 88 | std::mem::drop(lock_file); 89 | Ok(()) 90 | } 91 | 92 | pub fn repo_upgrade_to_7_to_8(repo_fs: &vfs::VFs) -> Result<(), anyhow::Error> { 93 | // This upgrade migrates from fstx1 to fstx2 (WAL mode). 94 | eprintln!("upgrading repository schema from version 7 to version 8..."); 95 | 96 | let mut lock_file = repo_fs.open("repo.lock", vfs::OpenFlags::RDWR)?; 97 | eprintln!("getting exclusive repository lock for upgrade..."); 98 | lock_file.lock(vfs::LockType::Exclusive)?; 99 | 100 | // Rollback any failed old style transactions, prepare for new style. 101 | let mut txn = fstx1::WriteTxn::begin_at(repo_fs)?; 102 | { 103 | txn.add_write(fstx2::SEQ_NUM_NAME, vec![0, 0, 0, 0, 0, 0, 0, 0])?; 104 | } 105 | txn.commit()?; 106 | 107 | // Do upgrade with new style transactions. 108 | let mut txn = fstx2::WriteTxn::begin_at(repo_fs)?; 109 | { 110 | for d in ["data", "wal"] { 111 | if !txn.file_exists(d)? 
{ 112 | txn.add_mkdir(d)?; 113 | } 114 | } 115 | txn.add_write("meta/schema_version", "8".to_string().into_bytes())?; 116 | } 117 | txn.commit()?; 118 | 119 | eprintln!("repository upgrade successful..."); 120 | std::mem::drop(lock_file); 121 | Ok(()) 122 | } 123 | -------------------------------------------------------------------------------- /src/sodium.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | #![allow(non_camel_case_types)] 3 | #![allow(non_snake_case)] 4 | #![allow(dead_code)] 5 | #![allow(deref_nullptr)] // see https://github.com/rust-lang/rust-bindgen/issues/1651 6 | #![allow(clippy::redundant_static_lifetimes)] 7 | include!("./sodium_bindings_gen.rs"); 8 | -------------------------------------------------------------------------------- /src/xglobset.rs: -------------------------------------------------------------------------------- 1 | // EXtended globset functionality 2 | // 3 | // Waiting for https://github.com/BurntSushi/ripgrep/pull/2061 to get merged 4 | 5 | use std::ops::Deref; 6 | use std::{fmt, hash}; 7 | 8 | // Escape metacharacters within the given string by surrounding them in 9 | // brackets. The resulting string will, when compiled into a `Glob`, 10 | // match the input string and nothing else. 11 | pub fn escape(s: &str) -> String { 12 | let mut escaped = String::with_capacity(s.len()); 13 | for c in s.chars() { 14 | match c { 15 | // note that ! does not need escaping because it is only special 16 | // inside brackets 17 | '?' 
| '*' | '[' | ']' => { 18 | escaped.push('['); 19 | escaped.push(c); 20 | escaped.push(']'); 21 | } 22 | c => { 23 | escaped.push(c); 24 | } 25 | } 26 | } 27 | escaped 28 | } 29 | 30 | // Newtype wrapper around [globset::GlobMatcher] that adds a few trait implementations we absolutely need 31 | #[derive(Clone, Debug)] 32 | pub struct GlobMatcher(globset::GlobMatcher); 33 | 34 | impl PartialEq for GlobMatcher { 35 | fn eq(&self, other: &GlobMatcher) -> bool { 36 | self.glob() == other.glob() 37 | } 38 | } 39 | 40 | impl Eq for GlobMatcher {} 41 | 42 | impl hash::Hash for GlobMatcher { 43 | fn hash(&self, state: &mut H) { 44 | self.glob().hash(state); 45 | } 46 | } 47 | 48 | impl fmt::Display for GlobMatcher { 49 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 50 | self.glob().fmt(f) 51 | } 52 | } 53 | 54 | /* Conversion traits between the wrapped type and back */ 55 | 56 | impl Deref for GlobMatcher { 57 | type Target = globset::GlobMatcher; 58 | 59 | fn deref(&self) -> &globset::GlobMatcher { 60 | &self.0 61 | } 62 | } 63 | 64 | impl From for globset::GlobMatcher { 65 | fn from(outer: GlobMatcher) -> Self { 66 | outer.0 67 | } 68 | } 69 | 70 | impl From for GlobMatcher { 71 | fn from(inner: globset::GlobMatcher) -> Self { 72 | Self(inner) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/xid.rs: -------------------------------------------------------------------------------- 1 | use super::crypto; 2 | use super::hex; 3 | use serde::{Deserialize, Serialize}; 4 | use std::convert::TryInto; 5 | use std::fmt; 6 | 7 | pub const XID_SZ: usize = 16; 8 | 9 | #[derive(Serialize, Debug, Deserialize, Default, PartialEq, Eq, Hash, Clone, Copy)] 10 | pub struct Xid { 11 | pub bytes: [u8; XID_SZ], 12 | } 13 | 14 | // Convert a slice of xids to a slice of bytes without any copying. 
15 | pub fn xids_to_bytes(xids: &[Xid]) -> &[u8] { 16 | assert!(std::mem::size_of::() == XID_SZ); 17 | let n_bytes = xids.len() * XID_SZ; 18 | unsafe { std::slice::from_raw_parts(xids.as_ptr() as *const u8, n_bytes) } 19 | } 20 | 21 | impl Xid { 22 | pub fn new() -> Self { 23 | let mut bytes = [0; XID_SZ]; 24 | crypto::randombytes(&mut bytes[..]); 25 | Xid { bytes } 26 | } 27 | 28 | pub fn parse(s: &str) -> Result { 29 | let mut bytes = [0; XID_SZ]; 30 | let s = s.as_bytes(); 31 | if s.len() != 32 { 32 | anyhow::bail!("invalid id, should be 32 characters long"); 33 | } 34 | if hex::decode(s, &mut bytes[..]).is_err() { 35 | anyhow::bail!("invalid id, should be a hex value"); 36 | } 37 | Ok(Xid { bytes }) 38 | } 39 | 40 | pub fn as_hex(&self) -> [u8; XID_SZ * 2] { 41 | let mut buf = [0; XID_SZ * 2]; 42 | hex::encode(&self.bytes[..], &mut buf[..]); 43 | buf 44 | } 45 | 46 | pub fn from_slice(s: &[u8]) -> Result { 47 | Ok(Xid { 48 | bytes: s.try_into()?, 49 | }) 50 | } 51 | } 52 | 53 | impl fmt::Display for Xid { 54 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 55 | let h = self.as_hex(); 56 | write!(f, "{}", std::str::from_utf8(&h[..]).unwrap()) 57 | } 58 | } 59 | 60 | impl fmt::LowerHex for Xid { 61 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 62 | let h = self.as_hex(); 63 | write!(f, "{}", std::str::from_utf8(&h[..]).unwrap()) 64 | } 65 | } 66 | 67 | impl rusqlite::types::FromSql for Xid { 68 | fn column_result(v: rusqlite::types::ValueRef) -> rusqlite::types::FromSqlResult { 69 | v.as_blob().map(|b| { 70 | let mut id = Xid::default(); 71 | id.bytes[..].clone_from_slice(b); 72 | id 73 | }) 74 | } 75 | } 76 | 77 | impl rusqlite::types::ToSql for Xid { 78 | fn to_sql(&self) -> rusqlite::Result { 79 | Ok(rusqlite::types::ToSqlOutput::from(&self.bytes[..])) 80 | } 81 | } 82 | 83 | #[cfg(test)] 84 | mod tests { 85 | use super::*; 86 | 87 | #[test] 88 | fn test_default() { 89 | let u = Xid::default(); 90 | assert_eq!( 91 | u.to_string(), 92 | 
"00000000000000000000000000000000".to_string() 93 | ); 94 | 95 | assert_eq!(u, Xid::parse("00000000000000000000000000000000").unwrap(),); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/xtar.rs: -------------------------------------------------------------------------------- 1 | // EXtended tar functionality. 2 | 3 | use super::index; 4 | use std::convert::TryInto; 5 | use std::os::unix::ffi::OsStrExt; 6 | use std::path::PathBuf; 7 | 8 | fn format_pax_extended_record(key: &[u8], value: &[u8]) -> Vec { 9 | let mut record_len = 3 + key.len() + value.len(); 10 | let mut record_len_s = format!("{}", record_len); 11 | // Whoever designed the pax_ext extended header format was a bit crazy. 12 | // We just loop until we have fixpoint record length. 13 | loop { 14 | if record_len_s.len() + 3 + key.len() + value.len() == record_len { 15 | break; 16 | } 17 | record_len = record_len_s.len() + 3 + key.len() + value.len(); 18 | record_len_s = format!("{}", record_len); 19 | } 20 | 21 | let mut record = Vec::with_capacity(record_len); 22 | record.extend_from_slice(record_len_s.as_bytes()); 23 | record.extend_from_slice(b" "); 24 | record.extend_from_slice(key); 25 | record.extend_from_slice(b"="); 26 | record.extend_from_slice(value); 27 | record.extend_from_slice(b"\n"); 28 | debug_assert!(record.len() == record_len); 29 | record 30 | } 31 | 32 | pub fn index_entry_to_tarheader( 33 | ent: &index::IndexEntry, 34 | hard_link: Option<&PathBuf>, 35 | ) -> Result, anyhow::Error> { 36 | let mut pax_ext_records = Vec::new(); 37 | let mut ustar_hdr = tar::Header::new_ustar(); 38 | 39 | let tar_type = match &hard_link { 40 | Some(hard_link) => match ent.kind() { 41 | index::IndexEntryKind::Other => { 42 | anyhow::bail!( 43 | "index entry {} has an unknown type", 44 | ent.path.to_string_lossy() 45 | ) 46 | } 47 | index::IndexEntryKind::Directory => anyhow::bail!( 48 | "index entry {} is a directory, so can't have a hard link to 
{}", 49 | ent.path.to_string_lossy(), 50 | hard_link.to_string_lossy(), 51 | ), 52 | _ => tar::EntryType::Link, 53 | }, 54 | 55 | None => match ent.kind() { 56 | index::IndexEntryKind::Other => { 57 | anyhow::bail!( 58 | "index entry {} has an unknown type", 59 | ent.path.to_string_lossy() 60 | ) 61 | } 62 | index::IndexEntryKind::Regular => tar::EntryType::Regular, 63 | index::IndexEntryKind::Symlink => tar::EntryType::Symlink, 64 | index::IndexEntryKind::Char => tar::EntryType::Char, 65 | index::IndexEntryKind::Block => tar::EntryType::Block, 66 | index::IndexEntryKind::Directory => tar::EntryType::Directory, 67 | index::IndexEntryKind::Fifo => tar::EntryType::Fifo, 68 | }, 69 | }; 70 | 71 | ustar_hdr.set_entry_type(tar_type); 72 | ustar_hdr.set_mode(ent.mode.0 as u32); 73 | ustar_hdr.set_mtime(ent.mtime.0); 74 | ustar_hdr.set_uid(ent.uid.0); 75 | ustar_hdr.set_gid(ent.gid.0); 76 | ustar_hdr.set_size(if hard_link.is_none() { ent.size.0 } else { 0 }); 77 | ustar_hdr.set_device_major(ent.dev_major.0 as u32)?; 78 | ustar_hdr.set_device_minor(ent.dev_minor.0 as u32)?; 79 | 80 | match ustar_hdr.set_path(&ent.path) { 81 | Ok(()) => (), 82 | Err(e) => { 83 | /* 100 is more than ustar can handle as a path target */ 84 | if ent.path.as_os_str().len() > 100 { 85 | let path_bytes = ent.path.as_os_str().as_bytes(); 86 | let path_record = format_pax_extended_record(b"path", path_bytes); 87 | pax_ext_records.extend_from_slice(&path_record); 88 | } else { 89 | return Err(e.into()); 90 | } 91 | } 92 | }; 93 | 94 | if matches!(tar_type, tar::EntryType::Symlink | tar::EntryType::Link) { 95 | let target = if let Some(ref hard_link) = hard_link { 96 | hard_link 97 | } else { 98 | ent.link_target.as_ref().unwrap() 99 | }; 100 | 101 | match ustar_hdr.set_link_name(target) { 102 | Ok(()) => (), 103 | Err(err) => { 104 | /* 100 is more than ustar can handle as a link target */ 105 | if target.as_os_str().len() > 100 { 106 | let target_record = 107 | 
format_pax_extended_record(b"linkpath", target.as_os_str().as_bytes()); 108 | pax_ext_records.extend_from_slice(&target_record); 109 | } else { 110 | return Err(err.into()); 111 | } 112 | } 113 | } 114 | } 115 | 116 | ustar_hdr.set_cksum(); 117 | 118 | match &ent.xattrs { 119 | Some(xattrs) => { 120 | let mut key_bytes = Vec::with_capacity(24); 121 | for (k, v) in xattrs.iter() { 122 | key_bytes.truncate(0); 123 | key_bytes.extend_from_slice(b"SCHILY.xattr."); 124 | key_bytes.extend_from_slice(k); 125 | pax_ext_records.extend_from_slice(&format_pax_extended_record(&key_bytes, v)); 126 | } 127 | } 128 | None => (), 129 | } 130 | 131 | let mut hdr_bytes = Vec::new(); 132 | 133 | if !pax_ext_records.is_empty() { 134 | let mut pax_ext_hdr = tar::Header::new_ustar(); 135 | pax_ext_hdr.set_entry_type(tar::EntryType::XHeader); 136 | pax_ext_hdr.set_size(pax_ext_records.len().try_into().unwrap()); 137 | pax_ext_hdr.set_cksum(); 138 | hdr_bytes.extend_from_slice(&pax_ext_hdr.as_bytes()[..]); 139 | hdr_bytes.extend_from_slice(&pax_ext_records); 140 | let remaining = 512 - (hdr_bytes.len() % 512); 141 | if remaining < 512 { 142 | let buf = [0; 512]; 143 | hdr_bytes.extend_from_slice(&buf[..remaining]); 144 | } 145 | debug_assert!(hdr_bytes.len() % 512 == 0); 146 | } 147 | 148 | hdr_bytes.extend_from_slice(&ustar_hdr.as_bytes()[..]); 149 | 150 | Ok(hdr_bytes) 151 | } 152 | -------------------------------------------------------------------------------- /support/bindgen.sh: -------------------------------------------------------------------------------- 1 | set -eux 2 | 3 | bindgen ./csrc/sodium-bindings.h \ 4 | --whitelist-function "crypto_.*" \ 5 | --whitelist-type "crypto_.*" \ 6 | --whitelist-var "crypto_.*" \ 7 | --whitelist-function "sodium_.*" \ 8 | --whitelist-var "sodium_.*" \ 9 | --whitelist-function "randombytes_.*" \ 10 | > ./src/sodium_bindings_gen.rs -------------------------------------------------------------------------------- /support/builds.sr.ht/debian.yml: 
-------------------------------------------------------------------------------- 1 | image: debian/sid 2 | packages: 3 | - cargo 4 | - pkg-config 5 | - libsodium-dev 6 | - sqlite3 7 | - uuid-runtime 8 | - bats 9 | sources: 10 | - https://github.com/andrewchambers/bupstash 11 | tasks: 12 | - build: | 13 | cd bupstash 14 | cargo test 15 | cargo build --release 16 | export PATH=$(pwd)/target/release:$PATH 17 | bats ./cli-tests -------------------------------------------------------------------------------- /support/builds.sr.ht/freebsd.yml: -------------------------------------------------------------------------------- 1 | image: freebsd/latest 2 | packages: 3 | - rust 4 | - pkgconf 5 | - libsodium 6 | - sqlite3 7 | - python3 8 | - gtar 9 | - bats-core 10 | sources: 11 | - https://github.com/andrewchambers/bupstash 12 | tasks: 13 | - build: | 14 | cd bupstash 15 | export PKG_CONFIG=pkgconf 16 | cargo test 17 | cargo build --release 18 | export PATH=$(pwd)/target/release:$PATH 19 | bats ./cli-tests -------------------------------------------------------------------------------- /support/builds.sr.ht/openbsd.yml: -------------------------------------------------------------------------------- 1 | image: openbsd/latest 2 | packages: 3 | - rust 4 | - pkgconf 5 | - libsodium 6 | - sqlite3 7 | - python3 8 | - gtar 9 | - bats 10 | sources: 11 | - https://github.com/andrewchambers/bupstash 12 | tasks: 13 | - build: | 14 | cd bupstash 15 | export PKG_CONFIG=pkgconf 16 | cargo test 17 | cargo build --release 18 | export PATH=$(pwd)/target/release:$PATH 19 | bats ./cli-tests -------------------------------------------------------------------------------- /support/pgo-build.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh 2 | 3 | set -eux 4 | 5 | cargo clean 6 | rm -rf ./pgo 7 | mkdir pgo 8 | mkdir pgo/data 9 | 10 | export BUPSTASH_REPOSITORY="$(pwd)/pgo/repo" 11 | export BUPSTASH_SEND_LOG="$(pwd)/pgo/bupstash.sendlog" 12 | export BUPSTASH_QUERY_CACHE="$(pwd)/pgo/bupstash.querycache" 13 | export BUPSTASH_KEY=$(pwd)/pgo/repo.key 14 | 15 | RUSTFLAGS="-Cprofile-generate=$(pwd)/pgo/data" \ 16 | cargo build --release 17 | 18 | ./target/release/bupstash init 19 | ./target/release/bupstash new-key -o ./pgo/repo.key 20 | ./target/release/bupstash put ./target 21 | id=$(./target/release/bupstash put ./target) 22 | ./target/release/bupstash list "id=*" > /dev/null 23 | ./target/release/bupstash get "id=$id" > /dev/null 24 | ./target/release/bupstash rm --allow-many "id=*" > /dev/null 25 | 26 | llvm-profdata merge -o ./pgo/merged.profdata ./pgo/data 27 | 28 | RUSTFLAGS="-Cprofile-use=$(pwd)/pgo/merged.profdata" \ 29 | cargo build --release 30 | -------------------------------------------------------------------------------- /support/plot-chunk-sizes.gnuplot: -------------------------------------------------------------------------------- 1 | n=100 #number of intervals 2 | max=10000000. #max value 3 | min=0. #min value 4 | width=(max-min)/n #interval width 5 | #function used to map a value to the intervals 6 | hist(x,width)=width*floor(x/width)+width/2.0 7 | set boxwidth width*0.9 8 | set style fill solid 0.5 # fill style 9 | 10 | #count and plot 11 | set term png 12 | set out "chunksizes.png" 13 | plot "chunksizes" u (hist($1,width)):(1.0) smooth freq w boxes lc rgb"green" notitle -------------------------------------------------------------------------------- /support/pre-commit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | exit_code=0 3 | 4 | cargo fmt --all -- --quiet --check 5 | if [ $? 
-ne 0 ]; then 6 | echo "Please run 'cargo fmt --all' before committing" 7 | exit_code=1 8 | fi 9 | 10 | cargo clippy -- -D warnings 2> /dev/null 11 | if [ $? -ne 0 ]; then 12 | echo "Please run 'cargo clippy' and fix all issues before committing" 13 | exit_code=1 14 | fi 15 | 16 | if [ $exit_code -ne 0 ]; then 17 | exit $exit_code 18 | fi 19 | -------------------------------------------------------------------------------- /support/print-doc-checklist.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | cat <= 0.8.2) 9 | mustache (>= 0.7.0) 10 | rdiscount (>= 1.5.8) 11 | 12 | PLATFORMS 13 | ruby 14 | 15 | DEPENDENCIES 16 | ronn 17 | 18 | BUNDLED WITH 19 | 2.1.4 20 | -------------------------------------------------------------------------------- /support/ronn/default.nix: -------------------------------------------------------------------------------- 1 | { stdenv, lib, bundlerEnv, bundlerUpdateScript, makeWrapper, groff, callPackage }: 2 | 3 | stdenv.mkDerivation rec { 4 | pname = "ronn"; 5 | version = env.gems.ronn.version; 6 | 7 | env = bundlerEnv { 8 | name = "ronn-gems"; 9 | gemdir = ./.; 10 | }; 11 | 12 | dontUnpack = true; 13 | 14 | nativeBuildInputs = [ makeWrapper ]; 15 | 16 | installPhase = '' 17 | mkdir -p $out/bin 18 | makeWrapper ${env}/bin/ronn $out/bin/ronn \ 19 | --set PATH ${groff}/bin 20 | ''; 21 | 22 | passthru.updateScript = bundlerUpdateScript "ronn"; 23 | 24 | passthru.tests.reproducible-html-manpage = callPackage ./test-reproducible-html.nix { }; 25 | 26 | meta = with lib; { 27 | description = "markdown-based tool for building manpages"; 28 | homepage = "https://rtomayko.github.io/ronn/"; 29 | license = licenses.mit; 30 | maintainers = with maintainers; [ zimbatm nicknovitski ]; 31 | platforms = env.ruby.meta.platforms; 32 | }; 33 | } 34 | -------------------------------------------------------------------------------- /support/ronn/gemset.nix: 
-------------------------------------------------------------------------------- 1 | { 2 | hpricot = { 3 | source = { 4 | remotes = ["https://rubygems.org"]; 5 | sha256 = "1jn8x9ch79gqmnzgyz78kppavjh5lqx0y0r6frykga2b86rz9s6z"; 6 | type = "gem"; 7 | }; 8 | version = "0.8.6"; 9 | }; 10 | mustache = { 11 | source = { 12 | remotes = ["https://rubygems.org"]; 13 | sha256 = "1v4pdvgvs8gw0zbh5sy3l308amlsjg8sdfrkml0g0m0wwj4x7naf"; 14 | type = "gem"; 15 | }; 16 | version = "1.0.3"; 17 | }; 18 | rdiscount = { 19 | source = { 20 | remotes = ["https://rubygems.org"]; 21 | sha256 = "1arvk3k06prxasq1djbj065ixar4zl171340g7wr1ww4gj9makx3"; 22 | type = "gem"; 23 | }; 24 | version = "2.2.0.1"; 25 | }; 26 | ronn = { 27 | source = { 28 | remotes = ["https://rubygems.org"]; 29 | sha256 = "07plsxxfx5bxdk72ii9za6km0ziqlq8jh3bicr4774dalga6zpw2"; 30 | type = "gem"; 31 | }; 32 | version = "0.7.3"; 33 | }; 34 | } 35 | -------------------------------------------------------------------------------- /support/ronn/test-reproducible-html.nix: -------------------------------------------------------------------------------- 1 | { runCommand 2 | , diffutils 3 | , ronn 4 | }: 5 | runCommand "ronn-test-reproducible-html" { } '' 6 | set -euo pipefail 7 | 8 | cat > aprog.1.ronn << EOF 9 | aprog 10 | ===== 11 | 12 | ## AUTHORS 13 | 14 | Vincent Haupert 15 | EOF 16 | 17 | # We have to repeat the manpage generation a few times to be confident 18 | # it is in fact reproducible. 
19 | for i in {1..20}; do 20 | ${ronn}/bin/ronn --html --pipe aprog.1.ronn > aprog.1.html-1 21 | ${ronn}/bin/ronn --html --pipe aprog.1.ronn > aprog.1.html-2 22 | 23 | ${diffutils}/bin/diff -q aprog.1.html-1 aprog.1.html-2 \ 24 | || (printf 'The HTML manpage is not reproducible (round %d)' "$i" && exit 1) 25 | done 26 | 27 | echo 'The HTML manpage appears reproducible' 28 | 29 | mkdir $out 30 | '' 31 | -------------------------------------------------------------------------------- /support/shell.nix: -------------------------------------------------------------------------------- 1 | let 2 | pkgs = (import ) {}; 3 | in 4 | pkgs.stdenv.mkDerivation { 5 | name = "shell"; 6 | 7 | LIBCLANG_PATH="${pkgs.llvmPackages.libclang}/lib"; 8 | 9 | buildInputs = with pkgs; [ 10 | clang 11 | clang-tools 12 | linuxPackages.perf 13 | llvm 14 | entr 15 | minio 16 | minio-client 17 | pandoc 18 | bats 19 | openssl 20 | libsodium 21 | pkg-config 22 | sqlite 23 | rust-bindgen 24 | jq 25 | (pkgs.callPackage ./ronn {}) 26 | hyperfine 27 | ]; 28 | 29 | hardeningDisable = ["all"]; 30 | } 31 | -------------------------------------------------------------------------------- /support/src-release.sh: -------------------------------------------------------------------------------- 1 | set -eux 2 | 3 | version="$1" 4 | 5 | rm -rf ./release/ 6 | mkdir ./release 7 | mkdir release/src 8 | mkdir release/src/.cargo 9 | git archive $version | tar -C release/src -x -f - 10 | cd release/src 11 | 12 | cargo vendor > .cargo/config 13 | 14 | tar -cvf - . | gzip -9 > ../../bupstash-$1-src+deps.tar.gz 15 | gpg -a --sign --detach-sig --default-key ac@bupstash.io ../../bupstash-$1-src+deps.tar.gz 16 | 17 | cd .. 18 | mkdir man 19 | cd man 20 | cp ../src/doc/man/*.md ./ 21 | ronn -r *.md 22 | rm *.md 23 | 24 | tar -cvf - . 
| gzip -9 > ../../bupstash-$1-man.tar.gz 25 | gpg -a --sign --detach-sig --default-key ac@bupstash.io ../../bupstash-$1-man.tar.gz --------------------------------------------------------------------------------