├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── cli-tests ├── cli-tests.bats ├── diod-thrash.sh ├── mk-random-dir.py ├── parallel-thrash.sh └── s3-parallel-thrash.sh ├── csrc ├── cksumvfs │ ├── cksumvfs.c │ ├── cksumvfs_sqlite_version_number.c │ ├── sqlite3.h │ └── upgrade.sh └── sodium-bindings.h ├── doc ├── cli │ ├── diff.txt │ ├── exec-with-locks.txt │ ├── gc.txt │ ├── get.txt │ ├── help.txt │ ├── init.txt │ ├── list-contents.txt │ ├── list.txt │ ├── new-key.txt │ ├── new-sub-key.txt │ ├── put.txt │ ├── recover-removed.txt │ ├── restore.txt │ ├── rm.txt │ ├── serve.txt │ ├── sync.txt │ └── version.txt ├── guides │ ├── Filesystem Backups.md │ ├── Getting Started.md │ ├── Network Filesystems.md │ ├── Password Protected Keys.md │ ├── Remote Access Controls.md │ └── Secure Offline Keys.md ├── man │ ├── bupstash-authors.7.md │ ├── bupstash-diff.1.md │ ├── bupstash-exec-with-locks.1.md │ ├── bupstash-gc.1.md │ ├── bupstash-get.1.md │ ├── bupstash-init.1.md │ ├── bupstash-keyfiles.7.md │ ├── bupstash-list-contents.1.md │ ├── bupstash-list.1.md │ ├── bupstash-new-key.1.md │ ├── bupstash-new-sub-key.1.md │ ├── bupstash-put.1.md │ ├── bupstash-query-language.7.md │ ├── bupstash-recover-removed.1.md │ ├── bupstash-repository.7.md │ ├── bupstash-restore.1.md │ ├── bupstash-rm.1.md │ ├── bupstash-serve.1.md │ ├── bupstash-sync.1.md │ └── bupstash.1.md ├── technical_overview.md └── upcoming_changelog.md ├── src ├── abloom.rs ├── acache.rs ├── address.rs ├── base64.rs ├── chunk_storage.rs ├── chunker.rs ├── cksumvfs.rs ├── client.rs ├── compression.rs ├── crypto.rs ├── dir_chunk_storage.rs ├── external_chunk_storage.rs ├── fmtutil.rs ├── fprefetch.rs ├── fstx1.rs ├── fstx2.rs ├── fsutil.rs ├── hex.rs ├── htree.rs ├── index.rs ├── indexer.rs ├── ioutil.rs ├── keys.rs ├── main.rs ├── migrate.rs ├── oplog.rs ├── pem.rs ├── protocol.rs ├── put.rs ├── query.rs ├── querycache.rs ├── repository.rs ├── 
rollsum.rs ├── sendlog.rs ├── server.rs ├── sodium.rs ├── sodium_bindings_gen.rs ├── vfs.rs ├── xglobset.rs ├── xid.rs └── xtar.rs └── support ├── bindgen.sh ├── builds.sr.ht ├── debian.yml ├── freebsd.yml └── openbsd.yml ├── pgo-build.sh ├── plot-chunk-sizes.gnuplot ├── pre-commit.sh ├── print-doc-checklist.sh ├── ronn ├── Gemfile ├── Gemfile.lock ├── default.nix ├── gemset.nix └── test-reproducible-html.nix ├── shell.nix └── src-release.sh /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | name: CI 4 | 5 | jobs: 6 | check: 7 | name: Check 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: "Install libsodium" 11 | run: sudo apt-get install -y libsodium-dev 12 | - uses: actions/checkout@v2 13 | - uses: actions/cache@v2 14 | with: 15 | path: | 16 | ~/.cargo/registry 17 | ~/.cargo/git 18 | target 19 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 20 | - uses: actions-rs/toolchain@v1 21 | with: 22 | profile: minimal 23 | toolchain: stable 24 | override: true 25 | - uses: actions-rs/cargo@v1 26 | with: 27 | command: check 28 | 29 | test: 30 | needs: check 31 | strategy: 32 | fail-fast: false 33 | matrix: 34 | IMAGE: [ubuntu-latest, macos-latest] 35 | name: Cargo Test Suite (${{ matrix.IMAGE }}) 36 | runs-on: ${{ matrix.IMAGE }} 37 | steps: 38 | - name: "Install dependencies (Ubuntu)" 39 | run: sudo apt-get install -y libsodium-dev 40 | if: ${{ matrix.IMAGE == 'ubuntu-latest' }} 41 | - name: "Install dependencies (macOS)" 42 | run: brew install libsodium 43 | if: ${{ matrix.IMAGE == 'macos-latest' }} 44 | - uses: actions/checkout@v2 45 | # macOS's BSD tar implementations corrupts the cargo cache when used. There 46 | # is a workaround that installs gnu-tar, but since bupstash recommends using 47 | # the system tar implementation we just skip caching on macOS. 
48 | # See: https://github.com/actions/cache/issues/403 49 | - uses: actions/cache@v2 50 | with: 51 | path: | 52 | ~/.cargo/registry 53 | ~/.cargo/git 54 | target 55 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 56 | if: ${{ matrix.IMAGE != 'macos-latest' }} 57 | - uses: actions-rs/toolchain@v1 58 | with: 59 | profile: minimal 60 | toolchain: stable 61 | override: true 62 | - uses: actions-rs/cargo@v1 63 | with: 64 | command: test 65 | 66 | test-cli: 67 | needs: check 68 | strategy: 69 | fail-fast: false 70 | matrix: 71 | IMAGE: [ubuntu-latest, macos-latest] 72 | name: CLI Test Suite (${{ matrix.IMAGE }}) 73 | runs-on: ${{ matrix.IMAGE }} 74 | steps: 75 | - name: "Install dependencies (Ubuntu)" 76 | run: sudo apt-get install -y libsodium-dev bats bubblewrap 77 | if: ${{ matrix.IMAGE == 'ubuntu-latest' }} 78 | - name: "Install dependencies (macOS)" 79 | run: | 80 | brew uninstall --force bats 81 | brew install libsodium bats-core 82 | if: ${{ matrix.IMAGE == 'macos-latest' }} 83 | - uses: actions/checkout@v2 84 | # macOS's BSD tar implementations corrupts the cargo cache when used. There 85 | # is a workaround that installs gnu-tar, but since bupstash recommends using 86 | # the system tar implementation we just skip caching on macOS. 87 | # See: https://github.com/actions/cache/issues/403 88 | - uses: actions/cache@v2 89 | with: 90 | path: | 91 | ~/.cargo/registry 92 | ~/.cargo/git 93 | target 94 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 95 | if: ${{ matrix.IMAGE != 'macos-latest' }} 96 | - uses: actions-rs/toolchain@v1 97 | with: 98 | profile: minimal 99 | toolchain: stable 100 | override: true 101 | - uses: actions-rs/cargo@v1 102 | with: 103 | command: build 104 | args: --release 105 | - name: "Run tests" 106 | run: PATH="$(pwd)/target/release:$PATH" bats ./cli-tests 107 | # The tests here should be reasonably quick to finish. 
We override the 108 | # default 6 hour timeout in case they aren't 109 | timeout-minutes: 5 110 | 111 | fmt: 112 | needs: check 113 | name: Rustfmt 114 | runs-on: ubuntu-latest 115 | steps: 116 | - name: "Install libsodium" 117 | run: sudo apt-get install -y libsodium-dev 118 | - uses: actions/checkout@v2 119 | - uses: actions/cache@v2 120 | with: 121 | path: | 122 | ~/.cargo/registry 123 | ~/.cargo/git 124 | target 125 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 126 | - uses: actions-rs/toolchain@v1 127 | with: 128 | profile: minimal 129 | toolchain: stable 130 | override: true 131 | - run: rustup component add rustfmt 132 | - uses: actions-rs/cargo@v1 133 | with: 134 | command: fmt 135 | args: --all -- --check 136 | 137 | clippy: 138 | needs: check 139 | name: Clippy 140 | runs-on: ubuntu-latest 141 | steps: 142 | - name: "Install libsodium" 143 | run: sudo apt-get install -y libsodium-dev 144 | - uses: actions/checkout@v2 145 | - uses: actions/cache@v2 146 | with: 147 | path: | 148 | ~/.cargo/registry 149 | ~/.cargo/git 150 | target 151 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 152 | - uses: actions-rs/toolchain@v1 153 | with: 154 | profile: minimal 155 | toolchain: stable 156 | override: true 157 | - run: rustup component add clippy 158 | - uses: actions-rs/cargo@v1 159 | with: 160 | command: clippy 161 | args: -- -D warnings 162 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bupstash" 3 | version = "0.12.1" 4 | authors = ["Andrew Chambers "] 5 | edition = "2018" 6 | license = "MIT" 7 | repository = "https://github.com/andrewchambers/bupstash" 8 | readme = 
"README.md" 9 | description = "Easy and efficient encrypted backups." 10 | 11 | [profile.release] 12 | lto = true 13 | panic = 'abort' 14 | codegen-units = 1 15 | incremental = false 16 | 17 | [features] 18 | simd-rollsum = [] 19 | 20 | [dependencies] 21 | 22 | # More trusted dependencies 23 | crossbeam-utils = "0.8" 24 | crossbeam-channel = "0.5" 25 | blake3 = "1" 26 | itertools = "0.10" 27 | rusqlite = { version = "0.25", features = ["bundled"] } 28 | lz4 = "1.2" 29 | zstd-safe = { version = "6.0", features = ["std", "experimental"] } 30 | anyhow = "1" 31 | thiserror = "1.0" 32 | libc = "0.2" 33 | getopts = "0.2" 34 | codemap = "0.1" 35 | codemap-diagnostic = "0.1" 36 | serde = { version = "1.0", features = ["derive"] } 37 | serde_json = "1.0" 38 | serde_bare = "0.4" 39 | path-clean = "0.1.0" 40 | humantime = "2.0.1" 41 | atty = "0.2" 42 | once_cell = "1.4" 43 | tar = "0.4" 44 | regex = { version = "1", default-features = false, features = ["std"] } 45 | globset = "0.4.8" 46 | chrono = { version = "0.4", features = ["serde"]} 47 | cfg-if = "0.1" 48 | shlex = "0.1" 49 | nix = "0.23" 50 | indicatif = "0.16.2" 51 | rangemap = "0.1.11" 52 | xattr = "0.2" 53 | walkdir = "2" 54 | bitflags = "1" 55 | uriparse = "0.6" 56 | plmap = "0.3.0" 57 | num_cpus = "1" 58 | 59 | [dev-dependencies] 60 | 61 | rand = "0.8" 62 | tempfile = "3" 63 | 64 | [build-dependencies] 65 | 66 | cc = "1" 67 | pkg-config = "0.3" 68 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 andrewchambers 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the 
Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bupstash 2 | 3 | [![Gitter](https://badges.gitter.im/bupstash/community.svg)](https://gitter.im/bupstash/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) 4 | 5 | Bupstash is a tool for encrypted backups - if you need secure backups, Bupstash is the tool for you. 6 | 7 | 8 | Bupstash was designed to have: 9 | 10 | - Efficient deduplication - Bupstash can store thousands of encrypted directory snapshots using a fraction of the space encrypted tarballs would require. 11 | 12 | - Strong privacy - Data is encrypted client side and the repository never needs has access to the decryption keys. 13 | 14 | - Offline decryption keys - Backups do not require the decryption key be anywhere near an at-risk server or computer. 15 | 16 | - Key/value tagging with search - all while keeping the tags fully encrypted. 17 | 18 | - Great performance on slow networks - Bupstash really strives to work well on high latency networks like cellular and connections to far-off lands. 
19 | 20 | - Secure remote access controls - Ransomware, angry spouses, and disgruntled business partners will be powerless to delete your remote backups. 21 | 22 | - Efficient incremental backups - Bupstash knows what it backed up last time and skips that work. 23 | 24 | - Fantastic performance with low ram usage - Bupstash won't bog down your production servers. 25 | 26 | - Safety against malicious attacks - Bupstash is written in a memory safe language to dramatically reduce the attack surface over the network. 27 | 28 | ## Stability and Backwards Compatibility 29 | 30 | Bupstash is beta software, while all efforts are made to keep bupstash bug free, we currently recommend 31 | using bupstash for making *REDUNDANT* backups where failure can be tolerated. 32 | 33 | The repository format is approaching stability, and will not be changed 34 | in a backwards incompatible way unless there is *very* strong justification. Future changes will most likely be backwards compatible, or come with a migration path if it is needed at all. 35 | 36 | # Guides, documentation and support 37 | 38 | - Visit the [project website](https://bupstash.io). 39 | - Visit the [quickstart guide](https://bupstash.io/doc/guides/Getting%20Started.html) for an introductory tutorial. 40 | - Visit the [filesystem backups guide](https://bupstash.io/doc/guides/Filesystem%20Backups.html) for examples of making backups. 41 | - Visit the [man pages](https://bupstash.io/doc/man/bupstash.html) for more comprehensive documentation. 42 | - Visit the [community chat](https://gitter.im/bupstash/community?utm_source=share-link&utm_medium=link&utm_campaign=share-link) or the [community forum](https://github.com/andrewchambers/bupstash/discussions) to ask questions. 43 | - Read the introductory [blog post](https://acha.ninja/blog/introducing_bupstash/). 44 | - Read the [technical overview](./doc/technical_overview.md) to understand how it works. 
45 | 46 | # Typical usage 47 | 48 | Initialize a new Bupstash repository via ssh. 49 | ``` 50 | $ export BUPSTASH_REPOSITORY=ssh://$SERVER/home/me/backups 51 | $ # Ensure bupstash is on the $PATH of both machines. 52 | $ bupstash init 53 | ``` 54 | 55 | Create a new encryption key, and tell bupstash to use it. 56 | ``` 57 | $ bupstash new-key -o backups.key 58 | $ export BUPSTASH_KEY="$(pwd)/backups.key" 59 | ``` 60 | 61 | Save a directory as a tarball snapshot. 62 | ``` 63 | $ bupstash put hostname="$(hostname)" ./some-data 64 | ebb66f3baa5d432e9f9a28934888a23d 65 | ``` 66 | Save the output of a command, checking for errors. 67 | ``` 68 | $ bupstash put --exec name=database.sql pgdump mydatabase 69 | 14ebd2073b258b1f55c5bbc889c49db4 70 | ``` 71 | 72 | List items matching a query. 73 | ``` 74 | $ bupstash list name="backup.tar" and hostname="server-1" 75 | id="bcb8684e6bf5cb453e77486decf61685" name="some-file.txt" hostname="server-1" timestamp="2020/07/27 11:26:16" 76 | ``` 77 | 78 | List files in a backup. 79 | ``` 80 | $ bupstash list-contents id=bcb86* 81 | drwxr-xr-x 0B 2020/10/30 13:32:04 . 82 | -rw-r--r-- 7B 2020/10/30 13:32:04 hello.txt 83 | ``` 84 | 85 | Get an item matching a query. 86 | ``` 87 | $ bupstash get id=bcb8684e6bf5cb453e77486decf61685 88 | some data... 89 | 90 | $ bupstash get id="ebb66*" | tar -C ./restore -xf - 91 | ``` 92 | 93 | Fetch a single file from a backup. 94 | ``` 95 | $ bupstash get --pick hello.txt id="bcb86*" 96 | hello! 97 | ``` 98 | 99 | Diff backups, with local directories or other backups. 100 | ``` 101 | $ bupstash diff /home/ac :: id="a4b8f*" 102 | ... 103 | - -rw------- 14.50KiB 2021/08/01 02:36:19 .bash_history 104 | + -rw------- 13.66KiB 2021/08/01 11:51:23 .bash_history 105 | ``` 106 | 107 | Restore backups to a local directory. 108 | 109 | ``` 110 | $ mkdir restore-dir 111 | $ bupstash restore --into ./restore-dir id="a4b8f*" 112 | ``` 113 | 114 | Remove items matching a query. 
115 | ``` 116 | $ bupstash rm name=some-data.txt and older-than 30d 117 | ``` 118 | 119 | Run the garbage collector to reclaim disk space. 120 | ``` 121 | $ bupstash gc 122 | ``` 123 | 124 | # Installation 125 | 126 | ## From source 127 | 128 | First ensure you have a recent rust+cargo, pkg-config and libsodium-dev (>= 1.0.14) package installed. 129 | 130 | Next clone the repository and run cargo build. 131 | ``` 132 | $ git clone https://github.com/andrewchambers/bupstash 133 | $ cd bupstash 134 | $ cargo build --release 135 | $ cp ./target/release/bupstash $INSTALL_DIR 136 | ``` 137 | 138 | ### Pkgconf 139 | 140 | You can use pkgconf instead of pkg-config (this is required on freebsd) by setting 141 | the PKG_CONFIG environment variable. 142 | 143 | ``` 144 | $ export PKG_CONFIG=pkgconf 145 | ``` 146 | 147 | 148 | ## Building man pages 149 | 150 | The man pages are currently build using a markdown to man page renderer called [ronn](https://github.com/rtomayko/ronn). 151 | 152 | ``` 153 | $ cd doc/man 154 | $ ronn -r *.md 155 | ``` 156 | 157 | ## Generating release tarballs 158 | 159 | ``` 160 | $ sh support/src-release.sh $tag 161 | $ echo bupstash-*.tar.gz 162 | bupstash-$version-man.tar.gz 163 | bupstash-$version-src+deps.tar.gz 164 | ``` 165 | 166 | ## Test suites 167 | 168 | Install bash automated test framework and run the following to run both the unit tests, and cli integration test suite. 169 | 170 | ``` 171 | $ cargo test 172 | $ cargo build --release 173 | $ export PATH=${CARGO_TARGET_DIR:-$PWD/target}/release:$PATH 174 | $ bats ./cli-tests 175 | ``` 176 | 177 | ## Precompiled releases 178 | 179 | Head to the [releases page](https://github.com/andrewchambers/bupstash/releases) and download for 180 | a build for your platform. Simply extract the archive and add the single bupstash binary to your PATH. 181 | 182 | Currently we only precompile for linux (help wanted for more platforms). 
183 | 184 | 185 | 186 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | pkg_config::probe_library("libsodium").unwrap(); 3 | 4 | println!("cargo:rerun-if-changed=csrc/cksumvfs/sqlite3.h"); 5 | cc::Build::new() 6 | .warnings(false) // Not our code/warnings to fix. 7 | .flag("-DSQLITE_CKSUMVFS_STATIC") 8 | .flag("-Icsrc/cksumvfs") 9 | .file("csrc/cksumvfs/cksumvfs.c") 10 | .file("csrc/cksumvfs/cksumvfs_sqlite_version_number.c") 11 | .compile("cksumvfs"); 12 | } 13 | -------------------------------------------------------------------------------- /cli-tests/diod-thrash.sh: -------------------------------------------------------------------------------- 1 | set -xu 2 | 3 | export SCRATCH=/tmp/diod-thrash 4 | export N_WORKERS=8 5 | export DIOD_PORT=1888 6 | 7 | cleanup () { 8 | for m in $(ls $SCRATCH/mnt/) 9 | do 10 | if mountpoint -q "$SCRATCH/mnt/$m" 11 | then 12 | sudo umount "$SCRATCH/mnt/$m" 13 | fi 14 | done 15 | killall bupstash 16 | killall diod 17 | trap - SIGTERM 18 | rm -rf "$SCRATCH" 19 | } 20 | 21 | cleanup 22 | trap "cleanup" SIGINT SIGTERM EXIT 23 | 24 | rm -rf "$SCRATCH" 25 | mkdir -p "$SCRATCH/root" 26 | export BUPSTASH_KEY="$SCRATCH/t.key" 27 | export BUPSTASH_REPOSITORY="$SCRATCH/root/repo" 28 | 29 | bupstash new-key -o "$BUPSTASH_KEY" 30 | bupstash init -r "$SCRATCH/root/repo" 31 | 32 | diod -l "127.0.0.1:$DIOD_PORT" -f -n -S -U $(whoami) -e "$SCRATCH/root" & 33 | sleep 1 34 | 35 | for i in $(seq $((N_WORKERS-1))) 36 | do 37 | mountpoint="$SCRATCH/mnt/$i" 38 | mkdir -p "$SCRATCH/mnt/$i" 39 | if ! 
sudo diodmount \ 40 | -n \ 41 | -v \ 42 | -o "port=$DIOD_PORT,uname=$(whoami)" \ 43 | "127.0.0.1:$SCRATCH/root" \ 44 | "$mountpoint" 45 | then 46 | exit 1 47 | fi 48 | done 49 | 50 | bupstash init -r "$SCRATCH/sync-source-repo" 51 | 52 | rm -f "$SCRATCH/thrash.summary" 53 | sqlite3 "$SCRATCH/thrash.summary" "create table thrash_results(name, count, unique(name));" 54 | 55 | inc_result () { 56 | sqlite3 "$SCRATCH/thrash.summary" \ 57 | "PRAGMA busy_timeout = 10000; 58 | begin immediate; 59 | insert into thrash_results(name, count) values('$1', 0) 60 | on conflict(name) do update set count=count+1 where name = '$1'; commit;" > /dev/null 61 | } 62 | 63 | thrash_worker () { 64 | 65 | export BUPSTASH_REPOSITORY="$1" 66 | 67 | for i in $(seq 15) 68 | do 69 | expected=$(uuidgen) 70 | id=$(bupstash put -q -e --no-send-log thrash_test=yes :: echo $expected) 71 | 72 | if test "$?" = 0 73 | then 74 | inc_result "put-ok" 75 | 76 | actual="$(bupstash get -q id=$id)" 77 | if test "$?" = 0 78 | then 79 | inc_result "get-ok" 80 | if test "$expected" != "$actual" 81 | then 82 | inc_result "get-corrupt" 83 | fi 84 | else 85 | inc_result "get-fail" 86 | fi 87 | 88 | bupstash rm -q id="$id" >&2 89 | if test "$?" = 0 90 | then 91 | inc_result "rm-ok" 92 | else 93 | inc_result "rm-fail" 94 | fi 95 | else 96 | inc_result "put-fail" 97 | fi 98 | 99 | expected=$(uuidgen) 100 | id=$(bupstash put -r "$SCRATCH/sync-source-repo" -q -e --no-send-log thrash_test=yes :: echo $expected) 101 | bupstash sync -r "$SCRATCH/sync-source-repo" --to "$BUPSTASH_REPOSITORY" -q id="$id" >&2 102 | if test "$?" = 0 103 | then 104 | inc_result "sync-ok" 105 | 106 | actual="$(bupstash get -q id=$id)" 107 | if test "$?" = 0 108 | then 109 | inc_result "sync-get-ok" 110 | if test "$expected" != "$actual" 111 | then 112 | inc_result "sync-get-corrupt" 113 | fi 114 | else 115 | inc_result "sync-get-fail" 116 | fi 117 | 118 | bupstash rm -q id="$id" >&2 119 | if test "$?" 
= 0 120 | then 121 | inc_result "rm-ok" 122 | else 123 | inc_result "rm-fail" 124 | fi 125 | else 126 | inc_result "sync-fail" 127 | fi 128 | bupstash rm -q -r "$SCRATCH/sync-source-repo" id="$id" >&2 129 | 130 | bupstash recover-removed -q >&2 131 | if test "$?" = 0 132 | then 133 | inc_result "recover-removed-ok" 134 | else 135 | inc_result "recover-removed-fail" 136 | fi 137 | 138 | bupstash gc -q >&2 139 | if test "$?" = 0 140 | then 141 | inc_result "gc-ok" 142 | else 143 | inc_result "gc-fail" 144 | fi 145 | done 146 | 147 | rm -f "$SCRATCH/want_chaos" 148 | } 149 | 150 | bupstash_serve_chaos_worker () { 151 | while test -f "$SCRATCH/want_chaos" 152 | do 153 | kill -9 $(ps -aux | grep 'bupstash serve' | grep -v "grep" | awk '{print $2}' | shuf | head -n $(($RANDOM/$N_WORKERS))) 154 | sleep 1 155 | done 156 | } 157 | 158 | # This loop is to control the max size of the repository. 159 | for i in $(seq 10) 160 | do 161 | 162 | bupstash rm --allow-many thrash_test=yes >&2 163 | bupstash gc >&2 164 | 165 | background_workers=() 166 | # At least enough workers so the scheduler hopefully 167 | # interleaves them in interesting ways. 168 | for j in $(seq $(($N_WORKERS-1))) 169 | do 170 | thrash_worker "$SCRATCH/mnt/$j/repo" & 171 | background_workers+=($!) 172 | done 173 | # One worker not via diod. 174 | thrash_worker "$SCRATCH/root/repo" & 175 | background_workers+=($!) 176 | 177 | touch "$SCRATCH/want_chaos" 178 | bupstash_serve_chaos_worker & 179 | background_workers+=($!) 180 | 181 | wait ${background_workers[@]} 182 | 183 | for id in $(bupstash list -q --format=jsonl1 | jq -r .id) 184 | do 185 | bupstash get -q id=$id > /dev/null 186 | if test "$?" 
!= 0 187 | then 188 | inc_result "get-corrupt" 189 | fi 190 | done 191 | 192 | if sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results;' | grep -q 'get\-corrupt' 193 | then 194 | echo "invariant check failed, 'get' should never return a corrupt result" 195 | exit 1 196 | fi 197 | 198 | if sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results;' | grep -q 'sync\-get\-corrupt' 199 | then 200 | echo "invariant check failed, 'sync' should never return a corrupt result" 201 | exit 1 202 | fi 203 | 204 | done 205 | 206 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='put-ok';")" = "" 207 | then 208 | echo "at least one 'put' operation must succeed for the test to pass." 209 | exit 1 210 | fi 211 | 212 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='sync-ok';")" = "" 213 | then 214 | echo "at least one 'sync' operation must succeed for the test to pass." 215 | exit 1 216 | fi 217 | 218 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='gc-ok';")" = "" 219 | then 220 | echo "at least one 'gc' operation must succeed for the test to pass." 221 | exit 1 222 | fi 223 | 224 | trap - EXIT 225 | 226 | set +x 227 | echo "test results..." 228 | sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results order by name;' 229 | echo "test passed" 230 | 231 | -------------------------------------------------------------------------------- /cli-tests/mk-random-dir.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | 3 | import string 4 | import random 5 | import os 6 | import os.path as path 7 | import sys 8 | 9 | MIN_NAME_LEN = 1 10 | MAX_NAME_LEN = 8 11 | MIN_CHILD_DIRS = 0 12 | MAX_CHILD_DIRS = 3 13 | MIN_CHILD_FILES = 0 14 | MAX_CHILD_FILES = 5 15 | MIN_DEPTH = 0 16 | MAX_DEPTH = 3 17 | FILE_SIZES = [0, 1, 2, 3, 1024, 512*1024, 2*1024*1024, 8*1024*1024] 18 | 19 | def random_file_name(): 20 | name_len = random.randint(MIN_NAME_LEN, MAX_NAME_LEN) 21 | return ''.join(random.choices(string.ascii_lowercase, k=name_len)) 22 | 23 | def fresh_dir_ent(dir_path): 24 | while True: 25 | p = path.join(dir_path, random_file_name()) 26 | if not path.exists(p): 27 | return p 28 | 29 | def random_dir(dir_path="./random_dir", depth=None): 30 | 31 | if depth is None: 32 | depth = random.randint(MIN_DEPTH, MAX_DEPTH) 33 | 34 | os.mkdir(dir_path) 35 | 36 | num_files = random.randint(MIN_CHILD_FILES, MAX_CHILD_FILES) 37 | num_dirs = random.randint(MIN_CHILD_DIRS, MAX_CHILD_DIRS) 38 | 39 | if depth != 0: 40 | for i in range(num_dirs): 41 | random_dir(dir_path=fresh_dir_ent(dir_path), depth=depth-1) 42 | 43 | for i in range(num_files): 44 | with open(fresh_dir_ent(dir_path), "wb") as f: 45 | fsize = random.choice(FILE_SIZES) 46 | f.write(os.urandom(fsize)) 47 | 48 | if __name__ == '__main__': 49 | random_dir(sys.argv[1]) -------------------------------------------------------------------------------- /cli-tests/parallel-thrash.sh: -------------------------------------------------------------------------------- 1 | set -xu 2 | 3 | if test "${SCRATCH:-}" = "" 4 | then 5 | export SCRATCH=/tmp 6 | else 7 | export SCRATCH 8 | fi 9 | 10 | export N_WORKERS=$(nproc) 11 | 12 | trap "trap - SIGTERM ; kill -9 -- -$$" SIGINT SIGTERM EXIT 13 | 14 | if test -n "${BUPSTASH_REPOSITORY_COMMAND:-}" 15 | then 16 | export BUPSTASH_TO_REPOSITORY_COMMAND="${BUPSTASH_REPOSITORY_COMMAND}" 17 | fi 18 | if test -n "${BUPSTASH_REPOSITORY:-}" 19 | then 20 | export 
BUPSTASH_TO_REPOSITORY="${BUPSTASH_REPOSITORY}" 21 | fi 22 | bupstash init -r "$SCRATCH/sync-source-repo" 23 | 24 | rm -f "$SCRATCH/thrash.summary" 25 | sqlite3 "$SCRATCH/thrash.summary" "create table thrash_results(name, count, unique(name));" 26 | 27 | inc_result () { 28 | sqlite3 "$SCRATCH/thrash.summary" \ 29 | "PRAGMA busy_timeout = 10000; 30 | begin immediate; 31 | insert into thrash_results(name, count) values('$1', 0) 32 | on conflict(name) do update set count=count+1 where name = '$1'; commit;" > /dev/null 33 | } 34 | 35 | thrash_worker () { 36 | for i in $(seq 15) 37 | do 38 | expected=$(uuidgen) 39 | id=$(bupstash put -q -e --no-send-log thrash_test=yes :: echo $expected) 40 | 41 | if test "$?" = 0 42 | then 43 | inc_result "put-ok" 44 | 45 | actual="$(bupstash get -q id=$id)" 46 | if test "$?" = 0 47 | then 48 | inc_result "get-ok" 49 | if test "$expected" != "$actual" 50 | then 51 | inc_result "get-corrupt" 52 | fi 53 | else 54 | inc_result "get-fail" 55 | fi 56 | 57 | bupstash rm -q id="$id" >&2 58 | if test "$?" = 0 59 | then 60 | inc_result "rm-ok" 61 | else 62 | inc_result "rm-fail" 63 | fi 64 | else 65 | inc_result "put-fail" 66 | fi 67 | 68 | expected=$(uuidgen) 69 | id=$(bupstash put -r "$SCRATCH/sync-source-repo" -q -e --no-send-log thrash_test=yes :: echo $expected) 70 | bupstash sync -r "$SCRATCH/sync-source-repo" -q id="$id" >&2 71 | if test "$?" = 0 72 | then 73 | inc_result "sync-ok" 74 | 75 | actual="$(bupstash get -q id=$id)" 76 | if test "$?" = 0 77 | then 78 | inc_result "sync-get-ok" 79 | if test "$expected" != "$actual" 80 | then 81 | inc_result "sync-get-corrupt" 82 | fi 83 | else 84 | inc_result "sync-get-fail" 85 | fi 86 | 87 | bupstash rm -q id="$id" >&2 88 | if test "$?" = 0 89 | then 90 | inc_result "rm-ok" 91 | else 92 | inc_result "rm-fail" 93 | fi 94 | else 95 | inc_result "sync-fail" 96 | fi 97 | bupstash rm -q -r "$SCRATCH/sync-source-repo" id="$id" >&2 98 | 99 | bupstash recover-removed -q >&2 100 | if test "$?" 
= 0 101 | then 102 | inc_result "recover-removed-ok" 103 | else 104 | inc_result "recover-removed-fail" 105 | fi 106 | 107 | bupstash gc -q >&2 108 | if test "$?" = 0 109 | then 110 | inc_result "gc-ok" 111 | else 112 | inc_result "gc-fail" 113 | fi 114 | done 115 | 116 | rm -f "$SCRATCH/want_chaos" 117 | } 118 | 119 | bupstash_serve_chaos_worker () { 120 | while test -f "$SCRATCH/want_chaos" 121 | do 122 | kill -9 $(ps -aux | grep 'bupstash serve' | grep -v "grep" | awk '{print $2}' | shuf | head -n $(($RANDOM/$N_WORKERS))) 123 | sleep 1 124 | done 125 | } 126 | 127 | # This loop is to control the max size of the repository. 128 | for i in $(seq 10) 129 | do 130 | 131 | bupstash rm --allow-many thrash_test=yes >&2 132 | bupstash gc >&2 133 | 134 | background_workers=() 135 | # At least enough workers so the scheduler hopefully 136 | # interleaves them in interesting ways. 137 | for j in $(seq $N_WORKERS) 138 | do 139 | thrash_worker & 140 | background_workers+=($!) 141 | done 142 | 143 | touch "$SCRATCH/want_chaos" 144 | bupstash_serve_chaos_worker & 145 | background_workers+=($!) 146 | 147 | wait ${background_workers[@]} 148 | 149 | for id in $(bupstash list -q --format=jsonl1 | jq -r .id) 150 | do 151 | bupstash get -q id=$id > /dev/null 152 | if test "$?" 
!= 0 153 | then 154 | inc_result "get-corrupt" 155 | fi 156 | done 157 | 158 | if sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results;' | grep -q 'get\-corrupt' 159 | then 160 | echo "invariant check failed, 'get' should never return a corrupt result" 161 | exit 1 162 | fi 163 | 164 | if sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results;' | grep -q 'sync\-get\-corrupt' 165 | then 166 | echo "invariant check failed, 'sync' should never return a corrupt result" 167 | exit 1 168 | fi 169 | 170 | done 171 | 172 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='put-ok';")" = "" 173 | then 174 | echo "at least one 'put' operation must succeed for the test to pass." 175 | exit 1 176 | fi 177 | 178 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='sync-ok';")" = "" 179 | then 180 | echo "at least one 'sync' operation must succeed for the test to pass." 181 | exit 1 182 | fi 183 | 184 | if test "$(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='gc-ok';")" = "" 185 | then 186 | echo "at least one 'gc' operation must succeed for the test to pass." 187 | exit 1 188 | fi 189 | 190 | trap - EXIT 191 | 192 | set +x 193 | echo "test results..." 
194 | sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results order by name;' 195 | echo "test passed" 196 | 197 | -------------------------------------------------------------------------------- /cli-tests/s3-parallel-thrash.sh: -------------------------------------------------------------------------------- 1 | set -xu 2 | 3 | if test "${SCRATCH:-}" = "" 4 | then 5 | export SCRATCH=/tmp 6 | else 7 | export SCRATCH 8 | fi 9 | 10 | export BUPSTASH_REPOSITORY="$SCRATCH/thrash_repo" 11 | export BUPSTASH_KEY="$SCRATCH/thrash.key" 12 | export BUPSTASH_QUERY_CACHE="$SCRATCH/thrash.qcache" 13 | export MINIO_ACCESS_KEY="thrash_access" 14 | export MINIO_SECRET_KEY="thrash_secret" 15 | export N_WORKERS=$(nproc) 16 | 17 | trap "trap - SIGTERM ; kill -9 -- -$$" SIGINT SIGTERM EXIT 18 | 19 | rm -rf "$BUPSTASH_REPOSITORY" 20 | rm -f "$BUPSTASH_KEY" 21 | 22 | bupstash new-key -o "$BUPSTASH_KEY" 23 | bupstash init --storage \ 24 | "{\"ExternalStore\":{\"path\":\"s3://thrash_access:thrash_secret@thrashbucket?secure=false&endpoint=localhost%3A9000\",\"socket_path\":\"$SCRATCH/bupstash-s3-storage.sock\"}}" 25 | rm -f "$SCRATCH/thrash.summary" 26 | sqlite3 "$SCRATCH/thrash.summary" "create table thrash_results(name, count, unique(name));" 27 | 28 | inc_result () { 29 | sqlite3 "$SCRATCH/thrash.summary" \ 30 | "PRAGMA busy_timeout = 10000; 31 | begin immediate; 32 | insert into thrash_results(name, count) values('$1', 0) 33 | on conflict(name) do update set count=count+1 where name = '$1'; commit;" > /dev/null 34 | } 35 | 36 | thrash_worker () { 37 | for i in $(seq 50) 38 | do 39 | expected=$(uuidgen) 40 | 41 | id=$(bupstash put -q -e --no-send-log thrash_test=yes :: echo $expected) 42 | 43 | if test "$?" = 0 44 | then 45 | inc_result "put-ok" 46 | 47 | actual="$(bupstash get -q id=$id)" 48 | if test "$?" 
= 0 49 | then 50 | inc_result "get-ok" 51 | if test "$expected" != "$actual" 52 | then 53 | inc_result "get-corrupt" 54 | fi 55 | else 56 | inc_result "get-fail" 57 | fi 58 | 59 | bupstash rm -q id="$id" >&2 60 | if test "$?" = 0 61 | then 62 | inc_result "rm-ok" 63 | else 64 | inc_result "rm-fail" 65 | fi 66 | else 67 | inc_result "put-fail" 68 | fi 69 | 70 | bupstash recover-removed -q >&2 71 | if test "$?" = 0 72 | then 73 | inc_result "recover-removed-ok" 74 | else 75 | inc_result "recover-removed-fail" 76 | fi 77 | 78 | bupstash gc -q >&2 79 | if test "$?" = 0 80 | then 81 | inc_result "gc-ok" 82 | else 83 | inc_result "gc-fail" 84 | fi 85 | done 86 | 87 | rm -f "$SCRATCH/want_chaos" 88 | } 89 | 90 | bupstash_serve_chaos_worker () { 91 | while test -f "$SCRATCH/want_chaos" 92 | do 93 | kill -9 $(ps -aux | grep 'bupstash serve' | grep -v "grep" | awk '{print $2}' | shuf | head -n $(($RANDOM/$N_WORKERS))) 94 | sleep 1 95 | done 96 | } 97 | 98 | bupstash_s3_plugin_chaos_worker () { 99 | while test -f "$SCRATCH/want_chaos" 100 | do 101 | killall -s SIGKILL 'bupstash-s3-storage' 102 | sleep 1 103 | done 104 | } 105 | 106 | s3_plugin_supervisor () { 107 | cd "$SCRATCH" 108 | while true 109 | do 110 | rm -f "./bupstash-s3-storage.sock" 111 | bupstash-s3-storage -quiescent-period 10ms >&2 112 | done 113 | } 114 | 115 | minio server "$SCRATCH/miniodata" >&2 & 116 | minio_pid="$!" 117 | s3_plugin_supervisor & 118 | s3_plugin_supervisor_pid="$!" 119 | # give both some time to start. 120 | sleep 1 121 | 122 | # Configure the test minio instance. 123 | rm -rf "$SCRATCH/mc" 124 | mc config host add thrashminio http://127.0.0.1:9000 thrash_access thrash_secret >&2 125 | 126 | # Outer loop is to control the size of the gc set. 127 | for i in $(seq 50) 128 | do 129 | 130 | bupstash rm --allow-many thrash_test=yes >&2 131 | bupstash gc >&2 132 | 133 | background_workers=() 134 | # At least enough workers so the scheduler hopefully 135 | # interleaves them in interesting ways. 
136 | for j in $(seq $N_WORKERS) 137 | do 138 | thrash_worker & 139 | background_workers+=($!) 140 | done 141 | 142 | touch "$SCRATCH/want_chaos" 143 | bupstash_serve_chaos_worker & 144 | background_workers+=($!) 145 | bupstash_s3_plugin_chaos_worker & 146 | background_workers+=($!) 147 | 148 | wait ${background_workers[@]} 149 | 150 | for id in $(bupstash list -q --format=jsonl | jq -r .id) 151 | do 152 | bupstash get -q id=$id > /dev/null 153 | if test "$?" != 0 154 | then 155 | inc_result "get-corrupt" 156 | fi 157 | done 158 | 159 | if sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results;' | grep -q 'get\-corrupt' 160 | then 161 | echo "invariant check failed, 'get' should never return a corrupt result" 162 | exit 1 163 | fi 164 | 165 | done 166 | 167 | if test $(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='put-ok';") = "" 168 | then 169 | echo "at least one 'put' operation must succeed for the test to pass." 170 | exit 1 171 | fi 172 | 173 | if test $(sqlite3 "$SCRATCH"/thrash.summary "select count from thrash_results where name='gc-ok';") = "" 174 | then 175 | echo "at least one 'gc' operation must succeed for the test to pass." 176 | exit 1 177 | fi 178 | 179 | # Cleanup any remains 180 | kill $s3_plugin_supervisor_pid 181 | kill $minio_pid 182 | 183 | # XXX hacky, but cleanup any bupstash-s3-storage instances that might have been restarted by the supervisor. 184 | sleep 0.5 185 | killall bupstash-s3-storage 186 | wait 187 | 188 | trap - EXIT 189 | 190 | set +x 191 | 192 | 193 | echo "test results..." 
194 | sqlite3 "$SCRATCH/thrash.summary" 'select * from thrash_results order by name;' 195 | echo "test passed" 196 | 197 | -------------------------------------------------------------------------------- /csrc/cksumvfs/cksumvfs_sqlite_version_number.c: -------------------------------------------------------------------------------- 1 | #include "./sqlite3.h" 2 | 3 | int cksumvfs_sqlite_version_number(void) { 4 | return SQLITE_VERSION_NUMBER; 5 | } -------------------------------------------------------------------------------- /csrc/cksumvfs/upgrade.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ex 3 | 4 | SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) 5 | CUR_DIR=$(pwd -P) 6 | echo "$SCRIPT_DIR" 7 | cd "$SCRIPT_DIR" || { echo "fatal error" >&2; exit 1; } 8 | 9 | SQLITE_VERSION=3350400 10 | YEAR=2021 11 | 12 | # Download and extract amalgamation 13 | SQLITE=sqlite-amalgamation-$SQLITE_VERSION 14 | curl -O https://sqlite.org/$YEAR/$SQLITE.zip 15 | unzip -p "$SQLITE.zip" "$SQLITE/sqlite3.h" > "$SCRIPT_DIR/sqlite3.h" 16 | rm -f "$SQLITE.zip" 17 | 18 | # Download and extract extra extensions 19 | SQLITE=sqlite-src-$SQLITE_VERSION 20 | curl -O https://sqlite.org/$YEAR/$SQLITE.zip 21 | unzip -p "$SQLITE.zip" "$SQLITE/ext/misc/cksumvfs.c" > "$SCRIPT_DIR/cksumvfs.c" 22 | rm -f "$SQLITE.zip" 23 | -------------------------------------------------------------------------------- /csrc/sodium-bindings.h: -------------------------------------------------------------------------------- 1 | #include -------------------------------------------------------------------------------- /doc/cli/diff.txt: -------------------------------------------------------------------------------- 1 | bupstash diff [OPTIONS] QUERY1 :: QUERY2 2 | 3 | Diff two snapshots. 
4 | 5 | See the bupstash manual for a detailed description of diff semantics 6 | 7 | Examples: 8 | $ bupstash diff id="8f7*" :: id="def*" 9 | $ bupstash diff --relaxed id="57de*" :: ./files -------------------------------------------------------------------------------- /doc/cli/exec-with-locks.txt: -------------------------------------------------------------------------------- 1 | bupstash exec-with-locks -r REPO COMMAND... 2 | 3 | Execute COMMAND with exclusive locks held on the bupstash repository, 4 | preventing concurrent modification to the repository for the duration of the command. 5 | 6 | Examples: 7 | $ bupstash exec-with-locks -r ./repo rsync ... -------------------------------------------------------------------------------- /doc/cli/gc.txt: -------------------------------------------------------------------------------- 1 | bupstash gc [OPTIONS] 2 | 3 | Run the garbage collector against a repository, removing 4 | unreferenced data and freeing disk space. 5 | 6 | Concurrent operations may be delayed while garbage collection 7 | is in progress. 8 | 9 | Examples: 10 | $ bupstash gc 11 | $ bupstash gc -r ssh://$server/repository -------------------------------------------------------------------------------- /doc/cli/get.txt: -------------------------------------------------------------------------------- 1 | bupstash get [OPTIONS] QUERY 2 | 3 | Get data from a bupstash repository matching a given query. 4 | 5 | See the bupstash user manual for a description of the query language. 
6 | 7 | Examples: 8 | $ bupstash get id=8f701cc8c03e1fe23598e95e7b87cb1c > out.tar 9 | $ bupstash get id=1b89* > out.data 10 | $ bupstash get name=foo.tar | tar -xvf - 11 | $ bupstash get --pick dir/my-file.txt id=$id 12 | $ bupstash get --pick sub-dir id=$id | tar -xvf - -------------------------------------------------------------------------------- /doc/cli/help.txt: -------------------------------------------------------------------------------- 1 | bupstash 2 | 3 | Make efficient encrypted backups easily by running 4 | one of the subcommands below. 5 | 6 | Subcommands: 7 | 8 | init Initialize a bupstash repository. 9 | new-key Create a new key capable of all operations. 10 | new-sub-key Derive a sub key for a subset of operations. 11 | put Put a new item into a repository. 12 | list List items in a repository. 13 | list-contents List contents of a directory snapshot. 14 | get Get data from a repository. 15 | restore Restore a snapshot to a local directory. 16 | rm/remove Remove items from a repository. 17 | recover-removed Recover items pending garbage collection. 18 | gc Delete unreferenced data and free space. 19 | sync Sync items between repositories. 20 | exec-with-locks Exec a command with a locked repository. 21 | version Print the version and exit. 22 | help Print this message. 23 | 24 | 25 | For subcommand specific help, run 'bupstash CMD --help'. 26 | 27 | For comprehensive documentation check the man pages. -------------------------------------------------------------------------------- /doc/cli/init.txt: -------------------------------------------------------------------------------- 1 | bupstash init [OPTIONS] 2 | 3 | Initialize a bupstash repository. 
4 | 5 | Examples: 6 | $ export BUPSTASH_REPOSITORY=./my-repository 7 | $ bupstash init -------------------------------------------------------------------------------- /doc/cli/list-contents.txt: -------------------------------------------------------------------------------- 1 | bupstash list-contents [OPTIONS] QUERY 2 | 3 | List contents of a bupstash directory snapshot. 4 | 5 | See the bupstash user manual for a description of the query language. 6 | 7 | See the bupstash manual for a description of the listing format. 8 | 9 | Examples: 10 | $ bupstash list-contents id=8f701cc8c03e1fe23598e95e7b87cb1c -------------------------------------------------------------------------------- /doc/cli/list.txt: -------------------------------------------------------------------------------- 1 | bupstash list [OPTIONS] [QUERY] 2 | 3 | List bupstash items that match a given query. 4 | 5 | See the bupstash user manual for a description of the query 6 | language and output formats. 7 | 8 | Examples: 9 | $ bupstash list 10 | $ bupstash list id="1b89*" 11 | $ bupstash list --format=jsonl1 name="*.tar" or name="*.sql" -------------------------------------------------------------------------------- /doc/cli/new-key.txt: -------------------------------------------------------------------------------- 1 | bupstash new-key [OPTIONS] 2 | 3 | Create a new key, readable by only the creating user. 4 | 5 | Keep this key private and secure as it is needed to read any 6 | data put in a repository using this key. 7 | 8 | Examples: 9 | $ bupstash new-key -o ./backups.key -------------------------------------------------------------------------------- /doc/cli/new-sub-key.txt: -------------------------------------------------------------------------------- 1 | bupstash new-sub-key [OPTIONS] 2 | 3 | Generate a bupstash sub key with lesser encryption and decryption capabilities. 
4 | 5 | *NOTE*: decryption differs from access - An attacker may still 6 | delete data by simply deleting the items or files they have access 7 | to. Use `bupstash serve` access controls to restrict what operations 8 | a user can perform and prevent unauthorized deletion of data. 9 | 10 | Examples: 11 | $ bupstash new-key -o ./backups.key 12 | $ bupstash new-sub-key --put -k ./backups.key -o ./put.key 13 | $ bupstash new-sub-key --list -k ./backups.key -o ./list.key 14 | $ bupstash new-sub-key --list-contents -k ./backups.key -o ./list.key -------------------------------------------------------------------------------- /doc/cli/put.txt: -------------------------------------------------------------------------------- 1 | bupstash put [OPTIONS] TAGS... [::] PATHS... 2 | bupstash put -e [OPTIONS] TAGS... [::] CMD... 3 | 4 | `bupstash put` encrypts a file, directory, or command output and stores it 5 | in a bupstash repository such that only the primary backup key can decrypt it. 6 | 7 | For single files the contents are saved directly, for multiple files the data 8 | is saved such that is can be retrieved as a tar archive, and for commands the 9 | command is executed and stdout is sent to the repository. 10 | 11 | To do incremental puts you must give each job you intend to run 12 | repeatedly it's own send log. This can dramatically reduce the amount of 13 | disk reading and network usage required to make a snapshot. 14 | 15 | Examples: 16 | $ export BUPSTASH_REPOSITORY=$HOME/bupstash-repo 17 | $ export BUPSTASH_KEY=./my-secret-bupstash.key 18 | 19 | # To avoid resending data needlessly during backups, create job specific send log. 20 | $ bupstash put --send-log /root/backup-sendlog ./to-backup 21 | 22 | # Specify arbitrary metadata as KEY=VALUE before. 23 | $ bupstash put host=$(hostname) ./file.txt 24 | 25 | # Multiple files and directories can be saved at once. 
26 | $ bupstash put ./file1.txt ./file2.txt ./some-dir 27 | 28 | # Use --exec to save the output of commands. 29 | $ bupstash put --exec name=files.tar tar -C ./files -cvf - . 30 | 31 | # Put from stdin (does not check error codes). 32 | $ echo data | bupstash put - 33 | 34 | # You can use '::' to terminate the tag list. 35 | $ bupstash put :: foo=bar.txt 36 | -------------------------------------------------------------------------------- /doc/cli/recover-removed.txt: -------------------------------------------------------------------------------- 1 | bupstash recover-removed [OPTIONS] 2 | 3 | Recover repository items that were removed, but that have not 4 | yet been deleted via garbage collection. 5 | 6 | Examples: 7 | $ bupstash recover-removed -r ./backups -------------------------------------------------------------------------------- /doc/cli/restore.txt: -------------------------------------------------------------------------------- 1 | bupstash restore [OPTIONS] --into DIR QUERY 2 | 3 | Efficiently restore the contents of a snapshot into a local directory, 4 | deleting any extra files that already existed in that directory. 5 | 6 | Examples: 7 | $ bupstash restore --into ./dir id="8f7*" 8 | $ bupstash restore --pick sub-dir --into ./dir id="8f7*" 9 | $ bupstash restore --ownership --into ./dir id="8f7*" -------------------------------------------------------------------------------- /doc/cli/rm.txt: -------------------------------------------------------------------------------- 1 | bupstash get [OPTIONS] QUERY 2 | 3 | Remove items from a bupstash repository matching a given query. 4 | 5 | See the bupstash user manual for a description of the query language. 
6 | 7 | Examples: 8 | $ bupstash rm id="8f8*" 9 | $ bupstash rm name=backup.tar and older-than 30d -------------------------------------------------------------------------------- /doc/cli/serve.txt: -------------------------------------------------------------------------------- 1 | bupstash serve [OPTIONS] REPOSITORY 2 | 3 | Run a bupstash server that speaks the bupstash 4 | protocol over stdin/stdout. Has access controls 5 | that can be configured via flags, and mandated via mechanisms 6 | such as ssh force commands. See the user manual for access 7 | control documentation and examples. 8 | 9 | Examples: 10 | $ export BUPSTASH_REPOSITORY_COMMAND="ssh $SERVER bupstash serve /data/repository" 11 | $ bupstash list -------------------------------------------------------------------------------- /doc/cli/sync.txt: -------------------------------------------------------------------------------- 1 | bupstash sync --to $REPO [OPTIONS] [QUERY...] 2 | 3 | Synchronize items and data to another bupstash repository. 4 | 5 | If a query is not specified, syncs all repository items. 6 | 7 | Examples: 8 | # Copy all items from one repository to another. 9 | bupstash sync --to ssh://$SERVER id=$id 10 | 11 | # Perform a backup then sync it to a different server. 12 | $ id="$(bupstash put ./files) 13 | $ bupstash sync --to ssh://$SERVER id=$id -------------------------------------------------------------------------------- /doc/cli/version.txt: -------------------------------------------------------------------------------- 1 | bupstash version 2 | 3 | Print the bupstash version to stdout and exit. -------------------------------------------------------------------------------- /doc/guides/Filesystem Backups.md: -------------------------------------------------------------------------------- 1 | # Filesystem Backups 2 | 3 | This guide will cover how to use bupstash for system backups, it is divided into 4 | sections which cover different use cases. 
5 | 6 | For all of the guides the shown commands can be put into a cron job or other tool for running background tasks 7 | for automated backups. 8 | 9 | The guides below can also be combined with remote repositories with access controls to allow 'upload only' for secure deployments. 10 | 11 | ## Simple directory snapshots 12 | 13 | The simplest use of bupstash is to simply snapshot your home directory to a repository on an external drive. 14 | 15 | Create the file backup.sh: 16 | 17 | ``` 18 | set -eu 19 | export BUPSTASH_KEY=/root/backup-put.key 20 | export BUPSTASH_REPOSITORY=/mnt/external-drive/bupstash-backups 21 | 22 | bupstash put \ 23 | --send-log /root/backup.sendlog \ 24 | --exclude "/home/*/.cache" \ 25 | hostname=$(hostname) \ 26 | name=home-backup.tar \ 27 | /home/ 28 | ``` 29 | 30 | Then running a backup is as simple as: 31 | 32 | ``` 33 | $ sudo sh ./backup.sh 34 | ``` 35 | 36 | Now to restore files or sub directories we can use `bupstash get`: 37 | 38 | ``` 39 | $ bupstash list name=home-backup.tar 40 | ... 41 | id="aa87fdbc72241f363568bbb888c0834e" name="backup.tar" timestamp="2020-07-24 15:25:00" 42 | ... 43 | $ bupstash get id="aa8*" | tar -C restore ... 44 | $ bupstash get --pick some/sub-dir id="aa8*" | tar -C restore ... 45 | $ bupstash get --pick some/file.txt id="aa8*" > file.txt 46 | ``` 47 | 48 | Some points to consider about this snapshot method: 49 | 50 | - The use of --exclude to omit the user cache directories, we can save a lot of space in backups by ignoring things 51 | like out web browser cache, at the expense of less complete backups. You can specify --exclude more than once to 52 | skip more than one directory or file. See the man page for more details. 53 | 54 | - Bupstash incremental backups work best when the send log file used was last used for a snapshot of the same or similar input data. 
55 | Manually specifying a send log path with --send-log ensures subsequent similar snapshots use the same send log, often dramatically increasing efficiency. 56 | 57 | - This method of backup is simple, but does not account for files being modified during upload. The simplest way to to think about this problem, is files will be changing while 58 | the backup is uploading, so you might capture different directories at different points in time. 59 | 60 | - In this command we are also using a 'put' key (see the offline keys guide) so that backups cannot be decrypted even if someone was to steal your external drive. 61 | 62 | 63 | ## Btrfs directory snapshots 64 | 65 | If you are running linux with btrfs, (or any other operating system + filesystem that supports snapshots), you can 66 | use this to get stable snapshots that won't be modified during upload. 67 | 68 | 69 | Create the file backup.sh: 70 | 71 | ``` 72 | set -eu 73 | export BUPSTASH_KEY=/root/backup-put.key 74 | export BUPSTASH_REPOSITORY=/mnt/external-drive/bupstash-backups 75 | 76 | 77 | if test -e /rootsnap 78 | then 79 | echo "removing snapshot, it already existed." 80 | btrfs subvolume delete /rootsnap 81 | fi 82 | btrfs subvolume snapshot -r / /rootsnap > /dev/null 83 | 84 | bupstash put \ 85 | --send-log /root/backup.sendlog \ 86 | --exclude "/home/*/.cache" \ 87 | hostname=$(hostname) \ 88 | name=backup.tar \ 89 | /rootsnap 90 | 91 | btrfs subvolume delete /rootsnap > /dev/null 92 | ``` 93 | 94 | Then running a backup is as simple as: 95 | 96 | ``` 97 | $ sudo sh ./backup.sh 98 | ``` 99 | 100 | Filesystem enabled snapshots do not suffer from 'time smear'. All points about '--send-log', '--exclude' and backup restore from simple directory snapshots also apply to this snapshot method. 
101 | 102 | 103 | ## Btrfs send snapshots 104 | 105 | 106 | If you are running linux with btrfs, (or any other operating system + filesystem that supports exporting directories as a stream), you can 107 | directly save the output of such a command into a bupstash repository. 108 | 109 | 110 | Create the file backup.sh: 111 | 112 | ``` 113 | set -eu 114 | export BUPSTASH_KEY=/root/backup-put.key 115 | export BUPSTASH_REPOSITORY=/mnt/external-drive/bupstash-backups 116 | 117 | 118 | if test -e /rootsnap 119 | then 120 | echo "removing snapshot, it already existed." 121 | btrfs subvolume delete /rootsnap 122 | fi 123 | 124 | btrfs subvolume snapshot -r / /rootsnap > /dev/null 125 | 126 | bupstash put \ 127 | --exec 128 | --send-log /root/backup.sendlog \ 129 | hostname=$(hostname) \ 130 | name=backup.btrfs \ 131 | btrfs send /rootsnap 132 | 133 | btrfs subvolume delete /rootsnap > /dev/null 134 | ``` 135 | Then running a backup is as simple as: 136 | 137 | ``` 138 | $ sudo sh ./backup.sh 139 | ``` 140 | 141 | Restoration of the backup is done via the `btrfs receive` command: 142 | 143 | ``` 144 | $ bupstash get name=backup.btrfs | sudo btrfs receive ./restore 145 | ``` 146 | -------------------------------------------------------------------------------- /doc/guides/Getting Started.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | bupstash is an easy to use tool for making encrypted space efficient backups. 4 | It is special because it is open source, and stores all data and metadata in an encrypted 5 | and deduplicated format. 6 | 7 | Typical users of bupstash are people familiar with the command line, such as software developers, 8 | system administrators and other technical users. 9 | 10 | This guide covers installation and basic usage of bupstash. 
11 | 12 | ## Install bupstash 13 | 14 | ### Precompiled version 15 | 16 | Head to the [releases page](https://github.com/andrewchambers/bupstash/releases) and download a 17 | build for for your platform. Simply extract the archive and add the single bupstash binary to 18 | your PATH. 19 | 20 | ### Via rust and cargo 21 | 22 | If you have a rust compiler installed, you can install the latest release 23 | using cargo (the rust programming language package manager). 24 | 25 | Install `libsodium-dev` and `pkg-config` for your platform, and run: 26 | 27 | 28 | ``` 29 | $ git clone https://github.com/andrewchambers/bupstash 30 | $ cd bupstash 31 | $ cargo build --release 32 | $ cp ./target/release/bupstash "$INSTALL_DIR" 33 | ``` 34 | 35 | or simply: 36 | 37 | ``` 38 | $ cargo install bupstash 39 | $ cp "$HOME/.cargo/bin/bupstash" "$INSTALL_DIR" 40 | ``` 41 | 42 | ## Initializing your repository 43 | 44 | First we must initialize a repository to save data into. We do this with the `bupstash init` command. 45 | 46 | To initialize a local repository run: 47 | ``` 48 | export BUPSTASH_REPOSITORY="$(pwd)/bupstash-repo" 49 | $ bupstash init 50 | ``` 51 | 52 | For remote repositories, install bupstash on both the local and the remote machine and run the following: 53 | 54 | ``` 55 | export BUPSTASH_REPOSITORY=ssh://$SERVER/home/me/bupstash-repo 56 | $ bupstash init 57 | ``` 58 | 59 | Note that you can avoid some retyping by setting certain environment variables (e.g. 60 | BUPSTASH_REPOSITORY) in your .bashrc or other equivalent file. 61 | 62 | ## Generating an encryption key 63 | 64 | All data stored in a bupstash repository is encrypted, so first we need to generate an encryption key. 65 | 66 | ``` 67 | $ bupstash new-key -o backups.key 68 | ``` 69 | 70 | This key can be used to make, view and edit encrypted snapshots. 71 | KEEP THIS KEY SAFE, if you lose it, you will have lost all your backups made with this key. 
72 | 73 | Later sections will explain how to create and use secure offline keys. 74 | 75 | ## Making snapshots 76 | 77 | First we must tell bupstash which encryption key to use. 78 | ``` 79 | export BUPSTASH_KEY=$(pwd)/backups.key 80 | ``` 81 | 82 | Now we can start making snapshots, here we save a file: 83 | 84 | ``` 85 | $ bupstash put ./my-data.txt 86 | 811a0f5c61656b5f494a014ce46d3549 87 | ``` 88 | 89 | The printed text is the id of this put, which can be used 90 | to retrieve the data again with a query: 91 | 92 | ``` 93 | $ bupstash get id="811*" 94 | your data! 95 | ``` 96 | 97 | We can also save a directory: 98 | 99 | ``` 100 | $ bupstash put ./my-dir 101 | ... 102 | ``` 103 | 104 | Directories are automatically converted to tarballs, which can be extracted with the tar command: 105 | 106 | ``` 107 | $ mkdir restored 108 | $ bupstash get name=my-dir.tar | tar -C ./restored -xvf - 109 | ``` 110 | 111 | We can also save the output of commands: 112 | 113 | ``` 114 | $ echo hello | bupstash put - 115 | 116 | # This form is able to detect command failures. 117 | $ bupstash put --exec echo hello 118 | ... 119 | ``` 120 | 121 | Note that bupstash automatically applies compression and deduplicates your data so you 122 | do not need to do this manually. 123 | 124 | ## Listing snapshots 125 | 126 | ``` 127 | $ bupstash list 128 | id="dbca49b072c0f94b9e72bf81e7716ff9" name="backup.tar" size="10.23MB" timestamp="2020/08/03 15:47:32" 129 | ... 130 | ``` 131 | 132 | We can do more sophisticated queries when we list: 133 | 134 | ``` 135 | $ bupstash list timestamp="2020/*" 136 | ... 137 | $ bupstash list name=backup.tar and older-than 7d 138 | $ bupstash list newer-than 1h 139 | ... 140 | ``` 141 | 142 | For a full description of the query language see the query language manual page. 
143 | 144 | ## Snapshot tags 145 | 146 | When we make snapshots, we can add our own arbitrary tags in addition to the default tags: 147 | 148 | ``` 149 | $ bupstash put mykey=value ./my-important-files 150 | $ bupstash list mykey=value 151 | ``` 152 | 153 | ## Listing and fetching snapshots 154 | 155 | Once we have directory snapshots, we can list the contents using bupstash `list-contents`: 156 | 157 | ``` 158 | $ bupstash list-contents id=$id 159 | drwxr-xr-x 0B 2020/10/30 13:32:04 . 160 | -rw-r--r-- 9B 2020/10/30 13:32:04 data.txt 161 | ... 162 | ``` 163 | 164 | We can efficiently restore a snapshot to a local directory only downloading the files that are missing: 165 | ``` 166 | $ mkdir restore-dir 167 | $ bupstash restore --into ./restore-dir id=$id 168 | ``` 169 | 170 | We can also export individual files or directories as a tarballs: 171 | 172 | ``` 173 | $ bupstash get --pick data.txt id=$id 174 | my data! 175 | $ bupstash get --pick subdir id=$id | tar -C ./subdir-restore -xvf - 176 | $ bupstash get id=$id | tar -C ./restore -xvf - 177 | ``` 178 | 179 | ## Removing snapshots 180 | 181 | We can remove snapshots via the same query language and the `bupstash rm` command: 182 | 183 | ``` 184 | $ bupstash rm older-than 90d and name=backup.tar and host=my-server 185 | ``` 186 | 187 | Removing a snapshot does not immediately reclaim disk space. To do that, you must run the 188 | garbage collector. 189 | 190 | ``` 191 | $ bupstash gc 192 | ``` 193 | 194 | # Learning more 195 | 196 | Feel free to browse the manual pages for each command to get a feel for how to interact and administer with your bupstash backups. -------------------------------------------------------------------------------- /doc/guides/Network Filesystems.md: -------------------------------------------------------------------------------- 1 | # Network Filesystems 2 | 3 | Bupstash relies on fcntl style POSIX file locking across multiple files to work in a concurrent context. 
Do not use bupstash with any network filesystem that does not support fcntl style locking unless you understand the potential consequences of such a decision. 4 | 5 | When using bupstash with a remote repository it is always recommended to use bupstash over ssh by setting BUPSTASH_REPOSITORY to an `ssh://` 6 | style URL. This mode is safe for concurrent use, faster and better in the majority of use cases. 7 | 8 | For information on specific network filesystem configurations see the sections below. 9 | 10 | ## NFSv3/NFSv4 11 | 12 | We do no recommend using bupstash over NFSv3 in any configuration. 13 | 14 | If you are stubborn, ensure locking is enabled or only access the repository from one bupstash process as a time. 15 | 16 | NFSv4 has a more sound network locking protocol, so given the choice between NFSv3 and NFSv4 always 17 | choose NFSv4 with locking enabled. 18 | 19 | ## CephFS 20 | 21 | Using bupstash over Cephfs is untested so is currently not recommended. 22 | 23 | ## SSHFS 24 | 25 | Currently we do no recommend using bupstash over sshfs in any configuration due to the lack 26 | of file lock support across multiple machines. 27 | 28 | If you have sshfs access, you almost certainly have the ability to set BUPSTASH_REPOSITORY 29 | to an `ssh://` style url which enables safe concurrent repository access in all situations. 30 | 31 | ## 9P2000.L 32 | 33 | Uncached 9P2000.L mounts of repositories exported via the diod 9P2000.L server will likely 34 | work without issue, though use at your own risk. 35 | 36 | 37 | -------------------------------------------------------------------------------- /doc/guides/Password Protected Keys.md: -------------------------------------------------------------------------------- 1 | # Password Protected Keys 2 | 3 | Bupstash allows users to fetch the key to use via arbitrary commands by setting the BUPSTASH_KEY_COMMAND environment variable. 
In this guide we will configure bupstash to decrypt a password protected key file with gpg. 4 | 5 | First create a key: 6 | 7 | ``` 8 | $ bupstash new-key -o demo.key 9 | ``` 10 | 11 | Next we password protect the key using gpg: 12 | 13 | ``` 14 | $ gpg --symmetric demo.key 15 | ``` 16 | 17 | gpg will ask you for a password using your configured pin entry program and then create demo.key.gpg. 18 | 19 | Verify you can decrypt the key: 20 | 21 | ``` 22 | $ gpg --decrypt demo.key.gpg 23 | ... 24 | -----BEGIN BUPSTASH KEY----- 25 | ... 26 | -----END BUPSTASH KEY----- 27 | ``` 28 | 29 | Now we can remove the unencrypted key: 30 | 31 | ``` 32 | $ shred demo.key 33 | ``` 34 | 35 | Finally, we can tell bupstash to use this encrypted key, to do this we setup the environment variable BUPSTASH_KEY_COMMAND: 36 | 37 | ``` 38 | $ export BUPSTASH_KEY_COMMAND="gpg -q --decrypt $(pwd)/demo.key.gpg" 39 | ``` 40 | 41 | Now whenever bupstash requires a key, it will ask gpg for it, and gpg will ask for the password. 42 | 43 | 44 | ``` 45 | $ bupstash list 46 | 47 | ┌──────────────────────────────────────────────────────┐ 48 | │ Enter passphrase │ 49 | │ │ 50 | │ │ 51 | │ Passphrase: ________________________________________ │ 52 | │ │ 53 | │ │ 54 | └──────────────────────────────────────────────────────┘ 55 | 56 | ``` 57 | 58 | If you have gpg-agent configured, the password does not need to be re-entered until gpg-agent expires the password 59 | entry. 60 | 61 | Remember that BUPSTASH_KEY_COMMAND can be set to run any command of your choosing, giving great flexibility when it comes to protecting sensitive bupstash keys. 62 | 63 | Finally, don't forget to check out our other guides and manuals to learn about sub-keys that do not have the ability to decrypt data after it is sent. Sub-keys allow us to avoid putting our sensitive decryption keys 64 | on devices making backups. 
-------------------------------------------------------------------------------- /doc/guides/Remote Access Controls.md: -------------------------------------------------------------------------------- 1 | # Remote access controls 2 | 3 | When designing a backup plan, we must remember that if a malicious agent compromises your computer, 4 | it may be able to delete your backups too. To solve this issue bupstash supports access controls on remote repositories 5 | that can be configured on a per ssh key basis. To do this, we can utilize ssh force commands to restrict a backup client to 6 | only run an instance of `bupstash serve` that has limited permissions. 7 | 8 | The following assumes you have a backup server with a user called `backups` that has openssh sshd running, 9 | and a client computer with an ssh client installed. 10 | 11 | In an your sshd config file in your server add the line: 12 | 13 | ``` 14 | Match User backups 15 | ForceCommand "/bin/bupstash-put-force-command.sh" 16 | ``` 17 | 18 | Create /bin/bupstash-put-force-command.sh on your server: 19 | 20 | ``` 21 | $ echo 'exec bupstash serve --allow-put /home/backups/bupstash-backups' > bupstash-put-force-command.sh 22 | $ sudo cp bupstash-put-force-command.sh /bin/bupstash-put-force-command.sh 23 | $ sudo chown root:root /bin/bupstash-put-force-command.sh 24 | $ sudo chmod +x /bin/bupstash-put-force-command.sh 25 | ``` 26 | 27 | Next add an ssh key you intend to use for backups to `$SERVER/home/backups/.ssh/authorized_keys`, 28 | such that the user sending backups can connect to the remote server using ssh key based login. 29 | 30 | Now when the backups user attempts to run a backup via ssh they are only able to 31 | run the bupstash serve command with a hard coded set of permissions and 32 | repository path. 
33 | 34 | Now the client is only authorized to create new backups, but not list or remove them: 35 | 36 | ``` 37 | export BUPSTASH_REPOSITORY="ssh://backups@$SERVER/backups" 38 | $ bupstash put ./files 39 | ... 40 | $ bupstash list 41 | server has disabled query and search for this client 42 | ``` 43 | 44 | The `bupstash serve` command also supports allowing fetching data, entry removal and garbage collection. With these 45 | options we can create a backup plan where clients can create new backups, and an administrator is able to cycle old backups 46 | from the secure machine. 47 | -------------------------------------------------------------------------------- /doc/guides/Secure Offline Keys.md: -------------------------------------------------------------------------------- 1 | # Secure offline keys 2 | 3 | In a secure computer system we do not want our decryption keys stored online where they could 4 | inadvertently be leaked. To support this use case, bupstash allows creating keys which do not support 5 | decrypting backups. Bupstash allows users to create 'put keys' that can only create new backups, or 'list keys' that can list backups, but not decrypt data. 6 | 7 | Using a 'put key' lets you create backups without exposing your decryption key, while using a 'list key' 8 | lets you rotate old backups based on queries, but without exposing the sensitive decryption key. This 9 | guide will show how to create and use these key types. 10 | 11 | 12 | ## Generating put and list keys 13 | 14 | Generating and using these keys is simple: we use bupstash to create a new 'put key' or 'list key' 15 | that is derived from a regular bupstash key using the `new-sub-key` command.
16 | 17 | ``` 18 | $ bupstash new-sub-key -k ./backups.key -o put-backups.key --put 19 | $ bupstash new-sub-key -k ./backups.key -o list-backups.key --list 20 | ``` 21 | 22 | ## Using put and list keys 23 | 24 | Using these keys is the same as a regular key: 25 | 26 | ``` 27 | $ bupstash put --key ./put-backups.key ./data.txt 28 | $ bupstash list --key ./list-backups.key 29 | ``` 30 | 31 | With the important difference that these keys cannot decrypt the contents of the snapshots. 32 | Only the original key is able to decrypt these snapshots. 33 | 34 | ``` 35 | $ bupstash get --key ./put-backups.key id=$id 36 | bupstash get: provided key is not a decryption key 37 | 38 | $ bupstash get --key ./list-backups.key id=$id 39 | bupstash get: provided key is not a decryption key 40 | 41 | $ bupstash get --key ./backups.key id=$id 42 | data... 43 | ``` 44 | 45 | We can now put the main key into secure offline storage for use in case of emergency, 46 | but continue to make and administer our backups using the put key and list key. 47 | 48 | Neither the storage server, nor the devices uploading new snapshots 49 | have access to your existing snapshots. 50 | 51 | Note that we recommend creating a new put key for every backup client if you have a shared bupstash 52 | repository. -------------------------------------------------------------------------------- /doc/man/bupstash-authors.7.md: -------------------------------------------------------------------------------- 1 | bupstash-authors(7) 2 | =================== 3 | 4 | ## SYNOPSIS 5 | 6 | The bupstash authors, how to contact them, and their bupstash specific PGP public keys. 7 | 8 | ## Andrew Chambers 9 | 10 | ### About 11 | 12 | Andrew is an experienced programmer with a master's degree in computer systems engineering 13 | from the University of Auckland in New Zealand. He is the creator and primary author of bupstash 14 | and bupstash.io.
15 | 16 | ### Contact 17 | 18 | Email: ac@bupstash.io 19 | 20 | ### PGP Key 21 | 22 | ``` 23 | -----BEGIN PGP PUBLIC KEY BLOCK----- 24 | 25 | mDMEYvRmMRYJKwYBBAHaRw8BAQdAzXZnbofNVgtBglzMSqu0cVOgaoLEyAy6v6DX 26 | mYSxwA+IywQfFgoAfQWCYvRmMQMLCQcJEDVGG/kcrtZ+RxQAAAAAAB4AIHNhbHRA 27 | bm90YXRpb25zLnNlcXVvaWEtcGdwLm9yZ4F/S6gog/wBVOoLG3K8SaE5HOFe7EbF 28 | Wgrxh1NKhCsCAxUKCAKbAQIeARYhBJzEZHwEQV2W7y9PzTVGG/kcrtZ+AAD9+AEA 29 | l4BParFXikhZH4VXr1hRyfyWtV8hnwPcl9eU+igX/SgBAKYptO879hggMwu+9zJ+ 30 | x25eJW/EWW2i9S3mu5gMpysEtBA8YWNAYnVwc3Rhc2guaW8+iM4EExYKAIAFgmL0 31 | ZjEDCwkHCRA1Rhv5HK7WfkcUAAAAAAAeACBzYWx0QG5vdGF0aW9ucy5zZXF1b2lh 32 | LXBncC5vcmdbuNzNhBGXeid8J/vWVx3oQYgRa15JKnCMuu/ReEOHVwMVCggCmQEC 33 | mwECHgEWIQScxGR8BEFdlu8vT801Rhv5HK7WfgAATK8BAJ7po0Ni9YNmvSDT4EsX 34 | 35MdvVYgtq22LJoDmLt7r+oRAQCKKWPsw13i6GPm4t5ozPqltdAR3xM7uEfGCzWJ 35 | 0VRfBLgzBGL0ZjEWCSsGAQQB2kcPAQEHQBF6vjySpi+RPabf1L9f3zP61m4OS2PP 36 | HUE0QKUnIkDSiQF/BBgWCgExBYJi9GYxCRA1Rhv5HK7WfkcUAAAAAAAeACBzYWx0 37 | QG5vdGF0aW9ucy5zZXF1b2lhLXBncC5vcme3EZvVUbn+0BUFL3iLpG6BVSixbxNQ 38 | XfCT6kj5GDimRQKbAr6gBBkWCgBvBYJi9GYxCRAdZXgjz6ofGEcUAAAAAAAeACBz 39 | YWx0QG5vdGF0aW9ucy5zZXF1b2lhLXBncC5vcmfk/iZ/MouT7uFwBvSNoD+CzhGR 40 | ZLrUFY6Sh2nGnTPQyBYhBB9DxIpE4XsF0ukONx1leCPPqh8YAADeKwEAovED6zTC 41 | 29byTkr/VzFAha/Gtt/MnPJIC6gpCBSTpTABAMC/EGFMGiivTDdZXP1rqCpdFd8s 42 | qF2mkYVgZyCXw/QMFiEEnMRkfARBXZbvL0/NNUYb+Ryu1n4AALNtAQDDWHOD0SEb 43 | QgXyLk0Ho6yRvgCtSUiSHP3LH+iJpSVx5gEAk+SIGKThNW8BSaK1mzdTOu+Dsntn 44 | Oxrp8vd+GE0RCwe4OARi9GYxEgorBgEEAZdVAQUBAQdA80nC4YzCtnH4hN41nQVE 45 | QO0yNZHqycHgFKqxfodbElcDAQgJiMAEGBYKAHIFgmL0ZjEJEDVGG/kcrtZ+RxQA 46 | AAAAAB4AIHNhbHRAbm90YXRpb25zLnNlcXVvaWEtcGdwLm9yZxJBdw1WDQVXRs8c 47 | 8DntJe+SkWMMOk4cM2VrTv7Tk6ssApsMFiEEnMRkfARBXZbvL0/NNUYb+Ryu1n4A 48 | AK6YAP4sk5/dknfAxmnacYR5w6QqUhTRGvPZU6aRGtVk8eFY3AD+LjNttTyy5u9g 49 | JC4NSoiQRLjFSOj8ypYwbSHMrzrRuA8= 50 | =HZqW 51 | -----END PGP PUBLIC KEY BLOCK----- 52 | ``` 53 | 54 | You can cross reference this PGP key at https://keys.openpgp.org/. 
55 | 56 | ## SEE ALSO 57 | 58 | bupstash(1) 59 | -------------------------------------------------------------------------------- /doc/man/bupstash-diff.1.md: -------------------------------------------------------------------------------- 1 | bupstash-diff(1) 2 | ================ 3 | 4 | ## SYNOPSIS 5 | 6 | Diff two snapshots printing the summary to stdout. 7 | 8 | `bupstash diff [OPTIONS] QUERY1... :: QUERY2... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash diff` fetches two snapshot listings from the remote server and compares them, printing 13 | the diff line output to stdout. As a special case, if either query starts with './' or '/' a temporary 14 | listing is created for that local directory for comparison. 15 | 16 | `bupstash diff` is preferred over running traditional `diff` against the output of `bupstash list-contents` 17 | because it takes the full precision of timestamps and also the stored file hash into account when performing 18 | the diff operation. 19 | 20 | Bupstash supports ignoring items in the diff comparison to aid in analysis. Useful examples are the `--ignore` values 21 | `times` to ignore file modification timestamps and `content` to ignore file size and hash changes. 22 | 23 | ## OUTPUT FORMAT 24 | 25 | Output is consistent with that of `bupstash list-contents`, except each line is 26 | prefixed with either `+` or `-` representing added or removed items respectively. 27 | 28 | Specifying `--format` alters the underlying output format as described by bupstash-list-contents(1). Lines are still prefixed with either `+` or `-` regardless of the output format. 29 | 30 | ## QUERY LANGUAGE 31 | 32 | For full documentation on the query language, see bupstash-query-language(7). 33 | 34 | ## QUERY CACHING 35 | 36 | The diff command uses the same query caching mechanisms as bupstash-list(1), check that page for 37 | more information on the query cache.
38 | 39 | ## OPTIONS 40 | 41 | * -r, --repository REPO: 42 | The repository to connect to, may be of the form `ssh://$SERVER/$PATH` for 43 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 44 | 45 | * -k, --key KEY: 46 | Key used to decrypt data and metadata. If not set, defaults 47 | to `BUPSTASH_KEY`. 48 | 49 | * --query-cache PATH: 50 | Path to the query-cache file, defaults to one of the following, in order, provided 51 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 52 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 53 | 54 | * -i, --ignore: 55 | Comma separated list of file attributes to ignore in comparisons. 56 | Valid items are 'content,dev,devnos,inode,type,perms,nlink,uid,gid,times,xattrs' 57 | 58 | * --relaxed: 59 | Shortcut for --ignore 'dev,inode,nlink,uid,gid,times,xattrs'. 60 | This option is useful for comparing content without being so concerned with machine specific metadata. 61 | 62 | * --{left,right}-pick PATH: 63 | Perform diff on a sub-directory of the left/right query. 64 | 65 | * --indexer-threads N: 66 | Number of processor threads to use for pipelined parallel file hashing and metadata reads. 67 | Defaults to the number of processors. 68 | 69 | * --xattrs: 70 | Fetch xattrs when indexing local directories. 71 | 72 | * --format FORMAT: 73 | Set output format to one of the following 'human', 'jsonl'. 74 | 75 | * --utc-timestamps: 76 | Display and search against timestamps in utc time instead of local time. 77 | 78 | * --no-progress: 79 | Suppress progress indicators (Progress indicators are also suppressed when stderr 80 | is not an interactive terminal). 81 | 82 | * -q, --quiet: 83 | Be quiet, implies --no-progress. 84 | 85 | ## ENVIRONMENT 86 | 87 | * BUPSTASH_REPOSITORY: 88 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 89 | remote repositories if ssh access is configured.
90 | 91 | * BUPSTASH_REPOSITORY_COMMAND: 92 | A command to run to connect to an instance of bupstash-serve(1). This 93 | allows more complex connections to the repository for less common use cases. 94 | 95 | * BUPSTASH_KEY: 96 | Path to a primary key that will be used for decrypting data and metadata. 97 | 98 | * BUPSTASH_KEY_COMMAND: 99 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 100 | to fetch the key from arbitrary locations such as the network or other secret storage. 101 | 102 | * BUPSTASH_QUERY_CACHE: 103 | Path to the query cache file to use. 104 | 105 | ## EXAMPLES 106 | 107 | ### Compare two snapshots by query 108 | 109 | ``` 110 | $ bupstash diff id="14eb*" :: id="57de*" 111 | - -rw-r--r-- 1.1kB hello.txt 112 | + -rw-r--r-- 1.3kB goodbye.txt 113 | ``` 114 | 115 | ### Compare a snapshot and a local directory 116 | 117 | ``` 118 | $ bupstash diff --left-pick files --relaxed id="57de*" :: ./files 119 | ``` 120 | 121 | ## SEE ALSO 122 | 123 | bupstash(1), bupstash-list(1), bupstash-keyfiles(7), bupstash-query-language(7) 124 | -------------------------------------------------------------------------------- /doc/man/bupstash-exec-with-locks.1.md: -------------------------------------------------------------------------------- 1 | bupstash-exec-with-locks(1) 2 | =========================== 3 | 4 | ## SYNOPSIS 5 | 6 | Execute a command with exclusive locks on the repository. 7 | 8 | `bupstash exec-with-locks -r REPO COMMAND...` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash exec-with-locks` executes a command with exclusive locks held on 13 | the bupstash repository, preventing concurrent modification to the repository 14 | for the duration of the command. 15 | 16 | ## OPTIONS 17 | 18 | * -r, --repository REPO: 19 | Repository to lock. Defaults to BUPSTASH_REPOSITORY if not set. 20 | Unlike other commands, does not support remote repository access. 21 | 22 | ## ENVIRONMENT 23 | 24 | * BUPSTASH_REPOSITORY: 25 | Repository to lock.
26 | 27 | ## EXAMPLES 28 | 29 | ``` 30 | $ bupstash exec-with-locks -r ./repo -- cp -r ./repo ./repo-backup 31 | ``` 32 | 33 | ## SEE ALSO 34 | 35 | bupstash(1), bupstash-repository(7) 36 | -------------------------------------------------------------------------------- /doc/man/bupstash-gc.1.md: -------------------------------------------------------------------------------- 1 | bupstash-gc(1) 2 | ============== 3 | 4 | ## SYNOPSIS 5 | 6 | Run the garbage collector against a repository, removing 7 | unreferenced data and freeing disk space. 8 | 9 | `bupstash gc [OPTIONS]` 10 | 11 | ## DESCRIPTION 12 | 13 | `bupstash gc` walks the repository contents attempting to find 14 | unreachable data chunks and removing them, potentially reclaiming disk space. 15 | 16 | It is safe to run `bupstash gc` at any time, but some operations (such as bupstash-put(1)) 17 | may temporarily be delayed. 18 | 19 | The garbage collector only relies on unencrypted metadata, so does not need 20 | access to decryption keys to operate. 21 | 22 | ## OPTIONS 23 | 24 | * -r, --repository REPO: 25 | The repository to connect to and operate on. 26 | May be of the form `ssh://$SERVER/$PATH` for 27 | remote repositories if ssh access is configured. 28 | If not specified, is set to `BUPSTASH_REPOSITORY`. 29 | 30 | * --no-progress: 31 | Suppress progress indicators (Progress indicators are also suppressed when stderr 32 | is not an interactive terminal). 33 | 34 | * -q, --quiet: 35 | Be quiet, implies --no-progress. 36 | 37 | ## ENVIRONMENT 38 | 39 | * BUPSTASH_REPOSITORY: 40 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 41 | remote repositories if ssh access is configured. 42 | 43 | * BUPSTASH_REPOSITORY_COMMAND: 44 | A command to run to connect to an instance of bupstash-serve(1). This 45 | allows more complex connections to the repository for less common use cases. 
46 | 47 | ## SEE ALSO 48 | 49 | bupstash(1), bupstash-repository(7) 50 | -------------------------------------------------------------------------------- /doc/man/bupstash-get.1.md: -------------------------------------------------------------------------------- 1 | bupstash-get(1) 2 | =============== 3 | 4 | ## SYNOPSIS 5 | 6 | Get data from a bupstash repository. 7 | 8 | `bupstash get [OPTIONS] QUERY... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash get` fetches and decrypts data stored in a bupstash repository, sending 13 | it to stdout. If the requested item was saved as a directory, the command converts 14 | it into a tar stream. 15 | 16 | The item that is fetched is chosen based on a simple query against the 17 | tags specified when saving data with `bupstash put`. 18 | 19 | ## QUERY LANGUAGE 20 | 21 | For full documentation on the query language, see bupstash-query-language(7). 22 | 23 | ## QUERY CACHING 24 | 25 | The get command uses the same query caching mechanisms as bupstash-list(1), check that page for 26 | more information on the query cache. 27 | 28 | ## SPARSE FILES 29 | 30 | When getting a directory as a tarball sparse files are not treated specially, to restore sparse 31 | files while preserving the file holes use `bupstash restore` instead. 32 | 33 | ## OPTIONS 34 | 35 | * -r, --repository REPO: 36 | The repository to connect to, may be of the form `ssh://$SERVER/$PATH` for 37 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 38 | 39 | * -k, --key KEY: 40 | Key that will be used to decrypt data and metadata. If not set, defaults 41 | to `BUPSTASH_KEY`. 42 | 43 | * --pick PATH: 44 | Fetch an individual file or sub-directory from a snapshot.
45 | 46 | * --query-cache PATH: 47 | Path to the query-cache file, defaults to one of the following, in order, provided 48 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 49 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 50 | 51 | * --utc-timestamps: 52 | Display and search against timestamps in utc time instead of local time. 53 | 54 | * --no-progress: 55 | Suppress progress indicators (Progress indicators are also suppressed when stderr 56 | is not an interactive terminal). 57 | 58 | * -q, --quiet: 59 | Be quiet, implies --no-progress. 60 | 61 | ## ENVIRONMENT 62 | 63 | * BUPSTASH_REPOSITORY: 64 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 65 | remote repositories if ssh access is configured. 66 | 67 | * BUPSTASH_REPOSITORY_COMMAND: 68 | A command to run to connect to an instance of bupstash-serve(1). This 69 | allows more complex connections to the repository for less common use cases. 70 | 71 | * BUPSTASH_KEY: 72 | Path to the key that will be used for decrypting data and metadata. 73 | 74 | * BUPSTASH_KEY_COMMAND: 75 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 76 | to fetch the key from arbitrary locations such as the network or other secret storage. 77 | 78 | * BUPSTASH_QUERY_CACHE: 79 | Path to the query cache file to use. 80 | 81 | 82 | ## EXAMPLES 83 | 84 | ### Get an item with a specific id 85 | 86 | ``` 87 | $ bupstash get id=14ebd2073b258b1f55c5bbc889c49db4 > ./data.file 88 | ``` 89 | 90 | ### Get an item by name and timestamp 91 | 92 | ``` 93 | $ bupstash get name=backup.tar and timestamp=2020/19/* > ./restore.tar 94 | ``` 95 | 96 | ### Get a file or sub-tar from a directory snapshot 97 | 98 | ``` 99 | $ bupstash get --pick=path/to/file.txt id=$id 100 | $ bupstash get --pick=path/to/dir id=$id | tar ... 
101 | ``` 102 | 103 | ### Get a tarball 104 | 105 | The builtin directory put creates a tarball from a directory, so to extract 106 | it we use tar. 107 | 108 | ``` 109 | # Snapshot a directory. 110 | $ id=$(bupstash put ./data) 111 | 112 | # Fetch the contents of a snapshot and extract the contents with tar 113 | $ mkdir restore 114 | $ bupstash get id=$id | tar -C ./restore -xvf - 115 | ``` 116 | 117 | ## SEE ALSO 118 | 119 | bupstash(1), bupstash-put(1), bupstash-list(1), bupstash-restore(1), bupstash-rm(1), bupstash-keyfiles(7), 120 | bupstash-query-language(7) 121 | -------------------------------------------------------------------------------- /doc/man/bupstash-init.1.md: -------------------------------------------------------------------------------- 1 | bupstash-init(1) 2 | ================ 3 | 4 | ## SYNOPSIS 5 | 6 | Initialize a bupstash repository. 7 | 8 | `bupstash init [OPTIONS]` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash init` initializes a repository. If `REPOSITORY` already exists, the command fails. 13 | 14 | Currently it is not recommended to use bupstash on a network filesystem. 15 | 16 | For details about the contents of the package store after initialization, see bupstash-repository(7). 17 | 18 | ## OPTIONS 19 | 20 | * -r, --repository REPO: 21 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 22 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 23 | 24 | * --storage SPEC: 25 | Accepts 'dir' or a json storage specification. 26 | The default storage is 'dir' and stores encrypted data blocks in a 27 | repository local data directory. 28 | 29 | See the storage specs section for supported json specifications and examples. 30 | 31 | ## ENVIRONMENT 32 | 33 | * BUPSTASH_REPOSITORY: 34 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 35 | remote repositories if ssh access is configured.
36 | 37 | * BUPSTASH_REPOSITORY_COMMAND: 38 | A command to run to connect to an instance of bupstash-serve(1). This 39 | allows more complex connections to the repository for less common use cases. 40 | 41 | ## EXAMPLES 42 | 43 | ``` 44 | $ export BUPSTASH_REPOSITORY=./my-repository 45 | $ bupstash init 46 | 47 | $ export BUPSTASH_REPOSITORY=ssh://$SERVER/home/backups/bupstash-backups 48 | $ bupstash init 49 | ``` 50 | 51 | ## JSON STORAGE SPECS 52 | 53 | Each storage specification consists of a type designator and a set 54 | of type specific parameters. 55 | 56 | ### Dir storage 57 | 58 | Dir storage is an alias for `--storage dir` and is generally not needed. 59 | 60 | Example: 61 | 62 | ``` 63 | $ bupstash init --storage '{"Dir" : {}}' 64 | ``` 65 | 66 | ### External storage 67 | 68 | The external storage engine stores data via an external socket, documentation is pending interface stabilization. 69 | 70 | Example: 71 | 72 | ``` 73 | $ bupstash init --storage '{"External" : {"socket_path" : "/plugin/socket.sock", "path" : "plugin-specific-path"}}' 74 | ``` 75 | 76 | ## SEE ALSO 77 | 78 | bupstash(1), bupstash-repository(7) 79 | -------------------------------------------------------------------------------- /doc/man/bupstash-keyfiles.7.md: -------------------------------------------------------------------------------- 1 | bupstash-keyfiles(7) 2 | ==================== 3 | 4 | ## SYNOPSIS 5 | 6 | Overview of the bupstash key format. 7 | 8 | ## DESCRIPTION 9 | 10 | Bupstash key files are PEM encoded with one of the following tags: 11 | 12 | - BUPSTASH KEY 13 | - BUPSTASH SUB KEY 14 | 15 | The binary data after decoding the PEM data consists of [bare](https://baremessages.org/) key structures, described below.
16 | 17 | 18 | Binary encoding of keys: 19 | 20 | ``` 21 | 22 | type PrimaryKey { 23 | id: Data<16>, 24 | rollsum_key: Data<32>, 25 | data_hash_key_part_1: Data<32>, 26 | data_hash_key_part_2: Data<32>, 27 | data_pk: Data<32>, 28 | data_sk: Data<32>, 29 | data_psk: Data<32>, 30 | idx_hash_key_part_1: Data<32>, 31 | idx_hash_key_part_2: Data<32>, 32 | idx_pk: Data<32>, 33 | idx_sk: Data<32>, 34 | idx_psk: Data<32>, 35 | metadata_pk: Data<32>, 36 | metadata_sk: Data<32>, 37 | metadata_psk: Data<32>, 38 | } 39 | 40 | type SubKey { 41 | id: Data<16>, 42 | primary_key_id: Data<16>, 43 | rollsum_key: Option<Data<32>>, 44 | data_hash_key_part_1: Option<Data<32>>, 45 | data_hash_key_part_2: Option<Data<32>>, 46 | data_pk: Option<Data<32>>, 47 | data_sk: Option<Data<32>>, 48 | data_psk: Option<Data<32>>, 49 | idx_hash_key_part_1: Option<Data<32>>, 50 | idx_hash_key_part_2: Option<Data<32>>, 51 | idx_pk: Option<Data<32>>, 52 | idx_sk: Option<Data<32>>, 53 | idx_psk: Option<Data<32>>, 54 | metadata_pk: Option<Data<32>>, 55 | metadata_sk: Option<Data<32>>, 56 | metadata_psk: Option<Data<32>>, 57 | } 58 | 59 | 60 | 61 | type Key (PrimaryKey | SubKey) 62 | ``` 63 | 64 | ## EXAMPLE 65 | 66 | ``` 67 | $ bupstash new-key -o bupstash.key 68 | $ cat bupstash.key 69 | # This file contains a cryptographic key used by 'bupstash' to encrypt and decrypt data.
70 | # 71 | # key-id=55f32e9db43a1fa3cf65bb3705230898 72 | 73 | -----BEGIN BUPSTASH KEY----- 74 | AFXzLp20Oh+jz2W7NwUjCJgS7VhqV37771UhSRo7LZUIxJCbEZkm27AcYylSL5T2 75 | bxAE4g0rukxRhloPqWT+s1Yr2cPNEHymMzJzm+V4QiDMzE4K4k548bsrMoQMGXc8 76 | LRpNiqVzwRRvibkdf9RdnyYPQ5IlvQN395YJVCfiD6nEOY90plDH20UgiGiNLRYK 77 | xH+MfIoFA1X59UFdto0B/CJW9R98OgQeJNP91NQloFA17mbzhqUvwnHDjatzkxht 78 | CJWScQm6PTwEFEYRSzLTWgpFXjnpF09quzZenw/jEn6nPAyjb11u+Ohe7pkfxacv 79 | QZ5qhBMqJ7+H3VpvOLW7mTmXL3T6gB5W7u2Lg6Y/AwkE 80 | -----END BUPSTASH KEY----- 81 | 82 | ``` 83 | 84 | ## SEE ALSO 85 | 86 | bupstash(1) 87 | -------------------------------------------------------------------------------- /doc/man/bupstash-list-contents.1.md: -------------------------------------------------------------------------------- 1 | bupstash-list-contents(1) 2 | ========================= 3 | 4 | ## SYNOPSIS 5 | 6 | List snapshot contents. 7 | 8 | `bupstash list-contents [OPTIONS] QUERY... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash list-contents` lists the contents of the item matching the given query. 13 | 14 | Items created by using `bupstash put` on a directory will have an associated index, other items 15 | are not listable. 16 | 17 | ## OUTPUT FORMATS 18 | 19 | ### Human 20 | 21 | When `--format` is set to `human`, `bupstash list-contents` outputs aligned rows consisting of: 22 | 23 | ``` 24 | PERMS SIZE YYYY/MM/DD HH:MM:SS PATH... 25 | ``` 26 | 27 | The included date is the time of the last change to a given file as reported by the 28 | operating system at the time of the snapshot. 29 | 30 | Prefer using one of the versioned machine readable formats when writing scripts. 31 | 32 | ### JSONl1 33 | 34 | When `--format` is set to `jsonl1`, `bupstash list-contents` outputs one json object per line. 35 | 36 | Each line has the following json schema: 37 | 38 | ``` 39 | { 40 | "path": string | [ bytes... 
], 41 | "mode": number, 42 | "size": number, 43 | "uid": number, 44 | "gid": number, 45 | "mtime": number, 46 | "mtime_nsec": number, 47 | "ctime": number, 48 | "ctime_nsec": number, 49 | "norm_dev": number, 50 | "ino": number, 51 | "nlink": number, 52 | "link_target": string | null, 53 | "dev_major": number | null, 54 | "dev_minor": number | null, 55 | "xattrs": {string : string | [bytes...] ...} | null, 56 | "sparse": boolean, 57 | "data_hash": "$KIND[:$HEXBYTE]" | null 58 | } 59 | ``` 60 | 61 | ## QUERY LANGUAGE 62 | 63 | For full documentation on the query language, see bupstash-query-language(7). 64 | 65 | ## QUERY CACHING 66 | 67 | The list-contents command uses the same query caching mechanisms as bupstash-list(1), check that page for 68 | more information on the query cache. 69 | 70 | ## OPTIONS 71 | 72 | * -r, --repository REPO: 73 | The repository to connect to, may be of the form `ssh://$SERVER/$PATH` for 74 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 75 | 76 | * -k, --key KEY: 77 | Key used to decrypt data and metadata. If not set, defaults 78 | to `BUPSTASH_KEY`. 79 | 80 | * --format FORMAT: 81 | Set output format to one of the following 'human', 'jsonl'. 82 | 83 | * --query-cache PATH: 84 | Path to the query-cache file, defaults to one of the following, in order, provided 85 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 86 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 87 | 88 | * --pick PATH: 89 | List a sub-directory of the query. 90 | 91 | * --utc-timestamps: 92 | Display and search against timestamps in utc time instead of local time. 93 | 94 | * --no-progress: 95 | Suppress progress indicators (Progress indicators are also suppressed when stderr 96 | is not an interactive terminal). 97 | 98 | * -q, --quiet: 99 | Be quiet, implies --no-progress.
100 | 101 | ## ENVIRONMENT 102 | 103 | * BUPSTASH_REPOSITORY: 104 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 105 | remote repositories if ssh access is configured. 106 | 107 | * BUPSTASH_REPOSITORY_COMMAND: 108 | A command to run to connect to an instance of bupstash-serve(1). This 109 | allows more complex connections to the repository for less common use cases. 110 | 111 | * BUPSTASH_KEY: 112 | Path to a primary key that will be used for decrypting data and metadata. 113 | 114 | * BUPSTASH_KEY_COMMAND: 115 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 116 | to fetch the key from arbitrary locations such as the network or other secret storage. 117 | 118 | * BUPSTASH_QUERY_CACHE: 119 | Path to the query cache file to use. 120 | 121 | 122 | ## EXAMPLES 123 | 124 | ### Get an item with a specific id from the repository 125 | 126 | ``` 127 | $ bupstash list-contents id="14eb*" 128 | drwxr-xr-x 0 2020/10/30 13:32:04 . 129 | -rw-r--r-- 1967 2020/10/30 13:32:04 data.txt 130 | ``` 131 | 132 | ## SEE ALSO 133 | 134 | bupstash(1), bupstash-put(1), bupstash-diff(1), bupstash-keyfiles(7), bupstash-query-language(7) 135 | -------------------------------------------------------------------------------- /doc/man/bupstash-list.1.md: -------------------------------------------------------------------------------- 1 | bupstash-list(1) 2 | =============== 3 | 4 | ## SYNOPSIS 5 | 6 | List items in a bupstash repository. 7 | 8 | `bupstash list [OPTIONS] QUERY... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash list` fetches, decrypts and prints metadata of items stored 13 | in the bupstash repository. It can be used for searching the database 14 | with the bupstash query language. 15 | 16 | Only the metadata needs to be decrypted to list items, so a metadata key is sufficient 17 | for item queries, even without access to the data decryption key. 
18 | 19 | ## QUERY LANGUAGE 20 | 21 | For full documentation on the query language, see bupstash-query-language(7). 22 | 23 | ### List query examples: 24 | 25 | ``` 26 | $ bupstash list name='*.tar' 27 | ... 28 | $ bupstash list timestamp='2020*' 29 | ... 30 | ``` 31 | 32 | ## SPECIAL TAGS 33 | 34 | Bupstash automatically inserts special tags that can be viewed and queried against, they are outlined below. 35 | 36 | ### decryption-key-id 37 | 38 | This special tag is inserted when the `--query-encrypted` option is used, it allows searching against the 39 | key id that would be used for decrypting the given item. This tag is mostly useful for pruning 40 | backups for which you do not have the decryption key. 41 | 42 | ### size 43 | 44 | This tag is the size of the data stream and any index metadata associated with the snapshot. This 45 | means the size may not exactly match the size of the data stream retrieved by bupstash-get(1) for the case 46 | of snapshots. 47 | 48 | ### timestamp 49 | 50 | The time the item was created formatted as `YYYY/MM/DD HH:MM:SS`. 51 | 52 | ## QUERY CACHING 53 | 54 | Because all data is stored encrypted on the server, item metadata must first be synchronized to the local machine, 55 | and then decrypted on the client side to run a query. The file containing the synced and encrypted metadata 56 | is called the query cache. 57 | 58 | The path to the query-cache file, defaults to one of the following, in order, provided 59 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 60 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 61 | 62 | As a special case, a query that consists only of a fully specified id (e.g. `id=$FULL_ID`) will not require use 63 | of the query cache, instead the query can be passed directly to the server. This means 64 | it is always more efficient to fully specify an id when running any command that expects a query.
65 | 66 | 67 | ## OUTPUT FORMATS 68 | 69 | ### Human 70 | 71 | When `--format` is set to `human`, `bupstash list` outputs rows consisting of: 72 | 73 | ``` 74 | KEY=VALUE KEY=VALUE KEY=VALUE .... 75 | ``` 76 | 77 | Where each key and value corresponds to a tag that may be searched against. 78 | 79 | ### JSONl1 80 | 81 | When `--format` is set to `jsonl1`, `bupstash list` outputs one json object per line. 82 | 83 | Each line has the following json schema: 84 | 85 | ``` 86 | { 87 | "id": hexstring, 88 | "decryption_key_id": hexstring, 89 | "data_tree": { 90 | "address": hexstring, 91 | "height": number, 92 | "data_chunk_count": number 93 | }, 94 | "index_tree": { 95 | "address": hexstring, 96 | "height": number, 97 | "data_chunk_count": number 98 | }, 99 | "data_size": number, // If decryption key present. 100 | "index_size": number, // If decryption key present. 101 | "put_key_id": hexstring, // If decryption key present. 102 | "data_hash_key_part": hexstring, // If decryption key present. 103 | "index_hash_key_part": hexstring, // If decryption key present. 104 | "unix_timestamp_millis": number, 105 | "tags": { 106 | string : string, 107 | ... 108 | } 109 | ``` 110 | 111 | If --query-encrypted is specified, encrypted tags and metadata are omitted. 112 | 113 | ## OPTIONS 114 | 115 | * -r, --repository REPO: 116 | The repository to connect to may be of the form `ssh://$SERVER/$PATH` for 117 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 118 | 119 | * -k, --key KEY: 120 | Primary key used to decrypt data and metadata. If not set, defaults 121 | to `BUPSTASH_KEY`. 122 | 123 | * --query-encrypted: 124 | The query will not decrypt any metadata, allowing you to 125 | list items you do not have a decryption key for. 126 | This option inserts the pseudo query tag 'decryption-key-id'. 
127 | 128 | * --query-cache PATH: 129 | Path to the query-cache file, defaults to one of the following, in order, provided 130 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 131 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 132 | 133 | * --format FORMAT: 134 | Set output format to one of the following 'human', 'jsonl'. 135 | 136 | * --utc-timestamps: 137 | Display and search against timestamps in utc time instead of local time. 138 | 139 | * --no-progress: 140 | Suppress progress indicators (Progress indicators are also suppressed when stderr 141 | is not an interactive terminal). 142 | 143 | * -q, --quiet: 144 | Be quiet, implies --no-progress. 145 | 146 | ## ENVIRONMENT 147 | 148 | * BUPSTASH_REPOSITORY: 149 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 150 | remote repositories if ssh access is configured. 151 | 152 | * BUPSTASH_REPOSITORY_COMMAND: 153 | A command to run to connect to an instance of bupstash-serve(1). This 154 | allows more complex connections to the repository for less common use cases. 155 | 156 | * BUPSTASH_KEY: 157 | Path to a primary key that will be used for decrypting data and metadata. 158 | 159 | * BUPSTASH_KEY_COMMAND: 160 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 161 | to fetch the key from arbitrary locations such as the network or other secret storage. 162 | 163 | * BUPSTASH_QUERY_CACHE: 164 | Path to the query cache file to use. 165 | 166 | 167 | ## EXAMPLES 168 | 169 | ### List items matching a query 170 | 171 | ``` 172 | $ bupstash list name=backup.tar and timestamp=2020/07/* 173 | id="aa87fdbc72241f363568bbb888c0834e" name="backup.tar" size="106.34MB" timestamp="2020-07-24 15:25:00" 174 | id="d271ec0b989cfc20e10d01380115747e" name="backup.tar" size="146.38MB" timestamp="2020-07-29 15:25:24" 175 | ... 
176 | ``` 177 | 178 | ## SEE ALSO 179 | 180 | bupstash(1), bupstash-list-contents(1), bupstash-keyfiles(7), bupstash-query-language(7) 181 | -------------------------------------------------------------------------------- /doc/man/bupstash-new-key.1.md: -------------------------------------------------------------------------------- 1 | bupstash-new-key(1) 2 | =================== 3 | 4 | ## SYNOPSIS 5 | 6 | Generate a new bupstash key. 7 | 8 | `bupstash new-key -o KEY` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash new-key` creates a new bupstash key capable of both 13 | encrypting and decrypting repository entries. 14 | 15 | The generated key will have permissions that make it readable by 16 | only the creating user. 17 | 18 | Remember to keep your keys safe, as losing a key is the same as losing all 19 | data stored using that key. 20 | 21 | ## OPTIONS 22 | 23 | * -o, --output PATH: 24 | Path to where the new key will be written. 25 | 26 | ## EXAMPLES 27 | 28 | ### Create a new key 29 | ``` 30 | $ bupstash new-key -o ./backups.key 31 | ``` 32 | 33 | ## SEE ALSO 34 | 35 | bupstash(1), bupstash-keyfiles(7) 36 | -------------------------------------------------------------------------------- /doc/man/bupstash-new-sub-key.1.md: -------------------------------------------------------------------------------- 1 | bupstash-new-sub-key(1) 2 | ======================= 3 | 4 | ## SYNOPSIS 5 | 6 | Generate a bupstash sub key with lesser encryption and decryption capabilities. 7 | 8 | `bupstash new-sub-key -k KEY -o SUB_KEY` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash new-sub-key` creates a new bupstash key capable of 13 | a subset of the encryption and decryption operations of the main key. 14 | 15 | Capabilities are any of 'put', 'list' and 'list-contents'. 'put' keys can 16 | create new backups but not decrypt data, 'list' keys can decrypt tags and other metadata, 17 | while 'list-contents' keys can decrypt the contents of items created by 'bupstash put'.
18 | 19 | A typical use of a list only key would be to allow a cron job to rotate old backups by 20 | their search tags, without exposing the data decryption key. 21 | 22 | The generated key will be marked readable only for the creating user. 23 | 24 | If a sub-key is lost, the original key will still be able to decrypt any data in the repository 25 | encrypted by that sub-key. 26 | 27 | *NOTE*: decryption differs from access - An attacker may still delete data by simply deleting the 28 | items or files they have access to. Use bupstash-serve(1) access controls to restrict which 29 | operations a user can perform and prevent unauthorized deletion of data. This can be done via an 30 | ssh authorized_keys file, or through mechanisms such as `sudo` or `doas` configuration. 31 | 32 | ## OPTIONS 33 | 34 | * -k, --key PATH: 35 | Key to derive the new sub-key from. 36 | * -o, --output PATH: 37 | Path to where the sub-key will be written. 38 | * --put: 39 | The key is able to encrypt data for 'put' operations. 40 | * --list: 41 | The key will be able to decrypt metadata and perform queries. 42 | * --list-contents: 43 | The key will be able to list item contents with 'list-contents' (implies --list). 
44 | 45 | ## EXAMPLES 46 | 47 | ### Create a new put only key 48 | 49 | ``` 50 | $ bupstash new-sub-key --put -k backups.key -o ./put.key 51 | $ bupstash put -k ./put.key ./data 52 | ``` 53 | 54 | ### Create a new listing key 55 | 56 | ``` 57 | $ bupstash new-sub-key -k ./backups.key -o ./list.key --list 58 | $ bupstash list -k ./list.key 59 | ``` 60 | 61 | ### Create a new content listing key 62 | 63 | ``` 64 | $ bupstash new-sub-key -k ./backups.key -o ./list-contents.key --list-contents 65 | $ bupstash list-contents -k ./list-contents.key name=some-backup.tar 66 | ``` 67 | 68 | ## SEE ALSO 69 | 70 | bupstash(1), bupstash-keyfiles(7) 71 | -------------------------------------------------------------------------------- /doc/man/bupstash-query-language.7.md: -------------------------------------------------------------------------------- 1 | bupstash-query-language(7) 2 | ========================== 3 | 4 | ## SYNOPSIS 5 | 6 | The bupstash query language used by bupstash commands. 7 | 8 | ## DESCRIPTION 9 | 10 | The bupstash query language is used to filter and select items from a bupstash repository. Check 11 | the examples section for practical uses, or read the language section for a more precise description. 12 | 13 | ## EXAMPLES 14 | 15 | Glob matching: 16 | ``` 17 | $ bupstash list name="*.tar" 18 | ... name=foo.tar 19 | ... name=bar.tar 20 | ``` 21 | 22 | Literal matching: 23 | ``` 24 | $ bupstash list name=="*.tar" 25 | ... 26 | ``` 27 | 28 | Age based matching: 29 | 30 | ``` 31 | $ bupstash list newer-than "1 month" 32 | $ bupstash list older-than 2d 33 | $ bupstash list older-than 1y 34 | ... 35 | ``` 36 | 37 | And condition matching: 38 | ``` 39 | $ bupstash list type=backup and hostname=server1 hostname=server2 40 | ... 41 | ``` 42 | 43 | Or condition matching: 44 | ``` 45 | $ bupstash list hostname=server1 or hostname=server2 46 | ...
47 | ``` 48 | 49 | Precedence grouping: 50 | ``` 51 | $ bupstash list [hostname=server1 or hostname=server2] and date=2020-* 52 | ... 53 | ``` 54 | 55 | Quote using your shell's builtin quoting: 56 | 57 | ``` 58 | $ bupstash rm name="my files.tar" 59 | ``` 60 | 61 | ## LANGUAGE 62 | 63 | ### Delimiters 64 | 65 | As queries may span multiple command line arguments, the gap between arguments is treated as a special 66 | delimiting character for the sake of query parsing. 67 | 68 | ### Tags and values 69 | 70 | A tag name is a string containing a set of characters matching the regular 71 | expression ```[A-Za-z0-9-_]+```. 72 | 73 | A value is a set of any characters except a delimiter. 74 | 75 | ### Durations 76 | 77 | A duration is a concatenation of time spans, where each time span is an integer number and a suffix. 78 | 79 | Supported suffixes: 80 | 81 | - seconds, second, sec, s 82 | - minutes, minute, min, m 83 | - hours, hour, hr, h 84 | - days, day, d 85 | - weeks, week, w 86 | - months, month, M -- defined as 30.44 days 87 | - years, year, y -- defined as 365.25 days 88 | 89 | ### Globbing 90 | 91 | Some operators accept a glob to match against, the following describes the valid globbing meta characters. 92 | 93 | ``` 94 | ? matches any single character. (If the literal_separator option is enabled, then ? can never match a path separator.) 95 | * matches zero or more characters. (If the literal_separator option is enabled, then * can never match a path separator.) 96 | ** recursively matches directories but are only legal in three situations. First, if the glob starts with **/, then it matches all directories. For example, **/foo matches foo and bar/foo but not foo/bar. Secondly, if the glob ends with /**, then it matches all sub-entries. For example, foo/** matches foo/a and foo/a/b, but not foo. Thirdly, if the glob contains /**/ anywhere within the pattern, then it matches zero or more directories. Using ** anywhere else is illegal (N.B.
the glob ** is allowed and means “match everything”). 97 | {a,b} matches a or b where a and b are arbitrary glob patterns. (N.B. Nesting {...} is not currently allowed.) 98 | [ab] matches a or b where a and b are characters. Use [!ab] to match any character except for a and b. 99 | Metacharacters such as * and ? can be escaped with character class notation. e.g., [*] matches *. 100 | When backslash escapes are enabled, a backslash (\) will escape all meta characters in a glob. If it precedes a non-meta character, then the slash is ignored. A \\ will match a literal \\. Note that this mode is only enabled on Unix platforms by default, but can be enabled on any platform via the backslash_escape setting on Glob. 101 | ``` 102 | 103 | (Documentation taken from the underlying [glob software library](https://docs.rs/globset/0.4.8/globset/index.html#)). 104 | 105 | ### Binary operators 106 | 107 | Check a tag matches a glob: 108 | 109 | ``` 110 | TAGNAME = GLOB 111 | ``` 112 | 113 | Check a tag matches a literal value. 114 | 115 | ``` 116 | TAGNAME == VALUE 117 | ``` 118 | 119 | Match if either expression matches. 120 | 121 | ``` 122 | EXPR or EXPR 123 | ``` 124 | 125 | Match if both expressions match. 126 | 127 | ``` 128 | EXPR and EXPR 129 | ``` 130 | 131 | ### Age matching 132 | 133 | ``` 134 | older-than DURATION 135 | newer-than DURATION 136 | ``` 137 | 138 | Take care that system clocks are configured correctly on both the querying machine, and devices sending backups, as incorrect 139 | system clocks could cause accidental removal of items. 140 | 141 | ### Unary operators 142 | 143 | Invert an expression. 144 | 145 | ``` 146 | ~ EXPR 147 | ``` 148 | 149 | 150 | ### grouping 151 | 152 | Use brackets to alter the default precedence. 153 | 154 | ``` 155 | [ EXPR ] 156 | ``` 157 | 158 | Note, This differs from the typical tradition of using `()` for grouping so queries are 159 | easier to write in shell scripts where `()` already has a designated meaning. 
160 | 161 | ## SEE ALSO 162 | 163 | bupstash(1), bupstash-put(1), bupstash-list(1), bupstash-rm(1) 164 | -------------------------------------------------------------------------------- /doc/man/bupstash-recover-removed.1.md: -------------------------------------------------------------------------------- 1 | bupstash-recover-removed(1) 2 | ============== 3 | 4 | ## SYNOPSIS 5 | 6 | Recover repository items that were removed, but that have not 7 | yet been deleted via garbage collection. 8 | 9 | `bupstash recover-removed [OPTIONS]` 10 | 11 | ## DESCRIPTION 12 | 13 | `bupstash recover-removed` allows a user to undo all 'rm' operations that 14 | have taken place since the last invocation of bupstash-gc(1). 15 | In other words, this command provides a way to correct errors and accidental 16 | invocations of bupstash-rm(1). 17 | 18 | `bupstash recover-removed` requires 'put' and 'get' permissions for the repository being operated on. 19 | 20 | ## OPTIONS 21 | 22 | * -r, --repository REPO: 23 | The repository to connect to and operate on. 24 | May be of the form `ssh://$SERVER/$PATH` for 25 | remote repositories if ssh access is configured. 26 | If not specified, is set to `BUPSTASH_REPOSITORY`. 27 | 28 | * --no-progress: 29 | Suppress progress indicators (Progress indicators are also suppressed when stderr 30 | is not an interactive terminal). 31 | 32 | * -q, --quiet: 33 | Be quiet, implies --no-progress. 34 | 35 | ## ENVIRONMENT 36 | 37 | * BUPSTASH_REPOSITORY: 38 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 39 | remote repositories if ssh access is configured. 40 | 41 | * BUPSTASH_REPOSITORY_COMMAND: 42 | A command to run to connect to an instance of bupstash-serve(1). This 43 | allows more complex connections to the repository for less common use cases. 
44 | 45 | ## SEE ALSO 46 | 47 | bupstash(1), bupstash-rm(1), bupstash-gc(1) 48 | -------------------------------------------------------------------------------- /doc/man/bupstash-repository.7.md: -------------------------------------------------------------------------------- 1 | bupstash-repository(7) 2 | ====================== 3 | 4 | ## SYNOPSIS 5 | 6 | Overview of the bupstash repository format. 7 | 8 | ## DESCRIPTION 9 | 10 | The most important part of bupstash is the repository. It is where all data is stored in a mostly 11 | encrypted form. The bupstash client interacts via the repository over stdin/stdout of the bupstash-serve(1) 12 | process. This may be locally, or via a protocol such as ssh. 13 | 14 | Because most data is encrypted, the repository structure is quite simple. 15 | 16 | Files: 17 | 18 | ``` 19 | repo 20 | ├── data 21 | │ ├── ... 22 | │ └── 5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03 23 | ├── items 24 | │ ├── 031d91b342fc76b8a4b32e2a8d12e4d0 25 | │ └── ffaa0127fd9938aa0a3eaf6070aa947d 26 | ├── meta 27 | │ ├── gc_generation 28 | │ ├── gc_dirty 29 | │ ├── schema_version 30 | │ └── storage_engine 31 | ├── wal 32 | │ ├── ... 
33 | │   └── 00000000N.wal 34 | ├── repo.oplog 35 | ├── repo.lock 36 | ├── tx.lock 37 | ├── tx.seq 38 | └── tx.wal 39 | 40 | ``` 41 | 42 | ### repo.oplog 43 | 44 | This file is an append only ledger where each entry is a [bare](https://baremessages.org/) encoded log op of the following format: 45 | 46 | 47 | ``` 48 | 49 | type Xid data<16>; 50 | type Address data<32>; 51 | 52 | type LogOp (AddItem | RemoveItems | RecoverRemoved); 53 | 54 | type AddItem { 55 | id: Xid 56 | metadata: VersionedItemMetadata 57 | } 58 | 59 | type RemoveItems { 60 | items: []Xid 61 | } 62 | 63 | type RecoverRemoved {} 64 | 65 | type VersionedItemMetadata = (V1VersionedItemMetadata | V2VersionedItemMetadata | V3VersionedItemMetadata) 66 | 67 | type V1VersionedItemMetadata { 68 | // deprecated 69 | } 70 | 71 | type V2VersionedItemMetadata { 72 | // deprecated 73 | } 74 | 75 | type V3VersionedItemMetadata { 76 | primary_key_id: Xid, 77 | unix_timestamp_millis: u64, 78 | tree_height: usize, 79 | address: Address, 80 | encrypted_metadata: data 81 | } 82 | 83 | struct V3SecretItemMetadata { 84 | plain_text_hash: data<32>, 85 | send_key_id: Xid, 86 | hash_key_part_2: data<32>, 87 | tags: Map[String]String, 88 | } 89 | 90 | ``` 91 | 92 | It is important to note, all metadata like search tags are stored encrypted and are not 93 | readable without a master key or metadata key. 94 | 95 | ### repo.lock 96 | 97 | This lock is held exclusively during garbage collection and in a shared fashion 98 | during operations that modify the repository. 99 | 100 | ### tx.lock 101 | 102 | Bupstash uses `tx.lock` and `tx.wal` to coordinate crash safe edits across multiple files.
103 | 104 | ### tx.wal 105 | 106 | This file is a [bare](https://baremessages.org/) encoded WAL (write ahead log) with the following schema: 107 | 108 | ``` 109 | 110 | type WalOp = Begin | End | CreateFile | WriteFileAt | Remove | Rename | Mkdir; 111 | 112 | type Begin { 113 | sequence_number: u64, 114 | }; 115 | 116 | type End {}; 117 | 118 | type CreateFile { 119 | path: String, 120 | data_size: Uint, 121 | }; 122 | 123 | type WriteFileAt { 124 | path: String, 125 | offset: Uint, 126 | data_size: Uint, 127 | }; 128 | 129 | type Remove { 130 | path: String, 131 | }; 132 | 133 | type Rename { 134 | path: String, 135 | to: String, 136 | }; 137 | 138 | type Mkdir { 139 | path: String, 140 | }; 141 | 142 | ``` 143 | 144 | The final 32 bytes of the write ahead log are the blake3 hash of the previous file contents. 145 | 146 | When bupstash needs to modify repository metadata, it first writes a wal file and flushes it to disk, then performs the given operations in sequence. On crash the operations 147 | will be replayed again preventing data loss. 148 | 149 | Do not delete a write ahead log if you see one, it is critical for data integrity. 150 | 151 | ### tx.seq 152 | 153 | A file containing a sequence number used for numbering WAL files. 154 | 155 | ### data/ 156 | 157 | This directory contains a set of encrypted and deduplicated data chunks. 158 | The name of the file corresponds to an HMAC hash of the unencrypted contents, as such 159 | if two chunks are added to the repository with the same hmac, they only need to be stored once. 160 | 161 | This directory is not used when the repository is configured for storage engines other than "Dir" storage. 162 | 163 | ### items/ 164 | 165 | This directory contains one file for each item, where the contents of the file is an encoded 166 | `VersionedItemMetadata` as described in the repo.oplog section. When an item is removed and is 167 | pending garbage collection it is given the .removed suffix.
168 | 169 | ### meta/storage_engine 170 | 171 | Contains the JSON storage engine specification, which allows storage of data chunks 172 | in external or alternative storage formats. This file is human editable to assist 173 | manual data migrations between supported formats. 174 | 175 | ### meta/schema_version 176 | 177 | This file contains the schema version of a repository. 178 | 179 | ### meta/gc_generation 180 | 181 | Each time a garbage collection happens, this file is changed and is used to invalidate 182 | client side caches. 183 | 184 | ### meta/gc_dirty 185 | 186 | This file marks if a garbage collection was interrupted prematurely and is used for crash 187 | recovery. This file is not always present. 188 | 189 | ### wal/ 190 | 191 | When the BUPSTASH_KEEP_WAL=1 env var is set for the `bupstash serve` process, this 192 | directory contains the historic WAL files that can be used for point in time recovery. 193 | 194 | ## The hash tree structure 195 | 196 | Bupstash stores arbitrary streams of data in the repository by splitting the stream into chunks, 197 | hmac addressing the chunks, then compressing and encrypting the chunks with the public key portion of a bupstash key. 198 | Each chunk is then stored in the data directory in a file named after the hmac hash of the contents. 199 | As we generate a sequence of chunks with corresponding hmac addresses, 200 | we can build a tree structure out of these addresses. Leaf nodes of the tree are simply the encrypted data. 201 | Other nodes in the tree are simply unencrypted lists of hmac hashes, which may point to encrypted leaf nodes, 202 | or other subtrees. The key idea behind the hash tree, is we can convert an arbitrary stream of data 203 | into a single HMAC address with approximately equal sized chunks. 204 | When multiple hash trees are added to the repository, they share structure and enable deduplication.
205 | 206 | This addressing and encryption scheme has some important properties: 207 | 208 | - The repository owner *cannot* guess chunk contents as the HMAC key is unknown to him. 209 | - The repository owner *cannot* decrypt leaves of the hash tree, as they are encrypted. 210 | - The repository owner *can* iterate the hash tree for garbage collection purposes. 211 | - The repository owner *can* run garbage collection without retrieving the leaf nodes from cold storage. 212 | - The repository owner *can* push a stream of hash tree nodes to a client with no network round trips. 213 | - A client *can* send data streams to a repository without sharing the encryption key. 214 | - A client *can* retrieve and verify a datastream by checking hmacs. 215 | 216 | These properties are desirable for enabling high performance garbage collection and data streaming 217 | with prefetch on the repository side. 218 | 219 | ## Chunking and deduplication 220 | 221 | Data is deduplicated by splitting a data stream into small chunks, and never storing the same chunk twice. 222 | The performance of this deduplication is thus determined by how chunk split points are defined. For curious 223 | readers - bupstash uses something known as 'content defined chunking' to find efficient chunk splits. 224 | 225 | ## Chunk formats 226 | 227 | Chunks in the database are one of the following types, in general we know the type of a chunk 228 | based on the item metadata and the hash tree height. 229 | 230 | ### Encrypted data chunk 231 | 232 | These chunks form the leaves of our hash trees, they contain encrypted data. They contain 233 | a key exchange packet, with enough information for the master key to derive the session key. 234 | 235 | ``` 236 | KEY_EXCHANGE_PACKET1_BYTES[PACKET1_SZ] || ENCRYPTED_BYTES[...] 237 | ``` 238 | 239 | After decryption, the chunk is optionally compressed, so is either compressed data, or data with a null footer byte. 240 | 241 | ``` 242 | COMPRESSED_DATA[...]
|| DECOMPRESSED_SIZE[4] || COMPRESSION_TYPE[1] 243 | ``` 244 | 245 | or 246 | 247 | ``` 248 | DATA[...] || 0x00 249 | ``` 250 | 251 | Valid compression types are: 252 | 253 | - 1 == lz4 compression. 254 | - 2 == zstd compression. 255 | 256 | ### Hash tree node chunk 257 | 258 | These chunks form non leaf nodes in our hash tree, they consist of an array of addresses prefixed 259 | with the total number of data chunks that are beneath them in the tree. 260 | 261 | ``` 262 | NUM_DATA_CHUNKS[8] ADDRESS[ADDRESS_SZ] 263 | NUM_DATA_CHUNKS[8] ADDRESS[ADDRESS_SZ] 264 | NUM_DATA_CHUNKS[8] ADDRESS[ADDRESS_SZ] 265 | NUM_DATA_CHUNKS[8] ADDRESS[ADDRESS_SZ] 266 | ... 267 | ``` 268 | 269 | These addresses must be recursively followed to read our data chunks, these addresses correspond 270 | to data chunks when the tree height is 0. The chunk counts can be used to efficiently seek to address offsets 271 | in the tree. 272 | 273 | ### Format of key exchange bytes 274 | 275 | Coming soon... 276 | 277 | ## SEE ALSO 278 | 279 | bupstash(1) 280 | -------------------------------------------------------------------------------- /doc/man/bupstash-restore.1.md: -------------------------------------------------------------------------------- 1 | bupstash-restore(1) 2 | ================ 3 | 4 | ## SYNOPSIS 5 | 6 | Efficiently restore the contents of a snapshot into a local directory. 7 | 8 | `bupstash restore [OPTIONS] --into $PATH QUERY... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash restore` performs an efficient set of incremental changes to 13 | a directory such that it becomes identical to the requested snapshot. 14 | The incremental nature of `bupstash restore` makes it well suited for 15 | cycling between multiple similar snapshots. Note that this operation is dangerous 16 | as it deletes extra files already present in the destination directory. 
17 | 18 | In order to aid file browsing as unprivileged users, `bupstash restore` does 19 | not attempt to restore users, groups and xattrs by default. To set 20 | these you must specify the flags --ownership and --xattrs respectively. 21 | 22 | The item that is checked out is chosen based on a simple query against the 23 | tags specified when saving data with `bupstash put`. 24 | 25 | ## QUERY LANGUAGE 26 | 27 | For full documentation on the query language, see bupstash-query-language(7). 28 | 29 | ## QUERY CACHING 30 | 31 | The restore command uses the same query caching mechanisms as bupstash-list(1), check that page for 32 | more information on the query cache. 33 | 34 | ## SPARSE FILES 35 | 36 | If a file was detected as sparse during 'put', the restore command will restore it as 37 | sparse, but it should be noted the restore command will not delete an existing file 38 | with the correct checksum in order to recreate it as sparse. 39 | 40 | ## OPTIONS 41 | 42 | * --into PATH: 43 | Directory to restore files into, defaults to `BUPSTASH_RESTORE_DIR`. 44 | 45 | * -r, --repository REPO: 46 | The repository to connect to, may be of the form `ssh://$SERVER/$PATH` for 47 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 48 | 49 | * -k, --key KEY: 50 | Key that will be used to decrypt data and metadata. If not set, defaults 51 | to `BUPSTASH_KEY`. 52 | 53 | * --pick PATH: 54 | Pick a sub-directory of the snapshot to restore. 55 | 56 | * --ownership: 57 | Set uid's and gid's. 58 | 59 | * --xattrs: 60 | Set xattrs. 61 | 62 | * --indexer-threads N: 63 | Number of processor threads to use for pipelined parallel file hashing and metadata reads. 64 | Defaults to the number of processors.
65 | 66 | * --query-cache PATH: 67 | Path to the query-cache file, defaults to one of the following, in order, provided 68 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 69 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 70 | 71 | * --utc-timestamps: 72 | Display and search against timestamps in utc time instead of local time. 73 | 74 | * --no-progress: 75 | Suppress progress indicators (Progress indicators are also suppressed when stderr 76 | is not an interactive terminal). 77 | 78 | * -q, --quiet: 79 | Be quiet, implies --no-progress. 80 | 81 | ## ENVIRONMENT 82 | 83 | * BUPSTASH_REPOSITORY: 84 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 85 | remote repositories if ssh access is configured. 86 | 87 | * BUPSTASH_REPOSITORY_COMMAND: 88 | A command to run to connect to an instance of bupstash-serve(1). This 89 | allows more complex connections to the repository for less common use cases. 90 | 91 | * BUPSTASH_KEY: 92 | Path to the key that will be used for decrypting data and metadata. 93 | 94 | * BUPSTASH_KEY_COMMAND: 95 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 96 | to fetch the key from arbitrary locations such as the network or other secret storage. 97 | 98 | * BUPSTASH_QUERY_CACHE: 99 | Path to the query cache file to use. 100 | 101 | * BUPSTASH_RESTORE_DIR: 102 | Path to restore into, can be used instead of the --into argument. 
103 | 104 | ## EXAMPLES 105 | 106 | ### Restore a snapshot into a local directory 107 | 108 | ``` 109 | $ bupstash restore --into ./dir id=ad8* 110 | ``` 111 | 112 | ### Restore including users and groups 113 | 114 | ``` 115 | $ bupstash restore --ownership --into ./dir id=ad8* 116 | ``` 117 | 118 | ### Restore a sub directory of the snapshot 119 | 120 | ``` 121 | $ bupstash restore --into ./dir --pick sub/dir id=ad8* 122 | ``` 123 | 124 | ## SEE ALSO 125 | 126 | bupstash(1), bupstash-get(1), bupstash-list(1), bupstash-keyfiles(7), bupstash-query-language(7) 127 | -------------------------------------------------------------------------------- /doc/man/bupstash-rm.1.md: -------------------------------------------------------------------------------- 1 | bupstash-rm(1) 2 | ============== 3 | 4 | ## SYNOPSIS 5 | 6 | Remove items from a bupstash repository. 7 | 8 | `bupstash rm [OPTIONS] QUERY... ` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash rm` removes items from a bupstash repository. 13 | 14 | Items that are removed are not immediately deleted, instead the deletion and 15 | space reclamation is scheduled for the next time the garbage collector bupstash-gc(1) 16 | is run. 17 | 18 | Only the metadata needs to be decrypted to remove items, so a metadata key is sufficient 19 | for item deletion, even without access to the data decryption key. 20 | 21 | ## QUERY LANGUAGE 22 | 23 | For full documentation on the query language, see bupstash-query-language(7). 24 | 25 | ### Remove query examples 26 | ``` 27 | $ id=$(bupstash put ./some-data) 28 | 29 | $ bupstash rm id=$id 30 | 31 | $ bupstash rm name=backups.tar 32 | 33 | $ bupstash rm --allow-many name='*.tar' and older-than 30d 34 | 35 | $ bupstash rm --allow-many id="*" 36 | ``` 37 | 38 | ## QUERY CACHING 39 | 40 | The rm command uses the same query caching mechanisms as bupstash-list(1), check that page for 41 | more information on the query cache. 
42 | 43 | ## OPTIONS 44 | 45 | * -r, --repository REPO: 46 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 47 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 48 | 49 | * -k, --key KEY: 50 | Key used to decrypt metadata when executing a query. If not set, defaults 51 | to `BUPSTASH_KEY`. 52 | 53 | * --query-cache PATH: 54 | Path to the query-cache file, defaults to one of the following, in order, provided 55 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 56 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 57 | 58 | * --query-encrypted: 59 | The query will not decrypt any metadata, allowing you to 60 | list items you do not have a decryption key for. 61 | This option inserts the pseudo query tag 'decryption-key-id'. 62 | 63 | * --ids-from-stdin: 64 | Remove items with IDs read from stdin, one per line, instead of executing a query. 65 | 66 | * --allow-many: 67 | By default bupstash refuses to remove multiple items from a single query, this flag 68 | disables that safety feature. 69 | 70 | * --utc-timestamps: 71 | Display and search against timestamps in utc time instead of local time. 72 | 73 | * --no-progress: 74 | Suppress progress indicators (Progress indicators are also suppressed when stderr 75 | is not an interactive terminal). 76 | 77 | * -q, --quiet: 78 | Be quiet, implies --no-progress. 79 | 80 | ## ENVIRONMENT 81 | 82 | * BUPSTASH_REPOSITORY: 83 | The repository to connect to. May be of the form `ssh://$SERVER/$PATH` for 84 | remote repositories if ssh access is configured. 85 | 86 | * BUPSTASH_REPOSITORY_COMMAND: 87 | A command to run to connect to an instance of bupstash-serve(1). This 88 | allows more complex connections to the repository for less common use cases. 89 | 90 | * BUPSTASH_KEY: 91 | Path to a primary key that will be used for decrypting data and metadata. 
92 | 93 | * BUPSTASH_KEY_COMMAND: 94 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 95 | to fetch the key from arbitrary locations such as the network or other secret storage. 96 | 97 | * BUPSTASH_QUERY_CACHE: 98 | Path to the query cache file to use. 99 | 100 | ## EXAMPLES 101 | 102 | ### remove an item with a specific id from the repository 103 | 104 | ``` 105 | $ bupstash rm id=14ebd2073b258b1f55c5bbc889c49db4 106 | ``` 107 | 108 | ### remove all items from the repository 109 | 110 | ``` 111 | $ bupstash rm --allow-many id="*" 112 | ``` 113 | 114 | ### remove items with name backup.tar that are older than 30 days 115 | 116 | ``` 117 | $ bupstash rm --allow-many name=backup.tar and older-than 30d 118 | ``` 119 | 120 | ### remove items with a custom script 121 | 122 | ``` 123 | $ bupstash list --format=jsonl1 \ 124 | | custom-json-filter \ 125 | | bupstash rm --ids-from-stdin 126 | ``` 127 | 128 | ## SEE ALSO 129 | 130 | bupstash(1), bupstash-list(1), bupstash-gc(1), bupstash-query-language(7) 131 | -------------------------------------------------------------------------------- /doc/man/bupstash-serve.1.md: -------------------------------------------------------------------------------- 1 | bupstash-serve(1) 2 | ================ 3 | 4 | ## SYNOPSIS 5 | 6 | Serve the bupstash protocol over stdin/stdout. 7 | 8 | `bupstash serve [OPTIONS] REPOSITORY` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash serve` serves the bupstash protocol over stdin/stdout allowing 13 | interaction with a repository. Most bupstash commands operate via an instance of bupstash serve. 14 | 15 | The serve command has flags that can be set to restrict access permissions, by default 16 | all access is permitted until the first --allow-* option is provided. These flags can be enforced via an 17 | ssh authorized_keys file, or through mechanisms such as `sudo` or `doas` configuration.
18 | 19 | Clients with permission to read data may still not be able to decrypt it unless they 20 | have the correct client side decryption key. 21 | 22 | Note that many errors are printed out of band via stderr, so alternative transports should consider 23 | how to also forward stderr data. 24 | 25 | ## OPTIONS 26 | 27 | * --allow-init: 28 | Allow the client to initialize new repositories. 29 | * --allow-put: 30 | Allow client to put more items into the repository. 31 | * --allow-list: 32 | Allow client to retrieve metadata and snapshot indexes for search and listing. 33 | * --allow-get: 34 | Allow client to retrieve data from the repository, implies --allow-list. 35 | * --allow-remove: 36 | Allow client to remove repository items, implies --allow-list. 37 | * --allow-gc: 38 | Allow client to run the repository garbage collector. 39 | * --allow-sync: 40 | Allow client to sync items into the repository, i.e. be the destination of a repository sync. 41 | 42 | ## EXAMPLES 43 | 44 | 45 | ### Custom ssh flags using BUPSTASH_REPOSITORY_COMMAND 46 | 47 | ``` 48 | $ export BUPSTASH_REPOSITORY_COMMAND="ssh -p 2020 $SERVER bupstash serve /data/repository" 49 | $ bupstash list 50 | ``` 51 | 52 | ### Setup SSH access controls 53 | 54 | Create a 'backups' user on your server. 
55 | 56 | In your sshd config file on your server add the line: 57 | 58 | ``` 59 | Match User backups 60 | ForceCommand "/bin/bupstash-put-force-command.sh" 61 | ``` 62 | 63 | Create /bin/bupstash-put-force-command.sh on your server: 64 | 65 | ``` 66 | $ echo 'exec bupstash serve --allow-put /home/backups/bupstash-backups' > bupstash-put-force-command.sh 67 | $ sudo cp bupstash-put-force-command.sh /bin/bupstash-put-force-command.sh 68 | $ sudo chown root:root /bin/bupstash-put-force-command.sh 69 | $ sudo chmod +x /bin/bupstash-put-force-command.sh 70 | ``` 71 | 72 | Now any client with ssh access to the 'backups' user will only be able to add new backups to one repository: 73 | 74 | ``` 75 | $ export BUPSTASH_REPOSITORY="ssh://backups@$SERVER" 76 | $ bupstash put ./data 77 | d1659c3f56f744c7767fc57da003ee5d 78 | $ bupstash list 79 | server has disabled query and search for this client 80 | ``` 81 | 82 | Logging into the server via other means will have full access to the backups repository. Different 83 | permissions can be configured using similar concepts alongside different ssh configurations and keys. 84 | 85 | ## SEE ALSO 86 | 87 | bupstash(1), bupstash-repository(7) 88 | -------------------------------------------------------------------------------- /doc/man/bupstash-sync.1.md: -------------------------------------------------------------------------------- 1 | bupstash-sync(1) 2 | ================ 3 | 4 | ## SYNOPSIS 5 | 6 | Sync items and data from one bupstash repository to another. 7 | 8 | `bupstash sync [OPTIONS] --to $REMOTE [QUERY...]` 9 | 10 | ## DESCRIPTION 11 | 12 | `bupstash sync` copies items and data from one repository to another while 13 | attempting to minimize unnecessary bandwidth usage. 14 | 15 | A typical use of this command is to back up files to a local repository (e.g. an external drive) while also efficiently 16 | uploading them to an offsite location for safe storage.
17 | 18 | Note that when no query is specified all items are synced, even those that do not match the current bupstash key. 19 | 20 | 21 | ## QUERY LANGUAGE 22 | 23 | For full documentation on the query language, see bupstash-query-language(7). 24 | 25 | ## QUERY CACHING 26 | 27 | The sync command uses the same query caching mechanisms as bupstash-list(1), check that page for 28 | more information on the query cache. 29 | 30 | ## OPTIONS 31 | 32 | * -r, --repository REPO: 33 | The repository to sync from. May be of the form `ssh://$SERVER/$PATH` for 34 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_REPOSITORY`. 35 | 36 | * --to REPO: 37 | The destination repository to sync items to. May be of the form `ssh://$SERVER/$PATH` for 38 | remote repositories if ssh access is configured. If not specified, is set to `BUPSTASH_TO_REPOSITORY`. 39 | 40 | * -k, --key KEY: 41 | Key used to decrypt metadata when executing a query. If not set, defaults 42 | to `BUPSTASH_KEY`. 43 | 44 | * --query-cache PATH: 45 | Path to the query-cache file, defaults to one of the following, in order, provided 46 | the appropriate environment variables are set, `$BUPSTASH_QUERY_CACHE`, 47 | `$XDG_CACHE_HOME/.cache/bupstash/bupstash.qcache` or `$HOME/.cache/bupstash/bupstash.qcache`. 48 | 49 | * --query-encrypted: 50 | The query will not decrypt any metadata, allowing you to 51 | list items you do not have a decryption key for. 52 | This option inserts the pseudo query tag 'decryption-key-id'. 53 | 54 | * --ids-from-stdin: 55 | Sync items with IDs read from stdin, one per line, instead of executing a query. 56 | 57 | * --utc-timestamps: 58 | Display and search against timestamps in utc time instead of local time. 59 | 60 | * --no-progress: 61 | Suppress progress indicators (Progress indicators are also suppressed when stderr 62 | is not an interactive terminal). 63 | 64 | * -q, --quiet: 65 | Be quiet, implies --no-progress. 
66 | 67 | ## ENVIRONMENT 68 | 69 | * BUPSTASH_REPOSITORY: 70 | The repository to pull items from. May be of the form `ssh://$SERVER/$PATH` for 71 | remote repositories if ssh access is configured. 72 | 73 | * BUPSTASH_REPOSITORY_COMMAND: 74 | A command to run to connect to an instance of bupstash-serve(1). This 75 | allows more complex connections to the repository for less common use cases. 76 | 77 | * BUPSTASH_TO_REPOSITORY: 78 | The repository to sync items to. May be of the form `ssh://$SERVER/$PATH` for 79 | remote repositories if ssh access is configured. 80 | 81 | * BUPSTASH_TO_REPOSITORY_COMMAND: 82 | A command to run to connect to an instance of bupstash-serve(1). This 83 | allows more complex connections to the repository for less common use cases. 84 | 85 | * BUPSTASH_KEY: 86 | Path to a primary key that will be used for decrypting data and metadata. 87 | 88 | * BUPSTASH_KEY_COMMAND: 89 | A command to run that must print the key data, can be used instead of BUPSTASH_KEY 90 | to fetch the key from arbitrary locations such as the network or other secret storage. 91 | 92 | * BUPSTASH_QUERY_CACHE: 93 | Path to the query cache file to use. 94 | 95 | ## EXAMPLES 96 | 97 | ### Push all items from a local repository to a remote repository 98 | 99 | ``` 100 | $ bupstash sync --repository ./local-repository --to ssh://$REMOTE 101 | ``` 102 | 103 | ### Perform a backup locally then sync a copy to a remote repository 104 | 105 | ``` 106 | $ export BUPSTASH_REPOSITORY=./local-repository 107 | $ id="$(bupstash put ./some-files)" 108 | $ bupstash sync --to ssh://$REMOTE id="$id" 109 | ``` 110 | 111 | ## SEE ALSO 112 | 113 | bupstash(1), bupstash-query-language(7) 114 | -------------------------------------------------------------------------------- /doc/man/bupstash.1.md: -------------------------------------------------------------------------------- 1 | bupstash(1) 2 | =========== 3 | 4 | ## SYNOPSIS 5 | 6 | Bupstash encrypted and deduplicated backups. 
7 | 8 | Run one of the following `bupstash` subcommands. 9 | 10 | `bupstash init ...`
11 | `bupstash new-key ...`
12 | `bupstash new-sub-key ...`
13 | `bupstash put ...`
14 | `bupstash list ...`
15 | `bupstash list-contents ...`
16 | `bupstash diff ...`
17 | `bupstash get ...`
18 | `bupstash restore ...`
19 | `bupstash rm ...`
20 | `bupstash recover-removed ...`
21 | `bupstash gc ...`
22 | `bupstash sync ...`
23 | `bupstash exec-with-locks ...`
24 | `bupstash serve ...`
25 | `bupstash help ...`
26 | `bupstash version ...`
27 | 28 | ## DESCRIPTION 29 | 30 | ```bupstash``` is a tool for storing (and retrieving) 31 | files and data in an encrypted bupstash-repository(7). 32 | 33 | Some notable features of ```bupstash``` include: 34 | 35 | * Automatic deduplication of stored data. 36 | * Client side encryption of data. 37 | * Incremental file uploads. 38 | * A tag based query language. 39 | * Optional role based encryption and decryption key separation. 40 | * Remote repositories over ssh ssh. 41 | * Optional, per ssh key access repository controls. 42 | * A multi layered approach to security. 43 | 44 | The ```bupstash``` tool itself is divided into subcommands 45 | that each have their own documentation. 46 | 47 | ## SUBCOMMANDS 48 | 49 | * bupstash-init(1): 50 | Initialize a bupstash repository. 51 | * bupstash-new-key(1): 52 | Create a new primary key for creating/reading repository items. 53 | * bupstash-new-sub-key(1): 54 | Derive a sub key for a subset of operations. 55 | * bupstash-put(1): 56 | Add data to a bupstash repository. 57 | * bupstash-get(1): 58 | Fetch data from the bupstash repository matching a query. 59 | * bupstash-restore(1): 60 | Restore a snapshot into a local directory. 61 | * bupstash-list(1): 62 | List repository items matching a given query. 63 | * bupstash-list-contents(1): 64 | List directory snapshot contents. 65 | * bupstash-diff(1): 66 | Diff snapshot contents. 67 | * bupstash-rm(1): 68 | Remove repository items matching a given query. 69 | * bupstash-recover-removed(1): 70 | Recover removed items that are pending garbage collection. 71 | * bupstash-gc(1): 72 | Reclaim diskspace in a repository. 73 | * bupstash-sync(1): 74 | Sync items between repositories. 75 | * bupstash-exec-with-locks(1): 76 | Exec a command with exclusive locks held on the repository. 77 | * bupstash-serve(1): 78 | Serve a repository over stdin/stdout using the bupstash-protocol(7). 
79 | 80 | ## EXAMPLES 81 | 82 | 83 | ### Initialize a repository and create keys 84 | ``` 85 | $ bupstash init -r ssh://$SERVER/home/me/backups 86 | $ bupstash new-key -o backups.key 87 | ``` 88 | 89 | ### Tell bupstash to use our repository and key by default 90 | 91 | ``` 92 | $ export BUPSTASH_REPOSITORY=ssh://$SERVER/home/me/backups 93 | $ export BUPSTASH_KEY=backups.key 94 | ``` 95 | 96 | ### Directory snapshots 97 | 98 | ``` 99 | $ bupstash put ./some-data 100 | ebb66f3baa5d432e9f9a28934888a23d 101 | 102 | $ bupstash list-contents id=ebb66f3baa5d432e9f9a28934888a23d 103 | drwxr-xr-x 0 2020/11/05 10:42:48 . 104 | -rw-r--r-- 177B 2020/07/12 17:13:42 data.txt 105 | ``` 106 | 107 | ### List items matching a query 108 | 109 | ``` 110 | $ bupstash list hostname=$(hostname) 111 | id="bcb8684e6bf5cb453e77486decf61685" name="some-file.txt" hostname="my-server" timestamp="2020-07-27 11:26:16" 112 | ... 113 | ``` 114 | 115 | ### Incremental uploads 116 | 117 | ``` 118 | $ bupstash put --send-log /var/backup.sendlog ./some-data 119 | ebb66f3baa5d432e9f9a28934888a23d 120 | 121 | # Second backup is much faster when it reads the send log. 122 | $ bupstash put --send-log /var/backup.sendlog ./some-data 123 | ebb66f3baa5d432e9f9a28934888a23d 124 | ``` 125 | 126 | ### Capture and save command output 127 | 128 | ``` 129 | # Checks for errors before saving new item. 130 | $ bupstash put --exec name=database.sql pgdump mydatabase 131 | 14ebd2073b258b1f55c5bbc889c49db4 132 | ``` 133 | 134 | ### Get an item matching a query 135 | ``` 136 | $ bupstash get id=bcb8684e6bf5cb453e77486decf61685 137 | some data. 138 | ``` 139 | 140 | ### Restore a directory to a previous snapshot 141 | 142 | ``` 143 | $ bupstash restore --to ./dir name=dir.tar 144 | ``` 145 | 146 | ### Remove items matching a query. 
147 | ``` 148 | $ bupstash rm name=some-data.txt 149 | ``` 150 | 151 | ### Wipe a repository 152 | 153 | ``` 154 | $ bupstash rm --allow-many id=* 155 | ``` 156 | 157 | ### Reclaim disk space 158 | ``` 159 | $ bupstash gc 160 | ``` 161 | 162 | ### Offline decryption keys 163 | ``` 164 | # Create a key, a put only key, and a metadata (list/rm only) key. 165 | $ bupstash new-key -o backups.key 166 | $ bupstash new-sub-key --put -k backups.key -o backups-put.key 167 | $ bupstash new-sub-key --list -k backups.key -o backups-metadata.key 168 | 169 | ... Copy backups.key to secure offline storage ... 170 | 171 | # Remove primary key 172 | $ shred backups.key 173 | 174 | $ bupstash put -k backups-put.key ./data 175 | 14ebd2073b258b1f55c5bbc889c49db4 176 | 177 | ... When you need to list or remove backups, you may use the metadata key ... 178 | 179 | $ bupstash list -k backups-metadata.key 180 | ... 181 | $ bupstash rm -k backups-metadata.key 182 | 183 | ... After emergency, get decryption key from offline storage ... 184 | 185 | # Restore by getting an item and decrypting it using the decryption key. 186 | $ bupstash get -k backups.key id=14ebd2073b258b1f55c5bbc889c49db4 | tar -C ./restore -xf - 187 | ``` 188 | 189 | ## SEE ALSO 190 | 191 | bupstash-repository(7), bupstash-keyfiles(7) 192 | -------------------------------------------------------------------------------- /doc/technical_overview.md: -------------------------------------------------------------------------------- 1 | # High level implementation overview 2 | 3 | ## What does bupstash do? 4 | 5 | Bupstash ingests arbitrary data streams, deduplicates, encrypts and saves them in a local or remote repository. 6 | Bupstash also can convert filesystems on disk into a data stream transparently for the user. 7 | 8 | The bupstash repository contains very little unencrypted data, it stores only encrypted data chunks, and encrypted 9 | metadata. 
10 | 11 | Bupstash does not transmit decryption keys to the server at any point. 12 | 13 | ## Deduplication 14 | 15 | - Bupstash splits an input stream into data chunks less than 8 MiB. 16 | - Each chunk has a keyed blake3 hash computed, this is the address of the chunk. 17 | - Previous sends are tracked in the client side 'send log', an sqlite database, backing up the same data 18 | twice in a row only transmits new data chunks. 19 | - If the server sees repeat hash address, it does not persist the repeat data either. 20 | 21 | Quality of deduplication depends on how we split the data stream into chunks. 22 | We want our data chunks to be resilient to byte insertions or removals, so we use 23 | a rolling hash function to identify common split points between upload sessions. 24 | 25 | We currently use a rolling hash function called 'gear hash'. It hashes a 32 byte rolling window on 26 | the data stream and we form a new chunk if the gear hash matches an 'interestingness' property (see rollsum.rs for details). 27 | 28 | ## Encryption 29 | 30 | - We use libsodium cryptobox to encrypt each data chunk. 31 | - Each upload session encrypts chunks with an ephemeral public/private key pair. 32 | - The encryption is addressed at the private portion of the decryption key, think of this like 33 | sending an encrypted email to someone when you know their public key. 34 | - Each chunk has the ephemeral public key attached such that the session key can be derived 35 | by the master key. 36 | - A bupstash key is actually multiple libsodium key pairs and some preshared secrets, allowing us to divide decryption 37 | capabilities amongst sub keys. 38 | - We also encrypt metadata before sending it to the repository in an append only log. 39 | - Client side query works by syncing the metadata log then decrypting it client side. 40 | 41 | ## Hash tree 42 | 43 | When uploading data streams larger than a single chunk, we must group them. 
To do this we 44 | form a merkle tree, only the leaf data nodes are encrypted. 45 | 46 | - Each non leaf chunk in the hash tree is simply a list of addresses and a chunk count breadcrumb for random access. 47 | - Because the hash tree is mostly unencrypted, server can push stream the tree. 48 | - Because the hash tree is mostly unencrypted, server can perform garbage collection. 49 | - Data is still encrypted so server only knows approximate data stream size. 50 | 51 | ## Content index 52 | 53 | Pure data streams are not enough to allow an efficient file 'browsing' user interface, to 54 | solve this, each data stream has an optional auxiliary index data stream. 55 | The index is a hash tree containing an efficient index of the data. 56 | 57 | - A client can fetch and decrypt the index quickly. 58 | - The index allows partial data requests of the files within a data stream. 59 | - When the user requests a data stream, we first check if there is an index, 60 | if there is, we synthesize a tarball stream client side out of the index and data stream. 61 | 62 | ## Stat cache 63 | 64 | When converting a filesystem to a data stream and index, we can cache the hashes of a given 65 | file/directory based on stat information, allowing us to skip the expensive compression and encryption step. 66 | This cache information is stored in the send log. 67 | 68 | ## Repository Garbage collection 69 | 70 | - Garbage collection is a partially concurrent mark and sweep collector. 71 | - Garbage collections invalidate client side caches, except for when the client checks if a previous backup item still exists. 72 | -------------------------------------------------------------------------------- /doc/upcoming_changelog.md: -------------------------------------------------------------------------------- 1 | # Bupstash v0.12.0 2 | 3 | We are proud to bring you the next iteration of bupstash, this update contains a large amount of 4 | performance work as well as important bug fixes. 
5 | 6 | As an example of the potential performance improvements, here is a synthetic benchmark snapshotting the linux 7 | kernel source code on a development machine: 8 | 9 | ``` 10 | Benchmark 1: bupstash-0.11.1 put --no-send-log /tmp/linux 11 | Time (mean ± σ): 5.885 s ± 0.091 s [User: 6.099 s, System: 1.766 s] 12 | Range (min … max): 5.701 s … 6.016 s 10 runs 13 | 14 | Benchmark 2: bupstash-0.12.0 put --no-send-log /tmp/linux 15 | Time (mean ± σ): 1.884 s ± 0.014 s [User: 7.334 s, System: 1.340 s] 16 | Range (min … max): 1.862 s … 1.908 s 10 runs 17 | 18 | Summary 19 | 'bupstash-0.12.0 put --no-send-log /tmp/linux' ran 20 | 3.12 ± 0.05 times faster than 'bupstash-0.11.1 put --no-send-log /tmp/linux' 21 | ``` 22 | 23 | You read that right, a 3x speed improvement! It is also important to keep in mind bupstash is often limited 24 | by disk, network and send-log speeds rather than the cpu and ram performance that this benchmark measures. 25 | 26 | ## New features 27 | 28 | - The deduplication rolling hash algorithm has been improved and is now 30 to 50 percent faster. 29 | - Those using a nightly rust compiler can enable SIMD (even faster) rolling hash implementations. 30 | - A multithreaded `bupstash put` implementation has been added that can read, hash, compress, encrypt files in parallel. 31 | - Bupstash diff and bupstash restore can now use multiple threads when computing changes on the local disk. 32 | - New flags have been added to `bupstash put` and other commands to tune how bupstash uses cpu threads. 33 | 34 | ## Notable Bug fixes 35 | 36 | - A bug that caused `bupstash serve` to not detect io errors in certain situations has been fixed. 37 | Only the server side needs to be updated, so users of bupstash.io managed repositories do not need to take 38 | further action. 
39 | 40 | ## Incompatibilities 41 | 42 | - It is likely your repositories will grow in size if they contain data chunks from previous 43 | versions of bupstash, this can be fixed by cycling older data out over time, or recreating your backups. 44 | - Because the chunks generated by bupstash will change, expect increased bandwidth usage until 45 | the new chunks are uploaded. 46 | - bupstash is now biased to produce 2 MiB data chunks on average instead of 1 MiB chunks (in the future this may become configurable). 47 | 48 | ## Supporting bupstash 49 | 50 | Bupstash.io managed repositories are in open beta and anyone can create an account. 51 | If you enjoy bupstash then please consider creating a managed repository at https://bupstash.io/managed.html 52 | to support the project. 53 | 54 | Another great way to help the project is to just tell your friends to give bupstash a try. -------------------------------------------------------------------------------- /src/abloom.rs: -------------------------------------------------------------------------------- 1 | use super::address; 2 | use std::convert::TryInto; 3 | 4 | // ABloom is a bloom filter specialized for addresses by taking advantage of the 5 | // fact that addresses are already randomly distributed. 6 | // 7 | // See https://en.wikipedia.org/wiki/Bloom_filter 8 | 9 | #[derive(Debug, PartialEq, Eq)] 10 | pub struct ABloom { 11 | nbits: u64, 12 | bytes: Vec, 13 | } 14 | 15 | // k is the number of hash functions in the bloom filter. 16 | const K: usize = 4; 17 | 18 | fn count_set_bits(bytes: &[u8]) -> u64 { 19 | let mut n: u64 = 0; 20 | for b in bytes.iter() { 21 | n += b.count_ones() as u64; 22 | } 23 | n 24 | } 25 | 26 | pub fn approximate_mem_size_upper_bound(false_postive_rate: f64, num_addrs: u64) -> usize { 27 | // see wiki: Optimal number of hash functions... 28 | // > Goel and Gupta,[9] however, give a rigorous upper bound 29 | // > that makes no approximations and requires no assumptions. 
30 | // false_positives = (1 - e ^ (-k*n/m))^k 31 | // If we rearrange we get: 32 | // m = -k*n/ln(1 - root(k, false_positives)) 33 | let k = K as f64; 34 | let n = num_addrs as f64; 35 | let e = false_postive_rate; 36 | let m = (-k * n) / ((1.0 - e.powf(1.0 / k)).ln()); 37 | (m / 8.0) as usize // bits to bytes. 38 | } 39 | 40 | impl ABloom { 41 | pub fn new(mut mem_size: usize) -> ABloom { 42 | if mem_size == 0 { 43 | mem_size = 1; 44 | } 45 | 46 | ABloom { 47 | nbits: (mem_size as u64) * 8, 48 | bytes: vec![0; mem_size], 49 | } 50 | } 51 | 52 | pub fn from_bytes(bytes: Vec) -> ABloom { 53 | ABloom { 54 | nbits: (bytes.len() as u64) * 8, 55 | bytes, 56 | } 57 | } 58 | 59 | pub fn mem_size(&self) -> usize { 60 | self.bytes.len() 61 | } 62 | 63 | pub fn borrow_bytes(&self) -> &[u8] { 64 | &self.bytes 65 | } 66 | 67 | pub fn num_bits(&self) -> u64 { 68 | self.nbits 69 | } 70 | 71 | pub fn count_set_bits(&self) -> u64 { 72 | count_set_bits(&self.bytes) 73 | } 74 | 75 | pub fn utilization(&self) -> f64 { 76 | let n = count_set_bits(&self.bytes); 77 | (n as f64) / (self.nbits as f64) 78 | } 79 | 80 | // Like utilization but uses a small sample 81 | // to become a constant time operation. 
82 | pub fn estimate_utilization(&self) -> f64 { 83 | const SAMPLE_ESTIMATE_BYTES: usize = 1024 * 1024; 84 | let sample_size = std::cmp::min(SAMPLE_ESTIMATE_BYTES, self.bytes.len()); 85 | let n = count_set_bits(&self.bytes[0..sample_size]); 86 | (n as f64) / ((sample_size * 8) as f64) 87 | } 88 | 89 | pub fn estimate_false_positive_rate(&self) -> f64 { 90 | const N: u64 = 10000; 91 | let mut false_positives = 0; 92 | for _i in 0..N { 93 | if self.probably_has(&address::Address::random()) { 94 | false_positives += 1; 95 | } 96 | } 97 | (false_positives as f64) / (N as f64) 98 | } 99 | 100 | pub fn estimate_add_count(&self) -> f64 { 101 | let m = self.nbits as f64; 102 | let x = self.count_set_bits() as f64; 103 | let k = K as f64; 104 | //Refer to bloom filter wiki: Approximating the number of items in a Bloom filter. 105 | (-m / k) * (1.0 - (x / m)).ln() 106 | } 107 | 108 | pub fn add(&mut self, addr: &address::Address) { 109 | for i in 0..K { 110 | let offset_buf = addr.bytes[i * 8..(i * 8 + 8)].try_into().unwrap(); 111 | let bit_offset: u64 = u64::from_le_bytes(offset_buf) % self.nbits; 112 | let shift = bit_offset & 7; 113 | let byte_offset: usize = ((bit_offset & !7) >> 3).try_into().unwrap(); 114 | self.bytes[byte_offset] |= 1 << shift; 115 | } 116 | } 117 | 118 | pub fn probably_has(&self, addr: &address::Address) -> bool { 119 | for i in 0..K { 120 | let offset_buf = addr.bytes[i * 8..(i * 8 + 8)].try_into().unwrap(); 121 | let bit_offset: u64 = u64::from_le_bytes(offset_buf) % self.nbits; 122 | let shift = bit_offset & 7; 123 | let byte_offset: usize = ((bit_offset & !7) >> 3).try_into().unwrap(); 124 | if (self.bytes[byte_offset] & (1 << shift)) == 0 { 125 | return false; 126 | } 127 | } 128 | true 129 | } 130 | } 131 | 132 | #[cfg(test)] 133 | mod tests { 134 | use super::super::address; 135 | use super::super::crypto; 136 | use super::*; 137 | 138 | #[test] 139 | fn test_abloom() { 140 | crypto::init(); 141 | 142 | let mut abloom = ABloom::new(8 * 1024 * 
1024); 143 | 144 | for _i in 0..10000 { 145 | let addr = address::Address::random(); 146 | abloom.add(&addr); 147 | assert!(abloom.probably_has(&addr)); 148 | } 149 | } 150 | 151 | #[test] 152 | fn test_approximate_mem_size() { 153 | crypto::init(); 154 | for n in [20000, 100000].iter() { 155 | for p in [0.01, 0.05, 0.1, 0.5].iter() { 156 | let mut abloom = ABloom::new(approximate_mem_size_upper_bound(*p, *n)); 157 | 158 | for _i in 0..*n { 159 | let addr = address::Address::random(); 160 | abloom.add(&addr); 161 | assert!(abloom.probably_has(&addr)); 162 | } 163 | 164 | let estimated_false_positives = abloom.estimate_false_positive_rate(); 165 | let prediction_delta = *p - estimated_false_positives; 166 | 167 | eprintln!("n={}", n); 168 | eprintln!("p={}", p); 169 | eprintln!("mem_size={}", abloom.mem_size()); 170 | eprintln!( 171 | "estimated_false_positive_rate={}", 172 | estimated_false_positives, 173 | ); 174 | eprintln!("estimated_add_count={}", abloom.estimate_add_count()); 175 | eprintln!("utilization={}", abloom.utilization()); 176 | eprintln!("estimated_utilization={}", abloom.estimate_utilization()); 177 | eprintln!("prediction_delta={}", prediction_delta); 178 | // This test relies on probabilities to pass, if it is flaky, we can tune it. 179 | assert!(prediction_delta < 0.020); 180 | } 181 | } 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/acache.rs: -------------------------------------------------------------------------------- 1 | use super::address::*; 2 | use std::convert::TryInto; 3 | 4 | // ACache is the 'address cache' designed to let bupstash 5 | // efficiently skip operations on addresses while also 6 | // keeping a bound on memory use. 7 | // 8 | // The current implementation is a direct mapped cache. On hash collision 9 | // a value simply evicts the existing value. 
We could use something fancier like an lru, 10 | // but we need benchmarks to show it improves anything over such a simple implementaion. 11 | 12 | pub struct ACache { 13 | dm_cache_ents: Vec
, 14 | pub add_count: u64, 15 | pub hit_count: u64, 16 | } 17 | 18 | impl ACache { 19 | pub fn new(cache_ents: usize) -> ACache { 20 | ACache { 21 | dm_cache_ents: vec![Address::from_bytes(&[0; ADDRESS_SZ]); cache_ents], 22 | add_count: 0, 23 | hit_count: 0, 24 | } 25 | } 26 | 27 | pub fn add(&mut self, addr: &Address) -> bool { 28 | self.add_count += 1; 29 | let offset_buf = addr.bytes[0..8].try_into().unwrap(); 30 | let offset: u64 = u64::from_le_bytes(offset_buf) % (self.dm_cache_ents.len() as u64); 31 | let mut tmp = *addr; 32 | std::mem::swap( 33 | &mut tmp, 34 | self.dm_cache_ents.get_mut(offset as usize).unwrap(), 35 | ); 36 | let new_val = tmp != *addr; 37 | if !new_val { 38 | self.hit_count += 1; 39 | } 40 | new_val 41 | } 42 | 43 | pub fn utilization(&self) -> f64 { 44 | let mut utilized = 0; 45 | for a in self.dm_cache_ents.iter() { 46 | if a.bytes != [0; ADDRESS_SZ] { 47 | utilized += 1 48 | } 49 | } 50 | (utilized as f64) / (self.dm_cache_ents.len() as f64) 51 | } 52 | } 53 | 54 | #[cfg(test)] 55 | mod tests { 56 | use super::super::crypto; 57 | use super::*; 58 | 59 | #[test] 60 | fn test_awcache() { 61 | crypto::init(); 62 | 63 | let mut cache = ACache::new(4 * 1024); 64 | 65 | let addresses: Vec
= (0..1000).map(|_| Address::random()).collect(); 66 | 67 | for a in addresses.iter() { 68 | cache.add(&a); 69 | assert!(!cache.add(&a)); 70 | } 71 | 72 | cache.hit_count = 0; 73 | cache.add_count = 0; 74 | 75 | for a in addresses.iter() { 76 | cache.add(&a); 77 | } 78 | 79 | assert!(cache.hit_count != 0); 80 | eprintln!("cache hit_count: {}/{}", cache.hit_count, cache.add_count); 81 | eprintln!("cache utilization: {}", cache.utilization()); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/address.rs: -------------------------------------------------------------------------------- 1 | use super::crypto; 2 | use super::hex; 3 | use serde::{Deserialize, Serialize}; 4 | use std::convert::TryInto; 5 | use std::fmt; 6 | 7 | pub const ADDRESS_SZ: usize = 32; 8 | 9 | #[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Copy, Debug)] 10 | pub struct Address { 11 | pub bytes: [u8; ADDRESS_SZ], 12 | } 13 | 14 | impl fmt::Display for Address { 15 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 16 | write!(f, "{}", self.as_hex_addr()) 17 | } 18 | } 19 | 20 | impl Address { 21 | pub fn random() -> Address { 22 | let mut bytes = [0; ADDRESS_SZ]; 23 | crypto::randombytes(&mut bytes); 24 | Address { bytes } 25 | } 26 | 27 | pub fn from_bytes(bytes: &[u8; 32]) -> Address { 28 | Address { bytes: *bytes } 29 | } 30 | 31 | pub fn from_slice(s: &[u8]) -> Result { 32 | Ok(Address { 33 | bytes: s.try_into()?, 34 | }) 35 | } 36 | 37 | pub fn from_hex_str(s: &str) -> Result { 38 | if s.len() != ADDRESS_SZ * 2 { 39 | anyhow::bail!("invalid address '{}', length is not {} ", s, ADDRESS_SZ * 2); 40 | } 41 | let mut a = Address::default(); 42 | hex::decode_string(s, &mut a.bytes)?; 43 | Ok(a) 44 | } 45 | 46 | pub fn as_hex_addr(&self) -> HexAddress { 47 | let mut result = HexAddress::default(); 48 | hex::encode(&self.bytes, &mut result.bytes); 49 | result 50 | } 51 | } 52 | 53 | impl fmt::LowerHex for Address { 54 | fn 
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 55 | let mut buf = [0; 64]; 56 | hex::encode(&self.bytes[..], &mut buf[..]); 57 | write!(f, "{}", std::str::from_utf8(&buf[..]).unwrap()) 58 | } 59 | } 60 | 61 | impl Default for Address { 62 | fn default() -> Address { 63 | Address::from_bytes(&[0; ADDRESS_SZ]) 64 | } 65 | } 66 | 67 | pub struct HexAddress { 68 | bytes: [u8; ADDRESS_SZ * 2], 69 | } 70 | 71 | impl<'a> HexAddress { 72 | pub fn as_str(&'a self) -> &'a str { 73 | std::str::from_utf8(&self.bytes).unwrap() 74 | } 75 | } 76 | 77 | impl fmt::Display for HexAddress { 78 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 79 | write!(f, "{}", std::str::from_utf8(&self.bytes).unwrap()) 80 | } 81 | } 82 | 83 | impl Default for HexAddress { 84 | fn default() -> HexAddress { 85 | HexAddress { 86 | bytes: [b'0'; ADDRESS_SZ * 2], 87 | } 88 | } 89 | } 90 | 91 | // Convert a slice of addresses to a slice of bytes without any copying. 92 | pub fn addresses_to_bytes(addresses: &[Address]) -> &[u8] { 93 | assert!(std::mem::size_of::
() == ADDRESS_SZ); 94 | let n_bytes = addresses.len() * ADDRESS_SZ; 95 | unsafe { std::slice::from_raw_parts(addresses.as_ptr() as *const u8, n_bytes) } 96 | } 97 | 98 | // Convert a slice of addresses to a slice of bytes without any copying. 99 | // panics if alignment is wrong. 100 | pub fn bytes_to_addresses(bytes: &[u8]) -> &[Address] { 101 | // We rely on alignment, flag any places our assumption is not true. 102 | assert!(((bytes.as_ptr() as usize) & (std::mem::align_of::
() - 1)) == 0); 103 | assert!(std::mem::size_of::
() == ADDRESS_SZ); 104 | let n_addresses = bytes.len() / ADDRESS_SZ; 105 | unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const Address, n_addresses) } 106 | } 107 | 108 | #[cfg(test)] 109 | mod tests { 110 | use super::*; 111 | 112 | #[test] 113 | fn test_addr_to_hex_addr() { 114 | assert!(Address::default().as_hex_addr().bytes[..] == HexAddress::default().bytes[..]); 115 | } 116 | 117 | #[test] 118 | fn test_addresses_to_bytes() { 119 | let v = vec![Address::default()]; 120 | let s = addresses_to_bytes(&v); 121 | assert_eq!(Address::from_slice(s).unwrap(), v[0]) 122 | } 123 | 124 | #[test] 125 | fn test_bytes_to_addresses() { 126 | // Try to create an poorly unaligned allocation if it is 127 | // possible on the current platform. 128 | for _i in 0..100 { 129 | let bytes = [0; 64]; 130 | let mut b = Vec::new(); 131 | b.extend_from_slice(&bytes[..]); 132 | let s = bytes_to_addresses(&b); 133 | assert_eq!(Address::default(), s[0]) 134 | } 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /src/base64.rs: -------------------------------------------------------------------------------- 1 | use super::sodium; 2 | 3 | // We use libsodium base64 as it removes some dependencies 4 | // as we already have a hard dependency on libsodium. 
/// Base64 encode `buf` using libsodium's "original" (padded) variant.
pub fn encode(buf: &[u8]) -> String {
    // Upper bound on the encoded length, including the NUL terminator
    // that libsodium writes.
    let max_out_len = unsafe {
        sodium::sodium_base64_encoded_len(buf.len(), sodium::sodium_base64_VARIANT_ORIGINAL as i32)
    };

    let mut out_buf = vec![0; max_out_len];

    unsafe {
        assert!(!sodium::sodium_bin2base64(
            out_buf.as_mut_ptr() as *mut std::os::raw::c_char,
            out_buf.len(),
            buf.as_ptr(),
            buf.len(),
            sodium::sodium_base64_VARIANT_ORIGINAL as i32,
        )
        .is_null())
    };

    // Trim the buffer at the NUL terminator; it must exist because
    // the buffer was sized with room for it above.
    match out_buf.iter().position(|&v| v == 0) {
        Some(idx) => {
            out_buf.truncate(idx);
        }
        None => {
            panic!();
        }
    }

    String::from_utf8(out_buf).unwrap()
}

/// Decode a base64 string ("original" variant), returning None on invalid input.
pub fn decode(data: &str) -> Option<Vec<u8>> {
    let mut out_len = 0;
    // Decoded output is always shorter than its base64 encoding,
    // so data.len() bytes is a safe upper bound.
    let mut out_buf = vec![0; data.len()];

    let rc = unsafe {
        sodium::sodium_base642bin(
            out_buf.as_mut_ptr(),
            out_buf.len(),
            data.as_ptr() as *const std::os::raw::c_char,
            data.len(),
            std::ptr::null(),
            &mut out_len as *mut usize,
            std::ptr::null_mut::<*const std::os::raw::c_char>(),
            sodium::sodium_base64_VARIANT_ORIGINAL as i32,
        )
    };

    if rc == 0 {
        assert!(out_len <= out_buf.len());
        out_buf.truncate(out_len);
        Some(out_buf)
    } else {
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_encode() {
        assert_eq!(encode(b""), "");
        assert_eq!(encode(b"a"), "YQ==");
        assert_eq!(encode(b"ab"), "YWI=");
        assert_eq!(encode(b"abc"), "YWJj");
        assert_eq!(encode(b"abcd"), "YWJjZA==");
        assert_eq!(encode(b"abcde"), "YWJjZGU=");
    }

    #[test]
    fn test_decode() {
        assert_eq!(decode("").unwrap(), b"");
        assert_eq!(decode("YQ==").unwrap(), b"a");
        assert_eq!(decode("YWI=").unwrap(), b"ab");
        assert_eq!(decode("YWJj").unwrap(), b"abc");
        assert_eq!(decode("YWJjZA==").unwrap(), b"abcd");
        assert_eq!(decode("YWJjZGU=").unwrap(), b"abcde");
    }
}
--------------------------------------------------------------------------------
/src/chunk_storage.rs:
--------------------------------------------------------------------------------
use super::abloom;
use super::address::*;
use super::protocol;
use super::repository;
use super::xid;

// Engine is the interface a chunk storage backend must implement
// (e.g. the directory and external storage engines).
pub trait Engine {
    // Get many chunks in an efficient pipeline.
    #[allow(clippy::type_complexity)]
    fn pipelined_get_chunks(
        &mut self,
        addresses: &[Address],
        on_chunk: &mut dyn FnMut(&Address, &[u8]) -> Result<(), anyhow::Error>,
    ) -> Result<(), anyhow::Error>;

    // Get a chunk from the storage engine.
    fn get_chunk(&mut self, addr: &Address) -> Result<Vec<u8>, anyhow::Error>;

    // Set the gc_id for the following call to sweep. This is a form
    // of two phase commit where we ensure the backend saves this
    // id so we can later check if it has completed.
    fn prepare_for_sweep(&mut self, gc_id: xid::Xid) -> Result<(), anyhow::Error>;

    // Remove all chunks not in the reachable set.
    // NOTE(review): return type reconstructed as repository::GcStats — the
    // generic parameters were lost in extraction; confirm against the repo.
    fn sweep(
        &mut self,
        update_progress_msg: &mut dyn FnMut(String) -> Result<(), anyhow::Error>,
        reachable: abloom::ABloom,
    ) -> Result<repository::GcStats, anyhow::Error>;

    // Check that a previous invocation of sweep has finished.
    fn sweep_completed(&mut self, gc_id: xid::Xid) -> Result<bool, anyhow::Error>;

    // Add a chunk, potentially asynchronously. Does not overwrite existing
    // chunks with the same name to protect historic items from corruption.
    // The write is not guaranteed to be completed until
    // after a call to Engine::sync completes without error.
    fn add_chunk(&mut self, addr: &Address, buf: Vec<u8>) -> Result<(), anyhow::Error>;

    // Filter a list of chunk addresses removing any that already exist in the repository.
    // This function is often called in very large batches so requires the backend to periodically
    // report progress, the argument to on_progress is how many addresses have been processed since the last
    // progress report.
    fn filter_existing_chunks(
        &mut self,
        on_progress: &mut dyn FnMut(u64) -> Result<(), anyhow::Error>,
        addr: Vec<Address>
, 48 | ) -> Result, anyhow::Error>; 49 | 50 | // A write barrier, any previously added chunks are only guaranteed to be 51 | // in stable storage after a call to flush has returned. A backend 52 | // can use this to implement concurrent background writes. 53 | fn flush(&mut self) -> Result; 54 | 55 | // Estimate how many chunks we have stored, the implementation is free to 56 | // make a rough guess to increase performance. One trick is sampling 57 | // a single address prefix. 58 | fn estimate_chunk_count(&mut self) -> Result; 59 | } 60 | -------------------------------------------------------------------------------- /src/chunker.rs: -------------------------------------------------------------------------------- 1 | use super::rollsum::{FastGearHasher, GearTab, RollsumSplitter}; 2 | 3 | #[derive(Clone)] 4 | pub struct RollsumChunker { 5 | rs: FastGearHasher, 6 | min_sz: usize, 7 | max_sz: usize, 8 | default_chunk_capacity: usize, 9 | cur_vec: Vec, 10 | } 11 | 12 | impl RollsumChunker { 13 | pub fn new(gear_tab: GearTab, mut min_sz: usize, mut max_sz: usize) -> RollsumChunker { 14 | if min_sz == 0 { 15 | min_sz = 1 16 | } 17 | if max_sz < min_sz { 18 | max_sz = min_sz 19 | } 20 | let default_chunk_capacity = max_sz / 2; 21 | RollsumChunker { 22 | rs: FastGearHasher::new(gear_tab), 23 | min_sz, 24 | max_sz, 25 | default_chunk_capacity, 26 | cur_vec: Vec::with_capacity(default_chunk_capacity), 27 | } 28 | } 29 | 30 | fn spare_capacity(&self) -> usize { 31 | self.cur_vec.capacity() - self.cur_vec.len() 32 | } 33 | 34 | fn swap_vec(&mut self) -> Vec { 35 | let mut v = Vec::with_capacity(self.default_chunk_capacity); 36 | std::mem::swap(&mut v, &mut self.cur_vec); 37 | v 38 | } 39 | 40 | pub fn add_bytes(&mut self, buf: &[u8]) -> (usize, Option>) { 41 | let mut n_bytes = buf.len(); 42 | 43 | if (n_bytes + self.cur_vec.len()) > self.max_sz { 44 | let overshoot = (n_bytes + self.cur_vec.len()) - self.max_sz; 45 | n_bytes -= overshoot; 46 | } 47 | 48 | if 
self.spare_capacity() < n_bytes { 49 | let mut growth = (self.max_sz / 3).max(1); 50 | if self.cur_vec.capacity() + growth > self.max_sz { 51 | growth = self.max_sz - self.cur_vec.capacity(); 52 | } 53 | self.cur_vec.reserve(growth); 54 | n_bytes = std::cmp::min(self.spare_capacity(), n_bytes); 55 | } 56 | 57 | // None of the bytes we are adding will count towards the 58 | // next chunk, simply add them all, the bytes don't matter 59 | // as we will cycle WINDOW_SIZE too. 60 | if let Some(window_size) = self.rs.window_size() { 61 | if self.min_sz >= window_size 62 | && (self.cur_vec.len() + n_bytes < (self.min_sz - window_size)) 63 | { 64 | self.cur_vec.extend_from_slice(&buf[0..n_bytes]); 65 | return (n_bytes, None); 66 | } 67 | } 68 | 69 | match self.rs.roll_bytes(&buf[0..n_bytes]) { 70 | Some(split) => { 71 | self.cur_vec.extend_from_slice(&buf[0..split]); 72 | if self.cur_vec.len() < self.min_sz { 73 | (split, None) 74 | } else { 75 | (split, Some(self.swap_vec())) 76 | } 77 | } 78 | None => { 79 | self.cur_vec.extend_from_slice(&buf[0..n_bytes]); 80 | if self.cur_vec.len() == self.max_sz { 81 | (n_bytes, Some(self.swap_vec())) 82 | } else { 83 | (n_bytes, None) 84 | } 85 | } 86 | } 87 | } 88 | 89 | pub fn buffered_count(&mut self) -> usize { 90 | self.cur_vec.len() 91 | } 92 | 93 | pub fn force_split(&mut self) -> Option> { 94 | self.rs.reset(); 95 | let v = self.swap_vec(); 96 | if v.is_empty() { 97 | None 98 | } else { 99 | Some(v) 100 | } 101 | } 102 | 103 | pub fn take_buffered(&mut self) -> Vec { 104 | self.rs.reset(); 105 | let mut v = Vec::new(); 106 | std::mem::swap(&mut self.cur_vec, &mut v); 107 | v 108 | } 109 | 110 | pub fn finish(self) -> Vec { 111 | self.cur_vec 112 | } 113 | } 114 | 115 | #[cfg(test)] 116 | mod tests { 117 | use super::super::rollsum::{GearTab, TEST_GEAR_TAB_DATA}; 118 | use super::*; 119 | 120 | #[test] 121 | fn test_add_bytes() { 122 | let mut ch = RollsumChunker::new(GearTab::from_array(TEST_GEAR_TAB_DATA), 1, 2); 123 | 124 | 
match ch.add_bytes(b"a") { 125 | (1, None) => (), 126 | v => panic!("{:?}", v), 127 | } 128 | 129 | match ch.add_bytes(b"bc") { 130 | (1, Some(v)) => assert_eq!(v, b"ab"), 131 | v => panic!("{:?}", v), 132 | } 133 | 134 | match ch.add_bytes(b"c") { 135 | (1, None) => (), 136 | v => panic!("{:?}", v), 137 | } 138 | 139 | assert_eq!(ch.finish(), b"c"); 140 | } 141 | 142 | #[test] 143 | fn test_force_split_bytes() { 144 | let mut ch = RollsumChunker::new(GearTab::from_array(TEST_GEAR_TAB_DATA), 10, 100); 145 | assert_eq!(ch.force_split(), None); 146 | ch.add_bytes(b"abc"); 147 | 148 | match ch.force_split() { 149 | Some(v) => assert_eq!(v, b"abc"), 150 | None => panic!("fail!"), 151 | } 152 | assert_eq!(ch.force_split(), None); 153 | ch.add_bytes(b"def"); 154 | assert_eq!(ch.finish(), b"def"); 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/cksumvfs.rs: -------------------------------------------------------------------------------- 1 | // Bindings and helpers for csrc/cksumvfs. 2 | // these functions let us add add checksums to our sqlite3 files. 3 | // For more info see: https://www.sqlite.org/cksumvfs.html 4 | 5 | extern "C" { 6 | fn cksumvfs_sqlite_version_number() -> ::std::os::raw::c_int; 7 | fn sqlite3_register_cksumvfs(unused: *const u8) -> ::std::os::raw::c_int; 8 | } 9 | 10 | pub fn register_cksumvfs() { 11 | // Because have our own copy of the sqlite3 header file, this 12 | // test ensures we are using the same header rusqlite used. 
13 | assert_eq!( 14 | unsafe { cksumvfs_sqlite_version_number() }, 15 | rusqlite::version_number() 16 | ); 17 | assert_eq!( 18 | unsafe { sqlite3_register_cksumvfs(std::ptr::null()) }, 19 | rusqlite::ffi::SQLITE_OK 20 | ) 21 | } 22 | 23 | pub fn enable_sqlite_checksums(db: &rusqlite::Connection) -> Result<(), anyhow::Error> { 24 | let mut n = 8; 25 | if unsafe { 26 | rusqlite::ffi::sqlite3_file_control( 27 | db.handle(), 28 | std::ptr::null(), 29 | rusqlite::ffi::SQLITE_FCNTL_RESERVE_BYTES, 30 | (&mut n) as *mut i32 as *mut core::ffi::c_void, 31 | ) 32 | } != rusqlite::ffi::SQLITE_OK 33 | { 34 | anyhow::bail!("unable to reserve bytes for sqlite3 page checksums"); 35 | } 36 | if n != 0 && n != 8 { 37 | anyhow::bail!("database has incorrect reserve bytes for checksums"); 38 | } 39 | if n == 0 { 40 | db.execute("vacuum;", [])?; 41 | } 42 | Ok(()) 43 | } 44 | 45 | #[cfg(test)] 46 | mod tests { 47 | 48 | use super::*; 49 | 50 | #[test] 51 | fn test_cksumvfs_can_be_enabled() { 52 | let temp_dir = tempfile::tempdir().unwrap(); 53 | let path = temp_dir.path().join("test.db3"); 54 | { 55 | register_cksumvfs(); 56 | let db = rusqlite::Connection::open(&path).unwrap(); 57 | enable_sqlite_checksums(&db).unwrap(); 58 | let enabled: String = db 59 | .query_row("PRAGMA checksum_verification;", [], |r| { 60 | Ok(r.get(0).unwrap()) 61 | }) 62 | .unwrap(); 63 | assert_eq!(enabled, "1"); 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/compression.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryInto; 2 | 3 | pub const COMPRESS_FOOTER_NONE: u8 = 0; 4 | pub const COMPRESS_FOOTER_LZ4: u8 = 1; 5 | pub const COMPRESS_FOOTER_ZSTD: u8 = 2; 6 | 7 | pub const COMPRESS_MAX_SIZE: usize = 67108864; 8 | 9 | #[derive(Clone, Copy)] 10 | pub enum Scheme { 11 | None, 12 | Lz4, 13 | Zstd { level: i32 }, 14 | } 15 | 16 | pub fn parse_scheme(s: &str) -> Result { 17 | if s == 
"none" { 18 | return Ok(Scheme::None); 19 | } 20 | if s == "lz4" { 21 | return Ok(Scheme::Lz4); 22 | } 23 | if s == "zstd" { 24 | return Ok(Scheme::Zstd { level: 3 }); 25 | } 26 | 27 | if s.starts_with("zstd:") { 28 | let spec_parts: Vec<&str> = s.split(':').collect(); 29 | if spec_parts.len() != 2 { 30 | anyhow::bail!("invalid zstd compression level, expected a number"); 31 | } 32 | match spec_parts[1].parse() { 33 | Ok(level) => { 34 | if !(1..=19).contains(&level) { 35 | anyhow::bail!("zstd compression level must be in the range 1-19"); 36 | } 37 | return Ok(Scheme::Zstd { level }); 38 | } 39 | Err(_) => anyhow::bail!("zstd compression level must be a number"), 40 | } 41 | } 42 | anyhow::bail!("invalid compression scheme, expected one of none, lz4, zstd[:$level]") 43 | } 44 | 45 | pub fn compress(scheme: Scheme, mut data: Vec) -> Vec { 46 | assert!(data.len() <= COMPRESS_MAX_SIZE); 47 | 48 | let compressed_data = match scheme { 49 | Scheme::None => { 50 | data.push(COMPRESS_FOOTER_NONE); 51 | return data; 52 | } 53 | Scheme::Lz4 => { 54 | let mut compressed_data = lz4::block::compress(&data, None, false).unwrap(); 55 | compressed_data.reserve(5); 56 | let sz = data.len() as u32; 57 | compressed_data.extend_from_slice(&u32::to_le_bytes(sz)[..]); 58 | compressed_data.push(COMPRESS_FOOTER_LZ4); 59 | compressed_data 60 | } 61 | Scheme::Zstd { level } => { 62 | let mut compressed_data: Vec = 63 | Vec::with_capacity(zstd_safe::compress_bound(data.len()) + 1); 64 | zstd_safe::compress(&mut compressed_data, &data, level).unwrap(); 65 | compressed_data.push(COMPRESS_FOOTER_ZSTD); 66 | compressed_data 67 | } 68 | }; 69 | 70 | if (compressed_data.len()) > data.len() { 71 | data.push(COMPRESS_FOOTER_NONE); 72 | return data; 73 | } 74 | 75 | compressed_data 76 | } 77 | 78 | pub fn decompress(mut data: Vec) -> Result, anyhow::Error> { 79 | match data.pop() { 80 | Some(COMPRESS_FOOTER_NONE) => Ok(data), 81 | Some(COMPRESS_FOOTER_LZ4) => { 82 | if data.len() < 4 { 83 | 
anyhow::bail!("data corrupt - compression footer missing decompressed size"); 84 | } 85 | let data_len = data.len(); 86 | let decompressed_sz = 87 | u32::from_le_bytes(data[data_len - 4..data_len].try_into().unwrap()) as usize; 88 | // This limit helps prevent bad actors from causing ooms, bupstash 89 | // naturally limits chunks and metadata to a max size that is well below this. 90 | if decompressed_sz > COMPRESS_MAX_SIZE { 91 | anyhow::bail!("data corrupt - decompressed size is larger than application limits"); 92 | } 93 | data.truncate(data.len() - 4); 94 | Ok(lz4::block::decompress(&data, Some(decompressed_sz as i32))?) 95 | } 96 | Some(COMPRESS_FOOTER_ZSTD) => { 97 | // Zstd should read it's internal frame header to get an accurate size. 98 | let max_decompressed_sz = zstd_safe::decompress_bound(&data) 99 | .unwrap() 100 | .try_into() 101 | .unwrap(); 102 | if max_decompressed_sz > COMPRESS_MAX_SIZE { 103 | anyhow::bail!("data corrupt - decompressed size is larger than application limits"); 104 | } 105 | let mut decompressed: Vec = Vec::with_capacity(max_decompressed_sz); 106 | match zstd_safe::decompress(&mut decompressed, &data) { 107 | Ok(_) => Ok(decompressed), 108 | Err(_) => anyhow::bail!("error during zstd decompression"), 109 | } 110 | } 111 | Some(_) => anyhow::bail!("unknown decompression footer, don't know how to decompress data (possibly compressed by a newer version of bupstash)"), 112 | None => anyhow::bail!("data missing compression footer"), 113 | } 114 | } 115 | 116 | pub fn unauthenticated_decompress(data: Vec) -> Result, anyhow::Error> { 117 | match data.last() { 118 | None => anyhow::bail!("data buffer too small, missing compression footer"), 119 | Some(f) if *f == COMPRESS_FOOTER_NONE => decompress(data), 120 | // Once we are confident in the security/memory safety of our decompression function, 121 | // we can shift to enabling compression of the unauthenticated data. 
122 | Some(f) => anyhow::bail!( 123 | "decompression of unauthenticated data is currently disabled (encryption footer is {})", 124 | *f 125 | ), 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/external_chunk_storage.rs: -------------------------------------------------------------------------------- 1 | use super::abloom; 2 | use super::address::Address; 3 | use super::chunk_storage::Engine; 4 | use super::protocol::*; 5 | use super::repository; 6 | use super::xid; 7 | use std::os::unix::net::UnixStream; 8 | 9 | pub struct ExternalStorage { 10 | sock: UnixStream, 11 | } 12 | 13 | impl ExternalStorage { 14 | pub fn new(socket_path: &std::path::Path, path: &str) -> Result { 15 | let mut sock = UnixStream::connect(socket_path)?; 16 | write_packet( 17 | &mut sock, 18 | &Packet::StorageConnect(StorageConnect { 19 | protocol: "s-6".to_string(), 20 | path: path.to_string(), 21 | }), 22 | )?; 23 | 24 | Ok(ExternalStorage { sock }) 25 | } 26 | } 27 | 28 | impl Engine for ExternalStorage { 29 | fn pipelined_get_chunks( 30 | &mut self, 31 | addresses: &[Address], 32 | on_chunk: &mut dyn FnMut(&Address, &[u8]) -> Result<(), anyhow::Error>, 33 | ) -> Result<(), anyhow::Error> { 34 | // In the future it would probably be good (though more complicated) to perform the writing of the addresses, 35 | // and reading of the results concurrently, though it complicates both the plugin and bupstash. 36 | write_storage_pipelined_get_chunks(&mut self.sock, addresses)?; 37 | 38 | for address in addresses { 39 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? { 40 | Packet::RStorageRequestChunkData(data) => on_chunk(address, &data)?, 41 | _ => anyhow::bail!("unexpected packet reponse, expected chunk"), 42 | } 43 | } 44 | 45 | Ok(()) 46 | } 47 | 48 | fn filter_existing_chunks( 49 | &mut self, 50 | on_progress: &mut dyn FnMut(u64) -> Result<(), anyhow::Error>, 51 | addresses: Vec
, 52 | ) -> Result, anyhow::Error> { 53 | write_storage_filter_existing(&mut self.sock, &addresses)?; 54 | std::mem::drop(addresses); 55 | loop { 56 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? { 57 | Packet::StorageFilterExistingProgress(n) => on_progress(n.0)?, 58 | Packet::StorageAddresses(missing) => return Ok(missing), 59 | _ => anyhow::bail!( 60 | "expected StorageAddresses or StorageFilterAddresses progress packet" 61 | ), 62 | }; 63 | } 64 | } 65 | 66 | fn get_chunk(&mut self, address: &Address) -> Result, anyhow::Error> { 67 | write_packet(&mut self.sock, &Packet::TStorageRequestChunkData(*address))?; 68 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? { 69 | Packet::RStorageRequestChunkData(data) => Ok(data), 70 | _ => anyhow::bail!("unexpected packet reponse, expected RRequestChunkData"), 71 | } 72 | } 73 | 74 | fn add_chunk(&mut self, address: &Address, data: Vec) -> Result<(), anyhow::Error> { 75 | write_chunk(&mut self.sock, address, &data)?; 76 | Ok(()) 77 | } 78 | 79 | fn flush(&mut self) -> Result { 80 | write_packet(&mut self.sock, &Packet::TStorageFlush)?; 81 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? 
{ 82 | Packet::RStorageFlush(stats) => Ok(stats), 83 | _ => anyhow::bail!("unexpected packet reponse, expected RStorageFlush"), 84 | } 85 | } 86 | 87 | fn prepare_for_sweep(&mut self, gc_id: xid::Xid) -> Result<(), anyhow::Error> { 88 | write_packet(&mut self.sock, &Packet::TStoragePrepareForSweep(gc_id))?; 89 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE) { 90 | Ok(Packet::RStoragePrepareForSweep) => (), 91 | Ok(_) => anyhow::bail!("unexpected packet response, expected RStoragePrepareForSweep"), 92 | Err(err) => return Err(err), 93 | } 94 | Ok(()) 95 | } 96 | 97 | fn estimate_chunk_count(&mut self) -> Result { 98 | write_packet(&mut self.sock, &Packet::TStorageEstimateCount)?; 99 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE) { 100 | Ok(Packet::RStorageEstimateCount(v)) => Ok(v.count.0), 101 | Ok(_) => anyhow::bail!("unexpected packet response, expected RStorageEstimateCount"), 102 | Err(err) => Err(err), 103 | } 104 | } 105 | 106 | fn sweep( 107 | &mut self, 108 | update_progress_msg: &mut dyn FnMut(String) -> Result<(), anyhow::Error>, 109 | reachable: abloom::ABloom, 110 | ) -> Result { 111 | write_begin_sweep(&mut self.sock, &reachable)?; 112 | std::mem::drop(reachable); 113 | loop { 114 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? { 115 | Packet::StorageSweepProgress(msg) => { 116 | update_progress_msg(msg)?; 117 | } 118 | Packet::StorageSweepComplete(stats) => { 119 | let _ = write_packet(&mut self.sock, &Packet::EndOfTransmission); 120 | return Ok(stats); 121 | } 122 | _ => anyhow::bail!("unexpected packet response, expected StorageSweepProgress or StorageSweepComplete"), 123 | } 124 | } 125 | } 126 | 127 | fn sweep_completed(&mut self, gc_id: xid::Xid) -> Result { 128 | write_packet(&mut self.sock, &Packet::TStorageQuerySweepCompleted(gc_id))?; 129 | match read_packet(&mut self.sock, DEFAULT_MAX_PACKET_SIZE)? 
{ 130 | Packet::RStorageQuerySweepCompleted(completed) => Ok(completed), 131 | _ => anyhow::bail!("unexpected packet response, expected RStorageSweepCompleted"), 132 | } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/fmtutil.rs: -------------------------------------------------------------------------------- 1 | use super::hex; 2 | use super::index; 3 | use std::os::unix::ffi::OsStrExt; 4 | 5 | pub fn format_timestamp(ts: &chrono::DateTime, utc_timestamps: bool) -> String { 6 | let tsfmt = "%Y/%m/%d %T"; 7 | if utc_timestamps { 8 | ts.format(tsfmt).to_string() 9 | } else { 10 | chrono::DateTime::::from(*ts) 11 | .format(tsfmt) 12 | .to_string() 13 | } 14 | } 15 | 16 | pub fn format_size(n: u64) -> String { 17 | // Binary units, not SI units. 18 | const K: u64 = 1024; 19 | const M: u64 = 1024 * K; 20 | const G: u64 = 1024 * M; 21 | const T: u64 = 1024 * G; 22 | const P: u64 = 1024 * T; 23 | 24 | if n > P { 25 | format!("{}.{:0>2}PiB", n / P, (n % P) / (P / 100)) 26 | } else if n > T { 27 | format!("{}.{:0>2}TiB", n / T, (n % T) / (T / 100)) 28 | } else if n > G { 29 | format!("{}.{:0>2}GiB", n / G, (n % G) / (G / 100)) 30 | } else if n > M { 31 | format!("{}.{:0>2}MiB", n / M, (n % M) / (M / 100)) 32 | } else if n > K { 33 | format!("{}.{:0>2}KiB", n / K, (n % K) / (K / 100)) 34 | } else { 35 | format!("{}B", n) 36 | } 37 | } 38 | 39 | pub struct IndexHumanDisplayWidths { 40 | pub human_size_digits: usize, 41 | } 42 | 43 | pub fn estimate_index_human_display_widths( 44 | index: &index::CompressedIndex, 45 | ) -> Result { 46 | // If the index is large, just assume we have the full range of values. 47 | // The cost of formatting a huge index perfectly is too large. 
48 | if index.compressed_size() > 512 * 1024 { 49 | Ok(IndexHumanDisplayWidths { 50 | human_size_digits: 11, // 'nnnn.nn UUU' 51 | }) 52 | } else { 53 | let mut human_size_digits = 0; 54 | for ent in index.iter() { 55 | let ent = ent?; 56 | human_size_digits = human_size_digits.max(format_size(ent.size.0).len()) 57 | } 58 | Ok(IndexHumanDisplayWidths { human_size_digits }) 59 | } 60 | } 61 | 62 | pub fn format_human_content_listing( 63 | ent: &index::IndexEntry, 64 | utc_timestamps: bool, 65 | widths: &IndexHumanDisplayWidths, 66 | ) -> String { 67 | let mut result = String::new(); 68 | std::fmt::write(&mut result, format_args!("{}", ent.display_mode())).unwrap(); 69 | let size = if ent.is_file() { 70 | format_size(ent.size.0) 71 | } else { 72 | "-".to_string() 73 | }; 74 | let size_padding: String = " ".repeat(widths.human_size_digits - size.len()); 75 | std::fmt::write(&mut result, format_args!(" {}{}", size, size_padding)).unwrap(); 76 | let ts = chrono::NaiveDateTime::from_timestamp_opt(ent.ctime.0 as i64, ent.ctime_nsec.0 as u32) 77 | .unwrap(); 78 | let ts = chrono::DateTime::::from_utc(ts, chrono::Utc); 79 | let ts = format_timestamp(&ts, utc_timestamps); 80 | std::fmt::write(&mut result, format_args!(" {}", ts)).unwrap(); 81 | std::fmt::write(&mut result, format_args!(" {}", ent.path.to_string_lossy())).unwrap(); 82 | result 83 | } 84 | 85 | pub fn format_jsonl1_content_listing(ent: &index::IndexEntry) -> Result { 86 | let mut result = String::with_capacity(512); 87 | std::fmt::write(&mut result, format_args!("{{"))?; 88 | std::fmt::write( 89 | &mut result, 90 | format_args!("\"mode\":{}", serde_json::to_string(&ent.mode.0)?), 91 | )?; 92 | std::fmt::write(&mut result, format_args!(",\"size\":{}", ent.size.0))?; 93 | 94 | match ent.path.to_str() { 95 | Some(path) => std::fmt::write( 96 | &mut result, 97 | format_args!(",\"path\":{}", serde_json::to_string(path)?), 98 | )?, 99 | None => { 100 | let path = ent.path.as_os_str().as_bytes(); 101 | 
std::fmt::write( 102 | &mut result, 103 | format_args!(",\"path\":{}", serde_json::to_string(path)?), 104 | )? 105 | } 106 | } 107 | std::fmt::write( 108 | &mut result, 109 | format_args!(",\"mtime\":{}", serde_json::to_string(&ent.mtime.0)?), 110 | )?; 111 | std::fmt::write( 112 | &mut result, 113 | format_args!( 114 | ",\"mtime_nsec\":{}", 115 | serde_json::to_string(&ent.mtime_nsec.0)? 116 | ), 117 | )?; 118 | std::fmt::write( 119 | &mut result, 120 | format_args!(",\"ctime\":{}", serde_json::to_string(&ent.ctime.0)?), 121 | )?; 122 | std::fmt::write( 123 | &mut result, 124 | format_args!( 125 | ",\"ctime_nsec\":{}", 126 | serde_json::to_string(&ent.ctime_nsec.0)? 127 | ), 128 | )?; 129 | std::fmt::write( 130 | &mut result, 131 | format_args!(",\"uid\":{}", serde_json::to_string(&ent.uid.0)?), 132 | )?; 133 | std::fmt::write( 134 | &mut result, 135 | format_args!(",\"gid\":{}", serde_json::to_string(&ent.gid.0)?), 136 | )?; 137 | std::fmt::write( 138 | &mut result, 139 | format_args!(",\"norm_dev\":{}", serde_json::to_string(&ent.norm_dev.0)?), 140 | )?; 141 | std::fmt::write( 142 | &mut result, 143 | format_args!(",\"nlink\":{}", serde_json::to_string(&ent.nlink.0)?), 144 | )?; 145 | std::fmt::write( 146 | &mut result, 147 | format_args!(",\"ino\":{}", serde_json::to_string(&ent.ino.0)?), 148 | )?; 149 | 150 | if ent.is_dev_node() { 151 | std::fmt::write( 152 | &mut result, 153 | format_args!( 154 | ",\"dev_major\":{}", 155 | serde_json::to_string(&ent.dev_major.0)?, 156 | ), 157 | )?; 158 | std::fmt::write( 159 | &mut result, 160 | format_args!( 161 | ",\"dev_minor\":{}", 162 | serde_json::to_string(&ent.dev_minor.0)?, 163 | ), 164 | )?; 165 | } else { 166 | result.push_str(",\"dev_major\":null,\"dev_minor\":null"); 167 | } 168 | 169 | if let Some(ref xattrs) = ent.xattrs { 170 | result.push_str(",\"xattrs\":{"); 171 | let mut first = true; 172 | for (k, v) in xattrs.iter() { 173 | let k = if let Ok(k) = std::str::from_utf8(k) { 174 | serde_json::to_string(k)? 
175 | } else { 176 | serde_json::to_string(k)? 177 | }; 178 | 179 | let v = if let Ok(v) = std::str::from_utf8(v.as_slice()) { 180 | serde_json::to_string(v)? 181 | } else { 182 | serde_json::to_string(v)? 183 | }; 184 | 185 | if first { 186 | first = false; 187 | } else { 188 | result.push(','); 189 | } 190 | 191 | result.push_str(&k); 192 | result.push(':'); 193 | result.push_str(&v); 194 | } 195 | result.push('}'); 196 | } else { 197 | result.push_str(",\"xattrs\":null"); 198 | } 199 | 200 | std::fmt::write(&mut result, format_args!(",\"sparse\": {}", ent.sparse))?; 201 | 202 | if let Some(ref link_target) = ent.link_target { 203 | match link_target.to_str() { 204 | Some(path) => std::fmt::write( 205 | &mut result, 206 | format_args!(",\"link_target\":{}", serde_json::to_string(path)?), 207 | )?, 208 | None => { 209 | let link_target = link_target.as_os_str().as_bytes(); 210 | std::fmt::write( 211 | &mut result, 212 | format_args!(",\"link_target\":{}", serde_json::to_string(link_target)?), 213 | )? 214 | } 215 | } 216 | } else { 217 | result.push_str(",\"link_target\":null"); 218 | } 219 | 220 | match ent.data_hash { 221 | index::ContentCryptoHash::None => result.push_str(",\"data_hash\":null"), 222 | index::ContentCryptoHash::Blake3(h) => std::fmt::write( 223 | &mut result, 224 | format_args!( 225 | ",\"data_hash\":{}", 226 | serde_json::to_string(&format!("blake3:{}", hex::easy_encode_to_string(&h)))? 227 | ), 228 | )?, 229 | }; 230 | std::fmt::write(&mut result, format_args!("}}"))?; 231 | Ok(result) 232 | } 233 | -------------------------------------------------------------------------------- /src/fprefetch.rs: -------------------------------------------------------------------------------- 1 | // fprefetcher is a file opening queue used by the put command. 2 | // The idea is there is a queue of files you are interested 3 | // in reading in the near future and it lets the OS know the 4 | // intention via whatever readahead mechanism your OS provides. 
5 | 6 | use std::collections::VecDeque; 7 | use std::fs::File; 8 | use std::path::PathBuf; 9 | 10 | cfg_if::cfg_if! { 11 | if #[cfg(target_os = "linux")] { 12 | use std::os::unix::fs::OpenOptionsExt; 13 | } 14 | } 15 | 16 | cfg_if::cfg_if! { 17 | if #[cfg(target_os = "macos")] { 18 | // Nothing is needed. 19 | } else if #[cfg(target_os = "openbsd")] { 20 | // Nothing is needed. 21 | } else { 22 | use std::os::unix::io::AsRawFd; 23 | const NUM_PREFETCHED_BYTES: libc::off_t = 128 * 1024 * 1024; 24 | } 25 | } 26 | 27 | const NUM_PREOPENED_FILES: usize = 1; 28 | 29 | #[derive(Default)] 30 | pub struct ReadaheadFileOpener { 31 | unopened: VecDeque, 32 | opened: VecDeque<(PathBuf, std::io::Result)>, 33 | } 34 | 35 | fn open_file_for_streaming(fpath: &std::path::Path) -> std::io::Result { 36 | cfg_if::cfg_if! { 37 | if #[cfg(target_os = "linux")] { 38 | // Try with O_NOATIME first; if it fails, e.g. because the user we 39 | // run as is not the file owner, retry without.. 40 | let f = std::fs::OpenOptions::new() 41 | .read(true) 42 | .custom_flags(libc::O_NOATIME) 43 | .open(fpath) 44 | .or_else(|error| { 45 | match error.kind() { 46 | std::io::ErrorKind::PermissionDenied => { 47 | std::fs::OpenOptions::new() 48 | .read(true) 49 | .open(fpath) 50 | } 51 | _ => Err(error) 52 | } 53 | })?; 54 | } else { 55 | let f = std::fs::OpenOptions::new() 56 | .read(true) 57 | .open(fpath)?; 58 | } 59 | } 60 | 61 | cfg_if::cfg_if! { 62 | if #[cfg(target_os = "macos")] { 63 | // XXX can we do anything here? 64 | // Perhaps F_RDADVISE ? 65 | } else if #[cfg(target_os = "openbsd")] { 66 | // XXX can we do anything here? 67 | } else { 68 | // We would like to use something like POSIX_FADV_NOREUSE to preserve 69 | // the user page cache... this is actually a NOOP on linux. 70 | // Instead we can at least boost performance by hinting our access pattern. 
71 | match nix::fcntl::posix_fadvise( 72 | f.as_raw_fd(), 73 | 0, 74 | 0, 75 | nix::fcntl::PosixFadviseAdvice::POSIX_FADV_SEQUENTIAL, 76 | ) { 77 | Ok(_) => (), 78 | Err(err) => { 79 | return Err(std::io::Error::new( 80 | std::io::ErrorKind::Other, 81 | format!("fadvise POSIX_FADV_SEQUENTIAL failed: {}", err), 82 | )) 83 | } 84 | }; 85 | 86 | match nix::fcntl::posix_fadvise( 87 | f.as_raw_fd(), 88 | 0, 89 | NUM_PREFETCHED_BYTES, 90 | nix::fcntl::PosixFadviseAdvice::POSIX_FADV_WILLNEED, 91 | ) { 92 | Ok(_) => (), 93 | Err(err) => { 94 | return Err(std::io::Error::new( 95 | std::io::ErrorKind::Other, 96 | format!("fadvise POSIX_FADV_WILLNEED failed: {}", err), 97 | )) 98 | } 99 | }; 100 | } 101 | } 102 | 103 | Ok(f) 104 | } 105 | 106 | impl ReadaheadFileOpener { 107 | pub fn new() -> ReadaheadFileOpener { 108 | ReadaheadFileOpener { 109 | unopened: VecDeque::new(), 110 | opened: VecDeque::new(), 111 | } 112 | } 113 | 114 | pub fn add_to_queue(&mut self, p: PathBuf) { 115 | self.unopened.push_back(p); 116 | } 117 | 118 | pub fn next_file(&mut self) -> Option<(PathBuf, std::io::Result)> { 119 | while !self.unopened.is_empty() && self.opened.len() < NUM_PREOPENED_FILES + 1 { 120 | let p = self.unopened.pop_front().unwrap(); 121 | let r = open_file_for_streaming(&p); 122 | self.opened.push_back((p, r)) 123 | } 124 | self.opened.pop_front() 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/hex.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Eq, PartialEq, thiserror::Error)] 2 | pub enum HexError { 3 | #[error("invalid character in hex input")] 4 | InvalidCharacter, 5 | #[error("hex padding required")] 6 | PaddingRequired, 7 | } 8 | 9 | #[inline] 10 | fn from_hex_byte(b: u8) -> Result { 11 | match b { 12 | 48..=57 => Ok(b - 48), 13 | 65..=70 => Ok(b - 65 + 10), 14 | 97..=102 => Ok(b - 97 + 10), 15 | _ => Err(HexError::InvalidCharacter), 16 | } 17 | } 18 | 19 | 
pub fn decode(from: &[u8], to: &mut [u8]) -> Result<(), HexError> { 20 | if from.len() % 2 != 0 { 21 | return Err(HexError::PaddingRequired); 22 | } 23 | 24 | assert_eq!(from.len(), to.len() * 2); 25 | 26 | for i in 0..to.len() { 27 | let hi = from[2 * i]; 28 | let lo = from[2 * i + 1]; 29 | to[i] = from_hex_byte(hi)? << 4 | from_hex_byte(lo)?; 30 | } 31 | Ok(()) 32 | } 33 | 34 | pub fn decode_string(from: &str, to: &mut [u8]) -> Result<(), HexError> { 35 | decode(from.as_bytes(), to) 36 | } 37 | 38 | pub fn easy_decode_string(from: &str) -> Result, HexError> { 39 | let n = from.len() / 2; 40 | let mut v = vec![0; n]; 41 | match decode_string(from, &mut v) { 42 | Ok(()) => Ok(v), 43 | Err(e) => Err(e), 44 | } 45 | } 46 | 47 | #[inline] 48 | fn to_hex_bytes(b: u8) -> (u8, u8) { 49 | let tab = b"0123456789abcdef"; 50 | let hi = (b & 0xf0) >> 4; 51 | let lo = b & 0x0f; 52 | (tab[hi as usize], tab[lo as usize]) 53 | } 54 | 55 | #[inline] 56 | fn to_hex_chars(b: u8) -> (char, char) { 57 | let (hi, lo) = to_hex_bytes(b); 58 | (hi as char, lo as char) 59 | } 60 | 61 | // from.len() MUST be exactly half to.len() 62 | pub fn encode(from: &[u8], to: &mut [u8]) { 63 | assert!(to.len() == 2 * from.len()); 64 | 65 | for i in 0..from.len() { 66 | let (hi, lo) = to_hex_bytes(from[i]); 67 | to[2 * i] = hi; 68 | to[2 * i + 1] = lo; 69 | } 70 | } 71 | 72 | pub fn easy_encode_to_string(from: &[u8]) -> String { 73 | let mut s = String::with_capacity(2 * from.len()); 74 | for b in from { 75 | let (hi, lo) = to_hex_chars(*b); 76 | s.push(hi); 77 | s.push(lo); 78 | } 79 | s 80 | } 81 | 82 | #[cfg(test)] 83 | mod tests { 84 | use super::*; 85 | 86 | #[test] 87 | fn test_encode() { 88 | let buf: [u8; 8] = [18, 52, 86, 120, 154, 188, 222, 240]; 89 | let mut encoded: [u8; 16] = [0; 16]; 90 | encode(&buf, &mut encoded); 91 | assert_eq!(std::str::from_utf8(&encoded).unwrap(), "123456789abcdef0"); 92 | } 93 | 94 | #[test] 95 | fn test_easy_encode_to_string() { 96 | let buf: [u8; 8] = [18, 52, 
86, 120, 154, 188, 222, 240]; 97 | assert_eq!(easy_encode_to_string(&buf), "123456789abcdef0"); 98 | } 99 | 100 | #[test] 101 | fn test_easy_decode_string() { 102 | let buf: [u8; 8] = [18, 52, 86, 120, 154, 188, 222, 240]; 103 | assert_eq!( 104 | easy_decode_string("123456789abcdef0").unwrap().as_slice(), 105 | &buf[..] 106 | ); 107 | assert_eq!( 108 | easy_decode_string("123456789ABCDEF0").unwrap().as_slice(), 109 | &buf[..] 110 | ); 111 | assert_eq!( 112 | easy_decode_string("1234!6789ABCDEF0").unwrap_err(), 113 | HexError::InvalidCharacter 114 | ); 115 | assert_eq!( 116 | easy_decode_string("23456789ABCDEF0").unwrap_err(), 117 | HexError::PaddingRequired 118 | ); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/ioutil.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, BufRead, Read, Write}; 2 | 3 | pub struct PipeReader { 4 | receiver: crossbeam_channel::Receiver>, 5 | buffer: Vec, 6 | position: usize, 7 | } 8 | 9 | pub struct PipeWriter { 10 | sender: crossbeam_channel::Sender>, 11 | buffer: Vec, 12 | size: usize, 13 | } 14 | 15 | pub fn buffered_pipe(write_buf_sz: usize) -> (PipeReader, PipeWriter) { 16 | let (tx, rx) = crossbeam_channel::bounded(0); 17 | let write_buf_sz = write_buf_sz.max(1); 18 | ( 19 | PipeReader { 20 | receiver: rx, 21 | buffer: Vec::new(), 22 | position: 0, 23 | }, 24 | PipeWriter { 25 | sender: tx, 26 | buffer: Vec::with_capacity(write_buf_sz), 27 | size: write_buf_sz, 28 | }, 29 | ) 30 | } 31 | 32 | fn epipe() -> io::Error { 33 | io::Error::new(io::ErrorKind::BrokenPipe, "pipe closed") 34 | } 35 | 36 | impl BufRead for PipeReader { 37 | fn fill_buf(&mut self) -> io::Result<&[u8]> { 38 | if self.position >= self.buffer.len() { 39 | if let Ok(data) = self.receiver.recv() { 40 | debug_assert!(!data.is_empty()); 41 | self.buffer = data; 42 | self.position = 0; 43 | } 44 | } 45 | Ok(&self.buffer[self.position..]) 46 | } 47 | 
48 | fn consume(&mut self, amt: usize) { 49 | debug_assert!(self.buffer.len() - self.position >= amt); 50 | self.position += amt 51 | } 52 | } 53 | 54 | impl Read for PipeReader { 55 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 56 | let internal = self.fill_buf()?; 57 | let len = std::cmp::min(buf.len(), internal.len()); 58 | if len > 0 { 59 | buf[..len].copy_from_slice(&internal[..len]); 60 | self.consume(len); 61 | } 62 | Ok(len) 63 | } 64 | } 65 | 66 | impl Write for PipeWriter { 67 | fn write(&mut self, buf: &[u8]) -> io::Result { 68 | let bytes_written = if (buf.len() + self.buffer.len()) > self.buffer.capacity() { 69 | self.buffer.capacity() - self.buffer.len() 70 | } else { 71 | buf.len() 72 | }; 73 | self.buffer.extend_from_slice(&buf[..bytes_written]); 74 | if self.buffer.len() == self.buffer.capacity() { 75 | self.flush()?; 76 | } 77 | Ok(bytes_written) 78 | } 79 | 80 | fn flush(&mut self) -> io::Result<()> { 81 | if self.buffer.is_empty() { 82 | Ok(()) 83 | } else { 84 | let data = std::mem::replace(&mut self.buffer, Vec::with_capacity(self.size)); 85 | match self.sender.send(data) { 86 | Ok(_) => Ok(()), 87 | Err(_) => Err(epipe()), 88 | } 89 | } 90 | } 91 | } 92 | 93 | pub struct TeeReader { 94 | read: R, 95 | output: W, 96 | } 97 | 98 | impl TeeReader { 99 | pub fn new(read: R, output: W) -> Self { 100 | Self { read, output } 101 | } 102 | 103 | pub fn into_inner(self) -> (R, W) { 104 | (self.read, self.output) 105 | } 106 | } 107 | 108 | impl Read for TeeReader { 109 | fn read(&mut self, buf: &mut [u8]) -> std::io::Result { 110 | let n = self.read.read(buf)?; 111 | self.output.write_all(&buf[..n])?; 112 | Ok(n) 113 | } 114 | } 115 | 116 | pub fn all_zeros(buf: &[u8]) -> bool { 117 | // This processes a lot of data so we iterate 118 | // by 8 where we can and check the remainder byte wise. 119 | let (prefix, big, suffix) = unsafe { buf.align_to::() }; 120 | // Check the fastest part first so we can early exit. 
121 | for v in big { 122 | if *v != 0 { 123 | return false; 124 | } 125 | } 126 | for v in prefix { 127 | if *v != 0 { 128 | return false; 129 | } 130 | } 131 | for v in suffix { 132 | if *v != 0 { 133 | return false; 134 | } 135 | } 136 | true 137 | } 138 | -------------------------------------------------------------------------------- /src/migrate.rs: -------------------------------------------------------------------------------- 1 | // This file contains code to perform repository migrations. 2 | // The code here is often deliberately duplicated and avoids dependencies 3 | // on other modules so that the upgrade migration code can avoid churn with 4 | // other changes. 5 | 6 | use super::fstx1; 7 | use super::fstx2; 8 | use super::oplog; 9 | use super::vfs; 10 | use super::xid; 11 | use std::collections::HashSet; 12 | use std::io::BufRead; 13 | 14 | pub fn repo_upgrade_to_5_to_6(repo_fs: &vfs::VFs) -> Result<(), anyhow::Error> { 15 | // This upgrade mainly just prevents clients from seeing index entries they 16 | // cannot decode... repositories of version 5 and 6 are compatible except 17 | // for an additional index entry type. 18 | // This upgrade simply increments the schema version. 
19 | eprintln!("upgrading repository schema from version 5 to version 6..."); 20 | 21 | let mut lock_file = repo_fs.open("repo.lock", vfs::OpenFlags::RDWR)?; 22 | eprintln!("getting exclusive repository lock for upgrade..."); 23 | lock_file.lock(vfs::LockType::Exclusive)?; 24 | 25 | let mut fstx1 = fstx1::WriteTxn::begin_at(repo_fs)?; 26 | let schema_version = fstx1.read_string("meta/schema_version")?; 27 | if schema_version != "5" { 28 | anyhow::bail!( 29 | "unable to upgrade, expected schema version 5, got {}", 30 | schema_version 31 | ) 32 | } 33 | fstx1.add_write("meta/schema_version", "6".to_string().into_bytes())?; 34 | fstx1.commit()?; 35 | eprintln!("repository upgrade successful..."); 36 | drop(lock_file); 37 | Ok(()) 38 | } 39 | 40 | pub fn repo_upgrade_to_6_to_7(repo_fs: &vfs::VFs) -> Result<(), anyhow::Error> { 41 | // This upgrade adds sparse files and zstd compression. 42 | // This upgrade also adds the '.removed' suffix for removed items. 43 | eprintln!("upgrading repository schema from version 6 to version 7..."); 44 | 45 | let mut lock_file = repo_fs.open("repo.lock", vfs::OpenFlags::RDWR)?; 46 | eprintln!("getting exclusive repository lock for upgrade..."); 47 | lock_file.lock(vfs::LockType::Exclusive)?; 48 | 49 | let mut txn = fstx1::WriteTxn::begin_at(repo_fs)?; 50 | 51 | let mut active_items: HashSet = HashSet::new(); 52 | for item in txn.read_dir("items")? 
{ 53 | let id = item.file_name; 54 | match xid::Xid::parse(&id) { 55 | Ok(id) => { 56 | active_items.insert(id); 57 | } 58 | Err(_) => anyhow::bail!("unable to parse item id at path items/{}", id), 59 | } 60 | } 61 | 62 | let log_file = txn.open("repo.oplog")?; 63 | 64 | let mut log_file = std::io::BufReader::new(log_file); 65 | 66 | while !log_file.fill_buf()?.is_empty() { 67 | let op = serde_bare::from_reader(&mut log_file)?; 68 | if let oplog::LogOp::AddItem((id, md)) = op { 69 | if !active_items.contains(&id) { 70 | let serialized_md = serde_bare::to_vec(&md)?; 71 | txn.add_write(&format!("items/{:x}.removed", id), serialized_md)?; 72 | } 73 | } 74 | } 75 | 76 | let schema_version = txn.read_string("meta/schema_version")?; 77 | if schema_version != "6" { 78 | anyhow::bail!( 79 | "unable to upgrade, expected schema version 6, got {}", 80 | schema_version 81 | ) 82 | } 83 | 84 | txn.add_write("meta/schema_version", "7".to_string().into_bytes())?; 85 | txn.commit()?; 86 | 87 | eprintln!("repository upgrade successful..."); 88 | std::mem::drop(lock_file); 89 | Ok(()) 90 | } 91 | 92 | pub fn repo_upgrade_to_7_to_8(repo_fs: &vfs::VFs) -> Result<(), anyhow::Error> { 93 | // This upgrade migrates from fstx1 to fstx2 (WAL mode). 94 | eprintln!("upgrading repository schema from version 7 to version 8..."); 95 | 96 | let mut lock_file = repo_fs.open("repo.lock", vfs::OpenFlags::RDWR)?; 97 | eprintln!("getting exclusive repository lock for upgrade..."); 98 | lock_file.lock(vfs::LockType::Exclusive)?; 99 | 100 | // Rollback any failed old style transactions, prepare for new style. 101 | let mut txn = fstx1::WriteTxn::begin_at(repo_fs)?; 102 | { 103 | txn.add_write(fstx2::SEQ_NUM_NAME, vec![0, 0, 0, 0, 0, 0, 0, 0])?; 104 | } 105 | txn.commit()?; 106 | 107 | // Do upgrade with new style transactions. 108 | let mut txn = fstx2::WriteTxn::begin_at(repo_fs)?; 109 | { 110 | for d in ["data", "wal"] { 111 | if !txn.file_exists(d)? 
{ 112 | txn.add_mkdir(d)?; 113 | } 114 | } 115 | txn.add_write("meta/schema_version", "8".to_string().into_bytes())?; 116 | } 117 | txn.commit()?; 118 | 119 | eprintln!("repository upgrade successful..."); 120 | std::mem::drop(lock_file); 121 | Ok(()) 122 | } 123 | -------------------------------------------------------------------------------- /src/sodium.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | #![allow(non_camel_case_types)] 3 | #![allow(non_snake_case)] 4 | #![allow(dead_code)] 5 | #![allow(deref_nullptr)] // see https://github.com/rust-lang/rust-bindgen/issues/1651 6 | #![allow(clippy::redundant_static_lifetimes)] 7 | include!("./sodium_bindings_gen.rs"); 8 | -------------------------------------------------------------------------------- /src/xglobset.rs: -------------------------------------------------------------------------------- 1 | // EXtended globset functionality 2 | // 3 | // Waiting for https://github.com/BurntSushi/ripgrep/pull/2061 to get merged 4 | 5 | use std::ops::Deref; 6 | use std::{fmt, hash}; 7 | 8 | // Escape metacharacters within the given string by surrounding them in 9 | // brackets. The resulting string will, when compiled into a `Glob`, 10 | // match the input string and nothing else. 11 | pub fn escape(s: &str) -> String { 12 | let mut escaped = String::with_capacity(s.len()); 13 | for c in s.chars() { 14 | match c { 15 | // note that ! does not need escaping because it is only special 16 | // inside brackets 17 | '?' 
| '*' | '[' | ']' => { 18 | escaped.push('['); 19 | escaped.push(c); 20 | escaped.push(']'); 21 | } 22 | c => { 23 | escaped.push(c); 24 | } 25 | } 26 | } 27 | escaped 28 | } 29 | 30 | // Newtype wrapper around [globset::GlobMatcher] that adds a few trait implementations we absolutely need 31 | #[derive(Clone, Debug)] 32 | pub struct GlobMatcher(globset::GlobMatcher); 33 | 34 | impl PartialEq for GlobMatcher { 35 | fn eq(&self, other: &GlobMatcher) -> bool { 36 | self.glob() == other.glob() 37 | } 38 | } 39 | 40 | impl Eq for GlobMatcher {} 41 | 42 | impl hash::Hash for GlobMatcher { 43 | fn hash(&self, state: &mut H) { 44 | self.glob().hash(state); 45 | } 46 | } 47 | 48 | impl fmt::Display for GlobMatcher { 49 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 50 | self.glob().fmt(f) 51 | } 52 | } 53 | 54 | /* Conversion traits between the wrapped type and back */ 55 | 56 | impl Deref for GlobMatcher { 57 | type Target = globset::GlobMatcher; 58 | 59 | fn deref(&self) -> &globset::GlobMatcher { 60 | &self.0 61 | } 62 | } 63 | 64 | impl From for globset::GlobMatcher { 65 | fn from(outer: GlobMatcher) -> Self { 66 | outer.0 67 | } 68 | } 69 | 70 | impl From for GlobMatcher { 71 | fn from(inner: globset::GlobMatcher) -> Self { 72 | Self(inner) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/xid.rs: -------------------------------------------------------------------------------- 1 | use super::crypto; 2 | use super::hex; 3 | use serde::{Deserialize, Serialize}; 4 | use std::convert::TryInto; 5 | use std::fmt; 6 | 7 | pub const XID_SZ: usize = 16; 8 | 9 | #[derive(Serialize, Debug, Deserialize, Default, PartialEq, Eq, Hash, Clone, Copy)] 10 | pub struct Xid { 11 | pub bytes: [u8; XID_SZ], 12 | } 13 | 14 | // Convert a slice of xids to a slice of bytes without any copying. 
15 | pub fn xids_to_bytes(xids: &[Xid]) -> &[u8] { 16 | assert!(std::mem::size_of::() == XID_SZ); 17 | let n_bytes = xids.len() * XID_SZ; 18 | unsafe { std::slice::from_raw_parts(xids.as_ptr() as *const u8, n_bytes) } 19 | } 20 | 21 | impl Xid { 22 | pub fn new() -> Self { 23 | let mut bytes = [0; XID_SZ]; 24 | crypto::randombytes(&mut bytes[..]); 25 | Xid { bytes } 26 | } 27 | 28 | pub fn parse(s: &str) -> Result { 29 | let mut bytes = [0; XID_SZ]; 30 | let s = s.as_bytes(); 31 | if s.len() != 32 { 32 | anyhow::bail!("invalid id, should be 32 characters long"); 33 | } 34 | if hex::decode(s, &mut bytes[..]).is_err() { 35 | anyhow::bail!("invalid id, should be a hex value"); 36 | } 37 | Ok(Xid { bytes }) 38 | } 39 | 40 | pub fn as_hex(&self) -> [u8; XID_SZ * 2] { 41 | let mut buf = [0; XID_SZ * 2]; 42 | hex::encode(&self.bytes[..], &mut buf[..]); 43 | buf 44 | } 45 | 46 | pub fn from_slice(s: &[u8]) -> Result { 47 | Ok(Xid { 48 | bytes: s.try_into()?, 49 | }) 50 | } 51 | } 52 | 53 | impl fmt::Display for Xid { 54 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 55 | let h = self.as_hex(); 56 | write!(f, "{}", std::str::from_utf8(&h[..]).unwrap()) 57 | } 58 | } 59 | 60 | impl fmt::LowerHex for Xid { 61 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 62 | let h = self.as_hex(); 63 | write!(f, "{}", std::str::from_utf8(&h[..]).unwrap()) 64 | } 65 | } 66 | 67 | impl rusqlite::types::FromSql for Xid { 68 | fn column_result(v: rusqlite::types::ValueRef) -> rusqlite::types::FromSqlResult { 69 | v.as_blob().map(|b| { 70 | let mut id = Xid::default(); 71 | id.bytes[..].clone_from_slice(b); 72 | id 73 | }) 74 | } 75 | } 76 | 77 | impl rusqlite::types::ToSql for Xid { 78 | fn to_sql(&self) -> rusqlite::Result { 79 | Ok(rusqlite::types::ToSqlOutput::from(&self.bytes[..])) 80 | } 81 | } 82 | 83 | #[cfg(test)] 84 | mod tests { 85 | use super::*; 86 | 87 | #[test] 88 | fn test_default() { 89 | let u = Xid::default(); 90 | assert_eq!( 91 | u.to_string(), 92 | 
"00000000000000000000000000000000".to_string() 93 | ); 94 | 95 | assert_eq!(u, Xid::parse("00000000000000000000000000000000").unwrap(),); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/xtar.rs: -------------------------------------------------------------------------------- 1 | // EXtended tar functionality. 2 | 3 | use super::index; 4 | use std::convert::TryInto; 5 | use std::os::unix::ffi::OsStrExt; 6 | use std::path::PathBuf; 7 | 8 | fn format_pax_extended_record(key: &[u8], value: &[u8]) -> Vec { 9 | let mut record_len = 3 + key.len() + value.len(); 10 | let mut record_len_s = format!("{}", record_len); 11 | // Whoever designed the pax_ext extended header format was a bit crazy. 12 | // We just loop until we have fixpoint record length. 13 | loop { 14 | if record_len_s.len() + 3 + key.len() + value.len() == record_len { 15 | break; 16 | } 17 | record_len = record_len_s.len() + 3 + key.len() + value.len(); 18 | record_len_s = format!("{}", record_len); 19 | } 20 | 21 | let mut record = Vec::with_capacity(record_len); 22 | record.extend_from_slice(record_len_s.as_bytes()); 23 | record.extend_from_slice(b" "); 24 | record.extend_from_slice(key); 25 | record.extend_from_slice(b"="); 26 | record.extend_from_slice(value); 27 | record.extend_from_slice(b"\n"); 28 | debug_assert!(record.len() == record_len); 29 | record 30 | } 31 | 32 | pub fn index_entry_to_tarheader( 33 | ent: &index::IndexEntry, 34 | hard_link: Option<&PathBuf>, 35 | ) -> Result, anyhow::Error> { 36 | let mut pax_ext_records = Vec::new(); 37 | let mut ustar_hdr = tar::Header::new_ustar(); 38 | 39 | let tar_type = match &hard_link { 40 | Some(hard_link) => match ent.kind() { 41 | index::IndexEntryKind::Other => { 42 | anyhow::bail!( 43 | "index entry {} has an unknown type", 44 | ent.path.to_string_lossy() 45 | ) 46 | } 47 | index::IndexEntryKind::Directory => anyhow::bail!( 48 | "index entry {} is a directory, so can't have a hard link to 
{}", 49 | ent.path.to_string_lossy(), 50 | hard_link.to_string_lossy(), 51 | ), 52 | _ => tar::EntryType::Link, 53 | }, 54 | 55 | None => match ent.kind() { 56 | index::IndexEntryKind::Other => { 57 | anyhow::bail!( 58 | "index entry {} has an unknown type", 59 | ent.path.to_string_lossy() 60 | ) 61 | } 62 | index::IndexEntryKind::Regular => tar::EntryType::Regular, 63 | index::IndexEntryKind::Symlink => tar::EntryType::Symlink, 64 | index::IndexEntryKind::Char => tar::EntryType::Char, 65 | index::IndexEntryKind::Block => tar::EntryType::Block, 66 | index::IndexEntryKind::Directory => tar::EntryType::Directory, 67 | index::IndexEntryKind::Fifo => tar::EntryType::Fifo, 68 | }, 69 | }; 70 | 71 | ustar_hdr.set_entry_type(tar_type); 72 | ustar_hdr.set_mode(ent.mode.0 as u32); 73 | ustar_hdr.set_mtime(ent.mtime.0); 74 | ustar_hdr.set_uid(ent.uid.0); 75 | ustar_hdr.set_gid(ent.gid.0); 76 | ustar_hdr.set_size(if hard_link.is_none() { ent.size.0 } else { 0 }); 77 | ustar_hdr.set_device_major(ent.dev_major.0 as u32)?; 78 | ustar_hdr.set_device_minor(ent.dev_minor.0 as u32)?; 79 | 80 | match ustar_hdr.set_path(&ent.path) { 81 | Ok(()) => (), 82 | Err(e) => { 83 | /* 100 is more than ustar can handle as a path target */ 84 | if ent.path.as_os_str().len() > 100 { 85 | let path_bytes = ent.path.as_os_str().as_bytes(); 86 | let path_record = format_pax_extended_record(b"path", path_bytes); 87 | pax_ext_records.extend_from_slice(&path_record); 88 | } else { 89 | return Err(e.into()); 90 | } 91 | } 92 | }; 93 | 94 | if matches!(tar_type, tar::EntryType::Symlink | tar::EntryType::Link) { 95 | let target = if let Some(ref hard_link) = hard_link { 96 | hard_link 97 | } else { 98 | ent.link_target.as_ref().unwrap() 99 | }; 100 | 101 | match ustar_hdr.set_link_name(target) { 102 | Ok(()) => (), 103 | Err(err) => { 104 | /* 100 is more than ustar can handle as a link target */ 105 | if target.as_os_str().len() > 100 { 106 | let target_record = 107 | 
format_pax_extended_record(b"linkpath", target.as_os_str().as_bytes()); 108 | pax_ext_records.extend_from_slice(&target_record); 109 | } else { 110 | return Err(err.into()); 111 | } 112 | } 113 | } 114 | } 115 | 116 | ustar_hdr.set_cksum(); 117 | 118 | match &ent.xattrs { 119 | Some(xattrs) => { 120 | let mut key_bytes = Vec::with_capacity(24); 121 | for (k, v) in xattrs.iter() { 122 | key_bytes.truncate(0); 123 | key_bytes.extend_from_slice(b"SCHILY.xattr."); 124 | key_bytes.extend_from_slice(k); 125 | pax_ext_records.extend_from_slice(&format_pax_extended_record(&key_bytes, v)); 126 | } 127 | } 128 | None => (), 129 | } 130 | 131 | let mut hdr_bytes = Vec::new(); 132 | 133 | if !pax_ext_records.is_empty() { 134 | let mut pax_ext_hdr = tar::Header::new_ustar(); 135 | pax_ext_hdr.set_entry_type(tar::EntryType::XHeader); 136 | pax_ext_hdr.set_size(pax_ext_records.len().try_into().unwrap()); 137 | pax_ext_hdr.set_cksum(); 138 | hdr_bytes.extend_from_slice(&pax_ext_hdr.as_bytes()[..]); 139 | hdr_bytes.extend_from_slice(&pax_ext_records); 140 | let remaining = 512 - (hdr_bytes.len() % 512); 141 | if remaining < 512 { 142 | let buf = [0; 512]; 143 | hdr_bytes.extend_from_slice(&buf[..remaining]); 144 | } 145 | debug_assert!(hdr_bytes.len() % 512 == 0); 146 | } 147 | 148 | hdr_bytes.extend_from_slice(&ustar_hdr.as_bytes()[..]); 149 | 150 | Ok(hdr_bytes) 151 | } 152 | -------------------------------------------------------------------------------- /support/bindgen.sh: -------------------------------------------------------------------------------- 1 | set -eux 2 | 3 | bindgen ./csrc/sodium-bindings.h \ 4 | --whitelist-function "crypto_.*" \ 5 | --whitelist-type "crypto_.*" \ 6 | --whitelist-var "crypto_.*" \ 7 | --whitelist-function "sodium_.*" \ 8 | --whitelist-var "sodium_.*" \ 9 | --whitelist-function "randombytes_.*" \ 10 | > ./src/sodium_bindings_gen.rs -------------------------------------------------------------------------------- /support/builds.sr.ht/debian.yml: 
-------------------------------------------------------------------------------- 1 | image: debian/sid 2 | packages: 3 | - cargo 4 | - pkg-config 5 | - libsodium-dev 6 | - sqlite3 7 | - uuid-runtime 8 | - bats 9 | sources: 10 | - https://github.com/andrewchambers/bupstash 11 | tasks: 12 | - build: | 13 | cd bupstash 14 | cargo test 15 | cargo build --release 16 | export PATH=$(pwd)/target/release:$PATH 17 | bats ./cli-tests -------------------------------------------------------------------------------- /support/builds.sr.ht/freebsd.yml: -------------------------------------------------------------------------------- 1 | image: freebsd/latest 2 | packages: 3 | - rust 4 | - pkgconf 5 | - libsodium 6 | - sqlite3 7 | - python3 8 | - gtar 9 | - bats-core 10 | sources: 11 | - https://github.com/andrewchambers/bupstash 12 | tasks: 13 | - build: | 14 | cd bupstash 15 | export PKG_CONFIG=pkgconf 16 | cargo test 17 | cargo build --release 18 | export PATH=$(pwd)/target/release:$PATH 19 | bats ./cli-tests -------------------------------------------------------------------------------- /support/builds.sr.ht/openbsd.yml: -------------------------------------------------------------------------------- 1 | image: openbsd/latest 2 | packages: 3 | - rust 4 | - pkgconf 5 | - libsodium 6 | - sqlite3 7 | - python3 8 | - gtar 9 | - bats 10 | sources: 11 | - https://github.com/andrewchambers/bupstash 12 | tasks: 13 | - build: | 14 | cd bupstash 15 | export PKG_CONFIG=pkgconf 16 | cargo test 17 | cargo build --release 18 | export PATH=$(pwd)/target/release:$PATH 19 | bats ./cli-tests -------------------------------------------------------------------------------- /support/pgo-build.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh 2 | 3 | set -eux 4 | 5 | cargo clean 6 | rm -rf ./pgo 7 | mkdir pgo 8 | mkdir pgo/data 9 | 10 | export BUPSTASH_REPOSITORY="$(pwd)/pgo/repo" 11 | export BUPSTASH_SEND_LOG="$(pwd)/pgo/bupstash.sendlog" 12 | export BUPSTASH_QUERY_CACHE="$(pwd)/pgo/bupstash.querycache" 13 | export BUPSTASH_KEY=$(pwd)/pgo/repo.key 14 | 15 | RUSTFLAGS="-Cprofile-generate=$(pwd)/pgo/data" \ 16 | cargo build --release 17 | 18 | ./target/release/bupstash init 19 | ./target/release/bupstash new-key -o ./pgo/repo.key 20 | ./target/release/bupstash put ./target 21 | id=$(./target/release/bupstash put ./target) 22 | ./target/release/bupstash list "id=*" > /dev/null 23 | ./target/release/bupstash get "id=$id" > /dev/null 24 | ./target/release/bupstash rm --allow-many "id=*" > /dev/null 25 | 26 | llvm-profdata merge -o ./pgo/merged.profdata ./pgo/data 27 | 28 | RUSTFLAGS="-Cprofile-use=$(pwd)/pgo/merged.profdata" \ 29 | cargo build --release 30 | -------------------------------------------------------------------------------- /support/plot-chunk-sizes.gnuplot: -------------------------------------------------------------------------------- 1 | n=100 #number of intervals 2 | max=10000000. #max value 3 | min=0. #min value 4 | width=(max-min)/n #interval width 5 | #function used to map a value to the intervals 6 | hist(x,width)=width*floor(x/width)+width/2.0 7 | set boxwidth width*0.9 8 | set style fill solid 0.5 # fill style 9 | 10 | #count and plot 11 | set term png 12 | set out "chunksizes.png" 13 | plot "chunksizes" u (hist($1,width)):(1.0) smooth freq w boxes lc rgb"green" notitle -------------------------------------------------------------------------------- /support/pre-commit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | exit_code=0 3 | 4 | cargo fmt --all -- --quiet --check 5 | if [ $? 
-ne 0 ]; then 6 | echo "Please run 'cargo fmt --all' before committing" 7 | exit_code=1 8 | fi 9 | 10 | cargo clippy -- -D warnings 2> /dev/null 11 | if [ $? -ne 0 ]; then 12 | echo "Please run 'cargo clippy' and fix all issues before committing" 13 | exit_code=1 14 | fi 15 | 16 | if [ $exit_code -ne 0 ]; then 17 | exit $exit_code 18 | fi 19 | -------------------------------------------------------------------------------- /support/print-doc-checklist.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | cat <= 0.8.2) 9 | mustache (>= 0.7.0) 10 | rdiscount (>= 1.5.8) 11 | 12 | PLATFORMS 13 | ruby 14 | 15 | DEPENDENCIES 16 | ronn 17 | 18 | BUNDLED WITH 19 | 2.1.4 20 | -------------------------------------------------------------------------------- /support/ronn/default.nix: -------------------------------------------------------------------------------- 1 | { stdenv, lib, bundlerEnv, bundlerUpdateScript, makeWrapper, groff, callPackage }: 2 | 3 | stdenv.mkDerivation rec { 4 | pname = "ronn"; 5 | version = env.gems.ronn.version; 6 | 7 | env = bundlerEnv { 8 | name = "ronn-gems"; 9 | gemdir = ./.; 10 | }; 11 | 12 | dontUnpack = true; 13 | 14 | nativeBuildInputs = [ makeWrapper ]; 15 | 16 | installPhase = '' 17 | mkdir -p $out/bin 18 | makeWrapper ${env}/bin/ronn $out/bin/ronn \ 19 | --set PATH ${groff}/bin 20 | ''; 21 | 22 | passthru.updateScript = bundlerUpdateScript "ronn"; 23 | 24 | passthru.tests.reproducible-html-manpage = callPackage ./test-reproducible-html.nix { }; 25 | 26 | meta = with lib; { 27 | description = "markdown-based tool for building manpages"; 28 | homepage = "https://rtomayko.github.io/ronn/"; 29 | license = licenses.mit; 30 | maintainers = with maintainers; [ zimbatm nicknovitski ]; 31 | platforms = env.ruby.meta.platforms; 32 | }; 33 | } 34 | -------------------------------------------------------------------------------- /support/ronn/gemset.nix: 
-------------------------------------------------------------------------------- 1 | { 2 | hpricot = { 3 | source = { 4 | remotes = ["https://rubygems.org"]; 5 | sha256 = "1jn8x9ch79gqmnzgyz78kppavjh5lqx0y0r6frykga2b86rz9s6z"; 6 | type = "gem"; 7 | }; 8 | version = "0.8.6"; 9 | }; 10 | mustache = { 11 | source = { 12 | remotes = ["https://rubygems.org"]; 13 | sha256 = "1v4pdvgvs8gw0zbh5sy3l308amlsjg8sdfrkml0g0m0wwj4x7naf"; 14 | type = "gem"; 15 | }; 16 | version = "1.0.3"; 17 | }; 18 | rdiscount = { 19 | source = { 20 | remotes = ["https://rubygems.org"]; 21 | sha256 = "1arvk3k06prxasq1djbj065ixar4zl171340g7wr1ww4gj9makx3"; 22 | type = "gem"; 23 | }; 24 | version = "2.2.0.1"; 25 | }; 26 | ronn = { 27 | source = { 28 | remotes = ["https://rubygems.org"]; 29 | sha256 = "07plsxxfx5bxdk72ii9za6km0ziqlq8jh3bicr4774dalga6zpw2"; 30 | type = "gem"; 31 | }; 32 | version = "0.7.3"; 33 | }; 34 | } 35 | -------------------------------------------------------------------------------- /support/ronn/test-reproducible-html.nix: -------------------------------------------------------------------------------- 1 | { runCommand 2 | , diffutils 3 | , ronn 4 | }: 5 | runCommand "ronn-test-reproducible-html" { } '' 6 | set -euo pipefail 7 | 8 | cat > aprog.1.ronn << EOF 9 | aprog 10 | ===== 11 | 12 | ## AUTHORS 13 | 14 | Vincent Haupert 15 | EOF 16 | 17 | # We have to repeat the manpage generation a few times to be confident 18 | # it is in fact reproducible. 
19 | for i in {1..20}; do 20 | ${ronn}/bin/ronn --html --pipe aprog.1.ronn > aprog.1.html-1 21 | ${ronn}/bin/ronn --html --pipe aprog.1.ronn > aprog.1.html-2 22 | 23 | ${diffutils}/bin/diff -q aprog.1.html-1 aprog.1.html-2 \ 24 | || (printf 'The HTML manpage is not reproducible (round %d)' "$i" && exit 1) 25 | done 26 | 27 | echo 'The HTML manpage appears reproducible' 28 | 29 | mkdir $out 30 | '' 31 | -------------------------------------------------------------------------------- /support/shell.nix: -------------------------------------------------------------------------------- 1 | let 2 | pkgs = (import ) {}; 3 | in 4 | pkgs.stdenv.mkDerivation { 5 | name = "shell"; 6 | 7 | LIBCLANG_PATH="${pkgs.llvmPackages.libclang}/lib"; 8 | 9 | buildInputs = with pkgs; [ 10 | clang 11 | clang-tools 12 | linuxPackages.perf 13 | llvm 14 | entr 15 | minio 16 | minio-client 17 | pandoc 18 | bats 19 | openssl 20 | libsodium 21 | pkg-config 22 | sqlite 23 | rust-bindgen 24 | jq 25 | (pkgs.callPackage ./ronn {}) 26 | hyperfine 27 | ]; 28 | 29 | hardeningDisable = ["all"]; 30 | } 31 | -------------------------------------------------------------------------------- /support/src-release.sh: -------------------------------------------------------------------------------- 1 | set -eux 2 | 3 | version="$1" 4 | 5 | rm -rf ./release/ 6 | mkdir ./release 7 | mkdir release/src 8 | mkdir release/src/.cargo 9 | git archive $version | tar -C release/src -x -f - 10 | cd release/src 11 | 12 | cargo vendor > .cargo/config 13 | 14 | tar -cvf - . | gzip -9 > ../../bupstash-$1-src+deps.tar.gz 15 | gpg -a --sign --detach-sig --default-key ac@bupstash.io ../../bupstash-$1-src+deps.tar.gz 16 | 17 | cd .. 18 | mkdir man 19 | cd man 20 | cp ../src/doc/man/*.md ./ 21 | ronn -r *.md 22 | rm *.md 23 | 24 | tar -cvf - . 
| gzip -9 > ../../bupstash-$1-man.tar.gz 25 | gpg -a --sign --detach-sig --default-key ac@bupstash.io ../../bupstash-$1-man.tar.gz --------------------------------------------------------------------------------