├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benchmark.sh ├── src ├── lib.rs └── main.rs └── tests ├── cli.rs └── cwd.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: [push, pull_request] 4 | 5 | # See https://rust-cli.github.io/book/tutorial/packaging.html#building-binary-releases-on-ci and 6 | # https://github.com/sharkdp/bat/blob/master/.github/workflows/CICD.yml for a more intricate CI/CD 7 | 8 | jobs: 9 | CI: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v1 13 | - uses: actions-rs/clippy-check@v1 14 | with: 15 | token: ${{ secrets.GITHUB_TOKEN }} 16 | args: --locked --all-targets --all-features 17 | - name: Check 18 | run: cargo check --locked --all-targets --verbose 19 | - name: Check (features=debug) 20 | run: cargo check --locked --all-targets --verbose --features debug 21 | - name: Check Documentation 22 | env: 23 | RUSTDOCFLAGS: -D warnings 24 | run: cargo doc --locked --no-deps --document-private-items 25 | - name: Tests 26 | run: cargo test --locked --verbose 27 | - name: Tests (features=debug) 28 | run: cargo test --locked --verbose --features debug 29 | - name: Benchmark 30 | run: | 31 | cargo build --locked --release 32 | ./benchmark.sh --bkt=target/release/bkt -- --ttl=1m -- sleep 1 33 | 34 | CD: 35 | needs: CI 36 | name: CD (${{ matrix.arch.target }} - ${{ matrix.arch.os }}) 37 | runs-on: ${{ matrix.arch.os }} 38 | strategy: 39 | fail-fast: false 40 | matrix: 41 | arch: 42 | - { target: aarch64-unknown-linux-gnu , os: ubuntu-20.04 , use-cross: true } 43 | - { target: arm-unknown-linux-gnueabihf , os: ubuntu-20.04 , use-cross: true } 44 | - { target: i686-unknown-linux-gnu , os: ubuntu-20.04 , use-cross: true } 45 | - { target: i686-unknown-linux-musl , os: ubuntu-20.04 , use-cross: true } 46 | # Note x86 does not appear to be supported past macos-13: 
https://github.com/actions/runner-images/issues/9741 47 | - { target: x86_64-apple-darwin , os: macos-13 } 48 | - { target: aarch64-apple-darwin , os: macos-14 } 49 | - { target: x86_64-pc-windows-msvc , os: windows-2019 , suffix: .exe } 50 | - { target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 } 51 | - { target: x86_64-unknown-linux-musl , os: ubuntu-20.04 , use-cross: true } 52 | steps: 53 | - uses: actions/checkout@v3 54 | - name: Extract crate information 55 | shell: bash 56 | run: | 57 | echo "PROJECT_NAME=$(sed -n 's/^name = "\(.*\)"/\1/p' Cargo.toml)" >> "$GITHUB_ENV" 58 | echo "PROJECT_VERSION=$(sed -n 's/^version = "\(.*\)"/\1/p' Cargo.toml | head -n1)" >> "$GITHUB_ENV" 59 | - name: Build 60 | uses: actions-rs/cargo@v1 61 | with: 62 | use-cross: ${{ matrix.arch.use-cross }} 63 | command: build 64 | args: --locked --release --target=${{ matrix.arch.target }} 65 | 66 | - name: Upload package artifact 67 | uses: actions/upload-artifact@master 68 | with: 69 | name: '${{ env.PROJECT_NAME }}.v${{ env.PROJECT_VERSION }}.${{ matrix.arch.target }}' 70 | path: 'target/${{ matrix.arch.target }}/release/${{ env.PROJECT_NAME }}${{ matrix.arch.suffix }}' 71 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "anstyle" 16 | version = "1.0.4" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" 19 | 20 | [[package]] 21 | name = "anyhow" 22 | version = "1.0.79" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" 25 | 26 | [[package]] 27 | name = "base64" 28 | version = "0.21.7" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" 31 | 32 | [[package]] 33 | name = "bincode" 34 | version = "1.3.3" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" 37 | dependencies = [ 38 | "serde", 39 | ] 40 | 41 | [[package]] 42 | name = "bitflags" 43 | version = "1.3.2" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 46 | 47 | [[package]] 48 | name = "bitflags" 49 | version = "2.4.2" 50 | source = "registry+https://github.com/rust-lang/crates.io-index" 51 | checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" 52 | 53 | [[package]] 54 | name = "bkt" 55 | version = "0.8.2" 56 | dependencies = [ 57 | "anyhow", 58 | "base64", 59 | "bincode", 60 | "clap", 61 | "filetime", 62 | "humantime", 63 | "libc", 64 | "rand", 65 | "regex", 66 | "serde", 67 | "serde_json", 68 | "test_dir", 69 | ] 70 | 71 | [[package]] 72 | name = "cfg-if" 73 | version = "1.0.0" 74 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 76 | 77 | [[package]] 78 | name = "clap" 79 | version = "4.4.18" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" 82 | dependencies = [ 83 | "clap_builder", 84 | "clap_derive", 85 | ] 86 | 87 | [[package]] 88 | name = "clap_builder" 89 | version = "4.4.18" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" 92 | dependencies = [ 93 | "anstyle", 94 | "clap_lex", 95 | "terminal_size", 96 | ] 97 | 98 | [[package]] 99 | name = "clap_derive" 100 | version = "4.4.7" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" 103 | dependencies = [ 104 | "heck", 105 | "proc-macro2", 106 | "quote", 107 | "syn", 108 | ] 109 | 110 | [[package]] 111 | name = "clap_lex" 112 | version = "0.6.0" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" 115 | 116 | [[package]] 117 | name = "errno" 118 | version = "0.3.8" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" 121 | dependencies = [ 122 | "libc", 123 | "windows-sys 0.52.0", 124 | ] 125 | 126 | [[package]] 127 | name = "filetime" 128 | version = "0.2.23" 129 | source = "registry+https://github.com/rust-lang/crates.io-index" 130 | checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" 131 | dependencies = [ 132 | "cfg-if", 133 | "libc", 134 | "redox_syscall", 135 | "windows-sys 0.52.0", 136 | ] 137 | 138 | [[package]] 139 | name = "getrandom" 140 | version = "0.2.12" 141 
| source = "registry+https://github.com/rust-lang/crates.io-index" 142 | checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" 143 | dependencies = [ 144 | "cfg-if", 145 | "libc", 146 | "wasi", 147 | ] 148 | 149 | [[package]] 150 | name = "heck" 151 | version = "0.4.1" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 154 | 155 | [[package]] 156 | name = "humantime" 157 | version = "2.1.0" 158 | source = "registry+https://github.com/rust-lang/crates.io-index" 159 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 160 | 161 | [[package]] 162 | name = "itoa" 163 | version = "1.0.10" 164 | source = "registry+https://github.com/rust-lang/crates.io-index" 165 | checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" 166 | 167 | [[package]] 168 | name = "libc" 169 | version = "0.2.152" 170 | source = "registry+https://github.com/rust-lang/crates.io-index" 171 | checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" 172 | 173 | [[package]] 174 | name = "linux-raw-sys" 175 | version = "0.4.13" 176 | source = "registry+https://github.com/rust-lang/crates.io-index" 177 | checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" 178 | 179 | [[package]] 180 | name = "memchr" 181 | version = "2.7.1" 182 | source = "registry+https://github.com/rust-lang/crates.io-index" 183 | checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" 184 | 185 | [[package]] 186 | name = "ppv-lite86" 187 | version = "0.2.17" 188 | source = "registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 190 | 191 | [[package]] 192 | name = "proc-macro2" 193 | version = "1.0.78" 194 | source = "registry+https://github.com/rust-lang/crates.io-index" 195 | checksum = 
"e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" 196 | dependencies = [ 197 | "unicode-ident", 198 | ] 199 | 200 | [[package]] 201 | name = "quote" 202 | version = "1.0.35" 203 | source = "registry+https://github.com/rust-lang/crates.io-index" 204 | checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" 205 | dependencies = [ 206 | "proc-macro2", 207 | ] 208 | 209 | [[package]] 210 | name = "rand" 211 | version = "0.8.5" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 214 | dependencies = [ 215 | "libc", 216 | "rand_chacha", 217 | "rand_core", 218 | ] 219 | 220 | [[package]] 221 | name = "rand_chacha" 222 | version = "0.3.1" 223 | source = "registry+https://github.com/rust-lang/crates.io-index" 224 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 225 | dependencies = [ 226 | "ppv-lite86", 227 | "rand_core", 228 | ] 229 | 230 | [[package]] 231 | name = "rand_core" 232 | version = "0.6.4" 233 | source = "registry+https://github.com/rust-lang/crates.io-index" 234 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 235 | dependencies = [ 236 | "getrandom", 237 | ] 238 | 239 | [[package]] 240 | name = "redox_syscall" 241 | version = "0.4.1" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" 244 | dependencies = [ 245 | "bitflags 1.3.2", 246 | ] 247 | 248 | [[package]] 249 | name = "regex" 250 | version = "1.10.3" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" 253 | dependencies = [ 254 | "aho-corasick", 255 | "memchr", 256 | "regex-automata", 257 | "regex-syntax", 258 | ] 259 | 260 | [[package]] 261 | name = "regex-automata" 262 | version = "0.4.4" 263 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 264 | checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a" 265 | dependencies = [ 266 | "aho-corasick", 267 | "memchr", 268 | "regex-syntax", 269 | ] 270 | 271 | [[package]] 272 | name = "regex-syntax" 273 | version = "0.8.2" 274 | source = "registry+https://github.com/rust-lang/crates.io-index" 275 | checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" 276 | 277 | [[package]] 278 | name = "rustix" 279 | version = "0.38.30" 280 | source = "registry+https://github.com/rust-lang/crates.io-index" 281 | checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" 282 | dependencies = [ 283 | "bitflags 2.4.2", 284 | "errno", 285 | "libc", 286 | "linux-raw-sys", 287 | "windows-sys 0.52.0", 288 | ] 289 | 290 | [[package]] 291 | name = "ryu" 292 | version = "1.0.16" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" 295 | 296 | [[package]] 297 | name = "serde" 298 | version = "1.0.195" 299 | source = "registry+https://github.com/rust-lang/crates.io-index" 300 | checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" 301 | dependencies = [ 302 | "serde_derive", 303 | ] 304 | 305 | [[package]] 306 | name = "serde_derive" 307 | version = "1.0.195" 308 | source = "registry+https://github.com/rust-lang/crates.io-index" 309 | checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" 310 | dependencies = [ 311 | "proc-macro2", 312 | "quote", 313 | "syn", 314 | ] 315 | 316 | [[package]] 317 | name = "serde_json" 318 | version = "1.0.111" 319 | source = "registry+https://github.com/rust-lang/crates.io-index" 320 | checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" 321 | dependencies = [ 322 | "itoa", 323 | "ryu", 324 | "serde", 325 | ] 326 | 327 | [[package]] 328 | name = "syn" 329 
| version = "2.0.48" 330 | source = "registry+https://github.com/rust-lang/crates.io-index" 331 | checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" 332 | dependencies = [ 333 | "proc-macro2", 334 | "quote", 335 | "unicode-ident", 336 | ] 337 | 338 | [[package]] 339 | name = "terminal_size" 340 | version = "0.3.0" 341 | source = "registry+https://github.com/rust-lang/crates.io-index" 342 | checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" 343 | dependencies = [ 344 | "rustix", 345 | "windows-sys 0.48.0", 346 | ] 347 | 348 | [[package]] 349 | name = "test_dir" 350 | version = "0.2.0" 351 | source = "registry+https://github.com/rust-lang/crates.io-index" 352 | checksum = "1fc19daf9fc57fadcf740c4abaaa0cd08d9ce22a2a0629aaf6cbd9ae4b80683a" 353 | dependencies = [ 354 | "rand", 355 | ] 356 | 357 | [[package]] 358 | name = "unicode-ident" 359 | version = "1.0.12" 360 | source = "registry+https://github.com/rust-lang/crates.io-index" 361 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 362 | 363 | [[package]] 364 | name = "wasi" 365 | version = "0.11.0+wasi-snapshot-preview1" 366 | source = "registry+https://github.com/rust-lang/crates.io-index" 367 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 368 | 369 | [[package]] 370 | name = "windows-sys" 371 | version = "0.48.0" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" 374 | dependencies = [ 375 | "windows-targets 0.48.5", 376 | ] 377 | 378 | [[package]] 379 | name = "windows-sys" 380 | version = "0.52.0" 381 | source = "registry+https://github.com/rust-lang/crates.io-index" 382 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 383 | dependencies = [ 384 | "windows-targets 0.52.0", 385 | ] 386 | 387 | [[package]] 388 | name = "windows-targets" 389 | version = "0.48.5" 390 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 391 | checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" 392 | dependencies = [ 393 | "windows_aarch64_gnullvm 0.48.5", 394 | "windows_aarch64_msvc 0.48.5", 395 | "windows_i686_gnu 0.48.5", 396 | "windows_i686_msvc 0.48.5", 397 | "windows_x86_64_gnu 0.48.5", 398 | "windows_x86_64_gnullvm 0.48.5", 399 | "windows_x86_64_msvc 0.48.5", 400 | ] 401 | 402 | [[package]] 403 | name = "windows-targets" 404 | version = "0.52.0" 405 | source = "registry+https://github.com/rust-lang/crates.io-index" 406 | checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" 407 | dependencies = [ 408 | "windows_aarch64_gnullvm 0.52.0", 409 | "windows_aarch64_msvc 0.52.0", 410 | "windows_i686_gnu 0.52.0", 411 | "windows_i686_msvc 0.52.0", 412 | "windows_x86_64_gnu 0.52.0", 413 | "windows_x86_64_gnullvm 0.52.0", 414 | "windows_x86_64_msvc 0.52.0", 415 | ] 416 | 417 | [[package]] 418 | name = "windows_aarch64_gnullvm" 419 | version = "0.48.5" 420 | source = "registry+https://github.com/rust-lang/crates.io-index" 421 | checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" 422 | 423 | [[package]] 424 | name = "windows_aarch64_gnullvm" 425 | version = "0.52.0" 426 | source = "registry+https://github.com/rust-lang/crates.io-index" 427 | checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" 428 | 429 | [[package]] 430 | name = "windows_aarch64_msvc" 431 | version = "0.48.5" 432 | source = "registry+https://github.com/rust-lang/crates.io-index" 433 | checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" 434 | 435 | [[package]] 436 | name = "windows_aarch64_msvc" 437 | version = "0.52.0" 438 | source = "registry+https://github.com/rust-lang/crates.io-index" 439 | checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" 440 | 441 | [[package]] 442 | name = "windows_i686_gnu" 443 | version = 
"0.48.5" 444 | source = "registry+https://github.com/rust-lang/crates.io-index" 445 | checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" 446 | 447 | [[package]] 448 | name = "windows_i686_gnu" 449 | version = "0.52.0" 450 | source = "registry+https://github.com/rust-lang/crates.io-index" 451 | checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" 452 | 453 | [[package]] 454 | name = "windows_i686_msvc" 455 | version = "0.48.5" 456 | source = "registry+https://github.com/rust-lang/crates.io-index" 457 | checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" 458 | 459 | [[package]] 460 | name = "windows_i686_msvc" 461 | version = "0.52.0" 462 | source = "registry+https://github.com/rust-lang/crates.io-index" 463 | checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" 464 | 465 | [[package]] 466 | name = "windows_x86_64_gnu" 467 | version = "0.48.5" 468 | source = "registry+https://github.com/rust-lang/crates.io-index" 469 | checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" 470 | 471 | [[package]] 472 | name = "windows_x86_64_gnu" 473 | version = "0.52.0" 474 | source = "registry+https://github.com/rust-lang/crates.io-index" 475 | checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" 476 | 477 | [[package]] 478 | name = "windows_x86_64_gnullvm" 479 | version = "0.48.5" 480 | source = "registry+https://github.com/rust-lang/crates.io-index" 481 | checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" 482 | 483 | [[package]] 484 | name = "windows_x86_64_gnullvm" 485 | version = "0.52.0" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" 488 | 489 | [[package]] 490 | name = "windows_x86_64_msvc" 491 | version = "0.48.5" 492 | source = "registry+https://github.com/rust-lang/crates.io-index" 493 | 
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" 494 | 495 | [[package]] 496 | name = "windows_x86_64_msvc" 497 | version = "0.52.0" 498 | source = "registry+https://github.com/rust-lang/crates.io-index" 499 | checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" 500 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bkt" 3 | # When updating the version number tag the commit in git 4 | # https://rust-lang.github.io/api-guidelines/documentation.html#release-notes-document-all-significant-changes-c-relnotes 5 | version = "0.8.2" 6 | authors = ["Michael Diamond "] 7 | description = "CLI and Rust library for caching subprocess invocations" 8 | repository = "https://github.com/dimo414/bkt" 9 | homepage = "https://bkt.rs/" 10 | license = "MIT" 11 | keywords = ["cache", "caching", "subprocess", "cli", "shell"] 12 | categories = ["caching", "command-line-utilities"] 13 | edition = "2018" 14 | include = [ 15 | "**/*.rs", 16 | "Cargo.*", 17 | "README.md", 18 | "LICENSE", 19 | ] 20 | 21 | [features] 22 | # Uses JSON to cache the command and output in a human-readable format, to aid debugging. 
23 | debug = ['serde_json'] 24 | 25 | [dependencies] 26 | anyhow = "1.0" 27 | bincode = "1.3.1" 28 | humantime = "2.1.0" 29 | rand = "0.8" 30 | serde = { version = "1.0", features = ["derive"] } 31 | base64 = "0.21.5" 32 | libc = "0.2" 33 | 34 | [dependencies.clap] 35 | version = "4.2" 36 | default-features = false 37 | features = ["derive", "env", "error-context", "help", "std", "usage", "wrap_help"] 38 | 39 | [dependencies.serde_json] 40 | optional = true 41 | version = "1.0" 42 | 43 | [dev-dependencies] 44 | filetime = "0.2" 45 | regex = "1.7" 46 | test_dir = "0.2" 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Michael Diamond 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `bkt` 2 | 3 | [![releases](https://img.shields.io/github/v/release/dimo414/bkt?sort=semver&logo=github)](https://github.com/dimo414/bkt/releases) 4 | [![crates.io](https://img.shields.io/crates/v/bkt?logo=rust)](https://crates.io/crates/bkt) 5 | [![docs.rs](https://img.shields.io/docsrs/bkt?label=docs.rs)](https://docs.rs/bkt) 6 | [![build status](https://img.shields.io/github/actions/workflow/status/dimo414/bkt/rust.yml?branch=master)](https://github.com/dimo414/bkt/actions) 7 | [![issues](https://img.shields.io/github/issues/dimo414/bkt)](https://github.com/dimo414/bkt/issues) 8 | [![license](https://img.shields.io/github/license/dimo414/bkt)](https://github.com/dimo414/bkt/blob/master/LICENSE) 9 | 10 | `bkt` (pronounced "bucket") is a subprocess caching utility written in Rust, 11 | inspired by [bash-cache](https://github.com/dimo414/bash-cache). 12 | Wrapping expensive process invocations with `bkt` allows callers to reuse recent 13 | invocations without complicating their application logic. This can be useful in 14 | shell prompts, interactive applications such as [`fzf`](#fzf), and long-running 15 | programs that poll other processes. 16 | 17 | `bkt` is available as a standalone binary as well as a 18 | [Rust library](https://crates.io/crates/bkt). See https://docs.rs/bkt/ for 19 | library documentation. This README covers the `bkt` binary. 20 | 21 | ## Installation 22 | 23 | Run `cargo install bkt` to compile and install `bkt` locally. You will need to 24 | [install `cargo`](https://doc.rust-lang.org/cargo/getting-started/installation.html) 25 | if it's not already on your system. 26 | 27 | Pre-compiled binaries for common platforms are attached to each 28 | [release](https://github.com/dimo414/bkt/releases) (starting with 0.5). 
Please 29 | open an issue or send a PR if you would like releases to include binaries for 30 | additional platforms. 31 | 32 | Package manager support is being tracked 33 | [here](https://github.com/dimo414/bkt/issues/12); volunteers are welcome. 34 | 35 | [![Packaging status](https://repology.org/badge/vertical-allrepos/bkt.svg?columns=3)](https://repology.org/project/bkt/versions) 36 | 37 | ## Usage 38 | 39 | ``` 40 | bkt --ttl=DURATION [--stale=DURATION] [--cwd] [--env=ENV ...] [--modtime=FILE ...] [--scope=SCOPE] [--discard-failures] [--warm|--force] -- ... 41 | ``` 42 | 43 | `bkt` is easy to start using - simply prefix the command you intend to cache 44 | with `bkt --ttl=[some duration] --`, for example: 45 | 46 | ```shell 47 | # Execute and cache an invocation of 'date +%s.%N' 48 | $ bkt --ttl=1m -- date +%s.%N 49 | 1631992417.080884000 50 | 51 | # A subsequent invocation reuses the same cached output 52 | $ bkt --ttl=1m -- date +%s.%N 53 | 1631992417.080884000 54 | ``` 55 | 56 | When `bkt` is passed a command it hasn't seen before (or recently) it executes 57 | the command synchronously and caches its stdout, stderr, and exit code. Calling 58 | `bkt` again with the same command reads the data from the cache and outputs it 59 | as if the command had been run again. 60 | 61 | ### Cache Lifespan 62 | 63 | Two flags, `--ttl` and `--stale`, configure how long cached data is preserved. 64 | The TTL (Time to Live) specifies how long cached data will be used. Once the 65 | TTL expires the cached data will be discarded and the backing command re-run. 66 | A TTL can also be configured by setting a `BKT_TTL` environment variable. 67 | 68 | When the data expires `bkt` has to re-execute the command synchronously, which 69 | can introduce unexpected slowness. To avoid this, pass `--stale` with a shorter 70 | duration than the TTL. 
When the cached data is older than the stale threshold 71 | this causes `bkt` to refresh the cache in the background while still promptly 72 | returning the cached data. 73 | 74 | Both flags (and `BKT_TTL`) accept duration strings such as `10s` or 75 | `1hour 30min`. The exact syntax is defined in the 76 | [humantime](https://docs.rs/humantime/2.1.0/humantime/fn.parse_duration.html) 77 | library. 78 | 79 | ### Execution Environment 80 | 81 | Some commands' behavior depends on more than just the command line arguments. 82 | It's possible to adjust how `bkt` caches such commands so that unrelated 83 | invocations are cached separately. 84 | 85 | #### Working Directory 86 | 87 | For example, attempting to cache `pwd` will not work as expected by default: 88 | 89 | ```shell 90 | $ bkt --ttl=1m -- pwd 91 | /tmp/foo 92 | 93 | $ cd ../bar 94 | 95 | # Cached output for 'pwd' is reused even though the directory has changed 96 | $ bkt --ttl=1m -- pwd 97 | /tmp/foo 98 | ``` 99 | 100 | To have `bkt` key off the current working directory in addition to the command 101 | line arguments pass `--cwd`: 102 | 103 | ```shell 104 | $ bkt --cwd --ttl=1m -- pwd 105 | /tmp/foo 106 | 107 | $ cd ../bar 108 | 109 | $ bkt --cwd --ttl=1m -- pwd 110 | /tmp/bar 111 | ``` 112 | 113 | #### Environment Variables 114 | 115 | Similarly, to specify one or more environment variables as relevant for the 116 | command being cached use `--env`, such as `--env=LANG`. This flag can be 117 | provided multiple times to key off additional variables. Invocations with 118 | different values for any of the given variables will be cached separately. 119 | 120 | #### File Modifications 121 | 122 | `bkt` can also check the last-modified time of one or more files and include 123 | this in the cache key using `--modtime`. For instance passing 124 | `--modtime=/etc/passwd` would cause the backing command to be re-executed any 125 | time `/etc/passwd` is modified even if the TTL has not expired. 
126 | 127 | ### Refreshing Manually 128 | 129 | It's also possible to trigger refreshes manually using `--force` or `--warm`. 130 | The former behaves exactly as if the cached data was not found, executing the 131 | process and caching the result. This is useful if you know the cached data 132 | is no longer up-to-date, e.g. because something external changed. 133 | 134 | Alternatively, it can be useful to refresh the cache asynchronously, which 135 | `--warm` provides. This triggers a refresh in the background but immediately 136 | ends the current process with no output. This is useful if you expect 137 | additional invocations in the near future and want to ensure they get a cache 138 | hit. Note that until the warming process completes concurrent calls may still 139 | see a cache miss and trigger their own invocation. 140 | 141 | ### Setting a Cache Scope 142 | 143 | Cached data is persisted to disk (but see [below](#cache_dir)), and is 144 | available to any process that invokes `bkt`. Generally this is desirable, but 145 | certain usages may want to isolate their invocations from other potential 146 | concurrent calls. 147 | 148 | To do so pass `--scope=...` with a sufficiently unique argument, such as a fixed 149 | label for the calling program, the current process ID, or a timestamp. 150 | 151 | ```shell 152 | $ bkt --ttl=1m -- date +%s.%N 153 | 1631992417.080884000 154 | 155 | # Changing the scope causes the command to be cached separately 156 | $ bkt --scope=foo --ttl=1m -- date +%s.%N 157 | 1631992418.010562000 158 | ``` 159 | 160 | Alternatively, define a `BKT_SCOPE` environment variable to configure a 161 | consistent scope across invocations. This can be useful within a script to 162 | ensure all commands share a scope. 
163 | 164 | ```shell 165 | #!/bin/bash 166 | 167 | # Set a unique scope for this script invocation using the PID and current time 168 | export BKT_SCOPE="my_script_$$_$(date -Ins)" 169 | ``` 170 | 171 | ### Discarding Failed Invocations 172 | 173 | By default, all invocations are cached regardless of their output or exit code. 174 | In situations where failures should not be cached pass `--discard-failures` to 175 | only persist successful invocations (those that return a `0` exit code). 176 | 177 | **WARNING:** Passing this flag can cause the backing command to be invoked more 178 | frequently than the `--ttl` would suggest, which in turn can create unexpected 179 | load. If the backing command is failing due to an outage or bug (such as an 180 | overloaded website) triggering additional calls can exacerbate the issue and 181 | effectively DDoS the hampered system. It is generally safer *not* to set this 182 | flag and instead make the client robust to occasional failures. 183 | 184 | 185 | ### Changing the Cache Directory 186 | 187 | By default, cached data is stored under your system's temporary directory 188 | (typically `/tmp` on Linux). 189 | 190 | You may want to use a different location for certain commands, for instance to 191 | be able to easily delete the cached data as soon as it's no longer needed. You 192 | can specify a custom cache directory via the `--cache-dir` flag or by defining 193 | a `BKT_CACHE_DIR` environment variable. 194 | 195 | Note that the choice of directory can affect `bkt`'s performance: if the cache 196 | directory is on a [`tmpfs`](https://en.wikipedia.org/wiki/Tmpfs) or solid-state 197 | partition it will be significantly faster than one using a spinning disk. 198 | 199 | If your system's temporary directory is not a good choice for the default cache 200 | location (e.g. it is not a `tmpfs`) you can specify a different location by 201 | defining a `BKT_TMPDIR` environment variable (for example in your `.bashrc`). 
202 | These two environment variables, `BKT_TMPDIR` and `BKT_CACHE_DIR`, have similar 203 | effects but `BKT_TMPDIR` should be used to configure the system-wide default, 204 | and `--cache-dir`/`BKT_CACHE_DIR` used to override it. 205 | 206 | `bkt` periodically prunes stale data from its cache, but it also assumes the 207 | operating system will empty its temporary storage from time to time (for `/tmp` 208 | this typically happens on reboot). If you opt to use a directory that the 209 | system does not maintain, such as `~/.cache`, you may want to manually delete 210 | the cache directory on occasion, such as when upgrading `bkt`. 211 | 212 | ## Security and Privacy 213 | 214 | The default cache directory is potentially world-readable. On Unix the cache 215 | directory is created with `700` permissions, meaning only the current user can 216 | access it, but this is not foolproof. 217 | 218 | You can customize the cache directory (see [above](#cache_dir)) to a location 219 | you trust such as `~/.cache`, but note that your home directory may be slower than 220 | the temporary directory selected by default. 221 | 222 | In general, if you are not the only user of your system it's wise to configure 223 | your `TMPDIR` to a location only you can access. If that is not possible use 224 | `BKT_TMPDIR` to configure a custom temporary directory specifically for `bkt`. 225 | 226 | ## Patterns and Tips 227 | 228 | **Please share how you're using `bkt` on the 229 | [Discussion Board](https://github.com/dimo414/bkt/discussions/categories/show-and-tell)!** 230 | 231 | 232 | ### Speeding up `fzf` and other preview tools 233 | 234 | `bkt` works well with interactive tools like 235 | [`fzf`](https://github.com/junegunn/fzf) that execute other commands. Because 236 | `fzf` executes the `--preview` command every time an element is selected it can 237 | be slow and tedious to browse when the command takes a long time to run. 
Using 238 | `bkt` allows each selection's preview to be cached. Compare: 239 | 240 | ```shell 241 | $ printf '%s\n' 1 0.2 3 0.1 5 | \ 242 | fzf --preview="bash -c 'sleep {}; echo {}'" 243 | 244 | $ printf '%s\n' 1 0.2 3 0.1 5 | \ 245 | fzf --preview="bkt --ttl=10m --stale=10s -- bash -c 'sleep {}; echo {}'" 246 | ``` 247 | 248 | You'll generally want to use a long TTL and a short stale duration so that 249 | even if you leave `fzf` running for a while the cache remains warm and is 250 | refreshed in the background. You may also want to set a `--scope` if it's 251 | important to invalidate the cache on subsequent invocations. 252 | 253 | See [this discussion](https://github.com/dimo414/bkt/discussions/29) for a more 254 | complete example of using `bkt` with `fzf`, including warming the commands before 255 | the user starts navigating the selector. 256 | 257 | ### Using `bkt` only if installed 258 | 259 | You may want to distribute shell scripts that utilize `bkt` without requiring 260 | every user also install it. By falling back to a no-op shell function when `bkt` 261 | is not available your script can take advantage of it opportunistically without 262 | complicating your users' workflow. Of course if they choose to install `bkt` 263 | they'll get a faster script as a result! 264 | 265 | ```shell 266 | # Cache commands using bkt if installed 267 | if ! command -v bkt >&/dev/null; then 268 | # If bkt isn't installed skip its arguments and just execute directly. 269 | bkt() { 270 | while [[ "$1" == --* ]]; do shift; done 271 | "$@" 272 | } 273 | # Optionally, write a msg to stderr suggesting users install bkt. 274 | echo "Tip: install https://github.com/dimo414/bkt for faster performance" >&2 275 | fi 276 | ``` 277 | 278 | ### Decorating commands with `bkt` in shell scripts 279 | 280 | It is sometimes helpful to cache _all_ invocations of a command in a shell 281 | script or in your shell environment. 
You can use a decorator function pattern 282 | similar to what bash-cache does to enable caching transparently, like so: 283 | 284 | ```shell 285 | # This is Bash syntax, but other shells support similar syntax 286 | expensive_cmd() { 287 | bkt [bkt args ...] -- expensive_cmd "$@" 288 | } 289 | ``` 290 | 291 | Calls to `expensive_cmd` in your shell will now go through `bkt` behind the 292 | scenes. This can be useful for brevity and consistency but obviously changing 293 | behavior like this is a double-edged sword, so use with caution. Should you 294 | need to bypass the cache for a single invocation Bash provides the 295 | [`command` builtin](https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-command), 296 | so `command expensive_cmd ...` will invoke `expensive_cmd` directly. Other 297 | shells provide similar features. 298 | -------------------------------------------------------------------------------- /benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Basic benchmark script to observe the overhead introduced by bkt. This can be 4 | # useful for validating that a given command will benefit from caching (namely 5 | # if the underlying command is significantly slower than the caching overhead). 6 | # It can also be used to test the performance of different cache directory 7 | # locations, such as a directory backed by a tmpfs file system. 8 | # 9 | # Usage: 10 | # benchmark.sh [[--bkt=PATH] [--iters=NUM] --] [bkt_args ... --] cmd [args ...] 
11 | # 12 | # Examples: 13 | # benchmark.sh expensive_command args 14 | # benchmark.sh --iters=20 -- quicker_command args 15 | # benchmark.sh --bkt=target/release/bkt -- expensive_command args 16 | # benchmark.sh --bkt=target/debug/bkt -- --cwd -- expensive_command args 17 | 18 | set -u 19 | 20 | time_quiet() { 21 | ( TIMEFORMAT=%R; time "$@" &> /dev/null ) 2>&1 22 | } 23 | 24 | avg_floats() { 25 | # Maybe this whole script should just be written in Python... 26 | # Or even just implement a --benchmark flag in main.rs? 27 | python3 <(cat <&2 38 | exit "$code" 39 | } 40 | 41 | # flag defaults 42 | bkt=bkt 43 | iters=5 44 | bkt_args=() 45 | cmd=() 46 | 47 | # Read benchmark flags 48 | while [[ "${1:-}" == --* ]]; do 49 | arg="$1" flag="${1%=*}" value= 50 | if [[ "$arg" == *=* ]]; then 51 | value="${1#*=}" 52 | fi 53 | shift 54 | case "$flag" in 55 | --bkt) bkt=$value ;; 56 | --iters) iters=$value ;; 57 | --) break ;; 58 | --*) exit_with_message 2 "Unknown flag '${flag}'" ;; 59 | *) break ;; 60 | esac 61 | done 62 | 63 | # Read command and bkt flags 64 | while (( $# > 0 )); do 65 | if [[ "$1" == -- ]]; then 66 | bkt_args=("${cmd[@]}") 67 | shift 68 | cmd=("$@") 69 | break 70 | fi 71 | cmd+=("$1") 72 | shift 73 | done 74 | 75 | # validation 76 | (( ${#cmd[@]} > 0 )) || exit_with_message 1 "Must provide a command to benchmark" 77 | 78 | full_bkt=$(command -v "$bkt") || exit_with_message 1 \ 79 | "${bkt} not found; pass --bkt to specify bkt's location" 80 | 81 | for bkt_arg in "${bkt_args[@]}"; do 82 | if [[ "$bkt_arg" == --scope* ]]; then 83 | exit_with_message 1 "--scope is used by the benchmark script, do not use" 84 | fi 85 | done 86 | 87 | # Execute benchmark 88 | printf 'Benchmarking:\n\t' 89 | printf '%q ' "${cmd[@]}" 90 | printf '\nwith:\n\t' 91 | printf '%q ' "${full_bkt}" "${bkt_args[@]}" 92 | printf '\n\n' 93 | 94 | # Ensure the cache dir exists and the bkt args are valid 95 | "$full_bkt" "${bkt_args[@]}" -- true || exit_with_message 1 "Invoking bkt failed" 96 
| 97 | printf -v scope '%s-%(%s)T' "$$" 98 | raw_times=() cold_times=() warm_times=() 99 | for (( i=0; i for binary details. 5 | //! 6 | //! ```no_run 7 | //! # fn do_something(_: &str) {} 8 | //! # fn main() -> anyhow::Result<()> { 9 | //! # use std::time::Duration; 10 | //! let bkt = bkt::Bkt::in_tmp()?; 11 | //! let expensive_cmd = bkt::CommandDesc::new(["wget", "https://example.com"]); 12 | //! let (result, age) = bkt.retrieve(&expensive_cmd, Duration::from_secs(3600))?; 13 | //! do_something(result.stdout_utf8()); 14 | //! # Ok(()) } 15 | //! ``` 16 | #![warn(missing_docs)] 17 | 18 | use std::collections::{BTreeMap, BTreeSet}; 19 | use std::convert::{TryFrom, TryInto}; 20 | use std::ffi::{OsString, OsStr}; 21 | use std::fs::{File, OpenOptions}; 22 | use std::hash::{Hash, Hasher}; 23 | use std::io::{self, BufReader, ErrorKind, BufWriter, Write, Read}; 24 | use std::path::{PathBuf, Path}; 25 | use std::time::{Duration, SystemTime}; 26 | 27 | use anyhow::{anyhow, Context, Error, Result}; 28 | use serde::{Serialize, Deserialize}; 29 | use serde::de::DeserializeOwned; 30 | 31 | use base64::{Engine as _, engine::general_purpose}; 32 | 33 | 34 | #[cfg(feature="debug")] 35 | macro_rules! debug_msg { 36 | ($($arg:tt)*) => { eprintln!("bkt: {}", format!($($arg)*)) } 37 | } 38 | #[cfg(not(feature="debug"))] 39 | macro_rules! debug_msg { 40 | ($($arg:tt)*) => { } 41 | } 42 | 43 | /// Returns the modtime of the given path. Returns Ok(None) if the file is not found, and 44 | /// otherwise returns an error if the modtime cannot be determined. 
45 | fn modtime(path: &Path) -> Result> { 46 | let metadata = std::fs::metadata(path); 47 | match metadata { 48 | Ok(metadata) => { 49 | Ok(Some(metadata.modified().context("Modtime is not supported")?)) 50 | }, 51 | Err(ref err) => { 52 | match err.kind() { 53 | ErrorKind::NotFound => Ok(None), 54 | _ => { metadata?; unreachable!() }, 55 | } 56 | } 57 | } 58 | } 59 | 60 | /// A stateless description of a command to be executed and cached. It consists of a command line 61 | /// invocation and additional metadata about how the command should be cached which are configured 62 | /// via the `with_*` methods. Instances can be persisted and reused. 63 | /// 64 | /// Calling any of these methods changes how the invocation's cache key will be constructed, 65 | /// therefore two invocations with different metadata configured will be cached separately. This 66 | /// allows - for example - commands that interact with the current working directory to be cached 67 | /// dependent on the working directory even if the command line arguments are equal. 68 | /// 69 | /// # Examples 70 | /// 71 | /// ``` 72 | /// let cmd = bkt::CommandDesc::new(["echo", "Hello World!"]); 73 | /// let with_cwd = bkt::CommandDesc::new(["ls"]).with_cwd(); 74 | /// let with_env = bkt::CommandDesc::new(["date"]).with_env("TZ"); 75 | /// ``` 76 | #[derive(Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)] 77 | pub struct CommandDesc { 78 | args: Vec, 79 | use_cwd: bool, 80 | envs: BTreeSet, 81 | mod_files: BTreeSet, 82 | persist_failures: bool, 83 | } 84 | 85 | impl CommandDesc { 86 | /// Constructs a CommandDesc instance for the given command line. 
87 | /// 88 | /// ``` 89 | /// let cmd = bkt::CommandDesc::new(["echo", "Hello World!"]); 90 | /// ``` 91 | pub fn new(command: I) -> Self where I: IntoIterator, S: Into { 92 | let ret = CommandDesc { 93 | args: command.into_iter().map(Into::into).collect(), 94 | use_cwd: false, 95 | envs: BTreeSet::new(), 96 | mod_files: BTreeSet::new(), 97 | persist_failures: true, 98 | }; 99 | assert!(!ret.args.is_empty(), "Command cannot be empty"); 100 | ret 101 | } 102 | 103 | /// Specifies that the current process' working directory should be included in the cache key. 104 | /// Commands that depend on the working directory (e.g. `ls` or `git status`) should call this 105 | /// in order to cache executions in different working directories separately. 106 | /// 107 | /// # Examples 108 | /// 109 | /// ``` 110 | /// let cmd = bkt::CommandDesc::new(["pwd"]).with_cwd(); 111 | /// ``` 112 | pub fn with_cwd(mut self) -> Self { 113 | self.use_cwd = true; 114 | self 115 | } 116 | 117 | /// Specifies that the given environment variable should be included in the cache key. Commands 118 | /// that depend on the values of certain environment variables (e.g. `LANG`, `PATH`, or `TZ`) 119 | /// should call this in order to cache such executions separately. Although it's possible to 120 | /// pass `PWD` here calling [`with_cwd()`] is generally recommended instead for clarity and 121 | /// consistency with subprocesses that don't read this environment variable. 122 | /// 123 | /// Note: If the given variable name is not found in the current process' environment at 124 | /// execution time the variable is _not_ included in the cache key, and the execution will be 125 | /// cached as if the environment variable had not been specified at all. 
126 | /// 127 | /// [`with_cwd()`]: CommandDesc::with_cwd 128 | /// 129 | /// # Examples 130 | /// 131 | /// ``` 132 | /// let cmd = bkt::CommandDesc::new(["date"]).with_env("TZ"); 133 | /// ``` 134 | pub fn with_env(mut self, key: K) -> Self where K: AsRef { 135 | self.envs.insert(key.as_ref().into()); 136 | self 137 | } 138 | 139 | /// Specifies that the given environment variables should be included in the cache key. Commands 140 | /// that depend on the values of certain environment variables (e.g. `LANG`, `PATH`, or `TZ`) 141 | /// should call this in order to cache such executions separately. Although it's possible to 142 | /// pass `PWD` here calling [`with_cwd()`] is generally recommended instead for clarity and 143 | /// consistency with subprocesses that don't read this environment variable. 144 | /// 145 | /// Note: If a given variable name is not found in the current process' environment at execution 146 | /// time that variable is _not_ included in the cache key, and the execution will be cached as 147 | /// if the environment variable had not been specified at all. 148 | /// 149 | /// [`with_cwd()`]: CommandDesc::with_cwd 150 | /// 151 | /// # Examples 152 | /// 153 | /// ``` 154 | /// let cmd = bkt::CommandDesc::new(["date"]).with_envs(["LANG", "TZ"]); 155 | /// ``` 156 | pub fn with_envs(mut self, envs: I) -> Self where 157 | I: IntoIterator, 158 | E: AsRef, 159 | { 160 | self.envs.extend(envs.into_iter().map(|e| e.as_ref().into())); 161 | self 162 | } 163 | 164 | /// Specifies that the modification time of the given file should be included in the cache key, 165 | /// causing cached commands to be invalidated if the file is modified in the future. Commands 166 | /// that depend on the contents of certain files should call this in order to invalidate the 167 | /// cache when the file changes. 
168 | /// 169 | /// It is recommended to pass absolute paths when this is used along with [`with_cwd()`] or 170 | /// [`CommandState::with_working_dir()`] to avoid any ambiguity in how relative paths are 171 | /// resolved. 172 | /// 173 | /// Note: If the given path is not found at execution time the file is _not_ included in the 174 | /// cache key, and the execution will be cached as if the file had not been specified at all. 175 | /// 176 | /// [`with_cwd()`]: CommandDesc::with_cwd 177 | /// 178 | /// # Examples 179 | /// 180 | /// ``` 181 | /// let cmd = bkt::CommandDesc::new(["..."]).with_modtime("/etc/passwd"); 182 | /// ``` 183 | pub fn with_modtime

(mut self, file: P) -> Self where P: AsRef { 184 | self.mod_files.insert(file.as_ref().into()); 185 | self 186 | } 187 | 188 | /// Specifies that the modification time of the given files should be included in the cache key, 189 | /// causing cached commands to be invalidated if the files are modified in the future. Commands 190 | /// that depend on the contents of certain files should call this in order to invalidate the 191 | /// cache when the files change. 192 | /// 193 | /// It is recommended to pass absolute paths when this is used along with [`with_cwd()`] or 194 | /// [`CommandState::with_working_dir()`] to avoid any ambiguity in how relative paths are 195 | /// resolved. 196 | /// 197 | /// Note: If a given path is not found at execution time that file is _not_ included in the 198 | /// cache key, and the execution will be cached as if the file had not been specified at all. 199 | /// 200 | /// [`with_cwd()`]: CommandDesc::with_cwd 201 | /// 202 | /// # Examples 203 | /// 204 | /// ``` 205 | /// let cmd = bkt::CommandDesc::new(["..."]).with_modtimes(["/etc/passwd", "/etc/group"]); 206 | /// ``` 207 | pub fn with_modtimes(mut self, files: I) -> Self where 208 | I: IntoIterator, 209 | P: AsRef, { 210 | self.mod_files.extend(files.into_iter().map(|f| f.as_ref().into())); 211 | self 212 | } 213 | 214 | /// Specifies this command should only be cached if it succeeds - i.e. it returns a zero exit 215 | /// code. Commands that return a non-zero exit code will not be cached, and therefore will be 216 | /// rerun on each invocation (until they succeed). 217 | /// 218 | /// **WARNING:** use this option with caution. Discarding invocations that fail can overload 219 | /// downstream resources that were protected by the caching layer limiting QPS. 
For example, 220 | /// if a website is rejecting a fraction of requests to shed load and then clients start 221 | /// sending _more_ requests when their attempts fail the website could be taken down outright by 222 | /// the added load. In other words, using this option can lead to accidental DDoSes. 223 | /// 224 | /// ``` 225 | /// let cmd = bkt::CommandDesc::new(["grep", "foo", "/var/log/syslog"]).with_discard_failures(true); 226 | /// ``` 227 | pub fn with_discard_failures(mut self, discard_failures: bool) -> Self { 228 | // Invert the boolean so it's not a double negative at usage sites 229 | self.persist_failures = !discard_failures; 230 | self 231 | } 232 | 233 | /// Constructs a [`CommandState`] instance, capturing application state that will be used in the 234 | /// cache key, such as the current working directory and any specified environment variables. 235 | /// The `CommandState` can also be further customized to change how the subprocess is invoked. 236 | /// 237 | /// Most callers should be able to pass a `CommandDesc` directly to a [`Bkt`] instance without 238 | /// needing to construct a separate `CommandState` first. 239 | /// 240 | /// Example: 241 | /// 242 | /// ```no_run 243 | /// # fn main() -> anyhow::Result<()> { 244 | /// # use std::time::Duration; 245 | /// let bkt = bkt::Bkt::in_tmp()?; 246 | /// let cmd = bkt::CommandDesc::new(["foo", "bar"]).capture_state()?.with_env("FOO", "BAR"); 247 | /// let (result, age) = bkt.retrieve(cmd, Duration::from_secs(3600))?; 248 | /// # Ok(()) } 249 | /// ``` 250 | pub fn capture_state(&self) -> Result { 251 | let cwd = if self.use_cwd { 252 | Some(std::env::current_dir()?) 
253 | } else { 254 | None 255 | }; 256 | let envs = self.envs.iter() 257 | .flat_map(|e| std::env::var_os(e).map(|v| (e.clone(), v))) 258 | .collect(); 259 | let modtimes = self.mod_files.iter() 260 | .map(|f| modtime(f).map(|m| (f, m))) 261 | .collect::>>()?.into_iter() 262 | .flat_map(|(f, m)| m.map(|m| (f.clone(), m))) 263 | .collect(); 264 | 265 | let state = CommandState { args: self.args.clone(), cwd, envs, modtimes, persist_failures: self.persist_failures }; 266 | debug_msg!("state: {}", state.debug_info()); 267 | Ok(state) 268 | } 269 | } 270 | 271 | /// The stateful sibling of [`CommandDesc`] which represents a command to be executed and cached 272 | /// along with environment state (e.g. the current working directory) at the time the `CommandState` 273 | /// instance is constructed. It consists of a command line invocation and application state 274 | /// determining how the command should be cached and executed. Additional `with_*` methods are 275 | /// provided on this type for further modifying how the subprocess will be executed. 276 | /// 277 | /// Calling any of these methods changes how the invocation's cache key will be constructed, 278 | /// therefore two invocations with different configured state will be cached separately, in the same 279 | /// manner as the `with_*` methods on `CommandDesc`. 280 | /// 281 | /// # Examples 282 | /// 283 | /// ``` 284 | /// # fn main() -> anyhow::Result<()> { 285 | /// let cmd = bkt::CommandDesc::new(["echo", "Hello World!"]).capture_state(); 286 | /// let with_custom_wd = bkt::CommandDesc::new(["ls"]).capture_state()?.with_working_dir("/"); 287 | /// let with_env = bkt::CommandDesc::new(["date"]).capture_state()?.with_env("TZ", "UTC"); 288 | /// # Ok(()) } 289 | /// ``` 290 | #[derive(Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)] 291 | pub struct CommandState { 292 | // TODO Borrow> or Cow> might be better, need to validate 293 | // serialization. 
Or maybe just make it &Vec and add a lifetime to CommandState? 294 | args: Vec, 295 | cwd: Option, 296 | envs: BTreeMap, 297 | modtimes: BTreeMap, 298 | persist_failures: bool, 299 | } 300 | 301 | impl CommandState { 302 | /// Sets the working directory the command should be run from, and causes this working directory 303 | /// to be included in the cache key. If unset the working directory will be inherited from the 304 | /// current process and will _not_ be used to differentiate invocations in separate working 305 | /// directories. 306 | /// 307 | /// ``` 308 | /// # fn main() -> anyhow::Result<()> { 309 | /// let cmd = bkt::CommandDesc::new(["pwd"]); 310 | /// let state = cmd.capture_state()?.with_working_dir("/tmp"); 311 | /// # Ok(()) } 312 | /// ``` 313 | pub fn with_working_dir>(mut self, cwd: P) -> Self { 314 | self.cwd = Some(cwd.as_ref().into()); 315 | self 316 | } 317 | 318 | /// Adds the given key/value pair to the environment the command should be run from, and causes 319 | /// this pair to be included in the cache key. 320 | /// 321 | /// ``` 322 | /// # fn main() -> anyhow::Result<()> { 323 | /// let cmd = bkt::CommandDesc::new(["pwd"]); 324 | /// let state = cmd.capture_state()?.with_env("FOO", "bar"); 325 | /// # Ok(()) } 326 | /// ``` 327 | pub fn with_env(mut self, key: K, value: V) -> Self 328 | where K: AsRef, V: AsRef { 329 | self.envs.insert(key.as_ref().into(), value.as_ref().into()); 330 | self 331 | } 332 | 333 | /// Adds the given key/value pairs to the environment the command should be run from, and causes 334 | /// these pairs to be included in the cache key. 
335 | /// 336 | /// ``` 337 | /// # fn main() -> anyhow::Result<()> { 338 | /// use std::env; 339 | /// use std::collections::HashMap; 340 | /// 341 | /// let important_envs : HashMap = 342 | /// env::vars().filter(|&(ref k, _)| 343 | /// k == "TERM" || k == "TZ" || k == "LANG" || k == "PATH" 344 | /// ).collect(); 345 | /// let cmd = bkt::CommandDesc::new(["..."]); 346 | /// let state = cmd.capture_state()?.with_envs(&important_envs); 347 | /// # Ok(()) } 348 | /// ``` 349 | pub fn with_envs(mut self, envs: I) -> Self 350 | where 351 | I: IntoIterator, 352 | K: AsRef, 353 | V: AsRef, 354 | { 355 | for (ref key, ref val) in envs { 356 | self.envs.insert(key.as_ref().into(), val.as_ref().into()); 357 | } 358 | self 359 | } 360 | 361 | /// Formats the CommandState's metadata (information read from the system rather than provided 362 | /// by the caller) for diagnostic purposes. 363 | #[cfg(feature="debug")] 364 | fn debug_info(&self) -> String { 365 | fn to_timestamp(time: &SystemTime) -> u128 { 366 | time.duration_since(SystemTime::UNIX_EPOCH).expect("Precedes epoch").as_micros() 367 | } 368 | 369 | let mut parts = Vec::new(); 370 | if let Some(ref cwd) = self.cwd { 371 | parts.push(format!("cwd:{}", cwd.to_string_lossy())); 372 | } 373 | if !self.envs.is_empty() { 374 | parts.push(self.envs.iter() 375 | .map(|(k, v)| format!("{}={}", k.to_string_lossy(), v.to_string_lossy())) 376 | .collect::>().join(",")); 377 | } 378 | if !self.modtimes.is_empty() { 379 | parts.push(self.modtimes.iter() 380 | .map(|(p, m)| format!("{}:{}", p.to_string_lossy(), to_timestamp(m))) 381 | .collect::>().join(" ")); 382 | } 383 | parts.join(" | ") 384 | } 385 | } 386 | 387 | impl TryFrom<&CommandDesc> for CommandState { 388 | type Error = anyhow::Error; 389 | 390 | fn try_from(desc: &CommandDesc) -> Result { 391 | desc.capture_state() 392 | } 393 | } 394 | 395 | impl From<&CommandState> for std::process::Command { 396 | fn from(cmd: &CommandState) -> Self { 397 | let mut command = 
std::process::Command::new(&cmd.args[0]); 398 | command.args(&cmd.args[1..]); 399 | if let Some(cwd) = &cmd.cwd { 400 | command.current_dir(cwd); 401 | } 402 | if !cmd.envs.is_empty() { 403 | command.envs(&cmd.envs); 404 | } 405 | command 406 | } 407 | } 408 | 409 | impl CacheKey for CommandState { 410 | fn debug_label(&self) -> Option { 411 | Some(self.args.iter() 412 | .map(|a| a.to_string_lossy()).collect::>().join("-") 413 | .chars() 414 | .map(|c| if c.is_whitespace() { '_' } else { c }) 415 | .filter(|&c| c.is_alphanumeric() || c == '-' || c == '_') 416 | .take(100).collect()) 417 | } 418 | } 419 | 420 | #[cfg(test)] 421 | mod cmd_tests { 422 | use super::*; 423 | 424 | #[test] 425 | fn debug_label() { 426 | let cmd = CommandDesc::new(["foo", "bar", "b&r _- a"]); 427 | assert_eq!(CommandState::try_from(&cmd).unwrap().debug_label(), Some("foo-bar-br__-_a".into())); 428 | } 429 | 430 | #[test] 431 | fn collisions() { 432 | std::env::set_var("FOO", "BAR"); 433 | let commands = [ 434 | CommandDesc::new(["foo"]), 435 | CommandDesc::new(["foo", "bar"]), 436 | CommandDesc::new(["foo", "b", "ar"]), 437 | CommandDesc::new(["foo", "b ar"]), 438 | CommandDesc::new(["foo"]).with_cwd(), 439 | CommandDesc::new(["foo"]).with_env("FOO"), 440 | CommandDesc::new(["foo"]).with_cwd().with_env("FOO"), 441 | ]; 442 | 443 | // https://old.reddit.com/r/rust/comments/2koptu/best_way_to_visit_all_pairs_in_a_vec/clnhxr5/ 444 | let mut iter = commands.iter(); 445 | for a in &commands { 446 | iter.next(); 447 | for b in iter.clone() { 448 | assert_ne!( 449 | CommandState::try_from(a).unwrap().cache_key(), 450 | CommandState::try_from(b).unwrap().cache_key(), 451 | "{:?} and {:?} have equivalent hashes", a, b); 452 | } 453 | } 454 | } 455 | } 456 | 457 | /// The outputs of a cached invocation of a [`CommandDesc`], akin to [`std::process::Output`]. 
458 | #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] 459 | pub struct Invocation { 460 | stdout: Vec, 461 | stderr: Vec, 462 | exit_code: i32, 463 | runtime: Duration, 464 | } 465 | 466 | impl Invocation { 467 | /// The data that the process wrote to stdout. 468 | pub fn stdout(&self) -> &[u8] { &self.stdout } 469 | 470 | /// Helper to view stdout as a UTF-8 string. Use [`from_utf8`](std::str::from_utf8) directly if 471 | /// you need to handle output that may not be UTF-8. 472 | pub fn stdout_utf8(&self) -> &str { 473 | std::str::from_utf8(&self.stdout).expect("stdout not valid UTF-8") 474 | } 475 | 476 | /// The data that the process wrote to stderr. 477 | pub fn stderr(&self) -> &[u8] { &self.stderr } 478 | 479 | /// Helper to view stderr as a UTF-8 string. Use [`from_utf8`](std::str::from_utf8) directly if 480 | /// you need to handle output that may not be UTF-8. 481 | pub fn stderr_utf8(&self) -> &str { 482 | std::str::from_utf8(&self.stderr).expect("stderr not valid UTF-8") 483 | } 484 | 485 | /// The exit code of the program, or 126 if the program terminated without an exit status. 486 | /// See [`ExitStatus::code()`](std::process::ExitStatus::code()). This is subject to change to 487 | /// better support other termination states. 488 | pub fn exit_code(&self) -> i32 { self.exit_code } 489 | 490 | /// The time the process took to complete. 491 | pub fn runtime(&self) -> Duration { self.runtime } 492 | } 493 | 494 | /// A file-lock mechanism that holds a lock by atomically creating a file in the given directory, 495 | /// and deleting the file upon being dropped. Callers should beware that dropping is not guaranteed 496 | /// (e.g. if the program panics). When a conflicting lock file is found its age (mtime) is checked 497 | /// to detect stale locks leaked by a separate process that failed to properly drop its lock. 
498 | #[derive(Debug)] 499 | struct FileLock { 500 | lock_file: PathBuf, 501 | } 502 | 503 | impl FileLock { 504 | fn try_acquire>(lock_dir: P, name: &str, consider_stale: Duration) -> Result> { 505 | let lock_file = lock_dir.as_ref().join(name).with_extension("lock"); 506 | match OpenOptions::new().create_new(true).write(true).open(&lock_file) { 507 | Ok(mut lock) => { 508 | write!(lock, "{}", std::process::id())?; 509 | Ok(Some(FileLock{ lock_file })) 510 | }, 511 | Err(io) => { 512 | match io.kind() { 513 | ErrorKind::AlreadyExists => { 514 | if let Ok(lock_metadata) = std::fs::metadata(&lock_file) { 515 | if let Ok(age) = lock_metadata.modified()?.elapsed() { 516 | if age > consider_stale { 517 | return Err(Error::msg(format!( 518 | "Lock {} held by PID {} appears stale and may need to be deleted manually.", 519 | lock_file.display(), 520 | std::fs::read_to_string(&lock_file).unwrap_or_else(|_| "unknown".into())))); 521 | } 522 | } 523 | } 524 | Ok(None) 525 | }, 526 | _ => { Err(Error::new(io)) } 527 | } 528 | }, 529 | } 530 | } 531 | } 532 | 533 | impl Drop for FileLock { 534 | fn drop(&mut self) { 535 | if let Err(e) = std::fs::remove_file(&self.lock_file) { 536 | eprintln!("Failed to delete lockfile {}, may need to be deleted manually. 
Reason: {:?}", 537 | self.lock_file.display(), e); 538 | } 539 | } 540 | } 541 | 542 | #[cfg(test)] 543 | mod file_lock_tests { 544 | use super::*; 545 | use test_dir::{TestDir, DirBuilder}; 546 | 547 | #[test] 548 | fn locks() { 549 | let dir = TestDir::temp(); 550 | let lock = FileLock::try_acquire(dir.root(), "test", Duration::from_secs(100)).unwrap(); 551 | let lock = lock.expect("Could not take lock"); 552 | assert!(dir.path("test.lock").exists()); 553 | std::mem::drop(lock); 554 | assert!(!dir.path("test.lock").exists()); 555 | } 556 | 557 | #[test] 558 | fn already_locked() { 559 | let dir = TestDir::temp(); 560 | let lock = FileLock::try_acquire(dir.root(), "test", Duration::from_secs(100)).unwrap(); 561 | let lock = lock.expect("Could not take lock"); 562 | 563 | let attempt = FileLock::try_acquire(dir.root(), "test", Duration::from_secs(100)).unwrap(); 564 | assert!(attempt.is_none()); 565 | 566 | std::mem::drop(lock); 567 | let attempt = FileLock::try_acquire(dir.root(), "test", Duration::from_secs(100)).unwrap(); 568 | assert!(attempt.is_some()); 569 | } 570 | } 571 | 572 | /// Trait allowing a type to be used as a cache key. It would be nice to blanket-implement 573 | /// this for all types that implement the dependent traits, but without a way for specific 574 | /// impls to opt-out of the blanket that would prevent customizing the debug_label(). 575 | /// Specialization might resolve that issue, in the meantime it's fine since Cache is a 576 | /// private type anyways. 577 | trait CacheKey: std::fmt::Debug+Hash+PartialEq { 578 | /// Label is added to the cache key when run with the debug feature, useful for diagnostics. 579 | fn debug_label(&self) -> Option { None } 580 | 581 | /// Generates a string sufficiently unique to describe the key; typically just the hex encoding 582 | /// of the key's hash code. Most impls should not need to override this. 
583 | fn cache_key(&self) -> String { 584 | // The hash_map::DefaultHasher is somewhat underspecified, but it notes that "hashes should 585 | // not be relied upon over releases", which implies it is stable across multiple 586 | // invocations of the same build.... See cache_tests::stable_hash. 587 | let mut s = std::collections::hash_map::DefaultHasher::new(); 588 | self.hash(&mut s); 589 | let hash = s.finish(); 590 | if cfg!(feature = "debug") { 591 | if let Some(label) = self.debug_label() { 592 | if !label.is_empty() { 593 | return format!("{}_{:016X}", label, hash); 594 | } 595 | } 596 | } 597 | format!("{:016X}", hash) 598 | } 599 | } 600 | 601 | /// Container for serialized key/value pairs. 602 | #[derive(Serialize, Deserialize)] 603 | struct CacheEntry { 604 | key: K, 605 | value: V, 606 | } 607 | 608 | // See https://doc.rust-lang.org/std/fs/fn.soft_link.html 609 | #[cfg(windows)] 610 | fn symlink, Q: AsRef>(original: P, link: Q) -> Result<()> { 611 | std::os::windows::fs::symlink_file(original, link) 612 | .context("Windows prevents most programs from creating symlinks; see https://github.com/dimo414/bkt/issues/3") 613 | } 614 | #[cfg(unix)] 615 | use std::os::unix::fs::symlink; 616 | 617 | /// A file-system-backed cache for mapping keys (i.e. `CommandDesc`) to values (i.e. `Invocation`) 618 | /// for a given duration. 
619 | // TODO make this a trait so we can swap out impls, such as an in-memory cache or SQLite-backed 620 | #[derive(Clone, Debug)] 621 | struct Cache { 622 | cache_dir: PathBuf, 623 | scope: Option, 624 | } 625 | 626 | impl Cache { 627 | fn new>(cache_dir: P) -> Self { 628 | Cache{ cache_dir: cache_dir.as_ref().into(), scope: None } 629 | } 630 | 631 | fn scoped(mut self, scope: String) -> Self { 632 | assert!(self.scope.is_none()); 633 | self.scope = Some(scope); 634 | self 635 | } 636 | 637 | #[cfg(not(feature = "debug"))] 638 | fn serialize(writer: W, value: &T) -> Result<()> 639 | where W: io::Write, T: Serialize + ?Sized { 640 | Ok(bincode::serialize_into(writer, value)?) 641 | } 642 | 643 | #[cfg(feature = "debug")] 644 | fn serialize(writer: W, value: &T) -> Result<()> 645 | where W: io::Write, T: Serialize + ?Sized { 646 | Ok(serde_json::to_writer_pretty(writer, value)?) 647 | } 648 | 649 | #[cfg(not(feature = "debug"))] 650 | fn deserialize(reader: R) -> Result 651 | where R: std::io::Read, T: DeserializeOwned { 652 | Ok(bincode::deserialize_from(reader)?) 653 | } 654 | 655 | #[cfg(feature = "debug")] 656 | fn deserialize(reader: R) -> Result 657 | where R: std::io::Read, T: DeserializeOwned { 658 | Ok(serde_json::from_reader(reader)?) 659 | } 660 | 661 | fn key_dir(&self) -> PathBuf { 662 | self.cache_dir.join("keys") 663 | } 664 | 665 | fn key_path(&self, key: &str) -> PathBuf { 666 | let file = match &self.scope { 667 | Some(scope) => format!("{}.{}", general_purpose::STANDARD_NO_PAD.encode(scope), key), 668 | None => key.into(), 669 | }; 670 | self.key_dir().join(file) 671 | } 672 | 673 | fn data_dir(&self) -> PathBuf { 674 | self.cache_dir.join("data") 675 | } 676 | 677 | /// Looks up the given key in the cache, returning the associated value and its age 678 | /// if the data is found and is newer than the max_age. 
679 | fn lookup(&self, key: &K, max_age: Duration) -> Result> 680 | where K: CacheKey+DeserializeOwned, V: DeserializeOwned { 681 | let path = self.key_path(&key.cache_key()); 682 | let file = File::open(&path); 683 | if let Err(ref e) = file { 684 | if e.kind() == ErrorKind::NotFound { 685 | debug_msg!("lookup {} not found", path.display()); 686 | return Ok(None); 687 | } 688 | if e.kind() == ErrorKind::PermissionDenied { 689 | debug_msg!("lookup {} permission denied", path.display()); 690 | // Improve error message since the default cache location is not user-specific, see #35 691 | file.with_context(|| format!( 692 | "Could not access cached data in {}; note that cache directories should not be shared by multiple users", 693 | self.cache_dir.display()))?; 694 | unreachable!(); 695 | } 696 | } 697 | // Missing file is OK; other errors get propagated to the caller 698 | let reader = BufReader::new(file.context("Failed to access cache file")?); 699 | // TODO consider returning OK(None) if deserialization fails, which could happen if 700 | // different types hashed to the same key 701 | let found: CacheEntry = Cache::deserialize(reader)?; 702 | // Discard data that is too old 703 | let mtime = std::fs::metadata(&path)?.modified()?; 704 | let elapsed = mtime.elapsed().unwrap_or(Duration::MAX); 705 | if elapsed > max_age { 706 | debug_msg!("lookup {} expired", path.display()); 707 | return match std::fs::remove_file(&path) { 708 | Ok(_) => Ok(None), 709 | Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), 710 | Err(e) => Err(e) 711 | }.context("Failed to remove expired data") 712 | } 713 | // Ignore false-positive hits that happened to collide with the hash code 714 | if &found.key != key { 715 | debug_msg!("lookup {} hash collision", path.display()); 716 | return Ok(None); 717 | } 718 | debug_msg!("lookup {} found", path.display()); 719 | Ok(Some((found.value, mtime))) 720 | } 721 | 722 | fn seconds_ceiling(duration: Duration) -> u64 { 723 | 
duration.as_secs() + if duration.subsec_nanos() != 0 { 1 } else { 0 } 724 | } 725 | 726 | // https://rust-lang-nursery.github.io/rust-cookbook/algorithms/randomness.html#create-random-passwords-from-a-set-of-alphanumeric-characters 727 | fn rand_filename(dir: &Path, label: &str) -> PathBuf { 728 | use rand::{thread_rng, Rng}; 729 | use rand::distributions::Alphanumeric; 730 | let rand_str: String = thread_rng().sample_iter(Alphanumeric).take(16).map(char::from).collect(); 731 | dir.join(format!("{}.{}", label, rand_str)) 732 | } 733 | 734 | /// Write the given key/value pair to the cache, persisting it for at least the given TTL. 735 | /// 736 | /// Note: This method takes references to the key and value because they are serialized 737 | /// externally, therefore consuming either parameter is unhelpful. An in-memory implementation 738 | /// would need to do an internal `.clone()` which is at odds with 739 | /// [`C-CALLER-CONTROL`](https://rust-lang.github.io/api-guidelines/flexibility.html) but Cache 740 | /// is intended for serialization use cases so some overhead in the in-memory case may be 741 | /// acceptable. 
// TODO C-INTERMEDIATE suggests emulating HashMap::insert and returning any existing value in
// the cache, though it would be expensive to construct this so perhaps should be a callback
fn store<K, V>(&self, key: &K, value: &V, ttl: Duration) -> Result<()>
    where K: CacheKey+Serialize, V: Serialize {
    assert!(!ttl.is_zero(), "ttl cannot be zero");
    // Data files are grouped into a directory named after their TTL (in whole seconds) so that
    // cleanup() can decide staleness from the directory name alone.
    let ttl_dir = self.data_dir().join(Cache::seconds_ceiling(ttl).to_string());
    std::fs::create_dir_all(&ttl_dir)?;
    std::fs::create_dir_all(self.key_dir())?;
    let data_path = Cache::rand_filename(&ttl_dir, "data");
    // Note: this will fail if filename collides, could retry in a loop if that happens
    let file = OpenOptions::new().create_new(true).write(true).open(&data_path)?;
    let entry = CacheEntry{ key, value };
    // Flush explicitly: a BufWriter that is merely dropped discards any error from its final
    // write, which would let a truncated entry be published below.
    let mut writer = BufWriter::new(&file);
    Cache::serialize(&mut writer, &entry).context("Serialization failed")?;
    io::Write::flush(&mut writer).context("Serialization failed")?;
    drop(writer);
    debug_msg!("store data {}", data_path.display());
    // The target needs to be canonicalized as we're creating the link in a subdirectory, but I'd somewhat prefer
    // to fix it to be correctly relative to the link's location. Probably not worth the trouble though.
    let data_path = data_path.canonicalize()?;
    // Roundabout approach to an atomic symlink replacement
    // https://github.com/dimo414/bash-cache/issues/26
    let tmp_symlink = Cache::rand_filename(&self.key_dir(), "tmp-symlink");
    // Note: this call will fail if the tmp_symlink filename collides, could retry in a loop if that happens.
    symlink(data_path, &tmp_symlink)?;
    let key_path = self.key_path(&entry.key.cache_key());
    debug_msg!("store key {}", key_path.display());
    std::fs::rename(&tmp_symlink, key_path)?;
    Ok(())
}

/// Removes data files older than the TTL encoded in their parent directory's name, then removes
/// key symlinks whose target no longer exists. Does nothing if the cleanup lock cannot be
/// acquired or if a cleanup was attempted within the last 30 seconds.
fn cleanup(&self) -> Result<()> {
    // Deletes `file` if its mtime is more than `ttl` in the past.
    fn delete_stale_file(file: &Path, ttl: Duration) -> Result<()> {
        let age = std::fs::metadata(file)?.modified()?.elapsed().unwrap_or(Duration::MAX);
        if age > ttl {
            std::fs::remove_file(file)?;
        }
        Ok(())
    }

    // if try_acquire fails, e.g. because the directory does not exist, there's nothing to clean up
    if let Ok(Some(_lock)) = FileLock::try_acquire(&self.cache_dir, "cleanup", Duration::from_secs(60*10)) {
        // Don't bother if cleanup has been attempted recently
        let last_attempt_file = self.cache_dir.join("last_cleanup");
        if let Ok(metadata) = last_attempt_file.metadata() {
            if metadata.modified()?.elapsed().unwrap_or(Duration::MAX) < Duration::from_secs(30) {
                debug_msg!("cleanup skip recent");
                return Ok(());
            }
        }
        File::create(&last_attempt_file)?; // resets mtime if already exists

        // First delete stale data files
        debug_msg!("cleanup data {}", &self.data_dir().display());
        if let Ok(data_dir_iter) = std::fs::read_dir(self.data_dir()) {
            for entry in data_dir_iter {
                let ttl_dir = entry?.path();
                let ttl = Duration::from_secs(
                    ttl_dir.file_name().and_then(|s| s.to_str()).and_then(|s| s.parse().ok())
                        .ok_or_else(|| Error::msg(format!("Invalid ttl directory {}", ttl_dir.display())))?);

                for entry in std::fs::read_dir(&ttl_dir)? {
                    let file = entry?.path();
                    // Disregard errors on individual files; typically due to concurrent deletion
                    // or other changes we don't care about.
                    let _ = delete_stale_file(&file, ttl);
                }
            }
        }

        // Then delete broken symlinks
        debug_msg!("cleanup keys {}", &self.key_dir().display());
        if let Ok(key_dir_iter) = std::fs::read_dir(self.key_dir()) {
            for entry in key_dir_iter {
                let symlink = entry?.path();
                // This reads as if we're deleting files that no longer exist, but what it really
                // means is "if the symlink is broken, try to delete _the symlink_." It would also
                // try to delete a symlink that happened to be deleted concurrently, but this is
                // harmless since we ignore the error.
                // std::fs::symlink_metadata() could be used to check that the symlink itself exists
                // if needed, but this could still have false-positives due to a TOCTOU race.
                if !symlink.exists() {
                    let _ = std::fs::remove_file(symlink);
                }
            }
        }
    }
    Ok(())
}
}

#[cfg(test)]
mod cache_tests {
    use super::*;
    use test_dir::{TestDir, DirBuilder};

    impl CacheKey for i32 {}
    impl CacheKey for String {
        fn debug_label(&self) -> Option<String> {
            Some(self.clone())
        }
    }

    // Modification time of `path`; panics if it cannot be read.
    fn modtime<P: AsRef<Path>>(path: P) -> SystemTime {
        std::fs::metadata(&path).expect("No metadata").modified().expect("No modtime")
    }

    // Recursively back-dates every file under `dir` so that it is at least `age` old.
    fn make_dir_stale<P: AsRef<Path>>(dir: P, age: Duration) -> Result<()> {
        let desired_time = SystemTime::now() - age;
        let stale_time = filetime::FileTime::from_system_time(desired_time);
        for entry in std::fs::read_dir(dir)? {
            let path = entry?.path();
            let last_modified = modtime(&path);

            if path.is_file() && last_modified > desired_time {
                filetime::set_file_mtime(&path, stale_time)?;
            } else if path.is_dir() {
                make_dir_stale(&path, age)?;
            }
        }
        Ok(())
    }

    // Lists every non-directory entry under `dir`, as paths relative to `dir`.
    fn dir_contents<P: AsRef<Path>>(dir: P) -> Vec<String> {
        fn contents(dir: &Path, ret: &mut Vec<PathBuf>) -> Result<()> {
            for entry in std::fs::read_dir(dir)? {
                let path = entry?.path();
                if path.is_dir() {
                    contents(&path, ret)?;
                } else {
                    ret.push(path);
                }
            }
            Ok(())
        }
        let mut paths = vec![];
        contents(dir.as_ref(), &mut paths).unwrap();
        paths.iter().map(|p| p.strip_prefix(dir.as_ref()).unwrap().display().to_string()).collect()
    }

    // Sanity-checking that cache_key's behavior is stable over time. This test may need to be
    // updated when changing Rust versions / editions.
    // Disabled on hardware that generates other hashes, see #39
    #[cfg(target_endian = "little")]
    #[test]
    fn stable_hash() {
        assert_eq!(100.cache_key(), "7D208C81E8236995");
        if cfg!(feature = "debug") {
            assert_eq!("FooBar".to_string().cache_key(), "FooBar_2C8878C07E3ADA57");
        } else {
            assert_eq!("FooBar".to_string().cache_key(), "2C8878C07E3ADA57");
        }
    }

    #[test]
    fn cache() {
        let dir = TestDir::temp();
        let key = "foo".to_string();
        let val = "A".to_string();
        let cache = Cache::new(dir.root());

        let absent = cache.lookup::<_, String>(&key, Duration::from_secs(100)).unwrap();
        assert!(absent.is_none());

        cache.store(&key, &val, Duration::from_secs(100)).unwrap();
        let present = cache.lookup::<_, String>(&key, Duration::from_secs(100)).unwrap();
        assert_eq!(present.unwrap().0, val);
    }

    #[test]
    fn lookup_ttls() {
        let dir = TestDir::temp();
        let key = "foo".to_string();
        let val = "A".to_string();
        let cache = Cache::new(dir.root());

        cache.store(&key, &val, Duration::from_secs(5)).unwrap(); // store duration doesn't affect lookups
        make_dir_stale(dir.root(), Duration::from_secs(15)).unwrap();

        // data is still present until a cleanup iteration runs, or a lookup() invalidates it
        let present = cache.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap();
        assert_eq!(present.unwrap().0, "A");
        // lookup() finds stale data, deletes it
        let absent = cache.lookup::<_, String>(&key, Duration::from_secs(10)).unwrap();
        assert!(absent.is_none());
        // now data is gone, even though this lookup() would have accepted it
        let absent = cache.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap();
        assert!(absent.is_none());
    }

    #[test]
    fn scoped() {
        let dir = TestDir::temp();
        let key = "foo".to_string();
        let val_a = "A".to_string();
        let val_b = "B".to_string();
        let cache = Cache::new(dir.root());
        let cache_scoped = Cache::new(dir.root()).scoped("scope".into());

        cache.store(&key, &val_a, Duration::from_secs(100)).unwrap();
        cache_scoped.store(&key, &val_b, Duration::from_secs(100)).unwrap();

        let present = cache.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap();
        assert_eq!(present.unwrap().0, val_a);
        let present_scoped = cache_scoped.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap();
        assert_eq!(present_scoped.unwrap().0, val_b);
    }

    #[test]
    fn scopes_support_special_chars() {
        let dir = TestDir::temp();
        let key = "foo".to_string();
        let val_a = "A".to_string();
        let val_b = "B".to_string();
        let cache = Cache::new(dir.root());
        let cache_scoped = Cache::new(dir.root()).scoped("/scope/with/path/separators".into());

        cache.store(&key, &val_a, Duration::from_secs(100)).unwrap();
        cache_scoped.store(&key, &val_b, Duration::from_secs(100)).unwrap();

        let present = cache.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap();
        assert_eq!(present.unwrap().0, val_a);
        let present_scoped = cache_scoped.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap();
        assert_eq!(present_scoped.unwrap().0, val_b);
    }

    #[test]
    fn cleanup() {
        let dir = TestDir::temp();
        let key = "foo".to_string();
        let val = "A".to_string();
        let cache = Cache::new(dir.root());

        cache.store(&key, &val, Duration::from_secs(5)).unwrap();
        make_dir_stale(dir.root(), Duration::from_secs(10)).unwrap();
        cache.cleanup().unwrap();

        assert_eq!(dir_contents(dir.root()), ["last_cleanup"]); // keys and data dirs are now empty

        let absent = cache.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap();
        assert!(absent.is_none());
    }
}

/// Holds information about the cache status of a given command.
#[derive(Debug, Copy, Clone)]
pub enum CacheStatus {
    /// Command was found in the cache. Contains the time the returned invocation was cached.
    Hit(SystemTime),
    /// Command was not found in the cache and was executed. Contains the execution time of the
    /// subprocess.
    Miss(Duration),
}

#[cfg(test)]
impl CacheStatus {
    // Note these functions are intentionally not public for now. They're only currently needed to
    // make assertions shorter, and should be able to be removed once assert_matches #82775 is
    // stable. Can be made public if other use-cases arise.
fn is_hit(&self) -> bool {
    match self {
        CacheStatus::Hit(_) => true,
        CacheStatus::Miss(_) => false,
    }
}
fn is_miss(&self) -> bool {
    match self {
        CacheStatus::Hit(_) => false,
        CacheStatus::Miss(_) => true,
    }
}
}

/// Returns, if available on this platform, an identifier that uniquely represents the current user.
///
/// This value is only used to disambiguate cache directories in order to support multiple users.
/// It should not be used to authenticate or validate a caller has access to a given cache entry,
/// OS-level mechanisms such as directory permissions must be used instead.
//
// cfg() options drawn from the set of libc environments with a geteuid() function, see
// https://github.com/search?q=repo%3Arust-lang%2Flibc+geteuid%28%29&type=code and
// https://github.com/rust-lang/libc/blob/main/src/lib.rs
#[cfg(any(unix, target_os = "fuchsia", target_os = "vxworks"))]
fn user_id() -> Option<libc::uid_t> {
    // SAFETY: geteuid is documented to "always [be] successful and never modify errno."
    Some(unsafe { libc::geteuid() })
}

/// Fallback for platforms without `geteuid()`: no user identifier is available.
#[cfg(not(any(unix, target_os = "fuchsia", target_os = "vxworks")))]
fn user_id() -> Option<u32> { None }

/// This struct is the main API entry point for the `bkt` library, allowing callers to invoke and
/// cache subprocesses for later reuse.
1022 | /// 1023 | /// Example: 1024 | /// 1025 | /// ```no_run 1026 | /// # fn main() -> anyhow::Result<()> { 1027 | /// # use std::time::Duration; 1028 | /// let bkt = bkt::Bkt::in_tmp()?; 1029 | /// let cmd = bkt::CommandDesc::new(["curl", "https://expensive.api/foo"]); 1030 | /// let (result, age) = bkt.retrieve(&cmd, Duration::from_secs(60*60))?; 1031 | /// println!("Retrieved: {:?}\nAge: {:?}", result, age); 1032 | /// # Ok(()) } 1033 | /// ``` 1034 | #[derive(Clone, Debug)] 1035 | pub struct Bkt { 1036 | cache: Cache, 1037 | cleanup_on_refresh: bool, 1038 | } 1039 | 1040 | impl Bkt { 1041 | fn temp_dir() -> PathBuf { 1042 | std::env::var_os("BKT_TMPDIR").map(PathBuf::from).unwrap_or_else(std::env::temp_dir) 1043 | } 1044 | 1045 | /// Creates a new Bkt instance using the [`std::env::temp_dir`] as the cache location. If a 1046 | /// `BKT_TMPDIR` environment variable is set that value will be preferred. 1047 | /// 1048 | /// # Errors 1049 | /// 1050 | /// If preparing the tmp cache directory fails. 1051 | pub fn in_tmp() -> Result { 1052 | Bkt::create(Bkt::temp_dir()) 1053 | } 1054 | 1055 | /// Creates a new Bkt instance. 1056 | /// 1057 | /// The given `root_dir` will be used as the parent directory of the cache. It's recommended 1058 | /// this directory be in a tmpfs partition, on an SSD, or similar, so operations are fast. 1059 | /// 1060 | /// # Errors 1061 | /// 1062 | /// If preparing the cache directory under `root_dir` fails. 
1063 | pub fn create(root_dir: PathBuf) -> Result { 1064 | // Note the cache is invalidated when the minor version changes 1065 | // TODO use separate directories per user, like bash-cache 1066 | // See https://stackoverflow.com/q/57951893/113632 1067 | let user_suffix = user_id().map(|id| format!("-u{}", id)).unwrap_or_else(String::new); 1068 | let dir_name = format!("bkt-{}.{}-cache{}", env!("CARGO_PKG_VERSION_MAJOR"), env!("CARGO_PKG_VERSION_MINOR"), user_suffix); 1069 | let cache_dir = root_dir.join(dir_name); 1070 | Bkt::restrict_dir(&cache_dir) 1071 | .with_context(|| format!("Failed to set permissions on {}", cache_dir.display()))?; 1072 | Ok(Bkt { 1073 | cache: Cache::new(&cache_dir), 1074 | cleanup_on_refresh: true, 1075 | }) 1076 | } 1077 | 1078 | /// Associates a scope with this Bkt instance, causing it to namespace its cache keys so that 1079 | /// they do not collide with other instances using the same cache directory. This is useful when 1080 | /// separate applications could potentially invoke the same commands but should not share a 1081 | /// cache. Consider using the application's name, PID, and/or a timestamp in order to create a 1082 | /// sufficiently unique namespace. 1083 | pub fn scoped(mut self, scope: String) -> Self { 1084 | self.cache = self.cache.scoped(scope); 1085 | self 1086 | } 1087 | 1088 | /// By default a background cleanup thread runs on cache misses and calls to [`Bkt::refresh()`] 1089 | /// to remove stale data. You may prefer to manage cleanup yourself if you expect frequent cache 1090 | /// misses and want to minimize the number of threads being created. See [`Bkt::cleanup_once()`] 1091 | /// and [`Bkt::cleanup_thread()`] if you set this to `false`. 
1092 | pub fn cleanup_on_refresh(mut self, cleanup: bool) -> Self { 1093 | self.cleanup_on_refresh = cleanup; 1094 | self 1095 | } 1096 | 1097 | #[cfg(not(unix))] 1098 | fn restrict_dir(_cache_dir: &Path) -> Result<()> { Ok(()) } 1099 | #[cfg(unix)] 1100 | fn restrict_dir(cache_dir: &Path) -> Result<()> { 1101 | use std::os::unix::fs::PermissionsExt; 1102 | if !cache_dir.exists() { 1103 | std::fs::create_dir_all(cache_dir)?; 1104 | let metadata = std::fs::metadata(cache_dir)?; 1105 | let mut permissions = metadata.permissions(); 1106 | permissions.set_mode(0o700); // Only accessible to current user 1107 | std::fs::set_permissions(cache_dir, permissions)?; 1108 | } 1109 | Ok(()) 1110 | } 1111 | 1112 | // Executes the given command, capturing its output and exit code in the returned Invocation. 1113 | // If output_streams is present the output of the command is _also_ written to these streams 1114 | // concurrently, in order to support displaying a command's output while simultaneously caching 1115 | // it (instead of waiting for the command to complete before outputting anything). 1116 | fn execute_subprocess( 1117 | cmd: impl Into, 1118 | output_streams: Option<(impl Write+Send, impl Write+Send)> 1119 | ) -> Result { 1120 | fn maybe_tee(mut source: impl Read, mut sink: Option) -> std::io::Result> { 1121 | let mut ret = Vec::new(); 1122 | 1123 | // This initialization can be avoided (safely) once 1124 | // https://github.com/rust-lang/rust/issues/78485 is stable. 
1125 | let mut buf = [0u8; 1024 * 10]; 1126 | loop { 1127 | let num_read = source.read(&mut buf)?; 1128 | if num_read == 0 { 1129 | break; 1130 | } 1131 | 1132 | let buf = &buf[..num_read]; 1133 | if let Some(ref mut sink) = sink { 1134 | sink.write_all(buf)?; 1135 | sink.flush()?; 1136 | } 1137 | ret.extend(buf); 1138 | } 1139 | Ok(ret) 1140 | } 1141 | 1142 | let (out_sink, err_sink) = match output_streams { 1143 | Some((out, err)) => (Some(out), Some(err)), 1144 | None => (None, None), 1145 | }; 1146 | 1147 | let mut command: std::process::Command = cmd.into(); 1148 | use std::process::Stdio; 1149 | let command = command.stdout(Stdio::piped()).stderr(Stdio::piped()); 1150 | 1151 | let start = std::time::Instant::now(); 1152 | let mut child = command.spawn().with_context(|| format!("Failed to run command: {:?}", command))?; 1153 | 1154 | let child_out = child.stdout.take().ok_or(anyhow!("cannot attach to child stdout"))?; 1155 | let child_err = child.stderr.take().ok_or(anyhow!("cannot attach to child stderr"))?; 1156 | 1157 | // Using scoped threads means we can take a Write+Send instead of a W+S+'static, allowing 1158 | // callers to pass mutable references (such as `&mut Vec`). 
See also 1159 | // https://stackoverflow.com/q/32750829/113632 1160 | let (stdout, stderr) = std::thread::scope(|s| { 1161 | let thread_out = s.spawn(|| maybe_tee(child_out, out_sink)); 1162 | let thread_err = s.spawn(|| maybe_tee(child_err, err_sink)); 1163 | let stdout = thread_out.join().expect("child stdout thread failed to join").context("stdout pipe failed")?; 1164 | let stderr = thread_err.join().expect("child stderr thread failed to join").context("stderr pipe failed")?; 1165 | anyhow::Ok((stdout, stderr)) 1166 | })?; 1167 | 1168 | let status = child.wait()?; 1169 | let runtime = start.elapsed(); 1170 | 1171 | Ok(Invocation { 1172 | stdout, 1173 | stderr, 1174 | // TODO handle signals, see https://stackoverflow.com/q/66272686 1175 | exit_code: status.code().unwrap_or(126), 1176 | runtime, 1177 | }) 1178 | } 1179 | 1180 | /// Looks up the given command in Bkt's cache. If found (and newer than the given TTL) returns 1181 | /// the cached invocation. If stale or not found the command is executed and the result is 1182 | /// cached and then returned. 1183 | /// 1184 | /// The second element in the returned tuple reports whether or not the invocation was cached 1185 | /// and includes information such as the cached data's age or the executed subprocess' runtime. 1186 | /// 1187 | /// # Errors 1188 | /// 1189 | /// If looking up, deserializing, executing, or serializing the command fails. This generally 1190 | /// reflects a user error such as an invalid command. 1191 | pub fn retrieve(&self, command: T, ttl: Duration) -> Result<(Invocation, CacheStatus)> where 1192 | T: TryInto, 1193 | anyhow::Error: From, // https://stackoverflow.com/a/72627328 1194 | { 1195 | self.retrieve_impl(command, ttl, None::<(std::io::Stdout, std::io::Stderr)>) 1196 | } 1197 | 1198 | /// **Experimental** This method is subject to change. 1199 | /// 1200 | /// Looks up the given command in Bkt's cache. If found (and newer than the given TTL) returns 1201 | /// the cached invocation. 
If stale or not found the command is executed and the result is 1202 | /// cached and then returned. Additionally, the invocation's stdout and stderr are written to 1203 | /// the given streams in real time. 1204 | /// 1205 | /// The second element in the returned tuple reports whether or not the invocation was cached 1206 | /// and includes information such as the cached data's age or the executed subprocess' runtime. 1207 | /// 1208 | /// # Errors 1209 | /// 1210 | /// If looking up, deserializing, executing, or serializing the command fails. This generally 1211 | /// reflects a user error such as an invalid command. 1212 | pub fn retrieve_streaming( 1213 | &self, 1214 | command: T, 1215 | ttl: Duration, 1216 | stdout_sink: impl Write+Send, 1217 | stderr_sink: impl Write+Send, 1218 | ) -> Result<(Invocation, CacheStatus)> where 1219 | T: TryInto, 1220 | anyhow::Error: From, // https://stackoverflow.com/a/72627328 1221 | { 1222 | self.retrieve_impl(command, ttl, Some((stdout_sink, stderr_sink))) 1223 | } 1224 | 1225 | fn retrieve_impl( 1226 | &self, command: T, 1227 | ttl: Duration, 1228 | output_streams: Option<(impl Write+Send, impl Write+Send)> 1229 | ) -> Result<(Invocation, CacheStatus)> where 1230 | T: TryInto, 1231 | anyhow::Error: From, // https://stackoverflow.com/a/72627328 1232 | { 1233 | let command = command.try_into()?; 1234 | let cached = self.cache.lookup(&command, ttl).context("Cache lookup failed")?; 1235 | let result = match cached { 1236 | Some((inv, mtime)) => { 1237 | let inv: Invocation = inv; //The if-let confuses type inference for some reason, if that's commented out this line isn't needed 1238 | if let Some((mut stdout, mut stderr)) = output_streams { 1239 | stdout.write_all(inv.stdout())?; 1240 | stderr.write_all(inv.stderr())?; 1241 | } 1242 | (inv, CacheStatus::Hit(mtime)) 1243 | }, 1244 | None => { 1245 | let cleanup_hook = self.maybe_cleanup_once(); 1246 | let start = std::time::Instant::now(); 1247 | let result = 
Bkt::execute_subprocess(&command, output_streams).context("Subprocess execution failed")?; 1248 | let runtime = start.elapsed(); 1249 | if command.persist_failures || result.exit_code == 0 { 1250 | self.cache.store(&command, &result, ttl).context("Cache write failed")?; 1251 | } 1252 | Bkt::join_cleanup_thread(cleanup_hook); 1253 | (result, CacheStatus::Miss(runtime)) 1254 | } 1255 | }; 1256 | Ok(result) 1257 | } 1258 | 1259 | /// Unconditionally executes the given command and caches the invocation for the given TTL. 1260 | /// This can be used to "warm" the cache so that subsequent calls to `execute` are fast. 1261 | /// 1262 | /// The second element in the returned tuple is the subprocess' execution time. 1263 | /// 1264 | /// # Errors 1265 | /// 1266 | /// If executing or serializing the command fails. This generally reflects a user error such as 1267 | /// an invalid command. 1268 | pub fn refresh(&self, command: T, ttl: Duration) -> Result<(Invocation, Duration)> where 1269 | T: TryInto, 1270 | anyhow::Error: From, // https://stackoverflow.com/a/72627328 1271 | { 1272 | self.refresh_impl(command, ttl, None::<(std::io::Stdout, std::io::Stderr)>) 1273 | } 1274 | 1275 | /// Unconditionally executes the given command and caches the invocation for the given TTL. 1276 | /// This can be used to "warm" the cache so that subsequent calls to `execute` are fast. 1277 | /// The invocation's stdout and stderr are written to the given streams in real time in addition 1278 | /// to being cached. 1279 | /// 1280 | /// The second element in the returned tuple is the subprocess' execution time. 1281 | /// 1282 | /// # Errors 1283 | /// 1284 | /// If executing or serializing the command fails. This generally reflects a user error such as 1285 | /// an invalid command. 
1286 | pub fn refresh_streaming( 1287 | &self, 1288 | command: T, 1289 | ttl: Duration, 1290 | stdout_sink: impl Write+Send, 1291 | stderr_sink: impl Write+Send, 1292 | ) -> Result<(Invocation, Duration)> where 1293 | T: TryInto, 1294 | anyhow::Error: From, // https://stackoverflow.com/a/72627328 1295 | { 1296 | self.refresh_impl(command, ttl, Some((stdout_sink, stderr_sink))) 1297 | } 1298 | 1299 | fn refresh_impl( 1300 | &self, 1301 | command: T, 1302 | ttl: Duration, 1303 | output_streams: Option<(impl Write+Send, impl Write+Send)> 1304 | ) -> Result<(Invocation, Duration)> where 1305 | T: TryInto, 1306 | anyhow::Error: From, // https://stackoverflow.com/a/72627328 1307 | { 1308 | let command = command.try_into()?; 1309 | let cleanup_hook = self.maybe_cleanup_once(); 1310 | let start = std::time::Instant::now(); 1311 | let result = Bkt::execute_subprocess(&command, output_streams).context("Subprocess execution failed")?; 1312 | let runtime = start.elapsed(); 1313 | if command.persist_failures || result.exit_code == 0 { 1314 | self.cache.store(&command, &result, ttl).context("Cache write failed")?; 1315 | } 1316 | Bkt::join_cleanup_thread(cleanup_hook); 1317 | Ok((result, runtime)) 1318 | } 1319 | 1320 | /// Clean the cache in the background on a cache-miss; this will usually 1321 | /// be much faster than the actual background process. 1322 | fn maybe_cleanup_once(&self) -> Option>> { 1323 | if self.cleanup_on_refresh { 1324 | Some(self.cleanup_once()) 1325 | } else { 1326 | None 1327 | } 1328 | } 1329 | 1330 | fn join_cleanup_thread(cleanup_hook: Option>>) { 1331 | if let Some(cleanup_hook) = cleanup_hook { 1332 | if let Err(e) = cleanup_hook.join().expect("cleanup thread panicked") { 1333 | eprintln!("bkt: cache cleanup failed: {:?}", e); 1334 | } 1335 | } 1336 | } 1337 | 1338 | /// Initiates a single cleanup cycle of the cache, removing stale data in the background. 
This 1339 | /// should be invoked by short-lived applications early in their lifecycle and then joined 1340 | /// before exiting. `execute_and_cleanup` can be used instead to only trigger a cleanup on a 1341 | /// cache miss, avoiding the extra work on cache hits. Long-running applications should 1342 | /// typically prefer `cleanup_thread` which triggers periodic cleanups. 1343 | /// 1344 | /// # Errors 1345 | /// 1346 | /// The Result returned by joining indicates whether there were any unexpected errors while 1347 | /// cleaning up. It should be Ok in all normal circumstances. 1348 | // TODO if cleanup should always succeed (or no-op) why return Result? 1349 | pub fn cleanup_once(&self) -> std::thread::JoinHandle> { 1350 | let cache = self.cache.clone(); 1351 | std::thread::spawn(move || { cache.cleanup() }) 1352 | } 1353 | 1354 | /// Initiates an infinite-loop thread that triggers periodic cleanups of the cache, removing 1355 | /// stale data in the background. It is not necessary to `join()` this thread, it will 1356 | /// be terminated when the main thread exits. 1357 | pub fn cleanup_thread(&self) -> std::thread::JoinHandle<()> { 1358 | let cache = self.cache.clone(); 1359 | std::thread::spawn(move || { 1360 | // Hard-coded for now, could be made configurable if needed 1361 | let poll_duration = Duration::from_secs(60); 1362 | loop { 1363 | if let Err(e) = cache.cleanup() { 1364 | eprintln!("Bkt: cache cleanup failed: {:?}", e); 1365 | } 1366 | std::thread::sleep(poll_duration); 1367 | } 1368 | }) 1369 | } 1370 | } 1371 | 1372 | // Note: most functionality of Bkt is tested via cli.rs 1373 | #[cfg(test)] 1374 | mod bkt_tests { 1375 | use super::*; 1376 | use test_dir::{TestDir, DirBuilder, FileType}; 1377 | 1378 | // Just validating that Bkt can be cloned to create siblings with different settings. 
1379 | #[test] 1380 | #[allow(clippy::redundant_clone)] 1381 | fn cloneable() { 1382 | let dir = TestDir::temp(); 1383 | let bkt = Bkt::create(dir.path("cache")).unwrap(); 1384 | let _scoped = bkt.clone().scoped("scope".into()); 1385 | let _no_cleanup = bkt.clone().cleanup_on_refresh(false); 1386 | } 1387 | 1388 | #[test] 1389 | fn cached() { 1390 | let dir = TestDir::temp(); 1391 | let file = dir.path("file"); 1392 | let cmd = CommandDesc::new( 1393 | ["bash", "-c", r#"echo "$RANDOM" > "${1:?}"; cat "${1:?}""#, "arg0", file.to_str().unwrap()]); 1394 | let bkt = Bkt::create(dir.path("cache")).unwrap(); 1395 | let (first_inv, first_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); 1396 | assert!(first_status.is_miss()); 1397 | 1398 | for _ in 1..3 { 1399 | let (subsequent_inv, subsequent_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); 1400 | assert_eq!(first_inv, subsequent_inv); 1401 | assert!(subsequent_status.is_hit()); 1402 | } 1403 | } 1404 | 1405 | #[test] 1406 | fn discard_failures() { 1407 | let dir = TestDir::temp(); 1408 | let output = dir.path("output"); 1409 | let code = dir.path("code"); 1410 | 1411 | let cmd = CommandDesc::new( 1412 | ["bash", "-c", r#"cat "${1:?}"; exit "$(< "${2:?}")""#, "arg0", output.to_str().unwrap(), code.to_str().unwrap()]) 1413 | .with_discard_failures(true); 1414 | let bkt = Bkt::create(dir.path("cache")).unwrap(); 1415 | 1416 | write!(File::create(&output).unwrap(), "A").unwrap(); 1417 | write!(File::create(&code).unwrap(), "10").unwrap(); 1418 | let (first_inv, first_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); 1419 | assert_eq!(first_inv.exit_code, 10, "{:?}\nstderr:{}", first_inv, first_inv.stderr_utf8()); 1420 | assert_eq!(first_inv.stdout_utf8(), "A"); 1421 | assert!(first_status.is_miss()); 1422 | 1423 | write!(File::create(&output).unwrap(), "B").unwrap(); 1424 | let (subsequent_inv, subsequent_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); 1425 | // 
call is not cached 1426 | assert_eq!(subsequent_inv.stdout_utf8(), "B"); 1427 | assert!(subsequent_status.is_miss()); 1428 | 1429 | write!(File::create(&output).unwrap(), "C").unwrap(); 1430 | write!(File::create(&code).unwrap(), "0").unwrap(); 1431 | let (success_inv, success_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); 1432 | assert_eq!(success_inv.exit_code, 0); 1433 | assert_eq!(success_inv.stdout_utf8(), "C"); 1434 | assert!(success_status.is_miss()); 1435 | 1436 | write!(File::create(&output).unwrap(), "D").unwrap(); 1437 | let (cached_inv, cached_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); 1438 | assert_eq!(success_inv, cached_inv); 1439 | assert!(cached_status.is_hit()); 1440 | } 1441 | 1442 | #[test] 1443 | fn streaming_same_output() { 1444 | let dir = TestDir::temp(); 1445 | 1446 | let cmd = CommandDesc::new(["bash", "-c", r#"echo StdOut; echo StdErr >&2"#]); 1447 | let bkt = Bkt::create(dir.path("cache")).unwrap(); 1448 | 1449 | let mut stdout = Vec::new(); 1450 | let mut stderr = Vec::new(); 1451 | let (res, stat) = bkt.retrieve_streaming( 1452 | &cmd, Duration::from_secs(10), &mut stdout, &mut stderr).unwrap(); 1453 | assert!(stat.is_miss()); 1454 | assert_eq!(&stdout, &res.stdout); 1455 | assert_eq!(&stderr, &res.stderr); 1456 | assert_eq!(res.stdout_utf8(), "StdOut\n"); 1457 | assert_eq!(res.stderr_utf8(), "StdErr\n"); 1458 | 1459 | let mut stdout = Vec::new(); 1460 | let mut stderr = Vec::new(); 1461 | let (res, stat) = bkt.retrieve_streaming( 1462 | &cmd, Duration::from_secs(10), &mut stdout, &mut stderr).unwrap(); 1463 | assert!(stat.is_hit()); 1464 | assert_eq!(&stdout, &res.stdout); 1465 | assert_eq!(&stderr, &res.stderr); 1466 | assert_eq!(res.stdout_utf8(), "StdOut\n"); 1467 | assert_eq!(res.stderr_utf8(), "StdErr\n"); 1468 | } 1469 | 1470 | #[test] 1471 | fn streaming_refresh() { 1472 | let dir = TestDir::temp(); 1473 | 1474 | let cmd = CommandDesc::new(["bash", "-c", r#"echo StdOut; echo StdErr >&2"#]); 
1475 | let bkt = Bkt::create(dir.path("cache")).unwrap(); 1476 | 1477 | let mut stdout = Vec::new(); 1478 | let mut stderr = Vec::new(); 1479 | let (res, _) = bkt.refresh_streaming( 1480 | &cmd, Duration::from_secs(10), &mut stdout, &mut stderr).unwrap(); 1481 | 1482 | assert_eq!(&stdout, &res.stdout); 1483 | assert_eq!(&stderr, &res.stderr); 1484 | assert_eq!(res.stdout_utf8(), "StdOut\n"); 1485 | assert_eq!(res.stderr_utf8(), "StdErr\n"); 1486 | } 1487 | 1488 | // Just a proof-of-concept that streaming to files works as well. 1489 | #[test] 1490 | fn streaming_to_file() { 1491 | let dir = TestDir::temp(); 1492 | 1493 | let cmd = CommandDesc::new(["bash", "-c", r#"echo StdOut; echo StdErr >&2"#]); 1494 | let bkt = Bkt::create(dir.path("cache")).unwrap(); 1495 | 1496 | let out = File::create(dir.path("out")).unwrap(); 1497 | let err = File::create(dir.path("err")).unwrap(); 1498 | let _ = bkt.retrieve_streaming( 1499 | &cmd, Duration::from_secs(10), out, err).unwrap(); 1500 | 1501 | assert_eq!(std::fs::read_to_string(dir.path("out")).unwrap(), "StdOut\n"); 1502 | assert_eq!(std::fs::read_to_string(dir.path("err")).unwrap(), "StdErr\n"); 1503 | } 1504 | 1505 | #[test] 1506 | fn with_working_dir() { 1507 | let dir = TestDir::temp().create("wd", FileType::Dir); 1508 | let work_dir = dir.path("wd"); 1509 | let cmd = CommandDesc::new(["bash", "-c", "echo Hello World > file"]); 1510 | let state = cmd.capture_state().unwrap().with_working_dir(&work_dir); 1511 | let bkt = Bkt::create(dir.path("cache")).unwrap(); 1512 | let (result, status) = bkt.retrieve(state, Duration::from_secs(10)).unwrap(); 1513 | assert_eq!(result.stderr_utf8(), ""); 1514 | assert_eq!(result.exit_code(), 0); 1515 | assert_eq!(std::fs::read_to_string(work_dir.join("file")).unwrap(), "Hello World\n"); 1516 | assert!(status.is_miss()); 1517 | } 1518 | 1519 | #[test] 1520 | // TODO the JSON serializer doesn't support OsString keys, CommandState needs a custom 1521 | // Serializer (for feature="debug", at 
least) - see https://stackoverflow.com/q/51276896 1522 | // and https://github.com/serde-rs/json/issues/809 1523 | #[cfg(not(feature = "debug"))] 1524 | fn with_env() { 1525 | let dir = TestDir::temp().create("dir", FileType::Dir); 1526 | let cmd = CommandDesc::new(["bash", "-c", r#"echo "FOO:${FOO:?}""#]).capture_state().unwrap() 1527 | .with_env("FOO", "bar"); 1528 | let bkt = Bkt::create(dir.path("cache")).unwrap(); 1529 | let (result, status) = bkt.retrieve(cmd, Duration::from_secs(10)).unwrap(); 1530 | assert_eq!(result.stderr_utf8(), ""); 1531 | assert_eq!(result.exit_code(), 0); 1532 | assert_eq!(result.stdout_utf8(), "FOO:bar\n"); 1533 | assert!(status.is_miss()); 1534 | } 1535 | 1536 | #[test] 1537 | fn with_modtime() { 1538 | let dir = TestDir::temp().create("dir", FileType::Dir); 1539 | let file = dir.path("file"); 1540 | let cmd = CommandDesc::new(["cat", file.to_str().unwrap()]); 1541 | let cmd_modtime = cmd.clone().with_modtime(&file); 1542 | let bkt = Bkt::create(dir.path("cache")).unwrap(); 1543 | write!(File::create(&file).unwrap(), "A").unwrap(); 1544 | let (result_a, status_a) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); 1545 | let (result_mod_a, status_mod_a) = bkt.retrieve(&cmd_modtime, Duration::from_secs(10)).unwrap(); 1546 | assert!(status_a.is_miss()); 1547 | assert!(status_mod_a.is_miss()); 1548 | 1549 | // Update the file _and_ reset its modtime because modtime is not consistently updated e.g. 1550 | // if writes are too close together. 
1551 | write!(File::create(&file).unwrap(), "B").unwrap(); 1552 | filetime::set_file_mtime(&file, filetime::FileTime::from_system_time(SystemTime::now() - Duration::from_secs(15))).unwrap(); 1553 | 1554 | let (result_b, status_b) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); 1555 | let (result_mod_b, status_mod_b) = bkt.retrieve(&cmd_modtime, Duration::from_secs(10)).unwrap(); 1556 | assert_eq!(result_a.stdout_utf8(), result_b.stdout_utf8()); // cached 1557 | assert!(status_b.is_hit()); 1558 | assert_eq!(result_mod_a.stdout_utf8(), "A"); 1559 | assert_eq!(result_mod_b.stdout_utf8(), "B"); 1560 | assert!(status_mod_b.is_miss()); 1561 | } 1562 | } 1563 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::OsString; 2 | use std::io::{self, stderr, stdout, Write}; 3 | use std::path::PathBuf; 4 | use std::process::{Command, exit, Stdio}; 5 | use std::time::Duration; 6 | 7 | use anyhow::{Context, Result}; 8 | use clap::error::{ContextKind, ContextValue, ErrorKind}; 9 | use clap::Parser; 10 | 11 | use bkt::{CommandDesc, Bkt}; 12 | 13 | // BrokenPipe errors are uninteresting for command line applications; just stop writing to that 14 | // descriptor and, if appropriate, exit. Rust doesn't have good support for this presently, see 15 | // https://github.com/rust-lang/rust/issues/46016 16 | // TODO consider moving this into the Bkt library, either as a pub class or an implementation detail 17 | // of Bkt, so that library callers don't need to deal with BrokenPipe either. 
18 | struct DisregardBrokenPipe(Box); 19 | 20 | impl Write for DisregardBrokenPipe { 21 | fn write(&mut self, buf: &[u8]) -> io::Result { 22 | match self.0.write(buf) { 23 | Err(e) if e.kind() == std::io::ErrorKind::BrokenPipe => Ok(0), 24 | r => r, 25 | } 26 | } 27 | 28 | // Custom implementation of write_all() that treats Ok(0) as success rather than an error as the 29 | // default implementation does. 30 | // TODO perhaps this should be inlined into maybe_tee() instead of calling write_all() 31 | fn write_all(&mut self, mut buf: &[u8]) -> io::Result<()> { 32 | while !buf.is_empty() { 33 | match self.write(buf) { 34 | Ok(0) => return Ok(()), 35 | Ok(n) => buf = &buf[n..], 36 | Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}, 37 | Err(e) => return Err(e), 38 | } 39 | } 40 | Ok(()) 41 | } 42 | 43 | fn flush(&mut self) -> io::Result<()> { 44 | match self.0.flush() { 45 | Err(e) if e.kind() == std::io::ErrorKind::BrokenPipe => Ok(()), 46 | r => r, 47 | } 48 | } 49 | } 50 | 51 | // Re-invokes bkt with --force and then discards the subprocess, causing the cache 52 | // to be refreshed asynchronously. 53 | fn force_update_async() -> Result<()> { 54 | let mut args = std::env::args_os(); 55 | let arg0 = args.next().expect("Must always be a 0th argument"); 56 | let mut command = match std::env::current_exe() { 57 | Ok(path) => Command::new(path), 58 | Err(_) => Command::new(arg0), 59 | }; 60 | // Discard stdout/err so the calling process doesn't wait for them to close. 61 | // Intentionally drop the returned Child; after this process exits the 62 | // child process will continue running in the background. 
63 | command.arg("--force").args(args.filter(|a| a != "--warm")) 64 | .stdout(Stdio::null()).stderr(Stdio::null()) 65 | .spawn().context("Failed to start background process")?; 66 | Ok(()) 67 | } 68 | 69 | // Runs bkt after main() handles flag parsing 70 | fn run(cli: Cli) -> Result { 71 | let ttl: Duration = cli.ttl.into(); 72 | let stale: Option = cli.stale.map(Into::into); 73 | 74 | assert!(!ttl.is_zero(), "--ttl cannot be zero"); 75 | if let Some(stale) = stale { 76 | assert!(!stale.is_zero(), "--stale cannot be zero"); 77 | assert!(stale < ttl, "--stale must be less than --ttl"); 78 | } 79 | 80 | let mut bkt = match cli.cache_dir { 81 | Some(cache_dir) => Bkt::create(cache_dir)?, 82 | None => Bkt::in_tmp()?, 83 | }; 84 | if let Some(scope) = cli.scope { 85 | bkt = bkt.scoped(scope); 86 | } 87 | 88 | let mut command = CommandDesc::new(cli.command); 89 | 90 | if cli.cwd { 91 | command = command.with_cwd(); 92 | } 93 | 94 | let envs = cli.env; 95 | if !envs.is_empty() { 96 | command = command.with_envs(&envs); 97 | } 98 | 99 | let files = cli.modtime; 100 | if !files.is_empty() { 101 | command = command.with_modtimes(&files); 102 | } 103 | 104 | if cli.discard_failures { 105 | command = command.with_discard_failures(true); 106 | } 107 | 108 | if cli.warm && !cli.force { 109 | force_update_async()?; 110 | return Ok(0); 111 | } 112 | 113 | let invocation = if cli.force { 114 | bkt.refresh_streaming(&command, ttl, DisregardBrokenPipe( 115 | Box::new(stdout())), DisregardBrokenPipe(Box::new(stderr())))?.0 116 | } else { 117 | let (invocation, status) = bkt.retrieve_streaming( 118 | &command, ttl, DisregardBrokenPipe(Box::new(stdout())), DisregardBrokenPipe(Box::new(stderr())))?; 119 | if let Some(stale) = stale { 120 | if let bkt::CacheStatus::Hit(cached_at) = status { 121 | if cached_at.elapsed().unwrap_or(Duration::MAX) > stale { 122 | force_update_async()?; 123 | } 124 | } 125 | } 126 | invocation 127 | }; 128 | 129 | Ok(invocation.exit_code()) 130 | } 131 | 132 | 
// Command-line flags, parsed by clap. The `///` comments on the fields are used by clap as the
// help text and are therefore user-facing.
#[derive(Debug, Parser)]
#[command(about, version)]
struct Cli {
    /// The command to run
    #[arg(required = true, last = true)]
    command: Vec<OsString>,

    /// Duration the cached result will be valid for
    #[arg(long, value_name = "DURATION", visible_alias = "time-to-live", env = "BKT_TTL")]
    ttl: humantime::Duration,

    /// Duration after which the result will be asynchronously refreshed
    #[arg(long, value_name = "DURATION", conflicts_with = "warm")]
    stale: Option<humantime::Duration>,

    /// Asynchronously execute and cache the given command, even if it's already cached
    #[arg(long)]
    warm: bool,

    /// Execute and cache the given command, even if it's already cached
    #[arg(long, conflicts_with = "warm")]
    force: bool,

    /// Includes the current working directory in the cache key,
    /// so that the same command run in different directories caches separately
    #[arg(long, visible_alias = "use-working-dir")]
    cwd: bool,

    /// Includes the given environment variable in the cache key,
    /// so that the same command run with different values for the given variables caches separately
    #[arg(long, value_name = "NAME", visible_alias = "use-environment")]
    env: Vec<OsString>,

    /// Includes the last modification time of the given file(s) in the cache key,
    /// so that the same command run with different modtimes for the given files caches separately
    #[arg(long, value_name = "FILE", visible_alias = "use-file-modtime")]
    modtime: Vec<OsString>,

    /// Don't cache invocations that fail (non-zero exit code).
    /// USE CAUTION when passing this flag, as unexpected failures can lead to a spike in invocations
    /// which can exacerbate ongoing issues, effectively a DDoS.
    #[arg(long)]
    discard_failures: bool,

    /// If set, all cached data will be scoped to this value,
    /// preventing collisions with commands cached with different scopes
    #[arg(long, value_name = "NAME", env = "BKT_SCOPE")]
    scope: Option<String>,

    /// The directory under which to persist cached invocations;
    /// defaults to the system's temp directory.
    /// Setting this to a directory backed by RAM or an SSD, such as a tmpfs partition,
    /// will significantly reduce caching overhead.
    #[arg(long, value_name = "DIR", env = "BKT_CACHE_DIR")]
    cache_dir: Option<PathBuf>,
}

// Parses the command line, delegates to run(), and exits with the code it returns.
// Errors from run() are printed to stderr and reported as exit code 127.
fn main() {
    // TODO remove this suggestion in 0.9.0
    let cli = match Cli::try_parse() {
        Ok(cli) => cli,
        Err(mut err) => {
            if matches!(err.kind(), ErrorKind::MissingRequiredArgument) {
                // https://github.com/clap-rs/clap/discussions/5318
                err.insert(ContextKind::Suggested, ContextValue::StyledStrs(vec![[
                    "Prior to 0.8.0 --ttl was optional, and defaulted to 60 seconds.",
                    "To preserve this behavior pass `--ttl=1m` or set `BKT_TTL=1m` in your environment."
                ].join(" ").into()]));
            }
            // Prints the (possibly augmented) error and terminates the process.
            err.exit()
        }
    };

    match run(cli) {
        Ok(code) => exit(code),
        Err(msg) => {
            eprintln!("bkt: {:#}", msg);
            exit(127);
        }
    }
}

--------------------------------------------------------------------------------
/tests/cli.rs:
--------------------------------------------------------------------------------
mod cli {
    use std::path::Path;
    use std::process::{Command, Stdio};
    use std::time::{SystemTime, Duration};

    use anyhow::Result;
    use test_dir::{TestDir, DirBuilder, FileType};
    use std::fs::File;
    use std::io::Read;

    // Bash scripts to pass to -c.
    // Avoid depending on external programs.
// Appends one character to the file given as $1 and prints the resulting length of its first
// line, i.e. how many times the script has run against that file.
const COUNT_INVOCATIONS: &str = r#"file=${1:?} lines=0; \
    printf '%s' '.' >> "$file"; \
    read < "$file"; \
    printf '%s' "${#REPLY}";"#;
// Prints the script's positional arguments as a bash array declaration.
const PRINT_ARGS: &str = r#"args=("$@"); declare -p args;"#;
// Exits with the status given as $1.
const EXIT_WITH: &str = r#"exit "${1:?}";"#;
// Exits with the status given in the EXIT_WITH environment variable.
const EXIT_WITH_ENV: &str = r#"exit "${EXIT_WITH:?}";"#;
// Prints "awaiting", polls until the path in $1 exists, then writes to the path in $2.
const AWAIT_AND_TOUCH: &str = r#"echo awaiting; \
    until [[ -e "${1:?}" ]]; do sleep .1; done; \
    echo > "${2:?}";"#;

// Builds a Command for the bkt binary under test, located two directories above the test
// executable (falling back to a `.exe` suffix), with BKT_TTL and BKT_TMPDIR preset.
fn bkt<P: AsRef<Path>>(cache_dir: P) -> Command {
    let test_exe = std::env::current_exe().expect("Could not resolve test location");
    let dir = test_exe
        .parent().expect("Could not resolve test directory")
        .parent().expect("Could not resolve binary directory");
    let mut path = dir.join("bkt");
    if !path.exists() {
        path.set_extension("exe");
    }
    assert!(path.exists(), "Could not find bkt binary in {:?}", dir);
    let mut bkt = Command::new(&path);
    // Set a TTL here rather than in every test - tests that care about the TTL should override
    bkt.env("BKT_TTL", "5s");
    bkt.env("BKT_TMPDIR", cache_dir.as_ref().as_os_str());
    bkt
}

// Wraps `cmd` in `sudo -n -E`, carrying over its program, arguments and every explicitly
// set or removed environment variable.
fn sudo(cmd: &mut Command) -> Command {
    let mut sudo = Command::new("sudo");
    sudo.args(["-n", "-E"]).arg(cmd.get_program()).args(cmd.get_args());
    for (key, value) in cmd.get_envs() {
        match value {
            Some(value) => sudo.env(key, value),
            None => sudo.env_remove(key),
        };
    }
    sudo
}

// Captured outcome of a finished process: decoded stdout/stderr and the exit code, if any.
#[derive(Eq, PartialEq, Debug)]
struct CmdResult {
    out: String,
    err: String,
    status: Option<i32>,
}

impl From<std::process::Output> for CmdResult {
    // Panics if either stream is not valid UTF-8.
    fn from(output: std::process::Output) -> Self {
        CmdResult {
            out: String::from_utf8(output.stdout).expect("stdout was not valid UTF-8"),
            err: String::from_utf8(output.stderr).expect("stderr was not valid UTF-8"),
            status: output.status.code(),
        }
    }
}

// Runs `cmd` to completion and captures its output; panics if it cannot be started.
fn run(cmd: &mut Command) -> CmdResult {
    cmd.output().unwrap().into()
}
73 | 74 | fn succeed(cmd: &mut Command) -> String { 75 | let result = run(cmd); 76 | if cfg!(feature="debug") { 77 | if !result.err.is_empty() { eprintln!("stderr:\n{}", result.err); } 78 | } else { 79 | // debug writes to stderr, so don't bother checking it in that mode 80 | assert_eq!(result.err, ""); 81 | } 82 | assert_eq!(result.status, Some(0)); 83 | result.out 84 | } 85 | 86 | // Returns once the given file contains different contents than those provided. Panics if the 87 | // file does not change after ~5s. 88 | // 89 | // Note this could return immediately if the file already doesn't contain initial_contents 90 | // (e.g. if the given contents were wrong) because such a check could race. Do additional 91 | // checks prior to waiting if needed. 92 | fn wait_for_contents_to_change>(file: P, initial_contents: &str) { 93 | for _ in 1..50 { 94 | if std::fs::read_to_string(&file).unwrap() != initial_contents { return; } 95 | std::thread::sleep(Duration::from_millis(100)); 96 | } 97 | panic!("Contents of {} did not change", file.as_ref().to_string_lossy()); 98 | } 99 | 100 | fn make_dir_stale>(dir: P, age: Duration) -> Result<()> { 101 | debug_assert!(dir.as_ref().is_dir()); 102 | let desired_time = SystemTime::now() - age; 103 | let stale_time = filetime::FileTime::from_system_time(desired_time); 104 | for entry in std::fs::read_dir(dir)? 
{ 105 | let path = entry?.path(); 106 | let last_modified = std::fs::metadata(&path)?.modified()?; 107 | 108 | if path.is_file() && last_modified > desired_time { 109 | filetime::set_file_mtime(&path, stale_time)?; 110 | } else if path.is_dir() { 111 | make_dir_stale(&path, age)?; 112 | } 113 | } 114 | Ok(()) 115 | } 116 | 117 | fn make_file_stale>(file: P, age: Duration) -> Result<()> { 118 | debug_assert!(file.as_ref().is_file()); 119 | let desired_time = SystemTime::now() - age; 120 | let stale_time = filetime::FileTime::from_system_time(desired_time); 121 | filetime::set_file_mtime(&file, stale_time)?; 122 | Ok(()) 123 | } 124 | 125 | fn join(beg: &[A], tail: &[A]) -> Vec { 126 | beg.iter().chain(tail).cloned().collect() 127 | } 128 | 129 | #[test] 130 | fn help() { 131 | let dir = TestDir::temp(); 132 | let out = succeed(bkt(dir.path("cache")).arg("--help")); 133 | assert!(out.contains("bkt [OPTIONS] --ttl -- ..."), "Was:\n---\n{}\n---", out); 134 | } 135 | 136 | #[test] 137 | fn cached() { 138 | let dir = TestDir::temp(); 139 | let file = dir.path("file"); 140 | let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 141 | let first_result = run(bkt(dir.path("cache")).args(args)); 142 | 143 | for _ in 1..3 { 144 | let subsequent_result = run(bkt(dir.path("cache")).args(args)); 145 | if cfg!(feature="debug") { 146 | assert_eq!(first_result.status, subsequent_result.status); 147 | assert_eq!(first_result.out, subsequent_result.out); 148 | } else { 149 | assert_eq!(first_result, subsequent_result); 150 | } 151 | } 152 | } 153 | 154 | #[test] 155 | fn cache_expires() { 156 | let dir = TestDir::temp(); 157 | let file = dir.path("file"); 158 | let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 159 | let first_result = succeed(bkt(dir.path("cache")).arg("--ttl=1m").args(args)); 160 | assert_eq!(first_result, "1"); 161 | 162 | // Slightly stale is still cached 163 | make_dir_stale(dir.path("cache"), 
Duration::from_secs(10)).unwrap(); 164 | let subsequent_result = succeed(bkt(dir.path("cache")).arg("--ttl=1m").args(args)); 165 | assert_eq!(first_result, subsequent_result); 166 | 167 | make_dir_stale(dir.path("cache"), Duration::from_secs(120)).unwrap(); 168 | let after_stale_result = succeed(bkt(dir.path("cache")).arg("--ttl=1m").args(args)); 169 | assert_eq!(after_stale_result, "2"); 170 | 171 | // Respects BKT_TTL env var (other tests cover --ttl) 172 | make_dir_stale(dir.path("cache"), Duration::from_secs(10)).unwrap(); 173 | let env_result = succeed(bkt(dir.path("cache")).env("BKT_TTL", "5s").args(args)); 174 | assert_eq!(env_result, "3"); 175 | } 176 | 177 | #[test] 178 | fn cache_expires_separately() { 179 | let dir = TestDir::temp(); 180 | let file1 = dir.path("file1"); 181 | let file2 = dir.path("file2"); 182 | let args1 = ["--ttl=10s", "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file1.to_str().unwrap()]; 183 | let args2 = ["--ttl=20s", "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file2.to_str().unwrap()]; 184 | 185 | // first invocation 186 | assert_eq!(succeed(bkt(dir.path("cache")).args(args1)), "1"); 187 | assert_eq!(succeed(bkt(dir.path("cache")).args(args2)), "1"); 188 | 189 | // second invocation, cached 190 | assert_eq!(succeed(bkt(dir.path("cache")).args(args1)), "1"); 191 | assert_eq!(succeed(bkt(dir.path("cache")).args(args2)), "1"); 192 | 193 | // only shorter TTL is invalidated 194 | make_dir_stale(dir.path("cache"), Duration::from_secs(15)).unwrap(); 195 | assert_eq!(succeed(bkt(dir.path("cache")).args(args1)), "2"); 196 | assert_eq!(succeed(bkt(dir.path("cache")).args(args2)), "1"); 197 | } 198 | 199 | #[test] 200 | fn cache_hits_with_different_settings() { 201 | let dir = TestDir::temp(); 202 | let file = dir.path("file"); 203 | let args1 = ["--ttl=10s", "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 204 | let args2 = ["--ttl=20s", "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 205 | 
206 | // despite different TTLs the invocation is still cached 207 | assert_eq!(succeed(bkt(dir.path("cache")).args(args1)), "1"); 208 | assert_eq!(succeed(bkt(dir.path("cache")).args(args2)), "1"); 209 | 210 | // the provided TTL is respected, though it was cached with a smaller TTL 211 | make_dir_stale(dir.path("cache"), Duration::from_secs(15)).unwrap(); 212 | assert_eq!(succeed(bkt(dir.path("cache")).args(args2)), "1"); 213 | 214 | // However the cache can be invalidated in the background using the older TTL 215 | make_dir_stale(dir.path("cache"), Duration::from_secs(60)).unwrap(); // ensure the following call triggers a cleanup 216 | succeed(bkt(dir.path("cache")).args(["--", "bash", "-c", "sleep 1"])); // trigger cleanup via a different command 217 | assert_eq!(succeed(bkt(dir.path("cache")).args(args1)), "2"); 218 | } 219 | 220 | #[test] 221 | fn cache_refreshes_in_background() { 222 | let dir = TestDir::temp(); 223 | let file = dir.path("file"); 224 | let args = ["--stale=10s", "--ttl=20s", "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 225 | assert_eq!(succeed(bkt(dir.path("cache")).args(args)), "1"); 226 | 227 | make_dir_stale(dir.path("cache"), Duration::from_secs(15)).unwrap(); 228 | assert_eq!(succeed(bkt(dir.path("cache")).args(args)), "1"); 229 | 230 | wait_for_contents_to_change(&file, "."); 231 | assert_eq!(std::fs::read_to_string(&file).unwrap(), ".."); 232 | assert_eq!(succeed(bkt(dir.path("cache")).args(args)), "2"); 233 | } 234 | 235 | #[test] 236 | fn discard_failures() { 237 | let dir = TestDir::temp(); 238 | let file = dir.path("file"); 239 | let cmd = format!("{} false;", COUNT_INVOCATIONS); 240 | let args = ["--discard-failures", "--", "bash", "-c", &cmd, "arg0", file.to_str().unwrap()]; 241 | 242 | let result = run(bkt(dir.path("cache")).args(args)); 243 | assert_eq!(result.out, "1"); 244 | assert_eq!(result.status, Some(1)); 245 | 246 | // Not cached 247 | let result = run(bkt(dir.path("cache")).args(args)); 248 
| assert_eq!(result.out, "2"); 249 | assert_eq!(result.status, Some(1)); 250 | } 251 | 252 | #[test] 253 | fn discard_failure_cached_separately() { 254 | let dir = TestDir::temp(); 255 | 256 | let allow_args = ["--", "bash", "-c", EXIT_WITH_ENV, "arg0"]; 257 | let discard_args = join(&["--discard-failures"], &allow_args); 258 | 259 | // without separate caches a --discard-failures invocation could return a previously-cached 260 | // failed result. In 0.5.4 and earlier this would mean result2.status == 14. 261 | let result1 = run(bkt(dir.path("cache")).args(allow_args).env("EXIT_WITH", "14")); 262 | assert_eq!(result1.status, Some(14)); 263 | let result2 = run(bkt(dir.path("cache")).args(discard_args).env("EXIT_WITH", "0")); 264 | assert_eq!(result2.status, Some(0)); 265 | } 266 | 267 | #[test] 268 | fn discard_failures_in_background() { 269 | let dir = TestDir::temp(); 270 | let file = dir.path("file"); 271 | let cmd = format!("{} ! \"${{FAIL:-false}}\";", COUNT_INVOCATIONS); 272 | let args = ["--ttl=20s", "--discard-failures", "--", "bash", "-c", &cmd, "arg0", file.to_str().unwrap()]; 273 | let stale_args = join(&["--stale=10s"], &args); 274 | 275 | // Cache result normally 276 | assert_eq!(succeed(bkt(dir.path("cache")).args(args)), "1"); 277 | 278 | // Cause cmd to fail and not be cached 279 | std::env::set_var("FAIL", "true"); 280 | 281 | // returns cached result, but attempts to warm in the background 282 | make_dir_stale(dir.path("cache"), Duration::from_secs(15)).unwrap(); 283 | assert_eq!(succeed(bkt(dir.path("cache")).args(&stale_args)), "1"); 284 | 285 | // Verify command ran 286 | wait_for_contents_to_change(&file, "."); 287 | assert_eq!(std::fs::read_to_string(&file).unwrap(), ".."); 288 | 289 | // But cached success is still returned 290 | assert_eq!(succeed(bkt(dir.path("cache")).args(args)), "1"); 291 | } 292 | 293 | // depends on sudo and libc::geteuid(), but also on Windows we don't split by user presently anyways 294 | #[cfg(unix)] 295 | #[test] 
296 | fn cache_dirs_multi_user() { 297 | let dir = TestDir::temp(); 298 | let file = dir.path("file"); 299 | let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 300 | 301 | // Skip the test if we can't run `sudo bkt --version` 302 | // Calling into sudo like this isn't great, but it's an easy and reasonably reliable way to 303 | // run bkt as two different users. It generally won't run on CI but at least it provides 304 | // some manual test coverage. 305 | if unsafe { libc::geteuid() } == 0 { 306 | // https://github.com/rust-lang/rust/issues/68007 tracking skippable tests 307 | eprint!("Running tests as root already, skipping"); 308 | return; 309 | } 310 | let mut sudo_bkt = sudo(bkt(dir.path("cache")).arg("--version")); 311 | if run(&mut sudo_bkt).status.unwrap_or(127) != 0 { 312 | // https://github.com/rust-lang/rust/issues/68007 tracking skippable tests 313 | eprint!("Couldn't run `sudo bkt`, skipping"); 314 | return; 315 | } 316 | 317 | // can call bkt as both current and super-user 318 | let user_call = succeed(bkt(dir.path("cache")).args(args)); 319 | assert_eq!(user_call, "1"); 320 | 321 | let sudo_call = succeed(&mut sudo(bkt(dir.path("cache")).args(args))); 322 | assert_eq!(sudo_call, "2"); 323 | 324 | // cached separately 325 | assert_eq!(user_call, succeed(bkt(dir.path("cache")).args(args))); 326 | 327 | assert_eq!(sudo_call, succeed(&mut sudo(bkt(dir.path("cache")).args(args)))); 328 | } 329 | 330 | #[test] 331 | fn respects_cache_dir() { 332 | let dir = TestDir::temp(); 333 | let file = dir.path("file"); 334 | let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 335 | 336 | let first_call = succeed(bkt(dir.path("cache")).arg(format!("--cache-dir={}", dir.path("cache").display())).args(args)); 337 | assert_eq!(first_call, "1"); 338 | assert_eq!(first_call, succeed(bkt(dir.path("cache")).arg(format!("--cache-dir={}", dir.path("cache").display())).args(args))); 339 | 340 | let diff_cache = 
succeed(bkt(dir.path("cache")).arg(format!("--cache-dir={}", dir.path("new-cache").display())).args(args)); 341 | assert_eq!(diff_cache, "2"); 342 | 343 | let env_cache = succeed(bkt(dir.path("cache")).env("BKT_CACHE_DIR", dir.path("env-cache").as_os_str()).args(args)); 344 | assert_eq!(env_cache, "3"); 345 | } 346 | 347 | // https://github.com/dimo414/bkt/issues/9 348 | /* Checks that a relative --cache-dir works: the same command run twice from the same working directory returns the same output, 1, both times. */ #[test] 349 | fn respects_relative_cache() { 350 | let dir = TestDir::temp(); 351 | let cwd = dir.path("cwd"); 352 | std::fs::create_dir(&cwd).unwrap(); 353 | let file = dir.path("file"); 354 | let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 355 | 356 | let first_call = succeed(bkt(dir.path("unused")).arg("--cache-dir=cache").args(args).current_dir(&cwd)); 357 | assert_eq!(first_call, "1"); 358 | assert_eq!(first_call, succeed(bkt(dir.path("unused")).arg("--cache-dir=cache").args(args).current_dir(&cwd))); 359 | } 360 | 361 | /* Checks cache scoping: unscoped calls repeat the output 1, calls with --scope=foo produce and then repeat 2, and setting BKT_SCOPE=foo in the environment returns that same 2. */ #[test] 362 | fn respects_cache_scope() { 363 | let dir = TestDir::temp(); 364 | let file = dir.path("file"); 365 | let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 366 | 367 | let first_call = succeed(bkt(dir.path("cache")).args(args)); 368 | assert_eq!(first_call, "1"); 369 | assert_eq!(first_call, succeed(bkt(dir.path("cache")).args(args))); 370 | 371 | let diff_scope = succeed(bkt(dir.path("cache")) 372 | .arg("--scope=foo").args(args)); 373 | assert_eq!(diff_scope, "2"); 374 | assert_eq!(diff_scope, succeed(bkt(dir.path("cache")) 375 | .arg("--scope=foo").args(args))); 376 | assert_eq!(diff_scope, succeed(bkt(dir.path("cache")) 377 | .env("BKT_SCOPE", "foo").args(args))); 378 | } 379 | 380 | /* Checks that the argument list matters: repeating the same arguments returns 1 again, adding the single argument A B gives 2, and adding A and B as two separate arguments gives 3. */ #[test] 381 | fn respects_args() { 382 | let dir = TestDir::temp(); 383 | let file = dir.path("file"); 384 | let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 385 | 386 | let first_call = succeed(bkt(dir.path("cache")).args(args)); 387 | assert_eq!(first_call, "1"); 388 |
assert_eq!(first_call, succeed(bkt(dir.path("cache")).args(args))); 389 | 390 | let diff_args = succeed(bkt(dir.path("cache")).args(args).arg("A B")); 391 | assert_eq!(diff_args, "2"); 392 | 393 | let split_args = succeed(bkt(dir.path("cache")).args(args).args(["A", "B"])); 394 | assert_eq!(split_args, "3"); 395 | } 396 | 397 | /* Checks working-directory handling: without --cwd the pwd output recorded in dir1 is also returned from dir2 (asserted as the current, admittedly incorrect, behavior); with --cwd each directory reports its own path. */ #[test] 398 | fn respects_cwd() { 399 | let dir = TestDir::temp() 400 | .create("dir1", FileType::Dir) 401 | .create("dir2", FileType::Dir); 402 | let args = ["--", "bash", "-c", "pwd"]; 403 | let cwd_args = join(&["--cwd"], &args); 404 | 405 | let without_cwd_dir1 = succeed(bkt(dir.path("cache")).args(args).current_dir(dir.path("dir1"))); 406 | let without_cwd_dir2 = succeed(bkt(dir.path("cache")).args(args).current_dir(dir.path("dir2"))); 407 | assert!(without_cwd_dir1.trim().ends_with("/dir1")); 408 | assert!(without_cwd_dir2.trim().ends_with("/dir1")); // incorrect! cached too eagerly 409 | 410 | let cwd_dir1 = succeed(bkt(dir.path("cache")).args(&cwd_args).current_dir(dir.path("dir1"))); 411 | let cwd_dir2 = succeed(bkt(dir.path("cache")).args(&cwd_args).current_dir(dir.path("dir2"))); 412 | assert!(cwd_dir1.trim().ends_with("/dir1")); 413 | assert!(cwd_dir2.trim().ends_with("/dir2")); 414 | } 415 | 416 | /* Checks --env handling: calls where FOO and BAR are unset return the result recorded without --env; with --env=FOO and --env=BAR a new FOO or BAR value produces fresh output, while changing only BAZ returns the output already recorded for that FOO and BAR pair. */ #[test] 417 | #[cfg(not(feature = "debug"))] // See lib's bkt_tests::with_env 418 | fn respects_env() { 419 | let dir = TestDir::temp(); 420 | let args = ["--", "bash", "-c", r#"printf 'foo:%s bar:%s baz:%s' "$FOO" "$BAR" "$BAZ""#]; 421 | let env_args = join(&["--env=FOO", "--env=BAR"], &args); 422 | 423 | let without_env = succeed(bkt(dir.path("cache")).args(args) 424 | .env("FOO", "1").env("BAR", "1").env("BAZ", "1")); 425 | assert_eq!(without_env, succeed(bkt(dir.path("cache")).args(args))); 426 | // even if --env is set, if the vars are absent cache still hits earlier call 427 | assert_eq!(without_env, succeed(bkt(dir.path("cache")).args(&env_args))); 428 | 429 | let env = succeed(bkt(dir.path("cache")).args(&env_args) 430 |
.env("FOO", "2").env("BAR", "2").env("BAZ", "2")); 431 | assert_eq!(env, "foo:2 bar:2 baz:2"); 432 | let env = succeed(bkt(dir.path("cache")).args(&env_args) 433 | .env("FOO", "3").env("BAR", "2").env("BAZ", "3")); 434 | assert_eq!(env, "foo:3 bar:2 baz:3"); 435 | let env = succeed(bkt(dir.path("cache")).args(&env_args) 436 | .env("FOO", "4").env("BAR", "4").env("BAZ", "4")); 437 | assert_eq!(env, "foo:4 bar:4 baz:4"); 438 | let env = succeed(bkt(dir.path("cache")).args(&env_args) 439 | .env("FOO", "2").env("BAR", "2").env("BAZ", "5")); 440 | assert_eq!(env, "foo:2 bar:2 baz:2"); // BAZ doesn't invalidate cache 441 | } 442 | 443 | /* Checks --modtime with a watched path: the output is 1 and repeats while the file is absent, becomes 2 and repeats once the file is created, and becomes 3 and repeats after make_file_stale is applied to the file. */ #[test] 444 | fn respects_modtime() { 445 | let dir = TestDir::temp(); 446 | let file = dir.path("file"); 447 | let watch_file = dir.path("watch"); 448 | let args = ["--modtime", watch_file.to_str().unwrap(), "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 449 | let no_file_result = succeed(bkt(dir.path("cache")).args(args)); 450 | // File absent is cached 451 | assert_eq!(no_file_result, "1"); 452 | assert_eq!(no_file_result, succeed(bkt(dir.path("cache")).args(args))); 453 | 454 | // create a new file, invalidating cache 455 | File::create(&watch_file).unwrap(); 456 | let new_file_result = succeed(bkt(dir.path("cache")).args(args)); 457 | assert_eq!(new_file_result, "2"); 458 | assert_eq!(new_file_result, succeed(bkt(dir.path("cache")).args(args))); 459 | 460 | // update the modtime, again invalidating the cache 461 | make_file_stale(&watch_file, Duration::from_secs(10)).unwrap(); 462 | let old_file_result = succeed(bkt(dir.path("cache")).args(args)); 463 | assert_eq!(old_file_result, "3"); 464 | assert_eq!(old_file_result, succeed(bkt(dir.path("cache")).args(args))); 465 | } 466 | 467 | /* Checks that output is streamed: BEFORE is readable while the child is still running, AFTER arrives once the awaited file is created, the exit code is 0, and a later call with the file removed still returns both lines. */ #[test] 468 | fn streaming() { 469 | let dir = TestDir::temp(); 470 | let file = dir.path("file"); 471 | let script = r#"echo BEFORE; for (( i=0; i<50; i++ )); do if [[ -e "$1" ]]; then echo AFTER; exit 0; fi; sleep .1; done;
exit 10"#; 472 | let args = ["--", "bash", "-c", script, "arg0", file.to_str().unwrap()]; 473 | let mut proc = bkt(dir.path("cache")).args(args) 474 | .stdout(Stdio::piped()) 475 | .stderr(Stdio::piped()).spawn().unwrap(); 476 | 477 | // partial output is observable before the process exits 478 | let mut buf = [0; 64]; 479 | let mut stdout = proc.stdout.take().unwrap(); 480 | let len = stdout.read(&mut buf).unwrap(); 481 | assert_eq!("BEFORE\n".as_bytes(), &buf[0..len], "len:{} - {:?}", len, buf); 482 | assert_eq!(proc.try_wait().unwrap(), None); // process is still running 483 | 484 | File::create(&file).unwrap(); // allow the bash process to terminate 485 | let len = stdout.read(&mut buf).unwrap(); 486 | assert_eq!("AFTER\n".as_bytes(), &buf[0..len], "len:{} - {:?}", len, buf); 487 | 488 | if !cfg!(feature="debug") { 489 | let mut buf = String::new(); 490 | assert_eq!(proc.stderr.as_mut().unwrap().read_to_string(&mut buf).unwrap(), 0, "{}", buf); 491 | assert_eq!(buf, ""); 492 | } 493 | assert_eq!(proc.wait().unwrap().code(), Some(0)); 494 | 495 | // Command is cached and can be re-run without blocking 496 | std::fs::remove_file(&file).unwrap(); 497 | assert_eq!(succeed(bkt(dir.path("cache")).args(args)), "BEFORE\nAFTER\n"); 498 | } 499 | 500 | /* Checks that 100 KiB written to each of stdout and stderr is returned in full with status 0; the stderr length is only asserted when the debug feature is off. */ #[test] 501 | fn large_output() { 502 | let dir = TestDir::temp(); 503 | let bytes = 1024*100; // 100KB is larger than the standard OS process buffer 504 | // Write a large amount of data to stdout and stderr; an incorrect implementation reads 505 | // each stream sequentially which will hang on sufficiently large streams as the subprocess 506 | // waits for the reader to catch up.
507 | let script = format!(r#"printf '.%.0s' {{1..{0}}}; printf '.%.0s' {{1..{0}}} >&2"#, bytes); 508 | let args = ["--", "bash", "-c", &script, "arg0"]; 509 | let result = run(bkt(dir.path("cache")).args(args)); 510 | assert_eq!(result.out.len(), bytes); 511 | if !cfg!(feature="debug") { 512 | assert_eq!(result.err.len(), bytes); 513 | } 514 | assert_eq!(result.status, Some(0)); 515 | } 516 | 517 | /* Checks that the caller may read only the first 10 bytes of a 100 KiB stdout and then close the pipe: the process must still exit with status 0 and, when the debug feature is off, an empty stderr. */ #[test] 518 | fn truncated_output() { 519 | let dir = TestDir::temp(); 520 | let bytes = 1024*100; // 100KB is larger than the standard OS process buffer 521 | // Write a large amount of data to stdout and close the process' stream without reading it; 522 | // this should be supported silently, see https://github.com/dimo414/bkt/issues/44. 523 | let script = format!(r#"printf '.%.0s' {{1..{0}}}"#, bytes); 524 | let args = ["--", "bash", "-c", &script, "arg0"]; 525 | let mut cmd = bkt(dir.path("cache")); 526 | let cmd = cmd.args(args).stdout(Stdio::piped()).stderr(Stdio::piped()); 527 | 528 | let mut child = cmd.spawn().unwrap(); 529 | // Read the beginning of stdout 530 | // It's not strictly necessary to do this, in fact closing the stream without reading 531 | // anything causes the error even for small outputs, but this seems like the more 532 | // "interesting" case and it covers the read-nothing behavior too.
533 | let mut buf = [0; 10]; 534 | child.stdout.as_mut().unwrap().read_exact(&mut buf).unwrap(); 535 | assert_eq!(buf, [b'.'; 10]); 536 | 537 | std::mem::drop(child.stdout.take().unwrap()); // close stdout without reading further 538 | 539 | let result: CmdResult = child.wait_with_output().unwrap().into(); 540 | assert_eq!(result.out, ""); 541 | // Unexpected error messages will show up in stderr 542 | if !cfg!(feature="debug") { assert_eq!(result.err, ""); } 543 | assert_eq!(result.status, Some(0)); 544 | } 545 | 546 | /* Checks that without the debug feature a command with no output yields empty stdout, empty stderr and status 0 on both the first and the repeated call. */ #[test] 547 | #[cfg(not(feature="debug"))] 548 | fn no_debug_output() { 549 | let dir = TestDir::temp(); 550 | let args = ["--", "bash", "-c", "true"]; 551 | 552 | // Not cached 553 | assert_eq!(run(bkt(dir.path("cache")).args(args)), 554 | CmdResult { out: "".into(), err: "".into(), status: Some(0) }); 555 | // Cached 556 | assert_eq!(run(bkt(dir.path("cache")).args(args)), 557 | CmdResult { out: "".into(), err: "".into(), status: Some(0) }); 558 | } 559 | 560 | /* Checks that with the debug feature every stderr line starts with the bkt: prefix and that the first and second runs match the expected miss and hit message patterns. */ #[test] 561 | #[cfg(feature="debug")] 562 | fn debug_output() { 563 | fn starts_with_bkt(s: &str) -> bool { s.lines().all(|l| l.starts_with("bkt: ")) } 564 | 565 | let miss_debug_re = regex::Regex::new( 566 | "bkt: state: \nbkt: lookup .* not found\nbkt: cleanup data .*\nbkt: cleanup keys .*\nbkt: store data .*\nbkt: store key .*\n").unwrap(); 567 | let hit_debug_re = regex::Regex::new("bkt: lookup .* found\n").unwrap(); 568 | 569 | let dir = TestDir::temp(); 570 | let args = ["--", "bash", "-c", PRINT_ARGS, "arg0"]; 571 | 572 | let miss = run(bkt(dir.path("cache")).args(args)); 573 | assert!(starts_with_bkt(&miss.err), "{}", miss.err); 574 | assert!(miss_debug_re.is_match(&miss.err), "{}", miss.err); 575 | 576 | let hit = run(bkt(dir.path("cache")).args(args)); 577 | assert!(starts_with_bkt(&hit.err), "{}", hit.err); 578 | assert!(hit_debug_re.is_match(&hit.err), "{}", hit.err); 579 | } 580 | 581 | /* Checks, for several argument lists (none, an empty string, arguments with and without spaces), that two consecutive calls return identical output. */ #[test] 582 | fn output_preserved() { 583 | let dir = TestDir::temp(); 584 | fn
same_output(dir: &TestDir, args: &[&str]) { 585 | let bkt_args = ["--", "bash", "-c", PRINT_ARGS, "arg0"]; 586 | // Second call will be cached 587 | assert_eq!( 588 | succeed(bkt(dir.path("cache")).args(bkt_args).args(args)), 589 | succeed(bkt(dir.path("cache")).args(bkt_args).args(args))); 590 | } 591 | 592 | same_output(&dir, &[]); 593 | same_output(&dir, &[""]); 594 | same_output(&dir, &["a", "b"]); 595 | same_output(&dir, &["a b"]); 596 | same_output(&dir, &["a b", "c"]); 597 | } 598 | 599 | /* Checks that stdout and stderr containing NUL bytes, and the trailing newline on stderr, come back unchanged on the first call and identically on the repeated call. */ #[test] 600 | #[cfg(not(feature="debug"))] 601 | fn sensitive_output() { 602 | let dir = TestDir::temp(); 603 | let args = ["--", "bash", "-c", r"printf 'foo\0bar'; printf 'bar\0baz\n' >&2"]; 604 | 605 | // Not cached 606 | let output = run(bkt(dir.path("cache")).args(args)); 607 | assert_eq!(output, 608 | CmdResult { out: "foo\u{0}bar".into(), err: "bar\u{0}baz\n".into(), status: Some(0) }); 609 | // Cached 610 | assert_eq!(run(bkt(dir.path("cache")).args(args)), output); 611 | } 612 | 613 | /* Checks that exit status 14 is reported on both the first and the repeated call. */ #[test] 614 | fn exit_code_preserved() { 615 | let dir = TestDir::temp(); 616 | let args = ["--", "bash", "-c", EXIT_WITH, "arg0"]; 617 | 618 | assert_eq!(run(bkt(dir.path("cache")).args(args).arg("14")).status, Some(14)); 619 | assert_eq!(run(bkt(dir.path("cache")).args(args).arg("14")).status, Some(14)); 620 | } 621 | 622 | /* Checks --warm: the call returns with empty output before the touch file exists; after the awaited file is created and the touch file appears (polled for up to 10 x 200ms), a normal call returns the awaiting line even though the awaited file has been removed again. */ #[test] 623 | fn warm() { 624 | let dir = TestDir::temp(); 625 | let await_file = dir.path("await"); 626 | let touch_file = dir.path("touch"); 627 | let args = ["--", "bash", "-c", AWAIT_AND_TOUCH, "arg0", 628 | await_file.to_str().unwrap(), touch_file.to_str().unwrap()]; 629 | let warm_args = join(&["--warm"], &args); 630 | 631 | let output = succeed(bkt(dir.path("cache")).args(warm_args)); 632 | assert_eq!(output, ""); 633 | assert!(!touch_file.exists()); 634 | 635 | File::create(&await_file).unwrap(); // allow the bash process to terminate 636 | for _ in 0..10 { 637 | if touch_file.exists() { break; } 638 | std::thread::sleep(Duration::from_millis(200)); 639 |
} 640 | // This ensures the bash process has almost-completed, but it could still race with bkt actually 641 | // caching the result and creating a key file. If this proves flaky a more robust check would be 642 | // to inspect the keys directory. 643 | assert!(touch_file.exists()); 644 | 645 | std::fs::remove_file(&await_file).unwrap(); // process would not terminate if run again 646 | let output = succeed(bkt(dir.path("cache")).args(args)); 647 | assert_eq!(output, "awaiting\n"); 648 | } 649 | 650 | /* Checks --force: after 1 has been returned twice, a --force call returns 2, and the next call without --force also returns 2. */ #[test] 651 | fn force() { 652 | let dir = TestDir::temp(); 653 | let file = dir.path("file"); 654 | let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; 655 | let args_force = join(&["--force"], &args); 656 | 657 | let output = succeed(bkt(dir.path("cache")).args(args)); 658 | assert_eq!(output, "1"); 659 | let output = succeed(bkt(dir.path("cache")).args(args)); 660 | assert_eq!(output, "1"); 661 | 662 | let output = succeed(bkt(dir.path("cache")).args(args_force)); 663 | assert_eq!(output, "2"); 664 | let output = succeed(bkt(dir.path("cache")).args(args)); 665 | assert_eq!(output, "2"); 666 | } 667 | 668 | /* Checks two identical invocations started concurrently: both exit with status 0, the counter file ends up holding two dots, and at least one of them printed 2. */ #[test] 669 | fn concurrent_call_race() { 670 | let dir = TestDir::temp(); 671 | let file = dir.path("file"); 672 | let slow_count_invocations = format!(r#"sleep "0.5$RANDOM"; {}"#, COUNT_INVOCATIONS); 673 | let args = ["--", "bash", "-c", &slow_count_invocations, "arg0", file.to_str().unwrap()]; 674 | println!("{:?}", args); 675 | 676 | let proc1 = bkt(dir.path("cache")).args(args).stdout(Stdio::piped()).stderr(Stdio::piped()).spawn().unwrap(); 677 | let proc2 = bkt(dir.path("cache")).args(args).stdout(Stdio::piped()).stderr(Stdio::piped()).spawn().unwrap(); 678 | let result1: CmdResult = proc1.wait_with_output().unwrap().into(); 679 | if !cfg!(feature="debug") { assert_eq!(result1.err, ""); } 680 | assert_eq!(result1.status, Some(0)); 681 | let result2: CmdResult = proc2.wait_with_output().unwrap().into(); 682 | if
!cfg!(feature="debug") { assert_eq!(result2.err, ""); } 683 | assert_eq!(result2.status, Some(0)); 684 | 685 | assert_eq!(std::fs::read_to_string(&file).unwrap(), ".."); 686 | assert!(result1.out == "2" || result2.out == "2"); // arbitrary which completes first 687 | } 688 | } 689 | -------------------------------------------------------------------------------- /tests/cwd.rs: -------------------------------------------------------------------------------- 1 | mod cwd { 2 | use std::time::Duration; 3 | use test_dir::{TestDir, FileType, DirBuilder}; 4 | use bkt::{Bkt, CacheStatus, CommandDesc}; 5 | 6 | // This test is pulled out from the unit tests into a separate file to avoid racing with other 7 | // tests that depend on the cwd. See #40 for more. If we need to add more tests like this consider 8 | // https://docs.rs/serial_test/ 9 | /* Checks that a state captured with with_working_dir(wd) and the same command retrieved after set_current_dir(wd) share one cache entry: the first retrieve is a Miss, the second a Hit, and the file written by the command holds a single line. */ #[test] 10 | fn cwd_and_working_dir_share_cache() { 11 | let dir = TestDir::temp().create("wd", FileType::Dir); 12 | let wd = dir.path("wd"); 13 | let bkt = Bkt::create(dir.path("cache")).unwrap(); 14 | // Note we haven't changed the cwd yet - with_cwd() doesn't read it 15 | let cmd = CommandDesc::new(["bash", "-c", "pwd; echo '.'
> file"]).with_cwd(); 16 | // The initial cwd is captured, but it's overwritten by with_working_dir() 17 | let state = cmd.capture_state().unwrap().with_working_dir(&wd); 18 | let (result, status) = bkt.retrieve(state, Duration::from_secs(10)).unwrap(); 19 | assert_eq!(result.stdout_utf8(), format!("{}\n", wd.to_str().unwrap())); 20 | assert_eq!(result.stderr_utf8(), ""); 21 | assert_eq!(result.exit_code(), 0); 22 | assert!(matches!(status, CacheStatus::Miss(_))); 23 | 24 | // now change the cwd and see it get captured lazily 25 | std::env::set_current_dir(&wd).unwrap(); 26 | let (result, status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); 27 | assert_eq!(result.stdout_utf8(), format!("{}\n", wd.to_str().unwrap())); 28 | assert_eq!(result.stderr_utf8(), ""); 29 | assert_eq!(result.exit_code(), 0); 30 | assert!(matches!(status, CacheStatus::Hit(_))); 31 | 32 | // and the file was only written to once, hence the cache was shared 33 | assert_eq!(std::fs::read_to_string(wd.join("file")).unwrap(), ".\n"); 34 | } 35 | } 36 | --------------------------------------------------------------------------------