├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE-MIT ├── NEWS ├── README.md ├── benches └── hash.rs ├── default.nix ├── etc └── template-bk-readme.txt ├── naming ├── Cargo.toml ├── README.rst └── src │ └── lib.rs ├── shell.nix ├── src ├── errors.rs ├── escape.rs ├── hashes.rs ├── lib.rs ├── main.rs ├── node.rs ├── node │ ├── compare.rs │ ├── fs.rs │ ├── fullpath.rs │ └── hashes.rs ├── progress.rs ├── show.rs ├── store.rs ├── store │ └── weave.rs ├── surefs.rs └── suretree.rs ├── tests └── surefiles.rs └── weave ├── .gitignore ├── Cargo.toml ├── README.rst ├── src ├── delta.rs ├── errors.rs ├── header.rs ├── lib.rs ├── naming.rs ├── newweave.rs └── parse.rs └── tests ├── naming.rs └── sccs.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .*.swp 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | 3 | before_install: 4 | - sudo apt-get install -y cssc 5 | 6 | rust: 7 | - stable 8 | - beta 9 | - nightly 10 | 11 | matrix: 12 | allow_failures: 13 | - rust: nightly 14 | 15 | script: 16 | - cargo build 17 | - cargo test 18 | - cd weave 19 | - cargo build 20 | - cargo test 21 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # ChangeLog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ## [0.9.3] 11 | 12 | ### Changed 13 | 14 | - This release has no code changes, and is merely a version bump to 15 | properly tag this and release to [crates.io](https://crates.io/). 16 | 17 | ## [0.9.1] 18 | 19 | ### Changed 20 | 21 | - Weave parser now implements a pull parser. This avoids the overhead 22 | of threads and channels for normal processing of the surefile. 23 | - Numerous minor code cleanups from clippy and rustfmt 24 | - Add `default.nix` and `shell.nix` to help with development under 25 | Nix. 26 | 27 | ### Fixed 28 | 29 | - Fix duplicated names in some comparison messages 30 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing.
3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "ahash" 13 | version = "0.7.6" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" 16 | dependencies = [ 17 | "getrandom", 18 | "once_cell", 19 | "version_check", 20 | ] 21 | 22 | [[package]] 23 | name = "aho-corasick" 24 | version = "0.7.18" 25 | source = "registry+https://github.com/rust-lang/crates.io-index" 26 | checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" 27 | dependencies = [ 28 | "memchr", 29 | ] 30 | 31 | [[package]] 32 | name = "ansi_term" 33 | version = "0.11.0" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" 36 | dependencies = [ 37 | "winapi", 38 | ] 39 | 40 | [[package]] 41 | name = "atty" 42 | version = "0.2.14" 43 | source = "registry+https://github.com/rust-lang/crates.io-index" 44 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 45 | dependencies = [ 46 | "hermit-abi", 47 | "libc", 48 | "winapi", 49 | ] 50 | 51 | [[package]] 52 | name = "autocfg" 53 | version = "1.0.1" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" 56 | 57 | [[package]] 58 | name = "bitflags" 59 | version = "1.2.1" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 62 | 63 | [[package]] 64 | name = "cc" 65 | version = "1.0.67" 66 | source = "registry+https://github.com/rust-lang/crates.io-index" 67 | checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd" 68 | dependencies = [ 69 | "jobserver", 70 | ] 71 | 72 | [[package]] 73 | name = "cfg-if" 74 | version = "1.0.0" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 77 | 78 | [[package]] 79 | name = "chrono" 80 | version = "0.4.19" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" 83 | dependencies = [ 84 | "libc", 85 | "num-integer", 86 | "num-traits", 87 | "serde", 88 | "time 0.1.44", 89 | "winapi", 90 | ] 91 | 92 | [[package]] 93 | name = "clap" 94 | version = "2.33.3" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" 97 | dependencies = [ 98 | "ansi_term", 99 | "atty", 100 | "bitflags", 101 | "strsim", 102 | "textwrap", 103 | "unicode-width", 104 | "vec_map", 105 | ] 106 | 107 | [[package]] 108 | name = "crc32fast" 109 | version = "1.2.1" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" 112 | dependencies = [ 113 | "cfg-if", 114 | ] 115 | 116 | [[package]] 117 | name = "crossbeam" 118 | version = "0.8.1" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "4ae5588f6b3c3cb05239e90bd110f257254aecd01e4635400391aeae07497845" 121 | dependencies = [ 122 | "cfg-if", 123 
| "crossbeam-channel", 124 | "crossbeam-deque", 125 | "crossbeam-epoch", 126 | "crossbeam-queue", 127 | "crossbeam-utils", 128 | ] 129 | 130 | [[package]] 131 | name = "crossbeam-channel" 132 | version = "0.5.0" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775" 135 | dependencies = [ 136 | "cfg-if", 137 | "crossbeam-utils", 138 | ] 139 | 140 | [[package]] 141 | name = "crossbeam-deque" 142 | version = "0.8.0" 143 | source = "registry+https://github.com/rust-lang/crates.io-index" 144 | checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" 145 | dependencies = [ 146 | "cfg-if", 147 | "crossbeam-epoch", 148 | "crossbeam-utils", 149 | ] 150 | 151 | [[package]] 152 | name = "crossbeam-epoch" 153 | version = "0.9.7" 154 | source = "registry+https://github.com/rust-lang/crates.io-index" 155 | checksum = "c00d6d2ea26e8b151d99093005cb442fb9a37aeaca582a03ec70946f49ab5ed9" 156 | dependencies = [ 157 | "cfg-if", 158 | "crossbeam-utils", 159 | "lazy_static", 160 | "memoffset", 161 | "scopeguard", 162 | ] 163 | 164 | [[package]] 165 | name = "crossbeam-queue" 166 | version = "0.3.4" 167 | source = "registry+https://github.com/rust-lang/crates.io-index" 168 | checksum = "4dd435b205a4842da59efd07628f921c096bc1cc0a156835b4fa0bcb9a19bcce" 169 | dependencies = [ 170 | "cfg-if", 171 | "crossbeam-utils", 172 | ] 173 | 174 | [[package]] 175 | name = "crossbeam-utils" 176 | version = "0.8.7" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "b5e5bed1f1c269533fa816a0a5492b3545209a205ca1a54842be180eb63a16a6" 179 | dependencies = [ 180 | "cfg-if", 181 | "lazy_static", 182 | ] 183 | 184 | [[package]] 185 | name = "data-encoding" 186 | version = "2.3.2" 187 | source = "registry+https://github.com/rust-lang/crates.io-index" 188 | checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57" 189 | 190 | [[package]] 191 | name = "env_logger" 192 | version = "0.9.0" 193 | source = "registry+https://github.com/rust-lang/crates.io-index" 194 | checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" 195 | dependencies = [ 196 | "atty", 197 | "humantime", 198 | "log", 199 | "regex", 200 | "termcolor", 201 | ] 202 | 203 | [[package]] 204 | name = "fallible-iterator" 205 | version = "0.2.0" 206 | source = "registry+https://github.com/rust-lang/crates.io-index" 207 | checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" 208 | 209 | [[package]] 210 | name = "fallible-streaming-iterator" 211 | version = "0.1.9" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" 214 | 215 | [[package]] 216 | name = "flate2" 217 | version = "1.0.22" 218 | source = "registry+https://github.com/rust-lang/crates.io-index" 219 | checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" 220 | dependencies = [ 221 | "cfg-if", 222 | "crc32fast", 223 | "libc", 224 | "miniz_oxide", 225 | ] 226 | 227 | [[package]] 228 | name = "foreign-types" 229 | version = "0.3.2" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" 232 | dependencies = [ 233 | "foreign-types-shared", 234 | ] 235 | 236 | [[package]] 237 | name = "foreign-types-shared" 238 | version = "0.1.1" 239 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 240 | checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" 241 | 242 | [[package]] 243 | name = "fuchsia-cprng" 244 | version = "0.1.1" 245 | source = "registry+https://github.com/rust-lang/crates.io-index" 246 | checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" 247 | 248 | [[package]] 249 | name = "getrandom" 250 | version = "0.2.4" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" 253 | dependencies = [ 254 | "cfg-if", 255 | "libc", 256 | "wasi", 257 | ] 258 | 259 | [[package]] 260 | name = "hashbrown" 261 | version = "0.11.2" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" 264 | dependencies = [ 265 | "ahash", 266 | ] 267 | 268 | [[package]] 269 | name = "hashlink" 270 | version = "0.7.0" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf" 273 | dependencies = [ 274 | "hashbrown", 275 | ] 276 | 277 | [[package]] 278 | name = "heck" 279 | version = "0.3.2" 280 | source = "registry+https://github.com/rust-lang/crates.io-index" 281 | checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" 282 | dependencies = [ 283 | "unicode-segmentation", 284 | ] 285 | 286 | [[package]] 287 | name = "hermit-abi" 288 | version = "0.1.18" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" 291 | dependencies = [ 292 | "libc", 293 | ] 294 | 295 | [[package]] 296 | name = "humantime" 297 | version = "2.1.0" 298 | source = "registry+https://github.com/rust-lang/crates.io-index" 299 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 300 | 301 | [[package]] 302 | name = "itoa" 303 | version = "0.4.7" 304 | source = "registry+https://github.com/rust-lang/crates.io-index" 305 | checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" 306 | 307 | [[package]] 308 | name = "jobserver" 309 | version = "0.1.24" 310 | source = "registry+https://github.com/rust-lang/crates.io-index" 311 | checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" 312 | dependencies = [ 313 | "libc", 314 | ] 315 | 316 | [[package]] 317 | name = "lazy_static" 318 | version = "1.4.0" 319 | source = "registry+https://github.com/rust-lang/crates.io-index" 320 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 321 | 322 | [[package]] 323 | name = "libc" 324 | version = "0.2.117" 325 | source = "registry+https://github.com/rust-lang/crates.io-index" 326 | checksum = "e74d72e0f9b65b5b4ca49a346af3976df0f9c61d550727f349ecd559f251a26c" 327 | 328 | [[package]] 329 | name = "libsqlite3-sys" 330 | version = "0.23.2" 331 | source = "registry+https://github.com/rust-lang/crates.io-index" 332 | checksum = "d2cafc7c74096c336d9d27145f7ebd4f4b6f95ba16aa5a282387267e6925cb58" 333 | dependencies = [ 334 | "pkg-config", 335 | "vcpkg", 336 | ] 337 | 338 | [[package]] 339 | name = "log" 340 | version = "0.4.14" 341 | source = "registry+https://github.com/rust-lang/crates.io-index" 342 | checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" 343 | dependencies = [ 344 | "cfg-if", 345 | ] 346 | 347 | 
[[package]] 348 | name = "memchr" 349 | version = "2.4.1" 350 | source = "registry+https://github.com/rust-lang/crates.io-index" 351 | checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" 352 | 353 | [[package]] 354 | name = "memoffset" 355 | version = "0.6.1" 356 | source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87" 358 | dependencies = [ 359 | "autocfg", 360 | ] 361 | 362 | [[package]] 363 | name = "miniz_oxide" 364 | version = "0.4.4" 365 | source = "registry+https://github.com/rust-lang/crates.io-index" 366 | checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" 367 | dependencies = [ 368 | "adler", 369 | "autocfg", 370 | ] 371 | 372 | [[package]] 373 | name = "num-integer" 374 | version = "0.1.44" 375 | source = "registry+https://github.com/rust-lang/crates.io-index" 376 | checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" 377 | dependencies = [ 378 | "autocfg", 379 | "num-traits", 380 | ] 381 | 382 | [[package]] 383 | name = "num-traits" 384 | version = "0.2.14" 385 | source = "registry+https://github.com/rust-lang/crates.io-index" 386 | checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" 387 | dependencies = [ 388 | "autocfg", 389 | ] 390 | 391 | [[package]] 392 | name = "num_cpus" 393 | version = "1.13.1" 394 | source = "registry+https://github.com/rust-lang/crates.io-index" 395 | checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" 396 | dependencies = [ 397 | "hermit-abi", 398 | "libc", 399 | ] 400 | 401 | [[package]] 402 | name = "num_threads" 403 | version = "0.1.3" 404 | source = "registry+https://github.com/rust-lang/crates.io-index" 405 | checksum = "97ba99ba6393e2c3734791401b66902d981cb03bf190af674ca69949b6d5fb15" 406 | dependencies = [ 407 | "libc", 408 | ] 409 | 410 | [[package]] 411 | name = "once_cell" 412 | version = "1.9.0" 413 | source = "registry+https://github.com/rust-lang/crates.io-index" 414 | checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" 415 | 416 | [[package]] 417 | name = "openssl" 418 | version = "0.10.38" 419 | source = "registry+https://github.com/rust-lang/crates.io-index" 420 | checksum = "0c7ae222234c30df141154f159066c5093ff73b63204dcda7121eb082fc56a95" 421 | dependencies = [ 422 | "bitflags", 423 | "cfg-if", 424 | "foreign-types", 425 | "libc", 426 | "once_cell", 427 | "openssl-sys", 428 | ] 429 | 430 | [[package]] 431 | name = "openssl-sys" 432 | version = "0.9.72" 433 | source = "registry+https://github.com/rust-lang/crates.io-index" 434 | checksum = "7e46109c383602735fa0a2e48dd2b7c892b048e1bf69e5c3b1d804b7d9c203cb" 435 | dependencies = [ 436 | "autocfg", 437 | "cc", 438 | "libc", 439 | "pkg-config", 440 | "vcpkg", 441 | ] 442 | 443 | [[package]] 444 | name = "pkg-config" 445 | version = "0.3.19" 446 | source = "registry+https://github.com/rust-lang/crates.io-index" 447 | checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" 448 | 449 | [[package]] 450 | name = "proc-macro-error" 451 | version = "1.0.4" 452 | source = "registry+https://github.com/rust-lang/crates.io-index" 453 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 454 | dependencies = [ 455 | "proc-macro-error-attr", 456 | "proc-macro2", 457 | "quote", 458 | "syn", 459 | "version_check", 460 | ] 461 | 462 | [[package]] 463 | name = "proc-macro-error-attr" 464 | version = "1.0.4" 465 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 466 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 467 | dependencies = [ 468 | "proc-macro2", 469 | "quote", 470 | "version_check", 471 | ] 472 | 473 | [[package]] 474 | name = "proc-macro2" 475 | version = "1.0.24" 476 | source = "registry+https://github.com/rust-lang/crates.io-index" 477 | checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" 478 | dependencies = [ 479 | "unicode-xid", 480 | ] 481 | 482 | [[package]] 483 | name = "quote" 484 | version = "1.0.9" 485 | source = "registry+https://github.com/rust-lang/crates.io-index" 486 | checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" 487 | dependencies = [ 488 | "proc-macro2", 489 | ] 490 | 491 | [[package]] 492 | name = "rand" 493 | version = "0.4.6" 494 | source = "registry+https://github.com/rust-lang/crates.io-index" 495 | checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" 496 | dependencies = [ 497 | "fuchsia-cprng", 498 | "libc", 499 | "rand_core 0.3.1", 500 | "rdrand", 501 | "winapi", 502 | ] 503 | 504 | [[package]] 505 | name = "rand_core" 506 | version = "0.3.1" 507 | source = "registry+https://github.com/rust-lang/crates.io-index" 508 | checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" 509 | dependencies = [ 510 | "rand_core 0.4.2", 511 | ] 512 | 513 | [[package]] 514 | name = "rand_core" 515 | version = "0.4.2" 516 | source = "registry+https://github.com/rust-lang/crates.io-index" 517 | checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" 518 | 519 | [[package]] 520 | name = "rdrand" 521 | version = "0.4.0" 522 | source = "registry+https://github.com/rust-lang/crates.io-index" 523 | checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" 524 | dependencies = [ 525 | "rand_core 0.3.1", 526 | ] 527 | 528 | [[package]] 529 | name = "regex" 530 | version = "1.5.4" 531 | source = "registry+https://github.com/rust-lang/crates.io-index" 532 | checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" 533 | dependencies = [ 534 | "aho-corasick", 535 | "memchr", 536 | "regex-syntax", 537 | ] 538 | 539 | [[package]] 540 | name = "regex-syntax" 541 | version = "0.6.25" 542 | source = "registry+https://github.com/rust-lang/crates.io-index" 543 | checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" 544 | 545 | [[package]] 546 | name = "remove_dir_all" 547 | version = "0.5.3" 548 | source = "registry+https://github.com/rust-lang/crates.io-index" 549 | checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" 550 | dependencies = [ 551 | "winapi", 552 | ] 553 | 554 | [[package]] 555 | name = "rsure" 556 | version = "0.10.0-dev" 557 | dependencies = [ 558 | "chrono", 559 | "crossbeam", 560 | "data-encoding", 561 | "env_logger", 562 | "flate2", 563 | "lazy_static", 564 | "libc", 565 | "log", 566 | "num_cpus", 567 | "openssl", 568 | "regex", 569 | "rusqlite", 570 | "structopt", 571 | "tempdir", 572 | "thiserror", 573 | "time 0.3.7", 574 | "weave", 575 | "zstd", 576 | ] 577 | 578 | [[package]] 579 | name = "rusqlite" 580 | version = "0.26.3" 581 | source = "registry+https://github.com/rust-lang/crates.io-index" 582 | checksum = "4ba4d3462c8b2e4d7f4fcfcf2b296dc6b65404fbbc7b63daa37fd485c149daf7" 583 | dependencies = [ 584 | "bitflags", 585 | "fallible-iterator", 586 | "fallible-streaming-iterator", 587 | "hashlink", 588 | 
"libsqlite3-sys", 589 | "memchr", 590 | "smallvec", 591 | ] 592 | 593 | [[package]] 594 | name = "ryu" 595 | version = "1.0.5" 596 | source = "registry+https://github.com/rust-lang/crates.io-index" 597 | checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" 598 | 599 | [[package]] 600 | name = "scopeguard" 601 | version = "1.1.0" 602 | source = "registry+https://github.com/rust-lang/crates.io-index" 603 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 604 | 605 | [[package]] 606 | name = "serde" 607 | version = "1.0.123" 608 | source = "registry+https://github.com/rust-lang/crates.io-index" 609 | checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae" 610 | 611 | [[package]] 612 | name = "serde_derive" 613 | version = "1.0.123" 614 | source = "registry+https://github.com/rust-lang/crates.io-index" 615 | checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31" 616 | dependencies = [ 617 | "proc-macro2", 618 | "quote", 619 | "syn", 620 | ] 621 | 622 | [[package]] 623 | name = "serde_json" 624 | version = "1.0.63" 625 | source = "registry+https://github.com/rust-lang/crates.io-index" 626 | checksum = "43535db9747a4ba938c0ce0a98cc631a46ebf943c9e1d604e091df6007620bf6" 627 | dependencies = [ 628 | "itoa", 629 | "ryu", 630 | "serde", 631 | ] 632 | 633 | [[package]] 634 | name = "smallvec" 635 | version = "1.6.1" 636 | source = "registry+https://github.com/rust-lang/crates.io-index" 637 | checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" 638 | 639 | [[package]] 640 | name = "strsim" 641 | version = "0.8.0" 642 | source = "registry+https://github.com/rust-lang/crates.io-index" 643 | checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" 644 | 645 | [[package]] 646 | name = "structopt" 647 | version = "0.3.26" 648 | source = "registry+https://github.com/rust-lang/crates.io-index" 649 | checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" 650 | dependencies = [ 651 | "clap", 652 | "lazy_static", 653 | "structopt-derive", 654 | ] 655 | 656 | [[package]] 657 | name = "structopt-derive" 658 | version = "0.4.18" 659 | source = "registry+https://github.com/rust-lang/crates.io-index" 660 | checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" 661 | dependencies = [ 662 | "heck", 663 | "proc-macro-error", 664 | "proc-macro2", 665 | "quote", 666 | "syn", 667 | ] 668 | 669 | [[package]] 670 | name = "syn" 671 | version = "1.0.60" 672 | source = "registry+https://github.com/rust-lang/crates.io-index" 673 | checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081" 674 | dependencies = [ 675 | "proc-macro2", 676 | "quote", 677 | "unicode-xid", 678 | ] 679 | 680 | [[package]] 681 | name = "tempdir" 682 | version = "0.3.7" 683 | source = "registry+https://github.com/rust-lang/crates.io-index" 684 | checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" 685 | dependencies = [ 686 | "rand", 687 | "remove_dir_all", 688 | ] 689 | 690 | [[package]] 691 | name = "termcolor" 692 | version = "1.1.2" 693 | source = "registry+https://github.com/rust-lang/crates.io-index" 694 | checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" 695 | dependencies = [ 696 | "winapi-util", 697 | ] 698 | 699 | [[package]] 700 | name = "textwrap" 701 | version = "0.11.0" 702 | source = "registry+https://github.com/rust-lang/crates.io-index" 703 | checksum = 
"d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 704 | dependencies = [ 705 | "unicode-width", 706 | ] 707 | 708 | [[package]] 709 | name = "thiserror" 710 | version = "1.0.30" 711 | source = "registry+https://github.com/rust-lang/crates.io-index" 712 | checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" 713 | dependencies = [ 714 | "thiserror-impl", 715 | ] 716 | 717 | [[package]] 718 | name = "thiserror-impl" 719 | version = "1.0.30" 720 | source = "registry+https://github.com/rust-lang/crates.io-index" 721 | checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" 722 | dependencies = [ 723 | "proc-macro2", 724 | "quote", 725 | "syn", 726 | ] 727 | 728 | [[package]] 729 | name = "time" 730 | version = "0.1.44" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" 733 | dependencies = [ 734 | "libc", 735 | "wasi", 736 | "winapi", 737 | ] 738 | 739 | [[package]] 740 | name = "time" 741 | version = "0.3.7" 742 | source = "registry+https://github.com/rust-lang/crates.io-index" 743 | checksum = "004cbc98f30fa233c61a38bc77e96a9106e65c88f2d3bef182ae952027e5753d" 744 | dependencies = [ 745 | "libc", 746 | "num_threads", 747 | ] 748 | 749 | [[package]] 750 | name = "unicode-segmentation" 751 | version = "1.7.1" 752 | source = "registry+https://github.com/rust-lang/crates.io-index" 753 | checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" 754 | 755 | [[package]] 756 | name = "unicode-width" 757 | version = "0.1.8" 758 | source = "registry+https://github.com/rust-lang/crates.io-index" 759 | checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" 760 | 761 | [[package]] 762 | name = "unicode-xid" 763 | version = "0.2.1" 764 | source = "registry+https://github.com/rust-lang/crates.io-index" 765 | checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" 766 | 767 | [[package]] 768 | name = "vcpkg" 769 | version = "0.2.11" 770 | source = "registry+https://github.com/rust-lang/crates.io-index" 771 | checksum = "b00bca6106a5e23f3eee943593759b7fcddb00554332e856d990c893966879fb" 772 | 773 | [[package]] 774 | name = "vec_map" 775 | version = "0.8.2" 776 | source = "registry+https://github.com/rust-lang/crates.io-index" 777 | checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" 778 | 779 | [[package]] 780 | name = "version_check" 781 | version = "0.9.2" 782 | source = "registry+https://github.com/rust-lang/crates.io-index" 783 | checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" 784 | 785 | [[package]] 786 | name = "wasi" 787 | version = "0.10.0+wasi-snapshot-preview1" 788 | source = "registry+https://github.com/rust-lang/crates.io-index" 789 | checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" 790 | 791 | [[package]] 792 | name = "weave" 793 | version = "0.4.0-dev" 794 | dependencies = [ 795 | "chrono", 796 | "flate2", 797 | "log", 798 | "regex", 799 | "serde", 800 | "serde_derive", 801 | "serde_json", 802 | "thiserror", 803 | "zstd", 804 | ] 805 | 806 | [[package]] 807 | name = "winapi" 808 | version = "0.3.9" 809 | source = "registry+https://github.com/rust-lang/crates.io-index" 810 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 811 | dependencies = [ 812 | "winapi-i686-pc-windows-gnu", 813 | "winapi-x86_64-pc-windows-gnu", 814 | ] 815 | 816 | [[package]] 817 | 
name = "winapi-i686-pc-windows-gnu" 818 | version = "0.4.0" 819 | source = "registry+https://github.com/rust-lang/crates.io-index" 820 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 821 | 822 | [[package]] 823 | name = "winapi-util" 824 | version = "0.1.5" 825 | source = "registry+https://github.com/rust-lang/crates.io-index" 826 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 827 | dependencies = [ 828 | "winapi", 829 | ] 830 | 831 | [[package]] 832 | name = "winapi-x86_64-pc-windows-gnu" 833 | version = "0.4.0" 834 | source = "registry+https://github.com/rust-lang/crates.io-index" 835 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 836 | 837 | [[package]] 838 | name = "zstd" 839 | version = "0.10.0+zstd.1.5.2" 840 | source = "registry+https://github.com/rust-lang/crates.io-index" 841 | checksum = "3b1365becbe415f3f0fcd024e2f7b45bacfb5bdd055f0dc113571394114e7bdd" 842 | dependencies = [ 843 | "zstd-safe", 844 | ] 845 | 846 | [[package]] 847 | name = "zstd-safe" 848 | version = "4.1.4+zstd.1.5.2" 849 | source = "registry+https://github.com/rust-lang/crates.io-index" 850 | checksum = "2f7cd17c9af1a4d6c24beb1cc54b17e2ef7b593dc92f19e9d9acad8b182bbaee" 851 | dependencies = [ 852 | "libc", 853 | "zstd-sys", 854 | ] 855 | 856 | [[package]] 857 | name = "zstd-sys" 858 | version = "1.6.3+zstd.1.5.2" 859 | source = "registry+https://github.com/rust-lang/crates.io-index" 860 | checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8" 861 | dependencies = [ 862 | "cc", 863 | "libc", 864 | ] 865 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rsure" 3 | version = "0.10.0-dev" 4 | authors = ["David Brown "] 5 | description = """ 6 | Rsure captures hashes and metadata about a tree of files, and can 7 | later verify these hashes and metadata to determine if the files have 8 | changed. It supports incremental updates, and includes an standalone 9 | executable for scanning and checking trees. 10 | """ 11 | license = "MIT" 12 | readme = "README.md" 13 | repository = "https://github.com/d3zd3z/rsure" 14 | edition = "2018" 15 | 16 | exclude = [ 17 | "2sure.*.gz" 18 | ] 19 | 20 | [dependencies] 21 | chrono = "0.4" 22 | crossbeam = "0.8" 23 | data-encoding = "2.1.1" 24 | flate2 = "1.0" 25 | lazy_static = "1.4" 26 | libc = "0.2.11" 27 | log = "0.4.6" # 0.4.6 needed to fix problem with named macro imports. 
28 | # rsure-naming = { path = "naming", version = "0.1.0" } 29 | num_cpus = "1.10" 30 | openssl = "0.10" 31 | regex = "1.5" 32 | rusqlite = "0.26" 33 | structopt = "0.3" 34 | tempdir = "0.3" 35 | thiserror = "1.0" 36 | time = "0.3" 37 | weave = { path = "weave", version = "0.4.0-dev" } 38 | zstd = "0.10" 39 | 40 | # This will go away 41 | env_logger = "0.9" 42 | 43 | [[bin]] 44 | name = "rsure" 45 | test = false 46 | doc = false 47 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 David Brown 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## Release 0.9 4 | 5 | This is a fairly major release, with most changes under the hood. 6 | There are some minor updates in dependencies. 7 | 8 | The underlying mechanism for updating sure files has been rewritten. 9 | Earlier versions of rsure would read an entire tree into memory, and 10 | perform updates on this in-memory structure. For large directory 11 | trees, this could take up a lot of memory. The new version always 12 | performs scans in a linear manner, and updates are performed using a 13 | few temp files (kept in the same directory as the surefile). This slightly increases the 14 | space needed where the surefile is stored, but greatly reduces memory 15 | usage. 16 | 17 | By recording hash updates to a temporary sqlite database, we can now 18 | perform these hash updates in parallel, using multiple cores. On fast 19 | disks, this can result in a speed improvement. 20 | 21 | Other than the lower memory usage, this change shouldn't be visible to 22 | users of rsure. 23 | 24 | ## Release 0.8.2 25 | 26 | This is a minor release with some minor improvements, mostly having to 27 | do with moving to Rust 2018. As of this release, Rust 2018 is 28 | required to build Rsure. 29 | 30 | In addition to the 2018 changes, this release also makes some 31 | improvements to the progress meter. If the client of the library uses 32 | `log_init` to initialize the logging system, the progress meter will 33 | cooperate with the logging system to present a clean, frequently 34 | updated message. Otherwise, the meter will remain as before, only 35 | updated every 5 seconds.
36 | 37 | This also adds a separate progress meter to indicate the status of the 38 | initial filesystem scan. With large trees, this scan can take some 39 | time, and the meter is a useful indicator of what is happening. 40 | 41 | ## Release 0.8.1 42 | 43 | This is a minor release that updates the versions of child dependencies. 44 | 45 | ## Release 0.8 46 | 47 | Release 0.8 of rsure makes some notable changes to the library and 48 | command-line tool. The most significant change is that the 'weave' 49 | format is the primary format that deltas are stored in. Instead of 50 | distinguishing the old files by 2sure.weave.gz and 2sure.dat.gz, weave 51 | files are just called 2sure.dat.gz. It may be possible to add format 52 | detection to detect the old format, but I suspect there isn't a lot of 53 | use of these files anyway. 54 | 55 | Other changes: 56 | - Move to 'failure' instead of 'error-chain'. 57 | - Bump many dependencies. 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rsure file integrity 2 | 3 | [![Build Status](https://travis-ci.org/d3zd3z/rsure.svg?branch=master)](https://travis-ci.org/d3zd3z/rsure) 4 | 5 | It has been said that backups aren't useful unless you've tested them. 6 | But how does one know that a test restore actually worked? Rsure is 7 | designed to help with this. 8 | 9 | ## History 10 | 11 | The md5sum program captures the MD5 hash of a set of files. It can 12 | also read this output and compare the hashes against the files. By 13 | capturing the hashes before the backup, and comparing them after a 14 | test restore, you can gain a bit of confidence that the contents of 15 | the files are at least correct. 16 | 17 | However, this doesn't capture the permissions and other attributes of 18 | the files. Sometimes a restore can fail for this kind of reason. 19 | 20 | ## Intrusion detection 21 | 22 | There have been several similar solutions focused on intrusion 23 | detection. Tripwire and FreeVeracity (or Veracity) come to mind. The 24 | idea is that the files are compared in place to verify that nobody has 25 | modified them. 26 | 27 | Unfortunately, at least Tripwire seems to focus so heavily on this 28 | intrusion detection problem that the tool doesn't work very well for 29 | verifying backups. It really wants a central database, and to use 30 | files by absolute pathname. FreeVeracity was quite useful for 31 | verifying backups; however, it appears to have vanished entirely (it 32 | was under an unusual license). 33 | 34 | ### Incremental updates 35 | 36 | One thing that none of these solutions addressed was 37 | incremental updates, probably because of the focus on intrusion 38 | detection. In a normal running system, the POSIX *ctime* field can be 39 | reliably used to determine if a file has been modified. By making use 40 | of this, the integrity program can avoid recomputing hashes of files 41 | that haven't changed. This strategy is similar to what most backup 42 | software does as well. This is important, because taking the time to 43 | hash every file can make the integrity update take so long that people 44 | avoid running it. Full hashing is impractical for the same reasons 45 | that regular full backups are usually impractical. 46 | 47 | # Using rsure 48 | 49 | ## Getting it 50 | 51 | Rsure is written in [Rust][rust].
It began as 52 | an exercise to determine how useful Rust is for a systems-type 53 | program, and has shown itself to be the easiest implementation to develop and 54 | maintain. 55 | 56 | [rust]: http://www.rust-lang.org/ "The Rust Programming Language" 57 | 58 | Once you have installed Rust (and Cargo) using either the Rust 59 | installer, rustup, or your distro's packaging system, building it is 60 | as easy as: 61 | 62 | ```shell 63 | $ cargo build --release 64 | ``` 65 | 66 | within the Rsure directory. The `--release` flag is important, 67 | otherwise the performance is poor. You can install or link to 68 | `./target/release/rsure` for the executable. It may also be possible 69 | to use `cargo install` to install rsure directly. 70 | 71 | ## Basic usage 72 | 73 | Change to a directory you wish to keep integrity for, for example, your 74 | home directory: 75 | 76 | ```shell 77 | $ cd 78 | $ rsure scan 79 | ``` 80 | 81 | This will scan the filesystem (possibly showing progress), and leave a 82 | `2sure.dat.gz` (the `2sure` is historical; FreeVeracity used a name 83 | starting with a 0, and having the digit puts it near the beginning of 84 | a directory listing). You can view this file if you'd like. The 85 | format is somewhat readable. 86 | 87 | Then you can do: 88 | 89 | ```shell 90 | $ rsure check 91 | ``` 92 | 93 | to verify the directory. This will show any differences. If you back 94 | up this file with your data, you can run `rsure` after a restore to 95 | check if the backup is correct. 96 | 97 | Later, you can run: 98 | 99 | ```shell 100 | $ rsure update 101 | ``` 102 | 103 | which will update the `2sure.dat.gz` file with the new data. Rsure 104 | uses a "weave" format to hold multiple revisions efficiently in the 105 | same file. The update command will refresh the hashes of any files 106 | that have changed. After this, you can run: 107 | 108 | ```shell 109 | $ rsure signoff 110 | ``` 111 | 112 | to compare the old scan with the current, and report on what has 113 | changed between them. 114 | -------------------------------------------------------------------------------- /benches/hash.rs: -------------------------------------------------------------------------------- 1 | // Benchmark our hashing function. 2 | 3 | #![feature(test)] 4 | 5 | extern crate openssl; 6 | extern crate rsure; 7 | extern crate tempdir; 8 | extern crate test; 9 | // extern crate sha1; 10 | 11 | use rsure::{Progress, SureHash}; 12 | use std::fs::File; 13 | use std::io::Write; 14 | use tempdir::TempDir; 15 | use test::Bencher; 16 | 17 | // To compute hashing speed, use 1 over the benchmark time in seconds, and 18 | // then multiply the result by the number of iterations in the 'for i' 19 | // loop. For example, if the benchmark runs in 29,924,583 ns/iter, and the 20 | // count is 16, that would be about 534 MiB/sec hash performance. 21 | // 22 | // The loop count should be large enough to overflow the CPU's largest 23 | // cache, with the value 16 (16MiB) overflowing the 8MiB cache on the Core 24 | // i7-950 I wrote this on.
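// (Worked example of that arithmetic, using the figures quoted above rather than a fresh measurement: each iteration hashes 16 files of 1 MiB each, so (1 / 0.029924583 s) * 16 MiB ≈ 534 MiB/sec.)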
25 | #[bench] 26 | fn tree_mb_bench(b: &mut Bencher) { 27 | let tmp = TempDir::new("rsure-bench").unwrap(); 28 | for i in 0..16 { 29 | let name = format!("large-{}", i); 30 | let mut fd = File::create(tmp.path().join(&name)).unwrap(); 31 | let buf = vec![0; 1024]; 32 | for _ in 0..1024 { 33 | fd.write_all(&buf).unwrap(); 34 | } 35 | } 36 | 37 | b.iter(|| { 38 | let mut tree = rsure::scan_fs(tmp.path()).unwrap(); 39 | let estimate = tree.hash_estimate(); 40 | let mut progress = Progress::new(estimate.files, estimate.bytes); 41 | tree.hash_update(tmp.path(), &mut progress); 42 | // progress.flush(); 43 | }) 44 | } 45 | 46 | #[bench] 47 | fn openssl_bench(b: &mut Bencher) { 48 | use openssl::hash::{Hasher, MessageDigest}; 49 | 50 | // Make buffer big enough to not fit in cache. 51 | let buf = vec![0; 1024 * 1024 * 16]; 52 | 53 | b.iter(|| { 54 | let mut h = Hasher::new(MessageDigest::sha1()).unwrap(); 55 | h.write_all(&buf).unwrap(); 56 | h.finish().unwrap(); 57 | }) 58 | } 59 | 60 | /* Bring in the SHA1 crate. Currently, it seems to be about 4.2 times slower than the openssl one. 61 | */ 62 | /* 63 | #[bench] 64 | fn sha1_bench(b: &mut Bencher) { 65 | use sha1::Sha1; 66 | 67 | // Make buffer big enough to not fit in cache. 68 | let buf = vec![0; 1024 * 1024 * 16]; 69 | 70 | b.iter(|| { 71 | let mut h = Sha1::new(); 72 | h.update(&buf); 73 | let _ = h.digest(); 74 | }) 75 | } 76 | */ 77 | -------------------------------------------------------------------------------- /default.nix: -------------------------------------------------------------------------------- 1 | /* { stdenv, pkgs, fetchFromGitHub, rustPlatform }: */ 2 | 3 | with import <nixpkgs> {}; 4 | rustPlatform.buildRustPackage rec { 5 | pname = "rsure"; 6 | version = "0.9.4"; 7 | 8 | src = fetchFromGitHub { 9 | owner = "tangybbq"; 10 | repo = pname; 11 | rev = "v0.9.4"; 12 | sha256 = "sha256:0bx0l2q64ma057l2wwvsnbgl8jr6szanfwr5311lqqzvp4r4kaqy"; 13 | }; 14 | 15 | cargoSha256 = "sha256:1bym7z2b3sw9g2hvixagir4bqh0389v9f2r66x2nf871683vc34y"; 16 | 17 | nativeBuildInputs = [ 18 | pkgs.pkgconfig 19 | ]; 20 | buildInputs = [ pkgs.openssl.dev pkgs.sqlite.dev ]; 21 | 22 | meta = with lib; { 23 | description = "A utility for ensuring file integrity"; 24 | homepage = "https://github.com/tangybbq/rsure"; 25 | license = with licenses; [ mit ]; 26 | maintainers = with maintainers; [ d3zd3z ]; 27 | }; 28 | } 29 | -------------------------------------------------------------------------------- /etc/template-bk-readme.txt: -------------------------------------------------------------------------------- 1 | This directory is an 'rsure' BitKeeper store. Stored within the BitKeeper 2 | data are surefiles that represent the state of one or more filesystems at 3 | one or more points in time. You can use BitKeeper to see what is here. 4 | 5 | bk changes -v 6 | 7 | will show you the revisions. You can verify a revision manually with 8 | something like 9 | 10 | bk co -r1.8 -p filename.dat | gzip > /tmp/filename.dat.gz 11 | rsure check -d dirname -f /tmp/filename.dat.gz 12 | -------------------------------------------------------------------------------- /naming/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rsure-naming" 3 | version = "0.1.0" 4 | authors = ["David Brown "] 5 | edition = "2018" 6 | description = """ 7 | rsure-naming implements a simple naming convention used by the 'rsure' 8 | crate.
It manages an associated set of files, typically a data file, 9 | a backup file, and zero or more temporary files. 10 | """ 11 | 12 | [dependencies] 13 | failure = "0.1.5" 14 | flate2 = "1.0" 15 | log = "0.4.6" 16 | -------------------------------------------------------------------------------- /naming/README.rst: -------------------------------------------------------------------------------- 1 | Naming convention support 2 | ************************* 3 | 4 | -------------------------------------------------------------------------------- /naming/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A Naming manages a group of associated filenames. All of these names 2 | //! exist in a single directory, have a common basename, and various 3 | //! suffixes. It consists of the following names: 4 | //! 5 | //! * path/base.dat.gz: The primary name 6 | //! * path/base.bak.gz: A backup file 7 | //! * path/base.0: A temporary file 8 | //! * path/base.1.gz: A compressed temporary file 9 | //! 10 | //! The client of this crate can determine whether the primary and backup 11 | //! names are compressed, and compression can be chosen for the temporary 12 | //! files on a per-file basis. If the compression matches the main name, 13 | //! a temp file can be atomically renamed to the primary name. 14 | //! 15 | //! In addition to the management of the names, this module manages opening 16 | //! and closing files associated with the names, as well as cleaning up 17 | //! temporary files when the Naming goes out of scope. 18 | 19 | use flate2::{write::GzEncoder, Compression}; 20 | use log::warn; 21 | use std::{ 22 | fs::{self, File, OpenOptions}, 23 | io::{BufWriter, ErrorKind, Write}, 24 | path::{Path, PathBuf}, 25 | result, 26 | }; 27 | 28 | /// Our local Result type. Makes use of `failure::Error` to automatically 29 | /// pass errors upward. 30 | type Result<T> = result::Result<T, failure::Error>; 31 | 32 | #[derive(Debug)] 33 | pub struct Naming { 34 | // The directory for files to be written to. 35 | path: PathBuf, 36 | // The base part of the filename 37 | base: String, 38 | // The extension to use for the main name. 39 | ext: String, 40 | // Are the primary and backup files to be compressed? 41 | compressed: bool, 42 | 43 | // Track the next temp we try to open, avoids O(n^2) open calls. This 44 | // is merely an optimization and shouldn't have observable behavior. 45 | next_temp: usize, 46 | 47 | // The naming convention can be instructed to cleanup files when it is 48 | // dropped. 49 | cleanup: Vec<PathBuf>, 50 | } 51 | 52 | /// Something that can be written to, that remembers its name. The writer 53 | /// is boxed to support various kinds of writers, including compressed. 54 | pub struct NamedWriter { 55 | pub name: PathBuf, 56 | pub writer: Box<dyn Write>, 57 | } 58 | 59 | impl Naming { 60 | pub fn new<P: AsRef<Path>>(path: P, base: &str, ext: &str, compressed: bool) -> Naming { 61 | Naming { 62 | path: path.as_ref().to_path_buf(), 63 | base: base.to_string(), 64 | ext: ext.to_string(), 65 | compressed, 66 | next_temp: 0, 67 | cleanup: Vec::new(), 68 | } 69 | } 70 | 71 | pub fn make_name(&self, ext: &str, compressed: bool) -> PathBuf { 72 | let name = format!( 73 | "{}.{}{}", 74 | self.base, 75 | ext, 76 | if compressed { ".gz" } else { "" } 77 | ); 78 | self.path.join(name) 79 | } 80 | 81 | /// Construct a temp file that matches the given naming.
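/// The numeric suffix starts at `next_temp` and counts upward (`base.0`, /// `base.1`, ..., with `.gz` appended when `compressed` is true), using /// `create_new` so an existing file is never overwritten.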
82 | pub fn temp_file(&mut self, compressed: bool) -> Result<(PathBuf, File)> { 83 | let mut n = self.next_temp; 84 | loop { 85 | let name = self.make_name(&n.to_string(), compressed); 86 | self.next_temp = n + 1; 87 | 88 | match OpenOptions::new().write(true).create_new(true).open(&name) { 89 | Ok(fd) => return Ok((name, fd)), 90 | Err(ref e) if e.kind() == ErrorKind::AlreadyExists => (), 91 | Err(e) => return Err(e.into()), 92 | } 93 | 94 | n += 1; 95 | } 96 | } 97 | 98 | /// Construct a temp file (as above), but if compression is requested, 99 | /// use a writer that compresses when writing. 100 | pub fn new_temp(&mut self, compressed: bool) -> Result<NamedWriter> { 101 | let (name, file) = self.temp_file(compressed)?; 102 | let writer = if compressed { 103 | // The GzEncoder does a measure of buffering. 104 | // TODO: Do benchmarks to determine if buffering the result of 105 | // the GzEncoder helps. 106 | Box::new(GzEncoder::new(file, Compression::default())) as Box<dyn Write> 107 | } else { 108 | Box::new(BufWriter::new(file)) as Box<dyn Write> 109 | }; 110 | Ok(NamedWriter { 111 | name, 112 | writer, 113 | }) 114 | } 115 | 116 | /// Replace the main file with the given name. This attempts to rename 117 | /// the main name to the backup name, and then attempts to rename the 118 | /// temp file to the main name. 119 | pub fn rename_to_main(&self, name: &Path) -> Result<()> { 120 | let main_name = self.make_name(&self.ext, self.compressed); 121 | let back_name = self.make_name("bak", self.compressed); 122 | 123 | match fs::rename(&main_name, &back_name) { 124 | // Not found means there isn't a main name to rename. 125 | Err(ref e) if e.kind() == ErrorKind::NotFound => (), 126 | // Other errors are failure. 127 | Err(e) => return Err(e.into()), 128 | Ok(()) => (), 129 | } 130 | 131 | fs::rename(name, main_name)?; 132 | Ok(()) 133 | } 134 | 135 | /// Add a name that must be cleaned up. 136 | pub fn add_cleanup(&mut self, name: PathBuf) { 137 | self.cleanup.push(name); 138 | } 139 | } 140 | 141 | impl Drop for Naming { 142 | fn drop(&mut self) { 143 | for name in &self.cleanup { 144 | if let Err(e) = fs::remove_file(name) { 145 | warn!("Error cleaning up: {:?} ({})", name, e); 146 | } 147 | } 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | # Shell configuration to build rsure. 2 | { pkgs ? import <nixpkgs> {} }: 3 | let 4 | lib = pkgs.lib; 5 | stdenv = pkgs.stdenv; 6 | 7 | # SCCS isn't particularly useful, but the file used by weave is 8 | # derived from what SCCS uses. If this program is in the path, then 9 | # weave has additional tests that it can run. 10 | cssc = stdenv.mkDerivation rec { 11 | name = "cssc-1.4.1"; 12 | 13 | src = pkgs.fetchurl { 14 | url = "mirror://gnu/cssc/CSSC-1.4.1.tar.gz"; 15 | sha256 = "1vsisqq573xjr2qpn19iwmpqgl3mq03m790akpa4rvj60b4d1gni"; 16 | }; 17 | 18 | meta = with lib; { 19 | homepage = "https://www.gnu.org/software/cssc/"; 20 | description = "GNU replacement for SCCS"; 21 | license = licenses.gpl3; 22 | }; 23 | }; 24 | in 25 | pkgs.mkShell { 26 | nativeBuildInputs = [ 27 | pkgs.openssl.dev 28 | pkgs.pkgconfig 29 | pkgs.sqlite.dev 30 | 31 | # pkgs.cargo 32 | # pkgs.clippy 33 | # pkgs.rustfmt 34 | # pkgs.cargo-bloat 35 | 36 | cssc 37 | ]; 38 | } 39 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | // Errors.
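// This module defines the crate-wide `Error` enum (derived with `thiserror`) and a matching `Result` alias; failures from weave, I/O, OpenSSL, sqlite, and mpsc are wrapped via `#[from]`.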
2 | 3 | use std::result; 4 | use thiserror::Error; 5 | 6 | pub type Result<T> = result::Result<T, Error>; 7 | #[derive(Error, Debug)] 8 | pub enum Error { 9 | #[error("weave error")] 10 | Weave(#[from] weave::Error), 11 | 12 | #[error("I/O Error {0:?}")] 13 | Io(#[from] std::io::Error), 14 | 15 | #[error("OpenSSL error: {0:?}")] 16 | OpenSsl(#[from] openssl::error::ErrorStack), 17 | #[error("Int parse error: {0:?}")] 18 | IntParse(#[from] std::num::ParseIntError), 19 | 20 | #[error("Root must be a directory")] 21 | RootMustBeDir, 22 | #[error("Unknown directory specified")] 23 | UnknownDirectory, 24 | #[error("File not in directory")] 25 | FileNotInDirectory, 26 | #[error("Path missing final file component")] 27 | PathMissingFinalFile, 28 | 29 | // Errors from comparison. 30 | #[error("empty left iterator")] 31 | EmptyLeftIterator, 32 | #[error("empty right iterator")] 33 | EmptyRightIterator, 34 | #[error("Unexpected node in left tree")] 35 | UnexpectedLeftNode, 36 | #[error("Unexpected node in right tree")] 37 | UnexpectedRightNode, 38 | #[error("Incorrect name of root tree")] 39 | IncorrectName, 40 | 41 | #[error("Unexpected line: {0:?}, expect {1:?}")] 42 | UnexpectedLine(String, String), 43 | #[error("Error reading surefile: {0:?}")] 44 | SureFileError(std::io::Error), 45 | #[error("Unexpected eof on surefile")] 46 | SureFileEof, 47 | #[error("Truncated surefile")] 48 | TruncatedSurefile, 49 | #[error("Invalid surefile line start: {0:?}")] 50 | InvalidSurefileChar(char), 51 | 52 | #[error("Sql error: {0:?}")] 53 | Sql(#[from] rusqlite::Error), 54 | // For one case that needs to be written to be able to move the error. 55 | #[error("Sql error: {0}")] 56 | WrappedSql(String), 57 | #[error("Hash error: {0:?}")] 58 | Hash(String), 59 | #[error("mpsc error: {0:?}")] 60 | Mpsc(#[from] std::sync::mpsc::RecvError), 61 | } 62 | 63 | /* 64 | #[derive(Fail, Debug)] 65 | pub enum WeaveError { 66 | #[fail(display = "Error running BitKeeper: {:?}: {:?}", _0, _1)] 67 | BkError(ExitStatus, String), 68 | } 69 | */ 70 | -------------------------------------------------------------------------------- /src/escape.rs: -------------------------------------------------------------------------------- 1 | //! String escaping. 2 | //! 3 | //! Although filenames in Linux are commonly represented as UTF-8 4 | //! sequences, there is no system requirement that this be the case. As a 5 | //! consequence, this means that it is possible for filenames in Linux to 6 | //! not be valid UTF-8, and therefore not representable as strings. 7 | //! 8 | //! To prevent encoding problems, as well as to allow certain characters, 9 | //! such as space, to separate tokens in the sure file format, we escape 10 | //! some bytes in strings by replacing them with "=xx" where "xx" is the 11 | //! lower-cased hex value of the byte. The range of valid characters 12 | //! is fairly straightforward, including all of the printable characters 13 | //! from '!' to '~' except for '=', '[', and ']', which are always escaped. This 14 | //! means, for example, that a 2-byte encoded UTF-8 sequence will expand to 15 | //! take 6 bytes.
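//! //! For example (derived from the rules above): the bytes `b"a =b"` escape //! to the string `"a=20=3db"`, and unescaping that string yields the //! original bytes.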
16 | 17 | use std::{io::prelude::*, result}; 18 | use thiserror::Error; 19 | 20 | pub trait Escape { 21 | fn escaped(&self) -> String; 22 | } 23 | 24 | pub trait Unescape { 25 | fn unescape(&self) -> EscapeResult<Vec<u8>>; 26 | } 27 | 28 | pub type EscapeResult<T> = result::Result<T, EscapeError>; 29 | 30 | #[derive(Error, Debug)] 31 | pub enum EscapeError { 32 | #[error("Invalid hex character: {0:?}")] 33 | InvalidHexCharacter(u8), 34 | #[error("Invalid hex length")] 35 | InvalidHexLength, 36 | } 37 | 38 | // The basic encoding converts a sequence of bytes into a string. 39 | impl Escape for [u8] { 40 | fn escaped(&self) -> String { 41 | let mut result = vec![]; 42 | for &ch in self.iter() { 43 | // TODO: Can be made more efficient. 44 | if (b'!'..=b'~').contains(&ch) && ch != b'=' && ch != b'[' && ch != b']' { 45 | result.push(ch); 46 | } else { 47 | write!(&mut result, "={:02x}", ch).unwrap(); 48 | } 49 | } 50 | 51 | // TODO: String::from_utf8_unchecked(result) 52 | String::from_utf8(result).unwrap() 53 | } 54 | } 55 | 56 | impl Unescape for str { 57 | fn unescape(&self) -> EscapeResult<Vec<u8>> { 58 | // Will overestimate. 59 | let mut buf = Vec::with_capacity(self.len() / 2); 60 | let mut phase = 0; 61 | let mut tmp = 0; 62 | 63 | for byte in self.bytes() { 64 | if phase == 0 { 65 | if byte == b'=' { 66 | phase = 1; 67 | } else { 68 | buf.push(byte); 69 | } 70 | } else { 71 | tmp <<= 4; 72 | match byte { 73 | b'A'..=b'F' => tmp |= byte - b'A' + 10, 74 | b'a'..=b'f' => tmp |= byte - b'a' + 10, 75 | b'0'..=b'9' => tmp |= byte - b'0', 76 | _ => return Err(EscapeError::InvalidHexCharacter(byte)), 77 | } 78 | phase += 1; 79 | if phase == 3 { 80 | buf.push(tmp); 81 | phase = 0; 82 | tmp = 0; 83 | } 84 | } 85 | } 86 | 87 | if phase != 0 { 88 | return Err(EscapeError::InvalidHexLength); 89 | } 90 | 91 | Ok(buf) 92 | } 93 | } 94 | 95 | #[test] 96 | fn test_unescape() { 97 | macro_rules! assert_error_kind { 98 | ( $expr:expr, $kind:pat ) => { 99 | match $expr { 100 | Err($kind) => (), 101 | Err(e) => panic!( 102 | "Unexpected error kind: {:?} (want {})", 103 | e, 104 | stringify!($kind) 105 | ), 106 | Ok(_) => panic!("Unexpected success"), 107 | } 108 | }; 109 | } 110 | 111 | assert_eq!("=00".unescape().unwrap(), vec![0]); 112 | assert_error_kind!("=00=0".unescape(), EscapeError::InvalidHexLength); 113 | assert_error_kind!("=00=".unescape(), EscapeError::InvalidHexLength); 114 | assert_error_kind!("=4g".unescape(), EscapeError::InvalidHexCharacter(b'g')); 115 | } 116 | 117 | #[test] 118 | fn test_escape() { 119 | let buf: Vec<u8> = (0u32..256).map(|i| i as u8).collect(); 120 | let text = (&buf[..]).escaped(); 121 | assert_eq!(text.unescape().unwrap(), buf); 122 | } 123 | -------------------------------------------------------------------------------- /src/hashes.rs: -------------------------------------------------------------------------------- 1 | //! Computing hashes for files. 2 | 3 | use crate::Result; 4 | use openssl::hash::{DigestBytes, Hasher, MessageDigest}; 5 | use std::io::{Read, Write}; 6 | #[derive(Debug)] 7 | pub struct Estimate { 8 | pub files: u64, 9 | pub bytes: u64, 10 | } 11 | 12 | // TODO: Reuse buffer and hasher for a given thread. 13 | pub(crate) fn hash_file<R: Read>(rd: &mut R) -> Result<DigestBytes> { 14 | let mut h = Hasher::new(MessageDigest::sha1())?; 15 | let mut buf = vec![0u8; 8192]; 16 | 17 | loop { 18 | let count = rd.read(&mut buf)?; 19 | if count == 0 { 20 | break; 21 | } 22 | 23 | h.write_all(&buf[0..count])?; 24 | } 25 | Ok(h.finish()?)
26 | } 27 | 28 | pub(crate) use self::atime_impl::noatime_open; 29 | 30 | /// Open the given file, trying to not update the atime if that is 31 | /// possible. 32 | /// The `custom_flags` method is only stable since Rust 1.10.0. 33 | #[cfg(target_os = "linux")] 34 | mod atime_impl { 35 | use std::fs::{File, OpenOptions}; 36 | use std::io; 37 | use std::os::unix::fs::OpenOptionsExt; 38 | use std::path::Path; 39 | 40 | // From linux's fcntl.h, not exported in the libc crate. 41 | const O_NOATIME: i32 = 0o1000000; 42 | 43 | pub fn noatime_open(name: &Path) -> io::Result<File> { 44 | // Try opening it first with noatime, and if that fails, try the open 45 | // again without the option. 46 | match OpenOptions::new() 47 | .read(true) 48 | .custom_flags(O_NOATIME) 49 | .open(name) 50 | { 51 | Ok(f) => Ok(f), 52 | Err(_) => OpenOptions::new().read(true).open(name), 53 | } 54 | } 55 | } 56 | 57 | // Other platforms, just use normal open. 58 | #[cfg(not(target_os = "linux"))] 59 | mod atime_impl { 60 | use std::fs::{File, OpenOptions}; 61 | use std::io; 62 | use std::path::Path; 63 | 64 | pub fn noatime_open(name: &Path) -> io::Result<File> { 65 | OpenOptions::new().read(true).open(name) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Rsure is a set of utilities for capturing information about files, and later verifying it is 2 | //! still true. 3 | //! 4 | //! The easiest way to use Rsure is to build the `rsure` executable contained in this crate. This 5 | //! program allows you to use most of the functionality of the crate. 6 | //! 7 | //! However, it is also possible to use the crate programmatically. At the top level of the crate 8 | //! are some utility functions for the most common operations. 9 | //! 10 | //! For example, to scan a directory or do an update, use `update`. 11 | //! 12 | //! This example makes use of several of the building blocks necessary to use the store. First is 13 | //! the store itself. `parse_store` is able to decode options that are passed to the command line. 14 | //! It is also possible to build a `store::Plain` store directly. 15 | //! 16 | //! Next are the tags for the snapshot. Generally, this should hold some kind of information about 17 | //! the snapshot itself. For the `Plain` store, it can be just an empty map. Other store types 18 | //! may require certain tags to be present. 19 | 20 | #![warn(bare_trait_objects)] 21 | 22 | use std::{fs::File, path::Path}; 23 | 24 | pub use crate::{ 25 | errors::{Error, Result}, 26 | hashes::Estimate, 27 | node::{ 28 | compare_trees, fs, load_from, HashCombiner, HashUpdater, NodeWriter, ReadIterator, Source, 29 | SureNode, 30 | }, 31 | progress::{log_init, Progress}, 32 | show::show_tree, 33 | store::{parse_store, Store, StoreTags, StoreVersion, TempLoader, Version}, 34 | suretree::AttMap, 35 | }; 36 | 37 | mod errors; 38 | mod escape; 39 | mod hashes; 40 | pub mod node; 41 | mod progress; 42 | mod show; 43 | mod store; 44 | mod surefs; 45 | mod suretree; 46 | 47 | // Some common operations, abstracted here. 48 | 49 | /// Perform an update scan, using the given store. 50 | /// 51 | /// If `is_update` is true, use the hashes from a previous run, otherwise perform a fresh scan. 52 | /// Depending on the [`Store`] type, the tags may be kept, or ignored.
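///
/// In update mode, hashes are carried over from the latest stored version
/// for files whose "ino" and "ctime" attributes are unchanged (see
/// `HashCombiner` and `maybe_copy_sha` in `node::hashes`), so only new or
/// modified files are re-hashed.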
53 | /// 54 | /// [`Store`]: trait.Store.html 55 | /// 56 | /// A simple example: 57 | /// 58 | /// ```rust 59 | /// # use std::error::Error; 60 | /// # 61 | /// # fn try_main() -> Result<(), Box<dyn Error>> { 62 | /// let mut tags = rsure::StoreTags::new(); 63 | /// tags.insert("name".into(), "sample".into()); 64 | /// let store = rsure::parse_store("2sure.dat.gz")?; 65 | /// rsure::update(".", &*store, false, &tags)?; 66 | /// # Ok(()) 67 | /// # } 68 | /// # 69 | /// # fn main() { 70 | /// # try_main().unwrap(); 71 | /// # } 72 | /// ``` 73 | pub fn update<P: AsRef<Path>>( 74 | dir: P, 75 | store: &dyn Store, 76 | is_update: bool, 77 | tags: &StoreTags, 78 | ) -> Result<()> { 79 | let dir = dir.as_ref(); 80 | 81 | let mut estimate = Estimate { files: 0, bytes: 0 }; 82 | let tmp = if is_update { 83 | // In update mode, first tmp file is just the scan. 84 | let scan_temp = { 85 | let mut tmp = store.make_temp()?; 86 | let src = fs::scan_fs(dir)?; 87 | node::save_to(&mut tmp, src)?; 88 | tmp 89 | } 90 | .into_loader()?; 91 | 92 | let latest = store.load_iter(Version::Latest)?; 93 | 94 | let tmp = { 95 | let mut tmp = store.make_temp()?; 96 | let loader = Loader(&*scan_temp); 97 | let combiner = HashCombiner::new(latest, loader.iter()?)?.inspect(|node| { 98 | if let Ok(n @ SureNode::File { .. }) = node { 99 | if n.needs_hash() { 100 | estimate.files += 1; 101 | estimate.bytes += n.size(); 102 | } 103 | } 104 | }); 105 | node::save_to(&mut tmp, combiner)?; 106 | tmp 107 | }; 108 | 109 | tmp 110 | } else { 111 | let mut tmp = store.make_temp()?; 112 | let src = fs::scan_fs(dir)?.inspect(|node| { 113 | if let Ok(n @ SureNode::File { .. }) = node { 114 | if n.needs_hash() { 115 | estimate.files += 1; 116 | estimate.bytes += n.size(); 117 | } 118 | } 119 | }); 120 | node::save_to(&mut tmp, src)?; 121 | tmp 122 | } 123 | .into_loader()?; 124 | 125 | // (In update mode, hashes from the old version were already merged in above.) 126 | 127 | // Update any missing hashes. 128 | let loader = Loader(&*tmp); 129 | let hu = HashUpdater::new(loader, store); 130 | // TODO: This will panic on non-unicode directories. 131 | let hm = hu.compute_parallel(dir.to_str().unwrap(), &estimate)?; 132 | let mut tmp2 = store.make_new(tags)?; 133 | hm.merge(&mut NodeWriter::new(&mut tmp2)?)?; 134 | 135 | tmp2.commit()?; 136 | /* 137 | let dir = dir.as_ref(); 138 | 139 | let mut new_tree = scan_fs(dir)?; 140 | 141 | if is_update { 142 | let old_tree = store.load(Version::Latest)?; 143 | new_tree.update_from(&old_tree); 144 | } 145 | 146 | let estimate = new_tree.hash_estimate(); 147 | let mut progress = Progress::new(estimate.files, estimate.bytes); 148 | new_tree.hash_update(dir, &mut progress); 149 | progress.flush(); 150 | 151 | store.write_new(&new_tree, tags)?; 152 | */ 153 | Ok(()) 154 | } 155 | 156 | struct Loader<'a>(&'a dyn TempLoader); 157 | 158 | impl<'a> Source for Loader<'a> { 159 | fn iter(&self) -> Result<Box<dyn Iterator<Item = Result<SureNode>> + Send>> { 160 | let rd = File::open(self.0.path_ref())?; 161 | Ok(Box::new(load_from(rd)?)) 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | // The rsure command-line driver. 2 | 3 | #![warn(bare_trait_objects)] 4 | 5 | use chrono::Local; 6 | use std::{collections::BTreeMap, path::Path}; 7 | use structopt::StructOpt; 8 | use tempdir::TempDir; 9 | 10 | use rsure::{log_init, parse_store, show_tree, Store, StoreTags, StoreVersion, Version}; 11 | 12 | // For now, just use the crate's error type.
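// Illustrative invocations of the resulting binary (hypothetical paths; the
// flags come from the `Opt` and `Command` definitions below, and the global
// options precede the subcommand):
//
//     rsure -d /some/tree -f 2sure.dat.gz scan
//     rsure -d /some/tree --tag reason=weekly update
//     rsure -d /some/tree check -i mtime
//     rsure -f 2sure.dat.gz list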
13 | pub use rsure::Result; 14 | 15 | #[derive(StructOpt)] 16 | #[structopt(name = "rsure", about = "File integrity")] 17 | struct Opt { 18 | #[structopt(short = "f", long = "file", default_value = "2sure.dat.gz")] 19 | /// Base of file name, default 2sure, will get .dat.gz appended 20 | file: String, 21 | #[structopt(short = "d", long = "dir", default_value = ".")] 22 | /// Directory to scan, defaults to "." 23 | dir: String, 24 | #[structopt(long = "tag")] 25 | /// key=value to associate with scan 26 | tag: Vec, 27 | #[structopt(short = "v", long = "version")] 28 | version: Option, 29 | #[structopt(subcommand)] 30 | command: Command, 31 | } 32 | 33 | #[derive(StructOpt)] 34 | enum Command { 35 | #[structopt(name = "scan")] 36 | /// Scan a directory for the first time 37 | Scan, 38 | #[structopt(name = "update")] 39 | /// Update the scan using the dat/weave file 40 | Update, 41 | #[structopt(name = "check")] 42 | /// Compare the directory with the dat/weave file 43 | Check { 44 | #[structopt(short = "i", long = "ignore")] 45 | /// Tag to ignore when comparing. 46 | ignore: Vec, 47 | }, 48 | #[structopt(name = "signoff")] 49 | /// Compare dat with bak file, or last two versions in weave file 50 | Signoff { 51 | #[structopt(short = "i", long = "ignore")] 52 | /// Tag to ignore when comparing. 53 | ignore: Vec, 54 | }, 55 | #[structopt(name = "show")] 56 | /// Pretty print the dat file 57 | Show, 58 | #[structopt(name = "list")] 59 | /// List revisions in a given sure store 60 | List, 61 | } 62 | 63 | #[allow(dead_code)] 64 | fn main() -> Result<()> { 65 | log_init(); 66 | 67 | let opt = Opt::from_args(); 68 | 69 | let store = parse_store(&opt.file)?; 70 | 71 | let mut tags = decode_tags(Some(opt.tag.iter().map(|x| x.as_str()))); 72 | 73 | add_name_tag(&mut tags, &opt.dir); 74 | 75 | // Note that only the "check" command uses the version tag. 76 | let latest = match opt.version { 77 | None => Version::Latest, 78 | Some(ref x) => Version::Tagged(x.to_string()), 79 | }; 80 | 81 | match &opt.command { 82 | Command::Scan => { 83 | rsure::update(&opt.dir, &*store, false, &tags)?; 84 | } 85 | Command::Update => { 86 | rsure::update(&opt.dir, &*store, true, &tags)?; 87 | } 88 | Command::Check { ignore } => { 89 | let ignore: Vec<_> = ignore.iter().map(|x| x.as_str()).collect(); 90 | run_check(&*store, &opt, latest, &ignore)?; 91 | } 92 | Command::Signoff { ignore } => { 93 | let ignore: Vec<_> = ignore.iter().map(|x| x.as_str()).collect(); 94 | let old_tree = store.load_iter(Version::Prior)?; 95 | let new_tree = store.load_iter(Version::Latest)?; 96 | println!("signoff {}", opt.file); 97 | rsure::compare_trees(old_tree, new_tree, &Path::new(&opt.dir), &ignore)?; 98 | } 99 | Command::Show => { 100 | println!("show {}", opt.file); 101 | show_tree(&*store)?; 102 | } 103 | Command::List => { 104 | let version = store.get_versions()?; 105 | dump_versions(&version); 106 | } 107 | } 108 | 109 | Ok(()) 110 | } 111 | 112 | fn run_check(store: &dyn Store, opt: &Opt, latest: Version, ignore: &[&str]) -> Result<()> { 113 | // Perform a full scan to a temp store. 
114 | let tdir = TempDir::new("rsure")?; 115 | let tpath = tdir.path().join("check.dat.gz"); 116 | let tstore = parse_store(tpath.to_str().unwrap())?; 117 | let mut tags = BTreeMap::new(); 118 | add_name_tag(&mut tags, &opt.dir); 119 | println!("Scanning"); 120 | rsure::update(&opt.dir, &*tstore, false, &tags)?; 121 | 122 | let old_tree = store.load_iter(latest)?; 123 | let new_tree = tstore.load_iter(Version::Latest)?; 124 | println!("Check {}", opt.file); 125 | rsure::compare_trees(old_tree, new_tree, &Path::new(&opt.dir), ignore)?; 126 | Ok(()) 127 | } 128 | 129 | /// Decode the command-line tags. Tags should be of the form key=value, and multiple can be 130 | /// specified, terminated by the command. It is also possible to specify --tag multiple times. 131 | fn decode_tags<'a, I>(tags: Option) -> StoreTags 132 | where 133 | I: Iterator, 134 | { 135 | match tags { 136 | None => BTreeMap::new(), 137 | Some(tags) => tags.map(|x| decode_tag(x)).collect(), 138 | } 139 | } 140 | 141 | fn decode_tag(tag: &str) -> (String, String) { 142 | let fields: Vec<_> = tag.splitn(2, '=').collect(); 143 | if fields.len() != 2 { 144 | panic!("Tag must be key=value"); 145 | } 146 | (fields[0].to_string(), fields[1].to_string()) 147 | } 148 | 149 | /// If the caller doesn't specify a 'name=' tag, generate one based on the current timestamp. 150 | /// Also will add a 'dir' attribute for where the tree was captured. 151 | fn add_name_tag>(tags: &mut StoreTags, dir: P) { 152 | if !tags.contains_key("name") { 153 | tags.insert("name".to_string(), Local::now().to_rfc3339()); 154 | } 155 | 156 | if !tags.contains_key("dir") { 157 | tags.insert( 158 | "dir".to_string(), 159 | dir.as_ref() 160 | .canonicalize() 161 | .unwrap_or_else(|_| Path::new("invalid").to_owned()) 162 | .to_string_lossy() 163 | .into_owned(), 164 | ); 165 | } 166 | } 167 | 168 | fn dump_versions(versions: &[StoreVersion]) { 169 | println!("vers | Time captured | name"); 170 | println!("-----+---------------------+------------------"); 171 | for v in versions { 172 | let vers = match v.version { 173 | Version::Latest => "tip", 174 | Version::Prior => "prev", 175 | Version::Tagged(ref v) => v, 176 | }; 177 | println!( 178 | "{:>4} | {} | {}", 179 | vers, 180 | v.time.with_timezone(&Local).format("%Y-%m-%d %H:%M:%S"), 181 | v.name 182 | ); 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /src/node.rs: -------------------------------------------------------------------------------- 1 | //! The sure stream. 2 | //! 3 | //! The sure stream represents a linearization of a SureTree. By keeping 4 | //! representations as iterators across SureNodes instead of keeping an 5 | //! entire tree in memory, we can process larger filesystem trees, using 6 | //! temporary space on the hard disk instead of using memory. 
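//!
//! As a sketch (not from the original docs), a tree such as
//! `root/{a.txt, sub/{b.txt}}` linearizes to:
//!
//! ```text
//! Enter(__root__) Enter(sub) Sep File(b.txt) Leave Sep File(a.txt) Leave
//! ```
//!
//! Subdirectories appear before their parent's `Sep`; files follow it.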
7 | use crate::{suretree::AttMap, Error, Result}; 8 | use flate2::{read::GzDecoder, write::GzEncoder, Compression}; 9 | use std::{ 10 | fs::File, 11 | io::{self, BufRead, BufReader, BufWriter, Read, Write}, 12 | path::{Path, PathBuf}, 13 | }; 14 | use weave::NamingConvention; 15 | 16 | mod compare; 17 | pub mod fs; 18 | mod fullpath; 19 | mod hashes; 20 | 21 | pub use compare::compare_trees; 22 | pub use fullpath::into_tracker; 23 | pub use hashes::{HashCombiner, HashUpdater, Source}; 24 | 25 | #[derive(Clone, Debug)] 26 | pub enum SureNode { 27 | Enter { name: String, atts: AttMap }, 28 | Leave, 29 | File { name: String, atts: AttMap }, 30 | Sep, 31 | } 32 | 33 | impl SureNode { 34 | pub fn is_enter(&self) -> bool { 35 | matches!(self, SureNode::Enter { .. }) 36 | } 37 | 38 | pub fn is_reg_file(&self) -> bool { 39 | match self { 40 | SureNode::File { atts, .. } => atts["kind"] == "file", 41 | _ => false, 42 | } 43 | } 44 | 45 | pub fn is_file(&self) -> bool { 46 | matches!(self, SureNode::File { .. }) 47 | } 48 | 49 | pub fn is_leave(&self) -> bool { 50 | matches!(self, SureNode::Leave) 51 | } 52 | 53 | pub fn is_sep(&self) -> bool { 54 | matches!(self, SureNode::Sep) 55 | } 56 | 57 | pub fn needs_hash(&self) -> bool { 58 | match self { 59 | SureNode::File { atts, .. } => atts["kind"] == "file" && !atts.contains_key("sha1"), 60 | _ => false, 61 | } 62 | } 63 | 64 | pub fn size(&self) -> u64 { 65 | match self { 66 | SureNode::File { atts, .. } => { 67 | atts.get("size").map(|x| x.parse().unwrap()).unwrap_or(0) 68 | } 69 | _ => 0, 70 | } 71 | } 72 | 73 | /// Get the name of this node. Panics if the node type does not have 74 | /// an associated name. 75 | pub fn name(&self) -> &str { 76 | match self { 77 | SureNode::File { ref name, .. } => name, 78 | SureNode::Enter { ref name, .. } => name, 79 | _ => panic!("Node does not have a name"), 80 | } 81 | } 82 | 83 | /// Safely get the name of this node. 84 | pub fn get_name(&self) -> Option<&str> { 85 | match self { 86 | SureNode::File { ref name, .. } => Some(name), 87 | SureNode::Enter { ref name, .. } => Some(name), 88 | _ => None, 89 | } 90 | } 91 | 92 | /// Get a nice representation of the kind of this node. Returns "???" 93 | /// if the kind isn't meaningful. 94 | pub fn kind(&self) -> &str { 95 | self.atts() 96 | .map(|a| a.get("kind").map(|k| &k[..]).unwrap_or("???")) 97 | .unwrap_or("???") 98 | } 99 | 100 | /// Access the nodes attributes. 101 | pub fn atts(&self) -> Option<&AttMap> { 102 | match self { 103 | SureNode::File { ref atts, .. } => Some(atts), 104 | SureNode::Enter { ref atts, .. } => Some(atts), 105 | _ => None, 106 | } 107 | } 108 | 109 | /// Access the nodes attributes mutably. 110 | pub fn atts_mut(&mut self) -> Option<&mut AttMap> { 111 | match self { 112 | SureNode::File { ref mut atts, .. } => Some(atts), 113 | SureNode::Enter { ref mut atts, .. } => Some(atts), 114 | _ => None, 115 | } 116 | } 117 | } 118 | 119 | // TODO: These might be possible to make more generic, but it gets messy, 120 | // as it might just be best to assume failure. 121 | 122 | /// Write a sure iterator to a standard gzipped file of the given name. 123 | pub fn save(name: P, nodes: I) -> Result<()> 124 | where 125 | P: AsRef, 126 | I: Iterator>, 127 | { 128 | let wr = File::create(name)?; 129 | let wr = GzEncoder::new(wr, Compression::default()); 130 | save_to(wr, nodes) 131 | } 132 | 133 | /// Write a sure iterator to a new temp file with a given naming 134 | /// convention. Returns the name of the file, if it could be created. 
The 135 | /// data will not be written compressed. 136 | pub fn save_naming(naming: &N, nodes: I) -> Result 137 | where 138 | N: NamingConvention, 139 | I: Iterator>, 140 | { 141 | let (tmp_name, mut tmp_file) = naming.temp_file()?; 142 | save_to(&mut tmp_file, nodes)?; 143 | Ok(tmp_name) 144 | } 145 | 146 | /// Save a sure tree to the given writer. 147 | pub fn save_to(wr: W, nodes: I) -> Result<()> 148 | where 149 | W: Write, 150 | I: Iterator>, 151 | { 152 | let mut wr = BufWriter::new(wr); 153 | 154 | writeln!(&mut wr, "asure-2.0")?; 155 | writeln!(&mut wr, "-----")?; 156 | 157 | for node in nodes { 158 | match node? { 159 | SureNode::Enter { name, atts } => header(&mut wr, 'd', &name, &atts)?, 160 | SureNode::File { name, atts } => header(&mut wr, 'f', &name, &atts)?, 161 | SureNode::Sep => writeln!(&mut wr, "-")?, 162 | SureNode::Leave => writeln!(&mut wr, "u")?, 163 | } 164 | } 165 | Ok(()) 166 | } 167 | 168 | /// For pushed based writing, we can also write using a NodeWriter. 169 | pub struct NodeWriter { 170 | writer: BufWriter, 171 | } 172 | 173 | impl NodeWriter { 174 | pub fn new(writer: W) -> Result> { 175 | let mut wr = BufWriter::new(writer); 176 | writeln!(&mut wr, "asure-2.0")?; 177 | writeln!(&mut wr, "-----")?; 178 | 179 | Ok(NodeWriter { writer: wr }) 180 | } 181 | 182 | pub fn write_node(&mut self, node: &SureNode) -> Result<()> { 183 | match node { 184 | SureNode::Enter { name, atts } => header(&mut self.writer, 'd', &name, &atts)?, 185 | SureNode::File { name, atts } => header(&mut self.writer, 'f', &name, &atts)?, 186 | SureNode::Sep => writeln!(&mut self.writer, "-")?, 187 | SureNode::Leave => writeln!(&mut self.writer, "u")?, 188 | } 189 | Ok(()) 190 | } 191 | } 192 | 193 | fn header(out: &mut W, kind: char, name: &str, atts: &AttMap) -> Result<()> { 194 | write!(out, "{}{} [", kind, name)?; 195 | 196 | for (k, v) in atts { 197 | write!(out, "{} {} ", k, v)?; 198 | } 199 | writeln!(out, "]")?; 200 | Ok(()) 201 | } 202 | 203 | /// Load and iterate a sure tree from a standard gzip compressed surefile. 204 | pub fn load>(name: P) -> Result>> { 205 | let rd = File::open(name)?; 206 | let rd = GzDecoder::new(rd); 207 | load_from(rd) 208 | } 209 | 210 | /// Load a surenode sequence from the given reader. 211 | pub fn load_from(rd: R) -> Result> { 212 | let rd = BufReader::new(rd); 213 | let mut lines = rd.split(b'\n'); 214 | 215 | fixed(&mut lines, b"asure-2.0")?; 216 | fixed(&mut lines, b"-----")?; 217 | 218 | Ok(ReadIterator { 219 | lines, 220 | depth: 0, 221 | done: false, 222 | }) 223 | } 224 | 225 | fn fixed(inp: &mut I, exp: &[u8]) -> Result<()> 226 | where 227 | I: Iterator>>, 228 | { 229 | match inp.next() { 230 | Some(Ok(ref text)) if &text[..] 
== exp => Ok(()), 231 | Some(Ok(ref text)) => Err(Error::UnexpectedLine( 232 | String::from_utf8_lossy(text).into_owned(), 233 | String::from_utf8_lossy(exp).into_owned(), 234 | )), 235 | Some(Err(e)) => Err(Error::SureFileError(e)), 236 | None => Err(Error::SureFileEof), 237 | } 238 | } 239 | 240 | pub struct ReadIterator<R> { 241 | lines: io::Split<BufReader<R>>, 242 | depth: usize, 243 | done: bool, 244 | } 245 | 246 | impl<R: Read> Iterator for ReadIterator<R> { 247 | type Item = Result<SureNode>; 248 | 249 | fn next(&mut self) -> Option<Result<SureNode>> { 250 | if self.done { 251 | return None; 252 | } 253 | 254 | let line = match self.get_line() { 255 | Ok(line) => line, 256 | Err(e) => return Some(Err(e)), 257 | }; 258 | 259 | match line[0] { 260 | b'd' => { 261 | let (dname, datts) = decode_entity(&line[1..]); 262 | self.depth += 1; 263 | Some(Ok(SureNode::Enter { 264 | name: dname, 265 | atts: datts, 266 | })) 267 | } 268 | b'f' => { 269 | let (fname, fatts) = decode_entity(&line[1..]); 270 | Some(Ok(SureNode::File { 271 | name: fname, 272 | atts: fatts, 273 | })) 274 | } 275 | b'-' => Some(Ok(SureNode::Sep)), 276 | b'u' => { 277 | self.depth -= 1; 278 | if self.depth == 0 { 279 | self.done = true; 280 | } 281 | Some(Ok(SureNode::Leave)) 282 | } 283 | ch => Some(Err(Error::InvalidSurefileChar(ch as char))), 284 | } 285 | } 286 | } 287 | 288 | impl<R: Read> ReadIterator<R> { 289 | fn get_line(&mut self) -> Result<Vec<u8>> { 290 | match self.lines.next() { 291 | None => Err(Error::TruncatedSurefile), 292 | Some(l) => Ok(l?), 293 | } 294 | } 295 | } 296 | 297 | // TODO: This should return Result to handle errors. 298 | pub(crate) fn decode_entity(text: &[u8]) -> (String, AttMap) { 299 | let (name, mut text) = get_delim(text, b' '); 300 | assert!(text[0] == b'['); 301 | text = &text[1..]; 302 | 303 | let mut atts = AttMap::new(); 304 | while text[0] != b']' { 305 | let (key, t2) = get_delim(text, b' '); 306 | let (value, t2) = get_delim(t2, b' '); 307 | text = t2; 308 | 309 | atts.insert(key, value); 310 | } 311 | 312 | (name, atts) 313 | } 314 | 315 | fn get_delim(text: &[u8], delim: u8) -> (String, &[u8]) { 316 | let mut it = text.iter(); 317 | let space = it.position(|&s| s == delim).unwrap(); 318 | ( 319 | String::from_utf8(text[..space].to_owned()).unwrap(), 320 | &text[space + 1..], 321 | ) 322 | } 323 | -------------------------------------------------------------------------------- /src/node/compare.rs: -------------------------------------------------------------------------------- 1 | //! Compare two iterator-based trees. 2 | 3 | use crate::{node::SureNode, Error, Result}; 4 | use log::error; 5 | use std::{collections::HashSet, path::Path}; 6 | 7 | /// This is the mutable state that is threaded through the recursive 8 | /// traversal of the two trees. 9 | struct State<IA, IB> { 10 | left: SureNode, 11 | right: SureNode, 12 | left_iter: IA, 13 | right_iter: IB, 14 | 15 | // Track warning messages about added and deleted attributes. 16 | adds: HashSet<String>, 17 | missings: HashSet<String>, 18 | 19 | // Attributes to be ignored. 20 | ignore: HashSet<String>, 21 | } 22 | 23 | pub fn compare_trees<P: AsRef<Path>, IA, IB>( 24 | mut left: IA, 25 | mut right: IB, 26 | dir: P, 27 | ignore: &[&str], 28 | ) -> Result<()> 29 | where 30 | IA: Iterator<Item = Result<SureNode>>, 31 | IB: Iterator<Item = Result<SureNode>>, 32 | { 33 | let mut ignore: HashSet<String> = ignore.iter().map(|x| (*x).to_owned()).collect(); 34 | // The ctime and ino will be different if a backup is restored, and we'd still like to get 35 | // meaningful results. Add these to the list of ignored attributes.
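    // An illustrative call (assuming two saved surefiles; `load` comes from
    // the parent `node` module):
    //
    //     let old = load("2sure.bak.gz")?;
    //     let new = load("2sure.dat.gz")?;
    //     compare_trees(old, new, "/some/tree", &["uid", "gid"])?;
    //
    // Differences are printed in a diff-like form, e.g.:
    //
    //     + file                   "/some/tree/new.txt"
    //     - dir                    "/some/tree/gone"
    //       [mtime,size          ] "/some/tree/changed.txt"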
36 | ignore.insert("ctime".to_owned()); 37 | ignore.insert("ino".to_owned()); 38 | 39 | let ln = match left.next() { 40 | None => return Err(Error::EmptyLeftIterator), 41 | Some(Err(e)) => return Err(e), 42 | Some(Ok(node)) => node, 43 | }; 44 | let rn = match right.next() { 45 | None => return Err(Error::EmptyRightIterator), 46 | Some(Err(e)) => return Err(e), 47 | Some(Ok(node)) => node, 48 | }; 49 | let mut state = State { 50 | left: ln, 51 | right: rn, 52 | left_iter: left, 53 | right_iter: right, 54 | adds: HashSet::new(), 55 | missings: HashSet::new(), 56 | ignore, 57 | }; 58 | 59 | state.walk_root(dir.as_ref()) 60 | } 61 | 62 | impl State 63 | where 64 | IA: Iterator>, 65 | IB: Iterator>, 66 | { 67 | /// Advance the left iterator. If it sees the end, it will drop in a 68 | /// "Leave" node, which shouldn't be visited as long as the tree is 69 | /// well-formed. 70 | fn next_left(&mut self) -> Result<()> { 71 | let next = match self.left_iter.next() { 72 | None => SureNode::Leave, 73 | Some(Ok(node)) => node, 74 | Some(Err(e)) => return Err(e), 75 | }; 76 | 77 | self.left = next; 78 | Ok(()) 79 | } 80 | 81 | /// Advance the right iterator. If it sees the end, it will drop in a 82 | /// "Leave" node, which shouldn't be visited as long as the tree is 83 | /// well-formed. 84 | fn next_right(&mut self) -> Result<()> { 85 | let next = match self.right_iter.next() { 86 | None => SureNode::Leave, 87 | Some(Ok(node)) => node, 88 | Some(Err(e)) => return Err(e), 89 | }; 90 | 91 | self.right = next; 92 | Ok(()) 93 | } 94 | 95 | fn walk_root(&mut self, dir: &Path) -> Result<()> { 96 | if !self.left.is_enter() { 97 | Err(Error::UnexpectedLeftNode) 98 | } else if !self.right.is_enter() { 99 | Err(Error::UnexpectedRightNode) 100 | } else if self.left.name() != "__root__" || self.right.name() != "__root__" { 101 | Err(Error::IncorrectName) 102 | } else { 103 | self.compare_enter(dir)?; 104 | self.next_left()?; 105 | self.next_right()?; 106 | self.walk_samedir(dir) 107 | } 108 | } 109 | 110 | /// We are within a directory (of the given name) where both trees have 111 | /// the same directory. This will recursively compare any children, 112 | /// and once both have reached the separator, move to `walk_samefiles`. 113 | fn walk_samedir(&mut self, dir: &Path) -> Result<()> { 114 | loop { 115 | match (self.left.is_sep(), self.right.is_sep()) { 116 | (true, true) => { 117 | self.next_left()?; 118 | self.next_right()?; 119 | return self.walk_samefiles(dir); 120 | } 121 | (false, true) => { 122 | // The old trees has subdirectories not in this 123 | // directory. 124 | self.show_delete(dir); 125 | self.next_left()?; 126 | self.walk_leftdir()?; 127 | } 128 | (true, false) => { 129 | // The new tree has a newly added directory. 130 | self.show_add(dir); 131 | self.next_right()?; 132 | self.walk_rightdir()?; 133 | } 134 | _ if self.left.name() < self.right.name() => { 135 | // Old subdirectory. 136 | self.show_delete(dir); 137 | self.next_left()?; 138 | self.walk_leftdir()?; 139 | } 140 | _ if self.left.name() > self.right.name() => { 141 | // The new tree has a newly added directory. 142 | self.show_add(dir); 143 | self.next_right()?; 144 | self.walk_rightdir()?; 145 | } 146 | _ => { 147 | // Same named directory. 
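                    // (Names within each section are sorted, so this walk is
                    // a two-pointer merge: compare the heads, advance the
                    // side with the smaller name, and recurse when both
                    // sides name the same directory.)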
148 | let dirname = dir.join(self.left.name()); 149 | self.compare_enter(&dirname)?; 150 | self.next_left()?; 151 | self.next_right()?; 152 | self.walk_samedir(&dirname)?; 153 | } 154 | } 155 | } 156 | } 157 | 158 | /// We are within the files section of the same directory in the two 159 | /// trees. Walk through the nodes, reading the Leave node in both, and 160 | /// returning. 161 | fn walk_samefiles(&mut self, dir: &Path) -> Result<()> { 162 | loop { 163 | match (self.left.is_leave(), self.right.is_leave()) { 164 | (true, true) => { 165 | self.next_left()?; 166 | self.next_right()?; 167 | return Ok(()); 168 | } 169 | (false, true) => { 170 | self.show_delete(dir); 171 | self.next_left()?; 172 | } 173 | (true, false) => { 174 | self.show_add(dir); 175 | self.next_right()?; 176 | } 177 | _ if self.left.name() < self.right.name() => { 178 | self.show_delete(dir); 179 | self.next_left()?; 180 | } 181 | _ if self.left.name() > self.right.name() => { 182 | self.show_add(dir); 183 | self.next_right()?; 184 | } 185 | _ => { 186 | // Same file. 187 | let nodename = dir.join(self.left.name()); 188 | self.compare_file(&nodename)?; 189 | self.next_left()?; 190 | self.next_right()?; 191 | } 192 | } 193 | } 194 | } 195 | 196 | /// Old directory on the left tree. Walk through nodes recursively to 197 | /// discard entire tree. 198 | fn walk_leftdir(&mut self) -> Result<()> { 199 | loop { 200 | if self.left.is_enter() { 201 | self.next_left()?; 202 | self.walk_leftdir()?; 203 | } else if self.left.is_leave() { 204 | self.next_left()?; 205 | return Ok(()); 206 | } else { 207 | self.next_left()?; 208 | } 209 | } 210 | } 211 | 212 | /// New directory on the right tree. Walk through nodes recursively to 213 | /// discard entire tree. 214 | fn walk_rightdir(&mut self) -> Result<()> { 215 | loop { 216 | if self.right.is_enter() { 217 | self.next_right()?; 218 | self.walk_rightdir()?; 219 | } else if self.right.is_leave() { 220 | self.next_right()?; 221 | return Ok(()); 222 | } else { 223 | self.next_right()?; 224 | } 225 | } 226 | } 227 | 228 | /// Print a message about something added (the name will be the thing 229 | /// on the right. 230 | fn show_add(&self, dir: &Path) { 231 | println!( 232 | "+ {:22} {:?}", 233 | self.right.kind(), 234 | dir.join(self.right.name()) 235 | ); 236 | } 237 | 238 | /// Print a message about something removed (the name will be the thing 239 | /// on the left. 240 | fn show_delete(&self, dir: &Path) { 241 | println!("- {:22} {:?}", self.left.kind(), dir.join(self.left.name())); 242 | } 243 | 244 | /// Compare the two "Enter" nodes we are visiting. 245 | fn compare_enter(&mut self, dir: &Path) -> Result<()> { 246 | self.compare_atts('d', dir) 247 | } 248 | 249 | /// Compare two file nodes. 250 | fn compare_file(&mut self, dir: &Path) -> Result<()> { 251 | self.compare_atts('f', dir) 252 | } 253 | 254 | /// Attribute comparison. 255 | fn compare_atts(&mut self, _kind: char, dir: &Path) -> Result<()> { 256 | let mut old = self.left.atts().unwrap().clone(); 257 | let mut new = self.right.atts().unwrap().clone(); 258 | let mut diffs = vec![]; 259 | 260 | for att in self.ignore.iter() { 261 | old.remove(att); 262 | new.remove(att); 263 | } 264 | 265 | for (k, v) in &new { 266 | match old.get(k) { 267 | None => { 268 | // This attribute is in the new tree, but not the old 269 | // one, warn, but only once. 
270 | if !self.adds.contains(k) { 271 | error!("Added attribute: {}", k); 272 | self.adds.insert(k.clone()); 273 | } 274 | } 275 | Some(ov) => { 276 | if v != ov { 277 | diffs.push(k.clone()); 278 | } 279 | } 280 | } 281 | old.remove(k); 282 | } 283 | 284 | for k in old.keys() { 285 | if !self.missings.contains(k) { 286 | error!("Missing attribute: {}", k); 287 | self.missings.insert(k.clone()); 288 | } 289 | } 290 | 291 | if !diffs.is_empty() { 292 | let mut buf = String::new(); 293 | diffs.sort(); 294 | for d in &diffs { 295 | if !buf.is_empty() { 296 | buf.push(','); 297 | } 298 | buf.push_str(&d); 299 | } 300 | println!(" [{:<20}] {:?}", buf, dir); 301 | } 302 | 303 | Ok(()) 304 | } 305 | } 306 | -------------------------------------------------------------------------------- /src/node/fs.rs: -------------------------------------------------------------------------------- 1 | /// Sure tree scanning from the filesystem. 2 | use crate::{ 3 | escape::Escape, node::SureNode, progress::ScanProgress, surefs::encode_atts, suretree::AttMap, 4 | Error, Result, 5 | }; 6 | use log::error; 7 | use std::{ 8 | collections::VecDeque, 9 | fs::{self, symlink_metadata, Metadata}, 10 | os::unix::prelude::*, 11 | path::{Path, PathBuf}, 12 | }; 13 | 14 | pub fn walk>(root: P) -> Result<()> { 15 | for entry in scan_fs(root)? { 16 | let entry = entry?; 17 | println!("{:?}", entry); 18 | } 19 | 20 | Ok(()) 21 | } 22 | 23 | /// A filesystem scanner walks a filesystem, iterating over a tree as it is 24 | /// encountered. 25 | pub fn scan_fs>(root: P) -> Result { 26 | let root = root.as_ref().to_path_buf(); 27 | let meta = symlink_metadata(&root)?; 28 | 29 | if !meta.is_dir() { 30 | return Err(Error::RootMustBeDir); 31 | } 32 | 33 | let atts = encode_atts(&root, &meta); 34 | let root_dev = meta.dev(); 35 | let mut todo = VecDeque::new(); 36 | todo.push_back(AugNode::SubDir { 37 | path: root, 38 | name: "__root__".to_string(), 39 | meta, 40 | atts, 41 | }); 42 | 43 | let si = ScanIterator { 44 | todo, 45 | root_dev, 46 | progress: ScanProgress::new(), 47 | }; 48 | 49 | Ok(si) 50 | } 51 | 52 | pub struct ScanIterator { 53 | todo: VecDeque, 54 | root_dev: u64, 55 | progress: ScanProgress, 56 | } 57 | 58 | impl Iterator for ScanIterator { 59 | type Item = Result; 60 | 61 | fn next(&mut self) -> Option> { 62 | match self.todo.pop_front() { 63 | None => None, 64 | Some(AugNode::Normal(e)) => Some(Ok(e)), 65 | Some(AugNode::SubDir { 66 | path, 67 | name, 68 | atts, 69 | meta, 70 | }) => { 71 | // Push the contents of this directory. Unless we have 72 | // crossed a mountpoint. 73 | if !meta.is_dir() || meta.dev() == self.root_dev { 74 | match self.push_dir(&path) { 75 | Ok(()) => (), 76 | Err(e) => return Some(Err(e)), 77 | }; 78 | } else { 79 | self.push_empty_dir(); 80 | } 81 | 82 | Some(Ok(SureNode::Enter { name, atts })) 83 | } 84 | } 85 | } 86 | } 87 | 88 | impl ScanIterator { 89 | fn push_dir(&mut self, path: &Path) -> Result<()> { 90 | let mut entries = vec![]; 91 | 92 | match fs::read_dir(path) { 93 | Ok(dir) => { 94 | for entry in dir { 95 | let entry = match entry { 96 | Ok(ent) => ent, 97 | Err(err) => { 98 | error!("Unable to read from dir: {:?} ({})", path, err); 99 | break; 100 | } 101 | }; 102 | entries.push(entry); 103 | } 104 | } 105 | Err(e) => { 106 | // Warn about the issue, but otherwise continue, with just an empty directory. 107 | error!("Unable to read dir: {:?} ({})", path, e); 108 | } 109 | }; 110 | 111 | // Sort by inode first. 
This helps performance on some filesystems 112 | // (such as ext4). 113 | entries.sort_by_key(|a| a.ino()); 114 | 115 | let mut files: Vec<_> = entries 116 | .iter() 117 | .filter_map(|e| match e.metadata() { 118 | Ok(m) => { 119 | let path = e.path(); 120 | let atts = encode_atts(&path, &m); 121 | 122 | Some(OneFile { 123 | path, 124 | meta: m, 125 | atts, 126 | }) 127 | } 128 | Err(err) => { 129 | error!("Unable to stat file: {:?} ({})", e.path(), err); 130 | None 131 | } 132 | }) 133 | .collect(); 134 | 135 | // Sort them back by name. 136 | files.sort_by(|a, b| a.path.file_name().cmp(&b.path.file_name())); 137 | 138 | let (dirs, files): (Vec<_>, Vec<_>) = files.into_iter().partition(|n| n.meta.is_dir()); 139 | 140 | self.progress.update( 141 | dirs.len() as u64, 142 | files.len() as u64, 143 | files.iter().map(|x| x.meta.len()).sum(), 144 | ); 145 | 146 | self.todo.push_front(AugNode::Normal(SureNode::Leave)); 147 | 148 | // The files in reverse order. 149 | for f in files.into_iter().rev() { 150 | self.todo.push_front(AugNode::Normal(SureNode::File { 151 | name: f.path.file_name().unwrap().as_bytes().escaped(), 152 | atts: f.atts, 153 | })); 154 | } 155 | 156 | self.todo.push_front(AugNode::Normal(SureNode::Sep)); 157 | 158 | // The dirs in reverse order. 159 | for d in dirs.into_iter().rev() { 160 | let name = d.path.file_name().unwrap().as_bytes().escaped(); 161 | self.todo.push_front(AugNode::SubDir { 162 | path: d.path, 163 | name, 164 | meta: d.meta, 165 | atts: d.atts, 166 | }); 167 | } 168 | 169 | Ok(()) 170 | } 171 | 172 | /// Pushes the Sep and Leave needed to make an empty directory work. 173 | /// Used when skipping directories that cross mountpoints. 174 | fn push_empty_dir(&mut self) { 175 | self.todo.push_front(AugNode::Normal(SureNode::Leave)); 176 | self.todo.push_front(AugNode::Normal(SureNode::Sep)); 177 | } 178 | } 179 | 180 | struct OneFile { 181 | path: PathBuf, 182 | meta: Metadata, 183 | atts: AttMap, 184 | } 185 | 186 | /// Augmented entries. This intersperses regular nodes with special ones 187 | /// containing enough information to add subdirectories. 188 | enum AugNode { 189 | Normal(SureNode), 190 | SubDir { 191 | path: PathBuf, 192 | name: String, 193 | meta: Metadata, 194 | atts: AttMap, 195 | }, 196 | } 197 | -------------------------------------------------------------------------------- /src/node/fullpath.rs: -------------------------------------------------------------------------------- 1 | //! Augment an iterator over nodes with something that tracks the full 2 | //! path of the files involved. 3 | //! 4 | //! Unfortunately, Rust's Iter does not tie any lifetimes between the 5 | //! iterator and the result of iteration (which is usually good). This 6 | //! makes it difficult to avoid computing these paths, however. 7 | //! 8 | //! If this becomes a performance bottleneck, we can come up with something 9 | //! more complicated that avoids computing (and allocating) the result 10 | //! paths for each node encountered. 11 | 12 | use crate::{escape::Unescape, node::SureNode, Result}; 13 | use std::{ 14 | ffi::OsString, 15 | os::unix::ffi::OsStringExt, 16 | path::{Path, PathBuf}, 17 | }; 18 | 19 | pub fn into_tracker(iter: I, root: &str) -> impl Iterator> 20 | where 21 | I: Iterator>, 22 | { 23 | let root: OsString = OsStringExt::from_vec(root.unescape().unwrap()); 24 | let mut cur = Path::new(&root).to_path_buf(); 25 | let mut at_root = true; 26 | iter.map(move |node| { 27 | let node = node?; 28 | let path = match &node { 29 | SureNode::Enter { name, .. 
} => { 30 | // Don't add the pseudo "__root__" directory. 31 | if at_root { 32 | if name != "__root__" { 33 | panic!("Root directory not at root"); 34 | } 35 | at_root = false; 36 | } else { 37 | let name: OsString = OsStringExt::from_vec(name.unescape().unwrap()); 38 | cur.push(&name); 39 | } 40 | Some(cur.clone()) 41 | } 42 | SureNode::File { name, .. } => { 43 | let name: OsString = OsStringExt::from_vec(name.unescape().unwrap()); 44 | cur.push(&name); 45 | Some(cur.clone()) 46 | } 47 | _ => None, 48 | }; 49 | 50 | let do_pop = node.is_file() || node.is_leave(); 51 | 52 | let result = Ok(PathedNode { node, path }); 53 | 54 | if do_pop { 55 | cur.pop(); 56 | } 57 | 58 | result 59 | }) 60 | } 61 | 62 | #[derive(Debug)] 63 | pub struct PathedNode { 64 | pub node: SureNode, 65 | pub path: Option, 66 | } 67 | 68 | /* 69 | pub trait PathTrack: Sized { 70 | fn into_tracker(self, root: &str) -> PathTracker; 71 | } 72 | 73 | impl>> PathTrack for I { 74 | fn into_tracker(self, root: &str) -> PathTracker { 75 | PathTracker { 76 | iter: self, 77 | root: Some(root.to_owned()), 78 | dirs: vec![], 79 | } 80 | } 81 | } 82 | 83 | pub struct PathTracker { 84 | iter: I, 85 | root: Option, 86 | dirs: Vec, 87 | } 88 | 89 | #[derive(Debug)] 90 | pub struct PathedNode { 91 | pub node: SureNode, 92 | pub path: Option, 93 | } 94 | 95 | impl Iterator for PathTracker 96 | where I: Iterator>, 97 | { 98 | type Item = Result; 99 | 100 | fn next(&mut self) -> Option> { 101 | match self.iter.next() { 102 | None => None, 103 | Some(Err(e)) => Some(Err(e)), 104 | Some(Ok(node)) => { 105 | let path = match &node { 106 | SureNode::Enter { name, .. } => { 107 | // Don't add the pseudo "__root__ flag. 108 | if self.dirs.is_empty() && name == "__root__" { 109 | let root = self.root.take().unwrap(); 110 | self.dirs.push(root); 111 | } else { 112 | self.dirs.push(name.clone()); 113 | } 114 | Some(self.dirs.join("/")) 115 | } 116 | SureNode::File { name, .. } => { 117 | self.dirs.push(name.clone()); 118 | Some(self.dirs.join("/")) 119 | } 120 | _ => None, 121 | }; 122 | 123 | let do_pop = node.is_file() || node.is_leave(); 124 | 125 | let result = Some(Ok(PathedNode { 126 | node: node, 127 | path: path, 128 | })); 129 | 130 | if do_pop { 131 | self.dirs.pop(); 132 | } 133 | 134 | result 135 | } 136 | } 137 | } 138 | } 139 | */ 140 | -------------------------------------------------------------------------------- /src/node/hashes.rs: -------------------------------------------------------------------------------- 1 | //! Hash updates for node-based sure file. 2 | 3 | use crate::{ 4 | hashes::{hash_file, noatime_open, Estimate}, 5 | node::{into_tracker, NodeWriter, SureNode}, 6 | progress::Progress, 7 | store::{Store, TempCleaner}, 8 | Error, Result, 9 | }; 10 | use crossbeam::channel::{bounded, Sender}; 11 | use data_encoding::HEXLOWER; 12 | use log::{debug, error}; 13 | use rusqlite::{types::ToSql, Connection}; 14 | use std::{ 15 | cmp::Ordering, 16 | io::Write, 17 | mem, 18 | path::PathBuf, 19 | sync::{mpsc::sync_channel, Arc, Mutex}, 20 | thread, 21 | }; 22 | 23 | /// A Source is something that can repeatedly give us an iterator over 24 | /// nodes. 25 | pub trait Source { 26 | fn iter(&self) -> Result> + Send>>; 27 | } 28 | 29 | /// The HashUpdater is able to update hashes. This is the first pass. 
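///
/// A sketch of the full two-pass flow (names from this module; assumes a
/// `Source`, a `Store`, and an `Estimate` are at hand):
///
/// ```text
/// let merger = HashUpdater::new(source, store)
///     .compute_parallel(base_dir, &estimate)?; // pass 1: hash into temp db
/// merger.merge(&mut NodeWriter::new(out)?)?;   // pass 2: fold hashes back in
/// ```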
30 | pub struct HashUpdater<'n, S> { 31 | source: S, 32 | store: &'n dyn Store, 33 | } 34 | 35 | pub struct HashMerger { 36 | source: S, 37 | conn: Connection, 38 | // Own the temp, so it won't be deleted until the connection is also 39 | // closed. 40 | _temp: Box, 41 | } 42 | 43 | impl<'a, S: Source> HashUpdater<'a, S> { 44 | pub fn new(source: S, store: &dyn Store) -> HashUpdater { 45 | HashUpdater { source, store } 46 | } 47 | 48 | /// First pass. Go through the source nodes, and for any that need a 49 | /// hash, compute the hash, and collect the results into a temporary 50 | /// file. Consumes the updater, returning the HashMerger which is used 51 | /// to merge the hash results into a datastream. 52 | pub fn compute(mut self, base: &str, estimate: &Estimate) -> Result> { 53 | let meter = Arc::new(Mutex::new(Progress::new(estimate.files, estimate.bytes))); 54 | let (mut conn, temp) = self.setup_db()?; 55 | 56 | let (tx, rx) = sync_channel(num_cpus::get()); 57 | 58 | let iter = into_tracker(self.source.iter()?, base); 59 | let mut count = 0; 60 | let meter2 = meter.clone(); 61 | thread::spawn(move || { 62 | for entry in iter { 63 | let entry = entry.unwrap(); 64 | if entry.node.needs_hash() { 65 | let path = entry.path.unwrap(); 66 | match noatime_open(&path) { 67 | Ok(mut fd) => match hash_file(&mut fd) { 68 | Ok(ref h) => { 69 | tx.send(Some(HashInfo { 70 | id: count, 71 | hash: h.as_ref().to_owned(), 72 | })) 73 | .unwrap(); 74 | } 75 | Err(e) => { 76 | error!("Unable to hash file: '{:?}' ({})", path, e); 77 | } 78 | }, 79 | Err(e) => { 80 | error!("Unable to open '{:?}' for hashing ({})", path, e); 81 | } 82 | } 83 | // println!("{} {:?}", count, entry.path); 84 | count += 1; 85 | 86 | meter2.lock().unwrap().update(1, entry.node.size()); 87 | } 88 | } 89 | tx.send(None).unwrap(); 90 | }); 91 | 92 | // The above will send Option over the tx/rx channel. 93 | // Capture these and add them all to the database. 94 | let trans = conn.transaction()?; 95 | while let Some(info) = rx.recv()? { 96 | trans.execute( 97 | "INSERT INTO hashes (id, hash) VALUES (?1, ?2)", 98 | &[&info.id as &dyn ToSql, &info.hash as &dyn ToSql], 99 | )?; 100 | } 101 | trans.commit()?; 102 | 103 | meter.lock().unwrap().flush(); 104 | Ok(HashMerger { 105 | source: self.source, 106 | conn, 107 | _temp: temp, 108 | }) 109 | } 110 | 111 | /// First pass, multi-threaded version. Go through the source nodes, 112 | /// and for any that need a hash, compute the hash, and collect the 113 | /// result into a temporary file. Consumes the updater, returning the 114 | /// HashMerger which is used to merge the hash results into a 115 | /// datastream. 116 | pub fn compute_parallel(mut self, base: &str, estimate: &Estimate) -> Result> { 117 | let meter = Arc::new(Mutex::new(Progress::new(estimate.files, estimate.bytes))); 118 | let iter = into_tracker(self.source.iter()?, base); 119 | let (mut conn, temp) = self.setup_db()?; 120 | let trans = conn.transaction()?; 121 | 122 | let meter2 = meter.clone(); 123 | crossbeam::scope(move |s| { 124 | let ncpu = num_cpus::get(); 125 | 126 | // The work channel. Single sender, multiple receivers (one 127 | // for each CPU). 128 | let (work_send, work_recv) = bounded(ncpu); 129 | 130 | // The result channel. Multiple senders, single receiver. 131 | let (result_send, result_recv) = bounded(ncpu); 132 | 133 | // This thread reads the nodes, and submits work requests for 134 | // them. This will close the channel when it finishes, as the 135 | // work_send is moved in. 
136 | s.spawn(move |_| { 137 | let mut count = 0; 138 | for entry in iter { 139 | let entry = entry.unwrap(); // TODO: Handle error. 140 | if entry.node.needs_hash() { 141 | let path = entry.path.unwrap(); 142 | work_send 143 | .send(HashWork { 144 | id: count, 145 | path, 146 | size: entry.node.size(), 147 | }) 148 | .unwrap(); 149 | count += 1; 150 | } 151 | } 152 | }); 153 | 154 | // Fire off a thread for each worker. 155 | for _ in 0..ncpu { 156 | let work_recv = work_recv.clone(); 157 | let result_send = result_send.clone(); 158 | let meter2 = meter2.clone(); 159 | s.spawn(move |_| { 160 | for work in work_recv { 161 | hash_one_file(&work, &result_send, &meter2); 162 | } 163 | }); 164 | } 165 | drop(result_send); 166 | 167 | // And, in the main thread, take all of the results, and add 168 | // them to the sql database. 169 | for info in result_recv { 170 | trans 171 | .execute( 172 | "INSERT INTO hashes (id, hash) VALUES (?1, ?2)", 173 | &[&info.id as &dyn ToSql, &info.hash as &dyn ToSql], 174 | ) 175 | .unwrap(); 176 | } 177 | trans.commit()?; 178 | ok_result() 179 | }) 180 | .map_err(|e| Error::Hash(format!("{:?}", e)))??; 181 | 182 | meter.lock().unwrap().flush(); 183 | Ok(HashMerger { 184 | source: self.source, 185 | conn, 186 | _temp: temp, 187 | }) 188 | } 189 | 190 | /// Set up the sqlite database to hold the hash updates. 191 | fn setup_db(&mut self) -> Result<(Connection, Box)> { 192 | // Create the temp file. Discard the file so that it will be 193 | // closed. 194 | let tmp = self.store.make_temp()?.into_loader()?; 195 | let conn = Connection::open(tmp.path_ref())?; 196 | conn.execute( 197 | "CREATE TABLE hashes ( 198 | id INTEGER PRIMARY KEY, 199 | hash BLOB)", 200 | [], 201 | )?; 202 | 203 | Ok((conn, tmp.into_cleaner()?)) 204 | } 205 | } 206 | 207 | fn hash_one_file(work: &HashWork, sender: &Sender, meter: &Arc>) { 208 | match noatime_open(&work.path) { 209 | Ok(mut fd) => match hash_file(&mut fd) { 210 | Ok(ref h) => { 211 | sender 212 | .send(HashInfo { 213 | id: work.id, 214 | hash: h.as_ref().to_owned(), 215 | }) 216 | .unwrap(); 217 | } 218 | Err(e) => { 219 | error!("Unable to hash file: '{:?}' ({})", work.path, e); 220 | } 221 | }, 222 | Err(e) => { 223 | error!("Unable to open '{:?}' for hashing ({})", work.path, e); 224 | } 225 | } 226 | meter.lock().unwrap().update(1, work.size); 227 | } 228 | 229 | // To make it easier to return a typed result. 230 | fn ok_result() -> Result<()> { 231 | Ok(()) 232 | } 233 | 234 | impl HashMerger { 235 | /// Second pass. Merge the updated hashes back into the data. Note 236 | /// that this is 'push' based instead of 'pull' because there is a 237 | /// chain of lifetime dependencies from Connection->Statement->Rows and 238 | /// if we tried to return something holding the Rows iterator, the user 239 | /// would have to manage these lifetimes. 240 | pub fn merge(self, writer: &mut NodeWriter) -> Result<()> { 241 | let mut stmt = self 242 | .conn 243 | .prepare("SELECT id, hash FROM hashes ORDER BY id")?; 244 | let mut hash_iter = stmt 245 | .query_map([], |row| { 246 | Ok(HashInfo { 247 | id: row.get(0)?, 248 | hash: row.get(1)?, 249 | }) 250 | })? 251 | .peekable(); 252 | 253 | let mut count = 0; 254 | for entry in self.source.iter()? 
{ 255 | let mut entry = entry?; 256 | if entry.needs_hash() { 257 | let hnode = loop { 258 | match hash_iter.peek() { 259 | Some(Ok(hnode)) => { 260 | match count.cmp(&hnode.id) { 261 | Ordering::Equal => { 262 | let node = hash_iter.next().unwrap()?; 263 | break Some(node); 264 | } 265 | Ordering::Less => { 266 | // Node not present in hash, means we 267 | // weren't able to compute a hash of the 268 | // file. 269 | break None; 270 | } 271 | _ => panic!("Out of sequence hash"), 272 | } 273 | } 274 | Some(Err(e)) => { 275 | return Err(Error::WrappedSql(format!("{:?}", e))); 276 | } 277 | None => break None, 278 | } 279 | }; 280 | 281 | if let Some(HashInfo { hash, .. }) = &hnode { 282 | let hex = HEXLOWER.encode(hash); 283 | entry.atts_mut().unwrap().insert("sha1".to_string(), hex); 284 | } 285 | 286 | count += 1; 287 | } 288 | writer.write_node(&entry)?; 289 | // println!("{:?}", entry); 290 | } 291 | 292 | Ok(()) 293 | } 294 | } 295 | 296 | #[derive(Debug)] 297 | struct HashInfo { 298 | id: i64, 299 | hash: Vec, 300 | } 301 | 302 | #[derive(Debug)] 303 | struct HashWork { 304 | id: i64, 305 | size: u64, 306 | path: PathBuf, 307 | } 308 | 309 | /// An iterator that pulls hash from old nodes if the file is unchanged. 310 | pub struct HashCombiner { 311 | // This works like Peekable, but we keep the head in this structure and 312 | // swap it out to advance. Because the nodes are a strict tree 313 | // traversal, we always have a node to view, which makes this simpler 314 | // to use than Peekable, where every call can return a node or a 315 | // failure. 316 | /// The current head of the left tree. 317 | left: SureNode, 318 | /// The current head of the right tree. 319 | right: SureNode, 320 | 321 | /// The iterator for the left node. 322 | left_iter: Iold, 323 | /// The iterator for the right node. 324 | right_iter: Inew, 325 | 326 | state: Vec, 327 | seen_root: bool, 328 | } 329 | 330 | #[derive(Debug)] 331 | enum CombineState { 332 | // Discard one tree level on the left side, we are viewing the dir 333 | // nodes. 334 | LeftDirs, 335 | 336 | // We are passing through the tree on the right. Visiting the dir 337 | // nodes. 338 | RightDirs, 339 | 340 | // We are in a common directory, visiting the dir nodes. 341 | SameDirs, 342 | 343 | // We are in a common directory, visiting the file nodes. 344 | SameFiles, 345 | } 346 | 347 | impl HashCombiner 348 | where 349 | Iold: Iterator>, 350 | Inew: Iterator>, 351 | { 352 | pub fn new(mut left_iter: Iold, mut right_iter: Inew) -> Result> { 353 | let left = match left_iter.next() { 354 | None => return Err(Error::EmptyLeftIterator), 355 | Some(Err(e)) => return Err(e), 356 | Some(Ok(node)) => node, 357 | }; 358 | let right = match right_iter.next() { 359 | None => return Err(Error::EmptyRightIterator), 360 | Some(Err(e)) => return Err(e), 361 | Some(Ok(node)) => node, 362 | }; 363 | 364 | Ok(HashCombiner { 365 | left, 366 | right, 367 | left_iter, 368 | right_iter, 369 | state: vec![], 370 | seen_root: false, 371 | }) 372 | } 373 | 374 | /// Advance the left iterator, replacing 'left' with the new value, and 375 | /// returning that old value. Returns the error from the iterator if 376 | /// that happened. If we see the end of the iterator, places 'Leave' 377 | /// in the node, which should be the same as what was there. 
378 | fn next_left(&mut self) -> Result { 379 | let next = match self.left_iter.next() { 380 | None => SureNode::Leave, 381 | Some(Ok(node)) => node, 382 | Some(Err(e)) => return Err(e), 383 | }; 384 | 385 | Ok(mem::replace(&mut self.left, next)) 386 | } 387 | 388 | /// Advance the right iterator, replacing 'right' with the new value, and 389 | /// returning that old value. Returns the error from the iterator if 390 | /// that happened. If we see the end of the iterator, places 'Leave' 391 | /// in the node, which should be the same as what was there. 392 | fn next_right(&mut self) -> Result { 393 | let next = match self.right_iter.next() { 394 | None => SureNode::Leave, 395 | Some(Ok(node)) => node, 396 | Some(Err(e)) => return Err(e), 397 | }; 398 | 399 | Ok(mem::replace(&mut self.right, next)) 400 | } 401 | } 402 | 403 | /// The result of one of the visitors. Continue means to go ahead and 404 | /// process the next nodes. Return means that this result should be 405 | /// returned. Note that we handle the EoF case specially, so this is not 406 | /// an option. 407 | enum VisitResult { 408 | Continue, 409 | Node(SureNode), 410 | } 411 | 412 | macro_rules! vre { 413 | ($err:expr) => { 414 | Err($err) 415 | }; 416 | } 417 | 418 | macro_rules! vro { 419 | ($result:expr) => { 420 | Ok(VisitResult::Node($result)) 421 | }; 422 | } 423 | 424 | // The iterator for the hash combiner. This iterator lazily traverses two 425 | // iterators that are assumed to be and old and new traversal of the same 426 | // filesystem. The output will be the same nodes as the new, but possibly 427 | // with 'sha1' values carried over from the old tree when there is a 428 | // sufficient match. 429 | impl Iterator for HashCombiner 430 | where 431 | Iold: Iterator>, 432 | Inew: Iterator>, 433 | { 434 | type Item = Result; 435 | 436 | fn next(&mut self) -> Option> { 437 | loop { 438 | // Handle the completion state separately, so we don't have as 439 | // many to deal with below. 440 | if self.seen_root && self.state.is_empty() { 441 | return None; 442 | } 443 | 444 | let vr = match self.state.pop() { 445 | None => self.visit_root(), 446 | Some(CombineState::SameDirs) => self.visit_samedir(), 447 | Some(CombineState::SameFiles) => self.visit_samefiles(), 448 | Some(CombineState::RightDirs) => self.visit_rightdirs(), 449 | Some(CombineState::LeftDirs) => self.visit_leftdirs(), 450 | }; 451 | 452 | match vr { 453 | Ok(VisitResult::Continue) => (), 454 | Ok(VisitResult::Node(node)) => return Some(Ok(node)), 455 | Err(e) => return Some(Err(e)), 456 | } 457 | } 458 | } 459 | } 460 | 461 | // The body, a method for each state. 462 | impl HashCombiner 463 | where 464 | Iold: Iterator>, 465 | Inew: Iterator>, 466 | { 467 | fn visit_root(&mut self) -> Result { 468 | if !self.left.is_enter() { 469 | vre!(Error::UnexpectedLeftNode) 470 | } else if !self.right.is_enter() { 471 | vre!(Error::UnexpectedRightNode) 472 | } else if self.left.name() != "__root__" || self.right.name() != "__root__" { 473 | vre!(Error::IncorrectName) 474 | } else { 475 | let _ = self.next_left()?; 476 | let rnode = self.next_right()?; 477 | self.state.push(CombineState::SameDirs); 478 | self.seen_root = true; 479 | vro!(rnode) 480 | } 481 | } 482 | 483 | // Both trees are in the same directory, and we are looking at 484 | // directory nodes. 485 | fn visit_samedir(&mut self) -> Result { 486 | // Handle the cases where they aren't finished together. 
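        // (`state` acts as an explicit recursion stack: pushing `SameDirs`
        // twice means "descend into the child, then resume this directory",
        // replacing the call-stack recursion used in `compare.rs`.)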
487 | debug!("visit samedir: {:?}, {:?}", self.left, self.right); 488 | match (self.left.is_sep(), self.right.is_sep()) { 489 | (true, true) => { 490 | // Both have finished with child directories. 491 | let _ = self.next_left()?; 492 | let rnode = self.next_right()?; 493 | // Push the new state. 494 | self.state.push(CombineState::SameFiles); 495 | vro!(rnode) 496 | } 497 | (false, false) => { 498 | // We are still visiting directories. Assume it is well 499 | // formed, and we are only going to see Enter nodes. 500 | match self.left.name().cmp(&self.right.name()) { 501 | Ordering::Equal => { 502 | // This is the same directory, descend it. 503 | self.state.push(CombineState::SameDirs); 504 | self.state.push(CombineState::SameDirs); 505 | let _ = self.next_left()?; 506 | vro!(self.next_right()?) 507 | } 508 | Ordering::Less => { 509 | // A directory in the old tree we no longer have. 510 | let _ = self.next_left()?; 511 | self.state.push(CombineState::SameDirs); 512 | self.state.push(CombineState::LeftDirs); 513 | Ok(VisitResult::Continue) 514 | } 515 | Ordering::Greater => { 516 | // A new directory entirely. 517 | self.state.push(CombineState::SameDirs); 518 | self.state.push(CombineState::RightDirs); 519 | vro!(self.next_right()?) 520 | } 521 | } 522 | } 523 | (false, true) => { 524 | // Old has an old directory no longer present. 525 | let _ = self.next_left()?; 526 | self.state.push(CombineState::SameDirs); 527 | self.state.push(CombineState::LeftDirs); 528 | Ok(VisitResult::Continue) 529 | } 530 | (true, false) => { 531 | // Directories present in new, not in old. 532 | self.state.push(CombineState::SameDirs); 533 | self.state.push(CombineState::RightDirs); 534 | vro!(self.next_right()?) 535 | } 536 | } 537 | } 538 | 539 | // Both trees are in the same directory, and we are looking at file 540 | // nodes. 541 | fn visit_samefiles(&mut self) -> Result { 542 | debug!("visit samefiles: {:?}, {:?}", self.left, self.right); 543 | match (self.left.is_leave(), self.right.is_leave()) { 544 | (true, true) => { 545 | // Both are leaving at the same time, nothing to push onto 546 | // state. Consume the nodes, and return the leave. 547 | let _ = self.next_left()?; 548 | vro!(self.next_right()?) 549 | } 550 | (true, false) => { 551 | self.state.push(CombineState::SameFiles); 552 | // New file added in new, not present in old. 553 | vro!(self.next_right()?) 554 | } 555 | (false, true) => { 556 | // File removed. 557 | self.state.push(CombineState::SameFiles); 558 | let _ = self.next_left()?; 559 | Ok(VisitResult::Continue) 560 | } 561 | (false, false) => { 562 | self.state.push(CombineState::SameFiles); 563 | 564 | // Two names within a directory. 565 | match self.left.name().cmp(&self.right.name()) { 566 | Ordering::Equal => { 567 | let left = self.next_left()?; 568 | let mut right = self.next_right()?; 569 | maybe_copy_sha(&left, &mut right); 570 | vro!(right) 571 | } 572 | Ordering::Less => { 573 | // An old name no longer present. 574 | let _ = self.next_left()?; 575 | Ok(VisitResult::Continue) 576 | } 577 | Ordering::Greater => { 578 | // A new name with no corresponding old name. 579 | vro!(self.next_right()?) 580 | } 581 | } 582 | } 583 | } 584 | } 585 | 586 | fn visit_rightdirs(&mut self) -> Result { 587 | debug!("visit rightdirs: {:?}, {:?}", self.left, self.right); 588 | if self.right.is_sep() { 589 | // Since we don't care about files, or matching, no need for 590 | // self.state.push(CombineState::RightFiles) 591 | // the RightFiles state, just stay. 
592 | self.state.push(CombineState::RightDirs); 593 | } else if self.right.is_enter() { 594 | self.state.push(CombineState::RightDirs); 595 | self.state.push(CombineState::RightDirs); 596 | } else if self.right.is_leave() { 597 | // No state change. 598 | } else { 599 | // Otherwise, stays the same. 600 | self.state.push(CombineState::RightDirs); 601 | } 602 | vro!(self.next_right()?) 603 | } 604 | 605 | fn visit_leftdirs(&mut self) -> Result { 606 | debug!("visit rightdirs: {:?}, {:?}", self.left, self.right); 607 | if self.left.is_sep() { 608 | // Since we don't care about files, or matching, no need for 609 | // self.state.push(CombineState::RightFiles) 610 | // the RightFiles state, just stay. 611 | self.state.push(CombineState::LeftDirs); 612 | } else if self.left.is_enter() { 613 | self.state.push(CombineState::LeftDirs); 614 | self.state.push(CombineState::LeftDirs); 615 | } else if self.left.is_leave() { 616 | // No state change. 617 | } else { 618 | // Otherwise, stays the same. 619 | self.state.push(CombineState::LeftDirs); 620 | } 621 | let _ = self.next_left()?; 622 | Ok(VisitResult::Continue) 623 | } 624 | } 625 | 626 | fn maybe_copy_sha(left: &SureNode, right: &mut SureNode) { 627 | let latts = left.atts().unwrap(); 628 | let ratts = right.atts_mut().unwrap(); 629 | 630 | // If we already have a sha1, don't do anything. 631 | if ratts.contains_key("sha1") { 632 | return; 633 | } 634 | 635 | // Only compare regular files. 636 | if latts["kind"] != "file" || ratts["kind"] != "file" { 637 | return; 638 | } 639 | 640 | // Make sure inode and ctime are identical. 641 | if latts.get("ino") != ratts.get("ino") || latts.get("ctime") != ratts.get("ctime") { 642 | return; 643 | } 644 | 645 | // And only update if there is a sha1 to get. 646 | match latts.get("sha1") { 647 | None => (), 648 | Some(v) => { 649 | ratts.insert("sha1".to_string(), v.to_string()); 650 | } 651 | } 652 | } 653 | -------------------------------------------------------------------------------- /src/progress.rs: -------------------------------------------------------------------------------- 1 | //! A simple progress meter. 2 | //! 3 | //! Records updates of number of files visited, and number of bytes 4 | //! processed. When given an estimate, printes a simple periodic report of 5 | //! how far along we think we are. 6 | 7 | use env_logger::Builder; 8 | use lazy_static::lazy_static; 9 | use log::Log; 10 | use std::{ 11 | io::{stdout, Write}, 12 | sync::Mutex, 13 | }; 14 | use time::{Duration, OffsetDateTime}; 15 | 16 | // The Rust logging system (log crate) only allows a single logger to be 17 | // logged once. If we want to capture this, it has to be done before any 18 | // logger is initialized. Globally, within a mutex, we keep this simple 19 | // state of what is happening. 20 | struct State { 21 | // The last message printed. Since an empty string an no message are 22 | // the same thing, we don't worry about having an option here. 23 | message: String, 24 | 25 | // When we next expect to update the message. 26 | next_update: OffsetDateTime, 27 | 28 | // Set to true if the logging system has been initialized. 29 | is_logging: bool, 30 | } 31 | 32 | // The SafeLogger wraps another logger, coordinating the logging with the 33 | // state to properly interleave logs and messages. 34 | struct SafeLogger { 35 | inner: Box, 36 | } 37 | 38 | /// Initialize the standard logger, based on `env_logger::init()`, but 39 | /// coordinated with any progress meters. 
38 | /// Initialize the standard logger, based on `env_logger::init()`, but
39 | /// coordinated with any progress meters. Like `init`, this will panic if
40 | /// the logging system has already been initialized.
41 | pub fn log_init() {
42 |     let mut st = STATE.lock().unwrap();
43 |     let inner = Builder::from_default_env().build();
44 |     let max_level = inner.filter();
45 | 
46 |     let logger = SafeLogger {
47 |         inner: Box::new(inner),
48 |     };
49 |     log::set_boxed_logger(Box::new(logger)).expect("Set Logger");
50 |     log::set_max_level(max_level);
51 | 
52 |     st.is_logging = true;
53 |     st.next_update = update_interval(true);
54 | }
55 | 
56 | // There are two update intervals, depending on whether we are logging.
57 | fn update_interval(is_logging: bool) -> OffsetDateTime {
58 |     if is_logging {
59 |         OffsetDateTime::now_utc() + Duration::milliseconds(250)
60 |     } else {
61 |         OffsetDateTime::now_utc() + Duration::seconds(5)
62 |     }
63 | }
64 | 
65 | lazy_static! {
66 |     // The current global state.
67 |     static ref STATE: Mutex<State> = Mutex::new(State {
68 |         message: String::new(),
69 |         next_update: update_interval(false),
70 |         is_logging: false,
71 |     });
72 | }
73 | 
74 | impl State {
75 |     /// Called to advance to the next message, sets the update time
76 |     /// appropriately.
77 |     fn next(&mut self) {
78 |         self.next_update = update_interval(self.is_logging);
79 |     }
80 | 
81 |     /// Clears the visual text of the current message (but not the message
82 |     /// buffer itself, so that it can be redisplayed if needed).
83 |     fn clear(&self) {
84 |         for ch in self.message.chars() {
85 |             if ch == '\n' {
86 |                 print!("\x1b[1A\x1b[2K");
87 |             }
88 |         }
89 |         stdout().flush().expect("safe stdout write");
90 |     }
91 | 
92 |     /// Update the current message.
93 |     fn update(&mut self, message: String) {
94 |         self.clear();
95 |         self.message = message;
96 |         print!("{}", self.message);
97 |         stdout().flush().expect("safe stdout write");
98 |         self.next();
99 |     }
100 | 
101 |     /// Indicates if the time has expired and another update should be
102 |     /// done. Check this first where formatting and allocating the update
103 |     /// message would cost more than the system call needed to determine
104 |     /// the current time.
105 |     fn need_update(&self) -> bool {
106 |         OffsetDateTime::now_utc() >= self.next_update
107 |     }
108 | }
109 | 
110 | impl Log for SafeLogger {
111 |     fn enabled(&self, metadata: &log::Metadata) -> bool {
112 |         self.inner.enabled(metadata)
113 |     }
114 | 
115 |     fn log(&self, record: &log::Record) {
116 |         let enabled = self.inner.enabled(record.metadata());
117 | 
118 |         if enabled {
119 |             let st = STATE.lock().unwrap();
120 |             st.clear();
121 |             self.inner.log(record);
122 |             print!("{}", st.message);
123 |             stdout().flush().expect("safe stdout write");
124 |         }
125 |     }
126 | 
127 |     fn flush(&self) {
128 |         let st = STATE.lock().unwrap();
129 |         st.clear();
130 |         self.inner.flush();
131 |         print!("{}", st.message);
132 |         stdout().flush().expect("safe stdout write");
133 |     }
134 | }
135 | 
136 | pub struct Progress {
137 |     cur_files: u64,
138 |     total_files: u64,
139 | 
140 |     cur_bytes: u64,
141 |     total_bytes: u64,
142 | }
143 | 
144 | impl Progress {
145 |     /// Construct a progress meter, with the given number of files and
146 |     /// bytes as an estimate.
147 |     pub fn new(files: u64, bytes: u64) -> Progress {
148 |         Progress {
149 |             cur_files: 0,
150 |             total_files: files,
151 | 
152 |             cur_bytes: 0,
153 |             total_bytes: bytes,
154 |         }
155 |     }
156 | 
157 |     /// Update the progress meter.
158 |     pub fn update(&mut self, files: u64, bytes: u64) {
159 |         self.cur_files += files;
160 |         self.cur_bytes += bytes;
161 | 
162 |         let mut st = STATE.lock().unwrap();
163 |         if st.need_update() {
164 |             st.update(self.message());
165 |         }
166 |     }
167 | 
168 |     /// Flush the output, regardless of whether an update is needed.
169 |     pub fn flush(&mut self) {
170 |         let mut st = STATE.lock().unwrap();
171 |         st.update(self.message());
172 | 
173 |         // Clear the stored message so that a later clear won't erase the
174 |         // text we just left on the screen.
175 |         st.message.clear();
176 |     }
177 | 
178 |     pub fn message(&self) -> String {
179 |         format!(
180 |             "{:7}/{:7} ({:5.1}%) files, {}/{} ({:5.1}%) bytes\n",
181 |             self.cur_files,
182 |             self.total_files,
183 |             (self.cur_files as f64 * 100.0) / self.total_files as f64,
184 |             humanize(self.cur_bytes),
185 |             humanize(self.total_bytes),
186 |             (self.cur_bytes as f64 * 100.0) / self.total_bytes as f64
187 |         )
188 |     }
189 | }
190 | 
191 | /// A progress meter used when initially scanning.
192 | pub struct ScanProgress {
193 |     dirs: u64,
194 |     files: u64,
195 |     bytes: u64,
196 | }
197 | 
198 | impl ScanProgress {
199 |     /// Construct a new scanning progress meter.
200 |     pub fn new() -> ScanProgress {
201 |         ScanProgress {
202 |             dirs: 0,
203 |             files: 0,
204 |             bytes: 0,
205 |         }
206 |     }
207 | 
208 |     /// Update the meter.
209 |     pub fn update(&mut self, dirs: u64, files: u64, bytes: u64) {
210 |         self.dirs += dirs;
211 |         self.files += files;
212 |         self.bytes += bytes;
213 | 
214 |         let mut st = STATE.lock().unwrap();
215 |         if st.need_update() {
216 |             st.update(self.message());
217 |         }
218 |     }
219 | 
220 |     fn message(&self) -> String {
221 |         format!(
222 |             "scan: {} dirs {} files, {} bytes\n",
223 |             self.dirs,
224 |             self.files,
225 |             humanize(self.bytes)
226 |         )
227 |     }
228 | }
229 | 
230 | impl Drop for ScanProgress {
231 |     fn drop(&mut self) {
232 |         let mut st = STATE.lock().unwrap();
233 |         st.update(self.message());
234 | 
235 |         st.message.clear();
236 |     }
237 | }
238 | 
239 | /// Print a size in a more human-friendly format.
240 | pub fn humanize(value: u64) -> String {
241 |     let mut value = value as f64;
242 |     let mut unit = 0;
243 | 
244 |     while value > 1024.0 {
245 |         value /= 1024.0;
246 |         unit += 1;
247 |     }
248 | 
249 |     static UNITS: [&str; 9] = [
250 |         "B  ", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
251 |     ];
252 | 
253 |     let precision = if value < 10.0 {
254 |         3
255 |     } else if value < 100.0 {
256 |         2
257 |     } else {
258 |         1
259 |     };
260 | 
261 |     format!("{:6.*}{}", precision, value, UNITS[unit])
262 | }
263 | 
--------------------------------------------------------------------------------
/src/show.rs:
--------------------------------------------------------------------------------
 1 | // Show module.
 2 | 
 3 | use crate::{Result, Store, Version};
 4 | 
 5 | pub fn show_tree(store: &dyn Store) -> Result<()> {
 6 |     for node in store.load_iter(Version::Latest)? {
 7 |         let node = node?;
 8 |         println!("{:?}", node);
 9 |     }
10 |     Ok(())
11 | }
12 | 
--------------------------------------------------------------------------------
/src/store.rs:
--------------------------------------------------------------------------------
 1 | // Surefile store
 2 | 
 3 | use crate::{Error, Result, SureNode};
 4 | use chrono::{DateTime, Utc};
 5 | use log::info;
 6 | use std::{
 7 |     collections::BTreeMap,
 8 |     io::{BufRead, Write},
 9 |     path::Path,
10 | };
11 | 
12 | mod weave;
13 | 
14 | pub use self::weave::WeaveStore;
15 | use self::weave::Compression;
16 | 
17 | /// Tags are just key/value pairs. Both key and value should be printable strings.
18 | pub type StoreTags = BTreeMap<String, String>;
19 | 
20 | /// Something that can store and retrieve SureTrees.
21 | pub trait Store {
22 |     /// Retrieve the available versions in the store. These should be listed, newest first.
23 |     fn get_versions(&self) -> Result<Vec<StoreVersion>>;
24 | 
25 |     /// Load the specified version, returning an iterator over the nodes.
26 |     fn load_iter(&self, version: Version) -> Result<Box<dyn Iterator<Item = Result<SureNode>>>>;
27 | 
28 |     /// Create a temporary storage location.
29 |     fn make_temp(&self) -> Result<Box<dyn TempFile + '_>>;
30 | 
31 |     /// Create a writer for a new version.
32 |     fn make_new(&self, tags: &StoreTags) -> Result<Box<dyn StoreWriter + '_>>;
33 | }
34 | 
35 | /// A TempFile is a temporary storage location that can be written to, and
36 | /// then committed as a new version, or discarded entirely if it is
37 | /// dropped.
38 | /// Typical usage patterns are:
39 | /// - Write to the file, turn into a reader to reread the data. Will be
40 | ///   deleted on drop.
41 | /// - Write to the file, turn into a loader which can make multiple
42 | ///   readers. Will be deleted on drop.
43 | /// - Write to the file, which can then be committed. The file will be
44 | ///   deleted, but its data merged into the latest version in the store.
45 | pub trait TempFile<'a>: Write {
46 |     fn into_loader(self: Box<Self>) -> Result<Box<dyn TempLoader + 'a>>;
47 | 
48 |     // Close the file, returning a TempCleaner that will clean up the file
49 |     // when it is dropped. Significantly, this has no lifetime
50 |     // dependencies.
51 |     fn into_cleaner(self: Box<Self>) -> Result<Box<dyn TempCleaner>>;
52 | }
53 | 
54 | /// A temp file that can spawn multiple loaders.
55 | pub trait TempLoader {
56 |     /// Open the temp file, and return a reader on it.
57 |     fn new_loader(&self) -> Result<Box<dyn BufRead>>;
58 | 
59 |     /// Return the name of the temp file.
60 |     fn path_ref(&self) -> &Path;
61 | 
62 |     // Close the file, returning a TempCleaner that will clean up the file
63 |     // when it is dropped. Significantly, this has no lifetime
64 |     // dependencies.
65 |     fn into_cleaner(self: Box<Self>) -> Result<Box<dyn TempCleaner>>;
66 | }
67 | 
68 | /// A Writer for adding a new version.
69 | pub trait StoreWriter<'a>: Write {
70 |     /// All data has been written, commit this as a new version.
71 |     fn commit(self: Box<Self>) -> Result<()>;
72 | }
73 | 
74 | pub trait TempCleaner {}
75 | 
76 | /// Indicator of which version of sure data to load.
77 | #[derive(Clone, Debug)]
78 | pub enum Version {
79 |     Latest,
80 |     Prior,
81 |     Tagged(String),
82 | }
83 | 
84 | impl Version {
85 |     /// Retrieve this version as a number, or None if that makes no sense
86 |     /// (either it is `Latest`, `Prior`, or the textual version is not an
87 |     /// integer).
88 |     pub fn numeric(&self) -> Option<usize> {
89 |         match self {
90 |             Version::Latest | Version::Prior => None,
91 |             Version::Tagged(text) => text.parse().ok(),
92 |         }
93 |     }
94 | }
95 | 
96 | /// Information about a given version in the store.
97 | #[derive(Clone, Debug)]
98 | pub struct StoreVersion {
99 |     /// A descriptive name. May be the "name" tag given when this version was created.
100 |     pub name: String,
101 |     /// A timestamp of when the version was made.
102 |     pub time: DateTime<Utc>,
103 |     /// The identifier for this version.
104 |     pub version: Version,
105 | }
106 | 
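As a concrete illustration of the read side of this trait, a short sketch (it assumes `parse_store` from just below, the crate re-exporting these items at its root, and a surrounding function that propagates errors with `?`):

    // List the stored versions, newest first, then dump the latest tree.
    let store = rsure::parse_store("2sure.weave.gz")?;
    for v in store.get_versions()? {
        println!("{}  {}  {:?}", v.time, v.name, v.version);
    }
    for node in store.load_iter(rsure::Version::Latest)? {
        println!("{:?}", node?);
    }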
107 | /// Parse a command-line specified path to determine the parameters and type of store desired. The
108 | /// path can be the path to a directory. In this case, look at possible filenames to determine the
109 | /// other parameters. The path can also give a filename of one of the surefiles, and we will
110 | /// derive the name information from that.
111 | pub fn parse_store(text: &str) -> Result<Box<dyn Store>> {
112 |     // First determine if this path is a directory.
113 |     let p = Path::new(text);
114 |     info!("Parsing: {:?}", p);
115 | 
116 |     // If we're given an existing directory, construct a store directly from it.
117 |     // TODO: Look in the directory to see what might be there.
118 |     if p.is_dir() {
119 |         return Ok(Box::new(WeaveStore::new(p.to_path_buf(), "2sure", Compression::Gzip)));
120 |     }
121 | 
122 |     // Otherwise, try to get the parent. If it seems to be empty, use the current directory as the
123 |     // path.
124 |     let dir = match p.parent() {
125 |         None => return Err(Error::UnknownDirectory),
126 |         Some(dir) => {
127 |             if dir.as_os_str().is_empty() {
128 |                 Path::new(".")
129 |             } else {
130 |                 dir
131 |             }
132 |         }
133 |     };
134 | 
135 |     if !dir.is_dir() {
136 |         return Err(Error::FileNotInDirectory);
137 |     }
138 | 
139 |     let base = match p.file_name() {
140 |         Some(name) => name,
141 |         None => return Err(Error::PathMissingFinalFile),
142 |     };
143 |     let base = match base.to_str() {
144 |         Some(name) => name,
145 |         None => panic!("Path came from string, yet is no longer UTF-8"),
146 |     };
147 | 
148 |     let (base, compression) = if let Some(core_name) = base.strip_suffix(".gz") {
149 |         (core_name, Compression::Gzip)
150 |     } else if let Some(core_name) = base.strip_suffix(".zstd") {
151 |         (core_name, Compression::Zstd)
152 |     } else {
153 |         (base, Compression::Plain)
154 |     };
155 | 
156 |     // Check for weave format.
157 |     if let Some(base) = base.strip_suffix(".weave") {
158 |         return Ok(Box::new(WeaveStore::new(dir, base, compression)));
159 |     }
160 | 
161 |     // Strip off known suffixes.
162 |     let base = if base.ends_with(".dat") || base.ends_with(".bak") {
163 |         &base[..base.len() - 4]
164 |     } else {
165 |         base
166 |     };
167 | 
168 |     Ok(Box::new(WeaveStore::new(dir, base, compression)))
169 | }
170 | 
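To make the naming rules above concrete, a few illustrative calls (the paths are hypothetical; each resolves to a `WeaveStore`):

    let _ = rsure::parse_store("/var/lib/sure")?;    // existing directory: base "2sure", gzip
    let _ = rsure::parse_store("host.weave.gz")?;    // explicit weave file: base "host", gzip
    let _ = rsure::parse_store("2sure.dat.zstd")?;   // ".dat" stripped: base "2sure", zstd
    let _ = rsure::parse_store("2sure.dat")?;        // base "2sure", uncompressed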
--------------------------------------------------------------------------------
/src/store/weave.rs:
--------------------------------------------------------------------------------
 1 | //! SCCS-style delta weave stores.
 2 | 
 3 | use crate::{
 4 |     node,
 5 |     store::{
 6 |         Store, StoreTags, StoreVersion, StoreWriter, TempCleaner, TempFile, TempLoader, Version,
 7 |     },
 8 |     Error, Result, SureNode,
 9 | };
10 | use std::{
11 |     env,
12 |     fs::{self, File},
13 |     io::{self, BufRead, BufReader, BufWriter, Write},
14 |     path::{Path, PathBuf},
15 | };
16 | use weave::{
17 |     self, DeltaWriter, NamingConvention, NewWeave, PullParser, SimpleNaming,
18 | };
19 | pub use weave::Compression;
20 | 
21 | pub struct WeaveStore {
22 |     naming: SimpleNaming,
23 | }
24 | 
25 | impl WeaveStore {
26 |     pub fn new<P: AsRef<Path>>(path: P, base: &str, compression: Compression) -> WeaveStore {
27 |         WeaveStore {
28 |             naming: SimpleNaming::new(path, base, "dat", compression),
29 |         }
30 |     }
31 | }
32 | 
33 | impl Store for WeaveStore {
34 |     fn get_versions(&self) -> Result<Vec<StoreVersion>> {
35 |         let header = PullParser::new(&self.naming, 1)?.into_header();
36 |         let mut versions: Vec<_> = header
37 |             .deltas
38 |             .iter()
39 |             .map(|v| StoreVersion {
40 |                 name: v.name.clone(),
41 |                 time: v.time,
42 |                 version: Version::Tagged(v.number.to_string()),
43 |             })
44 |             .collect();
45 |         versions.reverse();
46 |         Ok(versions)
47 |     }
48 | 
49 |     fn load_iter(&self, version: Version) -> Result<Box<dyn Iterator<Item = Result<SureNode>>>> {
50 |         let last = weave::get_last_delta(&self.naming)?;
51 |         let last = match version {
52 |             Version::Latest => last,
53 |             Version::Prior => last - 1,
54 |             Version::Tagged(vers) => vers.parse()?,
55 |         };
56 | 
57 |         Ok(Box::new(WeaveIter::new(&self.naming, last)?))
58 |     }
59 | 
60 |     fn make_temp(&self) -> Result<Box<dyn TempFile + '_>> {
61 |         // TODO: Fix up naming to allow uncompressed writes.
62 |         let (path, file) = self.naming.temp_file()?;
63 |         let cpath = path.clone();
64 |         Ok(Box::new(WeaveTemp {
65 |             parent: self,
66 |             path,
67 |             file: BufWriter::new(file),
68 |             cleaner: FileClean(cpath),
69 |         }))
70 |     }
71 | 
72 |     fn make_new(&self, tags: &StoreTags) -> Result<Box<dyn StoreWriter + '_>> {
73 |         let itags = tags.iter().map(|(k, v)| (k.as_ref(), v.as_ref()));
74 |         match weave::get_last_delta(&self.naming) {
75 |             Ok(base) => {
76 |                 let wv = DeltaWriter::new(&self.naming, itags, base)?;
77 |                 Ok(Box::new(NewWeaveDelta { weave: wv }))
78 |             }
79 |             Err(_) => {
80 |                 // Create a new weave file.
81 |                 let wv = NewWeave::new(&self.naming, itags)?;
82 |                 Ok(Box::new(NewWeaveWriter { weave: wv }))
83 |             }
84 |         }
85 |     }
86 | }
87 | 
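For the write side, a hedged sketch of how a caller drives `make_new` (the two header lines match what `WeaveIter::new` below checks for; the tag value is illustrative, and the weave layer requires a "name" tag):

    use std::io::Write;

    let mut tags = rsure::StoreTags::new();
    tags.insert("name".into(), "nightly scan".into());
    let mut wr = store.make_new(&tags)?;
    writeln!(wr, "asure-2.0")?;
    writeln!(wr, "-----")?;
    // ... one encoded line per tree node ...
    wr.commit()?; // becomes the newest version; earlier ones stay retrievable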
88 | struct WeaveTemp<'a> {
89 |     parent: &'a WeaveStore,
90 |     path: PathBuf,
91 |     file: BufWriter<File>,
92 |     cleaner: FileClean,
93 | }
94 | 
95 | impl<'a> TempFile<'a> for WeaveTemp<'a> {
96 |     fn into_loader(self: Box<Self>) -> Result<Box<dyn TempLoader + 'a>> {
97 |         drop(self.file);
98 |         Ok(Box::new(WeaveTempLoader {
99 |             _parent: self.parent,
100 |             path: self.path,
101 |             cleaner: self.cleaner,
102 |         }))
103 |     }
104 | 
105 |     fn into_cleaner(self: Box<Self>) -> Result<Box<dyn TempCleaner>> {
106 |         Ok(Box::new(self.cleaner))
107 |     }
108 | }
109 | 
110 | impl<'a> Write for WeaveTemp<'a> {
111 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
112 |         self.file.write(buf)
113 |     }
114 |     fn flush(&mut self) -> io::Result<()> {
115 |         self.file.flush()
116 |     }
117 | }
118 | 
119 | pub struct WeaveTempLoader<'a> {
120 |     _parent: &'a WeaveStore,
121 |     path: PathBuf,
122 |     cleaner: FileClean,
123 | }
124 | 
125 | impl<'a> TempLoader for WeaveTempLoader<'a> {
126 |     fn new_loader(&self) -> Result<Box<dyn BufRead>> {
127 |         let read = BufReader::new(File::open(&self.path)?);
128 |         Ok(Box::new(read))
129 |     }
130 | 
131 |     fn path_ref(&self) -> &Path {
132 |         &self.path
133 |     }
134 | 
135 |     fn into_cleaner(self: Box<Self>) -> Result<Box<dyn TempCleaner>> {
136 |         Ok(Box::new(self.cleaner))
137 |     }
138 | }
139 | 
140 | pub struct NewWeaveWriter<'a> {
141 |     weave: NewWeave<'a>,
142 | }
143 | 
144 | impl<'a> StoreWriter<'a> for NewWeaveWriter<'a> {
145 |     fn commit(self: Box<Self>) -> Result<()> {
146 |         self.weave.close()?;
147 |         Ok(())
148 |     }
149 | }
150 | 
151 | impl<'a> Write for NewWeaveWriter<'a> {
152 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
153 |         self.weave.write(buf)
154 |     }
155 | 
156 |     fn flush(&mut self) -> io::Result<()> {
157 |         self.weave.flush()
158 |     }
159 | }
160 | 
161 | pub struct NewWeaveDelta<'a> {
162 |     weave: DeltaWriter<'a>,
163 | }
164 | 
165 | impl<'a> StoreWriter<'a> for NewWeaveDelta<'a> {
166 |     fn commit(self: Box<Self>) -> Result<()> {
167 |         self.weave.close()?;
168 |         Ok(())
169 |     }
170 | }
171 | 
172 | impl<'a> Write for NewWeaveDelta<'a> {
173 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
174 |         self.weave.write(buf)
175 |     }
176 | 
177 |     fn flush(&mut self) -> io::Result<()> {
178 |         self.weave.flush()
179 |     }
180 | }
181 | 
182 | pub struct WeaveIter {
183 |     pull: Box<dyn Iterator<Item = Result<String>>>,
184 | }
185 | 
186 | impl WeaveIter {
187 |     fn new(naming: &dyn NamingConvention, delta: usize) -> Result<WeaveIter> {
188 |         let mut pull = PullParser::new(naming, delta)?.filter_map(kept_text);
189 |         fixed(&mut pull, "asure-2.0")?;
190 |         fixed(&mut pull, "-----")?;
191 |         Ok(WeaveIter {
192 |             pull: Box::new(pull),
193 |         })
194 |     }
195 | }
196 | 
197 | impl Iterator for WeaveIter {
198 |     type Item = Result<SureNode>;
199 | 
200 |     fn next(&mut self) -> Option<Result<SureNode>> {
201 |         let line = match self.pull.next() {
202 |             Some(Err(e)) => return Some(Err(e)),
203 |             Some(Ok(line)) => line,
204 |             None => return None,
205 |         };
206 |         let line = line.as_bytes();
207 | 
208 |         match line[0] {
209 |             b'd' => {
210 |                 let (dname, datts) = node::decode_entity(&line[1..]);
211 |                 Some(Ok(SureNode::Enter {
212 |                     name: dname,
213 |                     atts: datts,
214 |                 }))
215 |             }
216 |             b'f' => {
217 |                 let (fname, fatts) = node::decode_entity(&line[1..]);
218 |                 Some(Ok(SureNode::File {
219 |                     name: fname,
220 |                     atts: fatts,
221 |                 }))
222 |             }
223 |             b'-' => Some(Ok(SureNode::Sep)),
224 |             b'u' => Some(Ok(SureNode::Leave)),
225 |             ch => Some(Err(Error::InvalidSurefileChar(ch as char))),
226 |         }
227 |     }
228 | }
229 | 
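The single-character tags decoded above are the flattened surefile representation. Schematically, a stored tree looks like this (the names and the exact attribute syntax are illustrative, not exact):

    asure-2.0
    -----
    d<escaped dir name> <attributes...>
    -
    f<escaped file name> <attributes...>
    u

A `d` line enters a directory, `-` separates a directory's subdirectories from its files, `f` lines are file entries, and `u` leaves ("goes up from") the directory.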
230 | // Filter nodes to only include kept text lines.
231 | fn kept_text(node: weave::Result<weave::Entry>) -> Option<Result<String>> {
232 |     match node {
233 |         Err(e) => Some(Err(e.into())),
234 |         Ok(weave::Entry::Plain { text, keep }) if keep => Some(Ok(text)),
235 |         _ => None,
236 |     }
237 | }
238 | 
239 | /// Try reading a specific line from the given iterator. Returns Err if
240 | /// the line didn't match, or something went wrong with the read.
241 | fn fixed<I>(pull: &mut I, expect: &str) -> Result<()>
242 | where
243 |     I: Iterator<Item = Result<String>>,
244 | {
245 |     match pull.next() {
246 |         Some(Ok(line)) => {
247 |             if line == expect {
248 |                 Ok(())
249 |             } else {
250 |                 Err(Error::UnexpectedLine(line, expect.into()))
251 |             }
252 |         }
253 |         Some(Err(e)) => Err(e),
254 |         None => Err(Error::SureFileEof),
255 |     }
256 | }
257 | /*
258 | fn fixed(recv: &Receiver<Option<Result<String>>>, expect: &[u8]) -> Result<()> {
259 |     match recv.recv() {
260 |         Ok(Some(Ok(line))) => {
261 |             if line.as_bytes() == expect {
262 |                 Ok(())
263 |             } else {
264 |                 Err(format_err!("Unexpected line from channel: {:?} expect {:?}", line, expect))
265 |             }
266 |         }
267 |         Ok(Some(Err(e))) => Err(format_err!("Error reading suredata: {:?}", e)),
268 |         Ok(None) => Err(format_err!("Unexpected eof reading suredata")),
269 |         Err(e) => Err(e.into()),
270 |     }
271 | }
272 | */
273 | 
274 | /// Own a PathBuf, and delete this file on drop. This is in its own type
275 | /// for two reasons: 1. It makes it easy to have cleaning in multiple types,
276 | /// passing ownership between them, and 2. it prevents the need for those
277 | /// types to implement Drop, which would prevent moves out of the fields.
278 | struct FileClean(PathBuf);
279 | 
280 | impl Drop for FileClean {
281 |     fn drop(&mut self) {
282 |         if env::var_os("RSURE_KEEP").is_none() {
283 |             let _ = fs::remove_file(&self.0);
284 |         }
285 |     }
286 | }
287 | 
288 | impl TempCleaner for FileClean {}
289 | 
--------------------------------------------------------------------------------
/src/surefs.rs:
--------------------------------------------------------------------------------
 1 | // Filesystem scanning.
 2 | 
 3 | use crate::{escape::*, suretree::AttMap};
 4 | use log::error;
 5 | 
 6 | use std::{
 7 |     fs::{self, Metadata},
 8 |     os::unix::prelude::*,
 9 |     path::Path,
10 | };
11 | 
12 | // Encode the attributes for the given node. Note that this returns normally
13 | // even when there is an error resolving a symlink: it logs the error, and
14 | // returns a placeholder.
15 | pub(crate) fn encode_atts(name: &Path, meta: &Metadata) -> AttMap {
16 |     // let fname = name.file_name().unwrap().as_bytes().escaped();
17 |     let mode = meta.mode() as libc::mode_t & libc::S_IFMT;
18 | 
19 |     let mut base = AttMap::new();
20 | 
21 |     // These attributes apply to every node.
22 |     base.insert("uid".to_string(), meta.uid().to_string());
23 |     base.insert("gid".to_string(), meta.gid().to_string());
24 |     base.insert(
25 |         "perm".to_string(),
26 |         (meta.mode() as libc::mode_t & !libc::S_IFMT).to_string(),
27 |     );
28 | 
29 |     // Other attributes are based on the type of the node.
30 |     match mode as libc::mode_t {
31 |         libc::S_IFDIR => {
32 |             base.insert("kind".to_string(), "dir".to_string());
33 |         }
34 |         libc::S_IFREG => {
35 |             base.insert("kind".to_string(), "file".to_string());
36 |             base.insert("ino".to_string(), meta.ino().to_string());
37 |             base.insert("size".to_string(), meta.size().to_string());
38 |             time_info(&mut base, meta);
39 |             // Note that the 'sha1' attribute is computed later.
40 |         }
41 |         libc::S_IFLNK => {
42 |             base.insert("kind".to_string(), "lnk".to_string());
43 |             let link = match fs::read_link(name) {
44 |                 Ok(l) => l,
45 |                 Err(err) => {
46 |                     error!("Unable to read link: {:?} ({})", name, err);
47 |                     // TODO: Generate a unique placeholder so this will
48 |                     // always show up.
49 |                     From::from("???")
50 |                 }
51 |             };
52 |             base.insert("targ".to_string(), link.as_os_str().as_bytes().escaped());
53 |         }
54 |         libc::S_IFIFO => {
55 |             base.insert("kind".to_string(), "fifo".to_string());
56 |         }
57 |         libc::S_IFSOCK => {
58 |             base.insert("kind".to_string(), "sock".to_string());
59 |         }
60 |         libc::S_IFCHR => {
61 |             base.insert("kind".to_string(), "chr".to_string());
62 |             add_dev(&mut base, meta);
63 |         }
64 |         libc::S_IFBLK => {
65 |             base.insert("kind".to_string(), "blk".to_string());
66 |             add_dev(&mut base, meta);
67 |         }
68 |         _ => panic!("Unknown file type: 0o{:o}", mode),
69 |     }
70 | 
71 |     // println!("{:?}: atts: {:?}", fname, base);
72 |     base
73 | }
74 | 
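To make the encoding concrete, a regular file's attribute map produced by `encode_atts` would look roughly like this (the values are hypothetical; note that `perm` is the decimal rendering of the mode bits, so 420 is 0o644):

    kind=file uid=1000 gid=1000 perm=420 ino=9314 size=1024 mtime=1700000000 ctime=1700000000

plus a `sha1` attribute that the later hashing pass adds.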
75 | fn add_dev(base: &mut AttMap, meta: &Metadata) {
76 |     let rdev = meta.rdev();
77 |     // This is defined in a macro, and hasn't made it into libc. Given how
78 |     // it is defined in the header, it is unlikely to change, at least on
79 |     // Linux.
80 |     base.insert("devmaj".to_string(), ((rdev >> 8) & 0xfff).to_string());
81 |     base.insert("devmin".to_string(), (rdev & 0xff).to_string());
82 | }
83 | 
84 | fn time_info(base: &mut AttMap, meta: &Metadata) {
85 |     // TODO: Handle the nsec part of the time.
86 |     base.insert("mtime".to_string(), meta.mtime().to_string());
87 |     base.insert("ctime".to_string(), meta.ctime().to_string());
88 | }
89 | 
--------------------------------------------------------------------------------
/src/suretree.rs:
--------------------------------------------------------------------------------
1 | // SureTree
2 | 
3 | use std::collections::BTreeMap;
4 | 
5 | pub type AttMap = BTreeMap<String, String>;
6 | 
--------------------------------------------------------------------------------
/tests/surefiles.rs:
--------------------------------------------------------------------------------
 1 | // Test the rsure API for save and load.
 2 | 
 3 | /*
 4 | extern crate rsure;
 5 | extern crate tempdir;
 6 | 
 7 | use rsure::{stdout_visitor, SureTree, TreeCompare};
 8 | use tempdir::TempDir;
 9 | 
10 | // Test that the API is usable. Currently, the output only generates a
11 | // report to stdout, and doesn't return any information to the caller, so
12 | // we can only test that the calls work. If you run the test with
13 | // "--nocapture", it should show the addition of the surefile at the end.
14 | #[test]
15 | fn save_and_load() {
16 |     let tmp = TempDir::new("rsure").unwrap();
17 |     let tree = rsure::scan_fs(tmp.path()).unwrap();
18 | 
19 |     // First surefile.
20 |     let sfile = tmp.path().join("surefile.dat.gz");
21 | 
22 |     // Save it to a file.
23 |     tree.save(&sfile).unwrap();
24 | 
25 |     // Load it back in.
26 |     let t2 = SureTree::load(&sfile).unwrap();
27 |     t2.compare_from(&mut stdout_visitor(), &tree, &sfile);
28 | 
29 |     // Rescan (should catch the newly added surefile).
30 |     let t3 = rsure::scan_fs(tmp.path()).unwrap();
31 |     t3.compare_from(&mut stdout_visitor(), &t2, tmp.path());
32 | }
33 | 
34 | // Test writing to an in-memory buffer.
35 | #[test]
36 | fn save_writer() {
37 |     let tmp = TempDir::new("rsure").unwrap();
38 |     let t1 = rsure::scan_fs(tmp.path()).unwrap();
39 | 
40 |     let mut sf1 = vec![];
41 |     t1.save_to(&mut sf1).unwrap();
42 |     println!("Wrote {} bytes", sf1.len());
43 | 
44 |     let t2 = SureTree::load_from(&sf1[..]).unwrap();
45 |     t2.compare_from(&mut stdout_visitor(), &t1, tmp.path());
46 | }
47 | */
48 | 
--------------------------------------------------------------------------------
/weave/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore the lock file, as this is a library.
2 | Cargo.lock
3 | 
--------------------------------------------------------------------------------
/weave/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "weave"
 3 | version = "0.4.0-dev"
 4 | authors = ["davidb"]
 5 | description = """
 6 | Weave delta file storage. Inspired by the storage format of SCCS,
 7 | this crate allows multiple revisions of a file to be stored
 8 | efficiently in a single file.
 9 | """
10 | license = "MIT"
11 | readme = "README.rst"
12 | repository = "https://github.com/d3zd3z/rsure"
13 | edition = "2018"
14 | 
15 | [dependencies]
16 | log = "0.4"
17 | regex = "1.4"
18 | serde = "1.0"
19 | serde_json = "1.0"
20 | serde_derive = "1.0"
21 | thiserror = "1.0"
22 | chrono = { version = "0.4", features = ["serde"] }
23 | flate2 = "1.0"
24 | zstd = "0.10"
25 | 
26 | [dev-dependencies]
27 | env_logger = "0.9"
28 | rand = "0.8"
29 | tempdir = "0.3"
30 | 
31 | # Optimize the tests so they don't take too long
32 | [profile.test]
33 | opt-level = 3
34 | 
--------------------------------------------------------------------------------
/weave/README.rst:
--------------------------------------------------------------------------------
 1 | Weave File Support
 2 | ******************
 3 | 
 4 | Testing
 5 | =======
 6 | 
 7 | Many of the tests compare the crate's output with that generated by the
 8 | sccs command. On many Linux distros, a compatible version can be
 9 | found in the ``cssc`` package.
10 | 
--------------------------------------------------------------------------------
/weave/src/delta.rs:
--------------------------------------------------------------------------------
 1 | //! Add a delta to a weave file.
 2 | 
 3 | use regex::Regex;
 4 | use std::{
 5 |     collections::BTreeMap,
 6 |     fs::{remove_file, rename},
 7 |     io::{self, BufRead, BufReader, BufWriter, Write},
 8 |     mem::replace,
 9 |     path::PathBuf,
10 |     process::{Command, Stdio},
11 | };
12 | 
13 | use crate::{header::Header, Entry, Error, NamingConvention, Parser, PullParser, Result, Sink, WriterInfo};
14 | 
15 | /// A DeltaWriter is used to write a new delta. Data should be written to the writer, and then the
16 | /// `close` method called to update the weave file with the new delta.
17 | pub struct DeltaWriter<'n> {
18 |     naming: &'n dyn NamingConvention,
19 | 
20 |     // Where the temporary file will be written.
21 |     temp: Option<WriterInfo>,
22 | 
23 |     // The base delta.
24 |     base: usize,
25 | 
26 |     // The new delta.
27 |     new_delta: usize,
28 | 
29 |     // The name of the file with the base written to it.
30 |     base_name: PathBuf,
31 | 
32 |     // The regex for parsing diff output.
33 |     diff_re: Regex,
34 | 
35 |     // The header to be written for the new delta.
36 |     header: Header,
37 | }
38 | 
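Before the implementation, a schematic of what `close` (below) does with these pieces:

    base delta  --PullParser-->  base temp file ---+
                                                   +--> diff --> hunk headers
    caller's writes -----------> new temp file ----+
    hunks + old weave --Parser/WeaveWriter--> new weave temp --> renamed over the main file

The temp files come from the naming convention, and the old main file is kept as the backup.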
39 | impl<'n> DeltaWriter<'n> {
40 |     /// Construct a writer for a new delta. The naming convention determines where the files
41 |     /// will be written, and the tags will be associated with the new delta. The `base` is the
42 |     /// existing delta that the change should be based on.
43 |     pub fn new<'a, 'b, I>(nc: &dyn NamingConvention, tags: I, base: usize) -> Result<DeltaWriter>
44 |     where
45 |         I: Iterator<Item = (&'a str, &'b str)>,
46 |     {
47 |         // Copy the tags, making sure there is a "name", which is used to index.
48 |         // TODO: Ensure that "name" is unique among the existing deltas.
49 |         let mut ntags = BTreeMap::new();
50 |         for (k, v) in tags {
51 |             ntags.insert(k.to_owned(), v.to_owned());
52 |         }
53 |         if !ntags.contains_key("name") {
54 |             return Err(Error::NameMissing);
55 |         }
56 | 
57 |         // Extract the base delta to a file.
58 | 
59 |         let (base_name, mut base_file) = nc.temp_file()?;
60 |         let mut header = {
61 |             let mut parser = PullParser::new(nc, base)?;
62 |             for node in &mut parser {
63 |                 match node? {
64 |                     Entry::Plain { text, keep } => {
65 |                         if keep {
66 |                             writeln!(base_file, "{}", text)?;
67 |                         }
68 |                     }
69 |                     _ => (),
70 |                 }
71 |             }
72 |             parser.into_header()
73 |         };
74 |         let new_delta = header.add(ntags)?;
75 | 
76 |         let (new_name, new_file) = nc.temp_file()?;
77 |         let new_info = WriterInfo {
78 |             name: new_name,
79 |             writer: Box::new(BufWriter::new(new_file)),
80 |         };
81 | 
82 |         Ok(DeltaWriter {
83 |             naming: nc,
84 |             temp: Some(new_info),
85 |             base,
86 |             new_delta,
87 |             base_name,
88 |             diff_re: Regex::new(r"^(\d+)(,(\d+))?([acd]).*$").unwrap(),
89 |             header,
90 |         })
91 |     }
92 | 
93 |     pub fn close(mut self) -> Result<()> {
94 |         // Close the temporary file, getting its name.
95 |         let temp = replace(&mut self.temp, None);
96 |         let temp_name = match temp {
97 |             Some(mut wi) => {
98 |                 wi.writer.flush()?;
99 |                 drop(wi.writer);
100 |                 wi.name
101 |             }
102 |             None => return Err(Error::AlreadyClosed),
103 |         };
104 | 
105 |         let tweave_info = self.naming.new_temp()?;
106 | 
107 |         // Invoke diff on the files.
108 |         let mut child = Command::new("diff")
109 |             .arg(self.base_name.as_os_str())
110 |             .arg(temp_name.as_os_str())
111 |             .stdout(Stdio::piped())
112 |             .spawn()?;
113 | 
114 |         {
115 |             let lines = BufReader::new(child.stdout.as_mut().unwrap()).lines();
116 |             let weave_write = WeaveWriter {
117 |                 dest: tweave_info.writer,
118 |             };
119 |             let mut parser = Parser::new(self.naming, weave_write, self.base)?;
120 | 
121 |             let weave_write = parser.get_sink();
122 | 
123 |             self.header.write(&mut weave_write.borrow_mut().dest)?;
124 | 
125 |             let mut is_done = false;
126 |             let mut is_adding = false;
127 | 
128 |             for line in lines {
129 |                 let line = line?;
130 |                 if let Some(cap) = self.diff_re.captures(&line) {
131 |                     // If adding, this completes the add.
132 |                     if is_adding {
133 |                         weave_write.borrow_mut().end(self.new_delta)?;
134 |                         is_adding = false;
135 |                     }
136 | 
137 |                     let left = cap.get(1).unwrap().as_str().parse::<usize>().unwrap();
138 |                     let right = match cap.get(3) {
139 |                         None => left,
140 |                         Some(r) => r.as_str().parse().unwrap(),
141 |                     };
142 |                     let cmd = cap.get(4).unwrap().as_str().chars().next().unwrap();
143 | 
144 |                     if cmd == 'd' || cmd == 'c' {
145 |                         // These include deletions.
146 |                         match parser.parse_to(left)? {
147 |                             0 => return Err(Error::UnexpectedEof),
148 |                             n if n == left => (),
149 |                             _ => panic!("Unexpected parse result"),
150 |                         }
151 |                         weave_write.borrow_mut().delete(self.new_delta)?;
152 |                         match parser.parse_to(right + 1) {
153 |                             Ok(0) => is_done = true,
154 |                             Ok(n) if n == right + 1 => (),
155 |                             Ok(_) => panic!("Unexpected parse result"),
156 |                             Err(e) => return Err(e),
157 |                         }
158 |                         weave_write.borrow_mut().end(self.new_delta)?;
159 |                     } else {
160 |                         match parser.parse_to(right + 1) {
161 |                             Ok(0) => is_done = true,
162 |                             Ok(n) if n == right + 1 => (),
163 |                             Ok(_) => panic!("Unexpected parse result"),
164 |                             Err(e) => return Err(e),
165 |                         }
166 |                     }
167 | 
168 |                     if cmd == 'c' || cmd == 'a' {
169 |                         weave_write.borrow_mut().insert(self.new_delta)?;
170 |                         is_adding = true;
171 |                     }
172 | 
173 |                     continue;
174 |                 }
175 | 
176 |                 match line.chars().next() {
177 |                     None => panic!("Unexpected blank line in diff"),
178 |                     Some('<') => continue,
179 |                     Some('-') => continue,
180 |                     Some('>') => {
181 |                         // Add lines should just be written as-is.
182 |                         weave_write.borrow_mut().plain(&line[2..], true)?;
183 |                     }
184 |                     Some(_) => panic!("Unexpected diff line: {:?}", line),
185 |                 }
186 |             }
187 | 
188 |             if is_adding {
189 |                 weave_write.borrow_mut().end(self.new_delta)?;
190 |             }
191 | 
192 |             if !is_done {
193 |                 match parser.parse_to(0) {
194 |                     Ok(0) => (),
195 |                     Ok(_) => panic!("Unexpected non-eof"),
196 |                     Err(e) => return Err(e),
197 |                 }
198 |             }
199 |         }
200 | 
201 |         match child.wait()?.code() {
202 |             None => return Err(Error::DiffKilled),
203 |             Some(0) => (), // No diffs
204 |             Some(1) => (), // Normal, with diffs
205 |             Some(n) => return Err(Error::DiffError(n)),
206 |         }
207 | 
208 |         // Now that it is all done, clean up the temp files, and cycle the backup.
209 |         let _ = rename(self.naming.main_file(), self.naming.backup_file());
210 |         rename(tweave_info.name, self.naming.main_file())?;
211 |         remove_file(&self.base_name)?;
212 |         remove_file(&temp_name)?;
213 | 
214 |         Ok(())
215 |     }
216 | }
217 | 
218 | impl<'n> Write for DeltaWriter<'n> {
219 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
220 |         self.temp
221 |             .as_mut()
222 |             .expect("Attempt to write to DeltaWriter that is closed")
223 |             .writer
224 |             .write(buf)
225 |     }
226 | 
227 |     fn flush(&mut self) -> io::Result<()> {
228 |         self.temp
229 |             .as_mut()
230 |             .expect("Attempt to flush DeltaWriter that is closed")
231 |             .writer
232 |             .flush()
233 |     }
234 | }
235 | 
236 | struct RevWriter<W: Write> {
237 |     dest: BufWriter<W>,
238 | }
239 | 
240 | impl<W: Write> Sink for RevWriter<W> {
241 |     fn plain(&mut self, text: &str, keep: bool) -> Result<()> {
242 |         if !keep {
243 |             return Ok(());
244 |         }
245 | 
246 |         writeln!(&mut self.dest, "{}", text)?;
247 |         Ok(())
248 |     }
249 | }
250 | 
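For reference, the ed-style hunk headers that `diff_re` above matches, and the fields `close` pulls out of them. A small self-contained check (the hunk strings are hypothetical):

    use regex::Regex;

    fn main() {
        // The same pattern as DeltaWriter's diff_re.
        let re = Regex::new(r"^(\d+)(,(\d+))?([acd]).*$").unwrap();
        for hdr in ["3c3", "5,7d4", "8a9,11"] {
            let cap = re.captures(hdr).unwrap();
            let left: usize = cap[1].parse().unwrap();
            let right: usize = cap.get(3).map_or(left, |m| m.as_str().parse().unwrap());
            let cmd = &cap[4];
            // For 'c'/'d', old lines left..=right are deleted; for 'c'/'a',
            // the replacement lines follow as "> " lines.
            println!("{}: old lines {}..={}, command {}", hdr, left, right, cmd);
        }
    }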
251 | /// The weave writer writes out the contents of a weave to a file.
252 | struct WeaveWriter<W: Write> {
253 |     dest: W,
254 | }
255 | 
256 | impl<W: Write> Sink for WeaveWriter<W> {
257 |     fn insert(&mut self, delta: usize) -> Result<()> {
258 |         writeln!(&mut self.dest, "\x01I {}", delta)?;
259 |         Ok(())
260 |     }
261 |     fn delete(&mut self, delta: usize) -> Result<()> {
262 |         writeln!(&mut self.dest, "\x01D {}", delta)?;
263 |         Ok(())
264 |     }
265 |     fn end(&mut self, delta: usize) -> Result<()> {
266 |         writeln!(&mut self.dest, "\x01E {}", delta)?;
267 |         Ok(())
268 |     }
269 |     fn plain(&mut self, text: &str, _keep: bool) -> Result<()> {
270 |         writeln!(&mut self.dest, "{}", text)?;
271 |         Ok(())
272 |     }
273 | }
274 | 
--------------------------------------------------------------------------------
/weave/src/errors.rs:
--------------------------------------------------------------------------------
 1 | // Errors in the weave code.
 2 | 
 3 | use std::{io, result};
 4 | use thiserror::Error;
 5 | 
 6 | #[derive(Error, Debug)]
 7 | pub enum Error {
 8 |     #[error("I/O Error")]
 9 |     Io(#[from] io::Error),
10 |     #[error("Json error")]
11 |     Json(#[from] serde_json::Error),
12 |     #[error("Parsing Error")]
13 |     Parse(#[from] std::num::ParseIntError),
14 |     #[error("tag \"name\" missing")]
15 |     NameMissing,
16 |     #[error("already closed")]
17 |     AlreadyClosed,
18 |     #[error("unexpected end of weave file")]
19 |     UnexpectedEof,
20 |     #[error("weave file appears empty")]
21 |     EmptyWeave,
22 |     #[error("diff error status {0}")]
23 |     DiffError(i32),
24 |     #[error("diff killed by signal")]
25 |     DiffKilled,
26 | }
27 | 
28 | pub type Result<T> = result::Result<T, Error>;
29 | 
--------------------------------------------------------------------------------
/weave/src/header.rs:
--------------------------------------------------------------------------------
 1 | //! Weave file information.
 2 | //!
 3 | //! The information about each weave file is stored in a header, as the first line of the file.
 4 | 
 5 | use chrono::{DateTime, Utc};
 6 | use serde_derive::{Deserialize, Serialize};
 7 | use std::{collections::BTreeMap, io::Write};
 8 | 
 9 | use crate::{Error, Result};
10 | 
11 | /// The header placed at the beginning of each weave file. The deltas correspond with the
12 | /// deltas checked in. Note that the value passed to [`crate::PullParser::new`] should be the `number`
13 | /// field of [`DeltaInfo`] and not the index in the `deltas` vec.
14 | #[derive(Clone, Serialize, Deserialize)]
15 | pub struct Header {
16 |     pub version: usize,
17 |     pub deltas: Vec<DeltaInfo>,
18 | }
19 | 
20 | /// Information about a single delta.
21 | #[derive(Clone, Serialize, Deserialize)]
22 | pub struct DeltaInfo {
23 |     /// A tag giving the name for this particular delta. Should be unique across all deltas.
24 |     pub name: String,
25 |     /// The delta number. A unique integer that identifies this delta in the woven data below.
26 |     pub number: usize,
27 |     /// Arbitrary tags the user has asked to be stored with this delta.
28 |     pub tags: BTreeMap<String, String>,
29 |     /// A time stamp of when this delta was added.
30 |     pub time: DateTime<Utc>,
31 | }
32 | 
33 | const THIS_VERSION: usize = 1;
34 | 
35 | impl Default for Header {
36 |     fn default() -> Header {
37 |         Header {
38 |             version: THIS_VERSION,
39 |             deltas: vec![],
40 |         }
41 |     }
42 | }
43 | 
44 | impl Header {
45 |     /// Decode from the first line of the file.
46 |     pub fn decode(line: &str) -> Result<Header> {
{ 47 | if let Some(rest) = line.strip_prefix("\x01t") { 48 | Ok(serde_json::from_str(rest)?) 49 | } else { 50 | // This probably comes from an sccs file. 51 | Ok(Header { 52 | version: 0, 53 | deltas: vec![], 54 | }) 55 | } 56 | } 57 | 58 | /// Add a delta to this header. Returns the delta number to be used. 59 | pub fn add(&mut self, mut tags: BTreeMap) -> Result { 60 | let name = if let Some(name) = tags.remove("name") { 61 | name 62 | } else { 63 | return Err(Error::NameMissing); 64 | }; 65 | 66 | let next_delta = self.deltas.iter().map(|x| x.number).max().unwrap_or(0) + 1; 67 | 68 | self.deltas.push(DeltaInfo { 69 | name, 70 | number: next_delta, 71 | tags, 72 | time: Utc::now(), 73 | }); 74 | 75 | Ok(next_delta) 76 | } 77 | 78 | /// Write the header to the writer, as the first line. 79 | pub fn write(&self, mut wr: &mut W) -> Result<()> { 80 | write!(&mut wr, "\x01t")?; 81 | serde_json::to_writer(&mut wr, &self)?; 82 | writeln!(&mut wr)?; 83 | Ok(()) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /weave/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Weave deltas, inspired by SCCS. 2 | //! 3 | //! The [SCCS](https://en.wikipedia.org/wiki/Source_Code_Control_System) revision control system is 4 | //! one of the oldest source code management systems (1973). Although many of its concepts are 5 | //! quite dated in these days of git, the underlying "weave" delta format it used turns out to be a 6 | //! good way of representing multiple versions of data that differ only in parts. 7 | //! 8 | //! This package implements a weave-based storage of "plain text", where plain text consists of 9 | //! lines of UTF-8 printable characters separated by a newline. 10 | //! 11 | //! The format is similar to SCCS, but with no constraints to keep what are relatively poor design 12 | //! decisions from SCCS, such as putting a checksum at the top of the file, and using limited-sized 13 | //! field for values such as the number of lines in a file, or the use of 2-digit years. However, 14 | //! the main body of the weaved file, that which describes inserts and deletes is the same, and 15 | //! allows us to test this version by comparing with the storage of sccs. 16 | //! 17 | //! Weave files are written using [`NewWeave`], which works like a regular file writer. The file 18 | //! itself has a small amount of surrounding metadata, but is otherwise mostly just the contents of 19 | //! the initial file. 20 | //! 21 | //! Adding a delta to a weave file is done with the [`DeltaWriter`]. This is also written to, as a 22 | //! regular file, and then [`DeltaWriter::close`] method will extract a base revision and use the 23 | //! `diff` command to write a new version of the weave. The `close` method will make several 24 | //! temporary files in the process. 25 | //! 26 | //! The weave data is stored using a [`NamingConvention`], a trait that manages a related 27 | //! collection of files, and temp files. [`SimpleNaming`] is a basic representation of this that 28 | //! has a base name, a backup file, and some temporary files. The data in the file can be 29 | //! compressed. 
30 | 
31 | #![warn(bare_trait_objects)]
32 | 
33 | mod delta;
34 | mod errors;
35 | mod header;
36 | mod naming;
37 | mod newweave;
38 | mod parse;
39 | 
40 | pub use crate::{
41 |     delta::DeltaWriter,
42 |     errors::{Error, Result},
43 |     header::{DeltaInfo, Header},
44 |     naming::NamingConvention,
45 |     naming::SimpleNaming,
46 |     naming::Compression,
47 |     newweave::NewWeave,
48 |     parse::{Entry, Parser, PullParser, Sink},
49 | };
50 | 
51 | use std::{io::Write, path::PathBuf};
52 | 
53 | /// Something we can write into, that remembers its name. The writer is boxed because the writer
54 | /// may be compressed.
55 | pub struct WriterInfo {
56 |     name: PathBuf,
57 |     writer: Box<dyn Write>,
58 | }
59 | 
60 | /// Read the header from a weave file.
61 | pub fn read_header(naming: &dyn NamingConvention) -> Result<Header> {
62 |     Ok(PullParser::new(naming, 1)?.into_header())
63 | }
64 | 
65 | /// Retrieve the last delta in the weave file. Will panic if the weave file is malformed and
66 | /// contains no revisions.
67 | pub fn get_last_delta(naming: &dyn NamingConvention) -> Result<usize> {
68 |     let header = read_header(naming)?;
69 |     Ok(header
70 |         .deltas
71 |         .iter()
72 |         .map(|x| x.number)
73 |         .max()
74 |         .expect("at least one delta in weave file"))
75 | }
76 | 
--------------------------------------------------------------------------------
/weave/src/naming.rs:
--------------------------------------------------------------------------------
 1 | //! Weave files will follow a file naming convention. This determines the names of various temp
 2 | //! files and other aspects. The SCCS conventions are not followed, because they are not safe
 3 | //! (this crate will never write to a file that already exists).
 4 | 
 5 | use crate::{Result, WriterInfo};
 6 | use flate2::write::GzEncoder;
 7 | use std::{
 8 |     fs::{File, OpenOptions},
 9 |     io::{BufWriter, ErrorKind, Write},
10 |     path::{Path, PathBuf},
11 | };
12 | 
13 | /// A naming convention provides utilities needed to find the involved files, and construct
14 | /// temporary files as part of writing the new weave. The underlying object should keep the path
15 | /// and base name.
16 | ///
17 | /// The main file is either used by name, or opened for reading. It should never be written to
18 | /// directly. The main file is always compressed if the convention enables compression.
19 | ///
20 | /// The backup file is only used by name. It is neither written to, nor read. It will be
21 | /// compressed, as it always comes from renaming the main file.
22 | ///
23 | /// The temporary files are used by name, and written to. They may or may not be compressed,
24 | /// depending on how they will be used.
25 | pub trait NamingConvention {
26 |     /// Create a temporary file for writing. Upon success, returns the full path of the file, and
27 |     /// the opened File for writing to the file. The path should refer to a new file that did not
28 |     /// exist prior to this call.
29 |     fn temp_file(&self) -> Result<(PathBuf, File)>;
30 | 
31 |     /// Return the pathname of the primary file.
32 |     fn main_file(&self) -> PathBuf;
33 | 
34 |     /// Return the pathname of the backup file.
35 |     fn backup_file(&self) -> PathBuf;
36 | 
37 |     /// Return whether compression is requested on the main file.
38 |     fn compression(&self) -> Compression;
39 | 
40 |     /// Open a possibly compressed temp file, returning a WriterInfo for it. The stream will be
41 |     /// buffered, and possibly compressed.
42 |     fn new_temp(&self) -> Result<WriterInfo> {
43 |         let (name, file) = self.temp_file()?;
44 |         let writer = match self.compression() {
45 |             Compression::Plain =>
46 |                 Box::new(BufWriter::new(file)) as Box<dyn Write>,
47 |             Compression::Gzip =>
48 |                 Box::new(GzEncoder::new(file, flate2::Compression::default())) as Box<dyn Write>,
49 |             Compression::Zstd =>
50 |                 Box::new(zstd::Encoder::new(file, 3)?.auto_finish()) as Box<dyn Write>,
51 |         };
52 |         Ok(WriterInfo { name, writer })
53 |     }
54 | }
55 | 
56 | /// Supported compression types.
57 | #[derive(Debug, Clone, Copy, Eq, PartialEq)]
58 | pub enum Compression {
59 |     Plain,
60 |     Gzip,
61 |     Zstd,
62 | }
63 | 
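To make the convention concrete before the `SimpleNaming` implementation below, the names it produces (this mirrors `weave/tests/naming.rs` near the end of this document; the directory is hypothetical):

    SimpleNaming::new("/tmp/w", "sample", "weave", Compression::Gzip)
        main_file()   -> /tmp/w/sample.weave.gz
        backup_file() -> /tmp/w/sample.bak.gz
        temp_file()   -> /tmp/w/sample.0, then sample.1, ... (always uncompressed)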
64 | /// The SimpleNaming is a NamingConvention that has a basename, with the main file having a
65 | /// specified extension, the backup file having a ".bak" extension, and the temp files using a
66 | /// numbered extension starting with ".0". If the names are intended to be compressed, a ".gz"
67 | /// suffix can also be added.
68 | #[derive(Debug, Clone)]
69 | pub struct SimpleNaming {
70 |     // The directory for the files to be written.
71 |     path: PathBuf,
72 |     // The string for the base filename.
73 |     base: String,
74 |     // The extension to use for the main name.
75 |     ext: String,
76 |     // Compression to be used.
77 |     compression: Compression,
78 | }
79 | 
80 | impl SimpleNaming {
81 |     pub fn new<P: AsRef<Path>>(path: P, base: &str, ext: &str, compression: Compression) -> SimpleNaming {
82 |         SimpleNaming {
83 |             path: path.as_ref().to_path_buf(),
84 |             base: base.to_string(),
85 |             ext: ext.to_string(),
86 |             compression,
87 |         }
88 |     }
89 | 
90 |     pub fn make_name(&self, ext: &str, compression: Compression) -> PathBuf {
91 |         let name = format!(
92 |             "{}.{}{}",
93 |             self.base,
94 |             ext,
95 |             match compression {
96 |                 Compression::Plain => "",
97 |                 Compression::Gzip => ".gz",
98 |                 Compression::Zstd => ".zstd",
99 |             },
100 |         );
101 |         self.path.join(name)
102 |     }
103 | }
104 | 
105 | impl NamingConvention for SimpleNaming {
106 |     fn main_file(&self) -> PathBuf {
107 |         self.make_name(&self.ext, self.compression)
108 |     }
109 | 
110 |     fn backup_file(&self) -> PathBuf {
111 |         self.make_name("bak", self.compression)
112 |     }
113 | 
114 |     fn temp_file(&self) -> Result<(PathBuf, File)> {
115 |         let mut n = 0;
116 |         loop {
117 |             let name = self.make_name(&n.to_string(), Compression::Plain);
118 | 
119 |             match OpenOptions::new().write(true).create_new(true).open(&name) {
120 |                 Ok(fd) => return Ok((name, fd)),
121 |                 Err(ref e) if e.kind() == ErrorKind::AlreadyExists => (),
122 |                 Err(e) => return Err(e.into()),
123 |             }
124 | 
125 |             n += 1;
126 |         }
127 |     }
128 | 
129 |     fn compression(&self) -> Compression {
130 |         self.compression
131 |     }
132 | }
133 | 
--------------------------------------------------------------------------------
/weave/src/newweave.rs:
--------------------------------------------------------------------------------
 1 | //! Writer for new weaves.
 2 | 
 3 | use std::{
 4 |     collections::BTreeMap,
 5 |     fs::rename,
 6 |     io::{self, Write},
 7 |     mem::replace,
 8 | };
 9 | 
10 | use crate::{header::Header, Error, NamingConvention, Result, WriterInfo};
11 | #[allow(unused)]
12 | use crate::Compression;
13 | 
14 | /// A builder for a new weave file. The data is written to it as to a regular writer. Closing the
15 | /// weaver will finish up the write and move the new file into place. If the weaver is just
16 | /// dropped, the file will not be moved into place.
17 | pub struct NewWeave<'n> {
18 |     naming: &'n dyn NamingConvention,
19 |     temp: Option<WriterInfo>,
20 | }
21 | 
22 | impl<'n> NewWeave<'n> {
23 |     pub fn new<'a, 'b, I>(nc: &dyn NamingConvention, tags: I) -> Result<NewWeave>
24 |     where
25 |         I: Iterator<Item = (&'a str, &'b str)>,
26 |     {
27 |         let mut writeinfo = nc.new_temp()?;
28 | 
29 |         let mut ntags = BTreeMap::new();
30 |         for (k, v) in tags {
31 |             ntags.insert(k.to_owned(), v.to_owned());
32 |         }
33 |         let mut header: Header = Default::default();
34 |         let delta = header.add(ntags)?;
35 |         header.write(&mut writeinfo.writer)?;
36 |         writeln!(&mut writeinfo.writer, "\x01I {}", delta)?;
37 | 
38 |         Ok(NewWeave {
39 |             naming: nc,
40 |             temp: Some(writeinfo),
41 |         })
42 |     }
43 | 
44 |     pub fn close(mut self) -> Result<()> {
45 |         let temp = replace(&mut self.temp, None);
46 |         let name = match temp {
47 |             Some(mut wi) => {
48 |                 writeln!(&mut wi.writer, "\x01E 1")?;
49 |                 wi.name
50 |             }
51 |             None => return Err(Error::AlreadyClosed),
52 |         };
53 |         let _ = rename(self.naming.main_file(), self.naming.backup_file());
54 |         rename(name, self.naming.main_file())?;
55 |         Ok(())
56 |     }
57 | }
58 | 
59 | impl<'n> Write for NewWeave<'n> {
60 |     // Write the data out, just passing it through to the underlying file write. We assume the
61 |     // last line is terminated, or the resulting weave will be invalid.
62 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
63 |         self.temp
64 |             .as_mut()
65 |             .expect("Attempt to write to NewWeave that is closed")
66 |             .writer
67 |             .write(buf)
68 |     }
69 | 
70 |     fn flush(&mut self) -> io::Result<()> {
71 |         self.temp
72 |             .as_mut()
73 |             .expect("Attempt to flush NewWeave that is closed")
74 |             .writer
75 |             .flush()
76 |     }
77 | }
78 | 
79 | #[test]
80 | #[ignore]
81 | fn try_tag() {
82 |     use crate::SimpleNaming;
83 |     let mut tags = BTreeMap::new();
84 |     tags.insert("name".to_owned(), "initial revision".to_owned());
85 |     // Add a whole bunch of longer tags to show it works.
86 |     for i in 1..100 {
87 |         tags.insert(format!("key{}", i), format!("This is the {}th value", i));
88 |     }
89 |     let nc = SimpleNaming::new(".", "tags", "weave", Compression::Gzip);
90 |     let t2 = tags.iter().map(|(k, v)| (k.as_ref(), v.as_ref()));
91 |     let mut wr = NewWeave::new(&nc, t2).unwrap();
92 |     writeln!(&mut wr, "This is the only line in the file").unwrap();
93 |     wr.close().unwrap();
94 | }
95 | 
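Putting `NewWeave`, `DeltaWriter`, and the header together, a schematic of the bytes in a finished weave file (the JSON is abbreviated, and `^A` stands for the 0x01 control byte):

    ^At{"version":1,"deltas":[{"name":"first","number":1,...},{"name":"second","number":2,...}]}
    ^AI 1
    a line present since revision 1
    ^AD 2
    a line that revision 2 deleted
    ^AE 2
    ^AI 2
    a line that revision 2 added
    ^AE 2
    ^AE 1

The parser that follows reconstructs any one revision by walking these insert/delete blocks.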
--------------------------------------------------------------------------------
/weave/src/parse.rs:
--------------------------------------------------------------------------------
 1 | //! Weave parsing.
 2 | 
 3 | use crate::{header::Header, Error, NamingConvention, Compression, Result};
 4 | use flate2::read::GzDecoder;
 5 | use log::info;
 6 | use std::{
 7 |     cell::RefCell,
 8 |     fs::File,
 9 |     io::{BufRead, BufReader, Lines, Read},
10 |     mem,
11 |     rc::Rc,
12 | };
13 | 
14 | /// A Sink is a place that a parsed weave can be sent to. The insert/delete/end commands match
15 | /// those in the weave file, and `plain` carries the lines of data. Each plain line comes with a
16 | /// flag indicating whether it should be included in the output (`plain` is called for all lines,
17 | /// so that updates can use this same code). All methods return a result, with the Err value
18 | /// stopping the parse. Note that the default implementations just return success, ignoring their arguments.
19 | pub trait Sink {
20 |     /// Begin an insert sequence for the given delta.
21 |     fn insert(&mut self, _delta: usize) -> Result<()> {
22 |         Ok(())
23 |     }
24 | 
25 |     /// Begin a delete sequence.
26 |     fn delete(&mut self, _delta: usize) -> Result<()> {
27 |         Ok(())
28 |     }
29 | 
30 |     /// End a previous insert or delete.
31 |     fn end(&mut self, _delta: usize) -> Result<()> {
32 |         Ok(())
33 |     }
34 | 
35 |     /// A single line of plain text from the weave. `keep` indicates if the line should be
36 |     /// included in the requested delta.
37 |     fn plain(&mut self, _text: &str, _keep: bool) -> Result<()> {
38 |         Ok(())
39 |     }
40 | }
41 | 
42 | /// The PullParser returns the entries as nodes. These are equivalent to
43 | /// the values in Sink.
44 | #[derive(Debug)]
45 | pub enum Entry {
46 |     /// Begin an insert sequence for the given delta.
47 |     Insert { delta: usize },
48 | 
49 |     /// Begin a delete sequence.
50 |     Delete { delta: usize },
51 | 
52 |     /// End a previous insert or delete.
53 |     End { delta: usize },
54 | 
55 |     /// A single line of plaintext from the weave. `keep` indicates if the
56 |     /// line should be included in the requested delta.
57 |     Plain { text: String, keep: bool },
58 | 
59 |     /// A control message. Doesn't currently contain any data, which can be added later if needed.
60 |     Control,
61 | }
62 | 
63 | /// A Parser is used to process a weave file. This is a wrapper around the pull parser that
64 | /// invokes a push parser.
65 | pub struct Parser<B, S> {
66 |     /// The pull parser.
67 |     pull: PullParser<B>,
68 | 
69 |     /// The sink to be given each line record in the weave file.
70 |     sink: Rc<RefCell<S>>,
71 | 
72 |     /// A single pending line, kept from the last invocation.
73 |     pending: Option<String>,
74 | 
75 |     /// Tracking the line number.
76 |     lineno: usize,
77 | }
78 | 
79 | impl<S: Sink> Parser<BufReader<Box<dyn Read>>, S> {
80 |     /// Construct a parser, based on the main file of the naming convention.
81 |     pub fn new(
82 |         naming: &dyn NamingConvention,
83 |         sink: S,
84 |         delta: usize,
85 |     ) -> Result<Parser<BufReader<Box<dyn Read>>, S>> {
86 |         let rd = match naming.compression() {
87 |             Compression::Plain => {
88 |                 Box::new(File::open(naming.main_file())?) as Box<dyn Read>
89 |             }
90 |             Compression::Gzip => {
91 |                 let fd = File::open(naming.main_file())?;
92 |                 Box::new(GzDecoder::new(fd)) as Box<dyn Read>
93 |             }
94 |             Compression::Zstd => {
95 |                 let fd = File::open(naming.main_file())?;
96 |                 Box::new(zstd::Decoder::new(fd)?) as Box<dyn Read>
97 |             }
98 |         };
99 |         let lines = BufReader::new(rd).lines();
100 |         Parser::new_raw(lines, Rc::new(RefCell::new(sink)), delta)
101 |     }
102 | }
103 | 
104 | impl<B: BufRead, S: Sink> Parser<B, S> {
105 |     /// Construct a new Parser, reading from the given Reader, giving records to the given Sink,
106 |     /// and aiming for the specified `delta`. This is not the intended constructor; normal users
107 |     /// should use `new`. (This is public, for testing.)
108 |     pub fn new_raw(
109 |         source: Lines<B>,
110 |         sink: Rc<RefCell<S>>,
111 |         delta: usize,
112 |     ) -> Result<Parser<B, S>> {
113 |         let pull = PullParser::new_raw(source, delta)?;
114 |         Ok(Parser {
115 |             pull,
116 |             sink,
117 |             pending: None,
118 |             lineno: 0,
119 |         })
120 |     }
121 | 
122 |     /// Run the parser until we either reach the given line number, or the end of the weave. Lines
123 |     /// are numbered from 1, so calling with a lineno of zero will run the parser until the end of
124 |     /// the input. Returns Ok(0) for the end of input, Ok(n) for stopping at line n (which should
125 |     /// always be the same as the passed-in lineno), or Err if there is an error.
126 |     pub fn parse_to(&mut self, lineno: usize) -> Result<usize> {
127 |         // Handle any pending input line. Pending lines only happen while keeping.
128 |         if let Some(text) = mem::replace(&mut self.pending, None) {
129 |             self.sink.borrow_mut().plain(&text, true)?;
130 |         }
131 | 
132 |         loop {
133 |             match self.pull.next() {
134 |                 Some(Ok(Entry::Plain { text, keep })) => {
135 |                     if keep {
136 |                         self.lineno += 1;
137 |                         if self.lineno == lineno {
138 |                             // This is the desired stopping point, hold onto this line, and return
139 |                             // to the caller.
140 |                             self.pending = Some(text);
141 |                             return Ok(lineno);
142 |                         }
143 |                     }
144 | 
145 |                     self.sink.borrow_mut().plain(&text, keep)?;
146 |                 }
147 |                 Some(Ok(Entry::Insert { delta })) => {
148 |                     self.sink.borrow_mut().insert(delta)?;
149 |                 }
150 |                 Some(Ok(Entry::Delete { delta })) => {
151 |                     self.sink.borrow_mut().delete(delta)?;
152 |                 }
153 |                 Some(Ok(Entry::End { delta })) => {
154 |                     self.sink.borrow_mut().end(delta)?;
155 |                 }
156 |                 Some(Ok(Entry::Control)) => (),
157 |                 Some(Err(err)) => {
158 |                     return Err(err);
159 |                 }
160 |                 None => {
161 |                     return Ok(0);
162 |                 }
163 |             }
164 |         }
165 |     }
166 | 
167 | 
168 |     /// Get the header read from this weave file.
169 |     pub fn get_header(&self) -> &Header {
170 |         &self.pull.header
171 |     }
172 | 
173 |     /// Consume the parser, returning the header.
174 |     pub fn into_header(self) -> Header {
175 |         self.pull.into_header()
176 |     }
177 | 
178 |     /// Get a copy of the sink.
179 |     pub fn get_sink(&self) -> Rc<RefCell<S>> {
180 |         self.sink.clone()
181 |     }
182 | }
183 | 
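A sketch of driving this push parser with a custom `Sink` (extracting one delta into memory; `parse_to(0)` meaning "run to end" follows the code above, and the naming value is whatever convention the weave was written with):

    struct Collect(Vec<String>);

    impl weave::Sink for Collect {
        fn plain(&mut self, text: &str, keep: bool) -> weave::Result<()> {
            if keep {
                self.0.push(text.to_string());
            }
            Ok(())
        }
    }

    fn lines_of(naming: &dyn weave::NamingConvention, delta: usize) -> weave::Result<Vec<String>> {
        let mut parser = weave::Parser::new(naming, Collect(vec![]), delta)?;
        parser.parse_to(0)?; // run to the end of the input
        let sink = parser.get_sink();
        let lines = std::mem::take(&mut sink.borrow_mut().0);
        Ok(lines)
    }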
184 | /*
185 | /// A PullIterator returns entities in a weave file, extracting either
186 | /// everything, or only a specific delta.
187 | pub struct PullIterator<B> {
188 |     /// The lines of the input.
189 |     source: Lines<B>,
190 | 
191 |     /// The desired delta to retrieve.
192 |     delta: usize,
193 | 
194 |     /// The delta state is kept sorted with the newest (largest) delta at
195 |     /// element 0.
196 |     delta_state: Vec<OneDelta>,
197 | 
198 |     /// Indicates we are currently keeping lines.
199 |     keeping: bool,
200 | 
201 |     /// The current line number.
202 |     lineno: usize,
203 | 
204 |     /// The header extracted from the file.
205 |     header: Header,
206 | }
207 | */
208 | 
209 | /// The pull parser is the intended way of reading from weave files. After opening a particular
210 | /// delta with [`PullParser::new`], the parser can be used as an iterator, to return [`Entry`] values. In
211 | /// particular, the entries for [`Entry::Plain`] where `keep` is true will be the lines of the
212 | /// weave that comprise the expected delta.
213 | pub struct PullParser<B> {
214 |     /// The lines of the input.
215 |     source: Lines<B>,
216 | 
217 |     /// The desired delta to retrieve.
218 |     delta: usize,
219 | 
220 |     /// The delta state is kept sorted with the newest (largest) delta at element 0.
221 |     delta_state: Vec<OneDelta>,
222 | 
223 |     /// Indicates that we are currently "keeping" lines.
224 |     keeping: bool,
225 | 
226 |     /// The header extracted from the file.
227 |     header: Header,
228 | }
229 | 
230 | impl PullParser<BufReader<Box<dyn Read>>> {
231 |     /// Construct a parser, based on the main file of the naming
232 |     /// convention.
233 |     pub fn new(
234 |         naming: &dyn NamingConvention,
235 |         delta: usize,
236 |     ) -> Result<PullParser<BufReader<Box<dyn Read>>>> {
237 |         let rd = match naming.compression() {
238 |             Compression::Plain => {
239 |                 Box::new(File::open(naming.main_file())?) as Box<dyn Read>
240 |             }
241 |             Compression::Gzip => {
242 |                 let fd = File::open(naming.main_file())?;
243 |                 Box::new(GzDecoder::new(fd)) as Box<dyn Read>
244 |             }
245 |             Compression::Zstd => {
246 |                 let fd = File::open(naming.main_file())?;
247 |                 Box::new(zstd::Decoder::new(fd)?) as Box<dyn Read>
248 |             }
249 |         };
250 |         let lines = BufReader::new(rd).lines();
251 |         PullParser::new_raw(lines, delta)
252 |     }
253 | }
254 | 
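A sketch of consuming the pull parser directly, using the constructor just above:

    fn print_delta(naming: &dyn weave::NamingConvention, delta: usize) -> weave::Result<()> {
        let parser = weave::PullParser::new(naming, delta)?;
        for entry in parser {
            if let weave::Entry::Plain { text, keep: true } = entry? {
                println!("{}", text);
            }
        }
        Ok(())
    }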
/// The pull parser is the intended way of reading from weave files. After opening a particular
/// delta with [`PullParser::new`], the parser can be used as an iterator, to return [`Entry`]
/// values. In particular, the entries for [`Entry::Plain`] where `keep` is true will be the lines
/// of the weave that comprise the requested delta.
pub struct PullParser<B: BufRead> {
    /// The lines of the input.
    source: Lines<B>,

    /// The desired delta to retrieve.
    delta: usize,

    /// The delta state is kept sorted with the newest (largest) delta at element 0.
    delta_state: Vec<OneDelta>,

    /// Indicates that we are currently "keeping" lines.
    keeping: bool,

    /// The header extracted from the file.
    header: Header,
}

impl PullParser<BufReader<Box<dyn Read>>> {
    /// Construct a parser, based on the main file of the naming
    /// convention.
    pub fn new(
        naming: &dyn NamingConvention,
        delta: usize,
    ) -> Result<PullParser<BufReader<Box<dyn Read>>>> {
        let rd = match naming.compression() {
            Compression::Plain => Box::new(File::open(naming.main_file())?) as Box<dyn Read>,
            Compression::Gzip => {
                let fd = File::open(naming.main_file())?;
                Box::new(GzDecoder::new(fd)) as Box<dyn Read>
            }
            Compression::Zstd => {
                let fd = File::open(naming.main_file())?;
                Box::new(zstd::Decoder::new(fd)?) as Box<dyn Read>
            }
        };
        let lines = BufReader::new(rd).lines();
        PullParser::new_raw(lines, delta)
    }
}

impl<B: BufRead> PullParser<B> {
    /// Construct a new PullParser, reading from the given Reader. The parser
    /// will act as an iterator. This is not the intended constructor; normal
    /// users should use `new`. (This is public for testing.)
    pub fn new_raw(mut source: Lines<B>, delta: usize) -> Result<PullParser<B>> {
        if let Some(line) = source.next() {
            let line = line?;
            let header = Header::decode(&line)?;

            Ok(PullParser {
                source,
                delta,
                delta_state: vec![],
                keeping: false,
                header,
            })
        } else {
            Err(Error::EmptyWeave)
        }
    }

    /// Remove the given numbered state.
    fn pop(&mut self, delta: usize) {
        // The binary search is reversed, so the largest are first.
        let pos = match self
            .delta_state
            .binary_search_by(|ent| delta.cmp(&ent.delta))
        {
            Ok(pos) => pos,
            Err(_) => unreachable!(),
        };

        self.delta_state.remove(pos);
    }

    /// Add a new state. It will be inserted in the proper place in the array, based on the delta
    /// number.
    fn push(&mut self, delta: usize, mode: StateMode) {
        match self
            .delta_state
            .binary_search_by(|ent| delta.cmp(&ent.delta))
        {
            Ok(_) => panic!("Duplicate state in push"),
            Err(pos) => self.delta_state.insert(pos, OneDelta { delta, mode }),
        }
    }

    /// Update the keep field, based on the current state.
    fn update_keep(&mut self) {
        info!("Update: {:?}", self.delta_state);
        for st in &self.delta_state {
            match st.mode {
                StateMode::Keep => {
                    self.keeping = true;
                    return;
                }
                StateMode::Skip => {
                    self.keeping = false;
                    return;
                }
                _ => (),
            }
        }

        // This shouldn't be reached if there are any more context lines, but we may get here when
        // we reach the end of the input.
        self.keeping = false;
    }

    /// Get the header read from this weave file.
    pub fn get_header(&self) -> &Header {
        &self.header
    }

    /// Consume the parser, returning the header.
    pub fn into_header(self) -> Header {
        self.header
    }
}
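
// For reference: a control line in the weave begins with a 0x01 byte, followed
// by a command letter, a space, and the delta number, e.g. "\x01I 12" begins
// the insert for delta 12, "\x01D 12" a delete, and "\x01E 12" ends either.
// This is why `next` below checks byte 1 for the command letter and parses the
// delta number from `line[3..]`.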
impl<B: BufRead> Iterator for PullParser<B> {
    type Item = Result<Entry>;

    fn next(&mut self) -> Option<Result<Entry>> {
        // At this level, there is a 1:1 correspondence between weave input
        // lines and those returned.
        let line = match self.source.next() {
            None => return None,
            Some(Ok(line)) => line,
            Some(Err(e)) => return Some(Err(From::from(e))),
        };

        info!("line: {:?}", line);

        // Detect the first character, without borrowing.
        let textual = match line.bytes().next() {
            None => true,
            Some(ch) if ch != b'\x01' => true,
            _ => false,
        };

        if textual {
            return Some(Ok(Entry::Plain {
                text: line,
                keep: self.keeping,
            }));
        }

        let linebytes = line.as_bytes();

        if linebytes.len() < 4 {
            return Some(Ok(Entry::Control));
        }

        if linebytes[1] != b'I' && linebytes[1] != b'D' && linebytes[1] != b'E' {
            return Some(Ok(Entry::Control));
        }

        // TODO: Don't panic, but fail.
        let this_delta: usize = line[3..].parse().unwrap();

        match linebytes[1] {
            b'E' => {
                self.pop(this_delta);
                self.update_keep();
                Some(Ok(Entry::End { delta: this_delta }))
            }
            b'I' => {
                if self.delta >= this_delta {
                    self.push(this_delta, StateMode::Keep);
                } else {
                    self.push(this_delta, StateMode::Skip);
                }
                self.update_keep();

                Some(Ok(Entry::Insert { delta: this_delta }))
            }
            b'D' => {
                if self.delta >= this_delta {
                    self.push(this_delta, StateMode::Skip);
                } else {
                    self.push(this_delta, StateMode::Next);
                }
                self.update_keep();

                Some(Ok(Entry::Delete { delta: this_delta }))
            }
            _ => unreachable!(),
        }
    }
}

#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum StateMode {
    Keep,
    Skip,
    Next,
}

#[derive(Debug)]
struct OneDelta {
    delta: usize,
    mode: StateMode,
}
--------------------------------------------------------------------------------
/weave/tests/naming.rs:
--------------------------------------------------------------------------------
// Test the naming convention code.

extern crate tempdir;
extern crate weave;

use std::path::Path;

use tempdir::TempDir;
use weave::{Compression, NamingConvention, SimpleNaming};

#[test]
fn test_names() {
    let tmp = TempDir::new("weave").unwrap();

    let path = tmp.path().to_str().unwrap();

    let nm = SimpleNaming::new(tmp.path(), "sample", "weave", Compression::Gzip);
    assert_eq!(
        nm.main_file(),
        Path::new(&format!("{}/sample.weave.gz", path))
    );
    assert_eq!(
        nm.backup_file(),
        Path::new(&format!("{}/sample.bak.gz", path))
    );

    for i in 0..100 {
        let (tname, _tfd) = nm.temp_file().unwrap();
        assert_eq!(tname, Path::new(&format!("{}/sample.{}", path, i)));
        println!("tname: {:?}", tname);
    }
}
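
// A hypothetical variation (not in the test suite): with `Compression::Plain`
// the same convention yields uncompressed names, matching the "sample.weave"
// file created by the sccs test in tests/sccs.rs.
//
//     let nm = SimpleNaming::new(tmp.path(), "sample", "weave", Compression::Plain);
//     assert_eq!(nm.main_file(), Path::new(&format!("{}/sample.weave", path)));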
--------------------------------------------------------------------------------
/weave/tests/sccs.rs:
--------------------------------------------------------------------------------
/// Comparisons against SCCS.
///
/// The weave algorithm used comes from the SCCS program. This can be installed on most Linux
/// distros by installing the package "cssc".
extern crate env_logger;
#[macro_use]
extern crate log;
extern crate rand;
extern crate tempdir;
extern crate weave;

use rand::{rngs::StdRng, Rng, SeedableRng};
use std::collections::BTreeMap;
use std::env;
use std::fs::{remove_file, File};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use std::process::{Command, ExitStatus, Stdio};
use tempdir::TempDir;
use weave::{Compression, DeltaWriter, Entry, NewWeave, PullParser, Result, SimpleNaming, Sink};

/// Number of iterations to make. Note that the default check is greater than O(n^2), so the test
/// will run very long if this is increased too much.
const ITERATION_COUNT: usize = 100;

/// Number of lines in the files. Affects how convoluted the diffs are.
const FILE_SIZE: usize = 100;

/// Set to true to verify all previous deltas, not just the most recent.
const VERIFY_ALL_DELTAS: bool = true;

#[test]
fn sccs() {
    let _ = env_logger::init();

    // Normally, detect the SCCS command being present, and use it for additional tests. It can be
    // ignored by setting NO_SCCS=1 in the environment.
    let use_sccs = has_sccs() && env::var("NO_SCCS").is_err();

    let tdir = TempDir::new("sccstest").unwrap();
    let mut gen = Gen::new(tdir.path(), use_sccs).unwrap();

    // For debugging, this will cause the directory to not be removed.
    if env::var("KEEPTEMP").is_ok() {
        tdir.into_path();
    }

    gen.new_sccs();
    gen.new_weave();
    gen.next_delta();
    gen.weave_check();

    for i in 0..ITERATION_COUNT {
        gen.shuffle();
        gen.add_sccs_delta();
        gen.add_weave_delta(i + 1);
        gen.next_delta();

        // Checking with sccs is very slow. Do we want to do it?
        // gen.sccs_check();
        gen.weave_check();
    }
}

/// Determine if we have the sccs command available. If not, show an error, and return false.
fn has_sccs() -> bool {
    match Command::new("sccs").arg("-V").output() {
        Ok(_) => true,
        Err(_) => {
            error!("'sccs' not found in path, skipping some tests, install 'cssc' to fix");
            false
        }
    }
}

/// Gen synthesizes a series of deltas, and can add them using SCCS to make a weave file, and later
/// to this weave implementation to compare the results.
struct Gen {
    /// The directory to write the files into.
    tdir: PathBuf,

    /// The name of the plain file related to it.
    sccs_plain: PathBuf,

    /// The current lines.
    nums: Vec<usize>,

    /// Each delta. Sccs numbers the deltas from 1, so these are off by one.
    deltas: Vec<Vec<usize>>,

    /// A Rng for generating the shuffles.
    rand: StdRng,

    /// Is sccs enabled.
    use_sccs: bool,
}

impl Gen {
    fn new<P: AsRef<Path>>(tdir: P, use_sccs: bool) -> Result<Gen> {
        let tdir = tdir.as_ref();
        let mut seed: [u8; 32] = [0; 32];
        seed[0] = 1;
        seed[1] = 2;
        seed[2] = 3;
        seed[3] = 4;
        Ok(Gen {
            tdir: tdir.to_owned(),
            sccs_plain: tdir.join("tfile"),
            nums: (1..FILE_SIZE + 1).collect(),
            rand: SeedableRng::from_seed(seed),
            deltas: vec![],
            use_sccs,
        })
    }

    /// Perform a somewhat random modification of the data. Choose some range of the numbers and
    /// reverse them.
    fn shuffle(&mut self) {
        let a = self.rand.gen_range(0..self.nums.len());
        let b = self.rand.gen_range(0..self.nums.len());

        let (a, b) = if a <= b { (a, b) } else { (b, a) };
        self.nums[a..b].reverse();
    }
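
    // Worked example: with `nums = [1, 2, 3, 4, 5]`, drawing `a = 1` and
    // `b = 4` reverses `nums[1..4]`, yielding `[1, 4, 3, 2, 5]`; when
    // `a == b` the slice is empty and the "shuffle" is a no-op.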
    fn next_delta(&mut self) {
        self.deltas.push(self.nums.clone())
    }

    /// Write to a new sccs file, resulting in delta 1.
    fn new_sccs(&mut self) {
        if !self.use_sccs {
            return;
        }

        self.emit_to(&self.sccs_plain);
        Command::new("sccs")
            .args(&["admin", "-itfile", "-n", "s.tfile"])
            .current_dir(&self.tdir)
            .status()
            .expect("Unable to run sccs admin")
            .expect_success("Sccs command returned error");
        remove_file(&self.sccs_plain).expect("Unable to remove data file");
    }

    /// Add a new delta to the sccs file.
    fn add_sccs_delta(&mut self) {
        if !self.use_sccs {
            return;
        }

        Command::new("sccs")
            .args(&["get", "-e", "s.tfile"])
            .current_dir(&self.tdir)
            .stderr(Stdio::null())
            .stdout(Stdio::null())
            .status()
            .expect("Unable to run sccs get")
            .expect_success("sccs get failed");
        self.emit_to(&self.sccs_plain);
        Command::new("sccs")
            .args(&["delta", "-yMessage", "s.tfile"])
            .current_dir(&self.tdir)
            .stderr(Stdio::null())
            .stdout(Stdio::null())
            .status()
            .expect("Unable to run sccs delta")
            .expect_success("sccs delta failed");
    }

    /// Emit the current numbers to the given name (in the temp dir).
    fn emit_to<P: AsRef<Path>>(&self, name: P) {
        let mut fd = File::create(self.tdir.join(name)).unwrap();
        for i in &self.nums {
            writeln!(&mut fd, "{}", i).unwrap();
        }
    }

    /// Check the output of "sccs get". This is more of a sanity check.
    #[allow(dead_code)]
    fn sccs_check(&self) {
        for (i, del) in self.deltas.iter().enumerate() {
            self.sccs_check_one(i, del);
        }
    }

    #[allow(dead_code)]
    fn sccs_check_one(&self, num: usize, data: &[usize]) {
        if !self.use_sccs {
            return;
        }

        let out = Command::new("sccs")
            .args(&["get", &format!("-r1.{}", num + 1), "-p", "s.tfile"])
            .current_dir(&self.tdir)
            .output()
            .expect("Unable to run sccs get");
        out.status.expect_success("Error running sccs get");
        let mut onums: Vec<usize> = vec![];
        for line in BufReader::new(&out.stdout[..]).lines() {
            let line = line.unwrap();
            onums.push(line.as_str().parse::<usize>().unwrap());
        }

        assert_eq!(data, &onums[..]);
    }
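
    // Note on numbering: `deltas[i]` corresponds to SCCS revision `1.{i + 1}`
    // and to weave delta `i + 1`, which is why the check functions above and
    // below request revision or delta `num + 1`.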
    /// Check that weave decodes all of the sccs files properly.
    fn weave_check(&self) {
        if VERIFY_ALL_DELTAS {
            // Verify all of the previous deltas.
            for (i, del) in self.deltas.iter().enumerate() {
                self.weave_sccs_check_one(i, del);
                self.weave_check_one(i, del);
                self.weave_check_pull(i, del);
            }
        } else {
            // This only checks the most recent delta. It will miss any bugs that result in
            // earlier deltas being unreadable.
            let del = self.deltas.last().unwrap();
            self.weave_sccs_check_one(self.deltas.len() - 1, del);
            self.weave_check_one(self.deltas.len() - 1, del);
            self.weave_check_pull(self.deltas.len() - 1, del);
        }
    }

    fn weave_sccs_check_one(&self, num: usize, data: &[usize]) {
        if !self.use_sccs {
            return;
        }

        let fd = File::open(self.tdir.join("s.tfile")).unwrap();
        let lines = BufReader::new(fd).lines();
        let mut nums: Vec<usize> = vec![];
        for node in PullParser::new_raw(lines, num + 1).unwrap() {
            if let Entry::Plain { text, keep } = node.unwrap() {
                if keep {
                    nums.push(text.parse::<usize>().unwrap());
                }
            }
        }
        assert_eq!(data, nums);
    }

    fn weave_check_one(&self, num: usize, data: &[usize]) {
        let fd = File::open(self.tdir.join("sample.weave")).unwrap();
        let lines = BufReader::new(fd).lines();
        let mut nums: Vec<usize> = vec![];
        for node in PullParser::new_raw(lines, num + 1).unwrap() {
            if let Entry::Plain { text, keep } = node.unwrap() {
                if keep {
                    nums.push(text.parse::<usize>().unwrap());
                }
            }
        }

        assert_eq!(data, nums);
    }

    fn weave_check_pull(&self, num: usize, data: &[usize]) {
        let fd = File::open(self.tdir.join("sample.weave")).unwrap();
        let lines = BufReader::new(fd).lines();
        let mut nums = vec![];
        for line in PullParser::new_raw(lines, num + 1).unwrap() {
            let line = line.unwrap();
            match line {
                Entry::Plain { keep, text } if keep => {
                    nums.push(text.parse::<usize>().unwrap());
                }
                _ => (),
            }
        }

        assert_eq!(data, &nums[..]);
    }

    fn new_weave(&mut self) {
        let mut tags = BTreeMap::new();
        tags.insert("name", "initial");
        let nc = SimpleNaming::new(&self.tdir, "sample", "weave", Compression::Plain);
        let mut nw = NewWeave::new(&nc, tags.into_iter()).unwrap();
        for i in &self.nums {
            writeln!(&mut nw, "{}", i).unwrap();
        }
        nw.close().unwrap();
    }

    fn add_weave_delta(&mut self, base: usize) {
        let name_value = format!("{}", base + 1);
        let mut tags = BTreeMap::new();
        tags.insert("name", name_value.as_str());
        let nc = SimpleNaming::new(&self.tdir, "sample", "weave", Compression::Plain);
        let mut delta = DeltaWriter::new(&nc, tags.into_iter(), base).unwrap();
        for i in &self.nums {
            writeln!(&mut delta, "{}", i).unwrap();
        }
        delta.close().unwrap();
    }
}

/// A Weave Sink that just collects the numbers in the given delta.
struct DeltaSink {
    nums: Vec<usize>,
}

impl Sink for DeltaSink {
    fn plain(&mut self, text: &str, keep: bool) -> Result<()> {
        if !keep {
            return Ok(());
        }

        self.nums.push(text.parse::<usize>()?);
        Ok(())
    }
}

/// A small utility to make asserting success easier.
trait Successful {
    fn expect_success(&self, msg: &str);
}

impl Successful for ExitStatus {
    fn expect_success(&self, msg: &str) {
        if !self.success() {
            panic!("{}", msg);
        }
    }
}
--------------------------------------------------------------------------------