├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE-MIT ├── NEWS ├── README.md ├── benches └── hash.rs ├── default.nix ├── etc └── template-bk-readme.txt ├── naming ├── Cargo.toml ├── README.rst └── src │ └── lib.rs ├── shell.nix ├── src ├── errors.rs ├── escape.rs ├── hashes.rs ├── lib.rs ├── main.rs ├── node.rs ├── node │ ├── compare.rs │ ├── fs.rs │ ├── fullpath.rs │ └── hashes.rs ├── progress.rs ├── show.rs ├── store.rs ├── store │ └── weave.rs ├── surefs.rs └── suretree.rs ├── tests └── surefiles.rs └── weave ├── .gitignore ├── Cargo.toml ├── README.rst ├── src ├── delta.rs ├── errors.rs ├── header.rs ├── lib.rs ├── naming.rs ├── newweave.rs └── parse.rs └── tests ├── naming.rs └── sccs.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .*.swp 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | 3 | before_install: 4 | - sudo apt-get install -y cssc 5 | 6 | rust: 7 | - stable 8 | - beta 9 | - nightly 10 | 11 | matrix: 12 | allow_failures: 13 | - rust: nightly 14 | 15 | script: 16 | - cargo build 17 | - cargo test 18 | - cd weave 19 | - cargo build 20 | - cargo test 21 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # ChangeLog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ## [0.9.3] 11 | 12 | ### Changed 13 | 14 | - This release has no code changes, and is merely a version bump to 15 | properly tag this and release to [crates.io](https://crates.io/). 16 | 17 | ## [0.9.1] 18 | 19 | ### Changed 20 | 21 | - Weave parser now implements a pull parser. This avoids the overhead 22 | of threads and channels for normal processing of the surefile. 23 | - Numerous minor code cleanups from clippy and rustfmt 24 | - Add `default.nix` and `shell.nix` to help with development under 25 | Nix. 26 | 27 | ### Fixed 28 | 29 | - Fix duplicated names in some comparison messages 30 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing.
3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "ahash" 13 | version = "0.7.6" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" 16 | dependencies = [ 17 | "getrandom", 18 | "once_cell", 19 | "version_check", 20 | ] 21 | 22 | [[package]] 23 | name = "aho-corasick" 24 | version = "0.7.18" 25 | source = "registry+https://github.com/rust-lang/crates.io-index" 26 | checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" 27 | dependencies = [ 28 | "memchr", 29 | ] 30 | 31 | [[package]] 32 | name = "ansi_term" 33 | version = "0.11.0" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" 36 | dependencies = [ 37 | "winapi", 38 | ] 39 | 40 | [[package]] 41 | name = "atty" 42 | version = "0.2.14" 43 | source = "registry+https://github.com/rust-lang/crates.io-index" 44 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 45 | dependencies = [ 46 | "hermit-abi", 47 | "libc", 48 | "winapi", 49 | ] 50 | 51 | [[package]] 52 | name = "autocfg" 53 | version = "1.0.1" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" 56 | 57 | [[package]] 58 | name = "bitflags" 59 | version = "1.2.1" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 62 | 63 | [[package]] 64 | name = "cc" 65 | version = "1.0.67" 66 | source = "registry+https://github.com/rust-lang/crates.io-index" 67 | checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd" 68 | dependencies = [ 69 | "jobserver", 70 | ] 71 | 72 | [[package]] 73 | name = "cfg-if" 74 | version = "1.0.0" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 77 | 78 | [[package]] 79 | name = "chrono" 80 | version = "0.4.19" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" 83 | dependencies = [ 84 | "libc", 85 | "num-integer", 86 | "num-traits", 87 | "serde", 88 | "time 0.1.44", 89 | "winapi", 90 | ] 91 | 92 | [[package]] 93 | name = "clap" 94 | version = "2.33.3" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" 97 | dependencies = [ 98 | "ansi_term", 99 | "atty", 100 | "bitflags", 101 | "strsim", 102 | "textwrap", 103 | "unicode-width", 104 | "vec_map", 105 | ] 106 | 107 | [[package]] 108 | name = "crc32fast" 109 | version = "1.2.1" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" 112 | dependencies = [ 113 | "cfg-if", 114 | ] 115 | 116 | [[package]] 117 | name = "crossbeam" 118 | version = "0.8.1" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "4ae5588f6b3c3cb05239e90bd110f257254aecd01e4635400391aeae07497845" 121 | dependencies = [ 122 | "cfg-if", 123 
| "crossbeam-channel", 124 | "crossbeam-deque", 125 | "crossbeam-epoch", 126 | "crossbeam-queue", 127 | "crossbeam-utils", 128 | ] 129 | 130 | [[package]] 131 | name = "crossbeam-channel" 132 | version = "0.5.0" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775" 135 | dependencies = [ 136 | "cfg-if", 137 | "crossbeam-utils", 138 | ] 139 | 140 | [[package]] 141 | name = "crossbeam-deque" 142 | version = "0.8.0" 143 | source = "registry+https://github.com/rust-lang/crates.io-index" 144 | checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" 145 | dependencies = [ 146 | "cfg-if", 147 | "crossbeam-epoch", 148 | "crossbeam-utils", 149 | ] 150 | 151 | [[package]] 152 | name = "crossbeam-epoch" 153 | version = "0.9.7" 154 | source = "registry+https://github.com/rust-lang/crates.io-index" 155 | checksum = "c00d6d2ea26e8b151d99093005cb442fb9a37aeaca582a03ec70946f49ab5ed9" 156 | dependencies = [ 157 | "cfg-if", 158 | "crossbeam-utils", 159 | "lazy_static", 160 | "memoffset", 161 | "scopeguard", 162 | ] 163 | 164 | [[package]] 165 | name = "crossbeam-queue" 166 | version = "0.3.4" 167 | source = "registry+https://github.com/rust-lang/crates.io-index" 168 | checksum = "4dd435b205a4842da59efd07628f921c096bc1cc0a156835b4fa0bcb9a19bcce" 169 | dependencies = [ 170 | "cfg-if", 171 | "crossbeam-utils", 172 | ] 173 | 174 | [[package]] 175 | name = "crossbeam-utils" 176 | version = "0.8.7" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "b5e5bed1f1c269533fa816a0a5492b3545209a205ca1a54842be180eb63a16a6" 179 | dependencies = [ 180 | "cfg-if", 181 | "lazy_static", 182 | ] 183 | 184 | [[package]] 185 | name = "data-encoding" 186 | version = "2.3.2" 187 | source = "registry+https://github.com/rust-lang/crates.io-index" 188 | checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57" 189 | 190 | [[package]] 191 | name = "env_logger" 192 | version = "0.9.0" 193 | source = "registry+https://github.com/rust-lang/crates.io-index" 194 | checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" 195 | dependencies = [ 196 | "atty", 197 | "humantime", 198 | "log", 199 | "regex", 200 | "termcolor", 201 | ] 202 | 203 | [[package]] 204 | name = "fallible-iterator" 205 | version = "0.2.0" 206 | source = "registry+https://github.com/rust-lang/crates.io-index" 207 | checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" 208 | 209 | [[package]] 210 | name = "fallible-streaming-iterator" 211 | version = "0.1.9" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" 214 | 215 | [[package]] 216 | name = "flate2" 217 | version = "1.0.22" 218 | source = "registry+https://github.com/rust-lang/crates.io-index" 219 | checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" 220 | dependencies = [ 221 | "cfg-if", 222 | "crc32fast", 223 | "libc", 224 | "miniz_oxide", 225 | ] 226 | 227 | [[package]] 228 | name = "foreign-types" 229 | version = "0.3.2" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" 232 | dependencies = [ 233 | "foreign-types-shared", 234 | ] 235 | 236 | [[package]] 237 | name = "foreign-types-shared" 238 | version = "0.1.1" 239 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 240 | checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" 241 | 242 | [[package]] 243 | name = "fuchsia-cprng" 244 | version = "0.1.1" 245 | source = "registry+https://github.com/rust-lang/crates.io-index" 246 | checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" 247 | 248 | [[package]] 249 | name = "getrandom" 250 | version = "0.2.4" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" 253 | dependencies = [ 254 | "cfg-if", 255 | "libc", 256 | "wasi", 257 | ] 258 | 259 | [[package]] 260 | name = "hashbrown" 261 | version = "0.11.2" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" 264 | dependencies = [ 265 | "ahash", 266 | ] 267 | 268 | [[package]] 269 | name = "hashlink" 270 | version = "0.7.0" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf" 273 | dependencies = [ 274 | "hashbrown", 275 | ] 276 | 277 | [[package]] 278 | name = "heck" 279 | version = "0.3.2" 280 | source = "registry+https://github.com/rust-lang/crates.io-index" 281 | checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" 282 | dependencies = [ 283 | "unicode-segmentation", 284 | ] 285 | 286 | [[package]] 287 | name = "hermit-abi" 288 | version = "0.1.18" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" 291 | dependencies = [ 292 | "libc", 293 | ] 294 | 295 | [[package]] 296 | name = "humantime" 297 | version = "2.1.0" 298 | source = "registry+https://github.com/rust-lang/crates.io-index" 299 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 300 | 301 | [[package]] 302 | name = "itoa" 303 | version = "0.4.7" 304 | source = "registry+https://github.com/rust-lang/crates.io-index" 305 | checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" 306 | 307 | [[package]] 308 | name = "jobserver" 309 | version = "0.1.24" 310 | source = "registry+https://github.com/rust-lang/crates.io-index" 311 | checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" 312 | dependencies = [ 313 | "libc", 314 | ] 315 | 316 | [[package]] 317 | name = "lazy_static" 318 | version = "1.4.0" 319 | source = "registry+https://github.com/rust-lang/crates.io-index" 320 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 321 | 322 | [[package]] 323 | name = "libc" 324 | version = "0.2.117" 325 | source = "registry+https://github.com/rust-lang/crates.io-index" 326 | checksum = "e74d72e0f9b65b5b4ca49a346af3976df0f9c61d550727f349ecd559f251a26c" 327 | 328 | [[package]] 329 | name = "libsqlite3-sys" 330 | version = "0.23.2" 331 | source = "registry+https://github.com/rust-lang/crates.io-index" 332 | checksum = "d2cafc7c74096c336d9d27145f7ebd4f4b6f95ba16aa5a282387267e6925cb58" 333 | dependencies = [ 334 | "pkg-config", 335 | "vcpkg", 336 | ] 337 | 338 | [[package]] 339 | name = "log" 340 | version = "0.4.14" 341 | source = "registry+https://github.com/rust-lang/crates.io-index" 342 | checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" 343 | dependencies = [ 344 | "cfg-if", 345 | ] 346 | 347 | 
[[package]] 348 | name = "memchr" 349 | version = "2.4.1" 350 | source = "registry+https://github.com/rust-lang/crates.io-index" 351 | checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" 352 | 353 | [[package]] 354 | name = "memoffset" 355 | version = "0.6.1" 356 | source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87" 358 | dependencies = [ 359 | "autocfg", 360 | ] 361 | 362 | [[package]] 363 | name = "miniz_oxide" 364 | version = "0.4.4" 365 | source = "registry+https://github.com/rust-lang/crates.io-index" 366 | checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" 367 | dependencies = [ 368 | "adler", 369 | "autocfg", 370 | ] 371 | 372 | [[package]] 373 | name = "num-integer" 374 | version = "0.1.44" 375 | source = "registry+https://github.com/rust-lang/crates.io-index" 376 | checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" 377 | dependencies = [ 378 | "autocfg", 379 | "num-traits", 380 | ] 381 | 382 | [[package]] 383 | name = "num-traits" 384 | version = "0.2.14" 385 | source = "registry+https://github.com/rust-lang/crates.io-index" 386 | checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" 387 | dependencies = [ 388 | "autocfg", 389 | ] 390 | 391 | [[package]] 392 | name = "num_cpus" 393 | version = "1.13.1" 394 | source = "registry+https://github.com/rust-lang/crates.io-index" 395 | checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" 396 | dependencies = [ 397 | "hermit-abi", 398 | "libc", 399 | ] 400 | 401 | [[package]] 402 | name = "num_threads" 403 | version = "0.1.3" 404 | source = "registry+https://github.com/rust-lang/crates.io-index" 405 | checksum = "97ba99ba6393e2c3734791401b66902d981cb03bf190af674ca69949b6d5fb15" 406 | dependencies = [ 407 | "libc", 408 | ] 409 | 410 | [[package]] 411 | name = "once_cell" 412 | version = "1.9.0" 413 | source = "registry+https://github.com/rust-lang/crates.io-index" 414 | checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" 415 | 416 | [[package]] 417 | name = "openssl" 418 | version = "0.10.38" 419 | source = "registry+https://github.com/rust-lang/crates.io-index" 420 | checksum = "0c7ae222234c30df141154f159066c5093ff73b63204dcda7121eb082fc56a95" 421 | dependencies = [ 422 | "bitflags", 423 | "cfg-if", 424 | "foreign-types", 425 | "libc", 426 | "once_cell", 427 | "openssl-sys", 428 | ] 429 | 430 | [[package]] 431 | name = "openssl-sys" 432 | version = "0.9.72" 433 | source = "registry+https://github.com/rust-lang/crates.io-index" 434 | checksum = "7e46109c383602735fa0a2e48dd2b7c892b048e1bf69e5c3b1d804b7d9c203cb" 435 | dependencies = [ 436 | "autocfg", 437 | "cc", 438 | "libc", 439 | "pkg-config", 440 | "vcpkg", 441 | ] 442 | 443 | [[package]] 444 | name = "pkg-config" 445 | version = "0.3.19" 446 | source = "registry+https://github.com/rust-lang/crates.io-index" 447 | checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" 448 | 449 | [[package]] 450 | name = "proc-macro-error" 451 | version = "1.0.4" 452 | source = "registry+https://github.com/rust-lang/crates.io-index" 453 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 454 | dependencies = [ 455 | "proc-macro-error-attr", 456 | "proc-macro2", 457 | "quote", 458 | "syn", 459 | "version_check", 460 | ] 461 | 462 | [[package]] 463 | name = "proc-macro-error-attr" 464 | version = "1.0.4" 465 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 466 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 467 | dependencies = [ 468 | "proc-macro2", 469 | "quote", 470 | "version_check", 471 | ] 472 | 473 | [[package]] 474 | name = "proc-macro2" 475 | version = "1.0.24" 476 | source = "registry+https://github.com/rust-lang/crates.io-index" 477 | checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" 478 | dependencies = [ 479 | "unicode-xid", 480 | ] 481 | 482 | [[package]] 483 | name = "quote" 484 | version = "1.0.9" 485 | source = "registry+https://github.com/rust-lang/crates.io-index" 486 | checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" 487 | dependencies = [ 488 | "proc-macro2", 489 | ] 490 | 491 | [[package]] 492 | name = "rand" 493 | version = "0.4.6" 494 | source = "registry+https://github.com/rust-lang/crates.io-index" 495 | checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" 496 | dependencies = [ 497 | "fuchsia-cprng", 498 | "libc", 499 | "rand_core 0.3.1", 500 | "rdrand", 501 | "winapi", 502 | ] 503 | 504 | [[package]] 505 | name = "rand_core" 506 | version = "0.3.1" 507 | source = "registry+https://github.com/rust-lang/crates.io-index" 508 | checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" 509 | dependencies = [ 510 | "rand_core 0.4.2", 511 | ] 512 | 513 | [[package]] 514 | name = "rand_core" 515 | version = "0.4.2" 516 | source = "registry+https://github.com/rust-lang/crates.io-index" 517 | checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" 518 | 519 | [[package]] 520 | name = "rdrand" 521 | version = "0.4.0" 522 | source = "registry+https://github.com/rust-lang/crates.io-index" 523 | checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" 524 | dependencies = [ 525 | "rand_core 0.3.1", 526 | ] 527 | 528 | [[package]] 529 | name = "regex" 530 | version = "1.5.4" 531 | source = "registry+https://github.com/rust-lang/crates.io-index" 532 | checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" 533 | dependencies = [ 534 | "aho-corasick", 535 | "memchr", 536 | "regex-syntax", 537 | ] 538 | 539 | [[package]] 540 | name = "regex-syntax" 541 | version = "0.6.25" 542 | source = "registry+https://github.com/rust-lang/crates.io-index" 543 | checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" 544 | 545 | [[package]] 546 | name = "remove_dir_all" 547 | version = "0.5.3" 548 | source = "registry+https://github.com/rust-lang/crates.io-index" 549 | checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" 550 | dependencies = [ 551 | "winapi", 552 | ] 553 | 554 | [[package]] 555 | name = "rsure" 556 | version = "0.10.0-dev" 557 | dependencies = [ 558 | "chrono", 559 | "crossbeam", 560 | "data-encoding", 561 | "env_logger", 562 | "flate2", 563 | "lazy_static", 564 | "libc", 565 | "log", 566 | "num_cpus", 567 | "openssl", 568 | "regex", 569 | "rusqlite", 570 | "structopt", 571 | "tempdir", 572 | "thiserror", 573 | "time 0.3.7", 574 | "weave", 575 | "zstd", 576 | ] 577 | 578 | [[package]] 579 | name = "rusqlite" 580 | version = "0.26.3" 581 | source = "registry+https://github.com/rust-lang/crates.io-index" 582 | checksum = "4ba4d3462c8b2e4d7f4fcfcf2b296dc6b65404fbbc7b63daa37fd485c149daf7" 583 | dependencies = [ 584 | "bitflags", 585 | "fallible-iterator", 586 | "fallible-streaming-iterator", 587 | "hashlink", 588 | 
"libsqlite3-sys", 589 | "memchr", 590 | "smallvec", 591 | ] 592 | 593 | [[package]] 594 | name = "ryu" 595 | version = "1.0.5" 596 | source = "registry+https://github.com/rust-lang/crates.io-index" 597 | checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" 598 | 599 | [[package]] 600 | name = "scopeguard" 601 | version = "1.1.0" 602 | source = "registry+https://github.com/rust-lang/crates.io-index" 603 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 604 | 605 | [[package]] 606 | name = "serde" 607 | version = "1.0.123" 608 | source = "registry+https://github.com/rust-lang/crates.io-index" 609 | checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae" 610 | 611 | [[package]] 612 | name = "serde_derive" 613 | version = "1.0.123" 614 | source = "registry+https://github.com/rust-lang/crates.io-index" 615 | checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31" 616 | dependencies = [ 617 | "proc-macro2", 618 | "quote", 619 | "syn", 620 | ] 621 | 622 | [[package]] 623 | name = "serde_json" 624 | version = "1.0.63" 625 | source = "registry+https://github.com/rust-lang/crates.io-index" 626 | checksum = "43535db9747a4ba938c0ce0a98cc631a46ebf943c9e1d604e091df6007620bf6" 627 | dependencies = [ 628 | "itoa", 629 | "ryu", 630 | "serde", 631 | ] 632 | 633 | [[package]] 634 | name = "smallvec" 635 | version = "1.6.1" 636 | source = "registry+https://github.com/rust-lang/crates.io-index" 637 | checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" 638 | 639 | [[package]] 640 | name = "strsim" 641 | version = "0.8.0" 642 | source = "registry+https://github.com/rust-lang/crates.io-index" 643 | checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" 644 | 645 | [[package]] 646 | name = "structopt" 647 | version = "0.3.26" 648 | source = "registry+https://github.com/rust-lang/crates.io-index" 649 | checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" 650 | dependencies = [ 651 | "clap", 652 | "lazy_static", 653 | "structopt-derive", 654 | ] 655 | 656 | [[package]] 657 | name = "structopt-derive" 658 | version = "0.4.18" 659 | source = "registry+https://github.com/rust-lang/crates.io-index" 660 | checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" 661 | dependencies = [ 662 | "heck", 663 | "proc-macro-error", 664 | "proc-macro2", 665 | "quote", 666 | "syn", 667 | ] 668 | 669 | [[package]] 670 | name = "syn" 671 | version = "1.0.60" 672 | source = "registry+https://github.com/rust-lang/crates.io-index" 673 | checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081" 674 | dependencies = [ 675 | "proc-macro2", 676 | "quote", 677 | "unicode-xid", 678 | ] 679 | 680 | [[package]] 681 | name = "tempdir" 682 | version = "0.3.7" 683 | source = "registry+https://github.com/rust-lang/crates.io-index" 684 | checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" 685 | dependencies = [ 686 | "rand", 687 | "remove_dir_all", 688 | ] 689 | 690 | [[package]] 691 | name = "termcolor" 692 | version = "1.1.2" 693 | source = "registry+https://github.com/rust-lang/crates.io-index" 694 | checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" 695 | dependencies = [ 696 | "winapi-util", 697 | ] 698 | 699 | [[package]] 700 | name = "textwrap" 701 | version = "0.11.0" 702 | source = "registry+https://github.com/rust-lang/crates.io-index" 703 | checksum = 
"d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 704 | dependencies = [ 705 | "unicode-width", 706 | ] 707 | 708 | [[package]] 709 | name = "thiserror" 710 | version = "1.0.30" 711 | source = "registry+https://github.com/rust-lang/crates.io-index" 712 | checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" 713 | dependencies = [ 714 | "thiserror-impl", 715 | ] 716 | 717 | [[package]] 718 | name = "thiserror-impl" 719 | version = "1.0.30" 720 | source = "registry+https://github.com/rust-lang/crates.io-index" 721 | checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" 722 | dependencies = [ 723 | "proc-macro2", 724 | "quote", 725 | "syn", 726 | ] 727 | 728 | [[package]] 729 | name = "time" 730 | version = "0.1.44" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" 733 | dependencies = [ 734 | "libc", 735 | "wasi", 736 | "winapi", 737 | ] 738 | 739 | [[package]] 740 | name = "time" 741 | version = "0.3.7" 742 | source = "registry+https://github.com/rust-lang/crates.io-index" 743 | checksum = "004cbc98f30fa233c61a38bc77e96a9106e65c88f2d3bef182ae952027e5753d" 744 | dependencies = [ 745 | "libc", 746 | "num_threads", 747 | ] 748 | 749 | [[package]] 750 | name = "unicode-segmentation" 751 | version = "1.7.1" 752 | source = "registry+https://github.com/rust-lang/crates.io-index" 753 | checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" 754 | 755 | [[package]] 756 | name = "unicode-width" 757 | version = "0.1.8" 758 | source = "registry+https://github.com/rust-lang/crates.io-index" 759 | checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" 760 | 761 | [[package]] 762 | name = "unicode-xid" 763 | version = "0.2.1" 764 | source = "registry+https://github.com/rust-lang/crates.io-index" 765 | checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" 766 | 767 | [[package]] 768 | name = "vcpkg" 769 | version = "0.2.11" 770 | source = "registry+https://github.com/rust-lang/crates.io-index" 771 | checksum = "b00bca6106a5e23f3eee943593759b7fcddb00554332e856d990c893966879fb" 772 | 773 | [[package]] 774 | name = "vec_map" 775 | version = "0.8.2" 776 | source = "registry+https://github.com/rust-lang/crates.io-index" 777 | checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" 778 | 779 | [[package]] 780 | name = "version_check" 781 | version = "0.9.2" 782 | source = "registry+https://github.com/rust-lang/crates.io-index" 783 | checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" 784 | 785 | [[package]] 786 | name = "wasi" 787 | version = "0.10.0+wasi-snapshot-preview1" 788 | source = "registry+https://github.com/rust-lang/crates.io-index" 789 | checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" 790 | 791 | [[package]] 792 | name = "weave" 793 | version = "0.4.0-dev" 794 | dependencies = [ 795 | "chrono", 796 | "flate2", 797 | "log", 798 | "regex", 799 | "serde", 800 | "serde_derive", 801 | "serde_json", 802 | "thiserror", 803 | "zstd", 804 | ] 805 | 806 | [[package]] 807 | name = "winapi" 808 | version = "0.3.9" 809 | source = "registry+https://github.com/rust-lang/crates.io-index" 810 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 811 | dependencies = [ 812 | "winapi-i686-pc-windows-gnu", 813 | "winapi-x86_64-pc-windows-gnu", 814 | ] 815 | 816 | [[package]] 817 | 
name = "winapi-i686-pc-windows-gnu" 818 | version = "0.4.0" 819 | source = "registry+https://github.com/rust-lang/crates.io-index" 820 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 821 | 822 | [[package]] 823 | name = "winapi-util" 824 | version = "0.1.5" 825 | source = "registry+https://github.com/rust-lang/crates.io-index" 826 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 827 | dependencies = [ 828 | "winapi", 829 | ] 830 | 831 | [[package]] 832 | name = "winapi-x86_64-pc-windows-gnu" 833 | version = "0.4.0" 834 | source = "registry+https://github.com/rust-lang/crates.io-index" 835 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 836 | 837 | [[package]] 838 | name = "zstd" 839 | version = "0.10.0+zstd.1.5.2" 840 | source = "registry+https://github.com/rust-lang/crates.io-index" 841 | checksum = "3b1365becbe415f3f0fcd024e2f7b45bacfb5bdd055f0dc113571394114e7bdd" 842 | dependencies = [ 843 | "zstd-safe", 844 | ] 845 | 846 | [[package]] 847 | name = "zstd-safe" 848 | version = "4.1.4+zstd.1.5.2" 849 | source = "registry+https://github.com/rust-lang/crates.io-index" 850 | checksum = "2f7cd17c9af1a4d6c24beb1cc54b17e2ef7b593dc92f19e9d9acad8b182bbaee" 851 | dependencies = [ 852 | "libc", 853 | "zstd-sys", 854 | ] 855 | 856 | [[package]] 857 | name = "zstd-sys" 858 | version = "1.6.3+zstd.1.5.2" 859 | source = "registry+https://github.com/rust-lang/crates.io-index" 860 | checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8" 861 | dependencies = [ 862 | "cc", 863 | "libc", 864 | ] 865 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rsure" 3 | version = "0.10.0-dev" 4 | authors = ["David Brown "] 5 | description = """ 6 | Rsure captures hashes and metadata about a tree of files, and can 7 | later verify these hashes and metadata to determine if the files have 8 | changed. It supports incremental updates, and includes an standalone 9 | executable for scanning and checking trees. 10 | """ 11 | license = "MIT" 12 | readme = "README.md" 13 | repository = "https://github.com/d3zd3z/rsure" 14 | edition = "2018" 15 | 16 | exclude = [ 17 | "2sure.*.gz" 18 | ] 19 | 20 | [dependencies] 21 | chrono = "0.4" 22 | crossbeam = "0.8" 23 | data-encoding = "2.1.1" 24 | flate2 = "1.0" 25 | lazy_static = "1.4" 26 | libc = "0.2.11" 27 | log = "0.4.6" # 0.4.6 needed to fix problem with named macro imports. 
28 | # rsure-naming = { path = "naming", version = "0.1.0" } 29 | num_cpus = "1.10" 30 | openssl = "0.10" 31 | regex = "1.5" 32 | rusqlite = "0.26" 33 | structopt = "0.3" 34 | tempdir = "0.3" 35 | thiserror = "1.0" 36 | time = "0.3" 37 | weave = { path = "weave", version = "0.4.0-dev" } 38 | zstd = "0.10" 39 | 40 | # This will go away 41 | env_logger = "0.9" 42 | 43 | [[bin]] 44 | name = "rsure" 45 | test = false 46 | doc = false 47 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 David Brown 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## Release 0.9 4 | 5 | This is a fairly major release, with most changes under the hood. 6 | There are some minor updates in dependencies. 7 | 8 | The underlying mechanism for updating sure files has been rewritten. 9 | Earlier versions of rsure would read an entire tree into memory, and 10 | perform updates on this in-memory structure. For large directory 11 | trees, this could take up a lot of memory. The new version always 12 | performs scans in a linear manner, and updates are performed using a 13 | few temp files (kept in the same directory as the surefile). This slightly increases the 14 | space needed where the surefile is stored, but greatly reduces memory 15 | usage. 16 | 17 | By recording hash updates to a temporary sqlite database, we can now 18 | perform these hash updates in parallel, using multiple cores. On fast 19 | disks, this can result in a speed improvement. 20 | 21 | Other than the lower memory usage, this change shouldn't be visible to 22 | users of rsure. 23 | 24 | ## Release 0.8.2 25 | 26 | This is a minor release with some minor improvements, mostly having to 27 | do with moving to Rust 2018. As of this release, Rust 2018 is 28 | required to build Rsure. 29 | 30 | In addition to the 2018 changes, this release also makes some 31 | improvements to the progress meter. If the client of the library uses 32 | `log_init` to initialize the logging system, the progress meter will 33 | cooperate with the logging system to present a clean, frequently 34 | updated message. Otherwise, the meter will remain as before, only 35 | updated every 5 seconds.
36 | 37 | This also adds a separate progress meter to indicate the status of the 38 | initial filesystem scan. With large trees, this scan can take some 39 | time, and the meter is a useful indicator of what is happening. 40 | 41 | ## Release 0.8.1 42 | 43 | This is a minor release that updates the versions of child dependencies. 44 | 45 | ## Release 0.8 46 | 47 | Release 0.8 of rsure makes some notable changes to the library and 48 | command-line tool. The most significant change is that the 'weave' 49 | format is the primary format that deltas are stored in. Instead of 50 | distinguishing the old files by 2sure.weave.gz and 2sure.dat.gz, weave 51 | files are just called 2sure.dat.gz. It may be possible to add format 52 | detection to detect the old format, but I suspect there isn't a lot of 53 | use of these files anyway. 54 | 55 | Other changes: 56 | - Move to 'failure' instead of 'error-chain'. 57 | - Bump many dependencies. 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rsure file integrity 2 | 3 | [![Build Status](https://travis-ci.org/d3zd3z/rsure.svg?branch=master)](https://travis-ci.org/d3zd3z/rsure) 4 | 5 | It has been said that backups aren't useful unless you've tested them. 6 | But how does one know that a test restore actually worked? Rsure is 7 | designed to help with this. 8 | 9 | ## History 10 | 11 | The md5sum program captures the MD5 hash of a set of files. It can 12 | also read this output and compare the hashes against the files. By 13 | capturing the hashes before the backup, and comparing them after a 14 | test restore, you can gain a bit of confidence that the contents of 15 | the files are at least correct. 16 | 17 | However, this doesn't capture the permissions and other attributes of 18 | the files. Sometimes a restore can fail for this kind of reason. 19 | 20 | ## Intrusion detection 21 | 22 | There have been several similar solutions focused on intrusion 23 | detection. Tripwire and FreeVeracity (or Veracity) come to mind. The 24 | idea is that the files are compared in place to verify that nobody has 25 | modified them. 26 | 27 | Unfortunately, at least Tripwire seems to focus so heavily on this 28 | intrusion detection problem that the tool doesn't work very well for 29 | verifying backups. It really wants a central database, and to use 30 | files by absolute pathname. FreeVeracity was quite useful for 31 | verifying backups; however, it appears to have vanished entirely (it 32 | was under an unusual license). 33 | 34 | ### Incremental updates 35 | 36 | One thing that none of these solutions addressed was 37 | incremental updates, probably because of the focus on intrusion 38 | detection. In a normal running system, the POSIX *ctime* field can be 39 | reliably used to determine if a file has been modified. By making use 40 | of this, the integrity program can avoid recomputing hashes of files 41 | that haven't changed. This strategy is similar to what most backup 42 | software does as well. This is important, because taking the time to 43 | hash every file can make the integrity update take so long that people 44 | avoid running it. Full hashing is impractical for the same reasons 45 | that regular full backups are usually impractical. 46 | 47 | # Using rsure 48 | 49 | ## Getting it 50 | 51 | Rsure is written in [Rust][rust].
It began as 52 | an exercise to determine how useful Rust is for a systems-type 53 | program, and has shown itself to be the easiest implementation to develop and 54 | maintain. 55 | 56 | [rust]: http://www.rust-lang.org/ "The Rust Programming Language" 57 | 58 | Once you have installed Rust (and Cargo) using either the Rust 59 | installer, rustup, or your distro's packaging system, building it is 60 | as easy as: 61 | 62 | ```shell 63 | $ cargo build --release 64 | ``` 65 | 66 | within the Rsure directory. The `--release` flag is important, 67 | otherwise the performance is poor. You can install or link to 68 | `./target/release/rsure` for the executable. It may also be possible 69 | to use `cargo install` to install rsure directly. 70 | 71 | ## Basic usage 72 | 73 | Change to a directory you wish to keep integrity for, for example, your 74 | home directory: 75 | 76 | ```shell 77 | $ cd 78 | $ rsure scan 79 | ``` 80 | 81 | This will scan the filesystem (possibly showing progress), and leave a 82 | `2sure.dat.gz` (the `2sure` is historical; FreeVeracity used a name 83 | starting with a 0, and having the digit puts it near the beginning of 84 | a directory listing). You can view this file if you'd like. The 85 | format is somewhat readable. 86 | 87 | Then you can do: 88 | 89 | ```shell 90 | $ rsure check 91 | ``` 92 | 93 | to verify the directory. This will show any differences. If you back 94 | up this file with your data, you can run `rsure` after a restore to 95 | check if the backup is correct. 96 | 97 | Later, you can run: 98 | 99 | ```shell 100 | $ rsure update 101 | ``` 102 | 103 | which will update the `2sure.dat.gz` file with the new data. Rsure 104 | uses a "weave" format to hold multiple revisions efficiently in the 105 | same file. The update command will refresh the hashes of any files 106 | that have changed. After this, you can run: 107 | 108 | ```shell 109 | $ rsure signoff 110 | ``` 111 | 112 | to compare the old scan with the current, and report on what has 113 | changed between them. 114 | -------------------------------------------------------------------------------- /benches/hash.rs: -------------------------------------------------------------------------------- 1 | // Benchmark our hashing function. 2 | 3 | #![feature(test)] 4 | 5 | extern crate openssl; 6 | extern crate rsure; 7 | extern crate tempdir; 8 | extern crate test; 9 | // extern crate sha1; 10 | 11 | use rsure::{Progress, SureHash}; 12 | use std::fs::File; 13 | use std::io::Write; 14 | use tempdir::TempDir; 15 | use test::Bencher; 16 | 17 | // To compute hashing speed, use 1 over the benchmark time in seconds, and 18 | // then multiply the result by the number of iterations in the 'for i' 19 | // loop. For example, if the benchmark runs in 29,924,583 ns/iter, and the 20 | // count is 16, that would be about 534 MiB/sec hash performance. 21 | // 22 | // The loop count should be large enough to overflow the CPU's largest 23 | // cache, with the value 16 (16MiB) overflowing the 8MiB cache on the Core 24 | // i7-950 I wrote this on.
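// (Worked example of that arithmetic, using the figures quoted above rather than a fresh measurement: each iteration hashes 16 files of 1 MiB each, so (1 / 0.029924583 s) * 16 MiB ≈ 534 MiB/sec.)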
25 | #[bench] 26 | fn tree_mb_bench(b: &mut Bencher) { 27 | let tmp = TempDir::new("rsure-bench").unwrap(); 28 | for i in 0..16 { 29 | let name = format!("large-{}", i); 30 | let mut fd = File::create(tmp.path().join(&name)).unwrap(); 31 | let buf = vec![0; 1024]; 32 | for _ in 0..1024 { 33 | fd.write_all(&buf).unwrap(); 34 | } 35 | } 36 | 37 | b.iter(|| { 38 | let mut tree = rsure::scan_fs(tmp.path()).unwrap(); 39 | let estimate = tree.hash_estimate(); 40 | let mut progress = Progress::new(estimate.files, estimate.bytes); 41 | tree.hash_update(tmp.path(), &mut progress); 42 | // progress.flush(); 43 | }) 44 | } 45 | 46 | #[bench] 47 | fn openssl_bench(b: &mut Bencher) { 48 | use openssl::hash::{Hasher, MessageDigest}; 49 | 50 | // Make buffer big enough to not fit in cache. 51 | let buf = vec![0; 1024 * 1024 * 16]; 52 | 53 | b.iter(|| { 54 | let mut h = Hasher::new(MessageDigest::sha1()).unwrap(); 55 | h.write_all(&buf).unwrap(); 56 | h.finish().unwrap(); 57 | }) 58 | } 59 | 60 | /* Bring in the SHA1 crate. Currently, it seems to be about 4.2 times slower than the openssl one. 61 | */ 62 | /* 63 | #[bench] 64 | fn sha1_bench(b: &mut Bencher) { 65 | use sha1::Sha1; 66 | 67 | // Make buffer big enough to not fit in cache. 68 | let buf = vec![0; 1024 * 1024 * 16]; 69 | 70 | b.iter(|| { 71 | let mut h = Sha1::new(); 72 | h.update(&buf); 73 | let _ = h.digest(); 74 | }) 75 | } 76 | */ 77 | -------------------------------------------------------------------------------- /default.nix: -------------------------------------------------------------------------------- 1 | /* { stdenv, pkgs, fetchFromGitHub, rustPlatform }: */ 2 | 3 | with import <nixpkgs> {}; 4 | rustPlatform.buildRustPackage rec { 5 | pname = "rsure"; 6 | version = "0.9.4"; 7 | 8 | src = fetchFromGitHub { 9 | owner = "tangybbq"; 10 | repo = pname; 11 | rev = "v0.9.4"; 12 | sha256 = "sha256:0bx0l2q64ma057l2wwvsnbgl8jr6szanfwr5311lqqzvp4r4kaqy"; 13 | }; 14 | 15 | cargoSha256 = "sha256:1bym7z2b3sw9g2hvixagir4bqh0389v9f2r66x2nf871683vc34y"; 16 | 17 | nativeBuildInputs = [ 18 | pkgs.pkgconfig 19 | ]; 20 | buildInputs = [ pkgs.openssl.dev pkgs.sqlite.dev ]; 21 | 22 | meta = with lib; { 23 | description = "A utility for ensuring file integrity"; 24 | homepage = "https://github.com/tangybbq/rsure"; 25 | license = with licenses; [ mit ]; 26 | maintainers = with maintainers; [ d3zd3z ]; 27 | }; 28 | } 29 | -------------------------------------------------------------------------------- /etc/template-bk-readme.txt: -------------------------------------------------------------------------------- 1 | This directory is an 'rsure' BitKeeper store. Stored within the BitKeeper 2 | data are surefiles that represent the state of one or more filesystems at 3 | one or more points in time. You can use BitKeeper to see what is here. 4 | 5 | bk changes -v 6 | 7 | will show you the revisions. You can verify a revision manually with 8 | something like 9 | 10 | bk co -r1.8 -p filename.dat | gzip > /tmp/filename.dat.gz 11 | rsure check -d dirname -f /tmp/filename.dat.gz 12 | -------------------------------------------------------------------------------- /naming/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rsure-naming" 3 | version = "0.1.0" 4 | authors = ["David Brown "] 5 | edition = "2018" 6 | description = """ 7 | rsure-naming implements a simple naming convention used by the 'rsure' 8 | crate.
It manages an associated set of files, typically a data file, 9 | a backup file, and zero or more temporary files. 10 | """ 11 | 12 | [dependencies] 13 | failure = "0.1.5" 14 | flate2 = "1.0" 15 | log = "0.4.6" 16 | -------------------------------------------------------------------------------- /naming/README.rst: -------------------------------------------------------------------------------- 1 | Naming convention support 2 | ************************* 3 | 4 | -------------------------------------------------------------------------------- /naming/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A Naming manages a group of associated filenames. All of these names 2 | //! exist in a single directory, have a common basename, and various 3 | //! suffixes. It consists of the following names: 4 | //! 5 | //! * path/base.dat.gz: The primary name 6 | //! * path/base.bak.gz: A backup file 7 | //! * path/base.0: A temporary file 8 | //! * path/base.1.gz: A compressed temporary file 9 | //! 10 | //! The client of this crate can determine whether the primary and backup 11 | //! names are compressed, and compression can be chosen for the temporary 12 | //! files on a per-file basis. If the compression matches the main name, 13 | //! a temp file can be atomically renamed to the primary name. 14 | //! 15 | //! In addition to the management of the names, this module manages opening 16 | //! and closing files associated with the names, as well as cleaning up 17 | //! temporary files when the Naming goes out of scope. 18 | 19 | use flate2::{write::GzEncoder, Compression}; 20 | use log::warn; 21 | use std::{ 22 | fs::{self, File, OpenOptions}, 23 | io::{BufWriter, ErrorKind, Write}, 24 | path::{Path, PathBuf}, 25 | result, 26 | }; 27 | 28 | /// Our local Result type. Makes use of `failure::Error` to automatically 29 | /// pass errors upward. 30 | type Result<T> = result::Result<T, failure::Error>; 31 | 32 | #[derive(Debug)] 33 | pub struct Naming { 34 | // The directory for files to be written to. 35 | path: PathBuf, 36 | // The base part of the filename 37 | base: String, 38 | // The extension to use for the main name. 39 | ext: String, 40 | // Are the primary and backup files to be compressed? 41 | compressed: bool, 42 | 43 | // Track the next temp we try to open, avoids O(n^2) open calls. This 44 | // is merely an optimization and shouldn't have observable behavior. 45 | next_temp: usize, 46 | 47 | // The naming convention can be instructed to cleanup files when it is 48 | // dropped. 49 | cleanup: Vec<PathBuf>, 50 | } 51 | 52 | /// Something that can be written to, that remembers its name. The writer 53 | /// is boxed to support various kinds of writers, including compressed. 54 | pub struct NamedWriter { 55 | pub name: PathBuf, 56 | pub writer: Box<dyn Write>, 57 | } 58 | 59 | impl Naming { 60 | pub fn new<P: AsRef<Path>>(path: P, base: &str, ext: &str, compressed: bool) -> Naming { 61 | Naming { 62 | path: path.as_ref().to_path_buf(), 63 | base: base.to_string(), 64 | ext: ext.to_string(), 65 | compressed, 66 | next_temp: 0, 67 | cleanup: Vec::new(), 68 | } 69 | } 70 | 71 | pub fn make_name(&self, ext: &str, compressed: bool) -> PathBuf { 72 | let name = format!( 73 | "{}.{}{}", 74 | self.base, 75 | ext, 76 | if compressed { ".gz" } else { "" } 77 | ); 78 | self.path.join(name) 79 | } 80 | 81 | /// Construct a temp file that matches the given naming.
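/// The numeric suffix starts at `next_temp` and counts upward (`base.0`, /// `base.1`, ..., with `.gz` appended when `compressed` is true), using /// `create_new` so an existing file is never overwritten.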
82 | pub fn temp_file(&mut self, compressed: bool) -> Result<(PathBuf, File)> { 83 | let mut n = self.next_temp; 84 | loop { 85 | let name = self.make_name(&n.to_string(), compressed); 86 | self.next_temp = n + 1; 87 | 88 | match OpenOptions::new().write(true).create_new(true).open(&name) { 89 | Ok(fd) => return Ok((name, fd)), 90 | Err(ref e) if e.kind() == ErrorKind::AlreadyExists => (), 91 | Err(e) => return Err(e.into()), 92 | } 93 | 94 | n += 1; 95 | } 96 | } 97 | 98 | /// Construct a temp file (as above), but if compression is requested, 99 | /// use a writer that compresses when writing. 100 | pub fn new_temp(&mut self, compressed: bool) -> Result<NamedWriter> { 101 | let (name, file) = self.temp_file(compressed)?; 102 | let writer = if compressed { 103 | // The GzEncoder does a measure of buffering. 104 | // TODO: Do benchmarks to determine if buffering the result of 105 | // the GzEncoder helps. 106 | Box::new(GzEncoder::new(file, Compression::default())) as Box<dyn Write> 107 | } else { 108 | Box::new(BufWriter::new(file)) as Box<dyn Write> 109 | }; 110 | Ok(NamedWriter { 111 | name, 112 | writer, 113 | }) 114 | } 115 | 116 | /// Replace the main file with the given name. This attempts to rename 117 | /// the main name to the backup name, and then attempts to rename the 118 | /// temp file to the main name. 119 | pub fn rename_to_main(&self, name: &Path) -> Result<()> { 120 | let main_name = self.make_name(&self.ext, self.compressed); 121 | let back_name = self.make_name("bak", self.compressed); 122 | 123 | match fs::rename(&main_name, &back_name) { 124 | // Not found means there isn't a main name to rename. 125 | Err(ref e) if e.kind() == ErrorKind::NotFound => (), 126 | // Other errors are failure. 127 | Err(e) => return Err(e.into()), 128 | Ok(()) => (), 129 | } 130 | 131 | fs::rename(name, main_name)?; 132 | Ok(()) 133 | } 134 | 135 | /// Add a name that must be cleaned up. 136 | pub fn add_cleanup(&mut self, name: PathBuf) { 137 | self.cleanup.push(name); 138 | } 139 | } 140 | 141 | impl Drop for Naming { 142 | fn drop(&mut self) { 143 | for name in &self.cleanup { 144 | if let Err(e) = fs::remove_file(name) { 145 | warn!("Error cleaning up: {:?} ({})", name, e); 146 | } 147 | } 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | # Shell configuration to build rsure. 2 | { pkgs ? import <nixpkgs> {} }: 3 | let 4 | lib = pkgs.lib; 5 | stdenv = pkgs.stdenv; 6 | 7 | # SCCS isn't particularly useful, but the file used by weave is 8 | # derived from what SCCS uses. If this program is in the path, then 9 | # weave has additional tests that it can run. 10 | cssc = stdenv.mkDerivation rec { 11 | name = "cssc-1.4.1"; 12 | 13 | src = pkgs.fetchurl { 14 | url = "mirror://gnu/cssc/CSSC-1.4.1.tar.gz"; 15 | sha256 = "1vsisqq573xjr2qpn19iwmpqgl3mq03m790akpa4rvj60b4d1gni"; 16 | }; 17 | 18 | meta = with lib; { 19 | homepage = "https://www.gnu.org/software/cssc/"; 20 | description = "GNU replacement for SCCS"; 21 | license = licenses.gpl3; 22 | }; 23 | }; 24 | in 25 | pkgs.mkShell { 26 | nativeBuildInputs = [ 27 | pkgs.openssl.dev 28 | pkgs.pkgconfig 29 | pkgs.sqlite.dev 30 | 31 | # pkgs.cargo 32 | # pkgs.clippy 33 | # pkgs.rustfmt 34 | # pkgs.cargo-bloat 35 | 36 | cssc 37 | ]; 38 | } 39 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | // Errors.
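// This module defines the crate-wide `Error` enum (derived with `thiserror`) and a matching `Result` alias; failures from weave, I/O, OpenSSL, sqlite, and mpsc are wrapped via `#[from]`.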
2 | 3 | use std::result; 4 | use thiserror::Error; 5 | 6 | pub type Result<T> = result::Result<T, Error>; 7 | #[derive(Error, Debug)] 8 | pub enum Error { 9 | #[error("weave error")] 10 | Weave(#[from] weave::Error), 11 | 12 | #[error("I/O Error {0:?}")] 13 | Io(#[from] std::io::Error), 14 | 15 | #[error("OpenSSL error: {0:?}")] 16 | OpenSsl(#[from] openssl::error::ErrorStack), 17 | #[error("Int parse error: {0:?}")] 18 | IntParse(#[from] std::num::ParseIntError), 19 | 20 | #[error("Root must be a directory")] 21 | RootMustBeDir, 22 | #[error("Unknown directory specified")] 23 | UnknownDirectory, 24 | #[error("File not in directory")] 25 | FileNotInDirectory, 26 | #[error("Path missing final file component")] 27 | PathMissingFinalFile, 28 | 29 | // Errors from comparison. 30 | #[error("empty left iterator")] 31 | EmptyLeftIterator, 32 | #[error("empty right iterator")] 33 | EmptyRightIterator, 34 | #[error("Unexpected node in left tree")] 35 | UnexpectedLeftNode, 36 | #[error("Unexpected node in right tree")] 37 | UnexpectedRightNode, 38 | #[error("Incorrect name of root tree")] 39 | IncorrectName, 40 | 41 | #[error("Unexpected line: {0:?}, expect {1:?}")] 42 | UnexpectedLine(String, String), 43 | #[error("Error reading surefile: {0:?}")] 44 | SureFileError(std::io::Error), 45 | #[error("Unexpected eof on surefile")] 46 | SureFileEof, 47 | #[error("Truncated surefile")] 48 | TruncatedSurefile, 49 | #[error("Invalid surefile line start: {0:?}")] 50 | InvalidSurefileChar(char), 51 | 52 | #[error("Sql error: {0:?}")] 53 | Sql(#[from] rusqlite::Error), 54 | // For one case that needs to be written to be able to move the error. 55 | #[error("Sql error: {0}")] 56 | WrappedSql(String), 57 | #[error("Hash error: {0:?}")] 58 | Hash(String), 59 | #[error("mpsc error: {0:?}")] 60 | Mpsc(#[from] std::sync::mpsc::RecvError), 61 | } 62 | 63 | /* 64 | #[derive(Fail, Debug)] 65 | pub enum WeaveError { 66 | #[fail(display = "Error running BitKeeper: {:?}: {:?}", _0, _1)] 67 | BkError(ExitStatus, String), 68 | } 69 | */ 70 | -------------------------------------------------------------------------------- /src/escape.rs: -------------------------------------------------------------------------------- 1 | //! String escaping. 2 | //! 3 | //! Although filenames in Linux are commonly represented as UTF-8 4 | //! sequences, there is no system requirement that this be the case. As a 5 | //! consequence, this means that it is possible for filenames in Linux to 6 | //! not be valid UTF-8, and therefore not representable as strings. 7 | //! 8 | //! To prevent encoding problems, as well as to allow certain characters, 9 | //! such as space, to separate tokens in the sure file format, we escape 10 | //! some bytes in strings by replacing them with "=xx" where "xx" is the 11 | //! lower-cased hex value of the byte. The range of valid characters 12 | //! is fairly straightforward, including all of the printable characters 13 | //! from '!' to '~' except for '=', '[', and ']', which are always escaped. This 14 | //! means, for example, that a 2-byte encoded UTF-8 sequence will expand to 15 | //! take 6 bytes.
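//! //! For example (derived from the rules above): the bytes `b"a =b"` escape //! to the string `"a=20=3db"`, and unescaping that string yields the //! original bytes.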
16 | 17 | use std::{io::prelude::*, result}; 18 | use thiserror::Error; 19 | 20 | pub trait Escape { 21 | fn escaped(&self) -> String; 22 | } 23 | 24 | pub trait Unescape { 25 | fn unescape(&self) -> EscapeResult<Vec<u8>>; 26 | } 27 | 28 | pub type EscapeResult<T> = result::Result<T, EscapeError>; 29 | 30 | #[derive(Error, Debug)] 31 | pub enum EscapeError { 32 | #[error("Invalid hex character: {0:?}")] 33 | InvalidHexCharacter(u8), 34 | #[error("Invalid hex length")] 35 | InvalidHexLength, 36 | } 37 | 38 | // The basic encoding converts a sequence of bytes into a string. 39 | impl Escape for [u8] { 40 | fn escaped(&self) -> String { 41 | let mut result = vec![]; 42 | for &ch in self.iter() { 43 | // TODO: Can be made more efficient. 44 | if (b'!'..=b'~').contains(&ch) && ch != b'=' && ch != b'[' && ch != b']' { 45 | result.push(ch); 46 | } else { 47 | write!(&mut result, "={:02x}", ch).unwrap(); 48 | } 49 | } 50 | 51 | // TODO: String::from_utf8_unchecked(result) 52 | String::from_utf8(result).unwrap() 53 | } 54 | } 55 | 56 | impl Unescape for str { 57 | fn unescape(&self) -> EscapeResult<Vec<u8>> { 58 | // Will overestimate. 59 | let mut buf = Vec::with_capacity(self.len() / 2); 60 | let mut phase = 0; 61 | let mut tmp = 0; 62 | 63 | for byte in self.bytes() { 64 | if phase == 0 { 65 | if byte == b'=' { 66 | phase = 1; 67 | } else { 68 | buf.push(byte); 69 | } 70 | } else { 71 | tmp <<= 4; 72 | match byte { 73 | b'A'..=b'F' => tmp |= byte - b'A' + 10, 74 | b'a'..=b'f' => tmp |= byte - b'a' + 10, 75 | b'0'..=b'9' => tmp |= byte - b'0', 76 | _ => return Err(EscapeError::InvalidHexCharacter(byte)), 77 | } 78 | phase += 1; 79 | if phase == 3 { 80 | buf.push(tmp); 81 | phase = 0; 82 | tmp = 0; 83 | } 84 | } 85 | } 86 | 87 | if phase != 0 { 88 | return Err(EscapeError::InvalidHexLength); 89 | } 90 | 91 | Ok(buf) 92 | } 93 | } 94 | 95 | #[test] 96 | fn test_unescape() { 97 | macro_rules! assert_error_kind { 98 | ( $expr:expr, $kind:pat ) => { 99 | match $expr { 100 | Err($kind) => (), 101 | Err(e) => panic!( 102 | "Unexpected error kind: {:?} (want {})", 103 | e, 104 | stringify!($kind) 105 | ), 106 | Ok(_) => panic!("Unexpected success"), 107 | } 108 | }; 109 | } 110 | 111 | assert_eq!("=00".unescape().unwrap(), vec![0]); 112 | assert_error_kind!("=00=0".unescape(), EscapeError::InvalidHexLength); 113 | assert_error_kind!("=00=".unescape(), EscapeError::InvalidHexLength); 114 | assert_error_kind!("=4g".unescape(), EscapeError::InvalidHexCharacter(b'g')); 115 | } 116 | 117 | #[test] 118 | fn test_escape() { 119 | let buf: Vec<u8> = (0u32..256).map(|i| i as u8).collect(); 120 | let text = (&buf[..]).escaped(); 121 | assert_eq!(text.unescape().unwrap(), buf); 122 | } 123 | -------------------------------------------------------------------------------- /src/hashes.rs: -------------------------------------------------------------------------------- 1 | //! Computing hashes for files. 2 | 3 | use crate::Result; 4 | use openssl::hash::{DigestBytes, Hasher, MessageDigest}; 5 | use std::io::{Read, Write}; 6 | #[derive(Debug)] 7 | pub struct Estimate { 8 | pub files: u64, 9 | pub bytes: u64, 10 | } 11 | 12 | // TODO: Reuse buffer and hasher for a given thread. 13 | pub(crate) fn hash_file<R: Read>(rd: &mut R) -> Result<DigestBytes> { 14 | let mut h = Hasher::new(MessageDigest::sha1())?; 15 | let mut buf = vec![0u8; 8192]; 16 | 17 | loop { 18 | let count = rd.read(&mut buf)?; 19 | if count == 0 { 20 | break; 21 | } 22 | 23 | h.write_all(&buf[0..count])?; 24 | } 25 | Ok(h.finish()?)
26 | } 27 | 28 | pub(crate) use self::atime_impl::noatime_open; 29 | 30 | /// Open the given file, trying to not update the atime if that is 31 | /// possible. 32 | /// The `custom_flags` method is only stable since Rust 1.10.0. 33 | #[cfg(target_os = "linux")] 34 | mod atime_impl { 35 | use std::fs::{File, OpenOptions}; 36 | use std::io; 37 | use std::os::unix::fs::OpenOptionsExt; 38 | use std::path::Path; 39 | 40 | // From linux's fcntl.h, not exported in the libc crate. 41 | const O_NOATIME: i32 = 0o1000000; 42 | 43 | pub fn noatime_open(name: &Path) -> io::Result<File> { 44 | // Try opening it first with noatime, and if that fails, try the open 45 | // again without the option. 46 | match OpenOptions::new() 47 | .read(true) 48 | .custom_flags(O_NOATIME) 49 | .open(name) 50 | { 51 | Ok(f) => Ok(f), 52 | Err(_) => OpenOptions::new().read(true).open(name), 53 | } 54 | } 55 | } 56 | 57 | // Other platforms, just use normal open. 58 | #[cfg(not(target_os = "linux"))] 59 | mod atime_impl { 60 | use std::fs::{File, OpenOptions}; 61 | use std::io; 62 | use std::path::Path; 63 | 64 | pub fn noatime_open(name: &Path) -> io::Result<File> { 65 | OpenOptions::new().read(true).open(name) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Rsure is a set of utilities for capturing information about files, and later verifying it is 2 | //! still true. 3 | //! 4 | //! The easiest way to use Rsure is to build the `rsure` executable contained in this crate. This 5 | //! program allows you to use most of the functionality of the crate. 6 | //! 7 | //! However, it is also possible to use the crate programmatically. At the top level of the crate 8 | //! are some utility functions for the most common operations. 9 | //! 10 | //! For example, to scan a directory or do an update, use `update`. 11 | //! 12 | //! This example makes use of several of the building blocks necessary to use the store. First is 13 | //! the store itself. `parse_store` is able to decode options that are passed to the command line. 14 | //! It is also possible to build a `store::Plain` store directly. 15 | //! 16 | //! Next are the tags for the snapshot. Generally, this should hold some kind of information about 17 | //! the snapshot itself. For the `Plain` store, it can be just an empty map. Other store types 18 | //! may require certain tags to be present. 19 | 20 | #![warn(bare_trait_objects)] 21 | 22 | use std::{fs::File, path::Path}; 23 | 24 | pub use crate::{ 25 | errors::{Error, Result}, 26 | hashes::Estimate, 27 | node::{ 28 | compare_trees, fs, load_from, HashCombiner, HashUpdater, NodeWriter, ReadIterator, Source, 29 | SureNode, 30 | }, 31 | progress::{log_init, Progress}, 32 | show::show_tree, 33 | store::{parse_store, Store, StoreTags, StoreVersion, TempLoader, Version}, 34 | suretree::AttMap, 35 | }; 36 | 37 | mod errors; 38 | mod escape; 39 | mod hashes; 40 | pub mod node; 41 | mod progress; 42 | mod show; 43 | mod store; 44 | mod surefs; 45 | mod suretree; 46 | 47 | // Some common operations, abstracted here. 48 | 49 | /// Perform an update scan, using the given store. 50 | /// 51 | /// If `is_update` is true, use the hashes from a previous run, otherwise perform a fresh scan. 52 | /// Depending on the [`Store`] type, the tags may be kept, or ignored.
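///
/// In update mode, hashes are carried over from the latest stored version
/// for files whose "ino" and "ctime" attributes are unchanged (see
/// `HashCombiner` and `maybe_copy_sha` in `node::hashes`), so only new or
/// modified files are re-hashed.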
53 | /// 54 | /// [`Store`]: trait.Store.html 55 | /// 56 | /// A simple example: 57 | /// 58 | /// ```rust 59 | /// # use std::error::Error; 60 | /// # 61 | /// # fn try_main() -> Result<(), Box<dyn Error>> { 62 | /// let mut tags = rsure::StoreTags::new(); 63 | /// tags.insert("name".into(), "sample".into()); 64 | /// let store = rsure::parse_store("2sure.dat.gz")?; 65 | /// rsure::update(".", &*store, false, &tags)?; 66 | /// # Ok(()) 67 | /// # } 68 | /// # 69 | /// # fn main() { 70 | /// # try_main().unwrap(); 71 | /// # } 72 | /// ``` 73 | pub fn update<P: AsRef<Path>>( 74 | dir: P, 75 | store: &dyn Store, 76 | is_update: bool, 77 | tags: &StoreTags, 78 | ) -> Result<()> { 79 | let dir = dir.as_ref(); 80 | 81 | let mut estimate = Estimate { files: 0, bytes: 0 }; 82 | let tmp = if is_update { 83 | // In update mode, first tmp file is just the scan. 84 | let scan_temp = { 85 | let mut tmp = store.make_temp()?; 86 | let src = fs::scan_fs(dir)?; 87 | node::save_to(&mut tmp, src)?; 88 | tmp 89 | } 90 | .into_loader()?; 91 | 92 | let latest = store.load_iter(Version::Latest)?; 93 | 94 | let tmp = { 95 | let mut tmp = store.make_temp()?; 96 | let loader = Loader(&*scan_temp); 97 | let combiner = HashCombiner::new(latest, loader.iter()?)?.inspect(|node| { 98 | if let Ok(n @ SureNode::File { .. }) = node { 99 | if n.needs_hash() { 100 | estimate.files += 1; 101 | estimate.bytes += n.size(); 102 | } 103 | } 104 | }); 105 | node::save_to(&mut tmp, combiner)?; 106 | tmp 107 | }; 108 | 109 | tmp 110 | } else { 111 | let mut tmp = store.make_temp()?; 112 | let src = fs::scan_fs(dir)?.inspect(|node| { 113 | if let Ok(n @ SureNode::File { .. }) = node { 114 | if n.needs_hash() { 115 | estimate.files += 1; 116 | estimate.bytes += n.size(); 117 | } 118 | } 119 | }); 120 | node::save_to(&mut tmp, src)?; 121 | tmp 122 | } 123 | .into_loader()?; 124 | 125 | // (In update mode, hashes from the old version were already merged in above.) 126 | 127 | // Update any missing hashes. 128 | let loader = Loader(&*tmp); 129 | let hu = HashUpdater::new(loader, store); 130 | // TODO: This will panic on non-unicode directories. 131 | let hm = hu.compute_parallel(dir.to_str().unwrap(), &estimate)?; 132 | let mut tmp2 = store.make_new(tags)?; 133 | hm.merge(&mut NodeWriter::new(&mut tmp2)?)?; 134 | 135 | tmp2.commit()?; 136 | /* 137 | let dir = dir.as_ref(); 138 | 139 | let mut new_tree = scan_fs(dir)?; 140 | 141 | if is_update { 142 | let old_tree = store.load(Version::Latest)?; 143 | new_tree.update_from(&old_tree); 144 | } 145 | 146 | let estimate = new_tree.hash_estimate(); 147 | let mut progress = Progress::new(estimate.files, estimate.bytes); 148 | new_tree.hash_update(dir, &mut progress); 149 | progress.flush(); 150 | 151 | store.write_new(&new_tree, tags)?; 152 | */ 153 | Ok(()) 154 | } 155 | 156 | struct Loader<'a>(&'a dyn TempLoader); 157 | 158 | impl<'a> Source for Loader<'a> { 159 | fn iter(&self) -> Result<Box<dyn Iterator<Item = Result<SureNode>> + Send>> { 160 | let rd = File::open(self.0.path_ref())?; 161 | Ok(Box::new(load_from(rd)?)) 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | // The rsure command-line driver. 2 | 3 | #![warn(bare_trait_objects)] 4 | 5 | use chrono::Local; 6 | use std::{collections::BTreeMap, path::Path}; 7 | use structopt::StructOpt; 8 | use tempdir::TempDir; 9 | 10 | use rsure::{log_init, parse_store, show_tree, Store, StoreTags, StoreVersion, Version}; 11 | 12 | // For now, just use the crate's error type.
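// Illustrative invocations of the resulting binary (hypothetical paths; the
// flags come from the `Opt` and `Command` definitions below, and the global
// options precede the subcommand):
//
//     rsure -d /some/tree -f 2sure.dat.gz scan
//     rsure -d /some/tree --tag reason=weekly update
//     rsure -d /some/tree check -i mtime
//     rsure -f 2sure.dat.gz list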
13 | pub use rsure::Result; 14 | 15 | #[derive(StructOpt)] 16 | #[structopt(name = "rsure", about = "File integrity")] 17 | struct Opt { 18 | #[structopt(short = "f", long = "file", default_value = "2sure.dat.gz")] 19 | /// Base of file name, default 2sure, will get .dat.gz appended 20 | file: String, 21 | #[structopt(short = "d", long = "dir", default_value = ".")] 22 | /// Directory to scan, defaults to "." 23 | dir: String, 24 | #[structopt(long = "tag")] 25 | /// key=value to associate with scan 26 | tag: Vec, 27 | #[structopt(short = "v", long = "version")] 28 | version: Option, 29 | #[structopt(subcommand)] 30 | command: Command, 31 | } 32 | 33 | #[derive(StructOpt)] 34 | enum Command { 35 | #[structopt(name = "scan")] 36 | /// Scan a directory for the first time 37 | Scan, 38 | #[structopt(name = "update")] 39 | /// Update the scan using the dat/weave file 40 | Update, 41 | #[structopt(name = "check")] 42 | /// Compare the directory with the dat/weave file 43 | Check { 44 | #[structopt(short = "i", long = "ignore")] 45 | /// Tag to ignore when comparing. 46 | ignore: Vec, 47 | }, 48 | #[structopt(name = "signoff")] 49 | /// Compare dat with bak file, or last two versions in weave file 50 | Signoff { 51 | #[structopt(short = "i", long = "ignore")] 52 | /// Tag to ignore when comparing. 53 | ignore: Vec, 54 | }, 55 | #[structopt(name = "show")] 56 | /// Pretty print the dat file 57 | Show, 58 | #[structopt(name = "list")] 59 | /// List revisions in a given sure store 60 | List, 61 | } 62 | 63 | #[allow(dead_code)] 64 | fn main() -> Result<()> { 65 | log_init(); 66 | 67 | let opt = Opt::from_args(); 68 | 69 | let store = parse_store(&opt.file)?; 70 | 71 | let mut tags = decode_tags(Some(opt.tag.iter().map(|x| x.as_str()))); 72 | 73 | add_name_tag(&mut tags, &opt.dir); 74 | 75 | // Note that only the "check" command uses the version tag. 76 | let latest = match opt.version { 77 | None => Version::Latest, 78 | Some(ref x) => Version::Tagged(x.to_string()), 79 | }; 80 | 81 | match &opt.command { 82 | Command::Scan => { 83 | rsure::update(&opt.dir, &*store, false, &tags)?; 84 | } 85 | Command::Update => { 86 | rsure::update(&opt.dir, &*store, true, &tags)?; 87 | } 88 | Command::Check { ignore } => { 89 | let ignore: Vec<_> = ignore.iter().map(|x| x.as_str()).collect(); 90 | run_check(&*store, &opt, latest, &ignore)?; 91 | } 92 | Command::Signoff { ignore } => { 93 | let ignore: Vec<_> = ignore.iter().map(|x| x.as_str()).collect(); 94 | let old_tree = store.load_iter(Version::Prior)?; 95 | let new_tree = store.load_iter(Version::Latest)?; 96 | println!("signoff {}", opt.file); 97 | rsure::compare_trees(old_tree, new_tree, &Path::new(&opt.dir), &ignore)?; 98 | } 99 | Command::Show => { 100 | println!("show {}", opt.file); 101 | show_tree(&*store)?; 102 | } 103 | Command::List => { 104 | let version = store.get_versions()?; 105 | dump_versions(&version); 106 | } 107 | } 108 | 109 | Ok(()) 110 | } 111 | 112 | fn run_check(store: &dyn Store, opt: &Opt, latest: Version, ignore: &[&str]) -> Result<()> { 113 | // Perform a full scan to a temp store. 
114 | let tdir = TempDir::new("rsure")?; 115 | let tpath = tdir.path().join("check.dat.gz"); 116 | let tstore = parse_store(tpath.to_str().unwrap())?; 117 | let mut tags = BTreeMap::new(); 118 | add_name_tag(&mut tags, &opt.dir); 119 | println!("Scanning"); 120 | rsure::update(&opt.dir, &*tstore, false, &tags)?; 121 | 122 | let old_tree = store.load_iter(latest)?; 123 | let new_tree = tstore.load_iter(Version::Latest)?; 124 | println!("Check {}", opt.file); 125 | rsure::compare_trees(old_tree, new_tree, &Path::new(&opt.dir), ignore)?; 126 | Ok(()) 127 | } 128 | 129 | /// Decode the command-line tags. Tags should be of the form key=value, and multiple can be 130 | /// specified, terminated by the command. It is also possible to specify --tag multiple times. 131 | fn decode_tags<'a, I>(tags: Option) -> StoreTags 132 | where 133 | I: Iterator, 134 | { 135 | match tags { 136 | None => BTreeMap::new(), 137 | Some(tags) => tags.map(|x| decode_tag(x)).collect(), 138 | } 139 | } 140 | 141 | fn decode_tag(tag: &str) -> (String, String) { 142 | let fields: Vec<_> = tag.splitn(2, '=').collect(); 143 | if fields.len() != 2 { 144 | panic!("Tag must be key=value"); 145 | } 146 | (fields[0].to_string(), fields[1].to_string()) 147 | } 148 | 149 | /// If the caller doesn't specify a 'name=' tag, generate one based on the current timestamp. 150 | /// Also will add a 'dir' attribute for where the tree was captured. 151 | fn add_name_tag>(tags: &mut StoreTags, dir: P) { 152 | if !tags.contains_key("name") { 153 | tags.insert("name".to_string(), Local::now().to_rfc3339()); 154 | } 155 | 156 | if !tags.contains_key("dir") { 157 | tags.insert( 158 | "dir".to_string(), 159 | dir.as_ref() 160 | .canonicalize() 161 | .unwrap_or_else(|_| Path::new("invalid").to_owned()) 162 | .to_string_lossy() 163 | .into_owned(), 164 | ); 165 | } 166 | } 167 | 168 | fn dump_versions(versions: &[StoreVersion]) { 169 | println!("vers | Time captured | name"); 170 | println!("-----+---------------------+------------------"); 171 | for v in versions { 172 | let vers = match v.version { 173 | Version::Latest => "tip", 174 | Version::Prior => "prev", 175 | Version::Tagged(ref v) => v, 176 | }; 177 | println!( 178 | "{:>4} | {} | {}", 179 | vers, 180 | v.time.with_timezone(&Local).format("%Y-%m-%d %H:%M:%S"), 181 | v.name 182 | ); 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /src/node.rs: -------------------------------------------------------------------------------- 1 | //! The sure stream. 2 | //! 3 | //! The sure stream represents a linearization of a SureTree. By keeping 4 | //! representations as iterators across SureNodes instead of keeping an 5 | //! entire tree in memory, we can process larger filesystem trees, using 6 | //! temporary space on the hard disk instead of using memory. 
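//!
//! As a sketch (not from the original docs), a tree such as
//! `root/{a.txt, sub/{b.txt}}` linearizes to:
//!
//! ```text
//! Enter(__root__) Enter(sub) Sep File(b.txt) Leave Sep File(a.txt) Leave
//! ```
//!
//! Subdirectories appear before their parent's `Sep`; files follow it.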
7 | use crate::{suretree::AttMap, Error, Result}; 8 | use flate2::{read::GzDecoder, write::GzEncoder, Compression}; 9 | use std::{ 10 | fs::File, 11 | io::{self, BufRead, BufReader, BufWriter, Read, Write}, 12 | path::{Path, PathBuf}, 13 | }; 14 | use weave::NamingConvention; 15 | 16 | mod compare; 17 | pub mod fs; 18 | mod fullpath; 19 | mod hashes; 20 | 21 | pub use compare::compare_trees; 22 | pub use fullpath::into_tracker; 23 | pub use hashes::{HashCombiner, HashUpdater, Source}; 24 | 25 | #[derive(Clone, Debug)] 26 | pub enum SureNode { 27 | Enter { name: String, atts: AttMap }, 28 | Leave, 29 | File { name: String, atts: AttMap }, 30 | Sep, 31 | } 32 | 33 | impl SureNode { 34 | pub fn is_enter(&self) -> bool { 35 | matches!(self, SureNode::Enter { .. }) 36 | } 37 | 38 | pub fn is_reg_file(&self) -> bool { 39 | match self { 40 | SureNode::File { atts, .. } => atts["kind"] == "file", 41 | _ => false, 42 | } 43 | } 44 | 45 | pub fn is_file(&self) -> bool { 46 | matches!(self, SureNode::File { .. }) 47 | } 48 | 49 | pub fn is_leave(&self) -> bool { 50 | matches!(self, SureNode::Leave) 51 | } 52 | 53 | pub fn is_sep(&self) -> bool { 54 | matches!(self, SureNode::Sep) 55 | } 56 | 57 | pub fn needs_hash(&self) -> bool { 58 | match self { 59 | SureNode::File { atts, .. } => atts["kind"] == "file" && !atts.contains_key("sha1"), 60 | _ => false, 61 | } 62 | } 63 | 64 | pub fn size(&self) -> u64 { 65 | match self { 66 | SureNode::File { atts, .. } => { 67 | atts.get("size").map(|x| x.parse().unwrap()).unwrap_or(0) 68 | } 69 | _ => 0, 70 | } 71 | } 72 | 73 | /// Get the name of this node. Panics if the node type does not have 74 | /// an associated name. 75 | pub fn name(&self) -> &str { 76 | match self { 77 | SureNode::File { ref name, .. } => name, 78 | SureNode::Enter { ref name, .. } => name, 79 | _ => panic!("Node does not have a name"), 80 | } 81 | } 82 | 83 | /// Safely get the name of this node. 84 | pub fn get_name(&self) -> Option<&str> { 85 | match self { 86 | SureNode::File { ref name, .. } => Some(name), 87 | SureNode::Enter { ref name, .. } => Some(name), 88 | _ => None, 89 | } 90 | } 91 | 92 | /// Get a nice representation of the kind of this node. Returns "???" 93 | /// if the kind isn't meaningful. 94 | pub fn kind(&self) -> &str { 95 | self.atts() 96 | .map(|a| a.get("kind").map(|k| &k[..]).unwrap_or("???")) 97 | .unwrap_or("???") 98 | } 99 | 100 | /// Access the nodes attributes. 101 | pub fn atts(&self) -> Option<&AttMap> { 102 | match self { 103 | SureNode::File { ref atts, .. } => Some(atts), 104 | SureNode::Enter { ref atts, .. } => Some(atts), 105 | _ => None, 106 | } 107 | } 108 | 109 | /// Access the nodes attributes mutably. 110 | pub fn atts_mut(&mut self) -> Option<&mut AttMap> { 111 | match self { 112 | SureNode::File { ref mut atts, .. } => Some(atts), 113 | SureNode::Enter { ref mut atts, .. } => Some(atts), 114 | _ => None, 115 | } 116 | } 117 | } 118 | 119 | // TODO: These might be possible to make more generic, but it gets messy, 120 | // as it might just be best to assume failure. 121 | 122 | /// Write a sure iterator to a standard gzipped file of the given name. 123 | pub fn save(name: P, nodes: I) -> Result<()> 124 | where 125 | P: AsRef, 126 | I: Iterator>, 127 | { 128 | let wr = File::create(name)?; 129 | let wr = GzEncoder::new(wr, Compression::default()); 130 | save_to(wr, nodes) 131 | } 132 | 133 | /// Write a sure iterator to a new temp file with a given naming 134 | /// convention. Returns the name of the file, if it could be created. 
The 135 | /// data will not be written compressed. 136 | pub fn save_naming(naming: &N, nodes: I) -> Result 137 | where 138 | N: NamingConvention, 139 | I: Iterator>, 140 | { 141 | let (tmp_name, mut tmp_file) = naming.temp_file()?; 142 | save_to(&mut tmp_file, nodes)?; 143 | Ok(tmp_name) 144 | } 145 | 146 | /// Save a sure tree to the given writer. 147 | pub fn save_to(wr: W, nodes: I) -> Result<()> 148 | where 149 | W: Write, 150 | I: Iterator>, 151 | { 152 | let mut wr = BufWriter::new(wr); 153 | 154 | writeln!(&mut wr, "asure-2.0")?; 155 | writeln!(&mut wr, "-----")?; 156 | 157 | for node in nodes { 158 | match node? { 159 | SureNode::Enter { name, atts } => header(&mut wr, 'd', &name, &atts)?, 160 | SureNode::File { name, atts } => header(&mut wr, 'f', &name, &atts)?, 161 | SureNode::Sep => writeln!(&mut wr, "-")?, 162 | SureNode::Leave => writeln!(&mut wr, "u")?, 163 | } 164 | } 165 | Ok(()) 166 | } 167 | 168 | /// For pushed based writing, we can also write using a NodeWriter. 169 | pub struct NodeWriter { 170 | writer: BufWriter, 171 | } 172 | 173 | impl NodeWriter { 174 | pub fn new(writer: W) -> Result> { 175 | let mut wr = BufWriter::new(writer); 176 | writeln!(&mut wr, "asure-2.0")?; 177 | writeln!(&mut wr, "-----")?; 178 | 179 | Ok(NodeWriter { writer: wr }) 180 | } 181 | 182 | pub fn write_node(&mut self, node: &SureNode) -> Result<()> { 183 | match node { 184 | SureNode::Enter { name, atts } => header(&mut self.writer, 'd', &name, &atts)?, 185 | SureNode::File { name, atts } => header(&mut self.writer, 'f', &name, &atts)?, 186 | SureNode::Sep => writeln!(&mut self.writer, "-")?, 187 | SureNode::Leave => writeln!(&mut self.writer, "u")?, 188 | } 189 | Ok(()) 190 | } 191 | } 192 | 193 | fn header(out: &mut W, kind: char, name: &str, atts: &AttMap) -> Result<()> { 194 | write!(out, "{}{} [", kind, name)?; 195 | 196 | for (k, v) in atts { 197 | write!(out, "{} {} ", k, v)?; 198 | } 199 | writeln!(out, "]")?; 200 | Ok(()) 201 | } 202 | 203 | /// Load and iterate a sure tree from a standard gzip compressed surefile. 204 | pub fn load>(name: P) -> Result>> { 205 | let rd = File::open(name)?; 206 | let rd = GzDecoder::new(rd); 207 | load_from(rd) 208 | } 209 | 210 | /// Load a surenode sequence from the given reader. 211 | pub fn load_from(rd: R) -> Result> { 212 | let rd = BufReader::new(rd); 213 | let mut lines = rd.split(b'\n'); 214 | 215 | fixed(&mut lines, b"asure-2.0")?; 216 | fixed(&mut lines, b"-----")?; 217 | 218 | Ok(ReadIterator { 219 | lines, 220 | depth: 0, 221 | done: false, 222 | }) 223 | } 224 | 225 | fn fixed(inp: &mut I, exp: &[u8]) -> Result<()> 226 | where 227 | I: Iterator>>, 228 | { 229 | match inp.next() { 230 | Some(Ok(ref text)) if &text[..] 
== exp => Ok(()), 231 | Some(Ok(ref text)) => Err(Error::UnexpectedLine( 232 | String::from_utf8_lossy(text).into_owned(), 233 | String::from_utf8_lossy(exp).into_owned(), 234 | )), 235 | Some(Err(e)) => Err(Error::SureFileError(e)), 236 | None => Err(Error::SureFileEof), 237 | } 238 | } 239 | 240 | pub struct ReadIterator<R> { 241 | lines: io::Split<BufReader<R>>, 242 | depth: usize, 243 | done: bool, 244 | } 245 | 246 | impl<R: Read> Iterator for ReadIterator<R> { 247 | type Item = Result<SureNode>; 248 | 249 | fn next(&mut self) -> Option<Result<SureNode>> { 250 | if self.done { 251 | return None; 252 | } 253 | 254 | let line = match self.get_line() { 255 | Ok(line) => line, 256 | Err(e) => return Some(Err(e)), 257 | }; 258 | 259 | match line[0] { 260 | b'd' => { 261 | let (dname, datts) = decode_entity(&line[1..]); 262 | self.depth += 1; 263 | Some(Ok(SureNode::Enter { 264 | name: dname, 265 | atts: datts, 266 | })) 267 | } 268 | b'f' => { 269 | let (fname, fatts) = decode_entity(&line[1..]); 270 | Some(Ok(SureNode::File { 271 | name: fname, 272 | atts: fatts, 273 | })) 274 | } 275 | b'-' => Some(Ok(SureNode::Sep)), 276 | b'u' => { 277 | self.depth -= 1; 278 | if self.depth == 0 { 279 | self.done = true; 280 | } 281 | Some(Ok(SureNode::Leave)) 282 | } 283 | ch => Some(Err(Error::InvalidSurefileChar(ch as char))), 284 | } 285 | } 286 | } 287 | 288 | impl<R: Read> ReadIterator<R> { 289 | fn get_line(&mut self) -> Result<Vec<u8>> { 290 | match self.lines.next() { 291 | None => Err(Error::TruncatedSurefile), 292 | Some(l) => Ok(l?), 293 | } 294 | } 295 | } 296 | 297 | // TODO: This should return Result to handle errors. 298 | pub(crate) fn decode_entity(text: &[u8]) -> (String, AttMap) { 299 | let (name, mut text) = get_delim(text, b' '); 300 | assert!(text[0] == b'['); 301 | text = &text[1..]; 302 | 303 | let mut atts = AttMap::new(); 304 | while text[0] != b']' { 305 | let (key, t2) = get_delim(text, b' '); 306 | let (value, t2) = get_delim(t2, b' '); 307 | text = t2; 308 | 309 | atts.insert(key, value); 310 | } 311 | 312 | (name, atts) 313 | } 314 | 315 | fn get_delim(text: &[u8], delim: u8) -> (String, &[u8]) { 316 | let mut it = text.iter(); 317 | let space = it.position(|&s| s == delim).unwrap(); 318 | ( 319 | String::from_utf8(text[..space].to_owned()).unwrap(), 320 | &text[space + 1..], 321 | ) 322 | } 323 | -------------------------------------------------------------------------------- /src/node/compare.rs: -------------------------------------------------------------------------------- 1 | //! Compare two iterator-based trees. 2 | 3 | use crate::{node::SureNode, Error, Result}; 4 | use log::error; 5 | use std::{collections::HashSet, path::Path}; 6 | 7 | /// This is the mutable state that is threaded through the recursive 8 | /// traversal of the two trees. 9 | struct State<IA, IB> { 10 | left: SureNode, 11 | right: SureNode, 12 | left_iter: IA, 13 | right_iter: IB, 14 | 15 | // Track warning messages about added and deleted attributes. 16 | adds: HashSet<String>, 17 | missings: HashSet<String>, 18 | 19 | // Attributes to be ignored. 20 | ignore: HashSet<String>, 21 | } 22 | 23 | pub fn compare_trees<P: AsRef<Path>, IA, IB>( 24 | mut left: IA, 25 | mut right: IB, 26 | dir: P, 27 | ignore: &[&str], 28 | ) -> Result<()> 29 | where 30 | IA: Iterator<Item = Result<SureNode>>, 31 | IB: Iterator<Item = Result<SureNode>>, 32 | { 33 | let mut ignore: HashSet<String> = ignore.iter().map(|x| (*x).to_owned()).collect(); 34 | // The ctime and ino will be different if a backup is restored, and we'd still like to get 35 | // meaningful results. Add these to the list of ignored attributes.
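    // An illustrative call (assuming two saved surefiles; `load` comes from
    // the parent `node` module):
    //
    //     let old = load("2sure.bak.gz")?;
    //     let new = load("2sure.dat.gz")?;
    //     compare_trees(old, new, "/some/tree", &["uid", "gid"])?;
    //
    // Differences are printed in a diff-like form, e.g.:
    //
    //     + file                   "/some/tree/new.txt"
    //     - dir                    "/some/tree/gone"
    //       [mtime,size          ] "/some/tree/changed.txt"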
36 | ignore.insert("ctime".to_owned()); 37 | ignore.insert("ino".to_owned()); 38 | 39 | let ln = match left.next() { 40 | None => return Err(Error::EmptyLeftIterator), 41 | Some(Err(e)) => return Err(e), 42 | Some(Ok(node)) => node, 43 | }; 44 | let rn = match right.next() { 45 | None => return Err(Error::EmptyRightIterator), 46 | Some(Err(e)) => return Err(e), 47 | Some(Ok(node)) => node, 48 | }; 49 | let mut state = State { 50 | left: ln, 51 | right: rn, 52 | left_iter: left, 53 | right_iter: right, 54 | adds: HashSet::new(), 55 | missings: HashSet::new(), 56 | ignore, 57 | }; 58 | 59 | state.walk_root(dir.as_ref()) 60 | } 61 | 62 | impl State 63 | where 64 | IA: Iterator>, 65 | IB: Iterator>, 66 | { 67 | /// Advance the left iterator. If it sees the end, it will drop in a 68 | /// "Leave" node, which shouldn't be visited as long as the tree is 69 | /// well-formed. 70 | fn next_left(&mut self) -> Result<()> { 71 | let next = match self.left_iter.next() { 72 | None => SureNode::Leave, 73 | Some(Ok(node)) => node, 74 | Some(Err(e)) => return Err(e), 75 | }; 76 | 77 | self.left = next; 78 | Ok(()) 79 | } 80 | 81 | /// Advance the right iterator. If it sees the end, it will drop in a 82 | /// "Leave" node, which shouldn't be visited as long as the tree is 83 | /// well-formed. 84 | fn next_right(&mut self) -> Result<()> { 85 | let next = match self.right_iter.next() { 86 | None => SureNode::Leave, 87 | Some(Ok(node)) => node, 88 | Some(Err(e)) => return Err(e), 89 | }; 90 | 91 | self.right = next; 92 | Ok(()) 93 | } 94 | 95 | fn walk_root(&mut self, dir: &Path) -> Result<()> { 96 | if !self.left.is_enter() { 97 | Err(Error::UnexpectedLeftNode) 98 | } else if !self.right.is_enter() { 99 | Err(Error::UnexpectedRightNode) 100 | } else if self.left.name() != "__root__" || self.right.name() != "__root__" { 101 | Err(Error::IncorrectName) 102 | } else { 103 | self.compare_enter(dir)?; 104 | self.next_left()?; 105 | self.next_right()?; 106 | self.walk_samedir(dir) 107 | } 108 | } 109 | 110 | /// We are within a directory (of the given name) where both trees have 111 | /// the same directory. This will recursively compare any children, 112 | /// and once both have reached the separator, move to `walk_samefiles`. 113 | fn walk_samedir(&mut self, dir: &Path) -> Result<()> { 114 | loop { 115 | match (self.left.is_sep(), self.right.is_sep()) { 116 | (true, true) => { 117 | self.next_left()?; 118 | self.next_right()?; 119 | return self.walk_samefiles(dir); 120 | } 121 | (false, true) => { 122 | // The old trees has subdirectories not in this 123 | // directory. 124 | self.show_delete(dir); 125 | self.next_left()?; 126 | self.walk_leftdir()?; 127 | } 128 | (true, false) => { 129 | // The new tree has a newly added directory. 130 | self.show_add(dir); 131 | self.next_right()?; 132 | self.walk_rightdir()?; 133 | } 134 | _ if self.left.name() < self.right.name() => { 135 | // Old subdirectory. 136 | self.show_delete(dir); 137 | self.next_left()?; 138 | self.walk_leftdir()?; 139 | } 140 | _ if self.left.name() > self.right.name() => { 141 | // The new tree has a newly added directory. 142 | self.show_add(dir); 143 | self.next_right()?; 144 | self.walk_rightdir()?; 145 | } 146 | _ => { 147 | // Same named directory. 
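                    // (Names within each section are sorted, so this walk is
                    // a two-pointer merge: compare the heads, advance the
                    // side with the smaller name, and recurse when both
                    // sides name the same directory.)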
148 | let dirname = dir.join(self.left.name()); 149 | self.compare_enter(&dirname)?; 150 | self.next_left()?; 151 | self.next_right()?; 152 | self.walk_samedir(&dirname)?; 153 | } 154 | } 155 | } 156 | } 157 | 158 | /// We are within the files section of the same directory in the two 159 | /// trees. Walk through the nodes, reading the Leave node in both, and 160 | /// returning. 161 | fn walk_samefiles(&mut self, dir: &Path) -> Result<()> { 162 | loop { 163 | match (self.left.is_leave(), self.right.is_leave()) { 164 | (true, true) => { 165 | self.next_left()?; 166 | self.next_right()?; 167 | return Ok(()); 168 | } 169 | (false, true) => { 170 | self.show_delete(dir); 171 | self.next_left()?; 172 | } 173 | (true, false) => { 174 | self.show_add(dir); 175 | self.next_right()?; 176 | } 177 | _ if self.left.name() < self.right.name() => { 178 | self.show_delete(dir); 179 | self.next_left()?; 180 | } 181 | _ if self.left.name() > self.right.name() => { 182 | self.show_add(dir); 183 | self.next_right()?; 184 | } 185 | _ => { 186 | // Same file. 187 | let nodename = dir.join(self.left.name()); 188 | self.compare_file(&nodename)?; 189 | self.next_left()?; 190 | self.next_right()?; 191 | } 192 | } 193 | } 194 | } 195 | 196 | /// Old directory on the left tree. Walk through nodes recursively to 197 | /// discard entire tree. 198 | fn walk_leftdir(&mut self) -> Result<()> { 199 | loop { 200 | if self.left.is_enter() { 201 | self.next_left()?; 202 | self.walk_leftdir()?; 203 | } else if self.left.is_leave() { 204 | self.next_left()?; 205 | return Ok(()); 206 | } else { 207 | self.next_left()?; 208 | } 209 | } 210 | } 211 | 212 | /// New directory on the right tree. Walk through nodes recursively to 213 | /// discard entire tree. 214 | fn walk_rightdir(&mut self) -> Result<()> { 215 | loop { 216 | if self.right.is_enter() { 217 | self.next_right()?; 218 | self.walk_rightdir()?; 219 | } else if self.right.is_leave() { 220 | self.next_right()?; 221 | return Ok(()); 222 | } else { 223 | self.next_right()?; 224 | } 225 | } 226 | } 227 | 228 | /// Print a message about something added (the name will be the thing 229 | /// on the right. 230 | fn show_add(&self, dir: &Path) { 231 | println!( 232 | "+ {:22} {:?}", 233 | self.right.kind(), 234 | dir.join(self.right.name()) 235 | ); 236 | } 237 | 238 | /// Print a message about something removed (the name will be the thing 239 | /// on the left. 240 | fn show_delete(&self, dir: &Path) { 241 | println!("- {:22} {:?}", self.left.kind(), dir.join(self.left.name())); 242 | } 243 | 244 | /// Compare the two "Enter" nodes we are visiting. 245 | fn compare_enter(&mut self, dir: &Path) -> Result<()> { 246 | self.compare_atts('d', dir) 247 | } 248 | 249 | /// Compare two file nodes. 250 | fn compare_file(&mut self, dir: &Path) -> Result<()> { 251 | self.compare_atts('f', dir) 252 | } 253 | 254 | /// Attribute comparison. 255 | fn compare_atts(&mut self, _kind: char, dir: &Path) -> Result<()> { 256 | let mut old = self.left.atts().unwrap().clone(); 257 | let mut new = self.right.atts().unwrap().clone(); 258 | let mut diffs = vec![]; 259 | 260 | for att in self.ignore.iter() { 261 | old.remove(att); 262 | new.remove(att); 263 | } 264 | 265 | for (k, v) in &new { 266 | match old.get(k) { 267 | None => { 268 | // This attribute is in the new tree, but not the old 269 | // one, warn, but only once. 
270 | if !self.adds.contains(k) { 271 | error!("Added attribute: {}", k); 272 | self.adds.insert(k.clone()); 273 | } 274 | } 275 | Some(ov) => { 276 | if v != ov { 277 | diffs.push(k.clone()); 278 | } 279 | } 280 | } 281 | old.remove(k); 282 | } 283 | 284 | for k in old.keys() { 285 | if !self.missings.contains(k) { 286 | error!("Missing attribute: {}", k); 287 | self.missings.insert(k.clone()); 288 | } 289 | } 290 | 291 | if !diffs.is_empty() { 292 | let mut buf = String::new(); 293 | diffs.sort(); 294 | for d in &diffs { 295 | if !buf.is_empty() { 296 | buf.push(','); 297 | } 298 | buf.push_str(&d); 299 | } 300 | println!(" [{:<20}] {:?}", buf, dir); 301 | } 302 | 303 | Ok(()) 304 | } 305 | } 306 | -------------------------------------------------------------------------------- /src/node/fs.rs: -------------------------------------------------------------------------------- 1 | /// Sure tree scanning from the filesystem. 2 | use crate::{ 3 | escape::Escape, node::SureNode, progress::ScanProgress, surefs::encode_atts, suretree::AttMap, 4 | Error, Result, 5 | }; 6 | use log::error; 7 | use std::{ 8 | collections::VecDeque, 9 | fs::{self, symlink_metadata, Metadata}, 10 | os::unix::prelude::*, 11 | path::{Path, PathBuf}, 12 | }; 13 | 14 | pub fn walk>(root: P) -> Result<()> { 15 | for entry in scan_fs(root)? { 16 | let entry = entry?; 17 | println!("{:?}", entry); 18 | } 19 | 20 | Ok(()) 21 | } 22 | 23 | /// A filesystem scanner walks a filesystem, iterating over a tree as it is 24 | /// encountered. 25 | pub fn scan_fs>(root: P) -> Result { 26 | let root = root.as_ref().to_path_buf(); 27 | let meta = symlink_metadata(&root)?; 28 | 29 | if !meta.is_dir() { 30 | return Err(Error::RootMustBeDir); 31 | } 32 | 33 | let atts = encode_atts(&root, &meta); 34 | let root_dev = meta.dev(); 35 | let mut todo = VecDeque::new(); 36 | todo.push_back(AugNode::SubDir { 37 | path: root, 38 | name: "__root__".to_string(), 39 | meta, 40 | atts, 41 | }); 42 | 43 | let si = ScanIterator { 44 | todo, 45 | root_dev, 46 | progress: ScanProgress::new(), 47 | }; 48 | 49 | Ok(si) 50 | } 51 | 52 | pub struct ScanIterator { 53 | todo: VecDeque, 54 | root_dev: u64, 55 | progress: ScanProgress, 56 | } 57 | 58 | impl Iterator for ScanIterator { 59 | type Item = Result; 60 | 61 | fn next(&mut self) -> Option> { 62 | match self.todo.pop_front() { 63 | None => None, 64 | Some(AugNode::Normal(e)) => Some(Ok(e)), 65 | Some(AugNode::SubDir { 66 | path, 67 | name, 68 | atts, 69 | meta, 70 | }) => { 71 | // Push the contents of this directory. Unless we have 72 | // crossed a mountpoint. 73 | if !meta.is_dir() || meta.dev() == self.root_dev { 74 | match self.push_dir(&path) { 75 | Ok(()) => (), 76 | Err(e) => return Some(Err(e)), 77 | }; 78 | } else { 79 | self.push_empty_dir(); 80 | } 81 | 82 | Some(Ok(SureNode::Enter { name, atts })) 83 | } 84 | } 85 | } 86 | } 87 | 88 | impl ScanIterator { 89 | fn push_dir(&mut self, path: &Path) -> Result<()> { 90 | let mut entries = vec![]; 91 | 92 | match fs::read_dir(path) { 93 | Ok(dir) => { 94 | for entry in dir { 95 | let entry = match entry { 96 | Ok(ent) => ent, 97 | Err(err) => { 98 | error!("Unable to read from dir: {:?} ({})", path, err); 99 | break; 100 | } 101 | }; 102 | entries.push(entry); 103 | } 104 | } 105 | Err(e) => { 106 | // Warn about the issue, but otherwise continue, with just an empty directory. 107 | error!("Unable to read dir: {:?} ({})", path, e); 108 | } 109 | }; 110 | 111 | // Sort by inode first. 
This helps performance on some filesystems 112 | // (such as ext4). 113 | entries.sort_by_key(|a| a.ino()); 114 | 115 | let mut files: Vec<_> = entries 116 | .iter() 117 | .filter_map(|e| match e.metadata() { 118 | Ok(m) => { 119 | let path = e.path(); 120 | let atts = encode_atts(&path, &m); 121 | 122 | Some(OneFile { 123 | path, 124 | meta: m, 125 | atts, 126 | }) 127 | } 128 | Err(err) => { 129 | error!("Unable to stat file: {:?} ({})", e.path(), err); 130 | None 131 | } 132 | }) 133 | .collect(); 134 | 135 | // Sort them back by name. 136 | files.sort_by(|a, b| a.path.file_name().cmp(&b.path.file_name())); 137 | 138 | let (dirs, files): (Vec<_>, Vec<_>) = files.into_iter().partition(|n| n.meta.is_dir()); 139 | 140 | self.progress.update( 141 | dirs.len() as u64, 142 | files.len() as u64, 143 | files.iter().map(|x| x.meta.len()).sum(), 144 | ); 145 | 146 | self.todo.push_front(AugNode::Normal(SureNode::Leave)); 147 | 148 | // The files in reverse order. 149 | for f in files.into_iter().rev() { 150 | self.todo.push_front(AugNode::Normal(SureNode::File { 151 | name: f.path.file_name().unwrap().as_bytes().escaped(), 152 | atts: f.atts, 153 | })); 154 | } 155 | 156 | self.todo.push_front(AugNode::Normal(SureNode::Sep)); 157 | 158 | // The dirs in reverse order. 159 | for d in dirs.into_iter().rev() { 160 | let name = d.path.file_name().unwrap().as_bytes().escaped(); 161 | self.todo.push_front(AugNode::SubDir { 162 | path: d.path, 163 | name, 164 | meta: d.meta, 165 | atts: d.atts, 166 | }); 167 | } 168 | 169 | Ok(()) 170 | } 171 | 172 | /// Pushes the Sep and Leave needed to make an empty directory work. 173 | /// Used when skipping directories that cross mountpoints. 174 | fn push_empty_dir(&mut self) { 175 | self.todo.push_front(AugNode::Normal(SureNode::Leave)); 176 | self.todo.push_front(AugNode::Normal(SureNode::Sep)); 177 | } 178 | } 179 | 180 | struct OneFile { 181 | path: PathBuf, 182 | meta: Metadata, 183 | atts: AttMap, 184 | } 185 | 186 | /// Augmented entries. This intersperses regular nodes with special ones 187 | /// containing enough information to add subdirectories. 188 | enum AugNode { 189 | Normal(SureNode), 190 | SubDir { 191 | path: PathBuf, 192 | name: String, 193 | meta: Metadata, 194 | atts: AttMap, 195 | }, 196 | } 197 | -------------------------------------------------------------------------------- /src/node/fullpath.rs: -------------------------------------------------------------------------------- 1 | //! Augment an iterator over nodes with something that tracks the full 2 | //! path of the files involved. 3 | //! 4 | //! Unfortunately, Rust's Iter does not tie any lifetimes between the 5 | //! iterator and the result of iteration (which is usually good). This 6 | //! makes it difficult to avoid computing these paths, however. 7 | //! 8 | //! If this becomes a performance bottleneck, we can come up with something 9 | //! more complicated that avoids computing (and allocating) the result 10 | //! paths for each node encountered. 11 | 12 | use crate::{escape::Unescape, node::SureNode, Result}; 13 | use std::{ 14 | ffi::OsString, 15 | os::unix::ffi::OsStringExt, 16 | path::{Path, PathBuf}, 17 | }; 18 | 19 | pub fn into_tracker(iter: I, root: &str) -> impl Iterator> 20 | where 21 | I: Iterator>, 22 | { 23 | let root: OsString = OsStringExt::from_vec(root.unescape().unwrap()); 24 | let mut cur = Path::new(&root).to_path_buf(); 25 | let mut at_root = true; 26 | iter.map(move |node| { 27 | let node = node?; 28 | let path = match &node { 29 | SureNode::Enter { name, .. 
} => { 30 | // Don't add the pseudo "__root__" directory. 31 | if at_root { 32 | if name != "__root__" { 33 | panic!("Root directory not at root"); 34 | } 35 | at_root = false; 36 | } else { 37 | let name: OsString = OsStringExt::from_vec(name.unescape().unwrap()); 38 | cur.push(&name); 39 | } 40 | Some(cur.clone()) 41 | } 42 | SureNode::File { name, .. } => { 43 | let name: OsString = OsStringExt::from_vec(name.unescape().unwrap()); 44 | cur.push(&name); 45 | Some(cur.clone()) 46 | } 47 | _ => None, 48 | }; 49 | 50 | let do_pop = node.is_file() || node.is_leave(); 51 | 52 | let result = Ok(PathedNode { node, path }); 53 | 54 | if do_pop { 55 | cur.pop(); 56 | } 57 | 58 | result 59 | }) 60 | } 61 | 62 | #[derive(Debug)] 63 | pub struct PathedNode { 64 | pub node: SureNode, 65 | pub path: Option, 66 | } 67 | 68 | /* 69 | pub trait PathTrack: Sized { 70 | fn into_tracker(self, root: &str) -> PathTracker; 71 | } 72 | 73 | impl>> PathTrack for I { 74 | fn into_tracker(self, root: &str) -> PathTracker { 75 | PathTracker { 76 | iter: self, 77 | root: Some(root.to_owned()), 78 | dirs: vec![], 79 | } 80 | } 81 | } 82 | 83 | pub struct PathTracker { 84 | iter: I, 85 | root: Option, 86 | dirs: Vec, 87 | } 88 | 89 | #[derive(Debug)] 90 | pub struct PathedNode { 91 | pub node: SureNode, 92 | pub path: Option, 93 | } 94 | 95 | impl Iterator for PathTracker 96 | where I: Iterator>, 97 | { 98 | type Item = Result; 99 | 100 | fn next(&mut self) -> Option> { 101 | match self.iter.next() { 102 | None => None, 103 | Some(Err(e)) => Some(Err(e)), 104 | Some(Ok(node)) => { 105 | let path = match &node { 106 | SureNode::Enter { name, .. } => { 107 | // Don't add the pseudo "__root__ flag. 108 | if self.dirs.is_empty() && name == "__root__" { 109 | let root = self.root.take().unwrap(); 110 | self.dirs.push(root); 111 | } else { 112 | self.dirs.push(name.clone()); 113 | } 114 | Some(self.dirs.join("/")) 115 | } 116 | SureNode::File { name, .. } => { 117 | self.dirs.push(name.clone()); 118 | Some(self.dirs.join("/")) 119 | } 120 | _ => None, 121 | }; 122 | 123 | let do_pop = node.is_file() || node.is_leave(); 124 | 125 | let result = Some(Ok(PathedNode { 126 | node: node, 127 | path: path, 128 | })); 129 | 130 | if do_pop { 131 | self.dirs.pop(); 132 | } 133 | 134 | result 135 | } 136 | } 137 | } 138 | } 139 | */ 140 | -------------------------------------------------------------------------------- /src/node/hashes.rs: -------------------------------------------------------------------------------- 1 | //! Hash updates for node-based sure file. 2 | 3 | use crate::{ 4 | hashes::{hash_file, noatime_open, Estimate}, 5 | node::{into_tracker, NodeWriter, SureNode}, 6 | progress::Progress, 7 | store::{Store, TempCleaner}, 8 | Error, Result, 9 | }; 10 | use crossbeam::channel::{bounded, Sender}; 11 | use data_encoding::HEXLOWER; 12 | use log::{debug, error}; 13 | use rusqlite::{types::ToSql, Connection}; 14 | use std::{ 15 | cmp::Ordering, 16 | io::Write, 17 | mem, 18 | path::PathBuf, 19 | sync::{mpsc::sync_channel, Arc, Mutex}, 20 | thread, 21 | }; 22 | 23 | /// A Source is something that can repeatedly give us an iterator over 24 | /// nodes. 25 | pub trait Source { 26 | fn iter(&self) -> Result> + Send>>; 27 | } 28 | 29 | /// The HashUpdater is able to update hashes. This is the first pass. 
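///
/// A sketch of the full two-pass flow (names from this module; assumes a
/// `Source`, a `Store`, and an `Estimate` are at hand):
///
/// ```text
/// let merger = HashUpdater::new(source, store)
///     .compute_parallel(base_dir, &estimate)?; // pass 1: hash into temp db
/// merger.merge(&mut NodeWriter::new(out)?)?;   // pass 2: fold hashes back in
/// ```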
30 | pub struct HashUpdater<'n, S> { 31 | source: S, 32 | store: &'n dyn Store, 33 | } 34 | 35 | pub struct HashMerger { 36 | source: S, 37 | conn: Connection, 38 | // Own the temp, so it won't be deleted until the connection is also 39 | // closed. 40 | _temp: Box, 41 | } 42 | 43 | impl<'a, S: Source> HashUpdater<'a, S> { 44 | pub fn new(source: S, store: &dyn Store) -> HashUpdater { 45 | HashUpdater { source, store } 46 | } 47 | 48 | /// First pass. Go through the source nodes, and for any that need a 49 | /// hash, compute the hash, and collect the results into a temporary 50 | /// file. Consumes the updater, returning the HashMerger which is used 51 | /// to merge the hash results into a datastream. 52 | pub fn compute(mut self, base: &str, estimate: &Estimate) -> Result> { 53 | let meter = Arc::new(Mutex::new(Progress::new(estimate.files, estimate.bytes))); 54 | let (mut conn, temp) = self.setup_db()?; 55 | 56 | let (tx, rx) = sync_channel(num_cpus::get()); 57 | 58 | let iter = into_tracker(self.source.iter()?, base); 59 | let mut count = 0; 60 | let meter2 = meter.clone(); 61 | thread::spawn(move || { 62 | for entry in iter { 63 | let entry = entry.unwrap(); 64 | if entry.node.needs_hash() { 65 | let path = entry.path.unwrap(); 66 | match noatime_open(&path) { 67 | Ok(mut fd) => match hash_file(&mut fd) { 68 | Ok(ref h) => { 69 | tx.send(Some(HashInfo { 70 | id: count, 71 | hash: h.as_ref().to_owned(), 72 | })) 73 | .unwrap(); 74 | } 75 | Err(e) => { 76 | error!("Unable to hash file: '{:?}' ({})", path, e); 77 | } 78 | }, 79 | Err(e) => { 80 | error!("Unable to open '{:?}' for hashing ({})", path, e); 81 | } 82 | } 83 | // println!("{} {:?}", count, entry.path); 84 | count += 1; 85 | 86 | meter2.lock().unwrap().update(1, entry.node.size()); 87 | } 88 | } 89 | tx.send(None).unwrap(); 90 | }); 91 | 92 | // The above will send Option over the tx/rx channel. 93 | // Capture these and add them all to the database. 94 | let trans = conn.transaction()?; 95 | while let Some(info) = rx.recv()? { 96 | trans.execute( 97 | "INSERT INTO hashes (id, hash) VALUES (?1, ?2)", 98 | &[&info.id as &dyn ToSql, &info.hash as &dyn ToSql], 99 | )?; 100 | } 101 | trans.commit()?; 102 | 103 | meter.lock().unwrap().flush(); 104 | Ok(HashMerger { 105 | source: self.source, 106 | conn, 107 | _temp: temp, 108 | }) 109 | } 110 | 111 | /// First pass, multi-threaded version. Go through the source nodes, 112 | /// and for any that need a hash, compute the hash, and collect the 113 | /// result into a temporary file. Consumes the updater, returning the 114 | /// HashMerger which is used to merge the hash results into a 115 | /// datastream. 116 | pub fn compute_parallel(mut self, base: &str, estimate: &Estimate) -> Result> { 117 | let meter = Arc::new(Mutex::new(Progress::new(estimate.files, estimate.bytes))); 118 | let iter = into_tracker(self.source.iter()?, base); 119 | let (mut conn, temp) = self.setup_db()?; 120 | let trans = conn.transaction()?; 121 | 122 | let meter2 = meter.clone(); 123 | crossbeam::scope(move |s| { 124 | let ncpu = num_cpus::get(); 125 | 126 | // The work channel. Single sender, multiple receivers (one 127 | // for each CPU). 128 | let (work_send, work_recv) = bounded(ncpu); 129 | 130 | // The result channel. Multiple senders, single receiver. 131 | let (result_send, result_recv) = bounded(ncpu); 132 | 133 | // This thread reads the nodes, and submits work requests for 134 | // them. This will close the channel when it finishes, as the 135 | // work_send is moved in. 
136 | s.spawn(move |_| { 137 | let mut count = 0; 138 | for entry in iter { 139 | let entry = entry.unwrap(); // TODO: Handle error. 140 | if entry.node.needs_hash() { 141 | let path = entry.path.unwrap(); 142 | work_send 143 | .send(HashWork { 144 | id: count, 145 | path, 146 | size: entry.node.size(), 147 | }) 148 | .unwrap(); 149 | count += 1; 150 | } 151 | } 152 | }); 153 | 154 | // Fire off a thread for each worker. 155 | for _ in 0..ncpu { 156 | let work_recv = work_recv.clone(); 157 | let result_send = result_send.clone(); 158 | let meter2 = meter2.clone(); 159 | s.spawn(move |_| { 160 | for work in work_recv { 161 | hash_one_file(&work, &result_send, &meter2); 162 | } 163 | }); 164 | } 165 | drop(result_send); 166 | 167 | // And, in the main thread, take all of the results, and add 168 | // them to the sql database. 169 | for info in result_recv { 170 | trans 171 | .execute( 172 | "INSERT INTO hashes (id, hash) VALUES (?1, ?2)", 173 | &[&info.id as &dyn ToSql, &info.hash as &dyn ToSql], 174 | ) 175 | .unwrap(); 176 | } 177 | trans.commit()?; 178 | ok_result() 179 | }) 180 | .map_err(|e| Error::Hash(format!("{:?}", e)))??; 181 | 182 | meter.lock().unwrap().flush(); 183 | Ok(HashMerger { 184 | source: self.source, 185 | conn, 186 | _temp: temp, 187 | }) 188 | } 189 | 190 | /// Set up the sqlite database to hold the hash updates. 191 | fn setup_db(&mut self) -> Result<(Connection, Box)> { 192 | // Create the temp file. Discard the file so that it will be 193 | // closed. 194 | let tmp = self.store.make_temp()?.into_loader()?; 195 | let conn = Connection::open(tmp.path_ref())?; 196 | conn.execute( 197 | "CREATE TABLE hashes ( 198 | id INTEGER PRIMARY KEY, 199 | hash BLOB)", 200 | [], 201 | )?; 202 | 203 | Ok((conn, tmp.into_cleaner()?)) 204 | } 205 | } 206 | 207 | fn hash_one_file(work: &HashWork, sender: &Sender, meter: &Arc>) { 208 | match noatime_open(&work.path) { 209 | Ok(mut fd) => match hash_file(&mut fd) { 210 | Ok(ref h) => { 211 | sender 212 | .send(HashInfo { 213 | id: work.id, 214 | hash: h.as_ref().to_owned(), 215 | }) 216 | .unwrap(); 217 | } 218 | Err(e) => { 219 | error!("Unable to hash file: '{:?}' ({})", work.path, e); 220 | } 221 | }, 222 | Err(e) => { 223 | error!("Unable to open '{:?}' for hashing ({})", work.path, e); 224 | } 225 | } 226 | meter.lock().unwrap().update(1, work.size); 227 | } 228 | 229 | // To make it easier to return a typed result. 230 | fn ok_result() -> Result<()> { 231 | Ok(()) 232 | } 233 | 234 | impl HashMerger { 235 | /// Second pass. Merge the updated hashes back into the data. Note 236 | /// that this is 'push' based instead of 'pull' because there is a 237 | /// chain of lifetime dependencies from Connection->Statement->Rows and 238 | /// if we tried to return something holding the Rows iterator, the user 239 | /// would have to manage these lifetimes. 240 | pub fn merge(self, writer: &mut NodeWriter) -> Result<()> { 241 | let mut stmt = self 242 | .conn 243 | .prepare("SELECT id, hash FROM hashes ORDER BY id")?; 244 | let mut hash_iter = stmt 245 | .query_map([], |row| { 246 | Ok(HashInfo { 247 | id: row.get(0)?, 248 | hash: row.get(1)?, 249 | }) 250 | })? 251 | .peekable(); 252 | 253 | let mut count = 0; 254 | for entry in self.source.iter()? 
{ 255 | let mut entry = entry?; 256 | if entry.needs_hash() { 257 | let hnode = loop { 258 | match hash_iter.peek() { 259 | Some(Ok(hnode)) => { 260 | match count.cmp(&hnode.id) { 261 | Ordering::Equal => { 262 | let node = hash_iter.next().unwrap()?; 263 | break Some(node); 264 | } 265 | Ordering::Less => { 266 | // Node not present in hash, means we 267 | // weren't able to compute a hash of the 268 | // file. 269 | break None; 270 | } 271 | _ => panic!("Out of sequence hash"), 272 | } 273 | } 274 | Some(Err(e)) => { 275 | return Err(Error::WrappedSql(format!("{:?}", e))); 276 | } 277 | None => break None, 278 | } 279 | }; 280 | 281 | if let Some(HashInfo { hash, .. }) = &hnode { 282 | let hex = HEXLOWER.encode(hash); 283 | entry.atts_mut().unwrap().insert("sha1".to_string(), hex); 284 | } 285 | 286 | count += 1; 287 | } 288 | writer.write_node(&entry)?; 289 | // println!("{:?}", entry); 290 | } 291 | 292 | Ok(()) 293 | } 294 | } 295 | 296 | #[derive(Debug)] 297 | struct HashInfo { 298 | id: i64, 299 | hash: Vec, 300 | } 301 | 302 | #[derive(Debug)] 303 | struct HashWork { 304 | id: i64, 305 | size: u64, 306 | path: PathBuf, 307 | } 308 | 309 | /// An iterator that pulls hash from old nodes if the file is unchanged. 310 | pub struct HashCombiner { 311 | // This works like Peekable, but we keep the head in this structure and 312 | // swap it out to advance. Because the nodes are a strict tree 313 | // traversal, we always have a node to view, which makes this simpler 314 | // to use than Peekable, where every call can return a node or a 315 | // failure. 316 | /// The current head of the left tree. 317 | left: SureNode, 318 | /// The current head of the right tree. 319 | right: SureNode, 320 | 321 | /// The iterator for the left node. 322 | left_iter: Iold, 323 | /// The iterator for the right node. 324 | right_iter: Inew, 325 | 326 | state: Vec, 327 | seen_root: bool, 328 | } 329 | 330 | #[derive(Debug)] 331 | enum CombineState { 332 | // Discard one tree level on the left side, we are viewing the dir 333 | // nodes. 334 | LeftDirs, 335 | 336 | // We are passing through the tree on the right. Visiting the dir 337 | // nodes. 338 | RightDirs, 339 | 340 | // We are in a common directory, visiting the dir nodes. 341 | SameDirs, 342 | 343 | // We are in a common directory, visiting the file nodes. 344 | SameFiles, 345 | } 346 | 347 | impl HashCombiner 348 | where 349 | Iold: Iterator>, 350 | Inew: Iterator>, 351 | { 352 | pub fn new(mut left_iter: Iold, mut right_iter: Inew) -> Result> { 353 | let left = match left_iter.next() { 354 | None => return Err(Error::EmptyLeftIterator), 355 | Some(Err(e)) => return Err(e), 356 | Some(Ok(node)) => node, 357 | }; 358 | let right = match right_iter.next() { 359 | None => return Err(Error::EmptyRightIterator), 360 | Some(Err(e)) => return Err(e), 361 | Some(Ok(node)) => node, 362 | }; 363 | 364 | Ok(HashCombiner { 365 | left, 366 | right, 367 | left_iter, 368 | right_iter, 369 | state: vec![], 370 | seen_root: false, 371 | }) 372 | } 373 | 374 | /// Advance the left iterator, replacing 'left' with the new value, and 375 | /// returning that old value. Returns the error from the iterator if 376 | /// that happened. If we see the end of the iterator, places 'Leave' 377 | /// in the node, which should be the same as what was there. 
378 | fn next_left(&mut self) -> Result { 379 | let next = match self.left_iter.next() { 380 | None => SureNode::Leave, 381 | Some(Ok(node)) => node, 382 | Some(Err(e)) => return Err(e), 383 | }; 384 | 385 | Ok(mem::replace(&mut self.left, next)) 386 | } 387 | 388 | /// Advance the right iterator, replacing 'right' with the new value, and 389 | /// returning that old value. Returns the error from the iterator if 390 | /// that happened. If we see the end of the iterator, places 'Leave' 391 | /// in the node, which should be the same as what was there. 392 | fn next_right(&mut self) -> Result { 393 | let next = match self.right_iter.next() { 394 | None => SureNode::Leave, 395 | Some(Ok(node)) => node, 396 | Some(Err(e)) => return Err(e), 397 | }; 398 | 399 | Ok(mem::replace(&mut self.right, next)) 400 | } 401 | } 402 | 403 | /// The result of one of the visitors. Continue means to go ahead and 404 | /// process the next nodes. Return means that this result should be 405 | /// returned. Note that we handle the EoF case specially, so this is not 406 | /// an option. 407 | enum VisitResult { 408 | Continue, 409 | Node(SureNode), 410 | } 411 | 412 | macro_rules! vre { 413 | ($err:expr) => { 414 | Err($err) 415 | }; 416 | } 417 | 418 | macro_rules! vro { 419 | ($result:expr) => { 420 | Ok(VisitResult::Node($result)) 421 | }; 422 | } 423 | 424 | // The iterator for the hash combiner. This iterator lazily traverses two 425 | // iterators that are assumed to be and old and new traversal of the same 426 | // filesystem. The output will be the same nodes as the new, but possibly 427 | // with 'sha1' values carried over from the old tree when there is a 428 | // sufficient match. 429 | impl Iterator for HashCombiner 430 | where 431 | Iold: Iterator>, 432 | Inew: Iterator>, 433 | { 434 | type Item = Result; 435 | 436 | fn next(&mut self) -> Option> { 437 | loop { 438 | // Handle the completion state separately, so we don't have as 439 | // many to deal with below. 440 | if self.seen_root && self.state.is_empty() { 441 | return None; 442 | } 443 | 444 | let vr = match self.state.pop() { 445 | None => self.visit_root(), 446 | Some(CombineState::SameDirs) => self.visit_samedir(), 447 | Some(CombineState::SameFiles) => self.visit_samefiles(), 448 | Some(CombineState::RightDirs) => self.visit_rightdirs(), 449 | Some(CombineState::LeftDirs) => self.visit_leftdirs(), 450 | }; 451 | 452 | match vr { 453 | Ok(VisitResult::Continue) => (), 454 | Ok(VisitResult::Node(node)) => return Some(Ok(node)), 455 | Err(e) => return Some(Err(e)), 456 | } 457 | } 458 | } 459 | } 460 | 461 | // The body, a method for each state. 462 | impl HashCombiner 463 | where 464 | Iold: Iterator>, 465 | Inew: Iterator>, 466 | { 467 | fn visit_root(&mut self) -> Result { 468 | if !self.left.is_enter() { 469 | vre!(Error::UnexpectedLeftNode) 470 | } else if !self.right.is_enter() { 471 | vre!(Error::UnexpectedRightNode) 472 | } else if self.left.name() != "__root__" || self.right.name() != "__root__" { 473 | vre!(Error::IncorrectName) 474 | } else { 475 | let _ = self.next_left()?; 476 | let rnode = self.next_right()?; 477 | self.state.push(CombineState::SameDirs); 478 | self.seen_root = true; 479 | vro!(rnode) 480 | } 481 | } 482 | 483 | // Both trees are in the same directory, and we are looking at 484 | // directory nodes. 485 | fn visit_samedir(&mut self) -> Result { 486 | // Handle the cases where they aren't finished together. 
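        // (`state` acts as an explicit recursion stack: pushing `SameDirs`
        // twice means "descend into the child, then resume this directory",
        // replacing the call-stack recursion used in `compare.rs`.)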
487 | debug!("visit samedir: {:?}, {:?}", self.left, self.right); 488 | match (self.left.is_sep(), self.right.is_sep()) { 489 | (true, true) => { 490 | // Both have finished with child directories. 491 | let _ = self.next_left()?; 492 | let rnode = self.next_right()?; 493 | // Push the new state. 494 | self.state.push(CombineState::SameFiles); 495 | vro!(rnode) 496 | } 497 | (false, false) => { 498 | // We are still visiting directories. Assume it is well 499 | // formed, and we are only going to see Enter nodes. 500 | match self.left.name().cmp(&self.right.name()) { 501 | Ordering::Equal => { 502 | // This is the same directory, descend it. 503 | self.state.push(CombineState::SameDirs); 504 | self.state.push(CombineState::SameDirs); 505 | let _ = self.next_left()?; 506 | vro!(self.next_right()?) 507 | } 508 | Ordering::Less => { 509 | // A directory in the old tree we no longer have. 510 | let _ = self.next_left()?; 511 | self.state.push(CombineState::SameDirs); 512 | self.state.push(CombineState::LeftDirs); 513 | Ok(VisitResult::Continue) 514 | } 515 | Ordering::Greater => { 516 | // A new directory entirely. 517 | self.state.push(CombineState::SameDirs); 518 | self.state.push(CombineState::RightDirs); 519 | vro!(self.next_right()?) 520 | } 521 | } 522 | } 523 | (false, true) => { 524 | // Old has an old directory no longer present. 525 | let _ = self.next_left()?; 526 | self.state.push(CombineState::SameDirs); 527 | self.state.push(CombineState::LeftDirs); 528 | Ok(VisitResult::Continue) 529 | } 530 | (true, false) => { 531 | // Directories present in new, not in old. 532 | self.state.push(CombineState::SameDirs); 533 | self.state.push(CombineState::RightDirs); 534 | vro!(self.next_right()?) 535 | } 536 | } 537 | } 538 | 539 | // Both trees are in the same directory, and we are looking at file 540 | // nodes. 541 | fn visit_samefiles(&mut self) -> Result { 542 | debug!("visit samefiles: {:?}, {:?}", self.left, self.right); 543 | match (self.left.is_leave(), self.right.is_leave()) { 544 | (true, true) => { 545 | // Both are leaving at the same time, nothing to push onto 546 | // state. Consume the nodes, and return the leave. 547 | let _ = self.next_left()?; 548 | vro!(self.next_right()?) 549 | } 550 | (true, false) => { 551 | self.state.push(CombineState::SameFiles); 552 | // New file added in new, not present in old. 553 | vro!(self.next_right()?) 554 | } 555 | (false, true) => { 556 | // File removed. 557 | self.state.push(CombineState::SameFiles); 558 | let _ = self.next_left()?; 559 | Ok(VisitResult::Continue) 560 | } 561 | (false, false) => { 562 | self.state.push(CombineState::SameFiles); 563 | 564 | // Two names within a directory. 565 | match self.left.name().cmp(&self.right.name()) { 566 | Ordering::Equal => { 567 | let left = self.next_left()?; 568 | let mut right = self.next_right()?; 569 | maybe_copy_sha(&left, &mut right); 570 | vro!(right) 571 | } 572 | Ordering::Less => { 573 | // An old name no longer present. 574 | let _ = self.next_left()?; 575 | Ok(VisitResult::Continue) 576 | } 577 | Ordering::Greater => { 578 | // A new name with no corresponding old name. 579 | vro!(self.next_right()?) 580 | } 581 | } 582 | } 583 | } 584 | } 585 | 586 | fn visit_rightdirs(&mut self) -> Result { 587 | debug!("visit rightdirs: {:?}, {:?}", self.left, self.right); 588 | if self.right.is_sep() { 589 | // Since we don't care about files, or matching, no need for 590 | // self.state.push(CombineState::RightFiles) 591 | // the RightFiles state, just stay. 
592 | self.state.push(CombineState::RightDirs); 593 | } else if self.right.is_enter() { 594 | self.state.push(CombineState::RightDirs); 595 | self.state.push(CombineState::RightDirs); 596 | } else if self.right.is_leave() { 597 | // No state change. 598 | } else { 599 | // Otherwise, stays the same. 600 | self.state.push(CombineState::RightDirs); 601 | } 602 | vro!(self.next_right()?) 603 | } 604 | 605 | fn visit_leftdirs(&mut self) -> Result { 606 | debug!("visit rightdirs: {:?}, {:?}", self.left, self.right); 607 | if self.left.is_sep() { 608 | // Since we don't care about files, or matching, no need for 609 | // self.state.push(CombineState::RightFiles) 610 | // the RightFiles state, just stay. 611 | self.state.push(CombineState::LeftDirs); 612 | } else if self.left.is_enter() { 613 | self.state.push(CombineState::LeftDirs); 614 | self.state.push(CombineState::LeftDirs); 615 | } else if self.left.is_leave() { 616 | // No state change. 617 | } else { 618 | // Otherwise, stays the same. 619 | self.state.push(CombineState::LeftDirs); 620 | } 621 | let _ = self.next_left()?; 622 | Ok(VisitResult::Continue) 623 | } 624 | } 625 | 626 | fn maybe_copy_sha(left: &SureNode, right: &mut SureNode) { 627 | let latts = left.atts().unwrap(); 628 | let ratts = right.atts_mut().unwrap(); 629 | 630 | // If we already have a sha1, don't do anything. 631 | if ratts.contains_key("sha1") { 632 | return; 633 | } 634 | 635 | // Only compare regular files. 636 | if latts["kind"] != "file" || ratts["kind"] != "file" { 637 | return; 638 | } 639 | 640 | // Make sure inode and ctime are identical. 641 | if latts.get("ino") != ratts.get("ino") || latts.get("ctime") != ratts.get("ctime") { 642 | return; 643 | } 644 | 645 | // And only update if there is a sha1 to get. 646 | match latts.get("sha1") { 647 | None => (), 648 | Some(v) => { 649 | ratts.insert("sha1".to_string(), v.to_string()); 650 | } 651 | } 652 | } 653 | -------------------------------------------------------------------------------- /src/progress.rs: -------------------------------------------------------------------------------- 1 | //! A simple progress meter. 2 | //! 3 | //! Records updates of number of files visited, and number of bytes 4 | //! processed. When given an estimate, printes a simple periodic report of 5 | //! how far along we think we are. 6 | 7 | use env_logger::Builder; 8 | use lazy_static::lazy_static; 9 | use log::Log; 10 | use std::{ 11 | io::{stdout, Write}, 12 | sync::Mutex, 13 | }; 14 | use time::{Duration, OffsetDateTime}; 15 | 16 | // The Rust logging system (log crate) only allows a single logger to be 17 | // logged once. If we want to capture this, it has to be done before any 18 | // logger is initialized. Globally, within a mutex, we keep this simple 19 | // state of what is happening. 20 | struct State { 21 | // The last message printed. Since an empty string an no message are 22 | // the same thing, we don't worry about having an option here. 23 | message: String, 24 | 25 | // When we next expect to update the message. 26 | next_update: OffsetDateTime, 27 | 28 | // Set to true if the logging system has been initialized. 29 | is_logging: bool, 30 | } 31 | 32 | // The SafeLogger wraps another logger, coordinating the logging with the 33 | // state to properly interleave logs and messages. 34 | struct SafeLogger { 35 | inner: Box, 36 | } 37 | 38 | /// Initialize the standard logger, based on `env_logger::init()`, but 39 | /// coordinated with any progress meters. 
38 | /// Initialize the standard logger, based on `env_logger::init()`, but
39 | /// coordinated with any progress meters. Like `init`, this will panic if
40 | /// the logging system has already been initialized.
41 | pub fn log_init() {
42 |     let mut st = STATE.lock().unwrap();
43 |     let inner = Builder::from_default_env().build();
44 |     let max_level = inner.filter();
45 | 
46 |     let logger = SafeLogger {
47 |         inner: Box::new(inner),
48 |     };
49 |     log::set_boxed_logger(Box::new(logger)).expect("Set Logger");
50 |     log::set_max_level(max_level);
51 | 
52 |     st.is_logging = true;
53 |     st.next_update = update_interval(true);
54 | }
55 | 
56 | // There are two update intervals, depending on whether we are logging.
57 | fn update_interval(is_logging: bool) -> OffsetDateTime {
58 |     if is_logging {
59 |         OffsetDateTime::now_utc() + Duration::milliseconds(250)
60 |     } else {
61 |         OffsetDateTime::now_utc() + Duration::seconds(5)
62 |     }
63 | }
64 | 
65 | lazy_static! {
66 |     // The current global state.
67 |     static ref STATE: Mutex<State> = Mutex::new(State {
68 |         message: String::new(),
69 |         next_update: update_interval(false),
70 |         is_logging: false,
71 |     });
72 | }
73 | 
74 | impl State {
75 |     /// Called to advance to the next message, sets the update time
76 |     /// appropriately.
77 |     fn next(&mut self) {
78 |         self.next_update = update_interval(self.is_logging);
79 |     }
80 | 
81 |     /// Clears the visual text of the current message (but not the message
82 |     /// buffer itself, so that it can be redisplayed if needed).
83 |     fn clear(&self) {
84 |         for ch in self.message.chars() {
85 |             if ch == '\n' {
86 |                 print!("\x1b[1A\x1b[2K");
87 |             }
88 |         }
89 |         stdout().flush().expect("safe stdout write");
90 |     }
91 | 
92 |     /// Update the current message.
93 |     fn update(&mut self, message: String) {
94 |         self.clear();
95 |         self.message = message;
96 |         print!("{}", self.message);
97 |         stdout().flush().expect("safe stdout write");
98 |         self.next();
99 |     }
100 | 
101 |     /// Indicates if the time has expired and another update should be
102 |     /// done. Check this first where formatting and allocating the update
103 |     /// message would cost more than the system call needed to determine
104 |     /// the current time.
105 |     fn need_update(&self) -> bool {
106 |         OffsetDateTime::now_utc() >= self.next_update
107 |     }
108 | }
109 | 
110 | impl Log for SafeLogger {
111 |     fn enabled(&self, metadata: &log::Metadata) -> bool {
112 |         self.inner.enabled(metadata)
113 |     }
114 | 
115 |     fn log(&self, record: &log::Record) {
116 |         let enabled = self.inner.enabled(record.metadata());
117 | 
118 |         if enabled {
119 |             let st = STATE.lock().unwrap();
120 |             st.clear();
121 |             self.inner.log(record);
122 |             print!("{}", st.message);
123 |             stdout().flush().expect("safe stdout write");
124 |         }
125 |     }
126 | 
127 |     fn flush(&self) {
128 |         let st = STATE.lock().unwrap();
129 |         st.clear();
130 |         self.inner.flush();
131 |         print!("{}", st.message);
132 |         stdout().flush().expect("safe stdout write");
133 |     }
134 | }
135 | 
136 | pub struct Progress {
137 |     cur_files: u64,
138 |     total_files: u64,
139 | 
140 |     cur_bytes: u64,
141 |     total_bytes: u64,
142 | }
143 | 
144 | impl Progress {
145 |     /// Construct a progress meter, with the given number of files and
146 |     /// bytes as an estimate.
147 |     pub fn new(files: u64, bytes: u64) -> Progress {
148 |         Progress {
149 |             cur_files: 0,
150 |             total_files: files,
151 | 
152 |             cur_bytes: 0,
153 |             total_bytes: bytes,
154 |         }
155 |     }
156 | 
157 |     /// Update the progress meter.
158 |     pub fn update(&mut self, files: u64, bytes: u64) {
159 |         self.cur_files += files;
160 |         self.cur_bytes += bytes;
161 | 
162 |         let mut st = STATE.lock().unwrap();
163 |         if st.need_update() {
164 |             st.update(self.message());
165 |         }
166 |     }
167 | 
168 |     /// Flush the output, regardless of whether an update is needed.
169 |     pub fn flush(&mut self) {
170 |         let mut st = STATE.lock().unwrap();
171 |         st.update(self.message());
172 | 
173 |         // Clear the stored message so that a later clear won't erase the
174 |         // text we just left on the screen.
175 |         st.message.clear();
176 |     }
177 | 
178 |     pub fn message(&self) -> String {
179 |         format!(
180 |             "{:7}/{:7} ({:5.1}%) files, {}/{} ({:5.1}%) bytes\n",
181 |             self.cur_files,
182 |             self.total_files,
183 |             (self.cur_files as f64 * 100.0) / self.total_files as f64,
184 |             humanize(self.cur_bytes),
185 |             humanize(self.total_bytes),
186 |             (self.cur_bytes as f64 * 100.0) / self.total_bytes as f64
187 |         )
188 |     }
189 | }
190 | 
191 | /// A progress meter used when initially scanning.
192 | pub struct ScanProgress {
193 |     dirs: u64,
194 |     files: u64,
195 |     bytes: u64,
196 | }
197 | 
198 | impl ScanProgress {
199 |     /// Construct a new scanning progress meter.
200 |     pub fn new() -> ScanProgress {
201 |         ScanProgress {
202 |             dirs: 0,
203 |             files: 0,
204 |             bytes: 0,
205 |         }
206 |     }
207 | 
208 |     /// Update the meter.
209 |     pub fn update(&mut self, dirs: u64, files: u64, bytes: u64) {
210 |         self.dirs += dirs;
211 |         self.files += files;
212 |         self.bytes += bytes;
213 | 
214 |         let mut st = STATE.lock().unwrap();
215 |         if st.need_update() {
216 |             st.update(self.message());
217 |         }
218 |     }
219 | 
220 |     fn message(&self) -> String {
221 |         format!(
222 |             "scan: {} dirs {} files, {} bytes\n",
223 |             self.dirs,
224 |             self.files,
225 |             humanize(self.bytes)
226 |         )
227 |     }
228 | }
229 | 
230 | impl Drop for ScanProgress {
231 |     fn drop(&mut self) {
232 |         let mut st = STATE.lock().unwrap();
233 |         st.update(self.message());
234 | 
235 |         st.message.clear();
236 |     }
237 | }
238 | 
239 | /// Print a size in a more human-friendly format.
240 | pub fn humanize(value: u64) -> String {
241 |     let mut value = value as f64;
242 |     let mut unit = 0;
243 | 
244 |     while value > 1024.0 {
245 |         value /= 1024.0;
246 |         unit += 1;
247 |     }
248 | 
249 |     static UNITS: [&str; 9] = [
250 |         "B  ", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
251 |     ];
252 | 
253 |     let precision = if value < 10.0 {
254 |         3
255 |     } else if value < 100.0 {
256 |         2
257 |     } else {
258 |         1
259 |     };
260 | 
261 |     format!("{:6.*}{}", precision, value, UNITS[unit])
262 | }
263 | 
--------------------------------------------------------------------------------
/src/show.rs:
--------------------------------------------------------------------------------
 1 | // Show module.
 2 | 
 3 | use crate::{Result, Store, Version};
 4 | 
 5 | pub fn show_tree(store: &dyn Store) -> Result<()> {
 6 |     for node in store.load_iter(Version::Latest)? {
 7 |         let node = node?;
 8 |         println!("{:?}", node);
 9 |     }
10 |     Ok(())
11 | }
12 | 
--------------------------------------------------------------------------------
/src/store.rs:
--------------------------------------------------------------------------------
 1 | // Surefile store
 2 | 
 3 | use crate::{Error, Result, SureNode};
 4 | use chrono::{DateTime, Utc};
 5 | use log::info;
 6 | use std::{
 7 |     collections::BTreeMap,
 8 |     io::{BufRead, Write},
 9 |     path::Path,
10 | };
11 | 
12 | mod weave;
13 | 
14 | pub use self::weave::WeaveStore;
15 | use self::weave::Compression;
16 | 
17 | /// Tags are just key/value pairs. Both key and value should be printable strings.
18 | pub type StoreTags = BTreeMap<String, String>;
19 | 
20 | /// Something that can store and retrieve SureTrees.
21 | pub trait Store {
22 |     /// Retrieve the available versions in the store. These should be listed, newest first.
23 |     fn get_versions(&self) -> Result<Vec<StoreVersion>>;
24 | 
25 |     /// Load the specified version, returning an iterator over the nodes.
26 |     fn load_iter(&self, version: Version) -> Result<Box<dyn Iterator<Item = Result<SureNode>>>>;
27 | 
28 |     /// Create a temporary storage location.
29 |     fn make_temp(&self) -> Result<Box<dyn TempFile + '_>>;
30 | 
31 |     /// Create a writer for a new version.
32 |     fn make_new(&self, tags: &StoreTags) -> Result<Box<dyn StoreWriter + '_>>;
33 | }
34 | 
35 | /// A TempFile is a temporary storage location that can be written to, and
36 | /// then committed as a new version, or discarded entirely if it is
37 | /// dropped.
38 | /// Typical usage patterns are:
39 | /// - Write to the file, turn into a reader to reread the data. Will be
40 | ///   deleted on drop.
41 | /// - Write to the file, turn into a loader which can make multiple
42 | ///   readers. Will be deleted on drop.
43 | /// - Write to the file, which can then be committed. The file will be
44 | ///   deleted, but its data merged into the latest version in the store.
45 | pub trait TempFile<'a>: Write {
46 |     fn into_loader(self: Box<Self>) -> Result<Box<dyn TempLoader + 'a>>;
47 | 
48 |     // Close the file, returning a TempCleaner that will clean up the file
49 |     // when it is dropped. Significantly, this has no lifetime
50 |     // dependencies.
51 |     fn into_cleaner(self: Box<Self>) -> Result<Box<dyn TempCleaner>>;
52 | }
53 | 
54 | /// A temp file that can spawn multiple loaders.
55 | pub trait TempLoader {
56 |     /// Open the temp file, and return a reader on it.
57 |     fn new_loader(&self) -> Result<Box<dyn BufRead>>;
58 | 
59 |     /// Return the name of the temp file.
60 |     fn path_ref(&self) -> &Path;
61 | 
62 |     // Close the file, returning a TempCleaner that will clean up the file
63 |     // when it is dropped. Significantly, this has no lifetime
64 |     // dependencies.
65 |     fn into_cleaner(self: Box<Self>) -> Result<Box<dyn TempCleaner>>;
66 | }
67 | 
68 | /// A Writer for adding a new version.
69 | pub trait StoreWriter<'a>: Write {
70 |     /// All data has been written, commit this as a new version.
71 |     fn commit(self: Box<Self>) -> Result<()>;
72 | }
73 | 
74 | pub trait TempCleaner {}
75 | 
76 | /// Indicator of which version of sure data to load.
77 | #[derive(Clone, Debug)]
78 | pub enum Version {
79 |     Latest,
80 |     Prior,
81 |     Tagged(String),
82 | }
83 | 
84 | impl Version {
85 |     /// Retrieve this version as a number, or None if that makes no sense
86 |     /// (either it is `Latest`, `Prior`, or the textual version is not an
87 |     /// integer).
88 |     pub fn numeric(&self) -> Option<usize> {
89 |         match self {
90 |             Version::Latest | Version::Prior => None,
91 |             Version::Tagged(text) => text.parse().ok(),
92 |         }
93 |     }
94 | }
95 | 
96 | /// Information about a given version in the store.
97 | #[derive(Clone, Debug)]
98 | pub struct StoreVersion {
99 |     /// A descriptive name. May be the "name" tag given when this version was created.
100 |     pub name: String,
101 |     /// A timestamp of when the version was made.
102 |     pub time: DateTime<Utc>,
103 |     /// The identifier for this version.
104 |     pub version: Version,
105 | }
106 | 
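As a concrete illustration of the read side of this trait, a short sketch (it assumes `parse_store` from just below, the crate re-exporting these items at its root, and a surrounding function that propagates errors with `?`):

    // List the stored versions, newest first, then dump the latest tree.
    let store = rsure::parse_store("2sure.weave.gz")?;
    for v in store.get_versions()? {
        println!("{}  {}  {:?}", v.time, v.name, v.version);
    }
    for node in store.load_iter(rsure::Version::Latest)? {
        println!("{:?}", node?);
    }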
107 | /// Parse a command-line specified path to determine the parameters and type of store desired. The
108 | /// path can be the path to a directory. In this case, look at possible filenames to determine the
109 | /// other parameters. The path can also give a filename of one of the surefiles, and we will
110 | /// derive the name information from that.
111 | pub fn parse_store(text: &str) -> Result<Box<dyn Store>> {
112 |     // First determine if this path is a directory.
113 |     let p = Path::new(text);
114 |     info!("Parsing: {:?}", p);
115 | 
116 |     // If we're given an existing directory, construct a store directly from it.
117 |     // TODO: Look in the directory to see what might be there.
118 |     if p.is_dir() {
119 |         return Ok(Box::new(WeaveStore::new(p.to_path_buf(), "2sure", Compression::Gzip)));
120 |     }
121 | 
122 |     // Otherwise, try to get the parent. If it seems to be empty, use the current directory as the
123 |     // path.
124 |     let dir = match p.parent() {
125 |         None => return Err(Error::UnknownDirectory),
126 |         Some(dir) => {
127 |             if dir.as_os_str().is_empty() {
128 |                 Path::new(".")
129 |             } else {
130 |                 dir
131 |             }
132 |         }
133 |     };
134 | 
135 |     if !dir.is_dir() {
136 |         return Err(Error::FileNotInDirectory);
137 |     }
138 | 
139 |     let base = match p.file_name() {
140 |         Some(name) => name,
141 |         None => return Err(Error::PathMissingFinalFile),
142 |     };
143 |     let base = match base.to_str() {
144 |         Some(name) => name,
145 |         None => panic!("Path came from string, yet is no longer UTF-8"),
146 |     };
147 | 
148 |     let (base, compression) = if let Some(core_name) = base.strip_suffix(".gz") {
149 |         (core_name, Compression::Gzip)
150 |     } else if let Some(core_name) = base.strip_suffix(".zstd") {
151 |         (core_name, Compression::Zstd)
152 |     } else {
153 |         (base, Compression::Plain)
154 |     };
155 | 
156 |     // Check for weave format.
157 |     if let Some(base) = base.strip_suffix(".weave") {
158 |         return Ok(Box::new(WeaveStore::new(dir, base, compression)));
159 |     }
160 | 
161 |     // Strip off known suffixes.
162 |     let base = if base.ends_with(".dat") || base.ends_with(".bak") {
163 |         &base[..base.len() - 4]
164 |     } else {
165 |         base
166 |     };
167 | 
168 |     Ok(Box::new(WeaveStore::new(dir, base, compression)))
169 | }
170 | 
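To make the naming rules above concrete, a few illustrative calls (the paths are hypothetical; each resolves to a `WeaveStore`):

    let _ = rsure::parse_store("/var/lib/sure")?;    // existing directory: base "2sure", gzip
    let _ = rsure::parse_store("host.weave.gz")?;    // explicit weave file: base "host", gzip
    let _ = rsure::parse_store("2sure.dat.zstd")?;   // ".dat" stripped: base "2sure", zstd
    let _ = rsure::parse_store("2sure.dat")?;        // base "2sure", uncompressed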
--------------------------------------------------------------------------------
/src/store/weave.rs:
--------------------------------------------------------------------------------
 1 | //! SCCS-style delta weave stores.
 2 | 
 3 | use crate::{
 4 |     node,
 5 |     store::{
 6 |         Store, StoreTags, StoreVersion, StoreWriter, TempCleaner, TempFile, TempLoader, Version,
 7 |     },
 8 |     Error, Result, SureNode,
 9 | };
10 | use std::{
11 |     env,
12 |     fs::{self, File},
13 |     io::{self, BufRead, BufReader, BufWriter, Write},
14 |     path::{Path, PathBuf},
15 | };
16 | use weave::{
17 |     self, DeltaWriter, NamingConvention, NewWeave, PullParser, SimpleNaming,
18 | };
19 | pub use weave::Compression;
20 | 
21 | pub struct WeaveStore {
22 |     naming: SimpleNaming,
23 | }
24 | 
25 | impl WeaveStore {
26 |     pub fn new<P: AsRef<Path>>(path: P, base: &str, compression: Compression) -> WeaveStore {
27 |         WeaveStore {
28 |             naming: SimpleNaming::new(path, base, "dat", compression),
29 |         }
30 |     }
31 | }
32 | 
33 | impl Store for WeaveStore {
34 |     fn get_versions(&self) -> Result<Vec<StoreVersion>> {
35 |         let header = PullParser::new(&self.naming, 1)?.into_header();
36 |         let mut versions: Vec<_> = header
37 |             .deltas
38 |             .iter()
39 |             .map(|v| StoreVersion {
40 |                 name: v.name.clone(),
41 |                 time: v.time,
42 |                 version: Version::Tagged(v.number.to_string()),
43 |             })
44 |             .collect();
45 |         versions.reverse();
46 |         Ok(versions)
47 |     }
48 | 
49 |     fn load_iter(&self, version: Version) -> Result<Box<dyn Iterator<Item = Result<SureNode>>>> {
50 |         let last = weave::get_last_delta(&self.naming)?;
51 |         let last = match version {
52 |             Version::Latest => last,
53 |             Version::Prior => last - 1,
54 |             Version::Tagged(vers) => vers.parse()?,
55 |         };
56 | 
57 |         Ok(Box::new(WeaveIter::new(&self.naming, last)?))
58 |     }
59 | 
60 |     fn make_temp(&self) -> Result<Box<dyn TempFile + '_>> {
61 |         // TODO: Fix up naming to allow uncompressed writes.
62 |         let (path, file) = self.naming.temp_file()?;
63 |         let cpath = path.clone();
64 |         Ok(Box::new(WeaveTemp {
65 |             parent: self,
66 |             path,
67 |             file: BufWriter::new(file),
68 |             cleaner: FileClean(cpath),
69 |         }))
70 |     }
71 | 
72 |     fn make_new(&self, tags: &StoreTags) -> Result<Box<dyn StoreWriter + '_>> {
73 |         let itags = tags.iter().map(|(k, v)| (k.as_ref(), v.as_ref()));
74 |         match weave::get_last_delta(&self.naming) {
75 |             Ok(base) => {
76 |                 let wv = DeltaWriter::new(&self.naming, itags, base)?;
77 |                 Ok(Box::new(NewWeaveDelta { weave: wv }))
78 |             }
79 |             Err(_) => {
80 |                 // Create a new weave file.
81 |                 let wv = NewWeave::new(&self.naming, itags)?;
82 |                 Ok(Box::new(NewWeaveWriter { weave: wv }))
83 |             }
84 |         }
85 |     }
86 | }
87 | 
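For the write side, a hedged sketch of how a caller drives `make_new` (the two header lines match what `WeaveIter::new` below checks for; the tag value is illustrative, and the weave layer requires a "name" tag):

    use std::io::Write;

    let mut tags = rsure::StoreTags::new();
    tags.insert("name".into(), "nightly scan".into());
    let mut wr = store.make_new(&tags)?;
    writeln!(wr, "asure-2.0")?;
    writeln!(wr, "-----")?;
    // ... one encoded line per tree node ...
    wr.commit()?; // becomes the newest version; earlier ones stay retrievable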
88 | struct WeaveTemp<'a> {
89 |     parent: &'a WeaveStore,
90 |     path: PathBuf,
91 |     file: BufWriter<File>,
92 |     cleaner: FileClean,
93 | }
94 | 
95 | impl<'a> TempFile<'a> for WeaveTemp<'a> {
96 |     fn into_loader(self: Box<Self>) -> Result<Box<dyn TempLoader + 'a>> {
97 |         drop(self.file);
98 |         Ok(Box::new(WeaveTempLoader {
99 |             _parent: self.parent,
100 |             path: self.path,
101 |             cleaner: self.cleaner,
102 |         }))
103 |     }
104 | 
105 |     fn into_cleaner(self: Box<Self>) -> Result<Box<dyn TempCleaner>> {
106 |         Ok(Box::new(self.cleaner))
107 |     }
108 | }
109 | 
110 | impl<'a> Write for WeaveTemp<'a> {
111 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
112 |         self.file.write(buf)
113 |     }
114 |     fn flush(&mut self) -> io::Result<()> {
115 |         self.file.flush()
116 |     }
117 | }
118 | 
119 | pub struct WeaveTempLoader<'a> {
120 |     _parent: &'a WeaveStore,
121 |     path: PathBuf,
122 |     cleaner: FileClean,
123 | }
124 | 
125 | impl<'a> TempLoader for WeaveTempLoader<'a> {
126 |     fn new_loader(&self) -> Result<Box<dyn BufRead>> {
127 |         let read = BufReader::new(File::open(&self.path)?);
128 |         Ok(Box::new(read))
129 |     }
130 | 
131 |     fn path_ref(&self) -> &Path {
132 |         &self.path
133 |     }
134 | 
135 |     fn into_cleaner(self: Box<Self>) -> Result<Box<dyn TempCleaner>> {
136 |         Ok(Box::new(self.cleaner))
137 |     }
138 | }
139 | 
140 | pub struct NewWeaveWriter<'a> {
141 |     weave: NewWeave<'a>,
142 | }
143 | 
144 | impl<'a> StoreWriter<'a> for NewWeaveWriter<'a> {
145 |     fn commit(self: Box<Self>) -> Result<()> {
146 |         self.weave.close()?;
147 |         Ok(())
148 |     }
149 | }
150 | 
151 | impl<'a> Write for NewWeaveWriter<'a> {
152 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
153 |         self.weave.write(buf)
154 |     }
155 | 
156 |     fn flush(&mut self) -> io::Result<()> {
157 |         self.weave.flush()
158 |     }
159 | }
160 | 
161 | pub struct NewWeaveDelta<'a> {
162 |     weave: DeltaWriter<'a>,
163 | }
164 | 
165 | impl<'a> StoreWriter<'a> for NewWeaveDelta<'a> {
166 |     fn commit(self: Box<Self>) -> Result<()> {
167 |         self.weave.close()?;
168 |         Ok(())
169 |     }
170 | }
171 | 
172 | impl<'a> Write for NewWeaveDelta<'a> {
173 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
174 |         self.weave.write(buf)
175 |     }
176 | 
177 |     fn flush(&mut self) -> io::Result<()> {
178 |         self.weave.flush()
179 |     }
180 | }
181 | 
182 | pub struct WeaveIter {
183 |     pull: Box<dyn Iterator<Item = Result<String>>>,
184 | }
185 | 
186 | impl WeaveIter {
187 |     fn new(naming: &dyn NamingConvention, delta: usize) -> Result<WeaveIter> {
188 |         let mut pull = PullParser::new(naming, delta)?.filter_map(kept_text);
189 |         fixed(&mut pull, "asure-2.0")?;
190 |         fixed(&mut pull, "-----")?;
191 |         Ok(WeaveIter {
192 |             pull: Box::new(pull),
193 |         })
194 |     }
195 | }
196 | 
197 | impl Iterator for WeaveIter {
198 |     type Item = Result<SureNode>;
199 | 
200 |     fn next(&mut self) -> Option<Result<SureNode>> {
201 |         let line = match self.pull.next() {
202 |             Some(Err(e)) => return Some(Err(e)),
203 |             Some(Ok(line)) => line,
204 |             None => return None,
205 |         };
206 |         let line = line.as_bytes();
207 | 
208 |         match line[0] {
209 |             b'd' => {
210 |                 let (dname, datts) = node::decode_entity(&line[1..]);
211 |                 Some(Ok(SureNode::Enter {
212 |                     name: dname,
213 |                     atts: datts,
214 |                 }))
215 |             }
216 |             b'f' => {
217 |                 let (fname, fatts) = node::decode_entity(&line[1..]);
218 |                 Some(Ok(SureNode::File {
219 |                     name: fname,
220 |                     atts: fatts,
221 |                 }))
222 |             }
223 |             b'-' => Some(Ok(SureNode::Sep)),
224 |             b'u' => Some(Ok(SureNode::Leave)),
225 |             ch => Some(Err(Error::InvalidSurefileChar(ch as char))),
226 |         }
227 |     }
228 | }
229 | 
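The single-character tags decoded above are the flattened surefile representation. Schematically, a stored tree looks like this (the names and the exact attribute syntax are illustrative, not exact):

    asure-2.0
    -----
    d<escaped dir name> <attributes...>
    -
    f<escaped file name> <attributes...>
    u

A `d` line enters a directory, `-` separates a directory's subdirectories from its files, `f` lines are file entries, and `u` leaves ("goes up from") the directory.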
230 | // Filter nodes to only include kept text lines.
231 | fn kept_text(node: weave::Result<weave::Entry>) -> Option<Result<String>> {
232 |     match node {
233 |         Err(e) => Some(Err(e.into())),
234 |         Ok(weave::Entry::Plain { text, keep }) if keep => Some(Ok(text)),
235 |         _ => None,
236 |     }
237 | }
238 | 
239 | /// Try reading a specific line from the given iterator. Returns Err if
240 | /// the line didn't match, or something went wrong with the read.
241 | fn fixed<I>(pull: &mut I, expect: &str) -> Result<()>
242 | where
243 |     I: Iterator<Item = Result<String>>,
244 | {
245 |     match pull.next() {
246 |         Some(Ok(line)) => {
247 |             if line == expect {
248 |                 Ok(())
249 |             } else {
250 |                 Err(Error::UnexpectedLine(line, expect.into()))
251 |             }
252 |         }
253 |         Some(Err(e)) => Err(e),
254 |         None => Err(Error::SureFileEof),
255 |     }
256 | }
257 | /*
258 | fn fixed(recv: &Receiver<Option<Result<String>>>, expect: &[u8]) -> Result<()> {
259 |     match recv.recv() {
260 |         Ok(Some(Ok(line))) => {
261 |             if line.as_bytes() == expect {
262 |                 Ok(())
263 |             } else {
264 |                 Err(format_err!("Unexpected line from channel: {:?} expect {:?}", line, expect))
265 |             }
266 |         }
267 |         Ok(Some(Err(e))) => Err(format_err!("Error reading suredata: {:?}", e)),
268 |         Ok(None) => Err(format_err!("Unexpected eof reading suredata")),
269 |         Err(e) => Err(e.into()),
270 |     }
271 | }
272 | */
273 | 
274 | /// Own a PathBuf, and delete this file on drop. This is in its own type
275 | /// for two reasons: 1. It makes it easy to have cleaning in multiple types,
276 | /// passing ownership between them, and 2. it prevents the need for those
277 | /// types to implement Drop, which would prevent moves out of the fields.
278 | struct FileClean(PathBuf);
279 | 
280 | impl Drop for FileClean {
281 |     fn drop(&mut self) {
282 |         if env::var_os("RSURE_KEEP").is_none() {
283 |             let _ = fs::remove_file(&self.0);
284 |         }
285 |     }
286 | }
287 | 
288 | impl TempCleaner for FileClean {}
289 | 
--------------------------------------------------------------------------------
/src/surefs.rs:
--------------------------------------------------------------------------------
 1 | // Filesystem scanning.
 2 | 
 3 | use crate::{escape::*, suretree::AttMap};
 4 | use log::error;
 5 | 
 6 | use std::{
 7 |     fs::{self, Metadata},
 8 |     os::unix::prelude::*,
 9 |     path::Path,
10 | };
11 | 
12 | // Encode the attributes for the given node. Note that this returns normally
13 | // even when there is an error resolving a symlink: it logs the error, and
14 | // returns a placeholder.
15 | pub(crate) fn encode_atts(name: &Path, meta: &Metadata) -> AttMap {
16 |     // let fname = name.file_name().unwrap().as_bytes().escaped();
17 |     let mode = meta.mode() as libc::mode_t & libc::S_IFMT;
18 | 
19 |     let mut base = AttMap::new();
20 | 
21 |     // These attributes apply to every node.
22 |     base.insert("uid".to_string(), meta.uid().to_string());
23 |     base.insert("gid".to_string(), meta.gid().to_string());
24 |     base.insert(
25 |         "perm".to_string(),
26 |         (meta.mode() as libc::mode_t & !libc::S_IFMT).to_string(),
27 |     );
28 | 
29 |     // Other attributes are based on the type of the node.
30 |     match mode as libc::mode_t {
31 |         libc::S_IFDIR => {
32 |             base.insert("kind".to_string(), "dir".to_string());
33 |         }
34 |         libc::S_IFREG => {
35 |             base.insert("kind".to_string(), "file".to_string());
36 |             base.insert("ino".to_string(), meta.ino().to_string());
37 |             base.insert("size".to_string(), meta.size().to_string());
38 |             time_info(&mut base, meta);
39 |             // Note that the 'sha1' attribute is computed later.
40 |         }
41 |         libc::S_IFLNK => {
42 |             base.insert("kind".to_string(), "lnk".to_string());
43 |             let link = match fs::read_link(name) {
44 |                 Ok(l) => l,
45 |                 Err(err) => {
46 |                     error!("Unable to read link: {:?} ({})", name, err);
47 |                     // TODO: Generate a unique placeholder so this will
48 |                     // always show up.
49 |                     From::from("???")
50 |                 }
51 |             };
52 |             base.insert("targ".to_string(), link.as_os_str().as_bytes().escaped());
53 |         }
54 |         libc::S_IFIFO => {
55 |             base.insert("kind".to_string(), "fifo".to_string());
56 |         }
57 |         libc::S_IFSOCK => {
58 |             base.insert("kind".to_string(), "sock".to_string());
59 |         }
60 |         libc::S_IFCHR => {
61 |             base.insert("kind".to_string(), "chr".to_string());
62 |             add_dev(&mut base, meta);
63 |         }
64 |         libc::S_IFBLK => {
65 |             base.insert("kind".to_string(), "blk".to_string());
66 |             add_dev(&mut base, meta);
67 |         }
68 |         _ => panic!("Unknown file type: 0o{:o}", mode),
69 |     }
70 | 
71 |     // println!("{:?}: atts: {:?}", fname, base);
72 |     base
73 | }
74 | 
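To make the encoding concrete, a regular file's attribute map produced by `encode_atts` would look roughly like this (the values are hypothetical; note that `perm` is the decimal rendering of the mode bits, so 420 is 0o644):

    kind=file uid=1000 gid=1000 perm=420 ino=9314 size=1024 mtime=1700000000 ctime=1700000000

plus a `sha1` attribute that the later hashing pass adds.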
75 | fn add_dev(base: &mut AttMap, meta: &Metadata) {
76 |     let rdev = meta.rdev();
77 |     // This is defined in a macro, and hasn't made it into libc. Given how
78 |     // it is defined in the header, it is unlikely to change, at least on
79 |     // Linux.
80 |     base.insert("devmaj".to_string(), ((rdev >> 8) & 0xfff).to_string());
81 |     base.insert("devmin".to_string(), (rdev & 0xff).to_string());
82 | }
83 | 
84 | fn time_info(base: &mut AttMap, meta: &Metadata) {
85 |     // TODO: Handle the nsec part of the time.
86 |     base.insert("mtime".to_string(), meta.mtime().to_string());
87 |     base.insert("ctime".to_string(), meta.ctime().to_string());
88 | }
89 | 
--------------------------------------------------------------------------------
/src/suretree.rs:
--------------------------------------------------------------------------------
1 | // SureTree
2 | 
3 | use std::collections::BTreeMap;
4 | 
5 | pub type AttMap = BTreeMap<String, String>;
6 | 
--------------------------------------------------------------------------------
/tests/surefiles.rs:
--------------------------------------------------------------------------------
 1 | // Test the rsure API for save and load.
 2 | 
 3 | /*
 4 | extern crate rsure;
 5 | extern crate tempdir;
 6 | 
 7 | use rsure::{stdout_visitor, SureTree, TreeCompare};
 8 | use tempdir::TempDir;
 9 | 
10 | // Test that the API is usable. Currently, the output only generates a
11 | // report to stdout, and doesn't return any information to the caller, so
12 | // we can only test that the calls work. If you run the test with
13 | // "--nocapture", it should show the addition of the surefile at the end.
14 | #[test]
15 | fn save_and_load() {
16 |     let tmp = TempDir::new("rsure").unwrap();
17 |     let tree = rsure::scan_fs(tmp.path()).unwrap();
18 | 
19 |     // First surefile.
20 |     let sfile = tmp.path().join("surefile.dat.gz");
21 | 
22 |     // Save it to a file.
23 |     tree.save(&sfile).unwrap();
24 | 
25 |     // Load it back in.
26 |     let t2 = SureTree::load(&sfile).unwrap();
27 |     t2.compare_from(&mut stdout_visitor(), &tree, &sfile);
28 | 
29 |     // Rescan (should catch the newly added surefile).
30 |     let t3 = rsure::scan_fs(tmp.path()).unwrap();
31 |     t3.compare_from(&mut stdout_visitor(), &t2, tmp.path());
32 | }
33 | 
34 | // Test writing to an in-memory buffer.
35 | #[test]
36 | fn save_writer() {
37 |     let tmp = TempDir::new("rsure").unwrap();
38 |     let t1 = rsure::scan_fs(tmp.path()).unwrap();
39 | 
40 |     let mut sf1 = vec![];
41 |     t1.save_to(&mut sf1).unwrap();
42 |     println!("Wrote {} bytes", sf1.len());
43 | 
44 |     let t2 = SureTree::load_from(&sf1[..]).unwrap();
45 |     t2.compare_from(&mut stdout_visitor(), &t1, tmp.path());
46 | }
47 | */
48 | 
--------------------------------------------------------------------------------
/weave/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore the lock file, as this is a library.
2 | Cargo.lock
3 | 
--------------------------------------------------------------------------------
/weave/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "weave"
 3 | version = "0.4.0-dev"
 4 | authors = ["davidb"]
 5 | description = """
 6 | Weave delta file storage. Inspired by the storage format of SCCS,
 7 | this crate allows multiple revisions of a file to be stored
 8 | efficiently in a single file.
 9 | """
10 | license = "MIT"
11 | readme = "README.rst"
12 | repository = "https://github.com/d3zd3z/rsure"
13 | edition = "2018"
14 | 
15 | [dependencies]
16 | log = "0.4"
17 | regex = "1.4"
18 | serde = "1.0"
19 | serde_json = "1.0"
20 | serde_derive = "1.0"
21 | thiserror = "1.0"
22 | chrono = { version = "0.4", features = ["serde"] }
23 | flate2 = "1.0"
24 | zstd = "0.10"
25 | 
26 | [dev-dependencies]
27 | env_logger = "0.9"
28 | rand = "0.8"
29 | tempdir = "0.3"
30 | 
31 | # Optimize the tests so they don't take too long
32 | [profile.test]
33 | opt-level = 3
34 | 
--------------------------------------------------------------------------------
/weave/README.rst:
--------------------------------------------------------------------------------
 1 | Weave File Support
 2 | ******************
 3 | 
 4 | Testing
 5 | =======
 6 | 
 7 | Many of the tests compare the crate's output with that generated by the
 8 | sccs command. On many Linux distros, a compatible version can be
 9 | found in the ``cssc`` package.
10 | 
--------------------------------------------------------------------------------
/weave/src/delta.rs:
--------------------------------------------------------------------------------
 1 | //! Add a delta to a weave file.
 2 | 
 3 | use regex::Regex;
 4 | use std::{
 5 |     collections::BTreeMap,
 6 |     fs::{remove_file, rename},
 7 |     io::{self, BufRead, BufReader, BufWriter, Write},
 8 |     mem::replace,
 9 |     path::PathBuf,
10 |     process::{Command, Stdio},
11 | };
12 | 
13 | use crate::{header::Header, Entry, Error, NamingConvention, Parser, PullParser, Result, Sink, WriterInfo};
14 | 
15 | /// A DeltaWriter is used to write a new delta. Data should be written to the writer, and then the
16 | /// `close` method called to update the weave file with the new delta.
17 | pub struct DeltaWriter<'n> {
18 |     naming: &'n dyn NamingConvention,
19 | 
20 |     // Where the temporary file will be written.
21 |     temp: Option<WriterInfo>,
22 | 
23 |     // The base delta.
24 |     base: usize,
25 | 
26 |     // The new delta.
27 |     new_delta: usize,
28 | 
29 |     // The name of the file with the base written to it.
30 |     base_name: PathBuf,
31 | 
32 |     // The regex for parsing diff output.
33 |     diff_re: Regex,
34 | 
35 |     // The header to be written for the new delta.
36 |     header: Header,
37 | }
38 | 
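Before the implementation, a schematic of what `close` (below) does with these pieces:

    base delta  --PullParser-->  base temp file ---+
                                                   +--> diff --> hunk headers
    caller's writes -----------> new temp file ----+
    hunks + old weave --Parser/WeaveWriter--> new weave temp --> renamed over the main file

The temp files come from the naming convention, and the old main file is kept as the backup.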
39 | impl<'n> DeltaWriter<'n> {
40 |     /// Construct a writer for a new delta. The naming convention determines where the files
41 |     /// will be written, and the tags will be associated with the new delta. The `base` is the
42 |     /// existing delta that the change should be based on.
43 |     pub fn new<'a, 'b, I>(nc: &dyn NamingConvention, tags: I, base: usize) -> Result<DeltaWriter>
44 |     where
45 |         I: Iterator<Item = (&'a str, &'b str)>,
46 |     {
47 |         // Copy the tags, making sure there is a "name", which is used to index.
48 |         // TODO: Ensure that "name" is unique among the existing deltas.
49 |         let mut ntags = BTreeMap::new();
50 |         for (k, v) in tags {
51 |             ntags.insert(k.to_owned(), v.to_owned());
52 |         }
53 |         if !ntags.contains_key("name") {
54 |             return Err(Error::NameMissing);
55 |         }
56 | 
57 |         // Extract the base delta to a file.
58 | 
59 |         let (base_name, mut base_file) = nc.temp_file()?;
60 |         let mut header = {
61 |             let mut parser = PullParser::new(nc, base)?;
62 |             for node in &mut parser {
63 |                 match node? {
64 |                     Entry::Plain { text, keep } => {
65 |                         if keep {
66 |                             writeln!(base_file, "{}", text)?;
67 |                         }
68 |                     }
69 |                     _ => (),
70 |                 }
71 |             }
72 |             parser.into_header()
73 |         };
74 |         let new_delta = header.add(ntags)?;
75 | 
76 |         let (new_name, new_file) = nc.temp_file()?;
77 |         let new_info = WriterInfo {
78 |             name: new_name,
79 |             writer: Box::new(BufWriter::new(new_file)),
80 |         };
81 | 
82 |         Ok(DeltaWriter {
83 |             naming: nc,
84 |             temp: Some(new_info),
85 |             base,
86 |             new_delta,
87 |             base_name,
88 |             diff_re: Regex::new(r"^(\d+)(,(\d+))?([acd]).*$").unwrap(),
89 |             header,
90 |         })
91 |     }
92 | 
93 |     pub fn close(mut self) -> Result<()> {
94 |         // Close the temporary file, getting its name.
95 |         let temp = replace(&mut self.temp, None);
96 |         let temp_name = match temp {
97 |             Some(mut wi) => {
98 |                 wi.writer.flush()?;
99 |                 drop(wi.writer);
100 |                 wi.name
101 |             }
102 |             None => return Err(Error::AlreadyClosed),
103 |         };
104 | 
105 |         let tweave_info = self.naming.new_temp()?;
106 | 
107 |         // Invoke diff on the files.
108 |         let mut child = Command::new("diff")
109 |             .arg(self.base_name.as_os_str())
110 |             .arg(temp_name.as_os_str())
111 |             .stdout(Stdio::piped())
112 |             .spawn()?;
113 | 
114 |         {
115 |             let lines = BufReader::new(child.stdout.as_mut().unwrap()).lines();
116 |             let weave_write = WeaveWriter {
117 |                 dest: tweave_info.writer,
118 |             };
119 |             let mut parser = Parser::new(self.naming, weave_write, self.base)?;
120 | 
121 |             let weave_write = parser.get_sink();
122 | 
123 |             self.header.write(&mut weave_write.borrow_mut().dest)?;
124 | 
125 |             let mut is_done = false;
126 |             let mut is_adding = false;
127 | 
128 |             for line in lines {
129 |                 let line = line?;
130 |                 if let Some(cap) = self.diff_re.captures(&line) {
131 |                     // If adding, this completes the add.
132 |                     if is_adding {
133 |                         weave_write.borrow_mut().end(self.new_delta)?;
134 |                         is_adding = false;
135 |                     }
136 | 
137 |                     let left = cap.get(1).unwrap().as_str().parse::<usize>().unwrap();
138 |                     let right = match cap.get(3) {
139 |                         None => left,
140 |                         Some(r) => r.as_str().parse().unwrap(),
141 |                     };
142 |                     let cmd = cap.get(4).unwrap().as_str().chars().next().unwrap();
143 | 
144 |                     if cmd == 'd' || cmd == 'c' {
145 |                         // These include deletions.
146 |                         match parser.parse_to(left)? {
147 |                             0 => return Err(Error::UnexpectedEof),
148 |                             n if n == left => (),
149 |                             _ => panic!("Unexpected parse result"),
150 |                         }
151 |                         weave_write.borrow_mut().delete(self.new_delta)?;
152 |                         match parser.parse_to(right + 1) {
153 |                             Ok(0) => is_done = true,
154 |                             Ok(n) if n == right + 1 => (),
155 |                             Ok(_) => panic!("Unexpected parse result"),
156 |                             Err(e) => return Err(e),
157 |                         }
158 |                         weave_write.borrow_mut().end(self.new_delta)?;
159 |                     } else {
160 |                         match parser.parse_to(right + 1) {
161 |                             Ok(0) => is_done = true,
162 |                             Ok(n) if n == right + 1 => (),
163 |                             Ok(_) => panic!("Unexpected parse result"),
164 |                             Err(e) => return Err(e),
165 |                         }
166 |                     }
167 | 
168 |                     if cmd == 'c' || cmd == 'a' {
169 |                         weave_write.borrow_mut().insert(self.new_delta)?;
170 |                         is_adding = true;
171 |                     }
172 | 
173 |                     continue;
174 |                 }
175 | 
176 |                 match line.chars().next() {
177 |                     None => panic!("Unexpected blank line in diff"),
178 |                     Some('<') => continue,
179 |                     Some('-') => continue,
180 |                     Some('>') => {
181 |                         // Add lines should just be written as-is.
182 |                         weave_write.borrow_mut().plain(&line[2..], true)?;
183 |                     }
184 |                     Some(_) => panic!("Unexpected diff line: {:?}", line),
185 |                 }
186 |             }
187 | 
188 |             if is_adding {
189 |                 weave_write.borrow_mut().end(self.new_delta)?;
190 |             }
191 | 
192 |             if !is_done {
193 |                 match parser.parse_to(0) {
194 |                     Ok(0) => (),
195 |                     Ok(_) => panic!("Unexpected non-eof"),
196 |                     Err(e) => return Err(e),
197 |                 }
198 |             }
199 |         }
200 | 
201 |         match child.wait()?.code() {
202 |             None => return Err(Error::DiffKilled),
203 |             Some(0) => (), // No diffs
204 |             Some(1) => (), // Normal, with diffs
205 |             Some(n) => return Err(Error::DiffError(n)),
206 |         }
207 | 
208 |         // Now that it is all done, clean up the temp files, and cycle the backup.
209 |         let _ = rename(self.naming.main_file(), self.naming.backup_file());
210 |         rename(tweave_info.name, self.naming.main_file())?;
211 |         remove_file(&self.base_name)?;
212 |         remove_file(&temp_name)?;
213 | 
214 |         Ok(())
215 |     }
216 | }
217 | 
218 | impl<'n> Write for DeltaWriter<'n> {
219 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
220 |         self.temp
221 |             .as_mut()
222 |             .expect("Attempt to write to DeltaWriter that is closed")
223 |             .writer
224 |             .write(buf)
225 |     }
226 | 
227 |     fn flush(&mut self) -> io::Result<()> {
228 |         self.temp
229 |             .as_mut()
230 |             .expect("Attempt to flush DeltaWriter that is closed")
231 |             .writer
232 |             .flush()
233 |     }
234 | }
235 | 
236 | struct RevWriter<W: Write> {
237 |     dest: BufWriter<W>,
238 | }
239 | 
240 | impl<W: Write> Sink for RevWriter<W> {
241 |     fn plain(&mut self, text: &str, keep: bool) -> Result<()> {
242 |         if !keep {
243 |             return Ok(());
244 |         }
245 | 
246 |         writeln!(&mut self.dest, "{}", text)?;
247 |         Ok(())
248 |     }
249 | }
250 | 
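For reference, the ed-style hunk headers that `diff_re` above matches, and the fields `close` pulls out of them. A small self-contained check (the hunk strings are hypothetical):

    use regex::Regex;

    fn main() {
        // The same pattern as DeltaWriter's diff_re.
        let re = Regex::new(r"^(\d+)(,(\d+))?([acd]).*$").unwrap();
        for hdr in ["3c3", "5,7d4", "8a9,11"] {
            let cap = re.captures(hdr).unwrap();
            let left: usize = cap[1].parse().unwrap();
            let right: usize = cap.get(3).map_or(left, |m| m.as_str().parse().unwrap());
            let cmd = &cap[4];
            // For 'c'/'d', old lines left..=right are deleted; for 'c'/'a',
            // the replacement lines follow as "> " lines.
            println!("{}: old lines {}..={}, command {}", hdr, left, right, cmd);
        }
    }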
251 | /// The weave writer writes out the contents of a weave to a file.
252 | struct WeaveWriter<W: Write> {
253 |     dest: W,
254 | }
255 | 
256 | impl<W: Write> Sink for WeaveWriter<W> {
257 |     fn insert(&mut self, delta: usize) -> Result<()> {
258 |         writeln!(&mut self.dest, "\x01I {}", delta)?;
259 |         Ok(())
260 |     }
261 |     fn delete(&mut self, delta: usize) -> Result<()> {
262 |         writeln!(&mut self.dest, "\x01D {}", delta)?;
263 |         Ok(())
264 |     }
265 |     fn end(&mut self, delta: usize) -> Result<()> {
266 |         writeln!(&mut self.dest, "\x01E {}", delta)?;
267 |         Ok(())
268 |     }
269 |     fn plain(&mut self, text: &str, _keep: bool) -> Result<()> {
270 |         writeln!(&mut self.dest, "{}", text)?;
271 |         Ok(())
272 |     }
273 | }
274 | 
--------------------------------------------------------------------------------
/weave/src/errors.rs:
--------------------------------------------------------------------------------
 1 | // Errors in the weave code.
 2 | 
 3 | use std::{io, result};
 4 | use thiserror::Error;
 5 | 
 6 | #[derive(Error, Debug)]
 7 | pub enum Error {
 8 |     #[error("I/O Error")]
 9 |     Io(#[from] io::Error),
10 |     #[error("Json error")]
11 |     Json(#[from] serde_json::Error),
12 |     #[error("Parsing Error")]
13 |     Parse(#[from] std::num::ParseIntError),
14 |     #[error("tag \"name\" missing")]
15 |     NameMissing,
16 |     #[error("already closed")]
17 |     AlreadyClosed,
18 |     #[error("unexpected end of weave file")]
19 |     UnexpectedEof,
20 |     #[error("weave file appears empty")]
21 |     EmptyWeave,
22 |     #[error("diff error status {0}")]
23 |     DiffError(i32),
24 |     #[error("diff killed by signal")]
25 |     DiffKilled,
26 | }
27 | 
28 | pub type Result<T> = result::Result<T, Error>;
29 | 
--------------------------------------------------------------------------------
/weave/src/header.rs:
--------------------------------------------------------------------------------
 1 | //! Weave file information.
 2 | //!
 3 | //! The information about each weave file is stored in a header, as the first line of the file.
 4 | 
 5 | use chrono::{DateTime, Utc};
 6 | use serde_derive::{Deserialize, Serialize};
 7 | use std::{collections::BTreeMap, io::Write};
 8 | 
 9 | use crate::{Error, Result};
10 | 
11 | /// The header placed at the beginning of each weave file. The deltas correspond with the
12 | /// deltas checked in. Note that the value passed to [`crate::PullParser::new`] should be the `number`
13 | /// field of [`DeltaInfo`] and not the index in the `deltas` vec.
14 | #[derive(Clone, Serialize, Deserialize)]
15 | pub struct Header {
16 |     pub version: usize,
17 |     pub deltas: Vec<DeltaInfo>,
18 | }
19 | 
20 | /// Information about a single delta.
21 | #[derive(Clone, Serialize, Deserialize)]
22 | pub struct DeltaInfo {
23 |     /// A tag giving the name for this particular delta. Should be unique across all deltas.
24 |     pub name: String,
25 |     /// The delta number. A unique integer that identifies this delta in the woven data below.
26 |     pub number: usize,
27 |     /// Arbitrary tags the user has asked to be stored with this delta.
28 |     pub tags: BTreeMap<String, String>,
29 |     /// A time stamp of when this delta was added.
30 |     pub time: DateTime<Utc>,
31 | }
32 | 
33 | const THIS_VERSION: usize = 1;
34 | 
35 | impl Default for Header {
36 |     fn default() -> Header {
37 |         Header {
38 |             version: THIS_VERSION,
39 |             deltas: vec![],
40 |         }
41 |     }
42 | }
43 | 
44 | impl Header {
45 |     /// Decode from the first line of the file.
46 |     pub fn decode(line: &str) -> Result<Header> {
{ 47 | if let Some(rest) = line.strip_prefix("\x01t") { 48 | Ok(serde_json::from_str(rest)?) 49 | } else { 50 | // This probably comes from an sccs file. 51 | Ok(Header { 52 | version: 0, 53 | deltas: vec![], 54 | }) 55 | } 56 | } 57 | 58 | /// Add a delta to this header. Returns the delta number to be used. 59 | pub fn add(&mut self, mut tags: BTreeMap) -> Result { 60 | let name = if let Some(name) = tags.remove("name") { 61 | name 62 | } else { 63 | return Err(Error::NameMissing); 64 | }; 65 | 66 | let next_delta = self.deltas.iter().map(|x| x.number).max().unwrap_or(0) + 1; 67 | 68 | self.deltas.push(DeltaInfo { 69 | name, 70 | number: next_delta, 71 | tags, 72 | time: Utc::now(), 73 | }); 74 | 75 | Ok(next_delta) 76 | } 77 | 78 | /// Write the header to the writer, as the first line. 79 | pub fn write(&self, mut wr: &mut W) -> Result<()> { 80 | write!(&mut wr, "\x01t")?; 81 | serde_json::to_writer(&mut wr, &self)?; 82 | writeln!(&mut wr)?; 83 | Ok(()) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /weave/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Weave deltas, inspired by SCCS. 2 | //! 3 | //! The [SCCS](https://en.wikipedia.org/wiki/Source_Code_Control_System) revision control system is 4 | //! one of the oldest source code management systems (1973). Although many of its concepts are 5 | //! quite dated in these days of git, the underlying "weave" delta format it used turns out to be a 6 | //! good way of representing multiple versions of data that differ only in parts. 7 | //! 8 | //! This package implements a weave-based storage of "plain text", where plain text consists of 9 | //! lines of UTF-8 printable characters separated by a newline. 10 | //! 11 | //! The format is similar to SCCS, but with no constraints to keep what are relatively poor design 12 | //! decisions from SCCS, such as putting a checksum at the top of the file, and using limited-sized 13 | //! field for values such as the number of lines in a file, or the use of 2-digit years. However, 14 | //! the main body of the weaved file, that which describes inserts and deletes is the same, and 15 | //! allows us to test this version by comparing with the storage of sccs. 16 | //! 17 | //! Weave files are written using [`NewWeave`], which works like a regular file writer. The file 18 | //! itself has a small amount of surrounding metadata, but is otherwise mostly just the contents of 19 | //! the initial file. 20 | //! 21 | //! Adding a delta to a weave file is done with the [`DeltaWriter`]. This is also written to, as a 22 | //! regular file, and then [`DeltaWriter::close`] method will extract a base revision and use the 23 | //! `diff` command to write a new version of the weave. The `close` method will make several 24 | //! temporary files in the process. 25 | //! 26 | //! The weave data is stored using a [`NamingConvention`], a trait that manages a related 27 | //! collection of files, and temp files. [`SimpleNaming`] is a basic representation of this that 28 | //! has a base name, a backup file, and some temporary files. The data in the file can be 29 | //! compressed. 
30 | 
31 | #![warn(bare_trait_objects)]
32 | 
33 | mod delta;
34 | mod errors;
35 | mod header;
36 | mod naming;
37 | mod newweave;
38 | mod parse;
39 | 
40 | pub use crate::{
41 |     delta::DeltaWriter,
42 |     errors::{Error, Result},
43 |     header::{DeltaInfo, Header},
44 |     naming::NamingConvention,
45 |     naming::SimpleNaming,
46 |     naming::Compression,
47 |     newweave::NewWeave,
48 |     parse::{Entry, Parser, PullParser, Sink},
49 | };
50 | 
51 | use std::{io::Write, path::PathBuf};
52 | 
53 | /// Something we can write into, that remembers its name. The writer is boxed because the writer
54 | /// may be compressed.
55 | pub struct WriterInfo {
56 |     name: PathBuf,
57 |     writer: Box<dyn Write>,
58 | }
59 | 
60 | /// Read the header from a weave file.
61 | pub fn read_header(naming: &dyn NamingConvention) -> Result<Header> {
62 |     Ok(PullParser::new(naming, 1)?.into_header())
63 | }
64 | 
65 | /// Retrieve the last delta in the weave file. Will panic if the weave file is malformed and
66 | /// contains no revisions.
67 | pub fn get_last_delta(naming: &dyn NamingConvention) -> Result<usize> {
68 |     let header = read_header(naming)?;
69 |     Ok(header
70 |         .deltas
71 |         .iter()
72 |         .map(|x| x.number)
73 |         .max()
74 |         .expect("at least one delta in weave file"))
75 | }
76 | 
--------------------------------------------------------------------------------
/weave/src/naming.rs:
--------------------------------------------------------------------------------
 1 | //! Weave files will follow a file naming convention. This determines the names of various temp
 2 | //! files and other aspects. The SCCS conventions are not followed, because they are not safe
 3 | //! (this crate will never write to a file that already exists).
 4 | 
 5 | use crate::{Result, WriterInfo};
 6 | use flate2::write::GzEncoder;
 7 | use std::{
 8 |     fs::{File, OpenOptions},
 9 |     io::{BufWriter, ErrorKind, Write},
10 |     path::{Path, PathBuf},
11 | };
12 | 
13 | /// A naming convention provides utilities needed to find the involved files, and construct
14 | /// temporary files as part of writing the new weave. The underlying object should keep the path
15 | /// and base name.
16 | ///
17 | /// The main file is either used by name, or opened for reading. It should never be written to
18 | /// directly. The main file is always compressed if the convention enables compression.
19 | ///
20 | /// The backup file is only used by name. It is neither written to, nor read. It will be
21 | /// compressed, as it always comes from renaming the main file.
22 | ///
23 | /// The temporary files are used by name, and written to. They may or may not be compressed,
24 | /// depending on how they will be used.
25 | pub trait NamingConvention {
26 |     /// Create a temporary file for writing. Upon success, returns the full path of the file, and
27 |     /// the opened File for writing to the file. The path should refer to a new file that did not
28 |     /// exist prior to this call.
29 |     fn temp_file(&self) -> Result<(PathBuf, File)>;
30 | 
31 |     /// Return the pathname of the primary file.
32 |     fn main_file(&self) -> PathBuf;
33 | 
34 |     /// Return the pathname of the backup file.
35 |     fn backup_file(&self) -> PathBuf;
36 | 
37 |     /// Return whether compression is requested on the main file.
38 |     fn compression(&self) -> Compression;
39 | 
40 |     /// Open a possibly compressed temp file, returning a WriterInfo for it. The stream will be
41 |     /// buffered, and possibly compressed.
42 |     fn new_temp(&self) -> Result<WriterInfo> {
43 |         let (name, file) = self.temp_file()?;
44 |         let writer = match self.compression() {
45 |             Compression::Plain =>
46 |                 Box::new(BufWriter::new(file)) as Box<dyn Write>,
47 |             Compression::Gzip =>
48 |                 Box::new(GzEncoder::new(file, flate2::Compression::default())) as Box<dyn Write>,
49 |             Compression::Zstd =>
50 |                 Box::new(zstd::Encoder::new(file, 3)?.auto_finish()) as Box<dyn Write>,
51 |         };
52 |         Ok(WriterInfo { name, writer })
53 |     }
54 | }
55 | 
56 | /// Supported compression types.
57 | #[derive(Debug, Clone, Copy, Eq, PartialEq)]
58 | pub enum Compression {
59 |     Plain,
60 |     Gzip,
61 |     Zstd,
62 | }
63 | 
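To make the convention concrete before the `SimpleNaming` implementation below, the names it produces (this mirrors `weave/tests/naming.rs` near the end of this document; the directory is hypothetical):

    SimpleNaming::new("/tmp/w", "sample", "weave", Compression::Gzip)
        main_file()   -> /tmp/w/sample.weave.gz
        backup_file() -> /tmp/w/sample.bak.gz
        temp_file()   -> /tmp/w/sample.0, then sample.1, ... (always uncompressed)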
64 | /// The SimpleNaming is a NamingConvention that has a basename, with the main file having a
65 | /// specified extension, the backup file having a ".bak" extension, and the temp files using a
66 | /// numbered extension starting with ".0". If the names are intended to be compressed, a ".gz"
67 | /// suffix can also be added.
68 | #[derive(Debug, Clone)]
69 | pub struct SimpleNaming {
70 |     // The directory for the files to be written.
71 |     path: PathBuf,
72 |     // The string for the base filename.
73 |     base: String,
74 |     // The extension to use for the main name.
75 |     ext: String,
76 |     // Compression to be used.
77 |     compression: Compression,
78 | }
79 | 
80 | impl SimpleNaming {
81 |     pub fn new<P: AsRef<Path>>(path: P, base: &str, ext: &str, compression: Compression) -> SimpleNaming {
82 |         SimpleNaming {
83 |             path: path.as_ref().to_path_buf(),
84 |             base: base.to_string(),
85 |             ext: ext.to_string(),
86 |             compression,
87 |         }
88 |     }
89 | 
90 |     pub fn make_name(&self, ext: &str, compression: Compression) -> PathBuf {
91 |         let name = format!(
92 |             "{}.{}{}",
93 |             self.base,
94 |             ext,
95 |             match compression {
96 |                 Compression::Plain => "",
97 |                 Compression::Gzip => ".gz",
98 |                 Compression::Zstd => ".zstd",
99 |             },
100 |         );
101 |         self.path.join(name)
102 |     }
103 | }
104 | 
105 | impl NamingConvention for SimpleNaming {
106 |     fn main_file(&self) -> PathBuf {
107 |         self.make_name(&self.ext, self.compression)
108 |     }
109 | 
110 |     fn backup_file(&self) -> PathBuf {
111 |         self.make_name("bak", self.compression)
112 |     }
113 | 
114 |     fn temp_file(&self) -> Result<(PathBuf, File)> {
115 |         let mut n = 0;
116 |         loop {
117 |             let name = self.make_name(&n.to_string(), Compression::Plain);
118 | 
119 |             match OpenOptions::new().write(true).create_new(true).open(&name) {
120 |                 Ok(fd) => return Ok((name, fd)),
121 |                 Err(ref e) if e.kind() == ErrorKind::AlreadyExists => (),
122 |                 Err(e) => return Err(e.into()),
123 |             }
124 | 
125 |             n += 1;
126 |         }
127 |     }
128 | 
129 |     fn compression(&self) -> Compression {
130 |         self.compression
131 |     }
132 | }
133 | 
--------------------------------------------------------------------------------
/weave/src/newweave.rs:
--------------------------------------------------------------------------------
 1 | //! Writer for new weaves.
 2 | 
 3 | use std::{
 4 |     collections::BTreeMap,
 5 |     fs::rename,
 6 |     io::{self, Write},
 7 |     mem::replace,
 8 | };
 9 | 
10 | use crate::{header::Header, Error, NamingConvention, Result, WriterInfo};
11 | #[allow(unused)]
12 | use crate::Compression;
13 | 
14 | /// A builder for a new weave file. The data is written to it as to a regular writer. Closing the
15 | /// weaver will finish up the write and move the new file into place. If the weaver is just
16 | /// dropped, the file will not be moved into place.
17 | pub struct NewWeave<'n> {
18 |     naming: &'n dyn NamingConvention,
19 |     temp: Option<WriterInfo>,
20 | }
21 | 
22 | impl<'n> NewWeave<'n> {
23 |     pub fn new<'a, 'b, I>(nc: &dyn NamingConvention, tags: I) -> Result<NewWeave>
24 |     where
25 |         I: Iterator<Item = (&'a str, &'b str)>,
26 |     {
27 |         let mut writeinfo = nc.new_temp()?;
28 | 
29 |         let mut ntags = BTreeMap::new();
30 |         for (k, v) in tags {
31 |             ntags.insert(k.to_owned(), v.to_owned());
32 |         }
33 |         let mut header: Header = Default::default();
34 |         let delta = header.add(ntags)?;
35 |         header.write(&mut writeinfo.writer)?;
36 |         writeln!(&mut writeinfo.writer, "\x01I {}", delta)?;
37 | 
38 |         Ok(NewWeave {
39 |             naming: nc,
40 |             temp: Some(writeinfo),
41 |         })
42 |     }
43 | 
44 |     pub fn close(mut self) -> Result<()> {
45 |         let temp = replace(&mut self.temp, None);
46 |         let name = match temp {
47 |             Some(mut wi) => {
48 |                 writeln!(&mut wi.writer, "\x01E 1")?;
49 |                 wi.name
50 |             }
51 |             None => return Err(Error::AlreadyClosed),
52 |         };
53 |         let _ = rename(self.naming.main_file(), self.naming.backup_file());
54 |         rename(name, self.naming.main_file())?;
55 |         Ok(())
56 |     }
57 | }
58 | 
59 | impl<'n> Write for NewWeave<'n> {
60 |     // Write the data out, just passing it through to the underlying file write. We assume the
61 |     // last line is terminated, or the resulting weave will be invalid.
62 |     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
63 |         self.temp
64 |             .as_mut()
65 |             .expect("Attempt to write to NewWeave that is closed")
66 |             .writer
67 |             .write(buf)
68 |     }
69 | 
70 |     fn flush(&mut self) -> io::Result<()> {
71 |         self.temp
72 |             .as_mut()
73 |             .expect("Attempt to flush NewWeave that is closed")
74 |             .writer
75 |             .flush()
76 |     }
77 | }
78 | 
79 | #[test]
80 | #[ignore]
81 | fn try_tag() {
82 |     use crate::SimpleNaming;
83 |     let mut tags = BTreeMap::new();
84 |     tags.insert("name".to_owned(), "initial revision".to_owned());
85 |     // Add a whole bunch of longer tags to show it works.
86 |     for i in 1..100 {
87 |         tags.insert(format!("key{}", i), format!("This is the {}th value", i));
88 |     }
89 |     let nc = SimpleNaming::new(".", "tags", "weave", Compression::Gzip);
90 |     let t2 = tags.iter().map(|(k, v)| (k.as_ref(), v.as_ref()));
91 |     let mut wr = NewWeave::new(&nc, t2).unwrap();
92 |     writeln!(&mut wr, "This is the only line in the file").unwrap();
93 |     wr.close().unwrap();
94 | }
95 | 
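Putting `NewWeave`, `DeltaWriter`, and the header together, a schematic of the bytes in a finished weave file (the JSON is abbreviated, and `^A` stands for the 0x01 control byte):

    ^At{"version":1,"deltas":[{"name":"first","number":1,...},{"name":"second","number":2,...}]}
    ^AI 1
    a line present since revision 1
    ^AD 2
    a line that revision 2 deleted
    ^AE 2
    ^AI 2
    a line that revision 2 added
    ^AE 2
    ^AE 1

The parser that follows reconstructs any one revision by walking these insert/delete blocks.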
--------------------------------------------------------------------------------
/weave/src/parse.rs:
--------------------------------------------------------------------------------
 1 | //! Weave parsing.
 2 | 
 3 | use crate::{header::Header, Error, NamingConvention, Compression, Result};
 4 | use flate2::read::GzDecoder;
 5 | use log::info;
 6 | use std::{
 7 |     cell::RefCell,
 8 |     fs::File,
 9 |     io::{BufRead, BufReader, Lines, Read},
10 |     mem,
11 |     rc::Rc,
12 | };
13 | 
14 | /// A Sink is a place that a parsed weave can be sent to. The insert/delete/end commands match
15 | /// those in the weave file, and `plain` carries the lines of data. Each plain line comes with a
16 | /// flag indicating whether it should be included in the output (`plain` is called for all lines,
17 | /// so that updates can use this same code). All methods return a result, with the Err value
18 | /// stopping the parse. Note that the default implementations just return success, ignoring their arguments.
19 | pub trait Sink {
20 |     /// Begin an insert sequence for the given delta.
21 |     fn insert(&mut self, _delta: usize) -> Result<()> {
22 |         Ok(())
23 |     }
24 | 
25 |     /// Begin a delete sequence.
26 |     fn delete(&mut self, _delta: usize) -> Result<()> {
27 |         Ok(())
28 |     }
29 | 
30 |     /// End a previous insert or delete.
31 |     fn end(&mut self, _delta: usize) -> Result<()> {
32 |         Ok(())
33 |     }
34 | 
35 |     /// A single line of plain text from the weave. `keep` indicates if the line should be
36 |     /// included in the requested delta.
37 |     fn plain(&mut self, _text: &str, _keep: bool) -> Result<()> {
38 |         Ok(())
39 |     }
40 | }
41 | 
42 | /// The PullParser returns the entries as nodes. These are equivalent to
43 | /// the values in Sink.
44 | #[derive(Debug)]
45 | pub enum Entry {
46 |     /// Begin an insert sequence for the given delta.
47 |     Insert { delta: usize },
48 | 
49 |     /// Begin a delete sequence.
50 |     Delete { delta: usize },
51 | 
52 |     /// End a previous insert or delete.
53 |     End { delta: usize },
54 | 
55 |     /// A single line of plaintext from the weave. `keep` indicates if the
56 |     /// line should be included in the requested delta.
57 |     Plain { text: String, keep: bool },
58 | 
59 |     /// A control message. Doesn't currently contain any data, which can be added later if needed.
60 |     Control,
61 | }
62 | 
63 | /// A Parser is used to process a weave file. This is a wrapper around the pull parser that
64 | /// invokes a push parser.
65 | pub struct Parser<B, S> {
66 |     /// The pull parser.
67 |     pull: PullParser<B>,
68 | 
69 |     /// The sink to be given each line record in the weave file.
70 |     sink: Rc<RefCell<S>>,
71 | 
72 |     /// A single pending line, kept from the last invocation.
73 |     pending: Option<String>,
74 | 
75 |     /// Tracking the line number.
76 |     lineno: usize,
77 | }
78 | 
79 | impl<S: Sink> Parser<BufReader<Box<dyn Read>>, S> {
80 |     /// Construct a parser, based on the main file of the naming convention.
81 |     pub fn new(
82 |         naming: &dyn NamingConvention,
83 |         sink: S,
84 |         delta: usize,
85 |     ) -> Result<Parser<BufReader<Box<dyn Read>>, S>> {
86 |         let rd = match naming.compression() {
87 |             Compression::Plain => {
88 |                 Box::new(File::open(naming.main_file())?) as Box<dyn Read>
89 |             }
90 |             Compression::Gzip => {
91 |                 let fd = File::open(naming.main_file())?;
92 |                 Box::new(GzDecoder::new(fd)) as Box<dyn Read>
93 |             }
94 |             Compression::Zstd => {
95 |                 let fd = File::open(naming.main_file())?;
96 |                 Box::new(zstd::Decoder::new(fd)?) as Box<dyn Read>
97 |             }
98 |         };
99 |         let lines = BufReader::new(rd).lines();
100 |         Parser::new_raw(lines, Rc::new(RefCell::new(sink)), delta)
101 |     }
102 | }
103 | 
104 | impl<B: BufRead, S: Sink> Parser<B, S> {
105 |     /// Construct a new Parser, reading from the given Reader, giving records to the given Sink,
106 |     /// and aiming for the specified `delta`. This is not the intended constructor; normal users
107 |     /// should use `new`. (This is public, for testing.)
108 |     pub fn new_raw(
109 |         source: Lines<B>,
110 |         sink: Rc<RefCell<S>>,
111 |         delta: usize,
112 |     ) -> Result<Parser<B, S>> {
113 |         let pull = PullParser::new_raw(source, delta)?;
114 |         Ok(Parser {
115 |             pull,
116 |             sink,
117 |             pending: None,
118 |             lineno: 0,
119 |         })
120 |     }
121 | 
122 |     /// Run the parser until we either reach the given line number, or the end of the weave. Lines
123 |     /// are numbered from 1, so calling with a lineno of zero will run the parser until the end of
124 |     /// the input. Returns Ok(0) for the end of input, Ok(n) for stopping at line n (which should
125 |     /// always be the same as the passed-in lineno), or Err if there is an error.
126 |     pub fn parse_to(&mut self, lineno: usize) -> Result<usize> {
127 |         // Handle any pending input line. Pending lines only happen while keeping.
128 |         if let Some(text) = mem::replace(&mut self.pending, None) {
129 |             self.sink.borrow_mut().plain(&text, true)?;
130 |         }
131 | 
132 |         loop {
133 |             match self.pull.next() {
134 |                 Some(Ok(Entry::Plain { text, keep })) => {
135 |                     if keep {
136 |                         self.lineno += 1;
137 |                         if self.lineno == lineno {
138 |                             // This is the desired stopping point, hold onto this line, and return
139 |                             // to the caller.
140 |                             self.pending = Some(text);
141 |                             return Ok(lineno);
142 |                         }
143 |                     }
144 | 
145 |                     self.sink.borrow_mut().plain(&text, keep)?;
146 |                 }
147 |                 Some(Ok(Entry::Insert { delta })) => {
148 |                     self.sink.borrow_mut().insert(delta)?;
149 |                 }
150 |                 Some(Ok(Entry::Delete { delta })) => {
151 |                     self.sink.borrow_mut().delete(delta)?;
152 |                 }
153 |                 Some(Ok(Entry::End { delta })) => {
154 |                     self.sink.borrow_mut().end(delta)?;
155 |                 }
156 |                 Some(Ok(Entry::Control)) => (),
157 |                 Some(Err(err)) => {
158 |                     return Err(err);
159 |                 }
160 |                 None => {
161 |                     return Ok(0);
162 |                 }
163 |             }
164 |         }
165 |     }
166 | 
167 | 
168 |     /// Get the header read from this weave file.
169 |     pub fn get_header(&self) -> &Header {
170 |         &self.pull.header
171 |     }
172 | 
173 |     /// Consume the parser, returning the header.
174 |     pub fn into_header(self) -> Header {
175 |         self.pull.into_header()
176 |     }
177 | 
178 |     /// Get a copy of the sink.
179 |     pub fn get_sink(&self) -> Rc<RefCell<S>> {
180 |         self.sink.clone()
181 |     }
182 | }
183 | 
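A sketch of driving this push parser with a custom `Sink` (extracting one delta into memory; `parse_to(0)` meaning "run to end" follows the code above, and the naming value is whatever convention the weave was written with):

    struct Collect(Vec<String>);

    impl weave::Sink for Collect {
        fn plain(&mut self, text: &str, keep: bool) -> weave::Result<()> {
            if keep {
                self.0.push(text.to_string());
            }
            Ok(())
        }
    }

    fn lines_of(naming: &dyn weave::NamingConvention, delta: usize) -> weave::Result<Vec<String>> {
        let mut parser = weave::Parser::new(naming, Collect(vec![]), delta)?;
        parser.parse_to(0)?; // run to the end of the input
        let sink = parser.get_sink();
        let lines = std::mem::take(&mut sink.borrow_mut().0);
        Ok(lines)
    }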
184 | /*
185 | /// A PullIterator returns entities in a weave file, extracting either
186 | /// everything, or only a specific delta.
187 | pub struct PullIterator<B> {
188 |     /// The lines of the input.
189 |     source: Lines<B>,
190 | 
191 |     /// The desired delta to retrieve.
192 |     delta: usize,
193 | 
194 |     /// The delta state is kept sorted with the newest (largest) delta at
195 |     /// element 0.
196 |     delta_state: Vec<OneDelta>,
197 | 
198 |     /// Indicates we are currently keeping lines.
199 |     keeping: bool,
200 | 
201 |     /// The current line number.
202 |     lineno: usize,
203 | 
204 |     /// The header extracted from the file.
205 |     header: Header,
206 | }
207 | */
208 | 
209 | /// The pull parser is the intended way of reading from weave files. After opening a particular
210 | /// delta with [`PullParser::new`], the parser can be used as an iterator, to return [`Entry`] values. In
211 | /// particular, the entries for [`Entry::Plain`] where `keep` is true will be the lines of the
212 | /// weave that comprise the expected delta.
213 | pub struct PullParser<B> {
214 |     /// The lines of the input.
215 |     source: Lines<B>,
216 | 
217 |     /// The desired delta to retrieve.
218 |     delta: usize,
219 | 
220 |     /// The delta state is kept sorted with the newest (largest) delta at element 0.
221 |     delta_state: Vec<OneDelta>,
222 | 
223 |     /// Indicates that we are currently "keeping" lines.
224 |     keeping: bool,
225 | 
226 |     /// The header extracted from the file.
227 |     header: Header,
228 | }
229 | 
230 | impl PullParser<BufReader<Box<dyn Read>>> {
231 |     /// Construct a parser, based on the main file of the naming
232 |     /// convention.
233 |     pub fn new(
234 |         naming: &dyn NamingConvention,
235 |         delta: usize,
236 |     ) -> Result<PullParser<BufReader<Box<dyn Read>>>> {
237 |         let rd = match naming.compression() {
238 |             Compression::Plain => {
239 |                 Box::new(File::open(naming.main_file())?) as Box<dyn Read>
240 |             }
241 |             Compression::Gzip => {
242 |                 let fd = File::open(naming.main_file())?;
243 |                 Box::new(GzDecoder::new(fd)) as Box<dyn Read>
244 |             }
245 |             Compression::Zstd => {
246 |                 let fd = File::open(naming.main_file())?;
247 |                 Box::new(zstd::Decoder::new(fd)?) as Box<dyn Read>
248 |             }
249 |         };
250 |         let lines = BufReader::new(rd).lines();
251 |         PullParser::new_raw(lines, delta)
252 |     }
253 | }
254 | 
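A sketch of consuming the pull parser directly, using the constructor just above:

    fn print_delta(naming: &dyn weave::NamingConvention, delta: usize) -> weave::Result<()> {
        let parser = weave::PullParser::new(naming, delta)?;
        for entry in parser {
            if let weave::Entry::Plain { text, keep: true } = entry? {
                println!("{}", text);
            }
        }
        Ok(())
    }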
/// The pull parser is the intended way of reading from weave files. After opening a particular
/// delta with [`PullParser::new`], the parser can be used as an iterator, to return [`Entry`]
/// values. In particular, the entries for [`Entry::Plain`] where `keep` is true will be the lines
/// of the weave that comprise the requested delta.
pub struct PullParser<B: BufRead> {
    /// The lines of the input.
    source: Lines<B>,

    /// The desired delta to retrieve.
    delta: usize,

    /// The delta state is kept sorted with the newest (largest) delta at element 0.
    delta_state: Vec<OneDelta>,

    /// Indicates that we are currently "keeping" lines.
    keeping: bool,

    /// The header extracted from the file.
    header: Header,
}

impl PullParser<BufReader<Box<dyn Read>>> {
    /// Construct a parser, based on the main file of the naming
    /// convention.
    pub fn new(
        naming: &dyn NamingConvention,
        delta: usize,
    ) -> Result<PullParser<BufReader<Box<dyn Read>>>> {
        let rd = match naming.compression() {
            Compression::Plain => Box::new(File::open(naming.main_file())?) as Box<dyn Read>,
            Compression::Gzip => {
                let fd = File::open(naming.main_file())?;
                Box::new(GzDecoder::new(fd)) as Box<dyn Read>
            }
            Compression::Zstd => {
                let fd = File::open(naming.main_file())?;
                Box::new(zstd::Decoder::new(fd)?) as Box<dyn Read>
            }
        };
        let lines = BufReader::new(rd).lines();
        PullParser::new_raw(lines, delta)
    }
}

impl<B: BufRead> PullParser<B> {
    /// Construct a new PullParser, reading from the given Reader. The parser
    /// will act as an iterator. This is not the intended constructor; normal
    /// users should use `new`. (This is public for testing.)
    pub fn new_raw(mut source: Lines<B>, delta: usize) -> Result<PullParser<B>> {
        if let Some(line) = source.next() {
            let line = line?;
            let header = Header::decode(&line)?;

            Ok(PullParser {
                source,
                delta,
                delta_state: vec![],
                keeping: false,
                header,
            })
        } else {
            Err(Error::EmptyWeave)
        }
    }

    /// Remove the given numbered state.
    fn pop(&mut self, delta: usize) {
        // The binary search is reversed, so the largest are first.
        let pos = match self
            .delta_state
            .binary_search_by(|ent| delta.cmp(&ent.delta))
        {
            Ok(pos) => pos,
            Err(_) => unreachable!(),
        };

        self.delta_state.remove(pos);
    }

    /// Add a new state. It will be inserted in the proper place in the array, based on the delta
    /// number.
    fn push(&mut self, delta: usize, mode: StateMode) {
        match self
            .delta_state
            .binary_search_by(|ent| delta.cmp(&ent.delta))
        {
            Ok(_) => panic!("Duplicate state in push"),
            Err(pos) => self.delta_state.insert(pos, OneDelta { delta, mode }),
        }
    }

    /// Update the keep field, based on the current state.
    fn update_keep(&mut self) {
        info!("Update: {:?}", self.delta_state);
        for st in &self.delta_state {
            match st.mode {
                StateMode::Keep => {
                    self.keeping = true;
                    return;
                }
                StateMode::Skip => {
                    self.keeping = false;
                    return;
                }
                _ => (),
            }
        }

        // This shouldn't be reached if there are any more context lines, but we may get here when
        // we reach the end of the input.
        self.keeping = false;
    }

    /// Get the header read from this weave file.
    pub fn get_header(&self) -> &Header {
        &self.header
    }

    /// Consume the parser, returning the header.
    pub fn into_header(self) -> Header {
        self.header
    }
}
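
// For reference: a control line in the weave begins with a 0x01 byte, followed
// by a command letter, a space, and the delta number, e.g. "\x01I 12" begins
// the insert for delta 12, "\x01D 12" a delete, and "\x01E 12" ends either.
// This is why `next` below checks byte 1 for the command letter and parses the
// delta number from `line[3..]`.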
impl<B: BufRead> Iterator for PullParser<B> {
    type Item = Result<Entry>;

    fn next(&mut self) -> Option<Result<Entry>> {
        // At this level, there is a 1:1 correspondence between weave input
        // lines and those returned.
        let line = match self.source.next() {
            None => return None,
            Some(Ok(line)) => line,
            Some(Err(e)) => return Some(Err(From::from(e))),
        };

        info!("line: {:?}", line);

        // Detect the first character, without borrowing.
        let textual = match line.bytes().next() {
            None => true,
            Some(ch) if ch != b'\x01' => true,
            _ => false,
        };

        if textual {
            return Some(Ok(Entry::Plain {
                text: line,
                keep: self.keeping,
            }));
        }

        let linebytes = line.as_bytes();

        if linebytes.len() < 4 {
            return Some(Ok(Entry::Control));
        }

        if linebytes[1] != b'I' && linebytes[1] != b'D' && linebytes[1] != b'E' {
            return Some(Ok(Entry::Control));
        }

        // TODO: Don't panic, but fail.
        let this_delta: usize = line[3..].parse().unwrap();

        match linebytes[1] {
            b'E' => {
                self.pop(this_delta);
                self.update_keep();
                Some(Ok(Entry::End { delta: this_delta }))
            }
            b'I' => {
                if self.delta >= this_delta {
                    self.push(this_delta, StateMode::Keep);
                } else {
                    self.push(this_delta, StateMode::Skip);
                }
                self.update_keep();

                Some(Ok(Entry::Insert { delta: this_delta }))
            }
            b'D' => {
                if self.delta >= this_delta {
                    self.push(this_delta, StateMode::Skip);
                } else {
                    self.push(this_delta, StateMode::Next);
                }
                self.update_keep();

                Some(Ok(Entry::Delete { delta: this_delta }))
            }
            _ => unreachable!(),
        }
    }
}

#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum StateMode {
    Keep,
    Skip,
    Next,
}

#[derive(Debug)]
struct OneDelta {
    delta: usize,
    mode: StateMode,
}
--------------------------------------------------------------------------------
/weave/tests/naming.rs:
--------------------------------------------------------------------------------
// Test the naming convention code.

extern crate tempdir;
extern crate weave;

use std::path::Path;

use tempdir::TempDir;
use weave::{Compression, NamingConvention, SimpleNaming};

#[test]
fn test_names() {
    let tmp = TempDir::new("weave").unwrap();

    let path = tmp.path().to_str().unwrap();

    let nm = SimpleNaming::new(tmp.path(), "sample", "weave", Compression::Gzip);
    assert_eq!(
        nm.main_file(),
        Path::new(&format!("{}/sample.weave.gz", path))
    );
    assert_eq!(
        nm.backup_file(),
        Path::new(&format!("{}/sample.bak.gz", path))
    );

    for i in 0..100 {
        let (tname, _tfd) = nm.temp_file().unwrap();
        assert_eq!(tname, Path::new(&format!("{}/sample.{}", path, i)));
        println!("tname: {:?}", tname);
    }
}
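
// A hypothetical variation (not in the test suite): with `Compression::Plain`
// the same convention yields uncompressed names, matching the "sample.weave"
// file created by the sccs test in tests/sccs.rs.
//
//     let nm = SimpleNaming::new(tmp.path(), "sample", "weave", Compression::Plain);
//     assert_eq!(nm.main_file(), Path::new(&format!("{}/sample.weave", path)));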
--------------------------------------------------------------------------------
/weave/tests/sccs.rs:
--------------------------------------------------------------------------------
/// Comparisons against SCCS.
///
/// The weave algorithm used comes from the SCCS program. This can be installed on most Linux
/// distros by installing the package "cssc".
extern crate env_logger;
#[macro_use]
extern crate log;
extern crate rand;
extern crate tempdir;
extern crate weave;

use rand::{rngs::StdRng, Rng, SeedableRng};
use std::collections::BTreeMap;
use std::env;
use std::fs::{remove_file, File};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use std::process::{Command, ExitStatus, Stdio};
use tempdir::TempDir;
use weave::{Compression, DeltaWriter, Entry, NewWeave, PullParser, Result, SimpleNaming, Sink};

/// Number of iterations to make. Note that the default check is greater than O(n^2), so the test
/// will run very long if this is increased too much.
const ITERATION_COUNT: usize = 100;

/// Number of lines in the files. Affects how convoluted the diffs are.
const FILE_SIZE: usize = 100;

/// Set to true to verify all previous deltas, not just the most recent.
const VERIFY_ALL_DELTAS: bool = true;

#[test]
fn sccs() {
    let _ = env_logger::init();

    // Normally, detect the SCCS command being present, and use it for additional tests. It can be
    // ignored by setting NO_SCCS=1 in the environment.
    let use_sccs = has_sccs() && env::var("NO_SCCS").is_err();

    let tdir = TempDir::new("sccstest").unwrap();
    let mut gen = Gen::new(tdir.path(), use_sccs).unwrap();

    // For debugging, this will cause the directory to not be removed.
    if env::var("KEEPTEMP").is_ok() {
        tdir.into_path();
    }

    gen.new_sccs();
    gen.new_weave();
    gen.next_delta();
    gen.weave_check();

    for i in 0..ITERATION_COUNT {
        gen.shuffle();
        gen.add_sccs_delta();
        gen.add_weave_delta(i + 1);
        gen.next_delta();

        // Checking with sccs is very slow. Do we want to do it?
        // gen.sccs_check();
        gen.weave_check();
    }
}

/// Determine if we have the sccs command available. If not, show an error, and return false.
fn has_sccs() -> bool {
    match Command::new("sccs").arg("-V").output() {
        Ok(_) => true,
        Err(_) => {
            error!("'sccs' not found in path, skipping some tests, install 'cssc' to fix");
            false
        }
    }
}

/// Gen synthesizes a series of deltas, and can add them using SCCS to make a weave file, and later
/// to this weave implementation to compare the results.
struct Gen {
    /// The directory to write the files into.
    tdir: PathBuf,

    /// The name of the plain file related to it.
    sccs_plain: PathBuf,

    /// The current lines.
    nums: Vec<usize>,

    /// Each delta. Sccs numbers the deltas from 1, so these are off by one.
    deltas: Vec<Vec<usize>>,

    /// A Rng for generating the shuffles.
    rand: StdRng,

    /// Is sccs enabled.
    use_sccs: bool,
}

impl Gen {
    fn new<P: AsRef<Path>>(tdir: P, use_sccs: bool) -> Result<Gen> {
        let tdir = tdir.as_ref();
        let mut seed: [u8; 32] = [0; 32];
        seed[0] = 1;
        seed[1] = 2;
        seed[2] = 3;
        seed[3] = 4;
        Ok(Gen {
            tdir: tdir.to_owned(),
            sccs_plain: tdir.join("tfile"),
            nums: (1..FILE_SIZE + 1).collect(),
            rand: SeedableRng::from_seed(seed),
            deltas: vec![],
            use_sccs,
        })
    }

    /// Perform a somewhat random modification of the data. Choose some range of the numbers and
    /// reverse them.
    fn shuffle(&mut self) {
        let a = self.rand.gen_range(0..self.nums.len());
        let b = self.rand.gen_range(0..self.nums.len());

        let (a, b) = if a <= b { (a, b) } else { (b, a) };
        self.nums[a..b].reverse();
    }
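
    // Worked example: with `nums = [1, 2, 3, 4, 5]`, drawing `a = 1` and
    // `b = 4` reverses `nums[1..4]`, yielding `[1, 4, 3, 2, 5]`; when
    // `a == b` the slice is empty and the "shuffle" is a no-op.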
    fn next_delta(&mut self) {
        self.deltas.push(self.nums.clone())
    }

    /// Write to a new sccs file, resulting in delta 1.
    fn new_sccs(&mut self) {
        if !self.use_sccs {
            return;
        }

        self.emit_to(&self.sccs_plain);
        Command::new("sccs")
            .args(&["admin", "-itfile", "-n", "s.tfile"])
            .current_dir(&self.tdir)
            .status()
            .expect("Unable to run sccs admin")
            .expect_success("Sccs command returned error");
        remove_file(&self.sccs_plain).expect("Unable to remove data file");
    }

    /// Add a new delta to the sccs file.
    fn add_sccs_delta(&mut self) {
        if !self.use_sccs {
            return;
        }

        Command::new("sccs")
            .args(&["get", "-e", "s.tfile"])
            .current_dir(&self.tdir)
            .stderr(Stdio::null())
            .stdout(Stdio::null())
            .status()
            .expect("Unable to run sccs get")
            .expect_success("sccs get failed");
        self.emit_to(&self.sccs_plain);
        Command::new("sccs")
            .args(&["delta", "-yMessage", "s.tfile"])
            .current_dir(&self.tdir)
            .stderr(Stdio::null())
            .stdout(Stdio::null())
            .status()
            .expect("Unable to run sccs delta")
            .expect_success("sccs delta failed");
    }

    /// Emit the current numbers to the given name (in the temp dir).
    fn emit_to<P: AsRef<Path>>(&self, name: P) {
        let mut fd = File::create(self.tdir.join(name)).unwrap();
        for i in &self.nums {
            writeln!(&mut fd, "{}", i).unwrap();
        }
    }

    /// Check the output of "sccs get". This is more of a sanity check.
    #[allow(dead_code)]
    fn sccs_check(&self) {
        for (i, del) in self.deltas.iter().enumerate() {
            self.sccs_check_one(i, del);
        }
    }

    #[allow(dead_code)]
    fn sccs_check_one(&self, num: usize, data: &[usize]) {
        if !self.use_sccs {
            return;
        }

        let out = Command::new("sccs")
            .args(&["get", &format!("-r1.{}", num + 1), "-p", "s.tfile"])
            .current_dir(&self.tdir)
            .output()
            .expect("Unable to run sccs get");
        out.status.expect_success("Error running sccs get");
        let mut onums: Vec<usize> = vec![];
        for line in BufReader::new(&out.stdout[..]).lines() {
            let line = line.unwrap();
            onums.push(line.as_str().parse::<usize>().unwrap());
        }

        assert_eq!(data, &onums[..]);
    }
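
    // Note on numbering: `deltas[i]` corresponds to SCCS revision `1.{i + 1}`
    // and to weave delta `i + 1`, which is why the check functions above and
    // below request revision or delta `num + 1`.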
    /// Check that weave decodes all of the sccs files properly.
    fn weave_check(&self) {
        if VERIFY_ALL_DELTAS {
            // Verify all of the previous deltas.
            for (i, del) in self.deltas.iter().enumerate() {
                self.weave_sccs_check_one(i, del);
                self.weave_check_one(i, del);
                self.weave_check_pull(i, del);
            }
        } else {
            // This only checks the most recent delta. It will miss any bugs that result in
            // earlier deltas being unreadable.
            let del = self.deltas.last().unwrap();
            self.weave_sccs_check_one(self.deltas.len() - 1, del);
            self.weave_check_one(self.deltas.len() - 1, del);
            self.weave_check_pull(self.deltas.len() - 1, del);
        }
    }

    fn weave_sccs_check_one(&self, num: usize, data: &[usize]) {
        if !self.use_sccs {
            return;
        }

        let fd = File::open(self.tdir.join("s.tfile")).unwrap();
        let lines = BufReader::new(fd).lines();
        let mut nums: Vec<usize> = vec![];
        for node in PullParser::new_raw(lines, num + 1).unwrap() {
            if let Entry::Plain { text, keep } = node.unwrap() {
                if keep {
                    nums.push(text.parse::<usize>().unwrap());
                }
            }
        }
        assert_eq!(data, nums);
    }

    fn weave_check_one(&self, num: usize, data: &[usize]) {
        let fd = File::open(self.tdir.join("sample.weave")).unwrap();
        let lines = BufReader::new(fd).lines();
        let mut nums: Vec<usize> = vec![];
        for node in PullParser::new_raw(lines, num + 1).unwrap() {
            if let Entry::Plain { text, keep } = node.unwrap() {
                if keep {
                    nums.push(text.parse::<usize>().unwrap());
                }
            }
        }

        assert_eq!(data, nums);
    }

    fn weave_check_pull(&self, num: usize, data: &[usize]) {
        let fd = File::open(self.tdir.join("sample.weave")).unwrap();
        let lines = BufReader::new(fd).lines();
        let mut nums = vec![];
        for line in PullParser::new_raw(lines, num + 1).unwrap() {
            let line = line.unwrap();
            match line {
                Entry::Plain { keep, text } if keep => {
                    nums.push(text.parse::<usize>().unwrap());
                }
                _ => (),
            }
        }

        assert_eq!(data, &nums[..]);
    }

    fn new_weave(&mut self) {
        let mut tags = BTreeMap::new();
        tags.insert("name", "initial");
        let nc = SimpleNaming::new(&self.tdir, "sample", "weave", Compression::Plain);
        let mut nw = NewWeave::new(&nc, tags.into_iter()).unwrap();
        for i in &self.nums {
            writeln!(&mut nw, "{}", i).unwrap();
        }
        nw.close().unwrap();
    }

    fn add_weave_delta(&mut self, base: usize) {
        let name_value = format!("{}", base + 1);
        let mut tags = BTreeMap::new();
        tags.insert("name", name_value.as_str());
        let nc = SimpleNaming::new(&self.tdir, "sample", "weave", Compression::Plain);
        let mut delta = DeltaWriter::new(&nc, tags.into_iter(), base).unwrap();
        for i in &self.nums {
            writeln!(&mut delta, "{}", i).unwrap();
        }
        delta.close().unwrap();
    }
}

/// A Weave Sink that just collects the numbers in the given delta.
struct DeltaSink {
    nums: Vec<usize>,
}

impl Sink for DeltaSink {
    fn plain(&mut self, text: &str, keep: bool) -> Result<()> {
        if !keep {
            return Ok(());
        }

        self.nums.push(text.parse::<usize>()?);
        Ok(())
    }
}

/// A small utility to make asserting success easier.
trait Successful {
    fn expect_success(&self, msg: &str);
}

impl Successful for ExitStatus {
    fn expect_success(&self, msg: &str) {
        if !self.success() {
            panic!("{}", msg);
        }
    }
}
--------------------------------------------------------------------------------