├── .github
│   └── funding.yml
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── documentation
│   ├── assets
│   │   └── github-banner.png
│   ├── licensing
│   │   ├── fdl-1.3.txt
│   │   ├── gpl-3.0.txt
│   │   └── notice.txt
│   └── readme.md
├── notice.txt
├── readme.md
├── scripts
│   ├── _z-run
│   ├── build.z-run
│   ├── tests.z-run
│   └── workbench.z-run
└── sources
    ├── bin
    │   ├── md5-copy.go
    │   ├── md5-cpio.rs
    │   ├── md5-create.bash
    │   ├── md5-create.rs
    │   ├── md5-diff.go
    │   ├── md5-diff.rs
    │   └── md5-link.go
    └── lib
        ├── core.rs
        ├── digests.rs
        ├── flags.rs
        ├── hashes.rs
        ├── lib.rs
        ├── main_cpio.rs
        ├── main_create.rs
        ├── main_diff.rs
        ├── prelude.rs
        └── sinks.rs

/.github/funding.yml:
--------------------------------------------------------------------------------
1 | github: cipriancraciun
2 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | .*
3 | !.gitignore
4 | !.github
5 | !.z-run
6 | 
7 | /target
8 | 
9 | *~
10 | *#
11 | *.bak
12 | 
13 | *.log
14 | *.tmp
15 | 
16 | 
--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 | 
5 | [[package]]
6 | name = "addr2line"
7 | version = "0.17.0"
8 | source = "registry+https://github.com/rust-lang/crates.io-index"
9 | checksum = "b9ecd88a8c8378ca913a680cd98f0f13ac67383d35993f86c90a70e3f137816b"
10 | dependencies = [
11 | "gimli",
12 | ]
13 | 
14 | [[package]]
15 | name = "adler"
16 | version = "1.0.2"
17 | source = "registry+https://github.com/rust-lang/crates.io-index"
18 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
19 | 
20 | [[package]]
21 | name = "aho-corasick"
22 | version = "0.7.18"
23 | source = "registry+https://github.com/rust-lang/crates.io-index"
24 | checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
25 | dependencies = [
26 | "memchr",
27 | ]
28 | 
29 | [[package]]
30 | name = "argparse"
31 | version = "0.2.2"
32 | source = "registry+https://github.com/rust-lang/crates.io-index"
33 | checksum = "3f8ebf5827e4ac4fd5946560e6a99776ea73b596d80898f357007317a7141e47"
34 | 
35 | [[package]]
36 | name = "atty"
37 | version = "0.2.14"
38 | source = "registry+https://github.com/rust-lang/crates.io-index"
39 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
40 | dependencies = [
41 | "hermit-abi",
42 | "libc",
43 | "winapi",
44 | ]
45 | 
46 | [[package]]
47 | name = "autocfg"
48 | version = "1.1.0"
49 | source = "registry+https://github.com/rust-lang/crates.io-index"
50 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
51 | 
52 | [[package]]
53 | name = "backtrace"
54 | version = "0.3.66"
55 | source = "registry+https://github.com/rust-lang/crates.io-index"
56 | checksum = "cab84319d616cfb654d03394f38ab7e6f0919e181b1b57e1fd15e7fb4077d9a7"
57 | dependencies = [
58 | "addr2line",
59 | "cc",
60 | "cfg-if",
61 | "libc",
62 | "miniz_oxide",
63 | "object",
64 | "rustc-demangle",
65 | ]
66 | 
67 | [[package]]
68 | name = "block-buffer"
69 | version = "0.10.2"
70 | source = "registry+https://github.com/rust-lang/crates.io-index"
71 | checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324"
72 | dependencies = [
73 | "generic-array",
74 | ]
75 | 
76 | [[package]]
77 | name = "cc"
78 | version = "1.0.73"
79 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 80 | checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" 81 | 82 | [[package]] 83 | name = "cfg-if" 84 | version = "1.0.0" 85 | source = "registry+https://github.com/rust-lang/crates.io-index" 86 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 87 | 88 | [[package]] 89 | name = "chrono" 90 | version = "0.4.19" 91 | source = "registry+https://github.com/rust-lang/crates.io-index" 92 | checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" 93 | dependencies = [ 94 | "libc", 95 | "num-integer", 96 | "num-traits", 97 | "time", 98 | "winapi", 99 | ] 100 | 101 | [[package]] 102 | name = "console" 103 | version = "0.15.0" 104 | source = "registry+https://github.com/rust-lang/crates.io-index" 105 | checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31" 106 | dependencies = [ 107 | "encode_unicode", 108 | "libc", 109 | "once_cell", 110 | "terminal_size", 111 | "winapi", 112 | ] 113 | 114 | [[package]] 115 | name = "cpio" 116 | version = "0.2.2" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "27e77cfc4543efb4837662cb7cd53464ae66f0fd5c708d71e0f338b1c11d62d3" 119 | 120 | [[package]] 121 | name = "cpufeatures" 122 | version = "0.2.2" 123 | source = "registry+https://github.com/rust-lang/crates.io-index" 124 | checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b" 125 | dependencies = [ 126 | "libc", 127 | ] 128 | 129 | [[package]] 130 | name = "cpuprofiler" 131 | version = "0.0.4" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "43f8479dbcfd2bbaa0c0c26779b913052b375981cdf533091f2127ea3d42e52b" 134 | dependencies = [ 135 | "error-chain", 136 | "lazy_static", 137 | "pkg-config", 138 | ] 139 | 140 | [[package]] 141 | name = "crossbeam" 142 | version = "0.8.1" 143 | source = "registry+https://github.com/rust-lang/crates.io-index" 144 | checksum = "4ae5588f6b3c3cb05239e90bd110f257254aecd01e4635400391aeae07497845" 145 | dependencies = [ 146 | "cfg-if", 147 | "crossbeam-channel", 148 | "crossbeam-deque", 149 | "crossbeam-epoch", 150 | "crossbeam-queue", 151 | "crossbeam-utils", 152 | ] 153 | 154 | [[package]] 155 | name = "crossbeam-channel" 156 | version = "0.5.5" 157 | source = "registry+https://github.com/rust-lang/crates.io-index" 158 | checksum = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c" 159 | dependencies = [ 160 | "cfg-if", 161 | "crossbeam-utils", 162 | ] 163 | 164 | [[package]] 165 | name = "crossbeam-deque" 166 | version = "0.8.1" 167 | source = "registry+https://github.com/rust-lang/crates.io-index" 168 | checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" 169 | dependencies = [ 170 | "cfg-if", 171 | "crossbeam-epoch", 172 | "crossbeam-utils", 173 | ] 174 | 175 | [[package]] 176 | name = "crossbeam-epoch" 177 | version = "0.9.9" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d" 180 | dependencies = [ 181 | "autocfg", 182 | "cfg-if", 183 | "crossbeam-utils", 184 | "memoffset", 185 | "once_cell", 186 | "scopeguard", 187 | ] 188 | 189 | [[package]] 190 | name = "crossbeam-queue" 191 | version = "0.3.5" 192 | source = "registry+https://github.com/rust-lang/crates.io-index" 193 | checksum = "1f25d8400f4a7a5778f0e4e52384a48cbd9b5c495d110786187fc750075277a2" 194 | dependencies = [ 195 | 
"cfg-if", 196 | "crossbeam-utils", 197 | ] 198 | 199 | [[package]] 200 | name = "crossbeam-utils" 201 | version = "0.8.10" 202 | source = "registry+https://github.com/rust-lang/crates.io-index" 203 | checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83" 204 | dependencies = [ 205 | "cfg-if", 206 | "once_cell", 207 | ] 208 | 209 | [[package]] 210 | name = "crypto-common" 211 | version = "0.1.6" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 214 | dependencies = [ 215 | "generic-array", 216 | "typenum", 217 | ] 218 | 219 | [[package]] 220 | name = "digest" 221 | version = "0.10.3" 222 | source = "registry+https://github.com/rust-lang/crates.io-index" 223 | checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" 224 | dependencies = [ 225 | "block-buffer", 226 | "crypto-common", 227 | ] 228 | 229 | [[package]] 230 | name = "encode_unicode" 231 | version = "0.3.6" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" 234 | 235 | [[package]] 236 | name = "error-chain" 237 | version = "0.12.4" 238 | source = "registry+https://github.com/rust-lang/crates.io-index" 239 | checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" 240 | dependencies = [ 241 | "backtrace", 242 | "version_check", 243 | ] 244 | 245 | [[package]] 246 | name = "generic-array" 247 | version = "0.14.5" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" 250 | dependencies = [ 251 | "typenum", 252 | "version_check", 253 | ] 254 | 255 | [[package]] 256 | name = "gimli" 257 | version = "0.26.2" 258 | source = "registry+https://github.com/rust-lang/crates.io-index" 259 | checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d" 260 | 261 | [[package]] 262 | name = "hermit-abi" 263 | version = "0.1.19" 264 | source = "registry+https://github.com/rust-lang/crates.io-index" 265 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 266 | dependencies = [ 267 | "libc", 268 | ] 269 | 270 | [[package]] 271 | name = "indicatif" 272 | version = "0.16.2" 273 | source = "registry+https://github.com/rust-lang/crates.io-index" 274 | checksum = "2d207dc617c7a380ab07ff572a6e52fa202a2a8f355860ac9c38e23f8196be1b" 275 | dependencies = [ 276 | "console", 277 | "lazy_static", 278 | "number_prefix", 279 | "regex", 280 | ] 281 | 282 | [[package]] 283 | name = "keccak" 284 | version = "0.1.2" 285 | source = "registry+https://github.com/rust-lang/crates.io-index" 286 | checksum = "f9b7d56ba4a8344d6be9729995e6b06f928af29998cdf79fe390cbf6b1fee838" 287 | 288 | [[package]] 289 | name = "lazy_static" 290 | version = "1.4.0" 291 | source = "registry+https://github.com/rust-lang/crates.io-index" 292 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 293 | 294 | [[package]] 295 | name = "libc" 296 | version = "0.2.126" 297 | source = "registry+https://github.com/rust-lang/crates.io-index" 298 | checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" 299 | 300 | [[package]] 301 | name = "md-5" 302 | version = "0.10.1" 303 | source = "registry+https://github.com/rust-lang/crates.io-index" 304 | checksum = "658646b21e0b72f7866c7038ab086d3d5e1cd6271f060fd37defb241949d0582" 305 | dependencies = [ 
306 | "digest", 307 | ] 308 | 309 | [[package]] 310 | name = "md5-tools" 311 | version = "0.0.0" 312 | dependencies = [ 313 | "argparse", 314 | "atty", 315 | "chrono", 316 | "cpio", 317 | "cpuprofiler", 318 | "crossbeam", 319 | "digest", 320 | "indicatif", 321 | "libc", 322 | "md-5", 323 | "regex", 324 | "sha-1", 325 | "sha2", 326 | "sha3", 327 | "walkdir", 328 | ] 329 | 330 | [[package]] 331 | name = "memchr" 332 | version = "2.5.0" 333 | source = "registry+https://github.com/rust-lang/crates.io-index" 334 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 335 | 336 | [[package]] 337 | name = "memoffset" 338 | version = "0.6.5" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" 341 | dependencies = [ 342 | "autocfg", 343 | ] 344 | 345 | [[package]] 346 | name = "miniz_oxide" 347 | version = "0.5.3" 348 | source = "registry+https://github.com/rust-lang/crates.io-index" 349 | checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" 350 | dependencies = [ 351 | "adler", 352 | ] 353 | 354 | [[package]] 355 | name = "num-integer" 356 | version = "0.1.45" 357 | source = "registry+https://github.com/rust-lang/crates.io-index" 358 | checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" 359 | dependencies = [ 360 | "autocfg", 361 | "num-traits", 362 | ] 363 | 364 | [[package]] 365 | name = "num-traits" 366 | version = "0.2.15" 367 | source = "registry+https://github.com/rust-lang/crates.io-index" 368 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 369 | dependencies = [ 370 | "autocfg", 371 | ] 372 | 373 | [[package]] 374 | name = "number_prefix" 375 | version = "0.4.0" 376 | source = "registry+https://github.com/rust-lang/crates.io-index" 377 | checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" 378 | 379 | [[package]] 380 | name = "object" 381 | version = "0.29.0" 382 | source = "registry+https://github.com/rust-lang/crates.io-index" 383 | checksum = "21158b2c33aa6d4561f1c0a6ea283ca92bc54802a93b263e910746d679a7eb53" 384 | dependencies = [ 385 | "memchr", 386 | ] 387 | 388 | [[package]] 389 | name = "once_cell" 390 | version = "1.13.0" 391 | source = "registry+https://github.com/rust-lang/crates.io-index" 392 | checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" 393 | 394 | [[package]] 395 | name = "pkg-config" 396 | version = "0.3.25" 397 | source = "registry+https://github.com/rust-lang/crates.io-index" 398 | checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" 399 | 400 | [[package]] 401 | name = "regex" 402 | version = "1.6.0" 403 | source = "registry+https://github.com/rust-lang/crates.io-index" 404 | checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" 405 | dependencies = [ 406 | "aho-corasick", 407 | "memchr", 408 | "regex-syntax", 409 | ] 410 | 411 | [[package]] 412 | name = "regex-syntax" 413 | version = "0.6.27" 414 | source = "registry+https://github.com/rust-lang/crates.io-index" 415 | checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" 416 | 417 | [[package]] 418 | name = "rustc-demangle" 419 | version = "0.1.21" 420 | source = "registry+https://github.com/rust-lang/crates.io-index" 421 | checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" 422 | 423 | [[package]] 424 | name = "same-file" 425 | version = "1.0.6" 426 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 427 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 428 | dependencies = [ 429 | "winapi-util", 430 | ] 431 | 432 | [[package]] 433 | name = "scopeguard" 434 | version = "1.1.0" 435 | source = "registry+https://github.com/rust-lang/crates.io-index" 436 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 437 | 438 | [[package]] 439 | name = "sha-1" 440 | version = "0.10.0" 441 | source = "registry+https://github.com/rust-lang/crates.io-index" 442 | checksum = "028f48d513f9678cda28f6e4064755b3fbb2af6acd672f2c209b62323f7aea0f" 443 | dependencies = [ 444 | "cfg-if", 445 | "cpufeatures", 446 | "digest", 447 | ] 448 | 449 | [[package]] 450 | name = "sha2" 451 | version = "0.10.2" 452 | source = "registry+https://github.com/rust-lang/crates.io-index" 453 | checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" 454 | dependencies = [ 455 | "cfg-if", 456 | "cpufeatures", 457 | "digest", 458 | ] 459 | 460 | [[package]] 461 | name = "sha3" 462 | version = "0.10.1" 463 | source = "registry+https://github.com/rust-lang/crates.io-index" 464 | checksum = "881bf8156c87b6301fc5ca6b27f11eeb2761224c7081e69b409d5a1951a70c86" 465 | dependencies = [ 466 | "digest", 467 | "keccak", 468 | ] 469 | 470 | [[package]] 471 | name = "terminal_size" 472 | version = "0.1.17" 473 | source = "registry+https://github.com/rust-lang/crates.io-index" 474 | checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" 475 | dependencies = [ 476 | "libc", 477 | "winapi", 478 | ] 479 | 480 | [[package]] 481 | name = "time" 482 | version = "0.1.44" 483 | source = "registry+https://github.com/rust-lang/crates.io-index" 484 | checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" 485 | dependencies = [ 486 | "libc", 487 | "wasi", 488 | "winapi", 489 | ] 490 | 491 | [[package]] 492 | name = "typenum" 493 | version = "1.15.0" 494 | source = "registry+https://github.com/rust-lang/crates.io-index" 495 | checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" 496 | 497 | [[package]] 498 | name = "version_check" 499 | version = "0.9.4" 500 | source = "registry+https://github.com/rust-lang/crates.io-index" 501 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 502 | 503 | [[package]] 504 | name = "walkdir" 505 | version = "2.3.2" 506 | source = "registry+https://github.com/rust-lang/crates.io-index" 507 | checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" 508 | dependencies = [ 509 | "same-file", 510 | "winapi", 511 | "winapi-util", 512 | ] 513 | 514 | [[package]] 515 | name = "wasi" 516 | version = "0.10.0+wasi-snapshot-preview1" 517 | source = "registry+https://github.com/rust-lang/crates.io-index" 518 | checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" 519 | 520 | [[package]] 521 | name = "winapi" 522 | version = "0.3.9" 523 | source = "registry+https://github.com/rust-lang/crates.io-index" 524 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 525 | dependencies = [ 526 | "winapi-i686-pc-windows-gnu", 527 | "winapi-x86_64-pc-windows-gnu", 528 | ] 529 | 530 | [[package]] 531 | name = "winapi-i686-pc-windows-gnu" 532 | version = "0.4.0" 533 | source = "registry+https://github.com/rust-lang/crates.io-index" 534 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 535 | 536 | [[package]] 537 | name = 
"winapi-util" 538 | version = "0.1.5" 539 | source = "registry+https://github.com/rust-lang/crates.io-index" 540 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 541 | dependencies = [ 542 | "winapi", 543 | ] 544 | 545 | [[package]] 546 | name = "winapi-x86_64-pc-windows-gnu" 547 | version = "0.4.0" 548 | source = "registry+https://github.com/rust-lang/crates.io-index" 549 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 550 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "md5-tools" 4 | version = "0.0.0" 5 | authors = ["Ciprian Dorin Craciun "] 6 | edition = "2018" 7 | 8 | [features] 9 | default = [ 10 | # "profile", 11 | ] 12 | profile = ["cpuprofiler"] 13 | 14 | [dependencies] 15 | libc = "*" 16 | digest = "*" 17 | md-5 = "*" 18 | sha-1 = "*" 19 | sha2 = "*" 20 | sha3 = "*" 21 | walkdir = "*" 22 | crossbeam = "*" 23 | regex = "*" 24 | cpio = "*" 25 | argparse = "*" 26 | chrono = "*" 27 | atty = "*" 28 | indicatif = "*" 29 | cpuprofiler = { version = "*", optional = true } 30 | 31 | [[bin]] 32 | name = "md5-create" 33 | path = "./sources/bin/md5-create.rs" 34 | 35 | [[bin]] 36 | name = "md5-diff" 37 | path = "./sources/bin/md5-diff.rs" 38 | 39 | [[bin]] 40 | name = "md5-cpio" 41 | path = "./sources/bin/md5-cpio.rs" 42 | 43 | [lib] 44 | name = "md5_tools" 45 | path = "./sources/lib/lib.rs" 46 | 47 | [profile.release] 48 | opt-level = 3 49 | codegen-units = 1 50 | lto = true 51 | debug = false 52 | incremental = false 53 | panic = "abort" 54 | 55 | -------------------------------------------------------------------------------- /documentation/assets/github-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volution/md5-tools/9619946496af899b2fe426f014c4b40c4c0cf735/documentation/assets/github-banner.png -------------------------------------------------------------------------------- /documentation/licensing/fdl-1.3.txt: -------------------------------------------------------------------------------- 1 | 2 | GNU Free Documentation License 3 | Version 1.3, 3 November 2008 4 | 5 | 6 | Copyright (C) 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. 7 | 8 | Everyone is permitted to copy and distribute verbatim copies 9 | of this license document, but changing it is not allowed. 10 | 11 | 0. PREAMBLE 12 | 13 | The purpose of this License is to make a manual, textbook, or other 14 | functional and useful document "free" in the sense of freedom: to 15 | assure everyone the effective freedom to copy and redistribute it, 16 | with or without modifying it, either commercially or noncommercially. 17 | Secondarily, this License preserves for the author and publisher a way 18 | to get credit for their work, while not being considered responsible 19 | for modifications made by others. 20 | 21 | This License is a kind of "copyleft", which means that derivative 22 | works of the document must themselves be free in the same sense. It 23 | complements the GNU General Public License, which is a copyleft 24 | license designed for free software. 25 | 26 | We have designed this License in order to use it for manuals for free 27 | software, because free software needs free documentation: a free 28 | program should come with manuals providing the same freedoms that the 29 | software does. 
But this License is not limited to software manuals; 30 | it can be used for any textual work, regardless of subject matter or 31 | whether it is published as a printed book. We recommend this License 32 | principally for works whose purpose is instruction or reference. 33 | 34 | 35 | 1. APPLICABILITY AND DEFINITIONS 36 | 37 | This License applies to any manual or other work, in any medium, that 38 | contains a notice placed by the copyright holder saying it can be 39 | distributed under the terms of this License. Such a notice grants a 40 | world-wide, royalty-free license, unlimited in duration, to use that 41 | work under the conditions stated herein. The "Document", below, 42 | refers to any such manual or work. Any member of the public is a 43 | licensee, and is addressed as "you". You accept the license if you 44 | copy, modify or distribute the work in a way requiring permission 45 | under copyright law. 46 | 47 | A "Modified Version" of the Document means any work containing the 48 | Document or a portion of it, either copied verbatim, or with 49 | modifications and/or translated into another language. 50 | 51 | A "Secondary Section" is a named appendix or a front-matter section of 52 | the Document that deals exclusively with the relationship of the 53 | publishers or authors of the Document to the Document's overall 54 | subject (or to related matters) and contains nothing that could fall 55 | directly within that overall subject. (Thus, if the Document is in 56 | part a textbook of mathematics, a Secondary Section may not explain 57 | any mathematics.) The relationship could be a matter of historical 58 | connection with the subject or with related matters, or of legal, 59 | commercial, philosophical, ethical or political position regarding 60 | them. 61 | 62 | The "Invariant Sections" are certain Secondary Sections whose titles 63 | are designated, as being those of Invariant Sections, in the notice 64 | that says that the Document is released under this License. If a 65 | section does not fit the above definition of Secondary then it is not 66 | allowed to be designated as Invariant. The Document may contain zero 67 | Invariant Sections. If the Document does not identify any Invariant 68 | Sections then there are none. 69 | 70 | The "Cover Texts" are certain short passages of text that are listed, 71 | as Front-Cover Texts or Back-Cover Texts, in the notice that says that 72 | the Document is released under this License. A Front-Cover Text may 73 | be at most 5 words, and a Back-Cover Text may be at most 25 words. 74 | 75 | A "Transparent" copy of the Document means a machine-readable copy, 76 | represented in a format whose specification is available to the 77 | general public, that is suitable for revising the document 78 | straightforwardly with generic text editors or (for images composed of 79 | pixels) generic paint programs or (for drawings) some widely available 80 | drawing editor, and that is suitable for input to text formatters or 81 | for automatic translation to a variety of formats suitable for input 82 | to text formatters. A copy made in an otherwise Transparent file 83 | format whose markup, or absence of markup, has been arranged to thwart 84 | or discourage subsequent modification by readers is not Transparent. 85 | An image format is not Transparent if used for any substantial amount 86 | of text. A copy that is not "Transparent" is called "Opaque". 
87 | 88 | Examples of suitable formats for Transparent copies include plain 89 | ASCII without markup, Texinfo input format, LaTeX input format, SGML 90 | or XML using a publicly available DTD, and standard-conforming simple 91 | HTML, PostScript or PDF designed for human modification. Examples of 92 | transparent image formats include PNG, XCF and JPG. Opaque formats 93 | include proprietary formats that can be read and edited only by 94 | proprietary word processors, SGML or XML for which the DTD and/or 95 | processing tools are not generally available, and the 96 | machine-generated HTML, PostScript or PDF produced by some word 97 | processors for output purposes only. 98 | 99 | The "Title Page" means, for a printed book, the title page itself, 100 | plus such following pages as are needed to hold, legibly, the material 101 | this License requires to appear in the title page. For works in 102 | formats which do not have any title page as such, "Title Page" means 103 | the text near the most prominent appearance of the work's title, 104 | preceding the beginning of the body of the text. 105 | 106 | The "publisher" means any person or entity that distributes copies of 107 | the Document to the public. 108 | 109 | A section "Entitled XYZ" means a named subunit of the Document whose 110 | title either is precisely XYZ or contains XYZ in parentheses following 111 | text that translates XYZ in another language. (Here XYZ stands for a 112 | specific section name mentioned below, such as "Acknowledgements", 113 | "Dedications", "Endorsements", or "History".) To "Preserve the Title" 114 | of such a section when you modify the Document means that it remains a 115 | section "Entitled XYZ" according to this definition. 116 | 117 | The Document may include Warranty Disclaimers next to the notice which 118 | states that this License applies to the Document. These Warranty 119 | Disclaimers are considered to be included by reference in this 120 | License, but only as regards disclaiming warranties: any other 121 | implication that these Warranty Disclaimers may have is void and has 122 | no effect on the meaning of this License. 123 | 124 | 2. VERBATIM COPYING 125 | 126 | You may copy and distribute the Document in any medium, either 127 | commercially or noncommercially, provided that this License, the 128 | copyright notices, and the license notice saying this License applies 129 | to the Document are reproduced in all copies, and that you add no 130 | other conditions whatsoever to those of this License. You may not use 131 | technical measures to obstruct or control the reading or further 132 | copying of the copies you make or distribute. However, you may accept 133 | compensation in exchange for copies. If you distribute a large enough 134 | number of copies you must also follow the conditions in section 3. 135 | 136 | You may also lend copies, under the same conditions stated above, and 137 | you may publicly display copies. 138 | 139 | 140 | 3. COPYING IN QUANTITY 141 | 142 | If you publish printed copies (or copies in media that commonly have 143 | printed covers) of the Document, numbering more than 100, and the 144 | Document's license notice requires Cover Texts, you must enclose the 145 | copies in covers that carry, clearly and legibly, all these Cover 146 | Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on 147 | the back cover. Both covers must also clearly and legibly identify 148 | you as the publisher of these copies. 
The front cover must present 149 | the full title with all words of the title equally prominent and 150 | visible. You may add other material on the covers in addition. 151 | Copying with changes limited to the covers, as long as they preserve 152 | the title of the Document and satisfy these conditions, can be treated 153 | as verbatim copying in other respects. 154 | 155 | If the required texts for either cover are too voluminous to fit 156 | legibly, you should put the first ones listed (as many as fit 157 | reasonably) on the actual cover, and continue the rest onto adjacent 158 | pages. 159 | 160 | If you publish or distribute Opaque copies of the Document numbering 161 | more than 100, you must either include a machine-readable Transparent 162 | copy along with each Opaque copy, or state in or with each Opaque copy 163 | a computer-network location from which the general network-using 164 | public has access to download using public-standard network protocols 165 | a complete Transparent copy of the Document, free of added material. 166 | If you use the latter option, you must take reasonably prudent steps, 167 | when you begin distribution of Opaque copies in quantity, to ensure 168 | that this Transparent copy will remain thus accessible at the stated 169 | location until at least one year after the last time you distribute an 170 | Opaque copy (directly or through your agents or retailers) of that 171 | edition to the public. 172 | 173 | It is requested, but not required, that you contact the authors of the 174 | Document well before redistributing any large number of copies, to 175 | give them a chance to provide you with an updated version of the 176 | Document. 177 | 178 | 179 | 4. MODIFICATIONS 180 | 181 | You may copy and distribute a Modified Version of the Document under 182 | the conditions of sections 2 and 3 above, provided that you release 183 | the Modified Version under precisely this License, with the Modified 184 | Version filling the role of the Document, thus licensing distribution 185 | and modification of the Modified Version to whoever possesses a copy 186 | of it. In addition, you must do these things in the Modified Version: 187 | 188 | A. Use in the Title Page (and on the covers, if any) a title distinct 189 | from that of the Document, and from those of previous versions 190 | (which should, if there were any, be listed in the History section 191 | of the Document). You may use the same title as a previous version 192 | if the original publisher of that version gives permission. 193 | B. List on the Title Page, as authors, one or more persons or entities 194 | responsible for authorship of the modifications in the Modified 195 | Version, together with at least five of the principal authors of the 196 | Document (all of its principal authors, if it has fewer than five), 197 | unless they release you from this requirement. 198 | C. State on the Title page the name of the publisher of the 199 | Modified Version, as the publisher. 200 | D. Preserve all the copyright notices of the Document. 201 | E. Add an appropriate copyright notice for your modifications 202 | adjacent to the other copyright notices. 203 | F. Include, immediately after the copyright notices, a license notice 204 | giving the public permission to use the Modified Version under the 205 | terms of this License, in the form shown in the Addendum below. 206 | G. 
Preserve in that license notice the full lists of Invariant Sections 207 | and required Cover Texts given in the Document's license notice. 208 | H. Include an unaltered copy of this License. 209 | I. Preserve the section Entitled "History", Preserve its Title, and add 210 | to it an item stating at least the title, year, new authors, and 211 | publisher of the Modified Version as given on the Title Page. If 212 | there is no section Entitled "History" in the Document, create one 213 | stating the title, year, authors, and publisher of the Document as 214 | given on its Title Page, then add an item describing the Modified 215 | Version as stated in the previous sentence. 216 | J. Preserve the network location, if any, given in the Document for 217 | public access to a Transparent copy of the Document, and likewise 218 | the network locations given in the Document for previous versions 219 | it was based on. These may be placed in the "History" section. 220 | You may omit a network location for a work that was published at 221 | least four years before the Document itself, or if the original 222 | publisher of the version it refers to gives permission. 223 | K. For any section Entitled "Acknowledgements" or "Dedications", 224 | Preserve the Title of the section, and preserve in the section all 225 | the substance and tone of each of the contributor acknowledgements 226 | and/or dedications given therein. 227 | L. Preserve all the Invariant Sections of the Document, 228 | unaltered in their text and in their titles. Section numbers 229 | or the equivalent are not considered part of the section titles. 230 | M. Delete any section Entitled "Endorsements". Such a section 231 | may not be included in the Modified Version. 232 | N. Do not retitle any existing section to be Entitled "Endorsements" 233 | or to conflict in title with any Invariant Section. 234 | O. Preserve any Warranty Disclaimers. 235 | 236 | If the Modified Version includes new front-matter sections or 237 | appendices that qualify as Secondary Sections and contain no material 238 | copied from the Document, you may at your option designate some or all 239 | of these sections as invariant. To do this, add their titles to the 240 | list of Invariant Sections in the Modified Version's license notice. 241 | These titles must be distinct from any other section titles. 242 | 243 | You may add a section Entitled "Endorsements", provided it contains 244 | nothing but endorsements of your Modified Version by various 245 | parties--for example, statements of peer review or that the text has 246 | been approved by an organization as the authoritative definition of a 247 | standard. 248 | 249 | You may add a passage of up to five words as a Front-Cover Text, and a 250 | passage of up to 25 words as a Back-Cover Text, to the end of the list 251 | of Cover Texts in the Modified Version. Only one passage of 252 | Front-Cover Text and one of Back-Cover Text may be added by (or 253 | through arrangements made by) any one entity. If the Document already 254 | includes a cover text for the same cover, previously added by you or 255 | by arrangement made by the same entity you are acting on behalf of, 256 | you may not add another; but you may replace the old one, on explicit 257 | permission from the previous publisher that added the old one. 258 | 259 | The author(s) and publisher(s) of the Document do not by this License 260 | give permission to use their names for publicity for or to assert or 261 | imply endorsement of any Modified Version. 
262 | 263 | 264 | 5. COMBINING DOCUMENTS 265 | 266 | You may combine the Document with other documents released under this 267 | License, under the terms defined in section 4 above for modified 268 | versions, provided that you include in the combination all of the 269 | Invariant Sections of all of the original documents, unmodified, and 270 | list them all as Invariant Sections of your combined work in its 271 | license notice, and that you preserve all their Warranty Disclaimers. 272 | 273 | The combined work need only contain one copy of this License, and 274 | multiple identical Invariant Sections may be replaced with a single 275 | copy. If there are multiple Invariant Sections with the same name but 276 | different contents, make the title of each such section unique by 277 | adding at the end of it, in parentheses, the name of the original 278 | author or publisher of that section if known, or else a unique number. 279 | Make the same adjustment to the section titles in the list of 280 | Invariant Sections in the license notice of the combined work. 281 | 282 | In the combination, you must combine any sections Entitled "History" 283 | in the various original documents, forming one section Entitled 284 | "History"; likewise combine any sections Entitled "Acknowledgements", 285 | and any sections Entitled "Dedications". You must delete all sections 286 | Entitled "Endorsements". 287 | 288 | 289 | 6. COLLECTIONS OF DOCUMENTS 290 | 291 | You may make a collection consisting of the Document and other 292 | documents released under this License, and replace the individual 293 | copies of this License in the various documents with a single copy 294 | that is included in the collection, provided that you follow the rules 295 | of this License for verbatim copying of each of the documents in all 296 | other respects. 297 | 298 | You may extract a single document from such a collection, and 299 | distribute it individually under this License, provided you insert a 300 | copy of this License into the extracted document, and follow this 301 | License in all other respects regarding verbatim copying of that 302 | document. 303 | 304 | 305 | 7. AGGREGATION WITH INDEPENDENT WORKS 306 | 307 | A compilation of the Document or its derivatives with other separate 308 | and independent documents or works, in or on a volume of a storage or 309 | distribution medium, is called an "aggregate" if the copyright 310 | resulting from the compilation is not used to limit the legal rights 311 | of the compilation's users beyond what the individual works permit. 312 | When the Document is included in an aggregate, this License does not 313 | apply to the other works in the aggregate which are not themselves 314 | derivative works of the Document. 315 | 316 | If the Cover Text requirement of section 3 is applicable to these 317 | copies of the Document, then if the Document is less than one half of 318 | the entire aggregate, the Document's Cover Texts may be placed on 319 | covers that bracket the Document within the aggregate, or the 320 | electronic equivalent of covers if the Document is in electronic form. 321 | Otherwise they must appear on printed covers that bracket the whole 322 | aggregate. 323 | 324 | 325 | 8. TRANSLATION 326 | 327 | Translation is considered a kind of modification, so you may 328 | distribute translations of the Document under the terms of section 4. 
329 | Replacing Invariant Sections with translations requires special 330 | permission from their copyright holders, but you may include 331 | translations of some or all Invariant Sections in addition to the 332 | original versions of these Invariant Sections. You may include a 333 | translation of this License, and all the license notices in the 334 | Document, and any Warranty Disclaimers, provided that you also include 335 | the original English version of this License and the original versions 336 | of those notices and disclaimers. In case of a disagreement between 337 | the translation and the original version of this License or a notice 338 | or disclaimer, the original version will prevail. 339 | 340 | If a section in the Document is Entitled "Acknowledgements", 341 | "Dedications", or "History", the requirement (section 4) to Preserve 342 | its Title (section 1) will typically require changing the actual 343 | title. 344 | 345 | 346 | 9. TERMINATION 347 | 348 | You may not copy, modify, sublicense, or distribute the Document 349 | except as expressly provided under this License. Any attempt 350 | otherwise to copy, modify, sublicense, or distribute it is void, and 351 | will automatically terminate your rights under this License. 352 | 353 | However, if you cease all violation of this License, then your license 354 | from a particular copyright holder is reinstated (a) provisionally, 355 | unless and until the copyright holder explicitly and finally 356 | terminates your license, and (b) permanently, if the copyright holder 357 | fails to notify you of the violation by some reasonable means prior to 358 | 60 days after the cessation. 359 | 360 | Moreover, your license from a particular copyright holder is 361 | reinstated permanently if the copyright holder notifies you of the 362 | violation by some reasonable means, this is the first time you have 363 | received notice of violation of this License (for any work) from that 364 | copyright holder, and you cure the violation prior to 30 days after 365 | your receipt of the notice. 366 | 367 | Termination of your rights under this section does not terminate the 368 | licenses of parties who have received copies or rights from you under 369 | this License. If your rights have been terminated and not permanently 370 | reinstated, receipt of a copy of some or all of the same material does 371 | not give you any rights to use it. 372 | 373 | 374 | 10. FUTURE REVISIONS OF THIS LICENSE 375 | 376 | The Free Software Foundation may publish new, revised versions of the 377 | GNU Free Documentation License from time to time. Such new versions 378 | will be similar in spirit to the present version, but may differ in 379 | detail to address new problems or concerns. See 380 | https://www.gnu.org/licenses/. 381 | 382 | Each version of the License is given a distinguishing version number. 383 | If the Document specifies that a particular numbered version of this 384 | License "or any later version" applies to it, you have the option of 385 | following the terms and conditions either of that specified version or 386 | of any later version that has been published (not as a draft) by the 387 | Free Software Foundation. If the Document does not specify a version 388 | number of this License, you may choose any version ever published (not 389 | as a draft) by the Free Software Foundation. 
If the Document 390 | specifies that a proxy can decide which future versions of this 391 | License can be used, that proxy's public statement of acceptance of a 392 | version permanently authorizes you to choose that version for the 393 | Document. 394 | 395 | 11. RELICENSING 396 | 397 | "Massive Multiauthor Collaboration Site" (or "MMC Site") means any 398 | World Wide Web server that publishes copyrightable works and also 399 | provides prominent facilities for anybody to edit those works. A 400 | public wiki that anybody can edit is an example of such a server. A 401 | "Massive Multiauthor Collaboration" (or "MMC") contained in the site 402 | means any set of copyrightable works thus published on the MMC site. 403 | 404 | "CC-BY-SA" means the Creative Commons Attribution-Share Alike 3.0 405 | license published by Creative Commons Corporation, a not-for-profit 406 | corporation with a principal place of business in San Francisco, 407 | California, as well as future copyleft versions of that license 408 | published by that same organization. 409 | 410 | "Incorporate" means to publish or republish a Document, in whole or in 411 | part, as part of another Document. 412 | 413 | An MMC is "eligible for relicensing" if it is licensed under this 414 | License, and if all works that were first published under this License 415 | somewhere other than this MMC, and subsequently incorporated in whole or 416 | in part into the MMC, (1) had no cover texts or invariant sections, and 417 | (2) were thus incorporated prior to November 1, 2008. 418 | 419 | The operator of an MMC Site may republish an MMC contained in the site 420 | under CC-BY-SA on the same site at any time before August 1, 2009, 421 | provided the MMC is eligible for relicensing. 422 | 423 | 424 | ADDENDUM: How to use this License for your documents 425 | 426 | To use this License in a document you have written, include a copy of 427 | the License in the document and put the following copyright and 428 | license notices just after the title page: 429 | 430 | Copyright (c) YEAR YOUR NAME. 431 | Permission is granted to copy, distribute and/or modify this document 432 | under the terms of the GNU Free Documentation License, Version 1.3 433 | or any later version published by the Free Software Foundation; 434 | with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. 435 | A copy of the license is included in the section entitled "GNU 436 | Free Documentation License". 437 | 438 | If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, 439 | replace the "with...Texts." line with this: 440 | 441 | with the Invariant Sections being LIST THEIR TITLES, with the 442 | Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST. 443 | 444 | If you have Invariant Sections without Cover Texts, or some other 445 | combination of the three, merge those two alternatives to suit the 446 | situation. 447 | 448 | If your document contains nontrivial examples of program code, we 449 | recommend releasing these examples in parallel under your choice of 450 | free software license, such as the GNU General Public License, 451 | to permit their use in free software. 
452 | -------------------------------------------------------------------------------- /documentation/licensing/notice.txt: -------------------------------------------------------------------------------- 1 | -----BEGIN PGP SIGNED MESSAGE----- 2 | Hash: SHA256 3 | 4 | ######################################################################### 5 | 6 | 7 | Copyright (C) 2019 Ciprian Dorin Craciun 8 | 9 | >> ciprian@volution.ro 10 | ciprian.craciun@gmail.com 11 | 12 | >> https://volution.ro/ciprian 13 | https://github.com/cipriancraciun 14 | 15 | >> openpgp4fpr:58fc2194fcc2478399cb220c5a974037a6fd8839 16 | 17 | 18 | ------------------------------------------------------------ 19 | 20 | 21 | License for the sources: GPL, version 3 or later 22 | (GNU General Public License) 23 | 24 | This program is free software: you can redistribute it and/or modify 25 | it under the terms of the GNU General Public License as 26 | published by the Free Software Foundation, either version 3 27 | of the License, or (at your option) any later version. 28 | 29 | ** This program is distributed in the hope that it will be useful, ** 30 | ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** 31 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** 32 | ** GNU General Public License for more details. ** 33 | 34 | A copy of the license is included with the documentation folder, 35 | or you could look at https://www.gnu.org/licenses for the content: 36 | 37 | https://www.gnu.org/licenses/gpl-3.0.txt 38 | (SHA256: 3972dc9744f6499f0f9b2dbf76696f2ae7ad8af9b23dde66d6af86c9dfb36986) 39 | 40 | 41 | ------------------------------------------------------------ 42 | 43 | 44 | License for the documentation: FDL, version 1.3 or later 45 | (GNU Free Documentation License) 46 | 47 | Permission is granted to copy, distribute and/or modify these documents 48 | under the terms of the GNU Free Documentation License, Version 1.3 49 | or any later version published by the Free Software Foundation; 50 | with no Invariant Sections, no Front-Cover Texts, and no Back-Cover 51 | Texts. 52 | 53 | A copy of the license is included with the documentation folder, 54 | or you could look at https://www.gnu.org/licenses for the content: 55 | 56 | https://www.gnu.org/licenses/fdl-1.3.txt 57 | (SHA256: 6adc7b4f7c74882dbe7564f7b8285ff194d760727ab30036dfe9704039fe32d7) 58 | 59 | 60 | ------------------------------------------------------------ 61 | 62 | 63 | Notes (with purely informative status) 64 | 65 | 66 | There could exist source or documentation files distributed within this 67 | repository or archive that are either copyrighted or licensed under 68 | different terms than described herein; however these files are easily, 69 | and clearly, identifiable either by placing a notice text at the beginning 70 | of the file, or by placing a similar notice file inside the folder 71 | containing them. 72 | 73 | 74 | If someone requires the sources and/or documentation to be released 75 | under a different license, please send an email to the authors, 76 | stating the licensing requirements, accompanied with the reasons 77 | and other details; then, depending on the situation, the authors might 78 | release the sources and/or documentation under a different license. 
79 | 
80 | 
81 | ######## aea1f4f9c9c2a78f8bb23734f6121f98 ###############################
82 | -----BEGIN PGP SIGNATURE-----
83 | 
84 | iHUEAREIAB0WIQRY/CGU/MJHg5nLIgxal0A3pv2IOQUCXUb/4gAKCRBal0A3pv2I
85 | OSSgAQCGsAfbSnetJBckIPgLA9UuCnhPgHKY5fYCfL+Bx8BdDwD/WMj2o+Lw0sal
86 | T3Zp8CCbQu58GMTxVpURfJ52FWX5bVk=
87 | =AwMS
88 | -----END PGP SIGNATURE-----
89 | 
--------------------------------------------------------------------------------
/documentation/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | ![logo](./documentation/assets/github-banner.png)
6 | 
7 | 
8 | 
9 | 
10 | ----
11 | 
12 | 
13 | 
14 | 
15 | # **md5-tools** -- lightweight Rust MD5/SHA tools
16 | 
17 | 
18 | > Table of contents:
19 | > 
20 | > * [Features](#features): [`md5-create`](#md5-create-features)
21 | > * [Usage examples](#usage-examples): [`md5-create`](#md5-create-usage), [`md5-diff`](#md5-diff-usage), [`md5-cpio`](#md5-cpio-usage), [`md5-copy`](#md5-copy-usage)
22 | > * [Installing](#installing)
23 | > * [About](#about) and [Copyright and licensing](#notice-copyright-and-licensing)
24 | 
25 | 
26 | 
27 | 
28 | ## About
29 | 
30 | This is a small collection of lightweight and efficient Rust-based tools for working with MD5 and SHA-family hash files:
31 | 
32 | * `md5-create` -- takes one argument (a folder or file) and generates an MD5/SHA hash file of all its children (that are files);
33 | (**this is the star of this project**, see [below some of its features](#md5-create-features) that set it apart from `md5deep` and other similar tools;)
34 | (also see [below for usage examples](#md5-create-usage);)
35 | * `md5-diff` -- takes two MD5/SHA hash files and prints a report of differences between them;
36 | (see [below for usage examples](#md5-diff-usage);)
37 | * `md5-cpio` -- reads from `stdin` a CPIO archive (in `newc` format, as created by `cpio -o -H newc`) and writes to `stdout` an MD5/SHA hash file of all the archive members (that are files);
38 | (see [below for usage examples](#md5-cpio-usage);)
39 | * all these tools consume or generate files similar to those produced by `md5sum`/`sha1sum`/`sha256sum`;
40 | 
41 | I have used all of these to curate my personal file-systems and backups, and they can handle large amounts of data.
42 | (For example, `md5-diff` was used on an MD5 file ~3 million lines long.)
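
For a quick taste, here is a hedged end-to-end sketch (the folder and file names are purely illustrative; see the [usage examples](#usage-examples) below for the actual behaviour of each tool):

```
# hash every file under ./archive, into a timestamped `.md5` file;
md5-create ./archive

# hash the members of a CPIO archive, streamed through stdin / stdout;
find ./archive -type f | cpio -o -H newc | md5-cpio > ./archive.md5

# compare two hash files, and print a report of their differences;
md5-diff ./archive-old.md5 ./archive-new.md5
```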
43 | 
44 | Regarding the insecurity of MD5:
45 | 
46 | * **although the tools are named `md5-*`, they do support the SHA family of hashes!**
47 | * yes, I know that MD5 is cryptographically broken;
48 | * yes, I know we should migrate to the SHA-2 / SHA-3 families of hash functions;
49 | * but for the purpose of identifying duplicate, missing, or corrupted files, I personally think that MD5 is still acceptable;
50 | 
51 | There are also a few other tools and scripts found in `./sources/bin` (which support only MD5):
52 | 
53 | * `md5-copy.go` -- takes four arguments: an MD5 hash file, a source folder, a "blobs store" folder, and a number of concurrent workers; it will iterate through the MD5 hash file and copy from the source folder those "blobs" that are missing from the "blobs store" folder;
54 | this tool can be used to make a backup of all unique files (even from multiple sources), thus removing duplicates, and all that is needed to recover the files is the "blob store" and the initial MD5 hash file;
55 | (a "blob" is a file whose name equals its hash value; a "blob store" is a folder that contains the "blob" files, dispersed on two levels;)
56 | * `md5-diff.go` -- the Go variant of the `md5-diff` tool;
57 | * `md5-create.bash` -- (supporting only MD5) -- takes one argument (a folder) and creates within it (or, if it exists, within the `.md5` folder underneath it) a timestamped MD5 hash file of all the folder members (that are files); (it ignores symlinks and sub-mount-points; it also ignores folders that have a `.md5.excluded` file within them;)
58 | 
59 | 
60 | 
61 | 
62 | ## Features
63 | 
64 | 
65 | ### `md5-create` features
66 | 
67 | The following is a short list of the most important features that set this tool apart from other similar tools like `md5deep`:
68 | 
69 | * **support for various file-access patterns** that (especially for rotating disks and RAID arrays) reduce read latencies, and thus increase read bandwidth: (see the example after this list;)
70 |   * (by default) ordered by inode, which on most filesystems translates roughly to a pattern that reads files in the order they are stored on the disk;
71 |   * (on Linux only) ordered by extent, via the [`ioctl / fiemap`](https://www.kernel.org/doc/Documentation/filesystems/fiemap.txt) syscall, which yields an almost sequential access pattern, thus maximizing the I/O bandwidth and approaching the raw performance of the disks;
72 |   * randomized, especially useful over networked file-systems, or other file-systems where there is no clear insight into how the data is actually stored; (for example linear RAID arrays, or virtual disks;)
73 | * **support for the [`posix_fadvise`](http://man7.org/linux/man-pages/man2/posix_fadvise.2.html) syscall**, which instructs the OS not to cache the hashed files in its buffers, thus reducing the OS memory pressure;
74 | * support for the [`nice`](http://man7.org/linux/man-pages/man2/nice.2.html) syscall, which sets the OS scheduler priority (by default 19, the lowest value), thus reducing the OS CPU pressure;
75 | * support for progress monitoring, displaying both the number of files (processed and yet to be processed) and their total size;
76 | * support for not crossing into other mount-points (i.e. like `find /path/... -xdev`);
77 | * support for printing relative paths, relative to the root given as argument; (this option is also supported by `md5deep`;)
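
For example, for a large folder on rotating disks, one could combine these features as follows (a hedged sketch; the individual flags, and their exact syntax, are documented in the [usage examples](#md5-create-usage) below):

```
# read files in physical-extent order (Linux only) for near-sequential I/O,
# run at the lowest scheduler priority, and keep the OS caches clean;
md5-create --sha256 --workers-sort extent --nice 19 --fadvise -- /mnt/archive
```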
78 | 
79 | 
80 | 
81 | 
82 | ## Usage examples
83 | 
84 | 
85 | ### `md5-create` usage
86 | 
87 | Besides the examples below, it also supports the following features:
88 | 
89 | * `--help` -- the obvious "display help" flag;
90 | * `--md5`, `--sha1`, `--sha224`, `--sha256`, `--sha384`, `--sha512`, `--sha3-224`, `--sha3-256`, `--sha3-384`, `--sha3-512` -- to generate hash files that contain hashes for these algorithms; (MD5 by default;)
91 | * `--output` -- to specify where to write the hashes (it can get quite complex, but it should all make sense in the end; see the example after this list):
92 |   * `-` -- write them to stdout; (also, if stdout is a terminal, this disables the progress monitoring;)
93 |   * `/dev/stdout`, `/dev/stderr`, `/dev/null`, `/dev/fd/...` -- as special cases that don't involve temporary files (as the next cases do);
94 |   * a path that does not exist -- a temporary file will be created (by using the `.tmp` suffix), and then renamed to the requested path; (in fact, all the next cases also create a temporary file and rename it at the end;)
95 |   * a path that exists and is a folder -- a timestamped file will be created inside this folder; (thus allowing one to keep track of the source's evolution;)
96 |   * a path that exists and is not a folder -- this is an error;
97 |   * (by default) `.`, which triggers the following auto-detection:
98 |     * if the source is a file, then a timestamped suffix is added to this path and used as the output; (similar to how, for example, `gnupg2` generates detached signature files;)
99 |     * if the source is a folder, then:
100 |       * if inside the source folder there is a sub-folder named `.{hash}` (where `{hash}` is `md5`, `sha1`, i.e. the name of the algorithm), or a sub-folder named `.hashes` or `.md5`, then a timestamped file is created inside this sub-folder;
101 |       * if inside the source folder there is a file named `.{hash}`, `.hashes` or `.md5`, then a timestamped suffix is added to this path and used as the output;
102 |       * else create a timestamped file prefixed with `.--` inside the source folder;
103 | * `--zero` and `--no-zero` -- whether to output a hash file whose lines are terminated by `\0` (as opposed to `\n`); (disabled by default;)
104 | * `--relative` and `--no-relative` -- whether to output relative paths (to the source folder) in the resulting hash file; (enabled by default;)
105 | * `--xdev` and `--no-xdev` -- when walking the file-system, do not cross into other mount points; (disabled by default;)
106 | * `--follow` and `--no-follow` -- when walking the file-system, follow symlinks; (without this option, not even symlinks to files are hashed;) (disabled by default;)
107 | * `--workers-count` -- number of parallel threads that compute hashes; (16 by default;)
108 | * `--workers-queue` -- size of the parallel threads queue; (one should not touch this!)
109 | * `--workers-batch` -- size of the files batch that is sorted before being enqueued; (the larger the batch, the better the data locality;) (use `1` to disable batching, and thus sorting; use the same value as the queue size, and the file-system walking and the file reading become mutually exclusive, which is especially useful for slow rotating disks;)
110 | * `--workers-sort` -- the sorting method for the files batch:
111 |   * `walk` -- basically no ordering is done;
112 |   * (by default) `inode` -- sort by inode number, which should roughly translate to sequential access patterns;
113 |   * `inode-and-size` -- first group files by inode modulo 128k (which basically clusters the accesses), then group by log2 of the size, and then order by inode; (useful when in the same folder there are lots of small files and lots of large files intermixed;)
114 |   * (on Linux only) `extent` -- sort by the actual physical location of the file, which yields an almost perfect sequential access pattern, thus approaching the raw physical bandwidth;
115 |   * `random` -- randomize the files; (useful especially for networked file-systems, or where there is no clear storage layout;)
116 | * `--fadvise` and `--no-fadvise` -- tell the OS that the files are read sequentially, and that their contents shouldn't be cached in the OS buffers; (enabled by default;)
117 | * `--nice <priority>` -- set the `nice` priority; (`19` by default, i.e. the lowest priority;)
118 | * `--progress` and `--no-progress` -- whether to monitor the progress by showing a progress bar; (enabled by default;)
119 | * `--errors-to-stdout` and `--no-errors-to-stdout` -- whether to write an invalid hash record for any failed folder or file (i.e. an all-`0000...` hash); (enabled by default;)
120 | * `--errors-to-stderr` and `--no-errors-to-stderr` -- whether to write an error message to `stderr` if any errors are encountered; (enabled by default;)
121 | * `--ignore-all-errors`, `--ignore-walk-errors`, `--ignore-open-errors`, `--ignore-read-errors` -- by default, if any errors are encountered while walking folders, or while opening or reading files, the hashing stops with an error; with these options the hashing continues, although the final exit code is still non-zero; (disabled by default;)
122 | * `--` -- denotes the end of flags, and the start of the folder or file to hash;
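
For instance, based on the output auto-detection rules above, one can keep a history of hash files simply by creating a `.md5` sub-folder once (a hedged sketch; the timestamped names are chosen by the tool itself):

```
# create the sub-folder only once;
mkdir ./photos/.md5

# each subsequent run now creates a timestamped hash file inside
# `./photos/.md5/`, instead of a `.--<timestamp>.md5` file in `./photos`;
md5-create ./photos
```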
80 |
81 |
82 | ## Usage examples
83 |
84 |
85 | ### `md5-create` usage
86 |
87 | Besides the example below, it also supports the following features:
88 |
89 | * `--help` -- the obvious "display help" flag;
90 | * `--md5`, `--sha1`, `--sha224`, `--sha256`, `--sha384`, `--sha512`, `--sha3-224`, `--sha3-256`, `--sha3-384`, `--sha3-512` -- to generate hash files that contain hashes for these algorithms; (MD5 by default;)
91 | * `--output` -- to specify where to write the hashes (it can get quite complex, but it should all make sense in the end):
92 |     * `-` -- write them to stdout; (also, if stdout is a terminal, it disables the progress monitoring);
93 |     * `/dev/stdout`, `/dev/stderr`, `/dev/null`, `/dev/fd/...` -- special cases that don't involve temporary files (as the following cases do);
94 |     * a path that does not exist -- a temporary file will be created (by using the `.tmp` suffix), and then renamed to the given path; (in fact all the following cases also create a temporary file and rename it at the end;)
95 |     * a path that exists and is a folder -- a timestamped file will be created inside this folder; (thus allowing one to keep track of the source's evolution;)
96 |     * a path that exists and is not a folder -- this is an error;
97 |     * (by default) `.`, which triggers the following auto-detection:
98 |         * if the source is a file, then a timestamped suffix is added to this path and used as the output; (similar to how, for example, `gnupg2` generates detached signature files;)
99 |         * if the source is a folder, then:
100 |             * if inside the source folder there is a sub-folder named `.{hash}` (where `{hash}` is `md5`, `sha1`, etc., i.e. the name of the algorithm), or a sub-folder named `.hashes` or `.md5`, then a timestamped file is created inside this sub-folder;
101 |             * if inside the source folder there is a file named `.{hash}`, `.hashes` or `.md5`, then a timestamped suffix is added to this path and used as the output;
102 |             * else a timestamped file prefixed with `.--` is created inside the source folder;
103 | * `--zero` and `--no-zero` -- whether to output a hashes file whose lines are terminated by `\0` (as opposed to `\n`); (disabled by default;)
104 | * `--relative` and `--no-relative` -- whether to output paths relative to the source folder in the resulting hashes file; (enabled by default;)
105 | * `--xdev` and `--no-xdev` -- when walking the file-system, do not cross into other mount points; (disabled by default;)
106 | * `--follow` and `--no-follow` -- when walking the file-system, do follow symlinks; (without this option not even symlinks to files are hashed;) (disabled by default;)
107 | * `--workers-count` -- the number of parallel threads that compute hashes; (16 by default;)
108 | * `--workers-queue` -- the size of the parallel threads' queue; (one should not touch this!)
109 | * `--workers-batch` -- the size of the files batch that is sorted before being enqueued; (the larger it is, the better the data locality;) (use `1` to disable batching, and thus sorting; use the same value as the queue size, and the file-system walking and file reading become mutually exclusive, which is especially useful for slow rotating disks;)
110 | * `--workers-sort` -- the sorting method for the files batch:
111 |     * `walk` -- basically no ordering is done;
112 |     * (by default) `inode` -- sort by inode number, which should roughly translate to sequential access patterns;
113 |     * `inode-and-size` -- first group files by inode modulo 128k (which basically clusters the accesses), then group by log2 of the size, and then order by inode; (useful when in the same folder there are lots of small files and lots of large files intermixed;)
114 |     * (on Linux only) `extent` -- sort by the actual physical location of the file, which yields an almost perfect sequential access pattern, thus approaching the raw physical bandwidth;
115 |     * `random` -- randomize the file order; (useful especially for networked file-systems, or where there is no clear storage layout;)
116 | * `--fadvise` and `--no-fadvise` -- tell the OS that the files are read sequentially, and that their contents shouldn't be cached in the OS buffers; (enabled by default;)
117 | * `--nice` -- set the `nice` priority; (`19` by default, i.e. the lowest priority;)
118 | * `--progress` and `--no-progress` -- whether to monitor the progress by showing a progress bar; (enabled by default;)
119 | * `--errors-to-stdout` and `--no-errors-to-stdout` -- whether to write an invalid hash record (i.e. an all-`0000...` hash) for any failed folder or file; (enabled by default;)
120 | * `--errors-to-stderr` and `--no-errors-to-stderr` -- whether to write an error message to `stderr` if any errors are encountered; (enabled by default;)
121 | * `--ignore-all-errors`, `--ignore-walk-errors`, `--ignore-open-errors`, `--ignore-read-errors` -- without these options, if any errors are encountered while walking folders, or opening or reading files, the hashing stops with an error; with these options the hashing continues, although the final exit code is still non-zero; (disabled by default;)
122 | * `--` -- denotes the end of flags, and the start of the folder or file to hash;
123 |
124 | Example with output to a timestamped file:
125 | ```
126 | md5-create ./sources
127 | ```
128 | ```
129 | [ii] [8cc8542c] creating `./sources/.--2019-11-02-13-54-14.md5`...
130 | | 00:00:00 | ==================== | 4073/s | 16 | 16 | 100% |
131 | | 00:00:00 | ==================== | 6.67MB/s | 90.35KB | 90.35KB | 100% |
132 | ```
133 |
134 | Example with output to stdout:
135 | ```
136 | md5-create -o /dev/stdout ./sources
137 | ```
138 | ```
139 | b687bba629fdef9f29ba734f9aac90e0 *./sources/md5-diff.go
140 | 855190c3b695519378b057c1f48efdf7 *./sources/md5-cpio.rs
141 | 8ecc4a7b226f0c499eed4852d43003e4 *./sources/md5-create.bash
142 | 12626fb2d7784b35dfd6196fc703cf59 *./sources/md5-diff.rs
143 | ```
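Another sketch that combines several of the flags above -- hashing an entire file-system with SHA-256, without crossing into other mount points, and collecting a timestamped output file into an existing folder; (`/tmp/hashes` is a placeholder:)
```
mkdir /tmp/hashes
md5-create --sha256 --xdev -o /tmp/hashes -- /
```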
144 |
145 |
146 |
147 |
148 | ### `md5-diff` usage
149 |
150 | Besides the example below, it also supports the following features:
151 |
152 | * `--help` -- the obvious "display help" flag;
153 | * `--md5`, `--sha1`, `--sha224`, `--sha256`, `--sha384`, `--sha512`, `--sha3-224`, `--sha3-256`, `--sha3-384`, `--sha3-512` -- to handle files that contain hashes for these algorithms;
154 | * `--gzip`, `--bzip2`, `--lzip`, `--xz`, `--lzma`, `--lz4`, `--lzo`, `--zstd` -- to handle files that are compressed; (requires those decompressors to be installed;)
155 | * `--zero` -- to handle files where lines are terminated by `\0` (as opposed to `\n`);
156 | * `--` -- denotes the end of flags, and the start of the two files to compare;
157 |
158 | Please note that an all-zero hash (i.e. `0000....`) of the proper length is considered an "invalid file"; the normal hashing tools don't generate such hashes, but `md5-create` emits them for files or folders that fail to be opened or read (either due to permission or I/O errors), and `md5-cpio` emits them for hard-links.
159 | Also, empty files are detected by the hash of the empty string (i.e. for MD5 an empty file has the hash `d41d8cd98f00b204e9800998ecf8427e`).
160 |
161 | Example:
162 | ```
163 | md5-diff ./old.md5 ./new.md5
164 | ```
165 | ```
166 | ## Diff statistics (A) vs (B)
167 | ## * hashes
168 | ## * distinct hashes : 8783
169 | ## * unique hashes in (A) : 879
170 | ## * unique hashes in (B) : 884
171 | ## * common hashes : 7020
172 | ## * matching paths : 7019
173 | ## * conflicting paths : 1
174 | ## * paths
175 | ## * distinct paths : 8353
176 | ## * unique paths in (A) : 1
177 | ## * unique paths in (B) : 6
178 | ## * common paths : 8346
179 | ## * matching hashes : 7467
180 | ## * conflicting hashes : 879
181 |
182 | ## Dataset (A) statistics
183 | ## * records : 8347
184 | ## * hashes
185 | ## * distinct hashes : 7899
186 | ## * unique hashes : 7731
187 | ## * duplicate hashes : 168
188 | ## * files
189 | ## * unique files : 7731
190 | ## * duplicate files : 616
191 | ## * empty files : 0
192 | ## * invalid files : 0
193 | ## * source: `/tmp/man-a.md5`
194 |
195 | ## Dataset (B) statistics
196 | ## * records : 8352
197 | ## * hashes
198 | ## * distinct hashes : 7904
199 | ## * unique hashes : 7736
200 | ## * duplicate hashes : 168
201 | ## * files
202 | ## * unique files : 7736
203 | ## * duplicate files : 616
204 | ## * empty files : 0
205 | ## * invalid files : 0
206 | ## * source: `/tmp/man-b.md5`
207 |
208 | #### Hashes unique in (A) :: 879
209 |
210 | +A 6e71ef15d96f410da0077db29dbdc0e2 */usr/share/man/man1/base32.1.gz
211 | +A 818f379930ca7e4260795d89ef36d802 */usr/share/man/man1/base64.1.gz
212 | +A f590fe438cfd63d31dd8c1f4b844fc7b */usr/share/man/man1/basename.1.gz
213 | +A c9361a23658e759af43c398ea7953a54 */usr/share/man/man1/basenc.1.gz
214 | [...]
215 |
216 | #### Hashes unique in (B) :: 884
217 |
218 | +B cb60a4b041a9591ecc3fba278f9fcbe5 */usr/share/man/man1/base32.1.gz
219 | +B 851aa14b318c7a6fad7081564e04355c */usr/share/man/man1/base64.1.gz
220 | +B a24f0721d88b551411de2e3f45e597ed */usr/share/man/man1/basename.1.gz
221 | +B 2e932d6cc6c7617c1f6e6527fe98d108 */usr/share/man/man1/basenc.1.gz
222 | [...]
223 |
224 | #### Paths conflicting in (A) and (B) :: 879
225 |
226 | !A 6e71ef15d96f410da0077db29dbdc0e2 */usr/share/man/man1/base32.1.gz
227 | !B cb60a4b041a9591ecc3fba278f9fcbe5 */usr/share/man/man1/base32.1.gz
228 | !A 818f379930ca7e4260795d89ef36d802 */usr/share/man/man1/base64.1.gz
229 | !B 851aa14b318c7a6fad7081564e04355c */usr/share/man/man1/base64.1.gz
230 | !A f590fe438cfd63d31dd8c1f4b844fc7b */usr/share/man/man1/basename.1.gz
231 | !B a24f0721d88b551411de2e3f45e597ed */usr/share/man/man1/basename.1.gz
232 | !A c9361a23658e759af43c398ea7953a54 */usr/share/man/man1/basenc.1.gz
233 | !B 2e932d6cc6c7617c1f6e6527fe98d108 */usr/share/man/man1/basenc.1.gz
234 | [...]
235 |
236 | #### Files re-organized in (A) and (B) :: 1 (hashes)
237 |
238 | ~A a1c8dc05804ea038e21cb3c175ce936c */usr/share/man/man3/sd_event_source_ref.3.gz
239 | ~B a1c8dc05804ea038e21cb3c175ce936c */usr/share/man/man3/sd_event_source_disable_unref.3.gz
240 | ```
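Since hash files for large trees can themselves get large, they can be kept compressed and compared directly; (a sketch, assuming the decompression flag applies to both inputs:)
```
gzip -k ./old.md5 ./new.md5
md5-diff --gzip -- ./old.md5.gz ./new.md5.gz
```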
241 |
242 |
243 |
244 |
245 | ### `md5-cpio` usage
246 |
247 | Besides the example below, it also supports the following features:
248 |
249 | * `--help` -- the obvious "display help" flag;
250 | * `--md5`, `--sha1`, `--sha224`, `--sha256`, `--sha384`, `--sha512`, `--sha3-224`, `--sha3-256`, `--sha3-384`, `--sha3-512` -- to generate hashes for one of these algorithms;
251 | * `--zero` -- to generate lines that are terminated by `\0` (as opposed to `\n`);
252 |
253 | Example:
254 | ```
255 | find ./sources -depth -print | cpio -o -H newc | gzip > ./archive.cpio.gz
256 | ```
257 | ```
258 | gunzip < ./archive.cpio.gz | cpio -t -v
259 | ```
260 | ```
261 | -rw------- 1 ciprian ciprian 14224 Oct 8 14:02 sources/md5-diff.go
262 | -rw------- 1 ciprian ciprian 1698 Oct 8 01:32 sources/md5-cpio.rs
263 | -rwx------ 1 ciprian ciprian 1017 Oct 8 20:00 sources/md5-create.bash
264 | -rw------- 1 ciprian ciprian 21154 Oct 8 18:13 sources/md5-diff.rs
265 | drwx------ 2 ciprian ciprian 0 Oct 8 20:01 sources
266 | ```
267 | ```
268 | gunzip < ./archive.cpio.gz | md5-cpio
269 | ```
270 | ```
271 | b687bba629fdef9f29ba734f9aac90e0 *./sources/md5-diff.go
272 | 855190c3b695519378b057c1f48efdf7 *./sources/md5-cpio.rs
273 | 8ecc4a7b226f0c499eed4852d43003e4 *./sources/md5-create.bash
274 | 12626fb2d7784b35dfd6196fc703cf59 *./sources/md5-diff.rs
275 | ```
276 |
277 |
278 |
279 |
280 | ### `md5-copy` usage
281 |
282 | Example (it expects a zero-delimited hashes file; the arguments are: the hashes file, the source folder, the blob-store folder, the blob suffix, the dispersion levels, and the workers count):
283 | ```
284 | md5-copy <( tr '\n' '\0' < ./sources/.--2019-11-02-13-54-14.md5 ) ./sources /tmp/blobs .blob 1 4
285 | ```
286 | ```
287 | [dd] [922b3386] cloning `8ecc4a7b226f0c499eed4852d43003e4` -> `sources/bin/md5-create.bash`...
288 | [dd] [922b3386] cloning `2dea36d55be0022488d5ee6efc9c51a2` -> `sources/bin/md5-create.rs`...
289 | [dd] [922b3386] cloning `68198ae4918c38335238d4d36bd1b919` -> `sources/bin/md5-diff.rs`...
290 | [dd] [922b3386] cloning `f7462b371a995bdb1f3974b7df5eb961` -> `sources/bin/md5-cpio.rs`...
291 | [dd] [922b3386] cloning `1ffde758ad4cd0383c22cbc218c51a15` -> `sources/lib/prelude.rs`...
292 | [dd] [922b3386] cloning `0ee1e0a22576ecf992ca61e95b502cab` -> `sources/lib/lib.rs`...
293 | [dd] [922b3386] cloning `dcde2297538da7268443da188d363f66` -> `sources/lib/core.rs`...
294 | [dd] [922b3386] cloning `a46f7044a801a39eb86dac72abd5d11e` -> `sources/lib/hashes.rs`...
295 | [dd] [922b3386] cloning `9757f2a654d3cadc0ee303d214d5aa05` -> `sources/lib/main_cpio.rs`...
296 | [dd] [922b3386] cloning `f1fcd1173154d92e2eebb7ffd1a3b082` -> `sources/bin/md5-copy.go`...
297 | [dd] [922b3386] cloning `30af67bf40d79ad453387fa014fa29d0` -> `sources/lib/sinks.rs`...
298 | [dd] [922b3386] cloning `a3f97c2ef7cf4b36d32ed08a6356d0fd` -> `sources/lib/digests.rs`...
299 | [dd] [922b3386] cloning `495e488e9069ce1e83b4af61cdc886d2` -> `sources/bin/md5-diff.go`...
300 | [dd] [922b3386] cloning `e47f2ae37b592a7d18e1efa92b43f433` -> `sources/lib/flags.rs`...
301 | [dd] [922b3386] cloning `d8866c9528b47056be576cd072bc9704` -> `sources/lib/main_diff.rs`...
302 | [dd] [922b3386] cloning `29bb1db4f8f90d8782c1643b0a9f072b` -> `sources/lib/main_create.rs`...
303 | ```
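Because `md5-cpio` hashes the archive members as they stream through stdin, one can, for example, obtain the hashes of a remote folder without unpacking or copying anything locally; (a sketch, where `some-host` and `./data` are placeholders:)
```
ssh some-host 'find ./data -depth -print | cpio -o -H newc' | md5-cpio --sha256
```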
304 |
305 |
306 |
307 |
308 | ## Installing
309 |
310 |
311 | ### Installing from sources
312 |
313 | Check out the sources:
314 | ```
315 | git clone https://github.com/volution/md5-tools
316 | ```
317 | ```
318 | cd ./md5-tools
319 | ```
320 |
321 | Build and deploy the Rust tools:
322 | ```
323 | cargo build --release
324 | ```
325 | ```
326 | cp ./target/release/md5-create ~/bin/md5-create
327 | cp ./target/release/md5-diff ~/bin/md5-diff
328 | cp ./target/release/md5-cpio ~/bin/md5-cpio
329 | ```
330 |
331 | Build and deploy the Go tools:
332 | ```
333 | go build -o ./target/md5-copy ./sources/bin/md5-copy.go
334 | ```
335 | ```
336 | cp ./target/md5-copy ~/bin/md5-copy
337 | ```
338 |
339 |
340 |
341 |
342 | ## Notice (copyright and licensing)
343 |
344 |
345 | ### Notice -- short version
346 |
347 | The code is licensed under GPL 3 or later.
348 |
349 |
350 | ### Notice -- long version
351 |
352 | For details about the copyright and licensing, please consult the `notice.txt` file in the `documentation/licensing` folder.
353 |
354 | If someone requires the sources and/or documentation to be released
355 | under a different license, please send an email to the authors,
356 | stating the licensing requirements, accompanied by the reasons
357 | and other details; then, depending on the situation, the authors might
358 | release the sources and/or documentation under a different license.
359 | 360 | -------------------------------------------------------------------------------- /notice.txt: -------------------------------------------------------------------------------- 1 | ./documentation/licensing/notice.txt -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ./documentation/readme.md -------------------------------------------------------------------------------- /scripts/_z-run: -------------------------------------------------------------------------------- 1 | #!/dev/null 2 | 3 | 4 | && _/build.z-run 5 | && _/tests.z-run 6 | && _/workbench.z-run 7 | 8 | -------------------------------------------------------------------------------- /scripts/build.z-run: -------------------------------------------------------------------------------- 1 | #!/dev/null 2 | 3 | 4 | 5 | 6 | :: run / md5-create / debug :: exec -- "${ZRUN[@]}" ':: run / rust / debug' md5-create "${@}" 7 | :: run / md5-diff / debug :: exec -- "${ZRUN[@]}" ':: run / rust / debug' md5-diff "${@}" 8 | :: run / md5-cpio / debug :: exec -- "${ZRUN[@]}" ':: run / rust / debug' md5-cpio "${@}" 9 | 10 | :: run / md5-create / release :: exec -- "${ZRUN[@]}" ':: run / rust / release' md5-create "${@}" 11 | :: run / md5-diff / release :: exec -- "${ZRUN[@]}" ':: run / rust / release' md5-diff "${@}" 12 | :: run / md5-cpio / release :: exec -- "${ZRUN[@]}" ':: run / rust / release' md5-cpio "${@}" 13 | 14 | 15 | 16 | 17 | :: build / md5-create / debug :: exec -- "${ZRUN[@]}" ':: build / rust / debug' --bin md5-create "${@}" 18 | :: build / md5-diff / debug :: exec -- "${ZRUN[@]}" ':: build / rust / debug' --bin md5-diff "${@}" 19 | :: build / md5-cpio / debug :: exec -- "${ZRUN[@]}" ':: build / rust / debug' --bin md5-cpio "${@}" 20 | 21 | :: build / md5-create / release :: exec -- "${ZRUN[@]}" ':: build / rust / release' --bin md5-create "${@}" 22 | :: build / md5-diff / release :: exec -- "${ZRUN[@]}" ':: build / rust / release' --bin md5-diff "${@}" 23 | :: build / md5-cpio / release :: exec -- "${ZRUN[@]}" ':: build / rust / release' --bin md5-cpio "${@}" 24 | 25 | 26 | 27 | 28 | --<< run / rust / debug 29 | test "${#}" -ge 1 30 | _binary="${1}" 31 | shift -- 1 32 | export -- CARGO_TARGET="${CARGO_TARGET:-x86_64-unknown-linux-gnu}" 33 | "${ZRUN[@]}" ':: build / rust / debug' --bin "${_binary}" --quiet 34 | exec -- \ 35 | /usr/bin/time -f '(( elapsed: %E (user: %U, kernel: %S), CPU: %P, memory: %M (faults: %F, swapped: %W), I/O: %I / %O (waits: %w) ))' -- \ 36 | "./.outputs/rust/${CARGO_TARGET}/debug/${_binary}" "${@}" \ 37 | # 38 | !! 39 | 40 | --<< run / rust / release 41 | test "${#}" -ge 1 42 | _binary="${1}" 43 | shift -- 1 44 | export -- CARGO_TARGET="${CARGO_TARGET:-x86_64-unknown-linux-gnu}" 45 | "${ZRUN[@]}" ':: build / rust / release' --bin "${_binary}" --quiet 46 | exec -- \ 47 | /usr/bin/time -f '(( elapsed: %E (user: %U, kernel: %S), CPU: %P, memory: %M (faults: %F, swapped: %W), I/O: %I / %O (waits: %w) ))' -- \ 48 | "./.outputs/rust/${CARGO_TARGET}/release/${_binary}" "${@}" \ 49 | # 50 | 51 | !! 52 | 53 | 54 | 55 | 56 | << build / rust / debug 57 | export -- CARGO_INCREMENTAL=1 58 | exec -- "${ZRUN[@]}" ':: build / rust' "${@}" 59 | !! 
60 | 61 | << build / rust / release 62 | export -- CARGO_INCREMENTAL=0 63 | export -- CARGO_EXTRA_RUSTFLAGS='-C link-args=-s -C target-feature=+crt-static -C relocation-model=pic' 64 | exec -- "${ZRUN[@]}" ':: build / rust' --release "${@}" 65 | !! 66 | 67 | --<< build / rust 68 | export -- CARGO_TARGET="${CARGO_TARGET:-x86_64-unknown-linux-gnu}" 69 | exec -- "${ZRUN[@]}" ':: workbench / cargo / run' \ 70 | build \ 71 | --target "${CARGO_TARGET}" \ 72 | "${@}" \ 73 | # 74 | !! 75 | 76 | 77 | 78 | 79 | << run / go / md5-diff 80 | "${ZRUN[@]}" ':: build / go / md5-diff' 81 | exec -- \ 82 | /usr/bin/time -f '(( elapsed: %E (user: %U, kernel: %S), CPU: %P, memory: %M (faults: %F, swapped: %W), I/O: %I / %O (waits: %w) ))' -- \ 83 | ./.outputs/go/md5-diff "${@}" \ 84 | # 85 | !! 86 | 87 | << build / go / md5-diff 88 | exec -- "${ZRUN[@]}" ':: build / go' md5-diff 89 | !! 90 | 91 | 92 | << run / go / md5-copy 93 | "${ZRUN[@]}" ':: build / go / md5-copy' 94 | exec -- \ 95 | /usr/bin/time -f '(( elapsed: %E (user: %U, kernel: %S), CPU: %P, memory: %M (faults: %F, swapped: %W), I/O: %I / %O (waits: %w) ))' -- \ 96 | ./.outputs/go/md5-copy "${@}" \ 97 | # 98 | !! 99 | 100 | << build / go / md5-copy 101 | exec -- "${ZRUN[@]}" ':: build / go' md5-copy 102 | !! 103 | 104 | 105 | << run / go / md5-link 106 | "${ZRUN[@]}" ':: build / go / md5-link' 107 | exec -- \ 108 | /usr/bin/time -f '(( elapsed: %E (user: %U, kernel: %S), CPU: %P, memory: %M (faults: %F, swapped: %W), I/O: %I / %O (waits: %w) ))' -- \ 109 | ./.outputs/go/md5-link "${@}" \ 110 | # 111 | !! 112 | 113 | << build / go / md5-link 114 | exec -- "${ZRUN[@]}" ':: build / go' md5-link 115 | !! 116 | 117 | 118 | --<< build / go 119 | test "${#}" -eq 1 ; _tool="${1}" ; shift -- 1 120 | mkdir -p -- ./.outputs/go 121 | test -e "./sources/bin/${_tool}.go" 122 | exec -- go build \ 123 | -ldflags 'all=-s -extld=gcc -extldflags=-static' \ 124 | -gcflags 'all=-l=4' \ 125 | -trimpath \ 126 | -o "./.outputs/go/${_tool}" \ 127 | "${@}" \ 128 | -- \ 129 | "./sources/bin/${_tool}.go" \ 130 | # 131 | !! 132 | 133 | 134 | 135 | 136 | << deploy / release 137 | test "${#}" -eq 0 138 | if test -e ./.outputs/release ; then 139 | rm -R ./.outputs/release 140 | fi 141 | mkdir -- ./.outputs/release 142 | export -- CARGO_TARGET="${CARGO_TARGET:-x86_64-unknown-linux-gnu}" 143 | "${ZRUN[@]}" ':: build / rust / release' --quiet 144 | "${ZRUN[@]}" ':: build / go / md5-copy' 145 | "${ZRUN[@]}" ':: build / go / md5-link' 146 | cp -T -- "./.outputs/rust/${CARGO_TARGET}/release/md5-create" ./.outputs/release/md5-create 147 | cp -T -- "./.outputs/rust/${CARGO_TARGET}/release/md5-diff" ./.outputs/release/md5-diff 148 | cp -T -- "./.outputs/rust/${CARGO_TARGET}/release/md5-cpio" ./.outputs/release/md5-cpio 149 | cp -T -- ./.outputs/go/md5-copy ./.outputs/release/md5-copy 150 | cp -T -- ./.outputs/go/md5-link ./.outputs/release/md5-link 151 | !! 152 | 153 | 154 | << deploy / publish 155 | test "${#}" -eq 0 156 | for _file in md5-create md5-diff md5-cpio md5-copy md5-link ; do 157 | rsync \ 158 | -c -I -i \ 159 | -p --chmod F0555 \ 160 | -- \ 161 | "./.outputs/release/${_file}" \ 162 | "./.publish/${_file}" \ 163 | # 164 | done 165 | !! 166 | 167 | 168 | << deploy / release + publish 169 | test "${#}" -eq 0 170 | "${ZRUN[@]}" ':: deploy / release' 171 | exec -- "${ZRUN[@]}" ':: deploy / publish' 172 | !! 
173 | 174 | -------------------------------------------------------------------------------- /scripts/tests.z-run: -------------------------------------------------------------------------------- 1 | #!/dev/null 2 | 3 | 4 | --<< test / md5-diff 5 | test "${#}" -eq 2 ; _script="${1}" ; _dataset="${2}" ; shift -- 2 6 | _source_a="./.tests/datasets/${_dataset}--a" 7 | _source_b="./.tests/datasets/${_dataset}--b" 8 | test -f "${_source_a}" 9 | test -f "${_source_b}" 10 | exec -- "${ZRUN[@]}" "${_script}" "${_source_a}" "${_source_b}" "${@}" 11 | !! 12 | 13 | 14 | <<== test / md5-diff / generate 15 | if test ! -e ./.tests/datasets/ ; then 16 | exit -- 0 17 | fi 18 | printf -- '&&__ %s\n' '../.tests/datasets/' 19 | find ./.tests/datasets/ -xdev -xtype f -printf '%P\n' \ 20 | | sed \ 21 | -r \ 22 | -e 's#^(.+)--[a-z]$#\1#' \ 23 | -e '/^[a-z0-9_-]+$/ !d' \ 24 | | sort -u \ 25 | | while read -r -- _dataset ; do 26 | for _suffix in \ 27 | ' / debug' \ 28 | ' / release' \ 29 | '-go' \ 30 | ; do 31 | printf -- ':: test / md5-diff%s / %s :: exec -- "${ZRUN[@]}" %q %q %q "${@}"\n' "${_suffix}" "${_dataset}" ":: test / md5-diff" ":: run / md5-diff${_suffix}" "${_dataset}" 32 | done 33 | done 34 | # 35 | !! 36 | 37 | -------------------------------------------------------------------------------- /scripts/workbench.z-run: -------------------------------------------------------------------------------- 1 | #!/dev/null 2 | 3 | 4 | 5 | 6 | << workbench / scratch / initialize 7 | 8 | test "${#}" -eq 0 9 | 10 | if test ! -d ./.outputs ; then 11 | if test -h ./.outputs ; then 12 | _outputs_store="$( exec -- readlink -f -- ./.outputs )" 13 | else 14 | _outputs_store="${TMPDIR:-/tmp}/workspace--${UID}--${RANDOM}-${RANDOM}-${RANDOM}-${RANDOM}" 15 | fi 16 | mkdir -- "${_outputs_store}" 17 | if test ! -e ./.outputs ; then 18 | _outputs_store="$( exec -- readlink -e -- "${_outputs_store}" )" 19 | ln -s -f -T -- "${_outputs_store}" ./.outputs 20 | fi 21 | fi 22 | 23 | for _folder in rust go rustup cargo ; do 24 | if test ! -d "./.outputs/${_folder}" ; then 25 | mkdir -- "./.outputs/${_folder}" 26 | fi 27 | done 28 | !! 29 | 30 | 31 | 32 | 33 | << workbench / rustup / initialize 34 | test "${#}" -eq 0 35 | test ! -e ./.outputs/rustup/.initialized 36 | curl -s -o ./.outputs/rustup/rustup-init.tmp -- https://static.rust-lang.org/rustup/dist/x86_64-unknown-linux-gnu/rustup-init 37 | chmod +x -- ./.outputs/rustup/rustup-init.tmp 38 | mv -n -T -- ./.outputs/rustup/rustup-init.tmp ./.outputs/rustup/rustup-init 39 | export -- RUSTUP_HOME="$( exec -- readlink -e -- ./.outputs/rustup )" 40 | export -- CARGO_HOME="$( exec -- readlink -e -- ./.outputs/cargo )" 41 | nice -n 19 -- ./.outputs/rustup/rustup-init --profile minimal --no-modify-path --quiet -y 42 | touch -- ./.outputs/rustup/.initialized 43 | !! 44 | 45 | 46 | << workbench / rustup / install / targets / x86_64-unknown-linux-gnu 47 | test "${#}" -eq 0 48 | "${ZRUN[@]}" ':: workbench / rustup / run' target add -- x86_64-unknown-linux-gnu 49 | !! 50 | 51 | << workbench / rustup / install / targets / x86_64-unknown-linux-musl 52 | test "${#}" -eq 0 53 | "${ZRUN[@]}" ':: workbench / rustup / run' target add -- x86_64-unknown-linux-musl 54 | !! 55 | 56 | << workbench / rustup / install / targets / x86_64-apple-darwin 57 | test "${#}" -eq 0 58 | "${ZRUN[@]}" ':: workbench / rustup / run' target add -- x86_64-apple-darwin 59 | !! 
60 | 61 | 62 | 63 | 64 | << workbench / cargo / run 65 | export -- CARGO_TARGET_DIR="$( exec -- readlink -e -- ./.outputs/rust )" 66 | _rustflags=( 67 | --remap-path-prefix "$( exec -- readlink -e -- . )=/home/build" 68 | --remap-path-prefix "$( exec -- readlink -e -- ./sources )=/home/build/sources" 69 | --remap-path-prefix "${HOME}/.cargo/registry/src=/home/build/cargo" 70 | --remap-path-prefix "$( exec -- readlink -m -- "${HOME}/.cargo/registry/src" )=/home/build/cargo" 71 | ) 72 | if test -e ./.outputs/rustup ; then 73 | export -- RUSTUP_HOME="$( exec -- readlink -e -- ./.outputs/rustup )" 74 | export -- CARGO_HOME="$( exec -- readlink -e -- ./.outputs/cargo )" 75 | export -- PATH="$( exec -- readlink -e -- "${RUSTUP_HOME}/toolchains/stable-x86_64-unknown-linux-gnu/bin" ):$( exec -- readlink -e -- "${CARGO_HOME}/bin" ):${PATH}" 76 | _rustflags+=( 77 | --remap-path-prefix "${CARGO_HOME}/registry/src=/home/build/cargo" 78 | --remap-path-prefix "$( exec -- readlink -m -- "${CARGO_HOME}/registry/src" )=/home/build/cargo" 79 | ) 80 | fi 81 | export -- RUSTFLAGS="${_rustflags[*]}" 82 | if test -n "${CARGO_CROSS_BIN:-}" ; then 83 | PATH="$( exec -- readlink -e -- "${CARGO_CROSS_BIN}" ):${PATH}" 84 | fi 85 | if test -n "${CARGO_CROSS_RUSTFLAGS:-}" ; then 86 | RUSTFLAGS="${CARGO_CROSS_RUSTFLAGS} ${RUSTFLAGS}" 87 | fi 88 | if test -n "${CARGO_EXTRA_RUSTFLAGS:-}" ; then 89 | RUSTFLAGS="${CARGO_EXTRA_RUSTFLAGS} ${RUSTFLAGS}" 90 | fi 91 | exec -- nice -n 19 -- cargo "${@}" 92 | !! 93 | 94 | << workbench / rustup / run 95 | export -- RUSTUP_HOME="$( exec -- readlink -e -- ./.outputs/rustup )" 96 | export -- CARGO_HOME="$( exec -- readlink -e -- ./.outputs/cargo )" 97 | export -- CARGO_TARGET_DIR="$( exec -- readlink -e -- ./.outputs/rust )" 98 | export -- PATH="$( exec -- readlink -e -- "${RUSTUP_HOME}/toolchains/stable-x86_64-unknown-linux-gnu/bin" ):$( exec -- readlink -e -- "${CARGO_HOME}/bin" ):${PATH}" 99 | exec -- nice -n 19 -- rustup "${@}" 100 | !! 
101 |
102 |
103 | :: workbench / cargo / target / x86_64-unknown-linux-gnu :: export -- CARGO_TARGET=x86_64-unknown-linux-gnu
104 | :: workbench / cargo / target / x86_64-unknown-linux-musl :: export -- CARGO_TARGET=x86_64-unknown-linux-musl
105 |
106 |
107 | ##{{
108 | NOTE:
109 | https://wapl.es/rust/2019/02/17/rust-cross-compile-linux-to-macos.html
110 | https://github.com/tpoechtrager/osxcross
111 | https://github.com/phracker/MacOSX-SDKs
112 | https://s3.dockerproject.org/darwin/v2/MacOSX10.10.sdk.tar.xz
113 | https://s3.dockerproject.org/darwin/v2/MacOSX10.11.sdk.tar.xz
114 | ##}}
115 |
116 | :: workbench / cargo / target / x86_64-apple-darwin :: export -- CARGO_TARGET=x86_64-apple-darwin CARGO_CROSS_BIN=./.outputs/osxcross/bin CARGO_CROSS_RUSTFLAGS='-C linker=x86_64-apple-darwin14-clang -C ar=x86_64-apple-darwin14-ar'
117 |
118 |
--------------------------------------------------------------------------------
/sources/bin/md5-copy.go:
--------------------------------------------------------------------------------
1 |
2 |
3 | package main
4 |
5 |
6 | import "bufio"
7 | import "crypto/md5"
8 | import "encoding/hex"
9 | import "fmt"
10 | import "io"
11 | import "os"
12 | import "path"
13 | import "path/filepath"
14 | import "regexp"
15 | import "strconv"
16 | import "sync"
17 | import "syscall"
18 |
19 |
20 |
21 |
22 | func main () () {
23 |
24 |
25 | if len (os.Args) != 7 {
26 | panic ("[0071e111] invalid arguments, expected: <hashes-file> <source-folder> <target-folder> <target-suffix> <target-levels> <parallelism>")
27 | }
28 |
29 | _hashesPath := os.Args[1]
30 |
31 | _sourcePath := os.Args[2]
32 | if _path, _error := filepath.EvalSymlinks (_sourcePath); _error == nil {
33 | _sourcePath = _path
34 | } else {
35 | panic (fmt.Sprintf ("[2c3601cc] invalid source path (eval-links failed) `%s`: %s", _sourcePath, _error))
36 | }
37 |
38 | _targetPath := os.Args[3]
39 | if _path, _error := filepath.EvalSymlinks (_targetPath); _error == nil {
40 | _targetPath = _path
41 | } else {
42 | panic (fmt.Sprintf ("[43402d50] invalid target path (eval-links failed) `%s`: %s", _targetPath, _error))
43 | }
44 |
45 | _targetSuffix := os.Args[4]
46 |
47 | _targetLevels := -1
48 | if _value, _error := strconv.ParseUint (os.Args[5], 10, 16); _error == nil {
49 | _targetLevels = int (_value)
50 | } else {
51 | panic (fmt.Sprintf ("[7f407004] invalid target levels (parse failed) `%s`: %s", os.Args[5], _error))
52 | }
53 | if (_targetLevels < 0) || (_targetLevels > 2) {
54 | panic (fmt.Sprintf ("[ef8c8ebc] invalid target levels (must be between 0 and 2) `%d`", _targetLevels))
55 | }
56 |
57 | _targetSync := false
58 |
59 | _parallelism := 16
60 | if _value, _error := strconv.ParseUint (os.Args[6], 10, 16); _error == nil {
61 | if _value != 0 {
62 | _parallelism = int (_value)
63 | }
64 | } else {
65 | panic (fmt.Sprintf ("[04d78872] invalid parallelism (parse failed) `%s`: %s", os.Args[6], _error))
66 | }
67 | if (_parallelism < 1) || (_parallelism > 512) {
68 | panic (fmt.Sprintf ("[29f6c5c4] invalid parallelism (must be between 1 and 512) `%d`", _parallelism))
69 | }
70 |
71 |
72 | if _stat, _error := os.Stat (_sourcePath); _error == nil {
73 | if !
_stat.Mode () .IsDir () { 74 | panic (fmt.Sprintf ("[0337dae9] invalid source folder (non folder) `%s`", _sourcePath)) 75 | } else { 76 | // NOP 77 | } 78 | } else if os.IsNotExist (_error) { 79 | panic (fmt.Sprintf ("[e8d2029c] invalid source folder (not found) `%s`", _sourcePath)) 80 | } else { 81 | panic (fmt.Sprintf ("[9fd05bc7] invalid source folder (unexpected error) `%s`: %s", _sourcePath, _error)) 82 | } 83 | 84 | if _stat, _error := os.Stat (_targetPath); _error == nil { 85 | if ! _stat.Mode () .IsDir () { 86 | panic (fmt.Sprintf ("[f6ea9a41] invalid target folder (non folder) `%s`", _targetPath)) 87 | } else { 88 | // NOP 89 | } 90 | } else if os.IsNotExist (_error) { 91 | panic (fmt.Sprintf ("[b9843cd6] invalid target folder (not found) `%s`", _targetPath)) 92 | } else { 93 | panic (fmt.Sprintf ("[5dacb884] invalid target folder (unexpected error) `%s`: %s", _targetPath, _error)) 94 | } 95 | 96 | 97 | var _hashesStream *bufio.Reader 98 | if _stream_0 , _error := os.Open (_hashesPath); _error == nil { 99 | _hashesStream = bufio.NewReaderSize (_stream_0, 16 * 1024 * 1024) 100 | } else if os.IsNotExist (_error) { 101 | panic (fmt.Sprintf ("[931f9e3f] invalid hashes file (not found) `%s`", _hashesPath)) 102 | } else { 103 | panic (fmt.Sprintf ("[3d79f70b] invalid hashes file (unexpected error) `%s`: %s", _hashesPath, _error)) 104 | } 105 | 106 | 107 | _workersQueue := make (chan [2]string, _parallelism * 1024) 108 | _workersDone := & sync.WaitGroup {} 109 | for _index := 0; _index < _parallelism; _index += 1 { 110 | _workersDone.Add (1) 111 | go func () () { 112 | // fmt.Fprintf (os.Stderr, "[dd] [23948741] worker started;\n") 113 | for _hash_and_path := range _workersQueue { 114 | _hash := _hash_and_path[0] 115 | _path := _hash_and_path[1] 116 | // fmt.Fprintf (os.Stderr, "[dd] [12ecbee9] worker copying...\n") 117 | copy (_hash, _path, _sourcePath, _targetPath, _targetSuffix, _targetLevels, _targetSync) 118 | // fmt.Fprintf (os.Stderr, "[dd] [c699ec13] worker dequeueing...\n") 119 | } 120 | _workersDone.Done () 121 | // fmt.Fprintf (os.Stderr, "[dd] [824ab6a0] worker finished;\n") 122 | } () 123 | } 124 | 125 | 126 | // fmt.Fprintf (os.Stderr, "[dd] [ae067149] feeder started;\n") 127 | 128 | for { 129 | 130 | 131 | // NOTE: Parse hash lines... 132 | 133 | var _line string 134 | if _line_0, _error := _hashesStream.ReadString (0); _error == nil { 135 | _line = _line_0 136 | if _line[len (_line) - 1] == 0 { 137 | _line = _line[: len (_line) - 1] 138 | } 139 | } else if _error == io.EOF { 140 | if _line == "" { 141 | break 142 | } else { 143 | panic (fmt.Sprintf ("[9e33c96b] invalid line `%s`", _line)) 144 | } 145 | } else { 146 | panic (fmt.Sprintf ("[14519a7a] unexpected error: %s", _error)) 147 | } 148 | 149 | var _hash string 150 | var _path string 151 | if _slices := md5RecordLine.FindStringSubmatch (_line); _slices != nil { 152 | _hash = _slices[1] 153 | _path = _slices[2] 154 | } else { 155 | panic (fmt.Sprintf ("[4ac97db6] invalid line `%s`", _line)) 156 | } 157 | 158 | 159 | // NOTE: Sanity check paths... 160 | 161 | if _path[0:2] == "./" { 162 | _path = _path[2:] 163 | } else if _path[0:1] == "/" { 164 | _path = _path[1:] 165 | } 166 | if (_path != path.Clean (_path)) || (_path[0:1] == "/") { 167 | panic (fmt.Sprintf ("[a28f4f30] invalid path `%s`", _path)) 168 | } 169 | 170 | 171 | // NOTE: Skip empty files... 172 | 173 | if _hash == md5EmptyHash { 174 | continue 175 | } 176 | 177 | // NOTE: Skip invalid files... 
178 | 179 | if _hash == md5InvalidHash { 180 | continue 181 | } 182 | 183 | // fmt.Fprintf (os.Stderr, "[dd] [0f59b583] feeder enqueueing...\n") 184 | _workersQueue <- [2]string { _hash, _path } 185 | } 186 | 187 | // fmt.Fprintf (os.Stderr, "[dd] [fb4ae4f3] feeder finished;\n") 188 | 189 | close (_workersQueue) 190 | _workersDone.Wait () 191 | } 192 | 193 | 194 | 195 | 196 | func copy (_hash string, _path string, _sourcePath string, _targetPath string, _targetSuffix string, _targetLevels int, _targetSync bool) () { 197 | 198 | // NOTE: Compute source and target paths... 199 | 200 | _sourceFile := path.Join (_sourcePath, _path) 201 | 202 | var _targetFolders []string 203 | var _targetFolder_X string 204 | if _targetLevels == 0 { 205 | _targetFolder_X = _targetPath 206 | } else if _targetLevels == 1 { 207 | _targetFolder_1 := path.Join (_targetPath, _hash[0:2]) 208 | _targetFolders = append (_targetFolders, _targetFolder_1) 209 | _targetFolder_X = _targetFolder_1 210 | } else if _targetLevels == 2 { 211 | _targetFolder_1 := path.Join (_targetPath, _hash[0:2]) 212 | _targetFolder_2 := path.Join (_targetFolder_1, _hash[0:4]) 213 | _targetFolders = append (_targetFolders, _targetFolder_1, _targetFolder_2) 214 | _targetFolder_X = _targetFolder_2 215 | } else { 216 | panic ("[e48df570]") 217 | } 218 | 219 | _targetFile := path.Join (_targetFolder_X, _hash) 220 | if _targetSuffix != "" { 221 | _targetFile += _targetSuffix 222 | } 223 | _targetFileTmp := path.Join (_targetPath, fmt.Sprintf (".tmp.%08x.%s", os.Getpid (), _hash)) 224 | 225 | 226 | // NOTE: Check if target file exists... 227 | 228 | if _stat, _error := os.Lstat (_targetFile); _error == nil { 229 | if ! _stat.Mode () .IsRegular () { 230 | panic (fmt.Sprintf ("[4a0ef62d] invalid target file (non file) `%s`", _targetFile)) 231 | } else { 232 | // fmt.Fprintf (os.Stderr, "[dd] [85a8bd5a] existing target file `%s`; skipping!\n", _targetFile) 233 | return 234 | } 235 | } else if os.IsNotExist (_error) { 236 | // NOP 237 | } else if _error, _ok := _error.(*os.PathError); _ok && _error.Err == syscall.ENOTDIR { 238 | panic (fmt.Sprintf ("[26c24a68] invalid target file (parent non folder) `%s`", _targetFile)) 239 | } else { 240 | panic (fmt.Sprintf ("[87e53618] unexpected error: %s", _error)) 241 | } 242 | 243 | 244 | fmt.Fprintf (os.Stderr, "[dd] [922b3386] cloning `%s` -> `%s`...\n", _hash, _sourceFile) 245 | 246 | 247 | // NOTE: Check if source file exists and open... 248 | 249 | if _stat, _error := os.Lstat (_sourceFile); _error == nil { 250 | if ! 
_stat.Mode () .IsRegular () { 251 | fmt.Fprintf (os.Stderr, "[ee] [6ffb7ba4] invalid source file (non file) `%s`; ignoring!\n", _sourceFile) 252 | return 253 | } else { 254 | // NOP 255 | } 256 | } else if os.IsNotExist (_error) { 257 | fmt.Fprintf (os.Stderr, "[ee] [6cf84aa8] invalid source file (not found) `%s`; ignoring!\n", _sourceFile) 258 | return 259 | } else if _error, _ok := _error.(*os.PathError); _ok && _error.Err == syscall.ENOTDIR { 260 | fmt.Fprintf (os.Stderr, "[ee] [9c5ed744] invalid source file (parent non folder) `%s`; ignoring!\n", _sourceFile) 261 | return 262 | } else { 263 | panic (fmt.Sprintf ("[88e79792] unexpected error: %s", _error)) 264 | } 265 | 266 | var _sourceStream *os.File 267 | if _stream_0, _error := os.Open (_sourceFile); _error == nil { 268 | _sourceStream = _stream_0 269 | } else { 270 | panic (fmt.Sprintf ("[81408611] unexpected error: %s", _error)) 271 | } 272 | 273 | var _sourceStat os.FileInfo 274 | if _stat_0, _error := _sourceStream.Stat (); _error == nil { 275 | _sourceStat = _stat_0 276 | } else { 277 | panic (fmt.Sprintf ("[5d4649c4] unexpected error: %s", _error)) 278 | } 279 | 280 | 281 | // NOTE: Check if target folders exist or create... 282 | 283 | for _, _targetFolder := range _targetFolders { 284 | if _stat, _error := os.Lstat (_targetFolder); _error == nil { 285 | if ! _stat.IsDir () { 286 | panic (fmt.Sprintf ("[3aa03105] invalid target folder `%s`", _targetFolder)) 287 | } 288 | } else if os.IsNotExist (_error) { 289 | // fmt.Fprintf (os.Stderr, "[dd] [d26e2ffd] creating target folder `%s`...\n", _targetFolder) 290 | if _error := os.Mkdir (_targetFolder, 0700); (_error != nil) && ! os.IsExist (_error) { 291 | panic (fmt.Sprintf ("[7946185f] unexpected error: %s", _error)) 292 | } 293 | } else { 294 | panic (fmt.Sprintf ("[33e41e43] unexpected error: %s", _error)) 295 | } 296 | } 297 | 298 | 299 | // NOTE: Create and write temporary target file... 
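// (The copy below is first written under a per-process temporary name
// created with `O_EXCL`, combining the process id and the hash, so that
// two workers cloning the same blob fail fast instead of clobbering
// each other; the loser simply closes the source and skips the blob.)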
300 | // See also: https://github.com/golang/go/issues/22397 301 | 302 | var _targetStreamTmp_1 *os.File 303 | if _stream_0, _error := os.OpenFile (_targetFileTmp, os.O_WRONLY | os.O_CREATE | os.O_EXCL, 0600); _error == nil { 304 | _targetStreamTmp_1 = _stream_0 305 | } else if os.IsExist (_error) { 306 | _sourceStream.Close () 307 | return 308 | } else { 309 | panic (fmt.Sprintf ("[cd5941c6] unexpected error: %s", _error)) 310 | } 311 | 312 | { 313 | var _error error 314 | _error = Fadvise (_sourceStream.Fd (), 0, 0, FADV_SEQUENTIAL) 315 | if _error != nil { panic (fmt.Sprintf ("[0dce2e31] unexpected error: %s", _error)) } 316 | _error = Fadvise (_sourceStream.Fd (), 0, 0, FADV_NOREUSE) 317 | if _error != nil { panic (fmt.Sprintf ("[96737a83] unexpected error: %s", _error)) } 318 | _error = Fadvise (_sourceStream.Fd (), 0, 0, FADV_WILLNEED) 319 | if _error != nil { panic (fmt.Sprintf ("[b749c725] unexpected error: %s", _error)) } 320 | } 321 | 322 | var _dataSize int64 323 | if _size_0, _error := io.Copy (_targetStreamTmp_1, _sourceStream); _error == nil { 324 | _dataSize = _size_0 325 | } else { 326 | panic (fmt.Sprintf ("[4ea17054] unexpected error: %s", _error)) 327 | } 328 | 329 | { 330 | var _error error 331 | _error = Fadvise (_sourceStream.Fd (), 0, 0, FADV_DONTNEED) 332 | if _error != nil { panic (fmt.Sprintf ("[210d6e1f] unexpected error: %s", _error)) } 333 | } 334 | 335 | if _error := _targetStreamTmp_1.Chmod (0400); _error != nil { 336 | panic (fmt.Sprintf ("[b3be47d6] unexpected error: %s", _error)) 337 | } 338 | if _targetSync { 339 | if _error := _targetStreamTmp_1.Sync (); _error != nil { 340 | panic (fmt.Sprintf ("[8bd8f281] unexpected error: %s", _error)) 341 | } 342 | } 343 | 344 | 345 | // NOTE: Re-open temporary target file... 346 | 347 | var _targetStreamTmp_2 *os.File 348 | if _stream_0, _error := os.Open (_targetFileTmp); _error == nil { 349 | _targetStreamTmp_2 = _stream_0 350 | } else { 351 | panic (fmt.Sprintf ("[05b96651] unexpected error: %s", _error)) 352 | } 353 | 354 | 355 | // NOTE: Stat and sanity check both temporary target files... 356 | 357 | var _targetStatTmp_1 os.FileInfo 358 | if _stat, _error := _targetStreamTmp_1.Stat (); _error == nil { 359 | _targetStatTmp_1 = _stat 360 | } else { 361 | panic (fmt.Sprintf ("[5a22c74c] unexpected error: %s", _error)) 362 | } 363 | 364 | var _targetStatTmp_2 os.FileInfo 365 | if _stat, _error := _targetStreamTmp_2.Stat (); _error == nil { 366 | _targetStatTmp_2 = _stat 367 | } else { 368 | panic (fmt.Sprintf ("[2a9b35cc] unexpected error: %s", _error)) 369 | } 370 | 371 | if ! os.SameFile (_targetStatTmp_1, _targetStatTmp_2) { 372 | panic (fmt.Sprintf ("[6a8783b9] invalid target file (invalid inode) `%s`", _targetFileTmp)) 373 | } 374 | if _dataSize != _sourceStat.Size () { 375 | panic (fmt.Sprintf ("[ff0c6916] invalid target file (invalid size) `%s`", _targetFileTmp)) 376 | } 377 | if _dataSize != _targetStatTmp_1.Size () { 378 | panic (fmt.Sprintf ("[26176a7e] invalid target file (invalid size) `%s`", _targetFileTmp)) 379 | } 380 | if _dataSize != _targetStatTmp_2.Size () { 381 | panic (fmt.Sprintf ("[8df8e12d] invalid target file (invalid size) `%s`", _targetFileTmp)) 382 | } 383 | 384 | 385 | // NOTE: Hash temporary target file... 
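// (The freshly written copy is re-read through a second handle and
// re-hashed; only if the digest matches the expected hash does the
// rename below publish the blob under its final name.)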
386 | 387 | _hasher := md5.New () 388 | if _size_0, _error := io.Copy (_hasher, _targetStreamTmp_2); _error == nil { 389 | if _size_0 != _dataSize { 390 | panic (fmt.Sprintf ("[fe9fa8a7] invalid target file (invalid size) `%s`", _targetFileTmp)) 391 | } 392 | } else { 393 | panic (fmt.Sprintf ("[10892bcb] unexpected error: %s", _error)) 394 | } 395 | _hashTmp := hex.EncodeToString (_hasher.Sum (nil) [:]) 396 | if _hashTmp != _hash { 397 | panic (fmt.Sprintf ("[fe9fa8a7] invalid target file (invalid hash) `%s`", _targetFileTmp)) 398 | } 399 | 400 | 401 | // NOTE: Rename temporary target file to actual target file... 402 | 403 | if _error := os.Rename (_targetFileTmp, _targetFile); _error != nil { 404 | panic (fmt.Sprintf ("[90d364ab] unexpected error: %s", _error)) 405 | } 406 | 407 | 408 | // NOTE: Stat and sanity check actual target file... 409 | 410 | var _targetStat os.FileInfo 411 | if _stat_0, _error := os.Lstat (_targetFile); _error == nil { 412 | _targetStat = _stat_0 413 | } else { 414 | panic (fmt.Sprintf ("[aa2d7afe] unexpected error: %s", _error)) 415 | } 416 | 417 | if ! os.SameFile (_targetStatTmp_1, _targetStat) { 418 | panic (fmt.Sprintf ("[e7dfab4d] invalid target file (invalid inode) `%s`", _targetFile)) 419 | } 420 | 421 | 422 | // NOTE: Close source and target files... 423 | 424 | { 425 | var _error error 426 | _error = Fadvise (_targetStreamTmp_1.Fd (), 0, 0, FADV_DONTNEED) 427 | if _error != nil { panic (fmt.Sprintf ("[11fac409] unexpected error: %s", _error)) } 428 | } 429 | 430 | if _error := _targetStreamTmp_1.Close (); _error != nil { 431 | panic (fmt.Sprintf ("[293f4d9a] unexpected error: %s", _error)) 432 | } 433 | if _error := _targetStreamTmp_2.Close (); _error != nil { 434 | panic (fmt.Sprintf ("[a7f2341e] unexpected error: %s", _error)) 435 | } 436 | if _error := _sourceStream.Close (); _error != nil { 437 | panic (fmt.Sprintf ("[724b639a] unexpected error: %s", _error)) 438 | } 439 | 440 | 441 | // NOTE: Sync folders... 
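// (When `_targetSync` is enabled, every folder from the target file up
// to the blob-store root is opened and fsync-ed, so that the rename
// above is durable across crashes.)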
442 | 443 | if _targetSync { 444 | _folderPath := _targetFile 445 | for { 446 | if _parent := path.Dir (_folderPath); _parent != _folderPath { 447 | _folderPath = _parent 448 | } else { 449 | break 450 | } 451 | if _folderStream, _error := os.OpenFile (_folderPath, os.O_RDONLY | syscall.O_DIRECTORY, 0); _error == nil { 452 | if _error := _folderStream.Sync (); _error != nil { 453 | panic (fmt.Sprintf ("[2e17ce7d] unexpected error: %s", _error)) 454 | } 455 | if _error := _folderStream.Close (); _error != nil { 456 | panic (fmt.Sprintf ("[09934cf8] unexpected error: %s", _error)) 457 | } 458 | } else { 459 | panic (fmt.Sprintf ("[e55f9fa4] unexpected error: %s", _error)) 460 | } 461 | if _folderPath == _targetPath { 462 | break 463 | } 464 | } 465 | } 466 | } 467 | 468 | 469 | var md5RecordLine *regexp.Regexp = regexp.MustCompile (`^([0-9a-f]{32}) \*(.+)$`) 470 | var md5EmptyHash string = "d41d8cd98f00b204e9800998ecf8427e" 471 | var md5InvalidHash string = "00000000000000000000000000000000" 472 | 473 | 474 | 475 | 476 | // NOTE: https://github.com/golang/sys/blob/master/unix/zsyscall_linux_amd64.go#L1800 477 | func Fadvise(fd uintptr, offset int64, length int64, advice int) (error) { 478 | _, _, e := syscall.Syscall6(syscall.SYS_FADVISE64, uintptr(fd), uintptr(offset), uintptr(length), uintptr(advice), 0, 0) 479 | if e == 0 { 480 | return nil 481 | } else { 482 | return e 483 | } 484 | } 485 | 486 | // NOTE: https://github.com/golang/sys/blob/master/unix/ztypes_linux_amd64.go#L188 487 | const ( 488 | FADV_NORMAL = 0x0 489 | FADV_RANDOM = 0x1 490 | FADV_SEQUENTIAL = 0x2 491 | FADV_WILLNEED = 0x3 492 | FADV_DONTNEED = 0x4 493 | FADV_NOREUSE = 0x5 494 | ) 495 | 496 | -------------------------------------------------------------------------------- /sources/bin/md5-cpio.rs: -------------------------------------------------------------------------------- 1 | 2 | fn main () -> ! { 3 | ::md5_tools::main_cpio::main_0 (); 4 | } 5 | -------------------------------------------------------------------------------- /sources/bin/md5-create.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -E -u -o pipefail -o noclobber -o noglob +o braceexpand || exit -- 1 4 | trap 'printf -- "[ee] failed: %s\n" "${BASH_COMMAND}" >&2' ERR || exit -- 1 5 | 6 | test "${#}" == 1 7 | 8 | _source="${1}" 9 | 10 | _source="$( exec -- readlink -e -- "${_source}" )" 11 | _timestamp="$( exec -- date '+%Y-%m-%d-%H-%M-%S' )" 12 | 13 | if test -e "${_source}/.md5" ; then 14 | _target="${_source}/.md5/${_timestamp}.md5" 15 | _target_skip="./.md5/${_timestamp}.md5" 16 | else 17 | _target="${_source}/.--${_timestamp}.md5" 18 | _target_skip="./.--${_timestamp}.md5" 19 | fi 20 | 21 | test -d "${_source}" 22 | test ! -e "${_target}" 23 | 24 | cd -- "${_source}" 25 | 26 | test ! -e "${_target}" 27 | test ! -e "${_target}.tmp" 28 | touch -- "${_target}.tmp" 29 | test -f "${_target}.tmp" 30 | 31 | find \ 32 | . 
\ 33 | -xdev \ 34 | \( -type d -exec test -e {}/.md5.excluded \; -prune \) -o \ 35 | \( \ 36 | -type f \ 37 | -not -path "${_target_skip}.tmp" \ 38 | -print0 \ 39 | \) \ 40 | | LC_ALL=C sort -z \ 41 | | xargs -r -0 -n 64 -- md5sum -b -- \ 42 | | tee -- "${_target}.tmp" 43 | 44 | chmod 400 -- "${_target}.tmp" 45 | mv -n -T -- "${_target}.tmp" "${_target}" 46 | 47 | exit -- 0 48 | 49 | -------------------------------------------------------------------------------- /sources/bin/md5-create.rs: -------------------------------------------------------------------------------- 1 | 2 | fn main () -> ! { 3 | ::md5_tools::main_create::main_0 (); 4 | } 5 | -------------------------------------------------------------------------------- /sources/bin/md5-diff.go: -------------------------------------------------------------------------------- 1 | 2 | 3 | package main 4 | 5 | 6 | import "bufio" 7 | import "fmt" 8 | import "os" 9 | import "regexp" 10 | import "sort" 11 | 12 | 13 | type records struct { 14 | name string 15 | hashIndex map[string][]string 16 | pathIndex map[string]string 17 | hashes []string 18 | paths []string 19 | statistics recordStatistics 20 | } 21 | 22 | type recordStatistics struct { 23 | uniqueHashes uint 24 | duplicateHashes uint 25 | paths uint 26 | } 27 | 28 | type diff struct { 29 | recordsA *records 30 | recordsB *records 31 | entries []diffEntry 32 | statistics diffStatistics 33 | } 34 | 35 | type diffEntry struct { 36 | source *records 37 | path string 38 | hash string 39 | status uint 40 | duplicate bool 41 | pathsInOther []string 42 | } 43 | 44 | type diffStatistics struct { 45 | uniqueHashesInA uint 46 | uniqueHashesInB uint 47 | sameHashes uint 48 | uniquePathsInA uint 49 | uniquePathsInB uint 50 | renamedPathsInA uint 51 | renamedPathsInB uint 52 | samePaths uint 53 | matchingPaths uint 54 | conflictingPaths uint 55 | } 56 | 57 | const ( 58 | undefined = iota 59 | unique = iota 60 | matching = iota 61 | conflicting = iota 62 | renamed = iota 63 | ) 64 | 65 | 66 | func main () () { 67 | 68 | if (len (os.Args) != 3) { 69 | abort ("invalid arguments (expected exactly 2)", nil) 70 | } 71 | 72 | var _recordsA *records 73 | { 74 | if _entries, _error := parseFileAtPath (os.Args[1]); _error != nil { 75 | abort ("failed while parsing the first record file", _error) 76 | } else if _records, _error := processRecords (os.Args[1], _entries); _error != nil { 77 | abort ("failed while processing the first record set", _error) 78 | } else { 79 | _recordsA = _records 80 | } 81 | } 82 | var _recordsB *records 83 | { 84 | if _entries, _error := parseFileAtPath (os.Args[2]); _error != nil { 85 | abort ("failed while parsing the second record file", _error) 86 | } else if _records, _error := processRecords (os.Args[2], _entries); _error != nil { 87 | abort ("failed while processing the second record set", _error) 88 | } else { 89 | _recordsB = _records 90 | } 91 | } 92 | 93 | var _diff *diff 94 | if _diff_, _error := processDiff (_recordsA, _recordsB); _error != nil { 95 | abort ("failed while diff-ing the record set", _error) 96 | } else { 97 | _diff = _diff_ 98 | } 99 | 100 | if true { 101 | printRecordStatistics ('A', _recordsA) 102 | printRecordStatistics ('B', _recordsB) 103 | printDiffStatistics (_diff) 104 | } 105 | if true { 106 | printDiffEntries (_diff) 107 | } 108 | } 109 | 110 | 111 | func printRecordStatistics (_tag rune, _records *records) () { 112 | fmt.Fprintf (os.Stdout, "\n") 113 | fmt.Fprintf (os.Stdout, "## Dataset (%c) statistics\n", _tag) 114 | fmt.Fprintf (os.Stdout, 
"## * paths : %8d\n", _records.statistics.paths) 115 | fmt.Fprintf (os.Stdout, "## * distinct hashes : %8d\n", _records.statistics.uniqueHashes) 116 | fmt.Fprintf (os.Stdout, "## * unique hashes : %8d\n", _records.statistics.uniqueHashes - _records.statistics.duplicateHashes) 117 | fmt.Fprintf (os.Stdout, "## * duplicate hashes : %8d\n", _records.statistics.duplicateHashes) 118 | fmt.Fprintf (os.Stdout, "## * source: `%s`\n", _records.name) 119 | } 120 | 121 | 122 | func printDiffStatistics (_diff *diff) () { 123 | fmt.Fprintf (os.Stdout, "\n") 124 | fmt.Fprintf (os.Stdout, "## Diff statistics (A) vs (B)\n") 125 | // fmt.Fprintf (os.Stdout, "## * (A) -> `%s`\n", _diff.recordsA.name) 126 | // fmt.Fprintf (os.Stdout, "## * (B) -> `%s`\n", _diff.recordsB.name) 127 | fmt.Fprintf (os.Stdout, "## (hashes)\n") 128 | fmt.Fprintf (os.Stdout, "## * unique hashes in (A) : %8d\n", _diff.statistics.uniqueHashesInA) 129 | fmt.Fprintf (os.Stdout, "## * unique hashes in (B) : %8d\n", _diff.statistics.uniqueHashesInB) 130 | fmt.Fprintf (os.Stdout, "## * same hashes in both : %8d\n", _diff.statistics.sameHashes) 131 | fmt.Fprintf (os.Stdout, "## (paths based on hashes)\n") 132 | fmt.Fprintf (os.Stdout, "## * unique paths in (A) : %8d\n", _diff.statistics.uniquePathsInA) 133 | fmt.Fprintf (os.Stdout, "## * unique paths in (B) : %8d\n", _diff.statistics.uniquePathsInB) 134 | fmt.Fprintf (os.Stdout, "## * renamed paths in (A) : %8d\n", _diff.statistics.renamedPathsInA) 135 | fmt.Fprintf (os.Stdout, "## * renamed paths in (B) : %8d\n", _diff.statistics.renamedPathsInB) 136 | fmt.Fprintf (os.Stdout, "## * same paths in both : %8d\n", _diff.statistics.samePaths) 137 | fmt.Fprintf (os.Stdout, "## * matching paths : %8d\n", _diff.statistics.matchingPaths) 138 | fmt.Fprintf (os.Stdout, "## * conflicting paths : %8d\n", _diff.statistics.conflictingPaths) 139 | } 140 | 141 | 142 | func printDiffEntries (_diff *diff) () { 143 | 144 | if true { 145 | fmt.Fprintf (os.Stdout, "\n") 146 | fmt.Fprintf (os.Stdout, "#### All diff entries unique for (A)\n") 147 | fmt.Fprintf (os.Stdout, "## * (A) -> `%s`\n", _diff.recordsA.name) 148 | fmt.Fprintf (os.Stdout, "## * unique hashes in (A) : %8d\n", _diff.statistics.uniqueHashesInA) 149 | fmt.Fprintf (os.Stdout, "\n") 150 | for _, _entry := range _diff.entries { 151 | if (_entry.status != unique) || (_entry.source != _diff.recordsA) { 152 | continue 153 | } 154 | printDiffEntry (_diff, &_entry, false, false, false) 155 | } 156 | fmt.Fprintf (os.Stdout, "\n") 157 | } 158 | 159 | if true { 160 | fmt.Fprintf (os.Stdout, "\n") 161 | fmt.Fprintf (os.Stdout, "#### All diff entries unique for (B)\n") 162 | fmt.Fprintf (os.Stdout, "## * (B) -> `%s`\n", _diff.recordsB.name) 163 | fmt.Fprintf (os.Stdout, "## * unique hashes in (B) : %8d\n", _diff.statistics.uniqueHashesInB) 164 | fmt.Fprintf (os.Stdout, "\n") 165 | for _, _entry := range _diff.entries { 166 | if (_entry.status != unique) || (_entry.source != _diff.recordsB) { 167 | continue 168 | } 169 | printDiffEntry (_diff, &_entry, false, false, false) 170 | } 171 | fmt.Fprintf (os.Stdout, "\n") 172 | } 173 | 174 | if true { 175 | fmt.Fprintf (os.Stdout, "\n") 176 | fmt.Fprintf (os.Stdout, "#### All diff entries conflicting\n") 177 | fmt.Fprintf (os.Stdout, "## * (A) -> `%s`\n", _diff.recordsA.name) 178 | fmt.Fprintf (os.Stdout, "## * (B) -> `%s`\n", _diff.recordsB.name) 179 | fmt.Fprintf (os.Stdout, "## * conflicting paths : %8d\n", _diff.statistics.conflictingPaths) 180 | fmt.Fprintf (os.Stdout, "\n") 181 | for _, _entry := range 
_diff.entries { 182 | if _entry.status != conflicting { 183 | continue 184 | } 185 | printDiffEntry (_diff, &_entry, false, false, false) 186 | } 187 | fmt.Fprintf (os.Stdout, "\n") 188 | } 189 | 190 | if true { 191 | fmt.Fprintf (os.Stdout, "\n") 192 | fmt.Fprintf (os.Stdout, "#### All diff entries renamed\n") 193 | fmt.Fprintf (os.Stdout, "## * (A) -> `%s`\n", _diff.recordsA.name) 194 | fmt.Fprintf (os.Stdout, "## * (B) -> `%s`\n", _diff.recordsB.name) 195 | fmt.Fprintf (os.Stdout, "## * renamed paths in (A) : %8d\n", _diff.statistics.renamedPathsInA) 196 | fmt.Fprintf (os.Stdout, "## * renamed paths in (B) : %8d\n", _diff.statistics.renamedPathsInB) 197 | fmt.Fprintf (os.Stdout, "\n") 198 | for _, _entry := range _diff.entries { 199 | if _entry.status != renamed { 200 | continue 201 | } 202 | printDiffEntry (_diff, &_entry, false, false, false) 203 | } 204 | fmt.Fprintf (os.Stdout, "\n") 205 | } 206 | 207 | if false { 208 | fmt.Fprintf (os.Stdout, "\n") 209 | fmt.Fprintf (os.Stdout, "#### All diff entries\n") 210 | fmt.Fprintf (os.Stdout, "## * (A) -> `%s`\n", _diff.recordsA.name) 211 | fmt.Fprintf (os.Stdout, "## * (B) -> `%s`\n", _diff.recordsB.name) 212 | fmt.Fprintf (os.Stdout, "\n") 213 | for _, _entry := range _diff.entries { 214 | printDiffEntry (_diff, &_entry, true, true, true) 215 | } 216 | fmt.Fprintf (os.Stdout, "\n") 217 | } 218 | } 219 | 220 | 221 | func printDiffEntry (_diff *diff, _entry *diffEntry, _detailed bool, _alternatives bool, _duplicates bool) () { 222 | _sourceLabel := resolveDiffEntrySourceLabel (_diff, _entry) 223 | _flags := resolveDiffEntryFlags (_diff, _entry) 224 | switch _entry.status { 225 | case unique : 226 | if _detailed { 227 | fmt.Fprintf (os.Stdout, "++ %s %s %s %s\n", _sourceLabel, _entry.hash, _flags, _entry.path) 228 | } else { 229 | fmt.Fprintf (os.Stdout, "+%s %s %s\n", _sourceLabel, _entry.hash, _entry.path) 230 | } 231 | case matching : 232 | if _detailed { 233 | fmt.Fprintf (os.Stdout, "== %s %s %s %s\n", _sourceLabel, _entry.hash, _flags, _entry.path) 234 | } else { 235 | fmt.Fprintf (os.Stdout, "=%s %s %s\n", _sourceLabel, _entry.hash, _entry.path) 236 | } 237 | case conflicting : 238 | if _detailed { 239 | fmt.Fprintf (os.Stdout, "!! 
%s %s %s %s\n", _sourceLabel, _entry.hash, _flags, _entry.path) 240 | } else { 241 | fmt.Fprintf (os.Stdout, "!%s %s %s\n", _sourceLabel, _entry.hash, _entry.path) 242 | } 243 | case renamed : 244 | if _detailed { 245 | fmt.Fprintf (os.Stdout, "~~ %s %s %s %s\n", _sourceLabel, _entry.hash, _flags, _entry.path) 246 | if _alternatives { 247 | for _, _alternative := range _entry.pathsInOther { 248 | fmt.Fprintf (os.Stdout, "## ~> %s\n", _alternative) 249 | } 250 | } 251 | } else { 252 | fmt.Fprintf (os.Stdout, "~%s %s %s\n", _sourceLabel, _entry.hash, _entry.path) 253 | } 254 | default : 255 | panic ("assertion") 256 | } 257 | if _detailed && _duplicates && _entry.duplicate { 258 | for _, _alternative := range _entry.source.hashIndex[_entry.hash] { 259 | if _alternative == _entry.path { 260 | continue 261 | } 262 | fmt.Fprintf (os.Stdout, "## D> %s\n", _alternative) 263 | } 264 | } 265 | } 266 | 267 | 268 | func resolveDiffEntrySourceLabel (_diff *diff, _entry *diffEntry) (string) { 269 | if _entry.source == _diff.recordsA { 270 | return "A" 271 | } else if _entry.source == _diff.recordsB { 272 | return "B" 273 | } else { 274 | panic ("assertion") 275 | } 276 | } 277 | 278 | func resolveDiffEntryFlags (_diff *diff, _entry *diffEntry) (string) { 279 | if _entry.duplicate { 280 | return "D" 281 | } else { 282 | return " " 283 | } 284 | } 285 | 286 | 287 | func processDiff (_recordsA *records, _recordsB *records) (*diff, error) { 288 | 289 | _hashes := append (_recordsA.hashes, _recordsB.hashes ...) 290 | _paths := append (_recordsA.paths, _recordsB.paths ...) 291 | _entries := make ([]diffEntry, 0, len (_paths)) 292 | var _statistics diffStatistics 293 | sort.Strings (_hashes) 294 | sort.Strings (_paths) 295 | 296 | for _index, _path := range _paths { 297 | if (_index > 0) && (_paths[_index - 1] == _path) { 298 | continue 299 | } 300 | 301 | _hashInA, _existsInA := _recordsA.pathIndex[_path] 302 | _hashInB, _existsInB := _recordsB.pathIndex[_path] 303 | 304 | _entryForA := diffEntry { 305 | source : _recordsA, 306 | path : _path, 307 | hash : _hashInA, 308 | status : undefined, 309 | } 310 | _entryForB := diffEntry { 311 | source : _recordsB, 312 | path : _path, 313 | hash : _hashInB, 314 | status : undefined, 315 | } 316 | 317 | if _existsInA && _existsInB { 318 | if _hashInA == _hashInB { 319 | _entryForA.status = matching 320 | _entryForB.status = matching 321 | _statistics.matchingPaths += 1 322 | } else { 323 | _entryForA.status = conflicting 324 | _entryForB.status = conflicting 325 | _statistics.conflictingPaths += 1 326 | } 327 | _statistics.samePaths += 1 328 | } else if _existsInA { 329 | if _pathsInB, _hashExistsInB := _recordsB.hashIndex[_hashInA]; _hashExistsInB { 330 | _entryForA.status = renamed 331 | _entryForA.pathsInOther = _pathsInB 332 | _statistics.renamedPathsInA += 1 333 | } else { 334 | _entryForA.status = unique 335 | _statistics.uniquePathsInA += 1 336 | } 337 | } else if _existsInB { 338 | if _pathsInA, _hashExistsInA := _recordsA.hashIndex[_hashInB]; _hashExistsInA { 339 | _entryForB.status = renamed 340 | _entryForB.pathsInOther = _pathsInA 341 | _statistics.renamedPathsInB += 1 342 | } else { 343 | _entryForB.status = unique 344 | _statistics.uniquePathsInB += 1 345 | } 346 | } else { 347 | panic ("assertion") 348 | } 349 | 350 | if _existsInA && (len (_recordsA.hashIndex[_hashInA]) > 1) { 351 | _entryForA.duplicate = true 352 | } 353 | if _existsInB && (len (_recordsB.hashIndex[_hashInB]) > 1) { 354 | _entryForB.duplicate = true 355 | } 356 | 357 | if 
_entryForA.status != undefined {
358 | _entries = append (_entries, _entryForA)
359 | }
360 | if _entryForB.status != undefined {
361 | _entries = append (_entries, _entryForB)
362 | }
363 | }
364 |
365 | for _index, _hash := range _hashes {
366 | if (_index > 0) && (_hashes[_index - 1] == _hash) {
367 | continue
368 | }
369 | _, _existsInA := _recordsA.hashIndex[_hash]
370 | _, _existsInB := _recordsB.hashIndex[_hash]
371 | if _existsInA && _existsInB {
372 | _statistics.sameHashes += 1
373 | } else if _existsInA {
374 | _statistics.uniqueHashesInA += 1
375 | } else if _existsInB {
376 | _statistics.uniqueHashesInB += 1
377 | } else {
378 | panic ("assertion")
379 | }
380 | }
381 |
382 | _diff := & diff {
383 | recordsA : _recordsA,
384 | recordsB : _recordsB,
385 | entries : _entries,
386 | statistics : _statistics,
387 | }
388 |
389 | return _diff, nil
390 | }
391 |
392 |
393 | func processRecords (_name string, _entries map[string][]string) (*records, error) {
394 | _pathIndex := make (map[string]string, len (_entries))
395 | _hashes := make ([]string, 0, len (_entries))
396 | _paths := make ([]string, 0, len (_entries))
397 | _duplicateHashes := 0
398 | for _entryHash, _entryPaths := range _entries {
399 | _hashes = append (_hashes, _entryHash)
400 | _paths = append (_paths, _entryPaths ...)
401 | if len (_entryPaths) > 1 {
402 | _duplicateHashes += 1
403 | }
404 | for _, _entryPath := range _entryPaths {
405 | if _, _exists := _pathIndex[_entryPath]; _exists {
406 | return nil, fmt.Errorf ("found duplicate path `%s`", _entryPath)
407 | }
408 | _pathIndex[_entryPath] = _entryHash
409 | }
410 | }
411 | sort.Strings (_hashes)
412 | sort.Strings (_paths)
413 | _records := & records {
414 | name : _name,
415 | hashIndex : _entries,
416 | pathIndex : _pathIndex,
417 | hashes : _hashes,
418 | paths : _paths,
419 | statistics : recordStatistics {
420 | uniqueHashes : uint (len (_hashes)),
421 | duplicateHashes : uint (_duplicateHashes),
422 | paths : uint (len (_paths)),
423 | },
424 | }
425 | return _records, nil
426 | }
427 |
428 |
429 | func parseFileAtPath (_path string) (map[string][]string, error) {
430 | if _file, _error := os.Open (_path); _error == nil {
431 | // NOTE: The `defer` must precede the `return`, otherwise it never executes and the descriptor leaks.
432 | defer _file.Close ()
433 | return parseFile (_file)
434 | } else {
435 | return nil, _error
436 | }
437 | }
438 |
439 |
440 | func parseFile (_file * os.File) (map[string][]string, error) {
441 | _records := make (map[string][]string, 1024)
442 | _scanner := bufio.NewScanner (_file)
443 | for _scanner.Scan () {
444 | if _error := parseLine (_scanner.Text (), _records); _error != nil {
445 | return nil, _error
446 | }
447 | }
448 | if _error := _scanner.Err (); _error != nil {
449 | return nil, _error
450 | }
451 | return _records, nil
452 | }
453 |
454 |
455 | func parseLine (_line string, _records map[string][]string) (error) {
456 | if ignoredLine.MatchString (_line) {
457 | return nil
458 | }
459 | if _slices := md5RecordLine.FindStringSubmatch (_line); _slices != nil {
460 | _hash := _slices[1]
461 | _path := _slices[2]
462 | if (len (_path) >= 2) && (_path[0:2] == "./") {
463 | _path = _path[2:]
464 | } else if _path[0:1] == "/" {
465 | _path = _path[1:]
466 | }
467 | if _paths, _exists := _records[_hash]; _exists {
468 | _records[_hash] = append (_paths, _path)
469 | } else {
470 | _records[_hash] = []string {_path}
471 | }
472 | return nil
473 | } else {
474 | return fmt.Errorf ("invalid record line: `%s`", _line)
475 | }
476 | }
477 |
478 |
479 |
480 | func abort (_message string, _error
error) () { 481 | fmt.Fprintf (os.Stderr, "[!!] %s\n", _message) 482 | fmt.Fprintf (os.Stderr, "[!!] aborting!\n") 483 | if _error != nil { 484 | fmt.Fprintf (os.Stderr, "[!!] (error:) %s\n", _error) 485 | } 486 | os.Exit (1) 487 | panic ("assertion-failed") 488 | } 489 | 490 | 491 | var ignoredLine *regexp.Regexp = regexp.MustCompile (`(^#.*$)|(^[^\t ]*$)`) 492 | var md5RecordLine *regexp.Regexp = regexp.MustCompile (`^([0-9a-f]{32}) \*(.+)$`) 493 | 494 | -------------------------------------------------------------------------------- /sources/bin/md5-diff.rs: -------------------------------------------------------------------------------- 1 | 2 | fn main () -> ! { 3 | ::md5_tools::main_diff::main_0 (); 4 | } 5 | -------------------------------------------------------------------------------- /sources/bin/md5-link.go: -------------------------------------------------------------------------------- 1 | 2 | 3 | package main 4 | 5 | 6 | import "bufio" 7 | import "fmt" 8 | import "io" 9 | import "os" 10 | import "path" 11 | import "regexp" 12 | import "strconv" 13 | import "sync" 14 | import "syscall" 15 | 16 | 17 | 18 | 19 | func main () () { 20 | 21 | if len (os.Args) != 7 { 22 | panic ("[0071e111] invalid arguments") 23 | } 24 | 25 | _hashesPath := os.Args[1] 26 | _targetPath := os.Args[2] 27 | _blobsPath := os.Args[3] 28 | _blobsSuffix := os.Args[4] 29 | _blobsLevels := -1 30 | if _value, _error := strconv.ParseUint (os.Args[5], 10, 16); _error == nil { 31 | _blobsLevels = int (_value) 32 | } else { 33 | panic (_error) 34 | } 35 | if (_blobsLevels < 0) || (_blobsLevels > 2) { 36 | panic ("[ef8c8ebc] invalid arguments") 37 | } 38 | _parallelism := 16 39 | if _value, _error := strconv.ParseUint (os.Args[6], 10, 16); _error == nil { 40 | if _value != 0 { 41 | _parallelism = int (_value) 42 | } 43 | } else { 44 | panic (_error) 45 | } 46 | if (_parallelism < 1) || (_parallelism > 128) { 47 | panic ("[047e0205] invalid arguments") 48 | } 49 | 50 | 51 | if _stat, _error := os.Stat (_blobsPath); _error == nil { 52 | if ! _stat.Mode () .IsDir () { 53 | panic (fmt.Sprintf ("[0337dae9] invalid blobs folder (non folder) `%s`", _blobsPath)) 54 | } else { 55 | // NOP 56 | } 57 | } else if os.IsNotExist (_error) { 58 | panic (fmt.Sprintf ("[e8d2029c] invalid blobs folder (not found) `%s`", _blobsPath)) 59 | } else { 60 | panic (_error) 61 | } 62 | 63 | if _stat, _error := os.Stat (_targetPath); _error == nil { 64 | if ! _stat.Mode () .IsDir () { 65 | panic (fmt.Sprintf ("[f6ea9a41] invalid target folder (non folder) `%s`", _targetPath)) 66 | } else { 67 | // NOP 68 | } 69 | } else if os.IsNotExist (_error) { 70 | panic (fmt.Sprintf ("[b9843cd6] invalid target folder (not found) `%s`", _targetPath)) 71 | } else { 72 | panic (_error) 73 | } 74 | 75 | 76 | var _hashesStream *bufio.Reader 77 | if _stream_0 , _error := os.Open (_hashesPath); _error == nil { 78 | _hashesStream = bufio.NewReaderSize (_stream_0, 16 * 1024 * 1024) 79 | } else { 80 | panic (_error) 81 | } 82 | 83 | 84 | _workersQueue := make (chan [2]string, _parallelism * 1024) 85 | _workersDone := & sync.WaitGroup {} 86 | for _index := 0; _index < _parallelism; _index += 1 { 87 | _workersDone.Add (1) 88 | go func () () { 89 | for _hash_and_path := range _workersQueue { 90 | _hash := _hash_and_path[0] 91 | _path := _hash_and_path[1] 92 | link (_hash, _path, _targetPath, _blobsPath, _blobsSuffix, _blobsLevels) 93 | } 94 | _workersDone.Done () 95 | } () 96 | } 97 | 98 | 99 | for { 100 | 101 | 102 | // NOTE: Parse hash lines... 
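// NOTE: For reference, each record read below is expected to match `md5RecordLine`
// (defined at the bottom of this file), i.e. the format emitted for example by
// `md5sum --binary --zero`, with records delimited by `\0` instead of `\n`:
//
//     d41d8cd98f00b204e9800998ecf8427e *./path/to/file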
103 | 104 | var _line string 105 | if _line_0, _error := _hashesStream.ReadString (0); _error == nil { 106 | _line = _line_0 107 | if _line[len (_line) - 1] == 0 { 108 | _line = _line[: len (_line) - 1] 109 | } 110 | } else if _error == io.EOF { 111 | if _line_0 == "" { 112 | break 113 | } else { 114 | panic (fmt.Sprintf ("[9e33c96b] invalid line `%s`", _line_0)) 115 | } 116 | } else { 117 | panic (_error) 118 | } 119 | 120 | var _hash string 121 | var _path string 122 | if _slices := md5RecordLine.FindStringSubmatch (_line); _slices != nil { 123 | _hash = _slices[1] 124 | _path = _slices[2] 125 | } else { 126 | panic (fmt.Sprintf ("[4ac97db6] invalid line `%s`", _line)) 127 | } 128 | 129 | 130 | // NOTE: Sanity check paths... 131 | 132 | if (len (_path) >= 2) && (_path[0:2] == "./") { 133 | _path = _path[2:] 134 | } else if _path[0:1] == "/" { 135 | _path = _path[1:] 136 | } 137 | if (_path != path.Clean (_path)) || (_path[0:1] == "/") { 138 | panic (fmt.Sprintf ("[a28f4f30] invalid path `%s`", _path)) 139 | } 140 | 141 | 142 | // NOTE: Do not skip empty files... 143 | 144 | if _hash == md5EmptyHash { 145 | // NOP 146 | } 147 | 148 | // NOTE: Skip invalid files... 149 | 150 | if _hash == md5InvalidHash { 151 | continue 152 | } 153 | 154 | _workersQueue <- [2]string { _hash, _path } 155 | } 156 | 157 | close (_workersQueue) 158 | _workersDone.Wait () 159 | } 160 | 161 | 162 | 163 | 164 | func link (_hash string, _path string, _targetPath string, _blobsPath string, _blobsSuffix string, _blobsLevels int) () { 165 | 166 | 167 | // NOTE: Compute source and target paths... 168 | 169 | var _blobsFolder_X string 170 | if _blobsLevels == 0 { 171 | _blobsFolder_X = _blobsPath 172 | } else if _blobsLevels == 1 { 173 | _blobsFolder_1 := path.Join (_blobsPath, _hash[0:2]) 174 | _blobsFolder_X = _blobsFolder_1 175 | } else if _blobsLevels == 2 { 176 | _blobsFolder_1 := path.Join (_blobsPath, _hash[0:2]) 177 | _blobsFolder_2 := path.Join (_blobsFolder_1, _hash[0:4]) 178 | _blobsFolder_X = _blobsFolder_2 179 | } else { 180 | panic ("[e48df570]") 181 | } 182 | 183 | _blobFile := path.Join (_blobsFolder_X, _hash) 184 | if _blobsSuffix != "" { 185 | _blobFile += _blobsSuffix 186 | } 187 | 188 | _targetFile := path.Join (_targetPath, _path) 189 | _targetFolder := path.Dir (_targetFile) 190 | 191 | 192 | // NOTE: Check if blob file exists... 193 | 194 | var _blobStat os.FileInfo 195 | if _hash == md5EmptyHash { 196 | // NOP 197 | } else if _stat, _error := os.Lstat (_blobFile); _error == nil { 198 | if ! _stat.Mode () .IsRegular () { 199 | panic (fmt.Sprintf ("[8484e3c6] invalid blob file (non file) `%s`", _blobFile)) 200 | } else { 201 | _blobStat = _stat 202 | // NOP 203 | } 204 | } else if os.IsNotExist (_error) { 205 | fmt.Fprintf (os.Stderr, "[ee] [b888be36] missing blob file `%s`; skipping!\n", _blobFile) 206 | return 207 | } else if _error, _ok := _error.(*os.PathError); _ok && _error.Err == syscall.ENOTDIR { 208 | panic (fmt.Sprintf ("[931d8f4d] invalid blob file (parent non folder) `%s`", _blobFile)) 209 | } else { 210 | panic (_error) 211 | } 212 | 213 | 214 | // NOTE: Check if source target file exists... 215 | 216 | if _stat, _error := os.Lstat (_targetFile); _error == nil { 217 | if ! _stat.Mode () .IsRegular () { 218 | fmt.Fprintf (os.Stderr, "[ee] [6ffb7ba4] invalid target file (non file) `%s`; ignoring!\n", _targetFile) 219 | return 220 | } else if (_hash != md5EmptyHash) && !
os.SameFile (_blobStat, _stat) { 221 | fmt.Fprintf (os.Stderr, "[ee] [d5c5c73f] invalid target file (existing) `%s`; ignoring!\n", _targetFile) 222 | return 223 | } else if (_hash == md5EmptyHash) && (_stat.Size () != 0) { 224 | fmt.Fprintf (os.Stderr, "[ee] [f2b11a94] invalid target file (not empty) `%s`; ignoring!\n", _targetFile) 225 | return 226 | } else { 227 | // fmt.Fprintf (os.Stderr, "[dd] [518cc370] existing target file `%s`; skipping!\n", _targetFile) 228 | return 229 | } 230 | } else if os.IsNotExist (_error) { 231 | // NOP 232 | } else if _error, _ok := _error.(*os.PathError); _ok && _error.Err == syscall.ENOTDIR { 233 | fmt.Fprintf (os.Stderr, "[ee] [7cd24e86] invalid target file (parent non folder) `%s`; ignoring!\n", _targetFile) 234 | return 235 | } else { 236 | panic (_error) 237 | } 238 | 239 | 240 | fmt.Fprintf (os.Stderr, "[dd] [922b3386] linking `%s` -> `%s`...\n", _targetFile, _hash) 241 | 242 | if _error := os.MkdirAll (_targetFolder, 0700); _error != nil { 243 | fmt.Fprintf (os.Stderr, "[ee] [cefec6b9] failed creating target folder `%s`; ignoring!\n", _targetFolder); 244 | return 245 | } 246 | 247 | if _hash != md5EmptyHash { 248 | if _error := os.Link (_blobFile, _targetFile); _error != nil { 249 | fmt.Fprintf (os.Stderr, "[ee] [cefec6b9] failed linking target file `%s`; ignoring!\n", _targetFile); 250 | return 251 | } 252 | } else { 253 | if _file, _error := os.OpenFile (_targetFile, os.O_CREATE | os.O_EXCL | os.O_RDONLY, 0400); _error != nil { 254 | fmt.Fprintf (os.Stderr, "[ee] [315e2a09] failed creating target file `%s`; ignoring!\n", _targetFile); 255 | return 256 | } else { 257 | if _error := _file.Close (); _error != nil { 258 | fmt.Fprintf (os.Stderr, "[ee] [4eb1ecb6] failed creating target file `%s`; ignoring!\n", _targetFile); 259 | return 260 | } 261 | } 262 | } 263 | } 264 | 265 | 266 | var md5RecordLine *regexp.Regexp = regexp.MustCompile (`^([0-9a-f]{32}) \*(.+)$`) 267 | var md5EmptyHash string = "d41d8cd98f00b204e9800998ecf8427e" 268 | var md5InvalidHash string = "00000000000000000000000000000000" 269 | 270 | -------------------------------------------------------------------------------- /sources/lib/core.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | pub type HashValue = ::std::string::String; 4 | pub type HashValueRef = str; 5 | 6 | pub type HashBytes = ::std::vec::Vec<u8>; 7 | pub type HashBytesRef = [u8]; 8 | 9 | pub type PathValue = ::std::ffi::OsString; 10 | pub type PathValueRef = ::std::ffi::OsStr; 11 | 12 | -------------------------------------------------------------------------------- /sources/lib/digests.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | use ::digest; 4 | use ::md5; 5 | use ::sha1; 6 | use ::sha2; 7 | use ::sha3; 8 | 9 | 10 | use crate::hashes::*; 11 | use crate::prelude::*; 12 | 13 | 14 | 15 | 16 | pub fn digest <Input : io::Read> (_hash : &HashAlgorithm, _input : &mut Input, _output : &mut Vec<u8>) -> (io::Result<()>) { 17 | match _hash.kind { 18 | HashAlgorithmKind::MD5 => 19 | return digest_0::<md5::Md5, _> (_input, _output), 20 | HashAlgorithmKind::SHA1 => 21 | return digest_0::<sha1::Sha1, _> (_input, _output), 22 | HashAlgorithmKind::SHA2_224 => 23 | return digest_0::<sha2::Sha224, _> (_input, _output), 24 | HashAlgorithmKind::SHA2_256 => 25 | return digest_0::<sha2::Sha256, _> (_input, _output), 26 | HashAlgorithmKind::SHA2_384 => 27 | return digest_0::<sha2::Sha384, _> (_input, _output), 28 | HashAlgorithmKind::SHA2_512 => 29 | return digest_0::<sha2::Sha512, _> (_input, _output), 30 | HashAlgorithmKind::SHA3_224 => 31 | return
digest_0::<sha3::Sha3_224, _> (_input, _output), 32 | HashAlgorithmKind::SHA3_256 => 33 | return digest_0::<sha3::Sha3_256, _> (_input, _output), 34 | HashAlgorithmKind::SHA3_384 => 35 | return digest_0::<sha3::Sha3_384, _> (_input, _output), 36 | HashAlgorithmKind::SHA3_512 => 37 | return digest_0::<sha3::Sha3_512, _> (_input, _output), 38 | HashAlgorithmKind::GIT_SHA1 => 39 | return digest_git_sha1::<_> (_input, _output), 40 | } 41 | } 42 | 43 | 44 | pub fn digest_0 <Hash : digest::Digest + io::Write, Input : io::Read> (_input : &mut Input, _output : &mut Vec<u8>) -> (io::Result<()>) { 45 | 46 | let mut _hasher = Hash::new (); 47 | io::copy (_input, &mut _hasher) ?; 48 | 49 | let _hash = _hasher.finalize (); 50 | _output.extend_from_slice (_hash.as_slice ()); 51 | 52 | return Ok (()); 53 | } 54 | 55 | 56 | pub fn digest_git_sha1 <Input : io::Read> (_input : &mut Input, _output : &mut Vec<u8>) -> (io::Result<()>) { 57 | 58 | let mut _buffer = Vec::with_capacity (128 * 1024); 59 | io::copy (_input, &mut _buffer) ?; 60 | 61 | use ::digest::Digest; 62 | let mut _hasher = sha1::Sha1::new (); 63 | 64 | let _ = write! (_hasher, "blob {}\0", _buffer.len ()); 65 | 66 | _hasher.update (_buffer); 67 | 68 | let _hash = _hasher.finalize (); 69 | _output.extend_from_slice (_hash.as_slice ()); 70 | 71 | return Ok (()); 72 | } 73 | 74 | -------------------------------------------------------------------------------- /sources/lib/flags.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | use ::argparse; 4 | 5 | use crate::hashes::*; 6 | use crate::prelude::*; 7 | 8 | 9 | 10 | 11 | #[ derive (Clone, Eq, PartialEq) ] 12 | #[ derive (Default) ] 13 | pub struct CreateFlags { 14 | 15 | pub source_path : path::PathBuf, 16 | pub output_path : path::PathBuf, 17 | 18 | pub relative : bool, 19 | 20 | pub walk_xdev : bool, 21 | pub walk_follow : bool, 22 | pub walk_skip_marker : path::PathBuf, 23 | 24 | pub threads_count : usize, 25 | pub threads_nice : i8, 26 | 27 | pub queue_size : usize, 28 | pub batch_size : usize, 29 | pub batch_order : CreateBatchOrder, 30 | 31 | pub read_fadvise : bool, 32 | 33 | pub ignore_all_errors : bool, 34 | pub ignore_walk_errors : bool, 35 | pub ignore_open_errors : bool, 36 | pub ignore_read_errors : bool, 37 | 38 | pub report_progress : bool, 39 | pub report_errors_to_sink : bool, 40 | pub report_errors_to_stderr : bool, 41 | 42 | pub hashes_flags : HashesFlags, 43 | pub format_flags : HashesFormatFlags, 44 | 45 | } 46 | 47 | 48 | impl <'a> CreateFlags { 49 | 50 | pub fn argparse (&'a mut self, _parser : &mut argparse::parser::ArgumentParser<'a>) -> () { 51 | 52 | _parser.refer (&mut self.source_path) 53 | .add_argument ("source", argparse::Parse, "source file or folder") .required (); 54 | 55 | _parser.refer (&mut self.output_path) 56 | .add_option (&["--output", "-o"], argparse::Parse, "output file or folder (use `-` for stdout, `.` for auto-detection, or a destination file or folder) (`.` by default)"); 57 | 58 | self.hashes_flags.argparse (_parser); 59 | 60 | self.format_flags.argparse (_parser); 61 | 62 | self.relative = true; 63 | 64 | _parser.refer (&mut self.relative) 65 | .add_option (&["--relative"], argparse::StoreTrue, "output paths relative to source (enabled by default)") 66 | .add_option (&["--no-relative"], argparse::StoreFalse, ""); 67 | 68 | _parser.refer (&mut self.walk_xdev) 69 | .add_option (&["--xdev", "-x"], argparse::StoreTrue, "do not cross mount points (disabled by default)") 70 | .add_option (&["--no-xdev"], argparse::StoreFalse, ""); 71 | 72 | _parser.refer (&mut self.walk_follow) 73 | .add_option (&["--follow", "-L"], argparse::StoreTrue, "follow symlinks
(disabled by default) (n.b. source is always followed)") 74 | .add_option (&["--no-follow"], argparse::StoreFalse, ""); 75 | 76 | _parser.refer (&mut self.walk_skip_marker) 77 | .add_option (&["--skip-marker"], argparse::Parse, "skip a folder containing this marker file / symlink (n.b. source is never skipped)") 78 | .add_option (&["--skip-md5-excluded"], argparse::StoreConst (path::PathBuf::from (".md5.excluded")), "skip a folder containing the `.md5.excluded` marker file / symlink (n.b. source is never skipped)"); 79 | 80 | _parser.refer (&mut self.threads_count) 81 | .add_option (&["--workers-count", "-w"], argparse::Parse, "hashing workers count (16 by default)"); 82 | 83 | _parser.refer (&mut self.threads_nice) 84 | .add_option (&["--workers-nice"], argparse::Parse, "set OS process scheduling priority (i.e. `nice`) (left unchanged by default)"); 85 | 86 | self.batch_order = CreateBatchOrder::Inode; 87 | 88 | _parser.refer (&mut self.queue_size) 89 | .add_option (&["--workers-queue"], argparse::Parse, "hashing workers queue size (4096 times the workers count by default)"); 90 | 91 | _parser.refer (&mut self.batch_size) 92 | .add_option (&["--workers-batch"], argparse::Parse, "hashing workers batch size (half the workers queue size by default)"); 93 | 94 | _parser.refer (&mut self.batch_order) 95 | .add_option (&["--workers-sort"], argparse::Parse, "hashing workers batch sorting (use `walk`, `inode`, `inode-and-size`, `extent`, or `random`) (`inode` by default)"); 96 | 97 | self.read_fadvise = true; 98 | 99 | _parser.refer (&mut self.read_fadvise) 100 | .add_option (&["--fadvise"], argparse::StoreTrue, "use OS `fadvise` with sequential and no-reuse (enabled by default)") 101 | .add_option (&["--no-fadvise"], argparse::StoreFalse, ""); 102 | 103 | self.report_errors_to_sink = true; 104 | self.report_errors_to_stderr = true; 105 | 106 | _parser.refer (&mut self.report_errors_to_sink) 107 | .add_option (&["--errors-to-stdout"], argparse::StoreTrue, "on errors output an invalid hash (i.e. `00... */path/...`) (enabled by default)") 108 | .add_option (&["--no-errors-to-stdout"], argparse::StoreFalse, ""); 109 | 110 | _parser.refer (&mut self.report_errors_to_stderr) 111 | .add_option (&["--errors-to-stderr"], argparse::StoreTrue, "on errors report a message (enabled by default)") 112 | .add_option (&["--no-errors-to-stderr"], argparse::StoreFalse, ""); 113 | 114 | _parser.refer (&mut self.ignore_all_errors) 115 | .add_option (&["--ignore-all-errors"], argparse::StoreTrue, "ignore all errors (disabled by default)"); 116 | 117 | _parser.refer (&mut self.ignore_walk_errors) 118 | .add_option (&["--ignore-walk-errors"], argparse::StoreTrue, "ignore walk errors (i.e. folder reading, perhaps due to permissions) (disabled by default)"); 119 | 120 | _parser.refer (&mut self.ignore_open_errors) 121 | .add_option (&["--ignore-open-errors"], argparse::StoreTrue, "ignore open errors (i.e. file opening, perhaps due to permissions) (disabled by default)"); 122 | 123 | _parser.refer (&mut self.ignore_read_errors) 124 | .add_option (&["--ignore-read-errors"], argparse::StoreTrue, "ignore read errors (i.e.
file reading, perhaps due to I/O) (disabled by default)"); 125 | 126 | self.report_progress = true; 127 | 128 | _parser.refer (&mut self.report_progress) 129 | .add_option (&["--progress"], argparse::StoreTrue, "monitor the progress (enabled by default)") 130 | .add_option (&["--no-progress"], argparse::StoreFalse, ""); 131 | } 132 | } 133 | 134 | 135 | 136 | 137 | #[ derive (Copy, Clone, Eq, PartialEq) ] 138 | pub enum CreateBatchOrder { 139 | Index, 140 | Inode, 141 | InodeAndSizeBuckets, 142 | Extent, 143 | Random, 144 | } 145 | 146 | 147 | impl Default for CreateBatchOrder { 148 | fn default () -> (Self) { 149 | CreateBatchOrder::Index 150 | } 151 | } 152 | 153 | 154 | impl argparse::FromCommandLine for CreateBatchOrder { 155 | 156 | fn from_argument (_value : &str) -> (Result) { 157 | match _value { 158 | "index" | "walk" => 159 | Ok (CreateBatchOrder::Index), 160 | "inode" | "" => 161 | Ok (CreateBatchOrder::Inode), 162 | "inode-and-size" => 163 | Ok (CreateBatchOrder::InodeAndSizeBuckets), 164 | "extent" => 165 | Ok (CreateBatchOrder::Extent), 166 | "random" => 167 | Ok (CreateBatchOrder::Random), 168 | _ => 169 | Err (format! ("[3046e5fa] invalid batch order `{}`", _value)), 170 | } 171 | } 172 | } 173 | 174 | 175 | 176 | 177 | #[ derive (Copy, Clone, Eq, PartialEq) ] 178 | pub struct HashesFlags { 179 | pub algorithm : &'static HashAlgorithm, 180 | } 181 | 182 | 183 | impl Default for HashesFlags { 184 | fn default () -> (Self) { 185 | HashesFlags { 186 | algorithm : &MD5, 187 | } 188 | } 189 | } 190 | 191 | 192 | impl <'a> HashesFlags { 193 | 194 | pub fn argparse (&'a mut self, _parser : &mut argparse::parser::ArgumentParser<'a>) -> () { 195 | _parser.refer (&mut self.algorithm) 196 | .add_option (&["--md5"], argparse::StoreConst (&MD5), "create/expect MD5 hashes (enabled by default)") 197 | .add_option (&["--sha1"], argparse::StoreConst (&SHA1), "create/expect SHA1 hashes") 198 | .add_option (&["--sha224", "--sha2-224"], argparse::StoreConst (&SHA2_224), "create/expect SHA2-224 hashes") 199 | .add_option (&["--sha256", "--sha2-256"], argparse::StoreConst (&SHA2_256), "create/expect SHA2-256 hashes") 200 | .add_option (&["--sha384", "--sha2-384"], argparse::StoreConst (&SHA2_384), "create/expect SHA2-384 hashes") 201 | .add_option (&["--sha512", "--sha2-512"], argparse::StoreConst (&SHA2_512), "create/expect SHA2-512 hashes") 202 | .add_option (&["--sha3-224"], argparse::StoreConst (&SHA3_224), "create/expect SHA3-224 hashes") 203 | .add_option (&["--sha3-256"], argparse::StoreConst (&SHA3_256), "create/expect SHA3-256 hashes") 204 | .add_option (&["--sha3-384"], argparse::StoreConst (&SHA3_384), "create/expect SHA3-384 hashes") 205 | .add_option (&["--sha3-512"], argparse::StoreConst (&SHA3_512), "create/expect SHA3-512 hashes") 206 | .add_option (&["--git-sha1"], argparse::StoreConst (&GIT_SHA1), "create/expect Git specific SHA1 hashes") 207 | ; 208 | } 209 | } 210 | 211 | 212 | 213 | 214 | #[ derive (Copy, Clone, Eq, PartialEq) ] 215 | #[ derive (Default) ] 216 | pub struct HashesFormatFlags { 217 | pub zero : bool, 218 | pub path : bool, 219 | } 220 | 221 | 222 | impl <'a> HashesFormatFlags { 223 | 224 | pub fn argparse (&'a mut self, _parser : &mut argparse::parser::ArgumentParser<'a>) -> () { 225 | _parser.refer (&mut self.zero) 226 | .add_option (&["--zero", "-z"], argparse::StoreTrue, "delimit records by `\\0` (as opposed by `\\n`) (disabled by default)") 227 | .add_option (&["--no-zero"], argparse::StoreFalse, ""); 228 | _parser.refer (&mut self.path) 229 | .add_option 
(&["--paths"], argparse::StoreTrue, "output paths (enabled by default)") 230 | .add_option (&["--no-paths"], argparse::StoreFalse, ""); 231 | } 232 | } 233 | 234 | 235 | 236 | 237 | #[ derive (Copy, Clone, Eq, PartialEq) ] 238 | #[ derive (Default) ] 239 | pub struct CompressionFlags { 240 | pub algorithm : CompressionAlgorithm, 241 | } 242 | 243 | 244 | impl <'a> CompressionFlags { 245 | 246 | pub fn argparse (&'a mut self, _parser : &mut argparse::parser::ArgumentParser<'a>) -> () { 247 | _parser.refer (&mut self.algorithm) 248 | .add_option (&["--gzip"], argparse::StoreConst (CompressionAlgorithm::Gzip), "create/expect `gzip` compressed") 249 | .add_option (&["--bzip2"], argparse::StoreConst (CompressionAlgorithm::Bzip2), "create/expect `bzip2` compressed") 250 | .add_option (&["--lzip"], argparse::StoreConst (CompressionAlgorithm::Lzip), "create/expect `lzip` compressed") 251 | .add_option (&["--xz"], argparse::StoreConst (CompressionAlgorithm::Xz), "create/expect `xz` compressed") 252 | .add_option (&["--lzma"], argparse::StoreConst (CompressionAlgorithm::Lzma), "create/expect `lzma` compressed") 253 | .add_option (&["--lz4"], argparse::StoreConst (CompressionAlgorithm::Lz4), "create/expect `lz4` compressed") 254 | .add_option (&["--lzo"], argparse::StoreConst (CompressionAlgorithm::Lzo), "create/expect `lzo` compressed") 255 | .add_option (&["--zstd"], argparse::StoreConst (CompressionAlgorithm::Zstd), "create/expect `zstd` compressed") 256 | .add_option (&["--no-compression"], argparse::StoreConst (CompressionAlgorithm::None), "create/expect uncompressed (enabled by default)") 257 | ; 258 | } 259 | } 260 | 261 | 262 | 263 | 264 | #[ derive (Copy, Clone, Eq, PartialEq) ] 265 | pub enum CompressionAlgorithm { 266 | None, 267 | Gzip, // https://www.gzip.org/ 268 | Bzip2, // http://sourceware.org/bzip2/ 269 | Lzip, // https://www.nongnu.org/lzip/ 270 | Xz, // https://tukaani.org/xz/ 271 | Lzma, // https://www.7-zip.org/sdk.html 272 | Lz4, // https://lz4.github.io/lz4/ 273 | Lzo, // http://www.lzop.org/ 274 | Zstd, // https://github.com/facebook/zstd 275 | } 276 | 277 | 278 | impl Default for CompressionAlgorithm { 279 | fn default () -> (Self) { 280 | CompressionAlgorithm::None 281 | } 282 | } 283 | 284 | -------------------------------------------------------------------------------- /sources/lib/hashes.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | #[ derive (Copy, Clone, Eq, PartialEq) ] 4 | #[ allow (non_camel_case_types) ] 5 | pub enum HashAlgorithmKind { 6 | MD5, 7 | SHA1, 8 | SHA2_224, 9 | SHA2_256, 10 | SHA2_384, 11 | SHA2_512, 12 | SHA3_224, 13 | SHA3_256, 14 | SHA3_384, 15 | SHA3_512, 16 | GIT_SHA1, 17 | } 18 | 19 | 20 | #[ derive (Copy, Clone, Eq, PartialEq) ] 21 | pub struct HashAlgorithm { 22 | pub kind : HashAlgorithmKind, 23 | pub name : &'static str, 24 | pub name_lower : &'static str, 25 | pub empty : &'static str, 26 | pub invalid : &'static str, 27 | pub invalid_raw : &'static [u8], 28 | pub pattern : &'static str, 29 | pub suffix : &'static str, 30 | } 31 | 32 | 33 | 34 | 35 | pub static MD5 : HashAlgorithm = HashAlgorithm { 36 | kind : HashAlgorithmKind::MD5, 37 | name : "MD5", name_lower : "md5", 38 | empty : "d41d8cd98f00b204e9800998ecf8427e", 39 | invalid : "00000000000000000000000000000000", 40 | invalid_raw : b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 41 | pattern : r"^(?-u)([0-9a-f]{32}) ([ *])(.+)$", 42 | suffix : ".md5", 43 | }; 44 | 45 | 46 | pub static SHA1 : HashAlgorithm = HashAlgorithm { 47 | kind : 
HashAlgorithmKind::SHA1, 48 | name : "SHA1", name_lower : "sha1", 49 | empty : "da39a3ee5e6b4b0d3255bfef95601890afd80709", 50 | invalid : "0000000000000000000000000000000000000000", 51 | invalid_raw : b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 52 | pattern : r"^(?-u)([0-9a-f]{40}) ([ *])(.+)$", 53 | suffix : ".sha1", 54 | }; 55 | 56 | 57 | pub static SHA2_224 : HashAlgorithm = HashAlgorithm { 58 | kind : HashAlgorithmKind::SHA2_224, 59 | name : "SHA224", name_lower : "sha224", 60 | empty : "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f", 61 | invalid : "00000000000000000000000000000000000000000000000000000000", 62 | invalid_raw : b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 63 | pattern : r"^(?-u)([0-9a-f]{56}) ([ *])(.+)$", 64 | suffix : ".sha224", 65 | }; 66 | 67 | pub static SHA2_256 : HashAlgorithm = HashAlgorithm { 68 | kind : HashAlgorithmKind::SHA2_256, 69 | name : "SHA256", name_lower : "sha256", 70 | empty : "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", 71 | invalid : "0000000000000000000000000000000000000000000000000000000000000000", 72 | invalid_raw : b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 73 | pattern : r"^(?-u)([0-9a-f]{64}) ([ *])(.+)$", 74 | suffix : ".sha256", 75 | }; 76 | 77 | pub static SHA2_384 : HashAlgorithm = HashAlgorithm { 78 | kind : HashAlgorithmKind::SHA2_384, 79 | name : "SHA384", name_lower : "sha384", 80 | empty : "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b", 81 | invalid : "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 82 | invalid_raw : b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 83 | pattern : r"^(?-u)([0-9a-f]{96}) ([ *])(.+)$", 84 | suffix : ".sha384", 85 | }; 86 | 87 | pub static SHA2_512 : HashAlgorithm = HashAlgorithm { 88 | kind : HashAlgorithmKind::SHA2_512, 89 | name : "SHA512", name_lower : "sha512", 90 | empty : "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", 91 | invalid : "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 92 | invalid_raw : b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 93 | pattern : r"^(?-u)([0-9a-f]{128}) ([ *])(.+)$", 94 | suffix : ".sha512", 95 | }; 96 | 97 | 98 | pub static SHA3_224 : HashAlgorithm = HashAlgorithm { 99 | kind : HashAlgorithmKind::SHA3_224, 100 | name : "SHA3-224", name_lower : "sha3-224", 101 | empty : "6b4e03423667dbb73b6e15454f0eb1abd4597f9a1b078e3f5b5a6bc7", 102 | invalid : "00000000000000000000000000000000000000000000000000000000", 103 | invalid_raw : b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 104 | pattern : r"^(?-u)([0-9a-f]{56}) ([ *])(.+)$", 105 | suffix : ".sha3-224", 106 | }; 107 | 108 | pub static SHA3_256 : HashAlgorithm = HashAlgorithm { 109 | kind : HashAlgorithmKind::SHA3_256, 110 | name : "SHA3-256", name_lower : "sha3-256", 111 | empty : "a7ffc6f8bf1ed76651c14756a061d662f580ff4de43b49fa82d80a4b80f8434a", 112 | invalid : "0000000000000000000000000000000000000000000000000000000000000000", 113 | invalid_raw : b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 114 | pattern : r"^(?-u)([0-9a-f]{64}) ([ *])(.+)$", 115 | suffix : ".sha3-256", 116 | }; 117 | 118 | pub static 
SHA3_384 : HashAlgorithm = HashAlgorithm { 119 | kind : HashAlgorithmKind::SHA3_384, 120 | name : "SHA3-384", name_lower : "sha3-384", 121 | empty : "0c63a75b845e4f7d01107d852e4c2485c51a50aaaa94fc61995e71bbee983a2ac3713831264adb47fb6bd1e058d5f004", 122 | invalid : "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 123 | invalid_raw : b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 124 | pattern : r"^(?-u)([0-9a-f]{96}) ([ *])(.+)$", 125 | suffix : ".sha3-384", 126 | }; 127 | 128 | pub static SHA3_512 : HashAlgorithm = HashAlgorithm { 129 | kind : HashAlgorithmKind::SHA3_512, 130 | name : "SHA3-512", name_lower : "sha3-512", 131 | empty : "a69f73cca23a9ac5c8b567dc185a756e97c982164fe25859e0d1dcc1475c80a615b2123af1f5f94c11e3e9402c3ac558f500199d95b6d3e301758586281dcd26", 132 | invalid : "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 133 | invalid_raw : b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 134 | pattern : r"^(?-u)([0-9a-f]{128}) ([ *])(.+)$", 135 | suffix : ".sha3-512", 136 | }; 137 | 138 | 139 | pub static GIT_SHA1 : HashAlgorithm = HashAlgorithm { 140 | kind : HashAlgorithmKind::GIT_SHA1, 141 | name : "GIT-SHA1", name_lower : "git-sha1", 142 | empty : "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 143 | invalid : "0000000000000000000000000000000000000000", 144 | invalid_raw : b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 145 | pattern : r"^(?-u)([0-9a-f]{40}) ([ *])(.+)$", 146 | suffix : ".git-sha1", 147 | }; 148 | 149 | -------------------------------------------------------------------------------- /sources/lib/lib.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | #![ no_implicit_prelude ] 4 | 5 | 6 | #![ allow (unused_parens) ] 7 | 8 | 9 | pub mod core; 10 | pub mod digests; 11 | pub mod flags; 12 | pub mod hashes; 13 | pub mod sinks; 14 | 15 | pub mod main_create; 16 | pub mod main_diff; 17 | pub mod main_cpio; 18 | 19 | mod prelude; 20 | 21 | -------------------------------------------------------------------------------- /sources/lib/main_cpio.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | use ::argparse; 4 | use ::cpio; 5 | use ::libc; 6 | 7 | use crate::digests::*; 8 | use crate::flags::*; 9 | use crate::hashes::*; 10 | use crate::prelude::*; 11 | use crate::sinks::*; 12 | 13 | 14 | 15 | 16 | pub fn main () -> (Result<(), io::Error>) { 17 | 18 | 19 | let mut _hashes_flags = HashesFlags { 20 | algorithm : &MD5, 21 | }; 22 | 23 | let mut _format_flags = HashesFormatFlags { 24 | zero : false, 25 | path : true, 26 | }; 27 | 28 | let mut _nice_level = 19 as i8; 29 | 30 | 31 | { 32 | let mut _parser = argparse::ArgumentParser::new (); 33 | _hashes_flags.argparse (&mut _parser); 34 | _format_flags.argparse (&mut _parser); 35 | _parser.refer (&mut _nice_level) .add_option (&["--nice"], argparse::Parse, "OS process scheduling priority (i.e. `nice`) (19 by default)"); 36 | _parser.parse_args_or_exit (); 37 | } 38 | 39 | 40 | if _nice_level != 0 { 41 | unsafe { 42 | // FIXME: Check the return value! 
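// (A possible sketch, assuming Linux/glibc semantics: `nice` may legitimately
// return `-1`, so the only reliable check is to clear `errno` before the call
// and inspect it afterwards, e.g.:
//
//     *libc::__errno_location () = 0;
//     if (libc::nice (_nice_level as i32) == -1)
//             && (io::Error::last_os_error () .raw_os_error () != Some (0)) {
//         eprintln! ("[ww] failed `nice`-ing the process; ignoring!");
//     }
// )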
43 | libc::nice (_nice_level as i32); 44 | } 45 | } 46 | 47 | 48 | let mut _input = io::stdin (); 49 | let mut _input = _input.lock (); 50 | 51 | let mut _output = io::stdout (); 52 | let mut _output = _output.lock (); 53 | 54 | let mut _sink = StandardHashesSink::new (&mut _output, _format_flags.zero, _format_flags.path); 55 | 56 | let mut _hash_buffer = Vec::with_capacity (128); 57 | let mut _path_buffer = Vec::with_capacity (4 * 1024); 58 | 59 | 60 | loop { 61 | 62 | let mut _record = cpio::newc::Reader::new (_input) ?; 63 | 64 | let _metadata = _record.entry (); 65 | if _metadata.is_trailer () { 66 | break; 67 | } 68 | 69 | if (_metadata.mode () & (libc::S_IFMT as u32)) == (libc::S_IFREG as u32) { 70 | 71 | let _hash = if (_metadata.file_size () > 0) || (_metadata.nlink () <= 1) { 72 | 73 | _hash_buffer.clear (); 74 | digest (_hashes_flags.algorithm, &mut _record, &mut _hash_buffer) ?; 75 | 76 | } else { 77 | 78 | eprintln! ("[ww] [7c9f8eb7] hard-link detected: `{}`; ignoring!", _metadata.name ()); 79 | 80 | _hash_buffer.clear (); 81 | _hash_buffer.extend_from_slice (_hashes_flags.algorithm.invalid_raw); 82 | }; 83 | 84 | let _metadata = _record.entry (); 85 | let _path = _metadata.name (); 86 | 87 | let _path_prefix = 88 | if _path.starts_with ("/") { "" } 89 | else if _path.starts_with ("./") { "" } 90 | else if _path.starts_with ("../") { "" } 91 | else { "./" }; 92 | 93 | _path_buffer.clear (); 94 | _path_buffer.extend_from_slice (_path_prefix.as_bytes ()); 95 | _path_buffer.extend_from_slice (_path.as_bytes ()); 96 | 97 | _sink.handle (ffi::OsStr::from_bytes (&_path_buffer), &_hash_buffer) ?; 98 | } 99 | 100 | _input = _record.finish () ?; 101 | } 102 | 103 | return Ok (()); 104 | } 105 | 106 | 107 | pub fn main_0 () -> ! { 108 | if let Err (_error) = main () { 109 | eprintln! ("[!!] 
{}", _error); 110 | process::exit (1); 111 | } else { 112 | process::exit (0); 113 | } 114 | } 115 | 116 | -------------------------------------------------------------------------------- /sources/lib/main_create.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | use ::atty; 4 | use ::argparse; 5 | use ::chrono; 6 | use ::crossbeam; 7 | use ::indicatif; 8 | use ::libc; 9 | use ::walkdir; 10 | 11 | use crate::digests::*; 12 | use crate::flags::*; 13 | use crate::prelude::*; 14 | use crate::sinks::*; 15 | 16 | 17 | 18 | 19 | #[ derive (Clone) ] 20 | struct HasherContext { 21 | 22 | flags : CreateFlags, 23 | queue : crossbeam::channel::Receiver, 24 | sink : sync::Arc>, 25 | errors : sync::Arc>>, 26 | progress : Option, 27 | } 28 | 29 | 30 | #[ derive (Clone) ] 31 | struct HasherTask { 32 | 33 | path : path::PathBuf, 34 | metadata : fs::Metadata, 35 | } 36 | 37 | 38 | #[ derive (Clone) ] 39 | struct Progress { 40 | 41 | folder : indicatif::ProgressBar, 42 | files : indicatif::ProgressBar, 43 | data : indicatif::ProgressBar, 44 | } 45 | 46 | 47 | 48 | 49 | pub fn main () -> (Result<(), io::Error>) { 50 | 51 | 52 | let mut _flags = CreateFlags::default (); 53 | _flags.format_flags.path = true; 54 | 55 | { 56 | let mut _parser = argparse::ArgumentParser::new (); 57 | _flags.argparse (&mut _parser); 58 | _parser.parse_args_or_exit (); 59 | } 60 | 61 | 62 | if _flags.output_path == path::Path::new ("") { 63 | _flags.output_path = path::PathBuf::from ("."); 64 | } 65 | if _flags.source_path == path::Path::new ("") { 66 | _flags.source_path = path::PathBuf::from ("."); 67 | } 68 | 69 | if _flags.threads_count == 0 { 70 | _flags.threads_count = 16; 71 | } 72 | if _flags.queue_size == 0 { 73 | _flags.queue_size = _flags.threads_count * 1024 * 4; 74 | } 75 | if _flags.batch_size == 0 { 76 | _flags.batch_size = _flags.queue_size / 2; 77 | } 78 | if _flags.ignore_all_errors { 79 | _flags.ignore_walk_errors = true; 80 | _flags.ignore_open_errors = true; 81 | _flags.ignore_read_errors = true; 82 | } 83 | 84 | 85 | if _flags.threads_nice != 0 { 86 | unsafe { 87 | // FIXME: Check the return value! 
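// (The same caveat as in `main_cpio.rs` above applies: `-1` is a valid result
// for `nice`, so a robust check would clear `errno` before the call and
// inspect it afterwards.)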
88 | libc::nice (_flags.threads_nice as i32); 89 | } 90 | } 91 | 92 | 93 | 94 | 95 | let _source_path = _flags.source_path.clone (); 96 | 97 | let _relative_path = match fs::metadata (&_source_path) { 98 | Ok (ref _stat) if _stat.is_dir () => 99 | if _flags.relative { 100 | Some (_source_path.clone ()) 101 | } else { 102 | None 103 | }, 104 | Ok (ref _stat) if _stat.is_file () => 105 | if _flags.relative { 106 | if let Some (_relative_path) = _source_path.parent () { 107 | Some (_relative_path.into ()) 108 | } else { 109 | None 110 | } 111 | } else { 112 | None 113 | }, 114 | Ok (_) => 115 | return Err (io::Error::new (io::ErrorKind::Other, "[a12f1634] invalid source path (non file or folder)")), 116 | Err (ref _error) if _error.kind () == io::ErrorKind::NotFound => 117 | return Err (io::Error::new (io::ErrorKind::Other, "[9ee46264] invalid source path (non exists)")), 118 | Err (_error) => 119 | return Err (_error), 120 | }; 121 | 122 | 123 | 124 | 125 | let mut _output_path = _flags.output_path.clone (); 126 | 127 | let _output_descriptor = if 128 | if _output_path == path::Path::new ("-") { 129 | _output_path = path::PathBuf::from ("/dev/stdout"); 130 | true 131 | } else if _output_path == path::Path::new ("/dev/stdout") { 132 | true 133 | } else if _output_path == path::Path::new ("/dev/stderr") { 134 | true 135 | } else if _output_path == path::Path::new ("/dev/null") { 136 | true 137 | } else if 138 | _output_path.starts_with (path::Path::new ("/dev/fd")) || 139 | _output_path.starts_with (path::Path::new ("/proc/self/fd")) { 140 | true 141 | } else if 142 | _output_path.starts_with (path::Path::new ("/dev")) || 143 | _output_path.starts_with (path::Path::new ("/proc")) || 144 | _output_path.starts_with (path::Path::new ("/sys")) { 145 | return Err (io::Error::new (io::ErrorKind::Other, "[49b2e473] invalid output path")); 146 | } else { 147 | false 148 | } 149 | { 150 | None 151 | 152 | } else { 153 | 154 | let _output_path_with_transformer = if _output_path != path::Path::new (".") { 155 | match fs::metadata (&_output_path) { 156 | Ok (ref _stat) if _stat.is_dir () => 157 | Some ((_output_path.clone (), Some (true))), 158 | Ok (ref _stat) if _stat.is_file () => 159 | return Err (io::Error::new (io::ErrorKind::Other, "[b4ab81b9] invalid output path (already exists)")), 160 | Ok (_) => 161 | return Err (io::Error::new (io::ErrorKind::Other, "[8366e424] invalid output path (non file or folder)")), 162 | Err (ref _error) if _error.kind () == io::ErrorKind::NotFound => 163 | Some ((_output_path.clone (), None)), 164 | Err (_error) => 165 | return Err (_error), 166 | } 167 | 168 | } else { 169 | match fs::metadata (&_source_path) { 170 | Ok (ref _stat) if _stat.is_dir () => { 171 | let mut _outcome = None; 172 | for _suffix in &[_flags.hashes_flags.algorithm.suffix, ".hashes", ".md5"] { 173 | let _output_path_base = _source_path.join (_suffix); 174 | match fs::metadata (&_output_path_base) { 175 | Ok (ref _stat) if _stat.is_dir () => { 176 | _outcome = Some (Some ((_output_path_base, Some (true)))); 177 | break; 178 | }, 179 | Ok (ref _stat) if _stat.is_file () => { 180 | _outcome = Some (Some ((_output_path_base, Some (false)))); 181 | break; 182 | }, 183 | Ok (_) => 184 | return Err (io::Error::new (io::ErrorKind::Other, "[2cb4982d] invalid hashes path (non file or folder)")), 185 | Err (ref _error) if _error.kind () == io::ErrorKind::NotFound => 186 | (), 187 | Err (_error) => 188 | return Err (_error), 189 | } 190 | } 191 | if let Some (_outcome) = _outcome { 192 | _outcome 193 | } 
else { 194 | let mut _output_path = ffi::OsString::from (&_source_path); 195 | _output_path.push (path::MAIN_SEPARATOR.to_string ()); 196 | _output_path.push ("."); 197 | Some ((_output_path.into (), Some (false))) 198 | } 199 | }, 200 | Ok (ref _stat) if _stat.is_file () => 201 | Some ((_source_path.clone (), Some (false))), 202 | Ok (_) => 203 | return Err (io::Error::new (io::ErrorKind::Other, "[cce14438] invalid source path (non file or folder)")), 204 | Err (ref _error) if _error.kind () == io::ErrorKind::NotFound => 205 | return Err (io::Error::new (io::ErrorKind::Other, "[5f86a63d] invalid source path (non exists)")), 206 | Err (_error) => 207 | return Err (_error), 208 | } 209 | }; 210 | 211 | match _output_path_with_transformer { 212 | None => 213 | None, 214 | Some ((_output_path, None)) => 215 | Some (_output_path), 216 | Some ((_output_path_base, Some (_transformer))) => { 217 | 218 | let _output_path_suffix = _flags.hashes_flags.algorithm.suffix; 219 | 220 | let _output_timestamp = { 221 | 222 | use chrono::Datelike as _; 223 | use chrono::Timelike as _; 224 | let _output_timestamp = chrono::Local::now (); 225 | let _output_timestamp_date = _output_timestamp.date (); 226 | let _output_timestamp_time = _output_timestamp.time (); 227 | 228 | format! ( 229 | "{:04}-{:02}-{:02}-{:02}-{:02}-{:02}", 230 | _output_timestamp_date.year (), 231 | _output_timestamp_date.month (), 232 | _output_timestamp_date.day (), 233 | _output_timestamp_time.hour (), 234 | _output_timestamp_time.minute (), 235 | _output_timestamp_time.second (), 236 | ) 237 | }; 238 | 239 | if _transformer { 240 | let _output_path = _output_path_base.join (_output_timestamp + _output_path_suffix); 241 | Some (_output_path) 242 | } else { 243 | let mut _output_path = ffi::OsString::from (_output_path_base); 244 | _output_path.push ("--"); 245 | _output_path.push (_output_timestamp); 246 | _output_path.push (_output_path_suffix); 247 | Some (_output_path.into ()) 248 | } 249 | } 250 | } 251 | }; 252 | 253 | let _output_path_and_tmp = if let Some (_output_path) = _output_descriptor { 254 | let mut _output_path_tmp = ffi::OsString::from (&_output_path); 255 | _output_path_tmp.push (".tmp"); 256 | let _output_path_tmp = path::PathBuf::from (_output_path_tmp); 257 | Some ((_output_path, _output_path_tmp)) 258 | } else { 259 | None 260 | }; 261 | 262 | 263 | 264 | 265 | if let Some ((ref _output_path, _)) = _output_path_and_tmp { 266 | eprintln! ("[ii] [8cc8542c] creating `{}`...", _output_path.to_string_lossy ()); 267 | } 268 | let (_output_file, _output_stat) = if let Some ((_, ref _output_path_tmp)) = _output_path_and_tmp { 269 | let mut _output_file = fs::OpenOptions::new () .create_new (true) .write (true) .open (_output_path_tmp) ?; 270 | if let Err (_error) = _output_file.set_permissions (fs::Permissions::from_mode (0o600)) { 271 | eprintln! 
("[ii] [c892431d] failed making read-only `{}`: {}; ignoring!", _output_path_tmp.to_string_lossy (), _error); 272 | } 273 | let _output_stat = _output_file.metadata () ?; 274 | (_output_file, Some (_output_stat)) 275 | } else { 276 | if _output_path == path::Path::new ("/dev/stdout") && atty::is (atty::Stream::Stdout) { 277 | _flags.report_progress = false; 278 | } 279 | let _output_file = fs::OpenOptions::new () .write (true) .open (_output_path) ?; 280 | (_output_file, None) 281 | }; 282 | 283 | 284 | let _sink = StandardHashesSink::new (_output_file, _flags.format_flags.zero, _flags.format_flags.path); 285 | let _sink = sync::Arc::new (sync::Mutex::new (_sink)); 286 | 287 | 288 | let (_enqueue, _dequeue) = crossbeam::channel::bounded:: (_flags.queue_size); 289 | let mut _completions = Vec::with_capacity (_flags.threads_count); 290 | let _threads_errors = sync::Arc::new (sync::Mutex::new (Vec::new ())); 291 | let _done = crossbeam::sync::WaitGroup::new (); 292 | 293 | 294 | 295 | 296 | if ! atty::is (atty::Stream::Stderr) { 297 | _flags.report_progress = false; 298 | } 299 | 300 | let _progress = if _flags.report_progress { 301 | 302 | let _folder = indicatif::ProgressBar::new (!0); 303 | _folder.set_style ( 304 | indicatif::ProgressStyle::default_bar () 305 | .template ("[..] [{elapsed:>8}] | {wide_msg} |") 306 | .progress_chars ("=>-") 307 | .tick_chars (".|/-\\") 308 | ); 309 | 310 | let _files = indicatif::ProgressBar::new (0); 311 | _files.set_style ( 312 | indicatif::ProgressStyle::default_bar () 313 | .template ("[..] [{elapsed_precise}] | {wide_bar} | {percent:>3}% | {per_sec:>10} | {pos:>10} | {len:>10} |") 314 | .progress_chars ("=>-") 315 | .tick_chars (".|/-\\") 316 | ); 317 | _files.set_draw_delta (10); 318 | 319 | let _data = indicatif::ProgressBar::new (0); 320 | _data.set_style ( 321 | indicatif::ProgressStyle::default_bar () 322 | .template ("[..] [{eta_precise}] | {wide_bar} | {percent:>3}% | {bytes_per_sec:>10} | {bytes:>10} | {total_bytes:>10} |") 323 | .progress_chars ("=>-") 324 | .tick_chars (".|/-\\") 325 | ); 326 | _data.set_draw_delta (128 * 1024); 327 | 328 | { 329 | let _dashboard = indicatif::MultiProgress::new (); 330 | _dashboard.set_draw_target (indicatif::ProgressDrawTarget::stderr_with_hz (4)); 331 | _dashboard.add (_folder.clone ()); 332 | _dashboard.add (_files.clone ()); 333 | _dashboard.add (_data.clone ()); 334 | thread::spawn (move || -> () { 335 | _dashboard.join () .unwrap (); 336 | }); 337 | } 338 | 339 | Some (Progress { 340 | folder : _folder, 341 | files : _files, 342 | data : _data, 343 | }) 344 | 345 | } else { 346 | None 347 | }; 348 | 349 | macro_rules! message { 350 | ( $progress : expr, $( $token : tt )+ ) => ( 351 | if let Some (ref _progress) = $progress { 352 | _progress.files.println (format! ( $( $token )+ )); 353 | } else { 354 | eprintln! ( $( $token )+ ); 355 | } 356 | ) 357 | } 358 | 359 | 360 | 361 | 362 | for _ in 0 .. 
_flags.threads_count { 363 | 364 | let _context = HasherContext { 365 | flags : _flags.clone (), 366 | queue : _dequeue.clone (), 367 | sink : sync::Arc::clone (&_sink), 368 | errors : sync::Arc::clone (&_threads_errors), 369 | progress : _progress.clone (), 370 | }; 371 | 372 | let _relative_path = _relative_path.clone (); 373 | 374 | let _done = _done.clone (); 375 | let _completion = thread::spawn (move || { 376 | let _outcome = execute_hasher (_context, _relative_path); 377 | drop (_done); 378 | _outcome 379 | }); 380 | 381 | _completions.push (_completion); 382 | } 383 | 384 | 385 | 386 | 387 | let mut _walker = walkdir::WalkDir::new (&_source_path) 388 | .same_file_system (_flags.walk_xdev) 389 | .follow_links (_flags.walk_follow) 390 | .contents_first (false) 391 | .into_iter (); 392 | 393 | let mut _walk_index = 0 as u64; 394 | 395 | 396 | let mut _batch = if _flags.batch_size > 1 { 397 | Some (Vec::<(walkdir::DirEntry, fs::Metadata, DirEntryOrder)>::with_capacity (_flags.batch_size)) 398 | } else { 399 | None 400 | }; 401 | 402 | 403 | let mut _errors = Vec::::new (); 404 | let _unknown_error = io::Error::new (io::ErrorKind::Other, "[31b7b284] unexpected error"); 405 | 406 | 407 | loop { 408 | 409 | _walk_index += 1; 410 | 411 | if let Some (ref mut _batch) = _batch { 412 | if _batch.capacity () == _batch.len () { 413 | if let Some (ref _progress) = _progress { 414 | _progress.folder.set_message ("(enqueueing...)"); 415 | _progress.folder.tick (); 416 | } 417 | _batch.sort_by_key (|&(_, _, _order)| _order); 418 | for (_entry, _metadata, _) in _batch.drain (..) { 419 | let _task = HasherTask { 420 | path : _entry.into_path (), 421 | metadata : _metadata, 422 | }; 423 | _enqueue.send (_task) .unwrap (); 424 | } 425 | if let Some (ref _progress) = _progress { 426 | _progress.folder.set_message ("(walking...)"); 427 | _progress.folder.tick (); 428 | } 429 | } 430 | } 431 | 432 | let _entry = match _walker.next () { 433 | Some (Ok (_entry)) => 434 | _entry, 435 | Some (Err (_error)) => { 436 | let mut _sink = _sink.lock () .unwrap (); 437 | let _path = _error.path () .unwrap_or (&_source_path); 438 | if let Some (_ancestor) = _error.loop_ancestor () { 439 | message! (_progress, "[ww] [55021f5c] detected walking loop for `{}` pointing at `{}`; ignoring!", _path.to_string_lossy (), _ancestor.to_string_lossy ()); 440 | continue; 441 | } 442 | if _flags.report_errors_to_stderr { 443 | message! 
(_progress, "[ee] [a5e88e25] failed walking path `{}`: `{}`!", _path.to_string_lossy (), _error.io_error () .unwrap_or (&_unknown_error)); 444 | } 445 | if _flags.report_errors_to_sink { 446 | let _path_for_sink = if let Some (ref _relative_path) = _relative_path { 447 | _path.strip_prefix (_relative_path) .unwrap () .as_os_str () 448 | } else { 449 | _path.as_os_str () 450 | }; 451 | let _path_for_sink = if _path_for_sink != "" { _path_for_sink } else { ffi::OsStr::new (".") }; 452 | _sink.handle (_path_for_sink, _flags.hashes_flags.algorithm.invalid_raw) ?; 453 | _sink.flush () ?; 454 | } 455 | if _flags.ignore_walk_errors { 456 | continue; 457 | } else { 458 | let _error = _error.into_io_error () .unwrap_or_else (|| io::Error::new (io::ErrorKind::Other, "[7961fa68] unexpected error")); 459 | _errors.push (_error); 460 | break; 461 | } 462 | }, 463 | None => 464 | break, 465 | }; 466 | 467 | let _metadata = match _entry.metadata () { 468 | Ok (_metadata) => 469 | _metadata, 470 | Err (_error) => { 471 | let mut _sink = _sink.lock () .unwrap (); 472 | let _path = _error.path () .unwrap_or (&_source_path); 473 | if _flags.report_errors_to_stderr { 474 | message! (_progress, "[ee] [96d2838a] failed walking path `{}`: `{}`!", _entry.path () .to_string_lossy (), _error.io_error () .unwrap_or (&_unknown_error)); 475 | } 476 | if _flags.report_errors_to_sink { 477 | let _path = _entry.path (); 478 | let _path_for_sink = if let Some (ref _relative_path) = _relative_path { 479 | _path.strip_prefix (_relative_path) .unwrap () .as_os_str () 480 | } else { 481 | _path.as_os_str () 482 | }; 483 | let _path_for_sink = if _path_for_sink != "" { _path_for_sink } else { ffi::OsStr::new (".") }; 484 | _sink.handle (_path_for_sink, _flags.hashes_flags.algorithm.invalid_raw) ?; 485 | _sink.flush () ?; 486 | } 487 | if _flags.ignore_walk_errors { 488 | continue; 489 | } else { 490 | let _error = _error.into_io_error () .unwrap_or_else (|| io::Error::new (io::ErrorKind::Other, "[7961fa68] unexpected error")); 491 | _errors.push (_error); 492 | break; 493 | } 494 | }, 495 | }; 496 | 497 | if let Some (ref _output_stat) = _output_stat { 498 | if (_metadata.dev () == _output_stat.dev ()) && (_metadata.ino () == _output_stat.ino ()) { 499 | continue; 500 | } 501 | } 502 | 503 | if _metadata.is_dir () { 504 | 505 | if (_flags.walk_skip_marker != path::Path::new ("")) && (_entry.path () != _source_path) { 506 | let _skip_path = _entry.path () .join (&_flags.walk_skip_marker); 507 | match fs::symlink_metadata (&_skip_path) { 508 | Ok (_metadata) => { 509 | let _type = _metadata.file_type (); 510 | if _type.is_file () || _type.is_symlink () { 511 | message! (_progress, "[ii] [44f7b487] skipping path `{}`!", _entry.path () .to_string_lossy ()); 512 | _walker.skip_current_dir (); 513 | continue; 514 | } else if _type.is_dir () { 515 | message! (_progress, "[ww] [db643f95] skipping path `{}`! (although expected file or symlink)", _entry.path () .to_string_lossy ()); 516 | _walker.skip_current_dir (); 517 | continue; 518 | } else { 519 | message! (_progress, "[ee] [d72d95df] failed skipping path `{}`: invalid skip marker; ignoring!", _entry.path () .to_string_lossy ()); 520 | } 521 | }, 522 | Err (ref _error) if _error.kind () == io::ErrorKind::NotFound => 523 | (), 524 | Err (_error) => { 525 | if _flags.report_errors_to_stderr { 526 | message! 
(_progress, "[ee] [0c67e368] failed skipping path `{}`: `{}`!", _entry.path () .to_string_lossy (), _error); 527 | } 528 | if _flags.ignore_walk_errors { 529 | continue; 530 | } else { 531 | _errors.push (_error); 532 | break; 533 | } 534 | }, 535 | } 536 | } 537 | 538 | if let Some (ref _progress) = _progress { 539 | _progress.folder.set_message (_entry.path () .to_string_lossy () .into_owned ()); 540 | _progress.folder.tick (); 541 | } 542 | } 543 | 544 | if _metadata.is_file () { 545 | 546 | if let Some (ref _progress) = _progress { 547 | _progress.files.inc_length (1); 548 | _progress.files.tick (); 549 | _progress.data.inc_length (_metadata.size ()); 550 | _progress.data.tick (); 551 | } 552 | 553 | if let Some (ref mut _batch) = _batch { 554 | let _order = entry_order (&_entry, &_metadata, _walk_index, _flags.batch_order); 555 | _batch.push ((_entry, _metadata, _order)); 556 | } else { 557 | let _task = HasherTask { 558 | path : _entry.into_path (), 559 | metadata : _metadata, 560 | }; 561 | _enqueue.send (_task) .unwrap (); 562 | } 563 | } 564 | } 565 | 566 | if let Some (ref mut _batch) = _batch { 567 | if let Some (ref _progress) = _progress { 568 | _progress.folder.set_message ("(enqueueing...)"); 569 | _progress.folder.tick (); 570 | } 571 | _batch.sort_by_key (|&(_, _, _order)| _order); 572 | for (_entry, _metadata, _) in _batch.drain (..) { 573 | let _task = HasherTask { 574 | path : _entry.into_path (), 575 | metadata : _metadata, 576 | }; 577 | _enqueue.send (_task) .unwrap (); 578 | } 579 | } 580 | 581 | if let Some (ref _progress) = _progress { 582 | _progress.folder.set_message ("(waiting...)"); 583 | _progress.folder.tick (); 584 | } 585 | 586 | drop (_enqueue); 587 | drop (_dequeue); 588 | 589 | 590 | _done.wait (); 591 | 592 | 593 | if let Some (ref _progress) = _progress { 594 | _progress.folder.set_message ("(completed!)"); 595 | _progress.folder.tick (); 596 | _progress.folder.finish (); 597 | _progress.files.finish (); 598 | _progress.data.finish (); 599 | } 600 | 601 | 602 | let _sink = sync::Arc::try_unwrap (_sink) .ok () .expect ("[3d3636b0]"); 603 | let _sink = _sink.into_inner () .expect ("[1a198ea3]"); 604 | let mut _output_file = _sink.done () ?; 605 | 606 | if let Some ((ref _output_path, ref _output_path_tmp)) = _output_path_and_tmp { 607 | if let Err (_error) = _output_file.set_permissions (fs::Permissions::from_mode (0o400)) { 608 | eprintln! ("[ii] [abfed219] failed making read-only `{}`: {}; ignoring!", _output_path_tmp.to_string_lossy (), _error); 609 | } 610 | _output_file.sync_all () ?; 611 | fs::rename (_output_path_tmp, _output_path) ?; 612 | } 613 | drop (_output_file); 614 | 615 | 616 | for _completion in _completions.into_iter () { 617 | match _completion.join () { 618 | Ok (Ok (())) => 619 | (), 620 | Ok (Err (_error)) => 621 | _errors.push (_error), 622 | Err (_error) => 623 | _errors.push (io::Error::new (io::ErrorKind::Other, "[ee3e2b02] unexpected error")), 624 | } 625 | } 626 | 627 | { 628 | let mut _threads_errors = _threads_errors.lock () .unwrap (); 629 | while let Some (_error) = _threads_errors.pop () { 630 | _errors.push (_error); 631 | } 632 | } 633 | 634 | if _errors.is_empty () { 635 | return Ok (()); 636 | } else { 637 | return Err (io::Error::new (io::ErrorKind::Other, format! ("[32f6fc78] encountered {} errors", _errors.len ()))); 638 | } 639 | } 640 | 641 | 642 | pub fn main_0 () -> ! { 643 | if let Err (_error) = main () { 644 | eprintln! ("[!!] 
{}", _error); 645 | process::exit (1); 646 | } else { 647 | process::exit (0); 648 | } 649 | } 650 | 651 | 652 | 653 | 654 | fn execute_hasher (_context : HasherContext, _relative_path : Option) -> (Result<(), io::Error>) { 655 | 656 | macro_rules! message { 657 | ( $( $token : tt )+ ) => ( 658 | if let Some (ref _progress) = _context.progress { 659 | _progress.files.println (format! ( $( $token )+ )); 660 | } else { 661 | eprintln! ( $( $token )+ ); 662 | } 663 | ) 664 | } 665 | 666 | let mut _hash_buffer = Vec::with_capacity (128); 667 | 668 | loop { 669 | 670 | let _task = match _context.queue.recv () { 671 | Ok (_task) => 672 | _task, 673 | Err (crossbeam::channel::RecvError) => 674 | break, 675 | }; 676 | 677 | let _path = &_task.path; 678 | let _path_for_sink = if let Some (ref _relative_path) = _relative_path { 679 | _path.strip_prefix (_relative_path) .unwrap () .as_os_str () 680 | } else { 681 | _path.as_os_str () 682 | }; 683 | let _path_for_sink = if _path_for_sink != "" { _path_for_sink } else { ffi::OsStr::new (".") }; 684 | 685 | let mut _open = fs::OpenOptions::new (); 686 | _open.read (true); 687 | 688 | let mut _file = match _open.open (_path) { 689 | Ok (_file) => 690 | _file, 691 | Err (_error) => { 692 | let mut _sink = _context.sink.lock () .unwrap (); 693 | if _context.flags.report_errors_to_stderr { 694 | message! ("[ee] [42f1352f] failed opening file `{}`: `{}`!", _path.to_string_lossy (), _error); 695 | } 696 | if _context.flags.report_errors_to_sink { 697 | _sink.handle (_path_for_sink, _context.flags.hashes_flags.algorithm.invalid_raw) ?; 698 | _sink.flush () ?; 699 | } 700 | _context.errors.lock () .unwrap () .push (_error); 701 | if _context.flags.ignore_open_errors { 702 | continue; 703 | } else { 704 | return Ok (()); 705 | } 706 | }, 707 | }; 708 | 709 | if _context.flags.read_fadvise { 710 | let mut _failed = false; 711 | unsafe { 712 | if libc::posix_fadvise (_file.as_raw_fd (), 0, 0, libc::POSIX_FADV_SEQUENTIAL) != 0 { 713 | _failed = true; 714 | } 715 | if libc::posix_fadvise (_file.as_raw_fd (), 0, 0, libc::POSIX_FADV_NOREUSE) != 0 { 716 | _failed = true; 717 | } 718 | if libc::posix_fadvise (_file.as_raw_fd (), 0, 0, libc::POSIX_FADV_WILLNEED) != 0 { 719 | _failed = true; 720 | } 721 | } 722 | if _failed { 723 | message! ("[ww] [76280772] `fadvise` failed!") 724 | } 725 | } 726 | 727 | _hash_buffer.clear (); 728 | match digest (_context.flags.hashes_flags.algorithm, &mut _file, &mut _hash_buffer) { 729 | Ok (()) => { 730 | let mut _sink = _context.sink.lock () .unwrap (); 731 | _sink.handle (_path_for_sink, &_hash_buffer) ?; 732 | }, 733 | Err (_error) => { 734 | let mut _sink = _context.sink.lock () .unwrap (); 735 | if _context.flags.report_errors_to_stderr { 736 | message! ("[ee] [1aeb2750] failed reading file `{}`: `{}`!", _path.to_string_lossy (), _error); 737 | } 738 | if _context.flags.report_errors_to_sink { 739 | _sink.handle (_path_for_sink, _context.flags.hashes_flags.algorithm.invalid_raw) ?; 740 | _sink.flush () ?; 741 | } 742 | _context.errors.lock () .unwrap () .push (_error); 743 | if _context.flags.ignore_read_errors { 744 | continue; 745 | } else { 746 | return Ok (()); 747 | } 748 | }, 749 | } 750 | 751 | if _context.flags.read_fadvise { 752 | let mut _failed = false; 753 | unsafe { 754 | if libc::posix_fadvise (_file.as_raw_fd (), 0, 0, libc::POSIX_FADV_DONTNEED) != 0 { 755 | _failed = true; 756 | } 757 | } 758 | if _failed { 759 | message! 
("[ww] [def753c5] `fadvise` failed!") 760 | } 761 | } 762 | 763 | if let Some (ref _progress) = _context.progress { 764 | _progress.files.inc (1); 765 | _progress.data.inc (_task.metadata.size ()); 766 | } 767 | } 768 | 769 | return Ok (()); 770 | } 771 | 772 | 773 | 774 | 775 | #[ derive (Copy, Clone, Eq, Ord, PartialEq, PartialOrd) ] 776 | struct DirEntryOrder (u64, u64, u64); 777 | 778 | 779 | fn entry_order (_entry : & walkdir::DirEntry, _metadata : & fs::Metadata, _index : u64, _kind : CreateBatchOrder) -> (DirEntryOrder) { 780 | match _kind { 781 | CreateBatchOrder::Index => 782 | DirEntryOrder (_index, 0, 0), 783 | CreateBatchOrder::Inode => 784 | DirEntryOrder (_metadata.ino (), 0, 0), 785 | CreateBatchOrder::InodeAndSizeBuckets => 786 | return entry_order_by_inode (_entry, _metadata, _index), 787 | CreateBatchOrder::Extent => 788 | return entry_order_by_extent (_entry, _metadata, _index), 789 | CreateBatchOrder::Random => 790 | return entry_order_by_hash (_entry, _metadata, _index), 791 | } 792 | } 793 | 794 | 795 | fn entry_order_by_inode (_entry : & walkdir::DirEntry, _metadata : & fs::Metadata, _index : u64) -> (DirEntryOrder) { 796 | 797 | let _dev = _metadata.dev (); 798 | let _inode = _metadata.ino (); 799 | let _blocks = _metadata.blocks () * 512 / _metadata.blksize (); 800 | 801 | // NOTE: First group files based on inode (regardless of device). 802 | let _order_1 = _inode / (1024 * 128); 803 | 804 | // NOTE: Then group files based on log2 actual used file-system blocks. 805 | let _order_2 = (64 - _blocks.leading_zeros ()) as u64; 806 | 807 | // NOTE: Then order files by inode and then based on device. 808 | // (This doesn't perfectly distributes files from different devices, but we try...) 809 | let _order_3 = (_inode % (1024 * 128) << 32) | ((_dev >> 32) ^ (_dev & 0xffffffff)); 810 | 811 | DirEntryOrder (_order_1, _order_2, _order_3) 812 | } 813 | 814 | 815 | fn entry_order_by_hash (_entry : & walkdir::DirEntry, _metadata : & fs::Metadata, _index : u64) -> (DirEntryOrder) { 816 | #[ allow (deprecated) ] 817 | let mut _hasher = hash::SipHasher::new (); 818 | _hasher.write_u64 (_metadata.dev ()); 819 | _hasher.write_u64 (_metadata.ino ()); 820 | _hasher.write_u64 (_metadata.size ()); 821 | let _order = _hasher.finish (); 822 | DirEntryOrder (_order, 0, 0) 823 | } 824 | 825 | 826 | 827 | 828 | #[ allow (dead_code) ] 829 | fn entry_order_by_extent (_entry : & walkdir::DirEntry, _metadata : & fs::Metadata, _index : u64) -> (DirEntryOrder) { 830 | 831 | 832 | // NOTE: See also: https://www.kernel.org/doc/Documentation/filesystems/fiemap.txt 833 | // NOTE: Inspired by: https://github.com/lilydjwg/fiemap-rs/blob/master/fiemap/src/lib.rs 834 | 835 | 836 | #[ repr (C) ] 837 | #[ derive (Default) ] 838 | struct fiemap { 839 | fm_start: u64, 840 | fm_length: u64, 841 | fm_flags: u32, 842 | fm_mapped_extents: u32, 843 | fm_extent_count: u32, 844 | fm_reserved: u32, 845 | fm_extents: [fiemap_extent; 1], 846 | } 847 | 848 | #[repr (C) ] 849 | #[ derive (Default) ] 850 | struct fiemap_extent { 851 | fe_logical: u64, 852 | fe_physical: u64, 853 | fe_length: u64, 854 | fe_reserved64: [u64; 2], 855 | fe_flags: u32, 856 | fe_reserved: [u32; 3], 857 | } 858 | 859 | const FS_IOC_FIEMAP : libc::c_ulong = 0xC020660B; 860 | 861 | const FIEMAP_FLAG_SYNC : u32 = 0x00000001; 862 | const FIEMAP_FLAG_XATTR : u32 = 0x00000002; 863 | const FIEMAP_FLAG_CACHE : u32 = 0x00000004; 864 | 865 | const FIEMAP_EXTENT_LAST : u32 = 0x00000001; 866 | const FIEMAP_EXTENT_UNKNOWN : u32 = 0x00000002; 867 | const 
877 | 878 | let mut _fiemap : fiemap = Default::default (); 879 | _fiemap.fm_length = 1; 880 | _fiemap.fm_extent_count = 1; 881 |
882 | let _path = ffi::CString::new (_entry.path () .as_os_str () .as_bytes ()) .unwrap (); 883 |
884 | let _succeeded = unsafe { 885 | let mut _succeeded = true; 886 | let _file = libc::open (_path.as_ptr (), libc::O_RDONLY | libc::O_NOFOLLOW); 887 | if _file < 0 { 888 | _succeeded = false; 889 | } 890 | if _succeeded { 891 | _succeeded = libc::ioctl (_file, FS_IOC_FIEMAP, &mut _fiemap as *mut _) == 0; 892 | } 893 | if _file >= 0 { 894 | _succeeded = _succeeded && (libc::close (_file) == 0); 895 | } 896 | _succeeded 897 | }; 898 |
899 | if !_succeeded { 900 | DirEntryOrder (0, _metadata.ino (), 0) 901 | } else if _fiemap.fm_mapped_extents == 1 { 902 | if (_fiemap.fm_extents[0].fe_flags & FIEMAP_EXTENT_UNKNOWN) == 0 { 903 | let _block = _fiemap.fm_extents[0].fe_physical; 904 | DirEntryOrder (3 + _block, 0, 0) 905 | } else { 906 | DirEntryOrder (2, _metadata.ino (), 0) 907 | } 908 | } else { 909 | DirEntryOrder (1, _metadata.ino (), 0) 910 | } 911 | } 912 | 913 |
-------------------------------------------------------------------------------- /sources/lib/main_diff.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | use ::argparse; 4 | use ::regex; 5 |
6 | use crate::core::*; 7 | use crate::flags::*; 8 | use crate::hashes::*; 9 | use crate::prelude::*; 10 |
11 | #[ cfg (feature = "profile") ] 12 | use ::cpuprofiler::PROFILER as profiler; 13 | 14 | 15 | 16 |
17 | struct Source { 18 | path : path::PathBuf, 19 | records : Vec<SourceRecord>, 20 | } 21 |
22 | struct SourceRecord { 23 | hash : HashKey, 24 | path : PathKey, 25 | #[ allow (dead_code) ] 26 | line : usize, 27 | } 28 |
29 | struct SourceIndex <'a> { 30 | by_hash : HashMap<HashKey, Vec<&'a SourceRecord>>, 31 | by_path : HashMap<PathKey, Vec<&'a SourceRecord>>, 32 | } 33 |
34 | struct SourceStatistics { 35 | records : usize, 36 | distinct_hashes : usize, 37 | unique_hashes : usize, 38 | duplicate_hashes : usize, 39 | unique_files : usize, 40 | duplicate_files : usize, 41 | empty_files : usize, 42 | invalid_files : usize, 43 | distinct_paths : usize, 44 | unique_paths : usize, 45 | duplicate_paths : usize, 46 | } 47 | 48 |
49 | struct Diff { 50 | hashes : Vec<HashKey>, 51 | paths : Vec<PathKey>, 52 | by_hash : HashMap<HashKey, DiffEntry>, 53 | by_path : HashMap<PathKey, DiffEntry>, 54 | by_hash_statistics : DiffStatistics, 55 | by_path_statistics : DiffStatistics, 56 | } 57 |
58 | enum DiffEntry { // NOTE: Values are path keys when the entry is indexed by hash, and hash keys when indexed by path; both are `usize`. 59 | UniqueLeft (Vec<usize>), 60 | UniqueRight (Vec<usize>), 61 | Matching (Vec<usize>, Vec<usize>), 62 | Conflicting (Vec<usize>, Vec<usize>), 63 | } 64 |
65 | struct DiffStatistics { 66 | distinct : usize, 67 | matching : usize, 68 | conflicting : usize, 69 | unique_left : usize, 70 | unique_right : usize, 71 | } 72 | 73 |
74 | struct Tokens { 75 | hashes : Vec<Rc<HashValue>>, 76 | hashes_index : HashMap<Rc<HashValue>, HashKey>, 77 | hashes_order : Vec<TokenOrder>, 78 | paths : Vec<Rc<PathValue>>, 79 | paths_index : HashMap<Rc<PathValue>, PathKey>, 80 | paths_order : Vec<TokenOrder>, 81 | hash_key_empty : HashKey, 82 | hash_key_invalid : HashKey, 83 | } 84 |
85 | type HashKey = usize; 86 | type PathKey = usize; 87 | type TokenOrder = usize; 88 |
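// NOTE: Hashes and paths are interned: each distinct value is stored once (behind an `Rc`)
// in `Tokens` and referred to everywhere else by its integer key, so the diffing below
// compares and sorts plain `usize` keys instead of strings. After loading, `sort` assigns
// every key its rank in lexicographic order, which `order_of_hash` / `order_of_path`
// later return when sorting the reports for output.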
89 | 90 | 91 | 92 | pub fn main () -> (Result<(), io::Error>) { 93 | 94 | 95 | let mut _hashes_flags = HashesFlags { 96 | algorithm : &MD5, 97 | }; 98 | 99 | let mut _format_flags = HashesFormatFlags { 100 | zero : false, 101 | path : true, 102 | }; 103 | 104 | let mut _compression_flags = CompressionFlags { 105 | algorithm : CompressionAlgorithm::None, 106 | }; 107 | 108 | let mut _path_left = path::PathBuf::from (""); 109 | let mut _path_right = path::PathBuf::from (""); 110 | 111 | 112 | { 113 | let mut _parser = argparse::ArgumentParser::new (); 114 | _hashes_flags.argparse (&mut _parser); 115 | _format_flags.argparse (&mut _parser); 116 | _compression_flags.argparse (&mut _parser); 117 | _parser.refer (&mut _path_left) .add_argument ("dataset-a", argparse::Parse, "source file for dataset A") .required (); 118 | _parser.refer (&mut _path_right) .add_argument ("dataset-b", argparse::Parse, "source file for dataset B") .required (); 119 | _parser.parse_args_or_exit (); 120 | } 121 | 122 | if !_format_flags.path { 123 | return Err (io::Error::new (io::ErrorKind::Other, "[d9b3891e] paths are mandatory")); 124 | } 125 | 126 | 127 | #[ cfg (feature = "profile") ] 128 | profiler.lock () .unwrap () .start ("./target/md5-diff.profile") .unwrap (); 129 | 130 | 131 | if verbose { eprintln! ("[ii] [42c3ae70] loading..."); } 132 | let mut _tokens = Tokens::new (_hashes_flags.algorithm.empty, _hashes_flags.algorithm.invalid); 133 | let _record_pattern = regex::bytes::Regex::new (_hashes_flags.algorithm.pattern) .unwrap (); 134 | let _source_left = load (_path_left.as_ref (), &mut _tokens, &_record_pattern, _format_flags.zero, _compression_flags.algorithm) ?; 135 | let _source_right = load (_path_right.as_ref (), &mut _tokens, &_record_pattern, _format_flags.zero, _compression_flags.algorithm) ?; 136 | _tokens.sort (); 137 | 138 | if verbose { eprintln! ("[ii] [42c3ae70] indexing..."); } 139 | let (_index_left, _statistics_left) = index (&_source_left, &_tokens); 140 | let (_index_right, _statistics_right) = index (&_source_right, &_tokens); 141 | 142 | if verbose { eprintln! ("[ii] [b89979a2] diffing..."); } 143 | let _diff = diff (&_source_left, &_index_left, &_source_right, &_index_right, &_tokens); 144 | 145 | if verbose { eprintln! ("[ii] [92d696c3] reporting statistics..."); } 146 | report_diff_statistics ('A', 'B', &_diff); 147 | report_source_statistics ('A', &_source_left, &_statistics_left); 148 | report_source_statistics ('B', &_source_right, &_statistics_right); 149 | 150 | if verbose { eprintln! ("[ii] [eedb34f8] reporting details..."); } 151 | report_diff_entries ('A', 'B', &_diff, &_tokens); 152 | 153 | 154 | #[ cfg (feature = "profile") ] 155 | profiler.lock () .unwrap () .stop () .unwrap (); 156 | 157 | 158 | // NOTE: We explicitly exit, so that destructors are not called... 159 | process::exit (0); 160 | } 161 | 162 | 163 | pub fn main_0 () -> ! { 164 | if let Err (_error) = main () { 165 | eprintln! ("[!!] {}", _error); 166 | process::exit (1); 167 | } else { 168 | process::exit (0); 169 | } 170 | } 171 | 172 | 173 | 174 | 175 | fn report_source_statistics (_tag : char, _source : & Source, _statistics : & SourceStatistics) -> () { 176 | 177 | println! (); 178 | println! ("## Dataset ({}) statistics", _tag); 179 | println! ("## * records : {:8}", _statistics.records); 180 | if _statistics.duplicate_paths != 0 { 181 | println! ("## * paths !!!!!!!!"); 182 | println! ("## * distinct paths : {:8}", _statistics.distinct_paths); 183 | println! 
("## * unique paths : {:8}", _statistics.unique_paths); 184 | println! ("## * duplicate paths : {:8}", _statistics.unique_paths); 185 | } 186 | println! ("## * hashes"); 187 | println! ("## * distinct hashes : {:8}", _statistics.distinct_hashes); 188 | println! ("## * unique hashes : {:8}", _statistics.unique_hashes); 189 | println! ("## * duplicate hashes : {:8}", _statistics.duplicate_hashes); 190 | println! ("## * files"); 191 | println! ("## * unique files : {:8}", _statistics.unique_files); 192 | println! ("## * duplicate files : {:8}", _statistics.duplicate_files); 193 | println! ("## * empty files : {:8}", _statistics.empty_files); 194 | println! ("## * invalid files : {:8}", _statistics.invalid_files); 195 | println! ("## * source: `{}`", _source.path.display ()); 196 | } 197 | 198 | 199 | fn report_diff_statistics (_tag_left : char, _tag_right : char, _diff : & Diff) -> () { 200 | 201 | println! (); 202 | println! ("## Diff statistics ({}) vs ({})", _tag_left, _tag_right); 203 | println! ("## * hashes"); 204 | println! ("## * distinct hashes : {:8}", _diff.by_hash_statistics.distinct); 205 | println! ("## * unique hashes in ({}) : {:8}", _tag_left, _diff.by_hash_statistics.unique_left); 206 | println! ("## * unique hashes in ({}) : {:8}", _tag_right, _diff.by_hash_statistics.unique_right); 207 | println! ("## * common hashes : {:8}", _diff.by_hash_statistics.matching + _diff.by_hash_statistics.conflicting); 208 | println! ("## * matching paths : {:8}", _diff.by_hash_statistics.matching); 209 | println! ("## * conflicting paths : {:8}", _diff.by_hash_statistics.conflicting); 210 | println! ("## * paths"); 211 | println! ("## * distinct paths : {:8}", _diff.by_path_statistics.distinct); 212 | println! ("## * unique paths in ({}) : {:8}", _tag_left, _diff.by_path_statistics.unique_left); 213 | println! ("## * unique paths in ({}) : {:8}", _tag_right, _diff.by_path_statistics.unique_right); 214 | println! ("## * common paths : {:8}", _diff.by_path_statistics.matching + _diff.by_path_statistics.conflicting); 215 | println! ("## * matching hashes : {:8}", _diff.by_path_statistics.matching); 216 | println! 
("## * conflicting hashes : {:8}", _diff.by_path_statistics.conflicting); 217 | } 218 | 219 | 220 | fn report_diff_entries (_tag_left : char, _tag_right : char, _diff : & Diff, _tokens : & Tokens) -> () { 221 | 222 | let mut _unique_hashes_left : Vec<(char, char, PathKey, HashKey)> = Vec::new (); 223 | let mut _unique_hashes_right : Vec<(char, char, PathKey, HashKey)> = Vec::new (); 224 | let mut _conflicting_paths : Vec<(char, char, PathKey, HashKey)> = Vec::new (); 225 | let mut _renamed_hashes : Vec<(char, char, PathKey, HashKey)> = Vec::new (); 226 | 227 | for &_hash in _diff.hashes.iter () { 228 | if (_hash == _tokens.hash_key_empty) || (_hash == _tokens.hash_key_invalid) { 229 | continue; 230 | } 231 | match _diff.by_hash.get (&_hash) .unwrap () { 232 | DiffEntry::UniqueLeft (_paths) => 233 | for &_path in _paths.iter () { 234 | _unique_hashes_left.push (('+', _tag_left, _path, _hash)) 235 | }, 236 | DiffEntry::UniqueRight (_paths) => 237 | for &_path in _paths.iter () { 238 | _unique_hashes_right.push (('+', _tag_right, _path, _hash)) 239 | }, 240 | DiffEntry::Conflicting (_paths_left, _paths_right) => { 241 | for &_path in _paths_left.iter () { 242 | _renamed_hashes.push (('~', _tag_left, _path, _hash)) 243 | } 244 | for &_path in _paths_right.iter () { 245 | _renamed_hashes.push (('~', _tag_right, _path, _hash)) 246 | } 247 | }, 248 | _ => (), 249 | } 250 | } 251 | 252 | for &_path in _diff.paths.iter () { 253 | match _diff.by_path.get (&_path) .unwrap () { 254 | DiffEntry::Conflicting (_hashes_left, _hashes_right) => { 255 | for &_hash in _hashes_left.iter () { 256 | _conflicting_paths.push (('!', _tag_left, _path, _hash)) 257 | } 258 | for &_hash in _hashes_right.iter () { 259 | _conflicting_paths.push (('!', _tag_right, _path, _hash)) 260 | } 261 | }, 262 | _ => (), 263 | } 264 | } 265 | 266 | fn print_pairs (_pairs : &mut Vec<(char, char, PathKey, HashKey)>, _tokens : & Tokens, _sort_by_path : bool) -> () { 267 | println! (); 268 | if _sort_by_path { 269 | _pairs.sort_unstable_by_key (|_x| (_tokens.order_of_path (_x.2), _x.1, _tokens.order_of_hash (_x.3), _x.0)); 270 | } else { 271 | _pairs.sort_unstable_by_key (|_x| (_tokens.order_of_hash (_x.3), _tokens.order_of_path (_x.2), _x.1, _x.0)); 272 | } 273 | for &(_slug, _tag, _path, _hash) in _pairs.iter () { 274 | println! ("{}{} {} {}", _slug, _tag, _tokens.select_hash (_hash), _tokens.select_path (_path).to_string_lossy ()); 275 | } 276 | println! (); 277 | } 278 | 279 | if ! _unique_hashes_left.is_empty () { 280 | println! (); 281 | println! ("#### Hashes unique in ({}) :: {}", _tag_left, _diff.by_hash_statistics.unique_left); 282 | print_pairs (&mut _unique_hashes_left, _tokens, true); 283 | } 284 | 285 | if ! _unique_hashes_right.is_empty () { 286 | println! (); 287 | println! ("#### Hashes unique in ({}) :: {}", _tag_right, _diff.by_hash_statistics.unique_right); 288 | print_pairs (&mut _unique_hashes_right, _tokens, true); 289 | } 290 | 291 | if ! _conflicting_paths.is_empty () { 292 | println! (); 293 | println! ("#### Paths conflicting in ({}) and ({}) :: {}", _tag_left, _tag_right, _diff.by_path_statistics.conflicting); 294 | print_pairs (&mut _conflicting_paths, _tokens, true); 295 | } 296 | 297 | if ! _renamed_hashes.is_empty () { 298 | println! (); 299 | println! 
("#### Files re-organized in ({}) and ({}) :: {} (hashes)", _tag_left, _tag_right, _diff.by_hash_statistics.conflicting); 300 | print_pairs (&mut _renamed_hashes, _tokens, false); 301 | } 302 | } 303 | 304 | 305 | 306 | 307 | fn load (_path : & path::Path, _tokens : &mut Tokens, _pattern : & regex::bytes::Regex, _zero : bool, _decompressor : CompressionAlgorithm) -> (Result) { 308 | 309 | let mut _file = fs::File::open (_path) ?; 310 | 311 | if _decompressor != CompressionAlgorithm::None { 312 | 313 | let mut _filter = match _decompressor { 314 | CompressionAlgorithm::Gzip => { 315 | let mut _filter = process::Command::new ("gzip"); 316 | _filter.arg ("-d"); 317 | _filter 318 | }, 319 | CompressionAlgorithm::Bzip2 => { 320 | let mut _filter = process::Command::new ("bzip2"); 321 | _filter.arg ("-d"); 322 | _filter 323 | }, 324 | CompressionAlgorithm::Lzip => { 325 | let mut _filter = process::Command::new ("lzip"); 326 | _filter.arg ("-d"); 327 | _filter 328 | }, 329 | CompressionAlgorithm::Xz => { 330 | let mut _filter = process::Command::new ("xz"); 331 | _filter.arg ("-d"); 332 | _filter 333 | }, 334 | CompressionAlgorithm::Lzma => { 335 | let mut _filter = process::Command::new ("lzma"); 336 | _filter.arg ("-d"); 337 | _filter 338 | }, 339 | CompressionAlgorithm::Lz4 => { 340 | let mut _filter = process::Command::new ("lz4"); 341 | _filter.arg ("-d"); 342 | _filter 343 | }, 344 | CompressionAlgorithm::Lzo => { 345 | let mut _filter = process::Command::new ("lzop"); 346 | _filter.arg ("-d"); 347 | _filter 348 | }, 349 | CompressionAlgorithm::Zstd => { 350 | let mut _filter = process::Command::new ("zstd"); 351 | _filter.arg ("-d"); 352 | _filter 353 | }, 354 | CompressionAlgorithm::None => 355 | unreachable! ("[9c7ca4b5]"), 356 | }; 357 | _filter.stdin (process::Stdio::from (_file)); 358 | _filter.stdout (process::Stdio::piped ()); 359 | _filter.stderr (process::Stdio::inherit ()); 360 | 361 | let mut _filter = _filter.spawn () ?; 362 | let mut _stream = _filter.stdout.as_mut () .unwrap (); 363 | 364 | let _outcome = load_from_stream (_stream, _path, _tokens, _pattern, _zero); 365 | 366 | if _outcome.is_err () { 367 | _filter.kill () ?; 368 | } 369 | let _exit = _filter.wait () ?; 370 | if _outcome.is_ok () && ! _exit.success () { 371 | return Err (io::Error::new (io::ErrorKind::Other, "[7fadf032] filter failed")); 372 | } 373 | 374 | return _outcome; 375 | 376 | } else { 377 | 378 | return load_from_stream (&mut _file, _path, _tokens, _pattern, _zero); 379 | } 380 | } 381 | 382 | 383 | fn load_from_stream (_stream : &mut Stream, _path : & path::Path, _tokens : &mut Tokens, _pattern : & regex::bytes::Regex, _zero : bool) -> (Result) { 384 | 385 | let mut _stream = io::BufReader::with_capacity (16 * 1024 * 1024, _stream); 386 | 387 | let mut _records = Vec::with_capacity (128 * 1024); 388 | 389 | { 390 | let _delimiter = if _zero { b'\0' } else { b'\n' }; 391 | let mut _buffer = Vec::with_capacity (8 * 1024); 392 | let mut _line : usize = 0; 393 | 394 | loop { 395 | 396 | _line += 1; 397 | _buffer.clear (); 398 | _stream.read_until (_delimiter, &mut _buffer) ?; 399 | 400 | match _buffer.pop () { 401 | Some (_byte) if _byte == _delimiter => (), 402 | Some (_byte) => _buffer.push (_byte), 403 | None => break, 404 | } 405 | 406 | if _buffer.is_empty () { 407 | continue; 408 | } 409 | 410 | if _pattern.is_match (&_buffer) { 411 | 412 | let _split = _buffer.iter () .position (|&_byte| _byte == b' ') .unwrap (); 413 | 414 | let _hash = &_buffer[.. 
381 | 382 |
383 | fn load_from_stream <Stream : io::Read> (_stream : &mut Stream, _path : & path::Path, _tokens : &mut Tokens, _pattern : & regex::bytes::Regex, _zero : bool) -> (Result<Source, io::Error>) { 384 |
385 | let mut _stream = io::BufReader::with_capacity (16 * 1024 * 1024, _stream); 386 |
387 | let mut _records = Vec::with_capacity (128 * 1024); 388 |
389 | { 390 | let _delimiter = if _zero { b'\0' } else { b'\n' }; 391 | let mut _buffer = Vec::with_capacity (8 * 1024); 392 | let mut _line : usize = 0; 393 |
394 | loop { 395 |
396 | _line += 1; 397 | _buffer.clear (); 398 | _stream.read_until (_delimiter, &mut _buffer) ?; 399 |
400 | match _buffer.pop () { 401 | Some (_byte) if _byte == _delimiter => (), 402 | Some (_byte) => _buffer.push (_byte), 403 | None => break, 404 | } 405 |
406 | if _buffer.is_empty () { 407 | continue; 408 | } 409 |
410 | if _pattern.is_match (&_buffer) { 411 |
412 | let _split = _buffer.iter () .position (|&_byte| _byte == b' ') .unwrap (); 413 |
414 | let _hash = &_buffer[.. _split]; 415 | let mut _path = &_buffer[_split + 2 ..]; 416 |
417 | if (_path.len () >= 2) && &_path[0..2] == b"./" { 418 | _path = &_path[2..]; 419 | } 420 | if (_path.len () >= 1) && &_path[0..1] == b"/" { 421 | _path = &_path[1..]; 422 | } 423 | if _path.is_empty () { 424 | _path = b"."; 425 | } 426 |
427 | let _hash = str::from_utf8 (_hash) .unwrap (); 428 | let _path = ffi::OsStr::from_bytes (_path); 429 |
430 | let _hash = _tokens.include_hash (_hash); 431 | let _path = _tokens.include_path (_path); 432 |
433 | let _record = SourceRecord { 434 | hash : _hash, 435 | path : _path, 436 | line : _line, 437 | }; 438 |
439 | _records.push (_record); 440 | 441 | } else { 442 |
443 | if verbose { eprintln! ("[ee] [d8bd4da9] @{} {:?}", _line, ffi::OsStr::from_bytes (&_buffer)); } 444 | return Err (io::Error::new (io::ErrorKind::Other, "[1bd51464] invalid record line syntax")); 445 | } 446 | } 447 | } 448 |
449 | let _source = Source { 450 | path : _path.into (), 451 | records : _records, 452 | }; 453 |
454 | return Ok (_source); 455 | } 456 | 457 | 458 | 459 |
460 | fn index <'a> (_source : &'a Source, _tokens : &'a Tokens) -> (SourceIndex<'a>, SourceStatistics) { 461 |
462 | let _records = &_source.records; 463 |
464 | let mut _index_by_hash : HashMap<HashKey, Vec<&'a SourceRecord>> = HashMap::with_capacity (_records.len ()); 465 | let mut _index_by_path : HashMap<PathKey, Vec<&'a SourceRecord>> = HashMap::with_capacity (_records.len ()); 466 |
467 | let mut _records_count = 0; 468 | for (_index, _record) in _records.iter () .enumerate () { 469 | _index_by_hash.entry (_record.hash) .or_default () .push (_record); 470 | _index_by_path.entry (_record.path) .or_default () .push (_record); 471 | _records_count += 1; 472 | } 473 |
474 | let mut _distinct_hashes = 0; 475 | let mut _unique_hashes = 0; 476 | let mut _duplicate_hashes = 0; 477 | let mut _unique_files = 0; 478 | let mut _duplicate_files = 0; 479 | let mut _empty_files = 0; 480 | let mut _invalid_files = 0; 481 | for (&_hash, _records) in _index_by_hash.iter () { 482 | _distinct_hashes += 1; 483 | if _records.len () == 1 { 484 | _unique_hashes += 1; 485 | } else { 486 | _duplicate_hashes += 1; 487 | } 488 | if _hash == _tokens.hash_key_empty { 489 | _empty_files += _records.len (); 490 | } else if _hash == _tokens.hash_key_invalid { 491 | _invalid_files += _records.len (); 492 | } else if _records.len () == 1 { 493 | _unique_files += 1; 494 | } else { 495 | _duplicate_files += _records.len (); 496 | } 497 | } 498 |
499 | let mut _distinct_paths = 0; 500 | let mut _unique_paths = 0; 501 | let mut _duplicate_paths = 0; 502 | for _records in _index_by_path.values () { 503 | _distinct_paths += 1; 504 | if _records.len () == 1 { 505 | _unique_paths += 1; 506 | } else { 507 | _duplicate_paths += 1; 508 | } 509 | } 510 |
511 | let _index = SourceIndex { 512 | by_hash : _index_by_hash, 513 | by_path : _index_by_path, 514 | }; 515 |
516 | let _statistics = SourceStatistics { 517 | records : _records_count, 518 | distinct_hashes : _distinct_hashes, 519 | unique_hashes : _unique_hashes, 520 | duplicate_hashes : _duplicate_hashes, 521 | unique_files : _unique_files, 522 | duplicate_files : _duplicate_files, 523 | empty_files : _empty_files, 524 | invalid_files : _invalid_files, 525 | distinct_paths : _distinct_paths, 526 | unique_paths : _unique_paths, 527 | duplicate_paths : _duplicate_paths, 528 | }; 529 |
530 | return (_index, _statistics); 531 | } 532 | 533 | 534 | 535 |
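// NOTE: The diff is computed twice, symmetrically: once keyed by hash (do both datasets hold
// the same paths for this content?), and once keyed by path (do both datasets hold the same
// content at this path?).  A key present in both datasets is `Matching` if the sorted value
// lists are equal and `Conflicting` otherwise; a key present in only one dataset is
// `UniqueLeft` / `UniqueRight`.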
536 | fn diff (_source_left : & Source, _index_left : & SourceIndex, _source_right : & Source, _index_right : & SourceIndex, _tokens : & Tokens) -> (Diff) { 537 |
538 | let mut _hashes = Vec::with_capacity (cmp::max (_index_left.by_hash.len (), _index_right.by_hash.len ()) * 3 / 2); 539 | let mut _paths = Vec::with_capacity (cmp::max (_index_left.by_path.len (), _index_right.by_path.len ()) * 3 / 2); 540 |
541 | _hashes.extend (_index_left.by_hash.keys () .cloned ()); 542 | _paths.extend (_index_left.by_path.keys () .cloned ()); 543 |
544 | _hashes.extend (_index_right.by_hash.keys () .cloned ()); 545 | _paths.extend (_index_right.by_path.keys () .cloned ()); 546 |
547 | _hashes.sort_unstable_by_key (|&_x| _tokens.order_of_hash (_x)); 548 | _paths.sort_unstable_by_key (|&_x| _tokens.order_of_path (_x)); 549 |
550 | _hashes.dedup (); 551 | _paths.dedup (); 552 |
553 | let mut _diff_by_hash = HashMap::with_capacity (_hashes.len ()); 554 | let mut _diff_by_path = HashMap::with_capacity (_paths.len ()); 555 | 556 |
557 | let mut _distinct_hashes = 0; 558 | let mut _unique_hashes_left = 0; 559 | let mut _unique_hashes_right = 0; 560 | let mut _matching_hashes = 0; 561 | let mut _conflicting_hashes = 0; 562 |
563 | for &_hash in _hashes.iter () { 564 |
565 | let _records_left = _index_left.by_hash.get (&_hash) 566 | .map (|_records| _records.iter () .map (|_record| _record.path) .collect::<Vec<_>> ()) 567 | .map (|mut _values| { _values.sort_unstable_by_key (|&_x| _tokens.order_of_path (_x)); _values }); 568 |
569 | let _records_right = _index_right.by_hash.get (&_hash) 570 | .map (|_records| _records.iter () .map (|_record| _record.path) .collect::<Vec<_>> ()) 571 | .map (|mut _values| { _values.sort_unstable_by_key (|&_x| _tokens.order_of_path (_x)); _values }); 572 |
573 | let _entry = match (_records_left, _records_right) { 574 | (Some (_records_left), Some (_records_right)) => 575 | if _records_left == _records_right { 576 | _matching_hashes += 1; 577 | DiffEntry::Matching (_records_left, _records_right) 578 | } else { 579 | _conflicting_hashes += 1; 580 | DiffEntry::Conflicting (_records_left, _records_right) 581 | }, 582 | (Some (_records_left), None) => { 583 | _unique_hashes_left += 1; 584 | DiffEntry::UniqueLeft (_records_left) 585 | }, 586 | (None, Some (_records_right)) => { 587 | _unique_hashes_right += 1; 588 | DiffEntry::UniqueRight (_records_right) 589 | }, 590 | (None, None) => 591 | unreachable! ("[6deb2aea]"), 592 | }; 593 |
594 | _diff_by_hash.insert (_hash, _entry); 595 | _distinct_hashes += 1; 596 | } 597 | 598 |
("[6deb2aea]"), 592 | }; 593 | 594 | _diff_by_hash.insert (_hash, _entry); 595 | _distinct_hashes += 1; 596 | } 597 | 598 | 599 | let mut _distinct_paths = 0; 600 | let mut _unique_paths_left = 0; 601 | let mut _unique_paths_right = 0; 602 | let mut _matching_paths = 0; 603 | let mut _conflicting_paths = 0; 604 | 605 | for &_path in _paths.iter () { 606 | 607 | let _records_left = _index_left.by_path.get (&_path) 608 | .map (|_records| _records.iter () .map (|_record| _record.hash) .collect::> ()) 609 | .map (|mut _values| { _values.sort_unstable_by_key (|&_x| _tokens.order_of_hash (_x)); _values }); 610 | 611 | let _records_right = _index_right.by_path.get (&_path) 612 | .map (|_records| _records.iter () .map (|_record| _record.hash) .collect::> ()) 613 | .map (|mut _values| { _values.sort_unstable_by_key (|&_x| _tokens.order_of_hash (_x)); _values }); 614 | 615 | let _entry = match (_records_left, _records_right) { 616 | (Some (_records_left), Some (_records_right)) => 617 | if _records_left == _records_right { 618 | _matching_paths += 1; 619 | DiffEntry::Matching (_records_left, _records_right) 620 | } else { 621 | _conflicting_paths += 1; 622 | DiffEntry::Conflicting (_records_left, _records_right) 623 | }, 624 | (Some (_records_left), None) => { 625 | _unique_paths_left += 1; 626 | DiffEntry::UniqueLeft (_records_left) 627 | }, 628 | (None, Some (_records_right)) => { 629 | _unique_paths_right += 1; 630 | DiffEntry::UniqueRight (_records_right) 631 | }, 632 | (None, None) => 633 | unreachable! ("[6deb2aea]"), 634 | }; 635 | 636 | _diff_by_path.insert (_path, _entry); 637 | _distinct_paths += 1; 638 | } 639 | 640 | let _diff = Diff { 641 | hashes : _hashes, 642 | paths : _paths, 643 | by_hash : _diff_by_hash, 644 | by_path : _diff_by_path, 645 | by_hash_statistics : DiffStatistics { 646 | distinct : _distinct_hashes, 647 | matching : _matching_hashes, 648 | conflicting : _conflicting_hashes, 649 | unique_left : _unique_hashes_left, 650 | unique_right : _unique_hashes_right, 651 | }, 652 | by_path_statistics : DiffStatistics { 653 | distinct : _distinct_paths, 654 | matching : _matching_paths, 655 | conflicting : _conflicting_paths, 656 | unique_left : _unique_paths_left, 657 | unique_right : _unique_paths_right, 658 | }, 659 | }; 660 | 661 | return _diff; 662 | } 663 | 664 | 665 | 666 | 667 | impl Tokens { 668 | 669 | fn new (_hash_for_empty : & HashValueRef, _hash_for_invalid : & HashValueRef) -> (Self) { 670 | let _size = 512 * 1024; 671 | let mut _tokens = Tokens { 672 | hashes : Vec::with_capacity (_size), 673 | hashes_index : HashMap::with_capacity (_size), 674 | hashes_order : Vec::with_capacity (_size), 675 | paths : Vec::with_capacity (_size), 676 | paths_index : HashMap::with_capacity (_size), 677 | paths_order : Vec::with_capacity (_size), 678 | hash_key_empty : 0, 679 | hash_key_invalid : 0, 680 | }; 681 | _tokens.hash_key_empty = _tokens.include_hash (_hash_for_empty); 682 | _tokens.hash_key_invalid = _tokens.include_hash (_hash_for_invalid); 683 | return _tokens; 684 | } 685 | 686 | fn include_hash (&mut self, _token : &HashValueRef) -> (HashKey) { 687 | let _token = HashValue::from (_token); 688 | if let Some (&_key) = self.hashes_index.get (&_token) { 689 | return _key; 690 | } else { 691 | let _token = Rc::new (_token); 692 | let _key = self.hashes.len (); 693 | self.hashes.push (Rc::clone (&_token)); 694 | self.hashes_index.insert (Rc::clone (&_token), _key); 695 | return _key; 696 | } 697 | } 698 | 699 | fn include_path (&mut self, _token : &PathValueRef) -> (HashKey) 
699 | fn include_path (&mut self, _token : &PathValueRef) -> (PathKey) { 700 | let _token = PathValue::from (_token); 701 | if let Some (&_key) = self.paths_index.get (&_token) { 702 | return _key; 703 | } else { 704 | let _token = Rc::new (_token); 705 | let _key = self.paths.len (); 706 | self.paths.push (Rc::clone (&_token)); 707 | self.paths_index.insert (Rc::clone (&_token), _key); 708 | return _key; 709 | } 710 | } 711 |
712 | fn select_hash (& self, _key : HashKey) -> (&HashValueRef) { 713 | return self.hashes.get (_key) .unwrap () .as_ref (); 714 | } 715 |
716 | fn select_path (& self, _key : PathKey) -> (&PathValueRef) { 717 | return self.paths.get (_key) .unwrap () .as_ref (); 718 | } 719 |
720 | fn order_of_hash (& self, _key : HashKey) -> (TokenOrder) { 721 | return self.hashes_order[_key]; 722 | } 723 |
724 | fn order_of_path (& self, _key : PathKey) -> (TokenOrder) { 725 | return self.paths_order[_key]; 726 | } 727 |
728 | fn sort (&mut self) -> () { 729 |
730 | let mut _hashes = self.hashes.iter () .map (|_token| Rc::as_ref (_token)) .collect::<Vec<_>> (); 731 | let mut _paths = self.paths.iter () .map (|_token| Rc::as_ref (_token)) .collect::<Vec<_>> (); 732 |
733 | let mut _hashes_order = Vec::new (); 734 | let mut _paths_order = Vec::new (); 735 |
736 | _hashes_order.resize (_hashes.len (), 0); 737 | _paths_order.resize (_paths.len (), 0); 738 |
739 | _hashes.sort_unstable (); 740 | _paths.sort_unstable (); 741 |
742 | for (_order, &_token) in _hashes.iter () .enumerate () { 743 | let &_key = self.hashes_index.get (_token) .unwrap (); 744 | _hashes_order[_key] = _order; 745 | } 746 |
747 | for (_order, &_token) in _paths.iter () .enumerate () { 748 | let &_key = self.paths_index.get (_token) .unwrap (); 749 | _paths_order[_key] = _order; 750 | } 751 |
752 | self.hashes_order = _hashes_order; 753 | self.paths_order = _paths_order; 754 | } 755 | } 756 | 757 |
758 | #[ allow (non_upper_case_globals) ] 759 | static verbose : bool = false; 760 | 761 |
-------------------------------------------------------------------------------- /sources/lib/prelude.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | #![allow (unused_imports) ] 4 | 5 |
6 | pub(crate) use ::std::clone; 7 | pub(crate) use ::std::cmp; 8 | pub(crate) use ::std::env; 9 | pub(crate) use ::std::default; 10 | pub(crate) use ::std::ffi; 11 | pub(crate) use ::std::fs; 12 | pub(crate) use ::std::hash; 13 | pub(crate) use ::std::io; 14 | pub(crate) use ::std::path; 15 | pub(crate) use ::std::process; 16 | pub(crate) use ::std::str; 17 | pub(crate) use ::std::sync; 18 | pub(crate) use ::std::thread; 19 | 20 |
21 | pub(crate) use ::std::option::{Option, Option::Some, Option::None}; 22 | pub(crate) use ::std::result::{Result, Result::Ok, Result::Err}; 23 | 24 |
25 | pub(crate) use ::std::default::Default; 26 | pub(crate) use ::std::mem::drop; 27 | pub(crate) use ::std::borrow::Cow; 28 | pub(crate) use ::std::rc::Rc; 29 | pub(crate) use ::std::vec::Vec; 30 | pub(crate) use ::std::string::String; 31 | pub(crate) use ::std::collections::HashMap; 32 | 33 |
34 | pub(crate) use ::std::eprintln; 35 | pub(crate) use ::std::format; 36 | pub(crate) use ::std::panic; 37 | pub(crate) use ::std::println; 38 | pub(crate) use ::std::unreachable; 39 | 40 |
41 | pub(crate) use ::std::borrow::Borrow as _; 42 | pub(crate) use ::std::clone::Clone as _; 43 | pub(crate) use ::std::convert::AsRef as _; 44 | pub(crate) use ::std::convert::From as _; 45 | pub(crate) use ::std::convert::Into as _; 46 | pub(crate) use ::std::hash::Hasher as _; 47 | pub(crate) use ::std::io::BufRead as _;
48 | pub(crate) use ::std::io::Read as _; 49 | pub(crate) use ::std::io::Write as _; 50 | pub(crate) use ::std::iter::ExactSizeIterator as _; 51 | pub(crate) use ::std::iter::Extend as _; 52 | pub(crate) use ::std::iter::IntoIterator as _; 53 | pub(crate) use ::std::iter::Iterator as _; 54 | pub(crate) use ::std::os::unix::ffi::OsStrExt as _; 55 | pub(crate) use ::std::os::unix::fs::MetadataExt as _; 56 | pub(crate) use ::std::os::unix::fs::OpenOptionsExt as _; 57 | pub(crate) use ::std::os::unix::fs::PermissionsExt as _; 58 | pub(crate) use ::std::os::unix::io::AsRawFd as _; 59 | pub(crate) use ::std::string::ToString as _; 60 | 61 |
62 | pub(crate) use ::std::write; 63 | 64 |
-------------------------------------------------------------------------------- /sources/lib/sinks.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | use crate::core::*; 4 | use crate::prelude::*; 5 | 6 | 7 | 8 |
9 | pub trait HashesSink { 10 | fn handle (&mut self, _path : & PathValueRef, _hash : & HashBytesRef) -> (Result<(), io::Error>); 11 | fn flush (&mut self) -> (Result<(), io::Error>); 12 | } 13 | 14 | 15 | 16 |
17 | pub struct StandardHashesSink <'a, Stream : io::Write> { 18 | stream : io::BufWriter<Stream>, 19 | prefix : Cow<'a, [u8]>, 20 | infix : Cow<'a, [u8]>, 21 | suffix : Cow<'a, [u8]>, 22 | path : bool, 23 | flush : bool, 24 | } 25 | 26 |
27 | impl <Stream : io::Write> StandardHashesSink<'static, Stream> { 28 |
29 | pub fn new (_stream : Stream, _zero : bool, _path : bool) -> (Self) { 30 | let _stream = io::BufWriter::with_capacity (128 * 1024, _stream); 31 | let _sink = StandardHashesSink { 32 | stream : _stream, 33 | prefix : Cow::Borrowed (b""), 34 | infix : Cow::Borrowed (b" *"), 35 | suffix : Cow::Borrowed (if _zero { b"\0" } else { b"\n" }), 36 | path : _path, 37 | flush : true, 38 | }; 39 | return _sink; 40 | } 41 |
42 | pub fn done (self) -> (Result<Stream, io::Error>) { 43 | let _stream = self.stream.into_inner () ?; 44 | return Ok (_stream); 45 | } 46 | } 47 | 48 |
49 | impl <Stream : io::Write> HashesSink for StandardHashesSink<'_, Stream> { 50 |
51 | fn handle (&mut self, _path : & PathValueRef, _hash : & HashBytesRef) -> (Result<(), io::Error>) { 52 | self.stream.write_all (&self.prefix) ?; 53 | for _byte in _hash { 54 | self.stream.write_fmt (format_args! ("{:02x}", _byte)) ?; 55 | } 56 | if self.path { 57 | self.stream.write_all (&self.infix) ?; 58 | self.stream.write_all (_path.as_bytes ()) ?; 59 | } 60 | self.stream.write_all (&self.suffix) ?; 61 | if self.flush { 62 | self.stream.flush () ?; 63 | } 64 | return Ok (()); 65 | } 66 |
67 | fn flush (&mut self) -> (Result<(), io::Error>) { 68 | return self.stream.flush (); 69 | } 70 | } 71 | 72 |
--------------------------------------------------------------------------------