├── .github ├── FUNDING.yml └── workflows │ └── rust.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── compile-all-targets.sh ├── deploy.sh ├── doc ├── screen-1.png ├── screen-2.png ├── screen-3.png ├── screen-4.png └── screen-5.png ├── release.sh └── src ├── args.rs ├── ask.rs ├── dirs.rs ├── dup.rs ├── dup_report.rs ├── ext.rs ├── file_pair.rs ├── hash.rs ├── json.rs ├── lib.rs ├── main.rs └── removal_report.rs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [Canop] 2 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /build 3 | /pub 4 | /releases 5 | /trav* 6 | *.zip 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | ### v1.1.2 - 2024-09-10 3 | - sort files by name - Thanks @nc7s 4 | 5 | 6 | ### v1.1.1 - 2023-07-01 7 | - updated dependencies 8 | - stripped binary (smaller) 9 | 10 | 11 | ### v1.1.0 - 2021-12-05 12 | - option to replace staged files with symlinks (unix only) - Fix #2 13 | 14 | 15 | ### v1.0.1 - 2021-12-05 16 | - option to write the report in a JSON file after staging phase - Fix #3 17 | 18 | 19 | ### v1.0.0 - 
2021-10-02 20 | No reason not to call this a 1.0 21 | 22 | 23 | ### v0.2.1 - 2021-07-14 24 | - backdown logs a few things. To have a log generated, launch backdown with `BACKDOWN_LOG=debug backdown your/dir` 25 | - change hash algorithm from SHA-256 to BLAKE3, which is slightly faster with the same guarantees 26 | 27 | 28 | ### v0.2.0 - 2021-07-12 29 | - backdown proposes to remove in 1 question all duplicates with names like "thing (2).AVI" or "thing (3rd copy).png" when they're in the same directory as the "source" 30 | 31 | 32 | ### v0.1.0 - 2021-07-11 33 | - first public release 34 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "anyhow" 16 | version = "1.0.49" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "0a03e93e97a28fbc9f42fbc5ba0886a3c67eb637b476dbee711f80a6ffe8223d" 19 | 20 | [[package]] 21 | name = "argh" 22 | version = "0.1.5" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "2e7317a549bc17c5278d9e72bb6e62c6aa801ac2567048e39ebc1c194249323e" 25 | dependencies = [ 26 | "argh_derive", 27 | "argh_shared", 28 | ] 29 | 30 | [[package]] 31 | name = "argh_derive" 32 | version = "0.1.5" 33 | source = "registry+https://github.com/rust-lang/crates.io-index" 34 | checksum = "60949c42375351e9442e354434b0cba2ac402c1237edf673cac3a4bf983b8d3c" 35 | dependencies = [ 36 | "argh_shared", 37 | "heck", 38 | "proc-macro2", 39 | "quote", 40 | "syn 1.0.73", 41 | ] 42 | 43 | 
[[package]] 44 | name = "argh_shared" 45 | version = "0.1.5" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | checksum = "8a61eb019cb8f415d162cb9f12130ee6bbe9168b7d953c17f4ad049e4051ca00" 48 | 49 | [[package]] 50 | name = "arrayref" 51 | version = "0.3.6" 52 | source = "registry+https://github.com/rust-lang/crates.io-index" 53 | checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" 54 | 55 | [[package]] 56 | name = "arrayvec" 57 | version = "0.7.4" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" 60 | 61 | [[package]] 62 | name = "autocfg" 63 | version = "1.3.0" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" 66 | 67 | [[package]] 68 | name = "backdown" 69 | version = "1.1.2" 70 | dependencies = [ 71 | "anyhow", 72 | "argh", 73 | "blake3", 74 | "chrono", 75 | "cli-log", 76 | "crossbeam", 77 | "file-size", 78 | "fnv", 79 | "lazy-regex", 80 | "phf", 81 | "rayon", 82 | "serde", 83 | "serde_json", 84 | "termimad", 85 | ] 86 | 87 | [[package]] 88 | name = "bitflags" 89 | version = "2.6.0" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" 92 | 93 | [[package]] 94 | name = "blake3" 95 | version = "1.4.0" 96 | source = "registry+https://github.com/rust-lang/crates.io-index" 97 | checksum = "729b71f35bd3fa1a4c86b85d32c8b9069ea7fe14f7a53cfabb65f62d4265b888" 98 | dependencies = [ 99 | "arrayref", 100 | "arrayvec", 101 | "cc", 102 | "cfg-if", 103 | "constant_time_eq", 104 | "digest", 105 | ] 106 | 107 | [[package]] 108 | name = "block-buffer" 109 | version = "0.10.4" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" 
112 | dependencies = [ 113 | "generic-array", 114 | ] 115 | 116 | [[package]] 117 | name = "cc" 118 | version = "1.0.69" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2" 121 | 122 | [[package]] 123 | name = "cfg-if" 124 | version = "1.0.0" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 127 | 128 | [[package]] 129 | name = "chrono" 130 | version = "0.4.19" 131 | source = "registry+https://github.com/rust-lang/crates.io-index" 132 | checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" 133 | dependencies = [ 134 | "libc", 135 | "num-integer", 136 | "num-traits", 137 | "time", 138 | "winapi", 139 | ] 140 | 141 | [[package]] 142 | name = "cli-log" 143 | version = "2.0.0" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "3d2ab00dc4c82ec28af25ac085aecc11ffeabf353755715a3113a7aa044ca5cc" 146 | dependencies = [ 147 | "chrono", 148 | "file-size", 149 | "log", 150 | "proc-status", 151 | ] 152 | 153 | [[package]] 154 | name = "constant_time_eq" 155 | version = "0.2.6" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "21a53c0a4d288377e7415b53dcfc3c04da5cdc2cc95c8d5ac178b58f0b861ad6" 158 | 159 | [[package]] 160 | name = "coolor" 161 | version = "1.0.0" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "691defa50318376447a73ced869862baecfab35f6aabaa91a4cd726b315bfe1a" 164 | dependencies = [ 165 | "crossterm", 166 | ] 167 | 168 | [[package]] 169 | name = "crokey" 170 | version = "1.1.0" 171 | source = "registry+https://github.com/rust-lang/crates.io-index" 172 | checksum = "520e83558f4c008ac06fa6a86e5c1d4357be6f994cce7434463ebcdaadf47bb1" 173 | dependencies = [ 174 | "crokey-proc_macros", 175 | "crossterm", 176 | "once_cell", 177 | 
"serde", 178 | "strict", 179 | ] 180 | 181 | [[package]] 182 | name = "crokey-proc_macros" 183 | version = "1.1.0" 184 | source = "registry+https://github.com/rust-lang/crates.io-index" 185 | checksum = "370956e708a1ce65fe4ac5bb7185791e0ece7485087f17736d54a23a0895049f" 186 | dependencies = [ 187 | "crossterm", 188 | "proc-macro2", 189 | "quote", 190 | "strict", 191 | "syn 1.0.73", 192 | ] 193 | 194 | [[package]] 195 | name = "crossbeam" 196 | version = "0.8.1" 197 | source = "registry+https://github.com/rust-lang/crates.io-index" 198 | checksum = "4ae5588f6b3c3cb05239e90bd110f257254aecd01e4635400391aeae07497845" 199 | dependencies = [ 200 | "cfg-if", 201 | "crossbeam-channel", 202 | "crossbeam-deque", 203 | "crossbeam-epoch", 204 | "crossbeam-queue", 205 | "crossbeam-utils", 206 | ] 207 | 208 | [[package]] 209 | name = "crossbeam-channel" 210 | version = "0.5.1" 211 | source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" 213 | dependencies = [ 214 | "cfg-if", 215 | "crossbeam-utils", 216 | ] 217 | 218 | [[package]] 219 | name = "crossbeam-deque" 220 | version = "0.8.0" 221 | source = "registry+https://github.com/rust-lang/crates.io-index" 222 | checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" 223 | dependencies = [ 224 | "cfg-if", 225 | "crossbeam-epoch", 226 | "crossbeam-utils", 227 | ] 228 | 229 | [[package]] 230 | name = "crossbeam-epoch" 231 | version = "0.9.5" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" 234 | dependencies = [ 235 | "cfg-if", 236 | "crossbeam-utils", 237 | "lazy_static", 238 | "memoffset", 239 | "scopeguard", 240 | ] 241 | 242 | [[package]] 243 | name = "crossbeam-queue" 244 | version = "0.3.2" 245 | source = "registry+https://github.com/rust-lang/crates.io-index" 246 | checksum = 
"9b10ddc024425c88c2ad148c1b0fd53f4c6d38db9697c9f1588381212fa657c9" 247 | dependencies = [ 248 | "cfg-if", 249 | "crossbeam-utils", 250 | ] 251 | 252 | [[package]] 253 | name = "crossbeam-utils" 254 | version = "0.8.5" 255 | source = "registry+https://github.com/rust-lang/crates.io-index" 256 | checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" 257 | dependencies = [ 258 | "cfg-if", 259 | "lazy_static", 260 | ] 261 | 262 | [[package]] 263 | name = "crossterm" 264 | version = "0.28.1" 265 | source = "registry+https://github.com/rust-lang/crates.io-index" 266 | checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" 267 | dependencies = [ 268 | "bitflags", 269 | "crossterm_winapi", 270 | "mio", 271 | "parking_lot", 272 | "rustix", 273 | "signal-hook", 274 | "signal-hook-mio", 275 | "winapi", 276 | ] 277 | 278 | [[package]] 279 | name = "crossterm_winapi" 280 | version = "0.9.1" 281 | source = "registry+https://github.com/rust-lang/crates.io-index" 282 | checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" 283 | dependencies = [ 284 | "winapi", 285 | ] 286 | 287 | [[package]] 288 | name = "crypto-common" 289 | version = "0.1.6" 290 | source = "registry+https://github.com/rust-lang/crates.io-index" 291 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 292 | dependencies = [ 293 | "generic-array", 294 | "typenum", 295 | ] 296 | 297 | [[package]] 298 | name = "digest" 299 | version = "0.10.7" 300 | source = "registry+https://github.com/rust-lang/crates.io-index" 301 | checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" 302 | dependencies = [ 303 | "block-buffer", 304 | "crypto-common", 305 | "subtle", 306 | ] 307 | 308 | [[package]] 309 | name = "either" 310 | version = "1.6.1" 311 | source = "registry+https://github.com/rust-lang/crates.io-index" 312 | checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" 313 | 314 | 
[[package]] 315 | name = "errno" 316 | version = "0.3.9" 317 | source = "registry+https://github.com/rust-lang/crates.io-index" 318 | checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" 319 | dependencies = [ 320 | "libc", 321 | "windows-sys", 322 | ] 323 | 324 | [[package]] 325 | name = "file-size" 326 | version = "1.0.3" 327 | source = "registry+https://github.com/rust-lang/crates.io-index" 328 | checksum = "9544f10105d33957765016b8a9baea7e689bf1f0f2f32c2fa2f568770c38d2b3" 329 | 330 | [[package]] 331 | name = "fnv" 332 | version = "1.0.7" 333 | source = "registry+https://github.com/rust-lang/crates.io-index" 334 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 335 | 336 | [[package]] 337 | name = "generic-array" 338 | version = "0.14.4" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" 341 | dependencies = [ 342 | "typenum", 343 | "version_check", 344 | ] 345 | 346 | [[package]] 347 | name = "heck" 348 | version = "0.3.3" 349 | source = "registry+https://github.com/rust-lang/crates.io-index" 350 | checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" 351 | dependencies = [ 352 | "unicode-segmentation", 353 | ] 354 | 355 | [[package]] 356 | name = "hermit-abi" 357 | version = "0.1.19" 358 | source = "registry+https://github.com/rust-lang/crates.io-index" 359 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 360 | dependencies = [ 361 | "libc", 362 | ] 363 | 364 | [[package]] 365 | name = "hermit-abi" 366 | version = "0.3.9" 367 | source = "registry+https://github.com/rust-lang/crates.io-index" 368 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 369 | 370 | [[package]] 371 | name = "itoa" 372 | version = "0.4.8" 373 | source = "registry+https://github.com/rust-lang/crates.io-index" 374 | checksum = 
"b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" 375 | 376 | [[package]] 377 | name = "lazy-regex" 378 | version = "3.3.0" 379 | source = "registry+https://github.com/rust-lang/crates.io-index" 380 | checksum = "8d8e41c97e6bc7ecb552016274b99fbb5d035e8de288c582d9b933af6677bfda" 381 | dependencies = [ 382 | "lazy-regex-proc_macros", 383 | "once_cell", 384 | "regex", 385 | ] 386 | 387 | [[package]] 388 | name = "lazy-regex-proc_macros" 389 | version = "3.3.0" 390 | source = "registry+https://github.com/rust-lang/crates.io-index" 391 | checksum = "76e1d8b05d672c53cb9c7b920bbba8783845ae4f0b076e02a3db1d02c81b4163" 392 | dependencies = [ 393 | "proc-macro2", 394 | "quote", 395 | "regex", 396 | "syn 2.0.22", 397 | ] 398 | 399 | [[package]] 400 | name = "lazy_static" 401 | version = "1.4.0" 402 | source = "registry+https://github.com/rust-lang/crates.io-index" 403 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 404 | 405 | [[package]] 406 | name = "libc" 407 | version = "0.2.158" 408 | source = "registry+https://github.com/rust-lang/crates.io-index" 409 | checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" 410 | 411 | [[package]] 412 | name = "linux-raw-sys" 413 | version = "0.4.14" 414 | source = "registry+https://github.com/rust-lang/crates.io-index" 415 | checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" 416 | 417 | [[package]] 418 | name = "lock_api" 419 | version = "0.4.12" 420 | source = "registry+https://github.com/rust-lang/crates.io-index" 421 | checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" 422 | dependencies = [ 423 | "autocfg", 424 | "scopeguard", 425 | ] 426 | 427 | [[package]] 428 | name = "log" 429 | version = "0.4.14" 430 | source = "registry+https://github.com/rust-lang/crates.io-index" 431 | checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" 432 | dependencies = [ 433 | "cfg-if", 434 | ] 435 | 436 
| [[package]] 437 | name = "memchr" 438 | version = "2.7.4" 439 | source = "registry+https://github.com/rust-lang/crates.io-index" 440 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 441 | 442 | [[package]] 443 | name = "memoffset" 444 | version = "0.6.4" 445 | source = "registry+https://github.com/rust-lang/crates.io-index" 446 | checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9" 447 | dependencies = [ 448 | "autocfg", 449 | ] 450 | 451 | [[package]] 452 | name = "minimad" 453 | version = "0.13.1" 454 | source = "registry+https://github.com/rust-lang/crates.io-index" 455 | checksum = "a9c5d708226d186590a7b6d4a9780e2bdda5f689e0d58cd17012a298efd745d2" 456 | dependencies = [ 457 | "once_cell", 458 | ] 459 | 460 | [[package]] 461 | name = "mio" 462 | version = "1.0.2" 463 | source = "registry+https://github.com/rust-lang/crates.io-index" 464 | checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" 465 | dependencies = [ 466 | "hermit-abi 0.3.9", 467 | "libc", 468 | "log", 469 | "wasi 0.11.0+wasi-snapshot-preview1", 470 | "windows-sys", 471 | ] 472 | 473 | [[package]] 474 | name = "num-integer" 475 | version = "0.1.44" 476 | source = "registry+https://github.com/rust-lang/crates.io-index" 477 | checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" 478 | dependencies = [ 479 | "autocfg", 480 | "num-traits", 481 | ] 482 | 483 | [[package]] 484 | name = "num-traits" 485 | version = "0.2.14" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" 488 | dependencies = [ 489 | "autocfg", 490 | ] 491 | 492 | [[package]] 493 | name = "num_cpus" 494 | version = "1.13.0" 495 | source = "registry+https://github.com/rust-lang/crates.io-index" 496 | checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" 497 | dependencies = [ 498 | "hermit-abi 0.1.19", 499 | 
"libc", 500 | ] 501 | 502 | [[package]] 503 | name = "once_cell" 504 | version = "1.19.0" 505 | source = "registry+https://github.com/rust-lang/crates.io-index" 506 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 507 | 508 | [[package]] 509 | name = "parking_lot" 510 | version = "0.12.3" 511 | source = "registry+https://github.com/rust-lang/crates.io-index" 512 | checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" 513 | dependencies = [ 514 | "lock_api", 515 | "parking_lot_core", 516 | ] 517 | 518 | [[package]] 519 | name = "parking_lot_core" 520 | version = "0.9.10" 521 | source = "registry+https://github.com/rust-lang/crates.io-index" 522 | checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" 523 | dependencies = [ 524 | "cfg-if", 525 | "libc", 526 | "redox_syscall", 527 | "smallvec", 528 | "windows-targets", 529 | ] 530 | 531 | [[package]] 532 | name = "phf" 533 | version = "0.11.2" 534 | source = "registry+https://github.com/rust-lang/crates.io-index" 535 | checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" 536 | dependencies = [ 537 | "phf_macros", 538 | "phf_shared", 539 | ] 540 | 541 | [[package]] 542 | name = "phf_generator" 543 | version = "0.11.2" 544 | source = "registry+https://github.com/rust-lang/crates.io-index" 545 | checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" 546 | dependencies = [ 547 | "phf_shared", 548 | "rand", 549 | ] 550 | 551 | [[package]] 552 | name = "phf_macros" 553 | version = "0.11.2" 554 | source = "registry+https://github.com/rust-lang/crates.io-index" 555 | checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" 556 | dependencies = [ 557 | "phf_generator", 558 | "phf_shared", 559 | "proc-macro2", 560 | "quote", 561 | "syn 2.0.22", 562 | ] 563 | 564 | [[package]] 565 | name = "phf_shared" 566 | version = "0.11.2" 567 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 568 | checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" 569 | dependencies = [ 570 | "siphasher", 571 | ] 572 | 573 | [[package]] 574 | name = "proc-macro2" 575 | version = "1.0.63" 576 | source = "registry+https://github.com/rust-lang/crates.io-index" 577 | checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" 578 | dependencies = [ 579 | "unicode-ident", 580 | ] 581 | 582 | [[package]] 583 | name = "proc-status" 584 | version = "0.1.1" 585 | source = "registry+https://github.com/rust-lang/crates.io-index" 586 | checksum = "f0e0c0ac915e7b76b47850ba4ffc377abde6c6ff9eeace61d0a89623db449712" 587 | dependencies = [ 588 | "thiserror", 589 | ] 590 | 591 | [[package]] 592 | name = "quote" 593 | version = "1.0.29" 594 | source = "registry+https://github.com/rust-lang/crates.io-index" 595 | checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" 596 | dependencies = [ 597 | "proc-macro2", 598 | ] 599 | 600 | [[package]] 601 | name = "rand" 602 | version = "0.8.5" 603 | source = "registry+https://github.com/rust-lang/crates.io-index" 604 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 605 | dependencies = [ 606 | "rand_core", 607 | ] 608 | 609 | [[package]] 610 | name = "rand_core" 611 | version = "0.6.4" 612 | source = "registry+https://github.com/rust-lang/crates.io-index" 613 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 614 | 615 | [[package]] 616 | name = "rayon" 617 | version = "1.5.1" 618 | source = "registry+https://github.com/rust-lang/crates.io-index" 619 | checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" 620 | dependencies = [ 621 | "autocfg", 622 | "crossbeam-deque", 623 | "either", 624 | "rayon-core", 625 | ] 626 | 627 | [[package]] 628 | name = "rayon-core" 629 | version = "1.9.1" 630 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 631 | checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" 632 | dependencies = [ 633 | "crossbeam-channel", 634 | "crossbeam-deque", 635 | "crossbeam-utils", 636 | "lazy_static", 637 | "num_cpus", 638 | ] 639 | 640 | [[package]] 641 | name = "redox_syscall" 642 | version = "0.5.3" 643 | source = "registry+https://github.com/rust-lang/crates.io-index" 644 | checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" 645 | dependencies = [ 646 | "bitflags", 647 | ] 648 | 649 | [[package]] 650 | name = "regex" 651 | version = "1.10.6" 652 | source = "registry+https://github.com/rust-lang/crates.io-index" 653 | checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" 654 | dependencies = [ 655 | "aho-corasick", 656 | "memchr", 657 | "regex-automata", 658 | "regex-syntax", 659 | ] 660 | 661 | [[package]] 662 | name = "regex-automata" 663 | version = "0.4.7" 664 | source = "registry+https://github.com/rust-lang/crates.io-index" 665 | checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" 666 | dependencies = [ 667 | "aho-corasick", 668 | "memchr", 669 | "regex-syntax", 670 | ] 671 | 672 | [[package]] 673 | name = "regex-syntax" 674 | version = "0.8.4" 675 | source = "registry+https://github.com/rust-lang/crates.io-index" 676 | checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" 677 | 678 | [[package]] 679 | name = "rustix" 680 | version = "0.38.36" 681 | source = "registry+https://github.com/rust-lang/crates.io-index" 682 | checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" 683 | dependencies = [ 684 | "bitflags", 685 | "errno", 686 | "libc", 687 | "linux-raw-sys", 688 | "windows-sys", 689 | ] 690 | 691 | [[package]] 692 | name = "ryu" 693 | version = "1.0.6" 694 | source = "registry+https://github.com/rust-lang/crates.io-index" 695 | checksum = 
"3c9613b5a66ab9ba26415184cfc41156594925a9cf3a2057e57f31ff145f6568" 696 | 697 | [[package]] 698 | name = "scopeguard" 699 | version = "1.1.0" 700 | source = "registry+https://github.com/rust-lang/crates.io-index" 701 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 702 | 703 | [[package]] 704 | name = "serde" 705 | version = "1.0.130" 706 | source = "registry+https://github.com/rust-lang/crates.io-index" 707 | checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" 708 | dependencies = [ 709 | "serde_derive", 710 | ] 711 | 712 | [[package]] 713 | name = "serde_derive" 714 | version = "1.0.130" 715 | source = "registry+https://github.com/rust-lang/crates.io-index" 716 | checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" 717 | dependencies = [ 718 | "proc-macro2", 719 | "quote", 720 | "syn 1.0.73", 721 | ] 722 | 723 | [[package]] 724 | name = "serde_json" 725 | version = "1.0.72" 726 | source = "registry+https://github.com/rust-lang/crates.io-index" 727 | checksum = "d0ffa0837f2dfa6fb90868c2b5468cad482e175f7dad97e7421951e663f2b527" 728 | dependencies = [ 729 | "itoa", 730 | "ryu", 731 | "serde", 732 | ] 733 | 734 | [[package]] 735 | name = "signal-hook" 736 | version = "0.3.17" 737 | source = "registry+https://github.com/rust-lang/crates.io-index" 738 | checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" 739 | dependencies = [ 740 | "libc", 741 | "signal-hook-registry", 742 | ] 743 | 744 | [[package]] 745 | name = "signal-hook-mio" 746 | version = "0.2.4" 747 | source = "registry+https://github.com/rust-lang/crates.io-index" 748 | checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd" 749 | dependencies = [ 750 | "libc", 751 | "mio", 752 | "signal-hook", 753 | ] 754 | 755 | [[package]] 756 | name = "signal-hook-registry" 757 | version = "1.4.0" 758 | source = "registry+https://github.com/rust-lang/crates.io-index" 759 | checksum = 
"e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" 760 | dependencies = [ 761 | "libc", 762 | ] 763 | 764 | [[package]] 765 | name = "siphasher" 766 | version = "0.3.5" 767 | source = "registry+https://github.com/rust-lang/crates.io-index" 768 | checksum = "cbce6d4507c7e4a3962091436e56e95290cb71fa302d0d270e32130b75fbff27" 769 | 770 | [[package]] 771 | name = "smallvec" 772 | version = "1.6.1" 773 | source = "registry+https://github.com/rust-lang/crates.io-index" 774 | checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" 775 | 776 | [[package]] 777 | name = "strict" 778 | version = "0.2.0" 779 | source = "registry+https://github.com/rust-lang/crates.io-index" 780 | checksum = "f42444fea5b87a39db4218d9422087e66a85d0e7a0963a439b07bcdf91804006" 781 | 782 | [[package]] 783 | name = "subtle" 784 | version = "2.4.1" 785 | source = "registry+https://github.com/rust-lang/crates.io-index" 786 | checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" 787 | 788 | [[package]] 789 | name = "syn" 790 | version = "1.0.73" 791 | source = "registry+https://github.com/rust-lang/crates.io-index" 792 | checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7" 793 | dependencies = [ 794 | "proc-macro2", 795 | "quote", 796 | "unicode-xid", 797 | ] 798 | 799 | [[package]] 800 | name = "syn" 801 | version = "2.0.22" 802 | source = "registry+https://github.com/rust-lang/crates.io-index" 803 | checksum = "2efbeae7acf4eabd6bcdcbd11c92f45231ddda7539edc7806bd1a04a03b24616" 804 | dependencies = [ 805 | "proc-macro2", 806 | "quote", 807 | "unicode-ident", 808 | ] 809 | 810 | [[package]] 811 | name = "termimad" 812 | version = "0.30.0" 813 | source = "registry+https://github.com/rust-lang/crates.io-index" 814 | checksum = "920e7c4671e79f3d9df269da9c8edf0dbc580044fd727d3594f7bfba5eb6107a" 815 | dependencies = [ 816 | "coolor", 817 | "crokey", 818 | "crossbeam", 819 | "lazy-regex", 820 | "minimad", 821 | "serde", 822 
| "thiserror", 823 | "unicode-width", 824 | ] 825 | 826 | [[package]] 827 | name = "thiserror" 828 | version = "1.0.26" 829 | source = "registry+https://github.com/rust-lang/crates.io-index" 830 | checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2" 831 | dependencies = [ 832 | "thiserror-impl", 833 | ] 834 | 835 | [[package]] 836 | name = "thiserror-impl" 837 | version = "1.0.26" 838 | source = "registry+https://github.com/rust-lang/crates.io-index" 839 | checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745" 840 | dependencies = [ 841 | "proc-macro2", 842 | "quote", 843 | "syn 1.0.73", 844 | ] 845 | 846 | [[package]] 847 | name = "time" 848 | version = "0.1.44" 849 | source = "registry+https://github.com/rust-lang/crates.io-index" 850 | checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" 851 | dependencies = [ 852 | "libc", 853 | "wasi 0.10.0+wasi-snapshot-preview1", 854 | "winapi", 855 | ] 856 | 857 | [[package]] 858 | name = "typenum" 859 | version = "1.16.0" 860 | source = "registry+https://github.com/rust-lang/crates.io-index" 861 | checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" 862 | 863 | [[package]] 864 | name = "unicode-ident" 865 | version = "1.0.9" 866 | source = "registry+https://github.com/rust-lang/crates.io-index" 867 | checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" 868 | 869 | [[package]] 870 | name = "unicode-segmentation" 871 | version = "1.8.0" 872 | source = "registry+https://github.com/rust-lang/crates.io-index" 873 | checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" 874 | 875 | [[package]] 876 | name = "unicode-width" 877 | version = "0.1.13" 878 | source = "registry+https://github.com/rust-lang/crates.io-index" 879 | checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" 880 | 881 | [[package]] 882 | name = "unicode-xid" 883 | version = "0.2.2" 884 | source 
= "registry+https://github.com/rust-lang/crates.io-index" 885 | checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" 886 | 887 | [[package]] 888 | name = "version_check" 889 | version = "0.9.3" 890 | source = "registry+https://github.com/rust-lang/crates.io-index" 891 | checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" 892 | 893 | [[package]] 894 | name = "wasi" 895 | version = "0.10.0+wasi-snapshot-preview1" 896 | source = "registry+https://github.com/rust-lang/crates.io-index" 897 | checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" 898 | 899 | [[package]] 900 | name = "wasi" 901 | version = "0.11.0+wasi-snapshot-preview1" 902 | source = "registry+https://github.com/rust-lang/crates.io-index" 903 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 904 | 905 | [[package]] 906 | name = "winapi" 907 | version = "0.3.9" 908 | source = "registry+https://github.com/rust-lang/crates.io-index" 909 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 910 | dependencies = [ 911 | "winapi-i686-pc-windows-gnu", 912 | "winapi-x86_64-pc-windows-gnu", 913 | ] 914 | 915 | [[package]] 916 | name = "winapi-i686-pc-windows-gnu" 917 | version = "0.4.0" 918 | source = "registry+https://github.com/rust-lang/crates.io-index" 919 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 920 | 921 | [[package]] 922 | name = "winapi-x86_64-pc-windows-gnu" 923 | version = "0.4.0" 924 | source = "registry+https://github.com/rust-lang/crates.io-index" 925 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 926 | 927 | [[package]] 928 | name = "windows-sys" 929 | version = "0.52.0" 930 | source = "registry+https://github.com/rust-lang/crates.io-index" 931 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 932 | dependencies = [ 933 | "windows-targets", 934 | ] 935 | 936 | 
[[package]] 937 | name = "windows-targets" 938 | version = "0.52.6" 939 | source = "registry+https://github.com/rust-lang/crates.io-index" 940 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 941 | dependencies = [ 942 | "windows_aarch64_gnullvm", 943 | "windows_aarch64_msvc", 944 | "windows_i686_gnu", 945 | "windows_i686_gnullvm", 946 | "windows_i686_msvc", 947 | "windows_x86_64_gnu", 948 | "windows_x86_64_gnullvm", 949 | "windows_x86_64_msvc", 950 | ] 951 | 952 | [[package]] 953 | name = "windows_aarch64_gnullvm" 954 | version = "0.52.6" 955 | source = "registry+https://github.com/rust-lang/crates.io-index" 956 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 957 | 958 | [[package]] 959 | name = "windows_aarch64_msvc" 960 | version = "0.52.6" 961 | source = "registry+https://github.com/rust-lang/crates.io-index" 962 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 963 | 964 | [[package]] 965 | name = "windows_i686_gnu" 966 | version = "0.52.6" 967 | source = "registry+https://github.com/rust-lang/crates.io-index" 968 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 969 | 970 | [[package]] 971 | name = "windows_i686_gnullvm" 972 | version = "0.52.6" 973 | source = "registry+https://github.com/rust-lang/crates.io-index" 974 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 975 | 976 | [[package]] 977 | name = "windows_i686_msvc" 978 | version = "0.52.6" 979 | source = "registry+https://github.com/rust-lang/crates.io-index" 980 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 981 | 982 | [[package]] 983 | name = "windows_x86_64_gnu" 984 | version = "0.52.6" 985 | source = "registry+https://github.com/rust-lang/crates.io-index" 986 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 987 | 988 | [[package]] 989 | name = "windows_x86_64_gnullvm" 990 | version 
= "0.52.6" 991 | source = "registry+https://github.com/rust-lang/crates.io-index" 992 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 993 | 994 | [[package]] 995 | name = "windows_x86_64_msvc" 996 | version = "0.52.6" 997 | source = "registry+https://github.com/rust-lang/crates.io-index" 998 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 999 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "backdown" 3 | version = "1.1.2" 4 | authors = ["Canop "] 5 | edition = "2021" 6 | rust-version = "1.59" 7 | description = "A smart CLI for removing thousands of duplicates on your disks" 8 | repository = "https://github.com/Canop/backdown" 9 | license = "MIT" 10 | readme = "README.md" 11 | 12 | [dependencies] 13 | argh = "0.1.4" 14 | anyhow = "1.0.49" 15 | blake3 = "1.4" 16 | chrono = "0.4" 17 | cli-log = "2.0" 18 | crossbeam = "0.8" 19 | file-size = "1.0" 20 | fnv = "1.0.7" 21 | lazy-regex = "3.3" 22 | phf = { version = "0.11", features = ["macros"] } 23 | rayon = "1.3" 24 | serde ="1.0" 25 | serde_json = "1.0" 26 | termimad = "0.30" 27 | 28 | [profile.release] 29 | strip = true 30 | 31 | [patch.crates-io] 32 | #minimad = { path = "../minimad" } 33 | #termimad = { path = "../termimad" } 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Canop 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to 
permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # backdown 2 | 3 | [![MIT][s2]][l2] [![Latest Version][s1]][l1] [![Build][s3]][l3] [![Chat on Miaou][s4]][l4] 4 | 5 | [s1]: https://img.shields.io/crates/v/backdown.svg 6 | [l1]: https://crates.io/crates/backdown 7 | 8 | [s2]: https://img.shields.io/badge/license-MIT-blue.svg 9 | [l2]: LICENSE 10 | 11 | [s3]: https://github.com/Canop/backdown/actions/workflows/rust.yml/badge.svg 12 | [l3]: https://github.com/Canop/backdown/actions/workflows/rust.yml 13 | 14 | [s4]: https://miaou.dystroy.org/static/shields/room.svg 15 | [l4]: https://miaou.dystroy.org/3768?Rust 16 | 17 | **Backdown** helps you safely and ergonomically remove duplicate files. 18 | 19 | Its design is based upon my observation of frequent patterns regarding build-up of duplicates with time, especially images and other media files. 20 | 21 | Finding duplicates is easy. Cleaning the disk when there are thousands of them is the hard part. What Backdown brings is the easy way to select and remove the duplicates you don't want to keep. 
22 | 23 | A Backdown session goes through the following phases: 24 | 25 | 1. Backdown analyzes the directory of your choice and finds sets of duplicates (files whose content is exactly the same). Backdown ignores symlinks and files or directories whose name starts with a dot. 26 | 2. Backdown asks you a few questions depending on the analysis. Nothing is removed at this point: you only stage files for removal. Backdown never lets you stage all items in a set of identical files. 27 | 3. After optionally reviewing the list of staged files, you confirm the removals. 28 | 4. Backdown performs the removals on disk. 29 | 30 | # What it looks like 31 | 32 | Analysis and first question: 33 | 34 | ![screen 1](doc/screen-1.png) 35 | 36 | Another kind of question: 37 | 38 | ![screen 2](doc/screen-2.png) 39 | 40 | Yet another one: 41 | 42 | ![screen 3](doc/screen-3.png) 43 | 44 | Yet another one: 45 | 46 | ![screen 4](doc/screen-4.png) 47 | 48 | Review and Confirm: 49 | 50 | ![screen 5](doc/screen-5.png) 51 | 52 | At this point you may also export the report as JSON, and you may decide to replace each removed file with a link to one of the kept ones. 53 | 54 | # Installation 55 | 56 | ## From the crates.io repository 57 | 58 | You must have the Rust toolchain installed: https://rustup.rs 59 | 60 | Run 61 | 62 | ```bash 63 | cargo install --locked backdown 64 | ``` 65 | 66 | ## From Source 67 | 68 | You must have the Rust toolchain installed: https://rustup.rs 69 | 70 | Download this repository, then run 71 | 72 | ```bash 73 | cargo install --path . 74 | ``` 75 | 76 | ## Precompiled binaries 77 | 78 | Unless you're a Rust developer, I recommend you just download the precompiled binaries, as this will save a lot of space on your disk. 
79 | 80 | Binaries are made available at https://dystroy.org/backdown/download/ 81 | 82 | # Usage 83 | 84 | ## Deduplicate any kind of file 85 | 86 | ```bash 87 | backdown /some/directory 88 | ``` 89 | 90 | ## Deduplicate images 91 | 92 | ```bash 93 | backdown -i /some/directory 94 | ``` 95 | 96 | ## JSON report 97 | 98 | After the staging phase, you may decide to export a report as JSON. This doesn't prevent you from also doing the removals. 99 | 100 | The JSON looks like this: 101 | 102 | ```JSON 103 | { 104 | "dup_sets": [ 105 | { 106 | "file_len": 1212746, 107 | "files": { 108 | "trav-copy/2006-05 (mai)/HPIM0530.JPG": "remove", 109 | "trav-copy/2006-06 (juin)/HPIM0530 (another copy).JPG": "remove", 110 | "trav-copy/2006-06 (juin)/HPIM0530 (copy).JPG": "remove", 111 | "trav-copy/2006-06 (juin)/HPIM0530.JPG": "keep" 112 | } 113 | }, 114 | { 115 | "file_len": 1980628, 116 | "files": { 117 | "trav-copy/2006-03 (mars)/HPIM0608.JPG": "keep", 118 | "trav-copy/2006-05 (mai)/HPIM0608.JPG": "remove", 119 | "trav-copy/2006-06 (juin)/HPIM0608.JPG": "keep" 120 | } 121 | }, 122 | { 123 | "file_len": 1124764, 124 | "files": { 125 | "trav-copy/2006-05 (mai)/HPIM0529.JPG": "remove", 126 | "trav-copy/2006-06 (juin)/HPIM0529.JPG": "keep" 127 | } 128 | }, 129 | { 130 | "file_len": 1706672, 131 | "files": { 132 | "trav-copy/2006-05 (mai)/test.jpg": "remove", 133 | "trav-copy/2006-06 (juin)/HPIM0598.JPG": "keep" 134 | } 135 | } 136 | ], 137 | "len_to_remove": 8450302 138 | } 139 | ``` 140 | 141 | # Advice 142 | 143 | * If you launch backdown on a big directory, it may find more duplicates than you suspect there are. Don't force yourself to answer *all* questions at first: if you stage the removals of the first dozen questions you'll already gain a lot, and you can handle the other ones another day 144 | * Don't launch backdown at the root of your disk because you don't want to try and deal with duplicates in system resources, programs, build artefacts, etc. 
Launch backdown where you store your images, videos, or music 145 | * Backdown isn't designed for dev directories and doesn't respect .gitignore rules 146 | * If you launch backdown in a directory with millions of files on a slow disk, you'll have to wait a long time while the content is hashed. Try with a smaller directory first if you have an HDD 147 | * If you're only interested in images, use the -i option 148 | -------------------------------------------------------------------------------- /compile-all-targets.sh: -------------------------------------------------------------------------------- 1 | # WARNING: This script is NOT meant for normal installation, it's dedicated 2 | # to the compilation of all supported targets, from a linux machine. 3 | 4 | H1="\n\e[30;104;1m\e[2K\n\e[A" # style first header 5 | H2="\n\e[30;104m\e[1K\n\e[A" # style second header 6 | EH="\e[00m\n\e[2K" # end header 7 | 8 | version=$(sed 's/version = "\([0-9.]\{1,\}\(-[a-z]\+\)\?\)"/\1/;t;d' Cargo.toml | head -1) 9 | echo -e "${H1}Compilation of all targets for backdown $version${EH}" 10 | 11 | # clean previous build 12 | rm -rf build 13 | mkdir build 14 | echo " build cleaned" 15 | 16 | # build the linux version 17 | echo -e "${H2}Compiling the linux version${EH}" 18 | cargo clean 19 | cargo build --release 20 | strip target/release/backdown 21 | mkdir build/x86_64-linux/ 22 | cp target/release/backdown build/x86_64-linux/ 23 | 24 | # build a musl version 25 | echo -e "${H2}Compiling the MUSL version${EH}" 26 | cargo clean 27 | cross build --release --target x86_64-unknown-linux-musl 28 | mkdir build/x86_64-unknown-linux-musl 29 | cp target/x86_64-unknown-linux-musl/release/backdown build/x86_64-unknown-linux-musl 30 | 31 | # build the windows version 32 | # use cargo cross 33 | echo -e "${H2}Compiling the Windows version${EH}" 34 | cargo clean 35 | cross build --target x86_64-pc-windows-gnu --release 36 | mkdir build/x86_64-pc-windows-gnu 37 | cp 
target/x86_64-pc-windows-gnu/release/backdown.exe build/x86_64-pc-windows-gnu/ 38 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | # build the release zip 2 | ./release.sh 3 | 4 | version=$(sed 's/version = "\([0-9.]\{1,\}\)"/\1/;t;d' Cargo.toml | head -1) 5 | 6 | # deploy on dystroy.org 7 | rm -rf ~/dev/www/dystroy/backdown/download/* 8 | cp -r build/* ~/dev/www/dystroy/backdown/download/ 9 | cp "backdown_$version.zip" ~/dev/www/dystroy/backdown/download/ 10 | ~/dev/www/dystroy/deploy.sh 11 | -------------------------------------------------------------------------------- /doc/screen-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Canop/backdown/3b8702894a144e27dacd0806fcdbaebf260a0c71/doc/screen-1.png -------------------------------------------------------------------------------- /doc/screen-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Canop/backdown/3b8702894a144e27dacd0806fcdbaebf260a0c71/doc/screen-2.png -------------------------------------------------------------------------------- /doc/screen-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Canop/backdown/3b8702894a144e27dacd0806fcdbaebf260a0c71/doc/screen-3.png -------------------------------------------------------------------------------- /doc/screen-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Canop/backdown/3b8702894a144e27dacd0806fcdbaebf260a0c71/doc/screen-4.png -------------------------------------------------------------------------------- /doc/screen-5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Canop/backdown/3b8702894a144e27dacd0806fcdbaebf260a0c71/doc/screen-5.png -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | # build a new release of backdown 2 | # This isn't used for normal compilation but for the building of the official releases 3 | version=$(sed 's/version = "\([0-9.]\{1,\}\)"/\1/;t;d' Cargo.toml | head -1) 4 | 5 | echo "Building release $version" 6 | 7 | # make the build directory and compile for all targets 8 | ./compile-all-targets.sh 9 | 10 | # add the readme and changelog in the build directory 11 | echo "This is backdown. More info and installation instructions on https://github.com/Canop/backdown" > build/README.md 12 | cp CHANGELOG.md build 13 | 14 | # publish version number 15 | echo "$version" > build/version 16 | 17 | # prepare the release archive 18 | rm backdown_*.zip 19 | zip -r "backdown_$version.zip" build/* 20 | 21 | # copy it to releases folder 22 | mkdir releases 23 | cp "backdown_$version.zip" releases 24 | -------------------------------------------------------------------------------- /src/args.rs: -------------------------------------------------------------------------------- 1 | use { 2 | argh::FromArgs, 3 | std::path::PathBuf, 4 | }; 5 | 6 | #[derive(FromArgs)] 7 | /// Help you remove duplicate files from your disks 8 | /// 9 | /// 10 | /// Source and doc at https://github.com/Canop/backdown 11 | pub struct Args { 12 | /// print the version 13 | #[argh(switch, short = 'v')] 14 | pub version: bool, 15 | 16 | /// whether to only handle image files 17 | #[argh(switch, short = 'i')] 18 | pub only_images: bool, 19 | 20 | #[argh(positional)] 21 | /// where to look for duplicates (will use . 
if no directory is provided) 22 | pub path: Option, 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/ask.rs: -------------------------------------------------------------------------------- 1 | use { 2 | crate::*, 3 | fnv::FnvHashMap, 4 | minimad::*, 5 | termimad::*, 6 | }; 7 | 8 | const MAX_LISTED_FILES: usize = 5; 9 | 10 | pub fn ask_on_dirs<'d>( 11 | dirs_report: &'d DirsReport, 12 | dups: &'d [DupSet], 13 | skin: &MadSkin, 14 | ) -> anyhow::Result> { 15 | let mut rr = RemovalReport::default(); 16 | let mut question_idx = 0; 17 | let mut questions = dirs_report.dup_dirs.len() + dirs_report.brotherhoods.len() + dirs_report.dir_pairs.len(); 18 | let ask_about_autosolve = dirs_report.auto_solvable_brotherhoods_count > 1; 19 | if ask_about_autosolve { 20 | questions += 1; 21 | } 22 | 23 | static MD: &str = r#" 24 | I'll now ask you up to *${questions}* questions to determine what files should be removed.\ 25 | No file will be removed until you have the possibility to review them after the staging step.\ 26 | You don't have to answer all questions:\ 27 | you may end the staging phase at any time and then either do the removals or quit. 
28 | "#; 29 | let mut expander = OwningTemplateExpander::new(); 30 | expander.set("questions", questions); 31 | skin.print_owning_expander(&expander, &TextTemplate::from(MD)); 32 | 33 | // return true if break 34 | let check = |rr: &RemovalReport| { 35 | if rr.quit { 36 | return true; 37 | } 38 | mad_print_inline!( 39 | skin, 40 | " -> currently staged: **$0** duplicate files for a removed size of **$1**\n", 41 | // two following lines used for some screenshots so that I don't redo the staging 42 | // 1042, 43 | // "5.5G", 44 | rr.staged_removals.len(), 45 | file_size::fit_4(rr.len_to_remove), 46 | ); 47 | rr.broken 48 | }; 49 | 50 | let skip_auto_solvable_brotherhoods = ask_about_autosolve && { 51 | let solved = ask_auto_solve( 52 | question_idx, 53 | questions, 54 | dirs_report, 55 | dups, 56 | skin, 57 | &mut rr, 58 | )?; 59 | if check(&rr) { 60 | return Ok(rr); 61 | } 62 | question_idx += 1; 63 | solved 64 | }; 65 | 66 | for dup_dir in &dirs_report.dup_dirs { 67 | ask_on_dup_dir( 68 | question_idx, 69 | questions, 70 | dup_dir, 71 | dups, 72 | skin, 73 | &mut rr, 74 | )?; 75 | if check(&rr) { 76 | break; 77 | } 78 | question_idx += 1; 79 | } 80 | if rr.broken || rr.quit { 81 | return Ok(rr); 82 | } 83 | 84 | for brotherhood in &dirs_report.brotherhoods { 85 | if skip_auto_solvable_brotherhoods && brotherhood.is_auto_solvable { 86 | mad_print_inline!(skin, "skipping question *$0*\n", question_idx); 87 | } else { 88 | ask_on_brotherhood( 89 | question_idx, 90 | questions, 91 | brotherhood, 92 | dups, 93 | skin, 94 | &mut rr, 95 | )?; 96 | if check(&rr) { 97 | break; 98 | } 99 | } 100 | question_idx += 1; 101 | } 102 | if rr.broken || rr.quit { 103 | return Ok(rr); 104 | } 105 | 106 | for dir_pair in &dirs_report.dir_pairs { 107 | ask_on_dir_pair( 108 | question_idx, 109 | questions, 110 | dir_pair, 111 | dups, 112 | skin, 113 | &mut rr, 114 | )?; 115 | if check(&rr) { 116 | break; 117 | } 118 | question_idx += 1; 119 | } 120 | 121 | Ok(rr) 122 | } 123 | 124 | 
static MD_AUTO_SOLVE: &str = r#" 125 | 126 | ## Staging Question **${num}**/${questions} 127 | You have several duplicates with "copy" names in the same directory than their identical "source" (for example *${example_1}* and *${example_2}*). 128 | I can automatically stage those **${file_count}** duplicates, which would let you gain **${size}**. 129 | If you accept, you'll skip *${skippable_questions}* questions. 130 | "#; 131 | 132 | /// return whether auto solvable brotherhoods are solved (we'll skip their questions then) 133 | fn ask_auto_solve<'d>( 134 | question_idx: usize, 135 | questions: usize, 136 | dirs_report: &'d DirsReport, 137 | dups: &'d [DupSet], 138 | skin: &MadSkin, 139 | rr: &mut RemovalReport<'d>, 140 | ) -> anyhow::Result { 141 | debug_assert!(question_idx == 0); 142 | let mut removable_count = 0; 143 | let mut removable_len = 0; 144 | let mut skippable_questions = 0; 145 | let mut example_names = Vec::new(); 146 | for brotherhood in dirs_report.brotherhoods.iter().filter(|b| b.is_auto_solvable) { 147 | removable_count += brotherhood.files.len() - 1; 148 | removable_len += (brotherhood.files.len() - 1) as u64 * dups[brotherhood.dup_set_idx].file_len; 149 | skippable_questions += 1; 150 | if example_names.len() < 2 { 151 | example_names.push( 152 | brotherhood.files.iter() 153 | .map(|&dup_file_idx| DupFileRef { 154 | dup_set_idx: brotherhood.dup_set_idx, 155 | dup_file_idx, 156 | }) 157 | .filter_map(|dup_file_ref| dup_file_ref.copy_name(dups)) 158 | .next() 159 | .unwrap() // SAFETY: it's not auto solvable if there's no copy named file 160 | ); 161 | } 162 | } 163 | let mut expander = OwningTemplateExpander::new(); 164 | expander 165 | .set("num", question_idx + 1) 166 | .set("questions", questions) 167 | .set("example_1", example_names[0]) 168 | .set("example_2", example_names[1]) 169 | .set("skippable_questions", skippable_questions) 170 | .set("file_count", removable_count) 171 | .set("size", file_size::fit_4(removable_len)); 172 | 
skin.print_owning_expander(&expander, &TextTemplate::from(MD_AUTO_SOLVE)); 173 | Ok(ask!(skin, "Do you want me to automatically stage those copies ?", ('y') { 174 | ('y', "**Y**es") => { 175 | for brotherhood in dirs_report.brotherhoods.iter().filter(|b| b.is_auto_solvable) { 176 | let dup_file_refs = brotherhood.files.iter() 177 | .map(|&dup_file_idx| DupFileRef { 178 | dup_set_idx: brotherhood.dup_set_idx, 179 | dup_file_idx, 180 | }) 181 | .filter(|dup_file_ref| dup_file_ref.is_copy_named(dups)); 182 | for dup_file_ref in dup_file_refs { 183 | rr.stage_file(dup_file_ref, dups); 184 | } 185 | } 186 | true 187 | } 188 | ('n', "**N**o") => { 189 | false 190 | } 191 | ('e', "**E**nd staging and quit") => { 192 | rr.quit = true; 193 | false 194 | } 195 | })) 196 | } 197 | 198 | static MD_DUP_DIR: &str = r#" 199 | 200 | ## Staging Question **${num}**/${questions} 201 | The *${directory}* directory contains **${file_count}** files which are all present elsewhere.\ 202 | You can remove the whole directory without losing anything.\ 203 | This would let you gain **${size}**.\ 204 | "#; 205 | 206 | /// ask for a dir which contains only duplicates 207 | fn ask_on_dup_dir<'d>( 208 | question_idx: usize, 209 | questions: usize, 210 | dup_dir: &'d DupDir, 211 | dups: &'d [DupSet], 212 | skin: &MadSkin, 213 | rr: &mut RemovalReport<'d>, 214 | ) -> anyhow::Result<()> { 215 | // first we must make sure the dir doesn't contain the last file(s) of a dupset 216 | let mut file_idxs_per_dupset: FnvHashMap> = FnvHashMap::default(); 217 | for file_ref in &dup_dir.files { 218 | file_idxs_per_dupset.entry(file_ref.dup_set_idx) 219 | .or_default() 220 | .push(file_ref.dup_file_idx); 221 | } 222 | for (&dup_set_idx, file_idxs) in &file_idxs_per_dupset { 223 | let dup_set = &dups[dup_set_idx]; 224 | let not_here_or_staged_count = (0..dup_set.files.len()) 225 | .filter(|&dup_file_idx| { 226 | !rr.staged_removals.contains(&DupFileRef { dup_set_idx, dup_file_idx }) 227 | && 228 | 
!file_idxs.contains(&dup_file_idx) 229 | }) 230 | .count(); 231 | if not_here_or_staged_count == 0 { 232 | // dup_set would be removed -> skipping 233 | return Ok(()); 234 | } 235 | } 236 | // now we know we can stage the whole directory 237 | let removable_len = dup_dir.files.iter() 238 | .map(|dup_file_ref| dups[dup_file_ref.dup_set_idx].file_len) 239 | .sum(); 240 | let mut expander = OwningTemplateExpander::new(); 241 | expander 242 | .set("num", question_idx + 1) 243 | .set("questions", questions) 244 | .set("directory", dup_dir.path.to_string_lossy()) 245 | .set("file_count", dup_dir.files.len()) 246 | .set("size", file_size::fit_4(removable_len)); 247 | skin.print_owning_expander(&expander, &TextTemplate::from(MD_DUP_DIR)); 248 | ask!(skin, "What do you want to do with this directory?", ('s') { 249 | ('r', "Stage the whole directory for **r**emoval") => { 250 | for &file_ref in &dup_dir.files { 251 | rr.stage_file(file_ref, dups); 252 | } 253 | rr.staged_dir_removals.push(dup_dir.path); 254 | } 255 | ('s', "**S**kip and go to next question") => {} 256 | ('e', "**E**nd staging phase") => { rr.broken = true; } 257 | }); 258 | Ok(()) 259 | } 260 | 261 | static MD_BROTHERHOOD: &str = r#" 262 | 263 | ## Staging Question **${num}**/${questions} 264 | The *${parent}* directory contains **${file_count}** identical files, each one of size **${size}**. 265 | "#; 266 | 267 | // ask for a set of identical files in the same directory 268 | fn ask_on_brotherhood( 269 | question_idx: usize, 270 | questions: usize, 271 | brotherhood: &Brotherhood, 272 | dups: &[DupSet], 273 | skin: &MadSkin, 274 | rr: &mut RemovalReport, 275 | ) -> anyhow::Result<()> { 276 | // we check nothing because questions for brotherhoods come before the other ones 277 | // FIXME we must check it's not autosolved! 
278 | let dup_set = &dups[brotherhood.dup_set_idx]; 279 | let mut expander = OwningTemplateExpander::new(); 280 | expander 281 | .set("num", question_idx + 1) 282 | .set("questions", questions) 283 | .set("parent", brotherhood.parent.to_string_lossy()) 284 | .set("file_count", brotherhood.files.len()) 285 | .set("size", file_size::fit_4(dup_set.file_len)); 286 | skin.print_owning_expander(&expander, &TextTemplate::from(MD_BROTHERHOOD)); 287 | let mut q = Question::new("What do you want to do with these duplicates?"); 288 | 289 | /* a candidate answer: `idx` is the file's index in `dup_set.files`, `name` its file name */ struct F<'f> { idx: usize, name: &'f str } 290 | let mut candidates: Vec<F> = brotherhood.files.iter() 291 | .map(|&idx| F{ idx, name: dup_set.files[idx].path.file_name().unwrap().to_str().unwrap() }) 292 | .collect(); 293 | candidates.sort_by(|a, b| a.name.cmp(b.name)); 294 | for (i, f) in candidates.iter().enumerate() { 295 | q.add_answer( 296 | i + 1, 297 | format!("keep *{}* and stage other one(s) for removal", f.name), 298 | ); 299 | } 300 | q.add_answer('s', "**S**kip and go to next question"); 301 | q.add_answer('e', "**E**nd staging phase"); 302 | q.set_default("s"); 303 | match q.ask(skin)?.as_str() { 304 | "s" => {} 305 | "e" => { rr.broken = true; } 306 | a => { 307 | if let Ok(a) = a.parse::<usize>() { 308 | if a == 0 { 309 | println!("Options start at 1 - skipping"); 310 | } else { 311 | let chosen = &candidates[a - 1]; 312 | for i in 0..brotherhood.files.len() { 313 | /* `i` is a position in `brotherhood.files` while `chosen.idx` is a file index in the dup set: compare file indexes so the kept file is never staged */ if brotherhood.files[i] != chosen.idx { 314 | rr.stage_file(brotherhood.file_ref(i), dups); 315 | } 316 | } 317 | } 318 | } 319 | } 320 | } 321 | Ok(()) 322 | } 323 | 324 | static MD_DIR_PAIR: &str = r#" 325 | 326 | ## Staging Question **${num}**/${questions} 327 | Left and right directories have **${file_count}** common files for a total duplicate size of **${removable_len}**. 
328 | |-:|:-:|:-:| 329 | | |left|right| 330 | |-:|:-:|:-:| 331 | |directory|*${left_path}*|*${right_path}*| 332 | ${common_files 333 | |common files|${file_count}|${file_count}| 334 | } 335 | ${removable_files 336 | |removable file #${removable_file_idx}|**${left_file_name}**|**${right_file_name}**| 337 | } 338 | |already staged for removal|${removed_left_count}|${removed_right_count}| 339 | |other files|${left_other_count}|${right_other_count}| 340 | |-: 341 | "#; 342 | 343 | /// asking the question when left dir and right dir are different 344 | fn ask_on_dir_pair( 345 | question_idx: usize, 346 | questions: usize, 347 | dir_pair: &DirPair, 348 | dups: &[DupSet], 349 | skin: &MadSkin, 350 | rr: &mut RemovalReport, 351 | ) -> anyhow::Result<()> { 352 | // we must recount now because files may have been already 353 | // staged for removals 354 | let (mut removed_left_count, mut removed_right_count) = (0, 0); 355 | let (mut removable_left_count, mut removable_right_count) = (0, 0); 356 | let mut removable_pairs: Vec = Vec::new(); 357 | let mut removable_len: u64 = 0; 358 | for file_pair in &dir_pair.file_pairs { 359 | let removed_left = rr.staged_removals.contains(&file_pair.left_ref()); 360 | let removed_right = rr.staged_removals.contains(&file_pair.right_ref()); 361 | if removed_left { 362 | removed_left_count += 1; 363 | } else { 364 | removable_left_count += 1; 365 | } 366 | if removed_right { 367 | removed_right_count += 1; 368 | } else { 369 | removable_right_count += 1; 370 | } 371 | if !removed_left && !removed_right { 372 | removable_pairs.push(*file_pair); 373 | removable_len += dups[file_pair.dup_set_idx].file_len; 374 | } 375 | } 376 | if removable_pairs.is_empty() { 377 | mad_print_inline!(skin, "*skipping question because of previously staged removals*\n"); 378 | return Ok(()); 379 | } 380 | let left_dir_count = dir_pair.key.left_dir.read_dir()?.count(); 381 | if left_dir_count < removed_left_count + removable_left_count { 382 | println!("skipping 
question because some files were removed on disk"); 383 | return Ok(()); 384 | } 385 | let left_other_count = left_dir_count - removed_left_count - removable_left_count; 386 | let right_dir_count = dir_pair.key.right_dir.read_dir()?.count(); 387 | if right_dir_count < removed_right_count + removable_right_count { 388 | println!("skipping question because some files were removed on disk"); 389 | return Ok(()); 390 | } 391 | let right_other_count = right_dir_count - removed_right_count - removable_right_count; 392 | let mut expander = OwningTemplateExpander::new(); 393 | expander 394 | .set("num", question_idx + 1) 395 | .set("questions", questions) 396 | .set("file_count", removable_pairs.len()) 397 | .set("removable_len", file_size::fit_4(removable_len)) 398 | .set("left_path", dir_pair.key.left_dir.to_string_lossy()) 399 | .set("right_path", dir_pair.key.right_dir.to_string_lossy()) 400 | .set("removed_left_count", removed_left_count) 401 | .set("removed_right_count", removed_right_count) 402 | .set("left_other_count", left_other_count) 403 | .set("right_other_count", right_other_count); 404 | if removable_pairs.len() <= MAX_LISTED_FILES { 405 | for (removable_file_idx, file_pair) in removable_pairs.iter().enumerate() { 406 | expander.sub("removable_files") 407 | .set("removable_file_idx", removable_file_idx + 1) 408 | .set("left_file_name", file_pair.left_ref().file_name(dups)) 409 | .set("right_file_name", file_pair.right_ref().file_name(dups)); 410 | } 411 | } else { 412 | expander.sub("common_files"); 413 | } 414 | skin.print_owning_expander(&expander, &TextTemplate::from(MD_DIR_PAIR)); 415 | ask!(skin, "What do you want to do here?", ('s') { 416 | ('l', "Stage **l**eft files for removal") => { 417 | for file_pair in removable_pairs { 418 | rr.stage_file(file_pair.left_ref(), dups); 419 | } 420 | } 421 | ('r', "Stage **r**ight files for removal") => { 422 | for file_pair in removable_pairs { 423 | rr.stage_file(file_pair.right_ref(), dups); 424 | } 425 | } 426 
| ('s', "**S**kip and go to next question") => { 427 | println!("skipped"); 428 | } 429 | ('e', "**E**nd staging phase") => { 430 | rr.broken = true; 431 | } 432 | }); 433 | Ok(()) 434 | } 435 | 436 | -------------------------------------------------------------------------------- /src/dirs.rs: -------------------------------------------------------------------------------- 1 | use { 2 | crate::*, 3 | fnv::FnvHashMap, 4 | std::{ 5 | cmp::{Ord, Ordering, Reverse}, 6 | path::Path, 7 | }, 8 | }; 9 | 10 | #[derive(Debug)] 11 | pub struct DirsReport<'d> { 12 | pub dup_dirs: Vec>, 13 | pub brotherhoods: Vec>, 14 | pub auto_solvable_brotherhoods_count: usize, 15 | pub dir_pairs: Vec>, 16 | } 17 | 18 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 19 | pub struct DirPairKey<'d> { 20 | pub left_dir: &'d Path, 21 | pub right_dir: &'d Path, 22 | } 23 | 24 | #[derive(Debug)] 25 | pub struct DirPair<'d> { 26 | pub key: DirPairKey<'d>, 27 | pub file_pairs: Vec, 28 | } 29 | 30 | /// a brotherhood gather duplicates having the same parent 31 | #[derive(Debug)] 32 | pub struct Brotherhood<'d> { 33 | 34 | pub parent: &'d Path, 35 | 36 | pub dup_set_idx: usize, 37 | 38 | /// file indexes 39 | pub files: Vec, 40 | 41 | /// when all files have names like "thing (copy).png", "thing (another copy).png", etc. 42 | /// except one file, we can propose an automated resolution. 43 | /// Note that we don't check the start of filenames are identical because we 44 | /// don't, in fact, care. 
45 | pub is_auto_solvable: bool, 46 | } 47 | 48 | /// a directory which contains only duplicates 49 | #[derive(Debug)] 50 | pub struct DupDir<'d> { 51 | pub path: &'d Path, 52 | pub files: Vec, 53 | } 54 | 55 | impl<'d> Brotherhood<'d> { 56 | fn maybe_add_files(&mut self, a_idx: usize, b_idx: usize) { 57 | if !self.files.contains(&a_idx) { 58 | self.files.push(a_idx); 59 | } 60 | if !self.files.contains(&b_idx) { 61 | self.files.push(b_idx); 62 | } 63 | } 64 | pub fn file_ref(&self, i: usize) -> DupFileRef { 65 | DupFileRef { 66 | dup_set_idx: self.dup_set_idx, 67 | dup_file_idx: self.files[i], 68 | } 69 | } 70 | pub fn gain(&self, dups: &[DupSet]) -> u64 { 71 | (self.files.len() - 1) as u64 * dups[self.dup_set_idx].file_len 72 | } 73 | } 74 | 75 | impl<'d> DirPairKey<'d> { 76 | pub fn new(a: &'d Path, b: &'d Path) -> (Self, bool) { 77 | if a.cmp(b) == Ordering::Less { 78 | ( 79 | DirPairKey { 80 | left_dir: a, 81 | right_dir: b, 82 | }, 83 | false 84 | ) 85 | } else { 86 | ( 87 | DirPairKey { 88 | left_dir: b, 89 | right_dir: a, 90 | }, 91 | true 92 | ) 93 | } 94 | } 95 | } 96 | 97 | impl<'d> DirPair<'d> { 98 | pub fn new( 99 | key: DirPairKey<'d>, 100 | file_pairs: Vec, 101 | ) -> Self { 102 | Self { key, file_pairs } 103 | } 104 | } 105 | 106 | impl<'d> DirsReport<'d> { 107 | pub fn compute(dups: &'d[DupSet]) -> anyhow::Result { 108 | let mut brotherhoods = Vec::new(); 109 | let mut auto_solvable_brotherhoods_count = 0; 110 | let mut dp_map: FnvHashMap> = FnvHashMap::default(); 111 | let mut dir_map: FnvHashMap<&Path, Vec> = FnvHashMap::default(); 112 | let mut brotherhood_per_parent: FnvHashMap<&Path, Brotherhood<'d>> = FnvHashMap::default(); 113 | for (dup_set_idx, dup) in dups.iter().enumerate() { 114 | brotherhood_per_parent.clear(); 115 | for (a_file_idx, a) in dup.files.iter().enumerate() { 116 | let a_parent = a.path.parent().unwrap(); 117 | // adding to the dir_map 118 | dir_map.entry(a_parent) 119 | .or_default() 120 | .push(DupFileRef { dup_set_idx, 
dup_file_idx: a_file_idx }); 121 | 122 | // building dir pair 123 | for b_file_idx in a_file_idx+1..dup.files.len() { 124 | let b = &dup.files[b_file_idx]; 125 | let b_parent = b.path.parent().unwrap(); 126 | let (dpk, inverted) = DirPairKey::new( 127 | a_parent, 128 | b_parent, 129 | ); 130 | let (left_file_idx, right_file_idx) = if inverted { 131 | (b_file_idx, a_file_idx) 132 | } else { 133 | (a_file_idx, b_file_idx) 134 | }; 135 | if a_parent == b_parent { 136 | // brotherhood 137 | brotherhood_per_parent 138 | .entry(a_parent) 139 | .or_insert_with(|| Brotherhood { 140 | parent: a_parent, 141 | dup_set_idx, 142 | files: Vec::new(), 143 | is_auto_solvable: false, 144 | }) 145 | .maybe_add_files(left_file_idx, right_file_idx); 146 | } else { 147 | // dir_pair 148 | dp_map.entry(dpk) 149 | .or_default() 150 | .push(FilePair { 151 | dup_set_idx, 152 | left_file_idx, 153 | right_file_idx, 154 | }); 155 | } 156 | } 157 | } 158 | for (_, mut brotherhood) in brotherhood_per_parent.drain() { 159 | let copy_count = brotherhood.files 160 | .iter() 161 | .map(|&dup_file_idx| DupFileRef { 162 | dup_set_idx: brotherhood.dup_set_idx, 163 | dup_file_idx, 164 | }) 165 | .filter(|dup_file_ref| dup_file_ref.is_copy_named(dups)) 166 | .count(); 167 | if copy_count + 1 == brotherhood.files.len() { 168 | brotherhood.is_auto_solvable = true; 169 | auto_solvable_brotherhoods_count += 1; 170 | } 171 | brotherhoods.push(brotherhood); 172 | } 173 | } 174 | 175 | // we remove the parent of brotherhoods from dir_map 176 | // because we don't want them in dup_dirs 177 | for brotherhood in &brotherhoods { 178 | dir_map.remove(brotherhood.parent); 179 | } 180 | 181 | let mut dup_dirs = Vec::new(); 182 | for (path, files) in dir_map.drain() { 183 | if files.len() < 3 { 184 | // small directories aren't interesting, we'll handle 185 | // the dups by comparing dup dirs 186 | continue; 187 | } 188 | let total_child_count = path.read_dir()?.count(); 189 | if total_child_count == files.len() { 190 
| dup_dirs.push(DupDir { path, files }); 191 | } 192 | } 193 | 194 | // ordering 195 | dup_dirs.sort_by_key(|dd| Reverse(dd.files.len())); 196 | brotherhoods.sort_by_key(|b| Reverse(b.gain(dups))); 197 | let mut dir_pairs: Vec<_> = dp_map 198 | .drain() 199 | .map(|(key, file_pairs)| DirPair::new(key, file_pairs)) 200 | .collect(); 201 | dir_pairs.sort_by_key(|dp| Reverse(dp.file_pairs.len())); 202 | 203 | Ok(Self { 204 | dup_dirs, 205 | brotherhoods, 206 | auto_solvable_brotherhoods_count, 207 | dir_pairs, 208 | }) 209 | } 210 | } 211 | 212 | -------------------------------------------------------------------------------- /src/dup.rs: -------------------------------------------------------------------------------- 1 | use { 2 | lazy_regex::*, 3 | std::{ 4 | collections::HashSet, 5 | path::{Path, PathBuf}, 6 | }, 7 | }; 8 | 9 | 10 | // TODO virer et utiliser PathBuf directement ? 11 | #[derive(Debug)] 12 | pub struct DupFile { 13 | pub path: PathBuf, 14 | // pub staged_for_removal: bool, 15 | } 16 | 17 | /// the list of files having a hash 18 | #[derive(Debug, Default)] 19 | pub struct DupSet { 20 | pub files: Vec, // identical files 21 | pub file_len: u64, 22 | } 23 | 24 | #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq,)] 25 | pub struct DupFileRef { 26 | pub dup_set_idx: usize, 27 | pub dup_file_idx: usize, 28 | } 29 | 30 | impl DupFile { 31 | pub fn new(path: PathBuf) -> Self { 32 | Self { 33 | path, 34 | //staged_for_removal: false, 35 | } 36 | } 37 | } 38 | 39 | pub fn reference_file<'a>( 40 | dup_set_idx: usize, 41 | dup_set: &'a DupSet, 42 | staged_removals: &HashSet, 43 | ) -> Option<&'a Path> { 44 | let mut best: Option<&Path> = None; 45 | for (dup_file_idx, file) in dup_set.files.iter().enumerate() { 46 | let path = &file.path; 47 | let dup_file_ref = DupFileRef { dup_set_idx, dup_file_idx }; 48 | if staged_removals.contains(&dup_file_ref) { 49 | continue; 50 | } 51 | if let Some(previous) = best { 52 | if previous.to_string_lossy().len() > 
path.to_string_lossy().len() { 53 | best = Some(path); 54 | } 55 | } else { 56 | best = Some(path); 57 | } 58 | } 59 | best 60 | } 61 | 62 | impl DupFileRef { 63 | pub fn path(self, dups: &[DupSet]) -> &Path { 64 | &dups[self.dup_set_idx].files[self.dup_file_idx].path 65 | } 66 | pub fn file_name(self, dups:&[DupSet]) -> String { 67 | self.path(dups) 68 | .file_name() 69 | .map_or_else( 70 | || "".to_string(), 71 | |n| n.to_string_lossy().to_string() 72 | ) 73 | } 74 | /// get the file name when the file has a name like "thing (3).jpg" 75 | /// or "thing (3rd copy).png" 76 | pub fn copy_name(self, dups:&[DupSet]) -> Option<&str> { 77 | copy_name(self.path(dups)) 78 | } 79 | /// tells whether the file has a name like "thing (3).jpg" 80 | /// or "thing (3rd copy).png" 81 | pub fn is_copy_named(self, dups:&[DupSet]) -> bool { 82 | self.copy_name(dups).is_some() 83 | } 84 | } 85 | 86 | /// get the name if this path is of a "copy" file, that is an usual name for a copy 87 | pub fn copy_name(path: &Path) -> Option<&str> { 88 | path 89 | .file_name() 90 | .and_then(std::ffi::OsStr::to_str) 91 | .filter(|n| regex_is_match!(r#"(?x) 92 | .+ 93 | \(( 94 | \d+ 95 | | 96 | [^)]* 97 | copy 98 | )\) 99 | (\.\w+)? 
100 | $ 101 | "#, n)) 102 | } 103 | 104 | #[test] 105 | fn test_is_copy_named() { 106 | use std::path::PathBuf; 107 | let copies = &[ 108 | "/some/path/to/bla (3).jpg", 109 | "bla (3455).jpg", 110 | "uuuuu (copy).rs", 111 | "/home/dys/Images/pink hexapodes (another copy).jpeg", 112 | "~/uuuuu (copy)", 113 | "uuuuu (3rd copy)", 114 | ]; 115 | for s in copies { 116 | assert!(copy_name(&PathBuf::from(s)).is_some()); 117 | } 118 | let not_copies = &[ 119 | "copy", 120 | "copy.txt", 121 | "bla.png", 122 | "/home/dys/not a copy", 123 | "(don't copy)", 124 | ]; 125 | for s in not_copies { 126 | assert!(copy_name(&PathBuf::from(s)).is_none()); 127 | } 128 | 129 | } 130 | -------------------------------------------------------------------------------- /src/dup_report.rs: -------------------------------------------------------------------------------- 1 | use { 2 | anyhow::Result, 3 | crossbeam::channel, 4 | crate::*, 5 | fnv::FnvHashMap, 6 | minimad::*, 7 | rayon::{ 8 | prelude::ParallelIterator, 9 | iter::ParallelBridge, 10 | }, 11 | std::{ 12 | cmp::Reverse, 13 | fs, 14 | path::PathBuf, 15 | }, 16 | termimad::*, 17 | }; 18 | 19 | #[derive(Default)] 20 | pub struct DupReport { 21 | pub dups: Vec, 22 | pub seen: usize, 23 | /// number of files which could be removed 24 | /// when keeping one of each set 25 | pub duplicate_count: usize, 26 | pub duplicate_len_sum: u64, 27 | } 28 | 29 | impl DupReport { 30 | pub fn len(&self) -> usize { 31 | self.dups.len() 32 | } 33 | pub fn build( 34 | root: PathBuf, 35 | only_images: bool, 36 | ) -> Result { 37 | let (s_matching_files, r_matching_files) = channel::unbounded(); 38 | let (s_hashed_files, r_hashed_files) = channel::unbounded::<(PathBuf, FileHash)>(); 39 | let file_generator = std::thread::spawn(move||{ 40 | let mut dirs = Vec::new(); 41 | dirs.push(root); 42 | while let Some(dir) = dirs.pop() { 43 | if let Ok(entries) = fs::read_dir(&dir) { 44 | for e in entries.flatten() { 45 | let path = e.path(); 46 | let name = match 
path.file_name().and_then(|s| s.to_str()) { 47 | Some(s) => s, 48 | None => { continue; }, 49 | }; 50 | if name.starts_with('.') { 51 | continue; 52 | } 53 | if let Ok(md) = path.symlink_metadata() { 54 | if md.is_dir() { 55 | // we add the directory to the channel of dirs needing processing 56 | dirs.push(path); 57 | continue; 58 | } 59 | if md.is_file() { 60 | if only_images { 61 | let ext = match path.extension().and_then(|s| s.to_str()) { 62 | Some(s) => s, 63 | None => { continue; }, 64 | }; 65 | if !ext::is_image(ext) { 66 | continue; 67 | } 68 | } 69 | s_matching_files.send(path).unwrap(); 70 | } 71 | } 72 | } 73 | } 74 | } 75 | }); 76 | 77 | // parallel computation of the hashes 78 | r_matching_files.into_iter().par_bridge() 79 | .for_each_with(s_hashed_files, |s, path| { 80 | if let Ok(hash) = FileHash::new(&path) { 81 | s.send((path, hash)).unwrap(); 82 | } 83 | }); 84 | 85 | let mut map: FnvHashMap> = FnvHashMap::default(); 86 | let mut seen = 0; 87 | r_hashed_files.iter() 88 | .for_each(|(path, hash)| { 89 | let e = map.entry(hash).or_default(); 90 | e.push(DupFile::new(path)); 91 | seen += 1; 92 | }); 93 | 94 | file_generator.join().unwrap(); 95 | 96 | let mut dups = Vec::new(); 97 | let mut duplicate_count = 0; 98 | let mut duplicate_len_sum = 0; 99 | for (_hash, files) in map.drain() { 100 | if files.len() < 2 { 101 | continue; 102 | } 103 | if let Ok(md) = fs::metadata(&files[0].path) { 104 | duplicate_count += files.len() - 1; 105 | let file_len = md.len(); 106 | if file_len > 0 { 107 | duplicate_len_sum += (files.len() - 1) as u64 * file_len; 108 | dups.push(DupSet { 109 | files, 110 | file_len, 111 | }); 112 | } 113 | } 114 | } 115 | 116 | dups.sort_by_key(|dup| Reverse(dup.files.len())); 117 | 118 | Ok(Self{ 119 | dups, 120 | seen, 121 | duplicate_count, 122 | duplicate_len_sum, 123 | }) 124 | } 125 | 126 | pub fn print_summary( 127 | &self, 128 | skin: &MadSkin, 129 | ) { 130 | static MD: &str = r#" 131 | I've hashed *${seen}* files and found 
*${set_count}* sets of duplicates.\ 132 | *${removable_count}* files can be removed to gain **${gain}**.\ 133 | "#; 134 | let mut expander = OwningTemplateExpander::new(); 135 | expander 136 | .set("seen", self.seen) 137 | .set("set_count", self.dups.len()) 138 | .set("removable_count", self.duplicate_count) 139 | .set("gain", file_size::fit_4(self.duplicate_len_sum)); 140 | skin.print_owning_expander(&expander, &TextTemplate::from(MD)); 141 | } 142 | pub fn is_empty(&self) -> bool { 143 | self.dups.is_empty() 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/ext.rs: -------------------------------------------------------------------------------- 1 | use { 2 | phf::{phf_set, Set}, 3 | }; 4 | 5 | static IMAGE_EXTENSIONS: Set<&'static str> = phf_set! { 6 | "jpg", "JPG", 7 | "jpeg", "JPEG", 8 | "png", "PNG", 9 | }; 10 | 11 | pub fn is_image(ext: &str) -> bool { 12 | IMAGE_EXTENSIONS.contains(ext) 13 | } 14 | -------------------------------------------------------------------------------- /src/file_pair.rs: -------------------------------------------------------------------------------- 1 | use { 2 | crate::*, 3 | }; 4 | 5 | #[derive(Debug, Clone, Copy)] 6 | pub struct FilePair { 7 | pub dup_set_idx: usize, 8 | pub left_file_idx: usize, 9 | pub right_file_idx: usize, 10 | } 11 | 12 | impl FilePair { 13 | pub fn left_ref(self) -> DupFileRef { 14 | DupFileRef { 15 | dup_set_idx: self.dup_set_idx, 16 | dup_file_idx: self.left_file_idx, 17 | } 18 | } 19 | pub fn right_ref(self) -> DupFileRef { 20 | DupFileRef { 21 | dup_set_idx: self.dup_set_idx, 22 | dup_file_idx: self.right_file_idx, 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/hash.rs: -------------------------------------------------------------------------------- 1 | 2 | use { 3 | anyhow::Result, 4 | std::{ 5 | fs::File, 6 | io, 7 | path::Path, 8 | }, 9 | }; 10 | 11 | #[derive(Debug, PartialEq, 
Eq, Hash)] 12 | pub struct FileHash { 13 | hash: blake3::Hash, 14 | } 15 | 16 | impl FileHash { 17 | pub fn new>(path: P) -> Result { 18 | let mut file = File::open(path)?; 19 | let mut hasher = blake3::Hasher::new(); 20 | io::copy(&mut file, &mut hasher)?; 21 | let hash = hasher.finalize(); 22 | Ok(Self { 23 | hash, 24 | }) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/json.rs: -------------------------------------------------------------------------------- 1 | use { 2 | serde_json::Value, 3 | std::{ 4 | fs, 5 | io::Write, 6 | path::PathBuf, 7 | }, 8 | }; 9 | 10 | fn available_path(name: &str) -> PathBuf { 11 | let mut count = 1; 12 | let ext = "json"; 13 | loop { 14 | let cmp = if count > 1 { 15 | format!("-{}", count) 16 | } else { 17 | "".to_string() 18 | }; 19 | let file_name = format!( 20 | "{}-{}{}.{}", 21 | chrono::Local::now().format("%F-%Hh%M"), 22 | name, 23 | cmp, 24 | ext, 25 | ); 26 | let path = PathBuf::from(file_name); 27 | if !path.exists() { 28 | return path; 29 | } 30 | count += 1; 31 | } 32 | } 33 | 34 | /// write a JSON value in a file whose name will be based on the provided 35 | /// name, with a date and if necessary with an additional number to avoid 36 | /// collision. 
37 | pub fn write_in_file( 38 | name: &str, 39 | value: &Value, 40 | ) -> anyhow::Result { 41 | let path = available_path(name); 42 | let mut file = fs::File::create(&path)?; 43 | let json = serde_json::to_string_pretty(value)?; 44 | writeln!(&mut file, "{}", json)?; 45 | Ok(path) 46 | } 47 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] extern crate cli_log; 2 | 3 | pub mod args; 4 | pub mod ask; 5 | pub mod dirs; 6 | pub mod dup; 7 | pub mod dup_report; 8 | pub mod ext; 9 | pub mod file_pair; 10 | pub mod hash; 11 | mod json; 12 | pub mod removal_report; 13 | 14 | pub use { 15 | args::*, 16 | ask::*, 17 | dirs::*, 18 | dup::*, 19 | dup_report::*, 20 | file_pair::*, 21 | ext::*, 22 | hash::*, 23 | json::*, 24 | removal_report::*, 25 | }; 26 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] extern crate cli_log; 2 | 3 | use { 4 | backdown::*, 5 | anyhow::Result, 6 | crossterm::style::{Attribute::*, Color::*}, 7 | termimad::*, 8 | }; 9 | 10 | fn run_app() -> Result<()> { 11 | let args: Args = argh::from_env(); 12 | if args.version { 13 | println!("backdown {}", env!("CARGO_PKG_VERSION")); 14 | return Ok(()); 15 | } 16 | let root = args.path 17 | .unwrap_or_else(|| std::env::current_dir().unwrap()); 18 | let skin = make_skin(); 19 | info!("root: {:?}", &root); 20 | skin.print_text("\n# Phase 1) Analysis"); 21 | mad_print_inline!(skin, "Analyzing directory *$0*...\n", root.to_string_lossy()); 22 | let dup_report = time!( 23 | Info, 24 | "computing dup sets", 25 | DupReport::build(root, args.only_images)?, 26 | ); 27 | dup_report.print_summary(&skin); 28 | if dup_report.is_empty() { 29 | println!("There's nothing to remove"); 30 | return Ok(()); 31 | } 32 | let dirs_report = time!( 
33 | Info, 34 | "computing dirs report", 35 | DirsReport::compute(&dup_report.dups)?, 36 | ); 37 | skin.print_text("\n# Phase 2) Staging: choose files to remove"); 38 | let rr = ask_on_dirs(&dirs_report, &dup_report.dups, &skin)?; 39 | if rr.is_empty() || rr.quit { 40 | return Ok(()); 41 | } 42 | skin.print_text("\n# Phase 3) Review and confirm removals"); 43 | let mut exported = false; 44 | loop { 45 | let mut question = Question::new("What do you want to do now?"); 46 | question.add_answer('s', "Review touched **s**ets of identical files"); 47 | if !exported { 48 | question.add_answer( 49 | 'j', 50 | "Export touched sets of identical files in a **J**SON file", 51 | ); 52 | } 53 | question.add_answer('f', "Review all **f**iles staged for removal"); 54 | question.add_answer('r', "Do the **r**emovals now"); 55 | #[cfg(unix)] 56 | question.add_answer('l', "Replace removed files with **l**inks"); 57 | question.add_answer('q', "**Q**uit *backdown*, removing nothing"); 58 | match question.ask(&skin)?.as_ref() { 59 | "s" => { 60 | rr.list_dup_sets(&dup_report.dups, &skin); 61 | } 62 | "j" => { 63 | let value = rr.dup_sets_as_json(&dup_report.dups); 64 | let path = write_in_file("backdown-report", &value)?; 65 | mad_print_inline!(skin, "Wrote *$0*\n", path.to_string_lossy()); 66 | exported = true; 67 | } 68 | "f" => { 69 | rr.list_staged_removals(&dup_report.dups, &skin); 70 | } 71 | "r" => { 72 | rr.do_the_removal(&dup_report.dups, &skin)?; 73 | break; 74 | } 75 | "l" => { 76 | #[cfg(unix)] 77 | rr.replace_staged_with_links(&dup_report.dups, &skin)?; 78 | break; 79 | } 80 | "q" => { 81 | break; 82 | } 83 | _ => {} // should not happen 84 | } 85 | } 86 | Ok(()) 87 | } 88 | 89 | fn main() { 90 | init_cli_log!(); 91 | if let Err(e) = run_app() { 92 | eprintln!("{}", e); 93 | } 94 | info!("bye"); 95 | } 96 | 97 | fn make_skin() -> MadSkin { 98 | let mut skin = MadSkin::default(); 99 | skin.table.align = Alignment::Left; 100 | skin.headers[0].align = Alignment::Left; 101 | 
skin.set_headers_fg(AnsiValue(178)); 102 | skin.bold.set_fg(Yellow); 103 | skin.italic.set_fg(AnsiValue(204)); 104 | skin.italic.remove_attr(Italic); 105 | skin.scrollbar.thumb.set_fg(AnsiValue(178)); 106 | skin.code_block.align = Alignment::Center; 107 | skin 108 | } 109 | -------------------------------------------------------------------------------- /src/removal_report.rs: -------------------------------------------------------------------------------- 1 | use { 2 | crate::*, 3 | minimad::*, 4 | serde_json::{json, Value}, 5 | std::{ 6 | collections::{HashMap, HashSet}, 7 | fs, 8 | path::Path, 9 | }, 10 | termimad::*, 11 | }; 12 | 13 | #[derive(Debug, Clone, Default)] 14 | pub struct RemovalReport<'d> { 15 | pub dup_sets_with_staged: HashSet, 16 | pub staged_removals: HashSet, 17 | pub staged_dir_removals: Vec<&'d Path>, 18 | pub len_to_remove: u64, 19 | pub broken: bool, 20 | pub quit: bool, 21 | } 22 | 23 | 24 | impl<'d> RemovalReport<'d> { 25 | 26 | pub fn stage_file(&mut self, dup_file_ref: DupFileRef, dups: &[DupSet]) { 27 | self.len_to_remove += dups[dup_file_ref.dup_set_idx].file_len; 28 | self.dup_sets_with_staged.insert(dup_file_ref.dup_set_idx); 29 | self.staged_removals.insert(dup_file_ref); 30 | // println!("staged {:?}", &dups[dup_file_ref.dup_set_idx].files[dup_file_ref.dup_file_idx].path); 31 | } 32 | 33 | pub fn is_empty(&self) -> bool { 34 | self.staged_removals.is_empty() 35 | } 36 | 37 | pub fn list_staged_removals( 38 | &self, 39 | dups: &[DupSet], 40 | skin: &MadSkin, 41 | ) { 42 | mad_print_inline!( 43 | skin, 44 | "**$0** files planned for removal for a total size of **$1**:\n", 45 | self.staged_removals.len(), 46 | file_size::fit_4(self.len_to_remove), 47 | ); 48 | for (idx, file_ref) in self.staged_removals.iter().enumerate() { 49 | let path = file_ref.path(dups); 50 | let size = dups[file_ref.dup_set_idx].file_len; 51 | mad_print_inline!( 52 | skin, 53 | "#$0 : *$1* (**$2**)\n", 54 | idx + 1, 55 | path.to_string_lossy(), 56 | 
file_size::fit_4(size), 57 | ); 58 | } 59 | } 60 | 61 | /// write the report as a JSON file 62 | pub fn dup_sets_as_json( 63 | &self, 64 | dups: &[DupSet], 65 | ) -> Value { 66 | json!({ 67 | "len_to_remove": self.len_to_remove, 68 | "dup_sets": dups.iter().enumerate() 69 | .filter_map(|(dup_set_idx, dup_set)| { 70 | if !self.dup_sets_with_staged.contains(&dup_set_idx) { 71 | return None; 72 | } 73 | Some(json!({ 74 | "file_len": dup_set.file_len, 75 | "files": dup_set.files.iter() 76 | .enumerate() 77 | .map(|(dup_file_idx, file)| { 78 | let file = file.path.to_string_lossy().to_string(); 79 | let file_ref = DupFileRef { dup_set_idx, dup_file_idx }; 80 | let action = if self.staged_removals.contains(&file_ref) { 81 | "remove" 82 | } else { 83 | "keep" 84 | }; 85 | (file, action) 86 | }) 87 | .collect::>() 88 | })) 89 | }) 90 | .collect::>(), 91 | }) 92 | } 93 | 94 | pub fn list_dup_sets( 95 | &self, 96 | dups: &[DupSet], 97 | skin: &MadSkin, 98 | ) { 99 | static MD: &str = r#" 100 | |:-|:-| 101 | |Set #*${set_num}* : each file is **${file_len}**|action| 102 | |:-|:-:| 103 | ${files 104 | |${path}|**${action}**| 105 | } 106 | |- 107 | "#; 108 | let template = TextTemplate::from(MD); 109 | for (dup_set_idx, dup_set) in dups.iter().enumerate() { 110 | if !self.dup_sets_with_staged.contains(&dup_set_idx) { 111 | continue; 112 | } 113 | let mut expander = OwningTemplateExpander::new(); 114 | expander 115 | .set("set_num", dup_set_idx + 1) 116 | .set("file_len", file_size::fit_4(dup_set.file_len)); 117 | for (dup_file_idx, file) in dup_set.files.iter().enumerate() { 118 | let file_ref = DupFileRef { dup_set_idx, dup_file_idx }; 119 | expander.sub("files") 120 | .set("path", file.path.to_string_lossy()) 121 | .set_md( 122 | "action", 123 | if self.staged_removals.contains(&file_ref) { 124 | "*remove*" 125 | } else { 126 | "keep" 127 | } 128 | ); 129 | } 130 | skin.print_owning_expander(&expander, &template); 131 | } 132 | } 133 | 134 | /// "Normally" the algorithms of 
backdown never remove all files 135 | /// in a set of identical files. But if I change those algorithms 136 | /// and make them more complex, I may make an error. So this 137 | /// function will check there's at least one kept file in each 138 | /// touched set, and will raise an error if a set is totally 139 | /// emptied. 140 | /// This *must* be called just before starting the real removals. 141 | pub fn check_no_emptied_set( 142 | &self, 143 | dups: &[DupSet], 144 | ) -> anyhow::Result<()> { 145 | for (dup_set_idx, dup_set) in dups.iter().enumerate() { 146 | let mut staged_count = 0; 147 | for dup_file_idx in 0..dup_set.files.len() { 148 | if self.staged_removals.contains(&DupFileRef{ dup_set_idx, dup_file_idx }) { 149 | staged_count += 1; 150 | } 151 | } 152 | if staged_count >= dup_set.files.len() { 153 | anyhow::bail!("We staged all files in set for removal! Abort!"); 154 | } 155 | } 156 | Ok(()) 157 | } 158 | 159 | #[cfg(unix)] 160 | pub fn replace_staged_with_links( 161 | &self, 162 | dups: &[DupSet], 163 | skin: &MadSkin, 164 | ) -> anyhow::Result<()> { 165 | use std::os::unix::fs::symlink; 166 | self.check_no_emptied_set(dups)?; 167 | skin.print_text("\n# Phase 4) Replace staged duplicates with links"); 168 | println!("Replacing..."); 169 | let mut removed_len = 0; 170 | let mut removed_count = 0; 171 | // file removals 172 | for dup_file_ref in &self.staged_removals { 173 | let dup_set = &dups[dup_file_ref.dup_set_idx]; 174 | let path = dup_file_ref.path(dups); 175 | let link_destination = match reference_file(dup_file_ref.dup_set_idx, dup_set, &self.staged_removals) { 176 | Some(p) => p, 177 | None => { 178 | anyhow::bail!("unexpected lack of kept file in dup set"); 179 | } 180 | }; 181 | let link_destination = link_destination.canonicalize()?; 182 | match fs::remove_file(path) { 183 | Ok(()) => { 184 | removed_count += 1; 185 | removed_len += dups[dup_file_ref.dup_set_idx].file_len; 186 | match symlink(&link_destination, path) { 187 | Ok(()) => { 188 
| // println!("link {:?} -> {:?}", path, link_destination); 189 | } 190 | Err(e) => { 191 | mad_print_inline!( 192 | skin, 193 | " Failed to remove create link *$1* -> *$2* : $3\n", 194 | path.to_string_lossy(), 195 | link_destination.to_string_lossy(), 196 | e, 197 | ); 198 | } 199 | } 200 | } 201 | Err(e) => { 202 | mad_print_inline!( 203 | skin, 204 | " Failed to remove *$1* : $2\n", 205 | path.to_string_lossy(), 206 | e, 207 | ); 208 | } 209 | } 210 | } 211 | mad_print_inline!( 212 | skin, 213 | "Removed *$0* files with a total size of **$1**\n", 214 | removed_count, 215 | file_size::fit_4(removed_len), 216 | ); 217 | Ok(()) 218 | } 219 | 220 | pub fn do_the_removal( 221 | &self, 222 | dups: &[DupSet], 223 | skin: &MadSkin, 224 | ) -> anyhow::Result<()> { 225 | self.check_no_emptied_set(dups)?; 226 | skin.print_text("\n# Phase 4) Removal"); 227 | println!("Removing..."); 228 | let mut removed_len = 0; 229 | let mut removed_count = 0; 230 | // file removals 231 | for dup_file_ref in &self.staged_removals { 232 | let path = dup_file_ref.path(dups); 233 | match fs::remove_file(path) { 234 | Ok(()) => { 235 | removed_count += 1; 236 | removed_len += dups[dup_file_ref.dup_set_idx].file_len; 237 | } 238 | Err(e) => { 239 | mad_print_inline!( 240 | skin, 241 | " Failed to remove *$1* : $2\n", 242 | path.to_string_lossy(), 243 | e, 244 | ); 245 | } 246 | } 247 | } 248 | // directory removals 249 | for path in &self.staged_dir_removals { 250 | debug!("removing {:?}", path); 251 | if let Err(e) = fs::remove_dir(path) { 252 | mad_print_inline!( 253 | skin, 254 | " Failed to remove directory *$1* : $2\n", 255 | path.to_string_lossy(), 256 | e, 257 | ); 258 | } 259 | } 260 | mad_print_inline!( 261 | skin, 262 | "Removed *$0* files with a total size of **$1**\n", 263 | removed_count, 264 | file_size::fit_4(removed_len), 265 | ); 266 | Ok(()) 267 | } 268 | } 269 | --------------------------------------------------------------------------------