├── .github
├── FUNDING.yml
└── workflows
│ └── rust.yml
├── .gitignore
├── CHANGELOG.md
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── README.md
├── compile-all-targets.sh
├── deploy.sh
├── doc
├── screen-1.png
├── screen-2.png
├── screen-3.png
├── screen-4.png
└── screen-5.png
├── release.sh
└── src
├── args.rs
├── ask.rs
├── dirs.rs
├── dup.rs
├── dup_report.rs
├── ext.rs
├── file_pair.rs
├── hash.rs
├── json.rs
├── lib.rs
├── main.rs
└── removal_report.rs
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [Canop]
2 |
--------------------------------------------------------------------------------
/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
1 | name: Rust
2 |
3 | on:
4 | push:
5 | branches: [ "master" ]
6 | pull_request:
7 | branches: [ "master" ]
8 |
9 | env:
10 | CARGO_TERM_COLOR: always
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - uses: actions/checkout@v3
19 | - name: Build
20 | run: cargo build --verbose
21 | - name: Run tests
22 | run: cargo test --verbose
23 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /build
3 | /pub
4 | /releases
5 | /trav*
6 | *.zip
7 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 |
2 | ### v1.1.2 - 2024-09-10
3 | - sort files by name - Thanks @nc7s
4 |
5 |
6 | ### v1.1.1 - 2023-07-01
7 | - updated dependencies
8 | - stripped binary (smaller)
9 |
10 |
11 | ### v1.1.0 - 2021-12-05
12 | - option to replace staged files with symlinks (unix only) - Fix #2
13 |
14 |
15 | ### v1.0.1 - 2021-12-05
16 | - option to write the report in a JSON file after staging phase - Fix #3
17 |
18 |
19 | ### v1.0.0 - 2021-10-02
20 | No reason not to call this a 1.0
21 |
22 |
23 | ### v0.2.1 - 2021-07-14
24 | - backdown logs a few things. To have a log generated, launch backdown with `BACKDOWN_LOG=debug backdown your/dir`
25 | - change hash algorithm from SHA-256 to BLAKE3, which is slightly faster with the same guarantees
26 |
27 |
28 | ### v0.2.0 - 2021-07-12
29 | - backdown proposes to remove, in a single question, all duplicates with names like "thing (2).AVI" or "thing (3rd copy).png" when they're in the same directory as the "source"
30 |
31 |
32 | ### v0.1.0 - 2021-07-11
33 | - first public release
34 |
--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 |
5 | [[package]]
6 | name = "aho-corasick"
7 | version = "1.1.3"
8 | source = "registry+https://github.com/rust-lang/crates.io-index"
9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
10 | dependencies = [
11 | "memchr",
12 | ]
13 |
14 | [[package]]
15 | name = "anyhow"
16 | version = "1.0.49"
17 | source = "registry+https://github.com/rust-lang/crates.io-index"
18 | checksum = "0a03e93e97a28fbc9f42fbc5ba0886a3c67eb637b476dbee711f80a6ffe8223d"
19 |
20 | [[package]]
21 | name = "argh"
22 | version = "0.1.5"
23 | source = "registry+https://github.com/rust-lang/crates.io-index"
24 | checksum = "2e7317a549bc17c5278d9e72bb6e62c6aa801ac2567048e39ebc1c194249323e"
25 | dependencies = [
26 | "argh_derive",
27 | "argh_shared",
28 | ]
29 |
30 | [[package]]
31 | name = "argh_derive"
32 | version = "0.1.5"
33 | source = "registry+https://github.com/rust-lang/crates.io-index"
34 | checksum = "60949c42375351e9442e354434b0cba2ac402c1237edf673cac3a4bf983b8d3c"
35 | dependencies = [
36 | "argh_shared",
37 | "heck",
38 | "proc-macro2",
39 | "quote",
40 | "syn 1.0.73",
41 | ]
42 |
43 | [[package]]
44 | name = "argh_shared"
45 | version = "0.1.5"
46 | source = "registry+https://github.com/rust-lang/crates.io-index"
47 | checksum = "8a61eb019cb8f415d162cb9f12130ee6bbe9168b7d953c17f4ad049e4051ca00"
48 |
49 | [[package]]
50 | name = "arrayref"
51 | version = "0.3.6"
52 | source = "registry+https://github.com/rust-lang/crates.io-index"
53 | checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
54 |
55 | [[package]]
56 | name = "arrayvec"
57 | version = "0.7.4"
58 | source = "registry+https://github.com/rust-lang/crates.io-index"
59 | checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
60 |
61 | [[package]]
62 | name = "autocfg"
63 | version = "1.3.0"
64 | source = "registry+https://github.com/rust-lang/crates.io-index"
65 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
66 |
67 | [[package]]
68 | name = "backdown"
69 | version = "1.1.2"
70 | dependencies = [
71 | "anyhow",
72 | "argh",
73 | "blake3",
74 | "chrono",
75 | "cli-log",
76 | "crossbeam",
77 | "file-size",
78 | "fnv",
79 | "lazy-regex",
80 | "phf",
81 | "rayon",
82 | "serde",
83 | "serde_json",
84 | "termimad",
85 | ]
86 |
87 | [[package]]
88 | name = "bitflags"
89 | version = "2.6.0"
90 | source = "registry+https://github.com/rust-lang/crates.io-index"
91 | checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
92 |
93 | [[package]]
94 | name = "blake3"
95 | version = "1.4.0"
96 | source = "registry+https://github.com/rust-lang/crates.io-index"
97 | checksum = "729b71f35bd3fa1a4c86b85d32c8b9069ea7fe14f7a53cfabb65f62d4265b888"
98 | dependencies = [
99 | "arrayref",
100 | "arrayvec",
101 | "cc",
102 | "cfg-if",
103 | "constant_time_eq",
104 | "digest",
105 | ]
106 |
107 | [[package]]
108 | name = "block-buffer"
109 | version = "0.10.4"
110 | source = "registry+https://github.com/rust-lang/crates.io-index"
111 | checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
112 | dependencies = [
113 | "generic-array",
114 | ]
115 |
116 | [[package]]
117 | name = "cc"
118 | version = "1.0.69"
119 | source = "registry+https://github.com/rust-lang/crates.io-index"
120 | checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2"
121 |
122 | [[package]]
123 | name = "cfg-if"
124 | version = "1.0.0"
125 | source = "registry+https://github.com/rust-lang/crates.io-index"
126 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
127 |
128 | [[package]]
129 | name = "chrono"
130 | version = "0.4.19"
131 | source = "registry+https://github.com/rust-lang/crates.io-index"
132 | checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
133 | dependencies = [
134 | "libc",
135 | "num-integer",
136 | "num-traits",
137 | "time",
138 | "winapi",
139 | ]
140 |
141 | [[package]]
142 | name = "cli-log"
143 | version = "2.0.0"
144 | source = "registry+https://github.com/rust-lang/crates.io-index"
145 | checksum = "3d2ab00dc4c82ec28af25ac085aecc11ffeabf353755715a3113a7aa044ca5cc"
146 | dependencies = [
147 | "chrono",
148 | "file-size",
149 | "log",
150 | "proc-status",
151 | ]
152 |
153 | [[package]]
154 | name = "constant_time_eq"
155 | version = "0.2.6"
156 | source = "registry+https://github.com/rust-lang/crates.io-index"
157 | checksum = "21a53c0a4d288377e7415b53dcfc3c04da5cdc2cc95c8d5ac178b58f0b861ad6"
158 |
159 | [[package]]
160 | name = "coolor"
161 | version = "1.0.0"
162 | source = "registry+https://github.com/rust-lang/crates.io-index"
163 | checksum = "691defa50318376447a73ced869862baecfab35f6aabaa91a4cd726b315bfe1a"
164 | dependencies = [
165 | "crossterm",
166 | ]
167 |
168 | [[package]]
169 | name = "crokey"
170 | version = "1.1.0"
171 | source = "registry+https://github.com/rust-lang/crates.io-index"
172 | checksum = "520e83558f4c008ac06fa6a86e5c1d4357be6f994cce7434463ebcdaadf47bb1"
173 | dependencies = [
174 | "crokey-proc_macros",
175 | "crossterm",
176 | "once_cell",
177 | "serde",
178 | "strict",
179 | ]
180 |
181 | [[package]]
182 | name = "crokey-proc_macros"
183 | version = "1.1.0"
184 | source = "registry+https://github.com/rust-lang/crates.io-index"
185 | checksum = "370956e708a1ce65fe4ac5bb7185791e0ece7485087f17736d54a23a0895049f"
186 | dependencies = [
187 | "crossterm",
188 | "proc-macro2",
189 | "quote",
190 | "strict",
191 | "syn 1.0.73",
192 | ]
193 |
194 | [[package]]
195 | name = "crossbeam"
196 | version = "0.8.1"
197 | source = "registry+https://github.com/rust-lang/crates.io-index"
198 | checksum = "4ae5588f6b3c3cb05239e90bd110f257254aecd01e4635400391aeae07497845"
199 | dependencies = [
200 | "cfg-if",
201 | "crossbeam-channel",
202 | "crossbeam-deque",
203 | "crossbeam-epoch",
204 | "crossbeam-queue",
205 | "crossbeam-utils",
206 | ]
207 |
208 | [[package]]
209 | name = "crossbeam-channel"
210 | version = "0.5.1"
211 | source = "registry+https://github.com/rust-lang/crates.io-index"
212 | checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
213 | dependencies = [
214 | "cfg-if",
215 | "crossbeam-utils",
216 | ]
217 |
218 | [[package]]
219 | name = "crossbeam-deque"
220 | version = "0.8.0"
221 | source = "registry+https://github.com/rust-lang/crates.io-index"
222 | checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
223 | dependencies = [
224 | "cfg-if",
225 | "crossbeam-epoch",
226 | "crossbeam-utils",
227 | ]
228 |
229 | [[package]]
230 | name = "crossbeam-epoch"
231 | version = "0.9.5"
232 | source = "registry+https://github.com/rust-lang/crates.io-index"
233 | checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd"
234 | dependencies = [
235 | "cfg-if",
236 | "crossbeam-utils",
237 | "lazy_static",
238 | "memoffset",
239 | "scopeguard",
240 | ]
241 |
242 | [[package]]
243 | name = "crossbeam-queue"
244 | version = "0.3.2"
245 | source = "registry+https://github.com/rust-lang/crates.io-index"
246 | checksum = "9b10ddc024425c88c2ad148c1b0fd53f4c6d38db9697c9f1588381212fa657c9"
247 | dependencies = [
248 | "cfg-if",
249 | "crossbeam-utils",
250 | ]
251 |
252 | [[package]]
253 | name = "crossbeam-utils"
254 | version = "0.8.5"
255 | source = "registry+https://github.com/rust-lang/crates.io-index"
256 | checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
257 | dependencies = [
258 | "cfg-if",
259 | "lazy_static",
260 | ]
261 |
262 | [[package]]
263 | name = "crossterm"
264 | version = "0.28.1"
265 | source = "registry+https://github.com/rust-lang/crates.io-index"
266 | checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6"
267 | dependencies = [
268 | "bitflags",
269 | "crossterm_winapi",
270 | "mio",
271 | "parking_lot",
272 | "rustix",
273 | "signal-hook",
274 | "signal-hook-mio",
275 | "winapi",
276 | ]
277 |
278 | [[package]]
279 | name = "crossterm_winapi"
280 | version = "0.9.1"
281 | source = "registry+https://github.com/rust-lang/crates.io-index"
282 | checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
283 | dependencies = [
284 | "winapi",
285 | ]
286 |
287 | [[package]]
288 | name = "crypto-common"
289 | version = "0.1.6"
290 | source = "registry+https://github.com/rust-lang/crates.io-index"
291 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
292 | dependencies = [
293 | "generic-array",
294 | "typenum",
295 | ]
296 |
297 | [[package]]
298 | name = "digest"
299 | version = "0.10.7"
300 | source = "registry+https://github.com/rust-lang/crates.io-index"
301 | checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
302 | dependencies = [
303 | "block-buffer",
304 | "crypto-common",
305 | "subtle",
306 | ]
307 |
308 | [[package]]
309 | name = "either"
310 | version = "1.6.1"
311 | source = "registry+https://github.com/rust-lang/crates.io-index"
312 | checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
313 |
314 | [[package]]
315 | name = "errno"
316 | version = "0.3.9"
317 | source = "registry+https://github.com/rust-lang/crates.io-index"
318 | checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
319 | dependencies = [
320 | "libc",
321 | "windows-sys",
322 | ]
323 |
324 | [[package]]
325 | name = "file-size"
326 | version = "1.0.3"
327 | source = "registry+https://github.com/rust-lang/crates.io-index"
328 | checksum = "9544f10105d33957765016b8a9baea7e689bf1f0f2f32c2fa2f568770c38d2b3"
329 |
330 | [[package]]
331 | name = "fnv"
332 | version = "1.0.7"
333 | source = "registry+https://github.com/rust-lang/crates.io-index"
334 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
335 |
336 | [[package]]
337 | name = "generic-array"
338 | version = "0.14.4"
339 | source = "registry+https://github.com/rust-lang/crates.io-index"
340 | checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817"
341 | dependencies = [
342 | "typenum",
343 | "version_check",
344 | ]
345 |
346 | [[package]]
347 | name = "heck"
348 | version = "0.3.3"
349 | source = "registry+https://github.com/rust-lang/crates.io-index"
350 | checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
351 | dependencies = [
352 | "unicode-segmentation",
353 | ]
354 |
355 | [[package]]
356 | name = "hermit-abi"
357 | version = "0.1.19"
358 | source = "registry+https://github.com/rust-lang/crates.io-index"
359 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
360 | dependencies = [
361 | "libc",
362 | ]
363 |
364 | [[package]]
365 | name = "hermit-abi"
366 | version = "0.3.9"
367 | source = "registry+https://github.com/rust-lang/crates.io-index"
368 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
369 |
370 | [[package]]
371 | name = "itoa"
372 | version = "0.4.8"
373 | source = "registry+https://github.com/rust-lang/crates.io-index"
374 | checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
375 |
376 | [[package]]
377 | name = "lazy-regex"
378 | version = "3.3.0"
379 | source = "registry+https://github.com/rust-lang/crates.io-index"
380 | checksum = "8d8e41c97e6bc7ecb552016274b99fbb5d035e8de288c582d9b933af6677bfda"
381 | dependencies = [
382 | "lazy-regex-proc_macros",
383 | "once_cell",
384 | "regex",
385 | ]
386 |
387 | [[package]]
388 | name = "lazy-regex-proc_macros"
389 | version = "3.3.0"
390 | source = "registry+https://github.com/rust-lang/crates.io-index"
391 | checksum = "76e1d8b05d672c53cb9c7b920bbba8783845ae4f0b076e02a3db1d02c81b4163"
392 | dependencies = [
393 | "proc-macro2",
394 | "quote",
395 | "regex",
396 | "syn 2.0.22",
397 | ]
398 |
399 | [[package]]
400 | name = "lazy_static"
401 | version = "1.4.0"
402 | source = "registry+https://github.com/rust-lang/crates.io-index"
403 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
404 |
405 | [[package]]
406 | name = "libc"
407 | version = "0.2.158"
408 | source = "registry+https://github.com/rust-lang/crates.io-index"
409 | checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
410 |
411 | [[package]]
412 | name = "linux-raw-sys"
413 | version = "0.4.14"
414 | source = "registry+https://github.com/rust-lang/crates.io-index"
415 | checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
416 |
417 | [[package]]
418 | name = "lock_api"
419 | version = "0.4.12"
420 | source = "registry+https://github.com/rust-lang/crates.io-index"
421 | checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
422 | dependencies = [
423 | "autocfg",
424 | "scopeguard",
425 | ]
426 |
427 | [[package]]
428 | name = "log"
429 | version = "0.4.14"
430 | source = "registry+https://github.com/rust-lang/crates.io-index"
431 | checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
432 | dependencies = [
433 | "cfg-if",
434 | ]
435 |
436 | [[package]]
437 | name = "memchr"
438 | version = "2.7.4"
439 | source = "registry+https://github.com/rust-lang/crates.io-index"
440 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
441 |
442 | [[package]]
443 | name = "memoffset"
444 | version = "0.6.4"
445 | source = "registry+https://github.com/rust-lang/crates.io-index"
446 | checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9"
447 | dependencies = [
448 | "autocfg",
449 | ]
450 |
451 | [[package]]
452 | name = "minimad"
453 | version = "0.13.1"
454 | source = "registry+https://github.com/rust-lang/crates.io-index"
455 | checksum = "a9c5d708226d186590a7b6d4a9780e2bdda5f689e0d58cd17012a298efd745d2"
456 | dependencies = [
457 | "once_cell",
458 | ]
459 |
460 | [[package]]
461 | name = "mio"
462 | version = "1.0.2"
463 | source = "registry+https://github.com/rust-lang/crates.io-index"
464 | checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec"
465 | dependencies = [
466 | "hermit-abi 0.3.9",
467 | "libc",
468 | "log",
469 | "wasi 0.11.0+wasi-snapshot-preview1",
470 | "windows-sys",
471 | ]
472 |
473 | [[package]]
474 | name = "num-integer"
475 | version = "0.1.44"
476 | source = "registry+https://github.com/rust-lang/crates.io-index"
477 | checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
478 | dependencies = [
479 | "autocfg",
480 | "num-traits",
481 | ]
482 |
483 | [[package]]
484 | name = "num-traits"
485 | version = "0.2.14"
486 | source = "registry+https://github.com/rust-lang/crates.io-index"
487 | checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
488 | dependencies = [
489 | "autocfg",
490 | ]
491 |
492 | [[package]]
493 | name = "num_cpus"
494 | version = "1.13.0"
495 | source = "registry+https://github.com/rust-lang/crates.io-index"
496 | checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
497 | dependencies = [
498 | "hermit-abi 0.1.19",
499 | "libc",
500 | ]
501 |
502 | [[package]]
503 | name = "once_cell"
504 | version = "1.19.0"
505 | source = "registry+https://github.com/rust-lang/crates.io-index"
506 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
507 |
508 | [[package]]
509 | name = "parking_lot"
510 | version = "0.12.3"
511 | source = "registry+https://github.com/rust-lang/crates.io-index"
512 | checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
513 | dependencies = [
514 | "lock_api",
515 | "parking_lot_core",
516 | ]
517 |
518 | [[package]]
519 | name = "parking_lot_core"
520 | version = "0.9.10"
521 | source = "registry+https://github.com/rust-lang/crates.io-index"
522 | checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
523 | dependencies = [
524 | "cfg-if",
525 | "libc",
526 | "redox_syscall",
527 | "smallvec",
528 | "windows-targets",
529 | ]
530 |
531 | [[package]]
532 | name = "phf"
533 | version = "0.11.2"
534 | source = "registry+https://github.com/rust-lang/crates.io-index"
535 | checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
536 | dependencies = [
537 | "phf_macros",
538 | "phf_shared",
539 | ]
540 |
541 | [[package]]
542 | name = "phf_generator"
543 | version = "0.11.2"
544 | source = "registry+https://github.com/rust-lang/crates.io-index"
545 | checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
546 | dependencies = [
547 | "phf_shared",
548 | "rand",
549 | ]
550 |
551 | [[package]]
552 | name = "phf_macros"
553 | version = "0.11.2"
554 | source = "registry+https://github.com/rust-lang/crates.io-index"
555 | checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
556 | dependencies = [
557 | "phf_generator",
558 | "phf_shared",
559 | "proc-macro2",
560 | "quote",
561 | "syn 2.0.22",
562 | ]
563 |
564 | [[package]]
565 | name = "phf_shared"
566 | version = "0.11.2"
567 | source = "registry+https://github.com/rust-lang/crates.io-index"
568 | checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b"
569 | dependencies = [
570 | "siphasher",
571 | ]
572 |
573 | [[package]]
574 | name = "proc-macro2"
575 | version = "1.0.63"
576 | source = "registry+https://github.com/rust-lang/crates.io-index"
577 | checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
578 | dependencies = [
579 | "unicode-ident",
580 | ]
581 |
582 | [[package]]
583 | name = "proc-status"
584 | version = "0.1.1"
585 | source = "registry+https://github.com/rust-lang/crates.io-index"
586 | checksum = "f0e0c0ac915e7b76b47850ba4ffc377abde6c6ff9eeace61d0a89623db449712"
587 | dependencies = [
588 | "thiserror",
589 | ]
590 |
591 | [[package]]
592 | name = "quote"
593 | version = "1.0.29"
594 | source = "registry+https://github.com/rust-lang/crates.io-index"
595 | checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
596 | dependencies = [
597 | "proc-macro2",
598 | ]
599 |
600 | [[package]]
601 | name = "rand"
602 | version = "0.8.5"
603 | source = "registry+https://github.com/rust-lang/crates.io-index"
604 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
605 | dependencies = [
606 | "rand_core",
607 | ]
608 |
609 | [[package]]
610 | name = "rand_core"
611 | version = "0.6.4"
612 | source = "registry+https://github.com/rust-lang/crates.io-index"
613 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
614 |
615 | [[package]]
616 | name = "rayon"
617 | version = "1.5.1"
618 | source = "registry+https://github.com/rust-lang/crates.io-index"
619 | checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90"
620 | dependencies = [
621 | "autocfg",
622 | "crossbeam-deque",
623 | "either",
624 | "rayon-core",
625 | ]
626 |
627 | [[package]]
628 | name = "rayon-core"
629 | version = "1.9.1"
630 | source = "registry+https://github.com/rust-lang/crates.io-index"
631 | checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e"
632 | dependencies = [
633 | "crossbeam-channel",
634 | "crossbeam-deque",
635 | "crossbeam-utils",
636 | "lazy_static",
637 | "num_cpus",
638 | ]
639 |
640 | [[package]]
641 | name = "redox_syscall"
642 | version = "0.5.3"
643 | source = "registry+https://github.com/rust-lang/crates.io-index"
644 | checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4"
645 | dependencies = [
646 | "bitflags",
647 | ]
648 |
649 | [[package]]
650 | name = "regex"
651 | version = "1.10.6"
652 | source = "registry+https://github.com/rust-lang/crates.io-index"
653 | checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
654 | dependencies = [
655 | "aho-corasick",
656 | "memchr",
657 | "regex-automata",
658 | "regex-syntax",
659 | ]
660 |
661 | [[package]]
662 | name = "regex-automata"
663 | version = "0.4.7"
664 | source = "registry+https://github.com/rust-lang/crates.io-index"
665 | checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
666 | dependencies = [
667 | "aho-corasick",
668 | "memchr",
669 | "regex-syntax",
670 | ]
671 |
672 | [[package]]
673 | name = "regex-syntax"
674 | version = "0.8.4"
675 | source = "registry+https://github.com/rust-lang/crates.io-index"
676 | checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
677 |
678 | [[package]]
679 | name = "rustix"
680 | version = "0.38.36"
681 | source = "registry+https://github.com/rust-lang/crates.io-index"
682 | checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36"
683 | dependencies = [
684 | "bitflags",
685 | "errno",
686 | "libc",
687 | "linux-raw-sys",
688 | "windows-sys",
689 | ]
690 |
691 | [[package]]
692 | name = "ryu"
693 | version = "1.0.6"
694 | source = "registry+https://github.com/rust-lang/crates.io-index"
695 | checksum = "3c9613b5a66ab9ba26415184cfc41156594925a9cf3a2057e57f31ff145f6568"
696 |
697 | [[package]]
698 | name = "scopeguard"
699 | version = "1.1.0"
700 | source = "registry+https://github.com/rust-lang/crates.io-index"
701 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
702 |
703 | [[package]]
704 | name = "serde"
705 | version = "1.0.130"
706 | source = "registry+https://github.com/rust-lang/crates.io-index"
707 | checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913"
708 | dependencies = [
709 | "serde_derive",
710 | ]
711 |
712 | [[package]]
713 | name = "serde_derive"
714 | version = "1.0.130"
715 | source = "registry+https://github.com/rust-lang/crates.io-index"
716 | checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b"
717 | dependencies = [
718 | "proc-macro2",
719 | "quote",
720 | "syn 1.0.73",
721 | ]
722 |
723 | [[package]]
724 | name = "serde_json"
725 | version = "1.0.72"
726 | source = "registry+https://github.com/rust-lang/crates.io-index"
727 | checksum = "d0ffa0837f2dfa6fb90868c2b5468cad482e175f7dad97e7421951e663f2b527"
728 | dependencies = [
729 | "itoa",
730 | "ryu",
731 | "serde",
732 | ]
733 |
734 | [[package]]
735 | name = "signal-hook"
736 | version = "0.3.17"
737 | source = "registry+https://github.com/rust-lang/crates.io-index"
738 | checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801"
739 | dependencies = [
740 | "libc",
741 | "signal-hook-registry",
742 | ]
743 |
744 | [[package]]
745 | name = "signal-hook-mio"
746 | version = "0.2.4"
747 | source = "registry+https://github.com/rust-lang/crates.io-index"
748 | checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd"
749 | dependencies = [
750 | "libc",
751 | "mio",
752 | "signal-hook",
753 | ]
754 |
755 | [[package]]
756 | name = "signal-hook-registry"
757 | version = "1.4.0"
758 | source = "registry+https://github.com/rust-lang/crates.io-index"
759 | checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
760 | dependencies = [
761 | "libc",
762 | ]
763 |
764 | [[package]]
765 | name = "siphasher"
766 | version = "0.3.5"
767 | source = "registry+https://github.com/rust-lang/crates.io-index"
768 | checksum = "cbce6d4507c7e4a3962091436e56e95290cb71fa302d0d270e32130b75fbff27"
769 |
770 | [[package]]
771 | name = "smallvec"
772 | version = "1.6.1"
773 | source = "registry+https://github.com/rust-lang/crates.io-index"
774 | checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
775 |
776 | [[package]]
777 | name = "strict"
778 | version = "0.2.0"
779 | source = "registry+https://github.com/rust-lang/crates.io-index"
780 | checksum = "f42444fea5b87a39db4218d9422087e66a85d0e7a0963a439b07bcdf91804006"
781 |
782 | [[package]]
783 | name = "subtle"
784 | version = "2.4.1"
785 | source = "registry+https://github.com/rust-lang/crates.io-index"
786 | checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
787 |
788 | [[package]]
789 | name = "syn"
790 | version = "1.0.73"
791 | source = "registry+https://github.com/rust-lang/crates.io-index"
792 | checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7"
793 | dependencies = [
794 | "proc-macro2",
795 | "quote",
796 | "unicode-xid",
797 | ]
798 |
799 | [[package]]
800 | name = "syn"
801 | version = "2.0.22"
802 | source = "registry+https://github.com/rust-lang/crates.io-index"
803 | checksum = "2efbeae7acf4eabd6bcdcbd11c92f45231ddda7539edc7806bd1a04a03b24616"
804 | dependencies = [
805 | "proc-macro2",
806 | "quote",
807 | "unicode-ident",
808 | ]
809 |
810 | [[package]]
811 | name = "termimad"
812 | version = "0.30.0"
813 | source = "registry+https://github.com/rust-lang/crates.io-index"
814 | checksum = "920e7c4671e79f3d9df269da9c8edf0dbc580044fd727d3594f7bfba5eb6107a"
815 | dependencies = [
816 | "coolor",
817 | "crokey",
818 | "crossbeam",
819 | "lazy-regex",
820 | "minimad",
821 | "serde",
822 | "thiserror",
823 | "unicode-width",
824 | ]
825 |
826 | [[package]]
827 | name = "thiserror"
828 | version = "1.0.26"
829 | source = "registry+https://github.com/rust-lang/crates.io-index"
830 | checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2"
831 | dependencies = [
832 | "thiserror-impl",
833 | ]
834 |
835 | [[package]]
836 | name = "thiserror-impl"
837 | version = "1.0.26"
838 | source = "registry+https://github.com/rust-lang/crates.io-index"
839 | checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745"
840 | dependencies = [
841 | "proc-macro2",
842 | "quote",
843 | "syn 1.0.73",
844 | ]
845 |
846 | [[package]]
847 | name = "time"
848 | version = "0.1.44"
849 | source = "registry+https://github.com/rust-lang/crates.io-index"
850 | checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
851 | dependencies = [
852 | "libc",
853 | "wasi 0.10.0+wasi-snapshot-preview1",
854 | "winapi",
855 | ]
856 |
857 | [[package]]
858 | name = "typenum"
859 | version = "1.16.0"
860 | source = "registry+https://github.com/rust-lang/crates.io-index"
861 | checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
862 |
863 | [[package]]
864 | name = "unicode-ident"
865 | version = "1.0.9"
866 | source = "registry+https://github.com/rust-lang/crates.io-index"
867 | checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
868 |
869 | [[package]]
870 | name = "unicode-segmentation"
871 | version = "1.8.0"
872 | source = "registry+https://github.com/rust-lang/crates.io-index"
873 | checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b"
874 |
875 | [[package]]
876 | name = "unicode-width"
877 | version = "0.1.13"
878 | source = "registry+https://github.com/rust-lang/crates.io-index"
879 | checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d"
880 |
881 | [[package]]
882 | name = "unicode-xid"
883 | version = "0.2.2"
884 | source = "registry+https://github.com/rust-lang/crates.io-index"
885 | checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
886 |
887 | [[package]]
888 | name = "version_check"
889 | version = "0.9.3"
890 | source = "registry+https://github.com/rust-lang/crates.io-index"
891 | checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
892 |
893 | [[package]]
894 | name = "wasi"
895 | version = "0.10.0+wasi-snapshot-preview1"
896 | source = "registry+https://github.com/rust-lang/crates.io-index"
897 | checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
898 |
899 | [[package]]
900 | name = "wasi"
901 | version = "0.11.0+wasi-snapshot-preview1"
902 | source = "registry+https://github.com/rust-lang/crates.io-index"
903 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
904 |
905 | [[package]]
906 | name = "winapi"
907 | version = "0.3.9"
908 | source = "registry+https://github.com/rust-lang/crates.io-index"
909 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
910 | dependencies = [
911 | "winapi-i686-pc-windows-gnu",
912 | "winapi-x86_64-pc-windows-gnu",
913 | ]
914 |
915 | [[package]]
916 | name = "winapi-i686-pc-windows-gnu"
917 | version = "0.4.0"
918 | source = "registry+https://github.com/rust-lang/crates.io-index"
919 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
920 |
921 | [[package]]
922 | name = "winapi-x86_64-pc-windows-gnu"
923 | version = "0.4.0"
924 | source = "registry+https://github.com/rust-lang/crates.io-index"
925 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
926 |
927 | [[package]]
928 | name = "windows-sys"
929 | version = "0.52.0"
930 | source = "registry+https://github.com/rust-lang/crates.io-index"
931 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
932 | dependencies = [
933 | "windows-targets",
934 | ]
935 |
936 | [[package]]
937 | name = "windows-targets"
938 | version = "0.52.6"
939 | source = "registry+https://github.com/rust-lang/crates.io-index"
940 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
941 | dependencies = [
942 | "windows_aarch64_gnullvm",
943 | "windows_aarch64_msvc",
944 | "windows_i686_gnu",
945 | "windows_i686_gnullvm",
946 | "windows_i686_msvc",
947 | "windows_x86_64_gnu",
948 | "windows_x86_64_gnullvm",
949 | "windows_x86_64_msvc",
950 | ]
951 |
952 | [[package]]
953 | name = "windows_aarch64_gnullvm"
954 | version = "0.52.6"
955 | source = "registry+https://github.com/rust-lang/crates.io-index"
956 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
957 |
958 | [[package]]
959 | name = "windows_aarch64_msvc"
960 | version = "0.52.6"
961 | source = "registry+https://github.com/rust-lang/crates.io-index"
962 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
963 |
964 | [[package]]
965 | name = "windows_i686_gnu"
966 | version = "0.52.6"
967 | source = "registry+https://github.com/rust-lang/crates.io-index"
968 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
969 |
970 | [[package]]
971 | name = "windows_i686_gnullvm"
972 | version = "0.52.6"
973 | source = "registry+https://github.com/rust-lang/crates.io-index"
974 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
975 |
976 | [[package]]
977 | name = "windows_i686_msvc"
978 | version = "0.52.6"
979 | source = "registry+https://github.com/rust-lang/crates.io-index"
980 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
981 |
982 | [[package]]
983 | name = "windows_x86_64_gnu"
984 | version = "0.52.6"
985 | source = "registry+https://github.com/rust-lang/crates.io-index"
986 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
987 |
988 | [[package]]
989 | name = "windows_x86_64_gnullvm"
990 | version = "0.52.6"
991 | source = "registry+https://github.com/rust-lang/crates.io-index"
992 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
993 |
994 | [[package]]
995 | name = "windows_x86_64_msvc"
996 | version = "0.52.6"
997 | source = "registry+https://github.com/rust-lang/crates.io-index"
998 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
999 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "backdown"
3 | version = "1.1.2"
4 | authors = ["Canop "]
5 | edition = "2021"
6 | rust-version = "1.59"
7 | description = "A smart CLI for removing thousands of duplicates on your disks"
8 | repository = "https://github.com/Canop/backdown"
9 | license = "MIT"
10 | readme = "README.md"
11 |
12 | [dependencies]
13 | argh = "0.1.4"
14 | anyhow = "1.0.49"
15 | blake3 = "1.4"
16 | chrono = "0.4"
17 | cli-log = "2.0"
18 | crossbeam = "0.8"
19 | file-size = "1.0"
20 | fnv = "1.0.7"
21 | lazy-regex = "3.3"
22 | phf = { version = "0.11", features = ["macros"] }
23 | rayon = "1.3"
24 | serde ="1.0"
25 | serde_json = "1.0"
26 | termimad = "0.30"
27 |
28 | [profile.release]
29 | strip = true
30 |
31 | [patch.crates-io]
32 | #minimad = { path = "../minimad" }
33 | #termimad = { path = "../termimad" }
34 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Canop
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # backdown
2 |
3 | [![MIT][s2]][l2] [![Latest Version][s1]][l1] [![Build][s3]][l3] [![Chat on Miaou][s4]][l4]
4 |
5 | [s1]: https://img.shields.io/crates/v/backdown.svg
6 | [l1]: https://crates.io/crates/backdown
7 |
8 | [s2]: https://img.shields.io/badge/license-MIT-blue.svg
9 | [l2]: LICENSE
10 |
11 | [s3]: https://github.com/Canop/backdown/actions/workflows/rust.yml/badge.svg
12 | [l3]: https://github.com/Canop/backdown/actions/workflows/rust.yml
13 |
14 | [s4]: https://miaou.dystroy.org/static/shields/room.svg
15 | [l4]: https://miaou.dystroy.org/3768?Rust
16 |
17 | **Backdown** helps you safely and ergonomically remove duplicate files.
18 |
19 | Its design is based upon my observation of frequent patterns regarding build-up of duplicates with time, especially images and other media files.
20 |
21 | Finding duplicates is easy. Cleaning the disk when there are thousands of them is the hard part. What Backdown brings is the easy way to select and remove the duplicates you don't want to keep.
22 |
23 | A Backdown session goes through the following phases:
24 |
25 | 1. Backdown analyzes the directory of your choice and finds sets of duplicates (files whose content is exactly the same). Backdown ignores symlinks and files or directories whose name starts with a dot.
26 | 2. Backdown asks you a few questions depending on the analysis. Nothing is removed at this point: you only stage files for removal. Backdown never lets you stage all items in a set of identical files.
27 | 3. After having maybe looked at the list of staged files, you confirm the removals
28 | 4. Backdown does the removals on disk
29 |
30 | # What it looks like
31 |
32 | Analysis and first question:
33 |
34 | ![screen 1](doc/screen-1.png)
35 |
36 | Another kind of question:
37 |
38 | ![screen 2](doc/screen-2.png)
39 |
40 | Yet another one:
41 |
42 | ![screen 3](doc/screen-3.png)
43 |
44 | Yet another one:
45 |
46 | ![screen 4](doc/screen-4.png)
47 |
48 | Review and Confirm:
49 |
50 | ![screen 5](doc/screen-5.png)
51 |
52 | At this point you may also export the report as JSON, and you may decide to replace each removed file with a link to one of the kept ones.
53 |
54 | # Installation
55 |
56 | ## From the crates.io repository
57 |
58 | You must have the Rust env installed: https://rustup.rs
59 |
60 | Run
61 |
62 | ```bash
63 | cargo install --locked backdown
64 | ```
65 |
66 | ## From Source
67 |
68 | You must have the Rust env installed: https://rustup.rs
69 |
70 | Download this repository then run
71 |
72 | ```bash
73 | cargo install --path .
74 | ```
75 |
76 | ## Precompiled binaries
77 |
78 | Unless you're a Rust developer, I recommend you just download the precompiled binaries, as this will save a lot of space on your disk.
79 |
80 | Binaries are made available at https://dystroy.org/backdown/download/
81 |
82 | # Usage
83 |
84 | ## Deduplicate any kind of files
85 |
86 | ```bash
87 | backdown /some/directory
88 | ```
89 |
90 | ## Deduplicate images
91 |
92 | ```bash
93 | backdown -i /some/directory
94 | ```
95 |
96 | ## JSON report
97 |
98 | After the staging phase, you may decide to export a report as JSON. This doesn't prevent you from also doing the removals.
99 |
100 | The JSON looks like this:
101 |
102 | ```JSON
103 | {
104 | "dup_sets": [
105 | {
106 | "file_len": 1212746,
107 | "files": {
108 | "trav-copy/2006-05 (mai)/HPIM0530.JPG": "remove",
109 | "trav-copy/2006-06 (juin)/HPIM0530 (another copy).JPG": "remove",
110 | "trav-copy/2006-06 (juin)/HPIM0530 (copy).JPG": "remove",
111 | "trav-copy/2006-06 (juin)/HPIM0530.JPG": "keep"
112 | }
113 | },
114 | {
115 | "file_len": 1980628,
116 | "files": {
117 | "trav-copy/2006-03 (mars)/HPIM0608.JPG": "keep",
118 | "trav-copy/2006-05 (mai)/HPIM0608.JPG": "remove",
119 | "trav-copy/2006-06 (juin)/HPIM0608.JPG": "keep"
120 | }
121 | },
122 | {
123 | "file_len": 1124764,
124 | "files": {
125 | "trav-copy/2006-05 (mai)/HPIM0529.JPG": "remove",
126 | "trav-copy/2006-06 (juin)/HPIM0529.JPG": "keep"
127 | }
128 | },
129 | {
130 | "file_len": 1706672,
131 | "files": {
132 | "trav-copy/2006-05 (mai)/test.jpg": "remove",
133 | "trav-copy/2006-06 (juin)/HPIM0598.JPG": "keep"
134 | }
135 | }
136 | ],
137 | "len_to_remove": 8450302
138 | }
139 | ```
140 |
141 | # Advice
142 |
143 | * If you launch backdown on a big directory, it may find more duplicates than you suspect there are. Don't force yourself to answer *all* questions at first: if you stage the removals of the first dozen questions you'll already gain a lot and you may do the other ones another day
144 | * Don't launch backdown at the root of your disk because you don't want to try and deal with duplicates in system resources, programs, build artefacts, etc. Launch backdown where you store your images, your videos or your music
145 | * Backdown isn't designed for dev directories and doesn't respect .gitignore rules
146 | * If you launch backdown in a directory with millions of files on a slow disk, you'll have to wait a long time while the content is hashed. Try with a smaller directory first if you have an HDD
147 | * If you're only interested in images, use the -i option
148 |
--------------------------------------------------------------------------------
/compile-all-targets.sh:
--------------------------------------------------------------------------------
1 | # WARNING: This script is NOT meant for normal installation, it's dedicated
2 | # to the compilation of all supported targets, from a linux machine.
3 |
4 | H1="\n\e[30;104;1m\e[2K\n\e[A" # style first header
5 | H2="\n\e[30;104m\e[1K\n\e[A" # style second header
6 | EH="\e[00m\n\e[2K" # end header
7 |
8 | version=$(sed 's/version = "\([0-9.]\{1,\}\(-[a-z]\+\)\?\)"/\1/;t;d' Cargo.toml | head -1)
9 | echo -e "${H1}Compilation of all targets for backdown $version${EH}"
10 |
11 | # clean previous build
12 | rm -rf build
13 | mkdir build
14 | echo " build cleaned"
15 |
16 | # build the linux version
17 | echo -e "${H2}Compiling the linux version${EH}"
18 | cargo clean
19 | cargo build --release
20 | strip target/release/backdown
21 | mkdir build/x86_64-linux/
22 | cp target/release/backdown build/x86_64-linux/
23 |
24 | # build a musl version
25 | echo -e "${H2}Compiling the MUSL version${EH}"
26 | cargo clean
27 | cross build --release --target x86_64-unknown-linux-musl
28 | mkdir build/x86_64-unknown-linux-musl
29 | cp target/x86_64-unknown-linux-musl/release/backdown build/x86_64-unknown-linux-musl
30 |
31 | # build the windows version
32 | # use cargo cross
33 | echo -e "${H2}Compiling the Windows version${EH}"
34 | cargo clean
35 | cross build --target x86_64-pc-windows-gnu --release
36 | mkdir build/x86_64-pc-windows-gnu
37 | cp target/x86_64-pc-windows-gnu/release/backdown.exe build/x86_64-pc-windows-gnu/
38 |
--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
1 | # build the release zip
2 | ./release.sh
3 |
4 | version=$(sed 's/version = "\([0-9.]\{1,\}\)"/\1/;t;d' Cargo.toml | head -1)
5 |
6 | # deploy on dystroy.org
7 | rm -rf ~/dev/www/dystroy/backdown/download/*
8 | cp -r build/* ~/dev/www/dystroy/backdown/download/
9 | cp "backdown_$version.zip" ~/dev/www/dystroy/backdown/download/
10 | ~/dev/www/dystroy/deploy.sh
11 |
--------------------------------------------------------------------------------
/doc/screen-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Canop/backdown/3b8702894a144e27dacd0806fcdbaebf260a0c71/doc/screen-1.png
--------------------------------------------------------------------------------
/doc/screen-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Canop/backdown/3b8702894a144e27dacd0806fcdbaebf260a0c71/doc/screen-2.png
--------------------------------------------------------------------------------
/doc/screen-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Canop/backdown/3b8702894a144e27dacd0806fcdbaebf260a0c71/doc/screen-3.png
--------------------------------------------------------------------------------
/doc/screen-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Canop/backdown/3b8702894a144e27dacd0806fcdbaebf260a0c71/doc/screen-4.png
--------------------------------------------------------------------------------
/doc/screen-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Canop/backdown/3b8702894a144e27dacd0806fcdbaebf260a0c71/doc/screen-5.png
--------------------------------------------------------------------------------
/release.sh:
--------------------------------------------------------------------------------
1 | # build a new release of backdown
2 | # This isn't used for normal compilation but for the building of the official releases
3 | version=$(sed 's/version = "\([0-9.]\{1,\}\)"/\1/;t;d' Cargo.toml | head -1)
4 |
5 | echo "Building release $version"
6 |
7 | # make the build directory and compile for all targets
8 | ./compile-all-targets.sh
9 |
10 | # add the readme and changelog in the build directory
11 | echo "This is backdown. More info and installation instructions on https://github.com/Canop/backdown" > build/README.md
12 | cp CHANGELOG.md build
13 |
14 | # publish version number
15 | echo "$version" > build/version
16 |
17 | # prepare the release archive (-f: no error when there's no previous archive)
18 | rm -f backdown_*.zip
19 | zip -r "backdown_$version.zip" build/*
20 |
21 | # copy it to releases folder (-p: no error when the folder already exists)
22 | mkdir -p releases
23 | cp "backdown_$version.zip" releases
24 |
--------------------------------------------------------------------------------
/src/args.rs:
--------------------------------------------------------------------------------
1 | use {
2 | argh::FromArgs,
3 | std::path::PathBuf,
4 | };
5 |
6 | #[derive(FromArgs)]
7 | /// Help you remove duplicate files from your disks
8 | ///
9 | ///
10 | /// Source and doc at https://github.com/Canop/backdown
11 | pub struct Args {
12 |     /// print the version
13 |     #[argh(switch, short = 'v')]
14 |     pub version: bool,
15 |
16 |     /// whether to only handle image files
17 |     #[argh(switch, short = 'i')]
18 |     pub only_images: bool,
19 |
20 |     #[argh(positional)]
21 |     /// where to look for duplicates (will use . if no directory is provided)
22 |     pub path: Option<PathBuf>,
23 | }
24 |
25 |
--------------------------------------------------------------------------------
/src/ask.rs:
--------------------------------------------------------------------------------
1 | use {
2 | crate::*,
3 | fnv::FnvHashMap,
4 | minimad::*,
5 | termimad::*,
6 | };
7 |
8 | const MAX_LISTED_FILES: usize = 5;
9 | /// Ask the staging questions (auto-solve, duplicate dirs, brotherhoods, dir pairs, in that order) and return the staged removals
10 | pub fn ask_on_dirs<'d>(
11 |     dirs_report: &'d DirsReport,
12 |     dups: &'d [DupSet],
13 |     skin: &MadSkin,
14 | ) -> anyhow::Result<RemovalReport<'d>> {
15 |     let mut rr = RemovalReport::default();
16 |     let mut question_idx = 0;
17 |     let mut questions = dirs_report.dup_dirs.len() + dirs_report.brotherhoods.len() + dirs_report.dir_pairs.len();
18 |     let ask_about_autosolve = dirs_report.auto_solvable_brotherhoods_count > 1;
19 |     if ask_about_autosolve {
20 |         questions += 1;
21 |     }
22 |
23 |     static MD: &str = r#"
24 | I'll now ask you up to *${questions}* questions to determine what files should be removed.\
25 | No file will be removed until you have the possibility to review them after the staging step.\
26 | You don't have to answer all questions:\
27 | you may end the staging phase at any time and then either do the removals or quit.
28 | "#;
29 |     let mut expander = OwningTemplateExpander::new();
30 |     expander.set("questions", questions);
31 |     skin.print_owning_expander(&expander, &TextTemplate::from(MD));
32 |
33 |     // return true when the questions must stop (quit or staging ended); print the staged totals unless quitting
34 |     let check = |rr: &RemovalReport| {
35 |         if rr.quit {
36 |             return true;
37 |         }
38 |         mad_print_inline!(
39 |             skin,
40 |             " -> currently staged: **$0** duplicate files for a removed size of **$1**\n",
41 |             // two following lines used for some screenshots so that I don't redo the staging
42 |             // 1042,
43 |             // "5.5G",
44 |             rr.staged_removals.len(),
45 |             file_size::fit_4(rr.len_to_remove),
46 |         );
47 |         rr.broken
48 |     };
49 |
50 |     let skip_auto_solvable_brotherhoods = ask_about_autosolve && {
51 |         let solved = ask_auto_solve(
52 |             question_idx,
53 |             questions,
54 |             dirs_report,
55 |             dups,
56 |             skin,
57 |             &mut rr,
58 |         )?;
59 |         if check(&rr) {
60 |             return Ok(rr);
61 |         }
62 |         question_idx += 1;
63 |         solved
64 |     };
65 |
66 |     for dup_dir in &dirs_report.dup_dirs {
67 |         ask_on_dup_dir(
68 |             question_idx,
69 |             questions,
70 |             dup_dir,
71 |             dups,
72 |             skin,
73 |             &mut rr,
74 |         )?;
75 |         if check(&rr) {
76 |             break;
77 |         }
78 |         question_idx += 1;
79 |     }
80 |     if rr.broken || rr.quit {
81 |         return Ok(rr);
82 |     }
83 |
84 |     for brotherhood in &dirs_report.brotherhoods {
85 |         if skip_auto_solvable_brotherhoods && brotherhood.is_auto_solvable {
86 |             mad_print_inline!(skin, "skipping question *$0*\n", question_idx + 1); // same 1-based number as in the question headers
87 |         } else {
88 |             ask_on_brotherhood(
89 |                 question_idx,
90 |                 questions,
91 |                 brotherhood,
92 |                 dups,
93 |                 skin,
94 |                 &mut rr,
95 |             )?;
96 |             if check(&rr) {
97 |                 break;
98 |             }
99 |         }
100 |         question_idx += 1;
101 |     }
102 |     if rr.broken || rr.quit {
103 |         return Ok(rr);
104 |     }
105 |
106 |     for dir_pair in &dirs_report.dir_pairs {
107 |         ask_on_dir_pair(
108 |             question_idx,
109 |             questions,
110 |             dir_pair,
111 |             dups,
112 |             skin,
113 |             &mut rr,
114 |         )?;
115 |         if check(&rr) {
116 |             break;
117 |         }
118 |         question_idx += 1;
119 |     }
120 |
121 |     Ok(rr)
122 | }
123 |
124 | static MD_AUTO_SOLVE: &str = r#"
125 |
126 | ## Staging Question **${num}**/${questions}
127 | You have several duplicates with "copy" names in the same directory than their identical "source" (for example *${example_1}* and *${example_2}*).
128 | I can automatically stage those **${file_count}** duplicates, which would let you gain **${size}**.
129 | If you accept, you'll skip *${skippable_questions}* questions.
130 | "#;
131 |
132 | /// return whether auto solvable brotherhoods are solved (we'll skip their questions then)
133 | fn ask_auto_solve<'d>(
134 |     question_idx: usize,
135 |     questions: usize,
136 |     dirs_report: &'d DirsReport,
137 |     dups: &'d [DupSet],
138 |     skin: &MadSkin,
139 |     rr: &mut RemovalReport<'d>,
140 | ) -> anyhow::Result<bool> {
141 |     debug_assert!(question_idx == 0);
142 |     let mut removable_count = 0;
143 |     let mut removable_len = 0;
144 |     let mut skippable_questions = 0;
145 |     let mut example_names = Vec::new();
146 |     for brotherhood in dirs_report.brotherhoods.iter().filter(|b| b.is_auto_solvable) {
147 |         removable_count += brotherhood.files.len() - 1;
148 |         removable_len += (brotherhood.files.len() - 1) as u64 * dups[brotherhood.dup_set_idx].file_len;
149 |         skippable_questions += 1;
150 |         if example_names.len() < 2 {
151 |             example_names.push(
152 |                 brotherhood.files.iter()
153 |                     .map(|&dup_file_idx| DupFileRef {
154 |                         dup_set_idx: brotherhood.dup_set_idx,
155 |                         dup_file_idx,
156 |                     })
157 |                     .filter_map(|dup_file_ref| dup_file_ref.copy_name(dups))
158 |                     .next()
159 |                     .unwrap() // SAFETY: it's not auto solvable if there's no copy named file
160 |             );
161 |         }
162 |     }
163 |     let mut expander = OwningTemplateExpander::new();
164 |     expander
165 |         .set("num", question_idx + 1)
166 |         .set("questions", questions)
167 |         .set("example_1", example_names[0])
168 |         .set("example_2", example_names[1])
169 |         .set("skippable_questions", skippable_questions)
170 |         .set("file_count", removable_count)
171 |         .set("size", file_size::fit_4(removable_len));
172 |     skin.print_owning_expander(&expander, &TextTemplate::from(MD_AUTO_SOLVE));
173 |     Ok(ask!(skin, "Do you want me to automatically stage those copies ?", ('y') {
174 |         ('y', "**Y**es") => {
175 |             for brotherhood in dirs_report.brotherhoods.iter().filter(|b| b.is_auto_solvable) {
176 |                 let dup_file_refs = brotherhood.files.iter()
177 |                     .map(|&dup_file_idx| DupFileRef {
178 |                         dup_set_idx: brotherhood.dup_set_idx,
179 |                         dup_file_idx,
180 |                     })
181 |                     .filter(|dup_file_ref| dup_file_ref.is_copy_named(dups));
182 |                 for dup_file_ref in dup_file_refs {
183 |                     rr.stage_file(dup_file_ref, dups);
184 |                 }
185 |             }
186 |             true
187 |         }
188 |         ('n', "**N**o") => {
189 |             false
190 |         }
191 |         ('e', "**E**nd staging and quit") => {
192 |             rr.quit = true;
193 |             false
194 |         }
195 |     }))
196 | }
197 |
198 | static MD_DUP_DIR: &str = r#"
199 |
200 | ## Staging Question **${num}**/${questions}
201 | The *${directory}* directory contains **${file_count}** files which are all present elsewhere.\
202 | You can remove the whole directory without losing anything.\
203 | This would let you gain **${size}**.\
204 | "#;
205 |
206 | /// ask for a dir which contains only duplicates
207 | fn ask_on_dup_dir<'d>(
208 |     question_idx: usize,
209 |     questions: usize,
210 |     dup_dir: &'d DupDir,
211 |     dups: &'d [DupSet],
212 |     skin: &MadSkin,
213 |     rr: &mut RemovalReport<'d>,
214 | ) -> anyhow::Result<()> {
215 |     // first we must make sure the dir doesn't contain the last file(s) of a dupset
216 |     let mut file_idxs_per_dupset: FnvHashMap<usize, Vec<usize>> = FnvHashMap::default();
217 |     for file_ref in &dup_dir.files {
218 |         file_idxs_per_dupset.entry(file_ref.dup_set_idx)
219 |             .or_default()
220 |             .push(file_ref.dup_file_idx);
221 |     }
222 |     for (&dup_set_idx, file_idxs) in &file_idxs_per_dupset {
223 |         let dup_set = &dups[dup_set_idx];
224 |         let not_here_or_staged_count = (0..dup_set.files.len())
225 |             .filter(|&dup_file_idx| {
226 |                 !rr.staged_removals.contains(&DupFileRef { dup_set_idx, dup_file_idx })
227 |                 &&
228 |                 !file_idxs.contains(&dup_file_idx)
229 |             })
230 |             .count();
231 |         if not_here_or_staged_count == 0 {
232 |             // dup_set would be removed -> skipping
233 |             return Ok(());
234 |         }
235 |     }
236 |     // now we know we can stage the whole directory
237 |     let removable_len = dup_dir.files.iter()
238 |         .map(|dup_file_ref| dups[dup_file_ref.dup_set_idx].file_len)
239 |         .sum();
240 |     let mut expander = OwningTemplateExpander::new();
241 |     expander
242 |         .set("num", question_idx + 1)
243 |         .set("questions", questions)
244 |         .set("directory", dup_dir.path.to_string_lossy())
245 |         .set("file_count", dup_dir.files.len())
246 |         .set("size", file_size::fit_4(removable_len));
247 |     skin.print_owning_expander(&expander, &TextTemplate::from(MD_DUP_DIR));
248 |     ask!(skin, "What do you want to do with this directory?", ('s') {
249 |         ('r', "Stage the whole directory for **r**emoval") => {
250 |             for &file_ref in &dup_dir.files {
251 |                 rr.stage_file(file_ref, dups);
252 |             }
253 |             rr.staged_dir_removals.push(dup_dir.path);
254 |         }
255 |         ('s', "**S**kip and go to next question") => {}
256 |         ('e', "**E**nd staging phase") => { rr.broken = true; }
257 |     });
258 |     Ok(())
259 | }
260 |
261 | static MD_BROTHERHOOD: &str = r#"
262 |
263 | ## Staging Question **${num}**/${questions}
264 | The *${parent}* directory contains **${file_count}** identical files, each one of size **${size}**.
265 | "#;
266 |
267 | /// ask which file to keep in a set of identical files lying in the same directory
268 | fn ask_on_brotherhood(
269 |     question_idx: usize,
270 |     questions: usize,
271 |     brotherhood: &Brotherhood,
272 |     dups: &[DupSet],
273 |     skin: &MadSkin,
274 |     rr: &mut RemovalReport,
275 | ) -> anyhow::Result<()> {
276 |     // we check nothing because questions for brotherhoods come before the other ones
277 |     // FIXME we must check it's not autosolved!
278 |     let dup_set = &dups[brotherhood.dup_set_idx];
279 |     let mut expander = OwningTemplateExpander::new();
280 |     expander
281 |         .set("num", question_idx + 1)
282 |         .set("questions", questions)
283 |         .set("parent", brotherhood.parent.to_string_lossy())
284 |         .set("file_count", brotherhood.files.len())
285 |         .set("size", file_size::fit_4(dup_set.file_len));
286 |     skin.print_owning_expander(&expander, &TextTemplate::from(MD_BROTHERHOOD));
287 |     let mut q = Question::new("What do you want to do with these duplicates?");
288 |
289 |     struct F<'f> { idx: usize, name: &'f str }
290 |     let mut candidates: Vec<F> = brotherhood.files.iter()
291 |         .map(|&idx| F{ idx, name: dup_set.files[idx].path.file_name().unwrap().to_str().unwrap() })
292 |         .collect();
293 |     candidates.sort_by(|a, b| a.name.cmp(b.name));
294 |     for (i, f) in candidates.iter().enumerate() {
295 |         q.add_answer(
296 |             i + 1,
297 |             format!("keep *{}* and stage other one(s) for removal", f.name),
298 |         );
299 |     }
300 |     q.add_answer('s', "**S**kip and go to next question");
301 |     q.add_answer('e', "**E**nd staging phase");
302 |     q.set_default("s");
303 |     match q.ask(skin)?.as_str() {
304 |         "s" => {}
305 |         "e" => { rr.broken = true; }
306 |         a => {
307 |             if let Ok(a) = a.parse::<usize>() {
308 |                 if a == 0 {
309 |                     println!("Options start at 1 - skipping");
310 |                 } else {
311 |                     let chosen = &candidates[a - 1];
312 |                     // `chosen.idx` is an index in the dup set's files, not a position
313 |                     // in `brotherhood.files`, so compare it with the stored index
314 |                     for i in (0..brotherhood.files.len()).filter(|&i| brotherhood.files[i] != chosen.idx) {
315 |                         rr.stage_file(brotherhood.file_ref(i), dups);
316 |                     }
317 |                 }
318 |             }
319 |         }
320 |     }
321 |     Ok(())
322 | }
323 |
324 | static MD_DIR_PAIR: &str = r#"
325 |
326 | ## Staging Question **${num}**/${questions}
327 | Left and right directories have **${file_count}** common files for a total duplicate size of **${removable_len}**.
328 | |-:|:-:|:-:|
329 | | |left|right|
330 | |-:|:-:|:-:|
331 | |directory|*${left_path}*|*${right_path}*|
332 | ${common_files
333 | |common files|${file_count}|${file_count}|
334 | }
335 | ${removable_files
336 | |removable file #${removable_file_idx}|**${left_file_name}**|**${right_file_name}**|
337 | }
338 | |already staged for removal|${removed_left_count}|${removed_right_count}|
339 | |other files|${left_other_count}|${right_other_count}|
340 | |-:
341 | "#;
342 |
343 | /// asking the question when left dir and right dir are different
344 | fn ask_on_dir_pair(
345 |     question_idx: usize,
346 |     questions: usize,
347 |     dir_pair: &DirPair,
348 |     dups: &[DupSet],
349 |     skin: &MadSkin,
350 |     rr: &mut RemovalReport,
351 | ) -> anyhow::Result<()> {
352 |     // we must recount now because files may have been already
353 |     // staged for removals
354 |     let (mut removed_left_count, mut removed_right_count) = (0, 0);
355 |     let (mut removable_left_count, mut removable_right_count) = (0, 0);
356 |     let mut removable_pairs: Vec<FilePair> = Vec::new();
357 |     let mut removable_len: u64 = 0;
358 |     for file_pair in &dir_pair.file_pairs {
359 |         let removed_left = rr.staged_removals.contains(&file_pair.left_ref());
360 |         let removed_right = rr.staged_removals.contains(&file_pair.right_ref());
361 |         if removed_left {
362 |             removed_left_count += 1;
363 |         } else {
364 |             removable_left_count += 1;
365 |         }
366 |         if removed_right {
367 |             removed_right_count += 1;
368 |         } else {
369 |             removable_right_count += 1;
370 |         }
371 |         if !removed_left && !removed_right {
372 |             removable_pairs.push(*file_pair);
373 |             removable_len += dups[file_pair.dup_set_idx].file_len;
374 |         }
375 |     }
376 |     if removable_pairs.is_empty() {
377 |         mad_print_inline!(skin, "*skipping question because of previously staged removals*\n");
378 |         return Ok(());
379 |     }
380 |     let left_dir_count = dir_pair.key.left_dir.read_dir()?.count();
381 |     if left_dir_count < removed_left_count + removable_left_count {
382 |         println!("skipping question because some files were removed on disk");
383 |         return Ok(());
384 |     }
385 |     let left_other_count = left_dir_count - removed_left_count - removable_left_count;
386 |     let right_dir_count = dir_pair.key.right_dir.read_dir()?.count();
387 |     if right_dir_count < removed_right_count + removable_right_count {
388 |         println!("skipping question because some files were removed on disk");
389 |         return Ok(());
390 |     }
391 |     let right_other_count = right_dir_count - removed_right_count - removable_right_count;
392 |     let mut expander = OwningTemplateExpander::new();
393 |     expander
394 |         .set("num", question_idx + 1)
395 |         .set("questions", questions)
396 |         .set("file_count", removable_pairs.len())
397 |         .set("removable_len", file_size::fit_4(removable_len))
398 |         .set("left_path", dir_pair.key.left_dir.to_string_lossy())
399 |         .set("right_path", dir_pair.key.right_dir.to_string_lossy())
400 |         .set("removed_left_count", removed_left_count)
401 |         .set("removed_right_count", removed_right_count)
402 |         .set("left_other_count", left_other_count)
403 |         .set("right_other_count", right_other_count);
404 |     if removable_pairs.len() <= MAX_LISTED_FILES {
405 |         for (removable_file_idx, file_pair) in removable_pairs.iter().enumerate() {
406 |             expander.sub("removable_files")
407 |                 .set("removable_file_idx", removable_file_idx + 1)
408 |                 .set("left_file_name", file_pair.left_ref().file_name(dups))
409 |                 .set("right_file_name", file_pair.right_ref().file_name(dups));
410 |         }
411 |     } else {
412 |         expander.sub("common_files");
413 |     }
414 |     skin.print_owning_expander(&expander, &TextTemplate::from(MD_DIR_PAIR));
415 |     ask!(skin, "What do you want to do here?", ('s') {
416 |         ('l', "Stage **l**eft files for removal") => {
417 |             for file_pair in removable_pairs {
418 |                 rr.stage_file(file_pair.left_ref(), dups);
419 |             }
420 |         }
421 |         ('r', "Stage **r**ight files for removal") => {
422 |             for file_pair in removable_pairs {
423 |                 rr.stage_file(file_pair.right_ref(), dups);
424 |             }
425 |         }
426 |         ('s', "**S**kip and go to next question") => {
427 |             println!("skipped");
428 |         }
429 |         ('e', "**E**nd staging phase") => {
430 |             rr.broken = true;
431 |         }
432 |     });
433 |     Ok(())
434 | }
435 |
436 |
--------------------------------------------------------------------------------
/src/dirs.rs:
--------------------------------------------------------------------------------
1 | use {
2 | crate::*,
3 | fnv::FnvHashMap,
4 | std::{
5 | cmp::{Ord, Ordering, Reverse},
6 | path::Path,
7 | },
8 | };
9 |
10 | #[derive(Debug)]
11 | pub struct DirsReport<'d> {
12 |     pub dup_dirs: Vec<DupDir<'d>>,
13 |     pub brotherhoods: Vec<Brotherhood<'d>>,
14 |     pub auto_solvable_brotherhoods_count: usize,
15 |     pub dir_pairs: Vec<DirPair<'d>>,
16 | }
17 |
18 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
19 | pub struct DirPairKey<'d> {
20 | pub left_dir: &'d Path,
21 | pub right_dir: &'d Path,
22 | }
23 |
24 | #[derive(Debug)]
25 | pub struct DirPair<'d> {
26 |     pub key: DirPairKey<'d>,
27 |     pub file_pairs: Vec<FilePair>,
28 | }
29 |
30 | /// a brotherhood gathers duplicates having the same parent
31 | #[derive(Debug)]
32 | pub struct Brotherhood<'d> {
33 |     /// the directory containing all the files of the brotherhood
34 |     pub parent: &'d Path,
35 |     /// index of the dup set in the slice of dup sets
36 |     pub dup_set_idx: usize,
37 |
38 |     /// file indexes
39 |     pub files: Vec<usize>,
40 |
41 |     /// when all files have names like "thing (copy).png", "thing (another copy).png", etc.
42 |     /// except one file, we can propose an automated resolution.
43 |     /// Note that we don't check the start of filenames are identical because we
44 |     /// don't, in fact, care.
45 |     pub is_auto_solvable: bool,
46 | }
47 |
48 | /// a directory which contains only duplicates
49 | #[derive(Debug)]
50 | pub struct DupDir<'d> {
51 |     pub path: &'d Path,
52 |     pub files: Vec<DupFileRef>,
53 | }
54 |
55 | impl<'d> Brotherhood<'d> {
56 | fn maybe_add_files(&mut self, a_idx: usize, b_idx: usize) {
57 | if !self.files.contains(&a_idx) {
58 | self.files.push(a_idx);
59 | }
60 | if !self.files.contains(&b_idx) {
61 | self.files.push(b_idx);
62 | }
63 | }
64 | pub fn file_ref(&self, i: usize) -> DupFileRef {
65 | DupFileRef {
66 | dup_set_idx: self.dup_set_idx,
67 | dup_file_idx: self.files[i],
68 | }
69 | }
70 | pub fn gain(&self, dups: &[DupSet]) -> u64 {
71 | (self.files.len() - 1) as u64 * dups[self.dup_set_idx].file_len
72 | }
73 | }
74 |
75 | impl<'d> DirPairKey<'d> {
76 | pub fn new(a: &'d Path, b: &'d Path) -> (Self, bool) {
77 | if a.cmp(b) == Ordering::Less {
78 | (
79 | DirPairKey {
80 | left_dir: a,
81 | right_dir: b,
82 | },
83 | false
84 | )
85 | } else {
86 | (
87 | DirPairKey {
88 | left_dir: b,
89 | right_dir: a,
90 | },
91 | true
92 | )
93 | }
94 | }
95 | }
96 |
97 | impl<'d> DirPair<'d> {
98 | pub fn new(
99 | key: DirPairKey<'d>,
100 | file_pairs: Vec,
101 | ) -> Self {
102 | Self { key, file_pairs }
103 | }
104 | }
105 |
106 | impl<'d> DirsReport<'d> {
107 | pub fn compute(dups: &'d[DupSet]) -> anyhow::Result {
108 | let mut brotherhoods = Vec::new();
109 | let mut auto_solvable_brotherhoods_count = 0;
110 | let mut dp_map: FnvHashMap> = FnvHashMap::default();
111 | let mut dir_map: FnvHashMap<&Path, Vec> = FnvHashMap::default();
112 | let mut brotherhood_per_parent: FnvHashMap<&Path, Brotherhood<'d>> = FnvHashMap::default();
113 | for (dup_set_idx, dup) in dups.iter().enumerate() {
114 | brotherhood_per_parent.clear();
115 | for (a_file_idx, a) in dup.files.iter().enumerate() {
116 | let a_parent = a.path.parent().unwrap();
117 | // adding to the dir_map
118 | dir_map.entry(a_parent)
119 | .or_default()
120 | .push(DupFileRef { dup_set_idx, dup_file_idx: a_file_idx });
121 |
122 | // building dir pair
123 | for b_file_idx in a_file_idx+1..dup.files.len() {
124 | let b = &dup.files[b_file_idx];
125 | let b_parent = b.path.parent().unwrap();
126 | let (dpk, inverted) = DirPairKey::new(
127 | a_parent,
128 | b_parent,
129 | );
130 | let (left_file_idx, right_file_idx) = if inverted {
131 | (b_file_idx, a_file_idx)
132 | } else {
133 | (a_file_idx, b_file_idx)
134 | };
135 | if a_parent == b_parent {
136 | // brotherhood
137 | brotherhood_per_parent
138 | .entry(a_parent)
139 | .or_insert_with(|| Brotherhood {
140 | parent: a_parent,
141 | dup_set_idx,
142 | files: Vec::new(),
143 | is_auto_solvable: false,
144 | })
145 | .maybe_add_files(left_file_idx, right_file_idx);
146 | } else {
147 | // dir_pair
148 | dp_map.entry(dpk)
149 | .or_default()
150 | .push(FilePair {
151 | dup_set_idx,
152 | left_file_idx,
153 | right_file_idx,
154 | });
155 | }
156 | }
157 | }
158 | for (_, mut brotherhood) in brotherhood_per_parent.drain() {
159 | let copy_count = brotherhood.files
160 | .iter()
161 | .map(|&dup_file_idx| DupFileRef {
162 | dup_set_idx: brotherhood.dup_set_idx,
163 | dup_file_idx,
164 | })
165 | .filter(|dup_file_ref| dup_file_ref.is_copy_named(dups))
166 | .count();
167 | if copy_count + 1 == brotherhood.files.len() {
168 | brotherhood.is_auto_solvable = true;
169 | auto_solvable_brotherhoods_count += 1;
170 | }
171 | brotherhoods.push(brotherhood);
172 | }
173 | }
174 |
175 | // we remove the parent of brotherhoods from dir_map
176 | // because we don't want them in dup_dirs
177 | for brotherhood in &brotherhoods {
178 | dir_map.remove(brotherhood.parent);
179 | }
180 |
181 | let mut dup_dirs = Vec::new();
182 | for (path, files) in dir_map.drain() {
183 | if files.len() < 3 {
184 | // small directories aren't interesting, we'll handle
185 | // the dups by comparing dup dirs
186 | continue;
187 | }
188 | let total_child_count = path.read_dir()?.count();
189 | if total_child_count == files.len() {
190 | dup_dirs.push(DupDir { path, files });
191 | }
192 | }
193 |
194 | // ordering
195 | dup_dirs.sort_by_key(|dd| Reverse(dd.files.len()));
196 | brotherhoods.sort_by_key(|b| Reverse(b.gain(dups)));
197 | let mut dir_pairs: Vec<_> = dp_map
198 | .drain()
199 | .map(|(key, file_pairs)| DirPair::new(key, file_pairs))
200 | .collect();
201 | dir_pairs.sort_by_key(|dp| Reverse(dp.file_pairs.len()));
202 |
203 | Ok(Self {
204 | dup_dirs,
205 | brotherhoods,
206 | auto_solvable_brotherhoods_count,
207 | dir_pairs,
208 | })
209 | }
210 | }
211 |
212 |
--------------------------------------------------------------------------------
/src/dup.rs:
--------------------------------------------------------------------------------
1 | use {
2 | lazy_regex::*,
3 | std::{
4 | collections::HashSet,
5 | path::{Path, PathBuf},
6 | },
7 | };
8 |
9 |
// TODO remove this type and use PathBuf directly?
/// A file belonging to a set of identical files.
#[derive(Debug)]
pub struct DupFile {
    /// path of the file
    pub path: PathBuf,
}
16 |
17 | /// the list of files having a hash
18 | #[derive(Debug, Default)]
19 | pub struct DupSet {
20 | pub files: Vec, // identical files
21 | pub file_len: u64,
22 | }
23 |
/// Reference to a file in a slice of `DupSet`: index of the set,
/// then index of the file in this set.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct DupFileRef {
    pub dup_set_idx: usize,
    pub dup_file_idx: usize,
}
29 |
30 | impl DupFile {
31 | pub fn new(path: PathBuf) -> Self {
32 | Self {
33 | path,
34 | //staged_for_removal: false,
35 | }
36 | }
37 | }
38 |
39 | pub fn reference_file<'a>(
40 | dup_set_idx: usize,
41 | dup_set: &'a DupSet,
42 | staged_removals: &HashSet,
43 | ) -> Option<&'a Path> {
44 | let mut best: Option<&Path> = None;
45 | for (dup_file_idx, file) in dup_set.files.iter().enumerate() {
46 | let path = &file.path;
47 | let dup_file_ref = DupFileRef { dup_set_idx, dup_file_idx };
48 | if staged_removals.contains(&dup_file_ref) {
49 | continue;
50 | }
51 | if let Some(previous) = best {
52 | if previous.to_string_lossy().len() > path.to_string_lossy().len() {
53 | best = Some(path);
54 | }
55 | } else {
56 | best = Some(path);
57 | }
58 | }
59 | best
60 | }
61 |
62 | impl DupFileRef {
63 | pub fn path(self, dups: &[DupSet]) -> &Path {
64 | &dups[self.dup_set_idx].files[self.dup_file_idx].path
65 | }
66 | pub fn file_name(self, dups:&[DupSet]) -> String {
67 | self.path(dups)
68 | .file_name()
69 | .map_or_else(
70 | || "".to_string(),
71 | |n| n.to_string_lossy().to_string()
72 | )
73 | }
74 | /// get the file name when the file has a name like "thing (3).jpg"
75 | /// or "thing (3rd copy).png"
76 | pub fn copy_name(self, dups:&[DupSet]) -> Option<&str> {
77 | copy_name(self.path(dups))
78 | }
79 | /// tells whether the file has a name like "thing (3).jpg"
80 | /// or "thing (3rd copy).png"
81 | pub fn is_copy_named(self, dups:&[DupSet]) -> bool {
82 | self.copy_name(dups).is_some()
83 | }
84 | }
85 |
86 | /// get the name if this path is of a "copy" file, that is an usual name for a copy
87 | pub fn copy_name(path: &Path) -> Option<&str> {
88 | path
89 | .file_name()
90 | .and_then(std::ffi::OsStr::to_str)
91 | .filter(|n| regex_is_match!(r#"(?x)
92 | .+
93 | \((
94 | \d+
95 | |
96 | [^)]*
97 | copy
98 | )\)
99 | (\.\w+)?
100 | $
101 | "#, n))
102 | }
103 |
/// Check `copy_name` on names which are, and names which aren't,
/// usual names for copies.
#[test]
fn test_is_copy_named() {
    const COPIES: [&str; 6] = [
        "/some/path/to/bla (3).jpg",
        "bla (3455).jpg",
        "uuuuu (copy).rs",
        "/home/dys/Images/pink hexapodes (another copy).jpeg",
        "~/uuuuu (copy)",
        "uuuuu (3rd copy)",
    ];
    const NOT_COPIES: [&str; 5] = [
        "copy",
        "copy.txt",
        "bla.png",
        "/home/dys/not a copy",
        "(don't copy)",
    ];
    for name in COPIES.iter() {
        assert!(copy_name(Path::new(name)).is_some());
    }
    for name in NOT_COPIES.iter() {
        assert!(copy_name(Path::new(name)).is_none());
    }
}
130 |
--------------------------------------------------------------------------------
/src/dup_report.rs:
--------------------------------------------------------------------------------
1 | use {
2 | anyhow::Result,
3 | crossbeam::channel,
4 | crate::*,
5 | fnv::FnvHashMap,
6 | minimad::*,
7 | rayon::{
8 | prelude::ParallelIterator,
9 | iter::ParallelBridge,
10 | },
11 | std::{
12 | cmp::Reverse,
13 | fs,
14 | path::PathBuf,
15 | },
16 | termimad::*,
17 | };
18 |
19 | #[derive(Default)]
20 | pub struct DupReport {
21 | pub dups: Vec,
22 | pub seen: usize,
23 | /// number of files which could be removed
24 | /// when keeping one of each set
25 | pub duplicate_count: usize,
26 | pub duplicate_len_sum: u64,
27 | }
28 |
29 | impl DupReport {
30 | pub fn len(&self) -> usize {
31 | self.dups.len()
32 | }
33 | pub fn build(
34 | root: PathBuf,
35 | only_images: bool,
36 | ) -> Result {
37 | let (s_matching_files, r_matching_files) = channel::unbounded();
38 | let (s_hashed_files, r_hashed_files) = channel::unbounded::<(PathBuf, FileHash)>();
39 | let file_generator = std::thread::spawn(move||{
40 | let mut dirs = Vec::new();
41 | dirs.push(root);
42 | while let Some(dir) = dirs.pop() {
43 | if let Ok(entries) = fs::read_dir(&dir) {
44 | for e in entries.flatten() {
45 | let path = e.path();
46 | let name = match path.file_name().and_then(|s| s.to_str()) {
47 | Some(s) => s,
48 | None => { continue; },
49 | };
50 | if name.starts_with('.') {
51 | continue;
52 | }
53 | if let Ok(md) = path.symlink_metadata() {
54 | if md.is_dir() {
55 | // we add the directory to the channel of dirs needing processing
56 | dirs.push(path);
57 | continue;
58 | }
59 | if md.is_file() {
60 | if only_images {
61 | let ext = match path.extension().and_then(|s| s.to_str()) {
62 | Some(s) => s,
63 | None => { continue; },
64 | };
65 | if !ext::is_image(ext) {
66 | continue;
67 | }
68 | }
69 | s_matching_files.send(path).unwrap();
70 | }
71 | }
72 | }
73 | }
74 | }
75 | });
76 |
77 | // parallel computation of the hashes
78 | r_matching_files.into_iter().par_bridge()
79 | .for_each_with(s_hashed_files, |s, path| {
80 | if let Ok(hash) = FileHash::new(&path) {
81 | s.send((path, hash)).unwrap();
82 | }
83 | });
84 |
85 | let mut map: FnvHashMap> = FnvHashMap::default();
86 | let mut seen = 0;
87 | r_hashed_files.iter()
88 | .for_each(|(path, hash)| {
89 | let e = map.entry(hash).or_default();
90 | e.push(DupFile::new(path));
91 | seen += 1;
92 | });
93 |
94 | file_generator.join().unwrap();
95 |
96 | let mut dups = Vec::new();
97 | let mut duplicate_count = 0;
98 | let mut duplicate_len_sum = 0;
99 | for (_hash, files) in map.drain() {
100 | if files.len() < 2 {
101 | continue;
102 | }
103 | if let Ok(md) = fs::metadata(&files[0].path) {
104 | duplicate_count += files.len() - 1;
105 | let file_len = md.len();
106 | if file_len > 0 {
107 | duplicate_len_sum += (files.len() - 1) as u64 * file_len;
108 | dups.push(DupSet {
109 | files,
110 | file_len,
111 | });
112 | }
113 | }
114 | }
115 |
116 | dups.sort_by_key(|dup| Reverse(dup.files.len()));
117 |
118 | Ok(Self{
119 | dups,
120 | seen,
121 | duplicate_count,
122 | duplicate_len_sum,
123 | })
124 | }
125 |
126 | pub fn print_summary(
127 | &self,
128 | skin: &MadSkin,
129 | ) {
130 | static MD: &str = r#"
131 | I've hashed *${seen}* files and found *${set_count}* sets of duplicates.\
132 | *${removable_count}* files can be removed to gain **${gain}**.\
133 | "#;
134 | let mut expander = OwningTemplateExpander::new();
135 | expander
136 | .set("seen", self.seen)
137 | .set("set_count", self.dups.len())
138 | .set("removable_count", self.duplicate_count)
139 | .set("gain", file_size::fit_4(self.duplicate_len_sum));
140 | skin.print_owning_expander(&expander, &TextTemplate::from(MD));
141 | }
142 | pub fn is_empty(&self) -> bool {
143 | self.dups.is_empty()
144 | }
145 | }
146 |
--------------------------------------------------------------------------------
/src/ext.rs:
--------------------------------------------------------------------------------
1 | use {
2 | phf::{phf_set, Set},
3 | };
4 |
5 | static IMAGE_EXTENSIONS: Set<&'static str> = phf_set! {
6 | "jpg", "JPG",
7 | "jpeg", "JPEG",
8 | "png", "PNG",
9 | };
10 |
11 | pub fn is_image(ext: &str) -> bool {
12 | IMAGE_EXTENSIONS.contains(ext)
13 | }
14 |
--------------------------------------------------------------------------------
/src/file_pair.rs:
--------------------------------------------------------------------------------
1 | use {
2 | crate::*,
3 | };
4 |
/// Two files of the same set of identical files, designated by
/// the index of the set and their indexes in this set.
#[derive(Clone, Copy, Debug)]
pub struct FilePair {
    pub dup_set_idx: usize,
    pub left_file_idx: usize,
    pub right_file_idx: usize,
}
11 |
12 | impl FilePair {
13 | pub fn left_ref(self) -> DupFileRef {
14 | DupFileRef {
15 | dup_set_idx: self.dup_set_idx,
16 | dup_file_idx: self.left_file_idx,
17 | }
18 | }
19 | pub fn right_ref(self) -> DupFileRef {
20 | DupFileRef {
21 | dup_set_idx: self.dup_set_idx,
22 | dup_file_idx: self.right_file_idx,
23 | }
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/src/hash.rs:
--------------------------------------------------------------------------------
1 |
2 | use {
3 | anyhow::Result,
4 | std::{
5 | fs::File,
6 | io,
7 | path::Path,
8 | },
9 | };
10 |
11 | #[derive(Debug, PartialEq, Eq, Hash)]
12 | pub struct FileHash {
13 | hash: blake3::Hash,
14 | }
15 |
16 | impl FileHash {
17 | pub fn new>(path: P) -> Result {
18 | let mut file = File::open(path)?;
19 | let mut hasher = blake3::Hasher::new();
20 | io::copy(&mut file, &mut hasher)?;
21 | let hash = hasher.finalize();
22 | Ok(Self {
23 | hash,
24 | })
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/json.rs:
--------------------------------------------------------------------------------
1 | use {
2 | serde_json::Value,
3 | std::{
4 | fs,
5 | io::Write,
6 | path::PathBuf,
7 | },
8 | };
9 |
10 | fn available_path(name: &str) -> PathBuf {
11 | let mut count = 1;
12 | let ext = "json";
13 | loop {
14 | let cmp = if count > 1 {
15 | format!("-{}", count)
16 | } else {
17 | "".to_string()
18 | };
19 | let file_name = format!(
20 | "{}-{}{}.{}",
21 | chrono::Local::now().format("%F-%Hh%M"),
22 | name,
23 | cmp,
24 | ext,
25 | );
26 | let path = PathBuf::from(file_name);
27 | if !path.exists() {
28 | return path;
29 | }
30 | count += 1;
31 | }
32 | }
33 |
34 | /// write a JSON value in a file whose name will be based on the provided
35 | /// name, with a date and if necessary with an additional number to avoid
36 | /// collision.
37 | pub fn write_in_file(
38 | name: &str,
39 | value: &Value,
40 | ) -> anyhow::Result {
41 | let path = available_path(name);
42 | let mut file = fs::File::create(&path)?;
43 | let json = serde_json::to_string_pretty(value)?;
44 | writeln!(&mut file, "{}", json)?;
45 | Ok(path)
46 | }
47 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | #[macro_use] extern crate cli_log;
2 |
3 | pub mod args;
4 | pub mod ask;
5 | pub mod dirs;
6 | pub mod dup;
7 | pub mod dup_report;
8 | pub mod ext;
9 | pub mod file_pair;
10 | pub mod hash;
11 | mod json;
12 | pub mod removal_report;
13 |
14 | pub use {
15 | args::*,
16 | ask::*,
17 | dirs::*,
18 | dup::*,
19 | dup_report::*,
20 | file_pair::*,
21 | ext::*,
22 | hash::*,
23 | json::*,
24 | removal_report::*,
25 | };
26 |
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
1 | #[macro_use] extern crate cli_log;
2 |
3 | use {
4 | backdown::*,
5 | anyhow::Result,
6 | crossterm::style::{Attribute::*, Color::*},
7 | termimad::*,
8 | };
9 |
10 | fn run_app() -> Result<()> {
11 | let args: Args = argh::from_env();
12 | if args.version {
13 | println!("backdown {}", env!("CARGO_PKG_VERSION"));
14 | return Ok(());
15 | }
16 | let root = args.path
17 | .unwrap_or_else(|| std::env::current_dir().unwrap());
18 | let skin = make_skin();
19 | info!("root: {:?}", &root);
20 | skin.print_text("\n# Phase 1) Analysis");
21 | mad_print_inline!(skin, "Analyzing directory *$0*...\n", root.to_string_lossy());
22 | let dup_report = time!(
23 | Info,
24 | "computing dup sets",
25 | DupReport::build(root, args.only_images)?,
26 | );
27 | dup_report.print_summary(&skin);
28 | if dup_report.is_empty() {
29 | println!("There's nothing to remove");
30 | return Ok(());
31 | }
32 | let dirs_report = time!(
33 | Info,
34 | "computing dirs report",
35 | DirsReport::compute(&dup_report.dups)?,
36 | );
37 | skin.print_text("\n# Phase 2) Staging: choose files to remove");
38 | let rr = ask_on_dirs(&dirs_report, &dup_report.dups, &skin)?;
39 | if rr.is_empty() || rr.quit {
40 | return Ok(());
41 | }
42 | skin.print_text("\n# Phase 3) Review and confirm removals");
43 | let mut exported = false;
44 | loop {
45 | let mut question = Question::new("What do you want to do now?");
46 | question.add_answer('s', "Review touched **s**ets of identical files");
47 | if !exported {
48 | question.add_answer(
49 | 'j',
50 | "Export touched sets of identical files in a **J**SON file",
51 | );
52 | }
53 | question.add_answer('f', "Review all **f**iles staged for removal");
54 | question.add_answer('r', "Do the **r**emovals now");
55 | #[cfg(unix)]
56 | question.add_answer('l', "Replace removed files with **l**inks");
57 | question.add_answer('q', "**Q**uit *backdown*, removing nothing");
58 | match question.ask(&skin)?.as_ref() {
59 | "s" => {
60 | rr.list_dup_sets(&dup_report.dups, &skin);
61 | }
62 | "j" => {
63 | let value = rr.dup_sets_as_json(&dup_report.dups);
64 | let path = write_in_file("backdown-report", &value)?;
65 | mad_print_inline!(skin, "Wrote *$0*\n", path.to_string_lossy());
66 | exported = true;
67 | }
68 | "f" => {
69 | rr.list_staged_removals(&dup_report.dups, &skin);
70 | }
71 | "r" => {
72 | rr.do_the_removal(&dup_report.dups, &skin)?;
73 | break;
74 | }
75 | "l" => {
76 | #[cfg(unix)]
77 | rr.replace_staged_with_links(&dup_report.dups, &skin)?;
78 | break;
79 | }
80 | "q" => {
81 | break;
82 | }
83 | _ => {} // should not happen
84 | }
85 | }
86 | Ok(())
87 | }
88 |
89 | fn main() {
90 | init_cli_log!();
91 | if let Err(e) = run_app() {
92 | eprintln!("{}", e);
93 | }
94 | info!("bye");
95 | }
96 |
97 | fn make_skin() -> MadSkin {
98 | let mut skin = MadSkin::default();
99 | skin.table.align = Alignment::Left;
100 | skin.headers[0].align = Alignment::Left;
101 | skin.set_headers_fg(AnsiValue(178));
102 | skin.bold.set_fg(Yellow);
103 | skin.italic.set_fg(AnsiValue(204));
104 | skin.italic.remove_attr(Italic);
105 | skin.scrollbar.thumb.set_fg(AnsiValue(178));
106 | skin.code_block.align = Alignment::Center;
107 | skin
108 | }
109 |
--------------------------------------------------------------------------------
/src/removal_report.rs:
--------------------------------------------------------------------------------
1 | use {
2 | crate::*,
3 | minimad::*,
4 | serde_json::{json, Value},
5 | std::{
6 | collections::{HashMap, HashSet},
7 | fs,
8 | path::Path,
9 | },
10 | termimad::*,
11 | };
12 |
13 | #[derive(Debug, Clone, Default)]
14 | pub struct RemovalReport<'d> {
15 | pub dup_sets_with_staged: HashSet,
16 | pub staged_removals: HashSet,
17 | pub staged_dir_removals: Vec<&'d Path>,
18 | pub len_to_remove: u64,
19 | pub broken: bool,
20 | pub quit: bool,
21 | }
22 |
23 |
24 | impl<'d> RemovalReport<'d> {
25 |
26 | pub fn stage_file(&mut self, dup_file_ref: DupFileRef, dups: &[DupSet]) {
27 | self.len_to_remove += dups[dup_file_ref.dup_set_idx].file_len;
28 | self.dup_sets_with_staged.insert(dup_file_ref.dup_set_idx);
29 | self.staged_removals.insert(dup_file_ref);
30 | // println!("staged {:?}", &dups[dup_file_ref.dup_set_idx].files[dup_file_ref.dup_file_idx].path);
31 | }
32 |
33 | pub fn is_empty(&self) -> bool {
34 | self.staged_removals.is_empty()
35 | }
36 |
37 | pub fn list_staged_removals(
38 | &self,
39 | dups: &[DupSet],
40 | skin: &MadSkin,
41 | ) {
42 | mad_print_inline!(
43 | skin,
44 | "**$0** files planned for removal for a total size of **$1**:\n",
45 | self.staged_removals.len(),
46 | file_size::fit_4(self.len_to_remove),
47 | );
48 | for (idx, file_ref) in self.staged_removals.iter().enumerate() {
49 | let path = file_ref.path(dups);
50 | let size = dups[file_ref.dup_set_idx].file_len;
51 | mad_print_inline!(
52 | skin,
53 | "#$0 : *$1* (**$2**)\n",
54 | idx + 1,
55 | path.to_string_lossy(),
56 | file_size::fit_4(size),
57 | );
58 | }
59 | }
60 |
61 | /// write the report as a JSON file
62 | pub fn dup_sets_as_json(
63 | &self,
64 | dups: &[DupSet],
65 | ) -> Value {
66 | json!({
67 | "len_to_remove": self.len_to_remove,
68 | "dup_sets": dups.iter().enumerate()
69 | .filter_map(|(dup_set_idx, dup_set)| {
70 | if !self.dup_sets_with_staged.contains(&dup_set_idx) {
71 | return None;
72 | }
73 | Some(json!({
74 | "file_len": dup_set.file_len,
75 | "files": dup_set.files.iter()
76 | .enumerate()
77 | .map(|(dup_file_idx, file)| {
78 | let file = file.path.to_string_lossy().to_string();
79 | let file_ref = DupFileRef { dup_set_idx, dup_file_idx };
80 | let action = if self.staged_removals.contains(&file_ref) {
81 | "remove"
82 | } else {
83 | "keep"
84 | };
85 | (file, action)
86 | })
87 | .collect::>()
88 | }))
89 | })
90 | .collect::>(),
91 | })
92 | }
93 |
94 | pub fn list_dup_sets(
95 | &self,
96 | dups: &[DupSet],
97 | skin: &MadSkin,
98 | ) {
99 | static MD: &str = r#"
100 | |:-|:-|
101 | |Set #*${set_num}* : each file is **${file_len}**|action|
102 | |:-|:-:|
103 | ${files
104 | |${path}|**${action}**|
105 | }
106 | |-
107 | "#;
108 | let template = TextTemplate::from(MD);
109 | for (dup_set_idx, dup_set) in dups.iter().enumerate() {
110 | if !self.dup_sets_with_staged.contains(&dup_set_idx) {
111 | continue;
112 | }
113 | let mut expander = OwningTemplateExpander::new();
114 | expander
115 | .set("set_num", dup_set_idx + 1)
116 | .set("file_len", file_size::fit_4(dup_set.file_len));
117 | for (dup_file_idx, file) in dup_set.files.iter().enumerate() {
118 | let file_ref = DupFileRef { dup_set_idx, dup_file_idx };
119 | expander.sub("files")
120 | .set("path", file.path.to_string_lossy())
121 | .set_md(
122 | "action",
123 | if self.staged_removals.contains(&file_ref) {
124 | "*remove*"
125 | } else {
126 | "keep"
127 | }
128 | );
129 | }
130 | skin.print_owning_expander(&expander, &template);
131 | }
132 | }
133 |
134 | /// "Normally" the algorithms of backdown never remove all files
135 | /// in a set of identical files. But if I change those algorithms
136 | /// and make them more complex, I may make an error. So this
137 | /// function will check there's at least one kept file in each
138 | /// touched set, and will raise an error if a set is totally
139 | /// emptied.
140 | /// This *must* be called just before starting the real removals.
141 | pub fn check_no_emptied_set(
142 | &self,
143 | dups: &[DupSet],
144 | ) -> anyhow::Result<()> {
145 | for (dup_set_idx, dup_set) in dups.iter().enumerate() {
146 | let mut staged_count = 0;
147 | for dup_file_idx in 0..dup_set.files.len() {
148 | if self.staged_removals.contains(&DupFileRef{ dup_set_idx, dup_file_idx }) {
149 | staged_count += 1;
150 | }
151 | }
152 | if staged_count >= dup_set.files.len() {
153 | anyhow::bail!("We staged all files in set for removal! Abort!");
154 | }
155 | }
156 | Ok(())
157 | }
158 |
159 | #[cfg(unix)]
160 | pub fn replace_staged_with_links(
161 | &self,
162 | dups: &[DupSet],
163 | skin: &MadSkin,
164 | ) -> anyhow::Result<()> {
165 | use std::os::unix::fs::symlink;
166 | self.check_no_emptied_set(dups)?;
167 | skin.print_text("\n# Phase 4) Replace staged duplicates with links");
168 | println!("Replacing...");
169 | let mut removed_len = 0;
170 | let mut removed_count = 0;
171 | // file removals
172 | for dup_file_ref in &self.staged_removals {
173 | let dup_set = &dups[dup_file_ref.dup_set_idx];
174 | let path = dup_file_ref.path(dups);
175 | let link_destination = match reference_file(dup_file_ref.dup_set_idx, dup_set, &self.staged_removals) {
176 | Some(p) => p,
177 | None => {
178 | anyhow::bail!("unexpected lack of kept file in dup set");
179 | }
180 | };
181 | let link_destination = link_destination.canonicalize()?;
182 | match fs::remove_file(path) {
183 | Ok(()) => {
184 | removed_count += 1;
185 | removed_len += dups[dup_file_ref.dup_set_idx].file_len;
186 | match symlink(&link_destination, path) {
187 | Ok(()) => {
188 | // println!("link {:?} -> {:?}", path, link_destination);
189 | }
190 | Err(e) => {
191 | mad_print_inline!(
192 | skin,
193 | " Failed to remove create link *$1* -> *$2* : $3\n",
194 | path.to_string_lossy(),
195 | link_destination.to_string_lossy(),
196 | e,
197 | );
198 | }
199 | }
200 | }
201 | Err(e) => {
202 | mad_print_inline!(
203 | skin,
204 | " Failed to remove *$1* : $2\n",
205 | path.to_string_lossy(),
206 | e,
207 | );
208 | }
209 | }
210 | }
211 | mad_print_inline!(
212 | skin,
213 | "Removed *$0* files with a total size of **$1**\n",
214 | removed_count,
215 | file_size::fit_4(removed_len),
216 | );
217 | Ok(())
218 | }
219 |
220 | pub fn do_the_removal(
221 | &self,
222 | dups: &[DupSet],
223 | skin: &MadSkin,
224 | ) -> anyhow::Result<()> {
225 | self.check_no_emptied_set(dups)?;
226 | skin.print_text("\n# Phase 4) Removal");
227 | println!("Removing...");
228 | let mut removed_len = 0;
229 | let mut removed_count = 0;
230 | // file removals
231 | for dup_file_ref in &self.staged_removals {
232 | let path = dup_file_ref.path(dups);
233 | match fs::remove_file(path) {
234 | Ok(()) => {
235 | removed_count += 1;
236 | removed_len += dups[dup_file_ref.dup_set_idx].file_len;
237 | }
238 | Err(e) => {
239 | mad_print_inline!(
240 | skin,
241 | " Failed to remove *$1* : $2\n",
242 | path.to_string_lossy(),
243 | e,
244 | );
245 | }
246 | }
247 | }
248 | // directory removals
249 | for path in &self.staged_dir_removals {
250 | debug!("removing {:?}", path);
251 | if let Err(e) = fs::remove_dir(path) {
252 | mad_print_inline!(
253 | skin,
254 | " Failed to remove directory *$1* : $2\n",
255 | path.to_string_lossy(),
256 | e,
257 | );
258 | }
259 | }
260 | mad_print_inline!(
261 | skin,
262 | "Removed *$0* files with a total size of **$1**\n",
263 | removed_count,
264 | file_size::fit_4(removed_len),
265 | );
266 | Ok(())
267 | }
268 | }
269 |
--------------------------------------------------------------------------------