├── .gitignore ├── tests ├── static │ ├── bar │ ├── foo │ ├── qax │ ├── qix │ └── qux ├── common │ └── mod.rs ├── particular_cases.rs └── integration.rs ├── .cargo └── config.toml ├── .github ├── chglog │ ├── release.yml │ └── RELEASE.tpl.md └── workflows │ ├── ci.yml │ └── release.yml ├── scripts ├── bench.sh └── yadf.py ├── LICENSE ├── src ├── bag │ ├── serialize.rs │ ├── replicates.rs │ └── display.rs ├── path.rs ├── fs │ ├── hash.rs │ └── filter.rs ├── hasher.rs ├── ext.rs ├── fs.rs ├── args.rs ├── lib.rs ├── bag.rs └── main.rs ├── examples └── keep_oldest.py ├── Cargo.toml ├── README.md └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /tests/static/bar: -------------------------------------------------------------------------------- 1 | aa 2 | -------------------------------------------------------------------------------- /tests/static/foo: -------------------------------------------------------------------------------- 1 | aa 2 | -------------------------------------------------------------------------------- /tests/static/qax: -------------------------------------------------------------------------------- 1 | i 2 | -------------------------------------------------------------------------------- /tests/static/qix: -------------------------------------------------------------------------------- 1 | aa 2 | -------------------------------------------------------------------------------- /tests/static/qux: -------------------------------------------------------------------------------- 1 | bb 2 | -------------------------------------------------------------------------------- /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | rustflags = ["-C", "target-cpu=native"] 3 | 
-------------------------------------------------------------------------------- /.github/chglog/release.yml: -------------------------------------------------------------------------------- 1 | style: github 2 | template: RELEASE.tpl.md 3 | info: 4 | repository_url: https://github.com/jRimbault/yadf 5 | options: 6 | commits: 7 | # filters: 8 | # Type: 9 | # - feat 10 | # - fix 11 | # - perf 12 | # - refactor 13 | commit_groups: 14 | title_maps: 15 | feat: Features 16 | fix: Bug Fixes 17 | perf: Performance Improvements 18 | refactor: Code Refactoring 19 | ci: Continuous Integration 20 | doc: Documentation 21 | header: 22 | pattern: "^(\\w*)(?:\\(([\\w\\$\\.\\-\\*\\s]*)\\))?\\:\\s(.*)$" 23 | pattern_maps: 24 | - Type 25 | - Scope 26 | - Subject 27 | notes: 28 | keywords: 29 | - BREAKING CHANGE 30 | -------------------------------------------------------------------------------- /.github/chglog/RELEASE.tpl.md: -------------------------------------------------------------------------------- 1 | {{ range .Versions }} 2 | {{ if .Tag.Previous }}[Diff between versions {{ .Tag.Previous.Name }}...{{ .Tag.Name }}]({{ $.Info.RepositoryURL }}/compare/{{ .Tag.Previous.Name }}...{{ .Tag.Name }}){{ else }}{{ .Tag.Name }}{{ end }} ({{ datetime "2006-01-02" .Tag.Date }}) 3 | 4 | {{ range .CommitGroups -}} 5 | ### {{ .Title }} 6 | 7 | {{ range .Commits -}} 8 | * {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }} 9 | {{ end }} 10 | {{ end -}} 11 | 12 | {{- if .RevertCommits -}} 13 | ### Reverts 14 | 15 | {{ range .RevertCommits -}} 16 | * {{ .Revert.Header }} 17 | {{ end }} 18 | {{ end -}} 19 | 20 | {{- if .NoteGroups -}} 21 | {{ range .NoteGroups -}} 22 | ### {{ .Title }} 23 | 24 | {{ range .Notes }} 25 | {{ .Body }} 26 | {{ end }} 27 | {{ end -}} 28 | {{ end -}} 29 | {{ end -}} 30 | -------------------------------------------------------------------------------- /scripts/bench.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash 2 | 3 | # ddh produces a Results.txt file after each run 4 | # 5 | # rmlint produces a number of files all named rmlint.{ext} 6 | # 7 | # fclones and jdupes both don't scan recursively by default 8 | # 9 | # dupe-krill skips files smaller than the block size, hence the -s flag, 10 | # and will hardlink files together, hence the --dry-run flag 11 | # 12 | # fddf ignores zero length files 13 | 14 | case "$1" in 15 | "cold") 16 | prepare_cmd='rm Results.txt rmlint.* || true && echo "free && sync && echo 3 > /proc/sys/vm/drop_caches && free" | sudo sh' 17 | warmups=0 18 | ;; 19 | *) 20 | prepare_cmd="rm Results.txt rmlint.* || true" 21 | warmups=5 22 | ;; 23 | esac 24 | 25 | hyperfine --warmup "$warmups" \ 26 | --min-runs 10 \ 27 | --export-markdown export.md \ 28 | --prepare "$prepare_cmd" \ 29 | "fclones group --min 0 ~" \ 30 | "jdupes -z -r ~" \ 31 | "ddh --directories ~" \ 32 | "dupe-krill -s -d ~" \ 33 | "fddf -m 0 ~" \ 34 | "yadf ~" 35 | 36 | { 37 | rm Results.txt rmlint.* || true 38 | } 2> /dev/null 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Jacques Rimbault 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/bag/serialize.rs: -------------------------------------------------------------------------------- 1 | use super::{Replicates, TreeBag}; 2 | use serde::ser::{Serialize, Serializer}; 3 | 4 | impl Serialize for Replicates<'_, K, V> 5 | where 6 | V: Serialize, 7 | { 8 | fn serialize(&self, serializer: S) -> Result 9 | where 10 | S: Serializer, 11 | { 12 | serializer.collect_seq(self.iter()) 13 | } 14 | } 15 | 16 | impl Serialize for TreeBag 17 | where 18 | K: Serialize, 19 | V: Serialize, 20 | { 21 | fn serialize(&self, serializer: S) -> Result 22 | where 23 | S: Serializer, 24 | { 25 | serializer.collect_map(self.0.iter()) 26 | } 27 | } 28 | 29 | #[cfg(test)] 30 | mod tests { 31 | use super::super::TreeBag; 32 | 33 | #[test] 34 | fn json() { 35 | let counter: TreeBag = vec![ 36 | (77, "hello"), 37 | (77, "world"), 38 | (1, "ignored"), 39 | (3, "foo"), 40 | (3, "bar"), 41 | ] 42 | .into_iter() 43 | .collect(); 44 | let result = serde_json::to_string(&counter.duplicates()).unwrap(); 45 | let expected = r#"[["foo","bar"],["hello","world"]]"#; 46 | assert_eq!(result, expected); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/path.rs: -------------------------------------------------------------------------------- 1 | /// Serialization wrapper for paths. 
2 | #[derive(Debug)] 3 | pub struct Path(std::path::PathBuf); 4 | 5 | use serde::{Serialize, Serializer}; 6 | 7 | impl Serialize for Path { 8 | fn serialize(&self, serializer: S) -> Result 9 | where 10 | S: Serializer, 11 | { 12 | serializer.collect_str(&self.0.display()) 13 | } 14 | } 15 | 16 | impl From for Path 17 | where 18 | T: Into, 19 | { 20 | fn from(path: T) -> Self { 21 | Self(path.into()) 22 | } 23 | } 24 | 25 | impl AsRef for Path { 26 | fn as_ref(&self) -> &std::path::Path { 27 | &self.0 28 | } 29 | } 30 | 31 | #[cfg(test)] 32 | mod tests { 33 | #[cfg(unix)] 34 | #[test] 35 | fn shouldnt_panic_on_invalid_utf8_path() { 36 | use super::*; 37 | use std::ffi::OsString; 38 | use std::os::unix::ffi::OsStringExt; 39 | use std::path::PathBuf; 40 | // asserts its invalidity 41 | let invalid_utf8: &[u8] = b"\xe7\xe7"; 42 | assert!(String::from_utf8(invalid_utf8.to_vec()).is_err()); 43 | // without wrapper it errors 44 | let path = PathBuf::from(OsString::from_vec(invalid_utf8.to_vec())); 45 | assert!(serde_json::to_string(&path).is_err()); 46 | // with wrapper it's ok 47 | let path = Path(PathBuf::from(OsString::from_vec(invalid_utf8.to_vec()))); 48 | assert!(serde_json::to_string(&path).is_ok()); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/bag/replicates.rs: -------------------------------------------------------------------------------- 1 | use super::{Display, Factor, Replicates}; 2 | use std::collections::btree_map::Values; 3 | 4 | /// [`Iterator`](Iterator) adapter. 5 | #[derive(Debug)] 6 | pub struct Iter<'a, K, V> { 7 | values: Values<'a, K, Vec>, 8 | factor: Factor, 9 | } 10 | 11 | impl Replicates<'_, K, V> { 12 | /// Iterator over the buckets. 13 | pub fn iter(&self) -> Iter<'_, K, V> { 14 | Iter { 15 | values: self.tree.0.values(), 16 | factor: self.factor.clone(), 17 | } 18 | } 19 | 20 | /// Returns an object that implements [`Display`](std::fmt::Display). 
21 | /// 22 | /// Depending on the contents of the [`TreeBag`](super::TreeBag), the display object 23 | /// can be parameterized to get a different [`Display`](std::fmt::Display) implementation. 24 | pub fn display(&self) -> Display<'_, K, V, U> { 25 | Display { 26 | format_marker: std::marker::PhantomData, 27 | tree: self, 28 | } 29 | } 30 | } 31 | 32 | impl<'a, K, V> IntoIterator for &'a Replicates<'a, K, V> { 33 | type Item = &'a Vec; 34 | type IntoIter = Iter<'a, K, V>; 35 | 36 | fn into_iter(self) -> Self::IntoIter { 37 | self.iter() 38 | } 39 | } 40 | 41 | #[allow(clippy::manual_find)] 42 | impl<'a, K, V> Iterator for Iter<'a, K, V> { 43 | type Item = &'a Vec; 44 | 45 | fn next(&mut self) -> Option { 46 | for bucket in &mut self.values { 47 | if self.factor.pass(bucket.len()) { 48 | return Some(bucket); 49 | } 50 | } 51 | None 52 | } 53 | } 54 | 55 | impl Factor { 56 | fn pass(&self, x: usize) -> bool { 57 | match *self { 58 | Factor::Under(n) => x < n, 59 | Factor::Equal(n) => x == n, 60 | Factor::Over(n) => x > n, 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /examples/keep_oldest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """Pipe the output of `yadf --format ldjson` into this script. 4 | 5 | Either : 6 | 7 | yadf -f ldjson > results.ldjson 8 | python3 keep_oldest.py results.ldjson 9 | 10 | Or skipping the intermediate file : 11 | 12 | yadf -f ldjson | python3 keep_oldest.py 13 | 14 | This script is provided as an example meant to be modified and tinkered with. 
15 | """ 16 | 17 | import fileinput 18 | import itertools 19 | import json 20 | import multiprocessing 21 | import os 22 | import pathlib 23 | from typing import Callable, Iterable, Sized, TypeVar 24 | 25 | Cmp = TypeVar("Cmp", bound=Sized) 26 | Key = Callable[[str], Cmp] 27 | Filter = Callable[[Iterable[str]], Iterable[str]] 28 | 29 | 30 | def main(): 31 | cleaner = Cleaner(most_recent_modification_date, yield_all_except_first) 32 | sequential(fileinput.input(), cleaner) 33 | 34 | 35 | def sequential(ldjson: Iterable[str], cleaner: "Cleaner"): 36 | for line in ldjson: 37 | cleaner(line) 38 | 39 | 40 | def parallel(ldjson: Iterable[str], cleaner: "Cleaner"): 41 | with multiprocessing.Pool() as pool: 42 | pool.imap_unordered(cleaner, ldjson) 43 | 44 | 45 | class Cleaner: 46 | def __init__(self, key: Key = None, filter: Filter = lambda f: f): 47 | self.key = key 48 | self.filter = filter 49 | 50 | def __call__(self, line: str): 51 | files: list[str] = json.loads(line) 52 | files.sort(key=self.key) 53 | # uncomment to actually delete files 54 | for filename in self.filter(files): 55 | # os.remove(filename) 56 | pass 57 | 58 | 59 | def most_recent_modification_date(filename: str) -> float: 60 | return pathlib.Path(filename).stat().st_mtime 61 | 62 | 63 | def yield_all_except_first(files: Iterable[str]) -> Iterable[str]: 64 | return itertools.islice(files, 1, None) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /src/fs/hash.rs: -------------------------------------------------------------------------------- 1 | use super::BLOCK_SIZE; 2 | use std::fs::File; 3 | use std::io::{self, Read}; 4 | use std::path::Path; 5 | 6 | /// Get a checksum of the first 4 KiB (at most) of a file. 
7 | pub fn partial(path: &Path) -> io::Result 8 | where 9 | H: crate::hasher::Hasher, 10 | { 11 | let mut file = File::open(path)?; 12 | let mut buffer = [0u8; BLOCK_SIZE]; 13 | let mut n = 0; 14 | loop { 15 | match file.read(&mut buffer[n..]) { 16 | Ok(0) => break, 17 | Ok(len) => n += len, 18 | Err(e) if e.kind() == io::ErrorKind::Interrupted => continue, 19 | Err(e) => return Err(e), 20 | } 21 | } 22 | let mut hasher = H::default(); 23 | hasher.write(&file.metadata()?.len().to_le_bytes()); 24 | hasher.write(&buffer[..n]); 25 | Ok(hasher.finish()) 26 | } 27 | 28 | /// Get a complete checksum of a file. 29 | pub fn full(path: &Path) -> io::Result 30 | where 31 | H: crate::hasher::Hasher, 32 | { 33 | /// Compile time [`Write`](std::io::Write) wrapper for a [`Hasher`](core::hash::Hasher). 34 | /// This should get erased at compile time. 35 | #[repr(transparent)] 36 | struct HashWriter(H); 37 | 38 | impl io::Write for HashWriter { 39 | fn write(&mut self, buf: &[u8]) -> io::Result { 40 | crate::hasher::Hasher::write(&mut self.0, buf); 41 | Ok(buf.len()) 42 | } 43 | 44 | fn flush(&mut self) -> io::Result<()> { 45 | Ok(()) 46 | } 47 | } 48 | 49 | let mut hasher = HashWriter(H::default()); 50 | io::copy(&mut File::open(path)?, &mut hasher)?; 51 | Ok(hasher.0.finish()) 52 | } 53 | 54 | #[cfg(test)] 55 | mod tests { 56 | use super::*; 57 | 58 | #[test] 59 | fn different_hash_partial_and_full_for_small_file_because_of_size() { 60 | let h1 = partial::("./tests/static/foo".as_ref()).unwrap(); 61 | let h2 = full::("./tests/static/foo".as_ref()).unwrap(); 62 | assert_ne!(h1, h2); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/hasher.rs: -------------------------------------------------------------------------------- 1 | pub trait Hasher: Default { 2 | type Hash: Hash; 3 | fn write(&mut self, buf: &[u8]); 4 | fn finish(self) -> Self::Hash; 5 | } 6 | 7 | pub trait Hash: PartialEq + Eq + PartialOrd + Ord + Send + Sync + 
Copy {} 8 | 9 | impl Hash for T where T: PartialEq + Eq + PartialOrd + Ord + Send + Sync + Copy {} 10 | 11 | #[cfg(feature = "build-bin")] 12 | impl Hasher for ahash::AHasher { 13 | type Hash = u64; 14 | fn write(&mut self, buf: &[u8]) { 15 | std::hash::Hasher::write(self, buf); 16 | } 17 | fn finish(self) -> Self::Hash { 18 | std::hash::Hasher::finish(&self) 19 | } 20 | } 21 | 22 | #[cfg(feature = "build-bin")] 23 | impl Hasher for highway::HighwayHasher { 24 | type Hash = [u64; 4]; 25 | fn write(&mut self, buf: &[u8]) { 26 | use highway::HighwayHash; 27 | self.append(buf); 28 | } 29 | 30 | fn finish(self) -> Self::Hash { 31 | use highway::HighwayHash; 32 | self.finalize256() 33 | } 34 | } 35 | 36 | #[cfg(feature = "build-bin")] 37 | impl Hasher for metrohash::MetroHash128 { 38 | type Hash = (u64, u64); 39 | fn write(&mut self, buf: &[u8]) { 40 | std::hash::Hasher::write(self, buf); 41 | } 42 | 43 | fn finish(self) -> Self::Hash { 44 | self.finish128() 45 | } 46 | } 47 | 48 | #[cfg(feature = "build-bin")] 49 | impl Hasher for seahash::SeaHasher { 50 | type Hash = u64; 51 | fn write(&mut self, buf: &[u8]) { 52 | std::hash::Hasher::write(self, buf); 53 | } 54 | fn finish(self) -> Self::Hash { 55 | std::hash::Hasher::finish(&self) 56 | } 57 | } 58 | 59 | #[cfg(feature = "build-bin")] 60 | impl Hasher for twox_hash::xxhash3_128::Hasher { 61 | type Hash = u128; 62 | fn write(&mut self, buf: &[u8]) { 63 | self.write(buf); 64 | } 65 | 66 | fn finish(self) -> Self::Hash { 67 | self.finish_128() 68 | } 69 | } 70 | 71 | #[cfg(feature = "build-bin")] 72 | impl Hasher for blake3::Hasher { 73 | type Hash = [u8; 32]; 74 | fn write(&mut self, buf: &[u8]) { 75 | self.update(buf); 76 | } 77 | fn finish(self) -> Self::Hash { 78 | self.finalize().into() 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/ext.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | 
use std::hash::Hash; 3 | use std::path::Path; 4 | 5 | /// Could be replaced by `unique_by` in `itertools` 6 | pub trait IteratorExt: Iterator + Sized { 7 | fn unique_by(self, f: F) -> UniqueBy 8 | where 9 | F: Fn(&Self::Item) -> K, 10 | K: Hash + Eq, 11 | { 12 | UniqueBy::new(self, f) 13 | } 14 | } 15 | 16 | impl IteratorExt for I {} 17 | 18 | pub struct UniqueBy { 19 | iter: I, 20 | set: HashSet, 21 | f: F, 22 | } 23 | 24 | impl UniqueBy 25 | where 26 | I: Iterator, 27 | F: Fn(&I::Item) -> K, 28 | K: Hash + Eq, 29 | { 30 | fn new(iter: I, f: F) -> Self { 31 | Self { 32 | iter, 33 | f, 34 | set: HashSet::new(), 35 | } 36 | } 37 | } 38 | 39 | #[allow(clippy::manual_find)] 40 | impl Iterator for UniqueBy 41 | where 42 | I: Iterator, 43 | F: Fn(&I::Item) -> K, 44 | K: Hash + Eq, 45 | { 46 | type Item = I::Item; 47 | 48 | fn next(&mut self) -> Option { 49 | for item in &mut self.iter { 50 | if self.set.insert((self.f)(&item)) { 51 | return Some(item); 52 | } 53 | } 54 | None 55 | } 56 | } 57 | 58 | pub trait WalkParallelForEach { 59 | fn for_each(self, f: F) 60 | where 61 | F: Fn(Result) -> ignore::WalkState, 62 | F: Send + Copy; 63 | } 64 | 65 | impl WalkParallelForEach for ignore::WalkParallel { 66 | fn for_each(self, f: F) 67 | where 68 | F: Fn(Result) -> ignore::WalkState, 69 | F: Send + Copy, 70 | { 71 | self.run(|| Box::new(f)) 72 | } 73 | } 74 | 75 | pub trait WalkBuilderAddPaths { 76 | fn add_paths(&mut self, paths: I) -> &mut Self 77 | where 78 | P: AsRef, 79 | I: IntoIterator; 80 | } 81 | 82 | impl WalkBuilderAddPaths for ignore::WalkBuilder { 83 | fn add_paths(&mut self, paths: I) -> &mut Self 84 | where 85 | P: AsRef, 86 | I: IntoIterator, 87 | { 88 | for path in paths { 89 | self.add(path); 90 | } 91 | self 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "yadf" 3 | version = "1.3.0" 4 | 
authors = ["jRimbault "] 5 | edition = "2021" 6 | description = "yet another dupes finder" 7 | license = "MIT" 8 | homepage = "https://github.com/jRimbault/yadf" 9 | repository = "https://github.com/jRimbault/yadf" 10 | readme = "README.md" 11 | rust-version = "1.82.0" 12 | categories = ["command-line-utilities", "filesystem"] 13 | keywords = ["dupe", "duplicate", "finder", "fdupes", "fast"] 14 | 15 | [profile.release] 16 | lto = "fat" 17 | codegen-units = 1 18 | 19 | [[bin]] 20 | name = "yadf" 21 | required-features = ["build-bin"] 22 | 23 | [features] 24 | default = ["build-bin"] 25 | build-bin = [ 26 | "dep:ahash", 27 | "dep:anyhow", 28 | "dep:blake3", 29 | "dep:byte-unit", 30 | "dep:clap", 31 | "dep:clap-verbosity-flag", 32 | "dep:csv", 33 | "dep:env_logger", 34 | "dep:highway", 35 | "dep:human-panic", 36 | "dep:metrohash", 37 | "dep:seahash", 38 | "dep:serde_json", 39 | "dep:twox-hash", 40 | ] 41 | 42 | [dependencies] 43 | # library dependencies 44 | crossbeam-channel = "0.5.15" 45 | dunce = "1.0.5" 46 | globset = "0.4.16" 47 | ignore = "0.4.23" 48 | log = "0.4.27" 49 | num_cpus = "1.17.0" 50 | rayon = "1.11.0" 51 | regex = "1.11.2" 52 | serde = "1.0.219" 53 | typed-builder = "0.21.2" 54 | # binary dependencies 55 | ahash = { version = "0.8.12", optional = true } 56 | anyhow = { version = "1.0.99", optional = true } 57 | byte-unit = { version = "5.1.6", features = ["byte"], optional = true } 58 | clap = { version = "4.5.45", features = [ 59 | "cargo", 60 | "derive", 61 | "string", 62 | ], optional = true } 63 | clap-verbosity-flag = { version = "3.0.4", optional = true } 64 | csv = { version = "1.3.1", optional = true } 65 | env_logger = { version = "0.11.8", optional = true } 66 | highway = { version = "1.3.0", optional = true } 67 | human-panic = { version = "2.0.3", optional = true } 68 | metrohash = { version = "1.0.7", optional = true } 69 | seahash = { version = "4.1.0", optional = true } 70 | serde_json = { version = "1.0.143", optional = true } 71 | 
twox-hash = { version = "2.1.1", optional = true } 72 | blake3 = { version = "1.8.2", optional = true } 73 | 74 | [dev-dependencies] 75 | assert_cmd = "2" 76 | dirs = "6.0.0" 77 | env_logger = "0.11.8" 78 | highway = "1.3.0" 79 | once_cell = "1.21.3" 80 | serde_json = "1.0.143" 81 | seahash = "4.1.0" 82 | twox-hash = "2.1.1" 83 | predicates = "3.1.3" 84 | rand = "0.9" 85 | -------------------------------------------------------------------------------- /src/bag/display.rs: -------------------------------------------------------------------------------- 1 | use super::{Display, Fdupes, Machine}; 2 | use std::fmt; 3 | use std::path::Path; 4 | 5 | impl fmt::Display for Display<'_, K, V, Fdupes> 6 | where 7 | V: AsRef, 8 | { 9 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 10 | let mut duplicates = self.tree.iter().peekable(); 11 | while let Some(bucket) = duplicates.next() { 12 | let mut bucket = bucket.iter().peekable(); 13 | let is_last_bucket = duplicates.peek().is_none(); 14 | while let Some(dupe) = bucket.next() { 15 | dupe.as_ref().display().fmt(f)?; 16 | if bucket.peek().is_some() || !is_last_bucket { 17 | f.write_str("\n")?; 18 | } 19 | } 20 | if !is_last_bucket { 21 | f.write_str("\n")?; 22 | } 23 | } 24 | Ok(()) 25 | } 26 | } 27 | 28 | impl fmt::Display for Display<'_, K, V, Machine> 29 | where 30 | V: AsRef, 31 | { 32 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 33 | let mut duplicates = self.tree.iter().peekable(); 34 | while let Some(bucket) = duplicates.next() { 35 | let (last, rest) = bucket.split_last().ok_or(fmt::Error)?; 36 | for dupe in rest { 37 | fmt::Debug::fmt(dupe.as_ref(), f)?; 38 | f.write_str(" ")?; 39 | } 40 | fmt::Debug::fmt(last.as_ref(), f)?; 41 | if duplicates.peek().is_some() { 42 | f.write_str("\n")?; 43 | } 44 | } 45 | Ok(()) 46 | } 47 | } 48 | 49 | #[cfg(test)] 50 | mod tests { 51 | use super::super::TreeBag; 52 | use super::*; 53 | use once_cell::sync::Lazy; 54 | 55 | static BAG: Lazy> = Lazy::new(|| { 
56 | vec![ 57 | (77, "hello"), 58 | (77, "world"), 59 | (1, "ignored"), 60 | (3, "foo"), 61 | (3, "bar"), 62 | ] 63 | .into_iter() 64 | .collect() 65 | }); 66 | 67 | #[test] 68 | fn machine() { 69 | let result = BAG.duplicates().display::().to_string(); 70 | let expected = "\ 71 | \"foo\" \"bar\"\n\ 72 | \"hello\" \"world\"\ 73 | "; 74 | assert_eq!(result, expected); 75 | } 76 | 77 | #[test] 78 | fn fdupes() { 79 | let result = BAG.duplicates().display::().to_string(); 80 | let expected = "\ 81 | foo\n\ 82 | bar\n\ 83 | \n\ 84 | hello\n\ 85 | world\ 86 | "; 87 | assert_eq!(result, expected); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /tests/common/mod.rs: -------------------------------------------------------------------------------- 1 | pub use test_dir::TestDir; 2 | 3 | /// quick-n-dirty any result type alias 4 | pub type AnyResult> = Result; 5 | 6 | pub const MAX_LEN: usize = 256 * 1024; 7 | 8 | pub fn random_collection(size: usize) -> I 9 | where 10 | rand::distr::StandardUniform: rand::distr::Distribution, 11 | I: std::iter::FromIterator, 12 | { 13 | use rand::Rng; 14 | let mut rng = rand::rng(); 15 | std::iter::repeat_with(|| rng.random()).take(size).collect() 16 | } 17 | 18 | /// test shortcut 19 | #[allow(dead_code)] 20 | pub fn find_dupes>(path: &P) -> yadf::FileCounter { 21 | yadf::Yadf::builder() 22 | .paths([path].as_ref()) 23 | .build() 24 | .scan::() 25 | } 26 | 27 | #[macro_export] 28 | macro_rules! scope_name_iter { 29 | () => {{ 30 | fn fxxfxxf() {} 31 | fn type_name_of(_: T) -> &'static str { 32 | std::any::type_name::() 33 | } 34 | type_name_of(fxxfxxf) 35 | .split("::") 36 | .take_while(|&segment| segment != "fxxfxxf") 37 | }}; 38 | } 39 | 40 | #[macro_export] 41 | macro_rules! 
test_dir { 42 | () => {{ 43 | ["target", "tests"] 44 | .iter() 45 | .copied() 46 | .chain(scope_name_iter!()) 47 | .collect::() 48 | }}; 49 | } 50 | 51 | mod test_dir { 52 | use std::fs::{self, File}; 53 | use std::io::{self, Write}; 54 | use std::path::{Path, PathBuf}; 55 | 56 | pub struct TestDir(PathBuf); 57 | 58 | impl TestDir { 59 | pub fn new

(dir: P) -> io::Result 60 | where 61 | P: AsRef, 62 | { 63 | match fs::remove_dir_all(&dir) { 64 | // the directory should not exist at this stage 65 | // we're just double checking and don't want to return a spurious error 66 | Err(e) if e.kind() == io::ErrorKind::NotFound => {} 67 | Err(e) => return Err(e), 68 | _ => {} 69 | } 70 | fs::create_dir_all(&dir)?; 71 | Ok(TestDir(dir.as_ref().to_path_buf())) 72 | } 73 | 74 | pub fn write_file(&self, path: P, bytes: B) -> io::Result 75 | where 76 | P: AsRef, 77 | B: AsRef<[u8]>, 78 | { 79 | let path = self.0.join(path); 80 | File::create(&path)?.write_all(bytes.as_ref())?; 81 | Ok(path) 82 | } 83 | } 84 | 85 | impl Drop for TestDir { 86 | fn drop(&mut self) { 87 | fs::remove_dir_all(&self.0) 88 | .unwrap_or_else(|_| panic!("couldn't remove test directory {:?}", self.0)); 89 | } 90 | } 91 | 92 | impl AsRef for TestDir { 93 | fn as_ref(&self) -> &Path { 94 | &self.0 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - "**.md" 7 | branches-ignore: 8 | - "try/**" 9 | pull_request: 10 | paths-ignore: 11 | - "**.md" 12 | schedule: 13 | # At 13:23 on day-of-month 23. 
14 | - cron: "23 13 23 * *" 15 | 16 | jobs: 17 | check: 18 | name: Check build 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | os: [ubuntu-latest, windows-latest, macos-latest] 24 | rust: [stable, 1.82.0] 25 | feature_set: ["--no-default-features", "--all-features"] 26 | steps: 27 | - uses: actions/checkout@v4 28 | - uses: actions/cache@v4 29 | with: 30 | path: | 31 | ~/.cargo/registry 32 | ~/.cargo/git 33 | target 34 | key: check-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 35 | - uses: actions-rs/toolchain@v1 36 | with: 37 | profile: minimal 38 | toolchain: ${{ matrix.rust }} 39 | override: true 40 | - name: Run build check 41 | uses: actions-rs/cargo@v1 42 | with: 43 | command: check 44 | args: ${{ matrix.feature_set }} 45 | 46 | clippy: 47 | name: Clippy 48 | runs-on: ubuntu-latest 49 | steps: 50 | - uses: actions/checkout@v4 51 | - uses: actions/cache@v4 52 | with: 53 | path: | 54 | ~/.cargo/registry 55 | ~/.cargo/git 56 | target 57 | key: clippy-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 58 | - uses: actions-rs/toolchain@v1 59 | with: 60 | profile: minimal 61 | toolchain: stable 62 | override: true 63 | components: clippy 64 | - name: Run cargo clippy 65 | uses: actions-rs/clippy-check@v1 66 | with: 67 | token: ${{ secrets.GITHUB_TOKEN }} 68 | args: --all-features -- -D warnings 69 | 70 | format: 71 | name: Rustfmt 72 | runs-on: ubuntu-latest 73 | steps: 74 | - uses: actions/checkout@v4 75 | - uses: actions-rs/toolchain@v1 76 | with: 77 | profile: minimal 78 | toolchain: stable 79 | override: true 80 | components: rustfmt 81 | - name: Run cargo fmt 82 | uses: actions-rs/cargo@v1 83 | with: 84 | command: fmt 85 | args: --all -- --check 86 | 87 | tests: 88 | name: Tests 89 | needs: check 90 | runs-on: ${{ matrix.os }} 91 | strategy: 92 | fail-fast: false 93 | matrix: 94 | os: [ubuntu-latest, windows-latest, macos-latest] 95 | rust: [stable] 96 | steps: 97 | - uses: actions/checkout@v4 98 | - uses: 
actions/cache@v4 99 | with: 100 | path: | 101 | ~/.cargo/registry 102 | ~/.cargo/git 103 | target 104 | key: tests-${{ runner.os }}-${{ matrix.rust }}-cargo-${{ hashFiles('**/Cargo.lock') }} 105 | - uses: actions-rs/toolchain@v1 106 | with: 107 | profile: minimal 108 | toolchain: ${{ matrix.rust }} 109 | override: true 110 | - name: Build tests 111 | uses: actions-rs/cargo@v1 112 | with: 113 | command: build 114 | args: --tests 115 | - name: Run tests 116 | uses: actions-rs/cargo@v1 117 | with: 118 | command: test 119 | args: --all-features 120 | -------------------------------------------------------------------------------- /src/fs/filter.rs: -------------------------------------------------------------------------------- 1 | use std::fs::Metadata; 2 | use std::path::Path; 3 | 4 | #[derive(Debug)] 5 | pub struct FileFilter { 6 | min: Option, 7 | max: Option, 8 | regex: Option, 9 | glob: Option, 10 | #[cfg(unix)] 11 | inodes_filter: inode::Filter, 12 | } 13 | 14 | impl FileFilter { 15 | #[cfg(not(unix))] 16 | pub fn new( 17 | min: Option, 18 | max: Option, 19 | regex: Option, 20 | glob: Option, 21 | ) -> Self { 22 | Self { 23 | min, 24 | max, 25 | regex, 26 | glob, 27 | } 28 | } 29 | 30 | #[cfg(unix)] 31 | pub fn new( 32 | min: Option, 33 | max: Option, 34 | regex: Option, 35 | glob: Option, 36 | disable_hard_links_filter: bool, 37 | ) -> Self { 38 | Self { 39 | min, 40 | max, 41 | regex, 42 | glob, 43 | inodes_filter: inode::Filter::new(disable_hard_links_filter), 44 | } 45 | } 46 | 47 | pub fn is_match(&self, path: &Path, meta: Metadata) -> bool { 48 | #[cfg(unix)] 49 | { 50 | if !self.inodes_filter.is_unique(&meta) { 51 | return false; 52 | } 53 | } 54 | meta.is_file() 55 | && self.min.map_or(true, |m| meta.len() >= m) 56 | && self.max.map_or(true, |m| meta.len() <= m) 57 | && is_match(&self.regex, path).unwrap_or(true) 58 | && is_match(&self.glob, path).unwrap_or(true) 59 | } 60 | } 61 | 62 | fn is_match(opt: &Option, path: &Path) -> Option { 63 | 
opt.as_ref().and_then(|m| m.is_file_name_match(path)) 64 | } 65 | 66 | trait Matcher { 67 | fn is_file_name_match(&self, path: &Path) -> Option; 68 | } 69 | 70 | impl Matcher for regex::Regex { 71 | fn is_file_name_match(&self, path: &Path) -> Option { 72 | path.file_name() 73 | .and_then(std::ffi::OsStr::to_str) 74 | .map(|file_name| self.is_match(file_name)) 75 | } 76 | } 77 | 78 | impl Matcher for globset::GlobMatcher { 79 | fn is_file_name_match(&self, path: &Path) -> Option { 80 | path.file_name().map(|file_name| self.is_match(file_name)) 81 | } 82 | } 83 | 84 | #[cfg(unix)] 85 | mod inode { 86 | use std::collections::HashSet; 87 | use std::fs::Metadata; 88 | use std::os::unix::fs::MetadataExt; 89 | use std::sync::Mutex; 90 | 91 | /// Filter out unique inodes 92 | #[derive(Debug)] 93 | pub enum Filter { 94 | Disabled, 95 | Enabled(InodeSet), 96 | } 97 | 98 | #[derive(Debug, Default)] 99 | pub struct InodeSet(Mutex>); 100 | 101 | impl Filter { 102 | pub fn new(disable_hard_links_filter: bool) -> Self { 103 | if disable_hard_links_filter { 104 | Self::Disabled 105 | } else { 106 | Self::Enabled(Default::default()) 107 | } 108 | } 109 | 110 | pub fn is_unique(&self, meta: &Metadata) -> bool { 111 | match self { 112 | Self::Disabled => true, 113 | Self::Enabled(set) => set.is_unique(meta), 114 | } 115 | } 116 | } 117 | 118 | impl InodeSet { 119 | fn is_unique(&self, meta: &Metadata) -> bool { 120 | self.0.lock().unwrap().insert(meta.ino()) 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /tests/particular_cases.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::{find_dupes, random_collection, AnyResult, TestDir, MAX_LEN}; 4 | 5 | /// Test to be sure the sorting by hash only groups together files 6 | /// with the same contents. 7 | /// Takes some time to run. 
8 | /// 9 | /// cargo test --package yadf --test common -- sanity_check --exact --nocapture -Z unstable-options --include-ignored 10 | #[test] 11 | #[ignore] 12 | fn sanity_check() { 13 | let home = dirs::home_dir().unwrap(); 14 | let counter = find_dupes(&home); 15 | for bucket in counter.duplicates().iter() { 16 | let (first, bucket) = bucket.split_first().unwrap(); 17 | let reference = std::fs::read(first).unwrap(); 18 | for file in bucket { 19 | let contents = std::fs::read(file).unwrap(); 20 | assert_eq!(reference, contents, "comparing {first:?} and {file:?}"); 21 | } 22 | } 23 | } 24 | 25 | #[test] 26 | // #[ignore] 27 | fn identical_small_files() -> AnyResult { 28 | let root = TestDir::new(test_dir!())?; 29 | println!("{:?}", root.as_ref()); 30 | root.write_file("file1", b"aaa")?; 31 | root.write_file("file2", b"aaa")?; 32 | let counter = find_dupes(&root); 33 | assert_eq!(counter.duplicates().iter().count(), 1); 34 | assert_eq!(counter.as_inner().len(), 1); 35 | Ok(()) 36 | } 37 | 38 | #[test] 39 | // #[ignore] 40 | fn identical_larger_files() -> AnyResult { 41 | let root = TestDir::new(test_dir!())?; 42 | let buffer: Vec<_> = random_collection(MAX_LEN * 3); 43 | root.write_file("file1", &buffer)?; 44 | root.write_file("file2", &buffer)?; 45 | let counter = find_dupes(&root); 46 | assert_eq!(counter.duplicates().iter().count(), 1); 47 | assert_eq!(counter.as_inner().len(), 1); 48 | Ok(()) 49 | } 50 | 51 | #[test] 52 | // #[ignore] 53 | fn files_differing_by_size() -> AnyResult { 54 | let root = TestDir::new(test_dir!())?; 55 | root.write_file("file1", b"aaaa")?; 56 | root.write_file("file2", b"aaa")?; 57 | let counter = find_dupes(&root); 58 | assert_eq!(counter.duplicates().iter().count(), 0); 59 | assert_eq!(counter.as_inner().len(), 2); 60 | Ok(()) 61 | } 62 | 63 | #[test] 64 | // #[ignore] 65 | fn files_differing_by_prefix() -> AnyResult { 66 | let root = TestDir::new(test_dir!())?; 67 | root.write_file("file1", b"aaa")?; 68 | root.write_file("file2", 
b"bbb")?; 69 | let counter = find_dupes(&root); 70 | assert_eq!(counter.duplicates().iter().count(), 0); 71 | assert_eq!(counter.as_inner().len(), 2); 72 | Ok(()) 73 | } 74 | 75 | #[test] 76 | // #[ignore] 77 | fn files_differing_by_suffix() -> AnyResult { 78 | let root = TestDir::new(test_dir!())?; 79 | let mut buffer1 = Vec::with_capacity(MAX_LEN * 3 + 4); 80 | buffer1.extend_from_slice(&random_collection::<_, Vec<_>>(MAX_LEN * 3)); 81 | let mut buffer2 = buffer1.clone(); 82 | buffer1.extend_from_slice(b"suf1"); 83 | buffer2.extend_from_slice(b"suf2"); 84 | root.write_file("file1", &buffer1)?; 85 | root.write_file("file2", &buffer2)?; 86 | let counter = find_dupes(&root); 87 | assert_eq!(counter.duplicates().iter().count(), 0); 88 | assert_eq!(counter.as_inner().len(), 2); 89 | Ok(()) 90 | } 91 | 92 | #[test] 93 | // #[ignore] 94 | fn files_differing_by_middle() -> AnyResult { 95 | let root = TestDir::new(test_dir!())?; 96 | let mut buffer1 = Vec::with_capacity(MAX_LEN * 2 + 4); 97 | buffer1.extend_from_slice(&random_collection::<_, Vec<_>>(MAX_LEN)); 98 | let mut buffer2 = buffer1.clone(); 99 | buffer1.extend_from_slice(b"mid1"); 100 | buffer2.extend_from_slice(b"mid2"); 101 | let suffix = random_collection::<_, Vec<_>>(MAX_LEN); 102 | buffer1.extend_from_slice(&suffix); 103 | buffer2.extend_from_slice(&suffix); 104 | root.write_file("file1", &buffer1)?; 105 | root.write_file("file2", &buffer2)?; 106 | let counter = find_dupes(&root); 107 | assert_eq!(counter.duplicates().iter().count(), 0); 108 | assert_eq!(counter.as_inner().len(), 2); 109 | Ok(()) 110 | } 111 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | tags: 5 | - "v*" 6 | 7 | jobs: 8 | # Publish yadf to crates.io 9 | publish: 10 | name: Publish crates.io package 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: 
actions/checkout@v4 14 | - uses: actions-rs/toolchain@v1 15 | with: 16 | toolchain: stable 17 | profile: minimal 18 | override: true 19 | - name: Publish 20 | run: cargo publish --token ${{ secrets.CRATES_IO_TOKEN }} 21 | 22 | # Build sources for every OS 23 | github_build: 24 | name: Build release binaries 25 | strategy: 26 | fail-fast: false 27 | matrix: 28 | target: 29 | - x86_64-unknown-linux-gnu 30 | - x86_64-unknown-linux-musl 31 | # - x86_64-apple-darwin 32 | - x86_64-pc-windows-msvc 33 | include: 34 | - target: x86_64-unknown-linux-gnu 35 | os: ubuntu-latest 36 | name: yadf-x86_64-unknown-linux-gnu.tar.gz 37 | - target: x86_64-unknown-linux-musl 38 | os: ubuntu-latest 39 | name: yadf-x86_64-unknown-linux-musl.tar.gz 40 | # - target: x86_64-apple-darwin 41 | # os: macOS-latest 42 | # name: yadf-x86_64-apple-darwin.tar.gz 43 | - target: x86_64-pc-windows-msvc 44 | os: windows-latest 45 | name: yadf-x86_64-pc-windows-msvc.zip 46 | runs-on: ${{ matrix.os }} 47 | steps: 48 | - uses: actions/checkout@v4 49 | - uses: actions-rs/toolchain@v1 50 | with: 51 | toolchain: stable 52 | override: true 53 | profile: minimal 54 | target: ${{ matrix.target }} 55 | 56 | - name: Setup musl tools 57 | if: matrix.target == 'x86_64-unknown-linux-musl' 58 | run: sudo apt install -y musl-tools 59 | 60 | - name: Build 61 | if: matrix.target != 'x86_64-unknown-linux-musl' 62 | run: cargo build --release --target ${{ matrix.target }} 63 | 64 | - name: Build (musl) 65 | if: matrix.target == 'x86_64-unknown-linux-musl' 66 | run: cargo build --release --target ${{ matrix.target }} 67 | 68 | - name: Prepare artifacts [Windows] 69 | if: matrix.os == 'windows-latest' 70 | run: | 71 | cd target/${{ matrix.target }}/release 72 | 7z a ../../../${{ matrix.name }} yadf.exe 73 | cd - 74 | 75 | - name: Prepare artifacts [-nix] 76 | if: matrix.os != 'windows-latest' 77 | run: | 78 | cd target/${{ matrix.target }}/release 79 | tar czvf ../../../${{ matrix.name }} yadf 80 | cd - 81 | 82 | - uses: 
actions/upload-artifact@v2 83 | with: 84 | name: ${{ matrix.name }} 85 | path: ${{ matrix.name }} 86 | 87 | # Create GitHub release with Rust build targets and release notes 88 | github_release: 89 | name: GitHub Release 90 | needs: github_build 91 | runs-on: ubuntu-latest 92 | steps: 93 | - uses: actions/checkout@v4 94 | with: 95 | fetch-depth: 0 96 | 97 | - uses: actions/setup-go@v2 98 | with: 99 | go-version: "^1.13.1" 100 | 101 | - uses: actions/download-artifact@v2 102 | 103 | - name: Build release notes 104 | run: | 105 | wget https://github.com/git-chglog/git-chglog/releases/download/0.9.1/git-chglog_linux_amd64 -O git-chglog 106 | chmod 744 git-chglog 107 | ./git-chglog -c .github/chglog/release.yml $(git describe --tags) > RELEASE.md 108 | 109 | - name: Compute checksums 110 | run: | 111 | echo >> RELEASE.md 112 | echo "### Checksums" >> RELEASE.md 113 | echo >> RELEASE.md 114 | echo "|File|sha256|" >> RELEASE.md 115 | echo "|---|---|" >> RELEASE.md 116 | for file in yadf-*/yadf-*; do 117 | sha="$(openssl dgst -sha256 -r "$file" | awk '{print $1}')" 118 | file="$(basename "$file")" 119 | echo "|${file}|\`${sha}\`|" >> RELEASE.md 120 | done 121 | 122 | - name: Publish 123 | uses: softprops/action-gh-release@v1 124 | with: 125 | files: yadf-*/yadf-* 126 | body_path: RELEASE.md 127 | prerelease: ${{ endsWith(github.ref, 'pre') }} 128 | env: 129 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 130 | -------------------------------------------------------------------------------- /src/fs.rs: -------------------------------------------------------------------------------- 1 | //! Inner parts of `yadf`. Initial file collection and checksumming. 
2 | 3 | pub mod filter; 4 | mod hash; 5 | 6 | use crate::ext::{IteratorExt, WalkBuilderAddPaths, WalkParallelForEach}; 7 | use crate::TreeBag; 8 | use rayon::iter::{IntoParallelIterator, ParallelIterator}; 9 | use std::path::{Path, PathBuf}; 10 | 11 | const CHANNEL_SIZE: usize = 8 * 1024; 12 | const BLOCK_SIZE: usize = 4 * 1024; 13 | 14 | /// Foundation of the API. 15 | /// This will attempt a naive scan of every file, 16 | /// within the given size constraints, at the given path. 17 | pub fn find_dupes_partial( 18 | directories: &[P], 19 | max_depth: Option, 20 | filter: filter::FileFilter, 21 | ) -> TreeBag 22 | where 23 | H: crate::hasher::Hasher, 24 | P: AsRef, 25 | { 26 | let mut paths = directories 27 | .iter() 28 | .unique_by(|path| dunce::canonicalize(path).ok()); 29 | let first = paths.next().expect("there should be at least one path"); 30 | let walker = ignore::WalkBuilder::new(first) 31 | .add_paths(paths) 32 | .standard_filters(false) 33 | .max_depth(max_depth) 34 | .threads(num_cpus::get()) 35 | .build_parallel(); 36 | let (sender, receiver) = crossbeam_channel::bounded(CHANNEL_SIZE); 37 | rayon::join( 38 | move || receiver.into_iter().collect(), 39 | move || { 40 | walker.for_each(|entry| { 41 | if let Err(error) = entry { 42 | log::error!("{}", error); 43 | return ignore::WalkState::Continue; 44 | } 45 | if let Some(key_value) = hash_entry::(&filter, entry.unwrap()) { 46 | if let Err(error) = sender.send(key_value) { 47 | log::error!("{}, couldn't send value across channel", error); 48 | } 49 | } 50 | ignore::WalkState::Continue 51 | }) 52 | }, 53 | ) 54 | .0 55 | } 56 | 57 | fn hash_entry(filter: &filter::FileFilter, entry: ignore::DirEntry) -> Option<(H::Hash, PathBuf)> 58 | where 59 | H: crate::hasher::Hasher, 60 | { 61 | let path = entry.path(); 62 | let meta = entry 63 | .metadata() 64 | .map_err(|error| log::error!("{}, couldn't get metadata for {:?}", error, path)) 65 | .ok()?; 66 | if !filter.is_match(path, meta) { 67 | return None; 68 | } 69
| let hash = hash::partial::(path) 70 | .map_err(|error| log::error!("{}, couldn't hash {:?}", error, path)) 71 | .ok()?; 72 | Some((hash, entry.into_path())) 73 | } 74 | 75 | pub fn dedupe(tree: TreeBag) -> crate::FileCounter 76 | where 77 | H: crate::hasher::Hasher, 78 | { 79 | let (sender, receiver) = crossbeam_channel::bounded(CHANNEL_SIZE); 80 | rayon::join( 81 | move || receiver.into_iter().collect(), 82 | move || { 83 | tree.into_inner() 84 | .into_par_iter() 85 | .for_each_with(sender, process_bucket::) 86 | }, 87 | ) 88 | .0 89 | } 90 | 91 | fn process_bucket( 92 | sender: &mut crossbeam_channel::Sender<(H::Hash, crate::Path)>, 93 | (old_hash, bucket): (H::Hash, Vec), 94 | ) where 95 | H: crate::hasher::Hasher, 96 | { 97 | if bucket.len() == 1 { 98 | let file = bucket.into_iter().next().unwrap(); 99 | if let Err(error) = sender.send((old_hash, file.into())) { 100 | log::error!("{}, couldn't send value across channel", error); 101 | } 102 | } else { 103 | bucket 104 | .into_par_iter() 105 | .for_each_with(sender.clone(), |sender, file| { 106 | let hash = rehash_file::(&file).unwrap_or(old_hash); 107 | if let Err(error) = sender.send((hash, file.into())) { 108 | log::error!("{}, couldn't send value across channel", error); 109 | } 110 | }); 111 | } 112 | } 113 | 114 | fn rehash_file(file: &Path) -> Result 115 | where 116 | H: crate::hasher::Hasher, 117 | { 118 | if file.metadata().map(|f| f.len()).unwrap_or(0) < BLOCK_SIZE as _ { 119 | return Err(()); 120 | } 121 | match hash::full::(file) { 122 | Ok(hash) => Ok(hash), 123 | Err(error) => { 124 | log::error!("{}, couldn't hash {:?}, reusing partial hash", error, file); 125 | Err(()) 126 | } 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/args.rs: -------------------------------------------------------------------------------- 1 | use super::{Args, ReplicationFactor}; 2 | use clap::{CommandFactory, FromArgMatches}; 3 | use std::env; 4 | use std::fmt; 
5 | use std::io::BufRead; 6 | use std::path::PathBuf; 7 | 8 | impl Args { 9 | pub fn max(&self) -> Option { 10 | self.max 11 | .as_ref() 12 | .map(|m| m.0.get_adjusted_unit(byte_unit::Unit::B)) 13 | .map(|u| u.get_value() as _) 14 | } 15 | 16 | pub fn min(&self) -> Option { 17 | self.min 18 | .as_ref() 19 | .map(|m| m.0.get_adjusted_unit(byte_unit::Unit::B)) 20 | .map(|u| u.get_value() as _) 21 | .or(if self.no_empty { Some(1) } else { None }) 22 | } 23 | 24 | pub fn init_from_env() -> Self { 25 | let long_version = env!("YADF_BUILD_VERSION").replace('|', "\n"); 26 | let short_version = long_version.lines().next().unwrap().to_string(); 27 | let app = Self::command() 28 | .version(short_version) 29 | .long_version(long_version) 30 | .after_help("For sizes, K/M/G/T[B|iB] suffixes can be used (case-insensitive)."); 31 | let mut args = Self::from_arg_matches(&app.get_matches()).unwrap(); 32 | init_logger(&args.verbosity); 33 | args.build_paths(); 34 | args 35 | } 36 | 37 | fn build_paths(&mut self) { 38 | if self.paths.is_empty() { 39 | self.paths = default_paths() 40 | } 41 | } 42 | } 43 | 44 | fn init_logger(verbosity: &clap_verbosity_flag::Verbosity) { 45 | env_logger::Builder::new() 46 | .filter_level( 47 | verbosity 48 | .log_level() 49 | .unwrap_or(log::Level::Error) 50 | .to_level_filter(), 51 | ) 52 | .init(); 53 | } 54 | 55 | fn default_paths() -> Vec { 56 | let stdin = std::io::stdin(); 57 | let mut paths = if std::io::IsTerminal::is_terminal(&stdin) { 58 | Vec::new() 59 | } else { 60 | stdin 61 | .lock() 62 | .lines() 63 | .map_while(Result::ok) 64 | .map(Into::into) 65 | .collect() 66 | }; 67 | if paths.is_empty() { 68 | paths.push(env::current_dir().expect("couldn't get current working directory")); 69 | } 70 | paths 71 | } 72 | 73 | impl Default for ReplicationFactor { 74 | fn default() -> Self { 75 | ReplicationFactor::Over(1) 76 | } 77 | } 78 | 79 | impl std::str::FromStr for ReplicationFactor { 80 | type Err = String; 81 | 82 | fn from_str(value: &str) 
-> Result { 83 | use ReplicationFactor::*; 84 | const SEPS: &[char] = &[':', '=']; 85 | let mut arg = value.split(SEPS); 86 | 87 | let rf = match ( 88 | arg.next().map(str::to_lowercase).as_deref(), 89 | arg.next().and_then(|v| v.parse().ok()), 90 | ) { 91 | (Some("under"), Some(factor)) => Under(factor), 92 | (Some("equal"), Some(factor)) => Equal(factor), 93 | (Some("over"), Some(factor)) => Over(factor), 94 | _ => { 95 | return Err(format!( 96 | "replication factor must be of the form \ 97 | `over:1` or `under:5` or `equal:2`, \ 98 | got {value:?}" 99 | )) 100 | } 101 | }; 102 | Ok(rf) 103 | } 104 | } 105 | 106 | impl fmt::Display for ReplicationFactor { 107 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 108 | fmt::Debug::fmt(self, f) 109 | } 110 | } 111 | 112 | impl From for yadf::Factor { 113 | fn from(f: ReplicationFactor) -> Self { 114 | match f { 115 | ReplicationFactor::Under(n) => yadf::Factor::Under(n), 116 | ReplicationFactor::Equal(n) => yadf::Factor::Equal(n), 117 | ReplicationFactor::Over(n) => yadf::Factor::Over(n), 118 | } 119 | } 120 | } 121 | 122 | #[cfg(test)] 123 | mod tests { 124 | use super::*; 125 | 126 | #[test] 127 | fn replication_factor_parsing() { 128 | let cases = [ 129 | ("under=6", ReplicationFactor::Under(6)), 130 | ("over:7", ReplicationFactor::Over(7)), 131 | ("over:1", ReplicationFactor::Over(1)), 132 | ("equal=3", ReplicationFactor::Equal(3)), 133 | ]; 134 | 135 | for (value, expected) in cases.iter() { 136 | let rf: ReplicationFactor = value.parse().unwrap(); 137 | assert_eq!(&rf, expected); 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This is a binary crate. You _can_ use it as a library, but I wouldn't recommend it. 2 | //! If you do, remember to disable the default features which are used to build 3 | //! the binary. 4 | //! 5 | //! 
```toml 6 | //! [dependencies] 7 | //! yadf = { version = "0.15.0", default-features = false } 8 | //! ``` 9 | //! 10 | //! A collection of functions and structs to find duplicate files. 11 | //! 12 | //! # Example : 13 | //! 14 | //! Find and display all the duplicate files at the given paths : 15 | //! 16 | //! ```no_run 17 | //! # fn foo(paths: &[std::path::PathBuf]) { 18 | //! let counter = yadf::Yadf::builder() 19 | //! .paths(paths) 20 | //! .build() 21 | //! .scan::(); 22 | //! println!("{}", counter.duplicates().display::()); 23 | //! # } 24 | //! ``` 25 | #![deny(unsafe_code)] 26 | #![warn(rust_2018_idioms)] 27 | 28 | mod bag; 29 | mod ext; 30 | mod fs; 31 | mod hasher; 32 | mod path; 33 | 34 | pub use bag::{Factor, Fdupes, Machine, TreeBag}; 35 | pub use globset; 36 | pub use hasher::Hasher; 37 | pub use path::Path; 38 | pub use regex; 39 | use std::rc::Rc; 40 | 41 | pub type FileCounter = TreeBag; 42 | pub type FileReplicates<'a, H> = bag::Replicates<'a, H, Path>; 43 | 44 | /// Search configuration. 
45 | /// 46 | /// # Example 47 | /// 48 | /// ```no_run 49 | /// # fn foo(paths: &[std::path::PathBuf]) { 50 | /// let counter = yadf::Yadf::builder() 51 | /// .paths(paths) // required 52 | /// .minimum_file_size(64) // optional 53 | /// .maximum_file_size(1024 * 8) // optional 54 | /// .regex(None) // optional 55 | /// .glob(None) // optional 56 | /// .build() 57 | /// .scan::(); 58 | /// # } 59 | /// ``` 60 | /// 61 | /// see the docs for the [`YadfBuilder`](YadfBuilder) 62 | #[derive(Debug, typed_builder::TypedBuilder)] 63 | #[builder(doc)] 64 | pub struct Yadf> { 65 | #[builder(setter(into, doc = "Paths that will be checked for duplicate files"))] 66 | paths: Rc<[P]>, 67 | #[builder(default, setter(into, doc = "Minimum file size"))] 68 | minimum_file_size: Option, 69 | #[builder(default, setter(into, doc = "Maximum file size"))] 70 | maximum_file_size: Option, 71 | #[builder(default, setter(into, doc = "Maximum recursion depth"))] 72 | max_depth: Option, 73 | #[builder(default, setter(into, doc = "File name must match this regex"))] 74 | regex: Option, 75 | #[builder(default, setter(into, doc = "File name must match this glob"))] 76 | glob: Option, 77 | #[cfg(unix)] 78 | #[builder(default, setter(doc = "Treat hard links as duplicates"))] 79 | hard_links: bool, 80 | } 81 | 82 | impl

Yadf

83 | where 84 | P: AsRef, 85 | { 86 | /// This will attempt a complete scan according to its configuration. 87 | pub fn scan(self) -> FileCounter 88 | where 89 | H: hasher::Hasher, 90 | H::Hash: std::fmt::Debug, 91 | { 92 | #[cfg(unix)] 93 | let file_filter = fs::filter::FileFilter::new( 94 | self.minimum_file_size, 95 | self.maximum_file_size, 96 | self.regex, 97 | self.glob.map(|g| g.compile_matcher()), 98 | self.hard_links, 99 | ); 100 | #[cfg(not(unix))] 101 | let file_filter = fs::filter::FileFilter::new( 102 | self.minimum_file_size, 103 | self.maximum_file_size, 104 | self.regex, 105 | self.glob.map(|g| g.compile_matcher()), 106 | ); 107 | let bag = fs::find_dupes_partial::(&self.paths, self.max_depth, file_filter); 108 | if log::log_enabled!(log::Level::Info) { 109 | log::info!( 110 | "scanned {} files", 111 | bag.as_inner().values().map(Vec::len).sum::() 112 | ); 113 | log::info!( 114 | "found {} possible duplicates after initial scan", 115 | bag.duplicates().iter().map(Vec::len).sum::() 116 | ); 117 | log::trace!("{:?}", bag); 118 | } 119 | let bag = fs::dedupe::(bag); 120 | if log::log_enabled!(log::Level::Info) { 121 | log::info!( 122 | "found {} duplicates in {} groups after checksumming", 123 | bag.duplicates().iter().map(Vec::len).sum::(), 124 | bag.duplicates().iter().count(), 125 | ); 126 | log::trace!("{:?}", bag); 127 | } 128 | bag 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /scripts/yadf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import functools 5 | import hashlib 6 | import locale 7 | import math 8 | import multiprocessing 9 | import os 10 | import re 11 | import sys 12 | from collections import defaultdict 13 | from json import dump as jsondump 14 | 15 | 16 | locale.setlocale(locale.LC_ALL, "") 17 | 18 | 19 | def main(args): 20 | full_counter = find_dupes( 21 | args.directories,
HASHERS[args.algorithm], args.min, args.max 22 | ) 23 | partitioned = partition(full_counter, lambda b: len(b) > 1) 24 | duplicates, uniques = partitioned[True], partitioned[False] 25 | DISPLAY[args.format](duplicates) 26 | if args.report: 27 | duplicates_files = sum(map(len, duplicates)) 28 | files_scanned = len(uniques) + duplicates_files 29 | print(f"{files_scanned:n} scanned files", file=sys.stderr) 30 | print(f"{len(uniques):n} unique files", file=sys.stderr) 31 | print( 32 | f"{len(duplicates):n} groups of duplicate files ({duplicates_files:n} files)", 33 | file=sys.stderr, 34 | ) 35 | 36 | 37 | def find_dupes(directories, algorithm, min=0, max=math.inf): 38 | def build_bag(key_value_iterable): 39 | bag = defaultdict(list) 40 | for key, value in key_value_iterable: 41 | bag[key].append(value) 42 | return bag 43 | 44 | walker = ( 45 | file 46 | for file in ( 47 | os.path.join(path, file) 48 | for directory in set(directories) 49 | for (path, _, files) in os.walk(directory) 50 | for file in files 51 | ) 52 | if min <= os.stat(file).st_size <= max 53 | ) 54 | 55 | hasher = functools.partial(hash_file, algorithm=algorithm) 56 | with multiprocessing.Pool() as pool: 57 | tuples = pool.imap_unordered(hasher, walker, chunksize=32) 58 | return build_bag(tuples).values() 59 | 60 | 61 | def hash_file(path, algorithm): 62 | hasher = algorithm() 63 | with open(path, "rb") as fd: 64 | while True: 65 | buf = fd.read(4096) 66 | if len(buf) == 0: 67 | break 68 | hasher.update(buf) 69 | return hasher.digest(), path 70 | 71 | 72 | def fdupes(duplicates): 73 | last = len(duplicates) - 1 74 | for (i, bucket) in enumerate(duplicates): 75 | print(*bucket, sep="\n") 76 | if i != last: 77 | print() 78 | 79 | 80 | def json(duplicates): 81 | jsondump(duplicates, fp=sys.stdout) 82 | 83 | 84 | def ldjson(duplicates): 85 | for bucket in duplicates: 86 | jsondump(bucket, fp=sys.stdout) 87 | 88 | 89 | DISPLAY = { 90 | fdupes.__name__: fdupes, 91 | json.__name__: json, 92 | ldjson.__name__: 
ldjson, 93 | } 94 | 95 | HASHERS = { 96 | hashlib.blake2b.__name__: hashlib.blake2b, 97 | hashlib.sha384.__name__: hashlib.sha384, 98 | hashlib.md5.__name__: hashlib.md5, 99 | } 100 | 101 | 102 | def partition(iterable, predicate): 103 | results = defaultdict(list) 104 | for item in iterable: 105 | results[predicate(item)].append(item) 106 | return results 107 | 108 | 109 | def parse_args(argv): 110 | units = {"B": 1, "KB": 2 ** 10, "MB": 2 ** 20, "GB": 2 ** 30, "TB": 2 ** 40} 111 | 112 | def byte_size(size): 113 | size = size.upper() 114 | if " " not in size: 115 | size = re.sub(r"([KMGT]?B?)", r" \1", size) 116 | size = size.split() 117 | if len(size) < 2: 118 | size.append("B") 119 | elif len(size[1]) < 2: 120 | size[1] += "B" 121 | number, unit = [string.strip() for string in size] 122 | return int(float(number) * units[unit]) 123 | 124 | parser = argparse.ArgumentParser() 125 | parser.add_argument( 126 | "directories", 127 | help="directories to search", 128 | default=[os.getcwd()], 129 | nargs="*", 130 | ) 131 | parser.add_argument( 132 | "-r", 133 | "--report", 134 | action="store_true", 135 | help="print human readable report to stderr", 136 | ) 137 | parser.add_argument( 138 | "-f", 139 | "--format", 140 | choices=DISPLAY.keys(), 141 | default=next(iter(DISPLAY)), 142 | help="output format", 143 | ) 144 | parser.add_argument( 145 | "-a", 146 | "--algorithm", 147 | choices=HASHERS.keys(), 148 | default=next(iter(HASHERS)), 149 | help="hashing algorithm", 150 | ) 151 | parser.add_argument("--min", type=byte_size, default=0) 152 | parser.add_argument("--max", type=byte_size, default=math.inf) 153 | return parser.parse_args(argv) 154 | 155 | 156 | if __name__ == "__main__": 157 | try: 158 | main(parse_args(sys.argv[1:])) 159 | except KeyboardInterrupt: 160 | print() 161 | -------------------------------------------------------------------------------- /src/bag.rs: -------------------------------------------------------------------------------- 1 | mod display; 
2 | mod replicates; 3 | mod serialize; 4 | 5 | use std::borrow::Borrow; 6 | use std::collections::btree_map::Entry; 7 | use std::collections::BTreeMap; 8 | use std::ops::Index; 9 | 10 | /// Ordered counter structure. 11 | /// 12 | /// # Example : 13 | /// 14 | /// ``` 15 | /// use yadf::TreeBag; 16 | /// 17 | /// let bag: TreeBag = vec![ 18 | /// (3, "hello world"), 19 | /// (3, "foobar"), 20 | /// (7, "fizz"), 21 | /// (7, "buzz"), 22 | /// (6, "rust"), 23 | /// ].into_iter().collect(); 24 | /// 25 | /// assert_eq!(bag[&3], ["hello world", "foobar"]); 26 | /// assert_eq!(bag[&7], ["fizz", "buzz"]); 27 | /// assert_eq!(bag[&6], ["rust"]); 28 | /// ``` 29 | #[derive(Debug)] 30 | pub struct TreeBag(BTreeMap>); 31 | 32 | #[derive(Debug, Clone)] 33 | pub enum Factor { 34 | Under(usize), 35 | Equal(usize), 36 | Over(usize), 37 | } 38 | 39 | /// A view which only provides access to n replicated entries. 40 | #[derive(Debug)] 41 | pub struct Replicates<'a, K, V> { 42 | tree: &'a TreeBag, 43 | factor: Factor, 44 | } 45 | 46 | /// Display marker. 47 | #[derive(Debug)] 48 | pub struct Fdupes; 49 | /// Display marker. 50 | #[derive(Debug)] 51 | pub struct Machine; 52 | 53 | #[derive(Debug)] 54 | pub struct Display<'a, K, V, U> { 55 | format_marker: std::marker::PhantomData<&'a U>, 56 | tree: &'a Replicates<'a, K, V>, 57 | } 58 | 59 | impl From>> for TreeBag { 60 | /// Build a [`TreeBag`](TreeBag) from a [`BTreeMap`](BTreeMap). 61 | fn from(btree: BTreeMap>) -> Self { 62 | Self(btree) 63 | } 64 | } 65 | 66 | impl TreeBag { 67 | /// Provides a view only on the buckets containing more than one element. 68 | pub const fn duplicates(&self) -> Replicates<'_, K, V> { 69 | Replicates { 70 | tree: self, 71 | factor: Factor::Over(1), 72 | } 73 | } 74 | 75 | /// Provides a view only on the buckets as constrained by the replication [`Factor`](Factor). 
76 | pub const fn replicates(&self, factor: Factor) -> Replicates<'_, K, V> { 77 | Replicates { tree: self, factor } 78 | } 79 | 80 | /// Borrows the backing [`BTreeMap`](BTreeMap) of the bag. 81 | pub const fn as_inner(&self) -> &BTreeMap> { 82 | &self.0 83 | } 84 | 85 | /// Mutably borrows the backing [`BTreeMap`](BTreeMap) of the bag. 86 | pub fn as_inner_mut(&mut self) -> &mut BTreeMap> { 87 | &mut self.0 88 | } 89 | 90 | /// Consumes the wrapper [`TreeBag`](TreeBag) and returns the inner [`BTreeMap`](BTreeMap). 91 | pub fn into_inner(self) -> BTreeMap> { 92 | self.0 93 | } 94 | 95 | /// Returns the number of buckets in the bag. 96 | pub fn len(&self) -> usize { 97 | self.0.len() 98 | } 99 | 100 | /// Returns `true` if the bag contains no elements. 101 | pub fn is_empty(&self) -> bool { 102 | self.0.is_empty() 103 | } 104 | 105 | /// Returns a reference to the bucket corresponding to the key. 106 | pub fn get(&self, key: &Q) -> Option<&Vec> 107 | where 108 | K: Borrow + Ord, 109 | Q: ?Sized + Ord, 110 | { 111 | self.0.get(key) 112 | } 113 | 114 | /// Returns a mutable reference to the bucket corresponding to the key. 115 | pub fn get_mut(&mut self, key: &Q) -> Option<&mut Vec> 116 | where 117 | K: Borrow + Ord, 118 | Q: ?Sized + Ord, 119 | { 120 | self.0.get_mut(key) 121 | } 122 | 123 | /// Gets the given key’s corresponding entry in the bag for in-place manipulation. 
124 | pub fn entry(&mut self, key: K) -> Entry<'_, K, Vec> 125 | where 126 | K: Ord, 127 | { 128 | self.0.entry(key) 129 | } 130 | } 131 | 132 | impl std::iter::FromIterator<(K, V)> for TreeBag { 133 | fn from_iter(key_value_iter: I) -> Self 134 | where 135 | I: IntoIterator, 136 | { 137 | let mut bag = TreeBag::default(); 138 | bag.extend(key_value_iter); 139 | bag 140 | } 141 | } 142 | 143 | impl Extend<(K, V)> for TreeBag { 144 | fn extend(&mut self, key_value_iter: I) 145 | where 146 | I: IntoIterator, 147 | { 148 | for (key, value) in key_value_iter { 149 | self.entry(key).or_default().push(value); 150 | } 151 | } 152 | } 153 | 154 | impl Default for TreeBag { 155 | fn default() -> Self { 156 | Self(Default::default()) 157 | } 158 | } 159 | 160 | impl Index<&Q> for TreeBag 161 | where 162 | K: Borrow + Ord, 163 | Q: Ord, 164 | { 165 | type Output = Vec; 166 | 167 | /// Returns a reference to the value corresponding to the supplied key. 168 | /// 169 | /// # Panics 170 | /// 171 | /// Panics if the key is not present in the [`TreeBag`](TreeBag). 
172 | fn index(&self, key: &Q) -> &Self::Output { 173 | self.get(key).expect("no entry found for key") 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /tests/integration.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::{random_collection, AnyResult, TestDir, MAX_LEN}; 4 | use predicates::{boolean::PredicateBooleanExt, str as predstr}; 5 | 6 | #[test] 7 | fn function_name() { 8 | let fname = scope_name_iter!().collect::>().join("::"); 9 | assert_eq!(fname, "integration::function_name"); 10 | } 11 | 12 | #[test] 13 | fn dir_macro() { 14 | let path = test_dir!(); 15 | #[cfg(windows)] 16 | assert_eq!(path.to_str(), Some("target\\tests\\integration\\dir_macro")); 17 | #[cfg(not(windows))] 18 | assert_eq!(path.to_str(), Some("target/tests/integration/dir_macro")); 19 | } 20 | 21 | #[test] 22 | fn trace_output() -> AnyResult { 23 | let root = TestDir::new(test_dir!())?; 24 | println!("{:?}", root.as_ref()); 25 | let bytes: Vec<_> = random_collection(MAX_LEN); 26 | let file1 = root.write_file("file1", &bytes)?; 27 | let file2 = root.write_file("file2", &bytes)?; 28 | root.write_file("file3", &bytes[..4096])?; 29 | root.write_file("file4", &bytes[..2048])?; 30 | let _expected = serde_json::to_string(&[[file1.to_string_lossy(), file2.to_string_lossy()]]) 31 | .unwrap() 32 | + "\n"; 33 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 
34 | .arg("-vvvv") // test stderr contains enough debug output 35 | .args(["--format", "json"]) 36 | .args(["--algorithm", "seahash"]) 37 | .arg(root.as_ref()) 38 | .assert() 39 | .success() 40 | .stderr( 41 | predstr::contains("Args {") 42 | .and(predstr::contains("Yadf {")) 43 | .and(predstr::contains("format: Json")) 44 | .and(predstr::contains("algorithm: SeaHash")) 45 | .and(predstr::contains("verbose: 4")) 46 | .and(predstr::contains( 47 | "found 2 possible duplicates after initial scan", 48 | )) 49 | .and(predstr::contains( 50 | "found 2 duplicates in 1 groups after checksumming", 51 | )) 52 | .and(predstr::contains("file1")) 53 | .and(predstr::contains("file2")) 54 | .and(predstr::contains("file3")) 55 | .and(predstr::contains("file4")), 56 | ); 57 | Ok(()) 58 | } 59 | 60 | #[test] 61 | fn regex() -> AnyResult { 62 | let root = TestDir::new(test_dir!())?; 63 | let bytes: Vec<_> = random_collection(4096); 64 | let particular_1_name = root.write_file("particular_1_name", &bytes)?; 65 | let particular_2_name = root.write_file("particular_2_name", &bytes)?; 66 | root.write_file("not_particular_2_name", &bytes)?; 67 | root.write_file("completely_different", &bytes)?; 68 | let _expected = [ 69 | particular_1_name.to_string_lossy(), 70 | particular_2_name.to_string_lossy(), 71 | ] 72 | .join("\n") 73 | + "\n"; 74 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 
75 | .args(["--regex", "^particular_\\d_name$"]) 76 | .arg(root.as_ref()) 77 | .assert() 78 | .success() 79 | .stderr(predstr::is_empty()); 80 | Ok(()) 81 | } 82 | 83 | #[test] 84 | fn glob_pattern() -> AnyResult { 85 | let root = TestDir::new(test_dir!())?; 86 | let bytes: Vec<_> = random_collection(4096); 87 | let particular_1_name = root.write_file("particular_1_name", &bytes)?; 88 | let particular_2_name = root.write_file("particular_2_name", &bytes)?; 89 | root.write_file("not_particular_2_name", &bytes)?; 90 | root.write_file("completely_different", &bytes)?; 91 | let _expected = [ 92 | particular_1_name.to_string_lossy(), 93 | particular_2_name.to_string_lossy(), 94 | ] 95 | .join("\n") 96 | + "\n"; 97 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 98 | .args(["--pattern", "particular*name"]) 99 | .arg(root.as_ref()) 100 | .assert() 101 | .success() 102 | .stderr(predstr::is_empty()); 103 | Ok(()) 104 | } 105 | 106 | #[test] 107 | fn min_file_size() -> AnyResult { 108 | let root = TestDir::new(test_dir!())?; 109 | let bytes: Vec<_> = random_collection(4096); 110 | let particular_1_name = root.write_file("particular_1_name", &bytes)?; 111 | let particular_2_name = root.write_file("particular_2_name", &bytes)?; 112 | root.write_file("not_particular_2_name", &bytes[..2048])?; 113 | root.write_file("completely_different", &bytes[..2048])?; 114 | let _expected = [ 115 | particular_1_name.to_string_lossy(), 116 | particular_2_name.to_string_lossy(), 117 | ] 118 | .join("\n") 119 | + "\n"; 120 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 
121 | .args(["--min", "4K"]) 122 | .arg(root.as_ref()) 123 | .assert() 124 | .success() 125 | .stderr(predstr::is_empty()); 126 | Ok(()) 127 | } 128 | 129 | #[test] 130 | fn max_file_size() -> AnyResult { 131 | let root = TestDir::new(test_dir!())?; 132 | let bytes: Vec<_> = random_collection(4096); 133 | let particular_1_name = root.write_file("particular_1_name", &bytes[..1024])?; 134 | let particular_2_name = root.write_file("particular_2_name", &bytes[..1024])?; 135 | root.write_file("not_particular_2_name", &bytes)?; 136 | root.write_file("completely_different", &bytes)?; 137 | let _expected = [ 138 | particular_1_name.to_string_lossy(), 139 | particular_2_name.to_string_lossy(), 140 | ] 141 | .join("\n") 142 | + "\n"; 143 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 144 | .args(["--max", "2K"]) 145 | .arg(root.as_ref()) 146 | .assert() 147 | .success() 148 | .stderr(predstr::is_empty()); 149 | Ok(()) 150 | } 151 | 152 | #[cfg(all(unix, not(target_os = "macos")))] 153 | #[test] 154 | fn non_utf8_paths() -> AnyResult { 155 | use std::ffi::OsString; 156 | use std::os::unix::ffi::OsStringExt; 157 | use std::path::PathBuf; 158 | let root = TestDir::new(test_dir!())?; 159 | let filename = PathBuf::from(OsString::from_vec(b"\xe7\xe7".to_vec())); 160 | root.write_file(&filename, b"")?; 161 | root.write_file("aa", b"")?; 162 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 163 | .arg(root.as_ref()) 164 | .args(["-f", "json"]) 165 | .arg("-vv") 166 | .assert() 167 | .success(); 168 | Ok(()) 169 | } 170 | 171 | #[test] 172 | fn hard_links_flag() -> AnyResult { 173 | let predicate = predstr::contains("--hard-links"); 174 | #[cfg(not(unix))] 175 | let predicate = predicate.not(); 176 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 
177 | .arg("-h") 178 | .assert() 179 | .success() 180 | .stdout(predicate); 181 | Ok(()) 182 | } 183 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YADF — Yet Another Dupes Finder 2 | 3 | > _It's [fast](#benchmarks) on my machine._ 4 | 5 | ___ 6 | 7 | You should probably use [`fclones`][0]. 8 | 9 | ___ 10 | 11 | ## Installation 12 | 13 | ### Prebuilt Packages 14 | 15 | Executable binaries for some platforms are available in the [releases](https://github.com/jRimbault/yadf/releases) section. 16 | 17 | ### Building from source 18 | 19 | 1. [Install Rust Toolchain](https://www.rust-lang.org/tools/install) 20 | 2. Run `cargo install --locked yadf` 21 | 22 | ## Usage 23 | 24 | `yadf` defaults: 25 | 26 | - search current working directory `$PWD` 27 | - output format is the same as the "standard" `fdupes`, newline-separated groups 28 | - descends automatically into subdirectories 29 | - search includes every file (including empty files) 30 | 31 | ```bash 32 | yadf # find duplicate files in current directory 33 | yadf ~/Documents ~/Pictures # find duplicate files in two directories 34 | yadf --depth 0 file1 file2 # compare two files 35 | yadf --depth 1 # find duplicates in current directory without descending 36 | fd --type d a | yadf --depth 1 # find directories with an "a" and search them for duplicates without descending 37 | fd --type f a | yadf # find files with an "a" and check them for duplicates 38 | ``` 39 | 40 | ### Filtering 41 | 42 | ```bash 43 | yadf --min 100M # find duplicate files of at least 100 MB 44 | yadf --max 100M # find duplicate files below 100 MB 45 | yadf --pattern '*.jpg' # find duplicate jpg 46 | yadf --regex '^g' # find duplicate starting with 'g' 47 | yadf --rfactor over:10 # find files with more than 10 copies 48 | yadf --rfactor under:10 # find files with fewer than 10 copies 49 | yadf --rfactor equal:1 # 
find unique files 50 | ``` 51 | 52 | ### Formatting 53 | 54 | Look up the help for a list of output formats `yadf -h`. 55 | 56 | ```bash 57 | yadf -f json 58 | yadf -f fdupes 59 | yadf -f csv 60 | yadf -f ldjson 61 | ``` 62 | 63 |

64 | Help output. 65 | 66 | ``` 67 | Yet Another Dupes Finder 68 | 69 | Usage: yadf [OPTIONS] [PATHS]... 70 | 71 | Arguments: 72 | [PATHS]... Directories to search 73 | 74 | Options: 75 | -f, --format Output format [default: fdupes] [possible values: csv, fdupes, json, json-pretty, ld-json, machine] 76 | -a, --algorithm Hashing algorithm [default: ahash] [possible values: ahash, highway, metrohash, seahash, xxhash] 77 | -n, --no-empty Excludes empty files 78 | --min Minimum file size 79 | --max Maximum file size 80 | -d, --depth Maximum recursion depth 81 | -H, --hard-links Treat hard links to same file as duplicates 82 | -R, --regex Check files with a name matching a Perl-style regex, see: https://docs.rs/regex/1.4.2/regex/index.html#syntax 83 | -p, --pattern Check files with a name matching a glob pattern, see: https://docs.rs/globset/0.4.6/globset/index.html#syntax 84 | -v, --verbose... Increase logging verbosity 85 | -q, --quiet... Decrease logging verbosity 86 | --rfactor Replication factor [under|equal|over]:n 87 | -o, --output Optional output file 88 | -h, --help Print help (see more with '--help') 89 | -V, --version Print version 90 | 91 | For sizes, K/M/G/T[B|iB] suffixes can be used (case-insensitive). 92 | ``` 93 | 94 |
95 | 96 | ## Notes on the algorithm 97 | 98 | Most¹ dupe finders follow a 3-step algorithm: 99 | 100 | 1. group files by their size 101 | 2. group files by their first few bytes 102 | 3. group files by their entire content 103 | 104 | `yadf` skips the first step, and only does steps 2 and 3, preferring hashing to byte comparison. In my [tests][3-steps] having the first step on an SSD actually slowed down the program. 105 | `yadf` makes heavy use of the standard library [`BTreeMap`][btreemap], which uses a cache-aware implementation that avoids too many cache misses. `yadf` uses the parallel walker provided by `ignore` (disabling its _ignore_ features) and `rayon`'s parallel iterators to do each of these 2 steps in parallel. 106 | 107 | ¹: some need a different algorithm to support different features or different performance trade-offs 108 | 109 | [btreemap]: https://doc.rust-lang.org/std/collections/struct.BTreeMap.html 110 | [3-steps]: https://github.com/jRimbault/yadf/tree/3-steps 111 | [hashmap]: https://doc.rust-lang.org/std/collections/struct.HashMap.html 112 | 113 | ### Design goals 114 | 115 | I set out to build a high-performing artefact by assembling libraries that do the actual work; nothing here is custom made, it's all "off-the-shelf" software. 116 | 117 | ## Benchmarks 118 | 119 | The performance of `yadf` is heavily tied to the hardware, specifically the 120 | NVMe SSD. I recommend `fclones` as it has more hardware heuristics and, in general, more features. `yadf` on HDDs is _terrible_. 121 | 122 | My home directory contains upwards of 700k paths and 39 GB of data, and is probably a pathological case of file duplication with all the node_modules, python virtual environments, rust target, etc. Arguably, the most important measure here is the mean time when the filesystem cache is cold. 
123 | 124 | | Program (warm filesystem cache) | Version | Mean [s] | Min [s] | Max [s] | 125 | | :------------------------------ | ------: | ----------------: | --------: | ------: | 126 | | [`fclones`][0] | 0.29.3 | 7.435 ± 1.609 | 4.622 | 9.317 | 127 | | [`jdupes`][1] | 1.14.0 | 16.787 ± 0.208 | 16.484 | 17.178 | 128 | | [`ddh`][2] | 0.13 | 12.703 ± 1.547 | 10.814 | 14.793 | 129 | | [`dupe-krill`][4] | 1.4.7 | 15.555 ± 1.633 | 12.486 | 16.959 | 130 | | [`fddf`][5] | 1.7.0 | 18.441 ± 1.947 | 15.097 | 22.389 | 131 | | `yadf` | 1.1.0 | **3.157 ± 0.638** | 2.362 | 4.175 | 132 | 133 | | Program (cold filesystem cache) | Version | Mean [s] | Min [s] | Max [s] | 134 | | :------------------------------ | ------: | ----------------: | --------: | ------: | 135 | | [`fclones`][0] | 0.29.3 | 68.950 ± 3.694 | 63.165 | 73.534 | 136 | | [`jdupes`][1] | 1.14.0 | 303.907 ± 11.578 | 277.618 | 314.226 | 137 | | `yadf` | 1.1.0 | 52.481 ± 1.125 | 50.412 | 54.265 | 138 | 139 | _I test fewer programs here because it takes several hours to run._ 140 | 141 | The script used to benchmark can be read [here](./scripts/bench.sh). 142 | 143 | [0]: https://github.com/pkolaczk/fclones 144 | [1]: https://github.com/jbruchon/jdupes 145 | [2]: https://github.com/darakian/ddh 146 | [3]: https://github.com/sahib/rmlint 147 | [4]: https://github.com/kornelski/dupe-krill 148 | [5]: https://github.com/birkenfeld/fddf 149 | 150 |
151 | Hardware used. 152 | 153 | Extract from `neofetch` and `hwinfo --disk`: 154 | 155 | - OS: Ubuntu 20.04.1 LTS x86_64 156 | - Host: XPS 15 9570 157 | - Kernel: 5.4.0-42-generic 158 | - CPU: Intel i9-8950HK (12) @ 4.800GHz 159 | - Memory: 4217MiB / 31755MiB 160 | - Disk: 161 | - model: "SK hynix Disk" 162 | - driver: "nvme" 163 | 164 |
165 | 166 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![deny(unsafe_code)] 2 | #![warn(rust_2018_idioms)] 3 | 4 | mod args; 5 | 6 | use anyhow::Context; 7 | use clap::{Parser, ValueEnum}; 8 | use clap_verbosity_flag::ErrorLevel; 9 | use std::fs::File; 10 | use std::io::{self, Write}; 11 | use std::path::PathBuf; 12 | use std::str::FromStr; 13 | use yadf::{Fdupes, Machine}; 14 | 15 | fn main() -> anyhow::Result<()> { 16 | human_panic::setup_panic!(); 17 | let timer = std::time::Instant::now(); 18 | let args = Args::init_from_env(); 19 | log::debug!("{:?}", args); 20 | let config = build_config(&args); 21 | log::debug!("{:?}", config); 22 | args.algorithm.run(args.clone(), config)?; 23 | log::debug!("{:?} elapsed", timer.elapsed()); 24 | Ok(()) 25 | } 26 | 27 | fn write_output(args: Args, bag: yadf::TreeBag) -> Result<(), anyhow::Error> 28 | where 29 | H: yadf::Hasher, 30 | { 31 | let rfactor = args.rfactor.unwrap_or_default(); 32 | let replicates = bag.replicates(rfactor.into()); 33 | match args.output { 34 | Some(path) => { 35 | let context = || format!("writing output to the file: {:?}", path.display()); 36 | let file = File::create(&path).with_context(context)?; 37 | args.format.display::<_, H>(file, replicates) 38 | } 39 | None => args.format.display::<_, H>(io::stdout().lock(), replicates), 40 | } 41 | .context("writing output")?; 42 | Ok(()) 43 | } 44 | 45 | #[cfg(unix)] 46 | fn build_config(args: &Args) -> yadf::Yadf { 47 | yadf::Yadf::builder() 48 | .paths(args.paths.as_ref()) 49 | .minimum_file_size(args.min()) 50 | .maximum_file_size(args.max()) 51 | .regex(args.regex.clone()) 52 | .glob(args.pattern.clone()) 53 | .max_depth(args.max_depth) 54 | .hard_links(args.hard_links) 55 | .build() 56 | } 57 | 58 | #[cfg(not(unix))] 59 | fn build_config(args: &Args) -> yadf::Yadf { 60 | yadf::Yadf::builder() 61 | 
.paths(args.paths.as_ref()) 62 | .minimum_file_size(args.min()) 63 | .maximum_file_size(args.max()) 64 | .regex(args.regex.clone()) 65 | .glob(args.pattern.clone()) 66 | .max_depth(args.max_depth) 67 | .build() 68 | } 69 | 70 | impl Algorithm { 71 | fn run

(&self, args: Args, config: yadf::Yadf

) -> anyhow::Result<()> 72 | where 73 | P: AsRef, 74 | { 75 | log::debug!("using {:?} hashing", self); 76 | match self { 77 | Algorithm::AHash => { 78 | write_output::(args, config.scan::())? 79 | } 80 | Algorithm::Blake3 => { 81 | write_output::(args, config.scan::())? 82 | } 83 | Algorithm::Highway => write_output::( 84 | args, 85 | config.scan::(), 86 | )?, 87 | Algorithm::MetroHash => write_output::( 88 | args, 89 | config.scan::(), 90 | )?, 91 | Algorithm::SeaHash => { 92 | write_output::(args, config.scan::())? 93 | } 94 | Algorithm::XxHash => write_output::( 95 | args, 96 | config.scan::(), 97 | )?, 98 | } 99 | Ok(()) 100 | } 101 | } 102 | 103 | impl Format { 104 | fn display( 105 | &self, 106 | writer: W, 107 | replicates: yadf::FileReplicates<'_, H::Hash>, 108 | ) -> anyhow::Result<()> 109 | where 110 | H: yadf::Hasher, 111 | W: Write, 112 | { 113 | let mut writer = io::BufWriter::with_capacity(64 * 1024, writer); 114 | match self { 115 | Format::Json => { 116 | serde_json::to_writer(&mut writer, &replicates)?; 117 | writer.write_all(b"\n")?; 118 | } 119 | Format::JsonPretty => { 120 | serde_json::to_writer_pretty(&mut writer, &replicates)?; 121 | writer.write_all(b"\n")?; 122 | } 123 | Format::Csv => csv_to_writer::<_, H>(writer, &replicates)?, 124 | Format::LdJson => ldjson_to_writer::<_, H>(writer, &replicates)?, 125 | Format::Fdupes => writeln!(writer, "{}", replicates.display::())?, 126 | Format::Machine => writeln!(writer, "{}", replicates.display::())?, 127 | }; 128 | Ok(()) 129 | } 130 | } 131 | 132 | /// Yet Another Dupes Finder 133 | #[derive(Parser, Debug, Clone)] 134 | pub struct Args { 135 | /// Directories to search 136 | /// 137 | /// default is to search inside the current working directory 138 | #[clap(value_parser)] 139 | paths: Vec, 140 | /// Output format 141 | #[clap(short, long, value_enum, default_value_t, ignore_case = true)] 142 | format: Format, 143 | /// Hashing algorithm 144 | #[clap(short, long, value_enum, default_value_t, 
ignore_case = true)] 145 | algorithm: Algorithm, 146 | /// Excludes empty files 147 | #[clap(short, long)] 148 | no_empty: bool, 149 | /// Minimum file size 150 | #[clap(long, value_name = "size")] 151 | min: Option, 152 | /// Maximum file size 153 | #[clap(long, value_name = "size")] 154 | max: Option, 155 | /// Maximum recursion depth 156 | #[clap(short = 'd', long = "depth", value_name = "depth")] 157 | max_depth: Option, 158 | /// Treat hard links to same file as duplicates 159 | #[cfg_attr(unix, clap(short = 'H', long))] 160 | #[cfg(unix)] 161 | hard_links: bool, 162 | /// Check files with a name matching a Perl-style regex, 163 | /// see: https://docs.rs/regex/1.4.2/regex/index.html#syntax 164 | #[clap(short = 'R', long)] 165 | regex: Option, 166 | /// Check files with a name matching a glob pattern, 167 | /// see: https://docs.rs/globset/0.4.6/globset/index.html#syntax 168 | #[clap(short, long, value_name = "glob")] 169 | pattern: Option, 170 | #[clap(flatten)] 171 | verbosity: clap_verbosity_flag::Verbosity, 172 | /// Replication factor [under|equal|over]:n 173 | /// 174 | /// The default is `over:1`, to find uniques use `equal:1`, 175 | /// to find files with less than 10 copies use `under:10` 176 | #[clap(long)] 177 | rfactor: Option, 178 | /// Optional output file 179 | #[clap(short, long)] 180 | output: Option, 181 | } 182 | 183 | #[derive(ValueEnum, Debug, Clone, Default)] 184 | enum Format { 185 | Csv, 186 | #[default] 187 | Fdupes, 188 | Json, 189 | JsonPretty, 190 | LdJson, 191 | Machine, 192 | } 193 | 194 | #[derive(ValueEnum, Debug, Clone, Default)] 195 | #[clap(rename_all = "lower")] 196 | enum Algorithm { 197 | AHash, 198 | Blake3, 199 | #[default] 200 | Highway, 201 | MetroHash, 202 | SeaHash, 203 | XxHash, 204 | } 205 | 206 | #[derive(Debug, Clone)] 207 | struct Byte(byte_unit::Byte); 208 | 209 | impl FromStr for Byte { 210 | type Err = String; 211 | fn from_str(s: &str) -> Result { 212 | byte_unit::Byte::from_str(s) 213 | .map(Byte) 214 | 
.map_err(|e| e.to_string()) 215 | } 216 | } 217 | 218 | #[derive(Debug, PartialEq, Clone)] 219 | enum ReplicationFactor { 220 | Under(usize), 221 | Equal(usize), 222 | Over(usize), 223 | } 224 | 225 | /// mimic serde_json interface 226 | fn csv_to_writer(writer: W, replicates: &yadf::FileReplicates<'_, H::Hash>) -> csv::Result<()> 227 | where 228 | H: yadf::Hasher, 229 | W: Write, 230 | { 231 | let mut writer = csv::WriterBuilder::new() 232 | .flexible(true) 233 | .has_headers(false) 234 | .from_writer(writer); 235 | writer.serialize(("count", "files"))?; 236 | for files in replicates { 237 | writer.serialize((files.len(), files))?; 238 | } 239 | Ok(()) 240 | } 241 | 242 | /// mimic serde_json interface 243 | fn ldjson_to_writer( 244 | mut writer: W, 245 | replicates: &yadf::FileReplicates<'_, H::Hash>, 246 | ) -> anyhow::Result<()> 247 | where 248 | H: yadf::Hasher, 249 | W: Write, 250 | { 251 | for files in replicates { 252 | serde_json::to_writer(&mut writer, &files)?; 253 | writeln!(writer)?; 254 | } 255 | Ok(()) 256 | } 257 | 258 | // #[cfg(test)] 259 | // mod tests { 260 | // use super::*; 261 | // use once_cell::sync::Lazy; 262 | 263 | // static BAG: Lazy> = Lazy::new(|| { 264 | // vec![ 265 | // (77, "hello".into()), 266 | // (77, "world".into()), 267 | // (3, "foo".into()), 268 | // (3, "bar".into()), 269 | // ] 270 | // .into_iter() 271 | // .collect() 272 | // }); 273 | 274 | // #[test] 275 | // fn csv() { 276 | // let mut buffer = Vec::new(); 277 | // let _ = csv_to_writer(&mut buffer, &BAG.duplicates()); 278 | // let result = String::from_utf8(buffer).unwrap(); 279 | // let expected = r#"count,files 280 | // 2,foo,bar 281 | // 2,hello,world 282 | // "#; 283 | // assert_eq!(result, expected); 284 | // } 285 | 286 | // #[test] 287 | // fn ldjson() { 288 | // let mut buffer = Vec::new(); 289 | // let _ = ldjson_to_writer(&mut buffer, &BAG.duplicates()); 290 | // let result = String::from_utf8(buffer).unwrap(); 291 | // let expected = r#"["foo","bar"] 292 
| // ["hello","world"] 293 | // "#; 294 | // assert_eq!(result, expected); 295 | // } 296 | // } 297 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.24.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler2" 16 | version = "2.0.1" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" 19 | 20 | [[package]] 21 | name = "ahash" 22 | version = "0.7.8" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" 25 | dependencies = [ 26 | "getrandom 0.2.16", 27 | "once_cell", 28 | "version_check", 29 | ] 30 | 31 | [[package]] 32 | name = "ahash" 33 | version = "0.8.12" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" 36 | dependencies = [ 37 | "cfg-if", 38 | "getrandom 0.3.3", 39 | "once_cell", 40 | "version_check", 41 | "zerocopy", 42 | ] 43 | 44 | [[package]] 45 | name = "aho-corasick" 46 | version = "1.1.3" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 49 | dependencies = [ 50 | "memchr", 51 | ] 52 | 53 | [[package]] 54 | name = "anstream" 55 | version = "0.6.20" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = 
"3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" 58 | dependencies = [ 59 | "anstyle", 60 | "anstyle-parse", 61 | "anstyle-query", 62 | "anstyle-wincon", 63 | "colorchoice", 64 | "is_terminal_polyfill", 65 | "utf8parse", 66 | ] 67 | 68 | [[package]] 69 | name = "anstyle" 70 | version = "1.0.11" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" 73 | 74 | [[package]] 75 | name = "anstyle-parse" 76 | version = "0.2.7" 77 | source = "registry+https://github.com/rust-lang/crates.io-index" 78 | checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" 79 | dependencies = [ 80 | "utf8parse", 81 | ] 82 | 83 | [[package]] 84 | name = "anstyle-query" 85 | version = "1.1.4" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" 88 | dependencies = [ 89 | "windows-sys 0.60.2", 90 | ] 91 | 92 | [[package]] 93 | name = "anstyle-wincon" 94 | version = "3.0.10" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" 97 | dependencies = [ 98 | "anstyle", 99 | "once_cell_polyfill", 100 | "windows-sys 0.60.2", 101 | ] 102 | 103 | [[package]] 104 | name = "anyhow" 105 | version = "1.0.99" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" 108 | 109 | [[package]] 110 | name = "arrayref" 111 | version = "0.3.9" 112 | source = "registry+https://github.com/rust-lang/crates.io-index" 113 | checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" 114 | 115 | [[package]] 116 | name = "arrayvec" 117 | version = "0.7.6" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | checksum = 
"7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" 120 | 121 | [[package]] 122 | name = "assert_cmd" 123 | version = "2.0.17" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | checksum = "2bd389a4b2970a01282ee455294913c0a43724daedcd1a24c3eb0ec1c1320b66" 126 | dependencies = [ 127 | "anstyle", 128 | "bstr", 129 | "doc-comment", 130 | "libc", 131 | "predicates", 132 | "predicates-core", 133 | "predicates-tree", 134 | "wait-timeout", 135 | ] 136 | 137 | [[package]] 138 | name = "autocfg" 139 | version = "1.5.0" 140 | source = "registry+https://github.com/rust-lang/crates.io-index" 141 | checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" 142 | 143 | [[package]] 144 | name = "backtrace" 145 | version = "0.3.75" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" 148 | dependencies = [ 149 | "addr2line", 150 | "cfg-if", 151 | "libc", 152 | "miniz_oxide", 153 | "object", 154 | "rustc-demangle", 155 | "windows-targets 0.52.6", 156 | ] 157 | 158 | [[package]] 159 | name = "base64" 160 | version = "0.22.1" 161 | source = "registry+https://github.com/rust-lang/crates.io-index" 162 | checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" 163 | 164 | [[package]] 165 | name = "bitflags" 166 | version = "2.9.3" 167 | source = "registry+https://github.com/rust-lang/crates.io-index" 168 | checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" 169 | 170 | [[package]] 171 | name = "bitvec" 172 | version = "1.0.1" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" 175 | dependencies = [ 176 | "funty", 177 | "radium", 178 | "tap", 179 | "wyz", 180 | ] 181 | 182 | [[package]] 183 | name = "blake3" 184 | version = "1.8.2" 185 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 186 | checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" 187 | dependencies = [ 188 | "arrayref", 189 | "arrayvec", 190 | "cc", 191 | "cfg-if", 192 | "constant_time_eq", 193 | ] 194 | 195 | [[package]] 196 | name = "borsh" 197 | version = "1.5.7" 198 | source = "registry+https://github.com/rust-lang/crates.io-index" 199 | checksum = "ad8646f98db542e39fc66e68a20b2144f6a732636df7c2354e74645faaa433ce" 200 | dependencies = [ 201 | "borsh-derive", 202 | "cfg_aliases", 203 | ] 204 | 205 | [[package]] 206 | name = "borsh-derive" 207 | version = "1.5.7" 208 | source = "registry+https://github.com/rust-lang/crates.io-index" 209 | checksum = "fdd1d3c0c2f5833f22386f252fe8ed005c7f59fdcddeef025c01b4c3b9fd9ac3" 210 | dependencies = [ 211 | "once_cell", 212 | "proc-macro-crate", 213 | "proc-macro2", 214 | "quote", 215 | "syn 2.0.106", 216 | ] 217 | 218 | [[package]] 219 | name = "bstr" 220 | version = "1.12.0" 221 | source = "registry+https://github.com/rust-lang/crates.io-index" 222 | checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" 223 | dependencies = [ 224 | "memchr", 225 | "regex-automata", 226 | "serde", 227 | ] 228 | 229 | [[package]] 230 | name = "bumpalo" 231 | version = "3.19.0" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" 234 | 235 | [[package]] 236 | name = "byte-unit" 237 | version = "5.1.6" 238 | source = "registry+https://github.com/rust-lang/crates.io-index" 239 | checksum = "e1cd29c3c585209b0cbc7309bfe3ed7efd8c84c21b7af29c8bfae908f8777174" 240 | dependencies = [ 241 | "rust_decimal", 242 | "serde", 243 | "utf8-width", 244 | ] 245 | 246 | [[package]] 247 | name = "bytecheck" 248 | version = "0.6.12" 249 | source = "registry+https://github.com/rust-lang/crates.io-index" 250 | checksum = 
"23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" 251 | dependencies = [ 252 | "bytecheck_derive", 253 | "ptr_meta", 254 | "simdutf8", 255 | ] 256 | 257 | [[package]] 258 | name = "bytecheck_derive" 259 | version = "0.6.12" 260 | source = "registry+https://github.com/rust-lang/crates.io-index" 261 | checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" 262 | dependencies = [ 263 | "proc-macro2", 264 | "quote", 265 | "syn 1.0.109", 266 | ] 267 | 268 | [[package]] 269 | name = "bytes" 270 | version = "1.10.1" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 273 | 274 | [[package]] 275 | name = "cc" 276 | version = "1.2.34" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" 279 | dependencies = [ 280 | "shlex", 281 | ] 282 | 283 | [[package]] 284 | name = "cfg-if" 285 | version = "1.0.3" 286 | source = "registry+https://github.com/rust-lang/crates.io-index" 287 | checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" 288 | 289 | [[package]] 290 | name = "cfg_aliases" 291 | version = "0.2.1" 292 | source = "registry+https://github.com/rust-lang/crates.io-index" 293 | checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" 294 | 295 | [[package]] 296 | name = "clap" 297 | version = "4.5.45" 298 | source = "registry+https://github.com/rust-lang/crates.io-index" 299 | checksum = "1fc0e74a703892159f5ae7d3aac52c8e6c392f5ae5f359c70b5881d60aaac318" 300 | dependencies = [ 301 | "clap_builder", 302 | "clap_derive", 303 | ] 304 | 305 | [[package]] 306 | name = "clap-verbosity-flag" 307 | version = "3.0.4" 308 | source = "registry+https://github.com/rust-lang/crates.io-index" 309 | checksum = "9d92b1fab272fe943881b77cc6e920d6543e5b1bfadbd5ed81c7c5a755742394" 310 | dependencies = [ 311 | 
"clap", 312 | "log", 313 | ] 314 | 315 | [[package]] 316 | name = "clap_builder" 317 | version = "4.5.44" 318 | source = "registry+https://github.com/rust-lang/crates.io-index" 319 | checksum = "b3e7f4214277f3c7aa526a59dd3fbe306a370daee1f8b7b8c987069cd8e888a8" 320 | dependencies = [ 321 | "anstream", 322 | "anstyle", 323 | "clap_lex", 324 | "strsim", 325 | ] 326 | 327 | [[package]] 328 | name = "clap_derive" 329 | version = "4.5.45" 330 | source = "registry+https://github.com/rust-lang/crates.io-index" 331 | checksum = "14cb31bb0a7d536caef2639baa7fad459e15c3144efefa6dbd1c84562c4739f6" 332 | dependencies = [ 333 | "heck", 334 | "proc-macro2", 335 | "quote", 336 | "syn 2.0.106", 337 | ] 338 | 339 | [[package]] 340 | name = "clap_lex" 341 | version = "0.7.5" 342 | source = "registry+https://github.com/rust-lang/crates.io-index" 343 | checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" 344 | 345 | [[package]] 346 | name = "colorchoice" 347 | version = "1.0.4" 348 | source = "registry+https://github.com/rust-lang/crates.io-index" 349 | checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" 350 | 351 | [[package]] 352 | name = "constant_time_eq" 353 | version = "0.3.1" 354 | source = "registry+https://github.com/rust-lang/crates.io-index" 355 | checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" 356 | 357 | [[package]] 358 | name = "crossbeam-channel" 359 | version = "0.5.15" 360 | source = "registry+https://github.com/rust-lang/crates.io-index" 361 | checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" 362 | dependencies = [ 363 | "crossbeam-utils", 364 | ] 365 | 366 | [[package]] 367 | name = "crossbeam-deque" 368 | version = "0.8.6" 369 | source = "registry+https://github.com/rust-lang/crates.io-index" 370 | checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" 371 | dependencies = [ 372 | "crossbeam-epoch", 373 | "crossbeam-utils", 374 | ] 375 | 
376 | [[package]] 377 | name = "crossbeam-epoch" 378 | version = "0.9.18" 379 | source = "registry+https://github.com/rust-lang/crates.io-index" 380 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 381 | dependencies = [ 382 | "crossbeam-utils", 383 | ] 384 | 385 | [[package]] 386 | name = "crossbeam-utils" 387 | version = "0.8.21" 388 | source = "registry+https://github.com/rust-lang/crates.io-index" 389 | checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 390 | 391 | [[package]] 392 | name = "csv" 393 | version = "1.3.1" 394 | source = "registry+https://github.com/rust-lang/crates.io-index" 395 | checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" 396 | dependencies = [ 397 | "csv-core", 398 | "itoa", 399 | "ryu", 400 | "serde", 401 | ] 402 | 403 | [[package]] 404 | name = "csv-core" 405 | version = "0.1.12" 406 | source = "registry+https://github.com/rust-lang/crates.io-index" 407 | checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" 408 | dependencies = [ 409 | "memchr", 410 | ] 411 | 412 | [[package]] 413 | name = "deranged" 414 | version = "0.4.0" 415 | source = "registry+https://github.com/rust-lang/crates.io-index" 416 | checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" 417 | dependencies = [ 418 | "powerfmt", 419 | ] 420 | 421 | [[package]] 422 | name = "difflib" 423 | version = "0.4.0" 424 | source = "registry+https://github.com/rust-lang/crates.io-index" 425 | checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" 426 | 427 | [[package]] 428 | name = "dirs" 429 | version = "6.0.0" 430 | source = "registry+https://github.com/rust-lang/crates.io-index" 431 | checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" 432 | dependencies = [ 433 | "dirs-sys", 434 | ] 435 | 436 | [[package]] 437 | name = "dirs-sys" 438 | version = "0.5.0" 439 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 440 | checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" 441 | dependencies = [ 442 | "libc", 443 | "option-ext", 444 | "redox_users", 445 | "windows-sys 0.60.2", 446 | ] 447 | 448 | [[package]] 449 | name = "doc-comment" 450 | version = "0.3.3" 451 | source = "registry+https://github.com/rust-lang/crates.io-index" 452 | checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" 453 | 454 | [[package]] 455 | name = "dunce" 456 | version = "1.0.5" 457 | source = "registry+https://github.com/rust-lang/crates.io-index" 458 | checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" 459 | 460 | [[package]] 461 | name = "either" 462 | version = "1.15.0" 463 | source = "registry+https://github.com/rust-lang/crates.io-index" 464 | checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 465 | 466 | [[package]] 467 | name = "env_filter" 468 | version = "0.1.3" 469 | source = "registry+https://github.com/rust-lang/crates.io-index" 470 | checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" 471 | dependencies = [ 472 | "log", 473 | "regex", 474 | ] 475 | 476 | [[package]] 477 | name = "env_logger" 478 | version = "0.11.8" 479 | source = "registry+https://github.com/rust-lang/crates.io-index" 480 | checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" 481 | dependencies = [ 482 | "anstream", 483 | "anstyle", 484 | "env_filter", 485 | "jiff", 486 | "log", 487 | ] 488 | 489 | [[package]] 490 | name = "equivalent" 491 | version = "1.0.2" 492 | source = "registry+https://github.com/rust-lang/crates.io-index" 493 | checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 494 | 495 | [[package]] 496 | name = "float-cmp" 497 | version = "0.10.0" 498 | source = "registry+https://github.com/rust-lang/crates.io-index" 499 | checksum = 
"b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" 500 | dependencies = [ 501 | "num-traits", 502 | ] 503 | 504 | [[package]] 505 | name = "funty" 506 | version = "2.0.0" 507 | source = "registry+https://github.com/rust-lang/crates.io-index" 508 | checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" 509 | 510 | [[package]] 511 | name = "getrandom" 512 | version = "0.2.16" 513 | source = "registry+https://github.com/rust-lang/crates.io-index" 514 | checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" 515 | dependencies = [ 516 | "cfg-if", 517 | "libc", 518 | "wasi 0.11.1+wasi-snapshot-preview1", 519 | ] 520 | 521 | [[package]] 522 | name = "getrandom" 523 | version = "0.3.3" 524 | source = "registry+https://github.com/rust-lang/crates.io-index" 525 | checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" 526 | dependencies = [ 527 | "cfg-if", 528 | "libc", 529 | "r-efi", 530 | "wasi 0.14.2+wasi-0.2.4", 531 | ] 532 | 533 | [[package]] 534 | name = "gimli" 535 | version = "0.31.1" 536 | source = "registry+https://github.com/rust-lang/crates.io-index" 537 | checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" 538 | 539 | [[package]] 540 | name = "globset" 541 | version = "0.4.16" 542 | source = "registry+https://github.com/rust-lang/crates.io-index" 543 | checksum = "54a1028dfc5f5df5da8a56a73e6c153c9a9708ec57232470703592a3f18e49f5" 544 | dependencies = [ 545 | "aho-corasick", 546 | "bstr", 547 | "log", 548 | "regex-automata", 549 | "regex-syntax", 550 | ] 551 | 552 | [[package]] 553 | name = "hashbrown" 554 | version = "0.12.3" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 557 | dependencies = [ 558 | "ahash 0.7.8", 559 | ] 560 | 561 | [[package]] 562 | name = "hashbrown" 563 | version = "0.15.5" 564 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 565 | checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" 566 | 567 | [[package]] 568 | name = "heck" 569 | version = "0.5.0" 570 | source = "registry+https://github.com/rust-lang/crates.io-index" 571 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 572 | 573 | [[package]] 574 | name = "hermit-abi" 575 | version = "0.5.2" 576 | source = "registry+https://github.com/rust-lang/crates.io-index" 577 | checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" 578 | 579 | [[package]] 580 | name = "highway" 581 | version = "1.3.0" 582 | source = "registry+https://github.com/rust-lang/crates.io-index" 583 | checksum = "9040319a6910b901d5d49cbada4a99db52836a1b63228a05f7e2b7f8feef89b1" 584 | 585 | [[package]] 586 | name = "human-panic" 587 | version = "2.0.3" 588 | source = "registry+https://github.com/rust-lang/crates.io-index" 589 | checksum = "ac63a746b187e95d51fe16850eb04d1cfef203f6af98e6c405a6f262ad3df00a" 590 | dependencies = [ 591 | "anstream", 592 | "anstyle", 593 | "backtrace", 594 | "os_info", 595 | "serde", 596 | "serde_derive", 597 | "toml", 598 | "uuid", 599 | ] 600 | 601 | [[package]] 602 | name = "ignore" 603 | version = "0.4.23" 604 | source = "registry+https://github.com/rust-lang/crates.io-index" 605 | checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b" 606 | dependencies = [ 607 | "crossbeam-deque", 608 | "globset", 609 | "log", 610 | "memchr", 611 | "regex-automata", 612 | "same-file", 613 | "walkdir", 614 | "winapi-util", 615 | ] 616 | 617 | [[package]] 618 | name = "indexmap" 619 | version = "2.11.0" 620 | source = "registry+https://github.com/rust-lang/crates.io-index" 621 | checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9" 622 | dependencies = [ 623 | "equivalent", 624 | "hashbrown 0.15.5", 625 | ] 626 | 627 | [[package]] 628 | name = "is_terminal_polyfill" 629 | 
version = "1.70.1" 630 | source = "registry+https://github.com/rust-lang/crates.io-index" 631 | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 632 | 633 | [[package]] 634 | name = "itoa" 635 | version = "1.0.15" 636 | source = "registry+https://github.com/rust-lang/crates.io-index" 637 | checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 638 | 639 | [[package]] 640 | name = "jiff" 641 | version = "0.2.15" 642 | source = "registry+https://github.com/rust-lang/crates.io-index" 643 | checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" 644 | dependencies = [ 645 | "jiff-static", 646 | "log", 647 | "portable-atomic", 648 | "portable-atomic-util", 649 | "serde", 650 | ] 651 | 652 | [[package]] 653 | name = "jiff-static" 654 | version = "0.2.15" 655 | source = "registry+https://github.com/rust-lang/crates.io-index" 656 | checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" 657 | dependencies = [ 658 | "proc-macro2", 659 | "quote", 660 | "syn 2.0.106", 661 | ] 662 | 663 | [[package]] 664 | name = "js-sys" 665 | version = "0.3.77" 666 | source = "registry+https://github.com/rust-lang/crates.io-index" 667 | checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" 668 | dependencies = [ 669 | "once_cell", 670 | "wasm-bindgen", 671 | ] 672 | 673 | [[package]] 674 | name = "libc" 675 | version = "0.2.175" 676 | source = "registry+https://github.com/rust-lang/crates.io-index" 677 | checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" 678 | 679 | [[package]] 680 | name = "libredox" 681 | version = "0.1.9" 682 | source = "registry+https://github.com/rust-lang/crates.io-index" 683 | checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" 684 | dependencies = [ 685 | "bitflags", 686 | "libc", 687 | ] 688 | 689 | [[package]] 690 | name = "log" 691 | version = "0.4.27" 692 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 693 | checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" 694 | 695 | [[package]] 696 | name = "memchr" 697 | version = "2.7.5" 698 | source = "registry+https://github.com/rust-lang/crates.io-index" 699 | checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" 700 | 701 | [[package]] 702 | name = "metrohash" 703 | version = "1.0.7" 704 | source = "registry+https://github.com/rust-lang/crates.io-index" 705 | checksum = "a84011bfadc339f60fbcc38181da8a0a91cd16375394dd52edf9da80deacd8c5" 706 | 707 | [[package]] 708 | name = "miniz_oxide" 709 | version = "0.8.9" 710 | source = "registry+https://github.com/rust-lang/crates.io-index" 711 | checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" 712 | dependencies = [ 713 | "adler2", 714 | ] 715 | 716 | [[package]] 717 | name = "normalize-line-endings" 718 | version = "0.3.0" 719 | source = "registry+https://github.com/rust-lang/crates.io-index" 720 | checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" 721 | 722 | [[package]] 723 | name = "num-conv" 724 | version = "0.1.0" 725 | source = "registry+https://github.com/rust-lang/crates.io-index" 726 | checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" 727 | 728 | [[package]] 729 | name = "num-traits" 730 | version = "0.2.19" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 733 | dependencies = [ 734 | "autocfg", 735 | ] 736 | 737 | [[package]] 738 | name = "num_cpus" 739 | version = "1.17.0" 740 | source = "registry+https://github.com/rust-lang/crates.io-index" 741 | checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" 742 | dependencies = [ 743 | "hermit-abi", 744 | "libc", 745 | ] 746 | 747 | [[package]] 748 | name = "object" 749 | version = "0.36.7" 750 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 751 | checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" 752 | dependencies = [ 753 | "memchr", 754 | ] 755 | 756 | [[package]] 757 | name = "once_cell" 758 | version = "1.21.3" 759 | source = "registry+https://github.com/rust-lang/crates.io-index" 760 | checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 761 | 762 | [[package]] 763 | name = "once_cell_polyfill" 764 | version = "1.70.1" 765 | source = "registry+https://github.com/rust-lang/crates.io-index" 766 | checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" 767 | 768 | [[package]] 769 | name = "option-ext" 770 | version = "0.2.0" 771 | source = "registry+https://github.com/rust-lang/crates.io-index" 772 | checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" 773 | 774 | [[package]] 775 | name = "os_info" 776 | version = "3.12.0" 777 | source = "registry+https://github.com/rust-lang/crates.io-index" 778 | checksum = "d0e1ac5fde8d43c34139135df8ea9ee9465394b2d8d20f032d38998f64afffc3" 779 | dependencies = [ 780 | "log", 781 | "plist", 782 | "serde", 783 | "windows-sys 0.52.0", 784 | ] 785 | 786 | [[package]] 787 | name = "plist" 788 | version = "1.7.4" 789 | source = "registry+https://github.com/rust-lang/crates.io-index" 790 | checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1" 791 | dependencies = [ 792 | "base64", 793 | "indexmap", 794 | "quick-xml", 795 | "serde", 796 | "time", 797 | ] 798 | 799 | [[package]] 800 | name = "portable-atomic" 801 | version = "1.11.1" 802 | source = "registry+https://github.com/rust-lang/crates.io-index" 803 | checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" 804 | 805 | [[package]] 806 | name = "portable-atomic-util" 807 | version = "0.2.4" 808 | source = "registry+https://github.com/rust-lang/crates.io-index" 809 | checksum = 
"d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 810 | dependencies = [ 811 | "portable-atomic", 812 | ] 813 | 814 | [[package]] 815 | name = "powerfmt" 816 | version = "0.2.0" 817 | source = "registry+https://github.com/rust-lang/crates.io-index" 818 | checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" 819 | 820 | [[package]] 821 | name = "ppv-lite86" 822 | version = "0.2.21" 823 | source = "registry+https://github.com/rust-lang/crates.io-index" 824 | checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 825 | dependencies = [ 826 | "zerocopy", 827 | ] 828 | 829 | [[package]] 830 | name = "predicates" 831 | version = "3.1.3" 832 | source = "registry+https://github.com/rust-lang/crates.io-index" 833 | checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" 834 | dependencies = [ 835 | "anstyle", 836 | "difflib", 837 | "float-cmp", 838 | "normalize-line-endings", 839 | "predicates-core", 840 | "regex", 841 | ] 842 | 843 | [[package]] 844 | name = "predicates-core" 845 | version = "1.0.9" 846 | source = "registry+https://github.com/rust-lang/crates.io-index" 847 | checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa" 848 | 849 | [[package]] 850 | name = "predicates-tree" 851 | version = "1.0.12" 852 | source = "registry+https://github.com/rust-lang/crates.io-index" 853 | checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c" 854 | dependencies = [ 855 | "predicates-core", 856 | "termtree", 857 | ] 858 | 859 | [[package]] 860 | name = "proc-macro-crate" 861 | version = "3.3.0" 862 | source = "registry+https://github.com/rust-lang/crates.io-index" 863 | checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" 864 | dependencies = [ 865 | "toml_edit", 866 | ] 867 | 868 | [[package]] 869 | name = "proc-macro2" 870 | version = "1.0.101" 871 | source = "registry+https://github.com/rust-lang/crates.io-index" 872 | 
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" 873 | dependencies = [ 874 | "unicode-ident", 875 | ] 876 | 877 | [[package]] 878 | name = "ptr_meta" 879 | version = "0.1.4" 880 | source = "registry+https://github.com/rust-lang/crates.io-index" 881 | checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" 882 | dependencies = [ 883 | "ptr_meta_derive", 884 | ] 885 | 886 | [[package]] 887 | name = "ptr_meta_derive" 888 | version = "0.1.4" 889 | source = "registry+https://github.com/rust-lang/crates.io-index" 890 | checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" 891 | dependencies = [ 892 | "proc-macro2", 893 | "quote", 894 | "syn 1.0.109", 895 | ] 896 | 897 | [[package]] 898 | name = "quick-xml" 899 | version = "0.38.3" 900 | source = "registry+https://github.com/rust-lang/crates.io-index" 901 | checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" 902 | dependencies = [ 903 | "memchr", 904 | ] 905 | 906 | [[package]] 907 | name = "quote" 908 | version = "1.0.40" 909 | source = "registry+https://github.com/rust-lang/crates.io-index" 910 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" 911 | dependencies = [ 912 | "proc-macro2", 913 | ] 914 | 915 | [[package]] 916 | name = "r-efi" 917 | version = "5.3.0" 918 | source = "registry+https://github.com/rust-lang/crates.io-index" 919 | checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" 920 | 921 | [[package]] 922 | name = "radium" 923 | version = "0.7.0" 924 | source = "registry+https://github.com/rust-lang/crates.io-index" 925 | checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" 926 | 927 | [[package]] 928 | name = "rand" 929 | version = "0.8.5" 930 | source = "registry+https://github.com/rust-lang/crates.io-index" 931 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 932 | dependencies = [ 933 | "libc", 
934 | "rand_chacha 0.3.1", 935 | "rand_core 0.6.4", 936 | ] 937 | 938 | [[package]] 939 | name = "rand" 940 | version = "0.9.2" 941 | source = "registry+https://github.com/rust-lang/crates.io-index" 942 | checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" 943 | dependencies = [ 944 | "rand_chacha 0.9.0", 945 | "rand_core 0.9.3", 946 | ] 947 | 948 | [[package]] 949 | name = "rand_chacha" 950 | version = "0.3.1" 951 | source = "registry+https://github.com/rust-lang/crates.io-index" 952 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 953 | dependencies = [ 954 | "ppv-lite86", 955 | "rand_core 0.6.4", 956 | ] 957 | 958 | [[package]] 959 | name = "rand_chacha" 960 | version = "0.9.0" 961 | source = "registry+https://github.com/rust-lang/crates.io-index" 962 | checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" 963 | dependencies = [ 964 | "ppv-lite86", 965 | "rand_core 0.9.3", 966 | ] 967 | 968 | [[package]] 969 | name = "rand_core" 970 | version = "0.6.4" 971 | source = "registry+https://github.com/rust-lang/crates.io-index" 972 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 973 | dependencies = [ 974 | "getrandom 0.2.16", 975 | ] 976 | 977 | [[package]] 978 | name = "rand_core" 979 | version = "0.9.3" 980 | source = "registry+https://github.com/rust-lang/crates.io-index" 981 | checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" 982 | dependencies = [ 983 | "getrandom 0.3.3", 984 | ] 985 | 986 | [[package]] 987 | name = "rayon" 988 | version = "1.11.0" 989 | source = "registry+https://github.com/rust-lang/crates.io-index" 990 | checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" 991 | dependencies = [ 992 | "either", 993 | "rayon-core", 994 | ] 995 | 996 | [[package]] 997 | name = "rayon-core" 998 | version = "1.13.0" 999 | source = "registry+https://github.com/rust-lang/crates.io-index" 1000 | 
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" 1001 | dependencies = [ 1002 | "crossbeam-deque", 1003 | "crossbeam-utils", 1004 | ] 1005 | 1006 | [[package]] 1007 | name = "redox_users" 1008 | version = "0.5.2" 1009 | source = "registry+https://github.com/rust-lang/crates.io-index" 1010 | checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" 1011 | dependencies = [ 1012 | "getrandom 0.2.16", 1013 | "libredox", 1014 | "thiserror", 1015 | ] 1016 | 1017 | [[package]] 1018 | name = "regex" 1019 | version = "1.11.2" 1020 | source = "registry+https://github.com/rust-lang/crates.io-index" 1021 | checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" 1022 | dependencies = [ 1023 | "aho-corasick", 1024 | "memchr", 1025 | "regex-automata", 1026 | "regex-syntax", 1027 | ] 1028 | 1029 | [[package]] 1030 | name = "regex-automata" 1031 | version = "0.4.10" 1032 | source = "registry+https://github.com/rust-lang/crates.io-index" 1033 | checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" 1034 | dependencies = [ 1035 | "aho-corasick", 1036 | "memchr", 1037 | "regex-syntax", 1038 | ] 1039 | 1040 | [[package]] 1041 | name = "regex-syntax" 1042 | version = "0.8.6" 1043 | source = "registry+https://github.com/rust-lang/crates.io-index" 1044 | checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" 1045 | 1046 | [[package]] 1047 | name = "rend" 1048 | version = "0.4.2" 1049 | source = "registry+https://github.com/rust-lang/crates.io-index" 1050 | checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" 1051 | dependencies = [ 1052 | "bytecheck", 1053 | ] 1054 | 1055 | [[package]] 1056 | name = "rkyv" 1057 | version = "0.7.45" 1058 | source = "registry+https://github.com/rust-lang/crates.io-index" 1059 | checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" 1060 | dependencies = [ 1061 | "bitvec", 1062 | "bytecheck", 
1063 | "bytes", 1064 | "hashbrown 0.12.3", 1065 | "ptr_meta", 1066 | "rend", 1067 | "rkyv_derive", 1068 | "seahash", 1069 | "tinyvec", 1070 | "uuid", 1071 | ] 1072 | 1073 | [[package]] 1074 | name = "rkyv_derive" 1075 | version = "0.7.45" 1076 | source = "registry+https://github.com/rust-lang/crates.io-index" 1077 | checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" 1078 | dependencies = [ 1079 | "proc-macro2", 1080 | "quote", 1081 | "syn 1.0.109", 1082 | ] 1083 | 1084 | [[package]] 1085 | name = "rust_decimal" 1086 | version = "1.37.2" 1087 | source = "registry+https://github.com/rust-lang/crates.io-index" 1088 | checksum = "b203a6425500a03e0919c42d3c47caca51e79f1132046626d2c8871c5092035d" 1089 | dependencies = [ 1090 | "arrayvec", 1091 | "borsh", 1092 | "bytes", 1093 | "num-traits", 1094 | "rand 0.8.5", 1095 | "rkyv", 1096 | "serde", 1097 | "serde_json", 1098 | ] 1099 | 1100 | [[package]] 1101 | name = "rustc-demangle" 1102 | version = "0.1.26" 1103 | source = "registry+https://github.com/rust-lang/crates.io-index" 1104 | checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" 1105 | 1106 | [[package]] 1107 | name = "rustversion" 1108 | version = "1.0.22" 1109 | source = "registry+https://github.com/rust-lang/crates.io-index" 1110 | checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" 1111 | 1112 | [[package]] 1113 | name = "ryu" 1114 | version = "1.0.20" 1115 | source = "registry+https://github.com/rust-lang/crates.io-index" 1116 | checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 1117 | 1118 | [[package]] 1119 | name = "same-file" 1120 | version = "1.0.6" 1121 | source = "registry+https://github.com/rust-lang/crates.io-index" 1122 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 1123 | dependencies = [ 1124 | "winapi-util", 1125 | ] 1126 | 1127 | [[package]] 1128 | name = "seahash" 1129 | version = "4.1.0" 1130 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1131 | checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" 1132 | 1133 | [[package]] 1134 | name = "serde" 1135 | version = "1.0.219" 1136 | source = "registry+https://github.com/rust-lang/crates.io-index" 1137 | checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" 1138 | dependencies = [ 1139 | "serde_derive", 1140 | ] 1141 | 1142 | [[package]] 1143 | name = "serde_derive" 1144 | version = "1.0.219" 1145 | source = "registry+https://github.com/rust-lang/crates.io-index" 1146 | checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" 1147 | dependencies = [ 1148 | "proc-macro2", 1149 | "quote", 1150 | "syn 2.0.106", 1151 | ] 1152 | 1153 | [[package]] 1154 | name = "serde_json" 1155 | version = "1.0.143" 1156 | source = "registry+https://github.com/rust-lang/crates.io-index" 1157 | checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" 1158 | dependencies = [ 1159 | "itoa", 1160 | "memchr", 1161 | "ryu", 1162 | "serde", 1163 | ] 1164 | 1165 | [[package]] 1166 | name = "serde_spanned" 1167 | version = "1.0.0" 1168 | source = "registry+https://github.com/rust-lang/crates.io-index" 1169 | checksum = "40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83" 1170 | dependencies = [ 1171 | "serde", 1172 | ] 1173 | 1174 | [[package]] 1175 | name = "shlex" 1176 | version = "1.3.0" 1177 | source = "registry+https://github.com/rust-lang/crates.io-index" 1178 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 1179 | 1180 | [[package]] 1181 | name = "simdutf8" 1182 | version = "0.1.5" 1183 | source = "registry+https://github.com/rust-lang/crates.io-index" 1184 | checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" 1185 | 1186 | [[package]] 1187 | name = "strsim" 1188 | version = "0.11.1" 1189 | source = "registry+https://github.com/rust-lang/crates.io-index" 1190 | 
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 1191 | 1192 | [[package]] 1193 | name = "syn" 1194 | version = "1.0.109" 1195 | source = "registry+https://github.com/rust-lang/crates.io-index" 1196 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 1197 | dependencies = [ 1198 | "proc-macro2", 1199 | "quote", 1200 | "unicode-ident", 1201 | ] 1202 | 1203 | [[package]] 1204 | name = "syn" 1205 | version = "2.0.106" 1206 | source = "registry+https://github.com/rust-lang/crates.io-index" 1207 | checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" 1208 | dependencies = [ 1209 | "proc-macro2", 1210 | "quote", 1211 | "unicode-ident", 1212 | ] 1213 | 1214 | [[package]] 1215 | name = "tap" 1216 | version = "1.0.1" 1217 | source = "registry+https://github.com/rust-lang/crates.io-index" 1218 | checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" 1219 | 1220 | [[package]] 1221 | name = "termtree" 1222 | version = "0.5.1" 1223 | source = "registry+https://github.com/rust-lang/crates.io-index" 1224 | checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" 1225 | 1226 | [[package]] 1227 | name = "thiserror" 1228 | version = "2.0.16" 1229 | source = "registry+https://github.com/rust-lang/crates.io-index" 1230 | checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" 1231 | dependencies = [ 1232 | "thiserror-impl", 1233 | ] 1234 | 1235 | [[package]] 1236 | name = "thiserror-impl" 1237 | version = "2.0.16" 1238 | source = "registry+https://github.com/rust-lang/crates.io-index" 1239 | checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" 1240 | dependencies = [ 1241 | "proc-macro2", 1242 | "quote", 1243 | "syn 2.0.106", 1244 | ] 1245 | 1246 | [[package]] 1247 | name = "time" 1248 | version = "0.3.41" 1249 | source = "registry+https://github.com/rust-lang/crates.io-index" 1250 | checksum = 
"8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" 1251 | dependencies = [ 1252 | "deranged", 1253 | "itoa", 1254 | "num-conv", 1255 | "powerfmt", 1256 | "serde", 1257 | "time-core", 1258 | "time-macros", 1259 | ] 1260 | 1261 | [[package]] 1262 | name = "time-core" 1263 | version = "0.1.4" 1264 | source = "registry+https://github.com/rust-lang/crates.io-index" 1265 | checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" 1266 | 1267 | [[package]] 1268 | name = "time-macros" 1269 | version = "0.2.22" 1270 | source = "registry+https://github.com/rust-lang/crates.io-index" 1271 | checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" 1272 | dependencies = [ 1273 | "num-conv", 1274 | "time-core", 1275 | ] 1276 | 1277 | [[package]] 1278 | name = "tinyvec" 1279 | version = "1.10.0" 1280 | source = "registry+https://github.com/rust-lang/crates.io-index" 1281 | checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" 1282 | dependencies = [ 1283 | "tinyvec_macros", 1284 | ] 1285 | 1286 | [[package]] 1287 | name = "tinyvec_macros" 1288 | version = "0.1.1" 1289 | source = "registry+https://github.com/rust-lang/crates.io-index" 1290 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 1291 | 1292 | [[package]] 1293 | name = "toml" 1294 | version = "0.9.5" 1295 | source = "registry+https://github.com/rust-lang/crates.io-index" 1296 | checksum = "75129e1dc5000bfbaa9fee9d1b21f974f9fbad9daec557a521ee6e080825f6e8" 1297 | dependencies = [ 1298 | "serde", 1299 | "serde_spanned", 1300 | "toml_datetime 0.7.0", 1301 | "toml_writer", 1302 | ] 1303 | 1304 | [[package]] 1305 | name = "toml_datetime" 1306 | version = "0.6.11" 1307 | source = "registry+https://github.com/rust-lang/crates.io-index" 1308 | checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" 1309 | 1310 | [[package]] 1311 | name = "toml_datetime" 1312 | version = "0.7.0" 1313 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1314 | checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3" 1315 | dependencies = [ 1316 | "serde", 1317 | ] 1318 | 1319 | [[package]] 1320 | name = "toml_edit" 1321 | version = "0.22.27" 1322 | source = "registry+https://github.com/rust-lang/crates.io-index" 1323 | checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" 1324 | dependencies = [ 1325 | "indexmap", 1326 | "toml_datetime 0.6.11", 1327 | "winnow", 1328 | ] 1329 | 1330 | [[package]] 1331 | name = "toml_writer" 1332 | version = "1.0.2" 1333 | source = "registry+https://github.com/rust-lang/crates.io-index" 1334 | checksum = "fcc842091f2def52017664b53082ecbbeb5c7731092bad69d2c63050401dfd64" 1335 | 1336 | [[package]] 1337 | name = "twox-hash" 1338 | version = "2.1.1" 1339 | source = "registry+https://github.com/rust-lang/crates.io-index" 1340 | checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" 1341 | dependencies = [ 1342 | "rand 0.9.2", 1343 | ] 1344 | 1345 | [[package]] 1346 | name = "typed-builder" 1347 | version = "0.21.2" 1348 | source = "registry+https://github.com/rust-lang/crates.io-index" 1349 | checksum = "fef81aec2ca29576f9f6ae8755108640d0a86dd3161b2e8bca6cfa554e98f77d" 1350 | dependencies = [ 1351 | "typed-builder-macro", 1352 | ] 1353 | 1354 | [[package]] 1355 | name = "typed-builder-macro" 1356 | version = "0.21.2" 1357 | source = "registry+https://github.com/rust-lang/crates.io-index" 1358 | checksum = "1ecb9ecf7799210407c14a8cfdfe0173365780968dc57973ed082211958e0b18" 1359 | dependencies = [ 1360 | "proc-macro2", 1361 | "quote", 1362 | "syn 2.0.106", 1363 | ] 1364 | 1365 | [[package]] 1366 | name = "unicode-ident" 1367 | version = "1.0.18" 1368 | source = "registry+https://github.com/rust-lang/crates.io-index" 1369 | checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" 1370 | 1371 | [[package]] 1372 | name = "utf8-width" 1373 | version = 
"0.1.7" 1374 | source = "registry+https://github.com/rust-lang/crates.io-index" 1375 | checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" 1376 | 1377 | [[package]] 1378 | name = "utf8parse" 1379 | version = "0.2.2" 1380 | source = "registry+https://github.com/rust-lang/crates.io-index" 1381 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 1382 | 1383 | [[package]] 1384 | name = "uuid" 1385 | version = "1.18.0" 1386 | source = "registry+https://github.com/rust-lang/crates.io-index" 1387 | checksum = "f33196643e165781c20a5ead5582283a7dacbb87855d867fbc2df3f81eddc1be" 1388 | dependencies = [ 1389 | "getrandom 0.3.3", 1390 | "js-sys", 1391 | "wasm-bindgen", 1392 | ] 1393 | 1394 | [[package]] 1395 | name = "version_check" 1396 | version = "0.9.5" 1397 | source = "registry+https://github.com/rust-lang/crates.io-index" 1398 | checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 1399 | 1400 | [[package]] 1401 | name = "wait-timeout" 1402 | version = "0.2.1" 1403 | source = "registry+https://github.com/rust-lang/crates.io-index" 1404 | checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" 1405 | dependencies = [ 1406 | "libc", 1407 | ] 1408 | 1409 | [[package]] 1410 | name = "walkdir" 1411 | version = "2.5.0" 1412 | source = "registry+https://github.com/rust-lang/crates.io-index" 1413 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 1414 | dependencies = [ 1415 | "same-file", 1416 | "winapi-util", 1417 | ] 1418 | 1419 | [[package]] 1420 | name = "wasi" 1421 | version = "0.11.1+wasi-snapshot-preview1" 1422 | source = "registry+https://github.com/rust-lang/crates.io-index" 1423 | checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" 1424 | 1425 | [[package]] 1426 | name = "wasi" 1427 | version = "0.14.2+wasi-0.2.4" 1428 | source = "registry+https://github.com/rust-lang/crates.io-index" 1429 | checksum = 
"9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" 1430 | dependencies = [ 1431 | "wit-bindgen-rt", 1432 | ] 1433 | 1434 | [[package]] 1435 | name = "wasm-bindgen" 1436 | version = "0.2.100" 1437 | source = "registry+https://github.com/rust-lang/crates.io-index" 1438 | checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" 1439 | dependencies = [ 1440 | "cfg-if", 1441 | "once_cell", 1442 | "rustversion", 1443 | "wasm-bindgen-macro", 1444 | ] 1445 | 1446 | [[package]] 1447 | name = "wasm-bindgen-backend" 1448 | version = "0.2.100" 1449 | source = "registry+https://github.com/rust-lang/crates.io-index" 1450 | checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" 1451 | dependencies = [ 1452 | "bumpalo", 1453 | "log", 1454 | "proc-macro2", 1455 | "quote", 1456 | "syn 2.0.106", 1457 | "wasm-bindgen-shared", 1458 | ] 1459 | 1460 | [[package]] 1461 | name = "wasm-bindgen-macro" 1462 | version = "0.2.100" 1463 | source = "registry+https://github.com/rust-lang/crates.io-index" 1464 | checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" 1465 | dependencies = [ 1466 | "quote", 1467 | "wasm-bindgen-macro-support", 1468 | ] 1469 | 1470 | [[package]] 1471 | name = "wasm-bindgen-macro-support" 1472 | version = "0.2.100" 1473 | source = "registry+https://github.com/rust-lang/crates.io-index" 1474 | checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" 1475 | dependencies = [ 1476 | "proc-macro2", 1477 | "quote", 1478 | "syn 2.0.106", 1479 | "wasm-bindgen-backend", 1480 | "wasm-bindgen-shared", 1481 | ] 1482 | 1483 | [[package]] 1484 | name = "wasm-bindgen-shared" 1485 | version = "0.2.100" 1486 | source = "registry+https://github.com/rust-lang/crates.io-index" 1487 | checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" 1488 | dependencies = [ 1489 | "unicode-ident", 1490 | ] 1491 | 1492 | [[package]] 1493 | name = "winapi-util" 1494 | version = 
"0.1.10" 1495 | source = "registry+https://github.com/rust-lang/crates.io-index" 1496 | checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22" 1497 | dependencies = [ 1498 | "windows-sys 0.60.2", 1499 | ] 1500 | 1501 | [[package]] 1502 | name = "windows-link" 1503 | version = "0.1.3" 1504 | source = "registry+https://github.com/rust-lang/crates.io-index" 1505 | checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" 1506 | 1507 | [[package]] 1508 | name = "windows-sys" 1509 | version = "0.52.0" 1510 | source = "registry+https://github.com/rust-lang/crates.io-index" 1511 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 1512 | dependencies = [ 1513 | "windows-targets 0.52.6", 1514 | ] 1515 | 1516 | [[package]] 1517 | name = "windows-sys" 1518 | version = "0.60.2" 1519 | source = "registry+https://github.com/rust-lang/crates.io-index" 1520 | checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" 1521 | dependencies = [ 1522 | "windows-targets 0.53.3", 1523 | ] 1524 | 1525 | [[package]] 1526 | name = "windows-targets" 1527 | version = "0.52.6" 1528 | source = "registry+https://github.com/rust-lang/crates.io-index" 1529 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1530 | dependencies = [ 1531 | "windows_aarch64_gnullvm 0.52.6", 1532 | "windows_aarch64_msvc 0.52.6", 1533 | "windows_i686_gnu 0.52.6", 1534 | "windows_i686_gnullvm 0.52.6", 1535 | "windows_i686_msvc 0.52.6", 1536 | "windows_x86_64_gnu 0.52.6", 1537 | "windows_x86_64_gnullvm 0.52.6", 1538 | "windows_x86_64_msvc 0.52.6", 1539 | ] 1540 | 1541 | [[package]] 1542 | name = "windows-targets" 1543 | version = "0.53.3" 1544 | source = "registry+https://github.com/rust-lang/crates.io-index" 1545 | checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" 1546 | dependencies = [ 1547 | "windows-link", 1548 | "windows_aarch64_gnullvm 0.53.0", 1549 | 
"windows_aarch64_msvc 0.53.0", 1550 | "windows_i686_gnu 0.53.0", 1551 | "windows_i686_gnullvm 0.53.0", 1552 | "windows_i686_msvc 0.53.0", 1553 | "windows_x86_64_gnu 0.53.0", 1554 | "windows_x86_64_gnullvm 0.53.0", 1555 | "windows_x86_64_msvc 0.53.0", 1556 | ] 1557 | 1558 | [[package]] 1559 | name = "windows_aarch64_gnullvm" 1560 | version = "0.52.6" 1561 | source = "registry+https://github.com/rust-lang/crates.io-index" 1562 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1563 | 1564 | [[package]] 1565 | name = "windows_aarch64_gnullvm" 1566 | version = "0.53.0" 1567 | source = "registry+https://github.com/rust-lang/crates.io-index" 1568 | checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" 1569 | 1570 | [[package]] 1571 | name = "windows_aarch64_msvc" 1572 | version = "0.52.6" 1573 | source = "registry+https://github.com/rust-lang/crates.io-index" 1574 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1575 | 1576 | [[package]] 1577 | name = "windows_aarch64_msvc" 1578 | version = "0.53.0" 1579 | source = "registry+https://github.com/rust-lang/crates.io-index" 1580 | checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" 1581 | 1582 | [[package]] 1583 | name = "windows_i686_gnu" 1584 | version = "0.52.6" 1585 | source = "registry+https://github.com/rust-lang/crates.io-index" 1586 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1587 | 1588 | [[package]] 1589 | name = "windows_i686_gnu" 1590 | version = "0.53.0" 1591 | source = "registry+https://github.com/rust-lang/crates.io-index" 1592 | checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" 1593 | 1594 | [[package]] 1595 | name = "windows_i686_gnullvm" 1596 | version = "0.52.6" 1597 | source = "registry+https://github.com/rust-lang/crates.io-index" 1598 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 1599 | 1600 | 
[[package]] 1601 | name = "windows_i686_gnullvm" 1602 | version = "0.53.0" 1603 | source = "registry+https://github.com/rust-lang/crates.io-index" 1604 | checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" 1605 | 1606 | [[package]] 1607 | name = "windows_i686_msvc" 1608 | version = "0.52.6" 1609 | source = "registry+https://github.com/rust-lang/crates.io-index" 1610 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1611 | 1612 | [[package]] 1613 | name = "windows_i686_msvc" 1614 | version = "0.53.0" 1615 | source = "registry+https://github.com/rust-lang/crates.io-index" 1616 | checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" 1617 | 1618 | [[package]] 1619 | name = "windows_x86_64_gnu" 1620 | version = "0.52.6" 1621 | source = "registry+https://github.com/rust-lang/crates.io-index" 1622 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1623 | 1624 | [[package]] 1625 | name = "windows_x86_64_gnu" 1626 | version = "0.53.0" 1627 | source = "registry+https://github.com/rust-lang/crates.io-index" 1628 | checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" 1629 | 1630 | [[package]] 1631 | name = "windows_x86_64_gnullvm" 1632 | version = "0.52.6" 1633 | source = "registry+https://github.com/rust-lang/crates.io-index" 1634 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1635 | 1636 | [[package]] 1637 | name = "windows_x86_64_gnullvm" 1638 | version = "0.53.0" 1639 | source = "registry+https://github.com/rust-lang/crates.io-index" 1640 | checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" 1641 | 1642 | [[package]] 1643 | name = "windows_x86_64_msvc" 1644 | version = "0.52.6" 1645 | source = "registry+https://github.com/rust-lang/crates.io-index" 1646 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1647 | 1648 | [[package]] 1649 | name = 
"windows_x86_64_msvc" 1650 | version = "0.53.0" 1651 | source = "registry+https://github.com/rust-lang/crates.io-index" 1652 | checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" 1653 | 1654 | [[package]] 1655 | name = "winnow" 1656 | version = "0.7.13" 1657 | source = "registry+https://github.com/rust-lang/crates.io-index" 1658 | checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" 1659 | dependencies = [ 1660 | "memchr", 1661 | ] 1662 | 1663 | [[package]] 1664 | name = "wit-bindgen-rt" 1665 | version = "0.39.0" 1666 | source = "registry+https://github.com/rust-lang/crates.io-index" 1667 | checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" 1668 | dependencies = [ 1669 | "bitflags", 1670 | ] 1671 | 1672 | [[package]] 1673 | name = "wyz" 1674 | version = "0.5.1" 1675 | source = "registry+https://github.com/rust-lang/crates.io-index" 1676 | checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" 1677 | dependencies = [ 1678 | "tap", 1679 | ] 1680 | 1681 | [[package]] 1682 | name = "yadf" 1683 | version = "1.3.0" 1684 | dependencies = [ 1685 | "ahash 0.8.12", 1686 | "anyhow", 1687 | "assert_cmd", 1688 | "blake3", 1689 | "byte-unit", 1690 | "clap", 1691 | "clap-verbosity-flag", 1692 | "crossbeam-channel", 1693 | "csv", 1694 | "dirs", 1695 | "dunce", 1696 | "env_logger", 1697 | "globset", 1698 | "highway", 1699 | "human-panic", 1700 | "ignore", 1701 | "log", 1702 | "metrohash", 1703 | "num_cpus", 1704 | "once_cell", 1705 | "predicates", 1706 | "rand 0.9.2", 1707 | "rayon", 1708 | "regex", 1709 | "seahash", 1710 | "serde", 1711 | "serde_json", 1712 | "twox-hash", 1713 | "typed-builder", 1714 | ] 1715 | 1716 | [[package]] 1717 | name = "zerocopy" 1718 | version = "0.8.26" 1719 | source = "registry+https://github.com/rust-lang/crates.io-index" 1720 | checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" 1721 | dependencies = [ 1722 | 
"zerocopy-derive", 1723 | ] 1724 | 1725 | [[package]] 1726 | name = "zerocopy-derive" 1727 | version = "0.8.26" 1728 | source = "registry+https://github.com/rust-lang/crates.io-index" 1729 | checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" 1730 | dependencies = [ 1731 | "proc-macro2", 1732 | "quote", 1733 | "syn 2.0.106", 1734 | ] 1735 | --------------------------------------------------------------------------------