├── .cargo └── config.toml ├── .github ├── chglog │ ├── RELEASE.tpl.md │ └── release.yml └── workflows │ ├── ci.yml │ └── release.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── examples └── keep_oldest.py ├── scripts ├── bench.sh └── yadf.py ├── src ├── args.rs ├── bag.rs ├── bag │ ├── display.rs │ ├── replicates.rs │ └── serialize.rs ├── ext.rs ├── fs.rs ├── fs │ ├── filter.rs │ └── hash.rs ├── hasher.rs ├── lib.rs ├── main.rs └── path.rs └── tests ├── common └── mod.rs ├── integration.rs ├── particular_cases.rs └── static ├── bar ├── foo ├── qax ├── qix └── qux /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | rustflags = ["-C", "target-cpu=native"] 3 | -------------------------------------------------------------------------------- /.github/chglog/RELEASE.tpl.md: -------------------------------------------------------------------------------- 1 | {{ range .Versions }} 2 | {{ if .Tag.Previous }}[Diff between versions {{ .Tag.Previous.Name }}...{{ .Tag.Name }}]({{ $.Info.RepositoryURL }}/compare/{{ .Tag.Previous.Name }}...{{ .Tag.Name }}){{ else }}{{ .Tag.Name }}{{ end }} ({{ datetime "2006-01-02" .Tag.Date }}) 3 | 4 | {{ range .CommitGroups -}} 5 | ### {{ .Title }} 6 | 7 | {{ range .Commits -}} 8 | * {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }} 9 | {{ end }} 10 | {{ end -}} 11 | 12 | {{- if .RevertCommits -}} 13 | ### Reverts 14 | 15 | {{ range .RevertCommits -}} 16 | * {{ .Revert.Header }} 17 | {{ end }} 18 | {{ end -}} 19 | 20 | {{- if .NoteGroups -}} 21 | {{ range .NoteGroups -}} 22 | ### {{ .Title }} 23 | 24 | {{ range .Notes }} 25 | {{ .Body }} 26 | {{ end }} 27 | {{ end -}} 28 | {{ end -}} 29 | {{ end -}} 30 | -------------------------------------------------------------------------------- /.github/chglog/release.yml: -------------------------------------------------------------------------------- 1 | style: github 2 | template: 
RELEASE.tpl.md 3 | info: 4 | repository_url: https://github.com/jRimbault/yadf 5 | options: 6 | commits: 7 | # filters: 8 | # Type: 9 | # - feat 10 | # - fix 11 | # - perf 12 | # - refactor 13 | commit_groups: 14 | title_maps: 15 | feat: Features 16 | fix: Bug Fixes 17 | perf: Performance Improvements 18 | refactor: Code Refactoring 19 | ci: Continuous Integration 20 | doc: Documentation 21 | header: 22 | pattern: "^(\\w*)(?:\\(([\\w\\$\\.\\-\\*\\s]*)\\))?\\:\\s(.*)$" 23 | pattern_maps: 24 | - Type 25 | - Scope 26 | - Subject 27 | notes: 28 | keywords: 29 | - BREAKING CHANGE 30 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - "**.md" 7 | branches-ignore: 8 | - "try/**" 9 | pull_request: 10 | paths-ignore: 11 | - "**.md" 12 | schedule: 13 | # At 13:23 on day-of-month 23. 14 | - cron: "23 13 23 * *" 15 | 16 | jobs: 17 | check: 18 | name: Check build 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | os: [ubuntu-latest, windows-latest, macos-latest] 24 | rust: [stable, 1.81.0] 25 | feature_set: ["--no-default-features", "--all-features"] 26 | steps: 27 | - uses: actions/checkout@v4 28 | - uses: actions/cache@v4 29 | with: 30 | path: | 31 | ~/.cargo/registry 32 | ~/.cargo/git 33 | target 34 | key: check-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 35 | - uses: actions-rs/toolchain@v1 36 | with: 37 | profile: minimal 38 | toolchain: ${{ matrix.rust }} 39 | override: true 40 | - name: Run build check 41 | uses: actions-rs/cargo@v1 42 | with: 43 | command: check 44 | args: ${{ matrix.feature_set }} 45 | 46 | clippy: 47 | name: Clippy 48 | runs-on: ubuntu-latest 49 | steps: 50 | - uses: actions/checkout@v4 51 | - uses: actions/cache@v4 52 | with: 53 | path: | 54 | ~/.cargo/registry 55 | ~/.cargo/git 56 | target 57 | key: 
clippy-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 58 | - uses: actions-rs/toolchain@v1 59 | with: 60 | profile: minimal 61 | toolchain: stable 62 | override: true 63 | components: clippy 64 | - name: Run cargo clippy 65 | uses: actions-rs/clippy-check@v1 66 | with: 67 | token: ${{ secrets.GITHUB_TOKEN }} 68 | args: --all-features -- -D warnings 69 | 70 | format: 71 | name: Rustfmt 72 | runs-on: ubuntu-latest 73 | steps: 74 | - uses: actions/checkout@v4 75 | - uses: actions-rs/toolchain@v1 76 | with: 77 | profile: minimal 78 | toolchain: stable 79 | override: true 80 | components: rustfmt 81 | - name: Run cargo fmt 82 | uses: actions-rs/cargo@v1 83 | with: 84 | command: fmt 85 | args: --all -- --check 86 | 87 | tests: 88 | name: Tests 89 | needs: check 90 | runs-on: ${{ matrix.os }} 91 | strategy: 92 | fail-fast: false 93 | matrix: 94 | os: [ubuntu-latest, windows-latest, macos-latest] 95 | rust: [stable] 96 | steps: 97 | - uses: actions/checkout@v4 98 | - uses: actions/cache@v4 99 | with: 100 | path: | 101 | ~/.cargo/registry 102 | ~/.cargo/git 103 | target 104 | key: tests-${{ runner.os }}-${{ matrix.rust }}-cargo-${{ hashFiles('**/Cargo.lock') }} 105 | - uses: actions-rs/toolchain@v1 106 | with: 107 | profile: minimal 108 | toolchain: ${{ matrix.rust }} 109 | override: true 110 | - name: Build tests 111 | uses: actions-rs/cargo@v1 112 | with: 113 | command: build 114 | args: --tests 115 | - name: Run tests 116 | uses: actions-rs/cargo@v1 117 | with: 118 | command: test 119 | args: --all-features 120 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | tags: 5 | - "v*" 6 | 7 | jobs: 8 | # Publish yadf to crates.io 9 | publish: 10 | name: Publish crates.io package 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: actions-rs/toolchain@v1 
15 | with: 16 | toolchain: stable 17 | profile: minimal 18 | override: true 19 | - name: Publish 20 | run: cargo publish --token ${{ secrets.CRATES_IO_TOKEN }} 21 | 22 | # Build sources for every OS 23 | github_build: 24 | name: Build release binaries 25 | strategy: 26 | fail-fast: false 27 | matrix: 28 | target: 29 | - x86_64-unknown-linux-gnu 30 | - x86_64-unknown-linux-musl 31 | # - x86_64-apple-darwin 32 | - x86_64-pc-windows-msvc 33 | include: 34 | - target: x86_64-unknown-linux-gnu 35 | os: ubuntu-latest 36 | name: yadf-x86_64-unknown-linux-gnu.tar.gz 37 | - target: x86_64-unknown-linux-musl 38 | os: ubuntu-latest 39 | name: yadf-x86_64-unknown-linux-musl.tar.gz 40 | # - target: x86_64-apple-darwin 41 | # os: macOS-latest 42 | # name: yadf-x86_64-apple-darwin.tar.gz 43 | - target: x86_64-pc-windows-msvc 44 | os: windows-latest 45 | name: yadf-x86_64-pc-windows-msvc.zip 46 | runs-on: ${{ matrix.os }} 47 | steps: 48 | - uses: actions/checkout@v4 49 | - uses: actions-rs/toolchain@v1 50 | with: 51 | toolchain: stable 52 | override: true 53 | profile: minimal 54 | target: ${{ matrix.target }} 55 | 56 | - name: Setup musl tools 57 | if: matrix.target == 'x86_64-unknown-linux-musl' 58 | run: sudo apt install -y musl-tools 59 | 60 | - name: Build 61 | if: matrix.target != 'x86_64-unknown-linux-musl' 62 | run: cargo build --release --target ${{ matrix.target }} 63 | 64 | - name: Build (musl) 65 | if: matrix.target == 'x86_64-unknown-linux-musl' 66 | run: cargo build --release --target ${{ matrix.target }} 67 | 68 | - name: Prepare artifacts [Windows] 69 | if: matrix.os == 'windows-latest' 70 | run: | 71 | cd target/${{ matrix.target }}/release 72 | 7z a ../../../${{ matrix.name }} yadf.exe 73 | cd - 74 | 75 | - name: Prepare artifacts [-nix] 76 | if: matrix.os != 'windows-latest' 77 | run: | 78 | cd target/${{ matrix.target }}/release 79 | tar czvf ../../../${{ matrix.name }} yadf 80 | cd - 81 | 82 | - uses: actions/upload-artifact@v2 83 | with: 84 | name: ${{ 
matrix.name }} 85 | path: ${{ matrix.name }} 86 | 87 | # Create GitHub release with Rust build targets and release notes 88 | github_release: 89 | name: GitHub Release 90 | needs: github_build 91 | runs-on: ubuntu-latest 92 | steps: 93 | - uses: actions/checkout@v4 94 | with: 95 | fetch-depth: 0 96 | 97 | - uses: actions/setup-go@v2 98 | with: 99 | go-version: "^1.13.1" 100 | 101 | - uses: actions/download-artifact@v2 102 | 103 | - name: Build release notes 104 | run: | 105 | wget https://github.com/git-chglog/git-chglog/releases/download/0.9.1/git-chglog_linux_amd64 -O git-chglog 106 | chmod 744 git-chglog 107 | ./git-chglog -c .github/chglog/release.yml $(git describe --tags) > RELEASE.md 108 | 109 | - name: Compute checksums 110 | run: | 111 | echo >> RELEASE.md 112 | echo "### Checksums" >> RELEASE.md 113 | echo >> RELEASE.md 114 | echo "|File|sha256|" >> RELEASE.md 115 | echo "|---|---|" >> RELEASE.md 116 | for file in yadf-*/yadf-*; do 117 | sha="$(openssl dgst -sha256 -r "$file" | awk '{print $1}')" 118 | file="$(basename "$file")" 119 | echo "|${file}|\`${sha}\`|" >> RELEASE.md 120 | done 121 | 122 | - name: Publish 123 | uses: softprops/action-gh-release@v1 124 | with: 125 | files: yadf-*/yadf-* 126 | body_path: RELEASE.md 127 | prerelease: ${{ endsWith(github.ref, 'pre') }} 128 | env: 129 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 130 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.24.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler2" 16 | version = "2.0.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" 19 | 20 | [[package]] 21 | name = "ahash" 22 | version = "0.7.8" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" 25 | dependencies = [ 26 | "getrandom 0.2.15", 27 | "once_cell", 28 | "version_check", 29 | ] 30 | 31 | [[package]] 32 | name = "ahash" 33 | version = "0.8.11" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" 36 | dependencies = [ 37 | "cfg-if", 38 | "getrandom 0.2.15", 39 | "once_cell", 40 | "version_check", 41 | "zerocopy 0.7.35", 42 | ] 43 | 44 | [[package]] 45 | name = "aho-corasick" 46 | version = "1.1.3" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 49 | dependencies = [ 50 | "memchr", 51 | ] 52 | 53 | [[package]] 54 | name = "anstream" 55 | version = "0.6.18" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" 58 | dependencies = [ 59 | "anstyle", 60 | "anstyle-parse", 61 | "anstyle-query", 62 | "anstyle-wincon", 63 | "colorchoice", 64 | "is_terminal_polyfill", 65 | "utf8parse", 66 | ] 67 | 68 | [[package]] 69 | name = "anstyle" 70 | version = "1.0.10" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = 
"55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" 73 | 74 | [[package]] 75 | name = "anstyle-parse" 76 | version = "0.2.6" 77 | source = "registry+https://github.com/rust-lang/crates.io-index" 78 | checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" 79 | dependencies = [ 80 | "utf8parse", 81 | ] 82 | 83 | [[package]] 84 | name = "anstyle-query" 85 | version = "1.1.2" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" 88 | dependencies = [ 89 | "windows-sys 0.59.0", 90 | ] 91 | 92 | [[package]] 93 | name = "anstyle-wincon" 94 | version = "3.0.6" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" 97 | dependencies = [ 98 | "anstyle", 99 | "windows-sys 0.59.0", 100 | ] 101 | 102 | [[package]] 103 | name = "anyhow" 104 | version = "1.0.95" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" 107 | 108 | [[package]] 109 | name = "arrayref" 110 | version = "0.3.9" 111 | source = "registry+https://github.com/rust-lang/crates.io-index" 112 | checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" 113 | 114 | [[package]] 115 | name = "arrayvec" 116 | version = "0.7.6" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" 119 | 120 | [[package]] 121 | name = "assert_cmd" 122 | version = "2.0.16" 123 | source = "registry+https://github.com/rust-lang/crates.io-index" 124 | checksum = "dc1835b7f27878de8525dc71410b5a31cdcc5f230aed5ba5df968e09c201b23d" 125 | dependencies = [ 126 | "anstyle", 127 | "bstr", 128 | "doc-comment", 129 | "libc", 130 | "predicates", 131 | "predicates-core", 132 | "predicates-tree", 133 | 
"wait-timeout", 134 | ] 135 | 136 | [[package]] 137 | name = "autocfg" 138 | version = "1.4.0" 139 | source = "registry+https://github.com/rust-lang/crates.io-index" 140 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" 141 | 142 | [[package]] 143 | name = "backtrace" 144 | version = "0.3.74" 145 | source = "registry+https://github.com/rust-lang/crates.io-index" 146 | checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" 147 | dependencies = [ 148 | "addr2line", 149 | "cfg-if", 150 | "libc", 151 | "miniz_oxide", 152 | "object", 153 | "rustc-demangle", 154 | "windows-targets", 155 | ] 156 | 157 | [[package]] 158 | name = "bitflags" 159 | version = "2.6.0" 160 | source = "registry+https://github.com/rust-lang/crates.io-index" 161 | checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" 162 | 163 | [[package]] 164 | name = "bitvec" 165 | version = "1.0.1" 166 | source = "registry+https://github.com/rust-lang/crates.io-index" 167 | checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" 168 | dependencies = [ 169 | "funty", 170 | "radium", 171 | "tap", 172 | "wyz", 173 | ] 174 | 175 | [[package]] 176 | name = "blake3" 177 | version = "1.5.5" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" 180 | dependencies = [ 181 | "arrayref", 182 | "arrayvec", 183 | "cc", 184 | "cfg-if", 185 | "constant_time_eq", 186 | ] 187 | 188 | [[package]] 189 | name = "borsh" 190 | version = "1.5.1" 191 | source = "registry+https://github.com/rust-lang/crates.io-index" 192 | checksum = "a6362ed55def622cddc70a4746a68554d7b687713770de539e59a739b249f8ed" 193 | dependencies = [ 194 | "borsh-derive", 195 | "cfg_aliases", 196 | ] 197 | 198 | [[package]] 199 | name = "borsh-derive" 200 | version = "1.5.1" 201 | source = "registry+https://github.com/rust-lang/crates.io-index" 202 | checksum = 
"c3ef8005764f53cd4dca619f5bf64cafd4664dada50ece25e4d81de54c80cc0b" 203 | dependencies = [ 204 | "once_cell", 205 | "proc-macro-crate", 206 | "proc-macro2", 207 | "quote", 208 | "syn 2.0.96", 209 | "syn_derive", 210 | ] 211 | 212 | [[package]] 213 | name = "bstr" 214 | version = "1.10.0" 215 | source = "registry+https://github.com/rust-lang/crates.io-index" 216 | checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" 217 | dependencies = [ 218 | "memchr", 219 | "regex-automata", 220 | "serde", 221 | ] 222 | 223 | [[package]] 224 | name = "byte-unit" 225 | version = "5.1.6" 226 | source = "registry+https://github.com/rust-lang/crates.io-index" 227 | checksum = "e1cd29c3c585209b0cbc7309bfe3ed7efd8c84c21b7af29c8bfae908f8777174" 228 | dependencies = [ 229 | "rust_decimal", 230 | "serde", 231 | "utf8-width", 232 | ] 233 | 234 | [[package]] 235 | name = "bytecheck" 236 | version = "0.6.12" 237 | source = "registry+https://github.com/rust-lang/crates.io-index" 238 | checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" 239 | dependencies = [ 240 | "bytecheck_derive", 241 | "ptr_meta", 242 | "simdutf8", 243 | ] 244 | 245 | [[package]] 246 | name = "bytecheck_derive" 247 | version = "0.6.12" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" 250 | dependencies = [ 251 | "proc-macro2", 252 | "quote", 253 | "syn 1.0.109", 254 | ] 255 | 256 | [[package]] 257 | name = "byteorder" 258 | version = "1.5.0" 259 | source = "registry+https://github.com/rust-lang/crates.io-index" 260 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 261 | 262 | [[package]] 263 | name = "bytes" 264 | version = "1.8.0" 265 | source = "registry+https://github.com/rust-lang/crates.io-index" 266 | checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" 267 | 268 | [[package]] 269 | name = "cc" 270 | version 
= "1.2.10" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229" 273 | dependencies = [ 274 | "shlex", 275 | ] 276 | 277 | [[package]] 278 | name = "cfg-if" 279 | version = "1.0.0" 280 | source = "registry+https://github.com/rust-lang/crates.io-index" 281 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 282 | 283 | [[package]] 284 | name = "cfg_aliases" 285 | version = "0.2.1" 286 | source = "registry+https://github.com/rust-lang/crates.io-index" 287 | checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" 288 | 289 | [[package]] 290 | name = "clap" 291 | version = "4.5.27" 292 | source = "registry+https://github.com/rust-lang/crates.io-index" 293 | checksum = "769b0145982b4b48713e01ec42d61614425f27b7058bda7180a3a41f30104796" 294 | dependencies = [ 295 | "clap_builder", 296 | "clap_derive", 297 | ] 298 | 299 | [[package]] 300 | name = "clap-verbosity-flag" 301 | version = "3.0.2" 302 | source = "registry+https://github.com/rust-lang/crates.io-index" 303 | checksum = "2678fade3b77aa3a8ff3aae87e9c008d3fb00473a41c71fbf74e91c8c7b37e84" 304 | dependencies = [ 305 | "clap", 306 | "log", 307 | ] 308 | 309 | [[package]] 310 | name = "clap_builder" 311 | version = "4.5.27" 312 | source = "registry+https://github.com/rust-lang/crates.io-index" 313 | checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7" 314 | dependencies = [ 315 | "anstream", 316 | "anstyle", 317 | "clap_lex", 318 | "strsim", 319 | ] 320 | 321 | [[package]] 322 | name = "clap_derive" 323 | version = "4.5.24" 324 | source = "registry+https://github.com/rust-lang/crates.io-index" 325 | checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" 326 | dependencies = [ 327 | "heck", 328 | "proc-macro2", 329 | "quote", 330 | "syn 2.0.96", 331 | ] 332 | 333 | [[package]] 334 | name = "clap_lex" 335 | version = "0.7.4" 
336 | source = "registry+https://github.com/rust-lang/crates.io-index" 337 | checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" 338 | 339 | [[package]] 340 | name = "colorchoice" 341 | version = "1.0.3" 342 | source = "registry+https://github.com/rust-lang/crates.io-index" 343 | checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" 344 | 345 | [[package]] 346 | name = "constant_time_eq" 347 | version = "0.3.1" 348 | source = "registry+https://github.com/rust-lang/crates.io-index" 349 | checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" 350 | 351 | [[package]] 352 | name = "crossbeam-channel" 353 | version = "0.5.14" 354 | source = "registry+https://github.com/rust-lang/crates.io-index" 355 | checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" 356 | dependencies = [ 357 | "crossbeam-utils", 358 | ] 359 | 360 | [[package]] 361 | name = "crossbeam-deque" 362 | version = "0.8.5" 363 | source = "registry+https://github.com/rust-lang/crates.io-index" 364 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" 365 | dependencies = [ 366 | "crossbeam-epoch", 367 | "crossbeam-utils", 368 | ] 369 | 370 | [[package]] 371 | name = "crossbeam-epoch" 372 | version = "0.9.18" 373 | source = "registry+https://github.com/rust-lang/crates.io-index" 374 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 375 | dependencies = [ 376 | "crossbeam-utils", 377 | ] 378 | 379 | [[package]] 380 | name = "crossbeam-utils" 381 | version = "0.8.20" 382 | source = "registry+https://github.com/rust-lang/crates.io-index" 383 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" 384 | 385 | [[package]] 386 | name = "csv" 387 | version = "1.3.1" 388 | source = "registry+https://github.com/rust-lang/crates.io-index" 389 | checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" 390 | dependencies = [ 
391 | "csv-core", 392 | "itoa", 393 | "ryu", 394 | "serde", 395 | ] 396 | 397 | [[package]] 398 | name = "csv-core" 399 | version = "0.1.11" 400 | source = "registry+https://github.com/rust-lang/crates.io-index" 401 | checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" 402 | dependencies = [ 403 | "memchr", 404 | ] 405 | 406 | [[package]] 407 | name = "difflib" 408 | version = "0.4.0" 409 | source = "registry+https://github.com/rust-lang/crates.io-index" 410 | checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" 411 | 412 | [[package]] 413 | name = "dirs" 414 | version = "6.0.0" 415 | source = "registry+https://github.com/rust-lang/crates.io-index" 416 | checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" 417 | dependencies = [ 418 | "dirs-sys", 419 | ] 420 | 421 | [[package]] 422 | name = "dirs-sys" 423 | version = "0.5.0" 424 | source = "registry+https://github.com/rust-lang/crates.io-index" 425 | checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" 426 | dependencies = [ 427 | "libc", 428 | "option-ext", 429 | "redox_users", 430 | "windows-sys 0.59.0", 431 | ] 432 | 433 | [[package]] 434 | name = "doc-comment" 435 | version = "0.3.3" 436 | source = "registry+https://github.com/rust-lang/crates.io-index" 437 | checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" 438 | 439 | [[package]] 440 | name = "dunce" 441 | version = "1.0.5" 442 | source = "registry+https://github.com/rust-lang/crates.io-index" 443 | checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" 444 | 445 | [[package]] 446 | name = "either" 447 | version = "1.13.0" 448 | source = "registry+https://github.com/rust-lang/crates.io-index" 449 | checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" 450 | 451 | [[package]] 452 | name = "env_filter" 453 | version = "0.1.2" 454 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 455 | checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" 456 | dependencies = [ 457 | "log", 458 | "regex", 459 | ] 460 | 461 | [[package]] 462 | name = "env_logger" 463 | version = "0.11.6" 464 | source = "registry+https://github.com/rust-lang/crates.io-index" 465 | checksum = "dcaee3d8e3cfc3fd92428d477bc97fc29ec8716d180c0d74c643bb26166660e0" 466 | dependencies = [ 467 | "anstream", 468 | "anstyle", 469 | "env_filter", 470 | "humantime", 471 | "log", 472 | ] 473 | 474 | [[package]] 475 | name = "equivalent" 476 | version = "1.0.1" 477 | source = "registry+https://github.com/rust-lang/crates.io-index" 478 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" 479 | 480 | [[package]] 481 | name = "float-cmp" 482 | version = "0.10.0" 483 | source = "registry+https://github.com/rust-lang/crates.io-index" 484 | checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" 485 | dependencies = [ 486 | "num-traits", 487 | ] 488 | 489 | [[package]] 490 | name = "funty" 491 | version = "2.0.0" 492 | source = "registry+https://github.com/rust-lang/crates.io-index" 493 | checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" 494 | 495 | [[package]] 496 | name = "getrandom" 497 | version = "0.2.15" 498 | source = "registry+https://github.com/rust-lang/crates.io-index" 499 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 500 | dependencies = [ 501 | "cfg-if", 502 | "libc", 503 | "wasi 0.11.0+wasi-snapshot-preview1", 504 | ] 505 | 506 | [[package]] 507 | name = "getrandom" 508 | version = "0.3.1" 509 | source = "registry+https://github.com/rust-lang/crates.io-index" 510 | checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" 511 | dependencies = [ 512 | "cfg-if", 513 | "libc", 514 | "wasi 0.13.3+wasi-0.2.2", 515 | "windows-targets", 516 | ] 517 | 518 | [[package]] 519 | name = 
"gimli" 520 | version = "0.31.1" 521 | source = "registry+https://github.com/rust-lang/crates.io-index" 522 | checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" 523 | 524 | [[package]] 525 | name = "globset" 526 | version = "0.4.15" 527 | source = "registry+https://github.com/rust-lang/crates.io-index" 528 | checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" 529 | dependencies = [ 530 | "aho-corasick", 531 | "bstr", 532 | "log", 533 | "regex-automata", 534 | "regex-syntax", 535 | ] 536 | 537 | [[package]] 538 | name = "hashbrown" 539 | version = "0.12.3" 540 | source = "registry+https://github.com/rust-lang/crates.io-index" 541 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 542 | dependencies = [ 543 | "ahash 0.7.8", 544 | ] 545 | 546 | [[package]] 547 | name = "hashbrown" 548 | version = "0.15.0" 549 | source = "registry+https://github.com/rust-lang/crates.io-index" 550 | checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" 551 | 552 | [[package]] 553 | name = "heck" 554 | version = "0.5.0" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 557 | 558 | [[package]] 559 | name = "hermit-abi" 560 | version = "0.3.9" 561 | source = "registry+https://github.com/rust-lang/crates.io-index" 562 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 563 | 564 | [[package]] 565 | name = "highway" 566 | version = "1.3.0" 567 | source = "registry+https://github.com/rust-lang/crates.io-index" 568 | checksum = "9040319a6910b901d5d49cbada4a99db52836a1b63228a05f7e2b7f8feef89b1" 569 | 570 | [[package]] 571 | name = "human-panic" 572 | version = "2.0.2" 573 | source = "registry+https://github.com/rust-lang/crates.io-index" 574 | checksum = "80b84a66a325082740043a6c28bbea400c129eac0d3a27673a1de971e44bf1f7" 575 | dependencies = [ 576 | "anstream", 
577 | "anstyle", 578 | "backtrace", 579 | "os_info", 580 | "serde", 581 | "serde_derive", 582 | "toml", 583 | "uuid", 584 | ] 585 | 586 | [[package]] 587 | name = "humantime" 588 | version = "2.1.0" 589 | source = "registry+https://github.com/rust-lang/crates.io-index" 590 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 591 | 592 | [[package]] 593 | name = "ignore" 594 | version = "0.4.23" 595 | source = "registry+https://github.com/rust-lang/crates.io-index" 596 | checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b" 597 | dependencies = [ 598 | "crossbeam-deque", 599 | "globset", 600 | "log", 601 | "memchr", 602 | "regex-automata", 603 | "same-file", 604 | "walkdir", 605 | "winapi-util", 606 | ] 607 | 608 | [[package]] 609 | name = "indexmap" 610 | version = "2.6.0" 611 | source = "registry+https://github.com/rust-lang/crates.io-index" 612 | checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" 613 | dependencies = [ 614 | "equivalent", 615 | "hashbrown 0.15.0", 616 | ] 617 | 618 | [[package]] 619 | name = "is_terminal_polyfill" 620 | version = "1.70.1" 621 | source = "registry+https://github.com/rust-lang/crates.io-index" 622 | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 623 | 624 | [[package]] 625 | name = "itoa" 626 | version = "1.0.14" 627 | source = "registry+https://github.com/rust-lang/crates.io-index" 628 | checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" 629 | 630 | [[package]] 631 | name = "libc" 632 | version = "0.2.169" 633 | source = "registry+https://github.com/rust-lang/crates.io-index" 634 | checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" 635 | 636 | [[package]] 637 | name = "libredox" 638 | version = "0.1.3" 639 | source = "registry+https://github.com/rust-lang/crates.io-index" 640 | checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" 641 | 
dependencies = [ 642 | "bitflags", 643 | "libc", 644 | ] 645 | 646 | [[package]] 647 | name = "log" 648 | version = "0.4.25" 649 | source = "registry+https://github.com/rust-lang/crates.io-index" 650 | checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" 651 | 652 | [[package]] 653 | name = "memchr" 654 | version = "2.7.4" 655 | source = "registry+https://github.com/rust-lang/crates.io-index" 656 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 657 | 658 | [[package]] 659 | name = "metrohash" 660 | version = "1.0.7" 661 | source = "registry+https://github.com/rust-lang/crates.io-index" 662 | checksum = "a84011bfadc339f60fbcc38181da8a0a91cd16375394dd52edf9da80deacd8c5" 663 | 664 | [[package]] 665 | name = "miniz_oxide" 666 | version = "0.8.0" 667 | source = "registry+https://github.com/rust-lang/crates.io-index" 668 | checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" 669 | dependencies = [ 670 | "adler2", 671 | ] 672 | 673 | [[package]] 674 | name = "normalize-line-endings" 675 | version = "0.3.0" 676 | source = "registry+https://github.com/rust-lang/crates.io-index" 677 | checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" 678 | 679 | [[package]] 680 | name = "num-traits" 681 | version = "0.2.19" 682 | source = "registry+https://github.com/rust-lang/crates.io-index" 683 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 684 | dependencies = [ 685 | "autocfg", 686 | ] 687 | 688 | [[package]] 689 | name = "num_cpus" 690 | version = "1.16.0" 691 | source = "registry+https://github.com/rust-lang/crates.io-index" 692 | checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" 693 | dependencies = [ 694 | "hermit-abi", 695 | "libc", 696 | ] 697 | 698 | [[package]] 699 | name = "object" 700 | version = "0.36.5" 701 | source = "registry+https://github.com/rust-lang/crates.io-index" 702 | checksum = 
"aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" 703 | dependencies = [ 704 | "memchr", 705 | ] 706 | 707 | [[package]] 708 | name = "once_cell" 709 | version = "1.20.2" 710 | source = "registry+https://github.com/rust-lang/crates.io-index" 711 | checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" 712 | 713 | [[package]] 714 | name = "option-ext" 715 | version = "0.2.0" 716 | source = "registry+https://github.com/rust-lang/crates.io-index" 717 | checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" 718 | 719 | [[package]] 720 | name = "os_info" 721 | version = "3.8.2" 722 | source = "registry+https://github.com/rust-lang/crates.io-index" 723 | checksum = "ae99c7fa6dd38c7cafe1ec085e804f8f555a2f8659b0dbe03f1f9963a9b51092" 724 | dependencies = [ 725 | "log", 726 | "serde", 727 | "windows-sys 0.52.0", 728 | ] 729 | 730 | [[package]] 731 | name = "ppv-lite86" 732 | version = "0.2.20" 733 | source = "registry+https://github.com/rust-lang/crates.io-index" 734 | checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" 735 | dependencies = [ 736 | "zerocopy 0.7.35", 737 | ] 738 | 739 | [[package]] 740 | name = "predicates" 741 | version = "3.1.3" 742 | source = "registry+https://github.com/rust-lang/crates.io-index" 743 | checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" 744 | dependencies = [ 745 | "anstyle", 746 | "difflib", 747 | "float-cmp", 748 | "normalize-line-endings", 749 | "predicates-core", 750 | "regex", 751 | ] 752 | 753 | [[package]] 754 | name = "predicates-core" 755 | version = "1.0.8" 756 | source = "registry+https://github.com/rust-lang/crates.io-index" 757 | checksum = "ae8177bee8e75d6846599c6b9ff679ed51e882816914eec639944d7c9aa11931" 758 | 759 | [[package]] 760 | name = "predicates-tree" 761 | version = "1.0.11" 762 | source = "registry+https://github.com/rust-lang/crates.io-index" 763 | checksum = 
"41b740d195ed3166cd147c8047ec98db0e22ec019eb8eeb76d343b795304fb13" 764 | dependencies = [ 765 | "predicates-core", 766 | "termtree", 767 | ] 768 | 769 | [[package]] 770 | name = "proc-macro-crate" 771 | version = "3.2.0" 772 | source = "registry+https://github.com/rust-lang/crates.io-index" 773 | checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" 774 | dependencies = [ 775 | "toml_edit", 776 | ] 777 | 778 | [[package]] 779 | name = "proc-macro-error" 780 | version = "1.0.4" 781 | source = "registry+https://github.com/rust-lang/crates.io-index" 782 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 783 | dependencies = [ 784 | "proc-macro-error-attr", 785 | "proc-macro2", 786 | "quote", 787 | "version_check", 788 | ] 789 | 790 | [[package]] 791 | name = "proc-macro-error-attr" 792 | version = "1.0.4" 793 | source = "registry+https://github.com/rust-lang/crates.io-index" 794 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 795 | dependencies = [ 796 | "proc-macro2", 797 | "quote", 798 | "version_check", 799 | ] 800 | 801 | [[package]] 802 | name = "proc-macro2" 803 | version = "1.0.93" 804 | source = "registry+https://github.com/rust-lang/crates.io-index" 805 | checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" 806 | dependencies = [ 807 | "unicode-ident", 808 | ] 809 | 810 | [[package]] 811 | name = "ptr_meta" 812 | version = "0.1.4" 813 | source = "registry+https://github.com/rust-lang/crates.io-index" 814 | checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" 815 | dependencies = [ 816 | "ptr_meta_derive", 817 | ] 818 | 819 | [[package]] 820 | name = "ptr_meta_derive" 821 | version = "0.1.4" 822 | source = "registry+https://github.com/rust-lang/crates.io-index" 823 | checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" 824 | dependencies = [ 825 | "proc-macro2", 826 | "quote", 827 | "syn 1.0.109", 828 | 
] 829 | 830 | [[package]] 831 | name = "quote" 832 | version = "1.0.38" 833 | source = "registry+https://github.com/rust-lang/crates.io-index" 834 | checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" 835 | dependencies = [ 836 | "proc-macro2", 837 | ] 838 | 839 | [[package]] 840 | name = "radium" 841 | version = "0.7.0" 842 | source = "registry+https://github.com/rust-lang/crates.io-index" 843 | checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" 844 | 845 | [[package]] 846 | name = "rand" 847 | version = "0.8.5" 848 | source = "registry+https://github.com/rust-lang/crates.io-index" 849 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 850 | dependencies = [ 851 | "libc", 852 | "rand_chacha 0.3.1", 853 | "rand_core 0.6.4", 854 | ] 855 | 856 | [[package]] 857 | name = "rand" 858 | version = "0.9.0" 859 | source = "registry+https://github.com/rust-lang/crates.io-index" 860 | checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" 861 | dependencies = [ 862 | "rand_chacha 0.9.0", 863 | "rand_core 0.9.0", 864 | "zerocopy 0.8.14", 865 | ] 866 | 867 | [[package]] 868 | name = "rand_chacha" 869 | version = "0.3.1" 870 | source = "registry+https://github.com/rust-lang/crates.io-index" 871 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 872 | dependencies = [ 873 | "ppv-lite86", 874 | "rand_core 0.6.4", 875 | ] 876 | 877 | [[package]] 878 | name = "rand_chacha" 879 | version = "0.9.0" 880 | source = "registry+https://github.com/rust-lang/crates.io-index" 881 | checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" 882 | dependencies = [ 883 | "ppv-lite86", 884 | "rand_core 0.9.0", 885 | ] 886 | 887 | [[package]] 888 | name = "rand_core" 889 | version = "0.6.4" 890 | source = "registry+https://github.com/rust-lang/crates.io-index" 891 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 892 
| dependencies = [ 893 | "getrandom 0.2.15", 894 | ] 895 | 896 | [[package]] 897 | name = "rand_core" 898 | version = "0.9.0" 899 | source = "registry+https://github.com/rust-lang/crates.io-index" 900 | checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff" 901 | dependencies = [ 902 | "getrandom 0.3.1", 903 | "zerocopy 0.8.14", 904 | ] 905 | 906 | [[package]] 907 | name = "rayon" 908 | version = "1.10.0" 909 | source = "registry+https://github.com/rust-lang/crates.io-index" 910 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 911 | dependencies = [ 912 | "either", 913 | "rayon-core", 914 | ] 915 | 916 | [[package]] 917 | name = "rayon-core" 918 | version = "1.12.1" 919 | source = "registry+https://github.com/rust-lang/crates.io-index" 920 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 921 | dependencies = [ 922 | "crossbeam-deque", 923 | "crossbeam-utils", 924 | ] 925 | 926 | [[package]] 927 | name = "redox_users" 928 | version = "0.5.0" 929 | source = "registry+https://github.com/rust-lang/crates.io-index" 930 | checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" 931 | dependencies = [ 932 | "getrandom 0.2.15", 933 | "libredox", 934 | "thiserror", 935 | ] 936 | 937 | [[package]] 938 | name = "regex" 939 | version = "1.11.1" 940 | source = "registry+https://github.com/rust-lang/crates.io-index" 941 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 942 | dependencies = [ 943 | "aho-corasick", 944 | "memchr", 945 | "regex-automata", 946 | "regex-syntax", 947 | ] 948 | 949 | [[package]] 950 | name = "regex-automata" 951 | version = "0.4.9" 952 | source = "registry+https://github.com/rust-lang/crates.io-index" 953 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" 954 | dependencies = [ 955 | "aho-corasick", 956 | "memchr", 957 | "regex-syntax", 958 | ] 959 | 960 | [[package]] 961 | name = 
"regex-syntax" 962 | version = "0.8.5" 963 | source = "registry+https://github.com/rust-lang/crates.io-index" 964 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 965 | 966 | [[package]] 967 | name = "rend" 968 | version = "0.4.2" 969 | source = "registry+https://github.com/rust-lang/crates.io-index" 970 | checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" 971 | dependencies = [ 972 | "bytecheck", 973 | ] 974 | 975 | [[package]] 976 | name = "rkyv" 977 | version = "0.7.45" 978 | source = "registry+https://github.com/rust-lang/crates.io-index" 979 | checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" 980 | dependencies = [ 981 | "bitvec", 982 | "bytecheck", 983 | "bytes", 984 | "hashbrown 0.12.3", 985 | "ptr_meta", 986 | "rend", 987 | "rkyv_derive", 988 | "seahash", 989 | "tinyvec", 990 | "uuid", 991 | ] 992 | 993 | [[package]] 994 | name = "rkyv_derive" 995 | version = "0.7.45" 996 | source = "registry+https://github.com/rust-lang/crates.io-index" 997 | checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" 998 | dependencies = [ 999 | "proc-macro2", 1000 | "quote", 1001 | "syn 1.0.109", 1002 | ] 1003 | 1004 | [[package]] 1005 | name = "rust_decimal" 1006 | version = "1.36.0" 1007 | source = "registry+https://github.com/rust-lang/crates.io-index" 1008 | checksum = "b082d80e3e3cc52b2ed634388d436fe1f4de6af5786cc2de9ba9737527bdf555" 1009 | dependencies = [ 1010 | "arrayvec", 1011 | "borsh", 1012 | "bytes", 1013 | "num-traits", 1014 | "rand 0.8.5", 1015 | "rkyv", 1016 | "serde", 1017 | "serde_json", 1018 | ] 1019 | 1020 | [[package]] 1021 | name = "rustc-demangle" 1022 | version = "0.1.24" 1023 | source = "registry+https://github.com/rust-lang/crates.io-index" 1024 | checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" 1025 | 1026 | [[package]] 1027 | name = "ryu" 1028 | version = "1.0.18" 1029 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1030 | checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" 1031 | 1032 | [[package]] 1033 | name = "same-file" 1034 | version = "1.0.6" 1035 | source = "registry+https://github.com/rust-lang/crates.io-index" 1036 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 1037 | dependencies = [ 1038 | "winapi-util", 1039 | ] 1040 | 1041 | [[package]] 1042 | name = "seahash" 1043 | version = "4.1.0" 1044 | source = "registry+https://github.com/rust-lang/crates.io-index" 1045 | checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" 1046 | 1047 | [[package]] 1048 | name = "serde" 1049 | version = "1.0.217" 1050 | source = "registry+https://github.com/rust-lang/crates.io-index" 1051 | checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" 1052 | dependencies = [ 1053 | "serde_derive", 1054 | ] 1055 | 1056 | [[package]] 1057 | name = "serde_derive" 1058 | version = "1.0.217" 1059 | source = "registry+https://github.com/rust-lang/crates.io-index" 1060 | checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" 1061 | dependencies = [ 1062 | "proc-macro2", 1063 | "quote", 1064 | "syn 2.0.96", 1065 | ] 1066 | 1067 | [[package]] 1068 | name = "serde_json" 1069 | version = "1.0.138" 1070 | source = "registry+https://github.com/rust-lang/crates.io-index" 1071 | checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" 1072 | dependencies = [ 1073 | "itoa", 1074 | "memchr", 1075 | "ryu", 1076 | "serde", 1077 | ] 1078 | 1079 | [[package]] 1080 | name = "serde_spanned" 1081 | version = "0.6.8" 1082 | source = "registry+https://github.com/rust-lang/crates.io-index" 1083 | checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" 1084 | dependencies = [ 1085 | "serde", 1086 | ] 1087 | 1088 | [[package]] 1089 | name = "shlex" 1090 | version = "1.3.0" 1091 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1092 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 1093 | 1094 | [[package]] 1095 | name = "simdutf8" 1096 | version = "0.1.5" 1097 | source = "registry+https://github.com/rust-lang/crates.io-index" 1098 | checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" 1099 | 1100 | [[package]] 1101 | name = "strsim" 1102 | version = "0.11.1" 1103 | source = "registry+https://github.com/rust-lang/crates.io-index" 1104 | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 1105 | 1106 | [[package]] 1107 | name = "syn" 1108 | version = "1.0.109" 1109 | source = "registry+https://github.com/rust-lang/crates.io-index" 1110 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 1111 | dependencies = [ 1112 | "proc-macro2", 1113 | "quote", 1114 | "unicode-ident", 1115 | ] 1116 | 1117 | [[package]] 1118 | name = "syn" 1119 | version = "2.0.96" 1120 | source = "registry+https://github.com/rust-lang/crates.io-index" 1121 | checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" 1122 | dependencies = [ 1123 | "proc-macro2", 1124 | "quote", 1125 | "unicode-ident", 1126 | ] 1127 | 1128 | [[package]] 1129 | name = "syn_derive" 1130 | version = "0.1.8" 1131 | source = "registry+https://github.com/rust-lang/crates.io-index" 1132 | checksum = "1329189c02ff984e9736652b1631330da25eaa6bc639089ed4915d25446cbe7b" 1133 | dependencies = [ 1134 | "proc-macro-error", 1135 | "proc-macro2", 1136 | "quote", 1137 | "syn 2.0.96", 1138 | ] 1139 | 1140 | [[package]] 1141 | name = "tap" 1142 | version = "1.0.1" 1143 | source = "registry+https://github.com/rust-lang/crates.io-index" 1144 | checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" 1145 | 1146 | [[package]] 1147 | name = "termtree" 1148 | version = "0.4.1" 1149 | source = "registry+https://github.com/rust-lang/crates.io-index" 1150 | 
checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" 1151 | 1152 | [[package]] 1153 | name = "thiserror" 1154 | version = "2.0.11" 1155 | source = "registry+https://github.com/rust-lang/crates.io-index" 1156 | checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" 1157 | dependencies = [ 1158 | "thiserror-impl", 1159 | ] 1160 | 1161 | [[package]] 1162 | name = "thiserror-impl" 1163 | version = "2.0.11" 1164 | source = "registry+https://github.com/rust-lang/crates.io-index" 1165 | checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" 1166 | dependencies = [ 1167 | "proc-macro2", 1168 | "quote", 1169 | "syn 2.0.96", 1170 | ] 1171 | 1172 | [[package]] 1173 | name = "tinyvec" 1174 | version = "1.8.0" 1175 | source = "registry+https://github.com/rust-lang/crates.io-index" 1176 | checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" 1177 | dependencies = [ 1178 | "tinyvec_macros", 1179 | ] 1180 | 1181 | [[package]] 1182 | name = "tinyvec_macros" 1183 | version = "0.1.1" 1184 | source = "registry+https://github.com/rust-lang/crates.io-index" 1185 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 1186 | 1187 | [[package]] 1188 | name = "toml" 1189 | version = "0.8.19" 1190 | source = "registry+https://github.com/rust-lang/crates.io-index" 1191 | checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" 1192 | dependencies = [ 1193 | "serde", 1194 | "serde_spanned", 1195 | "toml_datetime", 1196 | "toml_edit", 1197 | ] 1198 | 1199 | [[package]] 1200 | name = "toml_datetime" 1201 | version = "0.6.8" 1202 | source = "registry+https://github.com/rust-lang/crates.io-index" 1203 | checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" 1204 | dependencies = [ 1205 | "serde", 1206 | ] 1207 | 1208 | [[package]] 1209 | name = "toml_edit" 1210 | version = "0.22.22" 1211 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1212 | checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" 1213 | dependencies = [ 1214 | "indexmap", 1215 | "serde", 1216 | "serde_spanned", 1217 | "toml_datetime", 1218 | "winnow", 1219 | ] 1220 | 1221 | [[package]] 1222 | name = "twox-hash" 1223 | version = "2.1.0" 1224 | source = "registry+https://github.com/rust-lang/crates.io-index" 1225 | checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908" 1226 | dependencies = [ 1227 | "rand 0.8.5", 1228 | ] 1229 | 1230 | [[package]] 1231 | name = "typed-builder" 1232 | version = "0.20.0" 1233 | source = "registry+https://github.com/rust-lang/crates.io-index" 1234 | checksum = "7e14ed59dc8b7b26cacb2a92bad2e8b1f098806063898ab42a3bd121d7d45e75" 1235 | dependencies = [ 1236 | "typed-builder-macro", 1237 | ] 1238 | 1239 | [[package]] 1240 | name = "typed-builder-macro" 1241 | version = "0.20.0" 1242 | source = "registry+https://github.com/rust-lang/crates.io-index" 1243 | checksum = "560b82d656506509d43abe30e0ba64c56b1953ab3d4fe7ba5902747a7a3cedd5" 1244 | dependencies = [ 1245 | "proc-macro2", 1246 | "quote", 1247 | "syn 2.0.96", 1248 | ] 1249 | 1250 | [[package]] 1251 | name = "unicode-ident" 1252 | version = "1.0.14" 1253 | source = "registry+https://github.com/rust-lang/crates.io-index" 1254 | checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" 1255 | 1256 | [[package]] 1257 | name = "utf8-width" 1258 | version = "0.1.7" 1259 | source = "registry+https://github.com/rust-lang/crates.io-index" 1260 | checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" 1261 | 1262 | [[package]] 1263 | name = "utf8parse" 1264 | version = "0.2.2" 1265 | source = "registry+https://github.com/rust-lang/crates.io-index" 1266 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 1267 | 1268 | [[package]] 1269 | name = "uuid" 1270 | version = "1.11.0" 1271 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1272 | checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" 1273 | dependencies = [ 1274 | "getrandom 0.2.15", 1275 | ] 1276 | 1277 | [[package]] 1278 | name = "version_check" 1279 | version = "0.9.5" 1280 | source = "registry+https://github.com/rust-lang/crates.io-index" 1281 | checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 1282 | 1283 | [[package]] 1284 | name = "wait-timeout" 1285 | version = "0.2.0" 1286 | source = "registry+https://github.com/rust-lang/crates.io-index" 1287 | checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" 1288 | dependencies = [ 1289 | "libc", 1290 | ] 1291 | 1292 | [[package]] 1293 | name = "walkdir" 1294 | version = "2.5.0" 1295 | source = "registry+https://github.com/rust-lang/crates.io-index" 1296 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 1297 | dependencies = [ 1298 | "same-file", 1299 | "winapi-util", 1300 | ] 1301 | 1302 | [[package]] 1303 | name = "wasi" 1304 | version = "0.11.0+wasi-snapshot-preview1" 1305 | source = "registry+https://github.com/rust-lang/crates.io-index" 1306 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1307 | 1308 | [[package]] 1309 | name = "wasi" 1310 | version = "0.13.3+wasi-0.2.2" 1311 | source = "registry+https://github.com/rust-lang/crates.io-index" 1312 | checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" 1313 | dependencies = [ 1314 | "wit-bindgen-rt", 1315 | ] 1316 | 1317 | [[package]] 1318 | name = "winapi-util" 1319 | version = "0.1.9" 1320 | source = "registry+https://github.com/rust-lang/crates.io-index" 1321 | checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" 1322 | dependencies = [ 1323 | "windows-sys 0.52.0", 1324 | ] 1325 | 1326 | [[package]] 1327 | name = "windows-sys" 1328 | version = "0.52.0" 1329 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1330 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 1331 | dependencies = [ 1332 | "windows-targets", 1333 | ] 1334 | 1335 | [[package]] 1336 | name = "windows-sys" 1337 | version = "0.59.0" 1338 | source = "registry+https://github.com/rust-lang/crates.io-index" 1339 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 1340 | dependencies = [ 1341 | "windows-targets", 1342 | ] 1343 | 1344 | [[package]] 1345 | name = "windows-targets" 1346 | version = "0.52.6" 1347 | source = "registry+https://github.com/rust-lang/crates.io-index" 1348 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1349 | dependencies = [ 1350 | "windows_aarch64_gnullvm", 1351 | "windows_aarch64_msvc", 1352 | "windows_i686_gnu", 1353 | "windows_i686_gnullvm", 1354 | "windows_i686_msvc", 1355 | "windows_x86_64_gnu", 1356 | "windows_x86_64_gnullvm", 1357 | "windows_x86_64_msvc", 1358 | ] 1359 | 1360 | [[package]] 1361 | name = "windows_aarch64_gnullvm" 1362 | version = "0.52.6" 1363 | source = "registry+https://github.com/rust-lang/crates.io-index" 1364 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1365 | 1366 | [[package]] 1367 | name = "windows_aarch64_msvc" 1368 | version = "0.52.6" 1369 | source = "registry+https://github.com/rust-lang/crates.io-index" 1370 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1371 | 1372 | [[package]] 1373 | name = "windows_i686_gnu" 1374 | version = "0.52.6" 1375 | source = "registry+https://github.com/rust-lang/crates.io-index" 1376 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1377 | 1378 | [[package]] 1379 | name = "windows_i686_gnullvm" 1380 | version = "0.52.6" 1381 | source = "registry+https://github.com/rust-lang/crates.io-index" 1382 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 
1383 | 1384 | [[package]] 1385 | name = "windows_i686_msvc" 1386 | version = "0.52.6" 1387 | source = "registry+https://github.com/rust-lang/crates.io-index" 1388 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1389 | 1390 | [[package]] 1391 | name = "windows_x86_64_gnu" 1392 | version = "0.52.6" 1393 | source = "registry+https://github.com/rust-lang/crates.io-index" 1394 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1395 | 1396 | [[package]] 1397 | name = "windows_x86_64_gnullvm" 1398 | version = "0.52.6" 1399 | source = "registry+https://github.com/rust-lang/crates.io-index" 1400 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1401 | 1402 | [[package]] 1403 | name = "windows_x86_64_msvc" 1404 | version = "0.52.6" 1405 | source = "registry+https://github.com/rust-lang/crates.io-index" 1406 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1407 | 1408 | [[package]] 1409 | name = "winnow" 1410 | version = "0.6.20" 1411 | source = "registry+https://github.com/rust-lang/crates.io-index" 1412 | checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" 1413 | dependencies = [ 1414 | "memchr", 1415 | ] 1416 | 1417 | [[package]] 1418 | name = "wit-bindgen-rt" 1419 | version = "0.33.0" 1420 | source = "registry+https://github.com/rust-lang/crates.io-index" 1421 | checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" 1422 | dependencies = [ 1423 | "bitflags", 1424 | ] 1425 | 1426 | [[package]] 1427 | name = "wyz" 1428 | version = "0.5.1" 1429 | source = "registry+https://github.com/rust-lang/crates.io-index" 1430 | checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" 1431 | dependencies = [ 1432 | "tap", 1433 | ] 1434 | 1435 | [[package]] 1436 | name = "yadf" 1437 | version = "1.3.0" 1438 | dependencies = [ 1439 | "ahash 0.8.11", 1440 | "anyhow", 1441 | "assert_cmd", 1442 | 
"blake3", 1443 | "byte-unit", 1444 | "clap", 1445 | "clap-verbosity-flag", 1446 | "crossbeam-channel", 1447 | "csv", 1448 | "dirs", 1449 | "dunce", 1450 | "env_logger", 1451 | "globset", 1452 | "highway", 1453 | "human-panic", 1454 | "ignore", 1455 | "log", 1456 | "metrohash", 1457 | "num_cpus", 1458 | "once_cell", 1459 | "predicates", 1460 | "rand 0.9.0", 1461 | "rayon", 1462 | "regex", 1463 | "seahash", 1464 | "serde", 1465 | "serde_json", 1466 | "twox-hash", 1467 | "typed-builder", 1468 | ] 1469 | 1470 | [[package]] 1471 | name = "zerocopy" 1472 | version = "0.7.35" 1473 | source = "registry+https://github.com/rust-lang/crates.io-index" 1474 | checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" 1475 | dependencies = [ 1476 | "byteorder", 1477 | "zerocopy-derive 0.7.35", 1478 | ] 1479 | 1480 | [[package]] 1481 | name = "zerocopy" 1482 | version = "0.8.14" 1483 | source = "registry+https://github.com/rust-lang/crates.io-index" 1484 | checksum = "a367f292d93d4eab890745e75a778da40909cab4d6ff8173693812f79c4a2468" 1485 | dependencies = [ 1486 | "zerocopy-derive 0.8.14", 1487 | ] 1488 | 1489 | [[package]] 1490 | name = "zerocopy-derive" 1491 | version = "0.7.35" 1492 | source = "registry+https://github.com/rust-lang/crates.io-index" 1493 | checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" 1494 | dependencies = [ 1495 | "proc-macro2", 1496 | "quote", 1497 | "syn 2.0.96", 1498 | ] 1499 | 1500 | [[package]] 1501 | name = "zerocopy-derive" 1502 | version = "0.8.14" 1503 | source = "registry+https://github.com/rust-lang/crates.io-index" 1504 | checksum = "d3931cb58c62c13adec22e38686b559c86a30565e16ad6e8510a337cedc611e1" 1505 | dependencies = [ 1506 | "proc-macro2", 1507 | "quote", 1508 | "syn 2.0.96", 1509 | ] 1510 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "yadf" 
3 | version = "1.3.0" 4 | authors = ["jRimbault "] 5 | edition = "2021" 6 | description = "yet another dupes finder" 7 | license = "MIT" 8 | homepage = "https://github.com/jRimbault/yadf" 9 | repository = "https://github.com/jRimbault/yadf" 10 | readme = "README.md" 11 | rust-version = "1.81.0" 12 | categories = ["command-line-utilities", "filesystem"] 13 | keywords = ["dupe", "duplicate", "finder", "fdupes", "fast"] 14 | 15 | [profile.release] 16 | lto = "fat" 17 | codegen-units = 1 18 | 19 | [[bin]] 20 | name = "yadf" 21 | required-features = ["build-bin"] 22 | 23 | [features] 24 | default = ["build-bin"] 25 | build-bin = [ 26 | "dep:ahash", 27 | "dep:anyhow", 28 | "dep:blake3", 29 | "dep:byte-unit", 30 | "dep:clap", 31 | "dep:clap-verbosity-flag", 32 | "dep:csv", 33 | "dep:env_logger", 34 | "dep:highway", 35 | "dep:human-panic", 36 | "dep:metrohash", 37 | "dep:seahash", 38 | "dep:serde_json", 39 | "dep:twox-hash", 40 | ] 41 | 42 | [dependencies] 43 | # library dependencies 44 | crossbeam-channel = "0.5.14" 45 | dunce = "1.0.5" 46 | globset = "0.4.15" 47 | ignore = "0.4.23" 48 | log = "0.4.25" 49 | num_cpus = "1.16.0" 50 | rayon = "1.10.0" 51 | regex = "1.11.1" 52 | serde = "1.0.217" 53 | typed-builder = "0.20.0" 54 | # binary dependencies 55 | ahash = { version = "0.8.11", optional = true } 56 | anyhow = { version = "1.0.95", optional = true } 57 | byte-unit = { version = "5.1.6", features = ["byte"], optional = true } 58 | clap = { version = "4.5.27", features = [ 59 | "cargo", 60 | "derive", 61 | "string", 62 | ], optional = true } 63 | clap-verbosity-flag = { version = "3.0.2", optional = true } 64 | csv = { version = "1.3.1", optional = true } 65 | env_logger = { version = "0.11.6", optional = true } 66 | highway = { version = "1.3.0", optional = true } 67 | human-panic = { version = "2.0.2", optional = true } 68 | metrohash = { version = "1.0.7", optional = true } 69 | seahash = { version = "4.1.0", optional = true } 70 | serde_json = { version = "1.0.138", 
optional = true } 71 | twox-hash = { version = "2.1.0", optional = true } 72 | blake3 = { version = "1.5.5", optional = true } 73 | 74 | [dev-dependencies] 75 | assert_cmd = "2" 76 | dirs = "6.0.0" 77 | env_logger = "0.11.6" 78 | highway = "1.3.0" 79 | once_cell = "1.20.2" 80 | serde_json = "1.0.138" 81 | seahash = "4.1.0" 82 | twox-hash = "2.1.0" 83 | predicates = "3.1.3" 84 | rand = "0.9" 85 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Jacques Rimbault 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YADF — Yet Another Dupes Finder 2 | 3 | > _It's [fast](#benchmarks) on my machine._ 4 | 5 | ___ 6 | 7 | You should probably use [`fclones`][0]. 8 | 9 | ___ 10 | 11 | ## Installation 12 | 13 | ### Prebuilt Packages 14 | 15 | Executable binaries for some platforms are available in the [releases](https://github.com/jRimbault/yadf/releases) section. 16 | 17 | ### Building from source 18 | 19 | 1. [Install Rust Toolchain](https://www.rust-lang.org/tools/install) 20 | 2. Run `cargo install --locked yadf` 21 | 22 | ## Usage 23 | 24 | `yadf` defaults: 25 | 26 | - search current working directory `$PWD` 27 | - output format is the same as the "standard" `fdupes`, newline separated groups 28 | - descends automatically into subdirectories 29 | - search includes every file (including empty files) 30 | 31 | ```bash 32 | yadf # find duplicate files in current directory 33 | yadf ~/Documents ~/Pictures # find duplicate files in two directories 34 | yadf --depth 0 file1 file2 # compare two files 35 | yadf --depth 1 # find duplicates in current directory without descending 36 | fd --type d a | yadf --depth 1 # find directories with an "a" and search them for duplicates without descending 37 | fd --type f a | yadf # find files with an "a" and check them for duplicates 38 | ``` 39 | 40 | ### Filtering 41 | 42 | ```bash 43 | yadf --min 100M # find duplicate files of at least 100 MB 44 | yadf --max 100M # find duplicate files below 100 MB 45 | yadf --pattern '*.jpg' # find duplicate jpg 46 | yadf --regex '^g' # find duplicates starting with 'g' 47 | yadf --rfactor over:10 # find files with more than 10 copies 48 | yadf --rfactor under:10 # find files with fewer than 10 copies 49 | yadf --rfactor equal:1 # find unique files 50 | ``` 51 | 52 | ### Formatting 53 | 54 | Look up the help for a list of output
formats `yadf -h`. 55 | 56 | ```bash 57 | yadf -f json 58 | yadf -f fdupes 59 | yadf -f csv 60 | yadf -f ld-json 61 | ``` 62 | 63 |
64 | Help output. 65 | 66 | ``` 67 | Yet Another Dupes Finder 68 | 69 | Usage: yadf [OPTIONS] [PATHS]... 70 | 71 | Arguments: 72 | [PATHS]... Directories to search 73 | 74 | Options: 75 | -f, --format Output format [default: fdupes] [possible values: csv, fdupes, json, json-pretty, ld-json, machine] 76 | -a, --algorithm Hashing algorithm [default: ahash] [possible values: ahash, highway, metrohash, seahash, xxhash] 77 | -n, --no-empty Excludes empty files 78 | --min Minimum file size 79 | --max Maximum file size 80 | -d, --depth Maximum recursion depth 81 | -H, --hard-links Treat hard links to same file as duplicates 82 | -R, --regex Check files with a name matching a Perl-style regex, see: https://docs.rs/regex/1.4.2/regex/index.html#syntax 83 | -p, --pattern Check files with a name matching a glob pattern, see: https://docs.rs/globset/0.4.6/globset/index.html#syntax 84 | -v, --verbose... Increase logging verbosity 85 | -q, --quiet... Decrease logging verbosity 86 | --rfactor Replication factor [under|equal|over]:n 87 | -o, --output Optional output file 88 | -h, --help Print help (see more with '--help') 89 | -V, --version Print version 90 | 91 | For sizes, K/M/G/T[B|iB] suffixes can be used (case-insensitive). 92 | ``` 93 | 94 |
95 | 96 | ## Notes on the algorithm 97 | 98 | Most¹ dupe finders follow a 3-step algorithm: 99 | 100 | 1. group files by their size 101 | 2. group files by their first few bytes 102 | 3. group files by their entire content 103 | 104 | `yadf` skips the first step, and only does steps 2 and 3, preferring hashing rather than byte comparison. In my [tests][3-steps] having the first step on an SSD actually slowed down the program. 105 | `yadf` makes heavy use of the standard library [`BTreeMap`][btreemap], which uses a cache-aware implementation that avoids too many cache misses. `yadf` uses the parallel walker provided by `ignore` (disabling its _ignore_ features) and `rayon`'s parallel iterators to do each of these 2 steps in parallel. 106 | 107 | ¹: some need a different algorithm to support different features or different performance trade-offs 108 | 109 | [btreemap]: https://doc.rust-lang.org/std/collections/struct.BTreeMap.html 110 | [3-steps]: https://github.com/jRimbault/yadf/tree/3-steps 111 | [hashmap]: https://doc.rust-lang.org/std/collections/struct.HashMap.html 112 | 113 | ### Design goals 114 | 115 | I set out to build a high-performing artefact by assembling libraries that do the actual work; nothing here is custom made, it's all "off-the-shelf" software. 116 | 117 | ## Benchmarks 118 | 119 | The performance of `yadf` is heavily tied to the hardware, specifically the 120 | NVMe SSD. I recommend `fclones` as it has more hardware heuristics, and in general more features. `yadf` on HDDs is _terrible_. 121 | 122 | My home directory contains upwards of 700k paths and 39 GB of data, and is probably a pathological case of file duplication with all the node_modules, python virtual environments, rust target, etc. Arguably, the most important measure here is the mean time when the filesystem cache is cold.
123 | 124 | | Program (warm filesystem cache) | Version | Mean [s] | Min [s] | Max [s] | 125 | | :------------------------------ | ------: | ----------------: | --------: | ------: | 126 | | [`fclones`][0] | 0.29.3 | 7.435 ± 1.609 | 4.622 | 9.317 | 127 | | [`jdupes`][1] | 1.14.0 | 16.787 ± 0.208 | 16.484 | 17.178 | 128 | | [`ddh`][2] | 0.13 | 12.703 ± 1.547 | 10.814 | 14.793 | 129 | | [`dupe-krill`][4] | 1.4.7 | 15.555 ± 1.633 | 12.486 | 16.959 | 130 | | [`fddf`][5] | 1.7.0 | 18.441 ± 1.947 | 15.097 | 22.389 | 131 | | `yadf` | 1.1.0 | **3.157 ± 0.638** | 2.362 | 4.175 | 132 | 133 | | Program (cold filesystem cache) | Version | Mean [s] | Min [s] | Max [s] | 134 | | :------------------------------ | ------: | ----------------: | --------: | ------: | 135 | | [`fclones`][0] | 0.29.3 | 68.950 ± 3.694 | 63.165 | 73.534 | 136 | | [`jdupes`][1] | 1.14.0 | 303.907 ± 11.578 | 277.618 | 314.226 | 137 | | `yadf` | 1.1.0 | 52.481 ± 1.125 | 50.412 | 54.265 | 138 | 139 | _I test fewer programs here because it takes several hours to run._ 140 | 141 | The script used to benchmark can be read [here](./scripts/bench.sh). 142 | 143 | [0]: https://github.com/pkolaczk/fclones 144 | [1]: https://github.com/jbruchon/jdupes 145 | [2]: https://github.com/darakian/ddh 146 | [3]: https://github.com/sahib/rmlint 147 | [4]: https://github.com/kornelski/dupe-krill 148 | [5]: https://github.com/birkenfeld/fddf 149 | 150 |
151 | Hardware used. 152 | 153 | Extract from `neofetch` and `hwinfo --disk`: 154 | 155 | - OS: Ubuntu 20.04.1 LTS x86_64 156 | - Host: XPS 15 9570 157 | - Kernel: 5.4.0-42-generic 158 | - CPU: Intel i9-8950HK (12) @ 4.800GHz 159 | - Memory: 4217MiB / 31755MiB 160 | - Disk: 161 | - model: "SK hynix Disk" 162 | - driver: "nvme" 163 | 164 |
165 | 166 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | // Make the current git hash available to the build. 3 | let maybe_rev = git_revision_hash(); 4 | if let Some(rev) = maybe_rev.as_deref() { 5 | println!("cargo:rustc-env=YADF_BUILD_GIT_HASH={}", rev); 6 | } 7 | let long_version = long_version(maybe_rev); 8 | println!("cargo:rustc-env=YADF_BUILD_VERSION={}", long_version); 9 | } 10 | 11 | fn git_revision_hash() -> Option { 12 | let result = std::process::Command::new("git") 13 | .args(["rev-parse", "--short=10", "HEAD"]) 14 | .output(); 15 | result.ok().and_then(|output| { 16 | let v = String::from_utf8_lossy(&output.stdout).trim().to_string(); 17 | if v.is_empty() { 18 | None 19 | } else { 20 | Some(v) 21 | } 22 | }) 23 | } 24 | 25 | fn long_version(rev: Option) -> String { 26 | // Do we have a git hash? 27 | // (Yes, if yadf was built on a machine with `git` installed.) 28 | let hash = match rev { 29 | None => String::new(), 30 | Some(githash) => format!(" (rev {})", githash), 31 | }; 32 | let runtime = runtime_cpu_features(); 33 | if runtime.is_empty() { 34 | format!( 35 | "{}{}|{} (compiled)", 36 | env!("CARGO_PKG_VERSION"), 37 | hash, 38 | compile_cpu_features().join(" ") 39 | ) 40 | } else { 41 | format!( 42 | "{}{}|{} (compiled)|{} (runtime)", 43 | env!("CARGO_PKG_VERSION"), 44 | hash, 45 | compile_cpu_features().join(" "), 46 | runtime.join(" ") 47 | ) 48 | } 49 | } 50 | 51 | /// Returns the relevant CPU features enabled at compile time. 52 | fn compile_cpu_features() -> Vec<&'static str> { 53 | let mut features = vec![]; 54 | if cfg!(target_feature = "avx2") { 55 | features.push("+AVX"); 56 | } else { 57 | features.push("-AVX"); 58 | } 59 | features 60 | } 61 | 62 | /// Returns the relevant CPU features enabled at runtime. 
63 | #[cfg(target_arch = "x86_64")] 64 | fn runtime_cpu_features() -> Vec<&'static str> { 65 | let mut features = vec![]; 66 | if is_x86_feature_detected!("avx2") { 67 | features.push("+AVX"); 68 | } else { 69 | features.push("-AVX"); 70 | } 71 | features 72 | } 73 | 74 | /// Returns the relevant CPU features enabled at runtime. 75 | #[cfg(not(target_arch = "x86_64"))] 76 | fn runtime_cpu_features() -> Vec<&'static str> { 77 | vec![] 78 | } 79 | -------------------------------------------------------------------------------- /examples/keep_oldest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """Pipe the output of `yadf --format ld-json` into this script. 4 | 5 | Either: 6 | 7 | yadf -f ld-json > results.ldjson 8 | python3 keep_oldest.py results.ldjson 9 | 10 | Or, skipping the intermediate file: 11 | 12 | yadf -f ld-json | python3 keep_oldest.py 13 | 14 | This script is provided as an example meant to be modified and tinkered with. 
15 | """ 16 | 17 | import fileinput 18 | import itertools 19 | import json 20 | import multiprocessing 21 | import os 22 | import pathlib 23 | from typing import Callable, Iterable, Sized, TypeVar 24 | 25 | Cmp = TypeVar("Cmp", bound=Sized) 26 | Key = Callable[[str], Cmp] 27 | Filter = Callable[[Iterable[str]], Iterable[str]] 28 | 29 | 30 | def main(): 31 | cleaner = Cleaner(most_recent_modification_date, yield_all_except_first) 32 | sequential(fileinput.input(), cleaner) 33 | 34 | 35 | def sequential(ldjson: Iterable[str], cleaner: "Cleaner"): 36 | for line in ldjson: 37 | cleaner(line) 38 | 39 | 40 | def parallel(ldjson: Iterable[str], cleaner: "Cleaner"): 41 | with multiprocessing.Pool() as pool: 42 | pool.imap_unordered(cleaner, ldjson) 43 | 44 | 45 | class Cleaner: 46 | def __init__(self, key: Key = None, filter: Filter = lambda f: f): 47 | self.key = key 48 | self.filter = filter 49 | 50 | def __call__(self, line: str): 51 | files: list[str] = json.loads(line) 52 | files.sort(key=self.key) 53 | # uncomment to actually delete files 54 | for filename in self.filter(files): 55 | # os.remove(filename) 56 | pass 57 | 58 | 59 | def most_recent_modification_date(filename: str) -> float: 60 | return pathlib.Path(filename).stat().st_mtime 61 | 62 | 63 | def yield_all_except_first(files: Iterable[str]) -> Iterable[str]: 64 | return itertools.islice(files, 1, None) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /scripts/bench.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # ddh produces a Results.txt file after each run 4 | # 5 | # rmlint produces a number of files all named rmlint.{ext} 6 | # 7 | # fclones and jdupes both don't scan recursively by default 8 | # 9 | # dupe-krill skips file smaller than the block size, hence the -s flag, 10 | # and will hardlinks files together, hence the --dry-run flag 11 | # 
12 | # fddf ignores zero length files 13 | 14 | case "$1" in 15 | "cold") 16 | prepare_cmd='rm Results.txt rmlint.* || true && echo "free && sync && echo 3 > /proc/sys/vm/drop_caches && free" | sudo sh' 17 | warmups=0 18 | ;; 19 | *) 20 | prepare_cmd="rm Results.txt rmlint.* || true" 21 | warmups=5 22 | ;; 23 | esac 24 | 25 | hyperfine --warmup "$warmups" \ 26 | --min-runs 10 \ 27 | --export-markdown export.md \ 28 | --prepare "$prepare_cmd" \ 29 | "fclones group --min 0 ~" \ 30 | "jdupes -z -r ~" \ 31 | "ddh --directories ~" \ 32 | "dupe-krill -s -d ~" \ 33 | "fddf -m 0 ~" \ 34 | "yadf ~" 35 | 36 | { 37 | rm Results.txt rmlint.* || true 38 | } 2> /dev/null 39 | -------------------------------------------------------------------------------- /scripts/yadf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import functools 5 | import hashlib 6 | import locale 7 | import math 8 | import multiprocessing 9 | import os 10 | import re 11 | import sys 12 | from collections import defaultdict 13 | from json import dump as jsondump 14 | 15 | 16 | locale.setlocale(locale.LC_ALL, "") 17 | 18 | 19 | def main(args): 20 | full_counter = find_dupes( 21 | args.directories, HASHERS[args.algorithm], args.min, args.max 22 | ) 23 | partitioned = partition(full_counter, lambda b: len(b) > 1) 24 | duplicates, uniques = partitioned[True], partitioned[False] 25 | DISPLAY[args.format](duplicates) 26 | if args.report: 27 | duplicates_files = sum(map(len, duplicates)) 28 | files_scanned = len(uniques) + duplicates_files 29 | print(f"{files_scanned:n} scanned files", file=sys.stderr) 30 | print(f"{len(uniques):n} unique files", file=sys.stderr) 31 | print( 32 | f"{len(duplicates):n} groups of duplicate files ({duplicates_files:n} files)", 33 | file=sys.stderr, 34 | ) 35 | 36 | 37 | def find_dupes(directories, algorithm, min=0, max=math.inf): 38 | def build_bag(key_value_iterable): 39 | bag = 
defaultdict(list) 40 | for key, value in key_value_iterable: 41 | bag[key].append(value) 42 | return bag 43 | 44 | walker = ( 45 | file 46 | for file in ( 47 | os.path.join(path, file) 48 | for directory in set(directories) 49 | for (path, _, files) in os.walk(directory) 50 | for file in files 51 | ) 52 | if min <= os.stat(file).st_size <= max 53 | ) 54 | 55 | hasher = functools.partial(hash_file, algorithm=algorithm) 56 | with multiprocessing.Pool() as pool: 57 | tuples = pool.imap_unordered(hasher, walker, chunksize=32) 58 | return build_bag(tuples).values() 59 | 60 | 61 | def hash_file(path, algorithm): 62 | hasher = algorithm() 63 | with open(path, "rb") as fd: 64 | while True: 65 | buf = fd.read(4096) 66 | if len(buf) == 0: 67 | break 68 | hasher.update(buf) 69 | return hasher.digest(), path 70 | 71 | 72 | def fdupes(duplicates): 73 | last = len(duplicates) - 1 74 | for (i, bucket) in enumerate(duplicates): 75 | print(*bucket, sep="\n") 76 | if i != last: 77 | print() 78 | 79 | 80 | def json(duplicates): 81 | jsondump(duplicates, fp=sys.stdout) 82 | 83 | 84 | def ldjson(duplicates): 85 | for bucket in duplicates: 86 | jsondump(bucket, fp=sys.stdout) 87 | 88 | 89 | DISPLAY = { 90 | fdupes.__name__: fdupes, 91 | json.__name__: json, 92 | ldjson.__name__: ldjson, 93 | } 94 | 95 | HASHERS = { 96 | hashlib.blake2b.__name__: hashlib.blake2b, 97 | hashlib.sha384.__name__: hashlib.sha384, 98 | hashlib.md5.__name__: hashlib.md5, 99 | } 100 | 101 | 102 | def partition(iterable, predicate): 103 | results = defaultdict(list) 104 | for item in iterable: 105 | results[predicate(item)].append(item) 106 | return results 107 | 108 | 109 | def parse_args(argv): 110 | units = {"B": 1, "KB": 2 ** 10, "MB": 2 ** 20, "GB": 2 ** 30, "TB": 2 ** 40} 111 | 112 | def byte_size(size): 113 | size = size.upper() 114 | if " " not in size: 115 | size = re.sub(r"([KMGT]?B?)", r" \1", size) 116 | size = size.split() 117 | if len(size) < 2: 118 | size.append("B") 119 | elif len(size[1]) < 2: 
120 | size[1] += "B" 121 | number, unit = [string.strip() for string in size] 122 | return int(float(number) * units[unit]) 123 | 124 | parser = argparse.ArgumentParser() 125 | parser.add_argument( 126 | "directories", 127 | help="directories to search", 128 | default=[os.getcwd()], 129 | nargs="*", 130 | ) 131 | parser.add_argument( 132 | "-r", 133 | "--report", 134 | action="store_true", 135 | help="print human readable report to stderr", 136 | ) 137 | parser.add_argument( 138 | "-f", 139 | "--format", 140 | choices=DISPLAY.keys(), 141 | default=next(iter(DISPLAY)), 142 | help="output format", 143 | ) 144 | parser.add_argument( 145 | "-a", 146 | "--algorithm", 147 | choices=HASHERS.keys(), 148 | default=next(iter(HASHERS)), 149 | help="hashing algorithm", 150 | ) 151 | parser.add_argument("--min", type=byte_size, default=0) 152 | parser.add_argument("--max", type=byte_size, default=math.inf) 153 | return parser.parse_args(argv) 154 | 155 | 156 | if __name__ == "__main__": 157 | try: 158 | main(parse_args(sys.argv[1:])) 159 | except KeyboardInterrupt: 160 | print() 161 | -------------------------------------------------------------------------------- /src/args.rs: -------------------------------------------------------------------------------- 1 | use super::{Args, ReplicationFactor}; 2 | use clap::{CommandFactory, FromArgMatches}; 3 | use std::env; 4 | use std::fmt; 5 | use std::io::BufRead; 6 | use std::path::PathBuf; 7 | 8 | impl Args { 9 | pub fn max(&self) -> Option { 10 | self.max 11 | .as_ref() 12 | .map(|m| m.0.get_adjusted_unit(byte_unit::Unit::B)) 13 | .map(|u| u.get_value() as _) 14 | } 15 | 16 | pub fn min(&self) -> Option { 17 | self.min 18 | .as_ref() 19 | .map(|m| m.0.get_adjusted_unit(byte_unit::Unit::B)) 20 | .map(|u| u.get_value() as _) 21 | .or(if self.no_empty { Some(1) } else { None }) 22 | } 23 | 24 | pub fn init_from_env() -> Self { 25 | let long_version = env!("YADF_BUILD_VERSION").replace('|', "\n"); 26 | let short_version = 
long_version.lines().next().unwrap().to_string(); 27 | let app = Self::command() 28 | .version(short_version) 29 | .long_version(long_version) 30 | .after_help("For sizes, K/M/G/T[B|iB] suffixes can be used (case-insensitive)."); 31 | let mut args = Self::from_arg_matches(&app.get_matches()).unwrap(); 32 | init_logger(&args.verbosity); 33 | args.build_paths(); 34 | args 35 | } 36 | 37 | fn build_paths(&mut self) { 38 | if self.paths.is_empty() { 39 | self.paths = default_paths() 40 | } 41 | } 42 | } 43 | 44 | fn init_logger(verbosity: &clap_verbosity_flag::Verbosity) { 45 | env_logger::Builder::new() 46 | .filter_level( 47 | verbosity 48 | .log_level() 49 | .unwrap_or(log::Level::Error) 50 | .to_level_filter(), 51 | ) 52 | .init(); 53 | } 54 | 55 | fn default_paths() -> Vec { 56 | let stdin = std::io::stdin(); 57 | let mut paths = if std::io::IsTerminal::is_terminal(&stdin) { 58 | Vec::new() 59 | } else { 60 | stdin 61 | .lock() 62 | .lines() 63 | .map_while(Result::ok) 64 | .map(Into::into) 65 | .collect() 66 | }; 67 | if paths.is_empty() { 68 | paths.push(env::current_dir().expect("couldn't get current working directory")); 69 | } 70 | paths 71 | } 72 | 73 | impl Default for ReplicationFactor { 74 | fn default() -> Self { 75 | ReplicationFactor::Over(1) 76 | } 77 | } 78 | 79 | impl std::str::FromStr for ReplicationFactor { 80 | type Err = String; 81 | 82 | fn from_str(value: &str) -> Result { 83 | use ReplicationFactor::*; 84 | const SEPS: &[char] = &[':', '=']; 85 | let mut arg = value.split(SEPS); 86 | 87 | let rf = match ( 88 | arg.next().map(str::to_lowercase).as_deref(), 89 | arg.next().and_then(|v| v.parse().ok()), 90 | ) { 91 | (Some("under"), Some(factor)) => Under(factor), 92 | (Some("equal"), Some(factor)) => Equal(factor), 93 | (Some("over"), Some(factor)) => Over(factor), 94 | _ => { 95 | return Err(format!( 96 | "replication factor must be of the form \ 97 | `over:1` or `under:5` or `equal:2`, \ 98 | got {:?}", 99 | value 100 | )) 101 | } 102 | }; 
103 | Ok(rf) 104 | } 105 | } 106 | 107 | impl fmt::Display for ReplicationFactor { 108 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 109 | fmt::Debug::fmt(self, f) 110 | } 111 | } 112 | 113 | impl From for yadf::Factor { 114 | fn from(f: ReplicationFactor) -> Self { 115 | match f { 116 | ReplicationFactor::Under(n) => yadf::Factor::Under(n), 117 | ReplicationFactor::Equal(n) => yadf::Factor::Equal(n), 118 | ReplicationFactor::Over(n) => yadf::Factor::Over(n), 119 | } 120 | } 121 | } 122 | 123 | #[cfg(test)] 124 | mod tests { 125 | use super::*; 126 | 127 | #[test] 128 | fn replication_factor_parsing() { 129 | let cases = [ 130 | ("under=6", ReplicationFactor::Under(6)), 131 | ("over:7", ReplicationFactor::Over(7)), 132 | ("over:1", ReplicationFactor::Over(1)), 133 | ("equal=3", ReplicationFactor::Equal(3)), 134 | ]; 135 | 136 | for (value, expected) in cases.iter() { 137 | let rf: ReplicationFactor = value.parse().unwrap(); 138 | assert_eq!(&rf, expected); 139 | } 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /src/bag.rs: -------------------------------------------------------------------------------- 1 | mod display; 2 | mod replicates; 3 | mod serialize; 4 | 5 | use std::borrow::Borrow; 6 | use std::collections::btree_map::Entry; 7 | use std::collections::BTreeMap; 8 | use std::ops::Index; 9 | 10 | /// Ordered counter structure. 
11 | /// 12 | /// # Example : 13 | /// 14 | /// ``` 15 | /// use yadf::TreeBag; 16 | /// 17 | /// let bag: TreeBag = vec![ 18 | /// (3, "hello world"), 19 | /// (3, "foobar"), 20 | /// (7, "fizz"), 21 | /// (7, "buzz"), 22 | /// (6, "rust"), 23 | /// ].into_iter().collect(); 24 | /// 25 | /// assert_eq!(bag[&3], ["hello world", "foobar"]); 26 | /// assert_eq!(bag[&7], ["fizz", "buzz"]); 27 | /// assert_eq!(bag[&6], ["rust"]); 28 | /// ``` 29 | #[derive(Debug)] 30 | pub struct TreeBag(BTreeMap>); 31 | 32 | #[derive(Debug, Clone)] 33 | pub enum Factor { 34 | Under(usize), 35 | Equal(usize), 36 | Over(usize), 37 | } 38 | 39 | /// A view which only provides access to n replicated entries. 40 | #[derive(Debug)] 41 | pub struct Replicates<'a, K, V> { 42 | tree: &'a TreeBag, 43 | factor: Factor, 44 | } 45 | 46 | /// Display marker. 47 | #[derive(Debug)] 48 | pub struct Fdupes; 49 | /// Display marker. 50 | #[derive(Debug)] 51 | pub struct Machine; 52 | 53 | #[derive(Debug)] 54 | pub struct Display<'a, K, V, U> { 55 | format_marker: std::marker::PhantomData<&'a U>, 56 | tree: &'a Replicates<'a, K, V>, 57 | } 58 | 59 | impl From>> for TreeBag { 60 | /// Build a [`TreeBag`](TreeBag) from a [`BTreeMap`](BTreeMap). 61 | fn from(btree: BTreeMap>) -> Self { 62 | Self(btree) 63 | } 64 | } 65 | 66 | impl TreeBag { 67 | /// Provides a view only on the buckets containing more than one element. 68 | pub const fn duplicates(&self) -> Replicates<'_, K, V> { 69 | Replicates { 70 | tree: self, 71 | factor: Factor::Over(1), 72 | } 73 | } 74 | 75 | /// Provides a view only on the buckets as constrained by the replication [`Factor`](Factor). 76 | pub const fn replicates(&self, factor: Factor) -> Replicates<'_, K, V> { 77 | Replicates { tree: self, factor } 78 | } 79 | 80 | /// Borrows the backing [`BTreeMap`](BTreeMap) of the bag. 81 | pub const fn as_inner(&self) -> &BTreeMap> { 82 | &self.0 83 | } 84 | 85 | /// Mutably borrows the backing [`BTreeMap`](BTreeMap) of the bag. 
86 | pub fn as_inner_mut(&mut self) -> &mut BTreeMap> { 87 | &mut self.0 88 | } 89 | 90 | /// Consumes the wrapper [`TreeBag`](TreeBag) and returns the inner [`BTreeMap`](BTreeMap). 91 | pub fn into_inner(self) -> BTreeMap> { 92 | self.0 93 | } 94 | 95 | /// Returns the number of buckets in the bag. 96 | pub fn len(&self) -> usize { 97 | self.0.len() 98 | } 99 | 100 | /// Returns `true` if the bag contains no elements. 101 | pub fn is_empty(&self) -> bool { 102 | self.0.is_empty() 103 | } 104 | 105 | /// Returns a reference to the bucket corresponding to the key. 106 | pub fn get(&self, key: &Q) -> Option<&Vec> 107 | where 108 | K: Borrow + Ord, 109 | Q: ?Sized + Ord, 110 | { 111 | self.0.get(key) 112 | } 113 | 114 | /// Returns a mutable reference to the bucket corresponding to the key. 115 | pub fn get_mut(&mut self, key: &Q) -> Option<&mut Vec> 116 | where 117 | K: Borrow + Ord, 118 | Q: ?Sized + Ord, 119 | { 120 | self.0.get_mut(key) 121 | } 122 | 123 | /// Gets the given key’s corresponding entry in the bag for in-place manipulation. 
124 | pub fn entry(&mut self, key: K) -> Entry<'_, K, Vec> 125 | where 126 | K: Ord, 127 | { 128 | self.0.entry(key) 129 | } 130 | } 131 | 132 | impl std::iter::FromIterator<(K, V)> for TreeBag { 133 | fn from_iter(key_value_iter: I) -> Self 134 | where 135 | I: IntoIterator, 136 | { 137 | let mut bag = TreeBag::default(); 138 | bag.extend(key_value_iter); 139 | bag 140 | } 141 | } 142 | 143 | impl Extend<(K, V)> for TreeBag { 144 | fn extend(&mut self, key_value_iter: I) 145 | where 146 | I: IntoIterator, 147 | { 148 | for (key, value) in key_value_iter { 149 | self.entry(key).or_default().push(value); 150 | } 151 | } 152 | } 153 | 154 | impl Default for TreeBag { 155 | fn default() -> Self { 156 | Self(Default::default()) 157 | } 158 | } 159 | 160 | impl Index<&Q> for TreeBag 161 | where 162 | K: Borrow + Ord, 163 | Q: Ord, 164 | { 165 | type Output = Vec; 166 | 167 | /// Returns a reference to the value corresponding to the supplied key. 168 | /// 169 | /// # Panics 170 | /// 171 | /// Panics if the key is not present in the [`TreeBag`](TreeBag). 
172 | fn index(&self, key: &Q) -> &Self::Output { 173 | self.get(key).expect("no entry found for key") 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /src/bag/display.rs: -------------------------------------------------------------------------------- 1 | use super::{Display, Fdupes, Machine}; 2 | use std::fmt; 3 | use std::path::Path; 4 | 5 | impl fmt::Display for Display<'_, K, V, Fdupes> 6 | where 7 | V: AsRef, 8 | { 9 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 10 | let mut duplicates = self.tree.iter().peekable(); 11 | while let Some(bucket) = duplicates.next() { 12 | let mut bucket = bucket.iter().peekable(); 13 | let is_last_bucket = duplicates.peek().is_none(); 14 | while let Some(dupe) = bucket.next() { 15 | dupe.as_ref().display().fmt(f)?; 16 | if bucket.peek().is_some() || !is_last_bucket { 17 | f.write_str("\n")?; 18 | } 19 | } 20 | if !is_last_bucket { 21 | f.write_str("\n")?; 22 | } 23 | } 24 | Ok(()) 25 | } 26 | } 27 | 28 | impl fmt::Display for Display<'_, K, V, Machine> 29 | where 30 | V: AsRef, 31 | { 32 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 33 | let mut duplicates = self.tree.iter().peekable(); 34 | while let Some(bucket) = duplicates.next() { 35 | let (last, rest) = bucket.split_last().ok_or(fmt::Error)?; 36 | for dupe in rest { 37 | fmt::Debug::fmt(dupe.as_ref(), f)?; 38 | f.write_str(" ")?; 39 | } 40 | fmt::Debug::fmt(last.as_ref(), f)?; 41 | if duplicates.peek().is_some() { 42 | f.write_str("\n")?; 43 | } 44 | } 45 | Ok(()) 46 | } 47 | } 48 | 49 | #[cfg(test)] 50 | mod tests { 51 | use super::super::TreeBag; 52 | use super::*; 53 | use once_cell::sync::Lazy; 54 | 55 | static BAG: Lazy> = Lazy::new(|| { 56 | vec![ 57 | (77, "hello"), 58 | (77, "world"), 59 | (1, "ignored"), 60 | (3, "foo"), 61 | (3, "bar"), 62 | ] 63 | .into_iter() 64 | .collect() 65 | }); 66 | 67 | #[test] 68 | fn machine() { 69 | let result = 
BAG.duplicates().display::().to_string(); 70 | let expected = "\ 71 | \"foo\" \"bar\"\n\ 72 | \"hello\" \"world\"\ 73 | "; 74 | assert_eq!(result, expected); 75 | } 76 | 77 | #[test] 78 | fn fdupes() { 79 | let result = BAG.duplicates().display::().to_string(); 80 | let expected = "\ 81 | foo\n\ 82 | bar\n\ 83 | \n\ 84 | hello\n\ 85 | world\ 86 | "; 87 | assert_eq!(result, expected); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/bag/replicates.rs: -------------------------------------------------------------------------------- 1 | use super::{Display, Factor, Replicates}; 2 | use std::collections::btree_map::Values; 3 | 4 | /// [`Iterator`](Iterator) adapter. 5 | #[derive(Debug)] 6 | pub struct Iter<'a, K, V> { 7 | values: Values<'a, K, Vec>, 8 | factor: Factor, 9 | } 10 | 11 | impl Replicates<'_, K, V> { 12 | /// Iterator over the buckets. 13 | pub fn iter(&self) -> Iter<'_, K, V> { 14 | Iter { 15 | values: self.tree.0.values(), 16 | factor: self.factor.clone(), 17 | } 18 | } 19 | 20 | /// Returns an object that implements [`Display`](std::fmt::Display). 21 | /// 22 | /// Depending on the contents of the [`TreeBag`](super::TreeBag), the display object 23 | /// can be parameterized to get a different [`Display`](std::fmt::Display) implementation. 
24 | pub fn display(&self) -> Display<'_, K, V, U> { 25 | Display { 26 | format_marker: std::marker::PhantomData, 27 | tree: self, 28 | } 29 | } 30 | } 31 | 32 | impl<'a, K, V> IntoIterator for &'a Replicates<'a, K, V> { 33 | type Item = &'a Vec; 34 | type IntoIter = Iter<'a, K, V>; 35 | 36 | fn into_iter(self) -> Self::IntoIter { 37 | self.iter() 38 | } 39 | } 40 | 41 | #[allow(clippy::manual_find)] 42 | impl<'a, K, V> Iterator for Iter<'a, K, V> { 43 | type Item = &'a Vec; 44 | 45 | fn next(&mut self) -> Option { 46 | for bucket in &mut self.values { 47 | if self.factor.pass(bucket.len()) { 48 | return Some(bucket); 49 | } 50 | } 51 | None 52 | } 53 | } 54 | 55 | impl Factor { 56 | fn pass(&self, x: usize) -> bool { 57 | match *self { 58 | Factor::Under(n) => x < n, 59 | Factor::Equal(n) => x == n, 60 | Factor::Over(n) => x > n, 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/bag/serialize.rs: -------------------------------------------------------------------------------- 1 | use super::{Replicates, TreeBag}; 2 | use serde::ser::{Serialize, Serializer}; 3 | 4 | impl Serialize for Replicates<'_, K, V> 5 | where 6 | V: Serialize, 7 | { 8 | fn serialize(&self, serializer: S) -> Result 9 | where 10 | S: Serializer, 11 | { 12 | serializer.collect_seq(self.iter()) 13 | } 14 | } 15 | 16 | impl Serialize for TreeBag 17 | where 18 | K: Serialize, 19 | V: Serialize, 20 | { 21 | fn serialize(&self, serializer: S) -> Result 22 | where 23 | S: Serializer, 24 | { 25 | serializer.collect_map(self.0.iter()) 26 | } 27 | } 28 | 29 | #[cfg(test)] 30 | mod tests { 31 | use super::super::TreeBag; 32 | 33 | #[test] 34 | fn json() { 35 | let counter: TreeBag = vec![ 36 | (77, "hello"), 37 | (77, "world"), 38 | (1, "ignored"), 39 | (3, "foo"), 40 | (3, "bar"), 41 | ] 42 | .into_iter() 43 | .collect(); 44 | let result = serde_json::to_string(&counter.duplicates()).unwrap(); 45 | let expected = 
r#"[["foo","bar"],["hello","world"]]"#; 46 | assert_eq!(result, expected); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/ext.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | use std::hash::Hash; 3 | use std::path::Path; 4 | 5 | /// Could be replaced by `unique_by` in `itertools` 6 | pub trait IteratorExt: Iterator + Sized { 7 | fn unique_by(self, f: F) -> UniqueBy 8 | where 9 | F: Fn(&Self::Item) -> K, 10 | K: Hash + Eq, 11 | { 12 | UniqueBy::new(self, f) 13 | } 14 | } 15 | 16 | impl IteratorExt for I {} 17 | 18 | pub struct UniqueBy { 19 | iter: I, 20 | set: HashSet, 21 | f: F, 22 | } 23 | 24 | impl UniqueBy 25 | where 26 | I: Iterator, 27 | F: Fn(&I::Item) -> K, 28 | K: Hash + Eq, 29 | { 30 | fn new(iter: I, f: F) -> Self { 31 | Self { 32 | iter, 33 | f, 34 | set: HashSet::new(), 35 | } 36 | } 37 | } 38 | 39 | #[allow(clippy::manual_find)] 40 | impl Iterator for UniqueBy 41 | where 42 | I: Iterator, 43 | F: Fn(&I::Item) -> K, 44 | K: Hash + Eq, 45 | { 46 | type Item = I::Item; 47 | 48 | fn next(&mut self) -> Option { 49 | for item in &mut self.iter { 50 | if self.set.insert((self.f)(&item)) { 51 | return Some(item); 52 | } 53 | } 54 | None 55 | } 56 | } 57 | 58 | pub trait WalkParallelForEach { 59 | fn for_each(self, f: F) 60 | where 61 | F: Fn(Result) -> ignore::WalkState, 62 | F: Send + Copy; 63 | } 64 | 65 | impl WalkParallelForEach for ignore::WalkParallel { 66 | fn for_each(self, f: F) 67 | where 68 | F: Fn(Result) -> ignore::WalkState, 69 | F: Send + Copy, 70 | { 71 | self.run(|| Box::new(f)) 72 | } 73 | } 74 | 75 | pub trait WalkBuilderAddPaths { 76 | fn add_paths(&mut self, paths: I) -> &mut Self 77 | where 78 | P: AsRef, 79 | I: IntoIterator; 80 | } 81 | 82 | impl WalkBuilderAddPaths for ignore::WalkBuilder { 83 | fn add_paths(&mut self, paths: I) -> &mut Self 84 | where 85 | P: AsRef, 86 | I: IntoIterator, 87 | { 88 | for 
path in paths { 89 | self.add(path); 90 | } 91 | self 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/fs.rs: -------------------------------------------------------------------------------- 1 | //! Inner parts of `yadf`. Initial file collection and checksumming. 2 | 3 | pub mod filter; 4 | mod hash; 5 | 6 | use crate::ext::{IteratorExt, WalkBuilderAddPaths, WalkParallelForEach}; 7 | use crate::TreeBag; 8 | use rayon::iter::{IntoParallelIterator, ParallelIterator}; 9 | use std::path::{Path, PathBuf}; 10 | 11 | const CHANNEL_SIZE: usize = 8 * 1024; 12 | const BLOCK_SIZE: usize = 4 * 1024; 13 | 14 | /// Foundation of the API. 15 | /// This will attempt a naive scan of every file, 16 | /// within the given size constraints, at the given path. 17 | pub fn find_dupes_partial( 18 | directories: &[P], 19 | max_depth: Option, 20 | filter: filter::FileFilter, 21 | ) -> TreeBag 22 | where 23 | H: crate::hasher::Hasher, 24 | P: AsRef, 25 | { 26 | let mut paths = directories 27 | .iter() 28 | .unique_by(|path| dunce::canonicalize(path).ok()); 29 | let first = paths.next().expect("there should be at least one path"); 30 | let walker = ignore::WalkBuilder::new(first) 31 | .add_paths(paths) 32 | .standard_filters(false) 33 | .max_depth(max_depth) 34 | .threads(num_cpus::get()) 35 | .build_parallel(); 36 | let (sender, receiver) = crossbeam_channel::bounded(CHANNEL_SIZE); 37 | rayon::join( 38 | move || receiver.into_iter().collect(), 39 | move || { 40 | walker.for_each(|entry| { 41 | if let Err(error) = entry { 42 | log::error!("{}", error); 43 | return ignore::WalkState::Continue; 44 | } 45 | if let Some(key_value) = hash_entry::(&filter, entry.unwrap()) { 46 | if let Err(error) = sender.send(key_value) { 47 | log::error!("{}, couldn't send value across channel", error); 48 | } 49 | } 50 | ignore::WalkState::Continue 51 | }) 52 | }, 53 | ) 54 | .0 55 | } 56 | 57 | fn hash_entry(filter: &filter::FileFilter, entry: 
ignore::DirEntry) -> Option<(H::Hash, PathBuf)> 58 | where 59 | H: crate::hasher::Hasher, 60 | { 61 | let path = entry.path(); 62 | let meta = entry 63 | .metadata() 64 | .map_err(|error| log::error!("{}, couldn't get metadata for {:?}", error, path)) 65 | .ok()?; 66 | if !filter.is_match(path, meta) { 67 | return None; 68 | } 69 | let hash = hash::partial::(path) 70 | .map_err(|error| log::error!("{}, couldn't hash {:?}", error, path)) 71 | .ok()?; 72 | Some((hash, entry.into_path())) 73 | } 74 | 75 | pub fn dedupe(tree: TreeBag) -> crate::FileCounter 76 | where 77 | H: crate::hasher::Hasher, 78 | { 79 | let (sender, receiver) = crossbeam_channel::bounded(CHANNEL_SIZE); 80 | rayon::join( 81 | move || receiver.into_iter().collect(), 82 | move || { 83 | tree.into_inner() 84 | .into_par_iter() 85 | .for_each_with(sender, process_bucket::) 86 | }, 87 | ) 88 | .0 89 | } 90 | 91 | fn process_bucket( 92 | sender: &mut crossbeam_channel::Sender<(H::Hash, crate::Path)>, 93 | (old_hash, bucket): (H::Hash, Vec), 94 | ) where 95 | H: crate::hasher::Hasher, 96 | { 97 | if bucket.len() == 1 { 98 | let file = bucket.into_iter().next().unwrap(); 99 | if let Err(error) = sender.send((old_hash, file.into())) { 100 | log::error!("{}, couldn't send value across channel", error); 101 | } 102 | } else { 103 | bucket 104 | .into_par_iter() 105 | .for_each_with(sender.clone(), |sender, file| { 106 | let hash = rehash_file::(&file).unwrap_or(old_hash); 107 | if let Err(error) = sender.send((hash, file.into())) { 108 | log::error!("{}, couldn't send value across channel", error); 109 | } 110 | }); 111 | } 112 | } 113 | 114 | fn rehash_file(file: &Path) -> Result 115 | where 116 | H: crate::hasher::Hasher, 117 | { 118 | if file.metadata().map(|f| f.len()).unwrap_or(0) < BLOCK_SIZE as _ { 119 | return Err(()); 120 | } 121 | match hash::full::(file) { 122 | Ok(hash) => Ok(hash), 123 | Err(error) => { 124 | log::error!("{}, couldn't hash {:?}, reusing partial hash", error, file); 125 | Err(()) 
126 | } 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/fs/filter.rs: -------------------------------------------------------------------------------- 1 | use std::fs::Metadata; 2 | use std::path::Path; 3 | 4 | #[derive(Debug)] 5 | pub struct FileFilter { 6 | min: Option, 7 | max: Option, 8 | regex: Option, 9 | glob: Option, 10 | #[cfg(unix)] 11 | inodes_filter: inode::Filter, 12 | } 13 | 14 | impl FileFilter { 15 | #[cfg(not(unix))] 16 | pub fn new( 17 | min: Option, 18 | max: Option, 19 | regex: Option, 20 | glob: Option, 21 | ) -> Self { 22 | Self { 23 | min, 24 | max, 25 | regex, 26 | glob, 27 | } 28 | } 29 | 30 | #[cfg(unix)] 31 | pub fn new( 32 | min: Option, 33 | max: Option, 34 | regex: Option, 35 | glob: Option, 36 | disable_hard_links_filter: bool, 37 | ) -> Self { 38 | Self { 39 | min, 40 | max, 41 | regex, 42 | glob, 43 | inodes_filter: inode::Filter::new(disable_hard_links_filter), 44 | } 45 | } 46 | 47 | pub fn is_match(&self, path: &Path, meta: Metadata) -> bool { 48 | #[cfg(unix)] 49 | { 50 | if !self.inodes_filter.is_unique(&meta) { 51 | return false; 52 | } 53 | } 54 | meta.is_file() 55 | && self.min.map_or(true, |m| meta.len() >= m) 56 | && self.max.map_or(true, |m| meta.len() <= m) 57 | && is_match(&self.regex, path).unwrap_or(true) 58 | && is_match(&self.glob, path).unwrap_or(true) 59 | } 60 | } 61 | 62 | fn is_match(opt: &Option, path: &Path) -> Option { 63 | opt.as_ref().and_then(|m| m.is_file_name_match(path)) 64 | } 65 | 66 | trait Matcher { 67 | fn is_file_name_match(&self, path: &Path) -> Option; 68 | } 69 | 70 | impl Matcher for regex::Regex { 71 | fn is_file_name_match(&self, path: &Path) -> Option { 72 | path.file_name() 73 | .and_then(std::ffi::OsStr::to_str) 74 | .map(|file_name| self.is_match(file_name)) 75 | } 76 | } 77 | 78 | impl Matcher for globset::GlobMatcher { 79 | fn is_file_name_match(&self, path: &Path) -> Option { 80 | path.file_name().map(|file_name| 
self.is_match(file_name)) 81 | } 82 | } 83 | 84 | #[cfg(unix)] 85 | mod inode { 86 | use std::collections::HashSet; 87 | use std::fs::Metadata; 88 | use std::os::unix::fs::MetadataExt; 89 | use std::sync::Mutex; 90 | 91 | /// Filter out unique inodes 92 | #[derive(Debug)] 93 | pub enum Filter { 94 | Disabled, 95 | Enabled(InodeSet), 96 | } 97 | 98 | #[derive(Debug, Default)] 99 | pub struct InodeSet(Mutex>); 100 | 101 | impl Filter { 102 | pub fn new(disable_hard_links_filter: bool) -> Self { 103 | if disable_hard_links_filter { 104 | Self::Disabled 105 | } else { 106 | Self::Enabled(Default::default()) 107 | } 108 | } 109 | 110 | pub fn is_unique(&self, meta: &Metadata) -> bool { 111 | match self { 112 | Self::Disabled => true, 113 | Self::Enabled(set) => set.is_unique(meta), 114 | } 115 | } 116 | } 117 | 118 | impl InodeSet { 119 | fn is_unique(&self, meta: &Metadata) -> bool { 120 | self.0.lock().unwrap().insert(meta.ino()) 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/fs/hash.rs: -------------------------------------------------------------------------------- 1 | use super::BLOCK_SIZE; 2 | use std::fs::File; 3 | use std::io::{self, Read}; 4 | use std::path::Path; 5 | 6 | /// Get a checksum of the first 4 KiB (at most) of a file. 7 | pub fn partial(path: &Path) -> io::Result 8 | where 9 | H: crate::hasher::Hasher, 10 | { 11 | let mut file = File::open(path)?; 12 | let mut buffer = [0u8; BLOCK_SIZE]; 13 | let mut n = 0; 14 | loop { 15 | match file.read(&mut buffer[n..]) { 16 | Ok(0) => break, 17 | Ok(len) => n += len, 18 | Err(e) if e.kind() == io::ErrorKind::Interrupted => continue, 19 | Err(e) => return Err(e), 20 | } 21 | } 22 | let mut hasher = H::default(); 23 | hasher.write(&file.metadata()?.len().to_le_bytes()); 24 | hasher.write(&buffer[..n]); 25 | Ok(hasher.finish()) 26 | } 27 | 28 | /// Get a complete checksum of a file. 
29 | pub fn full(path: &Path) -> io::Result 30 | where 31 | H: crate::hasher::Hasher, 32 | { 33 | /// Compile time [`Write`](std::io::Write) wrapper for a [`Hasher`](core::hash::Hasher). 34 | /// This should get erased at compile time. 35 | #[repr(transparent)] 36 | struct HashWriter(H); 37 | 38 | impl io::Write for HashWriter { 39 | fn write(&mut self, buf: &[u8]) -> io::Result { 40 | crate::hasher::Hasher::write(&mut self.0, buf); 41 | Ok(buf.len()) 42 | } 43 | 44 | fn flush(&mut self) -> io::Result<()> { 45 | Ok(()) 46 | } 47 | } 48 | 49 | let mut hasher = HashWriter(H::default()); 50 | io::copy(&mut File::open(path)?, &mut hasher)?; 51 | Ok(hasher.0.finish()) 52 | } 53 | 54 | #[cfg(test)] 55 | mod tests { 56 | use super::*; 57 | 58 | #[test] 59 | fn different_hash_partial_and_full_for_small_file_because_of_size() { 60 | let h1 = partial::("./tests/static/foo".as_ref()).unwrap(); 61 | let h2 = full::("./tests/static/foo".as_ref()).unwrap(); 62 | assert_ne!(h1, h2); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/hasher.rs: -------------------------------------------------------------------------------- 1 | pub trait Hasher: Default { 2 | type Hash: Hash; 3 | fn write(&mut self, buf: &[u8]); 4 | fn finish(self) -> Self::Hash; 5 | } 6 | 7 | pub trait Hash: PartialEq + Eq + PartialOrd + Ord + Send + Sync + Copy {} 8 | 9 | impl Hash for T where T: PartialEq + Eq + PartialOrd + Ord + Send + Sync + Copy {} 10 | 11 | #[cfg(feature = "build-bin")] 12 | impl Hasher for ahash::AHasher { 13 | type Hash = u64; 14 | fn write(&mut self, buf: &[u8]) { 15 | std::hash::Hasher::write(self, buf); 16 | } 17 | fn finish(self) -> Self::Hash { 18 | std::hash::Hasher::finish(&self) 19 | } 20 | } 21 | 22 | #[cfg(feature = "build-bin")] 23 | impl Hasher for highway::HighwayHasher { 24 | type Hash = [u64; 4]; 25 | fn write(&mut self, buf: &[u8]) { 26 | use highway::HighwayHash; 27 | self.append(buf); 28 | } 29 | 30 | fn finish(self) 
-> Self::Hash { 31 | use highway::HighwayHash; 32 | self.finalize256() 33 | } 34 | } 35 | 36 | #[cfg(feature = "build-bin")] 37 | impl Hasher for metrohash::MetroHash128 { 38 | type Hash = (u64, u64); 39 | fn write(&mut self, buf: &[u8]) { 40 | std::hash::Hasher::write(self, buf); 41 | } 42 | 43 | fn finish(self) -> Self::Hash { 44 | self.finish128() 45 | } 46 | } 47 | 48 | #[cfg(feature = "build-bin")] 49 | impl Hasher for seahash::SeaHasher { 50 | type Hash = u64; 51 | fn write(&mut self, buf: &[u8]) { 52 | std::hash::Hasher::write(self, buf); 53 | } 54 | fn finish(self) -> Self::Hash { 55 | std::hash::Hasher::finish(&self) 56 | } 57 | } 58 | 59 | #[cfg(feature = "build-bin")] 60 | impl Hasher for twox_hash::xxhash3_128::Hasher { 61 | type Hash = u128; 62 | fn write(&mut self, buf: &[u8]) { 63 | self.write(buf); 64 | } 65 | 66 | fn finish(self) -> Self::Hash { 67 | self.finish_128() 68 | } 69 | } 70 | 71 | #[cfg(feature = "build-bin")] 72 | impl Hasher for blake3::Hasher { 73 | type Hash = [u8; 32]; 74 | fn write(&mut self, buf: &[u8]) { 75 | self.update(buf); 76 | } 77 | fn finish(self) -> Self::Hash { 78 | self.finalize().into() 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This is a binary crate. You _can_ use it as a library, but I wouldn't recommend it. 2 | //! If you do, remember to disable the default features which are used to build 3 | //! the binary. 4 | //! 5 | //! ```toml 6 | //! [dependencies] 7 | //! yadf = { version = "0.15.0", default-features = false } 8 | //! ``` 9 | //! 10 | //! A collection of functions and structs to find duplicate files. 11 | //! 12 | //! # Example : 13 | //! 14 | //! Find and display all the duplicate files at the given paths : 15 | //! 16 | //! ```no_run 17 | //! # fn foo(paths: &[std::path::PathBuf]) { 18 | //! let counter = yadf::Yadf::builder() 19 | //! 
.paths(paths) 20 | //! .build() 21 | //! .scan::(); 22 | //! println!("{}", counter.duplicates().display::()); 23 | //! # } 24 | //! ``` 25 | #![deny(unsafe_code)] 26 | #![warn(rust_2018_idioms)] 27 | 28 | mod bag; 29 | mod ext; 30 | mod fs; 31 | mod hasher; 32 | mod path; 33 | 34 | pub use bag::{Factor, Fdupes, Machine, TreeBag}; 35 | pub use globset; 36 | pub use hasher::Hasher; 37 | pub use path::Path; 38 | pub use regex; 39 | use std::rc::Rc; 40 | 41 | pub type FileCounter = TreeBag; 42 | pub type FileReplicates<'a, H> = bag::Replicates<'a, H, Path>; 43 | 44 | /// Search configuration. 45 | /// 46 | /// # Example 47 | /// 48 | /// ```no_run 49 | /// # fn foo(paths: &[std::path::PathBuf]) { 50 | /// let counter = yadf::Yadf::builder() 51 | /// .paths(paths) // required 52 | /// .minimum_file_size(64) // optional 53 | /// .maximum_file_size(1024 * 8) // optional 54 | /// .regex(None) // optional 55 | /// .glob(None) // optional 56 | /// .build() 57 | /// .scan::(); 58 | /// # } 59 | /// ``` 60 | /// 61 | /// see the docs for the [`YadfBuilder`](YadfBuilder) 62 | #[derive(Debug, typed_builder::TypedBuilder)] 63 | #[builder(doc)] 64 | pub struct Yadf> { 65 | #[builder(setter(into, doc = "Paths that will be checked for duplicate files"))] 66 | paths: Rc<[P]>, 67 | #[builder(default, setter(into, doc = "Minimum file size"))] 68 | minimum_file_size: Option, 69 | #[builder(default, setter(into, doc = "Maximum file size"))] 70 | maximum_file_size: Option, 71 | #[builder(default, setter(into, doc = "Maximum recursion depth"))] 72 | max_depth: Option, 73 | #[builder(default, setter(into, doc = "File name must match this regex"))] 74 | regex: Option, 75 | #[builder(default, setter(into, doc = "File name must match this glob"))] 76 | glob: Option, 77 | #[cfg(unix)] 78 | #[builder(default, setter(doc = "Treat hard links as duplicates"))] 79 | hard_links: bool, 80 | } 81 | 82 | impl

<P> Yadf<P>

83 | where 84 | P: AsRef, 85 | { 86 | /// This will attempt a complete scan according to its configuration. 87 | pub fn scan(self) -> FileCounter 88 | where 89 | H: hasher::Hasher, 90 | H::Hash: std::fmt::Debug, 91 | { 92 | #[cfg(unix)] 93 | let file_filter = fs::filter::FileFilter::new( 94 | self.minimum_file_size, 95 | self.maximum_file_size, 96 | self.regex, 97 | self.glob.map(|g| g.compile_matcher()), 98 | self.hard_links, 99 | ); 100 | #[cfg(not(unix))] 101 | let file_filter = fs::filter::FileFilter::new( 102 | self.minimum_file_size, 103 | self.maximum_file_size, 104 | self.regex, 105 | self.glob.map(|g| g.compile_matcher()), 106 | ); 107 | let bag = fs::find_dupes_partial::(&self.paths, self.max_depth, file_filter); 108 | if log::log_enabled!(log::Level::Info) { 109 | log::info!( 110 | "scanned {} files", 111 | bag.as_inner().values().map(Vec::len).sum::() 112 | ); 113 | log::info!( 114 | "found {} possible duplicates after initial scan", 115 | bag.duplicates().iter().map(Vec::len).sum::() 116 | ); 117 | log::trace!("{:?}", bag); 118 | } 119 | let bag = fs::dedupe::(bag); 120 | if log::log_enabled!(log::Level::Info) { 121 | log::info!( 122 | "found {} duplicates in {} groups after checksumming", 123 | bag.duplicates().iter().map(Vec::len).sum::(), 124 | bag.duplicates().iter().count(), 125 | ); 126 | log::trace!("{:?}", bag); 127 | } 128 | bag 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![deny(unsafe_code)] 2 | #![warn(rust_2018_idioms)] 3 | 4 | mod args; 5 | 6 | use anyhow::Context; 7 | use clap::{Parser, ValueEnum}; 8 | use clap_verbosity_flag::ErrorLevel; 9 | use std::fs::File; 10 | use std::io::{self, Write}; 11 | use std::path::PathBuf; 12 | use std::str::FromStr; 13 | use yadf::{Fdupes, Machine}; 14 | 15 | fn main() -> anyhow::Result<()> { 16 | human_panic::setup_panic!(); 17 | let timer = 
std::time::Instant::now(); 18 | let args = Args::init_from_env(); 19 | log::debug!("{:?}", args); 20 | let config = build_config(&args); 21 | log::debug!("{:?}", config); 22 | args.algorithm.run(args.clone(), config)?; 23 | log::debug!("{:?} elapsed", timer.elapsed()); 24 | Ok(()) 25 | } 26 | 27 | fn write_output(args: Args, bag: yadf::TreeBag) -> Result<(), anyhow::Error> 28 | where 29 | H: yadf::Hasher, 30 | { 31 | let rfactor = args.rfactor.unwrap_or_default(); 32 | let replicates = bag.replicates(rfactor.into()); 33 | match args.output { 34 | Some(path) => { 35 | let context = || format!("writing output to the file: {:?}", path.display()); 36 | let file = File::create(&path).with_context(context)?; 37 | args.format.display::<_, H>(file, replicates) 38 | } 39 | None => args.format.display::<_, H>(io::stdout().lock(), replicates), 40 | } 41 | .context("writing output")?; 42 | Ok(()) 43 | } 44 | 45 | #[cfg(unix)] 46 | fn build_config(args: &Args) -> yadf::Yadf { 47 | yadf::Yadf::builder() 48 | .paths(args.paths.as_ref()) 49 | .minimum_file_size(args.min()) 50 | .maximum_file_size(args.max()) 51 | .regex(args.regex.clone()) 52 | .glob(args.pattern.clone()) 53 | .max_depth(args.max_depth) 54 | .hard_links(args.hard_links) 55 | .build() 56 | } 57 | 58 | #[cfg(not(unix))] 59 | fn build_config(args: &Args) -> yadf::Yadf { 60 | yadf::Yadf::builder() 61 | .paths(args.paths.as_ref()) 62 | .minimum_file_size(args.min()) 63 | .maximum_file_size(args.max()) 64 | .regex(args.regex.clone()) 65 | .glob(args.pattern.clone()) 66 | .max_depth(args.max_depth) 67 | .build() 68 | } 69 | 70 | impl Algorithm { 71 | fn run

<P>(&self, args: Args, config: yadf::Yadf<P>

) -> anyhow::Result<()> 72 | where 73 | P: AsRef, 74 | { 75 | log::debug!("using {:?} hashing", self); 76 | match self { 77 | Algorithm::AHash => { 78 | write_output::(args, config.scan::())? 79 | } 80 | Algorithm::Blake3 => { 81 | write_output::(args, config.scan::())? 82 | } 83 | Algorithm::Highway => write_output::( 84 | args, 85 | config.scan::(), 86 | )?, 87 | Algorithm::MetroHash => write_output::( 88 | args, 89 | config.scan::(), 90 | )?, 91 | Algorithm::SeaHash => { 92 | write_output::(args, config.scan::())? 93 | } 94 | Algorithm::XxHash => write_output::( 95 | args, 96 | config.scan::(), 97 | )?, 98 | } 99 | Ok(()) 100 | } 101 | } 102 | 103 | impl Format { 104 | fn display( 105 | &self, 106 | writer: W, 107 | replicates: yadf::FileReplicates<'_, H::Hash>, 108 | ) -> anyhow::Result<()> 109 | where 110 | H: yadf::Hasher, 111 | W: Write, 112 | { 113 | let mut writer = io::BufWriter::with_capacity(64 * 1024, writer); 114 | match self { 115 | Format::Json => { 116 | serde_json::to_writer(&mut writer, &replicates)?; 117 | writer.write_all(b"\n")?; 118 | } 119 | Format::JsonPretty => { 120 | serde_json::to_writer_pretty(&mut writer, &replicates)?; 121 | writer.write_all(b"\n")?; 122 | } 123 | Format::Csv => csv_to_writer::<_, H>(writer, &replicates)?, 124 | Format::LdJson => ldjson_to_writer::<_, H>(writer, &replicates)?, 125 | Format::Fdupes => writeln!(writer, "{}", replicates.display::())?, 126 | Format::Machine => writeln!(writer, "{}", replicates.display::())?, 127 | }; 128 | Ok(()) 129 | } 130 | } 131 | 132 | /// Yet Another Dupes Finder 133 | #[derive(Parser, Debug, Clone)] 134 | pub struct Args { 135 | /// Directories to search 136 | /// 137 | /// default is to search inside the current working directory 138 | #[clap(value_parser)] 139 | paths: Vec, 140 | /// Output format 141 | #[clap(short, long, value_enum, default_value_t, ignore_case = true)] 142 | format: Format, 143 | /// Hashing algorithm 144 | #[clap(short, long, value_enum, default_value_t, 
ignore_case = true)] 145 | algorithm: Algorithm, 146 | /// Excludes empty files 147 | #[clap(short, long)] 148 | no_empty: bool, 149 | /// Minimum file size 150 | #[clap(long, value_name = "size")] 151 | min: Option, 152 | /// Maximum file size 153 | #[clap(long, value_name = "size")] 154 | max: Option, 155 | /// Maximum recursion depth 156 | #[clap(short = 'd', long = "depth", value_name = "depth")] 157 | max_depth: Option, 158 | /// Treat hard links to same file as duplicates 159 | #[cfg_attr(unix, clap(short = 'H', long))] 160 | #[cfg(unix)] 161 | hard_links: bool, 162 | /// Check files with a name matching a Perl-style regex, 163 | /// see: https://docs.rs/regex/1.4.2/regex/index.html#syntax 164 | #[clap(short = 'R', long)] 165 | regex: Option, 166 | /// Check files with a name matching a glob pattern, 167 | /// see: https://docs.rs/globset/0.4.6/globset/index.html#syntax 168 | #[clap(short, long, value_name = "glob")] 169 | pattern: Option, 170 | #[clap(flatten)] 171 | verbosity: clap_verbosity_flag::Verbosity, 172 | /// Replication factor [under|equal|over]:n 173 | /// 174 | /// The default is `over:1`, to find uniques use `equal:1`, 175 | /// to find files with less than 10 copies use `under:10` 176 | #[clap(long)] 177 | rfactor: Option, 178 | /// Optional output file 179 | #[clap(short, long)] 180 | output: Option, 181 | } 182 | 183 | #[derive(ValueEnum, Debug, Clone, Default)] 184 | enum Format { 185 | Csv, 186 | #[default] 187 | Fdupes, 188 | Json, 189 | JsonPretty, 190 | LdJson, 191 | Machine, 192 | } 193 | 194 | #[derive(ValueEnum, Debug, Clone, Default)] 195 | #[clap(rename_all = "lower")] 196 | enum Algorithm { 197 | AHash, 198 | Blake3, 199 | #[default] 200 | Highway, 201 | MetroHash, 202 | SeaHash, 203 | XxHash, 204 | } 205 | 206 | #[derive(Debug, Clone)] 207 | struct Byte(byte_unit::Byte); 208 | 209 | impl FromStr for Byte { 210 | type Err = String; 211 | fn from_str(s: &str) -> Result { 212 | byte_unit::Byte::from_str(s) 213 | .map(Byte) 214 | 
.map_err(|e| e.to_string()) 215 | } 216 | } 217 | 218 | #[derive(Debug, PartialEq, Clone)] 219 | enum ReplicationFactor { 220 | Under(usize), 221 | Equal(usize), 222 | Over(usize), 223 | } 224 | 225 | /// mimic serde_json interface 226 | fn csv_to_writer(writer: W, replicates: &yadf::FileReplicates<'_, H::Hash>) -> csv::Result<()> 227 | where 228 | H: yadf::Hasher, 229 | W: Write, 230 | { 231 | let mut writer = csv::WriterBuilder::new() 232 | .flexible(true) 233 | .has_headers(false) 234 | .from_writer(writer); 235 | writer.serialize(("count", "files"))?; 236 | for files in replicates { 237 | writer.serialize((files.len(), files))?; 238 | } 239 | Ok(()) 240 | } 241 | 242 | /// mimic serde_json interface 243 | fn ldjson_to_writer( 244 | mut writer: W, 245 | replicates: &yadf::FileReplicates<'_, H::Hash>, 246 | ) -> anyhow::Result<()> 247 | where 248 | H: yadf::Hasher, 249 | W: Write, 250 | { 251 | for files in replicates { 252 | serde_json::to_writer(&mut writer, &files)?; 253 | writeln!(writer)?; 254 | } 255 | Ok(()) 256 | } 257 | 258 | // #[cfg(test)] 259 | // mod tests { 260 | // use super::*; 261 | // use once_cell::sync::Lazy; 262 | 263 | // static BAG: Lazy> = Lazy::new(|| { 264 | // vec![ 265 | // (77, "hello".into()), 266 | // (77, "world".into()), 267 | // (3, "foo".into()), 268 | // (3, "bar".into()), 269 | // ] 270 | // .into_iter() 271 | // .collect() 272 | // }); 273 | 274 | // #[test] 275 | // fn csv() { 276 | // let mut buffer = Vec::new(); 277 | // let _ = csv_to_writer(&mut buffer, &BAG.duplicates()); 278 | // let result = String::from_utf8(buffer).unwrap(); 279 | // let expected = r#"count,files 280 | // 2,foo,bar 281 | // 2,hello,world 282 | // "#; 283 | // assert_eq!(result, expected); 284 | // } 285 | 286 | // #[test] 287 | // fn ldjson() { 288 | // let mut buffer = Vec::new(); 289 | // let _ = ldjson_to_writer(&mut buffer, &BAG.duplicates()); 290 | // let result = String::from_utf8(buffer).unwrap(); 291 | // let expected = r#"["foo","bar"] 292 
| // ["hello","world"] 293 | // "#; 294 | // assert_eq!(result, expected); 295 | // } 296 | // } 297 | -------------------------------------------------------------------------------- /src/path.rs: -------------------------------------------------------------------------------- 1 | /// Serialization wrapper for paths. 2 | #[derive(Debug)] 3 | pub struct Path(std::path::PathBuf); 4 | 5 | use serde::{Serialize, Serializer}; 6 | 7 | impl Serialize for Path { 8 | fn serialize(&self, serializer: S) -> Result 9 | where 10 | S: Serializer, 11 | { 12 | serializer.collect_str(&self.0.display()) 13 | } 14 | } 15 | 16 | impl From for Path 17 | where 18 | T: Into, 19 | { 20 | fn from(path: T) -> Self { 21 | Self(path.into()) 22 | } 23 | } 24 | 25 | impl AsRef for Path { 26 | fn as_ref(&self) -> &std::path::Path { 27 | &self.0 28 | } 29 | } 30 | 31 | #[cfg(test)] 32 | mod tests { 33 | #[cfg(unix)] 34 | #[test] 35 | fn shouldnt_panic_on_invalid_utf8_path() { 36 | use super::*; 37 | use std::ffi::OsString; 38 | use std::os::unix::ffi::OsStringExt; 39 | use std::path::PathBuf; 40 | // asserts its invalidity 41 | let invalid_utf8: &[u8] = b"\xe7\xe7"; 42 | assert!(String::from_utf8(invalid_utf8.to_vec()).is_err()); 43 | // without wrapper it errors 44 | let path = PathBuf::from(OsString::from_vec(invalid_utf8.to_vec())); 45 | assert!(serde_json::to_string(&path).is_err()); 46 | // with wrapper it's ok 47 | let path = Path(PathBuf::from(OsString::from_vec(invalid_utf8.to_vec()))); 48 | assert!(serde_json::to_string(&path).is_ok()); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /tests/common/mod.rs: -------------------------------------------------------------------------------- 1 | pub use test_dir::TestDir; 2 | 3 | /// quick-n-dirty any result type alias 4 | pub type AnyResult> = Result; 5 | 6 | pub const MAX_LEN: usize = 256 * 1024; 7 | 8 | pub fn random_collection(size: usize) -> I 9 | where 10 | rand::distr::StandardUniform: 
rand::distr::Distribution, 11 | I: std::iter::FromIterator, 12 | { 13 | use rand::Rng; 14 | let mut rng = rand::rng(); 15 | std::iter::repeat_with(|| rng.random()).take(size).collect() 16 | } 17 | 18 | /// test shortcut 19 | #[allow(dead_code)] 20 | pub fn find_dupes>(path: &P) -> yadf::FileCounter { 21 | yadf::Yadf::builder() 22 | .paths([path].as_ref()) 23 | .build() 24 | .scan::() 25 | } 26 | 27 | #[macro_export] 28 | macro_rules! scope_name_iter { 29 | () => {{ 30 | fn fxxfxxf() {} 31 | fn type_name_of(_: T) -> &'static str { 32 | std::any::type_name::() 33 | } 34 | type_name_of(fxxfxxf) 35 | .split("::") 36 | .take_while(|&segment| segment != "fxxfxxf") 37 | }}; 38 | } 39 | 40 | #[macro_export] 41 | macro_rules! test_dir { 42 | () => {{ 43 | ["target", "tests"] 44 | .iter() 45 | .copied() 46 | .chain(scope_name_iter!()) 47 | .collect::() 48 | }}; 49 | } 50 | 51 | mod test_dir { 52 | use std::fs::{self, File}; 53 | use std::io::{self, Write}; 54 | use std::path::{Path, PathBuf}; 55 | 56 | pub struct TestDir(PathBuf); 57 | 58 | impl TestDir { 59 | pub fn new

<P>(dir: P) -> io::Result 60 | where 61 | P: AsRef, 62 | { 63 | match fs::remove_dir_all(&dir) { 64 | // the directory should not exist at this stage 65 | // we're just double checking and don't want to return a spurious error 66 | Err(e) if e.kind() == io::ErrorKind::NotFound => {} 67 | Err(e) => return Err(e), 68 | _ => {} 69 | } 70 | fs::create_dir_all(&dir)?; 71 | Ok(TestDir(dir.as_ref().to_path_buf())) 72 | } 73 | 74 | pub fn write_file(&self, path: P, bytes: B) -> io::Result 75 | where 76 | P: AsRef, 77 | B: AsRef<[u8]>, 78 | { 79 | let path = self.0.join(path); 80 | File::create(&path)?.write_all(bytes.as_ref())?; 81 | Ok(path) 82 | } 83 | } 84 | 85 | impl Drop for TestDir { 86 | fn drop(&mut self) { 87 | fs::remove_dir_all(&self.0) 88 | .expect(&format!("couldn't remove test directory {:?}", self.0)); 89 | } 90 | } 91 | 92 | impl AsRef for TestDir { 93 | fn as_ref(&self) -> &Path { 94 | &self.0 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /tests/integration.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::{random_collection, AnyResult, TestDir, MAX_LEN}; 4 | use predicates::{boolean::PredicateBooleanExt, str as predstr}; 5 | 6 | #[test] 7 | fn function_name() { 8 | let fname = scope_name_iter!().collect::>().join("::"); 9 | assert_eq!(fname, "integration::function_name"); 10 | } 11 | 12 | #[test] 13 | fn dir_macro() { 14 | let path = test_dir!(); 15 | #[cfg(windows)] 16 | assert_eq!(path.to_str(), Some("target\\tests\\integration\\dir_macro")); 17 | #[cfg(not(windows))] 18 | assert_eq!(path.to_str(), Some("target/tests/integration/dir_macro")); 19 | } 20 | 21 | #[test] 22 | fn trace_output() -> AnyResult { 23 | let root = TestDir::new(test_dir!())?; 24 | println!("{:?}", root.as_ref()); 25 | let bytes: Vec<_> = random_collection(MAX_LEN); 26 | let file1 = root.write_file("file1", &bytes)?; 27 | let file2 = 
root.write_file("file2", &bytes)?; 28 | root.write_file("file3", &bytes[..4096])?; 29 | root.write_file("file4", &bytes[..2048])?; 30 | let _expected = serde_json::to_string(&[[file1.to_string_lossy(), file2.to_string_lossy()]]) 31 | .unwrap() 32 | + "\n"; 33 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 34 | .arg("-vvvv") // test stderr contains enough debug output 35 | .args(&["--format", "json"]) 36 | .args(&["--algorithm", "seahash"]) 37 | .arg(root.as_ref()) 38 | .assert() 39 | .success() 40 | .stderr( 41 | predstr::contains("Args {") 42 | .and(predstr::contains("Yadf {")) 43 | .and(predstr::contains("format: Json")) 44 | .and(predstr::contains("algorithm: SeaHash")) 45 | .and(predstr::contains("verbose: 4")) 46 | .and(predstr::contains( 47 | "found 2 possible duplicates after initial scan", 48 | )) 49 | .and(predstr::contains( 50 | "found 2 duplicates in 1 groups after checksumming", 51 | )) 52 | .and(predstr::contains("file1")) 53 | .and(predstr::contains("file2")) 54 | .and(predstr::contains("file3")) 55 | .and(predstr::contains("file4")), 56 | ); 57 | Ok(()) 58 | } 59 | 60 | #[test] 61 | fn regex() -> AnyResult { 62 | let root = TestDir::new(test_dir!())?; 63 | let bytes: Vec<_> = random_collection(4096); 64 | let particular_1_name = root.write_file("particular_1_name", &bytes)?; 65 | let particular_2_name = root.write_file("particular_2_name", &bytes)?; 66 | root.write_file("not_particular_2_name", &bytes)?; 67 | root.write_file("completely_different", &bytes)?; 68 | let _expected = [ 69 | particular_1_name.to_string_lossy(), 70 | particular_2_name.to_string_lossy(), 71 | ] 72 | .join("\n") 73 | + "\n"; 74 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 
75 | .args(&["--regex", "^particular_\\d_name$"]) 76 | .arg(root.as_ref()) 77 | .assert() 78 | .success() 79 | .stderr(predstr::is_empty()); 80 | Ok(()) 81 | } 82 | 83 | #[test] 84 | fn glob_pattern() -> AnyResult { 85 | let root = TestDir::new(test_dir!())?; 86 | let bytes: Vec<_> = random_collection(4096); 87 | let particular_1_name = root.write_file("particular_1_name", &bytes)?; 88 | let particular_2_name = root.write_file("particular_2_name", &bytes)?; 89 | root.write_file("not_particular_2_name", &bytes)?; 90 | root.write_file("completely_different", &bytes)?; 91 | let _expected = [ 92 | particular_1_name.to_string_lossy(), 93 | particular_2_name.to_string_lossy(), 94 | ] 95 | .join("\n") 96 | + "\n"; 97 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 98 | .args(&["--pattern", "particular*name"]) 99 | .arg(root.as_ref()) 100 | .assert() 101 | .success() 102 | .stderr(predstr::is_empty()); 103 | Ok(()) 104 | } 105 | 106 | #[test] 107 | fn min_file_size() -> AnyResult { 108 | let root = TestDir::new(test_dir!())?; 109 | let bytes: Vec<_> = random_collection(4096); 110 | let particular_1_name = root.write_file("particular_1_name", &bytes)?; 111 | let particular_2_name = root.write_file("particular_2_name", &bytes)?; 112 | root.write_file("not_particular_2_name", &bytes[..2048])?; 113 | root.write_file("completely_different", &bytes[..2048])?; 114 | let _expected = [ 115 | particular_1_name.to_string_lossy(), 116 | particular_2_name.to_string_lossy(), 117 | ] 118 | .join("\n") 119 | + "\n"; 120 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 
121 | .args(&["--min", "4K"]) 122 | .arg(root.as_ref()) 123 | .assert() 124 | .success() 125 | .stderr(predstr::is_empty()); 126 | Ok(()) 127 | } 128 | 129 | #[test] 130 | fn max_file_size() -> AnyResult { 131 | let root = TestDir::new(test_dir!())?; 132 | let bytes: Vec<_> = random_collection(4096); 133 | let particular_1_name = root.write_file("particular_1_name", &bytes[..1024])?; 134 | let particular_2_name = root.write_file("particular_2_name", &bytes[..1024])?; 135 | root.write_file("not_particular_2_name", &bytes)?; 136 | root.write_file("completely_different", &bytes)?; 137 | let _expected = [ 138 | particular_1_name.to_string_lossy(), 139 | particular_2_name.to_string_lossy(), 140 | ] 141 | .join("\n") 142 | + "\n"; 143 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 144 | .args(&["--max", "2K"]) 145 | .arg(root.as_ref()) 146 | .assert() 147 | .success() 148 | .stderr(predstr::is_empty()); 149 | Ok(()) 150 | } 151 | 152 | #[cfg(all(unix, not(target_os = "macos")))] 153 | #[test] 154 | fn non_utf8_paths() -> AnyResult { 155 | use std::ffi::OsString; 156 | use std::os::unix::ffi::OsStringExt; 157 | use std::path::PathBuf; 158 | let root = TestDir::new(test_dir!())?; 159 | let filename = PathBuf::from(OsString::from_vec(b"\xe7\xe7".to_vec())); 160 | root.write_file(&filename, b"")?; 161 | root.write_file(&"aa", b"")?; 162 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 163 | .arg(root.as_ref()) 164 | .args(&["-f", "json"]) 165 | .arg("-vv") 166 | .assert() 167 | .success(); 168 | Ok(()) 169 | } 170 | 171 | #[test] 172 | fn hard_links_flag() -> AnyResult { 173 | let predicate = predstr::contains("--hard-links"); 174 | #[cfg(not(unix))] 175 | let predicate = predicate.not(); 176 | assert_cmd::Command::cargo_bin(assert_cmd::crate_name!())? 
177 | .arg("-h") 178 | .assert() 179 | .success() 180 | .stdout(predicate); 181 | Ok(()) 182 | } 183 | -------------------------------------------------------------------------------- /tests/particular_cases.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::{find_dupes, random_collection, AnyResult, TestDir, MAX_LEN}; 4 | 5 | /// Test to be sure the sorting by hash only groups together files 6 | /// with the same contents. 7 | /// Takes some time to run. 8 | /// 9 | /// cargo test --package yadf --test common -- sanity_check --exact --nocapture -Z unstable-options --include-ignored 10 | #[test] 11 | #[ignore] 12 | fn sanity_check() { 13 | let home = dirs::home_dir().unwrap(); 14 | let counter = find_dupes(&home); 15 | for bucket in counter.duplicates().iter() { 16 | let (first, bucket) = bucket.split_first().unwrap(); 17 | let reference = std::fs::read(&first).unwrap(); 18 | for file in bucket { 19 | let contents = std::fs::read(&file).unwrap(); 20 | assert_eq!(reference, contents, "comparing {:?} and {:?}", first, file); 21 | } 22 | } 23 | } 24 | 25 | #[test] 26 | // #[ignore] 27 | fn identical_small_files() -> AnyResult { 28 | let root = TestDir::new(test_dir!())?; 29 | println!("{:?}", root.as_ref()); 30 | root.write_file("file1", b"aaa")?; 31 | root.write_file("file2", b"aaa")?; 32 | let counter = find_dupes(&root); 33 | assert_eq!(counter.duplicates().iter().count(), 1); 34 | assert_eq!(counter.as_inner().len(), 1); 35 | Ok(()) 36 | } 37 | 38 | #[test] 39 | // #[ignore] 40 | fn identical_larger_files() -> AnyResult { 41 | let root = TestDir::new(test_dir!())?; 42 | let buffer: Vec<_> = random_collection(MAX_LEN * 3); 43 | root.write_file("file1", &buffer)?; 44 | root.write_file("file2", &buffer)?; 45 | let counter = find_dupes(&root); 46 | assert_eq!(counter.duplicates().iter().count(), 1); 47 | assert_eq!(counter.as_inner().len(), 1); 48 | Ok(()) 49 | } 50 | 51 | #[test] 52 | // #[ignore] 
53 | fn files_differing_by_size() -> AnyResult { 54 | let root = TestDir::new(test_dir!())?; 55 | root.write_file("file1", b"aaaa")?; 56 | root.write_file("file2", b"aaa")?; 57 | let counter = find_dupes(&root); 58 | assert_eq!(counter.duplicates().iter().count(), 0); 59 | assert_eq!(counter.as_inner().len(), 2); 60 | Ok(()) 61 | } 62 | 63 | #[test] 64 | // #[ignore] 65 | fn files_differing_by_prefix() -> AnyResult { 66 | let root = TestDir::new(test_dir!())?; 67 | root.write_file("file1", b"aaa")?; 68 | root.write_file("file2", b"bbb")?; 69 | let counter = find_dupes(&root); 70 | assert_eq!(counter.duplicates().iter().count(), 0); 71 | assert_eq!(counter.as_inner().len(), 2); 72 | Ok(()) 73 | } 74 | 75 | #[test] 76 | // #[ignore] 77 | fn files_differing_by_suffix() -> AnyResult { 78 | let root = TestDir::new(test_dir!())?; 79 | let mut buffer1 = Vec::with_capacity(MAX_LEN * 3 + 4); 80 | buffer1.extend_from_slice(&random_collection::<_, Vec<_>>(MAX_LEN * 3)); 81 | let mut buffer2 = buffer1.clone(); 82 | buffer1.extend_from_slice(b"suf1"); 83 | buffer2.extend_from_slice(b"suf2"); 84 | root.write_file("file1", &buffer1)?; 85 | root.write_file("file2", &buffer2)?; 86 | let counter = find_dupes(&root); 87 | assert_eq!(counter.duplicates().iter().count(), 0); 88 | assert_eq!(counter.as_inner().len(), 2); 89 | Ok(()) 90 | } 91 | 92 | #[test] 93 | // #[ignore] 94 | fn files_differing_by_middle() -> AnyResult { 95 | let root = TestDir::new(test_dir!())?; 96 | let mut buffer1 = Vec::with_capacity(MAX_LEN * 2 + 4); 97 | buffer1.extend_from_slice(&random_collection::<_, Vec<_>>(MAX_LEN)); 98 | let mut buffer2 = buffer1.clone(); 99 | buffer1.extend_from_slice(b"mid1"); 100 | buffer2.extend_from_slice(b"mid2"); 101 | let suffix = random_collection::<_, Vec<_>>(MAX_LEN); 102 | buffer1.extend_from_slice(&suffix); 103 | buffer2.extend_from_slice(&suffix); 104 | root.write_file("file1", &buffer1)?; 105 | root.write_file("file2", &buffer2)?; 106 | let counter = find_dupes(&root); 
107 | assert_eq!(counter.duplicates().iter().count(), 0); 108 | assert_eq!(counter.as_inner().len(), 2); 109 | Ok(()) 110 | } 111 | -------------------------------------------------------------------------------- /tests/static/bar: -------------------------------------------------------------------------------- 1 | aa 2 | -------------------------------------------------------------------------------- /tests/static/foo: -------------------------------------------------------------------------------- 1 | aa 2 | -------------------------------------------------------------------------------- /tests/static/qax: -------------------------------------------------------------------------------- 1 | i 2 | -------------------------------------------------------------------------------- /tests/static/qix: -------------------------------------------------------------------------------- 1 | aa 2 | -------------------------------------------------------------------------------- /tests/static/qux: -------------------------------------------------------------------------------- 1 | bb 2 | --------------------------------------------------------------------------------